diff --git a/.asf.yaml b/.asf.yaml new file mode 100644 index 00000000000..b1fdee64bd1 --- /dev/null +++ b/.asf.yaml @@ -0,0 +1,88 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +github: + description: "Apache BookKeeper - a scalable, fault tolerant and low latency storage service optimized for append-only workloads" + homepage: https://bookkeeper.apache.org/ + labels: + - apache + - bookkeeper + - big-data + - distributed-log + - distributed-systems + - wal + features: + # Enable wiki for documentation + wiki: false + # Enable issues management + issues: true + # Enable projects for project management boards + projects: true + enabled_merge_buttons: + # enable squash button: + squash: true + # disable merge button: + merge: false + # disable rebase button: + rebase: false + protected_branches: + master: + required_status_checks: + # strict means "Require branches to be up to date before merging". + strict: false + # Contexts are the names of checks that must pass. + # See ./github/workflows/README.md for more documentation on this list. + contexts: + - BookKeeper CI checks completed + + required_pull_request_reviews: + dismiss_stale_reviews: false + require_code_owner_reviews: true + required_approving_review_count: 1 + + # squash or rebase must be allowed in the repo for this setting to be set to true. 
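+      # The protection that ASF infra applies from this block can be verified
+      # against the public GitHub API (see .github/workflows/README.md in this
+      # change for the full command), e.g.:
+      #   curl -s https://api.github.com/repos/apache/bookkeeper/branches/master | jq .protection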
+      required_linear_history: true
+
+      required_signatures: false
+
+    # The following branch protections only ensure that force pushes are not allowed
+    branch-4.0: {}
+    branch-4.1: {}
+    branch-4.2: {}
+    branch-4.3: {}
+    branch-4.4: {}
+    branch-4.5: {}
+    branch-4.6: {}
+    branch-4.7: {}
+    branch-4.8: {}
+    branch-4.9: {}
+    branch-4.10: {}
+    branch-4.11: {}
+    branch-4.12: {}
+    branch-4.13: {}
+    branch-4.14: {}
+    branch-4.15: {}
+    branch-4.16: {}
+
+notifications:
+  commits: commits@bookkeeper.apache.org
+  issues: commits@bookkeeper.apache.org
+  pullrequests: commits@bookkeeper.apache.org
+  discussions: dev@bookkeeper.apache.org
+  jira_options: link label
diff --git a/.dlc.json b/.dlc.json
new file mode 100644
index 00000000000..49a1467abac
--- /dev/null
+++ b/.dlc.json
@@ -0,0 +1,86 @@
+{
+  "ignorePatterns": [
+    {
+      "pattern": "^http://localhost"
+    },
+    {
+      "pattern": "^https://kubernetes.io/docs/resources-reference/v1.6/"
+    },
+    {
+      "pattern": "^https://zookeeper.apache.org/doc/"
+    },
+    {
+      "pattern": "^http://zookeeper.apache.org/doc/current/api/"
+    },
+    {
+      "pattern": "^https://github.com/apache/bookkeeper/"
+    },
+    {
+      "pattern": "^https://github.com/sijie/bookkeeper/tree/bookie_shell_refactor"
+    },
+    {
+      "pattern": "^http://pulsar.incubator.apache.org/docs/latest/getting-started/ConceptsAndArchitecture/#persistent-storage"
+    },
+    {
+      "pattern": "^https://zookeeper.apache.org/doc/current/api/org/apache/zookeeper/"
+    },
+    {
+      "pattern": "^https://bookkeeper.apache.org/docs/api/javadoc/"
+    },
+    {
+      "pattern": "^http://hbtc2012.hadooper.cn/subject/track1maheswara2.pdf"
+    },
+    {
+      "pattern": "^https://developer.yahoo.com/blogs/ydn/bookkeeper-durability-scale-54048.html"
+    },
+    {
+      "pattern": "^http://apache.claz.org/bookkeeper"
+    },
+    {
+      "pattern": "^https://dcos.io/"
+    },
+    {
+      "pattern": "^https://docs.mesosphere.com"
+    },
+    {
+      "pattern": "^http://master.dcos/exhibitor"
+    },
+    {
+      "pattern": "^https://docs.mesosphere.com/"
+    },
+    {
+      "pattern": "^https://distributedlog.io"
+    },
+    {
+      "pattern": "^https://www.opengroup.org/membership/forums/platform/unix"
+    },
+    {
+      "pattern": "^https://docs.oracle.com/javase/8/docs/technotes/guides/security/jgss/tutorials/KerberosReq.html"
+    },
+    {
+      "pattern": "^http://daxue.qq.com/content/content/id/2492"
+    },
+    {
+      "pattern": "^https://calendar.google.com/"
+    },
+    {
+      "pattern": "^#"
+    },
+    {
+      "pattern": ".*\\{\\{.*"
+    },
+    {
+      "pattern": "^//"
+    }
+  ],
+  "timeout": "10s",
+  "retryOn429": true,
+  "retryCount": 10,
+  "fallbackRetryDelay": "1000s",
+  "aliveStatusCodes": [
+    200,
+    400,
+    401,
+    403
+  ]
+}
\ No newline at end of file
diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md
index 02b03f22dae..84e6844b3b6 100644
--- a/.github/ISSUE_TEMPLATE.md
+++ b/.github/ISSUE_TEMPLATE.md
@@ -2,7 +2,7 @@ Is this a question, feature request, bug report, or bookkeeper proposal?
 
 **QUESTION**
 
-Have you checked our documentation at http://bookkeeper.apache.org/ , If you could not find an answer there, please consider asking your question in our community forum at user@bookkeeper.apache.org, as it would benefit other members of our community.
+Have you checked our documentation at https://bookkeeper.apache.org/? If you could not find an answer there, please consider asking your question in our community forum at user@bookkeeper.apache.org, as it would benefit other members of our community.
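(The `.dlc.json` added above is the config that `markdown-link-check` consumes via `-c` in the dead-link-checker workflow further down; a minimal local run, assuming the tool is fetched through npm and using `README.md` as a stand-in for any markdown file:)

```shell
# check a single file against the ignorePatterns/retry settings in .dlc.json
npx markdown-link-check@3.11.2 -c .dlc.json -q README.md
```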
**FEATURE REQUEST** diff --git a/.github/ISSUE_TEMPLATE/bp.md b/.github/ISSUE_TEMPLATE/bp.md new file mode 100644 index 00000000000..e71f4d7a417 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bp.md @@ -0,0 +1,19 @@ +--- +name: BookKeeper Proposal +about: Propose a major change to BookKeeper +title: '' +labels: type/proposal +assignees: '' + +--- + +**BP** + +> Follow the instructions at https://bookkeeper.apache.org/community/bookkeeper-proposals/ to create a proposal. + +This is the master ticket for tracking BP-xyz : + +[a short description for this BP] + + +Proposal PR - #abc \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 00000000000..3021fbfe02c --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,34 @@ +--- +name: Bug report +about: Create a bug report to help us improve +title: '' +labels: type/bug +assignees: '' + +--- + +**BUG REPORT** + +***Describe the bug*** + +A clear and concise description of what the bug is. + +***To Reproduce*** + +Steps to reproduce the behavior: +1. Go to '...' +2. Click on '....' +3. Scroll down to '....' +4. See error + +***Expected behavior*** + +A clear and concise description of what you expected to happen. + +***Screenshots*** + +If applicable, add screenshots to help explain your problem. + +***Additional context*** + +Add any other context about the problem here. diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 00000000000..931a2e4bcca --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,18 @@ +--- +name: Feature request +about: Suggest an idea for this project +title: '' +labels: type/feature +assignees: '' + +--- + +**FEATURE REQUEST** + +1. Please describe the feature you are requesting. + +2. Indicate the importance of this issue to you (blocker, must-have, should-have, nice-to-have). + Are you currently using any workarounds to address this issue? + +3. Provide any additional detail on your proposed use case for this feature. + diff --git a/.github/ISSUE_TEMPLATE/flaky_test.md b/.github/ISSUE_TEMPLATE/flaky_test.md new file mode 100644 index 00000000000..fc8159776c7 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/flaky_test.md @@ -0,0 +1,31 @@ +--- +name: Flaky test +about: Report a flaky test failure +title: 'Flaky-test: test_class.test_method' +labels: ["area/tests", "flaky-tests"] +assignees: '' +--- + +test_class.test_method is flaky. It fails sporadically. + +``` +[relevant parts of the exception stacktrace here] +``` + + +Fix #xyz + + + +Main Issue: #xyz + + + +BP: #xyz ### Motivation @@ -10,27 +20,12 @@ Descriptions of the changes in this PR: (Describe: what changes you have made) -Master Issue: # - > --- > In order to uphold a high standard for quality for code contributions, Apache BookKeeper runs various precommit -> checks for pull requests. A pull request can only be merged when it passes precommit checks. However running all -> the precommit checks can take a long time, some trivial changes don't need to run all the precommit checks. You -> can check following list to skip the tests that don't need to run for your pull request. Leave them unchecked if -> you are not sure, committers will help you: +> checks for pull requests. A pull request can only be merged when it passes precommit checks. > -> - [ ] [skip bookkeeper-server bookie tests]: skip testing `org.apache.bookkeeper.bookie` in bookkeeper-server module. 
-> - [ ] [skip bookkeeper-server client tests]: skip testing `org.apache.bookkeeper.client` in bookkeeper-server module. -> - [ ] [skip bookkeeper-server replication tests]: skip testing `org.apache.bookkeeper.replication` in bookkeeper-server module. -> - [ ] [skip bookkeeper-server tls tests]: skip testing `org.apache.bookkeeper.tls` in bookkeeper-server module. -> - [ ] [skip bookkeeper-server remaining tests]: skip testing all other tests in bookkeeper-server module. -> - [ ] [skip integration tests]: skip docker based integration tests. if you make java code changes, you shouldn't skip integration tests. -> - [ ] [skip build java8]: skip build on java8. *ONLY* skip this when *ONLY* changing files under documentation under `site`. -> - [ ] [skip build java9]: skip build on java9. *ONLY* skip this when *ONLY* changing files under documentation under `site`. -> --- - > --- -> Be sure to do all of the following to help us incorporate your contribution +> Be sure to do all the following to help us incorporate your contribution > quickly and easily: > > If this PR is a BookKeeper Proposal (BP): diff --git a/.github/actions/bot/package.json b/.github/actions/bot/package.json new file mode 100644 index 00000000000..0671a99823c --- /dev/null +++ b/.github/actions/bot/package.json @@ -0,0 +1,31 @@ +{ + "name": "github-action-bot", + "description": "Bot for github actions", + "version": "1.1.1", + "author": "GitHub", + "license": "Apache LICENSE 2.0", + "main": "dist/index.js", + "private": true, + "scripts": { + "build": "ncc build src/run.js" + }, + "dependencies": { + "@actions/core": "^1.2.4", + "@actions/github": "^2.2.0", + "@actions/io": "^1.0.2" + }, + "devDependencies": { + "@types/jest": "^25.1.4", + "@typescript-eslint/eslint-plugin": "^2.33.0", + "@typescript-eslint/parser": "^2.33.0", + "@zeit/ncc": "^0.22.0", + "eslint": "^7.0.0", + "eslint-config-prettier": "^6.11.0", + "husky": "^4.2.5", + "jest": "^25.1.0", + "npm-run-all": "^4.1.5", + "prettier": "^2.0.5", + "ts-jest": "^25.2.1", + "typescript": "^3.8.3" + } +} diff --git a/.github/actions/bot/src/run.js b/.github/actions/bot/src/run.js new file mode 100644 index 00000000000..01524919a66 --- /dev/null +++ b/.github/actions/bot/src/run.js @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +async function run(core, context, github) { + + try { + const owner = process.env.PROVIDER; + const repo = process.env.REPOSITORY; + const reRunCmd = process.env.RERUN_CMD; + const comment = context.payload.comment.body; + + if (comment !== reRunCmd) { + console.log("this is not a bot command"); + return; + } + + const { + data: { + head: { + sha: prRef, + } + } + } = await github.pulls.get({ + owner, + repo, + pull_number: context.issue.number, + }); + + const jobs = await github.checks.listForRef({ + owner, + repo, + ref: prRef, + status: "completed" + }); + + jobs.data.check_runs.forEach(job => { + if (job.conclusion === 'failure' || job.conclusion === 'cancelled') { + console.log("rerun job " + job.name); + github.checks.rerequestSuite({ + owner, + repo, + check_suite_id: job.check_suite.id + }) + } + }); + } catch (e) { + core.setFailed(e); + } + +} + +module.exports = ({core}, {context}, {github}) => { + return run(core, context, github); +} diff --git a/.github/actions/clean-disk/action.yml b/.github/actions/clean-disk/action.yml new file mode 100644 index 00000000000..d74c3f25fc6 --- /dev/null +++ b/.github/actions/clean-disk/action.yml @@ -0,0 +1,57 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +name: clean disk +description: makes some more space available on the disk by removing files +inputs: + mode: + description: "Use 'full' to clean as much as possible" + required: false +runs: + using: composite + steps: + - run: | + if [[ "$OSTYPE" == "linux-gnu"* ]]; then + directories=(/usr/local/lib/android /opt/ghc) + if [[ "${{ inputs.mode }}" == "full" ]]; then + # remove these directories only when mode is 'full' + directories+=(/usr/share/dotnet /opt/hostedtoolcache/CodeQL) + fi + emptydir=/tmp/empty$$/ + mkdir $emptydir + echo "::group::Available diskspace" + time df -BM / /mnt + echo "::endgroup::" + for directory in "${directories[@]}"; do + echo "::group::Removing $directory" + # fast way to delete a lot of files on linux + time sudo eatmydata rsync -a --delete $emptydir ${directory}/ + time sudo eatmydata rm -rf ${directory} + time df -BM / /mnt + echo "::endgroup::" + done + echo "::group::Cleaning apt state" + time sudo bash -c "apt-get clean; apt-get autoclean; apt-get -y --purge autoremove" + time df -BM / /mnt + echo "::endgroup::" + fi + echo "::group::Available diskspace" + time df -BM / /mnt + echo "::endgroup::" + shell: bash diff --git a/.github/actions/copy-test-reports/action.yml b/.github/actions/copy-test-reports/action.yml new file mode 100644 index 00000000000..f2d03d5c38b --- /dev/null +++ b/.github/actions/copy-test-reports/action.yml @@ -0,0 +1,27 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +name: Copy test reports +description: Aggregates all test reports to ./test-reports and ./surefire-reports directories +runs: + using: composite + steps: + - run: | + $GITHUB_WORKSPACE/dev/ci-tool move_test_reports + shell: bash diff --git a/.github/actions/tune-runner-vm/action.yml b/.github/actions/tune-runner-vm/action.yml new file mode 100644 index 00000000000..7e5f77f9a83 --- /dev/null +++ b/.github/actions/tune-runner-vm/action.yml @@ -0,0 +1,102 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +name: Tune Runner VM performance +description: tunes the GitHub Runner VM operation system +runs: + using: composite + steps: + - run: | + if [[ "$OSTYPE" == "linux-gnu"* ]]; then + echo "::group::Configure and tune OS" + # Ensure that reverse lookups for current hostname are handled properly + # Add the current IP address, long hostname and short hostname record to /etc/hosts file + echo -e "$(ip addr show eth0 | grep "inet\b" | awk '{print $2}' | cut -d/ -f1)\t$(hostname -f) $(hostname -s)" | sudo tee -a /etc/hosts + + # The default vm.swappiness setting is 60 which has a tendency to start swapping when memory + # consumption is high. 
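+          # (The effective value can be inspected with `cat /proc/sys/vm/swappiness`;
+          # cgroup-level overrides live under /sys/fs/cgroup/memory/**/memory.swappiness.)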
+ # Set vm.swappiness=1 to avoid swapping and allow high RAM usage + echo 1 | sudo tee /proc/sys/vm/swappiness + ( + shopt -s nullglob + # Set swappiness to 1 for all cgroups and sub-groups + for swappiness_file in /sys/fs/cgroup/memory/*/memory.swappiness /sys/fs/cgroup/memory/*/*/memory.swappiness; do + echo 1 | sudo tee $swappiness_file > /dev/null + done + ) || true + + # use "madvise" Linux Transparent HugePages (THP) setting + # https://www.kernel.org/doc/html/latest/admin-guide/mm/transhuge.html + # "madvise" is generally a better option than the default "always" setting + # Based on Azul instructions from https://docs.azul.com/prime/Enable-Huge-Pages#transparent-huge-pages-thp + echo madvise | sudo tee /sys/kernel/mm/transparent_hugepage/enabled + echo advise | sudo tee /sys/kernel/mm/transparent_hugepage/shmem_enabled + echo defer+madvise | sudo tee /sys/kernel/mm/transparent_hugepage/defrag + echo 1 | sudo tee /sys/kernel/mm/transparent_hugepage/khugepaged/defrag + + # tune filesystem mount options, https://www.kernel.org/doc/Documentation/filesystems/ext4.txt + # commit=999999, effectively disables automatic syncing to disk (default is every 5 seconds) + # nobarrier/barrier=0, loosen data consistency on system crash (no negative impact to empheral CI nodes) + sudo mount -o remount,nodiscard,commit=999999,barrier=0 / || true + sudo mount -o remount,nodiscard,commit=999999,barrier=0 /mnt || true + # disable discard/trim at device level since remount with nodiscard doesn't seem to be effective + # https://www.spinics.net/lists/linux-ide/msg52562.html + for i in /sys/block/sd*/queue/discard_max_bytes; do + echo 0 | sudo tee $i + done + # disable any background jobs that run SSD discard/trim + sudo systemctl disable fstrim.timer || true + sudo systemctl stop fstrim.timer || true + sudo systemctl disable fstrim.service || true + sudo systemctl stop fstrim.service || true + + # stop php-fpm + sudo systemctl stop php8.0-fpm.service || true + sudo systemctl stop php7.4-fpm.service || true + # stop mono-xsp4 + sudo systemctl disable mono-xsp4.service || true + sudo systemctl stop mono-xsp4.service || true + sudo killall mono || true + + # stop Azure Linux agent to save RAM + sudo systemctl stop walinuxagent.service || true + + # enable docker experimental mode which is + # required for using "docker build --squash" / "-Ddocker.squash=true" + daemon_json="$(sudo cat /etc/docker/daemon.json | jq '.experimental = true')" + echo "$daemon_json" | sudo tee /etc/docker/daemon.json + # restart docker daemon + sudo systemctl restart docker + echo '::endgroup::' + + # show memory + echo "::group::Available Memory" + free -m + echo '::endgroup::' + # show disk + echo "::group::Available diskspace" + df -BM + echo "::endgroup::" + # show cggroup + echo "::group::Cgroup settings for current cgroup $CURRENT_CGGROUP" + CURRENT_CGGROUP=$(cat /proc/self/cgroup | grep '0::' | awk -F: '{ print $3 }') + sudo cgget -a $CURRENT_CGGROUP || true + echo '::endgroup::' + fi + shell: bash diff --git a/.github/changes-filter.yaml b/.github/changes-filter.yaml new file mode 100644 index 00000000000..03507b7a59c --- /dev/null +++ b/.github/changes-filter.yaml @@ -0,0 +1,17 @@ +# contains pattern definitions used in workflows "changes" step +# pattern syntax: https://github.com/micromatch/picomatch +all: + - '**' +docs: + - 'site3/**' + - '.asf.yaml' + - '*.md' + - '**/*.md' + - '.github/changes-filter.yaml' + - '.github/ISSUE_TEMPLATE/**' + - 'src/owasp-dependency-check-suppressions.xml' +need_owasp: + - 'pom.xml' + - 
'**/pom.xml' + - 'src/owasp-dependency-check-false-positives.xml' + - 'src/owasp-dependency-check-suppressions.xml' \ No newline at end of file diff --git a/.github/workflows/README.md b/.github/workflows/README.md new file mode 100644 index 00000000000..b0c71c6deff --- /dev/null +++ b/.github/workflows/README.md @@ -0,0 +1,60 @@ + + +## GitHub Workflows + +This directory contains all BookKeeper CI checks. + +### Required Workflows + +When adding new CI workflows, please update the [.asf.yaml](../../.asf.yaml) if the workflow is required to pass before +a PR can be merged. Instructions on how to update the file are below. + +This project uses the [.asf.yaml](../../.asf.yaml) to configure which workflows are required to pass before a PR can +be merged. In the `.asf.yaml`, the required contexts are defined in the `github.protected_branches.*.required_status_checks.contexts.[]` +where * is any key in the `protected_branches` map. + +You can view the currently required status checks by running the following command: + +```shell +curl -s -H 'Accept: application/vnd.github.v3+json' https://api.github.com/repos/apache/bookkeeper/branches/master | \ +jq .protection +``` + +These contexts get their names in one of two ways depending on how the workflow file is written in this directory. The +following command will print out the names of each file and the associated with the check. If the `name` field is `null`, +the context will be named by the `id`. + +```shell +for f in .github/workflows/*.yaml .github/workflows/*.yml; \ +do FILE=$f yq eval -o j '.jobs | to_entries | {"file": env(FILE),"id":.[].key, "name":.[].value.name}' $f; \ +done +``` + +Duplicate names are allowed, and all checks with the same name will be treated the same (required or not required). + +When working on workflow changes, one way to find out the names of the status checks is to retrieve the names +from the PR build run. The "check-runs" can be found by commit id. Here's an example: + +```shell +curl -s "https://api.github.com/repos/apache/bookkeeper/commits/$(git rev-parse HEAD)/check-runs" | \ + jq -r '.check_runs | .[] | .name' |sort +``` diff --git a/.github/workflows/bk-ci.yml b/.github/workflows/bk-ci.yml new file mode 100644 index 00000000000..1f6ffd652ec --- /dev/null +++ b/.github/workflows/bk-ci.yml @@ -0,0 +1,587 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# + +name: BookKeeper CI + +on: + pull_request: + branches: + - master + - branch-* + push: + branches: + - master + - branch-* + workflow_dispatch: + +env: + MAVEN_OPTS: -Xss1500k -Xmx1500m -Daether.connector.http.reuseConnections=false -Daether.connector.requestTimeout=60000 -Dhttp.keepAlive=false -Dmaven.wagon.http.pool=false -Dmaven.wagon.http.retryHandler.class=standard -Dmaven.wagon.http.retryHandler.count=3 -Dmaven.wagon.http.retryHandler.requestSentEnabled=true -Dmaven.wagon.http.serviceUnavailableRetryStrategy.class=standard -Dmaven.wagon.rto=60000 + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + build-and-license-check: + name: PR Validation + runs-on: ubuntu-latest + timeout-minutes: 60 + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Tune Runner VM + uses: ./.github/actions/tune-runner-vm + + - name: Detect changed files + id: changes + uses: apache/pulsar-test-infra/paths-filter@master + with: + filters: .github/changes-filter.yaml + list-files: csv + + - name: Check changed files + id: check_changes + run: | + echo "docs_only=${{ fromJSON(steps.changes.outputs.all_count) == fromJSON(steps.changes.outputs.docs_count) && fromJSON(steps.changes.outputs.docs_count) > 0 }}" >> $GITHUB_OUTPUT + echo "need_owasp=${{ fromJSON(steps.changes.outputs.need_owasp) }}" >> $GITHUB_OUTPUT + + - name: Cache local Maven repository + if: steps.check_changes.outputs.docs_only != 'true' + id: cache + uses: actions/cache@v4 + with: + path: | + ~/.m2/repository/*/*/* + !~/.m2/repository/org/apache/bookkeeper + !~/.m2/repository/org/apache/distributedlog + key: ${{ runner.os }}-bookkeeper-all-${{ hashFiles('**/pom.xml') }} + + - name: Set up JDK 11 + if: steps.check_changes.outputs.docs_only != 'true' + uses: actions/setup-java@v4 + with: + distribution: 'temurin' + java-version: 11 + + - name: Validate pull request + if: steps.check_changes.outputs.docs_only != 'true' + run: | + mvn -T 1C -B -nsu clean install -Ddistributedlog -DskipTests + mvn -T 1C -B -nsu apache-rat:check checkstyle:check spotbugs:check package -Ddistributedlog -DskipTests + + - name: Check license files + if: steps.check_changes.outputs.docs_only != 'true' + run: dev/check-all-licenses + + - name: Generate Javadoc + if: steps.check_changes.outputs.docs_only != 'true' + run: mvn -B -nsu -am -pl bookkeeper-common,bookkeeper-server,:bookkeeper-stats-api,:bookkeeper-stats-providers,:codahale-metrics-provider,:prometheus-metrics-provider javadoc:aggregate -DskipTests -Pdelombok -Dchesktyle.skip -Dspotbugs.skip + outputs: + docs_only: ${{ steps.check_changes.outputs.docs_only }} + need_owasp: ${{ steps.check_changes.outputs.need_owasp }} + + unit-tests: + name: ${{ matrix.step_name }} + runs-on: ubuntu-latest + timeout-minutes: ${{ matrix.timeout || 60 }} + needs: [ 'build-and-license-check' ] + if: ${{ needs.build-and-license-check.outputs.docs_only != 'true' }} + strategy: + fail-fast: false + matrix: + include: + - step_name: Bookie Tests + module: bookkeeper-server + flag: bookie + test_args: "-Dtest='org.apache.bookkeeper.bookie.**'" + - step_name: Client Tests + module: bookkeeper-server + flag: client + test_args: "-Dtest='org.apache.bookkeeper.client.**'" + timeout: 75 + - step_name: Replication Tests + module: bookkeeper-server + flag: replication + test_args: "-Dtest='org.apache.bookkeeper.replication.**'" + - step_name: Remaining Tests + module: bookkeeper-server + flag: remaining + test_args: 
"-Dtest='!org.apache.bookkeeper.client.**,!org.apache.bookkeeper.bookie.**,!org.apache.bookkeeper.replication.**,!org.apache.bookkeeper.tls.**'" + - step_name: TLS Tests + module: bookkeeper-server + flag: tls + test_args: "-Dtest='org.apache.bookkeeper.tls.**'" + - step_name: StreamStorage Tests + test_args: "-f stream/pom.xml -DstreamTests" + flag: stream + - step_name: Shell tests + module: tests/scripts + flag: shell + + steps: + - name: checkout + uses: actions/checkout@v4 + + - name: Tune Runner VM + uses: ./.github/actions/tune-runner-vm + + - name: Cache local Maven repository + id: cache + uses: actions/cache@v4 + with: + path: | + ~/.m2/repository/*/*/* + !~/.m2/repository/org/apache/bookkeeper + !~/.m2/repository/org/apache/distributedlog + key: ${{ runner.os }}-bookkeeper-all-${{ hashFiles('**/pom.xml') }} + + - name: Set up JDK 11 + uses: actions/setup-java@v4 + with: + distribution: 'temurin' + java-version: 11 + + - name: Build + run: | + projects_list= + if [[ ! -z "${{ matrix.module }}" ]]; then + projects_list="-pl ${{ matrix.module }}" + fi + mvn -q -T 1C -B -nsu $projects_list install -am -DskipTests -Dcheckstyle.skip -Dspotbugs.skip -Drat.skip -Dmaven.javadoc.skip + + - name: Test - ${{ matrix.step_name }} + run: | + projects_list= + if [[ ! -z "${{ matrix.module }}" ]]; then + projects_list="-pl ${{ matrix.module }}" + fi + mvn -B -nsu $projects_list verify ${{ matrix.test_args }} + + - name: Aggregates all test reports to ./test-reports and ./surefire-reports directories + if: ${{ always() }} + uses: ./.github/actions/copy-test-reports + + - name: Publish Test Report + uses: apache/pulsar-test-infra/action-junit-report@master + if: ${{ always() }} + with: + report_paths: 'surefire-reports/TEST-*.xml' + annotate_only: 'true' + + - name: Upload Surefire reports + uses: actions/upload-artifact@v4 + if: failure() + continue-on-error: true + with: + name: unit-${{ matrix.step_name }}-reports + path: surefire-reports + retention-days: 7 + + - name: print JVM thread dumps when cancelled + if: cancelled() + run: ./dev/ci-tool print_thread_dumps + + integration-tests: + name: Integration Tests + runs-on: ubuntu-latest + timeout-minutes: 75 + needs: [ 'build-and-license-check' ] + if: ${{ needs.build-and-license-check.outputs.docs_only != 'true' }} + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Tune Runner VM + uses: ./.github/actions/tune-runner-vm + + - name: Clean Disk + uses: ./.github/actions/clean-disk + with: + mode: full + + - name: Cache local Maven repository + id: cache + uses: actions/cache@v4 + with: + path: | + ~/.m2/repository/*/*/* + !~/.m2/repository/org/apache/bookkeeper + !~/.m2/repository/org/apache/distributedlog + key: ${{ runner.os }}-bookkeeper-all-${{ hashFiles('**/pom.xml') }} + + - name: Set up JDK 11 + uses: actions/setup-java@v4 + with: + distribution: 'temurin' + java-version: 11 + + - name: Pick ubuntu mirror for the docker image build + run: | + # pick the closest ubuntu mirror and set it to UBUNTU_MIRROR environment variable + $GITHUB_WORKSPACE/dev/ci-tool pick_ubuntu_mirror + + - name: Build with Maven + run: mvn -B -nsu clean install -Pdocker -DskipTests + + - name: Run metadata driver tests + run: mvn -B -nsu -f metadata-drivers/pom.xml test -DintegrationTests + + - name: Run all integration tests (except backward compatibility tests) + run: | + mvn -B -nsu -f tests/pom.xml test -DintegrationTests -DredirectTestOutputToFile=false -DtestRetryCount=0 + + - name: print JVM thread dumps when cancelled + if: cancelled() + 
run: ./dev/ci-tool print_thread_dumps + + - name: Upload container logs on failure + uses: actions/upload-artifact@v4 + if: ${{ !success() }} + continue-on-error: true + with: + retention-days: 7 + name: integration-tests-container-logs + if-no-files-found: ignore + path: | + **/docker.log + + - name: Aggregates all test reports to ./test-reports and ./surefire-reports directories + if: ${{ always() }} + uses: ./.github/actions/copy-test-reports + + - name: Publish Test Report + uses: apache/pulsar-test-infra/action-junit-report@master + if: ${{ always() }} + with: + report_paths: 'surefire-reports/TEST-*.xml' + annotate_only: 'true' + + - name: Upload Surefire reports + uses: actions/upload-artifact@v4 + if: failure() + continue-on-error: true + with: + name: integration-tests-reports + path: surefire-reports + if-no-files-found: ignore + retention-days: 7 + + backward-compatibility-tests: + name: Backward compatibility tests + runs-on: ubuntu-latest + timeout-minutes: 75 + needs: [ 'build-and-license-check' ] + if: ${{ needs.build-and-license-check.outputs.docs_only != 'true' }} + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Tune Runner VM + uses: ./.github/actions/tune-runner-vm + + - name: Clean Disk + uses: ./.github/actions/clean-disk + with: + mode: full + + - name: Cache local Maven repository + id: cache + uses: actions/cache@v4 + with: + path: | + ~/.m2/repository/*/*/* + !~/.m2/repository/org/apache/bookkeeper + !~/.m2/repository/org/apache/distributedlog + key: ${{ runner.os }}-bookkeeper-all-${{ hashFiles('**/pom.xml') }} + + - name: Set up JDK 8 + uses: actions/setup-java@v4 + with: + distribution: 'temurin' + java-version: 8 + + - name: Pick ubuntu mirror for the docker image build + run: | + # pick the closest ubuntu mirror and set it to UBUNTU_MIRROR environment variable + $GITHUB_WORKSPACE/dev/ci-tool pick_ubuntu_mirror + + - name: Build with Maven + run: mvn -B -nsu clean install -Pdocker -DskipTests + + - name: Test current server with old clients + run: mvn -B -nsu -DbackwardCompatTests -DfailIfNoTests -pl :backward-compat-current-server-old-clients test + + - name: Test progressive upgrade + run: mvn -B -nsu -DbackwardCompatTests -DfailIfNoTests -pl :upgrade test + + - name: Other tests + run: | + mvn -B -nsu -DbackwardCompatTests -DfailIfNoTests -pl :bc-non-fips,:hierarchical-ledger-manager,:hostname-bookieid,:old-cookie-new-cluster,:recovery-no-password,:upgrade-direct test + + - name: Upload container logs on failure + uses: actions/upload-artifact@v4 + if: ${{ !success() }} + continue-on-error: true + with: + retention-days: 7 + name: backward-compatibility-tests-container-logs + if-no-files-found: ignore + path: | + **/docker.log + + - name: Aggregates all test reports to ./test-reports and ./surefire-reports directories + if: ${{ always() }} + uses: ./.github/actions/copy-test-reports + + - name: Publish Test Report + uses: apache/pulsar-test-infra/action-junit-report@master + if: ${{ always() }} + with: + report_paths: 'surefire-reports/TEST-*.xml' + annotate_only: 'true' + + - name: Upload Surefire reports + uses: actions/upload-artifact@v4 + if: failure() + continue-on-error: true + with: + name: backward-compatibility-tests-reports + path: surefire-reports + if-no-files-found: ignore + retention-days: 7 + + windows-build: + name: Build with windows on JDK 11 + runs-on: windows-latest + timeout-minutes: 30 + needs: [ 'build-and-license-check' ] + if: ${{ needs.build-and-license-check.outputs.docs_only != 'true' }} + steps: + - name: 
Checkout + uses: actions/checkout@v4 + + - name: Tune Runner VM + uses: ./.github/actions/tune-runner-vm + + - name: Install mingw + run: choco install mingw + + - name: Cache local Maven repository + id: cache + uses: actions/cache@v4 + with: + path: | + ~/.m2/repository/*/*/* + !~/.m2/repository/org/apache/bookkeeper + !~/.m2/repository/org/apache/distributedlog + key: ${{ runner.os }}-bookkeeper-all-${{ hashFiles('**/pom.xml') }} + + - name: Set up JDK 11 + uses: actions/setup-java@v4 + with: + distribution: 'temurin' + java-version: 11 + + - name: mvn package + run: mvn -B -nsu clean package -DskipTests + + macos-build: + name: Build with macos on JDK 11 + runs-on: macos-latest + timeout-minutes: 30 + needs: [ 'build-and-license-check' ] + if: ${{ needs.build-and-license-check.outputs.docs_only != 'true' }} + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Tune Runner VM + uses: ./.github/actions/tune-runner-vm + + - name: Cache local Maven repository + id: cache + uses: actions/cache@v4 + with: + path: | + ~/.m2/repository/*/*/* + !~/.m2/repository/org/apache/bookkeeper + !~/.m2/repository/org/apache/distributedlog + key: ${{ runner.os }}-bookkeeper-all-${{ hashFiles('**/pom.xml') }} + + - name: Set up JDK 11 + uses: actions/setup-java@v4 + with: + distribution: 'temurin' + java-version: 11 + + - name: mvn package + run: mvn -B -nsu clean package -DskipTests + + jdk-compatibility-checks: + name: ${{ matrix.step_name }} + runs-on: ubuntu-latest + timeout-minutes: ${{ matrix.timeout || 60 }} + needs: [ 'build-and-license-check' ] + if: ${{ needs.build-and-license-check.outputs.docs_only != 'true' }} + strategy: + fail-fast: false + matrix: + include: + - step_name: Compatibility Check Java8 + jdk_version: 8 + - step_name: Compatibility Check Java11 + jdk_version: 11 + - step_name: Compatibility Check Java17 + jdk_version: 17 + - step_name: Compatibility Check Java21 + jdk_version: 21 + + steps: + - name: checkout + uses: actions/checkout@v4 + + - name: Tune Runner VM + uses: ./.github/actions/tune-runner-vm + + - name: Cache local Maven repository + id: cache + uses: actions/cache@v4 + with: + path: | + ~/.m2/repository/*/*/* + !~/.m2/repository/org/apache/bookkeeper + !~/.m2/repository/org/apache/distributedlog + key: ${{ runner.os }}-bookkeeper-all-${{ hashFiles('**/pom.xml') }} + + - name: Set up JDK ${{ matrix.jdk_version }} + uses: actions/setup-java@v4 + with: + distribution: 'temurin' + java-version: ${{ matrix.jdk_version }} + + - name: Build with Maven + run: mvn clean package -B -nsu -DskipBookKeeperServerTests + + - name: print JVM thread dumps when cancelled + if: cancelled() + run: ./dev/ci-tool print_thread_dumps + + typo-check: + name: Typo Check + # only run on pull requests because of security reasons + # we shouldn't trust external actions for builds within the repository + if: ${{ github.event_name == 'pull_request' }} + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Check typos + uses: crate-ci/typos@v1.22.4 + + owasp-dependency-check: + name: OWASP Dependency Check + runs-on: ubuntu-latest + timeout-minutes: 60 + needs: [ 'build-and-license-check' ] + if: ${{ needs.build-and-license-check.outputs.need_owasp == 'true' }} + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Tune Runner VM + uses: ./.github/actions/tune-runner-vm + + - name: Cache local Maven repository + id: cache + uses: actions/cache@v4 + with: + path: | + ~/.m2/repository/*/*/* + !~/.m2/repository/org/apache/bookkeeper + 
!~/.m2/repository/org/apache/distributedlog + key: ${{ runner.os }}-bookkeeper-all-${{ hashFiles('**/pom.xml') }} + + - name: Set up JDK 11 + uses: actions/setup-java@v4 + with: + distribution: 'temurin' + java-version: 21 + + - name: run "clean install verify" to trigger dependency check + # excluding dlfs because it includes hadoop lib with + # CVEs that we cannot patch up anyway + run: mvn -q -B -ntp clean install verify -Powasp-dependency-check -DskipTests -pl '!stream/distributedlog/io/dlfs,!tests' + + - name: Upload report + uses: actions/upload-artifact@v4 + if: ${{ cancelled() || failure() }} + continue-on-error: true + with: + name: dependency report + path: target/dependency-check-report.html + retention-days: 7 + + bookkeeper-ci-checks-completed: + name: "BookKeeper CI checks completed" + if: ${{ always() && ((github.event_name != 'schedule') || (github.repository == 'apache/bookkeeper')) }} + runs-on: ubuntu-latest + timeout-minutes: 10 + needs: [ + 'backward-compatibility-tests', + 'build-and-license-check', + 'integration-tests', + 'jdk-compatibility-checks', + 'macos-build', + 'owasp-dependency-check', + 'typo-check', + 'unit-tests', + 'windows-build' + ] + steps: + - name: Check build-and-license-check success + run: | + if [[ ! ( \ + "${{ needs.build-and-license-check.result }}" == "success" \ + ) ]]; then + echo "Required jobs haven't been completed successfully." + exit 1 + fi + - name: Check typo-check success for pull requests + if: ${{ github.event_name == 'pull_request' }} + run: | + if [[ ! ( \ + "${{ needs.typo-check.result }}" == "success" \ + ) ]]; then + echo "Required jobs haven't been completed successfully." + exit 1 + fi + - name: Check that other required jobs were completed successfully + if: ${{ needs.build-and-license-check.outputs.docs_only != 'true' }} + run: | + if [[ ! ( \ + "${{ needs.backward-compatibility-tests.result }}" == "success" \ + && "${{ needs.integration-tests.result }}" == "success" \ + && "${{ needs.jdk-compatibility-checks.result }}" == "success" \ + && "${{ needs.macos-build.result }}" == "success" \ + && "${{ needs.unit-tests.result }}" == "success" \ + && "${{ needs.windows-build.result }}" == "success" \ + ) ]]; then + echo "Required jobs haven't been completed successfully." + exit 1 + fi diff --git a/.github/workflows/bk-streamstorage-python.yml b/.github/workflows/bk-streamstorage-python.yml new file mode 100644 index 00000000000..71dbe610452 --- /dev/null +++ b/.github/workflows/bk-streamstorage-python.yml @@ -0,0 +1,85 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# + +name: BookKeeper StreamStorage Python Client +on: + pull_request: + branches: + - master + - branch-* + paths: + - 'stream/**' + - '.github/workflows/bk-streamstorage-python.yml' + push: + branches: + - master + - branch-* + paths: + - 'stream/**' + - '.github/workflows/bk-streamstorage-python.yml' + +jobs: + stream-storage-python-client-unit-tests: + name: StreamStorage Python Client Unit Tests + runs-on: ubuntu-latest + timeout-minutes: 60 + steps: + - name: checkout + uses: actions/checkout@v4 + - name: Tune Runner VM + uses: ./.github/actions/tune-runner-vm + - name: Test + run: ./stream/clients/python/scripts/test.sh + + + Stream-storage-python-client-integration-tests: + name: StreamStorage Python Client Integration Tests + runs-on: ubuntu-latest + timeout-minutes: 60 + steps: + - name: checkout + uses: actions/checkout@v4 + - name: Tune Runner VM + uses: ./.github/actions/tune-runner-vm + - name: Cache local Maven repository + id: cache + uses: actions/cache@v4 + with: + path: | + ~/.m2/repository/*/*/* + !~/.m2/repository/org/apache/bookkeeper + !~/.m2/repository/org/apache/distributedlog + key: ${{ runner.os }}-bookkeeper-all-${{ hashFiles('**/pom.xml') }} + - name: Set up JDK 11 + uses: actions/setup-java@v4 + with: + distribution: 'temurin' + java-version: 11 + - name: Build + run: mvn -q -T 1C -B -nsu clean install -DskipTests -Dcheckstyle.skip -Dspotbugs.skip -Drat.skip -Dmaven.javadoc.skip + - name: Pick ubuntu mirror for the docker image build + run: | + # pick the closest ubuntu mirror and set it to UBUNTU_MIRROR environment variable + $GITHUB_WORKSPACE/dev/ci-tool pick_ubuntu_mirror + - name: Build Test image + run: ./stream/clients/python/docker/build-local-image.sh + - name: Test + run: ./stream/clients/python/scripts/docker_integration_tests.sh + + diff --git a/.github/workflows/bot.yml b/.github/workflows/bot.yml new file mode 100644 index 00000000000..43d61c1179a --- /dev/null +++ b/.github/workflows/bot.yml @@ -0,0 +1,45 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# + +name: Bot tests +on: + issue_comment: + types: [created] + +jobs: + bot: + name: Bot tests + runs-on: ubuntu-latest + + steps: + - name: clone repository + uses: actions/checkout@v4 + + - name: bot actions + uses: actions/github-script@v7 + env: + PROVIDER: 'apache' + REPOSITORY: 'bookkeeper' + RERUN_CMD: 'rerun failure checks' + with: + github-token: ${{secrets.BKBOT_TOKEN}} + script: | + const path = require('path') + const scriptPath = path.resolve('.github/actions/bot/src/run.js') + require(scriptPath)({core}, {context}, {github}) \ No newline at end of file diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml new file mode 100644 index 00000000000..e838fccc36e --- /dev/null +++ b/.github/workflows/codeql.yml @@ -0,0 +1,97 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +name: "CodeQL" + +on: + push: + branches: [ "master" ] + pull_request: + branches: [ "master" ] + schedule: + - cron: '27 11 * * 6' + +jobs: + analyze: + name: Analyze + runs-on: 'ubuntu-latest' + timeout-minutes: 360 + permissions: + # required for all workflows + security-events: write + + # only required for workflows in private repositories + actions: read + contents: read + pull-requests: read + + strategy: + fail-fast: false + matrix: + language: [ 'c-cpp', 'java-kotlin', 'python' ] + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Detect changed files + id: changes + uses: apache/pulsar-test-infra/paths-filter@master + with: + filters: .github/changes-filter.yaml + list-files: csv + + - name: Check changed files + id: check_changes + run: | + echo "docs_only=${{ fromJSON(steps.changes.outputs.all_count) == fromJSON(steps.changes.outputs.docs_count) && fromJSON(steps.changes.outputs.docs_count) > 0 }}" >> $GITHUB_OUTPUT + + - name: Cache local Maven repository + if: steps.check_changes.outputs.docs_only != 'true' + id: cache + uses: actions/cache@v4 + with: + path: | + ~/.m2/repository/*/*/* + !~/.m2/repository/org/apache/bookkeeper + !~/.m2/repository/org/apache/distributedlog + key: ${{ runner.os }}-bookkeeper-all-${{ hashFiles('**/pom.xml') }} + + # Initializes the CodeQL tools for scanning. 
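+      # (Note: CodeQL's Java analysis traces a real build, which is why the
+      # Maven install step below runs before the analyze step.)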
+ - name: Initialize CodeQL + uses: github/codeql-action/init@v3 + with: + languages: ${{ matrix.language }} + + - name: Set up JDK 11 + if: steps.check_changes.outputs.docs_only != 'true' + uses: actions/setup-java@v4 + with: + distribution: 'temurin' + java-version: 11 + + - name: Validate pull request + if: steps.check_changes.outputs.docs_only != 'true' + run: | + mvn -T 1C -B -nsu clean install -Ddistributedlog -DskipTests + + - name: Perform CodeQL Analysis + if: steps.check_changes.outputs.docs_only != 'true' + uses: github/codeql-action/analyze@v3 + with: + category: "/language:${{matrix.language}}" diff --git a/.github/workflows/dead-link-checker.yaml b/.github/workflows/dead-link-checker.yaml new file mode 100644 index 00000000000..8dae314d409 --- /dev/null +++ b/.github/workflows/dead-link-checker.yaml @@ -0,0 +1,43 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: Dead link checker + +on: + push: + pull_request: + branches: + - master + - branch-* + paths: + - '**.md' + +concurrency: + group: dlc-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + check-dead-links: + name: Dead link checker + runs-on: ubuntu-latest + timeout-minutes: 30 + steps: + - uses: actions/checkout@v4 + - run: sudo npm install -g markdown-link-check@3.11.2 + - run: | + for file in $(find . -name "*.md"); do + markdown-link-check -c .dlc.json -q "$file" + done \ No newline at end of file diff --git a/.github/workflows/java21-daily-build.yml b/.github/workflows/java21-daily-build.yml new file mode 100644 index 00000000000..fa599a32a33 --- /dev/null +++ b/.github/workflows/java21-daily-build.yml @@ -0,0 +1,47 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +name: JDK 21 Daily Build + +on: + schedule: + - cron: '0 0 * * *' # Runs at 00:00 UTC every day + workflow_dispatch: + +jobs: + jdk21-daily-build: + name: Build on JDK 21 + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Set up JDK 21 + uses: actions/setup-java@v4 + with: + distribution: 'temurin' + java-version: 21 + - name: Build with Maven + run: mvn -B clean install + - name: Aggregates all test reports to ./test-reports and ./surefire-reports directories If failure + if: failure() + continue-on-error: true + uses: ./.github/actions/copy-test-reports + - name: Upload Surefire reports + uses: actions/upload-artifact@v4 + if: failure() + continue-on-error: true + with: + name: jdk21-tests-reports + path: surefire-reports diff --git a/.github/workflows/owasp-daily-build.yml b/.github/workflows/owasp-daily-build.yml new file mode 100644 index 00000000000..2da08c4c945 --- /dev/null +++ b/.github/workflows/owasp-daily-build.yml @@ -0,0 +1,44 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: OWASP Daily Build + +on: + schedule: + - cron: '0 0 * * *' # Runs at 00:00 UTC every day + workflow_dispatch: + +jobs: + owasp-daily-build: + name: OWASP Dependency Check + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Tune Runner VM + uses: ./.github/actions/tune-runner-vm + + - name: Set up JDK 21 + uses: actions/setup-java@v4 + with: + distribution: 'temurin' + java-version: 21 + + - name: run "clean install verify" to trigger dependency check + # excluding dlfs because it includes hadoop lib with + # CVEs that we cannot patch up anyway + run: mvn -q -B -ntp clean install verify -Powasp-dependency-check -DskipTests -pl '!stream/distributedlog/io/dlfs,!tests' diff --git a/.github/workflows/website-deploy.yaml b/.github/workflows/website-deploy.yaml new file mode 100644 index 00000000000..868b1db0c7b --- /dev/null +++ b/.github/workflows/website-deploy.yaml @@ -0,0 +1,63 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+#
+
+name: Website deploy
+on:
+  workflow_dispatch:
+  push:
+    branches:
+      - master
+    paths:
+      - 'site3/**'
+      - '.github/workflows/website-deploy.yaml'
+
+env:
+  DEPLOY_URL: "https://bookkeeper.apache.org/"
+
+jobs:
+  build-website:
+    name: Build and deploy the website
+    if: ${{ github.repository == 'apache/bookkeeper' }}
+    runs-on: ubuntu-latest
+    timeout-minutes: 180
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Set up JDK 11
+        uses: actions/setup-java@v4
+        with:
+          distribution: 'temurin'
+          java-version: 11
+
+      - name: Setup NodeJS
+        uses: actions/setup-node@v4
+        with:
+          node-version: '16'
+
+      - name: Setup yarn
+        run: npm install -g yarn
+
+      - name: Publish
+        env:
+          GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
+        run: |
+          ./site3/website/scripts/build-website.sh
+          ./site3/website/scripts/publish-website.sh
\ No newline at end of file
diff --git a/.github/workflows/website-pr-validation.yml b/.github/workflows/website-pr-validation.yml
new file mode 100644
index 00000000000..9d38cb59f28
--- /dev/null
+++ b/.github/workflows/website-pr-validation.yml
@@ -0,0 +1,57 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+name: Website PR validation
+
+on:
+  workflow_dispatch:
+  pull_request:
+    branches:
+      - master
+      - branch-*
+    paths:
+      - 'site3/**'
+      - '.github/workflows/website-pr-validation.yml'
+
+jobs:
+  website-pull-validation:
+    runs-on: ubuntu-latest
+    timeout-minutes: 60
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Set up JDK 11
+        uses: actions/setup-java@v4
+        with:
+          distribution: 'temurin'
+          java-version: 11
+
+      - name: Setup NodeJS
+        uses: actions/setup-node@v4
+        with:
+          node-version: '16'
+
+      - name: Setup yarn
+        run: npm install -g yarn
+
+      - name: Build website
+        run: |
+          ./site3/website/scripts/build-website.sh
diff --git a/.github/workflows/windows-daily-build.yml b/.github/workflows/windows-daily-build.yml
new file mode 100644
index 00000000000..ff130de29af
--- /dev/null
+++ b/.github/workflows/windows-daily-build.yml
@@ -0,0 +1,47 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: Windows Daily Build
+
+on:
+  schedule:
+    - cron: '0 0 * * *' # Runs at 00:00 UTC every day
+  workflow_dispatch:
+
+jobs:
+  windows-daily-build:
+    name: Daily Build and Test on Windows
+    runs-on: windows-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set up JDK 21
+        uses: actions/setup-java@v4
+        with:
+          distribution: 'temurin'
+          java-version: 21
+      - name: Build with Maven
+        run: mvn -B clean install
+      - name: Aggregate all test reports to ./test-reports and ./surefire-reports directories on failure
+        if: failure()
+        continue-on-error: true
+        uses: ./.github/actions/copy-test-reports
+      - name: Upload Surefire reports
+        uses: actions/upload-artifact@v4
+        if: failure()
+        continue-on-error: true
+        with:
+          name: windows-tests-reports
+          path: surefire-reports
diff --git a/.gitignore b/.gitignore
index 4865c98eb45..27b4fba8738 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,8 +4,10 @@
 .settings/
 .recommenders/
 
-# Intellij
+# IntelliJ
 .idea/
+!.idea/icon.svg
+!.idea/vcs.xml
 *.iml
 *.iws
 
@@ -29,3 +31,20 @@ logs/
 
 # Data directory
 data/
+
+# PID files
+**/*.pid
+
+# Files generated under the following directories
+tools/all/src/main/resources
+
+# Exclude versionsBackup files (generated by `mvn versions:set`)
+**/*.versionsBackup
+
+node_modules
+package-lock.json
+# Gradle
+build/
+.gradle/
+*.log
+*.dat
\ No newline at end of file
diff --git a/.idea/icon.svg b/.idea/icon.svg
new file mode 100644
index 00000000000..f1603840ef2
--- /dev/null
+++ b/.idea/icon.svg
@@ -0,0 +1,70 @@
+[SVG markup not recoverable from this extract: the file adds the project icon, produced by OmniGraffle 7.4.2 on 2017-09-12 ("Canvas 1" / "Layer 1").]
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 00000000000..fd1e88b2cd2
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,36 @@
+[XML markup not recoverable from this extract.]
diff --git a/.test-infra/jenkins/common_job_properties.groovy b/.test-infra/jenkins/common_job_properties.groovy
deleted file mode 100644
index 999f9db509a..00000000000
--- a/.test-infra/jenkins/common_job_properties.groovy
+++ /dev/null
@@ -1,263 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// Contains functions that help build Jenkins projects. Functions typically set
-// common properties that are shared among all Jenkins projects.
-// Code in this directory should conform to the Groovy style guide.
-// http://groovy-lang.org/style-guide.html
-class common_job_properties {
-
-  // Sets common top-level job properties for website repository jobs.
-  static void setTopLevelWebsiteJobProperties(context,
-                                              String branch = 'master') {
-    // GitHub project.
- context.properties { - githubProjectUrl('https://jenkins@github.com/apache/bookkeeper/') - } - - setTopLevelJobProperties( - context, - 'https://gitbox.apache.org/repos/asf/bookkeeper.git', - branch, - 'git-websites', - 30) - } - - // Sets common top-level job properties for main repository jobs. - static void setTopLevelMainJobProperties(context, - String branch = 'master', - String jdkVersion = 'JDK 1.8 (latest)', - int timeout = 200, - String jenkinsExecutorLabel = 'ubuntu', - String branchVarName = '${sha1}') { - // GitHub project. - context.properties { - githubProjectUrl('https://github.com/apache/bookkeeper/') - } - - - setTopLevelJobProperties( - context, - 'https://github.com/apache/bookkeeper.git', - branch, - jenkinsExecutorLabel, - timeout, - jdkVersion, - branchVarName) - } - - // Sets common top-level job properties. Accessed through one of the above - // methods to protect jobs from internal details of param defaults. - private static void setTopLevelJobProperties(context, - String scmUrl, - String defaultBranch, - String jenkinsExecutorLabel, - int defaultTimeout, - String jdkVersion = 'JDK 1.8 (latest)', - String branchVarName = '${sha1}') { - // Set JDK version. - context.jdk(jdkVersion) - - // Restrict this project to run only on Jenkins executors as specified - context.label(jenkinsExecutorLabel) - - // Discard old builds. Build records are only kept up to this number of days. - context.logRotator { - daysToKeep(14) - } - - // Source code management. - context.scm { - git { - remote { - url(scmUrl) - refspec('+refs/heads/*:refs/remotes/origin/* ' + - '+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*') - } - branch(branchVarName) - extensions { - cleanAfterCheckout() - } - } - } - - // add the parameter when branch var name is `sha1` - if (branchVarName == '${sha1}') { - context.parameters { - // This is a recommended setup if you want to run the job manually. The - // ${sha1} parameter needs to be provided, and defaults to the main branch. - stringParam( - 'sha1', - defaultBranch, - 'Commit id or refname (eg: origin/pr/9/head) you want to build.') - } - } - - context.wrappers { - // Abort the build if it's stuck for more minutes than specified. - timeout { - absolute(defaultTimeout) - abortBuild() - } - - credentialsBinding { - string("COVERALLS_REPO_TOKEN", "bookkeeper-coveralls-token") - usernamePassword('DOCKER_USER', 'DOCKER_PASSWORD', 'bookkeeper_dockerhub') - } - } - } - - // Sets the pull request build trigger. Accessed through precommit methods - // below to insulate callers from internal parameter defaults. - private static void setPullRequestBuildTrigger(context, - String commitStatusContext, - String prTriggerPhrase = '', - String prSkipBuildPhrase = '', - boolean onlyMaster = false) { - context.triggers { - githubPullRequest { - admins(['asfbot']) - useGitHubHooks() - orgWhitelist(['apache']) - allowMembersOfWhitelistedOrgsAsAdmin() - permitAll() - // prTriggerPhrase is the argument which gets set when we want to allow - // post-commit builds to run against pending pull requests. This block - // overrides the default trigger phrase with the new one. Setting this - // will disable automatic invocation of this build; the phrase will be - // required to start it. 
- if (prTriggerPhrase) { - triggerPhrase(prTriggerPhrase) - } - if (prSkipBuildPhrase) { - skipBuildPhrase(prSkipBuildPhrase) - } - if (onlyMaster) { - whiteListTargetBranches(['master']) - } - - extensions { - commitStatus { - // This is the name that will show up in the GitHub pull request UI - // for this Jenkins project. - delegate.context("Jenkins: " + commitStatusContext) - } - } - } - } - // Comment messages after build completes. - context.configure { - def messages = it / triggers / 'org.jenkinsci.plugins.ghprb.GhprbTrigger' / extensions / 'org.jenkinsci.plugins.ghprb.extensions.comments.GhprbBuildStatus' / messages - messages << 'org.jenkinsci.plugins.ghprb.extensions.comments.GhprbBuildResultMessage' { - message('--none--') - result('SUCCESS') - } - messages << 'org.jenkinsci.plugins.ghprb.extensions.comments.GhprbBuildResultMessage' { - message('--none--') - result('ERROR') - } - messages << 'org.jenkinsci.plugins.ghprb.extensions.comments.GhprbBuildResultMessage' { - message('--none--') - result('FAILURE') - } - } - } - - // Sets common config for Maven jobs. - static void setMavenConfig(context, mavenInstallation='Maven 3.5.0', mavenOpts='-Xmx4096m -Xms2048m') { - context.mavenInstallation(mavenInstallation) - context.mavenOpts('-Dorg.slf4j.simpleLogger.showDateTime=true') - context.mavenOpts('-Dorg.slf4j.simpleLogger.dateTimeFormat=yyyy-MM-dd\\\'T\\\'HH:mm:ss.SSS') - // The -XX:+TieredCompilation -XX:TieredStopAtLevel=1 JVM options enable - // tiered compilation to make the JVM startup times faster during the tests. - context.mavenOpts('-XX:+TieredCompilation') - context.mavenOpts('-XX:TieredStopAtLevel=1') - context.mavenOpts(mavenOpts) - context.rootPOM('pom.xml') - // Use a repository local to the workspace for better isolation of jobs. - context.localRepository(LocalRepositoryLocation.LOCAL_TO_WORKSPACE) - // Disable archiving the built artifacts by default, as this is slow and flaky. - // We can usually recreate them easily, and we can also opt-in individual jobs - // to artifact archiving. - if (context.metaClass.respondsTo(context, 'archivingDisabled', boolean)) { - context.archivingDisabled(true) - } - } - - // Sets common config for PreCommit jobs. - static void setPreCommit(context, - String commitStatusName, - String prTriggerPhrase = '', - String prSkipBuildPhrase = '', - boolean onlyMaster = false) { - // Set pull request build trigger. - setPullRequestBuildTrigger(context, commitStatusName, prTriggerPhrase, prSkipBuildPhrase, onlyMaster) - } - - // Enable triggering postcommit runs against pull requests. Users can comment the trigger phrase - // specified in the postcommit job and have the job run against their PR to run - // tests not in the presubmit suite for additional confidence. - static void enablePhraseTriggeringFromPullRequest(context, - String commitStatusName, - String prTriggerPhrase) { - setPullRequestBuildTrigger( - context, - commitStatusName, - prTriggerPhrase) - } - - // Sets common config for PostCommit jobs. - static void setPostCommit(context, - String buildSchedule = '0 */6 * * *', - boolean triggerEveryPush = true, - String notifyAddress = 'issues@bookkeeper.apache.org', - boolean emailIndividuals = true) { - // Set build triggers - context.triggers { - // By default runs every 6 hours. - cron(buildSchedule) - if (triggerEveryPush) { - githubPush() - } - } - - context.publishers { - // Notify an email address for each failed build (defaults to commits@). 
- mailer(notifyAddress, false, emailIndividuals) - } - } - - // Sets common config for Website PostCommit jobs. - static void setWebsitePostCommit(context, - String buildSchedule = 'H 1 * * *', - String notifyAddress = 'issues@bookkeeper.apache.org', - boolean emailIndividuals = true) { - // Set build triggers - context.triggers { - // By default runs every 6 hours. - scm(buildSchedule) - githubPush() - } - - context.publishers { - // Notify an email address for each failed build (defaults to commits@). - mailer(notifyAddress, false, emailIndividuals) - } - } - -} diff --git a/.test-infra/jenkins/jenkins_testing_job_seed.groovy b/.test-infra/jenkins/jenkins_testing_job_seed.groovy deleted file mode 100644 index a4cb50ae40f..00000000000 --- a/.test-infra/jenkins/jenkins_testing_job_seed.groovy +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -job('bookkeeper-jenkins-testing-seed') { - description('Seed job, which allows DSL jobs to be tested before being pushed for review') - - // Source code management. - scm { - git { - remote { - url('${gitrepo}') - refspec('+refs/heads/*:refs/remotes/origin/*') - } - branch('${sha1}') - extensions { - cleanAfterCheckout() - } - } - } - - parameters { - stringParam( - 'gitrepo', 'https://github.com/apache/bookkeeper/', 'Repo to clone') - - stringParam( - 'sha1', - 'master', - 'Commit id or refname (eg: origin/pr/9/head) you want to build.') - } - - steps { - dsl { - // A list or a glob of other groovy files to process. - external('.test-infra/jenkins/jenkins_testing_job_*.groovy') - lookupStrategy('SEED_JOB') - // If a job is removed from the script, delete it - removeAction('DELETE') - } - } -} diff --git a/.test-infra/jenkins/job_bookkeeper_codecoverage.groovy b/.test-infra/jenkins/job_bookkeeper_codecoverage.groovy deleted file mode 100644 index 07635497d04..00000000000 --- a/.test-infra/jenkins/job_bookkeeper_codecoverage.groovy +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -import common_job_properties - -// This job deploys a snapshot of latest master to artifactory nightly -mavenJob('bookkeeper_codecoverage') { - description('runs a `mvn clean verify` of the nightly snapshot for bookkeeper,\n\ - running tests and gathering code coverage metrics.') - - // clean up the workspace before build - wrappers { preBuildCleanup() } - - // Set common parameters. - common_job_properties.setTopLevelMainJobProperties(delegate) - - // Sets that this is a PostCommit job. - common_job_properties.setPostCommit( - delegate, - 'H 12 * * *', - false) - - // Set maven parameters. - common_job_properties.setMavenConfig(delegate) - - // Maven build project. - goals('clean verify jacoco:report coveralls:report -Pcode-coverage -DrepoToken=$COVERALLS_REPO_TOKEN -Dmaven.test.failure.ignore=true -Dstream') -} diff --git a/.test-infra/jenkins/job_bookkeeper_postcommit_master_java8.groovy b/.test-infra/jenkins/job_bookkeeper_postcommit_master_java8.groovy deleted file mode 100644 index bb5a5d5cf08..00000000000 --- a/.test-infra/jenkins/job_bookkeeper_postcommit_master_java8.groovy +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import common_job_properties - -// This job runs the Java postcommit tests on Java 8 -mavenJob('bookkeeper_postcommit_master_java8') { - description('Runs nightly build for bookkeeper in Java 8.') - - // clean up the workspace before build - wrappers { preBuildCleanup() } - - // Set common parameters. - common_job_properties.setTopLevelMainJobProperties( - delegate, 'master', 'JDK 1.8 (latest)') - - // Sets that this is a PostCommit job. - common_job_properties.setPostCommit( - delegate, - 'H 12 * * *', - false) - - // Set maven parameters. - common_job_properties.setMavenConfig(delegate) - - // Maven build project. - goals('clean package spotbugs:check -Ddistributedlog -Dstream -DstreamTests') -} diff --git a/.test-infra/jenkins/job_bookkeeper_postcommit_master_java9.groovy b/.test-infra/jenkins/job_bookkeeper_postcommit_master_java9.groovy deleted file mode 100644 index 6669104c93a..00000000000 --- a/.test-infra/jenkins/job_bookkeeper_postcommit_master_java9.groovy +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import common_job_properties - -// This job runs the Java postcommit tests on Java 9 -mavenJob('bookkeeper_postcommit_master_java9') { - description('Runs nightly build for bookkeeper in Java 9.') - - // clean up the workspace before build - wrappers { preBuildCleanup() } - - // Set common parameters. - common_job_properties.setTopLevelMainJobProperties( - delegate, 'master', 'JDK 1.9 (latest)') - - // Sets that this is a PostCommit job. - common_job_properties.setPostCommit( - delegate, - 'H 12 * * *', - false) - - // Set maven parameters. - common_job_properties.setMavenConfig(delegate) - - // Maven build project. - goals('clean package spotbugs:check -Ddistributedlog -Dstream -DstreamTests') -} diff --git a/.test-infra/jenkins/job_bookkeeper_postcommit_validation_master.groovy b/.test-infra/jenkins/job_bookkeeper_postcommit_validation_master.groovy deleted file mode 100644 index f7005b3c469..00000000000 --- a/.test-infra/jenkins/job_bookkeeper_postcommit_validation_master.groovy +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import common_job_properties - -// This job runs the Java postcommit validation on master branch -mavenJob('bookkeeper_postcommit_validation_master') { - description('Runs postcommit validation nightly for bookkeeper.') - - // clean up the workspace before build - wrappers { preBuildCleanup() } - - // Set common parameters. - common_job_properties.setTopLevelMainJobProperties( - delegate, 'master', 'JDK 1.8 (latest)') - - // Sets that this is a PostCommit job. - common_job_properties.setPostCommit( - delegate, - 'H 12 * * *', - false) - - // Set maven parameters. - common_job_properties.setMavenConfig(delegate) - - // Maven build project. - goals('clean apache-rat:check checkstyle:check package -Ddistributedlog -Dstream -DskipTests') -} diff --git a/.test-infra/jenkins/job_bookkeeper_postcommit_website.groovy b/.test-infra/jenkins/job_bookkeeper_postcommit_website.groovy deleted file mode 100644 index f99cb42e6d9..00000000000 --- a/.test-infra/jenkins/job_bookkeeper_postcommit_website.groovy +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import common_job_properties - -// This job builds and publishes the website -job('bookkeeper_postcommit_publish_website') { - description('Publish website to asf-site branch') - - // clean up the workspace before build - wrappers { preBuildCleanup() } - - // Set common parameters. - common_job_properties.setTopLevelWebsiteJobProperties(delegate) - - // Sets that this is a WebsitePostCommit job. - common_job_properties.setWebsitePostCommit(delegate) - - steps { - // Run the following shell script as a build step. - shell ''' -export MAVEN_HOME=/home/jenkins/tools/maven/latest -export PATH=$JAVA_HOME/bin:$MAVEN_HOME/bin:$PATH -export MAVEN_OPTS=-Xmx2048m -export JEKYLL_ENV=production - -# CD site/ -cd site - -# Build the javadoc -make clean - -# generate javadoc -make javadoc - -# run the docker image to build the website -./docker/ci.sh - -# publish website -source scripts/common.sh - -ORIGIN_REPO=$(git remote show origin | grep 'Push URL' | awk -F// '{print $NF}') -echo "ORIGIN_REPO: $ORIGIN_REPO" - -( - cd $APACHE_GENERATED_DIR - - rm -rf $TMP_DIR - mkdir -p $TMP_DIR - cd $TMP_DIR - - # clone the remote repo - git clone "https://$ORIGIN_REPO" . - git config user.name "Apache BookKeeper Site Updater" - git config user.email "dev@bookkeeper.apache.org" - git fetch origin - git checkout asf-site - git log | head - # copy the apache generated dir - cp -r $APACHE_GENERATED_DIR/content/* $TMP_DIR/content - - git add -A . - git diff-index --quiet HEAD || (git commit -m "Updated site at revision $REVISION" && (git log | head) && git push -q origin HEAD:asf-site) - - rm -rf $TMP_DIR -) - '''.stripIndent().trim() - } -} diff --git a/.test-infra/jenkins/job_bookkeeper_precommit_bookie_tests.groovy b/.test-infra/jenkins/job_bookkeeper_precommit_bookie_tests.groovy deleted file mode 100644 index f0e9ec085e4..00000000000 --- a/.test-infra/jenkins/job_bookkeeper_precommit_bookie_tests.groovy +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -import common_job_properties - -// test `org.apache.bookkeeper.bookie.**` -freeStyleJob('bookkeeper_precommit_bookie_tests') { - description('Run bookkeeper bookie tests in Java 8.') - - // clean up the workspace before build - wrappers { preBuildCleanup() } - - // Execute concurrent builds if necessary. - concurrentBuild() - - // Set common parameters. - common_job_properties.setTopLevelMainJobProperties( - delegate, - 'master', - 'JDK 1.8 (latest)', - 200, - 'ubuntu', - '${ghprbActualCommit}') - - // Sets that this is a PreCommit job. - common_job_properties.setPreCommit( - delegate, - 'Bookie Tests', - '.*(re)?run bookkeeper-server (bookie )?tests.*', - '.*\\[x\\] \\[skip bookkeeper-server (bookie )?tests\\].*', - true) - - steps { - // Temporary information gathering to see if full disks are causing the builds to flake - shell("id") - shell("ulimit -a") - shell("pwd") - shell("df -h") - shell("ps aux") - - // Build everything - maven { - // Set Maven parameters. - common_job_properties.setMavenConfig(delegate) - - goals('-B -am -pl bookkeeper-server clean install -DskipTests') - } - - // Test the package - maven { - // Set Maven parameters. - common_job_properties.setMavenConfig(delegate) - - goals('-pl bookkeeper-server test -Dtest="org.apache.bookkeeper.bookie.**"') - } - } - -} diff --git a/.test-infra/jenkins/job_bookkeeper_precommit_client_tests.groovy b/.test-infra/jenkins/job_bookkeeper_precommit_client_tests.groovy deleted file mode 100644 index b09607c51bd..00000000000 --- a/.test-infra/jenkins/job_bookkeeper_precommit_client_tests.groovy +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import common_job_properties - -// test `org.apache.bookkeeper.client.**` -freeStyleJob('bookkeeper_precommit_client_tests') { - description('Run bookkeeper client tests in Java 8.') - - // clean up the workspace before build - wrappers { preBuildCleanup() } - - // Execute concurrent builds if necessary. - concurrentBuild() - - // Set common parameters. - common_job_properties.setTopLevelMainJobProperties( - delegate, - 'master', - 'JDK 1.8 (latest)', - 200, - 'ubuntu', - '${ghprbActualCommit}') - - // Sets that this is a PreCommit job. - common_job_properties.setPreCommit( - delegate, - 'Client Tests', - '.*(re)?run bookkeeper-server (client )?tests.*', - '.*\\[x\\] \\[skip bookkeeper-server (client )?tests\\].*', - true) - - steps { - // Temporary information gathering to see if full disks are causing the builds to flake - shell("id") - shell("ulimit -a") - shell("pwd") - shell("df -h") - shell("ps aux") - - // Build everything - maven { - // Set Maven parameters. 
- common_job_properties.setMavenConfig(delegate) - - goals('-B -am -pl bookkeeper-server clean install -DskipTests') - } - - // Test the package - maven { - // Set Maven parameters. - common_job_properties.setMavenConfig(delegate) - - goals('-pl bookkeeper-server test -Dtest="org.apache.bookkeeper.client.**"') - } - } - -} diff --git a/.test-infra/jenkins/job_bookkeeper_precommit_integrationtests.groovy b/.test-infra/jenkins/job_bookkeeper_precommit_integrationtests.groovy deleted file mode 100644 index f070822bee9..00000000000 --- a/.test-infra/jenkins/job_bookkeeper_precommit_integrationtests.groovy +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import common_job_properties - -// This is the Java precommit which runs a maven install, and the current set of precommit tests. -freeStyleJob('bookkeeper_precommit_integrationtests') { - description('precommit integration test verification for pull requests of Apache BookKeeper.') - - // clean up the workspace before build - wrappers { preBuildCleanup() } - - // Set common parameters. - common_job_properties.setTopLevelMainJobProperties( - delegate, - 'master', - 'JDK 1.8 (latest)', - 200, - 'ubuntu', - '${ghprbActualCommit}') - - throttleConcurrentBuilds { - // limit builds to 1 per node to avoid conflicts on building docker images - maxPerNode(1) - } - - // Sets that this is a PreCommit job. - common_job_properties.setPreCommit( - delegate, - 'Integration Tests', - '.*(re)?run integration tests.*', - '.*\\[x\\] \\[skip integration tests\\].*') - - steps { - // Temporary information gathering to see if full disks are causing the builds to flake - shell('id') - shell('ulimit -a') - shell('pwd') - shell('df -h') - shell('ps -eo euser,pid,ppid,pgid,start,pcpu,pmem,cmd') - shell('docker network prune -f --filter name=testnetwork_*') // clean up any dangling networks from previous runs - shell('docker system events > docker.log & echo $! > docker-log.pid') - - shell('docker pull apachebookkeeper/bookkeeper-all-released-versions:latest') - - // Build everything - maven { - // Set Maven parameters. - common_job_properties.setMavenConfig(delegate) - - goals('-B clean install -Dstream -Pdocker') - properties(skipTests: true, interactiveMode: false) - } - - maven { - // Set Maven parameters. 
- common_job_properties.setMavenConfig(delegate) - rootPOM('tests/pom.xml') - goals('-B test -Dstream -DintegrationTests') - } - - shell('kill $(cat docker-log.pid) || true') - } - - publishers { - archiveArtifacts { - allowEmpty(true) - pattern('**/target/container-logs/**') - pattern('docker.log') - } - archiveJunit('**/surefire-reports/TEST-*.xml') - } -} diff --git a/.test-infra/jenkins/job_bookkeeper_precommit_java8.groovy b/.test-infra/jenkins/job_bookkeeper_precommit_java8.groovy deleted file mode 100644 index 67d7646432e..00000000000 --- a/.test-infra/jenkins/job_bookkeeper_precommit_java8.groovy +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import common_job_properties - -// This is the Java precommit which runs a maven install, and the current set of precommit tests. -mavenJob('bookkeeper_precommit_pullrequest_java8') { - description('precommit verification for pull requests of Apache BookKeeper in Java 8.') - - // clean up the workspace before build - wrappers { preBuildCleanup() } - - // Temporary information gathering to see if full disks are causing the builds to flake - preBuildSteps { - shell("id") - shell("ulimit -a") - shell("pwd") - shell("df -h") - shell("ps aux") - } - - // Execute concurrent builds if necessary. - concurrentBuild() - - // Set common parameters. - common_job_properties.setTopLevelMainJobProperties( - delegate, - 'master', - 'JDK 1.8 (latest)', - 200, - 'ubuntu', - '${ghprbActualCommit}') - - // Sets that this is a PreCommit job. - common_job_properties.setPreCommit( - delegate, - 'Build (Java 8)', - '.*(re)?build java8.*', - '.*\\[x\\] \\[skip build java8\\].*') - - // Set Maven parameters. - common_job_properties.setMavenConfig(delegate) - - // Maven build project - goals('clean package spotbugs:check -Dstream -DskipBookKeeperServerTests') -} diff --git a/.test-infra/jenkins/job_bookkeeper_precommit_java9.groovy b/.test-infra/jenkins/job_bookkeeper_precommit_java9.groovy deleted file mode 100644 index 43cb02b69f0..00000000000 --- a/.test-infra/jenkins/job_bookkeeper_precommit_java9.groovy +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import common_job_properties - -// This is the Java precommit which runs a maven install, and the current set of precommit tests. -mavenJob('bookkeeper_precommit_pullrequest_java9') { - description('precommit verification for pull requests of Apache BookKeeper in Java 9.') - - // clean up the workspace before build - wrappers { preBuildCleanup() } - - // Temporary information gathering to see if full disks are causing the builds to flake - preBuildSteps { - shell("id") - shell("ulimit -a") - shell("pwd") - shell("df -h") - shell("ps aux") - } - - // Execute concurrent builds if necessary. - concurrentBuild() - - // Set common parameters. - common_job_properties.setTopLevelMainJobProperties( - delegate, - 'master', - 'JDK 1.9 (latest)', - 200, - 'ubuntu', - '${ghprbActualCommit}') - - // Sets that this is a PreCommit job. - common_job_properties.setPreCommit( - delegate, - 'Build (Java 9)', - '.*(re)?build java9.*', - '.*\\[x\\] \\[skip build java9\\].*') - - // Set Maven parameters. - common_job_properties.setMavenConfig(delegate) - - // Maven build project - goals('clean package spotbugs:check -Dstream -DskipBookKeeperServerTests') -} diff --git a/.test-infra/jenkins/job_bookkeeper_precommit_remaining_tests.groovy b/.test-infra/jenkins/job_bookkeeper_precommit_remaining_tests.groovy deleted file mode 100644 index d57605bdf96..00000000000 --- a/.test-infra/jenkins/job_bookkeeper_precommit_remaining_tests.groovy +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import common_job_properties - -// run tests except: -// - `org.apache.bookkeeper.client.**` -// - `org.apache.bookkeeper.bookie.**` -// - `org.apache.bookkeeper.replication.**` -// - `org.apache.bookkeeper.tls.**` -freeStyleJob('bookkeeper_precommit_remaining_tests') { - description('Run bookkeeper remaining tests in Java 8.') - - // clean up the workspace before build - wrappers { preBuildCleanup() } - - // Execute concurrent builds if necessary. - concurrentBuild() - - // Set common parameters. - common_job_properties.setTopLevelMainJobProperties( - delegate, - 'master', - 'JDK 1.8 (latest)', - 200, - 'ubuntu', - '${ghprbActualCommit}') - - // Sets that this is a PreCommit job. 
- common_job_properties.setPreCommit( - delegate, - 'All Other Tests', - '.*(re)?run bookkeeper-server (remaining )?tests.*', - '.*\\[x\\] \\[skip bookkeeper-server (remaining )?tests\\].*', - true) - - steps { - // Temporary information gathering to see if full disks are causing the builds to flake - shell("id") - shell("ulimit -a") - shell("pwd") - shell("df -h") - shell("ps aux") - - // Build everything - maven { - // Set Maven parameters. - common_job_properties.setMavenConfig(delegate) - - goals('-B -am -pl bookkeeper-server clean install -DskipTests') - } - - // Test the package - maven { - // Set Maven parameters. - common_job_properties.setMavenConfig(delegate) - - goals('-pl bookkeeper-server test -Dtest="!org.apache.bookkeeper.client.**,!org.apache.bookkeeper.bookie.**,!org.apache.bookkeeper.replication.**,!org.apache.bookkeeper.tls.**"') - } - } - -} diff --git a/.test-infra/jenkins/job_bookkeeper_precommit_replication_tests.groovy b/.test-infra/jenkins/job_bookkeeper_precommit_replication_tests.groovy deleted file mode 100644 index 61893bac041..00000000000 --- a/.test-infra/jenkins/job_bookkeeper_precommit_replication_tests.groovy +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import common_job_properties - -// test `org.apache.bookkeeper.replication.**` -freeStyleJob('bookkeeper_precommit_replication_tests') { - description('Run bookkeeper replication tests in Java 8.') - - // clean up the workspace before build - wrappers { preBuildCleanup() } - - // Execute concurrent builds if necessary. - concurrentBuild() - - // Set common parameters. - common_job_properties.setTopLevelMainJobProperties( - delegate, - 'master', - 'JDK 1.8 (latest)', - 200, - 'ubuntu', - '${ghprbActualCommit}') - - // Sets that this is a PreCommit job. - common_job_properties.setPreCommit( - delegate, - 'Replication Tests', - '.*(re)?run bookkeeper-server (replication )?tests.*', - '.*\\[x\\] \\[skip bookkeeper-server (replication )?tests\\].*', - true) - - steps { - // Temporary information gathering to see if full disks are causing the builds to flake - shell("id") - shell("ulimit -a") - shell("pwd") - shell("df -h") - shell("ps aux") - - // Build everything - maven { - // Set Maven parameters. - common_job_properties.setMavenConfig(delegate) - - goals('-B -am -pl bookkeeper-server clean install -DskipTests') - } - - // Test the package - maven { - // Set Maven parameters. 
- common_job_properties.setMavenConfig(delegate) - - goals('-pl bookkeeper-server test -Dtest="org.apache.bookkeeper.replication.**"') - } - } - -} diff --git a/.test-infra/jenkins/job_bookkeeper_precommit_tls_tests.groovy b/.test-infra/jenkins/job_bookkeeper_precommit_tls_tests.groovy deleted file mode 100644 index 1adc19e89a4..00000000000 --- a/.test-infra/jenkins/job_bookkeeper_precommit_tls_tests.groovy +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import common_job_properties - -// test `org.apache.bookkeeper.tls.**` -freeStyleJob('bookkeeper_precommit_tls_tests') { - description('Run bookkeeper tls tests in Java 8.') - - // clean up the workspace before build - wrappers { preBuildCleanup() } - - // Execute concurrent builds if necessary. - concurrentBuild() - - // Set common parameters. - common_job_properties.setTopLevelMainJobProperties( - delegate, - 'master', - 'JDK 1.8 (latest)', - 200, - 'ubuntu', - '${ghprbActualCommit}') - - // Sets that this is a PreCommit job. - common_job_properties.setPreCommit( - delegate, - 'TLS Tests', - '.*(re)?run bookkeeper-server (tls )?tests.*', - '.*\\[x\\] \\[skip bookkeeper-server (tls )?tests\\].*', - true) - - steps { - // Temporary information gathering to see if full disks are causing the builds to flake - shell("id") - shell("ulimit -a") - shell("pwd") - shell("df -h") - shell("ps aux") - - // Build everything - maven { - // Set Maven parameters. - common_job_properties.setMavenConfig(delegate) - - goals('-B -am -pl bookkeeper-server clean install -DskipTests') - } - - // Test the package - maven { - // Set Maven parameters. - common_job_properties.setMavenConfig(delegate) - - goals('-pl bookkeeper-server test -Dtest="org.apache.bookkeeper.tls.**"') - } - } - -} diff --git a/.test-infra/jenkins/job_bookkeeper_precommit_validation.groovy b/.test-infra/jenkins/job_bookkeeper_precommit_validation.groovy deleted file mode 100644 index 57a2791ce5b..00000000000 --- a/.test-infra/jenkins/job_bookkeeper_precommit_validation.groovy +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import common_job_properties - -// This is the Java precommit validation job that validates pull requests (e.g. checkstyle) -mavenJob('bookkeeper_precommit_pullrequest_validation') { - description('precommit validation for pull requests of Apache BookKeeper.') - - // clean up the workspace before build - wrappers { preBuildCleanup() } - - // Execute concurrent builds if necessary. - concurrentBuild() - - // Set common parameters. - common_job_properties.setTopLevelMainJobProperties( - delegate, - 'master', - 'JDK 1.8 (latest)', - 200, - 'ubuntu', - '${ghprbActualCommit}') - - // Sets that this is a PreCommit job. - common_job_properties.setPreCommit( - delegate, - 'PR Validation', - '.*(re)?run pr validation.*', - '', - true) - - // Set Maven parameters. - common_job_properties.setMavenConfig(delegate) - - // Maven build project - goals('clean apache-rat:check checkstyle:check package -Ddistributedlog -Dstream -DskipTests') -} diff --git a/.test-infra/jenkins/job_bookkeeper_release_branch_46.groovy b/.test-infra/jenkins/job_bookkeeper_release_branch_46.groovy deleted file mode 100644 index 0fd822898ca..00000000000 --- a/.test-infra/jenkins/job_bookkeeper_release_branch_46.groovy +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import common_job_properties - -// This job runs nightly build for bookkeeper release branch-4.6 -mavenJob('bookkeeper_release_branch_46') { - description('Run nightly build for bookkeeper release branch-4.6') - - // Set common parameters. - common_job_properties.setTopLevelMainJobProperties( - delegate, - "branch-4.6") - - // Sets that this is a PostCommit job. - common_job_properties.setPostCommit( - delegate, - 'H 12 * * *', - false) - - // Set maven parameters. - common_job_properties.setMavenConfig(delegate) - - // Maven build project. - goals('clean apache-rat:check package findbugs:check') -} diff --git a/.test-infra/jenkins/job_bookkeeper_release_branch_47_integrationtests.groovy b/.test-infra/jenkins/job_bookkeeper_release_branch_47_integrationtests.groovy deleted file mode 100644 index e6fe9be7889..00000000000 --- a/.test-infra/jenkins/job_bookkeeper_release_branch_47_integrationtests.groovy +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import common_job_properties - -// This is the Java postcommit which runs a maven install, and the current set of precommit tests. -freeStyleJob('bookkeeper_release_branch_47_integrationtests') { - description('Run integration tests on bookkeeper branch-4.7.') - - // Set common parameters. - common_job_properties.setTopLevelMainJobProperties( - delegate, - 'branch-4.7', - 'JDK 1.8 (latest)', - 120) - - // Sets that this is a PostCommit job. - common_job_properties.setPostCommit( - delegate, - 'H 12 * * *', - false) - - steps { - // Temporary information gathering to see if full disks are causing the builds to flake - shell('id') - shell('ulimit -a') - shell('pwd') - shell('df -h') - shell('ps -eo euser,pid,ppid,pgid,start,pcpu,pmem,cmd') - shell('docker network prune -f --filter name=testnetwork_*') // clean up any dangling networks from previous runs - shell('docker system events > docker.log & echo $! > docker-log.pid') - - shell('docker pull apachebookkeeper/bookkeeper-all-released-versions:latest') - - // Build everything - maven { - // Set Maven parameters. - common_job_properties.setMavenConfig(delegate) - - goals('-B clean install -Pdocker') - properties(skipTests: true, interactiveMode: false) - } - - maven { - // Set Maven parameters. - common_job_properties.setMavenConfig(delegate) - rootPOM('tests/pom.xml') - goals('-B test -DintegrationTests') - } - - shell('kill $(cat docker-log.pid) || true') - } - - publishers { - archiveArtifacts { - allowEmpty(true) - pattern('**/target/container-logs/**') - pattern('docker.log') - } - archiveJunit('**/surefire-reports/TEST-*.xml') - } -} diff --git a/.test-infra/jenkins/job_bookkeeper_release_branch_47_java8.groovy b/.test-infra/jenkins/job_bookkeeper_release_branch_47_java8.groovy deleted file mode 100644 index 7dc7e78638e..00000000000 --- a/.test-infra/jenkins/job_bookkeeper_release_branch_47_java8.groovy +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import common_job_properties - -// This job runs the Java postcommit tests on Java 8 for branch-4.7 -mavenJob('bookkeeper_release_branch_47_java8') { - description('Runs nightly build for bookkeeper branch-4.7 in Java 8.') - - // Set common parameters. 
- common_job_properties.setTopLevelMainJobProperties( - delegate, 'branch-4.7', 'JDK 1.8 (latest)') - - // Sets that this is a PostCommit job. - common_job_properties.setPostCommit( - delegate, - 'H 12 * * *', - false) - - // Set maven parameters. - common_job_properties.setMavenConfig(delegate) - - // Maven build project. - goals('clean apache-rat:check package spotbugs:check -Ddistributedlog -Dstream -DstreamTests') -} diff --git a/.test-infra/jenkins/job_bookkeeper_release_branch_47_java9.groovy b/.test-infra/jenkins/job_bookkeeper_release_branch_47_java9.groovy deleted file mode 100644 index 4feaa2fb544..00000000000 --- a/.test-infra/jenkins/job_bookkeeper_release_branch_47_java9.groovy +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import common_job_properties - -// This job runs the Java postcommit tests on Java 9 for branch-4.7 -mavenJob('bookkeeper_release_branch_47_java9') { - description('Runs nightly build for bookkeeper branch-4.7 in Java 9.') - - // Set common parameters. - common_job_properties.setTopLevelMainJobProperties( - delegate, 'branch-4.7', 'JDK 1.9 (latest)') - - // Sets that this is a PostCommit job. - common_job_properties.setPostCommit( - delegate, - 'H 12 * * *', - false) - - // Set maven parameters. - common_job_properties.setMavenConfig(delegate) - - // Maven build project. - goals('clean apache-rat:check package spotbugs:check -Ddistributedlog -Dstream -DstreamTests') -} diff --git a/.test-infra/jenkins/job_bookkeeper_release_branch_48_integrationtests.groovy b/.test-infra/jenkins/job_bookkeeper_release_branch_48_integrationtests.groovy deleted file mode 100644 index 0db8a88c13f..00000000000 --- a/.test-infra/jenkins/job_bookkeeper_release_branch_48_integrationtests.groovy +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import common_job_properties - -// This is the Java postcommit which runs a maven install, and the current set of precommit tests. 
-freeStyleJob('bookkeeper_release_branch_48_integrationtests') { - description('Run integration tests on bookkeeper branch-4.8.') - - // Set common parameters. - common_job_properties.setTopLevelMainJobProperties( - delegate, - 'branch-4.8', - 'JDK 1.8 (latest)', - 120) - - // Sets that this is a PostCommit job. - common_job_properties.setPostCommit( - delegate, - 'H 12 * * *', - false) - - steps { - // Temporary information gathering to see if full disks are causing the builds to flake - shell('id') - shell('ulimit -a') - shell('pwd') - shell('df -h') - shell('ps -eo euser,pid,ppid,pgid,start,pcpu,pmem,cmd') - shell('docker network prune -f --filter name=testnetwork_*') // clean up any dangling networks from previous runs - shell('docker system events > docker.log & echo $! > docker-log.pid') - - shell('docker pull apachebookkeeper/bookkeeper-all-released-versions:latest') - - // Build everything - maven { - // Set Maven parameters. - common_job_properties.setMavenConfig(delegate) - - goals('-B clean install -Pdocker') - properties(skipTests: true, interactiveMode: false) - } - - maven { - // Set Maven parameters. - common_job_properties.setMavenConfig(delegate) - rootPOM('tests/pom.xml') - goals('-B test -DintegrationTests') - } - - shell('kill $(cat docker-log.pid) || true') - } - - publishers { - archiveArtifacts { - allowEmpty(true) - pattern('**/target/container-logs/**') - pattern('docker.log') - } - archiveJunit('**/surefire-reports/TEST-*.xml') - } -} diff --git a/.test-infra/jenkins/job_bookkeeper_release_branch_48_java8.groovy b/.test-infra/jenkins/job_bookkeeper_release_branch_48_java8.groovy deleted file mode 100644 index f9f911c7c0c..00000000000 --- a/.test-infra/jenkins/job_bookkeeper_release_branch_48_java8.groovy +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import common_job_properties - -// This job runs the Java postcommit tests on Java 8 for branch-4.8 -mavenJob('bookkeeper_release_branch_48_java8') { - description('Runs nightly build for bookkeeper branch-4.8 in Java 8.') - - // Set common parameters. - common_job_properties.setTopLevelMainJobProperties( - delegate, 'branch-4.8', 'JDK 1.8 (latest)') - - // Sets that this is a PostCommit job. - common_job_properties.setPostCommit( - delegate, - 'H 12 * * *', - false) - - // Set maven parameters. - common_job_properties.setMavenConfig(delegate) - - // Maven build project. 
- goals('clean apache-rat:check package spotbugs:check -Ddistributedlog -Dstream -DstreamTests') -} diff --git a/.test-infra/jenkins/job_bookkeeper_release_branch_48_java9.groovy b/.test-infra/jenkins/job_bookkeeper_release_branch_48_java9.groovy deleted file mode 100644 index 8a20415e10f..00000000000 --- a/.test-infra/jenkins/job_bookkeeper_release_branch_48_java9.groovy +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import common_job_properties - -// This job runs the Java postcommit tests on Java 9 for branch-4.8 -mavenJob('bookkeeper_release_branch_48_java9') { - description('Runs nightly build for bookkeeper branch-4.8 in Java 9.') - - // Set common parameters. - common_job_properties.setTopLevelMainJobProperties( - delegate, 'branch-4.8', 'JDK 1.9 (latest)') - - // Sets that this is a PostCommit job. - common_job_properties.setPostCommit( - delegate, - 'H 12 * * *', - false) - - // Set maven parameters. - common_job_properties.setMavenConfig(delegate) - - // Maven build project. - goals('clean apache-rat:check package spotbugs:check -Ddistributedlog -Dstream -DstreamTests') -} diff --git a/.test-infra/jenkins/job_bookkeeper_release_nightly_snapshot.groovy b/.test-infra/jenkins/job_bookkeeper_release_nightly_snapshot.groovy deleted file mode 100644 index ced65276d63..00000000000 --- a/.test-infra/jenkins/job_bookkeeper_release_nightly_snapshot.groovy +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import common_job_properties - -// This job deploys a snapshot of latest master to artifactory nightly -freeStyleJob('bookkeeper_release_nightly_snapshot') { - description('runs a `mvn clean deploy` of the nightly snapshot for bookkeeper.') - - // Set common parameters. - common_job_properties.setTopLevelMainJobProperties(delegate) - - // Sets that this is a PostCommit job. 
- common_job_properties.setPostCommit( - delegate, - 'H 12 * * *', - false) - - parameters { - stringParam( - 'sha1', - 'master', - 'Commit id or refname (eg: origin/pr/9/head) you want to build.') - - stringParam( - 'PUBLISH_GITSHA', - 'false', - 'Whether to publish a snapshot with gitsha information. Options: (true|false).') - } - - steps { - // update snapshot version if `PUBLISH_GITSHA` is `true` - shell ''' -export MAVEN_HOME=/home/jenkins/tools/maven/latest -export PATH=$JAVA_HOME/bin:$MAVEN_HOME/bin:$PATH -export MAVEN_OPTS=-Xmx2048m - -./dev/update-snapshot-version.sh - '''.stripIndent().trim() - - maven { - // Set maven parameters. - common_job_properties.setMavenConfig(delegate) - - // Maven build project. - goals('clean package -Dmaven.test.failure.ignore=true deploy -Ddistributedlog -Dstream -DstreamTests -Pdocker') - } - - // publish the docker images - shell ''' -export MAVEN_HOME=/home/jenkins/tools/maven/latest -export PATH=$JAVA_HOME/bin:$MAVEN_HOME/bin:$PATH -export MAVEN_OPTS=-Xmx2048m - -./dev/publish-docker-images.sh - '''.stripIndent().trim() - } -} diff --git a/.test-infra/jenkins/job_seed.groovy b/.test-infra/jenkins/job_seed.groovy deleted file mode 100644 index c8b16b06685..00000000000 --- a/.test-infra/jenkins/job_seed.groovy +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import common_job_properties - -// Defines the seed job, which creates or updates all other Jenkins projects. -job('bookkeeper-seed') { - description('Automatically configures all Apache BookKeeper Jenkins projects based' + - ' on Jenkins DSL groovy files checked into the code repository.') - - // Set common parameters. - common_job_properties.setTopLevelMainJobProperties(delegate) - - // This is a post-commit job that runs once per day, not for every push. - common_job_properties.setPostCommit( - delegate, - 'H 6 * * *', - false, - 'issues@bookkeeper.apache.org') - - steps { - folder('bookkeeper-jenkins-testing') - dsl { - // A list or a glob of other groovy files to process. - external('.test-infra/jenkins/job_*.groovy') - - // If a job is removed from the script, delete it - removeAction('DELETE') - } - } -} diff --git a/.test-infra/scripts/post-docker-tests.sh b/.test-infra/scripts/post-docker-tests.sh new file mode 100755 index 00000000000..8af7ab8b914 --- /dev/null +++ b/.test-infra/scripts/post-docker-tests.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +set -ex +kill $(cat docker-log.pid) || true diff --git a/.test-infra/scripts/pre-docker-tests.sh b/.test-infra/scripts/pre-docker-tests.sh new file mode 100755 index 00000000000..911ab1958bd --- /dev/null +++ b/.test-infra/scripts/pre-docker-tests.sh @@ -0,0 +1,33 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +set -ex + +id +ulimit -a +pwd +df -Th +ps -eo euser,pid,ppid,pgid,start,pcpu,pmem,cmd +docker info +docker system prune -f +# clean up any dangling networks from previous runs +docker network prune -f --filter "until=12h" +docker system events > docker.debug-info & echo $! > docker-log.pid +docker pull quay.io/coreos/etcd:v3.5.14 diff --git a/.test-infra/scripts/slack-email-digest/.gitignore b/.test-infra/scripts/slack-email-digest/.gitignore new file mode 100644 index 00000000000..e169e23b370 --- /dev/null +++ b/.test-infra/scripts/slack-email-digest/.gitignore @@ -0,0 +1 @@ +configuration.yaml diff --git a/.test-infra/scripts/slack-email-digest/README.md b/.test-infra/scripts/slack-email-digest/README.md new file mode 100644 index 00000000000..b000fcf5706 --- /dev/null +++ b/.test-infra/scripts/slack-email-digest/README.md @@ -0,0 +1,4 @@ +# Slack to email digest bot + +This script is responsible for generating daily per-channel digests and emailing them to certain mailing lists. + diff --git a/.test-infra/scripts/slack-email-digest/configuration-example.yaml b/.test-infra/scripts/slack-email-digest/configuration-example.yaml new file mode 100644 index 00000000000..8d40b96e3d8 --- /dev/null +++ b/.test-infra/scripts/slack-email-digest/configuration-example.yaml @@ -0,0 +1,34 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License.
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +slack: + token: xoxp-xxxxxxxxxxxxxx + +mail: + fromAddress: '"My Slack" <my.slack@gmail.com>' + smtp: smtp.gmail.com:587 + useTLS: true + username: my.slack@gmail.com + password: xxxxxxxxx + +channels: + # Maps channels to a particular email address + general: users@my.project.org + dev: dev@my.project.org + random: users@my.project.org diff --git a/.test-infra/scripts/slack-email-digest/requirements.txt b/.test-infra/scripts/slack-email-digest/requirements.txt new file mode 100644 index 00000000000..f078f8f3c52 --- /dev/null +++ b/.test-infra/scripts/slack-email-digest/requirements.txt @@ -0,0 +1,20 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +slacker +PyYAML diff --git a/.test-infra/scripts/slack-email-digest/slack_email_digest.py b/.test-infra/scripts/slack-email-digest/slack_email_digest.py new file mode 100755 index 00000000000..b1012e64e28 --- /dev/null +++ b/.test-infra/scripts/slack-email-digest/slack_email_digest.py @@ -0,0 +1,96 @@ +#!/usr/bin/env python +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License.
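+# +# Example usage (a sketch; the cron schedule is an assumption, not part of +# this patch): copy configuration-example.yaml to configuration.yaml, fill in +# real credentials, install dependencies with `pip install -r requirements.txt`, +# then run the script once per day, e.g.: +#   0 6 * * * cd .test-infra/scripts/slack-email-digest && ./slack_email_digest.py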
+# + +import slacker +import yaml +import time +import re +import datetime +import smtplib +from email.mime.text import MIMEText + + +conf = yaml.safe_load(open('configuration.yaml'))  # safe_load parses plain data only, and works on PyYAML >= 6 where load() requires an explicit Loader + + +def send_digest(channel, address, digest): + msg = MIMEText(digest, _charset='utf-8') + msg['From'] = conf['mail']['fromAddress'] + msg['To'] = address + msg['Subject'] = 'Slack digest for #%s - %s' % ( + channel, datetime.datetime.now().strftime('%Y-%m-%d')) + server = smtplib.SMTP(conf['mail']['smtp']) + if conf['mail']['useTLS']: + server.starttls() + if 'username' in conf['mail']: + server.login(conf['mail']['username'], conf['mail']['password']) + + server.sendmail(conf['mail']['fromAddress'], address, msg.as_string()) + server.quit() + + +slack = slacker.Slacker(conf['slack']['token']) + +channels = slack.channels.list().body['channels'] + +# Get a mapping between Slack internal user ids and real names +users = {} +for user in slack.users.list().body['members']: + real_name = user.get('real_name', user.get('name')) + users[user['id']] = real_name + +last_day_timestamp = time.time() - (24 * 3600) + +for channel in channels: + id = channel['id'] + name = channel['name'] + topic = channel['topic']['value'] + + if name not in conf['channels']: + print('Ignoring channel: #%s' % name) + continue + + toAddress = conf['channels'][name] + print('Getting digest of #%s --> %s' % (name, toAddress)) + + messages = slack.channels.history(channel=id, + oldest=last_day_timestamp, + count=1000) + digest = '' + for m in reversed(messages.body['messages']): + if not m['type'] == 'message': + continue + + user = m.get('user') + if not user: + user = m['comment']['user'] + sender = users.get(user, '') + + date = datetime.datetime.utcfromtimestamp(float(m['ts'])).strftime('%Y-%m-%d %H:%M:%S UTC') + # Replace user id mentions with real names, falling back to the raw id for unknown users + text = re.sub(r'<@(\w+)>', lambda match: '@' + users.get(match.group(1), match.group(1)), m['text']) + + digest += '%s - %s: %s\n' % (date, sender, text) + for reaction in m.get('reactions', []): + digest += '%s : %s\n' % (reaction['name'], ', '.join(map(users.get, reaction['users']))) + digest += '----\n' + + if digest: + send_digest(name, toAddress, digest) diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 56b6805467e..00000000000 --- a/.travis.yml +++ /dev/null @@ -1,71 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License.
-language: java - -# blacklist -branches: - except: - - asf-site - - gh-pages - -services: - - docker - -matrix: - include: - - os: osx - osx_image: xcode8 - - os: osx - osx_image: xcode9.2 - - os: linux - env: CUSTOM_JDK="oraclejdk8" - - os: linux - env: CUSTOM_JDK="oraclejdk9" - - os: linux - env: CUSTOM_JDK="oraclejdk10" - -before_install: -- | - echo "MAVEN_OPTS='-Xmx3072m -XX:MaxPermSize=512m'" > ~/.mavenrc - if [ "$TRAVIS_OS_NAME" == "osx" ]; then - export JAVA_HOME=$(/usr/libexec/java_home); - fi - if [ "$TRAVIS_OS_NAME" == "linux" ]; then - jdk_switcher use "$CUSTOM_JDK"; - fi - echo "TRAVIS_PULL_REQUEST=${TRAVIS_PULL_REQUEST}" - if [ "$TRAVIS_PULL_REQUEST" == "false" ]; then - export DLOG_MODIFIED="true" - echo "Enable testing distributedlog modules since they are not pull requests." - else - if [ `git diff --name-only $TRAVIS_COMMIT_RANGE | grep "^stream\/distributedlog" | wc -l` -gt 0 ]; then - export DLOG_MODIFIED="true" - echo "Enable testing distributedlog modules if this pull request modifies files under directory 'stream/distributedlog'." - fi - if [ `git diff --name-only $TRAVIS_COMMIT_RANGE | grep "^site\/" | wc -l` -gt 0 ]; then - # building the website to ensure the changes don't break - export WEBSITE_MODIFIED="true" - echo "Enable building website modules if this pull request modifies files under directory 'site'." - fi - fi - -install: true - -script: .travis_scripts/build.sh - -cache: - directories: - - $HOME/.m2 diff --git a/.travis_scripts/before_install.sh b/.travis_scripts/before_install.sh deleted file mode 100755 index b5b141f1c3f..00000000000 --- a/.travis_scripts/before_install.sh +++ /dev/null @@ -1,40 +0,0 @@ -#!/usr/bin/env bash - -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -set -ev - -echo "MAVEN_OPTS='-Xmx3072m -XX:MaxPermSize=512m'" > ~/.mavenrc -if [ "$TRAVIS_OS_NAME" == "osx" ]; then - export JAVA_HOME=$(/usr/libexec/java_home); -fi -echo "TRAVIS_PULL_REQUEST=${TRAVIS_PULL_REQUEST}" -if [ "$TRAVIS_PULL_REQUEST" == "false" ]; then - export DLOG_MODIFIED="true" - echo "Enable testing distributedlog modules since they are not pull requests." -else - if [ `git diff --name-only $TRAVIS_COMMIT_RANGE | grep "^stream\/distributedlog" | wc -l` -gt 0 ]; then - export DLOG_MODIFIED="true" - echo "Enable testing distributedlog modules if this pull request modifies files under directory 'stream/distributedlog'." - fi - if [ `git diff --name-only $TRAVIS_COMMIT_RANGE | grep "^site\/" | wc -l` -gt 0 ]; then - # building the website to ensure the changes don't break - export WEBSITE_MODIFIED="true" - echo "Enable building website modules if this pull request modifies files under directory 'site'." 
- fi -fi diff --git a/.travis_scripts/build.sh b/.travis_scripts/build.sh deleted file mode 100755 index f751bd84cf5..00000000000 --- a/.travis_scripts/build.sh +++ /dev/null @@ -1,37 +0,0 @@ -#!/usr/bin/env bash - -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -set -ev - -BINDIR=`dirname "$0"` -BK_HOME=`cd $BINDIR/..;pwd` - -mvn --batch-mode clean apache-rat:check compile spotbugs:check install -DskipTests -Dstream -$BK_HOME/dev/check-binary-license ./bookkeeper-dist/all/target/bookkeeper-all-*-bin.tar.gz; -$BK_HOME/dev/check-binary-license ./bookkeeper-dist/server/target/bookkeeper-server-*-bin.tar.gz; -if [ "$DLOG_MODIFIED" == "true" ]; then - cd $BK_HOME/stream/distributedlog - mvn --batch-mode clean package -Ddistributedlog -fi -if [ "$TRAVIS_OS_NAME" == "linux" ] && [ "$WEBSITE_MODIFIED" == "true" ]; then - cd $BK_HOME/site - make clean - # run the docker image to build the website - ./docker/ci.sh -fi diff --git a/.typos.toml b/.typos.toml new file mode 100644 index 00000000000..e8ef1d3a280 --- /dev/null +++ b/.typos.toml @@ -0,0 +1,56 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
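+# +# This file configures the `typos` source-code spell checker +# (https://github.com/crate-ci/typos). The extend-words entries below map +# tokens to themselves so that intentional spellings, abbreviations, and +# keyword false positives are not reported as typos.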
+# + +[default.extend-words] +# abbr +"ba" = "ba" +"bve" = "bve" +"cace" = "cace" +"cann" = "cann" +"dbe" = "dbe" +"entrys" = "entrys" +"fo" = "fo" +"ine" = "ine" +"isse" = "isse" +"mor" = "mor" +"nwe" = "nwe" +"nd" = "nd" +"nin" = "nin" +"oce" = "oce" +"ot" = "ot" +"ser" = "ser" +"shouldnot" = "shouldnot" +"tio" = "tio" +"ue" = "ue" +# keep for compatibility +"deleteable" = "deleteable" +"infinit" = "infinit" +"explict" = "explict" +"uninitalize" = "uninitalize" +# keyword fp +"guage" = "guage" +"passin" = "passin" +"testng" = "testng" +"vertx" = "vertx" +"verticle" = "verticle" + +[files] +extend-exclude = [ + "bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/TestLedgerMetadataSerDe.java", +] diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 00000000000..8c79e533876 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,25 @@ + + +## Contributing to Apache BookKeeper +The Apache BookKeeper community welcomes contributions from anyone with a passion for distributed systems! BookKeeper has many different opportunities for contributions -- write new examples/tutorials, add new user-facing libraries, work on the core storage components, integrate with different metadata stores (ZooKeeper, Etcd, etc.), or participate in the documentation effort. + +We use a review-then-commit workflow in BookKeeper for all contributions. + +We would love for you to contribute to Apache BookKeeper and make it even better! +Please check the [Contributing to Apache BookKeeper](https://bookkeeper.apache.org/community/contributing/) +page before starting to work on the project. diff --git a/NOTICE b/NOTICE index 64cd460010b..490e17b4b3a 100644 --- a/NOTICE +++ b/NOTICE @@ -1,5 +1,5 @@ Apache BookKeeper -Copyright 2011-2018 The Apache Software Foundation +Copyright 2011-2024 The Apache Software Foundation This product includes software developed at The Apache Software Foundation (http://www.apache.org/). diff --git a/README.md b/README.md index aeeac68c187..23475c62541 100644 --- a/README.md +++ b/README.md @@ -1,52 +1,34 @@ logo -[![Build Status](https://travis-ci.org/apache/bookkeeper.svg?branch=master)](https://travis-ci.org/apache/bookkeeper) -[![Build Status](https://builds.apache.org/buildStatus/icon?job=bookkeeper-master)](https://builds.apache.org/job/bookkeeper-master/) -[![Coverage Status](https://coveralls.io/repos/github/apache/bookkeeper/badge.svg?branch=master)](https://coveralls.io/github/apache/bookkeeper?branch=master) [![Maven Central](https://maven-badges.herokuapp.com/maven-central/org.apache.bookkeeper/bookkeeper/badge.svg)](https://maven-badges.herokuapp.com/maven-central/org.apache.bookkeeper/bookkeeper) # Apache BookKeeper -Apache BookKeeper is a scalable, fault tolerant and low latency storage service optimized for append-only workloads. +Apache BookKeeper is a scalable, fault-tolerant and low latency storage service optimized for append-only workloads. It is suitable for being used in following scenarios: -- WAL (Write-Ahead-Logging), e.g. HDFS NameNode. +- WAL (Write-Ahead-Logging), e.g. HDFS NameNode, Pravega. - Message Store, e.g. Apache Pulsar. - Offset/Cursor Store, e.g. Apache Pulsar. - Object/Blob Store, e.g. storing state machine snapshots. ## Get Started -* *Concepts*: Start with the [basic concepts](http://bookkeeper.apache.org/docs/master/bookkeeperOverview.html) of Apache BookKeeper. +* Check out the project [website](https://bookkeeper.apache.org/).
+* *Concepts*: Start with the [basic concepts](https://bookkeeper.apache.org/docs/getting-started/concepts) of Apache BookKeeper. This will help you to fully understand the other parts of the documentation. -* [Getting Started](http://bookkeeper.apache.org/docs/master/bookkeeperStarted.html) to setup BookKeeper to write logs. +* Follow the [Installation](https://bookkeeper.apache.org/docs/getting-started/installation) guide to set up BookKeeper. ## Documentation -### Developers - -* [Programmer Guide](http://bookkeeper.apache.org/docs/master/bookkeeperProgrammer.html) -* [Tutorial](http://bookkeeper.apache.org/docs/master/bookkeeperTutorial.html) -* [Java API](http://bookkeeper.apache.org/docs/master/apidocs/) - -You can also read [Turning Ledgers into Logs](http://bookkeeper.apache.org/docs/master/bookkeeperLedgers2Logs.html) to learn how to turn **ledgers** into continuous **log streams**. -If you are looking for a high level **log stream** API, you can checkout [DistributedLog](http://distributedlog.io). - -### Administrators - -* [Admin Guide](http://bookkeeper.apache.org/docs/master/bookkeeperConfig.html) -* [Configuration Parameters](http://bookkeeper.apache.org/docs/master/bookieConfigParams.html) - -### Contributors - -* [BookKeeper Internals](http://bookkeeper.apache.org/docs/master/bookkeeperInternals.html) +Please visit the [Documentation](https://bookkeeper.apache.org/docs/overview/) from the project website for more information. ## Get In Touch ### Report a Bug -For filing bugs, suggesting improvements, or requesting new features, help us out by [opening a Github issue](https://github.com/apache/bookkeeper/issues) or [opening an Apache jira](https://issues.apache.org/jira/browse/BOOKKEEPER). +For filing bugs, suggesting improvements, or requesting new features, help us out by [opening a GitHub issue](https://github.com/apache/bookkeeper/issues). ### Need Help? @@ -54,7 +36,7 @@ For filing bugs, suggesting improvements, or requesting new features, help us ou [Subscribe](mailto:dev-subscribe@bookkeeper.apache.org) or [mail](mailto:dev@bookkeeper.apache.org) the [dev@bookkeeper.apache.org](mailto:dev@bookkeeper.apache.org) list - Join development discussions, propose new ideas and connect with contributors. -[Join us on Slack](https://apachebookkeeper.herokuapp.com/) - This is the most immediate way to connect with Apache BookKeeper committers and contributors. +[Join us on Slack](https://communityinviter.com/apps/apachebookkeeper/apache-bookkeeper) - This is the most immediate way to connect with Apache BookKeeper committers and contributors. ## Contributing @@ -62,15 +44,10 @@ We feel that a welcoming open community is important and welcome contributions. ### Contributing Code -1. See [Developer Setup](https://cwiki.apache.org/confluence/display/BOOKKEEPER/Developer+Setup) to get your local environment setup. - -2. Take a look at our open issues: [JIRA Issues](https://issues.apache.org/jira/browse/BOOKKEEPER) [Github Issues](https://github.com/apache/bookkeeper/issues). - -3. Review our [coding style](https://cwiki.apache.org/confluence/display/BOOKKEEPER/Coding+Guide) and follow our [pull requests](https://github.com/apache/bookkeeper/pulls) to learn about our conventions. - -4. Make your changes according to our [contribution guide](https://cwiki.apache.org/confluence/display/BOOKKEEPER/Contributing+to+BookKeeper). +1. See our [installation guide](https://bookkeeper.apache.org/docs/next/getting-started/installation/) to get your local environment setup. 
-### Improving Website and Documentation +2. Take a look at our open issues: [GitHub Issues](https://github.com/apache/bookkeeper/issues). -1. See [Building the website and documentation](https://cwiki.apache.org/confluence/display/BOOKKEEPER/Building+the+website+and+documentation) on how to build the website and documentation. +3. Review our [coding style](https://bookkeeper.apache.org/community/coding-guide/) and follow our [pull requests](https://github.com/apache/bookkeeper/pulls) to learn more about our conventions. +4. Make your changes according to our [contributing guide](https://bookkeeper.apache.org/community/contributing/) diff --git a/bin/bkctl b/bin/bkctl index 1cb73571652..26d3b6ececa 100755 --- a/bin/bkctl +++ b/bin/bkctl @@ -22,6 +22,7 @@ BINDIR=`dirname "$0"` BK_HOME=`cd ${BINDIR}/..;pwd` +mkdir -p $BK_HOME/logs source ${BK_HOME}/bin/common.sh source ${BK_HOME}/conf/bk_cli_env.sh @@ -41,13 +42,14 @@ if [ -z "${CLI_CONF}" ]; then CLI_CONF=${DEFAULT_CONF} fi -DEFAULT_LOG_CONF=${BK_HOME}/conf/log4j.cli.properties +DEFAULT_LOG_CONF=${BK_HOME}/conf/log4j2.cli.xml if [ -z "${CLI_LOG_CONF}" ]; then CLI_LOG_CONF=${DEFAULT_LOG_CONF} fi CLI_LOG_DIR=${CLI_LOG_DIR:-"$BK_HOME/logs"} CLI_LOG_FILE=${CLI_LOG_FILE:-"bkctl.log"} -CLI_ROOT_LOGGER=${CLI_ROOT_LOGGER:-"INFO,ROLLINGFILE"} +CLI_ROOT_LOG_LEVEL=${CLI_ROOT_LOG_LEVEL:-"INFO"} +CLI_ROOT_LOG_APPENDER=${CLI_ROOT_LOG_APPENDER:-"CONSOLE"} # Configure the classpath CLI_CLASSPATH="$CLI_JAR:$CLI_CLASSPATH:$CLI_EXTRA_CLASSPATH" @@ -57,10 +59,10 @@ CLI_CLASSPATH="`dirname $CLI_LOG_CONF`:$CLI_CLASSPATH" BOOKIE_OPTS=$(build_bookie_opts) GC_OPTS=$(build_cli_jvm_opts ${CLI_LOG_DIR} "bkctl-gc.log") NETTY_OPTS=$(build_netty_opts) -LOGGING_OPTS=$(build_cli_logging_opts ${CLI_LOG_CONF} ${CLI_LOG_DIR} ${CLI_LOG_FILE} ${CLI_ROOT_LOGGER}) +LOGGING_OPTS=$(build_cli_logging_opts ${CLI_LOG_CONF} ${CLI_ROOT_LOG_LEVEL} ${CLI_ROOT_LOG_APPENDER} ${CLI_LOG_DIR} ${CLI_LOG_FILE}) OPTS="${OPTS} -cp ${CLI_CLASSPATH} ${BOOKIE_OPTS} ${GC_OPTS} ${NETTY_OPTS} ${LOGGING_OPTS} ${CLI_EXTRA_OPTS}" #Change to BK_HOME to support relative paths cd "$BK_HOME" -exec ${JAVA} ${OPTS} org.apache.bookkeeper.tools.cli.BKCtl --conf ${CLI_CONF} $@ +exec "${JAVA}" ${OPTS} org.apache.bookkeeper.tools.cli.BKCtl --conf ${CLI_CONF} $@ diff --git a/bin/bkperf b/bin/bkperf new file mode 100755 index 00000000000..02f8b20da27 --- /dev/null +++ b/bin/bkperf @@ -0,0 +1,69 @@ +#!/usr/bin/env bash +# +#/** +# * Licensed to the Apache Software Foundation (ASF) under one +# * or more contributor license agreements. See the NOTICE file +# * distributed with this work for additional information +# * regarding copyright ownership. The ASF licenses this file +# * to you under the Apache License, Version 2.0 (the +# * "License"); you may not use this file except in compliance +# * with the License. You may obtain a copy of the License at +# * +# * http://www.apache.org/licenses/LICENSE-2.0 +# * +# * Unless required by applicable law or agreed to in writing, software +# * distributed under the License is distributed on an "AS IS" BASIS, +# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# * See the License for the specific language governing permissions and +# * limitations under the License. 
+# */ + +# BookKeeper Perf Tool (experimental) + +BINDIR=`dirname "$0"` +BK_HOME=`cd ${BINDIR}/..;pwd` + +source ${BK_HOME}/bin/common.sh +source ${BK_HOME}/conf/bk_cli_env.sh + +CLI_MODULE_PATH=tools/perf +CLI_MODULE_NAME="(org.apache.bookkeeper-)?bookkeeper-perf" +CLI_MODULE_HOME=${BK_HOME}/${CLI_MODULE_PATH} + +# find the module jar +CLI_JAR=$(find_module_jar ${CLI_MODULE_PATH} ${CLI_MODULE_NAME}) + +# set up the classpath +CLI_CLASSPATH=$(set_module_classpath ${CLI_MODULE_PATH}) + +DEFAULT_CONF=${BK_HOME}/conf/bk_server.conf +if [ -z "${CLI_CONF}" ]; then + CLI_CONF=${DEFAULT_CONF} +fi + +DEFAULT_LOG_CONF=${BK_HOME}/conf/log4j2.cli.xml +if [ -z "${CLI_LOG_CONF}" ]; then + CLI_LOG_CONF=${DEFAULT_LOG_CONF} +fi +CLI_LOG_DIR=${CLI_LOG_DIR:-"$BK_HOME/logs"} +CLI_LOG_FILE=${CLI_LOG_FILE:-"bkperf.log"} +CLI_ROOT_LOG_LEVEL=${CLI_ROOT_LOG_LEVEL:-"INFO"} +CLI_ROOT_LOG_APPENDER=${CLI_ROOT_LOG_APPENDER:-"CONSOLE"} + +mkdir -p ${CLI_LOG_DIR} + +# Configure the classpath +CLI_CLASSPATH="$CLI_JAR:$CLI_CLASSPATH:$CLI_EXTRA_CLASSPATH" +CLI_CLASSPATH="`dirname $CLI_LOG_CONF`:$CLI_CLASSPATH" + +# Build the OPTs +BOOKIE_OPTS=$(build_bookie_opts) +GC_OPTS=$(build_cli_jvm_opts ${CLI_LOG_DIR} "bkperf-gc.log") +NETTY_OPTS=$(build_netty_opts) +LOGGING_OPTS=$(build_cli_logging_opts ${CLI_LOG_CONF} ${CLI_ROOT_LOG_LEVEL} ${CLI_ROOT_LOG_APPENDER} ${CLI_LOG_DIR} ${CLI_LOG_FILE}) + +OPTS="${OPTS} -cp ${CLI_CLASSPATH} ${BOOKIE_OPTS} ${GC_OPTS} ${NETTY_OPTS} ${LOGGING_OPTS} ${CLI_EXTRA_OPTS}" + +#Change to BK_HOME to support relative paths +cd "$BK_HOME" +exec ${JAVA} ${OPTS} org.apache.bookkeeper.tools.perf.BKPerf --conf ${CLI_CONF} $@ diff --git a/bin/bookkeeper b/bin/bookkeeper index 07e7db3ecd7..7ad73b655bb 100755 --- a/bin/bookkeeper +++ b/bin/bookkeeper @@ -18,17 +18,34 @@ # * limitations under the License. 
# */ +set -e + BINDIR=`dirname "$0"` BK_HOME=`cd ${BINDIR}/..;pwd` source ${BK_HOME}/bin/common.sh -if [ "x$1" == "xstandalone" ]; then - BOOKIE_MODULE_PATH=stream/server - BOOKIE_MODULE_NAME="(org.apache.bookkeeper-)?stream-storage-server" +# default variables +DEFAULT_CONF=${BK_HOME}/conf/bk_server.conf +DEFAULT_ZK_CONF=${BK_HOME}/conf/zookeeper.conf + +if [ -z "$BOOKIE_CONF" ]; then + BOOKIE_CONF_TO_CHECK=${DEFAULT_CONF} else + BOOKIE_CONF_TO_CHECK=${BOOKIE_CONF} +fi + +FIND_TABLE_SERVICE_RESULT=$(find_table_service ${BOOKIE_CONF_TO_CHECK} $1) + +if [ "x${FIND_TABLE_SERVICE_RESULT}" == "xtrue" ]; then + BOOKIE_MODULE_PATH=stream/server + BOOKIE_MODULE_NAME=${TABLE_SERVICE_MODULE_NAME} +elif [ "x${FIND_TABLE_SERVICE_RESULT}" == "xfalse" ]; then BOOKIE_MODULE_PATH=bookkeeper-server - BOOKIE_MODULE_NAME="(org.apache.bookkeeper-)?bookkeeper-server" + BOOKIE_MODULE_NAME=${BOOKIE_SERVER_MODULE_NAME} +else + echo ${FIND_TABLE_SERVICE_RESULT} + exit 1 fi # find the module jar @@ -37,21 +54,29 @@ BOOKIE_JAR=$(find_module_jar ${BOOKIE_MODULE_PATH} ${BOOKIE_MODULE_NAME}) # set up the classpath BOOKIE_CLASSPATH=$(set_module_classpath ${BOOKIE_MODULE_PATH}) -# default variables -DEFAULT_CONF=${BK_HOME}/conf/bk_server.conf -DEFAULT_ZK_CONF=${BK_HOME}/conf/zookeeper.conf - bookkeeper_help() { cat < where command is one of: + +[service commands] + bookie Run a bookie server - autorecovery Run AutoRecovery service daemon + autorecovery Run AutoRecovery service + zookeeper Run zookeeper server + +[development commands] + localbookie Run a test ensemble of bookies locally standalone Run a standalone cluster (with all service components) locally + +[tooling commands] + upgrade Upgrade bookie filesystem shell Run shell for admin commands - zookeeper Run zookeeper server + +[other commands] + help This help message or command is the full name of a class with a defined main() method. @@ -62,7 +87,6 @@ Environment variables: BOOKIE_ZK_CONF Configuration file for zookeeper (default: $DEFAULT_ZK_CONF) BOOKIE_EXTRA_OPTS Extra options to be passed to the jvm BOOKIE_EXTRA_CLASSPATH Add extra paths to the bookkeeper classpath - ENTRY_FORMATTER_CLASS Entry formatter class to format entries. 
BOOKIE_PID_DIR Folder where the Bookie server PID file should be stored BOOKIE_STOP_TIMEOUT Wait time before forcefully kill the Bookie server instance, if the stop is not successful @@ -82,7 +106,7 @@ shift LOCALBOOKIES_CONFIG_DIR="${LOCALBOOKIES_CONFIG_DIR:-/tmp/localbookies-config}" if [ ${COMMAND} == "shell" ]; then - DEFAULT_LOG_CONF=${BK_HOME}/conf/log4j.shell.properties + DEFAULT_LOG_CONF=${BK_HOME}/conf/log4j2.shell.xml if [[ $1 == "-localbookie" ]]; then if [[ $2 == *:* ]]; then @@ -109,7 +133,8 @@ if [ -z "$BOOKIE_LOG_CONF" ]; then fi BOOKIE_LOG_DIR=${BOOKIE_LOG_DIR:-"$BK_HOME/logs"} BOOKIE_LOG_FILE=${BOOKIE_LOG_FILE:-"bookkeeper-server.log"} -BOOKIE_ROOT_LOGGER=${BOOKIE_ROOT_LOGGER:-"INFO,CONSOLE"} +BOOKIE_ROOT_LOG_LEVEL=${BOOKIE_ROOT_LOG_LEVEL:-"INFO"} +BOOKIE_ROOT_LOG_APPENDER=${BOOKIE_ROOT_LOG_APPENDER:-"CONSOLE"} # Configure the classpath BOOKIE_CLASSPATH="$BOOKIE_JAR:$BOOKIE_CLASSPATH:$BOOKIE_EXTRA_CLASSPATH" @@ -119,8 +144,9 @@ BOOKIE_CLASSPATH="`dirname $BOOKIE_LOG_CONF`:$BOOKIE_CLASSPATH" BOOKIE_OPTS=$(build_bookie_opts) GC_OPTS=$(build_bookie_jvm_opts ${BOOKIE_LOG_DIR} "gc_%p.log") NETTY_OPTS=$(build_netty_opts) -LOGGING_OPTS=$(build_logging_opts ${BOOKIE_LOG_CONF} ${BOOKIE_LOG_DIR} ${BOOKIE_LOG_FILE} ${BOOKIE_ROOT_LOGGER}) +LOGGING_OPTS=$(build_logging_opts ${BOOKIE_LOG_CONF} ${BOOKIE_ROOT_LOG_LEVEL} ${BOOKIE_ROOT_LOG_APPENDER} ${BOOKIE_LOG_DIR} ${BOOKIE_LOG_FILE}) +BOOKIE_EXTRA_OPTS="${BOOKIE_EXTRA_OPTS} -Dorg.bouncycastle.fips.approved_only=true" OPTS="${OPTS} -cp ${BOOKIE_CLASSPATH} ${BOOKIE_OPTS} ${GC_OPTS} ${NETTY_OPTS} ${LOGGING_OPTS} ${BOOKIE_EXTRA_OPTS}" # Create log dir if it doesn't exist @@ -131,26 +157,25 @@ fi #Change to BK_HOME to support relative paths cd "$BK_HOME" if [ ${COMMAND} == "bookie" ]; then - exec ${JAVA} ${OPTS} ${JMX_ARGS} org.apache.bookkeeper.server.Main --conf ${BOOKIE_CONF} $@ + exec "${JAVA}" ${OPTS} ${JMX_ARGS} org.apache.bookkeeper.server.Main --conf ${BOOKIE_CONF} $@ elif [ ${COMMAND} == "autorecovery" ]; then - exec ${JAVA} ${OPTS} ${JMX_ARGS} org.apache.bookkeeper.replication.AutoRecoveryMain --conf ${BOOKIE_CONF} $@ + exec "${JAVA}" ${OPTS} ${JMX_ARGS} org.apache.bookkeeper.replication.AutoRecoveryMain --conf ${BOOKIE_CONF} $@ elif [ ${COMMAND} == "localbookie" ]; then NUMBER=$1 shift - exec ${JAVA} ${OPTS} ${JMX_ARGS} -Dzookeeper.4lw.commands.whitelist='*' org.apache.bookkeeper.util.LocalBookKeeper ${NUMBER} ${BOOKIE_CONF} $@ + exec "${JAVA}" ${OPTS} ${JMX_ARGS} -Dzookeeper.4lw.commands.whitelist='*' org.apache.bookkeeper.util.LocalBookKeeper ${NUMBER} ${BOOKIE_CONF} $@ elif [ ${COMMAND} == "standalone" ]; then - exec ${JAVA} ${OPTS} ${JMX_ARGS} -Dzookeeper.4lw.commands.whitelist='*' org.apache.bookkeeper.stream.cluster.StandaloneStarter --conf ${BK_HOME}/conf/standalone.conf $@ + exec "${JAVA}" ${OPTS} ${JMX_ARGS} -Dzookeeper.4lw.commands.whitelist='*' org.apache.bookkeeper.stream.cluster.StandaloneStarter --conf ${BK_HOME}/conf/standalone.conf $@ elif [ ${COMMAND} == "upgrade" ]; then - exec ${JAVA} ${OPTS} org.apache.bookkeeper.bookie.FileSystemUpgrade --conf ${BOOKIE_CONF} $@ + exec "${JAVA}" ${OPTS} org.apache.bookkeeper.bookie.FileSystemUpgrade --conf ${BOOKIE_CONF} $@ elif [ $COMMAND == "zookeeper" ]; then BOOKIE_LOG_FILE=${BOOKIE_LOG_FILE:-"zookeeper.log"} - exec $JAVA $OPTS -Dbookkeeper.log.file=$BOOKIE_LOG_FILE org.apache.zookeeper.server.quorum.QuorumPeerMain $BOOKIE_ZK_CONF $@ + exec "${JAVA}" $OPTS -Dbookkeeper.log.file=$BOOKIE_LOG_FILE org.apache.zookeeper.server.quorum.QuorumPeerMain $BOOKIE_ZK_CONF $@ elif [ 
${COMMAND} == "shell" ]; then - ENTRY_FORMATTER_ARG="-DentryFormatterClass=${ENTRY_FORMATTER_CLASS:-org.apache.bookkeeper.util.StringEntryFormatter}" - exec ${JAVA} ${OPTS} ${ENTRY_FORMATTER_ARG} org.apache.bookkeeper.bookie.BookieShell -conf ${BOOKIE_CONF} $@ + exec "${JAVA}" ${OPTS} org.apache.bookkeeper.bookie.BookieShell -conf ${BOOKIE_CONF} $@ elif [ ${COMMAND} == "help" ]; then bookkeeper_help; else - exec ${JAVA} ${OPTS} ${COMMAND} $@ + exec "${JAVA}" ${OPTS} ${COMMAND} $@ fi diff --git a/bin/bookkeeper-daemon.sh b/bin/bookkeeper-daemon.sh index d64a4489bc1..ed144533fb0 100755 --- a/bin/bookkeeper-daemon.sh +++ b/bin/bookkeeper-daemon.sh @@ -41,7 +41,8 @@ fi BOOKIE_LOG_DIR=${BOOKIE_LOG_DIR:-"$BK_HOME/logs"} -BOOKIE_ROOT_LOGGER=${BOOKIE_ROOT_LOGGER:-'INFO,ROLLINGFILE'} +BOOKIE_ROOT_LOG_LEVEL=${BOOKIE_ROOT_LOG_LEVEL:-'INFO'} +BOOKIE_ROOT_LOG_APPENDER=${BOOKIE_ROOT_LOG_APPENDER:-'ROLLINGFILE'} BOOKIE_STOP_TIMEOUT=${BOOKIE_STOP_TIMEOUT:-30} @@ -80,7 +81,8 @@ case $command in esac export BOOKIE_LOG_DIR=$BOOKIE_LOG_DIR -export BOOKIE_ROOT_LOGGER=$BOOKIE_ROOT_LOGGER +export BOOKIE_ROOT_LOG_LEVEL=$BOOKIE_ROOT_LOG_LEVEL +export BOOKIE_ROOT_LOG_APPENDER=$BOOKIE_ROOT_LOG_APPENDER export BOOKIE_LOG_FILE=bookkeeper-$command-$HOSTNAME.log pid_file="${BOOKIE_PID_DIR}/bookkeeper-${command}.pid" @@ -106,75 +108,111 @@ rotate_out_log () mkdir -p "$BOOKIE_LOG_DIR" -case $startStop in - (start) - if [ -f $pid_file ]; then - PREVIOUS_PID=$(cat $pid_file) - if kill -0 $PREVIOUS_PID > /dev/null 2>&1; then - echo $command running as process $PREVIOUS_PID. Stop it first. - exit 1 - fi - fi - - rotate_out_log $out - echo starting $command, logging to $logfile - bookkeeper=$BK_HOME/bin/bookkeeper - nohup $bookkeeper $command "$@" > "$out" 2>&1 < /dev/null & - echo $! > $pid_file - sleep 1; head $out - sleep 2; - if ! kill -0 $! > /dev/null ; then +start() +{ + if [ -f $pid_file ]; then + PREVIOUS_PID=$(cat $pid_file) + if ps -p $PREVIOUS_PID > /dev/null 2>&1; then + echo $command running as process $PREVIOUS_PID. Stop it first. exit 1 fi - ;; + fi - (stop) - if [ -f $pid_file ]; then - TARGET_PID=$(cat $pid_file) - if kill -0 $TARGET_PID > /dev/null 2>&1; then - echo stopping $command - kill $TARGET_PID - - count=0 - location=$BOOKIE_LOG_DIR - while kill -0 $TARGET_PID > /dev/null 2>&1; - do - echo "Shutdown is in progress... Please wait..." - sleep 1 - count=$(expr $count + 1) - - if [ "$count" = "$BOOKIE_STOP_TIMEOUT" ]; then - break - fi - done + rotate_out_log $out + echo starting $command, logging to $logfile + bookkeeper=$BK_HOME/bin/bookkeeper + nohup $bookkeeper $command "$@" > "$out" 2>&1 < /dev/null & + echo $! > $pid_file + sleep 1; head $out + sleep 2; + if ! ps -p $! > /dev/null ; then + exit 1 + fi +} - if [ "$count" != "$BOOKIE_STOP_TIMEOUT" ]; then - echo "Shutdown completed." +stop() +{ + if [ -f $pid_file ]; then + TARGET_PID=$(cat $pid_file) + if ps -p $TARGET_PID > /dev/null 2>&1; then + echo stopping $command + kill $TARGET_PID + + count=0 + location=$BOOKIE_LOG_DIR + while ps -p $TARGET_PID > /dev/null 2>&1; + do + echo "Shutdown is in progress... Please wait..." 
+ sleep 1 + count=$(expr $count + 1) + + if [ "$count" = "$BOOKIE_STOP_TIMEOUT" ]; then + break fi + done - if [ "$count" != "$BOOKIE_STOP_TIMEOUT" ]; then - echo "Shutdown completed." + if [ "$count" != "$BOOKIE_STOP_TIMEOUT" ]; then + echo "Shutdown completed." + fi + + if ps -p $TARGET_PID > /dev/null 2>&1; then + fileName=$location/$command.out + # Check for the java to use + if [[ -z ${JAVA_HOME} ]]; then + JSTACK=$(which jstack) + if [ $? -ne 0 ]; then + echo "Error: JAVA_HOME not set, and no jstack executable found in $PATH." 1>&2 exit 1 fi + else + JSTACK=${JAVA_HOME}/bin/jstack + fi + $JSTACK $TARGET_PID > $fileName + echo Thread dumps are taken for analysis at $fileName + if [ "$1" == "-force" ] + then + echo forcefully stopping $command + kill -9 $TARGET_PID >/dev/null 2>&1 + echo Successfully stopped the process + else + echo "WARNING: Bookie Server is not stopped completely." + exit 1 fi - else - echo no $command to stop fi - rm $pid_file else echo no $command to stop fi + rm $pid_file + else + echo no $command to stop + fi +} +case $startStop in + (start) + start "$*" ;; + (stop) + stop $1 + ;; + (restart) + forceStopFlag=$(echo "$*"|grep "\-force") + if [[ "$forceStopFlag" != "" ]] + then + stop "-force" + else + stop + fi + if [ "$?" == 0 ] + then + sleep 3 + parameters="$*" + startParameters=${parameters//-force/} + start "$startParameters" + else + echo "WARNING: $command failed to restart, because $command was not stopped completely." + fi + ;; (*) usage echo $supportedargs diff --git a/bin/common.sh b/bin/common.sh index d54b1d52ce7..fd9bd2f0b85 100755 --- a/bin/common.sh +++ b/bin/common.sh @@ -1,5 +1,4 @@ #!/usr/bin/env bash -# #/** # * Licensed to the Apache Software Foundation (ASF) under one # * or more contributor license agreements. See the NOTICE file @@ -19,7 +18,7 @@ # */ # Check net.ipv6.bindv6only -if [ -f /sbin/sysctl ]; then +if [ -f /sbin/sysctl ] && [ -f /proc/sys/net/ipv6/bindv6only ]; then # check if net.ipv6.bindv6only is set to 1 bindv6only=$(/sbin/sysctl -n net.ipv6.bindv6only 2> /dev/null) if [ -n "$bindv6only" ] && [ "$bindv6only" -eq "1" ] @@ -52,9 +51,7 @@ fi # Check for the java to use if [[ -z ${JAVA_HOME} ]]; then JAVA=$(which java) - if [ $? = 0 ]; then - echo "JAVA_HOME not set, using java from PATH. ($JAVA)" - else + if [ $? != 0 ]; then echo "Error: JAVA_HOME not set, and no java executable found in $PATH."
1>&2 exit 1 fi @@ -65,34 +62,50 @@ fi BINDIR=${BK_BINDIR:-"`dirname "$0"`"} BK_HOME=${BK_HOME:-"`cd ${BINDIR}/..;pwd`"} BK_CONFDIR=${BK_HOME}/conf -DEFAULT_LOG_CONF=${BK_CONFDIR}/log4j.properties +DEFAULT_LOG_CONF=${BK_CONFDIR}/log4j2.xml source ${BK_CONFDIR}/nettyenv.sh source ${BK_CONFDIR}/bkenv.sh source ${BK_CONFDIR}/bk_cli_env.sh +detect_jdk8() { + local is_java_8=$($JAVA -version 2>&1 | grep version | grep '"1\.8') + if [ -z "$is_java_8" ]; then + echo "0" + else + echo "1" + fi +} + # default netty settings NETTY_LEAK_DETECTION_LEVEL=${NETTY_LEAK_DETECTION_LEVEL:-"disabled"} -NETTY_RECYCLER_MAXCAPACITY=${NETTY_RECYCLER_MAXCAPACITY:-"1000"} -NETTY_RECYCLER_LINKCAPACITY=${NETTY_RECYCLER_LINKCAPACITY:-"1024"} -# default bookie JVM settings -DEFAULT_BOOKIE_GC_OPTS="-XX:+UseG1GC \ +USING_JDK8=$(detect_jdk8) + +if [ "$USING_JDK8" -ne "1" ]; then + DEFAULT_BOOKIE_GC_OPTS="-XX:+UseG1GC \ + -XX:MaxGCPauseMillis=10 \ + -XX:+ParallelRefProcEnabled \ + -XX:+DisableExplicitGC" + DEFAULT_BOOKIE_GC_LOGGING_OPTS="" +else + DEFAULT_BOOKIE_GC_OPTS="-XX:+UseG1GC \ -XX:MaxGCPauseMillis=10 \ -XX:+ParallelRefProcEnabled \ -XX:+UnlockExperimentalVMOptions \ - -XX:+AggressiveOpts \ -XX:+DoEscapeAnalysis \ -XX:ParallelGCThreads=32 \ -XX:ConcGCThreads=32 \ -XX:G1NewSizePercent=50 \ -XX:+DisableExplicitGC \ -XX:-ResizePLAB" -DEFAULT_BOOKIE_GC_LOGGING_OPTS="-XX:+PrintGCDetails \ + DEFAULT_BOOKIE_GC_LOGGING_OPTS="-XX:+PrintGCDetails \ -XX:+PrintGCApplicationStoppedTime \ -XX:+UseGCLogFileRotation \ -XX:NumberOfGCLogFiles=5 \ -XX:GCLogFileSize=64m" +fi + BOOKIE_MAX_HEAP_MEMORY=${BOOKIE_MAX_HEAP_MEMORY:-"1g"} BOOKIE_MIN_HEAP_MEMORY=${BOOKIE_MIN_HEAP_MEMORY:-"1g"} BOOKIE_MAX_DIRECT_MEMORY=${BOOKIE_MAX_DIRECT_MEMORY:-"2g"} @@ -103,21 +116,40 @@ BOOKIE_GC_LOGGING_OPTS=${BOOKIE_GC_LOGGING_OPTS:-"${DEFAULT_BOOKIE_GC_LOGGING_OP # default CLI JVM settings DEFAULT_CLI_GC_OPTS="-XX:+UseG1GC \ -XX:MaxGCPauseMillis=10" -DEFAULT_CLI_GC_LOGGING_OPTS="-XX:+PrintGCDetails \ +if [ "$USING_JDK8" -ne "1" ]; then + DEFAULT_CLI_GC_LOGGING_OPTS="" +else + DEFAULT_CLI_GC_LOGGING_OPTS="-XX:+PrintGCDetails \ -XX:+PrintGCApplicationStoppedTime \ -XX:+UseGCLogFileRotation \ -XX:NumberOfGCLogFiles=5 \ -XX:GCLogFileSize=64m" +fi + CLI_MAX_HEAP_MEMORY=${CLI_MAX_HEAP_MEMORY:-"512M"} CLI_MIN_HEAP_MEMORY=${CLI_MIN_HEAP_MEMORY:-"256M"} CLI_MEM_OPTS=${CLI_MEM_OPTS:-"-Xms${CLI_MIN_HEAP_MEMORY} -Xmx${CLI_MAX_HEAP_MEMORY}"} CLI_GC_OPTS=${CLI_GC_OPTS:-"${DEFAULT_CLI_GC_OPTS}"} CLI_GC_LOGGING_OPTS=${CLI_GC_LOGGING_OPTS:-"${DEFAULT_CLI_GC_LOGGING_OPTS}"} +# module names +BOOKIE_SERVER_MODULE_NAME="(org.apache.bookkeeper-)?bookkeeper-server" +TABLE_SERVICE_MODULE_NAME="(org.apache.bookkeeper-)?stream-storage-server" + +is_released_binary() { + if [ -d ${BK_HOME}/lib ]; then + echo "true" + return + else + echo "false" + return + fi +} + find_module_jar_at() { DIR=$1 MODULE=$2 - REGEX="^${MODULE}-[0-9\\.]*(-SNAPSHOT)?.jar$" + REGEX="^${MODULE}-[0-9\\.]*((-[a-zA-Z]*(-[0-9]*)?)|(-SNAPSHOT))?.jar$" if [ -d ${DIR} ]; then cd ${DIR} for f in *.jar; do @@ -129,6 +161,21 @@ find_module_jar_at() { fi } +find_module_release_jar() { + MODULE_NAME=$1 + RELEASE_JAR=$(find_module_jar_at ${BK_HOME} ${MODULE_NAME}) + if [ -n "${RELEASE_JAR}" ]; then + MODULE_JAR=${RELEASE_JAR} + else + RELEASE_JAR=$(find_module_jar_at ${BK_HOME}/lib ${MODULE_NAME}) + if [ -n "${RELEASE_JAR}" ]; then + MODULE_JAR=${RELEASE_JAR} + fi + fi + echo ${RELEASE_JAR} + return +} + find_module_jar() { MODULE_PATH=$1 MODULE_NAME=$2 @@ -146,10 +193,13 @@ find_module_jar() { 
BUILT_JAR=$(find_module_jar_at ${BK_HOME}/${MODULE_PATH}/target ${MODULE_NAME}) if [ -z "${BUILT_JAR}" ]; then echo "Couldn't find module '${MODULE_NAME}' jar." >&2 - read -p "Do you want me to run \`mvn package -DskiptTests\` for you ? " answer + read -p "Do you want me to run \`mvn install -DskipTests\` for you ? (y|n) " answer case "${answer:0:1}" in y|Y ) - mvn package -DskipTests -Dstream + mkdir -p ${BK_HOME}/logs + output="${BK_HOME}/logs/build.out" + echo "see output at ${output} for the progress ..." >&2 + mvn install -DskipTests &> ${output} ;; * ) exit 1 @@ -182,8 +232,13 @@ add_maven_deps_to_classpath() { # and cache it. Save the file into our target dir so a mvn clean will get # clean it up and force us create a new one. f="${BK_HOME}/${MODULE_PATH}/target/cached_classpath.txt" + output="${BK_HOME}/${MODULE_PATH}/target/build_classpath.out" + if [ ! -f ${f} ]; then - ${MVN} -f "${BK_HOME}/${MODULE_PATH}/pom.xml" -Dstream dependency:build-classpath -Dmdep.outputFile="target/cached_classpath.txt" &> /dev/null + echo "the classpath of module '${MODULE_PATH}' is not found, generating it ..." >&2 + echo "see output at ${output} for the progress ..." >&2 + ${MVN} -f "${BK_HOME}/${MODULE_PATH}/pom.xml" dependency:build-classpath -Dmdep.outputFile="target/cached_classpath.txt" &> ${output} + echo "the classpath of module '${MODULE_PATH}' is generated at '${f}'." >&2 fi } @@ -205,47 +260,117 @@ set_module_classpath() { build_bookie_jvm_opts() { LOG_DIR=$1 GC_LOG_FILENAME=$2 - - echo "$BOOKIE_MEM_OPTS $BOOKIE_GC_OPTS $BOOKIE_GC_LOGGING_OPTS $BOOKIE_PERF_OPTS -Xloggc:${LOG_DIR}/${GC_LOG_FILENAME}" + if [ "$USING_JDK8" -eq "1" ]; then + echo "$BOOKIE_MEM_OPTS $BOOKIE_GC_OPTS $BOOKIE_GC_LOGGING_OPTS $BOOKIE_PERF_OPTS -Xloggc:${LOG_DIR}/${GC_LOG_FILENAME}" + else + echo "$BOOKIE_MEM_OPTS $BOOKIE_GC_OPTS $BOOKIE_GC_LOGGING_OPTS $BOOKIE_PERF_OPTS -Xlog:gc=info:file=${LOG_DIR}/${GC_LOG_FILENAME}::filecount=5,filesize=64m" + fi + return } build_cli_jvm_opts() { LOG_DIR=$1 GC_LOG_FILENAME=$2 - - echo "$CLI_MEM_OPTS $CLI_GC_OPTS $CLI_GC_LOGGING_OPTS -Xloggc:${LOG_DIR}/${GC_LOG_FILENAME}" + if [ "$USING_JDK8" -eq "1" ]; then + echo "$CLI_MEM_OPTS $CLI_GC_OPTS $CLI_GC_LOGGING_OPTS -Xloggc:${LOG_DIR}/${GC_LOG_FILENAME}" + else + echo "$CLI_MEM_OPTS $CLI_GC_OPTS $CLI_GC_LOGGING_OPTS -Xlog:gc=info:file=${LOG_DIR}/${GC_LOG_FILENAME}::filecount=5,filesize=64m" + fi + return } build_netty_opts() { - echo "-Dio.netty.leakDetectionLevel=${NETTY_LEAK_DETECTION_LEVEL} \ - -Dio.netty.recycler.maxCapacity.default=${NETTY_RECYCLER_MAXCAPACITY} \ - -Dio.netty.recycler.linkCapacity=${NETTY_RECYCLER_LINKCAPACITY}" + NETTY_OPTS="-Dio.netty.leakDetectionLevel=${NETTY_LEAK_DETECTION_LEVEL} -Dio.netty.tryReflectionSetAccessible=true" + # --add-opens does not exist on jdk8 + if [ "$USING_JDK8" -eq "0" ]; then + # Enable java.nio.DirectByteBuffer + # https://github.com/netty/netty/blob/4.1/common/src/main/java/io/netty/util/internal/PlatformDependent0.java + # https://github.com/netty/netty/issues/12265 + NETTY_OPTS="$NETTY_OPTS --add-opens java.base/java.nio=ALL-UNNAMED --add-opens java.base/jdk.internal.misc=ALL-UNNAMED" + fi + echo $NETTY_OPTS } build_logging_opts() { CONF_FILE=$1 - LOG_DIR=$2 - LOG_FILE=$3 - LOGGER=$4 + LOG_LEVEL=$2 + LOG_APPENDER=$3 + LOG_DIR=$4 + LOG_FILE=$5 - echo "-Dlog4j.configuration=`basename ${CONF_FILE}` \ - -Dbookkeeper.root.logger=${LOGGER} \ + echo "-Dlog4j.configurationFile=`basename ${CONF_FILE}` \ + -Dbookkeeper.log.root.level=${LOG_LEVEL} \ + 
-Dbookkeeper.log.root.appender=${LOG_APPENDER} \ -Dbookkeeper.log.dir=${LOG_DIR} \ -Dbookkeeper.log.file=${LOG_FILE}" } build_cli_logging_opts() { CONF_FILE=$1 - LOG_DIR=$2 - LOG_FILE=$3 - LOGGER=$4 + LOG_LEVEL=$2 + LOG_APPENDER=$3 + LOG_DIR=$4 + LOG_FILE=$5 - echo "-Dlog4j.configuration=`basename ${CONF_FILE}` \ - -Dbookkeeper.cli.root.logger=${LOGGER} \ + echo "-Dlog4j.configurationFile=`basename ${CONF_FILE}` \ + -Dbookkeeper.cli.log.root.level=${LOG_LEVEL} \ + -Dbookkeeper.cli.log.root.appender=${LOG_APPENDER} \ -Dbookkeeper.cli.log.dir=${LOG_DIR} \ -Dbookkeeper.cli.log.file=${LOG_FILE}" } build_bookie_opts() { - echo "-Djava.net.preferIPv4Stack=true" + BOOKIE_OPTS="-Djava.net.preferIPv4Stack=true" + # --add-opens does not exist on jdk8 + if [ "$USING_JDK8" -eq "0" ]; then + # enable posix_fadvise usage in the Journal + BOOKIE_OPTS="$BOOKIE_OPTS --add-opens java.base/java.io=ALL-UNNAMED" + # DirectMemoryCRC32Digest + BOOKIE_OPTS="$BOOKIE_OPTS --add-opens java.base/java.util.zip=ALL-UNNAMED" + fi + echo $BOOKIE_OPTS +} + +find_table_service() { + BOOKIE_CONF_TO_CHECK=$1 + SERVICE_COMMAND=$2 + + # check if it is a released binary + IS_RELEASED_BINARY=$(is_released_binary) + + # check if table service is released + TABLE_SERVICE_RELEASED="true" + if [ "x${IS_RELEASED_BINARY}" == "xtrue" ]; then + TABLE_SERVICE_RELEASE_JAR=$(find_module_release_jar ${TABLE_SERVICE_MODULE_NAME}) + if [ "x${TABLE_SERVICE_RELEASE_JAR}" == "x" ]; then + TABLE_SERVICE_RELEASED="false" + fi + fi + + # check the configuration to see if table service is enabled or not. + if [ -z "${ENABLE_TABLE_SERVICE}" ]; then + # mask exit code if the configuration file doesn't contain `StreamStorageLifecycleComponent` + TABLE_SERVICE_SETTING=$(grep StreamStorageLifecycleComponent ${BOOKIE_CONF_TO_CHECK} | cat) + if [[ "${TABLE_SERVICE_SETTING}" =~ ^extraServerComponents.* ]]; then + if [ "x${TABLE_SERVICE_RELEASED}" == "xfalse" ]; then + echo "The release binary is built without table service. Please disable \`StreamStorageLifecycleComponent\` in your bookie configuration at '${BOOKIE_CONF_TO_CHECK}'." + return + fi + ENABLE_TABLE_SERVICE="true" + fi + fi + + # standalone only run + if [ \( "x${SERVICE_COMMAND}" == "xstandalone" \) -a \( "x${TABLE_SERVICE_RELEASED}" == "xfalse" \) ]; then + echo "The release binary is built without table service. Use \`localbookie <n>\` instead of \`standalone\` for local development."
+ return + fi + + if [ \( "x${SERVICE_COMMAND}" == "xstandalone" \) -o \( "x${ENABLE_TABLE_SERVICE}" == "xtrue" \) ]; then + echo "true" + return + else + echo "false" + return + fi } diff --git a/bin/dlog b/bin/dlog index 9a9b8f16131..2e8c2a32add 100755 --- a/bin/dlog +++ b/bin/dlog @@ -38,8 +38,8 @@ DLOG_CLASSPATH=$(set_module_classpath ${DLOG_MODULE_PATH}) # default variables DEFAULT_CONF=${BK_HOME}/conf/bk_server.conf DEFAULT_CLI_CONF=${BK_HOME}/conf/bk_server.conf -DEFAULT_LOG_CONF=${BK_HOME}/conf/log4j.properties -DEFAULT_CLI_LOG_CONF=${BK_HOME}/conf/log4j.cli.properties +DEFAULT_LOG_CONF=${BK_HOME}/conf/log4j2.xml +DEFAULT_CLI_LOG_CONF=${BK_HOME}/conf/log4j2.cli.xml dlog_help() { cat < (, )+ - -# DEFAULT: console appender only -log4j.rootLogger=ERROR, CONSOLE - -# Example with rolling log file -#log4j.rootLogger=DEBUG, CONSOLE, ROLLINGFILE - -# Example with rolling log file and tracing -#log4j.rootLogger=TRACE, CONSOLE, ROLLINGFILE, TRACEFILE - -# -# Log INFO level and above messages to the console -# -log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender -log4j.appender.CONSOLE.Threshold=INFO -log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout -log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} - %-5p - [%t:%C{1}@%L] - %m%n -log4j.logger.org.apache.bookkeeper.benchmark=INFO - -# -# Add ROLLINGFILE to rootLogger to get log file output -# Log DEBUG level and above messages to a log file -log4j.appender.ROLLINGFILE=org.apache.log4j.DailyRollingFileAppender -log4j.appender.ROLLINGFILE.Threshold=DEBUG -log4j.appender.ROLLINGFILE.File=bookkeeper-benchmark.log -log4j.appender.ROLLINGFILE.layout=org.apache.log4j.PatternLayout -log4j.appender.ROLLINGFILE.layout.ConversionPattern=%d{ISO8601} - %-5p - [%t:%C{1}@%L] - %m%n - -# Max log file size of 10MB -log4j.appender.ROLLINGFILE.MaxFileSize=10MB -# uncomment the next line to limit number of backup files -#log4j.appender.ROLLINGFILE.MaxBackupIndex=10 - -log4j.appender.ROLLINGFILE.layout=org.apache.log4j.PatternLayout -log4j.appender.ROLLINGFILE.layout.ConversionPattern=%d{ISO8601} - %-5p [%t:%C{1}@%L] - %m%n - - -# -# Add TRACEFILE to rootLogger to get log file output -# Log DEBUG level and above messages to a log file -log4j.appender.TRACEFILE=org.apache.log4j.FileAppender -log4j.appender.TRACEFILE.Threshold=TRACE -log4j.appender.TRACEFILE.File=bookkeeper_trace.log - -log4j.appender.TRACEFILE.layout=org.apache.log4j.PatternLayout -### Notice we are including log4j's NDC here (%x) -log4j.appender.TRACEFILE.layout.ConversionPattern=%d{ISO8601} - %-5p [%t:%C{1}@%L][%x] - %m%n diff --git a/bookkeeper-benchmark/conf/log4j2.xml b/bookkeeper-benchmark/conf/log4j2.xml new file mode 100644 index 00000000000..21257d6dc3a --- /dev/null +++ b/bookkeeper-benchmark/conf/log4j2.xml @@ -0,0 +1,44 @@ + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/bookkeeper-benchmark/pom.xml b/bookkeeper-benchmark/pom.xml index 163c1bd5f26..72322e9b977 100644 --- a/bookkeeper-benchmark/pom.xml +++ b/bookkeeper-benchmark/pom.xml @@ -21,7 +21,7 @@ bookkeeper org.apache.bookkeeper - 4.9.0-SNAPSHOT + 4.18.0-SNAPSHOT org.apache.bookkeeper bookkeeper-benchmark @@ -43,22 +43,46 @@ + + org.apache.bookkeeper + bookkeeper-server + ${project.version} + + + org.apache.bookkeeper + testtools + ${project.parent.version} + test + org.apache.zookeeper zookeeper test-jar test + + + org.xerial.snappy + snappy-java + test + + + + io.dropwizard.metrics + metrics-core + test + org.apache.bookkeeper - bookkeeper-server - ${project.parent.version} - compile + 
bookkeeper-common + ${project.version} + test-jar + test org.apache.bookkeeper bookkeeper-server - ${project.parent.version} + ${project.version} test-jar test diff --git a/bookkeeper-benchmark/src/main/java/org/apache/bookkeeper/benchmark/BenchBookie.java b/bookkeeper-benchmark/src/main/java/org/apache/bookkeeper/benchmark/BenchBookie.java index 216aff9890b..a4d2ae99050 100644 --- a/bookkeeper-benchmark/src/main/java/org/apache/bookkeeper/benchmark/BenchBookie.java +++ b/bookkeeper-benchmark/src/main/java/org/apache/bookkeeper/benchmark/BenchBookie.java @@ -20,6 +20,7 @@ package org.apache.bookkeeper.benchmark; import io.netty.buffer.ByteBuf; +import io.netty.buffer.PooledByteBufAllocator; import io.netty.buffer.Unpooled; import io.netty.channel.EventLoopGroup; import io.netty.channel.epoll.EpollEventLoopGroup; @@ -34,6 +35,7 @@ import org.apache.bookkeeper.client.api.WriteFlag; import org.apache.bookkeeper.common.util.OrderedExecutor; import org.apache.bookkeeper.conf.ClientConfiguration; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.net.BookieSocketAddress; import org.apache.bookkeeper.proto.BookieClient; import org.apache.bookkeeper.proto.BookieClientImpl; @@ -62,7 +64,7 @@ static class LatencyCallback implements WriteCallback { boolean complete; @Override public synchronized void writeComplete(int rc, long ledgerId, long entryId, - BookieSocketAddress addr, Object ctx) { + BookieId addr, Object ctx) { if (rc != 0) { LOG.error("Got error " + rc); } @@ -82,8 +84,9 @@ public synchronized void waitForComplete() throws InterruptedException { static class ThroughputCallback implements WriteCallback { int count; int waitingCount = Integer.MAX_VALUE; + @Override public synchronized void writeComplete(int rc, long ledgerId, long entryId, - BookieSocketAddress addr, Object ctx) { + BookieId addr, Object ctx) { if (rc != 0) { LOG.error("Got error " + rc); } @@ -175,7 +178,8 @@ public static void main(String[] args) new DefaultThreadFactory("BookKeeperClientScheduler")); ClientConfiguration conf = new ClientConfiguration(); - BookieClient bc = new BookieClientImpl(conf, eventLoop, executor, scheduler, NullStatsLogger.INSTANCE); + BookieClient bc = new BookieClientImpl(conf, eventLoop, PooledByteBufAllocator.DEFAULT, executor, scheduler, + NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); LatencyCallback lc = new LatencyCallback(); ThroughputCallback tc = new ThroughputCallback(); @@ -188,7 +192,7 @@ public static void main(String[] args) toSend.writeLong(ledger); toSend.writeLong(entry); toSend.writerIndex(toSend.capacity()); - bc.addEntry(new BookieSocketAddress(addr, port), ledger, new byte[20], + bc.addEntry(new BookieSocketAddress(addr, port).toBookieId(), ledger, new byte[20], entry, ByteBufList.get(toSend), tc, null, BookieProtocol.FLAG_NONE, false, WriteFlag.NONE); } @@ -206,7 +210,7 @@ public static void main(String[] args) toSend.writeLong(entry); toSend.writerIndex(toSend.capacity()); lc.resetComplete(); - bc.addEntry(new BookieSocketAddress(addr, port), ledger, new byte[20], + bc.addEntry(new BookieSocketAddress(addr, port).toBookieId(), ledger, new byte[20], entry, ByteBufList.get(toSend), lc, null, BookieProtocol.FLAG_NONE, false, WriteFlag.NONE); lc.waitForComplete(); @@ -225,7 +229,7 @@ public static void main(String[] args) toSend.writeLong(ledger); toSend.writeLong(entry); toSend.writerIndex(toSend.capacity()); - bc.addEntry(new BookieSocketAddress(addr, port), ledger, new byte[20], + bc.addEntry(new BookieSocketAddress(addr, 
port).toBookieId(), ledger, new byte[20], entry, ByteBufList.get(toSend), tc, null, BookieProtocol.FLAG_NONE, false, WriteFlag.NONE); } diff --git a/bookkeeper-benchmark/src/main/java/org/apache/bookkeeper/benchmark/BenchReadThroughputLatency.java b/bookkeeper-benchmark/src/main/java/org/apache/bookkeeper/benchmark/BenchReadThroughputLatency.java index 18c73e88e1b..bfff2ed53d1 100644 --- a/bookkeeper-benchmark/src/main/java/org/apache/bookkeeper/benchmark/BenchReadThroughputLatency.java +++ b/bookkeeper-benchmark/src/main/java/org/apache/bookkeeper/benchmark/BenchReadThroughputLatency.java @@ -19,8 +19,9 @@ */ package org.apache.bookkeeper.benchmark; -import static com.google.common.base.Charsets.UTF_8; +import static java.nio.charset.StandardCharsets.UTF_8; +import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; import java.util.ArrayList; import java.util.Comparator; import java.util.Enumeration; @@ -35,6 +36,7 @@ import org.apache.bookkeeper.client.LedgerEntry; import org.apache.bookkeeper.client.LedgerHandle; import org.apache.bookkeeper.conf.ClientConfiguration; +import org.apache.bookkeeper.util.StringUtils; import org.apache.bookkeeper.zookeeper.ZooKeeperClient; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; @@ -72,12 +74,12 @@ public int compare(String o1, String o2) { } }; - private static void readLedger(ClientConfiguration conf, long ledgerId, byte[] passwd) { + private static void readLedger(ClientConfiguration conf, long ledgerId, byte[] passwd, int batchEntries) { LOG.info("Reading ledger {}", ledgerId); BookKeeper bk = null; long time = 0; long entriesRead = 0; - long lastRead = 0; + long lastRead = -1; int nochange = 0; long absoluteLimit = 5000000; @@ -88,7 +90,7 @@ private static void readLedger(ClientConfiguration conf, long ledgerId, byte[] p lh = bk.openLedgerNoRecovery(ledgerId, BookKeeper.DigestType.CRC32, passwd); long lastConfirmed = Math.min(lh.getLastAddConfirmed(), absoluteLimit); - if (lastConfirmed == lastRead) { + if (lastConfirmed <= lastRead + 1) { nochange++; if (nochange == 10) { break; @@ -103,15 +105,21 @@ private static void readLedger(ClientConfiguration conf, long ledgerId, byte[] p while (lastRead < lastConfirmed) { long nextLimit = lastRead + 100000; - long readTo = Math.min(nextLimit, lastConfirmed); - Enumeration entries = lh.readEntries(lastRead + 1, readTo); - lastRead = readTo; + Enumeration entries; + if (batchEntries <= 0) { + long readTo = Math.min(nextLimit, lastConfirmed); + entries = lh.readEntries(lastRead + 1, readTo); + } else { + entries = lh.batchReadEntries(lastRead + 1, batchEntries, -1); + } while (entries.hasMoreElements()) { LedgerEntry e = entries.nextElement(); entriesRead++; + lastRead = e.getEntryId(); if ((entriesRead % 10000) == 0) { - LOG.info("{} entries read", entriesRead); + LOG.info("{} entries read from ledger {}", entriesRead, ledgerId); } + e.getEntryBuffer().release(); } } long endtime = System.nanoTime(); @@ -147,15 +155,22 @@ private static void usage(Options options) { } @SuppressWarnings("deprecation") + @SuppressFBWarnings("RCN_REDUNDANT_NULLCHECK_WOULD_HAVE_BEEN_A_NPE") public static void main(String[] args) throws Exception { Options options = new Options(); options.addOption("ledger", true, "Ledger to read. If empty, read all ledgers which come available. " + " Cannot be used with -listen"); + //How to generate ledger node path. + options.addOption("ledgerManagerType", true, "The ledger manager type. 
" + + "The optional value: flat, hierarchical, legacyHierarchical, longHierarchical. Default: flat"); options.addOption("listen", true, "Listen for creation of ledgers, and read each one fully"); options.addOption("password", true, "Password used to access ledgers (default 'benchPasswd')"); options.addOption("zookeeper", true, "Zookeeper ensemble, default \"localhost:2181\""); options.addOption("sockettimeout", true, "Socket timeout for bookkeeper client. In seconds. Default 5"); + options.addOption("useV2", false, "Whether use V2 protocol to read ledgers from the bookie server."); options.addOption("help", false, "This message"); + options.addOption("batchentries", true, "The batch read entries count. " + + "If the value is greater than 0, uses batch read. Or uses the single read. Default 1000"); CommandLineParser parser = new PosixParser(); CommandLine cmd = parser.parse(options, args); @@ -168,6 +183,7 @@ public static void main(String[] args) throws Exception { final String servers = cmd.getOptionValue("zookeeper", "localhost:2181"); final byte[] passwd = cmd.getOptionValue("password", "benchPasswd").getBytes(UTF_8); final int sockTimeout = Integer.parseInt(cmd.getOptionValue("sockettimeout", "5")); + final int batchentries = Integer.parseInt(cmd.getOptionValue("batchentries", "1000")); if (cmd.hasOption("ledger") && cmd.hasOption("listen")) { LOG.error("Cannot used -ledger and -listen together"); usage(options); @@ -187,11 +203,29 @@ public static void main(String[] args) throws Exception { } final CountDownLatch shutdownLatch = new CountDownLatch(1); - final String nodepath = String.format("/ledgers/L%010d", ledger.get()); + + String ledgerManagerType = cmd.getOptionValue("ledgerManagerType", "flat"); + String nodepath; + if ("flat".equals(ledgerManagerType)) { + nodepath = String.format("/ledgers/L%010d", ledger.get()); + } else if ("hierarchical".equals(ledgerManagerType)) { + nodepath = String.format("/ledgers%s", StringUtils.getHybridHierarchicalLedgerPath(ledger.get())); + } else if ("legacyHierarchical".equals(ledgerManagerType)) { + nodepath = String.format("/ledgers%s", StringUtils.getShortHierarchicalLedgerPath(ledger.get())); + } else if ("longHierarchical".equals(ledgerManagerType)) { + nodepath = String.format("/ledgers%s", StringUtils.getLongHierarchicalLedgerPath(ledger.get())); + } else { + LOG.warn("Unknown ledger manager type: {}, use flat as the value", ledgerManagerType); + nodepath = String.format("/ledgers/L%010d", ledger.get()); + } final ClientConfiguration conf = new ClientConfiguration(); conf.setReadTimeout(sockTimeout).setZkServers(servers); + if (cmd.hasOption("useV2")) { + conf.setUseV2WireProtocol(true); + } + try (ZooKeeperClient zk = ZooKeeperClient.newBuilder() .connectString(servers) .sessionTimeoutMs(3000) @@ -203,7 +237,7 @@ public void process(WatchedEvent event) { try { if (event.getType() == Event.EventType.NodeCreated && event.getPath().equals(nodepath)) { - readLedger(conf, ledger.get(), passwd); + readLedger(conf, ledger.get(), passwd, batchentries); shutdownLatch.countDown(); } else if (event.getType() == Event.EventType.NodeChildrenChanged) { if (numLedgers.get() < 0) { @@ -229,7 +263,7 @@ public void process(WatchedEvent event) { Thread t = new Thread() { @Override public void run() { - readLedger(conf, ledgerId, passwd); + readLedger(conf, ledgerId, passwd, batchentries); } }; t.start(); @@ -252,7 +286,7 @@ public void run() { if (ledger.get() != 0) { if (zk.exists(nodepath, true) != null) { - readLedger(conf, ledger.get(), passwd); + 
readLedger(conf, ledger.get(), passwd, batchentries); shutdownLatch.countDown(); } else { LOG.info("Watching for creation of" + nodepath); diff --git a/bookkeeper-benchmark/src/main/java/org/apache/bookkeeper/benchmark/BenchThroughputLatency.java b/bookkeeper-benchmark/src/main/java/org/apache/bookkeeper/benchmark/BenchThroughputLatency.java index 08dc00266c0..9eeae28f1a3 100644 --- a/bookkeeper-benchmark/src/main/java/org/apache/bookkeeper/benchmark/BenchThroughputLatency.java +++ b/bookkeeper-benchmark/src/main/java/org/apache/bookkeeper/benchmark/BenchThroughputLatency.java @@ -19,7 +19,7 @@ */ package org.apache.bookkeeper.benchmark; -import static com.google.common.base.Charsets.UTF_8; +import static java.nio.charset.StandardCharsets.UTF_8; import static org.apache.bookkeeper.util.BookKeeperConstants.AVAILABLE_NODE; import java.io.BufferedOutputStream; @@ -64,13 +64,13 @@ public class BenchThroughputLatency implements AddCallback, Runnable { static final Logger LOG = LoggerFactory.getLogger(BenchThroughputLatency.class); BookKeeper bk; - LedgerHandle lh[]; + LedgerHandle[] lh; AtomicLong counter; Semaphore sem; int numberOfLedgers = 1; final int sendLimit; - final long latencies[]; + final long[] latencies; static class Context { long localStartTime; @@ -99,7 +99,9 @@ public BenchThroughputLatency(int ensemble, int writeQuorumSize, int ackQuorumSi ackQuorumSize, BookKeeper.DigestType.CRC32, passwd); - LOG.debug("Ledger Handle: " + lh[i].getId()); + if (LOG.isDebugEnabled()) { + LOG.debug("Ledger Handle: " + lh[i].getId()); + } } } catch (BKException e) { e.printStackTrace(); @@ -109,15 +111,17 @@ public BenchThroughputLatency(int ensemble, int writeQuorumSize, int ackQuorumSi Random rand = new Random(); public void close() throws InterruptedException, BKException { for (int i = 0; i < numberOfLedgers; i++) { - lh[i].close(); + if (lh[i] != null) { + lh[i].close(); + } } bk.close(); } long previous = 0; - byte bytes[]; + byte[] bytes; - void setEntryData(byte data[]) { + void setEntryData(byte[] data) { bytes = data; } @@ -134,6 +138,7 @@ public synchronized long getDuration() { return duration; } + @Override public void run() { LOG.info("Running..."); long start = previous = System.currentTimeMillis(); @@ -141,6 +146,7 @@ public void run() { int sent = 0; Thread reporter = new Thread() { + @Override public void run() { try { while (true) { @@ -253,6 +259,8 @@ public static void main(String[] args) options.addOption("skipwarmup", false, "Skip warm up, default false"); options.addOption("sendlimit", true, "Max number of entries to send. Default 20000000"); options.addOption("latencyFile", true, "File to dump latencies. Default is latencyDump.dat"); + options.addOption("useV2", false, "Whether use V2 protocol to send requests to the bookie server."); + options.addOption("warmupMessages", true, "Number of messages to warm up. 
Default 10000"); options.addOption("help", false, "This message"); CommandLineParser parser = new PosixParser(); @@ -277,6 +285,7 @@ public static void main(String[] args) } int throttle = Integer.parseInt(cmd.getOptionValue("throttle", "10000")); int sendLimit = Integer.parseInt(cmd.getOptionValue("sendlimit", "20000000")); + int warmupMessages = Integer.parseInt(cmd.getOptionValue("warmupMessages", "10000")); final int sockTimeout = Integer.parseInt(cmd.getOptionValue("sockettimeout", "5")); @@ -290,6 +299,7 @@ public static void main(String[] args) final long timeout = Long.parseLong(cmd.getOptionValue("timeout", "360")) * 1000; timeouter.schedule(new TimerTask() { + @Override public void run() { System.err.println("Timing out benchmark after " + timeout + "ms"); System.exit(-1); @@ -310,17 +320,21 @@ public void run() { // Do a warmup run Thread thread; - byte data[] = new byte[entrysize]; + byte[] data = new byte[entrysize]; Arrays.fill(data, (byte) 'x'); ClientConfiguration conf = new ClientConfiguration(); conf.setThrottleValue(throttle).setReadTimeout(sockTimeout).setZkServers(servers); + if (cmd.hasOption("useV2")) { + conf.setUseV2WireProtocol(true); + } + if (!cmd.hasOption("skipwarmup")) { long throughput; LOG.info("Starting warmup"); - throughput = warmUp(data, ledgers, ensemble, quorum, passwd, conf); + throughput = warmUp(data, ledgers, ensemble, quorum, passwd, warmupMessages, conf); LOG.info("Warmup tp: " + throughput); LOG.info("Warmup phase finished"); } @@ -401,7 +415,7 @@ public void process(WatchedEvent event) { OutputStream fos = new BufferedOutputStream(new FileOutputStream(latencyFile)); for (Long l: latency) { - fos.write((Long.toString(l) + "\t" + (l / 1000000) + "ms\n").getBytes(UTF_8)); + fos.write((l + "\t" + (l / 1000000) + "ms\n").getBytes(UTF_8)); } fos.flush(); fos.close(); @@ -416,14 +430,10 @@ public void process(WatchedEvent event) { private static double percentile(long[] latency, int percentile) { int size = latency.length; - int sampleSize = (size * percentile) / 100; - long total = 0; - int count = 0; - for (int i = 0; i < sampleSize; i++) { - total += latency[i]; - count++; - } - return ((double) total / (double) count) / 1000000.0; + double percent = (double) percentile / 100; + int index = (int) (size * percent); + double lat = index > 0 ? (double) latency[index - 1] / 1000000.0 : 0.0; + return lat; } /** @@ -432,7 +442,7 @@ private static double percentile(long[] latency, int percentile) { *
<p>
TODO: update benchmark to use metadata service uri {@link https://github.com/apache/bookkeeper/issues/1331} */ private static long warmUp(byte[] data, int ledgers, int ensemble, int qSize, - byte[] passwd, ClientConfiguration conf) + byte[] passwd, int warmupMessages, ClientConfiguration conf) throws KeeperException, IOException, InterruptedException, BKException { final CountDownLatch connectLatch = new CountDownLatch(1); final int bookies; @@ -459,7 +469,7 @@ public void process(WatchedEvent event) { } BenchThroughputLatency warmup = new BenchThroughputLatency(bookies, bookies, bookies, passwd, - ledgers, 10000, conf); + ledgers, warmupMessages, conf); warmup.setEntryData(data); Thread thread = new Thread(warmup); thread.start(); diff --git a/bookkeeper-benchmark/src/main/java/org/apache/bookkeeper/benchmark/TestClient.java b/bookkeeper-benchmark/src/main/java/org/apache/bookkeeper/benchmark/TestClient.java index 3d96c5c95d2..9f1b225fa20 100644 --- a/bookkeeper-benchmark/src/main/java/org/apache/bookkeeper/benchmark/TestClient.java +++ b/bookkeeper-benchmark/src/main/java/org/apache/bookkeeper/benchmark/TestClient.java @@ -20,7 +20,7 @@ */ package org.apache.bookkeeper.benchmark; -import static com.google.common.base.Charsets.UTF_8; +import static java.nio.charset.StandardCharsets.UTF_8; import java.io.FileOutputStream; import java.io.IOException; @@ -103,6 +103,7 @@ public static void main(String[] args) throws ParseException { final long timeout = Long.parseLong(cmd.getOptionValue("timeout", "360")) * 1000; timeouter.schedule(new TimerTask() { + @Override public void run() { System.err.println("Timing out benchmark after " + timeout + "ms"); System.exit(-1); @@ -209,6 +210,7 @@ static class FileClient implements Callable { this.r = new Random(System.identityHashCode(this)); } + @Override public Long call() { try { long count = 0; @@ -253,6 +255,7 @@ static class BKClient implements Callable, AddCallback { this.sync = sync; } + @Override public Long call() { try { long start = System.currentTimeMillis(); diff --git a/bookkeeper-benchmark/src/test/java/org/apache/bookkeeper/benchmark/TestBenchmark.java b/bookkeeper-benchmark/src/test/java/org/apache/bookkeeper/benchmark/TestBenchmark.java index 8678017d882..9813a97536f 100644 --- a/bookkeeper-benchmark/src/test/java/org/apache/bookkeeper/benchmark/TestBenchmark.java +++ b/bookkeeper-benchmark/src/test/java/org/apache/bookkeeper/benchmark/TestBenchmark.java @@ -68,7 +68,8 @@ public void testThroughputLatency() throws Exception { @Test public void testBookie() throws Exception { - BookieSocketAddress bookie = getBookie(0); + BookieSocketAddress bookie = serverByIndex(0).getLocalAddress(); + BenchBookie.main(new String[] { "--host", bookie.getSocketAddress().getHostName(), "--port", String.valueOf(bookie.getPort()), @@ -97,7 +98,7 @@ public void run() { t.start(); Thread.sleep(10000); - byte data[] = new byte[1024]; + byte[] data = new byte[1024]; Arrays.fill(data, (byte) 'x'); long lastLedgerId = 0; diff --git a/bookkeeper-benchmark/src/test/resources/log4j.properties b/bookkeeper-benchmark/src/test/resources/log4j.properties deleted file mode 100644 index c0d9a42134e..00000000000 --- a/bookkeeper-benchmark/src/test/resources/log4j.properties +++ /dev/null @@ -1,72 +0,0 @@ -# -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
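[Editor's aside on the percentile() rewrite in BenchThroughputLatency above: the removed code averaged the lowest p% of samples, which is a truncated mean rather than a percentile; the new code indexes directly into the latency array, which the benchmark sorts before reporting. A small worked sketch under those assumptions:]

public class PercentileSketch {
    public static void main(String[] args) {
        // Latencies sorted ascending, in nanoseconds (1..4 ms).
        long[] latency = {1_000_000L, 2_000_000L, 3_000_000L, 4_000_000L};
        int percentile = 50;
        double percent = (double) percentile / 100;   // 0.5
        int index = (int) (latency.length * percent); // 2
        double lat = index > 0 ? (double) latency[index - 1] / 1000000.0 : 0.0;
        System.out.println(lat + " ms");              // 2.0 ms: nearest-rank 50th percentile
    }
}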
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -# - -# -# Bookkeeper Logging Configuration -# - -# Format is " (, )+ - -# DEFAULT: console appender only -log4j.rootLogger=INFO, CONSOLE - -# Example with rolling log file -#log4j.rootLogger=DEBUG, CONSOLE, ROLLINGFILE - -# Example with rolling log file and tracing -#log4j.rootLogger=TRACE, CONSOLE, ROLLINGFILE, TRACEFILE - -# -# Log INFO level and above messages to the console -# -log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender -log4j.appender.CONSOLE.Threshold=INFO -log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout -log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} - %-5p - [%t:%C{1}@%L] - %m%n - -# -# Add ROLLINGFILE to rootLogger to get log file output -# Log DEBUG level and above messages to a log file -log4j.appender.ROLLINGFILE=org.apache.log4j.DailyRollingFileAppender -log4j.appender.ROLLINGFILE.Threshold=DEBUG -log4j.appender.ROLLINGFILE.File=bookkeeper-benchmark.log -log4j.appender.ROLLINGFILE.layout=org.apache.log4j.PatternLayout -log4j.appender.ROLLINGFILE.layout.ConversionPattern=%d{ISO8601} - %-5p - [%t:%C{1}@%L] - %m%n - -# Max log file size of 10MB -log4j.appender.ROLLINGFILE.MaxFileSize=10MB -# uncomment the next line to limit number of backup files -#log4j.appender.ROLLINGFILE.MaxBackupIndex=10 - -log4j.appender.ROLLINGFILE.layout=org.apache.log4j.PatternLayout -log4j.appender.ROLLINGFILE.layout.ConversionPattern=%d{ISO8601} - %-5p [%t:%C{1}@%L] - %m%n - - -# -# Add TRACEFILE to rootLogger to get log file output -# Log DEBUG level and above messages to a log file -log4j.appender.TRACEFILE=org.apache.log4j.FileAppender -log4j.appender.TRACEFILE.Threshold=TRACE -log4j.appender.TRACEFILE.File=bookkeeper_trace.log - -log4j.appender.TRACEFILE.layout=org.apache.log4j.PatternLayout -### Notice we are including log4j's NDC here (%x) -log4j.appender.TRACEFILE.layout.ConversionPattern=%d{ISO8601} - %-5p [%t:%C{1}@%L][%x] - %m%n diff --git a/bookkeeper-common-allocator/pom.xml b/bookkeeper-common-allocator/pom.xml new file mode 100644 index 00000000000..ecc5699fbde --- /dev/null +++ b/bookkeeper-common-allocator/pom.xml @@ -0,0 +1,60 @@ + + + + 4.0.0 + + org.apache.bookkeeper + bookkeeper + 4.18.0-SNAPSHOT + + bookkeeper-common-allocator + Apache BookKeeper :: Common :: Allocator + + + io.netty + netty-buffer + + + + + + com.github.spotbugs + spotbugs-maven-plugin + + + org.apache.maven.plugins + maven-compiler-plugin + + + org.apache.maven.plugins + maven-jar-plugin + + + + test-jar + + + + + + org.apache.maven.plugins + maven-surefire-plugin + + + + diff --git a/bookkeeper-common-allocator/src/main/java/org/apache/bookkeeper/common/allocator/ByteBufAllocatorBuilder.java b/bookkeeper-common-allocator/src/main/java/org/apache/bookkeeper/common/allocator/ByteBufAllocatorBuilder.java new file mode 100644 index 00000000000..3e36a23d170 --- /dev/null +++ b/bookkeeper-common-allocator/src/main/java/org/apache/bookkeeper/common/allocator/ByteBufAllocatorBuilder.java @@ -0,0 +1,95 @@ +/* + 
* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bookkeeper.common.allocator; + +import io.netty.buffer.ByteBufAllocator; +import io.netty.buffer.PooledByteBufAllocator; +import io.netty.buffer.UnpooledByteBufAllocator; +import java.util.function.Consumer; +import org.apache.bookkeeper.common.allocator.impl.ByteBufAllocatorBuilderImpl; + +/** + * Builder object to customize a ByteBuf allocator. + */ +public interface ByteBufAllocatorBuilder { + /** + * Creates a new {@link ByteBufAllocatorBuilder}. + */ + static ByteBufAllocatorBuilder create() { + return new ByteBufAllocatorBuilderImpl(); + } + + /** + * Finalize the configured {@link ByteBufAllocator}. + */ + ByteBufAllocatorWithOomHandler build(); + + /** + * Specify a custom allocator where the allocation requests should be + * forwarded to. + * + *
<p>
Default is to use a new instance of {@link PooledByteBufAllocator}. + */ + ByteBufAllocatorBuilder pooledAllocator(ByteBufAllocator pooledAllocator); + + /** + * Specify a custom allocator where the allocation requests should be + * forwarded to. + * + *
<p>
Default is to use {@link UnpooledByteBufAllocator#DEFAULT}. + */ + ByteBufAllocatorBuilder unpooledAllocator(ByteBufAllocator unpooledAllocator); + + /** + * Define the memory pooling policy. + * + *
<p>
Default is {@link PoolingPolicy#PooledDirect} + */ + ByteBufAllocatorBuilder poolingPolicy(PoolingPolicy policy); + + /** + * Controls the amount of concurrency for the memory pool. + * + *
<p>Default is to have a number of allocator arenas equal to 2 * CPUs. + * + *
<p>
Decreasing this number will reduce the amount of memory overhead, at the + * expense of increased allocation contention. + */ + ByteBufAllocatorBuilder poolingConcurrency(int poolingConcurrency); + + /** + * Define the OutOfMemory handling policy. + * + *
<p>
Default is {@link OutOfMemoryPolicy#FallbackToHeap} + */ + ByteBufAllocatorBuilder outOfMemoryPolicy(OutOfMemoryPolicy policy); + + /** + * Add a listener that is triggered whenever there is an allocation failure. + * + *
<p>Application can use this to trigger alerting or process restarting. + */ + ByteBufAllocatorBuilder outOfMemoryListener(Consumer<OutOfMemoryError> listener); + + /** + * Enable the leak detection for the allocator. + * + *
<p>
Default is {@link LeakDetectionPolicy#Disabled} + */ + ByteBufAllocatorBuilder leakDetectionPolicy(LeakDetectionPolicy leakDetectionPolicy); +} diff --git a/bookkeeper-common-allocator/src/main/java/org/apache/bookkeeper/common/allocator/ByteBufAllocatorWithOomHandler.java b/bookkeeper-common-allocator/src/main/java/org/apache/bookkeeper/common/allocator/ByteBufAllocatorWithOomHandler.java new file mode 100644 index 00000000000..209386d16dc --- /dev/null +++ b/bookkeeper-common-allocator/src/main/java/org/apache/bookkeeper/common/allocator/ByteBufAllocatorWithOomHandler.java @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bookkeeper.common.allocator; + +import io.netty.buffer.ByteBufAllocator; +import java.util.function.Consumer; + +/** + * A ByteBufAllocator interface with an OOM handler. + */ +public interface ByteBufAllocatorWithOomHandler extends ByteBufAllocator { + void setOomHandler(Consumer<OutOfMemoryError> handler); +} diff --git a/bookkeeper-common-allocator/src/main/java/org/apache/bookkeeper/common/allocator/LeakDetectionPolicy.java b/bookkeeper-common-allocator/src/main/java/org/apache/bookkeeper/common/allocator/LeakDetectionPolicy.java new file mode 100644 index 00000000000..90d19a4aeef --- /dev/null +++ b/bookkeeper-common-allocator/src/main/java/org/apache/bookkeeper/common/allocator/LeakDetectionPolicy.java @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bookkeeper.common.allocator; + +import lombok.extern.slf4j.Slf4j; + +/** + * Define the policy for the Netty leak detector. + */ +@Slf4j +public enum LeakDetectionPolicy { + + /** + * No leak detection and no overhead. + */ + Disabled, + + /** + * Instruments 1% of the allocated buffers to track for leaks. + */ + Simple, + + /** + * Instruments 1% of the allocated buffers to track for leaks, reporting + * stack traces of places where the buffer was used.
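+ * (Editor's note: as with Netty's {@code ResourceLeakDetector.Level.ADVANCED},
+ * sampling stays at roughly 1%; the extra cost over {@code Simple} is recording
+ * where sampled buffers were accessed.)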
+ */ + Advanced, + + /** + * Instruments 100% of the allocated buffers to track for leaks, reporting + * stack traces of places where the buffer was used. Introduces very + * significant overhead. + */ + Paranoid; + + public static LeakDetectionPolicy parseLevel(String levelStr) { + String trimmedLevelStr = levelStr.trim(); + for (LeakDetectionPolicy policy : values()) { + if (trimmedLevelStr.equalsIgnoreCase(policy.name())) { + return policy; + } + } + log.warn("Failed to parse leak detection policy level {}. Using the default level: {}", levelStr, + LeakDetectionPolicy.Disabled.name()); + return LeakDetectionPolicy.Disabled; + } +} diff --git a/bookkeeper-common-allocator/src/main/java/org/apache/bookkeeper/common/allocator/OutOfMemoryPolicy.java b/bookkeeper-common-allocator/src/main/java/org/apache/bookkeeper/common/allocator/OutOfMemoryPolicy.java new file mode 100644 index 00000000000..d566ca8b87c --- /dev/null +++ b/bookkeeper-common-allocator/src/main/java/org/apache/bookkeeper/common/allocator/OutOfMemoryPolicy.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bookkeeper.common.allocator; + +/** + * Represents the action to take when it's not possible to allocate memory. + */ +public enum OutOfMemoryPolicy { + + /** + * Throw a regular OOM exception without taking additional actions. + */ + ThrowException, + + /** + * If it's not possible to allocate a buffer from direct memory, fall back to + * allocating an unpooled buffer from the JVM heap. + * + *
<p>
This will help absorb memory allocation spikes because the heap + * allocations will naturally slow down the process and will result in full + * GC cleanup if the heap itself is full. + */ + FallbackToHeap, +} diff --git a/bookkeeper-common-allocator/src/main/java/org/apache/bookkeeper/common/allocator/PoolingPolicy.java b/bookkeeper-common-allocator/src/main/java/org/apache/bookkeeper/common/allocator/PoolingPolicy.java new file mode 100644 index 00000000000..40917b42a1c --- /dev/null +++ b/bookkeeper-common-allocator/src/main/java/org/apache/bookkeeper/common/allocator/PoolingPolicy.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bookkeeper.common.allocator; + +/** + * Define a policy for allocating buffers. + */ +public enum PoolingPolicy { + + /** + * Allocate memory from JVM heap without any pooling. + * + *
<p>
This option has the least overhead in terms of memory usage since the + * memory will be automatically reclaimed by the JVM GC but might impose a + * performance penalty at high throughput. + */ + UnpooledHeap, + + /** + * Use Direct memory for all buffers and pool the memory. + * + *
<p>
Direct memory will avoid the overhead of JVM GC and most memory copies + * when reading and writing to socket channel. + * + *
<p>
Pooling will add memory space overhead due to the fact that there will be + * fragmentation in the allocator and that threads will keep a portion of + * memory as thread-local to avoid contention when possible. + */ + PooledDirect +} diff --git a/bookkeeper-common-allocator/src/main/java/org/apache/bookkeeper/common/allocator/impl/ByteBufAllocatorBuilderImpl.java b/bookkeeper-common-allocator/src/main/java/org/apache/bookkeeper/common/allocator/impl/ByteBufAllocatorBuilderImpl.java new file mode 100644 index 00000000000..69c57232aff --- /dev/null +++ b/bookkeeper-common-allocator/src/main/java/org/apache/bookkeeper/common/allocator/impl/ByteBufAllocatorBuilderImpl.java @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bookkeeper.common.allocator.impl; + +import io.netty.buffer.ByteBufAllocator; +import java.util.function.Consumer; +import org.apache.bookkeeper.common.allocator.ByteBufAllocatorBuilder; +import org.apache.bookkeeper.common.allocator.ByteBufAllocatorWithOomHandler; +import org.apache.bookkeeper.common.allocator.LeakDetectionPolicy; +import org.apache.bookkeeper.common.allocator.OutOfMemoryPolicy; +import org.apache.bookkeeper.common.allocator.PoolingPolicy; + +/** + * Implementation of {@link ByteBufAllocatorBuilder}. 
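+ *
+ * <p>(Editor's illustration, not part of the original patch; it uses only methods
+ * declared on {@link ByteBufAllocatorBuilder} above.) A typical build and use:
+ *
+ * <pre>{@code
+ * ByteBufAllocatorWithOomHandler alloc = ByteBufAllocatorBuilder.create()
+ *         .poolingPolicy(PoolingPolicy.PooledDirect)
+ *         .outOfMemoryPolicy(OutOfMemoryPolicy.FallbackToHeap)
+ *         .outOfMemoryListener(oom -> System.err.println("allocation failed: " + oom))
+ *         .leakDetectionPolicy(LeakDetectionPolicy.Disabled)
+ *         .build();
+ * ByteBuf buf = alloc.buffer(256);
+ * try {
+ *     buf.writeLong(42L);
+ * } finally {
+ *     buf.release();
+ * }
+ * }</pre>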
+ */ +public class ByteBufAllocatorBuilderImpl implements ByteBufAllocatorBuilder { + + ByteBufAllocator pooledAllocator = null; + ByteBufAllocator unpooledAllocator = null; + PoolingPolicy poolingPolicy = PoolingPolicy.PooledDirect; + int poolingConcurrency = 2 * Runtime.getRuntime().availableProcessors(); + OutOfMemoryPolicy outOfMemoryPolicy = OutOfMemoryPolicy.FallbackToHeap; + Consumer outOfMemoryListener = null; + LeakDetectionPolicy leakDetectionPolicy = LeakDetectionPolicy.Disabled; + + @Override + public ByteBufAllocatorWithOomHandler build() { + return new ByteBufAllocatorImpl(pooledAllocator, unpooledAllocator, poolingPolicy, poolingConcurrency, + outOfMemoryPolicy, outOfMemoryListener, leakDetectionPolicy); + } + + @Override + public ByteBufAllocatorBuilder pooledAllocator(ByteBufAllocator pooledAllocator) { + this.pooledAllocator = pooledAllocator; + return this; + } + + @Override + public ByteBufAllocatorBuilder unpooledAllocator(ByteBufAllocator unpooledAllocator) { + this.unpooledAllocator = unpooledAllocator; + return this; + } + + @Override + public ByteBufAllocatorBuilder poolingPolicy(PoolingPolicy policy) { + this.poolingPolicy = policy; + return this; + } + + @Override + public ByteBufAllocatorBuilder poolingConcurrency(int poolingConcurrency) { + this.poolingConcurrency = poolingConcurrency; + return this; + } + + @Override + public ByteBufAllocatorBuilder outOfMemoryPolicy(OutOfMemoryPolicy policy) { + this.outOfMemoryPolicy = policy; + return this; + } + + @Override + public ByteBufAllocatorBuilder outOfMemoryListener(Consumer listener) { + this.outOfMemoryListener = listener; + return this; + } + + @Override + public ByteBufAllocatorBuilder leakDetectionPolicy(LeakDetectionPolicy leakDetectionPolicy) { + this.leakDetectionPolicy = leakDetectionPolicy; + return this; + } + +} diff --git a/bookkeeper-common-allocator/src/main/java/org/apache/bookkeeper/common/allocator/impl/ByteBufAllocatorImpl.java b/bookkeeper-common-allocator/src/main/java/org/apache/bookkeeper/common/allocator/impl/ByteBufAllocatorImpl.java new file mode 100644 index 00000000000..87582cca92c --- /dev/null +++ b/bookkeeper-common-allocator/src/main/java/org/apache/bookkeeper/common/allocator/impl/ByteBufAllocatorImpl.java @@ -0,0 +1,199 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.bookkeeper.common.allocator.impl; + +import io.netty.buffer.AbstractByteBufAllocator; +import io.netty.buffer.ByteBuf; +import io.netty.buffer.ByteBufAllocator; +import io.netty.buffer.PooledByteBufAllocator; +import io.netty.buffer.UnpooledByteBufAllocator; +import io.netty.util.ResourceLeakDetector; +import io.netty.util.ResourceLeakDetector.Level; +import java.util.function.Consumer; +import org.apache.bookkeeper.common.allocator.ByteBufAllocatorWithOomHandler; +import org.apache.bookkeeper.common.allocator.LeakDetectionPolicy; +import org.apache.bookkeeper.common.allocator.OutOfMemoryPolicy; +import org.apache.bookkeeper.common.allocator.PoolingPolicy; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Implementation of {@link ByteBufAllocator}. + */ +public class ByteBufAllocatorImpl extends AbstractByteBufAllocator implements ByteBufAllocatorWithOomHandler { + + private static final Logger log = LoggerFactory.getLogger(ByteBufAllocatorImpl.class); + + // Same as AbstractByteBufAllocator, but copied here since it's not visible + private static final int DEFAULT_INITIAL_CAPACITY = 256; + private static final int DEFAULT_MAX_CAPACITY = Integer.MAX_VALUE; + + private final ByteBufAllocator pooledAllocator; + private final ByteBufAllocator unpooledAllocator; + private final PoolingPolicy poolingPolicy; + private final OutOfMemoryPolicy outOfMemoryPolicy; + private Consumer outOfMemoryListener; + + ByteBufAllocatorImpl(ByteBufAllocator pooledAllocator, ByteBufAllocator unpooledAllocator, + PoolingPolicy poolingPolicy, int poolingConcurrency, OutOfMemoryPolicy outOfMemoryPolicy, + Consumer outOfMemoryListener, + LeakDetectionPolicy leakDetectionPolicy) { + super(poolingPolicy == PoolingPolicy.PooledDirect /* preferDirect */); + + this.poolingPolicy = poolingPolicy; + this.outOfMemoryPolicy = outOfMemoryPolicy; + if (outOfMemoryListener == null) { + this.outOfMemoryListener = (v) -> { + log.error("Unable to allocate memory", v); + }; + } else { + this.outOfMemoryListener = outOfMemoryListener; + } + + if (poolingPolicy == PoolingPolicy.PooledDirect) { + if (pooledAllocator == null) { + if (poolingConcurrency == PooledByteBufAllocator.defaultNumDirectArena()) { + // If all the parameters are the same as in the default Netty pool, + // just reuse the static instance as the underlying allocator. + this.pooledAllocator = PooledByteBufAllocator.DEFAULT; + } else { + this.pooledAllocator = new PooledByteBufAllocator( + true /* preferDirect */, + poolingConcurrency /* nHeapArena */, + poolingConcurrency /* nDirectArena */, + PooledByteBufAllocator.defaultPageSize(), + PooledByteBufAllocator.defaultMaxOrder(), + PooledByteBufAllocator.defaultSmallCacheSize(), + PooledByteBufAllocator.defaultNormalCacheSize(), + PooledByteBufAllocator.defaultUseCacheForAllThreads()); + } + } else { + this.pooledAllocator = pooledAllocator; + } + } else { + this.pooledAllocator = null; + } + + this.unpooledAllocator = (unpooledAllocator != null) ? 
unpooledAllocator : UnpooledByteBufAllocator.DEFAULT; + + // The setting is static in Netty, so it will actually affect all + // allocators + switch (leakDetectionPolicy) { + case Disabled: + if (log.isDebugEnabled()) { + log.debug("Disable Netty allocator leak detector"); + } + ResourceLeakDetector.setLevel(Level.DISABLED); + break; + + case Simple: + log.info("Setting Netty allocator leak detector to Simple"); + ResourceLeakDetector.setLevel(Level.SIMPLE); + break; + + case Advanced: + log.info("Setting Netty allocator leak detector to Advanced"); + ResourceLeakDetector.setLevel(Level.ADVANCED); + break; + + case Paranoid: + log.info("Setting Netty allocator leak detector to Paranoid"); + ResourceLeakDetector.setLevel(Level.PARANOID); + break; + } + } + + @Override + public ByteBuf buffer() { + return buffer(DEFAULT_INITIAL_CAPACITY); + } + + @Override + public ByteBuf buffer(int initialCapacity) { + return buffer(initialCapacity, DEFAULT_MAX_CAPACITY); + } + + @Override + public ByteBuf buffer(int initialCapacity, int maxCapacity) { + if (poolingPolicy == PoolingPolicy.PooledDirect) { + return newDirectBuffer(initialCapacity, maxCapacity, true /* can fallback to heap if needed */); + } else { + return newHeapBuffer(initialCapacity, maxCapacity); + } + } + + @Override + protected ByteBuf newHeapBuffer(int initialCapacity, int maxCapacity) { + try { + // There are few cases in which we ask explicitly for a pooled + // heap buffer. + ByteBufAllocator alloc = (poolingPolicy == PoolingPolicy.PooledDirect) ? pooledAllocator + : unpooledAllocator; + return alloc.heapBuffer(initialCapacity, maxCapacity); + } catch (OutOfMemoryError e) { + outOfMemoryListener.accept(e); + throw e; + } + } + + @Override + protected ByteBuf newDirectBuffer(int initialCapacity, int maxCapacity) { + // If caller asked specifically for a direct buffer, we cannot fallback to heap + return newDirectBuffer(initialCapacity, maxCapacity, false); + } + + private ByteBuf newDirectBuffer(int initialCapacity, int maxCapacity, boolean canFallbackToHeap) { + if (poolingPolicy == PoolingPolicy.PooledDirect) { + try { + return pooledAllocator.directBuffer(initialCapacity, maxCapacity); + } catch (OutOfMemoryError e) { + if (canFallbackToHeap && outOfMemoryPolicy == OutOfMemoryPolicy.FallbackToHeap) { + try { + return unpooledAllocator.heapBuffer(initialCapacity, maxCapacity); + } catch (OutOfMemoryError e2) { + outOfMemoryListener.accept(e2); + throw e2; + } + } else { + // ThrowException + outOfMemoryListener.accept(e); + throw e; + } + } + } else { + // Unpooled heap buffer. 
Force heap buffers because unpooled direct + // buffers have very high overhead of allocation/reclaiming + try { + return unpooledAllocator.directBuffer(initialCapacity, maxCapacity); + } catch (OutOfMemoryError e) { + outOfMemoryListener.accept(e); + throw e; + } + } + } + + @Override + public boolean isDirectBufferPooled() { + return pooledAllocator != null && pooledAllocator.isDirectBufferPooled(); + } + + @Override + public void setOomHandler(Consumer handler) { + this.outOfMemoryListener = handler; + } +} diff --git a/bookkeeper-common-allocator/src/main/java/org/apache/bookkeeper/common/allocator/impl/package-info.java b/bookkeeper-common-allocator/src/main/java/org/apache/bookkeeper/common/allocator/impl/package-info.java new file mode 100644 index 00000000000..74564caeb09 --- /dev/null +++ b/bookkeeper-common-allocator/src/main/java/org/apache/bookkeeper/common/allocator/impl/package-info.java @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * Implements the utilities for allocator used across the project. + */ +package org.apache.bookkeeper.common.allocator.impl; \ No newline at end of file diff --git a/bookkeeper-common-allocator/src/main/java/org/apache/bookkeeper/common/allocator/package-info.java b/bookkeeper-common-allocator/src/main/java/org/apache/bookkeeper/common/allocator/package-info.java new file mode 100644 index 00000000000..d7529ef7b94 --- /dev/null +++ b/bookkeeper-common-allocator/src/main/java/org/apache/bookkeeper/common/allocator/package-info.java @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * defines the utilities for allocator used across the project. 
+ */ +package org.apache.bookkeeper.common.allocator; \ No newline at end of file diff --git a/bookkeeper-common-allocator/src/test/java/org/apache/bookkeeper/common/allocator/impl/ByteBufAllocatorBuilderTest.java b/bookkeeper-common-allocator/src/test/java/org/apache/bookkeeper/common/allocator/impl/ByteBufAllocatorBuilderTest.java new file mode 100644 index 00000000000..6f2538d6c81 --- /dev/null +++ b/bookkeeper-common-allocator/src/test/java/org/apache/bookkeeper/common/allocator/impl/ByteBufAllocatorBuilderTest.java @@ -0,0 +1,269 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bookkeeper.common.allocator.impl; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; +import static org.mockito.ArgumentMatchers.anyInt; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import io.netty.buffer.ByteBuf; +import io.netty.buffer.ByteBufAllocator; +import io.netty.buffer.PooledByteBufAllocator; +import io.netty.buffer.UnpooledByteBufAllocator; +import io.netty.util.ReferenceCountUtil; +import java.lang.reflect.Constructor; +import java.util.concurrent.atomic.AtomicReference; +import org.apache.bookkeeper.common.allocator.ByteBufAllocatorBuilder; +import org.apache.bookkeeper.common.allocator.OutOfMemoryPolicy; +import org.apache.bookkeeper.common.allocator.PoolingPolicy; +import org.junit.Test; + +/** + * Tests for {@link ByteBufAllocatorBuilderImpl}. 
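+ *
+ * <p>(Editor's note, not in the original patch.) The OOM-path tests share one setup
+ * trick: Netty's {@code io.netty.util.internal.OutOfDirectMemoryError} has no accessible
+ * constructor, so the static initializer builds one reflectively and the tests stub a
+ * Mockito mock to throw it, e.g.
+ * {@code when(baseAlloc.directBuffer(anyInt(), anyInt())).thenThrow(outOfDirectMemException);},
+ * so that both the propagated error and the listener notification can be asserted.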
+ */ +public class ByteBufAllocatorBuilderTest { + + private static final OutOfMemoryError outOfDirectMemException; + + static { + try { + Class clazz = (Class) ByteBufAllocatorBuilderTest.class.getClassLoader() + .loadClass("io.netty.util.internal.OutOfDirectMemoryError"); + @SuppressWarnings("unchecked") + Constructor constructor = (Constructor) clazz + .getDeclaredConstructor(String.class); + + constructor.setAccessible(true); + outOfDirectMemException = constructor.newInstance("no mem"); + } catch (Exception e) { + throw new RuntimeException(e); + } + + } + + @Test + public void testOomWithException() { + ByteBufAllocator baseAlloc = mock(ByteBufAllocator.class); + when(baseAlloc.directBuffer(anyInt(), anyInt())).thenThrow(outOfDirectMemException); + + AtomicReference receivedException = new AtomicReference<>(); + + ByteBufAllocator alloc = ByteBufAllocatorBuilder.create() + .pooledAllocator(baseAlloc) + .outOfMemoryPolicy(OutOfMemoryPolicy.ThrowException) + .outOfMemoryListener((e) -> { + receivedException.set(e); + }) + .build(); + + try { + alloc.buffer(); + fail("Should have thrown exception"); + } catch (OutOfMemoryError e) { + // Expected + assertEquals(outOfDirectMemException, e); + } + + // Ensure the notification was triggered even when exception is thrown + assertEquals(outOfDirectMemException, receivedException.get()); + } + + @Test + public void testOomWithFallback() { + ByteBufAllocator baseAlloc = mock(ByteBufAllocator.class); + when(baseAlloc.directBuffer(anyInt(), anyInt())).thenThrow(outOfDirectMemException); + + AtomicReference receivedException = new AtomicReference<>(); + + ByteBufAllocator alloc = ByteBufAllocatorBuilder.create() + .pooledAllocator(baseAlloc) + .unpooledAllocator(UnpooledByteBufAllocator.DEFAULT) + .outOfMemoryPolicy(OutOfMemoryPolicy.FallbackToHeap) + .outOfMemoryListener((e) -> { + receivedException.set(e); + }) + .build(); + + // Should not throw exception + ByteBuf buf = alloc.buffer(); + assertEquals(UnpooledByteBufAllocator.DEFAULT, buf.alloc()); + + // No notification should have been triggered + assertEquals(null, receivedException.get()); + } + + @Test + public void testOomWithFallbackAndNoMoreHeap() { + ByteBufAllocator baseAlloc = mock(ByteBufAllocator.class); + when(baseAlloc.directBuffer(anyInt(), anyInt())).thenThrow(outOfDirectMemException); + + ByteBufAllocator heapAlloc = mock(ByteBufAllocator.class); + OutOfMemoryError noHeapError = new OutOfMemoryError("no more heap"); + when(heapAlloc.heapBuffer(anyInt(), anyInt())).thenThrow(noHeapError); + + AtomicReference receivedException = new AtomicReference<>(); + + ByteBufAllocator alloc = ByteBufAllocatorBuilder.create() + .pooledAllocator(baseAlloc) + .unpooledAllocator(heapAlloc) + .outOfMemoryPolicy(OutOfMemoryPolicy.FallbackToHeap) + .outOfMemoryListener((e) -> { + receivedException.set(e); + }) + .build(); + + try { + alloc.buffer(); + fail("Should have thrown exception"); + } catch (OutOfMemoryError e) { + // Expected + assertEquals(noHeapError, e); + } + + // Ensure the notification was triggered even when exception is thrown + assertEquals(noHeapError, receivedException.get()); + } + + @Test + public void testOomUnpooledDirect() { + ByteBufAllocator heapAlloc = mock(ByteBufAllocator.class); + OutOfMemoryError noMemError = new OutOfMemoryError("no more direct mem"); + when(heapAlloc.directBuffer(anyInt(), anyInt())).thenThrow(noMemError); + + AtomicReference receivedException = new AtomicReference<>(); + + ByteBufAllocator alloc = ByteBufAllocatorBuilder.create() + 
.poolingPolicy(PoolingPolicy.UnpooledHeap) + .unpooledAllocator(heapAlloc) + .outOfMemoryPolicy(OutOfMemoryPolicy.FallbackToHeap) + .outOfMemoryListener((e) -> { + receivedException.set(e); + }) + .build(); + + try { + alloc.directBuffer(); + fail("Should have thrown exception"); + } catch (OutOfMemoryError e) { + // Expected + assertEquals(noMemError, e); + } + + // Ensure the notification was triggered even when exception is thrown + assertEquals(noMemError, receivedException.get()); + } + + @Test + public void testOomUnpooledWithHeap() { + ByteBufAllocator heapAlloc = mock(ByteBufAllocator.class); + OutOfMemoryError noHeapError = new OutOfMemoryError("no more heap"); + when(heapAlloc.heapBuffer(anyInt(), anyInt())).thenThrow(noHeapError); + + AtomicReference receivedException = new AtomicReference<>(); + + ByteBufAllocator alloc = ByteBufAllocatorBuilder.create() + .poolingPolicy(PoolingPolicy.UnpooledHeap) + .unpooledAllocator(heapAlloc) + .outOfMemoryPolicy(OutOfMemoryPolicy.FallbackToHeap) + .outOfMemoryListener((e) -> { + receivedException.set(e); + }) + .build(); + + try { + alloc.heapBuffer(); + fail("Should have thrown exception"); + } catch (OutOfMemoryError e) { + // Expected + assertEquals(noHeapError, e); + } + + // Ensure the notification was triggered even when exception is thrown + assertEquals(noHeapError, receivedException.get()); + } + + @Test + public void testUnpooled() { + ByteBufAllocator alloc = ByteBufAllocatorBuilder.create() + .poolingPolicy(PoolingPolicy.UnpooledHeap) + .build(); + + ByteBuf buf = alloc.buffer(); + assertEquals(UnpooledByteBufAllocator.DEFAULT, buf.alloc()); + assertTrue(buf.hasArray()); + + ByteBuf buf2 = alloc.directBuffer(); + assertEquals(UnpooledByteBufAllocator.DEFAULT, buf2.alloc()); + assertFalse(buf2.hasArray()); + } + + @Test + public void testPooled() { + PooledByteBufAllocator pooledAlloc = new PooledByteBufAllocator(true); + + ByteBufAllocator alloc = ByteBufAllocatorBuilder.create() + .poolingPolicy(PoolingPolicy.PooledDirect) + .pooledAllocator(pooledAlloc) + .build(); + + assertTrue(alloc.isDirectBufferPooled()); + + ByteBuf buf1 = alloc.buffer(); + assertEquals(pooledAlloc, buf1.alloc()); + assertFalse(buf1.hasArray()); + ReferenceCountUtil.release(buf1); + + ByteBuf buf2 = alloc.directBuffer(); + assertEquals(pooledAlloc, buf2.alloc()); + assertFalse(buf2.hasArray()); + ReferenceCountUtil.release(buf2); + + ByteBuf buf3 = alloc.heapBuffer(); + assertEquals(pooledAlloc, buf3.alloc()); + assertTrue(buf3.hasArray()); + ReferenceCountUtil.release(buf3); + } + + @Test + public void testPooledWithDefaultAllocator() { + ByteBufAllocator alloc = ByteBufAllocatorBuilder.create() + .poolingPolicy(PoolingPolicy.PooledDirect) + .poolingConcurrency(3) + .build(); + + assertTrue(alloc.isDirectBufferPooled()); + + ByteBuf buf1 = alloc.buffer(); + assertEquals(PooledByteBufAllocator.class, buf1.alloc().getClass()); + assertEquals(3, ((PooledByteBufAllocator) buf1.alloc()).metric().numDirectArenas()); + assertFalse(buf1.hasArray()); + ReferenceCountUtil.release(buf1); + + ByteBuf buf2 = alloc.directBuffer(); + assertFalse(buf2.hasArray()); + ReferenceCountUtil.release(buf2); + + ByteBuf buf3 = alloc.heapBuffer(); + assertTrue(buf3.hasArray()); + ReferenceCountUtil.release(buf3); + } +} diff --git a/bookkeeper-common/pom.xml b/bookkeeper-common/pom.xml index 4b8b7e1fa67..aa733d20ff9 100644 --- a/bookkeeper-common/pom.xml +++ b/bookkeeper-common/pom.xml @@ -20,7 +20,7 @@ org.apache.bookkeeper bookkeeper - 4.9.0-SNAPSHOT + 4.18.0-SNAPSHOT 
bookkeeper-common Apache BookKeeper :: Common @@ -30,6 +30,11 @@ bookkeeper-stats-api ${project.parent.version} + + org.apache.bookkeeper + cpu-affinity + ${project.parent.version} + com.google.guava guava @@ -50,6 +55,20 @@ com.fasterxml.jackson.core jackson-annotations + + org.jctools + jctools-core + + + io.netty.incubator + netty-incubator-transport-native-io_uring + linux-x86_64 + + + io.netty.incubator + netty-incubator-transport-native-io_uring + linux-aarch_64 + com.google.code.findbugs jsr305 @@ -60,11 +79,27 @@ error_prone_annotations provided + + + io.reactivex.rxjava3 + rxjava + + + org.apache.bookkeeper + testtools + ${project.parent.version} + test + org.apache.commons commons-lang3 test + + org.awaitility + awaitility + test + @@ -79,7 +114,6 @@ org.apache.maven.plugins maven-jar-plugin - ${maven-jar-plugin.version} @@ -92,6 +126,32 @@ org.apache.maven.plugins maven-surefire-plugin + + org.apache.maven.plugins + maven-javadoc-plugin + + ${src.dir} + + none + + + + Bookkeeper Client + org.apache.bookkeeper.common.annotation* + + + + + + attach-javadocs + + jar + + + + diff --git a/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/annotation/InterfaceAudience.java b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/annotation/InterfaceAudience.java index 40fcef66865..571dde3b96a 100644 --- a/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/annotation/InterfaceAudience.java +++ b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/annotation/InterfaceAudience.java @@ -34,7 +34,7 @@ public class InterfaceAudience { */ @Documented @Retention(RetentionPolicy.RUNTIME) - public @interface Public {}; + public @interface Public {} /** * Intended for use only within the project(s) specified in the annotation. @@ -42,14 +42,14 @@ public class InterfaceAudience { */ @Documented @Retention(RetentionPolicy.RUNTIME) - public @interface LimitedPrivate {}; + public @interface LimitedPrivate {} /** * Intended for use only within bookkeeper itself. */ @Documented @Retention(RetentionPolicy.RUNTIME) - public @interface Private {}; + public @interface Private {} private InterfaceAudience() {} // Audience can't exist on its own diff --git a/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/annotation/InterfaceStability.java b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/annotation/InterfaceStability.java index 174afd530a4..e57f677c0f9 100644 --- a/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/annotation/InterfaceStability.java +++ b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/annotation/InterfaceStability.java @@ -32,20 +32,20 @@ public class InterfaceStability { * can break compatibility only at major release (ie. at m.0). */ @Documented - public @interface Stable {}; + public @interface Stable {} /** * Evolving, but can break compatibility at minor release (i.e. m.x) */ @Documented - public @interface Evolving {}; + public @interface Evolving {} /** * No guarantee is provided as to reliability or stability across any * level of release granularity. 
*/ @Documented - public @interface Unstable {}; + public @interface Unstable {} private InterfaceStability() {} } diff --git a/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/collections/BatchedArrayBlockingQueue.java b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/collections/BatchedArrayBlockingQueue.java new file mode 100644 index 00000000000..646391b49e3 --- /dev/null +++ b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/collections/BatchedArrayBlockingQueue.java @@ -0,0 +1,409 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.common.collections; + +import java.util.AbstractQueue; +import java.util.Arrays; +import java.util.Collection; +import java.util.Iterator; +import java.util.List; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.locks.Condition; +import java.util.concurrent.locks.ReentrantLock; + +/** + * This implements a {@link BlockingQueue} backed by an array with fixed capacity. + * + *
<p>
This queue only allows 1 consumer thread to dequeue items and multiple producer threads. + */ +public class BatchedArrayBlockingQueue + extends AbstractQueue + implements BlockingQueue, BatchedBlockingQueue { + + private final ReentrantLock lock = new ReentrantLock(); + + private final Condition notEmpty = lock.newCondition(); + private final Condition notFull = lock.newCondition(); + + private final int capacity; + private final T[] data; + + private int size; + + private int consumerIdx; + private int producerIdx; + + @SuppressWarnings("unchecked") + public BatchedArrayBlockingQueue(int capacity) { + this.capacity = capacity; + this.data = (T[]) new Object[this.capacity]; + } + + private T dequeueOne() { + T item = data[consumerIdx]; + data[consumerIdx] = null; + if (++consumerIdx == capacity) { + consumerIdx = 0; + } + + if (size-- == capacity) { + notFull.signalAll(); + } + + return item; + } + + private void enqueueOne(T item) { + data[producerIdx] = item; + if (++producerIdx == capacity) { + producerIdx = 0; + } + + if (size++ == 0) { + notEmpty.signalAll(); + } + } + + @Override + public T poll() { + lock.lock(); + + try { + if (size == 0) { + return null; + } + + return dequeueOne(); + } finally { + lock.unlock(); + } + } + + @Override + public T peek() { + lock.lock(); + + try { + if (size == 0) { + return null; + } + + return data[consumerIdx]; + } finally { + lock.unlock(); + } + } + + @Override + public boolean offer(T e) { + lock.lock(); + + try { + if (size == capacity) { + return false; + } + + enqueueOne(e); + + return true; + } finally { + lock.unlock(); + } + } + + @Override + public void put(T e) throws InterruptedException { + lock.lockInterruptibly(); + + try { + while (size == capacity) { + notFull.await(); + } + + enqueueOne(e); + } finally { + lock.unlock(); + } + } + + public int putAll(List c) throws InterruptedException { + lock.lockInterruptibly(); + + try { + while (size == capacity) { + notFull.await(); + } + + int availableCapacity = capacity - size; + + int toInsert = Math.min(availableCapacity, c.size()); + + int producerIdx = this.producerIdx; + for (int i = 0; i < toInsert; i++) { + data[producerIdx] = c.get(i); + if (++producerIdx == capacity) { + producerIdx = 0; + } + } + + this.producerIdx = producerIdx; + + if (size == 0) { + notEmpty.signalAll(); + } + + size += toInsert; + + return toInsert; + } finally { + lock.unlock(); + } + } + + @Override + public void putAll(T[] a, int offset, int len) throws InterruptedException { + while (len > 0) { + int published = internalPutAll(a, offset, len); + offset += published; + len -= published; + } + } + + private int internalPutAll(T[] a, int offset, int len) throws InterruptedException { + lock.lockInterruptibly(); + + try { + while (size == capacity) { + notFull.await(); + } + + int availableCapacity = capacity - size; + int toInsert = Math.min(availableCapacity, len); + int producerIdx = this.producerIdx; + + // First span + int firstSpan = Math.min(toInsert, capacity - producerIdx); + System.arraycopy(a, offset, data, producerIdx, firstSpan); + producerIdx += firstSpan; + + int secondSpan = toInsert - firstSpan; + if (secondSpan > 0) { + System.arraycopy(a, offset + firstSpan, data, 0, secondSpan); + producerIdx = secondSpan; + } + + if (producerIdx == capacity) { + producerIdx = 0; + } + + this.producerIdx = producerIdx; + + if (size == 0) { + notEmpty.signalAll(); + } + + size += toInsert; + return toInsert; + } finally { + lock.unlock(); + } + } + + @Override + public boolean offer(T e, long timeout, 
TimeUnit unit) throws InterruptedException { + long remainingTimeNanos = unit.toNanos(timeout); + + lock.lockInterruptibly(); + try { + while (size == capacity) { + if (remainingTimeNanos <= 0L) { + return false; + } + + remainingTimeNanos = notFull.awaitNanos(remainingTimeNanos); + } + + enqueueOne(e); + return true; + } finally { + lock.unlock(); + } + } + + @Override + public T take() throws InterruptedException { + lock.lockInterruptibly(); + + try { + while (size == 0) { + notEmpty.await(); + } + + return dequeueOne(); + } finally { + lock.unlock(); + } + } + + @Override + public T poll(long timeout, TimeUnit unit) throws InterruptedException { + long remainingTimeNanos = unit.toNanos(timeout); + + lock.lockInterruptibly(); + try { + while (size == 0) { + if (remainingTimeNanos <= 0L) { + return null; + } + + remainingTimeNanos = notEmpty.awaitNanos(remainingTimeNanos); + } + + return dequeueOne(); + } finally { + lock.unlock(); + } + } + + @Override + public int remainingCapacity() { + return capacity - size; + } + + @Override + public int drainTo(Collection c) { + return drainTo(c, capacity); + } + + @Override + public int drainTo(Collection c, int maxElements) { + lock.lock(); + try { + int toDrain = Math.min(size, maxElements); + + int consumerIdx = this.consumerIdx; + for (int i = 0; i < toDrain; i++) { + T item = data[consumerIdx]; + data[consumerIdx] = null; + c.add(item); + + if (++consumerIdx == capacity) { + consumerIdx = 0; + } + } + + this.consumerIdx = consumerIdx; + if (size == capacity) { + notFull.signalAll(); + } + + size -= toDrain; + return toDrain; + } finally { + lock.unlock(); + } + } + + @Override + public int takeAll(T[] array) throws InterruptedException { + return internalTakeAll(array, true, 0, TimeUnit.SECONDS); + } + + @Override + public int pollAll(T[] array, long timeout, TimeUnit unit) throws InterruptedException { + return internalTakeAll(array, false, timeout, unit); + } + + private int internalTakeAll(T[] array, boolean waitForever, long timeout, TimeUnit unit) + throws InterruptedException { + lock.lockInterruptibly(); + try { + while (size == 0) { + if (waitForever) { + notEmpty.await(); + } else { + if (!notEmpty.await(timeout, unit)) { + return 0; + } + } + } + + int toDrain = Math.min(size, array.length); + + int consumerIdx = this.consumerIdx; + + // First span + int firstSpan = Math.min(toDrain, capacity - consumerIdx); + System.arraycopy(data, consumerIdx, array, 0, firstSpan); + Arrays.fill(data, consumerIdx, consumerIdx + firstSpan, null); + consumerIdx += firstSpan; + + int secondSpan = toDrain - firstSpan; + if (secondSpan > 0) { + System.arraycopy(data, 0, array, firstSpan, secondSpan); + Arrays.fill(data, 0, secondSpan, null); + consumerIdx = secondSpan; + } + + if (consumerIdx == capacity) { + consumerIdx = 0; + } + this.consumerIdx = consumerIdx; + if (size == capacity) { + notFull.signalAll(); + } + + size -= toDrain; + return toDrain; + } finally { + lock.unlock(); + } + } + + @Override + public void clear() { + lock.lock(); + try { + while (size > 0) { + dequeueOne(); + } + } finally { + lock.unlock(); + } + } + + @Override + public int size() { + lock.lock(); + + try { + return size; + } finally { + lock.unlock(); + } + } + + @Override + public Iterator iterator() { + throw new UnsupportedOperationException(); + } +} diff --git a/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/collections/BatchedBlockingQueue.java b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/collections/BatchedBlockingQueue.java new 
file mode 100644
index 00000000000..5a0e0a72ea0
--- /dev/null
+++ b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/collections/BatchedBlockingQueue.java
@@ -0,0 +1,55 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+package org.apache.bookkeeper.common.collections;
+
+import java.util.concurrent.BlockingQueue;
+import java.util.concurrent.TimeUnit;
+
+public interface BatchedBlockingQueue<T> extends BlockingQueue<T> {
+    void putAll(T[] a, int offset, int len) throws InterruptedException;
+
+    /**
+     * Drain the queue into an array.
+     * Wait if there are no items in the queue.
+     *
+     * @param array
+     * @return
+     * @throws InterruptedException
+     */
+    int takeAll(T[] array) throws InterruptedException;
+
+    /**
+     * Removes multiple items from the queue.
+     *
+     * <p>The method returns when either:
+     * 1. At least one item is available
+     * 2. The timeout expires
+     *
+     * @param array
+     * @param timeout
+     * @param unit
+     * @return
+     * @throws InterruptedException
+     */
+    int pollAll(T[] array, long timeout, TimeUnit unit) throws InterruptedException;
+
+}
diff --git a/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/collections/BlockingMpscQueue.java b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/collections/BlockingMpscQueue.java
new file mode 100644
index 00000000000..56d9627e846
--- /dev/null
+++ b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/collections/BlockingMpscQueue.java
@@ -0,0 +1,198 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.bookkeeper.common.collections;
+
+import java.util.Collection;
+import java.util.concurrent.BlockingQueue;
+import java.util.concurrent.TimeUnit;
+import org.jctools.queues.MpscArrayQueue;
+
+/**
+ * Blocking queue optimized for multiple producers and single consumer.
+ */ +public class BlockingMpscQueue extends MpscArrayQueue implements BlockingQueue, BatchedBlockingQueue { + + public BlockingMpscQueue(int size) { + super(size); + } + + @Override + public void put(T e) throws InterruptedException { + while (!this.relaxedOffer(e)) { + // Do busy-spin loop + if (Thread.interrupted()) { + throw new InterruptedException(); + } + } + } + + @Override + public boolean offer(T e, long timeout, TimeUnit unit) throws InterruptedException { + long absoluteEndTime = System.nanoTime() + unit.toNanos(timeout); + + while (!this.relaxedOffer(e)) { + // Do busy-spin loop + + if (System.nanoTime() > absoluteEndTime) { + return false; + } + + if (Thread.interrupted()) { + throw new InterruptedException(); + } + } + + return true; + } + + @Override + public T take() throws InterruptedException { + int idleCounter = 0; + while (true) { + T item = relaxedPoll(); + if (item == null) { + if (Thread.interrupted()) { + throw new InterruptedException(); + } + + idleCounter = WAIT_STRATEGY.idle(idleCounter); + continue; + } + + + return item; + } + } + + @Override + public T poll(long timeout, TimeUnit unit) throws InterruptedException { + long absoluteEndTime = System.nanoTime() + unit.toNanos(timeout); + + int idleCounter = 0; + while (true) { + T item = relaxedPoll(); + if (item == null) { + if (Thread.interrupted()) { + throw new InterruptedException(); + } + + if (System.nanoTime() > absoluteEndTime) { + return null; + } else { + idleCounter = WAIT_STRATEGY.idle(idleCounter); + continue; + } + } + + return item; + } + } + + @Override + public int remainingCapacity() { + return capacity() - size(); + } + + @Override + public int drainTo(Collection c) { + int initialSize = c.size(); + + final DrainStrategy ds = new DrainStrategy(); + drain(c::add, ds, ds); + return c.size() - initialSize; + } + + @Override + public int drainTo(Collection c, int maxElements) { + return drain(c::add, maxElements); + } + + @Override + public void putAll(T[] a, int offset, int len) throws InterruptedException { + for (int i = 0; i < len; i++) { + put(a[offset + i]); + } + } + + @Override + public int takeAll(T[] array) throws InterruptedException { + int items = 0; + + T t; + while (items < array.length && (t = poll()) != null) { + array[items++] = t; + } + + if (items == 0) { + array[items++] = take(); + } + + return items; + } + + @Override + public int pollAll(T[] array, long timeout, TimeUnit unit) throws InterruptedException { + int items = 0; + + T t; + while (items < array.length && (t = poll()) != null) { + array[items++] = t; + } + + if (items == 0 && (t = poll(timeout, unit)) != null) { + array[items++] = t; + } + + return items; + } + + /** + * Wait strategy combined with exit condition, for draining the queue. + */ + private static final class DrainStrategy implements WaitStrategy, ExitCondition { + + boolean reachedEnd = false; + + @Override + public boolean keepRunning() { + return !reachedEnd; + } + + @Override + public int idle(int idleCounter) { + reachedEnd = true; + return idleCounter; + } + + } + + /** + * Waiting strategy that starts with busy loop and gradually falls back to sleeping if no items are available. 
+ */ + private static final WaitStrategy SPIN_STRATEGY = new WaitStrategy() { + + @Override + public int idle(int idleCounter) { + BusyWait.onSpinWait(); + return idleCounter + 1; + } + }; + + private static final WaitStrategy WAIT_STRATEGY = SPIN_STRATEGY; +} diff --git a/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/collections/BusyWait.java b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/collections/BusyWait.java new file mode 100644 index 00000000000..25017e0162e --- /dev/null +++ b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/collections/BusyWait.java @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.bookkeeper.common.collections; + +import java.lang.invoke.MethodHandle; +import java.lang.invoke.MethodHandles; +import java.lang.invoke.MethodType; +import lombok.experimental.UtilityClass; +import lombok.extern.slf4j.Slf4j; + +/** + * Utility class to use "Thread.onSpinWait()" when available. + */ +@UtilityClass +@Slf4j +public class BusyWait { + + /** + * If available (Java 9+), use intrinsic {@link Thread#onSpinWait} which will + * reduce CPU consumption during the wait, otherwise fallback to regular + * spinning. + */ + public static void onSpinWait() { + if (ON_SPIN_WAIT != null) { + try { + ON_SPIN_WAIT.invokeExact(); + } catch (Throwable t) { + // Ignore + } + } + } + + private static final MethodHandle ON_SPIN_WAIT; + + static { + MethodHandle handle = null; + try { + handle = MethodHandles.lookup().findStatic(Thread.class, "onSpinWait", MethodType.methodType(void.class)); + } catch (Throwable t) { + // Ignore + if (log.isDebugEnabled()) { + log.debug("Unable to use 'onSpinWait' from JVM", t); + } + } + + ON_SPIN_WAIT = handle; + } +} diff --git a/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/collections/GrowableMpScArrayConsumerBlockingQueue.java b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/collections/GrowableMpScArrayConsumerBlockingQueue.java new file mode 100644 index 00000000000..1e614f1820b --- /dev/null +++ b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/collections/GrowableMpScArrayConsumerBlockingQueue.java @@ -0,0 +1,331 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
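
Both queue implementations in this patch expose the batched contract of `BatchedBlockingQueue`, so a single consumer can drain many items per wakeup. A rough usage sketch built on the `BlockingMpscQueue` defined above; the task type, capacity, and batch size are illustrative:

```java
import org.apache.bookkeeper.common.collections.BatchedBlockingQueue;
import org.apache.bookkeeper.common.collections.BlockingMpscQueue;

public class BatchDrainSketch {
    public static void main(String[] args) throws InterruptedException {
        BatchedBlockingQueue<Runnable> queue = new BlockingMpscQueue<>(1024);

        // Many producer threads may call put() (or putAll) concurrently.
        for (int i = 0; i < 10; i++) {
            final int id = i;
            queue.put(() -> System.out.println("task " + id));
        }

        // The single consumer drains up to 100 items per call to takeAll(),
        // amortizing wakeup and synchronization cost over the whole batch.
        Runnable[] batch = new Runnable[100];
        int n = queue.takeAll(batch); // blocks until at least one item arrives
        for (int i = 0; i < n; i++) {
            batch[i].run();
        }
    }
}
```
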
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.common.collections; + +import java.util.AbstractQueue; +import java.util.Collection; +import java.util.Iterator; +import java.util.NoSuchElementException; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.locks.LockSupport; +import java.util.concurrent.locks.StampedLock; +import org.apache.bookkeeper.common.util.MathUtils; + + +/** + * This implements a {@link BlockingQueue} backed by an array with no fixed capacity. + * + *
<p>When the capacity is reached, data will be moved to a bigger array.
+ *
+ * <p>
This queue only allows 1 consumer thread to dequeue items and multiple producer threads. + */ +public class GrowableMpScArrayConsumerBlockingQueue extends AbstractQueue implements BlockingQueue { + + private final StampedLock headLock = new StampedLock(); + private final PaddedInt headIndex = new PaddedInt(); + private final PaddedInt tailIndex = new PaddedInt(); + private final StampedLock tailLock = new StampedLock(); + + private T[] data; + private final AtomicInteger size = new AtomicInteger(0); + + private volatile Thread waitingConsumer; + + public GrowableMpScArrayConsumerBlockingQueue() { + this(64); + } + + @SuppressWarnings("unchecked") + public GrowableMpScArrayConsumerBlockingQueue(int initialCapacity) { + int capacity = MathUtils.findNextPositivePowerOfTwo(initialCapacity); + data = (T[]) new Object[capacity]; + } + + @Override + public T remove() { + T item = poll(); + if (item == null) { + throw new NoSuchElementException(); + } + + return item; + } + + @Override + public T poll() { + if (size.get() > 0) { + // Since this is a single-consumer queue, we don't expect multiple threads calling poll(), though we need + // to protect against array expansions + long stamp = headLock.readLock(); + + try { + T item = data[headIndex.value]; + data[headIndex.value] = null; + headIndex.value = (headIndex.value + 1) & (data.length - 1); + size.decrementAndGet(); + return item; + } finally { + headLock.unlockRead(stamp); + } + } else { + return null; + } + } + + @Override + public T element() { + T item = peek(); + if (item == null) { + throw new NoSuchElementException(); + } + + return item; + } + + @Override + public T peek() { + if (size.get() > 0) { + long stamp = headLock.readLock(); + + try { + return data[headIndex.value]; + } finally { + headLock.unlockRead(stamp); + } + } else { + return null; + } + } + + @Override + public boolean offer(T e) { + // Queue is unbounded and it will never reject new items + put(e); + return true; + } + + @Override + public void put(T e) { + long stamp = tailLock.writeLock(); + + try { + int oldSize = size.get(); + if (oldSize == data.length) { + expandArray(); + } + + data[tailIndex.value] = e; + tailIndex.value = (tailIndex.value + 1) & (data.length - 1); + + if (size.getAndIncrement() == 0 && waitingConsumer != null) { + Thread waitingConsumer = this.waitingConsumer; + this.waitingConsumer = null; + LockSupport.unpark(waitingConsumer); + } + } finally { + tailLock.unlockWrite(stamp); + } + } + + @Override + public boolean add(T e) { + put(e); + return true; + } + + @Override + public boolean offer(T e, long timeout, TimeUnit unit) { + // Queue is unbounded and it will never reject new items + put(e); + return true; + } + + @Override + public T take() throws InterruptedException { + while (size() == 0) { + waitingConsumer = Thread.currentThread(); + + // Double check that size has not changed after we have registered ourselves for notification + if (size() == 0) { + LockSupport.park(); + if (Thread.interrupted()) { + throw new InterruptedException(); + } + } + } + + return poll(); + } + + @Override + public T poll(long timeout, TimeUnit unit) throws InterruptedException { + long deadline = System.currentTimeMillis() + unit.toMillis(timeout); + + while (size.get() == 0) { + waitingConsumer = Thread.currentThread(); + + // Double check that size has not changed after we have registered ourselves for notification + if (size.get() == 0) { + LockSupport.parkUntil(deadline); + if (Thread.interrupted()) { + throw new InterruptedException(); + } + + if 
(System.currentTimeMillis() >= deadline) { + return null; + } + } + } + + return poll(); + } + + @Override + public int remainingCapacity() { + return Integer.MAX_VALUE; + } + + @Override + public int drainTo(Collection c) { + return drainTo(c, Integer.MAX_VALUE); + } + + @Override + public int drainTo(Collection c, int maxElements) { + long stamp = headLock.readLock(); + + try { + int toDrain = Math.min(size.get(), maxElements); + + for (int i = 0; i < toDrain; i++) { + T item = data[headIndex.value]; + data[headIndex.value] = null; + c.add(item); + + headIndex.value = (headIndex.value + 1) & (data.length - 1); + } + + this.size.addAndGet(-toDrain); + return toDrain; + } finally { + headLock.unlockRead(stamp); + } + } + + @Override + public void clear() { + long stamp = headLock.readLock(); + + try { + int size = this.size.get(); + + for (int i = 0; i < size; i++) { + data[headIndex.value] = null; + headIndex.value = (headIndex.value + 1) & (data.length - 1); + } + + this.size.addAndGet(-size); + } finally { + headLock.unlockRead(stamp); + } + } + + @Override + public int size() { + return size.get(); + } + + @Override + public Iterator iterator() { + throw new UnsupportedOperationException(); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + + long tailStamp = tailLock.writeLock(); + long headStamp = headLock.writeLock(); + + try { + int headIndex = this.headIndex.value; + int size = this.size.get(); + + sb.append('['); + + for (int i = 0; i < size; i++) { + T item = data[headIndex]; + if (i > 0) { + sb.append(", "); + } + + sb.append(item); + + headIndex = (headIndex + 1) & (data.length - 1); + } + + sb.append(']'); + } finally { + headLock.unlockWrite(headStamp); + tailLock.unlockWrite(tailStamp); + } + return sb.toString(); + } + + @SuppressWarnings("unchecked") + private void expandArray() { + // We already hold the tailLock + long headLockStamp = headLock.writeLock(); + + try { + int size = this.size.get(); + int newCapacity = data.length * 2; + T[] newData = (T[]) new Object[newCapacity]; + + + int oldHeadIndex = headIndex.value; + int lenHeadToEnd = Math.min(size, data.length - oldHeadIndex); + + System.arraycopy(data, oldHeadIndex, newData, 0, lenHeadToEnd); + System.arraycopy(data, 0, newData, lenHeadToEnd, size - lenHeadToEnd); + + data = newData; + headIndex.value = 0; + tailIndex.value = size; + } finally { + headLock.unlockWrite(headLockStamp); + } + } + + private static final class PaddedInt { + int value = 0; + + // Padding to avoid false sharing + public volatile int pi1 = 1; + public volatile long p1 = 1L, p2 = 2L, p3 = 3L, p4 = 4L, p5 = 5L, p6 = 6L; + + public long exposeToAvoidOptimization() { + return pi1 + p1 + p2 + p3 + p4 + p5 + p6; + } + } +} diff --git a/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/collections/RecyclableArrayList.java b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/collections/RecyclableArrayList.java index 4915d7749ac..a4932c0412c 100644 --- a/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/collections/RecyclableArrayList.java +++ b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/collections/RecyclableArrayList.java @@ -64,4 +64,15 @@ public void recycle() { handle.recycle(this); } } + + @Override + public boolean equals(Object obj) { + return super.equals(obj); + } + + @Override + public int hashCode() { + return super.hashCode(); + } + } diff --git 
a/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/component/AbstractLifecycleComponent.java b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/component/AbstractLifecycleComponent.java
index 015d54d6d35..534c952f337 100644
--- a/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/component/AbstractLifecycleComponent.java
+++ b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/component/AbstractLifecycleComponent.java
@@ -25,6 +25,8 @@
 import lombok.extern.slf4j.Slf4j;
 import org.apache.bookkeeper.common.conf.ComponentConfiguration;
 import org.apache.bookkeeper.stats.StatsLogger;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * A mix of {@link AbstractComponent} and {@link LifecycleComponent}.
@@ -33,6 +35,8 @@ public abstract class AbstractLifecycleComponent<ConfT extends ComponentConfiguration>
         extends AbstractComponent<ConfT> implements LifecycleComponent {
 
+    private static final Logger LOG = LoggerFactory.getLogger(AbstractLifecycleComponent.class);
+
     protected final Lifecycle lifecycle = new Lifecycle();
     private final Set<LifecycleListener> listeners = new CopyOnWriteArraySet<>();
     protected final StatsLogger statsLogger;
@@ -75,7 +79,17 @@ public void start() {
             return;
         }
         listeners.forEach(LifecycleListener::beforeStart);
-        doStart();
+        try {
+            doStart();
+        } catch (Throwable exc) {
+            LOG.error("Failed to start Component: {}", getName(), exc);
+            if (uncaughtExceptionHandler != null) {
+                LOG.error("Calling uncaughtExceptionHandler");
+                uncaughtExceptionHandler.uncaughtException(Thread.currentThread(), exc);
+            } else {
+                throw exc;
+            }
+        }
         lifecycle.moveToStarted();
         listeners.forEach(LifecycleListener::afterStart);
     }
@@ -108,7 +122,7 @@ public void close() {
         try {
             doClose();
         } catch (IOException e) {
-            log.warn("failed to close {}", getClass().getName(), e);
+            log.warn("failed to close {}", componentName, e);
         }
         listeners.forEach(LifecycleListener::afterClose);
     }
diff --git a/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/component/AutoCloseableLifecycleComponent.java b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/component/AutoCloseableLifecycleComponent.java
new file mode 100644
index 00000000000..ded3707a058
--- /dev/null
+++ b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/component/AutoCloseableLifecycleComponent.java
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.bookkeeper.common.component;
+
+import java.lang.Thread.UncaughtExceptionHandler;
+import java.util.Set;
+import java.util.concurrent.CopyOnWriteArraySet;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Allows for AutoCloseable resources to be added to the component
+ * lifecycle without having to implement ServerLifecycleComponent directly.
+ */ +public class AutoCloseableLifecycleComponent implements LifecycleComponent { + private static final Logger LOG = LoggerFactory.getLogger(AutoCloseableLifecycleComponent.class); + + protected final Lifecycle lifecycle = new Lifecycle(); + private final Set listeners = new CopyOnWriteArraySet<>(); + protected volatile UncaughtExceptionHandler uncaughtExceptionHandler; + private final String componentName; + private final AutoCloseable closeable; + + public AutoCloseableLifecycleComponent(String componentName, AutoCloseable closeable) { + this.componentName = componentName; + this.closeable = closeable; + } + + @Override + public String getName() { + return this.componentName; + } + + @Override + public void setExceptionHandler(UncaughtExceptionHandler handler) { + this.uncaughtExceptionHandler = handler; + } + + @Override + public Lifecycle.State lifecycleState() { + return this.lifecycle.state(); + } + + @Override + public void addLifecycleListener(LifecycleListener listener) { + listeners.add(listener); + } + + @Override + public void removeLifecycleListener(LifecycleListener listener) { + listeners.remove(listener); + } + + @Override + public void start() { + if (!lifecycle.canMoveToStarted()) { + return; + } + listeners.forEach(LifecycleListener::beforeStart); + lifecycle.moveToStarted(); + listeners.forEach(LifecycleListener::afterStart); + } + + @Override + public void stop() { + if (!lifecycle.canMoveToStopped()) { + return; + } + listeners.forEach(LifecycleListener::beforeStop); + lifecycle.moveToStopped(); + listeners.forEach(LifecycleListener::afterStop); + } + + @Override + public void close() { + if (lifecycle.started()) { + stop(); + } + if (!lifecycle.canMoveToClosed()) { + return; + } + listeners.forEach(LifecycleListener::beforeClose); + lifecycle.moveToClosed(); + try { + closeable.close(); + } catch (Exception e) { + LOG.warn("failed to close {}", componentName, e); + } + listeners.forEach(LifecycleListener::afterClose); + } +} diff --git a/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/component/ComponentInfoPublisher.java b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/component/ComponentInfoPublisher.java new file mode 100644 index 00000000000..24b0c99760f --- /dev/null +++ b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/component/ComponentInfoPublisher.java @@ -0,0 +1,147 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
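
A short sketch of how `AutoCloseableLifecycleComponent` ties an arbitrary resource to the lifecycle machinery shown above; the executor here is just a stand-in resource:

```java
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import org.apache.bookkeeper.common.component.AutoCloseableLifecycleComponent;
import org.apache.bookkeeper.common.component.LifecycleComponent;

public class AutoCloseableSketch {
    public static void main(String[] args) {
        ExecutorService executor = Executors.newSingleThreadExecutor();

        // close() walks the component through stop() and then invokes the
        // wrapped AutoCloseable, so the executor shuts down with the component.
        LifecycleComponent component =
                new AutoCloseableLifecycleComponent("worker-pool", executor::shutdown);

        component.start();
        // ... submit work to the executor while the component is running ...
        component.close();
    }
}
```
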
+ */ + +package org.apache.bookkeeper.common.component; + +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.concurrent.ConcurrentHashMap; +import lombok.extern.slf4j.Slf4j; + +/** + * Allows a component to publish information about + * the services it implements, the endpoints it exposes + * and other useful information for management tools and client. + */ +@Slf4j +public class ComponentInfoPublisher { + + private final Map properties = new ConcurrentHashMap<>(); + private final Map endpoints = new ConcurrentHashMap<>(); + + /** + * Endpoint information. + */ + public static final class EndpointInfo { + + private final String id; + private final int port; + private final String host; + private final String protocol; + private final List auth; + private final List extensions; + + public EndpointInfo(String id, int port, String host, String protocol, + List auth, List extensions) { + this.id = id; + this.port = port; + this.host = host; + this.protocol = protocol; + this.auth = auth == null ? Collections.emptyList() : Collections.unmodifiableList(auth); + this.extensions = extensions == null ? Collections.emptyList() : Collections.unmodifiableList(extensions); + } + + public String getId() { + return id; + } + + public int getPort() { + return port; + } + + public String getHost() { + return host; + } + + public String getProtocol() { + return protocol; + } + + public List getAuth() { + return auth; + } + + public List getExtensions() { + return extensions; + } + + @Override + public String toString() { + return "EndpointInfo{" + "id=" + id + ", port=" + port + ", host=" + host + ", protocol=" + protocol + ", " + + "auth=" + auth + ", extensions=" + extensions + '}'; + } + + } + + private volatile boolean startupFinished; + + /** + * Publish an information about the system, like an endpoint address. + * + * @param key the key + * @param value the value, null values are not allowed. + */ + public void publishProperty(String key, String value) { + if (log.isDebugEnabled()) { + log.debug("publish {}={}", key, value); + } + if (startupFinished) { + throw new IllegalStateException("Server already started, cannot publish " + key); + } + Objects.requireNonNull(key); + Objects.requireNonNull(value, "Value for " + key + " cannot be null"); + + properties.put(key, value); + } + + public void publishEndpoint(EndpointInfo endpoint) { + if (log.isDebugEnabled()) { + log.debug("publishEndpoint {} on {}", endpoint, this); + } + EndpointInfo exists = endpoints.put(endpoint.id, endpoint); + if (exists != null) { + throw new IllegalStateException("An endpoint with id " + endpoint.id + + " has already been published: " + exists); + } + } + + public Map getProperties() { + if (!startupFinished) { + throw new IllegalStateException("Startup not yet finished"); + } + return Collections.unmodifiableMap(properties); + } + + public Map getEndpoints() { + if (!startupFinished) { + throw new IllegalStateException("Startup not yet finished"); + } + return Collections.unmodifiableMap(endpoints); + } + + /** + * Called by the framework to signal that preparation of startup is done, + * so we have gathered all of the available information. 
+ */ + public void startupFinished() { + startupFinished = true; + } + +} diff --git a/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/component/ComponentStarter.java b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/component/ComponentStarter.java index e95274722e9..3b44d67edb4 100644 --- a/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/component/ComponentStarter.java +++ b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/component/ComponentStarter.java @@ -46,7 +46,7 @@ public void run() { component.close(); log.info("Closed component {} in shutdown hook successfully. Exiting.", component.getName()); FutureUtils.complete(future, null); - } catch (Exception e) { + } catch (Throwable e) { log.error("Failed to close component {} in shutdown hook gracefully, Exiting anyway", component.getName(), e); future.completeExceptionally(e); @@ -72,10 +72,14 @@ public static CompletableFuture startComponent(LifecycleComponent componen // register a component exception handler component.setExceptionHandler((t, e) -> { + log.error("Triggered exceptionHandler of Component: {} because of Exception in Thread: {}", + component.getName(), t, e); // start the shutdown hook when an uncaught exception happen in the lifecycle component. shutdownHookThread.start(); }); + component.publishInfo(new ComponentInfoPublisher()); + log.info("Starting component {}.", component.getName()); component.start(); log.info("Started component {}.", component.getName()); diff --git a/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/component/LifecycleComponent.java b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/component/LifecycleComponent.java index d3820663ee4..660dfb0c78b 100644 --- a/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/component/LifecycleComponent.java +++ b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/component/LifecycleComponent.java @@ -33,10 +33,14 @@ public interface LifecycleComponent extends AutoCloseable { void removeLifecycleListener(LifecycleListener listener); + default void publishInfo(ComponentInfoPublisher componentInfoPublisher) { + } + void start(); void stop(); + @Override void close(); /** diff --git a/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/component/LifecycleComponentStack.java b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/component/LifecycleComponentStack.java index d60691009f1..5b94e049e7d 100644 --- a/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/component/LifecycleComponentStack.java +++ b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/component/LifecycleComponentStack.java @@ -26,10 +26,12 @@ import com.google.common.collect.Lists; import java.lang.Thread.UncaughtExceptionHandler; import java.util.List; +import lombok.extern.slf4j.Slf4j; /** * A stack of {@link LifecycleComponent}s. 
*/ +@Slf4j public class LifecycleComponentStack implements LifecycleComponent { public static Builder newBuilder() { @@ -42,12 +44,19 @@ public static Builder newBuilder() { public static class Builder { private String name; + private ComponentInfoPublisher componentInfoPublisher; private final List components; private Builder() { components = Lists.newArrayList(); } + public Builder withComponentInfoPublisher(ComponentInfoPublisher componentInfoPublisher) { + checkNotNull(componentInfoPublisher, "ComponentInfoPublisher is null"); + this.componentInfoPublisher = componentInfoPublisher; + return this; + } + public Builder addComponent(LifecycleComponent component) { checkNotNull(component, "Lifecycle component is null"); components.add(component); @@ -64,6 +73,7 @@ public LifecycleComponentStack build() { checkArgument(!components.isEmpty(), "Lifecycle component stack is empty : " + components); return new LifecycleComponentStack( name, + componentInfoPublisher != null ? componentInfoPublisher : new ComponentInfoPublisher(), ImmutableList.copyOf(components)); } @@ -71,10 +81,13 @@ public LifecycleComponentStack build() { private final String name; private final ImmutableList components; + private final ComponentInfoPublisher componentInfoPublisher; private LifecycleComponentStack(String name, + ComponentInfoPublisher componentInfoPublisher, ImmutableList components) { this.name = name; + this.componentInfoPublisher = componentInfoPublisher; this.components = components; } @@ -108,8 +121,26 @@ public void removeLifecycleListener(LifecycleListener listener) { components.forEach(component -> component.removeLifecycleListener(listener)); } + @Override + public void publishInfo(ComponentInfoPublisher componentInfoPublisher) { + components.forEach(component -> { + if (log.isDebugEnabled()) { + log.debug("calling publishInfo on {} ", component); + } + component.publishInfo(componentInfoPublisher); + }); + } + @Override public void start() { + components.forEach(component -> { + if (log.isDebugEnabled()) { + log.debug("calling publishInfo on {} ", component); + } + component.publishInfo(componentInfoPublisher); + }); + componentInfoPublisher.startupFinished(); + components.forEach(component -> component.start()); } diff --git a/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/component/RxSchedulerLifecycleComponent.java b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/component/RxSchedulerLifecycleComponent.java new file mode 100644 index 00000000000..ba1759704ec --- /dev/null +++ b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/component/RxSchedulerLifecycleComponent.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
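
The publisher plumbing added above lets every component in a stack advertise endpoints and properties before anything starts. A sketch of the intended wiring, assuming the stack builder's `withName(...)` setter from the existing API; the endpoint values are invented:

```java
import org.apache.bookkeeper.common.component.AutoCloseableLifecycleComponent;
import org.apache.bookkeeper.common.component.ComponentInfoPublisher;
import org.apache.bookkeeper.common.component.ComponentInfoPublisher.EndpointInfo;
import org.apache.bookkeeper.common.component.LifecycleComponent;
import org.apache.bookkeeper.common.component.LifecycleComponentStack;

public class PublishInfoSketch {
    public static void main(String[] args) {
        ComponentInfoPublisher publisher = new ComponentInfoPublisher();

        // A component that advertises one endpoint via the new publishInfo hook.
        LifecycleComponent server = new AutoCloseableLifecycleComponent(
                "server", () -> System.out.println("server closed")) {
            @Override
            public void publishInfo(ComponentInfoPublisher infoPublisher) {
                infoPublisher.publishProperty("server.flavor", "example");
                infoPublisher.publishEndpoint(new EndpointInfo(
                        "bookie-rpc", 3181, "localhost", "bookie", null, null));
            }
        };

        LifecycleComponentStack stack = LifecycleComponentStack.newBuilder()
                .withName("example-stack")
                .withComponentInfoPublisher(publisher)
                .addComponent(server)
                .build();

        // start() first collects publishInfo from every component, then marks
        // the publisher finished, and only then starts the components in order.
        stack.start();
        System.out.println(publisher.getEndpoints());
        stack.close();
    }
}
```
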
+ */ + +package org.apache.bookkeeper.common.component; + +import io.reactivex.rxjava3.core.Scheduler; +import java.util.concurrent.ExecutorService; +import org.apache.bookkeeper.common.conf.ComponentConfiguration; +import org.apache.bookkeeper.stats.StatsLogger; + +/** + * The scheduler for rxjava based jobs, such as data integrity checking. + */ +public class RxSchedulerLifecycleComponent extends AbstractLifecycleComponent { + private final Scheduler scheduler; + private final ExecutorService rxExecutor; + + public RxSchedulerLifecycleComponent(String componentName, + ComponentConfiguration conf, + StatsLogger stats, + Scheduler scheduler, + ExecutorService rxExecutor) { + super(componentName, conf, stats); + this.scheduler = scheduler; + this.rxExecutor = rxExecutor; + } + + @Override + protected void doStart() { + scheduler.start(); + } + + @Override + protected void doStop() { + scheduler.shutdown(); + rxExecutor.shutdown(); + } + + @Override + public void doClose() { + scheduler.shutdown(); + rxExecutor.shutdown(); + } +} diff --git a/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/concurrent/FutureUtils.java b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/concurrent/FutureUtils.java index ab2d1ca9ecb..daba9c41ce5 100644 --- a/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/concurrent/FutureUtils.java +++ b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/concurrent/FutureUtils.java @@ -69,7 +69,19 @@ public static T result(CompletableFuture future, long timeout, TimeUnit t public static T result( CompletableFuture future, Function exceptionHandler) throws ExceptionT { try { - return future.get(); + try { + /* + * CompletableFuture.get() in JDK8 spins before blocking and wastes CPU time. + * CompletableFuture.get(long, TimeUnit) blocks immediately (if the result is + * not yet available). While the implementation of get() has changed in JDK9 + * (not spinning any more), using CompletableFuture.get(long, TimeUnit) allows + * us to avoid spinning for all current JDK versions. 
+ */ + return future.get(Long.MAX_VALUE, TimeUnit.NANOSECONDS); + } catch (TimeoutException eignore) { + // it's ok to return null if we timeout after 292 years (2^63 nanos) + return null; + } } catch (InterruptedException e) { Thread.currentThread().interrupt(); throw e; @@ -222,7 +234,9 @@ public void onFailure(final Throwable cause) { @Override public void run() { if (done) { - log.debug("ListFutureProcessor is interrupted."); + if (log.isDebugEnabled()) { + log.debug("ListFutureProcessor is interrupted."); + } return; } if (!itemsIter.hasNext()) { @@ -285,7 +299,7 @@ public static CompletableFuture within(final CompletableFuture promise }, timeout, unit); // when the promise is satisfied, cancel the timeout task promise.whenComplete((value, throwable) -> { - if (!task.cancel(true)) { + if (!task.cancel(true) && log.isDebugEnabled()) { log.debug("Failed to cancel the timeout task"); } } diff --git a/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/conf/ComponentConfiguration.java b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/conf/ComponentConfiguration.java index 8b2759709e4..64e4a935624 100644 --- a/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/conf/ComponentConfiguration.java +++ b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/conf/ComponentConfiguration.java @@ -310,7 +310,7 @@ private Map toMap() { Map configMap = new HashMap<>(); Iterator iterator = this.getKeys(); while (iterator.hasNext()) { - String key = iterator.next().toString(); + String key = iterator.next(); Object property = this.getProperty(key); if (property != null) { configMap.put(key, property.toString()); diff --git a/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/conf/ConfigDef.java b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/conf/ConfigDef.java new file mode 100644 index 00000000000..6e37ebcd061 --- /dev/null +++ b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/conf/ConfigDef.java @@ -0,0 +1,327 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
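
The `FutureUtils.result` change above swaps `CompletableFuture.get()` for a timed get with a practically infinite timeout, because the untimed variant busy-spins on JDK 8. The same trick in isolation:

```java
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

public class NonSpinningGet {
    // Wait on a future without CompletableFuture.get()'s JDK 8 busy-spin:
    // the timed variant parks the calling thread as soon as no result is ready.
    static <T> T awaitResult(CompletableFuture<T> future) throws Exception {
        try {
            return future.get(Long.MAX_VALUE, TimeUnit.NANOSECONDS);
        } catch (TimeoutException ignored) {
            // Unreachable in practice: 2^63 nanoseconds is roughly 292 years.
            return null;
        }
    }

    public static void main(String[] args) throws Exception {
        CompletableFuture<String> f = CompletableFuture.supplyAsync(() -> "done");
        System.out.println(awaitResult(f));
    }
}
```
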
+ */
+
+package org.apache.bookkeeper.common.conf;
+
+import static com.google.common.base.Preconditions.checkArgument;
+import static java.nio.charset.StandardCharsets.UTF_8;
+
+import com.google.common.collect.Sets;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.PrintStream;
+import java.lang.reflect.Field;
+import java.lang.reflect.Modifier;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardOpenOption;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeSet;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+import lombok.Getter;
+import lombok.extern.slf4j.Slf4j;
+import org.apache.commons.configuration.Configuration;
+import org.apache.commons.lang.StringUtils;
+
+/**
+ * A definition of a configuration instance.
+ */
+@Slf4j
+@Getter
+public class ConfigDef {
+
+    /**
+     * Builder to build a configuration definition.
+     */
+    public static class Builder {
+
+        private final Set<ConfigKeyGroup> groups = new TreeSet<>(ConfigKeyGroup.ORDERING);
+        private final Map<String, Set<ConfigKey>> settings = new HashMap<>();
+
+        private Builder() {}
+
+        /**
+         * Add the config key group to the builder.
+         *
+         * @param group config key group
+         * @return builder to build this configuration def
+         */
+        public Builder withConfigKeyGroup(ConfigKeyGroup group) {
+            groups.add(group);
+            return this;
+        }
+
+        /**
+         * Add the config key to the builder.
+         *
+         * @param key the key to add to the builder.
+         * @return builder to build this configuration def
+         */
+        public Builder withConfigKey(ConfigKey key) {
+            ConfigKeyGroup group = key.group();
+            Set<ConfigKey> keys;
+            String groupName;
+            if (null == group) {
+                groupName = "";
+            } else {
+                groupName = group.name();
+                groups.add(group);
+            }
+            keys = settings.computeIfAbsent(groupName, name -> new TreeSet<>(ConfigKey.ORDERING));
+            keys.add(key);
+            return this;
+        }
+
+        public ConfigDef build() {
+            checkArgument(
+                Sets.difference(
+                    groups.stream().map(group -> group.name()).collect(Collectors.toSet()),
+                    settings.keySet()
+                ).isEmpty(),
+                "Configuration Key Groups doesn't match with keys");
+            return new ConfigDef(groups, settings);
+        }
+
+    }
+
+    /**
+     * Create a builder to build a config def.
+     *
+     * @return builder to build a config def.
+     */
+    public static Builder builder() {
+        return new Builder();
+    }
+
+    private final Set<ConfigKeyGroup> groups;
+    private final Map<String, Set<ConfigKey>> settings;
+    private final Map<String, ConfigKey> keys;
+
+    private ConfigDef(Set<ConfigKeyGroup> groups,
+                      Map<String, Set<ConfigKey>> settings) {
+        this.groups = groups;
+        this.settings = settings;
+        this.keys = settings.values()
+            .stream()
+            .flatMap(keys -> keys.stream())
+            .collect(Collectors.toSet())
+            .stream()
+            .collect(Collectors.toMap(
+                key -> key.name(),
+                key -> key
+            ));
+    }
+
+    /**
+     * Validate if the provided conf is a valid configuration of this configuration definition.
+     *
+     * @param conf the configuration to validate
+     */
+    public void validate(Configuration conf) throws ConfigException {
+        for (ConfigKey key : keys.values()) {
+            key.validate(conf);
+        }
+    }
+
+    /**
+     * Build the config definition of a config class.
+     *
+     * @param configClass config class
+     * @return config definition.
+ */ + @SuppressWarnings("unchecked") + public static ConfigDef of(Class configClass) { + ConfigDef.Builder builder = ConfigDef.builder(); + + Field[] fields = configClass.getDeclaredFields(); + for (Field field : fields) { + if (Modifier.isStatic(field.getModifiers()) && field.getType().equals(ConfigKey.class)) { + field.setAccessible(true); + try { + builder.withConfigKey((ConfigKey) field.get(null)); + } catch (IllegalAccessException e) { + log.error("Illegal to access {}#{}", configClass.getSimpleName(), field.getName(), e); + } + } + } + + return builder.build(); + } + + // + // Methods to save the configuration to an {@link OutputStream} + // + + private static final int MAX_COLUMN_SIZE = 80; + private static final String COMMENT_PREFIX = "# "; + + public void save(Path path) throws IOException { + try (OutputStream stream = Files.newOutputStream( + path, StandardOpenOption.WRITE, StandardOpenOption.TRUNCATE_EXISTING)) { + save(stream); + } + } + + public void save(OutputStream os) throws IOException { + try (PrintStream ps = new PrintStream(os, false, UTF_8.name())) { + save(ps); + ps.flush(); + } + } + + private void writeNSharps(PrintStream stream, int num) { + IntStream.range(0, num).forEach(ignored -> stream.print("#")); + } + + private void writeConfigKeyGroup(PrintStream stream, ConfigKeyGroup group) { + int maxLength = Math.min( + group.description().length() + COMMENT_PREFIX.length(), + MAX_COLUMN_SIZE + ); + // "###########" + writeNSharps(stream, maxLength); + stream.println(); + // "# Settings of `` + writeSentence(stream, COMMENT_PREFIX, "Settings of `" + group.name() + "`"); + stream.println("#"); + // "# " + writeSentence(stream, COMMENT_PREFIX, group.description()); + // "###########" + writeNSharps(stream, maxLength); + stream.println(); + } + + private void writeConfigKey(PrintStream stream, + ConfigKey key) { + // "# " + // "#" + if (StringUtils.isNotBlank(key.description())) { + writeSentence(stream, COMMENT_PREFIX, key.description()); + stream.println("#"); + } + // "# " + // "#" + if (StringUtils.isNotBlank(key.documentation())) { + writeSentence(stream, COMMENT_PREFIX, key.documentation()); + stream.println("#"); + } + // "# type: , required" + writeSentence( + stream, + COMMENT_PREFIX, + "TYPE: " + key.type() + ", " + (key.required() ? 
"required" : "optional")); + if (null != key.validator() && StringUtils.isNotBlank(key.validator().documentation())) { + writeSentence( + stream, COMMENT_PREFIX, + "@constraints : " + key.validator().documentation() + ); + } + if (!key.optionValues().isEmpty()) { + writeSentence( + stream, COMMENT_PREFIX, "@options :" + ); + key.optionValues().forEach(value -> { + writeSentence( + stream, COMMENT_PREFIX, " " + value + ); + }); + } + // "#" + // "# @Since" + if (StringUtils.isNotBlank(key.since())) { + stream.println("#"); + writeSentence(stream, COMMENT_PREFIX, + "@since " + key.since() + ""); + } + // "#" + // "# @Deprecated" + if (key.deprecated()) { + stream.println("#"); + writeSentence(stream, COMMENT_PREFIX, getDeprecationDescription(key)); + } + // = + stream.print(key.name()); + stream.print("="); + if (null != key.defaultValue()) { + stream.print(key.defaultValue()); + } + stream.println(); + } + + private String getDeprecationDescription(ConfigKey key) { + StringBuilder sb = new StringBuilder(); + sb.append("@deprecated"); + if (StringUtils.isNotBlank(key.deprecatedSince())) { + sb.append(" since `") + .append(key.deprecatedSince()) + .append("`"); + } + if (StringUtils.isNotBlank(key.deprecatedByConfigKey())) { + sb.append(" in favor of using `") + .append(key.deprecatedByConfigKey()) + .append("`"); + } + return sb.toString(); + } + + private void writeSentence(PrintStream stream, + String prefix, + String sentence) { + int max = MAX_COLUMN_SIZE; + String[] words = sentence.split(" "); + int i = 0; + stream.print(prefix); + int current = prefix.length(); + while (i < words.length) { + String word = words[i]; + if (word.length() > max || current + word.length() <= max) { + if (i != 0) { + stream.print(" "); + } + stream.print(word); + current += (word.length() + 1); + } else { + stream.println(); + stream.print(prefix); + stream.print(word); + current = prefix.length() + word.length(); + } + ++i; + } + stream.println(); + } + + private void save(PrintStream stream) { + for (ConfigKeyGroup group : groups) { + writeConfigKeyGroup(stream, group); + stream.println(); + Set groupKeys = settings.getOrDefault(group.name(), Collections.emptySet()); + groupKeys.forEach(key -> { + writeConfigKey(stream, key); + stream.println(); + }); + } + } + + +} diff --git a/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/conf/ConfigException.java b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/conf/ConfigException.java new file mode 100644 index 00000000000..a0534a6b2de --- /dev/null +++ b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/conf/ConfigException.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.bookkeeper.common.conf; + +/** + * Exception thrown for configuration errors. + */ +public class ConfigException extends Exception { + + private static final long serialVersionUID = -7842276571881795108L; + + /** + * Construct a config exception with provided error. + * + * @param error error message + */ + public ConfigException(String error) { + super(error); + } + + /** + * Construct a config exception with provided error and reason. + * + * @param error error message + * @param cause error cause + */ + public ConfigException(String error, Throwable cause) { + super(error, cause); + } +} diff --git a/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/conf/ConfigKey.java b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/conf/ConfigKey.java new file mode 100644 index 00000000000..b2bb47a92b9 --- /dev/null +++ b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/conf/ConfigKey.java @@ -0,0 +1,371 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bookkeeper.common.conf; + +import static com.google.common.base.Preconditions.checkArgument; + +import java.util.Collections; +import java.util.Comparator; +import java.util.List; +import java.util.Objects; +import lombok.Builder; +import lombok.Builder.Default; +import lombok.Data; +import lombok.experimental.Accessors; +import lombok.extern.slf4j.Slf4j; +import org.apache.bookkeeper.common.annotation.InterfaceAudience.Public; +import org.apache.bookkeeper.common.conf.validators.NullValidator; +import org.apache.bookkeeper.common.util.ReflectionUtils; +import org.apache.commons.configuration.Configuration; +import org.apache.commons.configuration.ConfigurationException; + +/** + * A configuration key in a configuration. + */ +@Data +@Builder(builderMethodName = "internalBuilder") +@Accessors(fluent = true) +@Public +@Slf4j +public class ConfigKey { + + public static final Comparator ORDERING = (o1, o2) -> { + int ret = Integer.compare(o1.orderInGroup, o2.orderInGroup); + if (ret == 0) { + return o1.name().compareTo(o2.name()); + } else { + return ret; + } + }; + + /** + * Build a config key of name. + * + * @param name config key name + * @return config key builder + */ + public static ConfigKeyBuilder builder(String name) { + return internalBuilder().name(name); + } + + /** + * Flag indicates whether the setting is required. + */ + @Default + private boolean required = false; + + /** + * Name of the configuration setting. + */ + private String name; + + /** + * Type of the configuration setting. + */ + @Default + private Type type = Type.STRING; + + /** + * Description of the configuration setting. + */ + @Default + private String description = ""; + + /** + * Documentation of the configuration setting. 
+ */ + @Default + private String documentation = ""; + + /** + * Default value as a string representation. + */ + @Default + private Object defaultValue = null; + + private String defaultValueAsString() { + if (null == defaultValue) { + return null; + } else if (defaultValue instanceof String) { + return (String) defaultValue; + } else if (defaultValue instanceof Class) { + return ((Class) defaultValue).getName(); + } else { + return defaultValue.toString(); + } + } + + /** + * The list of options for this setting. + */ + @Default + private List optionValues = Collections.emptyList(); + + /** + * The validator used for validating configuration value. + */ + @Default + private Validator validator = NullValidator.of(); + + /** + * The key-group to group settings together. + */ + @Default + private ConfigKeyGroup group = ConfigKeyGroup.DEFAULT; + + /** + * The order of the setting in the key-group. + */ + @Default + private int orderInGroup = Integer.MIN_VALUE; + + /** + * The list of settings dependents on this setting. + */ + @Default + private List dependents = Collections.emptyList(); + + /** + * Whether this setting is deprecated or not. + */ + @Default + private boolean deprecated = false; + + /** + * The config key that deprecates this key. + */ + @Default + private String deprecatedByConfigKey = ""; + + /** + * The version when this settings was deprecated. + */ + @Default + private String deprecatedSince = ""; + + /** + * The version when this setting was introduced. + */ + @Default + private String since = ""; + + /** + * {@inheritDoc} + */ + @Override + public boolean equals(Object o) { + if (!(o instanceof ConfigKey)) { + return false; + } + ConfigKey other = (ConfigKey) o; + return Objects.equals(name, other.name); + } + + /** + * {@inheritDoc} + */ + @Override + public int hashCode() { + return name.hashCode(); + } + + /** + * Validate the setting is valid in the provided config conf. + * + * @param conf configuration to test + */ + public void validate(Configuration conf) throws ConfigException { + if (conf.containsKey(name()) && validator() != null) { + Object value = get(conf); + if (!validator().validate(name(), value)) { + throw new ConfigException("Invalid setting of '" + name() + + "' found the configuration: value = '" + value + "', requirement = '" + validator + "'"); + } + } else if (required()) { // missing config on a required field + throw new ConfigException( + "Setting '" + name() + "' is required but missing in the configuration"); + } + } + + /** + * Update the setting name in the configuration conf with the provided value. + * + * @param conf configuration to set + * @param value value of the setting + */ + public void set(Configuration conf, Object value) { + if (!type().validator().validate(name(), value)) { + throw new IllegalArgumentException( + "Invalid value '" + value + "' to set on setting '" + name() + "': expected type = " + type); + } + + if (null != validator() && !validator().validate(name(), value)) { + throw new IllegalArgumentException( + "Invalid value '" + value + "' to set on setting '" + name() + "': required '" + validator() + "'"); + } + + if (value instanceof Class) { + conf.setProperty(name(), ((Class) value).getName()); + } else { + conf.setProperty(name(), value); + } + } + + /** + * Retrieve the setting from the configuration conf as a {@link Long} value. 
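As a hedged illustration of the `set`/`validate` contract just defined (key name and values are invented; `RangeValidator` is this patch's numeric validator, and `BaseConfiguration` is the stock commons-configuration implementation):

```java
ConfigKey numThreads = ConfigKey.builder("numThreads")
        .type(Type.INT)
        .validator(RangeValidator.atLeast(1))
        .defaultValue(4)
        .build();

Configuration conf = new org.apache.commons.configuration.BaseConfiguration();
numThreads.set(conf, 8);     // passes both the Type.INT check and the range validator
try {
    numThreads.set(conf, 0); // rejected by the validator
} catch (IllegalArgumentException e) {
    // "Invalid value '0' to set on setting 'numThreads': required '[1, ...]'"
}
numThreads.validate(conf);   // throws ConfigException if the stored value is invalid
```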
+ * + * @param conf configuration to retrieve the setting + * @return the value as a long number + */ + public long getLong(Configuration conf) { + checkArgument(type() == Type.LONG, "'" + name() + "' is NOT a LONG numeric setting"); + return conf.getLong(name(), (Long) defaultValue()); + } + + /** + * Retrieve the setting from the configuration conf as an {@link Integer} value. + * + * @param conf configuration to retrieve the setting + * @return the value as an integer number + */ + public int getInt(Configuration conf) { + checkArgument(type() == Type.INT, "'" + name() + "' is NOT an INT numeric setting"); + return conf.getInt(name(), (Integer) defaultValue()); + } + + /** + * Retrieve the setting from the configuration conf as a {@link Short} value. + * + * @param conf configuration to retrieve the setting + * @return the value as a short number + */ + public short getShort(Configuration conf) { + checkArgument(type() == Type.SHORT, "'" + name() + "' is NOT a SHORT numeric setting"); + return conf.getShort(name(), (Short) defaultValue()); + } + + /** + * Retrieve the setting from the configuration conf as a {@link Boolean} value. + * + * @param conf configuration to retrieve the setting + * @return the value as a boolean flag + */ + public boolean getBoolean(Configuration conf) { + checkArgument(type() == Type.BOOLEAN, "'" + name() + "' is NOT a BOOLEAN setting"); + return conf.getBoolean(name(), (Boolean) defaultValue()); + } + + /** + * Retrieve the setting from the configuration conf as a {@link Double} value. + * + * @param conf configuration to retrieve the setting + * @return the value as a double number + */ + public double getDouble(Configuration conf) { + checkArgument(type() == Type.DOUBLE, "'" + name() + "' is NOT a DOUBLE numeric setting"); + return conf.getDouble(name(), (Double) defaultValue()); + } + + /** + * Retrieve the setting from the configuration conf as a {@link String} value. + * + * @param conf configuration to retrieve the setting + * @return the value as a string. + */ + public String getString(Configuration conf) { + return conf.getString(name(), defaultValueAsString()); + } + + /** + * Retrieve the setting from the configuration conf as a {@link Class} value. + * + * @param conf configuration to retrieve the setting + * @return the value as a class + */ + @SuppressWarnings("unchecked") + public Class getClass(Configuration conf, Class interfaceCls) { + checkArgument(type() == Type.CLASS, "'" + name() + "' is NOT a CLASS setting"); + try { + Class defaultClass = (Class) defaultValue(); + return ReflectionUtils.getClass(conf, name(), defaultClass, interfaceCls, getClass().getClassLoader()); + } catch (ConfigurationException e) { + throw new IllegalArgumentException("Invalid class set for setting '" + name() + "': ", e); + } + } + + /** + * Retrieve the setting from the configuration conf as a {@link Class} value. + * + * @param conf configuration to retrieve the setting + * @return the value as a class + */ + @SuppressWarnings("unchecked") + public Class getClass(Configuration conf) { + checkArgument(type() == Type.CLASS, "'" + name() + "' is NOT a CLASS setting"); + try { + Class defaultClass = (Class) defaultValue(); + return ReflectionUtils.getClass(conf, name(), defaultClass, getClass().getClassLoader()); + } catch (ConfigurationException e) { + throw new IllegalArgumentException("Invalid class set for setting '" + name() + "': ", e); + } + } + + /** + * Retrieve the setting from the configuration conf as a {@link List} value.
+ * + * @param conf configuration to retrieve the setting + * @return the value as list of values + */ + @SuppressWarnings("unchecked") + public List getList(Configuration conf) { + checkArgument(type() == Type.LIST, "'" + name() + "' is NOT a LIST setting"); + List list = (List) defaultValue(); + if (null == list) { + list = Collections.emptyList(); + } + return conf.getList(name(), list); + } + + /** + * Retrieve the setting value from the provided conf. + * + * @return the setting value + */ + public Object get(Configuration conf) { + switch (type()) { + case LONG: + return getLong(conf); + case INT: + return getInt(conf); + case SHORT: + return getShort(conf); + case DOUBLE: + return getDouble(conf); + case BOOLEAN: + return getBoolean(conf); + case LIST: + return getList(conf); + case CLASS: + return getClass(conf); + default: + return getString(conf); + } + } +} diff --git a/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/conf/ConfigKeyGroup.java b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/conf/ConfigKeyGroup.java new file mode 100644 index 00000000000..833e907b38e --- /dev/null +++ b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/conf/ConfigKeyGroup.java @@ -0,0 +1,111 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bookkeeper.common.conf; + +import java.util.Collections; +import java.util.Comparator; +import java.util.List; +import java.util.Objects; +import lombok.Builder; +import lombok.Builder.Default; +import lombok.Data; +import lombok.experimental.Accessors; +import org.apache.bookkeeper.common.annotation.InterfaceAudience.Public; + +/** + * Define a group of configuration settings. + */ +@Data +@Accessors(fluent = true) +@Builder(builderMethodName = "internalBuilder") +@Public +public class ConfigKeyGroup { + + /** + * Ordering the key groups in a configuration. + */ + public static final Comparator ORDERING = (o1, o2) -> { + int ret = Integer.compare(o1.order, o2.order); + if (0 == ret) { + return o1.name().compareTo(o2.name()); + } else { + return ret; + } + }; + + /** + * Create a config key group of name. + * + * @param name key group name + * @return key group builder + */ + public static ConfigKeyGroupBuilder builder(String name) { + return internalBuilder().name(name); + } + + /** + * The default key group. + */ + public static final ConfigKeyGroup DEFAULT = builder("").build(); + + /** + * Name of the key group. + */ + private String name; + + /** + * Description of the key group. + */ + @Default + private String description = ""; + + /** + * The list of sub key-groups of this key group. + */ + @Default + private List children = Collections.emptyList(); + + /** + * The order of the key-group in a configuration. 
+ */ + @Default + private int order = Integer.MIN_VALUE; + + /** + * {@inheritDoc} + */ + @Override + public boolean equals(Object o) { + if (!(o instanceof ConfigKeyGroup)) { + return false; + } + ConfigKeyGroup other = (ConfigKeyGroup) o; + return Objects.equals(name, other.name); + } + + /** + * {@inheritDoc} + */ + @Override + public int hashCode() { + return name.hashCode(); + } + +} diff --git a/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/conf/Type.java b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/conf/Type.java new file mode 100644 index 00000000000..c48e94c8b97 --- /dev/null +++ b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/conf/Type.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bookkeeper.common.conf; + +import java.util.List; +import org.apache.bookkeeper.common.annotation.InterfaceAudience.Public; + +/** + * Config key types. + */ +@Public +public enum Type { + + BOOLEAN((name, value) -> value instanceof Boolean), + STRING((name, value) -> value instanceof String), + INT((name, value) -> value instanceof Integer), + SHORT((name, value) -> value instanceof Short), + LONG((name, value) -> value instanceof Long), + DOUBLE((name, value) -> value instanceof Double), + LIST((name, value) -> value instanceof List), + CLASS((name, value) -> value instanceof Class || value instanceof String); + + private Validator validator; + + Type(Validator validator) { + this.validator = validator; + } + + public Validator validator() { + return validator; + } + +} diff --git a/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/conf/Validator.java b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/conf/Validator.java new file mode 100644 index 00000000000..249ad310416 --- /dev/null +++ b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/conf/Validator.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
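The `ORDERING` comparator and name-based equality above give groups the following behavior (group names invented, `java.util` imports elided):

```java
List<ConfigKeyGroup> groups = new ArrayList<>(Arrays.asList(
        ConfigKeyGroup.builder("storage").order(20).build(),
        ConfigKeyGroup.builder("security").order(10).build(),
        ConfigKeyGroup.builder("server").order(10).build()));

groups.sort(ConfigKeyGroup.ORDERING);
// -> security, server, storage : lower order() first, name() breaks ties

// equals()/hashCode() consider only the name, so these are the "same" group:
ConfigKeyGroup.builder("server").order(1).build()
        .equals(ConfigKeyGroup.builder("server").order(99).build()); // true
```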
+ */ + +package org.apache.bookkeeper.common.conf; + +import org.apache.bookkeeper.common.annotation.InterfaceAudience.Public; + +/** + * Validator that validates configuration settings. + */ +@Public +public interface Validator { + + /** + * Validates the configuration value. + * + * @param name name of the configuration setting + * @param value value of the configuration setting + * @return true if it is a valid value, otherwise false. + */ + boolean validate(String name, Object value); + + /** + * Return the documentation for a given validator. + * + * @return the documentation for a given validator + */ + default String documentation() { + return ""; + } + +} diff --git a/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/conf/package-info.java b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/conf/package-info.java index ba4d756491f..aa2e5eaa67f 100644 --- a/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/conf/package-info.java +++ b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/conf/package-info.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information diff --git a/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/conf/validators/ClassValidator.java b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/conf/validators/ClassValidator.java new file mode 100644 index 00000000000..dcd5f41cbdc --- /dev/null +++ b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/conf/validators/ClassValidator.java @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bookkeeper.common.conf.validators; + +import lombok.Data; +import lombok.extern.slf4j.Slf4j; +import org.apache.bookkeeper.common.conf.Validator; +import org.apache.bookkeeper.common.util.ReflectionUtils; + +/** + * Validator that validates a configuration setting is returning a given type of class. + */ +@Slf4j +@Data +public class ClassValidator implements Validator { + + /** + * Create a validator to validate if a setting is returning a class that extends from + * interfaceClass. 
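Because `Validator` is a single-method interface with an optional `documentation()` default, a custom constraint stays small; a sketch (this non-blank check is illustrative, not part of the patch):

```java
Validator nonBlank = new Validator() {
    @Override
    public boolean validate(String name, Object value) {
        // Accept only non-empty, non-whitespace string values.
        return value instanceof String && !((String) value).trim().isEmpty();
    }

    @Override
    public String documentation() {
        // Rendered as "@constraints : non-blank string" by ConfigDef.save().
        return "non-blank string";
    }
};
```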
+ * + * @param interfaceClass interface class + * @return the validator that expects a setting return a class that extends from interfaceClass + */ + public static ClassValidator of(Class interfaceClass) { + return new ClassValidator<>(interfaceClass); + } + + private final Class interfaceClass; + + @Override + public boolean validate(String name, Object value) { + if (value instanceof String) { + try { + ReflectionUtils.forName((String) value, interfaceClass); + return true; + } catch (RuntimeException re) { + log.warn("Setting value of '{}' is not '{}' : {}", + name, interfaceClass.getName(), value, re); + return false; + } + } else if (value instanceof Class) { + Class cls = (Class) value; + if (!interfaceClass.isAssignableFrom(cls)) { + log.warn("Setting value of '{}' is not '{}' : {}", + name, interfaceClass.getName(), cls.getName()); + return false; + } else { + return true; + } + } else { + return false; + } + } + + @Override + public String toString() { + return "Class extends " + interfaceClass.getName(); + } + + @Override + public String documentation() { + return "class extends `" + interfaceClass.getName() + "`"; + } +} diff --git a/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/conf/validators/NullValidator.java b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/conf/validators/NullValidator.java new file mode 100644 index 00000000000..1d384dfb73a --- /dev/null +++ b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/conf/validators/NullValidator.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bookkeeper.common.conf.validators; + +import org.apache.bookkeeper.common.conf.Validator; + +/** + * A validator that does nothing. + */ +public class NullValidator implements Validator { + + /** + * Return the instance of NullValidator. + * + * @return the instance of NullValidator. + */ + public static NullValidator of() { + return INSTANCE; + } + + private static final NullValidator INSTANCE = new NullValidator(); + + private NullValidator() {} + + @Override + public boolean validate(String name, Object value) { + return true; + } +} diff --git a/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/conf/validators/RangeValidator.java b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/conf/validators/RangeValidator.java new file mode 100644 index 00000000000..06147d0c0e5 --- /dev/null +++ b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/conf/validators/RangeValidator.java @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bookkeeper.common.conf.validators; + +import lombok.Data; +import org.apache.bookkeeper.common.conf.Validator; + +/** + * Validator that validates a configuration value is in a numeric range. + */ +@Data +public class RangeValidator implements Validator { + + /** + * A numeric range that checks the lower bound. + * + * @param min the minimum acceptable value + * @return a numeric range that checks the lower bound + */ + public static RangeValidator atLeast(Number min) { + return new RangeValidator(min, null); + } + + /** + * A numeric range that checks the upper bound. + * + * @param max the maximum acceptable value + * @return a numeric range that checks the upper bound + */ + public static RangeValidator atMost(Number max) { + return new RangeValidator(null, max); + } + + /** + * A numeric range that checks both lower and upper bounds. + * + * @param min the minimum acceptable value + * @param max the maximum acceptable value + * @return a numeric range that checks both lower and upper bounds + */ + public static RangeValidator between(Number min, Number max) { + return new RangeValidator(min, max); + } + + private final Number min; + private final Number max; + + @Override + public boolean validate(String name, Object value) { + if (value instanceof Number) { + Number n = (Number) value; + if (min != null && n.doubleValue() < min.doubleValue()) { + return false; + } else { + return max == null || n.doubleValue() <= max.doubleValue(); + } + } else { + return false; + } + } + + @Override + public String toString() { + if (null == min) { + return "[... , " + max + "]"; + } else if (null == max) { + return "[" + min + ", ...]"; + } else { + return "[" + min + ", " + max + "]"; + } + } + + @Override + public String documentation() { + return toString(); + } +} diff --git a/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/conf/validators/package-info.java b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/conf/validators/package-info.java new file mode 100644 index 00000000000..e4c141a0f93 --- /dev/null +++ b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/conf/validators/package-info.java @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/** + * A collection of validators that validate configuration settings. + */ +package org.apache.bookkeeper.common.conf.validators; \ No newline at end of file diff --git a/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/net/ServiceURI.java b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/net/ServiceURI.java index 289c3f3726c..3d419f3ccfc 100644 --- a/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/net/ServiceURI.java +++ b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/net/ServiceURI.java @@ -27,6 +27,7 @@ import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; import java.net.URI; import java.util.List; +import java.util.stream.Collectors; import lombok.AccessLevel; import lombok.EqualsAndHashCode; import lombok.Getter; @@ -122,6 +123,13 @@ public class ServiceURI { * Service string for bookkeeper service. */ public static final String SERVICE_BK = "bk"; + public static final int SERVICE_BK_PORT = 4181; + + /** + * The default local bk service uri. + */ + public static final ServiceURI DEFAULT_LOCAL_STREAM_STORAGE_SERVICE_URI = + ServiceURI.create("bk://localhost:4181"); private static final String SERVICE_SEP = "+"; private static final String SERVICE_DLOG_SEP = "-"; @@ -154,7 +162,7 @@ public static ServiceURI create(String uriStr) { public static ServiceURI create(URI uri) { checkNotNull(uri, "service uri instance is null"); - String serviceName = null; + String serviceName; String[] serviceInfos = new String[0]; String scheme = uri.getScheme(); if (null != scheme) { @@ -169,6 +177,8 @@ public static ServiceURI create(URI uri) { serviceName = schemeParts[0]; serviceInfos = new String[schemeParts.length - 1]; System.arraycopy(schemeParts, 1, serviceInfos, 0, serviceInfos.length); + } else { + serviceName = null; } String userAndHostInformation = uri.getAuthority(); @@ -186,7 +196,10 @@ public static ServiceURI create(URI uri) { serviceUser = null; serviceHosts = splitter.splitToList(userAndHostInformation); } - serviceHosts.forEach(host -> validateHostName(host)); + serviceHosts = serviceHosts + .stream() + .map(host -> validateHostName(serviceName, host)) + .collect(Collectors.toList()); String servicePath = uri.getPath(); checkArgument(null != servicePath, @@ -201,7 +214,7 @@ public static ServiceURI create(URI uri) { uri); } - private static void validateHostName(String hostname) { + private static String validateHostName(String serviceName, String hostname) { String[] parts = hostname.split(":"); if (parts.length >= 3) { throw new IllegalArgumentException("Invalid hostname : " + hostname); @@ -211,8 +224,12 @@ private static void validateHostName(String hostname) { } catch (NumberFormatException nfe) { throw new IllegalArgumentException("Invalid hostname : " + hostname); } + return hostname; + } else if (parts.length == 1 && serviceName.toLowerCase().equals(SERVICE_BK)) { + return hostname + ":" + SERVICE_BK_PORT; + } else { + return hostname; } - } private final String serviceName; diff --git a/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/stats/BroadCastStatsLogger.java b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/stats/BroadCastStatsLogger.java index 1b9fdea6dcf..26cb3349575 100644 --- a/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/stats/BroadCastStatsLogger.java +++ b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/stats/BroadCastStatsLogger.java 
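The net effect of the `ServiceURI` hunk above: `validateHostName` now returns the hostname, rewritten when a bare `bk://` host omits the port, so such URIs are normalized to `SERVICE_BK_PORT` (4181). A hedged example (`getServiceHosts` is the class's existing accessor, outside this excerpt):

```java
// No port given: the host is normalized to bookie-1:4181.
ServiceURI.create("bk://bookie-1/ledgers").getServiceHosts();      // ["bookie-1:4181"]

// An explicit port is left untouched.
ServiceURI.create("bk://bookie-2:3181/ledgers").getServiceHosts(); // ["bookie-2:3181"]
```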
@@ -127,9 +127,16 @@ public void dec() { } @Override - public void add(long l) { - firstCounter.add(l); - secondCounter.add(l); + public void addCount(long l) { + firstCounter.addCount(l); + secondCounter.addCount(l); + } + + @Override + public void addLatency(long eventLatency, TimeUnit unit) { + long valueMillis = unit.toMillis(eventLatency); + firstCounter.addCount(valueMillis); + secondCounter.addCount(valueMillis); } @Override @@ -167,6 +174,22 @@ public void removeScope(String scope, StatsLogger statsLogger) { first.removeScope(scope, another.first); second.removeScope(scope, another.second); } + + /** + Thread-scoped stats not currently supported. + */ + @Override + public OpStatsLogger getThreadScopedOpStatsLogger(String name) { + return getOpStatsLogger(name); + } + + /** + Thread-scoped stats not currently supported. + */ + @Override + public Counter getThreadScopedCounter(String name) { + return getCounter(name); + } } /** diff --git a/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/stats/OpStatsListener.java b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/stats/OpStatsListener.java index ca6eb74c4bb..3e8cd60205b 100644 --- a/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/stats/OpStatsListener.java +++ b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/stats/OpStatsListener.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information diff --git a/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/stats/package-info.java b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/stats/package-info.java index f211381595c..0e2c831c412 100644 --- a/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/stats/package-info.java +++ b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/stats/package-info.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information diff --git a/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/util/BoundedExecutorService.java b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/util/BoundedExecutorService.java index 655d49de716..e7d9303830b 100644 --- a/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/util/BoundedExecutorService.java +++ b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/util/BoundedExecutorService.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
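The `addLatency` override above folds latencies into both broadcast counters as milliseconds; a hedged sketch (the `two(...)` factory and the `Counter`/`StatsLogger` interfaces live outside this excerpt, so treat the wiring as assumed):

```java
StatsLogger broadcast = BroadCastStatsLogger.two(statsLoggerA, statsLoggerB);
Counter counter = broadcast.getCounter("write-latency");

counter.addLatency(5, TimeUnit.SECONDS);
// both underlying counters receive addCount(5000), the latency converted to millis
```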
See the NOTICE file * distributed with this work for additional information @@ -18,7 +18,6 @@ package org.apache.bookkeeper.common.util; import com.google.common.util.concurrent.ForwardingExecutorService; - import java.util.Collection; import java.util.List; import java.util.concurrent.BlockingQueue; diff --git a/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/util/BoundedScheduledExecutorService.java b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/util/BoundedScheduledExecutorService.java index 44c6f38d279..bc30c18a537 100644 --- a/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/util/BoundedScheduledExecutorService.java +++ b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/util/BoundedScheduledExecutorService.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -23,7 +23,6 @@ import com.google.common.util.concurrent.ListeningExecutorService; import com.google.common.util.concurrent.ListeningScheduledExecutorService; import com.google.common.util.concurrent.MoreExecutors; - import java.util.Collection; import java.util.List; import java.util.concurrent.BlockingQueue; diff --git a/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/util/ExceptionMessageHelper.java b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/util/ExceptionMessageHelper.java new file mode 100644 index 00000000000..f5facdca054 --- /dev/null +++ b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/util/ExceptionMessageHelper.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.bookkeeper.common.util; + +/** + * Utility to make it easier to add context to exception messages. 
+ */ +public class ExceptionMessageHelper { + public StringBuilder sb = new StringBuilder(); + private boolean firstKV = true; + + public static ExceptionMessageHelper exMsg(String msg) { + return new ExceptionMessageHelper(msg); + } + + ExceptionMessageHelper(String msg) { + sb.append(msg).append("("); + } + + public ExceptionMessageHelper kv(String key, Object value) { + if (firstKV) { + firstKV = false; + } else { + sb.append(","); + } + sb.append(key).append("=").append(value.toString()); + return this; + } + + public String toString() { + return sb.append(")").toString(); + } +} diff --git a/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/util/JsonUtil.java b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/util/JsonUtil.java index afc90a4f48b..90576aeab1a 100644 --- a/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/util/JsonUtil.java +++ b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/util/JsonUtil.java @@ -30,7 +30,7 @@ public static String toJson(Object object) throws ParseJsonException { try { return mapper.writerWithDefaultPrettyPrinter().writeValueAsString(object); } catch (Exception e) { - throw new ParseJsonException("Failed to serialize Object to Json string"); + throw new ParseJsonException("Failed to serialize Object to Json string", e); } } @@ -38,7 +38,7 @@ public static T fromJson(String jsonStr, Class valueType) throws ParseJso try { return mapper.readValue(jsonStr, valueType); } catch (Exception e) { - throw new ParseJsonException("Failed to deserialize Object from Json string"); + throw new ParseJsonException("Failed to deserialize Object from Json string", e); } } @@ -49,5 +49,9 @@ public static class ParseJsonException extends Exception { public ParseJsonException(String message) { super(message); } + + public ParseJsonException(String message, Throwable cause) { + super(message, cause); + } } } diff --git a/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/util/MathUtils.java b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/util/MathUtils.java index 94999a49ec4..7e38d852a31 100644 --- a/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/util/MathUtils.java +++ b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/util/MathUtils.java @@ -40,21 +40,6 @@ public static int findNextPositivePowerOfTwo(final int value) { return 1 << (32 - Integer.numberOfLeadingZeros(value - 1)); } - /** - * Current time from some arbitrary time base in the past, counting in - * milliseconds, and not affected by settimeofday or similar system clock - * changes. This is appropriate to use when computing how much longer to - * wait for an interval to expire. - * - *
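Usage of the helper above reads as a fluent key=value message builder; a short sketch (the failure scenario is invented). One caveat worth noting: `toString()` appends the closing parenthesis to the shared `StringBuilder`, so call it only once per instance:

```java
import static org.apache.bookkeeper.common.util.ExceptionMessageHelper.exMsg;

throw new java.io.IOException(
        exMsg("Failed to read entry")
                .kv("ledgerId", 123L)
                .kv("entryId", 4L)
                .toString());
// message: "Failed to read entry(ledgerId=123,entryId=4)"
```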
NOTE: only use it for measuring. - * http://docs.oracle.com/javase/1.5.0/docs/api/java/lang/System.html#nanoTime%28%29 - * - * @return current time in milliseconds. - */ - public static long now() { - return System.nanoTime() / NANOSECONDS_PER_MILLISECOND; - } - /** * Current time from some arbitrary time base in the past, counting in * nanoseconds, and not affected by settimeofday or similar system clock @@ -86,7 +71,7 @@ public static long elapsedMSec(long startNanoTime) { * the only conversion happens when computing the elapsed time. * * @param startNanoTime the start of the interval that we are measuring - * @return elapsed time in milliseconds. + * @return elapsed time in microseconds. */ public static long elapsedMicroSec(long startNanoTime) { return TimeUnit.NANOSECONDS.toMicros(System.nanoTime() - startNanoTime); @@ -97,7 +82,7 @@ public static long elapsedMicroSec(long startNanoTime) { * the only conversion happens when computing the elapsed time. * * @param startNanoTime the start of the interval that we are measuring - * @return elapsed time in milliseconds. + * @return elapsed time in nanoseconds. */ public static long elapsedNanos(long startNanoTime) { return System.nanoTime() - startNanoTime; diff --git a/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/util/MdcUtils.java b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/util/MdcUtils.java new file mode 100644 index 00000000000..c7a2f4e3c08 --- /dev/null +++ b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/util/MdcUtils.java @@ -0,0 +1,39 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +package org.apache.bookkeeper.common.util; + +import java.util.Map; +import org.slf4j.MDC; + +/** + * Utils for work with Slf4j MDC. + */ +public class MdcUtils { + + public static void restoreContext(Map mdcContextMap) { + if (mdcContextMap == null || mdcContextMap.isEmpty()) { + MDC.clear(); + } else { + MDC.setContextMap(mdcContextMap); + } + } +} diff --git a/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/util/MemoryLimitController.java b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/util/MemoryLimitController.java new file mode 100644 index 00000000000..8a67072c5a6 --- /dev/null +++ b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/util/MemoryLimitController.java @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
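A hedged sketch of the intended `MdcUtils` flow: capture the MDC map on the submitting thread, restore it inside the task, and clear it afterwards (the `executor` and `log` here are placeholders; `OrderedExecutor`'s `ContextPreservingRunnable`, later in this patch, automates exactly this):

```java
Map<String, String> callerMdc = MDC.getCopyOfContextMap(); // captured on the caller thread

executor.execute(() -> {
    MdcUtils.restoreContext(callerMdc); // clears the MDC if the captured map is null/empty
    try {
        log.info("request handled");    // carries the caller's MDC entries
    } finally {
        MDC.clear();
    }
});
```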
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.bookkeeper.common.util; + +import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.locks.Condition; +import java.util.concurrent.locks.ReentrantLock; + +/** + * Controller for tracking the amount of memory used for some task. + */ +public class MemoryLimitController { + + private final long memoryLimit; + private final AtomicLong currentUsage = new AtomicLong(); + private final ReentrantLock mutex = new ReentrantLock(false); + private final Condition condition = mutex.newCondition(); + + public MemoryLimitController(long memoryLimitBytes) { + this.memoryLimit = memoryLimitBytes; + } + + public boolean tryReserveMemory(long size) { + while (true) { + long current = currentUsage.get(); + long newUsage = current + size; + + // We allow one request to go over the limit, to make the notification + // path simpler and more efficient + if (current > memoryLimit && memoryLimit > 0) { + return false; + } + + if (currentUsage.compareAndSet(current, newUsage)) { + return true; + } + } + } + + public void reserveMemory(long size) throws InterruptedException { + if (!tryReserveMemory(size)) { + mutex.lock(); + + try { + while (!tryReserveMemory(size)) { + condition.await(); + } + } finally { + mutex.unlock(); + } + } + } + + public void releaseMemory(long size) { + long newUsage = currentUsage.addAndGet(-size); + if (newUsage + size > memoryLimit && newUsage <= memoryLimit) { + // We just crossed the limit. Now we have more space + mutex.lock(); + try { + condition.signalAll(); + } finally { + mutex.unlock(); + } + } + } + + public long currentUsage() { + return currentUsage.get(); + } +} \ No newline at end of file diff --git a/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/util/OrderedExecutor.java b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/util/OrderedExecutor.java index 6a86141256f..a6f137f4aaa 100644 --- a/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/util/OrderedExecutor.java +++ b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/util/OrderedExecutor.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
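To make the reservation protocol above concrete, a hedged usage sketch (`enqueue` is a hypothetical async write path): producers block in `reserveMemory` once usage crosses the limit, and the `releaseMemory` call that brings usage back under the limit wakes them all:

```java
MemoryLimitController memLimit = new MemoryLimitController(64 * 1024 * 1024); // 64 MiB budget

void write(byte[] payload) throws InterruptedException {
    memLimit.reserveMemory(payload.length); // blocks while the budget is exhausted
    try {
        enqueue(payload);                   // hypothetical asynchronous write
    } finally {
        // in a real pipeline this happens in the write-completion callback
        memLimit.releaseMemory(payload.length);
    }
}
```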
See the NOTICE file * distributed with this work for additional information @@ -19,42 +19,37 @@ import static com.google.common.base.Preconditions.checkArgument; +import com.google.common.util.concurrent.ForwardingExecutorService; import com.google.common.util.concurrent.ListenableFuture; import com.google.common.util.concurrent.SettableFuture; import com.google.common.util.concurrent.ThreadFactoryBuilder; - import io.netty.util.concurrent.DefaultThreadFactory; - import java.util.ArrayList; import java.util.Collection; import java.util.List; +import java.util.Map; import java.util.Random; import java.util.concurrent.Callable; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Future; -import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.ThreadFactory; import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; - +import java.util.stream.Collectors; import lombok.extern.slf4j.Slf4j; - +import org.apache.bookkeeper.common.util.affinity.CpuAffinity; import org.apache.bookkeeper.stats.Gauge; import org.apache.bookkeeper.stats.NullStatsLogger; import org.apache.bookkeeper.stats.OpStatsLogger; import org.apache.bookkeeper.stats.StatsLogger; +import org.apache.bookkeeper.stats.ThreadRegistry; import org.apache.commons.lang.StringUtils; +import org.slf4j.MDC; /** - * This class provides 2 things over the java {@link ExecutorService}. - * - *
1. It takes {@link SafeRunnable objects} instead of plain Runnable objects. - * This means that exceptions in scheduled tasks wont go unnoticed and will be - * logged. - * - *
2. It supports submitting tasks with an ordering key, so that tasks submitted + * This class supports submitting tasks with an ordering key, so that tasks submitted * with the same key will always be executed in order, but tasks across * different keys can be unordered. This retains parallelism while retaining the * basic amount of ordering we want (e.g. , per ledger handle). Ordering is @@ -64,18 +59,22 @@ @Slf4j public class OrderedExecutor implements ExecutorService { public static final int NO_TASK_LIMIT = -1; + private static final int DEFAULT_MAX_ARRAY_QUEUE_SIZE = 10_000; protected static final long WARN_TIME_MICRO_SEC_DEFAULT = TimeUnit.SECONDS.toMicros(1); final String name; - final ExecutorService threads[]; - final long threadIds[]; + final ExecutorService[] threads; + final long[] threadIds; final Random rand = new Random(); final OpStatsLogger taskExecutionStats; final OpStatsLogger taskPendingStats; final boolean traceTaskExecution; + final boolean preserveMdcForTaskExecution; final long warnTimeMicroSec; final int maxTasksInQueue; - + final boolean enableBusyWait; + // we only want thread-scoped metrics on the server-side where it can be explicitly enabled + final boolean enableThreadScopedMetrics; public static Builder newBuilder() { return new Builder(); @@ -92,7 +91,9 @@ public OrderedExecutor build() { threadFactory = new DefaultThreadFactory("bookkeeper-ordered-safe-executor"); } return new OrderedExecutor(name, numThreads, threadFactory, statsLogger, - traceTaskExecution, warnTimeMicroSec, maxTasksInQueue); + traceTaskExecution, preserveMdcForTaskExecution, + warnTimeMicroSec, maxTasksInQueue, enableBusyWait, + enableThreadScopedMetrics); } } @@ -105,8 +106,11 @@ public abstract static class AbstractBuilder { protected ThreadFactory threadFactory = null; protected StatsLogger statsLogger = NullStatsLogger.INSTANCE; protected boolean traceTaskExecution = false; + protected boolean preserveMdcForTaskExecution = false; protected long warnTimeMicroSec = WARN_TIME_MICRO_SEC_DEFAULT; protected int maxTasksInQueue = NO_TASK_LIMIT; + protected boolean enableBusyWait = false; + protected boolean enableThreadScopedMetrics = false; public AbstractBuilder name(String name) { this.name = name; @@ -138,11 +142,26 @@ public AbstractBuilder traceTaskExecution(boolean enabled) { return this; } + public AbstractBuilder preserveMdcForTaskExecution(boolean enabled) { + this.preserveMdcForTaskExecution = enabled; + return this; + } + public AbstractBuilder traceTaskWarnTimeMicroSec(long warnTimeMicroSec) { this.warnTimeMicroSec = warnTimeMicroSec; return this; } + public AbstractBuilder enableBusyWait(boolean enableBusyWait) { + this.enableBusyWait = enableBusyWait; + return this; + } + + public AbstractBuilder enableThreadScopedMetrics(boolean enableThreadScopedMetrics) { + this.enableThreadScopedMetrics = enableThreadScopedMetrics; + return this; + } + @SuppressWarnings("unchecked") public T build() { if (null == threadFactory) { @@ -154,8 +173,11 @@ public T build() { threadFactory, statsLogger, traceTaskExecution, + preserveMdcForTaskExecution, warnTimeMicroSec, - maxTasksInQueue); + maxTasksInQueue, + enableBusyWait, + enableThreadScopedMetrics); } } @@ -165,32 +187,172 @@ public T build() { protected class TimedRunnable implements Runnable { final Runnable runnable; final long initNanos; + final Class runnableClass; TimedRunnable(Runnable runnable) { this.runnable = runnable; this.initNanos = MathUtils.nowInNano(); + this.runnableClass = runnable.getClass(); } @Override public void 
run() { taskPendingStats.registerSuccessfulEvent(MathUtils.elapsedNanos(initNanos), TimeUnit.NANOSECONDS); long startNanos = MathUtils.nowInNano(); - this.runnable.run(); - long elapsedMicroSec = MathUtils.elapsedMicroSec(startNanos); - taskExecutionStats.registerSuccessfulEvent(elapsedMicroSec, TimeUnit.MICROSECONDS); - if (elapsedMicroSec >= warnTimeMicroSec) { - log.warn("Runnable {}:{} took too long {} micros to execute.", runnable, runnable.getClass(), - elapsedMicroSec); + try { + this.runnable.run(); + } finally { + long elapsedMicroSec = MathUtils.elapsedMicroSec(startNanos); + taskExecutionStats.registerSuccessfulEvent(elapsedMicroSec, TimeUnit.MICROSECONDS); + if (elapsedMicroSec >= warnTimeMicroSec) { + log.warn("Runnable {} took too long {} micros to execute.", runnableClass, elapsedMicroSec); + } } } } - protected ThreadPoolExecutor createSingleThreadExecutor(ThreadFactory factory) { - return new ThreadPoolExecutor(1, 1, 0L, TimeUnit.MILLISECONDS, new LinkedBlockingQueue<>(), factory); + /** + * Decorator class for a callable that measure the execution time. + */ + protected class TimedCallable implements Callable { + final Callable callable; + final long initNanos; + final Class callableClass; + + TimedCallable(Callable callable) { + this.callable = callable; + this.initNanos = MathUtils.nowInNano(); + this.callableClass = callable.getClass(); + } + + @Override + public T call() throws Exception { + taskPendingStats.registerSuccessfulEvent(MathUtils.elapsedNanos(initNanos), TimeUnit.NANOSECONDS); + long startNanos = MathUtils.nowInNano(); + try { + return this.callable.call(); + } finally { + long elapsedMicroSec = MathUtils.elapsedMicroSec(startNanos); + taskExecutionStats.registerSuccessfulEvent(elapsedMicroSec, TimeUnit.MICROSECONDS); + if (elapsedMicroSec >= warnTimeMicroSec) { + log.warn("Callable {} took too long {} micros to execute.", callableClass, elapsedMicroSec); + } + } + } } - protected ExecutorService getBoundedExecutor(ThreadPoolExecutor executor) { - return new BoundedExecutorService(executor, this.maxTasksInQueue); + /** + * Decorator class for a runnable that preserves MDC context. + */ + static class ContextPreservingRunnable implements Runnable { + private final Runnable runnable; + private final Map mdcContextMap; + + ContextPreservingRunnable(Runnable runnable) { + this.runnable = runnable; + this.mdcContextMap = MDC.getCopyOfContextMap(); + } + + @Override + public void run() { + MdcUtils.restoreContext(mdcContextMap); + try { + runnable.run(); + } finally { + MDC.clear(); + } + } + } + + /** + * Decorator class for a callable that preserves MDC context. 
+ */ + static class ContextPreservingCallable implements Callable { + private final Callable callable; + private final Map mdcContextMap; + + ContextPreservingCallable(Callable callable) { + this.callable = callable; + this.mdcContextMap = MDC.getCopyOfContextMap(); + } + + @Override + public T call() throws Exception { + MdcUtils.restoreContext(mdcContextMap); + try { + return callable.call(); + } finally { + MDC.clear(); + } + } + } + + protected ExecutorService createSingleThreadExecutor(ThreadFactory factory) { + if (maxTasksInQueue > 0) { + return new SingleThreadExecutor(factory, maxTasksInQueue, true); + } else { + return new SingleThreadExecutor(factory); + } + } + + protected ExecutorService getBoundedExecutor(ExecutorService executor) { + checkArgument(executor instanceof ThreadPoolExecutor); + return new BoundedExecutorService((ThreadPoolExecutor) executor, this.maxTasksInQueue); + } + + protected ExecutorService addExecutorDecorators(ExecutorService executor) { + return new ForwardingExecutorService() { + @Override + protected ExecutorService delegate() { + return executor; + } + + @Override + public List> invokeAll(Collection> tasks) + throws InterruptedException { + return super.invokeAll(timedCallables(tasks)); + } + + @Override + public List> invokeAll(Collection> tasks, + long timeout, TimeUnit unit) + throws InterruptedException { + return super.invokeAll(timedCallables(tasks), timeout, unit); + } + + @Override + public T invokeAny(Collection> tasks) + throws InterruptedException, ExecutionException { + return super.invokeAny(timedCallables(tasks)); + } + + @Override + public T invokeAny(Collection> tasks, + long timeout, TimeUnit unit) + throws InterruptedException, ExecutionException, TimeoutException { + return super.invokeAny(timedCallables(tasks), timeout, unit); + } + + @Override + public void execute(Runnable command) { + super.execute(timedRunnable(command)); + } + + @Override + public Future submit(Callable task) { + return super.submit(timedCallable(task)); + } + + @Override + public Future submit(Runnable task) { + return super.submit(timedRunnable(task)); + } + + @Override + public Future submit(Runnable task, T result) { + return super.submit(timedRunnable(task), result); + } + }; } /** @@ -206,6 +368,8 @@ protected ExecutorService getBoundedExecutor(ThreadPoolExecutor executor) { * - for reporting executor stats * @param traceTaskExecution * - should we stat task execution + * @param preserveMdcForTaskExecution + * - should we preserve MDC for task execution * @param warnTimeMicroSec * - log long task exec warning after this interval * @param maxTasksInQueue @@ -213,25 +377,52 @@ protected ExecutorService getBoundedExecutor(ThreadPoolExecutor executor) { */ protected OrderedExecutor(String baseName, int numThreads, ThreadFactory threadFactory, StatsLogger statsLogger, boolean traceTaskExecution, - long warnTimeMicroSec, int maxTasksInQueue) { + boolean preserveMdcForTaskExecution, long warnTimeMicroSec, int maxTasksInQueue, + boolean enableBusyWait, boolean enableThreadScopedMetrics) { checkArgument(numThreads > 0); checkArgument(!StringUtils.isBlank(baseName)); this.maxTasksInQueue = maxTasksInQueue; this.warnTimeMicroSec = warnTimeMicroSec; + this.enableBusyWait = enableBusyWait; + this.enableThreadScopedMetrics = enableThreadScopedMetrics; name = baseName; threads = new ExecutorService[numThreads]; threadIds = new long[numThreads]; for (int i = 0; i < numThreads; i++) { - ThreadPoolExecutor thread = createSingleThreadExecutor( + ExecutorService thread = 
createSingleThreadExecutor( new ThreadFactoryBuilder().setNameFormat(name + "-" + getClass().getSimpleName() + "-" + i + "-%d") .setThreadFactory(threadFactory).build()); - threads[i] = getBoundedExecutor(thread); + SingleThreadExecutor ste = null; + if (thread instanceof SingleThreadExecutor) { + ste = (SingleThreadExecutor) thread; + } + + if (traceTaskExecution || preserveMdcForTaskExecution) { + thread = addExecutorDecorators(thread); + } + threads[i] = thread; final int idx = i; try { threads[idx].submit(() -> { threadIds[idx] = Thread.currentThread().getId(); + + if (enableThreadScopedMetrics) { + ThreadRegistry.register(baseName, idx); + } + + if (enableBusyWait) { + // Try to acquire 1 CPU core to the executor thread. If it fails we + // are just logging the error and continuing, falling back to + // non-isolated CPUs. + try { + CpuAffinity.acquireCore(); + } catch (Throwable t) { + log.warn("Failed to acquire CPU core for thread {}: {}", Thread.currentThread().getName(), + t.getMessage(), t); + } + } }).get(); } catch (InterruptedException e) { Thread.currentThread().interrupt(); @@ -240,46 +431,54 @@ protected OrderedExecutor(String baseName, int numThreads, ThreadFactory threadF throw new RuntimeException("Couldn't start thread " + i, e); } - // Register gauges - statsLogger.registerGauge(String.format("%s-queue-%d", name, idx), new Gauge() { - @Override - public Number getDefaultValue() { - return 0; - } + if (ste != null) { + ste.registerMetrics(statsLogger); + } + } - @Override - public Number getSample() { - return thread.getQueue().size(); - } - }); - statsLogger.registerGauge(String.format("%s-completed-tasks-%d", name, idx), new Gauge() { - @Override - public Number getDefaultValue() { - return 0; - } + statsLogger.registerGauge(String.format("%s-threads", name), new Gauge() { + @Override + public Number getDefaultValue() { + return numThreads; + } - @Override - public Number getSample() { - return thread.getCompletedTaskCount(); - } - }); - statsLogger.registerGauge(String.format("%s-total-tasks-%d", name, idx), new Gauge() { - @Override - public Number getDefaultValue() { - return 0; - } + @Override + public Number getSample() { + return numThreads; + } + }); - @Override - public Number getSample() { - return thread.getTaskCount(); - } - }); - } + statsLogger.registerGauge(String.format("%s-max-queue-size", name), new Gauge() { + @Override + public Number getDefaultValue() { + return maxTasksInQueue; + } - // Stats - this.taskExecutionStats = statsLogger.scope(name).getOpStatsLogger("task_execution"); - this.taskPendingStats = statsLogger.scope(name).getOpStatsLogger("task_queued"); + @Override + public Number getSample() { + return maxTasksInQueue; + } + }); + + if (enableThreadScopedMetrics) { + this.taskExecutionStats = statsLogger.scope(name).getThreadScopedOpStatsLogger("task_execution"); + this.taskPendingStats = statsLogger.scope(name).getThreadScopedOpStatsLogger("task_queued"); + } else { + this.taskExecutionStats = statsLogger.scope(name).getOpStatsLogger("task_execution"); + this.taskPendingStats = statsLogger.scope(name).getOpStatsLogger("task_queued"); + } this.traceTaskExecution = traceTaskExecution; + this.preserveMdcForTaskExecution = preserveMdcForTaskExecution; + } + + /** + * Flag describing executor's expectation in regards of MDC. + * All tasks submitted through executor's submit/execute methods will automatically respect this. + * + * @return true if runnable/callable is expected to preserve MDC, false otherwise. 
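Pulling the new builder flags together, a hedged construction sketch (the name, thread count, and ledger id are placeholders; the `numThreads` setter follows the `AbstractBuilder` fields shown above): tasks sharing an ordering key are serialized on one thread, and `preserveMdcForTaskExecution` wraps each task in the MDC-preserving decorators above:

```java
OrderedExecutor executor = OrderedExecutor.newBuilder()
        .name("bookie-io")
        .numThreads(8)
        .traceTaskExecution(true)          // wrap tasks in TimedRunnable/TimedCallable
        .preserveMdcForTaskExecution(true) // wrap tasks in ContextPreserving* decorators
        .build();

long ledgerId = 42L;
executor.executeOrdered(ledgerId, () -> {
    // every task submitted with ledgerId 42 runs on the same thread, in order
});
```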
+ */ + public boolean preserveMdc() { + return preserveMdcForTaskExecution; } /** @@ -287,8 +486,8 @@ public Number getSample() { * @param orderingKey * @param r */ - public void executeOrdered(Object orderingKey, SafeRunnable r) { - chooseThread(orderingKey).execute(timedRunnable(r)); + public void executeOrdered(Object orderingKey, Runnable r) { + chooseThread(orderingKey).execute(r); } /** @@ -296,8 +495,8 @@ public void executeOrdered(Object orderingKey, SafeRunnable r) { * @param orderingKey * @param r */ - public void executeOrdered(long orderingKey, SafeRunnable r) { - chooseThread(orderingKey).execute(timedRunnable(r)); + public void executeOrdered(long orderingKey, Runnable r) { + chooseThread(orderingKey).execute(r); } /** @@ -305,8 +504,8 @@ public void executeOrdered(long orderingKey, SafeRunnable r) { * @param orderingKey * @param r */ - public void executeOrdered(int orderingKey, SafeRunnable r) { - chooseThread(orderingKey).execute(timedRunnable(r)); + public void executeOrdered(int orderingKey, Runnable r) { + chooseThread(orderingKey).execute(r); } public ListenableFuture submitOrdered(long orderingKey, Callable task) { @@ -330,7 +529,7 @@ public long getThreadID(long orderingKey) { return threadIds[0]; } - return threadIds[MathUtils.signSafeMod(orderingKey, threadIds.length)]; + return threadIds[chooseThreadIdx(orderingKey, threads.length)]; } public ExecutorService chooseThread() { @@ -351,7 +550,7 @@ public ExecutorService chooseThread(Object orderingKey) { if (null == orderingKey) { return threads[rand.nextInt(threads.length)]; } else { - return threads[MathUtils.signSafeMod(orderingKey.hashCode(), threads.length)]; + return threads[chooseThreadIdx(orderingKey.hashCode(), threads.length)]; } } @@ -366,15 +565,30 @@ public ExecutorService chooseThread(long orderingKey) { return threads[0]; } - return threads[MathUtils.signSafeMod(orderingKey, threads.length)]; + return threads[chooseThreadIdx(orderingKey, threads.length)]; } - private Runnable timedRunnable(Runnable r) { - if (traceTaskExecution) { - return new TimedRunnable(r); - } else { - return r; + protected static int chooseThreadIdx(long orderingKey, int numThreads) { + return MathUtils.signSafeMod(orderingKey >>> 1, numThreads); + } + + protected Runnable timedRunnable(Runnable r) { + final Runnable runMe = traceTaskExecution ? new TimedRunnable(r) : r; + return preserveMdcForTaskExecution ? new ContextPreservingRunnable(runMe) : runMe; + } + + protected Callable timedCallable(Callable c) { + final Callable callMe = traceTaskExecution ? new TimedCallable<>(c) : c; + return preserveMdcForTaskExecution ? 
new ContextPreservingCallable<>(callMe) : callMe; + } + + protected Collection> timedCallables(Collection> tasks) { + if (traceTaskExecution || preserveMdcForTaskExecution) { + return tasks.stream() + .map(this::timedCallable) + .collect(Collectors.toList()); } + return tasks; } /** @@ -390,7 +604,7 @@ public Future submit(Callable task) { */ @Override public Future submit(Runnable task, T result) { - return chooseThread().submit(timedRunnable(task), result); + return chooseThread().submit(task, result); } /** @@ -398,7 +612,7 @@ public Future submit(Runnable task, T result) { */ @Override public Future submit(Runnable task) { - return chooseThread().submit(timedRunnable(task)); + return chooseThread().submit(task); } /** diff --git a/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/util/OrderedScheduler.java b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/util/OrderedScheduler.java index 6f05832b9d5..5ed4b6073c6 100644 --- a/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/util/OrderedScheduler.java +++ b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/util/OrderedScheduler.java @@ -17,29 +17,28 @@ */ package org.apache.bookkeeper.common.util; +import com.google.common.util.concurrent.ForwardingListeningExecutorService; import com.google.common.util.concurrent.ListenableFuture; +import com.google.common.util.concurrent.ListenableScheduledFuture; +import com.google.common.util.concurrent.ListeningExecutorService; import com.google.common.util.concurrent.ListeningScheduledExecutorService; - import io.netty.util.concurrent.DefaultThreadFactory; - +import java.util.Collection; +import java.util.List; import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Future; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.ScheduledFuture; import java.util.concurrent.ScheduledThreadPoolExecutor; import java.util.concurrent.ThreadFactory; -import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; - +import java.util.concurrent.TimeoutException; import org.apache.bookkeeper.stats.StatsLogger; /** - * This class provides 2 things over the java {@link ScheduledExecutorService}. - * - *
<p>
1. It takes {@link SafeRunnable objects} instead of plain Runnable objects. - * This means that exceptions in scheduled tasks wont go unnoticed and will be - * logged. - * - *
<p>
2. It supports submitting tasks with an ordering key, so that tasks submitted
+ * This class provides support for submitting tasks with an ordering key, so that tasks submitted
  * with the same key will always be executed in order, but tasks across
  * different keys can be unordered. This retains parallelism while retaining the
  * basic amount of ordering we want (e.g., per ledger handle). Ordering is
@@ -72,8 +71,10 @@ public OrderedScheduler build() {
                 threadFactory,
                 statsLogger,
                 traceTaskExecution,
+                preserveMdcForTaskExecution,
                 warnTimeMicroSec,
-                maxTasksInQueue);
+                maxTasksInQueue,
+                enableThreadScopedMetrics);
         }
     }
 
@@ -90,6 +91,8 @@ public OrderedScheduler build() {
      *          - for reporting executor stats
      * @param traceTaskExecution
      *          - should we stat task execution
+     * @param preserveMdcForTaskExecution
+     *          - should we preserve MDC for task execution
      * @param warnTimeMicroSec
      *          - log long task exec warning after this interval
      */
@@ -98,22 +101,35 @@ private OrderedScheduler(String baseName,
                              ThreadFactory threadFactory,
                              StatsLogger statsLogger,
                              boolean traceTaskExecution,
+                             boolean preserveMdcForTaskExecution,
                              long warnTimeMicroSec,
-                             int maxTasksInQueue) {
-        super(baseName, numThreads, threadFactory, statsLogger, traceTaskExecution, warnTimeMicroSec, maxTasksInQueue);
+                             int maxTasksInQueue,
+                             boolean enableThreadScopedMetrics) {
+        super(baseName, numThreads, threadFactory, statsLogger, traceTaskExecution,
+            preserveMdcForTaskExecution, warnTimeMicroSec, maxTasksInQueue,
+            false /* enableBusyWait */, enableThreadScopedMetrics);
     }
 
-    @Override
-    protected ScheduledThreadPoolExecutor createSingleThreadExecutor(ThreadFactory factory) {
-        return new ScheduledThreadPoolExecutor(1, factory);
+    protected ExecutorService createSingleThreadExecutor(ThreadFactory factory) {
+        return new BoundedScheduledExecutorService(
+            new SingleThreadSafeScheduledExecutorService(factory),
+            this.maxTasksInQueue);
     }
 
     @Override
-    protected ListeningScheduledExecutorService getBoundedExecutor(ThreadPoolExecutor executor) {
+    protected ListeningScheduledExecutorService getBoundedExecutor(ExecutorService executor) {
         return new BoundedScheduledExecutorService((ScheduledThreadPoolExecutor) executor, this.maxTasksInQueue);
     }
 
+    @Override
+    protected ListeningScheduledExecutorService addExecutorDecorators(ExecutorService executor) {
+        if (!(executor instanceof ListeningScheduledExecutorService)) {
+            executor = new BoundedScheduledExecutorService((ScheduledThreadPoolExecutor) executor, 0);
+        }
+        return new OrderedSchedulerDecoratedThread((ListeningScheduledExecutorService) executor);
+    }
+
     @Override
     public ListeningScheduledExecutorService chooseThread() {
         return (ListeningScheduledExecutorService) super.chooseThread();
@@ -140,50 +156,20 @@ public <T> ListenableFuture<T> submitOrdered(Object orderingKey,
         return chooseThread(orderingKey).submit(callable);
     }
 
-    /**
-     * Creates and executes a one-shot action that becomes enabled after the given delay.
-     *
-     * @param command - the SafeRunnable to execute
-     * @param delay - the time from now to delay execution
-     * @param unit - the time unit of the delay parameter
-     * @return a ScheduledFuture representing pending completion of the task and whose get() method
-     *         will return null upon completion
-     */
-    public ScheduledFuture<?> schedule(SafeRunnable command, long delay, TimeUnit unit) {
-        return chooseThread().schedule(command, delay, unit);
-    }
-
     /**
      * Creates and executes a one-shot action that becomes enabled after the given delay.
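+     *
+     * <p>A brief usage sketch ({@code ledgerId} and {@code flushLedger} are
+     * illustrative names, not part of this API):
+     * <pre>{@code
+     * scheduler.scheduleOrdered(ledgerId,
+     *     () -> flushLedger(ledgerId),
+     *     10, TimeUnit.MILLISECONDS);
+     * }</pre>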
* * @param orderingKey - the key used for ordering - * @param command - the SafeRunnable to execute + * @param command - the Runnable to execute * @param delay - the time from now to delay execution * @param unit - the time unit of the delay parameter * @return a ScheduledFuture representing pending completion of the task and whose get() method * will return null upon completion */ - public ScheduledFuture scheduleOrdered(Object orderingKey, SafeRunnable command, long delay, TimeUnit unit) { + public ScheduledFuture scheduleOrdered(Object orderingKey, Runnable command, long delay, TimeUnit unit) { return chooseThread(orderingKey).schedule(command, delay, unit); } - /** - * Creates and executes a periodic action that becomes enabled first after - * the given initial delay, and subsequently with the given period. - * - *
<p>
For more details check {@link ScheduledExecutorService#scheduleAtFixedRate(Runnable, long, long, TimeUnit)}. - * - * @param command - the SafeRunnable to execute - * @param initialDelay - the time to delay first execution - * @param period - the period between successive executions - * @param unit - the time unit of the initialDelay and period parameters - * @return a ScheduledFuture representing pending completion of the task, and whose get() - * method will throw an exception upon cancellation - */ - public ScheduledFuture scheduleAtFixedRate(SafeRunnable command, long initialDelay, long period, TimeUnit unit) { - return chooseThread().scheduleAtFixedRate(command, initialDelay, period, unit); - } - /** * Creates and executes a periodic action that becomes enabled first after * the given initial delay, and subsequently with the given period. @@ -191,37 +177,18 @@ public ScheduledFuture scheduleAtFixedRate(SafeRunnable command, long initial *
<p>
For more details check {@link ScheduledExecutorService#scheduleAtFixedRate(Runnable, long, long, TimeUnit)}. * * @param orderingKey - the key used for ordering - * @param command - the SafeRunnable to execute + * @param command - the Runnable to execute * @param initialDelay - the time to delay first execution * @param period - the period between successive executions * @param unit - the time unit of the initialDelay and period parameters * @return a ScheduledFuture representing pending completion of the task, and whose get() method * will throw an exception upon cancellation */ - public ScheduledFuture scheduleAtFixedRateOrdered(Object orderingKey, SafeRunnable command, long initialDelay, + public ScheduledFuture scheduleAtFixedRateOrdered(Object orderingKey, Runnable command, long initialDelay, long period, TimeUnit unit) { return chooseThread(orderingKey).scheduleAtFixedRate(command, initialDelay, period, unit); } - /** - * Creates and executes a periodic action that becomes enabled first after the given initial delay, and subsequently - * with the given delay between the termination of one execution and the commencement of the next. - * - *
<p>
For more details check {@link ScheduledExecutorService#scheduleWithFixedDelay(Runnable, long, long, TimeUnit)}
-     * .
-     *
-     * @param command - the SafeRunnable to execute
-     * @param initialDelay - the time to delay first execution
-     * @param delay - the delay between the termination of one execution and the commencement of the next
-     * @param unit - the time unit of the initialDelay and delay parameters
-     * @return a ScheduledFuture representing pending completion of the task, and whose get() method
-     *         will throw an exception upon cancellation
-     */
-    public ScheduledFuture<?> scheduleWithFixedDelay(SafeRunnable command, long initialDelay, long delay,
-            TimeUnit unit) {
-        return chooseThread().scheduleWithFixedDelay(command, initialDelay, delay, unit);
-    }
-
     /**
      * Creates and executes a periodic action that becomes enabled first after the given initial delay, and subsequently
      * with the given delay between the termination of one execution and the commencement of the next.
@@ -230,14 +197,14 @@ public ScheduledFuture<?> scheduleWithFixedDelay(SafeRunnable command, long init
      * .
      *
      * @param orderingKey - the key used for ordering
-     * @param command - the SafeRunnable to execute
+     * @param command - the Runnable to execute
      * @param initialDelay - the time to delay first execution
      * @param delay - the delay between the termination of one execution and the commencement of the next
      * @param unit - the time unit of the initialDelay and delay parameters
      * @return a ScheduledFuture representing pending completion of the task, and whose get() method
      *         will throw an exception upon cancellation
      */
-    public ScheduledFuture<?> scheduleWithFixedDelayOrdered(Object orderingKey, SafeRunnable command, long initialDelay,
+    public ScheduledFuture<?> scheduleWithFixedDelayOrdered(Object orderingKey, Runnable command, long initialDelay,
             long delay, TimeUnit unit) {
         return chooseThread(orderingKey).scheduleWithFixedDelay(command, initialDelay, delay, unit);
     }
@@ -281,4 +248,84 @@ public ScheduledFuture<?> scheduleWithFixedDelay(Runnable command,
         return chooseThread().scheduleWithFixedDelay(command, initialDelay, delay, unit);
     }
 
+    class OrderedSchedulerDecoratedThread extends ForwardingListeningExecutorService
+            implements ListeningScheduledExecutorService {
+        private final ListeningScheduledExecutorService delegate;
+
+        private OrderedSchedulerDecoratedThread(ListeningScheduledExecutorService delegate) {
+            this.delegate = delegate;
+        }
+
+        @Override
+        protected ListeningExecutorService delegate() {
+            return delegate;
+        }
+
+        @Override
+        public ListenableScheduledFuture<?> schedule(Runnable command, long delay, TimeUnit unit) {
+            return delegate.schedule(timedRunnable(command), delay, unit);
+        }
+
+        @Override
+        public <V> ListenableScheduledFuture<V> schedule(Callable<V> callable, long delay, TimeUnit unit) {
+            return delegate.schedule(timedCallable(callable), delay, unit);
+        }
+
+        @Override
+        public ListenableScheduledFuture<?> scheduleAtFixedRate(Runnable command,
+                long initialDelay, long period, TimeUnit unit) {
+            return delegate.scheduleAtFixedRate(timedRunnable(command), initialDelay, period, unit);
+        }
+
+        @Override
+        public ListenableScheduledFuture<?> scheduleWithFixedDelay(Runnable command,
+                long initialDelay, long delay, TimeUnit unit) {
+            return delegate.scheduleWithFixedDelay(timedRunnable(command), initialDelay, delay, unit);
+        }
+
+        @Override
+        public <T> ListenableFuture<T> submit(Callable<T> task) {
+            return super.submit(timedCallable(task));
+        }
+
+        @Override
+        public ListenableFuture<?> submit(Runnable task) {
+            return
super.submit(timedRunnable(task)); + } + + @Override + public List> invokeAll(Collection> tasks) throws InterruptedException { + return super.invokeAll(timedCallables(tasks)); + } + + @Override + public List> invokeAll(Collection> tasks, + long timeout, TimeUnit unit) throws InterruptedException { + return super.invokeAll(timedCallables(tasks), timeout, unit); + } + + @Override + public T invokeAny(Collection> tasks) + throws InterruptedException, ExecutionException { + return super.invokeAny(timedCallables(tasks)); + } + + @Override + public T invokeAny(Collection> tasks, long timeout, + TimeUnit unit) throws InterruptedException, ExecutionException, TimeoutException { + + return super.invokeAny(timedCallables(tasks), timeout, unit); + } + + @Override + public ListenableFuture submit(Runnable task, T result) { + return super.submit(timedRunnable(task), result); + } + + @Override + public void execute(Runnable command) { + super.execute(timedRunnable(command)); + } + } + } diff --git a/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/util/ReferenceCounted.java b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/util/ReferenceCounted.java index 0e17a10dc37..2ecc03ba075 100644 --- a/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/util/ReferenceCounted.java +++ b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/util/ReferenceCounted.java @@ -15,6 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package org.apache.bookkeeper.common.util; /** diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/ReflectionUtils.java b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/util/ReflectionUtils.java similarity index 99% rename from bookkeeper-server/src/main/java/org/apache/bookkeeper/util/ReflectionUtils.java rename to bookkeeper-common/src/main/java/org/apache/bookkeeper/common/util/ReflectionUtils.java index 6297651c633..64cf34b93dd 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/ReflectionUtils.java +++ b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/util/ReflectionUtils.java @@ -1,5 +1,4 @@ /* - * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -16,9 +15,8 @@ * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. - * */ -package org.apache.bookkeeper.util; +package org.apache.bookkeeper.common.util; import java.lang.reflect.Constructor; import java.util.Map; diff --git a/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/util/Retries.java b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/util/Retries.java index 6a52ef597c1..dff0075ac6d 100644 --- a/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/util/Retries.java +++ b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/util/Retries.java @@ -75,7 +75,7 @@ public static CompletableFuture run( * @param task a task to execute. * @param scheduler scheduler to schedule the task and complete the futures. * @param key the submit key for the scheduler. - * @param the return tye. + * @param the return type. * @return future represents the result of the task with retries. 
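+     *
+     * <p>A sketch of a call to this method (assuming the preceding parameters are a
+     * stream of backoff delays in milliseconds and a retry predicate, matching the
+     * sibling overload; the task and key are illustrative):
+     * <pre>{@code
+     * CompletableFuture<Long> result = Retries.run(
+     *     Stream.of(100L, 200L, 400L),       // backoff delays between attempts
+     *     t -> t instanceof IOException,     // retry only on IOException
+     *     () -> readEntryAsync(),            // hypothetical async task
+     *     scheduler,
+     *     ledgerId);                         // submit key for the scheduler
+     * }</pre>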
*/ public static CompletableFuture run( diff --git a/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/util/SingleThreadExecutor.java b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/util/SingleThreadExecutor.java new file mode 100644 index 00000000000..3c514ebbdaf --- /dev/null +++ b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/util/SingleThreadExecutor.java @@ -0,0 +1,305 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.bookkeeper.common.util; + +import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.AbstractExecutorService; +import java.util.concurrent.ArrayBlockingQueue; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.RejectedExecutionException; +import java.util.concurrent.ThreadFactory; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.LongAdder; +import lombok.SneakyThrows; +import lombok.extern.slf4j.Slf4j; +import org.apache.bookkeeper.common.collections.GrowableMpScArrayConsumerBlockingQueue; +import org.apache.bookkeeper.stats.Gauge; +import org.apache.bookkeeper.stats.StatsLogger; + +/** + * Implements a single thread executor that drains the queue in batches to minimize contention between threads. + * + *
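+ * <p>A minimal usage sketch (the thread factory name and task are illustrative):
+ * <pre>{@code
+ * ExecutorService executor = new SingleThreadExecutor(
+ *         new DefaultThreadFactory("my-single-thread-executor"));
+ * executor.execute(() -> handleRequest());   // runs on the single runner thread
+ * executor.shutdown();
+ * }</pre>
+ *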
<p>
Tasks are executed in a safe manner: if there are exceptions they are logged and the executor will
+ * proceed with the next tasks.
+ */
+@Slf4j
+public class SingleThreadExecutor extends AbstractExecutorService implements ExecutorService, Runnable {
+    private final BlockingQueue<Runnable> queue;
+    private final Thread runner;
+
+    private final boolean rejectExecution;
+
+    private final LongAdder tasksCount = new LongAdder();
+    private final LongAdder tasksCompleted = new LongAdder();
+    private final LongAdder tasksRejected = new LongAdder();
+    private final LongAdder tasksFailed = new LongAdder();
+
+    enum State {
+        Running,
+        Shutdown,
+        Terminated
+    }
+
+    private volatile State state;
+
+    private final CountDownLatch startLatch;
+
+    public SingleThreadExecutor(ThreadFactory tf) {
+        this(tf, 0, false);
+    }
+
+    @SneakyThrows
+    @SuppressFBWarnings(value = {"SC_START_IN_CTOR"})
+    public SingleThreadExecutor(ThreadFactory tf, int maxQueueCapacity, boolean rejectExecution) {
+        if (rejectExecution && maxQueueCapacity == 0) {
+            throw new IllegalArgumentException("Executor cannot reject new items if the queue is unbounded");
+        }
+
+        if (maxQueueCapacity > 0) {
+            this.queue = new ArrayBlockingQueue<>(maxQueueCapacity);
+        } else {
+            this.queue = new GrowableMpScArrayConsumerBlockingQueue<>();
+        }
+        this.runner = tf.newThread(this);
+        this.state = State.Running;
+        this.rejectExecution = rejectExecution;
+        this.startLatch = new CountDownLatch(1);
+        this.runner.start();
+
+        // Ensure the runner is already fully working by the time the constructor is done
+        this.startLatch.await();
+    }
+
+    public void run() {
+        try {
+            boolean isInitialized = false;
+            List<Runnable> localTasks = new ArrayList<>();
+
+            while (state == State.Running) {
+                if (!isInitialized) {
+                    startLatch.countDown();
+                    isInitialized = true;
+                }
+
+                int n = queue.drainTo(localTasks);
+                if (n > 0) {
+                    for (int i = 0; i < n; i++) {
+                        if (!safeRunTask(localTasks.get(i))) {
+                            return;
+                        }
+                    }
+                    localTasks.clear();
+                } else {
+                    if (!safeRunTask(queue.take())) {
+                        return;
+                    }
+                }
+            }
+
+            // Clear the queue in orderly shutdown
+            int n = queue.drainTo(localTasks);
+            for (int i = 0; i < n; i++) {
+                safeRunTask(localTasks.get(i));
+            }
+        } catch (InterruptedException ie) {
+            // Exit loop when interrupted
+            Thread.currentThread().interrupt();
+        } catch (Throwable t) {
+            log.error("Exception in executor: {}", t.getMessage(), t);
+            throw t;
+        } finally {
+            state = State.Terminated;
+        }
+    }
+
+    private boolean safeRunTask(Runnable r) {
+        try {
+            r.run();
+            tasksCompleted.increment();
+        } catch (Throwable t) {
+            if (t instanceof InterruptedException) {
+                Thread.currentThread().interrupt();
+                return false;
+            } else {
+                tasksFailed.increment();
+                log.error("Error while running task: {}", t.getMessage(), t);
+            }
+        }
+
+        return true;
+    }
+
+    @Override
+    public void shutdown() {
+        state = State.Shutdown;
+        if (queue.isEmpty()) {
+            runner.interrupt();
+        }
+    }
+
+    @Override
+    public List<Runnable> shutdownNow() {
+        this.state = State.Shutdown;
+        this.runner.interrupt();
+        List<Runnable> remainingTasks = new ArrayList<>();
+        queue.drainTo(remainingTasks);
+        return remainingTasks;
+    }
+
+    @Override
+    public boolean isShutdown() {
+        return state != State.Running;
+    }
+
+    @Override
+    public boolean isTerminated() {
+        return state == State.Terminated;
+    }
+
+    @Override
+    public boolean awaitTermination(long timeout, TimeUnit unit) throws InterruptedException {
+        runner.join(unit.toMillis(timeout));
+        return !runner.isAlive();
+    }
+
+    public long getQueuedTasksCount() {
+        return Math.max(0,
getSubmittedTasksCount() - getCompletedTasksCount()); + } + + public long getSubmittedTasksCount() { + return tasksCount.sum(); + } + + public long getCompletedTasksCount() { + return tasksCompleted.sum(); + } + + public long getRejectedTasksCount() { + return tasksRejected.sum(); + } + + public long getFailedTasksCount() { + return tasksFailed.sum(); + } + + @Override + public void execute(Runnable r) { + if (state != State.Running) { + throw new RejectedExecutionException("Executor is shutting down"); + } + + try { + if (!rejectExecution) { + queue.put(r); + tasksCount.increment(); + } else { + if (queue.offer(r)) { + tasksCount.increment(); + } else { + tasksRejected.increment(); + throw new ExecutorRejectedException("Executor queue is full"); + } + } + } catch (InterruptedException e) { + throw new RejectedExecutionException("Executor thread was interrupted", e); + } + } + + public void registerMetrics(StatsLogger statsLogger) { + // Register gauges + statsLogger.scopeLabel("thread", runner.getName()) + .registerGauge("thread_executor_queue", new Gauge() { + @Override + public Number getDefaultValue() { + return 0; + } + + @Override + public Number getSample() { + return getQueuedTasksCount(); + } + }); + statsLogger.scopeLabel("thread", runner.getName()) + .registerGauge("thread_executor_completed", new Gauge() { + @Override + public Number getDefaultValue() { + return 0; + } + + @Override + public Number getSample() { + return getCompletedTasksCount(); + } + }); + statsLogger.scopeLabel("thread", runner.getName()) + .registerGauge("thread_executor_tasks_completed", new Gauge() { + @Override + public Number getDefaultValue() { + return 0; + } + + @Override + public Number getSample() { + return getCompletedTasksCount(); + } + }); + statsLogger.scopeLabel("thread", runner.getName()) + .registerGauge("thread_executor_tasks_rejected", new Gauge() { + @Override + public Number getDefaultValue() { + return 0; + } + + @Override + public Number getSample() { + return getRejectedTasksCount(); + } + }); + statsLogger.scopeLabel("thread", runner.getName()) + .registerGauge("thread_executor_tasks_failed", new Gauge() { + @Override + public Number getDefaultValue() { + return 0; + } + + @Override + public Number getSample() { + return getFailedTasksCount(); + } + }); + } + + private static class ExecutorRejectedException extends RejectedExecutionException { + + private ExecutorRejectedException(String msg) { + super(msg); + } + @Override + public Throwable fillInStackTrace() { + // Avoid the stack traces to be generated for this exception. This is done + // because when rejectExecution=true, there could be many such exceptions + // getting thrown, and filling the stack traces is very expensive + return this; + } + } +} diff --git a/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/util/SingleThreadSafeScheduledExecutorService.java b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/util/SingleThreadSafeScheduledExecutorService.java new file mode 100644 index 00000000000..d41f2411d3e --- /dev/null +++ b/bookkeeper-common/src/main/java/org/apache/bookkeeper/common/util/SingleThreadSafeScheduledExecutorService.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bookkeeper.common.util; + +import java.util.concurrent.Future; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.ScheduledFuture; +import java.util.concurrent.ScheduledThreadPoolExecutor; +import java.util.concurrent.ThreadFactory; +import java.util.concurrent.TimeUnit; +import lombok.extern.slf4j.Slf4j; + +@Slf4j +public class SingleThreadSafeScheduledExecutorService extends ScheduledThreadPoolExecutor + implements ScheduledExecutorService { + + public SingleThreadSafeScheduledExecutorService(ThreadFactory threadFactory) { + super(1, threadFactory); + } + + private static final class SafeRunnable implements Runnable { + private final Runnable task; + + SafeRunnable(Runnable task) { + this.task = task; + } + + @Override + public void run() { + try { + task.run(); + } catch (Throwable t) { + log.warn("Unexpected throwable from task {}: {}", task.getClass(), t.getMessage(), t); + } + } + } + + @Override + public ScheduledFuture schedule(Runnable command, long delay, TimeUnit unit) { + return super.schedule(new SafeRunnable(command), delay, unit); + } + + @Override + public ScheduledFuture scheduleAtFixedRate(Runnable command, + long initialDelay, long period, TimeUnit unit) { + return super.scheduleAtFixedRate(new SafeRunnable(command), initialDelay, period, unit); + } + + @Override + public ScheduledFuture scheduleWithFixedDelay(Runnable command, + long initialDelay, long delay, TimeUnit unit) { + return super.scheduleWithFixedDelay(new SafeRunnable(command), initialDelay, delay, unit); + } + + @Override + public Future submit(Runnable task) { + return super.submit(new SafeRunnable(task)); + } +} diff --git a/bookkeeper-common/src/main/java/org/apache/bookkeeper/util/MathUtils.java b/bookkeeper-common/src/main/java/org/apache/bookkeeper/util/MathUtils.java deleted file mode 100644 index 0124b193358..00000000000 --- a/bookkeeper-common/src/main/java/org/apache/bookkeeper/util/MathUtils.java +++ /dev/null @@ -1,26 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.bookkeeper.util; - -/** - * Provides misc math functions that don't come standard. - * - *
<p>
Deprecated since 4.6.0, in favor of using {@link org.apache.bookkeeper.common.util.MathUtils}. - */ -public class MathUtils extends org.apache.bookkeeper.common.util.MathUtils { -} diff --git a/bookkeeper-common/src/main/java/org/apache/bookkeeper/util/package-info.java b/bookkeeper-common/src/main/java/org/apache/bookkeeper/util/package-info.java deleted file mode 100644 index 28180d15951..00000000000 --- a/bookkeeper-common/src/main/java/org/apache/bookkeeper/util/package-info.java +++ /dev/null @@ -1,21 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/** - * defines the utilities used across the project. - */ -package org.apache.bookkeeper.util; diff --git a/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/collections/BatchedArrayBlockingQueueTest.java b/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/collections/BatchedArrayBlockingQueueTest.java new file mode 100644 index 00000000000..20e2f3723f3 --- /dev/null +++ b/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/collections/BatchedArrayBlockingQueueTest.java @@ -0,0 +1,312 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.common.collections; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import com.google.common.collect.Lists; +import java.util.ArrayList; +import java.util.List; +import java.util.NoSuchElementException; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; +import org.junit.Test; + +/** + * Test the growable array blocking queue. 
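+ * (The class under test is {@link BatchedArrayBlockingQueue}.)
+ *
+ * <p>A sketch of the batch operations exercised below (sizes are illustrative):
+ * <pre>{@code
+ * BatchedArrayBlockingQueue<Integer> queue = new BatchedArrayBlockingQueue<>(100);
+ * queue.putAll(new Integer[] {1, 2, 3}, 0, 3);  // bulk insert, blocks while full
+ * Integer[] batch = new Integer[10];
+ * int n = queue.takeAll(batch);                 // bulk drain, blocks while empty
+ * }</pre>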
+ */ +public class BatchedArrayBlockingQueueTest { + + @Test + public void simple() throws Exception { + BlockingQueue queue = new BatchedArrayBlockingQueue<>(4); + + assertEquals(null, queue.poll()); + + assertEquals(4, queue.remainingCapacity()); + + try { + queue.element(); + fail("Should have thrown exception"); + } catch (NoSuchElementException e) { + // Expected + } + + try { + queue.iterator(); + fail("Should have thrown exception"); + } catch (UnsupportedOperationException e) { + // Expected + } + + // Test index rollover + for (int i = 0; i < 100; i++) { + queue.add(i); + + assertEquals(i, queue.take().intValue()); + } + + queue.offer(1); + queue.offer(2); + queue.offer(3); + queue.offer(4); + + assertEquals(4, queue.size()); + + List list = new ArrayList<>(); + queue.drainTo(list, 3); + + assertEquals(1, queue.size()); + assertEquals(Lists.newArrayList(1, 2, 3), list); + assertEquals(4, queue.peek().intValue()); + + assertEquals(4, queue.element().intValue()); + assertEquals(4, queue.remove().intValue()); + try { + queue.remove(); + fail("Should have thrown exception"); + } catch (NoSuchElementException e) { + // Expected + } + } + + @Test + public void blockingTake() throws Exception { + BlockingQueue queue = new GrowableMpScArrayConsumerBlockingQueue<>(); + + CountDownLatch latch = new CountDownLatch(1); + + new Thread(() -> { + try { + int expected = 0; + + for (int i = 0; i < 100; i++) { + int n = queue.take(); + + assertEquals(expected++, n); + } + + latch.countDown(); + } catch (Exception e) { + e.printStackTrace(); + } + }).start(); + + int n = 0; + for (int i = 0; i < 10; i++) { + for (int j = 0; j < 10; j++) { + queue.put(n); + ++n; + } + + // Wait until all the entries are consumed + while (!queue.isEmpty()) { + Thread.sleep(1); + } + } + + latch.await(); + } + + @Test + public void blockWhenFull() throws Exception { + BlockingQueue queue = new BatchedArrayBlockingQueue<>(4); + + assertEquals(null, queue.poll()); + + assertTrue(queue.offer(1)); + assertTrue(queue.offer(2)); + assertTrue(queue.offer(3)); + assertTrue(queue.offer(4)); + assertFalse(queue.offer(5)); + + assertEquals(4, queue.size()); + + CountDownLatch latch = new CountDownLatch(1); + + new Thread(() -> { + try { + queue.put(5); + latch.countDown(); + } catch (Exception e) { + e.printStackTrace(); + } + }).start(); + + Thread.sleep(100); + assertEquals(1, latch.getCount()); + + assertEquals(1, (int) queue.poll()); + + assertTrue(latch.await(1, TimeUnit.SECONDS)); + assertEquals(4, queue.size()); + + + queue.clear(); + assertEquals(0, queue.size()); + + assertTrue(queue.offer(1, 1, TimeUnit.SECONDS)); + assertTrue(queue.offer(2, 1, TimeUnit.SECONDS)); + assertTrue(queue.offer(3, 1, TimeUnit.SECONDS)); + assertEquals(3, queue.size()); + + List list = new ArrayList<>(); + queue.drainTo(list); + assertEquals(0, queue.size()); + + assertEquals(Lists.newArrayList(1, 2, 3), list); + } + + @Test + public void pollTimeout() throws Exception { + BlockingQueue queue = new BatchedArrayBlockingQueue<>(4); + + assertEquals(null, queue.poll(1, TimeUnit.MILLISECONDS)); + + queue.put(1); + assertEquals(1, queue.poll(1, TimeUnit.MILLISECONDS).intValue()); + + // 0 timeout should not block + assertEquals(null, queue.poll(0, TimeUnit.HOURS)); + + queue.put(2); + queue.put(3); + assertEquals(2, queue.poll(1, TimeUnit.HOURS).intValue()); + assertEquals(3, queue.poll(1, TimeUnit.HOURS).intValue()); + } + + @Test + public void pollTimeout2() throws Exception { + BlockingQueue queue = new BatchedArrayBlockingQueue<>(10); + + 
CountDownLatch latch = new CountDownLatch(1); + + new Thread(() -> { + try { + queue.poll(1, TimeUnit.HOURS); + + latch.countDown(); + } catch (Exception e) { + e.printStackTrace(); + } + }).start(); + + // Make sure background thread is waiting on poll + Thread.sleep(100); + queue.put(1); + + latch.await(); + } + + + @Test + public void drainToArray() throws Exception { + BatchedArrayBlockingQueue queue = new BatchedArrayBlockingQueue<>(100); + + for (int i = 0; i < 10; i++) { + queue.add(i); + } + + Integer[] local = new Integer[5]; + int items = queue.takeAll(local); + assertEquals(5, items); + for (int i = 0; i < items; i++) { + assertEquals(i, (int) local[i]); + } + + assertEquals(5, queue.size()); + + items = queue.takeAll(local); + assertEquals(5, items); + for (int i = 0; i < items; i++) { + assertEquals(i + 5, (int) local[i]); + } + + assertEquals(0, queue.size()); + + /// Block when empty + CountDownLatch latch = new CountDownLatch(1); + + new Thread(() -> { + try { + int c = queue.takeAll(local); + assertEquals(1, c); + latch.countDown(); + } catch (Exception e) { + e.printStackTrace(); + } + }).start(); + + Thread.sleep(100); + assertEquals(1, latch.getCount()); + + assertEquals(0, queue.size()); + + // Unblock the drain + queue.put(1); + + assertTrue(latch.await(1, TimeUnit.SECONDS)); + assertEquals(0, queue.size()); + } + + @Test + public void putAll() throws Exception { + BatchedArrayBlockingQueue queue = new BatchedArrayBlockingQueue<>(10); + + Integer[] items = new Integer[100]; + for (int i = 0; i < 100; i++) { + items[i] = i; + } + + queue.putAll(items, 0, 5); + assertEquals(5, queue.size()); + queue.putAll(items, 0, 5); + assertEquals(10, queue.size()); + + queue.clear(); + + /// Block when empty + CountDownLatch latch = new CountDownLatch(1); + + new Thread(() -> { + try { + queue.putAll(items, 0, 11); + latch.countDown(); + } catch (Exception e) { + e.printStackTrace(); + } + }).start(); + + Thread.sleep(100); + assertEquals(1, latch.getCount()); + assertEquals(10, queue.size()); + + // Unblock the putAll + queue.take(); + + assertTrue(latch.await(1, TimeUnit.SECONDS)); + assertEquals(10, queue.size()); + } +} diff --git a/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/collections/BlockingMpscQueueTest.java b/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/collections/BlockingMpscQueueTest.java new file mode 100644 index 00000000000..f37c6cca341 --- /dev/null +++ b/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/collections/BlockingMpscQueueTest.java @@ -0,0 +1,111 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.bookkeeper.common.collections; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; + +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.TimeUnit; +import org.junit.Test; + +/** + * Unit tests for {@link BlockingMpscQueue}. + */ +public class BlockingMpscQueueTest { + + @Test + public void basicTest() throws Exception { + final int size = 15; + BlockingQueue queue = new BlockingMpscQueue<>(size); + + for (int i = 0; i < size; i++) { + queue.put(i); + + assertEquals(size - i, queue.remainingCapacity()); + } + + assertEquals(size, queue.size()); + + for (int i = 0; i < size; i++) { + Integer n = queue.take(); + assertTrue(n != null); + } + + assertEquals(0, queue.size()); + + Integer res = queue.poll(100, TimeUnit.MILLISECONDS); + assertNull(res); + } + + @Test + public void testOffer() throws Exception { + final int size = 16; + BlockingQueue queue = new BlockingMpscQueue<>(size); + + for (int i = 0; i < size; i++) { + assertTrue(queue.offer(1, 100, TimeUnit.MILLISECONDS)); + } + + assertEquals(size, queue.size()); + + assertFalse(queue.offer(1, 100, TimeUnit.MILLISECONDS)); + assertEquals(size, queue.size()); + } + + @Test + public void testDrain() throws Exception { + final int size = 10; + BlockingQueue queue = new BlockingMpscQueue<>(size); + + for (int i = 0; i < size; i++) { + queue.put(i); + } + + List list = new ArrayList<>(size); + queue.drainTo(list); + + assertEquals(size, list.size()); + + assertEquals(0, queue.size()); + + Integer res = queue.poll(100, TimeUnit.MILLISECONDS); + assertNull(res); + } + + @Test + public void testDrainWithLimit() throws Exception { + final int size = 10; + BlockingQueue queue = new BlockingMpscQueue<>(size); + + for (int i = 0; i < size; i++) { + queue.put(i); + } + + List list = new ArrayList<>(); + queue.drainTo(list, 5); + assertEquals(5, list.size()); + + assertEquals(5, queue.size()); + } +} diff --git a/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/collections/GrowableArrayBlockingQueueTest.java b/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/collections/GrowableArrayBlockingQueueTest.java new file mode 100644 index 00000000000..7b20294d581 --- /dev/null +++ b/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/collections/GrowableArrayBlockingQueueTest.java @@ -0,0 +1,273 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + */ +package org.apache.bookkeeper.common.collections; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import com.google.common.collect.Lists; +import java.util.ArrayList; +import java.util.List; +import java.util.NoSuchElementException; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicLong; +import org.junit.Test; + +/** + * Test the growable array blocking queue. + */ +public class GrowableArrayBlockingQueueTest { + + @Test + public void simple() throws Exception { + BlockingQueue queue = new GrowableMpScArrayConsumerBlockingQueue<>(4); + + assertEquals(null, queue.poll()); + + assertEquals(Integer.MAX_VALUE, queue.remainingCapacity()); + assertEquals("[]", queue.toString()); + + try { + queue.element(); + fail("Should have thrown exception"); + } catch (NoSuchElementException e) { + // Expected + } + + try { + queue.iterator(); + fail("Should have thrown exception"); + } catch (UnsupportedOperationException e) { + // Expected + } + + // Test index rollover + for (int i = 0; i < 100; i++) { + queue.add(i); + + assertEquals(i, queue.take().intValue()); + } + + queue.offer(1); + assertEquals("[1]", queue.toString()); + queue.offer(2); + assertEquals("[1, 2]", queue.toString()); + queue.offer(3); + assertEquals("[1, 2, 3]", queue.toString()); + queue.offer(4); + assertEquals("[1, 2, 3, 4]", queue.toString()); + + assertEquals(4, queue.size()); + + List list = new ArrayList<>(); + queue.drainTo(list, 3); + + assertEquals(1, queue.size()); + assertEquals(Lists.newArrayList(1, 2, 3), list); + assertEquals("[4]", queue.toString()); + assertEquals(4, queue.peek().intValue()); + + assertEquals(4, queue.element().intValue()); + assertEquals(4, queue.remove().intValue()); + try { + queue.remove(); + fail("Should have thrown exception"); + } catch (NoSuchElementException e) { + // Expected + } + } + + @Test + public void blockingTake() throws Exception { + BlockingQueue queue = new GrowableMpScArrayConsumerBlockingQueue<>(); + + CountDownLatch latch = new CountDownLatch(1); + + new Thread(() -> { + try { + int expected = 0; + + for (int i = 0; i < 100; i++) { + int n = queue.take(); + + assertEquals(expected++, n); + } + + latch.countDown(); + } catch (Exception e) { + e.printStackTrace(); + } + }).start(); + + int n = 0; + for (int i = 0; i < 10; i++) { + for (int j = 0; j < 10; j++) { + queue.put(n); + ++n; + } + + // Wait until all the entries are consumed + while (!queue.isEmpty()) { + Thread.sleep(1); + } + } + + latch.await(); + } + + @Test + public void growArray() throws Exception { + BlockingQueue queue = new GrowableMpScArrayConsumerBlockingQueue<>(4); + + assertEquals(null, queue.poll()); + + assertTrue(queue.offer(1)); + assertTrue(queue.offer(2)); + assertTrue(queue.offer(3)); + assertTrue(queue.offer(4)); + assertTrue(queue.offer(5)); + + assertEquals(5, queue.size()); + + queue.clear(); + assertEquals(0, queue.size()); + + assertTrue(queue.offer(1, 1, TimeUnit.SECONDS)); + assertTrue(queue.offer(2, 1, TimeUnit.SECONDS)); + assertTrue(queue.offer(3, 1, TimeUnit.SECONDS)); + assertEquals(3, queue.size()); + + List list = new ArrayList<>(); + queue.drainTo(list); + assertEquals(0, queue.size()); + + assertEquals(Lists.newArrayList(1, 2, 3), list); + } + + @Test + public void pollTimeout() throws Exception { + BlockingQueue queue = new GrowableMpScArrayConsumerBlockingQueue<>(4); + 
+ assertEquals(null, queue.poll(1, TimeUnit.MILLISECONDS)); + + queue.put(1); + assertEquals(1, queue.poll(1, TimeUnit.MILLISECONDS).intValue()); + + // 0 timeout should not block + assertEquals(null, queue.poll(0, TimeUnit.HOURS)); + + queue.put(2); + queue.put(3); + assertEquals(2, queue.poll(1, TimeUnit.HOURS).intValue()); + assertEquals(3, queue.poll(1, TimeUnit.HOURS).intValue()); + } + + @Test + public void pollTimeout2() throws Exception { + BlockingQueue queue = new GrowableMpScArrayConsumerBlockingQueue<>(); + + CountDownLatch latch = new CountDownLatch(1); + + new Thread(() -> { + try { + queue.poll(1, TimeUnit.HOURS); + + latch.countDown(); + } catch (Exception e) { + e.printStackTrace(); + } + }).start(); + + // Make sure background thread is waiting on poll + Thread.sleep(100); + queue.put(1); + + latch.await(); + } + + + static class TestThread extends Thread { + + private volatile boolean stop; + private final BlockingQueue readQ; + private final BlockingQueue writeQ; + + private final AtomicLong counter = new AtomicLong(); + + TestThread(BlockingQueue readQ, BlockingQueue writeQ) { + this.readQ = readQ; + this.writeQ = writeQ; + } + + @Override + public void run() { + ArrayList localQ = new ArrayList<>(); + + while (!stop) { + int items = readQ.drainTo(localQ); + if (items > 0) { + for (int i = 0; i < items; i++) { + writeQ.add(localQ.get(i)); + } + + counter.addAndGet(items); + localQ.clear(); + } else { + try { + writeQ.add(readQ.take()); + counter.incrementAndGet(); + } catch (InterruptedException e) { + return; + } + } + } + } + } + + public static void main(String[] args) throws Exception { + int n = 10_000; + BlockingQueue q1 = new GrowableMpScArrayConsumerBlockingQueue<>(); + BlockingQueue q2 = new GrowableMpScArrayConsumerBlockingQueue<>(); +// BlockingQueue q1 = new ArrayBlockingQueue<>(N * 2); +// BlockingQueue q2 = new ArrayBlockingQueue<>(N * 2); +// BlockingQueue q1 = new LinkedBlockingQueue<>(); +// BlockingQueue q2 = new LinkedBlockingDeque<>(); + + TestThread t1 = new TestThread(q1, q2); + TestThread t2 = new TestThread(q2, q1); + + for (int i = 0; i < n; i++) { + q1.add(i); + } + + t1.start(); + t2.start(); + + Thread.sleep(10_000); + + System.out.println("Throughput " + (t1.counter.get() / 10 / 1e6) + " Millions items/s"); + t1.stop = true; + t2.stop = true; + } +} diff --git a/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/component/TestComponentStarter.java b/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/component/TestComponentStarter.java index ed97adcacea..ef35a9d7b39 100644 --- a/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/component/TestComponentStarter.java +++ b/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/component/TestComponentStarter.java @@ -42,6 +42,7 @@ public void testStartComponent() { LifecycleComponent component = mock(LifecycleComponent.class); when(component.getName()).thenReturn("test-start-component"); ComponentStarter.startComponent(component); + verify(component).publishInfo(any(ComponentInfoPublisher.class)); verify(component).start(); } diff --git a/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/concurrent/TestFutureUtils.java b/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/concurrent/TestFutureUtils.java index fe11e5ac859..4a347f38440 100644 --- a/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/concurrent/TestFutureUtils.java +++ b/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/concurrent/TestFutureUtils.java @@ 
-43,7 +43,6 @@ import java.util.function.Function; import java.util.stream.LongStream; import org.apache.bookkeeper.common.util.OrderedScheduler; -import org.apache.bookkeeper.common.util.SafeRunnable; import org.apache.bookkeeper.stats.OpStatsLogger; import org.junit.Test; @@ -175,7 +174,7 @@ public void testWithinAlreadyDone() throws Exception { assertFalse(withinFuture.isCancelled()); assertFalse(withinFuture.isCompletedExceptionally()); verify(scheduler, times(0)) - .scheduleOrdered(eq(1234L), isA(SafeRunnable.class), eq(10), eq(TimeUnit.MILLISECONDS)); + .scheduleOrdered(eq(1234L), isA(Runnable.class), eq(10), eq(TimeUnit.MILLISECONDS)); } @Test @@ -194,14 +193,14 @@ public void testWithinZeroTimeout() throws Exception { assertFalse(withinFuture.isCancelled()); assertFalse(withinFuture.isCompletedExceptionally()); verify(scheduler, times(0)) - .scheduleOrdered(eq(1234L), isA(SafeRunnable.class), eq(10), eq(TimeUnit.MILLISECONDS)); + .scheduleOrdered(eq(1234L), isA(Runnable.class), eq(10), eq(TimeUnit.MILLISECONDS)); } @Test public void testWithinCompleteBeforeTimeout() throws Exception { OrderedScheduler scheduler = mock(OrderedScheduler.class); ScheduledFuture scheduledFuture = mock(ScheduledFuture.class); - when(scheduler.scheduleOrdered(any(Object.class), any(SafeRunnable.class), anyLong(), any(TimeUnit.class))) + when(scheduler.scheduleOrdered(any(Object.class), any(Runnable.class), anyLong(), any(TimeUnit.class))) .thenAnswer(invocationOnMock -> scheduledFuture); CompletableFuture newFuture = FutureUtils.createFuture(); CompletableFuture withinFuture = FutureUtils.within( diff --git a/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/conf/ConfigDefTest.java b/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/conf/ConfigDefTest.java new file mode 100644 index 00000000000..eb295d85570 --- /dev/null +++ b/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/conf/ConfigDefTest.java @@ -0,0 +1,305 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.bookkeeper.common.conf; + +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertSame; +import static org.junit.Assert.assertTrue; + +import com.google.common.collect.Lists; +import java.io.BufferedReader; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.util.Iterator; +import java.util.Set; +import lombok.extern.slf4j.Slf4j; +import org.apache.bookkeeper.common.conf.validators.ClassValidator; +import org.apache.bookkeeper.common.conf.validators.RangeValidator; +import org.junit.Test; + +/** + * Unit test {@link ConfigDef}. + */ +@Slf4j +public class ConfigDefTest { + + private static class TestConfig { + + private static final ConfigKeyGroup group1 = ConfigKeyGroup.builder("group1") + .description("Group 1 Settings") + .order(1) + .build(); + + private static final ConfigKey key11 = ConfigKey.builder("key11") + .type(Type.LONG) + .group(group1) + .validator(RangeValidator.atLeast(1000)) + .build(); + + private static final ConfigKeyGroup group2 = ConfigKeyGroup.builder("group2") + .description("Group 2 Settings") + .order(2) + .build(); + + private static final ConfigKey key21 = ConfigKey.builder("key21") + .type(Type.LONG) + .group(group2) + .validator(RangeValidator.atMost(1000)) + .orderInGroup(2) + .build(); + + private static final ConfigKey key22 = ConfigKey.builder("key22") + .type(Type.STRING) + .group(group2) + .validator(ClassValidator.of(Runnable.class)) + .orderInGroup(1) + .build(); + + } + + private static class TestConfig2 { + + private static final ConfigKeyGroup emptyGroup = ConfigKeyGroup.builder("empty_group") + .description("Empty Group Settings") + .order(1) + .build(); + + private static final ConfigKeyGroup group1 = ConfigKeyGroup.builder("group1") + .description("This is a very long description : Lorem ipsum dolor sit amet," + + " consectetur adipiscing elit. Maecenas bibendum ac felis id commodo." + + " Etiam mauris purus, fringilla id tempus in, mollis vel orci. 
Duis" + + " ultricies at erat eget iaculis.") + .order(2) + .build(); + + private static final ConfigKey intKey = ConfigKey.builder("int_key") + .type(Type.INT) + .description("it is an int key") + .group(group1) + .validator(RangeValidator.atLeast(1000)) + .build(); + + private static final ConfigKey longKey = ConfigKey.builder("long_key") + .type(Type.LONG) + .description("it is a long key") + .group(group1) + .validator(RangeValidator.atMost(1000)) + .build(); + + private static final ConfigKey shortKey = ConfigKey.builder("short_key") + .type(Type.SHORT) + .description("it is a short key") + .group(group1) + .validator(RangeValidator.between(500, 1000)) + .build(); + + private static final ConfigKey doubleKey = ConfigKey.builder("double_key") + .type(Type.DOUBLE) + .description("it is a double key") + .group(group1) + .validator(RangeValidator.between(1234.0f, 5678.0f)) + .build(); + + private static final ConfigKey boolKey = ConfigKey.builder("bool_key") + .type(Type.BOOLEAN) + .description("it is a bool key") + .group(group1) + .build(); + + private static final ConfigKey classKey = ConfigKey.builder("class_key") + .type(Type.CLASS) + .description("it is a class key") + .validator(ClassValidator.of(Runnable.class)) + .group(group1) + .build(); + + private static final ConfigKey listKey = ConfigKey.builder("list_key") + .type(Type.LIST) + .description("it is a list key") + .group(group1) + .build(); + + private static final ConfigKey stringKey = ConfigKey.builder("string_key") + .type(Type.STRING) + .description("it is a string key") + .group(group1) + .build(); + + private static final ConfigKeyGroup group2 = ConfigKeyGroup.builder("group2") + .description("This group has short description") + .order(3) + .build(); + + private static final ConfigKey keyWithSince = ConfigKey.builder("key_with_since") + .type(Type.STRING) + .description("it is a string key with since") + .since("4.7.0") + .group(group2) + .orderInGroup(10) + .build(); + + private static final ConfigKey keyWithDocumentation = ConfigKey.builder("key_with_short_documentation") + .type(Type.STRING) + .description("it is a string key with documentation") + .documentation("it has a short documentation") + .group(group2) + .orderInGroup(9) + .build(); + + private static final ConfigKey keyWithLongDocumentation = + ConfigKey.builder("key_long_short_documentation") + .type(Type.STRING) + .description("it is a string key with documentation") + .documentation("it has a long documentation : Lorem ipsum dolor sit amet," + + " consectetur adipiscing elit. Maecenas bibendum ac felis id commodo." + + " Etiam mauris purus, fringilla id tempus in, mollis vel orci. 
Duis" + + " ultricies at erat eget iaculis.") + .group(group2) + .orderInGroup(8) + .build(); + + private static final ConfigKey keyWithDefaultValue = ConfigKey.builder("key_with_default_value") + .type(Type.STRING) + .description("it is a string key with default value") + .defaultValue("this-is-a-test-value") + .group(group2) + .orderInGroup(7) + .build(); + + private static final ConfigKey keyWithOptionalValues = ConfigKey.builder("key_with_optional_values") + .type(Type.STRING) + .description("it is a string key with optional values") + .defaultValue("this-is-a-default-value") + .optionValues(Lists.newArrayList( + "item1", "item2", "item3", "item3" + )) + .group(group2) + .orderInGroup(6) + .build(); + + private static final ConfigKey deprecatedKey = ConfigKey.builder("deprecated_key") + .type(Type.STRING) + .deprecated(true) + .description("it is a deprecated key") + .group(group2) + .orderInGroup(5) + .build(); + + private static final ConfigKey deprecatedKeyWithSince = ConfigKey.builder("deprecated_key_with_since") + .type(Type.STRING) + .deprecated(true) + .deprecatedSince("4.3.0") + .description("it is a deprecated key with since") + .group(group2) + .orderInGroup(4) + .build(); + + private static final ConfigKey deprecatedKeyWithReplacedKey = + ConfigKey.builder("deprecated_key_with_replaced_key") + .type(Type.STRING) + .deprecated(true) + .deprecatedByConfigKey("key_with_optional_values") + .description("it is a deprecated key with replaced key") + .group(group2) + .orderInGroup(3) + .build(); + + private static final ConfigKey deprecatedKeyWithSinceAndReplacedKey = + ConfigKey.builder("deprecated_key_with_since_and_replaced_key") + .type(Type.STRING) + .deprecated(true) + .deprecatedSince("4.3.0") + .deprecatedByConfigKey("key_with_optional_values") + .description("it is a deprecated key with since and replaced key") + .group(group2) + .orderInGroup(2) + .build(); + + private static final ConfigKey requiredKey = ConfigKey.builder("required_key") + .type(Type.STRING) + .required(true) + .description("it is a required key") + .group(group2) + .orderInGroup(1) + .build(); + + } + + @Test + public void testBuildConfigDef() { + ConfigDef configDef = ConfigDef.of(TestConfig.class); + assertEquals(2, configDef.getGroups().size()); + + Iterator grpIter = configDef.getGroups().iterator(); + + // iterate over group 1 + assertTrue(grpIter.hasNext()); + ConfigKeyGroup group1 = grpIter.next(); + assertSame(TestConfig.group1, group1); + Set keys = configDef.getSettings().get(group1.name()); + assertNotNull(keys); + assertEquals(1, keys.size()); + assertEquals(TestConfig.key11, keys.iterator().next()); + + // iterate over group 2 + assertTrue(grpIter.hasNext()); + ConfigKeyGroup group2 = grpIter.next(); + assertSame(TestConfig.group2, group2); + keys = configDef.getSettings().get(group2.name()); + assertNotNull(keys); + assertEquals(2, keys.size()); + Iterator keyIter = keys.iterator(); + assertEquals(TestConfig.key22, keyIter.next()); + assertEquals(TestConfig.key21, keyIter.next()); + assertFalse(keyIter.hasNext()); + + // no more group + assertFalse(grpIter.hasNext()); + } + + @Test + public void testSaveConfigDef() throws IOException { + StringBuilder sb = new StringBuilder(); + try (InputStream is = this.getClass().getClassLoader().getResourceAsStream("test_conf_2.conf"); + BufferedReader reader = new BufferedReader(new InputStreamReader(is, UTF_8))) { + String line; + while ((line = reader.readLine()) != null) { + sb.append(line).append(System.lineSeparator()); + } + } + String confData 
= sb.toString(); + + ConfigDef configDef = ConfigDef.of(TestConfig2.class); + String readConf; + try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) { + configDef.save(baos); + readConf = baos.toString(); + log.info("\n{}", readConf); + } + + assertEquals(confData, readConf); + } + +} diff --git a/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/conf/ConfigKeyGroupTest.java b/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/conf/ConfigKeyGroupTest.java new file mode 100644 index 00000000000..a8abefa7b3d --- /dev/null +++ b/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/conf/ConfigKeyGroupTest.java @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.bookkeeper.common.conf; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import org.junit.Test; + +/** + * Unit test {@link ConfigKeyGroup}. + */ +public class ConfigKeyGroupTest { + + @Test + public void testEquals() { + ConfigKeyGroup grp1 = ConfigKeyGroup.builder("group1") + .description("test group 1") + .build(); + ConfigKeyGroup anotherGrp1 = ConfigKeyGroup.builder("group1") + .description("test another group 1") + .build(); + + assertEquals(grp1, anotherGrp1); + } + + @Test + public void testOrdering() { + ConfigKeyGroup grp10 = ConfigKeyGroup.builder("group1") + .order(0) + .build(); + ConfigKeyGroup grp20 = ConfigKeyGroup.builder("group2") + .order(0) + .build(); + ConfigKeyGroup grp21 = ConfigKeyGroup.builder("group2") + .order(1) + .build(); + + assertTrue(ConfigKeyGroup.ORDERING.compare(grp10, grp20) < 0); + assertTrue(ConfigKeyGroup.ORDERING.compare(grp20, grp21) < 0); + } + +} diff --git a/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/conf/ConfigKeyTest.java b/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/conf/ConfigKeyTest.java new file mode 100644 index 00000000000..858a615db54 --- /dev/null +++ b/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/conf/ConfigKeyTest.java @@ -0,0 +1,336 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bookkeeper.common.conf; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.fail; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import com.google.common.collect.Lists; +import java.util.List; +import java.util.concurrent.ThreadLocalRandom; +import java.util.function.Function; +import org.apache.commons.configuration.CompositeConfiguration; +import org.apache.commons.configuration.Configuration; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TestName; + +/** + * Unit test {@link ConfigKey}. + */ +public class ConfigKeyTest { + + /** + * Test Function A. + */ + private static class TestFunctionA implements Function { + + @Override + public String apply(String s) { + return s + "!"; + } + } + + /** + * Test Function B. + */ + private static class TestFunctionB implements Function { + + @Override + public String apply(String s) { + return s + "!"; + } + } + + /** + * Test Function C. + */ + private static class TestFunctionC implements Function { + + @Override + public String apply(String s) { + return s + "!"; + } + } + + @Rule + public final TestName runtime = new TestName(); + + @Test + public void testValidateRequiredField() { + String keyName = runtime.getMethodName(); + Configuration conf = new ConcurrentConfiguration(); + ConfigKey key = ConfigKey.builder(keyName) + .required(true) + .build(); + + try { + key.validate(conf); + fail("Required key should exist in the configuration"); + } catch (ConfigException ce) { + // expected + } + } + + @Test + public void testValidateFieldSuccess() throws ConfigException { + String keyName = runtime.getMethodName(); + Validator validator = mock(Validator.class); + when(validator.validate(anyString(), any())).thenReturn(true); + Configuration conf = new ConcurrentConfiguration(); + conf.setProperty(keyName, "test-value"); + ConfigKey key = ConfigKey.builder(keyName) + .validator(validator) + .build(); + + key.validate(conf); + verify(validator, times(1)).validate(eq(keyName), eq("test-value")); + } + + @Test + public void testValidateFieldFailure() { + String keyName = runtime.getMethodName(); + Validator validator = mock(Validator.class); + when(validator.validate(anyString(), any())).thenReturn(false); + Configuration conf = new ConcurrentConfiguration(); + conf.setProperty(keyName, "test-value"); + ConfigKey key = ConfigKey.builder(keyName) + .validator(validator) + .build(); + + try { + key.validate(conf); + fail("Should fail validation if validator#validate returns false"); + } catch (ConfigException ce) { + // expected + } + verify(validator, times(1)).validate(eq(keyName), eq("test-value")); + } + + @Test + public void testGetLong() { + String keyName = runtime.getMethodName(); + long defaultValue = System.currentTimeMillis(); + ConfigKey key = ConfigKey.builder(keyName) + .required(true) + .type(Type.LONG) + .defaultValue(defaultValue) + .build(); + + Configuration conf = new ConcurrentConfiguration(); + + // get default value + assertEquals(defaultValue, key.getLong(conf)); + assertEquals(defaultValue, key.get(conf)); + + // set value + long newValue = System.currentTimeMillis() * 2; + 
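+        // Writing through key.set() stores the typed value in the Configuration
+        // under the key's name, so the reads below observe the new value rather
+        // than the builder-supplied default.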
key.set(conf, newValue); + assertEquals(newValue, key.getLong(conf)); + assertEquals(newValue, key.get(conf)); + } + + @Test + public void testGetInt() { + String keyName = runtime.getMethodName(); + int defaultValue = ThreadLocalRandom.current().nextInt(10000); + ConfigKey key = ConfigKey.builder(keyName) + .required(true) + .type(Type.INT) + .defaultValue(defaultValue) + .build(); + + Configuration conf = new ConcurrentConfiguration(); + + // get default value + assertEquals(defaultValue, key.getInt(conf)); + assertEquals(defaultValue, key.get(conf)); + + // set value + int newValue = defaultValue * 2; + key.set(conf, newValue); + assertEquals(newValue, key.getInt(conf)); + assertEquals(newValue, key.get(conf)); + } + + @Test + public void testGetShort() { + String keyName = runtime.getMethodName(); + short defaultValue = (short) ThreadLocalRandom.current().nextInt(10000); + ConfigKey key = ConfigKey.builder(keyName) + .required(true) + .type(Type.SHORT) + .defaultValue(defaultValue) + .build(); + + Configuration conf = new ConcurrentConfiguration(); + + // get default value + assertEquals(defaultValue, key.getShort(conf)); + assertEquals(defaultValue, key.get(conf)); + + // set value + short newValue = (short) (defaultValue * 2); + key.set(conf, newValue); + assertEquals(newValue, key.getShort(conf)); + assertEquals(newValue, key.get(conf)); + } + + @Test + public void testGetDouble() { + String keyName = runtime.getMethodName(); + double defaultValue = ThreadLocalRandom.current().nextDouble(10000.0f); + ConfigKey key = ConfigKey.builder(keyName) + .required(true) + .type(Type.DOUBLE) + .defaultValue(defaultValue) + .build(); + + Configuration conf = new ConcurrentConfiguration(); + + // get default value + assertEquals(defaultValue, key.getDouble(conf), 0.0001); + assertEquals(defaultValue, key.get(conf)); + + // set value + double newValue = (defaultValue * 2); + key.set(conf, newValue); + assertEquals(newValue, key.getDouble(conf), 0.0001); + assertEquals(newValue, key.get(conf)); + } + + @Test + public void testGetBoolean() { + String keyName = runtime.getMethodName(); + boolean defaultValue = ThreadLocalRandom.current().nextBoolean(); + ConfigKey key = ConfigKey.builder(keyName) + .required(true) + .type(Type.BOOLEAN) + .defaultValue(defaultValue) + .build(); + + Configuration conf = new ConcurrentConfiguration(); + + // get default value + assertEquals(defaultValue, key.getBoolean(conf)); + assertEquals(defaultValue, key.get(conf)); + + // set value + boolean newValue = !defaultValue; + key.set(conf, newValue); + assertEquals(newValue, key.getBoolean(conf)); + assertEquals(newValue, key.get(conf)); + } + + @Test + public void testGetList() { + String keyName = runtime.getMethodName(); + List defaultList = Lists.newArrayList( + "item1", "item2", "item3" + ); + ConfigKey key = ConfigKey.builder(keyName) + .required(true) + .type(Type.LIST) + .defaultValue(defaultList) + .build(); + + Configuration conf = new CompositeConfiguration(); + + // get default value + assertEquals(defaultList, key.getList(conf)); + assertEquals(defaultList, key.get(conf)); + + // set value + List newList = Lists.newArrayList( + "item4", "item5", "item6" + ); + key.set(conf, newList); + assertEquals(newList, key.getList(conf)); + assertEquals(newList, key.get(conf)); + + // set string value + newList = Lists.newArrayList( + "item7", "item8", "item9" + ); + conf.setProperty(key.name(), "item7,item8,item9"); + assertEquals(newList, key.getList(conf)); + assertEquals(newList, key.get(conf)); + } + + @Test + 
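+    // For a CLASS-typed key, the stored value may be either a Class object or a
+    // fully-qualified class name string; both forms are resolved to the same
+    // Class on read, as the assertions below show.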
public void testGetClass() { + String keyName = runtime.getMethodName(); + Class defaultClass = TestFunctionA.class; + ConfigKey key = ConfigKey.builder(keyName) + .required(true) + .type(Type.CLASS) + .defaultValue(defaultClass) + .build(); + + Configuration conf = new CompositeConfiguration(); + + // get default value + assertEquals(defaultClass, key.getClass(conf)); + assertEquals(defaultClass, key.get(conf)); + + // set value + Class newClass = TestFunctionB.class; + key.set(conf, newClass); + assertEquals(newClass, key.getClass(conf)); + assertEquals(newClass, key.get(conf)); + + // set string value + String newClassName = TestFunctionC.class.getName(); + conf.setProperty(key.name(), newClassName); + assertEquals(TestFunctionC.class, key.getClass(conf)); + assertEquals(TestFunctionC.class, key.get(conf)); + } + + @Test + public void testGetString() { + String keyName = runtime.getMethodName(); + String defaultValue = "default-string-value"; + ConfigKey key = ConfigKey.builder(keyName) + .required(true) + .type(Type.STRING) + .defaultValue(defaultValue) + .build(); + + Configuration conf = new CompositeConfiguration(); + + // get default value + assertEquals(defaultValue, key.getString(conf)); + assertEquals(defaultValue, key.get(conf)); + + // set value + String newValue = "new-string-value"; + key.set(conf, newValue); + assertEquals(newValue, key.getString(conf)); + assertEquals(newValue, key.get(conf)); + } + +} diff --git a/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/conf/validators/ClassValidatorTest.java b/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/conf/validators/ClassValidatorTest.java new file mode 100644 index 00000000000..bfb7971b45b --- /dev/null +++ b/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/conf/validators/ClassValidatorTest.java @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bookkeeper.common.conf.validators; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import java.util.function.Function; +import org.junit.Test; + +/** + * Unit test for {@link ClassValidator}. 
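+ *
+ * <p>The validator accepts either a fully-qualified class name or a
+ * {@code Class} object and passes only when the class is assignable to the
+ * interface given to {@code ClassValidator.of}; any other value type fails.
+ * A minimal usage sketch (the class name below is hypothetical):
+ * <pre>{@code
+ * ClassValidator validator = ClassValidator.of(Runnable.class);
+ * validator.validate("myKey", "com.example.MyRunnable"); // true iff loadable
+ *                                                        // and assignable
+ * }</pre>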
+ */ +public class ClassValidatorTest { + + private static class TestFunction implements Function { + + @Override + public String apply(String s) { + return s + "!"; + } + } + + @Test + public void testValidateStrings() { + ClassValidator validator = ClassValidator.of(Function.class); + assertTrue(validator.validate("test-valid-classname", TestFunction.class.getName())); + assertFalse(validator.validate("test-invalid-classname", "unknown")); + } + + @Test + public void testValidateClass() { + ClassValidator validator = ClassValidator.of(Function.class); + assertTrue(validator.validate("test-valid-class", TestFunction.class)); + assertFalse(validator.validate("test-invalid-class", Integer.class)); + } + + @Test + public void testValidateWrongType() { + ClassValidator validator = ClassValidator.of(Function.class); + assertFalse(validator.validate("test-invalid-type", 12345)); + } + +} diff --git a/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/conf/validators/RangeValidatorTest.java b/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/conf/validators/RangeValidatorTest.java new file mode 100644 index 00000000000..b8725957c2a --- /dev/null +++ b/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/conf/validators/RangeValidatorTest.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bookkeeper.common.conf.validators; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import org.junit.Test; + +/** + * Unit test {@link RangeValidator} validator. 
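+ *
+ * <p>All three factories ({@code atLeast}, {@code atMost} and {@code between})
+ * treat their bounds as inclusive, which the boundary assertions below pin
+ * down explicitly.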
+ */ +public class RangeValidatorTest { + + @Test + public void testAtLeastRangeValidator() { + RangeValidator range = RangeValidator.atLeast(1234L); + assertTrue(range.validate("test-0", 1235L)); + assertTrue(range.validate("test-1", 1234L)); + assertFalse(range.validate("test-2", 1233L)); + } + + @Test + public void testAtMostRangeValidator() { + RangeValidator range = RangeValidator.atMost(1234L); + assertFalse(range.validate("test-0", 1235L)); + assertTrue(range.validate("test-1", 1234L)); + assertTrue(range.validate("test-2", 1233L)); + } + + @Test + public void testBetweenRangeValidator() { + RangeValidator range = RangeValidator.between(1230L, 1240L); + assertTrue(range.validate("test-0", 1230L)); + assertTrue(range.validate("test-1", 1235L)); + assertTrue(range.validate("test-2", 1240L)); + assertFalse(range.validate("test-3", 1229L)); + assertFalse(range.validate("test-4", 1241L)); + } + +} diff --git a/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/net/ServiceURITest.java b/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/net/ServiceURITest.java index 9982c43a08b..2ec83df32c0 100644 --- a/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/net/ServiceURITest.java +++ b/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/net/ServiceURITest.java @@ -154,7 +154,19 @@ public void testMultipleHostsWithoutPorts() { "bk", new String[0], null, - new String[] { "host1", "host2", "host3" }, + new String[] { "host1:4181", "host2:4181", "host3:4181" }, + "/path/to/namespace"); + } + + @Test + public void testMultipleHostsMixedPorts() { + String serviceUri = "bk://host1:3181,host2,host3:2181/path/to/namespace"; + assertServiceUri( + serviceUri, + "bk", + new String[0], + null, + new String[] { "host1:3181", "host2:4181", "host3:2181" }, "/path/to/namespace"); } @@ -166,7 +178,7 @@ public void testMultipleHostsMixed() { "bk", new String[0], null, - new String[] { "host1:2181", "host2", "host3:2181" }, + new String[] { "host1:2181", "host2:4181", "host3:2181" }, "/path/to/namespace"); } diff --git a/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/testing/annotations/FlakyTest.java b/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/testing/annotations/FlakyTest.java index 8240cf2d303..27c26b123d6 100644 --- a/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/testing/annotations/FlakyTest.java +++ b/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/testing/annotations/FlakyTest.java @@ -15,6 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + package org.apache.bookkeeper.common.testing.annotations; import java.lang.annotation.Documented; diff --git a/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/testing/executors/MockExecutorController.java b/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/testing/executors/MockExecutorController.java index 4942e348bac..7299d1d9311 100644 --- a/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/testing/executors/MockExecutorController.java +++ b/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/testing/executors/MockExecutorController.java @@ -42,6 +42,7 @@ import lombok.NoArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.apache.bookkeeper.common.concurrent.FutureUtils; +import org.apache.bookkeeper.stats.ThreadRegistry; import org.mockito.stubbing.Answer; /** @@ -172,6 +173,10 @@ private static Answer> answerDelay(MockExecutorController exe private static Answer> answerNow() { return invocationOnMock -> { + // this method executes everything in the caller thread + // this messes up assertions that verify + // that a thread is part of only a threadpool + ThreadRegistry.forceClearRegistrationForTests(Thread.currentThread().getId()); Runnable task = invocationOnMock.getArgument(0); task.run(); diff --git a/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/util/MemoryLimitControllerTest.java b/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/util/MemoryLimitControllerTest.java new file mode 100644 index 00000000000..2d81f62f927 --- /dev/null +++ b/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/util/MemoryLimitControllerTest.java @@ -0,0 +1,167 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.bookkeeper.common.util; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +/** + * Tests for {@link MemoryLimitController}. 
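+ *
+ * <p>As the tests below demonstrate, reservations are admitted until usage
+ * crosses the configured limit (so usage may overshoot by an in-flight
+ * reservation), after which {@code reserveMemory} blocks and
+ * {@code tryReserveMemory} returns false until memory is released.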
+ */ +public class MemoryLimitControllerTest { + + private ExecutorService executor; + + @Before + public void setup() { + executor = Executors.newCachedThreadPool(); + } + + @After + public void teardown() { + executor.shutdownNow(); + } + + @Test + public void testLimit() throws Exception { + MemoryLimitController mlc = new MemoryLimitController(100); + + for (int i = 0; i < 101; i++) { + mlc.reserveMemory(1); + } + + assertEquals(101, mlc.currentUsage()); + assertFalse(mlc.tryReserveMemory(1)); + mlc.releaseMemory(1); + assertEquals(100, mlc.currentUsage()); + + assertTrue(mlc.tryReserveMemory(1)); + assertEquals(101, mlc.currentUsage()); + } + + @Test + public void testBlocking() throws Exception { + MemoryLimitController mlc = new MemoryLimitController(100); + + for (int i = 0; i < 101; i++) { + mlc.reserveMemory(1); + } + + CountDownLatch l1 = new CountDownLatch(1); + executor.submit(() -> { + try { + mlc.reserveMemory(1); + l1.countDown(); + } catch (InterruptedException e) { + } + }); + + CountDownLatch l2 = new CountDownLatch(1); + executor.submit(() -> { + try { + mlc.reserveMemory(1); + l2.countDown(); + } catch (InterruptedException e) { + } + }); + + CountDownLatch l3 = new CountDownLatch(1); + executor.submit(() -> { + try { + mlc.reserveMemory(1); + l3.countDown(); + } catch (InterruptedException e) { + } + }); + + // The threads are blocked since the quota is full + assertFalse(l1.await(100, TimeUnit.MILLISECONDS)); + assertFalse(l2.await(100, TimeUnit.MILLISECONDS)); + assertFalse(l3.await(100, TimeUnit.MILLISECONDS)); + + assertEquals(101, mlc.currentUsage()); + mlc.releaseMemory(3); + + assertTrue(l1.await(1, TimeUnit.SECONDS)); + assertTrue(l2.await(1, TimeUnit.SECONDS)); + assertTrue(l3.await(1, TimeUnit.SECONDS)); + assertEquals(101, mlc.currentUsage()); + } + + @Test + public void testStepRelease() throws Exception { + MemoryLimitController mlc = new MemoryLimitController(100); + + for (int i = 0; i < 101; i++) { + mlc.reserveMemory(1); + } + + CountDownLatch l1 = new CountDownLatch(1); + executor.submit(() -> { + try { + mlc.reserveMemory(1); + l1.countDown(); + } catch (InterruptedException e) { + } + }); + + CountDownLatch l2 = new CountDownLatch(1); + executor.submit(() -> { + try { + mlc.reserveMemory(1); + l2.countDown(); + } catch (InterruptedException e) { + } + }); + + CountDownLatch l3 = new CountDownLatch(1); + executor.submit(() -> { + try { + mlc.reserveMemory(1); + l3.countDown(); + } catch (InterruptedException e) { + } + }); + + // The threads are blocked since the quota is full + assertFalse(l1.await(100, TimeUnit.MILLISECONDS)); + assertFalse(l2.await(100, TimeUnit.MILLISECONDS)); + assertFalse(l3.await(100, TimeUnit.MILLISECONDS)); + + assertEquals(101, mlc.currentUsage()); + + mlc.releaseMemory(1); + mlc.releaseMemory(1); + mlc.releaseMemory(1); + + assertTrue(l1.await(1, TimeUnit.SECONDS)); + assertTrue(l2.await(1, TimeUnit.SECONDS)); + assertTrue(l3.await(1, TimeUnit.SECONDS)); + assertEquals(101, mlc.currentUsage()); + } +} \ No newline at end of file diff --git a/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/util/MockTicker.java b/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/util/MockTicker.java new file mode 100644 index 00000000000..3df980fb759 --- /dev/null +++ b/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/util/MockTicker.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bookkeeper.common.util; + +import com.google.common.base.Ticker; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicLong; + +/** + * Test implementation of Ticker. + */ +public class MockTicker extends Ticker { + private AtomicLong tick = new AtomicLong(0); + + public void advance(int period, TimeUnit unit) { + tick.addAndGet(unit.toNanos(period)); + } + + @Override + public long read() { + return tick.get(); + } +} + + diff --git a/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/util/TestMathUtils.java b/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/util/TestMathUtils.java index 3bd58c21939..fc2318b9392 100644 --- a/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/util/TestMathUtils.java +++ b/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/util/TestMathUtils.java @@ -19,13 +19,11 @@ package org.apache.bookkeeper.common.util; import static org.apache.bookkeeper.common.util.MathUtils.findNextPositivePowerOfTwo; -import static org.apache.bookkeeper.common.util.MathUtils.now; import static org.apache.bookkeeper.common.util.MathUtils.nowInNano; import static org.apache.bookkeeper.common.util.MathUtils.signSafeMod; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; -import java.util.concurrent.TimeUnit; import org.junit.Test; /** @@ -46,12 +44,6 @@ public void testFindNextPositivePowerOfTwo() { assertEquals(16384, findNextPositivePowerOfTwo(12345)); } - @Test - public void testNow() { - long nowInMillis = now(); - assertTrue(TimeUnit.NANOSECONDS.toMillis(System.nanoTime()) >= nowInMillis); - } - @Test public void testNowInNanos() { long nowInNanos = nowInNano(); diff --git a/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/util/TestOrderedExecutor.java b/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/util/TestOrderedExecutor.java new file mode 100644 index 00000000000..f8419ec3049 --- /dev/null +++ b/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/util/TestOrderedExecutor.java @@ -0,0 +1,58 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +package org.apache.bookkeeper.common.util; + +import org.apache.bookkeeper.test.TestStatsProvider; +import org.junit.Assert; +import org.junit.Test; + +/** + * Test OrderedExecutor/Scheduler . + */ +public class TestOrderedExecutor { + + @Test + public void testOrderExecutorPrometheusMetric() { + testGenerateMetric(false); + testGenerateMetric(true); + } + + private void testGenerateMetric(boolean isTraceTaskExecution) { + TestStatsProvider provider = new TestStatsProvider(); + + TestStatsProvider.TestStatsLogger rootStatsLogger = provider.getStatsLogger(""); + TestStatsProvider.TestStatsLogger bookieStats = + (TestStatsProvider.TestStatsLogger) rootStatsLogger.scope("bookkeeper_server"); + + OrderedExecutor executor = OrderedExecutor.newBuilder().statsLogger(bookieStats) + .name("test").numThreads(1).traceTaskExecution(isTraceTaskExecution).build(); + + TestStatsProvider.TestStatsLogger testStatsLogger = (TestStatsProvider.TestStatsLogger) + bookieStats.scope("thread_test_OrderedExecutor_0_0"); + + Assert.assertNotNull(testStatsLogger.getGauge("thread_executor_queue").getSample()); + Assert.assertNotNull(testStatsLogger.getGauge("thread_executor_completed").getSample()); + Assert.assertNotNull(testStatsLogger.getGauge("thread_executor_tasks_completed").getSample()); + Assert.assertNotNull(testStatsLogger.getGauge("thread_executor_tasks_rejected").getSample()); + Assert.assertNotNull(testStatsLogger.getGauge("thread_executor_tasks_failed").getSample()); + } +} diff --git a/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/util/TestOrderedExecutorDecorators.java b/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/util/TestOrderedExecutorDecorators.java new file mode 100644 index 00000000000..5d83369fe3b --- /dev/null +++ b/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/util/TestOrderedExecutorDecorators.java @@ -0,0 +1,154 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + */ + +package org.apache.bookkeeper.common.util; + +import static org.hamcrest.Matchers.hasItem; +import static org.junit.Assert.assertThat; +import static org.mockito.AdditionalAnswers.answerVoid; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.spy; + +import java.util.Queue; +import java.util.UUID; +import java.util.concurrent.ConcurrentLinkedQueue; +import java.util.concurrent.TimeUnit; +import org.apache.logging.log4j.Level; +import org.apache.logging.log4j.ThreadContext; +import org.apache.logging.log4j.core.LogEvent; +import org.apache.logging.log4j.core.LoggerContext; +import org.apache.logging.log4j.core.appender.NullAppender; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Test that decorators applied by OrderedExecutor/Scheduler are correctly applied. + */ +public class TestOrderedExecutorDecorators { + private static final Logger log = LoggerFactory.getLogger(TestOrderedExecutorDecorators.class); + private static final String MDC_KEY = "mdc-key"; + + private NullAppender mockAppender; + private final Queue capturedEvents = new ConcurrentLinkedQueue<>(); + + public static String mdcFormat(Object mdc, String message) { + return String.format("[%s:%s] %s", MDC_KEY, mdc, message); + } + + @Before + public void setUp() throws Exception { + ThreadContext.clearMap(); + LoggerContext lc = (LoggerContext) org.apache.logging.log4j.LogManager.getContext(false); + mockAppender = spy(NullAppender.createAppender(UUID.randomUUID().toString())); + mockAppender.start(); + lc.getConfiguration().addAppender(mockAppender); + lc.getRootLogger().addAppender(lc.getConfiguration().getAppender(mockAppender.getName())); + lc.getConfiguration().getRootLogger().setLevel(Level.INFO); + lc.updateLoggers(); + + doAnswer(answerVoid((LogEvent event) -> { + capturedEvents.add(mdcFormat(event.getContextData().getValue(MDC_KEY), + event.getMessage().getFormattedMessage())); + })).when(mockAppender).append(any()); + } + + @After + public void tearDown() throws Exception { + LoggerContext lc = (LoggerContext) org.apache.logging.log4j.LogManager.getContext(false); + lc.getRootLogger().removeAppender(lc.getConfiguration().getAppender(mockAppender.getName())); + lc.updateLoggers(); + capturedEvents.clear(); + ThreadContext.clearMap(); + } + + @Test + public void testMDCInvokeOrdered() throws Exception { + OrderedExecutor executor = OrderedExecutor.newBuilder() + .name("test").numThreads(20).preserveMdcForTaskExecution(true).build(); + + try { + ThreadContext.put(MDC_KEY, "testMDCInvokeOrdered"); + executor.submitOrdered(10, () -> { + log.info("foobar"); + return 10; + }).get(); + assertThat(capturedEvents, + hasItem(mdcFormat("testMDCInvokeOrdered", "foobar"))); + } finally { + executor.shutdown(); + } + } + + @Test + public void testMDCInvokeDirectOnChosen() throws Exception { + OrderedExecutor executor = OrderedExecutor.newBuilder() + .name("test").numThreads(20).preserveMdcForTaskExecution(true).build(); + + try { + ThreadContext.put(MDC_KEY, "testMDCInvokeOrdered"); + executor.chooseThread(10).submit(() -> { + log.info("foobar"); + return 10; + }).get(); + assertThat(capturedEvents, + hasItem(mdcFormat("testMDCInvokeOrdered", "foobar"))); + } finally { + executor.shutdown(); + } + + } + + + @Test + public void testMDCScheduleOrdered() throws Exception { + OrderedScheduler scheduler = OrderedScheduler.newSchedulerBuilder() + 
.name("test").numThreads(20).preserveMdcForTaskExecution(true).build(); + + try { + ThreadContext.put(MDC_KEY, "testMDCInvokeOrdered"); + scheduler.scheduleOrdered(10, () -> log.info("foobar"), 0, TimeUnit.DAYS).get(); + assertThat(capturedEvents, + hasItem(mdcFormat("testMDCInvokeOrdered", "foobar"))); + } finally { + scheduler.shutdown(); + } + } + + @Test + public void testMDCScheduleDirectOnChosen() throws Exception { + OrderedScheduler scheduler = OrderedScheduler.newSchedulerBuilder() + .name("test").numThreads(20).preserveMdcForTaskExecution(true).build(); + + try { + ThreadContext.put(MDC_KEY, "testMDCInvokeOrdered"); + scheduler.chooseThread(10).schedule(() -> log.info("foobar"), 0, TimeUnit.DAYS).get(); + assertThat(capturedEvents, + hasItem(mdcFormat("testMDCInvokeOrdered", "foobar"))); + } finally { + scheduler.shutdown(); + } + } + +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/TestReflectionUtils.java b/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/util/TestReflectionUtils.java similarity index 95% rename from bookkeeper-server/src/test/java/org/apache/bookkeeper/util/TestReflectionUtils.java rename to bookkeeper-common/src/test/java/org/apache/bookkeeper/common/util/TestReflectionUtils.java index 61712397e3d..dd1535fa2ce 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/TestReflectionUtils.java +++ b/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/util/TestReflectionUtils.java @@ -17,9 +17,9 @@ * under the License. */ -package org.apache.bookkeeper.util; +package org.apache.bookkeeper.common.util; -import static org.apache.bookkeeper.util.ReflectionUtils.forName; +import static org.apache.bookkeeper.common.util.ReflectionUtils.forName; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; diff --git a/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/util/TestSharedResourceManager.java b/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/util/TestSharedResourceManager.java index fab622f0c7b..be09259aed1 100644 --- a/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/util/TestSharedResourceManager.java +++ b/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/util/TestSharedResourceManager.java @@ -98,7 +98,7 @@ public void destroyResourceWhenRefCountReachesZero() { assertEquals(SharedResourceManager.DESTROY_DELAY_SECONDS, scheduledDestroyTask.getDelay(TimeUnit.SECONDS)); - // Simluate that the destroyer executes the foo destroying task + // Simulate that the destroyer executes the foo destroying task scheduledDestroyTask.runTask(); assertTrue(sharedFoo.closed); diff --git a/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/util/TestSingleThreadExecutor.java b/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/util/TestSingleThreadExecutor.java new file mode 100644 index 00000000000..671318de6e2 --- /dev/null +++ b/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/util/TestSingleThreadExecutor.java @@ -0,0 +1,319 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.bookkeeper.common.util; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import io.netty.util.concurrent.DefaultThreadFactory; +import java.util.List; +import java.util.concurrent.BrokenBarrierException; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.CyclicBarrier; +import java.util.concurrent.Future; +import java.util.concurrent.RejectedExecutionException; +import java.util.concurrent.ThreadFactory; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicInteger; +import lombok.Cleanup; +import org.awaitility.Awaitility; +import org.junit.Test; + +/** + * Unit test for {@link SingleThreadExecutor}. + */ +public class TestSingleThreadExecutor { + + private static final ThreadFactory THREAD_FACTORY = new DefaultThreadFactory("test"); + + @Test + public void testSimple() throws Exception { + @Cleanup("shutdown") + SingleThreadExecutor ste = new SingleThreadExecutor(THREAD_FACTORY); + + AtomicInteger count = new AtomicInteger(); + + assertEquals(0, ste.getSubmittedTasksCount()); + assertEquals(0, ste.getCompletedTasksCount()); + assertEquals(0, ste.getQueuedTasksCount()); + + for (int i = 0; i < 10; i++) { + ste.execute(() -> count.incrementAndGet()); + } + + assertEquals(10, ste.getSubmittedTasksCount()); + + ste.submit(() -> { + }).get(); + + assertEquals(10, count.get()); + assertEquals(11, ste.getSubmittedTasksCount()); + + Awaitility.await().untilAsserted(() -> assertEquals(11, ste.getCompletedTasksCount())); + assertEquals(0, ste.getRejectedTasksCount()); + assertEquals(0, ste.getFailedTasksCount()); + assertEquals(0, ste.getQueuedTasksCount()); + } + + @Test + public void testRejectWhenQueueIsFull() throws Exception { + @Cleanup("shutdownNow") + SingleThreadExecutor ste = new SingleThreadExecutor(THREAD_FACTORY, 10, true); + + CyclicBarrier barrier = new CyclicBarrier(10 + 1); + CountDownLatch startedLatch = new CountDownLatch(1); + + for (int i = 0; i < 10; i++) { + ste.execute(() -> { + startedLatch.countDown(); + + try { + barrier.await(); + } catch (InterruptedException | BrokenBarrierException e) { + // ignore + } + }); + + // Wait until the first task is already running in the thread + startedLatch.await(); + } + + // Next task should go through, because the runner thread has already pulled out 1 item from the + // queue: the first tasks which is currently stuck + ste.execute(() -> { + }); + + // Now the queue is really full and should reject tasks + try { + ste.execute(() -> { + }); + fail("should have rejected the task"); + } catch (RejectedExecutionException e) { + // Expected + } + + assertTrue(ste.getSubmittedTasksCount() >= 11); + assertTrue(ste.getRejectedTasksCount() >= 1); + assertEquals(0, ste.getFailedTasksCount()); + } + + @Test + public void testBlockWhenQueueIsFull() throws Exception { + @Cleanup("shutdown") + SingleThreadExecutor ste = new SingleThreadExecutor(THREAD_FACTORY, 10, false); + + 
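+        // In contrast to testRejectWhenQueueIsFull above, the executor is built
+        // with the third argument set to false: once the bounded queue
+        // (capacity 10) fills up, execute() blocks the caller instead of
+        // throwing RejectedExecutionException, so no task is ever rejected.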
CyclicBarrier barrier = new CyclicBarrier(10 + 1); + + for (int i = 0; i < 10; i++) { + ste.execute(() -> { + try { + barrier.await(1, TimeUnit.SECONDS); + } catch (TimeoutException te) { + // ignore + } catch (Exception e) { + throw new RuntimeException(e); + } + }); + } + + assertEquals(10, ste.getQueuedTasksCount()); + + ste.submit(() -> { + }).get(); + + assertEquals(11, ste.getSubmittedTasksCount()); + assertEquals(0, ste.getRejectedTasksCount()); + } + + @Test + public void testShutdown() throws Exception { + @Cleanup("shutdown") + SingleThreadExecutor ste = new SingleThreadExecutor(THREAD_FACTORY); + + assertFalse(ste.isShutdown()); + assertFalse(ste.isTerminated()); + + AtomicInteger count = new AtomicInteger(); + + for (int i = 0; i < 3; i++) { + ste.execute(() -> { + try { + Thread.sleep(1000); + count.incrementAndGet(); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + }); + } + + ste.shutdown(); + assertTrue(ste.isShutdown()); + assertFalse(ste.isTerminated()); + + try { + ste.execute(() -> { + }); + fail("should have rejected the task"); + } catch (RejectedExecutionException e) { + // Expected + } + + ste.awaitTermination(10, TimeUnit.SECONDS); + assertTrue(ste.isShutdown()); + assertTrue(ste.isTerminated()); + + assertEquals(3, count.get()); + } + + @Test + public void testShutdownNow() throws Exception { + @Cleanup("shutdown") + SingleThreadExecutor ste = new SingleThreadExecutor(THREAD_FACTORY); + + assertFalse(ste.isShutdown()); + assertFalse(ste.isTerminated()); + + AtomicInteger count = new AtomicInteger(); + + for (int i = 0; i < 3; i++) { + ste.execute(() -> { + try { + Thread.sleep(2000); + count.incrementAndGet(); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + }); + + // Ensure the 3 tasks are not picked up in one shot by the runner thread + Thread.sleep(500); + } + + List remainingTasks = ste.shutdownNow(); + assertEquals(2, remainingTasks.size()); + assertTrue(ste.isShutdown()); + + try { + ste.execute(() -> { + }); + fail("should have rejected the task"); + } catch (RejectedExecutionException e) { + // Expected + } + + ste.awaitTermination(10, TimeUnit.SECONDS); + assertTrue(ste.isShutdown()); + assertTrue(ste.isTerminated()); + + assertEquals(0, count.get()); + } + + @Test + public void testTasksWithException() throws Exception { + @Cleanup("shutdown") + SingleThreadExecutor ste = new SingleThreadExecutor(THREAD_FACTORY); + + AtomicInteger count = new AtomicInteger(); + + for (int i = 0; i < 10; i++) { + ste.execute(() -> { + count.incrementAndGet(); + throw new RuntimeException("xyz"); + }); + } + + ste.submit(() -> { + }).get(); + assertEquals(10, count.get()); + + assertEquals(11, ste.getSubmittedTasksCount()); + Awaitility.await().untilAsserted(() -> assertEquals(1, ste.getCompletedTasksCount())); + assertEquals(0, ste.getRejectedTasksCount()); + assertEquals(10, ste.getFailedTasksCount()); + } + + @Test + public void testTasksWithNPE() throws Exception { + @Cleanup("shutdown") + SingleThreadExecutor ste = new SingleThreadExecutor(THREAD_FACTORY); + + AtomicInteger count = new AtomicInteger(); + String npeTest = null; + + for (int i = 0; i < 10; i++) { + ste.execute(() -> { + count.incrementAndGet(); + + // Trigger the NPE exception + System.out.println(npeTest.length()); + }); + } + + ste.submit(() -> { + }).get(); + assertEquals(10, count.get()); + + assertEquals(11, ste.getSubmittedTasksCount()); + Awaitility.await().untilAsserted(() -> assertEquals(1, ste.getCompletedTasksCount())); + assertEquals(0, 
ste.getRejectedTasksCount()); + assertEquals(10, ste.getFailedTasksCount()); + } + + @Test + public void testShutdownEmpty() throws Exception { + SingleThreadExecutor ste = new SingleThreadExecutor(THREAD_FACTORY); + ste.shutdown(); + assertTrue(ste.isShutdown()); + + ste.awaitTermination(10, TimeUnit.SECONDS); + assertTrue(ste.isShutdown()); + assertTrue(ste.isTerminated()); + } + + @Test + public void testExecutorQueueIsNotFixedSize() throws Exception { + int n = 1_000_000; + @Cleanup("shutdown") + SingleThreadExecutor ste = new SingleThreadExecutor(THREAD_FACTORY); + + CountDownLatch latch = new CountDownLatch(1); + // First task is blocking + ste.execute(() -> { + try { + latch.await(); + } catch (Exception e) { + e.printStackTrace(); + } + }); + + for (int i = 0; i < n; i++) { + ste.execute(() -> {}); + } + + // Submit last task and wait for completion + Future future = ste.submit(() -> {}); + + latch.countDown(); + + future.get(); + } +} diff --git a/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/util/TestThreadSelection.java b/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/util/TestThreadSelection.java new file mode 100644 index 00000000000..72b48b0f205 --- /dev/null +++ b/bookkeeper-common/src/test/java/org/apache/bookkeeper/common/util/TestThreadSelection.java @@ -0,0 +1,129 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +package org.apache.bookkeeper.common.util; + +import com.google.common.primitives.Longs; +import java.security.SecureRandom; +import java.util.Random; +import lombok.extern.slf4j.Slf4j; +import org.junit.Assert; +import org.junit.Test; + +/** + * Test how even is the distribution of ledgers across the threads of OrderedExecutor. + */ +@Slf4j +public class TestThreadSelection { + + public static final long MAX_KEY = 1_000_000L; + public static final int MAX_THREADS = 96; + public static final double MAX_DISPARITY = 1.25d; + + private final Random rnd = new SecureRandom(); + + /** + * Only even keys. + */ + @Test + public void testThreadSelectionEvenKeys() { + runTest(0L, 2L); + } + + /** + * Only odd keys. + */ + @Test + public void testThreadSelectionOddKeys() { + runTest(1L, 2L); + } + + /** + * All keys. + */ + @Test + public void testThreadSelectionAllKeys() { + runTest(0L, 1L); + } + + /** + * Random keys. 
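+ * Uses SecureRandom-generated keys and checks, for every thread count from
+ * 2 to MAX_THREADS, that each thread receives work and that the max/min
+ * placement ratio stays within MAX_DISPARITY.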
+ */ + @Test + public void testThreadSelectionRandKeys() { + for (int numThreads = 2; numThreads <= MAX_THREADS; numThreads++) { + long[] placement = new long[numThreads]; + + log.info("testing {} threads", numThreads); + for (long key = 0L; key < MAX_KEY; key += 1L) { + int threadId = OrderedExecutor.chooseThreadIdx(rnd.nextLong(), numThreads); + placement[threadId]++; + } + validateTest(placement, numThreads); + } + } + + /** + * Confirm the same key assigned to the same thread on consequent calls. + */ + @Test + public void testKeyAssignedToTheSameThread() { + for (int numThreads = 2; numThreads <= MAX_THREADS; numThreads++) { + + log.info("testing {} threads", numThreads); + for (long key = 0L; key < MAX_KEY; key += 1L) { + int threadId = OrderedExecutor.chooseThreadIdx(key, numThreads); + for (int i = 0; i < 10; i++) { + Assert.assertEquals("must be assigned to the same thread", + threadId, OrderedExecutor.chooseThreadIdx(key, numThreads)); + } + } + } + } + + + private void runTest(long start, long step) { + for (int numThreads = 2; numThreads <= MAX_THREADS; numThreads++) { + long[] placement = new long[numThreads]; + + log.info("testing {} threads", numThreads); + for (long key = start; key < MAX_KEY; key += step) { + int threadId = OrderedExecutor.chooseThreadIdx(key, numThreads); + placement[threadId]++; + } + validateTest(placement, numThreads); + } + } + + private void validateTest(long[] placement, int numThreads) { + long min = Longs.min(placement); + long max = Longs.max(placement); + log.info("got min={}, max={} (disparity: {}) for {} threads with {} ids", + min, max, max - min, numThreads, MAX_KEY); + Assert.assertTrue("all threads were used [numThreads: " + numThreads + "]", + min > 0); + log.info("disparity = {}", String.format("%,.2f", (double) max / min)); + Assert.assertTrue("no large disparity found [numThreads: " + numThreads + "], got " + + (double) max / min, + (double) max / min <= MAX_DISPARITY); + } + +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/TestStatsProvider.java b/bookkeeper-common/src/test/java/org/apache/bookkeeper/test/TestStatsProvider.java similarity index 85% rename from bookkeeper-server/src/test/java/org/apache/bookkeeper/test/TestStatsProvider.java rename to bookkeeper-common/src/test/java/org/apache/bookkeeper/test/TestStatsProvider.java index 9b9fdb54dd6..d3299b077e8 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/TestStatsProvider.java +++ b/bookkeeper-common/src/test/java/org/apache/bookkeeper/test/TestStatsProvider.java @@ -1,5 +1,4 @@ /* - * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -16,7 +15,6 @@ * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. - * */ package org.apache.bookkeeper.test; @@ -33,6 +31,7 @@ import org.apache.bookkeeper.stats.StatsLogger; import org.apache.bookkeeper.stats.StatsProvider; import org.apache.commons.configuration.Configuration; +import org.apache.commons.lang.StringUtils; /** * Simple in-memory stat provider for use in unit tests. 
@@ -61,10 +60,16 @@ public void dec() { } @Override - public void add(long delta) { + public void addCount(long delta) { updateMax(val.addAndGet(delta)); } + @Override + public void addLatency(long eventLatency, TimeUnit unit) { + long valueMillis = unit.toMillis(eventLatency); + updateMax(val.addAndGet(valueMillis)); + } + @Override public Long get() { return val.get(); @@ -103,12 +108,12 @@ public class TestOpStatsLogger implements OpStatsLogger { @Override public void registerFailedEvent(long eventLatency, TimeUnit unit) { - registerFailedValue(unit.convert(eventLatency, TimeUnit.NANOSECONDS)); + registerFailedValue(TimeUnit.NANOSECONDS.convert(eventLatency, unit)); } @Override public void registerSuccessfulEvent(long eventLatency, TimeUnit unit) { - registerSuccessfulValue(unit.convert(eventLatency, TimeUnit.NANOSECONDS)); + registerSuccessfulValue(TimeUnit.NANOSECONDS.convert(eventLatency, unit)); } @Override @@ -147,6 +152,10 @@ public synchronized double getSuccessAverage() { public synchronized long getSuccessCount() { return successCount; } + + public synchronized long getFailureCount() { + return failureCount; + } } /** @@ -178,7 +187,7 @@ public Counter getCounter(String name) { } public Gauge getGauge(String name) { - return gaugeMap.get(path); + return gaugeMap.get(getSubPath(name)); } @Override @@ -198,6 +207,16 @@ public StatsLogger scope(String name) { @Override public void removeScope(String name, StatsLogger statsLogger) {} + + @Override + public OpStatsLogger getThreadScopedOpStatsLogger(String name) { + return getOpStatsLogger(name); + } + + @Override + public Counter getThreadScopedCounter(String name) { + return getCounter(name); + } } @Override @@ -263,4 +282,15 @@ private void registerGauge(String name, Gauge gauge) { private void unregisterGauge(String name, Gauge gauge) { gaugeMap.remove(name, gauge); } + + @Override + public String getStatsName(String... statsComponents) { + if (statsComponents.length == 0) { + return ""; + } else if (statsComponents[0].isEmpty()) { + return StringUtils.join(statsComponents, '.', 1, statsComponents.length); + } else { + return StringUtils.join(statsComponents, '.'); + } + } } diff --git a/bookkeeper-common/src/test/resources/log4j.properties b/bookkeeper-common/src/test/resources/log4j.properties deleted file mode 100644 index 10ae6bfcbba..00000000000 --- a/bookkeeper-common/src/test/resources/log4j.properties +++ /dev/null @@ -1,42 +0,0 @@ -# -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-# -# - -# -# Bookkeeper Logging Configuration -# - -# Format is " (, )+ - -# DEFAULT: console appender only, level INFO -bookkeeper.root.logger=INFO,CONSOLE -log4j.rootLogger=${bookkeeper.root.logger} - -# -# Log INFO level and above messages to the console -# -log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender -log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout -log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} - %-5p - [%t:%C{1}@%L] - %m%n - -#disable zookeeper logging -log4j.logger.org.apache.zookeeper=OFF -log4j.logger.org.apache.bookkeeper.bookie=INFO -log4j.logger.org.apache.bookkeeper.meta=INFO diff --git a/bookkeeper-common/src/test/resources/test_conf_2.conf b/bookkeeper-common/src/test/resources/test_conf_2.conf new file mode 100644 index 00000000000..ca6f7bb2dbc --- /dev/null +++ b/bookkeeper-common/src/test/resources/test_conf_2.conf @@ -0,0 +1,130 @@ +################################################################################ +# Settings of `group1` +# +# This is a very long description : Lorem ipsum dolor sit amet, consectetur +# adipiscing elit. Maecenas bibendum ac felis id commodo. Etiam mauris purus, +# fringilla id tempus in, mollis vel orci. Duis ultricies at erat eget iaculis. +################################################################################ + +# it is a bool key +# +# TYPE: BOOLEAN, optional +bool_key= + +# it is a class key +# +# TYPE: CLASS, optional +# @constraints : class extends `java.lang.Runnable` +class_key= + +# it is a double key +# +# TYPE: DOUBLE, optional +# @constraints : [1234.0, 5678.0] +double_key= + +# it is an int key +# +# TYPE: INT, optional +# @constraints : [1000, ...] +int_key= + +# it is a list key +# +# TYPE: LIST, optional +list_key= + +# it is a long key +# +# TYPE: LONG, optional +# @constraints : [... , 1000] +long_key= + +# it is a short key +# +# TYPE: SHORT, optional +# @constraints : [500, 1000] +short_key= + +# it is a string key +# +# TYPE: STRING, optional +string_key= + +################################## +# Settings of `group2` +# +# This group has short description +################################## + +# it is a required key +# +# TYPE: STRING, required +required_key= + +# it is a deprecated key with since and replaced key +# +# TYPE: STRING, optional +# +# @deprecated since `4.3.0` in favor of using `key_with_optional_values` +deprecated_key_with_since_and_replaced_key= + +# it is a deprecated key with replaced key +# +# TYPE: STRING, optional +# +# @deprecated in favor of using `key_with_optional_values` +deprecated_key_with_replaced_key= + +# it is a deprecated key with since +# +# TYPE: STRING, optional +# +# @deprecated since `4.3.0` +deprecated_key_with_since= + +# it is a deprecated key +# +# TYPE: STRING, optional +# +# @deprecated +deprecated_key= + +# it is a string key with optional values +# +# TYPE: STRING, optional +# @options : +# item1 +# item2 +# item3 +# item3 +key_with_optional_values=this-is-a-default-value + +# it is a string key with default value +# +# TYPE: STRING, optional +key_with_default_value=this-is-a-test-value + +# it is a string key with documentation +# +# it has a long documentation : Lorem ipsum dolor sit amet, consectetur +# adipiscing elit. Maecenas bibendum ac felis id commodo. Etiam mauris purus, +# fringilla id tempus in, mollis vel orci. Duis ultricies at erat eget iaculis. 
+# +# TYPE: STRING, optional +key_long_short_documentation= + +# it is a string key with documentation +# +# it has a short documentation +# +# TYPE: STRING, optional +key_with_short_documentation= + +# it is a string key with since +# +# TYPE: STRING, optional +# +# @since 4.7.0 +key_with_since= + diff --git a/bookkeeper-dist/all/pom.xml b/bookkeeper-dist/all/pom.xml index 79e9808596c..89dded19fbc 100644 --- a/bookkeeper-dist/all/pom.xml +++ b/bookkeeper-dist/all/pom.xml @@ -23,12 +23,12 @@ bookkeeper-dist org.apache.bookkeeper - 4.9.0-SNAPSHOT + 4.18.0-SNAPSHOT .. bookkeeper-dist-all - jar + pom Apache BookKeeper :: Dist (All) @@ -49,6 +49,12 @@ org.apache.bookkeeper.stats codahale-metrics-provider ${project.version} + + + amqp-client + com.rabbitmq + + org.apache.bookkeeper.stats @@ -57,7 +63,7 @@ org.apache.bookkeeper.stats - twitter-finagle-provider + otel-metrics-provider ${project.version} @@ -67,11 +73,6 @@ http-server ${project.version} - - org.apache.bookkeeper.http - twitter-http-server - ${project.version} - org.apache.bookkeeper.http vertx-http-server @@ -92,6 +93,14 @@ ${project.version} + + + + org.apache.bookkeeper + stream-storage-server + ${project.version} + + org.apache.bookkeeper @@ -99,24 +108,29 @@ ${project.version} - - org.slf4j - slf4j-log4j12 + org.apache.bookkeeper + bookkeeper-perf + ${project.version} - - org.codehaus.jackson - jackson-mapper-asl + + org.xerial.snappy + snappy-java + + + + io.dropwizard.metrics + metrics-core + org.apache.maven.plugins maven-assembly-plugin - ${maven-assembly-plugin.version} bookkeeper-all-${project.version} false @@ -138,29 +152,10 @@ org.apache.maven.plugins maven-deploy-plugin - ${maven-deploy-plugin.version} true - - - stream - - - stream - - - - - - org.apache.bookkeeper - stream-storage-server - ${project.version} - - - - diff --git a/bookkeeper-dist/bkctl/pom.xml b/bookkeeper-dist/bkctl/pom.xml new file mode 100644 index 00000000000..ecfb2697d5f --- /dev/null +++ b/bookkeeper-dist/bkctl/pom.xml @@ -0,0 +1,81 @@ + + + 4.0.0 + + bookkeeper-dist + org.apache.bookkeeper + 4.18.0-SNAPSHOT + .. + + + bkctl + pom + Apache BookKeeper :: Dist (Bkctl) + + + + + org.apache.bookkeeper + bookkeeper-tools + ${project.version} + + + org.rocksdb + rocksdbjni + + + + + + + + + org.apache.maven.plugins + maven-assembly-plugin + + bkctl-${project.version} + true + + ../src/assemble/bkctl.xml + + posix + + + + package + + single + + + + + + + org.apache.maven.plugins + maven-deploy-plugin + + true + + + + + diff --git a/bookkeeper-dist/pom.xml b/bookkeeper-dist/pom.xml index 7ae8c387f59..e9efd48143c 100644 --- a/bookkeeper-dist/pom.xml +++ b/bookkeeper-dist/pom.xml @@ -19,7 +19,7 @@ bookkeeper org.apache.bookkeeper - 4.9.0-SNAPSHOT + 4.18.0-SNAPSHOT 4.0.0 bookkeeper-dist @@ -28,6 +28,7 @@ all server + bkctl UTF-8 @@ -36,8 +37,8 @@ + org.apache.maven.plugins maven-jar-plugin - ${maven-jar-plugin.version} default-jar @@ -46,8 +47,8 @@ + org.apache.maven.plugins maven-assembly-plugin - ${maven-assembly-plugin.version} bookkeeper-${project.version} @@ -64,15 +65,6 @@ - - - org.apache.maven.plugins - maven-deploy-plugin - ${maven-deploy-plugin.version} - - true - - diff --git a/bookkeeper-dist/server/pom.xml b/bookkeeper-dist/server/pom.xml index 186a652c1b5..a3f3d8d3d39 100644 --- a/bookkeeper-dist/server/pom.xml +++ b/bookkeeper-dist/server/pom.xml @@ -23,12 +23,12 @@ bookkeeper-dist org.apache.bookkeeper - 4.9.0-SNAPSHOT + 4.18.0-SNAPSHOT .. 
bookkeeper-dist-server - jar + pom Apache BookKeeper :: Dist (Server) @@ -49,6 +49,11 @@ prometheus-metrics-provider ${project.version} + + org.apache.bookkeeper.stats + otel-metrics-provider + ${project.version} + @@ -62,6 +67,13 @@ ${project.version} + + + org.apache.bookkeeper + stream-storage-server + ${project.version} + + org.apache.bookkeeper @@ -76,24 +88,23 @@ ${project.version} - - org.slf4j - slf4j-log4j12 + + org.xerial.snappy + snappy-java - - - org.codehaus.jackson - jackson-mapper-asl + + io.dropwizard.metrics + metrics-core + org.apache.maven.plugins maven-assembly-plugin - ${maven-assembly-plugin.version} bookkeeper-server-${project.version} true @@ -115,7 +126,6 @@ org.apache.maven.plugins maven-deploy-plugin - ${maven-deploy-plugin.version} true @@ -123,22 +133,4 @@ - - - stream - - - stream - - - - - - org.apache.bookkeeper - stream-storage-server - ${project.version} - - - - diff --git a/bookkeeper-dist/src/assemble/bin-all.xml b/bookkeeper-dist/src/assemble/bin-all.xml index 7b047c44682..506bfaaf9aa 100644 --- a/bookkeeper-dist/src/assemble/bin-all.xml +++ b/bookkeeper-dist/src/assemble/bin-all.xml @@ -44,6 +44,11 @@ 755 bin + + ../../stream/bin + 755 + bin + 644 @@ -54,19 +59,20 @@ ../src/main/resources/deps deps - google-auth-library-credentials-0.9.0/LICENSE - javax.servlet-api-3.1.0/CDDL+GPL-1.1 + checker-qual-3.5.0/LICENSE + google-auth-library-credentials-0.20.0/LICENSE + javax.servlet-api-4.0.0/CDDL+GPL-1.1 + bouncycastle-1.0.2.3/LICENSE.html jsr-305/LICENSE - netty-3.10.1.Final/* - netty-4.1.22.Final/* + netty/* paranamer-2.8/LICENSE.txt - protobuf-3.0.0/LICENSE - jline-0.9.94/LICENSE - protobuf-3.5.1/LICENSE + protobuf-3.14.0/LICENSE + protobuf-3.12.0/LICENSE + reactivestreams-1.0.3/LICENSE scala-library-2.11.7/LICENSE.md scala-parser-combinators_2.11-1.0.4/LICENSE.md scala-reflect-2.11.8/LICENSE.md - slf4j-1.7.25/LICENSE.txt + slf4j-2.0.12/LICENSE.txt 644 @@ -99,27 +105,13 @@ ${artifact.groupId}-${artifact.artifactId}-${artifact.version}${dashClassifier?}.${artifact.extension} - - io.netty:netty-buffer - io.netty:netty-codec - io.netty:netty-codec-dns - io.netty:netty-codec-http - io.netty:netty-codec-http2 - io.netty:netty-codec-socks - io.netty:netty-common - io.netty:netty-handler - io.netty:netty-handler-proxy - io.netty:netty-resolver - io.netty:netty-resolver-dns - io.netty:netty-tcnative-boringssl-static - io.netty:netty-transport-native-epoll - io.netty:netty-transport org.apache.bookkeeper:stream-storage-common org.apache.bookkeeper:stream-storage-proto org.apache.bookkeeper:stream-storage-api org.apache.bookkeeper:stream-storage-java-client-base org.apache.bookkeeper:stream-storage-java-kv-client + org.codehaus.mojo:animal-sniffer-annotations diff --git a/bookkeeper-dist/src/assemble/bin-server.xml b/bookkeeper-dist/src/assemble/bin-server.xml index aa7d1b89c8c..f80dac43c41 100644 --- a/bookkeeper-dist/src/assemble/bin-server.xml +++ b/bookkeeper-dist/src/assemble/bin-server.xml @@ -49,13 +49,15 @@ ../src/main/resources/deps deps - google-auth-library-credentials-0.9.0/LICENSE - javax.servlet-api-3.1.0/CDDL+GPL-1.1 - netty-4.1.22.Final/* - protobuf-3.0.0/LICENSE - jline-0.9.94/LICENSE - protobuf-3.5.1/LICENSE - slf4j-1.7.25/LICENSE.txt + checker-qual-3.5.0/LICENSE + google-auth-library-credentials-0.20.0/LICENSE + javax.servlet-api-4.0.0/CDDL+GPL-1.1 + netty/* + bouncycastle-1.0.2.3/LICENSE.html + protobuf-3.14.0/LICENSE + protobuf-3.12.0/LICENSE + reactivestreams-1.0.3/LICENSE + slf4j-2.0.12/LICENSE.txt 644 @@ -89,26 +91,13 @@ 
${artifact.groupId}-${artifact.artifactId}-${artifact.version}${dashClassifier?}.${artifact.extension} com.google.code.findbugs:jsr305 - - io.netty:netty-buffer - io.netty:netty-codec - io.netty:netty-codec-dns - io.netty:netty-codec-http - io.netty:netty-codec-http2 - io.netty:netty-codec-socks - io.netty:netty-common - io.netty:netty-handler - io.netty:netty-handler-proxy - io.netty:netty-resolver - io.netty:netty-resolver-dns - io.netty:netty-tcnative-boringssl-static - io.netty:netty-transport org.apache.bookkeeper:stream-storage-common org.apache.bookkeeper:stream-storage-proto org.apache.bookkeeper:stream-storage-api org.apache.bookkeeper:stream-storage-java-client-base org.apache.bookkeeper:stream-storage-java-kv-client + org.codehaus.mojo:animal-sniffer-annotations diff --git a/bookkeeper-dist/src/assemble/bkctl.xml b/bookkeeper-dist/src/assemble/bkctl.xml new file mode 100644 index 00000000000..45f9d1b12a5 --- /dev/null +++ b/bookkeeper-dist/src/assemble/bkctl.xml @@ -0,0 +1,122 @@ + + + bin + + tar.gz + + true + + + target + + + ${project.artifactId}-${project.version}.jar + + + + ../../conf + conf + + bk_cli_env.sh + bk_server.conf + bkenv.sh + jaas_example.conf + log4j2.cli.xml + log4j2.shell.xml + nettyenv.sh + default_rocksdb.conf + entry_location_rocksdb.conf + ledger_metadata_rocksdb.conf + + + + ../../bin + 755 + bin + + bkctl + bkperf + bookkeeper + common.sh + + + + 644 + + ${basedir}/*.txt + + + + ../src/main/resources/deps + deps + + checker-qual-3.5.0/LICENSE + google-auth-library-credentials-0.20.0/LICENSE + netty/* + bouncycastle-1.0.2.3/LICENSE.html + protobuf-3.14.0/LICENSE + protobuf-3.12.0/LICENSE + reactivestreams-1.0.3/LICENSE + slf4j-2.0.12/LICENSE.txt + + 644 + + + + + ../../README.md + + 644 + + + ../src/main/resources/LICENSE-bkctl.bin.txt + + LICENSE + 644 + + + ../src/main/resources/NOTICE-bkctl.bin.txt + + NOTICE + 644 + + + + + lib + false + runtime + false + + ${artifact.groupId}-${artifact.artifactId}-${artifact.version}${dashClassifier?}.${artifact.extension} + + com.google.code.findbugs:jsr305 + + org.apache.bookkeeper:stream-storage-common + org.apache.bookkeeper:stream-storage-proto + org.apache.bookkeeper:stream-storage-api + org.apache.bookkeeper:stream-storage-java-client-base + org.codehaus.mojo:animal-sniffer-annotations + + org.rocksdb:rocksdbjni + + + + diff --git a/bookkeeper-dist/src/assemble/src.xml b/bookkeeper-dist/src/assemble/src.xml index 9d63cf26815..94cf8dc347f 100644 --- a/bookkeeper-dist/src/assemble/src.xml +++ b/bookkeeper-dist/src/assemble/src.xml @@ -31,6 +31,7 @@ **/LICENSE **/NOTICE **/pom.xml + **/*gradle* **/src/** **/conf/** **/bin/** @@ -58,9 +59,7 @@ dev/** - site/** - - jenkins/** + site3/** diff --git a/bookkeeper-dist/src/main/resources/LICENSE-all.bin.txt b/bookkeeper-dist/src/main/resources/LICENSE-all.bin.txt index 5303d528625..64ebd7c5bdc 100644 --- a/bookkeeper-dist/src/main/resources/LICENSE-all.bin.txt +++ b/bookkeeper-dist/src/main/resources/LICENSE-all.bin.txt @@ -205,257 +205,437 @@ The following bundled 3rd party jars are distributed under the Apache Software License, Version 2. 
-- lib/com.fasterxml.jackson.core-jackson-annotations-2.8.9.jar [1] -- lib/com.fasterxml.jackson.core-jackson-core-2.8.9.jar [2] -- lib/com.fasterxml.jackson.core-jackson-databind-2.8.9.jar [3] -- lib/com.fasterxml.jackson.module-jackson-module-paranamer-2.8.4.jar [4] -- lib/com.fasterxml.jackson.module-jackson-module-scala_2.11-2.8.4.jar [5] -- lib/com.github.ben-manes.caffeine-caffeine-2.3.4.jar [9] -- lib/com.google.guava-guava-21.0.jar [6] -- lib/commons-cli-commons-cli-1.2.jar [7] -- lib/commons-codec-commons-codec-1.6.jar [8] -- lib/commons-configuration-commons-configuration-1.10.jar [10] -- lib/commons-io-commons-io-2.4.jar [11] -- lib/commons-lang-commons-lang-2.6.jar [12] -- lib/commons-logging-commons-logging-1.1.1.jar [13] -- lib/com.twitter-finagle-base-http_2.11-6.44.0.jar [14] -- lib/com.twitter-finagle-core_2.11-6.44.0.jar [15] -- lib/com.twitter-finagle-http_2.11-6.44.0.jar [14] -- lib/com.twitter-finagle-http2_2.11-6.44.0.jar [14] -- lib/com.twitter-finagle-netty4_2.11-6.44.0.jar [14] -- lib/com.twitter-finagle-netty4-http_2.11-6.44.0.jar [14] -- lib/com.twitter-finagle-thrift_2.11-6.44.0.jar [14] -- lib/com.twitter-finagle-toggle_2.11-6.44.0.jar [14] -- lib/com.twitter-finagle-tunable_2.11-6.44.0.jar [14] -- lib/com.twitter-finagle-zipkin-core_2.11-6.44.0.jar [14] -- lib/com.twitter-libthrift-0.5.0-7.jar [16] -- lib/com.twitter-scrooge-core_2.11-4.16.0.jar [17] -- lib/com.twitter-twitter-server_2.11-1.29.0.jar [18] -- lib/com.twitter-util-app_2.11-6.43.0.jar [19] -- lib/com.twitter-util-cache_2.11-6.43.0.jar [19] -- lib/com.twitter-util-codec_2.11-6.43.0.jar [19] -- lib/com.twitter-util-collection_2.11-6.43.0.jar [19] -- lib/com.twitter-util-core_2.11-6.43.0.jar [19] -- lib/com.twitter-util-events_2.11-6.43.0.jar [20] -- lib/com.twitter-util-function_2.11-6.43.0.jar [19] -- lib/com.twitter-util-hashing_2.11-6.43.0.jar [19] -- lib/com.twitter-util-jvm_2.11-6.43.0.jar [19] -- lib/com.twitter-util-lint_2.11-6.43.0.jar [19] -- lib/com.twitter-util-logging_2.11-6.43.0.jar [19] -- lib/com.twitter-util-registry_2.11-6.43.0.jar [19] -- lib/com.twitter-util-security_2.11-6.43.0.jar [19] -- lib/com.twitter-util-stats_2.11-6.43.0.jar [19] -- lib/com.twitter-util-tunable_2.11-6.43.0.jar [20] -- lib/io.dropwizard.metrics-metrics-core-3.1.0.jar [21] -- lib/io.dropwizard.metrics-metrics-graphite-3.1.0.jar [21] -- lib/io.dropwizard.metrics-metrics-jvm-3.1.0.jar [21] -- lib/io.netty-netty-3.10.1.Final.jar [22] -- lib/io.netty-netty-all-4.1.22.Final.jar [23] -- lib/io.prometheus-simpleclient-0.0.21.jar [24] -- lib/io.prometheus-simpleclient_common-0.0.21.jar [24] -- lib/io.prometheus-simpleclient_hotspot-0.0.21.jar [24] -- lib/io.prometheus-simpleclient_servlet-0.0.21.jar [24] -- lib/io.vertx-vertx-auth-common-3.4.1.jar [25] -- lib/io.vertx-vertx-core-3.4.1.jar [26] -- lib/io.vertx-vertx-web-3.4.1.jar [27] -- lib/log4j-log4j-1.2.17.jar [29] -- lib/net.java.dev.jna-jna-3.2.7.jar [30] -- lib/org.apache.commons-commons-collections4-4.1.jar [31] -- lib/org.apache.commons-commons-lang3-3.6.jar [32] -- lib/org.apache.zookeeper-zookeeper-3.4.13.jar [33] -- lib/org.eclipse.jetty-jetty-http-9.4.5.v20170502.jar [34] -- lib/org.eclipse.jetty-jetty-io-9.4.5.v20170502.jar [34] -- lib/org.eclipse.jetty-jetty-security-9.4.5.v20170502.jar [34] -- lib/org.eclipse.jetty-jetty-server-9.4.5.v20170502.jar [34] -- lib/org.eclipse.jetty-jetty-servlet-9.4.5.v20170502.jar [34] -- lib/org.eclipse.jetty-jetty-util-9.4.5.v20170502.jar [34] -- lib/org.rocksdb-rocksdbjni-5.13.1.jar [35] -- 
lib/com.beust-jcommander-1.48.jar [36] -- lib/com.yahoo.datasketches-memory-0.8.3.jar [37] -- lib/com.yahoo.datasketches-sketches-core-0.8.3.jar [37] -- lib/net.jpountz.lz4-lz4-1.3.0.jar [38] -- lib/org.codehaus.jackson-jackson-core-asl-1.9.11.jar [39] -- lib/org.codehaus.jackson-jackson-mapper-asl-1.9.11.jar [40] -- lib/com.google.api.grpc-proto-google-common-protos-1.0.0.jar [41] -- lib/com.google.code.gson-gson-2.7.jar [42] -- lib/io.opencensus-opencensus-api-0.11.0.jar [43] -- lib/io.opencensus-opencensus-contrib-grpc-metrics-0.11.0.jar [43] -- lib/com.squareup.okhttp-okhttp-2.5.0.jar [44] -- lib/com.squareup.okio-okio-1.13.0.jar [45] -- lib/io.grpc-grpc-all-1.12.0.jar [46] -- lib/io.grpc-grpc-auth-1.12.0.jar [46] -- lib/io.grpc-grpc-context-1.12.0.jar [46] -- lib/io.grpc-grpc-core-1.12.0.jar [46] -- lib/io.grpc-grpc-netty-1.12.0.jar [46] -- lib/io.grpc-grpc-okhttp-1.12.0.jar [46] -- lib/io.grpc-grpc-protobuf-1.12.0.jar [46] -- lib/io.grpc-grpc-protobuf-lite-1.12.0.jar [46] -- lib/io.grpc-grpc-protobuf-nano-1.12.0.jar [46] -- lib/io.grpc-grpc-stub-1.12.0.jar [46] -- lib/io.grpc-grpc-testing-1.12.0.jar [46] -- lib/org.apache.curator-curator-client-4.0.1.jar [47] -- lib/org.apache.curator-curator-framework-4.0.1.jar [47] -- lib/org.apache.curator-curator-recipes-4.0.1.jar [47] -- lib/org.inferred-freebuilder-1.14.9.jar [48] -- lib/com.google.errorprone-error_prone_annotations-2.1.2.jar [49] -- lib/org.apache.yetus-audience-annotations-0.5.0.jar [50] - -[1] Source available at https://github.com/FasterXML/jackson-annotations/tree/jackson-annotations-2.8.9 -[2] Source available at https://github.com/FasterXML/jackson-core/tree/jackson-core-2.8.9 -[3] Source available at https://github.com/FasterXML/jackson-databind/tree/jackson-databind-2.8.9 -[4] Source available at https://github.com/FasterXML/jackson-modules-base/tree/jackson-modules-base-2.8.4 -[5] Source available at https://github.com/FasterXML/jackson-module-scala/tree/f9e099 -[6] Source available at https://github.com/google/guava/tree/v21.0 -[7] Source available at https://git-wip-us.apache.org/repos/asf?p=commons-cli.git;a=tag;h=bc8f0e -[8] Source available at http://svn.apache.org/viewvc/commons/proper/codec/tags/1_6/ -[9] Source available at https://github.com/ben-manes/caffeine/tree/v2.3.4 -[10] Source available at http://svn.apache.org/viewvc/commons/proper/configuration/tags/CONFIGURATION_1_10/ -[11] Source available at https://git-wip-us.apache.org/repos/asf?p=commons-io.git;a=tag;h=603579 -[12] Source available at https://git-wip-us.apache.org/repos/asf?p=commons-lang.git;a=tag;h=375459 -[13] Source available at http://svn.apache.org/viewvc/commons/proper/logging/tags/commons-logging-1.1.1/ -[14] Source available at https://github.com/twitter/finagle/tree/finagle-6.44.0 -[15] Source available at https://github.com/twitter/finagle/tree/finagle-6.43.0 -[16] Source unavailable -[17] Source available at https://github.com/twitter/scrooge/tree/scrooge-4.16.0 -[18] Source available at https://github.com/twitter/twitter-server/tree/twitter-server-1.29.0 -[19] Source available at https://github.com/twitter/util/tree/util-6.43.0 -[20] Source available at https://github.com/twitter/util/tree/util-6.43.0 -[21] Source available at https://github.com/dropwizard/metrics/tree/v3.1.0 -[22] Source available at https://bintray.com/netty/downloads/download_file?file_path=netty-3.10.1.Final-dist.tar.bz2 -[23] Source available at https://github.com/netty/netty/tree/netty-4.1.22.Final -[24] Source available at 
https://github.com/prometheus/client_java/tree/parent-0.0.21 -[25] Source available at https://github.com/vert-x3/vertx-auth/tree/3.4.1 -[26] Source available at https://github.com/eclipse/vert.x/tree/3.4.1 -[27] Source available at https://github.com/vert-x3/vertx-web/tree/3.4.1 -[29] Source available at http://logging.apache.org/log4j/1.2/download.html -[30] Source available at https://github.com/java-native-access/jna/tree/3.2.7 -[31] Source available at https://git-wip-us.apache.org/repos/asf?p=commons-collections.git;a=tag;h=a3a5ad -[32] Source available at https://git-wip-us.apache.org/repos/asf?p=commons-lang.git;a=tag;h=3ad2e8 -[33] Source available at https://github.com/apache/zookeeper/tree/release-3.4.13 -[34] Source available at https://github.com/eclipse/jetty.project/tree/jetty-9.4.5.v20170502 -[35] Source available at https://github.com/facebook/rocksdb/tree/v5.13.1 -[36] Source available at https://github.com/cbeust/jcommander/tree/jcommander-1.48 -[37] Source available at https://github.com/DataSketches/sketches-core/tree/sketches-0.8.3 -[38] Source available at https://github.com/lz4/lz4-java/tree/1.3.0 -[39] Source available at https://github.com/codehaus/jackson/tree/1.9 -[40] Source available at https://github.com/codehaus/jackson/tree/1.9 -[41] Source available at https://github.com/googleapis/googleapis -[42] Source available at https://github.com/google/gson/tree/gson-parent-2.7 -[43] Source available at https://github.com/census-instrumentation/opencensus-java/tree/v0.11.0 -[44] Source available at https://github.com/square/okhttp/tree/parent-2.5.0 -[45] Source available at https://github.com/square/okio/tree/okio-parent-1.13.0 -[46] Source available at https://github.com/grpc/grpc-java/tree/v1.12.0 -[47] Source available at https://github.com/apache/curator/tree/apache-curator-4.0.1 -[48] Source available at https://github.com/inferred/FreeBuilder/tree/v1.14.9 -[49] Source available at https://github.com/google/error-prone/tree/v2.1.2 -[50] Source available at https://github.com/apache/yetus/tree/rel/0.5.0 - +- lib/com.fasterxml.jackson.core-jackson-annotations-2.17.1.jar [1] +- lib/com.fasterxml.jackson.core-jackson-core-2.17.1.jar [2] +- lib/com.fasterxml.jackson.core-jackson-databind-2.17.1.jar [3] +- lib/com.google.guava-guava-32.0.1-jre.jar [4] +- lib/com.google.guava-failureaccess-1.0.1.jar [4] +- lib/com.google.guava-listenablefuture-9999.0-empty-to-avoid-conflict-with-guava.jar [4] +- lib/commons-cli-commons-cli-1.2.jar [5] +- lib/commons-codec-commons-codec-1.6.jar [6] +- lib/commons-configuration-commons-configuration-1.10.jar [7] +- lib/commons-io-commons-io-2.7.jar [8] +- lib/commons-lang-commons-lang-2.6.jar [9] +- lib/commons-logging-commons-logging-1.1.1.jar [10] +- lib/io.netty-netty-buffer-4.1.111.Final.jar [11] +- lib/io.netty-netty-codec-4.1.111.Final.jar [11] +- lib/io.netty-netty-codec-dns-4.1.111.Final.jar [11] +- lib/io.netty-netty-codec-http-4.1.111.Final.jar [11] +- lib/io.netty-netty-codec-http2-4.1.111.Final.jar [11] +- lib/io.netty-netty-codec-socks-4.1.111.Final.jar [11] +- lib/io.netty-netty-common-4.1.111.Final.jar [11] +- lib/io.netty-netty-handler-4.1.111.Final.jar [11] +- lib/io.netty-netty-handler-proxy-4.1.111.Final.jar [11] +- lib/io.netty-netty-resolver-4.1.111.Final.jar [11] +- lib/io.netty-netty-resolver-dns-4.1.111.Final.jar [11] +- lib/io.netty-netty-tcnative-boringssl-static-2.0.65.Final.jar [11] +- lib/io.netty-netty-tcnative-boringssl-static-2.0.65.Final-linux-aarch_64.jar [11] +- 
lib/io.netty-netty-tcnative-boringssl-static-2.0.65.Final-linux-x86_64.jar [11] +- lib/io.netty-netty-tcnative-boringssl-static-2.0.65.Final-osx-aarch_64.jar [11] +- lib/io.netty-netty-tcnative-boringssl-static-2.0.65.Final-osx-x86_64.jar [11] +- lib/io.netty-netty-tcnative-boringssl-static-2.0.65.Final-windows-x86_64.jar [11] +- lib/io.netty-netty-tcnative-classes-2.0.65.Final.jar [11] +- lib/io.netty-netty-transport-4.1.111.Final.jar [11] +- lib/io.netty-netty-transport-classes-epoll-4.1.111.Final.jar [11] +- lib/io.netty-netty-transport-native-epoll-4.1.111.Final-linux-aarch_64.jar [11] +- lib/io.netty-netty-transport-native-epoll-4.1.111.Final-linux-x86_64.jar [11] +- lib/io.netty.incubator-netty-incubator-transport-native-io_uring-0.0.25.Final-linux-x86_64.jar [11] +- lib/io.netty.incubator-netty-incubator-transport-native-io_uring-0.0.25.Final-linux-aarch_64.jar [11] +- lib/io.netty.incubator-netty-incubator-transport-classes-io_uring-0.0.25.Final.jar [11] +- lib/io.netty-netty-transport-native-unix-common-4.1.111.Final.jar [11] +- lib/io.prometheus-simpleclient-0.15.0.jar [12] +- lib/io.prometheus-simpleclient_common-0.15.0.jar [12] +- lib/io.prometheus-simpleclient_hotspot-0.15.0.jar [12] +- lib/io.prometheus-simpleclient_servlet-0.15.0.jar [12] +- lib/io.prometheus-simpleclient_servlet_common-0.15.0.jar [12] +- lib/io.prometheus-simpleclient_tracer_common-0.15.0.jar [12] +- lib/io.prometheus-simpleclient_tracer_otel-0.15.0.jar [12] +- lib/io.prometheus-simpleclient_tracer_otel_agent-0.15.0.jar [12] +- lib/io.vertx-vertx-auth-common-4.5.7.jar [13] +- lib/io.vertx-vertx-bridge-common-4.5.7.jar [14] +- lib/io.vertx-vertx-core-4.5.7.jar [15] +- lib/io.vertx-vertx-web-4.5.7.jar [16] +- lib/io.vertx-vertx-web-common-4.5.7.jar [16] +- lib/org.apache.logging.log4j-log4j-api-2.23.1.jar [17] +- lib/org.apache.logging.log4j-log4j-core-2.23.1.jar [17] +- lib/org.apache.logging.log4j-log4j-slf4j2-impl-2.23.1.jar [17] +- lib/org.apache.commons-commons-collections4-4.1.jar [19] +- lib/org.apache.commons-commons-lang3-3.6.jar [20] +- lib/org.apache.zookeeper-zookeeper-3.8.4.jar [21] +- lib/org.apache.zookeeper-zookeeper-jute-3.8.4.jar [21] +- lib/org.apache.zookeeper-zookeeper-3.8.4-tests.jar [21] +- lib/org.eclipse.jetty-jetty-http-9.4.53.v20231009.jar [22] +- lib/org.eclipse.jetty-jetty-io-9.4.53.v20231009.jar [22] +- lib/org.eclipse.jetty-jetty-security-9.4.53.v20231009.jar [22] +- lib/org.eclipse.jetty-jetty-server-9.4.53.v20231009.jar [22] +- lib/org.eclipse.jetty-jetty-servlet-9.4.53.v20231009.jar [22] +- lib/org.eclipse.jetty-jetty-util-9.4.53.v20231009.jar [22] +- lib/org.eclipse.jetty-jetty-util-ajax-9.4.53.v20231009.jar [22] +- lib/org.rocksdb-rocksdbjni-7.10.2.jar [23] +- lib/com.beust-jcommander-1.82.jar [24] +- lib/com.yahoo.datasketches-memory-0.8.3.jar [25] +- lib/com.yahoo.datasketches-sketches-core-0.8.3.jar [25] +- lib/net.jpountz.lz4-lz4-1.3.0.jar [26] +- lib/com.google.api.grpc-proto-google-common-protos-2.29.0.jar [28] +- lib/com.google.code.gson-gson-2.10.1.jar [29] +- lib/io.opencensus-opencensus-api-0.31.1.jar [30] +- lib/io.opencensus-opencensus-contrib-http-util-0.31.1.jar [30] +- lib/io.opencensus-opencensus-proto-0.2.0.jar [30] +- lib/io.grpc-grpc-all-1.64.0.jar [33] +- lib/io.grpc-grpc-alts-1.64.0.jar [33] +- lib/io.grpc-grpc-api-1.64.0.jar [33] +- lib/io.grpc-grpc-auth-1.64.0.jar [33] +- lib/io.grpc-grpc-context-1.64.0.jar [33] +- lib/io.grpc-grpc-core-1.64.0.jar [33] +- lib/io.grpc-grpc-grpclb-1.64.0.jar [33] +- lib/io.grpc-grpc-inprocess-1.64.0.jar [33] +- 
lib/io.grpc-grpc-opentelemetry-1.64.0.jar [33] +- lib/io.grpc-grpc-netty-shaded-1.64.0.jar [33] +- lib/io.grpc-grpc-protobuf-1.64.0.jar [33] +- lib/io.grpc-grpc-protobuf-lite-1.64.0.jar [33] +- lib/io.grpc-grpc-services-1.64.0.jar [33] +- lib/io.grpc-grpc-stub-1.64.0.jar [33] +- lib/io.grpc-grpc-testing-1.64.0.jar [33] +- lib/io.grpc-grpc-util-1.64.0.jar [33] +- lib/io.grpc-grpc-xds-1.64.0.jar [33] +- lib/io.grpc-grpc-rls-1.64.0.jar [33] +- lib/org.apache.curator-curator-client-5.1.0.jar [34] +- lib/org.apache.curator-curator-framework-5.1.0.jar [34] +- lib/org.apache.curator-curator-recipes-5.1.0.jar [34] +- lib/com.google.errorprone-error_prone_annotations-2.9.0.jar [36] +- lib/org.apache.yetus-audience-annotations-0.12.0.jar [37] +- lib/org.jctools-jctools-core-2.1.2.jar [38] +- lib/org.apache.httpcomponents-httpclient-4.5.13.jar [39] +- lib/org.apache.httpcomponents-httpcore-4.4.15.jar [40] +- lib/org.apache.thrift-libthrift-0.14.2.jar [41] +- lib/com.google.android-annotations-4.1.1.4.jar [42] +- lib/com.google.http-client-google-http-client-1.43.3.jar [43] +- lib/com.google.http-client-google-http-client-gson-1.43.3.jar [43] +- lib/com.google.auto.value-auto-value-annotations-1.10.4.jar [44] +- lib/com.google.j2objc-j2objc-annotations-2.8.jar [45] +- lib/com.google.re2j-re2j-1.7.jar [46] +- lib/io.dropwizard.metrics-metrics-core-4.1.12.1.jar [47] +- lib/io.dropwizard.metrics-metrics-graphite-4.1.12.1.jar [47] +- lib/io.dropwizard.metrics-metrics-jmx-4.1.12.1.jar [47] +- lib/io.dropwizard.metrics-metrics-jvm-4.1.12.1.jar [47] +- lib/io.perfmark-perfmark-api-0.26.0.jar [48] +- lib/org.conscrypt-conscrypt-openjdk-uber-2.5.2.jar [49] +- lib/org.xerial.snappy-snappy-java-1.1.10.5.jar [50] +- lib/io.reactivex.rxjava3-rxjava-3.0.1.jar [51] +- lib/org.hdrhistogram-HdrHistogram-2.1.10.jar [52] +- lib/com.carrotsearch-hppc-0.9.1.jar [53] +- lib/com.squareup.okhttp3-okhttp-4.12.0.jar [54] +- lib/com.squareup.okio-okio-3.6.0.jar [54] +- lib/com.squareup.okio-okio-jvm-3.6.0.jar [54] +- lib/io.opentelemetry-opentelemetry-api-1.26.0.jar [55] +- lib/io.opentelemetry-opentelemetry-api-events-1.26.0-alpha.jar [55] +- lib/io.opentelemetry-opentelemetry-api-logs-1.26.0-alpha.jar [55] +- lib/io.opentelemetry-opentelemetry-context-1.26.0.jar [55] +- lib/io.opentelemetry-opentelemetry-exporter-common-1.26.0.jar [55] +- lib/io.opentelemetry-opentelemetry-exporter-otlp-1.26.0.jar [55] +- lib/io.opentelemetry-opentelemetry-exporter-otlp-common-1.26.0.jar [55] +- lib/io.opentelemetry-opentelemetry-exporter-prometheus-1.26.0-alpha.jar [55] +- lib/io.opentelemetry-opentelemetry-extension-incubator-1.26.0-alpha.jar [55] +- lib/io.opentelemetry-opentelemetry-sdk-1.26.0.jar [55] +- lib/io.opentelemetry-opentelemetry-sdk-common-1.26.0.jar [55] +- lib/io.opentelemetry-opentelemetry-sdk-extension-autoconfigure-1.26.0-alpha.jar [55] +- lib/io.opentelemetry-opentelemetry-sdk-extension-autoconfigure-spi-1.26.0.jar [55] +- lib/io.opentelemetry-opentelemetry-sdk-logs-1.26.0-alpha.jar [55] +- lib/io.opentelemetry-opentelemetry-sdk-metrics-1.26.0.jar [55] +- lib/io.opentelemetry-opentelemetry-sdk-trace-1.26.0.jar [55] +- lib/io.opentelemetry-opentelemetry-semconv-1.26.0-alpha.jar [55] +- lib/io.opentelemetry.instrumentation-opentelemetry-instrumentation-api-1.26.0.jar [55] +- lib/io.opentelemetry.instrumentation-opentelemetry-instrumentation-api-semconv-1.26.0-alpha.jar [55] +- lib/io.opentelemetry.instrumentation-opentelemetry-runtime-metrics-1.26.0-alpha.jar [55] +- lib/org.jetbrains-annotations-13.0.jar [56] +- 
lib/org.jetbrains.kotlin-kotlin-stdlib-1.8.21.jar [56] +- lib/org.jetbrains.kotlin-kotlin-stdlib-common-1.8.21.jar [56] +- lib/org.jetbrains.kotlin-kotlin-stdlib-jdk7-1.8.21.jar [56] +- lib/org.jetbrains.kotlin-kotlin-stdlib-jdk8-1.8.21.jar [56] +- lib/com.lmax-disruptor-4.0.0.jar [57] + +[1] Source available at https://github.com/FasterXML/jackson-annotations/tree/jackson-annotations-2.17.1 +[2] Source available at https://github.com/FasterXML/jackson-core/tree/jackson-core-2.17.1 +[3] Source available at https://github.com/FasterXML/jackson-databind/tree/jackson-databind-2.17.1 +[4] Source available at https://github.com/google/guava/tree/v32.0.1 +[5] Source available at https://github.com/apache/commons-cli/tree/cli-1.2 +[6] Source available at https://github.com/apache/commons-codec/tree/commons-codec-1.6-RC2 +[7] Source available at https://github.com/apache/commons-configuration/tree/CONFIGURATION_1_10 +[8] Source available at https://github.com/apache/commons-io/tree/rel/commons-io-2.7 +[9] Source available at https://github.com/apache/commons-lang/tree/LANG_2_6 +[10] Source available at https://github.com/apache/commons-logging/tree/commons-logging-1.1.1 +[11] Source available at https://github.com/netty/netty/tree/netty-4.1.111.Final +[12] Source available at https://github.com/prometheus/client_java/tree/parent-0.15.0 +[13] Source available at https://github.com/vert-x3/vertx-auth/tree/4.5.7 +[14] Source available at https://github.com/vert-x3/vertx-bridge-common/tree/4.5.7 +[15] Source available at https://github.com/eclipse/vert.x/tree/4.5.7 +[16] Source available at https://github.com/vert-x3/vertx-web/tree/4.5.7 +[17] Source available at https://github.com/apache/logging-log4j2/tree/rel/2.23.1 +[19] Source available at https://github.com/apache/commons-collections/tree/collections-4.1 +[20] Source available at https://github.com/apache/commons-lang/tree/LANG_3_6 +[21] Source available at https://github.com/apache/zookeeper/tree/release-3.8.4 +[22] Source available at https://github.com/eclipse/jetty.project/tree/jetty-9.4.53.v20231009 +[23] Source available at https://github.com/facebook/rocksdb/tree/v7.10.2 +[24] Source available at https://github.com/cbeust/jcommander/tree/1.82 +[25] Source available at https://github.com/DataSketches/sketches-core/tree/sketches-0.8.3 +[26] Source available at https://github.com/lz4/lz4-java/tree/1.3.0 +[28] Source available at https://github.com/googleapis/java-common-protos/tree/v2.29.0 +[29] Source available at https://github.com/google/gson/tree/gson-parent-2.10.1 +[30] Source available at https://github.com/census-instrumentation/opencensus-java/tree/v0.31.1 +[33] Source available at https://github.com/grpc/grpc-java/tree/v1.64.0 +[34] Source available at https://github.com/apache/curator/releases/tag/apache.curator-5.1.0 +[36] Source available at https://github.com/google/error-prone/tree/v2.9.0 +[37] Source available at https://github.com/apache/yetus/tree/rel/0.12.0 +[38] Source available at https://github.com/JCTools/JCTools/tree/v2.1.2 +[39] Source available at https://github.com/apache/httpcomponents-client/tree/rel/v4.5.13 +[40] Source available at https://github.com/apache/httpcomponents-core/tree/rel/v4.4.15 +[41] Source available at https://github.com/apache/thrift/tree/0.14.2 +[42] Source available at https://source.android.com/ +[43] Source available at https://github.com/googleapis/google-http-java-client/releases/tag/v1.43.3 +[44] Source available at https://github.com/google/auto/releases/tag/auto-value-1.10.4 +[45] 
Source available at https://github.com/google/j2objc/releases/tag/1.3 +[46] Source available at https://github.com/google/re2j/releases/tag/re2j-1.7 +[47] Source available at https://github.com/dropwizard/metrics/releases/tag/v4.1.12.1 +[48] Source available at https://github.com/perfmark/perfmark/releases/tag/v0.26.0 +[49] Source available at https://github.com/google/conscrypt/releases/tag/2.5.2 +[50] Source available at https://github.com/xerial/snappy-java/releases/tag/v1.1.10.5 +[51] Source available at https://github.com/ReactiveX/RxJava/tree/v3.0.1 +[52] Source available at https://github.com/HdrHistogram/HdrHistogram/tree/HdrHistogram-2.1.10 +[53] Source available at https://github.com/carrotsearch/hppc/tree/0.9.1 +[54] Source available at https://github.com/square/okio/releases/tag/parent-3.6.0 +[55] Source available at https://github.com/open-telemetry/opentelemetry-java/releases/tag/v1.26.0 +[56] Source available at https://github.com/JetBrains/kotlin/releases/tag/v1.8.21 +[57] Source available at https://github.com/LMAX-Exchange/disruptor/releases/tag/4.0.0 ------------------------------------------------------------------------------------ -lib/io.netty-netty-3.10.1.Final.jar contains the extensions to Java Collections Framework which has +lib/io.netty-netty-codec-4.1.111.Final.jar bundles some 3rd party dependencies + +lib/io.netty-netty-codec-4.1.111.Final.jar contains the extensions to Java Collections Framework which has been derived from the works by JSR-166 EG, Doug Lea, and Jason T. Greene: * LICENSE: - * deps/netty-3.10.1.Final/LICENSE.jsr166y.txt (Public Domain) + * deps/netty/LICENSE.jsr166y.txt (Public Domain) * HOMEPAGE: * http://gee.cs.oswego.edu/cgi-bin/viewcvs.cgi/jsr166/ * http://viewvc.jboss.org/cgi-bin/viewvc.cgi/jbosscache/experimental/jsr166/ -lib/io.netty-netty-3.10.1.Final.jar contains a modified version of Robert Harder's Public Domain +lib/io.netty-netty-codec-4.1.111.Final.jar contains a modified version of Robert Harder's Public Domain Base64 Encoder and Decoder, which can be obtained at: * LICENSE: - * deps/netty-3.10.1.Final/LICENSE.base64.txt (Public Domain) + * deps/netty/LICENSE.base64.txt (Public Domain) * HOMEPAGE: * http://iharder.sourceforge.net/current/java/base64/ -lib/io.netty-netty-3.10.1.Final.jar contains a modified version of 'JZlib', a re-implementation of -zlib in pure Java, which can be obtained at: +lib/io.netty-netty-codec-4.1.111.Final.jar contains a modified portion of 'Webbit', an event based +WebSocket and HTTP server, which can be obtained at: * LICENSE: - * deps/netty-3.10.1.Final/LICENSE.jzlib.txt (BSD Style License) + * deps/netty/LICENSE.webbit.txt (BSD License) * HOMEPAGE: - * http://www.jcraft.com/jzlib/ + * https://github.com/joewalnes/webbit -lib/io.netty-netty-3.10.1.Final.jar contains a modified version of 'Webbit', a Java event based -WebSocket and HTTP server: +lib/io.netty-netty-codec-4.1.111.Final.jar contains a modified portion of 'SLF4J', a simple logging +facade for Java, which can be obtained at: * LICENSE: - * deps/netty-3.10.1.Final/LICENSE.webbit.txt (BSD License) + * deps/netty/LICENSE.slf4j.txt (MIT License) * HOMEPAGE: - * https://github.com/joewalnes/webbit + * http://www.slf4j.org/ ------------------------------------------------------------------------------------- -lib/io.netty-netty-all-4.1.22.Final.jar bundles some 3rd party dependencies +lib/io.netty-netty-codec-4.1.111.Final.jar contains a modified portion of 'Apache Harmony', an open source +Java SE, which can be obtained at: 
-lib/io.netty-netty-all-4.1.22.Final.jar contains the extensions to Java Collections Framework which has -been derived from the works by JSR-166 EG, Doug Lea, and Jason T. Greene: + * NOTICE: + * deps/netty/NOTICE.harmony.txt + * LICENSE: + * deps/netty/LICENSE.harmony.txt (Apache License 2.0) + * HOMEPAGE: + * http://archive.apache.org/dist/harmony/ + +lib/io.netty-netty-codec-4.1.111.Final.jar contains a modified portion of 'jbzip2', a Java bzip2 compression +and decompression library written by Matthew J. Francis. It can be obtained at: * LICENSE: - * deps/netty-4.1.22.Final/LICENSE.jsr166y.txt (Public Domain) + * deps/netty/LICENSE.jbzip2.txt (MIT License) * HOMEPAGE: - * http://gee.cs.oswego.edu/cgi-bin/viewcvs.cgi/jsr166/ - * http://viewvc.jboss.org/cgi-bin/viewvc.cgi/jbosscache/experimental/jsr166/ + * https://code.google.com/p/jbzip2/ -lib/io.netty-netty-all-4.1.22.Final.jar contains a modified version of Robert Harder's Public Domain -Base64 Encoder and Decoder, which can be obtained at: +lib/io.netty-netty-codec-4.1.111.Final.jar contains a modified portion of 'libdivsufsort', a C API library to construct +the suffix array and the Burrows-Wheeler transformed string for any input string of +a constant-size alphabet written by Yuta Mori. It can be obtained at: * LICENSE: - * deps/netty-4.1.22.Final/LICENSE.base64.txt (Public Domain) + * deps/netty/LICENSE.libdivsufsort.txt (MIT License) * HOMEPAGE: - * http://iharder.sourceforge.net/current/java/base64/ + * https://github.com/y-256/libdivsufsort -lib/io.netty-netty-all-4.1.22.Final.jar contains a modified portion of 'Webbit', an event based -WebSocket and HTTP server, which can be obtained at: +lib/io.netty-netty-codec-4.1.111.Final.jar contains a modified portion of Nitsan Wakart's 'JCTools', +Java Concurrency Tools for the JVM, which can be obtained at: * LICENSE: - * deps/netty-4.1.22.Final/LICENSE.webbit.txt (BSD License) + * deps/netty/LICENSE.jctools.txt (ASL2 License) * HOMEPAGE: - * https://github.com/joewalnes/webbit + * https://github.com/JCTools/JCTools -lib/io.netty-netty-all-4.1.22.Final.jar contains a modified portion of 'SLF4J', a simple logging -facade for Java, which can be obtained at: +lib/io.netty-netty-codec-4.1.111.Final.jar optionally depends on 'JZlib', a re-implementation of zlib in +pure Java, which can be obtained at: * LICENSE: - * deps/netty-4.1.22.Final/LICENSE.slf4j.txt (MIT License) + * deps/netty/LICENSE.jzlib.txt (BSD style License) * HOMEPAGE: - * http://www.slf4j.org/ + * http://www.jcraft.com/jzlib/ -lib/io.netty-netty-all-4.1.22.Final.jar contains a modified portion of 'jbzip2', a Java bzip2 compression -and decompression library written by Matthew J. Francis. It can be obtained at: +lib/io.netty-netty-codec-4.1.111.Final.jar optionally depends on 'Compress-LZF', a Java library for encoding and +decoding data in LZF format, written by Tatu Saloranta. It can be obtained at: * LICENSE: - * deps/netty-4.1.22.Final/LICENSE.jbzip2.txt (MIT License) + * deps/netty/LICENSE.compress-lzf.txt (Apache License 2.0) * HOMEPAGE: - * https://code.google.com/p/jbzip2/ + * https://github.com/ning/compress -lib/io.netty-netty-all-4.1.22.Final.jar contains a modified portion of 'libdivsufsort', a C API library to construct -the suffix array and the Burrows-Wheeler transformed string for any input string of -a constant-size alphabet written by Yuta Mori. 
It can be obtained at: +lib/io.netty-netty-codec-4.1.111.Final.jar optionally depends on 'lz4', a LZ4 Java compression +and decompression library written by Adrien Grand. It can be obtained at: * LICENSE: - * deps/netty-4.1.22.Final/LICENSE.libdivsufsort.txt (MIT License) + * deps/netty/LICENSE.lz4.txt (Apache License 2.0) * HOMEPAGE: - * https://github.com/y-256/libdivsufsort + * https://github.com/jpountz/lz4-java -lib/io.netty-netty-all-4.1.22.Final.jar contains a modified portion of 'jfastlz', a Java port of FastLZ compression +lib/io.netty-netty-codec-4.1.111.Final.jar optionally depends on 'lzma-java', a LZMA Java compression +and decompression library, which can be obtained at: + + * LICENSE: + * deps/netty/LICENSE.lzma-java.txt (Apache License 2.0) + * HOMEPAGE: + * https://github.com/jponge/lzma-java + +lib/io.netty-netty-codec-4.1.111.Final.jar contains a modified portion of 'jfastlz', a Java port of FastLZ compression and decompression library written by William Kinney. It can be obtained at: * LICENSE: - * deps/netty-4.1.22.Final/LICENSE.jfastlz.txt (MIT License) + * deps/netty/LICENSE.jfastlz.txt (MIT License) * HOMEPAGE: * https://code.google.com/p/jfastlz/ -lib/io.netty-netty-all-4.1.22.Final.jar contains a modified portion of and optionally depends on 'Protocol Buffers', Google's data -interchange format, which can be obtained at: +lib/io.netty-netty-codec-4.1.111.Final.jar contains a modified portion of and optionally depends on 'Protocol Buffers', +Google's data interchange format, which can be obtained at: * LICENSE: - * deps/netty-4.1.22.Final/LICENSE.protobuf.txt (New BSD License) + * deps/netty/LICENSE.protobuf.txt (New BSD License) * HOMEPAGE: * https://github.com/google/protobuf +lib/io.netty-netty-codec-4.1.111.Final.jar optionally depends on 'Bouncy Castle Crypto APIs' to generate +a temporary self-signed X.509 certificate when the JVM does not provide the +equivalent functionality. 
It can be obtained at: + + * LICENSE: + * deps/netty/LICENSE.bouncycastle.txt (MIT License) + * HOMEPAGE: + * http://www.bouncycastle.org/ + +lib/io.netty-netty-codec-4.1.111.Final.jar optionally depends on 'Snappy', a compression library produced +by Google Inc, which can be obtained at: + + * LICENSE: + * deps/netty/LICENSE.snappy.txt (New BSD License) + * HOMEPAGE: + * https://github.com/google/snappy + +lib/io.netty-netty-codec-4.1.111.Final.jar optionally depends on 'JBoss Marshalling', an alternative Java +serialization API, which can be obtained at: + + * LICENSE: + * deps/netty/LICENSE.jboss-marshalling.txt (Apache License 2.0) + * HOMEPAGE: + * https://github.com/jboss-remoting/jboss-marshalling + +lib/io.netty-netty-codec-4.1.111.Final.jar optionally depends on 'Caliper', Google's micro- +benchmarking framework, which can be obtained at: + + * LICENSE: + * deps/netty/LICENSE.caliper.txt (Apache License 2.0) + * HOMEPAGE: + * https://github.com/google/caliper + +lib/io.netty-netty-codec-4.1.111.Final.jar optionally depends on 'Apache Commons Logging', a logging +framework, which can be obtained at: + + * LICENSE: + * deps/netty/LICENSE.commons-logging.txt (Apache License 2.0) + * HOMEPAGE: + * http://commons.apache.org/logging/ + +lib/io.netty-netty-codec-4.1.111.Final.jar optionally depends on 'Apache Log4J', a logging framework, which +can be obtained at: + + * LICENSE: + * deps/netty/LICENSE.log4j.txt (Apache License 2.0) + * HOMEPAGE: + * http://logging.apache.org/log4j/ + +lib/io.netty-netty-codec-4.1.111.Final.jar optionally depends on 'Aalto XML', an ultra-high performance +non-blocking XML processor, which can be obtained at: + + * LICENSE: + * deps/netty/LICENSE.aalto-xml.txt (Apache License 2.0) + * HOMEPAGE: + * http://wiki.fasterxml.com/AaltoHome + +lib/io.netty-netty-codec-4.1.111.Final.jar contains a modified version of 'HPACK', a Java implementation of +the HTTP/2 HPACK algorithm written by Twitter. It can be obtained at: + + * LICENSE: + * deps/netty/LICENSE.hpack.txt (Apache License 2.0) + * HOMEPAGE: + * https://github.com/twitter/hpack + +lib/io.netty-netty-codec-4.1.111.Final.jar contains a modified version of 'HPACK', a Java implementation of +the HTTP/2 HPACK algorithm written by Cory Benfield. It can be obtained at: + + * LICENSE: + * deps/netty/LICENSE.hyper-hpack.txt (MIT License) + * HOMEPAGE: + * https://github.com/python-hyper/hpack/ + +lib/io.netty-netty-codec-4.1.111.Final.jar contains a modified version of 'HPACK', a Java implementation of +the HTTP/2 HPACK algorithm written by Tatsuhiro Tsujikawa. It can be obtained at: + + * LICENSE: + * deps/netty/LICENSE.nghttp2-hpack.txt (MIT License) + * HOMEPAGE: + * https://github.com/nghttp2/nghttp2/ + +lib/io.netty-netty-codec-4.1.111.Final.jar contains a modified portion of 'Apache Commons Lang', a Java library +provides utilities for the java.lang API, which can be obtained at: + + * LICENSE: + * deps/netty/LICENSE.commons-lang.txt (Apache License 2.0) + * HOMEPAGE: + * https://commons.apache.org/proper/commons-lang/ + + +lib/io.netty-netty-codec-4.1.111.Final.jar contains the Maven wrapper scripts from 'Maven Wrapper', +that provides an easy way to ensure a user has everything necessary to run the Maven build. + + * LICENSE: + * deps/netty/LICENSE.mvn-wrapper.txt (Apache License 2.0) + * HOMEPAGE: + * https://github.com/takari/maven-wrapper + +lib/io.netty-netty-codec-4.1.111.Final.jar contains the dnsinfo.h header file, +that provides a way to retrieve the system DNS configuration on MacOS. 
+This private header is also used by Apple's open source + mDNSResponder (https://opensource.apple.com/tarballs/mDNSResponder/). + + * LICENSE: + * deps/netty/LICENSE.dnsinfo.txt (Apache License 2.0) + * HOMEPAGE: + * http://www.opensource.apple.com/source/configd/configd-453.19/dnsinfo/dnsinfo.h + ------------------------------------------------------------------------------------ -lib/org.rocksdb-rocksdbjni-5.13.1.jar is derived from leveldb, which is under the following license. +lib/org.rocksdb-rocksdbjni-7.10.2.jar is derived from leveldb, which is under the following license. Copyright (c) 2011 The LevelDB Authors. All rights reserved. @@ -492,67 +672,54 @@ a "3-clause BSD" license. For details, see deps/jsr-305/LICENSE. Bundled as lib/com.google.code.findbugs-jsr305-3.0.2.jar Source available at https://storage.googleapis.com/google-code-archive-source/v2/code.google.com/jsr-305/source-archive.zip ------------------------------------------------------------------------------------ -This product bundles Google Protocal Buffers, which is available under a "3-clause BSD" +This product bundles Google Protocol Buffers, which is available under a "3-clause BSD" license. Bundled as - - lib/com.google.protobuf-protobuf-java-3.5.1.jar - - lib/com.google.protobuf-protobuf-java-util-3.5.1.jar -Source available at https://github.com/google/protobuf/tree/v3.5.1 -For details, see deps/protobuf-3.5.1/LICENSE. - -Bundled as lib/com.google.protobuf.nano-protobuf-javanano-3.0.0-alpha-5.jar -Source available at https://github.com/google/protobuf/tree/3.0.0-pre -For details, see deps/protobuf-3.0.0/LICENSE. ------------------------------------------------------------------------------------- -This product bundles Paranamer, which is available under a "3-clause BSD" license. -For details, see deps/paranamer-2.8/LICENSE.txt. + - lib/com.google.protobuf-protobuf-java-3.25.1.jar +Source available at https://github.com/google/protobuf/tree/v3.25.1 +For details, see deps/protobuf-3.14.0/LICENSE. -Bundled as lib/com.thoughtworks.paranamer-paranamer-2.8.jar -Source available at https://github.com/paul-hammant/paranamer/tree/paranamer-parent-2.8 +Bundled as + - lib/com.google.protobuf-protobuf-java-util-3.25.1.jar +Source available at https://github.com/protocolbuffers/protobuf/tree/v3.25.1 +For details, see deps/protobuf-3.12.0/LICENSE. ------------------------------------------------------------------------------------ This product bundles the JCP Standard Java Servlet API, which is available under a -CDDL 1.1 license. For details, see deps/javax.servlet-api-3.1.0/CDDL+GPL-1.1. - -Bundled as lib/javax.servlet-javax.servlet-api-3.1.0.jar -Source available at https://github.com/javaee/servlet-spec/tree/3.1.0 ------------------------------------------------------------------------------------- -This product bundles the Scala Standard Parser Combinator Library, which is available -under a "3-clause BSD" license. For details, see deps/scala-parser-combinators_2.11-1.0.4/LICENSE.md. - -Bundled as lib/org.scala-lang.modules-scala-parser-combinators_2.11-1.0.4.jar -Source available at https://github.com/scala/scala-parser-combinators/tree/v1.0.4 ------------------------------------------------------------------------------------- -This product bundles the Scala Standard Library, which is available under a -"3-clause BSD" license. For details, see deps/scala-library-2.11.7/LICENSE.md. 
- -Bundled as lib/org.scala-lang-scala-library-2.11.8.jar -Source available at org.scala-lang-scala-library-2.11.8.jar ------------------------------------------------------------------------------------- -This product bundles the Scala Reflection API, which is available under a -"3-clause BSD" license. For details, see deps/scala-reflect-2.11.8/LICENSE.md. +CDDL 1.1 license. For details, see deps/javax.servlet-api-4.0.0/CDDL+GPL-1.1. -Bundled as lib/org.scala-lang-scala-reflect-2.11.8.jar -Source available at https://github.com/scala/scala/tree/v2.11.8 +Bundled as lib/javax.servlet-javax.servlet-api-4.0.0.jar +Source available at https://github.com/javaee/servlet-spec/tree/4.0.0 ------------------------------------------------------------------------------------ This product bundles Simple Logging Facade for Java, which is available under an -MIT license. For details, see deps/slf4j-1.7.25/LICENSE.txt. +MIT license. For details, see deps/slf4j-2.0.12/LICENSE.txt. Bundled as - - lib/org.slf4j-slf4j-api-1.7.25.jar - - lib/org.slf4j-slf4j-log4j12-1.7.25.jar -Source available at https://github.com/qos-ch/slf4j/tree/v_1.7.25 + - lib/org.slf4j-slf4j-api-2.0.12.jar +Source available at https://github.com/qos-ch/slf4j/tree/v_2.0.12 ------------------------------------------------------------------------------------ This product bundles the Google Auth Library, which is available under a "3-clause BSD" -license. For details, see deps/google-auth-library-credentials-0.9.0/LICENSE +license. For details, see deps/google-auth-library-credentials-0.20.0/LICENSE Bundled as - - lib/com.google.auth-google-auth-library-credentials-0.9.0.jar -Source available at https://github.com/google/google-auth-library-java/tree/0.9.0 + - lib/com.google.auth-google-auth-library-credentials-1.22.0.jar + - lib/com.google.auth-google-auth-library-oauth2-http-1.22.0.jar +Source available at https://github.com/googleapis/google-auth-library-java/releases/tag/v1.22.0 ------------------------------------------------------------------------------------ -This product bundles the JLine Library, which is available under a "2-clause BSD" -license. For details, see deps/jline-0.9.94/LICENSE +This product bundles the Bouncy Castle library. +For license details, see deps/bouncycastle-1.0.2.3/LICENSE.html Bundled as - - lib/jline-jline-0.9.94.jar + - lib/org.bouncycastle-bc-fips-1.0.2.5.jar +------------------------------------------------------------------------------------ +This product uses the annotations from The Checker Framework, which are licensed under +the MIT License. For details, see deps/checker-qual-3.5.0/LICENSE + +Bundled as + - lib/org.checkerframework-checker-qual-3.33.0.jar +------------------------------------------------------------------------------------ +This product bundles the Reactive Streams library, which is licensed under +Public Domain (CC0). For details, see deps/reactivestreams-1.0.3/LICENSE +Bundled as + - lib/org.reactivestreams-reactive-streams-1.0.3.jar diff --git a/bookkeeper-dist/src/main/resources/LICENSE-bkctl.bin.txt b/bookkeeper-dist/src/main/resources/LICENSE-bkctl.bin.txt new file mode 100644 index 00000000000..0e2cf9f5ef8 --- /dev/null +++ b/bookkeeper-dist/src/main/resources/LICENSE-bkctl.bin.txt @@ -0,0 +1,613 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions.
+ + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ +------------------------------------------------------------------------------------ +The following bundled 3rd party jars are distributed under the +Apache Software License, Version 2. + +- lib/com.fasterxml.jackson.core-jackson-annotations-2.17.1.jar [1] +- lib/com.fasterxml.jackson.core-jackson-core-2.17.1.jar [2] +- lib/com.fasterxml.jackson.core-jackson-databind-2.17.1.jar [3] +- lib/com.google.guava-guava-32.0.1-jre.jar [4] +- lib/com.google.guava-failureaccess-1.0.1.jar [4] +- lib/com.google.guava-listenablefuture-9999.0-empty-to-avoid-conflict-with-guava.jar [4] +- lib/commons-cli-commons-cli-1.2.jar [5] +- lib/commons-codec-commons-codec-1.6.jar [6] +- lib/commons-configuration-commons-configuration-1.10.jar [7] +- lib/commons-io-commons-io-2.7.jar [8] +- lib/commons-lang-commons-lang-2.6.jar [9] +- lib/commons-logging-commons-logging-1.1.1.jar [10] +- lib/io.netty-netty-buffer-4.1.111.Final.jar [11] +- lib/io.netty-netty-codec-4.1.111.Final.jar [11] +- lib/io.netty-netty-common-4.1.111.Final.jar [11] +- lib/io.netty-netty-handler-4.1.111.Final.jar [11] +- lib/io.netty-netty-resolver-4.1.111.Final.jar [11] +- lib/io.netty-netty-tcnative-boringssl-static-2.0.65.Final.jar [11] +- lib/io.netty-netty-tcnative-boringssl-static-2.0.65.Final-linux-aarch_64.jar [11] +- lib/io.netty-netty-tcnative-boringssl-static-2.0.65.Final-linux-x86_64.jar [11] +- lib/io.netty-netty-tcnative-boringssl-static-2.0.65.Final-osx-aarch_64.jar [11] +- lib/io.netty-netty-tcnative-boringssl-static-2.0.65.Final-osx-x86_64.jar [11] +- lib/io.netty-netty-tcnative-boringssl-static-2.0.65.Final-windows-x86_64.jar [11] +- lib/io.netty-netty-tcnative-classes-2.0.65.Final.jar [11] +- lib/io.netty-netty-transport-4.1.111.Final.jar [11] +- lib/io.netty-netty-transport-classes-epoll-4.1.111.Final.jar [11] +- lib/io.netty-netty-transport-native-epoll-4.1.111.Final-linux-aarch_64.jar [11] +- lib/io.netty-netty-transport-native-epoll-4.1.111.Final-linux-x86_64.jar [11] +- lib/io.netty.incubator-netty-incubator-transport-native-io_uring-0.0.25.Final-linux-x86_64.jar [11] +- lib/io.netty.incubator-netty-incubator-transport-native-io_uring-0.0.25.Final-linux-aarch_64.jar [11] +- lib/io.netty.incubator-netty-incubator-transport-classes-io_uring-0.0.25.Final.jar [11] +- lib/io.netty-netty-transport-native-unix-common-4.1.111.Final.jar [11] +- lib/org.apache.logging.log4j-log4j-api-2.23.1.jar [16] +- lib/org.apache.logging.log4j-log4j-core-2.23.1.jar [16] +- lib/org.apache.logging.log4j-log4j-slf4j2-impl-2.23.1.jar [16] +- lib/org.apache.commons-commons-collections4-4.1.jar [18] +- lib/org.apache.commons-commons-lang3-3.6.jar [19] +- lib/org.apache.zookeeper-zookeeper-3.8.4.jar [20] +- lib/org.apache.zookeeper-zookeeper-jute-3.8.4.jar [20] +- lib/org.apache.zookeeper-zookeeper-3.8.4-tests.jar [20] +- lib/com.beust-jcommander-1.82.jar [23] +- lib/net.jpountz.lz4-lz4-1.3.0.jar [25] +- lib/com.google.api.grpc-proto-google-common-protos-2.29.0.jar [27] +- lib/com.google.code.gson-gson-2.10.1.jar [28] +- lib/io.opencensus-opencensus-api-0.31.1.jar [29] +- lib/io.opencensus-opencensus-contrib-http-util-0.31.1.jar [29] +- lib/io.opencensus-opencensus-proto-0.2.0.jar [29] +- lib/io.grpc-grpc-all-1.64.0.jar [32] +- lib/io.grpc-grpc-alts-1.64.0.jar [32] +- lib/io.grpc-grpc-api-1.64.0.jar [32] +- lib/io.grpc-grpc-auth-1.64.0.jar [32] +- lib/io.grpc-grpc-context-1.64.0.jar [32] +- lib/io.grpc-grpc-core-1.64.0.jar [32] +- lib/io.grpc-grpc-grpclb-1.64.0.jar [32] +- lib/io.grpc-grpc-inprocess-1.64.0.jar [32] +- 
lib/io.grpc-grpc-opentelemetry-1.64.0.jar [32]
+- lib/io.grpc-grpc-netty-shaded-1.64.0.jar [32]
+- lib/io.grpc-grpc-protobuf-1.64.0.jar [32]
+- lib/io.grpc-grpc-protobuf-lite-1.64.0.jar [32]
+- lib/io.grpc-grpc-services-1.64.0.jar [32]
+- lib/io.grpc-grpc-stub-1.64.0.jar [32]
+- lib/io.grpc-grpc-testing-1.64.0.jar [32]
+- lib/io.grpc-grpc-util-1.64.0.jar [32]
+- lib/io.grpc-grpc-xds-1.64.0.jar [32]
+- lib/io.grpc-grpc-rls-1.64.0.jar [32]
+- lib/org.apache.curator-curator-client-5.1.0.jar [33]
+- lib/org.apache.curator-curator-framework-5.1.0.jar [33]
+- lib/org.apache.curator-curator-recipes-5.1.0.jar [33]
+- lib/com.google.errorprone-error_prone_annotations-2.9.0.jar [35]
+- lib/org.apache.yetus-audience-annotations-0.12.0.jar [36]
+- lib/org.jctools-jctools-core-2.1.2.jar [37]
+- lib/org.apache.httpcomponents-httpclient-4.5.13.jar [38]
+- lib/org.apache.httpcomponents-httpcore-4.4.15.jar [39]
+- lib/org.apache.thrift-libthrift-0.14.2.jar [40]
+- lib/com.google.android-annotations-4.1.1.4.jar [41]
+- lib/com.google.auto.value-auto-value-annotations-1.10.4.jar [42]
+- lib/com.google.http-client-google-http-client-1.43.3.jar [43]
+- lib/com.google.http-client-google-http-client-gson-1.43.3.jar [43]
+- lib/com.google.j2objc-j2objc-annotations-2.8.jar [44]
+- lib/com.google.re2j-re2j-1.7.jar [45]
+- lib/io.dropwizard.metrics-metrics-core-4.1.12.1.jar [46]
+- lib/io.perfmark-perfmark-api-0.26.0.jar [47]
+- lib/org.conscrypt-conscrypt-openjdk-uber-2.5.2.jar [49]
+- lib/org.xerial.snappy-snappy-java-1.1.10.5.jar [50]
+- lib/io.reactivex.rxjava3-rxjava-3.0.1.jar [51]
+- lib/com.carrotsearch-hppc-0.9.1.jar [52]
+- lib/com.lmax-disruptor-4.0.0.jar [53]
+- lib/io.opentelemetry-opentelemetry-api-1.26.0.jar [54]
+- lib/io.opentelemetry-opentelemetry-context-1.26.0.jar [54]
+
+[1] Source available at https://github.com/FasterXML/jackson-annotations/tree/jackson-annotations-2.17.1
+[2] Source available at https://github.com/FasterXML/jackson-core/tree/jackson-core-2.17.1
+[3] Source available at https://github.com/FasterXML/jackson-databind/tree/jackson-databind-2.17.1
+[4] Source available at https://github.com/google/guava/tree/v32.0.1
+[5] Source available at https://github.com/apache/commons-cli/tree/cli-1.2
+[6] Source available at https://github.com/apache/commons-codec/tree/commons-codec-1.6-RC2
+[7] Source available at https://github.com/apache/commons-configuration/tree/CONFIGURATION_1_10
+[8] Source available at https://github.com/apache/commons-io/tree/rel/commons-io-2.7
+[9] Source available at https://github.com/apache/commons-lang/tree/LANG_2_6
+[10] Source available at https://github.com/apache/commons-logging/tree/commons-logging-1.1.1
+[11] Source available at https://github.com/netty/netty/tree/netty-4.1.111.Final
+[16] Source available at https://github.com/apache/logging-log4j2/tree/rel/2.23.1
+[18] Source available at https://github.com/apache/commons-collections/tree/collections-4.1
+[19] Source available at https://github.com/apache/commons-lang/tree/LANG_3_6
+[20] Source available at https://github.com/apache/zookeeper/tree/release-3.8.4
+[23] Source available at https://github.com/cbeust/jcommander/tree/1.82
+[25] Source available at https://github.com/lz4/lz4-java/tree/1.3.0
+[27] Source available at https://github.com/googleapis/java-common-protos/tree/v2.29.0
+[28] Source available at https://github.com/google/gson/tree/gson-parent-2.10.1
+[29] Source available at https://github.com/census-instrumentation/opencensus-java/tree/v0.31.1
+[32] Source available at 
https://github.com/grpc/grpc-java/tree/v1.64.0
+[33] Source available at https://github.com/apache/curator/tree/apache-curator-5.1.0
+[35] Source available at https://github.com/google/error-prone/tree/v2.9.0
+[36] Source available at https://github.com/apache/yetus/tree/rel/0.12.0
+[37] Source available at https://github.com/JCTools/JCTools/tree/v2.1.2
+[38] Source available at https://github.com/apache/httpcomponents-client/tree/rel/v4.5.13
+[39] Source available at https://github.com/apache/httpcomponents-core/tree/rel/v4.4.15
+[40] Source available at https://github.com/apache/thrift/tree/0.14.2
+[41] Source available at https://source.android.com/
+[42] Source available at https://github.com/google/auto/releases/tag/auto-value-1.10.4
+[43] Source available at https://github.com/googleapis/google-http-java-client/releases/tag/v1.43.3
+[44] Source available at https://github.com/google/j2objc/releases/tag/2.8
+[45] Source available at https://github.com/google/re2j/releases/tag/re2j-1.7
+[46] Source available at https://github.com/dropwizard/metrics/releases/tag/v4.1.12.1
+[47] Source available at https://github.com/perfmark/perfmark/releases/tag/v0.26.0
+[49] Source available at https://github.com/google/conscrypt/releases/tag/2.5.2
+[50] Source available at https://github.com/xerial/snappy-java/releases/tag/v1.1.10.5
+[51] Source available at https://github.com/ReactiveX/RxJava/tree/v3.0.1
+[52] Source available at https://github.com/carrotsearch/hppc/tree/0.9.1
+[53] Source available at https://github.com/LMAX-Exchange/disruptor/releases/tag/4.0.0
+[54] Source available at https://github.com/open-telemetry/opentelemetry-java/releases/tag/v1.26.0
+
+------------------------------------------------------------------------------------
+lib/io.netty-netty-codec-4.1.111.Final.jar bundles some 3rd party dependencies
+
+lib/io.netty-netty-codec-4.1.111.Final.jar contains the extensions to Java Collections Framework which has
+been derived from the works by JSR-166 EG, Doug Lea, and Jason T. Greene:
+
+  * LICENSE:
+    * deps/netty/LICENSE.jsr166y.txt (Public Domain)
+  * HOMEPAGE:
+    * http://gee.cs.oswego.edu/cgi-bin/viewcvs.cgi/jsr166/
+    * http://viewvc.jboss.org/cgi-bin/viewvc.cgi/jbosscache/experimental/jsr166/
+
+lib/io.netty-netty-codec-4.1.111.Final.jar contains a modified version of Robert Harder's Public Domain
+Base64 Encoder and Decoder, which can be obtained at:
+
+  * LICENSE:
+    * deps/netty/LICENSE.base64.txt (Public Domain)
+  * HOMEPAGE:
+    * http://iharder.sourceforge.net/current/java/base64/
+
+lib/io.netty-netty-codec-4.1.111.Final.jar contains a modified portion of 'Webbit', an event based
+WebSocket and HTTP server, which can be obtained at:
+
+  * LICENSE:
+    * deps/netty/LICENSE.webbit.txt (BSD License)
+  * HOMEPAGE:
+    * https://github.com/joewalnes/webbit
+
+lib/io.netty-netty-codec-4.1.111.Final.jar contains a modified portion of 'SLF4J', a simple logging
+facade for Java, which can be obtained at:
+
+  * LICENSE:
+    * deps/netty/LICENSE.slf4j.txt (MIT License)
+  * HOMEPAGE:
+    * http://www.slf4j.org/
+
+lib/io.netty-netty-codec-4.1.111.Final.jar contains a modified portion of 'Apache Harmony', an open source
+Java SE, which can be obtained at:
+
+  * NOTICE:
+    * deps/netty/NOTICE.harmony.txt
+  * LICENSE:
+    * deps/netty/LICENSE.harmony.txt (Apache License 2.0)
+  * HOMEPAGE:
+    * http://archive.apache.org/dist/harmony/
+
+lib/io.netty-netty-codec-4.1.111.Final.jar contains a modified portion of 'jbzip2', a Java bzip2 compression
+and decompression library written by Matthew J. 
Francis. It can be obtained at: + + * LICENSE: + * deps/netty/LICENSE.jbzip2.txt (MIT License) + * HOMEPAGE: + * https://code.google.com/p/jbzip2/ + +lib/io.netty-netty-codec-4.1.111.Final.jar contains a modified portion of 'libdivsufsort', a C API library to construct +the suffix array and the Burrows-Wheeler transformed string for any input string of +a constant-size alphabet written by Yuta Mori. It can be obtained at: + + * LICENSE: + * deps/netty/LICENSE.libdivsufsort.txt (MIT License) + * HOMEPAGE: + * https://github.com/y-256/libdivsufsort + +lib/io.netty-netty-codec-4.1.111.Final.jar contains a modified portion of Nitsan Wakart's 'JCTools', +Java Concurrency Tools for the JVM, which can be obtained at: + + * LICENSE: + * deps/netty/LICENSE.jctools.txt (ASL2 License) + * HOMEPAGE: + * https://github.com/JCTools/JCTools + +lib/io.netty-netty-codec-4.1.111.Final.jar optionally depends on 'JZlib', a re-implementation of zlib in +pure Java, which can be obtained at: + + * LICENSE: + * deps/netty/LICENSE.jzlib.txt (BSD style License) + * HOMEPAGE: + * http://www.jcraft.com/jzlib/ + +lib/io.netty-netty-codec-4.1.111.Final.jar optionally depends on 'Compress-LZF', a Java library for encoding and +decoding data in LZF format, written by Tatu Saloranta. It can be obtained at: + + * LICENSE: + * deps/netty/LICENSE.compress-lzf.txt (Apache License 2.0) + * HOMEPAGE: + * https://github.com/ning/compress + +lib/io.netty-netty-codec-4.1.111.Final.jar optionally depends on 'lz4', a LZ4 Java compression +and decompression library written by Adrien Grand. It can be obtained at: + + * LICENSE: + * deps/netty/LICENSE.lz4.txt (Apache License 2.0) + * HOMEPAGE: + * https://github.com/jpountz/lz4-java + +lib/io.netty-netty-codec-4.1.111.Final.jar optionally depends on 'lzma-java', a LZMA Java compression +and decompression library, which can be obtained at: + + * LICENSE: + * deps/netty/LICENSE.lzma-java.txt (Apache License 2.0) + * HOMEPAGE: + * https://github.com/jponge/lzma-java + +lib/io.netty-netty-codec-4.1.111.Final.jar contains a modified portion of 'jfastlz', a Java port of FastLZ compression +and decompression library written by William Kinney. It can be obtained at: + + * LICENSE: + * deps/netty/LICENSE.jfastlz.txt (MIT License) + * HOMEPAGE: + * https://code.google.com/p/jfastlz/ + +lib/io.netty-netty-codec-4.1.111.Final.jar contains a modified portion of and optionally depends on 'Protocol Buffers', +Google's data interchange format, which can be obtained at: + + * LICENSE: + * deps/netty/LICENSE.protobuf.txt (New BSD License) + * HOMEPAGE: + * https://github.com/google/protobuf + +lib/io.netty-netty-codec-4.1.111.Final.jar optionally depends on 'Bouncy Castle Crypto APIs' to generate +a temporary self-signed X.509 certificate when the JVM does not provide the +equivalent functionality. 
It can be obtained at: + + * LICENSE: + * deps/netty/LICENSE.bouncycastle.txt (MIT License) + * HOMEPAGE: + * http://www.bouncycastle.org/ + +lib/io.netty-netty-codec-4.1.111.Final.jar optionally depends on 'Snappy', a compression library produced +by Google Inc, which can be obtained at: + + * LICENSE: + * deps/netty/LICENSE.snappy.txt (New BSD License) + * HOMEPAGE: + * https://github.com/google/snappy + +lib/io.netty-netty-codec-4.1.111.Final.jar optionally depends on 'JBoss Marshalling', an alternative Java +serialization API, which can be obtained at: + + * LICENSE: + * deps/netty/LICENSE.jboss-marshalling.txt (Apache License 2.0) + * HOMEPAGE: + * https://github.com/jboss-remoting/jboss-marshalling + +lib/io.netty-netty-codec-4.1.111.Final.jar optionally depends on 'Caliper', Google's micro- +benchmarking framework, which can be obtained at: + + * LICENSE: + * deps/netty/LICENSE.caliper.txt (Apache License 2.0) + * HOMEPAGE: + * https://github.com/google/caliper + +lib/io.netty-netty-codec-4.1.111.Final.jar optionally depends on 'Apache Commons Logging', a logging +framework, which can be obtained at: + + * LICENSE: + * deps/netty/LICENSE.commons-logging.txt (Apache License 2.0) + * HOMEPAGE: + * http://commons.apache.org/logging/ + +lib/io.netty-netty-codec-4.1.111.Final.jar optionally depends on 'Apache Log4J', a logging framework, which +can be obtained at: + + * LICENSE: + * deps/netty/LICENSE.log4j.txt (Apache License 2.0) + * HOMEPAGE: + * http://logging.apache.org/log4j/ + +lib/io.netty-netty-codec-4.1.111.Final.jar optionally depends on 'Aalto XML', an ultra-high performance +non-blocking XML processor, which can be obtained at: + + * LICENSE: + * deps/netty/LICENSE.aalto-xml.txt (Apache License 2.0) + * HOMEPAGE: + * http://wiki.fasterxml.com/AaltoHome + +lib/io.netty-netty-codec-4.1.111.Final.jar contains a modified version of 'HPACK', a Java implementation of +the HTTP/2 HPACK algorithm written by Twitter. It can be obtained at: + + * LICENSE: + * deps/netty/LICENSE.hpack.txt (Apache License 2.0) + * HOMEPAGE: + * https://github.com/twitter/hpack + +lib/io.netty-netty-codec-4.1.111.Final.jar contains a modified version of 'HPACK', a Java implementation of +the HTTP/2 HPACK algorithm written by Cory Benfield. It can be obtained at: + + * LICENSE: + * deps/netty/LICENSE.hyper-hpack.txt (MIT License) + * HOMEPAGE: + * https://github.com/python-hyper/hpack/ + +lib/io.netty-netty-codec-4.1.111.Final.jar contains a modified version of 'HPACK', a Java implementation of +the HTTP/2 HPACK algorithm written by Tatsuhiro Tsujikawa. It can be obtained at: + + * LICENSE: + * deps/netty/LICENSE.nghttp2-hpack.txt (MIT License) + * HOMEPAGE: + * https://github.com/nghttp2/nghttp2/ + +lib/io.netty-netty-codec-4.1.111.Final.jar contains a modified portion of 'Apache Commons Lang', a Java library +provides utilities for the java.lang API, which can be obtained at: + + * LICENSE: + * deps/netty/LICENSE.commons-lang.txt (Apache License 2.0) + * HOMEPAGE: + * https://commons.apache.org/proper/commons-lang/ + + +lib/io.netty-netty-codec-4.1.111.Final.jar contains the Maven wrapper scripts from 'Maven Wrapper', +that provides an easy way to ensure a user has everything necessary to run the Maven build. + + * LICENSE: + * deps/netty/LICENSE.mvn-wrapper.txt (Apache License 2.0) + * HOMEPAGE: + * https://github.com/takari/maven-wrapper + +lib/io.netty-netty-codec-4.1.111.Final.jar contains the dnsinfo.h header file, +that provides a way to retrieve the system DNS configuration on MacOS. 
+
+This private header is also used by Apple's open source
+ mDNSResponder (https://opensource.apple.com/tarballs/mDNSResponder/).
+
+  * LICENSE:
+    * deps/netty/LICENSE.dnsinfo.txt (Apache License 2.0)
+  * HOMEPAGE:
+    * http://www.opensource.apple.com/source/configd/configd-453.19/dnsinfo/dnsinfo.h
+
+------------------------------------------------------------------------------------
+This product bundles Google Protocol Buffers, which is available under a "3-clause BSD"
+license.
+
+Bundled as
+ - lib/com.google.protobuf-protobuf-java-3.25.1.jar
+Source available at https://github.com/google/protobuf/tree/v3.25.1
+For details, see deps/protobuf-3.14.0/LICENSE.
+
+Bundled as
+ - lib/com.google.protobuf-protobuf-java-util-3.25.1.jar
+Source available at https://github.com/protocolbuffers/protobuf/tree/v3.25.1
+For details, see deps/protobuf-3.12.0/LICENSE.
+------------------------------------------------------------------------------------
+This product bundles Simple Logging Facade for Java, which is available under a
+MIT license. For details, see deps/slf4j-2.0.12/LICENSE.txt.
+
+Bundled as
+ - lib/org.slf4j-slf4j-api-2.0.12.jar
+Source available at https://github.com/qos-ch/slf4j/tree/v_2.0.12
+------------------------------------------------------------------------------------
+This product bundles the Google Auth Library, which is available under a "3-clause BSD"
+license. For details, see deps/google-auth-library-credentials-0.20.0/LICENSE
+
+Bundled as
+ - lib/com.google.auth-google-auth-library-credentials-1.22.0.jar
+ - lib/com.google.auth-google-auth-library-oauth2-http-1.22.0.jar
+Source available at https://github.com/googleapis/google-auth-library-java/releases/tag/v1.22.0
+------------------------------------------------------------------------------------
+This product bundles the bouncycastle Library.
+For license details, see deps/bouncycastle-1.0.2.3/LICENSE.html
+
+Bundled as
+ - lib/org.bouncycastle-bc-fips-1.0.2.5.jar
+------------------------------------------------------------------------------------
+
+This product uses the annotations from The Checker Framework, which are licensed under
+MIT License. For details, see deps/checker-qual-3.5.0/LICENSE
+
+Bundled as
+ - lib/org.checkerframework-checker-qual-3.33.0.jar
+------------------------------------------------------------------------------------
+This product bundles the Reactive Streams library, which is licensed under
+Public Domain (CC0). For details, see deps/reactivestreams-1.0.3/LICENSE
+
+Bundled as
+ - lib/org.reactivestreams-reactive-streams-1.0.3.jar
diff --git a/bookkeeper-dist/src/main/resources/LICENSE-server.bin.txt b/bookkeeper-dist/src/main/resources/LICENSE-server.bin.txt
index eac571468ea..0e3f88702a2 100644
--- a/bookkeeper-dist/src/main/resources/LICENSE-server.bin.txt
+++ b/bookkeeper-dist/src/main/resources/LICENSE-server.bin.txt
@@ -205,175 +205,432 @@ The following bundled 3rd party jars are distributed under the
 Apache Software License, Version 2. 
-- lib/com.fasterxml.jackson.core-jackson-annotations-2.8.9.jar [1] -- lib/com.fasterxml.jackson.core-jackson-core-2.8.9.jar [2] -- lib/com.fasterxml.jackson.core-jackson-databind-2.8.9.jar [3] -- lib/com.google.guava-guava-21.0.jar [4] +- lib/com.fasterxml.jackson.core-jackson-annotations-2.17.1.jar [1] +- lib/com.fasterxml.jackson.core-jackson-core-2.17.1.jar [2] +- lib/com.fasterxml.jackson.core-jackson-databind-2.17.1.jar [3] +- lib/com.google.guava-guava-32.0.1-jre.jar [4] +- lib/com.google.guava-failureaccess-1.0.1.jar [4] +- lib/com.google.guava-listenablefuture-9999.0-empty-to-avoid-conflict-with-guava.jar [4] - lib/commons-cli-commons-cli-1.2.jar [5] - lib/commons-codec-commons-codec-1.6.jar [6] - lib/commons-configuration-commons-configuration-1.10.jar [7] -- lib/commons-io-commons-io-2.4.jar [8] +- lib/commons-io-commons-io-2.7.jar [8] - lib/commons-lang-commons-lang-2.6.jar [9] - lib/commons-logging-commons-logging-1.1.1.jar [10] -- lib/io.netty-netty-all-4.1.22.Final.jar [11] -- lib/io.prometheus-simpleclient-0.0.21.jar [12] -- lib/io.prometheus-simpleclient_common-0.0.21.jar [12] -- lib/io.prometheus-simpleclient_hotspot-0.0.21.jar [12] -- lib/io.prometheus-simpleclient_servlet-0.0.21.jar [12] -- lib/io.vertx-vertx-auth-common-3.4.1.jar [13] -- lib/io.vertx-vertx-core-3.4.1.jar [14] -- lib/io.vertx-vertx-web-3.4.1.jar [15] -- lib/log4j-log4j-1.2.17.jar [16] -- lib/net.java.dev.jna-jna-3.2.7.jar [17] -- lib/org.apache.commons-commons-collections4-4.1.jar [18] -- lib/org.apache.commons-commons-lang3-3.6.jar [19] -- lib/org.apache.zookeeper-zookeeper-3.4.13.jar [20] -- lib/org.eclipse.jetty-jetty-http-9.4.5.v20170502.jar [21] -- lib/org.eclipse.jetty-jetty-io-9.4.5.v20170502.jar [21] -- lib/org.eclipse.jetty-jetty-security-9.4.5.v20170502.jar [21] -- lib/org.eclipse.jetty-jetty-server-9.4.5.v20170502.jar [21] -- lib/org.eclipse.jetty-jetty-servlet-9.4.5.v20170502.jar [21] -- lib/org.eclipse.jetty-jetty-util-9.4.5.v20170502.jar [21] -- lib/org.rocksdb-rocksdbjni-5.13.1.jar [22] -- lib/com.beust-jcommander-1.48.jar [23] -- lib/com.yahoo.datasketches-memory-0.8.3.jar [24] -- lib/com.yahoo.datasketches-sketches-core-0.8.3.jar [24] -- lib/net.jpountz.lz4-lz4-1.3.0.jar [25] -- lib/org.codehaus.jackson-jackson-core-asl-1.9.11.jar [26] -- lib/org.codehaus.jackson-jackson-mapper-asl-1.9.11.jar [27] -- lib/com.google.api.grpc-proto-google-common-protos-1.0.0.jar [28] -- lib/com.google.code.gson-gson-2.7.jar [29] -- lib/io.opencensus-opencensus-api-0.11.0.jar [30] -- lib/io.opencensus-opencensus-contrib-grpc-metrics-0.11.0.jar [30] -- lib/com.squareup.okhttp-okhttp-2.5.0.jar [31] -- lib/com.squareup.okio-okio-1.13.0.jar [32] -- lib/io.grpc-grpc-all-1.12.0.jar [33] -- lib/io.grpc-grpc-auth-1.12.0.jar [33] -- lib/io.grpc-grpc-context-1.12.0.jar [33] -- lib/io.grpc-grpc-core-1.12.0.jar [33] -- lib/io.grpc-grpc-netty-1.12.0.jar [33] -- lib/io.grpc-grpc-okhttp-1.12.0.jar [33] -- lib/io.grpc-grpc-protobuf-1.12.0.jar [33] -- lib/io.grpc-grpc-protobuf-lite-1.12.0.jar [33] -- lib/io.grpc-grpc-protobuf-nano-1.12.0.jar [33] -- lib/io.grpc-grpc-stub-1.12.0.jar [33] -- lib/io.grpc-grpc-testing-1.12.0.jar [33] -- lib/org.apache.curator-curator-client-4.0.1.jar [34] -- lib/org.apache.curator-curator-framework-4.0.1.jar [34] -- lib/org.apache.curator-curator-recipes-4.0.1.jar [34] -- lib/org.inferred-freebuilder-1.14.9.jar [35] -- lib/com.google.errorprone-error_prone_annotations-2.1.2.jar [36] -- lib/org.apache.yetus-audience-annotations-0.5.0.jar [37] - -[1] Source available at 
https://github.com/FasterXML/jackson-annotations/tree/jackson-annotations-2.8.9 -[2] Source available at https://github.com/FasterXML/jackson-core/tree/jackson-core-2.8.9 -[3] Source available at https://github.com/FasterXML/jackson-databind/tree/jackson-databind-2.8.9 -[4] Source available at https://github.com/google/guava/tree/v21.0 -[5] Source available at https://git-wip-us.apache.org/repos/asf?p=commons-cli.git;a=tag;h=bc8f0e -[6] Source available at http://svn.apache.org/viewvc/commons/proper/codec/tags/1_6/ -[7] Source available at http://svn.apache.org/viewvc/commons/proper/configuration/tags/CONFIGURATION_1_10/ -[8] Source available at https://git-wip-us.apache.org/repos/asf?p=commons-io.git;a=tag;h=603579 -[9] Source available at https://git-wip-us.apache.org/repos/asf?p=commons-lang.git;a=tag;h=375459 -[10] Source available at http://svn.apache.org/viewvc/commons/proper/logging/tags/commons-logging-1.1.1/ -[11] Source available at https://github.com/netty/netty/tree/netty-4.1.22.Final -[12] Source available at https://github.com/prometheus/client_java/tree/parent-0.0.21 -[13] Source available at https://github.com/vert-x3/vertx-auth/tree/3.4.1 -[14] Source available at https://github.com/eclipse/vert.x/tree/3.4.1 -[15] Source available at https://github.com/vert-x3/vertx-web/tree/3.4.1 -[16] Source available at http://logging.apache.org/log4j/1.2/download.html -[17] Source available at https://github.com/java-native-access/jna/tree/3.2.7 -[18] Source available at https://git-wip-us.apache.org/repos/asf?p=commons-collections.git;a=tag;h=a3a5ad -[19] Source available at https://git-wip-us.apache.org/repos/asf?p=commons-lang.git;a=tag;h=3ad2e8 -[20] Source available at https://github.com/apache/zookeeper/tree/release-3.4.13 -[21] Source available at https://github.com/eclipse/jetty.project/tree/jetty-9.4.5.v20170502 -[22] Source available at https://github.com/facebook/rocksdb/tree/v5.13.1 -[23] Source available at https://github.com/cbeust/jcommander/tree/jcommander-1.48 -[24] Source available at https://github.com/DataSketches/sketches-core/tree/sketches-0.8.3 -[25] Source available at https://github.com/lz4/lz4-java/tree/1.3.0 -[26] Source available at https://github.com/codehaus/jackson/tree/1.9 -[27] Source available at https://github.com/codehaus/jackson/tree/1.9 -[28] Source available at https://github.com/googleapis/googleapis -[29] Source available at https://github.com/google/gson/tree/gson-parent-2.7 -[30] Source available at https://github.com/census-instrumentation/opencensus-java/tree/v0.11.0 -[31] Source available at https://github.com/square/okhttp/tree/parent-2.5.0 -[32] Source available at https://github.com/square/okio/tree/okio-parent-1.13.0 -[33] Source available at https://github.com/grpc/grpc-java/tree/v1.12.0 -[34] Source available at https://github.com/apache/curator/tree/apache-curator-4.0.1 -[35] Source available at https://github.com/inferred/FreeBuilder/tree/v1.14.9 -[36] Source available at https://github.com/google/error-prone/tree/v2.1.2 -[37] Source available at https://github.com/apache/yetus/tree/rel/0.5.0 +- lib/io.netty-netty-buffer-4.1.111.Final.jar [11] +- lib/io.netty-netty-codec-4.1.111.Final.jar [11] +- lib/io.netty-netty-codec-dns-4.1.111.Final.jar [11] +- lib/io.netty-netty-codec-http-4.1.111.Final.jar [11] +- lib/io.netty-netty-codec-http2-4.1.111.Final.jar [11] +- lib/io.netty-netty-codec-socks-4.1.111.Final.jar [11] +- lib/io.netty-netty-common-4.1.111.Final.jar [11] +- lib/io.netty-netty-handler-4.1.111.Final.jar [11] +- 
lib/io.netty-netty-handler-proxy-4.1.111.Final.jar [11] +- lib/io.netty-netty-resolver-4.1.111.Final.jar [11] +- lib/io.netty-netty-resolver-dns-4.1.111.Final.jar [11] +- lib/io.netty-netty-tcnative-boringssl-static-2.0.65.Final.jar [11] +- lib/io.netty-netty-tcnative-boringssl-static-2.0.65.Final-linux-aarch_64.jar [11] +- lib/io.netty-netty-tcnative-boringssl-static-2.0.65.Final-linux-x86_64.jar [11] +- lib/io.netty-netty-tcnative-boringssl-static-2.0.65.Final-osx-aarch_64.jar [11] +- lib/io.netty-netty-tcnative-boringssl-static-2.0.65.Final-osx-x86_64.jar [11] +- lib/io.netty-netty-tcnative-boringssl-static-2.0.65.Final-windows-x86_64.jar [11] +- lib/io.netty-netty-tcnative-classes-2.0.65.Final.jar [11] +- lib/io.netty-netty-transport-4.1.111.Final.jar [11] +- lib/io.netty-netty-transport-classes-epoll-4.1.111.Final.jar [11] +- lib/io.netty-netty-transport-native-epoll-4.1.111.Final-linux-aarch_64.jar [11] +- lib/io.netty-netty-transport-native-epoll-4.1.111.Final-linux-x86_64.jar [11] +- lib/io.netty.incubator-netty-incubator-transport-native-io_uring-0.0.25.Final-linux-x86_64.jar [11] +- lib/io.netty.incubator-netty-incubator-transport-native-io_uring-0.0.25.Final-linux-aarch_64.jar [11] +- lib/io.netty.incubator-netty-incubator-transport-classes-io_uring-0.0.25.Final.jar [11] +- lib/io.netty-netty-transport-native-unix-common-4.1.111.Final.jar [11] +- lib/io.prometheus-simpleclient-0.15.0.jar [12] +- lib/io.prometheus-simpleclient_common-0.15.0.jar [12] +- lib/io.prometheus-simpleclient_hotspot-0.15.0.jar [12] +- lib/io.prometheus-simpleclient_servlet-0.15.0.jar [12] +- lib/io.prometheus-simpleclient_servlet_common-0.15.0.jar [12] +- lib/io.prometheus-simpleclient_tracer_common-0.15.0.jar [12] +- lib/io.prometheus-simpleclient_tracer_otel-0.15.0.jar [12] +- lib/io.prometheus-simpleclient_tracer_otel_agent-0.15.0.jar [12] +- lib/io.vertx-vertx-auth-common-4.5.7.jar [13] +- lib/io.vertx-vertx-bridge-common-4.5.7.jar [14] +- lib/io.vertx-vertx-core-4.5.7.jar [15] +- lib/io.vertx-vertx-web-4.5.7.jar [16] +- lib/io.vertx-vertx-web-common-4.5.7.jar [16] +- lib/org.apache.logging.log4j-log4j-api-2.23.1.jar [17] +- lib/org.apache.logging.log4j-log4j-core-2.23.1.jar [17] +- lib/org.apache.logging.log4j-log4j-slf4j2-impl-2.23.1.jar [17] +- lib/org.apache.commons-commons-collections4-4.1.jar [19] +- lib/org.apache.commons-commons-lang3-3.6.jar [20] +- lib/org.apache.zookeeper-zookeeper-3.8.4.jar [21] +- lib/org.apache.zookeeper-zookeeper-jute-3.8.4.jar [21] +- lib/org.apache.zookeeper-zookeeper-3.8.4-tests.jar [21] +- lib/org.eclipse.jetty-jetty-http-9.4.53.v20231009.jar [22] +- lib/org.eclipse.jetty-jetty-io-9.4.53.v20231009.jar [22] +- lib/org.eclipse.jetty-jetty-security-9.4.53.v20231009.jar [22] +- lib/org.eclipse.jetty-jetty-server-9.4.53.v20231009.jar [22] +- lib/org.eclipse.jetty-jetty-servlet-9.4.53.v20231009.jar [22] +- lib/org.eclipse.jetty-jetty-util-9.4.53.v20231009.jar [22] +- lib/org.eclipse.jetty-jetty-util-ajax-9.4.53.v20231009.jar [22] +- lib/org.rocksdb-rocksdbjni-7.10.2.jar [23] +- lib/com.beust-jcommander-1.82.jar [24] +- lib/com.yahoo.datasketches-memory-0.8.3.jar [25] +- lib/com.yahoo.datasketches-sketches-core-0.8.3.jar [25] +- lib/net.jpountz.lz4-lz4-1.3.0.jar [26] +- lib/com.google.api.grpc-proto-google-common-protos-2.29.0.jar [28] +- lib/com.google.code.gson-gson-2.10.1.jar [29] +- lib/io.opencensus-opencensus-api-0.31.1.jar [30] +- lib/io.opencensus-opencensus-contrib-http-util-0.31.1.jar [30] +- lib/io.opencensus-opencensus-proto-0.2.0.jar [30] +- 
lib/io.grpc-grpc-all-1.64.0.jar [33]
+- lib/io.grpc-grpc-alts-1.64.0.jar [33]
+- lib/io.grpc-grpc-api-1.64.0.jar [33]
+- lib/io.grpc-grpc-auth-1.64.0.jar [33]
+- lib/io.grpc-grpc-context-1.64.0.jar [33]
+- lib/io.grpc-grpc-core-1.64.0.jar [33]
+- lib/io.grpc-grpc-grpclb-1.64.0.jar [33]
+- lib/io.grpc-grpc-inprocess-1.64.0.jar [33]
+- lib/io.grpc-grpc-opentelemetry-1.64.0.jar [33]
+- lib/io.grpc-grpc-netty-shaded-1.64.0.jar [33]
+- lib/io.grpc-grpc-protobuf-1.64.0.jar [33]
+- lib/io.grpc-grpc-protobuf-lite-1.64.0.jar [33]
+- lib/io.grpc-grpc-services-1.64.0.jar [33]
+- lib/io.grpc-grpc-stub-1.64.0.jar [33]
+- lib/io.grpc-grpc-testing-1.64.0.jar [33]
+- lib/io.grpc-grpc-util-1.64.0.jar [33]
+- lib/io.grpc-grpc-xds-1.64.0.jar [33]
+- lib/io.grpc-grpc-rls-1.64.0.jar [33]
+- lib/org.apache.curator-curator-client-5.1.0.jar [34]
+- lib/org.apache.curator-curator-framework-5.1.0.jar [34]
+- lib/org.apache.curator-curator-recipes-5.1.0.jar [34]
+- lib/com.google.errorprone-error_prone_annotations-2.9.0.jar [36]
+- lib/org.apache.yetus-audience-annotations-0.12.0.jar [37]
+- lib/org.jctools-jctools-core-2.1.2.jar [38]
+- lib/org.apache.httpcomponents-httpclient-4.5.13.jar [39]
+- lib/org.apache.httpcomponents-httpcore-4.4.15.jar [40]
+- lib/org.apache.thrift-libthrift-0.14.2.jar [41]
+- lib/com.google.android-annotations-4.1.1.4.jar [42]
+- lib/com.google.http-client-google-http-client-1.43.3.jar [43]
+- lib/com.google.http-client-google-http-client-gson-1.43.3.jar [43]
+- lib/com.google.auto.value-auto-value-annotations-1.10.4.jar [44]
+- lib/com.google.j2objc-j2objc-annotations-2.8.jar [45]
+- lib/com.google.re2j-re2j-1.7.jar [46]
+- lib/io.dropwizard.metrics-metrics-core-4.1.12.1.jar [47]
+- lib/io.perfmark-perfmark-api-0.26.0.jar [48]
+- lib/org.conscrypt-conscrypt-openjdk-uber-2.5.2.jar [49]
+- lib/org.xerial.snappy-snappy-java-1.1.10.5.jar [50]
+- lib/io.reactivex.rxjava3-rxjava-3.0.1.jar [51]
+- lib/com.carrotsearch-hppc-0.9.1.jar [52]
+- lib/com.squareup.okhttp3-okhttp-4.12.0.jar [53]
+- lib/com.squareup.okio-okio-3.6.0.jar [53]
+- lib/com.squareup.okio-okio-jvm-3.6.0.jar [53]
+- lib/io.opentelemetry-opentelemetry-api-1.26.0.jar [54]
+- lib/io.opentelemetry-opentelemetry-api-events-1.26.0-alpha.jar [54]
+- lib/io.opentelemetry-opentelemetry-api-logs-1.26.0-alpha.jar [54]
+- lib/io.opentelemetry-opentelemetry-context-1.26.0.jar [54]
+- lib/io.opentelemetry-opentelemetry-exporter-common-1.26.0.jar [54]
+- lib/io.opentelemetry-opentelemetry-exporter-otlp-1.26.0.jar [54]
+- lib/io.opentelemetry-opentelemetry-exporter-otlp-common-1.26.0.jar [54]
+- lib/io.opentelemetry-opentelemetry-exporter-prometheus-1.26.0-alpha.jar [54]
+- lib/io.opentelemetry-opentelemetry-extension-incubator-1.26.0-alpha.jar [54]
+- lib/io.opentelemetry-opentelemetry-sdk-1.26.0.jar [54]
+- lib/io.opentelemetry-opentelemetry-sdk-common-1.26.0.jar [54]
+- lib/io.opentelemetry-opentelemetry-sdk-extension-autoconfigure-1.26.0-alpha.jar [54]
+- lib/io.opentelemetry-opentelemetry-sdk-extension-autoconfigure-spi-1.26.0.jar [54]
+- lib/io.opentelemetry-opentelemetry-sdk-logs-1.26.0-alpha.jar [54]
+- lib/io.opentelemetry-opentelemetry-sdk-metrics-1.26.0.jar [54]
+- lib/io.opentelemetry-opentelemetry-sdk-trace-1.26.0.jar [54]
+- lib/io.opentelemetry-opentelemetry-semconv-1.26.0-alpha.jar [54]
+- lib/io.opentelemetry.instrumentation-opentelemetry-instrumentation-api-1.26.0.jar [54]
+- lib/io.opentelemetry.instrumentation-opentelemetry-instrumentation-api-semconv-1.26.0-alpha.jar [54]
+- 
lib/io.opentelemetry.instrumentation-opentelemetry-runtime-metrics-1.26.0-alpha.jar [54]
+- lib/org.jetbrains-annotations-13.0.jar [55]
+- lib/org.jetbrains.kotlin-kotlin-stdlib-1.8.21.jar [55]
+- lib/org.jetbrains.kotlin-kotlin-stdlib-common-1.8.21.jar [55]
+- lib/org.jetbrains.kotlin-kotlin-stdlib-jdk7-1.8.21.jar [55]
+- lib/org.jetbrains.kotlin-kotlin-stdlib-jdk8-1.8.21.jar [55]
+- lib/com.lmax-disruptor-4.0.0.jar [56]
+
+[1] Source available at https://github.com/FasterXML/jackson-annotations/tree/jackson-annotations-2.17.1
+[2] Source available at https://github.com/FasterXML/jackson-core/tree/jackson-core-2.17.1
+[3] Source available at https://github.com/FasterXML/jackson-databind/tree/jackson-databind-2.17.1
+[4] Source available at https://github.com/google/guava/tree/v32.0.1
+[5] Source available at https://github.com/apache/commons-cli/tree/cli-1.2
+[6] Source available at https://github.com/apache/commons-codec/tree/commons-codec-1.6-RC2
+[7] Source available at https://github.com/apache/commons-configuration/tree/CONFIGURATION_1_10
+[8] Source available at https://github.com/apache/commons-io/tree/rel/commons-io-2.7
+[9] Source available at https://github.com/apache/commons-lang/tree/LANG_2_6
+[10] Source available at https://github.com/apache/commons-logging/tree/commons-logging-1.1.1
+[11] Source available at https://github.com/netty/netty/tree/netty-4.1.111.Final
+[12] Source available at https://github.com/prometheus/client_java/tree/parent-0.15.0
+[13] Source available at https://github.com/vert-x3/vertx-auth/tree/4.5.7
+[14] Source available at https://github.com/vert-x3/vertx-bridge-common/tree/4.5.7
+[15] Source available at https://github.com/eclipse/vert.x/tree/4.5.7
+[16] Source available at https://github.com/vert-x3/vertx-web/tree/4.5.7
+[17] Source available at https://github.com/apache/logging-log4j2/tree/rel/2.23.1
+[19] Source available at https://github.com/apache/commons-collections/tree/collections-4.1
+[20] Source available at https://github.com/apache/commons-lang/tree/LANG_3_6
+[21] Source available at https://github.com/apache/zookeeper/tree/release-3.8.4
+[22] Source available at https://github.com/eclipse/jetty.project/tree/jetty-9.4.53.v20231009
+[23] Source available at https://github.com/facebook/rocksdb/tree/v7.10.2
+[24] Source available at https://github.com/cbeust/jcommander/tree/1.82
+[25] Source available at https://github.com/DataSketches/sketches-core/tree/sketches-0.8.3
+[26] Source available at https://github.com/lz4/lz4-java/tree/1.3.0
+[28] Source available at https://github.com/googleapis/java-common-protos/tree/v2.29.0
+[29] Source available at https://github.com/google/gson/tree/gson-parent-2.10.1
+[30] Source available at https://github.com/census-instrumentation/opencensus-java/tree/v0.31.1
+[33] Source available at https://github.com/grpc/grpc-java/tree/v1.64.0
+[34] Source available at https://github.com/apache/curator/releases/tag/apache-curator-5.1.0
+[36] Source available at https://github.com/google/error-prone/tree/v2.9.0
+[37] Source available at https://github.com/apache/yetus/tree/rel/0.12.0
+[38] Source available at https://github.com/JCTools/JCTools/tree/v2.1.2
+[39] Source available at https://github.com/apache/httpcomponents-client/tree/rel/v4.5.13
+[40] Source available at https://github.com/apache/httpcomponents-core/tree/rel/v4.4.15
+[41] Source available at https://github.com/apache/thrift/tree/0.14.2
+[42] Source available at https://source.android.com/
+[43] Source available at 
https://github.com/googleapis/google-http-java-client/releases/tag/v1.43.3
+[44] Source available at https://github.com/google/auto/releases/tag/auto-value-1.10.4
+[45] Source available at https://github.com/google/j2objc/releases/tag/2.8
+[46] Source available at https://github.com/google/re2j/releases/tag/re2j-1.7
+[47] Source available at https://github.com/dropwizard/metrics/releases/tag/v4.1.12.1
+[48] Source available at https://github.com/perfmark/perfmark/releases/tag/v0.26.0
+[49] Source available at https://github.com/google/conscrypt/releases/tag/2.5.2
+[50] Source available at https://github.com/xerial/snappy-java/releases/tag/v1.1.10.5
+[51] Source available at https://github.com/ReactiveX/RxJava/tree/v3.0.1
+[52] Source available at https://github.com/carrotsearch/hppc/tree/0.9.1
+[53] Source available at https://github.com/square/okio/releases/tag/parent-3.6.0
+[54] Source available at https://github.com/open-telemetry/opentelemetry-java/releases/tag/v1.26.0
+[55] Source available at https://github.com/JetBrains/kotlin/releases/tag/v1.8.21
+[56] Source available at https://github.com/LMAX-Exchange/disruptor/releases/tag/4.0.0
 ------------------------------------------------------------------------------------
-lib/io.netty-netty-all-4.1.22.Final.jar bundles some 3rd party dependencies
+lib/io.netty-netty-codec-4.1.111.Final.jar bundles some 3rd party dependencies
 
-lib/io.netty-netty-all-4.1.22.Final.jar contains the extensions to Java Collections Framework which has
+lib/io.netty-netty-codec-4.1.111.Final.jar contains the extensions to Java Collections Framework which has
 been derived from the works by JSR-166 EG, Doug Lea, and Jason T. Greene:
 
   * LICENSE:
-    * deps/netty-4.1.22.Final/LICENSE.jsr166y.txt (Public Domain)
+    * deps/netty/LICENSE.jsr166y.txt (Public Domain)
   * HOMEPAGE:
     * http://gee.cs.oswego.edu/cgi-bin/viewcvs.cgi/jsr166/
     * http://viewvc.jboss.org/cgi-bin/viewvc.cgi/jbosscache/experimental/jsr166/
 
-lib/io.netty-netty-all-4.1.22.Final.jar contains a modified version of Robert Harder's Public Domain
+lib/io.netty-netty-codec-4.1.111.Final.jar contains a modified version of Robert Harder's Public Domain
 Base64 Encoder and Decoder, which can be obtained at:
 
   * LICENSE:
-    * deps/netty-4.1.22.Final/LICENSE.base64.txt (Public Domain)
+    * deps/netty/LICENSE.base64.txt (Public Domain)
   * HOMEPAGE:
     * http://iharder.sourceforge.net/current/java/base64/
 
-lib/io.netty-netty-all-4.1.22.Final.jar contains a modified portion of 'Webbit', an event based
+lib/io.netty-netty-codec-4.1.111.Final.jar contains a modified portion of 'Webbit', an event based
 WebSocket and HTTP server, which can be obtained at:
 
   * LICENSE:
-    * deps/netty-4.1.22.Final/LICENSE.webbit.txt (BSD License)
+    * deps/netty/LICENSE.webbit.txt (BSD License)
   * HOMEPAGE:
     * https://github.com/joewalnes/webbit
 
-lib/io.netty-netty-all-4.1.22.Final.jar contains a modified portion of 'SLF4J', a simple logging
+lib/io.netty-netty-codec-4.1.111.Final.jar contains a modified portion of 'SLF4J', a simple logging
 facade for Java, which can be obtained at:
 
   * LICENSE:
-    * deps/netty-4.1.22.Final/LICENSE.slf4j.txt (MIT License)
+    * deps/netty/LICENSE.slf4j.txt (MIT License)
   * HOMEPAGE:
     * http://www.slf4j.org/
 
-lib/io.netty-netty-all-4.1.22.Final.jar contains a modified portion of 'jbzip2', a Java bzip2 compression
+lib/io.netty-netty-codec-4.1.111.Final.jar contains a modified portion of 'Apache Harmony', an open source
+Java SE, which can be obtained at:
+
+  * NOTICE:
+    * deps/netty/NOTICE.harmony.txt
+  * LICENSE:
+    * 
deps/netty/LICENSE.harmony.txt (Apache License 2.0) + * HOMEPAGE: + * http://archive.apache.org/dist/harmony/ + +lib/io.netty-netty-codec-4.1.111.Final.jar contains a modified portion of 'jbzip2', a Java bzip2 compression and decompression library written by Matthew J. Francis. It can be obtained at: * LICENSE: - * deps/netty-4.1.22.Final/LICENSE.jbzip2.txt (MIT License) + * deps/netty/LICENSE.jbzip2.txt (MIT License) * HOMEPAGE: * https://code.google.com/p/jbzip2/ -lib/io.netty-netty-all-4.1.22.Final.jar contains a modified portion of 'libdivsufsort', a C API library to construct +lib/io.netty-netty-codec-4.1.111.Final.jar contains a modified portion of 'libdivsufsort', a C API library to construct the suffix array and the Burrows-Wheeler transformed string for any input string of a constant-size alphabet written by Yuta Mori. It can be obtained at: * LICENSE: - * deps/netty-4.1.22.Final/LICENSE.libdivsufsort.txt (MIT License) + * deps/netty/LICENSE.libdivsufsort.txt (MIT License) * HOMEPAGE: * https://github.com/y-256/libdivsufsort -lib/io.netty-netty-all-4.1.22.Final.jar contains a modified portion of 'jfastlz', a Java port of FastLZ compression +lib/io.netty-netty-codec-4.1.111.Final.jar contains a modified portion of Nitsan Wakart's 'JCTools', +Java Concurrency Tools for the JVM, which can be obtained at: + + * LICENSE: + * deps/netty/LICENSE.jctools.txt (ASL2 License) + * HOMEPAGE: + * https://github.com/JCTools/JCTools + +lib/io.netty-netty-codec-4.1.111.Final.jar optionally depends on 'JZlib', a re-implementation of zlib in +pure Java, which can be obtained at: + + * LICENSE: + * deps/netty/LICENSE.jzlib.txt (BSD style License) + * HOMEPAGE: + * http://www.jcraft.com/jzlib/ + +lib/io.netty-netty-codec-4.1.111.Final.jar optionally depends on 'Compress-LZF', a Java library for encoding and +decoding data in LZF format, written by Tatu Saloranta. It can be obtained at: + + * LICENSE: + * deps/netty/LICENSE.compress-lzf.txt (Apache License 2.0) + * HOMEPAGE: + * https://github.com/ning/compress + +lib/io.netty-netty-codec-4.1.111.Final.jar optionally depends on 'lz4', a LZ4 Java compression +and decompression library written by Adrien Grand. It can be obtained at: + + * LICENSE: + * deps/netty/LICENSE.lz4.txt (Apache License 2.0) + * HOMEPAGE: + * https://github.com/jpountz/lz4-java + +lib/io.netty-netty-codec-4.1.111.Final.jar optionally depends on 'lzma-java', a LZMA Java compression +and decompression library, which can be obtained at: + + * LICENSE: + * deps/netty/LICENSE.lzma-java.txt (Apache License 2.0) + * HOMEPAGE: + * https://github.com/jponge/lzma-java + +lib/io.netty-netty-codec-4.1.111.Final.jar contains a modified portion of 'jfastlz', a Java port of FastLZ compression and decompression library written by William Kinney. 
It can be obtained at: * LICENSE: - * deps/netty-4.1.22.Final/LICENSE.jfastlz.txt (MIT License) + * deps/netty/LICENSE.jfastlz.txt (MIT License) * HOMEPAGE: * https://code.google.com/p/jfastlz/ -lib/io.netty-netty-all-4.1.22.Final.jar contains a modified portion of and optionally depends on 'Protocol Buffers', Google's data -interchange format, which can be obtained at: +lib/io.netty-netty-codec-4.1.111.Final.jar contains a modified portion of and optionally depends on 'Protocol Buffers', +Google's data interchange format, which can be obtained at: * LICENSE: - * deps/netty-4.1.22.Final/LICENSE.protobuf.txt (New BSD License) + * deps/netty/LICENSE.protobuf.txt (New BSD License) * HOMEPAGE: * https://github.com/google/protobuf +lib/io.netty-netty-codec-4.1.111.Final.jar optionally depends on 'Bouncy Castle Crypto APIs' to generate +a temporary self-signed X.509 certificate when the JVM does not provide the +equivalent functionality. It can be obtained at: + + * LICENSE: + * deps/netty/LICENSE.bouncycastle.txt (MIT License) + * HOMEPAGE: + * http://www.bouncycastle.org/ + +lib/io.netty-netty-codec-4.1.111.Final.jar optionally depends on 'Snappy', a compression library produced +by Google Inc, which can be obtained at: + + * LICENSE: + * deps/netty/LICENSE.snappy.txt (New BSD License) + * HOMEPAGE: + * https://github.com/google/snappy + +lib/io.netty-netty-codec-4.1.111.Final.jar optionally depends on 'JBoss Marshalling', an alternative Java +serialization API, which can be obtained at: + + * LICENSE: + * deps/netty/LICENSE.jboss-marshalling.txt (Apache License 2.0) + * HOMEPAGE: + * https://github.com/jboss-remoting/jboss-marshalling + +lib/io.netty-netty-codec-4.1.111.Final.jar optionally depends on 'Caliper', Google's micro- +benchmarking framework, which can be obtained at: + + * LICENSE: + * deps/netty/LICENSE.caliper.txt (Apache License 2.0) + * HOMEPAGE: + * https://github.com/google/caliper + +lib/io.netty-netty-codec-4.1.111.Final.jar optionally depends on 'Apache Commons Logging', a logging +framework, which can be obtained at: + + * LICENSE: + * deps/netty/LICENSE.commons-logging.txt (Apache License 2.0) + * HOMEPAGE: + * http://commons.apache.org/logging/ + +lib/io.netty-netty-codec-4.1.111.Final.jar optionally depends on 'Apache Log4J', a logging framework, which +can be obtained at: + + * LICENSE: + * deps/netty/LICENSE.log4j.txt (Apache License 2.0) + * HOMEPAGE: + * http://logging.apache.org/log4j/ + +lib/io.netty-netty-codec-4.1.111.Final.jar optionally depends on 'Aalto XML', an ultra-high performance +non-blocking XML processor, which can be obtained at: + + * LICENSE: + * deps/netty/LICENSE.aalto-xml.txt (Apache License 2.0) + * HOMEPAGE: + * http://wiki.fasterxml.com/AaltoHome + +lib/io.netty-netty-codec-4.1.111.Final.jar contains a modified version of 'HPACK', a Java implementation of +the HTTP/2 HPACK algorithm written by Twitter. It can be obtained at: + + * LICENSE: + * deps/netty/LICENSE.hpack.txt (Apache License 2.0) + * HOMEPAGE: + * https://github.com/twitter/hpack + +lib/io.netty-netty-codec-4.1.111.Final.jar contains a modified version of 'HPACK', a Java implementation of +the HTTP/2 HPACK algorithm written by Cory Benfield. It can be obtained at: + + * LICENSE: + * deps/netty/LICENSE.hyper-hpack.txt (MIT License) + * HOMEPAGE: + * https://github.com/python-hyper/hpack/ + +lib/io.netty-netty-codec-4.1.111.Final.jar contains a modified version of 'HPACK', a Java implementation of +the HTTP/2 HPACK algorithm written by Tatsuhiro Tsujikawa. 
It can be obtained at: + + * LICENSE: + * deps/netty/LICENSE.nghttp2-hpack.txt (MIT License) + * HOMEPAGE: + * https://github.com/nghttp2/nghttp2/ + +lib/io.netty-netty-codec-4.1.111.Final.jar contains a modified portion of 'Apache Commons Lang', a Java library +provides utilities for the java.lang API, which can be obtained at: + + * LICENSE: + * deps/netty/LICENSE.commons-lang.txt (Apache License 2.0) + * HOMEPAGE: + * https://commons.apache.org/proper/commons-lang/ + + +lib/io.netty-netty-codec-4.1.111.Final.jar contains the Maven wrapper scripts from 'Maven Wrapper', +that provides an easy way to ensure a user has everything necessary to run the Maven build. + + * LICENSE: + * deps/netty/LICENSE.mvn-wrapper.txt (Apache License 2.0) + * HOMEPAGE: + * https://github.com/takari/maven-wrapper + +lib/io.netty-netty-codec-4.1.111.Final.jar contains the dnsinfo.h header file, +that provides a way to retrieve the system DNS configuration on MacOS. +This private header is also used by Apple's open source + mDNSResponder (https://opensource.apple.com/tarballs/mDNSResponder/). + + * LICENSE: + * deps/netty/LICENSE.dnsinfo.txt (Apache License 2.0) + * HOMEPAGE: + * http://www.opensource.apple.com/source/configd/configd-453.19/dnsinfo/dnsinfo.h + ------------------------------------------------------------------------------------ -lib/org.rocksdb-rocksdbjni-5.13.1.jar is derived from leveldb, which is under the following license. +lib/org.rocksdb-rocksdbjni-7.10.2.jar is derived from leveldb, which is under the following license. Copyright (c) 2011 The LevelDB Authors. All rights reserved. @@ -404,42 +661,54 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ------------------------------------------------------------------------------------ -This product bundles Google Protocal Buffers, which is available under a "3-clause BSD" +This product bundles Google Protocol Buffers, which is available under a "3-clause BSD" license. Bundled as - - lib/com.google.protobuf-protobuf-java-3.5.1.jar - - lib/com.google.protobuf-protobuf-java-util-3.5.1.jar -Source available at https://github.com/google/protobuf/tree/v3.5.1 -For details, see deps/protobuf-3.5.1/LICENSE. - -Bundled as lib/com.google.protobuf.nano-protobuf-javanano-3.0.0-alpha-5.jar -Source available at https://github.com/google/protobuf/tree/3.0.0-pre -For details, see deps/protobuf-3.0.0/LICENSE. + - lib/com.google.protobuf-protobuf-java-3.25.1.jar +Source available at https://github.com/google/protobuf/tree/v3.25.1 +For details, see deps/protobuf-3.14.0/LICENSE. + +Bundled as + - lib/com.google.protobuf-protobuf-java-util-3.25.1.jar +Source available at https://github.com/protocolbuffers/protobuf/tree/v3.25.1 +For details, see deps/protobuf-3.12.0/LICENSE. ------------------------------------------------------------------------------------ This product bundles the JCP Standard Java Servlet API, which is available under a -CDDL 1.1 license. For details, see deps/javax.servlet-api-3.1.0/CDDL+GPL-1.1. +CDDL 1.1 license. For details, see deps/javax.servlet-api-4.0.0/CDDL+GPL-1.1. 
-Bundled as lib/javax.servlet-javax.servlet-api-3.1.0.jar -Source available at https://github.com/javaee/servlet-spec/tree/3.1.0 +Bundled as lib/javax.servlet-javax.servlet-api-4.0.0.jar +Source available at https://github.com/javaee/servlet-spec/tree/4.0.0 ------------------------------------------------------------------------------------ This product bundles Simple Logging Facade for Java, which is available under a -MIT license. For details, see deps/slf4j-1.7.25/LICENSE.txt. +MIT license. For details, see deps/slf4j-2.0.12/LICENSE.txt. Bundled as - - lib/org.slf4j-slf4j-api-1.7.25.jar - - lib/org.slf4j-slf4j-log4j12-1.7.25.jar -Source available at https://github.com/qos-ch/slf4j/tree/v_1.7.25 + - lib/org.slf4j-slf4j-api-2.0.12.jar +Source available at https://github.com/qos-ch/slf4j/tree/v_2.0.12 ------------------------------------------------------------------------------------ This product bundles the Google Auth Library, which is available under a "3-clause BSD" -license. For details, see deps/google-auth-library-credentials-0.9.0/LICENSE +license. For details, see deps/google-auth-library-credentials-0.20.0/LICENSE Bundled as - - lib/com.google.auth-google-auth-library-credentials-0.9.0.jar -Source available at https://github.com/google/google-auth-library-java/tree/0.9.0 + - lib/com.google.auth-google-auth-library-credentials-1.22.0.jar + - lib/com.google.auth-google-auth-library-oauth2-http-1.22.0.jar +Source available at https://github.com/googleapis/google-auth-library-java/releases/tag/v1.22.0 ------------------------------------------------------------------------------------ -This product bundles the JLine Library, which is available under a "2-clause BSD" -license. For details, see deps/jline-0.9.94/LICENSE +This product bundles the Bouncy Castle library. +For license details, see deps/bouncycastle-1.0.2.3/LICENSE.html Bundled as - - lib/jline-jline-0.9.94.jar + - lib/org.bouncycastle-bc-fips-1.0.2.5.jar +------------------------------------------------------------------------------------ +This product uses the annotations from The Checker Framework, which are licensed under +the MIT License. For details, see deps/checker-qual-3.5.0/LICENSE + +Bundled as + - lib/org.checkerframework-checker-qual-3.33.0.jar +------------------------------------------------------------------------------------ +This product bundles the Reactive Streams library, which is licensed under +Public Domain (CC0). For details, see deps/reactivestreams-1.0.3/LICENSE + +Bundled as + - lib/org.reactivestreams-reactive-streams-1.0.3.jar diff --git a/bookkeeper-dist/src/main/resources/NOTICE-all.bin.txt b/bookkeeper-dist/src/main/resources/NOTICE-all.bin.txt index d9984b6908b..53b7942b544 100644 --- a/bookkeeper-dist/src/main/resources/NOTICE-all.bin.txt +++ b/bookkeeper-dist/src/main/resources/NOTICE-all.bin.txt @@ -1,13 +1,14 @@ Apache BookKeeper -Copyright 2011-2018 The Apache Software Foundation +Copyright 2011-2020 The Apache Software Foundation This product includes software developed at The Apache Software Foundation (http://www.apache.org/).
------------------------------------------------------------------------------------ -- lib/io.dropwizard.metrics-metrics-core-3.1.0.jar -- lib/io.dropwizard.metrics-metrics-graphite-3.1.0.jar -- lib/io.dropwizard.metrics-metrics-jvm-3.1.0.jar +- lib/io.dropwizard.metrics-metrics-core-4.1.12.1.jar +- lib/io.dropwizard.metrics-metrics-graphite-4.1.12.1.jar +- lib/io.dropwizard.metrics-metrics-jmx-4.1.12.1.jar +- lib/io.dropwizard.metrics-metrics-jvm-4.1.12.1.jar Metrics Copyright 2010-2013 Coda Hale and Yammer, Inc. @@ -20,32 +21,34 @@ LongAdder), which was released with the following comments: Written by Doug Lea with assistance from members of JCP JSR-166 Expert Group and released to the public domain, as explained at http://creativecommons.org/publicdomain/zero/1.0/ ------------------------------------------------------------------------------------- -- lib/io.netty-netty-3.10.1.Final.jar - - The Netty Project - ================= - -Please visit the Netty web site for more information: - - * http://netty.io/ - -Copyright 2011 The Netty Project - -The Netty Project licenses this file to you under the Apache License, -version 2.0 (the "License"); you may not use this file except in compliance -with the License. You may obtain a copy of the License at: - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -License for the specific language governing permissions and limitations -under the License. ------------------------------------------------------------------------------------ -- lib/io.netty-netty-all-4.1.22.Final.jar +- lib/io.netty-netty-buffer-4.1.111.Final.jar +- lib/io.netty-netty-codec-4.1.111.Final.jar +- lib/io.netty-netty-codec-dns-4.1.111.Final.jar +- lib/io.netty-netty-codec-http-4.1.111.Final.jar +- lib/io.netty-netty-codec-http2-4.1.111.Final.jar +- lib/io.netty-netty-codec-socks-4.1.111.Final.jar +- lib/io.netty-netty-common-4.1.111.Final.jar +- lib/io.netty-netty-handler-4.1.111.Final.jar +- lib/io.netty-netty-handler-proxy-4.1.111.Final.jar +- lib/io.netty-netty-resolver-4.1.111.Final.jar +- lib/io.netty-netty-resolver-dns-4.1.111.Final.jar +- lib/io.netty-netty-tcnative-boringssl-static-2.0.65.Final.jar +- lib/io.netty-netty-tcnative-boringssl-static-2.0.65.Final-linux-aarch_64.jar [11] +- lib/io.netty-netty-tcnative-boringssl-static-2.0.65.Final-linux-x86_64.jar [11] +- lib/io.netty-netty-tcnative-boringssl-static-2.0.65.Final-osx-aarch_64.jar [11] +- lib/io.netty-netty-tcnative-boringssl-static-2.0.65.Final-osx-x86_64.jar [11] +- lib/io.netty-netty-tcnative-boringssl-static-2.0.65.Final-windows-x86_64.jar [11] +- lib/io.netty-netty-tcnative-classes-2.0.65.Final.jar +- lib/io.netty-netty-transport-4.1.111.Final.jar +- lib/io.netty-netty-transport-classes-epoll-4.1.111.Final.jar +- lib/io.netty-netty-transport-native-epoll-4.1.111.Final-linux-aarch_64.jar +- lib/io.netty-netty-transport-native-epoll-4.1.111.Final-linux-x86_64.jar +- lib/io.netty.incubator-netty-incubator-transport-native-io_uring-0.0.25.Final-linux-x86_64.jar +- lib/io.netty.incubator-netty-incubator-transport-native-io_uring-0.0.25.Final-linux-aarch_64.jar +- lib/io.netty-netty-transport-native-unix-common-4.1.111.Final.jar + The Netty Project ================= @@ -69,10 +72,14 @@ License for the specific language governing permissions and limitations under the License. 
------------------------------------------------------------------------------------ -- lib/io.prometheus-simpleclient-0.0.21.jar -- lib/io.prometheus-simpleclient_common-0.0.21.jar -- lib/io.prometheus-simpleclient_hotspot-0.0.21.jar -- lib/io.prometheus-simpleclient_servlet-0.0.21.jar +- lib/io.prometheus-simpleclient-0.15.0.jar +- lib/io.prometheus-simpleclient_common-0.15.0.jar +- lib/io.prometheus-simpleclient_hotspot-0.15.0.jar +- lib/io.prometheus-simpleclient_servlet-0.15.0.jar +- lib/io.prometheus-simpleclient_servlet_common-0.15.0.jar +- lib/io.prometheus-simpleclient_tracer_common-0.15.0.jar +- lib/io.prometheus-simpleclient_tracer_otel-0.15.0.jar +- lib/io.prometheus-simpleclient_tracer_otel_agent-0.15.0.jar Prometheus instrumentation library for JVM applications Copyright 2012-2015 The Prometheus Authors @@ -86,12 +93,13 @@ SoundCloud Ltd. (http://soundcloud.com/). This product includes software developed as part of the Ocelli project by Netflix Inc. (https://github.com/Netflix/ocelli/). ------------------------------------------------------------------------------------ -- lib/org.eclipse.jetty-jetty-http-9.4.5.v20170502.jar -- lib/org.eclipse.jetty-jetty-io-9.4.5.v20170502.jar -- lib/org.eclipse.jetty-jetty-security-9.4.5.v20170502.jar -- lib/org.eclipse.jetty-jetty-server-9.4.5.v20170502.jar -- lib/org.eclipse.jetty-jetty-servlet-9.4.5.v20170502.jar -- lib/org.eclipse.jetty-jetty-util-9.4.5.v20170502.jar +- lib/org.eclipse.jetty-jetty-http-9.4.53.v20231009.jar +- lib/org.eclipse.jetty-jetty-io-9.4.53.v20231009.jar +- lib/org.eclipse.jetty-jetty-security-9.4.53.v20231009.jar +- lib/org.eclipse.jetty-jetty-server-9.4.53.v20231009.jar +- lib/org.eclipse.jetty-jetty-servlet-9.4.53.v20231009.jar +- lib/org.eclipse.jetty-jetty-util-9.4.53.v20231009.jar +- lib/org.eclipse.jetty-jetty-util-ajax-9.4.53.v20231009.jar ============================================================== Jetty Web Container @@ -113,7 +121,7 @@ Jetty is dual licensed under both Jetty may be distributed under either license. -lib/org.eclipse.jetty-jetty-util-9.4.5.v20170502.jar bundles UnixCrypt +lib/org.eclipse.jetty-jetty-util-9.4.53.v20231009.jar bundles UnixCrypt The UnixCrypt.java code implements the one way cryptography used by Unix systems for simple password protection. Copyright 1996 Aki Yoshida, @@ -122,8 +130,43 @@ Permission to use, copy, modify and distribute UnixCrypt for non-commercial or commercial purposes and without fee is granted provided that the copyright notice appears in all copies. ------------------------------------------------------------------------------------ -- lib/com.beust-jcommander-1.48.jar +- lib/com.beust-jcommander-1.82.jar Copyright 2010 Cedric Beust cedric@beust.com ------------------------------------------------------------------------------------ +- lib/io.grpc-grpc-all-1.64.0.jar +- lib/io.grpc-grpc-auth-1.64.0.jar +- lib/io.grpc-grpc-context-1.64.0.jar +- lib/io.grpc-grpc-core-1.64.0.jar +- lib/io.grpc-grpc-netty-shaded-1.64.0.jar +- lib/io.grpc-grpc-protobuf-1.64.0.jar +- lib/io.grpc-grpc-protobuf-lite-1.64.0.jar +- lib/io.grpc-grpc-stub-1.64.0.jar +- lib/io.grpc-grpc-testing-1.64.0.jar + +Copyright 2014, gRPC Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +This product contains a modified portion of 'Netty', an open source +networking library, which can be obtained at: + + * LICENSE: + * netty/third_party/netty/LICENSE.txt (Apache License 2.0) + * HOMEPAGE: + * https://netty.io + * LOCATION_IN_GRPC: + * netty/third_party/netty + +------------------------------------------------------------------------------------ diff --git a/bookkeeper-dist/src/main/resources/NOTICE-bkctl.bin.txt b/bookkeeper-dist/src/main/resources/NOTICE-bkctl.bin.txt new file mode 100644 index 00000000000..2a410c2a087 --- /dev/null +++ b/bookkeeper-dist/src/main/resources/NOTICE-bkctl.bin.txt @@ -0,0 +1,90 @@ +Apache BookKeeper +Copyright 2011-2020 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + +------------------------------------------------------------------------------------ +- lib/io.netty-netty-buffer-4.1.111.Final.jar +- lib/io.netty-netty-codec-4.1.111.Final.jar +- lib/io.netty-netty-common-4.1.111.Final.jar +- lib/io.netty-netty-handler-4.1.111.Final.jar +- lib/io.netty-netty-resolver-4.1.111.Final.jar +- lib/io.netty-netty-tcnative-boringssl-static-2.0.65.Final.jar +- lib/io.netty-netty-tcnative-boringssl-static-2.0.65.Final-linux-aarch_64.jar [11] +- lib/io.netty-netty-tcnative-boringssl-static-2.0.65.Final-linux-x86_64.jar [11] +- lib/io.netty-netty-tcnative-boringssl-static-2.0.65.Final-osx-aarch_64.jar [11] +- lib/io.netty-netty-tcnative-boringssl-static-2.0.65.Final-osx-x86_64.jar [11] +- lib/io.netty-netty-tcnative-boringssl-static-2.0.65.Final-windows-x86_64.jar [11] +- lib/io.netty-netty-tcnative-classes-2.0.65.Final.jar +- lib/io.netty-netty-transport-4.1.111.Final.jar +- lib/io.netty-netty-transport-classes-epoll-4.1.111.Final.jar +- lib/io.netty-netty-transport-native-epoll-4.1.111.Final-linux-aarch_64.jar +- lib/io.netty-netty-transport-native-epoll-4.1.111.Final-linux-x86_64.jar +- lib/io.netty.incubator-netty-incubator-transport-native-io_uring-0.0.25.Final-linux-x86_64.jar +- lib/io.netty.incubator-netty-incubator-transport-native-io_uring-0.0.25.Final-linux-aarch_64.jar +- lib/io.netty-netty-transport-native-unix-common-4.1.111.Final.jar + + + The Netty Project + ================= + +Please visit the Netty web site for more information: + + * http://netty.io/ + +Copyright 2014 The Netty Project + +The Netty Project licenses this file to you under the Apache License, +version 2.0 (the "License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at: + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +License for the specific language governing permissions and limitations +under the License. 
+ +------------------------------------------------------------------------------------ +- lib/com.beust-jcommander-1.82.jar + +Copyright 2010 Cedric Beust cedric@beust.com + +------------------------------------------------------------------------------------ +- lib/io.grpc-grpc-all-1.64.0.jar +- lib/io.grpc-grpc-auth-1.64.0.jar +- lib/io.grpc-grpc-context-1.64.0.jar +- lib/io.grpc-grpc-core-1.64.0.jar +- lib/io.grpc-grpc-netty-shaded-1.64.0.jar +- lib/io.grpc-grpc-protobuf-1.64.0.jar +- lib/io.grpc-grpc-protobuf-lite-1.64.0.jar +- lib/io.grpc-grpc-stub-1.64.0.jar +- lib/io.grpc-grpc-testing-1.64.0.jar + +Copyright 2014, gRPC Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +This product contains a modified portion of 'Netty', an open source +networking library, which can be obtained at: + + * LICENSE: + * netty/third_party/netty/LICENSE.txt (Apache License 2.0) + * HOMEPAGE: + * https://netty.io + * LOCATION_IN_GRPC: + * netty/third_party/netty + +------------------------------------------------------------------------------------ diff --git a/bookkeeper-dist/src/main/resources/NOTICE-server.bin.txt b/bookkeeper-dist/src/main/resources/NOTICE-server.bin.txt index 1fd2f53ba3b..b9d15c993fd 100644 --- a/bookkeeper-dist/src/main/resources/NOTICE-server.bin.txt +++ b/bookkeeper-dist/src/main/resources/NOTICE-server.bin.txt @@ -1,11 +1,36 @@ Apache BookKeeper -Copyright 2011-2018 The Apache Software Foundation +Copyright 2011-2020 The Apache Software Foundation This product includes software developed at The Apache Software Foundation (http://www.apache.org/). 
------------------------------------------------------------------------------------ -- lib/io.netty-netty-all-4.1.22.Final.jar +- lib/io.netty-netty-buffer-4.1.111.Final.jar +- lib/io.netty-netty-codec-4.1.111.Final.jar +- lib/io.netty-netty-codec-dns-4.1.111.Final.jar +- lib/io.netty-netty-codec-http-4.1.111.Final.jar +- lib/io.netty-netty-codec-http2-4.1.111.Final.jar +- lib/io.netty-netty-codec-socks-4.1.111.Final.jar +- lib/io.netty-netty-common-4.1.111.Final.jar +- lib/io.netty-netty-handler-4.1.111.Final.jar +- lib/io.netty-netty-handler-proxy-4.1.111.Final.jar +- lib/io.netty-netty-resolver-4.1.111.Final.jar +- lib/io.netty-netty-resolver-dns-4.1.111.Final.jar +- lib/io.netty-netty-tcnative-boringssl-static-2.0.65.Final.jar +- lib/io.netty-netty-tcnative-boringssl-static-2.0.65.Final-linux-aarch_64.jar [11] +- lib/io.netty-netty-tcnative-boringssl-static-2.0.65.Final-linux-x86_64.jar [11] +- lib/io.netty-netty-tcnative-boringssl-static-2.0.65.Final-osx-aarch_64.jar [11] +- lib/io.netty-netty-tcnative-boringssl-static-2.0.65.Final-osx-x86_64.jar [11] +- lib/io.netty-netty-tcnative-boringssl-static-2.0.65.Final-windows-x86_64.jar [11] +- lib/io.netty-netty-tcnative-classes-2.0.65.Final.jar +- lib/io.netty-netty-transport-4.1.111.Final.jar +- lib/io.netty-netty-transport-classes-epoll-4.1.111.Final.jar +- lib/io.netty-netty-transport-native-epoll-4.1.111.Final-linux-aarch_64.jar +- lib/io.netty-netty-transport-native-epoll-4.1.111.Final-linux-x86_64.jar +- lib/io.netty.incubator-netty-incubator-transport-native-io_uring-0.0.25.Final-linux-x86_64.jar +- lib/io.netty.incubator-netty-incubator-transport-native-io_uring-0.0.25.Final-linux-aarch_64.jar +- lib/io.netty-netty-transport-native-unix-common-4.1.111.Final.jar + The Netty Project ================= @@ -29,10 +54,14 @@ License for the specific language governing permissions and limitations under the License. ------------------------------------------------------------------------------------ -- lib/io.prometheus-simpleclient-0.0.21.jar -- lib/io.prometheus-simpleclient_common-0.0.21.jar -- lib/io.prometheus-simpleclient_hotspot-0.0.21.jar -- lib/io.prometheus-simpleclient_servlet-0.0.21.jar +- lib/io.prometheus-simpleclient-0.15.0.jar +- lib/io.prometheus-simpleclient_common-0.15.0.jar +- lib/io.prometheus-simpleclient_hotspot-0.15.0.jar +- lib/io.prometheus-simpleclient_servlet-0.15.0.jar +- lib/io.prometheus-simpleclient_servlet_common-0.15.0.jar +- lib/io.prometheus-simpleclient_tracer_common-0.15.0.jar +- lib/io.prometheus-simpleclient_tracer_otel-0.15.0.jar +- lib/io.prometheus-simpleclient_tracer_otel_agent-0.15.0.jar Prometheus instrumentation library for JVM applications Copyright 2012-2015 The Prometheus Authors @@ -46,12 +75,13 @@ SoundCloud Ltd. (http://soundcloud.com/). This product includes software developed as part of the Ocelli project by Netflix Inc. (https://github.com/Netflix/ocelli/). 
------------------------------------------------------------------------------------ -- lib/org.eclipse.jetty-jetty-http-9.4.5.v20170502.jar -- lib/org.eclipse.jetty-jetty-io-9.4.5.v20170502.jar -- lib/org.eclipse.jetty-jetty-security-9.4.5.v20170502.jar -- lib/org.eclipse.jetty-jetty-server-9.4.5.v20170502.jar -- lib/org.eclipse.jetty-jetty-servlet-9.4.5.v20170502.jar -- lib/org.eclipse.jetty-jetty-util-9.4.5.v20170502.jar +- lib/org.eclipse.jetty-jetty-http-9.4.53.v20231009.jar +- lib/org.eclipse.jetty-jetty-io-9.4.53.v20231009.jar +- lib/org.eclipse.jetty-jetty-security-9.4.53.v20231009.jar +- lib/org.eclipse.jetty-jetty-server-9.4.53.v20231009.jar +- lib/org.eclipse.jetty-jetty-servlet-9.4.53.v20231009.jar +- lib/org.eclipse.jetty-jetty-util-9.4.53.v20231009.jar +- lib/org.eclipse.jetty-jetty-util-ajax-9.4.53.v20231009.jar ============================================================== Jetty Web Container @@ -73,7 +103,7 @@ Jetty is dual licensed under both Jetty may be distributed under either license. -lib/org.eclipse.jetty-jetty-util-9.4.5.v20170502.jar bundles UnixCrypt +lib/org.eclipse.jetty-jetty-util-9.4.53.v20231009.jar bundles UnixCrypt The UnixCrypt.java code implements the one way cryptography used by Unix systems for simple password protection. Copyright 1996 Aki Yoshida, @@ -82,8 +112,43 @@ Permission to use, copy, modify and distribute UnixCrypt for non-commercial or commercial purposes and without fee is granted provided that the copyright notice appears in all copies. ------------------------------------------------------------------------------------ -- lib/com.beust-jcommander-1.48.jar +- lib/com.beust-jcommander-1.82.jar Copyright 2010 Cedric Beust cedric@beust.com ------------------------------------------------------------------------------------ +- lib/io.grpc-grpc-all-1.64.0.jar +- lib/io.grpc-grpc-auth-1.64.0.jar +- lib/io.grpc-grpc-context-1.64.0.jar +- lib/io.grpc-grpc-core-1.64.0.jar +- lib/io.grpc-grpc-netty-shaded-1.64.0.jar +- lib/io.grpc-grpc-protobuf-1.64.0.jar +- lib/io.grpc-grpc-protobuf-lite-1.64.0.jar +- lib/io.grpc-grpc-stub-1.64.0.jar +- lib/io.grpc-grpc-testing-1.64.0.jar + +Copyright 2014, gRPC Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +This product contains a modified portion of 'Netty', an open source +networking library, which can be obtained at: + + * LICENSE: + * netty/third_party/netty/LICENSE.txt (Apache License 2.0) + * HOMEPAGE: + * https://netty.io + * LOCATION_IN_GRPC: + * netty/third_party/netty + +------------------------------------------------------------------------------------ diff --git a/bookkeeper-dist/src/main/resources/deps/bouncycastle-1.0.2.3/LICENSE.html b/bookkeeper-dist/src/main/resources/deps/bouncycastle-1.0.2.3/LICENSE.html new file mode 100644 index 00000000000..a3acf7f069d --- /dev/null +++ b/bookkeeper-dist/src/main/resources/deps/bouncycastle-1.0.2.3/LICENSE.html @@ -0,0 +1,22 @@ + + + +Copyright (c) 2000-2019 The Legion of the Bouncy Castle Inc. (http://www.bouncycastle.org) +

+Permission is hereby granted, free of charge, to any person obtaining a copy of this software +and associated documentation files (the "Software"), to deal in the Software without restriction, +including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, +and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: +

+The above copyright notice and this permission notice shall be included in all copies or substantial +portions of the Software. +

+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. + + \ No newline at end of file diff --git a/bookkeeper-dist/src/main/resources/deps/checker-qual-3.5.0/LICENSE b/bookkeeper-dist/src/main/resources/deps/checker-qual-3.5.0/LICENSE new file mode 100644 index 00000000000..7d677403f06 --- /dev/null +++ b/bookkeeper-dist/src/main/resources/deps/checker-qual-3.5.0/LICENSE @@ -0,0 +1,413 @@ +The Checker Framework +Copyright 2004-present by the Checker Framework developers + + +Most of the Checker Framework is licensed under the GNU General Public +License, version 2 (GPL2), with the classpath exception. The text of this +license appears below. This is the same license used for OpenJDK. + +A few parts of the Checker Framework have more permissive licenses. + + * The annotations are licensed under the MIT License. (The text of this + license appears below.) More specifically, all the parts of the Checker + Framework that you might want to include with your own program use the + MIT License. This is the checker-qual.jar file and all the files that + appear in it: every file in a qual/ directory, plus utility files such + as NullnessUtil.java, RegexUtil.java, SignednessUtil.java, etc. + In addition, the cleanroom implementations of third-party annotations, + which the Checker Framework recognizes as aliases for its own + annotations, are licensed under the MIT License. + +Some external libraries that are included with the Checker Framework have +different licenses. + + * javaparser is dual licensed under the LGPL or the Apache license -- you + may use it under whichever one you want. (The javaparser source code + contains a file with the text of the GPL, but it is not clear why, since + javaparser does not use the GPL.) See file stubparser/LICENSE + and the source code of all its files. + + * JUnit is licensed under the Common Public License v1.0 (see + http://www.junit.org/license), with parts (Hamcrest) licensed under the + BSD License (see http://hamcrest.org/JavaHamcrest/). + + * Libraries in plume-lib (https://github.com/plume-lib/) are licensed + under the MIT License. + +The Checker Framework includes annotations for the JDK in directory +checker/jdk/, and for some other libraries. Each annotated library uses +the same license as the unannotated version of the library. + +=========================================================================== + +The GNU General Public License (GPL) + +Version 2, June 1991 + +Copyright (C) 1989, 1991 Free Software Foundation, Inc. +59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +Everyone is permitted to copy and distribute verbatim copies of this license +document, but changing it is not allowed. + +Preamble + +The licenses for most software are designed to take away your freedom to share +and change it. By contrast, the GNU General Public License is intended to +guarantee your freedom to share and change free software--to make sure the +software is free for all its users. This General Public License applies to +most of the Free Software Foundation's software and to any other program whose +authors commit to using it. 
(Some other Free Software Foundation software is +covered by the GNU Library General Public License instead.) You can apply it to +your programs, too. + +When we speak of free software, we are referring to freedom, not price. Our +General Public Licenses are designed to make sure that you have the freedom to +distribute copies of free software (and charge for this service if you wish), +that you receive source code or can get it if you want it, that you can change +the software or use pieces of it in new free programs; and that you know you +can do these things. + +To protect your rights, we need to make restrictions that forbid anyone to deny +you these rights or to ask you to surrender the rights. These restrictions +translate to certain responsibilities for you if you distribute copies of the +software, or if you modify it. + +For example, if you distribute copies of such a program, whether gratis or for +a fee, you must give the recipients all the rights that you have. You must +make sure that they, too, receive or can get the source code. And you must +show them these terms so they know their rights. + +We protect your rights with two steps: (1) copyright the software, and (2) +offer you this license which gives you legal permission to copy, distribute +and/or modify the software. + +Also, for each author's protection and ours, we want to make certain that +everyone understands that there is no warranty for this free software. If the +software is modified by someone else and passed on, we want its recipients to +know that what they have is not the original, so that any problems introduced +by others will not reflect on the original authors' reputations. + +Finally, any free program is threatened constantly by software patents. We +wish to avoid the danger that redistributors of a free program will +individually obtain patent licenses, in effect making the program proprietary. +To prevent this, we have made it clear that any patent must be licensed for +everyone's free use or not licensed at all. + +The precise terms and conditions for copying, distribution and modification +follow. + +TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + +0. This License applies to any program or other work which contains a notice +placed by the copyright holder saying it may be distributed under the terms of +this General Public License. The "Program", below, refers to any such program +or work, and a "work based on the Program" means either the Program or any +derivative work under copyright law: that is to say, a work containing the +Program or a portion of it, either verbatim or with modifications and/or +translated into another language. (Hereinafter, translation is included +without limitation in the term "modification".) Each licensee is addressed as +"you". + +Activities other than copying, distribution and modification are not covered by +this License; they are outside its scope. The act of running the Program is +not restricted, and the output from the Program is covered only if its contents +constitute a work based on the Program (independent of having been made by +running the Program). Whether that is true depends on what the Program does. + +1. 
You may copy and distribute verbatim copies of the Program's source code as +you receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice and +disclaimer of warranty; keep intact all the notices that refer to this License +and to the absence of any warranty; and give any other recipients of the +Program a copy of this License along with the Program. + +You may charge a fee for the physical act of transferring a copy, and you may +at your option offer warranty protection in exchange for a fee. + +2. You may modify your copy or copies of the Program or any portion of it, thus +forming a work based on the Program, and copy and distribute such modifications +or work under the terms of Section 1 above, provided that you also meet all of +these conditions: + + a) You must cause the modified files to carry prominent notices stating + that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in whole or + in part contains or is derived from the Program or any part thereof, to be + licensed as a whole at no charge to all third parties under the terms of + this License. + + c) If the modified program normally reads commands interactively when run, + you must cause it, when started running for such interactive use in the + most ordinary way, to print or display an announcement including an + appropriate copyright notice and a notice that there is no warranty (or + else, saying that you provide a warranty) and that users may redistribute + the program under these conditions, and telling the user how to view a copy + of this License. (Exception: if the Program itself is interactive but does + not normally print such an announcement, your work based on the Program is + not required to print an announcement.) + +These requirements apply to the modified work as a whole. If identifiable +sections of that work are not derived from the Program, and can be reasonably +considered independent and separate works in themselves, then this License, and +its terms, do not apply to those sections when you distribute them as separate +works. But when you distribute the same sections as part of a whole which is a +work based on the Program, the distribution of the whole must be on the terms +of this License, whose permissions for other licensees extend to the entire +whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest your +rights to work written entirely by you; rather, the intent is to exercise the +right to control the distribution of derivative or collective works based on +the Program. + +In addition, mere aggregation of another work not based on the Program with the +Program (or with a work based on the Program) on a volume of a storage or +distribution medium does not bring the other work under the scope of this +License. + +3. 
You may copy and distribute the Program (or a work based on it, under +Section 2) in object code or executable form under the terms of Sections 1 and +2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable source + code, which must be distributed under the terms of Sections 1 and 2 above + on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three years, to + give any third party, for a charge no more than your cost of physically + performing source distribution, a complete machine-readable copy of the + corresponding source code, to be distributed under the terms of Sections 1 + and 2 above on a medium customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer to + distribute corresponding source code. (This alternative is allowed only + for noncommercial distribution and only if you received the program in + object code or executable form with such an offer, in accord with + Subsection b above.) + +The source code for a work means the preferred form of the work for making +modifications to it. For an executable work, complete source code means all +the source code for all modules it contains, plus any associated interface +definition files, plus the scripts used to control compilation and installation +of the executable. However, as a special exception, the source code +distributed need not include anything that is normally distributed (in either +source or binary form) with the major components (compiler, kernel, and so on) +of the operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering access to copy +from a designated place, then offering equivalent access to copy the source +code from the same place counts as distribution of the source code, even though +third parties are not compelled to copy the source along with the object code. + +4. You may not copy, modify, sublicense, or distribute the Program except as +expressly provided under this License. Any attempt otherwise to copy, modify, +sublicense or distribute the Program is void, and will automatically terminate +your rights under this License. However, parties who have received copies, or +rights, from you under this License will not have their licenses terminated so +long as such parties remain in full compliance. + +5. You are not required to accept this License, since you have not signed it. +However, nothing else grants you permission to modify or distribute the Program +or its derivative works. These actions are prohibited by law if you do not +accept this License. Therefore, by modifying or distributing the Program (or +any work based on the Program), you indicate your acceptance of this License to +do so, and all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + +6. Each time you redistribute the Program (or any work based on the Program), +the recipient automatically receives a license from the original licensor to +copy, distribute or modify the Program subject to these terms and conditions. +You may not impose any further restrictions on the recipients' exercise of the +rights granted herein. You are not responsible for enforcing compliance by +third parties to this License. + +7. 
If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), conditions +are imposed on you (whether by court order, agreement or otherwise) that +contradict the conditions of this License, they do not excuse you from the +conditions of this License. If you cannot distribute so as to satisfy +simultaneously your obligations under this License and any other pertinent +obligations, then as a consequence you may not distribute the Program at all. +For example, if a patent license would not permit royalty-free redistribution +of the Program by all those who receive copies directly or indirectly through +you, then the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under any +particular circumstance, the balance of the section is intended to apply and +the section as a whole is intended to apply in other circumstances. + +It is not the purpose of this section to induce you to infringe any patents or +other property right claims or to contest validity of any such claims; this +section has the sole purpose of protecting the integrity of the free software +distribution system, which is implemented by public license practices. Many +people have made generous contributions to the wide range of software +distributed through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing to +distribute software through any other system and a licensee cannot impose that +choice. + +This section is intended to make thoroughly clear what is believed to be a +consequence of the rest of this License. + +8. If the distribution and/or use of the Program is restricted in certain +countries either by patents or by copyrighted interfaces, the original +copyright holder who places the Program under this License may add an explicit +geographical distribution limitation excluding those countries, so that +distribution is permitted only in or among countries not thus excluded. In +such case, this License incorporates the limitation as if written in the body +of this License. + +9. The Free Software Foundation may publish revised and/or new versions of the +General Public License from time to time. Such new versions will be similar in +spirit to the present version, but may differ in detail to address new problems +or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any later +version", you have the option of following the terms and conditions either of +that version or of any later version published by the Free Software Foundation. +If the Program does not specify a version number of this License, you may +choose any version ever published by the Free Software Foundation. + +10. If you wish to incorporate parts of the Program into other free programs +whose distribution conditions are different, write to the author to ask for +permission. For software which is copyrighted by the Free Software Foundation, +write to the Free Software Foundation; we sometimes make exceptions for this. +Our decision will be guided by the two goals of preserving the free status of +all derivatives of our free software and of promoting the sharing and reuse of +software generally. + +NO WARRANTY + +11. 
BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR +THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE +STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE +PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, +INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND +FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND +PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, +YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + +12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL +ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE +PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR +INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA +BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A +FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER +OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. + +END OF TERMS AND CONDITIONS + +How to Apply These Terms to Your New Programs + +If you develop a new program, and you want it to be of the greatest possible +use to the public, the best way to achieve this is to make it free software +which everyone can redistribute and change under these terms. + +To do so, attach the following notices to the program. It is safest to attach +them to the start of each source file to most effectively convey the exclusion +of warranty; and each file should have at least the "copyright" line and a +pointer to where the full notice is found. + + One line to give the program's name and a brief idea of what it does. + + Copyright (C) + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., 59 + Temple Place, Suite 330, Boston, MA 02111-1307 USA + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this when it +starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author Gnomovision comes + with ABSOLUTELY NO WARRANTY; for details type 'show w'. This is free + software, and you are welcome to redistribute it under certain conditions; + type 'show c' for details. + +The hypothetical commands 'show w' and 'show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may be +called something other than 'show w' and 'show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your school, +if any, to sign a "copyright disclaimer" for the program, if necessary. 
Here +is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + 'Gnomovision' (which makes passes at compilers) written by James Hacker. + + signature of Ty Coon, 1 April 1989 + + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General Public +License instead of this License. + + +"CLASSPATH" EXCEPTION TO THE GPL + +Certain source files distributed by Oracle America and/or its affiliates are +subject to the following clarification and special exception to the GPL, but +only where Oracle has expressly included in the particular source file's header +the words "Oracle designates this particular file as subject to the "Classpath" +exception as provided by Oracle in the LICENSE file that accompanied this code." + + Linking this library statically or dynamically with other modules is making + a combined work based on this library. Thus, the terms and conditions of + the GNU General Public License cover the whole combination. + + As a special exception, the copyright holders of this library give you + permission to link this library with independent modules to produce an + executable, regardless of the license terms of these independent modules, + and to copy and distribute the resulting executable under terms of your + choice, provided that you also meet, for each linked independent module, + the terms and conditions of the license of that module. An independent + module is a module which is not derived from or based on this library. If + you modify this library, you may extend this exception to your version of + the library, but you are not obligated to do so. If you do not wish to do + so, delete this exception statement from your version. + +=========================================================================== + +MIT License: + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +=========================================================================== diff --git a/bookkeeper-dist/src/main/resources/deps/google-auth-library-credentials-0.9.0/LICENSE b/bookkeeper-dist/src/main/resources/deps/google-auth-library-credentials-0.20.0/LICENSE similarity index 100% rename from bookkeeper-dist/src/main/resources/deps/google-auth-library-credentials-0.9.0/LICENSE rename to bookkeeper-dist/src/main/resources/deps/google-auth-library-credentials-0.20.0/LICENSE diff --git a/bookkeeper-dist/src/main/resources/deps/javax.servlet-api-3.1.0/CDDL+GPL-1.1 b/bookkeeper-dist/src/main/resources/deps/javax.servlet-api-4.0.0/CDDL+GPL-1.1 similarity index 100% rename from bookkeeper-dist/src/main/resources/deps/javax.servlet-api-3.1.0/CDDL+GPL-1.1 rename to bookkeeper-dist/src/main/resources/deps/javax.servlet-api-4.0.0/CDDL+GPL-1.1 diff --git a/bookkeeper-dist/src/main/resources/deps/jline-0.9.94/LICENSE b/bookkeeper-dist/src/main/resources/deps/jline-0.9.94/LICENSE deleted file mode 100644 index 246f54f7365..00000000000 --- a/bookkeeper-dist/src/main/resources/deps/jline-0.9.94/LICENSE +++ /dev/null @@ -1,32 +0,0 @@ -Copyright (c) 2002-2006, Marc Prud'hommeaux -All rights reserved. - -Redistribution and use in source and binary forms, with or -without modification, are permitted provided that the following -conditions are met: - -Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with -the distribution. - -Neither the name of JLine nor the names of its contributors -may be used to endorse or promote products derived from this -software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, -BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY -AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO -EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, -OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING -IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED -OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/bookkeeper-dist/src/main/resources/deps/netty-3.10.1.Final/LICENSE.webbit.txt b/bookkeeper-dist/src/main/resources/deps/netty-3.10.1.Final/LICENSE.webbit.txt deleted file mode 100644 index ec5f348998b..00000000000 --- a/bookkeeper-dist/src/main/resources/deps/netty-3.10.1.Final/LICENSE.webbit.txt +++ /dev/null @@ -1,38 +0,0 @@ -(BSD License: http://www.opensource.org/licenses/bsd-license) - -Copyright (c) 2011, Joe Walnes, Aslak Hellesøy and contributors -All rights reserved. - -Redistribution and use in source and binary forms, with or -without modification, are permitted provided that the -following conditions are met: - -* Redistributions of source code must retain the above - copyright notice, this list of conditions and the - following disclaimer. 
- -* Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the - following disclaimer in the documentation and/or other - materials provided with the distribution. - -* Neither the name of the Webbit nor the names of - its contributors may be used to endorse or promote products - derived from this software without specific prior written - permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND -CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, -INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR -CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE -GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR -BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT -OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. - diff --git a/bookkeeper-dist/src/main/resources/deps/netty-4.1.22.Final/LICENSE.base64.txt b/bookkeeper-dist/src/main/resources/deps/netty-4.1.22.Final/LICENSE.base64.txt deleted file mode 100644 index 31ebc840539..00000000000 --- a/bookkeeper-dist/src/main/resources/deps/netty-4.1.22.Final/LICENSE.base64.txt +++ /dev/null @@ -1,26 +0,0 @@ -The person or persons who have associated work with this document (the -"Dedicator" or "Certifier") hereby either (a) certifies that, to the best of -his knowledge, the work of authorship identified is in the public domain of -the country from which the work is published, or (b) hereby dedicates whatever -copyright the dedicators holds in the work of authorship identified below (the -"Work") to the public domain. A certifier, moreover, dedicates any copyright -interest he may have in the associated work, and for these purposes, is -described as a "dedicator" below. - -A certifier has taken reasonable steps to verify the copyright status of this -work. Certifier recognizes that his good faith efforts may not shield him from -liability if in fact the work certified is not in the public domain. - -Dedicator makes this dedication for the benefit of the public at large and to -the detriment of the Dedicator's heirs and successors. Dedicator intends this -dedication to be an overt act of relinquishment in perpetuate of all present -and future rights under copyright law, whether vested or contingent, in the -Work. Dedicator understands that such relinquishment of all rights includes -the relinquishment of all rights to enforce (by lawsuit or otherwise) those -copyrights in the Work. - -Dedicator recognizes that, once placed in the public domain, the Work may be -freely reproduced, distributed, transmitted, used, modified, built upon, or -otherwise exploited by anyone for any purpose, commercial or non-commercial, -and in any way, including by methods that have not yet been invented or -conceived. 
diff --git a/bookkeeper-dist/src/main/resources/deps/netty-4.1.22.Final/LICENSE.jsr166y.txt b/bookkeeper-dist/src/main/resources/deps/netty-4.1.22.Final/LICENSE.jsr166y.txt deleted file mode 100644 index b1c292b54cb..00000000000 --- a/bookkeeper-dist/src/main/resources/deps/netty-4.1.22.Final/LICENSE.jsr166y.txt +++ /dev/null @@ -1,26 +0,0 @@ -The person or persons who have associated work with this document (the -"Dedicator" or "Certifier") hereby either (a) certifies that, to the best of -his knowledge, the work of authorship identified is in the public domain of -the country from which the work is published, or (b) hereby dedicates whatever -copyright the dedicators holds in the work of authorship identified below (the -"Work") to the public domain. A certifier, moreover, dedicates any copyright -interest he may have in the associated work, and for these purposes, is -described as a "dedicator" below. - -A certifier has taken reasonable steps to verify the copyright status of this -work. Certifier recognizes that his good faith efforts may not shield him from -liability if in fact the work certified is not in the public domain. - -Dedicator makes this dedication for the benefit of the public at large and to -the detriment of the Dedicator's heirs and successors. Dedicator intends this -dedication to be an overt act of relinquishment in perpetuity of all present -and future rights under copyright law, whether vested or contingent, in the -Work. Dedicator understands that such relinquishment of all rights includes -the relinquishment of all rights to enforce (by lawsuit or otherwise) those -copyrights in the Work. - -Dedicator recognizes that, once placed in the public domain, the Work may be -freely reproduced, distributed, transmitted, used, modified, built upon, or -otherwise exploited by anyone for any purpose, commercial or non-commercial, -and in any way, including by methods that have not yet been invented or -conceived. diff --git a/bookkeeper-dist/src/main/resources/deps/netty-4.1.22.Final/LICENSE.webbit.txt b/bookkeeper-dist/src/main/resources/deps/netty-4.1.22.Final/LICENSE.webbit.txt deleted file mode 100644 index 05ae225fa31..00000000000 --- a/bookkeeper-dist/src/main/resources/deps/netty-4.1.22.Final/LICENSE.webbit.txt +++ /dev/null @@ -1,37 +0,0 @@ -(BSD License: http://www.opensource.org/licenses/bsd-license) - -Copyright (c) 2011, Joe Walnes, Aslak Hellesøy and contributors -All rights reserved. - -Redistribution and use in source and binary forms, with or -without modification, are permitted provided that the -following conditions are met: - -* Redistributions of source code must retain the above - copyright notice, this list of conditions and the - following disclaimer. - -* Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the - following disclaimer in the documentation and/or other - materials provided with the distribution. - -* Neither the name of the Webbit nor the names of - its contributors may be used to endorse or promote products - derived from this software without specific prior written - permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND -CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, -INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR -CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE -GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR -BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT -OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. diff --git a/bookkeeper-dist/src/main/resources/deps/netty/LICENSE.aalto-xml.txt b/bookkeeper-dist/src/main/resources/deps/netty/LICENSE.aalto-xml.txt new file mode 100644 index 00000000000..1fd3ae4f886 --- /dev/null +++ b/bookkeeper-dist/src/main/resources/deps/netty/LICENSE.aalto-xml.txt @@ -0,0 +1,13 @@ +This copy of Aalto XML processor is licensed under the +Apache (Software) License, version 2.0 ("the License"). +See the License for details about distribution rights, and the +specific rights regarding derivate works. + +You may obtain a copy of the License at: + +https://www.apache.org/licenses/ + +A copy is also included with both the the downloadable source code package +and jar that contains class bytecodes, as file "ASL 2.0". In both cases, +that file should be located next to this file: in source distribution +the location should be "release-notes/asl"; and in jar "META-INF/" \ No newline at end of file diff --git a/bookkeeper-dist/src/main/resources/deps/netty-3.10.1.Final/LICENSE.base64.txt b/bookkeeper-dist/src/main/resources/deps/netty/LICENSE.base64.txt similarity index 100% rename from bookkeeper-dist/src/main/resources/deps/netty-3.10.1.Final/LICENSE.base64.txt rename to bookkeeper-dist/src/main/resources/deps/netty/LICENSE.base64.txt diff --git a/bookkeeper-dist/src/main/resources/deps/netty/LICENSE.bouncycastle.txt b/bookkeeper-dist/src/main/resources/deps/netty/LICENSE.bouncycastle.txt new file mode 100644 index 00000000000..aa21043dc10 --- /dev/null +++ b/bookkeeper-dist/src/main/resources/deps/netty/LICENSE.bouncycastle.txt @@ -0,0 +1,23 @@ +The MIT License (MIT) + +Copyright (c) 2000 - 2013 The Legion of the Bouncy Castle Inc. + (https://www.bouncycastle.org) + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ diff --git a/bookkeeper-dist/src/main/resources/deps/netty/LICENSE.caliper.txt b/bookkeeper-dist/src/main/resources/deps/netty/LICENSE.caliper.txt new file mode 100644 index 00000000000..62589edd12a --- /dev/null +++ b/bookkeeper-dist/src/main/resources/deps/netty/LICENSE.caliper.txt @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + https://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. 
+ + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. 
Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/bookkeeper-dist/src/main/resources/deps/netty/LICENSE.commons-lang.txt b/bookkeeper-dist/src/main/resources/deps/netty/LICENSE.commons-lang.txt new file mode 100644 index 00000000000..92a8d0d2e2e --- /dev/null +++ b/bookkeeper-dist/src/main/resources/deps/netty/LICENSE.commons-lang.txt @@ -0,0 +1,177 @@ + Apache License + Version 2.0, January 2004 + https://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. 
The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + diff --git a/bookkeeper-dist/src/main/resources/deps/netty/LICENSE.commons-logging.txt b/bookkeeper-dist/src/main/resources/deps/netty/LICENSE.commons-logging.txt new file mode 100644 index 00000000000..92a8d0d2e2e --- /dev/null +++ b/bookkeeper-dist/src/main/resources/deps/netty/LICENSE.commons-logging.txt @@ -0,0 +1,177 @@ + Apache License + Version 2.0, January 2004 + https://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + diff --git a/bookkeeper-dist/src/main/resources/deps/netty/LICENSE.compress-lzf.txt b/bookkeeper-dist/src/main/resources/deps/netty/LICENSE.compress-lzf.txt new file mode 100644 index 00000000000..d3dfbd0e523 --- /dev/null +++ b/bookkeeper-dist/src/main/resources/deps/netty/LICENSE.compress-lzf.txt @@ -0,0 +1,11 @@ +Copyright 2009-2010 Ning, Inc. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); you may not +use this file except in compliance with the License. You may obtain a copy of +the License at https://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS,WITHOUT +WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +License for the specific language governing permissions and limitations under +the License. diff --git a/bookkeeper-dist/src/main/resources/deps/netty/LICENSE.dnsinfo.txt b/bookkeeper-dist/src/main/resources/deps/netty/LICENSE.dnsinfo.txt new file mode 100644 index 00000000000..29dd192fcfb --- /dev/null +++ b/bookkeeper-dist/src/main/resources/deps/netty/LICENSE.dnsinfo.txt @@ -0,0 +1,22 @@ +/* + * Copyright (c) 2004-2006, 2008, 2009, 2011 Apple Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * https://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ diff --git a/bookkeeper-dist/src/main/resources/deps/netty/LICENSE.harmony.txt b/bookkeeper-dist/src/main/resources/deps/netty/LICENSE.harmony.txt new file mode 100644 index 00000000000..92a8d0d2e2e --- /dev/null +++ b/bookkeeper-dist/src/main/resources/deps/netty/LICENSE.harmony.txt @@ -0,0 +1,177 @@ + Apache License + Version 2.0, January 2004 + https://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + diff --git a/bookkeeper-dist/src/main/resources/deps/netty/LICENSE.hpack.txt b/bookkeeper-dist/src/main/resources/deps/netty/LICENSE.hpack.txt new file mode 100644 index 00000000000..9b259bdfcf9 --- /dev/null +++ b/bookkeeper-dist/src/main/resources/deps/netty/LICENSE.hpack.txt @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + https://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright {yyyy} {name of copyright owner} + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/bookkeeper-dist/src/main/resources/deps/netty/LICENSE.hyper-hpack.txt b/bookkeeper-dist/src/main/resources/deps/netty/LICENSE.hyper-hpack.txt new file mode 100644 index 00000000000..d24c351e188 --- /dev/null +++ b/bookkeeper-dist/src/main/resources/deps/netty/LICENSE.hyper-hpack.txt @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2014 Cory Benfield + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/bookkeeper-dist/src/main/resources/deps/netty/LICENSE.jboss-marshalling.txt b/bookkeeper-dist/src/main/resources/deps/netty/LICENSE.jboss-marshalling.txt new file mode 100644 index 00000000000..a4546c5c306 --- /dev/null +++ b/bookkeeper-dist/src/main/resources/deps/netty/LICENSE.jboss-marshalling.txt @@ -0,0 +1,178 @@ + + Apache License + Version 2.0, January 2004 + https://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. 
The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + diff --git a/bookkeeper-dist/src/main/resources/deps/netty-4.1.22.Final/LICENSE.jbzip2.txt b/bookkeeper-dist/src/main/resources/deps/netty/LICENSE.jbzip2.txt similarity index 100% rename from bookkeeper-dist/src/main/resources/deps/netty-4.1.22.Final/LICENSE.jbzip2.txt rename to bookkeeper-dist/src/main/resources/deps/netty/LICENSE.jbzip2.txt diff --git a/bookkeeper-dist/src/main/resources/deps/netty/LICENSE.jctools.txt b/bookkeeper-dist/src/main/resources/deps/netty/LICENSE.jctools.txt new file mode 100644 index 00000000000..92a8d0d2e2e --- /dev/null +++ b/bookkeeper-dist/src/main/resources/deps/netty/LICENSE.jctools.txt @@ -0,0 +1,177 @@ + Apache License + Version 2.0, January 2004 + https://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. 
The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + diff --git a/bookkeeper-dist/src/main/resources/deps/netty-4.1.22.Final/LICENSE.jfastlz.txt b/bookkeeper-dist/src/main/resources/deps/netty/LICENSE.jfastlz.txt similarity index 100% rename from bookkeeper-dist/src/main/resources/deps/netty-4.1.22.Final/LICENSE.jfastlz.txt rename to bookkeeper-dist/src/main/resources/deps/netty/LICENSE.jfastlz.txt diff --git a/bookkeeper-dist/src/main/resources/deps/netty-3.10.1.Final/LICENSE.jsr166y.txt b/bookkeeper-dist/src/main/resources/deps/netty/LICENSE.jsr166y.txt similarity index 100% rename from bookkeeper-dist/src/main/resources/deps/netty-3.10.1.Final/LICENSE.jsr166y.txt rename to bookkeeper-dist/src/main/resources/deps/netty/LICENSE.jsr166y.txt diff --git a/bookkeeper-dist/src/main/resources/deps/netty-3.10.1.Final/LICENSE.jzlib.txt b/bookkeeper-dist/src/main/resources/deps/netty/LICENSE.jzlib.txt similarity index 100% rename from bookkeeper-dist/src/main/resources/deps/netty-3.10.1.Final/LICENSE.jzlib.txt rename to bookkeeper-dist/src/main/resources/deps/netty/LICENSE.jzlib.txt diff --git a/bookkeeper-dist/src/main/resources/deps/netty-4.1.22.Final/LICENSE.libdivsufsort.txt b/bookkeeper-dist/src/main/resources/deps/netty/LICENSE.libdivsufsort.txt similarity index 100% rename from bookkeeper-dist/src/main/resources/deps/netty-4.1.22.Final/LICENSE.libdivsufsort.txt rename to bookkeeper-dist/src/main/resources/deps/netty/LICENSE.libdivsufsort.txt diff --git a/bookkeeper-dist/src/main/resources/deps/netty/LICENSE.log4j.txt b/bookkeeper-dist/src/main/resources/deps/netty/LICENSE.log4j.txt new file mode 100644 index 00000000000..92a8d0d2e2e --- /dev/null +++ b/bookkeeper-dist/src/main/resources/deps/netty/LICENSE.log4j.txt @@ -0,0 +1,177 @@ + Apache License + Version 2.0, January 2004 + https://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + diff --git a/bookkeeper-dist/src/main/resources/deps/netty/LICENSE.lz4.txt b/bookkeeper-dist/src/main/resources/deps/netty/LICENSE.lz4.txt new file mode 100644 index 00000000000..62589edd12a --- /dev/null +++ b/bookkeeper-dist/src/main/resources/deps/netty/LICENSE.lz4.txt @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + https://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/bookkeeper-dist/src/main/resources/deps/netty/LICENSE.lzma-java.txt b/bookkeeper-dist/src/main/resources/deps/netty/LICENSE.lzma-java.txt new file mode 100644 index 00000000000..62589edd12a --- /dev/null +++ b/bookkeeper-dist/src/main/resources/deps/netty/LICENSE.lzma-java.txt @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + https://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/bookkeeper-dist/src/main/resources/deps/netty/LICENSE.mvn-wrapper.txt b/bookkeeper-dist/src/main/resources/deps/netty/LICENSE.mvn-wrapper.txt new file mode 100644 index 00000000000..62589edd12a --- /dev/null +++ b/bookkeeper-dist/src/main/resources/deps/netty/LICENSE.mvn-wrapper.txt @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + https://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. 
We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/bookkeeper-dist/src/main/resources/deps/netty/LICENSE.nghttp2-hpack.txt b/bookkeeper-dist/src/main/resources/deps/netty/LICENSE.nghttp2-hpack.txt new file mode 100644 index 00000000000..80201792ec7 --- /dev/null +++ b/bookkeeper-dist/src/main/resources/deps/netty/LICENSE.nghttp2-hpack.txt @@ -0,0 +1,23 @@ +The MIT License + +Copyright (c) 2012, 2014, 2015, 2016 Tatsuhiro Tsujikawa +Copyright (c) 2012, 2014, 2015, 2016 nghttp2 contributors + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/bookkeeper-dist/src/main/resources/deps/netty-4.1.22.Final/LICENSE.protobuf.txt b/bookkeeper-dist/src/main/resources/deps/netty/LICENSE.protobuf.txt similarity index 100% rename from bookkeeper-dist/src/main/resources/deps/netty-4.1.22.Final/LICENSE.protobuf.txt rename to bookkeeper-dist/src/main/resources/deps/netty/LICENSE.protobuf.txt diff --git a/bookkeeper-dist/src/main/resources/deps/netty-4.1.22.Final/LICENSE.slf4j.txt b/bookkeeper-dist/src/main/resources/deps/netty/LICENSE.slf4j.txt similarity index 100% rename from bookkeeper-dist/src/main/resources/deps/netty-4.1.22.Final/LICENSE.slf4j.txt rename to bookkeeper-dist/src/main/resources/deps/netty/LICENSE.slf4j.txt diff --git a/bookkeeper-dist/src/main/resources/deps/netty/LICENSE.snappy.txt b/bookkeeper-dist/src/main/resources/deps/netty/LICENSE.snappy.txt new file mode 100644 index 00000000000..ba6a3ae553a --- /dev/null +++ b/bookkeeper-dist/src/main/resources/deps/netty/LICENSE.snappy.txt @@ -0,0 +1,28 @@ +Copyright 2011, Google Inc. +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +* Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. +* Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/bookkeeper-dist/src/main/resources/deps/netty/LICENSE.webbit.txt b/bookkeeper-dist/src/main/resources/deps/netty/LICENSE.webbit.txt new file mode 100644 index 00000000000..47a75441060 --- /dev/null +++ b/bookkeeper-dist/src/main/resources/deps/netty/LICENSE.webbit.txt @@ -0,0 +1,37 @@ +(BSD License: https://www.opensource.org/licenses/bsd-license) + +Copyright (c) 2011, Joe Walnes, Aslak Hellesøy and contributors +All rights reserved. + +Redistribution and use in source and binary forms, with or +without modification, are permitted provided that the +following conditions are met: + +* Redistributions of source code must retain the above + copyright notice, this list of conditions and the + following disclaimer. + +* Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the + following disclaimer in the documentation and/or other + materials provided with the distribution. + +* Neither the name of the Webbit nor the names of + its contributors may be used to endorse or promote products + derived from this software without specific prior written + permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND +CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, +INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT +OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
diff --git a/bookkeeper-dist/src/main/resources/deps/netty/NOTICE.harmony.txt b/bookkeeper-dist/src/main/resources/deps/netty/NOTICE.harmony.txt new file mode 100644 index 00000000000..c37ec28276e --- /dev/null +++ b/bookkeeper-dist/src/main/resources/deps/netty/NOTICE.harmony.txt @@ -0,0 +1,6 @@ +Apache Harmony + +Copyright 2006, 2010 The Apache Software Foundation. + +This product includes software developed at +The Apache Software Foundation (https://www.apache.org/). diff --git a/bookkeeper-dist/src/main/resources/deps/paranamer-2.8/LICENSE.txt b/bookkeeper-dist/src/main/resources/deps/paranamer-2.8/LICENSE.txt deleted file mode 100644 index fca18473ba0..00000000000 --- a/bookkeeper-dist/src/main/resources/deps/paranamer-2.8/LICENSE.txt +++ /dev/null @@ -1,28 +0,0 @@ -[ ParaNamer used to be 'Pubic Domain', but since it includes a small piece of ASM it is now the same license as that: BSD ] - - Copyright (c) 2006 Paul Hammant & ThoughtWorks Inc - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - 1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - 2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - 3. Neither the name of the copyright holders nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF - THE POSSIBILITY OF SUCH DAMAGE. 
\ No newline at end of file diff --git a/bookkeeper-dist/src/main/resources/deps/protobuf-3.0.0/LICENSE b/bookkeeper-dist/src/main/resources/deps/protobuf-3.12.0/LICENSE similarity index 100% rename from bookkeeper-dist/src/main/resources/deps/protobuf-3.0.0/LICENSE rename to bookkeeper-dist/src/main/resources/deps/protobuf-3.12.0/LICENSE diff --git a/bookkeeper-dist/src/main/resources/deps/protobuf-3.5.1/LICENSE b/bookkeeper-dist/src/main/resources/deps/protobuf-3.14.0/LICENSE similarity index 100% rename from bookkeeper-dist/src/main/resources/deps/protobuf-3.5.1/LICENSE rename to bookkeeper-dist/src/main/resources/deps/protobuf-3.14.0/LICENSE diff --git a/bookkeeper-dist/src/main/resources/deps/reactivestreams-1.0.3/LICENSE b/bookkeeper-dist/src/main/resources/deps/reactivestreams-1.0.3/LICENSE new file mode 100644 index 00000000000..eadae05fc84 --- /dev/null +++ b/bookkeeper-dist/src/main/resources/deps/reactivestreams-1.0.3/LICENSE @@ -0,0 +1,8 @@ +Licensed under Public Domain (CC0) + +To the extent possible under law, the person who associated CC0 with +this code has waived all copyright and related or neighboring +rights to this code. + +You should have received a copy of the CC0 legalcode along with this +work. If not, see <http://creativecommons.org/publicdomain/zero/1.0/legalcode>. \ No newline at end of file diff --git a/bookkeeper-dist/src/main/resources/deps/scala-library-2.11.7/LICENSE.md b/bookkeeper-dist/src/main/resources/deps/scala-library-2.11.7/LICENSE.md deleted file mode 100644 index 2a6034cc548..00000000000 --- a/bookkeeper-dist/src/main/resources/deps/scala-library-2.11.7/LICENSE.md +++ /dev/null @@ -1,34 +0,0 @@ -Scala is licensed under the [BSD 3-Clause License](http://opensource.org/licenses/BSD-3-Clause). - -## Scala License - -Copyright (c) 2002-2013 EPFL - -Copyright (c) 2011-2013 Typesafe, Inc. - -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - * Neither the name of the EPFL nor the names of its contributors - may be used to endorse or promote products derived from this software - without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- diff --git a/bookkeeper-dist/src/main/resources/deps/scala-parser-combinators_2.11-1.0.4/LICENSE.md b/bookkeeper-dist/src/main/resources/deps/scala-parser-combinators_2.11-1.0.4/LICENSE.md deleted file mode 100644 index c9f2ca8a789..00000000000 --- a/bookkeeper-dist/src/main/resources/deps/scala-parser-combinators_2.11-1.0.4/LICENSE.md +++ /dev/null @@ -1,28 +0,0 @@ -Copyright (c) 2002-2013 EPFL -Copyright (c) 2011-2013 Typesafe, Inc. - -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - * Neither the name of the EPFL nor the names of its contributors - may be used to endorse or promote products derived from this software - without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/bookkeeper-dist/src/main/resources/deps/scala-reflect-2.11.8/LICENSE.md b/bookkeeper-dist/src/main/resources/deps/scala-reflect-2.11.8/LICENSE.md deleted file mode 100644 index 68afe18b5d8..00000000000 --- a/bookkeeper-dist/src/main/resources/deps/scala-reflect-2.11.8/LICENSE.md +++ /dev/null @@ -1,33 +0,0 @@ -Scala is licensed under the [BSD 3-Clause License](http://opensource.org/licenses/BSD-3-Clause). - -## Scala License - -Copyright (c) 2002-2016 EPFL - -Copyright (c) 2011-2016 Lightbend, Inc. - -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - * Neither the name of the EPFL nor the names of its contributors - may be used to endorse or promote products derived from this software - without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/bookkeeper-dist/src/main/resources/deps/slf4j-1.7.25/LICENSE.txt b/bookkeeper-dist/src/main/resources/deps/slf4j-2.0.12/LICENSE.txt similarity index 100% rename from bookkeeper-dist/src/main/resources/deps/slf4j-1.7.25/LICENSE.txt rename to bookkeeper-dist/src/main/resources/deps/slf4j-2.0.12/LICENSE.txt diff --git a/bookkeeper-http/http-server/pom.xml b/bookkeeper-http/http-server/pom.xml index ec99dfd954c..c05953ddd78 100644 --- a/bookkeeper-http/http-server/pom.xml +++ b/bookkeeper-http/http-server/pom.xml @@ -19,7 +19,7 @@ bookkeeper org.apache.bookkeeper - 4.9.0-SNAPSHOT + 4.18.0-SNAPSHOT ../.. 4.0.0 @@ -28,10 +28,6 @@ Apache BookKeeper :: Http :: Http Server http://maven.apache.org - - commons-configuration - commons-configuration - com.fasterxml.jackson.core jackson-core diff --git a/bookkeeper-http/http-server/src/main/java/org/apache/bookkeeper/http/AbstractHttpHandlerFactory.java b/bookkeeper-http/http-server/src/main/java/org/apache/bookkeeper/http/AbstractHttpHandlerFactory.java index 480a47a244e..3fa2613fccc 100644 --- a/bookkeeper-http/http-server/src/main/java/org/apache/bookkeeper/http/AbstractHttpHandlerFactory.java +++ b/bookkeeper-http/http-server/src/main/java/org/apache/bookkeeper/http/AbstractHttpHandlerFactory.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file diff --git a/bookkeeper-http/http-server/src/main/java/org/apache/bookkeeper/http/HttpRouter.java b/bookkeeper-http/http-server/src/main/java/org/apache/bookkeeper/http/HttpRouter.java index a856c720700..296201e9e8b 100644 --- a/bookkeeper-http/http-server/src/main/java/org/apache/bookkeeper/http/HttpRouter.java +++ b/bookkeeper-http/http-server/src/main/java/org/apache/bookkeeper/http/HttpRouter.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file @@ -45,7 +45,19 @@ public abstract class HttpRouter { public static final String LAST_LOG_MARK = "/api/v1/bookie/last_log_mark"; public static final String LIST_DISK_FILE = "/api/v1/bookie/list_disk_file"; public static final String EXPAND_STORAGE = "/api/v1/bookie/expand_storage"; + public static final String GC = "/api/v1/bookie/gc"; + public static final String GC_DETAILS = "/api/v1/bookie/gc_details"; + public static final String SUSPEND_GC_COMPACTION = "/api/v1/bookie/gc/suspend_compaction"; + public static final String RESUME_GC_COMPACTION = "/api/v1/bookie/gc/resume_compaction"; + public static final String BOOKIE_STATE = "/api/v1/bookie/state"; + public static final String BOOKIE_SANITY = "/api/v1/bookie/sanity"; + public static final String BOOKIE_STATE_READONLY = "/api/v1/bookie/state/readonly"; + public static final String BOOKIE_IS_READY = "/api/v1/bookie/is_ready"; + public static final String BOOKIE_INFO = "/api/v1/bookie/info"; + public static final String CLUSTER_INFO = "/api/v1/bookie/cluster_info"; + public static final String ENTRY_LOCATION_COMPACT = "/api/v1/bookie/entry_location_compact"; // autorecovery + public static final String AUTORECOVERY_STATUS = "/api/v1/autorecovery/status"; public static final String RECOVERY_BOOKIE = "/api/v1/autorecovery/bookie"; public static final String LIST_UNDER_REPLICATED_LEDGER = "/api/v1/autorecovery/list_under_replicated_ledger"; public static final String WHO_IS_AUDITOR = "/api/v1/autorecovery/who_is_auditor"; @@ -73,8 +85,25 @@ public HttpRouter(AbstractHttpHandlerFactory handlerFactory) { this.endpointHandlers.put(LAST_LOG_MARK, handlerFactory.newHandler(HttpServer.ApiType.LAST_LOG_MARK)); this.endpointHandlers.put(LIST_DISK_FILE, handlerFactory.newHandler(HttpServer.ApiType.LIST_DISK_FILE)); this.endpointHandlers.put(EXPAND_STORAGE, handlerFactory.newHandler(HttpServer.ApiType.EXPAND_STORAGE)); + this.endpointHandlers.put(GC, handlerFactory.newHandler(HttpServer.ApiType.GC)); + this.endpointHandlers.put(GC_DETAILS, handlerFactory.newHandler(HttpServer.ApiType.GC_DETAILS)); + this.endpointHandlers.put(BOOKIE_STATE, handlerFactory.newHandler(HttpServer.ApiType.BOOKIE_STATE)); + this.endpointHandlers.put(BOOKIE_SANITY, handlerFactory.newHandler(HttpServer.ApiType.BOOKIE_SANITY)); + this.endpointHandlers.put(BOOKIE_STATE_READONLY, + handlerFactory.newHandler(HttpServer.ApiType.BOOKIE_STATE_READONLY)); + this.endpointHandlers.put(BOOKIE_IS_READY, handlerFactory.newHandler(HttpServer.ApiType.BOOKIE_IS_READY)); + this.endpointHandlers.put(BOOKIE_INFO, handlerFactory.newHandler(HttpServer.ApiType.BOOKIE_INFO)); + this.endpointHandlers.put(CLUSTER_INFO, handlerFactory.newHandler(HttpServer.ApiType.CLUSTER_INFO)); + this.endpointHandlers.put(SUSPEND_GC_COMPACTION, + handlerFactory.newHandler(HttpServer.ApiType.SUSPEND_GC_COMPACTION)); + this.endpointHandlers.put(RESUME_GC_COMPACTION, + handlerFactory.newHandler(HttpServer.ApiType.RESUME_GC_COMPACTION)); + this.endpointHandlers.put(ENTRY_LOCATION_COMPACT, + handlerFactory.newHandler(HttpServer.ApiType.TRIGGER_ENTRY_LOCATION_COMPACT)); // autorecovery + this.endpointHandlers.put(AUTORECOVERY_STATUS, handlerFactory + .newHandler(HttpServer.ApiType.AUTORECOVERY_STATUS)); this.endpointHandlers.put(RECOVERY_BOOKIE, handlerFactory.newHandler(HttpServer.ApiType.RECOVERY_BOOKIE)); this.endpointHandlers.put(LIST_UNDER_REPLICATED_LEDGER, handlerFactory.newHandler(HttpServer.ApiType.LIST_UNDER_REPLICATED_LEDGER)); diff --git 
a/bookkeeper-http/http-server/src/main/java/org/apache/bookkeeper/http/HttpServer.java b/bookkeeper-http/http-server/src/main/java/org/apache/bookkeeper/http/HttpServer.java index 30e4d05ae0c..71d597d5ffa 100644 --- a/bookkeeper-http/http-server/src/main/java/org/apache/bookkeeper/http/HttpServer.java +++ b/bookkeeper-http/http-server/src/main/java/org/apache/bookkeeper/http/HttpServer.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -33,9 +33,12 @@ public interface HttpServer { enum StatusCode { OK(200), REDIRECT(302), + BAD_REQUEST(400), FORBIDDEN(403), NOT_FOUND(404), - INTERNAL_ERROR(500); + METHOD_NOT_ALLOWED(405), + INTERNAL_ERROR(500), + SERVICE_UNAVAILABLE(503); private int value; @@ -77,7 +80,19 @@ enum ApiType { LAST_LOG_MARK, LIST_DISK_FILE, EXPAND_STORAGE, + GC, + GC_DETAILS, + BOOKIE_STATE, + BOOKIE_SANITY, + BOOKIE_STATE_READONLY, + BOOKIE_IS_READY, + BOOKIE_INFO, + CLUSTER_INFO, + RESUME_GC_COMPACTION, + SUSPEND_GC_COMPACTION, + TRIGGER_ENTRY_LOCATION_COMPACT, // autorecovery + AUTORECOVERY_STATUS, RECOVERY_BOOKIE, LIST_UNDER_REPLICATED_LEDGER, WHO_IS_AUDITOR, @@ -96,6 +111,16 @@ enum ApiType { */ boolean startServer(int port); + /** + * Start the HTTP server on the given port and host. + */ + boolean startServer(int port, String host); + + /** + * Start the HTTP server on the given port and host, applying the supplied + * {@link HttpServerConfiguration} (e.g. TLS settings). + */ + boolean startServer(int port, String host, HttpServerConfiguration httpServerConfiguration); + /** * Stop the HTTP server. */ diff --git a/bookkeeper-http/http-server/src/main/java/org/apache/bookkeeper/http/HttpServerConfiguration.java b/bookkeeper-http/http-server/src/main/java/org/apache/bookkeeper/http/HttpServerConfiguration.java new file mode 100644 index 00000000000..813ff2ce510 --- /dev/null +++ b/bookkeeper-http/http-server/src/main/java/org/apache/bookkeeper/http/HttpServerConfiguration.java @@ -0,0 +1,89 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.http; + +/** + * Class describing http server configuration, such as TLS settings.
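 + * + * <p>A minimal usage sketch (illustrative only; {@code httpServer} stands for any {@link HttpServer} implementation, and the key-store path and password are placeholders): + * <pre>{@code + * HttpServerConfiguration conf = new HttpServerConfiguration(); + * conf.setTlsEnable(true); + * conf.setKeyStorePath("/path/to/keystore.jks"); + * conf.setKeyStorePassword("changeit"); + * httpServer.startServer(8080, "0.0.0.0", conf); + * }</pre>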
+ */ +public class HttpServerConfiguration { + + private boolean tlsEnable; + + private String keyStorePath; + + private String keyStorePassword; + + private String trustStorePath; + + private String trustStorePassword; + + public HttpServerConfiguration() { + } + + public HttpServerConfiguration(boolean tlsEnable, String keyStorePath, String keyStorePassword, + String trustStorePath, String trustStorePassword) { + this.tlsEnable = tlsEnable; + this.keyStorePath = keyStorePath; + this.keyStorePassword = keyStorePassword; + this.trustStorePath = trustStorePath; + this.trustStorePassword = trustStorePassword; + } + + public boolean isTlsEnable() { + return tlsEnable; + } + + public void setTlsEnable(boolean tlsEnable) { + this.tlsEnable = tlsEnable; + } + + public String getKeyStorePath() { + return keyStorePath; + } + + public void setKeyStorePath(String keyStorePath) { + this.keyStorePath = keyStorePath; + } + + public String getKeyStorePassword() { + return keyStorePassword; + } + + public void setKeyStorePassword(String keyStorePassword) { + this.keyStorePassword = keyStorePassword; + } + + public String getTrustStorePath() { + return trustStorePath; + } + + public void setTrustStorePath(String trustStorePath) { + this.trustStorePath = trustStorePath; + } + + public String getTrustStorePassword() { + return trustStorePassword; + } + + public void setTrustStorePassword(String trustStorePassword) { + this.trustStorePassword = trustStorePassword; + } +} diff --git a/bookkeeper-http/http-server/src/main/java/org/apache/bookkeeper/http/HttpServerLoader.java b/bookkeeper-http/http-server/src/main/java/org/apache/bookkeeper/http/HttpServerLoader.java index 3235c3ae176..b0aa27c3e6e 100644 --- a/bookkeeper-http/http-server/src/main/java/org/apache/bookkeeper/http/HttpServerLoader.java +++ b/bookkeeper-http/http-server/src/main/java/org/apache/bookkeeper/http/HttpServerLoader.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -22,7 +22,6 @@ import java.lang.reflect.Constructor; import java.lang.reflect.InvocationTargetException; - import org.apache.commons.configuration.Configuration; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/bookkeeper-http/http-server/src/main/java/org/apache/bookkeeper/http/HttpServiceProvider.java b/bookkeeper-http/http-server/src/main/java/org/apache/bookkeeper/http/HttpServiceProvider.java index 5d1ceca7b3c..33e56d05f68 100644 --- a/bookkeeper-http/http-server/src/main/java/org/apache/bookkeeper/http/HttpServiceProvider.java +++ b/bookkeeper-http/http-server/src/main/java/org/apache/bookkeeper/http/HttpServiceProvider.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file diff --git a/bookkeeper-http/http-server/src/main/java/org/apache/bookkeeper/http/NullHttpServiceProvider.java b/bookkeeper-http/http-server/src/main/java/org/apache/bookkeeper/http/NullHttpServiceProvider.java index 4bff10e582f..02ffa1f3af9 100644 --- a/bookkeeper-http/http-server/src/main/java/org/apache/bookkeeper/http/NullHttpServiceProvider.java +++ b/bookkeeper-http/http-server/src/main/java/org/apache/bookkeeper/http/NullHttpServiceProvider.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file diff --git a/bookkeeper-http/http-server/src/main/java/org/apache/bookkeeper/http/service/ErrorHttpService.java b/bookkeeper-http/http-server/src/main/java/org/apache/bookkeeper/http/service/ErrorHttpService.java index 30bdff7a912..4662f5c347f 100644 --- a/bookkeeper-http/http-server/src/main/java/org/apache/bookkeeper/http/service/ErrorHttpService.java +++ b/bookkeeper-http/http-server/src/main/java/org/apache/bookkeeper/http/service/ErrorHttpService.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file diff --git a/bookkeeper-http/http-server/src/main/java/org/apache/bookkeeper/http/service/HeartbeatService.java b/bookkeeper-http/http-server/src/main/java/org/apache/bookkeeper/http/service/HeartbeatService.java index bb1349735d4..62560b1efc8 100644 --- a/bookkeeper-http/http-server/src/main/java/org/apache/bookkeeper/http/service/HeartbeatService.java +++ b/bookkeeper-http/http-server/src/main/java/org/apache/bookkeeper/http/service/HeartbeatService.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file diff --git a/bookkeeper-http/http-server/src/main/java/org/apache/bookkeeper/http/service/HttpEndpointService.java b/bookkeeper-http/http-server/src/main/java/org/apache/bookkeeper/http/service/HttpEndpointService.java index 20f45c215b0..c75aedb376e 100644 --- a/bookkeeper-http/http-server/src/main/java/org/apache/bookkeeper/http/service/HttpEndpointService.java +++ b/bookkeeper-http/http-server/src/main/java/org/apache/bookkeeper/http/service/HttpEndpointService.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file diff --git a/bookkeeper-http/http-server/src/main/java/org/apache/bookkeeper/http/service/HttpServiceRequest.java b/bookkeeper-http/http-server/src/main/java/org/apache/bookkeeper/http/service/HttpServiceRequest.java index cce7981efc9..da2e2486347 100644 --- a/bookkeeper-http/http-server/src/main/java/org/apache/bookkeeper/http/service/HttpServiceRequest.java +++ b/bookkeeper-http/http-server/src/main/java/org/apache/bookkeeper/http/service/HttpServiceRequest.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -22,7 +22,6 @@ import java.util.HashMap; import java.util.Map; - import org.apache.bookkeeper.http.HttpServer; /** diff --git a/bookkeeper-http/http-server/src/main/java/org/apache/bookkeeper/http/service/HttpServiceResponse.java b/bookkeeper-http/http-server/src/main/java/org/apache/bookkeeper/http/service/HttpServiceResponse.java index 9224d1e6b92..75b2a6a7753 100644 --- a/bookkeeper-http/http-server/src/main/java/org/apache/bookkeeper/http/service/HttpServiceResponse.java +++ b/bookkeeper-http/http-server/src/main/java/org/apache/bookkeeper/http/service/HttpServiceResponse.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file @@ -28,6 +28,7 @@ */ public class HttpServiceResponse { private String body; + private String contentType; private HttpServer.StatusCode code = HttpServer.StatusCode.OK; public HttpServiceResponse() {} @@ -41,6 +42,10 @@ public String getBody() { return body; } + public String getContentType() { + return contentType; + } + public int getStatusCode() { return code.getValue(); } @@ -50,6 +55,11 @@ public HttpServiceResponse setBody(String body) { return this; } + public HttpServiceResponse setContentType(String contentType) { + this.contentType = contentType; + return this; + } + public HttpServiceResponse setCode(HttpServer.StatusCode code) { this.code = code; return this; diff --git a/bookkeeper-http/http-server/src/main/java/org/apache/bookkeeper/http/service/NullHttpService.java b/bookkeeper-http/http-server/src/main/java/org/apache/bookkeeper/http/service/NullHttpService.java index 2f93f34efd2..9fead69581e 100644 --- a/bookkeeper-http/http-server/src/main/java/org/apache/bookkeeper/http/service/NullHttpService.java +++ b/bookkeeper-http/http-server/src/main/java/org/apache/bookkeeper/http/service/NullHttpService.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -30,6 +30,9 @@ public class NullHttpService implements HttpEndpointService { @Override public HttpServiceResponse handle(HttpServiceRequest request) { + if (request.getBody() != null) { + return new HttpServiceResponse(request.getBody(), HttpServer.StatusCode.OK); + } return new HttpServiceResponse(CONTENT, HttpServer.StatusCode.OK); } } diff --git a/bookkeeper-http/pom.xml b/bookkeeper-http/pom.xml index 0daa352ef0d..d026ebf0436 100644 --- a/bookkeeper-http/pom.xml +++ b/bookkeeper-http/pom.xml @@ -19,7 +19,7 @@ bookkeeper org.apache.bookkeeper - 4.9.0-SNAPSHOT + 4.18.0-SNAPSHOT 4.0.0 org.apache.bookkeeper.http @@ -28,7 +28,7 @@ Apache BookKeeper :: Http http-server - twitter-http-server vertx-http-server + servlet-http-server diff --git a/bookkeeper-http/servlet-http-server/pom.xml b/bookkeeper-http/servlet-http-server/pom.xml new file mode 100644 index 00000000000..44cecbd2453 --- /dev/null +++ b/bookkeeper-http/servlet-http-server/pom.xml @@ -0,0 +1,56 @@ + + + + + bookkeeper + org.apache.bookkeeper + 4.18.0-SNAPSHOT + ../.. + + 4.0.0 + org.apache.bookkeeper.http + servlet-http-server + Apache BookKeeper :: Bookkeeper Http :: Servlet Http Server + http://maven.apache.org + + + + org.apache.bookkeeper.http + http-server + ${project.version} + + + javax.servlet + javax.servlet-api + + + commons-io + commons-io + + + org.eclipse.jetty + jetty-server + test + + + org.eclipse.jetty + jetty-webapp + test + + + diff --git a/bookkeeper-http/servlet-http-server/src/main/java/org/apache/bookkeeper/http/servlet/BookieHttpServiceServlet.java b/bookkeeper-http/servlet-http-server/src/main/java/org/apache/bookkeeper/http/servlet/BookieHttpServiceServlet.java new file mode 100644 index 00000000000..30fe063786a --- /dev/null +++ b/bookkeeper-http/servlet-http-server/src/main/java/org/apache/bookkeeper/http/servlet/BookieHttpServiceServlet.java @@ -0,0 +1,135 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.http.servlet; + +import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; +import java.io.IOException; +import java.io.Writer; +import java.util.Enumeration; +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import javax.servlet.http.HttpServlet; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; +import org.apache.bookkeeper.http.AbstractHttpHandlerFactory; +import org.apache.bookkeeper.http.HttpRouter; +import org.apache.bookkeeper.http.HttpServer; +import org.apache.bookkeeper.http.HttpServer.ApiType; +import org.apache.bookkeeper.http.HttpServiceProvider; +import org.apache.bookkeeper.http.service.HttpEndpointService; +import org.apache.bookkeeper.http.service.HttpServiceRequest; +import org.apache.bookkeeper.http.service.HttpServiceResponse; +import org.apache.commons.io.IOUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Bookie http service servlet. + * + **/ +public class BookieHttpServiceServlet extends HttpServlet { + static final Logger LOG = LoggerFactory.getLogger(BookieHttpServiceServlet.class); + + // url to api + private final Map<String, HttpServer.ApiType> mappings = new ConcurrentHashMap<>(); + + public BookieHttpServiceServlet() { + HttpRouter<HttpServer.ApiType> router = new HttpRouter<HttpServer.ApiType>( + new AbstractHttpHandlerFactory<HttpServer.ApiType>(BookieServletHttpServer.getBookieHttpServiceProvider()) { + @Override + public HttpServer.ApiType newHandler(HttpServer.ApiType apiType) { + return apiType; + } + }) { + @Override + public void bindHandler(String endpoint, HttpServer.ApiType mapping) { + mappings.put(endpoint, mapping); + } + }; + router.bindAll(); + } + + @Override + @SuppressFBWarnings("RCN_REDUNDANT_NULLCHECK_WOULD_HAVE_BEEN_A_NPE") + protected void service(HttpServletRequest httpRequest, HttpServletResponse httpResponse) throws IOException { + HttpServiceRequest request = new HttpServiceRequest() + .setMethod(httpServerMethod(httpRequest)) + .setParams(httpServletParams(httpRequest)) + .setBody(IOUtils.toString(httpRequest.getInputStream(), "UTF-8")); + String uri = httpRequest.getRequestURI(); + HttpServiceResponse response; + try { + HttpServer.ApiType apiType = mappings.get(uri); + HttpServiceProvider bookie = BookieServletHttpServer.getBookieHttpServiceProvider(); + if (bookie == null) { + httpResponse.sendError(HttpServletResponse.SC_INTERNAL_SERVER_ERROR); + return; + } + HttpEndpointService httpEndpointService = bookie.provideHttpEndpointService(apiType); + if (httpEndpointService == null) { + httpResponse.sendError(HttpServletResponse.SC_NOT_FOUND); + return; + } + response = httpEndpointService.handle(request); + httpResponse.setStatus(response.getStatusCode()); + try (Writer out = httpResponse.getWriter()) { + out.write(response.getBody()); + } + } catch (Throwable e) { + LOG.error("Error while service Bookie API request {}", uri, e);
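 + // Any failure thrown by a handler is logged and then mapped to HTTP 500 below.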
+ httpResponse.sendError(HttpServletResponse.SC_INTERNAL_SERVER_ERROR); + } + } + + + /** + * Convert http request parameters to a map. + */ + @SuppressWarnings("unchecked") + Map<String, String> httpServletParams(HttpServletRequest request) { + Map<String, String> map = new HashMap<>(); + for (Enumeration<String> param = request.getParameterNames(); + param.hasMoreElements();) { + String pName = param.nextElement(); + map.put(pName, request.getParameter(pName)); + } + return map; + } + + /** + * Get servlet request method and convert to the method that can be recognized by HttpServer. + */ + HttpServer.Method httpServerMethod(HttpServletRequest request) { + switch (request.getMethod()) { + case "POST": + return HttpServer.Method.POST; + case "DELETE": + return HttpServer.Method.DELETE; + case "PUT": + return HttpServer.Method.PUT; + case "GET": + return HttpServer.Method.GET; + default: + throw new UnsupportedOperationException("Unsupported http method"); + } + } +} diff --git a/bookkeeper-http/servlet-http-server/src/main/java/org/apache/bookkeeper/http/servlet/BookieServletHttpServer.java b/bookkeeper-http/servlet-http-server/src/main/java/org/apache/bookkeeper/http/servlet/BookieServletHttpServer.java new file mode 100644 index 00000000000..5af0d83fe2d --- /dev/null +++ b/bookkeeper-http/servlet-http-server/src/main/java/org/apache/bookkeeper/http/servlet/BookieServletHttpServer.java @@ -0,0 +1,98 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.http.servlet; + +import org.apache.bookkeeper.http.HttpServer; +import org.apache.bookkeeper.http.HttpServerConfiguration; +import org.apache.bookkeeper.http.HttpServiceProvider; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Only used to hold the Http service provider; not a fully implemented bookie http service server. + **/ +public class BookieServletHttpServer implements HttpServer { + static final Logger LOG = LoggerFactory.getLogger(BookieServletHttpServer.class); + private static HttpServiceProvider bookieHttpServiceProvider; + private static int listenPort = -1; + private static String listenHost = "0.0.0.0"; + + + public static HttpServiceProvider getBookieHttpServiceProvider() { + return bookieHttpServiceProvider; + } + /** + * Listen port.
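 + * + * <p>Illustrative sketch (not part of the original patch): an embedding servlet container can read back the host and port recorded by {@code startServer} and bind {@link BookieHttpServiceServlet} there itself: + * <pre>{@code + * String host = BookieServletHttpServer.getListenHost(); + * int port = BookieServletHttpServer.getListenPort(); + * // register new BookieHttpServiceServlet() at host:port in the container + * }</pre>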
+ **/ + public static int getListenPort(){ + return listenPort; + } + + public static String getListenHost() { + return listenHost; + } + + @Override + public void initialize(HttpServiceProvider httpServiceProvider) { + setHttpServiceProvider(httpServiceProvider); + LOG.info("Bookie HTTP Server initialized: {}", httpServiceProvider); + } + + public static synchronized void setHttpServiceProvider(HttpServiceProvider httpServiceProvider){ + bookieHttpServiceProvider = httpServiceProvider; + } + + public static synchronized void setPort(int port){ + listenPort = port; + } + + public static void setHost(String host) { + listenHost = host; + } + + @Override + public boolean startServer(int port) { + setPort(port); + return true; + } + + @Override + public boolean startServer(int port, String host) { + setPort(port); + setHost(host); + return true; + } + + @Override + public boolean startServer(int port, String host, HttpServerConfiguration httpServerConfiguration) { + return startServer(port, host); + } + + @Override + public void stopServer() { + + } + + @Override + public boolean isRunning() { + return true; + } +} diff --git a/bookkeeper-http/servlet-http-server/src/main/java/org/apache/bookkeeper/http/servlet/package-info.java b/bookkeeper-http/servlet-http-server/src/main/java/org/apache/bookkeeper/http/servlet/package-info.java new file mode 100644 index 00000000000..bdbb4f85974 --- /dev/null +++ b/bookkeeper-http/servlet-http-server/src/main/java/org/apache/bookkeeper/http/servlet/package-info.java @@ -0,0 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +/** + * Package for Servlet based http server. + */ +package org.apache.bookkeeper.http.servlet; diff --git a/bookkeeper-http/servlet-http-server/src/test/java/org/apache/bookkeeper/http/servlet/JettyHttpServer.java b/bookkeeper-http/servlet-http-server/src/test/java/org/apache/bookkeeper/http/servlet/JettyHttpServer.java new file mode 100644 index 00000000000..5eb32a196e5 --- /dev/null +++ b/bookkeeper-http/servlet-http-server/src/test/java/org/apache/bookkeeper/http/servlet/JettyHttpServer.java @@ -0,0 +1,80 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +package org.apache.bookkeeper.http.servlet; + +import java.io.File; +import java.io.IOException; +import java.net.InetSocketAddress; +import java.nio.file.Files; +import java.util.List; +import javax.servlet.Servlet; +import org.eclipse.jetty.server.Server; +import org.eclipse.jetty.server.handler.ContextHandlerCollection; +import org.eclipse.jetty.servlet.ServletHolder; +import org.eclipse.jetty.webapp.WebAppContext; + +/** + * Jetty based http server. + **/ + +public class JettyHttpServer { + + private Server jettyServer; + private ContextHandlerCollection contexts; + + public JettyHttpServer(String host, int port) { + this.jettyServer = new Server(new InetSocketAddress(host, port)); + this.contexts = new ContextHandlerCollection(); + this.jettyServer.setHandler(contexts); + } + /** + * Add servlet. + **/ + public void addServlet(String webApp, String contextPath, String pathSpec, List<Servlet> servlets) throws IOException { + if (servlets == null) { + return; + } + File bookieApi = new File(webApp); + if (!bookieApi.isDirectory()) { + Files.createDirectories(bookieApi.toPath()); + } + WebAppContext webAppBookie = new WebAppContext(bookieApi.getAbsolutePath(), contextPath); + for (Servlet s : servlets) { + webAppBookie.addServlet(new ServletHolder(s), pathSpec); + } + contexts.addHandler(webAppBookie); + } + + /** + * Start jetty server. + **/ + public void startServer() throws Exception { + jettyServer.start(); + } + + /** + * Stop jetty server. + **/ + public void stopServer() throws Exception { + jettyServer.stop(); + } +}
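A minimal wiring sketch for the new module, mirroring the test that follows (the host, port, and web-app directory are arbitrary choices; only names introduced in this patch are used):

    BookieServletHttpServer holder = new BookieServletHttpServer();
    holder.initialize(new NullHttpServiceProvider());      // any HttpServiceProvider works here
    JettyHttpServer jetty = new JettyHttpServer("localhost", 8080);
    List<Servlet> servlets = new ArrayList<>();
    servlets.add(new BookieHttpServiceServlet());          // resolves the provider via the holder
    jetty.addServlet("web/bookie", "/", "/", servlets);    // web-app dir, context path, path spec
    jetty.startServer();
    // GET http://localhost:8080/heartbeat now answers "OK"
    jetty.stopServer();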
diff --git a/bookkeeper-http/servlet-http-server/src/test/java/org/apache/bookkeeper/http/servlet/TestBookieHttpServiceServlet.java b/bookkeeper-http/servlet-http-server/src/test/java/org/apache/bookkeeper/http/servlet/TestBookieHttpServiceServlet.java new file mode 100644 index 00000000000..0f86cb91ee5 --- /dev/null +++ b/bookkeeper-http/servlet-http-server/src/test/java/org/apache/bookkeeper/http/servlet/TestBookieHttpServiceServlet.java @@ -0,0 +1,67 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.http.servlet; + +import static org.hamcrest.CoreMatchers.containsString; +import static org.junit.Assert.assertThat; + +import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; +import java.util.ArrayList; +import java.util.List; +import javax.servlet.Servlet; +import org.apache.bookkeeper.http.NullHttpServiceProvider; +import org.apache.commons.io.IOUtils; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +/** + * Test bookie http service servlet. + **/ +public class TestBookieHttpServiceServlet { + + private JettyHttpServer jettyHttpServer; + private String host = "localhost"; + private int port = 8080; + private BookieServletHttpServer bookieServletHttpServer; + @Before + public void setUp() throws Exception { + this.bookieServletHttpServer = new BookieServletHttpServer(); + this.bookieServletHttpServer.initialize(new NullHttpServiceProvider()); + this.jettyHttpServer = new JettyHttpServer(host, port); + List<Servlet> servlets = new ArrayList<>(); + servlets.add(new BookieHttpServiceServlet()); + jettyHttpServer.addServlet("web/bookie", "/", "/", servlets); + jettyHttpServer.startServer(); + } + + @Test + public void testBookieHeartBeat() throws URISyntaxException, IOException { + assertThat(IOUtils.toString(new URI(String.format("http://%s:%d/heartbeat", host, port)), "UTF-8"), + containsString("OK")); + } + + @After + public void stop() throws Exception { + jettyHttpServer.stopServer(); + } +} diff --git a/bookkeeper-http/twitter-http-server/pom.xml b/bookkeeper-http/twitter-http-server/pom.xml deleted file mode 100644 index 0a214061aad..00000000000 --- a/bookkeeper-http/twitter-http-server/pom.xml +++ /dev/null @@ -1,41 +0,0 @@ - - - - - bookkeeper - org.apache.bookkeeper - 4.9.0-SNAPSHOT - ../.. - - 4.0.0 - org.apache.bookkeeper.http - twitter-http-server - Apache BookKeeper :: Bookkeeper Http :: Twitter Http Server - http://maven.apache.org - - - com.twitter - twitter-server_2.11 - - - org.apache.bookkeeper.http - http-server - ${project.version} - - - diff --git a/bookkeeper-http/twitter-http-server/src/main/java/org/apache/bookkeeper/http/twitter/TwitterAbstractHandler.java b/bookkeeper-http/twitter-http-server/src/main/java/org/apache/bookkeeper/http/twitter/TwitterAbstractHandler.java deleted file mode 100644 index 87e48dfca18..00000000000 --- a/bookkeeper-http/twitter-http-server/src/main/java/org/apache/bookkeeper/http/twitter/TwitterAbstractHandler.java +++ /dev/null @@ -1,90 +0,0 @@ -/** - * - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License.
- * - */ -package org.apache.bookkeeper.http.twitter; - -import com.twitter.finagle.Service; -import com.twitter.finagle.http.Request; -import com.twitter.finagle.http.Response; -import com.twitter.util.Future; - -import java.util.HashMap; -import java.util.Map; - -import org.apache.bookkeeper.http.HttpServer; -import org.apache.bookkeeper.http.service.ErrorHttpService; -import org.apache.bookkeeper.http.service.HttpEndpointService; -import org.apache.bookkeeper.http.service.HttpServiceRequest; -import org.apache.bookkeeper.http.service.HttpServiceResponse; - -/** - * Http handler for TwitterServer. - */ -public abstract class TwitterAbstractHandler extends Service { - - /** - * Process the request using the given httpEndpointService. - */ - Future processRequest(HttpEndpointService httpEndpointService, Request request) { - HttpServiceRequest httpServiceRequest = new HttpServiceRequest() - .setMethod(convertMethod(request)) - .setParams(convertParams(request)) - .setBody(request.contentString()); - HttpServiceResponse httpServiceResponse = null; - try { - httpServiceResponse = httpEndpointService.handle(httpServiceRequest); - } catch (Exception e) { - httpServiceResponse = new ErrorHttpService().handle(httpServiceRequest); - } - Response response = Response.apply(); - response.setContentString(httpServiceResponse.getBody()); - response.statusCode(httpServiceResponse.getStatusCode()); - return Future.value(response); - } - - /** - * Convert http request parameters to Map. - */ - @SuppressWarnings("unchecked") - Map convertParams(Request request) { - Map map = new HashMap<>(); - for (Map.Entry entry : request.getParams()) { - map.put(entry.getKey(), entry.getValue()); - } - return map; - } - - /** - * Convert http request method to the method that - * can be recognized by HttpServer. - */ - HttpServer.Method convertMethod(Request request) { - switch (request.method().name()) { - case "POST": - return HttpServer.Method.POST; - case "DELETE": - return HttpServer.Method.DELETE; - case "PUT": - return HttpServer.Method.PUT; - default: - return HttpServer.Method.GET; - } - } -} diff --git a/bookkeeper-http/twitter-http-server/src/main/java/org/apache/bookkeeper/http/twitter/TwitterHttpHandlerFactory.java b/bookkeeper-http/twitter-http-server/src/main/java/org/apache/bookkeeper/http/twitter/TwitterHttpHandlerFactory.java deleted file mode 100644 index 569235b0cf3..00000000000 --- a/bookkeeper-http/twitter-http-server/src/main/java/org/apache/bookkeeper/http/twitter/TwitterHttpHandlerFactory.java +++ /dev/null @@ -1,52 +0,0 @@ -/** - * - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- * - */ -package org.apache.bookkeeper.http.twitter; - -import com.twitter.finagle.http.Request; -import com.twitter.finagle.http.Response; -import com.twitter.util.Future; -import org.apache.bookkeeper.http.AbstractHttpHandlerFactory; -import org.apache.bookkeeper.http.HttpServer; -import org.apache.bookkeeper.http.HttpServiceProvider; -import org.apache.bookkeeper.http.service.HttpEndpointService; - - -/** - * Factory which provide http handlers for TwitterServer based Http Server. - */ -public class TwitterHttpHandlerFactory extends AbstractHttpHandlerFactory { - - public TwitterHttpHandlerFactory(HttpServiceProvider httpServiceProvider) { - super(httpServiceProvider); - } - - - @Override - public TwitterAbstractHandler newHandler(HttpServer.ApiType type) { - return new TwitterAbstractHandler() { - @Override - public Future apply(Request request) { - HttpEndpointService service = getHttpServiceProvider().provideHttpEndpointService(type); - return processRequest(service, request); - } - }; - } -} diff --git a/bookkeeper-http/twitter-http-server/src/main/java/org/apache/bookkeeper/http/twitter/TwitterHttpServer.java b/bookkeeper-http/twitter-http-server/src/main/java/org/apache/bookkeeper/http/twitter/TwitterHttpServer.java deleted file mode 100644 index ad42f469ba0..00000000000 --- a/bookkeeper-http/twitter-http-server/src/main/java/org/apache/bookkeeper/http/twitter/TwitterHttpServer.java +++ /dev/null @@ -1,108 +0,0 @@ -/** - * - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - * - */ - -package org.apache.bookkeeper.http.twitter; - -import com.twitter.finagle.Http; -import com.twitter.finagle.ListeningServer; -import com.twitter.finagle.http.HttpMuxer; -import com.twitter.finagle.http.HttpMuxer$; -import com.twitter.server.AbstractTwitterServer; - -import java.io.IOException; -import java.net.InetSocketAddress; - -import org.apache.bookkeeper.http.HttpRouter; -import org.apache.bookkeeper.http.HttpServer; -import org.apache.bookkeeper.http.HttpServiceProvider; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * TwitterServer implementation of Http Server. 
- */ -public class TwitterHttpServer extends AbstractTwitterServer implements HttpServer { - - static final Logger LOG = LoggerFactory.getLogger(TwitterHttpServer.class); - - private ListeningServer server; - private boolean isRunning; - private int port; - private HttpServiceProvider httpServiceProvider; - - @Override - public void initialize(HttpServiceProvider httpServiceProvider) { - this.httpServiceProvider = httpServiceProvider; - } - - @Override - public boolean startServer(int port) { - try { - this.port = port; - this.main(); - isRunning = true; - LOG.info("Twitter HTTP server started successfully"); - return true; - } catch (Throwable throwable) { - LOG.error("Failed to start Twitter Http Server", throwable); - } - return false; - } - - @Override - public void stopServer() { - try { - httpServiceProvider.close(); - } catch (IOException ioe) { - LOG.error("Error while close httpServiceProvider", ioe); - } - if (server != null) { - server.close(); - isRunning = false; - } - } - - @Override - public boolean isRunning() { - return isRunning; - } - - @Override - public void main() throws Throwable { - LOG.info("Starting Twitter HTTP server on port {}", port); - TwitterHttpHandlerFactory handlerFactory = new TwitterHttpHandlerFactory(httpServiceProvider); - HttpRouter requestRouter = new HttpRouter(handlerFactory) { - @Override - public void bindHandler(String endpoint, TwitterAbstractHandler handler) { - HttpMuxer.addHandler(endpoint, handler); - } - }; - requestRouter.bindAll(); - InetSocketAddress addr = new InetSocketAddress(port); - server = Http.server().serve(addr, HttpMuxer$.MODULE$); - } - - @Override - public void onExit() { - stopServer(); - } - -} diff --git a/bookkeeper-http/twitter-http-server/src/main/java/org/apache/bookkeeper/http/twitter/package-info.java b/bookkeeper-http/twitter-http-server/src/main/java/org/apache/bookkeeper/http/twitter/package-info.java deleted file mode 100644 index d52fd62118d..00000000000 --- a/bookkeeper-http/twitter-http-server/src/main/java/org/apache/bookkeeper/http/twitter/package-info.java +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with this - * work for additional information regarding copyright ownership. The ASF - * licenses this file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - */ -/** - * Package for TwitterServer based http server. 
- */ -package org.apache.bookkeeper.http.twitter; diff --git a/bookkeeper-http/twitter-http-server/src/test/java/org/apache/bookkeeper/http/twitter/TestTwitterHttpServer.java b/bookkeeper-http/twitter-http-server/src/test/java/org/apache/bookkeeper/http/twitter/TestTwitterHttpServer.java deleted file mode 100644 index b7e032e7a3b..00000000000 --- a/bookkeeper-http/twitter-http-server/src/test/java/org/apache/bookkeeper/http/twitter/TestTwitterHttpServer.java +++ /dev/null @@ -1,104 +0,0 @@ -/** - * - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - * - */ -package org.apache.bookkeeper.http.twitter; - -import static org.junit.Assert.assertEquals; - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.InputStreamReader; -import java.net.HttpURLConnection; -import java.net.URL; - -import org.apache.bookkeeper.http.HttpRouter; -import org.apache.bookkeeper.http.HttpServer; -import org.apache.bookkeeper.http.HttpServiceProvider; -import org.apache.bookkeeper.http.NullHttpServiceProvider; -import org.apache.bookkeeper.http.service.HeartbeatService; -import org.junit.Test; - -/** - * Unit teset {@link TwitterHttpServer}. 
- */ -public class TestTwitterHttpServer { - - private int port = 8080; - - @Test - public void testStartBasicHttpServer() throws Exception { - TwitterHttpServer httpServer = new TwitterHttpServer(); - HttpServiceProvider httpServiceProvider = NullHttpServiceProvider.getInstance(); - httpServer.initialize(httpServiceProvider); - int port = getNextPort(); - while (!httpServer.startServer(port)) { - httpServer.stopServer(); - port = getNextPort(); - } - HttpResponse httpResponse = sendGet(getUrl(port, HttpRouter.HEARTBEAT)); - assertEquals(HttpServer.StatusCode.OK.getValue(), httpResponse.responseCode); - assertEquals(HeartbeatService.HEARTBEAT.trim(), httpResponse.responseBody.trim()); - httpServer.stopServer(); - } - - // HTTP GET request - private HttpResponse sendGet(String url) throws IOException { - URL obj = new URL(url); - HttpURLConnection con = (HttpURLConnection) obj.openConnection(); - // optional, default is GET - con.setRequestMethod("GET"); - int responseCode = con.getResponseCode(); - StringBuilder response = new StringBuilder(); - BufferedReader in = null; - try { - in = new BufferedReader(new InputStreamReader(con.getInputStream())); - String inputLine; - while ((inputLine = in.readLine()) != null) { - response.append(inputLine); - } - } finally { - if (in != null) { - in.close(); - } - } - return new HttpResponse(responseCode, response.toString()); - } - - private String getUrl(int port, String path) { - return "http://localhost:" + port + path; - } - - private class HttpResponse { - private int responseCode; - private String responseBody; - - public HttpResponse(int responseCode, String responseBody) { - this.responseCode = responseCode; - this.responseBody = responseBody; - } - } - - private int getNextPort() throws Exception { - if (port > 65535) { - throw new Exception("No port available"); - } - return port++; - } -} diff --git a/bookkeeper-http/vertx-http-server/pom.xml b/bookkeeper-http/vertx-http-server/pom.xml index 0b3c9c23ae8..29a00f21d0d 100644 --- a/bookkeeper-http/vertx-http-server/pom.xml +++ b/bookkeeper-http/vertx-http-server/pom.xml @@ -19,7 +19,7 @@ bookkeeper org.apache.bookkeeper - 4.9.0-SNAPSHOT + 4.18.0-SNAPSHOT ../.. 4.0.0 diff --git a/bookkeeper-http/vertx-http-server/src/main/java/org/apache/bookkeeper/http/vertx/VertxAbstractHandler.java b/bookkeeper-http/vertx-http-server/src/main/java/org/apache/bookkeeper/http/vertx/VertxAbstractHandler.java index c433376847a..9c595511f61 100644 --- a/bookkeeper-http/vertx-http-server/src/main/java/org/apache/bookkeeper/http/vertx/VertxAbstractHandler.java +++ b/bookkeeper-http/vertx-http-server/src/main/java/org/apache/bookkeeper/http/vertx/VertxAbstractHandler.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file @@ -20,15 +20,15 @@ */ package org.apache.bookkeeper.http.vertx; +import io.netty.handler.codec.http.HttpHeaderNames; import io.vertx.core.Handler; +import io.vertx.core.http.HttpMethod; import io.vertx.core.http.HttpServerRequest; import io.vertx.core.http.HttpServerResponse; import io.vertx.ext.web.RoutingContext; - import java.util.HashMap; import java.util.Iterator; import java.util.Map; - import org.apache.bookkeeper.http.HttpServer; import org.apache.bookkeeper.http.service.ErrorHttpService; import org.apache.bookkeeper.http.service.HttpEndpointService; @@ -49,7 +49,7 @@ void processRequest(HttpEndpointService httpEndpointService, RoutingContext cont HttpServiceRequest request = new HttpServiceRequest() .setMethod(convertMethod(httpRequest)) .setParams(convertParams(httpRequest)) - .setBody(context.getBodyAsString()); + .setBody(context.body().asString()); HttpServiceResponse response = null; try { response = httpEndpointService.handle(request); @@ -57,6 +57,9 @@ void processRequest(HttpEndpointService httpEndpointService, RoutingContext cont response = new ErrorHttpService().handle(request); } httpResponse.setStatusCode(response.getStatusCode()); + if (response.getContentType() != null) { + httpResponse.putHeader(HttpHeaderNames.CONTENT_TYPE, response.getContentType()); + } httpResponse.end(response.getBody()); } @@ -79,15 +82,14 @@ Map convertParams(HttpServerRequest request) { * can be recognized by HttpServer. */ HttpServer.Method convertMethod(HttpServerRequest request) { - switch (request.method()) { - case POST: - return HttpServer.Method.POST; - case DELETE: - return HttpServer.Method.DELETE; - case PUT: - return HttpServer.Method.PUT; - default: - return HttpServer.Method.GET; + HttpMethod method = request.method(); + if (HttpMethod.POST.equals(method)) { + return HttpServer.Method.POST; + } else if (HttpMethod.DELETE.equals(method)) { + return HttpServer.Method.DELETE; + } else if (HttpMethod.PUT.equals(method)) { + return HttpServer.Method.PUT; } + return HttpServer.Method.GET; } } diff --git a/bookkeeper-http/vertx-http-server/src/main/java/org/apache/bookkeeper/http/vertx/VertxHttpHandlerFactory.java b/bookkeeper-http/vertx-http-server/src/main/java/org/apache/bookkeeper/http/vertx/VertxHttpHandlerFactory.java index f8f0bf0e329..69cb7b6d293 100644 --- a/bookkeeper-http/vertx-http-server/src/main/java/org/apache/bookkeeper/http/vertx/VertxHttpHandlerFactory.java +++ b/bookkeeper-http/vertx-http-server/src/main/java/org/apache/bookkeeper/http/vertx/VertxHttpHandlerFactory.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
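In Vert.x 4, io.vertx.core.http.HttpMethod became an interned class rather than a Java enum, which is why the convertMethod switch above gives way to equals() comparisons against the predefined constants. A minimal alternative sketch under the same assumption (requires Java 9+ for Map.of; the class name is hypothetical, not part of this codebase):

import io.vertx.core.http.HttpMethod;
import java.util.Map;
import org.apache.bookkeeper.http.HttpServer;

// Hypothetical alternative to the equals() chain: a lookup table keyed on
// Vert.x's interned HttpMethod constants, defaulting to GET.
final class HttpMethodMapping {
    private static final Map<HttpMethod, HttpServer.Method> MAPPING = Map.of(
            HttpMethod.POST, HttpServer.Method.POST,
            HttpMethod.DELETE, HttpServer.Method.DELETE,
            HttpMethod.PUT, HttpServer.Method.PUT);

    static HttpServer.Method convert(HttpMethod method) {
        return MAPPING.getOrDefault(method, HttpServer.Method.GET);
    }
}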
See the NOTICE file @@ -22,7 +22,6 @@ package org.apache.bookkeeper.http.vertx; import io.vertx.ext.web.RoutingContext; - import org.apache.bookkeeper.http.AbstractHttpHandlerFactory; import org.apache.bookkeeper.http.HttpServer; import org.apache.bookkeeper.http.HttpServiceProvider; diff --git a/bookkeeper-http/vertx-http-server/src/main/java/org/apache/bookkeeper/http/vertx/VertxHttpServer.java b/bookkeeper-http/vertx-http-server/src/main/java/org/apache/bookkeeper/http/vertx/VertxHttpServer.java index 26c07e84b69..c7c6c20113e 100644 --- a/bookkeeper-http/vertx-http-server/src/main/java/org/apache/bookkeeper/http/vertx/VertxHttpServer.java +++ b/bookkeeper-http/vertx-http-server/src/main/java/org/apache/bookkeeper/http/vertx/VertxHttpServer.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -23,15 +23,18 @@ import io.vertx.core.AbstractVerticle; import io.vertx.core.AsyncResult; import io.vertx.core.Vertx; +import io.vertx.core.http.ClientAuth; +import io.vertx.core.http.HttpServerOptions; +import io.vertx.core.net.JksOptions; import io.vertx.ext.web.Router; - +import io.vertx.ext.web.handler.BodyHandler; import java.io.IOException; import java.util.concurrent.CompletableFuture; import java.util.concurrent.CountDownLatch; import java.util.concurrent.ExecutionException; - import org.apache.bookkeeper.http.HttpRouter; import org.apache.bookkeeper.http.HttpServer; +import org.apache.bookkeeper.http.HttpServerConfiguration; import org.apache.bookkeeper.http.HttpServiceProvider; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -43,14 +46,19 @@ public class VertxHttpServer implements HttpServer { static final Logger LOG = LoggerFactory.getLogger(VertxHttpServer.class); - private Vertx vertx; + private final Vertx vertx; private boolean isRunning; private HttpServiceProvider httpServiceProvider; + private int listeningPort = -1; public VertxHttpServer() { this.vertx = Vertx.vertx(); } + int getListeningPort() { + return listeningPort; + } + @Override public void initialize(HttpServiceProvider httpServiceProvider) { this.httpServiceProvider = httpServiceProvider; @@ -58,27 +66,55 @@ public void initialize(HttpServiceProvider httpServiceProvider) { @Override public boolean startServer(int port) { - CompletableFuture future = new CompletableFuture<>(); + return startServer(port, "0.0.0.0"); + } + + @Override + public boolean startServer(int port, String host) { + return startServer(port, host, new HttpServerConfiguration()); + } + + @Override + public boolean startServer(int port, String host, HttpServerConfiguration httpServerConfiguration) { + CompletableFuture> future = new CompletableFuture<>(); VertxHttpHandlerFactory handlerFactory = new VertxHttpHandlerFactory(httpServiceProvider); Router router = Router.router(vertx); + router.route().handler(BodyHandler.create(false)); HttpRouter requestRouter = new HttpRouter(handlerFactory) { @Override public void bindHandler(String endpoint, VertxAbstractHandler handler) { - router.get(endpoint).handler(handler); + router.get(endpoint).blockingHandler(handler); + router.put(endpoint).blockingHandler(handler); + router.post(endpoint).blockingHandler(handler); + router.delete(endpoint).blockingHandler(handler); } }; requestRouter.bindAll(); vertx.deployVerticle(new AbstractVerticle() { @Override public void start() throws Exception { + HttpServerOptions httpServerOptions = new HttpServerOptions(); + if 
(httpServerConfiguration.isTlsEnable()) { + httpServerOptions.setSsl(true); + httpServerOptions.setClientAuth(ClientAuth.REQUIRED); + JksOptions keyStoreOptions = new JksOptions(); + keyStoreOptions.setPath(httpServerConfiguration.getKeyStorePath()); + keyStoreOptions.setPassword(httpServerConfiguration.getKeyStorePassword()); + httpServerOptions.setKeyCertOptions(keyStoreOptions); + JksOptions trustStoreOptions = new JksOptions(); + trustStoreOptions.setPath(httpServerConfiguration.getTrustStorePath()); + trustStoreOptions.setPassword(httpServerConfiguration.getTrustStorePassword()); + httpServerOptions.setTrustOptions(trustStoreOptions); + } LOG.info("Starting Vertx HTTP server on port {}", port); - vertx.createHttpServer().requestHandler(router::accept).listen(port, future::complete); + vertx.createHttpServer(httpServerOptions).requestHandler(router).listen(port, host, future::complete); } }); try { - AsyncResult asyncResult = future.get(); + AsyncResult asyncResult = future.get(); if (asyncResult.succeeded()) { LOG.info("Vertx Http server started successfully"); + listeningPort = asyncResult.result().actualPort(); isRunning = true; return true; } else { diff --git a/bookkeeper-http/vertx-http-server/src/test/java/org/apache/bookkeeper/http/vertx/TestVertxHttpServer.java b/bookkeeper-http/vertx-http-server/src/test/java/org/apache/bookkeeper/http/vertx/TestVertxHttpServer.java index 59fcfd80921..a0c6c4ae69a 100644 --- a/bookkeeper-http/vertx-http-server/src/test/java/org/apache/bookkeeper/http/vertx/TestVertxHttpServer.java +++ b/bookkeeper-http/vertx-http-server/src/test/java/org/apache/bookkeeper/http/vertx/TestVertxHttpServer.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -20,14 +20,23 @@ */ package org.apache.bookkeeper.http.vertx; +import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import com.google.common.io.Files; +import io.vertx.ext.web.handler.BodyHandler; import java.io.BufferedReader; +import java.io.File; +import java.io.FileInputStream; import java.io.IOException; import java.io.InputStreamReader; +import java.io.OutputStream; +import java.io.OutputStreamWriter; +import java.io.PrintWriter; import java.net.HttpURLConnection; import java.net.URL; - +import java.nio.charset.StandardCharsets; import org.apache.bookkeeper.http.HttpRouter; import org.apache.bookkeeper.http.HttpServer; import org.apache.bookkeeper.http.HttpServiceProvider; @@ -39,20 +48,27 @@ * Unit test {@link VertxHttpServer}. 
*/ public class TestVertxHttpServer { - - private int port = 8080; - @Test public void testStartBasicHttpServer() throws Exception { VertxHttpServer httpServer = new VertxHttpServer(); HttpServiceProvider httpServiceProvider = NullHttpServiceProvider.getInstance(); httpServer.initialize(httpServiceProvider); - int port = getNextPort(); - while (!httpServer.startServer(port)) { - httpServer.stopServer(); - port = getNextPort(); - } - HttpResponse httpResponse = sendGet(getUrl(port, HttpRouter.HEARTBEAT)); + assertTrue(httpServer.startServer(0)); + int port = httpServer.getListeningPort(); + HttpResponse httpResponse = send(getUrl(port, HttpRouter.HEARTBEAT), HttpServer.Method.GET); + assertEquals(HttpServer.StatusCode.OK.getValue(), httpResponse.responseCode); + assertEquals(HeartbeatService.HEARTBEAT.trim(), httpResponse.responseBody.trim()); + httpServer.stopServer(); + } + + @Test + public void testStartBasicHttpServerConfigHost() throws Exception { + VertxHttpServer httpServer = new VertxHttpServer(); + HttpServiceProvider httpServiceProvider = NullHttpServiceProvider.getInstance(); + httpServer.initialize(httpServiceProvider); + assertTrue(httpServer.startServer(0, "localhost")); + int port = httpServer.getListeningPort(); + HttpResponse httpResponse = send(getUrl(port, HttpRouter.HEARTBEAT), HttpServer.Method.GET); assertEquals(HttpServer.StatusCode.OK.getValue(), httpResponse.responseCode); assertEquals(HeartbeatService.HEARTBEAT.trim(), httpResponse.responseBody.trim()); httpServer.stopServer(); @@ -63,22 +79,76 @@ public void testStartMetricsServiceOnRouterPath() throws Exception { VertxHttpServer httpServer = new VertxHttpServer(); HttpServiceProvider httpServiceProvider = NullHttpServiceProvider.getInstance(); httpServer.initialize(httpServiceProvider); - int port = getNextPort(); - while (!httpServer.startServer(port)) { - httpServer.stopServer(); - port = getNextPort(); - } - HttpResponse httpResponse = sendGet(getUrl(port, HttpRouter.METRICS)); + assertTrue(httpServer.startServer(0)); + int port = httpServer.getListeningPort(); + HttpResponse httpResponse = send(getUrl(port, HttpRouter.METRICS), HttpServer.Method.GET); + assertEquals(HttpServer.StatusCode.OK.getValue(), httpResponse.responseCode); + httpServer.stopServer(); + } + + @Test + public void testHttpMethods() throws Exception { + VertxHttpServer httpServer = new VertxHttpServer(); + HttpServiceProvider httpServiceProvider = NullHttpServiceProvider.getInstance(); + httpServer.initialize(httpServiceProvider); + assertTrue(httpServer.startServer(0)); + int port = httpServer.getListeningPort(); + HttpResponse httpResponse = send(getUrl(port, HttpRouter.GC), HttpServer.Method.GET); + assertEquals(HttpServer.StatusCode.OK.getValue(), httpResponse.responseCode); + httpResponse = send(getUrl(port, HttpRouter.GC), HttpServer.Method.POST); + assertEquals(HttpServer.StatusCode.OK.getValue(), httpResponse.responseCode); + httpResponse = send(getUrl(port, HttpRouter.GC), HttpServer.Method.PUT); + assertEquals(HttpServer.StatusCode.OK.getValue(), httpResponse.responseCode); + httpServer.stopServer(); + } + + @Test + public void testHttpMethodsWithBody() throws IOException { + VertxHttpServer httpServer = new VertxHttpServer(); + HttpServiceProvider httpServiceProvider = NullHttpServiceProvider.getInstance(); + httpServer.initialize(httpServiceProvider); + assertTrue(httpServer.startServer(0)); + int port = httpServer.getListeningPort(); + String body = "{\"bookie_src\": \"localhost:3181\"}"; + HttpResponse httpResponse = 
send(getUrl(port, HttpRouter.DECOMMISSION), HttpServer.Method.PUT, body); + assertEquals(HttpServer.StatusCode.OK.getValue(), httpResponse.responseCode); + assertEquals(body, httpResponse.responseBody); + httpServer.stopServer(); + } + + @Test + public void testArbitraryFileUpload() throws IOException { + VertxHttpServer httpServer = new VertxHttpServer(); + HttpServiceProvider httpServiceProvider = NullHttpServiceProvider.getInstance(); + httpServer.initialize(httpServiceProvider); + assertTrue(httpServer.startServer(0)); + int port = httpServer.getListeningPort(); + File tempFile = File.createTempFile("test-" + System.currentTimeMillis(), null); + Files.asCharSink(tempFile, StandardCharsets.UTF_8).write(TestVertxHttpServer.class.getName()); + String[] filenamesBeforeUploadRequest = listFiles(BodyHandler.DEFAULT_UPLOADS_DIRECTORY); + HttpResponse httpResponse = sendFile(getUrl(port, HttpRouter.BOOKIE_INFO), tempFile); assertEquals(HttpServer.StatusCode.OK.getValue(), httpResponse.responseCode); + assertArrayEquals(filenamesBeforeUploadRequest, listFiles(BodyHandler.DEFAULT_UPLOADS_DIRECTORY)); httpServer.stopServer(); } - // HTTP GET request - private HttpResponse sendGet(String url) throws IOException { + private HttpResponse send(String url, HttpServer.Method method) throws IOException { + return send(url, method, ""); + } + + // HTTP request + private HttpResponse send(String url, HttpServer.Method method, String body) throws IOException { URL obj = new URL(url); HttpURLConnection con = (HttpURLConnection) obj.openConnection(); // optional, default is GET - con.setRequestMethod("GET"); + con.setRequestMethod(method.toString()); + if (!body.isEmpty()) { + con.setDoOutput(true); + byte[] payload = body.getBytes(StandardCharsets.UTF_8); + con.setFixedLengthStreamingMode(payload.length); + OutputStream outputStream = con.getOutputStream(); + outputStream.write(payload); + outputStream.flush(); + } int responseCode = con.getResponseCode(); StringBuilder response = new StringBuilder(); BufferedReader in = null; @@ -96,6 +166,59 @@ private HttpResponse sendGet(String url) throws IOException { return new HttpResponse(responseCode, response.toString()); } + private HttpResponse sendFile(String url, File file) throws IOException { + URL obj = new URL(url); + HttpURLConnection con = (HttpURLConnection) obj.openConnection(); + String boundary = "---------------------------" + System.currentTimeMillis(); + // optional, default is GET + con.setRequestMethod("POST"); + con.setDoOutput(true); + con.setRequestProperty("Content-Type", "multipart/form-data; boundary=" + boundary); + try ( + OutputStream outputStream = con.getOutputStream(); + PrintWriter writer = new PrintWriter(new OutputStreamWriter(outputStream, StandardCharsets.UTF_8), + true); + FileInputStream fileInputStream = new FileInputStream(file); + ) { + writer.append("--" + boundary).append("\r\n"); + writer.append("Content-Disposition: form-data; name=\"file\"; filename=\"file.txt\"").append("\r\n"); + writer.append("Content-Type: text/plain").append("\r\n"); + writer.append("\r\n"); + writer.flush(); // ensure the part headers are written before the raw file bytes + + byte[] buffer = new byte[4096]; + int bytesRead; + while ((bytesRead = fileInputStream.read(buffer)) != -1) { + outputStream.write(buffer, 0, bytesRead); + } + + writer.append("\r\n"); + writer.append("--" + boundary + "--").append("\r\n"); + } + int responseCode = con.getResponseCode(); + StringBuilder response = new StringBuilder(); + BufferedReader in = null; + try { + in = new BufferedReader(new InputStreamReader(con.getInputStream())); + String inputLine; + while ((inputLine =
in.readLine()) != null) { + response.append(inputLine); + } + } finally { + if (in != null) { + in.close(); + } + } + return new HttpResponse(responseCode, response.toString()); + } + + private String[] listFiles(String directory) { + File directoryFile = new File(directory); + if (!directoryFile.exists() || !directoryFile.isDirectory()) { + return new String[0]; + } + return directoryFile.list(); + } + private String getUrl(int port, String path) { return "http://localhost:" + port + path; } @@ -109,11 +232,4 @@ public HttpResponse(int responseCode, String responseBody) { this.responseBody = responseBody; } } - - private int getNextPort() throws Exception { - if (port > 65535) { - throw new Exception("No port available"); - } - return port++; - } } diff --git a/bookkeeper-http/vertx-http-server/src/test/java/org/apache/bookkeeper/http/vertx/TestVertxHttpsServer.java b/bookkeeper-http/vertx-http-server/src/test/java/org/apache/bookkeeper/http/vertx/TestVertxHttpsServer.java new file mode 100644 index 00000000000..03f6651ee27 --- /dev/null +++ b/bookkeeper-http/vertx-http-server/src/test/java/org/apache/bookkeeper/http/vertx/TestVertxHttpsServer.java @@ -0,0 +1,141 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.http.vertx; + +import java.io.FileInputStream; +import java.net.URL; +import java.security.KeyStore; +import java.security.SecureRandom; +import javax.net.ssl.HttpsURLConnection; +import javax.net.ssl.KeyManager; +import javax.net.ssl.KeyManagerFactory; +import javax.net.ssl.SSLContext; +import javax.net.ssl.SSLHandshakeException; +import javax.net.ssl.SSLSocketFactory; +import javax.net.ssl.TrustManagerFactory; +import org.apache.bookkeeper.http.HttpServerConfiguration; +import org.apache.bookkeeper.http.HttpServiceProvider; +import org.apache.bookkeeper.http.NullHttpServiceProvider; +import org.junit.Test; + +/** + * Unit test {@link VertxHttpServer} with TLS enabled.
+ */ +public class TestVertxHttpsServer { + + private static final String CLIENT_KEYSTORE_PATH = "./src/test/resources/vertx_client_key.jks"; + + private static final String CLIENT_TRUSTSTORE_PATH = "./src/test/resources/vertx_client_trust.jks"; + + private static final String CLIENT_WRONG_TRUSTSTORE_PATH = "./src/test/resources/vertx_client_wrong_trust.jks"; + + private static final String CLIENT_KEYSTORE_PASSWORD = "vertx_client_pwd"; + + private static final String CLIENT_TRUSTSTORE_PASSWORD = "vertx_client_pwd"; + + private static final String SERVER_KEYSTORE_PATH = "./src/test/resources/vertx_server_key.jks"; + + private static final String SERVER_TRUSTSTORE_PATH = "./src/test/resources/vertx_server_trust.jks"; + + private static final String SERVER_KEYSTORE_PASSWORD = "vertx_server_pwd"; + + private static final String SERVER_TRUSTSTORE_PASSWORD = "vertx_server_pwd"; + + @Test(timeout = 60_000) + public void testVertxServerTls() throws Exception { + VertxHttpServer httpServer = new VertxHttpServer(); + HttpServiceProvider httpServiceProvider = NullHttpServiceProvider.getInstance(); + httpServer.initialize(httpServiceProvider); + HttpServerConfiguration httpServerConfiguration = new HttpServerConfiguration(); + httpServerConfiguration.setTlsEnable(true); + httpServerConfiguration.setKeyStorePath(SERVER_KEYSTORE_PATH); + httpServerConfiguration.setKeyStorePassword(SERVER_KEYSTORE_PASSWORD); + httpServerConfiguration.setTrustStorePath(SERVER_TRUSTSTORE_PATH); + httpServerConfiguration.setTrustStorePassword(SERVER_TRUSTSTORE_PASSWORD); + httpServer.startServer(0, "localhost", httpServerConfiguration); + int actualPort = httpServer.getListeningPort(); + SSLContext sslContext = SSLContext.getInstance("TLSv1.2"); + // key store + KeyManagerFactory keyManagerFactory = KeyManagerFactory.getInstance(KeyManagerFactory.getDefaultAlgorithm()); + KeyStore keyStore = KeyStore.getInstance("JKS"); + try (FileInputStream inputStream = new FileInputStream(CLIENT_KEYSTORE_PATH)) { + keyStore.load(inputStream, CLIENT_KEYSTORE_PASSWORD.toCharArray()); + } + keyManagerFactory.init(keyStore, "vertx_client_pwd".toCharArray()); + KeyManager[] keyManagers = keyManagerFactory.getKeyManagers(); + // trust store + TrustManagerFactory trustManagerFactory = TrustManagerFactory.getInstance( + TrustManagerFactory.getDefaultAlgorithm()); + KeyStore trustStore = KeyStore.getInstance("JKS"); + try (FileInputStream inputStream = new FileInputStream(CLIENT_TRUSTSTORE_PATH)) { + trustStore.load(inputStream, CLIENT_TRUSTSTORE_PASSWORD.toCharArray()); + } + trustManagerFactory.init(trustStore); + sslContext.init(keyManagers, trustManagerFactory.getTrustManagers(), new SecureRandom()); + URL url = new URL("https://localhost:" + actualPort); + HttpsURLConnection urlConnection = (HttpsURLConnection) url.openConnection(); + urlConnection.setHostnameVerifier((s, sslSession) -> true); + SSLSocketFactory socketFactory = sslContext.getSocketFactory(); + urlConnection.setSSLSocketFactory(socketFactory); + urlConnection.setRequestMethod("GET"); + urlConnection.getResponseCode(); + httpServer.stopServer(); + } + + @Test(timeout = 60_000, expected = SSLHandshakeException.class) + public void testVertxServerTlsFailByCertNotMatch() throws Exception { + VertxHttpServer httpServer = new VertxHttpServer(); + HttpServerConfiguration httpServerConfiguration = new HttpServerConfiguration(); + httpServerConfiguration.setTlsEnable(true); + httpServerConfiguration.setKeyStorePath(SERVER_KEYSTORE_PATH); + 
httpServerConfiguration.setKeyStorePassword(SERVER_KEYSTORE_PASSWORD); + httpServerConfiguration.setTrustStorePath(SERVER_TRUSTSTORE_PATH); + httpServerConfiguration.setTrustStorePassword(SERVER_TRUSTSTORE_PASSWORD); + httpServer.startServer(0, "localhost", httpServerConfiguration); + int actualPort = httpServer.getListeningPort(); + SSLContext sslContext = SSLContext.getInstance("TLSv1.2"); + // key store + KeyManagerFactory keyManagerFactory = KeyManagerFactory.getInstance(KeyManagerFactory.getDefaultAlgorithm()); + KeyStore keyStore = KeyStore.getInstance("JKS"); + try (FileInputStream inputStream = new FileInputStream(CLIENT_KEYSTORE_PATH)) { + keyStore.load(inputStream, CLIENT_KEYSTORE_PASSWORD.toCharArray()); + } + keyManagerFactory.init(keyStore, "vertx_client_pwd".toCharArray()); + KeyManager[] keyManagers = keyManagerFactory.getKeyManagers(); + // trust store + TrustManagerFactory trustManagerFactory = TrustManagerFactory.getInstance( + TrustManagerFactory.getDefaultAlgorithm()); + KeyStore trustStore = KeyStore.getInstance("JKS"); + try (FileInputStream inputStream = new FileInputStream(CLIENT_WRONG_TRUSTSTORE_PATH)) { + trustStore.load(inputStream, CLIENT_TRUSTSTORE_PASSWORD.toCharArray()); + } + trustManagerFactory.init(trustStore); + sslContext.init(keyManagers, trustManagerFactory.getTrustManagers(), new SecureRandom()); + URL url = new URL("https://localhost:" + actualPort); + HttpsURLConnection urlConnection = (HttpsURLConnection) url.openConnection(); + urlConnection.setHostnameVerifier((s, sslSession) -> true); + SSLSocketFactory socketFactory = sslContext.getSocketFactory(); + urlConnection.setSSLSocketFactory(socketFactory); + urlConnection.setRequestMethod("GET"); + urlConnection.getResponseCode(); + } + +} diff --git a/bookkeeper-http/vertx-http-server/src/test/resources/vertx_client_key.jks b/bookkeeper-http/vertx-http-server/src/test/resources/vertx_client_key.jks new file mode 100644 index 00000000000..a6594d13954 Binary files /dev/null and b/bookkeeper-http/vertx-http-server/src/test/resources/vertx_client_key.jks differ diff --git a/bookkeeper-http/vertx-http-server/src/test/resources/vertx_client_trust.jks b/bookkeeper-http/vertx-http-server/src/test/resources/vertx_client_trust.jks new file mode 100644 index 00000000000..dd4821fc588 Binary files /dev/null and b/bookkeeper-http/vertx-http-server/src/test/resources/vertx_client_trust.jks differ diff --git a/bookkeeper-http/vertx-http-server/src/test/resources/vertx_client_wrong_trust.jks b/bookkeeper-http/vertx-http-server/src/test/resources/vertx_client_wrong_trust.jks new file mode 100644 index 00000000000..46ef37aa0c0 Binary files /dev/null and b/bookkeeper-http/vertx-http-server/src/test/resources/vertx_client_wrong_trust.jks differ diff --git a/bookkeeper-http/vertx-http-server/src/test/resources/vertx_server_key.jks b/bookkeeper-http/vertx-http-server/src/test/resources/vertx_server_key.jks new file mode 100644 index 00000000000..bef5712b3b6 Binary files /dev/null and b/bookkeeper-http/vertx-http-server/src/test/resources/vertx_server_key.jks differ diff --git a/bookkeeper-http/vertx-http-server/src/test/resources/vertx_server_trust.jks b/bookkeeper-http/vertx-http-server/src/test/resources/vertx_server_trust.jks new file mode 100644 index 00000000000..f9ba56443de Binary files /dev/null and b/bookkeeper-http/vertx-http-server/src/test/resources/vertx_server_trust.jks differ diff --git a/bookkeeper-proto/pom.xml b/bookkeeper-proto/pom.xml index f9520d8ba1b..02e9f31b157 100644 --- a/bookkeeper-proto/pom.xml 
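Taken together, the new HttpServerConfiguration knobs and the JKS fixtures above turn the Vert.x admin endpoint into a mutual-TLS listener: the server presents the certificate from its key store and, because ClientAuth.REQUIRED is set, rejects any client whose certificate does not chain to its trust store. A hedged sketch of wiring this up outside the tests (all paths, passwords, and the port are illustrative):

import org.apache.bookkeeper.http.HttpServerConfiguration;
import org.apache.bookkeeper.http.NullHttpServiceProvider;
import org.apache.bookkeeper.http.vertx.VertxHttpServer;

public class VertxTlsExample {
    public static void main(String[] args) {
        HttpServerConfiguration tlsConf = new HttpServerConfiguration();
        tlsConf.setTlsEnable(true);
        tlsConf.setKeyStorePath("/etc/bookkeeper/server_key.jks");      // server cert + key (illustrative)
        tlsConf.setKeyStorePassword("changeit");                        // illustrative password
        tlsConf.setTrustStorePath("/etc/bookkeeper/server_trust.jks");  // CAs trusted for client certs
        tlsConf.setTrustStorePassword("changeit");

        VertxHttpServer server = new VertxHttpServer();
        server.initialize(NullHttpServiceProvider.getInstance());
        // Clients now need HTTPS plus a certificate the server's trust store accepts.
        server.startServer(8443, "0.0.0.0", tlsConf);
    }
}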
+++ b/bookkeeper-proto/pom.xml @@ -20,7 +20,7 @@ bookkeeper org.apache.bookkeeper - 4.9.0-SNAPSHOT + 4.18.0-SNAPSHOT bookkeeper-proto Apache BookKeeper :: Protocols diff --git a/bookkeeper-proto/src/.gitignore b/bookkeeper-proto/src/.gitignore new file mode 100644 index 00000000000..9ab870da897 --- /dev/null +++ b/bookkeeper-proto/src/.gitignore @@ -0,0 +1 @@ +generated/ diff --git a/bookkeeper-proto/src/main/proto/BookkeeperProtocol.proto b/bookkeeper-proto/src/main/proto/BookkeeperProtocol.proto index bac9411330c..72df7d5e1d4 100644 --- a/bookkeeper-proto/src/main/proto/BookkeeperProtocol.proto +++ b/bookkeeper-proto/src/main/proto/BookkeeperProtocol.proto @@ -46,6 +46,7 @@ enum StatusCode { EFENCED = 504; EREADONLY = 505; ETOOMANYREQUESTS = 506; + EUNKNOWNLEDGERSTATE = 507; } /** @@ -64,6 +65,8 @@ enum OperationType { GET_BOOKIE_INFO = 8; START_TLS = 9; FORCE_LEDGER = 10; + GET_LIST_OF_ENTRIES_OF_LEDGER = 11; + BATCH_READ_ENTRY = 12; } /** @@ -76,6 +79,11 @@ message BKPacketHeader { optional uint32 priority = 4 [default = 0]; } +message ContextPair { + required string key = 1; + required string value = 2; +} + message Request { required BKPacketHeader header = 1; // Requests @@ -87,6 +95,9 @@ message Request { optional GetBookieInfoRequest getBookieInfoRequest = 105; optional StartTLSRequest startTLSRequest = 106; optional ForceLedgerRequest forceLedgerRequest = 107; + optional GetListOfEntriesOfLedgerRequest getListOfEntriesOfLedgerRequest = 108; + // to pass MDC context + repeated ContextPair requestContext = 200; } message ReadRequest { @@ -145,6 +156,10 @@ message GetBookieInfoRequest { optional int64 requested = 1; } +message GetListOfEntriesOfLedgerRequest { + required int64 ledgerId = 1; +} + message Response { required BKPacketHeader header = 1; @@ -160,6 +175,7 @@ message Response { optional GetBookieInfoResponse getBookieInfoResponse = 105; optional StartTLSResponse startTLSResponse = 106; optional ForceLedgerResponse forceLedgerResponse = 107; + optional GetListOfEntriesOfLedgerResponse getListOfEntriesOfLedgerResponse = 108; } message ReadResponse { @@ -206,5 +222,11 @@ message GetBookieInfoResponse { optional int64 freeDiskSpace = 3; } +message GetListOfEntriesOfLedgerResponse { + required StatusCode status = 1; + required int64 ledgerId = 2; + optional bytes availabilityOfEntriesOfLedger = 3; // condensed encoded format representing availability of entries of ledger +} + message StartTLSResponse { } diff --git a/bookkeeper-proto/src/main/proto/DataFormats.proto b/bookkeeper-proto/src/main/proto/DataFormats.proto index 92eaa5fd396..21ffda8415f 100644 --- a/bookkeeper-proto/src/main/proto/DataFormats.proto +++ b/bookkeeper-proto/src/main/proto/DataFormats.proto @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. 
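The repeated ContextPair requestContext field added above lets a client ship its SLF4J MDC (mapped diagnostic context) with each request, so server-side log lines can be correlated with the originating caller. A hedged client-side sketch, assuming the usual protobuf-java generated classes for this .proto (the helper itself is hypothetical):

import java.util.Map;
import org.apache.bookkeeper.proto.BookkeeperProtocol.ContextPair;
import org.apache.bookkeeper.proto.BookkeeperProtocol.Request;
import org.slf4j.MDC;

final class MdcRequestContext {
    // Copy the caller's current MDC entries into the outgoing request.
    static Request.Builder attach(Request.Builder request) {
        Map<String, String> mdc = MDC.getCopyOfContextMap();
        if (mdc != null) {
            mdc.forEach((key, value) -> request.addRequestContext(
                    ContextPair.newBuilder().setKey(key).setValue(value).build()));
        }
        return request;
    }
}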
@@ -60,6 +60,8 @@ message LedgerMetadataFormat { optional bytes value = 2; } repeated cMetadataMapEntry customMetadata = 11; + + optional int64 cToken = 12; } message LedgerRereplicationLayoutFormat { @@ -80,6 +82,7 @@ message CookieFormat { required string journalDir = 2; required string ledgerDirs = 3; optional string instanceId = 4; + optional string indexDirs = 5; } /** @@ -95,3 +98,47 @@ message LockDataFormat { message AuditorVoteFormat { optional string bookieId = 1; } + +/** + * information of checkAllLedgers execution + */ +message CheckAllLedgersFormat { + optional int64 checkAllLedgersCTime = 1; +} + +/** + * information of PlacementPolicyCheck execution + */ +message PlacementPolicyCheckFormat { + optional int64 placementPolicyCheckCTime = 1; +} + +/** + * information of ReplicasCheck execution + */ +message ReplicasCheckFormat { + optional int64 replicasCheckCTime = 1; +} + +/** + * information about services exposed by a Bookie. + */ +message BookieServiceInfoFormat { + + /** + * Information about an endpoint. + */ + message Endpoint { + required string id = 1; + required int32 port = 2; + required string host = 3; + required string protocol = 4; + + repeated string auth = 5; + repeated string extensions = 6; + } + + repeated Endpoint endpoints = 6; + map properties = 7; +} + diff --git a/bookkeeper-proto/src/main/proto/DbLedgerStorageDataFormats.proto b/bookkeeper-proto/src/main/proto/DbLedgerStorageDataFormats.proto index e68b2d0d51c..6940a8a2223 100644 --- a/bookkeeper-proto/src/main/proto/DbLedgerStorageDataFormats.proto +++ b/bookkeeper-proto/src/main/proto/DbLedgerStorageDataFormats.proto @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. 
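The new BookieServiceInfoFormat records which endpoints a bookie advertises (for example its RPC and HTTP listeners) together with free-form string properties. A hedged illustration of populating it via the generated builder (every value below is invented):

import org.apache.bookkeeper.proto.DataFormats.BookieServiceInfoFormat;

final class BookieServiceInfoExample {
    static BookieServiceInfoFormat sample() {
        return BookieServiceInfoFormat.newBuilder()
                .addEndpoints(BookieServiceInfoFormat.Endpoint.newBuilder()
                        .setId("bookie")                   // logical endpoint id
                        .setHost("bookie-1.example.com")   // advertised host
                        .setPort(3181)
                        .setProtocol("bookie-rpc"))        // protocol name is illustrative
                .putProperties("region", "eu-west-1")
                .build();
    }
}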
@@ -27,4 +27,6 @@ message LedgerData { required bool exists = 1; required bool fenced = 2; required bytes masterKey = 3; + optional bytes explicitLac = 4; + optional bool limbo = 5; } diff --git a/bookkeeper-server/pom.xml b/bookkeeper-server/pom.xml index 90dcf0a5c46..03982d1ad31 100644 --- a/bookkeeper-server/pom.xml +++ b/bookkeeper-server/pom.xml @@ -20,7 +20,7 @@ bookkeeper org.apache.bookkeeper - 4.9.0-SNAPSHOT + 4.18.0-SNAPSHOT bookkeeper-server Apache BookKeeper :: Server @@ -29,31 +29,46 @@ org.apache.bookkeeper bookkeeper-common ${project.parent.version} - - - - io.netty - netty-common - - + + + org.apache.bookkeeper + bookkeeper-common-allocator + ${project.parent.version} org.apache.bookkeeper bookkeeper-proto ${project.parent.version} + + org.apache.bookkeeper + bookkeeper-slogger-slf4j + ${project.parent.version} + + + org.apache.bookkeeper + bookkeeper-slogger-api + ${project.parent.version} + org.apache.bookkeeper bookkeeper-tools-framework ${project.parent.version} + + org.apache.bookkeeper + native-io + ${project.parent.version} + org.rocksdb rocksdbjni - org.slf4j - slf4j-log4j12 + org.apache.bookkeeper + testtools + ${project.parent.version} + test org.apache.zookeeper @@ -61,7 +76,17 @@ io.netty - netty-all + netty-handler + + + io.netty + netty-transport-native-epoll + linux-x86_64 + + + io.netty + netty-transport-native-epoll + linux-aarch_64 io.netty @@ -76,13 +101,6 @@ org.apache.bookkeeper circe-checksum ${project.version} - - - - io.netty - netty-buffer - - commons-cli @@ -104,13 +122,53 @@ org.apache.commons commons-collections4 + + org.bouncycastle + bc-fips + com.beust jcommander - net.java.dev.jna - jna + org.apache.httpcomponents + httpclient + + + + io.reactivex.rxjava3 + rxjava + + + + + org.xerial.snappy + snappy-java + runtime + true + + + + io.dropwizard.metrics + metrics-core + runtime + true + + + com.carrotsearch + hppc + + + org.apache.logging.log4j + log4j-core + + + org.apache.logging.log4j + log4j-slf4j2-impl + + + com.lmax + disruptor @@ -121,9 +179,28 @@ test - org.apache.hadoop - hadoop-minikdc - test + org.apache.kerby + kerby-config + ${kerby.version} + test + + + org.slf4j + * + + + + + org.apache.kerby + kerb-simplekdc + ${kerby.version} + test + + + org.slf4j + * + + org.apache.zookeeper @@ -131,21 +208,41 @@ test-jar test + + + org.junit.jupiter + junit-jupiter-api + test + + + org.apache.bookkeeper.stats + prometheus-metrics-provider + ${project.parent.version} + test + + + org.apache.bookkeeper.stats + otel-metrics-provider + ${project.version} + test + + + org.apache.bookkeeper.http + vertx-http-server + ${project.parent.version} + test + + + org.awaitility + awaitility + test + - - - org.apache.felix - maven-bundle-plugin - ${maven-bundle-plugin.version} - true - true - org.apache.maven.plugins maven-jar-plugin - ${maven-jar-plugin.version} @@ -186,8 +283,8 @@ org.apache.maven.plugins maven-surefire-plugin - ${maven-surefire-plugin.version} + false listener @@ -199,11 +296,13 @@ org.apache.maven.plugins maven-javadoc-plugin - ${maven-javadoc-plugin.version} + ${src.dir} - -Xdoclint:none + none + Bookkeeper @@ -223,36 +322,6 @@ - - twitter-science-provider - - - org.apache.bookkeeper.stats - twitter-science-provider - ${project.parent.version} - - - - - codahale-metrics-provider - - - org.apache.bookkeeper.stats - codahale-metrics-provider - ${project.parent.version} - - - - - twitter-http-server - - - org.apache.bookkeeper.http - twitter-http-server - ${project.parent.version} - - - vertx-http-server @@ -270,7 +339,6 @@ 
org.codehaus.mojo exec-maven-plugin - 1.6.0 Generate Self-Signed Certificates diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/auth/AuthCallbacks.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/auth/AuthCallbacks.java index 74134b5ff66..a8107d11868 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/auth/AuthCallbacks.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/auth/AuthCallbacks.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with this * work for additional information regarding copyright ownership. The ASF diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/auth/AuthProviderFactoryFactory.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/auth/AuthProviderFactoryFactory.java index c5906fe1fc6..72339a2c413 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/auth/AuthProviderFactoryFactory.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/auth/AuthProviderFactoryFactory.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -22,12 +22,11 @@ import java.io.IOException; import org.apache.bookkeeper.client.BKException; +import org.apache.bookkeeper.common.util.ReflectionUtils; import org.apache.bookkeeper.conf.ClientConfiguration; import org.apache.bookkeeper.conf.ServerConfiguration; import org.apache.bookkeeper.proto.BookieConnectionPeer; import org.apache.bookkeeper.proto.ClientConnectionPeer; -import org.apache.bookkeeper.util.ReflectionUtils; - /** * A factory to manage the authentication provider factories. @@ -76,6 +75,7 @@ public BookieAuthProvider newProvider(BookieConnectionPeer addr, AuthCallbacks.GenericCallback completeCb) { completeCb.operationComplete(BKException.Code.OK, null); return new BookieAuthProvider() { + @Override public void process(AuthToken m, AuthCallbacks.GenericCallback cb) { // any request of authentication for clients is going to be answered with a standard response // the client will d @@ -101,7 +101,9 @@ public ClientAuthProvider newProvider(ClientConnectionPeer addr, addr.setAuthorizedId(BookKeeperPrincipal.ANONYMOUS); completeCb.operationComplete(BKException.Code.OK, null); return new ClientAuthProvider() { + @Override public void init(AuthCallbacks.GenericCallback cb) {} + @Override public void process(AuthToken m, AuthCallbacks.GenericCallback cb) {} }; } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/auth/AuthToken.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/auth/AuthToken.java index 918d91001a5..e6e01e5ba8f 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/auth/AuthToken.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/auth/AuthToken.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/auth/BookKeeperPrincipal.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/auth/BookKeeperPrincipal.java index 41eade56ed3..0de0462511d 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/auth/BookKeeperPrincipal.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/auth/BookKeeperPrincipal.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -63,10 +63,7 @@ public boolean equals(Object obj) { return false; } final BookKeeperPrincipal other = (BookKeeperPrincipal) obj; - if (!Objects.equals(this.name, other.name)) { - return false; - } - return true; + return Objects.equals(this.name, other.name); } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/auth/BookieAuthProvider.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/auth/BookieAuthProvider.java index 4570d103d96..76a922b452e 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/auth/BookieAuthProvider.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/auth/BookieAuthProvider.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -39,7 +39,7 @@ interface Factory { /** * Initialize the factory with the server configuration * and protobuf message registry. Implementors must - * add any extention messages which contain the auth + * add any extension messages which contain the auth * payload, so that the server can decode auth messages * it receives from the client. */ diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/auth/ClientAuthProvider.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/auth/ClientAuthProvider.java index 8737d606284..a89deea2160 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/auth/ClientAuthProvider.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/auth/ClientAuthProvider.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -37,7 +37,7 @@ interface Factory { /** * Initialize the factory with the client configuration * and protobuf message registry. Implementors must - * add any extention messages which contain the auth + * add any extension messages which contain the auth * payload, so that the client can decode auth messages * it receives from the server. */ diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/AbstractLogCompactor.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/AbstractLogCompactor.java index 8f190a3649c..698d0f47864 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/AbstractLogCompactor.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/AbstractLogCompactor.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file @@ -22,7 +22,9 @@ package org.apache.bookkeeper.bookie; import com.google.common.util.concurrent.RateLimiter; - +import java.io.IOException; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; import org.apache.bookkeeper.conf.ServerConfiguration; /** @@ -32,12 +34,20 @@ public abstract class AbstractLogCompactor { protected final ServerConfiguration conf; protected final Throttler throttler; - protected final GarbageCollectorThread gcThread; - public AbstractLogCompactor(GarbageCollectorThread gcThread) { - this.gcThread = gcThread; - this.conf = gcThread.conf; + /** + * Listener notified when an entry log file can be removed after compaction. + */ + public interface LogRemovalListener { + void removeEntryLog(long logToRemove); + } + + protected final LogRemovalListener logRemovalListener; + + public AbstractLogCompactor(ServerConfiguration conf, LogRemovalListener logRemovalListener) { + this.conf = conf; + this.throttler = new Throttler(conf); + this.logRemovalListener = logRemovalListener; } /** @@ -52,9 +62,13 @@ public AbstractLogCompactor(GarbageCollectorThread gcThread) { */ public void cleanUpAndRecover() {} - static class Throttler { + /** + * Rate limiter for compaction, throttling either by bytes or by entry count. + */ + public static class Throttler { private final RateLimiter rateLimiter; private final boolean isThrottleByBytes; + private final AtomicBoolean cancelled = new AtomicBoolean(false); Throttler(ServerConfiguration conf) { this.isThrottleByBytes = conf.getIsThrottleByBytes(); @@ -63,8 +77,32 @@ static class Throttler { } // acquire. if bybytes: bytes of this entry; if byentries: 1. - void acquire(int permits) { - rateLimiter.acquire(this.isThrottleByBytes ? permits : 1); + boolean tryAcquire(int permits, long timeout, TimeUnit unit) { + return rateLimiter.tryAcquire(this.isThrottleByBytes ? permits : 1, timeout, unit); + } + + // The GC thread polls the rate limiter instead of blocking on it: + // if another thread stops the compactor while the GC thread is still + // rate-limited, the pending compaction task is aborted.
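+ // Hypothetical usage sketch: a compactor calls throttler.acquire(entrySize)
+ // before copying each entry, while the shutdown path calls
+ // throttler.cancelledAcquire() so that a blocked compaction fails fast with
+ // an IOException instead of waiting indefinitely for permits.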
+ public void acquire(int permits) throws IOException { + long timeout = 100; + long start = System.currentTimeMillis(); + while (!tryAcquire(permits, timeout, TimeUnit.MILLISECONDS)) { + if (cancelled.get()) { + throw new IOException("Failed to acquire permits after " + + (System.currentTimeMillis() - start) + + " ms; the compactor may be shutting down"); + } + try { + TimeUnit.MILLISECONDS.sleep(timeout); + } catch (InterruptedException e) { + // preserve the interrupt status; cancellation is signalled via the flag + Thread.currentThread().interrupt(); + } + } + } + + public void cancelledAcquire() { + cancelled.set(true); + } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BookKeeperServerStats.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BookKeeperServerStats.java index 9afd8a58f7c..59ffd99e0de 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BookKeeperServerStats.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BookKeeperServerStats.java @@ -25,10 +25,13 @@ */ public interface BookKeeperServerStats { + String CATEGORY_SERVER = "server"; + String SERVER_SCOPE = "bookkeeper_server"; String BOOKIE_SCOPE = "bookie"; String SERVER_STATUS = "SERVER_STATUS"; + String SERVER_SANITY = "SERVER_SANITY"; // // Network Stats (scoped under SERVER_SCOPE) @@ -44,10 +47,13 @@ public interface BookKeeperServerStats { // Stats String ADD_ENTRY_REQUEST = "ADD_ENTRY_REQUEST"; String ADD_ENTRY = "ADD_ENTRY"; + String WRITE_THREAD_QUEUED_LATENCY = "WRITE_THREAD_QUEUED_LATENCY"; + String ADD_ENTRY_REJECTED = "ADD_ENTRY_REJECTED"; String FORCE_LEDGER_REQUEST = "FORCE_LEDGER_REQUEST"; String FORCE_LEDGER = "FORCE_LEDGER"; String READ_ENTRY_REQUEST = "READ_ENTRY_REQUEST"; String READ_ENTRY = "READ_ENTRY"; + String READ_ENTRY_REJECTED = "READ_ENTRY_REJECTED"; String READ_ENTRY_SCHEDULING_DELAY = "READ_ENTRY_SCHEDULING_DELAY"; String READ_ENTRY_FENCE_REQUEST = "READ_ENTRY_FENCE_REQUEST"; String READ_ENTRY_FENCE_WAIT = "READ_ENTRY_FENCE_WAIT"; @@ -62,20 +68,28 @@ public interface BookKeeperServerStats { String READ_LAC = "READ_LAC"; String GET_BOOKIE_INFO_REQUEST = "GET_BOOKIE_INFO_REQUEST"; String GET_BOOKIE_INFO = "GET_BOOKIE_INFO"; + String GET_LIST_OF_ENTRIES_OF_LEDGER = "GET_LIST_OF_ENTRIES_OF_LEDGER"; + String GET_LIST_OF_ENTRIES_OF_LEDGER_REQUEST = "GET_LIST_OF_ENTRIES_OF_LEDGER_REQUEST"; // Ensemble Stats String WATCHER_SCOPE = "bookie_watcher"; String REPLACE_BOOKIE_TIME = "REPLACE_BOOKIE_TIME"; String NEW_ENSEMBLE_TIME = "NEW_ENSEMBLE_TIME"; + String FAILED_TO_RESOLVE_NETWORK_LOCATION_COUNT = "FAILED_TO_RESOLVE_NETWORK_LOCATION_TOTAL"; + String ENSEMBLE_NOT_ADHERING_TO_PLACEMENT_POLICY_COUNT = "ENSEMBLE_NOT_ADHERING_TO_PLACEMENT_POLICY_TOTAL"; + + // Bookie Quarantine Stats + String BOOKIE_QUARANTINE = "BOOKIE_QUARANTINE"; + String BOOKIE_QUARANTINE_SKIP = "BOOKIE_QUARANTINE_SKIP"; // Bookie Operations String BOOKIE_ADD_ENTRY = "BOOKIE_ADD_ENTRY"; String BOOKIE_RECOVERY_ADD_ENTRY = "BOOKIE_RECOVERY_ADD_ENTRY"; String BOOKIE_READ_ENTRY = "BOOKIE_READ_ENTRY"; String BOOKIE_FORCE_LEDGER = "BOOKIE_FORCE_LEDGER"; - String BOOKIE_READ_LAST_CONFIRMED = "BOOKIE_READ_LAST_CONFIRMED"; String BOOKIE_ADD_ENTRY_BYTES = "BOOKIE_ADD_ENTRY_BYTES"; String BOOKIE_READ_ENTRY_BYTES = "BOOKIE_READ_ENTRY_BYTES"; + String BOOKIE_GET_LIST_OF_ENTRIES_OF_LEDGER = "BOOKIE_GET_LIST_OF_ENTRIES_OF_LEDGER"; String ADD_ENTRY_IN_PROGRESS = "ADD_ENTRY_IN_PROGRESS"; String ADD_ENTRY_BLOCKED = "ADD_ENTRY_BLOCKED"; @@ -89,22 +103,29 @@ public interface BookKeeperServerStats { // String JOURNAL_SCOPE = "journal"; + String JOURNAL_DIRS =
"JOURNAL_DIRS"; String JOURNAL_ADD_ENTRY = "JOURNAL_ADD_ENTRY"; String JOURNAL_FORCE_LEDGER = "JOURNAL_FORCE_LEDGER"; String JOURNAL_SYNC = "JOURNAL_SYNC"; - String JOURNAL_MEM_ADD_ENTRY = "JOURNAL_MEM_ADD_ENTRY"; - String JOURNAL_PREALLOCATION = "JOURNAL_PREALLOCATION"; - String JOURNAL_FORCE_WRITE_LATENCY = "JOURNAL_FORCE_WRITE_LATENCY"; + String JOURNAL_FORCE_WRITE_ENQUEUE = "JOURNAL_FORCE_WRITE_ENQUEUE"; String JOURNAL_FORCE_WRITE_BATCH_ENTRIES = "JOURNAL_FORCE_WRITE_BATCH_ENTRIES"; String JOURNAL_FORCE_WRITE_BATCH_BYTES = "JOURNAL_FORCE_WRITE_BATCH_BYTES"; String JOURNAL_FLUSH_LATENCY = "JOURNAL_FLUSH_LATENCY"; String JOURNAL_QUEUE_LATENCY = "JOURNAL_QUEUE_LATENCY"; + String JOURNAL_QUEUE_MAX_SIZE = "JOURNAL_QUEUE_MAX_SIZE"; String JOURNAL_PROCESS_TIME_LATENCY = "JOURNAL_PROCESS_TIME_LATENCY"; String JOURNAL_CREATION_LATENCY = "JOURNAL_CREATION_LATENCY"; + String JOURNAL_MEMORY_MAX = "JOURNAL_MEMORY_MAX"; + String JOURNAL_MEMORY_USED = "JOURNAL_MEMORY_USED"; // Ledger Storage Stats String STORAGE_GET_OFFSET = "STORAGE_GET_OFFSET"; String STORAGE_GET_ENTRY = "STORAGE_GET_ENTRY"; + + // Ledger Storage Scrub Stats + String STORAGE_SCRUB_PAGES_SCANNED = "STORAGE_SCRUB_PAGES_SCANNED"; + String STORAGE_SCRUB_PAGE_RETRIES = "STORAGE_SCRUB_PAGE_RETRIES"; + // Ledger Cache Stats String LEDGER_CACHE_READ_PAGE = "LEDGER_CACHE_READ_PAGE"; // SkipList Stats @@ -123,22 +144,21 @@ public interface BookKeeperServerStats { String LEDGER_CACHE_MISS = "LEDGER_CACHE_MISS"; // Compaction/Garbage Collection Related Counters - String ACTIVE_ENTRY_LOG_COUNT = "ACTIVE_ENTRY_LOG_COUNT"; + String ACTIVE_ENTRY_LOG_COUNT = "ACTIVE_ENTRY_LOG_TOTAL"; String ACTIVE_ENTRY_LOG_SPACE_BYTES = "ACTIVE_ENTRY_LOG_SPACE_BYTES"; String RECLAIMED_COMPACTION_SPACE_BYTES = "RECLAIMED_COMPACTION_SPACE_BYTES"; String RECLAIMED_DELETION_SPACE_BYTES = "RECLAIMED_DELETION_SPACE_BYTES"; + String RECLAIM_FAILED_TO_DELETE = "RECLAIM_FAILED_TO_DELETE"; String THREAD_RUNTIME = "THREAD_RUNTIME"; - String MAJOR_COMPACTION_COUNT = "MAJOR_COMPACTION_COUNT"; - String MINOR_COMPACTION_COUNT = "MINOR_COMPACTION_COUNT"; - String ACTIVE_LEDGER_COUNT = "ACTIVE_LEDGER_COUNT"; - String DELETED_LEDGER_COUNT = "DELETED_LEDGER_COUNT"; + String MAJOR_COMPACTION_COUNT = "MAJOR_COMPACTION_TOTAL"; + String MINOR_COMPACTION_COUNT = "MINOR_COMPACTION_TOTAL"; + String ACTIVE_LEDGER_COUNT = "ACTIVE_LEDGER_TOTAL"; + String DELETED_LEDGER_COUNT = "DELETED_LEDGER_TOTAL"; // Index Related Counters String INDEX_INMEM_ILLEGAL_STATE_RESET = "INDEX_INMEM_ILLEGAL_STATE_RESET"; String INDEX_INMEM_ILLEGAL_STATE_DELETE = "INDEX_INMEM_ILLEGAL_STATE_DELETE"; String JOURNAL_FORCE_WRITE_QUEUE_SIZE = "JOURNAL_FORCE_WRITE_QUEUE_SIZE"; - String JOURNAL_CB_QUEUE_SIZE = "JOURNAL_CB_QUEUE_SIZE"; - String JOURNAL_NUM_FORCE_WRITES = "JOURNAL_NUM_FORCE_WRITES"; String JOURNAL_NUM_FLUSH_EMPTY_QUEUE = "JOURNAL_NUM_FLUSH_EMPTY_QUEUE"; String JOURNAL_NUM_FLUSH_MAX_OUTSTANDING_BYTES = "JOURNAL_NUM_FLUSH_MAX_OUTSTANDING_BYTES"; String JOURNAL_NUM_FLUSH_MAX_WAIT = "JOURNAL_NUM_FLUSH_MAX_WAIT"; @@ -155,15 +175,13 @@ public interface BookKeeperServerStats { // Gauge String NUM_INDEX_PAGES = "NUM_INDEX_PAGES"; - String NUM_OPEN_LEDGERS = "NUM_OPEN_LEDGERS"; - String JOURNAL_FORCE_WRITE_GROUPING_COUNT = "JOURNAL_FORCE_WRITE_GROUPING_COUNT"; - String NUM_PENDING_READ = "NUM_PENDING_READ"; - String NUM_PENDING_ADD = "NUM_PENDING_ADD"; + String JOURNAL_FORCE_WRITE_GROUPING_COUNT = "JOURNAL_FORCE_WRITE_GROUPING_TOTAL"; // LedgerDirs Stats String LD_LEDGER_SCOPE = "ledger"; String LD_INDEX_SCOPE = 
"index"; String LD_WRITABLE_DIRS = "writable_dirs"; + String LD_NUM_DIRS = "num_dirs"; // EntryLogManagerForEntryLogPerLedger Stats String ENTRYLOGGER_SCOPE = "entrylogger"; diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/Bookie.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/Bookie.java index e9e4d15023e..90c8acf5af4 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/Bookie.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/Bookie.java @@ -1,5 +1,4 @@ -/** - * +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -16,147 +15,82 @@ * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. - * */ - package org.apache.bookkeeper.bookie; -import static org.apache.bookkeeper.bookie.BookKeeperServerStats.BOOKIE_ADD_ENTRY; -import static org.apache.bookkeeper.bookie.BookKeeperServerStats.BOOKIE_ADD_ENTRY_BYTES; -import static org.apache.bookkeeper.bookie.BookKeeperServerStats.BOOKIE_FORCE_LEDGER; -import static org.apache.bookkeeper.bookie.BookKeeperServerStats.BOOKIE_READ_ENTRY; -import static org.apache.bookkeeper.bookie.BookKeeperServerStats.BOOKIE_READ_ENTRY_BYTES; -import static org.apache.bookkeeper.bookie.BookKeeperServerStats.BOOKIE_RECOVERY_ADD_ENTRY; -import static org.apache.bookkeeper.bookie.BookKeeperServerStats.JOURNAL_SCOPE; -import static org.apache.bookkeeper.bookie.BookKeeperServerStats.LD_INDEX_SCOPE; -import static org.apache.bookkeeper.bookie.BookKeeperServerStats.LD_LEDGER_SCOPE; -import static org.apache.bookkeeper.bookie.BookKeeperServerStats.READ_BYTES; -import static org.apache.bookkeeper.bookie.BookKeeperServerStats.WRITE_BYTES; -import static org.apache.bookkeeper.bookie.Bookie.METAENTRY_ID_FENCE_KEY; - -import com.google.common.annotations.VisibleForTesting; -import com.google.common.collect.Lists; -import com.google.common.util.concurrent.SettableFuture; import io.netty.buffer.ByteBuf; -import io.netty.buffer.PooledByteBufAllocator; -import io.netty.buffer.Unpooled; -import java.io.File; -import java.io.FileNotFoundException; -import java.io.FilenameFilter; import java.io.IOException; -import java.net.InetAddress; -import java.net.InetSocketAddress; -import java.net.URI; -import java.net.UnknownHostException; -import java.nio.ByteBuffer; -import java.nio.file.FileStore; -import java.nio.file.Files; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.stream.Collectors; -import org.apache.bookkeeper.bookie.BookieException.BookieIllegalOpException; -import org.apache.bookkeeper.bookie.BookieException.CookieNotFoundException; -import org.apache.bookkeeper.bookie.BookieException.DiskPartitionDuplicationException; -import org.apache.bookkeeper.bookie.BookieException.InvalidCookieException; -import org.apache.bookkeeper.bookie.BookieException.MetadataStoreException; -import org.apache.bookkeeper.bookie.BookieException.UnknownBookieIdException; -import org.apache.bookkeeper.bookie.CheckpointSource.Checkpoint; -import org.apache.bookkeeper.bookie.Journal.JournalScanner; -import 
org.apache.bookkeeper.bookie.LedgerDirsManager.LedgerDirsListener; -import org.apache.bookkeeper.bookie.LedgerDirsManager.NoWritableLedgerDirException; +import java.util.PrimitiveIterator; +import java.util.concurrent.CompletableFuture; import org.apache.bookkeeper.common.util.Watcher; -import org.apache.bookkeeper.conf.ServerConfiguration; -import org.apache.bookkeeper.discover.RegistrationManager; -import org.apache.bookkeeper.meta.LedgerManager; -import org.apache.bookkeeper.meta.LedgerManagerFactory; -import org.apache.bookkeeper.meta.MetadataBookieDriver; -import org.apache.bookkeeper.meta.MetadataDrivers; -import org.apache.bookkeeper.meta.exceptions.MetadataException; -import org.apache.bookkeeper.net.BookieSocketAddress; -import org.apache.bookkeeper.net.DNS; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.WriteCallback; -import org.apache.bookkeeper.stats.Counter; -import org.apache.bookkeeper.stats.NullStatsLogger; -import org.apache.bookkeeper.stats.OpStatsLogger; -import org.apache.bookkeeper.stats.StatsLogger; -import org.apache.bookkeeper.util.BookKeeperConstants; -import org.apache.bookkeeper.util.DiskChecker; -import org.apache.bookkeeper.util.IOUtils; -import org.apache.bookkeeper.util.MathUtils; -import org.apache.bookkeeper.util.collections.ConcurrentLongHashMap; -import org.apache.bookkeeper.versioning.Version; -import org.apache.bookkeeper.versioning.Versioned; -import org.apache.commons.configuration.ConfigurationException; -import org.apache.commons.io.FileUtils; -import org.apache.commons.lang3.mutable.MutableBoolean; -import org.apache.commons.lang3.tuple.Pair; -import org.apache.zookeeper.KeeperException; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; /** - * Implements a bookie. + * Interface for the bookie. 
*/ -public class Bookie extends BookieCriticalThread { - - private static final Logger LOG = LoggerFactory.getLogger(Bookie.class); - - final List journalDirectories; - final ServerConfiguration conf; - - final SyncThread syncThread; - final LedgerManagerFactory ledgerManagerFactory; - final LedgerManager ledgerManager; - final LedgerStorage ledgerStorage; - final List journals; - - final HandleFactory handles; - final boolean entryLogPerLedgerEnabled; - - static final long METAENTRY_ID_LEDGER_KEY = -0x1000; - static final long METAENTRY_ID_FENCE_KEY = -0x2000; - public static final long METAENTRY_ID_FORCE_LEDGER = -0x4000; - static final long METAENTRY_ID_LEDGER_EXPLICITLAC = -0x8000; +public interface Bookie { + + void start(); + void join() throws InterruptedException; + boolean isRunning(); + int getExitCode(); + int shutdown(); + + boolean isAvailableForHighPriorityWrites(); + boolean isReadOnly(); + + // TODO: replace callback with futures + // TODO: replace ackBeforeSync with flags + void addEntry(ByteBuf entry, boolean ackBeforeSync, WriteCallback cb, Object ctx, byte[] masterKey) + throws IOException, BookieException, InterruptedException; + void recoveryAddEntry(ByteBuf entry, WriteCallback cb, Object ctx, byte[] masterKey) + throws IOException, BookieException, InterruptedException; + void forceLedger(long ledgerId, WriteCallback cb, Object ctx); + void setExplicitLac(ByteBuf entry, WriteCallback writeCallback, Object ctx, byte[] masterKey) + throws IOException, InterruptedException, BookieException; + ByteBuf getExplicitLac(long ledgerId) throws IOException, NoLedgerException, BookieException; + + // these can probably be moved out and called directly on ledgerdirmanager + long getTotalDiskSpace() throws IOException; + long getTotalFreeSpace() throws IOException; + + // TODO: Shouldn't this be async? + ByteBuf readEntry(long ledgerId, long entryId) + throws IOException, NoLedgerException, BookieException; + long readLastAddConfirmed(long ledgerId) throws IOException, BookieException; + PrimitiveIterator.OfLong getListOfEntriesOfLedger(long ledgerId) throws IOException, NoLedgerException; - private final LedgerDirsManager ledgerDirsManager; - private LedgerDirsManager indexDirsManager; - - LedgerDirsMonitor ledgerMonitor; - LedgerDirsMonitor idxMonitor; - - // Registration Manager for managing registration - protected final MetadataBookieDriver metadataDriver; - - private int exitCode = ExitCode.OK; + /** + * Fences a ledger. From this point on, clients will be unable to + * write to this ledger. Only recoveryAddEntry will be + * able to add entries to the ledger. + * This method is idempotent. Once a ledger is fenced, it can + * never be unfenced. Fencing a fenced ledger has no effect. 
+ * @return + */ + CompletableFuture fenceLedger(long ledgerId, byte[] masterKey) + throws IOException, BookieException; - private final ConcurrentLongHashMap masterKeyCache = new ConcurrentLongHashMap<>(); + // TODO: Replace Watcher with a completableFuture (cancellable) + boolean waitForLastAddConfirmedUpdate(long ledgerId, + long previousLAC, + Watcher watcher) + throws IOException; + void cancelWaitForLastAddConfirmedUpdate(long ledgerId, + Watcher watcher) + throws IOException; - protected StateManager stateManager; + // TODO: StateManager should be passed as a parameter to Bookie + StateManager getStateManager(); - // Expose Stats - final StatsLogger statsLogger; - private final Counter writeBytes; - private final Counter readBytes; - private final Counter forceLedgerOps; - // Bookie Operation Latency Stats - private final OpStatsLogger addEntryStats; - private final OpStatsLogger recoveryAddEntryStats; - private final OpStatsLogger readEntryStats; - // Bookie Operation Bytes Stats - private final OpStatsLogger addBytesStats; - private final OpStatsLogger readBytesStats; + // TODO: Should be constructed and passed in as a parameter + LedgerStorage getLedgerStorage(); + // TODO: Move this exceptions somewhere else /** * Exception is thrown when no such a ledger is found in this bookie. */ - public static class NoLedgerException extends IOException { + class NoLedgerException extends IOException { private static final long serialVersionUID = 1L; private final long ledgerId; public NoLedgerException(long ledgerId) { @@ -171,7 +105,7 @@ public long getLedgerId() { /** * Exception is thrown when no such an entry is found in this bookie. */ - public static class NoEntryException extends IOException { + class NoEntryException extends IOException { private static final long serialVersionUID = 1L; private final long ledgerId; private final long entryId; @@ -193,1357 +127,4 @@ public long getEntry() { } } - // Write Callback do nothing - static class NopWriteCallback implements WriteCallback { - @Override - public void writeComplete(int rc, long ledgerId, long entryId, - BookieSocketAddress addr, Object ctx) { - if (LOG.isDebugEnabled()) { - LOG.debug("Finished writing entry {} @ ledger {} for {} : {}", - entryId, ledgerId, addr, rc); - } - } - } - - public static void checkDirectoryStructure(File dir) throws IOException { - if (!dir.exists()) { - File parent = dir.getParentFile(); - File preV3versionFile = new File(dir.getParent(), - BookKeeperConstants.VERSION_FILENAME); - - final AtomicBoolean oldDataExists = new AtomicBoolean(false); - parent.list(new FilenameFilter() { - @Override - public boolean accept(File dir, String name) { - if (name.endsWith(".txn") || name.endsWith(".idx") || name.endsWith(".log")) { - oldDataExists.set(true); - } - return true; - } - }); - if (preV3versionFile.exists() || oldDataExists.get()) { - String err = "Directory layout version is less than 3, upgrade needed"; - LOG.error(err); - throw new IOException(err); - } - if (!dir.mkdirs()) { - String err = "Unable to create directory " + dir; - LOG.error(err); - throw new IOException(err); - } - } - } - - /** - * Check that the environment for the bookie is correct. - * This means that the configuration has stayed the same as the - * first run and the filesystem structure is up to date. 
- */ - private void checkEnvironment(MetadataBookieDriver metadataDriver) - throws BookieException, IOException { - List allLedgerDirs = new ArrayList(ledgerDirsManager.getAllLedgerDirs().size() - + indexDirsManager.getAllLedgerDirs().size()); - allLedgerDirs.addAll(ledgerDirsManager.getAllLedgerDirs()); - if (indexDirsManager != ledgerDirsManager) { - allLedgerDirs.addAll(indexDirsManager.getAllLedgerDirs()); - } - if (metadataDriver == null) { // exists only for testing, just make sure directories are correct - - for (File journalDirectory : journalDirectories) { - checkDirectoryStructure(journalDirectory); - } - - for (File dir : allLedgerDirs) { - checkDirectoryStructure(dir); - } - return; - } - - checkEnvironmentWithStorageExpansion(conf, metadataDriver, journalDirectories, allLedgerDirs); - - checkIfDirsOnSameDiskPartition(allLedgerDirs); - checkIfDirsOnSameDiskPartition(journalDirectories); - } - - /** - * Checks if multiple directories are in same diskpartition/filesystem/device. - * If ALLOW_MULTIPLEDIRS_UNDER_SAME_DISKPARTITION config parameter is not enabled, and - * if it is found that there are multiple directories in the same DiskPartition then - * it will throw DiskPartitionDuplicationException. - * - * @param dirs dirs to validate - * - * @throws IOException - */ - private void checkIfDirsOnSameDiskPartition(List dirs) throws DiskPartitionDuplicationException { - boolean allowDiskPartitionDuplication = conf.isAllowMultipleDirsUnderSameDiskPartition(); - final MutableBoolean isDuplicationFoundAndNotAllowed = new MutableBoolean(false); - Map> fileStoreDirsMap = new HashMap>(); - for (File dir : dirs) { - FileStore fileStore; - try { - fileStore = Files.getFileStore(dir.toPath()); - } catch (IOException e) { - LOG.error("Got IOException while trying to FileStore of {}", dir); - throw new BookieException.DiskPartitionDuplicationException(e); - } - if (fileStoreDirsMap.containsKey(fileStore)) { - fileStoreDirsMap.get(fileStore).add(dir); - } else { - List dirsList = new ArrayList(); - dirsList.add(dir); - fileStoreDirsMap.put(fileStore, dirsList); - } - } - - fileStoreDirsMap.forEach((fileStore, dirsList) -> { - if (dirsList.size() > 1) { - if (allowDiskPartitionDuplication) { - LOG.warn("Dirs: {} are in same DiskPartition/FileSystem: {}", dirsList, fileStore); - } else { - LOG.error("Dirs: {} are in same DiskPartition/FileSystem: {}", dirsList, fileStore); - isDuplicationFoundAndNotAllowed.setValue(true); - } - } - }); - if (isDuplicationFoundAndNotAllowed.getValue()) { - throw new BookieException.DiskPartitionDuplicationException(); - } - } - - static List possibleBookieIds(ServerConfiguration conf) - throws BookieException { - // we need to loop through all possible bookie identifiers to ensure it is treated as a new environment - // just because of bad configuration - List addresses = Lists.newArrayListWithExpectedSize(3); - // we are checking all possibilities here, so we don't need to fail if we can only get - // loopback address. it will fail anyway when the bookie attempts to listen on loopback address. 
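The probing here exists because a bookie can legitimately identify itself in three ways (raw IP, hostname, or advertised address), and the cookie check must try all of them rather than treat a changed naming option as a brand-new bookie. As a rough model of what a cookie actually compares, the following sketch is illustrative only; the real Cookie class is built via Cookie.generateCookie(conf), carries additional fields such as the layout version, and serializes its directory lists:

// Hedged sketch: approximates the identity data a bookie cookie captures.
final class CookieIdentitySketch {
    final String bookieHost;   // one of ip:port, hostname:port, or advertisedAddress:port
    final String journalDirs;  // serialized journal directory list
    final String ledgerDirs;   // serialized ledger directory list
    final String instanceId;   // cluster instance id from the registration manager

    CookieIdentitySketch(String bookieHost, String journalDirs,
                         String ledgerDirs, String instanceId) {
        this.bookieHost = bookieHost;
        this.journalDirs = journalDirs;
        this.ledgerDirs = ledgerDirs;
        this.instanceId = instanceId;
    }

    // Verification is field-by-field equality: any mismatch means the on-disk
    // data was written under a different configuration or cluster.
    boolean verifiesAgainst(CookieIdentitySketch other) {
        return bookieHost.equals(other.bookieHost)
                && journalDirs.equals(other.journalDirs)
                && ledgerDirs.equals(other.ledgerDirs)
                && instanceId.equals(other.instanceId);
    }
}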
- try { - // ip address - addresses.add(getBookieAddress( - new ServerConfiguration(conf) - .setUseHostNameAsBookieID(false) - .setAdvertisedAddress(null) - .setAllowLoopback(true) - )); - // host name - addresses.add(getBookieAddress( - new ServerConfiguration(conf) - .setUseHostNameAsBookieID(true) - .setAdvertisedAddress(null) - .setAllowLoopback(true) - )); - // advertised address - if (null != conf.getAdvertisedAddress()) { - addresses.add(getBookieAddress(conf)); - } - } catch (UnknownHostException e) { - throw new UnknownBookieIdException(e); - } - return addresses; - } - - static Versioned readAndVerifyCookieFromRegistrationManager( - Cookie masterCookie, RegistrationManager rm, - List addresses, boolean allowExpansion) - throws BookieException { - Versioned rmCookie = null; - for (BookieSocketAddress address : addresses) { - try { - rmCookie = Cookie.readFromRegistrationManager(rm, address); - // If allowStorageExpansion option is set, we should - // make sure that the new set of ledger/index dirs - // is a super set of the old; else, we fail the cookie check - if (allowExpansion) { - masterCookie.verifyIsSuperSet(rmCookie.getValue()); - } else { - masterCookie.verify(rmCookie.getValue()); - } - } catch (CookieNotFoundException e) { - continue; - } - } - return rmCookie; - } - - private static Pair, List> verifyAndGetMissingDirs( - Cookie masterCookie, boolean allowExpansion, List dirs) - throws InvalidCookieException, IOException { - List missingDirs = Lists.newArrayList(); - List existedCookies = Lists.newArrayList(); - for (File dir : dirs) { - checkDirectoryStructure(dir); - try { - Cookie c = Cookie.readFromDirectory(dir); - if (allowExpansion) { - masterCookie.verifyIsSuperSet(c); - } else { - masterCookie.verify(c); - } - existedCookies.add(c); - } catch (FileNotFoundException fnf) { - missingDirs.add(dir); - } - } - return Pair.of(missingDirs, existedCookies); - } - - private static void stampNewCookie(ServerConfiguration conf, - Cookie masterCookie, - RegistrationManager rm, - Version version, - List journalDirectories, - List allLedgerDirs) - throws BookieException, IOException { - // backfill all the directories that miss cookies (for storage expansion) - LOG.info("Stamping new cookies on all dirs {} {}", - journalDirectories, allLedgerDirs); - for (File journalDirectory : journalDirectories) { - masterCookie.writeToDirectory(journalDirectory); - } - for (File dir : allLedgerDirs) { - masterCookie.writeToDirectory(dir); - } - masterCookie.writeToRegistrationManager(rm, conf, version); - } - - public static void checkEnvironmentWithStorageExpansion( - ServerConfiguration conf, - MetadataBookieDriver metadataDriver, - List journalDirectories, - List allLedgerDirs) throws BookieException { - RegistrationManager rm = metadataDriver.getRegistrationManager(); - try { - // 1. retrieve the instance id - String instanceId = rm.getClusterInstanceId(); - - // 2. build the master cookie from the configuration - Cookie.Builder builder = Cookie.generateCookie(conf); - if (null != instanceId) { - builder.setInstanceId(instanceId); - } - Cookie masterCookie = builder.build(); - boolean allowExpansion = conf.getAllowStorageExpansion(); - - // 3. read the cookie from registration manager. it is the `source-of-truth` of a given bookie. - // if it doesn't exist in registration manager, this bookie is a new bookie, otherwise it is - // an old bookie. 
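The remainder of this method (steps 4 and 5 in its comments) reduces to a small decision tree over directories that are missing cookies. A self-contained restatement, with booleans standing in for the Cookie and Versioned values the real code inspects:

// Hedged restatement of the cookie decision tree; not the real method signature.
static String decideCookieAction(boolean someDirsMissingCookies,
                                 boolean cookieKnownToMetadataStore,
                                 boolean allowStorageExpansion) {
    if (!someDirsMissingCookies) {
        return "start normally";     // every directory carries a matching cookie
    }
    if (!cookieKnownToMetadataStore) {
        // brand-new environment: the cookie-less dirs must be empty
        return "stamp fresh cookies everywhere (Version.NEW)";
    }
    if (allowStorageExpansion) {
        // directories are being added: the master cookie must be a superset of each existing one
        return "verify supersets and re-stamp cookies at the existing version";
    }
    // a cookie vanished from a known environment: likely wiped or corrupted
    return "fail startup with InvalidCookieException";
}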
- List possibleBookieIds = possibleBookieIds(conf); - final Versioned rmCookie = readAndVerifyCookieFromRegistrationManager( - masterCookie, rm, possibleBookieIds, allowExpansion); - - // 4. check if the cookie appear in all the directories. - List missedCookieDirs = new ArrayList<>(); - List existingCookies = Lists.newArrayList(); - if (null != rmCookie) { - existingCookies.add(rmCookie.getValue()); - } - - // 4.1 verify the cookies in journal directories - Pair, List> journalResult = - verifyAndGetMissingDirs(masterCookie, - allowExpansion, journalDirectories); - missedCookieDirs.addAll(journalResult.getLeft()); - existingCookies.addAll(journalResult.getRight()); - // 4.2. verify the cookies in ledger directories - Pair, List> ledgerResult = - verifyAndGetMissingDirs(masterCookie, - allowExpansion, allLedgerDirs); - missedCookieDirs.addAll(ledgerResult.getLeft()); - existingCookies.addAll(ledgerResult.getRight()); - - // 5. if there are directories missing cookies, - // this is either a: - // - new environment - // - a directory is being added - // - a directory has been corrupted/wiped, which is an error - if (!missedCookieDirs.isEmpty()) { - if (rmCookie == null) { - // 5.1 new environment: all directories should be empty - verifyDirsForNewEnvironment(missedCookieDirs); - stampNewCookie(conf, masterCookie, rm, Version.NEW, - journalDirectories, allLedgerDirs); - } else if (allowExpansion) { - // 5.2 storage is expanding - Set knownDirs = getKnownDirs(existingCookies); - verifyDirsForStorageExpansion(missedCookieDirs, knownDirs); - stampNewCookie(conf, masterCookie, - rm, rmCookie.getVersion(), - journalDirectories, allLedgerDirs); - } else { - // 5.3 Cookie-less directories and - // we can't do anything with them - LOG.error("There are directories without a cookie," - + " and this is neither a new environment," - + " nor is storage expansion enabled. " - + "Empty directories are {}", missedCookieDirs); - throw new InvalidCookieException(); - } - } - } catch (IOException ioe) { - LOG.error("Error accessing cookie on disks", ioe); - throw new BookieException.InvalidCookieException(ioe); - } - } - - private static void verifyDirsForNewEnvironment(List missedCookieDirs) - throws InvalidCookieException { - List nonEmptyDirs = new ArrayList<>(); - for (File dir : missedCookieDirs) { - String[] content = dir.list(); - if (content != null && content.length != 0) { - nonEmptyDirs.add(dir); - } - } - if (!nonEmptyDirs.isEmpty()) { - LOG.error("Not all the new directories are empty. 
New directories that are not empty are: " + nonEmptyDirs); - throw new InvalidCookieException(); - } - } - - private static Set getKnownDirs(List cookies) { - return cookies.stream() - .flatMap((c) -> Arrays.stream(c.getLedgerDirPathsFromCookie())) - .map((s) -> new File(s)) - .collect(Collectors.toSet()); - } - - private static void verifyDirsForStorageExpansion( - List missedCookieDirs, - Set existingLedgerDirs) throws InvalidCookieException { - - List dirsMissingData = new ArrayList(); - List nonEmptyDirs = new ArrayList(); - for (File dir : missedCookieDirs) { - if (existingLedgerDirs.contains(dir.getParentFile())) { - // if one of the existing ledger dirs doesn't have cookie, - // let us not proceed further - dirsMissingData.add(dir); - continue; - } - String[] content = dir.list(); - if (content != null && content.length != 0) { - nonEmptyDirs.add(dir); - } - } - if (dirsMissingData.size() > 0 || nonEmptyDirs.size() > 0) { - LOG.error("Either not all local directories have cookies or directories being added " - + " newly are not empty. " - + "Directories missing cookie file are: " + dirsMissingData - + " New directories that are not empty are: " + nonEmptyDirs); - throw new InvalidCookieException(); - } - } - - /** - * Return the configured address of the bookie. - */ - public static BookieSocketAddress getBookieAddress(ServerConfiguration conf) - throws UnknownHostException { - // Advertised address takes precedence over the listening interface and the - // useHostNameAsBookieID settings - if (conf.getAdvertisedAddress() != null && conf.getAdvertisedAddress().trim().length() > 0) { - String hostAddress = conf.getAdvertisedAddress().trim(); - return new BookieSocketAddress(hostAddress, conf.getBookiePort()); - } - - String iface = conf.getListeningInterface(); - if (iface == null) { - iface = "default"; - } - - String hostName = DNS.getDefaultHost(iface); - InetSocketAddress inetAddr = new InetSocketAddress(hostName, conf.getBookiePort()); - if (inetAddr.isUnresolved()) { - throw new UnknownHostException("Unable to resolve default hostname: " - + hostName + " for interface: " + iface); - } - String hostAddress = null; - InetAddress iAddress = inetAddr.getAddress(); - if (conf.getUseHostNameAsBookieID()) { - hostAddress = iAddress.getCanonicalHostName(); - if (conf.getUseShortHostName()) { - /* - * if short hostname is used, then FQDN is not used. Short - * hostname is the hostname cut at the first dot. - */ - hostAddress = hostAddress.split("\\.", 2)[0]; - } - } else { - hostAddress = iAddress.getHostAddress(); - } - - BookieSocketAddress addr = - new BookieSocketAddress(hostAddress, conf.getBookiePort()); - if (addr.getSocketAddress().getAddress().isLoopbackAddress() - && !conf.getAllowLoopback()) { - throw new UnknownHostException("Trying to listen on loopback address, " - + addr + " but this is forbidden by default " - + "(see ServerConfiguration#getAllowLoopback()).\n" - + "If this happen, you can consider specifying the network interface" - + " to listen on (e.g. listeningInterface=eth0) or specifying the" - + " advertised address (e.g. 
advertisedAddress=172.x.y.z)"); - } - return addr; - } - - public LedgerDirsManager getLedgerDirsManager() { - return ledgerDirsManager; - } - - LedgerDirsManager getIndexDirsManager() { - return indexDirsManager; - } - - public long getTotalDiskSpace() throws IOException { - return getLedgerDirsManager().getTotalDiskSpace(ledgerDirsManager.getAllLedgerDirs()); - } - - public long getTotalFreeSpace() throws IOException { - return getLedgerDirsManager().getTotalFreeSpace(ledgerDirsManager.getAllLedgerDirs()); - } - - public static File getCurrentDirectory(File dir) { - return new File(dir, BookKeeperConstants.CURRENT_DIR); - } - - public static File[] getCurrentDirectories(File[] dirs) { - File[] currentDirs = new File[dirs.length]; - for (int i = 0; i < dirs.length; i++) { - currentDirs[i] = getCurrentDirectory(dirs[i]); - } - return currentDirs; - } - - public Bookie(ServerConfiguration conf) - throws IOException, InterruptedException, BookieException { - this(conf, NullStatsLogger.INSTANCE); - } - - public Bookie(ServerConfiguration conf, StatsLogger statsLogger) - throws IOException, InterruptedException, BookieException { - super("Bookie-" + conf.getBookiePort()); - this.statsLogger = statsLogger; - this.conf = conf; - this.journalDirectories = Lists.newArrayList(); - for (File journalDirectory : conf.getJournalDirs()) { - this.journalDirectories.add(getCurrentDirectory(journalDirectory)); - } - DiskChecker diskChecker = new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold()); - this.ledgerDirsManager = new LedgerDirsManager(conf, conf.getLedgerDirs(), diskChecker, - statsLogger.scope(LD_LEDGER_SCOPE)); - - File[] idxDirs = conf.getIndexDirs(); - if (null == idxDirs) { - this.indexDirsManager = this.ledgerDirsManager; - } else { - this.indexDirsManager = new LedgerDirsManager(conf, idxDirs, diskChecker, - statsLogger.scope(LD_INDEX_SCOPE)); - } - - // instantiate zookeeper client to initialize ledger manager - this.metadataDriver = instantiateMetadataDriver(conf); - checkEnvironment(this.metadataDriver); - try { - if (this.metadataDriver != null) { - // current the registration manager is zookeeper only - ledgerManagerFactory = metadataDriver.getLedgerManagerFactory(); - LOG.info("instantiate ledger manager {}", ledgerManagerFactory.getClass().getName()); - ledgerManager = ledgerManagerFactory.newLedgerManager(); - } else { - ledgerManagerFactory = null; - ledgerManager = null; - } - } catch (MetadataException e) { - throw new MetadataStoreException("Failed to initialize ledger manager", e); - } - stateManager = initializeStateManager(); - // register shutdown handler using trigger mode - stateManager.setShutdownHandler(exitCode -> triggerBookieShutdown(exitCode)); - // Initialise ledgerDirMonitor. This would look through all the - // configured directories. When disk errors or all the ledger - // directories are full, would throws exception and fail bookie startup. 
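The DiskChecker constructed above takes two fractions, conf.getDiskUsageThreshold() and conf.getDiskUsageWarnThreshold(), which together define the write-availability policy that the LedgerDirsMonitor enforces. A minimal sketch of the classification, assuming the usual semantics (past the warn level a directory keeps working but operators are alerted; past the hard level it leaves the writable set):

// Hedged sketch of the two-threshold disk policy; real thresholds come from
// ServerConfiguration (diskUsageThreshold / diskUsageWarnThreshold).
enum DirState { WRITABLE, WRITABLE_BUT_WARNED, NON_WRITABLE }

static DirState classifyDir(float usedFraction, float warnThreshold, float fullThreshold) {
    if (usedFraction >= fullThreshold) {
        return DirState.NON_WRITABLE;        // removed from the writable dir set
    }
    if (usedFraction >= warnThreshold) {
        return DirState.WRITABLE_BUT_WARNED; // still usable, warning logged
    }
    return DirState.WRITABLE;
}

When every ledger directory ends up non-writable, the bookie transitions to read-only rather than failing outright, provided readOnlyModeEnabled is set; that is exactly the fallback the try/catch around LedgerDirsMonitor.init() implements.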
- this.ledgerMonitor = new LedgerDirsMonitor(conf, diskChecker, ledgerDirsManager); - try { - this.ledgerMonitor.init(); - } catch (NoWritableLedgerDirException nle) { - // start in read-only mode if no writable dirs and read-only allowed - if (!conf.isReadOnlyModeEnabled()) { - throw nle; - } else { - this.stateManager.transitionToReadOnlyMode(); - } - } - - if (null == idxDirs) { - this.idxMonitor = this.ledgerMonitor; - } else { - this.idxMonitor = new LedgerDirsMonitor(conf, diskChecker, indexDirsManager); - try { - this.idxMonitor.init(); - } catch (NoWritableLedgerDirException nle) { - // start in read-only mode if no writable dirs and read-only allowed - if (!conf.isReadOnlyModeEnabled()) { - throw nle; - } else { - this.stateManager.transitionToReadOnlyMode(); - } - } - } - - - // instantiate the journals - journals = Lists.newArrayList(); - for (int i = 0; i < journalDirectories.size(); i++) { - journals.add(new Journal(i, journalDirectories.get(i), - conf, ledgerDirsManager, statsLogger.scope(JOURNAL_SCOPE))); - } - - this.entryLogPerLedgerEnabled = conf.isEntryLogPerLedgerEnabled(); - CheckpointSource checkpointSource = new CheckpointSourceList(journals); - - // Instantiate the ledger storage implementation - String ledgerStorageClass = conf.getLedgerStorageClass(); - LOG.info("Using ledger storage: {}", ledgerStorageClass); - ledgerStorage = LedgerStorageFactory.createLedgerStorage(ledgerStorageClass); - - /* - * with this change https://github.com/apache/bookkeeper/pull/677, - * LedgerStorage drives the checkpoint logic. But with multiple entry - * logs, checkpoint logic based on a entry log is not possible, hence it - * needs to be timebased recurring thing and it is driven by SyncThread. - * SyncThread.start does that and it is started in Bookie.start method. - */ - if (entryLogPerLedgerEnabled) { - syncThread = new SyncThread(conf, getLedgerDirsListener(), ledgerStorage, checkpointSource) { - @Override - public void startCheckpoint(Checkpoint checkpoint) { - /* - * in the case of entryLogPerLedgerEnabled, LedgerStorage - * dont drive checkpoint logic, but instead it is done - * periodically by SyncThread. So startCheckpoint which - * will be called by LedgerStorage will be no-op. 
- */ - } - - @Override - public void start() { - executor.scheduleAtFixedRate(() -> { - doCheckpoint(checkpointSource.newCheckpoint()); - }, conf.getFlushInterval(), conf.getFlushInterval(), TimeUnit.MILLISECONDS); - } - }; - } else { - syncThread = new SyncThread(conf, getLedgerDirsListener(), ledgerStorage, checkpointSource); - } - - ledgerStorage.initialize( - conf, - ledgerManager, - ledgerDirsManager, - indexDirsManager, - stateManager, - checkpointSource, - syncThread, - statsLogger); - - - handles = new HandleFactoryImpl(ledgerStorage); - - // Expose Stats - writeBytes = statsLogger.getCounter(WRITE_BYTES); - readBytes = statsLogger.getCounter(READ_BYTES); - forceLedgerOps = statsLogger.getCounter(BOOKIE_FORCE_LEDGER); - addEntryStats = statsLogger.getOpStatsLogger(BOOKIE_ADD_ENTRY); - recoveryAddEntryStats = statsLogger.getOpStatsLogger(BOOKIE_RECOVERY_ADD_ENTRY); - readEntryStats = statsLogger.getOpStatsLogger(BOOKIE_READ_ENTRY); - addBytesStats = statsLogger.getOpStatsLogger(BOOKIE_ADD_ENTRY_BYTES); - readBytesStats = statsLogger.getOpStatsLogger(BOOKIE_READ_ENTRY_BYTES); - } - - StateManager initializeStateManager() throws IOException { - return new BookieStateManager(conf, statsLogger, metadataDriver, ledgerDirsManager); - } - - void readJournal() throws IOException, BookieException { - long startTs = MathUtils.now(); - JournalScanner scanner = new JournalScanner() { - @Override - public void process(int journalVersion, long offset, ByteBuffer recBuff) throws IOException { - long ledgerId = recBuff.getLong(); - long entryId = recBuff.getLong(); - try { - if (LOG.isDebugEnabled()) { - LOG.debug("Replay journal - ledger id : {}, entry id : {}.", ledgerId, entryId); - } - if (entryId == METAENTRY_ID_LEDGER_KEY) { - if (journalVersion >= JournalChannel.V3) { - int masterKeyLen = recBuff.getInt(); - byte[] masterKey = new byte[masterKeyLen]; - - recBuff.get(masterKey); - masterKeyCache.put(ledgerId, masterKey); - - // Force to re-insert the master key in ledger storage - handles.getHandle(ledgerId, masterKey); - } else { - throw new IOException("Invalid journal. Contains journalKey " - + " but layout version (" + journalVersion - + ") is too old to hold this"); - } - } else if (entryId == METAENTRY_ID_FENCE_KEY) { - if (journalVersion >= JournalChannel.V4) { - byte[] key = masterKeyCache.get(ledgerId); - if (key == null) { - key = ledgerStorage.readMasterKey(ledgerId); - } - LedgerDescriptor handle = handles.getHandle(ledgerId, key); - handle.setFenced(); - } else { - throw new IOException("Invalid journal. Contains fenceKey " - + " but layout version (" + journalVersion - + ") is too old to hold this"); - } - } else if (entryId == METAENTRY_ID_LEDGER_EXPLICITLAC) { - if (journalVersion >= JournalChannel.V6) { - int explicitLacBufLength = recBuff.getInt(); - ByteBuf explicitLacBuf = Unpooled.buffer(explicitLacBufLength); - byte[] explicitLacBufArray = new byte[explicitLacBufLength]; - recBuff.get(explicitLacBufArray); - explicitLacBuf.writeBytes(explicitLacBufArray); - byte[] key = masterKeyCache.get(ledgerId); - if (key == null) { - key = ledgerStorage.readMasterKey(ledgerId); - } - LedgerDescriptor handle = handles.getHandle(ledgerId, key); - handle.setExplicitLac(explicitLacBuf); - } else { - throw new IOException("Invalid journal. 
Contains explicitLAC " + " but layout version (" - + journalVersion + ") is too old to hold this"); - } - } else if (entryId < 0) { - /* - * this is possible if bookie code binary is rolledback - * to older version but when it is trying to read - * Journal which was created previously using newer - * code/journalversion, which introduced new special - * entry. So in anycase, if we see unrecognizable - * special entry while replaying journal we should skip - * (ignore) it. - */ - LOG.warn("Read unrecognizable entryId: {} for ledger: {} while replaying Journal. Skipping it", - entryId, ledgerId); - } else { - byte[] key = masterKeyCache.get(ledgerId); - if (key == null) { - key = ledgerStorage.readMasterKey(ledgerId); - } - LedgerDescriptor handle = handles.getHandle(ledgerId, key); - - recBuff.rewind(); - handle.addEntry(Unpooled.wrappedBuffer(recBuff)); - } - } catch (NoLedgerException nsle) { - if (LOG.isDebugEnabled()) { - LOG.debug("Skip replaying entries of ledger {} since it was deleted.", ledgerId); - } - } catch (BookieException be) { - throw new IOException(be); - } - } - }; - - for (Journal journal : journals) { - journal.replay(scanner); - } - long elapsedTs = MathUtils.now() - startTs; - LOG.info("Finished replaying journal in {} ms.", elapsedTs); - } - - @Override - public synchronized void start() { - setDaemon(true); - if (LOG.isDebugEnabled()) { - LOG.debug("I'm starting a bookie with journal directories {}", - journalDirectories.stream().map(File::getName).collect(Collectors.joining(", "))); - } - //Start DiskChecker thread - ledgerMonitor.start(); - if (indexDirsManager != ledgerDirsManager) { - idxMonitor.start(); - } - - // replay journals - try { - readJournal(); - } catch (IOException ioe) { - LOG.error("Exception while replaying journals, shutting down", ioe); - shutdown(ExitCode.BOOKIE_EXCEPTION); - return; - } catch (BookieException be) { - LOG.error("Exception while replaying journals, shutting down", be); - shutdown(ExitCode.BOOKIE_EXCEPTION); - return; - } - - // Do a fully flush after journal replay - try { - syncThread.requestFlush().get(); - } catch (InterruptedException e) { - LOG.warn("Interrupting the fully flush after replaying journals : ", e); - Thread.currentThread().interrupt(); - } catch (ExecutionException e) { - LOG.error("Error on executing a fully flush after replaying journals."); - shutdown(ExitCode.BOOKIE_EXCEPTION); - } - LOG.info("Finished reading journal, starting bookie"); - - - /* - * start sync thread first, so during replaying journals, we could do - * checkpoint which reduce the chance that we need to replay journals - * again if bookie restarted again before finished journal replays. - */ - syncThread.start(); - - // start bookie thread - super.start(); - - // After successful bookie startup, register listener for disk - // error/full notifications. - ledgerDirsManager.addLedgerDirsListener(getLedgerDirsListener()); - if (indexDirsManager != ledgerDirsManager) { - indexDirsManager.addLedgerDirsListener(getLedgerDirsListener()); - } - - ledgerStorage.start(); - - // check the bookie status to start with, and set running. - // since bookie server use running as a flag to tell bookie server whether it is alive - // if setting it in bookie thread, the watcher might run before bookie thread. 
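The replay logic above can distinguish these special records from user entries only because the write path reserves negative entry ids for them (METAENTRY_ID_LEDGER_KEY = -0x1000, METAENTRY_ID_FENCE_KEY = -0x2000, and so on). A self-contained sketch of the master-key record that the scanner decodes; it mirrors what addEntryInternal writes later in this diff:

import java.nio.ByteBuffer;

final class MetaentrySketch {
    // Reserved marker, matching METAENTRY_ID_LEDGER_KEY in this class.
    static final long METAENTRY_ID_LEDGER_KEY = -0x1000;

    // Record layout: ledgerId | reserved entryId | key length | key bytes.
    // Journal replay keys off the negative entryId to recover the master key.
    static ByteBuffer masterKeyRecord(long ledgerId, byte[] masterKey) {
        ByteBuffer bb = ByteBuffer.allocate(8 + 8 + 4 + masterKey.length);
        bb.putLong(ledgerId);
        bb.putLong(METAENTRY_ID_LEDGER_KEY);
        bb.putInt(masterKey.length);
        bb.put(masterKey);
        bb.flip();
        return bb;
    }
}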
- stateManager.initState(); - - try { - stateManager.registerBookie(true).get(); - } catch (Exception e) { - LOG.error("Couldn't register bookie with zookeeper, shutting down : ", e); - shutdown(ExitCode.ZK_REG_FAIL); - } - } - - /* - * Get the DiskFailure listener for the bookie - */ - private LedgerDirsListener getLedgerDirsListener() { - - return new LedgerDirsListener() { - - @Override - public void diskFailed(File disk) { - // Shutdown the bookie on disk failure. - triggerBookieShutdown(ExitCode.BOOKIE_EXCEPTION); - } - - @Override - public void allDisksFull(boolean highPriorityWritesAllowed) { - // Transition to readOnly mode on all disks full - stateManager.setHighPriorityWritesAvailability(highPriorityWritesAllowed); - stateManager.transitionToReadOnlyMode(); - } - - @Override - public void fatalError() { - LOG.error("Fatal error reported by ledgerDirsManager"); - triggerBookieShutdown(ExitCode.BOOKIE_EXCEPTION); - } - - @Override - public void diskWritable(File disk) { - // Transition to writable mode when a disk becomes writable again. - stateManager.setHighPriorityWritesAvailability(true); - stateManager.transitionToWritableMode(); - } - - @Override - public void diskJustWritable(File disk) { - // Transition to writable mode when a disk becomes writable again. - stateManager.setHighPriorityWritesAvailability(true); - stateManager.transitionToWritableMode(); - } - }; - } - - /** - * Instantiate the metadata driver for the Bookie. - */ - private MetadataBookieDriver instantiateMetadataDriver(ServerConfiguration conf) throws BookieException { - try { - String metadataServiceUriStr = conf.getMetadataServiceUri(); - if (null == metadataServiceUriStr) { - return null; - } - - MetadataBookieDriver driver = MetadataDrivers.getBookieDriver( - URI.create(metadataServiceUriStr)); - driver.initialize( - conf, - () -> { - stateManager.forceToUnregistered(); - // schedule a re-register operation - stateManager.registerBookie(false); - }, - statsLogger); - return driver; - } catch (MetadataException me) { - throw new MetadataStoreException("Failed to initialize metadata bookie driver", me); - } catch (ConfigurationException e) { - throw new BookieIllegalOpException(e); - } - } - - /* - * Check whether Bookie is writable. - */ - public boolean isReadOnly() { - return stateManager.isReadOnly(); - } - - /** - * Check whether Bookie is available for high priority writes. - * - * @return true if the bookie is able to take high priority writes. 
- */ - public boolean isAvailableForHighPriorityWrites() { - return stateManager.isAvailableForHighPriorityWrites(); - } - - public boolean isRunning() { - return stateManager.isRunning(); - } - - @Override - public void run() { - // bookie thread wait for journal thread - try { - // start journals - for (Journal journal: journals) { - journal.start(); - } - - // wait until journal quits - for (Journal journal: journals) { - - journal.joinThread(); - } - LOG.info("Journal thread(s) quit."); - } catch (InterruptedException ie) { - Thread.currentThread().interrupt(); - LOG.warn("Interrupted on running journal thread : ", ie); - } - // if the journal thread quits due to shutting down, it is ok - if (!stateManager.isShuttingDown()) { - // some error found in journal thread and it quits - // following add operations to it would hang unit client timeout - // so we should let bookie server exists - LOG.error("Journal manager quits unexpectedly."); - triggerBookieShutdown(ExitCode.BOOKIE_EXCEPTION); - } - } - - // Triggering the Bookie shutdown in its own thread, - // because shutdown can be called from sync thread which would be - // interrupted by shutdown call. - AtomicBoolean shutdownTriggered = new AtomicBoolean(false); - void triggerBookieShutdown(final int exitCode) { - if (!shutdownTriggered.compareAndSet(false, true)) { - return; - } - LOG.info("Triggering shutdown of Bookie-{} with exitCode {}", - conf.getBookiePort(), exitCode); - BookieThread th = new BookieThread("BookieShutdownTrigger") { - @Override - public void run() { - Bookie.this.shutdown(exitCode); - } - }; - th.start(); - } - - // provided a public shutdown method for other caller - // to shut down bookie gracefully - public int shutdown() { - return shutdown(ExitCode.OK); - } - - // internal shutdown method to let shutdown bookie gracefully - // when encountering exception - synchronized int shutdown(int exitCode) { - try { - if (isRunning()) { // avoid shutdown twice - // the exitCode only set when first shutdown usually due to exception found - LOG.info("Shutting down Bookie-{} with exitCode {}", - conf.getBookiePort(), exitCode); - if (this.exitCode == ExitCode.OK) { - this.exitCode = exitCode; - } - - stateManager.forceToShuttingDown(); - - // turn bookie to read only during shutting down process - LOG.info("Turning bookie to read only during shut down"); - stateManager.forceToReadOnly(); - - // Shutdown Sync thread - syncThread.shutdown(); - - // Shutdown journals - for (Journal journal : journals) { - journal.shutdown(); - } - this.join(); - - // Shutdown the EntryLogger which has the GarbageCollector Thread running - ledgerStorage.shutdown(); - - // close Ledger Manager - try { - if (null != ledgerManager) { - ledgerManager.close(); - } - if (null != ledgerManagerFactory) { - ledgerManagerFactory.close(); - } - } catch (IOException ie) { - LOG.error("Failed to close active ledger manager : ", ie); - } - - //Shutdown disk checker - ledgerMonitor.shutdown(); - if (indexDirsManager != ledgerDirsManager) { - idxMonitor.shutdown(); - } - - } - // Shutdown the ZK client - if (metadataDriver != null) { - metadataDriver.close(); - } - } catch (InterruptedException ie) { - Thread.currentThread().interrupt(); - LOG.error("Interrupted during shutting down bookie : ", ie); - } catch (Exception e) { - LOG.error("Got Exception while trying to shutdown Bookie", e); - throw e; - } finally { - // setting running to false here, so watch thread - // in bookie server know it only after bookie shut down - stateManager.close(); - } - 
return this.exitCode; - } - - /** - * Retrieve the ledger descriptor for the ledger which entry should be added to. - * The LedgerDescriptor returned from this method should be eventually freed with - * #putHandle(). - * - * @throws BookieException if masterKey does not match the master key of the ledger - */ - @VisibleForTesting - LedgerDescriptor getLedgerForEntry(ByteBuf entry, final byte[] masterKey) - throws IOException, BookieException { - final long ledgerId = entry.getLong(entry.readerIndex()); - - return handles.getHandle(ledgerId, masterKey); - } - - private Journal getJournal(long ledgerId) { - return journals.get(MathUtils.signSafeMod(ledgerId, journals.size())); - } - - /** - * Add an entry to a ledger as specified by handle. - */ - private void addEntryInternal(LedgerDescriptor handle, ByteBuf entry, - boolean ackBeforeSync, WriteCallback cb, Object ctx, byte[] masterKey) - throws IOException, BookieException { - long ledgerId = handle.getLedgerId(); - long entryId = handle.addEntry(entry); - - writeBytes.add(entry.readableBytes()); - - // journal `addEntry` should happen after the entry is added to ledger storage. - // otherwise the journal entry can potentially be rolled before the ledger is created in ledger storage. - if (masterKeyCache.get(ledgerId) == null) { - // Force the load into masterKey cache - byte[] oldValue = masterKeyCache.putIfAbsent(ledgerId, masterKey); - if (oldValue == null) { - // new handle, we should add the key to journal ensure we can rebuild - ByteBuffer bb = ByteBuffer.allocate(8 + 8 + 4 + masterKey.length); - bb.putLong(ledgerId); - bb.putLong(METAENTRY_ID_LEDGER_KEY); - bb.putInt(masterKey.length); - bb.put(masterKey); - bb.flip(); - - getJournal(ledgerId).logAddEntry(bb, false /* ackBeforeSync */, new NopWriteCallback(), null); - } - } - - if (LOG.isTraceEnabled()) { - LOG.trace("Adding {}@{}", entryId, ledgerId); - } - getJournal(ledgerId).logAddEntry(entry, ackBeforeSync, cb, ctx); - } - - /** - * Add entry to a ledger, even if the ledger has previous been fenced. This should only - * happen in bookie recovery or ledger recovery cases, where entries are being replicates - * so that they exist on a quorum of bookies. The corresponding client side call for this - * is not exposed to users. 
- */ - public void recoveryAddEntry(ByteBuf entry, WriteCallback cb, Object ctx, byte[] masterKey) - throws IOException, BookieException { - long requestNanos = MathUtils.nowInNano(); - boolean success = false; - int entrySize = 0; - try { - LedgerDescriptor handle = getLedgerForEntry(entry, masterKey); - synchronized (handle) { - entrySize = entry.readableBytes(); - addEntryInternal(handle, entry, false /* ackBeforeSync */, cb, ctx, masterKey); - } - success = true; - } catch (NoWritableLedgerDirException e) { - stateManager.transitionToReadOnlyMode(); - throw new IOException(e); - } finally { - long elapsedNanos = MathUtils.elapsedNanos(requestNanos); - if (success) { - recoveryAddEntryStats.registerSuccessfulEvent(elapsedNanos, TimeUnit.NANOSECONDS); - addBytesStats.registerSuccessfulValue(entrySize); - } else { - recoveryAddEntryStats.registerFailedEvent(elapsedNanos, TimeUnit.NANOSECONDS); - addBytesStats.registerFailedValue(entrySize); - } - - entry.release(); - } - } - - static ByteBuf createExplicitLACEntry(long ledgerId, ByteBuf explicitLac) { - ByteBuf bb = PooledByteBufAllocator.DEFAULT.directBuffer(8 + 8 + 4 + explicitLac.capacity()); - bb.writeLong(ledgerId); - bb.writeLong(METAENTRY_ID_LEDGER_EXPLICITLAC); - bb.writeInt(explicitLac.capacity()); - bb.writeBytes(explicitLac); - return bb; - } - - public void setExplicitLac(ByteBuf entry, WriteCallback writeCallback, Object ctx, byte[] masterKey) - throws IOException, BookieException { - try { - long ledgerId = entry.getLong(entry.readerIndex()); - LedgerDescriptor handle = handles.getHandle(ledgerId, masterKey); - synchronized (handle) { - entry.markReaderIndex(); - handle.setExplicitLac(entry); - entry.resetReaderIndex(); - ByteBuf explicitLACEntry = createExplicitLACEntry(ledgerId, entry); - getJournal(ledgerId).logAddEntry(explicitLACEntry, false /* ackBeforeSync */, writeCallback, ctx); - } - } catch (NoWritableLedgerDirException e) { - stateManager.transitionToReadOnlyMode(); - throw new IOException(e); - } - } - - public ByteBuf getExplicitLac(long ledgerId) throws IOException, Bookie.NoLedgerException { - ByteBuf lac; - LedgerDescriptor handle = handles.getReadOnlyHandle(ledgerId); - synchronized (handle) { - lac = handle.getExplicitLac(); - } - return lac; - } - - /** - * Force sync given 'ledgerId' entries on the journal to the disk. - * It works like a regular addEntry with ackBeforeSync=false. - * This is useful for ledgers with DEFERRED_SYNC write flag. - */ - public void forceLedger(long ledgerId, WriteCallback cb, - Object ctx) { - if (LOG.isTraceEnabled()) { - LOG.trace("Forcing ledger {}", ledgerId); - } - Journal journal = getJournal(ledgerId); - journal.forceLedger(ledgerId, cb, ctx); - forceLedgerOps.inc(); - } - - /** - * Add entry to a ledger. 
- * @throws BookieException.LedgerFencedException if the ledger is fenced - */ - public void addEntry(ByteBuf entry, boolean ackBeforeSync, WriteCallback cb, Object ctx, byte[] masterKey) - throws IOException, BookieException.LedgerFencedException, BookieException { - long requestNanos = MathUtils.nowInNano(); - boolean success = false; - int entrySize = 0; - try { - LedgerDescriptor handle = getLedgerForEntry(entry, masterKey); - synchronized (handle) { - if (handle.isFenced()) { - throw BookieException - .create(BookieException.Code.LedgerFencedException); - } - entrySize = entry.readableBytes(); - addEntryInternal(handle, entry, ackBeforeSync, cb, ctx, masterKey); - } - success = true; - } catch (NoWritableLedgerDirException e) { - stateManager.transitionToReadOnlyMode(); - throw new IOException(e); - } finally { - long elapsedNanos = MathUtils.elapsedNanos(requestNanos); - if (success) { - addEntryStats.registerSuccessfulEvent(elapsedNanos, TimeUnit.NANOSECONDS); - addBytesStats.registerSuccessfulValue(entrySize); - } else { - addEntryStats.registerFailedEvent(elapsedNanos, TimeUnit.NANOSECONDS); - addBytesStats.registerFailedValue(entrySize); - } - - entry.release(); - } - } - - static class FutureWriteCallback implements WriteCallback { - - SettableFuture result = SettableFuture.create(); - - @Override - public void writeComplete(int rc, long ledgerId, long entryId, - BookieSocketAddress addr, Object ctx) { - if (LOG.isDebugEnabled()) { - LOG.debug("Finished writing entry {} @ ledger {} for {} : {}", - entryId, ledgerId, addr, rc); - } - - result.set(0 == rc); - } - - public SettableFuture getResult() { - return result; - } - } - - /** - * Fences a ledger. From this point on, clients will be unable to - * write to this ledger. Only recoveryAddEntry will be - * able to add entries to the ledger. - * This method is idempotent. Once a ledger is fenced, it can - * never be unfenced. Fencing a fenced ledger has no effect. 
- */ - public SettableFuture fenceLedger(long ledgerId, byte[] masterKey) throws IOException, BookieException { - LedgerDescriptor handle = handles.getHandle(ledgerId, masterKey); - return handle.fenceAndLogInJournal(getJournal(ledgerId)); - } - - public ByteBuf readEntry(long ledgerId, long entryId) - throws IOException, NoLedgerException { - long requestNanos = MathUtils.nowInNano(); - boolean success = false; - int entrySize = 0; - try { - LedgerDescriptor handle = handles.getReadOnlyHandle(ledgerId); - if (LOG.isTraceEnabled()) { - LOG.trace("Reading {}@{}", entryId, ledgerId); - } - ByteBuf entry = handle.readEntry(entryId); - readBytes.add(entry.readableBytes()); - success = true; - return entry; - } finally { - long elapsedNanos = MathUtils.elapsedNanos(requestNanos); - if (success) { - readEntryStats.registerSuccessfulEvent(elapsedNanos, TimeUnit.NANOSECONDS); - readBytesStats.registerSuccessfulValue(entrySize); - } else { - readEntryStats.registerFailedEvent(elapsedNanos, TimeUnit.NANOSECONDS); - readBytesStats.registerFailedValue(entrySize); - } - } - } - - public long readLastAddConfirmed(long ledgerId) throws IOException { - LedgerDescriptor handle = handles.getReadOnlyHandle(ledgerId); - return handle.getLastAddConfirmed(); - } - - public boolean waitForLastAddConfirmedUpdate(long ledgerId, - long previousLAC, - Watcher watcher) - throws IOException { - LedgerDescriptor handle = handles.getReadOnlyHandle(ledgerId); - return handle.waitForLastAddConfirmedUpdate(previousLAC, watcher); - } - - @VisibleForTesting - public LedgerStorage getLedgerStorage() { - return ledgerStorage; - } - - @VisibleForTesting - public BookieStateManager getStateManager() { - return (BookieStateManager) this.stateManager; - } - - @VisibleForTesting - public LedgerManagerFactory getLedgerManagerFactory() { - return ledgerManagerFactory; - } - - // The rest of the code is test stuff - static class CounterCallback implements WriteCallback { - int count; - - @Override - public synchronized void writeComplete(int rc, long l, long e, BookieSocketAddress addr, Object ctx) { - count--; - if (count == 0) { - notifyAll(); - } - } - - public synchronized void incCount() { - count++; - } - - public synchronized void waitZero() throws InterruptedException { - while (count > 0) { - wait(); - } - } - } - - /** - * Format the bookie server data. - * - * @param conf ServerConfiguration - * @param isInteractive Whether format should ask prompt for confirmation if old data exists or not. - * @param force If non interactive and force is true, then old data will be removed without confirm prompt. - * @return Returns true if the format is success else returns false - */ - public static boolean format(ServerConfiguration conf, - boolean isInteractive, boolean force) { - for (File journalDir : conf.getJournalDirs()) { - String[] journalDirFiles = - journalDir.exists() && journalDir.isDirectory() ? journalDir.list() : null; - if (journalDirFiles != null && journalDirFiles.length != 0) { - try { - boolean confirm = false; - if (!isInteractive) { - // If non interactive and force is set, then delete old - // data. 
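The readEntry, addEntry and recoveryAddEntry paths above all share one instrumentation idiom: take a nano timestamp on entry, set a success flag as the last statement of the try block, and let a finally clause route the elapsed time to the success or failure histogram. A stripped-down sketch of the pattern; the MathUtils and OpStatsLogger calls are the APIs this file really uses, while the Supplier wrapper is purely illustrative:

import java.util.concurrent.TimeUnit;
import java.util.function.Supplier;
import org.apache.bookkeeper.stats.OpStatsLogger;
import org.apache.bookkeeper.util.MathUtils;

final class TimedOpSketch {
    // Record an operation's latency against the right histogram, success or failure.
    static <T> T timed(OpStatsLogger stats, Supplier<T> op) {
        long requestNanos = MathUtils.nowInNano();
        boolean success = false;
        try {
            T result = op.get();
            success = true; // only reached when op.get() did not throw
            return result;
        } finally {
            long elapsed = MathUtils.elapsedNanos(requestNanos);
            if (success) {
                stats.registerSuccessfulEvent(elapsed, TimeUnit.NANOSECONDS);
            } else {
                stats.registerFailedEvent(elapsed, TimeUnit.NANOSECONDS);
            }
        }
    }
}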
- if (force) { - confirm = true; - } else { - confirm = false; - } - } else { - confirm = IOUtils - .confirmPrompt("Are you sure to format Bookie data..?"); - } - - if (!confirm) { - LOG.error("Bookie format aborted!!"); - return false; - } - } catch (IOException e) { - LOG.error("Error during bookie format", e); - return false; - } - } - if (!cleanDir(journalDir)) { - LOG.error("Formatting journal directory failed"); - return false; - } - - File[] ledgerDirs = conf.getLedgerDirs(); - for (File dir : ledgerDirs) { - if (!cleanDir(dir)) { - LOG.error("Formatting ledger directory " + dir + " failed"); - return false; - } - } - - // Clean up index directories if they are separate from the ledger dirs - File[] indexDirs = conf.getIndexDirs(); - if (null != indexDirs) { - for (File dir : indexDirs) { - if (!cleanDir(dir)) { - LOG.error("Formatting ledger directory " + dir + " failed"); - return false; - } - } - } - } - - LOG.info("Bookie format completed successfully"); - return true; - } - - private static boolean cleanDir(File dir) { - if (dir.exists()) { - File[] files = dir.listFiles(); - if (files != null) { - for (File child : files) { - boolean delete = FileUtils.deleteQuietly(child); - if (!delete) { - LOG.error("Not able to delete " + child); - return false; - } - } - } - } else if (!dir.mkdirs()) { - LOG.error("Not able to create the directory " + dir); - return false; - } - return true; - } - - /** - * @param args - * @throws IOException - * @throws InterruptedException - */ - public static void main(String[] args) - throws IOException, InterruptedException, BookieException, KeeperException { - Bookie b = new Bookie(new ServerConfiguration()); - b.start(); - CounterCallback cb = new CounterCallback(); - long start = MathUtils.now(); - for (int i = 0; i < 100000; i++) { - ByteBuf buff = Unpooled.buffer(1024); - buff.writeLong(1); - buff.writeLong(i); - cb.incCount(); - b.addEntry(buff, false /* ackBeforeSync */, cb, null, new byte[0]); - } - cb.waitZero(); - long end = MathUtils.now(); - System.out.println("Took " + (end - start) + "ms"); - } - - /** - * Returns exit code - cause of failure. - * - * @return {@link ExitCode} - */ - public int getExitCode() { - return exitCode; - } - -} +} \ No newline at end of file diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BookieCriticalThread.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BookieCriticalThread.java index 53116611c9c..31712a60d70 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BookieCriticalThread.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BookieCriticalThread.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file * distributed with this work for additional information diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BookieException.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BookieException.java index 3d84148d1cf..2b85961cf4b 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BookieException.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BookieException.java @@ -63,10 +63,14 @@ public static BookieException create(int code) { return new DiskPartitionDuplicationException(); case Code.CookieNotFoundException: return new CookieNotFoundException(); + case Code.CookieExistsException: + return new CookieExistException(); case Code.MetadataStoreException: return new MetadataStoreException(); case Code.UnknownBookieIdException: return new UnknownBookieIdException(); + case Code.DataUnknownException: + return new DataUnknownException(); default: return new BookieIllegalOpException(); } @@ -88,6 +92,9 @@ public interface Code { int MetadataStoreException = -106; int UnknownBookieIdException = -107; int OperationRejectedException = -108; + int CookieExistsException = -109; + int EntryLogMetadataMapException = -110; + int DataUnknownException = -111; } public int getCode() { @@ -118,6 +125,12 @@ public String getMessage(int code) { case Code.CookieNotFoundException: err = "Cookie not found"; break; + case Code.CookieExistsException: + err = "Cookie already exists"; + break; + case Code.EntryLogMetadataMapException: + err = "Error in accessing Entry-log metadata map"; + break; case Code.MetadataStoreException: err = "Error performing metadata operations"; break; @@ -127,6 +140,9 @@ public String getMessage(int code) { case Code.OperationRejectedException: err = "Operation rejected"; break; + case Code.DataUnknownException: + err = "Unable to respond, ledger is in unknown state"; + break; default: err = "Invalid operation"; break; @@ -151,6 +167,10 @@ public static class BookieUnauthorizedAccessException extends BookieException { public BookieUnauthorizedAccessException() { super(Code.UnauthorizedAccessException); } + + public BookieUnauthorizedAccessException(String reason) { + super(Code.UnauthorizedAccessException, reason); + } } /** @@ -180,7 +200,7 @@ public LedgerFencedException() { } /** - * Signals that a ledger has been fenced in a bookie. No more entries can be appended to that ledger. + * Signals that a ledger's operation has been rejected by an internal component because of resource saturation. */ public static class OperationRejectedException extends BookieException { public OperationRejectedException() { @@ -231,6 +251,32 @@ public CookieNotFoundException(Throwable cause) { } } + /** + * Signals that a cookie already exists when creating a new cookie. + */ + public static class CookieExistException extends BookieException { + public CookieExistException() { + this(""); + } + + public CookieExistException(String reason) { + super(Code.CookieExistsException, reason); + } + + public CookieExistException(Throwable cause) { + super(Code.CookieExistsException, cause); + } + } + + /** + * Signals an error while accessing the entry-log metadata map. + */ + public static class EntryLogMetadataMapException extends BookieException { + public EntryLogMetadataMapException(Throwable cause) { + super(Code.EntryLogMetadataMapException, cause); + } + } + /** * Signals that an exception occurs on upgrading a bookie. 
*/ @@ -301,4 +347,25 @@ public UnknownBookieIdException(Throwable cause) { } } + /** + * Signals that a ledger is in a limbo state and certain operations + * cannot be performed on it. + */ + public static class DataUnknownException extends BookieException { + public DataUnknownException() { + super(Code.DataUnknownException); + } + + public DataUnknownException(Throwable t) { + super(Code.DataUnknownException, t); + } + + public DataUnknownException(String reason) { + super(Code.DataUnknownException, reason); + } + + public DataUnknownException(String reason, Throwable t) { + super(Code.DataUnknownException, reason, t); + } + } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BookieFileChannel.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BookieFileChannel.java new file mode 100644 index 00000000000..3bd04cbe35f --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BookieFileChannel.java @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.bookkeeper.bookie; + +import java.io.File; +import java.io.FileDescriptor; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.nio.channels.FileChannel; + +/** + * A FileChannel abstraction for JournalChannel reads and writes; this interface makes it + * possible to extend the FileChannel implementation used by the JournalChannel. + */ +public interface BookieFileChannel { + + /** + * Get the FileChannel from the provider. + * + * @return the underlying FileChannel + */ + FileChannel getFileChannel() throws FileNotFoundException, IOException; + + /** + * Check whether the given file exists. + * + * @param file the file to check + * @return true if the file exists, false otherwise + */ + boolean fileExists(File file); + + /** + * Get the file descriptor of the opened file. + * + * @return the file descriptor + * @throws IOException if the file descriptor cannot be obtained + */ + FileDescriptor getFD() throws IOException; + + /** + * Close the file channel and release all resources. + */ + void close() throws IOException; +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BookieImpl.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BookieImpl.java new file mode 100644 index 00000000000..a69df4a1768 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BookieImpl.java @@ -0,0 +1,1324 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +package org.apache.bookkeeper.bookie; + +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.JOURNAL_SCOPE; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.LD_INDEX_SCOPE; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.LD_LEDGER_SCOPE; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Strings; +import com.google.common.collect.Lists; +import io.netty.buffer.ByteBuf; +import io.netty.buffer.ByteBufAllocator; +import io.netty.buffer.Unpooled; +import io.netty.buffer.UnpooledByteBufAllocator; +import io.netty.util.ReferenceCountUtil; +import java.io.File; +import java.io.FilenameFilter; +import java.io.IOException; +import java.net.InetAddress; +import java.net.InetSocketAddress; +import java.net.UnknownHostException; +import java.nio.ByteBuffer; +import java.nio.file.FileStore; +import java.nio.file.Files; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.PrimitiveIterator.OfLong; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.locks.ReentrantLock; +import java.util.function.Supplier; +import java.util.stream.Collectors; +import org.apache.bookkeeper.bookie.BookieException.DiskPartitionDuplicationException; +import org.apache.bookkeeper.bookie.CheckpointSource.Checkpoint; +import org.apache.bookkeeper.bookie.Journal.JournalScanner; +import org.apache.bookkeeper.bookie.LedgerDirsManager.LedgerDirsListener; +import org.apache.bookkeeper.bookie.LedgerDirsManager.NoWritableLedgerDirException; +import org.apache.bookkeeper.bookie.stats.BookieStats; +import org.apache.bookkeeper.bookie.storage.ldb.DbLedgerStorage; +import org.apache.bookkeeper.common.util.MathUtils; +import org.apache.bookkeeper.common.util.Watcher; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.discover.BookieServiceInfo; +import org.apache.bookkeeper.discover.RegistrationManager; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.DNS; +import org.apache.bookkeeper.proto.BookieRequestHandler; +import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.WriteCallback; +import org.apache.bookkeeper.stats.NullStatsLogger; +import org.apache.bookkeeper.stats.StatsLogger; +import org.apache.bookkeeper.stats.ThreadRegistry; +import org.apache.bookkeeper.util.BookKeeperConstants; +import org.apache.bookkeeper.util.DiskChecker; +import org.apache.bookkeeper.util.IOUtils; +import org.apache.bookkeeper.util.collections.ConcurrentLongHashMap; +import org.apache.commons.io.FileUtils; +import org.apache.commons.lang3.mutable.MutableBoolean; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Implements a bookie. 
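+ * <p>A hedged lifecycle sketch using only methods defined below; construction
+ * arguments are elided (see the constructor and the newBookieImpl factory):
+ * <pre>{@code
+ * BookieImpl bookie = ...;   // wired via BookieResources and the constructor
+ * bookie.start();            // replays journals, then begins serving
+ * bookie.join();             // wait for the bookie thread to exit
+ * int rc = bookie.getExitCode();
+ * }</pre>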
+ */ +public class BookieImpl implements Bookie { + + private static final Logger LOG = LoggerFactory.getLogger(Bookie.class); + + final List<File> journalDirectories; + final ServerConfiguration conf; + + final SyncThread syncThread; + final LedgerStorage ledgerStorage; + final RegistrationManager registrationManager; + final List<Journal> journals; + + final HandleFactory handles; + final boolean entryLogPerLedgerEnabled; + + public static final long METAENTRY_ID_LEDGER_KEY = -0x1000; + public static final long METAENTRY_ID_FENCE_KEY = -0x2000; + public static final long METAENTRY_ID_FORCE_LEDGER = -0x4000; + static final long METAENTRY_ID_LEDGER_EXPLICITLAC = -0x8000; + + private final LedgerDirsManager ledgerDirsManager; + protected final Supplier<BookieServiceInfo> bookieServiceInfoProvider; + private final LedgerDirsManager indexDirsManager; + LedgerDirsMonitor dirsMonitor; + + private int exitCode = ExitCode.OK; + + private final ConcurrentLongHashMap<byte[]> masterKeyCache = + ConcurrentLongHashMap.<byte[]>newBuilder().autoShrink(true).build(); + + protected StateManager stateManager; + + private BookieCriticalThread bookieThread; + + // Expose Stats + final StatsLogger statsLogger; + private final BookieStats bookieStats; + + private final ByteBufAllocator allocator; + + private final boolean writeDataToJournal; + + // Write callback that does nothing + static class NopWriteCallback implements WriteCallback { + @Override + public void writeComplete(int rc, long ledgerId, long entryId, + BookieId addr, Object ctx) { + if (LOG.isDebugEnabled()) { + LOG.debug("Finished writing entry {} @ ledger {} for {} : {}", + entryId, ledgerId, addr, rc); + } + } + } + + public static void checkDirectoryStructure(File dir) throws IOException { + if (!dir.exists()) { + File parent = dir.getParentFile(); + File preV3versionFile = new File(dir.getParent(), + BookKeeperConstants.VERSION_FILENAME); + + final AtomicBoolean oldDataExists = new AtomicBoolean(false); + parent.list(new FilenameFilter() { + @Override + public boolean accept(File dir, String name) { + if (name.endsWith(".txn") || name.endsWith(".idx") || name.endsWith(".log")) { + oldDataExists.set(true); + } + return true; + } + }); + if (preV3versionFile.exists() || oldDataExists.get()) { + String err = "Directory layout version is less than 3, upgrade needed"; + LOG.error(err); + throw new IOException(err); + } + if (!dir.mkdirs()) { + String err = "Unable to create directory " + dir; + LOG.error(err); + throw new IOException(err); + } + } + } + + /** + * Check that the environment for the bookie is correct. + * This means that the configuration has stayed the same as the + * first run and the filesystem structure is up to date. + */ + private void checkEnvironment() + throws BookieException, IOException, InterruptedException { + List<File> allLedgerDirs = new ArrayList<>(ledgerDirsManager.getAllLedgerDirs().size() + + indexDirsManager.getAllLedgerDirs().size()); + allLedgerDirs.addAll(ledgerDirsManager.getAllLedgerDirs()); + if (indexDirsManager != ledgerDirsManager) { + allLedgerDirs.addAll(indexDirsManager.getAllLedgerDirs()); + } + + for (File journalDirectory : journalDirectories) { + checkDirectoryStructure(journalDirectory); + } + + for (File dir : allLedgerDirs) { + checkDirectoryStructure(dir); + } + + checkIfDirsOnSameDiskPartition(allLedgerDirs); + checkIfDirsOnSameDiskPartition(journalDirectories); + } + + /** + * Checks if multiple directories are on the same disk partition/filesystem/device.
+ * If the ALLOW_MULTIPLEDIRS_UNDER_SAME_DISKPARTITION config parameter is not enabled, and + * multiple directories are found on the same disk partition, then + * it will throw a DiskPartitionDuplicationException. + * + * @param dirs dirs to validate + * + * @throws DiskPartitionDuplicationException + */ + private void checkIfDirsOnSameDiskPartition(List<File> dirs) throws DiskPartitionDuplicationException { + boolean allowDiskPartitionDuplication = conf.isAllowMultipleDirsUnderSameDiskPartition(); + final MutableBoolean isDuplicationFoundAndNotAllowed = new MutableBoolean(false); + Map<FileStore, List<File>> fileStoreDirsMap = new HashMap<>(); + for (File dir : dirs) { + FileStore fileStore; + try { + fileStore = Files.getFileStore(dir.toPath()); + } catch (IOException e) { + LOG.error("Got IOException while trying to get the FileStore of {}", dir); + throw new BookieException.DiskPartitionDuplicationException(e); + } + if (fileStoreDirsMap.containsKey(fileStore)) { + fileStoreDirsMap.get(fileStore).add(dir); + } else { + List<File> dirsList = new ArrayList<>(); + dirsList.add(dir); + fileStoreDirsMap.put(fileStore, dirsList); + } + } + + fileStoreDirsMap.forEach((fileStore, dirsList) -> { + if (dirsList.size() > 1) { + if (allowDiskPartitionDuplication) { + LOG.warn("Dirs: {} are in same DiskPartition/FileSystem: {}", dirsList, fileStore); + } else { + LOG.error("Dirs: {} are in same DiskPartition/FileSystem: {}", dirsList, fileStore); + isDuplicationFoundAndNotAllowed.setValue(true); + } + } + }); + if (isDuplicationFoundAndNotAllowed.getValue()) { + throw new BookieException.DiskPartitionDuplicationException(); + } + } + + public static BookieId getBookieId(ServerConfiguration conf) throws UnknownHostException { + String customBookieId = conf.getBookieId(); + if (customBookieId != null) { + return BookieId.parse(customBookieId); + } + return getBookieAddress(conf).toBookieId(); + } + + /** + * Return the configured address of the bookie. + */ + public static BookieSocketAddress getBookieAddress(ServerConfiguration conf) + throws UnknownHostException { + // Advertised address takes precedence over the listening interface and the + // useHostNameAsBookieID settings + if (conf.getAdvertisedAddress() != null && conf.getAdvertisedAddress().trim().length() > 0) { + String hostAddress = conf.getAdvertisedAddress().trim(); + return new BookieSocketAddress(hostAddress, conf.getBookiePort()); + } + + String iface = conf.getListeningInterface(); + if (iface == null) { + iface = "default"; + } + + String hostName = DNS.getDefaultHost(iface); + InetSocketAddress inetAddr = new InetSocketAddress(hostName, conf.getBookiePort()); + if (inetAddr.isUnresolved()) { + throw new UnknownHostException("Unable to resolve default hostname: " + + hostName + " for interface: " + iface); + } + String hostAddress = null; + InetAddress iAddress = inetAddr.getAddress(); + if (conf.getUseHostNameAsBookieID()) { + hostAddress = iAddress.getCanonicalHostName(); + if (conf.getUseShortHostName()) { + /* + * if short hostname is used, then FQDN is not used. Short + * hostname is the hostname cut at the first dot.
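+ * For example (illustrative value), a canonical hostname of
+ * "bookie-1.example.com" yields the short hostname "bookie-1",
+ * i.e. hostAddress.split("\\.", 2)[0].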
*/ + hostAddress = hostAddress.split("\\.", 2)[0]; + } + } else { + hostAddress = iAddress.getHostAddress(); + } + + BookieSocketAddress addr = + new BookieSocketAddress(hostAddress, conf.getBookiePort()); + if (addr.getSocketAddress().getAddress().isLoopbackAddress() + && !conf.getAllowLoopback()) { + throw new UnknownHostException("Trying to listen on loopback address, " + + addr + " but this is forbidden by default " + + "(see ServerConfiguration#getAllowLoopback()).\n" + + "If this happens, you can consider specifying the network interface" + + " to listen on (e.g. listeningInterface=eth0) or specifying the" + + " advertised address (e.g. advertisedAddress=172.x.y.z)"); + } + return addr; + } + + public LedgerDirsManager getLedgerDirsManager() { + return ledgerDirsManager; + } + + LedgerDirsManager getIndexDirsManager() { + return indexDirsManager; + } + + public long getTotalDiskSpace() throws IOException { + return getLedgerDirsManager().getTotalDiskSpace(ledgerDirsManager.getAllLedgerDirs()); + } + + public long getTotalFreeSpace() throws IOException { + return getLedgerDirsManager().getTotalFreeSpace(ledgerDirsManager.getAllLedgerDirs()); + } + + public static File getCurrentDirectory(File dir) { + return new File(dir, BookKeeperConstants.CURRENT_DIR); + } + + public static File[] getCurrentDirectories(File[] dirs) { + File[] currentDirs = new File[dirs.length]; + for (int i = 0; i < dirs.length; i++) { + currentDirs[i] = getCurrentDirectory(dirs[i]); + } + return currentDirs; + } + + /** + * Initialize LedgerStorage instance without checkpointing for use within the shell + * and other RO users. ledgerStorage must not have already been initialized. + * + *
<p>The caller is responsible for disposing of the ledgerStorage object. + * + * @param conf Bookie config. + * @param ledgerStorage Instance to initialize. + * @return Passed ledgerStorage instance + * @throws IOException + */ + public static LedgerStorage mountLedgerStorageOffline(ServerConfiguration conf, LedgerStorage ledgerStorage) + throws IOException { + StatsLogger statsLogger = NullStatsLogger.INSTANCE; + DiskChecker diskChecker = new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold()); + + LedgerDirsManager ledgerDirsManager = BookieResources.createLedgerDirsManager( + conf, diskChecker, statsLogger.scope(LD_LEDGER_SCOPE)); + LedgerDirsManager indexDirsManager = BookieResources.createIndexDirsManager( + conf, diskChecker, statsLogger.scope(LD_INDEX_SCOPE), ledgerDirsManager); + + if (null == ledgerStorage) { + ledgerStorage = BookieResources.createLedgerStorage(conf, null, + ledgerDirsManager, + indexDirsManager, + statsLogger, + UnpooledByteBufAllocator.DEFAULT); + } else { + ledgerStorage.initialize( + conf, + null, + ledgerDirsManager, + indexDirsManager, + statsLogger, + UnpooledByteBufAllocator.DEFAULT); + } + + ledgerStorage.setCheckpointSource(new CheckpointSource() { + @Override + public Checkpoint newCheckpoint() { + return Checkpoint.MIN; + } + + @Override + public void checkpointComplete(Checkpoint checkpoint, boolean compact) + throws IOException { + } + }); + ledgerStorage.setCheckpointer(Checkpointer.NULL); + return ledgerStorage; + } + + public BookieImpl(ServerConfiguration conf, + RegistrationManager registrationManager, + LedgerStorage storage, + DiskChecker diskChecker, + LedgerDirsManager ledgerDirsManager, + LedgerDirsManager indexDirsManager, + StatsLogger statsLogger, + ByteBufAllocator allocator, + Supplier<BookieServiceInfo> bookieServiceInfoProvider) + throws IOException, InterruptedException, BookieException { + this.bookieServiceInfoProvider = bookieServiceInfoProvider; + this.statsLogger = statsLogger; + this.conf = conf; + this.journalDirectories = Lists.newArrayList(); + for (File journalDirectory : conf.getJournalDirs()) { + this.journalDirectories.add(getCurrentDirectory(journalDirectory)); + } + this.ledgerDirsManager = ledgerDirsManager; + this.indexDirsManager = indexDirsManager; + this.writeDataToJournal = conf.getJournalWriteData(); + this.allocator = allocator; + this.registrationManager = registrationManager; + stateManager = initializeStateManager(); + checkEnvironment(); + + // register shutdown handler using trigger mode + stateManager.setShutdownHandler(exitCode -> triggerBookieShutdown(exitCode)); + // Initialise dirsMonitor. This looks through all the + // configured directories. When a disk error occurs or all the ledger + // directories are full, it throws an exception and fails bookie startup.
+ List dirsManagers = new ArrayList<>(); + dirsManagers.add(ledgerDirsManager); + if (indexDirsManager != ledgerDirsManager) { + dirsManagers.add(indexDirsManager); + } + this.dirsMonitor = new LedgerDirsMonitor(conf, diskChecker, dirsManagers); + try { + this.dirsMonitor.init(); + } catch (NoWritableLedgerDirException nle) { + // start in read-only mode if no writable dirs and read-only allowed + if (!conf.isReadOnlyModeEnabled()) { + throw nle; + } else { + this.stateManager.transitionToReadOnlyMode(); + } + } + + JournalAliveListener journalAliveListener = + () -> BookieImpl.this.triggerBookieShutdown(ExitCode.BOOKIE_EXCEPTION); + // instantiate the journals + journals = Lists.newArrayList(); + for (int i = 0; i < journalDirectories.size(); i++) { + journals.add(Journal.newJournal(i, journalDirectories.get(i), + conf, ledgerDirsManager, statsLogger.scope(JOURNAL_SCOPE), allocator, journalAliveListener)); + } + + this.entryLogPerLedgerEnabled = conf.isEntryLogPerLedgerEnabled(); + CheckpointSource checkpointSource = new CheckpointSourceList(journals); + + this.ledgerStorage = storage; + boolean isDbLedgerStorage = ledgerStorage instanceof DbLedgerStorage; + + /* + * with this change https://github.com/apache/bookkeeper/pull/677, + * LedgerStorage drives the checkpoint logic. + * + *
<p>There are two exceptions: + * + * 1) with multiple entry logs, checkpoint logic based on an entry log is + * not possible; hence it needs to be a time-based recurring task, and + * it is driven by SyncThread. SyncThread.start does that, and it is + * started in the Bookie.start method. + * + * 2) DbLedgerStorage + */ + if (entryLogPerLedgerEnabled || isDbLedgerStorage) { + syncThread = new SyncThread(conf, getLedgerDirsListener(), ledgerStorage, checkpointSource, statsLogger) { + @Override + public void startCheckpoint(Checkpoint checkpoint) { + /* + * in the case of entryLogPerLedgerEnabled, LedgerStorage + * doesn't drive the checkpoint logic; instead it is done + * periodically by SyncThread. So startCheckpoint, which + * will be called by LedgerStorage, is a no-op. + */ + } + + @Override + public void start() { + executor.scheduleAtFixedRate(() -> { + doCheckpoint(checkpointSource.newCheckpoint()); + }, conf.getFlushInterval(), conf.getFlushInterval(), TimeUnit.MILLISECONDS); + } + }; + } else { + syncThread = new SyncThread(conf, getLedgerDirsListener(), ledgerStorage, checkpointSource, statsLogger); + } + + LedgerStorage.LedgerDeletionListener ledgerDeletionListener = new LedgerStorage.LedgerDeletionListener() { + @Override + public void ledgerDeleted(long ledgerId) { + masterKeyCache.remove(ledgerId); + } + }; + + ledgerStorage.setStateManager(stateManager); + ledgerStorage.setCheckpointSource(checkpointSource); + ledgerStorage.setCheckpointer(syncThread); + ledgerStorage.registerLedgerDeletionListener(ledgerDeletionListener); + handles = new HandleFactoryImpl(ledgerStorage); + + // Expose Stats + this.bookieStats = new BookieStats(statsLogger, journalDirectories.size(), conf.getJournalQueueSize()); + } + + @VisibleForTesting + public static BookieImpl newBookieImpl(ServerConfiguration conf, + RegistrationManager registrationManager, + LedgerStorage storage, + DiskChecker diskChecker, + LedgerDirsManager ledgerDirsManager, + LedgerDirsManager indexDirsManager, + StatsLogger statsLogger, + ByteBufAllocator allocator, + Supplier<BookieServiceInfo> bookieServiceInfoProvider) + throws IOException, InterruptedException, BookieException { + return new BookieImpl(conf, registrationManager, storage, diskChecker, + ledgerDirsManager, indexDirsManager, statsLogger, allocator, bookieServiceInfoProvider); + } + + StateManager initializeStateManager() throws IOException { + return new BookieStateManager(conf, statsLogger, registrationManager, + ledgerDirsManager, bookieServiceInfoProvider); + } + + void readJournal() throws IOException, BookieException { + if (!conf.getJournalWriteData()) { + LOG.warn("Journal disabled for add entry requests. Running BookKeeper this way can " + "lead to data loss.
It is recommended to use data integrity checking when " + "running without the journal to minimize data loss risk"); + } + + long startTs = System.currentTimeMillis(); + JournalScanner scanner = new JournalScanner() { + @Override + public void process(int journalVersion, long offset, ByteBuffer recBuff) throws IOException { + long ledgerId = recBuff.getLong(); + long entryId = recBuff.getLong(); + try { + if (LOG.isDebugEnabled()) { + LOG.debug("Replay journal - ledger id : {}, entry id : {}.", ledgerId, entryId); + } + if (entryId == METAENTRY_ID_LEDGER_KEY) { + if (journalVersion >= JournalChannel.V3) { + int masterKeyLen = recBuff.getInt(); + byte[] masterKey = new byte[masterKeyLen]; + + recBuff.get(masterKey); + masterKeyCache.put(ledgerId, masterKey); + + // Force to re-insert the master key in ledger storage + handles.getHandle(ledgerId, masterKey); + } else { + throw new IOException("Invalid journal. Contains journalKey " + " but layout version (" + journalVersion + ") is too old to hold this"); + } + } else if (entryId == METAENTRY_ID_FENCE_KEY) { + if (journalVersion >= JournalChannel.V4) { + byte[] key = masterKeyCache.get(ledgerId); + if (key == null) { + key = ledgerStorage.readMasterKey(ledgerId); + } + LedgerDescriptor handle = handles.getHandle(ledgerId, key); + handle.setFenced(); + } else { + throw new IOException("Invalid journal. Contains fenceKey " + " but layout version (" + journalVersion + ") is too old to hold this"); + } + } else if (entryId == METAENTRY_ID_LEDGER_EXPLICITLAC) { + if (journalVersion >= JournalChannel.V6) { + int explicitLacBufLength = recBuff.getInt(); + ByteBuf explicitLacBuf = Unpooled.buffer(explicitLacBufLength); + byte[] explicitLacBufArray = new byte[explicitLacBufLength]; + recBuff.get(explicitLacBufArray); + explicitLacBuf.writeBytes(explicitLacBufArray); + byte[] key = masterKeyCache.get(ledgerId); + if (key == null) { + key = ledgerStorage.readMasterKey(ledgerId); + } + LedgerDescriptor handle = handles.getHandle(ledgerId, key); + handle.setExplicitLac(explicitLacBuf); + } else { + throw new IOException("Invalid journal. Contains explicitLAC " + " but layout version (" + journalVersion + ") is too old to hold this"); + } + } else if (entryId < 0) { + /* + * This is possible if the bookie code binary is rolled back + * to an older version while it is trying to read a + * journal that was created previously using a newer + * code/journal version, which introduced a new special + * entry. In any case, if we see an unrecognizable + * special entry while replaying the journal we should skip + * (ignore) it. + */ + LOG.warn("Read unrecognizable entryId: {} for ledger: {} while replaying Journal. Skipping it", + entryId, ledgerId); + } else { + byte[] key = masterKeyCache.get(ledgerId); + if (key == null) { + key = ledgerStorage.readMasterKey(ledgerId); + } + LedgerDescriptor handle = handles.getHandle(ledgerId, key); + + recBuff.rewind(); + handle.addEntry(Unpooled.wrappedBuffer(recBuff)); + } + } catch (NoLedgerException nsle) { + if (LOG.isDebugEnabled()) { + LOG.debug("Skip replaying entries of ledger {} since it was deleted.", ledgerId); + } + } catch (BookieException be) { + throw new IOException(be); + } + } + }; + + for (Journal journal : journals) { + replay(journal, scanner); + } + long elapsedTs = System.currentTimeMillis() - startTs; + LOG.info("Finished replaying journal in {} ms.", elapsedTs); + } + + /** + * Replays journal files and updates the journal's in-memory lastLogMark object.
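+ * <p>A JournalScanner is a single callback per journal record; a hedged sketch
+ * (hypothetical scanner, for illustration only, assuming the lone process()
+ * method shown above) that merely logs each record:
+ * <pre>{@code
+ * JournalScanner printer = (journalVersion, offset, recBuff) -> {
+ *     long ledgerId = recBuff.getLong();
+ *     long entryId = recBuff.getLong();
+ *     LOG.info("Record at offset {}: ledger {}, entry {}", offset, ledgerId, entryId);
+ * };
+ * }</pre>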
+ * + * @param journal Journal object corresponding to a journalDir + * @param scanner Scanner to process replayed entries. + * @throws IOException + */ + private void replay(Journal journal, JournalScanner scanner) throws IOException { + final LogMark markedLog = journal.getLastLogMark().getCurMark(); + List<Long> logs = Journal.listJournalIds(journal.getJournalDirectory(), journalId -> + journalId >= markedLog.getLogFileId()); + // The last log mark may be missed if there was no sync-up before; + // validate the filtered log ids only when we have a markedLogId. + if (markedLog.getLogFileId() > 0) { + if (logs.size() == 0 || logs.get(0) != markedLog.getLogFileId()) { + String path = journal.getJournalDirectory().getAbsolutePath(); + throw new IOException("Recovery log " + markedLog.getLogFileId() + " is missing at " + path); + } + } + + // TODO: When reading in the journal logs that need to be synced, we + // should use BufferedChannels instead to minimize the amount of + // system calls done. + for (Long id : logs) { + long logPosition = 0L; + if (id == markedLog.getLogFileId()) { + logPosition = markedLog.getLogFileOffset(); + } + LOG.info("Replaying journal {} from position {}", id, logPosition); + long scanOffset = journal.scanJournal(id, logPosition, scanner, conf.isSkipReplayJournalInvalidRecord()); + // Update LastLogMark after completely replaying journal + // scanOffset will point to EOF position + // After LedgerStorage flush, SyncThread should persist this to disk + journal.setLastLogMark(id, scanOffset); + } + } + + @Override + public synchronized void start() { + bookieThread = new BookieCriticalThread(() -> run(), "Bookie-" + conf.getBookiePort()); + bookieThread.setDaemon(true); + + ThreadRegistry.register("BookieThread", true); + if (LOG.isDebugEnabled()) { + LOG.debug("I'm starting a bookie with journal directories {}", + journalDirectories.stream().map(File::getName).collect(Collectors.joining(", "))); + } + //Start DiskChecker thread + dirsMonitor.start(); + + // replay journals + try { + readJournal(); + } catch (IOException | BookieException ioe) { + LOG.error("Exception while replaying journals, shutting down", ioe); + shutdown(ExitCode.BOOKIE_EXCEPTION); + return; + } + + // Do a full flush after journal replay + try { + syncThread.requestFlush().get(); + } catch (InterruptedException e) { + LOG.warn("Interrupted during the full flush after replaying journals : ", e); + Thread.currentThread().interrupt(); + } catch (ExecutionException e) { + LOG.error("Error executing a full flush after replaying journals."); + shutdown(ExitCode.BOOKIE_EXCEPTION); + return; + } + + if (conf.isLocalConsistencyCheckOnStartup()) { + LOG.info("Running local consistency check on startup prior to accepting IO."); + List<LedgerStorage.DetectedInconsistency> errors = null; + try { + errors = ledgerStorage.localConsistencyCheck(Optional.empty()); + } catch (IOException e) { + LOG.error("Got a fatal exception while checking store", e); + shutdown(ExitCode.BOOKIE_EXCEPTION); + return; + } + if (errors != null && errors.size() > 0) { + LOG.error("Bookie failed local consistency check:"); + for (LedgerStorage.DetectedInconsistency error : errors) { + LOG.error("Ledger {}, entry {}: ", error.getLedgerId(), error.getEntryId(), error.getException()); + } + shutdown(ExitCode.BOOKIE_EXCEPTION); + return; + } + } + + LOG.info("Finished reading journal, starting bookie"); + + + /* + * Start the sync thread first, so that checkpoints can be taken during + * journal replay; this reduces the chance that we need to replay journals + * again if the bookie is restarted before
the journal replay has finished. + */ + syncThread.start(); + + // start bookie thread + bookieThread.start(); + + // After successful bookie startup, register listener for disk + // error/full notifications. + ledgerDirsManager.addLedgerDirsListener(getLedgerDirsListener()); + if (indexDirsManager != ledgerDirsManager) { + indexDirsManager.addLedgerDirsListener(getLedgerDirsListener()); + } + + ledgerStorage.start(); + + // Check the bookie status to start with, and set running, + // since the bookie server uses "running" as a flag to tell whether the bookie is alive. + // If it were set in the bookie thread, the watcher might run before the bookie thread. + stateManager.initState(); + + try { + stateManager.registerBookie(true).get(); + } catch (Exception e) { + LOG.error("Couldn't register bookie with zookeeper, shutting down : ", e); + shutdown(ExitCode.ZK_REG_FAIL); + } + } + + @Override + public void join() throws InterruptedException { + if (bookieThread != null) { + bookieThread.join(); + } + } + + public boolean isAlive() { + if (bookieThread == null) { + return false; + } + return bookieThread.isAlive(); + } + + /* + * Get the disk-failure listener for the bookie. + */ + private LedgerDirsListener getLedgerDirsListener() { + + return new LedgerDirsListener() { + + @Override + public void diskFailed(File disk) { + // Shutdown the bookie on disk failure. + triggerBookieShutdown(ExitCode.BOOKIE_EXCEPTION); + } + + @Override + public void allDisksFull(boolean highPriorityWritesAllowed) { + // Transition to readOnly mode on all disks full + stateManager.setHighPriorityWritesAvailability(highPriorityWritesAllowed); + stateManager.transitionToReadOnlyMode(); + } + + @Override + public void fatalError() { + LOG.error("Fatal error reported by ledgerDirsManager"); + triggerBookieShutdown(ExitCode.BOOKIE_EXCEPTION); + } + + @Override + public void diskWritable(File disk) { + if (conf.isReadOnlyModeOnAnyDiskFullEnabled()) { + return; + } + // Transition to writable mode when a disk becomes writable again. + stateManager.setHighPriorityWritesAvailability(true); + stateManager.transitionToWritableMode(); + } + + @Override + public void diskJustWritable(File disk) { + if (conf.isReadOnlyModeOnAnyDiskFullEnabled()) { + return; + } + // Transition to writable mode when a disk becomes writable again. + stateManager.setHighPriorityWritesAvailability(true); + stateManager.transitionToWritableMode(); + } + + @Override + public void anyDiskFull(boolean highPriorityWritesAllowed) { + if (conf.isReadOnlyModeOnAnyDiskFullEnabled()) { + stateManager.setHighPriorityWritesAvailability(highPriorityWritesAllowed); + stateManager.transitionToReadOnlyMode(); + } + } + + @Override + public void allDisksWritable() { + // Transition to writable mode when a disk becomes writable again. + stateManager.setHighPriorityWritesAvailability(true); + stateManager.transitionToWritableMode(); + } + }; + } + + /* + * Check whether the Bookie is read-only. + */ + public boolean isReadOnly() { + return stateManager.isReadOnly(); + } + + /** + * Check whether the Bookie is available for high-priority writes. + * + * @return true if the bookie is able to take high priority writes.
+ */ + public boolean isAvailableForHighPriorityWrites() { + return stateManager.isAvailableForHighPriorityWrites(); + } + + public boolean isRunning() { + return stateManager.isRunning(); + } + + public void run() { + // start journals + for (Journal journal: journals) { + journal.start(); + } + } + + // Triggering the Bookie shutdown in its own thread, + // because shutdown can be called from the sync thread, which would be + // interrupted by the shutdown call. + AtomicBoolean shutdownTriggered = new AtomicBoolean(false); + void triggerBookieShutdown(final int exitCode) { + if (!shutdownTriggered.compareAndSet(false, true)) { + return; + } + LOG.info("Triggering shutdown of Bookie-{} with exitCode {}", + conf.getBookiePort(), exitCode); + BookieThread th = new BookieThread("BookieShutdownTrigger") { + @Override + public void run() { + BookieImpl.this.shutdown(exitCode); + } + }; + th.start(); + } + + // Public shutdown method so other callers + // can shut down the bookie gracefully. + public int shutdown() { + return shutdown(ExitCode.OK); + } + // Internal shutdown method used to shut down the bookie gracefully + // when an exception is encountered. + ReentrantLock lock = new ReentrantLock(true); + int shutdown(int exitCode) { + lock.lock(); + try { + if (isRunning()) { + // the exitCode is only set on the first shutdown, usually due to an exception + LOG.info("Shutting down Bookie-{} with exitCode {}", + conf.getBookiePort(), exitCode); + if (this.exitCode == ExitCode.OK) { + this.exitCode = exitCode; + } + + stateManager.forceToShuttingDown(); + + // turn the bookie read-only during the shutdown process + LOG.info("Turning bookie to read only during shut down"); + stateManager.forceToReadOnly(); + + // Shutdown Sync thread + syncThread.shutdown(); + + // Shutdown journals + for (Journal journal : journals) { + journal.shutdown(); + } + + // Shutdown the EntryLogger which has the GarbageCollector Thread running + ledgerStorage.shutdown(); + + // Shutdown disk checker + dirsMonitor.shutdown(); + } + } catch (InterruptedException ie) { + Thread.currentThread().interrupt(); + LOG.error("Interrupted during shutting down bookie : ", ie); + } catch (Exception e) { + LOG.error("Got Exception while trying to shutdown Bookie", e); + throw e; + } finally { + lock.unlock(); + // Set running to false here, so the watch thread + // in the bookie server learns of it only after the bookie has shut down. + stateManager.close(); + } + return this.exitCode; + } + + /** + * Retrieve the ledger descriptor for the ledger to which the entry should be added. + * The LedgerDescriptor returned from this method should be eventually freed with + * #putHandle().
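+ * <p>The ledger id is read from the first eight bytes of the entry buffer
+ * without moving its reader index, i.e.:
+ * <pre>{@code
+ * long ledgerId = entry.getLong(entry.readerIndex());
+ * }</pre>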
+ * + * @throws BookieException if masterKey does not match the master key of the ledger + */ + @VisibleForTesting + LedgerDescriptor getLedgerForEntry(ByteBuf entry, final byte[] masterKey) + throws IOException, BookieException { + final long ledgerId = entry.getLong(entry.readerIndex()); + + return handles.getHandle(ledgerId, masterKey); + } + + private Journal getJournal(long ledgerId) { + return journals.get(MathUtils.signSafeMod(ledgerId, journals.size())); + } + + @VisibleForTesting + public ByteBuf createMasterKeyEntry(long ledgerId, byte[] masterKey) { + // new handle; we should add the key to the journal to ensure we can rebuild + ByteBuf bb = allocator.directBuffer(8 + 8 + 4 + masterKey.length); + bb.writeLong(ledgerId); + bb.writeLong(METAENTRY_ID_LEDGER_KEY); + bb.writeInt(masterKey.length); + bb.writeBytes(masterKey); + return bb; + } + + /** + * Add an entry to a ledger as specified by handle. + */ + private void addEntryInternal(LedgerDescriptor handle, ByteBuf entry, + boolean ackBeforeSync, WriteCallback cb, Object ctx, byte[] masterKey) + throws IOException, BookieException, InterruptedException { + long ledgerId = handle.getLedgerId(); + long entryId = handle.addEntry(entry); + + bookieStats.getWriteBytes().addCount(entry.readableBytes()); + + // journal `addEntry` should happen after the entry is added to ledger storage; + // otherwise the journal entry can potentially be rolled before the ledger is created in ledger storage. + if (masterKeyCache.get(ledgerId) == null) { + // Force the load into masterKey cache + byte[] oldValue = masterKeyCache.putIfAbsent(ledgerId, masterKey); + if (oldValue == null) { + ByteBuf masterKeyEntry = createMasterKeyEntry(ledgerId, masterKey); + try { + getJournal(ledgerId).logAddEntry( + masterKeyEntry, false /* ackBeforeSync */, new NopWriteCallback(), null); + } finally { + ReferenceCountUtil.release(masterKeyEntry); + } + } + } + + if (!writeDataToJournal) { + cb.writeComplete(0, ledgerId, entryId, null, ctx); + if (ctx instanceof BookieRequestHandler) { + ((BookieRequestHandler) ctx).flushPendingResponse(); + } + return; + } + + if (LOG.isTraceEnabled()) { + LOG.trace("Adding {}@{}", entryId, ledgerId); + } + getJournal(ledgerId).logAddEntry(entry, ackBeforeSync, cb, ctx); + } + + /** + * Add an entry to a ledger, even if the ledger has previously been fenced. This should only + * happen in bookie recovery or ledger recovery cases, where entries are being replicated + * so that they exist on a quorum of bookies. The corresponding client-side call for this + * is not exposed to users.
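+ * <p>As with a normal add, the entry buffer is expected to begin with the
+ * ledger id and entry id; a hedged sketch of building such a buffer
+ * (illustrative values only):
+ * <pre>{@code
+ * ByteBuf entry = Unpooled.buffer();
+ * entry.writeLong(ledgerId);  // first 8 bytes: ledger id
+ * entry.writeLong(entryId);   // next 8 bytes: entry id
+ * entry.writeBytes(payload);
+ * }</pre>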
+ */ + public void recoveryAddEntry(ByteBuf entry, WriteCallback cb, Object ctx, byte[] masterKey) + throws IOException, BookieException, InterruptedException { + long requestNanos = MathUtils.nowInNano(); + boolean success = false; + int entrySize = 0; + try { + LedgerDescriptor handle = getLedgerForEntry(entry, masterKey); + synchronized (handle) { + entrySize = entry.readableBytes(); + addEntryInternal(handle, entry, false /* ackBeforeSync */, cb, ctx, masterKey); + } + success = true; + } catch (NoWritableLedgerDirException e) { + stateManager.transitionToReadOnlyMode(); + throw new IOException(e); + } finally { + long elapsedNanos = MathUtils.elapsedNanos(requestNanos); + if (success) { + bookieStats.getRecoveryAddEntryStats().registerSuccessfulEvent(elapsedNanos, TimeUnit.NANOSECONDS); + bookieStats.getAddBytesStats().registerSuccessfulValue(entrySize); + } else { + bookieStats.getRecoveryAddEntryStats().registerFailedEvent(elapsedNanos, TimeUnit.NANOSECONDS); + bookieStats.getAddBytesStats().registerFailedValue(entrySize); + } + + ReferenceCountUtil.release(entry); + } + } + + @VisibleForTesting + public ByteBuf createExplicitLACEntry(long ledgerId, ByteBuf explicitLac) { + ByteBuf bb = allocator.directBuffer(8 + 8 + 4 + explicitLac.capacity()); + bb.writeLong(ledgerId); + bb.writeLong(METAENTRY_ID_LEDGER_EXPLICITLAC); + bb.writeInt(explicitLac.capacity()); + bb.writeBytes(explicitLac); + return bb; + } + + public void setExplicitLac(ByteBuf entry, WriteCallback writeCallback, Object ctx, byte[] masterKey) + throws IOException, InterruptedException, BookieException { + ByteBuf explicitLACEntry = null; + try { + long ledgerId = entry.getLong(entry.readerIndex()); + LedgerDescriptor handle = handles.getHandle(ledgerId, masterKey); + synchronized (handle) { + entry.markReaderIndex(); + handle.setExplicitLac(entry); + entry.resetReaderIndex(); + explicitLACEntry = createExplicitLACEntry(ledgerId, entry); + getJournal(ledgerId).logAddEntry(explicitLACEntry, false /* ackBeforeSync */, writeCallback, ctx); + } + } catch (NoWritableLedgerDirException e) { + stateManager.transitionToReadOnlyMode(); + throw new IOException(e); + } finally { + ReferenceCountUtil.release(entry); + if (explicitLACEntry != null) { + ReferenceCountUtil.release(explicitLACEntry); + } + } + } + + public ByteBuf getExplicitLac(long ledgerId) throws IOException, Bookie.NoLedgerException, BookieException { + ByteBuf lac; + LedgerDescriptor handle = handles.getReadOnlyHandle(ledgerId); + synchronized (handle) { + lac = handle.getExplicitLac(); + } + return lac; + } + + /** + * Force sync given 'ledgerId' entries on the journal to the disk. + * It works like a regular addEntry with ackBeforeSync=false. + * This is useful for ledgers with DEFERRED_SYNC write flag. + */ + public void forceLedger(long ledgerId, WriteCallback cb, + Object ctx) { + if (LOG.isTraceEnabled()) { + LOG.trace("Forcing ledger {}", ledgerId); + } + Journal journal = getJournal(ledgerId); + journal.forceLedger(ledgerId, cb, ctx); + bookieStats.getForceLedgerOps().inc(); + } + + /** + * Add entry to a ledger. 
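+ * <p>A minimal caller sketch; the callback and master key here are
+ * placeholders, not part of this API:
+ * <pre>{@code
+ * WriteCallback cb = (rc, ledgerId, entryId, addr, ctx) -> { };
+ * bookie.addEntry(entry, false, cb, null, masterKey);  // ackBeforeSync=false
+ * }</pre>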
+ */ + public void addEntry(ByteBuf entry, boolean ackBeforeSync, WriteCallback cb, Object ctx, byte[] masterKey) + throws IOException, BookieException, InterruptedException { + long requestNanos = MathUtils.nowInNano(); + boolean success = false; + int entrySize = 0; + try { + LedgerDescriptor handle = getLedgerForEntry(entry, masterKey); + synchronized (handle) { + if (handle.isFenced()) { + throw BookieException + .create(BookieException.Code.LedgerFencedException); + } + entrySize = entry.readableBytes(); + addEntryInternal(handle, entry, ackBeforeSync, cb, ctx, masterKey); + } + success = true; + } catch (NoWritableLedgerDirException e) { + stateManager.transitionToReadOnlyMode(); + throw new IOException(e); + } finally { + long elapsedNanos = MathUtils.elapsedNanos(requestNanos); + if (success) { + bookieStats.getAddEntryStats().registerSuccessfulEvent(elapsedNanos, TimeUnit.NANOSECONDS); + bookieStats.getAddBytesStats().registerSuccessfulValue(entrySize); + } else { + bookieStats.getAddEntryStats().registerFailedEvent(elapsedNanos, TimeUnit.NANOSECONDS); + bookieStats.getAddBytesStats().registerFailedValue(entrySize); + } + + ReferenceCountUtil.release(entry); + } + } + + /** + * Fences a ledger. From this point on, clients will be unable to + * write to this ledger. Only recoveryAddEntry will be + * able to add entries to the ledger. + * This method is idempotent. Once a ledger is fenced, it can + * never be unfenced. Fencing a fenced ledger has no effect. + * @return + */ + public CompletableFuture fenceLedger(long ledgerId, byte[] masterKey) + throws IOException, BookieException { + LedgerDescriptor handle = handles.getHandle(ledgerId, masterKey); + return handle.fenceAndLogInJournal(getJournal(ledgerId)); + } + + public ByteBuf readEntry(long ledgerId, long entryId) + throws IOException, NoLedgerException, BookieException { + long requestNanos = MathUtils.nowInNano(); + boolean success = false; + int entrySize = 0; + try { + LedgerDescriptor handle = handles.getReadOnlyHandle(ledgerId); + if (LOG.isTraceEnabled()) { + LOG.trace("Reading {}@{}", entryId, ledgerId); + } + ByteBuf entry = handle.readEntry(entryId); + entrySize = entry.readableBytes(); + bookieStats.getReadBytes().addCount(entrySize); + success = true; + return entry; + } finally { + long elapsedNanos = MathUtils.elapsedNanos(requestNanos); + if (success) { + bookieStats.getReadEntryStats().registerSuccessfulEvent(elapsedNanos, TimeUnit.NANOSECONDS); + bookieStats.getReadBytesStats().registerSuccessfulValue(entrySize); + } else { + bookieStats.getReadEntryStats().registerFailedEvent(elapsedNanos, TimeUnit.NANOSECONDS); + bookieStats.getReadBytesStats().registerFailedValue(entrySize); + } + } + } + + public long readLastAddConfirmed(long ledgerId) throws IOException, BookieException { + LedgerDescriptor handle = handles.getReadOnlyHandle(ledgerId); + return handle.getLastAddConfirmed(); + } + + public boolean waitForLastAddConfirmedUpdate(long ledgerId, + long previousLAC, + Watcher watcher) + throws IOException { + LedgerDescriptor handle = handles.getReadOnlyHandle(ledgerId); + return handle.waitForLastAddConfirmedUpdate(previousLAC, watcher); + } + + public void cancelWaitForLastAddConfirmedUpdate(long ledgerId, + Watcher watcher) + throws IOException { + LedgerDescriptor handle = handles.getReadOnlyHandle(ledgerId); + handle.cancelWaitForLastAddConfirmedUpdate(watcher); + } + + @VisibleForTesting + public LedgerStorage getLedgerStorage() { + return ledgerStorage; + } + + @VisibleForTesting + public 
BookieStateManager getStateManager() { + return (BookieStateManager) this.stateManager; + } + + public ByteBufAllocator getAllocator() { + return allocator; + } + + /** + * Format the bookie server data. + * + * @param conf ServerConfiguration + * @param isInteractive Whether format should prompt for confirmation if old data exists. + * @param force If non-interactive and force is true, old data will be removed without a confirmation prompt. + * @return true if the format succeeds, false otherwise + */ + public static boolean format(ServerConfiguration conf, + boolean isInteractive, boolean force) { + for (File journalDir : conf.getJournalDirs()) { + String[] journalDirFiles = + journalDir.exists() && journalDir.isDirectory() ? journalDir.list() : null; + if (journalDirFiles != null && journalDirFiles.length != 0) { + try { + boolean confirm = false; + if (!isInteractive) { + // If non-interactive and force is set, then delete old + // data. + confirm = force; + } else { + confirm = IOUtils + .confirmPrompt("Are you sure you want to format the bookie data?"); + } + + if (!confirm) { + LOG.error("Bookie format aborted!!"); + return false; + } + } catch (IOException e) { + LOG.error("Error during bookie format", e); + return false; + } + } + if (!cleanDir(journalDir)) { + LOG.error("Formatting journal directory failed"); + return false; + } + } + + File[] ledgerDirs = conf.getLedgerDirs(); + for (File dir : ledgerDirs) { + if (!cleanDir(dir)) { + LOG.error("Formatting ledger directory " + dir + " failed"); + return false; + } + } + + // Clean up index directories if they are separate from the ledger dirs + File[] indexDirs = conf.getIndexDirs(); + if (null != indexDirs) { + for (File dir : indexDirs) { + if (!cleanDir(dir)) { + LOG.error("Formatting index directory " + dir + " failed"); + return false; + } + } + } + + // Clean up metadata directories if they are separate from the + // ledger dirs + if (!Strings.isNullOrEmpty(conf.getGcEntryLogMetadataCachePath())) { + File metadataDir = new File(conf.getGcEntryLogMetadataCachePath()); + if (!cleanDir(metadataDir)) { + LOG.error("Formatting ledger metadata directory {} failed", metadataDir); + return false; + } + } + LOG.info("Bookie format completed successfully"); + return true; + } + + private static boolean cleanDir(File dir) { + if (dir.exists()) { + File[] files = dir.listFiles(); + if (files != null) { + for (File child : files) { + boolean delete = FileUtils.deleteQuietly(child); + if (!delete) { + LOG.error("Not able to delete " + child); + return false; + } + } + } + } else if (!dir.mkdirs()) { + LOG.error("Not able to create the directory " + dir); + return false; + } + return true; + } + + /** + * Returns the exit code, i.e. the cause of failure.
+ * + * @return {@link ExitCode} + */ + public int getExitCode() { + return exitCode; + } + + public OfLong getListOfEntriesOfLedger(long ledgerId) throws IOException, NoLedgerException { + long requestNanos = MathUtils.nowInNano(); + boolean success = false; + try { + LedgerDescriptor handle = handles.getReadOnlyHandle(ledgerId); + if (LOG.isTraceEnabled()) { + LOG.trace("GetEntriesOfLedger {}", ledgerId); + } + OfLong entriesOfLedger = handle.getListOfEntriesOfLedger(ledgerId); + success = true; + return entriesOfLedger; + } finally { + long elapsedNanos = MathUtils.elapsedNanos(requestNanos); + if (success) { + bookieStats.getReadEntryStats().registerSuccessfulEvent(elapsedNanos, TimeUnit.NANOSECONDS); + } else { + bookieStats.getReadEntryStats().registerFailedEvent(elapsedNanos, TimeUnit.NANOSECONDS); + } + } + } + + @VisibleForTesting + public List getJournals() { + return this.journals; + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BookieResources.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BookieResources.java new file mode 100644 index 00000000000..c9b71b9968d --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BookieResources.java @@ -0,0 +1,113 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bookkeeper.bookie; + +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.BOOKIE_SCOPE; + +import io.netty.buffer.ByteBufAllocator; +import java.io.File; +import java.io.IOException; +import java.net.URI; +import org.apache.bookkeeper.common.allocator.ByteBufAllocatorBuilder; +import org.apache.bookkeeper.common.allocator.ByteBufAllocatorWithOomHandler; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.meta.LedgerManager; +import org.apache.bookkeeper.meta.MetadataBookieDriver; +import org.apache.bookkeeper.meta.MetadataDrivers; +import org.apache.bookkeeper.meta.exceptions.MetadataException; +import org.apache.bookkeeper.stats.StatsLogger; +import org.apache.bookkeeper.util.DiskChecker; +import org.apache.commons.configuration.ConfigurationException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Centralizes the creation of injected resources. + */ +public class BookieResources { + private static final Logger log = LoggerFactory.getLogger(BookieResources.class); + + /** + * Instantiate the metadata driver for the Bookie. 
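+ * <p>The factories in this class are meant to be composed when wiring a
+ * bookie; a hedged sketch using only methods defined here:
+ * <pre>{@code
+ * DiskChecker checker = BookieResources.createDiskChecker(conf);
+ * LedgerDirsManager ledgerDirs =
+ *     BookieResources.createLedgerDirsManager(conf, checker, statsLogger);
+ * LedgerDirsManager indexDirs =
+ *     BookieResources.createIndexDirsManager(conf, checker, statsLogger, ledgerDirs);
+ * }</pre>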
+ */ + public static MetadataBookieDriver createMetadataDriver(ServerConfiguration conf, + StatsLogger statsLogger) throws BookieException { + try { + String metadataServiceUriStr = conf.getMetadataServiceUri(); + if (null == metadataServiceUriStr) { + throw new BookieException.MetadataStoreException("Metadata URI must not be null"); + } + + MetadataBookieDriver driver = MetadataDrivers.getBookieDriver( + URI.create(metadataServiceUriStr)); + driver.initialize(conf, statsLogger.scope(BOOKIE_SCOPE)); + return driver; + } catch (MetadataException me) { + throw new BookieException.MetadataStoreException("Failed to initialize metadata bookie driver", me); + } catch (ConfigurationException e) { + throw new BookieException.BookieIllegalOpException(e); + } + } + + public static ByteBufAllocatorWithOomHandler createAllocator(ServerConfiguration conf) { + return ByteBufAllocatorBuilder.create() + .poolingPolicy(conf.getAllocatorPoolingPolicy()) + .poolingConcurrency(conf.getAllocatorPoolingConcurrency()) + .outOfMemoryPolicy(conf.getAllocatorOutOfMemoryPolicy()) + .leakDetectionPolicy(conf.getAllocatorLeakDetectionPolicy()) + .build(); + } + + public static DiskChecker createDiskChecker(ServerConfiguration conf) { + return new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold()); + } + + public static LedgerDirsManager createLedgerDirsManager(ServerConfiguration conf, DiskChecker diskChecker, + StatsLogger statsLogger) throws IOException { + return new LedgerDirsManager(conf, conf.getLedgerDirs(), diskChecker, statsLogger); + } + + public static LedgerDirsManager createIndexDirsManager(ServerConfiguration conf, DiskChecker diskChecker, + StatsLogger statsLogger, LedgerDirsManager fallback) + throws IOException { + File[] idxDirs = conf.getIndexDirs(); + if (null == idxDirs) { + return fallback; + } else { + return new LedgerDirsManager(conf, idxDirs, diskChecker, statsLogger); + } + } + + public static LedgerStorage createLedgerStorage(ServerConfiguration conf, + LedgerManager ledgerManager, + LedgerDirsManager ledgerDirsManager, + LedgerDirsManager indexDirsManager, + StatsLogger statsLogger, + ByteBufAllocator allocator) throws IOException { + // Instantiate the ledger storage implementation + String ledgerStorageClass = conf.getLedgerStorageClass(); + log.info("Using ledger storage: {}", ledgerStorageClass); + LedgerStorage storage = LedgerStorageFactory.createLedgerStorage(ledgerStorageClass); + + storage.initialize(conf, ledgerManager, ledgerDirsManager, indexDirsManager, statsLogger, allocator); + storage.setCheckpointSource(CheckpointSource.DEFAULT); + return storage; + } + + +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BookieShell.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BookieShell.java index 4434cfc3aef..00869c7fc85 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BookieShell.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BookieShell.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file * distributed with this work for additional information @@ -18,111 +18,88 @@ package org.apache.bookkeeper.bookie; -import static com.google.common.base.Charsets.UTF_8; import static org.apache.bookkeeper.meta.MetadataDrivers.runFunctionWithLedgerManagerFactory; -import static org.apache.bookkeeper.meta.MetadataDrivers.runFunctionWithMetadataBookieDriver; -import static org.apache.bookkeeper.meta.MetadataDrivers.runFunctionWithRegistrationManager; -import static org.apache.bookkeeper.tools.cli.helpers.CommandHelpers.getBookieSocketAddrStringRepresentation; +import static org.apache.bookkeeper.tools.cli.commands.bookie.LastMarkCommand.newLastMarkCommand; +import static org.apache.bookkeeper.tools.cli.commands.bookies.ClusterInfoCommand.newClusterInfoCommand; +import static org.apache.bookkeeper.tools.cli.commands.bookies.ListBookiesCommand.newListBookiesCommand; +import static org.apache.bookkeeper.tools.cli.commands.client.SimpleTestCommand.newSimpleTestCommand; import com.google.common.annotations.VisibleForTesting; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; -import com.google.common.util.concurrent.AbstractFuture; -import com.google.common.util.concurrent.UncheckedExecutionException; -import io.netty.buffer.ByteBuf; -import io.netty.buffer.ByteBufUtil; -import io.netty.buffer.Unpooled; -import io.netty.channel.EventLoopGroup; -import io.netty.channel.nio.NioEventLoopGroup; -import io.netty.util.concurrent.DefaultThreadFactory; import java.io.File; -import java.io.FileNotFoundException; import java.io.IOException; import java.io.Serializable; -import java.math.RoundingMode; -import java.net.URI; -import java.nio.ByteBuffer; -import java.nio.file.FileSystems; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; import java.nio.file.attribute.BasicFileAttributes; import java.nio.file.attribute.FileTime; -import java.text.DecimalFormat; +import java.sql.Timestamp; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.Comparator; -import java.util.Enumeration; -import java.util.Formatter; import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.LinkedList; import java.util.List; import java.util.Map; -import java.util.Set; -import java.util.SortedMap; -import java.util.TreeMap; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.CountDownLatch; -import java.util.concurrent.Executors; -import java.util.concurrent.ScheduledExecutorService; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.function.Predicate; -import java.util.stream.LongStream; -import org.apache.bookkeeper.bookie.BookieException.CookieNotFoundException; -import org.apache.bookkeeper.bookie.BookieException.InvalidCookieException; -import org.apache.bookkeeper.bookie.CheckpointSource.Checkpoint; -import org.apache.bookkeeper.bookie.EntryLogger.EntryLogScanner; -import org.apache.bookkeeper.bookie.Journal.JournalScanner; -import org.apache.bookkeeper.bookie.storage.ldb.DbLedgerStorage; -import org.apache.bookkeeper.bookie.storage.ldb.LocationsIndexRebuildOp; -import org.apache.bookkeeper.client.BKException; -import org.apache.bookkeeper.client.BKException.MetaStoreException; -import org.apache.bookkeeper.client.BookKeeper; -import org.apache.bookkeeper.client.BookKeeper.DigestType; -import org.apache.bookkeeper.client.BookKeeperAdmin; -import 
org.apache.bookkeeper.client.BookieInfoReader.BookieInfo; +import java.util.stream.Collectors; +import org.apache.bookkeeper.bookie.storage.EntryLogger; import org.apache.bookkeeper.client.LedgerEntry; -import org.apache.bookkeeper.client.LedgerHandle; -import org.apache.bookkeeper.client.LedgerMetadata; -import org.apache.bookkeeper.client.UpdateLedgerOp; -import org.apache.bookkeeper.common.util.OrderedExecutor; -import org.apache.bookkeeper.conf.ClientConfiguration; +import org.apache.bookkeeper.client.api.LedgerMetadata; +import org.apache.bookkeeper.common.annotation.InterfaceAudience.Private; import org.apache.bookkeeper.conf.ServerConfiguration; -import org.apache.bookkeeper.discover.RegistrationManager; -import org.apache.bookkeeper.meta.LedgerManager; import org.apache.bookkeeper.meta.LedgerUnderreplicationManager; -import org.apache.bookkeeper.meta.UnderreplicatedLedger; -import org.apache.bookkeeper.meta.zk.ZKMetadataDriverBase; -import org.apache.bookkeeper.net.BookieSocketAddress; -import org.apache.bookkeeper.proto.BookieClient; -import org.apache.bookkeeper.proto.BookieClientImpl; -import org.apache.bookkeeper.proto.BookieProtocol; -import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GenericCallback; -import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.Processor; -import org.apache.bookkeeper.replication.AuditorElector; import org.apache.bookkeeper.replication.ReplicationException; -import org.apache.bookkeeper.replication.ReplicationException.CompatibilityException; -import org.apache.bookkeeper.replication.ReplicationException.UnavailableException; -import org.apache.bookkeeper.stats.NullStatsLogger; +import org.apache.bookkeeper.tools.cli.commands.autorecovery.ListUnderReplicatedCommand; +import org.apache.bookkeeper.tools.cli.commands.autorecovery.LostBookieRecoveryDelayCommand; +import org.apache.bookkeeper.tools.cli.commands.autorecovery.QueryAutoRecoveryStatusCommand; +import org.apache.bookkeeper.tools.cli.commands.autorecovery.ToggleCommand; +import org.apache.bookkeeper.tools.cli.commands.autorecovery.TriggerAuditCommand; +import org.apache.bookkeeper.tools.cli.commands.autorecovery.WhoIsAuditorCommand; +import org.apache.bookkeeper.tools.cli.commands.bookie.CheckDBLedgersIndexCommand; +import org.apache.bookkeeper.tools.cli.commands.bookie.ConvertToDBStorageCommand; +import org.apache.bookkeeper.tools.cli.commands.bookie.ConvertToInterleavedStorageCommand; +import org.apache.bookkeeper.tools.cli.commands.bookie.FlipBookieIdCommand; +import org.apache.bookkeeper.tools.cli.commands.bookie.FormatCommand; +import org.apache.bookkeeper.tools.cli.commands.bookie.InitCommand; import org.apache.bookkeeper.tools.cli.commands.bookie.LastMarkCommand; +import org.apache.bookkeeper.tools.cli.commands.bookie.LedgerCommand; +import org.apache.bookkeeper.tools.cli.commands.bookie.ListActiveLedgersCommand; +import org.apache.bookkeeper.tools.cli.commands.bookie.ListFilesOnDiscCommand; +import org.apache.bookkeeper.tools.cli.commands.bookie.ListLedgersCommand; +import org.apache.bookkeeper.tools.cli.commands.bookie.LocalConsistencyCheckCommand; +import org.apache.bookkeeper.tools.cli.commands.bookie.ReadJournalCommand; +import org.apache.bookkeeper.tools.cli.commands.bookie.ReadLedgerCommand; +import org.apache.bookkeeper.tools.cli.commands.bookie.ReadLogCommand; +import org.apache.bookkeeper.tools.cli.commands.bookie.ReadLogMetadataCommand; +import org.apache.bookkeeper.tools.cli.commands.bookie.RebuildDBLedgerLocationsIndexCommand; +import 
org.apache.bookkeeper.tools.cli.commands.bookie.RebuildDBLedgersIndexCommand; +import org.apache.bookkeeper.tools.cli.commands.bookie.RegenerateInterleavedStorageIndexFileCommand; +import org.apache.bookkeeper.tools.cli.commands.bookie.SanityTestCommand; +import org.apache.bookkeeper.tools.cli.commands.bookie.UpdateBookieInLedgerCommand; +import org.apache.bookkeeper.tools.cli.commands.bookies.ClusterInfoCommand; +import org.apache.bookkeeper.tools.cli.commands.bookies.DecommissionCommand; +import org.apache.bookkeeper.tools.cli.commands.bookies.EndpointInfoCommand; +import org.apache.bookkeeper.tools.cli.commands.bookies.InfoCommand; +import org.apache.bookkeeper.tools.cli.commands.bookies.InstanceIdCommand; import org.apache.bookkeeper.tools.cli.commands.bookies.ListBookiesCommand; +import org.apache.bookkeeper.tools.cli.commands.bookies.MetaFormatCommand; +import org.apache.bookkeeper.tools.cli.commands.bookies.NukeExistingClusterCommand; +import org.apache.bookkeeper.tools.cli.commands.bookies.NukeExistingClusterCommand.NukeExistingClusterFlags; +import org.apache.bookkeeper.tools.cli.commands.bookies.RecoverCommand; +import org.apache.bookkeeper.tools.cli.commands.client.DeleteLedgerCommand; +import org.apache.bookkeeper.tools.cli.commands.client.LedgerMetaDataCommand; import org.apache.bookkeeper.tools.cli.commands.client.SimpleTestCommand; +import org.apache.bookkeeper.tools.cli.commands.cookie.AdminCommand; +import org.apache.bookkeeper.tools.cli.commands.cookie.CreateCookieCommand; +import org.apache.bookkeeper.tools.cli.commands.cookie.DeleteCookieCommand; +import org.apache.bookkeeper.tools.cli.commands.cookie.GenerateCookieCommand; +import org.apache.bookkeeper.tools.cli.commands.cookie.GetCookieCommand; +import org.apache.bookkeeper.tools.cli.commands.cookie.UpdateCookieCommand; import org.apache.bookkeeper.tools.framework.CliFlags; -import org.apache.bookkeeper.util.BookKeeperConstants; -import org.apache.bookkeeper.util.DiskChecker; import org.apache.bookkeeper.util.EntryFormatter; -import org.apache.bookkeeper.util.IOUtils; import org.apache.bookkeeper.util.LedgerIdFormatter; -import org.apache.bookkeeper.util.MathUtils; import org.apache.bookkeeper.util.Tool; -import org.apache.bookkeeper.versioning.Version; -import org.apache.bookkeeper.versioning.Versioned; -import org.apache.bookkeeper.zookeeper.ZooKeeperClient; import org.apache.commons.cli.BasicParser; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.HelpFormatter; @@ -135,12 +112,6 @@ import org.apache.commons.configuration.PropertiesConfiguration; import org.apache.commons.io.FileUtils; import org.apache.commons.lang.StringUtils; -import org.apache.commons.lang.mutable.MutableBoolean; -import org.apache.commons.lang3.ArrayUtils; -import org.apache.zookeeper.AsyncCallback; -import org.apache.zookeeper.AsyncCallback.VoidCallback; -import org.apache.zookeeper.KeeperException; -import org.apache.zookeeper.ZooKeeper; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -179,15 +150,33 @@ public class BookieShell implements Tool { static final String CMD_LISTFILESONDISC = "listfilesondisc"; static final String CMD_UPDATECOOKIE = "updatecookie"; static final String CMD_UPDATELEDGER = "updateledgers"; + static final String CMD_UPDATE_BOOKIE_IN_LEDGER = "updateBookieInLedger"; static final String CMD_DELETELEDGER = "deleteledger"; static final String CMD_BOOKIEINFO = "bookieinfo"; + static final String CMD_CLUSTERINFO = "clusterinfo"; + static final String CMD_ACTIVE_LEDGERS_ON_ENTRY_LOG_FILE = 
"activeledgers"; static final String CMD_DECOMMISSIONBOOKIE = "decommissionbookie"; + static final String CMD_ENDPOINTINFO = "endpointinfo"; static final String CMD_LOSTBOOKIERECOVERYDELAY = "lostbookierecoverydelay"; static final String CMD_TRIGGERAUDIT = "triggeraudit"; + static final String CMD_FORCEAUDITCHECKS = "forceauditchecks"; static final String CMD_CONVERT_TO_DB_STORAGE = "convert-to-db-storage"; static final String CMD_CONVERT_TO_INTERLEAVED_STORAGE = "convert-to-interleaved-storage"; static final String CMD_REBUILD_DB_LEDGER_LOCATIONS_INDEX = "rebuild-db-ledger-locations-index"; + static final String CMD_REBUILD_DB_LEDGERS_INDEX = "rebuild-db-ledgers-index"; + static final String CMD_CHECK_DB_LEDGERS_INDEX = "check-db-ledgers-index"; + static final String CMD_REGENERATE_INTERLEAVED_STORAGE_INDEX_FILE = "regenerate-interleaved-storage-index-file"; + static final String CMD_QUERY_AUTORECOVERY_STATUS = "queryautorecoverystatus"; + + // cookie commands + static final String CMD_CREATE_COOKIE = "cookie_create"; + static final String CMD_DELETE_COOKIE = "cookie_delete"; + static final String CMD_UPDATE_COOKIE = "cookie_update"; + static final String CMD_GET_COOKIE = "cookie_get"; + static final String CMD_GENERATE_COOKIE = "cookie_generate"; + static final String CMD_HELP = "help"; + static final String CMD_LOCALCONSISTENCYCHECK = "localconsistencycheck"; final ServerConfiguration bkConf = new ServerConfiguration(); File[] indexDirectories; @@ -210,12 +199,26 @@ public BookieShell(LedgerIdFormatter ledgeridFormatter, EntryFormatter entryForm this.entryFormatter = entryFormatter; } - interface Command { + /** + * BookieShell command. + */ + @Private + public interface Command { int runCmd(String[] args) throws Exception; + String description(); + void printUsage(); } + void printInfoLine(String s) { + System.out.println(s); + } + + void printErrorLine(String s) { + System.err.println(s); + } + abstract class MyCommand implements Command { abstract Options getOptions(); @@ -226,9 +229,17 @@ abstract class MyCommand implements Command { abstract int runCmd(CommandLine cmdLine) throws Exception; String cmdName; + Options opts; MyCommand(String cmdName) { this.cmdName = cmdName; + opts = getOptionsWithHelp(); + } + + @Override + public String description() { + // we used the string returned by `getUsage` as description in showing the list of commands + return getUsage(); } @Override @@ -236,6 +247,10 @@ public int runCmd(String[] args) throws Exception { try { BasicParser parser = new BasicParser(); CommandLine cmdLine = parser.parse(getOptions(), args); + if (cmdLine.hasOption("help")) { + printUsage(); + return 0; + } return runCmd(cmdLine); } catch (ParseException e) { LOG.error("Error parsing command line arguments : ", e); @@ -250,13 +265,18 @@ public void printUsage() { System.err.println(cmdName + ": " + getDescription()); hf.printHelp(getUsage(), getOptions()); } + + private Options getOptionsWithHelp() { + Options opts = new Options(); + opts.addOption("h", "help", false, "Show the help"); + return opts; + } } /** * Format the bookkeeper metadata present in zookeeper. 
*/ class MetaFormatCmd extends MyCommand { - Options opts = new Options(); MetaFormatCmd() { super(CMD_METAFORMAT); @@ -279,7 +299,14 @@ String getDescription() { @Override String getUsage() { - return "metaformat [-nonInteractive] [-force]"; + return "metaformat Format bookkeeper metadata in zookeeper\n" + + " Usage: metaformat [options]\n" + + " Options:\n" + + " -f, --force\n" + + " If [nonInteractive] is specified, " + + "then whether to force delete the old data without prompt\n" + + " -n, --nonInteractive\n" + + " Whether to confirm if old data exists "; } @Override @@ -287,13 +314,16 @@ int runCmd(CommandLine cmdLine) throws Exception { boolean interactive = (!cmdLine.hasOption("n")); boolean force = cmdLine.hasOption("f"); - boolean result = BookKeeperAdmin.format(bkConf, interactive, force); - return (result) ? 0 : 1; + MetaFormatCommand cmd = new MetaFormatCommand(); + MetaFormatCommand.MetaFormatFlags flags = new MetaFormatCommand.MetaFormatFlags() + .interactive(interactive).force(force); + boolean result = cmd.apply(bkConf, flags); + return result ? 0 : 1; } } /** - * Intializes new cluster by creating required znodes for the cluster. If + * Initializes new cluster by creating required znodes for the cluster. If * ledgersrootpath is already existing then it will error out. If for any * reason it errors out while creating znodes for the cluster, then before * running initnewcluster again, try nuking existing cluster by running @@ -303,7 +333,6 @@ int runCmd(CommandLine cmdLine) throws Exception { * already existing. */ class InitNewCluster extends MyCommand { - Options opts = new Options(); InitNewCluster() { super(CMD_INITNEWCLUSTER); @@ -322,12 +351,17 @@ String getDescription() { @Override String getUsage() { - return "initnewcluster"; + return "initnewcluster Initializes a new bookkeeper cluster. If initnewcluster fails then try nuking " + + "existing cluster by running nukeexistingcluster before running initnewcluster again, " + + "initbookie requires no options,use the default conf or re-specify BOOKIE_CONF \n" + + " Usage: initnewcluster"; } @Override int runCmd(CommandLine cmdLine) throws Exception { - boolean result = BookKeeperAdmin.initNewCluster(bkConf); + org.apache.bookkeeper.tools.cli.commands.bookies.InitCommand initCommand = + new org.apache.bookkeeper.tools.cli.commands.bookies.InitCommand(); + boolean result = initCommand.apply(bkConf, new CliFlags()); return (result) ? 0 : 1; } } @@ -336,7 +370,6 @@ int runCmd(CommandLine cmdLine) throws Exception { * Nuke bookkeeper metadata of existing cluster in zookeeper. 
*/ class NukeExistingCluster extends MyCommand { - Options opts = new Options(); NukeExistingCluster() { super(CMD_NUKEEXISTINGCLUSTER); @@ -359,7 +392,16 @@ String getDescription() { @Override String getUsage() { - return "nukeexistingcluster -zkledgersrootpath [-instanceid | -force]"; + return "nukeexistingcluster Nuke bookkeeper cluster by deleting metadata\n" + + " Usage: nukeexistingcluster [options]\n" + + " Options:\n" + + " -f, --force\n" + + " If instanceid is not specified, " + + "then whether to force nuke the metadata without validating instanceid\n" + + " * -i, --instanceid\n" + + " the bookie cluster's instanceid (param format: `instanceId`)\n" + + " * -p,--zkledgersrootpath\n" + + " zookeeper ledgers rootpath (param format: `zkLedgersRootPath`)"; } @Override @@ -368,19 +410,11 @@ int runCmd(CommandLine cmdLine) throws Exception { String zkledgersrootpath = cmdLine.getOptionValue("zkledgersrootpath"); String instanceid = cmdLine.getOptionValue("instanceid"); - /* - * for NukeExistingCluster command 'zkledgersrootpath' should be provided and either force option or - * instanceid should be provided. - */ - if ((zkledgersrootpath == null) || (force == (instanceid != null))) { - LOG.error( - "zkledgersrootpath should be specified and either force option " - + "or instanceid should be specified (but not both)"); - printUsage(); - return -1; - } - - boolean result = BookKeeperAdmin.nukeExistingCluster(bkConf, zkledgersrootpath, instanceid, force); + NukeExistingClusterCommand cmd = new NukeExistingClusterCommand(); + NukeExistingClusterFlags flags = new NukeExistingClusterFlags().force(force) + .zkLedgersRootPath(zkledgersrootpath) + .instandId(instanceid); + boolean result = cmd.apply(bkConf, flags); return (result) ? 0 : 1; } } @@ -389,7 +423,6 @@ int runCmd(CommandLine cmdLine) throws Exception { * Formats the local data present in current bookie server. */ class BookieFormatCmd extends MyCommand { - Options opts = new Options(); public BookieFormatCmd() { super(CMD_BOOKIEFORMAT); @@ -413,30 +446,30 @@ String getDescription() { @Override String getUsage() { - return "bookieformat [-nonInteractive] [-force] [-deleteCookie]"; + return "bookieformat Format the current server contents\n" + + " Usage: bookieformat [options]\n" + + " Options:\n" + + " -f, --force\n" + + " If [nonInteractive] is specified, then whether " + + "to force delete the old data without prompt..? \n" + + " * -n, --nonInteractive\n" + + " Whether to confirm if old data exists..? 
\n" + + " * -d, --deleteCookie\n" + + " Delete its cookie on metadata store "; } @Override int runCmd(CommandLine cmdLine) throws Exception { boolean interactive = (!cmdLine.hasOption("n")); boolean force = cmdLine.hasOption("f"); - - ServerConfiguration conf = new ServerConfiguration(bkConf); - boolean result = Bookie.format(conf, interactive, force); - // delete cookie - if (cmdLine.hasOption("d")) { - runFunctionWithRegistrationManager(bkConf, rm -> { - try { - Versioned cookie = Cookie.readFromRegistrationManager(rm, conf); - cookie.getValue().deleteFromRegistrationManager(rm, conf, cookie.getVersion()); - } catch (CookieNotFoundException nne) { - LOG.warn("No cookie to remove : ", nne); - } catch (BookieException be) { - throw new UncheckedExecutionException(be.getMessage(), be); - } - return null; - }); - } + boolean deletecookie = cmdLine.hasOption("d"); + + FormatCommand.Flags flags = new FormatCommand.Flags() + .nonInteractive(interactive) + .force(force) + .deleteCookie(deletecookie); + FormatCommand command = new FormatCommand(flags); + boolean result = command.apply(bkConf, flags); return (result) ? 0 : 1; } } @@ -446,7 +479,6 @@ int runCmd(CommandLine cmdLine) throws Exception { * indexDirs are empty and there is no registered Bookie with this BookieId. */ class InitBookieCmd extends MyCommand { - Options opts = new Options(); public InitBookieCmd() { super(CMD_INITBOOKIE); @@ -464,13 +496,16 @@ String getDescription() { @Override String getUsage() { - return CMD_INITBOOKIE; + return "initbookie Initialize new Bookie, initbookie requires no options," + + "use the default conf or re-specify BOOKIE_CONF \n" + + " Usage: initbookie"; } @Override int runCmd(CommandLine cmdLine) throws Exception { ServerConfiguration conf = new ServerConfiguration(bkConf); - boolean result = BookKeeperAdmin.initBookie(conf); + InitCommand initCommand = new InitCommand(); + boolean result = initCommand.apply(conf, new CliFlags()); return (result) ? 0 : 1; } } @@ -479,7 +514,6 @@ int runCmd(CommandLine cmdLine) throws Exception { * Recover command for ledger data recovery for failed bookie. 
*/ class RecoverCmd extends MyCommand { - Options opts = new Options(); public RecoverCmd() { super(CMD_RECOVER); @@ -489,6 +523,8 @@ public RecoverCmd() { opts.addOption("l", "ledger", true, "Recover a specific ledger"); opts.addOption("sk", "skipOpenLedgers", false, "Skip recovering open ledgers"); opts.addOption("d", "deleteCookie", false, "Delete cookie node for the bookie."); + opts.addOption("sku", "skipUnrecoverableLedgers", false, "Skip unrecoverable ledgers."); + opts.addOption("rate", "replicationRate", false, "Replication rate by bytes"); } @Override @@ -503,7 +539,25 @@ String getDescription() { @Override String getUsage() { - return "recover [-deleteCookie] "; + return "recover Recover the ledger data for failed bookie\n" + + " Usage: recover [options]\n" + + " Options:\n" + + " -q, --query\n" + + " Query the ledgers that contain given bookies\n" + + " -dr, --dryrun\n" + + " Printing the recovery plan w/o doing actual recovery\n" + + " -f, --force\n" + + " Force recovery without confirmation\n" + + " -l, --ledger\n" + + " Recover a specific ledger (param format: `ledgerId`)\n" + + " -sk, --skipOpenLedgers\n" + + " Skip recovering open ledgers\n" + + " -d, --deleteCookie\n" + + " Delete cookie node for the bookie\n" + + " -sku, --skipUnrecoverableLedgers\n" + + " Skip unrecoverable ledgers\n" + + " -rate, --replicationRate\n" + + " Replication rate by bytes"; } @Override @@ -521,211 +575,48 @@ int runCmd(CommandLine cmdLine) throws Exception { boolean force = cmdLine.hasOption("f"); boolean skipOpenLedgers = cmdLine.hasOption("sk"); boolean removeCookies = !dryrun && cmdLine.hasOption("d"); + boolean skipUnrecoverableLedgers = cmdLine.hasOption("sku"); Long ledgerId = getOptionLedgerIdValue(cmdLine, "ledger", -1); + int replicationRate = getOptionIntValue(cmdLine, "replicationRate", -1); - // Get bookies list - final String[] bookieStrs = args[0].split(","); - final Set bookieAddrs = new HashSet<>(); - for (String bookieStr : bookieStrs) { - final String bookieStrParts[] = bookieStr.split(":"); - if (bookieStrParts.length != 2) { - System.err.println("BookieSrcs has invalid bookie address format (host:port expected) : " - + bookieStr); - return -1; - } - bookieAddrs.add(new BookieSocketAddress(bookieStrParts[0], - Integer.parseInt(bookieStrParts[1]))); - } - - if (!force) { - System.err.println("Bookies : " + bookieAddrs); - if (!IOUtils.confirmPrompt("Are you sure to recover them : (Y/N)")) { - System.err.println("Give up!"); - return -1; - } - } - - LOG.info("Constructing admin"); - ClientConfiguration adminConf = new ClientConfiguration(bkConf); - BookKeeperAdmin admin = new BookKeeperAdmin(adminConf); - LOG.info("Construct admin : {}", admin); - try { - if (query) { - return bkQuery(admin, bookieAddrs); - } - if (-1 != ledgerId) { - return bkRecoveryLedger(admin, ledgerId, bookieAddrs, dryrun, skipOpenLedgers, removeCookies); - } - return bkRecovery(admin, bookieAddrs, dryrun, skipOpenLedgers, removeCookies); - } finally { - admin.close(); - } - } - - private int bkQuery(BookKeeperAdmin bkAdmin, Set bookieAddrs) - throws InterruptedException, BKException { - SortedMap ledgersContainBookies = - bkAdmin.getLedgersContainBookies(bookieAddrs); - System.err.println("NOTE: Bookies in inspection list are marked with '*'."); - for (Map.Entry ledger : ledgersContainBookies.entrySet()) { - System.out.println("ledger " + ledger.getKey() + " : " + ledger.getValue().getState()); - Map numBookiesToReplacePerEnsemble = - inspectLedger(ledger.getValue(), bookieAddrs); - 
System.out.print("summary: ["); - for (Map.Entry entry : numBookiesToReplacePerEnsemble.entrySet()) { - System.out.print(entry.getKey() + "=" + entry.getValue() + ", "); - } - System.out.println("]"); - System.out.println(); - } - System.err.println("Done"); - return 0; - } - - private Map inspectLedger(LedgerMetadata metadata, Set bookiesToInspect) { - Map numBookiesToReplacePerEnsemble = new TreeMap(); - for (Map.Entry> ensemble : metadata.getEnsembles().entrySet()) { - List bookieList = ensemble.getValue(); - System.out.print(ensemble.getKey() + ":\t"); - int numBookiesToReplace = 0; - for (BookieSocketAddress bookie : bookieList) { - System.out.print(bookie); - if (bookiesToInspect.contains(bookie)) { - System.out.print("*"); - ++numBookiesToReplace; - } else { - System.out.print(" "); - } - System.out.print(" "); - } - System.out.println(); - numBookiesToReplacePerEnsemble.put(ensemble.getKey(), numBookiesToReplace); - } - return numBookiesToReplacePerEnsemble; - } - - private int bkRecoveryLedger(BookKeeperAdmin bkAdmin, - long lid, - Set bookieAddrs, - boolean dryrun, - boolean skipOpenLedgers, - boolean removeCookies) - throws InterruptedException, BKException, KeeperException { - bkAdmin.recoverBookieData(lid, bookieAddrs, dryrun, skipOpenLedgers); - if (removeCookies) { - deleteCookies(bkAdmin.getConf(), bookieAddrs); - } - return 0; - } - - private int bkRecovery(BookKeeperAdmin bkAdmin, - Set bookieAddrs, - boolean dryrun, - boolean skipOpenLedgers, - boolean removeCookies) - throws InterruptedException, BKException, KeeperException { - bkAdmin.recoverBookieData(bookieAddrs, dryrun, skipOpenLedgers); - if (removeCookies) { - deleteCookies(bkAdmin.getConf(), bookieAddrs); - } - return 0; - } - - private void deleteCookies(ClientConfiguration conf, - Set bookieAddrs) throws BKException { - ServerConfiguration serverConf = new ServerConfiguration(conf); - try { - runFunctionWithRegistrationManager(serverConf, rm -> { - try { - for (BookieSocketAddress addr : bookieAddrs) { - deleteCookie(rm, addr); - } - } catch (Exception e) { - throw new UncheckedExecutionException(e); - } - return null; - }); - } catch (Exception e) { - Throwable cause = e; - if (e instanceof UncheckedExecutionException) { - cause = e.getCause(); - } - if (cause instanceof BKException) { - throw (BKException) cause; - } else { - BKException bke = new MetaStoreException(); - bke.initCause(bke); - throw bke; - } - } - } - - private void deleteCookie(RegistrationManager rm, - BookieSocketAddress bookieSrc) throws BookieException { - try { - Versioned cookie = Cookie.readFromRegistrationManager(rm, bookieSrc); - cookie.getValue().deleteFromRegistrationManager(rm, bookieSrc, cookie.getVersion()); - } catch (CookieNotFoundException nne) { - LOG.warn("No cookie to remove for {} : ", bookieSrc, nne); - } + RecoverCommand cmd = new RecoverCommand(); + RecoverCommand.RecoverFlags flags = new RecoverCommand.RecoverFlags(); + flags.bookieAddress(args[0]); + flags.deleteCookie(removeCookies); + flags.dryRun(dryrun); + flags.force(force); + flags.ledger(ledgerId); + flags.replicateRate(replicationRate); + flags.skipOpenLedgers(skipOpenLedgers); + flags.query(query); + flags.skipUnrecoverableLedgers(skipUnrecoverableLedgers); + boolean result = cmd.apply(bkConf, flags); + return (result) ? 0 : -1; } - } /** * Ledger Command Handles ledger related operations. 
*/ class LedgerCmd extends MyCommand { - Options lOpts = new Options(); LedgerCmd() { super(CMD_LEDGER); - lOpts.addOption("m", "meta", false, "Print meta information"); + opts.addOption("m", "meta", false, "Print meta information"); } @Override public int runCmd(CommandLine cmdLine) throws Exception { - String[] leftArgs = cmdLine.getArgs(); - if (leftArgs.length <= 0) { - System.err.println("ERROR: missing ledger id"); - printUsage(); - return -1; - } - - boolean printMeta = false; + LedgerCommand cmd = new LedgerCommand(ledgerIdFormatter); + cmd.setPrint(BookieShell.this::printInfoLine); + LedgerCommand.LedgerFlags flags = new LedgerCommand.LedgerFlags(); if (cmdLine.hasOption("m")) { - printMeta = true; - } - long ledgerId; - try { - ledgerId = ledgerIdFormatter.readLedgerId(leftArgs[0]); - } catch (IllegalArgumentException iae) { - System.err.println("ERROR: invalid ledger id " + leftArgs[0]); - printUsage(); - return -1; - } - - if (bkConf.getLedgerStorageClass().equals(DbLedgerStorage.class.getName())) { - // dump ledger info - try { - DbLedgerStorage.readLedgerIndexEntries(ledgerId, bkConf, - (currentEntry, entryLogId, position) -> System.out.println( - "entry " + currentEntry + "\t:\t(log: " + entryLogId + ", pos: " + position + ")")); - } catch (IOException e) { - System.err.printf("ERROR: initializing dbLedgerStorage %s", e.getMessage()); - return -1; - } - } else { - if (printMeta) { - // print meta - readLedgerMeta(ledgerId); - } - // dump ledger info - readLedgerIndexEntries(ledgerId); + flags.meta(true); } - - return 0; + flags.ledgerId(Long.parseLong(cmdLine.getArgs()[0])); + boolean result = cmd.apply(bkConf, flags); + return (result) ? 0 : 1; } @Override @@ -735,12 +626,18 @@ String getDescription() { @Override String getUsage() { - return "ledger [-m] "; + return "ledger Dump ledger index entries into readable format\n" + + " Usage: ledger [options]\n" + + " Options:\n" + + " -m, --meta\n" + + " Print meta information\n" + + " * \n" + + " Ledger ID(param format: `ledgerId`) "; } @Override Options getOptions() { - return lOpts; + return opts; } } @@ -748,22 +645,21 @@ Options getOptions() { * Command for reading ledger entries. 
*/ class ReadLedgerEntriesCmd extends MyCommand { - Options lOpts = new Options(); ReadLedgerEntriesCmd() { super(CMD_READ_LEDGER_ENTRIES); - lOpts.addOption("m", "msg", false, "Print message body"); - lOpts.addOption("l", "ledgerid", true, "Ledger ID"); - lOpts.addOption("fe", "firstentryid", true, "First EntryID"); - lOpts.addOption("le", "lastentryid", true, "Last EntryID"); - lOpts.addOption("r", "force-recovery", false, + opts.addOption("m", "msg", false, "Print message body"); + opts.addOption("l", "ledgerid", true, "Ledger ID"); + opts.addOption("fe", "firstentryid", true, "First EntryID"); + opts.addOption("le", "lastentryid", true, "Last EntryID"); + opts.addOption("r", "force-recovery", false, "Ensure the ledger is properly closed before reading"); - lOpts.addOption("b", "bookie", true, "Only read from a specific bookie"); + opts.addOption("b", "bookie", true, "Only read from a specific bookie"); } @Override Options getOptions() { - return lOpts; + return opts; } @Override @@ -773,100 +669,48 @@ String getDescription() { @Override String getUsage() { - return "readledger [-bookie ] [-msg] -ledgerid " - + "[-firstentryid [-lastentryid ]] " - + "[-force-recovery]"; + return "readledger Read a range of entries from a ledger\n" + + " Usage: readledger [options]\n" + + " Options:\n" + + " -m, --msg\n" + + " Print message body\n" + + " * -l, --ledgerid\n" + + " Ledger ID (param format: `ledgerId`)\n" + + " * -fe, --firstentryid\n" + + " First EntryID (param format: `firstEntryId`)\n" + + " * -le, --lastentryid\n" + + " Last EntryID (param format: `lastEntryId`)\n" + + " -r, --force-recovery\n" + + " Ensure the ledger is properly closed before reading\n" + + " * -b, --bookie\n" + + " Only read from a specific bookie (param format: `address:port`)"; } @Override int runCmd(CommandLine cmdLine) throws Exception { final long ledgerId = getOptionLedgerIdValue(cmdLine, "ledgerid", -1); - if (ledgerId == -1) { - System.err.println("Must specify a ledger id"); - return -1; - } - final long firstEntry = getOptionLongValue(cmdLine, "firstentryid", 0); long lastEntry = getOptionLongValue(cmdLine, "lastentryid", -1); boolean printMsg = cmdLine.hasOption("m"); boolean forceRecovery = cmdLine.hasOption("r"); - final BookieSocketAddress bookie; + String bookieAddress; if (cmdLine.hasOption("b")) { // A particular bookie was specified - bookie = new BookieSocketAddress(cmdLine.getOptionValue("b")); + bookieAddress = cmdLine.getOptionValue("b"); } else { - bookie = null; - } - - ClientConfiguration conf = new ClientConfiguration(); - conf.addConfiguration(bkConf); - - try (BookKeeperAdmin bk = new BookKeeperAdmin(conf)) { - if (forceRecovery) { - // Force the opening of the ledger to trigger recovery - try (LedgerHandle lh = bk.openLedger(ledgerId)) { - if (lastEntry == -1 || lastEntry > lh.getLastAddConfirmed()) { - lastEntry = lh.getLastAddConfirmed(); - } - } - } - - if (bookie == null) { - // No bookie was specified, use normal bk client - Iterator entries = bk.readEntries(ledgerId, firstEntry, lastEntry).iterator(); - while (entries.hasNext()) { - LedgerEntry entry = entries.next(); - formatEntry(entry, printMsg); - } - } else { - // Use BookieClient to target a specific bookie - EventLoopGroup eventLoopGroup = new NioEventLoopGroup(); - OrderedExecutor executor = OrderedExecutor.newBuilder() - .numThreads(1) - .name("BookieClientScheduler") - .build(); - - ScheduledExecutorService scheduler = Executors.newSingleThreadScheduledExecutor( - new DefaultThreadFactory("BookKeeperClientSchedulerPool")); 
- - BookieClient bookieClient = new BookieClientImpl(conf, eventLoopGroup, executor, - scheduler, NullStatsLogger.INSTANCE); - - LongStream.range(firstEntry, lastEntry).forEach(entryId -> { - CompletableFuture future = new CompletableFuture<>(); - - bookieClient.readEntry(bookie, ledgerId, entryId, - (rc, ledgerId1, entryId1, buffer, ctx) -> { - if (rc != BKException.Code.OK) { - LOG.error("Failed to read entry {} -- {}", entryId1, BKException.getMessage(rc)); - future.completeExceptionally(BKException.create(rc)); - return; - } - - System.out.println("--------- Lid=" + ledgerIdFormatter.formatLedgerId(ledgerId) - + ", Eid=" + entryId + " ---------"); - if (printMsg) { - System.out.println("Data: " + ByteBufUtil.prettyHexDump(buffer)); - } - - buffer.release(); - future.complete(null); - }, null, BookieProtocol.FLAG_NONE); - - try { - future.get(); - } catch (Exception e) { - LOG.error("Error future.get while reading entries from ledger {}", ledgerId, e); - } - }); - - eventLoopGroup.shutdownGracefully(); - executor.shutdown(); - bookieClient.close(); - } - } - + bookieAddress = null; + } + + ReadLedgerCommand cmd = new ReadLedgerCommand(entryFormatter, ledgerIdFormatter); + ReadLedgerCommand.ReadLedgerFlags flags = new ReadLedgerCommand.ReadLedgerFlags(); + flags.bookieAddress(bookieAddress); + flags.firstEntryId(firstEntry); + flags.forceRecovery(forceRecovery); + flags.lastEntryId(lastEntry); + flags.ledgerId(ledgerId); + flags.msg(printMsg); + cmd.apply(bkConf, flags); return 0; } @@ -876,14 +720,18 @@ int runCmd(CommandLine cmdLine) throws Exception { * Command for listing underreplicated ledgers. */ class ListUnderreplicatedCmd extends MyCommand { - Options opts = new Options(); public ListUnderreplicatedCmd() { super(CMD_LISTUNDERREPLICATED); - opts.addOption("missingreplica", true, "Bookie Id of missing replica"); - opts.addOption("excludingmissingreplica", true, "Bookie Id of missing replica to ignore"); - opts.addOption("printmissingreplica", false, "Whether to print missingreplicas list?"); - opts.addOption("printreplicationworkerid", false, "Whether to print replicationworkerid?"); + opts.addOption("mr", "missingreplica", true, "Bookie Id of missing replica"); + opts.addOption("emr", "excludingmissingreplica", true, + "Bookie Id of missing replica to ignore"); + opts.addOption("pmr", "printmissingreplica", false, + "Whether to print missingreplicas list?"); + opts.addOption("prw", "printreplicationworkerid", false, + "Whether to print replicationworkerid?"); + opts.addOption("c", "onlydisplayledgercount", false, + "Only display underreplicated ledger count"); } @Override @@ -899,8 +747,20 @@ String getDescription() { @Override String getUsage() { - return "listunderreplicated [[-missingreplica ]" - + " [-excludingmissingreplica ]] [-printmissingreplica] [-printreplicationworkerid]"; + return "listunderreplicated List ledgers marked as underreplicated, with optional options to " + + "specify missingreplica (BookieId) and to exclude missingreplica\n" + + " Usage: listunderreplicated [options]\n" + + " Options:\n" + + " -c,--onlydisplayledgercount\n" + + " Only display underreplicated ledger count \n" + + " * -emr,--excludingmissingreplica\n" + + " Bookie Id of missing replica to ignore (param format: `address:port`)\n" + + " * -mr,--missingreplica\n" + + " Bookie Id of missing replica (param format: `address:port`)\n" + + " -pmr,--printmissingreplica\n" + + " Whether to print missingreplicas list \n" + + " -prw,--printreplicationworkerid\n" + + " Whether to print 
replicationworkerid "; } @Override @@ -910,59 +770,16 @@ int runCmd(CommandLine cmdLine) throws Exception { final String excludingBookieId = cmdLine.getOptionValue("excludingmissingreplica"); final boolean printMissingReplica = cmdLine.hasOption("printmissingreplica"); final boolean printReplicationWorkerId = cmdLine.hasOption("printreplicationworkerid"); - - final Predicate> predicate; - if (!StringUtils.isBlank(includingBookieId) && !StringUtils.isBlank(excludingBookieId)) { - predicate = replicasList -> (replicasList.contains(includingBookieId) - && !replicasList.contains(excludingBookieId)); - } else if (!StringUtils.isBlank(includingBookieId)) { - predicate = replicasList -> replicasList.contains(includingBookieId); - } else if (!StringUtils.isBlank(excludingBookieId)) { - predicate = replicasList -> !replicasList.contains(excludingBookieId); - } else { - predicate = null; - } - - runFunctionWithLedgerManagerFactory(bkConf, mFactory -> { - LedgerUnderreplicationManager underreplicationManager; - try { - underreplicationManager = mFactory.newLedgerUnderreplicationManager(); - } catch (KeeperException | CompatibilityException e) { - throw new UncheckedExecutionException("Failed to new ledger underreplicated manager", e); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - throw new UncheckedExecutionException("Interrupted on newing ledger underreplicated manager", e); - } - Iterator iter = underreplicationManager.listLedgersToRereplicate(predicate); - while (iter.hasNext()) { - UnderreplicatedLedger underreplicatedLedger = iter.next(); - long urLedgerId = underreplicatedLedger.getLedgerId(); - System.out.println(ledgerIdFormatter.formatLedgerId(urLedgerId)); - long ctime = underreplicatedLedger.getCtime(); - if (ctime != UnderreplicatedLedger.UNASSIGNED_CTIME) { - System.out.println("\tCtime : " + ctime); - } - if (printMissingReplica) { - underreplicatedLedger.getReplicaList().forEach((missingReplica) -> { - System.out.println("\tMissingReplica : " + missingReplica); - }); - } - if (printReplicationWorkerId) { - try { - String replicationWorkerId = underreplicationManager - .getReplicationWorkerIdRereplicatingLedger(urLedgerId); - if (replicationWorkerId != null) { - System.out.println("\tReplicationWorkerId : " + replicationWorkerId); - } - } catch (UnavailableException e) { - LOG.error("Failed to get ReplicationWorkerId rereplicating ledger {} -- {}", urLedgerId, - e.getMessage()); - } - } - } - return null; - }); - + final boolean onlyDisplayLedgerCount = cmdLine.hasOption("onlydisplayledgercount"); + + ListUnderReplicatedCommand.LURFlags flags = new ListUnderReplicatedCommand.LURFlags() + .missingReplica(includingBookieId) + .excludingMissingReplica(excludingBookieId) + .printMissingReplica(printMissingReplica) + .printReplicationWorkerId(printReplicationWorkerId) + .onlyDisplayLedgerCount(onlyDisplayLedgerCount); + ListUnderReplicatedCommand cmd = new ListUnderReplicatedCommand(ledgerIdFormatter); + cmd.apply(bkConf, flags); return 0; } } @@ -973,87 +790,22 @@ int runCmd(CommandLine cmdLine) throws Exception { * Command to list all ledgers in the cluster. 
*/ class ListLedgersCmd extends MyCommand { - Options lOpts = new Options(); ListLedgersCmd() { super(CMD_LISTLEDGERS); - lOpts.addOption("m", "meta", false, "Print metadata"); - lOpts.addOption("bookieid", true, "List ledgers residing in this bookie"); + opts.addOption("m", "meta", false, "Print metadata"); + opts.addOption("bookieid", true, "List ledgers residing in this bookie"); } @Override public int runCmd(CommandLine cmdLine) throws Exception { final boolean printMeta = cmdLine.hasOption("m"); final String bookieidToBePartOfEnsemble = cmdLine.getOptionValue("bookieid"); - final BookieSocketAddress bookieAddress = StringUtils.isBlank(bookieidToBePartOfEnsemble) ? null - : new BookieSocketAddress(bookieidToBePartOfEnsemble); - - runFunctionWithLedgerManagerFactory(bkConf, mFactory -> { - try (LedgerManager ledgerManager = mFactory.newLedgerManager()) { - - final AtomicInteger returnCode = new AtomicInteger(BKException.Code.OK); - final CountDownLatch processDone = new CountDownLatch(1); - - Processor ledgerProcessor = new Processor() { - @Override - public void process(Long ledgerId, VoidCallback cb) { - if (!printMeta && (bookieAddress == null)) { - printLedgerMetadata(ledgerId, null, false); - cb.processResult(BKException.Code.OK, null, null); - } else { - GenericCallback gencb = new GenericCallback() { - @Override - public void operationComplete(int rc, LedgerMetadata ledgerMetadata) { - if (rc == BKException.Code.OK) { - if ((bookieAddress == null) - || BookKeeperAdmin.areEntriesOfLedgerStoredInTheBookie(ledgerId, - bookieAddress, ledgerMetadata)) { - /* - * the print method has to be in - * synchronized scope, otherwise - * output of printLedgerMetadata - * could interleave since this - * callback for different - * ledgers can happen in - * different threads. 
- */ - synchronized (BookieShell.this) { - printLedgerMetadata(ledgerId, ledgerMetadata, printMeta); - } - } - } else if (rc == BKException.Code.NoSuchLedgerExistsException) { - rc = BKException.Code.OK; - } else { - LOG.error("Unable to read the ledger: " + ledgerId + " information"); - } - cb.processResult(rc, null, null); - } - }; - ledgerManager.readLedgerMetadata(ledgerId, gencb); - } - } - }; - - ledgerManager.asyncProcessLedgers(ledgerProcessor, new AsyncCallback.VoidCallback() { - @Override - public void processResult(int rc, String s, Object obj) { - returnCode.set(rc); - processDone.countDown(); - } - }, null, BKException.Code.OK, BKException.Code.ReadException); - processDone.await(); - if (returnCode.get() != BKException.Code.OK) { - LOG.error("Received error return value while processing ledgers: {}", returnCode.get()); - throw BKException.create(returnCode.get()); - } - - } catch (Exception ioe) { - LOG.error("Received Exception while processing ledgers", ioe); - throw new UncheckedExecutionException(ioe); - } - return null; - }); + ListLedgersCommand.ListLedgersFlags flags = new ListLedgersCommand.ListLedgersFlags() + .bookieId(bookieidToBePartOfEnsemble).meta(printMeta); + ListLedgersCommand cmd = new ListLedgersCommand(ledgerIdFormatter); + cmd.apply(bkConf, flags); return 0; } @@ -1065,41 +817,77 @@ String getDescription() { @Override String getUsage() { - return "listledgers [-meta] [-bookieid ]"; + return "listledgers List all ledgers on the cluster (this may take a long time)\n" + + " Usage: listledgers [options]\n" + + " Options:\n" + + " -m, --meta\n" + + " Print metadata\n" + + " * -bookieid\n" + + " List ledgers residing in this bookie(param format: `address:port`) "; } @Override Options getOptions() { - return lOpts; + return opts; } } - void printLedgerMetadata(long ledgerId, LedgerMetadata md, boolean printMeta) { - System.out.println("ledgerID: " + ledgerIdFormatter.formatLedgerId(ledgerId)); - if (printMeta) { - System.out.println(new String(md.serialize(), UTF_8)); + /** + * List active ledgers on entry log file. 
+ **/ + class ListActiveLedgersCmd extends MyCommand { + + ListActiveLedgersCmd() { + super(CMD_ACTIVE_LEDGERS_ON_ENTRY_LOG_FILE); + opts.addOption("l", "logId", true, "Entry log file id"); + opts.addOption("t", "timeout", true, "Read timeout(ms)"); } - } - static class ReadMetadataCallback extends AbstractFuture - implements GenericCallback { - final long ledgerId; + @Override + public int runCmd(CommandLine cmdLine) throws Exception { + final boolean hasTimeout = cmdLine.hasOption("t"); + final boolean hasLogId = cmdLine.hasOption("l"); + if (!hasLogId){ + printUsage(); + return -1; + } + final long logId = Long.parseLong(cmdLine.getOptionValue("l")); + ListActiveLedgersCommand.ActiveLedgerFlags flags = new ListActiveLedgersCommand.ActiveLedgerFlags(); + flags.logId(logId); + if (hasTimeout){ + flags.timeout(Long.parseLong(cmdLine.getOptionValue("t"))); + } + ListActiveLedgersCommand cmd = new ListActiveLedgersCommand(ledgerIdFormatter); + cmd.apply(bkConf, flags); + return 0; + } - ReadMetadataCallback(long ledgerId) { - this.ledgerId = ledgerId; + @Override + String getDescription() { + return "List all active ledgers on the entry log file."; } - long getLedgerId() { - return ledgerId; + @Override + String getUsage() { + return "activeledgers List all active ledgers on the entry log file\n" + + " Usage: activeledgers [options]\n" + + " Options:\n" + + " * -l, --logId\n" + + " Entry log file id (`ledgers/logFileName.log`,param format: `logFileName`)\n" + + " * -t, --timeout\n" + + " Read timeout(ms, param format: `runTimeoutMs`) "; } @Override - public void operationComplete(int rc, LedgerMetadata result) { - if (rc != 0) { - setException(BKException.create(rc)); - } else { - set(result); - } + Options getOptions() { + return opts; + } + } + + void printLedgerMetadata(long ledgerId, LedgerMetadata md, boolean printMeta) { + System.out.println("ledgerID: " + ledgerIdFormatter.formatLedgerId(ledgerId)); + if (printMeta) { + System.out.println(md.toString()); } } @@ -1107,48 +895,99 @@ public void operationComplete(int rc, LedgerMetadata result) { * Print the metadata for a ledger. 
*/ class LedgerMetadataCmd extends MyCommand { - Options lOpts = new Options(); LedgerMetadataCmd() { super(CMD_LEDGERMETADATA); - lOpts.addOption("l", "ledgerid", true, "Ledger ID"); + opts.addOption("l", "ledgerid", true, "Ledger ID"); + opts.addOption("dumptofile", true, "Dump metadata for ledger, to a file"); + opts.addOption("restorefromfile", true, "Restore metadata for ledger, from a file"); + opts.addOption("update", false, "Update metadata if ledger already exist"); } @Override public int runCmd(CommandLine cmdLine) throws Exception { - final long lid = getOptionLedgerIdValue(cmdLine, "ledgerid", -1); - if (lid == -1) { + final long ledgerId = getOptionLedgerIdValue(cmdLine, "ledgerid", -1); + if (ledgerId == -1) { System.err.println("Must specify a ledger id"); return -1; } + if (cmdLine.hasOption("dumptofile") && cmdLine.hasOption("restorefromfile")) { + System.err.println("Only one of --dumptofile and --restorefromfile can be specified"); + return -2; + } - runFunctionWithLedgerManagerFactory(bkConf, mFactory -> { - try (LedgerManager m = mFactory.newLedgerManager()) { - ReadMetadataCallback cb = new ReadMetadataCallback(lid); - m.readLedgerMetadata(lid, cb); - printLedgerMetadata(lid, cb.get(), true); - } catch (Exception e) { - throw new UncheckedExecutionException(e); - } - return null; - }); + LedgerMetaDataCommand.LedgerMetadataFlag flag = new LedgerMetaDataCommand.LedgerMetadataFlag(); + flag.ledgerId(ledgerId); + if (cmdLine.hasOption("dumptofile")) { + flag.dumpToFile(cmdLine.getOptionValue("dumptofile")); + } + if (cmdLine.hasOption("restorefromfile")) { + flag.restoreFromFile(cmdLine.getOptionValue("restorefromfile")); + } + flag.update(cmdLine.hasOption("update")); + LedgerMetaDataCommand cmd = new LedgerMetaDataCommand(ledgerIdFormatter); + cmd.apply(bkConf, flag); return 0; } @Override String getDescription() { - return "Print the metadata for a ledger."; + return "Print the metadata for a ledger, or optionally dump to a file."; + } + + @Override + String getUsage() { + return "ledgermetadata Print the metadata for a ledger, or optionally dump to a file\n" + + " Usage: ledgermetadata [options]\n" + + " Options:\n" + + " -dumptofile \n" + + " Dump metadata for ledger, to a file (param format: `dumpFilePath`)\n" + + " -restorefromfile \n" + + " Restore metadata for ledger, from a file (param format: `storeFilePath`)\n" + + " -update \n" + + " Update metadata if ledger already exist \n" + + " * -l, --ledgerid\n" + + " Ledger ID(param format: `ledgerId`) "; + } + + @Override + Options getOptions() { + return opts; + } + } + + /** + * Check local storage for inconsistencies. + */ + class LocalConsistencyCheck extends MyCommand { + + LocalConsistencyCheck() { + super(CMD_LOCALCONSISTENCYCHECK); + } + + @Override + public int runCmd(CommandLine cmdLine) throws Exception { + LocalConsistencyCheckCommand cmd = new LocalConsistencyCheckCommand(); + boolean result = cmd.apply(bkConf, new CliFlags()); + return (result) ? 0 : 1; + } + + @Override + String getDescription() { + return "Validate Ledger Storage internal metadata"; } @Override String getUsage() { - return "ledgermetadata -ledgerid "; + return "localconsistencycheck Validate Ledger Storage internal metadata, " + + "localconsistencycheck requires no options,use the default conf or re-specify BOOKIE_CONF \n" + + " Usage: localconsistencycheck"; } @Override Options getOptions() { - return lOpts; + return opts; } } @@ -1156,14 +995,13 @@ Options getOptions() { * Simple test to create a ledger and write to it. 
*/ class SimpleTestCmd extends MyCommand { - Options lOpts = new Options(); SimpleTestCmd() { super(CMD_SIMPLETEST); - lOpts.addOption("e", "ensemble", true, "Ensemble size (default 3)"); - lOpts.addOption("w", "writeQuorum", true, "Write quorum size (default 2)"); - lOpts.addOption("a", "ackQuorum", true, "Ack quorum size (default 2)"); - lOpts.addOption("n", "numEntries", true, "Entries to write (default 1000)"); + opts.addOption("e", "ensemble", true, "Ensemble size (default 3)"); + opts.addOption("w", "writeQuorum", true, "Write quorum size (default 2)"); + opts.addOption("a", "ackQuorum", true, "Ack quorum size (default 2)"); + opts.addOption("n", "numEntries", true, "Entries to write (default 1000)"); } @Override @@ -1173,13 +1011,13 @@ public int runCmd(CommandLine cmdLine) throws Exception { int ackQuorum = getOptionIntValue(cmdLine, "ackQuorum", 2); int numEntries = getOptionIntValue(cmdLine, "numEntries", 1000); - SimpleTestCommand.Flags flags = new SimpleTestCommand.Flags() + SimpleTestCommand.Flags flags = SimpleTestCommand.Flags.newFlags() .ensembleSize(ensemble) .writeQuorumSize(writeQuorum) .ackQuorumSize(ackQuorum) .numEntries(numEntries); - SimpleTestCommand command = new SimpleTestCommand(flags); + SimpleTestCommand command = newSimpleTestCommand(flags); command.apply(bkConf, flags); return 0; @@ -1192,12 +1030,22 @@ String getDescription() { @Override String getUsage() { - return "simpletest [-ensemble N] [-writeQuorum N] [-ackQuorum N] [-numEntries N]"; + return "simpletest Simple test to create a ledger and write entries to it\n" + + " Usage: simpletest [options]\n" + + " Options:\n" + + " -e, --ensemble\n" + + " Ensemble size (default 3, param format: `ensembleSize`)\n" + + " -w, --writeQuorum\n" + + " Write quorum size (default 2, param format: `writeQuorumSize`)\n" + + " -a, --ackQuorum\n" + + " Ack quorum size (default 2, param format: `ackQuorumSize`)\n" + + " -n, --numEntries\n" + + " Entries to write (default 1000, param format: `entriesToWrite`)"; } @Override Options getOptions() { - return lOpts; + return opts; } } @@ -1205,17 +1053,16 @@ Options getOptions() { * Command to run a bookie sanity test. */ class BookieSanityTestCmd extends MyCommand { - Options lOpts = new Options(); BookieSanityTestCmd() { super(CMD_BOOKIESANITYTEST); - lOpts.addOption("e", "entries", true, "Total entries to be added for the test (default 10)"); - lOpts.addOption("t", "timeout", true, "Timeout for write/read operations in seconds (default 1)"); + opts.addOption("e", "entries", true, "Total entries to be added for the test (default 10)"); + opts.addOption("t", "timeout", true, "Timeout for write/read operations in seconds (default 1)"); } @Override Options getOptions() { - return lOpts; + return opts; } @Override @@ -1225,67 +1072,22 @@ String getDescription() { @Override String getUsage() { - return "bookiesanity [-entries N] [-timeout N]"; + return "bookiesanity Sanity test for local bookie. 
" + + "Create ledger and write/reads entries on local bookie\n" + + " Usage: bookiesanity [options]\n" + + " Options:\n" + + " -e, --entries\n" + + " Total entries to be added for the test (default 10, param format: `entryNum`)\n" + + " -t, --timeout\n" + + " Timeout for write/read in seconds (default 1s, param format: `readTimeoutMs`) "; } @Override int runCmd(CommandLine cmdLine) throws Exception { - int numberOfEntries = getOptionIntValue(cmdLine, "entries", 10); - int timeoutSecs = getOptionIntValue(cmdLine, "timeout", 1); - - ClientConfiguration conf = new ClientConfiguration(); - conf.addConfiguration(bkConf); - conf.setEnsemblePlacementPolicy(LocalBookieEnsemblePlacementPolicy.class); - conf.setAddEntryTimeout(timeoutSecs); - conf.setReadEntryTimeout(timeoutSecs); - - BookKeeper bk = new BookKeeper(conf); - LedgerHandle lh = null; - try { - lh = bk.createLedger(1, 1, DigestType.MAC, new byte[0]); - LOG.info("Created ledger {}", lh.getId()); - - for (int i = 0; i < numberOfEntries; i++) { - String content = "entry-" + i; - lh.addEntry(content.getBytes(UTF_8)); - } - - LOG.info("Written {} entries in ledger {}", numberOfEntries, lh.getId()); - - // Reopen the ledger and read entries - lh = bk.openLedger(lh.getId(), DigestType.MAC, new byte[0]); - if (lh.getLastAddConfirmed() != (numberOfEntries - 1)) { - throw new Exception("Invalid last entry found on ledger. expecting: " + (numberOfEntries - 1) - + " -- found: " + lh.getLastAddConfirmed()); - } - - Enumeration entries = lh.readEntries(0, numberOfEntries - 1); - int i = 0; - while (entries.hasMoreElements()) { - LedgerEntry entry = entries.nextElement(); - String actualMsg = new String(entry.getEntry(), UTF_8); - String expectedMsg = "entry-" + (i++); - if (!expectedMsg.equals(actualMsg)) { - throw new Exception("Failed validation of received message - Expected: " + expectedMsg - + ", Actual: " + actualMsg); - } - } - - LOG.info("Read {} entries from ledger {}", entries, lh.getId()); - } catch (Exception e) { - LOG.warn("Error in bookie sanity test", e); - return -1; - } finally { - if (lh != null) { - bk.deleteLedger(lh.getId()); - LOG.info("Deleted ledger {}", lh.getId()); - } - - bk.close(); - } - - LOG.info("Bookie sanity test succeeded"); - return 0; + SanityTestCommand command = new SanityTestCommand(); + SanityTestCommand.SanityFlags flags = new SanityTestCommand.SanityFlags(); + boolean result = command.apply(bkConf, flags); + return (result) ? 0 : -1; } } @@ -1293,15 +1095,14 @@ int runCmd(CommandLine cmdLine) throws Exception { * Command to read entry log files. 
*/ class ReadLogCmd extends MyCommand { - Options rlOpts = new Options(); ReadLogCmd() { super(CMD_READLOG); - rlOpts.addOption("m", "msg", false, "Print message body"); - rlOpts.addOption("l", "ledgerid", true, "Ledger ID"); - rlOpts.addOption("e", "entryid", true, "Entry ID"); - rlOpts.addOption("sp", "startpos", true, "Start Position"); - rlOpts.addOption("ep", "endpos", true, "End Position"); + opts.addOption("m", "msg", false, "Print message body"); + opts.addOption("l", "ledgerid", true, "Ledger ID"); + opts.addOption("e", "entryid", true, "Entry ID"); + opts.addOption("sp", "startpos", true, "Start Position"); + opts.addOption("ep", "endpos", true, "End Position"); } @Override @@ -1312,6 +1113,8 @@ public int runCmd(CommandLine cmdLine) throws Exception { printUsage(); return -1; } + ReadLogCommand cmd = new ReadLogCommand(ledgerIdFormatter, entryFormatter); + ReadLogCommand.ReadLogFlags flags = new ReadLogCommand.ReadLogFlags(); boolean printMsg = false; if (cmdLine.hasOption("m")) { @@ -1320,40 +1123,22 @@ public int runCmd(CommandLine cmdLine) throws Exception { long logId; try { logId = Long.parseLong(leftArgs[0]); + flags.entryLogId(logId); } catch (NumberFormatException nfe) { // not a entry log id - File f = new File(leftArgs[0]); - String name = f.getName(); - if (!name.endsWith(".log")) { - // not a log file - System.err.println("ERROR: invalid entry log file name " + leftArgs[0]); - printUsage(); - return -1; - } - String idString = name.split("\\.")[0]; - logId = Long.parseLong(idString, 16); + flags.filename(leftArgs[0]); } - final long lId = getOptionLedgerIdValue(cmdLine, "ledgerid", -1); final long eId = getOptionLongValue(cmdLine, "entryid", -1); final long startpos = getOptionLongValue(cmdLine, "startpos", -1); final long endpos = getOptionLongValue(cmdLine, "endpos", -1); - - // scan entry log - if (startpos != -1) { - if ((endpos != -1) && (endpos < startpos)) { - System.err - .println("ERROR: StartPosition of the range should be lesser than or equal to EndPosition"); - return -1; - } - scanEntryLogForPositionRange(logId, startpos, endpos, printMsg); - } else if (lId != -1) { - scanEntryLogForSpecificEntry(logId, lId, eId, printMsg); - } else { - scanEntryLog(logId, printMsg); - } - - return 0; + flags.endPos(endpos); + flags.startPos(startpos); + flags.entryId(eId); + flags.ledgerId(lId); + flags.msg(printMsg); + boolean result = cmd.apply(bkConf, flags); + return (result) ? 0 : -1; } @Override @@ -1363,13 +1148,24 @@ String getDescription() { @Override String getUsage() { - return "readlog [-msg] [-ledgerid " - + "[-entryid ]] [-startpos [-endpos ]]"; + return "readlog Scan an entry file and format the entries into readable format\n" + + " Usage: readlog [options]\n" + + " Options:\n" + + " -m, --msg\n" + + " Print message body\n" + + " * -l, --ledgerid\n" + + " Ledger ID (param format: `ledgerId`)\n" + + " * -e, --entryid\n" + + " Entry ID (param format: `entryId`)\n" + + " * -sp, --startpos\n" + + " Start Position (param format: `startPosition`)\n" + + " * -ep, --endpos\n" + + " End Position (param format: `endPosition`)"; } @Override Options getOptions() { - return rlOpts; + return opts; } } @@ -1377,7 +1173,6 @@ Options getOptions() { * Command to print metadata of entrylog. 
*/ class ReadLogMetadataCmd extends MyCommand { - Options rlOpts = new Options(); ReadLogMetadataCmd() { super(CMD_READLOGMETADATA); @@ -1385,6 +1180,8 @@ class ReadLogMetadataCmd extends MyCommand { @Override public int runCmd(CommandLine cmdLine) throws Exception { + ReadLogMetadataCommand cmd = new ReadLogMetadataCommand(ledgerIdFormatter); + ReadLogMetadataCommand.ReadLogMetadataFlags flags = new ReadLogMetadataCommand.ReadLogMetadataFlags(); String[] leftArgs = cmdLine.getArgs(); if (leftArgs.length <= 0) { LOG.error("ERROR: missing entry log id or entry log file name"); @@ -1394,23 +1191,13 @@ public int runCmd(CommandLine cmdLine) throws Exception { long logId; try { - logId = Long.parseLong(leftArgs[0]); + logId = Long.parseLong(leftArgs[0], 16); + flags.logId(logId); } catch (NumberFormatException nfe) { - // not a entry log id - File f = new File(leftArgs[0]); - String name = f.getName(); - if (!name.endsWith(".log")) { - // not a log file - LOG.error("ERROR: invalid entry log file name " + leftArgs[0]); - printUsage(); - return -1; - } - String idString = name.split("\\.")[0]; - logId = Long.parseLong(idString, 16); + flags.logFilename(leftArgs[0]); + flags.logId(-1); } - - printEntryLogMetadata(logId); - + cmd.apply(bkConf, flags); return 0; } @@ -1421,12 +1208,17 @@ String getDescription() { @Override String getUsage() { - return "readlogmetadata "; + return "readlogmetadata Prints entrylog's metadata\n" + + " Usage: readlogmetadata [options]\n" + + " Options:\n" + + " * \n" + + " entry log id or entry log file name (param format: `entryLogId` " + + "or `entryLogFileName`)"; } @Override Options getOptions() { - return rlOpts; + return opts; } } @@ -1434,12 +1226,11 @@ Options getOptions() { * Command to read journal files. */ class ReadJournalCmd extends MyCommand { - Options rjOpts = new Options(); ReadJournalCmd() { super(CMD_READJOURNAL); - rjOpts.addOption("dir", false, "Journal directory (needed if more than one journal configured)"); - rjOpts.addOption("m", "msg", false, "Print message body"); + opts.addOption("dir", true, "Journal directory (needed if more than one journal configured)"); + opts.addOption("m", "msg", false, "Print message body"); } @Override @@ -1451,70 +1242,47 @@ public int runCmd(CommandLine cmdLine) throws Exception { return -1; } + long journalId = -1L; + String filename = ""; + try { + journalId = Long.parseLong(leftArgs[0]); + } catch (NumberFormatException nfe) { + filename = leftArgs[0]; + } + boolean printMsg = false; if (cmdLine.hasOption("m")) { printMsg = true; } - Journal journal = null; - if (getJournals().size() > 1) { - if (!cmdLine.hasOption("dir")) { - System.err.println("ERROR: invalid or missing journal directory"); - printUsage(); - return -1; - } + ReadJournalCommand.ReadJournalFlags flags = new ReadJournalCommand.ReadJournalFlags().msg(printMsg) + .fileName(filename).journalId(journalId) + .dir(cmdLine.getOptionValue("dir")); + ReadJournalCommand cmd = new ReadJournalCommand(ledgerIdFormatter, entryFormatter); + boolean result = cmd.apply(bkConf, flags); + return result ? 
0 : -1; + } - File journalDirectory = new File(cmdLine.getOptionValue("dir")); - for (Journal j : getJournals()) { - if (j.getJournalDirectory().equals(journalDirectory)) { - journal = j; - break; - } - } + @Override + String getDescription() { + return "Scan a journal file and format the entries into readable format."; + } - if (journal == null) { - System.err.println("ERROR: journal directory not found"); - printUsage(); - return -1; - } - } else { - journal = getJournals().get(0); - } - - long journalId; - try { - journalId = Long.parseLong(leftArgs[0]); - } catch (NumberFormatException nfe) { - // not a journal id - File f = new File(leftArgs[0]); - String name = f.getName(); - if (!name.endsWith(".txn")) { - // not a journal file - System.err.println("ERROR: invalid journal file name " + leftArgs[0]); - printUsage(); - return -1; - } - String idString = name.split("\\.")[0]; - journalId = Long.parseLong(idString, 16); - } - // scan journal - scanJournal(journal, journalId, printMsg); - return 0; - } - - @Override - String getDescription() { - return "Scan a journal file and format the entries into readable format."; - } - - @Override - String getUsage() { - return "readjournal [-dir] [-msg] "; - } + @Override + String getUsage() { + return "readjournal Scan a journal file and format the entries into readable format\n" + + " Usage: readjournal [options]\n" + + " Options:\n" + + " * -dir\n" + + " Journal directory needed if more than one journal configured" + + " (param format: `journalDir`)\n" + + " -m, --msg\n" + + " Print message body"; + } @Override Options getOptions() { - return rjOpts; + return opts; } } @@ -1528,7 +1296,7 @@ class LastMarkCmd extends MyCommand { @Override public int runCmd(CommandLine c) throws Exception { - LastMarkCommand command = new LastMarkCommand(); + LastMarkCommand command = newLastMarkCommand(); command.apply(bkConf, new CliFlags()); return 0; } @@ -1540,12 +1308,13 @@ String getDescription() { @Override String getUsage() { - return "lastmark"; + return "lastmark Print last log marker \n" + + " Usage: lastmark"; } @Override Options getOptions() { - return new Options(); + return opts; } } @@ -1553,32 +1322,45 @@ Options getOptions() { * List available bookies. 
*/ class ListBookiesCmd extends MyCommand { - Options opts = new Options(); ListBookiesCmd() { super(CMD_LISTBOOKIES); opts.addOption("rw", "readwrite", false, "Print readwrite bookies"); opts.addOption("ro", "readonly", false, "Print readonly bookies"); + opts.addOption("a", "all", false, "Print all bookies"); // @deprecated 'rw'/'ro' option print both hostname and ip, so this option is not needed anymore opts.addOption("h", "hostnames", false, "Also print hostname of the bookie"); } @Override public int runCmd(CommandLine cmdLine) throws Exception { + int passedCommands = 0; + boolean readwrite = cmdLine.hasOption("rw"); + if (readwrite) { + passedCommands++; + } boolean readonly = cmdLine.hasOption("ro"); + if (readonly) { + passedCommands++; + } + boolean all = cmdLine.hasOption("a"); + if (all) { + passedCommands++; + } - if ((!readwrite && !readonly) || (readwrite && readonly)) { - LOG.error("One and only one of -readwrite and -readonly must be specified"); + if (passedCommands != 1) { + LOG.error("One and only one of -readwrite, -readonly and -all must be specified"); printUsage(); return 1; } - ListBookiesCommand.Flags flags = new ListBookiesCommand.Flags() + ListBookiesCommand.Flags flags = ListBookiesCommand.Flags.newFlags() .readwrite(readwrite) - .readonly(readonly); + .readonly(readonly) + .all(all); - ListBookiesCommand command = new ListBookiesCommand(flags); + ListBookiesCommand command = newListBookiesCommand(flags); command.apply(bkConf, flags); return 0; @@ -1591,7 +1373,17 @@ String getDescription() { @Override String getUsage() { - return "listbookies [-readwrite|-readonly] [-hostnames]"; + return "listbookies List the bookies, which are running as either readwrite or readonly mode\n" + + " Usage: listbookies [options]\n" + + " Options:\n" + + " -a, --all\n" + + " Print all bookies\n" + + " -h, --hostnames\n" + + " Also print hostname of the bookie\n" + + " -ro, --readonly\n" + + " Print readonly bookies\n" + + " -rw, --readwrite\n" + + " Print readwrite bookies "; } @Override @@ -1601,7 +1393,6 @@ Options getOptions() { } class ListDiskFilesCmd extends MyCommand { - Options opts = new Options(); ListDiskFilesCmd() { super(CMD_LISTFILESONDISC); @@ -1616,49 +1407,30 @@ public int runCmd(CommandLine cmdLine) throws Exception { boolean journal = cmdLine.hasOption("txn"); boolean entrylog = cmdLine.hasOption("log"); boolean index = cmdLine.hasOption("idx"); - boolean all = false; - if (!journal && !entrylog && !index && !all) { - all = true; - } - - if (all || journal) { - File[] journalDirs = bkConf.getJournalDirs(); - List journalFiles = listFilesAndSort(journalDirs, "txn"); - System.out.println("--------- Printing the list of Journal Files ---------"); - for (File journalFile : journalFiles) { - System.out.println(journalFile.getCanonicalPath()); - } - System.out.println(); - } - if (all || entrylog) { - File[] ledgerDirs = bkConf.getLedgerDirs(); - List ledgerFiles = listFilesAndSort(ledgerDirs, "log"); - System.out.println("--------- Printing the list of EntryLog/Ledger Files ---------"); - for (File ledgerFile : ledgerFiles) { - System.out.println(ledgerFile.getCanonicalPath()); - } - System.out.println(); - } - if (all || index) { - File[] indexDirs = (bkConf.getIndexDirs() == null) ? 
bkConf.getLedgerDirs() : bkConf.getIndexDirs(); - List indexFiles = listFilesAndSort(indexDirs, "idx"); - System.out.println("--------- Printing the list of Index Files ---------"); - for (File indexFile : indexFiles) { - System.out.println(indexFile.getCanonicalPath()); - } - } + ListFilesOnDiscCommand.LFODFlags flags = new ListFilesOnDiscCommand.LFODFlags().journal(journal) + .entrylog(entrylog).index(index); + ListFilesOnDiscCommand cmd = new ListFilesOnDiscCommand(flags); + cmd.apply(bkConf, flags); return 0; } @Override String getDescription() { - return "List the files in JournalDirectory/LedgerDirectories/IndexDirectories."; + return "List the files in JournalDirectories/LedgerDirectories/IndexDirectories."; } @Override String getUsage() { - return "listfilesondisc [-journal|-entrylog|-index]"; + return "listfilesondisc List the files in JournalDirectories/LedgerDirectories/IndexDirectories \n" + + " Usage: listfilesondisc [options]\n" + + " Options:\n" + + " -txn, --journal\n" + + " Print list of Journal Files\n" + + " -log, --entrylog\n" + + " Print list of EntryLog Files\n" + + " -idx, --index\n" + + " Print list of Index files "; } @Override @@ -1667,7 +1439,6 @@ Options getOptions() { } } - /** * Command to print help message. */ @@ -1706,7 +1477,7 @@ String getUsage() { @Override Options getOptions() { - return new Options(); + return opts; } } @@ -1714,7 +1485,6 @@ Options getOptions() { * Command for administration of autorecovery. */ class AutoRecoveryCmd extends MyCommand { - Options opts = new Options(); public AutoRecoveryCmd() { super(CMD_AUTORECOVERY); @@ -1736,7 +1506,13 @@ String getDescription() { @Override String getUsage() { - return "autorecovery [-enable|-disable]"; + return "autorecovery Enable or disable autorecovery in the cluster\n" + + " Usage: autorecovery [options]\n" + + " Options:\n" + + " * -e, --enable\n" + + " Enable auto recovery of underreplicated ledgers\n" + + " * -d, --disable\n" + + " Disable auto recovery of underreplicated ledgers"; } @Override @@ -1744,44 +1520,49 @@ int runCmd(CommandLine cmdLine) throws Exception { boolean disable = cmdLine.hasOption("d"); boolean enable = cmdLine.hasOption("e"); - if (enable && disable) { - LOG.error("Only one of -enable and -disable can be specified"); - printUsage(); - return 1; - } + ToggleCommand.AutoRecoveryFlags flags = new ToggleCommand.AutoRecoveryFlags() + .enable(enable).status(!disable && !enable); + ToggleCommand cmd = new ToggleCommand(); + cmd.apply(bkConf, flags); - runFunctionWithLedgerManagerFactory(bkConf, mFactory -> { - try { - try (LedgerUnderreplicationManager underreplicationManager = - mFactory.newLedgerUnderreplicationManager()) { - if (!enable && !disable) { - boolean enabled = underreplicationManager.isLedgerReplicationEnabled(); - System.out.println("Autorecovery is " + (enabled ? "enabled." : "disabled.")); - } else if (enable) { - if (underreplicationManager.isLedgerReplicationEnabled()) { - LOG.warn("Autorecovery already enabled. Doing nothing"); - } else { - LOG.info("Enabling autorecovery"); - underreplicationManager.enableLedgerReplication(); - } - } else { - if (!underreplicationManager.isLedgerReplicationEnabled()) { - LOG.warn("Autorecovery already disabled. 
Doing nothing"); - } else { - LOG.info("Disabling autorecovery"); - underreplicationManager.disableLedgerReplication(); - } - } - } - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - throw new UncheckedExecutionException(e); - } catch (KeeperException | ReplicationException e) { - throw new UncheckedExecutionException(e); - } - return null; - }); + return 0; + } + } + + + /** + * Command to query autorecovery status. + */ + class QueryAutoRecoveryStatusCmd extends MyCommand { + + public QueryAutoRecoveryStatusCmd() { + super(CMD_QUERY_AUTORECOVERY_STATUS); + } + + @Override + Options getOptions() { + return opts; + } + + @Override + String getDescription() { + return "Query the autorecovery status"; + } + + @Override + String getUsage() { + return "queryautorecoverystatus Query the autorecovery status, " + + "queryautorecoverystatus requires no options,use the default conf or re-specify BOOKIE_CONF \n" + + " Usage: queryautorecoverystatus"; + } + @Override + int runCmd(CommandLine cmdLine) throws Exception { + final boolean verbose = cmdLine.hasOption("verbose"); + QueryAutoRecoveryStatusCommand.QFlags flags = new QueryAutoRecoveryStatusCommand.QFlags() + .verbose(verbose); + QueryAutoRecoveryStatusCommand cmd = new QueryAutoRecoveryStatusCommand(); + cmd.apply(bkConf, flags); return 0; } } @@ -1790,7 +1571,6 @@ int runCmd(CommandLine cmdLine) throws Exception { * Setter and Getter for LostBookieRecoveryDelay value (in seconds) in metadata store. */ class LostBookieRecoveryDelayCmd extends MyCommand { - Options opts = new Options(); public LostBookieRecoveryDelayCmd() { super(CMD_LOSTBOOKIERECOVERYDELAY); @@ -1810,46 +1590,38 @@ String getDescription() { @Override String getUsage() { - return "lostbookierecoverydelay [-get|-set ]"; + return "lostbookierecoverydelay Setter and Getter for LostBookieRecoveryDelay value" + + " (in seconds) in metadata store\n" + + " Usage: lostbookierecoverydelay [options]\n" + + " Options:\n" + + " -g, --get\n" + + " Get LostBookieRecoveryDelay value (in seconds)\n" + + " -s, --set\n" + + " Set LostBookieRecoveryDelay value (in seconds, " + + "param format: `lostBookieRecoveryDelayInSecs`) "; } @Override int runCmd(CommandLine cmdLine) throws Exception { boolean getter = cmdLine.hasOption("g"); boolean setter = cmdLine.hasOption("s"); - - if ((!getter && !setter) || (getter && setter)) { - LOG.error("One and only one of -get and -set must be specified"); - printUsage(); - return 1; - } - ClientConfiguration adminConf = new ClientConfiguration(bkConf); - BookKeeperAdmin admin = new BookKeeperAdmin(adminConf); - try { - if (getter) { - int lostBookieRecoveryDelay = admin.getLostBookieRecoveryDelay(); - LOG.info("LostBookieRecoveryDelay value in ZK: {}", String.valueOf(lostBookieRecoveryDelay)); - } else { - int lostBookieRecoveryDelay = Integer.parseInt(cmdLine.getOptionValue("set")); - admin.setLostBookieRecoveryDelay(lostBookieRecoveryDelay); - LOG.info("Successfully set LostBookieRecoveryDelay value in ZK: {}", - String.valueOf(lostBookieRecoveryDelay)); - } - } finally { - if (admin != null) { - admin.close(); - } + int set = 0; + if (setter) { + set = Integer.parseInt(cmdLine.getOptionValue("set")); } - return 0; + + LostBookieRecoveryDelayCommand.LBRDFlags flags = new LostBookieRecoveryDelayCommand.LBRDFlags() + .get(getter).set(set); + LostBookieRecoveryDelayCommand cmd = new LostBookieRecoveryDelayCommand(); + boolean result = cmd.apply(bkConf, flags); + return result ? 0 : 1; } } - /** * Print which node has the auditor lock. 
*/ class WhoIsAuditorCmd extends MyCommand { - Options opts = new Options(); public WhoIsAuditorCmd() { super(CMD_WHOISAUDITOR); @@ -1867,32 +1639,17 @@ String getDescription() { @Override String getUsage() { - return "whoisauditor"; + return "whoisauditor Print the node which holds the auditor lock, " + + "whoisauditor requires no options,use the default conf or re-specify BOOKIE_CONF \n" + + " Usage: whoisauditor"; } @Override int runCmd(CommandLine cmdLine) throws Exception { - ZooKeeper zk = null; - try { - String metadataServiceUri = bkConf.getMetadataServiceUri(); - String zkServers = ZKMetadataDriverBase.getZKServersFromServiceUri(URI.create(metadataServiceUri)); - zk = ZooKeeperClient.newBuilder() - .connectString(zkServers) - .sessionTimeoutMs(bkConf.getZkTimeout()) - .build(); - BookieSocketAddress bookieId = AuditorElector.getCurrentAuditor(bkConf, zk); - if (bookieId == null) { - LOG.info("No auditor elected"); - return -1; - } - LOG.info("Auditor: " + getBookieSocketAddrStringRepresentation(bookieId)); - } finally { - if (zk != null) { - zk.close(); - } - } - - return 0; + CliFlags flags = new CliFlags(); + WhoIsAuditorCommand cmd = new WhoIsAuditorCommand(); + boolean result = cmd.apply(bkConf, flags); + return result ? 0 : -1; } } @@ -1900,7 +1657,6 @@ int runCmd(CommandLine cmdLine) throws Exception { * Prints the instanceid of the cluster. */ class WhatIsInstanceId extends MyCommand { - Options opts = new Options(); public WhatIsInstanceId() { super(CMD_WHATISINSTANCEID); @@ -1918,22 +1674,15 @@ String getDescription() { @Override String getUsage() { - return "whatisinstanceid"; + return "whatisinstanceid Print the instanceid of the cluster, " + + "whatisinstanceid requires no options,use the default conf or re-specify BOOKIE_CONF \n" + + " Usage: whatisinstanceid"; } @Override int runCmd(CommandLine cmdLine) throws Exception { - runFunctionWithRegistrationManager(bkConf, rm -> { - String readInstanceId = null; - try { - readInstanceId = rm.getClusterInstanceId(); - } catch (BookieException e) { - throw new UncheckedExecutionException(e); - } - LOG.info("Metadata Service Uri: {} InstanceId: {}", - bkConf.getMetadataServiceUriUnchecked(), readInstanceId); - return null; - }); + InstanceIdCommand cmd = new InstanceIdCommand(); + cmd.apply(bkConf, new CliFlags()); return 0; } } @@ -1942,7 +1691,6 @@ int runCmd(CommandLine cmdLine) throws Exception { * Update cookie command. 
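+     *
+     * <p>The option parsing in runCmd below reduces to a single
+     * {@code AdminCommand} invocation; e.g. {@code updatecookie -delete force}
+     * corresponds roughly to this sketch (import path assumed):
+     * <pre>{@code
+     * import org.apache.bookkeeper.conf.ServerConfiguration;
+     * import org.apache.bookkeeper.tools.cli.commands.bookie.AdminCommand;
+     *
+     * ServerConfiguration conf = new ServerConfiguration(); // bookie dirs come from the loaded conf
+     * AdminCommand.AdminFlags flags = new AdminCommand.AdminFlags()
+     *         .delete(true)
+     *         .force(true);
+     * boolean ok = new AdminCommand().apply(conf, flags);
+     * }</pre>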
*/ class UpdateCookieCmd extends MyCommand { - Options opts = new Options(); private static final String BOOKIEID = "bookieId"; private static final String EXPANDSTORAGE = "expandstorage"; private static final String LIST = "list"; @@ -1979,12 +1727,23 @@ String getDescription() { @Override String getUsage() { - return "updatecookie [-bookieId ] [-expandstorage] [-list] [-delete ]"; + return "updatecookie Command to update cookie\n" + + " Usage: updatecookie [options]\n" + + " Options:\n" + + " * -b, --bookieId\n" + + " Bookie Id (param format: `address:port`)\n" + + " -e, --expandstorage\n" + + " Expand Storage\n" + + " -l, --list\n" + + " List paths of all the cookies present locally and on zookkeeper\n" + + " -d, --delete\n" + + " Delete cookie both locally and in ZooKeeper (param format: force)"; } @Override int runCmd(CommandLine cmdLine) throws Exception { - int retValue = -1; + AdminCommand cmd = new AdminCommand(); + AdminCommand.AdminFlags flags = new AdminCommand.AdminFlags(); Option[] options = cmdLine.getOptions(); if (options.length != 1) { LOG.error("Invalid command!"); @@ -2005,214 +1764,22 @@ int runCmd(CommandLine cmdLine) throws Exception { return -1; } boolean useHostName = getOptionalValue(bookieId, HOSTNAME); - if (!bkConf.getUseHostNameAsBookieID() && useHostName) { - LOG.error( - "Expects config useHostNameAsBookieID=true as the option value passed is 'hostname'"); - return -1; - } else if (bkConf.getUseHostNameAsBookieID() && !useHostName) { - LOG.error("Expects configuration useHostNameAsBookieID=false as the option value passed is 'ip'"); - return -1; - } - retValue = updateBookieIdInCookie(bookieId, useHostName); - } else if (thisCommandOption.getLongOpt().equals(EXPANDSTORAGE)) { - bkConf.setAllowStorageExpansion(true); - return expandStorage(); - } else if (thisCommandOption.getLongOpt().equals(LIST)) { - return listOrDeleteCookies(false, false); - } else if (thisCommandOption.getLongOpt().equals(DELETE)) { + flags.hostname(useHostName); + flags.ip(!useHostName); + } + flags.expandstorage(thisCommandOption.getLongOpt().equals(EXPANDSTORAGE)); + flags.list(thisCommandOption.getLongOpt().equals(LIST)); + flags.delete(thisCommandOption.getLongOpt().equals(DELETE)); + if (thisCommandOption.getLongOpt().equals(DELETE)) { boolean force = false; String optionValue = thisCommandOption.getValue(); if (!StringUtils.isEmpty(optionValue) && optionValue.equals(FORCE)) { force = true; } - return listOrDeleteCookies(true, force); - } else { - LOG.error("Invalid command!"); - this.printUsage(); - return -1; - } - return retValue; - } - - private int updateBookieIdInCookie(final String bookieId, final boolean useHostname) - throws Exception { - return runFunctionWithRegistrationManager(bkConf, rm -> { - try { - ServerConfiguration conf = new ServerConfiguration(bkConf); - String newBookieId = Bookie.getBookieAddress(conf).toString(); - // read oldcookie - Versioned oldCookie = null; - try { - conf.setUseHostNameAsBookieID(!useHostname); - oldCookie = Cookie.readFromRegistrationManager(rm, conf); - } catch (CookieNotFoundException nne) { - LOG.error("Either cookie already updated with UseHostNameAsBookieID={} or no cookie exists!", - useHostname, nne); - return -1; - } - Cookie newCookie = Cookie.newBuilder(oldCookie.getValue()).setBookieHost(newBookieId).build(); - boolean hasCookieUpdatedInDirs = verifyCookie(newCookie, journalDirectories[0]); - for (File dir : ledgerDirectories) { - hasCookieUpdatedInDirs &= verifyCookie(newCookie, dir); - } - if (indexDirectories != 
ledgerDirectories) { - for (File dir : indexDirectories) { - hasCookieUpdatedInDirs &= verifyCookie(newCookie, dir); - } - } - - if (hasCookieUpdatedInDirs) { - try { - conf.setUseHostNameAsBookieID(useHostname); - Cookie.readFromRegistrationManager(rm, conf); - // since newcookie exists, just do cleanup of oldcookie and return - conf.setUseHostNameAsBookieID(!useHostname); - oldCookie.getValue().deleteFromRegistrationManager(rm, conf, oldCookie.getVersion()); - return 0; - } catch (CookieNotFoundException nne) { - if (LOG.isDebugEnabled()) { - LOG.debug("Ignoring, cookie will be written to zookeeper"); - } - } - } else { - // writes newcookie to local dirs - for (File journalDirectory : journalDirectories) { - newCookie.writeToDirectory(journalDirectory); - LOG.info("Updated cookie file present in journalDirectory {}", journalDirectory); - } - for (File dir : ledgerDirectories) { - newCookie.writeToDirectory(dir); - } - LOG.info("Updated cookie file present in ledgerDirectories {}", ledgerDirectories); - if (ledgerDirectories != indexDirectories) { - for (File dir : indexDirectories) { - newCookie.writeToDirectory(dir); - } - LOG.info("Updated cookie file present in indexDirectories {}", indexDirectories); - } - } - // writes newcookie to zookeeper - conf.setUseHostNameAsBookieID(useHostname); - newCookie.writeToRegistrationManager(rm, conf, Version.NEW); - - // delete oldcookie - conf.setUseHostNameAsBookieID(!useHostname); - oldCookie.getValue().deleteFromRegistrationManager(rm, conf, oldCookie.getVersion()); - return 0; - } catch (IOException | BookieException ioe) { - LOG.error("IOException during cookie updation!", ioe); - return -1; - } - }); - } - - private int expandStorage() throws Exception { - return runFunctionWithMetadataBookieDriver(bkConf, driver -> { - List allLedgerDirs = Lists.newArrayList(); - allLedgerDirs.addAll(Arrays.asList(ledgerDirectories)); - if (indexDirectories != ledgerDirectories) { - allLedgerDirs.addAll(Arrays.asList(indexDirectories)); - } - - try { - Bookie.checkEnvironmentWithStorageExpansion( - bkConf, driver, Arrays.asList(journalDirectories), allLedgerDirs); - return 0; - } catch (BookieException e) { - LOG.error("Exception while updating cookie for storage expansion", e); - return -1; - } - }); - } - - private boolean verifyCookie(Cookie oldCookie, File dir) throws IOException { - try { - Cookie cookie = Cookie.readFromDirectory(dir); - cookie.verify(oldCookie); - } catch (InvalidCookieException e) { - return false; - } - return true; - } - - private int listOrDeleteCookies(boolean delete, boolean force) throws Exception { - BookieSocketAddress bookieAddress = Bookie.getBookieAddress(bkConf); - File[] journalDirs = bkConf.getJournalDirs(); - File[] ledgerDirs = bkConf.getLedgerDirs(); - File[] indexDirs = bkConf.getIndexDirs(); - File[] allDirs = ArrayUtils.addAll(journalDirs, ledgerDirs); - if (indexDirs != null) { - allDirs = ArrayUtils.addAll(allDirs, indexDirs); - } - - File[] allCurDirs = Bookie.getCurrentDirectories(allDirs); - List allVersionFiles = new LinkedList(); - File versionFile; - for (File curDir : allCurDirs) { - versionFile = new File(curDir, BookKeeperConstants.VERSION_FILENAME); - if (versionFile.exists()) { - allVersionFiles.add(versionFile); - } - } - - if (!allVersionFiles.isEmpty()) { - if (delete) { - boolean confirm = force; - if (!confirm) { - confirm = IOUtils.confirmPrompt("Are you sure you want to delete Cookies locally?"); - } - if (confirm) { - for (File verFile : allVersionFiles) { - if (!verFile.delete()) { - 
LOG.error( - "Failed to delete Local cookie file {}. So aborting deletecookie of Bookie: {}", - verFile, bookieAddress); - return -1; - } - } - LOG.info("Deleted Local Cookies of Bookie: {}", bookieAddress); - } else { - LOG.info("Skipping deleting local Cookies of Bookie: {}", bookieAddress); - } - } else { - LOG.info("Listing local Cookie Files of Bookie: {}", bookieAddress); - for (File verFile : allVersionFiles) { - LOG.info(verFile.getCanonicalPath()); - } - } - } else { - LOG.info("No local cookies for Bookie: {}", bookieAddress); + flags.force(force); } - - return runFunctionWithRegistrationManager(bkConf, rm -> { - try { - Versioned cookie = null; - try { - cookie = Cookie.readFromRegistrationManager(rm, bookieAddress); - } catch (CookieNotFoundException nne) { - LOG.info("No cookie for {} in metadata store", bookieAddress); - return 0; - } - - if (delete) { - boolean confirm = force; - if (!confirm) { - confirm = IOUtils.confirmPrompt( - "Are you sure you want to delete Cookies from metadata store?"); - } - - if (confirm) { - cookie.getValue().deleteFromRegistrationManager(rm, bkConf, cookie.getVersion()); - LOG.info("Deleted Cookie from metadata store for Bookie: {}", bookieAddress); - } else { - LOG.info("Skipping deleting cookie from metadata store for Bookie: {}", bookieAddress); - } - } - } catch (BookieException | IOException e) { - return -1; - } - return 0; - }); + boolean result = cmd.apply(bkConf, flags); + return (result) ? 0 : -1; } } @@ -2220,12 +1787,12 @@ private int listOrDeleteCookies(boolean delete, boolean force) throws Exception * Update ledger command. */ class UpdateLedgerCmd extends MyCommand { - private final Options opts = new Options(); UpdateLedgerCmd() { super(CMD_UPDATELEDGER); opts.addOption("b", "bookieId", true, "Bookie Id"); opts.addOption("s", "updatespersec", true, "Number of ledgers updating per second (default: 5 per sec)"); + opts.addOption("r", "maxOutstandingReads", true, "Max outstanding reads (default: 5 * updatespersec)"); opts.addOption("l", "limit", true, "Maximum number of ledgers to update (default: no limit)"); opts.addOption("v", "verbose", true, "Print status of the ledger updation (default: false)"); opts.addOption("p", "printprogress", true, @@ -2244,12 +1811,31 @@ String getDescription() { @Override String getUsage() { - return "updateledgers -bookieId [-updatespersec N] [-limit N] [-verbose true/false] " - + "[-printprogress N]"; + return "updateledgers Update bookie id in ledgers\n" + + " Usage: updateledgers [options]\n" + + " Options:\n" + + " * -b, --bookieId\n" + + " Bookie Id (param format: `address:port`)\n" + + " -s, --updatespersec\n" + + " Number of ledgers updating per second (default: 5, " + + "param format: `updatespersec`)\n" + + " -r, --maxOutstandingReads\n" + + " Max outstanding reads (default: 5 * updatespersec, " + + "param format: `maxOutstandingReads`)\n" + + " -l, --limit\n" + + " Maximum number of ledgers to update (default: no limit, param format: `limit`)\n" + + " -v, --verbose\n" + + " Print status of the ledger updation (default: false, param format: `verbose`)\n" + + " -p, --printprogress\n" + + " Print messages on every configured seconds if verbose turned on " + + "(default: 10 secs, param format: `printprogress`)"; } @Override int runCmd(CommandLine cmdLine) throws Exception { + FlipBookieIdCommand cmd = new FlipBookieIdCommand(); + FlipBookieIdCommand.FlipBookieIdFlags flags = new FlipBookieIdCommand.FlipBookieIdFlags(); + final String bookieId = cmdLine.getOptionValue("bookieId"); if 
(StringUtils.isBlank(bookieId)) { LOG.error("Invalid argument list!"); @@ -2262,23 +1848,106 @@ int runCmd(CommandLine cmdLine) throws Exception { return -1; } boolean useHostName = getOptionalValue(bookieId, "hostname"); - if (!bkConf.getUseHostNameAsBookieID() && useHostName) { - LOG.error("Expects configuration useHostNameAsBookieID=true as the option value passed is 'hostname'"); - return -1; - } else if (bkConf.getUseHostNameAsBookieID() && !useHostName) { - LOG.error("Expects configuration useHostNameAsBookieID=false as the option value passed is 'ip'"); - return -1; - } final int rate = getOptionIntValue(cmdLine, "updatespersec", 5); - if (rate <= 0) { - LOG.error("Invalid updatespersec {}, should be > 0", rate); + final int maxOutstandingReads = getOptionIntValue(cmdLine, "maxOutstandingReads", (rate * 5)); + final int limit = getOptionIntValue(cmdLine, "limit", Integer.MIN_VALUE); + final boolean verbose = getOptionBooleanValue(cmdLine, "verbose", false); + final long printprogress; + if (!verbose) { + if (cmdLine.hasOption("printprogress")) { + LOG.warn("Ignoring option 'printprogress', this is applicable when 'verbose' is true"); + } + printprogress = Integer.MIN_VALUE; + } else { + // defaulting to 10 seconds + printprogress = getOptionLongValue(cmdLine, "printprogress", 10); + } + flags.hostname(useHostName); + flags.printProgress(printprogress); + flags.limit(limit); + flags.updatePerSec(rate); + flags.maxOutstandingReads(maxOutstandingReads); + flags.verbose(verbose); + + boolean result = cmd.apply(bkConf, flags); + return (result) ? 0 : -1; + } + } + + /** + * Update bookie into ledger command. + */ + class UpdateBookieInLedgerCmd extends MyCommand { + + UpdateBookieInLedgerCmd() { + super(CMD_UPDATE_BOOKIE_IN_LEDGER); + opts.addOption("sb", "srcBookie", true, "Source bookie which needs to be replaced by destination bookie."); + opts.addOption("db", "destBookie", true, "Destination bookie which replaces source bookie."); + opts.addOption("s", "updatespersec", true, "Number of ledgers updating per second (default: 5 per sec)"); + opts.addOption("r", "maxOutstandingReads", true, "Max outstanding reads (default: 5 * updatespersec)"); + opts.addOption("l", "limit", true, "Maximum number of ledgers to update (default: no limit)"); + opts.addOption("v", "verbose", true, "Print status of the ledger updation (default: false)"); + opts.addOption("p", "printprogress", true, + "Print messages on every configured seconds if verbose turned on (default: 10 secs)"); + } + + @Override + Options getOptions() { + return opts; + } + + @Override + String getDescription() { + return "Replace bookie in ledger metadata. (useful when re-ip of host) " + + "replace srcBookie with destBookie. (this may take a long time)."; + } + + @Override + String getUsage() { + return "updateBookieInLedger Replace bookie in ledger metadata. (useful when re-ip of host) " + + "replace srcBookie with destBookie. 
(this may take a long time)\n" + + " Usage: updateBookieInLedger [options]\n" + + " Options:\n" + + " * -sb, --srcBookie\n" + + " Source bookie which needs to be replaced by destination bookie " + + "(param format: `address:port`)\n" + + " * -db, --destBookie\n" + + " Destination bookie which replaces source bookie (param format: `address:port`)\n" + + " -s, --updatespersec\n" + + " Number of ledgers updating per second (default: 5, " + + "param format: `updatesPerSec`)\n" + + " -r, --maxOutstandingReads\n" + + " Max outstanding reads (default: 5 * updatespersec, " + + "param format: `maxOutstandingReads`)\n" + + " -l, --limit\n" + + " Maximum number of ledgers to update (default: no limit, param format: `limit`)\n" + + " -v, --verbose\n" + + " Print status of the ledger updation (default: false, param format: `verbose`)\n" + + " -p, --printprogress\n" + + " Print messages on every configured seconds if verbose turned on (default: 10, " + + "param format: `printprogress`)"; + } + + @Override + int runCmd(CommandLine cmdLine) throws Exception { + UpdateBookieInLedgerCommand cmd = new UpdateBookieInLedgerCommand(); + UpdateBookieInLedgerCommand.UpdateBookieInLedgerFlags flags = + new UpdateBookieInLedgerCommand.UpdateBookieInLedgerFlags(); + + final String srcBookie = cmdLine.getOptionValue("srcBookie"); + final String destBookie = cmdLine.getOptionValue("destBookie"); + if (StringUtils.isBlank(srcBookie) || StringUtils.isBlank(destBookie)) { + LOG.error("Invalid argument list (srcBookie and destBookie must be provided)!"); + this.printUsage(); return -1; } - final int limit = getOptionIntValue(cmdLine, "limit", Integer.MIN_VALUE); - if (limit <= 0 && limit != Integer.MIN_VALUE) { - LOG.error("Invalid limit {}, should be > 0", limit); + if (StringUtils.equals(srcBookie, destBookie)) { + LOG.error("srcBookie and destBookie can't be the same."); return -1; } + final int rate = getOptionIntValue(cmdLine, "updatespersec", 5); + final int maxOutstandingReads = getOptionIntValue(cmdLine, "maxOutstandingReads", (rate * 5)); + final int limit = getOptionIntValue(cmdLine, "limit", Integer.MIN_VALUE); final boolean verbose = getOptionBooleanValue(cmdLine, "verbose", false); final long printprogress; if (!verbose) { @@ -2290,37 +1959,16 @@ int runCmd(CommandLine cmdLine) throws Exception { // defaulting to 10 seconds printprogress = getOptionLongValue(cmdLine, "printprogress", 10); } - final ClientConfiguration conf = new ClientConfiguration(); - conf.addConfiguration(bkConf); - final BookKeeper bk = new BookKeeper(conf); - final BookKeeperAdmin admin = new BookKeeperAdmin(conf); - final UpdateLedgerOp updateLedgerOp = new UpdateLedgerOp(bk, admin); - final ServerConfiguration serverConf = new ServerConfiguration(bkConf); - final BookieSocketAddress newBookieId = Bookie.getBookieAddress(serverConf); - serverConf.setUseHostNameAsBookieID(!useHostName); - final BookieSocketAddress oldBookieId = Bookie.getBookieAddress(serverConf); - - UpdateLedgerNotifier progressable = new UpdateLedgerNotifier() { - long lastReport = System.nanoTime(); - - @Override - public void progress(long updated, long issued) { - if (printprogress <= 0) { - return; // disabled - } - if (TimeUnit.MILLISECONDS.toSeconds(MathUtils.elapsedMSec(lastReport)) >= printprogress) { - LOG.info("Number of ledgers issued={}, updated={}", issued, updated); - lastReport = MathUtils.nowInNano(); - } - } - }; - try { - updateLedgerOp.updateBookieIdInLedgers(oldBookieId, newBookieId, rate, limit, progressable); - } catch (IOException e) { - 
LOG.error("Failed to update ledger metadata", e); - return -1; - } - return 0; + flags.srcBookie(srcBookie); + flags.destBookie(destBookie); + flags.printProgress(printprogress); + flags.limit(limit); + flags.updatePerSec(rate); + flags.maxOutstandingReads(maxOutstandingReads); + flags.verbose(verbose); + + boolean result = cmd.apply(bkConf, flags); + return (result) ? 0 : -1; } } @@ -2328,42 +1976,22 @@ public void progress(long updated, long issued) { * Command to delete a given ledger. */ class DeleteLedgerCmd extends MyCommand { - Options lOpts = new Options(); DeleteLedgerCmd() { super(CMD_DELETELEDGER); - lOpts.addOption("l", "ledgerid", true, "Ledger ID"); - lOpts.addOption("f", "force", false, "Whether to force delete the Ledger without prompt..?"); + opts.addOption("l", "ledgerid", true, "Ledger ID"); + opts.addOption("f", "force", false, "Whether to force delete the Ledger without prompt..?"); } @Override public int runCmd(CommandLine cmdLine) throws Exception { final long lid = getOptionLedgerIdValue(cmdLine, "ledgerid", -1); - if (lid == -1) { - System.err.println("Must specify a ledger id"); - return -1; - } boolean force = cmdLine.hasOption("f"); - boolean confirm = false; - if (!force) { - confirm = IOUtils.confirmPrompt( - "Are you sure to delete Ledger : " + ledgerIdFormatter.formatLedgerId(lid) + "?"); - } - - BookKeeper bk = null; - try { - if (force || confirm) { - ClientConfiguration conf = new ClientConfiguration(); - conf.addConfiguration(bkConf); - bk = new BookKeeper(conf); - bk.deleteLedger(lid); - } - } finally { - if (bk != null) { - bk.close(); - } - } + DeleteLedgerCommand cmd = new DeleteLedgerCommand(ledgerIdFormatter); + DeleteLedgerCommand.DeleteLedgerFlags flags = new DeleteLedgerCommand.DeleteLedgerFlags() + .ledgerId(lid).force(force); + cmd.apply(bkConf, flags); return 0; } @@ -2375,12 +2003,18 @@ String getDescription() { @Override String getUsage() { - return "deleteledger -ledgerid [-force]"; + return "deleteledger Delete a ledger\n" + + " Usage: deleteledger [options]\n" + + " Options:\n" + + " * -l, --ledgerid\n" + + " Ledger ID (param format: `ledgerId`)\n" + + " * -f, --force\n" + + " Whether to force delete the Ledger without prompt"; } @Override Options getOptions() { - return lOpts; + return opts; } } @@ -2389,7 +2023,6 @@ Options getOptions() { * the bookies in the cluster. */ class BookieInfoCmd extends MyCommand { - Options lOpts = new Options(); BookieInfoCmd() { super(CMD_BOOKIEINFO); @@ -2402,54 +2035,21 @@ String getDescription() { @Override String getUsage() { - return "bookieinfo"; + return "bookieinfo Retrieve bookie info such as free and total disk space," + + "bookieinfo requires no options," + + "use the default conf or re-specify BOOKIE_CONF \n" + + " Usage: bookieinfo"; } @Override Options getOptions() { - return lOpts; - } - - String getReadable(long val) { - String unit[] = {"", "KB", "MB", "GB", "TB"}; - int cnt = 0; - double d = val; - while (d >= 1000 && cnt < unit.length - 1) { - d = d / 1000; - cnt++; - } - DecimalFormat df = new DecimalFormat("#.###"); - df.setRoundingMode(RoundingMode.DOWN); - return cnt > 0 ? 
"(" + df.format(d) + unit[cnt] + ")" : unit[cnt]; + return opts; } @Override public int runCmd(CommandLine cmdLine) throws Exception { - ClientConfiguration clientConf = new ClientConfiguration(bkConf); - clientConf.setDiskWeightBasedPlacementEnabled(true); - BookKeeper bk = new BookKeeper(clientConf); - - Map map = bk.getBookieInfo(); - if (map.size() == 0) { - System.out.println("Failed to retrieve bookie information from any of the bookies"); - bk.close(); - return 0; - } - - System.out.println("Free disk space info:"); - long totalFree = 0, total = 0; - for (Map.Entry e : map.entrySet()) { - BookieInfo bInfo = e.getValue(); - BookieSocketAddress bookieId = e.getKey(); - System.out.println(getBookieSocketAddrStringRepresentation(bookieId) + ":\tFree: " - + bInfo.getFreeDiskSpace() + getReadable(bInfo.getFreeDiskSpace()) + "\tTotal: " - + bInfo.getTotalDiskSpace() + getReadable(bInfo.getTotalDiskSpace())); - totalFree += bInfo.getFreeDiskSpace(); - total += bInfo.getTotalDiskSpace(); - } - System.out.println("Total free disk space in the cluster:\t" + totalFree + getReadable(totalFree)); - System.out.println("Total disk capacity in the cluster:\t" + total + getReadable(total)); - bk.close(); + InfoCommand cmd = new InfoCommand(); + cmd.apply(bkConf, new CliFlags()); return 0; } } @@ -2458,7 +2058,6 @@ public int runCmd(CommandLine cmdLine) throws Exception { * Command to trigger AuditTask by resetting lostBookieRecoveryDelay to its current value. */ class TriggerAuditCmd extends MyCommand { - Options opts = new Options(); TriggerAuditCmd() { super(CMD_TRIGGERAUDIT); @@ -2471,7 +2070,9 @@ String getDescription() { @Override String getUsage() { - return CMD_TRIGGERAUDIT; + return "triggeraudit Force trigger the Audit by resetting the lostBookieRecoveryDelay, " + + "triggeraudit requires no options,use the default conf or re-specify BOOKIE_CONF \n" + + " Usage: triggeraudit"; } @Override @@ -2481,85 +2082,180 @@ Options getOptions() { @Override public int runCmd(CommandLine cmdLine) throws Exception { - ClientConfiguration adminConf = new ClientConfiguration(bkConf); - BookKeeperAdmin admin = new BookKeeperAdmin(adminConf); - try { - admin.triggerAudit(); - } finally { - if (admin != null) { - admin.close(); - } - } + TriggerAuditCommand cmd = new TriggerAuditCommand(); + cmd.apply(bkConf, new CliFlags()); return 0; } } - /** - * Command to trigger AuditTask by resetting lostBookieRecoveryDelay and - * then make sure the ledgers stored in the bookie are properly replicated - * and Cookie of the decommissioned bookie should be deleted from metadata - * server. 
- */ - class DecommissionBookieCmd extends MyCommand { - Options lOpts = new Options(); + class ForceAuditorChecksCmd extends MyCommand { - DecommissionBookieCmd() { - super(CMD_DECOMMISSIONBOOKIE); - lOpts.addOption("bookieid", true, "decommission a remote bookie"); + ForceAuditorChecksCmd() { + super(CMD_FORCEAUDITCHECKS); + opts.addOption("calc", "checkallledgerscheck", false, "Force checkAllLedgers audit " + + "upon next Auditor startup "); + opts.addOption("ppc", "placementpolicycheck", false, "Force placementPolicyCheck audit " + + "upon next Auditor startup "); + opts.addOption("rc", "replicascheck", false, "Force replicasCheck audit " + + "upon next Auditor startup "); } @Override - String getDescription() { - return "Force trigger the Audittask and make sure all the ledgers stored in the decommissioning bookie" - + " are replicated and cookie of the decommissioned bookie is deleted from metadata server."; + Options getOptions() { + return opts; } @Override - String getUsage() { - return CMD_DECOMMISSIONBOOKIE + " [-bookieid ]"; + String getDescription() { + return "Reset the last run time of auditor checks " + + "(checkallledgerscheck, placementpolicycheck, replicascheck) " + + "The current auditor must be REBOOTED after this command is run."; } @Override - Options getOptions() { - return lOpts; + String getUsage() { + return "forceauditchecks Reset the last run time of auditor checks " + + "(checkallledgerscheck, placementpolicycheck, replicascheck) " + + "The current auditor must be REBOOTED after this command is run" + + " Usage: forceauditchecks [options]\n" + + " Options:\n" + + " * -calc, --checkallledgerscheck\n" + + " Force checkAllLedgers audit upon next Auditor startup\n" + + " * -ppc, --placementpolicycheck\n" + + " Force placementPolicyCheck audit upon next Auditor startup\n" + + " * -rc, --replicascheck\n" + + " Force replicasCheck audit upon next Auditor startup"; } @Override - public int runCmd(CommandLine cmdLine) throws Exception { - ClientConfiguration adminConf = new ClientConfiguration(bkConf); - BookKeeperAdmin admin = new BookKeeperAdmin(adminConf); - try { - final String remoteBookieidToDecommission = cmdLine.getOptionValue("bookieid"); - final BookieSocketAddress bookieAddressToDecommission = (StringUtils - .isBlank(remoteBookieidToDecommission) ? 
Bookie.getBookieAddress(bkConf) - : new BookieSocketAddress(remoteBookieidToDecommission)); - admin.decommissionBookie(bookieAddressToDecommission); - LOG.info("The ledgers stored in the given decommissioning bookie: {} are properly replicated", - bookieAddressToDecommission); - runFunctionWithRegistrationManager(bkConf, rm -> { + int runCmd(CommandLine cmdLine) throws Exception { + boolean checkAllLedgersCheck = cmdLine.hasOption("calc"); + boolean placementPolicyCheck = cmdLine.hasOption("ppc"); + boolean replicasCheck = cmdLine.hasOption("rc"); + + if (checkAllLedgersCheck || placementPolicyCheck || replicasCheck) { + runFunctionWithLedgerManagerFactory(bkConf, mFactory -> { try { - Versioned cookie = Cookie.readFromRegistrationManager(rm, bookieAddressToDecommission); - cookie.getValue().deleteFromRegistrationManager(rm, bookieAddressToDecommission, - cookie.getVersion()); - } catch (CookieNotFoundException nne) { - LOG.warn("No cookie to remove for the decommissioning bookie: {}, it could be deleted already", - bookieAddressToDecommission, nne); - } catch (BookieException be) { - throw new UncheckedExecutionException(be.getMessage(), be); + try (LedgerUnderreplicationManager underreplicationManager = + mFactory.newLedgerUnderreplicationManager()) { + // Arbitrary value of 21 days chosen since current freq of all checks is less than 21 days + long time = System.currentTimeMillis() - (21 * 24 * 60 * 60 * 1000); + if (checkAllLedgersCheck) { + LOG.info("Resetting CheckAllLedgersCTime to : " + new Timestamp(time)); + underreplicationManager.setCheckAllLedgersCTime(time); + } + if (placementPolicyCheck) { + LOG.info("Resetting PlacementPolicyCheckCTime to : " + new Timestamp(time)); + underreplicationManager.setPlacementPolicyCheckCTime(time); + } + if (replicasCheck) { + LOG.info("Resetting ReplicasCheckCTime to : " + new Timestamp(time)); + underreplicationManager.setReplicasCheckCTime(time); + } + } + } catch (InterruptedException | ReplicationException e) { + LOG.error("Exception while trying to reset last run time ", e); + return -1; } return 0; }); - LOG.info("Cookie of the decommissioned bookie: {} is deleted successfully", - bookieAddressToDecommission); - return 0; - } catch (Exception e) { - LOG.error("Received exception in DecommissionBookieCmd ", e); + } else { + LOG.error("Command line args must contain atleast one type of check. This was a no-op."); return -1; - } finally { - if (admin != null) { - admin.close(); - } } + return 0; + } + } + + /** + * Command to trigger AuditTask by resetting lostBookieRecoveryDelay and + * then make sure the ledgers stored in the bookie are properly replicated + * and Cookie of the decommissioned bookie should be deleted from metadata + * server. 
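+     *
+     * <p>A sketch of the delegated call (import path assumed; the flags mirror
+     * runCmd below):
+     * <pre>{@code
+     * import org.apache.bookkeeper.conf.ServerConfiguration;
+     * import org.apache.bookkeeper.tools.cli.commands.bookies.DecommissionCommand;
+     *
+     * ServerConfiguration conf = new ServerConfiguration();
+     * // hypothetical bookie id; the command falls back to the local bookie when left blank
+     * DecommissionCommand.DecommissionFlags flags = new DecommissionCommand.DecommissionFlags()
+     *         .remoteBookieIdToDecommission("bookie-1.example.com:3181");
+     * boolean ok = new DecommissionCommand().apply(conf, flags);
+     * }</pre>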
+ */ + class DecommissionBookieCmd extends MyCommand { + + DecommissionBookieCmd() { + super(CMD_DECOMMISSIONBOOKIE); + opts.addOption("bookieid", true, "decommission a remote bookie"); + } + + @Override + String getDescription() { + return "Force trigger the Audittask and make sure all the ledgers stored in the decommissioning bookie" + + " are replicated and cookie of the decommissioned bookie is deleted from metadata server."; + } + + @Override + String getUsage() { + return "decommissionbookie Force trigger the Audittask and make sure all the ledgers stored in the " + + "decommissioning bookie " + "are replicated and cookie of the decommissioned bookie is deleted " + + "from metadata server.\n" + + " Usage: decommissionbookie [options]\n" + + " Options:\n" + + " * -bookieid\n" + + " Decommission a remote bookie (param format: `address:port`)"; + } + + @Override + Options getOptions() { + return opts; + } + + @Override + public int runCmd(CommandLine cmdLine) throws Exception { + DecommissionCommand cmd = new DecommissionCommand(); + DecommissionCommand.DecommissionFlags flags = new DecommissionCommand.DecommissionFlags(); + final String remoteBookieidToDecommission = cmdLine.getOptionValue("bookieid"); + flags.remoteBookieIdToDecommission(remoteBookieidToDecommission); + boolean result = cmd.apply(bkConf, flags); + return (result) ? 0 : -1; + } + } + + /** + * Command to retrieve remote bookie endpoint information. + */ + class EndpointInfoCmd extends MyCommand { + + EndpointInfoCmd() { + super(CMD_ENDPOINTINFO); + opts.addOption("b", "bookieid", true, "Bookie Id"); + } + + @Override + String getDescription() { + return "Get info about a remote bookie with a specific bookie address (bookieid)"; + } + + @Override + String getUsage() { + return "endpointinfo Get info about a remote bookie with a specific bookie\n" + + " Usage: endpointinfo [options]\n" + + " Options:\n" + + " * -b, --bookieid\n" + + " Bookie Id (param format: `address:port`)"; + } + + @Override + Options getOptions() { + return opts; + } + + @Override + public int runCmd(CommandLine cmdLine) throws Exception { + EndpointInfoCommand cmd = new EndpointInfoCommand(); + EndpointInfoCommand.EndpointInfoFlags flags = new EndpointInfoCommand.EndpointInfoFlags(); + final String bookieId = cmdLine.getOptionValue("bookieid"); + flags.bookie(bookieId); + if (StringUtils.isBlank(bookieId)) { + LOG.error("Invalid argument list!"); + this.printUsage(); + return -1; + } + + boolean result = cmd.apply(bkConf, flags); + return (result) ? 0 : -1; } } @@ -2570,12 +2266,10 @@ public interface UpdateLedgerNotifier { void progress(long updated, long issued); } - /** * Convert bookie indexes from InterleavedStorage to DbLedgerStorage format. 
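+     *
+     * <p>A sketch of the delegated call (import path assumed; mirrors runCmd
+     * below, which additionally passes the shell's ledgerIdFormatter):
+     * <pre>{@code
+     * import org.apache.bookkeeper.conf.ServerConfiguration;
+     * import org.apache.bookkeeper.tools.cli.commands.bookie.ConvertToDBStorageCommand;
+     *
+     * ServerConfiguration conf = new ServerConfiguration(); // must point at the bookie's journal/ledger dirs
+     * ConvertToDBStorageCommand cmd = new ConvertToDBStorageCommand();
+     * cmd.apply(conf, new ConvertToDBStorageCommand.CTDBFlags());
+     * }</pre>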
*/ class ConvertToDbStorageCmd extends MyCommand { - Options opts = new Options(); public ConvertToDbStorageCmd() { super(CMD_CONVERT_TO_DB_STORAGE); @@ -2593,88 +2287,62 @@ String getDescription() { @Override String getUsage() { - return CMD_CONVERT_TO_DB_STORAGE; + return "convert-to-db-storage Convert bookie indexes from InterleavedStorage to DbLedgerStorage\n" + + " Usage: convert-to-db-storage\n"; } @Override int runCmd(CommandLine cmdLine) throws Exception { - LOG.info("=== Converting to DbLedgerStorage ==="); - ServerConfiguration conf = new ServerConfiguration(bkConf); - LedgerDirsManager ledgerDirsManager = new LedgerDirsManager(bkConf, bkConf.getLedgerDirs(), - new DiskChecker(bkConf.getDiskUsageThreshold(), bkConf.getDiskUsageWarnThreshold())); - LedgerDirsManager ledgerIndexManager = new LedgerDirsManager(bkConf, bkConf.getLedgerDirs(), - new DiskChecker(bkConf.getDiskUsageThreshold(), bkConf.getDiskUsageWarnThreshold())); - - InterleavedLedgerStorage interleavedStorage = new InterleavedLedgerStorage(); - DbLedgerStorage dbStorage = new DbLedgerStorage(); - - CheckpointSource checkpointSource = new CheckpointSource() { - @Override - public Checkpoint newCheckpoint() { - return Checkpoint.MAX; - } - - @Override - public void checkpointComplete(Checkpoint checkpoint, boolean compact) - throws IOException { - } - }; - Checkpointer checkpointer = new Checkpointer() { - @Override - public void startCheckpoint(Checkpoint checkpoint) { - // No-op - } - - @Override - public void start() { - // no-op - } - }; - - interleavedStorage.initialize(conf, null, ledgerDirsManager, ledgerIndexManager, - null, checkpointSource, checkpointer, NullStatsLogger.INSTANCE); - dbStorage.initialize(conf, null, ledgerDirsManager, ledgerIndexManager, null, - checkpointSource, checkpointer, NullStatsLogger.INSTANCE); - - int convertedLedgers = 0; - for (long ledgerId : interleavedStorage.getActiveLedgersInRange(0, Long.MAX_VALUE)) { - if (LOG.isDebugEnabled()) { - LOG.debug("Converting ledger {}", ledgerIdFormatter.formatLedgerId(ledgerId)); - } - - FileInfo fi = getFileInfo(ledgerId); + ConvertToDBStorageCommand cmd = new ConvertToDBStorageCommand(); + ConvertToDBStorageCommand.CTDBFlags flags = new ConvertToDBStorageCommand.CTDBFlags(); + cmd.setLedgerIdFormatter(ledgerIdFormatter); + cmd.apply(bkConf, flags); + return 0; + } + } - Iterable> entries = getLedgerIndexEntries(ledgerId); + /** + * Convert bookie indexes from DbLedgerStorage to InterleavedStorage format. + */ + class ConvertToInterleavedStorageCmd extends MyCommand { - long numberOfEntries = dbStorage.addLedgerToIndex(ledgerId, fi.isFenced(), fi.getMasterKey(), entries); - if (LOG.isDebugEnabled()) { - LOG.debug(" -- done. 
fenced={} entries={}", fi.isFenced(), numberOfEntries); - } + public ConvertToInterleavedStorageCmd() { + super(CMD_CONVERT_TO_INTERLEAVED_STORAGE); + } - // Remove index from old storage - interleavedStorage.deleteLedger(ledgerId); + @Override + Options getOptions() { + return opts; + } - if (++convertedLedgers % 1000 == 0) { - LOG.info("Converted {} ledgers", convertedLedgers); - } - } + @Override + String getDescription() { + return "Convert bookie indexes from DbLedgerStorage to InterleavedStorage format"; + } - dbStorage.shutdown(); - interleavedStorage.shutdown(); + @Override + String getUsage() { + return "convert-to-interleaved-storage " + + "Convert bookie indexes from DbLedgerStorage to InterleavedStorage\n" + + " Usage: convert-to-interleaved-storage"; + } - LOG.info("---- Done Converting ----"); + @Override + int runCmd(CommandLine cmdLine) throws Exception { + ConvertToInterleavedStorageCommand cmd = new ConvertToInterleavedStorageCommand(); + ConvertToInterleavedStorageCommand.CTISFlags flags = new ConvertToInterleavedStorageCommand.CTISFlags(); + cmd.apply(bkConf, flags); return 0; } } /** - * Convert bookie indexes from DbLedgerStorage to InterleavedStorage format. + * Rebuild DbLedgerStorage locations index. */ - class ConvertToInterleavedStorageCmd extends MyCommand { - Options opts = new Options(); + class RebuildDbLedgerLocationsIndexCmd extends MyCommand { - public ConvertToInterleavedStorageCmd() { - super(CMD_CONVERT_TO_INTERLEAVED_STORAGE); + public RebuildDbLedgerLocationsIndexCmd() { + super(CMD_REBUILD_DB_LEDGER_LOCATIONS_INDEX); } @Override @@ -2684,111 +2352,135 @@ Options getOptions() { @Override String getDescription() { - return "Convert bookie indexes from DbLedgerStorage to InterleavedStorage format"; + return "Rebuild DbLedgerStorage locations index by scanning the entry logs"; } @Override String getUsage() { - return CMD_CONVERT_TO_INTERLEAVED_STORAGE; + return "rebuild-db-ledger-locations-index Rebuild DbLedgerStorage locations index by scanning " + + "the entry logs, rebuild-db-ledger-locations-index requires no options,use the default conf " + + "or re-specify BOOKIE_CONF \n" + + " Usage: rebuild-db-ledger-locations-index"; } @Override int runCmd(CommandLine cmdLine) throws Exception { - LOG.info("=== Converting DbLedgerStorage ==="); - ServerConfiguration conf = new ServerConfiguration(bkConf); - LedgerDirsManager ledgerDirsManager = new LedgerDirsManager(bkConf, bkConf.getLedgerDirs(), - new DiskChecker(bkConf.getDiskUsageThreshold(), bkConf.getDiskUsageWarnThreshold())); - LedgerDirsManager ledgerIndexManager = new LedgerDirsManager(bkConf, bkConf.getLedgerDirs(), - new DiskChecker(bkConf.getDiskUsageThreshold(), bkConf.getDiskUsageWarnThreshold())); - - DbLedgerStorage dbStorage = new DbLedgerStorage(); - InterleavedLedgerStorage interleavedStorage = new InterleavedLedgerStorage(); - - CheckpointSource checkpointSource = new CheckpointSource() { - @Override - public Checkpoint newCheckpoint() { - return Checkpoint.MAX; - } + RebuildDBLedgerLocationsIndexCommand cmd = new RebuildDBLedgerLocationsIndexCommand(); + cmd.apply(bkConf, new CliFlags()); + return 0; + } + } - @Override - public void checkpointComplete(Checkpoint checkpoint, boolean compact) - throws IOException { - } - }; - Checkpointer checkpointer = new Checkpointer() { - @Override - public void startCheckpoint(Checkpoint checkpoint) { - // No-op - } + /** + * Rebuild DbLedgerStorage ledgers index. 
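+     *
+     * <p>A sketch of the delegated call (import path assumed; the flags mirror
+     * runCmd below):
+     * <pre>{@code
+     * import org.apache.bookkeeper.conf.ServerConfiguration;
+     * import org.apache.bookkeeper.tools.cli.commands.bookie.RebuildDBLedgersIndexCommand;
+     *
+     * ServerConfiguration conf = new ServerConfiguration();
+     * RebuildDBLedgersIndexCommand.RebuildLedgersIndexFlags flags =
+     *         new RebuildDBLedgersIndexCommand.RebuildLedgersIndexFlags();
+     * flags.verbose(true);
+     * boolean ok = new RebuildDBLedgersIndexCommand().apply(conf, flags);
+     * }</pre>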
+ */ + class RebuildDbLedgersIndexCmd extends MyCommand { - @Override - public void start() { - // no-op - } - }; - - dbStorage.initialize(conf, null, ledgerDirsManager, ledgerIndexManager, null, - checkpointSource, checkpointer, NullStatsLogger.INSTANCE); - interleavedStorage.initialize(conf, null, ledgerDirsManager, ledgerIndexManager, - null, checkpointSource, checkpointer, NullStatsLogger.INSTANCE); - LedgerCache interleavedLedgerCache = interleavedStorage.ledgerCache; - - int convertedLedgers = 0; - for (long ledgerId : dbStorage.getActiveLedgersInRange(0, Long.MAX_VALUE)) { - if (LOG.isDebugEnabled()) { - LOG.debug("Converting ledger {}", ledgerIdFormatter.formatLedgerId(ledgerId)); - } + public RebuildDbLedgersIndexCmd() { + super(CMD_REBUILD_DB_LEDGERS_INDEX); + opts.addOption("v", "verbose", false, "Verbose logging, print the ledgers added to the new index"); + } - interleavedStorage.setMasterKey(ledgerId, dbStorage.readMasterKey(ledgerId)); - if (dbStorage.isFenced(ledgerId)) { - interleavedStorage.setFenced(ledgerId); - } + @Override + Options getOptions() { + return opts; + } - long lastEntryInLedger = dbStorage.getLastEntryInLedger(ledgerId); - for (long entryId = 0; entryId <= lastEntryInLedger; entryId++) { - try { - long location = dbStorage.getLocation(ledgerId, entryId); - if (location != 0L) { - interleavedLedgerCache.putEntryOffset(ledgerId, entryId, location); - } - } catch (Bookie.NoEntryException e) { - // Ignore entry - } - } + @Override + String getDescription() { + return "Rebuild DbLedgerStorage ledgers index by scanning " + + "the journal and entry logs (sets all ledgers to fenced)"; + } - if (++convertedLedgers % 1000 == 0) { - LOG.info("Converted {} ledgers", convertedLedgers); - } + @Override + String getUsage() { + return "rebuild-db-ledgers-index Rebuild DbLedgerStorage ledgers index by scanning the journal " + + "and entry logs (sets all ledgers to fenced)\n" + + " Usage: rebuild-db-ledgers-index [options]\n" + + " Options:\n" + + " -v, --verbose\n" + + " Verbose logging, print the ledgers added to the new index"; + } + + @Override + int runCmd(CommandLine cmdLine) throws Exception { + RebuildDBLedgersIndexCommand.RebuildLedgersIndexFlags flags = + new RebuildDBLedgersIndexCommand.RebuildLedgersIndexFlags(); + flags.verbose(cmdLine.hasOption("v")); + RebuildDBLedgersIndexCommand cmd = new RebuildDBLedgersIndexCommand(); + if (cmd.apply(bkConf, flags)) { + return 0; + } else { + return -1; } + } + } - dbStorage.shutdown(); + /** + * Rebuild DbLedgerStorage ledgers index. 
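+     * This command performs a read-only scan that verifies the ledgers index;
+     * nothing is rewritten.
+     *
+     * <p>A sketch of the delegated call (import path assumed; the flags mirror
+     * runCmd below):
+     * <pre>{@code
+     * import org.apache.bookkeeper.conf.ServerConfiguration;
+     * import org.apache.bookkeeper.tools.cli.commands.bookie.CheckDBLedgersIndexCommand;
+     *
+     * ServerConfiguration conf = new ServerConfiguration();
+     * CheckDBLedgersIndexCommand.CheckLedgersIndexFlags flags =
+     *         new CheckDBLedgersIndexCommand.CheckLedgersIndexFlags();
+     * flags.verbose(false);
+     * boolean ok = new CheckDBLedgersIndexCommand().apply(conf, flags);
+     * }</pre>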
+ */ + class CheckDbLedgersIndexCmd extends MyCommand { - interleavedLedgerCache.flushLedger(true); - interleavedStorage.flush(); - interleavedStorage.shutdown(); + public CheckDbLedgersIndexCmd() { + super(CMD_CHECK_DB_LEDGERS_INDEX); + opts.addOption("v", "verbose", false, "Verbose logging, print the ledger data in the index."); + } - String baseDir = ledgerDirsManager.getAllLedgerDirs().get(0).toString(); + @Override + Options getOptions() { + return opts; + } - // Rename databases and keep backup - Files.move(FileSystems.getDefault().getPath(baseDir, "ledgers"), - FileSystems.getDefault().getPath(baseDir, "ledgers.backup")); + @Override + String getDescription() { + return "Check DbLedgerStorage ledgers index by performing a read scan"; + } - Files.move(FileSystems.getDefault().getPath(baseDir, "locations"), - FileSystems.getDefault().getPath(baseDir, "locations.backup")); + @Override + String getUsage() { + return "check-db-ledgers-index Check DbLedgerStorage ledgers index by performing a read scan\n" + + " Usage: check-db-ledgers-index [options]\n" + + " Options:\n" + + " -v, --verbose\n" + + " Verbose logging, print the ledger data in the index"; + } - LOG.info("---- Done Converting {} ledgers ----", convertedLedgers); - return 0; + @Override + int runCmd(CommandLine cmdLine) throws Exception { + CheckDBLedgersIndexCommand.CheckLedgersIndexFlags flags = + new CheckDBLedgersIndexCommand.CheckLedgersIndexFlags(); + flags.verbose(cmdLine.hasOption("v")); + CheckDBLedgersIndexCommand cmd = new CheckDBLedgersIndexCommand(); + if (cmd.apply(bkConf, flags)) { + return 0; + } else { + return -1; + } } } /** - * Rebuild DbLedgerStorage locations index. + * Regenerate an index file for interleaved storage. */ - class RebuildDbLedgerLocationsIndexCmd extends MyCommand { - Options opts = new Options(); + class RegenerateInterleavedStorageIndexFile extends MyCommand { - public RebuildDbLedgerLocationsIndexCmd() { - super(CMD_REBUILD_DB_LEDGER_LOCATIONS_INDEX); + public RegenerateInterleavedStorageIndexFile() { + super(CMD_REGENERATE_INTERLEAVED_STORAGE_INDEX_FILE); + Option ledgerOption = new Option("l", "ledgerIds", true, + "Ledger(s) whose index needs to be regenerated." + + " Multiple can be specified, comma separated."); + ledgerOption.setRequired(true); + ledgerOption.setValueSeparator(','); + ledgerOption.setArgs(Option.UNLIMITED_VALUES); + + opts.addOption(ledgerOption); + opts.addOption("dryRun", false, + "Process the entryLogger, but don't write anything."); + opts.addOption("password", true, + "The bookie stores the password in the index file, so we need it to regenerate. 
" + + "This must match the value in the ledger metadata."); + opts.addOption("b64password", true, + "The password in base64 encoding, for cases where the password is not UTF-8."); } @Override @@ -2798,25 +2490,80 @@ Options getOptions() { @Override String getDescription() { - return "Rebuild DbLedgerStorage locations index by scanning the entry logs"; + return "Regenerate an interleaved storage index file, from available entrylogger files."; } @Override String getUsage() { - return CMD_REBUILD_DB_LEDGER_LOCATIONS_INDEX; + return "regenerate-interleaved-storage-index-file Regenerate an interleaved storage index file, " + + "from available entrylogger files\n" + + " Usage: regenerate-interleaved-storage-index-file [options]\n" + + " Options:\n" + + " * -l, --ledgerIds\n" + + " Ledger(s) whose index needs to be regenerated (param format: `l1,...,lN`)\n" + + " -dryRun\n" + + " Process the entryLogger, but don't write anything\n" + + " -password\n" + + " The bookie stores the password in the index file, so we need it to regenerate " + + "(param format: `ledgerPassword`)\n" + + " -b64password\n" + + " The password in base64 encoding (param format: `ledgerB64Password`)"; } @Override int runCmd(CommandLine cmdLine) throws Exception { - LOG.info("=== Rebuilding bookie index ==="); - ServerConfiguration conf = new ServerConfiguration(bkConf); - new LocationsIndexRebuildOp(conf).initiate(); - LOG.info("-- Done rebuilding bookie index --"); + RegenerateInterleavedStorageIndexFileCommand cmd = new RegenerateInterleavedStorageIndexFileCommand(); + RegenerateInterleavedStorageIndexFileCommand.RISIFFlags + flags = new RegenerateInterleavedStorageIndexFileCommand.RISIFFlags(); + List ledgerIds = Arrays.stream(cmdLine.getOptionValues("ledgerIds")).map((id) -> Long.parseLong(id)) + .collect(Collectors.toList()); + boolean dryRun = cmdLine.hasOption("dryRun"); + flags.ledgerIds(ledgerIds); + if (cmdLine.hasOption("password")) { + flags.password(cmdLine.getOptionValue("password")); + } else if (cmdLine.hasOption("b64password")) { + flags.b64Password(cmdLine.getOptionValue("b64password")); + } + flags.dryRun(dryRun); + cmd.apply(bkConf, flags); + return 0; + } + } + + /* + * Command to exposes the current info about the cluster of bookies. 
+ */ + class ClusterInfoCmd extends MyCommand { + ClusterInfoCmd() { + super(CMD_CLUSTERINFO); + } + + @Override + String getDescription() { + return "Exposes the current info about the cluster of bookies."; + } + + @Override + String getUsage() { + return "clusterinfo Exposes the current info about the cluster of bookies\n" + + " Usage: clusterinfo"; + } + + @Override + Options getOptions() { + return opts; + } + + @Override + int runCmd(CommandLine cmdLine) throws Exception { + ClusterInfoCommand cmd = newClusterInfoCommand(); + cmd.apply(bkConf, new CliFlags()); return 0; } } - final Map commands = new HashMap(); + + final Map commands = new HashMap<>(); { commands.put(CMD_METAFORMAT, new MetaFormatCmd()); @@ -2828,10 +2575,12 @@ int runCmd(CommandLine cmdLine) throws Exception { commands.put(CMD_LEDGER, new LedgerCmd()); commands.put(CMD_READ_LEDGER_ENTRIES, new ReadLedgerEntriesCmd()); commands.put(CMD_LISTLEDGERS, new ListLedgersCmd()); + commands.put(CMD_ACTIVE_LEDGERS_ON_ENTRY_LOG_FILE, new ListActiveLedgersCmd()); commands.put(CMD_LISTUNDERREPLICATED, new ListUnderreplicatedCmd()); commands.put(CMD_WHOISAUDITOR, new WhoIsAuditorCmd()); commands.put(CMD_WHATISINSTANCEID, new WhatIsInstanceId()); commands.put(CMD_LEDGERMETADATA, new LedgerMetadataCmd()); + commands.put(CMD_LOCALCONSISTENCYCHECK, new LocalConsistencyCheck()); commands.put(CMD_SIMPLETEST, new SimpleTestCmd()); commands.put(CMD_BOOKIESANITYTEST, new BookieSanityTestCmd()); commands.put(CMD_READLOG, new ReadLogCmd()); @@ -2839,30 +2588,49 @@ int runCmd(CommandLine cmdLine) throws Exception { commands.put(CMD_READJOURNAL, new ReadJournalCmd()); commands.put(CMD_LASTMARK, new LastMarkCmd()); commands.put(CMD_AUTORECOVERY, new AutoRecoveryCmd()); + commands.put(CMD_QUERY_AUTORECOVERY_STATUS, new QueryAutoRecoveryStatusCmd()); commands.put(CMD_LISTBOOKIES, new ListBookiesCmd()); commands.put(CMD_LISTFILESONDISC, new ListDiskFilesCmd()); commands.put(CMD_UPDATECOOKIE, new UpdateCookieCmd()); commands.put(CMD_UPDATELEDGER, new UpdateLedgerCmd()); + commands.put(CMD_UPDATE_BOOKIE_IN_LEDGER, new UpdateBookieInLedgerCmd()); commands.put(CMD_DELETELEDGER, new DeleteLedgerCmd()); commands.put(CMD_BOOKIEINFO, new BookieInfoCmd()); + commands.put(CMD_CLUSTERINFO, new ClusterInfoCmd()); commands.put(CMD_DECOMMISSIONBOOKIE, new DecommissionBookieCmd()); + commands.put(CMD_ENDPOINTINFO, new EndpointInfoCmd()); commands.put(CMD_CONVERT_TO_DB_STORAGE, new ConvertToDbStorageCmd()); commands.put(CMD_CONVERT_TO_INTERLEAVED_STORAGE, new ConvertToInterleavedStorageCmd()); commands.put(CMD_REBUILD_DB_LEDGER_LOCATIONS_INDEX, new RebuildDbLedgerLocationsIndexCmd()); + commands.put(CMD_REBUILD_DB_LEDGERS_INDEX, new RebuildDbLedgersIndexCmd()); + commands.put(CMD_CHECK_DB_LEDGERS_INDEX, new CheckDbLedgersIndexCmd()); + commands.put(CMD_REGENERATE_INTERLEAVED_STORAGE_INDEX_FILE, new RegenerateInterleavedStorageIndexFile()); commands.put(CMD_HELP, new HelpCmd()); commands.put(CMD_LOSTBOOKIERECOVERYDELAY, new LostBookieRecoveryDelayCmd()); commands.put(CMD_TRIGGERAUDIT, new TriggerAuditCmd()); + commands.put(CMD_FORCEAUDITCHECKS, new ForceAuditorChecksCmd()); + // cookie related commands + commands.put(CMD_CREATE_COOKIE, + new CreateCookieCommand().asShellCommand(CMD_CREATE_COOKIE, bkConf)); + commands.put(CMD_DELETE_COOKIE, + new DeleteCookieCommand().asShellCommand(CMD_DELETE_COOKIE, bkConf)); + commands.put(CMD_UPDATE_COOKIE, + new UpdateCookieCommand().asShellCommand(CMD_UPDATE_COOKIE, bkConf)); + commands.put(CMD_GET_COOKIE, + new 
GetCookieCommand().asShellCommand(CMD_GET_COOKIE, bkConf)); + commands.put(CMD_GENERATE_COOKIE, + new GenerateCookieCommand().asShellCommand(CMD_GENERATE_COOKIE, bkConf)); } @Override public void setConf(CompositeConfiguration conf) throws Exception { bkConf.loadConf(conf); - journalDirectories = Bookie.getCurrentDirectories(bkConf.getJournalDirs()); - ledgerDirectories = Bookie.getCurrentDirectories(bkConf.getLedgerDirs()); + journalDirectories = BookieImpl.getCurrentDirectories(bkConf.getJournalDirs()); + ledgerDirectories = BookieImpl.getCurrentDirectories(bkConf.getLedgerDirs()); if (null == bkConf.getIndexDirs()) { indexDirectories = ledgerDirectories; } else { - indexDirectories = Bookie.getCurrentDirectories(bkConf.getIndexDirs()); + indexDirectories = BookieImpl.getCurrentDirectories(bkConf.getIndexDirs()); } pageSize = bkConf.getPageSize(); entriesPerPage = pageSize / 8; @@ -2873,8 +2641,8 @@ private void printShellUsage() { + "[-entryformat ] [-conf configuration] "); System.err.println("where command is one of:"); List commandNames = new ArrayList(); - for (MyCommand c : commands.values()) { - commandNames.add(" " + c.getUsage()); + for (Command c : commands.values()) { + commandNames.add(" " + c.description()); } Collections.sort(commandNames); for (String s : commandNames) { @@ -2956,205 +2724,69 @@ public int compare(File file1, File file2) { } } - public static void main(String argv[]) throws Exception { - BookieShell shell = new BookieShell(); - - // handle some common options for multiple cmds - Options opts = new Options(); - opts.addOption(CONF_OPT, true, "configuration file"); - opts.addOption(LEDGERID_FORMATTER_OPT, true, "format of ledgerId"); - opts.addOption(ENTRY_FORMATTER_OPT, true, "format of entries"); - BasicParser parser = new BasicParser(); - CommandLine cmdLine = parser.parse(opts, argv, true); - - // load configuration - CompositeConfiguration conf = new CompositeConfiguration(); - if (cmdLine.hasOption(CONF_OPT)) { - String val = cmdLine.getOptionValue(CONF_OPT); - conf.addConfiguration(new PropertiesConfiguration( - new File(val).toURI().toURL())); - } - shell.setConf(conf); - - // ledgerid format - if (cmdLine.hasOption(LEDGERID_FORMATTER_OPT)) { - String val = cmdLine.getOptionValue(LEDGERID_FORMATTER_OPT); - shell.ledgerIdFormatter = LedgerIdFormatter.newLedgerIdFormatter(val, shell.bkConf); - } else { - shell.ledgerIdFormatter = LedgerIdFormatter.newLedgerIdFormatter(shell.bkConf); - } - LOG.debug("Using ledgerIdFormatter {}", shell.ledgerIdFormatter.getClass()); - - // entry format - if (cmdLine.hasOption(ENTRY_FORMATTER_OPT)) { - String val = cmdLine.getOptionValue(ENTRY_FORMATTER_OPT); - shell.entryFormatter = EntryFormatter.newEntryFormatter(val, shell.bkConf); - } else { - shell.entryFormatter = EntryFormatter.newEntryFormatter(shell.bkConf); - } - LOG.debug("Using entry formatter {}", shell.entryFormatter.getClass()); - - int res = shell.run(cmdLine.getArgs()); - System.exit(res); - } - - /// - /// Bookie File Operations - /// + public static void main(String[] argv) { + int res = -1; + try { + BookieShell shell = new BookieShell(); + + // handle some common options for multiple cmds + Options opts = new Options(); + opts.addOption(CONF_OPT, true, "configuration file"); + opts.addOption(LEDGERID_FORMATTER_OPT, true, "format of ledgerId"); + opts.addOption(ENTRY_FORMATTER_OPT, true, "format of entries"); + BasicParser parser = new BasicParser(); + CommandLine cmdLine = parser.parse(opts, argv, true); + + // load configuration + 
CompositeConfiguration conf = new CompositeConfiguration(); + if (cmdLine.hasOption(CONF_OPT)) { + String val = cmdLine.getOptionValue(CONF_OPT); + conf.addConfiguration(new PropertiesConfiguration( + new File(val).toURI().toURL())); + } + shell.setConf(conf); + + // ledgerid format + if (cmdLine.hasOption(LEDGERID_FORMATTER_OPT)) { + String val = cmdLine.getOptionValue(LEDGERID_FORMATTER_OPT); + shell.ledgerIdFormatter = LedgerIdFormatter.newLedgerIdFormatter(val, shell.bkConf); + } else { + shell.ledgerIdFormatter = LedgerIdFormatter.newLedgerIdFormatter(shell.bkConf); + } + if (LOG.isDebugEnabled()) { + LOG.debug("Using ledgerIdFormatter {}", shell.ledgerIdFormatter.getClass()); + } - /** - * Get the ledger file of a specified ledger. - * - * @param ledgerId Ledger Id - * - * @return file object. - */ - private File getLedgerFile(long ledgerId) { - String ledgerName = IndexPersistenceMgr.getLedgerName(ledgerId); - File lf = null; - for (File d : indexDirectories) { - lf = new File(d, ledgerName); - if (lf.exists()) { - break; + // entry format + if (cmdLine.hasOption(ENTRY_FORMATTER_OPT)) { + String val = cmdLine.getOptionValue(ENTRY_FORMATTER_OPT); + shell.entryFormatter = EntryFormatter.newEntryFormatter(val, shell.bkConf); + } else { + shell.entryFormatter = EntryFormatter.newEntryFormatter(shell.bkConf); + } + if (LOG.isDebugEnabled()) { + LOG.debug("Using entry formatter {}", shell.entryFormatter.getClass()); } - lf = null; - } - return lf; - } - /** - * Get FileInfo for a specified ledger. - * - * @param ledgerId Ledger Id - * @return read only file info instance - */ - ReadOnlyFileInfo getFileInfo(long ledgerId) throws IOException { - File ledgerFile = getLedgerFile(ledgerId); - if (null == ledgerFile) { - throw new FileNotFoundException("No index file found for ledger " + ledgerId - + ". It may be not flushed yet."); + res = shell.run(cmdLine.getArgs()); + } catch (Throwable e) { + LOG.error("Got an exception", e); + } finally { + System.exit(res); } - ReadOnlyFileInfo fi = new ReadOnlyFileInfo(ledgerFile, null); - fi.readHeader(); - return fi; } private synchronized void initEntryLogger() throws IOException { if (null == entryLogger) { // provide read only entry logger - entryLogger = new ReadOnlyEntryLogger(bkConf); - } - } - - /** - * Scan over entry log. - * - * @param logId Entry Log Id - * @param scanner Entry Log Scanner - */ - protected void scanEntryLog(long logId, EntryLogScanner scanner) throws IOException { - initEntryLogger(); - entryLogger.scanEntryLog(logId, scanner); - } - - private synchronized List getJournals() throws IOException { - if (null == journals) { - journals = Lists.newArrayListWithCapacity(bkConf.getJournalDirs().length); - int idx = 0; - for (File journalDir : bkConf.getJournalDirs()) { - journals.add(new Journal(idx++, new File(journalDir, BookKeeperConstants.CURRENT_DIR), bkConf, - new LedgerDirsManager(bkConf, bkConf.getLedgerDirs(), - new DiskChecker(bkConf.getDiskUsageThreshold(), bkConf.getDiskUsageWarnThreshold())))); - } + entryLogger = new ReadOnlyDefaultEntryLogger(bkConf); } - return journals; - } - - /** - * Scan journal file. - * - * @param journalId Journal File Id - * @param scanner Journal File Scanner - */ - protected void scanJournal(Journal journal, long journalId, JournalScanner scanner) throws IOException { - journal.scanJournal(journalId, 0L, scanner); } /// /// Bookie Shell Commands /// - /** - * Read ledger meta. 
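The common options handled in main() map onto shell flags; a hypothetical invocation (the -conf and -entryformat spellings appear in printShellUsage above; the -ledgeridformat spelling and the lastmark command name are assumptions):

// bookkeeper shell -conf /etc/bookkeeper/bk_server.conf -ledgeridformat long -entryformat hex lastmark
// -conf loads a PropertiesConfiguration before setConf(); the formatter flags select the
// LedgerIdFormatter and EntryFormatter; any remaining arguments are dispatched to the
// matching command's runCmd().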
- * - * @param ledgerId Ledger Id - */ - protected void readLedgerMeta(long ledgerId) throws Exception { - System.out.println("===== LEDGER: " + ledgerIdFormatter.formatLedgerId(ledgerId) + " ====="); - FileInfo fi = getFileInfo(ledgerId); - byte[] masterKey = fi.getMasterKey(); - if (null == masterKey) { - System.out.println("master key : NULL"); - } else { - System.out.println("master key : " + bytes2Hex(fi.getMasterKey())); - } - long size = fi.size(); - if (size % 8 == 0) { - System.out.println("size : " + size); - } else { - System.out.println("size : " + size + " (not aligned with 8, may be corrupted or under flushing now)"); - } - System.out.println("entries : " + (size / 8)); - System.out.println("isFenced : " + fi.isFenced()); - } - - /** - * Read ledger index entries. - * - * @param ledgerId Ledger Id - * @throws IOException - */ - protected void readLedgerIndexEntries(long ledgerId) throws IOException { - System.out.println("===== LEDGER: " + ledgerIdFormatter.formatLedgerId(ledgerId) + " ====="); - FileInfo fi = getFileInfo(ledgerId); - long size = fi.size(); - System.out.println("size : " + size); - long curSize = 0; - long curEntry = 0; - LedgerEntryPage lep = new LedgerEntryPage(pageSize, entriesPerPage); - lep.usePage(); - try { - while (curSize < size) { - lep.setLedgerAndFirstEntry(ledgerId, curEntry); - lep.readPage(fi); - - // process a page - for (int i = 0; i < entriesPerPage; i++) { - long offset = lep.getOffset(i * 8); - if (0 == offset) { - System.out.println("entry " + curEntry + "\t:\tN/A"); - } else { - long entryLogId = offset >> 32L; - long pos = offset & 0xffffffffL; - System.out.println("entry " + curEntry + "\t:\t(log:" + entryLogId + ", pos: " + pos + ")"); - } - ++curEntry; - } - - curSize += pageSize; - } - } catch (IOException ie) { - LOG.error("Failed to read index page : ", ie); - if (curSize + pageSize < size) { - System.out.println("Failed to read index page @ " + curSize + ", the index file may be corrupted : " - + ie.getMessage()); - } else { - System.out.println("Failed to read last index page @ " + curSize + ", the index file may be corrupted " - + "or last index page is not fully flushed yet : " + ie.getMessage()); - } - } - } - protected void printEntryLogMetadata(long logId) throws IOException { LOG.info("Print entryLogMetadata of entrylog {} ({}.log)", logId, Long.toHexString(logId)); initEntryLogger(); @@ -3165,214 +2797,6 @@ protected void printEntryLogMetadata(long logId) throws IOException { }); } - /** - * Get an iterable over pages of entries and locations for a ledger. 
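The removed index reader depends on the 8-byte offset encoding: the entry-log id lives in the high 32 bits and the byte position in the low 32 bits. A self-contained round-trip sketch (the pack step is the implied inverse of the unpack used above; values and the class name are made up):

public class OffsetPackingSketch {
    public static void main(String[] args) {
        long entryLogId = 7L;
        long pos = 1024L;
        long offset = (entryLogId << 32) | pos;   // pack: high 32 bits log id, low 32 bits position
        System.out.println(offset >> 32);         // 7, the unpack used by the removed code
        System.out.println(offset & 0xffffffffL); // 1024
    }
}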
- * - * @param ledgerId - * @return - * @throws IOException - */ - protected Iterable> getLedgerIndexEntries(final long ledgerId) throws IOException { - final FileInfo fi = getFileInfo(ledgerId); - final long size = fi.size(); - - final LedgerEntryPage lep = new LedgerEntryPage(pageSize, entriesPerPage); - lep.usePage(); - - final Iterator> iterator = new Iterator>() { - long curSize = 0; - long curEntry = 0; - - @Override - public boolean hasNext() { - return curSize < size; - } - - @Override - public SortedMap next() { - SortedMap entries = Maps.newTreeMap(); - lep.setLedgerAndFirstEntry(ledgerId, curEntry); - try { - lep.readPage(fi); - } catch (IOException e) { - throw new RuntimeException(e); - } - - // process a page - for (int i = 0; i < entriesPerPage; i++) { - long offset = lep.getOffset(i * 8); - if (offset != 0) { - entries.put(curEntry, offset); - } - ++curEntry; - } - - curSize += pageSize; - return entries; - } - - @Override - public void remove() { - throw new RuntimeException("Cannot remove"); - } - - }; - - return new Iterable>() { - @Override - public Iterator> iterator() { - return iterator; - } - }; - } - - /** - * Scan over an entry log file. - * - * @param logId - * Entry Log File id. - * @param printMsg - * Whether printing the entry data. - */ - protected void scanEntryLog(long logId, final boolean printMsg) throws Exception { - System.out.println("Scan entry log " + logId + " (" + Long.toHexString(logId) + ".log)"); - scanEntryLog(logId, new EntryLogScanner() { - @Override - public boolean accept(long ledgerId) { - return true; - } - - @Override - public void process(long ledgerId, long startPos, ByteBuf entry) { - formatEntry(startPos, entry, printMsg); - } - }); - } - - /** - * Scan over an entry log file for a particular entry. - * - * @param logId Entry Log File id. - * @param ledgerId id of the ledger - * @param entryId entryId of the ledger we are looking for (-1 for all of the entries of the ledger) - * @param printMsg Whether printing the entry data. - * @throws Exception - */ - protected void scanEntryLogForSpecificEntry(long logId, final long ledgerId, final long entryId, - final boolean printMsg) throws Exception { - System.out.println("Scan entry log " + logId + " (" + Long.toHexString(logId) + ".log)" + " for LedgerId " - + ledgerId + ((entryId == -1) ? "" : " for EntryId " + entryId)); - final MutableBoolean entryFound = new MutableBoolean(false); - scanEntryLog(logId, new EntryLogScanner() { - @Override - public boolean accept(long candidateLedgerId) { - return ((candidateLedgerId == ledgerId) && ((!entryFound.booleanValue()) || (entryId == -1))); - } - - @Override - public void process(long candidateLedgerId, long startPos, ByteBuf entry) { - long entrysLedgerId = entry.getLong(entry.readerIndex()); - long entrysEntryId = entry.getLong(entry.readerIndex() + 8); - if ((candidateLedgerId == entrysLedgerId) && (candidateLedgerId == ledgerId) - && ((entrysEntryId == entryId) || (entryId == -1))) { - entryFound.setValue(true); - formatEntry(startPos, entry, printMsg); - } - } - }); - if (!entryFound.booleanValue()) { - System.out.println("LedgerId " + ledgerId + ((entryId == -1) ? "" : " EntryId " + entryId) - + " is not available in the entry log " + logId + " (" + Long.toHexString(logId) + ".log)"); - } - } - - /** - * Scan over an entry log file for entries in the given position range. - * - * @param logId Entry Log File id. 
- * @param rangeStartPos Start position of the entry we are looking for - * @param rangeEndPos End position of the entry we are looking for (-1 for till the end of the entrylog) - * @param printMsg Whether printing the entry data. - * @throws Exception - */ - protected void scanEntryLogForPositionRange(long logId, final long rangeStartPos, final long rangeEndPos, - final boolean printMsg) throws Exception { - System.out.println("Scan entry log " + logId + " (" + Long.toHexString(logId) + ".log)" + " for PositionRange: " - + rangeStartPos + " - " + rangeEndPos); - final MutableBoolean entryFound = new MutableBoolean(false); - scanEntryLog(logId, new EntryLogScanner() { - private MutableBoolean stopScanning = new MutableBoolean(false); - - @Override - public boolean accept(long ledgerId) { - return !stopScanning.booleanValue(); - } - - @Override - public void process(long ledgerId, long entryStartPos, ByteBuf entry) { - if (!stopScanning.booleanValue()) { - if ((rangeEndPos != -1) && (entryStartPos > rangeEndPos)) { - stopScanning.setValue(true); - } else { - int entrySize = entry.readableBytes(); - /** - * entrySize of an entry (inclusive of payload and - * header) value is stored as int value in log file, but - * it is not counted in the entrySize, hence for calculating - * the end position of the entry we need to add additional - * 4 (intsize of entrySize). Please check - * EntryLogger.scanEntryLog. - */ - long entryEndPos = entryStartPos + entrySize + 4 - 1; - if (((rangeEndPos == -1) || (entryStartPos <= rangeEndPos)) && (rangeStartPos <= entryEndPos)) { - formatEntry(entryStartPos, entry, printMsg); - entryFound.setValue(true); - } - } - } - } - }); - if (!entryFound.booleanValue()) { - System.out.println("Entry log " + logId + " (" + Long.toHexString(logId) - + ".log) doesn't has any entry in the range " + rangeStartPos + " - " + rangeEndPos - + ". Probably the position range, you have provided is lesser than the LOGFILE_HEADER_SIZE (1024) " - + "or greater than the current log filesize."); - } - } - - /** - * Scan a journal file. - * - * @param journalId Journal File Id - * @param printMsg Whether printing the entry data. - */ - protected void scanJournal(Journal journal, long journalId, final boolean printMsg) throws Exception { - System.out.println("Scan journal " + journalId + " (" + Long.toHexString(journalId) + ".txn)"); - scanJournal(journal, journalId, new JournalScanner() { - boolean printJournalVersion = false; - - @Override - public void process(int journalVersion, long offset, ByteBuffer entry) throws IOException { - if (!printJournalVersion) { - System.out.println("Journal Version : " + journalVersion); - printJournalVersion = true; - } - formatEntry(offset, Unpooled.wrappedBuffer(entry), printMsg); - } - }); - } - - /** - * Print last log mark. - */ - protected void printLastLogMark() throws IOException { - for (Journal journal : getJournals()) { - LogMark lastLogMark = journal.getLastLogMark().getCurMark(); - System.out.println("LastLogMark: Journal Id - " + lastLogMark.getLogFileId() + "(" - + Long.toHexString(lastLogMark.getLogFileId()) + ".txn), Pos - " - + lastLogMark.getLogFileOffset()); - } - } /** * Format the entry into a readable format. @@ -3393,70 +2817,6 @@ private void formatEntry(LedgerEntry entry, boolean printMsg) { } } - /** - * Format the message into a readable format. 
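A worked instance of the end-position arithmetic above: an entry whose entrySize (header plus payload, excluding the length prefix) is 100 bytes and which starts at position 2048 occupies

// entryEndPos = entryStartPos + entrySize + 4 - 1 = 2048 + 100 + 4 - 1 = 2151
// i.e. bytes [2048, 2151]; the extra 4 accounts for the int-sized length field
// written before the entry but not counted in entrySize.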
- * - * @param pos - * File offset of the message stored in entry log file - * @param recBuff - * Entry Data - * @param printMsg - * Whether printing the message body - */ - private void formatEntry(long pos, ByteBuf recBuff, boolean printMsg) { - int entrySize = recBuff.readableBytes(); - long ledgerId = recBuff.readLong(); - long entryId = recBuff.readLong(); - - System.out.println("--------- Lid=" + ledgerIdFormatter.formatLedgerId(ledgerId) + ", Eid=" + entryId - + ", ByteOffset=" + pos + ", EntrySize=" + entrySize + " ---------"); - if (entryId == Bookie.METAENTRY_ID_LEDGER_KEY) { - int masterKeyLen = recBuff.readInt(); - byte[] masterKey = new byte[masterKeyLen]; - recBuff.readBytes(masterKey); - System.out.println("Type: META"); - System.out.println("MasterKey: " + bytes2Hex(masterKey)); - System.out.println(); - return; - } - if (entryId == Bookie.METAENTRY_ID_FENCE_KEY) { - System.out.println("Type: META"); - System.out.println("Fenced"); - System.out.println(); - return; - } - // process a data entry - long lastAddConfirmed = recBuff.readLong(); - System.out.println("Type: DATA"); - System.out.println("LastConfirmed: " + lastAddConfirmed); - if (!printMsg) { - System.out.println(); - return; - } - // skip digest checking - recBuff.skipBytes(8); - System.out.println("Data:"); - System.out.println(); - try { - byte[] ret = new byte[recBuff.readableBytes()]; - recBuff.readBytes(ret); - entryFormatter.formatEntry(ret); - } catch (Exception e) { - System.out.println("N/A. Corrupted."); - } - System.out.println(); - } - - static String bytes2Hex(byte[] data) { - StringBuilder sb = new StringBuilder(data.length * 2); - Formatter formatter = new Formatter(sb); - for (byte b : data) { - formatter.format("%02x", b); - } - formatter.close(); - return sb.toString(); - } - private static int getOptionIntValue(CommandLine cmdLine, String option, int defaultVal) { if (cmdLine.hasOption(option)) { String val = cmdLine.getOptionValue(option); @@ -3505,9 +2865,7 @@ private static boolean getOptionBooleanValue(CommandLine cmdLine, String option, } private static boolean getOptionalValue(String optValue, String optName) { - if (StringUtils.equals(optValue, optName)) { - return true; - } - return false; + return StringUtils.equals(optValue, optName); } + } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BookieStateManager.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BookieStateManager.java index 370e06d8207..9800d9e0636 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BookieStateManager.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BookieStateManager.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
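The reads in the removed formatEntry() spell out the on-disk entry layout; summarizing the field order implied by those reads:

// long ledgerId
// long entryId              // Bookie.METAENTRY_ID_LEDGER_KEY => int keyLen + master key follow
//                           // Bookie.METAENTRY_ID_FENCE_KEY  => no further payload
// long lastAddConfirmed     // data entries only
// 8 bytes of digest         // skipped before formatting
// byte[] payload            // handed to the EntryFormatter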
See the NOTICE file @@ -21,7 +21,11 @@ package org.apache.bookkeeper.bookie; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.BOOKIE_SCOPE; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.CATEGORY_SERVER; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.SERVER_SANITY; import static org.apache.bookkeeper.bookie.BookKeeperServerStats.SERVER_STATUS; + import com.google.common.annotations.VisibleForTesting; import com.google.common.util.concurrent.ThreadFactoryBuilder; import java.io.File; @@ -30,33 +34,45 @@ import java.net.UnknownHostException; import java.util.List; import java.util.concurrent.Callable; -import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; import java.util.function.Supplier; import lombok.extern.slf4j.Slf4j; import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.discover.BookieServiceInfo; import org.apache.bookkeeper.discover.RegistrationManager; -import org.apache.bookkeeper.meta.MetadataBookieDriver; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.stats.Gauge; import org.apache.bookkeeper.stats.NullStatsLogger; import org.apache.bookkeeper.stats.StatsLogger; +import org.apache.bookkeeper.stats.annotations.StatsDoc; +import org.apache.bookkeeper.tools.cli.commands.bookie.SanityTestCommand; import org.apache.bookkeeper.util.DiskChecker; import org.slf4j.Logger; import org.slf4j.LoggerFactory; + /** * An implementation of StateManager. */ @Slf4j +@StatsDoc( + name = BOOKIE_SCOPE, + category = CATEGORY_SERVER, + help = "Bookie state manager related stats" +) public class BookieStateManager implements StateManager { private static final Logger LOG = LoggerFactory.getLogger(BookieStateManager.class); private final ServerConfiguration conf; + private final Supplier bookieServiceInfoProvider; private final List statusDirs; // use an executor to execute the state changes task - final ExecutorService stateService = Executors.newSingleThreadExecutor( + final ScheduledExecutorService stateService = Executors.newScheduledThreadPool(1, new ThreadFactoryBuilder().setNameFormat("BookieStateManagerService-%d").build()); // Running flag @@ -67,44 +83,66 @@ public class BookieStateManager implements StateManager { private final BookieStatus bookieStatus = new BookieStatus(); private final AtomicBoolean rmRegistered = new AtomicBoolean(false); private final AtomicBoolean forceReadOnly = new AtomicBoolean(false); + private final AtomicInteger sanityPassed = new AtomicInteger(-1); private volatile boolean availableForHighPriorityWrites = true; - private final String bookieId; + private final Supplier bookieIdSupplier; private ShutdownHandler shutdownHandler; - private final Supplier rm; + private final RegistrationManager rm; // Expose Stats - private final StatsLogger statsLogger; + @StatsDoc( + name = SERVER_STATUS, + help = "Bookie status (1: up, 0: readonly, -1: unregistered)" + ) + private final Gauge serverStatusGauge; + @StatsDoc( + name = SERVER_SANITY, + help = "Bookie sanity (1: up, 0: down, -1: unknown)" + ) + private final Gauge serverSanityGauge; public BookieStateManager(ServerConfiguration conf, StatsLogger statsLogger, - MetadataBookieDriver metadataDriver, - LedgerDirsManager ledgerDirsManager) throws IOException 
{ + RegistrationManager rm, + LedgerDirsManager ledgerDirsManager, + Supplier bookieServiceInfoProvider) throws IOException { this( conf, statsLogger, - () -> null == metadataDriver ? null : metadataDriver.getRegistrationManager(), + rm, ledgerDirsManager.getAllLedgerDirs(), () -> { try { - return Bookie.getBookieAddress(conf).toString(); + return BookieImpl.getBookieId(conf); } catch (UnknownHostException e) { throw new UncheckedIOException("Failed to resolve bookie id", e); } - }); + }, + bookieServiceInfoProvider); } public BookieStateManager(ServerConfiguration conf, StatsLogger statsLogger, - Supplier rm, + RegistrationManager rm, List statusDirs, - Supplier bookieIdSupplier) throws IOException { + Supplier bookieIdSupplier, + Supplier bookieServiceInfoProvider) throws IOException { this.conf = conf; - this.statsLogger = statsLogger; this.rm = rm; + if (this.rm != null) { + rm.addRegistrationListener(() -> { + log.info("Trying to re-register the bookie"); + forceToUnregistered(); + // schedule a re-register operation + registerBookie(false); + }); + } + this.statusDirs = statusDirs; // ZK ephemeral node for this Bookie. - this.bookieId = bookieIdSupplier.get(); + this.bookieIdSupplier = bookieIdSupplier; + this.bookieServiceInfoProvider = bookieServiceInfoProvider; // 1 : up, 0 : readonly, -1 : unregistered - statsLogger.registerGauge(SERVER_STATUS, new Gauge() { + this.serverStatusGauge = new Gauge() { @Override public Number getDefaultValue() { return 0; @@ -120,18 +158,45 @@ public Number getSample() { return 1; } } - }); + }; + statsLogger.registerGauge(SERVER_STATUS, serverStatusGauge); + this.serverSanityGauge = new Gauge() { + @Override + public Number getDefaultValue() { + return -1; + } + + @Override + public Number getSample() { + return sanityPassed.get(); + } + }; + if (conf.isSanityCheckMetricsEnabled()) { + statsLogger.registerGauge(SERVER_SANITY, serverSanityGauge); + stateService.scheduleAtFixedRate(() -> { + if (isReadOnly()) { + sanityPassed.set(1); + return; + } + SanityTestCommand.handleAsync(conf, new SanityTestCommand.SanityFlags()).thenAccept(__ -> { + sanityPassed.set(1); + }).exceptionally(ex -> { + sanityPassed.set(0); + return null; + }); + }, 60, 60, TimeUnit.SECONDS); + } } private boolean isRegistrationManagerDisabled() { - return null == rm || null == rm.get(); + return null == rm; } @VisibleForTesting - BookieStateManager(ServerConfiguration conf, MetadataBookieDriver metadataDriver) throws IOException { - this(conf, NullStatsLogger.INSTANCE, metadataDriver, new LedgerDirsManager(conf, conf.getLedgerDirs(), + BookieStateManager(ServerConfiguration conf, RegistrationManager registrationManager) throws IOException { + this(conf, NullStatsLogger.INSTANCE, registrationManager, new LedgerDirsManager(conf, conf.getLedgerDirs(), new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold()), - NullStatsLogger.INSTANCE)); + NullStatsLogger.INSTANCE), BookieServiceInfo.NO_INFO); } @Override @@ -165,6 +230,16 @@ public boolean isReadOnly(){ return forceReadOnly.get() || bookieStatus.isInReadOnlyMode(); } + @Override + public boolean isForceReadOnly(){ + return forceReadOnly.get(); + } + + @Override + public boolean isReadOnlyModeEnabled() { + return conf.isReadOnlyModeEnabled(); + } + @Override public boolean isAvailableForHighPriorityWrites() { return availableForHighPriorityWrites; @@ -202,6 +277,7 @@ public Future registerBookie(final boolean throwException) { @Override public Void call() throws IOException { try { + log.info("Re-registering 
the bookie"); doRegisterBookie(); } catch (IOException ioe) { if (throwException) { @@ -211,7 +287,7 @@ public Void call() throws IOException { shutdownHandler.shutdown(ExitCode.ZK_REG_FAIL); } } - return (Void) null; + return null; } }); } @@ -251,7 +327,7 @@ private void doRegisterBookie(boolean isReadOnly) throws IOException { rmRegistered.set(false); try { - rm.get().registerBookie(bookieId, isReadOnly); + rm.registerBookie(bookieIdSupplier.get(), isReadOnly, bookieServiceInfoProvider.get()); rmRegistered.set(true); } catch (BookieException e) { throw new IOException(e); @@ -285,7 +361,7 @@ public void doTransitionToWritableMode() { } // clear the readonly state try { - rm.get().unregisterBookie(bookieId, true); + rm.unregisterBookie(bookieIdSupplier.get(), true); } catch (BookieException e) { // if we failed when deleting the readonly flag in zookeeper, it is OK since client would // already see the bookie in writable list. so just log the exception @@ -321,7 +397,7 @@ public void doTransitionToReadOnlyMode() { return; } try { - rm.get().registerBookie(bookieId, true); + rm.registerBookie(bookieIdSupplier.get(), true, bookieServiceInfoProvider.get()); } catch (BookieException e) { LOG.error("Error in transition to ReadOnly Mode." + " Shutting down", e); @@ -329,6 +405,7 @@ public void doTransitionToReadOnlyMode() { return; } } + @Override public void setShutdownHandler(ShutdownHandler handler){ shutdownHandler = handler; } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BookieStatus.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BookieStatus.java index 49aa66ed237..7b2d0aad990 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BookieStatus.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BookieStatus.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file * distributed with this work for additional information @@ -19,7 +19,7 @@ package org.apache.bookkeeper.bookie; -import static com.google.common.base.Charsets.UTF_8; +import static java.nio.charset.StandardCharsets.UTF_8; import static org.apache.bookkeeper.util.BookKeeperConstants.BOOKIE_STATUS_FILENAME; import java.io.BufferedReader; @@ -123,16 +123,9 @@ synchronized void writeToDirectories(List directories) { * @throws IOException */ private static void writeToFile(File file, String body) throws IOException { - FileOutputStream fos = new FileOutputStream(file); - BufferedWriter bw = null; - try { - bw = new BufferedWriter(new OutputStreamWriter(fos, UTF_8)); + try (FileOutputStream fos = new FileOutputStream(file); + BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(fos, UTF_8))) { bw.write(body); - } finally { - if (bw != null) { - bw.close(); - } - fos.close(); } } @@ -206,12 +199,16 @@ public BookieStatus parse(BufferedReader reader) BookieStatus status = new BookieStatus(); String line = reader.readLine(); if (line == null || line.trim().isEmpty()) { - LOG.debug("Empty line when parsing bookie status"); + if (LOG.isDebugEnabled()) { + LOG.debug("Empty line when parsing bookie status"); + } return null; } String[] parts = line.split(","); if (parts.length == 0) { - LOG.debug("Error in parsing bookie status: {}", line); + if (LOG.isDebugEnabled()) { + LOG.debug("Error in parsing bookie status: {}", line); + } return null; } synchronized (status) { diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BookieThread.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BookieThread.java index 16bf35c5495..e35db7b68ed 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BookieThread.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BookieThread.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -18,7 +18,6 @@ package org.apache.bookkeeper.bookie; import io.netty.util.concurrent.FastThreadLocalThread; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BufferedChannel.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BufferedChannel.java index b2dd4be605a..3197165827a 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BufferedChannel.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BufferedChannel.java @@ -24,7 +24,6 @@ import io.netty.buffer.ByteBuf; import io.netty.buffer.ByteBufAllocator; import io.netty.util.ReferenceCountUtil; - import java.io.Closeable; import java.io.IOException; import java.nio.ByteBuffer; @@ -35,14 +34,22 @@ * Provides a buffering layer in front of a FileChannel. */ public class BufferedChannel extends BufferedReadChannel implements Closeable { - // The capacity of the write buffer. + /** + * The capacity of the write buffer. + */ protected final int writeCapacity; - // The position of the file channel's write pointer. + /** + * The position of the file channel's write pointer. + */ protected AtomicLong writeBufferStartPosition = new AtomicLong(0); - // The buffer used to write operations. + /** + * The buffer used to write operations. + */ protected final ByteBuf writeBuffer; - // The absolute position of the next write operation. 
- protected final AtomicLong position; + /** + * The absolute position of the next write operation. + */ + protected volatile long position; /* * if unpersistedBytesBound is non-zero value, then after writing to @@ -63,24 +70,27 @@ public class BufferedChannel extends BufferedReadChannel implements Closeable { */ protected final AtomicLong unpersistedBytes; + private boolean closed = false; + // make constructor to be public for unit test - public BufferedChannel(FileChannel fc, int capacity) throws IOException { + public BufferedChannel(ByteBufAllocator allocator, FileChannel fc, int capacity) throws IOException { // Use the same capacity for read and write buffers. - this(fc, capacity, 0L); + this(allocator, fc, capacity, 0L); } - public BufferedChannel(FileChannel fc, int capacity, long unpersistedBytesBound) throws IOException { + public BufferedChannel(ByteBufAllocator allocator, FileChannel fc, int capacity, long unpersistedBytesBound) + throws IOException { // Use the same capacity for read and write buffers. - this(fc, capacity, capacity, unpersistedBytesBound); + this(allocator, fc, capacity, capacity, unpersistedBytesBound); } - public BufferedChannel(FileChannel fc, int writeCapacity, int readCapacity, long unpersistedBytesBound) - throws IOException { + public BufferedChannel(ByteBufAllocator allocator, FileChannel fc, int writeCapacity, int readCapacity, + long unpersistedBytesBound) throws IOException { super(fc, readCapacity); this.writeCapacity = writeCapacity; - this.position = new AtomicLong(fc.position()); - this.writeBufferStartPosition.set(position.get()); - this.writeBuffer = ByteBufAllocator.DEFAULT.directBuffer(writeCapacity); + this.position = fc.position(); + this.writeBufferStartPosition.set(position); + this.writeBuffer = allocator.directBuffer(writeCapacity); this.unpersistedBytes = new AtomicLong(0); this.unpersistedBytesBound = unpersistedBytesBound; this.doRegularFlushes = unpersistedBytesBound > 0; @@ -88,8 +98,12 @@ public BufferedChannel(FileChannel fc, int writeCapacity, int readCapacity, long @Override public synchronized void close() throws IOException { - ReferenceCountUtil.safeRelease(writeBuffer); + if (closed) { + return; + } + ReferenceCountUtil.release(writeBuffer); fileChannel.close(); + closed = true; } /** @@ -116,9 +130,9 @@ public void write(ByteBuf src) throws IOException { flush(); } } - position.addAndGet(copied); - unpersistedBytes.addAndGet(copied); + position += copied; if (doRegularFlushes) { + unpersistedBytes.addAndGet(copied); if (unpersistedBytes.get() >= unpersistedBytesBound) { flush(); shouldForceWrite = true; @@ -135,7 +149,7 @@ public void write(ByteBuf src) throws IOException { * @return */ public long position() { - return position.get(); + return position; } /** @@ -190,8 +204,11 @@ public synchronized void flush() throws IOException { writeBufferStartPosition.set(fileChannel.position()); } - /* + /** * force a sync operation so that data is persisted to the disk. + * @param forceMetadata + * @return + * @throws IOException */ public long forceWrite(boolean forceMetadata) throws IOException { // This is the point up to which we had flushed to the file system page cache @@ -214,9 +231,12 @@ public long forceWrite(boolean forceMetadata) throws IOException { * Hence setting writeBuffer.readableBytes() to unpersistedBytes. 
* */ - synchronized (this) { - unpersistedBytes.set(writeBuffer.readableBytes()); + if (unpersistedBytesBound > 0) { + synchronized (this) { + unpersistedBytes.set(writeBuffer.readableBytes()); + } } + fileChannel.force(forceMetadata); return positionForceWrite; } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BufferedChannelBase.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BufferedChannelBase.java index cfaee56b41d..e52fc4f0c1b 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BufferedChannelBase.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BufferedChannelBase.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -25,6 +25,12 @@ * to buffer the input and output data. This class is a base class for wrapping the {@link FileChannel}. */ public abstract class BufferedChannelBase { + static class BufferedChannelClosedException extends IOException { + BufferedChannelClosedException() { + super("Attempting to access a file channel that has already been closed"); + } + } + protected final FileChannel fileChannel; protected BufferedChannelBase(FileChannel fc) { @@ -36,7 +42,7 @@ protected FileChannel validateAndGetFileChannel() throws IOException { // guarantee that once a log file has been closed and possibly deleted during garbage // collection, attempts will not be made to read from it if (!fileChannel.isOpen()) { - throw new IOException("Attempting to access a file channel that has already been closed"); + throw new BufferedChannelClosedException(); } return fileChannel; } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BufferedReadChannel.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BufferedReadChannel.java index 96dea6f670c..4de3890e082 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BufferedReadChannel.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BufferedReadChannel.java @@ -24,14 +24,13 @@ import io.netty.buffer.ByteBuf; import io.netty.buffer.Unpooled; - import java.io.IOException; import java.nio.channels.FileChannel; /** * A Buffered channel without a write buffer. Only reads are buffered. */ -public class BufferedReadChannel extends BufferedChannelBase { +public class BufferedReadChannel extends BufferedChannelBase { // The capacity of the read buffer. 
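A minimal, self-contained sketch of the reworked BufferedChannel API (the file path, capacities, and the BufferedChannelSketch class name are made-up illustration values; the allocator-first constructor is the one introduced above):

import io.netty.buffer.PooledByteBufAllocator;
import io.netty.buffer.Unpooled;
import java.io.RandomAccessFile;
import java.nio.channels.FileChannel;
import org.apache.bookkeeper.bookie.BufferedChannel;

public class BufferedChannelSketch {
    public static void main(String[] args) throws Exception {
        FileChannel fc = new RandomAccessFile("/tmp/0.log", "rw").getChannel();
        BufferedChannel ch = new BufferedChannel(PooledByteBufAllocator.DEFAULT, fc,
                64 * 1024,   // write buffer capacity
                64 * 1024,   // read buffer capacity
                512 * 1024); // unpersistedBytesBound: crossing it triggers a flush
        ch.write(Unpooled.wrappedBuffer(new byte[]{1, 2, 3}));
        ch.flush();                             // write buffer -> page cache
        long syncedUpTo = ch.forceWrite(false); // fsync data only, no file metadata
        System.out.println("persisted up to " + syncedUpTo);
        ch.close();                             // idempotent now, guarded by the closed flag
    }
}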
protected final int readCapacity; @@ -44,9 +43,16 @@ public class BufferedReadChannel extends BufferedChannelBase { long invocationCount = 0; long cacheHitCount = 0; + private volatile long fileSize = -1; + final boolean sealed; + + public BufferedReadChannel(FileChannel fileChannel, int readCapacity) { + this(fileChannel, readCapacity, false); + } - public BufferedReadChannel(FileChannel fileChannel, int readCapacity) throws IOException { + public BufferedReadChannel(FileChannel fileChannel, int readCapacity, boolean sealed) { super(fileChannel); + this.sealed = sealed; this.readCapacity = readCapacity; this.readBuffer = Unpooled.buffer(readCapacity); } @@ -65,10 +71,26 @@ public int read(ByteBuf dest, long pos) throws IOException { return read(dest, pos, dest.writableBytes()); } + @Override + public long size() throws IOException { + if (sealed) { + if (fileSize == -1) { + synchronized (this) { + if (fileSize == -1) { + fileSize = validateAndGetFileChannel().size(); + } + } + } + return fileSize; + } else { + return validateAndGetFileChannel().size(); + } + } + public synchronized int read(ByteBuf dest, long pos, int length) throws IOException { invocationCount++; long currentPosition = pos; - long eof = validateAndGetFileChannel().size(); + long eof = size(); // return -1 if the given position is greater than or equal to the file's current size. if (pos >= eof) { return -1; diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/CheckpointSource.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/CheckpointSource.java index a6b41f4c12f..2221632469a 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/CheckpointSource.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/CheckpointSource.java @@ -90,4 +90,14 @@ public String toString() { * Flag to compact old checkpoints. */ void checkpointComplete(Checkpoint checkpoint, boolean compact) throws IOException; + + CheckpointSource DEFAULT = new CheckpointSource() { + @Override + public Checkpoint newCheckpoint() { + return Checkpoint.MIN; + } + + @Override + public void checkpointComplete(Checkpoint checkpoint, boolean compact) {} + }; } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/CompactableLedgerStorage.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/CompactableLedgerStorage.java index 9a5a0abbcba..1798391fc76 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/CompactableLedgerStorage.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/CompactableLedgerStorage.java @@ -22,17 +22,11 @@ package org.apache.bookkeeper.bookie; import java.io.IOException; - /** * Interface that identifies LedgerStorage implementations using EntryLogger and running periodic entries compaction. */ public interface CompactableLedgerStorage extends LedgerStorage { - /** - * @return the EntryLogger used by the ledger storage - */ - EntryLogger getEntryLogger(); - /** * Get an iterator over a range of ledger ids stored in the bookie. 
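A sketch of the sealed-channel fast path added above: for an immutable (sealed) log the file size is computed once under double-checked locking, instead of calling FileChannel.size() on every read (the path and class name are illustrative):

import io.netty.buffer.ByteBuf;
import io.netty.buffer.Unpooled;
import java.nio.channels.FileChannel;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import org.apache.bookkeeper.bookie.BufferedReadChannel;

public class SealedReadSketch {
    public static void main(String[] args) throws Exception {
        FileChannel fc = FileChannel.open(
                Paths.get("/data/ledgers/current/0.log"), StandardOpenOption.READ);
        BufferedReadChannel ch = new BufferedReadChannel(fc, 64 * 1024, true /* sealed */);
        ByteBuf dest = Unpooled.buffer(4096);
        int n = ch.read(dest, 1024L); // returns -1 once pos >= the cached file size
        System.out.println(n);
    }
}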
* diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/Cookie.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/Cookie.java index 4a9f6f6c79e..d7502a54b69 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/Cookie.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/Cookie.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -20,7 +20,7 @@ */ package org.apache.bookkeeper.bookie; -import static com.google.common.base.Charsets.UTF_8; +import static java.nio.charset.StandardCharsets.UTF_8; import com.google.common.base.Joiner; import com.google.common.collect.Sets; @@ -30,6 +30,7 @@ import java.io.EOFException; import java.io.File; import java.io.FileInputStream; +import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStreamReader; @@ -37,17 +38,19 @@ import java.io.StringReader; import java.net.InetSocketAddress; import java.net.UnknownHostException; +import java.util.Objects; import java.util.Set; import org.apache.bookkeeper.bookie.BookieException.InvalidCookieException; import org.apache.bookkeeper.bookie.BookieException.UnknownBookieIdException; import org.apache.bookkeeper.conf.ServerConfiguration; import org.apache.bookkeeper.discover.RegistrationManager; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.proto.DataFormats.CookieFormat; import org.apache.bookkeeper.util.BookKeeperConstants; import org.apache.bookkeeper.versioning.LongVersion; import org.apache.bookkeeper.versioning.Version; import org.apache.bookkeeper.versioning.Versioned; +import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -66,23 +69,26 @@ public class Cookie { private static final Logger LOG = LoggerFactory.getLogger(Cookie.class); - static final int CURRENT_COOKIE_LAYOUT_VERSION = 4; + static final int CURRENT_COOKIE_LAYOUT_VERSION = 5; private final int layoutVersion; - private final String bookieHost; + private final String bookieId; private final String journalDirs; private final String ledgerDirs; + private final String indexDirs; private final String instanceId; private static final String SEPARATOR = "\t"; - private Cookie(int layoutVersion, String bookieHost, String journalDirs, String ledgerDirs, String instanceId) { + private Cookie(int layoutVersion, String bookieId, String journalDirs, String ledgerDirs, String instanceId, + String indexDirs) { this.layoutVersion = layoutVersion; - this.bookieHost = bookieHost; + this.bookieId = bookieId; this.journalDirs = journalDirs; this.ledgerDirs = ledgerDirs; this.instanceId = instanceId; + this.indexDirs = indexDirs; } - private static String encodeDirPaths(String[] dirs) { + public static String encodeDirPaths(String[] dirs) { StringBuilder b = new StringBuilder(); b.append(dirs.length); for (String d : dirs) { @@ -102,6 +108,13 @@ String[] getLedgerDirPathsFromCookie() { return decodeDirPathFromCookie(ledgerDirs); } + String[] getIndexDirPathsFromCookie() { + if (null == indexDirs) { + return null; + } + return decodeDirPathFromCookie(indexDirs); + } + /** * Receives 2 String arrays, that each contain a list of directory paths, * and checks if first is a super set of the second. 
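The directory-list encoding above is the entry count followed by each path, joined with the tab SEPARATOR; decodeDirPathFromCookie reverses it. Assuming the elided loop body appends SEPARATOR plus the path, as the decoder implies:

// Cookie.encodeDirPaths(new String[]{"/data/l1", "/data/l2"}) -> "2\t/data/l1\t/data/l2"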
@@ -124,14 +137,27 @@ private boolean verifyLedgerDirs(Cookie c, boolean checkIfSuperSet) { } } + private boolean verifyIndexDirs(Cookie c, boolean checkIfSuperSet) { + // compatibility logic: an existing node's cookie may have no indexDirs; in that case indexDirs defaults to ledgerDirs. + String indexDirsInConfig = StringUtils.isNotBlank(indexDirs) ? indexDirs : ledgerDirs; + String indexDirsInCookie = StringUtils.isNotBlank(c.indexDirs) ? c.indexDirs : c.ledgerDirs; + + if (!checkIfSuperSet) { + return indexDirsInConfig.equals(indexDirsInCookie); + } else { + return isSuperSet(decodeDirPathFromCookie(indexDirsInConfig), decodeDirPathFromCookie(indexDirsInCookie)); + } + } + private void verifyInternal(Cookie c, boolean checkIfSuperSet) throws BookieException.InvalidCookieException { String errMsg; if (c.layoutVersion < 3 && c.layoutVersion != layoutVersion) { errMsg = "Cookie is of too old version " + c.layoutVersion; LOG.error(errMsg); throw new BookieException.InvalidCookieException(errMsg); - } else if (!(c.layoutVersion >= 3 && c.bookieHost.equals(bookieHost) - && c.journalDirs.equals(journalDirs) && verifyLedgerDirs(c, checkIfSuperSet))) { + } else if (!(c.layoutVersion >= 3 && c.bookieId.equals(bookieId) + && c.journalDirs.equals(journalDirs) && verifyLedgerDirs(c, checkIfSuperSet) + && verifyIndexDirs(c, checkIfSuperSet))) { errMsg = "Cookie [" + this + "] is not matching with [" + c + "]"; throw new BookieException.InvalidCookieException(errMsg); } else if ((instanceId == null && c.instanceId != null) @@ -151,27 +177,32 @@ public void verifyIsSuperSet(Cookie c) throws BookieException.InvalidCookieExcep verifyInternal(c, true); } + @Override public String toString() { if (layoutVersion <= 3) { return toStringVersion3(); } CookieFormat.Builder builder = CookieFormat.newBuilder(); - builder.setBookieHost(bookieHost); + builder.setBookieHost(bookieId); builder.setJournalDir(journalDirs); builder.setLedgerDirs(ledgerDirs); if (null != instanceId) { builder.setInstanceId(instanceId); } + if (null != indexDirs) { + builder.setIndexDirs(indexDirs); + } + StringBuilder b = new StringBuilder(); b.append(CURRENT_COOKIE_LAYOUT_VERSION).append("\n"); - b.append(TextFormat.printToString(builder.build())); + b.append(builder.build()); return b.toString(); } private String toStringVersion3() { StringBuilder b = new StringBuilder(); b.append(CURRENT_COOKIE_LAYOUT_VERSION).append("\n") - .append(bookieHost).append("\n") + .append(bookieId).append("\n") .append(journalDirs).append("\n") .append(ledgerDirs).append("\n"); return b.toString(); @@ -192,38 +223,43 @@ private static Builder parse(BufferedReader reader) throws IOException { + "', cannot parse cookie."); } if (layoutVersion == 3) { - cBuilder.setBookieHost(reader.readLine()); + cBuilder.setBookieId(reader.readLine()); cBuilder.setJournalDirs(reader.readLine()); cBuilder.setLedgerDirs(reader.readLine()); } else if (layoutVersion >= 4) { CookieFormat.Builder cfBuilder = CookieFormat.newBuilder(); TextFormat.merge(reader, cfBuilder); CookieFormat data = cfBuilder.build(); - cBuilder.setBookieHost(data.getBookieHost()); + cBuilder.setBookieId(data.getBookieHost()); cBuilder.setJournalDirs(data.getJournalDir()); cBuilder.setLedgerDirs(data.getLedgerDirs()); // Since InstanceId is optional if (null != data.getInstanceId() && !data.getInstanceId().isEmpty()) { cBuilder.setInstanceId(data.getInstanceId()); } + if (null != data.getIndexDirs() && !data.getIndexDirs().isEmpty()) { + cBuilder.setIndexDirs(data.getIndexDirs()); + } } return cBuilder; } + public
static Cookie parseFromBytes(byte[] bytes) throws IOException { + try (BufferedReader reader = new BufferedReader(new StringReader(new String(bytes, UTF_8)))) { + return parse(reader).build(); + } + } + public void writeToDirectory(File directory) throws IOException { File versionFile = new File(directory, - BookKeeperConstants.VERSION_FILENAME); + BookKeeperConstants.VERSION_FILENAME); + writeToFile(versionFile); + } - FileOutputStream fos = new FileOutputStream(versionFile); - BufferedWriter bw = null; - try { - bw = new BufferedWriter(new OutputStreamWriter(fos, UTF_8)); + public void writeToFile (File versionFile) throws IOException { + try (FileOutputStream fos = new FileOutputStream(versionFile); + BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(fos, UTF_8))) { bw.write(toString()); - } finally { - if (bw != null) { - bw.close(); - } - fos.close(); } } @@ -237,14 +273,14 @@ public void writeToDirectory(File directory) throws IOException { */ public void writeToRegistrationManager(RegistrationManager rm, ServerConfiguration conf, Version version) throws BookieException { - BookieSocketAddress address = null; + BookieId address = null; try { - address = Bookie.getBookieAddress(conf); + address = BookieImpl.getBookieId(conf); } catch (UnknownHostException e) { throw new UnknownBookieIdException(e); } byte[] data = toString().getBytes(UTF_8); - rm.writeCookie(address.toString(), new Versioned<>(data, version)); + rm.writeCookie(address, new Versioned<>(data, version)); } /** @@ -258,9 +294,9 @@ public void writeToRegistrationManager(RegistrationManager rm, ServerConfigurati public void deleteFromRegistrationManager(RegistrationManager rm, ServerConfiguration conf, Version version) throws BookieException { - BookieSocketAddress address = null; + BookieId address = null; try { - address = Bookie.getBookieAddress(conf); + address = BookieImpl.getBookieId(conf); } catch (UnknownHostException e) { throw new UnknownBookieIdException(e); } @@ -276,13 +312,13 @@ public void deleteFromRegistrationManager(RegistrationManager rm, * @throws BookieException when fail to delete cookie. 
*/ public void deleteFromRegistrationManager(RegistrationManager rm, - BookieSocketAddress address, + BookieId address, Version version) throws BookieException { if (!(version instanceof LongVersion)) { throw new IllegalArgumentException("Invalid version type, expected ZkVersion type"); } - rm.removeCookie(address.toString(), version); + rm.removeCookie(address, version); } /** @@ -292,13 +328,16 @@ public void deleteFromRegistrationManager(RegistrationManager rm, * @return cookie builder object * @throws UnknownHostException */ - static Builder generateCookie(ServerConfiguration conf) + public static Builder generateCookie(ServerConfiguration conf) throws UnknownHostException { Builder builder = Cookie.newBuilder(); builder.setLayoutVersion(CURRENT_COOKIE_LAYOUT_VERSION); - builder.setBookieHost(Bookie.getBookieAddress(conf).toString()); + builder.setBookieId(BookieImpl.getBookieId(conf).toString()); builder.setJournalDirs(Joiner.on(',').join(conf.getJournalDirNames())); builder.setLedgerDirs(encodeDirPaths(conf.getLedgerDirNames())); + if (null != conf.getIndexDirNames()) { + builder.setIndexDirs(encodeDirPaths(conf.getIndexDirNames())); + } return builder; } @@ -313,7 +352,7 @@ static Builder generateCookie(ServerConfiguration conf) public static Versioned readFromRegistrationManager(RegistrationManager rm, ServerConfiguration conf) throws BookieException { try { - return readFromRegistrationManager(rm, Bookie.getBookieAddress(conf)); + return readFromRegistrationManager(rm, BookieImpl.getBookieId(conf)); } catch (UnknownHostException e) { throw new UnknownBookieIdException(e); } @@ -328,8 +367,8 @@ public static Versioned readFromRegistrationManager(RegistrationManager * @throws BookieException when fail to read cookie */ public static Versioned readFromRegistrationManager(RegistrationManager rm, - BookieSocketAddress address) throws BookieException { - Versioned cookieData = rm.readCookie(address.toString()); + BookieId address) throws BookieException { + Versioned cookieData = rm.readCookie(address); try { try (BufferedReader reader = new BufferedReader( new StringReader(new String(cookieData.getValue(), UTF_8)))) { @@ -349,7 +388,7 @@ public static Versioned readFromRegistrationManager(RegistrationManager * @return cookie object * @throws IOException */ - public static Cookie readFromDirectory(File directory) throws IOException { + public static Cookie readFromDirectory(File directory) throws IOException, FileNotFoundException { File versionFile = new File(directory, BookKeeperConstants.VERSION_FILENAME); try (BufferedReader reader = new BufferedReader( new InputStreamReader(new FileInputStream(versionFile), UTF_8))) { @@ -367,15 +406,17 @@ public static Cookie readFromDirectory(File directory) throws IOException { * if the 'bookieHost' was created using a hostname */ public boolean isBookieHostCreatedFromIp() throws IOException { - String parts[] = bookieHost.split(":"); + String[] parts = bookieId.split(":"); if (parts.length != 2) { - throw new IOException(bookieHost + " does not have the form host:port"); + // custom BookieId ? + return false; } int port; try { port = Integer.parseInt(parts[1]); } catch (NumberFormatException e) { - throw new IOException(bookieHost + " does not have the form host:port"); + // custom BookieId ? + return false; } InetSocketAddress addr = new InetSocketAddress(parts[0], port); @@ -386,22 +427,24 @@ public boolean isBookieHostCreatedFromIp() throws IOException { * Cookie builder. 
*/ public static class Builder { - private int layoutVersion = 0; - private String bookieHost = null; + private int layoutVersion = CURRENT_COOKIE_LAYOUT_VERSION; + private String bookieId = null; private String journalDirs = null; private String ledgerDirs = null; private String instanceId = null; + private String indexDirs = null; private Builder() { } - private Builder(int layoutVersion, String bookieHost, String journalDirs, String ledgerDirs, - String instanceId) { + private Builder(int layoutVersion, String bookieId, String journalDirs, String ledgerDirs, + String instanceId, String indexDirs) { this.layoutVersion = layoutVersion; - this.bookieHost = bookieHost; + this.bookieId = bookieId; this.journalDirs = journalDirs; this.ledgerDirs = ledgerDirs; this.instanceId = instanceId; + this.indexDirs = indexDirs; } public Builder setLayoutVersion(int layoutVersion) { @@ -409,8 +452,8 @@ public Builder setLayoutVersion(int layoutVersion) { return this; } - public Builder setBookieHost(String bookieHost) { - this.bookieHost = bookieHost; + public Builder setBookieId(String bookieId) { + this.bookieId = bookieId; return this; } @@ -429,8 +472,13 @@ public Builder setInstanceId(String instanceId) { return this; } + public Builder setIndexDirs(String indexDirs) { + this.indexDirs = indexDirs; + return this; + } + public Cookie build() { - return new Cookie(layoutVersion, bookieHost, journalDirs, ledgerDirs, instanceId); + return new Cookie(layoutVersion, bookieId, journalDirs, ledgerDirs, instanceId, indexDirs); } } @@ -450,7 +498,27 @@ public static Builder newBuilder() { * @return cookie builder */ public static Builder newBuilder(Cookie oldCookie) { - return new Builder(oldCookie.layoutVersion, oldCookie.bookieHost, oldCookie.journalDirs, oldCookie.ledgerDirs, - oldCookie.instanceId); + return new Builder(oldCookie.layoutVersion, oldCookie.bookieId, oldCookie.journalDirs, oldCookie.ledgerDirs, + oldCookie.instanceId, oldCookie.indexDirs); + } + + @Override + public boolean equals(Object other) { + if (other instanceof Cookie) { + Cookie otherCookie = (Cookie) other; + return layoutVersion == otherCookie.layoutVersion + && Objects.equals(bookieId, otherCookie.bookieId) + && Objects.equals(journalDirs, otherCookie.journalDirs) + && Objects.equals(ledgerDirs, otherCookie.ledgerDirs) + && Objects.equals(instanceId, otherCookie.instanceId) + && Objects.equals(indexDirs, otherCookie.indexDirs); + } else { + return false; + } + } + + @Override + public int hashCode() { + return Objects.hash(bookieId, journalDirs, ledgerDirs, instanceId, indexDirs); } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/CookieValidation.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/CookieValidation.java new file mode 100644 index 00000000000..9b4892b5b4f --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/CookieValidation.java @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
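A hedged end-to-end sketch of the version-5 cookie, exercising the indexDirs field, the text round-trip, and the equals() added in this change (all paths, ids, and the CookieSketch class name are made up):

import static java.nio.charset.StandardCharsets.UTF_8;
import java.io.File;
import org.apache.bookkeeper.bookie.Cookie;

public class CookieSketch {
    public static void main(String[] args) throws Exception {
        Cookie cookie = Cookie.newBuilder()
                .setBookieId("bookie-1.example.com:3181")
                .setJournalDirs("/data/journal")
                .setLedgerDirs(Cookie.encodeDirPaths(new String[]{"/data/ledgers"}))
                .setIndexDirs(Cookie.encodeDirPaths(new String[]{"/data/index"}))
                .setInstanceId("instance-0001")
                .build();                            // layout version now defaults to 5
        cookie.writeToDirectory(new File("/data/journal/current"));
        Cookie parsed = Cookie.parseFromBytes(cookie.toString().getBytes(UTF_8));
        System.out.println(cookie.equals(parsed));   // true: equals() compares every field
    }
}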
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bookkeeper.bookie; + +import java.io.File; +import java.net.UnknownHostException; +import java.util.List; + +/** + * Interface for cookie validation. + */ +public interface CookieValidation { + void checkCookies(List directories) throws BookieException, UnknownHostException, InterruptedException; +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/DefaultEntryLogger.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/DefaultEntryLogger.java new file mode 100644 index 00000000000..c47c0411c24 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/DefaultEntryLogger.java @@ -0,0 +1,1461 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
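The new CookieValidation interface makes the pre-start cookie check pluggable; a minimal illustrative implementation (a permissive validator, not one shipped by the project):

import java.io.File;
import java.util.List;
import org.apache.bookkeeper.bookie.CookieValidation;

public class AcceptAllCookieValidation implements CookieValidation {
    @Override
    public void checkCookies(List<File> directories) {
        // deliberately a no-op: accept whatever cookies are on disk (illustration only)
    }
}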
+ * + */ + +package org.apache.bookkeeper.bookie; + +import static java.nio.charset.StandardCharsets.UTF_8; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.MoreObjects; +import com.google.common.collect.MapMaker; +import com.google.common.collect.Sets; +import io.netty.buffer.ByteBuf; +import io.netty.buffer.ByteBufAllocator; +import io.netty.buffer.PooledByteBufAllocator; +import io.netty.buffer.Unpooled; +import io.netty.util.ReferenceCountUtil; +import io.netty.util.concurrent.FastThreadLocal; +import java.io.BufferedReader; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.FilenameFilter; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.RandomAccessFile; +import java.nio.ByteBuffer; +import java.nio.channels.AsynchronousCloseException; +import java.nio.channels.FileChannel; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.SortedMap; +import java.util.TreeMap; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; +import java.util.concurrent.CopyOnWriteArrayList; +import java.util.regex.Pattern; +import org.apache.bookkeeper.bookie.storage.CompactionEntryLog; +import org.apache.bookkeeper.bookie.storage.EntryLogScanner; +import org.apache.bookkeeper.bookie.storage.EntryLogger; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.stats.NullStatsLogger; +import org.apache.bookkeeper.stats.StatsLogger; +import org.apache.bookkeeper.util.DiskChecker; +import org.apache.bookkeeper.util.HardLink; +import org.apache.bookkeeper.util.IOUtils; +import org.apache.bookkeeper.util.LedgerDirUtil; +import org.apache.bookkeeper.util.collections.ConcurrentLongLongHashMap; +import org.apache.bookkeeper.util.collections.ConcurrentLongLongHashMap.BiConsumerLong; +import org.apache.commons.lang3.tuple.Pair; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * This class manages the writing of the bookkeeper entries. All the new + * entries are written to a common log. The LedgerCache will have pointers + * into files created by this class with offsets into the files to find + * the actual ledger entry. The entry log files created by this class are + * identified by a long. 
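+ *
+ * <p>Entry locations returned by {@code addEntry} pack this log id into the
+ * high 32 bits of a long and the byte offset within the log file into the
+ * low 32 bits (decoded by {@code logIdForOffset} and {@code posForOffset} below).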
+ */ +public class DefaultEntryLogger implements EntryLogger { + private static final Logger LOG = LoggerFactory.getLogger(DefaultEntryLogger.class); + + @VisibleForTesting + static final int UNINITIALIZED_LOG_ID = -0xDEAD; + + static class BufferedLogChannel extends BufferedChannel { + private final long logId; + private final EntryLogMetadata entryLogMetadata; + private final File logFile; + private long ledgerIdAssigned = UNASSIGNED_LEDGERID; + + public BufferedLogChannel(ByteBufAllocator allocator, FileChannel fc, int writeCapacity, int readCapacity, + long logId, File logFile, long unpersistedBytesBound) throws IOException { + super(allocator, fc, writeCapacity, readCapacity, unpersistedBytesBound); + this.logId = logId; + this.entryLogMetadata = new EntryLogMetadata(logId); + this.logFile = logFile; + } + public long getLogId() { + return logId; + } + + public File getLogFile() { + return logFile; + } + + public void registerWrittenEntry(long ledgerId, long entrySize) { + entryLogMetadata.addLedgerSize(ledgerId, entrySize); + } + + public ConcurrentLongLongHashMap getLedgersMap() { + return entryLogMetadata.getLedgersMap(); + } + + public Long getLedgerIdAssigned() { + return ledgerIdAssigned; + } + + public void setLedgerIdAssigned(Long ledgerId) { + this.ledgerIdAssigned = ledgerId; + } + + @Override + public String toString() { + return MoreObjects.toStringHelper(BufferedChannel.class) + .add("logId", logId) + .add("logFile", logFile) + .add("ledgerIdAssigned", ledgerIdAssigned) + .toString(); + } + + /** + * Append the ledger map at the end of the entry log. + * Updates the entry log file header with the offset and size of the map. + */ + void appendLedgersMap() throws IOException { + + long ledgerMapOffset = this.position(); + + ConcurrentLongLongHashMap ledgersMap = this.getLedgersMap(); + int numberOfLedgers = (int) ledgersMap.size(); + + // Write the ledgers map into several batches + + final int maxMapSize = LEDGERS_MAP_HEADER_SIZE + LEDGERS_MAP_ENTRY_SIZE * LEDGERS_MAP_MAX_BATCH_SIZE; + final ByteBuf serializedMap = ByteBufAllocator.DEFAULT.buffer(maxMapSize); + + try { + ledgersMap.forEach(new BiConsumerLong() { + int remainingLedgers = numberOfLedgers; + boolean startNewBatch = true; + int remainingInBatch = 0; + + @Override + public void accept(long ledgerId, long size) { + if (startNewBatch) { + int batchSize = Math.min(remainingLedgers, LEDGERS_MAP_MAX_BATCH_SIZE); + int ledgerMapSize = LEDGERS_MAP_HEADER_SIZE + LEDGERS_MAP_ENTRY_SIZE * batchSize; + + serializedMap.clear(); + serializedMap.writeInt(ledgerMapSize - 4); + serializedMap.writeLong(INVALID_LID); + serializedMap.writeLong(LEDGERS_MAP_ENTRY_ID); + serializedMap.writeInt(batchSize); + + startNewBatch = false; + remainingInBatch = batchSize; + } + // Dump the ledger in the current batch + serializedMap.writeLong(ledgerId); + serializedMap.writeLong(size); + --remainingLedgers; + + if (--remainingInBatch == 0) { + // Close current batch + try { + write(serializedMap); + } catch (IOException e) { + throw new RuntimeException(e); + } + + startNewBatch = true; + } + } + }); + } catch (RuntimeException e) { + if (e.getCause() instanceof IOException) { + throw (IOException) e.getCause(); + } else { + throw e; + } + } finally { + ReferenceCountUtil.release(serializedMap); + } + // Flush the ledger's map out before we write the header. 
+            // Otherwise the header might point to something that is not fully
+            // written
+            super.flush();
+
+            // Update the headers with the map offset and count of ledgers
+            ByteBuffer mapInfo = ByteBuffer.allocate(8 + 4);
+            mapInfo.putLong(ledgerMapOffset);
+            mapInfo.putInt(numberOfLedgers);
+            mapInfo.flip();
+            this.fileChannel.write(mapInfo, LEDGERS_MAP_OFFSET_POSITION);
+        }
+    }
+
+    private final LedgerDirsManager ledgerDirsManager;
+    private final boolean entryLogPerLedgerEnabled;
+
+    final RecentEntryLogsStatus recentlyCreatedEntryLogsStatus;
+
+    /**
+     * Lock for the compaction log.
+     */
+    private final Object compactionLogLock = new Object();
+
+    private volatile BufferedLogChannel compactionLogChannel;
+
+    final EntryLoggerAllocator entryLoggerAllocator;
+    private final EntryLogManager entryLogManager;
+
+    private final CopyOnWriteArrayList<EntryLogListener> listeners = new CopyOnWriteArrayList<EntryLogListener>();
+
+    private static final int HEADER_V0 = 0; // Old log file format (no ledgers map index)
+    private static final int HEADER_V1 = 1; // Introduced ledger map index
+    static final int HEADER_CURRENT_VERSION = HEADER_V1;
+
+    private static class Header {
+        final int version;
+        final long ledgersMapOffset;
+        final int ledgersCount;
+
+        Header(int version, long ledgersMapOffset, int ledgersCount) {
+            this.version = version;
+            this.ledgersMapOffset = ledgersMapOffset;
+            this.ledgersCount = ledgersCount;
+        }
+    }
+
+    /**
+     * The 1K block at the head of the entry logger file
+     * that contains the fingerprint and meta-data.
+     *
+     * <pre>
+     * Header is composed of:
+     * Fingerprint: 4 bytes "BKLO"
+     * Log file HeaderVersion enum: 4 bytes
+     * Ledger map offset: 8 bytes
+     * Ledgers Count: 4 bytes
+     * </pre>
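+     *
+     * <p>That is, the fingerprint occupies bytes 0-3, the header version bytes 4-7
+     * (HEADER_VERSION_POSITION), the ledgers map offset bytes 8-15 and the ledgers
+     * count bytes 16-19 (both written at LEDGERS_MAP_OFFSET_POSITION); the rest of
+     * the 1024-byte header is currently unused.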
+     */
+    static final int LOGFILE_HEADER_SIZE = 1024;
+    static final int HEADER_VERSION_POSITION = 4;
+    static final int LEDGERS_MAP_OFFSET_POSITION = HEADER_VERSION_POSITION + 4;
+
+    /**
+     * Ledgers map is composed of multiple parts that can be split into separated entries. Each of them is composed of:
+     *
+     * <pre>
+     * length: (4 bytes) [0-3]
+     * ledger id (-1): (8 bytes) [4 - 11]
+     * entry id: (8 bytes) [12-19]
+     * num ledgers stored in current metadata entry: (4 bytes) [20 - 23]
+     * ledger entries: sequence of (ledgerid, size) (8 + 8 bytes each) [24..]
+     * </pre>
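+     *
+     * <p>A full batch therefore serializes to LEDGERS_MAP_HEADER_SIZE
+     * + LEDGERS_MAP_MAX_BATCH_SIZE * LEDGERS_MAP_ENTRY_SIZE = 24 + 10000 * 16
+     * = 160024 bytes, which is the per-batch buffer size allocated by
+     * {@code appendLedgersMap}.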
+     */
+    static final int LEDGERS_MAP_HEADER_SIZE = 4 + 8 + 8 + 4;
+    static final int LEDGERS_MAP_ENTRY_SIZE = 8 + 8;
+
+    // Break the ledgers map into multiple batches, each of which can contain up to 10K ledgers
+    static final int LEDGERS_MAP_MAX_BATCH_SIZE = 10000;
+
+    static final long INVALID_LID = -1L;
+
+    // EntryId used to mark an entry (belonging to INVALID_LID) as a component of the serialized ledgers map
+    static final long LEDGERS_MAP_ENTRY_ID = -2L;
+
+    static final int MIN_SANE_ENTRY_SIZE = 8 + 8;
+    static final long MB = 1024 * 1024;
+
+    private final int maxSaneEntrySize;
+
+    private final ByteBufAllocator allocator;
+
+    final ServerConfiguration conf;
+
+    /**
+     * Entry Log Listener.
+     */
+    interface EntryLogListener {
+        /**
+         * Rotate a new entry log to write.
+         */
+        void onRotateEntryLog();
+    }
+
+    public DefaultEntryLogger(ServerConfiguration conf) throws IOException {
+        this(conf, new LedgerDirsManager(conf, conf.getLedgerDirs(),
+                new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold())));
+    }
+
+    /**
+     * Create an EntryLogger that stores its log files in the given directories.
+     */
+    public DefaultEntryLogger(ServerConfiguration conf,
+            LedgerDirsManager ledgerDirsManager) throws IOException {
+        this(conf, ledgerDirsManager, null, NullStatsLogger.INSTANCE, PooledByteBufAllocator.DEFAULT);
+    }
+
+    public DefaultEntryLogger(ServerConfiguration conf,
+            LedgerDirsManager ledgerDirsManager, EntryLogListener listener, StatsLogger statsLogger,
+            ByteBufAllocator allocator) throws IOException {
+        // We reserve 500 bytes as overhead for the protocol. This is not 100% accurate
+        // but the protocol varies so an exact value is difficult to determine
+        this.maxSaneEntrySize = conf.getNettyMaxFrameSizeBytes() - 500;
+        this.allocator = allocator;
+        this.ledgerDirsManager = ledgerDirsManager;
+        this.conf = conf;
+        entryLogPerLedgerEnabled = conf.isEntryLogPerLedgerEnabled();
+        if (listener != null) {
+            addListener(listener);
+        }
+
+        // Find the largest logId
+        long logId = INVALID_LID;
+        for (File dir : ledgerDirsManager.getAllLedgerDirs()) {
+            if (!dir.exists()) {
+                throw new FileNotFoundException(
+                        "Entry log directory '" + dir + "' does not exist");
+            }
+            long lastLogId;
+            long lastLogFileFromFile = getLastLogIdFromFile(dir);
+            long lastLogIdInDir = getLastLogIdInDir(dir);
+            if (lastLogFileFromFile < lastLogIdInDir) {
+                LOG.info("The lastLogFileFromFile is {}, the lastLogIdInDir is {}, "
+                        + "use lastLogIdInDir as the lastLogId.", lastLogFileFromFile, lastLogIdInDir);
+                lastLogId = lastLogIdInDir;
+            } else {
+                lastLogId = lastLogFileFromFile;
+            }
+
+            if (lastLogId > logId) {
+                logId = lastLogId;
+            }
+        }
+        this.recentlyCreatedEntryLogsStatus = new RecentEntryLogsStatus(logId + 1);
+        this.entryLoggerAllocator = new EntryLoggerAllocator(conf, ledgerDirsManager, recentlyCreatedEntryLogsStatus,
+                logId, allocator);
+        if (entryLogPerLedgerEnabled) {
+            this.entryLogManager = new EntryLogManagerForEntryLogPerLedger(conf, ledgerDirsManager,
+                    entryLoggerAllocator, listeners, recentlyCreatedEntryLogsStatus, statsLogger);
+        } else {
+            this.entryLogManager = new EntryLogManagerForSingleEntryLog(conf, ledgerDirsManager, entryLoggerAllocator,
+                    listeners, recentlyCreatedEntryLogsStatus);
+        }
+    }
+
+    EntryLogManager getEntryLogManager() {
+        return entryLogManager;
+    }
+
+    void addListener(EntryLogListener listener) {
+        if (null != listener) {
+            listeners.add(listener);
+        }
+    }
+
+    /**
+     * If the log id of current writable channel is the same as entryLogId and the position
+     * we want to read might end up reading from a position in the write buffer of the
+     * buffered channel, route this read to the current logChannel. Else,
+     * read from the BufferedReadChannel that is provided.
+     * @param entryLogId
+     * @param channel
+     * @param buff remaining() on this bytebuffer tells us the last position that we
+     *             expect to read.
+     * @param pos The starting position from where we want to read.
+     * @return
+     */
+    private int readFromLogChannel(long entryLogId, BufferedReadChannel channel, ByteBuf buff, long pos)
+            throws IOException {
+        BufferedLogChannel bc = entryLogManager.getCurrentLogIfPresent(entryLogId);
+        if (null != bc) {
+            synchronized (bc) {
+                if (pos + buff.writableBytes() >= bc.getFileChannelPosition()) {
+                    return bc.read(buff, pos);
+                }
+            }
+        }
+        return channel.read(buff, pos);
+    }
+
+    /**
+     * A thread-local variable that wraps a mapping of log ids to buffered channels.
+     * These channels should be used only for reading. logChannel is the one
+     * that is used for writes.
+     */
+    private final ThreadLocal<Map<Long, BufferedReadChannel>> logid2Channel =
+            new ThreadLocal<Map<Long, BufferedReadChannel>>() {
+        @Override
+        public Map<Long, BufferedReadChannel> initialValue() {
+            // Since this is thread local there is only one modifier.
+            // We don't really need the concurrency, but we need to use
+            // the weak values. Therefore using the concurrency level of 1
+            return new MapMaker().concurrencyLevel(1)
+                    .weakValues()
+                    .makeMap();
+        }
+    };
+
+    /**
+     * Each thread local buffered read channel can share the same file handle because reads are not relative
+     * and don't cause a change in the channel's position. We use this map to store the file channels. Each
+     * file channel is mapped to a log id which represents an open log file.
+     */
+    private final ConcurrentMap<Long, FileChannel> logid2FileChannel = new ConcurrentHashMap<Long, FileChannel>();
+
+    /**
+     * Put the logId, bc pair in the map responsible for the current thread.
+     * @param logId
+     * @param bc
+     */
+    public BufferedReadChannel putInReadChannels(long logId, BufferedReadChannel bc) {
+        Map<Long, BufferedReadChannel> threadMap = logid2Channel.get();
+        return threadMap.put(logId, bc);
+    }
+
+    /**
+     * Remove all entries for this log file in each thread's cache.
+     * @param logId
+     */
+    public void removeFromChannelsAndClose(long logId) {
+        FileChannel fileChannel = logid2FileChannel.remove(logId);
+        if (null != fileChannel) {
+            try {
+                fileChannel.close();
+            } catch (IOException e) {
+                LOG.warn("Exception while closing channel for log file:" + logId);
+            }
+        }
+    }
+
+    public BufferedReadChannel getFromChannels(long logId) {
+        return logid2Channel.get().get(logId);
+    }
+
+    @VisibleForTesting
+    long getLeastUnflushedLogId() {
+        return recentlyCreatedEntryLogsStatus.getLeastUnflushedLogId();
+    }
+
+    @Override
+    public Set<Long> getFlushedLogIds() {
+        Set<Long> logIds = new HashSet<>();
+        synchronized (recentlyCreatedEntryLogsStatus) {
+            for (File dir : ledgerDirsManager.getAllLedgerDirs()) {
+                if (dir.exists() && dir.isDirectory()) {
+                    File[] files = dir.listFiles(file -> file.getName().endsWith(".log"));
+                    if (files != null && files.length > 0) {
+                        for (File f : files) {
+                            long logId = fileName2LogId(f.getName());
+                            if (recentlyCreatedEntryLogsStatus.isFlushedLogId(logId)) {
+                                logIds.add(logId);
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        return logIds;
+    }
+
+    long getPreviousAllocatedEntryLogId() {
+        return entryLoggerAllocator.getPreallocatedLogId();
+    }
+
+    /**
+     * Get the current log file for compaction.
+     */
+    private File getCurCompactionLogFile() {
+        synchronized (compactionLogLock) {
+            if (compactionLogChannel == null) {
+                return null;
+            }
+            return compactionLogChannel.getLogFile();
+        }
+    }
+
+    void prepareSortedLedgerStorageCheckpoint(long numBytesFlushed) throws IOException {
+        entryLogManager.prepareSortedLedgerStorageCheckpoint(numBytesFlushed);
+    }
+
+    void prepareEntryMemTableFlush() {
+        entryLogManager.prepareEntryMemTableFlush();
+    }
+
+    boolean commitEntryMemTableFlush() throws IOException {
+        return entryLogManager.commitEntryMemTableFlush();
+    }
+
+    /**
+     * Get EntryLoggerAllocator, just for tests.
+     */
+    EntryLoggerAllocator getEntryLoggerAllocator() {
+        return entryLoggerAllocator;
+    }
+
+    /**
+     * Remove entry log.
+     *
+     * @param entryLogId
+     *          Entry Log File Id
+     */
+    @Override
+    public boolean removeEntryLog(long entryLogId) {
+        removeFromChannelsAndClose(entryLogId);
+        File entryLogFile;
+        try {
+            entryLogFile = findFile(entryLogId);
+        } catch (FileNotFoundException e) {
+            LOG.error("Trying to delete an entryLog file that could not be found: "
+                    + entryLogId + ".log");
+            return true;
+        }
+        if (!entryLogFile.delete()) {
+            LOG.warn("Could not delete entry log file {}", entryLogFile);
+            return false;
+        }
+        return true;
+    }
+
+    private long getLastLogIdFromFile(File dir) {
+        long id = readLastLogId(dir);
+        // read success
+        if (id > 0) {
+            return id;
+        }
+        // read failed, scan the ledger directories to find the biggest log id
+        File[] logFiles = dir.listFiles(file -> file.getName().endsWith(".log"));
+        List<Long> logs = new ArrayList<Long>();
+        if (logFiles != null) {
+            for (File lf : logFiles) {
+                long logId = fileName2LogId(lf.getName());
+                logs.add(logId);
+            }
+        }
+        // no log file found in this directory
+        if (0 == logs.size()) {
+            return INVALID_LID;
+        }
+        // order the collections
+        Collections.sort(logs);
+        return logs.get(logs.size() - 1);
+    }
+
+    private long getLastLogIdInDir(File dir) {
+        List<Integer> currentIds = new ArrayList<>();
+        currentIds.addAll(LedgerDirUtil.logIdsInDirectory(dir));
+        currentIds.addAll(LedgerDirUtil.compactedLogIdsInDirectory(dir));
+        if (currentIds.isEmpty()) {
+            return -1;
+        }
+        Pair<Integer, Integer> largestGap = LedgerDirUtil.findLargestGap(currentIds);
+        return largestGap.getLeft() - 1;
+    }
+
+    /**
+     * Reads id from the "lastId" file in the given directory.
+     */
+    private long readLastLogId(File f) {
+        FileInputStream fis;
+        try {
+            fis = new FileInputStream(new File(f, "lastId"));
+        } catch (FileNotFoundException e) {
+            return INVALID_LID;
+        }
+        try (BufferedReader br = new BufferedReader(new InputStreamReader(fis, UTF_8))) {
+            String lastIdString = br.readLine();
+            return Long.parseLong(lastIdString, 16);
+        } catch (IOException | NumberFormatException e) {
+            return INVALID_LID;
+        }
+    }
+
+    void clearCompactingLogId() {
+        entryLoggerAllocator.clearCompactingLogId();
+    }
+
+    /**
+     * Flushes all rotated log channels. After log channels are flushed,
+     * move leastUnflushedLogId ptr to current logId.
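+     *
+     * <p>For illustration: with rotated log 3 pending and current log 4, log 3 is
+     * flushed and marked flushed in {@code RecentEntryLogsStatus}, after which
+     * leastUnflushedLogId advances from 3 to 4.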
+     */
+    void checkpoint() throws IOException {
+        entryLogManager.checkpoint();
+    }
+
+    @Override
+    public void flush() throws IOException {
+        entryLogManager.flush();
+    }
+
+    long addEntry(long ledger, ByteBuffer entry) throws IOException {
+        return entryLogManager.addEntry(ledger, Unpooled.wrappedBuffer(entry), true);
+    }
+
+    long addEntry(long ledger, ByteBuf entry, boolean rollLog) throws IOException {
+        return entryLogManager.addEntry(ledger, entry, rollLog);
+    }
+
+    @Override
+    public long addEntry(long ledger, ByteBuf entry) throws IOException {
+        return entryLogManager.addEntry(ledger, entry, true);
+    }
+
+    private final FastThreadLocal<ByteBuf> sizeBuffer = new FastThreadLocal<ByteBuf>() {
+        @Override
+        protected ByteBuf initialValue() throws Exception {
+            // Max usage is size (4 bytes) + ledgerId (8 bytes) + entryId (8 bytes)
+            return Unpooled.buffer(4 + 8 + 8);
+        }
+    };
+
+    private long addEntryForCompaction(long ledgerId, ByteBuf entry) throws IOException {
+        synchronized (compactionLogLock) {
+            int entrySize = entry.readableBytes() + 4;
+            if (compactionLogChannel == null) {
+                createNewCompactionLog();
+            }
+
+            ByteBuf sizeBuffer = this.sizeBuffer.get();
+            sizeBuffer.clear();
+            sizeBuffer.writeInt(entry.readableBytes());
+            compactionLogChannel.write(sizeBuffer);
+
+            long pos = compactionLogChannel.position();
+            compactionLogChannel.write(entry);
+            compactionLogChannel.registerWrittenEntry(ledgerId, entrySize);
+            return (compactionLogChannel.getLogId() << 32L) | pos;
+        }
+    }
+
+    private void flushCompactionLog() throws IOException {
+        synchronized (compactionLogLock) {
+            if (compactionLogChannel != null) {
+                compactionLogChannel.appendLedgersMap();
+                compactionLogChannel.flushAndForceWrite(false);
+                LOG.info("Flushed compaction log file {} with logId {}.",
+                        compactionLogChannel.getLogFile(),
+                        compactionLogChannel.getLogId());
+                // Since this channel is only used for writing, after flushing the channel,
+                // we have to close the underlying file channel. Otherwise, we might end up
+                // leaking fds, which would prevent the disk space from being reclaimed.
+                compactionLogChannel.close();
+            } else {
+                throw new IOException("Failed to flush compaction log which has already been removed.");
+            }
+        }
+    }
+
+    private void createNewCompactionLog() throws IOException {
+        synchronized (compactionLogLock) {
+            if (compactionLogChannel == null) {
+                compactionLogChannel = entryLogManager.createNewLogForCompaction();
+            }
+        }
+    }
+
+    /**
+     * Remove the current compaction log, usually invoked when compaction failed and
+     * we need to do some clean up to remove the compaction log file.
+     */
+    private void removeCurCompactionLog() {
+        synchronized (compactionLogLock) {
+            if (compactionLogChannel != null) {
+                if (compactionLogChannel.getLogFile().exists() && !compactionLogChannel.getLogFile().delete()) {
+                    LOG.warn("Could not delete compaction log file {}", compactionLogChannel.getLogFile());
+                }
+
+                try {
+                    compactionLogChannel.close();
+                } catch (IOException e) {
+                    LOG.error("Failed to close file channel for compaction log {}", compactionLogChannel.getLogId(),
+                            e);
+                }
+                compactionLogChannel = null;
+            }
+        }
+    }
+
+    static long logIdForOffset(long offset) {
+        return offset >> 32L;
+    }
+
+    static long posForOffset(long location) {
+        return location & 0xffffffffL;
+    }
+
+    /**
+     * Exception type for representing lookup errors. Useful for disambiguating different error
+     * conditions for reporting purposes.
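+     * The subclasses below distinguish a missing log file, an entry past the
+     * end of a log, an invalid encoded entry length, and an entry whose
+     * ledger/entry id does not match the expected one.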
+ */ + static class EntryLookupException extends Exception { + EntryLookupException(String message) { + super(message); + } + + /** + * Represents case where log file is missing. + */ + static class MissingLogFileException extends EntryLookupException { + MissingLogFileException(long ledgerId, long entryId, long entryLogId, long pos) { + super(String.format("Missing entryLog %d for ledgerId %d, entry %d at offset %d", + entryLogId, + ledgerId, + entryId, + pos)); + } + } + + /** + * Represents case where entry log is present, but does not contain the specified entry. + */ + static class MissingEntryException extends EntryLookupException { + MissingEntryException(long ledgerId, long entryId, long entryLogId, long pos) { + super(String.format("pos %d (entry %d for ledgerId %d) past end of entryLog %d", + pos, + entryId, + ledgerId, + entryLogId)); + } + } + + /** + * Represents case where log is present, but encoded entry length header is invalid. + */ + static class InvalidEntryLengthException extends EntryLookupException { + InvalidEntryLengthException(long ledgerId, long entryId, long entryLogId, long pos) { + super(String.format("Invalid entry length at pos %d (entry %d for ledgerId %d) for entryLog %d", + pos, + entryId, + ledgerId, + entryLogId)); + } + } + + /** + * Represents case where the entry at pos is wrong. + */ + static class WrongEntryException extends EntryLookupException { + WrongEntryException(long foundEntryId, long foundLedgerId, long ledgerId, + long entryId, long entryLogId, long pos) { + super(String.format( + "Found entry %d, ledger %d at pos %d entryLog %d, should have found entry %d for ledgerId %d", + foundEntryId, + foundLedgerId, + pos, + entryLogId, + entryId, + ledgerId)); + } + } + } + + private BufferedReadChannel getFCForEntryInternal( + long ledgerId, long entryId, long entryLogId, long pos) + throws EntryLookupException, IOException { + try { + return getChannelForLogId(entryLogId); + } catch (FileNotFoundException e) { + throw new EntryLookupException.MissingLogFileException(ledgerId, entryId, entryLogId, pos); + } + } + + private ByteBuf readEntrySize(long ledgerId, long entryId, long entryLogId, long pos, BufferedReadChannel fc) + throws EntryLookupException, IOException { + ByteBuf sizeBuff = sizeBuffer.get(); + sizeBuff.clear(); + + long entrySizePos = pos - 4; // we want to get the entrySize as well as the ledgerId and entryId + + try { + if (readFromLogChannel(entryLogId, fc, sizeBuff, entrySizePos) != sizeBuff.capacity()) { + throw new EntryLookupException.MissingEntryException(ledgerId, entryId, entryLogId, entrySizePos); + } + } catch (BufferedChannelBase.BufferedChannelClosedException | AsynchronousCloseException e) { + throw new EntryLookupException.MissingLogFileException(ledgerId, entryId, entryLogId, entrySizePos); + } + return sizeBuff; + } + + void checkEntry(long ledgerId, long entryId, long location) throws EntryLookupException, IOException { + long entryLogId = logIdForOffset(location); + long pos = posForOffset(location); + BufferedReadChannel fc = getFCForEntryInternal(ledgerId, entryId, entryLogId, pos); + ByteBuf sizeBuf = readEntrySize(ledgerId, entryId, entryLogId, pos, fc); + validateEntry(ledgerId, entryId, entryLogId, pos, sizeBuf); + } + + private void validateEntry(long ledgerId, long entryId, long entryLogId, long pos, ByteBuf sizeBuff) + throws IOException, EntryLookupException { + int entrySize = sizeBuff.readInt(); + + // entrySize does not include the ledgerId + if (entrySize > maxSaneEntrySize) { + LOG.warn("Sanity 
check failed for entry size of " + entrySize + " at location " + pos + " in " + + entryLogId); + } + if (entrySize < MIN_SANE_ENTRY_SIZE) { + LOG.error("Read invalid entry length {}", entrySize); + throw new EntryLookupException.InvalidEntryLengthException(ledgerId, entryId, entryLogId, pos); + } + + long thisLedgerId = sizeBuff.getLong(4); + long thisEntryId = sizeBuff.getLong(12); + if (thisLedgerId != ledgerId || thisEntryId != entryId) { + throw new EntryLookupException.WrongEntryException( + thisEntryId, thisLedgerId, ledgerId, entryId, entryLogId, pos); + } + } + + @Override + public ByteBuf readEntry(long ledgerId, long entryId, long entryLocation) + throws IOException, Bookie.NoEntryException { + return internalReadEntry(ledgerId, entryId, entryLocation, true /* validateEntry */); + } + + @Override + public ByteBuf readEntry(long location) throws IOException, Bookie.NoEntryException { + return internalReadEntry(-1L, -1L, location, false /* validateEntry */); + } + + + private ByteBuf internalReadEntry(long ledgerId, long entryId, long location, boolean validateEntry) + throws IOException, Bookie.NoEntryException { + long entryLogId = logIdForOffset(location); + long pos = posForOffset(location); + + + BufferedReadChannel fc = null; + int entrySize = -1; + try { + fc = getFCForEntryInternal(ledgerId, entryId, entryLogId, pos); + + ByteBuf sizeBuff = readEntrySize(ledgerId, entryId, entryLogId, pos, fc); + entrySize = sizeBuff.getInt(0); + if (validateEntry) { + validateEntry(ledgerId, entryId, entryLogId, pos, sizeBuff); + } + } catch (EntryLookupException e) { + throw new IOException("Bad entry read from log file id: " + entryLogId, e); + } + + ByteBuf data = allocator.buffer(entrySize, entrySize); + int rc = readFromLogChannel(entryLogId, fc, data, pos); + if (rc != entrySize) { + ReferenceCountUtil.release(data); + throw new IOException("Bad entry read from log file id: " + entryLogId, + new EntryLookupException("Short read for " + ledgerId + "@" + + entryId + " in " + entryLogId + "@" + + pos + "(" + rc + "!=" + entrySize + ")")); + } + data.writerIndex(entrySize); + + return data; + } + + /** + * Read the header of an entry log. 
+     */
+    private Header getHeaderForLogId(long entryLogId) throws IOException {
+        BufferedReadChannel bc = getChannelForLogId(entryLogId);
+
+        // Allocate buffer to read (version, ledgersMapOffset, ledgerCount)
+        ByteBuf headers = allocator.directBuffer(LOGFILE_HEADER_SIZE);
+        try {
+            bc.read(headers, 0);
+
+            // Skip marker string "BKLO"
+            headers.readInt();
+
+            int headerVersion = headers.readInt();
+            if (headerVersion < HEADER_V0 || headerVersion > HEADER_CURRENT_VERSION) {
+                LOG.info("Unknown entry log header version for log {}: {}", entryLogId, headerVersion);
+            }
+
+            long ledgersMapOffset = headers.readLong();
+            int ledgersCount = headers.readInt();
+            return new Header(headerVersion, ledgersMapOffset, ledgersCount);
+        } finally {
+            ReferenceCountUtil.release(headers);
+        }
+    }
+
+    @VisibleForTesting
+    BufferedReadChannel getChannelForLogId(long entryLogId) throws IOException {
+        BufferedReadChannel fc = getFromChannels(entryLogId);
+        if (fc != null) {
+            return fc;
+        }
+        File file = findFile(entryLogId);
+        // get channel is used to open an existing entry log file
+        // it would be better to open using read mode
+        FileChannel newFc = new RandomAccessFile(file, "r").getChannel();
+        FileChannel oldFc = logid2FileChannel.putIfAbsent(entryLogId, newFc);
+        if (null != oldFc) {
+            newFc.close();
+            newFc = oldFc;
+        }
+        // We set the position of the write buffer of this buffered channel to Long.MAX_VALUE
+        // so that there are no overlaps with the write buffer while reading
+        if (entryLogManager instanceof EntryLogManagerForSingleEntryLog) {
+            fc = new BufferedReadChannel(newFc, conf.getReadBufferBytes(), entryLoggerAllocator.isSealed(entryLogId));
+        } else {
+            fc = new BufferedReadChannel(newFc, conf.getReadBufferBytes(), false);
+        }
+        putInReadChannels(entryLogId, fc);
+        return fc;
+    }
+
+    /**
+     * Whether the log file exists or not.
+     */
+    @Override
+    public boolean logExists(long logId) {
+        for (File d : ledgerDirsManager.getAllLedgerDirs()) {
+            File f = new File(d, Long.toHexString(logId) + ".log");
+            if (f.exists()) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /**
+     * Returns a set with the ids of all the entry log files.
+     *
+     * @throws IOException
+     */
+    public Set<Long> getEntryLogsSet() throws IOException {
+        Set<Long> entryLogs = Sets.newTreeSet();
+
+        final FilenameFilter logFileFilter = new FilenameFilter() {
+            @Override
+            public boolean accept(File dir, String name) {
+                return name.endsWith(".log");
+            }
+        };
+
+        for (File d : ledgerDirsManager.getAllLedgerDirs()) {
+            File[] files = d.listFiles(logFileFilter);
+            if (files == null) {
+                throw new IOException("Failed to get list of files in directory " + d);
+            }
+
+            for (File f : files) {
+                Long entryLogId = Long.parseLong(f.getName().split(".log")[0], 16);
+                entryLogs.add(entryLogId);
+            }
+        }
+        return entryLogs;
+    }
+
+    private File findFile(long logId) throws FileNotFoundException {
+        for (File d : ledgerDirsManager.getAllLedgerDirs()) {
+            File f = new File(d, Long.toHexString(logId) + ".log");
+            if (f.exists()) {
+                return f;
+            }
+        }
+        throw new FileNotFoundException("No file for log " + Long.toHexString(logId));
+    }
+
+    /**
+     * Scan entry log.
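+     *
+     * <p>A minimal scanner sketch (illustrative only):
+     * <pre>
+     * scanEntryLog(logId, new EntryLogScanner() {
+     *     public boolean accept(long ledgerId) {
+     *         return true; // visit every ledger in the log
+     *     }
+     *     public void process(long ledgerId, long offset, ByteBuf entry) {
+     *         // entry holds one ledger entry; offset is its position in the log
+     *     }
+     * });
+     * </pre>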
+ * + * @param entryLogId Entry Log Id + * @param scanner Entry Log Scanner + * @throws IOException + */ + @Override + public void scanEntryLog(long entryLogId, EntryLogScanner scanner) throws IOException { + // Buffer where to read the entrySize (4 bytes) and the ledgerId (8 bytes) + ByteBuf headerBuffer = Unpooled.buffer(4 + 8); + BufferedReadChannel bc; + // Get the BufferedChannel for the current entry log file + try { + bc = getChannelForLogId(entryLogId); + } catch (IOException e) { + LOG.warn("Failed to get channel to scan entry log: " + entryLogId + ".log"); + throw e; + } + // Start the read position in the current entry log file to be after + // the header where all of the ledger entries are. + long pos = LOGFILE_HEADER_SIZE; + + // Start with a reasonably sized buffer size + ByteBuf data = allocator.directBuffer(1024 * 1024); + + try { + + // Read through the entry log file and extract the ledger ID's. + while (true) { + // Check if we've finished reading the entry log file. + if (pos >= bc.size()) { + break; + } + if (readFromLogChannel(entryLogId, bc, headerBuffer, pos) != headerBuffer.capacity()) { + LOG.warn("Short read for entry size from entrylog {}", entryLogId); + return; + } + long offset = pos; + + int entrySize = headerBuffer.readInt(); + if (entrySize <= 0) { // hitting padding + pos++; + headerBuffer.clear(); + continue; + } + long ledgerId = headerBuffer.readLong(); + headerBuffer.clear(); + + pos += 4; + if (ledgerId == INVALID_LID || !scanner.accept(ledgerId)) { + // skip this entry + pos += entrySize; + continue; + } + // read the entry + data.clear(); + data.capacity(entrySize); + int rc = readFromLogChannel(entryLogId, bc, data, pos); + if (rc != entrySize) { + LOG.warn("Short read for ledger entry from entryLog {}@{} ({} != {})", + entryLogId, pos, rc, entrySize); + return; + } + // process the entry + scanner.process(ledgerId, offset, data); + + // Advance position to the next entry + pos += entrySize; + } + } finally { + ReferenceCountUtil.release(data); + } + } + + public EntryLogMetadata getEntryLogMetadata(long entryLogId, AbstractLogCompactor.Throttler throttler) + throws IOException { + // First try to extract the EntryLogMetadata from the index, if there's no index then fallback to scanning the + // entry log + try { + return extractEntryLogMetadataFromIndex(entryLogId); + } catch (FileNotFoundException fne) { + LOG.warn("Cannot find entry log file {}.log : {}", Long.toHexString(entryLogId), fne.getMessage()); + throw fne; + } catch (Exception e) { + LOG.info("Failed to get ledgers map index from: {}.log : {}", entryLogId, e.getMessage()); + + // Fall-back to scanning + return extractEntryLogMetadataByScanning(entryLogId, throttler); + } + } + + EntryLogMetadata extractEntryLogMetadataFromIndex(long entryLogId) throws IOException { + Header header = getHeaderForLogId(entryLogId); + + if (header.version < HEADER_V1) { + throw new IOException("Old log file header without ledgers map on entryLogId " + entryLogId); + } + + if (header.ledgersMapOffset == 0L) { + // The index was not stored in the log file (possibly because the bookie crashed before flushing it) + throw new IOException("No ledgers map index found on entryLogId " + entryLogId); + } + + if (LOG.isDebugEnabled()) { + LOG.debug("Recovering ledgers maps for log {} at offset: {}", entryLogId, header.ledgersMapOffset); + } + + BufferedReadChannel bc = getChannelForLogId(entryLogId); + + // There can be multiple entries containing the various components of the serialized ledgers map + long offset = 
header.ledgersMapOffset; + EntryLogMetadata meta = new EntryLogMetadata(entryLogId); + + final int maxMapSize = LEDGERS_MAP_HEADER_SIZE + LEDGERS_MAP_ENTRY_SIZE * LEDGERS_MAP_MAX_BATCH_SIZE; + ByteBuf ledgersMap = allocator.directBuffer(maxMapSize); + + try { + while (offset < bc.size()) { + // Read ledgers map size + sizeBuffer.get().clear(); + bc.read(sizeBuffer.get(), offset); + + int ledgersMapSize = sizeBuffer.get().readInt(); + if (ledgersMapSize <= 0) { + break; + } + // Read the index into a buffer + ledgersMap.clear(); + bc.read(ledgersMap, offset + 4, ledgersMapSize); + + // Discard ledgerId and entryId + long lid = ledgersMap.readLong(); + if (lid != INVALID_LID) { + throw new IOException("Cannot deserialize ledgers map from ledger " + lid); + } + + long entryId = ledgersMap.readLong(); + if (entryId != LEDGERS_MAP_ENTRY_ID) { + throw new IOException("Cannot deserialize ledgers map from entryId " + entryId); + } + + // Read the number of ledgers in the current entry batch + int ledgersCount = ledgersMap.readInt(); + + // Extract all (ledger,size) tuples from buffer + for (int i = 0; i < ledgersCount; i++) { + long ledgerId = ledgersMap.readLong(); + long size = ledgersMap.readLong(); + + if (LOG.isDebugEnabled()) { + LOG.debug("Recovering ledgers maps for log {} -- Found ledger: {} with size: {}", + entryLogId, ledgerId, size); + } + meta.addLedgerSize(ledgerId, size); + } + if (ledgersMap.isReadable()) { + throw new IOException("Invalid entry size when reading ledgers map"); + } + + // Move to next entry, if any + offset += ledgersMapSize + 4; + } + } catch (IndexOutOfBoundsException e) { + throw new IOException(e); + } finally { + ReferenceCountUtil.release(ledgersMap); + } + + if (meta.getLedgersMap().size() != header.ledgersCount) { + throw new IOException("Not all ledgers were found in ledgers map index. expected: " + header.ledgersCount + + " -- found: " + meta.getLedgersMap().size() + " -- entryLogId: " + entryLogId); + } + + return meta; + } + + private EntryLogMetadata extractEntryLogMetadataByScanning(long entryLogId, + AbstractLogCompactor.Throttler throttler) + throws IOException { + final EntryLogMetadata meta = new EntryLogMetadata(entryLogId); + + // Read through the entry log file and extract the entry log meta + scanEntryLog(entryLogId, new EntryLogScanner() { + @Override + public void process(long ledgerId, long offset, ByteBuf entry) throws IOException { + if (throttler != null) { + throttler.acquire(entry.readableBytes()); + } + // add new entry size of a ledger to entry log meta + meta.addLedgerSize(ledgerId, entry.readableBytes() + 4); + } + + @Override + public boolean accept(long ledgerId) { + return ledgerId >= 0; + } + }); + + if (LOG.isDebugEnabled()) { + LOG.debug("Retrieved entry log meta data entryLogId: {}, meta: {}", entryLogId, meta); + } + return meta; + } + + /** + * Shutdown method to gracefully stop entry logger. + */ + @Override + public void close() { + // since logChannel is buffered channel, do flush when shutting down + LOG.info("Stopping EntryLogger"); + try { + flush(); + for (FileChannel fc : logid2FileChannel.values()) { + fc.close(); + } + // clear the mapping, so we don't need to go through the channels again in finally block in normal case. 
+            logid2FileChannel.clear();
+            entryLogManager.close();
+            synchronized (compactionLogLock) {
+                if (compactionLogChannel != null) {
+                    compactionLogChannel.close();
+                    compactionLogChannel = null;
+                }
+            }
+        } catch (IOException ie) {
+            // we have no idea how to avoid io exception during shutting down, so just ignore it
+            LOG.error("Error flush entry log during shutting down, which may cause entry log corrupted.", ie);
+        } finally {
+            for (FileChannel fc : logid2FileChannel.values()) {
+                IOUtils.close(LOG, fc);
+            }
+
+            entryLogManager.forceClose();
+            synchronized (compactionLogLock) {
+                IOUtils.close(LOG, compactionLogChannel);
+            }
+        }
+        // shutdown the pre-allocation thread
+        entryLoggerAllocator.stop();
+    }
+
+    protected LedgerDirsManager getLedgerDirsManager() {
+        return ledgerDirsManager;
+    }
+
+    /**
+     * Convert log filename (hex format with suffix) to logId in long.
+     */
+    static long fileName2LogId(String fileName) {
+        if (fileName != null && fileName.contains(".")) {
+            fileName = fileName.split("\\.")[0];
+        }
+        try {
+            return Long.parseLong(fileName, 16);
+        } catch (Exception nfe) {
+            LOG.error("Invalid log file name {} found when trying to convert to logId.", fileName, nfe);
+        }
+        return INVALID_LID;
+    }
+
+    /**
+     * Convert log Id to hex string.
+     */
+    static String logId2HexString(long logId) {
+        return Long.toHexString(logId);
+    }
+
+    /**
+     * Data structure which maintains the status of log channels. When a
+     * logChannel is created, an entry of < entryLogId, false > is put into this
+     * sortedmap; when the logChannel is rotated and flushed, the entry is
+     * updated to < entryLogId, true > and all the lowest entries with
+     * < entryLogId, true > status are removed from the sortedmap. That way
+     * we can always obtain the least unflushed LogId.
+     *
+     */
+    static class RecentEntryLogsStatus {
+        private final SortedMap<Long, Boolean> entryLogsStatusMap;
+        private long leastUnflushedLogId;
+
+        RecentEntryLogsStatus(long leastUnflushedLogId) {
+            entryLogsStatusMap = new TreeMap<>();
+            this.leastUnflushedLogId = leastUnflushedLogId;
+        }
+
+        synchronized void createdEntryLog(Long entryLogId) {
+            entryLogsStatusMap.put(entryLogId, false);
+        }
+
+        synchronized void flushRotatedEntryLog(Long entryLogId) {
+            entryLogsStatusMap.replace(entryLogId, true);
+            while ((!entryLogsStatusMap.isEmpty()) && (entryLogsStatusMap.get(entryLogsStatusMap.firstKey()))) {
+                long leastFlushedLogId = entryLogsStatusMap.firstKey();
+                entryLogsStatusMap.remove(leastFlushedLogId);
+                leastUnflushedLogId = leastFlushedLogId + 1;
+            }
+        }
+
+        synchronized long getLeastUnflushedLogId() {
+            return leastUnflushedLogId;
+        }
+
+        synchronized boolean isFlushedLogId(long entryLogId) {
+            return entryLogsStatusMap.getOrDefault(entryLogId, Boolean.FALSE) || entryLogId < leastUnflushedLogId;
+        }
+    }
+
+    @Override
+    public CompactionEntryLog newCompactionLog(long logToCompact) throws IOException {
+        createNewCompactionLog();
+
+        File compactingLogFile = getCurCompactionLogFile();
+        long compactionLogId = fileName2LogId(compactingLogFile.getName());
+        File compactedLogFile = compactedLogFileFromCompacting(compactingLogFile, logToCompact);
+        File finalLogFile = new File(compactingLogFile.getParentFile(),
+                compactingLogFile.getName().substring(0,
+                        compactingLogFile.getName().indexOf(".log") + 4));
+        return new EntryLoggerCompactionEntryLog(
+                compactionLogId, logToCompact, compactingLogFile, compactedLogFile, finalLogFile);
+
+    }
+
+    private class EntryLoggerCompactionEntryLog implements CompactionEntryLog {
+        private final long compactionLogId;
+
private final long logIdToCompact; + private final File compactingLogFile; + private final File compactedLogFile; + private final File finalLogFile; + + EntryLoggerCompactionEntryLog(long compactionLogId, long logIdToCompact, + File compactingLogFile, + File compactedLogFile, + File finalLogFile) { + this.compactionLogId = compactionLogId; + this.logIdToCompact = logIdToCompact; + this.compactingLogFile = compactingLogFile; + this.compactedLogFile = compactedLogFile; + this.finalLogFile = finalLogFile; + } + + @Override + public long addEntry(long ledgerId, ByteBuf entry) throws IOException { + return addEntryForCompaction(ledgerId, entry); + } + @Override + public void scan(EntryLogScanner scanner) throws IOException { + scanEntryLog(compactionLogId, scanner); + } + @Override + public void flush() throws IOException { + flushCompactionLog(); + } + @Override + public void abort() { + removeCurCompactionLog(); + if (compactedLogFile.exists()) { + if (!compactedLogFile.delete()) { + LOG.warn("Could not delete file: {}", compactedLogFile); + } + } + } + + @Override + public void markCompacted() throws IOException { + if (compactingLogFile.exists()) { + if (!compactedLogFile.exists()) { + HardLink.createHardLink(compactingLogFile, compactedLogFile); + } + } else { + throw new IOException("Compaction log doesn't exist any more after flush: " + compactingLogFile); + } + removeCurCompactionLog(); + } + + @Override + public void makeAvailable() throws IOException { + if (!finalLogFile.exists()) { + HardLink.createHardLink(compactedLogFile, finalLogFile); + } + } + @Override + public void finalizeAndCleanup() { + if (compactedLogFile.exists()) { + if (!compactedLogFile.delete()) { + LOG.warn("Could not delete file: {}", compactedLogFile); + } + } + if (compactingLogFile.exists()) { + if (!compactingLogFile.delete()) { + LOG.warn("Could not delete file: {}", compactingLogFile); + } + } + } + + @Override + public long getDstLogId() { + return compactionLogId; + } + @Override + public long getSrcLogId() { + return logIdToCompact; + } + + @Override + public String toString() { + return MoreObjects.toStringHelper(this) + .add("logId", compactionLogId) + .add("compactedLogId", logIdToCompact) + .add("compactingLogFile", compactingLogFile) + .add("compactedLogFile", compactedLogFile) + .add("finalLogFile", finalLogFile) + .toString(); + } + } + + @Override + public Collection incompleteCompactionLogs() { + List ledgerDirs = ledgerDirsManager.getAllLedgerDirs(); + List compactionLogs = new ArrayList<>(); + + for (File dir : ledgerDirs) { + File[] compactingPhaseFiles = dir.listFiles( + file -> file.getName().endsWith(TransactionalEntryLogCompactor.COMPACTING_SUFFIX)); + if (compactingPhaseFiles != null) { + for (File file : compactingPhaseFiles) { + if (file.delete()) { + LOG.info("Deleted failed compaction file {}", file); + } + } + } + File[] compactedPhaseFiles = dir.listFiles( + file -> file.getName().endsWith(TransactionalEntryLogCompactor.COMPACTED_SUFFIX)); + if (compactedPhaseFiles != null) { + for (File compactedFile : compactedPhaseFiles) { + LOG.info("Found compacted log file {} has partially flushed index, recovering index.", + compactedFile); + + File compactingLogFile = new File(compactedFile.getParentFile(), "doesntexist"); + long compactionLogId = -1L; + long compactedLogId = -1L; + String[] parts = compactedFile.getName().split(Pattern.quote(".")); + boolean valid = true; + if (parts.length != 4) { + valid = false; + } else { + try { + compactionLogId = Long.parseLong(parts[0], 16); + 
compactedLogId = Long.parseLong(parts[2], 16); + } catch (NumberFormatException nfe) { + valid = false; + } + } + + if (!valid) { + LOG.info("Invalid compacted file found ({}), deleting", compactedFile); + if (!compactedFile.delete()) { + LOG.warn("Couldn't delete invalid compacted file ({})", compactedFile); + } + continue; + } + File finalLogFile = new File(compactedFile.getParentFile(), compactionLogId + ".log"); + + compactionLogs.add( + new EntryLoggerCompactionEntryLog(compactionLogId, compactedLogId, + compactingLogFile, compactedFile, finalLogFile)); + } + } + } + return compactionLogs; + } + + private static File compactedLogFileFromCompacting(File compactionLogFile, long compactingLogId) { + File dir = compactionLogFile.getParentFile(); + String filename = compactionLogFile.getName(); + String newSuffix = ".log." + DefaultEntryLogger.logId2HexString(compactingLogId) + + TransactionalEntryLogCompactor.COMPACTED_SUFFIX; + String hardLinkFilename = filename.replace(TransactionalEntryLogCompactor.COMPACTING_SUFFIX, newSuffix); + return new File(dir, hardLinkFilename); + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/DefaultFileChannel.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/DefaultFileChannel.java new file mode 100644 index 00000000000..3c581b489b2 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/DefaultFileChannel.java @@ -0,0 +1,79 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +package org.apache.bookkeeper.bookie; + +import java.io.File; +import java.io.FileDescriptor; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.RandomAccessFile; +import java.nio.channels.FileChannel; +import org.apache.bookkeeper.conf.ServerConfiguration; + +/** + * Default FileChannel for bookie to read and write. 
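+ * The underlying RandomAccessFile is opened lazily on the first call to
+ * {@code getFileChannel()} and reused until {@code close()} is invoked.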
+ * + */ +public class DefaultFileChannel implements BookieFileChannel { + private final File file; + private RandomAccessFile randomAccessFile; + private final ServerConfiguration configuration; + + DefaultFileChannel(File file, ServerConfiguration serverConfiguration) throws IOException { + this.file = file; + this.configuration = serverConfiguration; + } + + @Override + public FileChannel getFileChannel() throws FileNotFoundException { + synchronized (this) { + if (randomAccessFile == null) { + randomAccessFile = new RandomAccessFile(file, "rw"); + } + return randomAccessFile.getChannel(); + } + } + + @Override + public boolean fileExists(File file) { + return file.exists(); + } + + @Override + public FileDescriptor getFD() throws IOException { + synchronized (this) { + if (randomAccessFile == null) { + throw new IOException("randomAccessFile is null, please initialize it by calling getFileChannel"); + } + return randomAccessFile.getFD(); + } + } + + @Override + public void close() throws IOException { + synchronized (this) { + if (randomAccessFile != null) { + randomAccessFile.close(); + } + } + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/DefaultFileChannelProvider.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/DefaultFileChannelProvider.java new file mode 100644 index 00000000000..e9444a77df0 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/DefaultFileChannelProvider.java @@ -0,0 +1,46 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +package org.apache.bookkeeper.bookie; + +import java.io.File; +import java.io.IOException; +import org.apache.bookkeeper.conf.ServerConfiguration; + +/** + * A wrapper of FileChannel. + */ +public class DefaultFileChannelProvider implements FileChannelProvider{ + @Override + public BookieFileChannel open(File file, ServerConfiguration configuration) throws IOException { + return new DefaultFileChannel(file, configuration); + } + + @Override + public void close(BookieFileChannel bookieFileChannel) throws IOException { + bookieFileChannel.close(); + } + + @Override + public void close() { + + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/EntryKeyValue.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/EntryKeyValue.java index 43e35fbbaa4..a0a31db19e5 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/EntryKeyValue.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/EntryKeyValue.java @@ -21,7 +21,6 @@ import io.netty.buffer.ByteBuf; import io.netty.buffer.Unpooled; - import java.nio.ByteBuffer; /** @@ -109,6 +108,7 @@ int writeToByteBuffer(ByteBuffer dst) { /** * String representation. 
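+     * For example, ledger 1 / entry 2 renders as {@code "1:2"}.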
*/ + @Override public String toString() { return ledgerId + ":" + entryId; } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/EntryLocation.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/EntryLocation.java index be5eb7fbee8..f43521d9e95 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/EntryLocation.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/EntryLocation.java @@ -46,4 +46,14 @@ public long getEntry() { public long getLocation() { return location; } + + @Override + public String toString() { + return new StringBuilder().append("EntryLocation{") + .append("ledger=").append(ledger) + .append(",entry=").append(entry) + .append(",locationLog=").append(location >> 32 & 0xFFFFFFFF) + .append(",locationOffset=").append((int) (location & 0xFFFFFFFF)) + .append("}").toString(); + } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/EntryLogCompactor.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/EntryLogCompactor.java index a5e2c3fc210..81cd463761a 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/EntryLogCompactor.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/EntryLogCompactor.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -22,11 +22,12 @@ package org.apache.bookkeeper.bookie; import io.netty.buffer.ByteBuf; - import java.io.IOException; import java.util.ArrayList; import java.util.List; - +import org.apache.bookkeeper.bookie.storage.EntryLogScanner; +import org.apache.bookkeeper.bookie.storage.EntryLogger; +import org.apache.bookkeeper.conf.ServerConfiguration; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -43,11 +44,15 @@ public class EntryLogCompactor extends AbstractLogCompactor { final CompactableLedgerStorage ledgerStorage; private final int maxOutstandingRequests; - public EntryLogCompactor(GarbageCollectorThread gcThread) { - super(gcThread); + public EntryLogCompactor( + ServerConfiguration conf, + EntryLogger entryLogger, + CompactableLedgerStorage ledgerStorage, + LogRemovalListener logRemover) { + super(conf, logRemover); this.maxOutstandingRequests = conf.getCompactionMaxOutstandingRequests(); - this.entryLogger = gcThread.getEntryLogger(); - this.ledgerStorage = gcThread.getLedgerStorage(); + this.entryLogger = entryLogger; + this.ledgerStorage = ledgerStorage; } @Override @@ -57,7 +62,7 @@ public boolean compact(EntryLogMetadata entryLogMeta) { scannerFactory.newScanner(entryLogMeta)); scannerFactory.flush(); LOG.info("Removing entry log {} after compaction", entryLogMeta.getEntryLogId()); - gcThread.removeEntryLog(entryLogMeta.getEntryLogId()); + logRemovalListener.removeEntryLog(entryLogMeta.getEntryLogId()); } catch (LedgerDirsManager.NoWritableLedgerDirException nwlde) { LOG.warn("No writable ledger directory available, aborting compaction", nwlde); return false; @@ -77,9 +82,9 @@ public boolean compact(EntryLogMetadata entryLogMeta) { class CompactionScannerFactory { List offsets = new ArrayList(); - EntryLogger.EntryLogScanner newScanner(final EntryLogMetadata meta) { + EntryLogScanner newScanner(final EntryLogMetadata meta) { - return new EntryLogger.EntryLogScanner() { + return new EntryLogScanner() { @Override public boolean accept(long ledgerId) { return meta.containsLedger(ledgerId); diff --git 
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/EntryLogManager.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/EntryLogManager.java index 340e9a18b60..7364fb08e56 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/EntryLogManager.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/EntryLogManager.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -25,8 +25,7 @@ import java.io.File; import java.io.IOException; import java.util.List; - -import org.apache.bookkeeper.bookie.EntryLogger.BufferedLogChannel; +import org.apache.bookkeeper.bookie.DefaultEntryLogger.BufferedLogChannel; interface EntryLogManager { diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/EntryLogManagerBase.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/EntryLogManagerBase.java index 701fb7b2e8e..e997906c23c 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/EntryLogManagerBase.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/EntryLogManagerBase.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -21,6 +21,9 @@ package org.apache.bookkeeper.bookie; +import static org.apache.bookkeeper.bookie.DefaultEntryLogger.UNASSIGNED_LEDGERID; + +import com.google.common.annotations.VisibleForTesting; import io.netty.buffer.ByteBuf; import io.netty.buffer.Unpooled; import io.netty.util.concurrent.FastThreadLocal; @@ -28,8 +31,8 @@ import java.io.IOException; import java.util.List; import lombok.extern.slf4j.Slf4j; -import org.apache.bookkeeper.bookie.EntryLogger.BufferedLogChannel; -import org.apache.bookkeeper.bookie.EntryLogger.EntryLogListener; +import org.apache.bookkeeper.bookie.DefaultEntryLogger.BufferedLogChannel; +import org.apache.bookkeeper.bookie.DefaultEntryLogger.EntryLogListener; import org.apache.bookkeeper.bookie.LedgerDirsManager.NoWritableLedgerDirException; import org.apache.bookkeeper.conf.ServerConfiguration; @@ -38,14 +41,14 @@ abstract class EntryLogManagerBase implements EntryLogManager { volatile List rotatedLogChannels; final EntryLoggerAllocator entryLoggerAllocator; final LedgerDirsManager ledgerDirsManager; - private final List listeners; + private final List listeners; /** * The maximum size of a entry logger file. */ final long logSizeLimit; EntryLogManagerBase(ServerConfiguration conf, LedgerDirsManager ledgerDirsManager, - EntryLoggerAllocator entryLoggerAllocator, List listeners) { + EntryLoggerAllocator entryLoggerAllocator, List listeners) { this.ledgerDirsManager = ledgerDirsManager; this.entryLoggerAllocator = entryLoggerAllocator; this.listeners = listeners; @@ -123,7 +126,9 @@ public void flush() throws IOException { void flushLogChannel(BufferedLogChannel logChannel, boolean forceMetadata) throws IOException { if (logChannel != null) { logChannel.flushAndForceWrite(forceMetadata); - log.debug("Flush and sync current entry logger {}", logChannel.getLogId()); + if (log.isDebugEnabled()) { + log.debug("Flush and sync current entry logger {}", logChannel.getLogId()); + } } } @@ -131,7 +136,18 @@ void flushLogChannel(BufferedLogChannel logChannel, boolean forceMetadata) throw * Creates a new log file. This method should be guarded by a lock, * so callers of this method should be in right scope of the lock. 
*/ + @VisibleForTesting void createNewLog(long ledgerId) throws IOException { + createNewLog(ledgerId, ""); + } + + void createNewLog(long ledgerId, String reason) throws IOException { + if (ledgerId != UNASSIGNED_LEDGERID) { + log.info("Creating a new entry log file for ledger '{}' {}", ledgerId, reason); + } else { + log.info("Creating a new entry log file {}", reason); + } + BufferedLogChannel logChannel = getCurrentLogForLedger(ledgerId); // first tried to create a new log channel. add current log channel to ToFlush list only when // there is a new log channel. it would prevent that a log channel is referenced by both @@ -145,6 +161,7 @@ void createNewLog(long ledgerId) throws IOException { logChannel.appendLedgersMap(); BufferedLogChannel newLogChannel = entryLoggerAllocator.createNewLog(selectDirForNextEntryLog()); + entryLoggerAllocator.setWritingLogId(newLogChannel.getLogId()); setCurrentLogForLedgerAndAddToRotate(ledgerId, newLogChannel); log.info("Flushing entry logger {} back to filesystem, pending for syncing entry loggers : {}.", logChannel.getLogId(), rotatedLogChannels); @@ -152,8 +169,9 @@ void createNewLog(long ledgerId) throws IOException { listener.onRotateEntryLog(); } } else { - setCurrentLogForLedgerAndAddToRotate(ledgerId, - entryLoggerAllocator.createNewLog(selectDirForNextEntryLog())); + BufferedLogChannel newLogChannel = entryLoggerAllocator.createNewLog(selectDirForNextEntryLog()); + entryLoggerAllocator.setWritingLogId(newLogChannel.getLogId()); + setCurrentLogForLedgerAndAddToRotate(ledgerId, newLogChannel); } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/EntryLogManagerForEntryLogPerLedger.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/EntryLogManagerForEntryLogPerLedger.java index 39ed60cea57..ca6224ea83c 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/EntryLogManagerForEntryLogPerLedger.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/EntryLogManagerForEntryLogPerLedger.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file @@ -21,6 +21,8 @@ package org.apache.bookkeeper.bookie; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.CATEGORY_SERVER; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.ENTRYLOGGER_SCOPE; import static org.apache.bookkeeper.bookie.BookKeeperServerStats.ENTRYLOGS_PER_LEDGER; import static org.apache.bookkeeper.bookie.BookKeeperServerStats.NUM_LEDGERS_HAVING_MULTIPLE_ENTRYLOGS; import static org.apache.bookkeeper.bookie.BookKeeperServerStats.NUM_OF_WRITE_ACTIVE_LEDGERS; @@ -34,9 +36,7 @@ import com.google.common.cache.RemovalCause; import com.google.common.cache.RemovalListener; import com.google.common.cache.RemovalNotification; - import io.netty.buffer.ByteBuf; - import java.io.File; import java.io.IOException; import java.util.HashMap; @@ -52,17 +52,16 @@ import java.util.concurrent.atomic.AtomicReferenceArray; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantLock; - import lombok.extern.slf4j.Slf4j; - -import org.apache.bookkeeper.bookie.EntryLogger.BufferedLogChannel; +import org.apache.bookkeeper.bookie.DefaultEntryLogger.BufferedLogChannel; import org.apache.bookkeeper.bookie.LedgerDirsManager.LedgerDirsListener; +import org.apache.bookkeeper.common.util.MathUtils; import org.apache.bookkeeper.conf.ServerConfiguration; import org.apache.bookkeeper.stats.Counter; import org.apache.bookkeeper.stats.OpStatsLogger; import org.apache.bookkeeper.stats.StatsLogger; +import org.apache.bookkeeper.stats.annotations.StatsDoc; import org.apache.bookkeeper.util.IOUtils; -import org.apache.bookkeeper.util.MathUtils; import org.apache.bookkeeper.util.collections.ConcurrentLongHashMap; import org.apache.commons.lang3.mutable.MutableInt; @@ -115,11 +114,37 @@ private void setEntryLogWithDirInfo(BufferedLogChannelWithDirInfo entryLogWithDi } } + @StatsDoc( + name = ENTRYLOGGER_SCOPE, + category = CATEGORY_SERVER, + help = "EntryLogger related stats" + ) class EntryLogsPerLedgerCounter { + + @StatsDoc( + name = NUM_OF_WRITE_ACTIVE_LEDGERS, + help = "Number of write active ledgers" + ) private final Counter numOfWriteActiveLedgers; + @StatsDoc( + name = NUM_OF_WRITE_LEDGERS_REMOVED_CACHE_EXPIRY, + help = "Number of write ledgers removed after cache expiry" + ) private final Counter numOfWriteLedgersRemovedCacheExpiry; + @StatsDoc( + name = NUM_OF_WRITE_LEDGERS_REMOVED_CACHE_MAXSIZE, + help = "Number of write ledgers removed due to reach max cache size" + ) private final Counter numOfWriteLedgersRemovedCacheMaxSize; + @StatsDoc( + name = NUM_LEDGERS_HAVING_MULTIPLE_ENTRYLOGS, + help = "Number of ledgers having multiple entry logs" + ) private final Counter numLedgersHavingMultipleEntrylogs; + @StatsDoc( + name = ENTRYLOGS_PER_LEDGER, + help = "The distribution of number of entry logs per ledger" + ) private final OpStatsLogger entryLogsPerLedger; /* * ledgerIdEntryLogCounterCacheMap cache will be used to store count of @@ -127,7 +152,7 @@ class EntryLogsPerLedgerCounter { * 'expiry duration' and 'maximumSize' will be set to * entryLogPerLedgerCounterLimitsMultFactor times of * 'ledgerIdEntryLogMap' cache limits. This is needed because entries - * from 'ledgerIdEntryLogMap' can be removed from cache becasue of + * from 'ledgerIdEntryLogMap' can be removed from cache because of * accesstime expiry or cache size limits, but to know the actual number * of entrylogs per ledger, we should maintain this count for long time. 
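As a side note for readers unfamiliar with the eviction semantics this comment depends on: the class builds its caches with Guava's CacheBuilder, combining access-time expiry with a size cap and a removal listener that distinguishes the two causes. A minimal, self-contained sketch of that pattern follows; the limits are invented (the real values come from ServerConfiguration's entrylogMapAccessExpiryTimeInSeconds and maximumNumberOfActiveEntryLogs).

```java
import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.RemovalCause;
import com.google.common.cache.RemovalListener;
import java.util.concurrent.TimeUnit;

public class CacheEvictionSketch {
    public static void main(String[] args) {
        RemovalListener<Long, String> listener = notification -> {
            if (notification.getCause() == RemovalCause.EXPIRED) {
                // analogous to numOfWriteLedgersRemovedCacheExpiry
                System.out.println("expired: ledger " + notification.getKey());
            } else if (notification.getCause() == RemovalCause.SIZE) {
                // analogous to numOfWriteLedgersRemovedCacheMaxSize
                System.out.println("size-evicted: ledger " + notification.getKey());
            }
        };
        Cache<Long, String> ledgerToEntryLog = CacheBuilder.newBuilder()
                .expireAfterAccess(300, TimeUnit.SECONDS) // drop idle ledgers
                .maximumSize(500)                         // cap active entry logs
                .removalListener(listener)
                .build();
        ledgerToEntryLog.put(1L, "entry-log-for-ledger-1");
    }
}
```

This is also why the counter cache described above uses limits multiplied by entryLogPerLedgerCounterLimitsMultFactor: it must outlive the evictions of the primary map to keep an accurate per-ledger count.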
*/ @@ -230,7 +255,7 @@ ConcurrentMap getCounterMap() { */ private final ConcurrentLongHashMap replicaOfCurrentLogChannels; private final CacheLoader entryLogAndLockTupleCacheLoader; - private final EntryLogger.RecentEntryLogsStatus recentlyCreatedEntryLogsStatus; + private final DefaultEntryLogger.RecentEntryLogsStatus recentlyCreatedEntryLogsStatus; private final int entrylogMapAccessExpiryTimeInSeconds; private final int maximumNumberOfActiveEntryLogs; private final int entryLogPerLedgerCounterLimitsMultFactor; @@ -240,13 +265,15 @@ ConcurrentMap getCounterMap() { final EntryLogsPerLedgerCounter entryLogsPerLedgerCounter; EntryLogManagerForEntryLogPerLedger(ServerConfiguration conf, LedgerDirsManager ledgerDirsManager, - EntryLoggerAllocator entryLoggerAllocator, List listeners, - EntryLogger.RecentEntryLogsStatus recentlyCreatedEntryLogsStatus, StatsLogger statsLogger) - throws IOException { + EntryLoggerAllocator entryLoggerAllocator, + List listeners, + DefaultEntryLogger.RecentEntryLogsStatus recentlyCreatedEntryLogsStatus, + StatsLogger statsLogger) throws IOException { super(conf, ledgerDirsManager, entryLoggerAllocator, listeners); this.recentlyCreatedEntryLogsStatus = recentlyCreatedEntryLogsStatus; this.rotatedLogChannels = new CopyOnWriteArrayList(); - this.replicaOfCurrentLogChannels = new ConcurrentLongHashMap(); + this.replicaOfCurrentLogChannels = + ConcurrentLongHashMap.newBuilder().build(); this.entrylogMapAccessExpiryTimeInSeconds = conf.getEntrylogMapAccessExpiryTimeInSeconds(); this.maximumNumberOfActiveEntryLogs = conf.getMaximumNumberOfActiveEntryLogs(); this.entryLogPerLedgerCounterLimitsMultFactor = conf.getEntryLogPerLedgerCounterLimitsMultFactor(); @@ -307,8 +334,10 @@ public void onRemoval( */ private void onCacheEntryRemoval(RemovalNotification removedLedgerEntryLogMapEntry) { Long ledgerId = removedLedgerEntryLogMapEntry.getKey(); - log.debug("LedgerId {} is being evicted from the cache map because of {}", ledgerId, - removedLedgerEntryLogMapEntry.getCause()); + if (log.isDebugEnabled()) { + log.debug("LedgerId {} is being evicted from the cache map because of {}", ledgerId, + removedLedgerEntryLogMapEntry.getCause()); + } EntryLogAndLockTuple entryLogAndLockTuple = removedLedgerEntryLogMapEntry.getValue(); if (entryLogAndLockTuple == null) { log.error("entryLogAndLockTuple is not supposed to be null in entry removal listener for ledger : {}", @@ -484,7 +513,7 @@ public boolean commitEntryMemTableFlush() throws IOException { try { if (reachEntryLogLimit(currentLog, 0L)) { log.info("Rolling entry logger since it reached size limitation for ledger: {}", ledgerId); - createNewLog(ledgerId); + createNewLog(ledgerId, "after entry log file is rotated"); } } finally { lock.unlock(); @@ -640,7 +669,9 @@ BufferedLogChannel getCurrentLogForLedgerForAddEntry(long ledgerId, int entrySiz if (logChannel != null) { logChannel.flushAndForceWriteIfRegularFlush(false); } - createNewLog(ledgerId); + createNewLog(ledgerId, + ": diskFull = " + diskFull + ", allDisksFull = " + allDisksFull + + ", reachEntryLogLimit = " + reachEntryLogLimit + ", logChannel = " + logChannel); } return getCurrentLogForLedger(ledgerId); diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/EntryLogManagerForSingleEntryLog.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/EntryLogManagerForSingleEntryLog.java index 3e552d0fca9..b7845118680 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/EntryLogManagerForSingleEntryLog.java +++ 
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/EntryLogManagerForSingleEntryLog.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -21,8 +21,8 @@ package org.apache.bookkeeper.bookie; -import static org.apache.bookkeeper.bookie.EntryLogger.INVALID_LID; -import static org.apache.bookkeeper.bookie.EntryLogger.UNASSIGNED_LEDGERID; +import static org.apache.bookkeeper.bookie.DefaultEntryLogger.INVALID_LID; +import static org.apache.bookkeeper.bookie.DefaultEntryLogger.UNASSIGNED_LEDGERID; import io.netty.buffer.ByteBuf; import java.io.File; @@ -33,7 +33,7 @@ import java.util.List; import java.util.concurrent.atomic.AtomicBoolean; import lombok.extern.slf4j.Slf4j; -import org.apache.bookkeeper.bookie.EntryLogger.BufferedLogChannel; +import org.apache.bookkeeper.bookie.DefaultEntryLogger.BufferedLogChannel; import org.apache.bookkeeper.bookie.LedgerDirsManager.LedgerDirsListener; import org.apache.bookkeeper.conf.ServerConfiguration; import org.apache.bookkeeper.util.IOUtils; @@ -44,11 +44,11 @@ class EntryLogManagerForSingleEntryLog extends EntryLogManagerBase { private volatile BufferedLogChannel activeLogChannel; private long logIdBeforeFlush = INVALID_LID; private final AtomicBoolean shouldCreateNewEntryLog = new AtomicBoolean(false); - private EntryLogger.RecentEntryLogsStatus recentlyCreatedEntryLogsStatus; + private final DefaultEntryLogger.RecentEntryLogsStatus recentlyCreatedEntryLogsStatus; EntryLogManagerForSingleEntryLog(ServerConfiguration conf, LedgerDirsManager ledgerDirsManager, - EntryLoggerAllocator entryLoggerAllocator, List listeners, - EntryLogger.RecentEntryLogsStatus recentlyCreatedEntryLogsStatus) { + EntryLoggerAllocator entryLoggerAllocator, List listeners, + DefaultEntryLogger.RecentEntryLogsStatus recentlyCreatedEntryLogsStatus) { super(conf, ledgerDirsManager, entryLoggerAllocator, listeners); this.rotatedLogChannels = new LinkedList(); this.recentlyCreatedEntryLogsStatus = recentlyCreatedEntryLogsStatus; @@ -92,7 +92,8 @@ synchronized BufferedLogChannel getCurrentLogForLedgerForAddEntry(long ledgerId, boolean rollLog) throws IOException { if (null == activeLogChannel) { // log channel can be null because the file is deferred to be created - createNewLog(UNASSIGNED_LEDGERID); + createNewLog(UNASSIGNED_LEDGERID, "because current active log channel has not been initialized yet"); + return activeLogChannel; } boolean reachEntryLogLimit = rollLog ?
reachEntryLogLimit(activeLogChannel, entrySize) @@ -103,7 +104,8 @@ synchronized BufferedLogChannel getCurrentLogForLedgerForAddEntry(long ledgerId, if (activeLogChannel != null) { activeLogChannel.flushAndForceWriteIfRegularFlush(false); } - createNewLog(UNASSIGNED_LEDGERID); + createNewLog(UNASSIGNED_LEDGERID, + ": createNewLog = " + createNewLog + ", reachEntryLogLimit = " + reachEntryLogLimit); // Reset the flag if (createNewLog) { shouldCreateNewEntryLog.set(false); @@ -156,7 +158,7 @@ public long getCurrentLogId() { if (currentActiveLogChannel != null) { return currentActiveLogChannel.getLogId(); } else { - return EntryLogger.UNINITIALIZED_LOG_ID; + return DefaultEntryLogger.UNINITIALIZED_LOG_ID; } } @@ -238,7 +240,9 @@ public boolean commitEntryMemTableFlush() throws IOException { */ if (reachEntryLogLimit(activeLogChannel, 0L) || logIdAfterFlush != logIdBeforeFlush) { log.info("Rolling entry logger since it reached size limitation"); - createNewLog(UNASSIGNED_LEDGERID); + createNewLog(UNASSIGNED_LEDGERID, + "due to reaching log limit after flushing memtable : logIdBeforeFlush = " + + logIdBeforeFlush + ", logIdAfterFlush = " + logIdAfterFlush); return true; } return false; @@ -251,12 +255,16 @@ public void prepareSortedLedgerStorageCheckpoint(long numBytesFlushed) throws IO // it means bytes might live at current active entry log, we need // roll current entry log and then issue checkpoint to underlying // interleaved ledger storage. - createNewLog(UNASSIGNED_LEDGERID); + createNewLog(UNASSIGNED_LEDGERID, + "due to preparing checkpoint : numBytesFlushed = " + numBytesFlushed); } } @Override - public EntryLogger.BufferedLogChannel createNewLogForCompaction() throws IOException { - return entryLoggerAllocator.createNewLogForCompaction(selectDirForNextEntryLog()); + public DefaultEntryLogger.BufferedLogChannel createNewLogForCompaction() throws IOException { + BufferedLogChannel newLogForCompaction = entryLoggerAllocator.createNewLogForCompaction( + selectDirForNextEntryLog()); + entryLoggerAllocator.setWritingCompactingLogId(newLogForCompaction.getLogId()); + return newLogForCompaction; } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/EntryLogMetadata.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/EntryLogMetadata.java index ad6d87d24c2..1fdb22be545 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/EntryLogMetadata.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/EntryLogMetadata.java @@ -21,24 +21,37 @@ package org.apache.bookkeeper.bookie; +import io.netty.util.Recycler; +import io.netty.util.Recycler.Handle; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; import java.util.function.LongPredicate; - import org.apache.bookkeeper.util.collections.ConcurrentLongLongHashMap; /** - * Records the total size, remaining size and the set of ledgers that comprise a entry log. + * Records the total size, remaining size and the set of ledgers that comprise a + * entry log. 
*/ public class EntryLogMetadata { - private final long entryLogId; - private long totalSize; - private long remainingSize; - private final ConcurrentLongLongHashMap ledgersMap; + protected long entryLogId; + protected long totalSize; + protected long remainingSize; + protected final ConcurrentLongLongHashMap ledgersMap; + private static final short DEFAULT_SERIALIZATION_VERSION = 0; + + protected EntryLogMetadata() { + ledgersMap = ConcurrentLongLongHashMap.newBuilder() + .expectedItems(256) + .concurrencyLevel(1) + .build(); + } public EntryLogMetadata(long logId) { + this(); this.entryLogId = logId; totalSize = remainingSize = 0; - ledgersMap = new ConcurrentLongLongHashMap(256, 1); } public void addLedgerSize(long ledgerId, long size) { @@ -74,7 +87,7 @@ public long getRemainingSize() { return remainingSize; } - ConcurrentLongLongHashMap getLedgersMap() { + public ConcurrentLongLongHashMap getLedgersMap() { return ledgersMap; } @@ -91,9 +104,116 @@ public void removeLedgerIf(LongPredicate predicate) { @Override public String toString() { StringBuilder sb = new StringBuilder(); - sb.append("{ totalSize = ").append(totalSize).append(", remainingSize = ").append(remainingSize) - .append(", ledgersMap = ").append(ledgersMap).append(" }"); + sb.append("{totalSize = ").append(totalSize).append(", remainingSize = ").append(remainingSize) + .append(", ledgersMap = ").append(ledgersMap.toString()).append("}"); return sb.toString(); } + /** + * Serializes {@link EntryLogMetadata} and writes to + * {@link DataOutputStream}. + *
+     * schema:
+     * 2-bytes: schema-version
+     * 8-bytes: entrylog-entryLogId
+     * 8-bytes: entrylog-totalSize
+     * 8-bytes: entrylog-remainingSize
+     * 8-bytes: total number of ledgers
+     * ledgers-map
+     * [repeat]: (8-bytes::ledgerId, 8-bytes::size-of-ledger)
+     * 
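To make the schema concrete, a small round-trip sketch using the serialize/deserialize pair defined below (log id, ledger id and size are invented; note that deserialize hands back a pooled EntryLogMetadataRecyclable which the caller must recycle):

```java
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

public class MetadataRoundTripSketch {
    public static void main(String[] args) throws IOException {
        EntryLogMetadata meta = new EntryLogMetadata(7L); // invented log id
        meta.addLedgerSize(100L, 4096L);                  // invented ledger/size

        // Write using the schema documented above: version, ids, sizes, then pairs.
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        meta.serialize(new DataOutputStream(bos));

        EntryLogMetadata.EntryLogMetadataRecyclable copy = EntryLogMetadata.deserialize(
                new DataInputStream(new ByteArrayInputStream(bos.toByteArray())));
        try {
            System.out.println(copy.getLedgersMap().get(100L)); // 4096
        } finally {
            copy.recycle(); // the caller owns the pooled instance
        }
    }
}
```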
+ * @param out + * @throws IOException + * throws if it couldn't serialize metadata-fields + * @throws IllegalStateException + * throws if it couldn't serialize ledger-map + */ + public void serialize(DataOutputStream out) throws IOException, IllegalStateException { + out.writeShort(DEFAULT_SERIALIZATION_VERSION); + out.writeLong(entryLogId); + out.writeLong(totalSize); + out.writeLong(remainingSize); + out.writeLong(ledgersMap.size()); + ledgersMap.forEach((ledgerId, size) -> { + try { + out.writeLong(ledgerId); + out.writeLong(size); + } catch (IOException e) { + throw new IllegalStateException("Failed to serialize entryLogMetadata", e); + } + }); + out.flush(); + } + + /** + * Deserializes {@link EntryLogMetadataRecyclable} from given {@link DataInputStream}. + * Caller has to recycle returned {@link EntryLogMetadataRecyclable}. + * @param in + * @return + * @throws IOException + */ + public static EntryLogMetadataRecyclable deserialize(DataInputStream in) throws IOException { + EntryLogMetadataRecyclable metadata = EntryLogMetadataRecyclable.get(); + try { + short serVersion = in.readShort(); + if ((serVersion != DEFAULT_SERIALIZATION_VERSION)) { + throw new IOException(String.format("%s. expected =%d, found=%d", "serialization version doesn't match", + DEFAULT_SERIALIZATION_VERSION, serVersion)); + } + metadata.entryLogId = in.readLong(); + metadata.totalSize = in.readLong(); + metadata.remainingSize = in.readLong(); + long ledgersMapSize = in.readLong(); + for (int i = 0; i < ledgersMapSize; i++) { + long ledgerId = in.readLong(); + long entryId = in.readLong(); + metadata.ledgersMap.put(ledgerId, entryId); + } + return metadata; + } catch (IOException e) { + metadata.recycle(); + throw e; + } catch (Exception e) { + metadata.recycle(); + throw new IOException(e); + } + } + + public void clear() { + entryLogId = -1L; + totalSize = -1L; + remainingSize = -1L; + ledgersMap.clear(); + } + + /** + * Recyclable {@link EntryLogMetadata} class. + * + */ + public static class EntryLogMetadataRecyclable extends EntryLogMetadata { + + private final Handle recyclerHandle; + + private EntryLogMetadataRecyclable(Handle recyclerHandle) { + this.recyclerHandle = recyclerHandle; + } + + private static final Recycler RECYCLER = + new Recycler() { + protected EntryLogMetadataRecyclable newObject(Recycler.Handle handle) { + return new EntryLogMetadataRecyclable(handle); + } + }; + + public static EntryLogMetadataRecyclable get() { + EntryLogMetadataRecyclable metadata = RECYCLER.get(); + return metadata; + } + + public void recycle() { + clear(); + recyclerHandle.recycle(this); + } + + } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/EntryLogMetadataMap.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/EntryLogMetadataMap.java new file mode 100644 index 00000000000..5b6915cf384 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/EntryLogMetadataMap.java @@ -0,0 +1,100 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +package org.apache.bookkeeper.bookie; + +import java.io.Closeable; +import java.util.function.BiConsumer; +import org.apache.bookkeeper.bookie.BookieException.EntryLogMetadataMapException; + +/** + * Map-store to store Entrylogger metadata. + */ +public interface EntryLogMetadataMap extends Closeable { + + /** + * Checks if record with entryLogId exists into the map. + * + * @param entryLogId + * @return + * @throws EntryLogMetadataMapException + */ + boolean containsKey(long entryLogId) throws EntryLogMetadataMapException; + + /** + * Adds entryLogMetadata record into the map. + * + * @param entryLogId + * @param entryLogMeta + * @throws EntryLogMetadataMapException + */ + void put(long entryLogId, EntryLogMetadata entryLogMeta) throws EntryLogMetadataMapException; + + /** + * Performs the given action for each entry in this map until all entries + * have been processed or the action throws an exception. + * + * @param action + * @throws EntryLogMetadataMapException + */ + void forEach(BiConsumer action) throws EntryLogMetadataMapException; + + /** + * Performs the given action for the key. + * + * @param action + * @throws EntryLogMetadataMapException + */ + void forKey(long entryLogId, BiConsumer action) throws EntryLogMetadataMapException; + + /** + * Removes entryLogMetadata record from the map. + * + * @param entryLogId + * @throws EntryLogMetadataMapException + */ + void remove(long entryLogId) throws EntryLogMetadataMapException; + + /** + * Returns number of entryLogMetadata records presents into the map. + * + * @return + * @throws EntryLogMetadataMapException + */ + int size() throws EntryLogMetadataMapException; + + /** + * Returns true if there are no elements in the map. + * + * @return + */ + default boolean isEmpty() throws EntryLogMetadataMapException { + return size() == 0; + } + + /** + * Clear all records from the map. + * For unit tests. + * + * @throws EntryLogMetadataMapException + */ + void clear() throws EntryLogMetadataMapException; +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/EntryLogger.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/EntryLogger.java deleted file mode 100644 index ddf255a3b3b..00000000000 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/EntryLogger.java +++ /dev/null @@ -1,1157 +0,0 @@ -/* - * - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
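The change only introduces the interface at this point. As a rough illustration of the contract, and not the implementation shipped with this change, an in-memory variant could back it with a ConcurrentHashMap (the generic signatures are assumed to be BiConsumer<Long, EntryLogMetadata>):

```java
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.BiConsumer;

// Sketch only: makes the EntryLogMetadataMap contract concrete in memory.
public class InMemoryEntryLogMetadataMapSketch implements EntryLogMetadataMap {
    private final Map<Long, EntryLogMetadata> map = new ConcurrentHashMap<>();

    @Override public boolean containsKey(long entryLogId) { return map.containsKey(entryLogId); }
    @Override public void put(long entryLogId, EntryLogMetadata meta) { map.put(entryLogId, meta); }
    @Override public void forEach(BiConsumer<Long, EntryLogMetadata> action) { map.forEach(action); }
    @Override public void forKey(long entryLogId, BiConsumer<Long, EntryLogMetadata> action) {
        action.accept(entryLogId, map.get(entryLogId)); // value is null if absent
    }
    @Override public void remove(long entryLogId) { map.remove(entryLogId); }
    @Override public int size() { return map.size(); }
    @Override public void clear() { map.clear(); }
    @Override public void close() { map.clear(); }
}
```

The point of abstracting this behind an interface (rather than the in-process map the garbage collector used before) is that the metadata can then be kept off-heap or on disk without touching the compaction code.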
See the License for the - * specific language governing permissions and limitations - * under the License. - * - */ - -package org.apache.bookkeeper.bookie; - -import static com.google.common.base.Charsets.UTF_8; - -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.MoreObjects; -import com.google.common.collect.MapMaker; -import com.google.common.collect.Sets; - -import io.netty.buffer.ByteBuf; -import io.netty.buffer.ByteBufAllocator; -import io.netty.buffer.PooledByteBufAllocator; -import io.netty.buffer.Unpooled; -import io.netty.util.concurrent.FastThreadLocal; -import java.io.BufferedReader; -import java.io.File; -import java.io.FileFilter; -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.FilenameFilter; -import java.io.IOException; -import java.io.InputStreamReader; -import java.io.RandomAccessFile; -import java.nio.ByteBuffer; -import java.nio.channels.FileChannel; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.SortedMap; -import java.util.TreeMap; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ConcurrentMap; -import java.util.concurrent.CopyOnWriteArrayList; - -import org.apache.bookkeeper.conf.ServerConfiguration; -import org.apache.bookkeeper.stats.NullStatsLogger; -import org.apache.bookkeeper.stats.StatsLogger; -import org.apache.bookkeeper.util.IOUtils; -import org.apache.bookkeeper.util.collections.ConcurrentLongLongHashMap; -import org.apache.bookkeeper.util.collections.ConcurrentLongLongHashMap.BiConsumerLong; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * This class manages the writing of the bookkeeper entries. All the new - * entries are written to a common log. The LedgerCache will have pointers - * into files created by this class with offsets into the files to find - * the actual ledger entry. The entry log files created by this class are - * identified by a long. 
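The "pointer" mentioned in this description is a single long that packs the entry log id into the high 32 bits and the byte offset into the low 32 bits; that is what logIdForOffset and the `location & 0xffffffffL` reads further down decode. A tiny sketch with invented values:

```java
public class LocationCodecSketch {
    // Pack an entry-log id and a file offset into one long, matching the
    // (logId << 32L) | pos expression used by addEntryForCompaction below.
    static long encode(long logId, long offset) {
        return (logId << 32L) | offset;
    }

    static long logIdForOffset(long location) {
        return location >> 32L;          // high 32 bits: entry log id
    }

    static long posForOffset(long location) {
        return location & 0xffffffffL;   // low 32 bits: offset within the log
    }

    public static void main(String[] args) {
        long location = encode(3L, 1024L); // hypothetical log 3, offset 1024
        System.out.println(logIdForOffset(location)); // 3
        System.out.println(posForOffset(location));   // 1024
    }
}
```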
- */ -public class EntryLogger { - private static final Logger LOG = LoggerFactory.getLogger(EntryLogger.class); - static final long UNASSIGNED_LEDGERID = -1L; - // log file suffix - static final String LOG_FILE_SUFFIX = ".log"; - - @VisibleForTesting - static final int UNINITIALIZED_LOG_ID = -0xDEAD; - - // Expose Stats - private final StatsLogger statsLogger; - - static class BufferedLogChannel extends BufferedChannel { - private final long logId; - private final EntryLogMetadata entryLogMetadata; - private final File logFile; - private long ledgerIdAssigned = UNASSIGNED_LEDGERID; - - public BufferedLogChannel(FileChannel fc, int writeCapacity, int readCapacity, long logId, File logFile, - long unpersistedBytesBound) throws IOException { - super(fc, writeCapacity, readCapacity, unpersistedBytesBound); - this.logId = logId; - this.entryLogMetadata = new EntryLogMetadata(logId); - this.logFile = logFile; - } - public long getLogId() { - return logId; - } - - public File getLogFile() { - return logFile; - } - - public void registerWrittenEntry(long ledgerId, long entrySize) { - entryLogMetadata.addLedgerSize(ledgerId, entrySize); - } - - public ConcurrentLongLongHashMap getLedgersMap() { - return entryLogMetadata.getLedgersMap(); - } - - public Long getLedgerIdAssigned() { - return ledgerIdAssigned; - } - - public void setLedgerIdAssigned(Long ledgerId) { - this.ledgerIdAssigned = ledgerId; - } - - @Override - public String toString() { - return MoreObjects.toStringHelper(BufferedChannel.class) - .add("logId", logId) - .add("logFile", logFile) - .add("ledgerIdAssigned", ledgerIdAssigned) - .toString(); - } - - /** - * Append the ledger map at the end of the entry log. - * Updates the entry log file header with the offset and size of the map. - */ - void appendLedgersMap() throws IOException { - - long ledgerMapOffset = this.position(); - - ConcurrentLongLongHashMap ledgersMap = this.getLedgersMap(); - int numberOfLedgers = (int) ledgersMap.size(); - - // Write the ledgers map into several batches - - final int maxMapSize = LEDGERS_MAP_HEADER_SIZE + LEDGERS_MAP_ENTRY_SIZE * LEDGERS_MAP_MAX_BATCH_SIZE; - final ByteBuf serializedMap = ByteBufAllocator.DEFAULT.buffer(maxMapSize); - - try { - ledgersMap.forEach(new BiConsumerLong() { - int remainingLedgers = numberOfLedgers; - boolean startNewBatch = true; - int remainingInBatch = 0; - - @Override - public void accept(long ledgerId, long size) { - if (startNewBatch) { - int batchSize = Math.min(remainingLedgers, LEDGERS_MAP_MAX_BATCH_SIZE); - int ledgerMapSize = LEDGERS_MAP_HEADER_SIZE + LEDGERS_MAP_ENTRY_SIZE * batchSize; - - serializedMap.clear(); - serializedMap.writeInt(ledgerMapSize - 4); - serializedMap.writeLong(INVALID_LID); - serializedMap.writeLong(LEDGERS_MAP_ENTRY_ID); - serializedMap.writeInt(batchSize); - - startNewBatch = false; - remainingInBatch = batchSize; - } - // Dump the ledger in the current batch - serializedMap.writeLong(ledgerId); - serializedMap.writeLong(size); - --remainingLedgers; - - if (--remainingInBatch == 0) { - // Close current batch - try { - write(serializedMap); - } catch (IOException e) { - throw new RuntimeException(e); - } - - startNewBatch = true; - } - } - }); - } catch (RuntimeException e) { - if (e.getCause() instanceof IOException) { - throw (IOException) e.getCause(); - } else { - throw e; - } - } finally { - serializedMap.release(); - } - // Flush the ledger's map out before we write the header. 
- // Otherwise the header might point to something that is not fully - // written - super.flush(); - - // Update the headers with the map offset and count of ledgers - ByteBuffer mapInfo = ByteBuffer.allocate(8 + 4); - mapInfo.putLong(ledgerMapOffset); - mapInfo.putInt(numberOfLedgers); - mapInfo.flip(); - this.fileChannel.write(mapInfo, LEDGERS_MAP_OFFSET_POSITION); - } - } - - private final LedgerDirsManager ledgerDirsManager; - private final boolean entryLogPerLedgerEnabled; - - final RecentEntryLogsStatus recentlyCreatedEntryLogsStatus; - - /** - * locks for compaction log. - */ - private final Object compactionLogLock = new Object(); - - private volatile BufferedLogChannel compactionLogChannel; - - final EntryLoggerAllocator entryLoggerAllocator; - private final EntryLogManager entryLogManager; - - private final CopyOnWriteArrayList listeners = new CopyOnWriteArrayList(); - - private static final int HEADER_V0 = 0; // Old log file format (no ledgers map index) - private static final int HEADER_V1 = 1; // Introduced ledger map index - static final int HEADER_CURRENT_VERSION = HEADER_V1; - - private static class Header { - final int version; - final long ledgersMapOffset; - final int ledgersCount; - - Header(int version, long ledgersMapOffset, int ledgersCount) { - this.version = version; - this.ledgersMapOffset = ledgersMapOffset; - this.ledgersCount = ledgersCount; - } - } - - /** - * The 1K block at the head of the entry logger file - * that contains the fingerprint and meta-data. - * - *
-     * Header is composed of:
-     * Fingerprint: 4 bytes "BKLO"
-     * Log file HeaderVersion enum: 4 bytes
-     * Ledger map offset: 8 bytes
-     * Ledgers Count: 4 bytes
-     * 
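Reading those fixed fields back is a straightforward positional read; a hedged sketch following the layout above (the file name is invented, and the real parsing lives in getHeaderForLogId further down):

```java
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;

public class HeaderParseSketch {
    public static void main(String[] args) throws IOException {
        try (FileChannel ch = new RandomAccessFile("0.log", "r").getChannel()) {
            ByteBuffer buf = ByteBuffer.allocate(20); // 4 + 4 + 8 + 4 bytes
            ch.read(buf, 0);                          // header starts at offset 0
            buf.flip();
            byte[] fingerprint = new byte[4];
            buf.get(fingerprint);                     // "BKLO"
            int headerVersion = buf.getInt();         // HEADER_V0 or HEADER_V1
            long ledgersMapOffset = buf.getLong();    // 0 if the index was never flushed
            int ledgersCount = buf.getInt();
            System.out.printf("version=%d mapOffset=%d ledgers=%d%n",
                    headerVersion, ledgersMapOffset, ledgersCount);
        }
    }
}
```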
- */ - static final int LOGFILE_HEADER_SIZE = 1024; - final ByteBuf logfileHeader = Unpooled.buffer(LOGFILE_HEADER_SIZE); - - static final int HEADER_VERSION_POSITION = 4; - static final int LEDGERS_MAP_OFFSET_POSITION = HEADER_VERSION_POSITION + 4; - - /** - * Ledgers map is composed of multiple parts that can be split into separated entries. Each of them is composed of: - * - *
-     * length: (4 bytes) [0-3]
-     * ledger id (-1): (8 bytes) [4 - 11]
-     * entry id: (8 bytes) [12-19]
-     * num ledgers stored in current metadata entry: (4 bytes) [20 - 23]
-     * ledger entries: sequence of (ledgerid, size) (8 + 8 bytes each) [24..]
-     * 
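A small sketch (with invented values) that assembles one such batch the way appendLedgersMap above does, including the -1 ledger id and -2 entry id markers that flag the record as index data rather than a normal entry:

```java
import io.netty.buffer.ByteBuf;
import io.netty.buffer.Unpooled;

public class LedgersMapBatchSketch {
    public static void main(String[] args) {
        long[][] ledgers = {{5L, 1000L}, {9L, 2048L}}; // (ledgerId, size) pairs
        int batchSize = ledgers.length;
        int ledgerMapSize = (4 + 8 + 8 + 4) + (8 + 8) * batchSize;

        ByteBuf batch = Unpooled.buffer(ledgerMapSize);
        batch.writeInt(ledgerMapSize - 4); // length field, excluding itself
        batch.writeLong(-1L);              // INVALID_LID marker
        batch.writeLong(-2L);              // LEDGERS_MAP_ENTRY_ID marker
        batch.writeInt(batchSize);         // number of ledgers in this batch
        for (long[] l : ledgers) {
            batch.writeLong(l[0]);         // ledger id
            batch.writeLong(l[1]);         // total bytes for that ledger in the log
        }
        // The real writer starts a new batch after LEDGERS_MAP_MAX_BATCH_SIZE
        // (10000) ledgers, so a single buffer never grows unbounded.
        System.out.println(batch.readableBytes() + " bytes");
        batch.release();
    }
}
```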
- */ - static final int LEDGERS_MAP_HEADER_SIZE = 4 + 8 + 8 + 4; - static final int LEDGERS_MAP_ENTRY_SIZE = 8 + 8; - - // Break the ledgers map into multiple batches, each of which can contain up to 10K ledgers - static final int LEDGERS_MAP_MAX_BATCH_SIZE = 10000; - - static final long INVALID_LID = -1L; - - // EntryId used to mark an entry (belonging to INVALID_ID) as a component of the serialized ledgers map - static final long LEDGERS_MAP_ENTRY_ID = -2L; - - static final int MIN_SANE_ENTRY_SIZE = 8 + 8; - static final long MB = 1024 * 1024; - - private final int maxSaneEntrySize; - - final ServerConfiguration conf; - /** - * Scan entries in a entry log file. - */ - public interface EntryLogScanner { - /** - * Tests whether or not the entries belongs to the specified ledger - * should be processed. - * - * @param ledgerId - * Ledger ID. - * @return true if and only the entries of the ledger should be scanned. - */ - boolean accept(long ledgerId); - - /** - * Process an entry. - * - * @param ledgerId - * Ledger ID. - * @param offset - * File offset of this entry. - * @param entry - * Entry ByteBuf - * @throws IOException - */ - void process(long ledgerId, long offset, ByteBuf entry) throws IOException; - } - - /** - * Entry Log Listener. - */ - interface EntryLogListener { - /** - * Rotate a new entry log to write. - */ - void onRotateEntryLog(); - } - - /** - * Create an EntryLogger that stores it's log files in the given directories. - */ - public EntryLogger(ServerConfiguration conf, - LedgerDirsManager ledgerDirsManager) throws IOException { - this(conf, ledgerDirsManager, null, NullStatsLogger.INSTANCE); - } - - public EntryLogger(ServerConfiguration conf, - LedgerDirsManager ledgerDirsManager, EntryLogListener listener, StatsLogger statsLogger) - throws IOException { - //We reserve 500 bytes as overhead for the protocol. This is not 100% accurate - // but the protocol varies so an exact value is difficult to determine - this.maxSaneEntrySize = conf.getNettyMaxFrameSizeBytes() - 500; - this.ledgerDirsManager = ledgerDirsManager; - this.conf = conf; - entryLogPerLedgerEnabled = conf.isEntryLogPerLedgerEnabled(); - if (listener != null) { - addListener(listener); - } - - // Initialize the entry log header buffer. This cannot be a static object - // since in our unit tests, we run multiple Bookies and thus EntryLoggers - // within the same JVM. All of these Bookie instances access this header - // so there can be race conditions when entry logs are rolled over and - // this header buffer is cleared before writing it into the new logChannel. 
- logfileHeader.writeBytes("BKLO".getBytes(UTF_8)); - logfileHeader.writeInt(HEADER_CURRENT_VERSION); - logfileHeader.writerIndex(LOGFILE_HEADER_SIZE); - - // Find the largest logId - long logId = INVALID_LID; - for (File dir : ledgerDirsManager.getAllLedgerDirs()) { - if (!dir.exists()) { - throw new FileNotFoundException( - "Entry log directory '" + dir + "' does not exist"); - } - long lastLogId = getLastLogId(dir); - if (lastLogId > logId) { - logId = lastLogId; - } - } - this.recentlyCreatedEntryLogsStatus = new RecentEntryLogsStatus(logId + 1); - this.entryLoggerAllocator = new EntryLoggerAllocator(conf, ledgerDirsManager, recentlyCreatedEntryLogsStatus, - logId); - this.statsLogger = statsLogger; - if (entryLogPerLedgerEnabled) { - this.entryLogManager = new EntryLogManagerForEntryLogPerLedger(conf, ledgerDirsManager, - entryLoggerAllocator, listeners, recentlyCreatedEntryLogsStatus, statsLogger); - } else { - this.entryLogManager = new EntryLogManagerForSingleEntryLog(conf, ledgerDirsManager, entryLoggerAllocator, - listeners, recentlyCreatedEntryLogsStatus); - } - } - - EntryLogManager getEntryLogManager() { - return entryLogManager; - } - - void addListener(EntryLogListener listener) { - if (null != listener) { - listeners.add(listener); - } - } - - /** - * If the log id of current writable channel is the same as entryLogId and the position - * we want to read might end up reading from a position in the write buffer of the - * buffered channel, route this read to the current logChannel. Else, - * read from the BufferedReadChannel that is provided. - * @param entryLogId - * @param channel - * @param buff remaining() on this bytebuffer tells us the last position that we - * expect to read. - * @param pos The starting position from where we want to read. - * @return - */ - private int readFromLogChannel(long entryLogId, BufferedReadChannel channel, ByteBuf buff, long pos) - throws IOException { - BufferedLogChannel bc = entryLogManager.getCurrentLogIfPresent(entryLogId); - if (null != bc) { - synchronized (bc) { - if (pos + buff.writableBytes() >= bc.getFileChannelPosition()) { - return bc.read(buff, pos); - } - } - } - return channel.read(buff, pos); - } - - /** - * A thread-local variable that wraps a mapping of log ids to bufferedchannels - * These channels should be used only for reading. logChannel is the one - * that is used for writes. - */ - private final ThreadLocal> logid2Channel = - new ThreadLocal>() { - @Override - public Map initialValue() { - // Since this is thread local there only one modifier - // We dont really need the concurrency, but we need to use - // the weak values. Therefore using the concurrency level of 1 - return new MapMaker().concurrencyLevel(1) - .weakValues() - .makeMap(); - } - }; - - /** - * Each thread local buffered read channel can share the same file handle because reads are not relative - * and don't cause a change in the channel's position. We use this map to store the file channels. Each - * file channel is mapped to a log id which represents an open log file. - */ - private final ConcurrentMap logid2FileChannel = new ConcurrentHashMap(); - - /** - * Put the logId, bc pair in the map responsible for the current thread. - * @param logId - * @param bc - */ - public BufferedReadChannel putInReadChannels(long logId, BufferedReadChannel bc) { - Map threadMap = logid2Channel.get(); - return threadMap.put(logId, bc); - } - - /** - * Remove all entries for this log file in each thread's cache. 
- * @param logId - */ - public void removeFromChannelsAndClose(long logId) { - FileChannel fileChannel = logid2FileChannel.remove(logId); - if (null != fileChannel) { - try { - fileChannel.close(); - } catch (IOException e) { - LOG.warn("Exception while closing channel for log file:" + logId); - } - } - } - - public BufferedReadChannel getFromChannels(long logId) { - return logid2Channel.get().get(logId); - } - - /** - * Get the least unflushed log id. Garbage collector thread should not process - * unflushed entry log file. - * - * @return least unflushed log id. - */ - long getLeastUnflushedLogId() { - return recentlyCreatedEntryLogsStatus.getLeastUnflushedLogId(); - } - - long getPreviousAllocatedEntryLogId() { - return entryLoggerAllocator.getPreallocatedLogId(); - } - - /** - * Get the current log file for compaction. - */ - File getCurCompactionLogFile() { - synchronized (compactionLogLock) { - if (compactionLogChannel == null) { - return null; - } - return compactionLogChannel.getLogFile(); - } - } - - void prepareSortedLedgerStorageCheckpoint(long numBytesFlushed) throws IOException { - entryLogManager.prepareSortedLedgerStorageCheckpoint(numBytesFlushed); - } - - void prepareEntryMemTableFlush() { - entryLogManager.prepareEntryMemTableFlush(); - } - - boolean commitEntryMemTableFlush() throws IOException { - return entryLogManager.commitEntryMemTableFlush(); - } - - /** - * get EntryLoggerAllocator, Just for tests. - */ - EntryLoggerAllocator getEntryLoggerAllocator() { - return entryLoggerAllocator; - } - - /** - * Remove entry log. - * - * @param entryLogId - * Entry Log File Id - */ - protected boolean removeEntryLog(long entryLogId) { - removeFromChannelsAndClose(entryLogId); - File entryLogFile; - try { - entryLogFile = findFile(entryLogId); - } catch (FileNotFoundException e) { - LOG.error("Trying to delete an entryLog file that could not be found: " - + entryLogId + ".log"); - return false; - } - if (!entryLogFile.delete()) { - LOG.warn("Could not delete entry log file {}", entryLogFile); - } - return true; - } - - private long getLastLogId(File dir) { - long id = readLastLogId(dir); - // read success - if (id > 0) { - return id; - } - // read failed, scan the ledger directories to find biggest log id - File[] logFiles = dir.listFiles(new FileFilter() { - @Override - public boolean accept(File file) { - return file.getName().endsWith(".log"); - } - }); - List logs = new ArrayList(); - if (logFiles != null) { - for (File lf : logFiles) { - long logId = fileName2LogId(lf.getName()); - logs.add(logId); - } - } - // no log file found in this directory - if (0 == logs.size()) { - return INVALID_LID; - } - // order the collections - Collections.sort(logs); - return logs.get(logs.size() - 1); - } - - /** - * reads id from the "lastId" file in the given directory. - */ - private long readLastLogId(File f) { - FileInputStream fis; - try { - fis = new FileInputStream(new File(f, "lastId")); - } catch (FileNotFoundException e) { - return INVALID_LID; - } - BufferedReader br = new BufferedReader(new InputStreamReader(fis, UTF_8)); - try { - String lastIdString = br.readLine(); - return Long.parseLong(lastIdString, 16); - } catch (IOException e) { - return INVALID_LID; - } catch (NumberFormatException e) { - return INVALID_LID; - } finally { - try { - br.close(); - } catch (IOException e) { - } - } - } - - /** - * Flushes all rotated log channels. After log channels are flushed, - * move leastUnflushedLogId ptr to current logId. 
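The bookkeeping behind that pointer is the RecentEntryLogsStatus structure defined near the end of this file; the toy version below (hypothetical log ids) shows that the pointer only advances past a contiguous flushed prefix, so out-of-order flushes are handled safely:

```java
import java.util.SortedMap;
import java.util.TreeMap;

// Toy version of RecentEntryLogsStatus: logs are registered as unflushed,
// and the least-unflushed pointer advances only over a flushed prefix.
public class LeastUnflushedSketch {
    private final SortedMap<Long, Boolean> status = new TreeMap<>();
    private long leastUnflushed = 0;

    synchronized void created(long logId) { status.put(logId, false); }

    synchronized void flushed(long logId) {
        status.replace(logId, true);
        while (!status.isEmpty() && status.get(status.firstKey())) {
            leastUnflushed = status.firstKey() + 1;
            status.remove(status.firstKey());
        }
    }

    public static void main(String[] args) {
        LeastUnflushedSketch s = new LeastUnflushedSketch();
        s.created(0); s.created(1); s.created(2);
        s.flushed(1);                         // log 0 still unflushed
        System.out.println(s.leastUnflushed); // 0
        s.flushed(0);                         // 0 and 1 now form a flushed prefix
        System.out.println(s.leastUnflushed); // 2
    }
}
```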
- */ - void checkpoint() throws IOException { - entryLogManager.checkpoint(); - } - - public void flush() throws IOException { - entryLogManager.flush(); - } - - long addEntry(long ledger, ByteBuffer entry) throws IOException { - return entryLogManager.addEntry(ledger, Unpooled.wrappedBuffer(entry), true); - } - - long addEntry(long ledger, ByteBuf entry) throws IOException { - return entryLogManager.addEntry(ledger, entry, true); - } - - public long addEntry(long ledger, ByteBuf entry, boolean rollLog) throws IOException { - return entryLogManager.addEntry(ledger, entry, rollLog); - } - - private final FastThreadLocal sizeBuffer = new FastThreadLocal() { - @Override - protected ByteBuf initialValue() throws Exception { - return Unpooled.buffer(4); - } - }; - - long addEntryForCompaction(long ledgerId, ByteBuf entry) throws IOException { - synchronized (compactionLogLock) { - int entrySize = entry.readableBytes() + 4; - if (compactionLogChannel == null) { - createNewCompactionLog(); - } - - ByteBuf sizeBuffer = this.sizeBuffer.get(); - sizeBuffer.clear(); - sizeBuffer.writeInt(entry.readableBytes()); - compactionLogChannel.write(sizeBuffer); - - long pos = compactionLogChannel.position(); - compactionLogChannel.write(entry); - compactionLogChannel.registerWrittenEntry(ledgerId, entrySize); - return (compactionLogChannel.getLogId() << 32L) | pos; - } - } - - void flushCompactionLog() throws IOException { - synchronized (compactionLogLock) { - if (compactionLogChannel != null) { - compactionLogChannel.appendLedgersMap(); - compactionLogChannel.flushAndForceWrite(false); - LOG.info("Flushed compaction log file {} with logId.", - compactionLogChannel.getLogFile(), - compactionLogChannel.getLogId()); - // since this channel is only used for writing, after flushing the channel, - // we had to close the underlying file channel. Otherwise, we might end up - // leaking fds which cause the disk spaces could not be reclaimed. - compactionLogChannel.close(); - } else { - throw new IOException("Failed to flush compaction log which has already been removed."); - } - } - } - - void createNewCompactionLog() throws IOException { - synchronized (compactionLogLock) { - if (compactionLogChannel == null) { - compactionLogChannel = entryLogManager.createNewLogForCompaction(); - } - } - } - - /** - * Remove the current compaction log, usually invoked when compaction failed and - * we need to do some clean up to remove the compaction log file. 
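The calling pattern this comment implies, sketched as a hypothetical driver: the method names are the ones in this (pre-change) class, but the surrounding orchestration is invented and really lives in the compactor and GC-thread classes; the snippet also assumes package-private access.

```java
import java.io.IOException;

// Illustrative only: create the compaction log, copy entries, flush; on any
// failure fall back to removeCurCompactionLog so the half-written
// ".compacting" file is deleted and its file handle released.
class CompactionCleanupSketch {
    void compactWithCleanup(EntryLogger entryLogger) throws IOException {
        entryLogger.createNewCompactionLog();
        try {
            // ... copy surviving entries via addEntryForCompaction(ledgerId, entry) ...
            entryLogger.flushCompactionLog(); // appends ledgers map, forces write, closes channel
        } catch (IOException e) {
            entryLogger.removeCurCompactionLog(); // delete the partial compaction log
            throw e;
        }
    }
}
```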
- */ - void removeCurCompactionLog() { - synchronized (compactionLogLock) { - if (compactionLogChannel != null) { - if (!compactionLogChannel.getLogFile().delete()) { - LOG.warn("Could not delete compaction log file {}", compactionLogChannel.getLogFile()); - } - - try { - compactionLogChannel.close(); - } catch (IOException e) { - LOG.error("Failed to close file channel for compaction log {}", compactionLogChannel.getLogId(), - e); - } - compactionLogChannel = null; - } - } - } - - static long logIdForOffset(long offset) { - return offset >> 32L; - } - - - - public ByteBuf internalReadEntry(long ledgerId, long entryId, long location) - throws IOException, Bookie.NoEntryException { - long entryLogId = logIdForOffset(location); - long pos = location & 0xffffffffL; - ByteBuf sizeBuff = sizeBuffer.get(); - sizeBuff.clear(); - pos -= 4; // we want to get the ledgerId and length to check - BufferedReadChannel fc; - try { - fc = getChannelForLogId(entryLogId); - } catch (FileNotFoundException e) { - FileNotFoundException newe = new FileNotFoundException(e.getMessage() + " for " + ledgerId - + " with location " + location); - newe.setStackTrace(e.getStackTrace()); - throw newe; - } - - if (readFromLogChannel(entryLogId, fc, sizeBuff, pos) != sizeBuff.capacity()) { - throw new Bookie.NoEntryException("Short read from entrylog " + entryLogId, - ledgerId, entryId); - } - pos += 4; - int entrySize = sizeBuff.readInt(); - - // entrySize does not include the ledgerId - if (entrySize > maxSaneEntrySize) { - LOG.warn("Sanity check failed for entry size of " + entrySize + " at location " + pos + " in " - + entryLogId); - } - if (entrySize < MIN_SANE_ENTRY_SIZE) { - LOG.error("Read invalid entry length {}", entrySize); - throw new IOException("Invalid entry length " + entrySize); - } - - ByteBuf data = PooledByteBufAllocator.DEFAULT.directBuffer(entrySize, entrySize); - int rc = readFromLogChannel(entryLogId, fc, data, pos); - if (rc != entrySize) { - // Note that throwing NoEntryException here instead of IOException is not - // without risk. If all bookies in a quorum throw this same exception - // the client will assume that it has reached the end of the ledger. - // However, this may not be the case, as a very specific error condition - // could have occurred, where the length of the entry was corrupted on all - // replicas. However, the chance of this happening is very very low, so - // returning NoEntryException is mostly safe. - data.release(); - throw new Bookie.NoEntryException("Short read for " + ledgerId + "@" - + entryId + " in " + entryLogId + "@" - + pos + "(" + rc + "!=" + entrySize + ")", ledgerId, entryId); - } - data.writerIndex(entrySize); - - return data; - } - - public ByteBuf readEntry(long ledgerId, long entryId, long location) throws IOException, Bookie.NoEntryException { - long entryLogId = logIdForOffset(location); - long pos = location & 0xffffffffL; - - ByteBuf data = internalReadEntry(ledgerId, entryId, location); - long thisLedgerId = data.getLong(0); - if (thisLedgerId != ledgerId) { - data.release(); - throw new IOException("problem found in " + entryLogId + "@" + entryId + " at position + " + pos - + " entry belongs to " + thisLedgerId + " not " + ledgerId); - } - long thisEntryId = data.getLong(8); - if (thisEntryId != entryId) { - data.release(); - throw new IOException("problem found in " + entryLogId + "@" + entryId + " at position + " + pos - + " entry is " + thisEntryId + " not " + entryId); - } - - return data; - } - - /** - * Read the header of an entry log. 
- */ - private Header getHeaderForLogId(long entryLogId) throws IOException { - BufferedReadChannel bc = getChannelForLogId(entryLogId); - - // Allocate buffer to read (version, ledgersMapOffset, ledgerCount) - ByteBuf headers = PooledByteBufAllocator.DEFAULT.directBuffer(LOGFILE_HEADER_SIZE); - try { - bc.read(headers, 0); - - // Skip marker string "BKLO" - headers.readInt(); - - int headerVersion = headers.readInt(); - if (headerVersion < HEADER_V0 || headerVersion > HEADER_CURRENT_VERSION) { - LOG.info("Unknown entry log header version for log {}: {}", entryLogId, headerVersion); - } - - long ledgersMapOffset = headers.readLong(); - int ledgersCount = headers.readInt(); - return new Header(headerVersion, ledgersMapOffset, ledgersCount); - } finally { - headers.release(); - } - } - - private BufferedReadChannel getChannelForLogId(long entryLogId) throws IOException { - BufferedReadChannel fc = getFromChannels(entryLogId); - if (fc != null) { - return fc; - } - File file = findFile(entryLogId); - // get channel is used to open an existing entry log file - // it would be better to open using read mode - FileChannel newFc = new RandomAccessFile(file, "r").getChannel(); - FileChannel oldFc = logid2FileChannel.putIfAbsent(entryLogId, newFc); - if (null != oldFc) { - newFc.close(); - newFc = oldFc; - } - // We set the position of the write buffer of this buffered channel to Long.MAX_VALUE - // so that there are no overlaps with the write buffer while reading - fc = new BufferedReadChannel(newFc, conf.getReadBufferBytes()); - putInReadChannels(entryLogId, fc); - return fc; - } - - /** - * Whether the log file exists or not. - */ - boolean logExists(long logId) { - for (File d : ledgerDirsManager.getAllLedgerDirs()) { - File f = new File(d, Long.toHexString(logId) + ".log"); - if (f.exists()) { - return true; - } - } - return false; - } - - /** - * Returns a set with the ids of all the entry log files. - * - * @throws IOException - */ - public Set getEntryLogsSet() throws IOException { - Set entryLogs = Sets.newTreeSet(); - - final FilenameFilter logFileFilter = new FilenameFilter() { - @Override - public boolean accept(File dir, String name) { - return name.endsWith(".log"); - } - }; - - for (File d : ledgerDirsManager.getAllLedgerDirs()) { - File[] files = d.listFiles(logFileFilter); - if (files == null) { - throw new IOException("Failed to get list of files in directory " + d); - } - - for (File f : files) { - Long entryLogId = Long.parseLong(f.getName().split(".log")[0], 16); - entryLogs.add(entryLogId); - } - } - return entryLogs; - } - - private File findFile(long logId) throws FileNotFoundException { - for (File d : ledgerDirsManager.getAllLedgerDirs()) { - File f = new File(d, Long.toHexString(logId) + ".log"); - if (f.exists()) { - return f; - } - } - throw new FileNotFoundException("No file for log " + Long.toHexString(logId)); - } - - /** - * Scan entry log. 
- * - * @param entryLogId Entry Log Id - * @param scanner Entry Log Scanner - * @throws IOException - */ - public void scanEntryLog(long entryLogId, EntryLogScanner scanner) throws IOException { - // Buffer where to read the entrySize (4 bytes) and the ledgerId (8 bytes) - ByteBuf headerBuffer = Unpooled.buffer(4 + 8); - BufferedReadChannel bc; - // Get the BufferedChannel for the current entry log file - try { - bc = getChannelForLogId(entryLogId); - } catch (IOException e) { - LOG.warn("Failed to get channel to scan entry log: " + entryLogId + ".log"); - throw e; - } - // Start the read position in the current entry log file to be after - // the header where all of the ledger entries are. - long pos = LOGFILE_HEADER_SIZE; - - // Start with a reasonably sized buffer size - ByteBuf data = PooledByteBufAllocator.DEFAULT.directBuffer(1024 * 1024); - - try { - - // Read through the entry log file and extract the ledger ID's. - while (true) { - // Check if we've finished reading the entry log file. - if (pos >= bc.size()) { - break; - } - if (readFromLogChannel(entryLogId, bc, headerBuffer, pos) != headerBuffer.capacity()) { - LOG.warn("Short read for entry size from entrylog {}", entryLogId); - return; - } - long offset = pos; - pos += 4; - int entrySize = headerBuffer.readInt(); - long ledgerId = headerBuffer.readLong(); - headerBuffer.clear(); - - if (ledgerId == INVALID_LID || !scanner.accept(ledgerId)) { - // skip this entry - pos += entrySize; - continue; - } - // read the entry - - data.clear(); - data.capacity(entrySize); - int rc = readFromLogChannel(entryLogId, bc, data, pos); - if (rc != entrySize) { - LOG.warn("Short read for ledger entry from entryLog {}@{} ({} != {})", - entryLogId, pos, rc, entrySize); - return; - } - // process the entry - scanner.process(ledgerId, offset, data); - - // Advance position to the next entry - pos += entrySize; - } - } finally { - data.release(); - } - } - - public EntryLogMetadata getEntryLogMetadata(long entryLogId) throws IOException { - // First try to extract the EntryLogMetada from the index, if there's no index then fallback to scanning the - // entry log - try { - return extractEntryLogMetadataFromIndex(entryLogId); - } catch (Exception e) { - LOG.info("Failed to get ledgers map index from: {}.log : {}", entryLogId, e.getMessage()); - - // Fall-back to scanning - return extractEntryLogMetadataByScanning(entryLogId); - } - } - - EntryLogMetadata extractEntryLogMetadataFromIndex(long entryLogId) throws IOException { - Header header = getHeaderForLogId(entryLogId); - - if (header.version < HEADER_V1) { - throw new IOException("Old log file header without ledgers map on entryLogId " + entryLogId); - } - - if (header.ledgersMapOffset == 0L) { - // The index was not stored in the log file (possibly because the bookie crashed before flushing it) - throw new IOException("No ledgers map index found on entryLogId" + entryLogId); - } - - if (LOG.isDebugEnabled()) { - LOG.debug("Recovering ledgers maps for log {} at offset: {}", entryLogId, header.ledgersMapOffset); - } - - BufferedReadChannel bc = getChannelForLogId(entryLogId); - - // There can be multiple entries containing the various components of the serialized ledgers map - long offset = header.ledgersMapOffset; - EntryLogMetadata meta = new EntryLogMetadata(entryLogId); - - final int maxMapSize = LEDGERS_MAP_HEADER_SIZE + LEDGERS_MAP_ENTRY_SIZE * LEDGERS_MAP_MAX_BATCH_SIZE; - ByteBuf ledgersMap = ByteBufAllocator.DEFAULT.directBuffer(maxMapSize); - - try { - while (offset < bc.size()) { - // 
Read ledgers map size - sizeBuffer.get().clear(); - bc.read(sizeBuffer.get(), offset); - - int ledgersMapSize = sizeBuffer.get().readInt(); - - // Read the index into a buffer - ledgersMap.clear(); - bc.read(ledgersMap, offset + 4, ledgersMapSize); - - // Discard ledgerId and entryId - long lid = ledgersMap.readLong(); - if (lid != INVALID_LID) { - throw new IOException("Cannot deserialize ledgers map from ledger " + lid); - } - - long entryId = ledgersMap.readLong(); - if (entryId != LEDGERS_MAP_ENTRY_ID) { - throw new IOException("Cannot deserialize ledgers map from entryId " + entryId); - } - - // Read the number of ledgers in the current entry batch - int ledgersCount = ledgersMap.readInt(); - - // Extract all (ledger,size) tuples from buffer - for (int i = 0; i < ledgersCount; i++) { - long ledgerId = ledgersMap.readLong(); - long size = ledgersMap.readLong(); - - if (LOG.isDebugEnabled()) { - LOG.debug("Recovering ledgers maps for log {} -- Found ledger: {} with size: {}", - entryLogId, ledgerId, size); - } - meta.addLedgerSize(ledgerId, size); - } - if (ledgersMap.isReadable()) { - throw new IOException("Invalid entry size when reading ledgers map"); - } - - // Move to next entry, if any - offset += ledgersMapSize + 4; - } - } catch (IndexOutOfBoundsException e) { - throw new IOException(e); - } finally { - ledgersMap.release(); - } - - if (meta.getLedgersMap().size() != header.ledgersCount) { - throw new IOException("Not all ledgers were found in ledgers map index. expected: " + header.ledgersCount - + " -- found: " + meta.getLedgersMap().size() + " -- entryLogId: " + entryLogId); - } - - return meta; - } - - private EntryLogMetadata extractEntryLogMetadataByScanning(long entryLogId) throws IOException { - final EntryLogMetadata meta = new EntryLogMetadata(entryLogId); - - // Read through the entry log file and extract the entry log meta - scanEntryLog(entryLogId, new EntryLogScanner() { - @Override - public void process(long ledgerId, long offset, ByteBuf entry) throws IOException { - // add new entry size of a ledger to entry log meta - meta.addLedgerSize(ledgerId, entry.readableBytes() + 4); - } - - @Override - public boolean accept(long ledgerId) { - return ledgerId >= 0; - } - }); - - if (LOG.isDebugEnabled()) { - LOG.debug("Retrieved entry log meta data entryLogId: {}, meta: {}", entryLogId, meta); - } - return meta; - } - - /** - * Shutdown method to gracefully stop entry logger. - */ - public void shutdown() { - // since logChannel is buffered channel, do flush when shutting down - LOG.info("Stopping EntryLogger"); - try { - flush(); - for (FileChannel fc : logid2FileChannel.values()) { - fc.close(); - } - // clear the mapping, so we don't need to go through the channels again in finally block in normal case. 
- logid2FileChannel.clear(); - entryLogManager.close(); - synchronized (compactionLogLock) { - if (compactionLogChannel != null) { - compactionLogChannel.close(); - compactionLogChannel = null; - } - } - } catch (IOException ie) { - // we have no idea how to avoid io exception during shutting down, so just ignore it - LOG.error("Error flush entry log during shutting down, which may cause entry log corrupted.", ie); - } finally { - for (FileChannel fc : logid2FileChannel.values()) { - IOUtils.close(LOG, fc); - } - - entryLogManager.forceClose(); - synchronized (compactionLogLock) { - IOUtils.close(LOG, compactionLogChannel); - } - } - // shutdown the pre-allocation thread - entryLoggerAllocator.stop(); - } - - protected LedgerDirsManager getLedgerDirsManager() { - return ledgerDirsManager; - } - - /** - * Convert log filename (hex format with suffix) to logId in long. - */ - static long fileName2LogId(String fileName) { - if (fileName != null && fileName.contains(".")) { - fileName = fileName.split("\\.")[0]; - } - try { - return Long.parseLong(fileName, 16); - } catch (Exception nfe) { - LOG.error("Invalid log file name {} found when trying to convert to logId.", fileName, nfe); - } - return INVALID_LID; - } - - /** - * Convert log Id to hex string. - */ - static String logId2HexString(long logId) { - return Long.toHexString(logId); - } - - /** - * Datastructure which maintains the status of logchannels. When a - * logChannel is created entry of < entryLogId, false > will be made to this - * sortedmap and when logChannel is rotated and flushed then the entry is - * updated to < entryLogId, true > and all the lowest entries with - * < entryLogId, true > status will be removed from the sortedmap. So that way - * we could get least unflushed LogId. - * - */ - static class RecentEntryLogsStatus { - private final SortedMap entryLogsStatusMap; - private long leastUnflushedLogId; - - RecentEntryLogsStatus(long leastUnflushedLogId) { - entryLogsStatusMap = new TreeMap(); - this.leastUnflushedLogId = leastUnflushedLogId; - } - - synchronized void createdEntryLog(Long entryLogId) { - entryLogsStatusMap.put(entryLogId, false); - } - - synchronized void flushRotatedEntryLog(Long entryLogId) { - entryLogsStatusMap.replace(entryLogId, true); - while ((!entryLogsStatusMap.isEmpty()) && (entryLogsStatusMap.get(entryLogsStatusMap.firstKey()))) { - long leastFlushedLogId = entryLogsStatusMap.firstKey(); - entryLogsStatusMap.remove(leastFlushedLogId); - leastUnflushedLogId = leastFlushedLogId + 1; - } - } - - synchronized long getLeastUnflushedLogId() { - return leastUnflushedLogId; - } - } -} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/EntryLoggerAllocator.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/EntryLoggerAllocator.java index 10e7715cd31..70b76aaf376 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/EntryLoggerAllocator.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/EntryLoggerAllocator.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file @@ -21,12 +21,13 @@ package org.apache.bookkeeper.bookie; -import static com.google.common.base.Charsets.UTF_8; +import static java.nio.charset.StandardCharsets.UTF_8; import static org.apache.bookkeeper.bookie.TransactionalEntryLogCompactor.COMPACTING_SUFFIX; +import com.google.common.annotations.VisibleForTesting; import io.netty.buffer.ByteBuf; +import io.netty.buffer.ByteBufAllocator; import io.netty.buffer.Unpooled; - import java.io.BufferedWriter; import java.io.File; import java.io.FileOutputStream; @@ -40,8 +41,10 @@ import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; import lombok.extern.slf4j.Slf4j; -import org.apache.bookkeeper.bookie.EntryLogger.BufferedLogChannel; +import org.apache.bookkeeper.bookie.DefaultEntryLogger.BufferedLogChannel; import org.apache.bookkeeper.conf.ServerConfiguration; /** @@ -57,13 +60,18 @@ class EntryLoggerAllocator { private final LedgerDirsManager ledgerDirsManager; private final Object createEntryLogLock = new Object(); private final Object createCompactionLogLock = new Object(); - private final EntryLogger.RecentEntryLogsStatus recentlyCreatedEntryLogsStatus; + private final DefaultEntryLogger.RecentEntryLogsStatus recentlyCreatedEntryLogsStatus; private final boolean entryLogPreAllocationEnabled; - final ByteBuf logfileHeader = Unpooled.buffer(EntryLogger.LOGFILE_HEADER_SIZE); + private final ByteBufAllocator byteBufAllocator; + final ByteBuf logfileHeader = Unpooled.buffer(DefaultEntryLogger.LOGFILE_HEADER_SIZE); + private volatile long writingLogId = -1; + private volatile long writingCompactingLogId = -1; EntryLoggerAllocator(ServerConfiguration conf, LedgerDirsManager ledgerDirsManager, - EntryLogger.RecentEntryLogsStatus recentlyCreatedEntryLogsStatus, long logId) { + DefaultEntryLogger.RecentEntryLogsStatus recentlyCreatedEntryLogsStatus, long logId, + ByteBufAllocator byteBufAllocator) { this.conf = conf; + this.byteBufAllocator = byteBufAllocator; this.ledgerDirsManager = ledgerDirsManager; this.preallocatedLogId = logId; this.recentlyCreatedEntryLogsStatus = recentlyCreatedEntryLogsStatus; @@ -76,8 +84,8 @@ class EntryLoggerAllocator { // so there can be race conditions when entry logs are rolled over and // this header buffer is cleared before writing it into the new logChannel. 
logfileHeader.writeBytes("BKLO".getBytes(UTF_8)); - logfileHeader.writeInt(EntryLogger.HEADER_CURRENT_VERSION); - logfileHeader.writerIndex(EntryLogger.LOGFILE_HEADER_SIZE); + logfileHeader.writeInt(DefaultEntryLogger.HEADER_CURRENT_VERSION); + logfileHeader.writerIndex(DefaultEntryLogger.LOGFILE_HEADER_SIZE); } @@ -85,16 +93,19 @@ synchronized long getPreallocatedLogId() { return preallocatedLogId; } + public boolean isSealed(long logId) { + return logId != writingLogId && logId != writingCompactingLogId; + } + BufferedLogChannel createNewLog(File dirForNextEntryLog) throws IOException { synchronized (createEntryLogLock) { BufferedLogChannel bc; - if (!entryLogPreAllocationEnabled){ + if (!entryLogPreAllocationEnabled) { // create a new log directly - bc = allocateNewLog(dirForNextEntryLog); - return bc; + return allocateNewLog(dirForNextEntryLog); } else { // allocate directly to response request - if (null == preallocation){ + if (null == preallocation) { bc = allocateNewLog(dirForNextEntryLog); } else { // has a preallocated entry log @@ -110,7 +121,7 @@ BufferedLogChannel createNewLog(File dirForNextEntryLog) throws IOException { throw new IOException("Task to allocate a new entry log is cancelled.", ce); } catch (InterruptedException ie) { Thread.currentThread().interrupt(); - throw new IOException("Intrrupted when waiting a new entry log to be allocated.", ie); + throw new IOException("Interrupted when waiting a new entry log to be allocated.", ie); } } // preallocate a new log in background upon every call @@ -126,6 +137,18 @@ BufferedLogChannel createNewLogForCompaction(File dirForNextEntryLog) throws IOE } } + void setWritingLogId(long logId) { + this.writingLogId = logId; + } + + void setWritingCompactingLogId(long logId) { + this.writingCompactingLogId = logId; + } + + void clearCompactingLogId() { + writingCompactingLogId = -1; + } + private synchronized BufferedLogChannel allocateNewLog(File dirForNextEntryLog) throws IOException { return allocateNewLog(dirForNextEntryLog, ".log"); } @@ -159,7 +182,7 @@ private synchronized BufferedLogChannel allocateNewLog(File dirForNextEntryLog, File newLogFile = new File(dirForNextEntryLog, logFileName); FileChannel channel = new RandomAccessFile(newLogFile, "rw").getChannel(); - BufferedLogChannel logChannel = new BufferedLogChannel(channel, conf.getWriteBufferBytes(), + BufferedLogChannel logChannel = new BufferedLogChannel(byteBufAllocator, channel, conf.getWriteBufferBytes(), conf.getReadBufferBytes(), preallocatedLogId, newLogFile, conf.getFlushIntervalInBytes()); logfileHeader.readerIndex(0); logChannel.write(logfileHeader); @@ -168,7 +191,7 @@ private synchronized BufferedLogChannel allocateNewLog(File dirForNextEntryLog, setLastLogId(f, preallocatedLogId); } - if (suffix.equals(EntryLogger.LOG_FILE_SUFFIX)) { + if (suffix.equals(DefaultEntryLogger.LOG_FILE_SUFFIX)) { recentlyCreatedEntryLogsStatus.createdEntryLog(preallocatedLogId); } @@ -176,10 +199,29 @@ private synchronized BufferedLogChannel allocateNewLog(File dirForNextEntryLog, return logChannel; } + + private synchronized void closePreAllocateLog() { + if (preallocation != null) { + // if preallocate new log success, release the file channel + try { + BufferedLogChannel bufferedLogChannel = getPreallocationFuture().get(3, TimeUnit.SECONDS); + if (bufferedLogChannel != null) { + bufferedLogChannel.close(); + } + } catch (InterruptedException e) { + log.warn("interrupted while release preAllocate log"); + Thread.currentThread().interrupt(); + } catch (IOException | 
ExecutionException | TimeoutException e) { + log.warn("Releasing the preallocated log failed, ignoring the error"); + } + } + } + /** * writes the given id to the "lastId" file in the given directory. */ - private void setLastLogId(File dir, long logId) throws IOException { + @VisibleForTesting + void setLastLogId(File dir, long logId) throws IOException { FileOutputStream fos; fos = new FileOutputStream(new File(dir, "lastId")); BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(fos, UTF_8)); @@ -202,7 +244,17 @@ private void setLastLogId(File dir, long logId) throws IOException { */ void stop() { // wait until the preallocation finished. + allocatorExecutor.execute(this::closePreAllocateLog); allocatorExecutor.shutdown(); + try { + if (!allocatorExecutor.awaitTermination(5, TimeUnit.SECONDS)) { + log.warn("Timed out while awaiting allocatorExecutor termination, forcing shutdown"); + } + } catch (InterruptedException e) { + log.warn("Interrupted while awaiting allocatorExecutor termination, forcing shutdown"); + Thread.currentThread().interrupt(); + } + allocatorExecutor.shutdownNow(); log.info("Stopped entry logger preallocator."); } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/EntryMemTable.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/EntryMemTable.java index 73283989bb0..197956b7540 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/EntryMemTable.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/EntryMemTable.java @@ -19,15 +19,10 @@ package org.apache.bookkeeper.bookie; -import static org.apache.bookkeeper.bookie.BookKeeperServerStats.SKIP_LIST_FLUSH_BYTES; -import static org.apache.bookkeeper.bookie.BookKeeperServerStats.SKIP_LIST_GET_ENTRY; -import static org.apache.bookkeeper.bookie.BookKeeperServerStats.SKIP_LIST_PUT_ENTRY; -import static org.apache.bookkeeper.bookie.BookKeeperServerStats.SKIP_LIST_SNAPSHOT; -import static org.apache.bookkeeper.bookie.BookKeeperServerStats.SKIP_LIST_THROTTLING; -import static org.apache.bookkeeper.bookie.BookKeeperServerStats.SKIP_LIST_THROTTLING_LATENCY; - import java.io.IOException; import java.nio.ByteBuffer; +import java.util.Iterator; +import java.util.PrimitiveIterator; import java.util.concurrent.ConcurrentSkipListMap; import java.util.concurrent.Semaphore; import java.util.concurrent.TimeUnit; @@ -36,11 +31,11 @@ import java.util.concurrent.locks.ReentrantReadWriteLock; import org.apache.bookkeeper.bookie.Bookie.NoLedgerException; import org.apache.bookkeeper.bookie.CheckpointSource.Checkpoint; +import org.apache.bookkeeper.bookie.stats.EntryMemTableStats; +import org.apache.bookkeeper.common.util.MathUtils; import org.apache.bookkeeper.conf.ServerConfiguration; -import org.apache.bookkeeper.stats.Counter; -import org.apache.bookkeeper.stats.OpStatsLogger; import org.apache.bookkeeper.stats.StatsLogger; -import org.apache.bookkeeper.util.MathUtils; +import org.apache.bookkeeper.util.IteratorUtility; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -51,8 +46,7 @@ * flusher reports in that the flush succeeded. At that point we let the snapshot go. */ public class EntryMemTable implements AutoCloseable{ - private static Logger logger = LoggerFactory.getLogger(Journal.class); - + private static Logger logger = LoggerFactory.getLogger(EntryMemTable.class); /** * Entry skip list.
*/ @@ -117,12 +111,7 @@ private EntrySkipList newSkipList() { } // Stats - private final OpStatsLogger snapshotStats; - private final OpStatsLogger putEntryStats; - private final OpStatsLogger getEntryStats; - final Counter flushBytesCounter; - private final Counter throttlingCounter; - private final OpStatsLogger throttlingStats; + protected final EntryMemTableStats memTableStats; /** * Constructor. @@ -150,12 +139,7 @@ public EntryMemTable(final ServerConfiguration conf, final CheckpointSource sour this.skipListSemaphore = new Semaphore((int) skipListSizeLimit * 2); // Stats - this.snapshotStats = statsLogger.getOpStatsLogger(SKIP_LIST_SNAPSHOT); - this.putEntryStats = statsLogger.getOpStatsLogger(SKIP_LIST_PUT_ENTRY); - this.getEntryStats = statsLogger.getOpStatsLogger(SKIP_LIST_GET_ENTRY); - this.flushBytesCounter = statsLogger.getCounter(SKIP_LIST_FLUSH_BYTES); - this.throttlingCounter = statsLogger.getCounter(SKIP_LIST_THROTTLING); - this.throttlingStats = statsLogger.getOpStatsLogger(SKIP_LIST_THROTTLING_LATENCY); + this.memTableStats = new EntryMemTableStats(statsLogger); } void dump() { @@ -203,9 +187,11 @@ Checkpoint snapshot(Checkpoint oldCp) throws IOException { } if (null != cp) { - snapshotStats.registerSuccessfulEvent(MathUtils.elapsedNanos(startTimeNanos), TimeUnit.NANOSECONDS); + memTableStats.getSnapshotStats() + .registerSuccessfulEvent(MathUtils.elapsedNanos(startTimeNanos), TimeUnit.NANOSECONDS); } else { - snapshotStats.registerFailedEvent(MathUtils.elapsedNanos(startTimeNanos), TimeUnit.NANOSECONDS); + memTableStats.getSnapshotStats() + .registerFailedEvent(MathUtils.elapsedNanos(startTimeNanos), TimeUnit.NANOSECONDS); } } return cp; @@ -272,7 +258,7 @@ long flushSnapshot(final SkipListFlusher flusher, Checkpoint checkpoint) throws } } } - flushBytesCounter.add(size); + memTableStats.getFlushBytesCounter().addCount(size); clearSnapshot(keyValues); } } @@ -322,17 +308,20 @@ public long addEntry(long ledgerId, long entryId, final ByteBuffer entry, final final int len = entry.remaining(); if (!skipListSemaphore.tryAcquire(len)) { - throttlingCounter.inc(); + memTableStats.getThrottlingCounter().inc(); final long throttlingStartTimeNanos = MathUtils.nowInNano(); skipListSemaphore.acquireUninterruptibly(len); - throttlingStats.registerSuccessfulEvent(MathUtils.elapsedNanos(throttlingStartTimeNanos), - TimeUnit.NANOSECONDS); + memTableStats.getThrottlingStats() + .registerSuccessfulEvent(MathUtils.elapsedNanos(throttlingStartTimeNanos), TimeUnit.NANOSECONDS); } this.lock.readLock().lock(); try { EntryKeyValue toAdd = cloneWithAllocator(ledgerId, entryId, entry); size = internalAdd(toAdd); + if (size == 0) { + skipListSemaphore.release(len); + } } finally { this.lock.readLock().unlock(); } @@ -340,9 +329,11 @@ public long addEntry(long ledgerId, long entryId, final ByteBuffer entry, final return size; } finally { if (success) { - putEntryStats.registerSuccessfulEvent(MathUtils.elapsedNanos(startTimeNanos), TimeUnit.NANOSECONDS); + memTableStats.getPutEntryStats() + .registerSuccessfulEvent(MathUtils.elapsedNanos(startTimeNanos), TimeUnit.NANOSECONDS); } else { - putEntryStats.registerFailedEvent(MathUtils.elapsedNanos(startTimeNanos), TimeUnit.NANOSECONDS); + memTableStats.getPutEntryStats() + .registerFailedEvent(MathUtils.elapsedNanos(startTimeNanos), TimeUnit.NANOSECONDS); } } } @@ -366,13 +357,8 @@ private EntryKeyValue newEntry(long ledgerId, long entryId, final ByteBuffer ent int offset = 0; int length = entry.remaining(); - if (entry.hasArray()) { - buf = 
entry.array(); - offset = entry.arrayOffset(); - } else { - buf = new byte[length]; - entry.get(buf); - } + buf = new byte[length]; + entry.get(buf); return new EntryKeyValue(ledgerId, entryId, buf, offset, length); } @@ -411,9 +397,11 @@ public EntryKeyValue getEntry(long ledgerId, long entryId) throws IOException { } finally { this.lock.readLock().unlock(); if (success) { - getEntryStats.registerSuccessfulEvent(MathUtils.elapsedNanos(startTimeNanos), TimeUnit.NANOSECONDS); + memTableStats.getGetEntryStats() + .registerSuccessfulEvent(MathUtils.elapsedNanos(startTimeNanos), TimeUnit.NANOSECONDS); } else { - getEntryStats.registerFailedEvent(MathUtils.elapsedNanos(startTimeNanos), TimeUnit.NANOSECONDS); + memTableStats.getGetEntryStats() + .registerFailedEvent(MathUtils.elapsedNanos(startTimeNanos), TimeUnit.NANOSECONDS); } } @@ -440,9 +428,11 @@ public EntryKeyValue getLastEntry(long ledgerId) throws IOException { } finally { this.lock.readLock().unlock(); if (success) { - getEntryStats.registerSuccessfulEvent(MathUtils.elapsedNanos(startTimeNanos), TimeUnit.NANOSECONDS); + memTableStats.getGetEntryStats() + .registerSuccessfulEvent(MathUtils.elapsedNanos(startTimeNanos), TimeUnit.NANOSECONDS); } else { - getEntryStats.registerFailedEvent(MathUtils.elapsedNanos(startTimeNanos), TimeUnit.NANOSECONDS); + memTableStats.getGetEntryStats() + .registerFailedEvent(MathUtils.elapsedNanos(startTimeNanos), TimeUnit.NANOSECONDS); } } @@ -471,4 +461,48 @@ boolean isEmpty() { public void close() throws Exception { // no-op } + + /* + * returns the primitive long iterator of entries of a ledger available in + * this EntryMemTable. It would be in the ascending order and this Iterator + * is weakly consistent. + */ + PrimitiveIterator.OfLong getListOfEntriesOfLedger(long ledgerId) { + EntryKey thisLedgerFloorEntry = new EntryKey(ledgerId, 0); + EntryKey thisLedgerCeilingEntry = new EntryKey(ledgerId, Long.MAX_VALUE); + Iterator thisLedgerEntriesInKVMap; + Iterator thisLedgerEntriesInSnapshot; + this.lock.readLock().lock(); + try { + /* + * Gets a view of the portion of this map that corresponds to + * entries of this ledger. + * + * Here 'kvmap' is of type 'ConcurrentSkipListMap', so its 'subMap' + * call would return a view of the portion of this map whose keys + * range from fromKey to toKey and it would be of type + * 'ConcurrentNavigableMap'. ConcurrentNavigableMap's 'keySet' would + * return NavigableSet view of the keys contained in this map. This + * view's iterator would be weakly consistent - + * https://docs.oracle.com/javase/8/docs/api/java/util/concurrent/ + * package-summary.html#Weakly. + * + * 'weakly consistent' would guarantee 'to traverse elements as they + * existed upon construction exactly once, and may (but are not + * guaranteed to) reflect any modifications subsequent to + * construction.' 
+ * + */ + thisLedgerEntriesInKVMap = this.kvmap.subMap(thisLedgerFloorEntry, thisLedgerCeilingEntry).keySet() + .iterator(); + thisLedgerEntriesInSnapshot = this.snapshot.subMap(thisLedgerFloorEntry, thisLedgerCeilingEntry).keySet() + .iterator(); + } finally { + this.lock.readLock().unlock(); + } + return IteratorUtility.mergeIteratorsForPrimitiveLongIterator(thisLedgerEntriesInKVMap, + thisLedgerEntriesInSnapshot, EntryKey.COMPARATOR, (entryKey) -> { + return entryKey.entryId; + }); + } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/EntryMemTableWithParallelFlusher.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/EntryMemTableWithParallelFlusher.java index 4f2cf022923..ed1a55735c2 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/EntryMemTableWithParallelFlusher.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/EntryMemTableWithParallelFlusher.java @@ -31,7 +31,6 @@ import org.apache.bookkeeper.common.util.OrderedExecutor; import org.apache.bookkeeper.conf.ServerConfiguration; import org.apache.bookkeeper.stats.StatsLogger; -import org.apache.bookkeeper.util.SafeRunnable; /** * EntryMemTableWithParallelFlusher. @@ -84,36 +83,33 @@ long flushSnapshot(final SkipListFlusher flusher, Checkpoint checkpoint) throws ConcurrentNavigableMap thisLedgerEntries = keyValues .subMap(thisLedgerFirstEntry, thisLedgerCeilingKeyMarker); pendingNumOfLedgerFlushes.register(); - flushExecutor.executeOrdered(thisLedgerId, new SafeRunnable() { - @Override - public void safeRun() { - try { - long ledger; - boolean ledgerDeleted = false; - for (EntryKey key : thisLedgerEntries.keySet()) { - EntryKeyValue kv = (EntryKeyValue) key; - flushedSize.addAndGet(kv.getLength()); - ledger = kv.getLedgerId(); - if (!ledgerDeleted) { - try { - flusher.process(ledger, kv.getEntryId(), kv.getValueAsByteBuffer()); - } catch (NoLedgerException exception) { - ledgerDeleted = true; - } + flushExecutor.executeOrdered(thisLedgerId, () -> { + try { + long ledger; + boolean ledgerDeleted = false; + for (EntryKey key : thisLedgerEntries.keySet()) { + EntryKeyValue kv = (EntryKeyValue) key; + flushedSize.addAndGet(kv.getLength()); + ledger = kv.getLedgerId(); + if (!ledgerDeleted) { + try { + flusher.process(ledger, kv.getEntryId(), kv.getValueAsByteBuffer()); + } catch (NoLedgerException exception) { + ledgerDeleted = true; } } - pendingNumOfLedgerFlushes.arriveAndDeregister(); - } catch (Exception exc) { - log.error("Got Exception while trying to flush process entryies: ", exc); - exceptionWhileFlushingParallelly.set(exc); - /* - * if we get any unexpected exception while - * trying to flush process entries of a - * ledger, then terminate the - * pendingNumOfLedgerFlushes phaser. - */ - pendingNumOfLedgerFlushes.forceTermination(); } + pendingNumOfLedgerFlushes.arriveAndDeregister(); + } catch (Exception exc) { + log.error("Got exception while trying to flush process entries: ", exc); + exceptionWhileFlushingParallelly.set(exc); + /* + * if we get any unexpected exception while + * trying to flush process entries of a + * ledger, then terminate the + * pendingNumOfLedgerFlushes phaser.
+ */ + pendingNumOfLedgerFlushes.forceTermination(); } }); thisLedgerFirstMapEntry = keyValues.ceilingEntry(thisLedgerCeilingKeyMarker); @@ -139,7 +135,7 @@ public void safeRun() { throw new IOException("Failed to complete the flushSnapshotByParallelizing", exceptionWhileFlushingParallelly.get()); } - flushBytesCounter.add(flushedSize.get()); + memTableStats.getFlushBytesCounter().addCount(flushedSize.get()); clearSnapshot(keyValues); } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/FileChannelProvider.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/FileChannelProvider.java new file mode 100644 index 00000000000..47ea110749a --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/FileChannelProvider.java @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bookkeeper.bookie; + +import java.io.Closeable; +import java.io.File; +import java.io.IOException; +import org.apache.bookkeeper.conf.ServerConfiguration; + +/** + * An interface for providing file channels to a bookie. + */ +public interface FileChannelProvider extends Closeable { + /** + * + * @param providerClassName Class name of the file channel provider to load. + * @return A FileChannelProvider instance loaded from providerClassName. + * @throws IOException Possible IOException. + */ + static FileChannelProvider newProvider(String providerClassName) throws IOException { + try { + Class providerClass = Class.forName(providerClassName); + Object obj = providerClass.getConstructor().newInstance(); + return (FileChannelProvider) obj; + } catch (Exception e) { + throw new IOException(e); + } + } + + /** + * Get the BookieFileChannel with the given file and configuration. + * + * @param file File path related to bookie. + * @param configuration Server configuration. + * @return BookieFileChannel related to file parameter. + * @throws IOException Possible IOException. + */ + BookieFileChannel open(File file, ServerConfiguration configuration) throws IOException; + + /** + * Close bookieFileChannel. + * @param bookieFileChannel The bookieFileChannel to be closed. + * @throws IOException Possible IOException. + */ + void close(BookieFileChannel bookieFileChannel) throws IOException; + + /** + * Whether this provider supports reusing files. Default is false. + * + * @return true if file reuse is supported. + */ + default boolean supportReuseFile() { + return false; + } + + /** + * Notify the provider that the source file has been renamed to the target file.
+ * @param source + * @param target + */ + default void notifyRename(File source, File target) { + + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/FileInfo.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/FileInfo.java index a5ddacf0ff6..150697b822a 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/FileInfo.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/FileInfo.java @@ -21,7 +21,7 @@ package org.apache.bookkeeper.bookie; -import static com.google.common.base.Charsets.UTF_8; +import static java.nio.charset.StandardCharsets.UTF_8; import static org.apache.bookkeeper.bookie.LastAddConfirmedUpdateNotification.WATCHER_RECYCLER; import com.google.common.annotations.VisibleForTesting; @@ -47,7 +47,7 @@ * Ledger index file is made of a header and several fixed-length index pages, which records the offsets of data stored * in entry loggers *
<header><index pages>
- * Header is formated as below: + * Header is formatted as below: *
<magic bytes><len of master key><master key>
*
    *
  • magic bytes: 4 bytes, 'BKLE', version: 4 bytes @@ -55,7 +55,7 @@ *
  • master key: master key *
  • state: bit map to indicate the state, 32 bits. *
- * Index page is a fixed-length page, which contains serveral entries which point to the offsets of data stored + * Index page is a fixed-length page, which contains several entries which point to the offsets of data stored * in entry loggers. *

*/ @@ -86,7 +86,7 @@ class FileInfo extends Watchable { static final long START_OF_DATA = 1024; private long size; private boolean isClosed; - private long sizeSinceLastwrite; + private long sizeSinceLastWrite; // bit map for states of the ledger. private int stateBits; @@ -101,6 +101,8 @@ class FileInfo extends Watchable { // this FileInfo Header Version int headerVersion; + private boolean deleted; + public FileInfo(File lf, byte[] masterKey, int fileInfoVersionToWrite) throws IOException { super(WATCHER_RECYCLER); @@ -108,6 +110,7 @@ public FileInfo(File lf, byte[] masterKey, int fileInfoVersionToWrite) throws IO this.masterKey = masterKey; mode = "rw"; this.headerVersion = fileInfoVersionToWrite; + this.deleted = false; } synchronized Long getLastAddConfirmed() { @@ -136,8 +139,7 @@ long setLastAddConfirmed(long lac) { synchronized boolean waitForLastAddConfirmedUpdate(long previousLAC, Watcher watcher) { - if ((null != lac && lac > previousLAC) - || isClosed || ((stateBits & STATE_FENCED_BIT) == STATE_FENCED_BIT)) { + if ((null != lac && lac > previousLAC) || isClosed) { if (LOG.isTraceEnabled()) { LOG.trace("Wait For LAC {} , {}", this.lac, previousLAC); } @@ -148,6 +150,10 @@ synchronized boolean waitForLastAddConfirmedUpdate(long previousLAC, return true; } + synchronized void cancelWaitForLastAddConfirmedUpdate(Watcher watcher) { + deleteWatcher(watcher); + } + public boolean isClosed() { return isClosed; } @@ -156,8 +162,8 @@ public synchronized File getLf() { return lf; } - public long getSizeSinceLastwrite() { - return sizeSinceLastwrite; + public long getSizeSinceLastWrite() { + return sizeSinceLastWrite; } public ByteBuf getExplicitLac() { @@ -206,7 +212,7 @@ public synchronized void readHeader() throws IOException { fc = new RandomAccessFile(lf, mode).getChannel(); size = fc.size(); - sizeSinceLastwrite = size; + sizeSinceLastWrite = size; // avoid hang on reading partial index ByteBuffer bb = ByteBuffer.allocate((int) (Math.min(size, START_OF_DATA))); @@ -257,6 +263,16 @@ public synchronized void readHeader() throws IOException { } } + public synchronized boolean isDeleted() { + return deleted; + } + + public static class FileInfoDeletedException extends IOException { + FileInfoDeletedException() { + super("FileInfo already deleted"); + } + } + @VisibleForTesting void checkOpen(boolean create) throws IOException { checkOpen(create, false); @@ -264,6 +280,9 @@ void checkOpen(boolean create) throws IOException { private synchronized void checkOpen(boolean create, boolean openBeforeClose) throws IOException { + if (deleted) { + throw new FileInfoDeletedException(); + } if (fc != null) { return; } @@ -290,7 +309,7 @@ private synchronized void checkOpen(boolean create, boolean openBeforeClose) try { readHeader(); } catch (BufferUnderflowException buf) { - LOG.warn("Exception when reading header of {} : {}", lf, buf); + LOG.warn("Exception when reading header of {}.", lf, buf); if (null != masterKey) { LOG.warn("Attempting to write header of {} again.", lf); writeHeader(); @@ -344,9 +363,7 @@ public boolean setFenced() throws IOException { // not fenced yet stateBits |= STATE_FENCED_BIT; needFlushHeader = true; - synchronized (this) { - changed = true; - } + changed = true; returnVal = true; } } @@ -475,7 +492,7 @@ public synchronized long write(ByteBuffer[] buffs, long position) throws IOExcep size = newsize; } } - sizeSinceLastwrite = fc.size(); + sizeSinceLastWrite = fc.size(); return total; } @@ -532,6 +549,7 @@ public synchronized void moveToNewLocation(File newFile, long 
size) throws IOExc } fc = new RandomAccessFile(newFile, mode).getChannel(); lf = newFile; + deleted = false; } public synchronized byte[] getMasterKey() throws IOException { @@ -540,6 +558,7 @@ public synchronized byte[] getMasterKey() throws IOException { } public synchronized boolean delete() { + deleted = true; return lf.delete(); } @@ -549,7 +568,7 @@ private static void checkParents(File f) throws IOException { return; } if (!parent.mkdirs()) { - throw new IOException("Counldn't mkdirs for " + parent); + throw new IOException("Couldn't mkdirs for " + parent); } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/FileInfoBackingCache.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/FileInfoBackingCache.java index 6beba6a744d..266595b1587 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/FileInfoBackingCache.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/FileInfoBackingCache.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -33,7 +33,8 @@ class FileInfoBackingCache { static final int DEAD_REF = -0xdead; final ReentrantReadWriteLock lock = new ReentrantReadWriteLock(); - final ConcurrentLongHashMap fileInfos = new ConcurrentLongHashMap<>(); + final ConcurrentLongHashMap fileInfos = + ConcurrentLongHashMap.newBuilder().build(); final FileLoader fileLoader; final int fileInfoVersionToWrite; @@ -49,6 +50,8 @@ private static CachedFileInfo tryRetainFileInfo(CachedFileInfo fi) throws IOExce boolean retained = fi.tryRetain(); if (!retained) { throw new IOException("FileInfo " + fi + " is already marked dead"); + } else if (fi.isDeleted()) { + throw new Bookie.NoLedgerException(fi.ledgerId); } return fi; } @@ -63,7 +66,7 @@ CachedFileInfo loadFileInfo(long ledgerId, byte[] masterKey) throws IOException // and if it is called (and succeeds) the fi will have been // removed from fileInfos at the same time, so we should not // have been able to get a reference to it here. - // The caller of loadFileInfo owns the refence, and is + // The caller of loadFileInfo owns the reference, and is // responsible for calling the corresponding #release(). return tryRetainFileInfo(fi); } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/FileSystemUpgrade.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/FileSystemUpgrade.java index 7600accfd4a..8fff510c562 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/FileSystemUpgrade.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/FileSystemUpgrade.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file @@ -21,9 +21,10 @@ package org.apache.bookkeeper.bookie; -import static com.google.common.base.Charsets.UTF_8; +import static java.nio.charset.StandardCharsets.UTF_8; import static org.apache.bookkeeper.meta.MetadataDrivers.runFunctionWithRegistrationManager; +import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.Lists; import com.google.common.util.concurrent.UncheckedExecutionException; import java.io.File; @@ -31,6 +32,7 @@ import java.io.IOException; import java.net.MalformedURLException; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.List; @@ -86,22 +88,27 @@ private boolean containsIndexFiles(File dir, String name) { return false; } + @Override public boolean accept(File dir, String name) { if (name.endsWith(".txn") || name.endsWith(".log") || name.equals("lastId") || name.startsWith("lastMark")) { return true; } - if (containsIndexFiles(dir, name)) { - return true; - } - return false; + return containsIndexFiles(dir, name); } }; - private static List getAllDirectories(ServerConfiguration conf) { + @VisibleForTesting + public static List getAllDirectories(ServerConfiguration conf) { List dirs = new ArrayList<>(); dirs.addAll(Lists.newArrayList(conf.getJournalDirs())); - Collections.addAll(dirs, conf.getLedgerDirs()); + final File[] ledgerDirs = conf.getLedgerDirs(); + final File[] indexDirs = conf.getIndexDirs(); + if (indexDirs != null && indexDirs.length == ledgerDirs.length + && !Arrays.asList(indexDirs).containsAll(Arrays.asList(ledgerDirs))) { + dirs.addAll(Lists.newArrayList(indexDirs)); + } + Collections.addAll(dirs, ledgerDirs); return dirs; } @@ -198,6 +205,7 @@ private static void upgrade(ServerConfiguration conf, c.writeToDirectory(tmpDir); String[] files = d.list(new FilenameFilter() { + @Override public boolean accept(File dir, String name) { return bookieFilesFilter.accept(dir, name) && !(new File(dir, name).isDirectory()); @@ -335,12 +343,6 @@ private static void printHelp(Options opts) { } public static void main(String[] args) throws Exception { - org.apache.log4j.Logger root = org.apache.log4j.Logger.getRootLogger(); - root.addAppender(new org.apache.log4j.ConsoleAppender( - new org.apache.log4j.PatternLayout("%-5p [%t]: %m%n"))); - root.setLevel(org.apache.log4j.Level.ERROR); - org.apache.log4j.Logger.getLogger(FileSystemUpgrade.class).setLevel( - org.apache.log4j.Level.INFO); final Options opts = new Options(); opts.addOption("c", "conf", true, "Configuration for Bookie"); diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/GarbageCollectionStatus.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/GarbageCollectionStatus.java new file mode 100644 index 00000000000..3f872092f01 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/GarbageCollectionStatus.java @@ -0,0 +1,47 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +package org.apache.bookkeeper.bookie; + +import lombok.Builder; +import lombok.Getter; +import lombok.Setter; + +/** + * This is the garbage collection thread status. + * It includes which garbage collection phase is running (major/minor), GC counters, the last GC times, etc. + */ +@Setter +@Getter +@Builder +public class GarbageCollectionStatus { + // whether the GC thread is doing a forced GC. + private boolean forceCompacting; + // whether the GC thread is running major compaction. + private boolean majorCompacting; + // whether the GC thread is running minor compaction. + private boolean minorCompacting; + + private long lastMajorCompactionTime; + private long lastMinorCompactionTime; + private long majorCompactionCounter; + private long minorCompactionCounter; +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/GarbageCollector.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/GarbageCollector.java index f9f9a7b8a2e..ef522d998ae 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/GarbageCollector.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/GarbageCollector.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/GarbageCollectorThread.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/GarbageCollectorThread.java index 4b1c834443a..e58064cfd90 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/GarbageCollectorThread.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/GarbageCollectorThread.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements.
See the NOTICE file @@ -21,39 +21,34 @@ package org.apache.bookkeeper.bookie; -import static org.apache.bookkeeper.bookie.BookKeeperServerStats.ACTIVE_ENTRY_LOG_COUNT; -import static org.apache.bookkeeper.bookie.BookKeeperServerStats.ACTIVE_ENTRY_LOG_SPACE_BYTES; -import static org.apache.bookkeeper.bookie.BookKeeperServerStats.MAJOR_COMPACTION_COUNT; -import static org.apache.bookkeeper.bookie.BookKeeperServerStats.MINOR_COMPACTION_COUNT; -import static org.apache.bookkeeper.bookie.BookKeeperServerStats.RECLAIMED_COMPACTION_SPACE_BYTES; -import static org.apache.bookkeeper.bookie.BookKeeperServerStats.RECLAIMED_DELETION_SPACE_BYTES; -import static org.apache.bookkeeper.bookie.BookKeeperServerStats.THREAD_RUNTIME; +import static org.apache.bookkeeper.util.BookKeeperConstants.METADATA_CACHE; import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Strings; +import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; import io.netty.util.concurrent.DefaultThreadFactory; - import java.io.IOException; import java.util.ArrayList; -import java.util.Comparator; -import java.util.List; -import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; +import java.util.LinkedList; import java.util.concurrent.Executors; import java.util.concurrent.Future; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; - +import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; +import lombok.Getter; +import org.apache.bookkeeper.bookie.BookieException.EntryLogMetadataMapException; import org.apache.bookkeeper.bookie.GarbageCollector.GarbageCleaner; +import org.apache.bookkeeper.bookie.stats.GarbageCollectorStats; +import org.apache.bookkeeper.bookie.storage.EntryLogger; +import org.apache.bookkeeper.bookie.storage.ldb.PersistentEntryLogMetadataMap; +import org.apache.bookkeeper.common.util.MathUtils; import org.apache.bookkeeper.conf.ServerConfiguration; import org.apache.bookkeeper.meta.LedgerManager; -import org.apache.bookkeeper.stats.Counter; -import org.apache.bookkeeper.stats.Gauge; -import org.apache.bookkeeper.stats.OpStatsLogger; import org.apache.bookkeeper.stats.StatsLogger; -import org.apache.bookkeeper.util.MathUtils; -import org.apache.bookkeeper.util.SafeRunnable; +import org.apache.commons.lang3.mutable.MutableBoolean; +import org.apache.commons.lang3.mutable.MutableLong; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -61,43 +56,44 @@ * This is the garbage collector thread that runs in the background to * remove any entry log files that no longer contains any active ledger. */ -public class GarbageCollectorThread extends SafeRunnable { +public class GarbageCollectorThread implements Runnable { private static final Logger LOG = LoggerFactory.getLogger(GarbageCollectorThread.class); private static final int SECOND = 1000; + private static final long MINUTE = TimeUnit.MINUTES.toMillis(1); // Maps entry log files to the set of ledgers that comprise the file and the size usage per ledger - private Map entryLogMetaMap = new ConcurrentHashMap(); + private final EntryLogMetadataMap entryLogMetaMap; private final ScheduledExecutorService gcExecutor; Future scheduledFuture = null; - // This is how often we want to run the Garbage Collector Thread (in milliseconds). + // This is the fixed delay in milliseconds before running the Garbage Collector Thread again. 
final long gcWaitTime; // Compaction parameters + boolean isForceMinorCompactionAllow = false; boolean enableMinorCompaction = false; final double minorCompactionThreshold; final long minorCompactionInterval; + final long minorCompactionMaxTimeMillis; long lastMinorCompactionTime; + boolean isForceMajorCompactionAllow = false; boolean enableMajorCompaction = false; final double majorCompactionThreshold; final long majorCompactionInterval; + long majorCompactionMaxTimeMillis; long lastMajorCompactionTime; + @Getter final boolean isForceGCAllowWhenNoSpace; // Entry Logger Handle final EntryLogger entryLogger; - final AbstractLogCompactor compactor; + AbstractLogCompactor compactor; // Stats loggers for garbage collection operations - final StatsLogger statsLogger; - private final Counter minorCompactionCounter; - private final Counter majorCompactionCounter; - private final Counter reclaimedSpaceViaDeletes; - private final Counter reclaimedSpaceViaCompaction; - private final OpStatsLogger gcThreadRuntime; + private final GarbageCollectorStats gcStats; private volatile long totalEntryLogSize; private volatile int numActiveEntryLogs; @@ -108,10 +104,11 @@ public class GarbageCollectorThread extends SafeRunnable { // to reduce the risk getting entry log corrupted final AtomicBoolean compacting = new AtomicBoolean(false); - volatile boolean running = true; + // used to expose the compacting status + final AtomicBoolean minorCompacting = new AtomicBoolean(false); + final AtomicBoolean majorCompacting = new AtomicBoolean(false); - // track the last scanned successfully log id - long scannedLogId = 0; + volatile boolean running = true; // Boolean to trigger a forced GC. final AtomicBoolean forceGarbageCollection = new AtomicBoolean(false); @@ -120,10 +117,14 @@ public class GarbageCollectorThread extends SafeRunnable { // Boolean to disable minor compaction, when disk is full final AtomicBoolean suspendMinorCompaction = new AtomicBoolean(false); - final GarbageCollector garbageCollector; + final ScanAndCompareGarbageCollector garbageCollector; final GarbageCleaner garbageCleaner; final ServerConfiguration conf; + final LedgerDirsManager ledgerDirsManager; + + private static final AtomicLong threadNum = new AtomicLong(0); + final AbstractLogCompactor.Throttler throttler; /** * Create a garbage collector thread.
@@ -133,9 +134,16 @@ public class GarbageCollectorThread extends SafeRunnable { * @throws IOException */ public GarbageCollectorThread(ServerConfiguration conf, LedgerManager ledgerManager, - final CompactableLedgerStorage ledgerStorage, StatsLogger statsLogger) throws IOException { - this(conf, ledgerManager, ledgerStorage, statsLogger, - Executors.newSingleThreadScheduledExecutor(new DefaultThreadFactory("GarbageCollectorThread"))); + final LedgerDirsManager ledgerDirsManager, + final CompactableLedgerStorage ledgerStorage, + EntryLogger entryLogger, + StatsLogger statsLogger) throws IOException { + this(conf, ledgerManager, ledgerDirsManager, ledgerStorage, entryLogger, statsLogger, newExecutor()); + } + + @VisibleForTesting + static ScheduledExecutorService newExecutor() { + return Executors.newSingleThreadScheduledExecutor(new DefaultThreadFactory("GarbageCollectorThread")); } /** @@ -147,93 +155,101 @@ public GarbageCollectorThread(ServerConfiguration conf, LedgerManager ledgerMana */ public GarbageCollectorThread(ServerConfiguration conf, LedgerManager ledgerManager, + final LedgerDirsManager ledgerDirsManager, final CompactableLedgerStorage ledgerStorage, + EntryLogger entryLogger, StatsLogger statsLogger, - ScheduledExecutorService gcExecutor) + ScheduledExecutorService gcExecutor) throws IOException { this.gcExecutor = gcExecutor; this.conf = conf; - this.entryLogger = ledgerStorage.getEntryLogger(); + this.ledgerDirsManager = ledgerDirsManager; + this.entryLogger = entryLogger; + this.entryLogMetaMap = createEntryLogMetadataMap(); this.ledgerStorage = ledgerStorage; this.gcWaitTime = conf.getGcWaitTime(); + this.numActiveEntryLogs = 0; + this.totalEntryLogSize = 0L; + this.garbageCollector = new ScanAndCompareGarbageCollector(ledgerManager, ledgerStorage, conf, statsLogger); + this.gcStats = new GarbageCollectorStats( + statsLogger, + () -> numActiveEntryLogs, + () -> totalEntryLogSize, + () -> garbageCollector.getNumActiveLedgers() + ); + this.garbageCleaner = ledgerId -> { try { if (LOG.isDebugEnabled()) { LOG.debug("delete ledger : " + ledgerId); } + gcStats.getDeletedLedgerCounter().inc(); ledgerStorage.deleteLedger(ledgerId); } catch (IOException e) { LOG.error("Exception when deleting the ledger index file on the Bookie: ", e); } }; - // Stat state initialization - this.statsLogger = statsLogger; - - this.minorCompactionCounter = statsLogger.getCounter(MINOR_COMPACTION_COUNT); - this.majorCompactionCounter = statsLogger.getCounter(MAJOR_COMPACTION_COUNT); - this.reclaimedSpaceViaCompaction = statsLogger.getCounter(RECLAIMED_COMPACTION_SPACE_BYTES); - this.reclaimedSpaceViaDeletes = statsLogger.getCounter(RECLAIMED_DELETION_SPACE_BYTES); - this.gcThreadRuntime = statsLogger.getOpStatsLogger(THREAD_RUNTIME); - this.numActiveEntryLogs = 0; - statsLogger.registerGauge(ACTIVE_ENTRY_LOG_COUNT, new Gauge() { - @Override - public Integer getDefaultValue() { - return 0; - } - - @Override - public Integer getSample() { - return numActiveEntryLogs; - } - }); - this.totalEntryLogSize = 0L; - statsLogger.registerGauge(ACTIVE_ENTRY_LOG_SPACE_BYTES, new Gauge() { - @Override - public Long getDefaultValue() { - return 0L; - } - - @Override - public Long getSample() { - return totalEntryLogSize; - } - }); - - this.garbageCollector = new ScanAndCompareGarbageCollector(ledgerManager, ledgerStorage, conf, statsLogger); - // compaction parameters minorCompactionThreshold = conf.getMinorCompactionThreshold(); minorCompactionInterval = conf.getMinorCompactionInterval() * SECOND; 
majorCompactionThreshold = conf.getMajorCompactionThreshold(); majorCompactionInterval = conf.getMajorCompactionInterval() * SECOND; isForceGCAllowWhenNoSpace = conf.getIsForceGCAllowWhenNoSpace(); + majorCompactionMaxTimeMillis = conf.getMajorCompactionMaxTimeMillis(); + minorCompactionMaxTimeMillis = conf.getMinorCompactionMaxTimeMillis(); + + boolean isForceAllowCompaction = conf.isForceAllowCompaction(); + + AbstractLogCompactor.LogRemovalListener remover = new AbstractLogCompactor.LogRemovalListener() { + @Override + public void removeEntryLog(long logToRemove) { + try { + GarbageCollectorThread.this.removeEntryLog(logToRemove); + } catch (EntryLogMetadataMapException e) { + // Ignore and continue because ledger will not be cleaned up + // from entry-logger in this pass and will be taken care of in + // the next scheduled run + LOG.warn("Failed to remove entry-log metadata {}", logToRemove, e); + } + } + }; if (conf.getUseTransactionalCompaction()) { - this.compactor = new TransactionalEntryLogCompactor(this); + this.compactor = new TransactionalEntryLogCompactor(conf, entryLogger, ledgerStorage, remover); } else { - this.compactor = new EntryLogCompactor(this); + this.compactor = new EntryLogCompactor(conf, entryLogger, ledgerStorage, remover); } + this.throttler = new AbstractLogCompactor.Throttler(conf); if (minorCompactionInterval > 0 && minorCompactionThreshold > 0) { - if (minorCompactionThreshold > 1.0f) { + if (minorCompactionThreshold > 1.0d) { throw new IOException("Invalid minor compaction threshold " + minorCompactionThreshold); } - if (minorCompactionInterval <= gcWaitTime) { + if (minorCompactionInterval < gcWaitTime) { throw new IOException("Too short minor compaction interval : " + minorCompactionInterval); } enableMinorCompaction = true; } + if (isForceAllowCompaction) { + if (minorCompactionThreshold > 0 && minorCompactionThreshold < 1.0d) { + isForceMinorCompactionAllow = true; + } + if (majorCompactionThreshold > 0 && majorCompactionThreshold < 1.0d) { + isForceMajorCompactionAllow = true; + } + } + if (majorCompactionInterval > 0 && majorCompactionThreshold > 0) { - if (majorCompactionThreshold > 1.0f) { + if (majorCompactionThreshold > 1.0d) { throw new IOException("Invalid major compaction threshold " + majorCompactionThreshold); } - if (majorCompactionInterval <= gcWaitTime) { + if (majorCompactionInterval < gcWaitTime) { throw new IOException("Too short major compaction interval : " + majorCompactionInterval); } @@ -255,7 +271,23 @@ public Long getSample() { LOG.info("Major Compaction : enabled=" + enableMajorCompaction + ", threshold=" + majorCompactionThreshold + ", interval=" + majorCompactionInterval); - lastMinorCompactionTime = lastMajorCompactionTime = MathUtils.now(); + lastMinorCompactionTime = lastMajorCompactionTime = System.currentTimeMillis(); + } + + private EntryLogMetadataMap createEntryLogMetadataMap() throws IOException { + if (conf.isGcEntryLogMetadataCacheEnabled()) { + String baseDir = Strings.isNullOrEmpty(conf.getGcEntryLogMetadataCachePath()) + ?
this.ledgerDirsManager.getAllLedgerDirs().get(0).getPath() : conf.getGcEntryLogMetadataCachePath(); + try { + return new PersistentEntryLogMetadataMap(baseDir, conf); + } catch (IOException e) { + LOG.error("Failed to initialize persistent-metadata-map, clean up {}", + baseDir + "/" + METADATA_CACHE, e); + throw e; + } + } else { + return new InMemoryEntryLogMetadataMap(); + } } public void enableForceGC() { @@ -266,6 +298,14 @@ public void enableForceGC() { } } + public void enableForceGC(boolean forceMajor, boolean forceMinor) { + if (forceGarbageCollection.compareAndSet(false, true)) { + LOG.info("Forced garbage collection triggered by thread: {}, forceMajor: {}, forceMinor: {}", + Thread.currentThread().getName(), forceMajor, forceMinor); + triggerGC(true, !forceMajor, !forceMinor); + } + } + public void disableForceGC() { if (forceGarbageCollection.compareAndSet(true, false)) { LOG.info("{} disabled force garbage collection since bookie has enough space now.", Thread @@ -291,6 +331,18 @@ Future triggerGC() { }); } + public boolean isInForceGC() { + return forceGarbageCollection.get(); + } + + public boolean isMajorGcSuspend() { + return suspendMajorCompaction.get(); + } + + public boolean isMinorGcSuspend() { + return suspendMinorCompaction.get(); + } + public void suspendMajorGC() { if (suspendMajorCompaction.compareAndSet(false, true)) { LOG.info("Suspend Major Compaction triggered by thread: {}", Thread.currentThread().getName()); @@ -321,22 +373,31 @@ public void start() { if (scheduledFuture != null) { scheduledFuture.cancel(false); } - scheduledFuture = gcExecutor.scheduleAtFixedRate(this, gcWaitTime, gcWaitTime, TimeUnit.MILLISECONDS); + long initialDelay = getModInitialDelay(); + scheduledFuture = gcExecutor.scheduleWithFixedDelay(this, initialDelay, gcWaitTime, TimeUnit.MILLISECONDS); + } + + /** + * When there is more than one ledger directory, every GarbageCollectorThread would otherwise do the same work + * at the same time, notably: + * 1) deleting ledgers, after which SyncThread is scheduled to run a RocksDB compaction; + * 2) compacting entries, which costs CPU. + * Staggering each thread's initial delay avoids all GarbageCollectorThreads working at the same time. + */ + public long getModInitialDelay() { + int ledgerDirsNum = conf.getLedgerDirs().length; + long splitTime = gcWaitTime / ledgerDirsNum; + long currentThreadNum = threadNum.incrementAndGet(); + return gcWaitTime + currentThreadNum * splitTime; } @Override - public void safeRun() { + public void run() { boolean force = forceGarbageCollection.get(); boolean suspendMajor = suspendMajorCompaction.get(); boolean suspendMinor = suspendMinorCompaction.get(); runWithFlags(force, suspendMajor, suspendMinor); - - if (force) { - // only set force to false if it had been true when the garbage - // collection cycle started - forceGarbageCollection.set(false); - } } public void runWithFlags(boolean force, boolean suspendMajor, boolean suspendMinor) { @@ -347,43 +408,69 @@ public void runWithFlags(boolean force, boolean suspendMajor, boolean suspendMin // Recover and clean up previous state if using transactional compaction compactor.cleanUpAndRecover(); - // Extract all of the ledger ID's that comprise all of the entry logs - // (except for the current new one which is still being written to).
- entryLogMetaMap = extractMetaFromEntryLogs(entryLogMetaMap); + try { + // gc inactive/deleted ledgers + // this is used in extractMetaFromEntryLogs to calculate the usage of entry log + doGcLedgers(); - // gc inactive/deleted ledgers - doGcLedgers(); + // Extract all of the ledger ID's that comprise all of the entry logs + // (except for the current new one which is still being written to). + extractMetaFromEntryLogs(); - // gc entry logs - doGcEntryLogs(); + // gc entry logs + doGcEntryLogs(); - if (suspendMajor) { - LOG.info("Disk almost full, suspend major compaction to slow down filling disk."); - } - if (suspendMinor) { - LOG.info("Disk full, suspend minor compaction to slow down filling disk."); - } + if (suspendMajor) { + LOG.info("Disk almost full, suspend major compaction to slow down filling disk."); + } + if (suspendMinor) { + LOG.info("Disk full, suspend minor compaction to slow down filling disk."); + } - long curTime = MathUtils.now(); - if (enableMajorCompaction && (!suspendMajor) - && (force || curTime - lastMajorCompactionTime > majorCompactionInterval)) { - // enter major compaction - LOG.info("Enter major compaction, suspendMajor {}", suspendMajor); - doCompactEntryLogs(majorCompactionThreshold); - lastMajorCompactionTime = MathUtils.now(); - // and also move minor compaction time - lastMinorCompactionTime = lastMajorCompactionTime; - majorCompactionCounter.inc(); - } else if (enableMinorCompaction && (!suspendMinor) - && (force || curTime - lastMinorCompactionTime > minorCompactionInterval)) { - // enter minor compaction - LOG.info("Enter minor compaction, suspendMinor {}", suspendMinor); - doCompactEntryLogs(minorCompactionThreshold); - lastMinorCompactionTime = MathUtils.now(); - minorCompactionCounter.inc(); + long curTime = System.currentTimeMillis(); + if (((isForceMajorCompactionAllow && force) || (enableMajorCompaction + && (force || curTime - lastMajorCompactionTime > majorCompactionInterval))) + && (!suspendMajor)) { + // enter major compaction + LOG.info("Enter major compaction, suspendMajor {}", suspendMajor); + majorCompacting.set(true); + try { + doCompactEntryLogs(majorCompactionThreshold, majorCompactionMaxTimeMillis); + } finally { + lastMajorCompactionTime = System.currentTimeMillis(); + // and also move minor compaction time + lastMinorCompactionTime = lastMajorCompactionTime; + gcStats.getMajorCompactionCounter().inc(); + majorCompacting.set(false); + } + } else if (((isForceMinorCompactionAllow && force) || (enableMinorCompaction + && (force || curTime - lastMinorCompactionTime > minorCompactionInterval))) + && (!suspendMinor)) { + // enter minor compaction + LOG.info("Enter minor compaction, suspendMinor {}", suspendMinor); + minorCompacting.set(true); + try { + doCompactEntryLogs(minorCompactionThreshold, minorCompactionMaxTimeMillis); + } finally { + lastMinorCompactionTime = System.currentTimeMillis(); + gcStats.getMinorCompactionCounter().inc(); + minorCompacting.set(false); + } + } + gcStats.getGcThreadRuntime().registerSuccessfulEvent( + MathUtils.nowInNano() - threadStart, TimeUnit.NANOSECONDS); + } catch (EntryLogMetadataMapException e) { + LOG.error("Error in entryLog-metadatamap, Failed to complete GC/Compaction due to entry-log {}", + e.getMessage(), e); + gcStats.getGcThreadRuntime().registerFailedEvent( + MathUtils.nowInNano() - threadStart, TimeUnit.NANOSECONDS); + } finally { + if (force && forceGarbageCollection.compareAndSet(true, false)) { + LOG.info("{} Set forceGarbageCollection to false after force GC to make it forceGC-able 
again.", + Thread.currentThread().getName()); + } } - this.gcThreadRuntime.registerSuccessfulEvent( - MathUtils.nowInNano() - threadStart, TimeUnit.NANOSECONDS); + } /** @@ -396,38 +483,58 @@ private void doGcLedgers() { /** * Garbage collect those entry loggers which are not associated with any active ledgers. */ - private void doGcEntryLogs() { + private void doGcEntryLogs() throws EntryLogMetadataMapException { // Get a cumulative count, don't update until complete AtomicLong totalEntryLogSizeAcc = new AtomicLong(0L); // Loop through all of the entry logs and remove the non-active ledgers. entryLogMetaMap.forEach((entryLogId, meta) -> { - removeIfLedgerNotExists(meta); - if (meta.isEmpty()) { - // This means the entry log is not associated with any active ledgers anymore. - // We can remove this entry log file now. - LOG.info("Deleting entryLogId " + entryLogId + " as it has no active ledgers!"); - removeEntryLog(entryLogId); - this.reclaimedSpaceViaDeletes.add(meta.getTotalSize()); - } - + try { + boolean modified = removeIfLedgerNotExists(meta); + if (meta.isEmpty()) { + // This means the entry log is not associated with any active + // ledgers anymore. + // We can remove this entry log file now. + LOG.info("Deleting entryLogId {} as it has no active ledgers!", entryLogId); + if (removeEntryLog(entryLogId)) { + gcStats.getReclaimedSpaceViaDeletes().addCount(meta.getTotalSize()); + } else { + gcStats.getReclaimFailedToDelete().inc(); + } + } else if (modified) { + // update entryLogMetaMap only when the meta modified. + entryLogMetaMap.put(meta.getEntryLogId(), meta); + } + } catch (EntryLogMetadataMapException e) { + // Ignore and continue because ledger will not be cleaned up + // from entry-logger in this pass and will be taken care in next + // schedule task + LOG.warn("Failed to remove ledger from entry-log metadata {}", entryLogId, e); + } totalEntryLogSizeAcc.getAndAdd(meta.getRemainingSize()); }); this.totalEntryLogSize = totalEntryLogSizeAcc.get(); - this.numActiveEntryLogs = entryLogMetaMap.keySet().size(); + this.numActiveEntryLogs = entryLogMetaMap.size(); } - private void removeIfLedgerNotExists(EntryLogMetadata meta) { + private boolean removeIfLedgerNotExists(EntryLogMetadata meta) throws EntryLogMetadataMapException { + MutableBoolean modified = new MutableBoolean(false); meta.removeLedgerIf((entryLogLedger) -> { // Remove the entry log ledger from the set if it isn't active. try { - return !ledgerStorage.ledgerExists(entryLogLedger); + boolean exist = ledgerStorage.ledgerExists(entryLogLedger); + if (!exist) { + modified.setTrue(); + } + return !exist; } catch (IOException e) { LOG.error("Error reading from ledger storage", e); return false; } }); + + return modified.getValue(); } /** @@ -440,41 +547,124 @@ private void removeIfLedgerNotExists(EntryLogMetadata meta) { *

*/ @VisibleForTesting - void doCompactEntryLogs(double threshold) { + void doCompactEntryLogs(double threshold, long maxTimeMillis) throws EntryLogMetadataMapException { LOG.info("Do compaction to compact those files lower than {}", threshold); - // sort the ledger meta by usage in ascending order. - List<EntryLogMetadata> logsToCompact = new ArrayList<EntryLogMetadata>(); - logsToCompact.addAll(entryLogMetaMap.values()); - logsToCompact.sort(Comparator.comparing(EntryLogMetadata::getUsage)); - final int numBuckets = 10; int entryLogUsageBuckets[] = new int[numBuckets]; + int[] entryLogUsageBuckets = new int[numBuckets]; + int[] compactedBuckets = new int[numBuckets]; + + ArrayList<LinkedList<Long>> compactableBuckets = new ArrayList<>(numBuckets); + for (int i = 0; i < numBuckets; i++) { + compactableBuckets.add(new LinkedList<>()); + } + + long start = System.currentTimeMillis(); + MutableLong end = new MutableLong(start); + MutableLong timeDiff = new MutableLong(0); - for (EntryLogMetadata meta : logsToCompact) { - int bucketIndex = Math.min( - numBuckets - 1, - (int) Math.ceil(meta.getUsage() * numBuckets)); + entryLogMetaMap.forEach((entryLogId, meta) -> { + double usage = meta.getUsage(); + if (conf.isUseTargetEntryLogSizeForGc() && usage < 1.0d) { + usage = (double) meta.getRemainingSize() / Math.max(meta.getTotalSize(), conf.getEntryLogSizeLimit()); + } + int bucketIndex = calculateUsageIndex(numBuckets, usage); entryLogUsageBuckets[bucketIndex]++; - if (meta.getUsage() >= threshold) { - continue; + if (timeDiff.getValue() < maxTimeMillis) { + end.setValue(System.currentTimeMillis()); + timeDiff.setValue(end.getValue() - start); } - if (LOG.isDebugEnabled()) { - LOG.debug("Compacting entry log {} below threshold {}", meta.getEntryLogId(), threshold); + if ((usage >= threshold + || (maxTimeMillis > 0 && timeDiff.getValue() >= maxTimeMillis) + || !running)) { + // We allow the usage limit calculation to continue so that we get an accurate + // report of where the usage was prior to running compaction. + return; } - long priorRemainingSize = meta.getRemainingSize(); - compactEntryLog(meta); - this.reclaimedSpaceViaCompaction.add(meta.getTotalSize() - priorRemainingSize); + compactableBuckets.get(bucketIndex).add(meta.getEntryLogId()); + }); - if (!running) { // if gc thread is not running, stop compaction - return; + LOG.info( + "Compaction: entry log usage buckets before compaction [10% 20% 30% 40% 50% 60% 70% 80% 90% 100%] = {}", + entryLogUsageBuckets); + + final int maxBucket = calculateUsageIndex(numBuckets, threshold); + int totalEntryLogIds = 0; + for (int currBucket = 0; currBucket <= maxBucket; currBucket++) { + totalEntryLogIds += compactableBuckets.get(currBucket).size(); + } + long lastPrintTimestamp = 0; + AtomicInteger processedEntryLogCnt = new AtomicInteger(0); + + stopCompaction: + for (int currBucket = 0; currBucket <= maxBucket; currBucket++) { + LinkedList<Long> entryLogIds = compactableBuckets.get(currBucket); + while (!entryLogIds.isEmpty()) { + if (timeDiff.getValue() < maxTimeMillis) { + end.setValue(System.currentTimeMillis()); + timeDiff.setValue(end.getValue() - start); + } + + if ((maxTimeMillis > 0 && timeDiff.getValue() >= maxTimeMillis) || !running) { + // We allow the usage limit calculation to continue so that we get an accurate + // report of where the usage was prior to running compaction.
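The bucket histogram above is driven by calculateUsageIndex (defined further below in this hunk): floor(usage * numBuckets), clamped so that a fully-used log (usage == 1.0) still lands in the last bucket. A minimal standalone sketch of that math, using the patch's ten buckets:

    // Floor-based bucket index, clamped to the last bucket (numBuckets = 10,
    // matching the patch).
    public class UsageBucketSketch {
        static int calculateUsageIndex(int numBuckets, double usage) {
            return Math.min(numBuckets - 1, (int) Math.floor(usage * numBuckets));
        }

        public static void main(String[] args) {
            System.out.println(calculateUsageIndex(10, 0.05)); // 0 -> 0-10% bucket
            System.out.println(calculateUsageIndex(10, 0.95)); // 9 -> 90-100% bucket
            System.out.println(calculateUsageIndex(10, 1.0));  // 9 -> clamped
        }
    }

Note the switch from the old Math.ceil to Math.floor: with ceil, a log at 5% usage landed in the 10-20% bucket; floor puts it in 0-10%.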
+ break stopCompaction; + } + + final int bucketIndex = currBucket; + final long logId = entryLogIds.remove(); + if (System.currentTimeMillis() - lastPrintTimestamp >= MINUTE) { + lastPrintTimestamp = System.currentTimeMillis(); + LOG.info("Compaction progress {} / {}, current compaction entryLogId: {}", + processedEntryLogCnt.get(), totalEntryLogIds, logId); + } + entryLogMetaMap.forKey(logId, (entryLogId, meta) -> { + if (meta == null) { + if (LOG.isDebugEnabled()) { + LOG.debug("Metadata for entry log {} already deleted", logId); + } + return; + } + if (LOG.isDebugEnabled()) { + LOG.debug("Compacting entry log {} with usage {} below threshold {}", + meta.getEntryLogId(), meta.getUsage(), threshold); + } + + long priorRemainingSize = meta.getRemainingSize(); + compactEntryLog(meta); + gcStats.getReclaimedSpaceViaCompaction().addCount(meta.getTotalSize() - priorRemainingSize); + compactedBuckets[bucketIndex]++; + processedEntryLogCnt.getAndIncrement(); + }); + } + } + + if (LOG.isDebugEnabled()) { + if (!running) { + LOG.debug("Compaction exited due to gc not running"); + } + if (maxTimeMillis > 0 && timeDiff.getValue() > maxTimeMillis) { + LOG.debug("Compaction ran for {}ms but was limited by {}ms", timeDiff, maxTimeMillis); } } LOG.info( - "Compaction: entry log usage buckets[10% 20% 30% 40% 50% 60% 70% 80% 90% 100%] = {}", - entryLogUsageBuckets); + "Compaction: entry log usage buckets[10% 20% 30% 40% 50% 60% 70% 80% 90% 100%] = {}, compacted {}", + entryLogUsageBuckets, compactedBuckets); + } + + /** + * Calculate the index for the batch based on the usage between 0 and 1. + * + * @param numBuckets Number of reporting buckets. + * @param usage 0.0 - 1.0 value representing the usage of the entry log. + * @return index based on the number of buckets The last bucket will have the 1.0 if added. + */ + int calculateUsageIndex(int numBuckets, double usage) { + return Math.min( + numBuckets - 1, + (int) Math.floor(usage * numBuckets)); } /** @@ -482,10 +672,16 @@ void doCompactEntryLogs(double threshold) { * * @throws InterruptedException if there is an exception stopping gc thread. */ - public void shutdown() throws InterruptedException { + @SuppressFBWarnings("SWL_SLEEP_WITH_LOCK_HELD") + public synchronized void shutdown() throws InterruptedException { + if (!this.running) { + return; + } this.running = false; LOG.info("Shutting down GarbageCollectorThread"); + throttler.cancelledAcquire(); + compactor.throttler.cancelledAcquire(); while (!compacting.compareAndSet(false, true)) { // Wait till the thread stops compacting Thread.sleep(100); @@ -493,6 +689,11 @@ public void shutdown() throws InterruptedException { // Interrupt GC executor thread gcExecutor.shutdownNow(); + try { + entryLogMetaMap.close(); + } catch (Exception e) { + LOG.warn("Failed to close entryLog metadata-map", e); + } } /** @@ -500,13 +701,17 @@ public void shutdown() throws InterruptedException { * * @param entryLogId * Entry Log File Id + * @throws EntryLogMetadataMapException */ - protected void removeEntryLog(long entryLogId) { + protected boolean removeEntryLog(long entryLogId) throws EntryLogMetadataMapException { // remove entry log file successfully if (entryLogger.removeEntryLog(entryLogId)) { LOG.info("Removing entry log metadata for {}", entryLogId); entryLogMetaMap.remove(entryLogId); + return true; } + + return false; } /** @@ -525,27 +730,26 @@ protected void compactEntryLog(EntryLogMetadata entryLogMeta) { // indicates that compaction is in progress for this EntryLogId. 
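compactEntryLog above serializes compaction passes through the compacting flag and, with this change, clears it in a finally block even when the compactor throws. The guard pattern in isolation (runCompaction is illustrative, standing in for compactor.compact(entryLogMeta)):

    import java.util.concurrent.atomic.AtomicBoolean;

    // Only one thread may run a compaction pass at a time; the flag is always
    // cleared, even if the pass throws.
    public class CompactionGuardSketch {
        private final AtomicBoolean compacting = new AtomicBoolean(false);

        void compactOnce(Runnable runCompaction) {
            if (!compacting.compareAndSet(false, true)) {
                return; // another compaction is already in progress
            }
            try {
                runCompaction.run();
            } finally {
                compacting.set(false); // mark compaction done
            }
        }
    }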
return; } - // Do the actual compaction - compactor.compact(entryLogMeta); - // Mark compaction done - compacting.set(false); + + try { + // Do the actual compaction + compactor.compact(entryLogMeta); + } catch (Exception e) { + LOG.error("Failed to compact entry log {} due to unexpected error", entryLogMeta.getEntryLogId(), e); + } finally { + // Mark compaction done + compacting.set(false); + } } /** * Method to read in all of the entry logs (those that we haven't done so yet), * and find the set of ledger ID's that make up each entry log file. * - * @param entryLogMetaMap - * Existing EntryLogs to Meta - * @throws IOException + * @throws EntryLogMetadataMapException */ - protected Map extractMetaFromEntryLogs(Map entryLogMetaMap) { - // Extract it for every entry log except for the current one. - // Entry Log ID's are just a long value that starts at 0 and increments - // by 1 when the log fills up and we roll to a new one. - long curLogId = entryLogger.getLeastUnflushedLogId(); - boolean hasExceptionWhenScan = false; - for (long entryLogId = scannedLogId; entryLogId < curLogId; entryLogId++) { + protected void extractMetaFromEntryLogs() throws EntryLogMetadataMapException { + for (long entryLogId : entryLogger.getFlushedLogIds()) { // Comb the current entry log file if it has not already been extracted. if (entryLogMetaMap.containsKey(entryLogId)) { continue; @@ -561,34 +765,46 @@ protected Map extractMetaFromEntryLogs(Map(); - this.readOnlyLedgers = new ConcurrentLongHashMap<>(); + this.ledgers = ConcurrentLongHashMap.newBuilder().build(); + this.readOnlyLedgers = ConcurrentLongHashMap.newBuilder().build(); ledgerStorage.registerLedgerDeletionListener(this); } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/InMemoryEntryLogMetadataMap.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/InMemoryEntryLogMetadataMap.java new file mode 100644 index 00000000000..658c9ecbae1 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/InMemoryEntryLogMetadataMap.java @@ -0,0 +1,81 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.bookie; + +import java.io.IOException; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.function.BiConsumer; + +/** + * In-memory metadata-store to store entry-log metadata-map in memory-map. 
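The contract that the in-memory implementation below satisfies can be exercised as in the following sketch. It assumes the public EntryLogMetadata(long) constructor and collapses the checked EntryLogMetadataMapException and IOException into a single throws clause for brevity:

    import org.apache.bookkeeper.bookie.EntryLogMetadata;
    import org.apache.bookkeeper.bookie.EntryLogMetadataMap;
    import org.apache.bookkeeper.bookie.InMemoryEntryLogMetadataMap;

    public class MetadataMapSketch {
        public static void main(String[] args) throws Exception {
            EntryLogMetadataMap map = new InMemoryEntryLogMetadataMap();
            try {
                map.put(42L, new EntryLogMetadata(42L)); // metadata for entry log 42
                map.forEach((logId, meta) -> {
                    // an empty meta would mean no live ledgers remain in this log
                    System.out.println(logId + " empty=" + meta.isEmpty());
                });
                map.remove(42L); // dropped once the entry log file is deleted
            } finally {
                map.close();
            }
        }
    }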
*/ public class InMemoryEntryLogMetadataMap implements EntryLogMetadataMap { + + private final Map<Long, EntryLogMetadata> entryLogMetaMap = new ConcurrentHashMap<>(); + + @Override + public boolean containsKey(long entryLogId) { + return entryLogMetaMap.containsKey(entryLogId); + } + + @Override + public void put(long entryLogId, EntryLogMetadata entryLogMeta) { + entryLogMetaMap.put(entryLogId, entryLogMeta); + } + + @Override + public void forEach(BiConsumer<Long, EntryLogMetadata> action) { + entryLogMetaMap.forEach(action); + } + + @Override + public void forKey(long entryLogId, BiConsumer<Long, EntryLogMetadata> action) + throws BookieException.EntryLogMetadataMapException { + action.accept(entryLogId, entryLogMetaMap.get(entryLogId)); + } + + @Override + public void remove(long entryLogId) { + entryLogMetaMap.remove(entryLogId); + } + + @Override + public int size() { + return entryLogMetaMap.size(); + } + + @Override + public boolean isEmpty() { + return entryLogMetaMap.isEmpty(); + } + + @Override + public void clear() { + entryLogMetaMap.clear(); + } + + @Override + public void close() throws IOException { + entryLogMetaMap.clear(); + } + +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/IndexInMemPageMgr.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/IndexInMemPageMgr.java index 66e97f79423..063b428e923 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/IndexInMemPageMgr.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/IndexInMemPageMgr.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -20,14 +20,15 @@ */ package org.apache.bookkeeper.bookie; -import static org.apache.bookkeeper.bookie.BookKeeperServerStats.INDEX_INMEM_ILLEGAL_STATE_DELETE; -import static org.apache.bookkeeper.bookie.BookKeeperServerStats.INDEX_INMEM_ILLEGAL_STATE_RESET; +import static java.lang.Long.max; import static org.apache.bookkeeper.bookie.BookKeeperServerStats.LEDGER_CACHE_HIT; import static org.apache.bookkeeper.bookie.BookKeeperServerStats.LEDGER_CACHE_MISS; import static org.apache.bookkeeper.bookie.BookKeeperServerStats.LEDGER_CACHE_READ_PAGE; import static org.apache.bookkeeper.bookie.BookKeeperServerStats.NUM_INDEX_PAGES; +// CHECKSTYLE.OFF: IllegalImport import com.google.common.base.Stopwatch; +import io.netty.util.internal.PlatformDependent; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; @@ -40,17 +41,17 @@ import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentLinkedQueue; import java.util.concurrent.ConcurrentMap; -import java.util.concurrent.ConcurrentSkipListSet; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; +import org.apache.bookkeeper.bookie.stats.IndexInMemPageMgrStats; import org.apache.bookkeeper.conf.ServerConfiguration; import org.apache.bookkeeper.stats.Counter; import org.apache.bookkeeper.stats.Gauge; import org.apache.bookkeeper.stats.OpStatsLogger; import org.apache.bookkeeper.stats.StatsLogger; -import org.apache.bookkeeper.util.DirectMemoryUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +// CHECKSTYLE.ON: IllegalImport class IndexInMemPageMgr { private static final Logger LOG = LoggerFactory.getLogger(IndexInMemPageMgr.class); @@ -64,16 +65,14 @@ private static class InMemPageCollection implements LEPStateChangeCallback { final ConcurrentLinkedQueue<LedgerEntryPage> listOfFreePages; // Stats - final Counter illegalStateResetCounter; -
final Counter illegalStateDeleteCounter; + private final IndexInMemPageMgrStats inMemPageMgrStats; public InMemPageCollection(StatsLogger statsLogger) { pages = new ConcurrentHashMap<>(); lruCleanPageMap = Collections.synchronizedMap(new LinkedHashMap(16, 0.75f, true)); listOfFreePages = new ConcurrentLinkedQueue(); - illegalStateResetCounter = statsLogger.getCounter(INDEX_INMEM_ILLEGAL_STATE_RESET); - illegalStateDeleteCounter = statsLogger.getCounter(INDEX_INMEM_ILLEGAL_STATE_DELETE); + inMemPageMgrStats = new IndexInMemPageMgrStats(statsLogger); } /** @@ -154,22 +153,11 @@ private void removeEntriesForALedger(long ledgerId) { ConcurrentMap lPages = pages.remove(ledgerId); if (null != lPages) { for (Map.Entry pageEntry: lPages.entrySet()) { - long entryId = pageEntry.getKey(); - synchronized (lruCleanPageMap) { - lruCleanPageMap.remove(new EntryKey(ledgerId, entryId)); - } - LedgerEntryPage lep = pageEntry.getValue(); - // Cannot imagine under what circumstances we would have a null entry here - // Just being safe - if (null != lep) { - if (lep.inUse()) { - illegalStateDeleteCounter.inc(); - } - listOfFreePages.add(lep); - } + lep.usePage(); + lep.markDeleted(); + lep.releasePage(); } - } } @@ -305,7 +293,7 @@ LedgerEntryPage grabCleanPage(long ledgerId, long firstEntry) { public void addToListOfFreePages(LedgerEntryPage lep) { if ((null == lep) || lep.inUse()) { - illegalStateResetCounter.inc(); + inMemPageMgrStats.getIllegalStateResetCounter().inc(); } if (null != lep) { listOfFreePages.add(lep); @@ -319,7 +307,11 @@ public void onSetInUse(LedgerEntryPage lep) { @Override public void onResetInUse(LedgerEntryPage lep) { - addToCleanPagesList(lep); + if (!lep.isDeleted()) { + addToCleanPagesList(lep); + } else { + addToListOfFreePages(lep); + } } @Override @@ -345,12 +337,6 @@ public void onSetDirty(LedgerEntryPage lep) { // flush and read pages private final IndexPersistenceMgr indexPersistenceManager; - /** - * the list of potentially dirty ledgers. - */ - private final ConcurrentLinkedQueue ledgersToFlush = new ConcurrentLinkedQueue(); - private final ConcurrentSkipListSet ledgersFlushing = new ConcurrentSkipListSet(); - // Stats private final Counter ledgerCacheHitCounter; private final Counter ledgerCacheMissCounter; @@ -366,7 +352,7 @@ public IndexInMemPageMgr(int pageSize, this.indexPersistenceManager = indexPersistenceManager; this.pageMapAndList = new InMemPageCollection(statsLogger); - long maxDirectMemory = DirectMemoryUtils.maxDirectMemory(); + long maxDirectMemory = PlatformDependent.estimateMaxDirectMemory(); if (conf.getPageLimit() <= 0) { // By default, allocate a third of the direct memory to the page cache @@ -403,28 +389,14 @@ public int getPageSize() { return pageSize; } - /** - * @return entries per page used in ledger cache - */ - public int getEntriesPerPage() { - return entriesPerPage; - } - - /** - * @return page limitation in ledger cache - */ - public int getPageLimit() { - return pageLimit; - } - /** * @return number of page used in ledger cache */ - public int getNumUsedPages() { + private int getNumUsedPages() { return pageCount.get(); } - /** + /** * Get the ledger entry page for a given pageEntry. 
* * @param ledger @@ -434,7 +406,7 @@ public int getNumUsedPages() { * @return ledger entry page * @throws IOException */ - public LedgerEntryPage getLedgerEntryPage(long ledger, + LedgerEntryPage getLedgerEntryPage(long ledger, long pageEntry) throws IOException { LedgerEntryPage lep = getLedgerEntryPageFromCache(ledger, pageEntry, false); if (lep == null) { @@ -504,7 +476,6 @@ private LedgerEntryPage grabLedgerEntryPage(long ledger, long pageEntry) throws void removePagesForLedger(long ledgerId) { pageMapAndList.removeEntriesForALedger(ledgerId); - ledgersToFlush.remove(ledgerId); } long getLastEntryInMem(long ledgerId) { @@ -542,18 +513,12 @@ private LedgerEntryPage grabCleanPage(long ledger, long entry) throws IOExceptio } void flushOneOrMoreLedgers(boolean doAll) throws IOException { - if (ledgersToFlush.isEmpty()) { - ledgersToFlush.addAll(pageMapAndList.getActiveLedgers()); - } - Long potentiallyDirtyLedger; - while (null != (potentiallyDirtyLedger = ledgersToFlush.poll())) { - if (!ledgersFlushing.add(potentiallyDirtyLedger)) { - continue; - } + List ledgersToFlush = new ArrayList<>(pageMapAndList.getActiveLedgers()); + for (Long potentiallyDirtyLedger : ledgersToFlush) { try { flushSpecificLedger(potentiallyDirtyLedger); - } finally { - ledgersFlushing.remove(potentiallyDirtyLedger); + } catch (Bookie.NoLedgerException e) { + continue; } if (!doAll) { break; @@ -608,6 +573,8 @@ void putEntryOffset(long ledger, long entry, long offset) throws IOException { lep = getLedgerEntryPage(ledger, pageEntry); assert lep != null; lep.setOffset(offset, offsetInPage * LedgerEntryPage.getIndexEntrySize()); + } catch (FileInfo.FileInfoDeletedException e) { + throw new Bookie.NoLedgerException(ledger); } finally { if (null != lep) { lep.releasePage(); @@ -630,4 +597,82 @@ long getEntryOffset(long ledger, long entry) throws IOException { } } } + + /** + * Represents a page of the index. + */ + private class PageEntriesImpl implements LedgerCache.PageEntries { + final long ledgerId; + final long initEntry; + + PageEntriesImpl(long ledgerId, long initEntry) { + this.ledgerId = ledgerId; + this.initEntry = initEntry; + } + + @Override + public LedgerEntryPage getLEP() throws IOException { + return getLedgerEntryPage(ledgerId, initEntry); + } + + @Override + public long getFirstEntry() { + return initEntry; + } + + @Override + public long getLastEntry() { + return initEntry + entriesPerPage; + } + } + + /** + * Iterable over index pages -- returns PageEntries rather than individual + * entries because getEntries() above needs to be able to throw an IOException. + */ + private class PageEntriesIterableImpl implements LedgerCache.PageEntriesIterable { + final long ledgerId; + final FileInfoBackingCache.CachedFileInfo fi; + final long totalEntries; + + long curEntry = 0; + + PageEntriesIterableImpl(long ledgerId) throws IOException { + this.ledgerId = ledgerId; + this.fi = indexPersistenceManager.getFileInfo(ledgerId, null); + this.totalEntries = max(entriesPerPage * (fi.size() / pageSize), getLastEntryInMem(ledgerId)); + } + + @Override + public Iterator iterator() { + return new Iterator() { + @Override + public boolean hasNext() { + return curEntry < totalEntries; + } + + @Override + public LedgerCache.PageEntries next() { + LedgerCache.PageEntries next = new PageEntriesImpl(ledgerId, curEntry); + curEntry += entriesPerPage; + return next; + } + }; + } + + @Override + public void close() { + fi.release(); + } + } + + /** + * Return iterator over pages for mapping entries to entry loggers. 
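listEntries returns an iterable that pins a FileInfo reference until closed, and each page handed out via getLEP() is use-counted. A consumption sketch against the public wrapper added later in this patch (InterleavedLedgerStorage.getIndexEntries), assuming PageEntriesIterable extends AutoCloseable and that the caller must balance getLEP() with releasePage():

    import org.apache.bookkeeper.bookie.InterleavedLedgerStorage;
    import org.apache.bookkeeper.bookie.LedgerCache;
    import org.apache.bookkeeper.bookie.LedgerEntryPage;

    public class IndexPageScanSketch {
        static void scan(InterleavedLedgerStorage storage, long ledgerId) throws Exception {
            // try-with-resources releases the pinned FileInfo via close()
            try (LedgerCache.PageEntriesIterable pages = storage.getIndexEntries(ledgerId)) {
                for (LedgerCache.PageEntries page : pages) {
                    LedgerEntryPage lep = page.getLEP(); // loads and pins the page
                    try {
                        // entries covered: [page.getFirstEntry(), page.getLastEntry())
                    } finally {
                        lep.releasePage();
                    }
                }
            }
        }
    }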
+ * @param ledgerId + * @return Iterator over pages + * @throws IOException + */ + public LedgerCache.PageEntriesIterable listEntries(long ledgerId) throws IOException { + return new PageEntriesIterableImpl(ledgerId); + } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/IndexPersistenceMgr.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/IndexPersistenceMgr.java index 83cb88fcc31..220f81eb8a3 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/IndexPersistenceMgr.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/IndexPersistenceMgr.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -20,11 +20,6 @@ */ package org.apache.bookkeeper.bookie; -import static org.apache.bookkeeper.bookie.BookKeeperServerStats.LEDGER_CACHE_NUM_EVICTED_LEDGERS; -import static org.apache.bookkeeper.bookie.BookKeeperServerStats.PENDING_GET_FILE_INFO; -import static org.apache.bookkeeper.bookie.BookKeeperServerStats.READ_FILE_INFO_CACHE_SIZE; -import static org.apache.bookkeeper.bookie.BookKeeperServerStats.WRITE_FILE_INFO_CACHE_SIZE; - import com.google.common.annotations.VisibleForTesting; import com.google.common.cache.Cache; import com.google.common.cache.CacheBuilder; @@ -43,10 +38,9 @@ import java.util.concurrent.TimeUnit; import org.apache.bookkeeper.bookie.FileInfoBackingCache.CachedFileInfo; import org.apache.bookkeeper.bookie.LedgerDirsManager.NoWritableLedgerDirException; +import org.apache.bookkeeper.bookie.stats.IndexPersistenceMgrStats; import org.apache.bookkeeper.common.util.Watcher; import org.apache.bookkeeper.conf.ServerConfiguration; -import org.apache.bookkeeper.stats.Counter; -import org.apache.bookkeeper.stats.Gauge; import org.apache.bookkeeper.stats.StatsLogger; import org.apache.bookkeeper.util.SnapshotMap; import org.slf4j.Logger; @@ -89,9 +83,7 @@ public static final String getLedgerName(long ledgerId) { final SnapshotMap activeLedgers; final LedgerDirsManager ledgerDirsManager; - // Stats - private final Counter evictedLedgersCounter; - private final Counter pendingGetFileInfoCounter; + private final IndexPersistenceMgrStats persistenceMgrStats; public IndexPersistenceMgr(int pageSize, int entriesPerPage, @@ -127,30 +119,11 @@ public IndexPersistenceMgr(int pageSize, fileInfoEvictionListener); // Expose Stats - evictedLedgersCounter = statsLogger.getCounter(LEDGER_CACHE_NUM_EVICTED_LEDGERS); - pendingGetFileInfoCounter = statsLogger.getCounter(PENDING_GET_FILE_INFO); - statsLogger.registerGauge(WRITE_FILE_INFO_CACHE_SIZE, new Gauge() { - @Override - public Number getDefaultValue() { - return 0; - } - - @Override - public Number getSample() { - return writeFileInfoCache.size(); - } - }); - statsLogger.registerGauge(READ_FILE_INFO_CACHE_SIZE, new Gauge() { - @Override - public Number getDefaultValue() { - return 0; - } - - @Override - public Number getSample() { - return readFileInfoCache.size(); - } - }); + persistenceMgrStats = new IndexPersistenceMgrStats( + statsLogger, + () -> writeFileInfoCache.size(), + () -> readFileInfoCache.size() + ); } private static Cache buildCache(int concurrencyLevel, @@ -192,7 +165,7 @@ private void handleLedgerEviction(RemovalNotification noti return; } if (notification.wasEvicted()) { - evictedLedgersCounter.inc(); + persistenceMgrStats.getEvictedLedgersCounter().inc(); } fileInfo.release(); } @@ -204,10 +177,10 @@ private void 
handleLedgerEviction(RemovalNotification noti * the FileInfo from cache, that FileInfo is then evicted and closed before we * could even increase the reference counter. */ - CachedFileInfo getFileInfo(final Long ledger, final byte masterKey[]) throws IOException { + CachedFileInfo getFileInfo(final Long ledger, final byte[] masterKey) throws IOException { try { CachedFileInfo fi; - pendingGetFileInfoCounter.inc(); + persistenceMgrStats.getPendingGetFileInfoCounter().inc(); Callable loader = () -> { CachedFileInfo fileInfo = fileInfoBackingCache.loadFileInfo(ledger, masterKey); activeLedgers.put(ledger, true); @@ -240,10 +213,10 @@ CachedFileInfo getFileInfo(final Long ledger, final byte masterKey[]) throws IOE if (ee.getCause() instanceof IOException) { throw (IOException) ee.getCause(); } else { - throw new IOException("Failed to load file info for ledger " + ledger, ee); + throw new LedgerCache.NoIndexForLedgerException("Failed to load file info for ledger " + ledger, ee); } } finally { - pendingGetFileInfoCounter.dec(); + persistenceMgrStats.getPendingGetFileInfoCounter().dec(); } } @@ -301,8 +274,9 @@ private void getActiveLedgers() throws IOException { // name is the HexString representation of the // ledgerId. String ledgerIdInHex = index.getName().replace(RLOC, "").replace(IDX, ""); + long ledgerId = Long.parseLong(ledgerIdInHex, 16); if (index.getName().endsWith(RLOC)) { - if (findIndexFile(Long.parseLong(ledgerIdInHex)) != null) { + if (findIndexFile(ledgerId) != null) { if (!index.delete()) { LOG.warn("Deleting the rloc file " + index + " failed"); } @@ -315,7 +289,7 @@ private void getActiveLedgers() throws IOException { } } } - activeLedgers.put(Long.parseLong(ledgerIdInHex, 16), true); + activeLedgers.put(ledgerId, true); } } } @@ -401,6 +375,19 @@ boolean waitForLastAddConfirmedUpdate(long ledgerId, } } + void cancelWaitForLastAddConfirmedUpdate(long ledgerId, + Watcher watcher) throws IOException { + CachedFileInfo fi = null; + try { + fi = getFileInfo(ledgerId, null); + fi.cancelWaitForLastAddConfirmedUpdate(watcher); + } finally { + if (null != fi) { + fi.release(); + } + } + } + long updateLastAddConfirmed(long ledgerId, long lac) throws IOException { CachedFileInfo fi = null; try { @@ -465,7 +452,6 @@ void setExplicitLac(long ledgerId, ByteBuf lac) throws IOException { try { fi = getFileInfo(ledgerId, null); fi.setExplicitLac(lac); - return; } finally { if (null != fi) { fi.release(); @@ -479,7 +465,7 @@ public ByteBuf getExplicitLac(long ledgerId) { fi = getFileInfo(ledgerId, null); return fi.getExplicitLac(); } catch (IOException e) { - LOG.error("Exception during getLastAddConfirmed: {}", e); + LOG.error("Exception during getLastAddConfirmed", e); return null; } finally { if (null != fi) { @@ -525,7 +511,12 @@ private File getLedgerDirForLedger(FileInfo fi) { private void moveLedgerIndexFile(Long l, FileInfo fi) throws NoWritableLedgerDirException, IOException { File newLedgerIndexFile = getNewLedgerIndexFile(l, getLedgerDirForLedger(fi)); - fi.moveToNewLocation(newLedgerIndexFile, fi.getSizeSinceLastwrite()); + try { + fi.moveToNewLocation(newLedgerIndexFile, fi.getSizeSinceLastWrite()); + } catch (FileInfo.FileInfoDeletedException fileInfoDeleted) { + // File concurrently deleted + throw new Bookie.NoLedgerException(l); + } } void flushLedgerHeader(long ledger) throws IOException { @@ -599,14 +590,14 @@ public int compare(LedgerEntryPage o1, LedgerEntryPage o2) { private void writeBuffers(Long ledger, List entries, FileInfo fi, - int start, int count) throws 
IOException { + int start, int count) throws IOException, Bookie.NoLedgerException { if (LOG.isTraceEnabled()) { LOG.trace("Writing {} buffers of {}", count, Long.toHexString(ledger)); } if (count == 0) { return; } - ByteBuffer buffs[] = new ByteBuffer[count]; + ByteBuffer[] buffs = new ByteBuffer[count]; for (int j = 0; j < count; j++) { buffs[j] = entries.get(start + j).getPageToWrite(); if (entries.get(start + j).getLedger() != ledger) { @@ -616,7 +607,12 @@ private void writeBuffers(Long ledger, } long totalWritten = 0; while (buffs[buffs.length - 1].remaining() > 0) { - long rc = fi.write(buffs, entries.get(start + 0).getFirstEntryPosition()); + long rc = 0; + try { + rc = fi.write(buffs, entries.get(start + 0).getFirstEntryPosition()); + } catch (FileInfo.FileInfoDeletedException e) { + throw new Bookie.NoLedgerException(ledger); + } if (rc <= 0) { throw new IOException("Short write to ledger " + ledger + " rc = " + rc); } @@ -665,7 +661,7 @@ long getPersistEntryBeyondInMem(long ledgerId, long lastEntryInMem) throws IOExc fi = getFileInfo(ledgerId, null); long size = fi.size(); // make sure the file size is aligned with index entry size - // otherwise we may read incorret data + // otherwise we may read incorrect data if (0 != size % LedgerEntryPage.getIndexEntrySize()) { LOG.warn("Index file of ledger {} is not aligned with index entry size.", ledgerId); size = size - size % LedgerEntryPage.getIndexEntrySize(); @@ -705,4 +701,22 @@ long getPersistEntryBeyondInMem(long ledgerId, long lastEntryInMem) throws IOExc return lastEntry; } + /** + * Read ledger meta. + * @param ledgerId Ledger Id + */ + public LedgerCache.LedgerIndexMetadata readLedgerIndexMetadata(long ledgerId) throws IOException { + CachedFileInfo fi = null; + try { + fi = getFileInfo(ledgerId, null); + return new LedgerCache.LedgerIndexMetadata( + fi.getMasterKey(), + fi.size(), + fi.isFenced()); + } finally { + if (fi != null) { + fi.release(); + } + } + } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/InterleavedLedgerStorage.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/InterleavedLedgerStorage.java index 08e7f4ef914..4c6b7a9ee4d 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/InterleavedLedgerStorage.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/InterleavedLedgerStorage.java @@ -22,33 +22,53 @@ package org.apache.bookkeeper.bookie; import static com.google.common.base.Preconditions.checkNotNull; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.BOOKIE_READ_ENTRY; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.BOOKIE_SCOPE; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.CATEGORY_SERVER; import static org.apache.bookkeeper.bookie.BookKeeperServerStats.ENTRYLOGGER_SCOPE; import static org.apache.bookkeeper.bookie.BookKeeperServerStats.STORAGE_GET_ENTRY; import static org.apache.bookkeeper.bookie.BookKeeperServerStats.STORAGE_GET_OFFSET; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.STORAGE_SCRUB_PAGES_SCANNED; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.STORAGE_SCRUB_PAGE_RETRIES; -import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.Lists; +import com.google.common.util.concurrent.RateLimiter; +import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; import io.netty.buffer.ByteBuf; +import io.netty.buffer.ByteBufAllocator; +import io.netty.util.ReferenceCountUtil; import java.io.File; 
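The alignment fix-up in getPersistEntryBeyondInMem above truncates a torn trailing write down to whole index entries before reading. The arithmetic in isolation, assuming the 8-byte entry size that LedgerEntryPage.getIndexEntrySize() returns (one long offset per entry):

    public class IndexAlignmentSketch {
        public static void main(String[] args) {
            final int indexEntrySize = 8;      // assumed; see LedgerEntryPage.getIndexEntrySize()
            long size = 4100;                  // e.g. a torn write left 4 extra bytes
            if (size % indexEntrySize != 0) {
                size -= size % indexEntrySize; // keep only whole index entries
            }
            System.out.println(size);          // 4096
        }
    }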
import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.EnumSet; +import java.util.List; import java.util.Map; import java.util.NavigableMap; +import java.util.Optional; +import java.util.PrimitiveIterator.OfLong; import java.util.concurrent.CopyOnWriteArrayList; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; - +import lombok.Cleanup; +import lombok.Getter; import org.apache.bookkeeper.bookie.Bookie.NoLedgerException; import org.apache.bookkeeper.bookie.CheckpointSource.Checkpoint; -import org.apache.bookkeeper.bookie.EntryLogger.EntryLogListener; +import org.apache.bookkeeper.bookie.DefaultEntryLogger.EntryLogListener; import org.apache.bookkeeper.bookie.LedgerDirsManager.LedgerDirsListener; +import org.apache.bookkeeper.bookie.storage.EntryLogger; +import org.apache.bookkeeper.common.util.MathUtils; import org.apache.bookkeeper.common.util.Watcher; import org.apache.bookkeeper.conf.ServerConfiguration; import org.apache.bookkeeper.meta.LedgerManager; import org.apache.bookkeeper.proto.BookieProtocol; +import org.apache.bookkeeper.stats.Counter; import org.apache.bookkeeper.stats.OpStatsLogger; import org.apache.bookkeeper.stats.StatsLogger; -import org.apache.bookkeeper.util.MathUtils; +import org.apache.bookkeeper.stats.annotations.StatsDoc; import org.apache.bookkeeper.util.SnapshotMap; +import org.apache.commons.lang3.mutable.MutableBoolean; +import org.apache.commons.lang3.mutable.MutableLong; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -58,13 +78,19 @@ *

This ledger storage implementation stores all entries in a single * file and maintains an index file for each ledger. */ +@StatsDoc( + name = BOOKIE_SCOPE, + category = CATEGORY_SERVER, + help = "Bookie related stats" +) public class InterleavedLedgerStorage implements CompactableLedgerStorage, EntryLogListener { private static final Logger LOG = LoggerFactory.getLogger(InterleavedLedgerStorage.class); - EntryLogger entryLogger; + DefaultEntryLogger entryLogger; + @Getter LedgerCache ledgerCache; - protected CheckpointSource checkpointSource; - protected Checkpointer checkpointer; + protected CheckpointSource checkpointSource = CheckpointSource.DEFAULT; + protected Checkpointer checkpointer = Checkpointer.NULL; private final CopyOnWriteArrayList ledgerDeletionListeners = Lists.newCopyOnWriteArrayList(); @@ -80,12 +106,24 @@ public class InterleavedLedgerStorage implements CompactableLedgerStorage, Entry private final AtomicBoolean somethingWritten = new AtomicBoolean(false); // Expose Stats + @StatsDoc( + name = STORAGE_GET_OFFSET, + help = "Operation stats of getting offset from ledger cache", + parent = BOOKIE_READ_ENTRY + ) private OpStatsLogger getOffsetStats; + @StatsDoc( + name = STORAGE_GET_ENTRY, + help = "Operation stats of getting entry from entry logger", + parent = BOOKIE_READ_ENTRY, + happensAfter = STORAGE_GET_OFFSET + ) private OpStatsLogger getEntryStats; + private OpStatsLogger pageScanStats; + private Counter retryCounter; - @VisibleForTesting public InterleavedLedgerStorage() { - activeLedgers = new SnapshotMap(); + activeLedgers = new SnapshotMap<>(); } @Override @@ -93,23 +131,69 @@ public void initialize(ServerConfiguration conf, LedgerManager ledgerManager, LedgerDirsManager ledgerDirsManager, LedgerDirsManager indexDirsManager, - StateManager stateManager, - CheckpointSource checkpointSource, - Checkpointer checkpointer, - StatsLogger statsLogger) + StatsLogger statsLogger, + ByteBufAllocator allocator) throws IOException { - checkNotNull(checkpointSource, "invalid null checkpoint source"); - checkNotNull(checkpointer, "invalid null checkpointer"); + initializeWithEntryLogListener( + conf, + ledgerManager, + ledgerDirsManager, + indexDirsManager, + this, + statsLogger, + allocator); + } + + void initializeWithEntryLogListener(ServerConfiguration conf, + LedgerManager ledgerManager, + LedgerDirsManager ledgerDirsManager, + LedgerDirsManager indexDirsManager, + EntryLogListener entryLogListener, + StatsLogger statsLogger, + ByteBufAllocator allocator) throws IOException { + initializeWithEntryLogger( + conf, + ledgerManager, + ledgerDirsManager, + indexDirsManager, + new DefaultEntryLogger(conf, ledgerDirsManager, entryLogListener, statsLogger.scope(ENTRYLOGGER_SCOPE), + allocator), + statsLogger); + } + + @Override + public void setStateManager(StateManager stateManager) {} + + @Override + public void setCheckpointSource(CheckpointSource checkpointSource) { this.checkpointSource = checkpointSource; + } + + @Override + public void setCheckpointer(Checkpointer checkpointer) { this.checkpointer = checkpointer; - entryLogger = new EntryLogger(conf, ledgerDirsManager, this, statsLogger.scope(ENTRYLOGGER_SCOPE)); + } + + public void initializeWithEntryLogger(ServerConfiguration conf, + LedgerManager ledgerManager, + LedgerDirsManager ledgerDirsManager, + LedgerDirsManager indexDirsManager, + EntryLogger entryLogger, + StatsLogger statsLogger) throws IOException { + checkNotNull(checkpointSource, "invalid null checkpoint source"); + checkNotNull(checkpointer, "invalid null 
checkpointer"); + this.entryLogger = (DefaultEntryLogger) entryLogger; + this.entryLogger.addListener(this); ledgerCache = new LedgerCacheImpl(conf, activeLedgers, null == indexDirsManager ? ledgerDirsManager : indexDirsManager, statsLogger); - gcThread = new GarbageCollectorThread(conf, ledgerManager, this, statsLogger.scope("gc")); + gcThread = new GarbageCollectorThread(conf, ledgerManager, ledgerDirsManager, + this, entryLogger, statsLogger.scope("gc")); ledgerDirsManager.addLedgerDirsListener(getLedgerDirsListener()); // Expose Stats getOffsetStats = statsLogger.getOpStatsLogger(STORAGE_GET_OFFSET); getEntryStats = statsLogger.getOpStatsLogger(STORAGE_GET_ENTRY); + pageScanStats = statsLogger.getOpStatsLogger(STORAGE_SCRUB_PAGES_SCANNED); + retryCounter = statsLogger.getCounter(STORAGE_SCRUB_PAGE_RETRIES); } private LedgerDirsListener getLedgerDirsListener() { @@ -170,6 +254,45 @@ public void diskJustWritable(File disk) { }; } + @Override + public void forceGC() { + gcThread.enableForceGC(); + } + + @Override + public void forceGC(boolean forceMajor, boolean forceMinor) { + gcThread.enableForceGC(forceMajor, forceMinor); + } + + @Override + public boolean isInForceGC() { + return gcThread.isInForceGC(); + } + + public void suspendMinorGC() { + gcThread.suspendMinorGC(); + } + + public void suspendMajorGC() { + gcThread.suspendMajorGC(); + } + + public void resumeMinorGC() { + gcThread.resumeMinorGC(); + } + + public void resumeMajorGC() { + gcThread.resumeMajorGC(); + } + + public boolean isMajorGcSuspended() { + return gcThread.isMajorGcSuspend(); + } + + public boolean isMinorGcSuspended() { + return gcThread.isMinorGcSuspend(); + } + @Override public void start() { gcThread.start(); @@ -183,7 +306,7 @@ public void shutdown() throws InterruptedException { LOG.info("Shutting down GC thread"); gcThread.shutdown(); LOG.info("Shutting down entry logger"); - entryLogger.shutdown(); + entryLogger.close(); try { ledgerCache.close(); } catch (IOException e) { @@ -203,7 +326,7 @@ public boolean isFenced(long ledgerId) throws IOException { } @Override - public void setExplicitlac(long ledgerId, ByteBuf lac) throws IOException { + public void setExplicitLac(long ledgerId, ByteBuf lac) throws IOException { ledgerCache.setExplicitLac(ledgerId, lac); } @@ -227,6 +350,13 @@ public boolean ledgerExists(long ledgerId) throws IOException { return ledgerCache.ledgerExists(ledgerId); } + @Override + public boolean entryExists(long ledgerId, long entryId) throws IOException { + //Implementation should be as simple as what's below, but this needs testing + //return ledgerCache.getEntryOffset(ledgerId, entryId) > 0; + throw new UnsupportedOperationException("entry exists not supported"); + } + @Override public long getLastAddConfirmed(long ledgerId) throws IOException { Long lac = ledgerCache.getLastAddConfirmed(ledgerId); @@ -240,7 +370,7 @@ public long getLastAddConfirmed(long ledgerId) throws IOException { lac = bb.readLong(); lac = ledgerCache.updateLastAddConfirmed(ledgerId, lac); } finally { - bb.release(); + ReferenceCountUtil.release(bb); } } } @@ -255,6 +385,12 @@ public boolean waitForLastAddConfirmedUpdate(long ledgerId, return ledgerCache.waitForLastAddConfirmedUpdate(ledgerId, previousLAC, watcher); } + @Override + public void cancelWaitForLastAddConfirmedUpdate(long ledgerId, + Watcher watcher) + throws IOException { + ledgerCache.cancelWaitForLastAddConfirmedUpdate(ledgerId, watcher); + } @Override public long addEntry(ByteBuf entry) throws IOException { @@ -397,8 +533,7 @@ public void 
flushEntriesLocationsIndex() throws IOException { ledgerCache.flushLedger(true); } - @Override - public EntryLogger getEntryLogger() { + public DefaultEntryLogger getEntryLogger() { return entryLogger; } @@ -434,10 +569,163 @@ public void onRotateEntryLog() { // for interleaved ledger storage, we request a checkpoint when rotating a entry log file. // the checkpoint represent the point that all the entries added before this point are already // in ledger storage and ready to be synced to disk. - // TODO: we could consider remove checkpointSource and checkpointSouce#newCheckpoint + // TODO: we could consider remove checkpointSource and checkpointSource#newCheckpoint // later if we provide kind of LSN (Log/Journal Squeuence Number) // mechanism when adding entry. {@link https://github.com/apache/bookkeeper/issues/279} Checkpoint checkpoint = checkpointSource.newCheckpoint(); checkpointer.startCheckpoint(checkpoint); } + + /** + * Return iterable for index entries for ledgerId. + * @param ledgerId ledger to scan + * @return Iterator + */ + public LedgerCache.PageEntriesIterable getIndexEntries(long ledgerId) throws IOException { + return ledgerCache.listEntries(ledgerId); + } + + /** + * Read implementation metadata for index file. + * @param ledgerId + * @return Implementation metadata + * @throws IOException + */ + public LedgerCache.LedgerIndexMetadata readLedgerIndexMetadata(long ledgerId) throws IOException { + return ledgerCache.readLedgerIndexMetadata(ledgerId); + } + + @Override + @SuppressFBWarnings("RCN_REDUNDANT_NULLCHECK_WOULD_HAVE_BEEN_A_NPE") + public List localConsistencyCheck(Optional rateLimiter) throws IOException { + long checkStart = MathUtils.nowInNano(); + LOG.info("Starting localConsistencyCheck"); + long checkedLedgers = 0; + long checkedPages = 0; + final MutableLong checkedEntries = new MutableLong(0); + final MutableLong pageRetries = new MutableLong(0); + NavigableMap bkActiveLedgersSnapshot = activeLedgers.snapshot(); + final List errors = new ArrayList<>(); + for (Long ledger : bkActiveLedgersSnapshot.keySet()) { + try (LedgerCache.PageEntriesIterable pages = ledgerCache.listEntries(ledger)) { + for (LedgerCache.PageEntries page : pages) { + @Cleanup LedgerEntryPage lep = page.getLEP(); + MutableBoolean retry = new MutableBoolean(false); + do { + retry.setValue(false); + int version = lep.getVersion(); + + MutableBoolean success = new MutableBoolean(true); + long start = MathUtils.nowInNano(); + lep.getEntries((entry, offset) -> { + rateLimiter.ifPresent(RateLimiter::acquire); + + try { + entryLogger.checkEntry(ledger, entry, offset); + checkedEntries.increment(); + } catch (DefaultEntryLogger.EntryLookupException e) { + if (version != lep.getVersion()) { + pageRetries.increment(); + if (lep.isDeleted()) { + if (LOG.isDebugEnabled()) { + LOG.debug("localConsistencyCheck: ledger {} deleted", + ledger); + } + } else { + if (LOG.isDebugEnabled()) { + LOG.debug("localConsistencyCheck: " + + "concurrent modification, retrying"); + } + retry.setValue(true); + retryCounter.inc(); + } + return false; + } else { + errors.add(new DetectedInconsistency(ledger, entry, e)); + LOG.error("Got error: ", e); + } + success.setValue(false); + } + return true; + }); + + if (success.booleanValue()) { + pageScanStats.registerSuccessfulEvent( + MathUtils.elapsedNanos(start), TimeUnit.NANOSECONDS); + } else { + pageScanStats.registerFailedEvent( + MathUtils.elapsedNanos(start), TimeUnit.NANOSECONDS); + } + } while (retry.booleanValue()); + checkedPages++; + } + } catch 
(NoLedgerException | FileInfo.FileInfoDeletedException e) { + if (activeLedgers.containsKey(ledger)) { + LOG.error("Cannot find ledger {}, should exist, exception is ", ledger, e); + errors.add(new DetectedInconsistency(ledger, -1, e)); + } else if (LOG.isDebugEnabled()){ + LOG.debug("ledger {} deleted since snapshot taken", ledger); + } + } catch (Exception e) { + throw new IOException("Got other exception in localConsistencyCheck", e); + } + checkedLedgers++; + } + LOG.info( + "Finished localConsistencyCheck, took {}s to scan {} ledgers, {} pages, " + + "{} entries with {} retries, {} errors", + TimeUnit.NANOSECONDS.toSeconds(MathUtils.elapsedNanos(checkStart)), + checkedLedgers, + checkedPages, + checkedEntries.longValue(), + pageRetries.longValue(), + errors.size()); + + return errors; + } + + @Override + public List getGarbageCollectionStatus() { + return Collections.singletonList(gcThread.getGarbageCollectionStatus()); + } + + @Override + public OfLong getListOfEntriesOfLedger(long ledgerId) throws IOException { + return ledgerCache.getEntriesIterator(ledgerId); + } + + @Override + public void setLimboState(long ledgerId) throws IOException { + throw new UnsupportedOperationException( + "Limbo state only supported for DbLedgerStorage"); + } + + @Override + public boolean hasLimboState(long ledgerId) throws IOException { + throw new UnsupportedOperationException( + "Limbo state only supported for DbLedgerStorage"); + } + + @Override + public void clearLimboState(long ledgerId) throws IOException { + throw new UnsupportedOperationException( + "Limbo state only supported for DbLedgerStorage"); + } + + @Override + public EnumSet getStorageStateFlags() throws IOException { + return EnumSet.noneOf(StorageState.class); + } + + @Override + public void setStorageStateFlag(StorageState flags) throws IOException { + throw new UnsupportedOperationException( + "Storage state only flags supported for DbLedgerStorage"); + } + + @Override + public void clearStorageStateFlag(StorageState flags) throws IOException { + throw new UnsupportedOperationException( + "Storage state flags only supported for DbLedgerStorage"); + } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/InterleavedStorageRegenerateIndexOp.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/InterleavedStorageRegenerateIndexOp.java new file mode 100644 index 00000000000..a05971a021f --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/InterleavedStorageRegenerateIndexOp.java @@ -0,0 +1,247 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + */ +package org.apache.bookkeeper.bookie; + +import io.netty.buffer.ByteBuf; +import java.io.IOException; +import java.security.NoSuchAlgorithmException; +import java.util.HashMap; +import java.util.Map; +import java.util.PrimitiveIterator.OfLong; +import java.util.Set; +import java.util.concurrent.TimeUnit; +import org.apache.bookkeeper.bookie.storage.EntryLogScanner; +import org.apache.bookkeeper.common.util.Watcher; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.proto.checksum.DigestManager; +import org.apache.bookkeeper.stats.NullStatsLogger; +import org.apache.bookkeeper.util.DiskChecker; +import org.apache.bookkeeper.util.SnapshotMap; +import org.apache.commons.lang.time.DurationFormatUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Scan all entries in the entry log and rebuild the index file for one ledger. + */ +public class InterleavedStorageRegenerateIndexOp { + private static final Logger LOG = LoggerFactory.getLogger(InterleavedStorageRegenerateIndexOp.class); + + private final ServerConfiguration conf; + private final Set ledgerIds; + private final byte[] masterKey; + + public InterleavedStorageRegenerateIndexOp(ServerConfiguration conf, Set ledgerIds, byte[] password) + throws NoSuchAlgorithmException { + this.conf = conf; + this.ledgerIds = ledgerIds; + this.masterKey = DigestManager.generateMasterKey(password); + } + + static class RecoveryStats { + long firstEntry = Long.MAX_VALUE; + long lastEntry = Long.MIN_VALUE; + long numEntries = 0; + + void registerEntry(long entryId) { + numEntries++; + if (entryId < firstEntry) { + firstEntry = entryId; + } + if (entryId > lastEntry) { + lastEntry = entryId; + } + } + + long getNumEntries() { + return numEntries; + } + + long getFirstEntry() { + return firstEntry; + } + + long getLastEntry() { + return lastEntry; + } + } + + public void initiate(boolean dryRun) throws IOException { + LOG.info("Starting index rebuilding"); + + DiskChecker diskChecker = BookieResources.createDiskChecker(conf); + LedgerDirsManager ledgerDirsManager = BookieResources.createLedgerDirsManager( + conf, diskChecker, NullStatsLogger.INSTANCE); + LedgerDirsManager indexDirsManager = BookieResources.createIndexDirsManager( + conf, diskChecker, NullStatsLogger.INSTANCE, ledgerDirsManager); + DefaultEntryLogger entryLogger = new DefaultEntryLogger(conf, ledgerDirsManager); + final LedgerCache ledgerCache; + if (dryRun) { + ledgerCache = new DryRunLedgerCache(); + } else { + ledgerCache = new LedgerCacheImpl(conf, new SnapshotMap(), + indexDirsManager, NullStatsLogger.INSTANCE); + } + + Set entryLogs = entryLogger.getEntryLogsSet(); + + int totalEntryLogs = entryLogs.size(); + int completedEntryLogs = 0; + long startTime = System.nanoTime(); + + LOG.info("Scanning {} entry logs", totalEntryLogs); + + Map stats = new HashMap<>(); + for (long entryLogId : entryLogs) { + LOG.info("Scanning {}", entryLogId); + entryLogger.scanEntryLog(entryLogId, new EntryLogScanner() { + @Override + public void process(long ledgerId, long offset, ByteBuf entry) throws IOException { + long entryId = entry.getLong(8); + + stats.computeIfAbsent(ledgerId, (ignore) -> new RecoveryStats()).registerEntry(entryId); + + // Actual location indexed is pointing past the entry size + long location = (entryLogId << 32L) | (offset + 4); + + if (LOG.isDebugEnabled()) { + LOG.debug("Rebuilding {}:{} at location {} / {}", ledgerId, entryId, location >> 32, + location & (Integer.MAX_VALUE - 1)); + } + + if 
(!ledgerCache.ledgerExists(ledgerId)) { + ledgerCache.setMasterKey(ledgerId, masterKey); + ledgerCache.setFenced(ledgerId); + } + ledgerCache.putEntryOffset(ledgerId, entryId, location); + } + + @Override + public boolean accept(long ledgerId) { + return ledgerIds.contains(ledgerId); + } + }); + + ledgerCache.flushLedger(true); + + ++completedEntryLogs; + LOG.info("Completed scanning of log {}.log -- {} / {}", Long.toHexString(entryLogId), completedEntryLogs, + totalEntryLogs); + } + + LOG.info("Rebuilding indices done"); + for (long ledgerId : ledgerIds) { + RecoveryStats ledgerStats = stats.get(ledgerId); + if (ledgerStats == null || ledgerStats.getNumEntries() == 0) { + LOG.info(" {} - No entries found", ledgerId); + } else { + LOG.info(" {} - Found {} entries, from {} to {}", ledgerId, + ledgerStats.getNumEntries(), ledgerStats.getFirstEntry(), ledgerStats.getLastEntry()); + } + } + LOG.info("Total time: {}", DurationFormatUtils.formatDurationHMS( + TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startTime))); + } + + + static class DryRunLedgerCache implements LedgerCache { + @Override + public void close() { + } + @Override + public boolean setFenced(long ledgerId) throws IOException { + return false; + } + @Override + public boolean isFenced(long ledgerId) throws IOException { + throw new UnsupportedOperationException(); + } + @Override + public void setMasterKey(long ledgerId, byte[] masterKey) throws IOException { + } + @Override + public byte[] readMasterKey(long ledgerId) throws IOException, BookieException { + throw new UnsupportedOperationException(); + } + @Override + public boolean ledgerExists(long ledgerId) throws IOException { + return false; + } + @Override + public void putEntryOffset(long ledger, long entry, long offset) throws IOException { + } + @Override + public long getEntryOffset(long ledger, long entry) throws IOException { + throw new UnsupportedOperationException(); + } + @Override + public void flushLedger(boolean doAll) throws IOException { + } + @Override + public long getLastEntry(long ledgerId) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public Long getLastAddConfirmed(long ledgerId) throws IOException { + throw new UnsupportedOperationException(); + } + @Override + public long updateLastAddConfirmed(long ledgerId, long lac) throws IOException { + throw new UnsupportedOperationException(); + } + @Override + public boolean waitForLastAddConfirmedUpdate(long ledgerId, + long previousLAC, + Watcher watcher) + throws IOException { + throw new UnsupportedOperationException(); + } + @Override + public void cancelWaitForLastAddConfirmedUpdate(long ledgerId, + Watcher watcher) + throws IOException { + throw new UnsupportedOperationException(); + } + @Override + public void deleteLedger(long ledgerId) throws IOException { + } + @Override + public void setExplicitLac(long ledgerId, ByteBuf lac) throws IOException { + } + @Override + public ByteBuf getExplicitLac(long ledgerId) { + throw new UnsupportedOperationException(); + } + @Override + public PageEntriesIterable listEntries(long ledgerId) throws IOException { + throw new UnsupportedOperationException(); + } + @Override + public LedgerIndexMetadata readLedgerIndexMetadata(long ledgerId) throws IOException { + throw new UnsupportedOperationException(); + } + @Override + public OfLong getEntriesIterator(long ledgerId) throws IOException { + throw new UnsupportedOperationException(); + } + } +} diff --git 
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/Journal.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/Journal.java index 1b0d7070bac..2d68b8d2da8 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/Journal.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/Journal.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -21,16 +21,17 @@ package org.apache.bookkeeper.bookie; +import com.carrotsearch.hppc.ObjectHashSet; +import com.carrotsearch.hppc.procedures.ObjectProcedure; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Stopwatch; -import com.google.common.util.concurrent.MoreExecutors; - import io.netty.buffer.ByteBuf; +import io.netty.buffer.ByteBufAllocator; import io.netty.buffer.Unpooled; +import io.netty.buffer.UnpooledByteBufAllocator; import io.netty.util.Recycler; import io.netty.util.Recycler.Handle; -import io.netty.util.concurrent.DefaultThreadFactory; - +import io.netty.util.ReferenceCountUtil; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; @@ -40,40 +41,45 @@ import java.util.ArrayList; import java.util.Collections; import java.util.List; -import java.util.concurrent.BlockingQueue; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; - +import java.util.function.Consumer; import org.apache.bookkeeper.bookie.LedgerDirsManager.NoWritableLedgerDirException; +import org.apache.bookkeeper.bookie.stats.JournalStats; +import org.apache.bookkeeper.common.collections.BatchedArrayBlockingQueue; +import org.apache.bookkeeper.common.collections.BatchedBlockingQueue; +import org.apache.bookkeeper.common.collections.BlockingMpscQueue; import org.apache.bookkeeper.common.collections.RecyclableArrayList; +import org.apache.bookkeeper.common.util.MathUtils; +import org.apache.bookkeeper.common.util.MemoryLimitController; +import org.apache.bookkeeper.common.util.affinity.CpuAffinity; import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.proto.BookieRequestHandler; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.WriteCallback; import org.apache.bookkeeper.stats.Counter; import org.apache.bookkeeper.stats.NullStatsLogger; import org.apache.bookkeeper.stats.OpStatsLogger; import org.apache.bookkeeper.stats.StatsLogger; +import org.apache.bookkeeper.stats.ThreadRegistry; import org.apache.bookkeeper.util.IOUtils; -import org.apache.bookkeeper.util.MathUtils; -import org.apache.bookkeeper.util.collections.GrowableArrayBlockingQueue; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * Provide journal related management. */ -public class Journal extends BookieCriticalThread implements CheckpointSource { +public class Journal implements CheckpointSource { private static final Logger LOG = LoggerFactory.getLogger(Journal.class); private static final RecyclableArrayList.Recycler entryListRecycler = new RecyclableArrayList.Recycler(); - private static final RecyclableArrayList EMPTY_ARRAY_LIST = new RecyclableArrayList<>(); + + private BookieCriticalThread thread; /** * Filter to pickup journals. 
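With listJournalIds made public below and JournalIdFilter a public single-method interface, callers can express the same query the journal-rolling filter performs with a lambda. A sketch (journalsBefore is an illustrative helper, not part of the patch):

    import java.io.File;
    import java.util.List;
    import org.apache.bookkeeper.bookie.Journal;

    public class JournalListSketch {
        // list only journals strictly older than the last-mark log id,
        // i.e. journals whose contents are fully persisted and can be removed
        static List<Long> journalsBefore(File journalDir, long lastMarkLogId) {
            return Journal.listJournalIds(journalDir, journalId -> journalId < lastMarkLogId);
        }
    }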
*/ - private interface JournalIdFilter { + public interface JournalIdFilter { boolean accept(long journalId); } @@ -82,8 +88,8 @@ private interface JournalIdFilter { */ @FunctionalInterface public interface BufferedChannelBuilder { - BufferedChannelBuilder DEFAULT_BCBUILDER = - (FileChannel fc, int capacity) -> new BufferedChannel(fc, capacity); + BufferedChannelBuilder DEFAULT_BCBUILDER = (FileChannel fc, + int capacity) -> new BufferedChannel(UnpooledByteBufAllocator.DEFAULT, fc, capacity); BufferedChannel create(FileChannel fc, int capacity) throws IOException; } @@ -97,8 +103,8 @@ public interface BufferedChannelBuilder { * @param filter journal id filter * @return list of filtered ids */ - static List listJournalIds(File journalDir, JournalIdFilter filter) { - File logFiles[] = journalDir.listFiles(); + public static List listJournalIds(File journalDir, JournalIdFilter filter) { + File[] logFiles = journalDir.listFiles(); if (logFiles == null || logFiles.length == 0) { return Collections.emptyList(); } @@ -185,17 +191,19 @@ public LogMark getCurMark() { } void rollLog(LastLogMark lastMark) throws NoWritableLedgerDirException { - byte buff[] = new byte[16]; + byte[] buff = new byte[16]; ByteBuffer bb = ByteBuffer.wrap(buff); // we should record marked in markLog // which is safe since records before lastMark have been // persisted to disk (both index & entry logger) lastMark.getCurMark().writeLogMark(bb); + if (LOG.isDebugEnabled()) { LOG.debug("RollLog to persist last marked log : {}", lastMark.getCurMark()); } + List writableLedgerDirs = ledgerDirsManager - .getWritableLedgerDirs(); + .getWritableLedgerDirsForNewLog(); for (File dir : writableLedgerDirs) { File file = new File(dir, lastMarkFileName); FileOutputStream fos = null; @@ -221,8 +229,8 @@ void rollLog(LastLogMark lastMark) throws NoWritableLedgerDirException { * The last mark should first be max journal log id, * and then max log position in max journal log. */ - void readLog() { - byte buff[] = new byte[16]; + public void readLog() { + byte[] buff = new byte[16]; ByteBuffer bb = ByteBuffer.wrap(buff); LogMark mark = new LogMark(); for (File dir: ledgerDirsManager.getAllLedgerDirs()) { @@ -266,11 +274,7 @@ private static class JournalRollingFilter implements JournalIdFilter { @Override public boolean accept(long journalId) { - if (journalId < lastMark.getCurMark().getLogFileId()) { - return true; - } else { - return false; - } + return journalId < lastMark.getCurMark().getLogFileId(); } } @@ -292,7 +296,7 @@ public interface JournalScanner { /** * Journal Entry to Record. 
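+ *
+ * <p>A queue entry is created when an add request is enqueued (see {@code logAddEntry}),
+ * its write callback is run once the entry has been persisted according to the sync
+ * policy, and the instance is then recycled.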
*/ - private static class QueueEntry implements Runnable { + static class QueueEntry implements Runnable { ByteBuf entry; long ledgerId; long entryId; @@ -302,12 +306,11 @@ private static class QueueEntry implements Runnable { boolean ackBeforeSync; OpStatsLogger journalAddEntryStats; - Counter journalCbQueueSize; - + Counter callbackTime; static QueueEntry create(ByteBuf entry, boolean ackBeforeSync, long ledgerId, long entryId, WriteCallback cb, Object ctx, long enqueueTime, OpStatsLogger journalAddEntryStats, - Counter journalCbQueueSize) { + Counter callbackTime) { QueueEntry qe = RECYCLER.get(); qe.entry = entry; qe.ackBeforeSync = ackBeforeSync; @@ -317,21 +320,26 @@ static QueueEntry create(ByteBuf entry, boolean ackBeforeSync, long ledgerId, lo qe.entryId = entryId; qe.enqueueTime = enqueueTime; qe.journalAddEntryStats = journalAddEntryStats; - qe.journalCbQueueSize = journalCbQueueSize; + qe.callbackTime = callbackTime; return qe; } @Override public void run() { + long startTime = System.nanoTime(); if (LOG.isDebugEnabled()) { LOG.debug("Acknowledge Ledger: {}, Entry: {}", ledgerId, entryId); } - journalCbQueueSize.dec(); journalAddEntryStats.registerSuccessfulEvent(MathUtils.elapsedNanos(enqueueTime), TimeUnit.NANOSECONDS); cb.writeComplete(0, ledgerId, entryId, null, ctx); + callbackTime.addLatency(MathUtils.elapsedNanos(startTime), TimeUnit.NANOSECONDS); recycle(); } + private Object getCtx() { + return ctx; + } + private final Handle recyclerHandle; private QueueEntry(Handle recyclerHandle) { @@ -339,12 +347,18 @@ private QueueEntry(Handle recyclerHandle) { } private static final Recycler RECYCLER = new Recycler() { + @Override protected QueueEntry newObject(Recycler.Handle handle) { return new QueueEntry(handle); } }; private void recycle() { + this.entry = null; + this.cb = null; + this.ctx = null; + this.journalAddEntryStats = null; + this.callbackTime = null; recyclerHandle.recycle(this); } } @@ -353,40 +367,36 @@ private void recycle() { * Token which represents the need to force a write to the Journal. 
*/ @VisibleForTesting - public class ForceWriteRequest { + public static class ForceWriteRequest { private JournalChannel logFile; private RecyclableArrayList forceWriteWaiters; private boolean shouldClose; - private boolean isMarker; private long lastFlushedPosition; private long logId; - - public int process(boolean shouldForceWrite) throws IOException { - forceWriteQueueSize.dec(); - if (isMarker) { - return 0; - } - - try { - if (shouldForceWrite) { - long startTime = MathUtils.nowInNano(); - this.logFile.forceWrite(false); - journalSyncStats.registerSuccessfulEvent(MathUtils.elapsedNanos(startTime), TimeUnit.NANOSECONDS); - } - lastLogMark.setCurLogMark(this.logId, this.lastFlushedPosition); - - // Notify the waiters that the force write succeeded - for (int i = 0; i < forceWriteWaiters.size(); i++) { - QueueEntry qe = forceWriteWaiters.get(i); - if (qe != null) { - cbThreadPool.execute(qe); + private boolean flushed; + + public int process(ObjectHashSet writeHandlers) { + closeFileIfNecessary(); + + // Notify the waiters that the force write succeeded + for (int i = 0; i < forceWriteWaiters.size(); i++) { + QueueEntry qe = forceWriteWaiters.get(i); + if (qe != null) { + if (qe.getCtx() instanceof BookieRequestHandler + && qe.entryId != BookieImpl.METAENTRY_ID_FORCE_LEDGER) { + writeHandlers.add((BookieRequestHandler) qe.getCtx()); } - journalCbQueueSize.inc(); + qe.run(); } + } + + return forceWriteWaiters.size(); + } - return forceWriteWaiters.size(); - } finally { - closeFileIfNecessary(); + private void flushFileToDisk() throws IOException { + if (!flushed) { + logFile.forceWrite(false); + flushed = true; } } @@ -396,6 +406,7 @@ public void closeFileIfNecessary() { // We should guard against exceptions so its // safe to call in catch blocks try { + flushFileToDisk(); logFile.close(); // Call close only once shouldClose = false; @@ -413,6 +424,7 @@ private ForceWriteRequest(Handle recyclerHandle) { private void recycle() { logFile = null; + flushed = false; if (forceWriteWaiters != null) { forceWriteWaiters.recycle(); forceWriteWaiters = null; @@ -425,20 +437,19 @@ private ForceWriteRequest createForceWriteRequest(JournalChannel logFile, long logId, long lastFlushedPosition, RecyclableArrayList forceWriteWaiters, - boolean shouldClose, - boolean isMarker) { + boolean shouldClose) { ForceWriteRequest req = forceWriteRequestsRecycler.get(); req.forceWriteWaiters = forceWriteWaiters; req.logFile = logFile; req.logId = logId; req.lastFlushedPosition = lastFlushedPosition; req.shouldClose = shouldClose; - req.isMarker = isMarker; - forceWriteQueueSize.inc(); + journalStats.getForceWriteQueueSize().inc(); return req; } - private final Recycler forceWriteRequestsRecycler = new Recycler() { + private static final Recycler forceWriteRequestsRecycler = new Recycler() { + @Override protected ForceWriteRequest newObject( Recycler.Handle handle) { return new ForceWriteRequest(handle); @@ -453,80 +464,93 @@ private class ForceWriteThread extends BookieCriticalThread { volatile boolean running = true; // This holds the queue entries that should be notified after a // successful force write - Thread threadToNotifyOnEx; + Consumer threadToNotifyOnEx; + // should we group force writes private final boolean enableGroupForceWrites; - // make flush interval as a parameter - public ForceWriteThread(Thread threadToNotifyOnEx, boolean enableGroupForceWrites) { + private final Counter forceWriteThreadTime; + + public ForceWriteThread(Consumer threadToNotifyOnEx, + boolean enableGroupForceWrites, + 
StatsLogger statsLogger) { super("ForceWriteThread"); + this.setPriority(Thread.MAX_PRIORITY); this.threadToNotifyOnEx = threadToNotifyOnEx; this.enableGroupForceWrites = enableGroupForceWrites; + this.forceWriteThreadTime = statsLogger.getThreadScopedCounter("force-write-thread-time"); } @Override public void run() { LOG.info("ForceWrite Thread started"); - boolean shouldForceWrite = true; - int numReqInLastForceWrite = 0; + ThreadRegistry.register(super.getName()); + + if (conf.isBusyWaitEnabled()) { + try { + CpuAffinity.acquireCore(); + } catch (Exception e) { + LOG.warn("Unable to acquire CPU core for Journal ForceWrite thread: {}", e.getMessage(), e); + } + } + + final ObjectHashSet writeHandlers = new ObjectHashSet<>(); + final ForceWriteRequest[] localRequests = new ForceWriteRequest[conf.getJournalQueueSize()]; + while (running) { - ForceWriteRequest req = null; try { - req = forceWriteRequests.take(); - // Force write the file and then notify the write completions - // - if (!req.isMarker) { - if (shouldForceWrite) { - // if we are going to force write, any request that is already in the - // queue will benefit from this force write - post a marker prior to issuing - // the flush so until this marker is encountered we can skip the force write - if (enableGroupForceWrites) { - forceWriteRequests.put(createForceWriteRequest(req.logFile, 0, 0, null, false, true)); - } + int numEntriesInLastForceWrite = 0; - // If we are about to issue a write, record the number of requests in - // the last force write and then reset the counter so we can accumulate - // requests in the write we are about to issue - if (numReqInLastForceWrite > 0) { - forceWriteGroupingCountStats.registerSuccessfulValue(numReqInLastForceWrite); - numReqInLastForceWrite = 0; - } - } - } - numReqInLastForceWrite += req.process(shouldForceWrite); - - if (enableGroupForceWrites - // if its a marker we should switch back to flushing - && !req.isMarker - // This indicates that this is the last request in a given file - // so subsequent requests will go to a different file so we should - // flush on the next request - && !req.shouldClose) { - shouldForceWrite = false; - } else { - shouldForceWrite = true; + int requestsCount = forceWriteRequests.takeAll(localRequests); + + journalStats.getForceWriteQueueSize().addCount(-requestsCount); + + // Sync and mark the journal up to the position of the last entry in the batch + ForceWriteRequest lastRequest = localRequests[requestsCount - 1]; + syncJournal(lastRequest); + + // All the requests in the batch are now fully-synced. 
We can trigger sending the + // responses + for (int i = 0; i < requestsCount; i++) { + ForceWriteRequest req = localRequests[i]; + numEntriesInLastForceWrite += req.process(writeHandlers); + localRequests[i] = null; + req.recycle(); } + + journalStats.getForceWriteGroupingCountStats() + .registerSuccessfulValue(numEntriesInLastForceWrite); + writeHandlers.forEach( + (ObjectProcedure) + BookieRequestHandler::flushPendingResponse); + writeHandlers.clear(); } catch (IOException ioe) { LOG.error("I/O exception in ForceWrite thread", ioe); running = false; } catch (InterruptedException e) { Thread.currentThread().interrupt(); - LOG.error("ForceWrite thread interrupted", e); - // close is idempotent - if (null != req) { - req.shouldClose = true; - req.closeFileIfNecessary(); - } + LOG.info("ForceWrite thread interrupted"); running = false; - } finally { - if (req != null) { - req.recycle(); - } } } // Regardless of what caused us to exit, we should notify the // the parent thread as it should either exit or be in the process // of exiting else we will have write requests hang - threadToNotifyOnEx.interrupt(); + threadToNotifyOnEx.accept(null); + } + + private void syncJournal(ForceWriteRequest lastRequest) throws IOException { + long fsyncStartTime = MathUtils.nowInNano(); + try { + lastRequest.flushFileToDisk(); + journalStats.getJournalSyncStats().registerSuccessfulEvent(MathUtils.elapsedNanos(fsyncStartTime), + TimeUnit.NANOSECONDS); + lastLogMark.setCurLogMark(lastRequest.logId, lastRequest.lastFlushedPosition); + } catch (IOException ioe) { + journalStats.getJournalSyncStats() + .registerFailedEvent(MathUtils.elapsedNanos(fsyncStartTime), TimeUnit.NANOSECONDS); + throw ioe; + } } + // shutdown sync thread void shutdown() throws InterruptedException { running = false; @@ -575,6 +599,8 @@ static void writePaddingBytes(JournalChannel jc, ByteBuf paddingBuffer, int jour final File journalDirectory; final ServerConfiguration conf; final ForceWriteThread forceWriteThread; + final FileChannelProvider fileChannelProvider; + // Time after which we will stop grouping and issue the flush private final long maxGroupWaitInNanos; // Threshold after which we flush any buffered journal entries @@ -587,6 +613,10 @@ static void writePaddingBytes(JournalChannel jc, ByteBuf paddingBuffer, int jour private final boolean removePagesFromCache; private final int journalFormatVersionToWrite; private final int journalAlignmentSize; + // Controls the PageCache flush interval when syncData is disabled, to reduce disk I/O utilization + private final long journalPageCacheFlushIntervalMSec; + // Whether to reuse journal files; if enabled, up to maxBackupJournals journal files are kept as the reuse pool. + private final boolean journalReuseFiles; // Should data be fsynced on disk before triggering the callback private final boolean syncData; @@ -597,45 +627,49 @@ static void writePaddingBytes(JournalChannel jc, ByteBuf paddingBuffer, int jour private final String lastMarkFileName; - /** - * The thread pool used to handle callback. 
- */ - private final ExecutorService cbThreadPool; + private final Counter callbackTime; + private static final String journalThreadName = "BookieJournal"; // journal entry queue to commit - final BlockingQueue queue = new GrowableArrayBlockingQueue(); - final BlockingQueue forceWriteRequests = new GrowableArrayBlockingQueue(); + final BatchedBlockingQueue queue; + BatchedBlockingQueue forceWriteRequests; volatile boolean running = true; private final LedgerDirsManager ledgerDirsManager; + private final ByteBufAllocator allocator; // Expose Stats - private final OpStatsLogger journalAddEntryStats; - private final OpStatsLogger journalForceLedgerStats; - private final OpStatsLogger journalSyncStats; - private final OpStatsLogger journalCreationStats; - private final OpStatsLogger journalFlushStats; - private final OpStatsLogger journalProcessTimeStats; - private final OpStatsLogger journalQueueStats; - private final OpStatsLogger forceWriteGroupingCountStats; - private final OpStatsLogger forceWriteBatchEntriesStats; - private final OpStatsLogger forceWriteBatchBytesStats; - private final Counter journalQueueSize; - private final Counter forceWriteQueueSize; - private final Counter journalCbQueueSize; - private final Counter flushMaxWaitCounter; - private final Counter flushMaxOutstandingBytesCounter; - private final Counter flushEmptyQueueCounter; - private final Counter journalWriteBytes; + private final JournalStats journalStats; + + private JournalAliveListener journalAliveListener; + + private MemoryLimitController memoryLimitController; + public Journal(int journalIndex, File journalDirectory, ServerConfiguration conf, LedgerDirsManager ledgerDirsManager) { - this(journalIndex, journalDirectory, conf, ledgerDirsManager, NullStatsLogger.INSTANCE); + this(journalIndex, journalDirectory, conf, ledgerDirsManager, NullStatsLogger.INSTANCE, + UnpooledByteBufAllocator.DEFAULT); } public Journal(int journalIndex, File journalDirectory, ServerConfiguration conf, - LedgerDirsManager ledgerDirsManager, StatsLogger statsLogger) { - super("BookieJournal-" + conf.getBookiePort()); + LedgerDirsManager ledgerDirsManager, StatsLogger statsLogger, ByteBufAllocator allocator) { + this.allocator = allocator; + + StatsLogger journalStatsLogger = statsLogger.scopeLabel("journalIndex", String.valueOf(journalIndex)); + + if (conf.isBusyWaitEnabled()) { + // To achieve lower latency, use busy-wait blocking queue implementation + queue = new BlockingMpscQueue<>(conf.getJournalQueueSize()); + forceWriteRequests = new BlockingMpscQueue<>(conf.getJournalQueueSize()); + } else { + queue = new BatchedArrayBlockingQueue<>(conf.getJournalQueueSize()); + forceWriteRequests = new BatchedArrayBlockingQueue<>(conf.getJournalQueueSize()); + } + + // Adjust the journal max memory in case there are multiple journals configured. 
+ long journalMaxMemory = conf.getJournalMaxMemorySizeMb() / conf.getJournalDirNames().length * 1024 * 1024; + this.memoryLimitController = new MemoryLimitController(journalMaxMemory); this.ledgerDirsManager = ledgerDirsManager; this.conf = conf; this.journalDirectory = journalDirectory; @@ -644,19 +678,16 @@ public Journal(int journalIndex, File journalDirectory, ServerConfiguration conf this.journalWriteBufferSize = conf.getJournalWriteBufferSizeKB() * KB; this.syncData = conf.getJournalSyncData(); this.maxBackupJournals = conf.getMaxBackupJournals(); - this.forceWriteThread = new ForceWriteThread(this, conf.getJournalAdaptiveGroupWrites()); + this.forceWriteThread = new ForceWriteThread((__) -> this.interruptThread(), + conf.getJournalAdaptiveGroupWrites(), journalStatsLogger); this.maxGroupWaitInNanos = TimeUnit.MILLISECONDS.toNanos(conf.getJournalMaxGroupWaitMSec()); this.bufferedWritesThreshold = conf.getJournalBufferedWritesThreshold(); this.bufferedEntriesThreshold = conf.getJournalBufferedEntriesThreshold(); this.journalFormatVersionToWrite = conf.getJournalFormatVersionToWrite(); this.journalAlignmentSize = conf.getJournalAlignmentSize(); - if (conf.getNumJournalCallbackThreads() > 0) { - this.cbThreadPool = Executors.newFixedThreadPool(conf.getNumJournalCallbackThreads(), - new DefaultThreadFactory("bookie-journal-callback")); - } else { - this.cbThreadPool = MoreExecutors.newDirectExecutorService(); - } - + this.journalPageCacheFlushIntervalMSec = conf.getJournalPageCacheFlushIntervalMSec(); + this.journalReuseFiles = conf.getJournalReuseFiles(); + this.callbackTime = journalStatsLogger.getThreadScopedCounter("callback-time"); // Unless there is a cap on the max wait (which requires group force writes) // we cannot skip flushing for queue empty this.flushWhenQueueEmpty = maxGroupWaitInNanos <= 0 || conf.getJournalFlushWhenQueueEmpty(); @@ -673,27 +704,35 @@ public Journal(int journalIndex, File journalDirectory, ServerConfiguration conf LOG.debug("Last Log Mark : {}", lastLogMark.getCurMark()); } + try { + this.fileChannelProvider = FileChannelProvider.newProvider(conf.getJournalChannelProvider()); + } catch (IOException e) { + LOG.error("Failed to initiate file channel provider: {}", conf.getJournalChannelProvider()); + throw new RuntimeException(e); + } + // Expose Stats - journalAddEntryStats = statsLogger.getOpStatsLogger(BookKeeperServerStats.JOURNAL_ADD_ENTRY); - journalForceLedgerStats = statsLogger.getOpStatsLogger(BookKeeperServerStats.JOURNAL_FORCE_LEDGER); - journalSyncStats = statsLogger.getOpStatsLogger(BookKeeperServerStats.JOURNAL_SYNC); - journalCreationStats = statsLogger.getOpStatsLogger(BookKeeperServerStats.JOURNAL_CREATION_LATENCY); - journalFlushStats = statsLogger.getOpStatsLogger(BookKeeperServerStats.JOURNAL_FLUSH_LATENCY); - journalQueueStats = statsLogger.getOpStatsLogger(BookKeeperServerStats.JOURNAL_QUEUE_LATENCY); - journalProcessTimeStats = statsLogger.getOpStatsLogger(BookKeeperServerStats.JOURNAL_PROCESS_TIME_LATENCY); - forceWriteGroupingCountStats = - statsLogger.getOpStatsLogger(BookKeeperServerStats.JOURNAL_FORCE_WRITE_GROUPING_COUNT); - forceWriteBatchEntriesStats = - statsLogger.getOpStatsLogger(BookKeeperServerStats.JOURNAL_FORCE_WRITE_BATCH_ENTRIES); - forceWriteBatchBytesStats = statsLogger.getOpStatsLogger(BookKeeperServerStats.JOURNAL_FORCE_WRITE_BATCH_BYTES); - journalQueueSize = statsLogger.getCounter(BookKeeperServerStats.JOURNAL_QUEUE_SIZE); - forceWriteQueueSize = 
statsLogger.getCounter(BookKeeperServerStats.JOURNAL_FORCE_WRITE_QUEUE_SIZE); - journalCbQueueSize = statsLogger.getCounter(BookKeeperServerStats.JOURNAL_CB_QUEUE_SIZE); - flushMaxWaitCounter = statsLogger.getCounter(BookKeeperServerStats.JOURNAL_NUM_FLUSH_MAX_WAIT); - flushMaxOutstandingBytesCounter = - statsLogger.getCounter(BookKeeperServerStats.JOURNAL_NUM_FLUSH_MAX_OUTSTANDING_BYTES); - flushEmptyQueueCounter = statsLogger.getCounter(BookKeeperServerStats.JOURNAL_NUM_FLUSH_EMPTY_QUEUE); - journalWriteBytes = statsLogger.getCounter(BookKeeperServerStats.JOURNAL_WRITE_BYTES); + this.journalStats = new JournalStats(journalStatsLogger, journalMaxMemory, + () -> memoryLimitController.currentUsage()); + } + + public Journal(int journalIndex, File journalDirectory, ServerConfiguration conf, + LedgerDirsManager ledgerDirsManager, StatsLogger statsLogger, + ByteBufAllocator allocator, JournalAliveListener journalAliveListener) { + this(journalIndex, journalDirectory, conf, ledgerDirsManager, statsLogger, allocator); + this.journalAliveListener = journalAliveListener; + } + + @VisibleForTesting + static Journal newJournal(int journalIndex, File journalDirectory, ServerConfiguration conf, + LedgerDirsManager ledgerDirsManager, StatsLogger statsLogger, + ByteBufAllocator allocator, JournalAliveListener journalAliveListener) { + return new Journal(journalIndex, journalDirectory, conf, ledgerDirsManager, statsLogger, allocator, + journalAliveListener); + } + + JournalStats getJournalStats() { + return this.journalStats; } public File getJournalDirectory() { @@ -704,6 +743,16 @@ public LastLogMark getLastLogMark() { return lastLogMark; } + /** + * Update the lastLogMark of the journal, indicating that the journal + * has been processed up to the given offset. + * @param id journal log id + * @param scanOffset offset up to which the journal has been scanned + */ + void setLastLogMark(Long id, long scanOffset) { + lastLogMark.setCurLogMark(id, scanOffset); + } + /** * Application tried to schedule a checkpoint. After all the txns added * before checkpoint are persisted, a checkpoint will be returned @@ -752,19 +801,22 @@ public void checkpointComplete(Checkpoint checkpoint, boolean compact) throws IO /** * Scan the journal. 
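+ *
+ * <p>A rough usage sketch (the scanner body is illustrative only):
+ * <pre>{@code
+ * long nextOffset = journal.scanJournal(journalId, 0L,
+ *     (journalVersion, offset, entry) -> LOG.info("record at offset {}", offset),
+ *     false);
+ * }</pre>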
* - * @param journalId Journal Log Id - * @param journalPos Offset to start scanning - * @param scanner Scanner to handle entries + * @param journalId Journal Log Id + * @param journalPos Offset to start scanning + * @param scanner Scanner to handle entries + * @param skipInvalidRecord whether to skip the rest of the journal when an invalid record is encountered + * @return the offset, in bytes, up to which the journal was read * @throws IOException */ - public void scanJournal(long journalId, long journalPos, JournalScanner scanner) + public long scanJournal(long journalId, long journalPos, JournalScanner scanner, boolean skipInvalidRecord) throws IOException { JournalChannel recLog; if (journalPos <= 0) { - recLog = new JournalChannel(journalDirectory, journalId, journalPreAllocSize, journalWriteBufferSize); + recLog = new JournalChannel(journalDirectory, journalId, journalPreAllocSize, journalWriteBufferSize, + conf, fileChannelProvider); } else { recLog = new JournalChannel(journalDirectory, journalId, journalPreAllocSize, journalWriteBufferSize, - journalPos); + journalPos, conf, fileChannelProvider); } int journalVersion = recLog.getFormatVersion(); try { @@ -785,8 +837,8 @@ public void scanJournal(long journalId, long journalPos, JournalScanner scanner) break; } boolean isPaddingRecord = false; - if (len == PADDING_MASK) { - if (journalVersion >= JournalChannel.V5) { + if (len < 0) { + if (len == PADDING_MASK && journalVersion >= JournalChannel.V5) { // skip padding bytes lenBuff.clear(); fullRead(recLog, lenBuff); @@ -800,7 +852,8 @@ public void scanJournal(long journalId, long journalPos, JournalScanner scanner) } isPaddingRecord = true; } else { - throw new IOException("Invalid record found with negative length : " + len); + LOG.error("Invalid record found with negative length: {}", len); + throw new IOException("Invalid record found with negative length " + len); } } recBuff.clear(); @@ -818,83 +871,54 @@ public void scanJournal(long journalId, long journalPos, JournalScanner scanner) scanner.process(journalVersion, offset, recBuff); } } + return recLog.fc.position(); + } catch (IOException e) { + if (skipInvalidRecord) { + LOG.warn("Failed to parse journal file, and skipInvalidRecord is true, skipping replay of the rest of this journal file"); + } else { + throw e; + } + return recLog.fc.position(); } finally { recLog.close(); } } - /** - * Replay journal files. - * - * @param scanner Scanner to process replayed entries. - * @throws IOException - */ - public void replay(JournalScanner scanner) throws IOException { - final LogMark markedLog = lastLogMark.getCurMark(); - List logs = listJournalIds(journalDirectory, new JournalIdFilter() { - @Override - public boolean accept(long journalId) { - if (journalId < markedLog.getLogFileId()) { - return false; - } - return true; - } - }); - // last log mark may be missed due to no sync up before - // validate filtered log ids only when we have markedLogId - if (markedLog.getLogFileId() > 0) { - if (logs.size() == 0 || logs.get(0) != markedLog.getLogFileId()) { - throw new IOException("Recovery log " + markedLog.getLogFileId() + " is missing"); - } - } - - if (LOG.isDebugEnabled()) { - LOG.debug("Try to relay journal logs : {}", logs); - } - // TODO: When reading in the journal logs that need to be synced, we - // should use BufferedChannels instead to minimize the amount of - // system calls done. 
- for (Long id: logs) { - long logPosition = 0L; - if (id == markedLog.getLogFileId()) { - logPosition = markedLog.getLogFileOffset(); - } - LOG.info("Replaying journal {} from position {}", id, logPosition); - scanJournal(id, logPosition, scanner); - } - } - - public void logAddEntry(ByteBuffer entry, boolean ackBeforeSync, WriteCallback cb, Object ctx) { - logAddEntry(Unpooled.wrappedBuffer(entry), ackBeforeSync, cb, ctx); - } - /** * record an add entry operation in journal. */ - public void logAddEntry(ByteBuf entry, boolean ackBeforeSync, WriteCallback cb, Object ctx) { + public void logAddEntry(ByteBuf entry, boolean ackBeforeSync, WriteCallback cb, Object ctx) + throws InterruptedException { long ledgerId = entry.getLong(entry.readerIndex() + 0); long entryId = entry.getLong(entry.readerIndex() + 8); logAddEntry(ledgerId, entryId, entry, ackBeforeSync, cb, ctx); } @VisibleForTesting - void logAddEntry(long ledgerId, long entryId, ByteBuf entry, - boolean ackBeforeSync, WriteCallback cb, Object ctx) { - //Retain entry until it gets written to journal + public void logAddEntry(long ledgerId, long entryId, ByteBuf entry, + boolean ackBeforeSync, WriteCallback cb, Object ctx) + throws InterruptedException { + // Retain entry until it gets written to journal entry.retain(); - journalQueueSize.inc(); - queue.add(QueueEntry.create( - entry, ackBeforeSync, ledgerId, entryId, cb, ctx, MathUtils.nowInNano(), - journalAddEntryStats, journalQueueSize)); + journalStats.getJournalQueueSize().inc(); + + memoryLimitController.reserveMemory(entry.readableBytes()); + + queue.put(QueueEntry.create( + entry, ackBeforeSync, ledgerId, entryId, cb, ctx, MathUtils.nowInNano(), + journalStats.getJournalAddEntryStats(), + callbackTime)); } void forceLedger(long ledgerId, WriteCallback cb, Object ctx) { - journalQueueSize.inc(); queue.add(QueueEntry.create( - null, false /* ackBeforeSync */, ledgerId, - Bookie.METAENTRY_ID_FORCE_LEDGER, cb, ctx, MathUtils.nowInNano(), - journalForceLedgerStats, journalQueueSize)); + null, false /* ackBeforeSync */, ledgerId, + BookieImpl.METAENTRY_ID_FORCE_LEDGER, cb, ctx, MathUtils.nowInNano(), + journalStats.getJournalForceLedgerStats(), + callbackTime)); + // Increment afterwards because the add operation could fail. + journalStats.getJournalQueueSize().inc(); } /** @@ -906,6 +930,14 @@ public int getJournalQueueLength() { return queue.size(); } + @VisibleForTesting + JournalChannel newLogFile(long logId, Long replaceLogId) throws IOException { + return new JournalChannel(journalDirectory, logId, journalPreAllocSize, journalWriteBufferSize, + journalAlignmentSize, removePagesFromCache, + journalFormatVersionToWrite, getBufferedChannelBuilder(), + conf, fileChannelProvider, replaceLogId); + } + /** * A thread used for persisting journal entries to journal files. * @@ -921,9 +953,17 @@ public int getJournalQueueLength() { *

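+ * <p>A flush (and a force-write request) is issued when the oldest pending entry has
+ * been waiting longer than the maximum group-wait time, when the buffered bytes or
+ * buffered entry count cross their thresholds, or when the queue drains while
+ * flush-on-empty is enabled.
+ *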
* @see org.apache.bookkeeper.bookie.SyncThread */ - @Override public void run() { LOG.info("Starting journal on {}", journalDirectory); + ThreadRegistry.register(journalThreadName); + + if (conf.isBusyWaitEnabled()) { + try { + CpuAffinity.acquireCore(); + } catch (Exception e) { + LOG.warn("Unable to acquire CPU core for Journal thread: {}", e.getMessage(), e); + } + } RecyclableArrayList toFlush = entryListRecycler.newInstance(); int numEntriesToFlush = 0; @@ -947,20 +987,27 @@ public void run() { boolean groupWhenTimeout = false; long dequeueStartTime = 0L; + long lastFlushTimeMs = System.currentTimeMillis(); + final ObjectHashSet writeHandlers = new ObjectHashSet<>(); + QueueEntry[] localQueueEntries = new QueueEntry[conf.getJournalQueueSize()]; + int localQueueEntriesIdx = 0; + int localQueueEntriesLen = 0; QueueEntry qe = null; while (true) { // new journal file to write if (null == logFile) { - logId = logId + 1; + journalIds = listJournalIds(journalDirectory, null); + Long replaceLogId = fileChannelProvider.supportReuseFile() && journalReuseFiles + && journalIds.size() >= maxBackupJournals + && journalIds.get(0) < lastLogMark.getCurMark().getLogFileId() + ? journalIds.get(0) : null; journalCreationWatcher.reset().start(); - logFile = new JournalChannel(journalDirectory, logId, journalPreAllocSize, journalWriteBufferSize, - journalAlignmentSize, removePagesFromCache, - journalFormatVersionToWrite, getBufferedChannelBuilder()); + logFile = newLogFile(logId, replaceLogId); - journalCreationStats.registerSuccessfulEvent( + journalStats.getJournalCreationStats().registerSuccessfulEvent( journalCreationWatcher.stop().elapsed(TimeUnit.NANOSECONDS), TimeUnit.NANOSECONDS); bc = logFile.getBufferedChannel(); @@ -970,124 +1017,145 @@ public void run() { if (qe == null) { if (dequeueStartTime != 0) { - journalProcessTimeStats.registerSuccessfulEvent(MathUtils.elapsedNanos(dequeueStartTime), - TimeUnit.NANOSECONDS); + journalStats.getJournalProcessTimeStats() + .registerSuccessfulEvent(MathUtils.elapsedNanos(dequeueStartTime), TimeUnit.NANOSECONDS); } + // At this point the local queue will always be empty, otherwise we would have + // advanced to the next `qe` at the end of the loop + localQueueEntriesIdx = 0; if (numEntriesToFlush == 0) { - qe = queue.take(); - dequeueStartTime = MathUtils.nowInNano(); - journalQueueStats.registerSuccessfulEvent(MathUtils.elapsedNanos(qe.enqueueTime), - TimeUnit.NANOSECONDS); + // There are no entries pending. We can wait indefinitely until the next + // one is available + localQueueEntriesLen = queue.takeAll(localQueueEntries); } else { + // There are already some entries pending. We must adjust + // the waiting time to the remaining groupWait time long pollWaitTimeNanos = maxGroupWaitInNanos - MathUtils.elapsedNanos(toFlush.get(0).enqueueTime); if (flushWhenQueueEmpty || pollWaitTimeNanos < 0) { pollWaitTimeNanos = 0; } - qe = queue.poll(pollWaitTimeNanos, TimeUnit.NANOSECONDS); - dequeueStartTime = MathUtils.nowInNano(); - if (qe != null) { - journalQueueStats.registerSuccessfulEvent(MathUtils.elapsedNanos(qe.enqueueTime), - TimeUnit.NANOSECONDS); - } + localQueueEntriesLen = queue.pollAll(localQueueEntries, + pollWaitTimeNanos, TimeUnit.NANOSECONDS); + } - boolean shouldFlush = false; - // We should issue a forceWrite if any of the three conditions below holds good - // 1. 
If the oldest pending entry has been pending for longer than the max wait time - if (maxGroupWaitInNanos > 0 && !groupWhenTimeout && (MathUtils - .elapsedNanos(toFlush.get(0).enqueueTime) > maxGroupWaitInNanos)) { - groupWhenTimeout = true; - } else if (maxGroupWaitInNanos > 0 && groupWhenTimeout && qe != null - && MathUtils.elapsedNanos(qe.enqueueTime) < maxGroupWaitInNanos) { - // when group timeout, it would be better to look forward, as there might be lots of - // entries already timeout - // due to a previous slow write (writing to filesystem which impacted by force write). - // Group those entries in the queue - // a) already timeout - // b) limit the number of entries to group - groupWhenTimeout = false; - shouldFlush = true; - flushMaxWaitCounter.inc(); - } else if (qe != null - && ((bufferedEntriesThreshold > 0 && toFlush.size() > bufferedEntriesThreshold) - || (bc.position() > lastFlushPosition + bufferedWritesThreshold))) { - // 2. If we have buffered more than the buffWriteThreshold or bufferedEntriesThreshold - shouldFlush = true; - flushMaxOutstandingBytesCounter.inc(); - } else if (qe == null) { - // We should get here only if we flushWhenQueueEmpty is true else we would wait - // for timeout that would put is past the maxWait threshold - // 3. If the queue is empty i.e. no benefit of grouping. This happens when we have one - // publish at a time - common case in tests. - shouldFlush = true; - flushEmptyQueueCounter.inc(); - } + dequeueStartTime = MathUtils.nowInNano(); - // toFlush is non null and not empty so should be safe to access getFirst - if (shouldFlush) { - if (journalFormatVersionToWrite >= JournalChannel.V5) { - writePaddingBytes(logFile, paddingBuff, journalAlignmentSize); - } - journalFlushWatcher.reset().start(); - bc.flush(); - - for (int i = 0; i < toFlush.size(); i++) { - QueueEntry entry = toFlush.get(i); - if (entry != null && (!syncData || entry.ackBeforeSync)) { - toFlush.set(i, null); - numEntriesToFlush--; - cbThreadPool.execute(entry); - } - } + if (localQueueEntriesLen > 0) { + qe = localQueueEntries[localQueueEntriesIdx]; + localQueueEntries[localQueueEntriesIdx++] = null; + } + } - lastFlushPosition = bc.position(); - journalFlushStats.registerSuccessfulEvent( - journalFlushWatcher.stop().elapsed(TimeUnit.NANOSECONDS), TimeUnit.NANOSECONDS); - - // Trace the lifetime of entries through persistence - if (LOG.isDebugEnabled()) { - for (QueueEntry e : toFlush) { - if (e != null) { - LOG.debug("Written and queuing for flush Ledger: {} Entry: {}", - e.ledgerId, e.entryId); - } + if (numEntriesToFlush > 0) { + boolean shouldFlush = false; + // We should issue a forceWrite if any of the three conditions below holds good + // 1. If the oldest pending entry has been pending for longer than the max wait time + if (maxGroupWaitInNanos > 0 && !groupWhenTimeout && (MathUtils + .elapsedNanos(toFlush.get(0).enqueueTime) > maxGroupWaitInNanos)) { + groupWhenTimeout = true; + } else if (maxGroupWaitInNanos > 0 && groupWhenTimeout + && (qe == null // no entry to group + || MathUtils.elapsedNanos(qe.enqueueTime) < maxGroupWaitInNanos)) { + // when group timeout, it would be better to look forward, as there might be lots of + // entries already timeout + // due to a previous slow write (writing to filesystem which impacted by force write). 
+ // Group those entries in the queue + // a) already timed out + // b) limit the number of entries to group + groupWhenTimeout = false; + shouldFlush = true; + journalStats.getFlushMaxWaitCounter().inc(); + } else if (qe != null + && ((bufferedEntriesThreshold > 0 && toFlush.size() > bufferedEntriesThreshold) + || (bc.position() > lastFlushPosition + bufferedWritesThreshold))) { + // 2. If we have buffered more than the buffWriteThreshold or bufferedEntriesThreshold + groupWhenTimeout = false; + shouldFlush = true; + journalStats.getFlushMaxOutstandingBytesCounter().inc(); + } else if (qe == null && flushWhenQueueEmpty) { + // We should get here only if flushWhenQueueEmpty is true, else we would wait + // for a timeout that would put us past the maxWait threshold + // 3. If the queue is empty i.e. no benefit of grouping. This happens when we have one + // publish at a time - common case in tests. + groupWhenTimeout = false; + shouldFlush = true; + journalStats.getFlushEmptyQueueCounter().inc(); + } + + // toFlush is non null and not empty so should be safe to access getFirst + if (shouldFlush) { + if (journalFormatVersionToWrite >= JournalChannel.V5) { + writePaddingBytes(logFile, paddingBuff, journalAlignmentSize); + } + journalFlushWatcher.reset().start(); + bc.flush(); + + for (int i = 0; i < toFlush.size(); i++) { + QueueEntry entry = toFlush.get(i); + if (entry != null && (!syncData || entry.ackBeforeSync)) { + toFlush.set(i, null); + numEntriesToFlush--; + if (entry.getCtx() instanceof BookieRequestHandler + && entry.entryId != BookieImpl.METAENTRY_ID_FORCE_LEDGER) { + writeHandlers.add((BookieRequestHandler) entry.getCtx()); } + entry.run(); } - } + } + writeHandlers.forEach( + (ObjectProcedure) + BookieRequestHandler::flushPendingResponse); + writeHandlers.clear(); + + lastFlushPosition = bc.position(); + journalStats.getJournalFlushStats().registerSuccessfulEvent( + journalFlushWatcher.stop().elapsed(TimeUnit.NANOSECONDS), TimeUnit.NANOSECONDS); + + // Trace the lifetime of entries through persistence + if (LOG.isDebugEnabled()) { + for (QueueEntry e : toFlush) { + if (e != null && LOG.isDebugEnabled()) { + LOG.debug("Written and queuing for flush Ledger: {} Entry: {}", + e.ledgerId, e.entryId); } } + } - forceWriteBatchEntriesStats.registerSuccessfulValue(numEntriesToFlush); - forceWriteBatchBytesStats.registerSuccessfulValue(batchSize); - - boolean shouldRolloverJournal = (lastFlushPosition > maxJournalSize); - if (syncData) { - // Trigger data sync to disk in the "Force-Write" thread. - // Callback will be triggered after data is committed to disk - forceWriteRequests.put(createForceWriteRequest(logFile, logId, lastFlushPosition, - toFlush, shouldRolloverJournal, false)); - toFlush = entryListRecycler.newInstance(); - numEntriesToFlush = 0; - } else { - // Data is already written on the file (though it might still be in the OS page-cache) - lastLogMark.setCurLogMark(logId, lastFlushPosition); - toFlush.clear(); - numEntriesToFlush = 0; - if (shouldRolloverJournal) { - forceWriteRequests.put( - createForceWriteRequest( - logFile, logId, lastFlushPosition, - EMPTY_ARRAY_LIST, shouldRolloverJournal, false)); - } - } - - batchSize = 0L; - // check whether journal file is over file limit - if (shouldRolloverJournal) { - // if the journal file is rolled over, the journal file will be closed after last - // entry is force written to disk. - logFile = null; - continue; - } + + journalStats.getForceWriteBatchEntriesStats() + .registerSuccessfulValue(numEntriesToFlush); + journalStats.getForceWriteBatchBytesStats() + .registerSuccessfulValue(batchSize); + boolean shouldRolloverJournal = (lastFlushPosition > maxJournalSize); + // Trigger data sync to disk in the "Force-Write" thread. + // A data sync to disk is triggered in three situations: + // 1. journalSyncData is enabled, usually when an SSD is used as journal storage + // 2. shouldRolloverJournal is true, that is, the journal file has reached maxJournalSize + // 3. journalSyncData is disabled and shouldRolloverJournal is false: here + // journalPageCacheFlushIntervalMSec controls the sync frequency, so that the disk + // is not synced too often, which would increase disk I/O utilization. Once the time + // since the last flush reaches journalPageCacheFlushIntervalMSec (default: 1s), + // a data sync to disk is triggered + if (syncData + || shouldRolloverJournal + || (System.currentTimeMillis() - lastFlushTimeMs + >= journalPageCacheFlushIntervalMSec)) { + forceWriteRequests.put(createForceWriteRequest(logFile, logId, lastFlushPosition, + toFlush, shouldRolloverJournal)); + lastFlushTimeMs = System.currentTimeMillis(); + } + toFlush = entryListRecycler.newInstance(); + numEntriesToFlush = 0; + + batchSize = 0L; + // check whether journal file is over file limit + if (shouldRolloverJournal) { + // if the journal file is rolled over, the journal file will be closed after last + // entry is force written to disk. + logFile = null; + continue; + } } } @@ -1100,7 +1168,12 @@ public void run() { if (qe == null) { // no more queue entry continue; } - if ((qe.entryId == Bookie.METAENTRY_ID_LEDGER_EXPLICITLAC) + + journalStats.getJournalQueueSize().dec(); + journalStats.getJournalQueueStats() + .registerSuccessfulEvent(MathUtils.elapsedNanos(qe.enqueueTime), TimeUnit.NANOSECONDS); + + if ((qe.entryId == BookieImpl.METAENTRY_ID_LEDGER_EXPLICITLAC) && (journalFormatVersionToWrite < JournalChannel.V6)) { /* * this means we are using new code which supports @@ -1109,11 +1182,11 @@ public void run() { * shouldn't write this special entry * (METAENTRY_ID_LEDGER_EXPLICITLAC) to Journal. 
*/ - qe.entry.release(); - } else if (qe.entryId != Bookie.METAENTRY_ID_FORCE_LEDGER) { + memoryLimitController.releaseMemory(qe.entry.readableBytes()); + ReferenceCountUtil.release(qe.entry); + } else if (qe.entryId != BookieImpl.METAENTRY_ID_FORCE_LEDGER) { int entrySize = qe.entry.readableBytes(); - journalWriteBytes.add(entrySize); - journalQueueSize.dec(); + journalStats.getJournalWriteBytes().addCount(entrySize); batchSize += (4 + entrySize); @@ -1125,31 +1198,41 @@ public void run() { bc.write(lenBuff); bc.write(qe.entry); - qe.entry.release(); + memoryLimitController.releaseMemory(qe.entry.readableBytes()); + ReferenceCountUtil.release(qe.entry); } toFlush.add(qe); numEntriesToFlush++; - qe = null; + + if (localQueueEntriesIdx < localQueueEntriesLen) { + qe = localQueueEntries[localQueueEntriesIdx]; + localQueueEntries[localQueueEntriesIdx++] = null; + } else { + qe = null; + } } } catch (IOException ioe) { LOG.error("I/O exception in Journal thread!", ioe); } catch (InterruptedException ie) { Thread.currentThread().interrupt(); - LOG.warn("Journal exits when shutting down", ie); + LOG.info("Journal exits when shutting down"); } finally { // There could be packets queued for forceWrite on this logFile - // That is fine as this exception is going to anyway take down the + // That is fine as this exception is going to anyway take down // the bookie. If we execute this as a part of graceful shutdown, // close will flush the file system cache making any previous // cached writes durable so this is fine as well. IOUtils.close(LOG, bc); + if (journalAliveListener != null) { + journalAliveListener.onJournalExit(); + } } LOG.info("Journal exited loop!"); } public BufferedChannelBuilder getBufferedChannelBuilder() { - return BufferedChannelBuilder.DEFAULT_BCBUILDER; + return (FileChannel fc, int capacity) -> new BufferedChannel(allocator, fc, capacity); } /** @@ -1161,18 +1244,17 @@ public synchronized void shutdown() { return; } LOG.info("Shutting down Journal"); - forceWriteThread.shutdown(); - cbThreadPool.shutdown(); - if (!cbThreadPool.awaitTermination(5, TimeUnit.SECONDS)) { - LOG.warn("Couldn't shutdown journal callback thread gracefully. Forcing"); + if (fileChannelProvider != null) { + fileChannelProvider.close(); } - cbThreadPool.shutdownNow(); + + forceWriteThread.shutdown(); running = false; - this.interrupt(); - this.join(); + this.interruptThread(); + this.joinThread(); LOG.info("Finished Shutting down Journal thread"); - } catch (InterruptedException ie) { + } catch (IOException | InterruptedException ie) { Thread.currentThread().interrupt(); LOG.warn("Interrupted during shutting down journal : ", ie); } @@ -1190,7 +1272,6 @@ private static int fullRead(JournalChannel fc, ByteBuffer bb) throws IOException return total; } - // /** * Wait for the Journal thread to exit. 
* This is method is needed in order to mock the journal, we can't mock final method of java.lang.Thread class @@ -1199,7 +1280,34 @@ private static int fullRead(JournalChannel fc, ByteBuffer bb) throws IOException */ @VisibleForTesting public void joinThread() throws InterruptedException { - join(); + if (thread != null) { + thread.join(); + } + } + + public void interruptThread() { + if (thread != null) { + thread.interrupt(); + } } + public synchronized void start() { + thread = new BookieCriticalThread(() -> run(), journalThreadName + "-" + conf.getBookiePort()); + thread.setPriority(Thread.MAX_PRIORITY); + thread.start(); + } + + long getMemoryUsage() { + return memoryLimitController.currentUsage(); + } + + @VisibleForTesting + void setMemoryLimitController(MemoryLimitController memoryLimitController) { + this.memoryLimitController = memoryLimitController; + } + + @VisibleForTesting + public void setForceWriteRequests(BatchedBlockingQueue forceWriteRequests) { + this.forceWriteRequests = forceWriteRequests; + } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/JournalAliveListener.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/JournalAliveListener.java new file mode 100644 index 00000000000..ef73edc0eac --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/JournalAliveListener.java @@ -0,0 +1,28 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.bookie; + +/** + * Listener for journal alive. 
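+ * <p>Invoked when the journal thread exits, so that the owner can react, e.g. by
+ * shutting down the bookie. Illustrative wiring (the shutdown hook is a sketch):
+ * <pre>{@code
+ * Journal journal = new Journal(0, journalDir, conf, ledgerDirsManager,
+ *     statsLogger, allocator, () -> shutdownBookie());
+ * }</pre>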
+ * */ +public interface JournalAliveListener { + void onJournalExit(); +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/JournalChannel.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/JournalChannel.java index f8a7230aaf9..b3759b68787 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/JournalChannel.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/JournalChannel.java @@ -21,16 +21,16 @@ package org.apache.bookkeeper.bookie; -import static com.google.common.base.Charsets.UTF_8; +import static java.nio.charset.StandardCharsets.UTF_8; import java.io.Closeable; import java.io.File; import java.io.IOException; -import java.io.RandomAccessFile; import java.nio.ByteBuffer; import java.nio.channels.FileChannel; import java.util.Arrays; -import org.apache.bookkeeper.util.NativeIO; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.util.PageCacheUtil; import org.apache.bookkeeper.util.ZeroBuffer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -42,18 +42,19 @@ class JournalChannel implements Closeable { private static final Logger LOG = LoggerFactory.getLogger(JournalChannel.class); - final RandomAccessFile randomAccessFile; + static final long MB = 1024 * 1024L; + final BookieFileChannel channel; final int fd; final FileChannel fc; - final BufferedChannel bc; final int formatVersion; + BufferedChannel bc; long nextPrealloc = 0; final byte[] magicWord = "BKLG".getBytes(UTF_8); static final int SECTOR_SIZE = 512; private static final int START_OF_FILE = -12345; - private static long cacheDropLagBytes = 8 * 1024 * 1024; + private static final long cacheDropLagBytes = 8 * MB; // No header static final int V1 = 1; @@ -66,7 +67,7 @@ class JournalChannel implements Closeable { // 1) expanding header to 512 // 2) Padding writes to align sector size static final int V5 = 5; - // Adding explicitlac entry + // Adding explicit lac entry public static final int V6 = 6; static final int HEADER_SIZE = SECTOR_SIZE; // align header to sector size @@ -82,37 +83,46 @@ class JournalChannel implements Closeable { // The position of the file channel's last drop position private long lastDropPosition = 0L; + final ServerConfiguration configuration; + // Mostly used by tests JournalChannel(File journalDirectory, long logId) throws IOException { - this(journalDirectory, logId, 4 * 1024 * 1024, 65536, START_OF_FILE); + this(journalDirectory, logId, 4 * MB, 65536, START_OF_FILE, new ServerConfiguration(), + new DefaultFileChannelProvider()); } // Open journal for scanning starting from the first record in journal. - JournalChannel(File journalDirectory, long logId, long preAllocSize, int writeBufferSize) throws IOException { - this(journalDirectory, logId, preAllocSize, writeBufferSize, START_OF_FILE); + JournalChannel(File journalDirectory, long logId, + long preAllocSize, int writeBufferSize, ServerConfiguration conf, + FileChannelProvider provider) throws IOException { + this(journalDirectory, logId, preAllocSize, writeBufferSize, START_OF_FILE, conf, provider); } // Open journal for scanning starting from given position. 
JournalChannel(File journalDirectory, long logId, - long preAllocSize, int writeBufferSize, long position) throws IOException { + long preAllocSize, int writeBufferSize, long position, ServerConfiguration conf, + FileChannelProvider provider) throws IOException { this(journalDirectory, logId, preAllocSize, writeBufferSize, SECTOR_SIZE, - position, false, V5, Journal.BufferedChannelBuilder.DEFAULT_BCBUILDER); + position, false, V5, Journal.BufferedChannelBuilder.DEFAULT_BCBUILDER, + conf, provider, null); } // Open journal to write JournalChannel(File journalDirectory, long logId, long preAllocSize, int writeBufferSize, int journalAlignSize, - boolean fRemoveFromPageCache, int formatVersionToWrite) throws IOException { - this(journalDirectory, logId, preAllocSize, writeBufferSize, journalAlignSize, - fRemoveFromPageCache, formatVersionToWrite, Journal.BufferedChannelBuilder.DEFAULT_BCBUILDER); + boolean fRemoveFromPageCache, int formatVersionToWrite, + ServerConfiguration conf, FileChannelProvider provider) throws IOException { + this(journalDirectory, logId, preAllocSize, writeBufferSize, journalAlignSize, fRemoveFromPageCache, + formatVersionToWrite, Journal.BufferedChannelBuilder.DEFAULT_BCBUILDER, conf, provider, null); } JournalChannel(File journalDirectory, long logId, long preAllocSize, int writeBufferSize, int journalAlignSize, boolean fRemoveFromPageCache, int formatVersionToWrite, - Journal.BufferedChannelBuilder bcBuilder) throws IOException { + Journal.BufferedChannelBuilder bcBuilder, ServerConfiguration conf, + FileChannelProvider provider, Long toReplaceLogId) throws IOException { this(journalDirectory, logId, preAllocSize, writeBufferSize, journalAlignSize, - START_OF_FILE, fRemoveFromPageCache, formatVersionToWrite, bcBuilder); + START_OF_FILE, fRemoveFromPageCache, formatVersionToWrite, bcBuilder, conf, provider, toReplaceLogId); } /** @@ -140,46 +150,49 @@ class JournalChannel implements Closeable { private JournalChannel(File journalDirectory, long logId, long preAllocSize, int writeBufferSize, int journalAlignSize, long position, boolean fRemoveFromPageCache, - int formatVersionToWrite, Journal.BufferedChannelBuilder bcBuilder) throws IOException { + int formatVersionToWrite, Journal.BufferedChannelBuilder bcBuilder, + ServerConfiguration conf, + FileChannelProvider provider, Long toReplaceLogId) throws IOException { this.journalAlignSize = journalAlignSize; this.zeros = ByteBuffer.allocate(journalAlignSize); this.preAllocSize = preAllocSize - preAllocSize % journalAlignSize; this.fRemoveFromPageCache = fRemoveFromPageCache; + this.configuration = conf; + + boolean reuseFile = false; File fn = new File(journalDirectory, Long.toHexString(logId) + ".txn"); + if (toReplaceLogId != null && logId != toReplaceLogId && provider.supportReuseFile()) { + File toReplaceFile = new File(journalDirectory, Long.toHexString(toReplaceLogId) + ".txn"); + if (toReplaceFile.exists()) { + renameJournalFile(toReplaceFile, fn); + provider.notifyRename(toReplaceFile, fn); + reuseFile = true; + } + } + channel = provider.open(fn, configuration); if (formatVersionToWrite < V4) { throw new IOException("Invalid journal format to write : version = " + formatVersionToWrite); } LOG.info("Opening journal {}", fn); - if (!fn.exists()) { // new file, write version + if (!channel.fileExists(fn)) { // create new journal file to write, write version if (!fn.createNewFile()) { LOG.error("Journal file {}, that shouldn't exist, already exists. 
" + " is there another bookie process running?", fn); throw new IOException("File " + fn + " suddenly appeared, is another bookie process running?"); } - randomAccessFile = new RandomAccessFile(fn, "rw"); - fc = randomAccessFile.getChannel(); + fc = channel.getFileChannel(); formatVersion = formatVersionToWrite; - - int headerSize = (V4 == formatVersion) ? VERSION_HEADER_SIZE : HEADER_SIZE; - ByteBuffer bb = ByteBuffer.allocate(headerSize); - ZeroBuffer.put(bb); - bb.clear(); - bb.put(magicWord); - bb.putInt(formatVersion); - bb.clear(); - fc.write(bb); - - bc = bcBuilder.create(fc, writeBufferSize); - forceWrite(true); - nextPrealloc = this.preAllocSize; - fc.write(zeros, nextPrealloc - journalAlignSize); - } else { // open an existing file - randomAccessFile = new RandomAccessFile(fn, "r"); - fc = randomAccessFile.getChannel(); - bc = null; // readonly + writeHeader(bcBuilder, writeBufferSize); + } else if (reuseFile) { // Open an existing journal to write, it needs fileChannelProvider support reuse file. + fc = channel.getFileChannel(); + formatVersion = formatVersionToWrite; + writeHeader(bcBuilder, writeBufferSize); + } else { // open an existing file to read. + fc = channel.getFileChannel(); + // readonly, use fileChannel directly, no need to use BufferedChannel ByteBuffer bb = ByteBuffer.allocate(VERSION_HEADER_SIZE); int c = fc.read(bb); @@ -224,15 +237,40 @@ private JournalChannel(File journalDirectory, long logId, } } catch (IOException e) { LOG.error("Bookie journal file can seek to position :", e); + throw e; } } if (fRemoveFromPageCache) { - this.fd = NativeIO.getSysFileDescriptor(randomAccessFile.getFD()); + this.fd = PageCacheUtil.getSysFileDescriptor(channel.getFD()); } else { this.fd = -1; } } + private void writeHeader(Journal.BufferedChannelBuilder bcBuilder, + int writeBufferSize) throws IOException { + int headerSize = (V4 == formatVersion) ? 
VERSION_HEADER_SIZE : HEADER_SIZE; + ByteBuffer bb = ByteBuffer.allocate(headerSize); + ZeroBuffer.put(bb); + bb.clear(); + bb.put(magicWord); + bb.putInt(formatVersion); + bb.clear(); + fc.write(bb); + + bc = bcBuilder.create(fc, writeBufferSize); + forceWrite(true); + nextPrealloc = this.preAllocSize; + fc.write(zeros, nextPrealloc - journalAlignSize); + } + + public static void renameJournalFile(File source, File target) throws IOException { + if (source == null || target == null || !source.renameTo(target)) { + LOG.error("Failed to rename file {} to {}", source, target); + throw new IOException("Failed to rename file " + source + " to " + target); + } + } + int getFormatVersion() { return formatVersion; } @@ -261,6 +299,8 @@ int read(ByteBuffer dst) public void close() throws IOException { if (bc != null) { bc.close(); + } else if (fc != null) { + fc.close(); } } @@ -285,9 +325,10 @@ public void forceWrite(boolean forceMetadata) throws IOException { if (fRemoveFromPageCache) { long newDropPos = newForceWritePosition - cacheDropLagBytes; if (lastDropPosition < newDropPos) { - NativeIO.bestEffortRemoveFromPageCache(fd, lastDropPosition, newDropPos - lastDropPosition); + PageCacheUtil.bestEffortRemoveFromPageCache(fd, lastDropPosition, newDropPos - lastDropPosition); } this.lastDropPosition = newDropPos; } } + } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/LastAddConfirmedUpdateNotification.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/LastAddConfirmedUpdateNotification.java index 71cbd614458..26961465987 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/LastAddConfirmedUpdateNotification.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/LastAddConfirmedUpdateNotification.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/LedgerCache.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/LedgerCache.java index 14d48255d5a..4d954de46a8 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/LedgerCache.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/LedgerCache.java @@ -21,9 +21,12 @@ package org.apache.bookkeeper.bookie; +import static org.apache.bookkeeper.tools.cli.commands.bookie.FormatUtil.bytes2Hex; + import io.netty.buffer.ByteBuf; import java.io.Closeable; import java.io.IOException; +import java.util.PrimitiveIterator.OfLong; import org.apache.bookkeeper.common.util.Watcher; /** @@ -31,7 +34,7 @@ * an entry log file. It does user level caching to more efficiently manage disk * head scheduling. */ -interface LedgerCache extends Closeable { +public interface LedgerCache extends Closeable { boolean setFenced(long ledgerId) throws IOException; boolean isFenced(long ledgerId) throws IOException; @@ -51,9 +54,63 @@ interface LedgerCache extends Closeable { boolean waitForLastAddConfirmedUpdate(long ledgerId, long previousLAC, Watcher watcher) throws IOException; + void cancelWaitForLastAddConfirmedUpdate(long ledgerId, + Watcher watcher) throws IOException; void deleteLedger(long ledgerId) throws IOException; void setExplicitLac(long ledgerId, ByteBuf lac) throws IOException; ByteBuf getExplicitLac(long ledgerId); + + /** + * Specific exception to encode the case where the index is not present. 
+ */ + class NoIndexForLedgerException extends IOException { + NoIndexForLedgerException(String reason, Exception cause) { + super(reason, cause); + } + } + + /** + * Represents a page of the index. + */ + interface PageEntries { + LedgerEntryPage getLEP() throws IOException; + long getFirstEntry(); + long getLastEntry(); + } + + /** + * Iterable over index pages -- returns PageEntries rather than individual + * entries because getEntries() above needs to be able to throw an IOException. + */ + interface PageEntriesIterable extends AutoCloseable, Iterable {} + + PageEntriesIterable listEntries(long ledgerId) throws IOException; + + OfLong getEntriesIterator(long ledgerId) throws IOException; + + /** + * Represents summary of ledger metadata. + */ + class LedgerIndexMetadata { + public final byte[] masterKey; + public final long size; + public final boolean fenced; + LedgerIndexMetadata(byte[] masterKey, long size, boolean fenced) { + this.masterKey = masterKey; + this.size = size; + this.fenced = fenced; + } + + public String getMasterKeyHex() { + if (null == masterKey) { + return "NULL"; + } else { + return bytes2Hex(masterKey); + } + } + } + + LedgerIndexMetadata readLedgerIndexMetadata(long ledgerId) throws IOException; } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/LedgerCacheImpl.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/LedgerCacheImpl.java index 1db7d47041e..d7b2ab4b563 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/LedgerCacheImpl.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/LedgerCacheImpl.java @@ -23,6 +23,10 @@ import io.netty.buffer.ByteBuf; import java.io.IOException; +import java.util.Collections; +import java.util.Iterator; +import java.util.NoSuchElementException; +import java.util.PrimitiveIterator.OfLong; import org.apache.bookkeeper.common.util.Watcher; import org.apache.bookkeeper.conf.ServerConfiguration; import org.apache.bookkeeper.stats.NullStatsLogger; @@ -91,6 +95,13 @@ public boolean waitForLastAddConfirmedUpdate(long ledgerId, return indexPersistenceManager.waitForLastAddConfirmedUpdate(ledgerId, previousLAC, watcher); } + @Override + public void cancelWaitForLastAddConfirmedUpdate(long ledgerId, + Watcher watcher) + throws IOException { + indexPersistenceManager.cancelWaitForLastAddConfirmedUpdate(ledgerId, watcher); + } + @Override public void putEntryOffset(long ledger, long entry, long offset) throws IOException { indexPageManager.putEntryOffset(ledger, entry, offset); @@ -147,10 +158,12 @@ public boolean isFenced(long ledgerId) throws IOException { return indexPersistenceManager.isFenced(ledgerId); } + @Override public void setExplicitLac(long ledgerId, ByteBuf lac) throws IOException { indexPersistenceManager.setExplicitLac(ledgerId, lac); } + @Override public ByteBuf getExplicitLac(long ledgerId) { return indexPersistenceManager.getExplicitLac(ledgerId); } @@ -169,4 +182,58 @@ public boolean ledgerExists(long ledgerId) throws IOException { public void close() throws IOException { indexPersistenceManager.close(); } + + @Override + public PageEntriesIterable listEntries(long ledgerId) throws IOException { + return indexPageManager.listEntries(ledgerId); + } + + @Override + public LedgerIndexMetadata readLedgerIndexMetadata(long ledgerId) throws IOException { + return indexPersistenceManager.readLedgerIndexMetadata(ledgerId); + } + + @Override + public OfLong getEntriesIterator(long ledgerId) throws IOException { + Iterator pageEntriesIteratorNonFinal 
= null; + try { + pageEntriesIteratorNonFinal = listEntries(ledgerId).iterator(); + } catch (Bookie.NoLedgerException noLedgerException) { + pageEntriesIteratorNonFinal = Collections.emptyIterator(); + } + final Iterator pageEntriesIterator = pageEntriesIteratorNonFinal; + return new OfLong() { + private OfLong entriesInCurrentLEPIterator = null; + { + if (pageEntriesIterator.hasNext()) { + entriesInCurrentLEPIterator = pageEntriesIterator.next().getLEP().getEntriesIterator(); + } + } + + @Override + public boolean hasNext() { + try { + while ((entriesInCurrentLEPIterator != null) && (!entriesInCurrentLEPIterator.hasNext())) { + if (pageEntriesIterator.hasNext()) { + entriesInCurrentLEPIterator = pageEntriesIterator.next().getLEP().getEntriesIterator(); + } else { + entriesInCurrentLEPIterator = null; + } + } + return (entriesInCurrentLEPIterator != null); + } catch (Exception exc) { + throw new RuntimeException( + "Received exception in InterleavedLedgerStorage getEntriesOfLedger hasNext call", exc); + } + } + + @Override + public long nextLong() { + if (!hasNext()) { + throw new NoSuchElementException(); + } + return entriesInCurrentLEPIterator.nextLong(); + } + }; + } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/LedgerDescriptor.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/LedgerDescriptor.java index 23840be279c..3c84feb876d 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/LedgerDescriptor.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/LedgerDescriptor.java @@ -21,12 +21,13 @@ package org.apache.bookkeeper.bookie; -import static org.apache.bookkeeper.bookie.Bookie.METAENTRY_ID_FENCE_KEY; +import static org.apache.bookkeeper.bookie.BookieImpl.METAENTRY_ID_FENCE_KEY; -import com.google.common.util.concurrent.SettableFuture; import io.netty.buffer.ByteBuf; import io.netty.buffer.Unpooled; import java.io.IOException; +import java.util.PrimitiveIterator.OfLong; +import java.util.concurrent.CompletableFuture; import org.apache.bookkeeper.common.util.Watcher; /** @@ -59,29 +60,34 @@ static ByteBuf createLedgerFenceEntry(Long ledgerId) { return bb; } - abstract void checkAccess(byte masterKey[]) throws BookieException, IOException; + abstract void checkAccess(byte[] masterKey) throws BookieException, IOException; abstract long getLedgerId(); abstract boolean setFenced() throws IOException; - abstract boolean isFenced() throws IOException; + abstract boolean isFenced() throws IOException, BookieException; /** * When we fence a ledger, we need to first set ledger to fenced state in memory and * then log the fence entry in Journal so that we can rebuild the state. * *

We should satisfy the future only after we complete logging fence entry in Journal + * @return */ - abstract SettableFuture fenceAndLogInJournal(Journal journal) throws IOException; + abstract CompletableFuture fenceAndLogInJournal(Journal journal) throws IOException; abstract long addEntry(ByteBuf entry) throws IOException, BookieException; - abstract ByteBuf readEntry(long entryId) throws IOException; + abstract ByteBuf readEntry(long entryId) throws IOException, BookieException; - abstract long getLastAddConfirmed() throws IOException; + abstract long getLastAddConfirmed() throws IOException, BookieException; abstract boolean waitForLastAddConfirmedUpdate(long previousLAC, Watcher watcher) throws IOException; + abstract void cancelWaitForLastAddConfirmedUpdate(Watcher watcher) + throws IOException; abstract void setExplicitLac(ByteBuf entry) throws IOException; - abstract ByteBuf getExplicitLac(); + abstract ByteBuf getExplicitLac() throws IOException, BookieException; + + abstract OfLong getListOfEntriesOfLedger(long ledgerId) throws IOException; } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/LedgerDescriptorImpl.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/LedgerDescriptorImpl.java index a0f34eab65d..00edfb6a9c2 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/LedgerDescriptorImpl.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/LedgerDescriptorImpl.java @@ -21,12 +21,14 @@ package org.apache.bookkeeper.bookie; -import com.google.common.util.concurrent.SettableFuture; import io.netty.buffer.ByteBuf; import java.io.IOException; import java.util.Arrays; +import java.util.PrimitiveIterator.OfLong; +import java.util.concurrent.CompletableFuture; import java.util.concurrent.atomic.AtomicBoolean; import org.apache.bookkeeper.client.api.BKException; +import org.apache.bookkeeper.common.concurrent.FutureUtils; import org.apache.bookkeeper.common.util.Watcher; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -36,13 +38,13 @@ * to write entries to a ledger and read entries from a ledger. 
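The move from Guava's SettableFuture to CompletableFuture in the contract above lets callers compose on the fencing result instead of blocking. A hedged sketch of how a consumer might chain on it; the helper and its wiring are hypothetical, not part of this patch.

import java.util.concurrent.CompletableFuture;

final class FenceFlowSketch {
    // The Boolean carried by fenceAndLogInJournal's future indicates whether
    // the fence entry was persisted to the journal.
    static CompletableFuture<Void> onFenced(CompletableFuture<Boolean> fenceResult) {
        return fenceResult.thenAccept(persisted -> {
            if (persisted) {
                // fence entry durable in the journal: safe to answer the fencing read
            } else {
                // journal write failed: a later fenceAndLogInJournal call retries
            }
        });
    }
}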
*/ public class LedgerDescriptorImpl extends LedgerDescriptor { - private static final Logger LOG = LoggerFactory.getLogger(LedgerDescriptor.class); + private static final Logger LOG = LoggerFactory.getLogger(LedgerDescriptorImpl.class); final LedgerStorage ledgerStorage; - private long ledgerId; + private final long ledgerId; final byte[] masterKey; - private AtomicBoolean fenceEntryPersisted = new AtomicBoolean(); - private SettableFuture logFenceResult = null; + private final AtomicBoolean fenceEntryPersisted = new AtomicBoolean(); + private CompletableFuture logFenceResult = null; LedgerDescriptorImpl(byte[] masterKey, long ledgerId, @@ -53,7 +55,7 @@ public class LedgerDescriptorImpl extends LedgerDescriptor { } @Override - void checkAccess(byte masterKey[]) throws BookieException, IOException { + void checkAccess(byte[] masterKey) throws BookieException, IOException { if (!Arrays.equals(this.masterKey, masterKey)) { LOG.error("[{}] Requested master key {} does not match the cached master key {}", this.ledgerId, Arrays.toString(masterKey), Arrays.toString(this.masterKey)); @@ -72,21 +74,22 @@ boolean setFenced() throws IOException { } @Override - boolean isFenced() throws IOException { + boolean isFenced() throws IOException, BookieException { return ledgerStorage.isFenced(ledgerId); } @Override void setExplicitLac(ByteBuf lac) throws IOException { - ledgerStorage.setExplicitlac(ledgerId, lac); + ledgerStorage.setExplicitLac(ledgerId, lac); } @Override - ByteBuf getExplicitLac() { + ByteBuf getExplicitLac() throws IOException, BookieException { return ledgerStorage.getExplicitLac(ledgerId); } - synchronized SettableFuture fenceAndLogInJournal(Journal journal) throws IOException { + @Override + synchronized CompletableFuture fenceAndLogInJournal(Journal journal) throws IOException { boolean success = this.setFenced(); if (success) { // fenced for first time, we should add the key to journal ensure we can rebuild. @@ -99,8 +102,8 @@ synchronized SettableFuture fenceAndLogInJournal(Journal journal) throw if (logFenceResult == null || fenceEntryPersisted.get()){ // Either ledger's fenced state is recovered from Journal // Or Log fence entry in Journal succeed - SettableFuture result = SettableFuture.create(); - result.set(true); + CompletableFuture result = FutureUtils.createFuture(); + result.complete(true); return result; } else if (logFenceResult.isDone()) { // We failed to log fence entry in Journal, try again. 
@@ -116,24 +119,29 @@ synchronized SettableFuture fenceAndLogInJournal(Journal journal) throw * @param journal log the fence entry in the Journal * @return A future which will be satisfied when add entry to journal complete */ - private SettableFuture logFenceEntryInJournal(Journal journal) { - SettableFuture result; + private CompletableFuture logFenceEntryInJournal(Journal journal) { + CompletableFuture result; synchronized (this) { - result = logFenceResult = SettableFuture.create(); + result = logFenceResult = FutureUtils.createFuture(); } ByteBuf entry = createLedgerFenceEntry(ledgerId); - journal.logAddEntry(entry, false /* ackBeforeSync */, (rc, ledgerId, entryId, addr, ctx) -> { - if (LOG.isDebugEnabled()) { - LOG.debug("Record fenced state for ledger {} in journal with rc {}", - ledgerId, BKException.codeLogger(rc)); - } - if (rc == 0) { - fenceEntryPersisted.compareAndSet(false, true); - result.set(true); - } else { - result.set(false); - } - }, null); + try { + journal.logAddEntry(entry, false /* ackBeforeSync */, (rc, ledgerId, entryId, addr, ctx) -> { + if (LOG.isDebugEnabled()) { + LOG.debug("Record fenced state for ledger {} in journal with rc {}", + ledgerId, BKException.codeLogger(rc)); + } + if (rc == 0) { + fenceEntryPersisted.compareAndSet(false, true); + result.complete(true); + } else { + result.complete(false); + } + }, null); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + result.completeExceptionally(e); + } return result; } @@ -149,12 +157,12 @@ long addEntry(ByteBuf entry) throws IOException, BookieException { } @Override - ByteBuf readEntry(long entryId) throws IOException { + ByteBuf readEntry(long entryId) throws IOException, BookieException { return ledgerStorage.getEntry(ledgerId, entryId); } @Override - long getLastAddConfirmed() throws IOException { + long getLastAddConfirmed() throws IOException, BookieException { return ledgerStorage.getLastAddConfirmed(ledgerId); } @@ -163,4 +171,14 @@ boolean waitForLastAddConfirmedUpdate(long previousLAC, Watcher watcher) throws IOException { return ledgerStorage.waitForLastAddConfirmedUpdate(ledgerId, previousLAC, watcher); } + + @Override + void cancelWaitForLastAddConfirmedUpdate(Watcher watcher) throws IOException { + ledgerStorage.cancelWaitForLastAddConfirmedUpdate(ledgerId, watcher); + } + + @Override + OfLong getListOfEntriesOfLedger(long ledgerId) throws IOException { + return ledgerStorage.getListOfEntriesOfLedger(ledgerId); + } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/LedgerDescriptorReadOnlyImpl.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/LedgerDescriptorReadOnlyImpl.java index 40bf988582b..6cea561c729 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/LedgerDescriptorReadOnlyImpl.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/LedgerDescriptorReadOnlyImpl.java @@ -22,7 +22,6 @@ package org.apache.bookkeeper.bookie; import io.netty.buffer.ByteBuf; - import java.io.IOException; /** * Implements a ledger inside a bookie. 
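One detail worth calling out in the rewritten logFenceEntryInJournal above: when the journal write is interrupted, the thread's interrupt status is restored before the future is failed, so blocking callers further up the stack still observe the interrupt. The same pattern in isolation, as a sketch with assumed names:

import java.util.concurrent.Callable;
import java.util.concurrent.CompletableFuture;

final class InterruptSketch {
    static CompletableFuture<Boolean> submit(Callable<Void> journalWrite) {
        CompletableFuture<Boolean> result = new CompletableFuture<>();
        try {
            journalWrite.call(); // stands in for journal.logAddEntry(...)
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt(); // preserve the interrupt for callers
            result.completeExceptionally(e);
        } catch (Exception e) {
            result.completeExceptionally(e);
        }
        return result;
    }
}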
In particular, it implements operations @@ -35,19 +34,16 @@ public class LedgerDescriptorReadOnlyImpl extends LedgerDescriptorImpl { @Override boolean setFenced() throws IOException { - assert false; throw new IOException("Invalid action on read only descriptor"); } @Override long addEntry(ByteBuf entry) throws IOException { - assert false; throw new IOException("Invalid action on read only descriptor"); } @Override - void checkAccess(byte masterKey[]) throws BookieException, IOException { - assert false; + void checkAccess(byte[] masterKey) throws BookieException, IOException { throw new IOException("Invalid action on read only descriptor"); } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/LedgerDirsManager.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/LedgerDirsManager.java index cdaa66814b9..056a07622a1 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/LedgerDirsManager.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/LedgerDirsManager.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -20,6 +20,7 @@ */ package org.apache.bookkeeper.bookie; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.LD_NUM_DIRS; import static org.apache.bookkeeper.bookie.BookKeeperServerStats.LD_WRITABLE_DIRS; import com.google.common.annotations.VisibleForTesting; @@ -58,13 +59,16 @@ public class LedgerDirsManager { private final DiskChecker diskChecker; - public LedgerDirsManager(ServerConfiguration conf, File[] dirs, DiskChecker diskChecker) { + public LedgerDirsManager(ServerConfiguration conf, File[] dirs, DiskChecker diskChecker) throws IOException { this(conf, dirs, diskChecker, NullStatsLogger.INSTANCE); } - public LedgerDirsManager(ServerConfiguration conf, File[] dirs, DiskChecker diskChecker, StatsLogger statsLogger) { - this.ledgerDirectories = Arrays.asList(Bookie - .getCurrentDirectories(dirs)); + public LedgerDirsManager(ServerConfiguration conf, File[] dirs, DiskChecker diskChecker, StatsLogger statsLogger) + throws IOException { + this.ledgerDirectories = Arrays.asList(BookieImpl.getCurrentDirectories(dirs)); + for (File f : this.ledgerDirectories) { + BookieImpl.checkDirectoryStructure(f); + } this.writableLedgerDirectories = new ArrayList(ledgerDirectories); this.filledDirs = new ArrayList(); this.listeners = new ArrayList(); @@ -101,6 +105,20 @@ public Number getSample() { return writableLedgerDirectories.size(); } }); + + final int numDirs = dirs.length; + statsLogger.registerGauge(LD_NUM_DIRS, new Gauge() { + + @Override + public Number getDefaultValue() { + return numDirs; + } + + @Override + public Number getSample() { + return numDirs; + } + }); } /** @@ -122,7 +140,7 @@ public List getListeners() { /** * Calculate the total amount of free space available in all of the ledger directories put together. * - * @return totalDiskSpace in bytes + * @return freeDiskSpace in bytes * @throws IOException */ public long getTotalFreeSpace(List dirs) throws IOException { @@ -130,9 +148,9 @@ public long getTotalFreeSpace(List dirs) throws IOException { } /** - * Calculate the total amount of free space available in all of the ledger directories put together. + * Calculate the total amount of disk space in all of the ledger directories put together. 
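getTotalFreeSpace and getTotalDiskSpace had their javadocs swapped; the corrected @return tags continue just below. For orientation, a sketch of deriving a usage fraction from the pair, with the surrounding wiring assumed:

import java.io.File;
import java.io.IOException;
import java.util.List;
import org.apache.bookkeeper.bookie.LedgerDirsManager;

final class DiskUsageSketch {
    static float usedFraction(LedgerDirsManager ldm, List<File> dirs) throws IOException {
        long free = ldm.getTotalFreeSpace(dirs); // bytes still available
        long total = ldm.getTotalDiskSpace(dirs); // bytes overall
        return total == 0 ? 0f : 1f - ((float) free / (float) total);
    }
}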
* - * @return freeDiskSpace in bytes + * @return totalDiskSpace in bytes * @throws IOException */ public long getTotalDiskSpace(List dirs) throws IOException { @@ -157,7 +175,6 @@ public List getWritableLedgerDirs() String errMsg = "All ledger directories are non writable"; NoWritableLedgerDirException e = new NoWritableLedgerDirException( errMsg); - LOG.error(errMsg, e); throw e; } return writableLedgerDirectories; @@ -183,20 +200,20 @@ public List getWritableLedgerDirsForNewLog() throws NoWritableLedgerDirExc List getDirsAboveUsableThresholdSize(long thresholdSize, boolean loggingNoWritable) throws NoWritableLedgerDirException { - List fullLedgerDirsToAccomodate = new ArrayList(); + List fullLedgerDirsToAccommodate = new ArrayList(); for (File dir: this.ledgerDirectories) { // Pick dirs which can accommodate little more than thresholdSize if (dir.getUsableSpace() > thresholdSize) { - fullLedgerDirsToAccomodate.add(dir); + fullLedgerDirsToAccommodate.add(dir); } } - if (!fullLedgerDirsToAccomodate.isEmpty()) { + if (!fullLedgerDirsToAccommodate.isEmpty()) { if (loggingNoWritable) { LOG.info("No writable ledger dirs below diskUsageThreshold. " - + "But Dirs that can accommodate {} are: {}", thresholdSize, fullLedgerDirsToAccomodate); + + "But Dirs that can accommodate {} are: {}", thresholdSize, fullLedgerDirsToAccommodate); } - return fullLedgerDirsToAccomodate; + return fullLedgerDirsToAccommodate; } // We will reach here when we find no ledgerDir which has at least @@ -417,6 +434,19 @@ default void diskJustWritable(File disk) {} */ default void allDisksFull(boolean highPriorityWritesAllowed) {} + /** + * This will be notified whenever all disks are detected as not full. + * + */ + default void allDisksWritable() {} + + /** + * This will be notified whenever any disk is detected as full. + * + * @param highPriorityWritesAllowed indicates whether there is still disk space for high-priority + * writes even when disks are detected as "full" + */ + default void anyDiskFull(boolean highPriorityWritesAllowed) {} /** * This will notify the fatal errors. */ diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/LedgerDirsMonitor.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/LedgerDirsMonitor.java index 4ef02fae4da..32321aee277 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/LedgerDirsMonitor.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/LedgerDirsMonitor.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements.
See the NOTICE file @@ -27,7 +27,6 @@ import java.util.ArrayList; import java.util.List; import java.util.concurrent.ConcurrentMap; - import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.ScheduledFuture; @@ -50,25 +49,29 @@ class LedgerDirsMonitor { private final int interval; private final ServerConfiguration conf; - private final ConcurrentMap diskUsages; private final DiskChecker diskChecker; - private final LedgerDirsManager ldm; - private long minUsableSizeForHighPriorityWrites; + private final List dirsManagers; + private final long minUsableSizeForHighPriorityWrites; private ScheduledExecutorService executor; private ScheduledFuture checkTask; public LedgerDirsMonitor(final ServerConfiguration conf, final DiskChecker diskChecker, - final LedgerDirsManager ldm) { + final List dirsManagers) { + validateThreshold(conf.getDiskUsageThreshold(), conf.getDiskLowWaterMarkUsageThreshold()); this.interval = conf.getDiskCheckInterval(); this.minUsableSizeForHighPriorityWrites = conf.getMinUsableSizeForHighPriorityWrites(); this.conf = conf; this.diskChecker = diskChecker; - this.diskUsages = ldm.getDiskUsages(); - this.ldm = ldm; + this.dirsManagers = dirsManagers; } - private void check() { + private void check(final LedgerDirsManager ldm) { + final ConcurrentMap diskUsages = ldm.getDiskUsages(); + boolean someDiskFulled = false; + boolean highPriorityWritesAllowed = true; + boolean someDiskRecovered = false; + try { List writableDirs = ldm.getWritableLedgerDirs(); // Check all writable dirs disk space usage. @@ -100,6 +103,7 @@ private void check() { }); // Notify disk full to all listeners ldm.addToFilledDirs(dir); + someDiskFulled = true; } } // Let's get NoWritableLedgerDirException without waiting for the next iteration @@ -108,7 +112,7 @@ private void check() { // bookie cannot get writable dir but considered to be writable ldm.getWritableLedgerDirs(); } catch (NoWritableLedgerDirException e) { - boolean highPriorityWritesAllowed = true; + LOG.warn("LedgerDirsMonitor check process: All ledger directories are non writable"); try { // disk check can be frequent, so disable 'loggingNoWritable' to avoid log flooding. ldm.getDirsAboveUsableThresholdSize(minUsableSizeForHighPriorityWrites, false); @@ -120,44 +124,49 @@ private void check() { } } - List fullfilledDirs = new ArrayList(ldm.getFullFilledLedgerDirs()); - boolean hasWritableLedgerDirs = ldm.hasWritableLedgerDirs(); - float totalDiskUsage = 0; + List fulfilledDirs = new ArrayList(ldm.getFullFilledLedgerDirs()); + boolean makeWritable = ldm.hasWritableLedgerDirs(); - // When bookie is in READONLY mode .i.e there are no writableLedgerDirs: - // - Check if the total disk usage is below DiskLowWaterMarkUsageThreshold. - // - If So, walk through the entire list of fullfilledDirs and add them back to writableLedgerDirs list if - // their usage is < conf.getDiskUsageThreshold. + // When bookie is in READONLY mode, i.e there are no writableLedgerDirs: + // - Update fulfilledDirs disk usage. + // - If the total disk usage is below DiskLowWaterMarkUsageThreshold + // add fulfilledDirs back to writableLedgerDirs list if their usage is < conf.getDiskUsageThreshold. 
try { - if (hasWritableLedgerDirs - || (totalDiskUsage = diskChecker.getTotalDiskUsage(ldm.getAllLedgerDirs())) < conf - .getDiskLowWaterMarkUsageThreshold()) { - // Check all full-filled disk space usage - for (File dir : fullfilledDirs) { - try { - diskUsages.put(dir, diskChecker.checkDir(dir)); + if (!makeWritable) { + float totalDiskUsage = diskChecker.getTotalDiskUsage(ldm.getAllLedgerDirs()); + if (totalDiskUsage < conf.getDiskLowWaterMarkUsageThreshold()) { + makeWritable = true; + } else if (LOG.isDebugEnabled()) { + LOG.debug( + "Current TotalDiskUsage: {} is greater than LWMThreshold: {}." + + " So not adding any filledDir to WritableDirsList", + totalDiskUsage, conf.getDiskLowWaterMarkUsageThreshold()); + } + } + // Update all full-filled disk space usage + for (File dir : fulfilledDirs) { + try { + diskUsages.put(dir, diskChecker.checkDir(dir)); + if (makeWritable) { ldm.addToWritableDirs(dir, true); - } catch (DiskErrorException e) { - // Notify disk failure to all the listeners - for (LedgerDirsListener listener : ldm.getListeners()) { - listener.diskFailed(dir); - } - } catch (DiskWarnThresholdException e) { - diskUsages.put(dir, e.getUsage()); - // the full-filled dir become writable but still - // above - // warn threshold + } + someDiskRecovered = true; + } catch (DiskErrorException e) { + // Notify disk failure to all the listeners + for (LedgerDirsListener listener : ldm.getListeners()) { + listener.diskFailed(dir); + } + } catch (DiskWarnThresholdException e) { + diskUsages.put(dir, e.getUsage()); + // the full-filled dir becomes writable but still above the warn threshold + if (makeWritable) { ldm.addToWritableDirs(dir, false); - } catch (DiskOutOfSpaceException e) { - // the full-filled dir is still full-filled - diskUsages.put(dir, e.getUsage()); } + someDiskRecovered = true; + } catch (DiskOutOfSpaceException e) { + // the full-filled dir is still full-filled + diskUsages.put(dir, e.getUsage()); } - } else { - LOG.debug( - "Current TotalDiskUsage: {} is greater than LWMThreshold: {}." - + " So not adding any filledDir to WritableDirsList", - totalDiskUsage, conf.getDiskLowWaterMarkUsageThreshold()); } } catch (IOException ioe) { LOG.error("Got IOException while monitoring Dirs", ioe); @@ -165,6 +174,26 @@ private void check() { listener.fatalError(); } } + + if (conf.isReadOnlyModeOnAnyDiskFullEnabled()) { + if (someDiskFulled && !ldm.getFullFilledLedgerDirs().isEmpty()) { + // notify any disk full. + for (LedgerDirsListener listener : ldm.getListeners()) { + listener.anyDiskFull(highPriorityWritesAllowed); + } + } + + if (someDiskRecovered && ldm.getFullFilledLedgerDirs().isEmpty()) { + // notify all disk recovered.
+ for (LedgerDirsListener listener : ldm.getListeners()) { + listener.allDisksWritable(); + } + } + } + } + + private void check() { + dirsManagers.forEach(this::check); } /** @@ -177,7 +206,7 @@ private void check() { * less space than threshold */ public void init() throws DiskErrorException, NoWritableLedgerDirException { - checkDirs(ldm.getWritableLedgerDirs()); + checkDirs(); } // start the daemon for disk monitoring @@ -187,14 +216,14 @@ public void start() { .setNameFormat("LedgerDirsMonitorThread") .setDaemon(true) .build()); - this.checkTask = this.executor.scheduleAtFixedRate(() -> check(), interval, interval, TimeUnit.MILLISECONDS); + this.checkTask = this.executor.scheduleAtFixedRate(this::check, interval, interval, TimeUnit.MILLISECONDS); } // shutdown disk monitoring daemon public void shutdown() { LOG.info("Shutting down LedgerDirsMonitor"); if (null != checkTask) { - if (checkTask.cancel(true)) { + if (!checkTask.cancel(true) && LOG.isDebugEnabled()) { LOG.debug("Failed to cancel check task in LedgerDirsMonitor"); } } @@ -203,9 +232,15 @@ public void shutdown() { } } - public void checkDirs(List writableDirs) + private void checkDirs() throws NoWritableLedgerDirException, DiskErrorException { + for (LedgerDirsManager dirsManager : dirsManagers) { + checkDirs(dirsManager); + } + } + + private void checkDirs(final LedgerDirsManager ldm) throws DiskErrorException, NoWritableLedgerDirException { - for (File dir : writableDirs) { + for (File dir : ldm.getWritableLedgerDirs()) { try { diskChecker.checkDir(dir); } catch (DiskWarnThresholdException e) { @@ -216,5 +251,13 @@ public void checkDirs(List writableDirs) } ldm.getWritableLedgerDirs(); } + + private void validateThreshold(float diskSpaceThreshold, float diskSpaceLwmThreshold) { + if (diskSpaceThreshold <= 0 || diskSpaceThreshold >= 1 || diskSpaceLwmThreshold - diskSpaceThreshold > 1e-6) { + throw new IllegalArgumentException("Disk space threshold: " + + diskSpaceThreshold + " and lwm threshold: " + diskSpaceLwmThreshold + + " are not valid. Should be > 0 and < 1 and diskSpaceThreshold >= diskSpaceLwmThreshold"); + } + } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/LedgerEntryPage.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/LedgerEntryPage.java index a9cef723061..98ea9484155 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/LedgerEntryPage.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/LedgerEntryPage.java @@ -23,6 +23,8 @@ import java.io.IOException; import java.nio.ByteBuffer; +import java.util.NoSuchElementException; +import java.util.PrimitiveIterator.OfLong; import java.util.concurrent.atomic.AtomicInteger; import org.apache.bookkeeper.proto.BookieProtocol; import org.apache.bookkeeper.util.ZeroBuffer; @@ -33,7 +35,7 @@ * This is a page in the LedgerCache. It holds the locations * (entrylogfile, offset) for entry ids.
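The new validateThreshold above fails fast on inconsistent settings. A minimal configuration sketch that satisfies its invariant; the setter names are from ServerConfiguration and the values are illustrative.

import org.apache.bookkeeper.conf.ServerConfiguration;

final class ThresholdConfigSketch {
    static ServerConfiguration diskThresholds() {
        ServerConfiguration conf = new ServerConfiguration();
        conf.setDiskUsageThreshold(0.95f); // must be in (0, 1)
        conf.setDiskLowWaterMarkUsageThreshold(0.90f); // must not exceed the threshold above
        return conf;
    }
}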
*/ -public class LedgerEntryPage { +public class LedgerEntryPage implements AutoCloseable { private static final Logger LOG = LoggerFactory.getLogger(LedgerEntryPage.class); @@ -47,6 +49,7 @@ public class LedgerEntryPage { private final AtomicInteger version = new AtomicInteger(0); private volatile int last = -1; // Last update position private final LEPStateChangeCallback callback; + private boolean deleted; public static int getIndexEntrySize() { return indexEntrySize; @@ -75,11 +78,20 @@ public void resetPage() { entryKey = new EntryKey(-1, BookieProtocol.INVALID_ENTRY_ID); clean = true; useCount.set(0); + deleted = false; if (null != this.callback) { callback.onResetInUse(this); } } + public void markDeleted() { + deleted = true; + version.incrementAndGet(); + } + + public boolean isDeleted() { + return deleted; + } @Override public String toString() { @@ -207,14 +219,15 @@ public void readPage(FileInfo fi) throws IOException { public ByteBuffer getPageToWrite() { checkPage(); page.clear(); - return page; + // Different callers to this method should be able to reasonably expect independent read pointers + return page.duplicate(); } long getLedger() { return entryKey.getLedgerId(); } - int getVersion() { + public int getVersion() { return version.get(); } @@ -261,4 +274,59 @@ public long getLastEntry() { return index >= 0 ? (index + entryKey.getEntryId()) : 0; } } + + /** + * Interface for getEntries to propagate entry, pos pairs. + */ + public interface EntryVisitor { + boolean visit(long entry, long pos) throws Exception; + } + + /** + * Iterates over non-empty entry mappings. + * + * @param vis Consumer for entry position pairs. + * @throws Exception + */ + public void getEntries(EntryVisitor vis) throws Exception { + // process a page + for (int i = 0; i < entriesPerPage; i++) { + long offset = getOffset(i * 8); + if (offset != 0) { + if (!vis.visit(getFirstEntry() + i, offset)) { + return; + } + } + } + } + + public OfLong getEntriesIterator() { + return new OfLong() { + long firstEntry = getFirstEntry(); + int curDiffEntry = 0; + + @Override + public boolean hasNext() { + while ((curDiffEntry < entriesPerPage) && (getOffset(curDiffEntry * 8) == 0)) { + curDiffEntry++; + } + return (curDiffEntry != entriesPerPage); + } + + @Override + public long nextLong() { + if (!hasNext()) { + throw new NoSuchElementException(); + } + long nextEntry = firstEntry + curDiffEntry; + curDiffEntry++; + return nextEntry; + } + }; + } + + @Override + public void close() throws Exception { + releasePage(); + } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/LedgerStorage.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/LedgerStorage.java index 34e32b9499d..6eca6e00108 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/LedgerStorage.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/LedgerStorage.java @@ -21,8 +21,17 @@ package org.apache.bookkeeper.bookie; +import com.google.common.util.concurrent.RateLimiter; import io.netty.buffer.ByteBuf; +import io.netty.buffer.ByteBufAllocator; import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.EnumSet; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.PrimitiveIterator; import org.apache.bookkeeper.bookie.CheckpointSource.Checkpoint; import org.apache.bookkeeper.common.util.Watcher; import org.apache.bookkeeper.conf.ServerConfiguration; @@ -45,12 +54,14 @@ void 
initialize(ServerConfiguration conf, LedgerManager ledgerManager, LedgerDirsManager ledgerDirsManager, LedgerDirsManager indexDirsManager, - StateManager stateManager, - CheckpointSource checkpointSource, - Checkpointer checkpointer, - StatsLogger statsLogger) + StatsLogger statsLogger, + ByteBufAllocator allocator) throws IOException; + void setStateManager(StateManager stateManager); + void setCheckpointSource(CheckpointSource checkpointSource); + void setCheckpointer(Checkpointer checkpointer); + /** * Start any background threads belonging to the storage system. For example, garbage collection. */ @@ -66,6 +77,11 @@ void initialize(ServerConfiguration conf, */ boolean ledgerExists(long ledgerId) throws IOException; + /** + * Whether an entry exists. + */ + boolean entryExists(long ledgerId, long entryId) throws IOException, BookieException; + /** * Fenced the ledger id in ledger storage. * @@ -80,7 +96,28 @@ void initialize(ServerConfiguration conf, * @param ledgerId Ledger ID. * @throws IOException */ - boolean isFenced(long ledgerId) throws IOException; + boolean isFenced(long ledgerId) throws IOException, BookieException; + + /** + * Set a ledger to limbo state. + * When a ledger is in limbo state, we cannot answer any requests about it. + * For example, if a client asks for an entry, we cannot say we don't have it because + * it may have been written to us in the past, but we are waiting for data integrity checks + * to copy it over. + */ + void setLimboState(long ledgerId) throws IOException; + + /** + * Check whether a ledger is in limbo state. + * @see #setLimboState(long) + */ + boolean hasLimboState(long ledgerId) throws IOException; + + /** + * Clear the limbo state of a ledger. + * @see #setLimboState(long) + */ + void clearLimboState(long ledgerId) throws IOException; /** * Set the master key for a ledger. @@ -105,7 +142,7 @@ void initialize(ServerConfiguration conf, /** * Read an entry from storage. */ - ByteBuf getEntry(long ledgerId, long entryId) throws IOException; + ByteBuf getEntry(long ledgerId, long entryId) throws IOException, BookieException; /** * Get last add confirmed. @@ -114,7 +151,7 @@ void initialize(ServerConfiguration conf, * @return last add confirmed. * @throws IOException */ - long getLastAddConfirmed(long ledgerId) throws IOException; + long getLastAddConfirmed(long ledgerId) throws IOException, BookieException; /** * Wait for last add confirmed update. @@ -128,6 +165,16 @@ boolean waitForLastAddConfirmedUpdate(long ledgerId, long previousLAC, Watcher watcher) throws IOException; + /** + * Cancel a previous wait for last add confirmed update. + * + * @param ledgerId The ledger being watched. + * @param watcher The watcher to cancel. + * @throws IOException + */ + void cancelWaitForLastAddConfirmedUpdate(long ledgerId, + Watcher watcher) throws IOException; + /** * Flushes all data in the storage. Once this is called, * add data written to the LedgerStorage up until this point @@ -166,13 +213,170 @@ interface LedgerDeletionListener { */ void registerLedgerDeletionListener(LedgerDeletionListener listener); - void setExplicitlac(long ledgerId, ByteBuf lac) throws IOException; + void setExplicitLac(long ledgerId, ByteBuf lac) throws IOException; - ByteBuf getExplicitLac(long ledgerId); + ByteBuf getExplicitLac(long ledgerId) throws IOException, BookieException; // for testability default LedgerStorage getUnderlyingLedgerStorage() { return this; } + /** + * Force trigger Garbage Collection. 
+ */ + default void forceGC() { + return; + } + + /** + * Force trigger Garbage Collection with forceMajor or forceMinor parameter. + */ + default void forceGC(boolean forceMajor, boolean forceMinor) { + return; + } + + default void suspendMinorGC() { + return; + } + + default void suspendMajorGC() { + return; + } + + default void resumeMinorGC() { + return; + } + + default void resumeMajorGC() { + return; + } + + default boolean isMajorGcSuspended() { + return false; + } + + default boolean isMinorGcSuspended() { + return false; + } + + default void entryLocationCompact() { + return; + } + + default void entryLocationCompact(List locations) { + return; + } + + default boolean isEntryLocationCompacting() { + return false; + } + + default Map isEntryLocationCompacting(List locations) { + return Collections.emptyMap(); + } + + default List getEntryLocationDBPath() { + return Collections.emptyList(); + } + + /** + * Class for describing location of a generic inconsistency. Implementations should + * ensure that detail is populated with an exception which adequately describes the + * nature of the problem. + */ + class DetectedInconsistency { + private long ledgerId; + private long entryId; + private Exception detail; + + DetectedInconsistency(long ledgerId, long entryId, Exception detail) { + this.ledgerId = ledgerId; + this.entryId = entryId; + this.detail = detail; + } + + public long getLedgerId() { + return ledgerId; + } + + public long getEntryId() { + return entryId; + } + + public Exception getException() { + return detail; + } + } + + /** + * Performs an internal check of local storage, logging any inconsistencies. + * @param rateLimiter Rate limiter used to throttle entry checking; null for unlimited. + * @return List of inconsistencies detected + * @throws IOException + */ + default List localConsistencyCheck(Optional rateLimiter) throws IOException { + return new ArrayList<>(); + } + + /** + * Whether force triggered Garbage Collection is running or not. + * + * @return + * true -- force triggered Garbage Collection is running, + * false -- force triggered Garbage Collection is not running + */ + default boolean isInForceGC() { + return false; + } + + + /** + * Get Garbage Collection status. + * Since DbLedgerStorage is a list of storage instances, we should return a list. + */ + default List getGarbageCollectionStatus() { + return Collections.emptyList(); + } + + /** + * Returns the primitive long iterator for entries of the ledger, stored in + * this LedgerStorage. The returned iterator provides weakly consistent state + * of the ledger. It is guaranteed that entries of the ledger added to this + * LedgerStorage by the time this method is called will be available but + * modifications made after method invocation may not be available. + * + * @param ledgerId + * - id of the ledger + * @return the list of entries of the ledger available in this + * ledger storage. + * @throws Exception + */ + PrimitiveIterator.OfLong getListOfEntriesOfLedger(long ledgerId) throws IOException; + + /** + * Get the storage state flags currently set for the storage instance. + */ + EnumSet getStorageStateFlags() throws IOException; + + /** + * Set a storage state flag for the storage instance. + * Implementations must ensure this method is atomic, and the flag + * is persisted to storage when the method returns. + */ + void setStorageStateFlag(StorageState flags) throws IOException; + + /** + * Clear a storage state flag for the storage instance.
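A short usage sketch for the weakly consistent per-ledger iterator described above; the initialized storage instance is assumed.

import java.io.IOException;
import java.util.PrimitiveIterator;
import org.apache.bookkeeper.bookie.LedgerStorage;

final class EntriesIteratorSketch {
    static long countEntries(LedgerStorage storage, long ledgerId) throws IOException {
        PrimitiveIterator.OfLong entries = storage.getListOfEntriesOfLedger(ledgerId);
        long count = 0;
        while (entries.hasNext()) {
            entries.nextLong();
            count++; // entries added after the iterator was created may or may not appear
        }
        return count;
    }
}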
+ * Implementations must ensure this method is atomic, and the flag + * is persisted to storage when the method returns. + */ + void clearStorageStateFlag(StorageState flags) throws IOException; + + /** + * StorageState flags. + */ + enum StorageState { + NEEDS_INTEGRITY_CHECK + } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/LedgerStorageFactory.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/LedgerStorageFactory.java index 6a9d0c44a1e..147c64b172d 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/LedgerStorageFactory.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/LedgerStorageFactory.java @@ -21,8 +21,7 @@ package org.apache.bookkeeper.bookie; import java.io.IOException; - -import org.apache.bookkeeper.util.ReflectionUtils; +import org.apache.bookkeeper.common.util.ReflectionUtils; /** * A factory that creates {@link LedgerStorage} by reflection. diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/LegacyCookieValidation.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/LegacyCookieValidation.java new file mode 100644 index 00000000000..2f15f6b4cfa --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/LegacyCookieValidation.java @@ -0,0 +1,282 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bookkeeper.bookie; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.Lists; +import java.io.File; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.net.UnknownHostException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.discover.RegistrationManager; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.versioning.Version; +import org.apache.bookkeeper.versioning.Versioned; +import org.apache.commons.lang3.tuple.Pair; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Legacy implementation of CookieValidation. 
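A hypothetical wiring sketch for this validator at startup; the directory list (journal plus ledger directories) mirrors what a bookie would pass, and everything apart from the class itself is an assumption for illustration.

import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.bookkeeper.bookie.BookieException;
import org.apache.bookkeeper.bookie.LegacyCookieValidation;
import org.apache.bookkeeper.conf.ServerConfiguration;
import org.apache.bookkeeper.discover.RegistrationManager;

final class CookieCheckSketch {
    static void validate(ServerConfiguration conf, RegistrationManager rm) throws BookieException {
        List<File> dirs = new ArrayList<>(Arrays.asList(conf.getJournalDirs()));
        dirs.addAll(Arrays.asList(conf.getLedgerDirs()));
        new LegacyCookieValidation(conf, rm).checkCookies(dirs);
    }
}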
+ */ +public class LegacyCookieValidation implements CookieValidation { + private static final Logger log = LoggerFactory.getLogger(LegacyCookieValidation.class); + + private final ServerConfiguration conf; + private final RegistrationManager registrationManager; + + public LegacyCookieValidation(ServerConfiguration conf, + RegistrationManager registrationManager) { + this.conf = conf; + this.registrationManager = registrationManager; + } + + @VisibleForTesting + public static LegacyCookieValidation newLegacyCookieValidation(ServerConfiguration conf, + RegistrationManager registrationManager) { + return new LegacyCookieValidation(conf, registrationManager); + } + + @Override + public void checkCookies(List directories) throws BookieException { + try { + // 1. retrieve the instance id + String instanceId = registrationManager.getClusterInstanceId(); + + // 2. build the master cookie from the configuration + Cookie.Builder builder = Cookie.generateCookie(conf); + if (null != instanceId) { + builder.setInstanceId(instanceId); + } + Cookie masterCookie = builder.build(); + boolean allowExpansion = conf.getAllowStorageExpansion(); + + // 3. read the cookie from registration manager. it is the `source-of-truth` of a given bookie. + // if it doesn't exist in registration manager, this bookie is a new bookie, otherwise it is + // an old bookie. + List possibleBookieIds = possibleBookieIds(conf); + final Versioned rmCookie = readAndVerifyCookieFromRegistrationManager( + masterCookie, registrationManager, possibleBookieIds, allowExpansion); + + // 4. check if the cookie appear in all the directories. + List missedCookieDirs = new ArrayList<>(); + List existingCookies = Lists.newArrayList(); + if (null != rmCookie) { + existingCookies.add(rmCookie.getValue()); + } + + // 4.1 verify the cookies in journal directories + Pair, List> result = + verifyAndGetMissingDirs(masterCookie, + allowExpansion, directories); + missedCookieDirs.addAll(result.getLeft()); + existingCookies.addAll(result.getRight()); + + // 5. if there are directories missing cookies, + // this is either a: + // - new environment + // - a directory is being added + // - a directory has been corrupted/wiped, which is an error + if (!missedCookieDirs.isEmpty()) { + if (rmCookie == null) { + // 5.1 new environment: all directories should be empty + verifyDirsForNewEnvironment(missedCookieDirs); + stampNewCookie(conf, masterCookie, registrationManager, + Version.NEW, directories); + } else if (allowExpansion) { + // 5.2 storage is expanding + Set knownDirs = getKnownDirs(existingCookies); + verifyDirsForStorageExpansion(missedCookieDirs, knownDirs); + stampNewCookie(conf, masterCookie, registrationManager, + rmCookie.getVersion(), directories); + } else { + // 5.3 Cookie-less directories and + // we can't do anything with them + log.error("There are directories without a cookie," + + " and this is neither a new environment," + + " nor is storage expansion enabled. " + + "Empty directories are {}", missedCookieDirs); + throw new BookieException.InvalidCookieException(); + } + } else { + if (rmCookie == null) { + // No corresponding cookie found in registration manager. The bookie should fail to come up. + log.error("Cookie for this bookie is not stored in metadata store. 
Bookie failing to come up"); + throw new BookieException.InvalidCookieException(); + } + } + } catch (IOException ioe) { + log.error("Error accessing cookie on disks", ioe); + throw new BookieException.InvalidCookieException(ioe); + } + } + + private static List possibleBookieIds(ServerConfiguration conf) + throws BookieException { + // we need to loop through all possible bookie identifiers to ensure it is treated as a new environment + // just because of bad configuration + List addresses = Lists.newArrayListWithExpectedSize(3); + // we are checking all possibilities here, so we don't need to fail if we can only get + // loopback address. it will fail anyway when the bookie attempts to listen on loopback address. + try { + if (null != conf.getBookieId()) { + // If BookieID is configured, it takes precedence over default network information used as id. + addresses.add(BookieImpl.getBookieId(conf)); + } else { + // ip address + addresses.add(BookieImpl.getBookieAddress( + new ServerConfiguration(conf) + .setUseHostNameAsBookieID(false) + .setAdvertisedAddress(null) + .setAllowLoopback(true) + ).toBookieId()); + // host name + addresses.add(BookieImpl.getBookieAddress( + new ServerConfiguration(conf) + .setUseHostNameAsBookieID(true) + .setAdvertisedAddress(null) + .setAllowLoopback(true) + ).toBookieId()); + // advertised address + if (null != conf.getAdvertisedAddress()) { + addresses.add(BookieImpl.getBookieAddress(conf).toBookieId()); + } + } + } catch (UnknownHostException e) { + throw new BookieException.UnknownBookieIdException(e); + } + return addresses; + } + + private static Versioned readAndVerifyCookieFromRegistrationManager( + Cookie masterCookie, RegistrationManager rm, + List addresses, boolean allowExpansion) + throws BookieException { + Versioned rmCookie = null; + for (BookieId address : addresses) { + try { + rmCookie = Cookie.readFromRegistrationManager(rm, address); + // If allowStorageExpansion option is set, we should + // make sure that the new set of ledger/index dirs + // is a super set of the old; else, we fail the cookie check + if (allowExpansion) { + masterCookie.verifyIsSuperSet(rmCookie.getValue()); + } else { + masterCookie.verify(rmCookie.getValue()); + } + } catch (BookieException.CookieNotFoundException e) { + continue; + } + } + return rmCookie; + } + + private static Pair, List> verifyAndGetMissingDirs( + Cookie masterCookie, boolean allowExpansion, List dirs) + throws BookieException.InvalidCookieException, IOException { + List missingDirs = Lists.newArrayList(); + List existedCookies = Lists.newArrayList(); + for (File dir : dirs) { + try { + Cookie c = Cookie.readFromDirectory(dir); + if (allowExpansion) { + masterCookie.verifyIsSuperSet(c); + } else { + masterCookie.verify(c); + } + existedCookies.add(c); + } catch (FileNotFoundException fnf) { + missingDirs.add(dir); + } + } + return Pair.of(missingDirs, existedCookies); + } + + private static void verifyDirsForNewEnvironment(List missedCookieDirs) + throws BookieException.InvalidCookieException { + List nonEmptyDirs = new ArrayList<>(); + for (File dir : missedCookieDirs) { + String[] content = dir.list(); + if (content != null && content.length != 0) { + nonEmptyDirs.add(dir); + } + } + if (!nonEmptyDirs.isEmpty()) { + log.error("Not all the new directories are empty. 
New directories that are not empty are: " + nonEmptyDirs); + throw new BookieException.InvalidCookieException(); + } + } + + private static void stampNewCookie(ServerConfiguration conf, + Cookie masterCookie, + RegistrationManager rm, + Version version, + List dirs) + throws BookieException, IOException { + // backfill all the directories that miss cookies (for storage expansion) + log.info("Stamping new cookies on all dirs {}", dirs); + for (File dir : dirs) { + masterCookie.writeToDirectory(dir); + } + masterCookie.writeToRegistrationManager(rm, conf, version); + } + + private static Set getKnownDirs(List cookies) { + return cookies.stream() + .flatMap((c) -> { + List dirs = new ArrayList<>(Arrays.asList(c.getLedgerDirPathsFromCookie())); + if (null != c.getIndexDirPathsFromCookie()) { + dirs.addAll(Arrays.asList(c.getIndexDirPathsFromCookie())); + } + return Arrays.stream(dirs.toArray(new String[]{})); + }).map((s) -> new File(s)).collect(Collectors.toSet()); + } + + private static void verifyDirsForStorageExpansion( + List missedCookieDirs, + Set existingLedgerDirs) throws BookieException.InvalidCookieException { + + List dirsMissingData = new ArrayList(); + List nonEmptyDirs = new ArrayList(); + for (File dir : missedCookieDirs) { + if (existingLedgerDirs.contains(dir.getParentFile())) { + // if one of the existing ledger dirs doesn't have cookie, + // let us not proceed further + dirsMissingData.add(dir); + continue; + } + String[] content = dir.list(); + if (content != null && content.length != 0) { + nonEmptyDirs.add(dir); + } + } + if (dirsMissingData.size() > 0 || nonEmptyDirs.size() > 0) { + log.error("Either not all local directories have cookies or directories being added " + + " newly are not empty. " + + "Directories missing cookie file are: " + dirsMissingData + + " New directories that are not empty are: " + nonEmptyDirs); + throw new BookieException.InvalidCookieException(); + } + } +} \ No newline at end of file diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/LocalBookieEnsemblePlacementPolicy.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/LocalBookieEnsemblePlacementPolicy.java index 46978ea5c65..7c949cb3d4b 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/LocalBookieEnsemblePlacementPolicy.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/LocalBookieEnsemblePlacementPolicy.java @@ -33,8 +33,9 @@ import org.apache.bookkeeper.conf.ClientConfiguration; import org.apache.bookkeeper.conf.ServerConfiguration; import org.apache.bookkeeper.feature.FeatureProvider; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.net.DNSToSwitchMapping; +import org.apache.bookkeeper.proto.BookieAddressResolver; import org.apache.bookkeeper.stats.StatsLogger; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -48,19 +49,20 @@ public class LocalBookieEnsemblePlacementPolicy implements EnsemblePlacementPoli static final Logger LOG = LoggerFactory.getLogger(LocalBookieEnsemblePlacementPolicy.class); - private BookieSocketAddress bookieAddress; + private BookieId bookieAddress; @Override public EnsemblePlacementPolicy initialize(ClientConfiguration conf, Optional optionalDnsResolver, HashedWheelTimer hashedWheelTimer, - FeatureProvider featureProvider, StatsLogger statsLogger) { + FeatureProvider featureProvider, + StatsLogger statsLogger, BookieAddressResolver bookieAddressResolver) { // Configuration will have already 
the bookie configuration inserted ServerConfiguration serverConf = new ServerConfiguration(); serverConf.addConfiguration(conf); try { - bookieAddress = Bookie.getBookieAddress(serverConf); + bookieAddress = BookieImpl.getBookieId(serverConf); } catch (UnknownHostException e) { LOG.warn("Unable to get bookie address", e); throw new RuntimeException(e); @@ -74,53 +76,59 @@ public void uninitalize() { } @Override - public Set onClusterChanged(Set writableBookies, - Set readOnlyBookies) { + public Set onClusterChanged(Set writableBookies, + Set readOnlyBookies) { return Collections.emptySet(); } @Override - public BookieSocketAddress replaceBookie(int ensembleSize, int writeQuorumSize, int ackQuorumSize, - java.util.Map customMetadata, Set currentEnsemble, - BookieSocketAddress bookieToReplace, Set excludeBookies) + public PlacementResult replaceBookie(int ensembleSize, int writeQuorumSize, int ackQuorumSize, + java.util.Map customMetadata, List currentEnsemble, + BookieId bookieToReplace, Set excludeBookies) throws BKNotEnoughBookiesException { throw new BKNotEnoughBookiesException(); } @Override - public void registerSlowBookie(BookieSocketAddress bookieSocketAddress, long entryId) { + public void registerSlowBookie(BookieId bookieSocketAddress, long entryId) { return; } @Override public DistributionSchedule.WriteSet reorderReadSequence( - List ensemble, + List ensemble, BookiesHealthInfo bookiesHealthInfo, DistributionSchedule.WriteSet writeSet) { - return null; + return writeSet; } @Override public DistributionSchedule.WriteSet reorderReadLACSequence( - List ensemble, + List ensemble, BookiesHealthInfo bookiesHealthInfo, DistributionSchedule.WriteSet writeSet) { - return null; + return writeSet; } @Override - public List newEnsemble(int ensembleSize, int writeQuorumSize, int ackQuorumSize, - java.util.Map customMetadata, Set excludeBookies) + public PlacementResult> newEnsemble(int ensembleSize, int writeQuorumSize, + int ackQuorumSize, java.util.Map customMetadata, Set excludeBookies) throws BKNotEnoughBookiesException { if (ensembleSize > 1) { throw new IllegalArgumentException("Local ensemble policy can only return 1 bookie"); } - return Lists.newArrayList(bookieAddress); + return PlacementResult.of(Lists.newArrayList(bookieAddress), PlacementPolicyAdherence.MEETS_STRICT); } @Override - public void updateBookieInfo(Map bookieToFreeSpaceMap) { + public void updateBookieInfo(Map bookieToFreeSpaceMap) { return; } + + @Override + public PlacementPolicyAdherence isEnsembleAdheringToPlacementPolicy(List ensembleList, + int writeQuorumSize, int ackQuorumSize) { + return PlacementPolicyAdherence.MEETS_STRICT; + } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/ReadOnlyBookie.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/ReadOnlyBookie.java index 5125c077fe2..eb9f2ec7d8c 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/ReadOnlyBookie.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/ReadOnlyBookie.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file @@ -21,9 +21,14 @@ package org.apache.bookkeeper.bookie; +import io.netty.buffer.ByteBufAllocator; import java.io.IOException; +import java.util.function.Supplier; import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.discover.BookieServiceInfo; +import org.apache.bookkeeper.discover.RegistrationManager; import org.apache.bookkeeper.stats.StatsLogger; +import org.apache.bookkeeper.util.DiskChecker; import org.apache.zookeeper.KeeperException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -34,13 +39,21 @@ * ReadOnlyBookie is force started as readonly, and will not change to writable. *

*/ -public class ReadOnlyBookie extends Bookie { +public class ReadOnlyBookie extends BookieImpl { private static final Logger LOG = LoggerFactory.getLogger(ReadOnlyBookie.class); - public ReadOnlyBookie(ServerConfiguration conf, StatsLogger statsLogger) + public ReadOnlyBookie(ServerConfiguration conf, + RegistrationManager registrationManager, + LedgerStorage storage, + DiskChecker diskChecker, + LedgerDirsManager ledgerDirsManager, + LedgerDirsManager indexDirsManager, + StatsLogger statsLogger, + ByteBufAllocator allocator, Supplier bookieServiceInfoProvider) throws IOException, KeeperException, InterruptedException, BookieException { - super(conf, statsLogger); + super(conf, registrationManager, storage, diskChecker, + ledgerDirsManager, indexDirsManager, statsLogger, allocator, bookieServiceInfoProvider); if (conf.isReadOnlyModeEnabled()) { stateManager.forceToReadOnly(); } else { @@ -50,22 +63,4 @@ public ReadOnlyBookie(ServerConfiguration conf, StatsLogger statsLogger) } LOG.info("Running bookie in force readonly mode."); } - - @Override - StateManager initializeStateManager() throws IOException { - return new BookieStateManager(conf, statsLogger, metadataDriver, getLedgerDirsManager()) { - - @Override - public void doTransitionToWritableMode() { - // no-op - LOG.info("Skip transition to writable mode for readonly bookie"); - } - - @Override - public void doTransitionToReadOnlyMode() { - // no-op - LOG.info("Skip transition to readonly mode for readonly bookie"); - } - }; - } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/ReadOnlyDefaultEntryLogger.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/ReadOnlyDefaultEntryLogger.java new file mode 100644 index 00000000000..514b0f578ba --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/ReadOnlyDefaultEntryLogger.java @@ -0,0 +1,47 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +package org.apache.bookkeeper.bookie; + +import java.io.IOException; +import java.nio.ByteBuffer; +import org.apache.bookkeeper.conf.ServerConfiguration; + +/** + * Read Only Entry Logger. 
+ */ +public class ReadOnlyDefaultEntryLogger extends DefaultEntryLogger { + + public ReadOnlyDefaultEntryLogger(ServerConfiguration conf) throws IOException { + super(conf); + } + + @Override + public boolean removeEntryLog(long entryLogId) { + // can't remove entry log in readonly mode + return false; + } + + @Override + public synchronized long addEntry(long ledgerId, ByteBuffer entry) throws IOException { + throw new IOException("Can't add entry to a readonly entry logger."); + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/ReadOnlyEntryLogger.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/ReadOnlyEntryLogger.java deleted file mode 100644 index 3a07ec4e805..00000000000 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/ReadOnlyEntryLogger.java +++ /dev/null @@ -1,50 +0,0 @@ -/* - * - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - * - */ - -package org.apache.bookkeeper.bookie; - -import java.io.IOException; -import java.nio.ByteBuffer; - -import org.apache.bookkeeper.conf.ServerConfiguration; -import org.apache.bookkeeper.util.DiskChecker; - -/** - * Read Only Entry Logger. - */ -public class ReadOnlyEntryLogger extends EntryLogger { - - public ReadOnlyEntryLogger(ServerConfiguration conf) throws IOException { - super(conf, new LedgerDirsManager(conf, conf.getLedgerDirs(), - new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold()))); - } - - @Override - protected boolean removeEntryLog(long entryLogId) { - // can't remove entry log in readonly mode - return false; - } - - @Override - public synchronized long addEntry(long ledgerId, ByteBuffer entry) throws IOException { - throw new IOException("Can't add entry to a readonly entry logger."); - } -} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/ScanAndCompareGarbageCollector.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/ScanAndCompareGarbageCollector.java index 137c07cd55e..be73dab3066 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/ScanAndCompareGarbageCollector.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/ScanAndCompareGarbageCollector.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file @@ -21,11 +21,11 @@ package org.apache.bookkeeper.bookie; -import static org.apache.bookkeeper.bookie.BookKeeperServerStats.ACTIVE_LEDGER_COUNT; -import static org.apache.bookkeeper.bookie.BookKeeperServerStats.DELETED_LEDGER_COUNT; +import static org.apache.bookkeeper.common.concurrent.FutureUtils.result; import com.google.common.collect.Sets; import java.io.IOException; +import java.net.URI; import java.util.List; import java.util.NavigableSet; import java.util.Set; @@ -33,26 +33,25 @@ import java.util.TreeSet; import java.util.concurrent.CountDownLatch; import java.util.concurrent.Semaphore; -import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicBoolean; +import lombok.Cleanup; import org.apache.bookkeeper.client.BKException; -import org.apache.bookkeeper.client.LedgerMetadata; +import org.apache.bookkeeper.client.api.LedgerMetadata; import org.apache.bookkeeper.conf.ServerConfiguration; import org.apache.bookkeeper.meta.LedgerManager; import org.apache.bookkeeper.meta.LedgerManager.LedgerRange; import org.apache.bookkeeper.meta.LedgerManager.LedgerRangeIterator; -import org.apache.bookkeeper.meta.ZkLedgerUnderreplicationManager; -import org.apache.bookkeeper.meta.zk.ZKMetadataDriverBase; -import org.apache.bookkeeper.net.BookieSocketAddress; -import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GenericCallback; -import org.apache.bookkeeper.stats.Counter; -import org.apache.bookkeeper.stats.Gauge; +import org.apache.bookkeeper.meta.LedgerManagerFactory; +import org.apache.bookkeeper.meta.LedgerUnderreplicationManager; +import org.apache.bookkeeper.meta.MetadataBookieDriver; +import org.apache.bookkeeper.meta.MetadataDrivers; +import org.apache.bookkeeper.meta.exceptions.MetadataException; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.stats.StatsLogger; -import org.apache.bookkeeper.util.MathUtils; -import org.apache.bookkeeper.util.ZkUtils; -import org.apache.bookkeeper.zookeeper.ZooKeeperClient; -import org.apache.zookeeper.KeeperException; -import org.apache.zookeeper.ZooKeeper; -import org.apache.zookeeper.data.ACL; +import org.apache.bookkeeper.versioning.Versioned; +import org.apache.commons.configuration.ConfigurationException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -69,60 +68,47 @@ * globalActiveLedgers, do garbage collection on them. * *
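 * <p>As a sketch, the compare step amounts to the following (hypothetical variable
 * names, not the actual fields of this class):
 * <pre>{@code
 * for (long ledgerId : ledgersOnThisBookie) {
 *     if (!ledgersInMetadataStore.contains(ledgerId)) {
 *         // the metadata store no longer knows the ledger, so the local copy is garbage
 *         garbageCleaner.clean(ledgerId);
 *     }
 * }
 * }</pre>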

- * - *

TODO: eliminate the direct usage of zookeeper here {@link https://github.com/apache/bookkeeper/issues/1331} */ -public class ScanAndCompareGarbageCollector implements GarbageCollector{ +public class ScanAndCompareGarbageCollector implements GarbageCollector { static final Logger LOG = LoggerFactory.getLogger(ScanAndCompareGarbageCollector.class); - static final int MAX_CONCURRENT_ZK_REQUESTS = 1000; private final LedgerManager ledgerManager; private final CompactableLedgerStorage ledgerStorage; private final ServerConfiguration conf; - private final BookieSocketAddress selfBookieAddress; - private ZooKeeper zk = null; + private final BookieId selfBookieAddress; private boolean enableGcOverReplicatedLedger; private final long gcOverReplicatedLedgerIntervalMillis; private long lastOverReplicatedLedgerGcTimeMillis; - private final String zkServers; - private final String zkLedgersRootPath; private final boolean verifyMetadataOnGc; private int activeLedgerCounter; - private Counter deletedLedgerCounter; + private StatsLogger statsLogger; + private final int maxConcurrentRequests; public ScanAndCompareGarbageCollector(LedgerManager ledgerManager, CompactableLedgerStorage ledgerStorage, ServerConfiguration conf, StatsLogger statsLogger) throws IOException { this.ledgerManager = ledgerManager; this.ledgerStorage = ledgerStorage; this.conf = conf; - this.selfBookieAddress = Bookie.getBookieAddress(conf); + this.statsLogger = statsLogger; + this.selfBookieAddress = BookieImpl.getBookieId(conf); + this.gcOverReplicatedLedgerIntervalMillis = conf.getGcOverreplicatedLedgerWaitTimeMillis(); - this.lastOverReplicatedLedgerGcTimeMillis = MathUtils.now(); + this.lastOverReplicatedLedgerGcTimeMillis = System.currentTimeMillis(); if (gcOverReplicatedLedgerIntervalMillis > 0) { this.enableGcOverReplicatedLedger = true; } - this.zkServers = ZKMetadataDriverBase.resolveZkServers(conf); - this.zkLedgersRootPath = ZKMetadataDriverBase.resolveZkLedgersRootPath(conf); - LOG.info("Over Replicated Ledger Deletion : enabled=" + enableGcOverReplicatedLedger + ", interval=" + gcOverReplicatedLedgerIntervalMillis); + this.maxConcurrentRequests = conf.getGcOverreplicatedLedgerMaxConcurrentRequests(); + LOG.info("Over Replicated Ledger Deletion : enabled={}, interval={}, maxConcurrentRequests={}", + enableGcOverReplicatedLedger, gcOverReplicatedLedgerIntervalMillis, maxConcurrentRequests); verifyMetadataOnGc = conf.getVerifyMetadataOnGC(); - this.deletedLedgerCounter = statsLogger.getCounter(DELETED_LEDGER_COUNT); - this.activeLedgerCounter = 0; - statsLogger.registerGauge(ACTIVE_LEDGER_COUNT, new Gauge<Integer>() { - @Override - public Integer getDefaultValue() { - return 0; - } + } - @Override - public Integer getSample() { - return activeLedgerCounter; - } - }); + public int getNumActiveLedgers() { + return activeLedgerCounter; } @Override @@ -139,12 +125,12 @@ public void gc(GarbageCleaner garbageCleaner) { Long.MAX_VALUE)); this.activeLedgerCounter = bkActiveLedgers.size(); - long curTime = MathUtils.now(); + long curTime = System.currentTimeMillis(); boolean checkOverreplicatedLedgers = (enableGcOverReplicatedLedger && curTime - lastOverReplicatedLedgerGcTimeMillis > gcOverReplicatedLedgerIntervalMillis); if (checkOverreplicatedLedgers) { - zk = ZooKeeperClient.newBuilder().connectString(zkServers) - .sessionTimeoutMs(conf.getZkTimeout()).build(); + LOG.info("Start removing over-replicated ledgers.
activeLedgerCounter={}", activeLedgerCounter); + // remove all the overreplicated ledgers from the local bookie Set<Long> overReplicatedLedgers = removeOverReplicatedledgers(bkActiveLedgers, garbageCleaner); if (overReplicatedLedgers.isEmpty()) { @@ -152,15 +138,19 @@ public void gc(GarbageCleaner garbageCleaner) { } else { LOG.info("Removed over-replicated ledgers: {}", overReplicatedLedgers); } - lastOverReplicatedLedgerGcTimeMillis = MathUtils.now(); + lastOverReplicatedLedgerGcTimeMillis = System.currentTimeMillis(); } // Iterate over all the ledgers on the metadata store - LedgerRangeIterator ledgerRangeIterator = ledgerManager.getLedgerRanges(); + long zkOpTimeoutMs = this.conf.getZkTimeout() * 2; + LedgerRangeIterator ledgerRangeIterator = ledgerManager + .getLedgerRanges(zkOpTimeoutMs); Set<Long> ledgersInMetadata = null; long start; long end = -1; boolean done = false; + AtomicBoolean isBookieInEnsembles = new AtomicBoolean(false); + Versioned<LedgerMetadata> metadata = null; while (!done) { start = end + 1; if (ledgerRangeIterator.hasNext()) { @@ -181,99 +171,122 @@ public void gc(GarbageCleaner garbageCleaner) { for (Long bkLid : subBkActiveLedgers) { if (!ledgersInMetadata.contains(bkLid)) { if (verifyMetadataOnGc) { - CountDownLatch latch = new CountDownLatch(1); - final AtomicInteger metaRC = new AtomicInteger(0); - ledgerManager.readLedgerMetadata(bkLid, (int rc, LedgerMetadata x) -> { - metaRC.set(rc); - latch.countDown(); - }); - latch.await(); - if (metaRC.get() != BKException.Code.NoSuchLedgerExistsException) { - LOG.warn( - "Ledger {} Missing in metadata list, but ledgerManager returned rc: {}.", - bkLid, - metaRC.get()); + isBookieInEnsembles.set(false); + metadata = null; + int rc = BKException.Code.OK; + try { + metadata = result(ledgerManager.readLedgerMetadata(bkLid), zkOpTimeoutMs, + TimeUnit.MILLISECONDS); + } catch (BKException | TimeoutException e) { + if (e instanceof BKException) { + rc = ((BKException) e).getCode(); + } else { + LOG.warn("Time-out while fetching metadata for Ledger {} : {}.", bkLid, + e.getMessage()); + + continue; + } + } + // the bookie should be part of the ensemble in at least one + // segment, otherwise the ledger should be deleted from + // local storage + if (metadata != null && metadata.getValue() != null) { + metadata.getValue().getAllEnsembles().forEach((entryId, ensembles) -> { + if (ensembles != null && ensembles.contains(selfBookieAddress)) { + isBookieInEnsembles.set(true); + } + }); + if (isBookieInEnsembles.get()) { + continue; + } + } else if (rc != BKException.Code.NoSuchLedgerExistsOnMetadataServerException) { + LOG.warn("Ledger {} Missing in metadata list, but ledgerManager returned rc: {}.", + bkLid, rc); continue; } } - deletedLedgerCounter.inc(); garbageCleaner.clean(bkLid); } } } catch (Throwable t) { // ignore exception, collecting garbage next time - LOG.warn("Exception when iterating over the metadata {}", t); - } finally { - if (zk != null) { - try { - zk.close(); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - LOG.error("Error closing zk session", e); - } - zk = null; - } + LOG.warn("Exception when iterating over the metadata", t); } } private Set<Long> removeOverReplicatedledgers(Set<Long> bkActiveledgers, final GarbageCleaner garbageCleaner) - throws InterruptedException, KeeperException { - final List<ACL> zkAcls = ZkUtils.getACLs(conf); + throws Exception { final Set<Long> overReplicatedLedgers = Sets.newHashSet(); - final Semaphore semaphore = new Semaphore(MAX_CONCURRENT_ZK_REQUESTS); + final Semaphore semaphore = new
Semaphore(this.maxConcurrentRequests); final CountDownLatch latch = new CountDownLatch(bkActiveledgers.size()); + // instantiate zookeeper client to initialize ledger manager + + @Cleanup + MetadataBookieDriver metadataDriver = instantiateMetadataDriver(conf, statsLogger); + + @Cleanup + LedgerManagerFactory lmf = metadataDriver.getLedgerManagerFactory(); + + @Cleanup + LedgerUnderreplicationManager lum = lmf.newLedgerUnderreplicationManager(); + for (final Long ledgerId : bkActiveledgers) { + try { + // check ledger ensembles before creating lock nodes. + // this is to reduce the number of lock node creations and deletions in ZK. + // the ensemble check is done again after the lock node is created. + Versioned<LedgerMetadata> preCheckMetadata = ledgerManager.readLedgerMetadata(ledgerId).get(); + if (!isNotBookieIncludedInLedgerEnsembles(preCheckMetadata)) { + latch.countDown(); + continue; + } + } catch (Throwable t) { + if (!(t.getCause() instanceof BKException.BKNoSuchLedgerExistsOnMetadataServerException)) { + LOG.warn("Failed to get metadata for ledger {}. {}: {}", + ledgerId, t.getClass().getName(), t.getMessage()); + } + latch.countDown(); + continue; + } + try { // check if the ledger is being replicated already by the replication worker - if (ZkLedgerUnderreplicationManager.isLedgerBeingReplicated(zk, zkLedgersRootPath, ledgerId)) { + if (lum.isLedgerBeingReplicated(ledgerId)) { latch.countDown(); continue; } // we try to acquire the underreplicated ledger lock to not let the bookie replicate the ledger that is // already being checked for deletion, since that might change the ledger ensemble to include the // current bookie again and, in that case, we cannot remove the ledger from local storage - ZkLedgerUnderreplicationManager.acquireUnderreplicatedLedgerLock(zk, zkLedgersRootPath, ledgerId, - zkAcls); + lum.acquireUnderreplicatedLedger(ledgerId); semaphore.acquire(); - ledgerManager.readLedgerMetadata(ledgerId, new GenericCallback<LedgerMetadata>() { - - @Override - public void operationComplete(int rc, LedgerMetadata ledgerMetadata) { - if (rc == BKException.Code.OK) { - // do not delete a ledger that is not closed, since the ensemble might change again and - // include the current bookie while we are deleting it - if (!ledgerMetadata.isClosed()) { - release(); - return; - } - SortedMap<Long, ArrayList<BookieSocketAddress>> ensembles = - ledgerMetadata.getEnsembles(); - for (List<BookieSocketAddress> ensemble : ensembles.values()) { - // check if this bookie is supposed to have this ledger - if (ensemble.contains(selfBookieAddress)) { - release(); - return; + ledgerManager.readLedgerMetadata(ledgerId) + .whenComplete((metadata, exception) -> { + try { + if (exception == null) { + if (isNotBookieIncludedInLedgerEnsembles(metadata)) { + // this bookie is not supposed to have this ledger, + // thus we can delete this ledger now + overReplicatedLedgers.add(ledgerId); + garbageCleaner.clean(ledgerId); + } + } else if (!(exception + instanceof BKException.BKNoSuchLedgerExistsOnMetadataServerException)) { + LOG.warn("Failed to get metadata for ledger {}.
{}: {}", + ledgerId, exception.getClass().getName(), exception.getMessage()); + } + } finally { + semaphore.release(); + latch.countDown(); + try { + lum.releaseUnderreplicatedLedger(ledgerId); + } catch (Throwable t) { + LOG.error("Exception when removing underreplicated lock for ledger {}", + ledgerId, t); } } - // this bookie is not supposed to have this ledger, thus we can delete this ledger now - overReplicatedLedgers.add(ledgerId); - garbageCleaner.clean(ledgerId); - } - release(); - } - - private void release() { - semaphore.release(); - latch.countDown(); - try { - ZkLedgerUnderreplicationManager.releaseUnderreplicatedLedgerLock(zk, zkLedgersRootPath, - ledgerId); - } catch (Throwable t) { - LOG.error("Exception when removing underreplicated lock for ledger {}", ledgerId, t); - } - } - }); + }); } catch (Throwable t) { LOG.error("Exception when iterating through the ledgers to check for over-replication", t); latch.countDown(); @@ -283,4 +296,39 @@ private void release() { bkActiveledgers.removeAll(overReplicatedLedgers); return overReplicatedLedgers; } + + private static MetadataBookieDriver instantiateMetadataDriver(ServerConfiguration conf, StatsLogger statsLogger) + throws BookieException { + try { + String metadataServiceUriStr = conf.getMetadataServiceUri(); + MetadataBookieDriver driver = MetadataDrivers.getBookieDriver(URI.create(metadataServiceUriStr)); + driver.initialize( + conf, + statsLogger); + return driver; + } catch (MetadataException me) { + throw new BookieException.MetadataStoreException("Failed to initialize metadata bookie driver", me); + } catch (ConfigurationException e) { + throw new BookieException.BookieIllegalOpException(e); + } + } + + private boolean isNotBookieIncludedInLedgerEnsembles(Versioned<LedgerMetadata> metadata) { + // do not delete a ledger that is not closed, since the ensemble might + // change again and include the current bookie while we are deleting it + if (!metadata.getValue().isClosed()) { + return false; + } + + SortedMap<Long, ? extends List<BookieId>> ensembles = + metadata.getValue().getAllEnsembles(); + for (List<BookieId> ensemble : ensembles.values()) { + // check if this bookie is supposed to have this ledger + if (ensemble.contains(selfBookieAddress)) { + return false; + } + } + + return true; + } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/ScrubberStats.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/ScrubberStats.java new file mode 100644 index 00000000000..8d74636c9bf --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/ScrubberStats.java @@ -0,0 +1,33 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +package org.apache.bookkeeper.bookie; + +/** + * Stats associated with the consistency checker.
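+ * <p>A usage sketch, assuming the caller already holds a {@code StatsLogger}
+ * (the variable names here are illustrative only):
+ * <pre>{@code
+ * StatsLogger scrubStats = statsLogger.scope(ScrubberStats.SCOPE);
+ * OpStatsLogger runDuration = scrubStats.getOpStatsLogger(ScrubberStats.RUN_DURATION);
+ * Counter scrubErrors = scrubStats.getCounter(ScrubberStats.DETECTED_SCRUB_ERRORS);
+ * }</pre>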
+ */ +public class ScrubberStats { + public static final String SCOPE = "scrubber"; + + public static final String RUN_DURATION = "runTime"; + public static final String DETECTED_SCRUB_ERRORS = "detectedScrubErrors"; + public static final String DETECTED_FATAL_SCRUB_ERRORS = "detectedFatalScrubErrors"; +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/SkipListArena.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/SkipListArena.java index 7eafb41687e..d9756f2671e 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/SkipListArena.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/SkipListArena.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -22,7 +22,6 @@ import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicReference; - import org.apache.bookkeeper.conf.ServerConfiguration; /** diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/SkipListFlusher.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/SkipListFlusher.java index abfb264aacd..8a48e9ab925 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/SkipListFlusher.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/SkipListFlusher.java @@ -22,7 +22,6 @@ package org.apache.bookkeeper.bookie; import io.netty.buffer.ByteBuf; - import java.io.IOException; /** diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/SlowBufferedChannel.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/SlowBufferedChannel.java index 9fdc34ca7d6..13bc74e0c19 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/SlowBufferedChannel.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/SlowBufferedChannel.java @@ -22,7 +22,7 @@ */ import io.netty.buffer.ByteBuf; - +import io.netty.buffer.ByteBufAllocator; import java.io.IOException; import java.nio.channels.FileChannel; import java.util.concurrent.TimeUnit; @@ -36,12 +36,13 @@ public class SlowBufferedChannel extends BufferedChannel { public volatile long addDelay = 0; public volatile long flushDelay = 0; - public SlowBufferedChannel(FileChannel fc, int capacity) throws IOException { - super(fc, capacity); + public SlowBufferedChannel(ByteBufAllocator allocator, FileChannel fc, int capacity) throws IOException { + super(allocator, fc, capacity); } - public SlowBufferedChannel(FileChannel fc, int writeCapacity, int readCapacity) throws IOException { - super(fc, writeCapacity, readCapacity); + public SlowBufferedChannel(ByteBufAllocator allocator, FileChannel fc, int writeCapacity, int readCapacity) + throws IOException { + super(allocator, fc, writeCapacity, readCapacity); } public void setAddDelay(long delay) { diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/SortedLedgerStorage.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/SortedLedgerStorage.java index 5c4f75a22e2..689668eb938 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/SortedLedgerStorage.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/SortedLedgerStorage.java @@ -21,9 +21,15 @@ package org.apache.bookkeeper.bookie; import com.google.common.annotations.VisibleForTesting; +import com.google.common.util.concurrent.RateLimiter; import 
com.google.common.util.concurrent.ThreadFactoryBuilder; import io.netty.buffer.ByteBuf; +import io.netty.buffer.ByteBufAllocator; import java.io.IOException; +import java.util.EnumSet; +import java.util.List; +import java.util.Optional; +import java.util.PrimitiveIterator; import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; @@ -33,6 +39,7 @@ import org.apache.bookkeeper.meta.LedgerManager; import org.apache.bookkeeper.proto.BookieProtocol; import org.apache.bookkeeper.stats.StatsLogger; +import org.apache.bookkeeper.util.IteratorUtility; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -44,12 +51,14 @@ */ public class SortedLedgerStorage implements LedgerStorage, CacheCallback, SkipListFlusher, - CompactableLedgerStorage, EntryLogger.EntryLogListener { + CompactableLedgerStorage, DefaultEntryLogger.EntryLogListener { private static final Logger LOG = LoggerFactory.getLogger(SortedLedgerStorage.class); EntryMemTable memTable; private ScheduledExecutorService scheduler; private StateManager stateManager; + private ServerConfiguration conf; + private StatsLogger statsLogger; private final InterleavedLedgerStorage interleavedLedgerStorage; public SortedLedgerStorage() { @@ -66,32 +75,52 @@ public void initialize(ServerConfiguration conf, LedgerManager ledgerManager, LedgerDirsManager ledgerDirsManager, LedgerDirsManager indexDirsManager, - StateManager stateManager, - CheckpointSource checkpointSource, - Checkpointer checkpointer, - StatsLogger statsLogger) + StatsLogger statsLogger, + ByteBufAllocator allocator) throws IOException { + this.conf = conf; + this.statsLogger = statsLogger; - interleavedLedgerStorage.initialize( + interleavedLedgerStorage.initializeWithEntryLogListener( conf, ledgerManager, ledgerDirsManager, indexDirsManager, - stateManager, - checkpointSource, - checkpointer, - statsLogger); + // uses sorted ledger storage's own entry log listener + // since it manages entry log rotations and checkpoints. 
+ this, + statsLogger, + allocator); + + this.scheduler = newScheduledExecutorService(); + } + + @VisibleForTesting + static ScheduledExecutorService newScheduledExecutorService() { + return Executors.newSingleThreadScheduledExecutor( + new ThreadFactoryBuilder() + .setNameFormat("SortedLedgerStorage-%d") + .setPriority((Thread.NORM_PRIORITY + Thread.MAX_PRIORITY) / 2).build()); + } + + @Override + public void setStateManager(StateManager stateManager) { + interleavedLedgerStorage.setStateManager(stateManager); + this.stateManager = stateManager; + } + @Override + public void setCheckpointSource(CheckpointSource checkpointSource) { + interleavedLedgerStorage.setCheckpointSource(checkpointSource); if (conf.isEntryLogPerLedgerEnabled()) { this.memTable = new EntryMemTableWithParallelFlusher(conf, checkpointSource, statsLogger); } else { this.memTable = new EntryMemTable(conf, checkpointSource, statsLogger); } - this.scheduler = Executors.newSingleThreadScheduledExecutor( - new ThreadFactoryBuilder() - .setNameFormat("SortedLedgerStorage-%d") - .setPriority((Thread.NORM_PRIORITY + Thread.MAX_PRIORITY) / 2).build()); - this.stateManager = stateManager; + } + @Override + public void setCheckpointer(Checkpointer checkpointer) { + interleavedLedgerStorage.setCheckpointer(checkpointer); } @VisibleForTesting @@ -137,6 +166,12 @@ public boolean ledgerExists(long ledgerId) throws IOException { return true; } + @Override + public boolean entryExists(long ledgerId, long entryId) throws IOException { + // can probably be implemented as above, but I'm not going to test it + throw new UnsupportedOperationException("Not supported for SortedLedgerStorage"); + } + @Override public boolean setFenced(long ledgerId) throws IOException { return interleavedLedgerStorage.setFenced(ledgerId); @@ -183,7 +218,7 @@ private ByteBuf getLastEntryId(long ledgerId) throws IOException { } @Override - public ByteBuf getEntry(long ledgerId, long entryId) throws IOException { + public ByteBuf getEntry(long ledgerId, long entryId) throws IOException, BookieException { if (entryId == BookieProtocol.LAST_ADD_CONFIRMED) { return getLastEntryId(ledgerId); } @@ -217,6 +252,13 @@ public boolean waitForLastAddConfirmedUpdate(long ledgerId, return interleavedLedgerStorage.waitForLastAddConfirmedUpdate(ledgerId, previousLAC, watcher); } + @Override + public void cancelWaitForLastAddConfirmedUpdate(long ledgerId, + Watcher<LastAddConfirmedUpdateNotification> watcher) + throws IOException { + interleavedLedgerStorage.cancelWaitForLastAddConfirmedUpdate(ledgerId, watcher); + } + @Override public void checkpoint(final Checkpoint checkpoint) throws IOException { long numBytesFlushed = memTable.flush(this, checkpoint); @@ -235,8 +277,8 @@ public void registerLedgerDeletionListener(LedgerDeletionListener listener) { } @Override - public void setExplicitlac(long ledgerId, ByteBuf lac) throws IOException { - interleavedLedgerStorage.setExplicitlac(ledgerId, lac); + public void setExplicitLac(long ledgerId, ByteBuf lac) throws IOException { + interleavedLedgerStorage.setExplicitLac(ledgerId, lac); } @Override @@ -300,8 +342,7 @@ BookieStateManager getStateManager(){ return (BookieStateManager) stateManager; } - @Override - public EntryLogger getEntryLogger() { + public DefaultEntryLogger getEntryLogger() { return interleavedLedgerStorage.getEntryLogger(); } @@ -324,4 +365,101 @@ public void flushEntriesLocationsIndex() throws IOException { public LedgerStorage getUnderlyingLedgerStorage() { return interleavedLedgerStorage; } + + @Override + public void forceGC() { +
interleavedLedgerStorage.forceGC(); + } + + @Override + public void forceGC(boolean forceMajor, boolean forceMinor) { + interleavedLedgerStorage.forceGC(forceMajor, forceMinor); + } + + @Override + public void suspendMinorGC() { + interleavedLedgerStorage.suspendMinorGC(); + } + + @Override + public void suspendMajorGC() { + interleavedLedgerStorage.suspendMajorGC(); + } + + @Override + public void resumeMinorGC() { + interleavedLedgerStorage.resumeMinorGC(); + } + + @Override + public void resumeMajorGC() { + interleavedLedgerStorage.resumeMajorGC(); + } + + @Override + public boolean isMajorGcSuspended() { + return interleavedLedgerStorage.isMajorGcSuspended(); + } + + @Override + public boolean isMinorGcSuspended() { + return interleavedLedgerStorage.isMinorGcSuspended(); + } + + @Override + public List<DetectedInconsistency> localConsistencyCheck(Optional<RateLimiter> rateLimiter) throws IOException { + return interleavedLedgerStorage.localConsistencyCheck(rateLimiter); + } + + @Override + public boolean isInForceGC() { + return interleavedLedgerStorage.isInForceGC(); + } + + @Override + public List<GarbageCollectionStatus> getGarbageCollectionStatus() { + return interleavedLedgerStorage.getGarbageCollectionStatus(); + } + + @Override + public PrimitiveIterator.OfLong getListOfEntriesOfLedger(long ledgerId) throws IOException { + PrimitiveIterator.OfLong entriesInMemtableItr = memTable.getListOfEntriesOfLedger(ledgerId); + PrimitiveIterator.OfLong entriesFromILSItr = interleavedLedgerStorage.getListOfEntriesOfLedger(ledgerId); + return IteratorUtility.mergePrimitiveLongIterator(entriesInMemtableItr, entriesFromILSItr); + } + + @Override + public void setLimboState(long ledgerId) throws IOException { + throw new UnsupportedOperationException( + "Limbo state only supported for DbLedgerStorage"); + } + + @Override + public boolean hasLimboState(long ledgerId) throws IOException { + throw new UnsupportedOperationException( + "Limbo state only supported for DbLedgerStorage"); + } + + @Override + public void clearLimboState(long ledgerId) throws IOException { + throw new UnsupportedOperationException( + "Limbo state only supported for DbLedgerStorage"); + } + + @Override + public EnumSet<StorageState> getStorageStateFlags() throws IOException { + return EnumSet.noneOf(StorageState.class); + } + + @Override + public void setStorageStateFlag(StorageState flags) throws IOException { + throw new UnsupportedOperationException( + "Storage state only flags supported for DbLedgerStorage"); + } + + @Override + public void clearStorageStateFlag(StorageState flags) throws IOException { + throw new UnsupportedOperationException( + "Storage state flags only supported for DbLedgerStorage"); + } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/StateManager.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/StateManager.java index 538f3ac19ef..7ed3f0b6572 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/StateManager.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/StateManager.java @@ -19,7 +19,6 @@ package org.apache.bookkeeper.bookie; import java.util.concurrent.Future; - /** * State management of the bookie, including registration and transitions between writable and read-only modes. */ @@ -49,6 +48,16 @@ public interface StateManager extends AutoCloseable { */ boolean isReadOnly(); + /** + * Check whether the bookie has been forced into read-only mode. + */ + boolean isForceReadOnly(); + + /** + * Check whether read-only mode is enabled in the configuration. + */ + boolean isReadOnlyModeEnabled(); + /** * Check whether the bookie is running.
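 * <p>A hypothetical caller-side sketch (not from this patch) of how the two new
 * checks above might gate a transition; {@code stateManager} is a placeholder:
 * <pre>{@code
 * if (stateManager.isReadOnlyModeEnabled() && !stateManager.isForceReadOnly()) {
 *     stateManager.transitionToReadOnlyMode(); // transition method assumed available
 * }
 * }</pre>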
*/ } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/SyncThread.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/SyncThread.java index a7c3a7a5632..3b77cf45f10 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/SyncThread.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/SyncThread.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -28,15 +28,17 @@ import java.util.concurrent.Future; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; - import lombok.AccessLevel; import lombok.Getter; import lombok.extern.slf4j.Slf4j; import org.apache.bookkeeper.bookie.CheckpointSource.Checkpoint; import org.apache.bookkeeper.bookie.LedgerDirsManager.LedgerDirsListener; import org.apache.bookkeeper.bookie.LedgerDirsManager.NoWritableLedgerDirException; +import org.apache.bookkeeper.common.util.MathUtils; import org.apache.bookkeeper.conf.ServerConfiguration; -import org.apache.bookkeeper.util.MathUtils; +import org.apache.bookkeeper.stats.Counter; +import org.apache.bookkeeper.stats.StatsLogger; +import org.apache.bookkeeper.stats.ThreadRegistry; /** * SyncThread is a background thread which helps checkpoint the ledger storage @@ -67,15 +69,29 @@ class SyncThread implements Checkpointer { private final Object suspensionLock = new Object(); private boolean suspended = false; private boolean disableCheckpoint = false; + private final Counter syncExecutorTime; + private static String executorName = "SyncThread"; public SyncThread(ServerConfiguration conf, LedgerDirsListener dirsListener, LedgerStorage ledgerStorage, - CheckpointSource checkpointSource) { + CheckpointSource checkpointSource, + StatsLogger statsLogger) { this.dirsListener = dirsListener; this.ledgerStorage = ledgerStorage; this.checkpointSource = checkpointSource; - this.executor = Executors.newSingleThreadScheduledExecutor(new DefaultThreadFactory("SyncThread")); + this.executor = newExecutor(); + this.syncExecutorTime = statsLogger.getThreadScopedCounter("sync-thread-time"); + } + + @VisibleForTesting + static ScheduledExecutorService newExecutor() { + return Executors.newSingleThreadScheduledExecutor(new DefaultThreadFactory(executorName) { + @Override + protected Thread newThread(Runnable r, String name) { + return super.newThread(ThreadRegistry.registerThread(r, executorName), name); + } + }); } @Override @@ -85,6 +101,7 @@ public void startCheckpoint(Checkpoint checkpoint) { } protected void doCheckpoint(Checkpoint checkpoint) { executor.submit(() -> { + long startTime = System.nanoTime(); try { synchronized (suspensionLock) { while (suspended) { @@ -102,16 +119,21 @@ protected void doCheckpoint(Checkpoint checkpoint) { } catch (Throwable t) { log.error("Exception in SyncThread", t); dirsListener.fatalError(); + } finally { + syncExecutorTime.addLatency(MathUtils.elapsedNanos(startTime), TimeUnit.NANOSECONDS); } }); } public Future requestFlush() { return executor.submit(() -> { + long startTime = System.nanoTime(); try { flush(); } catch (Throwable t) { log.error("Exception flushing ledgers ", t); + } finally { + syncExecutorTime.addLatency(MathUtils.elapsedNanos(startTime), TimeUnit.NANOSECONDS); } }); } @@ -205,11 +227,11 @@ void shutdown() throws InterruptedException { requestFlush(); executor.shutdown(); - long start = MathUtils.now(); + long start = System.currentTimeMillis(); while
(!executor.awaitTermination(5, TimeUnit.MINUTES)) { - long now = MathUtils.now(); + long now = System.currentTimeMillis(); log.info("SyncThread taking a long time to shutdown. Has taken {}" - + " seconds so far", now - start); + + " milliseconds so far", now - start); } } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/TransactionalEntryLogCompactor.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/TransactionalEntryLogCompactor.java index 51a2cdda1d9..9a27bcccd89 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/TransactionalEntryLogCompactor.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/TransactionalEntryLogCompactor.java @@ -22,14 +22,13 @@ package org.apache.bookkeeper.bookie; import io.netty.buffer.ByteBuf; - -import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.List; - -import org.apache.bookkeeper.bookie.EntryLogger.EntryLogScanner; -import org.apache.bookkeeper.util.HardLink; +import org.apache.bookkeeper.bookie.storage.CompactionEntryLog; +import org.apache.bookkeeper.bookie.storage.EntryLogScanner; +import org.apache.bookkeeper.bookie.storage.EntryLogger; +import org.apache.bookkeeper.conf.ServerConfiguration; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -49,14 +48,18 @@ public class TransactionalEntryLogCompactor extends AbstractLogCompactor { final List<EntryLocation> offsets = new ArrayList<>(); // compaction log file suffix - static final String COMPACTING_SUFFIX = ".log.compacting"; + public static final String COMPACTING_SUFFIX = ".log.compacting"; // flushed compaction log file suffix - static final String COMPACTED_SUFFIX = ".compacted"; - - public TransactionalEntryLogCompactor(GarbageCollectorThread gcThread) { - super(gcThread); - this.entryLogger = gcThread.getEntryLogger(); - this.ledgerStorage = gcThread.getLedgerStorage(); + public static final String COMPACTED_SUFFIX = ".compacted"; + + public TransactionalEntryLogCompactor( + ServerConfiguration conf, + EntryLogger entryLogger, + CompactableLedgerStorage ledgerStorage, + LogRemovalListener logRemover) { + super(conf, logRemover); + this.entryLogger = entryLogger; + this.ledgerStorage = ledgerStorage; } /** @@ -65,25 +68,10 @@ public TransactionalEntryLogCompactor(GarbageCollectorThread gcThread) { @Override public void cleanUpAndRecover() { // clean up compacting logs and recover index for already compacted logs - List<File> ledgerDirs = entryLogger.getLedgerDirsManager().getAllLedgerDirs(); - for (File dir : ledgerDirs) { - File[] compactingPhaseFiles = dir.listFiles(file -> file.getName().endsWith(COMPACTING_SUFFIX)); - if (compactingPhaseFiles != null) { - for (File file : compactingPhaseFiles) { - if (file.delete()) { - LOG.info("Deleted failed compaction file {}", file); - } - } - } - File[] compactedPhaseFiles = dir.listFiles(file -> file.getName().endsWith(COMPACTED_SUFFIX)); - if (compactedPhaseFiles != null) { - for (File compactedFile : compactedPhaseFiles) { - LOG.info("Found compacted log file {} has partially flushed index, recovering index.", - compactedFile); - CompactionPhase updateIndex = new UpdateIndexPhase(compactedFile, true); - updateIndex.run(); - } - } + for (CompactionEntryLog log : entryLogger.incompleteCompactionLogs()) { + LOG.info("Found compacted log file {} has partially flushed index, recovering index.", log); + CompactionPhase updateIndex = new UpdateIndexPhase(log, true); + updateIndex.run(); } } @@ -91,20 +79,27 @@
public boolean compact(EntryLogMetadata metadata) { if (metadata != null) { LOG.info("Compacting entry log {} with usage {}.", - new Object[]{metadata.getEntryLogId(), metadata.getUsage()}); + metadata.getEntryLogId(), metadata.getUsage()); + CompactionEntryLog compactionLog; + try { + compactionLog = entryLogger.newCompactionLog(metadata.getEntryLogId()); + } catch (IOException ioe) { + LOG.error("Exception creating new compaction entry log", ioe); + return false; + } + CompactionPhase scanEntryLog = new ScanEntryLogPhase(metadata, compactionLog); if (!scanEntryLog.run()) { LOG.info("Compaction for entry log {} end in ScanEntryLogPhase.", metadata.getEntryLogId()); return false; } - File compactionLogFile = entryLogger.getCurCompactionLogFile(); - CompactionPhase flushCompactionLog = new FlushCompactionLogPhase(metadata.getEntryLogId()); + + CompactionPhase flushCompactionLog = new FlushCompactionLogPhase(compactionLog); if (!flushCompactionLog.run()) { LOG.info("Compaction for entry log {} end in FlushCompactionLogPhase.", metadata.getEntryLogId()); return false; } - File compactedLogFile = getCompactedLogFile(compactionLogFile, metadata.getEntryLogId()); - CompactionPhase updateIndex = new UpdateIndexPhase(compactedLogFile); + + CompactionPhase updateIndex = new UpdateIndexPhase(compactionLog); if (!updateIndex.run()) { LOG.info("Compaction for entry log {} end in UpdateIndexPhase.", metadata.getEntryLogId()); return false; @@ -156,16 +151,17 @@ boolean run() { */ class ScanEntryLogPhase extends CompactionPhase { private final EntryLogMetadata metadata; + private final CompactionEntryLog compactionLog; - ScanEntryLogPhase(EntryLogMetadata metadata) { + ScanEntryLogPhase(EntryLogMetadata metadata, CompactionEntryLog compactionLog) { super("ScanEntryLogPhase"); this.metadata = metadata; + this.compactionLog = compactionLog; } @Override void start() throws IOException { // scan entry log into compaction log and offset list - entryLogger.createNewCompactionLog(); entryLogger.scanEntryLog(metadata.getEntryLogId(), new EntryLogScanner() { @Override public boolean accept(long ledgerId) { @@ -184,7 +180,7 @@ public void process(long ledgerId, long offset, ByteBuf entry) throws IOException ledgerId, lid, entryId, offset); throw new IOException("Invalid entry found @ offset " + offset); } - long newOffset = entryLogger.addEntryForCompaction(ledgerId, entry); + long newOffset = compactionLog.addEntry(ledgerId, entry); offsets.add(new EntryLocation(ledgerId, entryId, newOffset)); if (LOG.isDebugEnabled()) { @@ -201,8 +197,9 @@ boolean complete() { if (offsets.isEmpty()) { // no valid entries were compacted, delete the entry log file LOG.info("No valid entry is found in entry log after scan, removing entry log now."); - gcThread.removeEntryLog(metadata.getEntryLogId()); - entryLogger.removeCurCompactionLog(); + logRemovalListener.removeEntryLog(metadata.getEntryLogId()); + compactionLog.abort(); + compactingLogWriteDone(); return false; } return true; @@ -212,9 +209,15 @@ boolean complete() { void abort() { offsets.clear(); // since we haven't flushed yet, we only need to delete the unflushed compaction file.
- entryLogger.removeCurCompactionLog(); + compactionLog.abort(); + compactingLogWriteDone(); } + } + private void compactingLogWriteDone() { + if (entryLogger instanceof DefaultEntryLogger) { + ((DefaultEntryLogger) entryLogger).clearCompactingLogId(); + } } /** @@ -225,51 +228,38 @@ void abort() { * a hardlink file "3.log.1.compacted" should be created, and "3.log.compacting" should be deleted. */ class FlushCompactionLogPhase extends CompactionPhase { - private final long compactingLogId; - private File compactedLogFile; + final CompactionEntryLog compactionLog; - FlushCompactionLogPhase(long compactingLogId) { + FlushCompactionLogPhase(CompactionEntryLog compactionLog) { super("FlushCompactionLogPhase"); - this.compactingLogId = compactingLogId; + this.compactionLog = compactionLog; } @Override void start() throws IOException { // flush the current compaction log. - File compactionLogFile = entryLogger.getCurCompactionLogFile(); - if (compactionLogFile == null || !compactionLogFile.exists()) { - throw new IOException("Compaction log doesn't exist during flushing"); - } - entryLogger.flushCompactionLog(); + compactionLog.flush(); } @Override boolean complete() throws IOException { - // create a hard link file named "x.log.y.compacted" for file "x.log.compacting". - // where x is compactionLogId and y is compactingLogId. - File compactionLogFile = entryLogger.getCurCompactionLogFile(); - if (compactionLogFile == null || !compactionLogFile.exists()) { - LOG.warn("Compaction log doesn't exist any more after flush"); + try { + compactionLog.markCompacted(); + return true; + } catch (IOException ioe) { + LOG.warn("Error marking compaction as done", ioe); return false; + } finally { + compactingLogWriteDone(); } - compactedLogFile = getCompactedLogFile(compactionLogFile, compactingLogId); - if (compactedLogFile != null && !compactedLogFile.exists()) { - HardLink.createHardLink(compactionLogFile, compactedLogFile); - } - entryLogger.removeCurCompactionLog(); - return true; } @Override void abort() { offsets.clear(); // remove compaction log file and its hardlink - entryLogger.removeCurCompactionLog(); - if (compactedLogFile != null && compactedLogFile.exists()) { - if (!compactedLogFile.delete()) { - LOG.warn("Could not delete compacted log file {}", compactedLogFile); - } - } + compactionLog.abort(); + compactingLogWriteDone(); } } @@ -284,41 +274,29 @@ void abort() { *

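 * <p>For orientation, the three phases run in sequence from {@code compact()} above;
 * a condensed sketch using the constructors defined in this class:
 * <pre>{@code
 * if (new ScanEntryLogPhase(metadata, compactionLog).run()     // phase 1: copy live entries
 *         && new FlushCompactionLogPhase(compactionLog).run()  // phase 2: flush, mark compacted
 *         && new UpdateIndexPhase(compactionLog).run()) {      // phase 3: update entry index
 *     // the source entry log can now be removed
 * }
 * }</pre>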
This phase can also be used to recover a partially flushed index when we pass isInRecovery=true */ class UpdateIndexPhase extends CompactionPhase { - File compactedLogFile; - File newEntryLogFile; + final CompactionEntryLog compactionLog; private final boolean isInRecovery; - public UpdateIndexPhase(File compactedLogFile) { - this(compactedLogFile, false); + public UpdateIndexPhase(CompactionEntryLog compactionLog) { + this(compactionLog, false); } - public UpdateIndexPhase(File compactedLogFile, boolean isInRecovery) { + public UpdateIndexPhase(CompactionEntryLog compactionLog, boolean isInRecovery) { super("UpdateIndexPhase"); - this.compactedLogFile = compactedLogFile; + this.compactionLog = compactionLog; this.isInRecovery = isInRecovery; } @Override void start() throws IOException { - if (compactedLogFile != null && compactedLogFile.exists()) { - File dir = compactedLogFile.getParentFile(); - String compactedFilename = compactedLogFile.getName(); - // create a hard link "x.log" for file "x.log.y.compacted" - this.newEntryLogFile = new File(dir, compactedFilename.substring(0, - compactedFilename.indexOf(".log") + 4)); - if (!newEntryLogFile.exists()) { - HardLink.createHardLink(compactedLogFile, newEntryLogFile); - } - if (isInRecovery) { - recoverEntryLocations(EntryLogger.fileName2LogId(newEntryLogFile.getName())); - } - if (!offsets.isEmpty()) { - // update entry locations and flush index - ledgerStorage.updateEntriesLocations(offsets); - ledgerStorage.flushEntriesLocationsIndex(); - } - } else { - throw new IOException("Failed to find compacted log file in UpdateIndexPhase"); + compactionLog.makeAvailable(); + if (isInRecovery) { + recoverEntryLocations(compactionLog); + } + if (!offsets.isEmpty()) { + // update entry locations and flush index + ledgerStorage.updateEntriesLocations(offsets); + ledgerStorage.flushEntriesLocationsIndex(); } } @@ -327,16 +305,8 @@ boolean complete() { // When index is flushed, and entry log is removed, // delete the ".compacted" file to indicate this phase is completed. offsets.clear(); - if (compactedLogFile != null) { - if (!compactedLogFile.delete()) { - LOG.warn("Could not delete compacted log file {}", compactedLogFile); - } - // Now delete the old entry log file since it's compacted - String compactedFilename = compactedLogFile.getName(); - String oldEntryLogFilename = compactedFilename.substring(compactedFilename.indexOf(".log") + 5); - long entryLogId = EntryLogger.fileName2LogId(oldEntryLogFilename); - gcThread.removeEntryLog(entryLogId); - } + compactionLog.finalizeAndCleanup(); + logRemovalListener.removeEntryLog(compactionLog.getSrcLogId()); return true; } @@ -348,8 +318,8 @@ void abort() { /** * Scan entry log to recover entry locations.
*/ - private void recoverEntryLocations(long compactedLogId) throws IOException { - entryLogger.scanEntryLog(compactedLogId, new EntryLogScanner() { + private void recoverEntryLocations(CompactionEntryLog compactionLog) throws IOException { + compactionLog.scan(new EntryLogScanner() { @Override public boolean accept(long ledgerId) { return true; @@ -365,23 +335,11 @@ public void process(long ledgerId, long offset, ByteBuf entry) throws IOException ledgerId, lid, entryId, offset); throw new IOException("Invalid entry found @ offset " + offset); } - long location = (compactedLogId << 32L) | (offset + 4); + long location = (compactionLog.getDstLogId() << 32L) | (offset + 4); offsets.add(new EntryLocation(lid, entryId, location)); } }); - LOG.info("Recovered {} entry locations from compacted log {}", offsets.size(), compactedLogId); + LOG.info("Recovered {} entry locations from compacted log {}", offsets.size(), compactionLog.getDstLogId()); } } - - File getCompactedLogFile(File compactionLogFile, long compactingLogId) { - if (compactionLogFile == null) { - return null; - } - File dir = compactionLogFile.getParentFile(); - String filename = compactionLogFile.getName(); - String newSuffix = ".log." + EntryLogger.logId2HexString(compactingLogId) + COMPACTED_SUFFIX; - String hardLinkFilename = filename.replace(COMPACTING_SUFFIX, newSuffix); - return new File(dir, hardLinkFilename); - } - } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/UncleanShutdownDetection.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/UncleanShutdownDetection.java new file mode 100644 index 00000000000..4826f8050d7 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/UncleanShutdownDetection.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bookkeeper.bookie; + +import java.io.IOException; + +/** + * An interface for unclean shutdown detection. The bookie + * must register its start-up and then register its graceful + * shutdown. Abrupt termination will not register the clean + * shutdown. + */ +public interface UncleanShutdownDetection { + void registerStartUp() throws IOException; + void registerCleanShutdown(); + boolean lastShutdownWasUnclean(); +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/UncleanShutdownDetectionImpl.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/UncleanShutdownDetectionImpl.java new file mode 100644 index 00000000000..33192c3092b --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/UncleanShutdownDetectionImpl.java @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements.
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bookkeeper.bookie; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Used to determine if the prior shutdown was unclean or not. It does so + * by adding a file to each ledger directory after successful start-up + * and removing the file on graceful shutdown. + * Any abrupt termination will cause one or more of these files to not be cleared + * and so on the subsequent boot-up, the presence of any of these files will + * indicate an unclean shutdown. + */ +public class UncleanShutdownDetectionImpl implements UncleanShutdownDetection { + private static final Logger LOG = LoggerFactory.getLogger(UncleanShutdownDetectionImpl.class); + private final LedgerDirsManager ledgerDirsManager; + static final String DIRTY_FILENAME = "DIRTY"; + + public UncleanShutdownDetectionImpl(LedgerDirsManager ledgerDirsManager) { + this.ledgerDirsManager = ledgerDirsManager; + } + + @Override + public void registerStartUp() throws IOException { + for (File ledgerDir : ledgerDirsManager.getAllLedgerDirs()) { + try { + File dirtyFile = new File(ledgerDir, DIRTY_FILENAME); + if (dirtyFile.createNewFile()) { + LOG.info("Created dirty file in ledger dir: {}", ledgerDir.getAbsolutePath()); + } else { + LOG.info("Dirty file already exists in ledger dir: {}", ledgerDir.getAbsolutePath()); + } + + } catch (IOException e) { + LOG.error("Unable to register start-up (so an unclean shutdown cannot" + + " be detected). Dirty file of ledger dir {} could not be created.", + ledgerDir.getAbsolutePath(), e); + throw e; + } + } + } + + @Override + public void registerCleanShutdown() { + for (File ledgerDir : ledgerDirsManager.getAllLedgerDirs()) { + try { + File dirtyFile = new File(ledgerDir, DIRTY_FILENAME); + if (dirtyFile.exists()) { + boolean deleted = dirtyFile.delete(); + + if (!deleted) { + LOG.error("Unable to register a clean shutdown. The dirty file of " + + " ledger dir {} could not be deleted.", + ledgerDir.getAbsolutePath()); + } + } else { + LOG.error("Unable to register a clean shutdown. The dirty file of " + + " ledger dir {} does not exist.", + ledgerDir.getAbsolutePath()); + } + } catch (Throwable t) { + LOG.error("Unable to register a clean shutdown. 
An error occurred while deleting " +
+ " the dirty file of ledger dir {}.",
+ ledgerDir.getAbsolutePath(), t);
+ }
+ }
+ }
+
+ @Override
+ public boolean lastShutdownWasUnclean() {
+ boolean unclean = false;
+ List<String> dirtyFiles = new ArrayList<>();
+ try {
+ for (File ledgerDir : ledgerDirsManager.getAllLedgerDirs()) {
+ File dirtyFile = new File(ledgerDir, DIRTY_FILENAME);
+ if (dirtyFile.exists()) {
+ dirtyFiles.add(dirtyFile.getAbsolutePath());
+ unclean = true;
+ }
+ }
+ } catch (Throwable t) {
+ LOG.error("Unable to determine if last shutdown was unclean (defaults to unclean)", t);
+ unclean = true;
+ }
+
+ if (!dirtyFiles.isEmpty()) {
+ LOG.info("Dirty files exist on boot-up indicating an unclean shutdown. Dirty files: {}",
+ String.join(",", dirtyFiles));
+ }
+
+ return unclean;
+ }
+} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/datainteg/DataIntegrityCheck.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/datainteg/DataIntegrityCheck.java new file mode 100644 index 00000000000..6d7af6669c6 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/datainteg/DataIntegrityCheck.java @@ -0,0 +1,58 @@ +/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.bookkeeper.bookie.datainteg;
+
+import java.io.IOException;
+import java.util.concurrent.CompletableFuture;
+
+/**
+ * The interface for the data integrity check feature. This feature allows
+ * a bookie to handle data loss scenarios such as when running without
+ * the journal or after a disk failure has caused the loss of all data.
+ */
+public interface DataIntegrityCheck {
+ /**
+ * Run quick preboot check. This check should do enough to ensure that
+ * it is safe to complete the boot sequence without compromising correctness.
+ * To this end, if it finds that this bookie is part of the last ensemble of
+ * an unclosed ledger, it must prevent the bookie from being able to store new
+ * entries for that ledger and must prevent the bookie from taking part in
+ * the discovery of the last entry of that ledger.
+ */
+ CompletableFuture<Void> runPreBootCheck(String reason);
+
+ /**
+ * Whether we need to run a full check.
+ * This condition can be set by the runPreBootCheck() call to run a full check
+ * in the background once the bookie is running. This can later be used
+ * to run the full check periodically, or to exponentially backoff and retry
+ * when some transient condition prevents a ledger being fixed during a
+ * full check.
+ */
+ boolean needsFullCheck() throws IOException;
+
+ /**
+ * Run full check of the bookie's local data. This check should ensure that
+ * if the metadata service states that it should have an entry, then it
+ * should have that entry. If the entry is missing, it should copy it
+ * from another available source.
+ */
+ CompletableFuture<Void> runFullCheck();
+} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/datainteg/DataIntegrityCheckImpl.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/datainteg/DataIntegrityCheckImpl.java new file mode 100644 index 00000000000..139d07f3d03 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/datainteg/DataIntegrityCheckImpl.java @@ -0,0 +1,555 @@ +/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.bookkeeper.bookie.datainteg;
+
+import com.google.common.collect.ImmutableSortedMap;
+import io.reactivex.rxjava3.core.Flowable;
+import io.reactivex.rxjava3.core.Maybe;
+import io.reactivex.rxjava3.core.Scheduler;
+import io.reactivex.rxjava3.core.Single;
+import io.reactivex.rxjava3.disposables.Disposable;
+import java.io.IOException;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Map;
+import java.util.NavigableMap;
+import java.util.Optional;
+import java.util.Set;
+import java.util.UUID;
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.ConcurrentSkipListMap;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicLong;
+import java.util.concurrent.atomic.AtomicReference;
+import java.util.stream.Collectors;
+import lombok.extern.slf4j.Slf4j;
+import org.apache.bookkeeper.bookie.BookieException;
+import org.apache.bookkeeper.bookie.LedgerStorage;
+import org.apache.bookkeeper.bookie.LedgerStorage.StorageState;
+import org.apache.bookkeeper.client.BKException;
+import org.apache.bookkeeper.client.BookKeeperAdmin;
+import org.apache.bookkeeper.client.api.LedgerMetadata;
+import org.apache.bookkeeper.common.concurrent.FutureUtils;
+import org.apache.bookkeeper.meta.LedgerManager;
+import org.apache.bookkeeper.net.BookieId;
+
+/**
+ * An implementation of the DataIntegrityCheck interface.
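+ * <p>A rough wiring sketch (illustrative only; the variable names below are
+ * assumptions, not fields of this class):
+ * <pre>{@code
+ * DataIntegrityCheck check = new DataIntegrityCheckImpl(
+ *         bookieId, ledgerManager, ledgerStorage, entryCopier, admin, scheduler);
+ * check.runPreBootCheck("INVALID_COOKIE").get(); // before serving requests
+ * if (check.needsFullCheck()) {
+ *     check.runFullCheck(); // runs in the background once the bookie is up
+ * }
+ * }</pre>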
+ */ +@Slf4j +public class DataIntegrityCheckImpl implements DataIntegrityCheck { + private static final int MAX_INFLIGHT = 300; + private static final int MAX_ENTRIES_INFLIGHT = 3000; + private static final int ZK_TIMEOUT_S = 30; + private final BookieId bookieId; + private final LedgerManager ledgerManager; + private final LedgerStorage ledgerStorage; + private final EntryCopier entryCopier; + private final BookKeeperAdmin admin; + private final Scheduler scheduler; + private final AtomicReference> ledgersCacheRef = + new AtomicReference<>(null); + private CompletableFuture preBootFuture; + + public DataIntegrityCheckImpl(BookieId bookieId, + LedgerManager ledgerManager, + LedgerStorage ledgerStorage, + EntryCopier entryCopier, + BookKeeperAdmin admin, + Scheduler scheduler) { + this.bookieId = bookieId; + this.ledgerManager = ledgerManager; + this.ledgerStorage = ledgerStorage; + this.entryCopier = entryCopier; + this.admin = admin; + this.scheduler = scheduler; + } + + @Override + public synchronized CompletableFuture runPreBootCheck(String reason) { + // we only run this once, it could be kicked off by different checks + if (preBootFuture == null) { + preBootFuture = runPreBootSequence(reason); + } + return preBootFuture; + + } + + private CompletableFuture runPreBootSequence(String reason) { + String runId = UUID.randomUUID().toString(); + log.info("Event: {}, RunId: {}, Reason: {}", Events.PREBOOT_START, runId, reason); + try { + this.ledgerStorage.setStorageStateFlag(StorageState.NEEDS_INTEGRITY_CHECK); + } catch (IOException ioe) { + log.error("Event: {}, RunId: {}", Events.PREBOOT_ERROR, runId, ioe); + return FutureUtils.exception(ioe); + } + + MetadataAsyncIterator iter = new MetadataAsyncIterator(scheduler, + ledgerManager, MAX_INFLIGHT, ZK_TIMEOUT_S, TimeUnit.SECONDS); + CompletableFuture promise = new CompletableFuture<>(); + Map ledgersCache = + new ConcurrentSkipListMap<>(Comparator.naturalOrder().reversed()); + iter.forEach((ledgerId, metadata) -> { + if (ensemblesContainBookie(metadata, bookieId)) { + ledgersCache.put(ledgerId, metadata); + try { + if (!ledgerStorage.ledgerExists(ledgerId)) { + ledgerStorage.setMasterKey(ledgerId, new byte[0]); + } + } catch (IOException ioe) { + log.error("Event: {}, RunId: {}, LedgerId: {}", + Events.ENSURE_LEDGER_ERROR, runId, ledgerId, ioe); + return FutureUtils.exception(ioe); + } + } + return processPreBoot(ledgerId, metadata, runId); + }) + .whenComplete((ignore, exception) -> { + if (exception != null) { + log.error("Event: {}, runId: {}", Events.PREBOOT_ERROR, runId, exception); + promise.completeExceptionally(exception); + } else { + try { + this.ledgerStorage.flush(); + + updateMetadataCache(ledgersCache); + + log.info("Event: {}, runId: {}, processed: {}", + Events.PREBOOT_END, runId, ledgersCache.size()); + promise.complete(null); + } catch (Throwable t) { + log.error("Event: {}, runId: {}", Events.PREBOOT_ERROR, runId, t); + promise.completeExceptionally(t); + } + } + }); + return promise; + } + + @Override + public boolean needsFullCheck() throws IOException { + return this.ledgerStorage.getStorageStateFlags() + .contains(StorageState.NEEDS_INTEGRITY_CHECK); + } + + @Override + public CompletableFuture runFullCheck() { + String runId = UUID.randomUUID().toString(); + + log.info("Event: {}, runId: {}", Events.FULL_CHECK_INIT, runId); + return getCachedOrReadMetadata(runId) + .thenCompose( + (ledgers) -> { + log.info("Event: {}, runId: {}, ledgerCount: {}", + Events.FULL_CHECK_START, runId, ledgers.size()); + return 
checkAndRecoverLedgers(ledgers, runId).thenApply((resolved) -> {
+ for (LedgerResult r : resolved) {
+ if (r.isMissing() || r.isOK()) {
+ ledgers.remove(r.getLedgerId());
+ } else if (r.isError()) {
+ // if there was an error, make sure we have the latest
+ // metadata for the next iteration
+ ledgers.put(r.getLedgerId(), r.getMetadata());
+ }
+ }
+ Optional<Throwable> firstError = resolved.stream().filter(r -> r.isError())
+ .map(r -> r.getThrowable()).findFirst();
+
+ if (firstError.isPresent()) {
+ log.error("Event: {}, runId: {}, ok: {}"
+ + ", error: {}, missing: {}, ledgersToRetry: {}",
+ Events.FULL_CHECK_END, runId,
+ resolved.stream().filter(r -> r.isOK()).count(),
+ resolved.stream().filter(r -> r.isError()).count(),
+ resolved.stream().filter(r -> r.isMissing()).count(),
+ ledgers.size(), firstError.get());
+ } else {
+ log.info("Event: {}, runId: {}, ok: {}, error: 0, missing: {}, ledgersToRetry: {}",
+ Events.FULL_CHECK_END, runId,
+ resolved.stream().filter(r -> r.isOK()).count(),
+ resolved.stream().filter(r -> r.isMissing()).count(),
+ ledgers.size());
+ }
+ return ledgers;
+ });
+ })
+ .thenCompose(
+ (ledgers) -> {
+ CompletableFuture<Void> promise = new CompletableFuture<>();
+ try {
+ this.ledgerStorage.flush();
+ if (ledgers.isEmpty()) {
+ log.info("Event: {}, runId: {}", Events.CLEAR_INTEGCHECK_FLAG, runId);
+ this.ledgerStorage.clearStorageStateFlag(
+ StorageState.NEEDS_INTEGRITY_CHECK);
+ }
+ // not really needed as we are modifying the map in place
+ updateMetadataCache(ledgers);
+ log.info("Event: {}, runId: {}", Events.FULL_CHECK_COMPLETE, runId);
+ promise.complete(null);
+ } catch (IOException ioe) {
+ log.error("Event: {}, runId: {}", Events.FULL_CHECK_ERROR, runId, ioe);
+ promise.completeExceptionally(ioe);
+ }
+ return promise;
+ });
+ }
+
+ void updateMetadataCache(Map<Long, LedgerMetadata> ledgers) {
+ ledgersCacheRef.set(ledgers);
+ }
+
+ CompletableFuture<Map<Long, LedgerMetadata>> getCachedOrReadMetadata(String runId) {
+ Map<Long, LedgerMetadata> map = ledgersCacheRef.get();
+ if (map != null) {
+ log.info("Event: {}, runId: {}, ledgerCount: {}", Events.USE_CACHED_METADATA, runId,
+ map.size());
+ return CompletableFuture.completedFuture(map);
+ } else {
+ log.info("Event: {}, runId: {}", Events.REFRESH_METADATA, runId);
+ MetadataAsyncIterator iter = new MetadataAsyncIterator(scheduler,
+ ledgerManager, MAX_INFLIGHT, ZK_TIMEOUT_S, TimeUnit.SECONDS);
+ Map<Long, LedgerMetadata> ledgersCache =
+ new ConcurrentSkipListMap<>(Comparator.<Long>naturalOrder().reversed());
+ return iter.forEach((ledgerId, metadata) -> {
+ if (ensemblesContainBookie(metadata, bookieId)) {
+ ledgersCache.put(ledgerId, metadata);
+ }
+ return CompletableFuture.completedFuture(null);
+ })
+ .thenApply(ignore -> {
+ updateMetadataCache(ledgersCache);
+ return ledgersCache;
+ });
+ }
+ }
+
+ /**
+ * Check whether the current bookie exists in the last ensemble of the ledger.
+ * If it does, and the ledger is not closed, then this bookie may have accepted a fencing
+ * request or an entry which it no longer contains. The only way to resolve this is to
+ * open/recover the ledger. This bookie should not take part in the recovery, so the bookie
+ * must be marked as in limbo. This will stop the bookie from responding to read requests for
+ * that ledger, so clients will not be able to take into account the response of the bookie
+ * during recovery. Effectively we are telling the client that we don't know whether we had
+ * certain entries or not, so go look elsewhere.
+ * We also fence all ledgers with this bookie in the last segment, to prevent any new writes, + * so that after the limbo state is cleared, we won't accept any new writes. + + * We only need to consider final ensembles in non-closed ledgers at the moment of time that + * the preboot check commences. If this bookie is added to a new ensemble after that point in + * time, we know that we haven't received any entries for that segment, nor have we received + * a fencing request, because we are still in the preboot sequence. + */ + private CompletableFuture processPreBoot(long ledgerId, LedgerMetadata metadata, + String runId) { + Map.Entry> lastEnsemble = metadata.getAllEnsembles().lastEntry(); + CompletableFuture promise = new CompletableFuture<>(); + if (lastEnsemble == null) { + log.error("Event: {}, runId: {}, metadata: {}, ledger: {}", + Events.INVALID_METADATA, runId, metadata, ledgerId); + promise.completeExceptionally( + new IllegalStateException( + String.format("All metadata must have at least one ensemble, %d does not", ledgerId))); + return promise; + } + + + if (!metadata.isClosed() && lastEnsemble.getValue().contains(bookieId)) { + try { + log.info("Event: {}, runId: {}, metadata: {}, ledger: {}", + Events.MARK_LIMBO, runId, metadata, ledgerId); + ledgerStorage.setLimboState(ledgerId); + ledgerStorage.setFenced(ledgerId); + promise.complete(null); + } catch (IOException ioe) { + log.info("Event: {}, runId: {}, metadata: {}, ledger: {}", + Events.LIMBO_OR_FENCE_ERROR, runId, metadata, ledgerId, ioe); + promise.completeExceptionally(ioe); + } + } else { + promise.complete(null); + } + return promise; + } + + static class LedgerResult { + enum State { + MISSING, ERROR, OK + }; + + static LedgerResult missing(long ledgerId) { + return new LedgerResult(State.MISSING, ledgerId, null, null); + } + + static LedgerResult ok(long ledgerId, LedgerMetadata metadata) { + return new LedgerResult(State.OK, ledgerId, metadata, null); + } + + static LedgerResult error(long ledgerId, LedgerMetadata metadata, Throwable t) { + return new LedgerResult(State.ERROR, ledgerId, metadata, t); + } + + private final State state; + private final long ledgerId; + private final LedgerMetadata metadata; + private final Throwable throwable; + + private LedgerResult(State state, long ledgerId, + LedgerMetadata metadata, Throwable throwable) { + this.state = state; + this.ledgerId = ledgerId; + this.metadata = metadata; + this.throwable = throwable; + } + + boolean isMissing() { + return state == State.MISSING; + } + boolean isOK() { + return state == State.OK; + } + boolean isError() { + return state == State.ERROR; + } + long getLedgerId() { + return ledgerId; + } + LedgerMetadata getMetadata() { + return metadata; + } + Throwable getThrowable() { + return throwable; + } + } + + /** + * Check each ledger passed. + * If the ledger is in limbo, recover it. + * Check that the bookie has all entries that it is expected to have. + * Copy any entries that are missing. + * @return The set of results for all ledgers passed. A result can be OK, Missing or Error. + * OK and missing ledgers do not need to be looked at again. Error should be retried. 
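+ * <p>A hypothetical caller might consume the returned set like this (sketch):
+ * <pre>{@code
+ * checkAndRecoverLedgers(ledgers, runId).thenAccept(results -> {
+ *     for (LedgerResult r : results) {
+ *         if (r.isError()) {
+ *             // keep the ledger around for the next full check run
+ *         }
+ *     }
+ * });
+ * }</pre>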
+ */
+ CompletableFuture<Set<LedgerResult>> checkAndRecoverLedgers(Map<Long, LedgerMetadata> ledgers,
+ String runId) {
+ CompletableFuture<Set<LedgerResult>> promise = new CompletableFuture<>();
+ final Disposable disposable = Flowable.fromIterable(ledgers.entrySet())
+ .subscribeOn(scheduler, false)
+ .flatMapSingle((mapEntry) -> {
+ long ledgerId = mapEntry.getKey();
+ LedgerMetadata originalMetadata = mapEntry.getValue();
+ return recoverLedgerIfInLimbo(ledgerId, mapEntry.getValue(), runId)
+ .map(newMetadata -> LedgerResult.ok(ledgerId, newMetadata))
+ .onErrorReturn(t -> LedgerResult.error(ledgerId, originalMetadata, t))
+ .defaultIfEmpty(LedgerResult.missing(ledgerId))
+ .flatMap((res) -> {
+ try {
+ if (res.isOK()) {
+ this.ledgerStorage.clearLimboState(ledgerId);
+ }
+ return Single.just(res);
+ } catch (IOException ioe) {
+ return Single.just(LedgerResult.error(res.getLedgerId(),
+ res.getMetadata(), ioe));
+ }
+ });
+ },
+ true /* delayErrors */,
+ MAX_INFLIGHT)
+ .flatMapSingle((res) -> {
+ if (res.isOK()) {
+ return checkAndRecoverLedgerEntries(res.getLedgerId(),
+ res.getMetadata(), runId)
+ .map(ignore -> LedgerResult.ok(res.getLedgerId(),
+ res.getMetadata()))
+ .onErrorReturn(t -> LedgerResult.error(res.getLedgerId(),
+ res.getMetadata(), t));
+ } else {
+ return Single.just(res);
+ }
+ },
+ true /* delayErrors */,
+ 1 /* copy 1 ledger at a time to keep entries together in entrylog */)
+ .collect(Collectors.toSet())
+ .subscribe(resolved -> promise.complete(resolved),
+ throwable -> promise.completeExceptionally(throwable));
+ promise.whenComplete((result, ex) -> disposable.dispose());
+ return promise;
+ }
+
+ /**
+ * Run ledger recovery on a ledger if it has been marked as in limbo.
+ * @return a maybe with the most up to date metadata we have for the ledger.
+ * If the ledger has been deleted, returns empty.
+ */
+ Maybe<LedgerMetadata> recoverLedgerIfInLimbo(long ledgerId, LedgerMetadata origMetadata,
+ String runId) {
+ try {
+ if (!this.ledgerStorage.ledgerExists(ledgerId)) {
+ this.ledgerStorage.setMasterKey(ledgerId, new byte[0]);
+ }
+ if (this.ledgerStorage.hasLimboState(ledgerId)) {
+ log.info("Event: {}, runId: {}, metadata: {}, ledger: {}",
+ Events.RECOVER_LIMBO_LEDGER, runId, origMetadata, ledgerId);
+ return recoverLedger(ledgerId, runId)
+ .toMaybe()
+ .onErrorResumeNext(t -> {
+ if (t instanceof BKException.BKNoSuchLedgerExistsOnMetadataServerException) {
+ log.info("Event: {}, runId: {}, metadata: {}, ledger: {}",
+ Events.RECOVER_LIMBO_LEDGER_MISSING, runId, origMetadata, ledgerId);
+ return Maybe.empty();
+ } else {
+ log.info("Event: {}, runId: {}, metadata: {}, ledger: {}",
+ Events.RECOVER_LIMBO_LEDGER_ERROR, runId, origMetadata, ledgerId);
+ return Maybe.error(t);
+ }
+ });
+ } else {
+ return Maybe.just(origMetadata);
+ }
+ } catch (IOException ioe) {
+ return Maybe.error(ioe);
+ }
+ }
+
+ Single<LedgerMetadata> recoverLedger(long ledgerId, String runId) {
+ return Single.create((emitter) ->
+ admin.asyncOpenLedger(ledgerId, (rc, handle, ctx) -> {
+ if (rc != BKException.Code.OK) {
+ emitter.onError(BKException.create(rc));
+ } else {
+ LedgerMetadata metadata = handle.getLedgerMetadata();
+ handle.closeAsync().whenComplete((ignore, exception) -> {
+ if (exception != null) {
+ log.warn("Event: {}, runId: {}, ledger: {}",
+ Events.RECOVER_LIMBO_LEDGER_CLOSE_ERROR, runId, ledgerId, exception);
+ }
+ });
+ emitter.onSuccess(metadata);
+ }
+ }, null));
+
+ }
+
+ /**
+ * Check whether the local storage has all the entries as specified in the metadata.
+ * If not, copy them from other available nodes.
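+ * The check only considers entries up to the last known entry: the last
+ * entry id for closed ledgers, or the entry immediately before the final
+ * ensemble's first entry for ledgers that are still open.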
+
+ * Returns a single value which is the ledgerId, or an error if any entry
+ * failed to copy.
+ */
+ Single<Long> checkAndRecoverLedgerEntries(long ledgerId, LedgerMetadata metadata,
+ String runId) {
+ WriteSets writeSets = new WriteSets(metadata.getEnsembleSize(),
+ metadata.getWriteQuorumSize());
+
+ NavigableMap<Long, Integer> bookieIndices = metadata.getAllEnsembles()
+ .entrySet().stream()
+ .collect(ImmutableSortedMap.toImmutableSortedMap(Comparator.<Long>naturalOrder(),
+ e -> e.getKey(),
+ e -> e.getValue().indexOf(bookieId)));
+
+ long lastKnownEntry;
+ if (metadata.isClosed()) {
+ lastKnownEntry = metadata.getLastEntryId();
+ } else {
+ // if ledger is not closed, last known entry is the last entry of
+ // the penultimate ensemble
+ lastKnownEntry = metadata.getAllEnsembles().lastEntry().getKey() - 1;
+ }
+ if (lastKnownEntry < 0) {
+ return Single.just(ledgerId);
+ }
+
+ EntryCopier.Batch batch;
+ try {
+ batch = entryCopier.newBatch(ledgerId, metadata);
+ } catch (IOException ioe) {
+ return Single.error(ioe);
+ }
+ AtomicLong byteCount = new AtomicLong(0);
+ AtomicInteger count = new AtomicInteger(0);
+ AtomicInteger errorCount = new AtomicInteger(0);
+ AtomicReference<Throwable> firstError = new AtomicReference<>(null);
+ log.info("Event: {}, runId: {}, metadata: {}, ledger: {}",
+ Events.LEDGER_CHECK_AND_COPY_START, runId, metadata, ledgerId);
+ return Flowable.rangeLong(0, lastKnownEntry + 1)
+ .subscribeOn(scheduler, false)
+ .flatMapMaybe((entryId) -> {
+ return maybeCopyEntry(writeSets, bookieIndices, ledgerId, entryId, batch)
+ .doOnError((t) -> {
+ firstError.compareAndSet(null, t);
+ errorCount.incrementAndGet();
+ });
+ }, true /* delayErrors */, MAX_ENTRIES_INFLIGHT)
+ .doOnNext((bytes) -> {
+ byteCount.addAndGet(bytes);
+ count.incrementAndGet();
+ })
+ .count() // do nothing with result, but gives a single even if empty
+ .doOnTerminate(() -> {
+ if (firstError.get() != null) {
+ log.warn("Event: {}, runId: {}, metadata: {}, ledger: {}, entries: {}, bytes: {}, errors: {}",
+ Events.LEDGER_CHECK_AND_COPY_END, runId,
+ metadata, ledgerId, count.get(), byteCount.get(), firstError.get());
+ } else {
+ log.info("Event: {}, runId: {}, metadata: {}, ledger: {}, entries: {}, bytes: {}, errors: 0",
+ Events.LEDGER_CHECK_AND_COPY_END, runId,
+ metadata, ledgerId, count.get(), byteCount.get());
+ }
+ })
+ .map(ignore -> ledgerId);
+ }
+
+ /**
+ * @return the number of bytes copied.
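+ * An empty result means the entry already exists locally and nothing was
+ * copied. A hypothetical caller might subscribe as follows:
+ * <pre>{@code
+ * maybeCopyEntry(writeSets, bookieIndices, ledgerId, entryId, batch)
+ *         .subscribe(bytes -> log.info("copied {} bytes", bytes));
+ * }</pre>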
+ */ + Maybe maybeCopyEntry(WriteSets writeSets, NavigableMap bookieIndices, + long ledgerId, long entryId, EntryCopier.Batch batch) { + try { + if (isEntryMissing(writeSets, bookieIndices, ledgerId, entryId)) { + return Maybe.fromCompletionStage(batch.copyFromAvailable(entryId)); + } else { + return Maybe.empty(); + } + } catch (BookieException | IOException ioe) { + return Maybe.error(ioe); + } + } + + boolean isEntryMissing(WriteSets writeSets, NavigableMap bookieIndices, + long ledgerId, long entryId) throws IOException, BookieException { + int bookieIndexForEntry = bookieIndices.floorEntry(entryId).getValue(); + if (bookieIndexForEntry < 0) { + return false; + } + + return writeSets.getForEntry(entryId).contains(bookieIndexForEntry) + && !ledgerStorage.entryExists(ledgerId, entryId); + } + + static boolean ensemblesContainBookie(LedgerMetadata metadata, BookieId bookieId) { + return metadata.getAllEnsembles().values().stream() + .anyMatch(ensemble -> ensemble.contains(bookieId)); + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/datainteg/DataIntegrityCookieValidation.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/datainteg/DataIntegrityCookieValidation.java new file mode 100644 index 00000000000..a861d38e604 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/datainteg/DataIntegrityCookieValidation.java @@ -0,0 +1,164 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bookkeeper.bookie.datainteg; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.net.UnknownHostException; +import java.text.MessageFormat; +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; +import java.util.concurrent.ExecutionException; +import org.apache.bookkeeper.bookie.BookieException; +import org.apache.bookkeeper.bookie.BookieImpl; +import org.apache.bookkeeper.bookie.Cookie; +import org.apache.bookkeeper.bookie.CookieValidation; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.discover.RegistrationManager; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.versioning.Version; +import org.apache.bookkeeper.versioning.Versioned; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * An implementation of the CookieValidation interface that allows for auto-stamping + * cookies when configured and used in conjunction with the data integrity service. + * Because the data integrity service can heal a bookie with lost data due to a disk + * failure, a bookie can auto stamp new cookies as part of the healing process. 
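+ * <p>A minimal usage sketch (the directory list here is assumed to be
+ * supplied by the bookie's startup code):
+ * <pre>{@code
+ * CookieValidation validation = new DataIntegrityCookieValidation(
+ *         conf, registrationManager, dataIntegrityCheck);
+ * validation.checkCookies(allCookieDirectories); // List<File>
+ * }</pre>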
+ */ +public class DataIntegrityCookieValidation implements CookieValidation { + private static final Logger log = LoggerFactory.getLogger(DataIntegrityCookieValidation.class); + private final ServerConfiguration conf; + private final BookieId bookieId; + private final RegistrationManager registrationManager; + private final DataIntegrityCheck dataIntegCheck; + + public DataIntegrityCookieValidation(ServerConfiguration conf, + RegistrationManager registrationManager, + DataIntegrityCheck dataIntegCheck) + throws UnknownHostException { + this.conf = conf; + this.registrationManager = registrationManager; + this.bookieId = BookieImpl.getBookieId(conf); + this.dataIntegCheck = dataIntegCheck; + } + + private Optional> getRegManagerCookie() throws BookieException { + try { + return Optional.of(Cookie.readFromRegistrationManager(registrationManager, bookieId)); + } catch (BookieException.CookieNotFoundException noCookieException) { + return Optional.empty(); + } + } + + private List> collectDirectoryCookies(List directories) throws BookieException { + List> cookies = new ArrayList<>(); + for (File d : directories) { + try { + cookies.add(Optional.of(Cookie.readFromDirectory(d))); + } catch (FileNotFoundException fnfe) { + cookies.add(Optional.empty()); + } catch (IOException ioe) { + throw new BookieException.InvalidCookieException(ioe); + } + } + return cookies; + } + + private void stampCookie(Cookie masterCookie, Version expectedVersion, List directories) + throws BookieException { + // stamp to ZK first as it's the authoritative cookie. If this fails part way through + // stamping the directories, then a data integrity check will occur. + log.info("Stamping cookie to ZK"); + masterCookie.writeToRegistrationManager(registrationManager, conf, expectedVersion); + for (File d : directories) { + try { + log.info("Stamping cookie to directory {}", d); + masterCookie.writeToDirectory(d); + } catch (IOException ioe) { + log.error("Exception writing cookie", ioe); + throw new BookieException.InvalidCookieException(ioe); + } + } + } + + @Override + public void checkCookies(List directories) + throws BookieException, InterruptedException { + String instanceId = registrationManager.getClusterInstanceId(); + if (instanceId == null) { + throw new BookieException.InvalidCookieException("Cluster instance ID unavailable"); + } + Cookie masterCookie; + try { + masterCookie = Cookie.generateCookie(conf).setInstanceId(instanceId).build(); + } catch (UnknownHostException uhe) { + throw new BookieException.InvalidCookieException(uhe); + } + + // collect existing cookies + Optional> regManagerCookie = getRegManagerCookie(); + List> directoryCookies = collectDirectoryCookies(directories); + + // if master is empty, everything must be empty, otherwise the cluster is messed up + if (!regManagerCookie.isPresent()) { + // if everything is empty, it's a new install, just stamp the cookies + if (directoryCookies.stream().noneMatch(Optional::isPresent)) { + log.info("New environment found. Stamping cookies"); + stampCookie(masterCookie, Version.NEW, directories); + } else { + String errorMsg = + "Cookie missing from ZK. Either it was manually deleted, " + + "or the bookie was started pointing to a different ZK cluster " + + "than the one it was originally started with. 
" + + "This requires manual intervention to fix"; + log.error(errorMsg); + throw new BookieException.InvalidCookieException(errorMsg); + } + } else if (!regManagerCookie.get().getValue().equals(masterCookie) + || !directoryCookies.stream().allMatch(c -> c.map(masterCookie::equals).orElse(false))) { + if (conf.isDataIntegrityStampMissingCookiesEnabled()) { + log.warn("ZK cookie({}) or directory cookies({}) do not match master cookie ({}), running check", + regManagerCookie, directoryCookies, masterCookie); + try { + dataIntegCheck.runPreBootCheck("INVALID_COOKIE").get(); + } catch (ExecutionException ee) { + if (ee.getCause() instanceof BookieException) { + throw (BookieException) ee.getCause(); + } else { + throw new BookieException.InvalidCookieException(ee.getCause()); + } + } + log.info("Environment should be in a sane state. Stamp new cookies"); + stampCookie(masterCookie, regManagerCookie.get().getVersion(), directories); + } else { + String errorMsg = MessageFormat.format( + "ZK cookie({0}) or directory cookies({1}) do not match master cookie ({2})" + + " and missing cookie stamping is disabled.", + regManagerCookie, directoryCookies, masterCookie); + log.error(errorMsg); + throw new BookieException.InvalidCookieException(errorMsg); + } + } // else all cookies match the masterCookie, meaning nothing has changed in the configuration + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/datainteg/DataIntegrityService.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/datainteg/DataIntegrityService.java new file mode 100644 index 00000000000..3d2c040ae61 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/datainteg/DataIntegrityService.java @@ -0,0 +1,111 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bookkeeper.bookie.datainteg; + +import com.google.common.util.concurrent.ThreadFactoryBuilder; +import java.io.IOException; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.ScheduledFuture; +import java.util.concurrent.TimeUnit; +import lombok.extern.slf4j.Slf4j; +import org.apache.bookkeeper.common.component.AbstractLifecycleComponent; +import org.apache.bookkeeper.server.conf.BookieConfiguration; +import org.apache.bookkeeper.stats.StatsLogger; + +/** + * An abstract lifecycle component that can perform data integrity checking. 
+ */ +@Slf4j +public class DataIntegrityService extends AbstractLifecycleComponent { + private final DataIntegrityCheck check; + private final ScheduledExecutorService scheduler; + private ScheduledFuture scheduledFuture; + + public DataIntegrityService(BookieConfiguration conf, + StatsLogger statsLogger, + DataIntegrityCheck check) { + super("data-integ", conf, statsLogger); + this.check = check; + scheduler = Executors.newSingleThreadScheduledExecutor( + new ThreadFactoryBuilder() + .setNameFormat("bookie-data-integ-%d") + .setUncaughtExceptionHandler( + (t, ex) -> log.error("Event: {}, thread: {}", + Events.DATA_INTEG_SERVICE_UNCAUGHT_ERROR, + t, ex)) + .build()); + scheduledFuture = null; + } + + // allow tests to reduce interval + protected int interval() { + return 3; + } + + protected TimeUnit intervalUnit() { + return TimeUnit.SECONDS; + } + + @Override + protected void doStart() { + log.info("Event: {}, interval: {}, intervalUnit: {}", + Events.DATA_INTEG_SERVICE_START, interval(), intervalUnit()); + synchronized (this) { + scheduledFuture = scheduler.scheduleAtFixedRate(() -> { + try { + if (check.needsFullCheck()) { + check.runFullCheck().get(); + } + } catch (InterruptedException ie) { + log.warn("Event: {}", Events.DATA_INTEG_SERVICE_INTERRUPTED, ie); + Thread.currentThread().interrupt(); + } catch (Throwable t) { + log.error("Event: {}", Events.DATA_INTEG_SERVICE_ERROR, t); + } + }, 0, interval(), intervalUnit()); + } + } + + @Override + protected void doStop() { + log.info("Event: {}", Events.DATA_INTEG_SERVICE_STOP); + synchronized (this) { + if (scheduledFuture != null) { + scheduledFuture.cancel(true); + scheduledFuture = null; + } + } + } + + @Override + protected void doClose() throws IOException { + synchronized (this) { + // just in case stop didn't get called, the scheduledfuture + // would stop the scheduler from shutting down + if (scheduledFuture != null) { + scheduledFuture.cancel(true); + scheduledFuture = null; + } + } + + scheduler.shutdown(); + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/datainteg/EntryCopier.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/datainteg/EntryCopier.java new file mode 100644 index 00000000000..9e3598d7868 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/datainteg/EntryCopier.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bookkeeper.bookie.datainteg; + +import java.io.IOException; +import java.util.concurrent.CompletableFuture; +import org.apache.bookkeeper.client.api.LedgerMetadata; + +/** + * Interface for copying entries from other bookies. 
+ * The implementation should take care of selecting the order of the replicas + * from which we try to read, taking into account stickiness and errors. + * The implementation should take care of rate limiting. + */ +public interface EntryCopier { + /** + * Start copying a new batch. In general, there should be a batch per ledger. + */ + Batch newBatch(long ledgerId, LedgerMetadata metadata) throws IOException; + + /** + * An interface for a batch to be copied. + */ + interface Batch { + /** + * Copy an entry from a remote bookie and store it locally. + * @return the number of bytes copied. + */ + CompletableFuture copyFromAvailable(long entryId); + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/datainteg/EntryCopierImpl.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/datainteg/EntryCopierImpl.java new file mode 100644 index 00000000000..a9768b32f1a --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/datainteg/EntryCopierImpl.java @@ -0,0 +1,300 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bookkeeper.bookie.datainteg; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Ticker; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSortedMap; +import io.netty.buffer.ByteBuf; +import io.netty.util.ReferenceCountUtil; +import java.io.IOException; +import java.util.Collections; +import java.util.Comparator; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Random; +import java.util.Set; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; +import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; +import java.util.stream.IntStream; +import lombok.extern.slf4j.Slf4j; +import org.apache.bookkeeper.bookie.LedgerStorage; +import org.apache.bookkeeper.client.BKException; +import org.apache.bookkeeper.client.api.LedgerMetadata; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.proto.BookieClient; +import org.apache.bookkeeper.proto.BookieProtocol; + +/** + * Implementation for the EntryCopier interface. Handles the reading of entries + * from peer bookies. 
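+ * <p>Intended use, one batch per ledger (sketch; error handling elided):
+ * <pre>{@code
+ * EntryCopier.Batch batch = entryCopier.newBatch(ledgerId, metadata);
+ * batch.copyFromAvailable(entryId)
+ *         .thenAccept(bytes -> log.info("copied {} bytes", bytes));
+ * }</pre>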
+ */ +@Slf4j +public class EntryCopierImpl implements EntryCopier { + private static final long SINBIN_DURATION_MS = TimeUnit.MINUTES.toMillis(1); + private final BookieId bookieId; + private final BookieClient bookieClient; + private final LedgerStorage storage; + private final Ticker ticker; + private final SinBin sinBin; + + public EntryCopierImpl(BookieId bookieId, + BookieClient bookieClient, + LedgerStorage storage, + Ticker ticker) { + this.bookieId = bookieId; + this.bookieClient = bookieClient; + this.storage = storage; + this.ticker = ticker; + this.sinBin = new SinBin(ticker); + } + + @Override + public Batch newBatch(long ledgerId, LedgerMetadata metadata) throws IOException { + if (!storage.ledgerExists(ledgerId)) { + storage.setMasterKey(ledgerId, metadata.getPassword()); + } + return new BatchImpl(bookieId, ledgerId, metadata, sinBin); + } + + @VisibleForTesting + class BatchImpl implements Batch { + private final long ledgerId; + private final LedgerMetadata metadata; + private final SinBin sinBin; + private volatile ImmutableSortedMap writeSets; + + BatchImpl(BookieId bookieId, + long ledgerId, LedgerMetadata metadata, + SinBin sinBin) { + this.ledgerId = ledgerId; + this.metadata = metadata; + this.sinBin = sinBin; + updateWriteSets(); + } + + private void updateWriteSets() { + // clear non-erroring bookies + + // in theory we should be able to have a single set of writesets per ledger, + // however, if there are multiple ensembles, bookies will move around, and we + // still want to avoid erroring bookies + this.writeSets = preferredBookieIndices(bookieId, metadata, + sinBin.getErrorBookies(), ledgerId) + .entrySet().stream().collect( + ImmutableSortedMap.toImmutableSortedMap( + Comparator.naturalOrder(), + e -> e.getKey(), + e -> new WriteSets(e.getValue(), + metadata.getEnsembleSize(), + metadata.getWriteQuorumSize()))); + } + + @VisibleForTesting + void notifyBookieError(BookieId bookie) { + if (sinBin.addFailed(bookie)) { + updateWriteSets(); + } + } + + @Override + public CompletableFuture copyFromAvailable(long entryId) { + if (entryId < 0) { + throw new IllegalArgumentException( + String.format("Entry ID (%d) can't be less than 0", entryId)); + } + if (metadata.isClosed() && entryId > metadata.getLastEntryId()) { + throw new IllegalArgumentException( + String.format("Invalid entry id (%d), last entry for ledger %d is %d", + entryId, ledgerId, metadata.getLastEntryId())); + } + CompletableFuture promise = new CompletableFuture<>(); + fetchEntry(entryId).whenComplete((buffer, exception) -> { + if (exception != null) { + promise.completeExceptionally(exception); + } else { + try { + long length = buffer.readableBytes(); + storage.addEntry(buffer); + promise.complete(length); + } catch (Throwable t) { + promise.completeExceptionally(t); + } finally { + ReferenceCountUtil.release(buffer); + } + } + }); + return promise; + } + + @VisibleForTesting + CompletableFuture fetchEntry(long entryId) { + List ensemble = metadata.getEnsembleAt(entryId); + final Map.Entry writeSetsForEntryId = this.writeSets + .floorEntry(entryId); + if (writeSetsForEntryId == null) { + log.error("writeSets for entryId {} not found, writeSets {}", entryId, writeSets); + throw new IllegalStateException("writeSets for entryId: " + entryId + " not found"); + } + ImmutableList writeSet = writeSetsForEntryId + .getValue() + .getForEntry(entryId); + int attempt = 0; + CompletableFuture promise = new CompletableFuture<>(); + fetchRetryLoop(entryId, attempt, + ensemble, writeSet, + promise, 
Optional.empty()); + return promise; + } + + private void fetchRetryLoop(long entryId, int attempt, + List ensemble, + ImmutableList writeSet, + CompletableFuture promise, + Optional firstException) { + if (attempt >= writeSet.size()) { + promise.completeExceptionally( + firstException.orElse(new BKException.BKReadException())); + return; + } + BookieId bookie = ensemble.get(writeSet.get(attempt)); + readEntry(bookie, ledgerId, entryId) + .whenComplete((buffer, exception) -> { + if (exception != null) { + notifyBookieError(bookie); + Optional firstException1 = + firstException.isPresent() ? firstException : Optional.of(exception); + fetchRetryLoop(entryId, attempt + 1, + ensemble, writeSet, promise, firstException1); + } else { + promise.complete(buffer); + } + }); + } + } + + // convert callback api to future api + private CompletableFuture readEntry(BookieId bookieId, + long ledgerId, long entryId) { + CompletableFuture promise = new CompletableFuture<>(); + bookieClient.readEntry(bookieId, ledgerId, entryId, + (rc, ledgerId1, entryId1, buffer, ctx1) -> { + if (rc != BKException.Code.OK) { + promise.completeExceptionally(BKException.create(rc)); + } else { + buffer.retain(); + promise.complete(buffer); + } + }, null, BookieProtocol.FLAG_NONE); + return promise; + } + + /** + * Generate a map of preferred bookie indices. For each ensemble, generate the order + * in which bookies should be tried for entries, notwithstanding errors. + * For example, if a e5,w2,a2 ensemble has the bookies: + * [bookie1, bookie2, bookie3, bookie4, bookie5] + * and the current bookie is bookie2, then we should return something like: + * [4, 2, 0, 3] + * Then when retrieving an entry, even though it is only written to 2, we try the bookie + * in the order from this list. This will cause more requests to go to the same bookie, + * which should give us the benefit of read locality. + * We don't want to simply sort by bookie id, as that would cause the same bookies to be + * loaded for all ensembles. + * Bookies which have presented errors are always tried last. 
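+ * <p>Continuing the example above: if bookie5 (index 4) had recently
+ * returned errors, the stable sort would move it to the back, giving
+ * [2, 0, 3, 4] instead of [4, 2, 0, 3].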
+ */ + @VisibleForTesting + static ImmutableSortedMap> preferredBookieIndices( + BookieId bookieId, + LedgerMetadata metadata, + Set errorBookies, + long seed) { + return metadata.getAllEnsembles().entrySet().stream() + .collect(ImmutableSortedMap.toImmutableSortedMap( + Comparator.naturalOrder(), + e -> e.getKey(), + e -> { + List ensemble = e.getValue(); + // get indices of the interesting bookies + int myIndex = ensemble.indexOf(bookieId); + Set errorIndices = errorBookies.stream() + .map(b -> ensemble.indexOf(b)).collect(Collectors.toSet()); + + // turn bookies into positions and filter out my own + // bookie id (we're not going to try to read from outself) + List indices = IntStream.range(0, ensemble.size()) + .filter(i -> i != myIndex).boxed().collect(Collectors.toList()); + + // shuffle the indices based seed (normally ledgerId) + Collections.shuffle(indices, new Random(seed)); + + // Move the error bookies to the end + // Collections#sort is stable, so everything else remains the same + Collections.sort(indices, (a, b) -> { + boolean aErr = errorIndices.contains(a); + boolean bErr = errorIndices.contains(b); + if (aErr && !bErr) { + return 1; + } else if (!aErr && bErr) { + return -1; + } else { + return 0; + } + }); + return ImmutableList.copyOf(indices); + })); + } + + @VisibleForTesting + static class SinBin { + private final Ticker ticker; + private final ConcurrentMap errorBookies = new ConcurrentHashMap<>(); + + SinBin(Ticker ticker) { + this.ticker = ticker; + } + + /** + * Returns true if this is the first error for this bookie. + */ + boolean addFailed(BookieId bookie) { + long newDeadline = TimeUnit.NANOSECONDS.toMillis(ticker.read()) + SINBIN_DURATION_MS; + Long oldDeadline = errorBookies.put(bookie, newDeadline); + return oldDeadline == null; + } + + Set getErrorBookies() { + long now = TimeUnit.NANOSECONDS.toMillis(ticker.read()); + Iterator> iterator = errorBookies.entrySet().iterator(); + while (iterator.hasNext()) { + if (iterator.next().getValue() < now) { + iterator.remove(); + } + } + return errorBookies.keySet(); + } + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/datainteg/Events.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/datainteg/Events.java new file mode 100644 index 00000000000..5d984111824 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/datainteg/Events.java @@ -0,0 +1,235 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bookkeeper.bookie.datainteg; + +enum Events { + /** + * Data integrity service has started + * It runs at an interval to check if a full integrity check is needed, + * and if so runs it. + */ + DATA_INTEG_SERVICE_START, + /** + * Data integrity service has been stopped. 
+ */
+ DATA_INTEG_SERVICE_STOP,
+ /**
+ * An exception was thrown on the data integrity service executor
+ * and never caught. This is a programmer error and should be reported
+ * as such.
+ */
+ DATA_INTEG_SERVICE_UNCAUGHT_ERROR,
+ /**
+ * Data integrity service thread interrupted.
+ * This is non-fatal and indicates that the bookie is shutting down.
+ * The full check will resume once the bookie is started again.
+ */
+ DATA_INTEG_SERVICE_INTERRUPTED,
+ /**
+ * An error occurred in the data integrity service loop.
+ * This normally indicates that an error occurred in the full check.
+ * The full check will be tried again.
+ * It could also indicate an error checking the NEEDS_INTEGRITY_CHECK
+ * flag, which indicates disk issues.
+ */
+ DATA_INTEG_SERVICE_ERROR,
+
+ /**
+ * Mark a ledger as in-limbo. In limbo ledgers are ledgers for whose
+ * entries we cannot safely answer queries positively or negatively.
+ * These are ledgers which have not been closed and where this bookie
+ * appears in the final ensemble.
+
+ * We may have had an entry in the past, but due to disk failures or
+ * configuration changes it may not currently exist locally. However,
+ * we cannot tell clients that the entry doesn't exist, because the client
+ * would understand that to mean that it never existed, and this would
+ * break consistency in the ledger recovery protocol.
+
+ * For limbo ledgers, all entry level queries should throw an exception.
+
+ * We also mark the ledger as fenced at this point, as it may have been set
+ * on this ledger previously. This means no more writes for this ledger
+ * can come to this bookie.
+ */
+ MARK_LIMBO,
+
+ /**
+ * An error occurred marking the ledger as fenced or as in-limbo.
+ * The most likely cause is a bad disk.
+ * This is a fatal error, as we cannot safely serve entries if we cannot
+ * set limbo and fence flags.
+ */
+ LIMBO_OR_FENCE_ERROR,
+
+ /**
+ * Start the preboot check. The preboot check runs when some configuration
+ * has changed regarding the disk configuration. This may be simply a disk
+ * being added, or it could be the disks being wiped. The preboot check
+ * needs to check which ledgers we are supposed to store according to
+ * ledger metadata. Any unclosed ledgers which contain this bookie in their last
+ * ensemble must be marked as in-limbo, as we don't know if entries from that
+ * ledger have previously existed on this bookie.
+
+ * The preboot check doesn't copy any data. That is left up to the full check
+ * which can run in the background while the bookie is serving data for non-limbo
+ * ledgers.
+
+ * The preboot check has a runId associated which can be used to pull together
+ * all the events from the same run.
+ * The preboot check will set the NEEDS_INTEGRITY_CHECK flag on storage to
+ * trigger a full check after the bookie has booted.
+ */
+ PREBOOT_START,
+ /**
+ * The preboot check has completed successfully. The event contains the number
+ * of ledgers that have been processed.
+ */
+ PREBOOT_END,
+ /**
+ * An error occurred during the preboot check. This is a fatal error as we cannot
+ * safely serve data if the correct ledgers have not been marked as in-limbo. The
+ * error could be due to problems accessing the metadata store, or due to disk
+ * issues.
+ */
+ PREBOOT_ERROR,
+ /**
+ * Preboot found an invalid ledger metadata. All ledger metadata must have at least
+ * one ensemble but the process found one with none.
+ */
+ INVALID_METADATA,
+ /**
+ * Preboot must create a ledger that the bookie does not have but that metadata says
+ * the bookie should have. This can happen due to things like ensemble changes and
+ * when a ledger is closed. If the ledger cannot be created on the bookie then
+ * this error will cause preboot to fail.
+ */
+ ENSURE_LEDGER_ERROR,
+ /**
+ * Initialize the full check. If we have cached metadata from a previous run, or
+ * the preboot check, then we use that. Otherwise we read the metadata from the
+ * metadata store.
+
+ * The full check goes through each ledger for which this bookie is supposed to
+ * store entries and checks that these entries exist on the bookie. If they do not
+ * exist, they are copied from another bookie.
+
+ * Each full check has a runId associated which can be used to find all events from
+ * the check.
+ */
+ FULL_CHECK_INIT,
+ /**
+ * The full check has completed.
+ */
+ FULL_CHECK_COMPLETE,
+ /**
+ * Start iterating through the ledgers that should be on this bookie.
+ * The event is annotated with the number of ledgers which will be checked,
+ * which may be fewer than the total number of ledgers on the bookie as
+ * a previous run may have verified that some ledgers are ok and don't need
+ * to be checked.
+ */
+ FULL_CHECK_START,
+ /**
+ * The full check has completed. This can be an info event or an error event.
+ * The event is annotated with the number of ledgers which were checked and found
+ * to be ok, the number that were found to be missing and the number for which
+ * errors occurred during the check. The missing ledgers have been deleted on
+ * the cluster, so don't need to be processed again. If there is a non-zero number of
+ * ledgers with errors, the whole event is an error.
+
+ * An error for this event is non-fatal. Any ledgers which finished with error
+ * will be processed again the next time the full check runs. The full check
+ * continues retrying until there are no errors.
+ */
+ FULL_CHECK_END,
+ /**
+ * An error occurred during the full check, but not while processing ledgers.
+ * This error could occur while flushing the ledger storage or clearing the
+ * full check flag.
+ */
+ FULL_CHECK_ERROR,
+
+ /**
+ * The full check will use cached metadata.
+ */
+ USE_CACHED_METADATA,
+ /**
+ * The full check will read the metadata from the metadata store.
+ */
+ REFRESH_METADATA,
+
+ /**
+ * The NEEDS_INTEGRITY_CHECK flag will be cleared from the ledger storage.
+ * This signifies that the ledger storage contains everything it should
+ * and the full check does not need to be retried, even after reboot.
+ */
+ CLEAR_INTEGCHECK_FLAG,
+
+ /**
+ * An error occurred while clearing the limbo flag for a ledger.
+ * This is generally a disk error. This error is non-fatal and the operation
+ * will be tried again on the next full check.
+ */
+ CLEAR_LIMBO_ERROR,
+ /**
+ * Recover a ledger that has been marked as in limbo. This runs the ledger
+ * recovery algorithm to find the last entry of the ledger and mark the ledger
+ * as closed. As the ledger is marked as in-limbo locally, the current bookie
+ * does not take part in the recovery process apart from initializing it.
+
+ * Once recovery completes successfully, the limbo flag can be cleared for the
+ * ledger.
+ */
+ RECOVER_LIMBO_LEDGER,
+ /**
+ * The ledger has been deleted from the ledger metadata store, so we don't need
+ * to continue any processing on it.
+ */
+ RECOVER_LIMBO_LEDGER_MISSING,
+ /**
+ * An error occurred during recovery.
This could be due to not having enough + * bookies available to recover the ledger. + * The error is non-fatal. The recovery will be tried again on the next run of + * ledger recovery. + */ + RECOVER_LIMBO_LEDGER_ERROR, + /** + * An error occurred when trying to close the ledger handle of a recovered ledger. + * This shouldn't happen, as closing a recovered ledger should not involve any I/O. + * This error is non-fatal and the event is registered for informational purposes + * only. + */ + RECOVER_LIMBO_LEDGER_CLOSE_ERROR, + + /** + * Start checking whether the entries for a ledger exist locally, and copying them + * if they do not. + */ + LEDGER_CHECK_AND_COPY_START, + /** + * Checking and copying has completed for a ledger. If any entry failed to copy + * this is a warning event. The ledger will be retried on the next run of the full + * check. + * This event is annotated with the number of entries copied, the number of errors + * and the total number of bytes copied for the ledger. + */ + LEDGER_CHECK_AND_COPY_END +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/datainteg/MetadataAsyncIterator.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/datainteg/MetadataAsyncIterator.java new file mode 100644 index 00000000000..f942b6b069d --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/datainteg/MetadataAsyncIterator.java @@ -0,0 +1,125 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bookkeeper.bookie.datainteg; + +import io.reactivex.rxjava3.core.Completable; +import io.reactivex.rxjava3.core.Flowable; +import io.reactivex.rxjava3.core.Scheduler; +import io.reactivex.rxjava3.disposables.Disposable; +import java.io.IOException; +import java.util.Iterator; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CompletionException; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; +import java.util.function.BiFunction; +import lombok.extern.slf4j.Slf4j; +import org.apache.bookkeeper.client.BKException; +import org.apache.bookkeeper.client.api.LedgerMetadata; +import org.apache.bookkeeper.meta.LedgerManager; +import org.apache.bookkeeper.versioning.Versioned; + +/** + * An rxjava ledger metadata iterator. 
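+ * <p>Illustrative use (names assumed; the consumer returns a future per
+ * ledger and completion is signalled through the returned future):
+ * <pre>{@code
+ * MetadataAsyncIterator iter = new MetadataAsyncIterator(
+ *         scheduler, ledgerManager, 300, 30, TimeUnit.SECONDS);
+ * iter.forEach((ledgerId, metadata) ->
+ *         CompletableFuture.completedFuture(null)) // inspect metadata here
+ *     .join();
+ * }</pre>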
+ */ +@Slf4j +public class MetadataAsyncIterator { + private final Scheduler scheduler; + private final LedgerManager ledgerManager; + private final long zkTimeoutMs; + private final int maxInFlight; + + MetadataAsyncIterator(Scheduler scheduler, + LedgerManager ledgerManager, int maxInFlight, + int zkTimeout, TimeUnit zkTimeoutUnit) { + this.scheduler = scheduler; + this.ledgerManager = ledgerManager; + this.maxInFlight = maxInFlight; + this.zkTimeoutMs = zkTimeoutUnit.toMillis(zkTimeout); + } + + + private static class FlatIterator { + final LedgerManager.LedgerRangeIterator ranges; + Iterator range = null; + FlatIterator(LedgerManager.LedgerRangeIterator ranges) { + this.ranges = ranges; + } + boolean hasNext() throws IOException { + if (range == null || !range.hasNext()) { + if (ranges.hasNext()) { + range = ranges.next().getLedgers().iterator(); + } + } + return range != null && range.hasNext(); + } + Long next() throws IOException { + return range.next(); + } + } + + public CompletableFuture forEach(BiFunction> consumer) { + CompletableFuture promise = new CompletableFuture<>(); + final Disposable disposable = Flowable.generate( + () -> new FlatIterator(ledgerManager.getLedgerRanges(zkTimeoutMs)), + (iter, emitter) -> { + try { + if (iter.hasNext()) { + emitter.onNext(iter.next()); + } else { + emitter.onComplete(); + } + } catch (Exception e) { + emitter.onError(e); + } + }) + .subscribeOn(scheduler) + .flatMapCompletable((ledgerId) -> Completable.fromCompletionStage(processOne(ledgerId, consumer)), + false /* delayErrors */, + maxInFlight) + .subscribe(() -> promise.complete(null), + t -> promise.completeExceptionally(unwrap(t))); + promise.whenComplete((result, ex) -> disposable.dispose()); + return promise; + } + + private CompletableFuture processOne(long ledgerId, + BiFunction> consumer) { + return ledgerManager.readLedgerMetadata(ledgerId) + .thenApply(Versioned::getValue) + .thenCompose((metadata) -> consumer.apply(ledgerId, metadata)) + .exceptionally((e) -> { + Throwable realException = unwrap(e); + log.warn("Got exception processing ledger {}", ledgerId, realException); + if (realException instanceof BKException.BKNoSuchLedgerExistsOnMetadataServerException) { + return null; + } else { + throw new CompletionException(realException); + } + }); + } + + static Throwable unwrap(Throwable e) { + if (e instanceof CompletionException || e instanceof ExecutionException) { + return unwrap(e.getCause()); + } + return e; + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/datainteg/WriteSets.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/datainteg/WriteSets.java new file mode 100644 index 00000000000..ff9274d982e --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/datainteg/WriteSets.java @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
+ */
+class WriteSets {
+    private static final Logger log = LoggerFactory.getLogger(WriteSets.class);
+    private final int ensembleSize;
+    private final ImmutableList<ImmutableList<Integer>> sets;
+
+    WriteSets(List<Integer> preferredOrder,
+              int ensembleSize,
+              int writeQuorumSize) {
+        this.ensembleSize = ensembleSize;
+
+        ImmutableList.Builder<ImmutableList<Integer>> builder =
+            new ImmutableList.Builder<ImmutableList<Integer>>();
+        for (int i = 0; i < ensembleSize; i++) {
+            builder.add(generateWriteSet(preferredOrder, ensembleSize, writeQuorumSize, i));
+        }
+        sets = builder.build();
+    }
+
+    WriteSets(int ensembleSize, int writeQuorumSize) {
+        this(IntStream.range(0, ensembleSize).boxed().collect(Collectors.toList()),
+             ensembleSize, writeQuorumSize);
+    }
+
+    ImmutableList<Integer> getForEntry(long entryId) {
+        return sets.get((int) (entryId % ensembleSize));
+    }
+
+    static ImmutableList<Integer> generateWriteSet(List<Integer> preferredOrder,
+                                                   int ensembleSize,
+                                                   int writeQuorumSize,
+                                                   int offset) {
+        ImmutableList.Builder<Integer> builder = new ImmutableList.Builder<Integer>();
+        int firstIndex = offset;
+        int lastIndex = (offset + writeQuorumSize - 1) % ensembleSize;
+        for (Integer i : preferredOrder) {
+            if (firstIndex <= lastIndex
+                && i >= firstIndex
+                && i <= lastIndex) {
+                builder.add(i);
+            } else if (lastIndex < firstIndex
+                       && (i <= lastIndex
+                           || i >= firstIndex)) {
+                builder.add(i);
+            }
+        }
+        ImmutableList<Integer> writeSet = builder.build();
+
+        // writeSet may be one smaller than the configured write
+        // set size if we are excluding ourself
+        checkState(writeSet.size() == writeQuorumSize
+                   || (writeSet.size() == writeQuorumSize - 1));
+        return writeSet;
+    }
+}
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/datainteg/package-info.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/datainteg/package-info.java
new file mode 100644
index 00000000000..ff6ab25425d
--- /dev/null
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/datainteg/package-info.java
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/**
+ * Package of the classes for the bookie data integrity service.
+ */ +package org.apache.bookkeeper.bookie.datainteg; \ No newline at end of file diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/stats/BookieStats.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/stats/BookieStats.java new file mode 100644 index 00000000000..d30d94471fa --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/stats/BookieStats.java @@ -0,0 +1,134 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bookkeeper.bookie.stats; + +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.ADD_ENTRY; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.BOOKIE_ADD_ENTRY; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.BOOKIE_ADD_ENTRY_BYTES; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.BOOKIE_FORCE_LEDGER; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.BOOKIE_GET_LIST_OF_ENTRIES_OF_LEDGER; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.BOOKIE_READ_ENTRY; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.BOOKIE_READ_ENTRY_BYTES; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.BOOKIE_RECOVERY_ADD_ENTRY; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.BOOKIE_SCOPE; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.CATEGORY_SERVER; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.GET_LIST_OF_ENTRIES_OF_LEDGER; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.JOURNAL_DIRS; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.JOURNAL_QUEUE_MAX_SIZE; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.READ_BYTES; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.READ_ENTRY; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.WRITE_BYTES; + +import lombok.Getter; +import org.apache.bookkeeper.stats.Counter; +import org.apache.bookkeeper.stats.Gauge; +import org.apache.bookkeeper.stats.OpStatsLogger; +import org.apache.bookkeeper.stats.StatsLogger; +import org.apache.bookkeeper.stats.annotations.StatsDoc; + +/** + * A umbrella class for bookie related stats. 
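+ * <p>Illustrative wiring (a sketch; {@code NullStatsLogger} is the no-op logger from
+ * the stats API, and {@code elapsedNanos} is a made-up latency sample):</p>
+ * <pre>{@code
+ * BookieStats stats = new BookieStats(NullStatsLogger.INSTANCE, 1, 10000);
+ * stats.getForceLedgerOps().inc();
+ * stats.getAddEntryStats().registerSuccessfulEvent(elapsedNanos, TimeUnit.NANOSECONDS);
+ * }</pre>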
+ */ +@StatsDoc( + name = BOOKIE_SCOPE, + category = CATEGORY_SERVER, + help = "Bookie related stats" +) +@Getter +public class BookieStats { + + // Expose Stats + final StatsLogger statsLogger; + @StatsDoc(name = WRITE_BYTES, help = "total bytes written to a bookie") + private final Counter writeBytes; + @StatsDoc(name = READ_BYTES, help = "total bytes read from a bookie") + private final Counter readBytes; + @StatsDoc(name = BOOKIE_FORCE_LEDGER, help = "total force operations occurred on a bookie") + private final Counter forceLedgerOps; + // Bookie Operation Latency Stats + @StatsDoc( + name = BOOKIE_ADD_ENTRY, + help = "operations stats of AddEntry on a bookie", + parent = ADD_ENTRY + ) + private final OpStatsLogger addEntryStats; + @StatsDoc(name = BOOKIE_RECOVERY_ADD_ENTRY, help = "operation stats of RecoveryAddEntry on a bookie") + private final OpStatsLogger recoveryAddEntryStats; + @StatsDoc( + name = BOOKIE_READ_ENTRY, + help = "operation stats of ReadEntry on a bookie", + parent = READ_ENTRY + ) + private final OpStatsLogger readEntryStats; + @StatsDoc( + name = BOOKIE_GET_LIST_OF_ENTRIES_OF_LEDGER, + help = "operation stats of GetListOfEntriesOfLedger on a bookie", + parent = GET_LIST_OF_ENTRIES_OF_LEDGER + ) + private final OpStatsLogger getListOfEntriesOfLedgerStats; + // Bookie Operation Bytes Stats + @StatsDoc(name = BOOKIE_ADD_ENTRY_BYTES, help = "bytes stats of AddEntry on a bookie") + private final OpStatsLogger addBytesStats; + @StatsDoc(name = BOOKIE_READ_ENTRY_BYTES, help = "bytes stats of ReadEntry on a bookie") + private final OpStatsLogger readBytesStats; + @StatsDoc(name = JOURNAL_DIRS, help = "number of configured journal directories") + private final Gauge journalDirsGauge; + @StatsDoc(name = JOURNAL_QUEUE_MAX_SIZE, help = "maximum length of a journal queue") + private final Gauge journalQueueMaxQueueSizeGauge; + + public BookieStats(StatsLogger statsLogger, int numJournalDirs, int maxJournalQueueSize) { + this.statsLogger = statsLogger; + writeBytes = statsLogger.getCounter(WRITE_BYTES); + readBytes = statsLogger.getCounter(READ_BYTES); + forceLedgerOps = statsLogger.getCounter(BOOKIE_FORCE_LEDGER); + addEntryStats = statsLogger.getOpStatsLogger(BOOKIE_ADD_ENTRY); + recoveryAddEntryStats = statsLogger.getOpStatsLogger(BOOKIE_RECOVERY_ADD_ENTRY); + readEntryStats = statsLogger.getOpStatsLogger(BOOKIE_READ_ENTRY); + getListOfEntriesOfLedgerStats = statsLogger.getOpStatsLogger(BOOKIE_GET_LIST_OF_ENTRIES_OF_LEDGER); + addBytesStats = statsLogger.getOpStatsLogger(BOOKIE_ADD_ENTRY_BYTES); + readBytesStats = statsLogger.getOpStatsLogger(BOOKIE_READ_ENTRY_BYTES); + journalDirsGauge = new Gauge() { + @Override + public Integer getDefaultValue() { + return numJournalDirs; + } + + @Override + public Integer getSample() { + return numJournalDirs; + } + }; + statsLogger.registerGauge(JOURNAL_DIRS, journalDirsGauge); + journalQueueMaxQueueSizeGauge = new Gauge() { + @Override + public Integer getDefaultValue() { + return maxJournalQueueSize; + } + + @Override + public Integer getSample() { + return maxJournalQueueSize; + } + }; + statsLogger.registerGauge(JOURNAL_QUEUE_MAX_SIZE, journalQueueMaxQueueSizeGauge); + } + + +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/stats/EntryMemTableStats.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/stats/EntryMemTableStats.java new file mode 100644 index 00000000000..35419ae9f3a --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/stats/EntryMemTableStats.java 
@@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bookkeeper.bookie.stats; + +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.BOOKIE_ADD_ENTRY; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.BOOKIE_READ_ENTRY; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.BOOKIE_SCOPE; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.CATEGORY_SERVER; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.SKIP_LIST_FLUSH_BYTES; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.SKIP_LIST_GET_ENTRY; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.SKIP_LIST_PUT_ENTRY; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.SKIP_LIST_SNAPSHOT; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.SKIP_LIST_THROTTLING; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.SKIP_LIST_THROTTLING_LATENCY; + +import lombok.Getter; +import org.apache.bookkeeper.stats.Counter; +import org.apache.bookkeeper.stats.OpStatsLogger; +import org.apache.bookkeeper.stats.StatsLogger; +import org.apache.bookkeeper.stats.annotations.StatsDoc; + +/** + * A umbrella class for memtable related stats. 
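+ * <p>Typical update pattern (a sketch; the snapshot work itself is elided):</p>
+ * <pre>{@code
+ * EntryMemTableStats stats = new EntryMemTableStats(statsLogger);
+ * long startNanos = MathUtils.nowInNano();
+ * // ... take the skip-list snapshot ...
+ * stats.getSnapshotStats().registerSuccessfulEvent(
+ *         MathUtils.elapsedNanos(startNanos), TimeUnit.NANOSECONDS);
+ * }</pre>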
+ */ +@StatsDoc( + name = BOOKIE_SCOPE, + category = CATEGORY_SERVER, + help = "EntryMemTable related stats" +) +@Getter +public class EntryMemTableStats { + + @StatsDoc( + name = SKIP_LIST_SNAPSHOT, + help = "operation stats of taking memtable snapshots" + ) + private final OpStatsLogger snapshotStats; + @StatsDoc( + name = SKIP_LIST_PUT_ENTRY, + help = "operation stats of putting entries to memtable", + parent = BOOKIE_ADD_ENTRY + ) + private final OpStatsLogger putEntryStats; + @StatsDoc( + name = SKIP_LIST_GET_ENTRY, + help = "operation stats of getting entries from memtable", + parent = BOOKIE_READ_ENTRY + ) + private final OpStatsLogger getEntryStats; + @StatsDoc( + name = SKIP_LIST_FLUSH_BYTES, + help = "The number of bytes flushed from memtable to entry log files" + ) + private final Counter flushBytesCounter; + @StatsDoc( + name = SKIP_LIST_THROTTLING, + help = "The number of requests throttled due to memtables are full" + ) + private final Counter throttlingCounter; + @StatsDoc( + name = SKIP_LIST_THROTTLING_LATENCY, + help = "The distribution of request throttled duration" + ) + private final OpStatsLogger throttlingStats; + + public EntryMemTableStats(StatsLogger statsLogger) { + this.snapshotStats = statsLogger.getOpStatsLogger(SKIP_LIST_SNAPSHOT); + this.putEntryStats = statsLogger.getOpStatsLogger(SKIP_LIST_PUT_ENTRY); + this.getEntryStats = statsLogger.getOpStatsLogger(SKIP_LIST_GET_ENTRY); + this.flushBytesCounter = statsLogger.getCounter(SKIP_LIST_FLUSH_BYTES); + this.throttlingCounter = statsLogger.getCounter(SKIP_LIST_THROTTLING); + this.throttlingStats = statsLogger.getOpStatsLogger(SKIP_LIST_THROTTLING_LATENCY); + } + +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/stats/GarbageCollectorStats.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/stats/GarbageCollectorStats.java new file mode 100644 index 00000000000..f9f1e31feee --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/stats/GarbageCollectorStats.java @@ -0,0 +1,158 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.bookkeeper.bookie.stats; + +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.ACTIVE_ENTRY_LOG_COUNT; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.ACTIVE_ENTRY_LOG_SPACE_BYTES; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.ACTIVE_LEDGER_COUNT; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.BOOKIE_SCOPE; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.CATEGORY_SERVER; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.DELETED_LEDGER_COUNT; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.MAJOR_COMPACTION_COUNT; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.MINOR_COMPACTION_COUNT; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.RECLAIMED_COMPACTION_SPACE_BYTES; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.RECLAIMED_DELETION_SPACE_BYTES; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.RECLAIM_FAILED_TO_DELETE; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.THREAD_RUNTIME; + +import java.util.function.Supplier; +import lombok.Getter; +import org.apache.bookkeeper.stats.Counter; +import org.apache.bookkeeper.stats.Gauge; +import org.apache.bookkeeper.stats.OpStatsLogger; +import org.apache.bookkeeper.stats.StatsLogger; +import org.apache.bookkeeper.stats.annotations.StatsDoc; + +/** + * A umbrella class for gc stats. + */ +@StatsDoc( + name = BOOKIE_SCOPE, + category = CATEGORY_SERVER, + help = "Garbage Collector related stats" +) +@Getter +public class GarbageCollectorStats { + + final StatsLogger statsLogger; + @StatsDoc( + name = MINOR_COMPACTION_COUNT, + help = "Number of minor compactions" + ) + private final Counter minorCompactionCounter; + @StatsDoc( + name = MAJOR_COMPACTION_COUNT, + help = "Number of major compactions" + ) + private final Counter majorCompactionCounter; + @StatsDoc( + name = RECLAIMED_DELETION_SPACE_BYTES, + help = "Number of disk space bytes reclaimed via deleting entry log files" + ) + private final Counter reclaimedSpaceViaDeletes; + @StatsDoc( + name = RECLAIMED_COMPACTION_SPACE_BYTES, + help = "Number of disk space bytes reclaimed via compacting entry log files" + ) + private final Counter reclaimedSpaceViaCompaction; + @StatsDoc( + name = RECLAIM_FAILED_TO_DELETE, + help = "Number of reclaim failed counts when deleting entry log files" + ) + private final Counter reclaimFailedToDelete; + @StatsDoc( + name = DELETED_LEDGER_COUNT, + help = "Number of ledgers deleted by garbage collection" + ) + private final Counter deletedLedgerCounter; + @StatsDoc( + name = THREAD_RUNTIME, + help = "Operation stats of garbage collections" + ) + private final OpStatsLogger gcThreadRuntime; + @StatsDoc( + name = ACTIVE_ENTRY_LOG_COUNT, + help = "Current number of active entry log files" + ) + private final Gauge activeEntryLogCountGauge; + @StatsDoc( + name = ACTIVE_ENTRY_LOG_SPACE_BYTES, + help = "Current number of active entry log space bytes" + ) + private final Gauge activeEntryLogSpaceBytesGauge; + @StatsDoc( + name = ACTIVE_LEDGER_COUNT, + help = "Current number of active ledgers" + ) + private final Gauge activeLedgerCountGauge; + + public GarbageCollectorStats(StatsLogger statsLogger, + Supplier activeEntryLogCountSupplier, + Supplier activeEntryLogSpaceBytesSupplier, + Supplier activeLedgerCountSupplier) { + this.statsLogger = statsLogger; + + this.minorCompactionCounter = statsLogger.getCounter(MINOR_COMPACTION_COUNT); + 
this.majorCompactionCounter = statsLogger.getCounter(MAJOR_COMPACTION_COUNT); + this.reclaimedSpaceViaCompaction = statsLogger.getCounter(RECLAIMED_COMPACTION_SPACE_BYTES); + this.reclaimedSpaceViaDeletes = statsLogger.getCounter(RECLAIMED_DELETION_SPACE_BYTES); + this.reclaimFailedToDelete = statsLogger.getCounter(RECLAIM_FAILED_TO_DELETE); + this.gcThreadRuntime = statsLogger.getOpStatsLogger(THREAD_RUNTIME); + this.deletedLedgerCounter = statsLogger.getCounter(DELETED_LEDGER_COUNT); + + this.activeEntryLogCountGauge = new Gauge() { + @Override + public Integer getDefaultValue() { + return 0; + } + + @Override + public Integer getSample() { + return activeEntryLogCountSupplier.get(); + } + }; + statsLogger.registerGauge(ACTIVE_ENTRY_LOG_COUNT, activeEntryLogCountGauge); + this.activeEntryLogSpaceBytesGauge = new Gauge() { + @Override + public Long getDefaultValue() { + return 0L; + } + + @Override + public Long getSample() { + return activeEntryLogSpaceBytesSupplier.get(); + } + }; + statsLogger.registerGauge(ACTIVE_ENTRY_LOG_SPACE_BYTES, activeEntryLogSpaceBytesGauge); + this.activeLedgerCountGauge = new Gauge() { + @Override + public Integer getDefaultValue() { + return 0; + } + + @Override + public Integer getSample() { + return activeLedgerCountSupplier.get(); + } + }; + statsLogger.registerGauge(ACTIVE_LEDGER_COUNT, activeLedgerCountGauge); + } + +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/stats/IndexInMemPageMgrStats.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/stats/IndexInMemPageMgrStats.java new file mode 100644 index 00000000000..7d2b1914a9d --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/stats/IndexInMemPageMgrStats.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bookkeeper.bookie.stats; + +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.BOOKIE_SCOPE; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.CATEGORY_SERVER; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.INDEX_INMEM_ILLEGAL_STATE_DELETE; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.INDEX_INMEM_ILLEGAL_STATE_RESET; + +import lombok.Getter; +import org.apache.bookkeeper.stats.Counter; +import org.apache.bookkeeper.stats.StatsLogger; +import org.apache.bookkeeper.stats.annotations.StatsDoc; + +/** + * A umbrella class for {@link org.apache.bookkeeper.bookie.IndexInMemPageMgr} stats. 
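+ * <p>Both metrics are plain counters which the page manager bumps when it detects
+ * the corresponding illegal state, e.g. (sketch):
+ * {@code stats.getIllegalStateResetCounter().inc(); }</p>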
+ */ +@StatsDoc( + name = BOOKIE_SCOPE, + category = CATEGORY_SERVER, + help = "Index InMemPage Manager related stats" +) +@Getter +public class IndexInMemPageMgrStats { + + // Stats + @StatsDoc( + name = INDEX_INMEM_ILLEGAL_STATE_RESET, + help = "The number of index pages detected as in illegal state when resetting" + ) + private final Counter illegalStateResetCounter; + @StatsDoc( + name = INDEX_INMEM_ILLEGAL_STATE_DELETE, + help = "The number of index pages detected as in illegal state when deleting" + ) + private final Counter illegalStateDeleteCounter; + + public IndexInMemPageMgrStats(StatsLogger statsLogger) { + illegalStateResetCounter = statsLogger.getCounter(INDEX_INMEM_ILLEGAL_STATE_RESET); + illegalStateDeleteCounter = statsLogger.getCounter(INDEX_INMEM_ILLEGAL_STATE_DELETE); + } + +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/stats/IndexPersistenceMgrStats.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/stats/IndexPersistenceMgrStats.java new file mode 100644 index 00000000000..3c06b5bc100 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/stats/IndexPersistenceMgrStats.java @@ -0,0 +1,101 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bookkeeper.bookie.stats; + +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.BOOKIE_SCOPE; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.CATEGORY_SERVER; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.LEDGER_CACHE_NUM_EVICTED_LEDGERS; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.PENDING_GET_FILE_INFO; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.READ_FILE_INFO_CACHE_SIZE; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.WRITE_FILE_INFO_CACHE_SIZE; + +import java.util.function.Supplier; +import lombok.Getter; +import org.apache.bookkeeper.stats.Counter; +import org.apache.bookkeeper.stats.Gauge; +import org.apache.bookkeeper.stats.StatsLogger; +import org.apache.bookkeeper.stats.annotations.StatsDoc; + +/** + * A umbrella class for {@link org.apache.bookkeeper.bookie.IndexPersistenceMgr} stats. 
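+ * <p>The cache-size gauges are backed by suppliers, so the stats provider always
+ * samples the live value (a sketch; the cache names are hypothetical):</p>
+ * <pre>{@code
+ * IndexPersistenceMgrStats stats = new IndexPersistenceMgrStats(
+ *         statsLogger,
+ *         () -> writeFileInfoCache.size(),
+ *         () -> readFileInfoCache.size());
+ * }</pre>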
+ */ +@StatsDoc( + name = BOOKIE_SCOPE, + category = CATEGORY_SERVER, + help = "Index Persistence Manager related stats" +) +@Getter +public class IndexPersistenceMgrStats { + + // Stats + @StatsDoc( + name = LEDGER_CACHE_NUM_EVICTED_LEDGERS, + help = "Number of ledgers evicted from ledger caches" + ) + private final Counter evictedLedgersCounter; + @StatsDoc( + name = PENDING_GET_FILE_INFO, + help = "Number of pending get-file-info requests" + ) + private final Counter pendingGetFileInfoCounter; + @StatsDoc( + name = WRITE_FILE_INFO_CACHE_SIZE, + help = "Current write file info cache size" + ) + private final Gauge writeFileInfoCacheSizeGauge; + @StatsDoc( + name = READ_FILE_INFO_CACHE_SIZE, + help = "Current read file info cache size" + ) + private final Gauge readFileInfoCacheSizeGauge; + + public IndexPersistenceMgrStats(StatsLogger statsLogger, + Supplier writeFileInfoCacheSizeSupplier, + Supplier readFileInfoCacheSizeSupplier) { + evictedLedgersCounter = statsLogger.getCounter(LEDGER_CACHE_NUM_EVICTED_LEDGERS); + pendingGetFileInfoCounter = statsLogger.getCounter(PENDING_GET_FILE_INFO); + writeFileInfoCacheSizeGauge = new Gauge() { + @Override + public Number getDefaultValue() { + return 0; + } + + @Override + public Number getSample() { + return writeFileInfoCacheSizeSupplier.get(); + } + }; + statsLogger.registerGauge(WRITE_FILE_INFO_CACHE_SIZE, writeFileInfoCacheSizeGauge); + readFileInfoCacheSizeGauge = new Gauge() { + @Override + public Number getDefaultValue() { + return 0; + } + + @Override + public Number getSample() { + return readFileInfoCacheSizeSupplier.get(); + } + }; + statsLogger.registerGauge(READ_FILE_INFO_CACHE_SIZE, readFileInfoCacheSizeGauge); + } + + +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/stats/JournalStats.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/stats/JournalStats.java new file mode 100644 index 00000000000..d5f68e7267a --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/stats/JournalStats.java @@ -0,0 +1,231 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.bookkeeper.bookie.stats; + +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.ADD_ENTRY; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.CATEGORY_SERVER; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.FORCE_LEDGER; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.JOURNAL_ADD_ENTRY; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.JOURNAL_CREATION_LATENCY; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.JOURNAL_FLUSH_LATENCY; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.JOURNAL_FORCE_LEDGER; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.JOURNAL_FORCE_WRITE_BATCH_BYTES; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.JOURNAL_FORCE_WRITE_BATCH_ENTRIES; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.JOURNAL_FORCE_WRITE_ENQUEUE; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.JOURNAL_FORCE_WRITE_GROUPING_COUNT; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.JOURNAL_FORCE_WRITE_QUEUE_SIZE; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.JOURNAL_MEMORY_MAX; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.JOURNAL_MEMORY_USED; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.JOURNAL_NUM_FLUSH_EMPTY_QUEUE; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.JOURNAL_NUM_FLUSH_MAX_OUTSTANDING_BYTES; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.JOURNAL_NUM_FLUSH_MAX_WAIT; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.JOURNAL_PROCESS_TIME_LATENCY; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.JOURNAL_QUEUE_LATENCY; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.JOURNAL_QUEUE_SIZE; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.JOURNAL_SCOPE; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.JOURNAL_SYNC; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.JOURNAL_WRITE_BYTES; + +import com.google.common.annotations.VisibleForTesting; +import java.util.function.Supplier; +import lombok.Getter; +import org.apache.bookkeeper.bookie.BookKeeperServerStats; +import org.apache.bookkeeper.stats.Counter; +import org.apache.bookkeeper.stats.Gauge; +import org.apache.bookkeeper.stats.OpStatsLogger; +import org.apache.bookkeeper.stats.StatsLogger; +import org.apache.bookkeeper.stats.annotations.StatsDoc; + +/** + * A umbrella class for journal related stats. 
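+ * <p>Sketch of constructing these stats for one journal instance (the memory
+ * accounting hook shown here is hypothetical):</p>
+ * <pre>{@code
+ * JournalStats stats = new JournalStats(
+ *         statsLogger.scope(JOURNAL_SCOPE),
+ *         maxJournalMemoryBytes,
+ *         () -> memoryLimitController.currentUsage());
+ * }</pre>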
+ */ +@StatsDoc( + name = JOURNAL_SCOPE, + category = CATEGORY_SERVER, + help = "Journal related stats" +) +@Getter +public class JournalStats { + + @StatsDoc( + name = JOURNAL_ADD_ENTRY, + help = "operation stats of recording addEntry requests in the journal", + parent = ADD_ENTRY + ) + private final OpStatsLogger journalAddEntryStats; + @StatsDoc( + name = JOURNAL_FORCE_LEDGER, + help = "operation stats of recording forceLedger requests in the journal", + parent = FORCE_LEDGER + ) + private final OpStatsLogger journalForceLedgerStats; + @StatsDoc( + name = JOURNAL_SYNC, + help = "operation stats of syncing data to journal disks", + parent = JOURNAL_ADD_ENTRY, + happensAfter = JOURNAL_FORCE_WRITE_ENQUEUE + ) + private final OpStatsLogger journalSyncStats; + @StatsDoc( + name = JOURNAL_FORCE_WRITE_ENQUEUE, + help = "operation stats of enqueueing force write requests to force write queue", + parent = JOURNAL_ADD_ENTRY, + happensAfter = JOURNAL_PROCESS_TIME_LATENCY + ) + private final OpStatsLogger fwEnqueueTimeStats; + @StatsDoc( + name = JOURNAL_CREATION_LATENCY, + help = "operation stats of creating journal files", + parent = JOURNAL_PROCESS_TIME_LATENCY + ) + private final OpStatsLogger journalCreationStats; + @StatsDoc( + name = JOURNAL_FLUSH_LATENCY, + help = "operation stats of flushing data from memory to filesystem (but not yet fsyncing to disks)", + parent = JOURNAL_PROCESS_TIME_LATENCY, + happensAfter = JOURNAL_CREATION_LATENCY + ) + private final OpStatsLogger journalFlushStats; + @StatsDoc( + name = JOURNAL_PROCESS_TIME_LATENCY, + help = "operation stats of processing requests in a journal (from dequeue an item to finish processing it)", + parent = JOURNAL_ADD_ENTRY, + happensAfter = JOURNAL_QUEUE_LATENCY + ) + private final OpStatsLogger journalProcessTimeStats; + @StatsDoc( + name = JOURNAL_QUEUE_LATENCY, + help = "operation stats of enqueuing requests to a journal", + parent = JOURNAL_ADD_ENTRY + ) + private final OpStatsLogger journalQueueStats; + @StatsDoc( + name = JOURNAL_FORCE_WRITE_GROUPING_COUNT, + help = "The distribution of number of force write requests grouped in a force write" + ) + private final OpStatsLogger forceWriteGroupingCountStats; + @StatsDoc( + name = JOURNAL_FORCE_WRITE_BATCH_ENTRIES, + help = "The distribution of number of entries grouped together into a force write request" + ) + private final OpStatsLogger forceWriteBatchEntriesStats; + @StatsDoc( + name = JOURNAL_FORCE_WRITE_BATCH_BYTES, + help = "The distribution of number of bytes grouped together into a force write request" + ) + private final OpStatsLogger forceWriteBatchBytesStats; + @StatsDoc( + name = JOURNAL_QUEUE_SIZE, + help = "The journal queue size" + ) + private final Counter journalQueueSize; + @StatsDoc( + name = JOURNAL_FORCE_WRITE_QUEUE_SIZE, + help = "The force write queue size" + ) + private final Counter forceWriteQueueSize; + + @StatsDoc( + name = JOURNAL_NUM_FLUSH_MAX_WAIT, + help = "The number of journal flushes triggered by MAX_WAIT time" + ) + private final Counter flushMaxWaitCounter; + @StatsDoc( + name = JOURNAL_NUM_FLUSH_MAX_OUTSTANDING_BYTES, + help = "The number of journal flushes triggered by MAX_OUTSTANDING_BYTES" + ) + private Counter flushMaxOutstandingBytesCounter; + @StatsDoc( + name = JOURNAL_NUM_FLUSH_EMPTY_QUEUE, + help = "The number of journal flushes triggered when journal queue becomes empty" + ) + private final Counter flushEmptyQueueCounter; + @StatsDoc( + name = JOURNAL_WRITE_BYTES, + help = "The number of bytes appended to the journal" + ) + private 
final Counter journalWriteBytes; + @StatsDoc( + name = JOURNAL_MEMORY_MAX, + help = "The max amount of memory in bytes that can be used by the bookie journal" + ) + private final Gauge journalMemoryMaxStats; + @StatsDoc( + name = JOURNAL_MEMORY_USED, + help = "The actual amount of memory in bytes currently used by the bookie journal" + ) + private final Gauge journalMemoryUsedStats; + + public JournalStats(StatsLogger statsLogger, final long maxJournalMemoryBytes, + Supplier currentJournalMemoryBytes) { + journalAddEntryStats = statsLogger.getOpStatsLogger(BookKeeperServerStats.JOURNAL_ADD_ENTRY); + journalForceLedgerStats = statsLogger.getOpStatsLogger(BookKeeperServerStats.JOURNAL_FORCE_LEDGER); + journalSyncStats = statsLogger.getOpStatsLogger(BookKeeperServerStats.JOURNAL_SYNC); + fwEnqueueTimeStats = statsLogger.getOpStatsLogger(BookKeeperServerStats.JOURNAL_FORCE_WRITE_ENQUEUE); + journalCreationStats = statsLogger.getOpStatsLogger(BookKeeperServerStats.JOURNAL_CREATION_LATENCY); + journalFlushStats = statsLogger.getOpStatsLogger(BookKeeperServerStats.JOURNAL_FLUSH_LATENCY); + journalQueueStats = statsLogger.getOpStatsLogger(BookKeeperServerStats.JOURNAL_QUEUE_LATENCY); + journalProcessTimeStats = statsLogger.getOpStatsLogger(BookKeeperServerStats.JOURNAL_PROCESS_TIME_LATENCY); + forceWriteGroupingCountStats = + statsLogger.getOpStatsLogger(BookKeeperServerStats.JOURNAL_FORCE_WRITE_GROUPING_COUNT); + forceWriteBatchEntriesStats = + statsLogger.getOpStatsLogger(BookKeeperServerStats.JOURNAL_FORCE_WRITE_BATCH_ENTRIES); + forceWriteBatchBytesStats = statsLogger.getOpStatsLogger(BookKeeperServerStats.JOURNAL_FORCE_WRITE_BATCH_BYTES); + journalQueueSize = statsLogger.getCounter(BookKeeperServerStats.JOURNAL_QUEUE_SIZE); + forceWriteQueueSize = statsLogger.getCounter(BookKeeperServerStats.JOURNAL_FORCE_WRITE_QUEUE_SIZE); + flushMaxWaitCounter = statsLogger.getCounter(BookKeeperServerStats.JOURNAL_NUM_FLUSH_MAX_WAIT); + flushMaxOutstandingBytesCounter = + statsLogger.getCounter(BookKeeperServerStats.JOURNAL_NUM_FLUSH_MAX_OUTSTANDING_BYTES); + flushEmptyQueueCounter = statsLogger.getCounter(BookKeeperServerStats.JOURNAL_NUM_FLUSH_EMPTY_QUEUE); + journalWriteBytes = statsLogger.getCounter(BookKeeperServerStats.JOURNAL_WRITE_BYTES); + + journalMemoryMaxStats = new Gauge() { + @Override + public Long getDefaultValue() { + return maxJournalMemoryBytes; + } + + @Override + public Long getSample() { + return maxJournalMemoryBytes; + } + }; + statsLogger.registerGauge(JOURNAL_MEMORY_MAX, journalMemoryMaxStats); + + journalMemoryUsedStats = new Gauge() { + @Override + public Long getDefaultValue() { + return -1L; + } + + @Override + public Long getSample() { + return currentJournalMemoryBytes.get(); + } + }; + statsLogger.registerGauge(JOURNAL_MEMORY_USED, journalMemoryUsedStats); + } + + @VisibleForTesting + public void setFlushMaxOutstandingBytesCounter(Counter flushMaxOutstandingBytesCounter) { + this.flushMaxOutstandingBytesCounter = flushMaxOutstandingBytesCounter; + } + +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/stats/package-info.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/stats/package-info.java new file mode 100644 index 00000000000..99261760435 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/stats/package-info.java @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/**
+ * Package of the classes for defining bookie stats.
+ */
+package org.apache.bookkeeper.bookie.stats;
\ No newline at end of file
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/CompactionEntryLog.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/CompactionEntryLog.java
new file mode 100644
index 00000000000..f018d9c2b96
--- /dev/null
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/CompactionEntryLog.java
@@ -0,0 +1,90 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+package org.apache.bookkeeper.bookie.storage;
+
+import io.netty.buffer.ByteBuf;
+import java.io.IOException;
+
+/**
+ * An entrylog to receive compacted entries.
+ * <p>

+ * The expected lifecycle for a compaction entry log, sketched in code below, is:
+ * 1. Creation
+ * 2. Mark compacted
+ * 3. Make available
+ * 4. Cleanup
+ * <p>
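+ * A sketch of the happy path, using this interface together with
+ * {@code EntryLogger#newCompactionLog} ({@code entryLogger}, {@code ledgerId} and
+ * {@code entry} are assumed from context):
+ * <pre>{@code
+ * CompactionEntryLog log = entryLogger.newCompactionLog(srcLogId);
+ * long location = log.addEntry(ledgerId, entry); // repeated for each live entry
+ * log.flush();
+ * log.markCompacted();      // copy work is done; resumable from here after a crash
+ * log.makeAvailable();      // compacted entries become readable
+ * log.finalizeAndCleanup();
+ * }</pre>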

+ * Abort can happen during any step.
+ */
+public interface CompactionEntryLog {
+    /**
+     * Add an entry to the log.
+     * @param ledgerId the ledger the entry belongs to
+     * @param entry the payload of the entry
+     * @return the position to which the entry was written
+     */
+    long addEntry(long ledgerId, ByteBuf entry) throws IOException;
+
+    /**
+     * Scan the entry log, reading out all contained entries.
+     */
+    void scan(EntryLogScanner scanner) throws IOException;
+
+    /**
+     * Flush any unwritten entries to physical storage.
+     */
+    void flush() throws IOException;
+
+    /**
+     * Abort the compaction log. This should delete any resources held
+     * by this log.
+     */
+    void abort();
+
+    /**
+     * Mark the compaction log as compacted.
+     * By this point, the heavy work of copying entries from one log
+     * to another has been done. We don't want to repeat that work,
+     * so this method should take steps to ensure that if the bookie crashes
+     * we can resume the compaction from this point.
+     */
+    void markCompacted() throws IOException;
+
+    /**
+     * Make the log written by the compaction process available for reads.
+     */
+    void makeAvailable() throws IOException;
+
+    /**
+     * Clean up any temporary resources that were used by the compaction process.
+     */
+    void finalizeAndCleanup();
+
+    /**
+     * Get the log ID of the entrylog to which compacted entries are being written.
+     */
+    long getDstLogId();
+
+    /**
+     * Get the log ID of the entrylog which is being compacted.
+     */
+    long getSrcLogId();
+}
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/EntryLogIds.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/EntryLogIds.java
new file mode 100644
index 00000000000..463cbc00558
--- /dev/null
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/EntryLogIds.java
@@ -0,0 +1,33 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+package org.apache.bookkeeper.bookie.storage;
+
+import java.io.IOException;
+
+/**
+ * Generate unique entry log ids.
+ */
+public interface EntryLogIds {
+    /**
+     * Get the next available entry log ID.
+     */
+    int nextId() throws IOException;
+}
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/EntryLogScanner.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/EntryLogScanner.java
new file mode 100644
index 00000000000..9305b6bef04
--- /dev/null
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/EntryLogScanner.java
@@ -0,0 +1,52 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+package org.apache.bookkeeper.bookie.storage;
+
+import io.netty.buffer.ByteBuf;
+import java.io.IOException;
+
+/**
+ * Scan entries in an entry log file.
+ */
+public interface EntryLogScanner {
+    /**
+     * Tests whether the entries belonging to the specified ledger
+     * should be processed.
+     *
+     * @param ledgerId
+     *          Ledger ID.
+     * @return true if and only if the entries of the ledger should be scanned.
+     */
+    boolean accept(long ledgerId);
+
+    /**
+     * Process an entry.
+     *
+     * @param ledgerId
+     *          Ledger ID.
+     * @param offset
+     *          File offset of this entry.
+     * @param entry
+     *          Entry ByteBuf
+     * @throws IOException
+     */
+    void process(long ledgerId, long offset, ByteBuf entry) throws IOException;
+}
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/EntryLogger.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/EntryLogger.java
new file mode 100644
index 00000000000..c8d127c96ba
--- /dev/null
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/EntryLogger.java
@@ -0,0 +1,132 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+package org.apache.bookkeeper.bookie.storage;
+
+import io.netty.buffer.ByteBuf;
+import java.io.IOException;
+import java.util.Collection;
+import org.apache.bookkeeper.bookie.AbstractLogCompactor;
+import org.apache.bookkeeper.bookie.Bookie.NoEntryException;
+import org.apache.bookkeeper.bookie.EntryLogMetadata;
+
+
+/**
+ * Entry logger. Sequentially writes entries for a large number of ledgers to
+ * a small number of log files, to avoid many random writes.
+ * When an entry is added, a location is returned, which consists of the ID of the
+ * log into which the entry was added, and the offset of that entry within the log.
+ * The location is a long, with 32 bits each for the log ID and the offset. This
+ * naturally limits the offset and thus the size of the log to Integer.MAX_VALUE.
+ */
+public interface EntryLogger extends AutoCloseable {
+    long UNASSIGNED_LEDGERID = -1L;
+    // log file suffix
+    String LOG_FILE_SUFFIX = ".log";
+
+    /**
+     * Add an entry for ledger {@code ledgerId} to the entrylog.
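+     * <p>Consistent with the class javadoc, the returned location packs the log ID
+     * into the high 32 bits and the offset into the low 32 bits, so a caller can
+     * unpack it as (sketch):</p>
+     * <pre>{@code
+     * int logId  = (int) (location >> 32);
+     * int offset = (int) (location & 0xFFFFFFFFL);
+     * }</pre>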
+ * @param ledgerId the ledger for which the entry is being added + * @param buf the contents of the entry (this method does not take ownership of the refcount) + * @return the location in the entry log of the added entry + */ + long addEntry(long ledgerId, ByteBuf buf) throws IOException; + + /** + * Read an entry from an entrylog location. + * @param entryLocation the location from which to read the entry + * @return the entry + */ + ByteBuf readEntry(long entryLocation) + throws IOException, NoEntryException; + /** + * Read an entry from an entrylog location, and verify that is matches the + * expected ledger and entry ID. + * @param ledgerId the ledgerID to match + * @param entryId the entryID to match + * @param entryLocation the location from which to read the entry + * @return the entry + */ + ByteBuf readEntry(long ledgerId, long entryId, long entryLocation) + throws IOException, NoEntryException; + + /** + * Flush any outstanding writes to disk. + */ + void flush() throws IOException; + + @Override + void close() throws IOException; + + /** + * Create a new entrylog into which compacted entries can be added. + * There is a 1-1 mapping between logs that are being compacted + * and the log the compacted entries are written to. + */ + CompactionEntryLog newCompactionLog(long logToCompact) throws IOException; + + /** + * Return a collection of all the compaction entry logs which have been + * compacted, but have not been cleaned up. + */ + Collection incompleteCompactionLogs(); + + /** + * Get the log ids for the set of logs which have been completely flushed to + * disk. + * Only log ids in this set are considered for either compaction or garbage + * collection. + */ + Collection getFlushedLogIds(); + + /** + * Scan the given entrylog, returning all entries contained therein. + */ + void scanEntryLog(long entryLogId, EntryLogScanner scanner) throws IOException; + + /** + * Retrieve metadata for the given entrylog ID. + * The metadata contains the size of the log, the size of the data in the log which is still + * active, and a list of all the ledgers contained in the log and the size of the data stored + * for each ledger. + */ + default EntryLogMetadata getEntryLogMetadata(long entryLogId) throws IOException { + return getEntryLogMetadata(entryLogId, null); + } + + /** + * Retrieve metadata for the given entrylog ID. + * The metadata contains the size of the log, the size of the data in the log which is still + * active, and a list of all the ledgers contained in the log and the size of the data stored + * for each ledger. + */ + EntryLogMetadata getEntryLogMetadata(long entryLogId, AbstractLogCompactor.Throttler throttler) throws IOException; + + /** + * Check whether an entrylog with the given ID exists. + */ + boolean logExists(long logId); + + /** + * Delete the entrylog with the given ID. + * @return false if the entrylog doesn't exist. + */ + boolean removeEntryLog(long entryLogId); +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/directentrylogger/Buffer.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/directentrylogger/Buffer.java new file mode 100644 index 00000000000..2fa0866c890 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/directentrylogger/Buffer.java @@ -0,0 +1,269 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.bookie.storage.directentrylogger; + +import static com.google.common.base.Preconditions.checkArgument; +import static org.apache.bookkeeper.common.util.ExceptionMessageHelper.exMsg; + +import io.netty.buffer.ByteBuf; +import io.netty.buffer.ByteBufAllocator; +import io.netty.util.ReferenceCountUtil; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.util.Arrays; +import org.apache.bookkeeper.common.util.nativeio.NativeIO; + +/** + * A utility buffer class to be used with native calls. + *

+ * Buffers are page aligned (4k pages). + *
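+ * <p>Alignment arithmetic, with {@code ALIGNMENT = 4096} (illustrative values):
+ * {@code nextAlignment(pos)} rounds up to the next 4 KiB boundary via
+ * {@code (pos + 4095) & ~4095}, so {@code nextAlignment(5000) == 8192} and
+ * {@code nextAlignment(4096) == 4096}.</p>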

+ * The wrapper mostly handles writes between ByteBuffers and + * ByteBufs. It also provides a method for padding the buffer to the next + * alignment, so writes can have an aligned size also (as required by + * direct I/O). The padding is done with 0xF0, so that if it is read as + * an integer, or long, the value will be negative (assuming the read is + * a java read, and thus a signed int). + */ +class Buffer { + /* Padding byte must have MSB set, so if read at the start + * of an integer or long, the returned value is negative. */ + public static final byte PADDING_BYTE = (byte) 0xF0; + + /* Some machines can live with 512 alignment, but others + * appear to require 4096, so go with 4096, which is page + * alignment */ + public static final int ALIGNMENT = 4096; + private static final int MAX_ALIGNMENT = Integer.MAX_VALUE & ~(ALIGNMENT - 1); + static final byte[] PADDING = generatePadding(); + + final NativeIO nativeIO; + final int bufferSize; + ByteBuf buffer; + ByteBuffer byteBuffer; + ByteBufAllocator allocator; + long pointer = 0; + + Buffer(NativeIO nativeIO, ByteBufAllocator allocator, int bufferSize) throws IOException { + checkArgument(isAligned(bufferSize), + "Buffer size not aligned %d", bufferSize); + + this.allocator = allocator; + this.buffer = allocateAligned(ALIGNMENT, bufferSize); + this.nativeIO = nativeIO; + this.bufferSize = bufferSize; + byteBuffer = buffer.nioBuffer(0, bufferSize); + byteBuffer.order(ByteOrder.BIG_ENDIAN); + } + + private ByteBuf allocateAligned(int alignment, int bufferSize) { + ByteBuf buf = allocator.directBuffer(bufferSize + alignment); + long addr = buf.memoryAddress(); + if ((addr & (alignment - 1)) == 0) { + // The address is already aligned + pointer = addr; + return buf.slice(0, bufferSize); + } else { + int alignOffset = (int) (alignment - (addr & (alignment - 1))); + pointer = addr + alignOffset; + return buf.slice(alignOffset, bufferSize); + } + } + + /** + * @return whether there is space in the buffer for size bytes. + */ + boolean hasSpace(int size) throws IOException { + if (size > bufferSize) { + throw new IOException(exMsg("Write too large").kv("writeSize", size) + .kv("maxSize", bufferSize).toString()); + } + return byteBuffer.remaining() >= size; + } + + /** + * @return whether the buffer can honour a read of size at offset. + */ + boolean hasData(int offset, int size) { + return offset + size <= bufferSize; + } + + /** + * Write an integer to buffer. Progresses the position of the buffer by 4 bytes. + */ + void writeInt(int value) throws IOException { + byteBuffer.putInt(value); + } + + /** + * Write a btebuf to this buffer. Progresses the position of the buffer by the + * number of readable bytes of the bytebuf. Progresses the readerIndex of the passed + * bytebuf by the number of bytes read (i.e. to the end). + */ + void writeByteBuf(ByteBuf bytebuf) throws IOException { + int bytesWritten = bytebuf.readableBytes(); + ByteBuffer bytesToPut = bytebuf.nioBuffer(); + byteBuffer.put(bytesToPut); + bytebuf.skipBytes(bytesWritten); + } + + /** + * Read an integer from the buffer at the given offset. The offset is in bytes. 
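+ * <p>A read that lands on padding yields a negative value ({@code 0xF0F0F0F0} read
+ * as a signed int), so callers can treat a negative value as an end-of-valid-data
+ * marker, e.g. (sketch): {@code if (buffer.readInt(offset) < 0) break; }</p>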
+ */ + int readInt(int offset) throws IOException { + if (!hasData(offset, Integer.BYTES)) { + throw new IOException(exMsg("Buffer cannot satisfy int read") + .kv("offset", offset) + .kv("bufferSize", bufferSize).toString()); + } + try { + return byteBuffer.getInt(offset); + } catch (Exception e) { + throw new IOException(exMsg("Error reading int") + .kv("byteBuffer", byteBuffer.toString()) + .kv("offset", offset) + .kv("bufferSize", bufferSize).toString(), e); + } + } + + /** + * Read a long from the buffer at the given offset. The offset is in bytes. + */ + long readLong(int offset) throws IOException { + if (!hasData(offset, Long.BYTES)) { + throw new IOException(exMsg("Buffer cannot satisfy long read") + .kv("offset", offset) + .kv("bufferSize", bufferSize).toString()); + } + try { + return byteBuffer.getLong(offset); + } catch (Exception e) { + throw new IOException(exMsg("Error reading long") + .kv("byteBuffer", byteBuffer.toString()) + .kv("offset", offset) + .kv("bufferSize", bufferSize).toString(), e); + } + } + + /** + * Read a bytebuf of size from the buffer at the given offset. + * If there are not enough bytes in the buffer to satisfy the read, some of the bytes are read + * into the byte buffer and the number of bytes read is returned. + */ + int readByteBuf(ByteBuf buffer, int offset, int size) throws IOException { + int originalLimit = byteBuffer.limit(); + byteBuffer.position(offset); + int bytesToRead = Math.min(size, byteBuffer.capacity() - offset); + byteBuffer.limit(offset + bytesToRead); + try { + buffer.writeBytes(byteBuffer); + } catch (Exception e) { + throw new IOException(exMsg("Error reading buffer") + .kv("byteBuffer", byteBuffer.toString()) + .kv("offset", offset).kv("size", size) + .kv("bufferSize", bufferSize).toString(), e); + } finally { + byteBuffer.limit(originalLimit); + } + return bytesToRead; + } + + /** + * The data pointer object for the native buffer. This can be used + * by JNI method which take a char* or void*. + */ + long pointer() { + return pointer; + } + + long pointer(long offset, long expectedWrite) { + if (offset == 0) { + return pointer; + } else { + if (offset + expectedWrite > byteBuffer.capacity()) { + throw new IllegalArgumentException( + exMsg("Buffer overflow").kv("offset", offset).kv("expectedWrite", expectedWrite) + .kv("capacity", byteBuffer.capacity()).toString()); + } + + return pointer + offset; + } + } + /** + * @return the number of bytes which have been written to this buffer. + */ + int position() { + return byteBuffer.position(); + } + + /** + * @return the size of the buffer (i.e. the max number of bytes writable, or the max offset readable) + */ + int size() { + return bufferSize; + } + + /** + * Pad the buffer to the next alignment position. + * @return the position of the next alignment. This should be used as the size argument to make aligned writes. + */ + int padToAlignment() { + int bufferPos = byteBuffer.position(); + int nextAlignment = nextAlignment(bufferPos); + byteBuffer.put(PADDING, 0, nextAlignment - bufferPos); + return nextAlignment; + } + + /** + * Clear the bytes written. This doesn't actually destroy the data, but moves the position back to the start of + * the buffer. + */ + void reset() { + byteBuffer.clear(); + } + + /** + * Free the memory that backs this buffer. 
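+ * <p>Buffers are normally recycled through the {@code BufferPool} defined below
+ * rather than freed directly (sketch):</p>
+ * <pre>{@code
+ * Buffer b = pool.acquire();
+ * try {
+ *     b.writeInt(42);
+ * } finally {
+ *     pool.release(b);  // resets the buffer; frees it only if the pool is full
+ * }
+ * }</pre>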
+     */
+    void free() {
+        ReferenceCountUtil.release(buffer);
+        buffer = null;
+        byteBuffer = null;
+    }
+
+    private static byte[] generatePadding() {
+        byte[] padding = new byte[ALIGNMENT];
+        Arrays.fill(padding, (byte) PADDING_BYTE);
+        return padding;
+    }
+
+    static boolean isAligned(long size) {
+        return size >= 0 && ((ALIGNMENT - 1) & size) == 0;
+    }
+
+    static int nextAlignment(int pos) {
+        checkArgument(pos <= MAX_ALIGNMENT,
+                      "position (0x%x) must be lower or equal to max alignment (0x%x)",
+                      pos, MAX_ALIGNMENT);
+        checkArgument(pos >= 0, "position (0x%x) must be positive", pos);
+        return (pos + (ALIGNMENT - 1)) & ~(ALIGNMENT - 1);
+    }
+}
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/directentrylogger/BufferPool.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/directentrylogger/BufferPool.java
new file mode 100644
index 00000000000..3614c65c0f8
--- /dev/null
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/directentrylogger/BufferPool.java
@@ -0,0 +1,68 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+package org.apache.bookkeeper.bookie.storage.directentrylogger;
+
+import io.netty.buffer.ByteBufAllocator;
+import java.io.IOException;
+import java.util.concurrent.ArrayBlockingQueue;
+import org.apache.bookkeeper.common.util.nativeio.NativeIO;
+
+/**
+ * BufferPool used to manage Buffers.
+ */
+public class BufferPool implements AutoCloseable {
+    private final ArrayBlockingQueue<Buffer> pool;
+
+    BufferPool(NativeIO nativeIO, ByteBufAllocator allocator, int bufferSize, int maxPoolSize) throws IOException {
+        pool = new ArrayBlockingQueue<>(maxPoolSize);
+        for (int i = 0; i < maxPoolSize; i++) {
+            pool.add(new Buffer(nativeIO, allocator, bufferSize));
+        }
+    }
+
+    Buffer acquire() throws IOException {
+        try {
+            return pool.take();
+        } catch (InterruptedException ie) {
+            Thread.currentThread().interrupt();
+            throw new IOException(ie);
+        }
+    }
+
+    void release(Buffer buffer) {
+        buffer.reset();
+        // offer() returns false if the pool is already full; add() would throw instead
+        if (!pool.offer(buffer)) {
+            buffer.free();
+        }
+    }
+
+    @Override
+    public void close() {
+        while (true) {
+            Buffer b = pool.poll();
+            if (b == null) {
+                break;
+            }
+
+            b.free();
+        }
+    }
+}
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/directentrylogger/DirectCompactionEntryLog.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/directentrylogger/DirectCompactionEntryLog.java
new file mode 100644
index 00000000000..58ac98fec53
--- /dev/null
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/directentrylogger/DirectCompactionEntryLog.java
@@ -0,0 +1,283 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.bookie.storage.directentrylogger; + +import static org.apache.bookkeeper.bookie.TransactionalEntryLogCompactor.COMPACTED_SUFFIX; +import static org.apache.bookkeeper.bookie.TransactionalEntryLogCompactor.COMPACTING_SUFFIX; +import static org.apache.bookkeeper.common.util.ExceptionMessageHelper.exMsg; + +import io.netty.buffer.ByteBuf; +import io.netty.buffer.ByteBufAllocator; +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.util.concurrent.ExecutorService; +import org.apache.bookkeeper.bookie.EntryLogMetadata; +import org.apache.bookkeeper.bookie.storage.CompactionEntryLog; +import org.apache.bookkeeper.bookie.storage.EntryLogScanner; +import org.apache.bookkeeper.common.util.nativeio.NativeIO; +import org.apache.bookkeeper.slogger.Slogger; +import org.apache.bookkeeper.stats.OpStatsLogger; + +/** + * DirectCompactionEntryLog. + */ +public abstract class DirectCompactionEntryLog implements CompactionEntryLog { + protected final int srcLogId; + protected final int dstLogId; + protected final Slogger slog; + + protected final File compactingFile; + protected final File compactedFile; + protected final File completeFile; + + static CompactionEntryLog newLog(int srcLogId, + int dstLogId, + File ledgerDir, + long maxFileSize, + ExecutorService writeExecutor, + BufferPool writeBuffers, + NativeIO nativeIO, + ByteBufAllocator allocator, + Slogger slog) throws IOException { + return new WritingDirectCompactionEntryLog( + srcLogId, dstLogId, ledgerDir, maxFileSize, + writeExecutor, writeBuffers, nativeIO, allocator, slog); + } + + static CompactionEntryLog recoverLog(int srcLogId, + int dstLogId, + File ledgerDir, + int readBufferSize, + int maxSaneEntrySize, + NativeIO nativeIO, + ByteBufAllocator allocator, + OpStatsLogger readBlockStats, + Slogger slog) { + return new RecoveredDirectCompactionEntryLog(srcLogId, dstLogId, ledgerDir, readBufferSize, + maxSaneEntrySize, nativeIO, allocator, readBlockStats, slog); + } + + private DirectCompactionEntryLog(int srcLogId, + int dstLogId, + File ledgerDir, + Slogger slog) { + compactingFile = compactingFile(ledgerDir, dstLogId); + compactedFile = compactedFile(ledgerDir, dstLogId, srcLogId); + completeFile = DirectEntryLogger.logFile(ledgerDir, dstLogId); + + this.srcLogId = srcLogId; + this.dstLogId = dstLogId; + + this.slog = slog.kv("dstLogId", dstLogId).kv("srcLogId", srcLogId).ctx(DirectCompactionEntryLog.class); + } + + @Override + public void abort() { + try { + Files.deleteIfExists(compactingFile.toPath()); + } catch (IOException ioe) { + slog.kv("compactingFile", compactingFile).warn(Events.COMPACTION_ABORT_EXCEPTION, ioe); + } + + try { + Files.deleteIfExists(compactedFile.toPath()); + } catch (IOException ioe) { + slog.kv("compactedFile", 
compactedFile).warn(Events.COMPACTION_ABORT_EXCEPTION, ioe); + } + } + + + @Override + public void makeAvailable() throws IOException { + idempotentLink(compactedFile, completeFile); + slog.kv("compactedFile", compactedFile).kv("completeFile", completeFile) + .info(Events.COMPACTION_MAKE_AVAILABLE); + } + + private static void idempotentLink(File src, File dst) throws IOException { + if (!src.exists()) { + throw new IOException(exMsg("src doesn't exist, aborting link") + .kv("src", src).kv("dst", dst).toString()); + } + if (!dst.exists()) { + Files.createLink(dst.toPath(), src.toPath()); + } else if (!Files.isSameFile(src.toPath(), dst.toPath())) { + throw new IOException(exMsg("dst exists, but doesn't match src") + .kv("src", src) + .kv("dst", dst).toString()); + } // else src and dst point to the same inode so we have nothing to do + } + + @Override + public void finalizeAndCleanup() { + try { + Files.deleteIfExists(compactingFile.toPath()); + } catch (IOException ioe) { + slog.kv("compactingFile", compactingFile).warn(Events.COMPACTION_DELETE_FAILURE, ioe); + } + + try { + Files.deleteIfExists(compactedFile.toPath()); + } catch (IOException ioe) { + slog.kv("compactedFile", compactedFile).warn(Events.COMPACTION_DELETE_FAILURE, ioe); + } + slog.info(Events.COMPACTION_COMPLETE); + } + + @Override + public long getDstLogId() { + return dstLogId; + } + + @Override + public long getSrcLogId() { + return srcLogId; + } + + private static class RecoveredDirectCompactionEntryLog extends DirectCompactionEntryLog { + private final ByteBufAllocator allocator; + private final NativeIO nativeIO; + private final int readBufferSize; + private final int maxSaneEntrySize; + private final OpStatsLogger readBlockStats; + + RecoveredDirectCompactionEntryLog(int srcLogId, + int dstLogId, + File ledgerDir, + int readBufferSize, + int maxSaneEntrySize, + NativeIO nativeIO, + ByteBufAllocator allocator, + OpStatsLogger readBlockStats, + Slogger slog) { + super(srcLogId, dstLogId, ledgerDir, slog); + this.allocator = allocator; + this.nativeIO = nativeIO; + this.readBufferSize = readBufferSize; + this.maxSaneEntrySize = maxSaneEntrySize; + this.readBlockStats = readBlockStats; + + this.slog.info(Events.COMPACTION_LOG_RECOVERED); + } + + private IllegalStateException illegalOpException() { + return new IllegalStateException(exMsg("Invalid operation for recovered log") + .kv("srcLogId", srcLogId) + .kv("dstLogId", dstLogId) + .kv("compactingFile", compactingFile) + .kv("compactedFile", compactedFile) + .kv("completeFile", completeFile).toString()); + } + + @Override + public long addEntry(long ledgerId, ByteBuf entry) throws IOException { + throw illegalOpException(); + } + + @Override + public void flush() throws IOException { + throw illegalOpException(); + } + + @Override + public void markCompacted() throws IOException { + throw illegalOpException(); + } + + @Override + public void scan(EntryLogScanner scanner) throws IOException { + try (LogReader reader = new DirectReader(dstLogId, compactedFile.toString(), allocator, nativeIO, + readBufferSize, maxSaneEntrySize, readBlockStats)) { + LogReaderScan.scan(allocator, reader, scanner); + } + } + } + + private static class WritingDirectCompactionEntryLog extends DirectCompactionEntryLog { + private final WriterWithMetadata writer; + + WritingDirectCompactionEntryLog(int srcLogId, + int dstLogId, + File ledgerDir, + long maxFileSize, + ExecutorService writeExecutor, + BufferPool writeBuffers, + NativeIO nativeIO, + ByteBufAllocator allocator, + Slogger slog) throws 
IOException { + super(srcLogId, dstLogId, ledgerDir, slog); + + this.writer = new WriterWithMetadata( + new DirectWriter(dstLogId, compactingFile.toString(), maxFileSize, + writeExecutor, writeBuffers, nativeIO, slog), + new EntryLogMetadata(dstLogId), + allocator); + + this.slog.info(Events.COMPACTION_LOG_CREATED); + } + + @Override + public long addEntry(long ledgerId, ByteBuf entry) throws IOException { + return writer.addEntry(ledgerId, entry); + } + + @Override + public void flush() throws IOException { + writer.flush(); + } + + @Override + public void markCompacted() throws IOException { + writer.finalizeAndClose(); + + idempotentLink(compactingFile, compactedFile); + if (!compactingFile.delete()) { + slog.kv("compactingFile", compactingFile) + .kv("compactedFile", compactedFile) + .info(Events.COMPACTION_DELETE_FAILURE); + } else { + slog.kv("compactingFile", compactingFile) + .kv("compactedFile", compactedFile) + .info(Events.COMPACTION_MARK_COMPACTED); + } + } + + @Override + public void scan(EntryLogScanner scanner) throws IOException { + throw new IllegalStateException(exMsg("Scan only valid for recovered log") + .kv("srcLogId", srcLogId) + .kv("dstLogId", dstLogId) + .kv("compactingFile", compactingFile) + .kv("compactedFile", compactedFile) + .kv("completeFile", completeFile).toString()); + } + } + + public static File compactingFile(File directory, int logId) { + return new File(directory, String.format("%x%s", logId, COMPACTING_SUFFIX)); + } + + public static File compactedFile(File directory, int newLogId, int compactedLogId) { + return new File(directory, String.format("%x.log.%x%s", newLogId, + compactedLogId, COMPACTED_SUFFIX)); + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/directentrylogger/DirectEntryLogger.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/directentrylogger/DirectEntryLogger.java new file mode 100644 index 00000000000..035981514e9 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/directentrylogger/DirectEntryLogger.java @@ -0,0 +1,513 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + */ +package org.apache.bookkeeper.bookie.storage.directentrylogger; + +import static com.google.common.base.Preconditions.checkArgument; +import static org.apache.bookkeeper.bookie.TransactionalEntryLogCompactor.COMPACTING_SUFFIX; +import static org.apache.bookkeeper.common.util.ExceptionMessageHelper.exMsg; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.cache.Cache; +import com.google.common.cache.CacheBuilder; +import com.google.common.cache.RemovalListener; +import io.netty.buffer.ByteBuf; +import io.netty.buffer.ByteBufAllocator; +import io.netty.util.ReferenceCountUtil; +import java.io.EOFException; +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Set; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.CopyOnWriteArrayList; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; +import java.util.regex.Matcher; +import java.util.stream.Collectors; +import org.apache.bookkeeper.bookie.AbstractLogCompactor; +import org.apache.bookkeeper.bookie.Bookie.NoEntryException; +import org.apache.bookkeeper.bookie.EntryLogMetadata; +import org.apache.bookkeeper.bookie.storage.CompactionEntryLog; +import org.apache.bookkeeper.bookie.storage.EntryLogIds; +import org.apache.bookkeeper.bookie.storage.EntryLogScanner; +import org.apache.bookkeeper.bookie.storage.EntryLogger; +import org.apache.bookkeeper.common.util.nativeio.NativeIO; +import org.apache.bookkeeper.slogger.Slogger; +import org.apache.bookkeeper.stats.StatsLogger; +import org.apache.bookkeeper.util.LedgerDirUtil; + +/** + * DirectEntryLogger. 
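+ * <p>An {@link EntryLogger} implementation that performs all file I/O with
+ * O_DIRECT, bypassing the OS page cache. Reads and writes go through
+ * preallocated, page-aligned native buffers; writes are flushed asynchronously
+ * on a write executor, and each read thread keeps its own bounded cache of
+ * open log readers.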
+ */
+public class DirectEntryLogger implements EntryLogger {
+    private final Slogger slog;
+    private final File ledgerDir;
+    private final EntryLogIds ids;
+    private final ExecutorService writeExecutor;
+    private final ExecutorService flushExecutor;
+    private final long maxFileSize;
+    private final DirectEntryLoggerStats stats;
+    private final ByteBufAllocator allocator;
+    private final BufferPool writeBuffers;
+    private final int readBufferSize;
+    private final int maxSaneEntrySize;
+    private final Set<Integer> unflushedLogs;
+
+    private WriterWithMetadata curWriter;
+
+    private List<Future<?>> pendingFlushes;
+    private final NativeIO nativeIO;
+    private final List<Cache<Integer, LogReader>> allCaches = new CopyOnWriteArrayList<>();
+    private final ThreadLocal<Cache<Integer, LogReader>> caches;
+
+    private static final int NUMBER_OF_WRITE_BUFFERS = 8;
+
+    public DirectEntryLogger(File ledgerDir,
+                             EntryLogIds ids,
+                             NativeIO nativeIO,
+                             ByteBufAllocator allocator,
+                             ExecutorService writeExecutor,
+                             ExecutorService flushExecutor,
+                             long maxFileSize,
+                             int maxSaneEntrySize,
+                             long totalWriteBufferSize,
+                             long totalReadBufferSize,
+                             int readBufferSize,
+                             int numReadThreads,
+                             int maxFdCacheTimeSeconds,
+                             Slogger slogParent,
+                             StatsLogger stats) throws IOException {
+        this.ledgerDir = ledgerDir;
+        this.flushExecutor = flushExecutor;
+        this.writeExecutor = writeExecutor;
+        this.pendingFlushes = new ArrayList<>();
+        this.nativeIO = nativeIO;
+        this.unflushedLogs = ConcurrentHashMap.newKeySet();
+
+        this.maxFileSize = maxFileSize;
+        this.maxSaneEntrySize = maxSaneEntrySize;
+        this.readBufferSize = Buffer.nextAlignment(readBufferSize);
+        this.ids = ids;
+        this.slog = slogParent.kv("directory", ledgerDir).ctx(DirectEntryLogger.class);
+
+        this.stats = new DirectEntryLoggerStats(stats);
+
+        this.allocator = allocator;
+
+        int singleWriteBufferSize = Buffer.nextAlignment((int) (totalWriteBufferSize / NUMBER_OF_WRITE_BUFFERS));
+        this.writeBuffers = new BufferPool(nativeIO, allocator, singleWriteBufferSize, NUMBER_OF_WRITE_BUFFERS);
+
+        // The total read buffer memory needs to get split across all the read threads, since the caches
+        // are thread-specific and we want to ensure we don't pass the total memory limit.
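+        // For example (hypothetical values): totalReadBufferSize = 64MiB across
+        // numReadThreads = 8 gives an 8MiB budget per thread; with readBufferSize = 1MiB
+        // each thread can then cache up to 8 open readers, 64 across the bookie.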
+        long perThreadBufferSize = totalReadBufferSize / numReadThreads;
+
+        // if the amount of total read buffer size is too low, and/or the number of read threads is too high
+        // then the perThreadBufferSize can be lower than the readBufferSize causing immediate eviction of readers
+        // from the cache
+        if (perThreadBufferSize < readBufferSize) {
+            slog.kv("reason", "perThreadBufferSize lower than readBufferSize (causes immediate reader cache eviction)")
+                .kv("totalReadBufferSize", totalReadBufferSize)
+                .kv("totalNumReadThreads", numReadThreads)
+                .kv("readBufferSize", readBufferSize)
+                .kv("perThreadBufferSize", perThreadBufferSize)
+                .error(Events.ENTRYLOGGER_MISCONFIGURED);
+        }
+
+        long maxCachedReadersPerThread = perThreadBufferSize / readBufferSize;
+        long maxCachedReaders = maxCachedReadersPerThread * numReadThreads;
+
+        this.slog
+            .kv("maxFileSize", maxFileSize)
+            .kv("maxSaneEntrySize", maxSaneEntrySize)
+            .kv("totalWriteBufferSize", totalWriteBufferSize)
+            .kv("singleWriteBufferSize", singleWriteBufferSize)
+            .kv("totalReadBufferSize", totalReadBufferSize)
+            .kv("readBufferSize", readBufferSize)
+            .kv("perThreadBufferSize", perThreadBufferSize)
+            .kv("maxCachedReadersPerThread", maxCachedReadersPerThread)
+            .kv("maxCachedReaders", maxCachedReaders)
+            .info(Events.ENTRYLOGGER_CREATED);
+
+        this.caches = ThreadLocal.withInitial(() -> {
+            RemovalListener<Integer, LogReader> rl = (notification) -> {
+                try {
+                    notification.getValue().close();
+                    this.stats.getCloseReaderCounter().inc();
+                } catch (IOException ioe) {
+                    slog.kv("logID", notification.getKey()).error(Events.READER_CLOSE_ERROR);
+                }
+            };
+            Cache<Integer, LogReader> cache = CacheBuilder.newBuilder()
+                .maximumWeight(perThreadBufferSize)
+                .weigher((key, value) -> readBufferSize)
+                .removalListener(rl)
+                .expireAfterAccess(maxFdCacheTimeSeconds, TimeUnit.SECONDS)
+                .concurrencyLevel(1) // important to avoid too aggressive eviction
+                .build();
+            allCaches.add(cache);
+            return cache;
+        });
+    }
+
+    @Override
+    public long addEntry(long ledgerId, ByteBuf buf) throws IOException {
+        long start = System.nanoTime();
+
+        long offset;
+        synchronized (this) {
+            if (curWriter != null
+                    && curWriter.shouldRoll(buf, maxFileSize)) {
+                // roll the log. asynchronously flush and close current log
+                flushAndCloseCurrent();
+                curWriter = null;
+            }
+            if (curWriter == null) {
+                int newId = ids.nextId();
+                curWriter = new WriterWithMetadata(newDirectWriter(newId),
+                                                   new EntryLogMetadata(newId),
+                                                   allocator);
+                slog.kv("newLogId", newId).info(Events.LOG_ROLL);
+            }
+
+            offset = curWriter.addEntry(ledgerId, buf);
+        }
+        stats.getAddEntryStats().registerSuccessfulEvent(System.nanoTime() - start, TimeUnit.NANOSECONDS);
+        return offset;
+    }
+
+    @Override
+    public ByteBuf readEntry(long entryLocation)
+            throws IOException, NoEntryException {
+        return internalReadEntry(-1L, -1L, entryLocation, false);
+    }
+
+    @Override
+    public ByteBuf readEntry(long ledgerId, long entryId, long entryLocation)
+            throws IOException, NoEntryException {
+        return internalReadEntry(ledgerId, entryId, entryLocation, true);
+    }
+
+    private LogReader getReader(int logId) throws IOException {
+        Cache<Integer, LogReader> cache = caches.get();
+        try {
+            LogReader reader = cache.get(logId, () -> {
+                this.stats.getOpenReaderCounter().inc();
+                return newDirectReader(logId);
+            });
+
+            // it is possible though unlikely, that the cache has already cleaned up this cache entry
+            // during the get operation. This is more likely to happen when there is great demand
+            // for many separate readers in a low memory environment.
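+            // If so, record that a closed reader was served from the cache and fail
+            // this read with an IOException rather than returning the closed reader.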
+            if (reader.isClosed()) {
+                this.stats.getCachedReadersServedClosedCounter().inc();
+                throw new IOException(exMsg("Cached reader already closed").kv("logId", logId).toString());
+            }
+
+            return reader;
+        } catch (ExecutionException ee) {
+            if (ee.getCause() instanceof IOException) {
+                throw (IOException) ee.getCause();
+            } else {
+                throw new IOException(exMsg("Error loading reader in cache").kv("logId", logId).toString(), ee);
+            }
+        }
+    }
+
+    private ByteBuf internalReadEntry(long ledgerId, long entryId, long location, boolean validateEntry)
+            throws IOException, NoEntryException {
+        int logId = (int) (location >> 32);
+        int pos = (int) (location & 0xFFFFFFFF);
+
+        long start = System.nanoTime();
+        LogReader reader = getReader(logId);
+
+        try {
+            ByteBuf buf = reader.readEntryAt(pos);
+            if (validateEntry) {
+                long thisLedgerId = buf.getLong(0);
+                long thisEntryId = buf.getLong(8);
+                if (thisLedgerId != ledgerId
+                        || thisEntryId != entryId) {
+                    ReferenceCountUtil.release(buf);
+                    throw new IOException(
+                            exMsg("Bad location").kv("location", location)
+                            .kv("expectedLedger", ledgerId).kv("expectedEntry", entryId)
+                            .kv("foundLedger", thisLedgerId).kv("foundEntry", thisEntryId)
+                            .toString());
+                }
+            }
+            stats.getReadEntryStats().registerSuccessfulEvent(System.nanoTime() - start, TimeUnit.NANOSECONDS);
+            return buf;
+        } catch (EOFException eof) {
+            stats.getReadEntryStats().registerFailedEvent(System.nanoTime() - start, TimeUnit.NANOSECONDS);
+            throw new NoEntryException(
+                    exMsg("Entry location doesn't exist").kv("location", location).toString(),
+                    ledgerId, entryId);
+        }
+    }
+
+    @Override
+    public void flush() throws IOException {
+        long start = System.nanoTime();
+        Future<?> currentFuture = flushCurrent();
+
+        List<Future<?>> outstandingFlushes;
+        synchronized (this) {
+            outstandingFlushes = this.pendingFlushes;
+            this.pendingFlushes = new ArrayList<>();
+        }
+        outstandingFlushes.add(currentFuture);
+
+        for (Future<?> f : outstandingFlushes) {
+            try {
+                f.get();
+            } catch (InterruptedException ie) {
+                Thread.currentThread().interrupt();
+                throw new IOException("Interrupted while flushing", ie);
+            } catch (ExecutionException ee) {
+                if (ee.getCause() instanceof IOException) {
+                    throw (IOException) ee.getCause();
+                } else {
+                    throw new IOException("Exception flushing writer", ee);
+                }
+            }
+        }
+        stats.getFlushStats().registerSuccessfulEvent(System.nanoTime() - start, TimeUnit.NANOSECONDS);
+    }
+
+    private Future<?> flushCurrent() throws IOException {
+        WriterWithMetadata flushWriter;
+        synchronized (this) {
+            flushWriter = this.curWriter;
+        }
+        if (flushWriter != null) {
+            return flushExecutor.submit(() -> {
+                long start = System.nanoTime();
+                try {
+                    flushWriter.flush();
+                    stats.getWriterFlushStats().registerSuccessfulEvent(
+                            System.nanoTime() - start, TimeUnit.NANOSECONDS);
+                } catch (Throwable t) {
+                    stats.getWriterFlushStats().registerFailedEvent(
+                            System.nanoTime() - start, TimeUnit.NANOSECONDS);
+                    throw t;
+                }
+                return null;
+            });
+        } else {
+            return CompletableFuture.completedFuture(null);
+        }
+    }
+
+    private void flushAndCloseCurrent() throws IOException {
+        WriterWithMetadata flushWriter;
+
+        CompletableFuture<Void> flushPromise = new CompletableFuture<>();
+        synchronized (this) {
+            flushWriter = this.curWriter;
+            this.curWriter = null;
+
+            pendingFlushes.add(flushPromise);
+        }
+        if (flushWriter != null) {
+            flushExecutor.execute(() -> {
+                long start = System.nanoTime();
+                try {
+                    flushWriter.finalizeAndClose();
+                    stats.getWriterFlushStats()
+                        .registerSuccessfulEvent(System.nanoTime() - start, TimeUnit.NANOSECONDS);
+                    unflushedLogs.remove(flushWriter.logId());
+                    flushPromise.complete(null);
+                } catch (Throwable t) {
+                    stats.getWriterFlushStats()
+                        .registerFailedEvent(System.nanoTime() - start, TimeUnit.NANOSECONDS);
+                    flushPromise.completeExceptionally(t);
+                }
+            });
+        } else {
+            flushPromise.complete(null);
+        }
+    }
+
+    @Override
+    public void close() throws IOException {
+        flushAndCloseCurrent(); // appends metadata to current log
+        flush(); // wait for all outstanding flushes
+
+        for (Cache<Integer, LogReader> c : allCaches) {
+            c.invalidateAll();
+        }
+
+        writeBuffers.close();
+    }
+
+    @Override
+    public Collection<Long> getFlushedLogIds() {
+        return LedgerDirUtil.logIdsInDirectory(ledgerDir).stream()
+            .filter(logId -> !unflushedLogs.contains(logId))
+            .map(i -> Long.valueOf(i))
+            .collect(Collectors.toList());
+    }
+
+    @Override
+    public boolean removeEntryLog(long entryLogId) {
+        checkArgument(entryLogId < Integer.MAX_VALUE, "Entry log id must be an int [%d]", entryLogId);
+        File file = logFile(ledgerDir, (int) entryLogId);
+        boolean result = file.delete();
+        slog.kv("file", file).kv("logId", entryLogId).kv("result", result).info(Events.LOG_DELETED);
+        return result;
+    }
+
+    @Override
+    public void scanEntryLog(long entryLogId, EntryLogScanner scanner) throws IOException {
+        checkArgument(entryLogId < Integer.MAX_VALUE, "Entry log id must be an int [%d]", entryLogId);
+        try (LogReader reader = newDirectReader((int) entryLogId)) {
+            LogReaderScan.scan(allocator, reader, scanner);
+        }
+    }
+
+    @Override
+    public boolean logExists(long logId) {
+        checkArgument(logId < Integer.MAX_VALUE, "Entry log id must be an int [%d]", logId);
+        return logFile(ledgerDir, (int) logId).exists();
+    }
+
+    @Override
+    public EntryLogMetadata getEntryLogMetadata(long entryLogId, AbstractLogCompactor.Throttler throttler)
+            throws IOException {
+        try {
+            return readEntryLogIndex(entryLogId);
+        } catch (IOException e) {
+            slog.kv("entryLogId", entryLogId).kv("reason", e.getMessage())
+                .info(Events.READ_METADATA_FALLBACK);
+            return scanEntryLogMetadata(entryLogId, throttler);
+        }
+    }
+
+    @VisibleForTesting
+    EntryLogMetadata readEntryLogIndex(long logId) throws IOException {
+        checkArgument(logId < Integer.MAX_VALUE, "Entry log id must be an int [%d]", logId);
+
+        try (LogReader reader = newDirectReader((int) logId)) {
+            return LogMetadata.read(reader);
+        }
+    }
+
+    @VisibleForTesting
+    EntryLogMetadata scanEntryLogMetadata(long logId, AbstractLogCompactor.Throttler throttler) throws IOException {
+        final EntryLogMetadata meta = new EntryLogMetadata(logId);
+
+        // Read through the entry log file and extract the entry log meta
+        scanEntryLog(logId, new EntryLogScanner() {
+            @Override
+            public void process(long ledgerId, long offset, ByteBuf entry) throws IOException {
+                // add new entry size of a ledger to entry log meta
+                if (throttler != null) {
+                    throttler.acquire(entry.readableBytes());
+                }
+                meta.addLedgerSize(ledgerId, entry.readableBytes() + Integer.BYTES);
+            }
+
+            @Override
+            public boolean accept(long ledgerId) {
+                return ledgerId >= 0;
+            }
+        });
+        return meta;
+    }
+
+    @VisibleForTesting
+    LogReader newDirectReader(int logId) throws IOException {
+        return new DirectReader(logId, logFilename(ledgerDir, logId),
+                                allocator, nativeIO, readBufferSize,
+                                maxSaneEntrySize, stats.getReadBlockStats());
+    }
+
+    private LogWriter newDirectWriter(int newId) throws IOException {
+        unflushedLogs.add(newId);
+        LogWriter writer = new DirectWriter(newId, logFilename(ledgerDir, newId), maxFileSize,
+                                            writeExecutor, writeBuffers, nativeIO, slog);
+        ByteBuf buf = allocator.buffer(Buffer.ALIGNMENT);
+        try {
+            Header.writeEmptyHeader(buf);
+            writer.writeAt(0, buf);
+            writer.position(buf.capacity());
+        } finally {
+            ReferenceCountUtil.release(buf);
+        }
+        return writer;
+    }
+
+    public static File logFile(File directory, int logId) {
+        return new File(directory, Long.toHexString(logId) + LOG_FILE_SUFFIX);
+    }
+
+    public static String logFilename(File directory, int logId) {
+        return logFile(directory, logId).toString();
+    }
+
+    @Override
+    public CompactionEntryLog newCompactionLog(long srcLogId) throws IOException {
+        int dstLogId = ids.nextId();
+        return DirectCompactionEntryLog.newLog((int) srcLogId, dstLogId, ledgerDir,
+                                               maxFileSize, writeExecutor, writeBuffers,
+                                               nativeIO, allocator, slog);
+    }
+
+    @Override
+    public Collection<CompactionEntryLog> incompleteCompactionLogs() {
+        List<CompactionEntryLog> logs = new ArrayList<>();
+
+        if (ledgerDir.exists() && ledgerDir.isDirectory()) {
+            File[] files = ledgerDir.listFiles();
+            if (files != null && files.length > 0) {
+                for (File f : files) {
+                    if (f.getName().endsWith(COMPACTING_SUFFIX)) {
+                        try {
+                            Files.deleteIfExists(f.toPath());
+                        } catch (IOException ioe) {
+                            slog.kv("file", f).warn(Events.COMPACTION_DELETE_FAILURE);
+                        }
+                    }
+
+                    Matcher m = LedgerDirUtil.COMPACTED_FILE_PATTERN.matcher(f.getName());
+                    if (m.matches()) {
+                        int dstLogId = Integer.parseUnsignedInt(m.group(1), 16);
+                        int srcLogId = Integer.parseUnsignedInt(m.group(2), 16);
+
+                        logs.add(DirectCompactionEntryLog.recoverLog(srcLogId, dstLogId, ledgerDir,
+                                                                     readBufferSize, maxSaneEntrySize,
+                                                                     nativeIO, allocator,
+                                                                     stats.getReadBlockStats(),
+                                                                     slog));
+                    }
+                }
+            }
+        }
+        return logs;
+    }
+}
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/directentrylogger/DirectEntryLoggerStats.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/directentrylogger/DirectEntryLoggerStats.java
new file mode 100644
index 00000000000..796051b1040
--- /dev/null
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/directentrylogger/DirectEntryLoggerStats.java
@@ -0,0 +1,177 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+package org.apache.bookkeeper.bookie.storage.directentrylogger;
+
+import static org.apache.bookkeeper.bookie.BookKeeperServerStats.BOOKIE_ADD_ENTRY;
+import static org.apache.bookkeeper.bookie.BookKeeperServerStats.BOOKIE_READ_ENTRY;
+import static org.apache.bookkeeper.bookie.BookKeeperServerStats.BOOKIE_SCOPE;
+import static org.apache.bookkeeper.bookie.BookKeeperServerStats.CATEGORY_SERVER;
+
+import org.apache.bookkeeper.stats.Counter;
+import org.apache.bookkeeper.stats.OpStatsLogger;
+import org.apache.bookkeeper.stats.StatsLogger;
+import org.apache.bookkeeper.stats.annotations.StatsDoc;
+
+@StatsDoc(
+    name = BOOKIE_SCOPE,
+    category = CATEGORY_SERVER,
+    help = "Direct entry logger stats"
+)
+class DirectEntryLoggerStats {
+    private static final String ADD_ENTRY = "entrylog-add-entry";
+    private static final String READ_ENTRY = "entrylog-read-entry";
+    private static final String FLUSH = "entrylog-flush";
+    private static final String WRITER_FLUSH = "entrylog-writer-flush";
+    private static final String READ_BLOCK = "entrylog-read-block";
+    private static final String READER_OPEN = "entrylog-open-reader";
+    private static final String READER_CLOSE = "entrylog-close-reader";
+    private static final String CACHED_READER_SERVED_CLOSED = "entrylog-cached-reader-closed";
+
+    @StatsDoc(
+        name = ADD_ENTRY,
+        help = "Operation stats of adding entries to the entry log",
+        parent = BOOKIE_ADD_ENTRY
+    )
+    private final OpStatsLogger addEntryStats;
+
+    @StatsDoc(
+        name = READ_ENTRY,
+        help = "Operation stats of reading entries from the entry log",
+        parent = BOOKIE_READ_ENTRY
+    )
+    private static ThreadLocal<OpStatsLogger> readEntryStats;
+
+    @StatsDoc(
+        name = FLUSH,
+        help = "Stats for persisting outstanding entrylog writes to disk"
+    )
+    private final OpStatsLogger flushStats;
+
+    @StatsDoc(
+        name = WRITER_FLUSH,
+        help = "Stats for persisting outstanding entrylog writes for a single writer"
+    )
+    private final OpStatsLogger writerFlushStats;
+
+    @StatsDoc(
+        name = READ_BLOCK,
+        help = "Stats for reading blocks from disk"
+    )
+    private static ThreadLocal<OpStatsLogger> readBlockStats;
+
+    @StatsDoc(
+        name = READER_OPEN,
+        help = "Stats for reader open operations"
+    )
+    private static ThreadLocal<Counter> openReaderStats;
+
+    @StatsDoc(
+        name = READER_CLOSE,
+        help = "Stats for reader close operations"
+    )
+    private static ThreadLocal<Counter> closeReaderStats;
+
+    @StatsDoc(
+        name = CACHED_READER_SERVED_CLOSED,
+        help = "Stats for cached readers being served closed"
+    )
+    private static ThreadLocal<Counter> cachedReadersServedClosed;
+
+    DirectEntryLoggerStats(StatsLogger stats) {
+        addEntryStats = stats.getOpStatsLogger(ADD_ENTRY);
+
+        flushStats = stats.getOpStatsLogger(FLUSH);
+        writerFlushStats = stats.getOpStatsLogger(WRITER_FLUSH);
+        setStats(stats);
+    }
+
+    private static synchronized void setStats(StatsLogger stats) {
+        readEntryStats = new ThreadLocal<OpStatsLogger>() {
+            @Override
+            public OpStatsLogger initialValue() {
+                return stats.scopeLabel("thread", String.valueOf(Thread.currentThread().getId()))
+                    .getOpStatsLogger(READ_ENTRY);
+            }
+        };
+        readBlockStats = new ThreadLocal<OpStatsLogger>() {
+            @Override
+            public OpStatsLogger initialValue() {
+                return stats.scopeLabel("thread", String.valueOf(Thread.currentThread().getId()))
+                    .getOpStatsLogger(READ_BLOCK);
+            }
+        };
+
+        DirectEntryLoggerStats.openReaderStats = new ThreadLocal<Counter>() {
+            @Override
+            public Counter initialValue() {
+                return stats.scopeLabel("thread", String.valueOf(Thread.currentThread().getId()))
+                    .getCounter(READER_OPEN);
+            }
+        };
+
+        DirectEntryLoggerStats.closeReaderStats = new ThreadLocal<Counter>() {
+            @Override
+            public Counter initialValue() {
+                return stats.scopeLabel("thread", String.valueOf(Thread.currentThread().getId()))
+                    .getCounter(READER_CLOSE);
+            }
+        };
+
+        DirectEntryLoggerStats.cachedReadersServedClosed = new ThreadLocal<Counter>() {
+            @Override
+            public Counter initialValue() {
+                return stats.scopeLabel("thread", String.valueOf(Thread.currentThread().getId()))
+                    .getCounter(CACHED_READER_SERVED_CLOSED);
+            }
+        };
+    }
+
+    OpStatsLogger getAddEntryStats() {
+        return addEntryStats;
+    }
+
+    OpStatsLogger getFlushStats() {
+        return flushStats;
+    }
+
+    OpStatsLogger getWriterFlushStats() {
+        return writerFlushStats;
+    }
+
+    OpStatsLogger getReadEntryStats() {
+        return readEntryStats.get();
+    }
+
+    OpStatsLogger getReadBlockStats() {
+        return readBlockStats.get();
+    }
+
+    Counter getOpenReaderCounter() {
+        return openReaderStats.get();
+    }
+
+    Counter getCloseReaderCounter() {
+        return closeReaderStats.get();
+    }
+
+    Counter getCachedReadersServedClosedCounter() {
+        return cachedReadersServedClosed.get();
+    }
+}
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/directentrylogger/DirectReader.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/directentrylogger/DirectReader.java
new file mode 100644
index 00000000000..707bf307c05
--- /dev/null
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/directentrylogger/DirectReader.java
@@ -0,0 +1,339 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ * + */ +package org.apache.bookkeeper.bookie.storage.directentrylogger; + +import static com.google.common.base.Preconditions.checkState; +import static org.apache.bookkeeper.common.util.ExceptionMessageHelper.exMsg; + +import io.netty.buffer.ByteBuf; +import io.netty.buffer.ByteBufAllocator; +import io.netty.util.ReferenceCountUtil; +import java.io.EOFException; +import java.io.IOException; +import java.util.concurrent.TimeUnit; +import org.apache.bookkeeper.common.util.nativeio.NativeIO; +import org.apache.bookkeeper.common.util.nativeio.NativeIOException; +import org.apache.bookkeeper.stats.OpStatsLogger; + +class DirectReader implements LogReader { + private final ByteBufAllocator allocator; + private final NativeIO nativeIO; + private final Buffer nativeBuffer; + private final String filename; + private final int logId; + private final int fd; + private final int maxSaneEntrySize; + private final OpStatsLogger readBlockStats; + private long currentBlock = -1; + private long currentBlockEnd = -1; + private long maxOffset; + private boolean closed; + + DirectReader(int logId, String filename, ByteBufAllocator allocator, + NativeIO nativeIO, int bufferSize, + int maxSaneEntrySize, OpStatsLogger readBlockStats) throws IOException { + this.nativeIO = nativeIO; + this.allocator = allocator; + this.logId = logId; + this.filename = filename; + this.maxSaneEntrySize = maxSaneEntrySize; + this.readBlockStats = readBlockStats; + closed = false; + + try { + fd = nativeIO.open(filename, + NativeIO.O_RDONLY | NativeIO.O_DIRECT, + 00755); + checkState(fd >= 0, "Open should throw exception on negative return (%d)", fd); + } catch (NativeIOException ne) { + throw new IOException(exMsg(ne.getMessage()) + .kv("file", filename) + .kv("errno", ne.getErrno()).toString()); + } + refreshMaxOffset(); + nativeBuffer = new Buffer(nativeIO, allocator, bufferSize); + } + + @Override + public int logId() { + return logId; + } + + private void clearCache() { + synchronized (nativeBuffer) { + currentBlock = -1; + currentBlockEnd = -1; + } + } + + @Override + public ByteBuf readBufferAt(long offset, int size) throws IOException, EOFException { + ByteBuf buf = allocator.buffer(size); + try { + readIntoBufferAt(buf, offset, size); + } catch (IOException e) { + ReferenceCountUtil.release(buf); + throw e; + } + + return buf; + } + + @Override + public void readIntoBufferAt(ByteBuf buffer, long offset, int size) throws IOException, EOFException { + assertValidOffset(offset); + synchronized (nativeBuffer) { + while (size > 0) { + int bytesRead = readBytesIntoBuf(buffer, offset, size); + size -= bytesRead; + offset += bytesRead; + } + } + } + + @Override + public int readIntAt(long offset) throws IOException, EOFException { + assertValidOffset(offset); + synchronized (nativeBuffer) { + if (offset >= currentBlock && offset + Integer.BYTES <= currentBlockEnd) { // fast path + return nativeBuffer.readInt(offsetInBlock(offset)); + } else { // slow path + ByteBuf intBuf = readBufferAt(offset, Integer.BYTES); + try { + return intBuf.getInt(0); + } finally { + ReferenceCountUtil.release(intBuf); + } + } + } + } + + @Override + public long readLongAt(long offset) throws IOException, EOFException { + assertValidOffset(offset); + synchronized (nativeBuffer) { + if (offset >= currentBlock && offset + Long.BYTES <= currentBlockEnd) { // fast path + return nativeBuffer.readLong(offsetInBlock(offset)); + } else { // slow path + ByteBuf longBuf = readBufferAt(offset, Long.BYTES); + try { + return longBuf.getLong(0); + } finally { + 
+                    ReferenceCountUtil.release(longBuf);
+                }
+            }
+        }
+    }
+
+    private int readBytesIntoBuf(ByteBuf buf, long offset, int size) throws IOException, EOFException {
+        synchronized (nativeBuffer) {
+            if (offset < currentBlock || offset >= currentBlockEnd) {
+                readBlock(offset);
+            }
+            int offsetInBuffer = offsetInBlock(offset);
+            int sizeInBuffer = sizeInBlock(offset, size);
+            if (sizeInBuffer <= 0) {
+                throw new EOFException(exMsg("Not enough bytes available")
+                                       .kv("file", filename)
+                                       .kv("fileSize", maxOffset)
+                                       .kv("offset", offset)
+                                       .kv("size", size).toString());
+            }
+            // only read the bytes that are valid in the current block; passing the
+            // full requested size could copy padding from beyond the end of the data
+            return nativeBuffer.readByteBuf(buf, offsetInBuffer, sizeInBuffer);
+        }
+    }
+
+    @Override
+    public ByteBuf readEntryAt(int offset) throws IOException, EOFException {
+        assertValidEntryOffset(offset);
+        int sizeOffset = offset - Integer.BYTES;
+        if (sizeOffset < 0) {
+            throw new IOException(exMsg("Invalid offset, buffer size missing")
+                                  .kv("file", filename)
+                                  .kv("offset", offset).toString());
+        }
+
+        int entrySize = readIntAt(sizeOffset);
+        if (entrySize == 0) {
+            // reading an entry with size 0 may mean reading from preallocated
+            // space. if we receive an offset in preallocated space, it may
+            // mean that a write has occurred and been flushed, but our view
+            // of that block is out of date. So clear the cache and let it be
+            // loaded again.
+            clearCache();
+            entrySize = readIntAt(sizeOffset);
+        }
+        if (entrySize > maxSaneEntrySize || entrySize <= 0) {
+            throw new IOException(exMsg("Invalid entry size")
+                                  .kv("file", filename)
+                                  .kv("offset", offset)
+                                  .kv("maxSaneEntrySize", maxSaneEntrySize)
+                                  .kv("readEntrySize", entrySize).toString());
+        }
+        return readBufferAt(offset, entrySize);
+    }
+
+    void readBlock(long offset) throws IOException {
+        final int blockSize = nativeBuffer.size();
+        assertValidBlockSize(blockSize);
+        final long blockStart = offset & ~(blockSize - 1);
+
+        if (blockStart + blockSize > maxOffset) {
+            // Check if there's new data in the file
+            refreshMaxOffset();
+        }
+        final long bytesAvailable = maxOffset > blockStart ? maxOffset - blockStart : 0;
+        final long startNs = System.nanoTime();
+
+        long bufferOffset = 0;
+        long bytesToRead = Math.min(blockSize, bytesAvailable);
+        long bytesOutstanding = bytesToRead;
+        long bytesRead = -1;
+        try {
+            while (true) {
+                long readSize = blockSize - bufferOffset;
+                long pointerWithOffset = nativeBuffer.pointer(bufferOffset, readSize);
+                bytesRead = nativeIO.pread(fd, pointerWithOffset,
+                                           readSize,
+                                           blockStart + bufferOffset);
+                // offsets and counts must be aligned, so ensure that if we
+                // get a short read, we don't throw off the alignment. For example
+                // if we're trying to read 12K and we only managed 100 bytes,
+                // we don't progress the offset or outstanding at all. However, if we
+                // read 4196 bytes, we can progress the offset by 4KB and the outstanding
+                // bytes will then be 100.
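+                // (rounding bytesRead down to the alignment, i.e. bytesRead & ~(ALIGNMENT - 1),
+                // keeps both the file offset and the buffer offset block-aligned)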
+                // the only non-short read that isn't aligned is the bytes at the end
+                // of the file, which is why we don't align before we check if we should
+                // exit the loop
+                if ((bytesOutstanding - bytesRead) <= 0) {
+                    break;
+                }
+                bytesOutstanding -= bytesRead & ~(Buffer.ALIGNMENT - 1);
+                bufferOffset += bytesRead & ~(Buffer.ALIGNMENT - 1);
+            }
+        } catch (NativeIOException ne) {
+            readBlockStats.registerFailedEvent(System.nanoTime() - startNs, TimeUnit.NANOSECONDS);
+            throw new IOException(exMsg(ne.getMessage())
+                                  .kv("requestedBytes", blockSize)
+                                  .kv("offset", blockStart)
+                                  .kv("expectedBytes", Math.min(blockSize, bytesAvailable))
+                                  .kv("bytesOutstanding", bytesOutstanding)
+                                  .kv("bufferOffset", bufferOffset)
+                                  .kv("file", filename)
+                                  .kv("fd", fd)
+                                  .kv("errno", ne.getErrno()).toString());
+        }
+        readBlockStats.registerSuccessfulEvent(System.nanoTime() - startNs, TimeUnit.NANOSECONDS);
+        currentBlock = blockStart;
+        currentBlockEnd = blockStart + Math.min(blockSize, bytesAvailable);
+    }
+
+    @Override
+    public void close() throws IOException {
+        synchronized (nativeBuffer) {
+            nativeBuffer.free();
+        }
+
+        try {
+            int ret = nativeIO.close(fd);
+            checkState(ret == 0, "Close should throw exception on non-zero return (%d)", ret);
+            closed = true;
+        } catch (NativeIOException ne) {
+            throw new IOException(exMsg(ne.getMessage())
+                                  .kv("file", filename)
+                                  .kv("errno", ne.getErrno()).toString());
+        }
+    }
+
+    @Override
+    public boolean isClosed() {
+        return closed;
+    }
+
+    @Override
+    public long maxOffset() {
+        return maxOffset;
+    }
+
+    private void refreshMaxOffset() throws IOException {
+        try {
+            long ret = nativeIO.lseek(fd, 0, NativeIO.SEEK_END);
+            checkState(ret >= 0,
+                       "Lseek should throw exception on negative return (%d)", ret);
+            synchronized (this) {
+                maxOffset = ret;
+            }
+        } catch (NativeIOException ne) {
+            throw new IOException(exMsg(ne.getMessage())
+                                  .kv("file", filename)
+                                  .kv("fd", fd)
+                                  .kv("errno", ne.getErrno()).toString());
+        }
+    }
+
+    private int offsetInBlock(long offset) {
+        long blockOffset = offset - currentBlock;
+        if (blockOffset < 0 || blockOffset > Integer.MAX_VALUE) {
+            throw new IllegalArgumentException(exMsg("Invalid offset passed")
+                                               .kv("offset", offset).kv("blockOffset", blockOffset)
+                                               .kv("currentBlock", currentBlock).toString());
+        }
+        return (int) blockOffset;
+    }
+
+    private int sizeInBlock(long offset, int size) {
+        if (offset > currentBlockEnd || offset < currentBlock) {
+            throw new IllegalArgumentException(exMsg("Invalid offset passed")
+                                               .kv("offset", offset)
+                                               .kv("currentBlock", currentBlock)
+                                               .kv("currentBlockEnd", currentBlockEnd).toString());
+        }
+
+        long available = currentBlockEnd - offset;
+        checkState(available <= Integer.MAX_VALUE, "Available(%d) must be less than max int", available);
+        return Math.min(size, (int) available);
+    }
+
+    private static void assertValidOffset(long offset) {
+        if (offset < 0) {
+            throw new IllegalArgumentException(
+                    exMsg("Offset can't be negative").kv("offset", offset).toString());
+        }
+    }
+
+    private static void assertValidEntryOffset(long offset) {
+        assertValidOffset(offset);
+        if (offset > Integer.MAX_VALUE) {
+            throw new IllegalArgumentException(
+                    exMsg("Entry offset must be less than max int").kv("offset", offset).toString());
+        }
+    }
+
+    private static void assertValidBlockSize(int blockSize) {
+        boolean valid = blockSize > 0 && Buffer.isAligned(blockSize);
+        if (!valid) {
+            throw new IllegalArgumentException(
+                    exMsg("Invalid block size, must be power of 2")
+                    .kv("blockSize", blockSize)
+                    .kv("minBlockSize", Buffer.ALIGNMENT).toString());
+        }
+    }
+}
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/directentrylogger/DirectWriter.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/directentrylogger/DirectWriter.java
new file mode 100644
index 00000000000..20a3d855b4a
--- /dev/null
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/directentrylogger/DirectWriter.java
@@ -0,0 +1,321 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+package org.apache.bookkeeper.bookie.storage.directentrylogger;
+
+import static com.google.common.base.Preconditions.checkArgument;
+import static com.google.common.base.Preconditions.checkState;
+import static org.apache.bookkeeper.common.util.ExceptionMessageHelper.exMsg;
+
+import io.netty.buffer.ByteBuf;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Future;
+import org.apache.bookkeeper.common.util.nativeio.NativeIO;
+import org.apache.bookkeeper.common.util.nativeio.NativeIOException;
+import org.apache.bookkeeper.slogger.Slogger;
+import org.apache.commons.lang3.SystemUtils;
+
+class DirectWriter implements LogWriter {
+    final NativeIO nativeIO;
+    final int fd;
+    final int id;
+    final String filename;
+    final BufferPool bufferPool;
+    final ExecutorService writeExecutor;
+    final Object bufferLock = new Object();
+    final List<Future<?>> outstandingWrites = new ArrayList<>();
+    final Slogger slog;
+    Buffer nativeBuffer;
+    long offset;
+    private static volatile boolean useFallocate = true;
+
+    DirectWriter(int id,
+                 String filename,
+                 long maxFileSize,
+                 ExecutorService writeExecutor,
+                 BufferPool bufferPool,
+                 NativeIO nativeIO, Slogger slog) throws IOException {
+        checkArgument(maxFileSize > 0, "Max file size (%d) must be positive", maxFileSize);
+        this.id = id;
+        this.filename = filename;
+        this.writeExecutor = writeExecutor;
+        this.nativeIO = nativeIO;
+        this.slog = slog.ctx(DirectWriter.class);
+
+        offset = 0;
+
+        try {
+            fd = nativeIO.open(filename,
+                               NativeIO.O_CREAT | NativeIO.O_WRONLY | NativeIO.O_DIRECT,
+                               00644);
+            checkState(fd >= 0, "Open should have thrown exception, fd is invalid : %d", fd);
+        } catch (NativeIOException ne) {
+            throw new IOException(exMsg(ne.getMessage()).kv("file", filename)
+                                  .kv("errno", ne.getErrno()).toString(), ne);
+        }
+
+        if (useFallocate) {
+            if (!SystemUtils.IS_OS_LINUX) {
+                disableUseFallocate();
+                this.slog.warn(Events.FALLOCATE_NOT_AVAILABLE);
+            } else {
+                try {
+                    int ret = nativeIO.fallocate(fd, NativeIO.FALLOC_FL_ZERO_RANGE, 0, maxFileSize);
+                    checkState(ret == 0, "Exception should have been thrown on non-zero ret: %d", ret);
+                } catch (NativeIOException ex) {
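+                    // e.g. a filesystem that doesn't implement FALLOC_FL_ZERO_RANGE
+                    // will typically fail here with errno EOPNOTSUPP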
+                    // fallocate(2) is not supported on all filesystems. Since this is an optimization, disable
+                    // subsequent usage instead of failing the operation.
+                    disableUseFallocate();
+                    this.slog.kv("message", ex.getMessage())
+                        .kv("file", filename)
+                        .kv("errno", ex.getErrno())
+                        .warn(Events.FALLOCATE_NOT_AVAILABLE);
+                }
+            }
+        }
+
+        this.bufferPool = bufferPool;
+        this.nativeBuffer = bufferPool.acquire();
+    }
+
+    private static void disableUseFallocate() {
+        DirectWriter.useFallocate = false;
+    }
+
+    @Override
+    public int logId() {
+        return id;
+    }
+
+    @Override
+    public void writeAt(long offset, ByteBuf buf) throws IOException {
+        checkArgument(Buffer.isAligned(offset),
+                      "Offset to writeAt must be aligned to %d: %d is not", Buffer.ALIGNMENT, offset);
+        checkArgument(Buffer.isAligned(buf.readableBytes()),
+                      "Buffer must write multiple of alignment bytes (%d), %d is not",
+                      Buffer.ALIGNMENT, buf.readableBytes());
+
+        int bytesToWrite = buf.readableBytes();
+        if (bytesToWrite <= 0) {
+            return;
+        }
+
+        Buffer tmpBuffer = bufferPool.acquire();
+        tmpBuffer.reset();
+        tmpBuffer.writeByteBuf(buf);
+        Future<?> f = writeExecutor.submit(() -> {
+            writeByteBuf(tmpBuffer, bytesToWrite, offset);
+            return null;
+        });
+        addOutstandingWrite(f);
+    }
+
+    private void writeByteBuf(Buffer buffer, int bytesToWrite, long offsetToWrite) throws IOException {
+        try {
+            if (bytesToWrite <= 0) {
+                return;
+            }
+            int ret = nativeIO.pwrite(fd, buffer.pointer(), bytesToWrite, offsetToWrite);
+            if (ret != bytesToWrite) {
+                throw new IOException(exMsg("Incomplete write")
+                                      .kv("filename", filename)
+                                      .kv("pointer", buffer.pointer())
+                                      .kv("offset", offsetToWrite)
+                                      .kv("writeSize", bytesToWrite)
+                                      .kv("bytesWritten", ret)
+                                      .toString());
+            }
+        } catch (NativeIOException ne) {
+            throw new IOException(exMsg("Write error")
+                                  .kv("filename", filename)
+                                  .kv("offset", offsetToWrite)
+                                  .kv("writeSize", bytesToWrite)
+                                  .kv("pointer", buffer.pointer())
+                                  .kv("errno", ne.getErrno())
+                                  .toString());
+        } finally {
+            bufferPool.release(buffer);
+        }
+    }
+
+    @Override
+    public int writeDelimited(ByteBuf buf) throws IOException {
+        synchronized (bufferLock) {
+            if (!nativeBuffer.hasSpace(serializedSize(buf))) {
+                flushBuffer();
+            }
+
+            int readable = buf.readableBytes();
+            long bufferPosition = position() + Integer.BYTES;
+            if (bufferPosition > Integer.MAX_VALUE) {
+                throw new IOException(exMsg("Cannot write past max int")
+                                      .kv("filename", filename)
+                                      .kv("writeSize", readable)
+                                      .kv("position", bufferPosition)
+                                      .toString());
+            }
+            nativeBuffer.writeInt(readable);
+            nativeBuffer.writeByteBuf(buf);
+            return (int) bufferPosition;
+        }
+    }
+
+    @Override
+    public void position(long offset) throws IOException {
+        synchronized (bufferLock) {
+            if (nativeBuffer != null && nativeBuffer.position() > 0) {
+                flushBuffer();
+            }
+            if ((offset % Buffer.ALIGNMENT) != 0) {
+                throw new IOException(exMsg("offset must be multiple of alignment")
+                                      .kv("offset", offset)
+                                      .kv("alignment", Buffer.ALIGNMENT)
+                                      .toString());
+            }
+            this.offset = offset;
+        }
+    }
+
+    @Override
+    public long position() {
+        synchronized (bufferLock) {
+            return this.offset + (nativeBuffer != null ? nativeBuffer.position() : 0);
+        }
+    }
+
+    @Override
+    public void flush() throws IOException {
+        flushBuffer();
+
+        waitForOutstandingWrites();
+
+        try {
+            int ret = nativeIO.fsync(fd);
+            checkState(ret == 0, "Fsync should throw exception on non-zero return (%d)", ret);
+        } catch (NativeIOException ne) {
+            throw new IOException(exMsg(ne.getMessage())
+                                  .kv("file", filename)
+                                  .kv("errno", ne.getErrno()).toString());
+        }
+    }
+
+    @Override
+    public void close() throws IOException {
+        synchronized (bufferLock) {
+            if (nativeBuffer != null && nativeBuffer.position() > 0) {
+                flush();
+            }
+        }
+
+        try {
+            int ret = nativeIO.close(fd);
+            checkState(ret == 0, "Close should throw exception on non-zero return (%d)", ret);
+        } catch (NativeIOException ne) {
+            throw new IOException(exMsg(ne.getMessage())
+                                  .kv("file", filename)
+                                  .kv("errno", ne.getErrno()).toString());
+        } finally {
+            synchronized (bufferLock) {
+                bufferPool.release(nativeBuffer);
+                nativeBuffer = null;
+            }
+        }
+    }
+
+    private void addOutstandingWrite(Future<?> toAdd) throws IOException {
+        synchronized (outstandingWrites) {
+            outstandingWrites.add(toAdd);
+
+            Iterator<Future<?>> iter = outstandingWrites.iterator();
+            while (iter.hasNext()) { // clear out completed futures
+                Future<?> f = iter.next();
+                if (f.isDone()) {
+                    waitForFuture(f);
+                    iter.remove();
+                } else {
+                    break;
+                }
+            }
+        }
+    }
+
+    private void waitForOutstandingWrites() throws IOException {
+        synchronized (outstandingWrites) {
+            Iterator<Future<?>> iter = outstandingWrites.iterator();
+            while (iter.hasNext()) { // clear out completed futures
+                Future<?> f = iter.next();
+                waitForFuture(f);
+                iter.remove();
+            }
+        }
+    }
+
+    private void waitForFuture(Future<?> f) throws IOException {
+        try {
+            f.get();
+        } catch (InterruptedException ie) {
+            Thread.currentThread().interrupt();
+            throw new IOException(ie);
+        } catch (Throwable t) {
+            if (t.getCause() instanceof IOException) {
+                throw (IOException) t.getCause();
+            } else {
+                throw new IOException(t);
+            }
+        }
+    }
+
+    private void flushBuffer() throws IOException {
+        synchronized (bufferLock) {
+            if (this.nativeBuffer != null) {
+                int bytesToWrite = this.nativeBuffer.padToAlignment();
+                if (bytesToWrite == 0) {
+                    return;
+                }
+
+                Buffer bufferToFlush = this.nativeBuffer;
+                this.nativeBuffer = null;
+
+                long offsetToWrite = offset;
+                offset += bytesToWrite;
+
+                Future<?> f = writeExecutor.submit(() -> {
+                    writeByteBuf(bufferToFlush, bytesToWrite, offsetToWrite);
+                    return null;
+                });
+                addOutstandingWrite(f);
+
+                // must acquire after triggering the write
+                // otherwise it could try to acquire a buffer without kicking off
+                // a subroutine that will free another
+                this.nativeBuffer = bufferPool.acquire();
+            }
+        }
+    }
+
+    @Override
+    public int serializedSize(ByteBuf buf) {
+        return buf.readableBytes() + Integer.BYTES;
+    }
+}
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/directentrylogger/EntryLogIdsImpl.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/directentrylogger/EntryLogIdsImpl.java
new file mode 100644
index 00000000000..2b63ec74f28
--- /dev/null
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/directentrylogger/EntryLogIdsImpl.java
@@ -0,0 +1,83 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.bookie.storage.directentrylogger; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.TimeUnit; +import org.apache.bookkeeper.bookie.LedgerDirsManager; +import org.apache.bookkeeper.bookie.storage.EntryLogIds; +import org.apache.bookkeeper.slogger.Slogger; +import org.apache.bookkeeper.util.LedgerDirUtil; +import org.apache.commons.lang3.tuple.Pair; +/** + * EntryLogIdsImpl. + */ +public class EntryLogIdsImpl implements EntryLogIds { + + + private final LedgerDirsManager ledgerDirsManager; + private final Slogger slog; + private int nextId; + private int maxId; + + public EntryLogIdsImpl(LedgerDirsManager ledgerDirsManager, + Slogger slog) throws IOException { + this.ledgerDirsManager = ledgerDirsManager; + this.slog = slog.ctx(EntryLogIdsImpl.class); + findLargestGap(); + } + + @Override + public int nextId() throws IOException { + while (true) { + synchronized (this) { + int current = nextId; + nextId++; + if (nextId == maxId) { + findLargestGap(); + } else { + return current; + } + } + } + } + + private void findLargestGap() throws IOException { + long start = System.nanoTime(); + List<Integer> currentIds = new ArrayList<>(); + + for (File ledgerDir : ledgerDirsManager.getAllLedgerDirs()) { + currentIds.addAll(LedgerDirUtil.logIdsInDirectory(ledgerDir)); + currentIds.addAll(LedgerDirUtil.compactedLogIdsInDirectory(ledgerDir)); + } + Pair<Integer, Integer> gap = LedgerDirUtil.findLargestGap(currentIds); + nextId = gap.getLeft(); + maxId = gap.getRight(); + slog.kv("dirs", ledgerDirsManager.getAllLedgerDirs()) + .kv("nextId", nextId) + .kv("maxId", maxId) + .kv("durationMs", TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start)) + .info(Events.ENTRYLOG_IDS_CANDIDATES_SELECTED); + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/directentrylogger/Events.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/directentrylogger/Events.java new file mode 100644 index 00000000000..8f473c0f3ff --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/directentrylogger/Events.java @@ -0,0 +1,145 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied.
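EntryLogIdsImpl above delegates the search itself to LedgerDirUtil.findLargestGap. A rough standalone sketch of the idea, sorting the used ids and taking the widest run of free ids between neighbours (the helper below is hypothetical, not the method the patch calls):

import java.util.Collections;
import java.util.List;

final class LargestGapSketch {
    // Returns {firstFreeId, firstUsedIdAfterGap} for the widest run of free ids
    // in [0, maxId). Sorts the input list in place. Illustrative only.
    // e.g. used ids {0, 1, 5, 6} with maxId 10 yields {2, 5}: ids 2..4 are free.
    static int[] largestGap(List<Integer> usedIds, int maxId) {
        Collections.sort(usedIds);
        int bestStart = 0;
        int bestEnd = usedIds.isEmpty() ? maxId : usedIds.get(0);
        int prev = bestEnd;
        for (int i = 1; i < usedIds.size(); i++) {
            int cur = usedIds.get(i);
            if (cur - prev - 1 > bestEnd - bestStart) {
                bestStart = prev + 1;
                bestEnd = cur;
            }
            prev = cur;
        }
        if (maxId - prev - 1 > bestEnd - bestStart) {
            bestStart = prev + 1;
            bestEnd = maxId;
        }
        return new int[] {bestStart, bestEnd};
    }
}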
See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.bookie.storage.directentrylogger; + +/** + * Events. + */ +public enum Events { + /** + * Fallocate is not available on this host. This generally indicates that the process is running on a + * non-Linux operating system. The lack of fallocate means that the filesystem will have to do more + * bookkeeping as data is written to the file, which will slow down writes. + */ + FALLOCATE_NOT_AVAILABLE, + + /** + * EntryLog ID candidates selected. These are the set of entry log IDs that subsequent entry log files + * will use. To find the candidates, the bookie lists all the log ids which have already been used, + * and finds the longest contiguous block of free ids. Over the lifetime of a bookie, a log id can + * be reused. This is not a problem, as the ids are only referenced from the index, and an + * entry log file will not be deleted if there are still references to it in the index. + * Generally candidates are selected at bookie boot, but they may also be selected at a later time + * if the current set of candidates is depleted. + */ + ENTRYLOG_IDS_CANDIDATES_SELECTED, + + /** + * The entrylogger ({@link org.apache.bookkeeper.bookie.storage.EntryLogger}) has been created. + * This occurs during bookie bootup, and the same entry logger will be used for the duration of + * the bookie process's lifetime. + */ + ENTRYLOGGER_CREATED, + + /** + * The entrylogger has been configured in a way that will likely result in errors during operation. + */ + ENTRYLOGGER_MISCONFIGURED, + + /** + * The entrylogger has started writing a new log file. The previous log file may not + * be entirely flushed when this is called, though it will be after an explicit flush call. + */ + LOG_ROLL, + + /** + * A log file has been deleted. This happens as a result of GC, when all entries in the file + * belong to deleted ledgers, or compaction, where the live entries have been copied to a new + * log. + */ + LOG_DELETED, + + /** + * An error occurred closing an entrylog reader. This is non-fatal but it may leak the file handle + * and the memory buffer of the reader in question. + */ + READER_CLOSE_ERROR, + + /** + * An attempt to read entrylog metadata failed. Falling back to scanning the log to get the metadata. + * This can occur if a bookie crashes before closing the entrylog cleanly. + */ + READ_METADATA_FALLBACK, + + /** + * A new entrylog has been created. The filename has the format [dstLogId].compacting, where dstLogId is + * a new unique log ID. Entrylog compaction will copy live entries from an existing src log to this new + * compacting destination log. There is a 1-1 relationship between source logs and destination logs. + * Once the copy completes, the compacting log will be marked as compacted by renaming the file to + * [dstLogId].log.[srcLogId].compacted, where srcLogId is the ID of the entrylog from which the live entries + * were copied. A new hardlink, [dstLogId].log, is created to point to the same inode, making the entry + * log available to be read. The compaction algorithm then updates the index with the offsets of the entries + * in the compacted destination log. Once complete, the index is flushed and all intermediate files (links) + * are deleted along with the original source log file. + * The entry copying phase of compaction is expensive.
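Before the note on why the renaming matters, a sketch of the file-name lifecycle just described; the ids and directory here are invented, and the real transitions are performed by the compaction entry log implementation, not by snippets like this:

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;

class CompactionRenameSketch {
    public static void main(String[] args) throws IOException {
        Path dir = Files.createTempDirectory("ledgers");
        int srcLogId = 123;   // log being compacted (made up)
        int dstLogId = 456;   // freshly allocated destination id (made up)

        // Phase 1: live entries are copied into [dstLogId].compacting.
        Path compacting = dir.resolve(dstLogId + ".compacting");
        Files.createFile(compacting);

        // Phase 2: mark compacted by renaming to [dstLogId].log.[srcLogId].compacted.
        Path compacted = dir.resolve(dstLogId + ".log." + srcLogId + ".compacted");
        Files.move(compacting, compacted);

        // Phase 3: make available via a hard link named [dstLogId].log (same inode).
        Path live = dir.resolve(dstLogId + ".log");
        Files.createLink(live, compacted);

        // Phase 4: after the index is flushed, the intermediate name is dropped;
        // in the real flow the source log ([srcLogId].log) is deleted too.
        Files.delete(compacted);
    }
}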
The renaming and linking in the algorithm exists so + * if a failure occurs after copying has completed, the work will not need to be redone. + */ + COMPACTION_LOG_CREATED, + + /** + * A partially compacted log has been recovered. The log file is of the format [dstLogId].log.[srcLogId].compacted. + * The log will be scanned and the index updated with the offsets of the entries in the log. Once complete, the + * log with ID srcLogId is deleted. + *

+ * See {@link #COMPACTION_LOG_CREATED} for more information on compaction. + */ + COMPACTION_LOG_RECOVERED, + + /** + * A compaction log has been marked as compacted. A log is marked as compacted by renaming from [dstLogId].compacting to + * [dstLogId].log.[srcLogId].compacted. All live entries from the src log have been successfully copied to the + * destination log at this point. + *

+ * See {@link #COMPACTION_LOG_CREATED} for more information on compaction. + */ + COMPACTION_MARK_COMPACTED, + + /** + * A compacted log has been made available for reads. A log is made available by creating a hardlink + * pointing from [dstLogId].log to [dstLogId].log.[srcLogId].compacted. These files, pointing to the + * same inode, will continue to exist until the compaction operation is complete. + *

+ * A reader with a valid offset will now be able to read from this log, so the index can be updated. + *

+ * See {@link #COMPACTION_LOG_CREATED} for more information on compaction. + */ + COMPACTION_MAKE_AVAILABLE, + + /** + * Compaction has been completed for a log. + * All intermediate files are deleted, along with the src entrylog file. + *

+ * See {@link #COMPACTION_LOG_CREATED} for more information on compaction. + */ + COMPACTION_COMPLETE, + + /** + * Failed to delete files while aborting a compaction operation. While this is not fatal, it + * can mean that there are issues writing to the filesystem that need to be investigated. + */ + COMPACTION_ABORT_EXCEPTION, + + /** + * Failed to delete files while completing a compaction operation. While this is not fatal, it + * can mean that there are issues writing to the filesystem that need to be investigated. + */ + COMPACTION_DELETE_FAILURE, +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/directentrylogger/Header.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/directentrylogger/Header.java new file mode 100644 index 00000000000..21823a44bdb --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/directentrylogger/Header.java @@ -0,0 +1,105 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.bookie.storage.directentrylogger; + +import static org.apache.bookkeeper.common.util.ExceptionMessageHelper.exMsg; + +import io.netty.buffer.ByteBuf; +import io.netty.buffer.Unpooled; +import java.io.IOException; + +/** + * The 1K block at the head of the entry logger file + * that contains the fingerprint and meta-data. + * + *

+ * Header is composed of:
+ * Fingerprint: 4 bytes "BKLO"
+ * Log file HeaderVersion enum: 4 bytes
+ * Ledger map offset: 8 bytes
+ * Ledgers Count: 4 bytes
+ * 
+ */ +class Header { + static final int LOGFILE_LEGACY_HEADER_SIZE = 1024; + static final int LOGFILE_DIRECT_HEADER_SIZE = Buffer.ALIGNMENT; + static final int HEADER_VERSION_OFFSET = 4; + static final int LEDGERS_MAP_OFFSET = HEADER_VERSION_OFFSET + Integer.BYTES; + static final int LEDGER_COUNT_OFFSET = LEDGERS_MAP_OFFSET + Long.BYTES; + static final int HEADER_V0 = 0; // Old log file format (no ledgers map index) + static final int HEADER_V1 = 1; // Introduced ledger map index + static final int HEADER_CURRENT_VERSION = HEADER_V1; + + static final byte[] EMPTY_HEADER = new byte[LOGFILE_DIRECT_HEADER_SIZE]; + static { + ByteBuf buf = Unpooled.wrappedBuffer(EMPTY_HEADER); + buf.setByte(0, 'B'); + buf.setByte(1, 'K'); + buf.setByte(2, 'L'); + buf.setByte(3, 'O'); + buf.setInt(HEADER_VERSION_OFFSET, HEADER_V1); + // legacy header size is 1024, while direct is 4096 so that it can be written as a single block + // to avoid legacy failing when it encounters the header in direct, create a dummy entry, which + // skips to the start of the second block + buf.setInt(LOGFILE_LEGACY_HEADER_SIZE, (buf.capacity() - LOGFILE_LEGACY_HEADER_SIZE) - Integer.BYTES); + buf.setLong(LOGFILE_LEGACY_HEADER_SIZE + Integer.BYTES, LogMetadata.INVALID_LID); + }; + static int extractVersion(ByteBuf header) throws IOException { + assertFingerPrint(header); + return header.getInt(HEADER_VERSION_OFFSET); + } + + static long extractLedgerMapOffset(ByteBuf header) throws IOException { + assertFingerPrint(header); + return header.getLong(LEDGERS_MAP_OFFSET); + } + + static int extractLedgerCount(ByteBuf header) throws IOException { + assertFingerPrint(header); + return header.getInt(LEDGER_COUNT_OFFSET); + } + + static void assertFingerPrint(ByteBuf header) throws IOException { + if (header.getByte(0) != 'B' + || header.getByte(1) != 'K' + || header.getByte(2) != 'L' + || header.getByte(3) != 'O') { + throw new IOException(exMsg("Bad fingerprint (should be BKLO)") + .kv("byte0", header.getByte(0)) + .kv("byte1", header.getByte(1)) + .kv("byte2", header.getByte(2)) + .kv("byte3", header.getByte(3)) + .toString()); + } + } + + static void writeEmptyHeader(ByteBuf header) throws IOException { + header.writeBytes(EMPTY_HEADER); + } + + static void writeHeader(ByteBuf header, + long ledgerMapOffset, int ledgerCount) throws IOException { + header.writeBytes(EMPTY_HEADER); + header.setLong(LEDGERS_MAP_OFFSET, ledgerMapOffset); + header.setInt(LEDGER_COUNT_OFFSET, ledgerCount); + } + +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/directentrylogger/LogMetadata.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/directentrylogger/LogMetadata.java new file mode 100644 index 00000000000..7cb40e4d8d1 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/directentrylogger/LogMetadata.java @@ -0,0 +1,192 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
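To make the layout described above concrete, a small sketch that writes and reads a header block at the documented offsets (0: "BKLO" fingerprint, 4: version, 8: ledgers-map offset, 16: ledger count). It mirrors the accessors in Header rather than calling them:

import io.netty.buffer.ByteBuf;
import io.netty.buffer.Unpooled;

class HeaderSketch {
    public static void main(String[] args) {
        ByteBuf header = Unpooled.buffer(4096, 4096);
        header.writeZero(4096);
        // Fingerprint "BKLO", then version, ledgers-map offset and ledger count
        // at the offsets Header defines (HEADER_VERSION_OFFSET = 4,
        // LEDGERS_MAP_OFFSET = 8, LEDGER_COUNT_OFFSET = 16).
        header.setByte(0, 'B').setByte(1, 'K').setByte(2, 'L').setByte(3, 'O');
        header.setInt(4, 1);          // HEADER_V1
        header.setLong(8, 40960L);    // ledger map offset (arbitrary example)
        header.setInt(16, 25);        // ledger count (arbitrary example)

        boolean fingerprintOk = header.getByte(0) == 'B' && header.getByte(1) == 'K'
                && header.getByte(2) == 'L' && header.getByte(3) == 'O';
        System.out.printf("ok=%s version=%d mapOffset=%d ledgers=%d%n",
                fingerprintOk, header.getInt(4), header.getLong(8), header.getInt(16));
    }
}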
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.bookie.storage.directentrylogger; + +import static org.apache.bookkeeper.common.util.ExceptionMessageHelper.exMsg; + +import io.netty.buffer.ByteBuf; +import io.netty.buffer.ByteBufAllocator; +import io.netty.util.ReferenceCountUtil; +import java.io.IOException; +import org.apache.bookkeeper.bookie.EntryLogMetadata; +import org.apache.bookkeeper.util.collections.ConcurrentLongLongHashMap; +import org.apache.bookkeeper.util.collections.ConcurrentLongLongHashMap.BiConsumerLong; + +class LogMetadata { + + /** + * Ledgers map is composed of multiple parts that can be split into separated entries. Each of them is composed of: + * + *
+     * length: (4 bytes) [0-3]
+     * ledger id (-1): (8 bytes) [4 - 11]
+     * entry id: (8 bytes) [12-19]
+     * num ledgers stored in current metadata entry: (4 bytes) [20 - 23]
+     * ledger entries: sequence of (ledgerid, size) (8 + 8 bytes each) [24..]
+     * 
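+     * For example, a full batch of LEDGERS_MAP_MAX_BATCH_SIZE = 10,000 ledgers occupies
+     * 24 + 10,000 * 16 = 160,024 bytes (LEDGERS_MAP_MAX_MAP_SIZE, where the 24-byte header
+     * counts the 4-byte length prefix added by LogWriter#writeDelimited), and 25,000
+     * ledgers serialize as three delimited batches of 10,000, 10,000 and 5,000 entries.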
+ */ + static final int LEDGERS_MAP_HEADER_SIZE = 4 + 8 + 8 + 4; + static final int LEDGERS_MAP_ENTRY_SIZE = 8 + 8; + + // Break the ledgers map into multiple batches, each of which can contain up to 10K ledgers + static final int LEDGERS_MAP_MAX_BATCH_SIZE = 10000; + static final int LEDGERS_MAP_MAX_MAP_SIZE = + LEDGERS_MAP_HEADER_SIZE + LEDGERS_MAP_ENTRY_SIZE * LEDGERS_MAP_MAX_BATCH_SIZE; + + static final long INVALID_LID = -1L; + // EntryId used to mark an entry (belonging to INVALID_ID) + // as a component of the serialized ledgers map + static final long LEDGERS_MAP_ENTRY_ID = -2L; + + static void write(LogWriter writer, + EntryLogMetadata metadata, + ByteBufAllocator allocator) + throws IOException { + long ledgerMapOffset = writer.position(); + ConcurrentLongLongHashMap ledgersMap = metadata.getLedgersMap(); + int numberOfLedgers = (int) ledgersMap.size(); + + // Write the ledgers map into several batches + final ByteBuf serializedMap = allocator.buffer(LEDGERS_MAP_MAX_BATCH_SIZE); + BiConsumerLong writingConsumer = new BiConsumerLong() { + int remainingLedgers = numberOfLedgers; + boolean startNewBatch = true; + int remainingInBatch = 0; + + @Override + public void accept(long ledgerId, long size) { + if (startNewBatch) { + int batchSize = Math.min(remainingLedgers, LEDGERS_MAP_MAX_BATCH_SIZE); + serializedMap.clear(); + serializedMap.writeLong(INVALID_LID); + serializedMap.writeLong(LEDGERS_MAP_ENTRY_ID); + serializedMap.writeInt(batchSize); + + startNewBatch = false; + remainingInBatch = batchSize; + } + // Dump the ledger in the current batch + serializedMap.writeLong(ledgerId); + serializedMap.writeLong(size); + --remainingLedgers; + + if (--remainingInBatch == 0) { + // Close current batch + try { + writer.writeDelimited(serializedMap); + } catch (IOException e) { + throw new RuntimeException(e); + } + startNewBatch = true; + } + } + }; + try { + ledgersMap.forEach(writingConsumer); + } catch (RuntimeException e) { + if (e.getCause() instanceof IOException) { + throw (IOException) e.getCause(); + } else { + throw e; + } + } finally { + ReferenceCountUtil.release(serializedMap); + } + ByteBuf buf = allocator.buffer(Buffer.ALIGNMENT); + try { + Header.writeHeader(buf, ledgerMapOffset, numberOfLedgers); + writer.writeAt(0, buf); + } finally { + ReferenceCountUtil.release(buf); + } + writer.flush(); + } + + static EntryLogMetadata read(LogReader reader) throws IOException { + ByteBuf header = reader.readBufferAt(0, Header.LOGFILE_LEGACY_HEADER_SIZE); + try { + int headerVersion = Header.extractVersion(header); + if (headerVersion < Header.HEADER_V1) { + throw new IOException(exMsg("Old log file header").kv("headerVersion", headerVersion).toString()); + } + long ledgerMapOffset = Header.extractLedgerMapOffset(header); + if (ledgerMapOffset > Integer.MAX_VALUE) { + throw new IOException(exMsg("ledgerMapOffset too high").kv("ledgerMapOffset", ledgerMapOffset) + .kv("maxOffset", Integer.MAX_VALUE).toString()); + } + if (ledgerMapOffset <= 0) { + throw new IOException(exMsg("ledgerMap never written").kv("ledgerMapOffset", ledgerMapOffset) + .toString()); + } + + long offset = ledgerMapOffset; + EntryLogMetadata meta = new EntryLogMetadata(reader.logId()); + while (offset < reader.maxOffset()) { + int mapSize = reader.readIntAt((int) offset); + if (mapSize >= LogMetadata.LEDGERS_MAP_MAX_MAP_SIZE) { + throw new IOException(exMsg("ledgerMap too large") + .kv("maxSize", LogMetadata.LEDGERS_MAP_MAX_MAP_SIZE) + .kv("mapSize", mapSize).toString()); + } else if (mapSize <= 0) { + break; + } + 
offset += Integer.BYTES; + + ByteBuf ledgerMapBuffer = reader.readBufferAt(offset, mapSize); + try { + offset += mapSize; + + long ledgerId = ledgerMapBuffer.readLong(); + if (ledgerId != LogMetadata.INVALID_LID) { + throw new IOException(exMsg("Bad ledgerID").kv("ledgerId", ledgerId).toString()); + } + long entryId = ledgerMapBuffer.readLong(); + if (entryId != LogMetadata.LEDGERS_MAP_ENTRY_ID) { + throw new IOException(exMsg("Unexpected entry ID. Expected special value") + .kv("entryIdRead", entryId) + .kv("entryIdExpected", LogMetadata.LEDGERS_MAP_ENTRY_ID).toString()); + } + int countInBatch = ledgerMapBuffer.readInt(); + for (int i = 0; i < countInBatch; i++) { + ledgerId = ledgerMapBuffer.readLong(); + long size = ledgerMapBuffer.readLong(); + meta.addLedgerSize(ledgerId, size); + } + if (ledgerMapBuffer.isReadable()) { + throw new IOException(exMsg("ledgerMapSize didn't match content") + .kv("expectedCount", countInBatch) + .kv("bufferSize", mapSize) + .kv("bytesRemaining", ledgerMapBuffer.readableBytes()) + .toString()); + } + } finally { + ReferenceCountUtil.release(ledgerMapBuffer); + } + } + return meta; + } catch (IOException ioe) { + throw new IOException(exMsg("Error reading index").kv("logId", reader.logId()) + .kv("reason", ioe.getMessage()).toString(), ioe); + } finally { + ReferenceCountUtil.release(header); + } + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/directentrylogger/LogReader.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/directentrylogger/LogReader.java new file mode 100644 index 00000000000..9f865d55699 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/directentrylogger/LogReader.java @@ -0,0 +1,80 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.bookie.storage.directentrylogger; + +import io.netty.buffer.ByteBuf; +import java.io.EOFException; +import java.io.IOException; + +/** + * Interface for reading from a bookkeeper entry log. + */ +public interface LogReader extends AutoCloseable { + /** + * @return the id of the log being read from. + */ + int logId(); + + /** + * @return the maximum offset in the file that can be read from. + */ + long maxOffset(); + + /** + * Read a buffer from the file. It is the responsibility of the caller to release + * the returned buffer. + * @param offset the offset to read at + * @param size the number of bytes to read + * @return a bytebuf. The caller must release. + */ + ByteBuf readBufferAt(long offset, int size) throws IOException, EOFException; + + void readIntoBufferAt(ByteBuf buffer, long offset, int size) throws IOException, EOFException; + + /** + * Read an integer at a given offset. 
+ * @param offset the offset to read from. + * @return the integer at that offset. + */ + int readIntAt(long offset) throws IOException, EOFException; + + /** + * Read a long at a given offset. + * @param offset the offset to read from. + * @return the long at that offset. + */ + long readLongAt(long offset) throws IOException, EOFException; + + /** + * Read an entry at a given offset. + * The size of the entry must be at (offset - Integer.BYTES). + * The payload of the entry starts at offset. + * It is the responsibility of the caller to release the returned buffer. + * @param offset the offset at which to read the entry. + * @return a bytebuf. The caller must release. + */ + ByteBuf readEntryAt(int offset) throws IOException, EOFException; + + @Override + void close() throws IOException; + + boolean isClosed(); +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/directentrylogger/LogReaderScan.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/directentrylogger/LogReaderScan.java new file mode 100644 index 00000000000..9718795143d --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/directentrylogger/LogReaderScan.java @@ -0,0 +1,62 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.bookie.storage.directentrylogger; + +import io.netty.buffer.ByteBuf; +import io.netty.buffer.ByteBufAllocator; +import io.netty.util.ReferenceCountUtil; +import java.io.IOException; +import org.apache.bookkeeper.bookie.storage.EntryLogScanner; + +class LogReaderScan { + static void scan(ByteBufAllocator allocator, LogReader reader, EntryLogScanner scanner) throws IOException { + int offset = Header.LOGFILE_LEGACY_HEADER_SIZE; + + ByteBuf entry = allocator.directBuffer(16 * 1024 * 1024); + + try { + while (offset < reader.maxOffset()) { + int initOffset = offset; + int entrySize = reader.readIntAt(offset); + if (entrySize < 0) { // padding, skip it + offset = Buffer.nextAlignment(offset); + continue; + } else if (entrySize == 0) { // preallocated space, we're done + break; + } + + // The 4 bytes for the entrySize need to be added only after we + // have realigned on the block boundary.
+ offset += Integer.BYTES; + + entry.clear(); + reader.readIntoBufferAt(entry, offset, entrySize); + long ledgerId = entry.getLong(0); + if (ledgerId >= 0 && scanner.accept(ledgerId)) { + scanner.process(ledgerId, initOffset, entry); + } + offset += entrySize; + } + } finally { + ReferenceCountUtil.release(entry); + } + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/directentrylogger/LogWriter.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/directentrylogger/LogWriter.java new file mode 100644 index 00000000000..29af72e73d6 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/directentrylogger/LogWriter.java @@ -0,0 +1,81 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.bookie.storage.directentrylogger; + +import io.netty.buffer.ByteBuf; +import java.io.IOException; + +/** + * Interface for writing data to a bookkeeper entry log. + */ +interface LogWriter extends AutoCloseable { + /** + * Return the ID of the log being written. + */ + int logId(); + + /** + * Write the contents of a buffer at a predefined position in the log. + * Both the position and the size of the buffer must be page aligned (i.e. to 4096). + */ + void writeAt(long offset, ByteBuf buf) throws IOException; + + /** + * Write a delimited buffer the log. The size of the buffer is first + * written and then the buffer itself. + * Note that the returned offset is for the buffer itself, not the size. + * So, if a buffer is written at the start of the file, the returned offset + * will be 4, not 0. + * The returned offset is an int. Consequently, entries can only be written + * in the first Integer.MAX_VALUE bytes of the file. This is due to how + * offsets are stored in the index. + * + * @return the offset of the buffer within the file. + */ + int writeDelimited(ByteBuf buf) throws IOException; + + /** + * @return the number of bytes consumed by the buffer when written with #writeDelimited + */ + int serializedSize(ByteBuf buf); + + /** + * The current offset within the log at which the next call to #writeDelimited will + * start writing. + */ + long position() throws IOException; + + /** + * Set the offset within the log at which the next call to #writeDelimited will start writing. + */ + void position(long offset) throws IOException; + + /** + * Flush all buffered writes to disk. This call must ensure that the bytes are actually on + * disk before returning. + */ + void flush() throws IOException; + + /** + * Close any held resources. 
+ */ + void close() throws IOException; +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/directentrylogger/WriterWithMetadata.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/directentrylogger/WriterWithMetadata.java new file mode 100644 index 00000000000..50960294440 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/directentrylogger/WriterWithMetadata.java @@ -0,0 +1,79 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.bookie.storage.directentrylogger; + +import static com.google.common.base.Preconditions.checkState; + +import io.netty.buffer.ByteBuf; +import io.netty.buffer.ByteBufAllocator; +import java.io.IOException; +import org.apache.bookkeeper.bookie.EntryLogMetadata; + +/** + * WriterWithMetadata. + */ +class WriterWithMetadata { + private final LogWriter writer; + private final EntryLogMetadata metadata; + private final ByteBufAllocator allocator; + + WriterWithMetadata(LogWriter writer, EntryLogMetadata metadata, + ByteBufAllocator allocator) throws IOException { + this.writer = writer; + this.metadata = metadata; + this.allocator = allocator; + + ByteBuf buf = allocator.buffer(Buffer.ALIGNMENT); + try { + Header.writeEmptyHeader(buf); + writer.writeAt(0, buf); + writer.position(buf.capacity()); + } finally { + buf.release(); + } + } + + int logId() { + return writer.logId(); + } + + boolean shouldRoll(ByteBuf entry, long rollThreshold) throws IOException { + return (writer.position() + writer.serializedSize(entry)) > rollThreshold; + } + + long addEntry(long ledgerId, ByteBuf entry) throws IOException { + int size = entry.readableBytes(); + metadata.addLedgerSize(ledgerId, size + Integer.BYTES); + long offset = writer.writeDelimited(entry); + checkState(offset < Integer.MAX_VALUE, "Offsets can't be higher than max int (%d)", offset); + return ((long) writer.logId()) << 32 | offset; + } + + void flush() throws IOException { + writer.flush(); + } + + void finalizeAndClose() throws IOException { + writer.flush(); + LogMetadata.write(writer, metadata, allocator); + writer.close(); + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/directentrylogger/package-info.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/directentrylogger/package-info.java new file mode 100644 index 00000000000..a714867782b --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/directentrylogger/package-info.java @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
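WriterWithMetadata.addEntry above packs the log id into the high 32 bits of the returned location and the intra-log offset into the low 32 bits, which is also why writeDelimited is limited to offsets below Integer.MAX_VALUE: only 32 bits of the location are available for the offset. A round-trip sketch with invented names:

class EntryLocationSketch {
    static long toLocation(int logId, int offset) {
        // Mask keeps the offset's low 32 bits unsigned when widening to long.
        return ((long) logId) << 32 | (offset & 0xFFFFFFFFL);
    }

    public static void main(String[] args) {
        long location = toLocation(456, 8192);
        int logId = (int) (location >>> 32);         // high 32 bits
        int offset = (int) (location & 0xFFFFFFFFL); // low 32 bits
        System.out.println(logId + "@" + offset);    // prints 456@8192
    }
}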
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/** + * Support for bookie entry logs using Direct IO. + */ +package org.apache.bookkeeper.bookie.storage.directentrylogger; diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/ArrayGroupSort.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/ArrayGroupSort.java index 719b33dbc5f..af4f3ef931b 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/ArrayGroupSort.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/ArrayGroupSort.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -22,52 +22,61 @@ import static com.google.common.base.Preconditions.checkArgument; +import lombok.experimental.UtilityClass; + /** * Sort an array of longs, grouping the items in tuples. * *

Group size decides how many longs are included in the tuples and key size controls how many items to use for * comparison. */ +@UtilityClass public class ArrayGroupSort { - private final int keySize; - private final int groupSize; + private static final int INSERTION_SORT_THRESHOLD = 100; - public ArrayGroupSort(int keySize, int groupSize) { - checkArgument(keySize > 0); - checkArgument(groupSize > 0); - checkArgument(keySize <= groupSize, "keySize need to be less or equal the groupSize"); - this.keySize = keySize; - this.groupSize = groupSize; - } + private static final int GROUP_SIZE = 4; public void sort(long[] array) { sort(array, 0, array.length); } - public void sort(long[] array, int offset, int length) { - checkArgument(length % groupSize == 0, "Array length must be multiple of groupSize"); - quickSort(array, offset, (length + offset - groupSize)); + public static void sort(long[] array, int offset, int length) { + checkArgument(length % GROUP_SIZE == 0, "Array length must be multiple of 4"); + quickSort(array, offset, (length + offset - GROUP_SIZE)); } ////// Private - private void quickSort(long array[], int low, int high) { - if (low < high) { - int pivotIdx = partition(array, low, high); - quickSort(array, low, pivotIdx - groupSize); - quickSort(array, pivotIdx + groupSize, high); + private static void quickSort(long[] array, int low, int high) { + if (low >= high) { + return; + } + + if (high - low < INSERTION_SORT_THRESHOLD) { + insertionSort(array, low, high); + return; } + + int pivotIdx = partition(array, low, high); + quickSort(array, low, pivotIdx - GROUP_SIZE); + quickSort(array, pivotIdx + GROUP_SIZE, high); + } + + private static int alignGroup(int count) { + return count - (count % GROUP_SIZE); } - private int partition(long array[], int low, int high) { - int pivotIdx = high; + private static int partition(long[] array, int low, int high) { + int mid = low + alignGroup((high - low) / 2); + swap(array, mid, high); + int i = low; - for (int j = low; j < high; j += groupSize) { - if (isLess(array, j, pivotIdx)) { + for (int j = low; j < high; j += GROUP_SIZE) { + if (isLess(array, j, high)) { swap(array, j, i); - i += groupSize; + i += GROUP_SIZE; } } @@ -75,26 +84,43 @@ private int partition(long array[], int low, int high) { return i; } - private void swap(long array[], int a, int b) { - long tmp; - for (int k = 0; k < groupSize; k++) { - tmp = array[a + k]; - array[a + k] = array[b + k]; - array[b + k] = tmp; + private static void swap(long[] array, int a, int b) { + long tmp0 = array[a]; + long tmp1 = array[a + 1]; + long tmp2 = array[a + 2]; + long tmp3 = array[a + 3]; + + array[a] = array[b]; + array[a + 1] = array[b + 1]; + array[a + 2] = array[b + 2]; + array[a + 3] = array[b + 3]; + + array[b] = tmp0; + array[b + 1] = tmp1; + array[b + 2] = tmp2; + array[b + 3] = tmp3; + } + + private static boolean isLess(long[] array, int a, int b) { + long a0 = array[a]; + long b0 = array[b]; + + if (a0 < b0) { + return true; + } else if (a0 > b0) { + return false; } + + return array[a + 1] < array[b + 1]; } - private boolean isLess(long array[], int idx1, int idx2) { - for (int i = 0; i < keySize; i++) { - long k1 = array[idx1 + i]; - long k2 = array[idx2 + i]; - if (k1 < k2) { - return true; - } else if (k1 > k2) { - return false; + private static void insertionSort(long[] a, int low, int high) { + for (int i = low + GROUP_SIZE; i <= high; i += GROUP_SIZE) { + int j = i; + while (j > 0 && isLess(a, j, j - GROUP_SIZE)) { + swap(a, j, j - GROUP_SIZE); + j -= GROUP_SIZE; } } 
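A usage sketch for the grouped sort above, assuming the patched ArrayGroupSort is on the classpath: each tuple of four longs moves as a unit, and comparison uses only the first two longs (the data is arbitrary):

import java.util.Arrays;

class GroupSortSketch {
    public static void main(String[] args) {
        // Two groups of 4 longs, e.g. (ledgerId, entryId, x, y) style tuples.
        long[] a = {
            9, 1, 0, 100,   // group A
            3, 7, 0, 200,   // group B
        };
        ArrayGroupSort.sort(a);
        // Group B sorts first because 3 < 9; each tuple moves as a unit:
        // [3, 7, 0, 200, 9, 1, 0, 100]
        System.out.println(Arrays.toString(a));
    }
}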
- - return false; } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/ArrayUtil.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/ArrayUtil.java index 67e98f265c3..a0d4141754b 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/ArrayUtil.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/ArrayUtil.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -20,10 +20,6 @@ */ package org.apache.bookkeeper.bookie.storage.ldb; -// CHECKSTYLE.OFF: IllegalImport -import io.netty.util.internal.PlatformDependent; -// CHECKSTYLE.ON: IllegalImport - import java.nio.ByteOrder; /** @@ -31,29 +27,30 @@ */ class ArrayUtil { - private static final boolean UNALIGNED = PlatformDependent.isUnaligned(); - private static final boolean HAS_UNSAFE = PlatformDependent.hasUnsafe(); + private static final boolean UNALIGNED = io.netty.util.internal.PlatformDependent.isUnaligned(); + private static final boolean HAS_UNSAFE = io.netty.util.internal.PlatformDependent.hasUnsafe(); private static final boolean BIG_ENDIAN_NATIVE_ORDER = ByteOrder.nativeOrder() == ByteOrder.BIG_ENDIAN; public static long getLong(byte[] array, int index) { if (HAS_UNSAFE && UNALIGNED) { - long v = PlatformDependent.getLong(array, index); + long v = io.netty.util.internal.PlatformDependent.getLong(array, index); return BIG_ENDIAN_NATIVE_ORDER ? v : Long.reverseBytes(v); } - return ((long) array[index] & 0xff) << 56 | // - ((long) array[index + 1] & 0xff) << 48 | // - ((long) array[index + 2] & 0xff) << 40 | // - ((long) array[index + 3] & 0xff) << 32 | // - ((long) array[index + 4] & 0xff) << 24 | // - ((long) array[index + 5] & 0xff) << 16 | // - ((long) array[index + 6] & 0xff) << 8 | // - (long) array[index + 7] & 0xff; + return ((long) array[index] & 0xff) << 56 + | ((long) array[index + 1] & 0xff) << 48 + | ((long) array[index + 2] & 0xff) << 40 + | ((long) array[index + 3] & 0xff) << 32 + | ((long) array[index + 4] & 0xff) << 24 + | ((long) array[index + 5] & 0xff) << 16 + | ((long) array[index + 6] & 0xff) << 8 + | (long) array[index + 7] & 0xff; } public static void setLong(byte[] array, int index, long value) { if (HAS_UNSAFE && UNALIGNED) { - PlatformDependent.putLong(array, index, BIG_ENDIAN_NATIVE_ORDER ? value : Long.reverseBytes(value)); + io.netty.util.internal.PlatformDependent + .putLong(array, index, BIG_ENDIAN_NATIVE_ORDER ? 
value : Long.reverseBytes(value)); } else { array[index] = (byte) (value >>> 56); array[index + 1] = (byte) (value >>> 48); @@ -66,7 +63,7 @@ public static void setLong(byte[] array, int index, long value) { } } - public static final boolean isArrayAllZeros(final byte[] array) { - return PlatformDependent.isZero(array, 0, array.length); + public static boolean isArrayAllZeros(final byte[] array) { + return io.netty.util.internal.PlatformDependent.isZero(array, 0, array.length); } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/DbLedgerStorage.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/DbLedgerStorage.java index 8753363699f..a47baddd35c 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/DbLedgerStorage.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/DbLedgerStorage.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -22,42 +22,56 @@ import static com.google.common.base.Preconditions.checkNotNull; +// CHECKSTYLE.OFF: IllegalImport import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; - +import com.google.common.collect.Maps; import io.netty.buffer.ByteBuf; +import io.netty.buffer.ByteBufAllocator; import io.netty.util.concurrent.DefaultThreadFactory; - +import io.netty.util.internal.PlatformDependent; import java.io.File; import java.io.IOException; import java.util.ArrayList; +import java.util.EnumSet; +import java.util.HashMap; import java.util.List; -import java.util.SortedMap; +import java.util.Map; +import java.util.PrimitiveIterator.OfLong; +import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; -import java.util.concurrent.ScheduledExecutorService; - +import java.util.stream.Collectors; import lombok.extern.slf4j.Slf4j; - import org.apache.bookkeeper.bookie.BookieException; import org.apache.bookkeeper.bookie.CheckpointSource; import org.apache.bookkeeper.bookie.CheckpointSource.Checkpoint; import org.apache.bookkeeper.bookie.Checkpointer; +import org.apache.bookkeeper.bookie.DefaultEntryLogger; +import org.apache.bookkeeper.bookie.GarbageCollectionStatus; import org.apache.bookkeeper.bookie.LastAddConfirmedUpdateNotification; +import org.apache.bookkeeper.bookie.LedgerCache; import org.apache.bookkeeper.bookie.LedgerDirsManager; import org.apache.bookkeeper.bookie.LedgerStorage; import org.apache.bookkeeper.bookie.StateManager; +import org.apache.bookkeeper.bookie.storage.EntryLogger; +import org.apache.bookkeeper.bookie.storage.directentrylogger.DirectEntryLogger; +import org.apache.bookkeeper.bookie.storage.directentrylogger.EntryLogIdsImpl; import org.apache.bookkeeper.bookie.storage.ldb.KeyValueStorageFactory.DbConfigType; import org.apache.bookkeeper.bookie.storage.ldb.SingleDirectoryDbLedgerStorage.LedgerLoggerProcessor; import org.apache.bookkeeper.common.util.MathUtils; import org.apache.bookkeeper.common.util.Watcher; +import org.apache.bookkeeper.common.util.nativeio.NativeIOImpl; import org.apache.bookkeeper.conf.ServerConfiguration; import org.apache.bookkeeper.meta.LedgerManager; +import org.apache.bookkeeper.slogger.slf4j.Slf4jSlogger; import org.apache.bookkeeper.stats.Gauge; import org.apache.bookkeeper.stats.NullStatsLogger; import org.apache.bookkeeper.stats.StatsLogger; +import 
org.apache.bookkeeper.stats.annotations.StatsDoc; import org.apache.bookkeeper.util.DiskChecker; - +import org.apache.commons.lang3.StringUtils; +// CHECKSTYLE.ON: IllegalImport /** @@ -66,30 +80,82 @@ @Slf4j public class DbLedgerStorage implements LedgerStorage { - static final String WRITE_CACHE_MAX_SIZE_MB = "dbStorage_writeCacheMaxSizeMb"; - - static final String READ_AHEAD_CACHE_MAX_SIZE_MB = "dbStorage_readAheadCacheMaxSizeMb"; + public static final String WRITE_CACHE_MAX_SIZE_MB = "dbStorage_writeCacheMaxSizeMb"; + public static final String READ_AHEAD_CACHE_MAX_SIZE_MB = "dbStorage_readAheadCacheMaxSizeMb"; + public static final String DIRECT_IO_ENTRYLOGGER = "dbStorage_directIOEntryLogger"; + public static final String DIRECT_IO_ENTRYLOGGER_TOTAL_WRITEBUFFER_SIZE_MB = + "dbStorage_directIOEntryLoggerTotalWriteBufferSizeMB"; + public static final String DIRECT_IO_ENTRYLOGGER_TOTAL_READBUFFER_SIZE_MB = + "dbStorage_directIOEntryLoggerTotalReadBufferSizeMB"; + public static final String DIRECT_IO_ENTRYLOGGER_READBUFFER_SIZE_MB = + "dbStorage_directIOEntryLoggerReadBufferSizeMB"; + public static final String DIRECT_IO_ENTRYLOGGER_MAX_FD_CACHE_TIME_SECONDS = + "dbStorage_directIOEntryLoggerMaxFdCacheTimeSeconds"; static final String MAX_THROTTLE_TIME_MILLIS = "dbStorage_maxThrottleTimeMs"; - private static final long DEFAULT_WRITE_CACHE_MAX_SIZE_MB = 16; - private static final long DEFAULT_READ_CACHE_MAX_SIZE_MB = 16; - private static final int MB = 1024 * 1024; + private static final long DEFAULT_WRITE_CACHE_MAX_SIZE_MB = + (long) (0.25 * PlatformDependent.estimateMaxDirectMemory()) / MB; + private static final long DEFAULT_READ_CACHE_MAX_SIZE_MB = + (long) (0.25 * PlatformDependent.estimateMaxDirectMemory()) / MB; + + static final String READ_AHEAD_CACHE_BATCH_SIZE = "dbStorage_readAheadCacheBatchSize"; + static final String READ_AHEAD_CACHE_BATCH_BYTES_SIZE = "dbStorage_readAheadCacheBatchBytesSize"; + private static final int DEFAULT_READ_AHEAD_CACHE_BATCH_SIZE = 100; + // the default value is -1; this feature (a limit on read-ahead bytes) is disabled by default + private static final int DEFAULT_READ_AHEAD_CACHE_BATCH_BYTES_SIZE = -1; + + private static final long DEFAULT_DIRECT_IO_TOTAL_WRITEBUFFER_SIZE_MB = + (long) (0.125 * PlatformDependent.estimateMaxDirectMemory()) + / MB; + private static final long DEFAULT_DIRECT_IO_TOTAL_READBUFFER_SIZE_MB = + (long) (0.125 * PlatformDependent.estimateMaxDirectMemory()) + / MB; + private static final long DEFAULT_DIRECT_IO_READBUFFER_SIZE_MB = 8; + + private static final int DEFAULT_DIRECT_IO_MAX_FD_CACHE_TIME_SECONDS = 300; + + // use the storage assigned to ledger 0 for flags. + // if the storage configuration changes, the flags may be lost, + // but in that case data integrity checks should kick off anyhow.
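A configuration sketch wiring together the direct IO knobs defined above; the property keys are the ones this patch introduces, while the values are arbitrary illustrations, not recommendations:

import org.apache.bookkeeper.conf.ServerConfiguration;

class DirectIoConfigSketch {
    public static void main(String[] args) {
        ServerConfiguration conf = new ServerConfiguration();
        // Switch the DbLedgerStorage entry logger to the direct IO implementation.
        conf.setProperty("dbStorage_directIOEntryLogger", true);
        conf.setProperty("dbStorage_directIOEntryLoggerTotalWriteBufferSizeMB", 256);
        conf.setProperty("dbStorage_directIOEntryLoggerTotalReadBufferSizeMB", 256);
        conf.setProperty("dbStorage_directIOEntryLoggerReadBufferSizeMB", 8);
        conf.setProperty("dbStorage_directIOEntryLoggerMaxFdCacheTimeSeconds", 300);
        // The cache knobs predate this patch but now default to a fraction of
        // the estimated max direct memory, so explicit sizing may still be useful.
        conf.setProperty("dbStorage_writeCacheMaxSizeMb", 512);
        conf.setProperty("dbStorage_readAheadCacheMaxSizeMb", 512);
    }
}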
+ private static final long STORAGE_FLAGS_KEY = 0L; private int numberOfDirs; private List ledgerStorageList; - // Keep 1 single Bookie GC thread so the the compactions from multiple individual directories are serialized - private ScheduledExecutorService gcExecutor; + private ExecutorService entryLoggerWriteExecutor = null; + private ExecutorService entryLoggerFlushExecutor = null; + + protected ByteBufAllocator allocator; + + // parent DbLedgerStorage stats (not per directory) + private static final String MAX_READAHEAD_BATCH_SIZE = "readahead-max-batch-size"; + private static final String MAX_WRITE_CACHE_SIZE = "write-cache-max-size"; + + @StatsDoc( + name = MAX_READAHEAD_BATCH_SIZE, + help = "the configured readahead batch size" + ) + private Gauge readaheadBatchSizeGauge; + + @StatsDoc( + name = MAX_WRITE_CACHE_SIZE, + help = "the configured write cache size" + ) + private Gauge writeCacheSizeGauge; @Override public void initialize(ServerConfiguration conf, LedgerManager ledgerManager, LedgerDirsManager ledgerDirsManager, - LedgerDirsManager indexDirsManager, StateManager stateManager, CheckpointSource checkpointSource, - Checkpointer checkpointer, StatsLogger statsLogger) throws IOException { - long writeCacheMaxSize = conf.getLong(WRITE_CACHE_MAX_SIZE_MB, DEFAULT_WRITE_CACHE_MAX_SIZE_MB) * MB; - long readCacheMaxSize = conf.getLong(READ_AHEAD_CACHE_MAX_SIZE_MB, DEFAULT_READ_CACHE_MAX_SIZE_MB) * MB; + LedgerDirsManager indexDirsManager, StatsLogger statsLogger, ByteBufAllocator allocator) + throws IOException { + long writeCacheMaxSize = getLongVariableOrDefault(conf, WRITE_CACHE_MAX_SIZE_MB, + DEFAULT_WRITE_CACHE_MAX_SIZE_MB) * MB; + long readCacheMaxSize = getLongVariableOrDefault(conf, READ_AHEAD_CACHE_MAX_SIZE_MB, + DEFAULT_READ_CACHE_MAX_SIZE_MB) * MB; + boolean directIOEntryLogger = getBooleanVariableOrDefault(conf, DIRECT_IO_ENTRYLOGGER, false); + this.allocator = allocator; this.numberOfDirs = ledgerDirsManager.getAllLedgerDirs().size(); log.info("Started Db Ledger Storage"); @@ -97,81 +163,142 @@ public void initialize(ServerConfiguration conf, LedgerManager ledgerManager, Le log.info(" - Write cache size: {} MB", writeCacheMaxSize / MB); log.info(" - Read Cache: {} MB", readCacheMaxSize / MB); + if (readCacheMaxSize + writeCacheMaxSize > PlatformDependent.estimateMaxDirectMemory()) { + throw new IOException("Read and write cache sizes exceed the configured max direct memory size"); + } + + if (ledgerDirsManager.getAllLedgerDirs().size() != indexDirsManager.getAllLedgerDirs().size()) { + throw new IOException("ledger and index dirs size not matched"); + } + long perDirectoryWriteCacheSize = writeCacheMaxSize / numberOfDirs; long perDirectoryReadCacheSize = readCacheMaxSize / numberOfDirs; - - gcExecutor = Executors.newSingleThreadScheduledExecutor(new DefaultThreadFactory("GarbageCollector")); + int readAheadCacheBatchSize = conf.getInt(READ_AHEAD_CACHE_BATCH_SIZE, DEFAULT_READ_AHEAD_CACHE_BATCH_SIZE); + long readAheadCacheBatchBytesSize = conf.getInt(READ_AHEAD_CACHE_BATCH_BYTES_SIZE, + DEFAULT_READ_AHEAD_CACHE_BATCH_BYTES_SIZE); ledgerStorageList = Lists.newArrayList(); - for (File ledgerDir : ledgerDirsManager.getAllLedgerDirs()) { + for (int i = 0; i < ledgerDirsManager.getAllLedgerDirs().size(); i++) { + File ledgerDir = ledgerDirsManager.getAllLedgerDirs().get(i); + File indexDir = indexDirsManager.getAllLedgerDirs().get(i); // Create a ledger dirs manager for the single directory - File[] dirs = new File[1]; + File[] lDirs = new File[1]; // Remove the `/current` 
suffix which will be appended again by LedgersDirManager - dirs[0] = ledgerDir.getParentFile(); - LedgerDirsManager ldm = new LedgerDirsManager(conf, dirs, ledgerDirsManager.getDiskChecker(), statsLogger); - ledgerStorageList.add(newSingleDirectoryDbLedgerStorage(conf, ledgerManager, ldm, indexDirsManager, - stateManager, checkpointSource, checkpointer, statsLogger, gcExecutor, perDirectoryWriteCacheSize, - perDirectoryReadCacheSize)); - } + lDirs[0] = ledgerDir.getParentFile(); + LedgerDirsManager ldm = new LedgerDirsManager(conf, lDirs, ledgerDirsManager.getDiskChecker(), + NullStatsLogger.INSTANCE); - registerStats(statsLogger); - } - - @VisibleForTesting - protected SingleDirectoryDbLedgerStorage newSingleDirectoryDbLedgerStorage(ServerConfiguration conf, - LedgerManager ledgerManager, LedgerDirsManager ledgerDirsManager, LedgerDirsManager indexDirsManager, - StateManager stateManager, CheckpointSource checkpointSource, Checkpointer checkpointer, - StatsLogger statsLogger, ScheduledExecutorService gcExecutor, long writeCacheSize, long readCacheSize) - throws IOException { - return new SingleDirectoryDbLedgerStorage(conf, ledgerManager, ledgerDirsManager, indexDirsManager, - stateManager, checkpointSource, checkpointer, statsLogger, gcExecutor, writeCacheSize, readCacheSize); - } - - public void registerStats(StatsLogger stats) { - stats.registerGauge("write-cache-size", new Gauge() { - @Override - public Long getDefaultValue() { - return 0L; - } + // Create a index dirs manager for the single directory + File[] iDirs = new File[1]; + // Remove the `/current` suffix which will be appended again by LedgersDirManager + iDirs[0] = indexDir.getParentFile(); + LedgerDirsManager idm = new LedgerDirsManager(conf, iDirs, indexDirsManager.getDiskChecker(), + NullStatsLogger.INSTANCE); + + EntryLogger entrylogger; + if (directIOEntryLogger) { + long perDirectoryTotalWriteBufferSize = MB * getLongVariableOrDefault( + conf, + DIRECT_IO_ENTRYLOGGER_TOTAL_WRITEBUFFER_SIZE_MB, + DEFAULT_DIRECT_IO_TOTAL_WRITEBUFFER_SIZE_MB) / numberOfDirs; + long perDirectoryTotalReadBufferSize = MB * getLongVariableOrDefault( + conf, + DIRECT_IO_ENTRYLOGGER_TOTAL_READBUFFER_SIZE_MB, + DEFAULT_DIRECT_IO_TOTAL_READBUFFER_SIZE_MB) / numberOfDirs; + int readBufferSize = MB * (int) getLongVariableOrDefault( + conf, + DIRECT_IO_ENTRYLOGGER_READBUFFER_SIZE_MB, + DEFAULT_DIRECT_IO_READBUFFER_SIZE_MB); + int maxFdCacheTimeSeconds = (int) getLongVariableOrDefault( + conf, + DIRECT_IO_ENTRYLOGGER_MAX_FD_CACHE_TIME_SECONDS, + DEFAULT_DIRECT_IO_MAX_FD_CACHE_TIME_SECONDS); + Slf4jSlogger slog = new Slf4jSlogger(DbLedgerStorage.class); + entryLoggerWriteExecutor = Executors.newSingleThreadExecutor( + new DefaultThreadFactory("EntryLoggerWrite")); + entryLoggerFlushExecutor = Executors.newSingleThreadExecutor( + new DefaultThreadFactory("EntryLoggerFlush")); + + int numReadThreads = conf.getNumReadWorkerThreads(); + if (numReadThreads == 0) { + numReadThreads = conf.getServerNumIOThreads(); + } - @Override - public Long getSample() { - return ledgerStorageList.stream().mapToLong(SingleDirectoryDbLedgerStorage::getWriteCacheSize).sum(); + entrylogger = new DirectEntryLogger(ledgerDir, new EntryLogIdsImpl(ldm, slog), + new NativeIOImpl(), + allocator, entryLoggerWriteExecutor, entryLoggerFlushExecutor, + conf.getEntryLogSizeLimit(), + conf.getNettyMaxFrameSizeBytes() - 500, + perDirectoryTotalWriteBufferSize, + perDirectoryTotalReadBufferSize, + readBufferSize, + numReadThreads, + maxFdCacheTimeSeconds, + slog, statsLogger); + } else { 
+ entrylogger = new DefaultEntryLogger(conf, ldm, null, statsLogger, allocator); } - }); - stats.registerGauge("write-cache-count", new Gauge() { - @Override - public Long getDefaultValue() { - return 0L; + ledgerStorageList.add(newSingleDirectoryDbLedgerStorage(conf, ledgerManager, ldm, + idm, entrylogger, + statsLogger, perDirectoryWriteCacheSize, + perDirectoryReadCacheSize, + readAheadCacheBatchSize, readAheadCacheBatchBytesSize)); + ldm.getListeners().forEach(ledgerDirsManager::addLedgerDirsListener); + if (!lDirs[0].getPath().equals(iDirs[0].getPath())) { + idm.getListeners().forEach(indexDirsManager::addLedgerDirsListener); } + } + // parent DbLedgerStorage stats (not per directory) + readaheadBatchSizeGauge = new Gauge() { @Override - public Long getSample() { - return ledgerStorageList.stream().mapToLong(SingleDirectoryDbLedgerStorage::getWriteCacheCount).sum(); - } - }); - stats.registerGauge("read-cache-size", new Gauge() { - @Override - public Long getDefaultValue() { - return 0L; + public Integer getDefaultValue() { + return readAheadCacheBatchSize; } @Override - public Long getSample() { - return ledgerStorageList.stream().mapToLong(SingleDirectoryDbLedgerStorage::getReadCacheSize).sum(); + public Integer getSample() { + return readAheadCacheBatchSize; } - }); - stats.registerGauge("read-cache-count", new Gauge() { + }; + statsLogger.registerGauge(MAX_READAHEAD_BATCH_SIZE, readaheadBatchSizeGauge); + + writeCacheSizeGauge = new Gauge() { @Override public Long getDefaultValue() { - return 0L; + return perDirectoryWriteCacheSize; } @Override public Long getSample() { - return ledgerStorageList.stream().mapToLong(SingleDirectoryDbLedgerStorage::getReadCacheCount).sum(); + return perDirectoryWriteCacheSize; } - }); + }; + statsLogger.registerGauge(MAX_WRITE_CACHE_SIZE, writeCacheSizeGauge); + } + + @VisibleForTesting + protected SingleDirectoryDbLedgerStorage newSingleDirectoryDbLedgerStorage(ServerConfiguration conf, + LedgerManager ledgerManager, LedgerDirsManager ledgerDirsManager, LedgerDirsManager indexDirsManager, + EntryLogger entryLogger, StatsLogger statsLogger, long writeCacheSize, long readCacheSize, + int readAheadCacheBatchSize, long readAheadCacheBatchBytesSize) + throws IOException { + return new SingleDirectoryDbLedgerStorage(conf, ledgerManager, ledgerDirsManager, indexDirsManager, entryLogger, + statsLogger, allocator, writeCacheSize, readCacheSize, + readAheadCacheBatchSize, readAheadCacheBatchBytesSize); + } + + @Override + public void setStateManager(StateManager stateManager) { + ledgerStorageList.forEach(s -> s.setStateManager(stateManager)); + } + @Override + public void setCheckpointSource(CheckpointSource checkpointSource) { + ledgerStorageList.forEach(s -> s.setCheckpointSource(checkpointSource)); + } + @Override + public void setCheckpointer(Checkpointer checkpointer) { + ledgerStorageList.forEach(s -> s.setCheckpointer(checkpointer)); } @Override @@ -184,53 +311,72 @@ public void shutdown() throws InterruptedException { for (LedgerStorage ls : ledgerStorageList) { ls.shutdown(); } + + if (entryLoggerWriteExecutor != null) { + entryLoggerWriteExecutor.shutdown(); + } + if (entryLoggerFlushExecutor != null) { + entryLoggerFlushExecutor.shutdown(); + } } @Override public boolean ledgerExists(long ledgerId) throws IOException { - return getLedgerSorage(ledgerId).ledgerExists(ledgerId); + return getLedgerStorage(ledgerId).ledgerExists(ledgerId); + } + + @Override + public boolean entryExists(long ledgerId, long entryId) throws IOException, BookieException 
{ + return getLedgerStorage(ledgerId).entryExists(ledgerId, entryId); } @Override public boolean setFenced(long ledgerId) throws IOException { - return getLedgerSorage(ledgerId).setFenced(ledgerId); + return getLedgerStorage(ledgerId).setFenced(ledgerId); } @Override - public boolean isFenced(long ledgerId) throws IOException { - return getLedgerSorage(ledgerId).isFenced(ledgerId); + public boolean isFenced(long ledgerId) throws IOException, BookieException { + return getLedgerStorage(ledgerId).isFenced(ledgerId); } @Override public void setMasterKey(long ledgerId, byte[] masterKey) throws IOException { - getLedgerSorage(ledgerId).setMasterKey(ledgerId, masterKey); + getLedgerStorage(ledgerId).setMasterKey(ledgerId, masterKey); } @Override public byte[] readMasterKey(long ledgerId) throws IOException, BookieException { - return getLedgerSorage(ledgerId).readMasterKey(ledgerId); + return getLedgerStorage(ledgerId).readMasterKey(ledgerId); } @Override public long addEntry(ByteBuf entry) throws IOException, BookieException { long ledgerId = entry.getLong(entry.readerIndex()); - return getLedgerSorage(ledgerId).addEntry(entry); + return getLedgerStorage(ledgerId).addEntry(entry); } @Override - public ByteBuf getEntry(long ledgerId, long entryId) throws IOException { - return getLedgerSorage(ledgerId).getEntry(ledgerId, entryId); + public ByteBuf getEntry(long ledgerId, long entryId) throws IOException, BookieException { + return getLedgerStorage(ledgerId).getEntry(ledgerId, entryId); } @Override - public long getLastAddConfirmed(long ledgerId) throws IOException { - return getLedgerSorage(ledgerId).getLastAddConfirmed(ledgerId); + public long getLastAddConfirmed(long ledgerId) throws IOException, BookieException { + return getLedgerStorage(ledgerId).getLastAddConfirmed(ledgerId); } @Override public boolean waitForLastAddConfirmedUpdate(long ledgerId, long previousLAC, Watcher watcher) throws IOException { - return getLedgerSorage(ledgerId).waitForLastAddConfirmedUpdate(ledgerId, previousLAC, watcher); + return getLedgerStorage(ledgerId).waitForLastAddConfirmedUpdate(ledgerId, previousLAC, watcher); + } + + @Override + public void cancelWaitForLastAddConfirmedUpdate(long ledgerId, + Watcher watcher) + throws IOException { + getLedgerStorage(ledgerId).cancelWaitForLastAddConfirmedUpdate(ledgerId, watcher); } @Override @@ -249,7 +395,7 @@ public void checkpoint(Checkpoint checkpoint) throws IOException { @Override public void deleteLedger(long ledgerId) throws IOException { - getLedgerSorage(ledgerId).deleteLedger(ledgerId); + getLedgerStorage(ledgerId).deleteLedger(ledgerId); } @Override @@ -258,29 +404,29 @@ public void registerLedgerDeletionListener(LedgerDeletionListener listener) { } @Override - public void setExplicitlac(long ledgerId, ByteBuf lac) throws IOException { - getLedgerSorage(ledgerId).setExplicitlac(ledgerId, lac); + public void setExplicitLac(long ledgerId, ByteBuf lac) throws IOException { + getLedgerStorage(ledgerId).setExplicitLac(ledgerId, lac); } @Override - public ByteBuf getExplicitLac(long ledgerId) { - return getLedgerSorage(ledgerId).getExplicitLac(ledgerId); + public ByteBuf getExplicitLac(long ledgerId) throws IOException, BookieException { + return getLedgerStorage(ledgerId).getExplicitLac(ledgerId); } public long addLedgerToIndex(long ledgerId, boolean isFenced, byte[] masterKey, - Iterable> entries) throws Exception { - return getLedgerSorage(ledgerId).addLedgerToIndex(ledgerId, isFenced, masterKey, entries); + LedgerCache.PageEntriesIterable pages) throws 
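// Editor's note: a minimal, self-contained sketch (plain Java, hypothetical class
// name) of the ledger-to-directory routing that getLedgerStorage(...) performs.
// DbLedgerStorage shards ledgers across one SingleDirectoryDbLedgerStorage per
// ledger directory, and a sign-safe modulo keeps negative ledger ids in range,
// which a plain % would not.
public final class RoutingSketch {
    // mirrors MathUtils.signSafeMod as used by getLedgerStorage above
    static int signSafeMod(long dividend, int divisor) {
        int mod = (int) (dividend % divisor);
        return mod < 0 ? mod + divisor : mod;
    }

    public static void main(String[] args) {
        int numberOfDirs = 4;
        for (long ledgerId : new long[] {0L, 7L, -3L}) {
            // every call on a given ledger lands on the same directory
            System.out.println("ledger " + ledgerId + " -> dir " + signSafeMod(ledgerId, numberOfDirs));
        }
    }
}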
Exception { + return getLedgerStorage(ledgerId).addLedgerToIndex(ledgerId, isFenced, masterKey, pages); } public long getLastEntryInLedger(long ledgerId) throws IOException { - return getLedgerSorage(ledgerId).getEntryLocationIndex().getLastEntryInLedger(ledgerId); + return getLedgerStorage(ledgerId).getEntryLocationIndex().getLastEntryInLedger(ledgerId); } public long getLocation(long ledgerId, long entryId) throws IOException { - return getLedgerSorage(ledgerId).getEntryLocationIndex().getLocation(ledgerId, entryId); + return getLedgerStorage(ledgerId).getEntryLocationIndex().getLocation(ledgerId, entryId); } - private SingleDirectoryDbLedgerStorage getLedgerSorage(long ledgerId) { + private SingleDirectoryDbLedgerStorage getLedgerStorage(long ledgerId) { return ledgerStorageList.get(MathUtils.signSafeMod(ledgerId, numberOfDirs)); } @@ -293,8 +439,8 @@ public Iterable getActiveLedgersInRange(long firstLedgerId, long lastLedge return Iterables.concat(listIt); } - public ByteBuf getLastEntry(long ledgerId) throws IOException { - return getLedgerSorage(ledgerId).getLastEntry(ledgerId); + public ByteBuf getLastEntry(long ledgerId) throws IOException, BookieException { + return getLedgerStorage(ledgerId).getLastEntry(ledgerId); } @VisibleForTesting @@ -319,18 +465,29 @@ public static void readLedgerIndexEntries(long ledgerId, ServerConfiguration ser LedgerLoggerProcessor processor) throws IOException { checkNotNull(serverConf, "ServerConfiguration can't be null"); - checkNotNull(processor, "LedgerLoggger info processor can't null"); - - LedgerDirsManager ledgerDirsManager = new LedgerDirsManager(serverConf, serverConf.getLedgerDirs(), - new DiskChecker(serverConf.getDiskUsageThreshold(), serverConf.getDiskUsageWarnThreshold())); + checkNotNull(processor, "LedgerLogger info processor can't be null"); + + DiskChecker diskChecker = new DiskChecker(serverConf.getDiskUsageThreshold(), + serverConf.getDiskUsageWarnThreshold()); + LedgerDirsManager ledgerDirsManager = new LedgerDirsManager(serverConf, + serverConf.getLedgerDirs(), diskChecker); + LedgerDirsManager indexDirsManager = ledgerDirsManager; + File[] idxDirs = serverConf.getIndexDirs(); + if (null != idxDirs) { + indexDirsManager = new LedgerDirsManager(serverConf, idxDirs, diskChecker); + } List ledgerDirs = ledgerDirsManager.getAllLedgerDirs(); - + List indexDirs = indexDirsManager.getAllLedgerDirs(); + if (ledgerDirs.size() != indexDirs.size()) { + throw new IOException("number of ledger dirs does not match number of index dirs"); + } int dirIndex = MathUtils.signSafeMod(ledgerId, ledgerDirs.size()); - String ledgerBasePath = ledgerDirs.get(dirIndex).toString(); + String indexBasePath = indexDirs.get(dirIndex).toString(); EntryLocationIndex entryLocationIndex = new EntryLocationIndex(serverConf, - (path, dbConfigType, conf1) -> new KeyValueStorageRocksDB(path, DbConfigType.Small, conf1, true), - ledgerBasePath, NullStatsLogger.INSTANCE); + (basePath, subPath, dbConfigType, conf1) -> + new KeyValueStorageRocksDB(basePath, subPath, DbConfigType.Default, conf1, true), + indexBasePath, NullStatsLogger.INSTANCE); try { long lastEntryId = entryLocationIndex.getLastEntryInLedger(ledgerId); for (long currentEntry = 0; currentEntry <= lastEntryId; currentEntry++) { @@ -348,4 +505,158 @@ public static void readLedgerIndexEntries(long ledgerId, ServerConfiguration ser } } + @Override + public void forceGC() { + ledgerStorageList.stream().forEach(SingleDirectoryDbLedgerStorage::forceGC); + } + + @Override + public void forceGC(boolean forceMajor, boolean forceMinor) { + 
ledgerStorageList.stream().forEach(s -> s.forceGC(forceMajor, forceMinor)); + } + + @Override + public boolean isInForceGC() { + return ledgerStorageList.stream().anyMatch(SingleDirectoryDbLedgerStorage::isInForceGC); + } + + @Override + public void suspendMinorGC() { + ledgerStorageList.stream().forEach(SingleDirectoryDbLedgerStorage::suspendMinorGC); + } + + @Override + public void suspendMajorGC() { + ledgerStorageList.stream().forEach(SingleDirectoryDbLedgerStorage::suspendMajorGC); + } + + @Override + public void resumeMinorGC() { + ledgerStorageList.stream().forEach(SingleDirectoryDbLedgerStorage::resumeMinorGC); + } + + @Override + public void resumeMajorGC() { + ledgerStorageList.stream().forEach(SingleDirectoryDbLedgerStorage::resumeMajorGC); + } + + @Override + public boolean isMajorGcSuspended() { + return ledgerStorageList.stream().allMatch(SingleDirectoryDbLedgerStorage::isMajorGcSuspended); + } + + @Override + public boolean isMinorGcSuspended() { + return ledgerStorageList.stream().allMatch(SingleDirectoryDbLedgerStorage::isMinorGcSuspended); + } + + @Override + public void entryLocationCompact() { + ledgerStorageList.forEach(SingleDirectoryDbLedgerStorage::entryLocationCompact); + } + + @Override + public void entryLocationCompact(List locations) { + for (SingleDirectoryDbLedgerStorage ledgerStorage : ledgerStorageList) { + String entryLocation = ledgerStorage.getEntryLocationDBPath().get(0); + if (locations.contains(entryLocation)) { + ledgerStorage.entryLocationCompact(); + } + } + } + + @Override + public boolean isEntryLocationCompacting() { + return ledgerStorageList.stream().anyMatch(SingleDirectoryDbLedgerStorage::isEntryLocationCompacting); + } + + @Override + public Map isEntryLocationCompacting(List locations) { + HashMap isCompacting = Maps.newHashMap(); + for (SingleDirectoryDbLedgerStorage ledgerStorage : ledgerStorageList) { + String entryLocation = ledgerStorage.getEntryLocationDBPath().get(0); + if (locations.contains(entryLocation)) { + isCompacting.put(entryLocation, ledgerStorage.isEntryLocationCompacting()); + } + } + return isCompacting; + } + + @Override + public List getEntryLocationDBPath() { + List allEntryLocationDBPath = Lists.newArrayList(); + for (SingleDirectoryDbLedgerStorage ledgerStorage : ledgerStorageList) { + allEntryLocationDBPath.addAll(ledgerStorage.getEntryLocationDBPath()); + } + return allEntryLocationDBPath; + } + + @Override + public List getGarbageCollectionStatus() { + return ledgerStorageList.stream() + .map(single -> single.getGarbageCollectionStatus().get(0)).collect(Collectors.toList()); + } + + static long getLongVariableOrDefault(ServerConfiguration conf, String keyName, long defaultValue) { + Object obj = conf.getProperty(keyName); + if (obj instanceof Number) { + return ((Number) obj).longValue(); + } else if (obj == null) { + return defaultValue; + } else if (StringUtils.isEmpty(conf.getString(keyName))) { + return defaultValue; + } else { + return conf.getLong(keyName); + } + } + + static boolean getBooleanVariableOrDefault(ServerConfiguration conf, String keyName, boolean defaultValue) { + Object obj = conf.getProperty(keyName); + if (obj instanceof Boolean) { + return (Boolean) obj; + } else if (obj == null) { + return defaultValue; + } else if (StringUtils.isEmpty(conf.getString(keyName))) { + return defaultValue; + } else { + return conf.getBoolean(keyName); + } + } + + @Override + public OfLong getListOfEntriesOfLedger(long ledgerId) throws IOException { + // check Issue #2078 + throw new 
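// Editor's note: a sketch of the coercion rules getLongVariableOrDefault applies
// above, using a plain Map as a hypothetical stand-in for ServerConfiguration:
// a typed Number wins, absent or empty values fall back to the default, and
// anything else is parsed as a string.
import java.util.HashMap;
import java.util.Map;

final class ConfigCoercionSketch {
    static long longOrDefault(Map<String, Object> conf, String key, long dflt) {
        Object obj = conf.get(key);
        if (obj instanceof Number) {
            return ((Number) obj).longValue();      // value set programmatically
        } else if (obj == null || obj.toString().isEmpty()) {
            return dflt;                            // unset, or empty string from a config file
        } else {
            return Long.parseLong(obj.toString());  // plain string from a config file
        }
    }

    public static void main(String[] args) {
        Map<String, Object> conf = new HashMap<>();
        conf.put("dbStorage_rocksDB_blockCacheSize", "268435456");
        System.out.println(longOrDefault(conf, "dbStorage_rocksDB_blockCacheSize", 0L)); // 268435456
        System.out.println(longOrDefault(conf, "missingKey", 42L));                      // 42
    }
}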
UnsupportedOperationException( + "getListOfEntriesOfLedger method is currently unsupported for DbLedgerStorage"); + } + + @Override + public void setLimboState(long ledgerId) throws IOException { + getLedgerStorage(ledgerId).setLimboState(ledgerId); + } + + @Override + public boolean hasLimboState(long ledgerId) throws IOException { + return getLedgerStorage(ledgerId).hasLimboState(ledgerId); + } + + @Override + public void clearLimboState(long ledgerId) throws IOException { + getLedgerStorage(ledgerId).clearLimboState(ledgerId); + } + + @Override + public EnumSet getStorageStateFlags() throws IOException { + return getLedgerStorage(STORAGE_FLAGS_KEY).getStorageStateFlags(); + } + + @Override + public void setStorageStateFlag(StorageState flag) throws IOException { + getLedgerStorage(STORAGE_FLAGS_KEY).setStorageStateFlag(flag); + } + + @Override + public void clearStorageStateFlag(StorageState flag) throws IOException { + getLedgerStorage(STORAGE_FLAGS_KEY).clearStorageStateFlag(flag); + } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/DbLedgerStorageStats.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/DbLedgerStorageStats.java new file mode 100644 index 00000000000..6546dfde0f8 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/DbLedgerStorageStats.java @@ -0,0 +1,276 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bookkeeper.bookie.storage.ldb; + +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.BOOKIE_ADD_ENTRY; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.BOOKIE_READ_ENTRY; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.BOOKIE_SCOPE; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.CATEGORY_SERVER; + +import java.util.function.Supplier; +import lombok.Getter; +import org.apache.bookkeeper.stats.Counter; +import org.apache.bookkeeper.stats.Gauge; +import org.apache.bookkeeper.stats.OpStatsLogger; +import org.apache.bookkeeper.stats.StatsLogger; +import org.apache.bookkeeper.stats.annotations.StatsDoc; + +/** + * An umbrella class for db ledger storage stats with one instance per + * ledger directory. 
+ */ +@StatsDoc( + name = BOOKIE_SCOPE, + category = CATEGORY_SERVER, + help = "DbLedgerStorage related stats" +) +@Getter +class DbLedgerStorageStats { + + private static final String ADD_ENTRY = "add-entry"; + private static final String READ_ENTRY = "read-entry"; + private static final String READ_ENTRY_LOCATIONS_INDEX_TIME = "read-locations-index-time"; + private static final String READ_ENTRYLOG_TIME = "read-entrylog-time"; + private static final String WRITE_CACHE_HITS = "write-cache-hits"; + private static final String WRITE_CACHE_MISSES = "write-cache-misses"; + private static final String READ_CACHE_HITS = "read-cache-hits"; + private static final String READ_CACHE_MISSES = "read-cache-misses"; + private static final String READAHEAD_BATCH_COUNT = "readahead-batch-count"; + private static final String READAHEAD_BATCH_SIZE = "readahead-batch-size"; + private static final String READAHEAD_TIME = "readahead-time"; + private static final String FLUSH = "flush"; + private static final String FLUSH_ENTRYLOG = "flush-entrylog"; + private static final String FLUSH_LOCATIONS_INDEX = "flush-locations-index"; + private static final String FLUSH_LEDGER_INDEX = "flush-ledger-index"; + private static final String FLUSH_SIZE = "flush-size"; + + @Deprecated + private static final String THROTTLED_WRITE_REQUESTS = "throttled-write-requests"; + // throttled-write-requests is deprecated, use new metric: throttled-write + private static final String THROTTLED_WRITE = "throttled-write"; + private static final String REJECTED_WRITE_REQUESTS = "rejected-write-requests"; + private static final String WRITE_CACHE_SIZE = "write-cache-size"; + private static final String WRITE_CACHE_COUNT = "write-cache-count"; + private static final String READ_CACHE_SIZE = "read-cache-size"; + private static final String READ_CACHE_COUNT = "read-cache-count"; + + @StatsDoc( + name = ADD_ENTRY, + help = "operation stats of adding entries to db ledger storage", + parent = BOOKIE_ADD_ENTRY + ) + private final OpStatsLogger addEntryStats; + @StatsDoc( + name = READ_ENTRY, + help = "operation stats of reading entries from db ledger storage", + parent = BOOKIE_READ_ENTRY + ) + private final OpStatsLogger readEntryStats; + @StatsDoc( + name = READ_ENTRY_LOCATIONS_INDEX_TIME, + help = "time spent reading entries from the locations index of the db ledger storage engine", + parent = READ_ENTRY + ) + private final Counter readFromLocationIndexTime; + @StatsDoc( + name = READ_ENTRYLOG_TIME, + help = "time spent reading entries from the entry log files of the db ledger storage engine", + parent = READ_ENTRY + ) + private final Counter readFromEntryLogTime; + @StatsDoc( + name = WRITE_CACHE_HITS, + help = "number of write cache hits (on reads)", + parent = READ_ENTRY + ) + private final Counter writeCacheHitCounter; + @StatsDoc( + name = WRITE_CACHE_MISSES, + help = "number of write cache misses (on reads)", + parent = READ_ENTRY + ) + private final Counter writeCacheMissCounter; + @StatsDoc( + name = READ_CACHE_HITS, + help = "number of read cache hits", + parent = READ_ENTRY + ) + private final Counter readCacheHitCounter; + @StatsDoc( + name = READ_CACHE_MISSES, + help = "number of read cache misses", + parent = READ_ENTRY + ) + private final Counter readCacheMissCounter; + @StatsDoc( + name = READAHEAD_BATCH_COUNT, + help = "the distribution of num of entries to read in one readahead batch" + ) + private final OpStatsLogger readAheadBatchCountStats; + @StatsDoc( + name = READAHEAD_BATCH_SIZE, + help = "the distribution of num of 
bytes to read in one readahead batch" + ) + private final OpStatsLogger readAheadBatchSizeStats; + @StatsDoc( + name = READAHEAD_TIME, + help = "Time spent on readahead operations" + ) + private final Counter readAheadTime; + @StatsDoc( + name = FLUSH, + help = "operation stats of flushing write cache to entry log files" + ) + private final OpStatsLogger flushStats; + @StatsDoc( + name = FLUSH_ENTRYLOG, + help = "operation stats of flushing to the current entry log file" + ) + private final OpStatsLogger flushEntryLogStats; + @StatsDoc( + name = FLUSH_LOCATIONS_INDEX, + help = "operation stats of flushing to the locations index" + ) + private final OpStatsLogger flushLocationIndexStats; + @StatsDoc( + name = FLUSH_LEDGER_INDEX, + help = "operation stats of flushing to the ledger index" + ) + private final OpStatsLogger flushLedgerIndexStats; + @StatsDoc( + name = FLUSH_SIZE, + help = "the distribution of number of bytes flushed from write cache to entry log files" + ) + private final OpStatsLogger flushSizeStats; + @StatsDoc( + name = THROTTLED_WRITE_REQUESTS, + help = "The number of requests throttled because the write cache is full" + ) + private final Counter throttledWriteRequests; + @StatsDoc( + name = THROTTLED_WRITE, + help = "The stats of writes throttled because the write cache is full" + ) + private final OpStatsLogger throttledWriteStats; + @StatsDoc( + name = REJECTED_WRITE_REQUESTS, + help = "The number of requests rejected because the write cache is full" + ) + private final Counter rejectedWriteRequests; + + @StatsDoc( + name = WRITE_CACHE_SIZE, + help = "Current number of bytes in write cache" + ) + private final Gauge writeCacheSizeGauge; + @StatsDoc( + name = WRITE_CACHE_COUNT, + help = "Current number of entries in write cache" + ) + private final Gauge writeCacheCountGauge; + @StatsDoc( + name = READ_CACHE_SIZE, + help = "Current number of bytes in read cache" + ) + private final Gauge readCacheSizeGauge; + @StatsDoc( + name = READ_CACHE_COUNT, + help = "Current number of entries in read cache" + ) + private final Gauge readCacheCountGauge; + + DbLedgerStorageStats(StatsLogger stats, + Supplier writeCacheSizeSupplier, + Supplier writeCacheCountSupplier, + Supplier readCacheSizeSupplier, + Supplier readCacheCountSupplier) { + addEntryStats = stats.getThreadScopedOpStatsLogger(ADD_ENTRY); + readEntryStats = stats.getThreadScopedOpStatsLogger(READ_ENTRY); + readFromLocationIndexTime = stats.getThreadScopedCounter(READ_ENTRY_LOCATIONS_INDEX_TIME); + readFromEntryLogTime = stats.getThreadScopedCounter(READ_ENTRYLOG_TIME); + readCacheHitCounter = stats.getCounter(READ_CACHE_HITS); + readCacheMissCounter = stats.getCounter(READ_CACHE_MISSES); + writeCacheHitCounter = stats.getCounter(WRITE_CACHE_HITS); + writeCacheMissCounter = stats.getCounter(WRITE_CACHE_MISSES); + readAheadBatchCountStats = stats.getOpStatsLogger(READAHEAD_BATCH_COUNT); + readAheadBatchSizeStats = stats.getOpStatsLogger(READAHEAD_BATCH_SIZE); + readAheadTime = stats.getThreadScopedCounter(READAHEAD_TIME); + flushStats = stats.getOpStatsLogger(FLUSH); + flushEntryLogStats = stats.getOpStatsLogger(FLUSH_ENTRYLOG); + flushLocationIndexStats = stats.getOpStatsLogger(FLUSH_LOCATIONS_INDEX); + flushLedgerIndexStats = stats.getOpStatsLogger(FLUSH_LEDGER_INDEX); + flushSizeStats = stats.getOpStatsLogger(FLUSH_SIZE); + + throttledWriteRequests = stats.getThreadScopedCounter(THROTTLED_WRITE_REQUESTS); + throttledWriteStats = stats.getOpStatsLogger(THROTTLED_WRITE); + rejectedWriteRequests = 
stats.getThreadScopedCounter(REJECTED_WRITE_REQUESTS); + + writeCacheSizeGauge = new Gauge() { + @Override + public Long getDefaultValue() { + return 0L; + } + + @Override + public Long getSample() { + return writeCacheSizeSupplier.get(); + } + }; + stats.registerGauge(WRITE_CACHE_SIZE, writeCacheSizeGauge); + writeCacheCountGauge = new Gauge() { + @Override + public Long getDefaultValue() { + return 0L; + } + + @Override + public Long getSample() { + return writeCacheCountSupplier.get(); + } + }; + stats.registerGauge(WRITE_CACHE_COUNT, writeCacheCountGauge); + readCacheSizeGauge = new Gauge() { + @Override + public Long getDefaultValue() { + return 0L; + } + + @Override + public Long getSample() { + return readCacheSizeSupplier.get(); + } + }; + stats.registerGauge(READ_CACHE_SIZE, readCacheSizeGauge); + readCacheCountGauge = new Gauge() { + + @Override + public Long getDefaultValue() { + return 0L; + } + + @Override + public Long getSample() { + return readCacheCountSupplier.get(); + } + }; + stats.registerGauge(READ_CACHE_COUNT, readCacheCountGauge); + } + +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/EntryLocationIndex.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/EntryLocationIndex.java index 21b87e2be96..d06822ba6b7 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/EntryLocationIndex.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/EntryLocationIndex.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -21,20 +21,17 @@ package org.apache.bookkeeper.bookie.storage.ldb; import com.google.common.collect.Iterables; - import java.io.Closeable; import java.io.IOException; -import java.nio.file.FileSystems; import java.util.Map.Entry; import java.util.Set; import java.util.concurrent.TimeUnit; - import org.apache.bookkeeper.bookie.Bookie; import org.apache.bookkeeper.bookie.EntryLocation; import org.apache.bookkeeper.bookie.storage.ldb.KeyValueStorage.Batch; import org.apache.bookkeeper.bookie.storage.ldb.KeyValueStorageFactory.DbConfigType; +import org.apache.bookkeeper.common.util.MathUtils; import org.apache.bookkeeper.conf.ServerConfiguration; -import org.apache.bookkeeper.stats.Gauge; import org.apache.bookkeeper.stats.StatsLogger; import org.apache.bookkeeper.util.collections.ConcurrentLongHashSet; import org.slf4j.Logger; @@ -49,35 +46,23 @@ public class EntryLocationIndex implements Closeable { private final KeyValueStorage locationsDb; - private final ConcurrentLongHashSet deletedLedgers = new ConcurrentLongHashSet(); - - private StatsLogger stats; + private final ConcurrentLongHashSet deletedLedgers = ConcurrentLongHashSet.newBuilder().build(); + private final EntryLocationIndexStats stats; + private boolean isCompacting; public EntryLocationIndex(ServerConfiguration conf, KeyValueStorageFactory storageFactory, String basePath, StatsLogger stats) throws IOException { - String locationsDbPath = FileSystems.getDefault().getPath(basePath, "locations").toFile().toString(); - locationsDb = storageFactory.newKeyValueStorage(locationsDbPath, DbConfigType.Huge, conf); - - this.stats = stats; - registerStats(); - } + locationsDb = storageFactory.newKeyValueStorage(basePath, "locations", DbConfigType.EntryLocation, conf); - public void registerStats() { - stats.registerGauge("entries-count", new Gauge() { - @Override - public Long 
getDefaultValue() { - return 0L; - } - - @Override - public Long getSample() { + this.stats = new EntryLocationIndexStats( + stats, + () -> { try { return locationsDb.count(); } catch (IOException e) { return -1L; } - } - }); + }); } @Override @@ -89,6 +74,8 @@ public long getLocation(long ledgerId, long entryId) throws IOException { LongPairWrapper key = LongPairWrapper.get(ledgerId, entryId); LongWrapper value = LongWrapper.get(); + long startTimeNanos = MathUtils.nowInNano(); + boolean operationSuccess = false; try { if (locationsDb.get(key.array, value.array) < 0) { if (log.isDebugEnabled()) { @@ -96,20 +83,34 @@ public long getLocation(long ledgerId, long entryId) throws IOException { } return 0; } - + operationSuccess = true; return value.getValue(); } finally { key.recycle(); value.recycle(); + if (operationSuccess) { + stats.getLookupEntryLocationStats() + .registerSuccessfulEvent(MathUtils.elapsedNanos(startTimeNanos), TimeUnit.NANOSECONDS); + } else { + stats.getLookupEntryLocationStats() + .registerFailedEvent(MathUtils.elapsedNanos(startTimeNanos), TimeUnit.NANOSECONDS); + } } } public long getLastEntryInLedger(long ledgerId) throws IOException { if (deletedLedgers.contains(ledgerId)) { // Ledger already deleted - return -1; + if (log.isDebugEnabled()) { + log.debug("Ledger {} already deleted in db", ledgerId); + } + /** + * When the ledger has already been deleted, throw Bookie.NoEntryException, + * matching the behavior of {@link EntryLocationIndex#getLastEntryInLedgerInternal} + * when the ledgerId is not found. + */ + throw new Bookie.NoEntryException(ledgerId, -1); } - return getLastEntryInLedgerInternal(ledgerId); } @@ -189,27 +190,37 @@ public void delete(long ledgerId) throws IOException { deletedLedgers.add(ledgerId); } - private static final int DELETE_ENTRIES_BATCH_SIZE = 100000; + public String getEntryLocationDBPath() { + return locationsDb.getDBPath(); + } - public void removeOffsetFromDeletedLedgers() throws IOException { - LongPairWrapper firstKeyWrapper = LongPairWrapper.get(-1, -1); - LongPairWrapper lastKeyWrapper = LongPairWrapper.get(-1, -1); - LongPairWrapper keyToDelete = LongPairWrapper.get(-1, -1); + public void compact() throws IOException { + try { + isCompacting = true; + locationsDb.compact(); + } finally { + isCompacting = false; + } + } + + public boolean isCompacting() { + return isCompacting; + } + public void removeOffsetFromDeletedLedgers() throws IOException { Set ledgersToDelete = deletedLedgers.items(); if (ledgersToDelete.isEmpty()) { return; } + LongPairWrapper firstKeyWrapper = LongPairWrapper.get(-1, -1); + LongPairWrapper lastKeyWrapper = LongPairWrapper.get(-1, -1); + log.info("Deleting indexes for ledgers: {}", ledgersToDelete); long startTime = System.nanoTime(); - long deletedEntries = 0; - long deletedEntriesInBatch = 0; - - Batch batch = locationsDb.newBatch(); - try { + try (Batch batch = locationsDb.newBatch()) { for (long ledgerId : ledgersToDelete) { if (log.isDebugEnabled()) { log.debug("Deleting indexes from ledger {}", ledgerId); @@ -218,59 +229,20 @@ public void removeOffsetFromDeletedLedgers() throws IOException { firstKeyWrapper.set(ledgerId, 0); lastKeyWrapper.set(ledgerId, Long.MAX_VALUE); - Entry firstKeyRes = locationsDb.getCeil(firstKeyWrapper.array); - if (firstKeyRes == null || ArrayUtil.getLong(firstKeyRes.getKey(), 0) != ledgerId) { - // No entries found for ledger - if (log.isDebugEnabled()) { - log.debug("No entries found for ledger {}", ledgerId); - } - continue; - } - - long firstEntryId = 
ArrayUtil.getLong(firstKeyRes.getKey(), 8); - long lastEntryId = getLastEntryInLedgerInternal(ledgerId); - if (log.isDebugEnabled()) { - log.debug("Deleting index for ledger {} entries ({} -> {})", - ledgerId, firstEntryId, lastEntryId); - } - - // Iterate over all the keys and remove each of them - for (long entryId = firstEntryId; entryId <= lastEntryId; entryId++) { - keyToDelete.set(ledgerId, entryId); - if (log.isDebugEnabled()) { - log.debug("Deleting index for ({}, {})", keyToDelete.getFirst(), keyToDelete.getSecond()); - } - batch.remove(keyToDelete.array); - ++deletedEntriesInBatch; - ++deletedEntries; - } + batch.deleteRange(firstKeyWrapper.array, lastKeyWrapper.array); + } - if (deletedEntriesInBatch > DELETE_ENTRIES_BATCH_SIZE) { - batch.flush(); - batch.clear(); - deletedEntriesInBatch = 0; - } + batch.flush(); + for (long ledgerId : ledgersToDelete) { + deletedLedgers.remove(ledgerId); + } } finally { - try { - batch.flush(); - batch.clear(); - } finally { - - firstKeyWrapper.recycle(); - lastKeyWrapper.recycle(); - keyToDelete.recycle(); - batch.close(); - } + firstKeyWrapper.recycle(); + lastKeyWrapper.recycle(); } - log.info("Deleted indexes for {} entries from {} ledgers in {} seconds", deletedEntries, ledgersToDelete.size(), + log.info("Deleted indexes from {} ledgers in {} seconds", ledgersToDelete.size(), TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startTime) / 1000.0); - - // Removed from pending set - for (long ledgerId : ledgersToDelete) { - deletedLedgers.remove(ledgerId); - } } private static final Logger log = LoggerFactory.getLogger(EntryLocationIndex.class); diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/EntryLocationIndexStats.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/EntryLocationIndexStats.java new file mode 100644 index 00000000000..80bdda48256 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/EntryLocationIndexStats.java @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bookkeeper.bookie.storage.ldb; + +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.BOOKIE_SCOPE; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.CATEGORY_SERVER; + +import java.util.function.Supplier; +import lombok.Getter; +import org.apache.bookkeeper.stats.Gauge; +import org.apache.bookkeeper.stats.OpStatsLogger; +import org.apache.bookkeeper.stats.StatsLogger; +import org.apache.bookkeeper.stats.annotations.StatsDoc; + +/** + * An umbrella class for entry location index stats. 
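// Editor's note: the range delete above relies on the key layout that
// LongPairWrapper produces: (ledgerId, entryId) packed big-endian into 16 bytes,
// so byte-wise lexicographic order matches numeric order for the non-negative
// ids BookKeeper uses. A minimal sketch of that layout:
import java.nio.ByteBuffer;
import java.util.Arrays;

final class LocationKeySketch {
    static byte[] key(long ledgerId, long entryId) {
        return ByteBuffer.allocate(16).putLong(ledgerId).putLong(entryId).array();
    }

    public static void main(String[] args) {
        byte[] first = key(5L, 0L);            // first possible entry of ledger 5
        byte[] last = key(5L, Long.MAX_VALUE); // upper bound for ledger 5
        // deleteRange(first, last) therefore covers every entry of the ledger
        System.out.println(Arrays.compare(first, last) < 0); // true
    }
}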
+ */ +@StatsDoc( + name = BOOKIE_SCOPE, + category = CATEGORY_SERVER, + help = "Entry location index stats" +) +@Getter +class EntryLocationIndexStats { + + private static final String ENTRIES_COUNT = "entries-count"; + private static final String LOOKUP_ENTRY_LOCATION = "lookup-entry-location"; + + @StatsDoc( + name = ENTRIES_COUNT, + help = "Current number of entries" + ) + private final Gauge entriesCountGauge; + + @StatsDoc( + name = LOOKUP_ENTRY_LOCATION, + help = "operation stats of looking up entry location" + ) + private final OpStatsLogger lookupEntryLocationStats; + + EntryLocationIndexStats(StatsLogger statsLogger, + Supplier entriesCountSupplier) { + entriesCountGauge = new Gauge() { + @Override + public Long getDefaultValue() { + return 0L; + } + + @Override + public Long getSample() { + return entriesCountSupplier.get(); + } + }; + statsLogger.registerGauge(ENTRIES_COUNT, entriesCountGauge); + lookupEntryLocationStats = statsLogger.getOpStatsLogger(LOOKUP_ENTRY_LOCATION); + } + +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/KeyValueStorage.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/KeyValueStorage.java index aa0119ad6bc..8e18148c085 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/KeyValueStorage.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/KeyValueStorage.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -96,6 +96,26 @@ public interface KeyValueStorage extends Closeable { */ void delete(byte[] key) throws IOException; + /** + * Compact storage within a specified range. + * + * @param firstKey + * the first key in the range (included) + * @param lastKey + * the last key in the range (not included) + */ + default void compact(byte[] firstKey, byte[] lastKey) throws IOException {} + + /** + * Compact storage full range. + */ + default void compact() throws IOException {} + + /** + * Get storage path. + */ + String getDBPath(); + /** * Get an iterator over to scan sequentially through all the keys in the * database. @@ -158,5 +178,9 @@ public interface Batch extends Closeable { void clear(); void flush() throws IOException; + + default int batchCount() { + return -1; + } } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/KeyValueStorageFactory.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/KeyValueStorageFactory.java index c35628d7789..e510f381edb 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/KeyValueStorageFactory.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/KeyValueStorageFactory.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -21,7 +21,6 @@ package org.apache.bookkeeper.bookie.storage.ldb; import java.io.IOException; - import org.apache.bookkeeper.conf.ServerConfiguration; /** @@ -33,10 +32,12 @@ public interface KeyValueStorageFactory { * Enum used to specify different config profiles in the underlying storage. 
*/ enum DbConfigType { - Small, // Used for ledgers db, doesn't need particular configuration - Huge // Used for location index, lots of writes and much bigger dataset + Default, // Used by default, by command-line tools, and in test cases + LedgerMetadata, // Used for ledgers db, doesn't need particular configuration + EntryLocation // Used for location index, lots of writes and much bigger dataset } - KeyValueStorage newKeyValueStorage(String path, DbConfigType dbConfigType, ServerConfiguration conf) + KeyValueStorage newKeyValueStorage(String defaultBasePath, String subPath, DbConfigType dbConfigType, + ServerConfiguration conf) throws IOException; } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/KeyValueStorageRocksDB.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/KeyValueStorageRocksDB.java index a343b59a814..a77a0a18f7c 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/KeyValueStorageRocksDB.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/KeyValueStorageRocksDB.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -20,27 +20,46 @@ */ package org.apache.bookkeeper.bookie.storage.ldb; -import static com.google.common.base.Preconditions.checkState; -import com.google.common.primitives.UnsignedBytes; +import static com.google.common.base.Preconditions.checkState; +//CHECKSTYLE.OFF: IllegalImport +//CHECKSTYLE.OFF: ImportOrder +import static io.netty.util.internal.PlatformDependent.maxDirectMemory; +//CHECKSTYLE.ON: IllegalImport +//CHECKSTYLE.ON: ImportOrder +import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; import java.io.IOException; -import java.util.Comparator; +import java.nio.file.FileSystems; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.List; import java.util.Map.Entry; import java.util.concurrent.TimeUnit; - import org.apache.bookkeeper.bookie.storage.ldb.KeyValueStorageFactory.DbConfigType; import org.apache.bookkeeper.conf.ServerConfiguration; import org.rocksdb.BlockBasedTableConfig; import org.rocksdb.BloomFilter; +import org.rocksdb.Cache; import org.rocksdb.ChecksumType; +import org.rocksdb.ColumnFamilyDescriptor; +import org.rocksdb.ColumnFamilyHandle; import org.rocksdb.CompressionType; +import org.rocksdb.DBOptions; +import org.rocksdb.Env; import org.rocksdb.InfoLogLevel; +import org.rocksdb.LRUCache; +import org.rocksdb.LiveFileMetaData; import org.rocksdb.Options; +import org.rocksdb.OptionsUtil; import org.rocksdb.ReadOptions; import org.rocksdb.RocksDB; import org.rocksdb.RocksDBException; import org.rocksdb.RocksIterator; +import org.rocksdb.RocksObject; +import org.rocksdb.Slice; import org.rocksdb.WriteBatch; import org.rocksdb.WriteOptions; import org.slf4j.Logger; @@ -51,19 +70,25 @@ */ public class KeyValueStorageRocksDB implements KeyValueStorage { - static KeyValueStorageFactory factory = (path, dbConfigType, conf) -> new KeyValueStorageRocksDB(path, dbConfigType, - conf); + static KeyValueStorageFactory factory = (defaultBasePath, subPath, dbConfigType, conf) -> + new KeyValueStorageRocksDB(defaultBasePath, subPath, dbConfigType, conf); private final RocksDB db; + private RocksObject options; + private List columnFamilyDescriptors; private final WriteOptions optionSync; private final WriteOptions optionDontSync; + private Cache 
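// Editor's note: a hedged usage sketch of the widened constructor/factory
// signature introduced here (paths hypothetical; assumes a class in the same
// package). The storage now resolves basePath + subPath itself and picks its
// RocksDB tuning profile from the DbConfigType instead of the old Small/Huge
// pair:
import java.io.IOException;
import org.apache.bookkeeper.bookie.storage.ldb.KeyValueStorageFactory.DbConfigType;
import org.apache.bookkeeper.conf.ServerConfiguration;

final class OpenLocationsDbSketch {
    static KeyValueStorage openLocationsDb(ServerConfiguration conf) throws IOException {
        return new KeyValueStorageRocksDB(
                "/data/bookkeeper/ledgers/current", // base path, one per ledger dir
                "locations",                        // sub-database directory name
                DbConfigType.EntryLocation,         // write-heavy location-index profile
                conf);
    }
}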
cache; private final ReadOptions optionCache; private final ReadOptions optionDontCache; - private final WriteBatch emptyBatch; + private final int writeBatchMaxSize; + + private String dbPath; + private static final String ROCKSDB_LOG_PATH = "dbStorage_rocksDB_logPath"; private static final String ROCKSDB_LOG_LEVEL = "dbStorage_rocksDB_logLevel"; private static final String ROCKSDB_LZ4_COMPRESSION_ENABLED = "dbStorage_rocksDB_lz4CompressionEnabled"; private static final String ROCKSDB_WRITE_BUFFER_SIZE_MB = "dbStorage_rocksDB_writeBufferSizeMB"; @@ -74,12 +99,16 @@ public class KeyValueStorageRocksDB implements KeyValueStorage { private static final String ROCKSDB_NUM_LEVELS = "dbStorage_rocksDB_numLevels"; private static final String ROCKSDB_NUM_FILES_IN_LEVEL0 = "dbStorage_rocksDB_numFilesInLevel0"; private static final String ROCKSDB_MAX_SIZE_IN_LEVEL1_MB = "dbStorage_rocksDB_maxSizeInLevel1MB"; + private static final String ROCKSDB_FORMAT_VERSION = "dbStorage_rocksDB_format_version"; + private static final String ROCKSDB_CHECKSUM_TYPE = "dbStorage_rocksDB_checksum_type"; - public KeyValueStorageRocksDB(String path, DbConfigType dbConfigType, ServerConfiguration conf) throws IOException { - this(path, dbConfigType, conf, false); + public KeyValueStorageRocksDB(String basePath, String subPath, DbConfigType dbConfigType, ServerConfiguration conf) + throws IOException { + this(basePath, subPath, dbConfigType, conf, false); } - public KeyValueStorageRocksDB(String path, DbConfigType dbConfigType, ServerConfiguration conf, boolean readOnly) + public KeyValueStorageRocksDB(String basePath, String subPath, DbConfigType dbConfigType, ServerConfiguration conf, + boolean readOnly) throws IOException { try { RocksDB.loadLibrary(); @@ -93,58 +122,136 @@ public KeyValueStorageRocksDB(String path, DbConfigType dbConfigType, ServerConf this.optionDontCache = new ReadOptions(); this.emptyBatch = new WriteBatch(); - try (Options options = new Options()) { - options.setCreateIfMissing(true); - - if (dbConfigType == DbConfigType.Huge) { - long writeBufferSizeMB = conf.getInt(ROCKSDB_WRITE_BUFFER_SIZE_MB, 64); - long sstSizeMB = conf.getInt(ROCKSDB_SST_SIZE_MB, 64); - int numLevels = conf.getInt(ROCKSDB_NUM_LEVELS, -1); - int numFilesInLevel0 = conf.getInt(ROCKSDB_NUM_FILES_IN_LEVEL0, 4); - long maxSizeInLevel1MB = conf.getLong(ROCKSDB_MAX_SIZE_IN_LEVEL1_MB, 256); - int blockSize = conf.getInt(ROCKSDB_BLOCK_SIZE, 64 * 1024); - long blockCacheSize = conf.getLong(ROCKSDB_BLOCK_CACHE_SIZE, 256 * 1024 * 1024); - int bloomFilterBitsPerKey = conf.getInt(ROCKSDB_BLOOM_FILTERS_BITS_PER_KEY, 10); - boolean lz4CompressionEnabled = conf.getBoolean(ROCKSDB_LZ4_COMPRESSION_ENABLED, true); - - if (lz4CompressionEnabled) { - options.setCompressionType(CompressionType.LZ4_COMPRESSION); - } + String dbFilePath = ""; + if (dbConfigType == DbConfigType.EntryLocation) { + dbFilePath = conf.getEntryLocationRocksdbConf(); + } else if (dbConfigType == DbConfigType.LedgerMetadata) { + dbFilePath = conf.getLedgerMetadataRocksdbConf(); + } else { + dbFilePath = conf.getDefaultRocksDBConf(); + } + log.info("Searching for a RocksDB configuration file in {}", dbFilePath); + if (Paths.get(dbFilePath).toFile().exists()) { + log.info("Found a RocksDB configuration file and using it to initialize the RocksDB"); + db = initializeRocksDBWithConfFile(basePath, subPath, dbConfigType, conf, readOnly, dbFilePath); + } else { + log.info("No RocksDB configuration file found; reading the configuration from the main bookkeeper configuration"); + db = 
initializeRocksDBWithBookieConf(basePath, subPath, dbConfigType, conf, readOnly); + } + + optionSync.setSync(true); + optionDontSync.setSync(false); + + optionCache.setFillCache(true); + optionDontCache.setFillCache(false); + + this.writeBatchMaxSize = conf.getMaxOperationNumbersInSingleRocksDBBatch(); + } + + private RocksDB initializeRocksDBWithConfFile(String basePath, String subPath, DbConfigType dbConfigType, + ServerConfiguration conf, boolean readOnly, + String dbFilePath) throws IOException { + DBOptions dbOptions = new DBOptions(); + final List cfDescs = new ArrayList<>(); + final List cfHandles = new ArrayList<>(); + try { + OptionsUtil.loadOptionsFromFile(dbFilePath, Env.getDefault(), dbOptions, cfDescs, false); + // Configure file path + String logPath = conf.getString(ROCKSDB_LOG_PATH, ""); + if (!logPath.isEmpty()) { + Path logPathSetting = FileSystems.getDefault().getPath(logPath, subPath); + Files.createDirectories(logPathSetting); + log.info("RocksDB<{}> log path: {}", subPath, logPathSetting); + dbOptions.setDbLogDir(logPathSetting.toString()); + } + this.dbPath = FileSystems.getDefault().getPath(basePath, subPath).toFile().toString(); + this.options = dbOptions; + this.columnFamilyDescriptors = cfDescs; + if (readOnly) { + return RocksDB.openReadOnly(dbOptions, dbPath, cfDescs, cfHandles); + } else { + return RocksDB.open(dbOptions, dbPath, cfDescs, cfHandles); + } + } catch (RocksDBException e) { + throw new IOException("Error open RocksDB database", e); + } + } + + private RocksDB initializeRocksDBWithBookieConf(String basePath, String subPath, DbConfigType dbConfigType, + ServerConfiguration conf, boolean readOnly) throws IOException { + Options options = new Options(); + options.setCreateIfMissing(true); + ChecksumType checksumType = ChecksumType.valueOf(conf.getString(ROCKSDB_CHECKSUM_TYPE, "kxxHash")); + + if (dbConfigType == DbConfigType.EntryLocation) { + /* Set default RocksDB block-cache size to 10% / numberOfLedgers of direct memory, unless override */ + int ledgerDirsSize = conf.getLedgerDirNames().length; + long defaultRocksDBBlockCacheSizeBytes = maxDirectMemory() / ledgerDirsSize / 10; + long blockCacheSize = DbLedgerStorage.getLongVariableOrDefault(conf, ROCKSDB_BLOCK_CACHE_SIZE, + defaultRocksDBBlockCacheSizeBytes); + + long writeBufferSizeMB = conf.getInt(ROCKSDB_WRITE_BUFFER_SIZE_MB, 64); + long sstSizeMB = conf.getInt(ROCKSDB_SST_SIZE_MB, 64); + int numLevels = conf.getInt(ROCKSDB_NUM_LEVELS, -1); + int numFilesInLevel0 = conf.getInt(ROCKSDB_NUM_FILES_IN_LEVEL0, 4); + long maxSizeInLevel1MB = conf.getLong(ROCKSDB_MAX_SIZE_IN_LEVEL1_MB, 256); + int blockSize = conf.getInt(ROCKSDB_BLOCK_SIZE, 64 * 1024); + int bloomFilterBitsPerKey = conf.getInt(ROCKSDB_BLOOM_FILTERS_BITS_PER_KEY, 10); + boolean lz4CompressionEnabled = conf.getBoolean(ROCKSDB_LZ4_COMPRESSION_ENABLED, true); + int formatVersion = conf.getInt(ROCKSDB_FORMAT_VERSION, 2); + + if (lz4CompressionEnabled) { options.setCompressionType(CompressionType.LZ4_COMPRESSION); - options.setWriteBufferSize(writeBufferSizeMB * 1024 * 1024); - options.setMaxWriteBufferNumber(4); - if (numLevels > 0) { - options.setNumLevels(numLevels); - } - options.setLevelZeroFileNumCompactionTrigger(numFilesInLevel0); - options.setMaxBytesForLevelBase(maxSizeInLevel1MB * 1024 * 1024); - options.setMaxBackgroundCompactions(16); - options.setMaxBackgroundFlushes(16); - options.setIncreaseParallelism(32); - options.setMaxTotalWalSize(512 * 1024 * 1024); - options.setMaxOpenFiles(-1); - 
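// Editor's note: a worked example (hypothetical numbers) of the default
// block-cache sizing used above when no dbStorage_rocksDB_blockCacheSize is
// configured: one tenth of direct memory, split across the ledger directories.
final class BlockCacheSizingSketch {
    public static void main(String[] args) {
        long maxDirectMemory = 8L * 1024 * 1024 * 1024; // assume 8 GiB of direct memory
        int ledgerDirs = 2;                             // assume two ledger directories
        long defaultBlockCacheBytes = maxDirectMemory / ledgerDirs / 10;
        System.out.println(defaultBlockCacheBytes);     // 429496729 bytes, about 410 MiB per locations db
    }
}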
options.setTargetFileSizeBase(sstSizeMB * 1024 * 1024); - options.setDeleteObsoleteFilesPeriodMicros(TimeUnit.HOURS.toMicros(1)); - - BlockBasedTableConfig tableOptions = new BlockBasedTableConfig(); - tableOptions.setBlockSize(blockSize); - tableOptions.setBlockCacheSize(blockCacheSize); - tableOptions.setFormatVersion(2); - tableOptions.setChecksumType(ChecksumType.kxxHash); - if (bloomFilterBitsPerKey > 0) { - tableOptions.setFilter(new BloomFilter(bloomFilterBitsPerKey, false)); - } - - // Options best suited for HDDs - tableOptions.setCacheIndexAndFilterBlocks(true); - options.setLevelCompactionDynamicLevelBytes(true); - - options.setTableFormatConfig(tableOptions); } + options.setWriteBufferSize(writeBufferSizeMB * 1024 * 1024); + options.setMaxWriteBufferNumber(4); + if (numLevels > 0) { + options.setNumLevels(numLevels); + } + options.setLevelZeroFileNumCompactionTrigger(numFilesInLevel0); + options.setMaxBytesForLevelBase(maxSizeInLevel1MB * 1024 * 1024); + options.setMaxBackgroundJobs(32); + options.setIncreaseParallelism(32); + options.setMaxTotalWalSize(512 * 1024 * 1024); + options.setMaxOpenFiles(-1); + options.setTargetFileSizeBase(sstSizeMB * 1024 * 1024); + options.setDeleteObsoleteFilesPeriodMicros(TimeUnit.HOURS.toMicros(1)); + + this.cache = new LRUCache(blockCacheSize); + BlockBasedTableConfig tableOptions = new BlockBasedTableConfig(); + tableOptions.setBlockSize(blockSize); + tableOptions.setBlockCache(cache); + tableOptions.setFormatVersion(formatVersion); + tableOptions.setChecksumType(checksumType); + if (bloomFilterBitsPerKey > 0) { + tableOptions.setFilterPolicy(new BloomFilter(bloomFilterBitsPerKey, false)); + } + + // Options best suited for HDDs + tableOptions.setCacheIndexAndFilterBlocks(true); + options.setLevelCompactionDynamicLevelBytes(true); - // Configure log level - String logLevel = conf.getString(ROCKSDB_LOG_LEVEL, "info"); - switch (logLevel) { + options.setTableFormatConfig(tableOptions); + } else { + this.cache = null; + BlockBasedTableConfig tableOptions = new BlockBasedTableConfig(); + tableOptions.setChecksumType(checksumType); + options.setTableFormatConfig(tableOptions); + } + + // Configure file path + String logPath = conf.getString(ROCKSDB_LOG_PATH, ""); + if (!logPath.isEmpty()) { + Path logPathSetting = FileSystems.getDefault().getPath(logPath, subPath); + Files.createDirectories(logPathSetting); + log.info("RocksDB<{}> log path: {}", subPath, logPathSetting); + options.setDbLogDir(logPathSetting.toString()); + } + this.dbPath = FileSystems.getDefault().getPath(basePath, subPath).toFile().toString(); + + // Configure log level + String logLevel = conf.getString(ROCKSDB_LOG_LEVEL, "info"); + switch (logLevel) { case "debug": options.setInfoLogLevel(InfoLogLevel.DEBUG_LEVEL); break; @@ -159,33 +266,32 @@ public KeyValueStorageRocksDB(String path, DbConfigType dbConfigType, ServerConf break; default: log.warn("Unrecognized RockDB log level: {}", logLevel); - } + } // Keep log files for 1month - options.setKeepLogFileNum(30); - options.setLogFileTimeToRoll(TimeUnit.DAYS.toSeconds(1)); - - try { - if (readOnly) { - db = RocksDB.openReadOnly(options, path); - } else { - db = RocksDB.open(options, path); - } - } catch (RocksDBException e) { - throw new IOException("Error open RocksDB database", e); + options.setKeepLogFileNum(30); + options.setLogFileTimeToRoll(TimeUnit.DAYS.toSeconds(1)); + this.options = options; + try { + if (readOnly) { + return RocksDB.openReadOnly(options, dbPath); + } else { + return RocksDB.open(options, dbPath); } + } 
catch (RocksDBException e) { + throw new IOException("Error open RocksDB database", e); } - - optionSync.setSync(true); - optionDontSync.setSync(false); - - optionCache.setFillCache(true); - optionDontCache.setFillCache(false); } @Override public void close() throws IOException { db.close(); + if (cache != null) { + cache.close(); + } + if (options != null) { + options.close(); + } optionSync.close(); optionDontSync.close(); optionCache.close(); @@ -228,35 +334,21 @@ public int get(byte[] key, byte[] value) throws IOException { } @Override + @SuppressFBWarnings("RCN_REDUNDANT_NULLCHECK_WOULD_HAVE_BEEN_A_NPE") public Entry getFloor(byte[] key) throws IOException { - try (RocksIterator iterator = db.newIterator(optionCache)) { - // Position the iterator on the record whose key is >= to the supplied key - iterator.seek(key); - - if (!iterator.isValid()) { - // There are no entries >= key - iterator.seekToLast(); - if (iterator.isValid()) { - return new EntryWrapper(iterator.key(), iterator.value()); - } else { - // Db is empty - return null; - } - } - - iterator.prev(); - - if (!iterator.isValid()) { - // Iterator is on the 1st entry of the db and this entry key is >= to the target - // key - return null; - } else { + try (Slice upperBound = new Slice(key); + ReadOptions option = new ReadOptions(optionCache).setIterateUpperBound(upperBound); + RocksIterator iterator = db.newIterator(option)) { + iterator.seekToLast(); + if (iterator.isValid()) { return new EntryWrapper(iterator.key(), iterator.value()); } } + return null; } @Override + @SuppressFBWarnings("RCN_REDUNDANT_NULLCHECK_WOULD_HAVE_BEEN_A_NPE") public Entry getCeil(byte[] key) throws IOException { try (RocksIterator iterator = db.newIterator(optionCache)) { // Position the iterator on the record whose key is >= to the supplied key @@ -279,6 +371,50 @@ public void delete(byte[] key) throws IOException { } } + @Override + public String getDBPath() { + return dbPath; + } + + @Override + public void compact(byte[] firstKey, byte[] lastKey) throws IOException { + try { + db.compactRange(firstKey, lastKey); + } catch (RocksDBException e) { + throw new IOException("Error in RocksDB compact", e); + } + } + + @Override + public void compact() throws IOException { + try { + final long start = System.currentTimeMillis(); + final int oriRocksDBFileCount = db.getLiveFilesMetaData().size(); + final long oriRocksDBSize = getRocksDBSize(); + log.info("Starting RocksDB {} compaction; it currently holds {} files and {} bytes.", + db.getName(), oriRocksDBFileCount, oriRocksDBSize); + + db.compactRange(); + + final long end = System.currentTimeMillis(); + final int rocksDBFileCount = db.getLiveFilesMetaData().size(); + final long rocksDBSize = getRocksDBSize(); + log.info("RocksDB {} compaction finished in {} ms, reducing space by {} bytes; it now holds {} files and {} bytes.", + db.getName(), end - start, oriRocksDBSize - rocksDBSize, rocksDBFileCount, rocksDBSize); + } catch (RocksDBException e) { + throw new IOException("Error in RocksDB compact", e); + } + } + + private long getRocksDBSize() { + List liveFilesMetaData = db.getLiveFilesMetaData(); + long rocksDBFileSize = 0L; + for (LiveFileMetaData fileMetaData : liveFilesMetaData) { + rocksDBFileSize += fileMetaData.size(); + } + return rocksDBFileSize; + } + @Override public void sync() throws IOException { try { @@ -316,13 +452,15 @@ public void close() { @Override public CloseableIterator keys(byte[] firstKey, byte[] lastKey) { - final RocksIterator iterator = db.newIterator(optionCache); + final Slice 
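// Editor's note: a behavioral sketch of the rewritten getFloor(key). Setting the
// key as an exclusive iterate-upper-bound and calling seekToLast() yields the
// greatest key strictly below the argument, exactly like the old seek()+prev()
// dance but without iterating past the bound. TreeMap.lowerEntry is the
// in-memory analogue:
import java.util.TreeMap;

final class GetFloorSketch {
    public static void main(String[] args) {
        TreeMap<Integer, String> db = new TreeMap<>();
        db.put(1, "a");
        db.put(3, "b");
        db.put(5, "c");
        System.out.println(db.lowerEntry(4)); // 3=b  (greatest key strictly below 4)
        System.out.println(db.lowerEntry(1)); // null (nothing below the smallest key)
    }
}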
upperBound = new Slice(lastKey); + final ReadOptions option = new ReadOptions(optionCache).setIterateUpperBound(upperBound); + final RocksIterator iterator = db.newIterator(option); iterator.seek(firstKey); return new CloseableIterator() { @Override public boolean hasNext() { - return iterator.isValid() && ByteComparator.compare(iterator.key(), lastKey) < 0; + return iterator.isValid(); } @Override @@ -336,6 +474,8 @@ public byte[] next() { @Override public void close() { iterator.close(); + option.close(); + upperBound.close(); } }; } @@ -379,21 +519,29 @@ public long count() throws IOException { @Override public Batch newBatch() { - return new RocksDBBatch(); + return new RocksDBBatch(writeBatchMaxSize); } private class RocksDBBatch implements Batch { private final WriteBatch writeBatch = new WriteBatch(); + private final int batchSize; + private int batchCount = 0; + + RocksDBBatch(int batchSize) { + this.batchSize = batchSize; + } @Override public void close() { writeBatch.close(); + batchCount = 0; } @Override public void put(byte[] key, byte[] value) throws IOException { try { writeBatch.put(key, value); + countBatchAndFlushIfNeeded(); } catch (RocksDBException e) { throw new IOException("Failed to flush RocksDB batch", e); } @@ -403,6 +551,7 @@ public void put(byte[] key, byte[] value) throws IOException { public void remove(byte[] key) throws IOException { try { writeBatch.delete(key); + countBatchAndFlushIfNeeded(); } catch (RocksDBException e) { throw new IOException("Failed to flush RocksDB batch", e); } @@ -411,17 +560,31 @@ public void remove(byte[] key) throws IOException { @Override public void clear() { writeBatch.clear(); + batchCount = 0; } @Override public void deleteRange(byte[] beginKey, byte[] endKey) throws IOException { try { writeBatch.deleteRange(beginKey, endKey); + countBatchAndFlushIfNeeded(); } catch (RocksDBException e) { throw new IOException("Failed to flush RocksDB batch", e); } } + private void countBatchAndFlushIfNeeded() throws IOException { + if (++batchCount >= batchSize) { + flush(); + clear(); + } + } + + @Override + public int batchCount() { + return batchCount; + } + @Override public void flush() throws IOException { try { @@ -464,7 +627,17 @@ public byte[] getKey() { } } - private static final Comparator ByteComparator = UnsignedBytes.lexicographicalComparator(); + RocksDB db() { + return db; + } + + List getColumnFamilyDescriptors() { + return columnFamilyDescriptors; + } + + RocksObject getOptions() { + return options; + } private static final Logger log = LoggerFactory.getLogger(KeyValueStorageRocksDB.class); } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/LedgerMetadataIndex.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/LedgerMetadataIndex.java index 04bf32dba71..73b0fd02dae 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/LedgerMetadataIndex.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/LedgerMetadataIndex.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
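// Editor's note: an illustration of the new auto-flush contract of RocksDBBatch
// (cap value hypothetical). Once batchCount reaches
// conf.getMaxOperationNumbersInSingleRocksDBBatch(), put/remove/deleteRange
// flush and clear internally, so one logical batch may be applied in several
// non-atomic chunks, and batchCount() only reflects operations since the last
// flush.
import java.io.IOException;
import java.nio.ByteBuffer;

final class BatchAutoFlushSketch {
    static void bulkLoad(KeyValueStorage locationsDb) throws IOException {
        try (KeyValueStorage.Batch batch = locationsDb.newBatch()) {
            for (int i = 0; i < 250_000; i++) {
                byte[] key = ByteBuffer.allocate(4).putInt(i).array();
                batch.put(key, key); // with a cap of 100_000 this flushes twice along the way
            }
            batch.flush(); // writes the remaining 50_000 operations
        }
    }
}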
See the NOTICE file @@ -22,24 +22,24 @@ import com.google.common.base.Predicate; import com.google.common.collect.Iterables; +import com.google.common.collect.Sets; import com.google.protobuf.ByteString; - +import io.netty.buffer.ByteBuf; import java.io.Closeable; import java.io.IOException; -import java.nio.file.FileSystems; import java.util.AbstractMap.SimpleEntry; import java.util.Arrays; import java.util.Map.Entry; +import java.util.Set; import java.util.concurrent.ConcurrentLinkedQueue; import java.util.concurrent.atomic.AtomicInteger; - +import java.util.concurrent.locks.ReentrantLock; import org.apache.bookkeeper.bookie.Bookie; import org.apache.bookkeeper.bookie.BookieException; import org.apache.bookkeeper.bookie.storage.ldb.DbLedgerStorageDataFormats.LedgerData; import org.apache.bookkeeper.bookie.storage.ldb.KeyValueStorage.CloseableIterator; import org.apache.bookkeeper.bookie.storage.ldb.KeyValueStorageFactory.DbConfigType; import org.apache.bookkeeper.conf.ServerConfiguration; -import org.apache.bookkeeper.stats.Gauge; import org.apache.bookkeeper.stats.StatsLogger; import org.apache.bookkeeper.util.collections.ConcurrentLongHashMap; import org.slf4j.Logger; @@ -51,25 +51,28 @@ *

The key is the ledgerId and the value is the {@link LedgerData} content. */ public class LedgerMetadataIndex implements Closeable { + // Non-ledger data should have negative ID + private static final long STORAGE_FLAGS = -0xeefd; + // Contains all ledgers stored in the bookie private final ConcurrentLongHashMap ledgers; private final AtomicInteger ledgersCount; private final KeyValueStorage ledgersDb; - private StatsLogger stats; + private final LedgerMetadataIndexStats stats; // Holds ledger modifications applied in memory map, and pending to be flushed on db private final ConcurrentLinkedQueue> pendingLedgersUpdates; // Holds ledger ids that were delete from memory map, and pending to be flushed on db - private final ConcurrentLinkedQueue pendingDeletedLedgers; + private final Set pendingDeletedLedgers; + private final ReentrantLock[] locks = new ReentrantLock[16]; public LedgerMetadataIndex(ServerConfiguration conf, KeyValueStorageFactory storageFactory, String basePath, StatsLogger stats) throws IOException { - String ledgersPath = FileSystems.getDefault().getPath(basePath, "ledgers").toFile().toString(); - ledgersDb = storageFactory.newKeyValueStorage(ledgersPath, DbConfigType.Small, conf); + ledgersDb = storageFactory.newKeyValueStorage(basePath, "ledgers", DbConfigType.LedgerMetadata, conf); - ledgers = new ConcurrentLongHashMap<>(); + ledgers = ConcurrentLongHashMap.newBuilder().build(); ledgersCount = new AtomicInteger(); // Read all ledgers from db @@ -78,33 +81,26 @@ public LedgerMetadataIndex(ServerConfiguration conf, KeyValueStorageFactory stor while (iterator.hasNext()) { Entry entry = iterator.next(); long ledgerId = ArrayUtil.getLong(entry.getKey(), 0); - LedgerData ledgerData = LedgerData.parseFrom(entry.getValue()); - ledgers.put(ledgerId, ledgerData); - ledgersCount.incrementAndGet(); + if (ledgerId >= 0) { + LedgerData ledgerData = LedgerData.parseFrom(entry.getValue()); + ledgers.put(ledgerId, ledgerData); + ledgersCount.incrementAndGet(); + } } } finally { iterator.close(); } this.pendingLedgersUpdates = new ConcurrentLinkedQueue>(); - this.pendingDeletedLedgers = new ConcurrentLinkedQueue(); - - this.stats = stats; - registerStats(); - } + this.pendingDeletedLedgers = Sets.newConcurrentHashSet(); - public void registerStats() { - stats.registerGauge("ledgers-count", new Gauge() { - @Override - public Long getDefaultValue() { - return 0L; - } + this.stats = new LedgerMetadataIndexStats( + stats, + () -> (long) ledgersCount.get()); - @Override - public Long getSample() { - return (long) ledgersCount.get(); - } - }); + for (int i = 0; i < locks.length; i++) { + locks[i] = new ReentrantLock(); + } } @Override @@ -127,31 +123,46 @@ public LedgerData get(long ledgerId) throws IOException { public void set(long ledgerId, LedgerData ledgerData) throws IOException { ledgerData = LedgerData.newBuilder(ledgerData).setExists(true).build(); - if (ledgers.put(ledgerId, ledgerData) == null) { - if (log.isDebugEnabled()) { - log.debug("Added new ledger {}", ledgerId); + ReentrantLock lock = lockForLedger(ledgerId); + lock.lock(); + try { + if (ledgers.put(ledgerId, ledgerData) == null) { + if (log.isDebugEnabled()) { + log.debug("Added new ledger {}", ledgerId); + } + ledgersCount.incrementAndGet(); } - ledgersCount.incrementAndGet(); - } - pendingLedgersUpdates.add(new SimpleEntry(ledgerId, ledgerData)); - pendingDeletedLedgers.remove(ledgerId); + pendingLedgersUpdates.add(new SimpleEntry(ledgerId, ledgerData)); + pendingDeletedLedgers.remove(ledgerId); + } finally { + 
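// Editor's note: lockForLedger(...) is used throughout the methods here but
// defined outside this hunk; a minimal sketch of the striped-lock selection it
// presumably performs (the modulo choice is an assumption). Sixteen stripes
// bound the number of lock objects while still serializing concurrent
// mutations of any single ledger:
import java.util.concurrent.locks.ReentrantLock;

final class LockStripingSketch {
    private final ReentrantLock[] locks = new ReentrantLock[16];

    LockStripingSketch() {
        for (int i = 0; i < locks.length; i++) {
            locks[i] = new ReentrantLock();
        }
    }

    ReentrantLock lockForLedger(long ledgerId) {
        // floorMod keeps negative ids inside [0, locks.length)
        return locks[Math.floorMod(ledgerId, locks.length)];
    }
}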
lock.unlock(); + } } public void delete(long ledgerId) throws IOException { - if (ledgers.remove(ledgerId) != null) { - if (log.isDebugEnabled()) { - log.debug("Removed ledger {}", ledgerId); + ReentrantLock lock = lockForLedger(ledgerId); + lock.lock(); + try { + if (ledgers.remove(ledgerId) != null) { + if (log.isDebugEnabled()) { + log.debug("Removed ledger {}", ledgerId); + } + ledgersCount.decrementAndGet(); } - ledgersCount.decrementAndGet(); - } - pendingDeletedLedgers.add(ledgerId); - pendingLedgersUpdates.removeIf(e -> e.getKey() == ledgerId); + pendingDeletedLedgers.add(ledgerId); + pendingLedgersUpdates.removeIf(e -> e.getKey() == ledgerId); + } finally { + lock.unlock(); + } } public Iterable getActiveLedgersInRange(final long firstLedgerId, final long lastLedgerId) throws IOException { + if (firstLedgerId <= 0 && lastLedgerId == Long.MAX_VALUE) { + return ledgers.keys(); + } return Iterables.filter(ledgers.keys(), new Predicate() { @Override public boolean apply(Long ledgerId) { @@ -161,102 +172,254 @@ public boolean apply(Long ledgerId) { } public boolean setFenced(long ledgerId) throws IOException { - LedgerData ledgerData = get(ledgerId); - if (ledgerData.getFenced()) { - return false; - } + ReentrantLock lock = lockForLedger(ledgerId); + lock.lock(); + try { + LedgerData ledgerData = get(ledgerId); + if (ledgerData.getFenced()) { + return false; + } - LedgerData newLedgerData = LedgerData.newBuilder(ledgerData).setFenced(true).build(); + LedgerData newLedgerData = LedgerData.newBuilder(ledgerData).setFenced(true).build(); - if (ledgers.put(ledgerId, newLedgerData) == null) { - // Ledger had been deleted - if (log.isDebugEnabled()) { - log.debug("Re-inserted fenced ledger {}", ledgerId); - } - ledgersCount.incrementAndGet(); - } else { - if (log.isDebugEnabled()) { + if (ledgers.put(ledgerId, newLedgerData) == null) { + // Ledger had been deleted + if (log.isDebugEnabled()) { + log.debug("Re-inserted fenced ledger {}", ledgerId); + } + ledgersCount.incrementAndGet(); + } else if (log.isDebugEnabled()) { log.debug("Set fenced ledger {}", ledgerId); } - } - pendingLedgersUpdates.add(new SimpleEntry(ledgerId, newLedgerData)); - pendingDeletedLedgers.remove(ledgerId); - return true; + pendingLedgersUpdates.add(new SimpleEntry(ledgerId, newLedgerData)); + pendingDeletedLedgers.remove(ledgerId); + return true; + } finally { + lock.unlock(); + } } - public void setMasterKey(long ledgerId, byte[] masterKey) throws IOException { - LedgerData ledgerData = ledgers.get(ledgerId); - if (ledgerData == null) { - // New ledger inserted - ledgerData = LedgerData.newBuilder().setExists(true).setFenced(false) - .setMasterKey(ByteString.copyFrom(masterKey)).build(); - if (log.isDebugEnabled()) { - log.debug("Inserting new ledger {}", ledgerId); + public boolean setLimbo(long ledgerId) throws IOException { + ReentrantLock lock = lockForLedger(ledgerId); + lock.lock(); + try { + LedgerData ledgerData = get(ledgerId); + if (ledgerData.getLimbo()) { + return false; } - } else { - byte[] storedMasterKey = ledgerData.getMasterKey().toByteArray(); - if (ArrayUtil.isArrayAllZeros(storedMasterKey)) { - // update master key of the ledger - ledgerData = LedgerData.newBuilder(ledgerData).setMasterKey(ByteString.copyFrom(masterKey)).build(); + + LedgerData newLedgerData = LedgerData.newBuilder(ledgerData).setLimbo(true).build(); + + if (ledgers.put(ledgerId, newLedgerData) == null) { + // Ledger had been deleted if (log.isDebugEnabled()) { - log.debug("Replace old master key {} with new master key {}", 
storedMasterKey, masterKey); + log.debug("Re-inserted limbo ledger {}", ledgerId); } - } else if (!Arrays.equals(storedMasterKey, masterKey) && !ArrayUtil.isArrayAllZeros(masterKey)) { - log.warn("Ledger {} masterKey in db can only be set once.", ledgerId); - throw new IOException(BookieException.create(BookieException.Code.IllegalOpException)); + ledgersCount.incrementAndGet(); + } else if (log.isDebugEnabled()) { + log.debug("Set limbo ledger {}", ledgerId); } + + pendingLedgersUpdates.add(new SimpleEntry(ledgerId, newLedgerData)); + pendingDeletedLedgers.remove(ledgerId); + return true; + } finally { + lock.unlock(); } + } + + public boolean clearLimbo(long ledgerId) throws IOException { + ReentrantLock lock = lockForLedger(ledgerId); + lock.lock(); + try { + LedgerData ledgerData = get(ledgerId); + if (ledgerData == null) { + throw new Bookie.NoLedgerException(ledgerId); + } + final boolean oldValue = ledgerData.getLimbo(); + LedgerData newLedgerData = LedgerData.newBuilder(ledgerData).setLimbo(false).build(); + + if (ledgers.put(ledgerId, newLedgerData) == null) { + // Ledger had been deleted + if (log.isDebugEnabled()) { + log.debug("Re-inserted ledger {} while clearing limbo", ledgerId); + } + ledgersCount.incrementAndGet(); + } else if (log.isDebugEnabled()) { + log.debug("Cleared limbo on ledger {}", ledgerId); } - if (ledgers.put(ledgerId, ledgerData) == null) { - ledgersCount.incrementAndGet(); + pendingLedgersUpdates.add(new SimpleEntry(ledgerId, newLedgerData)); + pendingDeletedLedgers.remove(ledgerId); + return oldValue; + } finally { + lock.unlock(); } + } + + + public void setMasterKey(long ledgerId, byte[] masterKey) throws IOException { + ReentrantLock lock = lockForLedger(ledgerId); + lock.lock(); + try { + LedgerData ledgerData = ledgers.get(ledgerId); + if (ledgerData == null) { + // New ledger inserted + ledgerData = LedgerData.newBuilder().setExists(true).setFenced(false) + .setMasterKey(ByteString.copyFrom(masterKey)).build(); + if (log.isDebugEnabled()) { + log.debug("Inserting new ledger {}", ledgerId); + } + } else { + byte[] storedMasterKey = ledgerData.getMasterKey().toByteArray(); + if (ArrayUtil.isArrayAllZeros(storedMasterKey)) { + // update master key of the ledger + ledgerData = LedgerData.newBuilder(ledgerData).setMasterKey(ByteString.copyFrom(masterKey)).build(); + if (log.isDebugEnabled()) { + log.debug("Replace old master key {} with new master key {}", storedMasterKey, masterKey); + } + } else if (!Arrays.equals(storedMasterKey, masterKey) && !ArrayUtil.isArrayAllZeros(masterKey)) { + log.warn("Ledger {} masterKey in db can only be set once.", ledgerId); + throw new IOException(BookieException.create(BookieException.Code.IllegalOpException)); + } + } + + if (ledgers.put(ledgerId, ledgerData) == null) { + ledgersCount.incrementAndGet(); + } - pendingLedgersUpdates.add(new SimpleEntry(ledgerId, ledgerData)); - pendingDeletedLedgers.remove(ledgerId); + pendingLedgersUpdates.add(new SimpleEntry(ledgerId, ledgerData)); + pendingDeletedLedgers.remove(ledgerId); + } finally { + lock.unlock(); + } } /** * Flushes all pending changes. 
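*
* <p>Illustrative sketch only (not the method body that follows): the flush path drains
* the pending-updates queue into RocksDB and then forces a sync, so a crash can at worst
* lose updates that were still queued, never leave a half-applied batch behind. All names
* are fields of this class; {@code LongWrapper} is the recyclable 8-byte key holder used below.
* <pre>{@code
* LongWrapper key = LongWrapper.get();
* try {
*     Entry<Long, LedgerData> entry;
*     while ((entry = pendingLedgersUpdates.poll()) != null) {
*         key.set(entry.getKey());
*         ledgersDb.put(key.array, entry.getValue().toByteArray());
*     }
*     ledgersDb.sync();
* } finally {
*     key.recycle();
* }
* }</pre>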
*/ public void flush() throws IOException { + if (pendingLedgersUpdates.isEmpty()) { + return; + } + LongWrapper key = LongWrapper.get(); - int updatedLedgers = 0; - while (!pendingLedgersUpdates.isEmpty()) { - Entry entry = pendingLedgersUpdates.poll(); - key.set(entry.getKey()); - byte[] value = entry.getValue().toByteArray(); - ledgersDb.put(key.array, value); - ++updatedLedgers; - } + try { + int updatedLedgers = 0; + Entry entry; + while ((entry = pendingLedgersUpdates.poll()) != null) { + key.set(entry.getKey()); + byte[] value = entry.getValue().toByteArray(); + ledgersDb.put(key.array, value); + ++updatedLedgers; + } - if (log.isDebugEnabled()) { - log.debug("Persisting updates to {} ledgers", updatedLedgers); - } + if (log.isDebugEnabled()) { + log.debug("Persisting updates to {} ledgers", updatedLedgers); + } - ledgersDb.sync(); - key.recycle(); + ledgersDb.sync(); + } finally { + key.recycle(); + } } public void removeDeletedLedgers() throws IOException { + if (pendingDeletedLedgers.isEmpty()) { + return; + } + LongWrapper key = LongWrapper.get(); - int deletedLedgers = 0; - while (!pendingDeletedLedgers.isEmpty()) { - long ledgerId = pendingDeletedLedgers.poll(); - key.set(ledgerId); - ledgersDb.delete(key.array); - deletedLedgers++; + try { + int deletedLedgers = 0; + for (Long ledgerId : pendingDeletedLedgers) { + key.set(ledgerId); + ledgersDb.delete(key.array); + ++deletedLedgers; + } + + if (log.isDebugEnabled()) { + log.debug("Persisting deletes of ledgers {}", deletedLedgers); + } + + ledgersDb.sync(); + } finally { + key.recycle(); } + } + + private ReentrantLock lockForLedger(long ledgerId) { + return locks[(int) (Math.abs(ledgerId) % locks.length)]; + } + + int getStorageStateFlags() throws IOException { + LongWrapper keyWrapper = LongWrapper.get(); + LongWrapper currentWrapper = LongWrapper.get(); - if (log.isDebugEnabled()) { - log.debug("Persisting deletes of ledgers {}", deletedLedgers); + try { + keyWrapper.set(STORAGE_FLAGS); + synchronized (ledgersDb) { + int current = 0; + if (ledgersDb.get(keyWrapper.array, currentWrapper.array) >= 0) { + current = (int) currentWrapper.getValue(); + } + return current; + } + } finally { + keyWrapper.recycle(); + currentWrapper.recycle(); } + } + + boolean setStorageStateFlags(int expected, int newFlags) throws IOException { + LongWrapper keyWrapper = LongWrapper.get(); + LongWrapper currentWrapper = LongWrapper.get(); + LongWrapper newFlagsWrapper = LongWrapper.get(); - ledgersDb.sync(); - key.recycle(); + try { + keyWrapper.set(STORAGE_FLAGS); + newFlagsWrapper.set(newFlags); + synchronized (ledgersDb) { + int current = 0; + if (ledgersDb.get(keyWrapper.array, currentWrapper.array) >= 0) { + current = (int) currentWrapper.getValue(); + } + if (current == expected) { + ledgersDb.put(keyWrapper.array, newFlagsWrapper.array); + ledgersDb.sync(); + return true; + } + } + } finally { + keyWrapper.recycle(); + currentWrapper.recycle(); + newFlagsWrapper.recycle(); + } + return false; } private static final Logger log = LoggerFactory.getLogger(LedgerMetadataIndex.class); + + void setExplicitLac(long ledgerId, ByteBuf lac) throws IOException { + LedgerData ledgerData = ledgers.get(ledgerId); + if (ledgerData != null) { + LedgerData newLedgerData = LedgerData.newBuilder(ledgerData) + .setExplicitLac(ByteString.copyFrom(lac.nioBuffer())).build(); + + if (ledgers.put(ledgerId, newLedgerData) == null) { + // Ledger had been deleted + ledgersCount.incrementAndGet(); + return; + } else if (log.isDebugEnabled()) { + log.debug("Set 
explicitLac on ledger {}", ledgerId); + } + pendingLedgersUpdates.add(new SimpleEntry(ledgerId, newLedgerData)); + } else { + // Unknown ledger; nothing to update + } + } + } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/LedgerMetadataIndexStats.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/LedgerMetadataIndexStats.java new file mode 100644 index 00000000000..a46e38b650c --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/LedgerMetadataIndexStats.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bookkeeper.bookie.storage.ldb; + +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.BOOKIE_SCOPE; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.CATEGORY_SERVER; + +import java.util.function.Supplier; +import lombok.Getter; +import org.apache.bookkeeper.stats.Gauge; +import org.apache.bookkeeper.stats.StatsLogger; +import org.apache.bookkeeper.stats.annotations.StatsDoc; + +/** + * An umbrella class for ledger metadata index stats. + */ +@StatsDoc( + name = BOOKIE_SCOPE, + category = CATEGORY_SERVER, + help = "Ledger metadata index stats" +) +@Getter +class LedgerMetadataIndexStats { + + private static final String LEDGERS_COUNT = "ledgers-count"; + + @StatsDoc( + name = LEDGERS_COUNT, + help = "Current number of ledgers" + ) + private final Gauge ledgersCountGauge; + + LedgerMetadataIndexStats(StatsLogger statsLogger, + Supplier ledgersCountSupplier) { + ledgersCountGauge = new Gauge() { + @Override + public Long getDefaultValue() { + return 0L; + } + + @Override + public Long getSample() { + return ledgersCountSupplier.get(); + } + }; + statsLogger.registerGauge(LEDGERS_COUNT, ledgersCountGauge); + } + +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/LedgersIndexCheckOp.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/LedgersIndexCheckOp.java new file mode 100644 index 00000000000..d8f064f9272 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/LedgersIndexCheckOp.java @@ -0,0 +1,113 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.bookie.storage.ldb; + +import java.io.File; +import java.io.IOException; +import java.nio.file.FileSystems; +import java.nio.file.Path; +import java.util.Base64; +import java.util.Map; +import java.util.concurrent.TimeUnit; +import org.apache.bookkeeper.bookie.BookieImpl; +import org.apache.bookkeeper.bookie.storage.ldb.KeyValueStorageFactory.DbConfigType; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.commons.lang.time.DurationFormatUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Scan the ledgers index to make sure it is readable. + */ +public class LedgersIndexCheckOp { + private static final Logger LOG = LoggerFactory.getLogger(LedgersIndexCheckOp.class); + + private final ServerConfiguration conf; + private final boolean verbose; + private static final String LedgersSubPath = "ledgers"; + + public LedgersIndexCheckOp(ServerConfiguration conf, boolean verbose) { + this.conf = conf; + this.verbose = verbose; + } + + public boolean initiate() throws IOException { + File[] indexDirs = conf.getIndexDirs(); + if (indexDirs == null) { + indexDirs = conf.getLedgerDirs(); + } + if (indexDirs.length != conf.getLedgerDirs().length) { + throw new IOException("ledger and index dirs size not matched"); + } + long startTime = System.nanoTime(); + for (int i = 0; i < conf.getLedgerDirs().length; i++) { + File indexDir = indexDirs[i]; + + String iBasePath = BookieImpl.getCurrentDirectory(indexDir).toString(); + Path indexCurrentPath = FileSystems.getDefault().getPath(iBasePath, LedgersSubPath); + + LOG.info("Loading ledgers index from {}", indexCurrentPath); + LOG.info("Starting index scan"); + + try { + KeyValueStorage index = new KeyValueStorageRocksDB(iBasePath, LedgersSubPath, + DbConfigType.Default, conf, true); + // Read all ledgers from db + KeyValueStorage.CloseableIterator> iterator = index.iterator(); + int ctr = 0; + try { + while (iterator.hasNext()) { + ctr++; + Map.Entry entry = iterator.next(); + long ledgerId = ArrayUtil.getLong(entry.getKey(), 0); + DbLedgerStorageDataFormats.LedgerData ledgerData = + DbLedgerStorageDataFormats.LedgerData.parseFrom(entry.getValue()); + if (verbose) { + LOG.info( + "Scanned: {}, ledger: {}, exists: {}, isFenced: {}, masterKey: {}, explicitLAC: {}", + ctr, + ledgerId, + (ledgerData.hasExists() ? ledgerData.getExists() : "-"), + (ledgerData.hasFenced() ? ledgerData.getFenced() : "-"), + (ledgerData.hasMasterKey() + ? Base64.getEncoder() + .encodeToString(ledgerData.getMasterKey().toByteArray()) + : "-"), + (ledgerData.hasExplicitLac() ? ledgerData.getExplicitLac() : "-")); + } else if (ctr % 100 == 0) { + LOG.info("Scanned {} ledgers", ctr); + } + } + } finally { + iterator.close(); + } + LOG.info("Scanned {} ledgers", ctr); + } catch (Throwable t) { + LOG.error("Index scan has failed with error", t); + return false; + } + } + LOG.info("Index scan has completed successfully. 
Total time: {}", + DurationFormatUtils.formatDurationHMS( + TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startTime))); + return true; + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/LedgersIndexRebuildOp.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/LedgersIndexRebuildOp.java new file mode 100644 index 00000000000..2725897e804 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/LedgersIndexRebuildOp.java @@ -0,0 +1,235 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.bookie.storage.ldb; + +import com.google.common.collect.Lists; +import com.google.protobuf.ByteString; +import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; +import io.netty.buffer.ByteBuf; +import io.netty.buffer.Unpooled; +import java.io.File; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.file.FileSystems; +import java.nio.file.Files; +import java.nio.file.Path; +import java.text.SimpleDateFormat; +import java.util.Date; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import org.apache.bookkeeper.bookie.BookieImpl; +import org.apache.bookkeeper.bookie.DefaultEntryLogger; +import org.apache.bookkeeper.bookie.Journal; +import org.apache.bookkeeper.bookie.LedgerDirsManager; +import org.apache.bookkeeper.bookie.storage.EntryLogScanner; +import org.apache.bookkeeper.bookie.storage.ldb.KeyValueStorageFactory.DbConfigType; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.util.BookKeeperConstants; +import org.apache.bookkeeper.util.DiskChecker; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Scan all entries in the journal and entry log files then rebuilds the ledgers index. + * Notable stuff: + * - Fences every ledger as even if we check the metadata, we cannot guarantee that + * a fence request was served while the rebuild was taking place (even if the bookie + * is running in read-only mode). + * Losing the fenced status of a ledger is UNSAFE. + * - Sets the master key as an empty byte array. This is correct as empty master keys + * are overwritten and we cannot use the password from metadata, and cannot know 100% + * for sure how a digest for the password was generated. 
+ */ +public class LedgersIndexRebuildOp { + private static final Logger LOG = LoggerFactory.getLogger(LedgersIndexRebuildOp.class); + + private final ServerConfiguration conf; + private final boolean verbose; + private static final String LedgersSubPath = "ledgers"; + + public LedgersIndexRebuildOp(ServerConfiguration conf, boolean verbose) { + this.conf = conf; + this.verbose = verbose; + } + + @SuppressFBWarnings("RCN_REDUNDANT_NULLCHECK_WOULD_HAVE_BEEN_A_NPE") + public boolean initiate() { + LOG.info("Starting ledger index rebuilding"); + File[] indexDirs = conf.getIndexDirs(); + if (indexDirs == null) { + indexDirs = conf.getLedgerDirs(); + } + if (indexDirs.length != conf.getLedgerDirs().length) { + LOG.error("ledger and index dirs size not matched"); + return false; + } + + for (int i = 0; i < indexDirs.length; i++) { + File indexDir = indexDirs[i]; + File ledgerDir = conf.getLedgerDirs()[i]; + + String timestamp = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSZ").format(new Date()); + String indexBasePath = BookieImpl.getCurrentDirectory(indexDir).toString(); + String tempLedgersSubPath = LedgersSubPath + ".TEMP-" + timestamp; + Path indexTempPath = FileSystems.getDefault().getPath(indexBasePath, tempLedgersSubPath); + Path indexCurrentPath = FileSystems.getDefault().getPath(indexBasePath, LedgersSubPath); + + LOG.info("Starting scan phase (scans journal and entry log files)"); + + try { + Set ledgers = new HashSet<>(); + scanJournals(ledgers); + File[] lDirs = new File[1]; + lDirs[0] = ledgerDir; + scanEntryLogFiles(ledgers, lDirs); + + LOG.info("Scan complete, found {} ledgers. " + + "Starting to build a new ledgers index", ledgers.size()); + + try (KeyValueStorage newIndex = KeyValueStorageRocksDB.factory.newKeyValueStorage( + indexBasePath, tempLedgersSubPath, DbConfigType.Default, conf)) { + LOG.info("Created ledgers index at temp location {}", indexTempPath); + + for (Long ledgerId : ledgers) { + DbLedgerStorageDataFormats.LedgerData ledgerData = + DbLedgerStorageDataFormats.LedgerData.newBuilder() + .setExists(true) + .setFenced(true) + .setMasterKey(ByteString.EMPTY).build(); + + byte[] ledgerArray = new byte[16]; + ArrayUtil.setLong(ledgerArray, 0, ledgerId); + newIndex.put(ledgerArray, ledgerData.toByteArray()); + } + + newIndex.sync(); + } + } catch (Throwable t) { + LOG.error("Error during rebuild, the original index remains unchanged", t); + delete(indexTempPath); + return false; + } + + // replace the existing index + try { + Path prevPath = FileSystems.getDefault().getPath(indexBasePath, + LedgersSubPath + ".PREV-" + timestamp); + LOG.info("Moving original index from original location: {} up to back-up location: {}", + indexCurrentPath, prevPath); + Files.move(indexCurrentPath, prevPath); + LOG.info("Moving rebuilt index from: {} to: {}", indexTempPath, indexCurrentPath); + Files.move(indexTempPath, indexCurrentPath); + LOG.info("Original index has been replaced with the new index. " + + "The original index has been moved to {}", prevPath); + } catch (IOException e) { + LOG.error("Could not replace original index with rebuilt index. 
" + + "To return to the original state, ensure the original index is in its original location", e); + return false; + } + } + + return true; + } + + private void scanEntryLogFiles(Set ledgers, File[] lDirs) throws IOException { + DefaultEntryLogger entryLogger = new DefaultEntryLogger(conf, new LedgerDirsManager(conf, lDirs, + new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold()))); + Set entryLogs = entryLogger.getEntryLogsSet(); + + int totalEntryLogs = entryLogs.size(); + int completedEntryLogs = 0; + LOG.info("Scanning {} entry logs", totalEntryLogs); + + for (long entryLogId : entryLogs) { + entryLogger.scanEntryLog(entryLogId, new EntryLogScanner() { + @Override + public void process(long ledgerId, long offset, ByteBuf entry) throws IOException { + if (ledgers.add(ledgerId)) { + if (verbose) { + LOG.info("Found ledger {} in entry log", ledgerId); + } + } + } + + @Override + public boolean accept(long ledgerId) { + return true; + } + }); + + ++completedEntryLogs; + LOG.info("Completed scanning of log {}.log -- {} / {}", Long.toHexString(entryLogId), completedEntryLogs, + totalEntryLogs); + } + } + + private void scanJournals(Set ledgers) throws IOException { + for (Journal journal : getJournals(conf)) { + List journalIds = Journal.listJournalIds(journal.getJournalDirectory(), + new Journal.JournalIdFilter() { + @Override + public boolean accept(long journalId) { + return true; + } + }); + + for (Long journalId : journalIds) { + scanJournal(journal, journalId, ledgers); + } + } + } + + private List getJournals(ServerConfiguration conf) throws IOException { + List journals = Lists.newArrayListWithCapacity(conf.getJournalDirs().length); + int idx = 0; + for (File journalDir : conf.getJournalDirs()) { + journals.add(new Journal(idx++, new File(journalDir, BookKeeperConstants.CURRENT_DIR), conf, + new LedgerDirsManager(conf, conf.getLedgerDirs(), + new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold())))); + } + + return journals; + } + + private void scanJournal(Journal journal, long journalId, Set ledgers) throws IOException { + LOG.info("Scanning journal " + journalId + " (" + Long.toHexString(journalId) + ".txn)"); + journal.scanJournal(journalId, 0L, new Journal.JournalScanner() { + @Override + public void process(int journalVersion, long offset, ByteBuffer entry) { + ByteBuf buf = Unpooled.wrappedBuffer(entry); + long ledgerId = buf.readLong(); + + if (ledgers.add(ledgerId) && verbose) { + LOG.info("Found ledger {} in journal", ledgerId); + } + } + }, false); + } + + private void delete(Path path) { + try { + Files.delete(path); + } catch (IOException e) { + LOG.warn("Unable to delete {}", path.toAbsolutePath(), e); + } + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/LocationsIndexRebuildOp.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/LocationsIndexRebuildOp.java index 6cf62326773..b9aaebec9e2 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/LocationsIndexRebuildOp.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/LocationsIndexRebuildOp.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file @@ -21,9 +21,8 @@ package org.apache.bookkeeper.bookie.storage.ldb; import com.google.common.collect.Sets; - import io.netty.buffer.ByteBuf; - +import java.io.File; import java.io.IOException; import java.nio.file.FileSystems; import java.nio.file.Files; @@ -32,11 +31,12 @@ import java.util.Date; import java.util.Set; import java.util.concurrent.TimeUnit; - -import org.apache.bookkeeper.bookie.Bookie; -import org.apache.bookkeeper.bookie.EntryLogger; -import org.apache.bookkeeper.bookie.EntryLogger.EntryLogScanner; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicReference; +import org.apache.bookkeeper.bookie.BookieImpl; +import org.apache.bookkeeper.bookie.DefaultEntryLogger; import org.apache.bookkeeper.bookie.LedgerDirsManager; +import org.apache.bookkeeper.bookie.storage.EntryLogScanner; import org.apache.bookkeeper.bookie.storage.ldb.KeyValueStorageFactory.DbConfigType; import org.apache.bookkeeper.conf.ServerConfiguration; import org.apache.bookkeeper.stats.NullStatsLogger; @@ -46,7 +46,7 @@ import org.slf4j.LoggerFactory; /** - * Scan all entries in the entry log and rebuild the ledgerStorageIndex. + * Scan all entries in the entry log and rebuild the locations index. */ public class LocationsIndexRebuildOp { private final ServerConfiguration conf; @@ -55,72 +55,102 @@ public LocationsIndexRebuildOp(ServerConfiguration conf) { this.conf = conf; } - public void initiate() throws IOException { - LOG.info("Starting index rebuilding"); - - // Move locations index to a backup directory - String basePath = Bookie.getCurrentDirectory(conf.getLedgerDirs()[0]).toString(); - Path currentPath = FileSystems.getDefault().getPath(basePath, "locations"); - String timestamp = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSZ").format(new Date()); - Path backupPath = FileSystems.getDefault().getPath(basePath, "locations.BACKUP-" + timestamp); - Files.move(currentPath, backupPath); - - LOG.info("Created locations index backup at {}", backupPath); + private static final int BATCH_COMMIT_SIZE = 10_000; + public void initiate() throws IOException { + LOG.info("Starting locations index rebuilding"); + File[] indexDirs = conf.getIndexDirs(); + if (indexDirs == null) { + indexDirs = conf.getLedgerDirs(); + } + if (indexDirs.length != conf.getLedgerDirs().length) { + throw new IOException("ledger and index dirs size not matched"); + } long startTime = System.nanoTime(); + // Move locations index to a backup directory + for (int i = 0; i < conf.getLedgerDirs().length; i++) { + File ledgerDir = conf.getLedgerDirs()[i]; + File indexDir = indexDirs[i]; + String iBasePath = BookieImpl.getCurrentDirectory(indexDir).toString(); + Path indexCurrentPath = FileSystems.getDefault().getPath(iBasePath, "locations"); + String timestamp = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSZ").format(new Date()); + Path backupPath = FileSystems.getDefault().getPath(iBasePath, "locations.BACKUP-" + timestamp); + Files.move(indexCurrentPath, backupPath); + + LOG.info("Created locations index backup at {}", backupPath); + + File[] lDirs = new File[1]; + lDirs[0] = ledgerDir; + DefaultEntryLogger entryLogger = new DefaultEntryLogger(conf, new LedgerDirsManager(conf, lDirs, + new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold()))); + Set entryLogs = entryLogger.getEntryLogsSet(); + + Set activeLedgers = getActiveLedgers(conf, KeyValueStorageRocksDB.factory, iBasePath); + LOG.info("Found {} active ledgers in ledger manager", 
activeLedgers.size()); + + KeyValueStorage newIndex = KeyValueStorageRocksDB.factory.newKeyValueStorage(iBasePath, "locations", + DbConfigType.Default, conf); + + int totalEntryLogs = entryLogs.size(); + int completedEntryLogs = 0; + LOG.info("Scanning {} entry logs", totalEntryLogs); + AtomicReference batch = new AtomicReference<>(newIndex.newBatch()); + AtomicInteger count = new AtomicInteger(); + + for (long entryLogId : entryLogs) { + entryLogger.scanEntryLog(entryLogId, new EntryLogScanner() { + @Override + public void process(long ledgerId, long offset, ByteBuf entry) throws IOException { + long entryId = entry.getLong(8); + + // Actual location indexed is pointing past the entry size + long location = (entryLogId << 32L) | (offset + 4); + + if (LOG.isDebugEnabled()) { + LOG.debug("Rebuilding {}:{} at location {} / {}", ledgerId, entryId, location >> 32, + location & (Integer.MAX_VALUE - 1)); + } + + // Update the ledger index page + LongPairWrapper key = LongPairWrapper.get(ledgerId, entryId); + LongWrapper value = LongWrapper.get(location); + + try { + batch.get().put(key.array, value.array); + } finally { + key.recycle(); + value.recycle(); + } + + if (count.incrementAndGet() > BATCH_COMMIT_SIZE) { + batch.get().flush(); + batch.get().close(); + + batch.set(newIndex.newBatch()); + count.set(0); + } + } - EntryLogger entryLogger = new EntryLogger(conf, new LedgerDirsManager(conf, conf.getLedgerDirs(), - new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold()))); - Set entryLogs = entryLogger.getEntryLogsSet(); - - String locationsDbPath = FileSystems.getDefault().getPath(basePath, "locations").toFile().toString(); - - Set activeLedgers = getActiveLedgers(conf, KeyValueStorageRocksDB.factory, basePath); - LOG.info("Found {} active ledgers in ledger manager", activeLedgers.size()); - - KeyValueStorage newIndex = KeyValueStorageRocksDB.factory.newKeyValueStorage(locationsDbPath, DbConfigType.Huge, - conf); - - int totalEntryLogs = entryLogs.size(); - int completedEntryLogs = 0; - LOG.info("Scanning {} entry logs", totalEntryLogs); - - for (long entryLogId : entryLogs) { - entryLogger.scanEntryLog(entryLogId, new EntryLogScanner() { - @Override - public void process(long ledgerId, long offset, ByteBuf entry) throws IOException { - long entryId = entry.getLong(8); - - // Actual location indexed is pointing past the entry size - long location = (entryLogId << 32L) | (offset + 4); - - if (LOG.isDebugEnabled()) { - LOG.debug("Rebuilding {}:{} at location {} / {}", ledgerId, entryId, location >> 32, - location & (Integer.MAX_VALUE - 1)); + @Override + public boolean accept(long ledgerId) { + return activeLedgers.contains(ledgerId); } + }); - // Update the ledger index page - LongPairWrapper key = LongPairWrapper.get(ledgerId, entryId); - LongWrapper value = LongWrapper.get(location); - newIndex.put(key.array, value.array); - } - - @Override - public boolean accept(long ledgerId) { - return activeLedgers.contains(ledgerId); - } - }); - - ++completedEntryLogs; - LOG.info("Completed scanning of log {}.log -- {} / {}", Long.toHexString(entryLogId), completedEntryLogs, - totalEntryLogs); - } + ++completedEntryLogs; + LOG.info("Completed scanning of log {}.log -- {} / {}", Long.toHexString(entryLogId), + completedEntryLogs, totalEntryLogs); + } - newIndex.sync(); - newIndex.close(); + batch.get().flush(); + batch.get().close(); + newIndex.sync(); + newIndex.close(); + } LOG.info("Rebuilding index is done. 
Total time: {}", - DurationFormatUtils.formatDurationHMS(TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startTime))); + DurationFormatUtils.formatDurationHMS( + TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startTime))); } private Set getActiveLedgers(ServerConfiguration conf, KeyValueStorageFactory storageFactory, String basePath) diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/LongPairWrapper.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/LongPairWrapper.java index 993297c2b36..488d5978f3b 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/LongPairWrapper.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/LongPairWrapper.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/LongWrapper.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/LongWrapper.java index 144b9ee75a1..451f0b76141 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/LongWrapper.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/LongWrapper.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/PersistentEntryLogMetadataMap.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/PersistentEntryLogMetadataMap.java new file mode 100644 index 00000000000..bfb8d0d43fd --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/PersistentEntryLogMetadataMap.java @@ -0,0 +1,263 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + */ +package org.apache.bookkeeper.bookie.storage.ldb; + +import static org.apache.bookkeeper.util.BookKeeperConstants.METADATA_CACHE; + +import io.netty.util.concurrent.FastThreadLocal; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.File; +import java.io.IOException; +import java.util.Map.Entry; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.function.BiConsumer; +import lombok.extern.slf4j.Slf4j; +import org.apache.bookkeeper.bookie.BookieException.EntryLogMetadataMapException; +import org.apache.bookkeeper.bookie.EntryLogMetadata; +import org.apache.bookkeeper.bookie.EntryLogMetadata.EntryLogMetadataRecyclable; +import org.apache.bookkeeper.bookie.EntryLogMetadataMap; +import org.apache.bookkeeper.bookie.storage.ldb.KeyValueStorage.CloseableIterator; +import org.apache.bookkeeper.bookie.storage.ldb.KeyValueStorageFactory.DbConfigType; +import org.apache.bookkeeper.conf.ServerConfiguration; + +/** + * Persistent entryLogMetadata-map that stores entry-loggers metadata into + * rocksDB. + */ +@Slf4j +public class PersistentEntryLogMetadataMap implements EntryLogMetadataMap { + // persistent Rocksdb to store metadata-map + private final KeyValueStorage metadataMapDB; + private AtomicBoolean isClosed = new AtomicBoolean(false); + + private static final FastThreadLocal baos = new FastThreadLocal() { + @Override + protected ByteArrayOutputStream initialValue() { + return new ByteArrayOutputStream(); + } + }; + private static final FastThreadLocal bais = new FastThreadLocal() { + @Override + protected ByteArrayInputStream initialValue() { + return new ByteArrayInputStream(new byte[1]); + } + }; + private static final FastThreadLocal dataos = new FastThreadLocal() { + @Override + protected DataOutputStream initialValue() { + return new DataOutputStream(baos.get()); + } + }; + private static final FastThreadLocal datais = new FastThreadLocal() { + @Override + protected DataInputStream initialValue() { + return new DataInputStream(bais.get()); + } + }; + + public PersistentEntryLogMetadataMap(String metadataPath, ServerConfiguration conf) throws IOException { + log.info("Loading persistent entrylog metadata-map from {}/{}", metadataPath, METADATA_CACHE); + File dir = new File(metadataPath); + if (!dir.mkdirs() && !dir.exists()) { + String err = "Unable to create directory " + dir; + log.error(err); + throw new IOException(err); + } + metadataMapDB = KeyValueStorageRocksDB.factory.newKeyValueStorage(metadataPath, METADATA_CACHE, + DbConfigType.Default, conf); + } + + @Override + public boolean containsKey(long entryLogId) throws EntryLogMetadataMapException { + throwIfClosed(); + LongWrapper key = LongWrapper.get(entryLogId); + try { + boolean isExist; + try { + isExist = metadataMapDB.get(key.array) != null; + } catch (IOException e) { + throw new EntryLogMetadataMapException(e); + } + return isExist; + } finally { + key.recycle(); + } + } + + @Override + public void put(long entryLogId, EntryLogMetadata entryLogMeta) throws EntryLogMetadataMapException { + throwIfClosed(); + LongWrapper key = LongWrapper.get(entryLogId); + try { + baos.get().reset(); + try { + entryLogMeta.serialize(dataos.get()); + metadataMapDB.put(key.array, baos.get().toByteArray()); + } catch (IllegalStateException | IOException e) { + log.error("Failed to serialize entrylog-metadata, entryLogId {}", entryLogId); + throw new EntryLogMetadataMapException(e); + } + } finally { + 
key.recycle(); + } + + } + + /** + * {@link EntryLogMetadata} life-cycle in supplied action will be transient + * and it will be recycled as soon as supplied action is completed. + */ + @Override + public void forEach(BiConsumer action) throws EntryLogMetadataMapException { + throwIfClosed(); + CloseableIterator> iterator = metadataMapDB.iterator(); + try { + while (iterator.hasNext()) { + if (isClosed.get()) { + break; + } + Entry entry = iterator.next(); + long entryLogId = ArrayUtil.getLong(entry.getKey(), 0); + EntryLogMetadataRecyclable metadata = getEntryLogMetadataRecyclable(entry.getValue()); + try { + action.accept(entryLogId, metadata); + } finally { + metadata.recycle(); + } + } + } catch (IOException e) { + log.error("Failed to iterate over entry-log metadata map {}", e.getMessage(), e); + throw new EntryLogMetadataMapException(e); + } finally { + try { + iterator.close(); + } catch (IOException e) { + log.error("Failed to close entry-log metadata-map rocksDB iterator {}", e.getMessage(), e); + } + } + } + + /** + * {@link EntryLogMetadata} life-cycle in supplied action will be transient + * and it will be recycled as soon as supplied action is completed. + */ + @Override + public void forKey(long entryLogId, BiConsumer action) throws EntryLogMetadataMapException { + throwIfClosed(); + LongWrapper key = LongWrapper.get(entryLogId); + try { + byte[] value = metadataMapDB.get(key.array); + if (value == null || value.length == 0) { + action.accept(entryLogId, null); + return; + } + EntryLogMetadataRecyclable metadata = getEntryLogMetadataRecyclable(value); + try { + action.accept(entryLogId, metadata); + } finally { + metadata.recycle(); + } + } catch (IOException e) { + log.error("Failed to get metadata for entryLogId {}: {}", entryLogId, e.getMessage(), e); + throw new EntryLogMetadataMapException(e); + } finally { + key.recycle(); + } + } + + private EntryLogMetadataRecyclable getEntryLogMetadataRecyclable(byte[] value) throws IOException { + ByteArrayInputStream localBais = bais.get(); + DataInputStream localDatais = datais.get(); + if (localBais.available() < value.length) { + localBais.close(); + localDatais.close(); + ByteArrayInputStream newBais = new ByteArrayInputStream(value); + bais.set(newBais); + datais.set(new DataInputStream(newBais)); + } else { + localBais.read(value, 0, value.length); + } + localBais.reset(); + localDatais.reset(); + EntryLogMetadataRecyclable metadata = EntryLogMetadata.deserialize(datais.get()); + return metadata; + } + + @Override + public void remove(long entryLogId) throws EntryLogMetadataMapException { + throwIfClosed(); + LongWrapper key = LongWrapper.get(entryLogId); + try { + try { + metadataMapDB.delete(key.array); + } catch (IOException e) { + throw new EntryLogMetadataMapException(e); + } + } finally { + key.recycle(); + } + } + + @Override + public int size() throws EntryLogMetadataMapException { + throwIfClosed(); + try { + return (int) metadataMapDB.count(); + } catch (IOException e) { + throw new EntryLogMetadataMapException(e); + } + } + + @Override + public void clear() throws EntryLogMetadataMapException { + try { + try (KeyValueStorage.Batch b = metadataMapDB.newBatch(); + CloseableIterator itr = metadataMapDB.keys()) { + while (itr.hasNext()) { + b.remove(itr.next()); + } + b.flush(); + } + } catch (IOException e) { + throw new EntryLogMetadataMapException(e); + } + } + + @Override + public void close() throws IOException { + if (isClosed.compareAndSet(false, true)) { + metadataMapDB.close(); + } else { + log.warn("Attempted 
to close already closed PersistentEntryLogMetadataMap"); + } + } + + public void throwIfClosed() throws EntryLogMetadataMapException { + if (isClosed.get()) { + final String msg = "Attempted to use PersistentEntryLogMetadataMap after it was closed"; + log.error(msg); + throw new EntryLogMetadataMapException(new IOException(msg)); + } + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/ReadCache.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/ReadCache.java index b14478fccd6..1d850c456c6 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/ReadCache.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/ReadCache.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -25,27 +25,29 @@ import io.netty.buffer.ByteBuf; import io.netty.buffer.ByteBufAllocator; import io.netty.buffer.Unpooled; - +import io.netty.util.ReferenceCountUtil; import java.io.Closeable; import java.util.ArrayList; import java.util.List; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.locks.ReentrantReadWriteLock; - import org.apache.bookkeeper.util.collections.ConcurrentLongLongPairHashMap; import org.apache.bookkeeper.util.collections.ConcurrentLongLongPairHashMap.LongPair; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * Read cache implementation. * *

Uses the specified amount of memory and pairs it with a hashmap. * - *

The memory is splitted in multiple segments that are used in a + *

The memory is split in multiple segments that are used in a * ring-buffer fashion. When the read cache is full, the oldest segment * is cleared and rotated to make space for new entries to be added to * the read cache. */ public class ReadCache implements Closeable { + private static final Logger log = LoggerFactory.getLogger(ReadCache.class); private static final int DEFAULT_MAX_SEGMENT_SIZE = 1 * 1024 * 1024 * 1024; @@ -57,13 +59,15 @@ public class ReadCache implements Closeable { private final int segmentSize; + private ByteBufAllocator allocator; private final ReentrantReadWriteLock lock = new ReentrantReadWriteLock(); - public ReadCache(long maxCacheSize) { - this(maxCacheSize, DEFAULT_MAX_SEGMENT_SIZE); + public ReadCache(ByteBufAllocator allocator, long maxCacheSize) { + this(allocator, maxCacheSize, DEFAULT_MAX_SEGMENT_SIZE); } - public ReadCache(long maxCacheSize, int maxSegmentSize) { + public ReadCache(ByteBufAllocator allocator, long maxCacheSize, int maxSegmentSize) { + this.allocator = allocator; int segmentsCount = Math.max(2, (int) (maxCacheSize / maxSegmentSize)); segmentSize = (int) (maxCacheSize / segmentsCount); @@ -72,13 +76,17 @@ public ReadCache(long maxCacheSize, int maxSegmentSize) { for (int i = 0; i < segmentsCount; i++) { cacheSegments.add(Unpooled.directBuffer(segmentSize, segmentSize)); - cacheIndexes.add(new ConcurrentLongLongPairHashMap(4096, 2 * Runtime.getRuntime().availableProcessors())); + ConcurrentLongLongPairHashMap concurrentLongLongPairHashMap = ConcurrentLongLongPairHashMap.newBuilder() + .expectedItems(4096) + .concurrencyLevel(2 * Runtime.getRuntime().availableProcessors()) + .build(); + cacheIndexes.add(concurrentLongLongPairHashMap); } } @Override public void close() { - cacheSegments.forEach(ByteBuf::release); + cacheSegments.forEach(ReferenceCountUtil::safeRelease); } public void put(long ledgerId, long entryId, ByteBuf entry) { @@ -88,6 +96,10 @@ public void put(long ledgerId, long entryId, ByteBuf entry) { lock.readLock().lock(); try { + if (entrySize > segmentSize) { + log.warn("entrySize {} > segmentSize {}, skip update read cache!", entrySize, segmentSize); + return; + } int offset = currentSegmentOffset.getAndAdd(alignedSize); if (offset + entrySize > segmentSize) { // Roll-over the segment (outside the read-lock) @@ -140,7 +152,7 @@ public ByteBuf get(long ledgerId, long entryId) { int entryOffset = (int) res.first; int entryLen = (int) res.second; - ByteBuf entry = ByteBufAllocator.DEFAULT.directBuffer(entryLen, entryLen); + ByteBuf entry = allocator.buffer(entryLen, entryLen); entry.writeBytes(cacheSegments.get(segmentIdx), entryOffset, entryLen); return entry; } @@ -153,6 +165,27 @@ public ByteBuf get(long ledgerId, long entryId) { return null; } + public boolean hasEntry(long ledgerId, long entryId) { + lock.readLock().lock(); + + try { + int size = cacheSegments.size(); + for (int i = 0; i < size; i++) { + int segmentIdx = (currentSegmentIdx + (size - i)) % size; + + LongPair res = cacheIndexes.get(segmentIdx).get(ledgerId, entryId); + if (res != null) { + return true; + } + } + } finally { + lock.readLock().unlock(); + } + + // Entry not found in any segment + return false; + } + /** * @return the total size of cached entries */ diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/SingleDirectoryDbLedgerStorage.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/SingleDirectoryDbLedgerStorage.java index e31b44e8147..6ce2d4b4f54 100644 --- 
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/SingleDirectoryDbLedgerStorage.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/SingleDirectoryDbLedgerStorage.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -21,26 +21,33 @@ package org.apache.bookkeeper.bookie.storage.ldb; import static com.google.common.base.Preconditions.checkArgument; +import static com.google.common.base.Preconditions.checkState; import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; import com.google.protobuf.ByteString; - +import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; import io.netty.buffer.ByteBuf; +import io.netty.buffer.ByteBufAllocator; +import io.netty.buffer.Unpooled; +import io.netty.util.ReferenceCountUtil; import io.netty.util.concurrent.DefaultThreadFactory; - +import java.io.File; import java.io.IOException; -import java.util.SortedMap; +import java.util.Collections; +import java.util.EnumSet; +import java.util.List; +import java.util.Map; +import java.util.PrimitiveIterator.OfLong; import java.util.concurrent.CopyOnWriteArrayList; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.locks.ReentrantLock; import java.util.concurrent.locks.StampedLock; - import org.apache.bookkeeper.bookie.Bookie; import org.apache.bookkeeper.bookie.Bookie.NoEntryException; import org.apache.bookkeeper.bookie.BookieException; @@ -50,23 +57,29 @@ import org.apache.bookkeeper.bookie.Checkpointer; import org.apache.bookkeeper.bookie.CompactableLedgerStorage; import org.apache.bookkeeper.bookie.EntryLocation; -import org.apache.bookkeeper.bookie.EntryLogger; +import org.apache.bookkeeper.bookie.GarbageCollectionStatus; import org.apache.bookkeeper.bookie.GarbageCollectorThread; import org.apache.bookkeeper.bookie.LastAddConfirmedUpdateNotification; +import org.apache.bookkeeper.bookie.LedgerCache; import org.apache.bookkeeper.bookie.LedgerDirsManager; +import org.apache.bookkeeper.bookie.LedgerDirsManager.LedgerDirsListener; +import org.apache.bookkeeper.bookie.LedgerEntryPage; import org.apache.bookkeeper.bookie.StateManager; +import org.apache.bookkeeper.bookie.storage.EntryLogger; import org.apache.bookkeeper.bookie.storage.ldb.DbLedgerStorageDataFormats.LedgerData; import org.apache.bookkeeper.bookie.storage.ldb.KeyValueStorage.Batch; +import org.apache.bookkeeper.common.util.MathUtils; import org.apache.bookkeeper.common.util.Watcher; import org.apache.bookkeeper.conf.ServerConfiguration; import org.apache.bookkeeper.meta.LedgerManager; import org.apache.bookkeeper.proto.BookieProtocol; import org.apache.bookkeeper.stats.Counter; -import org.apache.bookkeeper.stats.Gauge; import org.apache.bookkeeper.stats.OpStatsLogger; import org.apache.bookkeeper.stats.StatsLogger; -import org.apache.bookkeeper.util.MathUtils; +import org.apache.bookkeeper.stats.ThreadRegistry; import org.apache.bookkeeper.util.collections.ConcurrentLongHashMap; +import org.apache.commons.collections4.CollectionUtils; +import org.apache.commons.lang.mutable.MutableLong; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -102,7 
+115,14 @@ public class SingleDirectoryDbLedgerStorage implements CompactableLedgerStorage protected final AtomicBoolean hasFlushBeenTriggered = new AtomicBoolean(false); private final AtomicBoolean isFlushOngoing = new AtomicBoolean(false); - private final ExecutorService executor = Executors.newSingleThreadExecutor(new DefaultThreadFactory("db-storage")); + private static String dbStoragerExecutorName = "db-storage"; + private final ExecutorService executor = Executors.newSingleThreadExecutor( + new DefaultThreadFactory(dbStoragerExecutorName) { + @Override + protected Thread newThread(Runnable r, String name) { + return super.newThread(ThreadRegistry.registerThread(r, dbStoragerExecutorName), name); + } + }); // Executor used for db index cleanup private final ScheduledExecutorService cleanupExecutor = Executors @@ -111,139 +131,126 @@ public class SingleDirectoryDbLedgerStorage implements CompactableLedgerStorage private final CopyOnWriteArrayList ledgerDeletionListeners = Lists .newCopyOnWriteArrayList(); - private final CheckpointSource checkpointSource; + private CheckpointSource checkpointSource = CheckpointSource.DEFAULT; private Checkpoint lastCheckpoint = Checkpoint.MIN; private final long writeCacheMaxSize; private final long readCacheMaxSize; private final int readAheadCacheBatchSize; + private final long readAheadCacheBatchBytesSize; private final long maxThrottleTimeNanos; - private final StatsLogger stats; - - private final OpStatsLogger addEntryStats; - private final OpStatsLogger readEntryStats; - private final OpStatsLogger readCacheHitStats; - private final OpStatsLogger readCacheMissStats; - private final OpStatsLogger readAheadBatchCountStats; - private final OpStatsLogger readAheadBatchSizeStats; - private final OpStatsLogger flushStats; - private final OpStatsLogger flushSizeStats; + private final DbLedgerStorageStats dbLedgerStorageStats; - private final Counter throttledWriteRequests; - private final Counter rejectedWriteRequests; + private static final long DEFAULT_MAX_THROTTLE_TIME_MILLIS = TimeUnit.SECONDS.toMillis(10); - static final String READ_AHEAD_CACHE_BATCH_SIZE = "dbStorage_readAheadCacheBatchSize"; - private static final int DEFAULT_READ_AHEAD_CACHE_BATCH_SIZE = 100; + private final long maxReadAheadBytesSize; - private static final long DEFAULT_MAX_THROTTLE_TIME_MILLIS = TimeUnit.SECONDS.toMillis(10); + private final Counter flushExecutorTime; + private final boolean singleLedgerDirs; public SingleDirectoryDbLedgerStorage(ServerConfiguration conf, LedgerManager ledgerManager, - LedgerDirsManager ledgerDirsManager, LedgerDirsManager indexDirsManager, StateManager stateManager, - CheckpointSource checkpointSource, Checkpointer checkpointer, StatsLogger statsLogger, - ScheduledExecutorService gcExecutor, long writeCacheSize, long readCacheSize) throws IOException { - + LedgerDirsManager ledgerDirsManager, LedgerDirsManager indexDirsManager, + EntryLogger entryLogger, StatsLogger statsLogger, ByteBufAllocator allocator, + long writeCacheSize, long readCacheSize, int readAheadCacheBatchSize, + long readAheadCacheBatchBytesSize) + throws IOException { checkArgument(ledgerDirsManager.getAllLedgerDirs().size() == 1, "Db implementation only allows for one storage dir"); - String baseDir = ledgerDirsManager.getAllLedgerDirs().get(0).toString(); - log.info("Creating single directory db ledger storage on {}", baseDir); + String ledgerBaseDir = ledgerDirsManager.getAllLedgerDirs().get(0).getPath(); + // indexBaseDir defaults to ledgerBaseDir + String 
indexBaseDir = ledgerBaseDir; + if (CollectionUtils.isEmpty(indexDirsManager.getAllLedgerDirs()) + || ledgerBaseDir.equals(indexDirsManager.getAllLedgerDirs().get(0).getPath())) { + log.info("indexDir equals ledgerBaseDir, creating single directory db ledger storage on {}", + indexBaseDir); + } else { + // if a separate indexDir is specified, use it + indexBaseDir = indexDirsManager.getAllLedgerDirs().get(0).getPath(); + log.info("indexDir is a separate dir, creating single directory db ledger storage on {}", + indexBaseDir); + } - this.writeCacheMaxSize = writeCacheSize; - this.writeCache = new WriteCache(writeCacheMaxSize / 2); - this.writeCacheBeingFlushed = new WriteCache(writeCacheMaxSize / 2); + StatsLogger ledgerIndexDirStatsLogger = statsLogger + .scopeLabel("ledgerDir", ledgerBaseDir) + .scopeLabel("indexDir", indexBaseDir); - this.checkpointSource = checkpointSource; + this.writeCacheMaxSize = writeCacheSize; + this.writeCache = new WriteCache(allocator, writeCacheMaxSize / 2); + this.writeCacheBeingFlushed = new WriteCache(allocator, writeCacheMaxSize / 2); + this.singleLedgerDirs = conf.getLedgerDirs().length == 1; readCacheMaxSize = readCacheSize; - readAheadCacheBatchSize = conf.getInt(READ_AHEAD_CACHE_BATCH_SIZE, DEFAULT_READ_AHEAD_CACHE_BATCH_SIZE); + this.readAheadCacheBatchSize = readAheadCacheBatchSize; + this.readAheadCacheBatchBytesSize = readAheadCacheBatchBytesSize; + + // Do not attempt to perform read-ahead more than half the total size of the cache + maxReadAheadBytesSize = readCacheMaxSize / 2; long maxThrottleTimeMillis = conf.getLong(DbLedgerStorage.MAX_THROTTLE_TIME_MILLIS, DEFAULT_MAX_THROTTLE_TIME_MILLIS); maxThrottleTimeNanos = TimeUnit.MILLISECONDS.toNanos(maxThrottleTimeMillis); - readCache = new ReadCache(readCacheMaxSize); + readCache = new ReadCache(allocator, readCacheMaxSize); - this.stats = statsLogger; + ledgerIndex = new LedgerMetadataIndex(conf, + KeyValueStorageRocksDB.factory, indexBaseDir, ledgerIndexDirStatsLogger); + entryLocationIndex = new EntryLocationIndex(conf, + KeyValueStorageRocksDB.factory, indexBaseDir, ledgerIndexDirStatsLogger); - ledgerIndex = new LedgerMetadataIndex(conf, KeyValueStorageRocksDB.factory, baseDir, stats); - entryLocationIndex = new EntryLocationIndex(conf, KeyValueStorageRocksDB.factory, baseDir, stats); - - transientLedgerInfoCache = new ConcurrentLongHashMap<>(16 * 1024, - Runtime.getRuntime().availableProcessors() * 2); + transientLedgerInfoCache = ConcurrentLongHashMap.newBuilder() + .expectedItems(16 * 1024) + .concurrencyLevel(Runtime.getRuntime().availableProcessors() * 2) + .build(); cleanupExecutor.scheduleAtFixedRate(this::cleanupStaleTransientLedgerInfo, TransientLedgerInfo.LEDGER_INFO_CACHING_TIME_MINUTES, TransientLedgerInfo.LEDGER_INFO_CACHING_TIME_MINUTES, TimeUnit.MINUTES); - entryLogger = new EntryLogger(conf, ledgerDirsManager, null, statsLogger); - gcThread = new GarbageCollectorThread(conf, ledgerManager, this, statsLogger); + this.entryLogger = entryLogger; + gcThread = new GarbageCollectorThread(conf, + ledgerManager, ledgerDirsManager, this, entryLogger, ledgerIndexDirStatsLogger); - stats.registerGauge("write-cache-size", new Gauge() { - @Override - public Long getDefaultValue() { - return 0L; - } + dbLedgerStorageStats = new DbLedgerStorageStats( + ledgerIndexDirStatsLogger, + () -> writeCache.size() + writeCacheBeingFlushed.size(), + () -> writeCache.count() + writeCacheBeingFlushed.count(), + () -> readCache.size(), + () -> readCache.count() + ); - @Override - public Long 
getSample() { - return writeCache.size() + writeCacheBeingFlushed.size(); - } - }); - stats.registerGauge("write-cache-count", new Gauge() { - @Override - public Long getDefaultValue() { - return 0L; - } - - @Override - public Long getSample() { - return writeCache.count() + writeCacheBeingFlushed.count(); - } - }); - stats.registerGauge("read-cache-size", new Gauge() { - @Override - public Long getDefaultValue() { - return 0L; - } - - @Override - public Long getSample() { - return readCache.size(); - } - }); - stats.registerGauge("read-cache-count", new Gauge() { - @Override - public Long getDefaultValue() { - return 0L; - } + flushExecutorTime = ledgerIndexDirStatsLogger.getThreadScopedCounter("db-storage-thread-time"); - @Override - public Long getSample() { - return readCache.count(); - } + executor.submit(() -> { + // ensure the metric gets registered on start-up as this thread only executes + // when the write cache is full, which may not happen for a long time, if at all + flushExecutorTime.addLatency(0, TimeUnit.NANOSECONDS); }); - addEntryStats = stats.getOpStatsLogger("add-entry"); - readEntryStats = stats.getOpStatsLogger("read-entry"); - readCacheHitStats = stats.getOpStatsLogger("read-cache-hits"); - readCacheMissStats = stats.getOpStatsLogger("read-cache-misses"); - readAheadBatchCountStats = stats.getOpStatsLogger("readahead-batch-count"); - readAheadBatchSizeStats = stats.getOpStatsLogger("readahead-batch-size"); - flushStats = stats.getOpStatsLogger("flush"); - flushSizeStats = stats.getOpStatsLogger("flush-size"); - - throttledWriteRequests = stats.getCounter("throttled-write-requests"); - rejectedWriteRequests = stats.getCounter("rejected-write-requests"); + ledgerDirsManager.addLedgerDirsListener(getLedgerDirsListener()); + if (!ledgerBaseDir.equals(indexBaseDir)) { + indexDirsManager.addLedgerDirsListener(getLedgerDirsListener()); + } } @Override public void initialize(ServerConfiguration conf, LedgerManager ledgerManager, LedgerDirsManager ledgerDirsManager, - LedgerDirsManager indexDirsManager, StateManager stateManager, CheckpointSource checkpointSource, - Checkpointer checkpointer, StatsLogger statsLogger) throws IOException { + LedgerDirsManager indexDirsManager, StatsLogger statsLogger, + ByteBufAllocator allocator) throws IOException { /// Initialized in constructor } + @Override + public void setStateManager(StateManager stateManager) { } + + @Override + public void setCheckpointSource(CheckpointSource checkpointSource) { + this.checkpointSource = checkpointSource; + } + @Override + public void setCheckpointer(Checkpointer checkpointer) { } + /** * Evict all the ledger info objects that were not used recently. 
*/ @@ -263,13 +270,80 @@ public void start() { gcThread.start(); } + @Override + public void forceGC() { + gcThread.enableForceGC(); + } + + @Override + public void forceGC(boolean forceMajor, boolean forceMinor) { + gcThread.enableForceGC(forceMajor, forceMinor); + } + + @Override + public boolean isInForceGC() { + return gcThread.isInForceGC(); + } + + public void suspendMinorGC() { + gcThread.suspendMinorGC(); + } + + public void suspendMajorGC() { + gcThread.suspendMajorGC(); + } + + public void resumeMinorGC() { + gcThread.resumeMinorGC(); + } + + public void resumeMajorGC() { + gcThread.resumeMajorGC(); + } + + public boolean isMajorGcSuspended() { + return gcThread.isMajorGcSuspend(); + } + + public boolean isMinorGcSuspended() { + return gcThread.isMinorGcSuspend(); + } + + @Override + public void entryLocationCompact() { + if (entryLocationIndex.isCompacting()) { + // RocksDB compaction is already running. + return; + } + cleanupExecutor.execute(() -> { + // There can only be one single cleanup task running because the cleanupExecutor + // is single-threaded + try { + log.info("Triggering entry location index RocksDB compaction."); + entryLocationIndex.compact(); + } catch (Throwable t) { + log.warn("Failed to trigger entry location index RocksDB compaction", t); + } + }); + } + + @Override + public boolean isEntryLocationCompacting() { + return entryLocationIndex.isCompacting(); + } + + @Override + public List getEntryLocationDBPath() { + return Lists.newArrayList(entryLocationIndex.getEntryLocationDBPath()); + } + @Override public void shutdown() throws InterruptedException { try { flush(); gcThread.shutdown(); - entryLogger.shutdown(); + entryLogger.close(); cleanupExecutor.shutdown(); cleanupExecutor.awaitTermination(1, TimeUnit.SECONDS); @@ -302,11 +376,62 @@ public boolean ledgerExists(long ledgerId) throws IOException { } @Override - public boolean isFenced(long ledgerId) throws IOException { + public boolean entryExists(long ledgerId, long entryId) throws IOException, BookieException { + if (entryId == BookieProtocol.LAST_ADD_CONFIRMED) { + return false; + } + + // We need to try to read from both write caches, since recent entries could be found in either of the two. The + write caches are already thread safe on their own; here we just need to make sure we get references to both + of them. Using an optimistic lock since the read lock is always free, unless we're swapping the caches. 
+ long stamp = writeCacheRotationLock.tryOptimisticRead(); + WriteCache localWriteCache = writeCache; + WriteCache localWriteCacheBeingFlushed = writeCacheBeingFlushed; + if (!writeCacheRotationLock.validate(stamp)) { + // Fallback to regular read lock approach + stamp = writeCacheRotationLock.readLock(); + try { + localWriteCache = writeCache; + localWriteCacheBeingFlushed = writeCacheBeingFlushed; + } finally { + writeCacheRotationLock.unlockRead(stamp); + } + } + + boolean inCache = localWriteCache.hasEntry(ledgerId, entryId) + || localWriteCacheBeingFlushed.hasEntry(ledgerId, entryId) + || readCache.hasEntry(ledgerId, entryId); + + if (inCache) { + return true; + } + + // Read from main storage + long entryLocation = entryLocationIndex.getLocation(ledgerId, entryId); + if (entryLocation != 0) { + return true; + } + + // Only a negative result while in limbo equates to unknown + throwIfLimbo(ledgerId); + + return false; + } + + @Override + public boolean isFenced(long ledgerId) throws IOException, BookieException { + boolean isFenced = ledgerIndex.get(ledgerId).getFenced(); + if (log.isDebugEnabled()) { - log.debug("isFenced. ledger: {}", ledgerId); + log.debug("ledger: {}, isFenced: {}.", ledgerId, isFenced); } - return ledgerIndex.get(ledgerId).getFenced(); + + // Only a negative result while in limbo equates to unknown + if (!isFenced) { + throwIfLimbo(ledgerId); + } + + return isFenced; } @Override @@ -359,8 +484,12 @@ public long addEntry(ByteBuf entry) throws IOException, BookieException { long stamp = writeCacheRotationLock.tryOptimisticRead(); boolean inserted = false; - inserted = writeCache.put(ledgerId, entryId, entry); - if (!writeCacheRotationLock.validate(stamp)) { + // If the stamp is 0, the lock was exclusively acquired, validation will fail, and we can skip this put. + if (stamp != 0) { + inserted = writeCache.put(ledgerId, entryId, entry); + } + + if (stamp == 0 || !writeCacheRotationLock.validate(stamp)) { // The write cache was rotated while we were inserting. We need to acquire the proper read lock and repeat // the operation because we might have inserted in a write cache that was already being flushed and cleared, // without being sure about this last entry being flushed or not. 
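The optimistic-locking scheme used by entryExists and addEntry in the hunks above can be summarized in isolation. The following is a minimal sketch only, assuming a plain java.util.concurrent.locks.StampedLock; the class and its fields are illustrative placeholders, not the project's WriteCache or writeCacheRotationLock API:

```java
import java.util.concurrent.locks.StampedLock;

// Minimal sketch of the optimistic-read pattern: snapshot both cache references
// cheaply, and fall back to a real read lock only if a rotation raced with us.
final class RotatingCachesSketch {
    private final StampedLock rotationLock = new StampedLock();
    private volatile Object activeCache = new Object();
    private volatile Object flushingCache = new Object();

    Object[] snapshotBothCaches() {
        // Optimistic read: free unless a cache rotation holds the write lock
        long stamp = rotationLock.tryOptimisticRead();
        Object active = activeCache;
        Object flushing = flushingCache;
        if (!rotationLock.validate(stamp)) {
            // A rotation ran concurrently (or stamp == 0): retry under a read lock
            stamp = rotationLock.readLock();
            try {
                active = activeCache;
                flushing = flushingCache;
            } finally {
                rotationLock.unlockRead(stamp);
            }
        }
        return new Object[] { active, flushing };
    }
}
```

Note that StampedLock.tryOptimisticRead() returns 0 while the write lock is held, and validate(0) always returns false; this is why the addEntry hunk above can skip the speculative put entirely when stamp == 0 and go straight to the fallback path.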
@@ -379,35 +508,40 @@ public long addEntry(ByteBuf entry) throws IOException, BookieException { // after successfully inserting the entry, update LAC and notify the watchers updateCachedLacIfNeeded(ledgerId, lac); - recordSuccessfulEvent(addEntryStats, startTime); + recordSuccessfulEvent(dbLedgerStorageStats.getAddEntryStats(), startTime); return entryId; } private void triggerFlushAndAddEntry(long ledgerId, long entryId, ByteBuf entry) throws IOException, BookieException { - // Write cache is full, we need to trigger a flush so that it gets rotated - // If the flush has already been triggered or flush has already switched the - // cache, we don't need to trigger another flush - if (!isFlushOngoing.get() && hasFlushBeenTriggered.compareAndSet(false, true)) { - // Trigger an early flush in background - log.info("Write cache is full, triggering flush"); - executor.execute(() -> { - try { - flush(); - } catch (IOException e) { - log.error("Error during flush", e); - } - }); - } - - throttledWriteRequests.inc(); + long throttledStartTime = MathUtils.nowInNano(); + dbLedgerStorageStats.getThrottledWriteRequests().inc(); long absoluteTimeoutNanos = System.nanoTime() + maxThrottleTimeNanos; while (System.nanoTime() < absoluteTimeoutNanos) { + // Write cache is full, we need to trigger a flush so that it gets rotated + // If the flush has already been triggered or flush has already switched the + // cache, we don't need to trigger another flush + if (!isFlushOngoing.get() && hasFlushBeenTriggered.compareAndSet(false, true)) { + // Trigger an early flush in background + log.info("Write cache is full, triggering flush"); + executor.execute(() -> { + long startTime = System.nanoTime(); + try { + flush(); + } catch (IOException e) { + log.error("Error during flush", e); + } finally { + flushExecutorTime.addLatency(MathUtils.elapsedNanos(startTime), TimeUnit.NANOSECONDS); + } + }); + } + long stamp = writeCacheRotationLock.readLock(); try { if (writeCache.put(ledgerId, entryId, entry)) { // We succeeded in putting the entry in the write cache + recordSuccessfulEvent(dbLedgerStorageStats.getThrottledWriteStats(), throttledStartTime); return; } } finally { @@ -424,13 +558,25 @@ private void triggerFlushAndAddEntry(long ledgerId, long entryId, ByteBuf entry) } // Timeout expired and we weren't able to insert in write cache - rejectedWriteRequests.inc(); + dbLedgerStorageStats.getRejectedWriteRequests().inc(); + recordFailedEvent(dbLedgerStorageStats.getThrottledWriteStats(), throttledStartTime); throw new OperationRejectedException(); } @Override - public ByteBuf getEntry(long ledgerId, long entryId) throws IOException { + public ByteBuf getEntry(long ledgerId, long entryId) throws IOException, BookieException { long startTime = MathUtils.nowInNano(); + try { + ByteBuf entry = doGetEntry(ledgerId, entryId); + recordSuccessfulEvent(dbLedgerStorageStats.getReadEntryStats(), startTime); + return entry; + } catch (IOException e) { + recordFailedEvent(dbLedgerStorageStats.getReadEntryStats(), startTime); + throw e; + } + } + + private ByteBuf doGetEntry(long ledgerId, long entryId) throws IOException, BookieException { if (log.isDebugEnabled()) { log.debug("Get Entry: {}@{}", ledgerId, entryId); } @@ -459,38 +605,50 @@ public ByteBuf getEntry(long ledgerId, long entryId) throws IOException { // First try to read from the write cache of recent entries ByteBuf entry = localWriteCache.get(ledgerId, entryId); if (entry != null) { - recordSuccessfulEvent(readCacheHitStats, startTime); - 
recordSuccessfulEvent(readEntryStats, startTime); + dbLedgerStorageStats.getWriteCacheHitCounter().inc(); return entry; } // If there's a flush going on, the entry might be in the flush buffer entry = localWriteCacheBeingFlushed.get(ledgerId, entryId); if (entry != null) { - recordSuccessfulEvent(readCacheHitStats, startTime); - recordSuccessfulEvent(readEntryStats, startTime); + dbLedgerStorageStats.getWriteCacheHitCounter().inc(); return entry; } + dbLedgerStorageStats.getWriteCacheMissCounter().inc(); + // Try reading from read-ahead cache entry = readCache.get(ledgerId, entryId); if (entry != null) { - recordSuccessfulEvent(readCacheHitStats, startTime); - recordSuccessfulEvent(readEntryStats, startTime); + dbLedgerStorageStats.getReadCacheHitCounter().inc(); return entry; } + dbLedgerStorageStats.getReadCacheMissCounter().inc(); + // Read from main storage long entryLocation; + long locationIndexStartNano = MathUtils.nowInNano(); try { entryLocation = entryLocationIndex.getLocation(ledgerId, entryId); if (entryLocation == 0) { + // Only a negative result while in limbo equates to unknown + throwIfLimbo(ledgerId); + throw new NoEntryException(ledgerId, entryId); } + } finally { + dbLedgerStorageStats.getReadFromLocationIndexTime().addLatency( + MathUtils.elapsedNanos(locationIndexStartNano), TimeUnit.NANOSECONDS); + } + + long readEntryStartNano = MathUtils.nowInNano(); + try { entry = entryLogger.readEntry(ledgerId, entryId, entryLocation); - } catch (NoEntryException e) { - recordFailedEvent(readEntryStats, startTime); - throw e; + } finally { + dbLedgerStorageStats.getReadFromEntryLogTime().addLatency( + MathUtils.elapsedNanos(readEntryStartNano), TimeUnit.NANOSECONDS); } readCache.put(ledgerId, entryId, entry); @@ -499,55 +657,70 @@ public ByteBuf getEntry(long ledgerId, long entryId) throws IOException { long nextEntryLocation = entryLocation + 4 /* size header */ + entry.readableBytes(); fillReadAheadCache(ledgerId, entryId + 1, nextEntryLocation); - recordSuccessfulEvent(readCacheMissStats, startTime); - recordSuccessfulEvent(readEntryStats, startTime); return entry; } - private void fillReadAheadCache(long orginalLedgerId, long firstEntryId, long firstEntryLocation) { + private void fillReadAheadCache(long originalLedgerId, long firstEntryId, long firstEntryLocation) { + long readAheadStartNano = MathUtils.nowInNano(); + int count = 0; + long size = 0; + try { long firstEntryLogId = (firstEntryLocation >> 32); long currentEntryLogId = firstEntryLogId; long currentEntryLocation = firstEntryLocation; - int count = 0; - long size = 0; - while (count < readAheadCacheBatchSize && currentEntryLogId == firstEntryLogId) { - ByteBuf entry = entryLogger.internalReadEntry(orginalLedgerId, -1, currentEntryLocation); + while (chargeReadAheadCache(count, size) && currentEntryLogId == firstEntryLogId) { + ByteBuf entry = entryLogger.readEntry(originalLedgerId, + firstEntryId, currentEntryLocation); try { long currentEntryLedgerId = entry.getLong(0); long currentEntryId = entry.getLong(8); - if (currentEntryLedgerId != orginalLedgerId) { + if (currentEntryLedgerId != originalLedgerId) { // Found an entry belonging to a different ledger, stopping read-ahead - return; + break; } // Insert entry in read cache - readCache.put(orginalLedgerId, currentEntryId, entry); + readCache.put(originalLedgerId, currentEntryId, entry); count++; + firstEntryId++; size += entry.readableBytes(); currentEntryLocation += 4 + entry.readableBytes(); currentEntryLogId = currentEntryLocation >> 32; } finally { - 
entry.release(); + ReferenceCountUtil.release(entry); } } - - readAheadBatchCountStats.registerSuccessfulValue(count); - readAheadBatchSizeStats.registerSuccessfulValue(size); } catch (Exception e) { if (log.isDebugEnabled()) { - log.debug("Exception during read ahead for ledger: {}: e", orginalLedgerId, e); + log.debug("Exception during read ahead for ledger: {}", originalLedgerId, e); } + } finally { + dbLedgerStorageStats.getReadAheadBatchCountStats().registerSuccessfulValue(count); + dbLedgerStorageStats.getReadAheadBatchSizeStats().registerSuccessfulValue(size); + dbLedgerStorageStats.getReadAheadTime().addLatency( + MathUtils.elapsedNanos(readAheadStartNano), TimeUnit.NANOSECONDS); } } - public ByteBuf getLastEntry(long ledgerId) throws IOException { - long startTime = MathUtils.nowInNano(); + protected boolean chargeReadAheadCache(int currentReadAheadCount, long currentReadAheadBytes) { + // preserve the original per-batch count and overall read-ahead size limits + boolean chargeSizeCondition = currentReadAheadCount < readAheadCacheBatchSize + && currentReadAheadBytes < maxReadAheadBytesSize; + if (chargeSizeCondition && readAheadCacheBatchBytesSize > 0) { + // when a batch bytes limit is configured, also cap the bytes read per batch + chargeSizeCondition = currentReadAheadBytes < readAheadCacheBatchBytesSize; + } + return chargeSizeCondition; + } + + public ByteBuf getLastEntry(long ledgerId) throws IOException, BookieException { + throwIfLimbo(ledgerId); long stamp = writeCacheRotationLock.readLock(); try { @@ -555,7 +728,7 @@ public ByteBuf getLastEntry(long ledgerId) throws IOException { ByteBuf entry = writeCache.getLastEntry(ledgerId); if (entry != null) { if (log.isDebugEnabled()) { - long foundLedgerId = entry.readLong(); // ledgedId + long foundLedgerId = entry.readLong(); // ledgerId long entryId = entry.readLong(); entry.resetReaderIndex(); if (log.isDebugEnabled()) { @@ -564,8 +737,7 @@ public ByteBuf getLastEntry(long ledgerId) throws IOException { } } - recordSuccessfulEvent(readCacheHitStats, startTime); - recordSuccessfulEvent(readEntryStats, startTime); + dbLedgerStorageStats.getWriteCacheHitCounter().inc(); return entry; } @@ -573,7 +745,7 @@ public ByteBuf getLastEntry(long ledgerId) throws IOException { entry = writeCacheBeingFlushed.getLastEntry(ledgerId); if (entry != null) { if (log.isDebugEnabled()) { - entry.readLong(); // ledgedId + entry.readLong(); // ledgerId long entryId = entry.readLong(); entry.resetReaderIndex(); if (log.isDebugEnabled()) { @@ -581,25 +753,30 @@ public ByteBuf getLastEntry(long ledgerId) throws IOException { } } - recordSuccessfulEvent(readCacheHitStats, startTime); - recordSuccessfulEvent(readEntryStats, startTime); + dbLedgerStorageStats.getWriteCacheHitCounter().inc(); return entry; } } finally { writeCacheRotationLock.unlockRead(stamp); } + dbLedgerStorageStats.getWriteCacheMissCounter().inc(); + // Search the last entry in storage + long locationIndexStartNano = MathUtils.nowInNano(); long lastEntryId = entryLocationIndex.getLastEntryInLedger(ledgerId); if (log.isDebugEnabled()) { log.debug("Found last entry for ledger {} in db: {}", ledgerId, lastEntryId); } long entryLocation = entryLocationIndex.getLocation(ledgerId, lastEntryId); - ByteBuf content = entryLogger.readEntry(ledgerId, lastEntryId, entryLocation); + dbLedgerStorageStats.getReadFromLocationIndexTime().addLatency( + MathUtils.elapsedNanos(locationIndexStartNano), TimeUnit.NANOSECONDS); - recordSuccessfulEvent(readCacheMissStats, startTime); - recordSuccessfulEvent(readEntryStats, startTime); + long readEntryStartNano = 
MathUtils.nowInNano(); + ByteBuf content = entryLogger.readEntry(ledgerId, lastEntryId, entryLocation); + dbLedgerStorageStats.getReadFromEntryLogTime().addLatency( + MathUtils.elapsedNanos(readEntryStartNano), TimeUnit.NANOSECONDS); return content; } @@ -620,12 +797,21 @@ public void checkpoint(Checkpoint checkpoint) throws IOException { return; } - long startTime = MathUtils.nowInNano(); - // Only a single flush operation can happen at a time flushMutex.lock(); + long startTime = -1; + try { + startTime = MathUtils.nowInNano(); + } catch (Throwable e) { + // Fix spotbugs warning. Should never happen + flushMutex.unlock(); + throw new IOException(e); + } try { + if (writeCache.isEmpty()) { + return; + } // Swap the write cache so that writes can continue to happen while the flush is // ongoing swapWriteCache(); @@ -641,40 +827,26 @@ public void checkpoint(Checkpoint checkpoint) throws IOException { Batch batch = entryLocationIndex.newBatch(); writeCacheBeingFlushed.forEach((ledgerId, entryId, entry) -> { - try { - long location = entryLogger.addEntry(ledgerId, entry, true); - entryLocationIndex.addLocation(batch, ledgerId, entryId, location); - } catch (IOException e) { - throw new RuntimeException(e); - } + long location = entryLogger.addEntry(ledgerId, entry); + entryLocationIndex.addLocation(batch, ledgerId, entryId, location); }); + long entryLoggerStart = MathUtils.nowInNano(); entryLogger.flush(); + recordSuccessfulEvent(dbLedgerStorageStats.getFlushEntryLogStats(), entryLoggerStart); - long batchFlushStarTime = System.nanoTime(); + long batchFlushStartTime = MathUtils.nowInNano(); batch.flush(); batch.close(); + recordSuccessfulEvent(dbLedgerStorageStats.getFlushLocationIndexStats(), batchFlushStartTime); if (log.isDebugEnabled()) { log.debug("DB batch flushed time : {} s", - MathUtils.elapsedNanos(batchFlushStarTime) / (double) TimeUnit.SECONDS.toNanos(1)); + MathUtils.elapsedNanos(batchFlushStartTime) / (double) TimeUnit.SECONDS.toNanos(1)); } + long ledgerIndexStartTime = MathUtils.nowInNano(); ledgerIndex.flush(); - - cleanupExecutor.execute(() -> { - // There can only be one single cleanup task running because the cleanupExecutor - // is single-threaded - try { - if (log.isDebugEnabled()) { - log.debug("Removing deleted ledgers from db indexes"); - } - - entryLocationIndex.removeOffsetFromDeletedLedgers(); - ledgerIndex.removeDeletedLedgers(); - } catch (Throwable t) { - log.warn("Failed to cleanup db indexes", t); - } - }); + recordSuccessfulEvent(dbLedgerStorageStats.getFlushLedgerIndexStats(), ledgerIndexStartTime); lastCheckpoint = thisCheckpoint; @@ -688,16 +860,29 @@ public void checkpoint(Checkpoint checkpoint) throws IOException { log.debug("Flushing done time {} s -- Written {} MB/s", flushTimeSeconds, flushThroughput); } - recordSuccessfulEvent(flushStats, startTime); - flushSizeStats.registerSuccessfulValue(sizeToFlush); + recordSuccessfulEvent(dbLedgerStorageStats.getFlushStats(), startTime); + dbLedgerStorageStats.getFlushSizeStats().registerSuccessfulValue(sizeToFlush); } catch (IOException e) { - // Leave IOExecption as it is + recordFailedEvent(dbLedgerStorageStats.getFlushStats(), startTime); + // Leave IOException as it is throw e; - } catch (RuntimeException e) { - // Wrap unchecked exceptions - throw new IOException(e); } finally { try { + cleanupExecutor.execute(() -> { + // There can only be one single cleanup task running because the cleanupExecutor + // is single-threaded + try { + if (log.isDebugEnabled()) { + log.debug("Removing deleted ledgers from 
db indexes"); + } + + entryLocationIndex.removeOffsetFromDeletedLedgers(); + ledgerIndex.removeDeletedLedgers(); + } catch (Throwable t) { + log.warn("Failed to cleanup db indexes", t); + } + }); + isFlushOngoing.set(false); } finally { flushMutex.unlock(); @@ -717,14 +902,12 @@ private void swapWriteCache() { writeCacheBeingFlushed = writeCache; writeCache = tmp; + // Set to true before updating hasFlushBeenTriggered to false. + isFlushOngoing.set(true); // since the cache is switched, we can allow flush to be triggered hasFlushBeenTriggered.set(false); } finally { - try { - isFlushOngoing.set(true); - } finally { - writeCacheRotationLock.unlockWrite(stamp); - } + writeCacheRotationLock.unlockWrite(stamp); } } @@ -732,7 +915,9 @@ private void swapWriteCache() { public void flush() throws IOException { Checkpoint cp = checkpointSource.newCheckpoint(); checkpoint(cp); - checkpointSource.checkpointComplete(cp, true); + if (singleLedgerDirs) { + checkpointSource.checkpointComplete(cp, true); + } } @Override @@ -770,19 +955,42 @@ public Iterable getActiveLedgersInRange(long firstLedgerId, long lastLedge @Override public void updateEntriesLocations(Iterable locations) throws IOException { - // Trigger a flush to have all the entries being compacted in the db storage - flush(); + // Before updating the DB with the new location for the compacted entries, we need to + // make sure that there is no ongoing flush() operation. + // If there were a flush, we could have the following situation, which is highly + // unlikely though possible: + // 1. Flush operation has written the write-cache content into entry-log files + // 2. The DB location index is not yet updated + // 3. Compaction is triggered and starts compacting some of the recent files + // 4. Compaction will write the "new location" into the DB + // 5. The pending flush() will overwrite the DB with the "old location", pointing + // to a file that no longer exists + // + // To avoid this race condition, we need that all the entries that are potentially + // included in the compaction round to have all the indexes already flushed into + // the DB. + // The easiest lightweight way to achieve this is to wait for any pending + // flush operation to be completed before updating the index with the compacted + // entries, by blocking on the flushMutex. + flushMutex.lock(); + flushMutex.unlock(); + // We don't need to keep the flush mutex locked here while updating the DB. + // It's fine to have a concurrent flush operation at this point, because we + // know that none of the entries being flushed was included in the compaction + // round that we are dealing with. entryLocationIndex.updateLocations(locations); } - @Override - public EntryLogger getEntryLogger() { + @VisibleForTesting + EntryLogger getEntryLogger() { return entryLogger; } @Override - public long getLastAddConfirmed(long ledgerId) throws IOException { + public long getLastAddConfirmed(long ledgerId) throws IOException, BookieException { + throwIfLimbo(ledgerId); + TransientLedgerInfo ledgerInfo = transientLedgerInfoCache.get(ledgerId); long lac = null != ledgerInfo ? 
ledgerInfo.getLastAddConfirmed() : TransientLedgerInfo.NOT_ASSIGNED_LAC; if (lac == TransientLedgerInfo.NOT_ASSIGNED_LAC) { @@ -792,7 +1000,7 @@ public long getLastAddConfirmed(long ledgerId) throws IOException { lac = bb.readLong(); lac = getOrAddLedgerInfo(ledgerId).setLastAddConfirmed(lac); } finally { - bb.release(); + ReferenceCountUtil.release(bb); } } return lac; @@ -805,34 +1013,52 @@ public boolean waitForLastAddConfirmedUpdate(long ledgerId, long previousLAC, } @Override - public void setExplicitlac(long ledgerId, ByteBuf lac) throws IOException { - getOrAddLedgerInfo(ledgerId).setExplicitLac(lac); + public void cancelWaitForLastAddConfirmedUpdate(long ledgerId, + Watcher watcher) + throws IOException { + getOrAddLedgerInfo(ledgerId).cancelWaitForLastAddConfirmedUpdate(watcher); } @Override - public ByteBuf getExplicitLac(long ledgerId) { - TransientLedgerInfo ledgerInfo = transientLedgerInfoCache.get(ledgerId); - if (null == ledgerInfo) { - return null; - } else { + public void setExplicitLac(long ledgerId, ByteBuf lac) throws IOException { + TransientLedgerInfo ledgerInfo = getOrAddLedgerInfo(ledgerId); + ledgerInfo.setExplicitLac(lac); + ledgerIndex.setExplicitLac(ledgerId, lac); + ledgerInfo.notifyWatchers(Long.MAX_VALUE); + } + + @Override + public ByteBuf getExplicitLac(long ledgerId) throws IOException, BookieException { + throwIfLimbo(ledgerId); + if (log.isDebugEnabled()) { + log.debug("getExplicitLac ledger {}", ledgerId); + } + TransientLedgerInfo ledgerInfo = getOrAddLedgerInfo(ledgerId); + if (ledgerInfo.getExplicitLac() != null) { + if (log.isDebugEnabled()) { + log.debug("getExplicitLac ledger {} returned from TransientLedgerInfo", ledgerId); + } return ledgerInfo.getExplicitLac(); } + LedgerData ledgerData = ledgerIndex.get(ledgerId); + if (!ledgerData.hasExplicitLac()) { + if (log.isDebugEnabled()) { + log.debug("getExplicitLac ledger {} missing from LedgerData", ledgerId); + } + return null; + } + if (log.isDebugEnabled()) { + log.debug("getExplicitLac ledger {} returned from LedgerData", ledgerId); + } + ByteString persistedLac = ledgerData.getExplicitLac(); + ledgerInfo.setExplicitLac(Unpooled.wrappedBuffer(persistedLac.toByteArray())); + return ledgerInfo.getExplicitLac(); } private TransientLedgerInfo getOrAddLedgerInfo(long ledgerId) { - TransientLedgerInfo tli = transientLedgerInfoCache.get(ledgerId); - if (tli != null) { - return tli; - } else { - TransientLedgerInfo newTli = new TransientLedgerInfo(ledgerId, ledgerIndex); - tli = transientLedgerInfoCache.putIfAbsent(ledgerId, newTli); - if (tli != null) { - newTli.close(); - return tli; - } else { - return newTli; - } - } + return transientLedgerInfoCache.computeIfAbsent(ledgerId, l -> { + return new TransientLedgerInfo(l, ledgerIndex); + }); } private void updateCachedLacIfNeeded(long ledgerId, long lac) { @@ -854,35 +1080,35 @@ public void flushEntriesLocationsIndex() throws IOException { * * @param ledgerId * the ledger id - * @param entries - * a map of entryId -> location + * @param pages + * Iterator over index pages from Indexed * @return the number of entries added */ + @SuppressFBWarnings("RCN_REDUNDANT_NULLCHECK_WOULD_HAVE_BEEN_A_NPE") public long addLedgerToIndex(long ledgerId, boolean isFenced, byte[] masterKey, - Iterable> entries) throws Exception { + LedgerCache.PageEntriesIterable pages) throws Exception { LedgerData ledgerData = LedgerData.newBuilder().setExists(true).setFenced(isFenced) .setMasterKey(ByteString.copyFrom(masterKey)).build(); ledgerIndex.set(ledgerId, ledgerData); - AtomicLong 
numberOfEntries = new AtomicLong(); + MutableLong numberOfEntries = new MutableLong(); // Iterate over all the entries pages Batch batch = entryLocationIndex.newBatch(); - entries.forEach(map -> { - map.forEach((entryId, location) -> { - try { + for (LedgerCache.PageEntries page: pages) { + try (LedgerEntryPage lep = page.getLEP()) { + lep.getEntries((entryId, location) -> { entryLocationIndex.addLocation(batch, ledgerId, entryId, location); - } catch (IOException e) { - throw new RuntimeException(e); - } - - numberOfEntries.incrementAndGet(); - }); - }); + numberOfEntries.increment(); + return true; + }); + } + } + ledgerIndex.flush(); batch.flush(); batch.close(); - return numberOfEntries.get(); + return numberOfEntries.longValue(); } @Override @@ -902,28 +1128,173 @@ private void recordFailedEvent(OpStatsLogger logger, long startTimeNanos) { logger.registerFailedEvent(MathUtils.elapsedNanos(startTimeNanos), TimeUnit.NANOSECONDS); } - long getWriteCacheSize() { - return writeCache.size() + writeCacheBeingFlushed.size(); + @Override + public List getGarbageCollectionStatus() { + return Collections.singletonList(gcThread.getGarbageCollectionStatus()); } - long getWriteCacheCount() { - return writeCache.count() + writeCacheBeingFlushed.count(); + /** + * Interface which process ledger logger. + */ + public interface LedgerLoggerProcessor { + void process(long entryId, long entryLogId, long position); } - long getReadCacheSize() { - return readCache.size(); + private static final Logger log = LoggerFactory.getLogger(SingleDirectoryDbLedgerStorage.class); + + @Override + public OfLong getListOfEntriesOfLedger(long ledgerId) throws IOException { + throw new UnsupportedOperationException( + "getListOfEntriesOfLedger method is currently unsupported for SingleDirectoryDbLedgerStorage"); } - long getReadCacheCount() { - return readCache.count(); + private LedgerDirsManager.LedgerDirsListener getLedgerDirsListener() { + return new LedgerDirsListener() { + + @Override + public void diskAlmostFull(File disk) { + if (gcThread.isForceGCAllowWhenNoSpace()) { + gcThread.enableForceGC(); + } else { + gcThread.suspendMajorGC(); + } + } + + @Override + public void diskFull(File disk) { + if (gcThread.isForceGCAllowWhenNoSpace()) { + gcThread.enableForceGC(); + } else { + gcThread.suspendMajorGC(); + gcThread.suspendMinorGC(); + } + } + + @Override + public void allDisksFull(boolean highPriorityWritesAllowed) { + if (gcThread.isForceGCAllowWhenNoSpace()) { + gcThread.enableForceGC(); + } else { + gcThread.suspendMajorGC(); + gcThread.suspendMinorGC(); + } + } + + @Override + public void diskWritable(File disk) { + // we have enough space now + if (gcThread.isForceGCAllowWhenNoSpace()) { + // disable force gc. + gcThread.disableForceGC(); + } else { + // resume compaction to normal. + gcThread.resumeMajorGC(); + gcThread.resumeMinorGC(); + } + } + + @Override + public void diskJustWritable(File disk) { + if (gcThread.isForceGCAllowWhenNoSpace()) { + // if a disk is just writable, we still need force gc. + gcThread.enableForceGC(); + } else { + // still under warn threshold, only resume minor compaction. + gcThread.resumeMinorGC(); + } + } + }; + } + + @Override + public void setLimboState(long ledgerId) throws IOException { + if (log.isDebugEnabled()) { + log.debug("setLimboState. ledger: {}", ledgerId); + } + ledgerIndex.setLimbo(ledgerId); + } + + @Override + public boolean hasLimboState(long ledgerId) throws IOException { + if (log.isDebugEnabled()) { + log.debug("hasLimboState. 
ledger: {}", ledgerId); + } + return ledgerIndex.get(ledgerId).getLimbo(); + } + + @Override + public void clearLimboState(long ledgerId) throws IOException { + if (log.isDebugEnabled()) { + log.debug("clearLimboState. ledger: {}", ledgerId); + } + ledgerIndex.clearLimbo(ledgerId); + } + + private void throwIfLimbo(long ledgerId) throws IOException, BookieException { + if (hasLimboState(ledgerId)) { + if (log.isDebugEnabled()) { + log.debug("Accessing ledger({}) in limbo state, throwing exception", ledgerId); + } + throw BookieException.create(BookieException.Code.DataUnknownException); + } } /** - * Interface which process ledger logger. + * Mapping of enums to bitmaps. The bitmaps must not overlap so that we can + * do bitwise operations on them. */ - public interface LedgerLoggerProcessor { - void process(long entryId, long entryLogId, long position); + private static final Map stateBitmaps = ImmutableMap.of( + StorageState.NEEDS_INTEGRITY_CHECK, 0x00000001); + + @Override + public EnumSet getStorageStateFlags() throws IOException { + int flags = ledgerIndex.getStorageStateFlags(); + EnumSet flagsEnum = EnumSet.noneOf(StorageState.class); + for (Map.Entry e : stateBitmaps.entrySet()) { + int value = e.getValue(); + if ((flags & value) == value) { + flagsEnum.add(e.getKey()); + } + flags = flags & ~value; + } + checkState(flags == 0, "Unknown storage state flag found " + flags); + return flagsEnum; + } + + @Override + public void setStorageStateFlag(StorageState flag) throws IOException { + checkArgument(stateBitmaps.containsKey(flag), "Unsupported flag " + flag); + int flagInt = stateBitmaps.get(flag); + while (true) { + int curFlags = ledgerIndex.getStorageStateFlags(); + int newFlags = curFlags | flagInt; + if (ledgerIndex.setStorageStateFlags(curFlags, newFlags)) { + return; + } else { + log.info("Conflict updating storage state flags {} -> {}, retrying", + curFlags, newFlags); + } + } } - private static final Logger log = LoggerFactory.getLogger(DbLedgerStorage.class); + @Override + public void clearStorageStateFlag(StorageState flag) throws IOException { + checkArgument(stateBitmaps.containsKey(flag), "Unsupported flag " + flag); + int flagInt = stateBitmaps.get(flag); + while (true) { + int curFlags = ledgerIndex.getStorageStateFlags(); + int newFlags = curFlags & ~flagInt; + if (ledgerIndex.setStorageStateFlags(curFlags, newFlags)) { + return; + } else { + log.info("Conflict updating storage state flags {} -> {}, retrying", + curFlags, newFlags); + } + } + } + + @VisibleForTesting + DbLedgerStorageStats getDbLedgerStorageStats() { + return dbLedgerStorageStats; + } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/TransientLedgerInfo.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/TransientLedgerInfo.java index 27d63e8dbfd..2c325308e93 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/TransientLedgerInfo.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/TransientLedgerInfo.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file @@ -24,11 +24,9 @@ import io.netty.buffer.ByteBuf; import io.netty.buffer.Unpooled; - import java.io.IOException; import java.nio.ByteBuffer; import java.util.concurrent.TimeUnit; - import org.apache.bookkeeper.bookie.LastAddConfirmedUpdateNotification; import org.apache.bookkeeper.common.util.Watchable; import org.apache.bookkeeper.common.util.Watcher; @@ -91,7 +89,7 @@ long setLastAddConfirmed(long lac) { synchronized boolean waitForLastAddConfirmedUpdate(long previousLAC, Watcher watcher) throws IOException { lastAccessed = System.currentTimeMillis(); - if ((lac != NOT_ASSIGNED_LAC && lac > previousLAC) || isClosed || ledgerIndex.get(ledgerId).getFenced()) { + if ((lac != NOT_ASSIGNED_LAC && lac > previousLAC) || isClosed) { return false; } @@ -99,6 +97,11 @@ synchronized boolean waitForLastAddConfirmedUpdate(long previousLAC, return true; } + synchronized void cancelWaitForLastAddConfirmedUpdate(Watcher watcher) + throws IOException { + deleteWatcher(watcher); + } + public ByteBuf getExplicitLac() { ByteBuf retLac = null; synchronized (this) { @@ -119,7 +122,9 @@ public void setExplicitLac(ByteBuf lac) { if (explicitLac == null) { explicitLac = ByteBuffer.allocate(lac.capacity()); } + int readerIndex = lac.readerIndex(); lac.readBytes(explicitLac); + lac.readerIndex(readerIndex); explicitLac.rewind(); // skip the ledger id @@ -153,4 +158,4 @@ public void close() { notifyWatchers(Long.MAX_VALUE); } -} \ No newline at end of file +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/WriteCache.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/WriteCache.java index 08ffe6732dc..d152c6fc1de 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/WriteCache.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/WriteCache.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -25,12 +25,11 @@ import io.netty.buffer.ByteBuf; import io.netty.buffer.ByteBufAllocator; import io.netty.buffer.Unpooled; - import java.io.Closeable; +import java.io.IOException; import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.LongAdder; import java.util.concurrent.locks.ReentrantLock; - import org.apache.bookkeeper.common.util.MathUtils; import org.apache.bookkeeper.util.collections.ConcurrentLongHashSet; import org.apache.bookkeeper.util.collections.ConcurrentLongLongHashMap; @@ -57,14 +56,18 @@ public class WriteCache implements Closeable { * Consumer that is used to scan the entire write cache. 
*/ public interface EntryConsumer { - void accept(long ledgerId, long entryId, ByteBuf entry); + void accept(long ledgerId, long entryId, ByteBuf entry) throws IOException; } - private final ConcurrentLongLongPairHashMap index = - new ConcurrentLongLongPairHashMap(4096, 2 * Runtime.getRuntime().availableProcessors()); + private final ConcurrentLongLongPairHashMap index = ConcurrentLongLongPairHashMap.newBuilder() + .expectedItems(4096) + .concurrencyLevel(2 * Runtime.getRuntime().availableProcessors()) + .build(); - private final ConcurrentLongLongHashMap lastEntryMap = - new ConcurrentLongLongHashMap(4096, 2 * Runtime.getRuntime().availableProcessors()); + private final ConcurrentLongLongHashMap lastEntryMap = ConcurrentLongLongHashMap.newBuilder() + .expectedItems(4096) + .concurrencyLevel(2 * Runtime.getRuntime().availableProcessors()) + .build(); private final ByteBuf[] cacheSegments; private final int segmentsCount; @@ -78,19 +81,22 @@ public interface EntryConsumer { private final AtomicLong cacheOffset = new AtomicLong(0); private final LongAdder cacheCount = new LongAdder(); - private final ConcurrentLongHashSet deletedLedgers = new ConcurrentLongHashSet(); + private final ConcurrentLongHashSet deletedLedgers = ConcurrentLongHashSet.newBuilder().build(); + + private final ByteBufAllocator allocator; - public WriteCache(long maxCacheSize) { + public WriteCache(ByteBufAllocator allocator, long maxCacheSize) { // Default maxSegmentSize set to 1Gb - this(maxCacheSize, 1 * 1024 * 1024 * 1024); + this(allocator, maxCacheSize, 1 * 1024 * 1024 * 1024); } - public WriteCache(long maxCacheSize, int maxSegmentSize) { + public WriteCache(ByteBufAllocator allocator, long maxCacheSize, int maxSegmentSize) { checkArgument(maxSegmentSize > 0); long alignedMaxSegmentSize = alignToPowerOfTwo(maxSegmentSize); checkArgument(maxSegmentSize == alignedMaxSegmentSize, "Max segment size needs to be in form of 2^n"); + this.allocator = allocator; this.maxCacheSize = maxCacheSize; this.maxSegmentSize = (int) maxSegmentSize; this.segmentOffsetMask = maxSegmentSize - 1; @@ -185,7 +191,7 @@ public ByteBuf get(long ledgerId, long entryId) { long offset = result.first; int size = (int) result.second; - ByteBuf entry = ByteBufAllocator.DEFAULT.buffer(size, size); + ByteBuf entry = allocator.buffer(size, size); int localOffset = (int) (offset & segmentOffsetMask); int segmentIdx = (int) (offset >>> segmentOffsetBits); @@ -193,6 +199,10 @@ public ByteBuf get(long ledgerId, long entryId) { return entry; } + public boolean hasEntry(long ledgerId, long entryId) { + return index.get(ledgerId, entryId) != null; + } + public ByteBuf getLastEntry(long ledgerId) { long lastEntryId = lastEntryMap.get(ledgerId); if (lastEntryId == -1) { @@ -207,9 +217,7 @@ public void deleteLedger(long ledgerId) { deletedLedgers.add(ledgerId); } - private static final ArrayGroupSort groupSorter = new ArrayGroupSort(2, 4); - - public void forEach(EntryConsumer consumer) { + public void forEach(EntryConsumer consumer) throws IOException { sortedEntriesLock.lock(); try { @@ -241,7 +249,7 @@ public void forEach(EntryConsumer consumer) { startTime = MathUtils.nowInNano(); // Sort entries by (ledgerId, entryId) maintaining the 4 items groups - groupSorter.sort(sortedEntries, 0, sortedEntriesIdx); + ArrayGroupSort.sort(sortedEntries, 0, sortedEntriesIdx); if (log.isDebugEnabled()) { log.debug("sorting {} ms", (MathUtils.elapsedNanos(startTime) / 1e6)); } diff --git 
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/package-info.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/package-info.java index 6c6cd8c92b5..fa456388e4f 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/package-info.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/package-info.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/package-info.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/package-info.java new file mode 100644 index 00000000000..f8744532700 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/package-info.java @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/** + * Provides a Bookie server that stores entries for clients. + */ +package org.apache.bookkeeper.bookie.storage; diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/AsyncCallback.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/AsyncCallback.java index cde3f06aefb..715d788ddc8 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/AsyncCallback.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/AsyncCallback.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with this * work for additional information regarding copyright ownership. The ASF @@ -87,6 +87,7 @@ interface AddCallback extends AddCallbackWithLatency { * @param ctx * context object */ + @Override default void addCompleteWithLatency(int rc, LedgerHandle lh, long entryId, long qwcLatency, Object ctx) { addComplete(rc, lh, entryId, ctx); } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BKException.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BKException.java index ddfc795acbc..b13702f31e2 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BKException.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BKException.java @@ -21,7 +21,6 @@ package org.apache.bookkeeper.client; import java.util.function.Function; - /** * Class the enumerates all the possible error conditions. * @@ -47,6 +46,10 @@ public abstract class BKException extends org.apache.bookkeeper.client.api.BKExc super(code); } + BKException(int code, Throwable cause) { + super(code, cause); + } + /** * Create an exception from an error code. 
* @param code return error code @@ -68,6 +71,8 @@ public static BKException create(int code) { return new BKNotEnoughBookiesException(); case Code.NoSuchLedgerExistsException: return new BKNoSuchLedgerExistsException(); + case Code.NoSuchLedgerExistsOnMetadataServerException: + return new BKNoSuchLedgerExistsOnMetadataServerException(); case Code.BookieHandleNotAvailableException: return new BKBookieHandleNotAvailableException(); case Code.ZKException: @@ -118,6 +123,8 @@ public static BKException create(int code) { return new BKLedgerIdOverflowException(); case Code.SecurityException: return new BKSecurityException(); + case Code.MetadataSerializationException: + return new BKMetadataSerializationException(); default: return new BKUnexpectedConditionException(); } @@ -135,7 +142,7 @@ public interface Code extends org.apache.bookkeeper.client.api.BKException.Code */ public static class BKSecurityException extends BKException { public BKSecurityException() { - super(Code.SecurityException); + super(BKException.Code.SecurityException); } } @@ -144,7 +151,7 @@ public BKSecurityException() { */ public static class BKReadException extends BKException { public BKReadException() { - super(Code.ReadException); + super(BKException.Code.ReadException); } } @@ -153,7 +160,7 @@ public BKReadException() { */ public static class BKNoSuchEntryException extends BKException { public BKNoSuchEntryException() { - super(Code.NoSuchEntryException); + super(BKException.Code.NoSuchEntryException); } } @@ -162,7 +169,7 @@ public BKNoSuchEntryException() { */ public static class BKQuorumException extends BKException { public BKQuorumException() { - super(Code.QuorumException); + super(BKException.Code.QuorumException); } } @@ -171,7 +178,7 @@ public BKQuorumException() { */ public static class BKBookieException extends BKException { public BKBookieException() { - super(Code.NoBookieAvailableException); + super(BKException.Code.NoBookieAvailableException); } } @@ -180,7 +187,7 @@ public BKBookieException() { */ public static class BKDigestNotInitializedException extends BKException { public BKDigestNotInitializedException() { - super(Code.DigestNotInitializedException); + super(BKException.Code.DigestNotInitializedException); } } @@ -189,7 +196,7 @@ public BKDigestNotInitializedException() { */ public static class BKDigestMatchException extends BKException { public BKDigestMatchException() { - super(Code.DigestMatchException); + super(BKException.Code.DigestMatchException); } } @@ -198,7 +205,7 @@ public BKDigestMatchException() { */ public static class BKIllegalOpException extends BKException { public BKIllegalOpException() { - super(Code.IllegalOpException); + super(BKException.Code.IllegalOpException); } } @@ -207,7 +214,7 @@ public BKIllegalOpException() { */ public static class BKAddEntryQuorumTimeoutException extends BKException { public BKAddEntryQuorumTimeoutException() { - super(Code.AddEntryQuorumTimeoutException); + super(BKException.Code.AddEntryQuorumTimeoutException); } } @@ -216,7 +223,7 @@ public BKAddEntryQuorumTimeoutException() { */ public static class BKDuplicateEntryIdException extends BKException { public BKDuplicateEntryIdException() { - super(Code.DuplicateEntryIdException); + super(BKException.Code.DuplicateEntryIdException); } } @@ -225,7 +232,7 @@ public BKDuplicateEntryIdException() { */ public static class BKUnexpectedConditionException extends BKException { public BKUnexpectedConditionException() { - super(Code.UnexpectedConditionException); + 
super(BKException.Code.UnexpectedConditionException); } } @@ -234,7 +241,10 @@ public BKUnexpectedConditionException() { */ public static class BKNotEnoughBookiesException extends BKException { public BKNotEnoughBookiesException() { - super(Code.NotEnoughBookiesException); + super(BKException.Code.NotEnoughBookiesException); + } + public BKNotEnoughBookiesException(Throwable cause) { + super(BKException.Code.NotEnoughBookiesException, cause); } } @@ -243,7 +253,7 @@ public BKNotEnoughBookiesException() { */ public static class BKWriteException extends BKException { public BKWriteException() { - super(Code.WriteException); + super(BKException.Code.WriteException); } } @@ -252,7 +262,7 @@ public BKWriteException() { */ public static class BKProtocolVersionException extends BKException { public BKProtocolVersionException() { - super(Code.ProtocolVersionException); + super(BKException.Code.ProtocolVersionException); } } @@ -261,7 +271,7 @@ public BKProtocolVersionException() { */ public static class BKMetadataVersionException extends BKException { public BKMetadataVersionException() { - super(Code.MetadataVersionException); + super(BKException.Code.MetadataVersionException); } } @@ -270,7 +280,15 @@ public BKMetadataVersionException() { */ public static class BKNoSuchLedgerExistsException extends BKException { public BKNoSuchLedgerExistsException() { - super(Code.NoSuchLedgerExistsException); + super(BKException.Code.NoSuchLedgerExistsException); + } + } + /** + * Bookkeeper no such ledger exists on metadata server exception. + */ + public static class BKNoSuchLedgerExistsOnMetadataServerException extends BKException { + public BKNoSuchLedgerExistsOnMetadataServerException() { + super(BKException.Code.NoSuchLedgerExistsOnMetadataServerException); } } @@ -279,7 +297,7 @@ public BKNoSuchLedgerExistsException() { */ public static class BKBookieHandleNotAvailableException extends BKException { public BKBookieHandleNotAvailableException() { - super(Code.BookieHandleNotAvailableException); + super(BKException.Code.BookieHandleNotAvailableException); } } @@ -288,7 +306,11 @@ public BKBookieHandleNotAvailableException() { */ public static class ZKException extends BKException { public ZKException() { - super(Code.ZKException); + super(BKException.Code.ZKException); + } + + public ZKException(Throwable cause) { + super(BKException.Code.ZKException, cause); } } @@ -297,7 +319,11 @@ public ZKException() { */ public static class MetaStoreException extends BKException { public MetaStoreException() { - super(Code.MetaStoreException); + super(BKException.Code.MetaStoreException); + } + + public MetaStoreException(Throwable cause) { + super(BKException.Code.MetaStoreException, cause); } } @@ -306,7 +332,7 @@ public MetaStoreException() { */ public static class BKLedgerExistException extends BKException { public BKLedgerExistException() { - super(Code.LedgerExistException); + super(BKException.Code.LedgerExistException); } } @@ -315,7 +341,7 @@ public BKLedgerExistException() { */ public static class BKLedgerRecoveryException extends BKException { public BKLedgerRecoveryException() { - super(Code.LedgerRecoveryException); + super(BKException.Code.LedgerRecoveryException); } } @@ -324,7 +350,7 @@ public BKLedgerRecoveryException() { */ public static class BKLedgerClosedException extends BKException { public BKLedgerClosedException() { - super(Code.LedgerClosedException); + super(BKException.Code.LedgerClosedException); } } @@ -333,7 +359,7 @@ public BKLedgerClosedException() { */ public static class 
BKIncorrectParameterException extends BKException { public BKIncorrectParameterException() { - super(Code.IncorrectParameterException); + super(BKException.Code.IncorrectParameterException); } } @@ -342,7 +368,7 @@ public BKIncorrectParameterException() { */ public static class BKInterruptedException extends BKException { public BKInterruptedException() { - super(Code.InterruptedException); + super(BKException.Code.InterruptedException); } } @@ -351,7 +377,7 @@ public BKInterruptedException() { */ public static class BKLedgerFencedException extends BKException { public BKLedgerFencedException() { - super(Code.LedgerFencedException); + super(BKException.Code.LedgerFencedException); } } @@ -360,7 +386,7 @@ public BKLedgerFencedException() { */ public static class BKUnauthorizedAccessException extends BKException { public BKUnauthorizedAccessException() { - super(Code.UnauthorizedAccessException); + super(BKException.Code.UnauthorizedAccessException); } } @@ -369,7 +395,7 @@ public BKUnauthorizedAccessException() { */ public static class BKUnclosedFragmentException extends BKException { public BKUnclosedFragmentException() { - super(Code.UnclosedFragmentException); + super(BKException.Code.UnclosedFragmentException); } } @@ -378,7 +404,7 @@ public BKUnclosedFragmentException() { */ public static class BKWriteOnReadOnlyBookieException extends BKException { public BKWriteOnReadOnlyBookieException() { - super(Code.WriteOnReadOnlyBookieException); + super(BKException.Code.WriteOnReadOnlyBookieException); } } @@ -387,7 +413,7 @@ public BKWriteOnReadOnlyBookieException() { */ public static class BKTooManyRequestsException extends BKException { public BKTooManyRequestsException() { - super(Code.TooManyRequestsException); + super(BKException.Code.TooManyRequestsException); } } @@ -396,7 +422,7 @@ public BKTooManyRequestsException() { */ public static class BKReplicationException extends BKException { public BKReplicationException() { - super(Code.ReplicationException); + super(BKException.Code.ReplicationException); } } @@ -405,7 +431,7 @@ public BKReplicationException() { */ public static class BKClientClosedException extends BKException { public BKClientClosedException() { - super(Code.ClientClosedException); + super(BKException.Code.ClientClosedException); } } @@ -414,7 +440,7 @@ public BKClientClosedException() { */ public static class BKTimeoutException extends BKException { public BKTimeoutException() { - super(Code.TimeoutException); + super(BKException.Code.TimeoutException); } } @@ -423,7 +449,56 @@ public BKTimeoutException() { */ public static class BKLedgerIdOverflowException extends BKException { public BKLedgerIdOverflowException() { - super(Code.LedgerIdOverflowException); + super(BKException.Code.LedgerIdOverflowException); + } + } + + /** + * Bookkeeper metadata serialization exception. + */ + public static class BKMetadataSerializationException extends BKException { + public BKMetadataSerializationException() { + super(BKException.Code.MetadataSerializationException); } + + public BKMetadataSerializationException(Throwable cause) { + super(BKException.Code.MetadataSerializationException, cause); + } + } + + /** + * Bookkeeper exception for a ledger in limbo, whose data may or may not exist. + */ + public static class BKDataUnknownException extends BKException { + public BKDataUnknownException() { + super(BKException.Code.DataUnknownException); + } + } + + /** + * Extract an exception code from a BKException, or use a default if it's another type. 
+ If the throwable is null, assume that no exception took place and return + {@link BKException.Code#OK}. + */ + public static int getExceptionCode(Throwable t, int defaultCode) { + if (t == null) { + return BKException.Code.OK; + } else if (t instanceof BKException) { + return ((BKException) t).getCode(); + } else if (t.getCause() != null) { + return getExceptionCode(t.getCause(), defaultCode); + } else { + return defaultCode; + } + } + + /** + * Extract an exception code from a BKException, or default to the unexpected-condition code if the throwable + * is not a BKException. + * + * @see #getExceptionCode(Throwable,int) + */ + public static int getExceptionCode(Throwable t) { + return getExceptionCode(t, Code.UnexpectedConditionException); } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BatchedReadOp.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BatchedReadOp.java new file mode 100644 index 00000000000..97a284d04ec --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BatchedReadOp.java @@ -0,0 +1,321 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + */ +package org.apache.bookkeeper.client; + +import io.netty.buffer.ByteBuf; +import java.util.ArrayList; +import java.util.BitSet; +import java.util.List; +import java.util.concurrent.TimeUnit; +import org.apache.bookkeeper.client.api.LedgerEntry; +import org.apache.bookkeeper.client.impl.LedgerEntriesImpl; +import org.apache.bookkeeper.client.impl.LedgerEntryImpl; +import org.apache.bookkeeper.common.util.MathUtils; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.proto.BookieProtocol; +import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.BatchedReadEntryCallback; +import org.apache.bookkeeper.proto.checksum.DigestManager; +import org.apache.bookkeeper.util.ByteBufList; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class BatchedReadOp extends ReadOpBase implements BatchedReadEntryCallback { + + private static final Logger LOG = LoggerFactory.getLogger(BatchedReadOp.class); + + final int maxCount; + final long maxSize; + + BatchedLedgerEntryRequest request; + + BatchedReadOp(LedgerHandle lh, + ClientContext clientCtx, + long startEntryId, + int maxCount, + long maxSize, + boolean isRecoveryRead) { + super(lh, clientCtx, startEntryId, -1L, isRecoveryRead); + this.maxCount = maxCount; + this.maxSize = maxSize; + } + + @Override + void initiate() { + this.requestTimeNanos = MathUtils.nowInNano(); + List ensemble = getLedgerMetadata().getEnsembleAt(startEntryId); + request = new SequenceReadRequest(ensemble, lh.ledgerId, startEntryId, maxCount, maxSize); + request.read(); + if (clientCtx.getConf().readSpeculativeRequestPolicy.isPresent()) { + speculativeTask = clientCtx.getConf().readSpeculativeRequestPolicy.get() + .initiateSpeculativeRequest(clientCtx.getScheduler(), request); + } + } + + @Override + protected void submitCallback(int code) { + // ensure callback once + if (!complete.compareAndSet(false, true)) { + return; + } + + cancelSpeculativeTask(true); + + long latencyNanos = MathUtils.elapsedNanos(requestTimeNanos); + if (code != BKException.Code.OK) { + LOG.error( + "Batch read of ledger entry failed: L{} E{}-E{}, Sent to {}, " + + "Heard from {} : bitset = {}, Error = '{}'. 
First unread entry is ({}, rc = {})", lh.getId(), startEntryId, startEntryId + maxCount - 1, sentToHosts, heardFromHosts, heardFromHostsBitSet, BKException.getMessage(code), startEntryId, code); clientCtx.getClientStats().getReadOpLogger().registerFailedEvent(latencyNanos, TimeUnit.NANOSECONDS); // release the entries + request.close(); future.completeExceptionally(BKException.create(code)); } else { clientCtx.getClientStats().getReadOpLogger().registerSuccessfulEvent(latencyNanos, TimeUnit.NANOSECONDS); future.complete(LedgerEntriesImpl.create(request.entries)); } } + @Override + public void readEntriesComplete(int rc, long ledgerId, long startEntryId, ByteBufList bufList, Object ctx) { + final ReadContext rctx = (ReadContext) ctx; + final BatchedLedgerEntryRequest entry = (BatchedLedgerEntryRequest) rctx.entry; + + if (rc != BKException.Code.OK) { + entry.logErrorAndReattemptRead(rctx.bookieIndex, rctx.to, "Error: " + BKException.getMessage(rc), rc); + return; + } + + heardFromHosts.add(rctx.to); + heardFromHostsBitSet.set(rctx.bookieIndex, true); + + bufList.retain(); + // if the entry has already completed, don't handle it twice + if (entry.complete(rctx.bookieIndex, rctx.to, bufList)) { + if (!isRecoveryRead) { + // do not advance LastAddConfirmed for recovery reads + lh.updateLastConfirmed(rctx.getLastAddConfirmed(), 0L); + } + submitCallback(BKException.Code.OK); + } else { + bufList.release(); + } + } + void sendReadTo(int bookieIndex, BookieId to, BatchedLedgerEntryRequest entry) throws InterruptedException { + if (lh.throttler != null) { + lh.throttler.acquire(); + } + if (isRecoveryRead) { + int flags = BookieProtocol.FLAG_HIGH_PRIORITY | BookieProtocol.FLAG_DO_FENCING; + clientCtx.getBookieClient().batchReadEntries(to, lh.ledgerId, entry.eId, + maxCount, maxSize, this, new ReadContext(bookieIndex, to, entry), flags, lh.ledgerKey); + } else { + clientCtx.getBookieClient().batchReadEntries(to, lh.ledgerId, entry.eId, maxCount, maxSize, + this, new ReadContext(bookieIndex, to, entry), BookieProtocol.FLAG_NONE); + } + } + abstract class BatchedLedgerEntryRequest extends LedgerEntryRequest { + + // Indicates which ledger the BatchedLedgerEntryRequest is reading. + final long lId; + final int maxCount; + final long maxSize; + + final List entries; + + BatchedLedgerEntryRequest(List ensemble, long lId, long eId, int maxCount, long maxSize) { + super(ensemble, eId); + this.lId = lId; + this.maxCount = maxCount; + this.maxSize = maxSize; + this.entries = new ArrayList<>(maxCount); + } + + boolean complete(int bookieIndex, BookieId host, final ByteBufList bufList) { + if (isComplete()) { + return false; + } + if (!complete.getAndSet(true)) { + for (int i = 0; i < bufList.size(); i++) { + ByteBuf buffer = bufList.getBuffer(i); + ByteBuf content; + try { + content = lh.macManager.verifyDigestAndReturnData(eId + i, buffer); + } catch (BKException.BKDigestMatchException e) { + clientCtx.getClientStats().getReadOpDmCounter().inc(); + logErrorAndReattemptRead(bookieIndex, host, "Mac mismatch", + BKException.Code.DigestMatchException); + return false; + } + rc = BKException.Code.OK; + /* + * The length is a long and it is the last field of the metadata of an entry. + * Consequently, we subtract 8 from METADATA_LENGTH to find the offset at which the length is stored.
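    // Illustration (not part of the patch): per the comment above, the digest
    // metadata that precedes each entry's payload is four longs,
    //
    //     [ledgerId:8][entryId:8][lastAddConfirmed:8][length:8][payload ...]
    //
    // so, assuming that layout, the length field starts 8 bytes before the end
    // of the metadata block, which is exactly the METADATA_LENGTH - 8 offset
    // read by getLong(...) below.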
+ */ + LedgerEntryImpl entryImpl = LedgerEntryImpl.create(lh.ledgerId, startEntryId + i); + entryImpl.setLength(buffer.getLong(DigestManager.METADATA_LENGTH - 8)); + entryImpl.setEntryBuf(content); + entries.add(entryImpl); + } + writeSet.recycle(); + return true; + } else { + writeSet.recycle(); + return false; + } + } + + @Override + public String toString() { + return String.format("L%d-E%d~%d s-%d", lh.getId(), eId, eId + maxCount, maxSize); + } + } + + class SequenceReadRequest extends BatchedLedgerEntryRequest { + + static final int NOT_FOUND = -1; + int nextReplicaIndexToReadFrom = 0; + final BitSet sentReplicas; + final BitSet erroredReplicas; + SequenceReadRequest(List ensemble, + long lId, + long eId, + int maxCount, + long maxSize) { + super(ensemble, lId, eId, maxCount, maxSize); + this.sentReplicas = new BitSet(lh.getLedgerMetadata().getWriteQuorumSize()); + this.erroredReplicas = new BitSet(lh.getLedgerMetadata().getWriteQuorumSize()); + } + + @Override + void read() { + sendNextRead(); + } + + private synchronized int getNextReplicaIndexToReadFrom() { + return nextReplicaIndexToReadFrom; + } + + private BitSet getSentToBitSet() { + BitSet b = new BitSet(ensemble.size()); + + for (int i = 0; i < sentReplicas.length(); i++) { + if (sentReplicas.get(i)) { + b.set(writeSet.get(i)); + } + } + return b; + } + + private boolean readsOutstanding() { + return (sentReplicas.cardinality() - erroredReplicas.cardinality()) > 0; + } + + @Override + synchronized BookieId maybeSendSpeculativeRead(BitSet heardFrom) { + if (nextReplicaIndexToReadFrom >= getLedgerMetadata().getWriteQuorumSize()) { + return null; + } + + BitSet sentTo = getSentToBitSet(); + sentTo.and(heardFrom); + + // only send another read if we have had no successful response at all + // (even for other entries) from any of the other bookies we have sent the + // request to + if (sentTo.cardinality() == 0) { + clientCtx.getClientStats().getSpeculativeReadCounter().inc(); + return sendNextRead(); + } else { + return null; + } + } + + synchronized BookieId sendNextRead() { + if (nextReplicaIndexToReadFrom >= getLedgerMetadata().getWriteQuorumSize()) { + // we are done, the read has failed from all replicas, just fail the + // read + fail(firstError); + return null; + } + + // ToDo: pick replica with writable PCBC. 
ISSUE #1239 + // https://github.com/apache/bookkeeper/issues/1239 + int replica = nextReplicaIndexToReadFrom; + int bookieIndex = writeSet.get(nextReplicaIndexToReadFrom); + nextReplicaIndexToReadFrom++; + + try { + BookieId to = ensemble.get(bookieIndex); + sendReadTo(bookieIndex, to, this); + sentToHosts.add(to); + sentReplicas.set(replica); + return to; + } catch (InterruptedException ie) { + LOG.error("Interrupted reading entry " + this, ie); + Thread.currentThread().interrupt(); + fail(BKException.Code.InterruptedException); + return null; + } + } + + @Override + synchronized void logErrorAndReattemptRead(int bookieIndex, BookieId host, String errMsg, int rc) { + super.logErrorAndReattemptRead(bookieIndex, host, errMsg, rc); + int replica = writeSet.indexOf(bookieIndex); + if (replica == NOT_FOUND) { + LOG.error("Received error from a host which is not in the ensemble {} {}.", host, ensemble); + return; + } + erroredReplicas.set(replica); + if (isRecoveryRead && (numBookiesMissingEntry >= requiredBookiesMissingEntryForRecovery)) { + /* For recovery, report NoSuchEntry as soon as wQ-aQ+1 bookies report that they do not + * have the entry */ + fail(BKException.Code.NoSuchEntryException); + return; + } + + if (!readsOutstanding()) { + sendNextRead(); + } + } + + @Override + boolean complete(int bookieIndex, BookieId host, final ByteBufList bufList) { + boolean completed = super.complete(bookieIndex, host, bufList); + if (completed) { + int numReplicasTried = getNextReplicaIndexToReadFrom(); + // Check if any speculative reads were issued and mark any slow bookies before + // the first successful speculative read as "slow" + for (int i = 0; i < numReplicasTried - 1; i++) { + int slowBookieIndex = writeSet.get(i); + BookieId slowBookieSocketAddress = ensemble.get(slowBookieIndex); + clientCtx.getPlacementPolicy().registerSlowBookie(slowBookieSocketAddress, eId); + } + } + return completed; + } + } +} \ No newline at end of file diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookKeeper.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookKeeper.java index 1396ee75a64..751d40ef536 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookKeeper.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookKeeper.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
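The replica-selection bookkeeping in SequenceReadRequest above can be hard to follow inside the diff; here is a self-contained sketch of the same pattern (class and field names are mine, not the patch's): a new replica is contacted only once every replica already tried has failed, mirroring readsOutstanding() and sendNextRead().

    import java.util.BitSet;

    final class ReplicaTracker {
        private final BitSet sent = new BitSet();     // mirrors sentReplicas
        private final BitSet errored = new BitSet();  // mirrors erroredReplicas
        private int next = 0;                         // mirrors nextReplicaIndexToReadFrom

        // True while at least one contacted replica has not yet failed.
        boolean readsOutstanding() {
            return sent.cardinality() - errored.cardinality() > 0;
        }

        // Next replica index to try, or -1 once the write quorum is exhausted
        // (at which point the real code fails the read with firstError).
        int sendNext(int writeQuorumSize) {
            if (next >= writeQuorumSize) {
                return -1;
            }
            sent.set(next);
            return next++;
        }

        void markError(int replica) {
            errored.set(replica);
        }
    }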
See the NOTICE file @@ -25,22 +25,26 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.util.concurrent.ThreadFactoryBuilder; +import io.netty.buffer.ByteBufAllocator; +import io.netty.buffer.UnpooledByteBufAllocator; import io.netty.channel.EventLoopGroup; -import io.netty.channel.epoll.EpollEventLoopGroup; -import io.netty.channel.nio.NioEventLoopGroup; import io.netty.util.HashedWheelTimer; import io.netty.util.concurrent.DefaultThreadFactory; import java.io.IOException; import java.net.URI; +import java.util.Collections; +import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.concurrent.CompletableFuture; import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutionException; import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; -import java.util.concurrent.ThreadFactory; import java.util.concurrent.TimeUnit; import java.util.concurrent.locks.ReentrantReadWriteLock; +import org.apache.bookkeeper.bookie.BookKeeperServerStats; import org.apache.bookkeeper.client.AsyncCallback.CreateCallback; import org.apache.bookkeeper.client.AsyncCallback.DeleteCallback; import org.apache.bookkeeper.client.AsyncCallback.IsClosedCallback; @@ -53,10 +57,16 @@ import org.apache.bookkeeper.client.api.BookKeeperBuilder; import org.apache.bookkeeper.client.api.CreateBuilder; import org.apache.bookkeeper.client.api.DeleteBuilder; +import org.apache.bookkeeper.client.api.LedgerMetadata; +import org.apache.bookkeeper.client.api.LedgersIterator; +import org.apache.bookkeeper.client.api.ListLedgersResult; +import org.apache.bookkeeper.client.api.ListLedgersResultBuilder; import org.apache.bookkeeper.client.api.OpenBuilder; import org.apache.bookkeeper.client.api.WriteFlag; +import org.apache.bookkeeper.common.allocator.ByteBufAllocatorBuilder; import org.apache.bookkeeper.common.util.OrderedExecutor; import org.apache.bookkeeper.common.util.OrderedScheduler; +import org.apache.bookkeeper.common.util.ReflectionUtils; import org.apache.bookkeeper.conf.AbstractConfiguration; import org.apache.bookkeeper.conf.ClientConfiguration; import org.apache.bookkeeper.feature.FeatureProvider; @@ -64,23 +74,22 @@ import org.apache.bookkeeper.meta.CleanupLedgerManager; import org.apache.bookkeeper.meta.LedgerIdGenerator; import org.apache.bookkeeper.meta.LedgerManager; +import org.apache.bookkeeper.meta.LedgerManager.LedgerRangeIterator; import org.apache.bookkeeper.meta.LedgerManagerFactory; import org.apache.bookkeeper.meta.MetadataClientDriver; import org.apache.bookkeeper.meta.MetadataDrivers; import org.apache.bookkeeper.meta.exceptions.MetadataException; -import org.apache.bookkeeper.meta.zk.ZKMetadataClientDriver; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.net.DNSToSwitchMapping; +import org.apache.bookkeeper.proto.BookieAddressResolver; import org.apache.bookkeeper.proto.BookieClient; import org.apache.bookkeeper.proto.BookieClientImpl; -import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GenericCallback; import org.apache.bookkeeper.proto.DataFormats; import org.apache.bookkeeper.stats.NullStatsLogger; import org.apache.bookkeeper.stats.StatsLogger; -import org.apache.bookkeeper.util.ReflectionUtils; -import org.apache.bookkeeper.util.SafeRunnable; +import org.apache.bookkeeper.util.EventLoopUtil; +import org.apache.bookkeeper.versioning.Versioned; import 
org.apache.commons.configuration.ConfigurationException; -import org.apache.commons.lang.SystemUtils; import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.ZooKeeper; import org.slf4j.Logger; @@ -99,14 +108,16 @@ */ public class BookKeeper implements org.apache.bookkeeper.client.api.BookKeeper { - static final Logger LOG = LoggerFactory.getLogger(BookKeeper.class); + private static final Logger LOG = LoggerFactory.getLogger(BookKeeper.class); final EventLoopGroup eventLoopGroup; + private final ByteBufAllocator allocator; // The stats logger for this client. private final StatsLogger statsLogger; private final BookKeeperClientStats clientStats; + private final double bookieQuarantineRatio; // whether the event loop group is one we created, or is owned by whoever // instantiated us @@ -149,6 +160,7 @@ public static class Builder { ZooKeeper zk = null; EventLoopGroup eventLoopGroup = null; + ByteBufAllocator allocator = null; StatsLogger statsLogger = NullStatsLogger.INSTANCE; DNSToSwitchMapping dnsResolver = null; HashedWheelTimer requestTimer = null; @@ -213,6 +225,18 @@ public Builder eventLoopGroup(EventLoopGroup f) { return this; } + /** + * Configure the bookkeeper client with a provided {@link ByteBufAllocator}. + * + * @param allocator an external {@link ByteBufAllocator} to use by the bookkeeper client. + * @return client builder. + * @since 4.9 + */ + public Builder allocator(ByteBufAllocator allocator) { + this.allocator = allocator; + return this; + } + /** * Configure the bookkeeper client with a provided {@link ZooKeeper} client. * @@ -276,7 +300,8 @@ public Builder featureProvider(FeatureProvider featureProvider) { public BookKeeper build() throws IOException, InterruptedException, BKException { checkNotNull(statsLogger, "No stats logger provided"); - return new BookKeeper(conf, zk, eventLoopGroup, statsLogger, dnsResolver, requestTimer, featureProvider); + return new BookKeeper(conf, zk, eventLoopGroup, allocator, statsLogger, dnsResolver, requestTimer, + featureProvider); } } @@ -313,7 +338,7 @@ public BookKeeper(String servers) throws IOException, InterruptedException, */ public BookKeeper(final ClientConfiguration conf) throws IOException, InterruptedException, BKException { - this(conf, null, null, NullStatsLogger.INSTANCE, + this(conf, null, null, null, NullStatsLogger.INSTANCE, null, null, null); } @@ -347,7 +372,7 @@ private static EventLoopGroup validateEventLoopGroup(EventLoopGroup eventLoopGro */ public BookKeeper(ClientConfiguration conf, ZooKeeper zk) throws IOException, InterruptedException, BKException { - this(conf, validateZooKeeper(zk), null, NullStatsLogger.INSTANCE, null, null, null); + this(conf, validateZooKeeper(zk), null, null, NullStatsLogger.INSTANCE, null, null, null); } /** @@ -369,17 +394,19 @@ public BookKeeper(ClientConfiguration conf, ZooKeeper zk) */ public BookKeeper(ClientConfiguration conf, ZooKeeper zk, EventLoopGroup eventLoopGroup) throws IOException, InterruptedException, BKException { - this(conf, validateZooKeeper(zk), validateEventLoopGroup(eventLoopGroup), NullStatsLogger.INSTANCE, + this(conf, validateZooKeeper(zk), validateEventLoopGroup(eventLoopGroup), null, NullStatsLogger.INSTANCE, null, null, null); } /** * Constructor for use with the builder. Other constructors also use it. 
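As a usage sketch of the allocator(...) builder hook documented above (this assumes the existing BookKeeper.forConfig(...) entry point and Netty's pooled allocator; the metadata URI is a placeholder):

    ClientConfiguration conf = new ClientConfiguration()
            .setMetadataServiceUri("zk+hierarchical://localhost:2181/ledgers"); // placeholder
    BookKeeper bk = BookKeeper.forConfig(conf)
            .allocator(io.netty.buffer.PooledByteBufAllocator.DEFAULT) // external allocator
            .build();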
*/ + @SuppressWarnings("deprecation") @VisibleForTesting BookKeeper(ClientConfiguration conf, ZooKeeper zkc, EventLoopGroup eventLoopGroup, + ByteBufAllocator byteBufAllocator, StatsLogger rootStatsLogger, DNSToSwitchMapping dnsResolver, HashedWheelTimer requestTimer, @@ -402,7 +429,9 @@ public BookKeeper(ClientConfiguration conf, ZooKeeper zk, EventLoopGroup eventLo .numThreads(conf.getNumWorkerThreads()) .statsLogger(rootStatsLogger) .traceTaskExecution(conf.getEnableTaskExecutionStats()) + .preserveMdcForTaskExecution(conf.getPreserveMdcForTaskExecution()) .traceTaskWarnTimeMicroSec(conf.getTaskExecutionWarnTimeMicros()) + .enableBusyWait(conf.isBusyWaitEnabled()) .build(); // initialize stats logger @@ -422,7 +451,7 @@ public BookKeeper(ClientConfiguration conf, ZooKeeper zk, EventLoopGroup eventLo conf, scheduler, rootStatsLogger, - java.util.Optional.ofNullable(zkc)); + Optional.ofNullable(zkc)); } catch (ConfigurationException ce) { LOG.error("Failed to initialize metadata client driver using invalid metadata service uri", ce); throw new IOException("Failed to initialize metadata client driver", ce); @@ -433,16 +462,25 @@ public BookKeeper(ClientConfiguration conf, ZooKeeper zk, EventLoopGroup eventLo // initialize event loop group if (null == eventLoopGroup) { - this.eventLoopGroup = getDefaultEventLoopGroup(conf); + this.eventLoopGroup = EventLoopUtil.getClientEventLoopGroup(conf, + new DefaultThreadFactory("bookkeeper-io")); this.ownEventLoopGroup = true; } else { this.eventLoopGroup = eventLoopGroup; this.ownEventLoopGroup = false; } - // initialize bookie client - this.bookieClient = new BookieClientImpl(conf, this.eventLoopGroup, this.mainWorkerPool, - scheduler, rootStatsLogger); + if (byteBufAllocator != null) { + this.allocator = byteBufAllocator; + } else { + this.allocator = ByteBufAllocatorBuilder.create() + .poolingPolicy(conf.getAllocatorPoolingPolicy()) + .poolingConcurrency(conf.getAllocatorPoolingConcurrency()) + .outOfMemoryPolicy(conf.getAllocatorOutOfMemoryPolicy()) + .leakDetectionPolicy(conf.getAllocatorLeakDetectionPolicy()) + .build(); + } + if (null == requestTimer) { this.requestTimer = new HashedWheelTimer( @@ -455,14 +493,24 @@ public BookKeeper(ClientConfiguration conf, ZooKeeper zk, EventLoopGroup eventLo this.ownTimer = false; } + BookieAddressResolver bookieAddressResolver = conf.getBookieAddressResolverEnabled() + ? 
new DefaultBookieAddressResolver(metadataDriver.getRegistrationClient()) + : new BookieAddressResolverDisabled(); + if (dnsResolver != null) { + dnsResolver.setBookieAddressResolver(bookieAddressResolver); + } // initialize the ensemble placement this.placementPolicy = initializeEnsemblePlacementPolicy(conf, - dnsResolver, this.requestTimer, this.featureProvider, this.statsLogger); - + dnsResolver, this.requestTimer, this.featureProvider, this.statsLogger, bookieAddressResolver); this.bookieWatcher = new BookieWatcherImpl( - conf, this.placementPolicy, metadataDriver.getRegistrationClient(), + conf, this.placementPolicy, metadataDriver.getRegistrationClient(), bookieAddressResolver, this.statsLogger.scope(WATCHER_SCOPE)); + + // initialize bookie client + this.bookieClient = new BookieClientImpl(conf, this.eventLoopGroup, this.allocator, this.mainWorkerPool, + scheduler, rootStatsLogger, this.bookieWatcher.getBookieAddressResolver()); + if (conf.getDiskWeightBasedPlacementEnabled()) { LOG.info("Weighted ledger placement enabled"); ThreadFactoryBuilder tFBuilder = new ThreadFactoryBuilder() @@ -488,6 +536,7 @@ public BookKeeper(ClientConfiguration conf, ZooKeeper zk, EventLoopGroup eventLo this.ledgerManager = new CleanupLedgerManager(ledgerManagerFactory.newLedgerManager()); this.ledgerIdGenerator = ledgerManagerFactory.newLedgerIdGenerator(); + this.bookieQuarantineRatio = conf.getBookieQuarantineRatio(); scheduleBookieHealthCheckIfEnabled(conf); } @@ -514,18 +563,21 @@ public BookKeeper(ClientConfiguration conf, ZooKeeper zk, EventLoopGroup eventLo bookieWatcher = null; bookieInfoScheduler = null; bookieClient = null; + allocator = UnpooledByteBufAllocator.DEFAULT; + bookieQuarantineRatio = 1.0; } - private EnsemblePlacementPolicy initializeEnsemblePlacementPolicy(ClientConfiguration conf, + protected EnsemblePlacementPolicy initializeEnsemblePlacementPolicy(ClientConfiguration conf, DNSToSwitchMapping dnsResolver, HashedWheelTimer timer, FeatureProvider featureProvider, - StatsLogger statsLogger) + StatsLogger statsLogger, + BookieAddressResolver bookieAddressResolver) throws IOException { try { Class policyCls = conf.getEnsemblePlacementPolicy(); - return ReflectionUtils.newInstance(policyCls).initialize(conf, java.util.Optional.ofNullable(dnsResolver), - timer, featureProvider, statsLogger); + return ReflectionUtils.newInstance(policyCls).initialize(conf, Optional.ofNullable(dnsResolver), + timer, featureProvider, statsLogger, bookieAddressResolver); } catch (ConfigurationException e) { throw new IOException("Failed to initialize ensemble placement policy : ", e); } @@ -549,21 +601,42 @@ static int getReturnRc(BookieClient bookieClient, int rc) { void scheduleBookieHealthCheckIfEnabled(ClientConfiguration conf) { if (conf.isBookieHealthCheckEnabled()) { - scheduler.scheduleAtFixedRate(new SafeRunnable() { - - @Override - public void safeRun() { - checkForFaultyBookies(); - } - }, conf.getBookieHealthCheckIntervalSeconds(), conf.getBookieHealthCheckIntervalSeconds(), + scheduler.scheduleAtFixedRate( + () -> checkForFaultyBookies(), + conf.getBookieHealthCheckIntervalSeconds(), + conf.getBookieHealthCheckIntervalSeconds(), TimeUnit.SECONDS); } } void checkForFaultyBookies() { - List faultyBookies = bookieClient.getFaultyBookies(); - for (BookieSocketAddress faultyBookie : faultyBookies) { - bookieWatcher.quarantineBookie(faultyBookie); + List faultyBookies = bookieClient.getFaultyBookies(); + if (faultyBookies.isEmpty()) { + return; + } + + boolean isEnabled = false; + try { + isEnabled 
= metadataDriver.isHealthCheckEnabled().get(); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + LOG.error("Cannot verify if healthcheck is enabled", e); + } catch (ExecutionException e) { + LOG.error("Cannot verify if healthcheck is enabled", e.getCause()); + } + if (!isEnabled) { + LOG.info("Health check is currently disabled!"); + bookieWatcher.releaseAllQuarantinedBookies(); + return; + } + + for (BookieId faultyBookie : faultyBookies) { + if (Math.random() <= bookieQuarantineRatio) { + bookieWatcher.quarantineBookie(faultyBookie); + statsLogger.getCounter(BookKeeperServerStats.BOOKIE_QUARANTINE).inc(); + } else { + statsLogger.getCounter(BookKeeperServerStats.BOOKIE_QUARANTINE_SKIP).inc(); + } } } @@ -575,6 +648,11 @@ public LedgerManager getLedgerManager() { return ledgerManager; } + @VisibleForTesting + public LedgerManagerFactory getLedgerManagerFactory() { + return ledgerManagerFactory; + } + @VisibleForTesting LedgerManager getUnderlyingLedgerManager() { return ((CleanupLedgerManager) ledgerManager).getUnderlying(); @@ -600,8 +678,11 @@ BookieWatcher getBookieWatcher() { return bookieWatcher; } - @VisibleForTesting - OrderedExecutor getMainWorkerPool() { + public BookieAddressResolver getBookieAddressResolver() { + return bookieWatcher.getBookieAddressResolver(); + } + + public OrderedExecutor getMainWorkerPool() { return mainWorkerPool; } @@ -625,7 +706,7 @@ public MetadataClientDriver getMetadataClientDriver() { * cheap to compute but does not protect against byzantine bookies (i.e., a * bookie might report fake bytes and a matching CRC32). The MAC code is more * expensive to compute, but is protected by a password, i.e., a bookie can't - * report fake bytes with a mathching MAC unless it knows the password. + * report fake bytes with a matching MAC unless it knows the password. * CRC32C, which uses SSE processor instructions, has better performance than CRC32. * Legacy DigestType for backward compatibility. If we want to add a new DigestType, * we should add it here, in client.api.DigestType, and in DigestType in DataFormats.proto.
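The quarantine path in checkForFaultyBookies() above is probabilistic; isolated, the gate looks like the sketch below (counter variables stand in for the real stats counters). With a ratio of 1.0, as in the fallback constructor above, every faulty bookie is quarantined; a hypothetical 0.25 would quarantine roughly a quarter of them and merely count the rest as skipped.

    static void quarantineFaulty(java.util.List<String> faultyBookies, double ratio) {
        int quarantined = 0;
        int skipped = 0;
        for (String bookie : faultyBookies) {
            if (Math.random() <= ratio) {
                quarantined++; // bookieWatcher.quarantineBookie(...) in the real code
            } else {
                skipped++;     // BOOKIE_QUARANTINE_SKIP counter in the real code
            }
        }
        System.out.printf("quarantined=%d skipped=%d%n", quarantined, skipped);
    }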
@@ -680,10 +761,6 @@ public org.apache.bookkeeper.client.api.DigestType toApiDigestType() { } } - ZooKeeper getZkHandle() { - return ((ZKMetadataClientDriver) metadataDriver).getZk(); - } - protected ClientConfiguration getConf() { return conf; } @@ -711,7 +788,7 @@ BookieClient getBookieClient() { * @throws BKException * @throws InterruptedException */ - public Map getBookieInfo() throws BKException, InterruptedException { + public Map getBookieInfo() throws BKException, InterruptedException { return bookieInfoReader.getBookieInfo(); } @@ -743,7 +820,8 @@ public void asyncCreateLedger(final int ensSize, final int writeQuorumSize, final DigestType digestType, final byte[] passwd, final CreateCallback cb, final Object ctx) { - asyncCreateLedger(ensSize, writeQuorumSize, writeQuorumSize, digestType, passwd, cb, ctx, null); + asyncCreateLedger(ensSize, writeQuorumSize, writeQuorumSize, + digestType, passwd, cb, ctx, Collections.emptyMap()); } /** @@ -811,7 +889,7 @@ public void asyncCreateLedger(final int ensSize, final int writeQuorumSize, fina * @throws InterruptedException * @throws BKException */ - public LedgerHandle createLedger(DigestType digestType, byte passwd[]) + public LedgerHandle createLedger(DigestType digestType, byte[] passwd) throws BKException, InterruptedException { return createLedger(3, 2, digestType, passwd); } @@ -830,9 +908,9 @@ public LedgerHandle createLedger(DigestType digestType, byte passwd[]) * @throws BKException */ public LedgerHandle createLedger(int ensSize, int qSize, - DigestType digestType, byte passwd[]) + DigestType digestType, byte[] passwd) throws InterruptedException, BKException { - return createLedger(ensSize, qSize, qSize, digestType, passwd, null); + return createLedger(ensSize, qSize, qSize, digestType, passwd, Collections.emptyMap()); } /** @@ -850,9 +928,9 @@ public LedgerHandle createLedger(int ensSize, int qSize, * @throws BKException */ public LedgerHandle createLedger(int ensSize, int writeQuorumSize, int ackQuorumSize, - DigestType digestType, byte passwd[]) + DigestType digestType, byte[] passwd) throws InterruptedException, BKException { - return createLedger(ensSize, writeQuorumSize, ackQuorumSize, digestType, passwd, null); + return createLedger(ensSize, writeQuorumSize, ackQuorumSize, digestType, passwd, Collections.emptyMap()); } /** @@ -869,7 +947,7 @@ public LedgerHandle createLedger(int ensSize, int writeQuorumSize, int ackQuorum * @throws BKException */ public LedgerHandle createLedger(int ensSize, int writeQuorumSize, int ackQuorumSize, - DigestType digestType, byte passwd[], final Map customMetadata) + DigestType digestType, byte[] passwd, final Map customMetadata) throws InterruptedException, BKException { CompletableFuture future = new CompletableFuture<>(); SyncCreateCallback result = new SyncCreateCallback(future); @@ -904,9 +982,10 @@ public LedgerHandle createLedger(int ensSize, int writeQuorumSize, int ackQuorum * @throws BKException */ public LedgerHandle createLedgerAdv(int ensSize, int writeQuorumSize, int ackQuorumSize, - DigestType digestType, byte passwd[]) + DigestType digestType, byte[] passwd) throws InterruptedException, BKException { - return createLedgerAdv(ensSize, writeQuorumSize, ackQuorumSize, digestType, passwd, null); + return createLedgerAdv(ensSize, writeQuorumSize, ackQuorumSize, + digestType, passwd, Collections.emptyMap()); } /** @@ -925,7 +1004,7 @@ public LedgerHandle createLedgerAdv(int ensSize, int writeQuorumSize, int ackQuo * @throws BKException */ public LedgerHandle createLedgerAdv(int 
ensSize, int writeQuorumSize, int ackQuorumSize, - DigestType digestType, byte passwd[], final Map customMetadata) + DigestType digestType, byte[] passwd, final Map customMetadata) throws InterruptedException, BKException { CompletableFuture future = new CompletableFuture<>(); SyncCreateAdvCallback result = new SyncCreateAdvCallback(future); @@ -1017,7 +1096,7 @@ public LedgerHandle createLedgerAdv(final long ledgerId, int writeQuorumSize, int ackQuorumSize, DigestType digestType, - byte passwd[], + byte[] passwd, final Map customMetadata) throws InterruptedException, BKException { CompletableFuture future = new CompletableFuture<>(); @@ -1038,8 +1117,7 @@ public LedgerHandle createLedgerAdv(final long ledgerId, throw BKException.create(BKException.Code.UnexpectedConditionException); } - LOG.info("Ensemble: {} for ledger: {}", lh.getLedgerMetadata().getEnsemble(0L), - lh.getId()); + LOG.info("Ensemble: {} for ledger: {}", lh.getLedgerMetadata().getEnsembleAt(0L), lh.getId()); return lh; } @@ -1131,7 +1209,7 @@ public void asyncCreateLedgerAdv(final long ledgerId, * @param ctx * optional control object */ - public void asyncOpenLedger(final long lId, final DigestType digestType, final byte passwd[], + public void asyncOpenLedger(final long lId, final DigestType digestType, final byte[] passwd, final OpenCallback cb, final Object ctx) { closeLock.readLock().lock(); try { @@ -1176,7 +1254,7 @@ public void asyncOpenLedger(final long lId, final DigestType digestType, final b * @param ctx * optional control object */ - public void asyncOpenLedgerNoRecovery(final long lId, final DigestType digestType, final byte passwd[], + public void asyncOpenLedgerNoRecovery(final long lId, final DigestType digestType, final byte[] passwd, final OpenCallback cb, final Object ctx) { closeLock.readLock().lock(); try { @@ -1206,7 +1284,7 @@ public void asyncOpenLedgerNoRecovery(final long lId, final DigestType digestTyp * @throws InterruptedException * @throws BKException */ - public LedgerHandle openLedger(long lId, DigestType digestType, byte passwd[]) + public LedgerHandle openLedger(long lId, DigestType digestType, byte[] passwd) throws BKException, InterruptedException { CompletableFuture future = new CompletableFuture<>(); SyncOpenCallback result = new SyncOpenCallback(future); @@ -1233,7 +1311,7 @@ public LedgerHandle openLedger(long lId, DigestType digestType, byte passwd[]) * @throws InterruptedException * @throws BKException */ - public LedgerHandle openLedgerNoRecovery(long lId, DigestType digestType, byte passwd[]) + public LedgerHandle openLedgerNoRecovery(long lId, DigestType digestType, byte[] passwd) throws BKException, InterruptedException { CompletableFuture future = new CompletableFuture<>(); SyncOpenCallback result = new SyncOpenCallback(future); @@ -1280,7 +1358,6 @@ public void asyncDeleteLedger(final long lId, final DeleteCallback cb, final Obj * @throws InterruptedException * @throws BKException */ - @SuppressWarnings("unchecked") public void deleteLedger(long lId) throws InterruptedException, BKException { CompletableFuture future = new CompletableFuture<>(); SyncDeleteCallback result = new SyncDeleteCallback(future); @@ -1298,16 +1375,13 @@ public void deleteLedger(long lId) throws InterruptedException, BKException { * @param cb callback method */ public void asyncIsClosed(long lId, final IsClosedCallback cb, final Object ctx){ - ledgerManager.readLedgerMetadata(lId, new GenericCallback(){ - @Override - public void operationComplete(int rc, LedgerMetadata lm){ - if (rc == 
BKException.Code.OK) { - cb.isClosedComplete(rc, lm.isClosed(), ctx); + ledgerManager.readLedgerMetadata(lId).whenComplete((metadata, exception) -> { + if (exception == null) { + cb.isClosedComplete(BKException.Code.OK, metadata.getValue().isClosed(), ctx); } else { - cb.isClosedComplete(rc, false, ctx); + cb.isClosedComplete(BKException.getExceptionCode(exception), false, ctx); } - } - }); + }); } /** @@ -1407,22 +1481,6 @@ public void close() throws BKException, InterruptedException { this.metadataDriver.close(); } - static EventLoopGroup getDefaultEventLoopGroup(ClientConfiguration conf) { - ThreadFactory threadFactory = new DefaultThreadFactory("bookkeeper-io"); - final int numThreads = conf.getNumIOThreads(); - - if (SystemUtils.IS_OS_LINUX) { - try { - return new EpollEventLoopGroup(numThreads, threadFactory); - } catch (Throwable t) { - LOG.warn("Could not use Netty Epoll event loop for bookie server: {}", t.getMessage()); - return new NioEventLoopGroup(numThreads, threadFactory); - } - } else { - return new NioEventLoopGroup(numThreads, threadFactory); - } - } - @Override public CreateBuilder newCreateLedgerOp() { return new LedgerCreateOp.CreateBuilderImpl(this); @@ -1438,6 +1496,118 @@ public DeleteBuilder newDeleteLedgerOp() { return new LedgerDeleteOp.DeleteBuilderImpl(this); } + private static final class SyncLedgerIterator implements LedgersIterator { + + private final LedgerRangeIterator iterator; + private final ListLedgersResultImpl parent; + Iterator currentRange = null; + + public SyncLedgerIterator(LedgerRangeIterator iterator, ListLedgersResultImpl parent) { + this.iterator = iterator; + this.parent = parent; + } + + @Override + public boolean hasNext() throws IOException { + parent.checkClosed(); + if (currentRange != null) { + if (currentRange.hasNext()) { + return true; + } + } else if (iterator.hasNext()) { + return true; + } + return false; + } + + @Override + public long next() throws IOException { + parent.checkClosed(); + if (currentRange == null || !currentRange.hasNext()) { + currentRange = iterator.next().getLedgers().iterator(); + } + return currentRange.next(); + } + } + + private static final class ListLedgersResultImpl implements ListLedgersResult { + + private final LedgerRangeIterator iterator; + private boolean closed = false; + private LedgersIterator ledgersIterator; + + public ListLedgersResultImpl(LedgerRangeIterator iterator) { + this.iterator = iterator; + } + + void checkClosed() { + if (closed) { + throw new IllegalStateException("ListLedgersResult is closed"); + } + } + + private void initLedgersIterator() { + if (ledgersIterator != null) { + throw new IllegalStateException("LedgersIterator must be requested once"); + } + ledgersIterator = new SyncLedgerIterator(iterator, this); + } + + @Override + public LedgersIterator iterator() { + checkClosed(); + initLedgersIterator(); + return ledgersIterator; + } + + @Override + public Iterable toIterable() { + checkClosed(); + initLedgersIterator(); + + return () -> new Iterator() { + @Override + public boolean hasNext() { + try { + return ledgersIterator.hasNext(); + } catch (IOException ex) { + throw new RuntimeException(ex); + } + } + + @Override + public Long next() { + try { + return ledgersIterator.next(); + } catch (IOException ex) { + throw new RuntimeException(ex); + } + } + }; + } + + @Override + public void close() throws Exception { + closed = true; + } + } + + @Override + public ListLedgersResultBuilder newListLedgersOp() { + return () -> { + final LedgerRangeIterator iterator = 
getLedgerManager().getLedgerRanges(0); + return CompletableFuture.completedFuture(new ListLedgersResultImpl(iterator)); + }; + } + + @Override + public CompletableFuture getLedgerMetadata(long ledgerId) { + CompletableFuture> versioned = getLedgerManager().readLedgerMetadata(ledgerId); + return versioned.thenApply(versionedLedgerMetadata -> { + return versionedLedgerMetadata.getValue(); + }); + } + private final ClientContext clientCtx = new ClientContext() { @Override public ClientInternalConf getConf() { @@ -1483,9 +1653,14 @@ public BookKeeperClientStats getClientStats() { public boolean isClientClosed() { return BookKeeper.this.isClosed(); } + + @Override + public ByteBufAllocator getByteBufAllocator() { + return allocator; + } }; - ClientContext getClientCtx() { + public ClientContext getClientCtx() { return clientCtx; } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookKeeperAdmin.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookKeeperAdmin.java index 59c203ba0fa..371c2145323 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookKeeperAdmin.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookKeeperAdmin.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -24,16 +24,17 @@ import static org.apache.bookkeeper.meta.MetadataDrivers.runFunctionWithMetadataBookieDriver; import static org.apache.bookkeeper.meta.MetadataDrivers.runFunctionWithRegistrationManager; +import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.Sets; -import com.google.common.util.concurrent.AbstractFuture; - import com.google.common.util.concurrent.UncheckedExecutionException; +import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.Collection; +import java.util.Collections; import java.util.Enumeration; import java.util.HashMap; import java.util.HashSet; @@ -43,8 +44,8 @@ import java.util.Map; import java.util.Map.Entry; import java.util.NoSuchElementException; +import java.util.Objects; import java.util.Optional; -import java.util.Random; import java.util.Set; import java.util.SortedMap; import java.util.concurrent.CompletableFuture; @@ -52,36 +53,47 @@ import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicReference; +import java.util.function.BiConsumer; +import java.util.function.Function; import java.util.function.Predicate; - -import org.apache.bookkeeper.bookie.Bookie; +import lombok.SneakyThrows; import org.apache.bookkeeper.bookie.BookieException; +import org.apache.bookkeeper.bookie.BookieImpl; import org.apache.bookkeeper.client.AsyncCallback.OpenCallback; import org.apache.bookkeeper.client.AsyncCallback.RecoverCallback; +import org.apache.bookkeeper.client.EnsemblePlacementPolicy.PlacementPolicyAdherence; import org.apache.bookkeeper.client.LedgerFragmentReplicator.SingleFragmentCallback; import org.apache.bookkeeper.client.SyncCallbackUtils.SyncOpenCallback; import org.apache.bookkeeper.client.SyncCallbackUtils.SyncReadCallback; +import org.apache.bookkeeper.client.api.LedgerMetadata; +import org.apache.bookkeeper.common.concurrent.FutureUtils; import org.apache.bookkeeper.conf.ClientConfiguration; import 
org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.discover.BookieServiceInfo; import org.apache.bookkeeper.discover.RegistrationClient.RegistrationListener; +import org.apache.bookkeeper.discover.RegistrationManager; +import org.apache.bookkeeper.meta.LedgerAuditorManager; import org.apache.bookkeeper.meta.LedgerManager; import org.apache.bookkeeper.meta.LedgerManager.LedgerRangeIterator; import org.apache.bookkeeper.meta.LedgerManagerFactory; import org.apache.bookkeeper.meta.LedgerUnderreplicationManager; +import org.apache.bookkeeper.meta.MetadataBookieDriver; import org.apache.bookkeeper.meta.UnderreplicatedLedger; import org.apache.bookkeeper.meta.zk.ZKMetadataDriverBase; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.proto.BookieAddressResolver; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GenericCallback; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.MultiCallback; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.Processor; -import org.apache.bookkeeper.replication.AuditorElector; import org.apache.bookkeeper.replication.BookieLedgerIndexer; import org.apache.bookkeeper.replication.ReplicationException.BKAuditException; import org.apache.bookkeeper.replication.ReplicationException.CompatibilityException; import org.apache.bookkeeper.replication.ReplicationException.UnavailableException; import org.apache.bookkeeper.stats.NullStatsLogger; import org.apache.bookkeeper.stats.StatsLogger; +import org.apache.bookkeeper.util.AvailabilityOfEntriesOfLedger; import org.apache.bookkeeper.util.IOUtils; +import org.apache.commons.collections4.MapUtils; import org.apache.zookeeper.AsyncCallback; import org.apache.zookeeper.KeeperException; import org.slf4j.Logger; @@ -94,6 +106,7 @@ public class BookKeeperAdmin implements AutoCloseable { private static final Logger LOG = LoggerFactory.getLogger(BookKeeperAdmin.class); private static final Logger VERBOSE = LoggerFactory.getLogger("verbose"); + private static final BiConsumer NOOP_BICONSUMER = (l, e) -> { }; // BookKeeper client instance private BookKeeper bkc; @@ -102,12 +115,6 @@ public class BookKeeperAdmin implements AutoCloseable { // LedgerFragmentReplicator instance private LedgerFragmentReplicator lfr; - /* - * Random number generator used to choose an available bookie server to - * replicate data from a dead bookie. - */ - private Random rand = new Random(); - private LedgerManagerFactory mFactory; /* @@ -116,6 +123,8 @@ public class BookKeeperAdmin implements AutoCloseable { */ private LedgerUnderreplicationManager underreplicationManager; + private LedgerAuditorManager ledgerAuditorManager; + /** * Constructor that takes in a ZooKeeper servers connect string so we know * how to connect to ZooKeeper to retrieve information about the BookKeeper @@ -161,10 +170,16 @@ public BookKeeperAdmin(ClientConfiguration conf) throws IOException, Interrupted // Create the BookKeeper client instance bkc = new BookKeeper(conf); ownsBK = true; - this.lfr = new LedgerFragmentReplicator(bkc, NullStatsLogger.INSTANCE); + this.lfr = new LedgerFragmentReplicator(bkc, NullStatsLogger.INSTANCE, conf); this.mFactory = bkc.ledgerManagerFactory; } + @VisibleForTesting + public static BookKeeperAdmin newBookKeeperAdmin(ClientConfiguration conf) + throws IOException, InterruptedException, BKException { + return new BookKeeperAdmin(conf); + } + /** * Constructor that takes in a BookKeeper instance.
This will be useful * when the user already has a BookKeeper instance ready. @@ -174,15 +189,22 @@ public BookKeeperAdmin(ClientConfiguration conf) throws IOException, Interrupted * @param bkc * - bk instance * @param statsLogger * - stats logger */ - public BookKeeperAdmin(final BookKeeper bkc, StatsLogger statsLogger) { + public BookKeeperAdmin(final BookKeeper bkc, StatsLogger statsLogger, ClientConfiguration conf) { + Objects.requireNonNull(conf, "Client configuration cannot be null"); this.bkc = bkc; ownsBK = false; - this.lfr = new LedgerFragmentReplicator(bkc, statsLogger); + this.lfr = new LedgerFragmentReplicator(bkc, statsLogger, conf); this.mFactory = bkc.ledgerManagerFactory; } + public BookKeeperAdmin(final BookKeeper bkc, ClientConfiguration conf) { + this(bkc, NullStatsLogger.INSTANCE, conf); + } + public BookKeeperAdmin(final BookKeeper bkc) { - this(bkc, NullStatsLogger.INSTANCE); + this.bkc = bkc; + ownsBK = false; + this.mFactory = bkc.ledgerManagerFactory; } public ClientConfiguration getConf() { @@ -201,6 +223,14 @@ public void close() throws InterruptedException, BKException { if (ownsBK) { bkc.close(); } + + if (ledgerAuditorManager != null) { + try { + ledgerAuditorManager.close(); + } catch (Exception e) { + throw new BKException.MetaStoreException(e); + } + } } /** @@ -208,25 +238,45 @@ public void close() throws InterruptedException, BKException { * * @return a collection of bookie addresses */ - public Collection getAvailableBookies() + public Collection getAvailableBookies() throws BKException { return bkc.bookieWatcher.getBookies(); } + /** + * Get a list of all bookies, including the unavailable ones. + * + * @return a collection of bookie addresses + */ + public Collection getAllBookies() + throws BKException { + return bkc.bookieWatcher.getAllBookies(); + } + + public BookieAddressResolver getBookieAddressResolver() { + return bkc.bookieWatcher.getBookieAddressResolver(); + } + + @SneakyThrows + public BookieServiceInfo getBookieServiceInfo(BookieId bookiedId) + throws BKException { + return FutureUtils.result(bkc.getMetadataClientDriver() + .getRegistrationClient().getBookieServiceInfo(bookiedId)).getValue(); + } + /** * Get a list of readonly bookies synchronously. * * @return a collection of bookie addresses * @throws BKException if there are issues trying to read the list. */ - public Collection getReadOnlyBookies() throws BKException { + public Collection getReadOnlyBookies() throws BKException { return bkc.bookieWatcher.getReadOnlyBookies(); } /** * Notify when the available list of bookies changes. - * This is a one-shot notification. To receive subsequent notifications - * the listener must be registered again. + * Once registered, the listener will be notified when the list of available bookies changes. * * @param listener the listener to notify */ @@ -240,8 +290,7 @@ public void watchWritableBookiesChanged(final RegistrationListener listener) /** * Notify when the available list of read only bookies changes. - * This is a one-shot notification. To receive subsequent notifications - * the listener must be registered again. + * Once registered, the listener will be notified when the list of read only bookies changes.
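Under the revised contract above, registration is persistent: one call keeps delivering updates, where the old behaviour required re-registering after each notification. A sketch, assuming an existing BookKeeperAdmin instance named admin:

    admin.watchWritableBookiesChanged(bookies ->
            System.out.println("Writable bookies changed: " + bookies.getValue()));
    // No re-registration needed; the listener fires on every subsequent change.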
* * @param listener the listener to notify */ @@ -392,12 +441,11 @@ public boolean hasNext() { if (currentEntry != null) { return true; } - if (lastEntryId == -1 || nextEntryId <= lastEntryId) { + if ((lastEntryId == -1 || nextEntryId <= lastEntryId) && nextEntryId <= handle.getLastAddConfirmed()) { try { CompletableFuture> result = new CompletableFuture<>(); - handle.asyncReadEntriesInternal(nextEntryId, nextEntryId, - new SyncReadCallback(result), null, false); + new SyncReadCallback(result), null, false); currentEntry = SyncCallbackUtils.waitForResult(result).nextElement(); @@ -409,7 +457,7 @@ public boolean hasNext() { close(); return false; } - LOG.error("Error reading entry {} from ledger {}", new Object[] { nextEntryId, ledgerId }, e); + LOG.error("Error reading entry {} from ledger {}", nextEntryId, ledgerId, e); close(); throw new RuntimeException(e); } @@ -456,7 +504,7 @@ public SyncObject() { } } - public SortedMap getLedgersContainBookies(Set bookies) + public SortedMap getLedgersContainBookies(Set bookies) throws InterruptedException, BKException { final SyncObject sync = new SyncObject(); final AtomicReference> resultHolder = @@ -484,32 +532,32 @@ public void operationComplete(int rc, SortedMap result) { return resultHolder.get(); } - public void asyncGetLedgersContainBookies(final Set bookies, + public void asyncGetLedgersContainBookies(final Set bookies, final GenericCallback> callback) { final SortedMap ledgers = new ConcurrentSkipListMap(); bkc.getLedgerManager().asyncProcessLedgers(new Processor() { @Override public void process(final Long lid, final AsyncCallback.VoidCallback cb) { - bkc.getLedgerManager().readLedgerMetadata(lid, new GenericCallback() { - @Override - public void operationComplete(int rc, LedgerMetadata metadata) { - if (BKException.Code.NoSuchLedgerExistsException == rc) { - // the ledger was deleted during this iteration. - cb.processResult(BKException.Code.OK, null, null); - return; - } else if (BKException.Code.OK != rc) { - cb.processResult(rc, null, null); - return; - } - Set bookiesInLedger = metadata.getBookiesInThisLedger(); - Sets.SetView intersection = + bkc.getLedgerManager().readLedgerMetadata(lid) + .whenComplete((metadata, exception) -> { + if (BKException.getExceptionCode(exception) + == BKException.Code.NoSuchLedgerExistsOnMetadataServerException) { + // the ledger was deleted during this iteration. + cb.processResult(BKException.Code.OK, null, null); + return; + } else if (exception != null) { + cb.processResult(BKException.getExceptionCode(exception), null, null); + return; + } + Set bookiesInLedger = + LedgerMetadataUtils.getBookiesInThisLedger(metadata.getValue()); + Sets.SetView intersection = Sets.intersection(bookiesInLedger, bookies); - if (!intersection.isEmpty()) { - ledgers.put(lid, metadata); - } - cb.processResult(BKException.Code.OK, null, null); - } - }); + if (!intersection.isEmpty()) { + ledgers.put(lid, metadata.getValue()); + } + cb.processResult(BKException.Code.OK, null, null); + }); } }, new AsyncCallback.VoidCallback() { @Override @@ -534,22 +582,27 @@ public void processResult(int rc, String path, Object ctx) { * Source bookie that had a failure. We want to replicate the * ledger fragments that were stored there. 
*/ - public void recoverBookieData(final BookieSocketAddress bookieSrc) + public void recoverBookieData(final BookieId bookieSrc) throws InterruptedException, BKException { - Set bookiesSrc = Sets.newHashSet(bookieSrc); + Set bookiesSrc = Sets.newHashSet(bookieSrc); recoverBookieData(bookiesSrc); } - public void recoverBookieData(final Set bookiesSrc) + public void recoverBookieData(final Set bookiesSrc) throws InterruptedException, BKException { recoverBookieData(bookiesSrc, false, false); } - public void recoverBookieData(final Set bookiesSrc, boolean dryrun, boolean skipOpenLedgers) - throws InterruptedException, BKException { + public void recoverBookieData(final Set bookiesSrc, boolean dryrun, boolean skipOpenLedgers) + throws InterruptedException, BKException { + recoverBookieData(bookiesSrc, dryrun, skipOpenLedgers, false); + } + + public void recoverBookieData(final Set bookiesSrc, boolean dryrun, boolean skipOpenLedgers, + boolean skipUnrecoverableLedgers) throws InterruptedException, BKException { SyncObject sync = new SyncObject(); // Call the async method to recover bookie data. - asyncRecoverBookieData(bookiesSrc, dryrun, skipOpenLedgers, new RecoverCallback() { + asyncRecoverBookieData(bookiesSrc, dryrun, skipOpenLedgers, skipUnrecoverableLedgers, new RecoverCallback() { @Override public void recoverComplete(int rc, Object ctx) { LOG.info("Recover bookie operation completed with rc: {}", BKException.codeLogger(rc)); @@ -574,7 +627,7 @@ public void recoverComplete(int rc, Object ctx) { } public void recoverBookieData(final long lid, - final Set bookiesSrc, + final Set bookiesSrc, boolean dryrun, boolean skipOpenLedgers) throws InterruptedException, BKException { @@ -621,20 +674,21 @@ public void recoverBookieData(final long lid, * @param context * Context for the RecoverCallback to call. */ - public void asyncRecoverBookieData(final BookieSocketAddress bookieSrc, + public void asyncRecoverBookieData(final BookieId bookieSrc, final RecoverCallback cb, final Object context) { - Set bookiesSrc = Sets.newHashSet(bookieSrc); + Set bookiesSrc = Sets.newHashSet(bookieSrc); asyncRecoverBookieData(bookiesSrc, cb, context); } - public void asyncRecoverBookieData(final Set bookieSrc, + public void asyncRecoverBookieData(final Set bookieSrc, final RecoverCallback cb, final Object context) { - asyncRecoverBookieData(bookieSrc, false, false, cb, context); + asyncRecoverBookieData(bookieSrc, false, false, false, cb, context); } - public void asyncRecoverBookieData(final Set bookieSrc, boolean dryrun, - final boolean skipOpenLedgers, final RecoverCallback cb, final Object context) { - getActiveLedgers(bookieSrc, dryrun, skipOpenLedgers, cb, context); + public void asyncRecoverBookieData(final Set bookieSrc, boolean dryrun, + final boolean skipOpenLedgers, final boolean skipUnrecoverableLedgers, + final RecoverCallback cb, final Object context) { + getActiveLedgers(bookieSrc, dryrun, skipOpenLedgers, skipUnrecoverableLedgers, cb, context); } /** @@ -654,7 +708,7 @@ public void asyncRecoverBookieData(final Set bookieSrc, boo * @param context * Context for the RecoverCallback to call. 
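A sketch of calling the new recoverBookieData overload introduced above (the bookie id is hypothetical):

    Set<BookieId> failed = Sets.newHashSet(BookieId.parse("failed-bookie:3181"));
    // Dry run: print the recovery plan, skip open ledgers, and don't abort on
    // ledgers that cannot be opened or recovered.
    admin.recoverBookieData(failed, true /* dryrun */, true /* skipOpenLedgers */,
            true /* skipUnrecoverableLedgers */);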
*/ - public void asyncRecoverBookieData(long lid, final Set bookieSrc, boolean dryrun, + public void asyncRecoverBookieData(long lid, final Set bookieSrc, boolean dryrun, boolean skipOpenLedgers, final RecoverCallback callback, final Object context) { AsyncCallback.VoidCallback callbackWrapper = (rc, path, ctx) -> callback.recoverComplete(bkc.getReturnRc(rc), context); @@ -680,8 +734,9 @@ public void asyncRecoverBookieData(long lid, final Set book * @param context * Context for the RecoverCallback to call. */ - private void getActiveLedgers(final Set bookiesSrc, final boolean dryrun, - final boolean skipOpenLedgers, final RecoverCallback cb, final Object context) { + private void getActiveLedgers(final Set bookiesSrc, final boolean dryrun, + final boolean skipOpenLedgers, final boolean skipUnrecoverableLedgers, + final RecoverCallback cb, final Object context) { // Wrapper class around the RecoverCallback so it can be used // as the final VoidCallback to process ledgers class RecoverCallbackWrapper implements AsyncCallback.VoidCallback { @@ -700,7 +755,7 @@ public void processResult(int rc, String path, Object ctx) { Processor ledgerProcessor = new Processor() { @Override public void process(Long ledgerId, AsyncCallback.VoidCallback iterCallback) { - recoverLedger(bookiesSrc, ledgerId, dryrun, skipOpenLedgers, iterCallback); + recoverLedger(bookiesSrc, ledgerId, dryrun, skipOpenLedgers, skipUnrecoverableLedgers, iterCallback); } }; bkc.getLedgerManager().asyncProcessLedgers( @@ -725,8 +780,33 @@ ledgerProcessor, new RecoverCallbackWrapper(cb), * IterationCallback to invoke once we've recovered the current * ledger. */ - private void recoverLedger(final Set bookiesSrc, final long lId, final boolean dryrun, + private void recoverLedger(final Set bookiesSrc, final long lId, final boolean dryrun, final boolean skipOpenLedgers, final AsyncCallback.VoidCallback finalLedgerIterCb) { + recoverLedger(bookiesSrc, lId, dryrun, skipOpenLedgers, false, finalLedgerIterCb); + } + + /** + * This method asynchronously recovers a given ledger if any of the ledger + * entries were stored on the failed bookie. + * + * @param bookiesSrc + * Source bookies that had a failure. We want to replicate the + * ledger fragments that were stored there. + * @param lId + * Ledger id we want to recover. + * @param dryrun + * printing the recovery plan without actually recovering bookies + * @param skipOpenLedgers + * Skip recovering open ledgers. + * @param skipUnrecoverableLedgers + * Skip unrecoverable ledgers. + * @param finalLedgerIterCb + * IterationCallback to invoke once we've recovered the current + * ledger. 
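For orientation before the recoverLedger implementation below: the fragment ranges it computes come straight from the ensemble map.

    // Illustration (not part of the patch): getAllEnsembles() is keyed by the
    // first entry id of each ensemble, so adjacent keys delimit ledger fragments.
    // Hypothetical keys {0, 100, 250} yield the ranges [0..99] and [100..249];
    // the tail fragment starting at 250 is bounded by the ledger's last entry id
    // rather than by a successor key.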
+ */ + private void recoverLedger(final Set bookiesSrc, final long lId, final boolean dryrun, + final boolean skipOpenLedgers, final boolean skipUnrecoverableLedgers, + final AsyncCallback.VoidCallback finalLedgerIterCb) { if (LOG.isDebugEnabled()) { LOG.debug("Recovering ledger : {}", lId); } @@ -735,13 +815,18 @@ @Override public void openComplete(int rc, final LedgerHandle lh, Object ctx) { if (rc != BKException.Code.OK) { - LOG.error("BK error opening ledger: " + lId, BKException.create(rc)); - finalLedgerIterCb.processResult(rc, null, null); + if (skipUnrecoverableLedgers) { + LOG.warn("BK error opening ledger: {}, skipping its recovery.", lId, BKException.create(rc)); + finalLedgerIterCb.processResult(BKException.Code.OK, null, null); + } else { + LOG.error("BK error opening ledger: {}", lId, BKException.create(rc)); + finalLedgerIterCb.processResult(rc, null, null); + } return; } LedgerMetadata lm = lh.getLedgerMetadata(); - if (skipOpenLedgers && !lm.isClosed() && !lm.isInRecovery()) { + if (skipOpenLedgers && lm.getState() == LedgerMetadata.State.OPEN) { LOG.info("Skip recovering open ledger {}.", lId); try { lh.close(); @@ -770,13 +855,20 @@ @Override public void openComplete(int newrc, final LedgerHandle newlh, Object newctx) { if (newrc != BKException.Code.OK) { - LOG.error("BK error close ledger: " + lId, BKException.create(newrc)); - finalLedgerIterCb.processResult(newrc, null, null); + if (skipUnrecoverableLedgers) { + LOG.warn("BK error closing ledger: {}, skipping its recovery.", + lId, BKException.create(newrc)); + finalLedgerIterCb.processResult(BKException.Code.OK, null, null); + } else { + LOG.error("BK error closing ledger: {}", lId, BKException.create(newrc)); + finalLedgerIterCb.processResult(newrc, null, null); + } return; } bkc.mainWorkerPool.submit(() -> { // do recovery - recoverLedger(bookiesSrc, lId, dryrun, skipOpenLedgers, finalLedgerIterCb); + recoverLedger(bookiesSrc, lId, dryrun, skipOpenLedgers, + skipUnrecoverableLedgers, finalLedgerIterCb); }); } }, null); @@ -787,7 +879,13 @@ @Override public void processResult(int rc, String path, Object ctx) { if (BKException.Code.OK != rc) { - LOG.error("Failed to recover ledger {} : {}", lId, BKException.codeLogger(rc)); + if (skipUnrecoverableLedgers) { + LOG.warn("Failed to recover ledger {} : {}, skipping it.", lId, + BKException.codeLogger(rc)); + rc = BKException.Code.OK; + } else { + LOG.error("Failed to recover ledger {} : {}", lId, BKException.codeLogger(rc)); + } } else { LOG.info("Recovered ledger {}.", lId); } @@ -818,8 +916,8 @@ public void processResult(int rc, String path, Object ctx) { */ Map ledgerFragmentsRange = new HashMap(); Long curEntryId = null; - for (Map.Entry> entry : lh.getLedgerMetadata().getEnsembles() - .entrySet()) { + for (Map.Entry> entry : + lh.getLedgerMetadata().getAllEnsembles().entrySet()) { if (curEntryId != null) { ledgerFragmentsRange.put(curEntryId, entry.getKey() - 1); } @@ -864,9 +962,9 @@ public void processResult(int rc, String path, Object ctx) { */ for (final Long startEntryId : ledgerFragmentsToRecover) { Long endEntryId = ledgerFragmentsRange.get(startEntryId); - List ensemble = lh.getLedgerMetadata().getEnsembles().get(startEntryId); + List ensemble = lh.getLedgerMetadata().getAllEnsembles().get(startEntryId); // Get bookies to replace - 
Map targetBookieAddresses; try { targetBookieAddresses = getReplacementBookies(lh, ensemble, bookiesSrc); } catch (BKException.BKNotEnoughBookiesException e) { @@ -880,7 +978,7 @@ public void processResult(int rc, String path, Object ctx) { } if (dryrun) { - ArrayList newEnsemble = + ArrayList newEnsemble = replaceBookiesInEnsemble(ensemble, targetBookieAddresses); VERBOSE.info(" Fragment [{} - {}] : ", startEntryId, endEntryId); VERBOSE.info(" old ensemble : {}", formatEnsemble(ensemble, bookiesSrc, '*')); @@ -893,12 +991,12 @@ public void processResult(int rc, String path, Object ctx) { try { LedgerFragmentReplicator.SingleFragmentCallback cb = new LedgerFragmentReplicator.SingleFragmentCallback(ledgerFragmentsMcb, lh, - bkc.getMainWorkerPool(), + bkc.getLedgerManager(), startEntryId, getReplacementBookiesMap(ensemble, targetBookieAddresses)); LedgerFragment ledgerFragment = new LedgerFragment(lh, startEntryId, endEntryId, targetBookieAddresses.keySet()); asyncRecoverLedgerFragment(lh, ledgerFragment, cb, - Sets.newHashSet(targetBookieAddresses.values())); + Sets.newHashSet(targetBookieAddresses.values()), NOOP_BICONSUMER); } catch (InterruptedException e) { Thread.currentThread().interrupt(); return; @@ -912,7 +1010,7 @@ public void processResult(int rc, String path, Object ctx) { }, null); } - static String formatEnsemble(List ensemble, Set bookiesSrc, + static String formatEnsemble(List ensemble, Set bookiesSrc, char marker) { StringBuilder sb = new StringBuilder(); sb.append("["); @@ -950,18 +1048,19 @@ static String formatEnsemble(List ensemble, Set newBookies) throws InterruptedException { - lfr.replicate(lh, ledgerFragment, ledgerFragmentMcb, newBookies); + final Set newBookies, + final BiConsumer onReadEntryFailureCallback) throws InterruptedException { + lfr.replicate(lh, ledgerFragment, ledgerFragmentMcb, newBookies, onReadEntryFailureCallback); } - private Map getReplacementBookies( + private Map getReplacementBookies( LedgerHandle lh, - List ensemble, - Set bookiesToRereplicate) + List ensemble, + Set bookiesToRereplicate) throws BKException.BKNotEnoughBookiesException { Set bookieIndexesToRereplicate = Sets.newHashSet(); for (int bookieIndex = 0; bookieIndex < ensemble.size(); bookieIndex++) { - BookieSocketAddress bookieInEnsemble = ensemble.get(bookieIndex); + BookieId bookieInEnsemble = ensemble.get(bookieIndex); if (bookiesToRereplicate.contains(bookieInEnsemble)) { bookieIndexesToRereplicate.add(bookieIndex); } @@ -970,39 +1069,47 @@ private Map getReplacementBookies( lh, ensemble, bookieIndexesToRereplicate, Optional.of(bookiesToRereplicate)); } - private Map getReplacementBookiesByIndexes( + private Map getReplacementBookiesByIndexes( LedgerHandle lh, - List ensemble, + List ensemble, Set bookieIndexesToRereplicate, - Optional> excludedBookies) + Optional> excludedBookies) throws BKException.BKNotEnoughBookiesException { // target bookies to replicate - Map targetBookieAddresses = + Map targetBookieAddresses = Maps.newHashMapWithExpectedSize(bookieIndexesToRereplicate.size()); // bookies to exclude for ensemble allocation - Set bookiesToExclude = Sets.newHashSet(); + Set bookiesToExclude = Sets.newHashSet(); if (excludedBookies.isPresent()) { bookiesToExclude.addAll(excludedBookies.get()); } // excluding bookies that need to be replicated for (Integer bookieIndex : bookieIndexesToRereplicate) { - BookieSocketAddress bookie = ensemble.get(bookieIndex); + BookieId bookie = ensemble.get(bookieIndex); bookiesToExclude.add(bookie); } // allocate bookies for (Integer 
bookieIndex : bookieIndexesToRereplicate) { - BookieSocketAddress oldBookie = ensemble.get(bookieIndex); - BookieSocketAddress newBookie = + BookieId oldBookie = ensemble.get(bookieIndex); + EnsemblePlacementPolicy.PlacementResult replaceBookieResponse = bkc.getPlacementPolicy().replaceBookie( lh.getLedgerMetadata().getEnsembleSize(), lh.getLedgerMetadata().getWriteQuorumSize(), lh.getLedgerMetadata().getAckQuorumSize(), lh.getLedgerMetadata().getCustomMetadata(), - new HashSet<>(ensemble), + ensemble, oldBookie, bookiesToExclude); + BookieId newBookie = replaceBookieResponse.getResult(); + PlacementPolicyAdherence isEnsembleAdheringToPlacementPolicy = replaceBookieResponse.getAdheringToPolicy(); + if (isEnsembleAdheringToPlacementPolicy == PlacementPolicyAdherence.FAIL && LOG.isDebugEnabled()) { + LOG.debug( + "replaceBookie for bookie: {} in ensemble: {} " + + "is not adhering to placement policy and chose {}", + oldBookie, ensemble, newBookie); + } targetBookieAddresses.put(bookieIndex, newBookie); bookiesToExclude.add(newBookie); } @@ -1010,11 +1117,11 @@ private Map getReplacementBookiesByIndexes( return targetBookieAddresses; } - private ArrayList replaceBookiesInEnsemble( - List ensemble, - Map replacedBookies) { - ArrayList newEnsemble = Lists.newArrayList(ensemble); - for (Map.Entry entry : replacedBookies.entrySet()) { + private ArrayList replaceBookiesInEnsemble( + List ensemble, + Map replacedBookies) { + ArrayList newEnsemble = Lists.newArrayList(ensemble); + for (Map.Entry entry : replacedBookies.entrySet()) { newEnsemble.set(entry.getKey(), entry.getValue()); } return newEnsemble; @@ -1028,32 +1135,43 @@ private ArrayList replaceBookiesInEnsemble( * @param ledgerFragment * - LedgerFragment to replicate */ - public void replicateLedgerFragment(LedgerHandle lh, - final LedgerFragment ledgerFragment) - throws InterruptedException, BKException { - Optional> excludedBookies = Optional.empty(); - Map targetBookieAddresses = - getReplacementBookiesByIndexes(lh, ledgerFragment.getEnsemble(), - ledgerFragment.getBookiesIndexes(), excludedBookies); - replicateLedgerFragment(lh, ledgerFragment, targetBookieAddresses); + public void replicateLedgerFragment(LedgerHandle lh, final LedgerFragment ledgerFragment, + final BiConsumer onReadEntryFailureCallback) throws InterruptedException, BKException { + Map targetBookieAddresses = null; + if (LedgerFragment.ReplicateType.DATA_LOSS == ledgerFragment.getReplicateType()) { + Optional> excludedBookies = Optional.empty(); + targetBookieAddresses = getReplacementBookiesByIndexes(lh, ledgerFragment.getEnsemble(), + ledgerFragment.getBookiesIndexes(), excludedBookies); + } else if (LedgerFragment.ReplicateType.DATA_NOT_ADHERING_PLACEMENT == ledgerFragment.getReplicateType()) { + targetBookieAddresses = replaceNotAdheringPlacementPolicyBookie(ledgerFragment.getEnsemble(), + lh.getLedgerMetadata().getWriteQuorumSize(), lh.getLedgerMetadata().getAckQuorumSize()); + ledgerFragment.getBookiesIndexes().addAll(targetBookieAddresses.keySet()); + } + if (MapUtils.isEmpty(targetBookieAddresses)) { + LOG.warn("Could not replicate for {} ledger: {}, not find target bookie.", + ledgerFragment.getReplicateType(), ledgerFragment.getLedgerId()); + throw new BKException.BKLedgerRecoveryException(); + } + replicateLedgerFragment(lh, ledgerFragment, targetBookieAddresses, onReadEntryFailureCallback); } private void replicateLedgerFragment(LedgerHandle lh, final LedgerFragment ledgerFragment, - final Map targetBookieAddresses) + final Map targetBookieAddresses, + 
final BiConsumer onReadEntryFailureCallback) throws InterruptedException, BKException { CompletableFuture result = new CompletableFuture<>(); ResultCallBack resultCallBack = new ResultCallBack(result); SingleFragmentCallback cb = new SingleFragmentCallback( resultCallBack, lh, - bkc.getMainWorkerPool(), + bkc.getLedgerManager(), ledgerFragment.getFirstEntryId(), getReplacementBookiesMap(ledgerFragment, targetBookieAddresses)); - Set targetBookieSet = Sets.newHashSet(); + Set targetBookieSet = Sets.newHashSet(); targetBookieSet.addAll(targetBookieAddresses.values()); - asyncRecoverLedgerFragment(lh, ledgerFragment, cb, targetBookieSet); + asyncRecoverLedgerFragment(lh, ledgerFragment, cb, targetBookieSet, onReadEntryFailureCallback); try { SyncCallbackUtils.waitForResult(result); @@ -1062,45 +1180,45 @@ private void replicateLedgerFragment(LedgerHandle lh, } } - private static Map getReplacementBookiesMap( - List ensemble, - Map targetBookieAddresses) { - Map bookiesMap = - new HashMap(); - for (Map.Entry entry : targetBookieAddresses.entrySet()) { - BookieSocketAddress oldBookie = ensemble.get(entry.getKey()); - BookieSocketAddress newBookie = entry.getValue(); + private static Map getReplacementBookiesMap( + List ensemble, + Map targetBookieAddresses) { + Map bookiesMap = + new HashMap(); + for (Map.Entry entry : targetBookieAddresses.entrySet()) { + BookieId oldBookie = ensemble.get(entry.getKey()); + BookieId newBookie = entry.getValue(); bookiesMap.put(oldBookie, newBookie); } return bookiesMap; } - private static Map getReplacementBookiesMap( + private static Map getReplacementBookiesMap( LedgerFragment ledgerFragment, - Map targetBookieAddresses) { - Map bookiesMap = - new HashMap(); + Map targetBookieAddresses) { + Map bookiesMap = + new HashMap(); for (Integer bookieIndex : ledgerFragment.getBookiesIndexes()) { - BookieSocketAddress oldBookie = ledgerFragment.getAddress(bookieIndex); - BookieSocketAddress newBookie = targetBookieAddresses.get(bookieIndex); + BookieId oldBookie = ledgerFragment.getAddress(bookieIndex); + BookieId newBookie = targetBookieAddresses.get(bookieIndex); bookiesMap.put(oldBookie, newBookie); } return bookiesMap; } private static boolean containBookiesInLastEnsemble(LedgerMetadata lm, - Set bookies) { - if (lm.getEnsembles().size() <= 0) { + Set bookies) { + if (lm.getAllEnsembles().size() <= 0) { return false; } - Long lastKey = lm.getEnsembles().lastKey(); - List lastEnsemble = lm.getEnsembles().get(lastKey); + Long lastKey = lm.getAllEnsembles().lastKey(); + List lastEnsemble = lm.getAllEnsembles().get(lastKey); return containBookies(lastEnsemble, bookies); } - private static boolean containBookies(List ensemble, - Set bookies) { - for (BookieSocketAddress bookie : ensemble) { + private static boolean containBookies(List ensemble, + Set bookies) { + for (BookieId bookie : ensemble) { if (bookies.contains(bookie)) { return true; } @@ -1111,7 +1229,7 @@ private static boolean containBookies(List ensemble, /** * This is the class for getting the replication result. 
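The ResultCallBack described here bridges the callback-style replication result into a CompletableFuture that replicateLedgerFragment then blocks on via waitForResult. A minimal sketch of that sync-over-async bridge, using stand-in types rather than the real BookKeeper classes:

    import java.util.concurrent.CompletableFuture;

    public class SyncBridgeSketch {
        interface VoidCallback {
            void processResult(int rc, String path, Object ctx);
        }

        // Adapt a (rc, path, ctx) completion callback into a CompletableFuture.
        static VoidCallback completing(CompletableFuture<Void> result) {
            return (rc, path, ctx) -> {
                if (rc == 0) {
                    result.complete(null);
                } else {
                    result.completeExceptionally(new Exception("replication failed, rc=" + rc));
                }
            };
        }

        public static void main(String[] args) throws Exception {
            CompletableFuture<Void> result = new CompletableFuture<>();
            VoidCallback cb = completing(result);
            cb.processResult(0, "/fragment", null); // simulate the async completion
            result.get();                           // roughly what waitForResult does
            System.out.println("fragment replicated");
        }
    }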
*/ - static class ResultCallBack implements AsyncCallback.VoidCallback { + public static class ResultCallBack implements AsyncCallback.VoidCallback { private final CompletableFuture sync; public ResultCallBack(CompletableFuture sync) { @@ -1138,46 +1256,48 @@ public void processResult(int rc, String s, Object ctx) { */ public static boolean format(ServerConfiguration conf, boolean isInteractive, boolean force) throws Exception { - return runFunctionWithMetadataBookieDriver(conf, driver -> { - try { - boolean ledgerRootExists = driver.getRegistrationManager().prepareFormat(); - - // If old data was there then confirm with admin. - boolean doFormat = true; - if (ledgerRootExists) { - if (!isInteractive) { - // If non interactive and force is set, then delete old data. - if (force) { - doFormat = true; - } else { - doFormat = false; + return runFunctionWithMetadataBookieDriver(conf, new Function() { + @Override + @SuppressFBWarnings("RCN_REDUNDANT_NULLCHECK_WOULD_HAVE_BEEN_A_NPE") + public Boolean apply(MetadataBookieDriver driver) { + try { + try (RegistrationManager regManager = driver.createRegistrationManager()) { + boolean ledgerRootExists = regManager.prepareFormat(); + + // If old data was there then confirm with admin. + boolean doFormat = true; + if (ledgerRootExists) { + if (!isInteractive) { + // If non interactive and force is set, then delete old data. + doFormat = force; + } else { + // Confirm with the admin. + doFormat = IOUtils + .confirmPrompt("Ledger root already exists. " + + "Are you sure to format bookkeeper metadata? " + + "This may cause data loss."); + } } - } else { - // Confirm with the admin. - doFormat = IOUtils - .confirmPrompt("Ledger root already exists. " - + "Are you sure to format bookkeeper metadata? " - + "This may cause data loss."); - } - } - if (!doFormat) { - return false; - } + if (!doFormat) { + return false; + } - driver.getLedgerManagerFactory().format( - conf, - driver.getLayoutManager()); + driver.getLedgerManagerFactory().format( + conf, + driver.getLayoutManager()); - return driver.getRegistrationManager().format(); - } catch (Exception e) { - throw new UncheckedExecutionException(e.getMessage(), e); + return regManager.format(); + } + } catch (Exception e) { + throw new UncheckedExecutionException(e.getMessage(), e); + } } }); } /** - * Intializes new cluster by creating required znodes for the cluster. If + * Initializes new cluster by creating required znodes for the cluster. If * ledgersrootpath is already existing then it will error out. * * @param conf @@ -1268,7 +1388,7 @@ public static boolean initBookie(ServerConfiguration conf) throws Exception { * make sure that there is no bookie registered with the same * bookieid and the cookie for the same bookieid is not existing. 
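The reworked format() confirms before wiping metadata: when old data exists, a non-interactive run formats only if force was given (the `doFormat = force` simplification in the hunk above), otherwise the admin is prompted. A runnable sketch of that decision table, with a plain Scanner prompt as a stand-in for IOUtils.confirmPrompt:

    import java.util.Scanner;

    public class FormatDecisionSketch {
        static boolean shouldFormat(boolean ledgerRootExists, boolean interactive, boolean force) {
            if (!ledgerRootExists) {
                return true;                  // nothing to lose, just format
            }
            if (!interactive) {
                return force;                 // the simplification made in the patch
            }
            return confirm("Ledger root already exists. Format bookkeeper metadata?");
        }

        static boolean confirm(String question) {
            System.out.print(question + " (y/n): ");
            return new Scanner(System.in).nextLine().trim().equalsIgnoreCase("y");
        }

        public static void main(String[] args) {
            System.out.println(shouldFormat(true, false, true));  // true: forced
            System.out.println(shouldFormat(true, false, false)); // false: refused
        }
    }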
*/ - String bookieId = Bookie.getBookieAddress(conf).toString(); + BookieId bookieId = BookieImpl.getBookieId(conf); if (rm.isBookieRegistered(bookieId)) { LOG.error("Bookie with bookieId: {} is still registered, " + "If this node is running bookie process, try stopping it first.", bookieId); @@ -1311,8 +1431,9 @@ private static boolean validateDirectoriesAreEmpty(File[] dirs, String typeOfDir */ public Iterable listLedgers() throws IOException { - final LedgerRangeIterator iterator = bkc.getLedgerManager().getLedgerRanges(); + final LedgerRangeIterator iterator = bkc.getLedgerManager().getLedgerRanges(0); return new Iterable() { + @Override public Iterator iterator() { return new Iterator() { Iterator currentRange = null; @@ -1366,13 +1487,21 @@ public LedgerMetadata getLedgerMetadata(LedgerHandle lh) { } private LedgerUnderreplicationManager getUnderreplicationManager() - throws CompatibilityException, KeeperException, InterruptedException { + throws CompatibilityException, UnavailableException, InterruptedException { if (underreplicationManager == null) { underreplicationManager = mFactory.newLedgerUnderreplicationManager(); } return underreplicationManager; } + private LedgerAuditorManager getLedgerAuditorManager() + throws IOException, InterruptedException { + if (ledgerAuditorManager == null) { + ledgerAuditorManager = mFactory.newLedgerAuditorManager(); + } + return ledgerAuditorManager; + } + /** * Setter for LostBookieRecoveryDelay value (in seconds) in Zookeeper. * @@ -1424,8 +1553,7 @@ public void triggerAudit() throw new UnavailableException("Autorecovery is disabled. So giving up!"); } - BookieSocketAddress auditorId = - AuditorElector.getCurrentAuditor(new ServerConfiguration(bkc.getConf()), bkc.getZkHandle()); + BookieId auditorId = getLedgerAuditorManager().getCurrentAuditor(); if (auditorId == null) { LOG.error("No auditor elected, though Autorecovery is enabled. So giving up."); throw new UnavailableException("No auditor elected, though Autorecovery is enabled. So giving up."); @@ -1441,7 +1569,7 @@ public void triggerAudit() * Triggers AuditTask by resetting lostBookieRecoveryDelay and then make * sure the ledgers stored in the given decommissioning bookie are properly * replicated and they are not underreplicated because of the given bookie. - * This method waits untill there are no underreplicatedledgers because of this + * This method waits until there are no underreplicatedledgers because of this * bookie. If the given Bookie is not shutdown yet, then it will throw * BKIllegalOpException. * @@ -1456,7 +1584,7 @@ public void triggerAudit() * @throws TimeoutException * @throws BKException */ - public void decommissionBookie(BookieSocketAddress bookieAddress) + public void decommissionBookie(BookieId bookieAddress) throws CompatibilityException, UnavailableException, KeeperException, InterruptedException, IOException, BKAuditException, TimeoutException, BKException { if (getAvailableBookies().contains(bookieAddress) || getReadOnlyBookies().contains(bookieAddress)) { @@ -1484,7 +1612,7 @@ public void decommissionBookie(BookieSocketAddress bookieAddress) Set ledgersStoredInThisBookie = bookieToLedgersMap.get(bookieAddress.toString()); if ((ledgersStoredInThisBookie != null) && (!ledgersStoredInThisBookie.isEmpty())) { /* - * wait untill all the ledgers are replicated to other + * wait until all the ledgers are replicated to other * bookies by making sure that these ledgers metadata don't * contain this bookie as part of their ensemble. 
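decommissionBookie() then waits until no ledger still lists the bookie in any of its ensembles. A sketch of the polling loop that follows (waitForLedgersToBeReplicated), with the patch's 3-second-per-ledger step and 5-minute cap, and the removeIf pruning before and after each wait; the predicate is a stand-in for the real metadata check:

    import java.util.ArrayList;
    import java.util.Collection;
    import java.util.List;
    import java.util.function.Predicate;

    public class BoundedPollSketch {
        static void waitUntilDrained(Collection<Long> ledgers, Predicate<Long> isDone)
                throws InterruptedException {
            int sleepPerLedgerMs = 3 * 1000;
            int maxSleepMs = 5 * 60 * 1000;
            ledgers.removeIf(isDone);
            while (!ledgers.isEmpty()) {
                long proposed = (long) ledgers.size() * sleepPerLedgerMs; // long math avoids int overflow
                long sleepMs = Math.min(proposed, maxSleepMs);
                System.out.printf("%d ledgers pending, next check in %ds%n", ledgers.size(), sleepMs / 1000);
                Thread.sleep(sleepMs);
                ledgers.removeIf(isDone);
            }
        }

        public static void main(String[] args) throws InterruptedException {
            List<Long> pending = new ArrayList<>(List.of(1L, 2L));
            waitUntilDrained(pending, id -> true); // trivially done: drains before the first sleep
        }
    }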
*/ @@ -1506,70 +1634,77 @@ public void decommissionBookie(BookieSocketAddress bookieAddress) } } - private void waitForLedgersToBeReplicated(Collection ledgers, BookieSocketAddress thisBookieAddress, + private void waitForLedgersToBeReplicated(Collection ledgers, BookieId thisBookieAddress, LedgerManager ledgerManager) throws InterruptedException, TimeoutException { - int maxSleepTimeInBetweenChecks = 10 * 60 * 1000; // 10 minutes - int sleepTimePerLedger = 10 * 1000; // 10 secs + int maxSleepTimeInBetweenChecks = 5 * 60 * 1000; // 5 minutes + int sleepTimePerLedger = 3 * 1000; // 3 secs Predicate validateBookieIsNotPartOfEnsemble = ledgerId -> !areEntriesOfLedgerStoredInTheBookie(ledgerId, thisBookieAddress, ledgerManager); + ledgers.removeIf(validateBookieIsNotPartOfEnsemble); + while (!ledgers.isEmpty()) { - LOG.info("Count of Ledgers which need to be rereplicated: {}", ledgers.size()); - int sleepTimeForThisCheck = ledgers.size() * sleepTimePerLedger > maxSleepTimeInBetweenChecks + int sleepTimeForThisCheck = (long) ledgers.size() * sleepTimePerLedger > maxSleepTimeInBetweenChecks ? maxSleepTimeInBetweenChecks : ledgers.size() * sleepTimePerLedger; + LOG.info("Count of Ledgers which need to be rereplicated: {}, waiting {} seconds for next check", + ledgers.size(), sleepTimeForThisCheck / 1000); Thread.sleep(sleepTimeForThisCheck); - LOG.debug("Making sure following ledgers replication to be completed: {}", ledgers); + if (LOG.isDebugEnabled()) { + LOG.debug("Making sure following ledgers replication to be completed: {}", ledgers); + } ledgers.removeIf(validateBookieIsNotPartOfEnsemble); } } - public static boolean areEntriesOfLedgerStoredInTheBookie(long ledgerId, BookieSocketAddress bookieAddress, + public static boolean areEntriesOfLedgerStoredInTheBookie(long ledgerId, BookieId bookieAddress, LedgerManager ledgerManager) { - ReadMetadataCallback cb = new ReadMetadataCallback(ledgerId); - ledgerManager.readLedgerMetadata(ledgerId, cb); try { - LedgerMetadata ledgerMetadata = cb.get(); + LedgerMetadata ledgerMetadata = ledgerManager.readLedgerMetadata(ledgerId).get().getValue(); return areEntriesOfLedgerStoredInTheBookie(ledgerId, bookieAddress, ledgerMetadata); } catch (InterruptedException ie) { Thread.currentThread().interrupt(); throw new RuntimeException(ie); } catch (ExecutionException e) { if (e.getCause() != null - && e.getCause().getClass().equals(BKException.BKNoSuchLedgerExistsException.class)) { - LOG.debug("Ledger: {} has been deleted", ledgerId); + && e.getCause().getClass() + .equals(BKException.BKNoSuchLedgerExistsOnMetadataServerException.class)) { + if (LOG.isDebugEnabled()) { + LOG.debug("Ledger: {} has been deleted", ledgerId); + } return false; } else { - LOG.error("Got exception while trying to read LedgerMeatadata of " + ledgerId, e); + LOG.error("Got exception while trying to read LedgerMetadata of " + ledgerId, e); throw new RuntimeException(e); } } } - public static boolean areEntriesOfLedgerStoredInTheBookie(long ledgerId, BookieSocketAddress bookieAddress, + public static boolean areEntriesOfLedgerStoredInTheBookie(long ledgerId, BookieId bookieAddress, LedgerMetadata ledgerMetadata) { - Collection> ensemblesOfSegments = ledgerMetadata.getEnsembles().values(); - Iterator> ensemblesOfSegmentsIterator = ensemblesOfSegments.iterator(); - List ensemble; + Collection> ensemblesOfSegments = ledgerMetadata.getAllEnsembles().values(); + Iterator> ensemblesOfSegmentsIterator = ensemblesOfSegments.iterator(); + List ensemble; int segmentNo = 0; while 
(ensemblesOfSegmentsIterator.hasNext()) { ensemble = ensemblesOfSegmentsIterator.next(); if (ensemble.contains(bookieAddress)) { - if (areEntriesOfSegmentStoredInTheBookie(ledgerMetadata, bookieAddress, segmentNo++)) { + if (areEntriesOfSegmentStoredInTheBookie(ledgerMetadata, bookieAddress, segmentNo)) { return true; } } + segmentNo++; } return false; } private static boolean areEntriesOfSegmentStoredInTheBookie(LedgerMetadata ledgerMetadata, - BookieSocketAddress bookieAddress, int segmentNo) { + BookieId bookieAddress, int segmentNo) { boolean isLedgerClosed = ledgerMetadata.isClosed(); int ensembleSize = ledgerMetadata.getEnsembleSize(); int writeQuorumSize = ledgerMetadata.getWriteQuorumSize(); - List>> segments = - new LinkedList<>(ledgerMetadata.getEnsembles().entrySet()); - + List>> segments = + new LinkedList<>(ledgerMetadata.getAllEnsembles().entrySet()); + List currentSegmentEnsemble = segments.get(segmentNo).getValue(); boolean lastSegment = (segmentNo == (segments.size() - 1)); /* @@ -1585,6 +1720,14 @@ private static boolean areEntriesOfSegmentStoredInTheBookie(LedgerMetadata ledge return false; } + /* + * If current segment ensemble doesn't contain this bookie then return + * false. + */ + if (!currentSegmentEnsemble.contains(bookieAddress)) { + return false; + } + /* * if ensembleSize is equal to writeQuorumSize, then ofcourse all * the entries of this segment are supposed to be stored in this @@ -1618,7 +1761,7 @@ private static boolean areEntriesOfSegmentStoredInTheBookie(LedgerMetadata ledge DistributionSchedule distributionSchedule = new RoundRobinDistributionSchedule( ledgerMetadata.getWriteQuorumSize(), ledgerMetadata.getAckQuorumSize(), ledgerMetadata.getEnsembleSize()); - List currentSegmentEnsemble = segments.get(segmentNo).getValue(); + int thisBookieIndexInCurrentEnsemble = currentSegmentEnsemble.indexOf(bookieAddress); long firstEntryId = segments.get(segmentNo).getKey(); long lastEntryId = lastSegment ? ledgerMetadata.getLastEntryId() : segments.get(segmentNo + 1).getKey() - 1; @@ -1636,24 +1779,67 @@ private static boolean areEntriesOfSegmentStoredInTheBookie(LedgerMetadata ledge return firstStoredEntryId != LedgerHandle.INVALID_ENTRY_ID; } - static class ReadMetadataCallback extends AbstractFuture - implements GenericCallback { - final long ledgerId; + /** + * returns boolean value specifying if the ensemble of the segment is + * adhering to the ensemble placement policy for the given writeQuorumSize + * and ackQuorumSize. 
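areEntriesOfSegmentStoredInTheBookie above (note the patch also advances segmentNo for every segment, not only matching ones) ultimately asks whether round-robin striping ever lands a segment entry on the bookie's ensemble index. A self-contained sketch of that membership test, assuming the standard round-robin placement where entry e starts at index e % ensembleSize and covers writeQuorumSize consecutive indexes; the real code delegates to RoundRobinDistributionSchedule:

    public class RoundRobinMembershipSketch {
        static boolean bookieStoresEntry(long entryId, int bookieIndex, int ensembleSize, int writeQuorumSize) {
            int start = (int) (entryId % ensembleSize);
            int distance = Math.floorMod(bookieIndex - start, ensembleSize);
            return distance < writeQuorumSize;
        }

        static boolean bookieStoresAnyEntryInRange(long first, long last, int bookieIndex,
                                                   int ensembleSize, int writeQuorumSize) {
            // A segment spanning a full rotation necessarily hits every bookie.
            if (last - first + 1 >= ensembleSize) {
                return true;
            }
            for (long e = first; e <= last; e++) {
                if (bookieStoresEntry(e, bookieIndex, ensembleSize, writeQuorumSize)) {
                    return true;
                }
            }
            return false;
        }

        public static void main(String[] args) {
            // ensemble=5, writeQuorum=2: entry 3 goes to indexes 3 and 4
            System.out.println(bookieStoresEntry(3, 4, 5, 2)); // true
            System.out.println(bookieStoresEntry(3, 0, 5, 2)); // false
        }
    }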
+     *
+     * @param ensembleBookiesList
+     *            ensemble of the segment
+     * @param writeQuorumSize
+     *            writeQuorumSize of the ledger
+     * @param ackQuorumSize
+     *            ackQuorumSize of the ledger
+     * @return PlacementPolicyAdherence indicating whether the ensemble adheres
+     *         to the EnsemblePlacementPolicy
+     */
+    public PlacementPolicyAdherence isEnsembleAdheringToPlacementPolicy(List<BookieId> ensembleBookiesList,
+            int writeQuorumSize, int ackQuorumSize) {
+        return bkc.getPlacementPolicy().isEnsembleAdheringToPlacementPolicy(ensembleBookiesList, writeQuorumSize,
+                ackQuorumSize);
+    }

-        ReadMetadataCallback(long ledgerId) {
-            this.ledgerId = ledgerId;
+    public Map<Integer, BookieId> replaceNotAdheringPlacementPolicyBookie(List<BookieId> ensembleBookiesList,
+            int writeQuorumSize, int ackQuorumSize) {
+        try {
+            EnsemblePlacementPolicy.PlacementResult<List<BookieId>> placementResult = bkc.getPlacementPolicy()
+                    .replaceToAdherePlacementPolicy(ensembleBookiesList.size(), writeQuorumSize, ackQuorumSize,
+                            new HashSet<>(), ensembleBookiesList);
+            if (PlacementPolicyAdherence.FAIL != placementResult.getAdheringToPolicy()) {
+                Map<Integer, BookieId> targetMap = new HashMap<>();
+                List<BookieId> newEnsembles = placementResult.getResult();
+                for (int i = 0; i < ensembleBookiesList.size(); i++) {
+                    BookieId originBookie = ensembleBookiesList.get(i);
+                    BookieId newBookie = newEnsembles.get(i);
+                    if (!originBookie.equals(newBookie)) {
+                        targetMap.put(i, newBookie);
+                    }
+                }
+                return targetMap;
+            }
+        } catch (UnsupportedOperationException e) {
+            LOG.warn("The placement policy: {} does not support replaceToAdherePlacementPolicy, "
+                    + "skipping replacement of non-adhering bookies.",
+                    bkc.getPlacementPolicy().getClass().getName());
        }
+        return Collections.emptyMap();
+    }

-        long getLedgerId() {
-            return ledgerId;
-        }
+    /**
+     * Makes an async request for the list of entries of a ledger stored on a bookie
+     * and returns a Future for the result.
+     *
+     * @param address
+     *            BookieId of the bookie
+     * @param ledgerId
+     *            ledgerId
+     * @return returns Future
+     */
+    public CompletableFuture<AvailabilityOfEntriesOfLedger> asyncGetListOfEntriesOfLedger(BookieId address,
+            long ledgerId) {
+        return bkc.getBookieClient().getListOfEntriesOfLedger(address, ledgerId);
+    }

-        public void operationComplete(int rc, LedgerMetadata result) {
-            if (rc != 0) {
-                setException(BKException.create(rc));
-            } else {
-                set(result);
-            }
-        }
+    public BookieId getCurrentAuditor() throws IOException, InterruptedException {
+        return getLedgerAuditorManager().getCurrentAuditor();
    }
 }
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookKeeperClientStats.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookKeeperClientStats.java
index 2fa6753b105..7d58e5fb27b 100644
--- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookKeeperClientStats.java
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookKeeperClientStats.java
@@ -21,6 +21,7 @@
 package org.apache.bookkeeper.client;

+import org.apache.bookkeeper.client.impl.BookKeeperClientStatsImpl;
 import org.apache.bookkeeper.stats.Counter;
 import org.apache.bookkeeper.stats.Gauge;
 import org.apache.bookkeeper.stats.OpStatsLogger;
@@ -30,6 +31,8 @@
  * List of constants for defining client stats names.
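The replaceNotAdheringPlacementPolicyBookie method above keeps only the positions where the policy's proposed ensemble differs from the current one, producing a sparse index-to-replacement map so unchanged fragments are left alone. A sketch of that diff step, with strings standing in for bookie ids:

    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    public class EnsembleDiffSketch {
        static Map<Integer, String> diff(List<String> oldEnsemble, List<String> newEnsemble) {
            Map<Integer, String> replacements = new HashMap<>();
            for (int i = 0; i < oldEnsemble.size(); i++) {
                if (!oldEnsemble.get(i).equals(newEnsemble.get(i))) {
                    replacements.put(i, newEnsemble.get(i));
                }
            }
            return replacements;
        }

        public static void main(String[] args) {
            System.out.println(diff(List.of("b1", "b2", "b3"), List.of("b1", "b9", "b3"))); // {1=b9}
        }
    }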
*/ public interface BookKeeperClientStats { + String CATEGORY_CLIENT = "client"; + String CLIENT_SCOPE = "bookkeeper_client"; // Metadata Operations @@ -61,6 +64,7 @@ public interface BookKeeperClientStats { String GET_BOOKIE_INFO_OP = "GET_BOOKIE_INFO"; String SPECULATIVE_READ_COUNT = "SPECULATIVE_READ_COUNT"; String READ_REQUESTS_REORDERED = "READ_REQUESTS_REORDERED"; + String GET_LIST_OF_ENTRIES_OF_LEDGER_OP = "GET_LIST_OF_ENTRIES_OF_LEDGER"; // per channel stats String CHANNEL_SCOPE = "per_channel_bookie_client"; @@ -78,6 +82,7 @@ public interface BookKeeperClientStats { String TIMEOUT_GET_BOOKIE_INFO = "TIMEOUT_GET_BOOKIE_INFO"; String CHANNEL_START_TLS_OP = "START_TLS"; String CHANNEL_TIMEOUT_START_TLS_OP = "TIMEOUT_START_TLS"; + String TIMEOUT_GET_LIST_OF_ENTRIES_OF_LEDGER = "TIMEOUT_GET_LIST_OF_ENTRIES_OF_LEDGER"; String NETTY_EXCEPTION_CNT = "NETTY_EXCEPTION_CNT"; String CLIENT_CHANNEL_WRITE_WAIT = "CLIENT_CHANNEL_WRITE_WAIT"; @@ -90,6 +95,16 @@ public interface BookKeeperClientStats { String FAILED_CONNECTION_COUNTER = "FAILED_CONNECTION_COUNTER"; String FAILED_TLS_HANDSHAKE_COUNTER = "FAILED_TLS_HANDSHAKE_COUNTER"; + // placementpolicy stats + String NUM_WRITABLE_BOOKIES_IN_DEFAULT_RACK = "NUM_WRITABLE_BOOKIES_IN_DEFAULT_RACK"; + String WRITE_DELAYED_DUE_TO_NOT_ENOUGH_FAULT_DOMAINS = "WRITE_DELAYED_DUE_TO_NOT_ENOUGH_FAULT_DOMAINS"; + String WRITE_DELAYED_DUE_TO_NOT_ENOUGH_FAULT_DOMAINS_LATENCY = + "WRITE_DELAYED_DUE_TO_NOT_ENOUGH_FAULT_DOMAINS_LATENCY"; + String WRITE_TIMED_OUT_DUE_TO_NOT_ENOUGH_FAULT_DOMAINS = "WRITE_TIME_OUT_DUE_TO_NOT_ENOUGH_FAULT_DOMAINS"; + String NUM_WRITABLE_BOOKIES_IN_DEFAULT_FAULTDOMAIN = "NUM_WRITABLE_BOOKIES_IN_DEFAULT_FAULTDOMAIN"; + + String BOOKIE_LABEL = "bookie"; + OpStatsLogger getCreateOpLogger(); OpStatsLogger getOpenOpLogger(); OpStatsLogger getDeleteOpLogger(); @@ -111,122 +126,13 @@ public interface BookKeeperClientStats { Counter getLacUpdateHitsCounter(); Counter getLacUpdateMissesCounter(); OpStatsLogger getClientChannelWriteWaitLogger(); + OpStatsLogger getWriteDelayedDueToNotEnoughFaultDomainsLatency(); + Counter getWriteDelayedDueToNotEnoughFaultDomains(); + Counter getWriteTimedOutDueToNotEnoughFaultDomains(); void registerPendingAddsGauge(Gauge gauge); static BookKeeperClientStats newInstance(StatsLogger stats) { - OpStatsLogger createOpLogger = stats.getOpStatsLogger(CREATE_OP); - OpStatsLogger deleteOpLogger = stats.getOpStatsLogger(DELETE_OP); - OpStatsLogger openOpLogger = stats.getOpStatsLogger(OPEN_OP); - OpStatsLogger recoverOpLogger = stats.getOpStatsLogger(RECOVER_OP); - OpStatsLogger readOpLogger = stats.getOpStatsLogger(READ_OP); - Counter readOpDmCounter = stats.getCounter(READ_OP_DM); - OpStatsLogger readLacAndEntryOpLogger = stats.getOpStatsLogger(READ_LAST_CONFIRMED_AND_ENTRY); - OpStatsLogger readLacAndEntryRespLogger = stats.getOpStatsLogger(READ_LAST_CONFIRMED_AND_ENTRY_RESPONSE); - OpStatsLogger addOpLogger = stats.getOpStatsLogger(ADD_OP); - OpStatsLogger forceOpLogger = stats.getOpStatsLogger(FORCE_OP); - Counter addOpUrCounter = stats.getCounter(ADD_OP_UR); - OpStatsLogger writeLacOpLogger = stats.getOpStatsLogger(WRITE_LAC_OP); - OpStatsLogger readLacOpLogger = stats.getOpStatsLogger(READ_LAC_OP); - OpStatsLogger recoverAddEntriesStats = stats.getOpStatsLogger(LEDGER_RECOVER_ADD_ENTRIES); - OpStatsLogger recoverReadEntriesStats = stats.getOpStatsLogger(LEDGER_RECOVER_READ_ENTRIES); - - Counter ensembleChangeCounter = stats.getCounter(ENSEMBLE_CHANGES); - Counter lacUpdateHitsCounter = 
stats.getCounter(LAC_UPDATE_HITS); - Counter lacUpdateMissesCounter = stats.getCounter(LAC_UPDATE_MISSES); - OpStatsLogger clientChannelWriteWaitStats = stats.getOpStatsLogger(CLIENT_CHANNEL_WRITE_WAIT); - - Counter speculativeReadCounter = stats.getCounter(SPECULATIVE_READ_COUNT); - - return new BookKeeperClientStats() { - @Override - public OpStatsLogger getCreateOpLogger() { - return createOpLogger; - } - @Override - public OpStatsLogger getOpenOpLogger() { - return openOpLogger; - } - @Override - public OpStatsLogger getDeleteOpLogger() { - return deleteOpLogger; - } - @Override - public OpStatsLogger getRecoverOpLogger() { - return recoverOpLogger; - } - @Override - public OpStatsLogger getReadOpLogger() { - return readOpLogger; - } - @Override - public OpStatsLogger getReadLacAndEntryOpLogger() { - return readLacAndEntryOpLogger; - } - @Override - public OpStatsLogger getReadLacAndEntryRespLogger() { - return readLacAndEntryRespLogger; - } - @Override - public OpStatsLogger getAddOpLogger() { - return addOpLogger; - } - @Override - public OpStatsLogger getForceOpLogger() { - return forceOpLogger; - } - @Override - public OpStatsLogger getWriteLacOpLogger() { - return writeLacOpLogger; - } - @Override - public OpStatsLogger getReadLacOpLogger() { - return readLacOpLogger; - } - @Override - public OpStatsLogger getRecoverAddCountLogger() { - return recoverAddEntriesStats; - } - @Override - public OpStatsLogger getRecoverReadCountLogger() { - return recoverReadEntriesStats; - } - @Override - public Counter getReadOpDmCounter() { - return readOpDmCounter; - } - @Override - public Counter getAddOpUrCounter() { - return addOpUrCounter; - } - @Override - public Counter getSpeculativeReadCounter() { - return speculativeReadCounter; - } - @Override - public Counter getEnsembleChangeCounter() { - return ensembleChangeCounter; - } - @Override - public Counter getLacUpdateHitsCounter() { - return lacUpdateHitsCounter; - } - @Override - public Counter getLacUpdateMissesCounter() { - return lacUpdateMissesCounter; - } - @Override - public OpStatsLogger getClientChannelWriteWaitLogger() { - return clientChannelWriteWaitStats; - } - @Override - public Counter getEnsembleBookieDistributionCounter(String bookie) { - return stats.getCounter(LEDGER_ENSEMBLE_BOOKIE_DISTRIBUTION + "-" + bookie); - } - @Override - public void registerPendingAddsGauge(Gauge gauge) { - stats.registerGauge(PENDING_ADDS, gauge); - } - }; + return new BookKeeperClientStatsImpl(stats); } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookieAddressResolverDisabled.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookieAddressResolverDisabled.java new file mode 100644 index 00000000000..7fb7e82d755 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookieAddressResolverDisabled.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bookkeeper.client; + +import lombok.extern.slf4j.Slf4j; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.proto.BookieAddressResolver; + +/** + * Resolve legacy style BookieIDs to Network addresses. + */ +@Slf4j +public final class BookieAddressResolverDisabled implements BookieAddressResolver { + + public BookieAddressResolverDisabled() { + } + + @Override + public BookieSocketAddress resolve(BookieId bookieId) { + return BookieSocketAddress.resolveLegacyBookieId(bookieId); + } + +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookieInfoReader.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookieInfoReader.java index 112cd4821e1..ea6dea0904e 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookieInfoReader.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookieInfoReader.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -31,7 +31,7 @@ import java.util.concurrent.atomic.AtomicInteger; import org.apache.bookkeeper.client.WeightedRandomSelection.WeightedObject; import org.apache.bookkeeper.conf.ClientConfiguration; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.proto.BookieClient; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GetBookieInfoCallback; import org.apache.bookkeeper.proto.BookkeeperProtocol; @@ -79,6 +79,7 @@ public long getTotalDiskSpace() { public long getWeight() { return freeDiskSpace; } + @Override public String toString() { return "FreeDiskSpace: " + this.freeDiskSpace + " TotalDiskCapacity: " + this.totalDiskSpace; } @@ -94,16 +95,16 @@ private static class BookieInfoMap { * Contains the most recently obtained information on the contained bookies. * When an error happens querying a bookie, the entry is removed. */ - private final Map infoMap = new HashMap<>(); + private final Map infoMap = new HashMap<>(); /** * Contains the most recently reported set of bookies from BookieWatcher * A partial query consists of every member of mostRecentlyReportedBookies * minus the entries in bookieInfoMap. 
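A sketch of the partial-scan selection this comment describes: query only the bookies the watcher reported that have no cached info yet (the real code computes the same set difference with commons-collections CollectionUtils.subtract):

    import java.util.HashSet;
    import java.util.Map;
    import java.util.Set;

    public class PartialScanSketch {
        static Set<String> partialScanTargets(Set<String> reported, Map<String, ?> infoMap) {
            Set<String> targets = new HashSet<>(reported);
            targets.removeAll(infoMap.keySet()); // drop bookies we already have info for
            return targets;
        }

        public static void main(String[] args) {
            System.out.println(partialScanTargets(Set.of("b1", "b2", "b3"), Map.of("b2", new Object())));
            // [b1, b3] in some order
        }
    }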
*/ - private Collection mostRecentlyReportedBookies = new ArrayList<>(); + private Collection mostRecentlyReportedBookies = new ArrayList<>(); - public void updateBookies(Collection updatedBookieSet) { + public void updateBookies(Collection updatedBookieSet) { if (LOG.isDebugEnabled()) { LOG.debug( "updateBookies: current: {}, new: {}", @@ -114,11 +115,11 @@ public void updateBookies(Collection updatedBookieSet) { } @SuppressWarnings("unchecked") - public Collection getPartialScanTargets() { + public Collection getPartialScanTargets() { return CollectionUtils.subtract(mostRecentlyReportedBookies, infoMap.keySet()); } - public Collection getFullScanTargets() { + public Collection getFullScanTargets() { return mostRecentlyReportedBookies; } @@ -128,7 +129,7 @@ public Collection getFullScanTargets() { * @param bookie bookie for which to get info * @return Info for bookie, null otherwise */ - public BookieInfo getInfo(BookieSocketAddress bookie) { + public BookieInfo getInfo(BookieId bookie) { return infoMap.get(bookie); } @@ -137,7 +138,7 @@ public BookieInfo getInfo(BookieSocketAddress bookie) { * * @param bookie bookie on which we observed an error */ - public void clearInfo(BookieSocketAddress bookie) { + public void clearInfo(BookieId bookie) { infoMap.remove(bookie); } @@ -147,14 +148,14 @@ public void clearInfo(BookieSocketAddress bookie) { * @param bookie bookie for which we obtained new info * @param info the new info */ - public void gotInfo(BookieSocketAddress bookie, BookieInfo info) { + public void gotInfo(BookieId bookie, BookieInfo info) { infoMap.put(bookie, info); } /** * Get bookie info map. */ - public Map getBookieMap() { + public Map getBookieMap() { return infoMap; } } @@ -245,7 +246,7 @@ public void run() { LOG.debug("Running periodic BookieInfo scan"); } try { - Collection updatedBookies = bk.bookieWatcher.getBookies(); + Collection updatedBookies = bk.bookieWatcher.getBookies(); bookieInfoMap.updateBookies(updatedBookies); } catch (BKException e) { LOG.info("Got exception while querying bookies from watcher, rerunning after {}s", @@ -269,7 +270,7 @@ private void submitTaskWithDelay(int delaySeconds) { scheduler.schedule(() -> getReadWriteBookieInfo(), delaySeconds, TimeUnit.SECONDS); } - synchronized void availableBookiesChanged(Set updatedBookiesList) { + synchronized void availableBookiesChanged(Set updatedBookiesList) { if (LOG.isInfoEnabled()) { LOG.info("Scheduling bookie info read due to changes in available bookies."); } @@ -285,7 +286,7 @@ synchronized void availableBookiesChanged(Set updatedBookie * @param bookie to lookup * @return None if absent, free disk space if present */ - synchronized Optional getFreeDiskSpace(BookieSocketAddress bookie) { + synchronized Optional getFreeDiskSpace(BookieId bookie) { BookieInfo bookieInfo = bookieInfoMap.getInfo(bookie); if (bookieInfo != null) { return Optional.of(bookieInfo.getFreeDiskSpace()); @@ -305,7 +306,7 @@ synchronized Optional getFreeDiskSpace(BookieSocketAddress bookie) { */ synchronized void getReadWriteBookieInfo() { State queuedType = instanceState.getAndClearQueuedType(); - Collection toScan; + Collection toScan; if (queuedType == State.FULL) { if (LOG.isDebugEnabled()) { LOG.debug("Doing full scan"); @@ -334,12 +335,12 @@ synchronized void getReadWriteBookieInfo() { if (LOG.isDebugEnabled()) { LOG.debug("Getting bookie info for: {}", toScan); } - for (BookieSocketAddress b : toScan) { + for (BookieId b : toScan) { bkc.getBookieInfo(b, requested, new GetBookieInfoCallback() { void 
processReadInfoComplete(int rc, BookieInfo bInfo, Object ctx) { synchronized (BookieInfoReader.this) { - BookieSocketAddress b = (BookieSocketAddress) ctx; + BookieId b = (BookieId) ctx; if (rc != BKException.Code.OK) { if (LOG.isErrorEnabled()) { LOG.error("Reading bookie info from bookie {} failed due to {}", @@ -395,26 +396,28 @@ void onExit() { } } - Map getBookieInfo() throws BKException, InterruptedException { + Map getBookieInfo() throws BKException, InterruptedException { BookieClient bkc = bk.getBookieClient(); final AtomicInteger totalSent = new AtomicInteger(); final AtomicInteger totalCompleted = new AtomicInteger(); - final ConcurrentMap map = - new ConcurrentHashMap(); + final ConcurrentMap map = + new ConcurrentHashMap(); final CountDownLatch latch = new CountDownLatch(1); long requested = BookkeeperProtocol.GetBookieInfoRequest.Flags.TOTAL_DISK_CAPACITY_VALUE | BookkeeperProtocol.GetBookieInfoRequest.Flags.FREE_DISK_SPACE_VALUE; - Collection bookies; + Collection bookies; bookies = bk.bookieWatcher.getBookies(); bookies.addAll(bk.bookieWatcher.getReadOnlyBookies()); - + if (bookies.isEmpty()) { + return map; + } totalSent.set(bookies.size()); - for (BookieSocketAddress b : bookies) { + for (BookieId b : bookies) { bkc.getBookieInfo(b, requested, new GetBookieInfoCallback() { @Override public void getBookieInfoComplete(int rc, BookieInfo bInfo, Object ctx) { - BookieSocketAddress b = (BookieSocketAddress) ctx; + BookieId b = (BookieId) ctx; if (rc != BKException.Code.OK) { if (LOG.isErrorEnabled()) { LOG.error("Reading bookie info from bookie {} failed due to {}", diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookieWatcher.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookieWatcher.java index 0f760a9d93e..bfee656e2c1 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookieWatcher.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookieWatcher.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -20,13 +20,27 @@ import java.util.List; import java.util.Map; import java.util.Set; - import org.apache.bookkeeper.client.BKException.BKNotEnoughBookiesException; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.proto.BookieAddressResolver; -interface BookieWatcher { - Set getBookies() throws BKException; - Set getReadOnlyBookies() throws BKException; +/** + * Watch for Bookkeeper cluster status. + */ +public interface BookieWatcher { + Set getBookies() throws BKException; + Set getAllBookies() throws BKException; + Set getReadOnlyBookies() throws BKException; + BookieAddressResolver getBookieAddressResolver(); + + /** + * Determine if a bookie should be considered unavailable. + * + * @param id + * Bookie to check + * @return whether or not the given bookie is unavailable + */ + boolean isBookieUnavailable(BookieId id); /** * Create an ensemble with given ensembleSize and writeQuorumSize. @@ -38,7 +52,7 @@ interface BookieWatcher { * @return list of bookies for new ensemble. 
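getBookieInfo fans out one request per bookie, collects results in a concurrent map, and releases a latch once the completion count matches the number of requests sent; the early return added for an empty bookie list avoids waiting on a latch that no callback would ever count down. A runnable sketch of the pattern, faking the RPC with a local computation:

    import java.util.List;
    import java.util.Map;
    import java.util.concurrent.ConcurrentHashMap;
    import java.util.concurrent.CountDownLatch;
    import java.util.concurrent.ExecutorService;
    import java.util.concurrent.Executors;
    import java.util.concurrent.atomic.AtomicInteger;

    public class FanOutSketch {
        static Map<String, Long> queryAll(List<String> bookies, ExecutorService pool)
                throws InterruptedException {
            Map<String, Long> results = new ConcurrentHashMap<>();
            if (bookies.isEmpty()) {
                return results; // the guard the patch adds
            }
            CountDownLatch latch = new CountDownLatch(1);
            AtomicInteger completed = new AtomicInteger();
            for (String b : bookies) {
                pool.submit(() -> {
                    results.put(b, (long) b.hashCode()); // stand-in for a real bookie RPC
                    if (completed.incrementAndGet() == bookies.size()) {
                        latch.countDown(); // last response releases the waiter
                    }
                });
            }
            latch.await();
            return results;
        }

        public static void main(String[] args) throws InterruptedException {
            ExecutorService pool = Executors.newFixedThreadPool(2);
            System.out.println(queryAll(List.of("b1", "b2"), pool).keySet());
            pool.shutdown();
        }
    }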
* @throws BKNotEnoughBookiesException */ - List newEnsemble(int ensembleSize, int writeQuorumSize, + List newEnsemble(int ensembleSize, int writeQuorumSize, int ackQuorumSize, Map customMetadata) throws BKNotEnoughBookiesException; @@ -51,10 +65,10 @@ List newEnsemble(int ensembleSize, int writeQuorumSize, * @return the bookie to replace. * @throws BKNotEnoughBookiesException */ - BookieSocketAddress replaceBookie(int ensembleSize, int writeQuorumSize, int ackQuorumSize, + BookieId replaceBookie(int ensembleSize, int writeQuorumSize, int ackQuorumSize, Map customMetadata, - List existingBookies, int bookieIdx, - Set excludeBookies) + List existingBookies, int bookieIdx, + Set excludeBookies) throws BKNotEnoughBookiesException; @@ -62,5 +76,10 @@ BookieSocketAddress replaceBookie(int ensembleSize, int writeQuorumSize, int ack * Quarantine bookie so it will not be preferred to be chosen for new ensembles. * @param bookie */ - void quarantineBookie(BookieSocketAddress bookie); + void quarantineBookie(BookieId bookie); + + /** + * Release all quarantined bookies, let it can be chosen for new ensembles. + */ + void releaseAllQuarantinedBookies(); } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookieWatcherImpl.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookieWatcherImpl.java index ff707f0f995..978842b114a 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookieWatcherImpl.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookieWatcherImpl.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -17,8 +17,11 @@ */ package org.apache.bookkeeper.client; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.ENSEMBLE_NOT_ADHERING_TO_PLACEMENT_POLICY_COUNT; import static org.apache.bookkeeper.bookie.BookKeeperServerStats.NEW_ENSEMBLE_TIME; import static org.apache.bookkeeper.bookie.BookKeeperServerStats.REPLACE_BOOKIE_TIME; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.WATCHER_SCOPE; +import static org.apache.bookkeeper.client.BookKeeperClientStats.CREATE_OP; import com.google.common.cache.Cache; import com.google.common.cache.CacheBuilder; @@ -33,16 +36,21 @@ import java.util.concurrent.TimeUnit; import java.util.function.Function; import lombok.extern.slf4j.Slf4j; +import org.apache.bookkeeper.bookie.BookKeeperServerStats; import org.apache.bookkeeper.client.BKException.BKInterruptedException; import org.apache.bookkeeper.client.BKException.BKNotEnoughBookiesException; import org.apache.bookkeeper.client.BKException.MetaStoreException; +import org.apache.bookkeeper.client.EnsemblePlacementPolicy.PlacementPolicyAdherence; import org.apache.bookkeeper.common.concurrent.FutureUtils; import org.apache.bookkeeper.common.util.MathUtils; import org.apache.bookkeeper.conf.ClientConfiguration; import org.apache.bookkeeper.discover.RegistrationClient; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.proto.BookieAddressResolver; +import org.apache.bookkeeper.stats.Counter; import org.apache.bookkeeper.stats.OpStatsLogger; import org.apache.bookkeeper.stats.StatsLogger; +import org.apache.bookkeeper.stats.annotations.StatsDoc; /** * This class is responsible for maintaining a consistent view of what bookies @@ -51,6 +59,10 @@ * replacement * */ 
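BookieWatcherImpl, which follows, answers isBookieUnavailable purely from its locally cached views of writable and read-only bookies, so no metadata-store round trip is needed. A minimal sketch of that check:

    import java.util.Set;

    public class AvailabilitySketch {
        static volatile Set<String> writable = Set.of("b1", "b2");
        static volatile Set<String> readOnly = Set.of("b3");

        // Unavailable means: in neither the read-only nor the writable view.
        static boolean isUnavailable(String bookie) {
            return !readOnly.contains(bookie) && !writable.contains(bookie);
        }

        public static void main(String[] args) {
            System.out.println(isUnavailable("b1")); // false
            System.out.println(isUnavailable("b4")); // true
        }
    }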
+@StatsDoc( + name = WATCHER_SCOPE, + help = "Bookie watcher related stats" +) @Slf4j class BookieWatcherImpl implements BookieWatcher { @@ -62,48 +74,70 @@ class BookieWatcherImpl implements BookieWatcher { log.error("Interrupted reading bookie list : ", cause); return new BKInterruptedException(); } else { - return new MetaStoreException(); + MetaStoreException mse = new MetaStoreException(cause); + return mse; } }; private final ClientConfiguration conf; private final RegistrationClient registrationClient; private final EnsemblePlacementPolicy placementPolicy; + @StatsDoc( + name = NEW_ENSEMBLE_TIME, + help = "operation stats of new ensembles", + parent = CREATE_OP + ) private final OpStatsLogger newEnsembleTimer; + @StatsDoc( + name = REPLACE_BOOKIE_TIME, + help = "operation stats of replacing bookie in an ensemble" + ) private final OpStatsLogger replaceBookieTimer; + @StatsDoc( + name = ENSEMBLE_NOT_ADHERING_TO_PLACEMENT_POLICY_COUNT, + help = "total number of newEnsemble/replaceBookie operations failed to adhere" + + " EnsemblePlacementPolicy" + ) + private final Counter ensembleNotAdheringToPlacementPolicy; // Bookies that will not be preferred to be chosen in a new ensemble - final Cache quarantinedBookies; + final Cache quarantinedBookies; - private volatile Set writableBookies = Collections.emptySet(); - private volatile Set readOnlyBookies = Collections.emptySet(); + private volatile Set writableBookies = Collections.emptySet(); + private volatile Set readOnlyBookies = Collections.emptySet(); private CompletableFuture initialWritableBookiesFuture = null; private CompletableFuture initialReadonlyBookiesFuture = null; + private final BookieAddressResolver bookieAddressResolver; + public BookieWatcherImpl(ClientConfiguration conf, EnsemblePlacementPolicy placementPolicy, RegistrationClient registrationClient, + BookieAddressResolver bookieAddressResolver, StatsLogger statsLogger) { this.conf = conf; + this.bookieAddressResolver = bookieAddressResolver; this.placementPolicy = placementPolicy; this.registrationClient = registrationClient; this.quarantinedBookies = CacheBuilder.newBuilder() .expireAfterWrite(conf.getBookieQuarantineTimeSeconds(), TimeUnit.SECONDS) - .removalListener(new RemovalListener() { + .removalListener(new RemovalListener() { @Override - public void onRemoval(RemovalNotification bookie) { + public void onRemoval(RemovalNotification bookie) { log.info("Bookie {} is no longer quarantined", bookie.getKey()); } }).build(); this.newEnsembleTimer = statsLogger.getOpStatsLogger(NEW_ENSEMBLE_TIME); this.replaceBookieTimer = statsLogger.getOpStatsLogger(REPLACE_BOOKIE_TIME); + this.ensembleNotAdheringToPlacementPolicy = statsLogger + .getCounter(BookKeeperServerStats.ENSEMBLE_NOT_ADHERING_TO_PLACEMENT_POLICY_COUNT); } @Override - public Set getBookies() throws BKException { + public Set getBookies() throws BKException { try { return FutureUtils.result(registrationClient.getWritableBookies(), EXCEPTION_FUNC).getValue(); } catch (BKInterruptedException ie) { @@ -113,7 +147,22 @@ public Set getBookies() throws BKException { } @Override - public Set getReadOnlyBookies() + public Set getAllBookies() throws BKException { + try { + return FutureUtils.result(registrationClient.getAllBookies(), EXCEPTION_FUNC).getValue(); + } catch (BKInterruptedException ie) { + Thread.currentThread().interrupt(); + throw ie; + } + } + + @Override + public BookieAddressResolver getBookieAddressResolver() { + return this.bookieAddressResolver; + } + + @Override + public Set getReadOnlyBookies() 
throws BKException { try { return FutureUtils.result(registrationClient.getReadOnlyBookies(), EXCEPTION_FUNC).getValue(); @@ -123,8 +172,24 @@ public Set getReadOnlyBookies() } } + /** + * Determine if a bookie should be considered unavailable. + * This does not require a network call because this class + * maintains a current view of readonly and writable bookies. + * An unavailable bookie is one that is neither read only nor + * writable. + * + * @param id + * Bookie to check + * @return whether or not the given bookie is unavailable + */ + @Override + public boolean isBookieUnavailable(BookieId id) { + return !readOnlyBookies.contains(id) && !writableBookies.contains(id); + } + // this callback is already not executed in zookeeper thread - private synchronized void processWritableBookiesChanged(Set newBookieAddrs) { + private synchronized void processWritableBookiesChanged(Set newBookieAddrs) { // Update watcher outside ZK callback thread, to avoid deadlock in case some other // component is trying to do a blocking ZK operation this.writableBookies = newBookieAddrs; @@ -145,7 +210,7 @@ private synchronized void processWritableBookiesChanged(Set // } } - private synchronized void processReadOnlyBookiesChanged(Set readOnlyBookies) { + private synchronized void processReadOnlyBookiesChanged(Set readOnlyBookies) { this.readOnlyBookies = readOnlyBookies; placementPolicy.onClusterChanged(writableBookies, readOnlyBookies); } @@ -156,6 +221,7 @@ private synchronized void processReadOnlyBookiesChanged(Set * @throws BKException when failed to read bookies */ public void initialBlockingBookieRead() throws BKException { + CompletableFuture writable; CompletableFuture readonly; synchronized (this) { @@ -174,7 +240,6 @@ public void initialBlockingBookieRead() throws BKException { readonly = initialReadonlyBookiesFuture; } } - try { FutureUtils.result(writable, EXCEPTION_FUNC); } catch (BKInterruptedException ie) { @@ -192,52 +257,90 @@ public void initialBlockingBookieRead() throws BKException { } @Override - public List newEnsemble(int ensembleSize, int writeQuorumSize, + public List newEnsemble(int ensembleSize, int writeQuorumSize, int ackQuorumSize, Map customMetadata) throws BKNotEnoughBookiesException { long startTime = MathUtils.nowInNano(); - List socketAddresses; + EnsemblePlacementPolicy.PlacementResult> newEnsembleResponse; + List socketAddresses; + PlacementPolicyAdherence isEnsembleAdheringToPlacementPolicy; try { - socketAddresses = placementPolicy.newEnsemble(ensembleSize, - writeQuorumSize, ackQuorumSize, customMetadata, new HashSet( - quarantinedBookies.asMap().keySet())); + Set quarantinedBookiesSet = quarantinedBookies.asMap().keySet(); + newEnsembleResponse = placementPolicy.newEnsemble(ensembleSize, writeQuorumSize, ackQuorumSize, + customMetadata, new HashSet(quarantinedBookiesSet)); + socketAddresses = newEnsembleResponse.getResult(); + isEnsembleAdheringToPlacementPolicy = newEnsembleResponse.getAdheringToPolicy(); + if (isEnsembleAdheringToPlacementPolicy == PlacementPolicyAdherence.FAIL) { + ensembleNotAdheringToPlacementPolicy.inc(); + if (ensembleSize > 1) { + log.warn("New ensemble: {} is not adhering to Placement Policy. 
quarantinedBookies: {}", + socketAddresses, quarantinedBookiesSet); + } + } // we try to only get from the healthy bookies first newEnsembleTimer.registerSuccessfulEvent(MathUtils.nowInNano() - startTime, TimeUnit.NANOSECONDS); } catch (BKNotEnoughBookiesException e) { if (log.isDebugEnabled()) { log.debug("Not enough healthy bookies available, using quarantined bookies"); } - socketAddresses = placementPolicy.newEnsemble( + newEnsembleResponse = placementPolicy.newEnsemble( ensembleSize, writeQuorumSize, ackQuorumSize, customMetadata, new HashSet<>()); + socketAddresses = newEnsembleResponse.getResult(); + isEnsembleAdheringToPlacementPolicy = newEnsembleResponse.getAdheringToPolicy(); + if (isEnsembleAdheringToPlacementPolicy == PlacementPolicyAdherence.FAIL) { + ensembleNotAdheringToPlacementPolicy.inc(); + log.warn("New ensemble: {} is not adhering to Placement Policy", socketAddresses); + } newEnsembleTimer.registerFailedEvent(MathUtils.nowInNano() - startTime, TimeUnit.NANOSECONDS); } return socketAddresses; } @Override - public BookieSocketAddress replaceBookie(int ensembleSize, int writeQuorumSize, int ackQuorumSize, + public BookieId replaceBookie(int ensembleSize, int writeQuorumSize, int ackQuorumSize, Map customMetadata, - List existingBookies, int bookieIdx, - Set excludeBookies) + List existingBookies, int bookieIdx, + Set excludeBookies) throws BKNotEnoughBookiesException { long startTime = MathUtils.nowInNano(); - BookieSocketAddress addr = existingBookies.get(bookieIdx); - BookieSocketAddress socketAddress; + BookieId addr = existingBookies.get(bookieIdx); + EnsemblePlacementPolicy.PlacementResult replaceBookieResponse; + BookieId socketAddress; + PlacementPolicyAdherence isEnsembleAdheringToPlacementPolicy = PlacementPolicyAdherence.FAIL; try { // we exclude the quarantined bookies also first - Set existingAndQuarantinedBookies = new HashSet(existingBookies); - existingAndQuarantinedBookies.addAll(quarantinedBookies.asMap().keySet()); - socketAddress = placementPolicy.replaceBookie( + Set excludedBookiesAndQuarantinedBookies = new HashSet( + excludeBookies); + Set quarantinedBookiesSet = quarantinedBookies.asMap().keySet(); + excludedBookiesAndQuarantinedBookies.addAll(quarantinedBookiesSet); + replaceBookieResponse = placementPolicy.replaceBookie( ensembleSize, writeQuorumSize, ackQuorumSize, customMetadata, - existingAndQuarantinedBookies, addr, excludeBookies); + existingBookies, addr, excludedBookiesAndQuarantinedBookies); + socketAddress = replaceBookieResponse.getResult(); + isEnsembleAdheringToPlacementPolicy = replaceBookieResponse.getAdheringToPolicy(); + if (isEnsembleAdheringToPlacementPolicy == PlacementPolicyAdherence.FAIL) { + ensembleNotAdheringToPlacementPolicy.inc(); + log.warn( + "replaceBookie for bookie: {} in ensemble: {} is not adhering to placement policy and" + + " chose {}. 
excludedBookies {} and quarantinedBookies {}", + addr, existingBookies, socketAddress, excludeBookies, quarantinedBookiesSet); + } replaceBookieTimer.registerSuccessfulEvent(MathUtils.nowInNano() - startTime, TimeUnit.NANOSECONDS); } catch (BKNotEnoughBookiesException e) { if (log.isDebugEnabled()) { log.debug("Not enough healthy bookies available, using quarantined bookies"); } - socketAddress = placementPolicy.replaceBookie( - ensembleSize, writeQuorumSize, ackQuorumSize, customMetadata, - new HashSet(existingBookies), addr, excludeBookies); + replaceBookieResponse = placementPolicy.replaceBookie(ensembleSize, writeQuorumSize, ackQuorumSize, + customMetadata, existingBookies, addr, excludeBookies); + socketAddress = replaceBookieResponse.getResult(); + isEnsembleAdheringToPlacementPolicy = replaceBookieResponse.getAdheringToPolicy(); + if (isEnsembleAdheringToPlacementPolicy == PlacementPolicyAdherence.FAIL) { + ensembleNotAdheringToPlacementPolicy.inc(); + log.warn( + "replaceBookie for bookie: {} in ensemble: {} is not adhering to placement policy and" + + " chose {}. excludedBookies {}", + addr, existingBookies, socketAddress, excludeBookies); + } replaceBookieTimer.registerFailedEvent(MathUtils.nowInNano() - startTime, TimeUnit.NANOSECONDS); } return socketAddress; @@ -248,11 +351,15 @@ public BookieSocketAddress replaceBookie(int ensembleSize, int writeQuorumSize, * @param bookie */ @Override - public void quarantineBookie(BookieSocketAddress bookie) { + public void quarantineBookie(BookieId bookie) { if (quarantinedBookies.getIfPresent(bookie) == null) { quarantinedBookies.put(bookie, Boolean.TRUE); log.warn("Bookie {} has been quarantined because of read/write errors.", bookie); } } + @Override + public void releaseAllQuarantinedBookies(){ + quarantinedBookies.invalidateAll(); + } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookiesHealthInfo.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookiesHealthInfo.java index b0404407587..11c4a08529a 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookiesHealthInfo.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookiesHealthInfo.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -20,7 +20,7 @@ */ package org.apache.bookkeeper.client; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.BookieId; /** * This interface returns heuristics used to determine the health of a Bookkeeper server for read @@ -34,7 +34,7 @@ public interface BookiesHealthInfo { * @param bookieSocketAddress * @return failed entries on a bookie, -1 if there have been no failures */ - long getBookieFailureHistory(BookieSocketAddress bookieSocketAddress); + long getBookieFailureHistory(BookieId bookieSocketAddress); /** * Returns pending requests to a bookie. 
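The quarantine mechanism used above is essentially a Guava cache with write-expiry: entries age out on their own, the removal listener logs the release, and releaseAllQuarantinedBookies() is just invalidateAll(). A sketch assuming Guava on the classpath; the 30-minute TTL is a placeholder for the value ClientConfiguration.getBookieQuarantineTimeSeconds() supplies in the real code:

    import com.google.common.cache.Cache;
    import com.google.common.cache.CacheBuilder;
    import com.google.common.cache.RemovalListener;
    import java.util.concurrent.TimeUnit;

    public class QuarantineSketch {
        public static void main(String[] args) {
            Cache<String, Boolean> quarantined = CacheBuilder.newBuilder()
                    .expireAfterWrite(30, TimeUnit.MINUTES) // self-cleaning penalty box
                    .removalListener((RemovalListener<String, Boolean>) notification ->
                            System.out.println("Bookie " + notification.getKey() + " is no longer quarantined"))
                    .build();

            if (quarantined.getIfPresent("bookie-1") == null) {
                quarantined.put("bookie-1", Boolean.TRUE); // quarantine once, not repeatedly
            }
            quarantined.invalidateAll(); // what releaseAllQuarantinedBookies() does
        }
    }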
@@ -42,6 +42,6 @@ public interface BookiesHealthInfo { * @param bookieSocketAddress * @return number of pending requests */ - long getBookiePendingRequests(BookieSocketAddress bookieSocketAddress); + long getBookiePendingRequests(BookieId bookieSocketAddress); } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookiesListener.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookiesListener.java index 0a1a884524d..7b7dd9c673c 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookiesListener.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookiesListener.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -21,7 +21,7 @@ import org.apache.bookkeeper.common.annotation.InterfaceStability; /** - * Listener for the the available bookies changes. + * Listener for the available bookies changes. */ @InterfaceAudience.Private diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/ClientContext.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/ClientContext.java index d8803d0ea6d..3b43502d96b 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/ClientContext.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/ClientContext.java @@ -20,6 +20,7 @@ */ package org.apache.bookkeeper.client; +import io.netty.buffer.ByteBufAllocator; import org.apache.bookkeeper.common.util.OrderedExecutor; import org.apache.bookkeeper.common.util.OrderedScheduler; import org.apache.bookkeeper.meta.LedgerManager; @@ -31,12 +32,13 @@ * but they are present to the LedgerHandle through this interface to allow * tests to easily inject mocked versions. 
*/ -interface ClientContext { +public interface ClientContext { ClientInternalConf getConf(); LedgerManager getLedgerManager(); BookieWatcher getBookieWatcher(); EnsemblePlacementPolicy getPlacementPolicy(); BookieClient getBookieClient(); + ByteBufAllocator getByteBufAllocator(); OrderedExecutor getMainWorkerPool(); OrderedScheduler getScheduler(); BookKeeperClientStats getClientStats(); diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/ClientInternalConf.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/ClientInternalConf.java index ac56a1fbfb8..fc83617cef6 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/ClientInternalConf.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/ClientInternalConf.java @@ -20,10 +20,8 @@ */ package org.apache.bookkeeper.client; -import com.google.common.base.Optional; - +import java.util.Optional; import java.util.concurrent.TimeUnit; - import org.apache.bookkeeper.conf.ClientConfiguration; import org.apache.bookkeeper.feature.Feature; import org.apache.bookkeeper.feature.FeatureProvider; @@ -41,6 +39,7 @@ class ClientInternalConf { final long addEntryQuorumTimeoutNanos; final boolean enableParallelRecoveryRead; final boolean enableReorderReadSequence; + final boolean enableStickyReads; final int recoveryReadBatchSize; final int throttleValue; final int bookieFailureHistoryExpirationMSec; @@ -48,6 +47,9 @@ class ClientInternalConf { final long timeoutMonitorIntervalSec; final boolean enableBookieFailureTracking; final boolean useV2WireProtocol; + final boolean enforceMinNumFaultDomainsForWrite; + final boolean batchReadEnabled; + final int nettyMaxFrameSizeBytes; static ClientInternalConf defaultValues() { return fromConfig(new ClientConfiguration()); @@ -72,14 +74,16 @@ private ClientInternalConf(ClientConfiguration conf, this.addEntryQuorumTimeoutNanos = TimeUnit.SECONDS.toNanos(conf.getAddEntryQuorumTimeout()); this.throttleValue = conf.getThrottleValue(); this.bookieFailureHistoryExpirationMSec = conf.getBookieFailureHistoryExpirationMSec(); - + this.batchReadEnabled = conf.isBatchReadEnabled(); + this.nettyMaxFrameSizeBytes = conf.getNettyMaxFrameSizeBytes(); this.disableEnsembleChangeFeature = featureProvider.getFeature(conf.getDisableEnsembleChangeFeatureName()); - this.delayEnsembleChange = conf.getDelayEnsembleChange(); this.maxAllowedEnsembleChanges = conf.getMaxAllowedEnsembleChanges(); this.timeoutMonitorIntervalSec = conf.getTimeoutMonitorIntervalSec(); this.enableBookieFailureTracking = conf.getEnableBookieFailureTracking(); this.useV2WireProtocol = conf.getUseV2WireProtocol(); + this.enableStickyReads = conf.isStickyReadsEnabled(); + this.enforceMinNumFaultDomainsForWrite = conf.getEnforceMinNumFaultDomainsForWrite(); if (conf.getFirstSpeculativeReadTimeout() > 0) { this.readSpeculativeRequestPolicy = @@ -88,7 +92,7 @@ private ClientInternalConf(ClientConfiguration conf, conf.getMaxSpeculativeReadTimeout(), conf.getSpeculativeReadTimeoutBackoffMultiplier())); } else { - this.readSpeculativeRequestPolicy = Optional.absent(); + this.readSpeculativeRequestPolicy = Optional.empty(); } if (conf.getFirstSpeculativeReadLACTimeout() > 0) { this.readLACSpeculativeRequestPolicy = @@ -97,7 +101,7 @@ private ClientInternalConf(ClientConfiguration conf, conf.getMaxSpeculativeReadLACTimeout(), conf.getSpeculativeReadLACTimeoutBackoffMultiplier())); } else { - this.readLACSpeculativeRequestPolicy = Optional.absent(); + this.readLACSpeculativeRequestPolicy = 
Optional.empty(); } } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/DefaultBookieAddressResolver.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/DefaultBookieAddressResolver.java new file mode 100644 index 00000000000..08016bdd821 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/DefaultBookieAddressResolver.java @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bookkeeper.client; + +import lombok.extern.slf4j.Slf4j; +import org.apache.bookkeeper.common.concurrent.FutureUtils; +import org.apache.bookkeeper.discover.BookieServiceInfo; +import org.apache.bookkeeper.discover.RegistrationClient; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.proto.BookieAddressResolver; + +/** + * Resolve BookieIDs to Network addresses. + */ +@Slf4j +public class DefaultBookieAddressResolver implements BookieAddressResolver { + + private final RegistrationClient registrationClient; + + public DefaultBookieAddressResolver(RegistrationClient registrationClient) { + this.registrationClient = registrationClient; + } + + public RegistrationClient getRegistrationClient() { + return registrationClient; + } + + @Override + public BookieSocketAddress resolve(BookieId bookieId) { + try { + BookieServiceInfo info = FutureUtils.result(registrationClient.getBookieServiceInfo(bookieId)).getValue(); + BookieServiceInfo.Endpoint endpoint = info.getEndpoints() + .stream().filter(e -> e.getProtocol().equals("bookie-rpc")).findAny().orElse(null); + if (endpoint == null) { + throw new Exception("bookie " + bookieId + " does not publish a bookie-rpc endpoint"); + } + BookieSocketAddress res = new BookieSocketAddress(endpoint.getHost(), endpoint.getPort()); + if (!bookieId.toString().equals(res.toString())) { + // only print if the information is useful + log.info("Resolved {} as {}", bookieId, res); + } else if (log.isDebugEnabled()) { + log.debug("Resolved {} as {}", bookieId, res); + } + return res; + } catch (BKException.BKBookieHandleNotAvailableException ex) { + if (BookieSocketAddress.isDummyBookieIdForHostname(bookieId)) { + if (log.isDebugEnabled()) { + log.debug("Resolving dummy bookie Id {} using legacy bookie resolver", bookieId); + } + return BookieSocketAddress.resolveLegacyBookieId(bookieId); + } + log.info("Cannot resolve {}, bookie is unknown {}", bookieId, ex.toString()); + throw new BookieIdNotResolvedException(bookieId, ex); + } catch (Exception ex) { + if (ex instanceof InterruptedException) { + Thread.currentThread().interrupt(); + } + throw new BookieIdNotResolvedException(bookieId, ex); + } + } + +} diff --git 
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/DefaultEnsemblePlacementPolicy.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/DefaultEnsemblePlacementPolicy.java index 28efe661545..94cd30344f1 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/DefaultEnsemblePlacementPolicy.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/DefaultEnsemblePlacementPolicy.java @@ -18,24 +18,24 @@ package org.apache.bookkeeper.client; import io.netty.util.HashedWheelTimer; - import java.util.ArrayList; import java.util.Collection; import java.util.Collections; +import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Optional; import java.util.Set; import java.util.concurrent.locks.ReentrantReadWriteLock; - import org.apache.bookkeeper.client.BKException.BKNotEnoughBookiesException; import org.apache.bookkeeper.client.BookieInfoReader.BookieInfo; import org.apache.bookkeeper.client.WeightedRandomSelection.WeightedObject; import org.apache.bookkeeper.conf.ClientConfiguration; import org.apache.bookkeeper.feature.FeatureProvider; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.net.DNSToSwitchMapping; +import org.apache.bookkeeper.proto.BookieAddressResolver; import org.apache.bookkeeper.stats.StatsLogger; import org.apache.commons.collections4.CollectionUtils; import org.slf4j.Logger; @@ -48,31 +48,32 @@ */ public class DefaultEnsemblePlacementPolicy implements EnsemblePlacementPolicy { static final Logger LOG = LoggerFactory.getLogger(DefaultEnsemblePlacementPolicy.class); - static final Set EMPTY_SET = new HashSet(); + static final Set EMPTY_SET = new HashSet(); private boolean isWeighted; private int maxWeightMultiple; - private Set knownBookies = new HashSet(); - private Map bookieInfoMap; - private WeightedRandomSelection weightedSelection; + private Set knownBookies = new HashSet(); + private Map bookieInfoMap; + private WeightedRandomSelection weightedSelection; private final ReentrantReadWriteLock rwLock; DefaultEnsemblePlacementPolicy() { + bookieInfoMap = new HashMap(); rwLock = new ReentrantReadWriteLock(); } @Override - public ArrayList newEnsemble(int ensembleSize, int quorumSize, int ackQuorumSize, - Map customMetadata, Set excludeBookies) + public PlacementResult> newEnsemble(int ensembleSize, int quorumSize, int ackQuorumSize, + Map customMetadata, Set excludeBookies) throws BKNotEnoughBookiesException { - ArrayList newBookies = new ArrayList(ensembleSize); + ArrayList newBookies = new ArrayList(ensembleSize); if (ensembleSize <= 0) { - return newBookies; + return PlacementResult.of(newBookies, PlacementPolicyAdherence.FAIL); } - List allBookies; + List allBookies; rwLock.readLock().lock(); try { - allBookies = new ArrayList(knownBookies); + allBookies = new ArrayList(knownBookies); } finally { rwLock.readLock().unlock(); } @@ -86,26 +87,31 @@ public ArrayList newEnsemble(int ensembleSize, int quorumSi throw new BKNotEnoughBookiesException(); } while (ensembleSize > 0) { - BookieSocketAddress b = weightedSelection.getNextRandom(); + BookieId b = weightedSelection.getNextRandom(); if (newBookies.contains(b) || excludeBookies.contains(b)) { continue; } newBookies.add(b); --ensembleSize; + if (ensembleSize == 0) { + return PlacementResult.of(newBookies, + isEnsembleAdheringToPlacementPolicy(newBookies, quorumSize, ackQuorumSize)); + } } } finally { rwLock.readLock().unlock(); } } else 
{ Collections.shuffle(allBookies); - for (BookieSocketAddress bookie : allBookies) { + for (BookieId bookie : allBookies) { if (excludeBookies.contains(bookie)) { continue; } newBookies.add(bookie); --ensembleSize; if (ensembleSize == 0) { - return newBookies; + return PlacementResult.of(newBookies, + isEnsembleAdheringToPlacementPolicy(newBookies, quorumSize, ackQuorumSize)); } } } @@ -113,32 +119,37 @@ public ArrayList newEnsemble(int ensembleSize, int quorumSi } @Override - public BookieSocketAddress replaceBookie(int ensembleSize, int writeQuorumSize, int ackQuorumSize, - Map customMetadata, Set currentEnsemble, - BookieSocketAddress bookieToReplace, Set excludeBookies) + public PlacementResult replaceBookie(int ensembleSize, int writeQuorumSize, int ackQuorumSize, + Map customMetadata, List currentEnsemble, + BookieId bookieToReplace, Set excludeBookies) throws BKNotEnoughBookiesException { excludeBookies.addAll(currentEnsemble); - ArrayList addresses = newEnsemble(1, 1, 1, customMetadata, excludeBookies); - return addresses.get(0); + List addresses = newEnsemble(1, 1, 1, customMetadata, excludeBookies).getResult(); + + BookieId candidateAddr = addresses.get(0); + List newEnsemble = new ArrayList(currentEnsemble); + newEnsemble.set(currentEnsemble.indexOf(bookieToReplace), candidateAddr); + return PlacementResult.of(candidateAddr, + isEnsembleAdheringToPlacementPolicy(newEnsemble, writeQuorumSize, ackQuorumSize)); } @Override - public Set onClusterChanged(Set writableBookies, - Set readOnlyBookies) { + public Set onClusterChanged(Set writableBookies, + Set readOnlyBookies) { rwLock.writeLock().lock(); try { - HashSet deadBookies; - deadBookies = new HashSet(knownBookies); + HashSet deadBookies; + deadBookies = new HashSet(knownBookies); deadBookies.removeAll(writableBookies); // readonly bookies should not be treated as dead bookies deadBookies.removeAll(readOnlyBookies); if (this.isWeighted) { - for (BookieSocketAddress b : deadBookies) { + for (BookieId b : deadBookies) { this.bookieInfoMap.remove(b); } @SuppressWarnings("unchecked") - Collection newBookies = CollectionUtils.subtract(writableBookies, knownBookies); - for (BookieSocketAddress b : newBookies) { + Collection newBookies = CollectionUtils.subtract(writableBookies, knownBookies); + for (BookieId b : newBookies) { this.bookieInfoMap.put(b, new BookieInfo()); } if (deadBookies.size() > 0 || newBookies.size() > 0) { @@ -153,13 +164,13 @@ public Set onClusterChanged(Set writab } @Override - public void registerSlowBookie(BookieSocketAddress bookieSocketAddress, long entryId) { + public void registerSlowBookie(BookieId bookieSocketAddress, long entryId) { return; } @Override public DistributionSchedule.WriteSet reorderReadSequence( - List ensemble, + List ensemble, BookiesHealthInfo bookiesHealthInfo, DistributionSchedule.WriteSet writeSet) { return writeSet; @@ -167,7 +178,7 @@ public DistributionSchedule.WriteSet reorderReadSequence( @Override public DistributionSchedule.WriteSet reorderReadLACSequence( - List ensemble, + List ensemble, BookiesHealthInfo bookiesHealthInfo, DistributionSchedule.WriteSet writeSet) { writeSet.addMissingIndices(ensemble.size()); @@ -176,23 +187,24 @@ public DistributionSchedule.WriteSet reorderReadLACSequence( @Override public EnsemblePlacementPolicy initialize(ClientConfiguration conf, - Optional optionalDnsResolver, - HashedWheelTimer timer, - FeatureProvider featureProvider, - StatsLogger statsLogger) { + Optional optionalDnsResolver, + HashedWheelTimer hashedWheelTimer, + FeatureProvider 
featureProvider, + StatsLogger statsLogger, + BookieAddressResolver bookieAddressResolver) { this.isWeighted = conf.getDiskWeightBasedPlacementEnabled(); if (this.isWeighted) { this.maxWeightMultiple = conf.getBookieMaxWeightMultipleForWeightBasedPlacement(); - this.weightedSelection = new WeightedRandomSelection(this.maxWeightMultiple); + this.weightedSelection = new WeightedRandomSelectionImpl(this.maxWeightMultiple); } return this; } @Override - public void updateBookieInfo(Map bookieInfoMap) { + public void updateBookieInfo(Map bookieInfoMap) { rwLock.writeLock().lock(); try { - for (Map.Entry e : bookieInfoMap.entrySet()) { + for (Map.Entry e : bookieInfoMap.entrySet()) { this.bookieInfoMap.put(e.getKey(), e.getValue()); } this.weightedSelection.updateMap(this.bookieInfoMap); @@ -205,4 +217,10 @@ public void updateBookieInfo(Map bookieInfoMap) public void uninitalize() { // do nothing } + + @Override + public PlacementPolicyAdherence isEnsembleAdheringToPlacementPolicy(List ensembleList, + int writeQuorumSize, int ackQuorumSize) { + return PlacementPolicyAdherence.MEETS_STRICT; + } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/DefaultSpeculativeRequestExecutionPolicy.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/DefaultSpeculativeRequestExecutionPolicy.java index 7474e56acb4..c85fb1dc29e 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/DefaultSpeculativeRequestExecutionPolicy.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/DefaultSpeculativeRequestExecutionPolicy.java @@ -20,14 +20,15 @@ */ package org.apache.bookkeeper.client; +import static com.google.common.util.concurrent.MoreExecutors.directExecutor; + import com.google.common.util.concurrent.FutureCallback; import com.google.common.util.concurrent.Futures; import com.google.common.util.concurrent.ListenableFuture; - import java.util.concurrent.RejectedExecutionException; import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.ScheduledFuture; import java.util.concurrent.TimeUnit; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -64,23 +65,25 @@ public DefaultSpeculativeRequestExecutionPolicy(int firstSpeculativeRequestTimeo * * @param scheduler The scheduler service to issue the speculative request * @param requestExecutor The executor is used to issue the actual speculative requests + * @return ScheduledFuture, in case caller needs to cancel it. */ @Override - public void initiateSpeculativeRequest(final ScheduledExecutorService scheduler, + public ScheduledFuture initiateSpeculativeRequest(final ScheduledExecutorService scheduler, final SpeculativeRequestExecutor requestExecutor) { - scheduleSpeculativeRead(scheduler, requestExecutor, firstSpeculativeRequestTimeout); + return scheduleSpeculativeRead(scheduler, requestExecutor, firstSpeculativeRequestTimeout); } - private void scheduleSpeculativeRead(final ScheduledExecutorService scheduler, + private ScheduledFuture scheduleSpeculativeRead(final ScheduledExecutorService scheduler, final SpeculativeRequestExecutor requestExecutor, final int speculativeRequestTimeout) { try { - scheduler.schedule(new Runnable() { + return scheduler.schedule(new Runnable() { @Override public void run() { ListenableFuture issueNextRequest = requestExecutor.issueSpeculativeRequest(); Futures.addCallback(issueNextRequest, new FutureCallback() { // we want this handler to run immediately after we push the big red button! 
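// Aside (illustrative comment, not part of the patch): each pass that asks for
// another speculative read reschedules itself with a geometrically growing
// timeout, roughly next = min(current * backoffMultiplier, maxSpeculativeTimeout),
// so with first = 100ms, multiplier = 2 and max = 1s the schedule is
// 100, 200, 400, 800, 1000, 1000, ... milliseconds.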
+ @Override public void onSuccess(Boolean issueNextRequest) { if (issueNextRequest) { scheduleSpeculativeRead(scheduler, requestExecutor, @@ -94,11 +97,12 @@ public void onSuccess(Boolean issueNextRequest) { } } + @Override public void onFailure(Throwable thrown) { LOG.warn("Failed to issue speculative request for {}, speculativeReadTimeout = {} : ", requestExecutor, speculativeRequestTimeout, thrown); } - }); + }, directExecutor()); } }, speculativeRequestTimeout, TimeUnit.MILLISECONDS); } catch (RejectedExecutionException re) { @@ -107,5 +111,6 @@ public void onFailure(Throwable thrown) { requestExecutor, speculativeRequestTimeout, re); } } + return null; } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/DistributionSchedule.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/DistributionSchedule.java index d53129d22f4..295cbd9faa4 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/DistributionSchedule.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/DistributionSchedule.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -17,12 +17,12 @@ */ package org.apache.bookkeeper.client; +import java.util.BitSet; import java.util.Map; - -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.BookieId; /** - * This interface determins how entries are distributed among bookies. + * This interface determines how entries are distributed among bookies. * *
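For intuition about the striping contract this javadoc describes (including the new getWriteSetBookieIndex and getEntriesStripedToTheBookie hooks further down), here is a hedged sketch of the round-robin layout used by RoundRobinDistributionSchedule, where entry e lands on the writeQuorumSize bookies starting at e mod ensembleSize:

import java.util.BitSet;

// Simplified, stand-alone version of the round-robin striping math; the real
// schedule also deals with write-set recycling and ensemble changes.
final class StripingMath {
    private StripingMath() {}

    // bookie index holding the i-th replica of entryId
    static int writeSetBookieIndex(long entryId, int i, int ensembleSize) {
        return (int) ((entryId + i) % ensembleSize);
    }

    // entries in [startEntryId, lastEntryId] striped to bookieIndex;
    // bit n corresponds to entry startEntryId + n
    static BitSet entriesStripedToBookie(int bookieIndex, long startEntryId, long lastEntryId,
                                         int ensembleSize, int writeQuorumSize) {
        BitSet bits = new BitSet((int) (lastEntryId - startEntryId + 1));
        for (long e = startEntryId; e <= lastEntryId; e++) {
            for (int i = 0; i < writeQuorumSize; i++) {
                if (writeSetBookieIndex(e, i, ensembleSize) == bookieIndex) {
                    bits.set((int) (e - startEntryId));
                    break;
                }
            }
        }
        return bits;
    }
}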

Every entry gets replicated to some number of replicas. The first replica for * an entry is given a replicaIndex of 0, and so on. To distribute write load, @@ -136,11 +136,23 @@ public WriteSet copy() { } }; + int getWriteQuorumSize(); + /** * Return the set of bookie indices to send the message to. */ WriteSet getWriteSet(long entryId); + /** + * Return the WriteSet bookie index for a given and index + * in the WriteSet. + * + * @param entryId + * @param writeSetIndex + * @return + */ + int getWriteSetBookieIndex(long entryId, int writeSetIndex); + /** * Return the set of bookies indices to send the messages to the whole ensemble. * @@ -171,14 +183,14 @@ interface AckSet { * bookie address * @return true if ack quorum is broken, false otherwise. */ - boolean failBookieAndCheck(int bookieIndexHeardFrom, BookieSocketAddress address); + boolean failBookieAndCheck(int bookieIndexHeardFrom, BookieId address); /** * Return the list of bookies that already failed. * * @return the list of bookies that already failed. */ - Map getFailedBookies(); + Map getFailedBookies(); /** * Invalidate a previous bookie response. @@ -236,4 +248,21 @@ interface QuorumCoverageSet { * @return true if it has entry otherwise false. */ boolean hasEntry(long entryId, int bookieIndex); + + /** + * Get the bitset representing the entries from entry 'startEntryId' to + * 'lastEntryId', that would be striped to the bookie with index - + * bookieIndex. Value of the bit with the 'bitIndex+n', indicate whether + * entry with entryid 'startEntryId+n' is striped to this bookie or not. + * + * @param bookieIndex + * index of the bookie in the ensemble starting with 0 + * @param startEntryId + * starting entryid + * @param lastEntryId + * last entryid + * @return the bitset representing the entries that would be striped to the + * bookie + */ + BitSet getEntriesStripedToTheBookie(int bookieIndex, long startEntryId, long lastEntryId); } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/DynamicWeightedRandomSelectionImpl.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/DynamicWeightedRandomSelectionImpl.java new file mode 100644 index 00000000000..7da6b4c20d6 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/DynamicWeightedRandomSelectionImpl.java @@ -0,0 +1,165 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.bookkeeper.client; + +import com.google.common.math.Quantiles; +import com.google.common.math.Quantiles.ScaleAndIndex; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.Map; +import java.util.Random; +import java.util.concurrent.locks.ReadWriteLock; +import java.util.concurrent.locks.ReentrantReadWriteLock; +import java.util.function.Function; +import java.util.stream.Collectors; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * DynamicWeightedRandomSelectionImpl class implements both getNextRandom + * overloaded methods. Where getNextRandom() method considers all bookies it + * knows of as candidates, but getNextRandom(Collection selectedNodes) method + * considers only 'selectedNodes' as candidates. + */ +class DynamicWeightedRandomSelectionImpl implements WeightedRandomSelection { + static final Logger LOG = LoggerFactory.getLogger(DynamicWeightedRandomSelectionImpl.class); + + int maxProbabilityMultiplier; + final Map weightMap; + final ReadWriteLock rwLock = new ReentrantReadWriteLock(true); + Random rand; + + DynamicWeightedRandomSelectionImpl() { + this(-1); + } + + DynamicWeightedRandomSelectionImpl(int maxMultiplier) { + this.maxProbabilityMultiplier = maxMultiplier; + this.weightMap = new HashMap(); + rand = new Random(System.currentTimeMillis()); + } + + @Override + public void updateMap(Map updatedMap) { + rwLock.writeLock().lock(); + try { + weightMap.clear(); + weightMap.putAll(updatedMap); + } finally { + rwLock.writeLock().unlock(); + } + } + + @Override + public T getNextRandom() { + rwLock.readLock().lock(); + try { + return getNextRandom(weightMap.keySet()); + } finally { + rwLock.readLock().unlock(); + } + } + + @Override + public T getNextRandom(Collection selectedNodes) { + rwLock.readLock().lock(); + try { + /* + * calculate minWeight and actual total weight. + */ + long minWeight = Long.MAX_VALUE; + long actTotalWeight = 0; + for (T node : selectedNodes) { + long weight = 0; + if ((weightMap.containsKey(node))) { + weight = weightMap.get(node).getWeight(); + } + actTotalWeight += weight; + if (weight > 0 && minWeight > weight) { + minWeight = weight; + } + } + + long medianWeight; + /* + * if actTotalWeight is 0, then assign 1 to minWeight and + * medianWeight. + */ + if (actTotalWeight == 0) { + minWeight = 1L; + medianWeight = 1L; + } else { + /* + * calculate medianWeight. + */ + Function weightFunc = (node) -> { + long weight = 0; + if ((weightMap.containsKey(node))) { + weight = weightMap.get(node).getWeight(); + } + return weight; + }; + ArrayList weightList = selectedNodes.stream().map(weightFunc) + .collect(Collectors.toCollection(ArrayList::new)); + ScaleAndIndex median = Quantiles.median(); + medianWeight = (long) median.compute(weightList); + } + + /* + * initialize maxWeight value based on maxProbabilityMultiplier. + */ + long maxWeight = maxProbabilityMultiplier * medianWeight; + + /* + * apply weighted random selection to select an element randomly + * based on weight. 
+ */ + long cumTotalWeight = 0; + T nextRandomNode = null; + for (T node : selectedNodes) { + long weight = 0; + if ((weightMap.containsKey(node))) { + weight = weightMap.get(node).getWeight(); + } + if (weight <= 0) { + weight = minWeight; + } else if (maxWeight > 0 && weight > maxWeight) { + weight = maxWeight; + } + long tmpRandLong = rand.nextLong(); + if (tmpRandLong == Long.MIN_VALUE) { + tmpRandLong++; + } + long randValue = Math.abs(tmpRandLong) % (cumTotalWeight + weight); + if (randValue >= cumTotalWeight) { + nextRandomNode = node; + } + cumTotalWeight += weight; + } + return nextRandomNode; + } finally { + rwLock.readLock().unlock(); + } + } + + @Override + public void setMaxProbabilityMultiplier(int max) { + this.maxProbabilityMultiplier = max; + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/EnsemblePlacementPolicy.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/EnsemblePlacementPolicy.java index e185964d36b..58d2bc0fc42 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/EnsemblePlacementPolicy.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/EnsemblePlacementPolicy.java @@ -22,15 +22,19 @@ import java.util.Map; import java.util.Optional; import java.util.Set; +import java.util.concurrent.ThreadLocalRandom; import org.apache.bookkeeper.client.BKException.BKNotEnoughBookiesException; import org.apache.bookkeeper.client.BookieInfoReader.BookieInfo; import org.apache.bookkeeper.client.DistributionSchedule.WriteSet; +import org.apache.bookkeeper.client.api.LedgerMetadata; import org.apache.bookkeeper.common.annotation.InterfaceAudience; import org.apache.bookkeeper.common.annotation.InterfaceStability; +import org.apache.bookkeeper.common.util.MathUtils; import org.apache.bookkeeper.conf.ClientConfiguration; import org.apache.bookkeeper.feature.FeatureProvider; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.net.DNSToSwitchMapping; +import org.apache.bookkeeper.proto.BookieAddressResolver; import org.apache.bookkeeper.stats.StatsLogger; /** @@ -52,11 +56,12 @@ * *
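The selection loop above is a streaming weighted pick: node k replaces the current candidate with probability weight_k divided by the cumulative weight seen so far, which works out to the same distribution as the classic cumulative-sum roulette wheel. A compact sketch of that equivalent two-pass form; the helper is hypothetical and floors zero weights at 1 rather than at the minimum positive weight used above:

import java.util.List;
import java.util.Random;

final class RouletteSelect {
    private static final Random RAND = new Random();

    // returns the index of the chosen element, weighted by weights.get(i)
    static int pickIndex(List<Long> weights) {
        long total = 0;
        for (long w : weights) {
            total += Math.max(w, 1); // floor non-positive weights
        }
        long r = Math.floorMod(RAND.nextLong(), total); // uniform in [0, total)
        long cum = 0;
        for (int i = 0; i < weights.size(); i++) {
            cum += Math.max(weights.get(i), 1);
            if (r < cum) {
                return i;
            }
        }
        return weights.size() - 1; // not reached for non-empty input
    }
}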

The ensemble placement policy is constructed via JVM reflection while the bookkeeper client itself is being constructed. * After the {@code EnsemblePlacementPolicy} is constructed, the bookkeeper client will call - * {@link #initialize(ClientConfiguration, Optional, HashedWheelTimer, FeatureProvider, StatsLogger)} to initialize - the placement policy. + * {@link #initialize(ClientConfiguration, Optional, HashedWheelTimer, FeatureProvider, StatsLogger, + * BookieAddressResolver)} to initialize the placement policy. * - *

The {@link #initialize(ClientConfiguration, Optional, HashedWheelTimer, FeatureProvider, StatsLogger)} - * method takes a few resources from bookkeeper for instantiating itself. These resources include: + *

The {@link #initialize(ClientConfiguration, Optional, HashedWheelTimer, FeatureProvider, StatsLogger, + * BookieAddressResolver)} method takes a few resources from bookkeeper for instantiating itself. + * These resources include: * *

• `ClientConfiguration` : The client configuration that is used for constructing the bookkeeper client. @@ -76,7 +81,8 @@ *

The ensemble placement policy is a single instance per bookkeeper client. The instance will * be {@link #uninitalize()} when closing the bookkeeper client. The implementation of a placement policy should be * responsible for releasing all the resources that were allocated during - * {@link #initialize(ClientConfiguration, Optional, HashedWheelTimer, FeatureProvider, StatsLogger)}. + * {@link #initialize(ClientConfiguration, Optional, HashedWheelTimer, FeatureProvider, StatsLogger, + * BookieAddressResolver)}. * *
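Given that the lifecycle above now hands the policy a BookieAddressResolver, a sketch of a policy hooking the six-argument initialize; this example is hypothetical, lives in the org.apache.bookkeeper.client package because the default policy's constructor is package-private, and simply delegates everything else:

package org.apache.bookkeeper.client;

import io.netty.util.HashedWheelTimer;
import java.util.Optional;
import org.apache.bookkeeper.conf.ClientConfiguration;
import org.apache.bookkeeper.feature.FeatureProvider;
import org.apache.bookkeeper.net.DNSToSwitchMapping;
import org.apache.bookkeeper.proto.BookieAddressResolver;
import org.apache.bookkeeper.stats.StatsLogger;

// Hypothetical: keeps the resolver handed over by the client, e.g. to map
// BookieId -> network location later, and otherwise behaves like the default.
public class ResolverAwarePlacementPolicy extends DefaultEnsemblePlacementPolicy {
    private volatile BookieAddressResolver resolver;

    @Override
    public EnsemblePlacementPolicy initialize(ClientConfiguration conf,
                                              Optional<DNSToSwitchMapping> optionalDnsResolver,
                                              HashedWheelTimer hashedWheelTimer,
                                              FeatureProvider featureProvider,
                                              StatsLogger statsLogger,
                                              BookieAddressResolver bookieAddressResolver) {
        this.resolver = bookieAddressResolver;
        return super.initialize(conf, optionalDnsResolver, hashedWheelTimer,
                featureProvider, statsLogger, bookieAddressResolver);
    }
}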

    How to choose bookies to place data


    The implementation should take actions when the cluster view is changed. So subsequent * {@link #newEnsemble(int, int, int, Map, Set)} and - * {@link #replaceBookie(int, int, int, java.util.Map, java.util.Set, - * org.apache.bookkeeper.net.BookieSocketAddress, java.util.Set) } + * {@link #replaceBookie(int, int, int, java.util.Map, java.util.List, BookieId, java.util.Set) } * can choose proper bookies. * * @param writableBookies @@ -235,8 +240,8 @@ EnsemblePlacementPolicy initialize(ClientConfiguration conf, * All the bookies in the cluster available for readonly. * @return the dead bookies during this cluster change. */ - Set onClusterChanged(Set writableBookies, - Set readOnlyBookies); + Set onClusterChanged(Set writableBookies, + Set readOnlyBookies); /** * Choose numBookies bookies for ensemble. If the count is more than the number of available @@ -248,6 +253,9 @@ Set onClusterChanged(Set writableBooki *

{@code customMetadata} is the same user-defined data that the user provides * when calling {@link BookKeeper#createLedger(int, int, int, BookKeeper.DigestType, byte[], Map)}. * + *

    If 'enforceMinNumRacksPerWriteQuorum' config is enabled then the bookies belonging to default + * faultzone (rack) will be excluded while selecting bookies. + * * @param ensembleSize * Ensemble Size * @param writeQuorumSize @@ -258,19 +266,22 @@ Set onClusterChanged(Set writableBooki * provides in {@link BookKeeper#createLedger(int, int, int, BookKeeper.DigestType, byte[])} * @param excludeBookies Bookies that should not be considered as targets. * @throws BKNotEnoughBookiesException if not enough bookies available. - * @return the List<org.apache.bookkeeper.net.BookieSocketAddress> + * @return a placement result containing list of bookie addresses for the ensemble. */ - List newEnsemble(int ensembleSize, - int writeQuorumSize, - int ackQuorumSize, - Map customMetadata, - Set excludeBookies) - throws BKNotEnoughBookiesException; + PlacementResult> newEnsemble(int ensembleSize, + int writeQuorumSize, + int ackQuorumSize, + Map customMetadata, + Set excludeBookies) + throws BKNotEnoughBookiesException; /** * Choose a new bookie to replace bookieToReplace. If no bookie available in the cluster, * {@link BKNotEnoughBookiesException} is thrown. * + *
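Taken together, callers of the reworked contract unwrap a PlacementResult instead of receiving a bare list, and may act on the adherence verdict the same way BookieWatcherImpl does further up. A hedged usage sketch, with the policy assumed to be already initialized:

import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import org.apache.bookkeeper.client.BKException;
import org.apache.bookkeeper.client.EnsemblePlacementPolicy;
import org.apache.bookkeeper.client.EnsemblePlacementPolicy.PlacementPolicyAdherence;
import org.apache.bookkeeper.client.EnsemblePlacementPolicy.PlacementResult;
import org.apache.bookkeeper.net.BookieId;

class NewEnsembleExample {
    // ensemble=3, writeQuorum=3, ackQuorum=2, no custom metadata, no exclusions
    static List<BookieId> pick(EnsemblePlacementPolicy policy)
            throws BKException.BKNotEnoughBookiesException {
        PlacementResult<List<BookieId>> pr =
                policy.newEnsemble(3, 3, 2, Collections.emptyMap(), new HashSet<>());
        if (pr.getAdheringToPolicy() == PlacementPolicyAdherence.FAIL) {
            // e.g. bump a "not adhering to placement policy" counter here
        }
        return pr.getResult();
    }
}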

    If 'enforceMinNumRacksPerWriteQuorum' config is enabled then the bookies belonging to default + * faultzone (rack) will be excluded while selecting bookies. + * * @param ensembleSize * the value of ensembleSize * @param writeQuorumSize @@ -282,16 +293,16 @@ List newEnsemble(int ensembleSize, * @param bookieToReplace bookie to replace * @param excludeBookies bookies that should not be considered as candidate. * @throws BKNotEnoughBookiesException - * @return the org.apache.bookkeeper.net.BookieSocketAddress + * @return a placement result containing the new bookie address. */ - BookieSocketAddress replaceBookie(int ensembleSize, - int writeQuorumSize, - int ackQuorumSize, - Map customMetadata, - Set currentEnsemble, - BookieSocketAddress bookieToReplace, - Set excludeBookies) - throws BKNotEnoughBookiesException; + PlacementResult replaceBookie(int ensembleSize, + int writeQuorumSize, + int ackQuorumSize, + Map customMetadata, + List currentEnsemble, + BookieId bookieToReplace, + Set excludeBookies) + throws BKNotEnoughBookiesException; /** * Register a bookie as slow so that it is tried after available and read-only bookies. @@ -301,7 +312,7 @@ BookieSocketAddress replaceBookie(int ensembleSize, * @param entryId * Entry ID that caused a speculative timeout on the bookie. */ - void registerSlowBookie(BookieSocketAddress bookieSocketAddress, long entryId); + void registerSlowBookie(BookieId bookieSocketAddress, long entryId); /** * Reorder the read sequence of a given write quorum writeSet. @@ -318,7 +329,7 @@ BookieSocketAddress replaceBookie(int ensembleSize, * @since 4.5 */ DistributionSchedule.WriteSet reorderReadSequence( - List ensemble, + List ensemble, BookiesHealthInfo bookiesHealthInfo, DistributionSchedule.WriteSet writeSet); @@ -338,7 +349,7 @@ DistributionSchedule.WriteSet reorderReadSequence( * @since 4.5 */ DistributionSchedule.WriteSet reorderReadLACSequence( - List ensemble, + List ensemble, BookiesHealthInfo bookiesHealthInfo, DistributionSchedule.WriteSet writeSet); @@ -349,6 +360,162 @@ DistributionSchedule.WriteSet reorderReadLACSequence( * A map that has the bookie to BookieInfo * @since 4.5 */ - default void updateBookieInfo(Map bookieInfoMap) { + default void updateBookieInfo(Map bookieInfoMap) { + } + + /** + * Select one bookie to the "sticky" bookie where all reads for a particular + * ledger will be directed to. + * + *

    The default implementation will pick a bookie randomly from the ensemble. + * Other placement policies will be able to do better decisions based on + * additional information (eg: rack or region awareness). + * + * @param metadata + * the {@link LedgerMetadata} object + * @param currentStickyBookieIndex + * if we are changing the sticky bookie after a read failure, the + * current sticky bookie is passed in so that we will avoid + * choosing it again + * @return the index, within the ensemble of the bookie chosen as the sticky + * bookie + * + * @since 4.9 + */ + default int getStickyReadBookieIndex(LedgerMetadata metadata, Optional currentStickyBookieIndex) { + if (!currentStickyBookieIndex.isPresent()) { + // Pick one bookie randomly from the current ensemble as the initial + // "sticky bookie" + return ThreadLocalRandom.current().nextInt(metadata.getEnsembleSize()); + } else { + // When choosing a new sticky bookie index (eg: after the current + // one has read failures), by default we pick the next one in the + // ensemble, to avoid picking up the same one again. + return MathUtils.signSafeMod(currentStickyBookieIndex.get() + 1, metadata.getEnsembleSize()); + } + } + + /** + * returns AdherenceLevel if the Ensemble is strictly/softly/fails adhering + * to placement policy, like in the case of + * RackawareEnsemblePlacementPolicy, bookies in the writeset are from + * 'minNumRacksPerWriteQuorum' number of racks. And in the case of + * RegionawareEnsemblePlacementPolicy, check for + * minimumRegionsForDurability, reppRegionsToWrite, rack distribution within + * a region and other parameters of RegionAwareEnsemblePlacementPolicy. In + * ZoneAwareEnsemblePlacementPolicy if bookies in the writeset are from + * 'desiredNumOfZones' then it is considered as MEETS_STRICT if they are + * from 'minNumOfZones' then it is considered as MEETS_SOFT otherwise + * considered as FAIL. + * + * @param ensembleList + * list of BookieId of bookies in the ensemble + * @param writeQuorumSize + * writeQuorumSize of the ensemble + * @param ackQuorumSize + * ackQuorumSize of the ensemble + * @return + */ + default PlacementPolicyAdherence isEnsembleAdheringToPlacementPolicy(List ensembleList, + int writeQuorumSize, int ackQuorumSize) { + return PlacementPolicyAdherence.FAIL; + } + + /** + * Returns true if the bookies that have acknowledged a write adhere to the minimum fault domains as defined in the + * placement policy in use. Ex: In the case of RackawareEnsemblePlacementPolicy, bookies belong to at least + * 'minNumRacksPerWriteQuorum' number of racks. + * + * @param ackedBookies + * list of BookieId of bookies that have acknowledged a write. + * @param writeQuorumSize + * writeQuorumSize of the ensemble + * @param ackQuorumSize + * ackQuorumSize of the ensemble + * @return + */ + default boolean areAckedBookiesAdheringToPlacementPolicy(Set ackedBookies, + int writeQuorumSize, + int ackQuorumSize) { + return true; + } + + /** + * Returns placement result. If the currentEnsemble is not adhering placement policy, returns new ensemble that + * adheres placement policy. It should be implemented so as to minify the number of bookies replaced. 
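The sticky-read hook above is driven from the reader side: pick a sticky bookie once, then rotate via the same hook when a read against it fails. A minimal sketch of that driving state, as a hypothetical helper (the real client keeps equivalent per-ledger state internally):

import java.util.List;
import java.util.Optional;
import org.apache.bookkeeper.client.EnsemblePlacementPolicy;
import org.apache.bookkeeper.client.api.LedgerMetadata;
import org.apache.bookkeeper.net.BookieId;

class StickyReadHelper {
    private final EnsemblePlacementPolicy policy;
    private Optional<Integer> stickyIndex = Optional.empty();

    StickyReadHelper(EnsemblePlacementPolicy policy) {
        this.policy = policy;
    }

    // bookie to try first for this ledger's reads; the first call picks randomly,
    // later calls advance past the previous (failed) sticky bookie
    BookieId nextStickyBookie(LedgerMetadata metadata, List<BookieId> ensemble) {
        stickyIndex = Optional.of(policy.getStickyReadBookieIndex(metadata, stickyIndex));
        return ensemble.get(stickyIndex.get());
    }
}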
+ * + * @param ensembleSize + * ensemble size + * @param writeQuorumSize + * writeQuorumSize of the ensemble + * @param ackQuorumSize + * ackQuorumSize of the ensemble + * @param excludeBookies + * bookies that should not be considered as targets + * @param currentEnsemble + * current ensemble + * @return a placement result + */ + default PlacementResult> replaceToAdherePlacementPolicy( + int ensembleSize, + int writeQuorumSize, + int ackQuorumSize, + Set excludeBookies, + List currentEnsemble) { + throw new UnsupportedOperationException(); + } + + /** + * enum for PlacementPolicyAdherence. Currently we are supporting tri-value + * enum for PlacementPolicyAdherence. If placement policy is met strictly + * then it is MEETS_STRICT, if it doesn't adhere to placement policy then it + * is FAIL. But there are certain placement policies, like + * ZoneAwareEnsemblePlacementPolicy which has definition of soft adherence + * level to support zone down scenarios. + */ + enum PlacementPolicyAdherence { + FAIL(1), MEETS_SOFT(3), MEETS_STRICT(5); + private int numVal; + + private PlacementPolicyAdherence(int numVal) { + this.numVal = numVal; + } + + public int getNumVal() { + return numVal; + } + } + + /** + * Result of a placement calculation against a placement policy. + */ + final class PlacementResult { + private final T result; + private final PlacementPolicyAdherence policyAdherence; + + public static PlacementResult of(T result, PlacementPolicyAdherence policyAdherence) { + return new PlacementResult<>(result, policyAdherence); + } + + private PlacementResult(T result, PlacementPolicyAdherence policyAdherence) { + this.result = result; + this.policyAdherence = policyAdherence; + } + + public T getResult() { + return result; + } + + /** + * Use {@link #getAdheringToPolicy}. + */ + @Deprecated + public PlacementPolicyAdherence isAdheringToPolicy() { + return policyAdherence; + } + + public PlacementPolicyAdherence getAdheringToPolicy() { + return policyAdherence; + } } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/EnsembleUtils.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/EnsembleUtils.java new file mode 100644 index 00000000000..dd16f800c61 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/EnsembleUtils.java @@ -0,0 +1,97 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + */ +package org.apache.bookkeeper.client; + +import static com.google.common.base.Preconditions.checkArgument; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import org.apache.bookkeeper.client.api.LedgerMetadata; +import org.apache.bookkeeper.net.BookieId; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +class EnsembleUtils { + private static final Logger LOG = LoggerFactory.getLogger(EnsembleUtils.class); + + static List replaceBookiesInEnsemble(BookieWatcher bookieWatcher, + LedgerMetadata metadata, + List oldEnsemble, + Map failedBookies, + String logContext) + throws BKException.BKNotEnoughBookiesException { + List newEnsemble = new ArrayList<>(oldEnsemble); + + int ensembleSize = metadata.getEnsembleSize(); + int writeQ = metadata.getWriteQuorumSize(); + int ackQ = metadata.getAckQuorumSize(); + Map customMetadata = metadata.getCustomMetadata(); + + Set exclude = new HashSet<>(failedBookies.values()); + + int replaced = 0; + for (Map.Entry entry : failedBookies.entrySet()) { + int idx = entry.getKey(); + BookieId addr = entry.getValue(); + if (LOG.isDebugEnabled()) { + LOG.debug("{} replacing bookie: {} index: {}", logContext, addr, idx); + } + + if (!newEnsemble.get(idx).equals(addr)) { + if (LOG.isDebugEnabled()) { + LOG.debug("{} Not changing failed bookie {} at index {}, already changed to {}", + logContext, addr, idx, newEnsemble.get(idx)); + } + continue; + } + try { + BookieId newBookie = bookieWatcher.replaceBookie( + ensembleSize, writeQ, ackQ, customMetadata, newEnsemble, idx, exclude); + newEnsemble.set(idx, newBookie); + + replaced++; + } catch (BKException.BKNotEnoughBookiesException e) { + // if there is no bookie replaced, we throw not enough bookie exception + if (replaced <= 0) { + throw e; + } else { + break; + } + } + } + return newEnsemble; + } + + static Set diffEnsemble(List e1, + List e2) { + checkArgument(e1.size() == e2.size(), "Ensembles must be of same size"); + Set diff = new HashSet<>(); + for (int i = 0; i < e1.size(); i++) { + if (!e1.get(i).equals(e2.get(i))) { + diff.add(i); + } + } + return diff; + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/ExplicitLacFlushPolicy.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/ExplicitLacFlushPolicy.java index c9e6def287e..e2c222220d2 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/ExplicitLacFlushPolicy.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/ExplicitLacFlushPolicy.java @@ -23,10 +23,8 @@ import java.util.concurrent.RejectedExecutionException; import java.util.concurrent.ScheduledFuture; import java.util.concurrent.TimeUnit; - import org.apache.bookkeeper.client.SyncCallbackUtils.LastAddConfirmedCallback; import org.apache.bookkeeper.util.ByteBufList; -import org.apache.bookkeeper.util.SafeRunnable; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -85,9 +83,9 @@ public void setPiggyBackedLac(long piggyBackedLac) { } private void scheduleExplictLacFlush() { - final SafeRunnable updateLacTask = new SafeRunnable() { + final Runnable updateLacTask = new Runnable() { @Override - public void safeRun() { + public void run() { // Made progress since previous explicitLAC through // Piggyback, so no need to send an explicit LAC update to // bookies. 
@@ -124,8 +122,7 @@ public String toString() { scheduledFuture = clientCtx.getScheduler().scheduleAtFixedRateOrdered(lh.getId(), updateLacTask, explicitLacIntervalInMs, explicitLacIntervalInMs, TimeUnit.MILLISECONDS); } catch (RejectedExecutionException re) { - LOG.error("Scheduling of ExplictLastAddConfirmedFlush for ledger: {} has failed because of {}", - lh.getId(), re); + LOG.error("Scheduling of ExplictLastAddConfirmedFlush for ledger: {} has failed.", lh.getId(), re); } } @@ -140,13 +137,10 @@ void asyncExplicitLacFlush(final long explicitLac) { if (LOG.isDebugEnabled()) { LOG.debug("Sending Explicit LAC: {}", explicitLac); } - clientCtx.getMainWorkerPool().submit(new SafeRunnable() { - @Override - public void safeRun() { - ByteBufList toSend = lh.macManager - .computeDigestAndPackageForSendingLac(lh.getLastAddConfirmed()); - op.initiate(toSend); - } + clientCtx.getMainWorkerPool().submit(() -> { + ByteBufList toSend = lh.macManager + .computeDigestAndPackageForSendingLac(lh.getLastAddConfirmed()); + op.initiate(toSend); }); } catch (RejectedExecutionException e) { cb.addLacComplete(BookKeeper.getReturnRc(clientCtx.getBookieClient(), diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/ForceLedgerOp.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/ForceLedgerOp.java index 2d785ab6a17..b96fa095ddb 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/ForceLedgerOp.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/ForceLedgerOp.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -18,20 +18,20 @@ package org.apache.bookkeeper.client; import static com.google.common.base.Preconditions.checkState; + import java.util.List; import java.util.concurrent.CompletableFuture; import org.apache.bookkeeper.common.concurrent.FutureUtils; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.proto.BookieClient; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.ForceLedgerCallback; -import org.apache.bookkeeper.util.SafeRunnable; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * This represents a request to sync the ledger on every bookie. 
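A normal add completes once the ack quorum responds, but the force operation above deliberately waits for every member of the ensemble before advancing the durable LAC, which is what the checkState message in the code that follows spells out. A toy tracker for that all-or-nothing accounting (hypothetical, single ensemble, no failure handling):

import java.util.BitSet;

class AllBookiesAckSet {
    private final BitSet acked;
    private final int ensembleSize;

    AllBookiesAckSet(int ensembleSize) {
        this.ensembleSize = ensembleSize;
        this.acked = new BitSet(ensembleSize);
    }

    // returns true only once every ensemble member has acknowledged
    synchronized boolean completeBookieAndCheck(int bookieIndex) {
        acked.set(bookieIndex);
        return acked.cardinality() == ensembleSize;
    }
}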
*/ -class ForceLedgerOp extends SafeRunnable implements ForceLedgerCallback { +class ForceLedgerOp implements Runnable, ForceLedgerCallback { private static final Logger LOG = LoggerFactory.getLogger(ForceLedgerOp.class); final CompletableFuture cb; @@ -40,7 +40,7 @@ class ForceLedgerOp extends SafeRunnable implements ForceLedgerCallback { boolean completed = false; boolean errored = false; int lastSeenError = BKException.Code.WriteException; - final List currentEnsemble; + final List currentEnsemble; long currentNonDurableLastAddConfirmed = LedgerHandle.INVALID_ENTRY_ID; @@ -48,7 +48,7 @@ class ForceLedgerOp extends SafeRunnable implements ForceLedgerCallback { final BookieClient bookieClient; ForceLedgerOp(LedgerHandle lh, BookieClient bookieClient, - List ensemble, + List ensemble, CompletableFuture cb) { this.lh = lh; this.bookieClient = bookieClient; @@ -61,7 +61,7 @@ void sendForceLedgerRequest(int bookieIndex) { } @Override - public void safeRun() { + public void run() { initiate(); } @@ -89,7 +89,7 @@ void initiate() { } @Override - public void forceLedgerComplete(int rc, long ledgerId, BookieSocketAddress addr, Object ctx) { + public void forceLedgerComplete(int rc, long ledgerId, BookieId addr, Object ctx) { int bookieIndex = (Integer) ctx; checkState(!completed, "We are waiting for all the bookies, it is not expected an early exit"); @@ -107,7 +107,7 @@ public void forceLedgerComplete(int rc, long ledgerId, BookieSocketAddress addr, if (ackSet.completeBookieAndCheck(bookieIndex)) { completed = true; // we are able to say that every bookie sync'd its own journal - // for every ackknowledged entry before issuing the force() call + // for every acknowledged entry before issuing the force() call if (LOG.isDebugEnabled()) { LOG.debug("After force on ledger {} updating LastAddConfirmed to {} ", ledgerId, currentNonDurableLastAddConfirmed); @@ -118,7 +118,7 @@ public void forceLedgerComplete(int rc, long ledgerId, BookieSocketAddress addr, } else { // at least one bookie failed, as we are waiting for all the bookies // we can fail immediately - LOG.info("ForceLedger did not succeed: Ledger {} on {}", ledgerId, addr); + LOG.error("ForceLedger did not succeed: Ledger {} on {}", ledgerId, addr); errored = true; // notify the failure diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/ITopologyAwareEnsemblePlacementPolicy.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/ITopologyAwareEnsemblePlacementPolicy.java index 7c9e07cd10a..2f6f314e539 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/ITopologyAwareEnsemblePlacementPolicy.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/ITopologyAwareEnsemblePlacementPolicy.java @@ -19,14 +19,11 @@ import java.util.List; import java.util.Set; - import org.apache.bookkeeper.client.BKException.BKNotEnoughBookiesException; -import org.apache.bookkeeper.client.ITopologyAwareEnsemblePlacementPolicy.Ensemble; -import org.apache.bookkeeper.client.ITopologyAwareEnsemblePlacementPolicy.Predicate; -import org.apache.bookkeeper.client.TopologyAwareEnsemblePlacementPolicy.BookieNode; import org.apache.bookkeeper.common.annotation.InterfaceAudience; import org.apache.bookkeeper.common.annotation.InterfaceStability; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.net.BookieNode; import org.apache.bookkeeper.net.Node; /** @@ -66,7 +63,7 @@ interface Ensemble { /** * @return list of addresses 
representing the ensemble */ - List toList(); + List toList(); /** * Validates if an ensemble is valid. @@ -93,11 +90,11 @@ interface Ensemble { * @return list of bookies forming the ensemble * @throws BKException.BKNotEnoughBookiesException */ - List newEnsemble( + PlacementResult> newEnsemble( int ensembleSize, int writeQuorumSize, int ackQuorumSize, - Set excludeBookies, + Set excludeBookies, Ensemble parentEnsemble, Predicate parentPredicate) throws BKException.BKNotEnoughBookiesException; @@ -176,7 +173,7 @@ T selectFromNetworkLocation(String networkLoc, * @param leftBookies * bookies that left */ - void handleBookiesThatLeft(Set leftBookies); + void handleBookiesThatLeft(Set leftBookies); /** * Handle bookies that joined. @@ -184,5 +181,12 @@ T selectFromNetworkLocation(String networkLoc, * @param joinedBookies * bookies that joined. */ - void handleBookiesThatJoined(Set joinedBookies); + void handleBookiesThatJoined(Set joinedBookies); + + /** + * Handle rack change for the bookies. + * + * @param bookieAddressList + */ + void onBookieRackChange(List bookieAddressList); } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerChecker.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerChecker.java index cbd99767764..9364b8563c6 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerChecker.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerChecker.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -22,16 +22,19 @@ import io.netty.buffer.ByteBuf; import java.util.HashMap; import java.util.HashSet; +import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Set; import java.util.TreeSet; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.Semaphore; import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; import org.apache.bookkeeper.client.BKException.Code; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.proto.BookieClient; import org.apache.bookkeeper.proto.BookieProtocol; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GenericCallback; @@ -39,7 +42,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; - /** * A utility class to check the complete ledger and finds the UnderReplicated fragments if any. * @@ -49,6 +51,9 @@ public class LedgerChecker { private static final Logger LOG = LoggerFactory.getLogger(LedgerChecker.class); public final BookieClient bookieClient; + public final BookieWatcher bookieWatcher; + + final Semaphore semaphore; static class InvalidFragmentException extends Exception { private static final long serialVersionUID = 1467201276417062353L; @@ -59,7 +64,7 @@ static class InvalidFragmentException extends Exception { * call back to previous call back API which is waiting for it once it meets * the expected call backs from down. 
*/ - private static class ReadManyEntriesCallback implements ReadEntryCallback { + private class ReadManyEntriesCallback implements ReadEntryCallback { AtomicBoolean completed = new AtomicBoolean(false); final AtomicLong numEntries; final LedgerFragment fragment; @@ -72,14 +77,19 @@ private static class ReadManyEntriesCallback implements ReadEntryCallback { this.cb = cb; } + @Override public void readEntryComplete(int rc, long ledgerId, long entryId, ByteBuf buffer, Object ctx) { + releasePermit(); if (rc == BKException.Code.OK) { if (numEntries.decrementAndGet() == 0 && !completed.getAndSet(true)) { cb.operationComplete(rc, fragment); } } else if (!completed.getAndSet(true)) { + if (LOG.isDebugEnabled()) { + LOG.debug("Read {}:{} from {} failed, the error code: {}", ledgerId, entryId, ctx, rc); + } cb.operationComplete(rc, fragment); } } @@ -135,7 +145,44 @@ public void operationComplete(int rc, LedgerFragment lf) { } public LedgerChecker(BookKeeper bkc) { - bookieClient = bkc.getBookieClient(); + this(bkc.getBookieClient(), bkc.getBookieWatcher()); + } + + public LedgerChecker(BookieClient client, BookieWatcher watcher) { + this(client, watcher, -1); + } + + public LedgerChecker(BookKeeper bkc, int inFlightReadEntryNum) { + this(bkc.getBookieClient(), bkc.getBookieWatcher(), inFlightReadEntryNum); + } + + public LedgerChecker(BookieClient client, BookieWatcher watcher, int inFlightReadEntryNum) { + bookieClient = client; + bookieWatcher = watcher; + if (inFlightReadEntryNum > 0) { + semaphore = new Semaphore(inFlightReadEntryNum); + } else { + semaphore = null; + } + } + + /** + * Acquires a permit from permit manager, + * blocking until all are available. + */ + public void acquirePermit() throws InterruptedException { + if (null != semaphore) { + semaphore.acquire(1); + } + } + + /** + * Release a given permit. 
+ */ + public void releasePermit() { + if (null != semaphore) { + semaphore.release(); + } } /** @@ -150,7 +197,7 @@ public LedgerChecker(BookKeeper bkc) { private void verifyLedgerFragment(LedgerFragment fragment, GenericCallback cb, Long percentageOfLedgerFragmentToBeVerified) - throws InvalidFragmentException, BKException { + throws InvalidFragmentException, BKException, InterruptedException { Set bookiesToCheck = fragment.getBookiesIndexes(); if (bookiesToCheck.isEmpty()) { cb.operationComplete(BKException.Code.OK, fragment); @@ -181,11 +228,11 @@ private void verifyLedgerFragment(LedgerFragment fragment, int bookieIndex, GenericCallback cb, long percentageOfLedgerFragmentToBeVerified) - throws InvalidFragmentException { + throws InvalidFragmentException, InterruptedException { long firstStored = fragment.getFirstStoredEntryId(bookieIndex); long lastStored = fragment.getLastStoredEntryId(bookieIndex); - BookieSocketAddress bookie = fragment.getAddress(bookieIndex); + BookieId bookie = fragment.getAddress(bookieIndex); if (null == bookie) { throw new InvalidFragmentException(); } @@ -195,12 +242,22 @@ private void verifyLedgerFragment(LedgerFragment fragment, if (lastStored != LedgerHandle.INVALID_ENTRY_ID) { throw new InvalidFragmentException(); } - cb.operationComplete(BKException.Code.OK, fragment); + + if (bookieWatcher.isBookieUnavailable(fragment.getAddress(bookieIndex))) { + // fragment is on this bookie, but already know it's unavailable, so skip the call + cb.operationComplete(BKException.Code.BookieHandleNotAvailableException, fragment); + } else { + cb.operationComplete(BKException.Code.OK, fragment); + } + } else if (bookieWatcher.isBookieUnavailable(fragment.getAddress(bookieIndex))) { + // fragment is on this bookie, but already know it's unavailable, so skip the call + cb.operationComplete(BKException.Code.BookieHandleNotAvailableException, fragment); } else if (firstStored == lastStored) { + acquirePermit(); ReadManyEntriesCallback manycb = new ReadManyEntriesCallback(1, fragment, cb); bookieClient.readEntry(bookie, fragment.getLedgerId(), firstStored, - manycb, null, BookieProtocol.FLAG_NONE); + manycb, bookie, BookieProtocol.FLAG_NONE); } else { if (lastStored <= firstStored) { cb.operationComplete(Code.IncorrectParameterException, null); @@ -241,7 +298,9 @@ private void verifyLedgerFragment(LedgerFragment fragment, ReadManyEntriesCallback manycb = new ReadManyEntriesCallback(entriesToBeVerified.size(), fragment, cb); for (Long entryID: entriesToBeVerified) { - bookieClient.readEntry(bookie, fragment.getLedgerId(), entryID, manycb, null, BookieProtocol.FLAG_NONE); + acquirePermit(); + bookieClient.readEntry(bookie, fragment.getLedgerId(), entryID, manycb, bookie, + BookieProtocol.FLAG_NONE); } } } @@ -251,7 +310,7 @@ private void verifyLedgerFragment(LedgerFragment fragment, * It is used to differentiate the cases where it has been written * but now cannot be read, and where it never has been written. 
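The acquirePermit()/releasePermit() pair above bounds how many readEntry requests the checker keeps in flight at once. A minimal standalone sketch of the same pattern, with illustrative names (BoundedAsyncReader and AsyncStore are not part of this patch):

import java.util.concurrent.CompletableFuture;
import java.util.concurrent.Semaphore;

// Illustrative sketch only: cap the number of concurrent async reads with a
// Semaphore, releasing the permit from the completion callback, exactly as
// acquirePermit()/releasePermit() do in LedgerChecker above.
class BoundedAsyncReader {
    interface AsyncStore {
        CompletableFuture<byte[]> readAsync(long entryId);
    }

    private final Semaphore permits;
    private final AsyncStore store;

    BoundedAsyncReader(AsyncStore store, int maxInFlight) {
        this.store = store;
        this.permits = new Semaphore(maxInFlight);
    }

    CompletableFuture<byte[]> read(long entryId) throws InterruptedException {
        permits.acquire();                 // blocks the submitting thread, not the I/O threads
        return store.readAsync(entryId)
                .whenComplete((data, err) -> permits.release());  // always return the permit
    }
}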
*/ - private static class EntryExistsCallback implements ReadEntryCallback { + private class EntryExistsCallback implements ReadEntryCallback { AtomicBoolean entryMayExist = new AtomicBoolean(false); final AtomicInteger numReads; final GenericCallback cb; @@ -262,9 +321,12 @@ private static class EntryExistsCallback implements ReadEntryCallback { this.cb = cb; } + @Override public void readEntryComplete(int rc, long ledgerId, long entryId, ByteBuf buffer, Object ctx) { - if (BKException.Code.NoSuchEntryException != rc && BKException.Code.NoSuchLedgerExistsException != rc) { + releasePermit(); + if (BKException.Code.NoSuchEntryException != rc && BKException.Code.NoSuchLedgerExistsException != rc + && BKException.Code.NoSuchLedgerExistsOnMetadataServerException != rc) { entryMayExist.set(true); } @@ -287,11 +349,12 @@ private static class FullLedgerCallback implements FullLedgerCallback(long numFragments, GenericCallback> cb) { - badFragments = new HashSet(); + badFragments = new LinkedHashSet<>(); this.numFragments = new AtomicLong(numFragments); this.cb = cb; } + @Override public void operationComplete(int rc, LedgerFragment result) { if (rc == BKException.Code.ClientClosedException) { cb.operationComplete(BKException.Code.ClientClosedException, badFragments); @@ -318,12 +381,12 @@ public void checkLedger(final LedgerHandle lh, final GenericCallback> cb, long percentageOfLedgerFragmentToBeVerified) { // build a set of all fragment replicas - final Set fragments = new HashSet(); + final Set fragments = new LinkedHashSet<>(); Long curEntryId = null; - List curEnsemble = null; - for (Map.Entry> e : lh - .getLedgerMetadata().getEnsembles().entrySet()) { + List curEnsemble = null; + for (Map.Entry> e : lh + .getLedgerMetadata().getAllEnsembles().entrySet()) { if (curEntryId != null) { Set bookieIndexes = new HashSet(); for (int i = 0; i < curEnsemble.size(); i++) { @@ -368,24 +431,33 @@ public void checkLedger(final LedgerHandle lh, if (curEntryId == lastEntry) { final long entryToRead = curEntryId; + final CompletableFuture future = new CompletableFuture<>(); + future.whenCompleteAsync((re, ex) -> { + checkFragments(fragments, cb, percentageOfLedgerFragmentToBeVerified); + }); + final EntryExistsCallback eecb = new EntryExistsCallback(lh.getLedgerMetadata().getWriteQuorumSize(), new GenericCallback() { + @Override public void operationComplete(int rc, Boolean result) { if (result) { fragments.add(lastLedgerFragment); } - checkFragments(fragments, cb, - percentageOfLedgerFragmentToBeVerified); + future.complete(null); } }); - DistributionSchedule.WriteSet writeSet = lh.getDistributionSchedule().getWriteSet(entryToRead); - for (int i = 0; i < writeSet.size(); i++) { - BookieSocketAddress addr = curEnsemble.get(writeSet.get(i)); - bookieClient.readEntry(addr, lh.getId(), entryToRead, - eecb, null, BookieProtocol.FLAG_NONE); + DistributionSchedule ds = lh.getDistributionSchedule(); + for (int i = 0; i < ds.getWriteQuorumSize(); i++) { + try { + acquirePermit(); + BookieId addr = curEnsemble.get(ds.getWriteSetBookieIndex(entryToRead, i)); + bookieClient.readEntry(addr, lh.getId(), entryToRead, + eecb, null, BookieProtocol.FLAG_NONE); + } catch (InterruptedException e) { + LOG.error("InterruptedException when checking entry : {}", entryToRead, e); + } } - writeSet.recycle(); return; } else { fragments.add(lastLedgerFragment); @@ -406,7 +478,9 @@ private void checkFragments(Set fragments, FullLedgerCallback allFragmentsCb = new FullLedgerCallback(fragments .size(), cb); for (LedgerFragment r : 
fragments) { - LOG.debug("Checking fragment {}", r); + if (LOG.isDebugEnabled()) { + LOG.debug("Checking fragment {}", r); + } try { verifyLedgerFragment(r, allFragmentsCb, percentageOfLedgerFragmentToBeVerified); } catch (InvalidFragmentException ife) { @@ -415,6 +489,8 @@ private void checkFragments(Set fragments, BKException.Code.IncorrectParameterException, r); } catch (BKException e) { LOG.error("BKException when checking fragment : {}", r, e); + } catch (InterruptedException e) { + LOG.error("InterruptedException when checking fragment : {}", r, e); } } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerCreateOp.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerCreateOp.java index e39b2a19b00..5eaeebf6837 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerCreateOp.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerCreateOp.java @@ -30,7 +30,6 @@ import java.util.concurrent.CompletableFuture; import java.util.concurrent.TimeUnit; import java.util.concurrent.locks.ReentrantReadWriteLock; - import org.apache.bookkeeper.client.AsyncCallback.CreateCallback; import org.apache.bookkeeper.client.BKException.BKNotEnoughBookiesException; import org.apache.bookkeeper.client.BookKeeper.DigestType; @@ -38,14 +37,16 @@ import org.apache.bookkeeper.client.SyncCallbackUtils.SyncCreateCallback; import org.apache.bookkeeper.client.api.CreateAdvBuilder; import org.apache.bookkeeper.client.api.CreateBuilder; +import org.apache.bookkeeper.client.api.LedgerMetadata; import org.apache.bookkeeper.client.api.WriteAdvHandle; import org.apache.bookkeeper.client.api.WriteFlag; import org.apache.bookkeeper.client.api.WriteHandle; +import org.apache.bookkeeper.common.util.MathUtils; import org.apache.bookkeeper.meta.LedgerIdGenerator; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GenericCallback; import org.apache.bookkeeper.stats.OpStatsLogger; -import org.apache.bookkeeper.util.MathUtils; +import org.apache.bookkeeper.versioning.Versioned; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -53,15 +54,20 @@ * Encapsulates asynchronous ledger create operation. 
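The reworked checkLedger() above fans a probe for the last entry out to the whole write quorum and only runs checkFragments() once every probe has answered, using a CompletableFuture as the gate. A standalone sketch of that gating, under assumed illustrative names:

import java.util.concurrent.CompletableFuture;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.IntFunction;

// Illustrative sketch: ask every member of the write quorum whether the entry
// may exist; report true if any replica says yes, but only after all answers
// (success or failure) have arrived.
final class EntryExistsProbe {
    static CompletableFuture<Boolean> probe(int quorumSize,
                                            IntFunction<CompletableFuture<Boolean>> readReplica) {
        CompletableFuture<Boolean> result = new CompletableFuture<>();
        AtomicInteger remaining = new AtomicInteger(quorumSize);
        AtomicBoolean mayExist = new AtomicBoolean(false);
        for (int i = 0; i < quorumSize; i++) {
            readReplica.apply(i).whenComplete((found, err) -> {
                if (err == null && Boolean.TRUE.equals(found)) {
                    mayExist.set(true);    // at least one replica has (or may have) the entry
                }
                if (remaining.decrementAndGet() == 0) {
                    result.complete(mayExist.get());   // all probes answered; safe to proceed
                }
            });
        }
        return result;
    }
}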
* */ -class LedgerCreateOp implements GenericCallback { +class LedgerCreateOp { static final Logger LOG = LoggerFactory.getLogger(LedgerCreateOp.class); final CreateCallback cb; - final LedgerMetadata metadata; + LedgerMetadata metadata; LedgerHandle lh; long ledgerId = -1L; final Object ctx; + final int ensembleSize; + final int writeQuorumSize; + final int ackQuorumSize; + final Map customMetadata; + final int metadataFormatVersion; final byte[] passwd; final BookKeeper bk; final DigestType digestType; @@ -101,15 +107,12 @@ class LedgerCreateOp implements GenericCallback { EnumSet writeFlags, BookKeeperClientStats clientStats) { this.bk = bk; - this.metadata = new LedgerMetadata( - ensembleSize, - writeQuorumSize, - ackQuorumSize, - digestType, - passwd, - customMetadata, - bk.getConf().getStoreSystemtimeAsLedgerCreationTime()); + this.metadataFormatVersion = bk.getConf().getLedgerMetadataFormatVersion(); + this.ensembleSize = ensembleSize; + this.writeQuorumSize = writeQuorumSize; + this.ackQuorumSize = ackQuorumSize; this.digestType = digestType; + this.customMetadata = customMetadata; this.writeFlags = writeFlags; this.passwd = passwd; this.cb = cb; @@ -123,38 +126,71 @@ class LedgerCreateOp implements GenericCallback { * Initiates the operation. */ public void initiate() { - // allocate ensemble first - - /* - * Adding bookies to ledger handle - */ - - List ensemble; - try { - ensemble = bk.getBookieWatcher() - .newEnsemble(metadata.getEnsembleSize(), - metadata.getWriteQuorumSize(), - metadata.getAckQuorumSize(), - metadata.getCustomMetadata()); - } catch (BKNotEnoughBookiesException e) { - LOG.error("Not enough bookies to create ledger"); - createComplete(e.getCode(), null); - return; + int actualEnsembleSize = ensembleSize; + List ensemble = null; + // select bookies for first ensemble + if (bk.getConf().getOpportunisticStriping()) { + BKNotEnoughBookiesException lastError = null; + // we would like to select ensembleSize bookies, but + // we can settle for writeQuorumSize + while (actualEnsembleSize >= writeQuorumSize) { + try { + ensemble = bk.getBookieWatcher() + .newEnsemble(actualEnsembleSize, writeQuorumSize, ackQuorumSize, customMetadata); + lastError = null; + break; + } catch (BKNotEnoughBookiesException e) { + if (actualEnsembleSize >= writeQuorumSize + 1) { + LOG.info("Not enough bookies to create ledger with ensembleSize={}," + + " writeQuorumSize={} and ackQuorumSize={}, opportunisticStriping enabled, try again", + actualEnsembleSize, writeQuorumSize, ackQuorumSize); + } + lastError = e; + actualEnsembleSize--; + } + } + if (lastError != null) { + LOG.error("Not enough bookies to create ledger with ensembleSize={}," + + " writeQuorumSize={} and ackQuorumSize={}", + actualEnsembleSize, writeQuorumSize, ackQuorumSize); + createComplete(lastError.getCode(), null); + return; + } + } else { + try { + ensemble = bk.getBookieWatcher() + .newEnsemble(actualEnsembleSize, writeQuorumSize, ackQuorumSize, customMetadata); + } catch (BKNotEnoughBookiesException e) { + LOG.error("Not enough bookies to create ledger with ensembleSize={}," + + " writeQuorumSize={} and ackQuorumSize={}", + actualEnsembleSize, writeQuorumSize, ackQuorumSize); + createComplete(e.getCode(), null); + return; + } + } + LedgerMetadataBuilder metadataBuilder = LedgerMetadataBuilder.create() + .withEnsembleSize(actualEnsembleSize).withWriteQuorumSize(writeQuorumSize).withAckQuorumSize(ackQuorumSize) + .withDigestType(digestType.toApiDigestType()).withPassword(passwd); + metadataBuilder.newEnsembleEntry(0L, 
ensemble); + if (customMetadata != null) { + metadataBuilder.withCustomMetadata(customMetadata); + } + metadataBuilder.withMetadataFormatVersion(metadataFormatVersion); + if (bk.getConf().getStoreSystemtimeAsLedgerCreationTime()) { + metadataBuilder.withCreationTime(System.currentTimeMillis()).storingCreationTime(true); } - /* - * Add ensemble to the configuration - */ - metadata.addEnsemble(0L, ensemble); if (this.generateLedgerId) { - generateLedgerIdAndCreateLedger(); + generateLedgerIdAndCreateLedger(metadataBuilder); } else { + this.metadata = metadataBuilder.withId(ledgerId).build(); // Create ledger with supplied ledgerId - bk.getLedgerManager().createLedgerMetadata(ledgerId, metadata, LedgerCreateOp.this); + bk.getLedgerManager().createLedgerMetadata(ledgerId, metadata) + .whenComplete((written, exception) -> metadataCallback(written, exception, metadataBuilder)); } } - void generateLedgerIdAndCreateLedger() { + void generateLedgerIdAndCreateLedger(LedgerMetadataBuilder metadataBuilder) { // generate a ledgerId final LedgerIdGenerator ledgerIdGenerator = bk.getLedgerIdGenerator(); ledgerIdGenerator.generateLedgerId(new GenericCallback() { @@ -165,8 +201,10 @@ public void operationComplete(int rc, Long ledgerId) { return; } LedgerCreateOp.this.ledgerId = ledgerId; + LedgerCreateOp.this.metadata = metadataBuilder.withId(ledgerId).build(); // create a ledger with metadata - bk.getLedgerManager().createLedgerMetadata(ledgerId, metadata, LedgerCreateOp.this); + bk.getLedgerManager().createLedgerMetadata(ledgerId, metadata) + .whenComplete((written, exception) -> metadataCallback(written, exception, metadataBuilder)); } }); } @@ -184,44 +222,46 @@ public void initiateAdv(final long ledgerId) { } /** - * Callback when created ledger. + * Callback when metadata store has responded. 
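The opportunistic-striping branch of initiate() above shrinks the requested ensemble one bookie at a time rather than failing outright, stopping at the write quorum size. The same retry-with-degradation loop reduced to its core (pickEnsemble stands in for BookieWatcher#newEnsemble, and the exception type is simplified):

import java.util.List;
import java.util.function.IntFunction;

// Illustrative sketch: prefer a wide ensemble for striping, but accept any
// size down to the write quorum before giving up.
final class OpportunisticSelector {
    static List<String> select(int preferredSize, int writeQuorumSize,
                               IntFunction<List<String>> pickEnsemble) {
        RuntimeException lastError = null;
        for (int size = preferredSize; size >= writeQuorumSize; size--) {
            try {
                return pickEnsemble.apply(size);   // enough bookies at this size
            } catch (RuntimeException notEnoughBookies) {
                lastError = notEnoughBookies;      // shrink and retry
            }
        }
        // could not even assemble a write quorum; surface the last failure
        throw lastError != null ? lastError
                : new IllegalArgumentException("preferredSize < writeQuorumSize");
    }
}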
*/ - @Override - public void operationComplete(int rc, LedgerMetadata writtenMetadata) { - if (this.generateLedgerId && (BKException.Code.LedgerExistException == rc)) { - // retry to generate a new ledger id - generateLedgerIdAndCreateLedger(); - return; - } else if (BKException.Code.OK != rc) { - createComplete(rc, null); - return; - } - - try { - if (adv) { - lh = new LedgerHandleAdv(bk.getClientCtx(), ledgerId, metadata, digestType, passwd, writeFlags); + private void metadataCallback(Versioned writtenMetadata, + Throwable exception, LedgerMetadataBuilder metadataBuilder) { + if (exception != null) { + if (this.generateLedgerId + && (BKException.getExceptionCode(exception) == BKException.Code.LedgerExistException)) { + // retry to generate a new ledger id + generateLedgerIdAndCreateLedger(metadataBuilder); } else { - lh = new LedgerHandle(bk.getClientCtx(), ledgerId, metadata, digestType, passwd, writeFlags); + createComplete(BKException.getExceptionCode(exception), null); + } + } else { + try { + if (adv) { + lh = new LedgerHandleAdv(bk.getClientCtx(), ledgerId, writtenMetadata, + digestType, passwd, writeFlags); + } else { + lh = new LedgerHandle(bk.getClientCtx(), ledgerId, writtenMetadata, digestType, passwd, writeFlags); + } + } catch (GeneralSecurityException e) { + LOG.error("Security exception while creating ledger: " + ledgerId, e); + createComplete(BKException.Code.DigestNotInitializedException, null); + return; + } catch (NumberFormatException e) { + LOG.error("Incorrectly entered parameter throttle: " + bk.getConf().getThrottleValue(), e); + createComplete(BKException.Code.IncorrectParameterException, null); + return; } - } catch (GeneralSecurityException e) { - LOG.error("Security exception while creating ledger: " + ledgerId, e); - createComplete(BKException.Code.DigestNotInitializedException, null); - return; - } catch (NumberFormatException e) { - LOG.error("Incorrectly entered parameter throttle: " + bk.getConf().getThrottleValue(), e); - createComplete(BKException.Code.IncorrectParameterException, null); - return; - } - List curEns = lh.getLedgerMetadata().getEnsemble(0L); - LOG.info("Ensemble: {} for ledger: {}", curEns, lh.getId()); + List curEns = lh.getLedgerMetadata().getEnsembleAt(0L); + LOG.info("Ensemble: {} for ledger: {}", curEns, lh.getId()); - for (BookieSocketAddress bsa : curEns) { - clientStats.getEnsembleBookieDistributionCounter(bsa.toString()).inc(); - } + for (BookieId bsa : curEns) { + clientStats.getEnsembleBookieDistributionCounter(bsa.toString()).inc(); + } - // return the ledger handle back - createComplete(BKException.Code.OK, lh); + // return the ledger handle back + createComplete(BKException.Code.OK, lh); + } } private void createComplete(int rc, LedgerHandle lh) { @@ -231,7 +271,11 @@ private void createComplete(int rc, LedgerHandle lh) { } else { createOpLogger.registerSuccessfulEvent(MathUtils.elapsedNanos(startTime), TimeUnit.NANOSECONDS); } - cb.createComplete(rc, lh, ctx); + if (lh != null) { // lh is null in case of errors + lh.executeOrdered(() -> cb.createComplete(rc, lh, ctx)); + } else { + cb.createComplete(rc, null, ctx); + } } public static class CreateBuilderImpl implements CreateBuilder { diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerDeleteOp.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerDeleteOp.java index f2461d50898..a19bd761de0 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerDeleteOp.java +++ 
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerDeleteOp.java @@ -24,13 +24,11 @@ import java.util.concurrent.CompletableFuture; import java.util.concurrent.TimeUnit; import java.util.concurrent.locks.ReentrantReadWriteLock; - import org.apache.bookkeeper.client.AsyncCallback.DeleteCallback; import org.apache.bookkeeper.client.SyncCallbackUtils.SyncDeleteCallback; import org.apache.bookkeeper.client.api.DeleteBuilder; +import org.apache.bookkeeper.common.util.MathUtils; import org.apache.bookkeeper.stats.OpStatsLogger; -import org.apache.bookkeeper.util.MathUtils; -import org.apache.bookkeeper.util.OrderedGenericCallback; import org.apache.bookkeeper.versioning.Version; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -39,7 +37,7 @@ * Encapsulates asynchronous ledger delete operation. * */ -class LedgerDeleteOp extends OrderedGenericCallback { +class LedgerDeleteOp { static final Logger LOG = LoggerFactory.getLogger(LedgerDeleteOp.class); @@ -64,7 +62,6 @@ class LedgerDeleteOp extends OrderedGenericCallback { */ LedgerDeleteOp(BookKeeper bk, BookKeeperClientStats clientStats, long ledgerId, DeleteCallback cb, Object ctx) { - super(bk.getMainWorkerPool(), ledgerId); this.bk = bk; this.ledgerId = ledgerId; this.cb = cb; @@ -79,20 +76,15 @@ class LedgerDeleteOp extends OrderedGenericCallback { public void initiate() { // Asynchronously delete the ledger from meta manager // When this completes, it will invoke the callback method below. - bk.getLedgerManager().removeLedgerMetadata(ledgerId, Version.ANY, this); - } - - /** - * Implements Delete Callback. - */ - @Override - public void safeOperationComplete(int rc, Void result) { - if (BKException.Code.OK != rc) { - deleteOpLogger.registerFailedEvent(MathUtils.elapsedNanos(startTime), TimeUnit.NANOSECONDS); - } else { - deleteOpLogger.registerSuccessfulEvent(MathUtils.elapsedNanos(startTime), TimeUnit.NANOSECONDS); - } - cb.deleteComplete(rc, this.ctx); + bk.getLedgerManager().removeLedgerMetadata(ledgerId, Version.ANY) + .whenCompleteAsync((ignore, exception) -> { + if (exception != null) { + deleteOpLogger.registerFailedEvent(MathUtils.elapsedNanos(startTime), TimeUnit.NANOSECONDS); + } else { + deleteOpLogger.registerSuccessfulEvent(MathUtils.elapsedNanos(startTime), TimeUnit.NANOSECONDS); + } + cb.deleteComplete(BKException.getExceptionCode(exception), this.ctx); + }, bk.getMainWorkerPool().chooseThread(ledgerId)); } @Override diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerEntry.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerEntry.java index 37ae70d28db..05bbd09f30f 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerEntry.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerEntry.java @@ -24,9 +24,7 @@ import io.netty.buffer.ByteBuf; import io.netty.buffer.ByteBufInputStream; - import java.io.InputStream; - import org.apache.bookkeeper.client.impl.LedgerEntryImpl; import org.apache.bookkeeper.conf.ClientConfiguration; diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerFragment.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerFragment.java index 49b8de26690..ad84aaaa474 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerFragment.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerFragment.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more 
contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -23,7 +23,7 @@ import java.util.List; import java.util.Set; import java.util.SortedMap; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.BookieId; /** * Represents the entries of a segment of a ledger which are stored on subset of @@ -33,14 +33,15 @@ */ public class LedgerFragment { private final Set bookieIndexes; - private final List ensemble; + private final List ensemble; private final long firstEntryId; private final long lastKnownEntryId; private final long ledgerId; private final DistributionSchedule schedule; private final boolean isLedgerClosed; + private ReplicateType replicateType = ReplicateType.DATA_LOSS; - LedgerFragment(LedgerHandle lh, + public LedgerFragment(LedgerHandle lh, long firstEntryId, long lastKnownEntryId, Set bookieIndexes) { @@ -48,15 +49,20 @@ public class LedgerFragment { this.firstEntryId = firstEntryId; this.lastKnownEntryId = lastKnownEntryId; this.bookieIndexes = bookieIndexes; - this.ensemble = lh.getLedgerMetadata().getEnsemble(firstEntryId); + this.ensemble = lh.getLedgerMetadata().getEnsembleAt(firstEntryId); this.schedule = lh.getDistributionSchedule(); - SortedMap> ensembles = lh - .getLedgerMetadata().getEnsembles(); + SortedMap> ensembles = lh + .getLedgerMetadata().getAllEnsembles(); + // A ledger fragment is considered closed if either of two conditions holds: + // 1. The ledger itself is closed, or + // 2. This fragment is not the last fragment and this ledger's lastAddConfirmed >= ensembles.lastKey() - 1. + // The second case happens when the ledger's last ensemble is empty this.isLedgerClosed = lh.getLedgerMetadata().isClosed() - || !ensemble.equals(ensembles.get(ensembles.lastKey())); + || (!ensemble.equals(ensembles.get(ensembles.lastKey())) + && lh.getLastAddConfirmed() >= ensembles.lastKey() - 1); } - LedgerFragment(LedgerFragment lf, Set subset) { + public LedgerFragment(LedgerFragment lf, Set subset) { this.ledgerId = lf.ledgerId; this.firstEntryId = lf.firstEntryId; this.lastKnownEntryId = lf.lastKnownEntryId; @@ -91,27 +97,27 @@ public boolean isClosed() { return isLedgerClosed; } - long getLedgerId() { + public long getLedgerId() { return ledgerId; } - long getFirstEntryId() { + public long getFirstEntryId() { return firstEntryId; } - long getLastKnownEntryId() { + public long getLastKnownEntryId() { return lastKnownEntryId; } /** * Gets the failedBookie address. 
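Create and delete completions in the preceding files are now both dispatched via a thread chosen by ledger id (executeOrdered, chooseThread), so callbacks for one ledger never race each other. A toy model of that per-key ordering, not the real OrderedExecutor:

import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

// Illustrative sketch: a fixed pool of single-threaded executors; all work
// keyed by the same ledger id lands on the same thread, in submission order.
final class OrderedPool {
    private final ExecutorService[] lanes;

    OrderedPool(int numThreads) {
        lanes = new ExecutorService[numThreads];
        for (int i = 0; i < numThreads; i++) {
            lanes[i] = Executors.newSingleThreadExecutor();
        }
    }

    ExecutorService chooseThread(long ledgerId) {
        return lanes[(int) ((ledgerId & Long.MAX_VALUE) % lanes.length)];
    }

    void executeOrdered(long ledgerId, Runnable task) {
        chooseThread(ledgerId).execute(task);
    }
}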
*/ - public BookieSocketAddress getAddress(int bookieIndex) { + public BookieId getAddress(int bookieIndex) { return ensemble.get(bookieIndex); } - public Set getAddresses() { - Set addresses = new HashSet(); + public Set getAddresses() { + Set addresses = new HashSet(); for (int bookieIndex : bookieIndexes) { addresses.add(ensemble.get(bookieIndex)); } @@ -131,16 +137,18 @@ public Set getBookiesIndexes() { * @return entryId */ public long getFirstStoredEntryId() { - Long firstEntry = null; + long firstEntry = LedgerHandle.INVALID_ENTRY_ID; for (int bookieIndex : bookieIndexes) { Long firstStoredEntryForBookie = getFirstStoredEntryId(bookieIndex); - if (null == firstEntry) { - firstEntry = firstStoredEntryForBookie; - } else if (null != firstStoredEntryForBookie) { - firstEntry = Math.min(firstEntry, firstStoredEntryForBookie); + if (firstStoredEntryForBookie != LedgerHandle.INVALID_ENTRY_ID) { + if (firstEntry == LedgerHandle.INVALID_ENTRY_ID) { + firstEntry = firstStoredEntryForBookie; + } else { + firstEntry = Math.min(firstEntry, firstStoredEntryForBookie); + } } } - return null == firstEntry ? LedgerHandle.INVALID_ENTRY_ID : firstEntry; + return firstEntry; } /** @@ -169,16 +177,18 @@ public Long getFirstStoredEntryId(int bookieIndex) { * @return entryId */ public long getLastStoredEntryId() { - Long lastEntry = null; + long lastEntry = LedgerHandle.INVALID_ENTRY_ID; for (int bookieIndex : bookieIndexes) { Long lastStoredEntryIdForBookie = getLastStoredEntryId(bookieIndex); - if (null == lastEntry) { - lastEntry = lastStoredEntryIdForBookie; - } else if (null != lastStoredEntryIdForBookie) { - lastEntry = Math.max(lastEntry, lastStoredEntryIdForBookie); + if (lastStoredEntryIdForBookie != LedgerHandle.INVALID_ENTRY_ID) { + if (lastEntry == LedgerHandle.INVALID_ENTRY_ID) { + lastEntry = lastStoredEntryIdForBookie; + } else { + lastEntry = Math.max(lastEntry, lastStoredEntryIdForBookie); + } } } - return null == lastEntry ? LedgerHandle.INVALID_ENTRY_ID : lastEntry; + return lastEntry; } /** @@ -209,15 +219,31 @@ public boolean isStoredEntryId(long entryId, int bookieIndex) { * * @return the ensemble for the segment which this fragment is a part of */ - public List getEnsemble() { + public List getEnsemble() { return this.ensemble; } + public ReplicateType getReplicateType() { + return replicateType; + } + + public void setReplicateType(ReplicateType replicateType) { + this.replicateType = replicateType; + } + @Override public String toString() { return String.format("Fragment(LedgerID: %d, FirstEntryID: %d[%d], " - + "LastKnownEntryID: %d[%d], Host: %s, Closed: %s)", ledgerId, firstEntryId, + + "LastKnownEntryID: %d[%d], Host: %s, Closed: %s, Type: %s)", ledgerId, firstEntryId, getFirstStoredEntryId(), lastKnownEntryId, getLastStoredEntryId(), - getAddresses(), isLedgerClosed); + getAddresses(), isLedgerClosed, replicateType); + } + + /** + * ReplicateType. + */ + public enum ReplicateType { + DATA_LOSS, + DATA_NOT_ADHERING_PLACEMENT } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerFragmentReplicator.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerFragmentReplicator.java index eb11b30c5ac..f6d54e1d02b 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerFragmentReplicator.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerFragmentReplicator.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file * distributed with this work for additional information @@ -20,10 +20,18 @@ package org.apache.bookkeeper.client; import static org.apache.bookkeeper.client.LedgerHandle.INVALID_ENTRY_ID; +import static org.apache.bookkeeper.replication.ReplicationStats.NUM_BYTES_READ; +import static org.apache.bookkeeper.replication.ReplicationStats.NUM_BYTES_WRITTEN; +import static org.apache.bookkeeper.replication.ReplicationStats.NUM_ENTRIES_READ; +import static org.apache.bookkeeper.replication.ReplicationStats.NUM_ENTRIES_WRITTEN; +import static org.apache.bookkeeper.replication.ReplicationStats.READ_DATA_LATENCY; +import static org.apache.bookkeeper.replication.ReplicationStats.REPLICATION_WORKER_SCOPE; +import static org.apache.bookkeeper.replication.ReplicationStats.WRITE_DATA_LATENCY; +import com.google.common.util.concurrent.RateLimiter; +import io.netty.buffer.ByteBuf; import io.netty.buffer.Unpooled; - -import java.util.ArrayList; +import io.netty.util.ReferenceCounted; import java.util.Enumeration; import java.util.HashSet; import java.util.Iterator; @@ -31,24 +39,26 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; - +import java.util.function.BiConsumer; +import java.util.stream.Collectors; import org.apache.bookkeeper.client.AsyncCallback.ReadCallback; import org.apache.bookkeeper.client.api.WriteFlag; -import org.apache.bookkeeper.common.util.OrderedExecutor; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.common.util.MathUtils; +import org.apache.bookkeeper.conf.ClientConfiguration; +import org.apache.bookkeeper.meta.LedgerManager; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.proto.BookieProtocol; -import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GenericCallback; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.MultiCallback; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.WriteCallback; -import org.apache.bookkeeper.replication.ReplicationStats; import org.apache.bookkeeper.stats.Counter; import org.apache.bookkeeper.stats.NullStatsLogger; import org.apache.bookkeeper.stats.OpStatsLogger; import org.apache.bookkeeper.stats.StatsLogger; +import org.apache.bookkeeper.stats.annotations.StatsDoc; import org.apache.bookkeeper.util.ByteBufList; -import org.apache.bookkeeper.util.OrderedGenericCallback; import org.apache.zookeeper.AsyncCallback; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -57,27 +67,72 @@ * This is the helper class for replicating the fragments from one bookie to * another. 
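getFirstStoredEntryId() and getLastStoredEntryId() in LedgerFragment above now fold primitive longs over the INVALID_ENTRY_ID sentinel instead of juggling nullable Longs. The shape of that scan on its own (INVALID is a stand-in value):

// Illustrative sketch: fold a minimum over per-bookie first-entry ids, where
// INVALID marks "this bookie stores no entry of the fragment".
final class SentinelScan {
    static final long INVALID = -1L;   // stand-in for LedgerHandle.INVALID_ENTRY_ID

    static long firstStored(long[] perBookieFirstEntry) {
        long first = INVALID;
        for (long e : perBookieFirstEntry) {
            if (e != INVALID) {
                first = (first == INVALID) ? e : Math.min(first, e);
            }
        }
        return first;   // INVALID when no bookie stores anything
    }
}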
*/ +@StatsDoc( + name = REPLICATION_WORKER_SCOPE, + help = "Ledger fragment replicator related stats" +) public class LedgerFragmentReplicator { // BookKeeper instance private BookKeeper bkc; private StatsLogger statsLogger; + @StatsDoc( + name = NUM_ENTRIES_READ, + help = "Number of entries read by the replicator" + ) private final Counter numEntriesRead; + @StatsDoc( + name = NUM_BYTES_READ, + help = "The distribution of size of entries read by the replicator" + ) private final OpStatsLogger numBytesRead; + @StatsDoc( + name = NUM_ENTRIES_WRITTEN, + help = "Number of entries written by the replicator" + ) private final Counter numEntriesWritten; + @StatsDoc( + name = NUM_BYTES_WRITTEN, + help = "The distribution of size of entries written by the replicator" + ) private final OpStatsLogger numBytesWritten; + @StatsDoc( + name = READ_DATA_LATENCY, + help = "The distribution of latency of read entries by the replicator" + ) + private final OpStatsLogger readDataLatency; + @StatsDoc( + name = WRITE_DATA_LATENCY, + help = "The distribution of latency of write entries by the replicator" + ) + private final OpStatsLogger writeDataLatency; + + protected Throttler replicationThrottle = null; - public LedgerFragmentReplicator(BookKeeper bkc, StatsLogger statsLogger) { + private AtomicInteger averageEntrySize; + + private static final int INITIAL_AVERAGE_ENTRY_SIZE = 1024; + private static final double AVERAGE_ENTRY_SIZE_RATIO = 0.8; + private ClientConfiguration conf; + + public LedgerFragmentReplicator(BookKeeper bkc, StatsLogger statsLogger, ClientConfiguration conf) { this.bkc = bkc; this.statsLogger = statsLogger; - numEntriesRead = this.statsLogger.getCounter(ReplicationStats.NUM_ENTRIES_READ); - numBytesRead = this.statsLogger.getOpStatsLogger(ReplicationStats.NUM_BYTES_READ); - numEntriesWritten = this.statsLogger.getCounter(ReplicationStats.NUM_ENTRIES_WRITTEN); - numBytesWritten = this.statsLogger.getOpStatsLogger(ReplicationStats.NUM_BYTES_WRITTEN); + numEntriesRead = this.statsLogger.getCounter(NUM_ENTRIES_READ); + numBytesRead = this.statsLogger.getOpStatsLogger(NUM_BYTES_READ); + numEntriesWritten = this.statsLogger.getCounter(NUM_ENTRIES_WRITTEN); + numBytesWritten = this.statsLogger.getOpStatsLogger(NUM_BYTES_WRITTEN); + readDataLatency = this.statsLogger.getOpStatsLogger(READ_DATA_LATENCY); + writeDataLatency = this.statsLogger.getOpStatsLogger(WRITE_DATA_LATENCY); + if (conf.getReplicationRateByBytes() > 0) { + this.replicationThrottle = new Throttler(conf.getReplicationRateByBytes()); + } + averageEntrySize = new AtomicInteger(INITIAL_AVERAGE_ENTRY_SIZE); + this.conf = conf; } - public LedgerFragmentReplicator(BookKeeper bkc) { - this(bkc, NullStatsLogger.INSTANCE); + public LedgerFragmentReplicator(BookKeeper bkc, ClientConfiguration conf) { + this(bkc, NullStatsLogger.INSTANCE, conf); } private static final Logger LOG = LoggerFactory @@ -86,7 +141,8 @@ public LedgerFragmentReplicator(BookKeeper bkc) { private void replicateFragmentInternal(final LedgerHandle lh, final LedgerFragment lf, final AsyncCallback.VoidCallback ledgerFragmentMcb, - final Set newBookies) throws InterruptedException { + final Set newBookies, + final BiConsumer onReadEntryFailureCallback) throws InterruptedException { if (!lf.isClosed()) { LOG.error("Trying to replicate an unclosed fragment;" + " This is not safe {}", lf); @@ -96,17 +152,17 @@ private void replicateFragmentInternal(final LedgerHandle lh, } Long startEntryId = lf.getFirstStoredEntryId(); Long endEntryId = lf.getLastStoredEntryId(); - if 
(endEntryId == null) { - /* - * Ideally this should never happen if bookie failure is taken care - * of properly. Nothing we can do though in this case. - */ - LOG.warn("Dead bookie (" + lf.getAddresses() - + ") is still part of the current" - + " active ensemble for ledgerId: " + lh.getId()); - ledgerFragmentMcb.processResult(BKException.Code.OK, null, null); - return; + + /* + * if startEntryId is INVALID_ENTRY_ID then endEntryId should be + * INVALID_ENTRY_ID and vice versa. + */ + if (startEntryId == INVALID_ENTRY_ID ^ endEntryId == INVALID_ENTRY_ID) { + LOG.error("For LedgerFragment: {}, seeing inconsistent firstStoredEntryId: {} and lastStoredEntryId: {}", + lf, startEntryId, endEntryId); + assert false; } + if (startEntryId > endEntryId || endEntryId <= INVALID_ENTRY_ID) { // for an open ledger with no entries, the start entry id is 0 and // the end entry id is -1. @@ -115,26 +171,41 @@ private void replicateFragmentInternal(final LedgerHandle lh, return; } - /* - * Add all the entries to entriesToReplicate list from - * firstStoredEntryId to lastStoredEntryID. - */ - List entriesToReplicate = new LinkedList(); - long lastStoredEntryId = lf.getLastStoredEntryId(); - for (long i = lf.getFirstStoredEntryId(); i <= lastStoredEntryId; i++) { - entriesToReplicate.add(i); - } /* * Now asynchronously replicate all of the entries for the ledger * fragment that were on the dead bookie. */ int entriesToReplicateCnt = (int) (endEntryId - startEntryId + 1); MultiCallback ledgerFragmentEntryMcb = new MultiCallback( - entriesToReplicate.size(), ledgerFragmentMcb, null, BKException.Code.OK, + entriesToReplicateCnt, ledgerFragmentMcb, null, BKException.Code.OK, BKException.Code.LedgerRecoveryException); - for (final Long entryId : entriesToReplicate) { - recoverLedgerFragmentEntry(entryId, lh, ledgerFragmentEntryMcb, - newBookies); + if (this.replicationThrottle != null) { + this.replicationThrottle.resetRate(this.conf.getReplicationRateByBytes()); + } + + if (conf.isRecoveryBatchReadEnabled() + && conf.getUseV2WireProtocol() + && conf.isBatchReadEnabled() + && lh.getLedgerMetadata().getEnsembleSize() == lh.getLedgerMetadata().getWriteQuorumSize()) { + batchRecoverLedgerFragmentEntry(startEntryId, endEntryId, lh, ledgerFragmentEntryMcb, + newBookies, onReadEntryFailureCallback); + + } else { + /* + * Add all the entries to entriesToReplicate list from + * firstStoredEntryId to lastStoredEntryId. 
+ */ + List entriesToReplicate = new LinkedList(); + long lastStoredEntryId = lf.getLastStoredEntryId(); + for (long i = lf.getFirstStoredEntryId(); i <= lastStoredEntryId; i++) { + entriesToReplicate.add(i); + } + for (final Long entryId : entriesToReplicate) { + recoverLedgerFragmentEntry(entryId, lh, ledgerFragmentEntryMcb, + newBookies, onReadEntryFailureCallback); + } } + } /** @@ -159,14 +230,15 @@ private void replicateFragmentInternal(final LedgerHandle lh, */ void replicate(final LedgerHandle lh, final LedgerFragment lf, final AsyncCallback.VoidCallback ledgerFragmentMcb, - final Set targetBookieAddresses) + final Set targetBookieAddresses, + final BiConsumer onReadEntryFailureCallback) throws InterruptedException { - Set partionedFragments = splitIntoSubFragments(lh, lf, + Set partitionedFragments = splitIntoSubFragments(lh, lf, bkc.getConf().getRereplicationEntryBatchSize()); LOG.info("Replicating fragment {} in {} sub fragments.", - lf, partionedFragments.size()); - replicateNextBatch(lh, partionedFragments.iterator(), - ledgerFragmentMcb, targetBookieAddresses); + lf, partitionedFragments.size()); + replicateNextBatch(lh, partitionedFragments.iterator(), + ledgerFragmentMcb, targetBookieAddresses, onReadEntryFailureCallback); } /** @@ -175,7 +247,8 @@ void replicate(final LedgerHandle lh, final LedgerFragment lf, private void replicateNextBatch(final LedgerHandle lh, final Iterator fragments, final AsyncCallback.VoidCallback ledgerFragmentMcb, - final Set targetBookieAddresses) { + final Set targetBookieAddresses, + final BiConsumer onReadEntryFailureCallback) { if (fragments.hasNext()) { try { replicateFragmentInternal(lh, fragments.next(), @@ -188,11 +261,12 @@ public void processResult(int rc, String v, Object ctx) { } else { replicateNextBatch(lh, fragments, ledgerFragmentMcb, - targetBookieAddresses); + targetBookieAddresses, + onReadEntryFailureCallback); } } - }, targetBookieAddresses); + }, targetBookieAddresses, onReadEntryFailureCallback); } catch (InterruptedException e) { ledgerFragmentMcb.processResult( BKException.Code.InterruptedException, null, null); @@ -220,7 +294,18 @@ static Set splitIntoSubFragments(LedgerHandle lh, long firstEntryId = ledgerFragment.getFirstStoredEntryId(); long lastEntryId = ledgerFragment.getLastStoredEntryId(); - long numberOfEntriesToReplicate = (lastEntryId - firstEntryId) + 1; + + /* + * if firstEntryId is INVALID_ENTRY_ID then lastEntryId should be + * INVALID_ENTRY_ID and vice versa. + */ + if (firstEntryId == INVALID_ENTRY_ID ^ lastEntryId == INVALID_ENTRY_ID) { + LOG.error("For LedgerFragment: {}, seeing inconsistent firstStoredEntryId: {} and lastStoredEntryId: {}", + ledgerFragment, firstEntryId, lastEntryId); + assert false; + } + + long numberOfEntriesToReplicate = firstEntryId == INVALID_ENTRY_ID ? 0 : (lastEntryId - firstEntryId) + 1; long splitsWithFullEntries = numberOfEntriesToReplicate / rereplicationEntryBatchSize; @@ -263,15 +348,22 @@ static Set splitIntoSubFragments(LedgerHandle lh, * New bookies we want to use to recover and replicate the ledger * entries that were stored on the failed bookie. 
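splitIntoSubFragments() above carves the inclusive [firstStoredEntryId, lastStoredEntryId] range into windows of rereplicationEntryBatchSize entries. The arithmetic, extracted into a sketch:

import java.util.ArrayList;
import java.util.List;

// Illustrative sketch: inclusive [first, last] split into consecutive windows
// of at most batchSize entries; an empty range yields no batches.
final class BatchSplitter {
    static List<long[]> split(long first, long last, long batchSize) {
        List<long[]> batches = new ArrayList<>();
        if (first < 0 || last < first) {
            return batches;   // nothing stored (e.g. both bounds INVALID_ENTRY_ID)
        }
        for (long start = first; start <= last; start += batchSize) {
            batches.add(new long[] {start, Math.min(start + batchSize - 1, last)});
        }
        return batches;
    }
}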
*/ - private void recoverLedgerFragmentEntry(final Long entryId, + void recoverLedgerFragmentEntry(final Long entryId, final LedgerHandle lh, final AsyncCallback.VoidCallback ledgerFragmentEntryMcb, - final Set newBookies) throws InterruptedException { + final Set newBookies, + final BiConsumer onReadEntryFailureCallback) throws InterruptedException { + final long ledgerId = lh.getId(); final AtomicInteger numCompleted = new AtomicInteger(0); final AtomicBoolean completed = new AtomicBoolean(false); + + if (replicationThrottle != null) { + replicationThrottle.acquire(averageEntrySize.get()); + } + final WriteCallback multiWriteCallback = new WriteCallback() { @Override - public void writeComplete(int rc, long ledgerId, long entryId, BookieSocketAddress addr, Object ctx) { + public void writeComplete(int rc, long ledgerId, long entryId, BookieId addr, Object ctx) { if (rc != BKException.Code.OK) { LOG.error("BK error writing entry for ledgerId: {}, entryId: {}, bookie: {}", ledgerId, entryId, addr, BKException.create(rc)); @@ -293,6 +385,8 @@ public void writeComplete(int rc, long ledgerId, long entryId, BookieSocketAddre } } }; + + long startReadEntryTime = MathUtils.nowInNano(); /* * Read the ledger entry using the LedgerHandle. This will allow us to * read the entry from one of the other replicated bookies other than @@ -305,9 +399,14 @@ public void readComplete(int rc, LedgerHandle lh, if (rc != BKException.Code.OK) { LOG.error("BK error reading ledger entry: " + entryId, BKException.create(rc)); + onReadEntryFailureCallback.accept(ledgerId, entryId); ledgerFragmentEntryMcb.processResult(rc, null, null); return; } + + readDataLatency.registerSuccessfulEvent(MathUtils.elapsedNanos(startReadEntryTime), + TimeUnit.NANOSECONDS); + /* * Now that we've read the ledger entry, write it to the new * bookie we've selected. 
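recoverLedgerFragmentEntry() above reads an entry once, then writes it to every target bookie; the multi-write callback reports the first failure exactly once and succeeds only after all targets ack. That completion discipline as a standalone sketch:

import java.util.List;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Function;

// Illustrative sketch: fan one write out to all targets; complete exceptionally
// on the first error, successfully only once every target has acked.
final class FanOutWriter {
    static <B> CompletableFuture<Void> writeToAll(List<B> targets,
                                                  Function<B, CompletableFuture<Void>> writeTo) {
        CompletableFuture<Void> result = new CompletableFuture<>();
        AtomicInteger acked = new AtomicInteger(0);
        AtomicBoolean done = new AtomicBoolean(false);
        for (B target : targets) {
            writeTo.apply(target).whenComplete((ok, err) -> {
                if (err != null) {
                    if (done.compareAndSet(false, true)) {
                        result.completeExceptionally(err);   // report only the first failure
                    }
                } else if (acked.incrementAndGet() == targets.size()
                        && done.compareAndSet(false, true)) {
                    result.complete(null);                   // all replicas acked
                }
            });
        }
        return result;
    }
}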
@@ -317,40 +416,165 @@ public void readComplete(int rc, LedgerHandle lh, final long dataLength = data.length; numEntriesRead.inc(); numBytesRead.registerSuccessfulValue(dataLength); - ByteBufList toSend = lh.getDigestManager() + + ReferenceCounted toSend = lh.getDigestManager() .computeDigestAndPackageForSending(entryId, lh.getLastAddConfirmed(), entry.getLength(), - Unpooled.wrappedBuffer(data, 0, data.length)); - for (BookieSocketAddress newBookie : newBookies) { + Unpooled.wrappedBuffer(data, 0, data.length), + lh.getLedgerKey(), + BookieProtocol.FLAG_RECOVERY_ADD + ); + if (replicationThrottle != null) { + if (toSend instanceof ByteBuf) { + updateAverageEntrySize(((ByteBuf) toSend).readableBytes()); + } else if (toSend instanceof ByteBufList) { + updateAverageEntrySize(((ByteBufList) toSend).readableBytes()); + } + } + for (BookieId newBookie : newBookies) { + long startWriteEntryTime = MathUtils.nowInNano(); bkc.getBookieClient().addEntry(newBookie, lh.getId(), - lh.getLedgerKey(), entryId, ByteBufList.clone(toSend), + lh.getLedgerKey(), entryId, toSend, multiWriteCallback, dataLength, BookieProtocol.FLAG_RECOVERY_ADD, false, WriteFlag.NONE); + writeDataLatency.registerSuccessfulEvent( + MathUtils.elapsedNanos(startWriteEntryTime), TimeUnit.NANOSECONDS); } toSend.release(); } }, null); } + void batchRecoverLedgerFragmentEntry(final long startEntryId, + final long endEntryId, + final LedgerHandle lh, + final AsyncCallback.VoidCallback ledgerFragmentMcb, + final Set newBookies, + final BiConsumer onReadEntryFailureCallback) + throws InterruptedException { + int entriesToReplicateCnt = (int) (endEntryId - startEntryId + 1); + int maxBytesToReplicate = conf.getReplicationRateByBytes(); + if (replicationThrottle != null) { + if (maxBytesToReplicate != -1 && maxBytesToReplicate > averageEntrySize.get() * entriesToReplicateCnt) { + maxBytesToReplicate = averageEntrySize.get() * entriesToReplicateCnt; + } + replicationThrottle.acquire(maxBytesToReplicate); + } + + lh.asyncBatchReadEntries(startEntryId, entriesToReplicateCnt, maxBytesToReplicate, + new ReadCallback() { + @Override + public void readComplete(int rc, LedgerHandle lh, Enumeration seq, Object ctx) { + if (rc != BKException.Code.OK) { + LOG.error("BK error reading ledger entries: {} - {}", + startEntryId, endEntryId, BKException.create(rc)); + onReadEntryFailureCallback.accept(lh.getId(), startEntryId); + for (int i = 0; i < entriesToReplicateCnt; i++) { + ledgerFragmentMcb.processResult(rc, null, null); + } + return; + } + long lastEntryId = startEntryId; + while (seq.hasMoreElements()) { + LedgerEntry entry = seq.nextElement(); + lastEntryId = entry.getEntryId(); + byte[] data = entry.getEntry(); + final long dataLength = data.length; + numEntriesRead.inc(); + numBytesRead.registerSuccessfulValue(dataLength); + + ReferenceCounted toSend = lh.getDigestManager() + .computeDigestAndPackageForSending(entry.getEntryId(), + lh.getLastAddConfirmed(), entry.getLength(), + Unpooled.wrappedBuffer(data, 0, data.length), + lh.getLedgerKey(), + BookieProtocol.FLAG_RECOVERY_ADD); + if (replicationThrottle != null) { + if (toSend instanceof ByteBuf) { + updateAverageEntrySize(((ByteBuf) toSend).readableBytes()); + } else if (toSend instanceof ByteBufList) { + updateAverageEntrySize(((ByteBufList) toSend).readableBytes()); + } + } + AtomicInteger numCompleted = new AtomicInteger(0); + AtomicBoolean completed = new AtomicBoolean(false); + + WriteCallback multiWriteCallback = new WriteCallback() { + @Override + public void writeComplete(int rc, 
long ledgerId, long entryId, BookieId addr, Object ctx) { + if (rc != BKException.Code.OK) { + LOG.error("BK error writing entry for ledgerId: {}, entryId: {}, bookie: {}", + ledgerId, entryId, addr, BKException.create(rc)); + if (completed.compareAndSet(false, true)) { + ledgerFragmentMcb.processResult(rc, null, null); + } + } else { + numEntriesWritten.inc(); + if (ctx instanceof Long) { + numBytesWritten.registerSuccessfulValue((Long) ctx); + } + if (LOG.isDebugEnabled()) { + LOG.debug("Success writing ledger id {}, entry id {} to a new bookie {}!", + ledgerId, entryId, addr); + } + if (numCompleted.incrementAndGet() == newBookies.size() + && completed.compareAndSet(false, true)) { + ledgerFragmentMcb.processResult(rc, null, null); + } + } + } + }; + + for (BookieId newBookie : newBookies) { + long startWriteEntryTime = MathUtils.nowInNano(); + bkc.getBookieClient().addEntry(newBookie, lh.getId(), + lh.getLedgerKey(), entry.getEntryId(), toSend, + multiWriteCallback, dataLength, BookieProtocol.FLAG_RECOVERY_ADD, + false, WriteFlag.NONE); + writeDataLatency.registerSuccessfulEvent( + MathUtils.elapsedNanos(startWriteEntryTime), TimeUnit.NANOSECONDS); + } + toSend.release(); + } + if (lastEntryId != endEntryId) { + try { + batchRecoverLedgerFragmentEntry(lastEntryId + 1, endEntryId, lh, + ledgerFragmentMcb, newBookies, onReadEntryFailureCallback); + } catch (InterruptedException e) { + int remainingEntries = (int) (endEntryId - lastEntryId); + for (int i = 0; i < remainingEntries; i++) { + ledgerFragmentMcb.processResult(BKException.Code.InterruptedException, null, null); + } + } + } + } + }, null); + } + + private void updateAverageEntrySize(int toSendSize) { + averageEntrySize.updateAndGet(value -> (int) (value * AVERAGE_ENTRY_SIZE_RATIO + + (1 - AVERAGE_ENTRY_SIZE_RATIO) * toSendSize)); + } + /** * Callback for recovery of a single ledger fragment. Once the fragment has * had all entries replicated, update the ensemble in zookeeper. Once - * finished propogate callback up to ledgerFragmentsMcb which should be a + * finished propagate callback up to ledgerFragmentsMcb which should be a * multicallback responsible for all fragments in a single ledger */ static class SingleFragmentCallback implements AsyncCallback.VoidCallback { final AsyncCallback.VoidCallback ledgerFragmentsMcb; final LedgerHandle lh; - final OrderedExecutor mainWorkerPool; + final LedgerManager ledgerManager; final long fragmentStartId; - final Map oldBookie2NewBookie; + final Map oldBookie2NewBookie; SingleFragmentCallback(AsyncCallback.VoidCallback ledgerFragmentsMcb, - LedgerHandle lh, OrderedExecutor mainWorkerPool, long fragmentStartId, - Map oldBookie2NewBookie) { + LedgerHandle lh, LedgerManager ledgerManager, long fragmentStartId, + Map oldBookie2NewBookie) { this.ledgerFragmentsMcb = ledgerFragmentsMcb; this.lh = lh; - this.mainWorkerPool = mainWorkerPool; + this.ledgerManager = ledgerManager; this.fragmentStartId = fragmentStartId; this.oldBookie2NewBookie = oldBookie2NewBookie; } @@ -363,7 +587,7 @@ public void processResult(int rc, String path, Object ctx) { ledgerFragmentsMcb.processResult(rc, null, null); return; } - updateEnsembleInfo(ledgerFragmentsMcb, fragmentStartId, lh, mainWorkerPool, oldBookie2NewBookie); + updateEnsembleInfo(ledgerManager, ledgerFragmentsMcb, fragmentStartId, lh, oldBookie2NewBookie); } } @@ -371,104 +595,64 @@ public void processResult(int rc, String path, Object ctx) { * Updates the ensemble with newBookie and notify the ensembleUpdatedCb. 
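updateAverageEntrySize() above keeps a running size estimate so the byte-rate throttle can be charged before an entry's true size is known; it is an exponential moving average with weight AVERAGE_ENTRY_SIZE_RATIO = 0.8 on the previous value. In isolation:

import java.util.concurrent.atomic.AtomicInteger;

// Illustrative sketch: exponential moving average of observed entry sizes,
// seeded with a 1 KiB guess, matching the constants used above.
final class EntrySizeEstimator {
    private static final double RATIO = 0.8;                   // weight of the old estimate
    private final AtomicInteger average = new AtomicInteger(1024);

    void observe(int entryBytes) {
        average.updateAndGet(old -> (int) (old * RATIO + (1 - RATIO) * entryBytes));
    }

    int estimate() {
        return average.get();
    }
}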
*/ private static void updateEnsembleInfo( - AsyncCallback.VoidCallback ensembleUpdatedCb, long fragmentStartId, - LedgerHandle lh, OrderedExecutor mainWorkerPool, - Map oldBookie2NewBookie) { - /* - * Update the ledger metadata's ensemble info to point to the new - * bookie. - */ - List ensemble = lh.getLedgerMetadata().getEnsembles().get(fragmentStartId); - List newEnsemble = new ArrayList<>(ensemble); - for (Map.Entry entry : oldBookie2NewBookie.entrySet()) { - int deadBookieIndex = newEnsemble.indexOf(entry.getKey()); - // update ensemble info might happen after re-read ledger metadata, so the ensemble might already - // change. if ensemble is already changed, skip replacing the bookie doesn't exist. - if (deadBookieIndex >= 0) { - newEnsemble.set(deadBookieIndex, entry.getValue()); - } else { - LOG.info("Bookie {} doesn't exist in ensemble {} anymore.", entry.getKey(), ensemble); - } - } - lh.getLedgerMetadata().updateEnsemble(fragmentStartId, newEnsemble); - lh.writeLedgerConfig(new UpdateEnsembleCb(ensembleUpdatedCb, - fragmentStartId, lh, mainWorkerPool, oldBookie2NewBookie)); + LedgerManager ledgerManager, AsyncCallback.VoidCallback ensembleUpdatedCb, long fragmentStartId, + LedgerHandle lh, Map oldBookie2NewBookie) { + + MetadataUpdateLoop updateLoop = new MetadataUpdateLoop( + ledgerManager, + lh.getId(), + lh::getVersionedLedgerMetadata, + (metadata) -> { + // returns true if any of old bookies exist in ensemble + List ensemble = metadata.getAllEnsembles().get(fragmentStartId); + return oldBookie2NewBookie.keySet().stream().anyMatch(ensemble::contains); + }, + (currentMetadata) -> { + // replace all old bookies with new bookies in ensemble + List newEnsemble = currentMetadata.getAllEnsembles().get(fragmentStartId) + .stream().map((bookie) -> oldBookie2NewBookie.getOrDefault(bookie, bookie)) + .collect(Collectors.toList()); + return LedgerMetadataBuilder.from(currentMetadata) + .replaceEnsembleEntry(fragmentStartId, newEnsemble).build(); + }, + lh::setLedgerMetadata); + + updateLoop.run().whenComplete((result, ex) -> { + if (ex == null) { + LOG.info("Updated ZK to point ledger fragments" + + " from old bookies to new bookies: {}", oldBookie2NewBookie); + + ensembleUpdatedCb.processResult(BKException.Code.OK, null, null); + } else { + LOG.error("Error updating ledger config metadata for ledgerId {}", lh.getId(), ex); + + ensembleUpdatedCb.processResult( + BKException.getExceptionCode(ex, BKException.Code.UnexpectedConditionException), + null, null); + } + }); } - /** - * Update the ensemble data with newBookie. re-reads the metadata on - * MetadataVersionException and update ensemble again. 
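updateEnsembleInfo() above now leans on MetadataUpdateLoop: read the current metadata, skip the write when no old bookie remains in the ensemble, otherwise transform and write back, retrying on version conflicts. A generic sketch of that read-check-transform-write loop (M stands in for Versioned<LedgerMetadata>; this simplified version retries on any write failure, where the real loop retries only on version conflicts):

import java.util.concurrent.CompletableFuture;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.function.Supplier;

// Illustrative sketch of an optimistic metadata update: writeBack is expected
// to fail when the stored version no longer matches the one that was read.
final class UpdateLoop {
    static <M> CompletableFuture<M> run(Supplier<CompletableFuture<M>> readCurrent,
                                        Predicate<M> needsUpdate,
                                        Function<M, M> transform,
                                        Function<M, CompletableFuture<M>> writeBack) {
        return readCurrent.get().thenCompose(current -> {
            if (!needsUpdate.test(current)) {
                return CompletableFuture.completedFuture(current);  // already done by someone else
            }
            return writeBack.apply(transform.apply(current))
                    .handle((written, err) -> err == null
                            ? CompletableFuture.completedFuture(written)
                            // conflict: re-read and try again (simplified: retries on any failure)
                            : run(readCurrent, needsUpdate, transform, writeBack))
                    .thenCompose(f -> f);
        });
    }
}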
On successfull - * updation, it will also notify to super call back - */ - private static class UpdateEnsembleCb implements GenericCallback { - final AsyncCallback.VoidCallback ensembleUpdatedCb; - final LedgerHandle lh; - final OrderedExecutor mainWorkerPool; - final long fragmentStartId; - final Map oldBookie2NewBookie; + static class Throttler { + private final RateLimiter rateLimiter; - public UpdateEnsembleCb(AsyncCallback.VoidCallback ledgerFragmentsMcb, - long fragmentStartId, LedgerHandle lh, - OrderedExecutor mainWorkerPool, - Map oldBookie2NewBookie) { - this.ensembleUpdatedCb = ledgerFragmentsMcb; - this.lh = lh; - this.mainWorkerPool = mainWorkerPool; - this.fragmentStartId = fragmentStartId; - this.oldBookie2NewBookie = oldBookie2NewBookie; + Throttler(int throttleBytes) { + this.rateLimiter = RateLimiter.create(throttleBytes); } - @Override - public void operationComplete(int rc, LedgerMetadata writtenMetadata) { - if (rc == BKException.Code.MetadataVersionException) { - LOG.warn("Two fragments attempted update at once; ledger id: " - + lh.getId() + " startid: " + fragmentStartId); - // try again, the previous success (with which this has - // conflicted) will have updated the stat other operations - // such as (addEnsemble) would update it too. - lh.rereadMetadata(new OrderedGenericCallback(mainWorkerPool, lh.getId()) { - @Override - public void safeOperationComplete(int rc, - LedgerMetadata newMeta) { - if (rc != BKException.Code.OK) { - LOG - .error("Error reading updated ledger metadata for ledger " - + lh.getId()); - ensembleUpdatedCb.processResult(rc, null, - null); - } else { - while (true) { - // temporary change, metadata really shouldn't be updated - // until the new metadata has been written successfully - LedgerMetadata currentMetadata = lh.getLedgerMetadata(); - if (lh.setLedgerMetadata(currentMetadata, newMeta)) { - break; - } - } - updateEnsembleInfo(ensembleUpdatedCb, - fragmentStartId, lh, mainWorkerPool, oldBookie2NewBookie); - } - } - @Override - public String toString() { - return String.format("ReReadMetadataForUpdateEnsemble(%d)", lh.getId()); - } - }); - return; - } else if (rc != BKException.Code.OK) { - LOG.error("Error updating ledger config metadata for ledgerId {} : {}", - lh.getId(), BKException.codeLogger(rc)); - } else { - LOG.info("Updated ZK for ledgerId: (" + lh.getId() + " : " - + fragmentStartId - + ") to point ledger fragments from old bookies to new bookies: " - + oldBookie2NewBookie); - } - /* - * Pass the return code result up the chain with the parent - * callback. - */ - ensembleUpdatedCb.processResult(rc, null, null); + // reset the rate of the limiter before replicating a fragment + void resetRate(int throttleBytes) { + this.rateLimiter.setRate(throttleBytes); + } + + // expose the current rate for unit tests + double getRate() { + return this.rateLimiter.getRate(); + } + + // acquire permits: when throttling by bytes, the size of this entry in bytes; when throttling by entries, 1. 
+ void acquire(int permits) { + rateLimiter.acquire(permits); } } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerHandle.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerHandle.java index 3e9f1c2a349..6a15cb42f72 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerHandle.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerHandle.java @@ -20,11 +20,11 @@ */ package org.apache.bookkeeper.client; +import static com.google.common.base.Preconditions.checkState; import static org.apache.bookkeeper.client.api.BKException.Code.ClientClosedException; import static org.apache.bookkeeper.client.api.BKException.Code.WriteException; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Objects; import com.google.common.cache.CacheBuilder; import com.google.common.cache.CacheLoader; import com.google.common.cache.LoadingCache; @@ -34,19 +34,19 @@ import io.netty.buffer.ByteBuf; import io.netty.buffer.Unpooled; import java.security.GeneralSecurityException; -import java.security.NoSuchAlgorithmException; import java.util.ArrayList; import java.util.Arrays; import java.util.EnumSet; import java.util.Enumeration; import java.util.HashMap; -import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.Queue; import java.util.Set; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ConcurrentLinkedQueue; +import java.util.concurrent.ExecutorService; import java.util.concurrent.RejectedExecutionException; import java.util.concurrent.ScheduledFuture; import java.util.concurrent.TimeUnit; @@ -59,6 +59,7 @@ import org.apache.bookkeeper.client.AsyncCallback.ReadLastConfirmedCallback; import org.apache.bookkeeper.client.BKException.BKIncorrectParameterException; import org.apache.bookkeeper.client.BKException.BKReadException; +import org.apache.bookkeeper.client.DistributionSchedule.WriteSet; import org.apache.bookkeeper.client.SyncCallbackUtils.FutureReadLastConfirmed; import org.apache.bookkeeper.client.SyncCallbackUtils.FutureReadLastConfirmedAndEntry; import org.apache.bookkeeper.client.SyncCallbackUtils.SyncAddCallback; @@ -68,25 +69,20 @@ import org.apache.bookkeeper.client.api.BKException.Code; import org.apache.bookkeeper.client.api.LastConfirmedAndEntry; import org.apache.bookkeeper.client.api.LedgerEntries; +import org.apache.bookkeeper.client.api.LedgerMetadata; import org.apache.bookkeeper.client.api.WriteFlag; import org.apache.bookkeeper.client.api.WriteHandle; import org.apache.bookkeeper.client.impl.LedgerEntryImpl; import org.apache.bookkeeper.common.concurrent.FutureEventListener; import org.apache.bookkeeper.common.concurrent.FutureUtils; import org.apache.bookkeeper.common.util.MathUtils; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.proto.BookieProtocol; -import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks; -import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GenericCallback; -import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.TimedGenericCallback; -import org.apache.bookkeeper.proto.DataFormats.LedgerMetadataFormat.State; import org.apache.bookkeeper.proto.checksum.DigestManager; -import org.apache.bookkeeper.proto.checksum.MacDigestManager; import org.apache.bookkeeper.stats.Counter; import org.apache.bookkeeper.stats.Gauge; import 
org.apache.bookkeeper.stats.OpStatsLogger; -import org.apache.bookkeeper.util.OrderedGenericCallback; -import org.apache.bookkeeper.util.SafeRunnable; +import org.apache.bookkeeper.versioning.Versioned; import org.apache.commons.collections4.IteratorUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -98,37 +94,62 @@ public class LedgerHandle implements WriteHandle { static final Logger LOG = LoggerFactory.getLogger(LedgerHandle.class); + private static final int STICKY_READ_BOOKIE_INDEX_UNSET = -1; + final ClientContext clientCtx; final byte[] ledgerKey; - private LedgerMetadata metadata; + private Versioned versionedMetadata; final long ledgerId; + final ExecutorService executor; long lastAddPushed; + boolean notSupportBatch; + + private enum HandleState { + OPEN, + CLOSED + } + + private HandleState handleState = HandleState.OPEN; + private final CompletableFuture closePromise = new CompletableFuture<>(); /** * Last entryId which has been confirmed to be written durably to the bookies. - * This value is used by readers, the the LAC protocol + * This value is used by readers, the LAC protocol */ volatile long lastAddConfirmed; /** * Next entryId which is expected to move forward during {@link #sendAddSuccessCallbacks() }. This is important - * in order to have an ordered sequence of addEntry ackknowledged to the writer + * in order to have an ordered sequence of addEntry acknowledged to the writer */ volatile long pendingAddsSequenceHead; + /** + * If bookie sticky reads are enabled, this will contain the index of the bookie + * selected as "sticky" for this ledger. The bookie is chosen at random when the + * LedgerHandle is created. + * + *

    In case of failures, the bookie index will be updated (to the next bookie in + * the ensemble) to avoid continuing to attempt to read from a failed bookie. + * + *

    If the index is -1, it means the sticky reads are disabled. + */ + private int stickyBookieIndex; + long length; final DigestManager macManager; final DistributionSchedule distributionSchedule; final RateLimiter throttler; - final LoadingCache bookieFailureHistory; + final LoadingCache bookieFailureHistory; final BookiesHealthInfo bookiesHealthInfo; final EnumSet writeFlags; ScheduledFuture timeoutFuture = null; - private final Map delayedWriteFailedBookies = - new HashMap(); + @VisibleForTesting + final Map delayedWriteFailedBookies = + new HashMap(); /** * Invalid entry id. This value is returned from methods which @@ -142,7 +163,8 @@ public class LedgerHandle implements WriteHandle { */ public static final long INVALID_LEDGER_ID = -0xABCDABCDL; - final AtomicInteger blockAddCompletions = new AtomicInteger(0); + final Object metadataLock = new Object(); + boolean changingEnsemble = false; final AtomicInteger numEnsembleChanges = new AtomicInteger(0); Queue pendingAddOps; ExplicitLacFlushPolicy explicitLacFlushPolicy; @@ -152,30 +174,18 @@ public class LedgerHandle implements WriteHandle { final Counter lacUpdateMissesCounter; private final OpStatsLogger clientChannelWriteWaitStats; - // This empty master key is used when an empty password is provided which is the hash of an empty string - private static final byte[] emptyLedgerKey; - static { - try { - emptyLedgerKey = MacDigestManager.genDigest("ledger", new byte[0]); - } catch (NoSuchAlgorithmException e) { - throw new RuntimeException(e); - } - } - - public Map getDelayedWriteFailedBookies() { - return delayedWriteFailedBookies; - } - LedgerHandle(ClientContext clientCtx, - long ledgerId, LedgerMetadata metadata, + long ledgerId, Versioned versionedMetadata, BookKeeper.DigestType digestType, byte[] password, EnumSet writeFlags) throws GeneralSecurityException, NumberFormatException { this.clientCtx = clientCtx; - this.metadata = metadata; + this.versionedMetadata = versionedMetadata; this.pendingAddOps = new ConcurrentLinkedQueue(); this.writeFlags = writeFlags; + + LedgerMetadata metadata = versionedMetadata.getValue(); if (metadata.isClosed()) { lastAddConfirmed = lastAddPushed = metadata.getLastEntryId(); length = metadata.getLength(); @@ -187,6 +197,14 @@ public Map getDelayedWriteFailedBookies() { this.pendingAddsSequenceHead = lastAddConfirmed; this.ledgerId = ledgerId; + this.executor = clientCtx.getMainWorkerPool().chooseThread(ledgerId); + + if (clientCtx.getConf().enableStickyReads + && getLedgerMetadata().getEnsembleSize() == getLedgerMetadata().getWriteQuorumSize()) { + stickyBookieIndex = clientCtx.getPlacementPolicy().getStickyReadBookieIndex(metadata, Optional.empty()); + } else { + stickyBookieIndex = STICKY_READ_BOOKIE_INDEX_UNSET; + } if (clientCtx.getConf().throttleValue > 0) { this.throttler = RateLimiter.create(clientCtx.getConf().throttleValue); @@ -195,30 +213,32 @@ public Map getDelayedWriteFailedBookies() { } macManager = DigestManager.instantiate(ledgerId, password, BookKeeper.DigestType.toProtoDigestType(digestType), - clientCtx.getConf().useV2WireProtocol); + clientCtx.getByteBufAllocator(), clientCtx.getConf().useV2WireProtocol); // If the password is empty, pass the same random ledger key which is generated by the hash of the empty // password, so that the bookie can avoid processing the keys for each entry - this.ledgerKey = password.length > 0 ? 
MacDigestManager.genDigest("ledger", password) : emptyLedgerKey; + this.ledgerKey = DigestManager.generateMasterKey(password); distributionSchedule = new RoundRobinDistributionSchedule( - metadata.getWriteQuorumSize(), metadata.getAckQuorumSize(), metadata.getEnsembleSize()); + metadata.getWriteQuorumSize(), + metadata.getAckQuorumSize(), + metadata.getEnsembleSize()); this.bookieFailureHistory = CacheBuilder.newBuilder() .expireAfterWrite(clientCtx.getConf().bookieFailureHistoryExpirationMSec, TimeUnit.MILLISECONDS) - .build(new CacheLoader() { + .build(new CacheLoader() { @Override - public Long load(BookieSocketAddress key) { + public Long load(BookieId key) { return -1L; } }); this.bookiesHealthInfo = new BookiesHealthInfo() { @Override - public long getBookieFailureHistory(BookieSocketAddress bookieSocketAddress) { + public long getBookieFailureHistory(BookieId bookieSocketAddress) { Long lastFailure = bookieFailureHistory.getIfPresent(bookieSocketAddress); return lastFailure == null ? -1L : lastFailure; } @Override - public long getBookiePendingRequests(BookieSocketAddress bookieSocketAddress) { + public long getBookiePendingRequests(BookieId bookieSocketAddress) { return clientCtx.getBookieClient().getNumPendingRequests(bookieSocketAddress, ledgerId); } }; @@ -238,31 +258,46 @@ public Integer getSample() { return pendingAddOps.size(); } }); - initializeExplicitLacFlushPolicy(); + + initializeWriteHandleState(); + } + + /** + * Notify the LedgerHandle that a read operation was failed on a particular bookie. + */ + void recordReadErrorOnBookie(int bookieIndex) { + // If sticky bookie reads are enabled, switch the sticky bookie to the + // next bookie in the ensemble so that we avoid to keep reading from the + // same failed bookie + if (stickyBookieIndex != STICKY_READ_BOOKIE_INDEX_UNSET) { + // This will be idempotent when we have multiple read errors on the + // same bookie. 
The net result is that we just go to the next bookie + stickyBookieIndex = clientCtx.getPlacementPolicy().getStickyReadBookieIndex(getLedgerMetadata(), + Optional.of(bookieIndex)); + } + } + + protected void initializeWriteHandleState() { + if (clientCtx.getConf().explicitLacInterval > 0) { + explicitLacFlushPolicy = new ExplicitLacFlushPolicy.ExplicitLacFlushPolicyImpl( + this, clientCtx); + } else { + explicitLacFlushPolicy = ExplicitLacFlushPolicy.VOID_EXPLICITLAC_FLUSH_POLICY; + } if (clientCtx.getConf().addEntryQuorumTimeoutNanos > 0) { - SafeRunnable monitor = new SafeRunnable() { - @Override - public void safeRun() { - monitorPendingAddOps(); - } - }; this.timeoutFuture = clientCtx.getScheduler().scheduleAtFixedRate( - monitor, + () -> monitorPendingAddOps(), clientCtx.getConf().timeoutMonitorIntervalSec, clientCtx.getConf().timeoutMonitorIntervalSec, TimeUnit.SECONDS); } } - protected void initializeExplicitLacFlushPolicy() { - if (!getLedgerMetadata().isClosed() - && !(this instanceof ReadOnlyLedgerHandle) - && clientCtx.getConf().explicitLacInterval > 0) { - explicitLacFlushPolicy = new ExplicitLacFlushPolicy.ExplicitLacFlushPolicyImpl( - this, clientCtx); - } else { - explicitLacFlushPolicy = ExplicitLacFlushPolicy.VOID_EXPLICITLAC_FLUSH_POLICY; + private void tearDownWriteHandleState() { + explicitLacFlushPolicy.stopExplicitLacFlush(); + if (timeoutFuture != null) { + timeoutFuture.cancel(false); } } @@ -315,14 +350,23 @@ public byte[] getLedgerKey() { */ @Override public LedgerMetadata getLedgerMetadata() { - return metadata; + return versionedMetadata.getValue(); } - boolean setLedgerMetadata(LedgerMetadata expected, LedgerMetadata newMetadata) { + Versioned getVersionedLedgerMetadata() { + return versionedMetadata; + } + + boolean setLedgerMetadata(Versioned expected, Versioned newMetadata) { synchronized (this) { // ensure that we only update the metadata if it is the object we expect it to be - if (metadata == expected) { - metadata = newMetadata; + if (versionedMetadata == expected) { + versionedMetadata = newMetadata; + LedgerMetadata metadata = versionedMetadata.getValue(); + if (metadata.isClosed()) { + lastAddConfirmed = lastAddPushed = metadata.getLastEntryId(); + length = metadata.getLength(); + } return true; } else { return false; @@ -345,7 +389,7 @@ public Map getCustomMetadata() { * @return the count of fragments */ public synchronized long getNumFragments() { - return getLedgerMetadata().getEnsembles().size(); + return getLedgerMetadata().getAllEnsembles().size(); } /** @@ -355,9 +399,9 @@ public synchronized long getNumFragments() { * @return count of unique bookies */ public synchronized long getNumBookies() { - Map> m = getLedgerMetadata().getEnsembles(); - Set s = Sets.newHashSet(); - for (List aList : m.values()) { + Map> m = getLedgerMetadata().getAllEnsembles(); + Set s = Sets.newHashSet(); + for (List aList : m.values()) { s.addAll(aList); } return s.size(); @@ -420,14 +464,6 @@ BookiesHealthInfo getBookiesHealthInfo() { return bookiesHealthInfo; } - void writeLedgerConfig(GenericCallback writeCb) { - if (LOG.isDebugEnabled()) { - LOG.debug("Writing metadata to ledger manager: {}, {}", this.ledgerId, getLedgerMetadata().getVersion()); - } - - clientCtx.getLedgerManager().writeLedgerMetadata(ledgerId, getLedgerMetadata(), writeCb); - } - /** * {@inheritDoc} */ @@ -445,10 +481,6 @@ public CompletableFuture closeAsync() { CompletableFuture result = new CompletableFuture<>(); SyncCloseCallback callback = new SyncCloseCallback(result); asyncClose(callback, 
null); - explicitLacFlushPolicy.stopExplicitLacFlush(); - if (timeoutFuture != null) { - timeoutFuture.cancel(false); - } return result; } @@ -476,6 +508,10 @@ public synchronized boolean isClosed() { return getLedgerMetadata().isClosed(); } + boolean isHandleWritable() { + return !getLedgerMetadata().isClosed() && handleState == HandleState.OPEN; + } + void asyncCloseInternal(final CloseCallback cb, final Object ctx, final int rc) { try { doAsyncCloseInternal(cb, ctx, rc); @@ -499,140 +535,92 @@ void asyncCloseInternal(final CloseCallback cb, final Object ctx, final int rc) * @param rc */ void doAsyncCloseInternal(final CloseCallback cb, final Object ctx, final int rc) { - clientCtx.getMainWorkerPool().executeOrdered(ledgerId, new SafeRunnable() { - @Override - public void safeRun() { - final long prevLastEntryId; - final long prevLength; - final State prevState; - List pendingAdds; - - if (isClosed()) { - // TODO: make ledger metadata immutable {@link https://github.com/apache/bookkeeper/issues/281} - // Although the metadata is already closed, we don't need to proceed zookeeper metadata update, but - // we still need to error out the pending add ops. - // - // There is a race condition a pending add op is enqueued, after a close op reset ledger metadata - // state to unclosed to resolve metadata conflicts. If we don't error out these pending add ops, - // they would be leak and never callback. - // - // The race condition happen in following sequence: - // a) ledger L is fenced - // b) write entry E encountered LedgerFencedException, trigger ledger close procedure - // c) ledger close encountered metadata version exception and set ledger metadata back to open - // d) writer tries to write entry E+1, since ledger metadata is still open (reset by c)) - // e) the close procedure in c) resolved the metadata conflicts and set ledger metadata to closed - // f) writing entry E+1 encountered LedgerFencedException which will enter ledger close procedure - // g) it would find that ledger metadata is closed, then it callbacks immediately without erroring - // out any pendings - synchronized (LedgerHandle.this) { - pendingAdds = drainPendingAddsToErrorOut(); - } - errorOutPendingAdds(rc, pendingAdds); - cb.closeComplete(BKException.Code.OK, LedgerHandle.this, ctx); - return; - } + executeOrdered(() -> { + final HandleState prevHandleState; + final List pendingAdds; + final long lastEntry; + final long finalLength; + + closePromise.whenComplete((ignore, ex) -> { + if (ex != null) { + cb.closeComplete( + BKException.getExceptionCode(ex, BKException.Code.UnexpectedConditionException), + LedgerHandle.this, ctx); + } else { + cb.closeComplete(BKException.Code.OK, LedgerHandle.this, ctx); + } + }); - synchronized (LedgerHandle.this) { - LedgerMetadata metadata = getLedgerMetadata(); - prevState = metadata.getState(); - prevLastEntryId = metadata.getLastEntryId(); - prevLength = metadata.getLength(); - - // drain pending adds first - pendingAdds = drainPendingAddsToErrorOut(); - - // synchronized on LedgerHandle.this to ensure that - // lastAddPushed can not be updated after the metadata - // is closed. - metadata.setLength(length); - metadata.close(lastAddConfirmed); - lastAddPushed = lastAddConfirmed; - } + synchronized (LedgerHandle.this) { + prevHandleState = handleState; - // error out all pending adds during closing, the callbacks shouldn't be - // running under any bk locks. 
- errorOutPendingAdds(rc, pendingAdds); + // drain pending adds first + pendingAdds = drainPendingAddsAndAdjustLength(); - if (LOG.isDebugEnabled()) { - LedgerMetadata metadata = getLedgerMetadata(); - LOG.debug("Closing ledger: " + ledgerId + " at entryId: " - + metadata.getLastEntryId() + " with this many bytes: " + metadata.getLength()); - } + // taking the length must occur after draining, as draining changes the length + lastEntry = lastAddPushed = LedgerHandle.this.lastAddConfirmed; + finalLength = LedgerHandle.this.length; + handleState = HandleState.CLOSED; + } - final class CloseCb extends OrderedGenericCallback { - CloseCb() { - super(clientCtx.getMainWorkerPool(), ledgerId); + // error out all pending adds during closing, the callbacks shouldn't be + // running under any bk locks. + try { + errorOutPendingAdds(rc, pendingAdds); + } catch (Throwable e) { + closePromise.completeExceptionally(e); + return; } - @Override - public void safeOperationComplete(final int rc, LedgerMetadata writtenMetadata) { - if (rc == BKException.Code.MetadataVersionException) { - rereadMetadata(new OrderedGenericCallback(clientCtx.getMainWorkerPool(), - ledgerId) { - @Override - public void safeOperationComplete(int newrc, LedgerMetadata newMeta) { - if (newrc != BKException.Code.OK) { - LOG.error("Error reading new metadata from ledger {} when closing: {}", - ledgerId, BKException.codeLogger(newrc)); - cb.closeComplete(rc, LedgerHandle.this, ctx); - } else { - LedgerMetadata metadata = getLedgerMetadata(); - metadata.setState(prevState); - if (prevState.equals(State.CLOSED)) { - metadata.close(prevLastEntryId); - } + if (prevHandleState != HandleState.CLOSED) { + if (LOG.isDebugEnabled()) { + LOG.debug("Closing ledger: {} at entryId {} with {} bytes", getId(), lastEntry, + finalLength); + } - metadata.setLength(prevLength); - if (!metadata.isNewerThan(newMeta) - && !metadata.isConflictWith(newMeta)) { - // use the new metadata's ensemble, in case re-replication already - // replaced some bookies in the ensemble. - metadata.setEnsembles(newMeta.getEnsembles()); - metadata.setVersion(newMeta.version); - metadata.setLength(length); - metadata.close(getLastAddConfirmed()); - writeLedgerConfig(new CloseCb()); - return; + tearDownWriteHandleState(); + new MetadataUpdateLoop( + clientCtx.getLedgerManager(), getId(), + LedgerHandle.this::getVersionedLedgerMetadata, + (metadata) -> { + if (metadata.isClosed()) { + /* If the ledger has been closed with the same lastEntry + * and length that we planned to close with, we have nothing to do, + * so just return success */ + if (lastEntry == metadata.getLastEntryId() + && finalLength == metadata.getLength()) { + return false; } else { - metadata.setLength(length); - metadata.close(getLastAddConfirmed()); - LOG.warn("Conditional update ledger metadata for ledger {} failed.", - ledgerId); - cb.closeComplete(rc, LedgerHandle.this, ctx); + LOG.error("Metadata conflict when closing ledger {}." + + " Another client may have recovered the ledger while " + + "there" + + " were writes outstanding. 
(local lastEntry:{} " + + "length:{}) " + + " (metadata lastEntry:{} length:{})", + getId(), lastEntry, finalLength, + metadata.getLastEntryId(), metadata.getLength()); + throw new BKException.BKMetadataVersionException(); } + } else { + return true; } - } - - @Override - public String toString() { - return String.format("ReReadMetadataForClose(%d)", ledgerId); - } - }); - } else if (rc != BKException.Code.OK) { - LOG.error("Error update ledger metadata for ledger {} : {}", - ledgerId, BKException.codeLogger(rc)); - cb.closeComplete(rc, LedgerHandle.this, ctx); - } else { - cb.closeComplete(BKException.Code.OK, LedgerHandle.this, ctx); - } - } - - @Override - public String toString() { - return String.format("WriteLedgerConfigForClose(%d)", ledgerId); + }, + (metadata) -> { + return LedgerMetadataBuilder.from(metadata) + .withClosedState().withLastEntryId(lastEntry) + .withLength(finalLength).build(); + }, + LedgerHandle.this::setLedgerMetadata) + .run().whenComplete((metadata, ex) -> { + if (ex != null) { + closePromise.completeExceptionally(ex); + } else { + FutureUtils.complete(closePromise, null); + } + }); } } - - writeLedgerConfig(new CloseCb()); - - } - - @Override - public String toString() { - return String.format("CloseLedgerHandle(%d)", ledgerId); - } - }); + ); } /** @@ -654,6 +642,26 @@ public Enumeration readEntries(long firstEntry, long lastEntry) return SyncCallbackUtils.waitForResult(result); } + /** + * Read a sequence of entries synchronously. + * + * @param startEntry + * start entry id + * @param maxCount + * the total entries count. + * @param maxSize + * the total entries size. + * @see #asyncBatchReadEntries(long, int, long, ReadCallback, Object) + */ + public Enumeration batchReadEntries(long startEntry, int maxCount, long maxSize) + throws InterruptedException, BKException { + CompletableFuture> result = new CompletableFuture<>(); + + asyncBatchReadEntries(startEntry, maxCount, maxSize, new SyncReadCallback(result), null); + + return SyncCallbackUtils.waitForResult(result); + } + /** * Read a sequence of entries synchronously, allowing to read after the LastAddConfirmed range.
* This is the same as @@ -677,6 +685,27 @@ public Enumeration readUnconfirmedEntries(long firstEntry, long las return SyncCallbackUtils.waitForResult(result); } + /** + * Read a sequence of entries synchronously, allowing to read after the LastAddConfirmed range.
+ * This is the same as + * {@link #asyncBatchReadUnconfirmedEntries(long, int, long, ReadCallback, Object) } + * + * @param firstEntry + * id of first entry of sequence (included) + * @param maxCount + * the total entries count + * @param maxSize + * the total entries size + */ + public Enumeration<LedgerEntry> batchReadUnconfirmedEntries(long firstEntry, int maxCount, long maxSize) + throws InterruptedException, BKException { + CompletableFuture<Enumeration<LedgerEntry>> result = new CompletableFuture<>(); + + asyncBatchReadUnconfirmedEntries(firstEntry, maxCount, maxSize, new SyncReadCallback(result), null); + + return SyncCallbackUtils.waitForResult(result); + } + /** * Read a sequence of entries asynchronously. *
@@ -699,8 +728,8 @@ public void asyncReadEntries(long firstEntry, long lastEntry, ReadCallback cb, O } if (lastEntry > lastAddConfirmed) { - LOG.error("ReadException on ledgerId:{} firstEntry:{} lastEntry:{}", - ledgerId, firstEntry, lastEntry); + LOG.error("ReadEntries exception on ledgerId:{} firstEntry:{} lastEntry:{} lastAddConfirmed:{}", + ledgerId, firstEntry, lastEntry, lastAddConfirmed); cb.readComplete(BKException.Code.ReadException, this, null, ctx); return; }
@@ -708,11 +737,55 @@ public void asyncReadEntries(long firstEntry, long lastEntry, ReadCallback cb, O asyncReadEntriesInternal(firstEntry, lastEntry, cb, ctx, false); } + /** + * Read a sequence of entries asynchronously. + * It sends a single RPC to get all entries instead of sending multiple RPCs. + * + * @param startEntry + * id of first entry of sequence + * @param maxCount + * the entries count + * @param maxSize + * the total entries size + * @param cb + * object implementing read callback interface + * @param ctx + * control object + */ + public void asyncBatchReadEntries(long startEntry, int maxCount, long maxSize, ReadCallback cb, Object ctx) { + // Little sanity check + if (startEntry > lastAddConfirmed) { + LOG.error("ReadEntries exception on ledgerId:{} firstEntry:{} lastAddConfirmed:{}", + ledgerId, startEntry, lastAddConfirmed); + cb.readComplete(BKException.Code.ReadException, this, null, ctx); + return; + } + if (notSupportBatchRead()) { + long lastEntry = Math.min(startEntry + maxCount - 1, lastAddConfirmed); + asyncReadEntriesInternal(startEntry, lastEntry, cb, ctx, false); + } else { + asyncBatchReadEntriesInternal(startEntry, maxCount, maxSize, new ReadCallback() { + @Override + public void readComplete(int rc, LedgerHandle lh, Enumeration<LedgerEntry> seq, Object ctx) { + // If the bookie server does not support the batch read request, it will close the + // connection, and the client then gets a BookieHandleNotAvailableException. + if (rc == Code.BookieHandleNotAvailableException) { + notSupportBatch = true; + long lastEntry = Math.min(startEntry + maxCount - 1, lastAddConfirmed); + asyncReadEntriesInternal(startEntry, lastEntry, cb, ctx, false); + } else { + cb.readComplete(rc, lh, seq, ctx); + } + } + }, ctx, false); + } + } + /** * Read a sequence of entries asynchronously, allowing to read after the LastAddConfirmed range. *
This is the same as * {@link #asyncReadEntries(long, long, ReadCallback, Object) } - * but it lets the client read without checking the local value of LastAddConfirmed, so that it is possibile to + * but it lets the client read without checking the local value of LastAddConfirmed, so that it is possible to * read entries for which the writer has not received the acknowledgement yet.
* For entries which are within the range 0..LastAddConfirmed BookKeeper guarantees that the writer has successfully * received the acknowledgement.
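As a rough usage sketch of the synchronous batch read API introduced above (the handle, entry range and size cap are illustrative, not part of this patch):

    import java.util.Enumeration;
    import org.apache.bookkeeper.client.LedgerEntry;
    import org.apache.bookkeeper.client.LedgerHandle;

    static void batchReadExample(LedgerHandle lh) throws Exception {
        // Fetch up to 100 entries starting at entry 0, capped at 5 MiB of payload.
        Enumeration<LedgerEntry> entries = lh.batchReadEntries(0L, 100, 5 * 1024 * 1024);
        while (entries.hasMoreElements()) {
            LedgerEntry entry = entries.nextElement();
            byte[] payload = entry.getEntry(); // materializes the entry body
        }
    }

If the target bookies do not support batch reads, the client transparently falls back to per-entry reads, as the asyncBatchReadEntries fallback above shows.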
@@ -747,6 +820,48 @@ public void asyncReadUnconfirmedEntries(long firstEntry, long lastEntry, ReadCal asyncReadEntriesInternal(firstEntry, lastEntry, cb, ctx, false); } + /** + * Read a sequence of entries asynchronously, allowing to read after the LastAddConfirmed range. + * It sends a single RPC to get all entries instead of sending multiple RPCs. + * + * @param startEntry + * id of first entry of sequence + * @param maxCount + * the entries count + * @param maxSize + * the total entries size + * @param cb + * object implementing read callback interface + * @param ctx + * control object + */ + public void asyncBatchReadUnconfirmedEntries(long startEntry, int maxCount, long maxSize, ReadCallback cb, + Object ctx) { + // Little sanity check + if (startEntry < 0) { + LOG.error("IncorrectParameterException on ledgerId:{} firstEntry:{}", ledgerId, startEntry); + cb.readComplete(BKException.Code.IncorrectParameterException, this, null, ctx); + return; + } + if (notSupportBatchRead()) { + long lastEntry = startEntry + maxCount - 1; + asyncReadEntriesInternal(startEntry, lastEntry, cb, ctx, false); + } else { + asyncBatchReadEntriesInternal(startEntry, maxCount, maxSize, new ReadCallback() { + @Override + public void readComplete(int rc, LedgerHandle lh, Enumeration<LedgerEntry> seq, Object ctx) { + // If the bookie server does not support the batch read request, it will close the + // connection, and the client then gets a BookieHandleNotAvailableException. + if (rc == Code.BookieHandleNotAvailableException) { + notSupportBatch = true; + long lastEntry = startEntry + maxCount - 1; + asyncReadEntriesInternal(startEntry, lastEntry, cb, ctx, false); + } else { + cb.readComplete(rc, lh, seq, ctx); + } + } + }, ctx, false); + } + } + /** * Read a sequence of entries asynchronously. *
@@ -765,19 +880,136 @@ public CompletableFuture readAsync(long firstEntry, long lastEntr } if (lastEntry > lastAddConfirmed) { - LOG.error("ReadException on ledgerId:{} firstEntry:{} lastEntry:{}", - ledgerId, firstEntry, lastEntry); + LOG.error("ReadAsync exception on ledgerId:{} firstEntry:{} lastEntry:{} lastAddConfirmed:{}", + ledgerId, firstEntry, lastEntry, lastAddConfirmed); return FutureUtils.exception(new BKReadException()); } return readEntriesInternalAsync(firstEntry, lastEntry, false); } + /** + * Read a sequence of entries asynchronously. + * It sends a single RPC to get all entries instead of sending multiple RPCs.
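+ * <p>A hypothetical usage sketch (entry range and size cap are illustrative only;
+ * handleEntry is a placeholder for the caller's own logic):
+ * <pre>{@code
+ * lh.batchReadAsync(0L, 100, 4 * 1024 * 1024)
+ *   .thenAccept(entries -> entries.forEach(entry -> handleEntry(entry)))
+ *   .exceptionally(ex -> { LOG.error("Batch read failed", ex); return null; });
+ * }</pre>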
+ * + * @param startEntry + * id of first entry of sequence + * @param maxCount + * the entries count + * @param maxSize + * the total entries size + */ + @Override + public CompletableFuture batchReadAsync(long startEntry, int maxCount, long maxSize) { + // Little sanity check + if (startEntry < 0) { + LOG.error("IncorrectParameterException on ledgerId:{} firstEntry:{}", ledgerId, startEntry); + return FutureUtils.exception(new BKIncorrectParameterException()); + } + if (startEntry > lastAddConfirmed) { + LOG.error("ReadAsync exception on ledgerId:{} firstEntry:{} lastAddConfirmed:{}", + ledgerId, startEntry, lastAddConfirmed); + return FutureUtils.exception(new BKReadException()); + } + if (notSupportBatchRead()) { + long lastEntry = Math.min(startEntry + maxCount - 1, lastAddConfirmed); + return readEntriesInternalAsync(startEntry, lastEntry, false); + } + CompletableFuture future = new CompletableFuture<>(); + batchReadEntriesInternalAsync(startEntry, maxCount, maxSize, false) + .whenComplete((entries, ex) -> { + if (ex != null) { + //If the bookie server not support the batch read request, the bookie server will close the + // connection, then get the BookieHandleNotAvailableException. + if (ex instanceof BKException.BKBookieHandleNotAvailableException) { + notSupportBatch = true; + long lastEntry = Math.min(startEntry + maxCount - 1, lastAddConfirmed); + readEntriesInternalAsync(startEntry, lastEntry, false).whenComplete((entries1, ex1) -> { + if (ex1 != null) { + future.completeExceptionally(ex1); + } else { + future.complete(entries1); + } + }); + } else { + future.completeExceptionally(ex); + } + } else { + future.complete(entries); + } + }); + return future; + } + + private boolean notSupportBatchRead() { + if (!clientCtx.getConf().batchReadEnabled) { + return true; + } + if (notSupportBatch) { + return true; + } + LedgerMetadata ledgerMetadata = getLedgerMetadata(); + return ledgerMetadata.getEnsembleSize() != ledgerMetadata.getWriteQuorumSize(); + } + + private CompletableFuture batchReadEntriesInternalAsync(long startEntry, int maxCount, long maxSize, + boolean isRecoveryRead) { + int nettyMaxFrameSizeBytes = clientCtx.getConf().nettyMaxFrameSizeBytes; + if (maxSize > nettyMaxFrameSizeBytes) { + LOG.info( + "The max size is greater than nettyMaxFrameSizeBytes, use nettyMaxFrameSizeBytes:{} to replace it.", + nettyMaxFrameSizeBytes); + maxSize = nettyMaxFrameSizeBytes; + } + if (maxSize <= 0) { + LOG.info("The max size is negative, use nettyMaxFrameSizeBytes:{} to replace it.", nettyMaxFrameSizeBytes); + maxSize = nettyMaxFrameSizeBytes; + } + BatchedReadOp op = new BatchedReadOp(this, clientCtx, + startEntry, maxCount, maxSize, isRecoveryRead); + if (!clientCtx.isClientClosed()) { + // Waiting on the first one. + // This is not very helpful if there are multiple ensembles or if bookie goes into unresponsive + // state later after N requests sent. + // Unfortunately it seems that alternatives are: + // - send reads one-by-one (up to the app) + // - rework LedgerHandle to send requests one-by-one (maybe later, potential perf impact) + // - block worker pool (not good) + // Even with this implementation one should be more concerned about OOME when all read responses arrive + // or about overloading bookies with these requests then about submission of many small requests. + // Naturally one of the solutions would be to submit smaller batches and in this case + // current implementation will prevent next batch from starting when bookie is + // unresponsive thus helpful enough. 
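+ // The writability pre-check below runs only when waitForWriteSetMs >= 0;
+ // a negative value skips the probe and its fail-fast behavior entirely.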
+ if (clientCtx.getConf().waitForWriteSetMs >= 0) { + DistributionSchedule.WriteSet ws = distributionSchedule.getWriteSet(startEntry); + try { + if (!waitForWritable(ws, ws.size() - 1, clientCtx.getConf().waitForWriteSetMs)) { + op.allowFailFastOnUnwritableChannel(); + } + } finally { + ws.recycle(); + } + } + + if (isHandleWritable()) { + // Ledger handle in read/write mode: submit to OSE for ordered execution. + executeOrdered(op); + } else { + // Read-only ledger handle: bypass OSE and execute read directly in client thread. + // This avoids a context-switch to OSE thread and thus reduces latency. + op.run(); + } + } else { + op.future().completeExceptionally(BKException.create(ClientClosedException)); + } + return op.future(); + } + /** * Read a sequence of entries asynchronously, allowing to read after the LastAddConfirmed range. *
This is the same as * {@link #asyncReadEntries(long, long, ReadCallback, Object) } - * but it lets the client read without checking the local value of LastAddConfirmed, so that it is possibile to + * but it lets the client read without checking the local value of LastAddConfirmed, so that it is possible to * read entries for which the writer has not received the acknowledgement yet.
* For entries which are within the range 0..LastAddConfirmed BookKeeper guarantees that the writer has successfully * received the acknowledgement.
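A comparable sketch for the asynchronous, callback-based batch variant (method and variable names are illustrative):

    import java.util.Enumeration;
    import org.apache.bookkeeper.client.BKException;
    import org.apache.bookkeeper.client.LedgerEntry;
    import org.apache.bookkeeper.client.LedgerHandle;

    static void asyncBatchReadExample(LedgerHandle lh) {
        lh.asyncBatchReadUnconfirmedEntries(0L, 100, 4 * 1024 * 1024,
            (rc, handle, seq, ctx) -> {
                if (rc == BKException.Code.OK) {
                    while (seq.hasMoreElements()) {
                        LedgerEntry entry = seq.nextElement();
                        // consume entry.getEntry() here
                    }
                } // non-OK rc values map to BKException error codes
            }, null);
    }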
    @@ -842,6 +1074,40 @@ public void onFailure(Throwable cause) { } } + void asyncBatchReadEntriesInternal(long startEntry, int maxCount, long maxSize, ReadCallback cb, + Object ctx, boolean isRecoveryRead) { + if (!clientCtx.isClientClosed()) { + batchReadEntriesInternalAsync(startEntry, maxCount, maxSize, isRecoveryRead) + .whenCompleteAsync(new FutureEventListener() { + @Override + public void onSuccess(LedgerEntries entries) { + cb.readComplete( + Code.OK, + LedgerHandle.this, + IteratorUtils.asEnumeration( + Iterators.transform(entries.iterator(), le -> { + LedgerEntry entry = new LedgerEntry((LedgerEntryImpl) le); + le.close(); + return entry; + })), + ctx); + } + + @Override + public void onFailure(Throwable cause) { + if (cause instanceof BKException) { + BKException bke = (BKException) cause; + cb.readComplete(bke.getCode(), LedgerHandle.this, null, ctx); + } else { + cb.readComplete(Code.UnexpectedConditionException, LedgerHandle.this, null, ctx); + } + } + }, clientCtx.getMainWorkerPool().chooseThread(ledgerId)); + } else { + cb.readComplete(Code.ClientClosedException, LedgerHandle.this, null, ctx); + } + } + /* * Read the last entry in the ledger * @@ -892,16 +1158,25 @@ CompletableFuture readEntriesInternalAsync(long firstEntry, // Naturally one of the solutions would be to submit smaller batches and in this case // current implementation will prevent next batch from starting when bookie is // unresponsive thus helpful enough. - DistributionSchedule.WriteSet ws = distributionSchedule.getWriteSet(firstEntry); - try { - if (!waitForWritable(ws, firstEntry, ws.size() - 1, clientCtx.getConf().waitForWriteSetMs)) { - op.allowFailFastOnUnwritableChannel(); + if (clientCtx.getConf().waitForWriteSetMs >= 0) { + DistributionSchedule.WriteSet ws = distributionSchedule.getWriteSet(firstEntry); + try { + if (!waitForWritable(ws, ws.size() - 1, clientCtx.getConf().waitForWriteSetMs)) { + op.allowFailFastOnUnwritableChannel(); + } + } finally { + ws.recycle(); } - } finally { - ws.recycle(); } - clientCtx.getMainWorkerPool().executeOrdered(ledgerId, op); + if (isHandleWritable()) { + // Ledger handle in read/write mode: submit to OSE for ordered execution. + executeOrdered(op); + } else { + // Read-only ledger handle: bypass OSE and execute read directly in client thread. + // This avoids a context-switch to OSE thread and thus reduces latency. + op.run(); + } } else { op.future().completeExceptionally(BKException.create(ClientClosedException)); } @@ -913,6 +1188,7 @@ CompletableFuture readEntriesInternalAsync(long firstEntry, * * @param data * array of bytes to be written to the ledger + * do not reuse the buffer, bk-client will release it appropriately * @return the entryId of the new inserted entry */ public long addEntry(byte[] data) throws InterruptedException, BKException { @@ -932,13 +1208,14 @@ public CompletableFuture appendAsync(ByteBuf data) { /** * Add entry synchronously to an open ledger. This can be used only with * {@link LedgerHandleAdv} returned through ledgers created with {@link - * BookKeeper#createLedgerAdv(int, int, int, DigestType, byte[])}. + * BookKeeper#createLedgerAdv(int, int, int, BookKeeper.DigestType, byte[])}. 
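+ * <p>A hypothetical sketch (bk, password and payload are placeholders; ledger
+ * settings are illustrative only):
+ * <pre>{@code
+ * LedgerHandle lh = bk.createLedgerAdv(3, 3, 2, BookKeeper.DigestType.CRC32, password);
+ * lh.addEntry(0L, payload); // with createLedgerAdv the caller assigns entry ids, starting from 0
+ * }</pre>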
* * * @param entryId * entryId to be added * @param data * array of bytes to be written to the ledger + * do not reuse the buffer, bk-client will release it appropriately * @return the entryId of the new inserted entry */ public long addEntry(final long entryId, byte[] data) throws InterruptedException, BKException { @@ -951,6 +1228,7 @@ public long addEntry(final long entryId, byte[] data) throws InterruptedExceptio * * @param data * array of bytes to be written to the ledger + * do not reuse the buffer, bk-client will release it appropriately * @param offset * offset from which to take bytes from data * @param length @@ -972,12 +1250,13 @@ public long addEntry(byte[] data, int offset, int length) /** * Add entry synchronously to an open ledger. This can be used only with * {@link LedgerHandleAdv} returned through ledgers created with {@link - * BookKeeper#createLedgerAdv(int, int, int, DigestType, byte[])}. + * BookKeeper#createLedgerAdv(int, int, int, BookKeeper.DigestType, byte[])}. * * @param entryId * entryId to be added. * @param data * array of bytes to be written to the ledger + * do not reuse the buffer, bk-client will release it appropriately * @param offset * offset from which to take bytes from data * @param length @@ -995,6 +1274,7 @@ public long addEntry(final long entryId, byte[] data, int offset, int length) th * * @param data * array of bytes to be written + * do not reuse the buffer, bk-client will release it appropriately * @param cb * object implementing callbackinterface * @param ctx @@ -1008,12 +1288,13 @@ public void asyncAddEntry(final byte[] data, final AddCallback cb, /** * Add entry asynchronously to an open ledger. This can be used only with * {@link LedgerHandleAdv} returned through ledgers created with {@link - * BookKeeper#createLedgerAdv(int, int, int, DigestType, byte[])}. + * BookKeeper#createLedgerAdv(int, int, int, BookKeeper.DigestType, byte[])}. * * @param entryId * entryId to be added * @param data * array of bytes to be written + * do not reuse the buffer, bk-client will release it appropriately * @param cb * object implementing callbackinterface * @param ctx @@ -1029,6 +1310,7 @@ public void asyncAddEntry(final long entryId, final byte[] data, final AddCallba * * @param data * array of bytes to be written + * do not reuse the buffer, bk-client will release it appropriately * @param offset * offset from which to take bytes from data * @param length @@ -1060,12 +1342,13 @@ public void asyncAddEntry(ByteBuf data, final AddCallback cb, final Object ctx) * Add entry asynchronously to an open ledger, using an offset and range. * This can be used only with {@link LedgerHandleAdv} returned through * ledgers created with - * {@link BookKeeper#createLedgerAdv(int, int, int, org.apache.bookkeeper.client.BookKeeper.DigestType, byte[])}. + * {@link BookKeeper#createLedgerAdv(int, int, int, BookKeeper.DigestType, byte[])}. * * @param entryId * entryId of the entry to add. 
* @param data * array of bytes to be written + * do not reuse the buffer, bk-client will release it appropriately * @param offset * offset from which to take bytes from data * @param length @@ -1091,6 +1374,7 @@ public void asyncAddEntry(final long entryId, final byte[] data, final int offse * entryId of the entry to add * @param data * array of bytes to be written + * do not reuse the buffer, bk-client will release it appropriately * @param offset * offset from which to take bytes from data * @param length @@ -1112,12 +1396,13 @@ public void asyncAddEntry(final long entryId, final byte[] data, final int offse /** * Add entry asynchronously to an open ledger, using an offset and range. * This can be used only with {@link LedgerHandleAdv} returned through - * ledgers created with {@link createLedgerAdv(int, int, int, DigestType, byte[])}. + * ledgers created with {@link BookKeeper#createLedgerAdv(int, int, int, BookKeeper.DigestType, byte[])}. * * @param entryId * entryId of the entry to add. * @param data * io.netty.buffer.ByteBuf of bytes to be written + * do not reuse the buffer, bk-client will release it appropriately * @param cb * object implementing callbackinterface * @param ctx @@ -1141,7 +1426,7 @@ public CompletableFuture force() { // synchronized on this to ensure that // the ledger isn't closed between checking and // updating lastAddPushed - if (getLedgerMetadata().isClosed()) { + if (!isHandleWritable()) { wasClosed = true; } } @@ -1149,9 +1434,9 @@ public CompletableFuture force() { if (wasClosed) { // make sure the callback is triggered in main worker pool try { - clientCtx.getMainWorkerPool().executeOrdered(ledgerId, new SafeRunnable() { + executeOrdered(new Runnable() { @Override - public void safeRun() { + public void run() { LOG.warn("Force() attempted on a closed ledger: {}", ledgerId); result.completeExceptionally(new BKException.BKLedgerClosedException()); } @@ -1169,9 +1454,9 @@ public String toString() { // early exit: no write has been issued yet if (pendingAddsSequenceHead == INVALID_ENTRY_ID) { - clientCtx.getMainWorkerPool().executeOrdered(ledgerId, new SafeRunnable() { + executeOrdered(new Runnable() { @Override - public void safeRun() { + public void run() { FutureUtils.complete(result, null); } @@ -1184,7 +1469,7 @@ public String toString() { } try { - clientCtx.getMainWorkerPool().executeOrdered(ledgerId, op); + executeOrdered(op); } catch (RejectedExecutionException e) { result.completeExceptionally(new BKException.BKInterruptedException()); } @@ -1209,8 +1494,8 @@ void asyncRecoveryAddEntry(final byte[] data, final int offset, final int length doAsyncAddEntry(op); } - private boolean isWritesetWritable(DistributionSchedule.WriteSet writeSet, - long key, int allowedNonWritableCount) { + private boolean isWriteSetWritable(DistributionSchedule.WriteSet writeSet, + int allowedNonWritableCount) { if (allowedNonWritableCount < 0) { allowedNonWritableCount = 0; } @@ -1219,9 +1504,11 @@ private boolean isWritesetWritable(DistributionSchedule.WriteSet writeSet, final int requiredWritable = sz - allowedNonWritableCount; int nonWritableCount = 0; - List currentEnsemble = getCurrentEnsemble(); + List currentEnsemble = getCurrentEnsemble(); for (int i = 0; i < sz; i++) { - if (!clientCtx.getBookieClient().isWritable(currentEnsemble.get(i), key)) { + int writeBookieIndex = writeSet.get(i); + if (writeBookieIndex < currentEnsemble.size() + && !clientCtx.getBookieClient().isWritable(currentEnsemble.get(writeBookieIndex), ledgerId)) { nonWritableCount++; if 
(nonWritableCount >= allowedNonWritableCount) { return false; @@ -1236,21 +1523,22 @@ private boolean isWritesetWritable(DistributionSchedule.WriteSet writeSet, return true; } - protected boolean waitForWritable(DistributionSchedule.WriteSet writeSet, long key, + @VisibleForTesting + protected boolean waitForWritable(DistributionSchedule.WriteSet writeSet, int allowedNonWritableCount, long durationMs) { if (durationMs < 0) { return true; } final long startTime = MathUtils.nowInNano(); - boolean success = isWritesetWritable(writeSet, key, allowedNonWritableCount); + boolean writableResult = isWriteSetWritable(writeSet, allowedNonWritableCount); - if (!success && durationMs > 0) { + if (!writableResult && durationMs > 0) { int backoff = 1; final int maxBackoff = 4; final long deadline = startTime + TimeUnit.MILLISECONDS.toNanos(durationMs); - while (!isWritesetWritable(writeSet, key, allowedNonWritableCount)) { + while (!(writableResult = isWriteSetWritable(writeSet, allowedNonWritableCount))) { if (MathUtils.nowInNano() < deadline) { long maxSleep = MathUtils.elapsedMSec(startTime); if (maxSleep < 0) { @@ -1262,32 +1550,33 @@ protected boolean waitForWritable(DistributionSchedule.WriteSet writeSet, long k TimeUnit.MILLISECONDS.sleep(sleepMs); } catch (InterruptedException e) { Thread.currentThread().interrupt(); - success = isWritesetWritable(writeSet, key, allowedNonWritableCount); + writableResult = isWriteSetWritable(writeSet, allowedNonWritableCount); break; } if (backoff <= maxBackoff) { backoff++; } } else { - success = false; + writableResult = false; break; } } if (backoff > 1) { - LOG.info("Spent {} ms waiting for {} writable channels", + LOG.info("Spent {} ms waiting for {} writable channels, writable result {}", MathUtils.elapsedMSec(startTime), - writeSet.size() - allowedNonWritableCount); + writeSet.size() - allowedNonWritableCount, + writableResult); } } - if (success) { + if (writableResult) { clientChannelWriteWaitStats.registerSuccessfulEvent( MathUtils.elapsedNanos(startTime), TimeUnit.NANOSECONDS); } else { clientChannelWriteWaitStats.registerFailedEvent( MathUtils.elapsedNanos(startTime), TimeUnit.NANOSECONDS); } - return success; + return writableResult; } protected void doAsyncAddEntry(final PendingAddOp op) { @@ -1300,26 +1589,27 @@ protected void doAsyncAddEntry(final PendingAddOp op) { // synchronized on this to ensure that // the ledger isn't closed between checking and // updating lastAddPushed - if (getLedgerMetadata().isClosed()) { - wasClosed = true; - } else { + if (isHandleWritable()) { long entryId = ++lastAddPushed; long currentLedgerLength = addToLength(op.payload.readableBytes()); op.setEntryId(entryId); op.setLedgerLength(currentLedgerLength); pendingAddOps.add(op); + } else { + wasClosed = true; } } if (wasClosed) { // make sure the callback is triggered in main worker pool try { - clientCtx.getMainWorkerPool().executeOrdered(ledgerId, new SafeRunnable() { + executeOrdered(new Runnable() { @Override - public void safeRun() { + public void run() { LOG.warn("Attempt to add to closed ledger: {}", ledgerId); op.cb.addCompleteWithLatency(BKException.Code.LedgerClosedException, LedgerHandle.this, INVALID_ENTRY_ID, 0, op.ctx); + op.recyclePendAddOpObject(); } @Override @@ -1329,28 +1619,26 @@ public String toString() { }); } catch (RejectedExecutionException e) { op.cb.addCompleteWithLatency(BookKeeper.getReturnRc(clientCtx.getBookieClient(), - BKException.Code.InterruptedException), + BKException.Code.InterruptedException), LedgerHandle.this, 
INVALID_ENTRY_ID, 0, op.ctx); + op.recyclePendAddOpObject(); } return; } - DistributionSchedule.WriteSet ws = distributionSchedule.getWriteSet(op.getEntryId()); - try { - if (!waitForWritable(ws, op.getEntryId(), 0, clientCtx.getConf().waitForWriteSetMs)) { - op.allowFailFastOnUnwritableChannel(); + if (clientCtx.getConf().waitForWriteSetMs >= 0) { + DistributionSchedule.WriteSet ws = distributionSchedule.getWriteSet(op.getEntryId()); + try { + if (!waitForWritable(ws, 0, clientCtx.getConf().waitForWriteSetMs)) { + op.allowFailFastOnUnwritableChannel(); + } + } finally { + ws.recycle(); } - } finally { - ws.recycle(); } - try { - clientCtx.getMainWorkerPool().executeOrdered(ledgerId, op); - } catch (RejectedExecutionException e) { - op.cb.addCompleteWithLatency( - BookKeeper.getReturnRc(clientCtx.getBookieClient(), BKException.Code.InterruptedException), - LedgerHandle.this, INVALID_ENTRY_ID, 0, op.ctx); - } + op.initiate(); + } synchronized void updateLastConfirmed(long lac, long len) { @@ -1366,7 +1654,7 @@ synchronized void updateLastConfirmed(long lac, long len) { /** * Obtains asynchronously the last confirmed write from a quorum of bookies. This - * call obtains the the last add confirmed each bookie has received for this ledger + * call obtains the last add confirmed each bookie has received for this ledger * and returns the maximum. If the ledger has been closed, the value returned by this * call may not correspond to the id of the last entry of the ledger, since it reads * the hint of bookies. Consequently, in the case the ledger has been closed, it may @@ -1380,6 +1668,15 @@ synchronized void updateLastConfirmed(long lac, long len) { */ public void asyncReadLastConfirmed(final ReadLastConfirmedCallback cb, final Object ctx) { + if (clientCtx.getConf().useV2WireProtocol) { + // in v2 protocol we don't support readLAC RPC + asyncReadPiggybackLastConfirmed(cb, ctx); + } else { + asyncReadExplicitLastConfirmed(cb, ctx); + } + } + + private void asyncReadPiggybackLastConfirmed(final ReadLastConfirmedCallback cb, final Object ctx) { boolean isClosed; long lastEntryId; synchronized (this) { @@ -1404,7 +1701,13 @@ public void readLastConfirmedDataComplete(int rc, DigestManager.RecoveryData dat } }; - new ReadLastConfirmedOp(this, clientCtx.getBookieClient(), getCurrentEnsemble(), innercb).initiate(); + new ReadLastConfirmedOp(clientCtx.getBookieClient(), + distributionSchedule, + macManager, + ledgerId, + getCurrentEnsemble(), + ledgerKey, + innercb).initiate(); } /** @@ -1600,7 +1903,7 @@ boolean ready() { /** * Obtains synchronously the last confirmed write from a quorum of bookies. This call - * obtains the the last add confirmed each bookie has received for this ledger + * obtains the last add confirmed each bookie has received for this ledger * and returns the maximum. If the ledger has been closed, the value returned by this * call may not correspond to the id of the last entry of the ledger, since it reads * the hint of bookies. Consequently, in the case the ledger has been closed, it may @@ -1740,7 +2043,7 @@ public long readExplicitLastConfirmed() throws InterruptedException, BKException // close the ledger and send fails to all the adds in the pipeline void handleUnrecoverableErrorDuringAdd(int rc) { - if (getLedgerMetadata().isInRecovery()) { + if (getLedgerMetadata().getState() == LedgerMetadata.State.IN_RECOVERY) { // we should not close ledger if ledger is recovery mode // otherwise we may lose entry. 
errorOutPendingAdds(rc); @@ -1763,10 +2066,10 @@ private void monitorPendingAddOps() { } void errorOutPendingAdds(int rc) { - errorOutPendingAdds(rc, drainPendingAddsToErrorOut()); + errorOutPendingAdds(rc, drainPendingAddsAndAdjustLength()); } - synchronized List drainPendingAddsToErrorOut() { + synchronized List drainPendingAddsAndAdjustLength() { PendingAddOp pendingAddOp; List opsDrained = new ArrayList(pendingAddOps.size()); while ((pendingAddOp = pendingAddOps.poll()) != null) { @@ -1788,7 +2091,7 @@ void sendAddSuccessCallbacks() { PendingAddOp pendingAddOp; while ((pendingAddOp = pendingAddOps.peek()) != null - && blockAddCompletions.get() == 0) { + && !changingEnsemble) { if (!pendingAddOp.completed) { if (LOG.isDebugEnabled()) { LOG.debug("pending add not completed: {}", pendingAddOp); @@ -1816,87 +2119,45 @@ void sendAddSuccessCallbacks() { } - EnsembleInfo replaceBookieInMetadata(final Map failedBookies, - int ensembleChangeIdx) - throws BKException.BKNotEnoughBookiesException { - final ArrayList newEnsemble = new ArrayList(); - final long newEnsembleStartEntry = getLastAddConfirmed() + 1; - final HashSet replacedBookies = new HashSet(); - final LedgerMetadata metadata = getLedgerMetadata(); - synchronized (metadata) { - newEnsemble.addAll(getCurrentEnsemble()); - for (Map.Entry entry : failedBookies.entrySet()) { - int idx = entry.getKey(); - BookieSocketAddress addr = entry.getValue(); - if (LOG.isDebugEnabled()) { - LOG.debug("[EnsembleChange-L{}-{}] : replacing bookie: {} index: {}", - getId(), ensembleChangeIdx, addr, idx); - } - if (!newEnsemble.get(idx).equals(addr)) { - // ensemble has already changed, failure of this addr is immaterial - if (LOG.isDebugEnabled()) { - LOG.debug("Write did not succeed to {}, bookieIndex {}, but we have already fixed it.", - addr, idx); - } - continue; - } - try { - BookieSocketAddress newBookie = clientCtx.getBookieWatcher().replaceBookie( - metadata.getEnsembleSize(), - metadata.getWriteQuorumSize(), - metadata.getAckQuorumSize(), - metadata.getCustomMetadata(), - newEnsemble, - idx, - new HashSet(failedBookies.values())); - newEnsemble.set(idx, newBookie); - replacedBookies.add(idx); - } catch (BKException.BKNotEnoughBookiesException e) { - // if there is no bookie replaced, we throw not enough bookie exception - if (replacedBookies.size() <= 0) { - throw e; - } else { - break; - } - } - } - if (LOG.isDebugEnabled()) { - LOG.debug("[EnsembleChange-L{}-{}] : changing ensemble from: {} to: {} starting at entry: {}," - + " failed bookies: {}, replaced bookies: {}", - ledgerId, ensembleChangeIdx, getCurrentEnsemble(), newEnsemble, - (getLastAddConfirmed() + 1), failedBookies, replacedBookies); - } - metadata.addEnsemble(newEnsembleStartEntry, newEnsemble); + @VisibleForTesting + boolean hasDelayedWriteFailedBookies() { + return !delayedWriteFailedBookies.isEmpty(); + } + + void notifyWriteFailed(int index, BookieId addr) { + synchronized (metadataLock) { + delayedWriteFailedBookies.put(index, addr); } - return new EnsembleInfo(newEnsemble, failedBookies, replacedBookies); } - void handleDelayedWriteBookieFailure() { - final Map copyDelayedWriteFailedBookies = - new HashMap(delayedWriteFailedBookies); - delayedWriteFailedBookies.clear(); + void maybeHandleDelayedWriteBookieFailure() { + synchronized (metadataLock) { + if (delayedWriteFailedBookies.isEmpty()) { + return; + } + Map toReplace = new HashMap<>(delayedWriteFailedBookies); + delayedWriteFailedBookies.clear(); - // Original intent of this change is to do a best-effort ensemble 
change. - // But this is not possible until the local metadata is completely immutable. - // Until the feature "Make LedgerMetadata Immutable #610" Is complete we will use - // handleBookieFailure() to handle delayed writes as regular bookie failures. - handleBookieFailure(copyDelayedWriteFailedBookies); + // Original intent of this change is to do a best-effort ensemble change. + // But this is not possible until the local metadata is completely immutable. + // Until the feature "Make LedgerMetadata Immutable #610" Is complete we will use + // handleBookieFailure() to handle delayed writes as regular bookie failures. + handleBookieFailure(toReplace); + } } - void handleBookieFailure(final Map failedBookies) { - int curBlockAddCompletions = blockAddCompletions.incrementAndGet(); + void handleBookieFailure(final Map failedBookies) { if (clientCtx.getConf().disableEnsembleChangeFeature.isAvailable()) { - blockAddCompletions.decrementAndGet(); if (LOG.isDebugEnabled()) { LOG.debug("Ensemble change is disabled. Retry sending to failed bookies {} for ledger {}.", failedBookies, ledgerId); } - unsetSuccessAndSendWriteRequest(getCurrentEnsemble(), failedBookies.keySet()); + executeOrdered(() -> + unsetSuccessAndSendWriteRequest(getCurrentEnsemble(), failedBookies.keySet())); return; } if (writeFlags.contains(WriteFlag.DEFERRED_SYNC)) { - blockAddCompletions.decrementAndGet(); if (LOG.isDebugEnabled()) { LOG.debug("Cannot perform ensemble change with write flags {}. " + "Failed bookies {} for ledger {}.", @@ -1906,305 +2167,123 @@ void handleBookieFailure(final Map failedBookies) return; } - int curNumEnsembleChanges = numEnsembleChanges.incrementAndGet(); - // when the ensemble changes are too frequent, close handle - if (curNumEnsembleChanges > clientCtx.getConf().maxAllowedEnsembleChanges) { - if (LOG.isDebugEnabled()) { - LOG.debug("Ledger {} reaches max allowed ensemble change number {}", - ledgerId, clientCtx.getConf().maxAllowedEnsembleChanges); - } - handleUnrecoverableErrorDuringAdd(WriteException); - return; - } - LedgerMetadata metadata = getLedgerMetadata(); - synchronized (metadata) { - try { - EnsembleInfo ensembleInfo = replaceBookieInMetadata(failedBookies, curNumEnsembleChanges); - if (ensembleInfo.replacedBookies.isEmpty()) { - blockAddCompletions.decrementAndGet(); - return; - } - if (LOG.isDebugEnabled()) { - LOG.debug("[EnsembleChange-L{}-{}] : writing new ensemble info = {}, block add completions = {}", - getId(), curNumEnsembleChanges, ensembleInfo, curBlockAddCompletions); - } - writeLedgerConfig(new ChangeEnsembleCb(ensembleInfo, curBlockAddCompletions, - curNumEnsembleChanges)); - // clear if there are any delayed write failures were recorded. 
- delayedWriteFailedBookies.clear(); - } catch (BKException.BKNotEnoughBookiesException e) { - LOG.error("Could not get additional bookie to remake ensemble, closing ledger: {}", ledgerId); - handleUnrecoverableErrorDuringAdd(e.getCode()); - return; - } - } - } - - // Contains newly reformed ensemble, bookieIndex, failedBookieAddress - static final class EnsembleInfo { - final ArrayList newEnsemble; - private final Map failedBookies; - final Set replacedBookies; - - public EnsembleInfo(ArrayList newEnsemble, - Map failedBookies, - Set replacedBookies) { - this.newEnsemble = newEnsemble; - this.failedBookies = failedBookies; - this.replacedBookies = replacedBookies; - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder(); - sb.append("Ensemble Info : failed bookies = ").append(failedBookies) - .append(", replaced bookies = ").append(replacedBookies) - .append(", new ensemble = ").append(newEnsemble); - return sb.toString(); - } - } - - /** - * Callback which is updating the ledgerMetadata in zk with the newly - * reformed ensemble. On MetadataVersionException, will reread latest - * ledgerMetadata and act upon. - */ - private final class ChangeEnsembleCb extends OrderedGenericCallback { - private final EnsembleInfo ensembleInfo; - private final int curBlockAddCompletions; - private final int ensembleChangeIdx; - - ChangeEnsembleCb(EnsembleInfo ensembleInfo, - int curBlockAddCompletions, - int ensembleChangeIdx) { - super(clientCtx.getMainWorkerPool(), ledgerId); - this.ensembleInfo = ensembleInfo; - this.curBlockAddCompletions = curBlockAddCompletions; - this.ensembleChangeIdx = ensembleChangeIdx; - } - - @Override - public void safeOperationComplete(final int rc, LedgerMetadata writtenMetadata) { - if (rc == BKException.Code.MetadataVersionException) { - // We changed the ensemble, but got a version exception. 
We - // should still consider this as an ensemble change - ensembleChangeCounter.inc(); - - if (LOG.isDebugEnabled()) { - LOG.info("[EnsembleChange-L{}-{}] : encountered version conflicts, re-read ledger metadata.", - getId(), ensembleChangeIdx); - } - - rereadMetadata(new ReReadLedgerMetadataCb(rc, - ensembleInfo, curBlockAddCompletions, ensembleChangeIdx)); - return; - } else if (rc != BKException.Code.OK) { - LOG.error("[EnsembleChange-L{}-{}] : could not persist ledger metadata : info = {}, " - + "closing ledger : {}.", getId(), ensembleChangeIdx, ensembleInfo, rc); - handleUnrecoverableErrorDuringAdd(rc); - return; - } - int newBlockAddCompletions = blockAddCompletions.decrementAndGet(); + boolean triggerLoop = false; + Map toReplace = null; + List origEnsemble = null; + synchronized (metadataLock) { + if (changingEnsemble) { + delayedWriteFailedBookies.putAll(failedBookies); + } else { + changingEnsemble = true; + triggerLoop = true; + toReplace = new HashMap<>(delayedWriteFailedBookies); + delayedWriteFailedBookies.clear(); + toReplace.putAll(failedBookies); - if (LOG.isDebugEnabled()) { - LOG.info("[EnsembleChange-L{}-{}] : completed ensemble change, block add completion {} => {}", - getId(), ensembleChangeIdx, curBlockAddCompletions, newBlockAddCompletions); + origEnsemble = getCurrentEnsemble(); } - - // We've successfully changed an ensemble - ensembleChangeCounter.inc(); - LOG.info("New Ensemble: {} for ledger: {}", ensembleInfo.newEnsemble, ledgerId); - - // the failed bookie has been replaced - unsetSuccessAndSendWriteRequest(ensembleInfo.newEnsemble, ensembleInfo.replacedBookies); } - - @Override - public String toString() { - return String.format("ChangeEnsemble(%d)", ledgerId); + if (triggerLoop) { + ensembleChangeLoop(origEnsemble, toReplace); } } - /** - * Callback which is reading the ledgerMetadata present in zk. This will try - * to resolve the version conflicts. 
- */ - private final class ReReadLedgerMetadataCb extends OrderedGenericCallback { - private final int rc; - private final EnsembleInfo ensembleInfo; - private final int curBlockAddCompletions; - private final int ensembleChangeIdx; - - ReReadLedgerMetadataCb(int rc, - EnsembleInfo ensembleInfo, - int curBlockAddCompletions, - int ensembleChangeIdx) { - super(clientCtx.getMainWorkerPool(), ledgerId); - this.rc = rc; - this.ensembleInfo = ensembleInfo; - this.curBlockAddCompletions = curBlockAddCompletions; - this.ensembleChangeIdx = ensembleChangeIdx; - } + void ensembleChangeLoop(List origEnsemble, Map failedBookies) { + int ensembleChangeId = numEnsembleChanges.incrementAndGet(); + ensembleChangeCounter.inc(); + String logContext = String.format("[EnsembleChange(ledger:%d, change-id:%010d)]", ledgerId, ensembleChangeId); - @Override - public void safeOperationComplete(int newrc, LedgerMetadata newMeta) { - if (newrc != BKException.Code.OK) { - LOG.error("[EnsembleChange-L{}-{}] : error re-reading metadata " - + "to address ensemble change conflicts: {}", - ledgerId, ensembleChangeIdx, BKException.codeLogger(newrc)); - handleUnrecoverableErrorDuringAdd(rc); - } else { - if (!resolveConflict(newMeta)) { - LOG.error("[EnsembleChange-L{}-{}] : could not resolve ledger metadata conflict" - + " while changing ensemble to: {}, local meta data is \n {} \n," - + " zk meta data is \n {} \n, closing ledger", - ledgerId, ensembleChangeIdx, ensembleInfo.newEnsemble, getLedgerMetadata(), newMeta); - handleUnrecoverableErrorDuringAdd(rc); - } - } + // when the ensemble changes are too frequent, close handle + if (ensembleChangeId > clientCtx.getConf().maxAllowedEnsembleChanges) { + LOG.info("{} reaches max allowed ensemble change number {}", + logContext, clientCtx.getConf().maxAllowedEnsembleChanges); + handleUnrecoverableErrorDuringAdd(WriteException); + return; } - /** - * Specific resolve conflicts happened when multiple bookies failures in same ensemble. - * - *

<p>Resolving the version conflicts between local ledgerMetadata and zk
- * ledgerMetadata. This will do the following:
- * <ul>
- * <li>check whether ledgerMetadata state matches of local and zk</li>
- * <li>if the zk ledgerMetadata still contains the failed bookie, then
- * update zookeeper with the newBookie otherwise send write request</li>
- * </ul>
    - */ - private boolean resolveConflict(LedgerMetadata newMeta) { - LedgerMetadata metadata = getLedgerMetadata(); - if (LOG.isDebugEnabled()) { - LOG.debug("[EnsembleChange-L{}-{}] : resolving conflicts - local metadata = \n {} \n," - + " zk metadata = \n {} \n", ledgerId, ensembleChangeIdx, metadata, newMeta); - } - // make sure the ledger isn't closed by other ones. - if (metadata.getState() != newMeta.getState()) { - if (LOG.isDebugEnabled()) { - LOG.info("[EnsembleChange-L{}-{}] : resolving conflicts but state changed," - + " local metadata = \n {} \n, zk metadata = \n {} \n", - ledgerId, ensembleChangeIdx, metadata, newMeta); - } - return false; - } + if (LOG.isDebugEnabled()) { + LOG.debug("{} Replacing {} in {}", logContext, failedBookies, origEnsemble); + } + + AtomicInteger attempts = new AtomicInteger(0); + new MetadataUpdateLoop( + clientCtx.getLedgerManager(), getId(), + this::getVersionedLedgerMetadata, + (metadata) -> metadata.getState() == LedgerMetadata.State.OPEN + && failedBookies.entrySet().stream().anyMatch( + e -> LedgerMetadataUtils.getLastEnsembleValue(metadata) + .get(e.getKey()).equals(e.getValue())), + (metadata) -> { + attempts.incrementAndGet(); + + List currentEnsemble = getCurrentEnsemble(); + List newEnsemble = EnsembleUtils.replaceBookiesInEnsemble( + clientCtx.getBookieWatcher(), metadata, currentEnsemble, failedBookies, logContext); + Long lastEnsembleKey = LedgerMetadataUtils.getLastEnsembleKey(metadata); + LedgerMetadataBuilder builder = LedgerMetadataBuilder.from(metadata); + long newEnsembleStartEntry = getLastAddConfirmed() + 1; + checkState(lastEnsembleKey <= newEnsembleStartEntry, + "New ensemble must either replace the last ensemble, or add a new one"); + if (LOG.isDebugEnabled()) { + LOG.debug("{}[attempt:{}] changing ensemble from: {} to: {} starting at entry: {}", + logContext, attempts.get(), currentEnsemble, newEnsemble, newEnsembleStartEntry); + } - // We should check number of ensembles since there are two kinds of metadata conflicts: - // - Case 1: Multiple bookies involved in ensemble change. - // Number of ensembles should be same in this case. - // - Case 2: Recovery (Auto/Manually) replaced ensemble and ensemble changed. - // The metadata changed due to ensemble change would have one more ensemble - // than the metadata changed by recovery. - int diff = newMeta.getEnsembles().size() - metadata.getEnsembles().size(); - if (0 != diff) { - if (LOG.isDebugEnabled()) { - LOG.debug("[EnsembleChange-L{}-{}] : resolving conflicts but ensembles have {} differences," - + " local metadata = \n {} \n, zk metadata = \n {} \n", - ledgerId, ensembleChangeIdx, diff, metadata, newMeta); - } - if (-1 == diff) { - // Case 1: metadata is changed by other ones (e.g. Recovery) - return updateMetadataIfPossible(metadata, newMeta); - } - return false; - } + if (lastEnsembleKey.equals(newEnsembleStartEntry)) { + return builder.replaceEnsembleEntry(newEnsembleStartEntry, newEnsemble).build(); + } else { + return builder.newEnsembleEntry(newEnsembleStartEntry, newEnsemble).build(); + } + }, + this::setLedgerMetadata) + .run().whenCompleteAsync((metadata, ex) -> { + if (ex != null) { + LOG.warn("{}[attempt:{}] Exception changing ensemble", logContext, attempts.get(), ex); + handleUnrecoverableErrorDuringAdd(BKException.getExceptionCode(ex, WriteException)); + } else if (metadata.getValue().isClosed()) { + if (LOG.isDebugEnabled()) { + LOG.debug("{}[attempt:{}] Metadata closed during attempt to replace bookie." 
+ + " Another client must have recovered the ledger.", logContext, attempts.get()); + } + handleUnrecoverableErrorDuringAdd(BKException.Code.LedgerClosedException); + } else if (metadata.getValue().getState() == LedgerMetadata.State.IN_RECOVERY) { + if (LOG.isDebugEnabled()) { + LOG.debug("{}[attempt:{}] Metadata marked as in-recovery during attempt to replace bookie." + + " Another client must be recovering the ledger.", logContext, attempts.get()); + } - // - // Case 2: - // - // If the failed the bookie is still existed in the metadata (in zookeeper), it means that - // the ensemble change of the failed bookie is failed due to metadata conflicts. so try to - // update the ensemble change metadata again. Otherwise, it means that the ensemble change - // is already succeed, unset the success and re-adding entries. - if (!areFailedBookiesReplaced(newMeta, ensembleInfo)) { - // If the in-memory data doesn't contains the failed bookie, it means the ensemble change - // didn't finish, so try to resolve conflicts with the metadata read from zookeeper and - // update ensemble changed metadata again. - if (areFailedBookiesReplaced(metadata, ensembleInfo)) { - return updateMetadataIfPossible(metadata, newMeta); - } - } else { - ensembleChangeCounter.inc(); - // We've successfully changed an ensemble - // the failed bookie has been replaced - int newBlockAddCompletions = blockAddCompletions.decrementAndGet(); - unsetSuccessAndSendWriteRequest(ensembleInfo.newEnsemble, ensembleInfo.replacedBookies); - if (LOG.isDebugEnabled()) { - LOG.info("[EnsembleChange-L{}-{}] : resolved conflicts, block add complectiosn {} => {}.", - ledgerId, ensembleChangeIdx, curBlockAddCompletions, newBlockAddCompletions); - } - } - return true; - } + handleUnrecoverableErrorDuringAdd(BKException.Code.LedgerFencedException); + } else { + if (LOG.isDebugEnabled()) { + LOG.debug("{}[attempt:{}] Success updating metadata.", logContext, attempts.get()); + } - /** - * Check whether all the failed bookies are replaced. - * - * @param newMeta - * new ledger metadata - * @param ensembleInfo - * ensemble info used for ensemble change. - * @return true if all failed bookies are replaced, false otherwise - */ - private boolean areFailedBookiesReplaced(LedgerMetadata newMeta, EnsembleInfo ensembleInfo) { - boolean replaced = true; - for (Integer replacedBookieIdx : ensembleInfo.replacedBookies) { - BookieSocketAddress failedBookieAddr = ensembleInfo.failedBookies.get(replacedBookieIdx); - BookieSocketAddress replacedBookieAddr = newMeta.getEnsembles() - .lastEntry().getValue().get(replacedBookieIdx); - replaced &= !Objects.equal(replacedBookieAddr, failedBookieAddr); - } - return replaced; - } + List newEnsemble = null; + Set replaced = null; + synchronized (metadataLock) { + if (!delayedWriteFailedBookies.isEmpty()) { + Map toReplace = new HashMap<>(delayedWriteFailedBookies); + delayedWriteFailedBookies.clear(); - private boolean updateMetadataIfPossible(LedgerMetadata metadata, LedgerMetadata newMeta) { - // if the local metadata is newer than zookeeper metadata, it means that metadata is updated - // again when it was trying re-reading the metatada, re-kick the reread again - if (metadata.isNewerThan(newMeta)) { - if (LOG.isDebugEnabled()) { - LOG.debug("[EnsembleChange-L{}-{}] : reread metadata because local metadata is newer.", - new Object[]{ledgerId, ensembleChangeIdx}); - } - rereadMetadata(this); - return true; - } - // make sure the metadata doesn't changed by other ones. 
- if (metadata.isConflictWith(newMeta)) { - if (LOG.isDebugEnabled()) { - LOG.debug("[EnsembleChange-L{}-{}] : metadata is conflicted, local metadata = \n {} \n," - + " zk metadata = \n {} \n", ledgerId, ensembleChangeIdx, metadata, newMeta); - } - return false; - } - if (LOG.isDebugEnabled()) { - LOG.info("[EnsembleChange-L{}-{}] : resolved ledger metadata conflict and writing to zookeeper," - + " local meta data is \n {} \n, zk meta data is \n {}.", - ledgerId, ensembleChangeIdx, metadata, newMeta); - } - // update znode version - metadata.setVersion(newMeta.getVersion()); - // merge ensemble infos from new meta except last ensemble - // since they might be modified by recovery tool. - metadata.mergeEnsembles(newMeta.getEnsembles()); - writeLedgerConfig(new ChangeEnsembleCb(ensembleInfo, curBlockAddCompletions, - ensembleChangeIdx)); - return true; - } + ensembleChangeLoop(origEnsemble, toReplace); + } else { + newEnsemble = getCurrentEnsemble(); + replaced = EnsembleUtils.diffEnsemble(origEnsemble, newEnsemble); + LOG.info("New Ensemble: {} for ledger: {}", newEnsemble, ledgerId); - @Override - public String toString() { - return String.format("ReReadLedgerMetadata(%d)", ledgerId); - } + changingEnsemble = false; + } + } + if (newEnsemble != null) { // unsetSuccess outside of lock + unsetSuccessAndSendWriteRequest(newEnsemble, replaced); + } + } + }, clientCtx.getMainWorkerPool().chooseThread(ledgerId)); } - void unsetSuccessAndSendWriteRequest(List ensemble, final Set bookies) { + void unsetSuccessAndSendWriteRequest(List ensemble, final Set bookies) { for (PendingAddOp pendingAddOp : pendingAddOps) { for (Integer bookieIndex: bookies) { pendingAddOp.unsetSuccessAndSendWriteRequest(ensemble, bookieIndex); @@ -2212,122 +2291,12 @@ void unsetSuccessAndSendWriteRequest(List ensemble, final S } } - void rereadMetadata(final GenericCallback cb) { - clientCtx.getLedgerManager().readLedgerMetadata(ledgerId, cb); - } - - void registerOperationFailureOnBookie(BookieSocketAddress bookie, long entryId) { + void registerOperationFailureOnBookie(BookieId bookie, long entryId) { if (clientCtx.getConf().enableBookieFailureTracking) { bookieFailureHistory.put(bookie, entryId); } } - - void recover(GenericCallback finalCb) { - recover(finalCb, null, false); - } - - /** - * Recover the ledger. - * - * @param finalCb - * callback after recovery is done. - * @param listener - * read entry listener on recovery reads. - * @param forceRecovery - * force the recovery procedure even the ledger metadata shows the ledger is closed. - */ - void recover(GenericCallback finalCb, - final @VisibleForTesting BookkeeperInternalCallbacks.ReadEntryListener listener, - final boolean forceRecovery) { - final GenericCallback cb = new TimedGenericCallback( - finalCb, - BKException.Code.OK, - clientCtx.getClientStats().getRecoverOpLogger()); - boolean wasClosed = false; - boolean wasInRecovery = false; - - LedgerMetadata metadata = getLedgerMetadata(); - synchronized (this) { - if (metadata.isClosed()) { - if (forceRecovery) { - wasClosed = false; - // mark the ledger back to in recovery state, so it would proceed ledger recovery again. 
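The `recover` method being deleted from `LedgerHandle` here branches on the metadata state: an already-closed ledger needs no recovery, an in-recovery ledger proceeds straight to `LedgerRecoveryOp`, and an open ledger must first be marked in-recovery so a concurrent writer cannot keep advancing the last add confirmed while the ledger is being closed. A compact restatement of that branching, with the enum and method names invented for the sketch:

```java
final class RecoverySketch {
    enum State { OPEN, IN_RECOVERY, CLOSED }

    enum Action { NOTHING_TO_DO, RUN_RECOVERY, MARK_IN_RECOVERY_THEN_RUN }

    // forceRecovery mirrors the deleted flag: it re-runs recovery even on a closed ledger.
    static Action decide(State state, boolean forceRecovery) {
        switch (state) {
            case CLOSED:
                return forceRecovery ? Action.MARK_IN_RECOVERY_THEN_RUN : Action.NOTHING_TO_DO;
            case IN_RECOVERY:
                return Action.RUN_RECOVERY;               // already fenced; recover from the start
            case OPEN:
            default:
                return Action.MARK_IN_RECOVERY_THEN_RUN;  // fence concurrent writers first
        }
    }
}
```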
- wasInRecovery = false; - metadata.markLedgerInRecovery(); - } else { - lastAddConfirmed = lastAddPushed = metadata.getLastEntryId(); - length = metadata.getLength(); - wasClosed = true; - } - } else { - wasClosed = false; - if (metadata.isInRecovery()) { - wasInRecovery = true; - } else { - wasInRecovery = false; - metadata.markLedgerInRecovery(); - } - } - } - - if (wasClosed) { - // We are already closed, nothing to do - cb.operationComplete(BKException.Code.OK, null); - return; - } - - if (wasInRecovery) { - // if metadata is already in recover, dont try to write again, - // just do the recovery from the starting point - new LedgerRecoveryOp(LedgerHandle.this, clientCtx, cb) - .setEntryListener(listener) - .initiate(); - return; - } - - writeLedgerConfig(new OrderedGenericCallback(clientCtx.getMainWorkerPool(), ledgerId) { - @Override - public void safeOperationComplete(final int rc, LedgerMetadata writtenMetadata) { - if (rc == BKException.Code.MetadataVersionException) { - rereadMetadata(new OrderedGenericCallback(clientCtx.getMainWorkerPool(), - ledgerId) { - @Override - public void safeOperationComplete(int rc, LedgerMetadata newMeta) { - if (rc != BKException.Code.OK) { - cb.operationComplete(rc, null); - } else { - LedgerHandle.this.metadata = newMeta; - recover(cb, listener, forceRecovery); - } - } - - @Override - public String toString() { - return String.format("ReReadMetadataForRecover(%d)", ledgerId); - } - }); - } else if (rc == BKException.Code.OK) { - // we only could issue recovery operation after we successfully update the ledger state to - // in recovery otherwise, it couldn't prevent us advancing last confirmed while the other writer is - // closing the ledger, which will cause inconsistent last add confirmed on bookies & zookeeper - // metadata. - new LedgerRecoveryOp(LedgerHandle.this, clientCtx, cb) - .setEntryListener(listener) - .initiate(); - } else { - LOG.error("Error writing ledger {} config: {}", ledgerId, BKException.codeLogger(rc)); - cb.operationComplete(rc, null); - } - } - - @Override - public String toString() { - return String.format("WriteLedgerConfigForRecover(%d)", ledgerId); - } - }); - } - static class NoopCloseCallback implements CloseCallback { static NoopCloseCallback instance = new NoopCloseCallback(); @@ -2353,10 +2322,53 @@ public void closeComplete(int rc, LedgerHandle lh, Object ctx) { * operations themselves, to avoid adding more dependencies between the classes. * There are too many already. */ - List getCurrentEnsemble() { + List getCurrentEnsemble() { // Getting current ensemble from the metadata is only a temporary // thing until metadata is immutable. At that point, current ensemble // becomes a property of the LedgerHandle itself. - return metadata.getCurrentEnsemble(); + return LedgerMetadataUtils.getCurrentEnsemble(versionedMetadata.getValue()); + } + + /** + * Return a {@link WriteSet} suitable for reading a particular entry. + * This will include all bookies that are part of the ensemble for the entry. + */ + WriteSet getWriteSetForReadOperation(long entryId) { + if (stickyBookieIndex != STICKY_READ_BOOKIE_INDEX_UNSET) { + // When sticky reads are enabled we want to make sure to take + // advantage of read-ahead (or, anyway, from efficiencies in + // reading sequential data from disk through the page cache). + // For this, all the entries that a given bookie prefetches, + // should read from that bookie. 
+ // For example, with e=2, w=2, a=2 we would have + // B-1 B-2 + // e-0 X X + // e-1 X X + // e-2 X X + // + // In this case we want all the requests to be issued to B-1 (by + // preference), so that cache hits will be maximized. + // + // We can only enable sticky reads if the ensemble==writeQuorum + // otherwise the same bookie will not have all the entries + // stored + return distributionSchedule.getWriteSet(stickyBookieIndex); + } else { + return distributionSchedule.getWriteSet(entryId); + } + } + + /** + * Execute the callback in the thread pinned to the ledger. + * @param runnable + * @throws RejectedExecutionException + */ + void executeOrdered(Runnable runnable) throws RejectedExecutionException { + executor.execute(runnable); + } + + @VisibleForTesting + public Queue getPendingAddOps() { + return pendingAddOps; } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerHandleAdv.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerHandleAdv.java index c1d38490bce..c94a9154f51 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerHandleAdv.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerHandleAdv.java @@ -34,9 +34,10 @@ import org.apache.bookkeeper.client.AsyncCallback.AddCallback; import org.apache.bookkeeper.client.AsyncCallback.AddCallbackWithLatency; import org.apache.bookkeeper.client.SyncCallbackUtils.SyncAddCallback; +import org.apache.bookkeeper.client.api.LedgerMetadata; import org.apache.bookkeeper.client.api.WriteAdvHandle; import org.apache.bookkeeper.client.api.WriteFlag; -import org.apache.bookkeeper.util.SafeRunnable; +import org.apache.bookkeeper.versioning.Versioned; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -50,13 +51,14 @@ public class LedgerHandleAdv extends LedgerHandle implements WriteAdvHandle { static final Logger LOG = LoggerFactory.getLogger(LedgerHandleAdv.class); static class PendingOpsComparator implements Comparator, Serializable { + @Override public int compare(PendingAddOp o1, PendingAddOp o2) { return Long.compare(o1.entryId, o2.entryId); } } LedgerHandleAdv(ClientContext clientCtx, - long ledgerId, LedgerMetadata metadata, + long ledgerId, Versioned metadata, BookKeeper.DigestType digestType, byte[] password, EnumSet writeFlags) throws GeneralSecurityException, NumberFormatException { super(clientCtx, ledgerId, metadata, digestType, password, writeFlags); @@ -71,6 +73,7 @@ public int compare(PendingAddOp o1, PendingAddOp o2) { * entryId of the entry to add * @param data * array of bytes to be written to the ledger + * do not reuse the buffer, bk-client will release it appropriately * @return * entryId that is just created. 
*/ @@ -88,6 +91,7 @@ public long addEntry(final long entryId, byte[] data) throws InterruptedExceptio * entryId of the entry to add * @param data * array of bytes to be written to the ledger + * do not reuse the buffer, bk-client will release it appropriately * @param offset * offset from which to take bytes from data * @param length @@ -118,6 +122,7 @@ public long addEntry(final long entryId, byte[] data, int offset, int length) th * entryId of the entry to add * @param data * array of bytes to be written + * do not reuse the buffer, bk-client will release it appropriately * @param cb * object implementing callbackinterface * @param ctx @@ -135,6 +140,7 @@ public void asyncAddEntry(long entryId, byte[] data, AddCallback cb, Object ctx) * entryId of the entry to add * @param data * array of bytes to be written + * do not reuse the buffer, bk-client will release it appropriately * @param offset * offset from which to take bytes from data * @param length @@ -160,6 +166,7 @@ public void asyncAddEntry(final long entryId, final byte[] data, final int offse * entryId of the entry to add * @param data * array of bytes to be written + * do not reuse the buffer, bk-client will release it appropriately * @param offset * offset from which to take bytes from data * @param length @@ -181,12 +188,13 @@ public void asyncAddEntry(final long entryId, final byte[] data, final int offse /** * Add entry asynchronously to an open ledger, using an offset and range. * This can be used only with {@link LedgerHandleAdv} returned through - * ledgers created with {@link createLedgerAdv(int, int, int, DigestType, byte[])}. + * ledgers created with {@link BookKeeper#createLedgerAdv(int, int, int, BookKeeper.DigestType, byte[])}. * * @param entryId * entryId of the entry to add. 
* @param data * io.netty.buffer.ByteBuf of bytes to be written + * do not reuse the buffer, bk-client will release it appropriately * @param cb * object implementing callbackinterface * @param ctx @@ -222,24 +230,25 @@ protected void doAsyncAddEntry(final PendingAddOp op) { // synchronized on this to ensure that // the ledger isn't closed between checking and // updating lastAddPushed - if (getLedgerMetadata().isClosed()) { - wasClosed = true; - } else { + if (isHandleWritable()) { long currentLength = addToLength(op.payload.readableBytes()); op.setLedgerLength(currentLength); pendingAddOps.add(op); + } else { + wasClosed = true; } } if (wasClosed) { // make sure the callback is triggered in main worker pool try { - clientCtx.getMainWorkerPool().submit(new SafeRunnable() { + clientCtx.getMainWorkerPool().submit(new Runnable() { @Override - public void safeRun() { + public void run() { LOG.warn("Attempt to add to closed ledger: {}", ledgerId); op.cb.addCompleteWithLatency(BKException.Code.LedgerClosedException, LedgerHandleAdv.this, op.getEntryId(), 0, op.ctx); + op.recyclePendAddOpObject(); } @Override public String toString() { @@ -250,22 +259,23 @@ public String toString() { op.cb.addCompleteWithLatency(BookKeeper.getReturnRc(clientCtx.getBookieClient(), BKException.Code.InterruptedException), LedgerHandleAdv.this, op.getEntryId(), 0, op.ctx); + op.recyclePendAddOpObject(); } return; } - if (!waitForWritable(distributionSchedule.getWriteSet(op.getEntryId()), - op.getEntryId(), 0, clientCtx.getConf().waitForWriteSetMs)) { - op.allowFailFastOnUnwritableChannel(); + if (clientCtx.getConf().waitForWriteSetMs >= 0) { + DistributionSchedule.WriteSet ws = distributionSchedule.getWriteSet(op.getEntryId()); + try { + if (!waitForWritable(ws, 0, clientCtx.getConf().waitForWriteSetMs)) { + op.allowFailFastOnUnwritableChannel(); + } + } finally { + ws.recycle(); + } } - try { - clientCtx.getMainWorkerPool().executeOrdered(ledgerId, op); - } catch (RejectedExecutionException e) { - op.cb.addCompleteWithLatency(BookKeeper.getReturnRc(clientCtx.getBookieClient(), - BKException.Code.InterruptedException), - LedgerHandleAdv.this, op.getEntryId(), 0, op.ctx); - } + op.initiate(); } @Override diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerMetadata.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerMetadata.java deleted file mode 100644 index 4f58aa8b88a..00000000000 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerMetadata.java +++ /dev/null @@ -1,790 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.bookkeeper.client; - -import static com.google.common.base.Charsets.UTF_8; -import static com.google.common.base.Preconditions.checkArgument; - -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Optional; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.Maps; -import com.google.protobuf.ByteString; -import com.google.protobuf.TextFormat; -import java.io.BufferedReader; -import java.io.IOException; -import java.io.StringReader; -import java.nio.CharBuffer; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashSet; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.NavigableMap; -import java.util.Set; -import java.util.SortedMap; -import java.util.TreeMap; -import lombok.EqualsAndHashCode; -import org.apache.bookkeeper.client.api.DigestType; -import org.apache.bookkeeper.net.BookieSocketAddress; -import org.apache.bookkeeper.proto.DataFormats.LedgerMetadataFormat; -import org.apache.bookkeeper.versioning.Version; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * This class encapsulates all the ledger metadata that is persistently stored - * in metadata store. - * - *

    It provides parsing and serialization methods of such metadata. - */ -@EqualsAndHashCode -public class LedgerMetadata implements org.apache.bookkeeper.client.api.LedgerMetadata { - static final Logger LOG = LoggerFactory.getLogger(LedgerMetadata.class); - - private static final String closed = "CLOSED"; - private static final String lSplitter = "\n"; - private static final String tSplitter = "\t"; - - // can't use -1 for NOTCLOSED because that is reserved for a closed, empty - // ledger - private static final int NOTCLOSED = -101; - private static final int IN_RECOVERY = -102; - - public static final int LOWEST_COMPAT_METADATA_FORMAT_VERSION = 0; - public static final int CURRENT_METADATA_FORMAT_VERSION = 2; - public static final String VERSION_KEY = "BookieMetadataFormatVersion"; - - private int metadataFormatVersion = 0; - - private int ensembleSize; - private int writeQuorumSize; - private int ackQuorumSize; - private long length; - private long lastEntryId; - private long ctime; - boolean storeSystemtimeAsLedgerCreationTime = false; - - private LedgerMetadataFormat.State state; - private TreeMap> ensembles = new TreeMap<>(); - private List currentEnsemble; - volatile Version version = Version.NEW; - - private boolean hasPassword = false; - private LedgerMetadataFormat.DigestType digestType; - private byte[] password; - - private Map customMetadata = Maps.newHashMap(); - - public LedgerMetadata(int ensembleSize, - int writeQuorumSize, - int ackQuorumSize, - BookKeeper.DigestType digestType, - byte[] password, - Map customMetadata, - boolean storeSystemtimeAsLedgerCreationTime) { - this.ensembleSize = ensembleSize; - this.writeQuorumSize = writeQuorumSize; - this.ackQuorumSize = ackQuorumSize; - if (storeSystemtimeAsLedgerCreationTime) { - this.ctime = System.currentTimeMillis(); - } else { - // if client disables storing its system time as ledger creation time, there should be no ctime at this - // moment. - this.ctime = -1L; - } - this.storeSystemtimeAsLedgerCreationTime = storeSystemtimeAsLedgerCreationTime; - - /* - * It is set in PendingReadOp.readEntryComplete, and - * we read it in LedgerRecoveryOp.readComplete. - */ - this.length = 0; - this.state = LedgerMetadataFormat.State.OPEN; - this.lastEntryId = LedgerHandle.INVALID_ENTRY_ID; - this.metadataFormatVersion = CURRENT_METADATA_FORMAT_VERSION; - - this.digestType = digestType.equals(BookKeeper.DigestType.MAC) - ? LedgerMetadataFormat.DigestType.HMAC : LedgerMetadataFormat.DigestType.valueOf(digestType.toString()); - this.password = Arrays.copyOf(password, password.length); - this.hasPassword = true; - if (customMetadata != null) { - this.customMetadata = customMetadata; - } - } - - LedgerMetadata(int ensembleSize, - int writeQuorumSize, - int ackQuorumSize, - LedgerMetadataFormat.State state, - java.util.Optional lastEntryId, - Map> ensembles, - DigestType digestType, - java.util.Optional password, - java.util.Optional ctime, - Map customMetadata, - Version version) { - checkArgument(ensembles.size() > 0, "There must be at least one ensemble in the ledger"); - - this.ensembleSize = ensembleSize; - this.writeQuorumSize = writeQuorumSize; - this.ackQuorumSize = ackQuorumSize; - this.state = state; - lastEntryId.ifPresent((eid) -> this.lastEntryId = eid); - - setEnsembles(ensembles); - if (state != LedgerMetadataFormat.State.CLOSED) { - currentEnsemble = this.ensembles.lastEntry().getValue(); - } - - this.digestType = digestType.equals(DigestType.MAC) - ? 
LedgerMetadataFormat.DigestType.HMAC : LedgerMetadataFormat.DigestType.valueOf(digestType.toString()); - - password.ifPresent((pw) -> { - this.password = pw; - this.hasPassword = true; - }); - - ctime.ifPresent((c) -> { - this.ctime = c; - this.storeSystemtimeAsLedgerCreationTime = true; - }); - - this.customMetadata.putAll(customMetadata); - this.version = version; - } - - /** - * Used for testing purpose only. - */ - @VisibleForTesting - public LedgerMetadata(int ensembleSize, int writeQuorumSize, int ackQuorumSize, - BookKeeper.DigestType digestType, byte[] password) { - this(ensembleSize, writeQuorumSize, ackQuorumSize, digestType, password, null, false); - } - - /** - * Copy Constructor. - */ - LedgerMetadata(LedgerMetadata other) { - this.ensembleSize = other.ensembleSize; - this.writeQuorumSize = other.writeQuorumSize; - this.ackQuorumSize = other.ackQuorumSize; - this.length = other.length; - this.lastEntryId = other.lastEntryId; - this.metadataFormatVersion = other.metadataFormatVersion; - this.state = other.state; - this.version = other.version; - this.hasPassword = other.hasPassword; - this.digestType = other.digestType; - this.ctime = other.ctime; - this.storeSystemtimeAsLedgerCreationTime = other.storeSystemtimeAsLedgerCreationTime; - this.password = new byte[other.password.length]; - System.arraycopy(other.password, 0, this.password, 0, other.password.length); - // copy the ensembles - for (Entry> entry : other.ensembles.entrySet()) { - this.addEnsemble(entry.getKey(), entry.getValue()); - } - this.customMetadata = other.customMetadata; - } - - private LedgerMetadata() { - this(0, 0, 0, BookKeeper.DigestType.MAC, new byte[] {}); - this.hasPassword = false; - } - - /** - * Get the Map of bookie ensembles for the various ledger fragments - * that make up the ledger. - * - * @return SortedMap of Ledger Fragments and the corresponding - * bookie ensembles that store the entries. - */ - public TreeMap> getEnsembles() { - return ensembles; - } - - @Override - public NavigableMap> getAllEnsembles() { - return ensembles; - } - - void setEnsembles(Map> newEnsembles) { - this.ensembles = newEnsembles.entrySet().stream() - .collect(TreeMap::new, - (m, e) -> m.put(e.getKey(), ImmutableList.copyOf(e.getValue())), - TreeMap::putAll); - } - - @Override - public int getEnsembleSize() { - return ensembleSize; - } - - @Override - public int getWriteQuorumSize() { - return writeQuorumSize; - } - - @Override - public int getAckQuorumSize() { - return ackQuorumSize; - } - - @Override - public long getCtime() { - return ctime; - } - - @VisibleForTesting - void setCtime(long ctime) { - this.ctime = ctime; - } - - /** - * In versions 4.1.0 and below, the digest type and password were not - * stored in the metadata. 
- * - * @return whether the password has been stored in the metadata - */ - boolean hasPassword() { - return hasPassword; - } - - @VisibleForTesting - public byte[] getPassword() { - return Arrays.copyOf(password, password.length); - } - - @Override - public DigestType getDigestType() { - switch (digestType) { - case HMAC: - return DigestType.MAC; - case CRC32: - return DigestType.CRC32; - case CRC32C: - return DigestType.CRC32C; - case DUMMY: - return DigestType.DUMMY; - default: - throw new IllegalArgumentException("Unable to convert digest type " + digestType); - } - } - - @Override - public long getLastEntryId() { - return lastEntryId; - } - - @Override - public long getLength() { - return length; - } - - void setLength(long length) { - this.length = length; - } - - @Override - public boolean isClosed() { - return state == LedgerMetadataFormat.State.CLOSED; - } - - public boolean isInRecovery() { - return state == LedgerMetadataFormat.State.IN_RECOVERY; - } - - public LedgerMetadataFormat.State getState() { - return state; - } - - void setState(LedgerMetadataFormat.State state) { - this.state = state; - } - - void markLedgerInRecovery() { - state = LedgerMetadataFormat.State.IN_RECOVERY; - } - - void close(long entryId) { - lastEntryId = entryId; - state = LedgerMetadataFormat.State.CLOSED; - } - - public void addEnsemble(long startEntryId, List ensemble) { - checkArgument(ensembles.isEmpty() || startEntryId >= ensembles.lastKey()); - - ensembles.put(startEntryId, ImmutableList.copyOf(ensemble)); - currentEnsemble = ensemble; - } - - List getCurrentEnsemble() { - return currentEnsemble; - } - - public void updateEnsemble(long startEntryId, List ensemble) { - checkArgument(ensembles.containsKey(startEntryId)); - ensembles.put(startEntryId, ImmutableList.copyOf(ensemble)); - } - - List getEnsemble(long entryId) { - // the head map cannot be empty, since we insert an ensemble for - // entry-id 0, right when we start - return ensembles.get(ensembles.headMap(entryId + 1).lastKey()); - } - - @Override - public List getEnsembleAt(long entryId) { - return getEnsemble(entryId); - } - - /** - * the entry id greater than the given entry-id at which the next ensemble change takes - * place. 
- * - * @param entryId - * @return the entry id of the next ensemble change (-1 if no further ensemble changes) - */ - long getNextEnsembleChange(long entryId) { - SortedMap> tailMap = ensembles.tailMap(entryId + 1); - - if (tailMap.isEmpty()) { - return -1; - } else { - return tailMap.firstKey(); - } - } - - @Override - public Map getCustomMetadata() { - return this.customMetadata; - } - - void setCustomMetadata(Map customMetadata) { - this.customMetadata = customMetadata; - } - - LedgerMetadataFormat buildProtoFormat() { - return buildProtoFormat(true); - } - - LedgerMetadataFormat buildProtoFormat(boolean withPassword) { - LedgerMetadataFormat.Builder builder = LedgerMetadataFormat.newBuilder(); - builder.setQuorumSize(writeQuorumSize).setAckQuorumSize(ackQuorumSize) - .setEnsembleSize(ensembleSize).setLength(length) - .setState(state).setLastEntryId(lastEntryId); - - if (storeSystemtimeAsLedgerCreationTime) { - builder.setCtime(ctime); - } - - if (hasPassword) { - builder.setDigestType(digestType); - if (withPassword) { - builder.setPassword(ByteString.copyFrom(password)); - } - } - - if (customMetadata != null) { - LedgerMetadataFormat.cMetadataMapEntry.Builder cMetadataBuilder = - LedgerMetadataFormat.cMetadataMapEntry.newBuilder(); - for (Map.Entry entry : customMetadata.entrySet()) { - cMetadataBuilder.setKey(entry.getKey()).setValue(ByteString.copyFrom(entry.getValue())); - builder.addCustomMetadata(cMetadataBuilder.build()); - } - } - - for (Map.Entry> entry : ensembles.entrySet()) { - LedgerMetadataFormat.Segment.Builder segmentBuilder = LedgerMetadataFormat.Segment.newBuilder(); - segmentBuilder.setFirstEntryId(entry.getKey()); - for (BookieSocketAddress addr : entry.getValue()) { - segmentBuilder.addEnsembleMember(addr.toString()); - } - builder.addSegment(segmentBuilder.build()); - } - return builder.build(); - } - - /** - * Generates a byte array of this object. - * - * @return the metadata serialized into a byte array - */ - public byte[] serialize() { - return serialize(true); - } - - public byte[] serialize(boolean withPassword) { - if (metadataFormatVersion == 1) { - return serializeVersion1(); - } - - StringBuilder s = new StringBuilder(); - s.append(VERSION_KEY).append(tSplitter).append(CURRENT_METADATA_FORMAT_VERSION).append(lSplitter); - s.append(TextFormat.printToString(buildProtoFormat(withPassword))); - if (LOG.isDebugEnabled()) { - LOG.debug("Serialized config: {}", s); - } - return s.toString().getBytes(UTF_8); - } - - private byte[] serializeVersion1() { - StringBuilder s = new StringBuilder(); - s.append(VERSION_KEY).append(tSplitter).append(metadataFormatVersion).append(lSplitter); - s.append(writeQuorumSize).append(lSplitter).append(ensembleSize).append(lSplitter).append(length); - - for (Map.Entry> entry : ensembles.entrySet()) { - s.append(lSplitter).append(entry.getKey()); - for (BookieSocketAddress addr : entry.getValue()) { - s.append(tSplitter); - s.append(addr.toString()); - } - } - - if (state == LedgerMetadataFormat.State.IN_RECOVERY) { - s.append(lSplitter).append(IN_RECOVERY).append(tSplitter).append(closed); - } else if (state == LedgerMetadataFormat.State.CLOSED) { - s.append(lSplitter).append(getLastEntryId()).append(tSplitter).append(closed); - } - - if (LOG.isDebugEnabled()) { - LOG.debug("Serialized config: {}", s); - } - - return s.toString().getBytes(UTF_8); - } - - /** - * Parses a given byte array and transforms into a LedgerConfig object. 
- * - * @param bytes - * byte array to parse - * @param version - * version of the ledger metadata - * @param msCtime - * metadata store creation time, used for legacy ledgers - * @return LedgerConfig - * @throws IOException - * if the given byte[] cannot be parsed - */ - public static LedgerMetadata parseConfig(byte[] bytes, Version version, Optional msCtime) throws IOException { - LedgerMetadata lc = new LedgerMetadata(); - lc.version = version; - - String config = new String(bytes, UTF_8); - - if (LOG.isDebugEnabled()) { - LOG.debug("Parsing Config: {}", config); - } - BufferedReader reader = new BufferedReader(new StringReader(config)); - String versionLine = reader.readLine(); - if (versionLine == null) { - throw new IOException("Invalid metadata. Content missing"); - } - if (versionLine.startsWith(VERSION_KEY)) { - String parts[] = versionLine.split(tSplitter); - lc.metadataFormatVersion = Integer.parseInt(parts[1]); - } else { - // if no version is set, take it to be version 1 - // as the parsing is the same as what we had before - // we introduce versions - lc.metadataFormatVersion = 1; - // reset the reader - reader.close(); - reader = new BufferedReader(new StringReader(config)); - } - - if (lc.metadataFormatVersion < LOWEST_COMPAT_METADATA_FORMAT_VERSION - || lc.metadataFormatVersion > CURRENT_METADATA_FORMAT_VERSION) { - throw new IOException("Metadata version not compatible. Expected between " - + LOWEST_COMPAT_METADATA_FORMAT_VERSION + " and " + CURRENT_METADATA_FORMAT_VERSION - + ", but got " + lc.metadataFormatVersion); - } - - if (lc.metadataFormatVersion == 1) { - return parseVersion1Config(lc, reader); - } - - // remaining size is total minus the length of the version line and '\n' - char[] configBuffer = new char[config.length() - (versionLine.length() + 1)]; - if (configBuffer.length != reader.read(configBuffer, 0, configBuffer.length)) { - throw new IOException("Invalid metadata buffer"); - } - - LedgerMetadataFormat.Builder builder = LedgerMetadataFormat.newBuilder(); - - TextFormat.merge((CharSequence) CharBuffer.wrap(configBuffer), builder); - LedgerMetadataFormat data = builder.build(); - lc.writeQuorumSize = data.getQuorumSize(); - if (data.hasCtime()) { - lc.ctime = data.getCtime(); - lc.storeSystemtimeAsLedgerCreationTime = true; - } else if (msCtime.isPresent()) { - lc.ctime = msCtime.get(); - lc.storeSystemtimeAsLedgerCreationTime = false; - } - if (data.hasAckQuorumSize()) { - lc.ackQuorumSize = data.getAckQuorumSize(); - } else { - lc.ackQuorumSize = lc.writeQuorumSize; - } - - lc.ensembleSize = data.getEnsembleSize(); - lc.length = data.getLength(); - lc.state = data.getState(); - lc.lastEntryId = data.getLastEntryId(); - - if (data.hasPassword()) { - lc.digestType = data.getDigestType(); - lc.password = data.getPassword().toByteArray(); - lc.hasPassword = true; - } - - for (LedgerMetadataFormat.Segment s : data.getSegmentList()) { - ArrayList addrs = new ArrayList(); - for (String member : s.getEnsembleMemberList()) { - addrs.add(new BookieSocketAddress(member)); - } - lc.addEnsemble(s.getFirstEntryId(), addrs); - } - - if (data.getCustomMetadataCount() > 0) { - List cMetadataList = data.getCustomMetadataList(); - lc.customMetadata = Maps.newHashMap(); - for (LedgerMetadataFormat.cMetadataMapEntry ent : cMetadataList) { - lc.customMetadata.put(ent.getKey(), ent.getValue().toByteArray()); - } - } - return lc; - } - - static LedgerMetadata parseVersion1Config(LedgerMetadata lc, - BufferedReader reader) throws IOException { - try { - lc.writeQuorumSize = 
lc.ackQuorumSize = Integer.parseInt(reader.readLine()); - lc.ensembleSize = Integer.parseInt(reader.readLine()); - lc.length = Long.parseLong(reader.readLine()); - - String line = reader.readLine(); - while (line != null) { - String parts[] = line.split(tSplitter); - - if (parts[1].equals(closed)) { - Long l = Long.parseLong(parts[0]); - if (l == IN_RECOVERY) { - lc.state = LedgerMetadataFormat.State.IN_RECOVERY; - } else { - lc.state = LedgerMetadataFormat.State.CLOSED; - lc.lastEntryId = l; - } - break; - } else { - lc.state = LedgerMetadataFormat.State.OPEN; - } - - ArrayList addrs = new ArrayList(); - for (int j = 1; j < parts.length; j++) { - addrs.add(new BookieSocketAddress(parts[j])); - } - lc.addEnsemble(Long.parseLong(parts[0]), addrs); - line = reader.readLine(); - } - } catch (NumberFormatException e) { - throw new IOException(e); - } - return lc; - } - - /** - * Updates the version of this metadata. - * - * @param v Version - */ - public void setVersion(Version v) { - this.version = v; - } - - /** - * Returns the last version. - * - * @return version - */ - public Version getVersion() { - return this.version; - } - - /** - * Is the metadata newer than given newMeta. - * - * @param newMeta the metadata to compare - * @return true if this is newer than newMeta, false otherwise - */ - boolean isNewerThan(LedgerMetadata newMeta) { - if (null == version) { - return false; - } - return Version.Occurred.AFTER == version.compare(newMeta.version); - } - - /** - * Routine to compare two {@code Map}; Since the values in the map are {@code byte[]}, we can't use - * {@code Map.equals}. - * @param first - * The first map - * @param second - * The second map to compare with - * @return true if the 2 maps contain the exact set of {@code } pairs. - */ - public static boolean areByteArrayValMapsEqual(Map first, Map second) { - if (first == null && second == null) { - return true; - } - - // above check confirms that both are not null; - // if one is null the other isn't; so they must - // be different - if (first == null || second == null) { - return false; - } - - if (first.size() != second.size()) { - return false; - } - for (Map.Entry entry : first.entrySet()) { - if (!Arrays.equals(entry.getValue(), second.get(entry.getKey()))) { - return false; - } - } - return true; - } - - /** - * Is the metadata conflict with new updated metadata. - * - * @param newMeta - * Re-read metadata - * @return true if the metadata is conflict. - */ - boolean isConflictWith(LedgerMetadata newMeta) { - /* - * if length & close have changed, then another client has - * opened the ledger, can't resolve this conflict. 
- */ - - if (metadataFormatVersion != newMeta.metadataFormatVersion - || ensembleSize != newMeta.ensembleSize - || writeQuorumSize != newMeta.writeQuorumSize - || ackQuorumSize != newMeta.ackQuorumSize - || length != newMeta.length - || state != newMeta.state - || !digestType.equals(newMeta.digestType) - || !Arrays.equals(password, newMeta.password) - || !LedgerMetadata.areByteArrayValMapsEqual(customMetadata, newMeta.customMetadata)) { - return true; - } - - // verify the ctime - if (storeSystemtimeAsLedgerCreationTime != newMeta.storeSystemtimeAsLedgerCreationTime) { - return true; - } else if (storeSystemtimeAsLedgerCreationTime) { - return ctime != newMeta.ctime; - } - - if (state == LedgerMetadataFormat.State.CLOSED - && lastEntryId != newMeta.lastEntryId) { - return true; - } - // if ledger is closed, we can just take the new ensembles - if (newMeta.state != LedgerMetadataFormat.State.CLOSED) { - // allow new metadata to be one ensemble less than current metadata - // since ensemble change might kick in when recovery changed metadata - int diff = ensembles.size() - newMeta.ensembles.size(); - if (0 != diff && 1 != diff) { - return true; - } - // ensemble distribution should be same - // we don't check the detail ensemble, since new bookie will be set - // using recovery tool. - Iterator keyIter = ensembles.keySet().iterator(); - Iterator newMetaKeyIter = newMeta.ensembles.keySet().iterator(); - for (int i = 0; i < newMeta.ensembles.size(); i++) { - Long curKey = keyIter.next(); - Long newMetaKey = newMetaKeyIter.next(); - if (!curKey.equals(newMetaKey)) { - return true; - } - } - } - return false; - } - - @Override - public String toString() { - return toStringRepresentation(true); - } - - /** - * Returns a string representation of this LedgerMetadata object by - * filtering out the password field. - * - * @return a string representation of the object without password field in - * it. 
- */ - public String toSafeString() { - return toStringRepresentation(false); - } - - private String toStringRepresentation(boolean withPassword) { - StringBuilder sb = new StringBuilder(); - sb.append("(meta:").append(new String(serialize(withPassword), UTF_8)).append(", version:").append(version) - .append(")"); - return sb.toString(); - } - - void mergeEnsembles(SortedMap> newEnsembles) { - // allow new metadata to be one ensemble less than current metadata - // since ensemble change might kick in when recovery changed metadata - int diff = ensembles.size() - newEnsembles.size(); - if (0 != diff && 1 != diff) { - return; - } - int i = 0; - for (Entry> entry : newEnsembles.entrySet()) { - ++i; - if (ensembles.size() != i) { - // we should use last ensemble from current metadata - // not the new metadata read from zookeeper - long key = entry.getKey(); - List ensemble = entry.getValue(); - ensembles.put(key, ImmutableList.copyOf(ensemble)); - } - } - } - - Set getBookiesInThisLedger() { - Set bookies = new HashSet(); - for (List ensemble : ensembles.values()) { - bookies.addAll(ensemble); - } - return bookies; - } - -} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerMetadataBuilder.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerMetadataBuilder.java index 8c37bd53917..eaa2efa875c 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerMetadataBuilder.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerMetadataBuilder.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -19,79 +19,128 @@ import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Preconditions.checkState; +import static org.apache.bookkeeper.meta.LedgerMetadataSerDe.CURRENT_METADATA_FORMAT_VERSION; +import static org.apache.bookkeeper.meta.LedgerMetadataSerDe.METADATA_FORMAT_VERSION_1; +import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableMap; - +import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.Map; import java.util.Optional; import java.util.TreeMap; - import org.apache.bookkeeper.client.api.DigestType; -import org.apache.bookkeeper.net.BookieSocketAddress; -import org.apache.bookkeeper.proto.DataFormats.LedgerMetadataFormat; -import org.apache.bookkeeper.versioning.Version; +import org.apache.bookkeeper.client.api.LedgerMetadata; +import org.apache.bookkeeper.client.api.LedgerMetadata.State; +import org.apache.bookkeeper.common.annotation.InterfaceAudience.LimitedPrivate; +import org.apache.bookkeeper.common.annotation.InterfaceStability.Unstable; +import org.apache.bookkeeper.net.BookieId; -class LedgerMetadataBuilder { +/** + * Builder for building LedgerMetadata objects. 
+ */ +@LimitedPrivate +@Unstable +@VisibleForTesting +public class LedgerMetadataBuilder { + private long ledgerId = -1L; + private int metadataFormatVersion = CURRENT_METADATA_FORMAT_VERSION; private int ensembleSize = 3; private int writeQuorumSize = 3; private int ackQuorumSize = 2; - private LedgerMetadataFormat.State state = LedgerMetadataFormat.State.OPEN; + private State state = State.OPEN; private Optional lastEntryId = Optional.empty(); + private Optional length = Optional.empty(); - private TreeMap> ensembles = new TreeMap<>(); + private TreeMap> ensembles = new TreeMap<>(); - private DigestType digestType = DigestType.CRC32C; + private Optional digestType = Optional.empty(); private Optional password = Optional.empty(); - private Optional ctime = Optional.empty(); + private long ctime = -1; + private boolean storeCtime = false; private Map customMetadata = Collections.emptyMap(); - private Version version = Version.NEW; + private static final long BLANK_CTOKEN = 0; + private long cToken = BLANK_CTOKEN; - static LedgerMetadataBuilder create() { + public static LedgerMetadataBuilder create() { return new LedgerMetadataBuilder(); } - static LedgerMetadataBuilder from(LedgerMetadata other) { + public static LedgerMetadataBuilder from(LedgerMetadata other) { LedgerMetadataBuilder builder = new LedgerMetadataBuilder(); + builder.ledgerId = other.getLedgerId(); + builder.metadataFormatVersion = other.getMetadataFormatVersion(); builder.ensembleSize = other.getEnsembleSize(); builder.writeQuorumSize = other.getWriteQuorumSize(); builder.ackQuorumSize = other.getAckQuorumSize(); builder.state = other.getState(); - - long lastEntryId = other.getLastEntryId(); - if (lastEntryId != LedgerHandle.INVALID_ENTRY_ID) { - builder.lastEntryId = Optional.of(lastEntryId); + if (builder.state == State.CLOSED) { + builder.lastEntryId = Optional.of(other.getLastEntryId()); + builder.length = Optional.of(other.getLength()); } - builder.ensembles.putAll(other.getEnsembles()); + builder.ensembles.putAll(other.getAllEnsembles()); - builder.digestType = other.getDigestType(); if (other.hasPassword()) { builder.password = Optional.of(other.getPassword()); + builder.digestType = Optional.of(other.getDigestType()); } - if (other.storeSystemtimeAsLedgerCreationTime) { - builder.ctime = Optional.of(other.getCtime()); - } - builder.customMetadata = ImmutableMap.copyOf(other.getCustomMetadata()); + builder.ctime = other.getCtime(); + + /** Hack to get around fact that ctime was never versioned correctly */ + builder.storeCtime = LedgerMetadataUtils.shouldStoreCtime(other); - builder.version = other.getVersion(); + builder.customMetadata = ImmutableMap.copyOf(other.getCustomMetadata()); return builder; } - LedgerMetadataBuilder withEnsembleSize(int ensembleSize) { + public LedgerMetadataBuilder withId(long ledgerId) { + this.ledgerId = ledgerId; + return this; + } + + public LedgerMetadataBuilder withMetadataFormatVersion(int version) { + if (version < METADATA_FORMAT_VERSION_1 || version > CURRENT_METADATA_FORMAT_VERSION) { + return this; + } + this.metadataFormatVersion = version; + return this; + } + + public LedgerMetadataBuilder withPassword(byte[] password) { + this.password = Optional.of(Arrays.copyOf(password, password.length)); + return this; + } + + public LedgerMetadataBuilder withDigestType(DigestType digestType) { + this.digestType = Optional.of(digestType); + return this; + } + + public LedgerMetadataBuilder withEnsembleSize(int ensembleSize) { checkState(ensembles.size() == 0, "Can only set ensemble 
size before adding ensembles to the builder"); this.ensembleSize = ensembleSize; return this; } - LedgerMetadataBuilder newEnsembleEntry(long firstEntry, List ensemble) { + public LedgerMetadataBuilder withWriteQuorumSize(int writeQuorumSize) { + this.writeQuorumSize = writeQuorumSize; + return this; + } + + public LedgerMetadataBuilder withAckQuorumSize(int ackQuorumSize) { + this.ackQuorumSize = ackQuorumSize; + return this; + } + + public LedgerMetadataBuilder newEnsembleEntry(long firstEntry, List ensemble) { checkArgument(ensemble.size() == ensembleSize, "Size of passed in ensemble must match the ensembleSize of the builder"); checkArgument(ensembles.isEmpty() || firstEntry > ensembles.lastKey(), @@ -100,7 +149,7 @@ LedgerMetadataBuilder newEnsembleEntry(long firstEntry, List ensemble) { + public LedgerMetadataBuilder replaceEnsembleEntry(long firstEntry, List ensemble) { checkArgument(ensemble.size() == ensembleSize, "Size of passed in ensemble must match the ensembleSize of the builder"); checkArgument(ensembles.containsKey(firstEntry), @@ -109,19 +158,57 @@ LedgerMetadataBuilder replaceEnsembleEntry(long firstEntry, List customMetadata) { + this.customMetadata = ImmutableMap.copyOf(customMetadata); + return this; + } + + public LedgerMetadataBuilder withCreationTime(long ctime) { + this.ctime = ctime; + return this; + } + + public LedgerMetadataBuilder storingCreationTime(boolean storing) { + this.storeCtime = storing; + return this; + } + + public LedgerMetadataBuilder withCToken(long cToken) { + this.cToken = cToken; + return this; + } + + public LedgerMetadata build() { + checkArgument(ledgerId >= 0, "Ledger id must be set"); + checkArgument(ensembleSize >= writeQuorumSize, "Write quorum must be less or equal to ensemble size"); + checkArgument(writeQuorumSize >= ackQuorumSize, "Write quorum must be greater or equal to ack quorum"); + + return new LedgerMetadataImpl(ledgerId, metadataFormatVersion, + ensembleSize, writeQuorumSize, ackQuorumSize, + state, lastEntryId, length, ensembles, + digestType, password, ctime, storeCtime, + cToken, + customMetadata); } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerMetadataImpl.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerMetadataImpl.java new file mode 100644 index 00000000000..14a17bb5fd7 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerMetadataImpl.java @@ -0,0 +1,292 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.bookkeeper.client; + +import static com.google.common.base.Preconditions.checkArgument; + +import com.google.common.base.MoreObjects; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import java.util.Arrays; +import java.util.Base64; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.NavigableMap; +import java.util.Optional; +import java.util.TreeMap; +import java.util.stream.Collectors; +import lombok.EqualsAndHashCode; +import org.apache.bookkeeper.client.api.DigestType; +import org.apache.bookkeeper.client.api.LedgerMetadata; +import org.apache.bookkeeper.client.api.LedgerMetadata.State; +import org.apache.bookkeeper.net.BookieId; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * This class encapsulates all the ledger metadata that is persistently stored + * in metadata store. + * + *

    It provides parsing and serialization methods of such metadata. + */ +@EqualsAndHashCode(exclude = + "ledgerId" // ledgerId is not serialized inside ZK node data +) +class LedgerMetadataImpl implements LedgerMetadata { + static final Logger LOG = LoggerFactory.getLogger(LedgerMetadataImpl.class); + + private final long ledgerId; + + private final int metadataFormatVersion; + private final int ensembleSize; + private final int writeQuorumSize; + private final int ackQuorumSize; + + private final State state; + private final long length; + private final long lastEntryId; + private final long ctime; + final boolean storeCtime; // non-private so builder can access for copy + + private final NavigableMap> ensembles; + private final ImmutableList currentEnsemble; + + private final boolean hasPassword; + private final DigestType digestType; + private final byte[] password; + + private final Map customMetadata; + + private long cToken; + + LedgerMetadataImpl(long ledgerId, + int metadataFormatVersion, + int ensembleSize, + int writeQuorumSize, + int ackQuorumSize, + State state, + Optional lastEntryId, + Optional length, + Map> ensembles, + Optional digestType, + Optional password, + long ctime, + boolean storeCtime, + long cToken, + Map customMetadata) { + checkArgument(ensembles.size() > 0, "There must be at least one ensemble in the ledger"); + if (state == State.CLOSED) { + checkArgument(length.isPresent(), "Closed ledger must have a length"); + checkArgument(lastEntryId.isPresent(), "Closed ledger must have a last entry"); + } else { + checkArgument(!length.isPresent(), "Non-closed ledger must not have a length"); + checkArgument(!lastEntryId.isPresent(), "Non-closed ledger must not have a last entry"); + } + checkArgument((digestType.isPresent() && password.isPresent()) + || (!digestType.isPresent() && !password.isPresent()), + "Either both password and digest type must be set, or neither"); + + this.ledgerId = ledgerId; + this.metadataFormatVersion = metadataFormatVersion; + this.ensembleSize = ensembleSize; + this.writeQuorumSize = writeQuorumSize; + this.ackQuorumSize = ackQuorumSize; + this.state = state; + + this.lastEntryId = lastEntryId.orElse(LedgerHandle.INVALID_ENTRY_ID); + this.length = length.orElse(0L); + + this.ensembles = Collections.unmodifiableNavigableMap( + ensembles.entrySet().stream().collect(TreeMap::new, + (m, e) -> m.put(e.getKey(), + ImmutableList.copyOf(e.getValue())), + TreeMap::putAll)); + + if (state != State.CLOSED) { + currentEnsemble = this.ensembles.lastEntry().getValue(); + } else { + currentEnsemble = null; + } + + if (password.isPresent()) { + this.password = password.get(); + this.digestType = digestType.get(); + this.hasPassword = true; + } else { + this.password = null; + this.hasPassword = false; + this.digestType = null; + } + this.ctime = ctime; + this.storeCtime = storeCtime; + + this.cToken = cToken; + + this.customMetadata = ImmutableMap.copyOf(customMetadata); + } + + @Override + public long getLedgerId() { + return ledgerId; + } + + @Override + public NavigableMap> getAllEnsembles() { + return ensembles; + } + + @Override + public int getEnsembleSize() { + return ensembleSize; + } + + @Override + public int getWriteQuorumSize() { + return writeQuorumSize; + } + + @Override + public int getAckQuorumSize() { + return ackQuorumSize; + } + + @Override + public long getCtime() { + return ctime; + } + + /** + * In versions 4.1.0 and below, the digest type and password were not + * stored in the metadata. 
+ * + * @return whether the password has been stored in the metadata + */ + @Override + public boolean hasPassword() { + return hasPassword; + } + + @Override + public byte[] getPassword() { + if (!hasPassword()) { + return new byte[0]; + } else { + return Arrays.copyOf(password, password.length); + } + } + + @Override + public DigestType getDigestType() { + if (!hasPassword()) { + return null; + } else { + return digestType; + } + } + + @Override + public long getLastEntryId() { + return lastEntryId; + } + + @Override + public long getLength() { + return length; + } + + @Override + public boolean isClosed() { + return state == State.CLOSED; + } + + @Override + public State getState() { + return state; + } + + @Override + public List getEnsembleAt(long entryId) { + // the head map cannot be empty, since we insert an ensemble for + // entry-id 0, right when we start + return ensembles.get(ensembles.headMap(entryId + 1).lastKey()); + } + + @Override + public Map getCustomMetadata() { + return this.customMetadata; + } + + @Override + public String toString() { + return toStringRepresentation(true); + } + + /** + * Returns a string representation of this LedgerMetadata object by + * filtering out the password field. + * + * @return a string representation of the object without password field in + * it. + */ + @Override + public String toSafeString() { + return toStringRepresentation(false); + } + + private String toStringRepresentation(boolean withPassword) { + MoreObjects.ToStringHelper helper = MoreObjects.toStringHelper("LedgerMetadata"); + helper.add("formatVersion", metadataFormatVersion) + .add("ensembleSize", ensembleSize) + .add("writeQuorumSize", writeQuorumSize) + .add("ackQuorumSize", ackQuorumSize) + .add("state", state); + if (state == State.CLOSED) { + helper.add("length", length) + .add("lastEntryId", lastEntryId); + } + if (hasPassword()) { + helper.add("digestType", digestType); + if (withPassword) { + helper.add("password", "base64:" + Base64.getEncoder().encodeToString(password)); + } else { + helper.add("password", "OMITTED"); + } + } + helper.add("ensembles", ensembles.toString()); + helper.add("customMetadata", + customMetadata.entrySet().stream().collect( + Collectors.toMap(e -> e.getKey(), + e -> "base64:" + Base64.getEncoder().encodeToString(e.getValue())))); + return helper.toString(); + } + + @Override + public int getMetadataFormatVersion() { + return metadataFormatVersion; + } + + boolean shouldStoreCtime() { + return storeCtime; + } + + @Override + public long getCToken() { + return cToken; + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerMetadataUtils.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerMetadataUtils.java new file mode 100644 index 00000000000..ab26be7d65f --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerMetadataUtils.java @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bookkeeper.client;
+
+import static com.google.common.base.Preconditions.checkArgument;
+
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.SortedMap;
+import org.apache.bookkeeper.client.api.LedgerMetadata;
+import org.apache.bookkeeper.net.BookieId;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Utilities for working with ledger metadata.
+ */
+public class LedgerMetadataUtils {
+    static final Logger LOG = LoggerFactory.getLogger(LedgerMetadataUtils.class);
+
+    static List<BookieId> getCurrentEnsemble(LedgerMetadata metadata) {
+        return getLastEnsembleValue(metadata);
+    }
+
+    /**
+     * Get the entry id, greater than the given entry id, at which the next
+     * ensemble change takes place.
+
+     * @param entryId the entry id to search from (exclusive)
+     * @return the entry id of the next ensemble change (-1 if no further ensemble changes)
+     */
+    static long getNextEnsembleChange(LedgerMetadata metadata, long entryId) {
+        SortedMap<Long, ? extends List<BookieId>> tailMap = metadata.getAllEnsembles().tailMap(entryId + 1);
+
+        if (tailMap.isEmpty()) {
+            return -1;
+        } else {
+            return tailMap.firstKey();
+        }
+    }
+
+    static Set<BookieId> getBookiesInThisLedger(LedgerMetadata metadata) {
+        Set<BookieId> bookies = new HashSet<BookieId>();
+        for (List<BookieId> ensemble : metadata.getAllEnsembles().values()) {
+            bookies.addAll(ensemble);
+        }
+        return bookies;
+    }
+
+    static List<BookieId> getLastEnsembleValue(LedgerMetadata metadata) {
+        checkArgument(!metadata.getAllEnsembles().isEmpty(), "Metadata should never be created with no ensembles");
+        return metadata.getAllEnsembles().lastEntry().getValue();
+    }
+
+    static Long getLastEnsembleKey(LedgerMetadata metadata) {
+        checkArgument(!metadata.getAllEnsembles().isEmpty(), "Metadata should never be created with no ensembles");
+        return metadata.getAllEnsembles().lastKey();
+    }
+
+    public static boolean shouldStoreCtime(LedgerMetadata metadata) {
+        return metadata instanceof LedgerMetadataImpl && ((LedgerMetadataImpl) metadata).shouldStoreCtime();
+    }
+}
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerOpenOp.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerOpenOp.java
index cf5f3a7aebe..943aa8cd2a9 100644
--- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerOpenOp.java
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerOpenOp.java
@@ -28,17 +28,18 @@
 import java.util.concurrent.CompletableFuture;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.locks.ReentrantReadWriteLock;
-
 import org.apache.bookkeeper.client.AsyncCallback.OpenCallback;
 import org.apache.bookkeeper.client.AsyncCallback.ReadLastConfirmedCallback;
 import org.apache.bookkeeper.client.BookKeeper.DigestType;
 import org.apache.bookkeeper.client.SyncCallbackUtils.SyncOpenCallback;
+import org.apache.bookkeeper.client.api.BKException.Code;
+import org.apache.bookkeeper.client.api.LedgerMetadata;
 import org.apache.bookkeeper.client.api.ReadHandle;
 import org.apache.bookkeeper.client.impl.OpenBuilderBase;
-import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GenericCallback;
+import
org.apache.bookkeeper.common.util.MathUtils; import org.apache.bookkeeper.stats.OpStatsLogger; -import org.apache.bookkeeper.util.MathUtils; import org.apache.bookkeeper.util.OrderedGenericCallback; +import org.apache.bookkeeper.versioning.Versioned; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -46,14 +47,14 @@ * Encapsulates the ledger open operation. * */ -class LedgerOpenOp implements GenericCallback { +class LedgerOpenOp { static final Logger LOG = LoggerFactory.getLogger(LedgerOpenOp.class); final BookKeeper bk; final long ledgerId; final OpenCallback cb; final Object ctx; - LedgerHandle lh; + ReadOnlyLedgerHandle lh; final byte[] passwd; boolean doRecovery = true; boolean administrativeOpen = false; @@ -68,7 +69,7 @@ class LedgerOpenOp implements GenericCallback { * * @param bk * @param ledgerId - * @param digestType. Ignored if conf.getEnableDigestTypeAutodetection() is true + * @param digestType Ignored if conf.getEnableDigestTypeAutodetection() is true * @param passwd * @param cb * @param ctx @@ -109,7 +110,12 @@ public void initiate() { /** * Asynchronously read the ledger metadata node. */ - bk.getLedgerManager().readLedgerMetadata(ledgerId, this); + bk.getLedgerManager().readLedgerMetadata(ledgerId) + .thenAcceptAsync(this::openWithMetadata, bk.getScheduler().chooseThread(ledgerId)) + .exceptionally(exception -> { + openComplete(BKException.getExceptionCode(exception), null); + return null; + }); } /** @@ -120,25 +126,27 @@ public void initiateWithoutRecovery() { initiate(); } - /** - * Implements Open Ledger Callback. - */ - @Override - public void operationComplete(int rc, LedgerMetadata metadata) { - if (BKException.Code.OK != rc) { - // open ledger failed. - openComplete(rc, null); - return; + private CompletableFuture closeLedgerHandleAsync() { + if (lh != null) { + return lh.closeAsync(); } + return CompletableFuture.completedFuture(null); + } + + private void openWithMetadata(Versioned versionedMetadata) { + LedgerMetadata metadata = versionedMetadata.getValue(); final byte[] passwd; // we should use digest type from metadata *ONLY* when: // 1) digest type is stored in metadata // 2) `autodetection` is enabled - DigestType digestType = enableDigestAutodetection && metadata.hasPassword() - ? 
fromApiDigestType(metadata.getDigestType()) - : suggestedDigestType; + DigestType digestType; + if (enableDigestAutodetection && metadata.hasPassword()) { + digestType = fromApiDigestType(metadata.getDigestType()); + } else { + digestType = suggestedDigestType; + } /* For an administrative open, the default passwords * are read from the configuration, but if the metadata @@ -167,7 +175,7 @@ public void operationComplete(int rc, LedgerMetadata metadata) { // get the ledger metadata back try { - lh = new ReadOnlyLedgerHandle(bk.getClientCtx(), ledgerId, metadata, digestType, + lh = new ReadOnlyLedgerHandle(bk.getClientCtx(), ledgerId, versionedMetadata, digestType, passwd, !doRecovery); } catch (GeneralSecurityException e) { LOG.error("Security exception while opening ledger: " + ledgerId, e); @@ -191,10 +199,18 @@ public void operationComplete(int rc, LedgerMetadata metadata) { public void safeOperationComplete(int rc, Void result) { if (rc == BKException.Code.OK) { openComplete(BKException.Code.OK, lh); - } else if (rc == BKException.Code.UnauthorizedAccessException) { - openComplete(BKException.Code.UnauthorizedAccessException, null); } else { - openComplete(bk.getReturnRc(BKException.Code.LedgerRecoveryException), null); + closeLedgerHandleAsync().whenComplete((ignore, ex) -> { + if (ex != null) { + LOG.error("Ledger {} close failed", ledgerId, ex); + } + if (rc == BKException.Code.UnauthorizedAccessException + || rc == BKException.Code.TimeoutException) { + openComplete(bk.getReturnRc(rc), null); + } else { + openComplete(bk.getReturnRc(BKException.Code.LedgerRecoveryException), null); + } + }); } } @Override @@ -207,8 +223,20 @@ public String toString() { @Override public void readLastConfirmedComplete(int rc, long lastConfirmed, Object ctx) { - if (rc != BKException.Code.OK) { - openComplete(bk.getReturnRc(BKException.Code.ReadException), null); + if (rc == BKException.Code.TimeoutException) { + closeLedgerHandleAsync().whenComplete((r, ex) -> { + if (ex != null) { + LOG.error("Ledger {} close failed", ledgerId, ex); + } + openComplete(bk.getReturnRc(rc), null); + }); + } else if (rc != BKException.Code.OK) { + closeLedgerHandleAsync().whenComplete((r, ex) -> { + if (ex != null) { + LOG.error("Ledger {} close failed", ledgerId, ex); + } + openComplete(bk.getReturnRc(BKException.Code.ReadException), null); + }); } else { lh.lastAddConfirmed = lh.lastAddPushed = lastConfirmed; openComplete(BKException.Code.OK, lh); @@ -225,7 +253,12 @@ void openComplete(int rc, LedgerHandle lh) { } else { openOpLogger.registerSuccessfulEvent(MathUtils.elapsedNanos(startTime), TimeUnit.NANOSECONDS); } - cb.openComplete(rc, lh, ctx); + + if (lh != null) { // lh is null in case of errors + lh.executeOrdered(() -> cb.openComplete(rc, lh, ctx)); + } else { + cb.openComplete(rc, null, ctx); + } } static final class OpenBuilderImpl extends OpenBuilderBase { @@ -245,8 +278,9 @@ public CompletableFuture execute() { } private void open(OpenCallback cb) { - if (!validate()) { - cb.openComplete(BKException.Code.NoSuchLedgerExistsException, null, null); + final int validateRc = validate(); + if (Code.OK != validateRc) { + cb.openComplete(validateRc, null, null); return; } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerRecoveryOp.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerRecoveryOp.java index 6ab25d60322..fe697ef4e0f 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerRecoveryOp.java +++ 
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerRecoveryOp.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -18,11 +18,10 @@ package org.apache.bookkeeper.client; import com.google.common.annotations.VisibleForTesting; -import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.CompletableFuture; import java.util.concurrent.atomic.AtomicLong; import org.apache.bookkeeper.client.AsyncCallback.AddCallback; -import org.apache.bookkeeper.client.AsyncCallback.CloseCallback; -import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GenericCallback; +import org.apache.bookkeeper.client.api.LedgerMetadata; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.ReadEntryListener; import org.apache.bookkeeper.proto.checksum.DigestManager.RecoveryData; import org.slf4j.Logger; @@ -32,9 +31,7 @@ * This class encapsulated the ledger recovery operation. It first does a read * with entry-id of -1 (BookieProtocol.LAST_ADD_CONFIRMED) to all bookies. Then * starting from the last confirmed entry (from hints in the ledger entries), - * it reads forward until it is not able to find a particular entry. It closes - * the ledger at that entry. - * + * it reads forward until it is not able to find a particular entry. */ class LedgerRecoveryOp implements ReadEntryListener, AddCallback { @@ -42,13 +39,13 @@ class LedgerRecoveryOp implements ReadEntryListener, AddCallback { final LedgerHandle lh; final ClientContext clientCtx; + final CompletableFuture promise; final AtomicLong readCount, writeCount; volatile boolean readDone; - final AtomicBoolean callbackDone; volatile long startEntryToRead; volatile long endEntryToRead; - final GenericCallback cb; + // keep a copy of metadata for recovery. LedgerMetadata metadataForRecovery; @@ -72,13 +69,11 @@ protected LedgerMetadata getLedgerMetadata() { } - public LedgerRecoveryOp(LedgerHandle lh, ClientContext clientCtx, - GenericCallback cb) { + public LedgerRecoveryOp(LedgerHandle lh, ClientContext clientCtx) { readCount = new AtomicLong(0); writeCount = new AtomicLong(0); readDone = false; - callbackDone = new AtomicBoolean(false); - this.cb = cb; + this.promise = new CompletableFuture<>(); this.lh = lh; this.clientCtx = clientCtx; } @@ -96,21 +91,46 @@ LedgerRecoveryOp setEntryListener(ReadEntryListener entryListener) { return this; } - public void initiate() { - ReadLastConfirmedOp rlcop = new ReadLastConfirmedOp(lh, clientCtx.getBookieClient(), lh.getCurrentEnsemble(), + public CompletableFuture initiate() { + ReadLastConfirmedOp rlcop = new ReadLastConfirmedOp(clientCtx.getBookieClient(), + lh.distributionSchedule, + lh.macManager, + lh.ledgerId, + lh.getCurrentEnsemble(), + lh.ledgerKey, new ReadLastConfirmedOp.LastConfirmedDataCallback() { + @Override public void readLastConfirmedDataComplete(int rc, RecoveryData data) { if (rc == BKException.Code.OK) { synchronized (lh) { - lh.lastAddPushed = lh.lastAddConfirmed = data.getLastAddConfirmed(); + /** + The lowest an LAC can be for use in recovery is the first entry id + of the current ensemble - 1. + All ensembles prior to the current one, if any, are confirmed and + immutable (so are not part of the recovery process). + So we take the highest of: + - the LAC returned by the current bookie ensemble (could be -1) + - the first entry id of the current ensemble - 1. 
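+                           For example (illustrative numbers): with ensembles
+                           {0 -> e1, 100 -> e2} and a bookie-returned LAC of -1,
+                           recovery begins from max(-1, 100 - 1) = 99 rather than from -1.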
+ */ + Long lastEnsembleEntryId = lh.getVersionedLedgerMetadata() + .getValue() + .getAllEnsembles() + .lastEntry() + .getKey(); + + lh.lastAddPushed = lh.lastAddConfirmed = Math.max(data.getLastAddConfirmed(), + (lastEnsembleEntryId - 1)); + lh.length = data.getLength(); lh.pendingAddsSequenceHead = lh.lastAddConfirmed; startEntryToRead = endEntryToRead = lh.lastAddConfirmed; } // keep a copy of ledger metadata before proceeding // ledger recovery - metadataForRecovery = new LedgerMetadata(lh.getLedgerMetadata()); + metadataForRecovery = lh.getLedgerMetadata(); doRecoveryRead(); + } else if (rc == BKException.Code.TimeoutException) { + submitCallback(rc); } else if (rc == BKException.Code.UnauthorizedAccessException) { submitCallback(rc); } else { @@ -125,24 +145,27 @@ public void readLastConfirmedDataComplete(int rc, RecoveryData data) { * from writing to it. */ rlcop.initiateWithFencing(); + + return promise; } private void submitCallback(int rc) { if (BKException.Code.OK == rc) { clientCtx.getClientStats().getRecoverAddCountLogger().registerSuccessfulValue(writeCount.get()); clientCtx.getClientStats().getRecoverReadCountLogger().registerSuccessfulValue(readCount.get()); + promise.complete(lh); } else { clientCtx.getClientStats().getRecoverAddCountLogger().registerFailedValue(writeCount.get()); clientCtx.getClientStats().getRecoverReadCountLogger().registerFailedValue(readCount.get()); + promise.completeExceptionally(BKException.create(rc)); } - cb.operationComplete(rc, null); } /** * Try to read past the last confirmed. */ private void doRecoveryRead() { - if (!callbackDone.get()) { + if (!promise.isDone()) { startEntryToRead = endEntryToRead + 1; endEntryToRead = endEntryToRead + clientCtx.getConf().recoveryReadBatchSize; new RecoveryReadOp(lh, clientCtx, startEntryToRead, endEntryToRead, this, null) @@ -150,26 +173,6 @@ private void doRecoveryRead() { } } - private void closeAndCallback() { - if (callbackDone.compareAndSet(false, true)) { - lh.asyncCloseInternal(new CloseCallback() { - @Override - public void closeComplete(int rc, LedgerHandle lh, Object ctx) { - if (rc != BKException.Code.OK) { - LOG.warn("Close ledger {} failed during recovery: ", - LedgerRecoveryOp.this.lh.getId(), BKException.getMessage(rc)); - submitCallback(rc); - } else { - submitCallback(BKException.Code.OK); - if (LOG.isDebugEnabled()) { - LOG.debug("After closing length is: {}", lh.getLength()); - } - } - } - }, null, BKException.Code.LedgerClosedException); - } - } - @Override public void onEntryComplete(int rc, LedgerHandle lh, LedgerEntry entry, Object ctx) { // notify entry listener on individual entries being read during ledger recovery. 
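For reference, a minimal sketch of how a caller might drive the promise-based recovery above (the surrounding `lh`/`clientCtx` wiring and the logging are illustrative assumptions, not part of this patch):

    // Illustrative sketch: initiate() now returns a future; note that, unlike
    // the old callback flow, closing the ledger afterwards is the caller's job.
    new LedgerRecoveryOp(lh, clientCtx)
            .initiate()
            .whenComplete((recoveredHandle, ex) -> {
                if (ex != null) {
                    LOG.error("Recovery of ledger {} failed", lh.getId(), ex);
                } else {
                    LOG.info("Recovered ledger {} up to entry {}",
                            lh.getId(), recoveredHandle.getLastAddConfirmed());
                }
            });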
@@ -179,7 +182,7 @@ public void onEntryComplete(int rc, LedgerHandle lh, LedgerEntry entry, Object c } // we only trigger recovery add an entry when readDone == false && callbackDone == false - if (!callbackDone.get() && !readDone && rc == BKException.Code.OK) { + if (!promise.isDone() && !readDone && rc == BKException.Code.OK) { readCount.incrementAndGet(); byte[] data = entry.getEntry(); @@ -211,15 +214,15 @@ public void onEntryComplete(int rc, LedgerHandle lh, LedgerEntry entry, Object c if (rc == BKException.Code.NoSuchEntryException || rc == BKException.Code.NoSuchLedgerExistsException) { readDone = true; if (readCount.get() == writeCount.get()) { - closeAndCallback(); + submitCallback(BKException.Code.OK); } return; } // otherwise, some other error, we can't handle - if (BKException.Code.OK != rc && callbackDone.compareAndSet(false, true)) { + if (BKException.Code.OK != rc && !promise.isDone()) { LOG.error("Failure {} while reading entries: ({} - {}), ledger: {} while recovering ledger", - BKException.getMessage(rc), startEntryToRead, endEntryToRead, lh.getId()); + BKException.getMessage(rc), startEntryToRead, endEntryToRead, lh.getId()); submitCallback(rc); } else if (BKException.Code.OK == rc) { // we are here is because we successfully read an entry but readDone was already set to true. @@ -235,15 +238,12 @@ public void addComplete(int rc, LedgerHandle lh, long entryId, Object ctx) { if (rc != BKException.Code.OK) { LOG.error("Failure {} while writing entry: {} while recovering ledger: {}", BKException.codeLogger(rc), entryId + 1, lh.ledgerId); - if (callbackDone.compareAndSet(false, true)) { - // Give up, we can't recover from this error - submitCallback(rc); - } + submitCallback(rc); return; } long numAdd = writeCount.incrementAndGet(); if (readDone && readCount.get() == numAdd) { - closeAndCallback(); + submitCallback(rc); } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/ListenerBasedPendingReadOp.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/ListenerBasedPendingReadOp.java index 108a805ca92..18af8d9188b 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/ListenerBasedPendingReadOp.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/ListenerBasedPendingReadOp.java @@ -22,8 +22,8 @@ import java.util.concurrent.TimeUnit; import lombok.extern.slf4j.Slf4j; +import org.apache.bookkeeper.common.util.MathUtils; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.ReadEntryListener; -import org.apache.bookkeeper.util.MathUtils; @Slf4j class ListenerBasedPendingReadOp extends PendingReadOp { @@ -45,12 +45,12 @@ class ListenerBasedPendingReadOp extends PendingReadOp { @Override protected void submitCallback(int code) { - LedgerEntryRequest request; - while (!seq.isEmpty() && (request = seq.get(0)) != null) { + SingleLedgerEntryRequest request; + while (!seq.isEmpty() && (request = seq.getFirst()) != null) { if (!request.isComplete()) { return; } - seq.remove(0); + seq.removeFirst(); long latencyNanos = MathUtils.elapsedNanos(requestTimeNanos); LedgerEntry entry; if (BKException.Code.OK == request.getRc()) { diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/MetadataUpdateLoop.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/MetadataUpdateLoop.java index 9cd2afd1c9a..83c2b4ada5f 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/MetadataUpdateLoop.java +++ 
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/MetadataUpdateLoop.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -19,13 +19,14 @@ */ package org.apache.bookkeeper.client; +import com.google.common.util.concurrent.RateLimiter; import java.util.concurrent.CompletableFuture; import java.util.concurrent.atomic.AtomicIntegerFieldUpdater; import java.util.function.Supplier; - +import org.apache.bookkeeper.client.api.LedgerMetadata; import org.apache.bookkeeper.meta.LedgerManager; import org.apache.bookkeeper.versioning.Version; - +import org.apache.bookkeeper.versioning.Versioned; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -50,10 +51,11 @@ class MetadataUpdateLoop { private final LedgerManager lm; private final long ledgerId; - private final Supplier currentLocalValue; + private final Supplier> currentLocalValue; private final NeedsUpdatePredicate needsTransformation; private final MetadataTransform transform; private final LocalValueUpdater updateLocalValue; + private final RateLimiter throttler; private final String logContext; private volatile int writeLoopCount = 0; @@ -69,9 +71,17 @@ interface MetadataTransform { } interface LocalValueUpdater { - boolean updateValue(LedgerMetadata oldValue, LedgerMetadata newValue); + boolean updateValue(Versioned oldValue, Versioned newValue); } + MetadataUpdateLoop(LedgerManager lm, + long ledgerId, + Supplier> currentLocalValue, + NeedsUpdatePredicate needsTransformation, + MetadataTransform transform, + LocalValueUpdater updateLocalValue) { + this(lm, ledgerId, currentLocalValue, needsTransformation, transform, updateLocalValue, null); + } /** * Construct the loop. This takes a set of functions which may be called multiple times * during the loop. 
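As a usage sketch of this contract (the metadata holder and the `needsChange`/`applyChange` helpers below are hypothetical, for illustration only):

    // Hypothetical caller: retry a metadata transform with CAS semantics.
    AtomicReference<Versioned<LedgerMetadata>> local = new AtomicReference<>(currentMetadata);
    new MetadataUpdateLoop(
            ledgerManager,
            ledgerId,
            local::get,                                      // currentLocalValue
            metadata -> needsChange(metadata),               // needsTransformation
            metadata -> applyChange(metadata),               // transform (must be side-effect free)
            (oldV, newV) -> local.compareAndSet(oldV, newV)) // updateLocalValue
        .run()
        .thenAccept(written -> LOG.info("ledger {} metadata now at version {}",
                ledgerId, written.getVersion()));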
@@ -87,45 +97,57 @@ interface LocalValueUpdater { * second parameter and return true, return false otherwise */ MetadataUpdateLoop(LedgerManager lm, - long ledgerId, - Supplier currentLocalValue, - NeedsUpdatePredicate needsTransformation, - MetadataTransform transform, - LocalValueUpdater updateLocalValue) { + long ledgerId, + Supplier> currentLocalValue, + NeedsUpdatePredicate needsTransformation, + MetadataTransform transform, + LocalValueUpdater updateLocalValue, + RateLimiter throttler) { this.lm = lm; this.ledgerId = ledgerId; this.currentLocalValue = currentLocalValue; this.needsTransformation = needsTransformation; this.transform = transform; this.updateLocalValue = updateLocalValue; + this.throttler = throttler; - this.logContext = String.format("UpdateLoop(ledgerId=%d,loopId=%08x)", - ledgerId, System.identityHashCode(this)); + this.logContext = String.format("UpdateLoop(ledgerId=%d,loopId=%08x)", ledgerId, System.identityHashCode(this)); } - CompletableFuture run() { - CompletableFuture promise = new CompletableFuture<>(); + CompletableFuture> run() { + CompletableFuture> promise = new CompletableFuture<>(); writeLoop(currentLocalValue.get(), promise); return promise; } - private void writeLoop(LedgerMetadata currentLocal, CompletableFuture promise) { - LOG.debug("{} starting write loop iteration, attempt {}", - logContext, WRITE_LOOP_COUNT_UPDATER.incrementAndGet(this)); + private void writeLoop(Versioned currentLocal, + CompletableFuture> promise) { + if (LOG.isDebugEnabled()) { + LOG.debug("{} starting write loop iteration, attempt {}", + logContext, WRITE_LOOP_COUNT_UPDATER.incrementAndGet(this)); + } try { - if (needsTransformation.needsUpdate(currentLocal)) { - LedgerMetadata transformed = transform.transform(currentLocal); - - writeToStore(ledgerId, transformed) + if (needsTransformation.needsUpdate(currentLocal.getValue())) { + LedgerMetadata transformed = transform.transform(currentLocal.getValue()); + if (throttler != null) { + // throttler to control updates per second + throttler.acquire(); + } + lm.writeLedgerMetadata(ledgerId, transformed, currentLocal.getVersion()) .whenComplete((writtenMetadata, ex) -> { if (ex == null) { if (updateLocalValue.updateValue(currentLocal, writtenMetadata)) { - LOG.debug("{} success", logContext); + if (LOG.isDebugEnabled()) { + LOG.debug("{} success", logContext); + } promise.complete(writtenMetadata); } else { - LOG.debug("{} local value changed while we were writing, try again", logContext); + if (LOG.isDebugEnabled()) { + LOG.debug("{} local value changed while we were writing, try again", + logContext); + } writeLoop(currentLocalValue.get(), promise); } } else if (ex instanceof BKException.BKMetadataVersionException) { @@ -144,7 +166,9 @@ private void writeLoop(LedgerMetadata currentLocal, CompletableFuture updateLocalValueFromStore(long ledgerId) { - CompletableFuture promise = new CompletableFuture<>(); + private CompletableFuture> updateLocalValueFromStore(long ledgerId) { + CompletableFuture> promise = new CompletableFuture<>(); readLoop(ledgerId, promise); return promise; } - private void readLoop(long ledgerId, CompletableFuture promise) { - LedgerMetadata current = currentLocalValue.get(); - - lm.readLedgerMetadata(ledgerId, - (rc, read) -> { - if (rc != BKException.Code.OK) { - LOG.error("{} Failed to read metadata from store, rc = {}", - logContext, rc); - promise.completeExceptionally(BKException.create(rc)); - } else if (current.getVersion().compare(read.getVersion()) - == Version.Occurred.CONCURRENTLY) { - // no 
update needed, these are the same in the immutable world - promise.complete(current); - } else if (updateLocalValue.updateValue(current, read)) { - // updated local value successfully - promise.complete(read); - } else { - // local value changed while we were reading, - // look at new value, and try to read again - readLoop(ledgerId, promise); - } - }); - } - - private CompletableFuture writeToStore(long ledgerId, LedgerMetadata toWrite) { - CompletableFuture promise = new CompletableFuture<>(); - - lm.writeLedgerMetadata(ledgerId, toWrite, - (rc, written) -> { - if (rc != BKException.Code.OK) { - promise.completeExceptionally(BKException.create(rc)); - } else { - promise.complete(written); - } - }); - return promise; + private void readLoop(long ledgerId, CompletableFuture> promise) { + Versioned current = currentLocalValue.get(); + + lm.readLedgerMetadata(ledgerId).whenComplete( + (read, exception) -> { + if (exception != null) { + LOG.error("{} Failed to read metadata from store", + logContext, exception); + promise.completeExceptionally(exception); + } else if (current.getVersion().compare(read.getVersion()) == Version.Occurred.CONCURRENTLY) { + // no update needed, these are the same in the immutable world + promise.complete(current); + } else if (updateLocalValue.updateValue(current, read)) { + // updated local value successfully + promise.complete(read); + } else { + // local value changed while we were reading, + // look at new value, and try to read again + readLoop(ledgerId, promise); + } + }); } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/PendingAddOp.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/PendingAddOp.java index 89bf0b85a73..6b82200b3fa 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/PendingAddOp.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/PendingAddOp.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file * distributed with this work for additional information @@ -23,24 +23,23 @@ import static org.apache.bookkeeper.proto.BookieProtocol.FLAG_RECOVERY_ADD; import com.google.common.collect.ImmutableMap; - +import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; import io.netty.buffer.ByteBuf; import io.netty.util.Recycler; import io.netty.util.Recycler.Handle; import io.netty.util.ReferenceCountUtil; +import io.netty.util.ReferenceCounted; import java.util.EnumSet; - +import java.util.HashSet; import java.util.List; import java.util.Map; -import java.util.concurrent.RejectedExecutionException; +import java.util.Set; import java.util.concurrent.TimeUnit; import org.apache.bookkeeper.client.AsyncCallback.AddCallbackWithLatency; import org.apache.bookkeeper.client.api.WriteFlag; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.common.util.MathUtils; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.WriteCallback; -import org.apache.bookkeeper.util.ByteBufList; -import org.apache.bookkeeper.util.MathUtils; -import org.apache.bookkeeper.util.SafeRunnable; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -53,11 +52,11 @@ * * */ -class PendingAddOp extends SafeRunnable implements WriteCallback { +class PendingAddOp implements WriteCallback { private static final Logger LOG = LoggerFactory.getLogger(PendingAddOp.class); ByteBuf payload; - ByteBufList toSend; + ReferenceCounted toSend; AddCallbackWithLatency cb; Object ctx; long entryId; @@ -69,8 +68,10 @@ class PendingAddOp extends SafeRunnable implements WriteCallback { LedgerHandle lh; ClientContext clientCtx; boolean isRecoveryAdd = false; - long requestTimeNanos; + volatile long requestTimeNanos; long qwcLatency; // Quorum Write Completion Latency after response from quorum bookies. + Set addEntrySuccessBookies; + long writeDelayedStartTime; // min fault domains completion latency after response from ack quorum bookies long currentLedgerLength; int pendingWriteRequests; @@ -78,10 +79,11 @@ class PendingAddOp extends SafeRunnable implements WriteCallback { boolean hasRun; EnumSet writeFlags; boolean allowFailFast = false; - List ensemble; + List ensemble; + @SuppressFBWarnings("IS2_INCONSISTENT_SYNC") static PendingAddOp create(LedgerHandle lh, ClientContext clientCtx, - List ensemble, + List ensemble, ByteBuf payload, EnumSet writeFlags, AddCallbackWithLatency cb, Object ctx) { PendingAddOp op = RECYCLER.get(); @@ -106,6 +108,13 @@ static PendingAddOp create(LedgerHandle lh, ClientContext clientCtx, op.qwcLatency = 0; op.writeFlags = writeFlags; + if (op.addEntrySuccessBookies == null) { + op.addEntrySuccessBookies = new HashSet<>(); + } else { + op.addEntrySuccessBookies.clear(); + } + op.writeDelayedStartTime = -1; + return op; } @@ -135,7 +144,7 @@ long getEntryId() { return this.entryId; } - void sendWriteRequest(List ensemble, int bookieIndex) { + private void sendWriteRequest(List ensemble, int bookieIndex) { int flags = isRecoveryAdd ? 
FLAG_RECOVERY_ADD | FLAG_HIGH_PRIORITY : FLAG_NONE; clientCtx.getBookieClient().addEntry(ensemble.get(bookieIndex), @@ -152,27 +161,22 @@ boolean maybeTimeout() { return false; } - void timeoutQuorumWait() { - try { - clientCtx.getMainWorkerPool().executeOrdered(lh.ledgerId, new SafeRunnable() { - @Override - public void safeRun() { - if (completed) { - return; - } - lh.handleUnrecoverableErrorDuringAdd(BKException.Code.AddEntryQuorumTimeoutException); - } - @Override - public String toString() { - return String.format("AddEntryQuorumTimeout(lid=%d, eid=%d)", lh.ledgerId, entryId); - } - }); - } catch (RejectedExecutionException e) { - LOG.warn("Timeout add entry quorum wait failed {} entry: {}", lh.ledgerId, entryId); + synchronized void timeoutQuorumWait() { + if (completed) { + return; } + + if (addEntrySuccessBookies.size() >= lh.getLedgerMetadata().getAckQuorumSize()) { + // If ackQuorum number of bookies have acknowledged the write but still not complete, indicates + // failures due to not having been written to enough fault domains. Increment corresponding + // counter. + clientCtx.getClientStats().getWriteTimedOutDueToNotEnoughFaultDomains().inc(); + } + + lh.handleUnrecoverableErrorDuringAdd(BKException.Code.AddEntryQuorumTimeoutException); } - void unsetSuccessAndSendWriteRequest(List ensemble, int bookieIndex) { + synchronized void unsetSuccessAndSendWriteRequest(List ensemble, int bookieIndex) { // update the ensemble this.ensemble = ensemble; @@ -196,14 +200,9 @@ void unsetSuccessAndSendWriteRequest(List ensemble, int boo // completes. // // We call sendAddSuccessCallback when unsetting t cover this case. - DistributionSchedule.WriteSet writeSet = lh.distributionSchedule.getWriteSet(entryId); - try { - if (!writeSet.contains(bookieIndex)) { - lh.sendAddSuccessCallbacks(); - return; - } - } finally { - writeSet.recycle(); + if (!lh.distributionSchedule.hasEntry(entryId, bookieIndex)) { + lh.sendAddSuccessCallbacks(); + return; } if (callbackTriggered) { @@ -229,7 +228,7 @@ void unsetSuccessAndSendWriteRequest(List ensemble, int boo /** * Initiate the add operation. */ - public void safeRun() { + public synchronized void initiate() { hasRun = true; if (callbackTriggered) { // this should only be true if the request was failed due @@ -243,32 +242,25 @@ public void safeRun() { checkNotNull(lh); checkNotNull(lh.macManager); + int flags = isRecoveryAdd ? FLAG_RECOVERY_ADD | FLAG_HIGH_PRIORITY : FLAG_NONE; this.toSend = lh.macManager.computeDigestAndPackageForSending( entryId, lh.lastAddConfirmed, currentLedgerLength, - payload); + payload, lh.ledgerKey, flags); // ownership of RefCounted ByteBuf was passed to computeDigestAndPackageForSending payload = null; // We are about to send. 
Check if we need to make an ensemble change - // becasue of delayed write errors - Map delayedWriteFailedBookies = lh.getDelayedWriteFailedBookies(); - if (!delayedWriteFailedBookies.isEmpty()) { - lh.handleDelayedWriteBookieFailure(); - } - // Iterate over set and trigger the sendWriteRequests - DistributionSchedule.WriteSet writeSet = lh.distributionSchedule.getWriteSet(entryId); + // because of delayed write errors + lh.maybeHandleDelayedWriteBookieFailure(); - try { - for (int i = 0; i < writeSet.size(); i++) { - sendWriteRequest(ensemble, writeSet.get(i)); - } - } finally { - writeSet.recycle(); + // Iterate over set and trigger the sendWriteRequests + for (int i = 0; i < lh.distributionSchedule.getWriteQuorumSize(); i++) { + sendWriteRequest(ensemble, lh.distributionSchedule.getWriteSetBookieIndex(entryId, i)); } } @Override - public void writeComplete(int rc, long ledgerId, long entryId, BookieSocketAddress addr, Object ctx) { + public synchronized void writeComplete(int rc, long ledgerId, long entryId, BookieId addr, Object ctx) { int bookieIndex = (Integer) ctx; --pendingWriteRequests; @@ -284,6 +276,7 @@ public void writeComplete(int rc, long ledgerId, long entryId, BookieSocketAddre boolean ackQuorum = false; if (BKException.Code.OK == rc) { ackQuorum = ackSet.completeBookieAndCheck(bookieIndex); + addEntrySuccessBookies.add(ensemble.get(bookieIndex)); } if (completed) { @@ -293,7 +286,7 @@ public void writeComplete(int rc, long ledgerId, long entryId, BookieSocketAddre clientCtx.getClientStats().getAddOpUrCounter().inc(); if (!clientCtx.getConf().disableEnsembleChangeFeature.isAvailable() && !clientCtx.getConf().delayEnsembleChange) { - lh.getDelayedWriteFailedBookies().putIfAbsent(bookieIndex, addr); + lh.notifyWriteFailed(bookieIndex, addr); } } // even the add operation is completed, but because we don't reset completed flag back to false when @@ -344,31 +337,52 @@ public void writeComplete(int rc, long ledgerId, long entryId, BookieSocketAddre if (clientCtx.getConf().delayEnsembleChange) { if (ackSet.failBookieAndCheck(bookieIndex, addr) || rc == BKException.Code.WriteOnReadOnlyBookieException) { - Map failedBookies = ackSet.getFailedBookies(); + Map failedBookies = ackSet.getFailedBookies(); LOG.warn("Failed to write entry ({}, {}) to bookies {}, handling failures.", ledgerId, entryId, failedBookies); // we can't meet ack quorum requirement, trigger ensemble change. 
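                        // (a WriteOnReadOnlyBookieException forces the change even while the
                        // ack quorum is still intact, since a read-only bookie will never
                        // accept this write; otherwise the change stays deferred until the
                        // ack quorum is actually broken)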
lh.handleBookieFailure(failedBookies); - } else { - if (LOG.isDebugEnabled()) { - LOG.debug("Failed to write entry ({}, {}) to bookie ({}, {})," - + " but it didn't break ack quorum, delaying ensemble change : {}", - ledgerId, entryId, bookieIndex, addr, BKException.getMessage(rc)); - } + } else if (LOG.isDebugEnabled()) { + LOG.debug("Failed to write entry ({}, {}) to bookie ({}, {})," + + " but it didn't break ack quorum, delaying ensemble change : {}", + ledgerId, entryId, bookieIndex, addr, BKException.getMessage(rc)); } } else { - LOG.warn("Failed to write entry ({}, {}): {}", - ledgerId, entryId, BKException.getMessage(rc)); + LOG.warn("Failed to write entry ({}, {}) to bookie ({}, {}): {}", + ledgerId, entryId, bookieIndex, addr, BKException.getMessage(rc)); lh.handleBookieFailure(ImmutableMap.of(bookieIndex, addr)); } return; } if (ackQuorum && !completed) { - completed = true; - this.qwcLatency = MathUtils.elapsedNanos(requestTimeNanos); + if (clientCtx.getConf().enforceMinNumFaultDomainsForWrite + && !(clientCtx.getPlacementPolicy() + .areAckedBookiesAdheringToPlacementPolicy(addEntrySuccessBookies, + lh.getLedgerMetadata().getWriteQuorumSize(), + lh.getLedgerMetadata().getAckQuorumSize()))) { + LOG.warn("Write success for entry ID {} delayed, not acknowledged by bookies in enough fault domains", + entryId); + // Increment to indicate write did not complete due to not enough fault domains + clientCtx.getClientStats().getWriteDelayedDueToNotEnoughFaultDomains().inc(); + + // Only do this for the first time. + if (writeDelayedStartTime == -1) { + writeDelayedStartTime = MathUtils.nowInNano(); + } + } else { + completed = true; + this.qwcLatency = MathUtils.elapsedNanos(requestTimeNanos); + + if (writeDelayedStartTime != -1) { + clientCtx.getClientStats() + .getWriteDelayedDueToNotEnoughFaultDomainsLatency() + .registerSuccessfulEvent(MathUtils.elapsedNanos(writeDelayedStartTime), + TimeUnit.NANOSECONDS); + } - sendAddSuccessCallbacks(); + sendAddSuccessCallbacks(); + } } } @@ -376,7 +390,7 @@ void sendAddSuccessCallbacks() { lh.sendAddSuccessCallbacks(); } - void submitCallback(final int rc) { + synchronized void submitCallback(final int rc) { if (LOG.isDebugEnabled()) { LOG.debug("Submit callback (lid:{}, eid: {}). rc:{}", lh.getId(), entryId, rc); } @@ -419,6 +433,7 @@ public boolean equals(Object o) { private final Handle recyclerHandle; private static final Recycler RECYCLER = new Recycler() { + @Override protected PendingAddOp newObject(Recycler.Handle handle) { return new PendingAddOp(handle); } @@ -429,7 +444,7 @@ private PendingAddOp(Handle recyclerHandle) { } - private void maybeRecycle() { + private synchronized void maybeRecycle() { /** * We have opportunity to recycle two objects here. 
* PendingAddOp#toSend and LedgerHandle#pendingAddOp @@ -459,7 +474,7 @@ private void maybeRecycle() { } } - private void recyclePendAddOpObject() { + public synchronized void recyclePendAddOpObject() { entryId = LedgerHandle.INVALID_ENTRY_ID; currentLedgerLength = -1; if (payload != null) { @@ -480,6 +495,8 @@ private void recyclePendAddOpObject() { hasRun = false; allowFailFast = false; writeFlags = null; + addEntrySuccessBookies.clear(); + writeDelayedStartTime = -1; recyclerHandle.recycle(this); } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/PendingReadLacOp.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/PendingReadLacOp.java index 0dad8040ee1..07687bfcce0 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/PendingReadLacOp.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/PendingReadLacOp.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -18,11 +18,9 @@ package org.apache.bookkeeper.client; import io.netty.buffer.ByteBuf; - import java.util.List; - import org.apache.bookkeeper.client.BKException.BKDigestMatchException; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.proto.BookieClient; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.ReadLacCallback; import org.apache.bookkeeper.proto.checksum.DigestManager.RecoveryData; @@ -54,7 +52,7 @@ class PendingReadLacOp implements ReadLacCallback { int lastSeenError = BKException.Code.ReadException; final DistributionSchedule.QuorumCoverageSet coverageSet; long maxLac = LedgerHandle.INVALID_ENTRY_ID; - final List currentEnsemble; + final List currentEnsemble; /* * Wrapper to get Lac from the request @@ -63,7 +61,7 @@ interface LacCallback { void getLacComplete(int rc, long lac); } - PendingReadLacOp(LedgerHandle lh, BookieClient bookieClient, List ensemble, LacCallback cb) { + PendingReadLacOp(LedgerHandle lh, BookieClient bookieClient, List ensemble, LacCallback cb) { this.lh = lh; this.bookieClient = bookieClient; this.cb = cb; @@ -156,7 +154,9 @@ public void readLacComplete(int rc, long ledgerId, final ByteBuf lacBuffer, fina } if (numResponsesPending == 0 && !completed) { - LOG.info("While readLac ledger: " + ledgerId + " did not hear success responses from all of ensemble"); + LOG.error( + "While readLac ledger: {} did not hear success responses from all of ensemble, coverageSet is: {}", + ledgerId, coverageSet); cb.getLacComplete(lastSeenError, maxLac); } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/PendingReadOp.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/PendingReadOp.java index d31a744f676..b45617be55f 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/PendingReadOp.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/PendingReadOp.java @@ -21,29 +21,18 @@ package org.apache.bookkeeper.client; import com.google.common.collect.Lists; -import com.google.common.util.concurrent.ListenableFuture; import io.netty.buffer.ByteBuf; -import java.util.ArrayList; import java.util.BitSet; -import java.util.HashSet; +import java.util.LinkedList; import java.util.List; -import java.util.Set; -import java.util.concurrent.Callable; -import java.util.concurrent.CompletableFuture; -import 
java.util.concurrent.ScheduledFuture; import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicBoolean; -import org.apache.bookkeeper.client.BKException.BKDigestMatchException; -import org.apache.bookkeeper.client.api.LedgerEntries; import org.apache.bookkeeper.client.impl.LedgerEntriesImpl; import org.apache.bookkeeper.client.impl.LedgerEntryImpl; -import org.apache.bookkeeper.common.util.SafeRunnable; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.common.util.MathUtils; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.proto.BookieProtocol; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.ReadEntryCallback; -import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.ReadEntryCallbackCtx; import org.apache.bookkeeper.proto.checksum.DigestManager; -import org.apache.bookkeeper.util.MathUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -54,84 +43,174 @@ * application as soon as it arrives rather than waiting for the whole thing. * */ -class PendingReadOp implements ReadEntryCallback, SafeRunnable { +class PendingReadOp extends ReadOpBase implements ReadEntryCallback { private static final Logger LOG = LoggerFactory.getLogger(PendingReadOp.class); - private ScheduledFuture speculativeTask = null; - protected final List seq; - private final CompletableFuture future; - private final Set heardFromHosts; - private final BitSet heardFromHostsBitSet; - private final Set sentToHosts = new HashSet(); - LedgerHandle lh; - final ClientContext clientCtx; + protected boolean parallelRead = false; + protected final LinkedList seq; - long numPendingEntries; - final long startEntryId; - final long endEntryId; - long requestTimeNanos; + PendingReadOp(LedgerHandle lh, + ClientContext clientCtx, + long startEntryId, + long endEntryId, + boolean isRecoveryRead) { + super(lh, clientCtx, startEntryId, endEntryId, isRecoveryRead); + this.seq = new LinkedList<>(); + numPendingEntries = endEntryId - startEntryId + 1; + } + + PendingReadOp parallelRead(boolean enabled) { + this.parallelRead = enabled; + return this; + } + + void initiate() { + long nextEnsembleChange = startEntryId, i = startEntryId; + this.requestTimeNanos = MathUtils.nowInNano(); + List ensemble = null; + do { + if (i == nextEnsembleChange) { + ensemble = getLedgerMetadata().getEnsembleAt(i); + nextEnsembleChange = LedgerMetadataUtils.getNextEnsembleChange(getLedgerMetadata(), i); + } + SingleLedgerEntryRequest entry; + if (parallelRead) { + entry = new ParallelReadRequest(ensemble, lh.ledgerId, i); + } else { + entry = new SequenceReadRequest(ensemble, lh.ledgerId, i); + } + seq.add(entry); + i++; + } while (i <= endEntryId); + // read the entries. 
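+        // (sequential reads try one replica at a time and rely on the speculative
+        //  policy scheduled below to fan out to further replicas when a response is
+        //  slow; parallel reads hit the whole write set up front, so they never
+        //  schedule a speculative task)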
+ for (LedgerEntryRequest entry : seq) { + entry.read(); + if (!parallelRead && clientCtx.getConf().readSpeculativeRequestPolicy.isPresent()) { + speculativeTask = clientCtx.getConf().readSpeculativeRequestPolicy.get() + .initiateSpeculativeRequest(clientCtx.getScheduler(), entry); + } + } + } + + @Override + public void readEntryComplete(int rc, long ledgerId, final long entryId, final ByteBuf buffer, Object ctx) { + final ReadContext rctx = (ReadContext) ctx; + final SingleLedgerEntryRequest entry = (SingleLedgerEntryRequest) rctx.entry; - final int requiredBookiesMissingEntryForRecovery; - final boolean isRecoveryRead; + if (rc != BKException.Code.OK) { + entry.logErrorAndReattemptRead(rctx.bookieIndex, rctx.to, "Error: " + BKException.getMessage(rc), rc); + return; + } - boolean parallelRead = false; - final AtomicBoolean complete = new AtomicBoolean(false); - boolean allowFailFast = false; + heardFromHosts.add(rctx.to); + heardFromHostsBitSet.set(rctx.bookieIndex, true); - abstract class LedgerEntryRequest implements SpeculativeRequestExecutor, AutoCloseable { + buffer.retain(); + // if entry has completed don't handle twice + if (entry.complete(rctx.bookieIndex, rctx.to, buffer)) { + if (!isRecoveryRead) { + // do not advance LastAddConfirmed for recovery reads + lh.updateLastConfirmed(rctx.getLastAddConfirmed(), 0L); + } + submitCallback(BKException.Code.OK); + } else { + buffer.release(); + } - final AtomicBoolean complete = new AtomicBoolean(false); + if (numPendingEntries < 0) { + LOG.error("Read too many values for ledger {} : [{}, {}].", + ledgerId, startEntryId, endEntryId); + } + + } + + protected void submitCallback(int code) { + if (BKException.Code.OK == code) { + numPendingEntries--; + if (numPendingEntries != 0) { + return; + } + } - int rc = BKException.Code.OK; - int firstError = BKException.Code.OK; - int numBookiesMissingEntry = 0; + // ensure callback once + if (!complete.compareAndSet(false, true)) { + return; + } - final List ensemble; - final DistributionSchedule.WriteSet writeSet; + cancelSpeculativeTask(true); + + long latencyNanos = MathUtils.elapsedNanos(requestTimeNanos); + if (code != BKException.Code.OK) { + long firstUnread = LedgerHandle.INVALID_ENTRY_ID; + Integer firstRc = null; + for (LedgerEntryRequest req : seq) { + if (!req.isComplete()) { + firstUnread = req.eId; + firstRc = req.rc; + break; + } + } + LOG.error( + "Read of ledger entry failed: L{} E{}-E{}, Sent to {}, " + + "Heard from {} : bitset = {}, Error = '{}'. 
First unread entry is ({}, rc = {})", + lh.getId(), startEntryId, endEntryId, sentToHosts, heardFromHosts, heardFromHostsBitSet, + BKException.getMessage(code), firstUnread, firstRc); + clientCtx.getClientStats().getReadOpLogger().registerFailedEvent(latencyNanos, TimeUnit.NANOSECONDS); + // release the entries + seq.forEach(LedgerEntryRequest::close); + future.completeExceptionally(BKException.create(code)); + } else { + clientCtx.getClientStats().getReadOpLogger().registerSuccessfulEvent(latencyNanos, TimeUnit.NANOSECONDS); + future.complete(LedgerEntriesImpl.create(Lists.transform(seq, input -> input.entryImpl))); + } + } + + void sendReadTo(int bookieIndex, BookieId to, SingleLedgerEntryRequest entry) throws InterruptedException { + if (lh.throttler != null) { + lh.throttler.acquire(); + } + + if (isRecoveryRead) { + int flags = BookieProtocol.FLAG_HIGH_PRIORITY | BookieProtocol.FLAG_DO_FENCING; + clientCtx.getBookieClient().readEntry(to, lh.ledgerId, entry.eId, + this, new ReadContext(bookieIndex, to, entry), flags, lh.ledgerKey); + } else { + clientCtx.getBookieClient().readEntry(to, lh.ledgerId, entry.eId, + this, new ReadContext(bookieIndex, to, entry), BookieProtocol.FLAG_NONE); + } + } + + abstract class SingleLedgerEntryRequest extends LedgerEntryRequest { final LedgerEntryImpl entryImpl; - final long eId; - LedgerEntryRequest(List ensemble, long lId, long eId) { + SingleLedgerEntryRequest(List ensemble, long lId, long eId) { + super(ensemble, eId); this.entryImpl = LedgerEntryImpl.create(lId, eId); - this.ensemble = ensemble; - this.eId = eId; - - if (clientCtx.getConf().enableReorderReadSequence) { - writeSet = clientCtx.getPlacementPolicy().reorderReadSequence( - ensemble, - lh.getBookiesHealthInfo(), - lh.distributionSchedule.getWriteSet(eId)); - } else { - writeSet = lh.distributionSchedule.getWriteSet(eId); - } } + @Override public void close() { + super.close(); entryImpl.close(); } - /** - * Execute the read request. - */ - abstract void read(); - /** * Complete the read request from host. * - * @param bookieIndex - * bookie index - * @param host - * host that respond the read - * @param buffer - * the data buffer + * @param bookieIndex bookie index + * @param host host that respond the read + * @param buffer the data buffer * @return return true if we managed to complete the entry; - * otherwise return false if the read entry is not complete or it is already completed before + * otherwise return false if the read entry is not complete or it is already completed before */ - boolean complete(int bookieIndex, BookieSocketAddress host, final ByteBuf buffer) { + boolean complete(int bookieIndex, BookieId host, final ByteBuf buffer) { ByteBuf content; + if (isComplete()) { + return false; + } try { content = lh.macManager.verifyDigestAndReturnData(eId, buffer); - } catch (BKDigestMatchException e) { + } catch (BKException.BKDigestMatchException e) { clientCtx.getClientStats().getReadOpDmCounter().inc(); logErrorAndReattemptRead(bookieIndex, host, "Mac mismatch", BKException.Code.DigestMatchException); return false; @@ -151,128 +230,13 @@ boolean complete(int bookieIndex, BookieSocketAddress host, final ByteBuf buffer return false; } } - - /** - * Fail the request with given result code rc. - * - * @param rc - * result code to fail the request. - * @return true if we managed to fail the entry; otherwise return false if it already failed or completed. 
- */ - boolean fail(int rc) { - if (complete.compareAndSet(false, true)) { - this.rc = rc; - submitCallback(rc); - writeSet.recycle(); - return true; - } else { - return false; - } - } - - /** - * Log error errMsg and reattempt read from host. - * - * @param bookieIndex - * bookie index - * @param host - * host that just respond - * @param errMsg - * error msg to log - * @param rc - * read result code - */ - synchronized void logErrorAndReattemptRead(int bookieIndex, BookieSocketAddress host, String errMsg, int rc) { - if (BKException.Code.OK == firstError - || BKException.Code.NoSuchEntryException == firstError - || BKException.Code.NoSuchLedgerExistsException == firstError) { - firstError = rc; - } else if (BKException.Code.BookieHandleNotAvailableException == firstError - && BKException.Code.NoSuchEntryException != rc - && BKException.Code.NoSuchLedgerExistsException != rc) { - // if other exception rather than NoSuchEntryException or NoSuchLedgerExistsException is - // returned we need to update firstError to indicate that it might be a valid read but just - // failed. - firstError = rc; - } - if (BKException.Code.NoSuchEntryException == rc - || BKException.Code.NoSuchLedgerExistsException == rc) { - ++numBookiesMissingEntry; - if (LOG.isDebugEnabled()) { - LOG.debug("No such entry found on bookie. L{} E{} bookie: {}", - lh.ledgerId, eId, host); - } - } else { - if (LOG.isInfoEnabled()) { - LOG.info("{} while reading L{} E{} from bookie: {}", - errMsg, lh.ledgerId, eId, host); - } - } - } - - /** - * Send to next replica speculatively, if required and possible. - * This returns the host we may have sent to for unit testing. - * - * @param heardFromHostsBitSet - * the set of hosts that we already received responses. - * @return host we sent to if we sent. null otherwise. - */ - abstract BookieSocketAddress maybeSendSpeculativeRead(BitSet heardFromHostsBitSet); - - /** - * Whether the read request completed. - * - * @return true if the read request is completed. - */ - boolean isComplete() { - return complete.get(); - } - - /** - * Get result code of this entry. - * - * @return result code. - */ - int getRc() { - return rc; - } - - @Override - public String toString() { - return String.format("L%d-E%d", lh.getId(), eId); - } - - /** - * Issues a speculative request and indicates if more speculative - * requests should be issued. - * - * @return whether more speculative requests should be issued - */ - @Override - public ListenableFuture issueSpeculativeRequest() { - return clientCtx.getMainWorkerPool().submitOrdered(lh.getId(), new Callable() { - @Override - public Boolean call() throws Exception { - if (!isComplete() && null != maybeSendSpeculativeRead(heardFromHostsBitSet)) { - if (LOG.isDebugEnabled()) { - LOG.debug("Send speculative read for {}. 
Hosts sent are {}, " - + " Hosts heard are {}, ensemble is {}.", - this, sentToHosts, heardFromHostsBitSet, ensemble); - } - return true; - } - return false; - } - }); - } } - class ParallelReadRequest extends LedgerEntryRequest { + class ParallelReadRequest extends SingleLedgerEntryRequest { int numPendings; - ParallelReadRequest(List ensemble, long lId, long eId) { + ParallelReadRequest(List ensemble, long lId, long eId) { super(ensemble, lId, eId); numPendings = writeSet.size(); } @@ -280,7 +244,7 @@ class ParallelReadRequest extends LedgerEntryRequest { @Override void read() { for (int i = 0; i < writeSet.size(); i++) { - BookieSocketAddress to = ensemble.get(writeSet.get(i)); + BookieId to = ensemble.get(writeSet.get(i)); try { sendReadTo(writeSet.get(i), to, this); } catch (InterruptedException ie) { @@ -293,7 +257,7 @@ void read() { } @Override - synchronized void logErrorAndReattemptRead(int bookieIndex, BookieSocketAddress host, String errMsg, int rc) { + synchronized void logErrorAndReattemptRead(int bookieIndex, BookieId host, String errMsg, int rc) { super.logErrorAndReattemptRead(bookieIndex, host, errMsg, rc); // if received all responses or this entry doesn't meet quorum write, complete the request. @@ -309,20 +273,20 @@ synchronized void logErrorAndReattemptRead(int bookieIndex, BookieSocketAddress } @Override - BookieSocketAddress maybeSendSpeculativeRead(BitSet heardFromHostsBitSet) { + BookieId maybeSendSpeculativeRead(BitSet heardFromHostsBitSet) { // no speculative read return null; } } - class SequenceReadRequest extends LedgerEntryRequest { + class SequenceReadRequest extends SingleLedgerEntryRequest { static final int NOT_FOUND = -1; int nextReplicaIndexToReadFrom = 0; final BitSet sentReplicas; final BitSet erroredReplicas; - SequenceReadRequest(List ensemble, long lId, long eId) { + SequenceReadRequest(List ensemble, long lId, long eId) { super(ensemble, lId, eId); this.sentReplicas = new BitSet(lh.getLedgerMetadata().getWriteQuorumSize()); @@ -354,7 +318,7 @@ private boolean readsOutstanding() { * @return host we sent to if we sent. null otherwise. 
*/ @Override - synchronized BookieSocketAddress maybeSendSpeculativeRead(BitSet heardFrom) { + synchronized BookieId maybeSendSpeculativeRead(BitSet heardFrom) { if (nextReplicaIndexToReadFrom >= getLedgerMetadata().getWriteQuorumSize()) { return null; } @@ -378,7 +342,7 @@ void read() { sendNextRead(); } - synchronized BookieSocketAddress sendNextRead() { + synchronized BookieId sendNextRead() { if (nextReplicaIndexToReadFrom >= getLedgerMetadata().getWriteQuorumSize()) { // we are done, the read has failed from all replicas, just fail the // read @@ -393,7 +357,7 @@ synchronized BookieSocketAddress sendNextRead() { nextReplicaIndexToReadFrom++; try { - BookieSocketAddress to = ensemble.get(bookieIndex); + BookieId to = ensemble.get(bookieIndex); sendReadTo(bookieIndex, to, this); sentToHosts.add(to); sentReplicas.set(replica); @@ -407,7 +371,7 @@ synchronized BookieSocketAddress sendNextRead() { } @Override - synchronized void logErrorAndReattemptRead(int bookieIndex, BookieSocketAddress host, String errMsg, int rc) { + synchronized void logErrorAndReattemptRead(int bookieIndex, BookieId host, String errMsg, int rc) { super.logErrorAndReattemptRead(bookieIndex, host, errMsg, rc); int replica = writeSet.indexOf(bookieIndex); @@ -430,7 +394,7 @@ synchronized void logErrorAndReattemptRead(int bookieIndex, BookieSocketAddress } @Override - boolean complete(int bookieIndex, BookieSocketAddress host, ByteBuf buffer) { + boolean complete(int bookieIndex, BookieId host, ByteBuf buffer) { boolean completed = super.complete(bookieIndex, host, buffer); if (completed) { int numReplicasTried = getNextReplicaIndexToReadFrom(); @@ -438,197 +402,11 @@ boolean complete(int bookieIndex, BookieSocketAddress host, ByteBuf buffer) { // the first successful speculative read as "slow" for (int i = 0; i < numReplicasTried - 1; i++) { int slowBookieIndex = writeSet.get(i); - BookieSocketAddress slowBookieSocketAddress = ensemble.get(slowBookieIndex); + BookieId slowBookieSocketAddress = ensemble.get(slowBookieIndex); clientCtx.getPlacementPolicy().registerSlowBookie(slowBookieSocketAddress, eId); } } return completed; } } - - PendingReadOp(LedgerHandle lh, - ClientContext clientCtx, - long startEntryId, - long endEntryId, - boolean isRecoveryRead) { - this.seq = new ArrayList<>((int) ((endEntryId + 1) - startEntryId)); - this.future = new CompletableFuture<>(); - this.lh = lh; - this.clientCtx = clientCtx; - this.startEntryId = startEntryId; - this.endEntryId = endEntryId; - this.isRecoveryRead = isRecoveryRead; - - this.allowFailFast = false; - numPendingEntries = endEntryId - startEntryId + 1; - requiredBookiesMissingEntryForRecovery = getLedgerMetadata().getWriteQuorumSize() - - getLedgerMetadata().getAckQuorumSize() + 1; - heardFromHosts = new HashSet<>(); - heardFromHostsBitSet = new BitSet(getLedgerMetadata().getEnsembleSize()); - } - - CompletableFuture future() { - return future; - } - - protected LedgerMetadata getLedgerMetadata() { - return lh.getLedgerMetadata(); - } - - protected void cancelSpeculativeTask(boolean mayInterruptIfRunning) { - if (speculativeTask != null) { - speculativeTask.cancel(mayInterruptIfRunning); - speculativeTask = null; - } - } - - // I don't think this is ever used in production code -Ivan - PendingReadOp parallelRead(boolean enabled) { - this.parallelRead = enabled; - return this; - } - - void allowFailFastOnUnwritableChannel() { - allowFailFast = true; - } - - public void submit() { - clientCtx.getMainWorkerPool().executeOrdered(lh.ledgerId, this); - } - - void initiate() 
{ - long nextEnsembleChange = startEntryId, i = startEntryId; - this.requestTimeNanos = MathUtils.nowInNano(); - List ensemble = null; - do { - if (i == nextEnsembleChange) { - ensemble = getLedgerMetadata().getEnsemble(i); - nextEnsembleChange = getLedgerMetadata().getNextEnsembleChange(i); - } - LedgerEntryRequest entry; - if (parallelRead) { - entry = new ParallelReadRequest(ensemble, lh.ledgerId, i); - } else { - entry = new SequenceReadRequest(ensemble, lh.ledgerId, i); - } - seq.add(entry); - i++; - } while (i <= endEntryId); - // read the entries. - for (LedgerEntryRequest entry : seq) { - entry.read(); - if (!parallelRead && clientCtx.getConf().readSpeculativeRequestPolicy.isPresent()) { - clientCtx.getConf().readSpeculativeRequestPolicy.get() - .initiateSpeculativeRequest(clientCtx.getScheduler(), entry); - } - } - } - - @Override - public void safeRun() { - initiate(); - } - - private static class ReadContext implements ReadEntryCallbackCtx { - final int bookieIndex; - final BookieSocketAddress to; - final LedgerEntryRequest entry; - long lac = LedgerHandle.INVALID_ENTRY_ID; - - ReadContext(int bookieIndex, BookieSocketAddress to, LedgerEntryRequest entry) { - this.bookieIndex = bookieIndex; - this.to = to; - this.entry = entry; - } - - @Override - public void setLastAddConfirmed(long lac) { - this.lac = lac; - } - - @Override - public long getLastAddConfirmed() { - return lac; - } - } - - void sendReadTo(int bookieIndex, BookieSocketAddress to, LedgerEntryRequest entry) throws InterruptedException { - if (lh.throttler != null) { - lh.throttler.acquire(); - } - - int flags = isRecoveryRead ? BookieProtocol.FLAG_HIGH_PRIORITY : BookieProtocol.FLAG_NONE; - clientCtx.getBookieClient().readEntry(to, lh.ledgerId, entry.eId, - this, new ReadContext(bookieIndex, to, entry), flags); - } - - @Override - public void readEntryComplete(int rc, long ledgerId, final long entryId, final ByteBuf buffer, Object ctx) { - final ReadContext rctx = (ReadContext) ctx; - final LedgerEntryRequest entry = rctx.entry; - - if (rc != BKException.Code.OK) { - entry.logErrorAndReattemptRead(rctx.bookieIndex, rctx.to, "Error: " + BKException.getMessage(rc), rc); - return; - } - - heardFromHosts.add(rctx.to); - heardFromHostsBitSet.set(rctx.bookieIndex, true); - - buffer.retain(); - if (entry.complete(rctx.bookieIndex, rctx.to, buffer)) { - if (!isRecoveryRead) { - // do not advance LastAddConfirmed for recovery reads - lh.updateLastConfirmed(rctx.getLastAddConfirmed(), 0L); - } - submitCallback(BKException.Code.OK); - } else { - buffer.release(); - } - - if (numPendingEntries < 0) { - LOG.error("Read too many values for ledger {} : [{}, {}].", - ledgerId, startEntryId, endEntryId); - } - } - - protected void submitCallback(int code) { - if (BKException.Code.OK == code) { - numPendingEntries--; - if (numPendingEntries != 0) { - return; - } - } - - // ensure callback once - if (!complete.compareAndSet(false, true)) { - return; - } - - cancelSpeculativeTask(true); - - long latencyNanos = MathUtils.elapsedNanos(requestTimeNanos); - if (code != BKException.Code.OK) { - long firstUnread = LedgerHandle.INVALID_ENTRY_ID; - for (LedgerEntryRequest req : seq) { - if (!req.isComplete()) { - firstUnread = req.eId; - break; - } - } - LOG.error( - "Read of ledger entry failed: L{} E{}-E{}, Sent to {}, " - + "Heard from {} : bitset = {}. 
First unread entry is {}", - lh.getId(), startEntryId, endEntryId, sentToHosts, heardFromHosts, heardFromHostsBitSet, - firstUnread); - clientCtx.getClientStats().getReadOpLogger().registerFailedEvent(latencyNanos, TimeUnit.NANOSECONDS); - // release the entries - seq.forEach(LedgerEntryRequest::close); - future.completeExceptionally(BKException.create(code)); - } else { - clientCtx.getClientStats().getReadOpLogger().registerSuccessfulEvent(latencyNanos, TimeUnit.NANOSECONDS); - future.complete(LedgerEntriesImpl.create(Lists.transform(seq, input -> input.entryImpl))); - } - } - } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/PendingWriteLacOp.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/PendingWriteLacOp.java index 2881d2f3ad5..f9a5397daf0 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/PendingWriteLacOp.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/PendingWriteLacOp.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -19,9 +19,8 @@ import java.util.BitSet; import java.util.List; - import org.apache.bookkeeper.client.AsyncCallback.AddLacCallback; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.WriteLacCallback; import org.apache.bookkeeper.util.ByteBufList; import org.slf4j.Logger; @@ -51,9 +50,9 @@ class PendingWriteLacOp implements WriteLacCallback { LedgerHandle lh; ClientContext clientCtx; - final List currentEnsemble; + final List currentEnsemble; - PendingWriteLacOp(LedgerHandle lh, ClientContext clientCtx, List ensemble, + PendingWriteLacOp(LedgerHandle lh, ClientContext clientCtx, List ensemble, AddLacCallback cb, Object ctx) { this.lh = lh; this.clientCtx = clientCtx; @@ -80,18 +79,14 @@ void sendWriteLacRequest(int bookieIndex) { void initiate(ByteBufList toSend) { this.toSend = toSend; - DistributionSchedule.WriteSet writeSet = lh.distributionSchedule.getWriteSet(lac); - try { - for (int i = 0; i < writeSet.size(); i++) { - sendWriteLacRequest(writeSet.get(i)); - } - } finally { - writeSet.recycle(); + + for (int i = 0; i < lh.distributionSchedule.getWriteQuorumSize(); i++) { + sendWriteLacRequest(lh.distributionSchedule.getWriteSetBookieIndex(lac, i)); } } @Override - public void writeLacComplete(int rc, long ledgerId, BookieSocketAddress addr, Object ctx) { + public void writeLacComplete(int rc, long ledgerId, BookieId addr, Object ctx) { int bookieIndex = (Integer) ctx; if (completed) { @@ -112,7 +107,7 @@ public void writeLacComplete(int rc, long ledgerId, BookieSocketAddress addr, Ob return; } } else { - LOG.warn("WriteLac did not succeed: Ledger {} on {}", new Object[] { ledgerId, addr }); + LOG.warn("WriteLac did not succeed: Ledger {} on {}", ledgerId, addr); } if (receivedResponseSet.isEmpty()){ diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/RackChangeNotifier.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/RackChangeNotifier.java index c66bcaf9762..e9fc3a654d4 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/RackChangeNotifier.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/RackChangeNotifier.java @@ -18,8 +18,11 @@ package org.apache.bookkeeper.client; +import org.apache.bookkeeper.net.BookieNode; + /** 
- * Notifier used by the RackawareEnsemblePlacementPolicy to get notified if a rack changes for a bookie. + * Notifier used by the RackawareEnsemblePlacementPolicy to get notified if a + * rack changes for a bookie. */ public interface RackChangeNotifier { @@ -28,5 +31,6 @@ public interface RackChangeNotifier { * * @param rackawarePolicy */ - void registerRackChangeListener(RackawareEnsemblePlacementPolicyImpl rackawarePolicy); + void registerRackChangeListener( + ITopologyAwareEnsemblePlacementPolicy rackawarePolicy); } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/RackawareEnsemblePlacementPolicy.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/RackawareEnsemblePlacementPolicy.java index 1fd7580c816..1fb17ca3ef1 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/RackawareEnsemblePlacementPolicy.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/RackawareEnsemblePlacementPolicy.java @@ -18,18 +18,15 @@ package org.apache.bookkeeper.client; import io.netty.util.HashedWheelTimer; - import java.util.List; import java.util.Map; import java.util.Set; - import org.apache.bookkeeper.client.BKException.BKNotEnoughBookiesException; -import org.apache.bookkeeper.client.ITopologyAwareEnsemblePlacementPolicy.Ensemble; -import org.apache.bookkeeper.client.ITopologyAwareEnsemblePlacementPolicy.Predicate; -import org.apache.bookkeeper.client.TopologyAwareEnsemblePlacementPolicy.BookieNode; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.net.BookieNode; import org.apache.bookkeeper.net.DNSToSwitchMapping; import org.apache.bookkeeper.net.Node; +import org.apache.bookkeeper.proto.BookieAddressResolver; import org.apache.bookkeeper.stats.StatsLogger; /** @@ -38,7 +35,7 @@ * @see EnsemblePlacementPolicy */ public class RackawareEnsemblePlacementPolicy extends RackawareEnsemblePlacementPolicyImpl - implements ITopologyAwareEnsemblePlacementPolicy { + implements ITopologyAwareEnsemblePlacementPolicy { RackawareEnsemblePlacementPolicyImpl slave = null; public RackawareEnsemblePlacementPolicy() { @@ -59,18 +56,42 @@ protected RackawareEnsemblePlacementPolicy initialize(DNSToSwitchMapping dnsReso int maxWeightMultiple, int minNumRacksPerWriteQuorum, boolean enforceMinNumRacksPerWriteQuorum, - StatsLogger statsLogger) { + boolean ignoreLocalNodeInPlacementPolicy, + StatsLogger statsLogger, BookieAddressResolver bookieAddressResolver) { + return initialize(dnsResolver, timer, reorderReadsRandom, stabilizePeriodSeconds, + reorderThresholdPendingRequests, isWeighted, maxWeightMultiple, minNumRacksPerWriteQuorum, + enforceMinNumRacksPerWriteQuorum, ignoreLocalNodeInPlacementPolicy, false, + statsLogger, bookieAddressResolver); + } + + @Override + protected RackawareEnsemblePlacementPolicy initialize(DNSToSwitchMapping dnsResolver, + HashedWheelTimer timer, + boolean reorderReadsRandom, + int stabilizePeriodSeconds, + int reorderThresholdPendingRequests, + boolean isWeighted, + int maxWeightMultiple, + int minNumRacksPerWriteQuorum, + boolean enforceMinNumRacksPerWriteQuorum, + boolean ignoreLocalNodeInPlacementPolicy, + boolean useHostnameResolveLocalNodePlacementPolicy, + StatsLogger statsLogger, BookieAddressResolver bookieAddressResolver) { if (stabilizePeriodSeconds > 0) { super.initialize(dnsResolver, timer, reorderReadsRandom, 0, reorderThresholdPendingRequests, isWeighted, - maxWeightMultiple, minNumRacksPerWriteQuorum, 
enforceMinNumRacksPerWriteQuorum, statsLogger); + maxWeightMultiple, minNumRacksPerWriteQuorum, enforceMinNumRacksPerWriteQuorum, + ignoreLocalNodeInPlacementPolicy, useHostnameResolveLocalNodePlacementPolicy, + statsLogger, bookieAddressResolver); slave = new RackawareEnsemblePlacementPolicyImpl(enforceDurability); slave.initialize(dnsResolver, timer, reorderReadsRandom, stabilizePeriodSeconds, reorderThresholdPendingRequests, isWeighted, maxWeightMultiple, minNumRacksPerWriteQuorum, - enforceMinNumRacksPerWriteQuorum, statsLogger); + enforceMinNumRacksPerWriteQuorum, ignoreLocalNodeInPlacementPolicy, + useHostnameResolveLocalNodePlacementPolicy, statsLogger, bookieAddressResolver); } else { super.initialize(dnsResolver, timer, reorderReadsRandom, stabilizePeriodSeconds, reorderThresholdPendingRequests, isWeighted, maxWeightMultiple, minNumRacksPerWriteQuorum, - enforceMinNumRacksPerWriteQuorum, statsLogger); + enforceMinNumRacksPerWriteQuorum, ignoreLocalNodeInPlacementPolicy, + useHostnameResolveLocalNodePlacementPolicy, statsLogger, bookieAddressResolver); slave = null; } return this; @@ -85,9 +106,9 @@ public void uninitalize() { } @Override - public Set onClusterChanged(Set writableBookies, - Set readOnlyBookies) { - Set deadBookies = super.onClusterChanged(writableBookies, readOnlyBookies); + public Set onClusterChanged(Set writableBookies, + Set readOnlyBookies) { + Set deadBookies = super.onClusterChanged(writableBookies, readOnlyBookies); if (null != slave) { deadBookies = slave.onClusterChanged(writableBookies, readOnlyBookies); } @@ -95,8 +116,8 @@ public Set onClusterChanged(Set writab } @Override - public List newEnsemble(int ensembleSize, int writeQuorumSize, int ackQuorumSize, - Map customMetadata, Set excludeBookies) + public PlacementResult> newEnsemble(int ensembleSize, int writeQuorumSize, + int ackQuorumSize, Map customMetadata, Set excludeBookies) throws BKException.BKNotEnoughBookiesException { try { return super.newEnsemble(ensembleSize, writeQuorumSize, ackQuorumSize, customMetadata, excludeBookies); @@ -110,9 +131,9 @@ public List newEnsemble(int ensembleSize, int writeQuorumSi } @Override - public BookieSocketAddress replaceBookie(int ensembleSize, int writeQuorumSize, int ackQuorumSize, - Map customMetadata, Set currentEnsemble, - BookieSocketAddress bookieToReplace, Set excludeBookies) + public PlacementResult replaceBookie(int ensembleSize, int writeQuorumSize, int ackQuorumSize, + Map customMetadata, List currentEnsemble, + BookieId bookieToReplace, Set excludeBookies) throws BKException.BKNotEnoughBookiesException { try { return super.replaceBookie(ensembleSize, writeQuorumSize, ackQuorumSize, customMetadata, @@ -129,7 +150,7 @@ public BookieSocketAddress replaceBookie(int ensembleSize, int writeQuorumSize, @Override public DistributionSchedule.WriteSet reorderReadSequence( - List ensemble, + List ensemble, BookiesHealthInfo bookiesHealthInfo, DistributionSchedule.WriteSet writeSet) { return super.reorderReadSequence(ensemble, bookiesHealthInfo, @@ -138,7 +159,7 @@ public DistributionSchedule.WriteSet reorderReadSequence( @Override public DistributionSchedule.WriteSet reorderReadLACSequence( - List ensemble, + List ensemble, BookiesHealthInfo bookiesHealthInfo, DistributionSchedule.WriteSet writeSet) { return super.reorderReadLACSequence(ensemble, bookiesHealthInfo, @@ -146,10 +167,10 @@ public DistributionSchedule.WriteSet reorderReadLACSequence( } @Override - public List newEnsemble(int ensembleSize, + public PlacementResult> newEnsemble(int ensembleSize, 
int writeQuorumSize, int ackQuorumSize, - Set excludeBookies, + Set excludeBookies, Ensemble parentEnsemble, Predicate parentPredicate) throws BKException.BKNotEnoughBookiesException { @@ -235,7 +256,29 @@ public BookieNode selectFromNetworkLocation( } @Override - public void handleBookiesThatLeft(Set leftBookies) { + public PlacementResult> replaceToAdherePlacementPolicy( + int ensembleSize, + int writeQuorumSize, + int ackQuorumSize, + Set excludeBookies, + List currentEnsemble) { + final PlacementResult> placementResult = + super.replaceToAdherePlacementPolicy(ensembleSize, writeQuorumSize, ackQuorumSize, + excludeBookies, currentEnsemble); + if (placementResult.getAdheringToPolicy() != PlacementPolicyAdherence.FAIL) { + return placementResult; + } else { + if (slave == null) { + return placementResult; + } else { + return slave.replaceToAdherePlacementPolicy(ensembleSize, writeQuorumSize, ackQuorumSize, + excludeBookies, currentEnsemble); + } + } + } + + @Override + public void handleBookiesThatLeft(Set leftBookies) { super.handleBookiesThatLeft(leftBookies); if (null != slave) { slave.handleBookiesThatLeft(leftBookies); @@ -243,7 +286,7 @@ public void handleBookiesThatLeft(Set leftBookies) { } @Override - public void handleBookiesThatJoined(Set joinedBookies) { + public void handleBookiesThatJoined(Set joinedBookies) { super.handleBookiesThatJoined(joinedBookies); if (null != slave) { slave.handleBookiesThatJoined(joinedBookies); diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/RackawareEnsemblePlacementPolicyImpl.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/RackawareEnsemblePlacementPolicyImpl.java index 1d587d07693..6258ede3c33 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/RackawareEnsemblePlacementPolicyImpl.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/RackawareEnsemblePlacementPolicyImpl.java @@ -18,18 +18,21 @@ package org.apache.bookkeeper.client; import static com.google.common.base.Preconditions.checkNotNull; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.BOOKIES_JOINED; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.BOOKIES_LEFT; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.FAILED_TO_RESOLVE_NETWORK_LOCATION_COUNT; +import static org.apache.bookkeeper.client.BookKeeperClientStats.CLIENT_SCOPE; +import static org.apache.bookkeeper.client.BookKeeperClientStats.NUM_WRITABLE_BOOKIES_IN_DEFAULT_RACK; +import static org.apache.bookkeeper.client.BookKeeperClientStats.READ_REQUESTS_REORDERED; import static org.apache.bookkeeper.client.RegionAwareEnsemblePlacementPolicy.UNKNOWN_REGION; +import com.beust.jcommander.internal.Lists; import com.google.common.cache.Cache; import com.google.common.cache.CacheBuilder; import com.google.common.cache.CacheLoader; -import com.google.common.collect.ImmutableSet; -import com.google.common.collect.Sets; - import io.netty.util.HashedWheelTimer; - +import java.io.IOException; import java.net.InetAddress; -import java.net.UnknownHostException; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; @@ -39,31 +42,34 @@ import java.util.Map; import java.util.Optional; import java.util.Set; -import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.TimeUnit; import java.util.concurrent.locks.ReentrantReadWriteLock; -import java.util.function.Supplier; - -import org.apache.bookkeeper.bookie.BookKeeperServerStats; +import java.util.stream.Collectors; 
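In the wrapper policy above, the primary instance runs on a stabilized network topology while the slave instance (built only when stabilizePeriodSeconds > 0) sees topology changes immediately; newEnsemble and replaceBookie try the primary first and fall back to the slave on BKNotEnoughBookiesException. The shape of that composition, as a generic sketch under stated assumptions (the Policy interface here is hypothetical, not BookKeeper's):

    import java.util.List;

    class FallbackPolicySketch {
        interface Policy {
            List<String> newEnsemble(int ensembleSize) throws Exception;
        }

        // Try the primary policy; if it cannot find enough bookies and a slave
        // policy exists, retry against the slave before giving up.
        static List<String> newEnsemble(Policy primary, Policy slave, int size) throws Exception {
            try {
                return primary.newEnsemble(size);
            } catch (Exception notEnoughBookies) {
                if (slave == null) {
                    throw notEnoughBookies; // no slave when stabilizePeriodSeconds == 0
                }
                return slave.newEnsemble(size);
            }
        }
    }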
+import java.util.stream.Stream; import org.apache.bookkeeper.client.BKException.BKNotEnoughBookiesException; import org.apache.bookkeeper.client.BookieInfoReader.BookieInfo; import org.apache.bookkeeper.client.WeightedRandomSelection.WeightedObject; +import org.apache.bookkeeper.common.util.ReflectionUtils; import org.apache.bookkeeper.conf.ClientConfiguration; import org.apache.bookkeeper.conf.Configurable; import org.apache.bookkeeper.feature.FeatureProvider; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.net.BookieNode; import org.apache.bookkeeper.net.DNSToSwitchMapping; -import org.apache.bookkeeper.net.NetUtils; import org.apache.bookkeeper.net.NetworkTopology; import org.apache.bookkeeper.net.NetworkTopologyImpl; import org.apache.bookkeeper.net.Node; import org.apache.bookkeeper.net.NodeBase; import org.apache.bookkeeper.net.ScriptBasedMapping; import org.apache.bookkeeper.net.StabilizeNetworkTopology; +import org.apache.bookkeeper.proto.BookieAddressResolver; +import org.apache.bookkeeper.stats.Counter; +import org.apache.bookkeeper.stats.Gauge; import org.apache.bookkeeper.stats.OpStatsLogger; import org.apache.bookkeeper.stats.StatsLogger; -import org.apache.bookkeeper.util.ReflectionUtils; +import org.apache.bookkeeper.stats.annotations.StatsDoc; import org.apache.commons.collections4.CollectionUtils; +import org.apache.commons.lang3.tuple.Pair; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -72,18 +78,20 @@ * *

    Make most of the class and methods as protected, so it could be extended to implement other algorithms. */ +@StatsDoc( + name = CLIENT_SCOPE, + help = "BookKeeper client stats" +) public class RackawareEnsemblePlacementPolicyImpl extends TopologyAwareEnsemblePlacementPolicy { static final Logger LOG = LoggerFactory.getLogger(RackawareEnsemblePlacementPolicyImpl.class); - boolean isWeighted; int maxWeightMultiple; - private Map bookieInfoMap = new HashMap(); - private WeightedRandomSelection weightedSelection; protected int minNumRacksPerWriteQuorum; protected boolean enforceMinNumRacksPerWriteQuorum; + protected boolean ignoreLocalNodeInPlacementPolicy; + protected boolean useHostnameResolveLocalNodePlacementPolicy; - public static final String REPP_DNS_RESOLVER_CLASS = "reppDnsResolverClass"; public static final String REPP_RANDOM_READ_REORDERING = "ensembleRandomReadReordering"; static final int RACKNAME_DISTANCE_FROM_LEAVES = 1; @@ -98,105 +106,10 @@ public class RackawareEnsemblePlacementPolicyImpl extends TopologyAwareEnsembleP static final int UNAVAIL_MASK = 0x40 << 24; static final int MASK_BITS = 0xFFF << 20; - static class DefaultResolver implements DNSToSwitchMapping { - - final Supplier defaultRackSupplier; - - // for backwards compat - public DefaultResolver() { - this(() -> NetworkTopology.DEFAULT_REGION_AND_RACK); - } - - public DefaultResolver(Supplier defaultRackSupplier) { - checkNotNull(defaultRackSupplier, "defaultRackSupplier should not be null"); - this.defaultRackSupplier = defaultRackSupplier; - } - - @Override - public List resolve(List names) { - List rNames = new ArrayList(names.size()); - for (@SuppressWarnings("unused") String name : names) { - final String defaultRack = defaultRackSupplier.get(); - checkNotNull(defaultRack, "defaultRack cannot be null"); - rNames.add(defaultRack); - } - return rNames; - } - - @Override - public void reloadCachedMappings() { - // nop - } - - } - - /** - * Decorator for any existing dsn resolver. - * Backfills returned data with appropriate default rack info. 
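The decorator whose javadoc ends just above (its removed body follows) backfills rack information: wherever the wrapped resolver returns null, or fails to return one rack per name, the configured default rack is substituted so placement never sees a missing location. A minimal sketch of the same idea over a hypothetical resolver interface:

    import java.util.ArrayList;
    import java.util.List;
    import java.util.function.Supplier;

    class DefaultRackBackfillSketch {
        // Minimal stand-in for DNSToSwitchMapping: host names -> rack paths.
        interface RackResolver {
            List<String> resolve(List<String> names);
        }

        // Wraps a resolver; substitutes the default rack for every name the
        // underlying resolver could not place.
        static RackResolver backfill(RackResolver inner, Supplier<String> defaultRack) {
            return names -> {
                List<String> racks = inner.resolve(names);
                List<String> result = new ArrayList<>(names.size());
                for (int i = 0; i < names.size(); i++) {
                    String rack = (racks != null && racks.size() == names.size())
                            ? racks.get(i) : null;
                    result.add(rack != null ? rack : defaultRack.get());
                }
                return result;
            };
        }
    }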
- */ - static class DNSResolverDecorator implements DNSToSwitchMapping { - - final Supplier defaultRackSupplier; - final DNSToSwitchMapping resolver; - - DNSResolverDecorator(DNSToSwitchMapping resolver, Supplier defaultRackSupplier) { - checkNotNull(resolver, "Resolver cannot be null"); - checkNotNull(defaultRackSupplier, "defaultRackSupplier should not be null"); - this.defaultRackSupplier = defaultRackSupplier; - this.resolver = resolver; - } - - public List resolve(List names) { - if (names == null) { - return Collections.emptyList(); - } - final String defaultRack = defaultRackSupplier.get(); - checkNotNull(defaultRack, "Default rack cannot be null"); - - List rNames = resolver.resolve(names); - if (rNames != null && rNames.size() == names.size()) { - for (int i = 0; i < rNames.size(); ++i) { - if (rNames.get(i) == null) { - LOG.warn("Failed to resolve network location for {}, using default rack for it : {}.", - names.get(i), defaultRack); - rNames.set(i, defaultRack); - } - } - return rNames; - } - - LOG.warn("Failed to resolve network location for {}, using default rack for them : {}.", names, - defaultRack); - rNames = new ArrayList<>(names.size()); - - for (int i = 0; i < names.size(); ++i) { - rNames.add(defaultRack); - } - return rNames; - } - - @Override - public boolean useHostName() { - return resolver.useHostName(); - } - - @Override - public void reloadCachedMappings() { - resolver.reloadCachedMappings(); - } - } - - // for now, we just maintain the writable bookies' topology - protected NetworkTopology topology; - protected DNSToSwitchMapping dnsResolver; protected HashedWheelTimer timer; - protected final Map knownBookies; // Use a loading cache so slow bookies are expired. Use entryId as values. - protected Cache slowBookies; + protected Cache slowBookies; protected BookieNode localNode; - protected final ReentrantReadWriteLock rwLock; - // Initialize to empty set - protected ImmutableSet readOnlyBookies = ImmutableSet.of(); protected boolean reorderReadsRandom = false; protected boolean enforceDurability = false; protected int stabilizePeriodSeconds = 0; @@ -204,9 +117,22 @@ public void reloadCachedMappings() { // looks like these only assigned in the same thread as constructor, immediately after constructor; // no need to make volatile protected StatsLogger statsLogger = null; - protected OpStatsLogger bookiesJoinedCounter = null; - protected OpStatsLogger bookiesLeftCounter = null; + + @StatsDoc( + name = READ_REQUESTS_REORDERED, + help = "The distribution of number of bookies reordered on each read request" + ) protected OpStatsLogger readReorderedCounter = null; + @StatsDoc( + name = FAILED_TO_RESOLVE_NETWORK_LOCATION_COUNT, + help = "Counter for number of times DNSResolverDecorator failed to resolve Network Location" + ) + protected Counter failedToResolveNetworkLocationCounter = null; + @StatsDoc( + name = NUM_WRITABLE_BOOKIES_IN_DEFAULT_RACK, + help = "Gauge for the number of writable Bookies in default rack" + ) + protected Gauge numWritableBookiesInDefaultRack; private String defaultRack = NetworkTopology.DEFAULT_RACK; @@ -217,13 +143,41 @@ public void reloadCachedMappings() { RackawareEnsemblePlacementPolicyImpl(boolean enforceDurability) { this.enforceDurability = enforceDurability; topology = new NetworkTopologyImpl(); - knownBookies = new HashMap(); - - rwLock = new ReentrantReadWriteLock(); } - protected BookieNode createBookieNode(BookieSocketAddress addr) { - return new BookieNode(addr, resolveNetworkLocation(addr)); + /** + * Initialize the policy. 
+ * + * @param dnsResolver + * @param timer + * @param reorderReadsRandom + * @param stabilizePeriodSeconds + * @param reorderThresholdPendingRequests + * @param isWeighted + * @param maxWeightMultiple + * @param minNumRacksPerWriteQuorum + * @param enforceMinNumRacksPerWriteQuorum + * @param ignoreLocalNodeInPlacementPolicy + * @param statsLogger + * @param bookieAddressResolver + * @return initialized ensemble placement policy + */ + protected RackawareEnsemblePlacementPolicyImpl initialize(DNSToSwitchMapping dnsResolver, + HashedWheelTimer timer, + boolean reorderReadsRandom, + int stabilizePeriodSeconds, + int reorderThresholdPendingRequests, + boolean isWeighted, + int maxWeightMultiple, + int minNumRacksPerWriteQuorum, + boolean enforceMinNumRacksPerWriteQuorum, + boolean ignoreLocalNodeInPlacementPolicy, + StatsLogger statsLogger, + BookieAddressResolver bookieAddressResolver) { + return initialize(dnsResolver, timer, reorderReadsRandom, stabilizePeriodSeconds, + reorderThresholdPendingRequests, isWeighted, maxWeightMultiple, minNumRacksPerWriteQuorum, + enforceMinNumRacksPerWriteQuorum, ignoreLocalNodeInPlacementPolicy, + false, statsLogger, bookieAddressResolver); } /** @@ -241,19 +195,44 @@ protected RackawareEnsemblePlacementPolicyImpl initialize(DNSToSwitchMapping dns int maxWeightMultiple, int minNumRacksPerWriteQuorum, boolean enforceMinNumRacksPerWriteQuorum, - StatsLogger statsLogger) { + boolean ignoreLocalNodeInPlacementPolicy, + boolean useHostnameResolveLocalNodePlacementPolicy, + StatsLogger statsLogger, + BookieAddressResolver bookieAddressResolver) { checkNotNull(statsLogger, "statsLogger should not be null, use NullStatsLogger instead."); this.statsLogger = statsLogger; - this.bookiesJoinedCounter = statsLogger.getOpStatsLogger(BookKeeperServerStats.BOOKIES_JOINED); - this.bookiesLeftCounter = statsLogger.getOpStatsLogger(BookKeeperServerStats.BOOKIES_LEFT); - this.readReorderedCounter = statsLogger.getOpStatsLogger(BookKeeperClientStats.READ_REQUESTS_REORDERED); + this.bookieAddressResolver = bookieAddressResolver; + this.bookiesJoinedCounter = statsLogger.getOpStatsLogger(BOOKIES_JOINED); + this.bookiesLeftCounter = statsLogger.getOpStatsLogger(BOOKIES_LEFT); + this.readReorderedCounter = statsLogger.getOpStatsLogger(READ_REQUESTS_REORDERED); + this.failedToResolveNetworkLocationCounter = statsLogger.getCounter(FAILED_TO_RESOLVE_NETWORK_LOCATION_COUNT); + this.numWritableBookiesInDefaultRack = new Gauge() { + @Override + public Integer getDefaultValue() { + return 0; + } + + @Override + public Integer getSample() { + rwLock.readLock().lock(); + try { + return topology.countNumOfAvailableNodes(getDefaultRack(), Collections.emptySet()); + } finally { + rwLock.readLock().unlock(); + } + } + }; + this.statsLogger.registerGauge(NUM_WRITABLE_BOOKIES_IN_DEFAULT_RACK, numWritableBookiesInDefaultRack); this.reorderReadsRandom = reorderReadsRandom; this.stabilizePeriodSeconds = stabilizePeriodSeconds; this.reorderThresholdPendingRequests = reorderThresholdPendingRequests; - this.dnsResolver = new DNSResolverDecorator(dnsResolver, () -> this.getDefaultRack()); + this.dnsResolver = new DNSResolverDecorator(dnsResolver, () -> this.getDefaultRack(), + failedToResolveNetworkLocationCounter); this.timer = timer; this.minNumRacksPerWriteQuorum = minNumRacksPerWriteQuorum; this.enforceMinNumRacksPerWriteQuorum = enforceMinNumRacksPerWriteQuorum; + this.ignoreLocalNodeInPlacementPolicy = ignoreLocalNodeInPlacementPolicy; + this.useHostnameResolveLocalNodePlacementPolicy = 
useHostnameResolveLocalNodePlacementPolicy; // create the network topology if (stabilizePeriodSeconds > 0) { @@ -262,12 +241,17 @@ protected RackawareEnsemblePlacementPolicyImpl initialize(DNSToSwitchMapping dns this.topology = new NetworkTopologyImpl(); } - BookieNode bn; - try { - bn = createBookieNode(new BookieSocketAddress(InetAddress.getLocalHost().getHostAddress(), 0)); - } catch (UnknownHostException e) { - LOG.error("Failed to get local host address : ", e); - bn = null; + BookieNode bn = null; + if (!ignoreLocalNodeInPlacementPolicy) { + try { + String hostname = useHostnameResolveLocalNodePlacementPolicy + ? InetAddress.getLocalHost().getCanonicalHostName() : InetAddress.getLocalHost().getHostAddress(); + bn = createDummyLocalBookieNode(hostname); + } catch (IOException e) { + LOG.error("Failed to get local host address : ", e); + } + } else { + LOG.info("Ignoring local node in placement policy"); } localNode = bn; LOG.info("Initialize rackaware ensemble placement policy @ {} @ {} : {}.", @@ -277,7 +261,7 @@ protected RackawareEnsemblePlacementPolicyImpl initialize(DNSToSwitchMapping dns this.isWeighted = isWeighted; if (this.isWeighted) { this.maxWeightMultiple = maxWeightMultiple; - this.weightedSelection = new WeightedRandomSelection(this.maxWeightMultiple); + this.weightedSelection = new WeightedRandomSelectionImpl(this.maxWeightMultiple); LOG.info("Weight based placement with max multiple of " + this.maxWeightMultiple); } else { LOG.info("Not weighted"); @@ -297,6 +281,7 @@ public RackawareEnsemblePlacementPolicyImpl withDefaultRack(String rack) { return this; } + @Override public String getDefaultRack() { return defaultRack; } @@ -306,7 +291,9 @@ public RackawareEnsemblePlacementPolicyImpl initialize(ClientConfiguration conf, Optional optionalDnsResolver, HashedWheelTimer timer, FeatureProvider featureProvider, - StatsLogger statsLogger) { + StatsLogger statsLogger, + BookieAddressResolver bookieAddressResolver) { + this.bookieAddressResolver = bookieAddressResolver; DNSToSwitchMapping dnsResolver; if (optionalDnsResolver.isPresent()) { dnsResolver = optionalDnsResolver.get(); @@ -314,6 +301,7 @@ public RackawareEnsemblePlacementPolicyImpl initialize(ClientConfiguration conf, String dnsResolverName = conf.getString(REPP_DNS_RESOLVER_CLASS, ScriptBasedMapping.class.getName()); try { dnsResolver = ReflectionUtils.newInstance(dnsResolverName, DNSToSwitchMapping.class); + dnsResolver.setBookieAddressResolver(bookieAddressResolver); if (dnsResolver instanceof Configurable) { ((Configurable) dnsResolver).setConf(conf); } @@ -322,16 +310,26 @@ public RackawareEnsemblePlacementPolicyImpl initialize(ClientConfiguration conf, ((RackChangeNotifier) dnsResolver).registerRackChangeListener(this); } } catch (RuntimeException re) { - LOG.info("Failed to initialize DNS Resolver {}, used default subnet resolver : {}", - dnsResolverName, re, re.getMessage()); - dnsResolver = new DefaultResolver(() -> this.getDefaultRack()); + if (!conf.getEnforceMinNumRacksPerWriteQuorum()) { + LOG.warn("Failed to initialize DNS Resolver {}, using default subnet resolver because {}", + dnsResolverName, re.getMessage()); + dnsResolver = new DefaultResolver(this::getDefaultRack); + dnsResolver.setBookieAddressResolver(bookieAddressResolver); + } else { + /* + * if minNumRacksPerWriteQuorum is enforced, then it + * shouldn't continue in the case of failure to create + * dnsResolver.
+ */ + throw re; + } } } slowBookies = CacheBuilder.newBuilder() .expireAfterWrite(conf.getBookieFailureHistoryExpirationMSec(), TimeUnit.MILLISECONDS) - .build(new CacheLoader() { + .build(new CacheLoader() { @Override - public Long load(BookieSocketAddress key) throws Exception { + public Long load(BookieId key) throws Exception { return -1L; } }); @@ -345,7 +343,10 @@ public Long load(BookieSocketAddress key) throws Exception { conf.getBookieMaxWeightMultipleForWeightBasedPlacement(), conf.getMinNumRacksPerWriteQuorum(), conf.getEnforceMinNumRacksPerWriteQuorum(), - statsLogger); + conf.getIgnoreLocalNodeInPlacementPolicy(), + conf.getUseHostnameResolveLocalNodePlacementPolicy(), + statsLogger, + bookieAddressResolver); } @Override @@ -353,161 +354,62 @@ public void uninitalize() { // do nothing } - protected String resolveNetworkLocation(BookieSocketAddress addr) { - return NetUtils.resolveNetworkLocation(dnsResolver, addr.getSocketAddress()); - } - - public void onBookieRackChange(List bookieAddressList) { - rwLock.writeLock().lock(); - try { - for (BookieSocketAddress bookieAddress : bookieAddressList) { - BookieNode node = knownBookies.get(bookieAddress); - if (node != null) { - // refresh the rack info if its a known bookie - topology.remove(node); - topology.add(createBookieNode(bookieAddress)); - } - } - } finally { - rwLock.writeLock().unlock(); - } - } - - @Override - public Set onClusterChanged(Set writableBookies, - Set readOnlyBookies) { - rwLock.writeLock().lock(); - try { - ImmutableSet joinedBookies, leftBookies, deadBookies; - Set oldBookieSet = knownBookies.keySet(); - // left bookies : bookies in known bookies, but not in new writable bookie cluster. - leftBookies = Sets.difference(oldBookieSet, writableBookies).immutableCopy(); - // joined bookies : bookies in new writable bookie cluster, but not in known bookies - joinedBookies = Sets.difference(writableBookies, oldBookieSet).immutableCopy(); - // dead bookies. 
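The removed onClusterChanged body (continued just below) derives three sets from each watch update with plain set differences: bookies that left are the known bookies no longer writable, joined bookies are writable ones not yet known, and dead bookies are those that left without reappearing in the read-only set. The same arithmetic in a small self-contained sketch, using java.util sets in place of Guava:

    import java.util.HashSet;
    import java.util.Set;

    class ClusterDiffSketch {
        // left = known but no longer writable; joined = writable but not yet
        // known; dead = left and not merely moved to the read-only set.
        static <B> Set<B> deadBookies(Set<B> known, Set<B> writable, Set<B> readOnly) {
            Set<B> left = new HashSet<>(known);
            left.removeAll(writable);
            Set<B> joined = new HashSet<>(writable);
            joined.removeAll(known);
            Set<B> dead = new HashSet<>(left);
            dead.removeAll(readOnly);
            return dead;
        }
    }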
- deadBookies = Sets.difference(leftBookies, readOnlyBookies).immutableCopy(); - LOG.debug("Cluster changed : left bookies are {}, joined bookies are {}, while dead bookies are {}.", - leftBookies, joinedBookies, deadBookies); - handleBookiesThatLeft(leftBookies); - handleBookiesThatJoined(joinedBookies); - if (this.isWeighted && (leftBookies.size() > 0 || joinedBookies.size() > 0)) { - this.weightedSelection.updateMap(this.bookieInfoMap); - } - if (!readOnlyBookies.isEmpty()) { - this.readOnlyBookies = ImmutableSet.copyOf(readOnlyBookies); - } - - return deadBookies; - } finally { - rwLock.writeLock().unlock(); - } - } - - @Override - public void handleBookiesThatLeft(Set leftBookies) { - for (BookieSocketAddress addr : leftBookies) { - try { - BookieNode node = knownBookies.remove(addr); - if (null != node) { - topology.remove(node); - if (this.isWeighted) { - this.bookieInfoMap.remove(node); - } - - bookiesLeftCounter.registerSuccessfulValue(1L); - - if (LOG.isDebugEnabled()) { - LOG.debug("Cluster changed : bookie {} left from cluster.", addr); + /* + * this method should be called while holding the read lock of 'rwLock' + */ + protected Set addDefaultRackBookiesIfMinNumRacksIsEnforced( + Set excludeBookies) { + Set comprehensiveExclusionBookiesSet; + if (enforceMinNumRacksPerWriteQuorum) { + Set bookiesInDefaultRack = null; + Set defaultRackLeaves = topology.getLeaves(getDefaultRack()); + for (Node node : defaultRackLeaves) { + if (node instanceof BookieNode) { + if (bookiesInDefaultRack == null) { + bookiesInDefaultRack = new HashSet(excludeBookies); } + bookiesInDefaultRack.add(((BookieNode) node).getAddr()); + } else { + LOG.error("Found non-BookieNode: {} as leaf of default rack: {}", node, getDefaultRack()); } - } catch (Throwable t) { - LOG.error("Unexpected exception while handling leaving bookie {}", addr, t); - if (bookiesLeftCounter != null) { - bookiesLeftCounter.registerFailedValue(1L); - } - // no need to re-throw; we want to process the rest of the bookies - // exception anyways will be caught/logged/suppressed in the ZK's event handler - } - } - } - - @Override - public void handleBookiesThatJoined(Set joinedBookies) { - // node joined - for (BookieSocketAddress addr : joinedBookies) { - try { - BookieNode node = createBookieNode(addr); - topology.add(node); - knownBookies.put(addr, node); - if (this.isWeighted) { - this.bookieInfoMap.putIfAbsent(node, new BookieInfo()); - } - - bookiesJoinedCounter.registerSuccessfulValue(1L); - - if (LOG.isDebugEnabled()) { - LOG.debug("Cluster changed : bookie {} joined the cluster.", addr); - } - } catch (Throwable t) { - // topology.add() throws unchecked exception - LOG.error("Unexpected exception while handling joining bookie {}", addr, t); - - bookiesJoinedCounter.registerFailedValue(1L); - // no need to re-throw; we want to process the rest of the bookies - // exception anyways will be caught/logged/suppressed in the ZK's event handler } - } - } - - protected Set convertBookiesToNodes(Set excludeBookies) { - Set nodes = new HashSet(); - for (BookieSocketAddress addr : excludeBookies) { - BookieNode bn = knownBookies.get(addr); - if (null == bn) { - bn = createBookieNode(addr); + if ((bookiesInDefaultRack == null) || bookiesInDefaultRack.isEmpty()) { + comprehensiveExclusionBookiesSet = excludeBookies; + } else { + comprehensiveExclusionBookiesSet = new HashSet(excludeBookies); + comprehensiveExclusionBookiesSet.addAll(bookiesInDefaultRack); + LOG.info("enforceMinNumRacksPerWriteQuorum is enabled, so excluding bookies of defaultRack: {}",
+ bookiesInDefaultRack); } - nodes.add(bn); - } - return nodes; - } - - private static Set getNetworkLocations(Set bookieNodes) { - Set networkLocs = new HashSet<>(); - for (Node bookieNode : bookieNodes) { - networkLocs.add(bookieNode.getNetworkLocation()); + } else { + comprehensiveExclusionBookiesSet = excludeBookies; } - return networkLocs; + return comprehensiveExclusionBookiesSet; } @Override - public List newEnsemble(int ensembleSize, int writeQuorumSize, int ackQuorumSize, - Map customMetadata, Set excludeBookies) + public PlacementResult> newEnsemble(int ensembleSize, int writeQuorumSize, + int ackQuorumSize, Map customMetadata, Set excludeBookies) throws BKNotEnoughBookiesException { - return newEnsembleInternal(ensembleSize, writeQuorumSize, excludeBookies, null, null); - } - - protected List newEnsembleInternal(int ensembleSize, - int writeQuorumSize, - Set excludeBookies, - Ensemble parentEnsemble, - Predicate parentPredicate) - throws BKNotEnoughBookiesException { - return newEnsembleInternal( - ensembleSize, - writeQuorumSize, - writeQuorumSize, - excludeBookies, - parentEnsemble, - parentPredicate); + rwLock.readLock().lock(); + try { + Set comprehensiveExclusionBookiesSet = addDefaultRackBookiesIfMinNumRacksIsEnforced( + excludeBookies); + PlacementResult> newEnsembleResult = newEnsembleInternal(ensembleSize, + writeQuorumSize, ackQuorumSize, comprehensiveExclusionBookiesSet, null, null); + return newEnsembleResult; + } finally { + rwLock.readLock().unlock(); + } } @Override - public List newEnsemble(int ensembleSize, - int writeQuorumSize, - int ackQuorumSize, - Set excludeBookies, - Ensemble parentEnsemble, - Predicate parentPredicate) + public PlacementResult> newEnsemble(int ensembleSize, + int writeQuorumSize, + int ackQuorumSize, + Set excludeBookies, + Ensemble parentEnsemble, + Predicate parentPredicate) throws BKNotEnoughBookiesException { return newEnsembleInternal( ensembleSize, @@ -518,11 +420,11 @@ public List newEnsemble(int ensembleSize, parentPredicate); } - protected List newEnsembleInternal( + protected PlacementResult> newEnsembleInternal( int ensembleSize, int writeQuorumSize, int ackQuorumSize, - Set excludeBookies, + Set excludeBookies, Ensemble parentEnsemble, Predicate parentPredicate) throws BKNotEnoughBookiesException { rwLock.readLock().lock(); @@ -548,17 +450,15 @@ protected List newEnsembleInternal( } List bns = selectRandom(ensembleSize, excludeNodes, TruePredicate.INSTANCE, ensemble); - ArrayList addrs = new ArrayList(ensembleSize); + ArrayList addrs = new ArrayList(ensembleSize); for (BookieNode bn : bns) { addrs.add(bn.getAddr()); } - return addrs; + return PlacementResult.of(addrs, PlacementPolicyAdherence.FAIL); } - // pick nodes by racks, to ensure there is at least write quorum number of racks. - int idx = 0; - String[] racks = new String[ensembleSize]; + //Choose different rack nodes. + String curRack = null; for (int i = 0; i < ensembleSize; i++) { - String curRack; if (null == prevNode) { if ((null == localNode) || defaultRack.equals(localNode.getNetworkLocation())) { curRack = NodeBase.ROOT; @@ -566,92 +466,68 @@ protected List newEnsembleInternal( curRack = localNode.getNetworkLocation(); } } else { - StringBuilder sb = new StringBuilder(); - sb.append("~"); - - if (writeQuorumSize > 1) { - /* - * RackAwareEnsemblePlacementPolicy should try to select - * bookies from atleast - * minNumRacksPerWriteQuorumForThisEnsemble number of - * different racks for a write quorum. 
So in a - * WriteQuorum, bookies should be from - * minNumRacksPerWriteQuorumForThisEnsemble number of - * racks. So we would add racks of - * (minNumRacksPerWriteQuorumForThisEnsemble-1) - * neighbours (both sides) to the exclusion list - * (~curRack). - */ - for (int j = 1; j < minNumRacksPerWriteQuorumForThisEnsemble; j++) { - int nextIndex = i + j; - if (nextIndex >= ensembleSize) { - nextIndex %= ensembleSize; - } - /* - * if racks[nextIndex] is null, then it means bookie - * is not yet selected for ensemble at 'nextIndex' - * index. - */ - if (racks[nextIndex] != null) { - if (!((sb.length() == 1) && (sb.charAt(0) == '~'))) { - sb.append(NetworkTopologyImpl.NODE_SEPARATOR); - } - sb.append(racks[nextIndex]); - } - } - - for (int j = 1; j < minNumRacksPerWriteQuorumForThisEnsemble; j++) { - int nextIndex = i - j; - if (nextIndex < 0) { - nextIndex += ensembleSize; - } - /* - * if racks[nextIndex] is null, then it means bookie - * is not yet selected for ensemble at 'nextIndex' - * index. - */ - if (racks[nextIndex] != null) { - if (!((sb.length() == 1) && (sb.charAt(0) == '~'))) { - sb.append(NetworkTopologyImpl.NODE_SEPARATOR); - } - sb.append(racks[nextIndex]); - } - } + if (!curRack.startsWith("~")) { + curRack = "~" + prevNode.getNetworkLocation(); + } else { + curRack = curRack + NetworkTopologyImpl.NODE_SEPARATOR + prevNode.getNetworkLocation(); } - curRack = sb.toString(); } boolean firstBookieInTheEnsemble = (null == prevNode); - prevNode = selectFromNetworkLocation(curRack, excludeNodes, ensemble, ensemble, - !enforceMinNumRacksPerWriteQuorum || firstBookieInTheEnsemble); - racks[i] = prevNode.getNetworkLocation(); + try { + prevNode = selectRandomFromRack(curRack, excludeNodes, ensemble, ensemble); + } catch (BKNotEnoughBookiesException e) { + if (!curRack.equals(NodeBase.ROOT)) { + curRack = NodeBase.ROOT; + prevNode = selectFromNetworkLocation(curRack, excludeNodes, ensemble, ensemble, + !enforceMinNumRacksPerWriteQuorum || firstBookieInTheEnsemble); + } else { + throw e; + } + } } - List bookieList = ensemble.toList(); + List bookieList = ensemble.toList(); if (ensembleSize != bookieList.size()) { LOG.error("Not enough {} bookies are available to form an ensemble : {}.", ensembleSize, bookieList); throw new BKNotEnoughBookiesException(); } - return bookieList; + return PlacementResult.of(bookieList, + isEnsembleAdheringToPlacementPolicy( + bookieList, writeQuorumSize, ackQuorumSize)); } finally { rwLock.readLock().unlock(); } } @Override - public BookieSocketAddress replaceBookie(int ensembleSize, int writeQuorumSize, int ackQuorumSize, - Map customMetadata, Set currentEnsemble, - BookieSocketAddress bookieToReplace, Set excludeBookies) + public PlacementResult replaceBookie(int ensembleSize, int writeQuorumSize, int ackQuorumSize, + Map customMetadata, List currentEnsemble, + BookieId bookieToReplace, Set excludeBookies) throws BKNotEnoughBookiesException { rwLock.readLock().lock(); try { + excludeBookies = addDefaultRackBookiesIfMinNumRacksIsEnforced(excludeBookies); excludeBookies.addAll(currentEnsemble); + + Set ensembleNodes = new HashSet<>(); + Set excludeNodes = new HashSet<>(); BookieNode bn = knownBookies.get(bookieToReplace); if (null == bn) { bn = createBookieNode(bookieToReplace); } - - Set ensembleNodes = convertBookiesToNodes(currentEnsemble); - Set excludeNodes = convertBookiesToNodes(excludeBookies); + for (BookieId bookieId : currentEnsemble) { + if (bookieId.equals(bookieToReplace)) { + continue; + } + ensembleNodes.add(convertBookieToNode(bookieId)); + 
} + for (BookieId bookieId : excludeBookies) { + if (bookieId.equals(bookieToReplace)) { + excludeNodes.add(bn); + continue; + } + excludeNodes.add(convertBookieToNode(bookieId)); + } excludeNodes.addAll(ensembleNodes); excludeNodes.add(bn); @@ -674,35 +550,21 @@ public BookieSocketAddress replaceBookie(int ensembleSize, int writeQuorumSize, if (LOG.isDebugEnabled()) { LOG.debug("Bookie {} is chosen to replace bookie {}.", candidate, bn); } - return candidate.getAddr(); - } finally { - rwLock.readLock().unlock(); - } - } - - @Override - public void updateBookieInfo(Map bookieInfoMap) { - if (!isWeighted) { - LOG.info("bookieFreeDiskInfo callback called even without weighted placement policy being used."); - return; - } - List allBookies = new ArrayList(knownBookies.values()); - - // create a new map to reflect the new mapping - Map map = new HashMap(); - for (BookieNode bookie : allBookies) { - if (bookieInfoMap.containsKey(bookie.getAddr())) { - map.put(bookie, bookieInfoMap.get(bookie.getAddr())); + BookieId candidateAddr = candidate.getAddr(); + List newEnsemble = new ArrayList(currentEnsemble); + if (currentEnsemble.isEmpty()) { + /* + * in testing code there are test cases which would pass empty + * currentEnsemble + */ + newEnsemble.add(candidateAddr); } else { - map.put(bookie, new BookieInfo()); + newEnsemble.set(currentEnsemble.indexOf(bookieToReplace), candidateAddr); } - } - rwLock.writeLock().lock(); - try { - this.bookieInfoMap = map; - this.weightedSelection.updateMap(this.bookieInfoMap); + return PlacementResult.of(candidateAddr, + isEnsembleAdheringToPlacementPolicy(newEnsemble, writeQuorumSize, ackQuorumSize)); } finally { - rwLock.writeLock().unlock(); + rwLock.readLock().unlock(); } } @@ -725,9 +587,10 @@ public BookieNode selectFromNetworkLocation( networkLoc, excludeBookies); throw e; } - LOG.warn("Failed to choose a bookie from {} : " - + "excluded {}, fallback to choose bookie randomly from the cluster.", - networkLoc, excludeBookies); + LOG.warn("Failed to choose a bookie from network location {}, " + + "the bookies in the network location are {}, excluded bookies {}, " + + "current ensemble {}, fallback to choose bookie randomly from the cluster.", + networkLoc, topology.getLeaves(networkLoc), excludeBookies, ensemble); // randomly choose one from whole cluster, ignore the provided predicate. return selectRandom(1, excludeBookies, predicate, ensemble).get(0); } @@ -750,6 +613,10 @@ public BookieNode selectFromNetworkLocation(String networkLoc, * the whole cluster and exclude the racks specified at * excludeRacks. 
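As the comment above describes, selection degrades gracefully: first try the requested network location, then (when permitted) fall back to choosing randomly from the rest of the cluster, excluding the racks already ruled out. A compact sketch of that fallback chain, with hypothetical string node ids and a plain rack lookup map standing in for the topology:

    import java.util.ArrayList;
    import java.util.List;
    import java.util.Map;
    import java.util.Random;
    import java.util.Set;

    class FallbackSelectSketch {
        // Pick a node from 'rack' if possible; otherwise, if allowed, pick any
        // non-excluded node in the cluster. Returns null when nothing qualifies
        // (the real code throws BKNotEnoughBookiesException instead).
        static String select(Map<String, String> rackOf, Set<String> excluded,
                             String rack, boolean fallbackToRandom, Random rnd) {
            List<String> inRack = new ArrayList<>();
            List<String> anywhere = new ArrayList<>();
            for (Map.Entry<String, String> e : rackOf.entrySet()) {
                if (excluded.contains(e.getKey())) {
                    continue;
                }
                anywhere.add(e.getKey());
                if (e.getValue().equals(rack)) {
                    inRack.add(e.getKey());
                }
            }
            if (!inRack.isEmpty()) {
                return inRack.get(rnd.nextInt(inRack.size()));
            }
            return (fallbackToRandom && !anywhere.isEmpty())
                    ? anywhere.get(rnd.nextInt(anywhere.size())) : null;
        }
    }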
*/ + LOG.warn("Failed to choose a bookie node from network location {}, " + + "the bookies in the network location are {}, excluded bookies {}, " + + "current ensemble {}, fallback to choose bookie randomly from the cluster.", + networkLoc, topology.getLeaves(networkLoc), excludeBookies, ensemble); return selectFromNetworkLocation(excludeRacks, excludeBookies, predicate, ensemble, fallbackToRandom); } } @@ -777,8 +644,7 @@ public BookieNode selectFromNetworkLocation(Set excludeRacks, } try { - return selectRandomInternal(knownNodes, 1, fullExclusionBookiesList, TruePredicate.INSTANCE, - EnsembleForReplacementWithNoConstraints.INSTANCE).get(0); + return selectRandomInternal(knownNodes, 1, fullExclusionBookiesList, predicate, ensemble).get(0); } catch (BKNotEnoughBookiesException e) { if (!fallbackToRandom) { LOG.error( @@ -815,7 +681,8 @@ private WeightedRandomSelection prepareForWeightedSelection(List wRSelection = new WeightedRandomSelection(maxWeightMultiple); + WeightedRandomSelection wRSelection = new WeightedRandomSelectionImpl( + maxWeightMultiple); wRSelection.updateMap(rackMap); return wRSelection; } @@ -921,20 +788,18 @@ protected List selectRandomInternal(List bookiesToSelect throw new BKNotEnoughBookiesException(); } if (wRSelection == null) { - Map rackMap = new HashMap(); - for (BookieNode n : bookiesToSelectFrom) { - if (excludeBookies.contains(n)) { - continue; - } - if (this.bookieInfoMap.containsKey(n)) { - rackMap.put(n, this.bookieInfoMap.get(n)); - } else { - rackMap.put(n, new BookieInfo()); - } + wRSelection = new WeightedRandomSelectionImpl(this.maxWeightMultiple); + } + + Map rackMap = new HashMap(); + for (BookieNode n : bookiesToSelectFrom) { + if (this.bookieInfoMap.containsKey(n)) { + rackMap.put(n, this.bookieInfoMap.get(n)); + } else { + rackMap.put(n, new BookieInfo()); } - wRSelection = new WeightedRandomSelection(this.maxWeightMultiple); - wRSelection.updateMap(rackMap); } + wRSelection.updateMap(rackMap); } else { Collections.shuffle(bookiesToSelectFrom); } @@ -987,7 +852,7 @@ protected List selectRandomInternal(List bookiesToSelect } @Override - public void registerSlowBookie(BookieSocketAddress bookieSocketAddress, long entryId) { + public void registerSlowBookie(BookieId bookieSocketAddress, long entryId) { if (reorderThresholdPendingRequests <= 0) { // only put bookies on slowBookies list if reorderThresholdPendingRequests is *not* set (0); // otherwise, rely on reordering of reads based on reorderThresholdPendingRequests @@ -997,7 +862,7 @@ public void registerSlowBookie(BookieSocketAddress bookieSocketAddress, long ent @Override public DistributionSchedule.WriteSet reorderReadSequence( - List ensemble, + List ensemble, BookiesHealthInfo bookiesHealthInfo, DistributionSchedule.WriteSet writeSet) { Map writeSetWithRegion = new HashMap<>(); @@ -1038,7 +903,7 @@ public DistributionSchedule.WriteSet reorderReadSequence( * @return ordering of bookies to send read to */ DistributionSchedule.WriteSet reorderReadSequenceWithRegion( - List ensemble, + List ensemble, DistributionSchedule.WriteSet writeSet, Map writeSetWithRegion, BookiesHealthInfo bookiesHealthInfo, @@ -1056,7 +921,7 @@ DistributionSchedule.WriteSet reorderReadSequenceWithRegion( isAnyBookieUnavailable = true; } else { for (int i = 0; i < ensemble.size(); i++) { - BookieSocketAddress bookieAddr = ensemble.get(i); + BookieId bookieAddr = ensemble.get(i); if ((!knownBookies.containsKey(bookieAddr) && !readOnlyBookies.contains(bookieAddr)) || slowBookies.getIfPresent(bookieAddr) != null) { // 
Found at least one bookie not available in the ensemble, or in slowBookies @@ -1110,7 +975,7 @@ DistributionSchedule.WriteSet reorderReadSequenceWithRegion( for (int i = 0; i < writeSet.size(); i++) { int idx = writeSet.get(i); - BookieSocketAddress address = ensemble.get(idx); + BookieId address = ensemble.get(idx); String region = writeSetWithRegion.get(idx); Long lastFailedEntryOnBookie = bookiesHealthInfo.getBookieFailureHistory(address); if (null == knownBookies.get(address)) { @@ -1219,27 +1084,290 @@ DistributionSchedule.WriteSet reorderReadSequenceWithRegion( return writeSet; } - /** - * Shuffle all the entries of an array that matches a mask. - * It assumes all entries with the same mask are contiguous in the array. - */ - static void shuffleWithMask(DistributionSchedule.WriteSet writeSet, - int mask, int bits) { - int first = -1; - int last = -1; - for (int i = 0; i < writeSet.size(); i++) { - if ((writeSet.get(i) & bits) == mask) { - if (first == -1) { - first = i; + // this method should be called while holding the read lock of 'rwLock' + @Override + public PlacementPolicyAdherence isEnsembleAdheringToPlacementPolicy(List ensembleList, + int writeQuorumSize, int ackQuorumSize) { + if (CollectionUtils.isEmpty(ensembleList)) { + return PlacementPolicyAdherence.FAIL; + } + int ensembleSize = ensembleList.size(); + int minNumRacksPerWriteQuorumForThisEnsemble = Math.min(writeQuorumSize, minNumRacksPerWriteQuorum); + HashSet racksInQuorum = new HashSet(); + BookieId bookie; + for (int i = 0; i < ensembleList.size(); i++) { + racksInQuorum.clear(); + for (int j = 0; j < writeQuorumSize; j++) { + bookie = ensembleList.get((i + j) % ensembleSize); + try { + if (knownBookies.containsKey(bookie)) { + racksInQuorum.add(knownBookies.get(bookie).getNetworkLocation()); + } else if (LOG.isDebugEnabled()) { + LOG.debug("bookie {} is not in the list of knownBookies", bookie); + } + } catch (Exception e) { + /* + * any issue/exception in analyzing whether the ensemble is + * strictly adhering to the placement policy should be + * swallowed. + */ + LOG.warn("Received exception while trying to get network location of bookie: {}", bookie, e); + } + } + if ((racksInQuorum.size() < minNumRacksPerWriteQuorumForThisEnsemble) + || (enforceMinNumRacksPerWriteQuorum && racksInQuorum.contains(getDefaultRack()))) { + return PlacementPolicyAdherence.FAIL; + } + } + return PlacementPolicyAdherence.MEETS_STRICT; + } + + @Override + public boolean areAckedBookiesAdheringToPlacementPolicy(Set ackedBookies, + int writeQuorumSize, + int ackQuorumSize) { + HashSet rackCounter = new HashSet<>(); + int minWriteQuorumNumRacksPerWriteQuorum = Math.min(writeQuorumSize, minNumRacksPerWriteQuorum); + + ReentrantReadWriteLock.ReadLock readLock = rwLock.readLock(); + readLock.lock(); + try { + for (BookieId bookie : ackedBookies) { + if (knownBookies.containsKey(bookie)) { + rackCounter.add(knownBookies.get(bookie).getNetworkLocation()); + } else if (LOG.isDebugEnabled()) { + LOG.debug("bookie {} is not in the list of knownBookies", bookie); + } + } + + // Check that the acked bookies span at least minWriteQuorumNumRacksPerWriteQuorum distinct racks; see the sketch below.
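isEnsembleAdheringToPlacementPolicy above rotates a write-quorum-sized window around the ensemble and fails if any window covers fewer than min(writeQuorumSize, minNumRacksPerWriteQuorum) racks; the acked-bookies variant that follows performs the same rack count over a single set. Distilled into a self-contained sketch, with the rack lookup passed as a plain map rather than the topology:

    import java.util.HashSet;
    import java.util.List;
    import java.util.Map;
    import java.util.Set;

    class QuorumRackCheckSketch {
        // Rotates a write quorum window of size writeQuorumSize around the
        // ensemble and verifies every window spans enough distinct racks.
        static boolean adheres(List<String> ensemble, Map<String, String> rackOf,
                               int writeQuorumSize, int minRacks) {
            int e = ensemble.size();
            for (int i = 0; i < e; i++) {
                Set<String> racks = new HashSet<>();
                for (int j = 0; j < writeQuorumSize; j++) {
                    racks.add(rackOf.get(ensemble.get((i + j) % e)));
                }
                if (racks.size() < Math.min(writeQuorumSize, minRacks)) {
                    return false;
                }
            }
            return true;
        }
    }

For example, an ensemble {b1, b2, b3} with racks {b1: r1, b2: r1, b3: r2}, writeQuorumSize 2 and minRacks 2 fails, because the quorum (b1, b2) sits on a single rack.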
+ if (LOG.isDebugEnabled()) { + LOG.debug("areAckedBookiesAdheringToPlacementPolicy returning {} because number of racks = {} and " + + "minNumRacksPerWriteQuorum = {}", + rackCounter.size() >= minNumRacksPerWriteQuorum, + rackCounter.size(), + minNumRacksPerWriteQuorum); + } + } finally { + readLock.unlock(); + } + return rackCounter.size() >= minWriteQuorumNumRacksPerWriteQuorum; + } + + @Override + public PlacementResult> replaceToAdherePlacementPolicy( + int ensembleSize, + int writeQuorumSize, + int ackQuorumSize, + Set excludeBookies, + List currentEnsemble) { + rwLock.readLock().lock(); + try { + PlacementPolicyAdherence currentPlacementAdherence = isEnsembleAdheringToPlacementPolicy( + currentEnsemble, writeQuorumSize, ackQuorumSize); + if (PlacementPolicyAdherence.FAIL != currentPlacementAdherence) { + return PlacementResult.of(new ArrayList<>(currentEnsemble), currentPlacementAdherence); + } + for (BookieId bookieId : currentEnsemble) { + if (!knownBookies.containsKey(bookieId)) { + excludeBookies.add(bookieId); + } + } + int minNumRacksPerWriteQuorumForThisEnsemble = Math.min(writeQuorumSize, minNumRacksPerWriteQuorum); + int numRacks = topology.getNumOfRacks(); + // only one rack or less than minNumRacksPerWriteQuorumForThisEnsemble, stop calculation to skip relocation + if (numRacks < 2 || numRacks < minNumRacksPerWriteQuorumForThisEnsemble) { + LOG.warn("Skip ensemble relocation because the cluster has only {} rack.", numRacks); + return PlacementResult.of(Collections.emptyList(), PlacementPolicyAdherence.FAIL); + } + PlacementResult> placementResult = PlacementResult.of(Collections.emptyList(), + PlacementPolicyAdherence.FAIL); + int minDiffer = Integer.MAX_VALUE; + for (int i = 0; i < currentEnsemble.size(); i++) { + PlacementResult> result = doReplaceToAdherePlacementPolicy(ensembleSize, + writeQuorumSize, ackQuorumSize, excludeBookies, currentEnsemble, i); + if (PlacementPolicyAdherence.FAIL == result.getAdheringToPolicy()) { + continue; + } + int differ = differBetweenBookies(currentEnsemble, result.getResult()); + if (differ < minDiffer) { + minDiffer = differ; + placementResult = result; + if (minDiffer == 1) { + break; + } + } + } + return placementResult; + } finally { + rwLock.readLock().unlock(); + } + } + + private PlacementResult> doReplaceToAdherePlacementPolicy( + int ensembleSize, + int writeQuorumSize, + int ackQuorumSize, + Set excludeBookies, + List currentEnsemble, + int startIndex) { + final List provisionalEnsembleNodes = currentEnsemble.stream() + .map(this::convertBookieToNode).collect(Collectors.toList()); + final Set excludeNodes = convertBookiesToNodes( + addDefaultRackBookiesIfMinNumRacksIsEnforced(excludeBookies)); + int minNumRacksPerWriteQuorumForThisEnsemble = Math.min(writeQuorumSize, minNumRacksPerWriteQuorum); + final RRTopologyAwareCoverageEnsemble ensemble = + new RRTopologyAwareCoverageEnsemble( + ensembleSize, + writeQuorumSize, + ackQuorumSize, + RACKNAME_DISTANCE_FROM_LEAVES, + null, + null, + minNumRacksPerWriteQuorumForThisEnsemble); + BookieNode prevNode = null; + final BookieNode firstNode = provisionalEnsembleNodes.get(startIndex); + // use same bookie at first to reduce ledger replication + if (!excludeNodes.contains(firstNode) && ensemble.apply(firstNode, ensemble) + && ensemble.addNode(firstNode)) { + excludeNodes.add(firstNode); + prevNode = firstNode; + } + for (int i = prevNode == null ? 
0 : 1; i < ensembleSize; i++) { + int index = (startIndex + i) % ensembleSize; + final String curRack; + if (null == prevNode) { + if ((null == localNode) || defaultRack.equals(localNode.getNetworkLocation())) { + curRack = NodeBase.ROOT; + } else { + curRack = localNode.getNetworkLocation(); + } + } else { + curRack = NetworkTopologyImpl.INVERSE + prevNode.getNetworkLocation(); + } + try { + prevNode = replaceToAdherePlacementPolicyInternal( + curRack, excludeNodes, ensemble, ensemble, + provisionalEnsembleNodes, index, ensembleSize, minNumRacksPerWriteQuorumForThisEnsemble); + // got a good candidate + if (ensemble.addNode(prevNode)) { + // add the candidate to exclude set + excludeNodes.add(prevNode); + } else { + throw new BKNotEnoughBookiesException(); } - last = i; + // replace to newer node + provisionalEnsembleNodes.set(index, prevNode); + } catch (BKNotEnoughBookiesException e) { + LOG.warn("Skip ensemble relocation because the cluster has not enough bookies."); + return PlacementResult.of(Collections.emptyList(), PlacementPolicyAdherence.FAIL); } } - if (first != -1) { - for (int i = last + 1; i > first; i--) { - int swapWith = ThreadLocalRandom.current().nextInt(i); - writeSet.set(swapWith, writeSet.set(i, writeSet.get(swapWith))); + List bookieList = ensemble.toList(); + if (ensembleSize != bookieList.size()) { + LOG.warn("Not enough {} bookies are available to form an ensemble : {}.", + ensembleSize, bookieList); + return PlacementResult.of(Collections.emptyList(), PlacementPolicyAdherence.FAIL); + } + PlacementPolicyAdherence placementPolicyAdherence = isEnsembleAdheringToPlacementPolicy(bookieList, + writeQuorumSize, ackQuorumSize); + if (PlacementPolicyAdherence.FAIL == placementPolicyAdherence) { + return PlacementResult.of(Collections.emptyList(), PlacementPolicyAdherence.FAIL); + } + return PlacementResult.of(revertBookieListByIndex(bookieList, startIndex), placementPolicyAdherence); + } + + private List revertBookieListByIndex(List bookies, int startIndex) { + BookieId[] bookieIds = new BookieId[bookies.size()]; + for (int i = 0; i < bookies.size(); i++) { + if (startIndex == bookies.size()) { + startIndex = 0; + } + bookieIds[startIndex++] = bookies.get(i); + } + return Lists.newArrayList(bookieIds); + } + + private BookieNode replaceToAdherePlacementPolicyInternal( + String netPath, Set excludeBookies, Predicate predicate, + Ensemble ensemble, List provisionalEnsembleNodes, int ensembleIndex, + int ensembleSize, int minNumRacksPerWriteQuorumForThisEnsemble) throws BKNotEnoughBookiesException { + final BookieNode currentNode = provisionalEnsembleNodes.get(ensembleIndex); + // if the current bookie could be applied to the ensemble, apply it to minify the number of bookies replaced + if (!excludeBookies.contains(currentNode) && predicate.apply(currentNode, ensemble)) { + return currentNode; + } + final List>> conditionList = new ArrayList<>(); + final Set preExcludeRacks = new HashSet<>(); + final Set postExcludeRacks = new HashSet<>(); + for (int i = 0; i < minNumRacksPerWriteQuorumForThisEnsemble - 1; i++) { + preExcludeRacks.add(provisionalEnsembleNodes.get(Math.floorMod((ensembleIndex - i - 1), ensembleSize)) + .getNetworkLocation()); + postExcludeRacks.add(provisionalEnsembleNodes.get(Math.floorMod((ensembleIndex + i + 1), ensembleSize)) + .getNetworkLocation()); + } + // adhere minNumRacksPerWriteQuorum by preExcludeRacks + // avoid additional replace from write quorum candidates by preExcludeRacks and postExcludeRacks + // avoid to use first candidate bookies for 
election by provisionalEnsembleNodes + conditionList.add(Pair.of( + NetworkTopologyImpl.INVERSE + String.join(",", + Stream.concat(preExcludeRacks.stream(), postExcludeRacks.stream()).collect(Collectors.toSet())), + provisionalEnsembleNodes + )); + // avoid to use same rack between previous index by netPath + // avoid to use first candidate bookies for election by provisionalEnsembleNodes + conditionList.add(Pair.of(netPath, provisionalEnsembleNodes)); + // avoid to use same rack between previous index by netPath + conditionList.add(Pair.of(netPath, Collections.emptyList())); + + for (Pair> condition : conditionList) { + WeightedRandomSelection wRSelection = null; + + final List leaves = new ArrayList<>(topology.getLeaves(condition.getLeft())); + if (!isWeighted) { + Collections.shuffle(leaves); + } else { + if (CollectionUtils.subtract(leaves, excludeBookies).size() < 1) { + throw new BKNotEnoughBookiesException(); + } + wRSelection = prepareForWeightedSelection(leaves); + if (wRSelection == null) { + throw new BKNotEnoughBookiesException(); + } + } + + final Iterator it = leaves.iterator(); + final Set bookiesSeenSoFar = new HashSet<>(); + while (true) { + Node n; + if (isWeighted) { + if (bookiesSeenSoFar.size() == leaves.size()) { + // Don't loop infinitely. + break; + } + n = wRSelection.getNextRandom(); + bookiesSeenSoFar.add(n); + } else { + if (it.hasNext()) { + n = it.next(); + } else { + break; + } + } + if (excludeBookies.contains(n)) { + continue; + } + if (!(n instanceof BookieNode) || !predicate.apply((BookieNode) n, ensemble)) { + continue; + } + // additional excludeBookies + if (condition.getRight().contains(n)) { + continue; + } + BookieNode bn = (BookieNode) n; + return bn; } } + throw new BKNotEnoughBookiesException(); } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/ReadLastConfirmedAndEntryOp.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/ReadLastConfirmedAndEntryOp.java index e61e666e1f7..35ba2823365 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/ReadLastConfirmedAndEntryOp.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/ReadLastConfirmedAndEntryOp.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file @@ -25,15 +25,17 @@ import java.util.BitSet; import java.util.List; import java.util.concurrent.Callable; +import java.util.concurrent.ScheduledFuture; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; +import org.apache.bookkeeper.client.api.LedgerMetadata; import org.apache.bookkeeper.client.impl.LedgerEntryImpl; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.common.util.MathUtils; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.proto.BookieProtocol; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks; import org.apache.bookkeeper.proto.ReadLastConfirmedAndEntryContext; import org.apache.bookkeeper.proto.checksum.DigestManager; -import org.apache.bookkeeper.util.MathUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -63,7 +65,8 @@ class ReadLastConfirmedAndEntryOp implements BookkeeperInternalCallbacks.ReadEnt private final long prevEntryId; private long lastAddConfirmed; private long timeOutInMillis; - private final List currentEnsemble; + private final List currentEnsemble; + private ScheduledFuture speculativeTask = null; abstract class ReadLACAndEntryRequest implements AutoCloseable { @@ -73,12 +76,12 @@ abstract class ReadLACAndEntryRequest implements AutoCloseable { int firstError = BKException.Code.OK; int numMissedEntryReads = 0; - final List ensemble; + final List ensemble; final DistributionSchedule.WriteSet writeSet; final DistributionSchedule.WriteSet orderedEnsemble; final LedgerEntryImpl entryImpl; - ReadLACAndEntryRequest(List ensemble, long lId, long eId) { + ReadLACAndEntryRequest(List ensemble, long lId, long eId) { this.entryImpl = LedgerEntryImpl.create(lId, eId); this.ensemble = ensemble; this.writeSet = lh.getDistributionSchedule().getEnsembleSet(eId); @@ -90,6 +93,7 @@ abstract class ReadLACAndEntryRequest implements AutoCloseable { } } + @Override public void close() { entryImpl.close(); } @@ -115,7 +119,7 @@ synchronized int getFirstError() { * @return return true if we managed to complete the entry; * otherwise return false if the read entry is not complete or it is already completed before */ - boolean complete(int bookieIndex, BookieSocketAddress host, final ByteBuf buffer, long entryId) { + boolean complete(int bookieIndex, BookieId host, final ByteBuf buffer, long entryId) { ByteBuf content; try { content = lh.getDigestManager().verifyDigestAndReturnData(entryId, buffer); @@ -186,7 +190,7 @@ private synchronized void translateAndSetFirstError(int rc) { * @param rc * read result code */ - synchronized void logErrorAndReattemptRead(int bookieIndex, BookieSocketAddress host, String errMsg, int rc) { + synchronized void logErrorAndReattemptRead(int bookieIndex, BookieId host, String errMsg, int rc) { translateAndSetFirstError(rc); if (BKException.Code.NoSuchEntryException == rc || BKException.Code.NoSuchLedgerExistsException == rc) { @@ -213,7 +217,7 @@ synchronized void logErrorAndReattemptRead(int bookieIndex, BookieSocketAddress * the set of hosts that we already received responses. * @return host we sent to if we sent. null otherwise. */ - abstract BookieSocketAddress maybeSendSpeculativeRead(BitSet heardFromHostsBitSet); + abstract BookieId maybeSendSpeculativeRead(BitSet heardFromHostsBitSet); /** * Whether the read request completed. 
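Editor's note: the `complete(...)` contract documented above ("return true if we managed to complete the entry; otherwise return false if the read entry is not complete or it is already completed before") is a first-response-wins guard: many replicas may answer, but exactly one response is allowed to complete the request. A hedged sketch of the pattern, assuming nothing beyond `java.util.concurrent.atomic`:

```java
import java.util.concurrent.atomic.AtomicBoolean;

final class OnceCompletable<T> {
    private final AtomicBoolean done = new AtomicBoolean(false);
    private volatile T value;

    /** Returns true only for the first caller; later replica responses are ignored. */
    boolean complete(T v) {
        if (done.compareAndSet(false, true)) {
            value = v;
            return true;
        }
        return false;
    }

    boolean isComplete() { return done.get(); }

    T get() { return value; }
}
```

The same compare-and-set trick is what makes the close/fail paths safe to call from any thread: whichever of complete, fail, or close wins the CAS owns the cleanup, so resources like the write set are recycled exactly once.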
@@ -243,7 +247,7 @@ class ParallelReadRequest extends ReadLACAndEntryRequest { int numPendings; - ParallelReadRequest(List ensemble, long lId, long eId) { + ParallelReadRequest(List ensemble, long lId, long eId) { super(ensemble, lId, eId); numPendings = orderedEnsemble.size(); } @@ -251,7 +255,7 @@ class ParallelReadRequest extends ReadLACAndEntryRequest { @Override void read() { for (int i = 0; i < orderedEnsemble.size(); i++) { - BookieSocketAddress to = ensemble.get(orderedEnsemble.get(i)); + BookieId to = ensemble.get(orderedEnsemble.get(i)); try { sendReadTo(orderedEnsemble.get(i), to, this); } catch (InterruptedException ie) { @@ -264,7 +268,7 @@ void read() { } @Override - synchronized void logErrorAndReattemptRead(int bookieIndex, BookieSocketAddress host, String errMsg, int rc) { + synchronized void logErrorAndReattemptRead(int bookieIndex, BookieId host, String errMsg, int rc) { super.logErrorAndReattemptRead(bookieIndex, host, errMsg, rc); --numPendings; // if received all responses or this entry doesn't meet quorum write, complete the request. @@ -279,7 +283,7 @@ synchronized void logErrorAndReattemptRead(int bookieIndex, BookieSocketAddress } @Override - BookieSocketAddress maybeSendSpeculativeRead(BitSet heardFromHostsBitSet) { + BookieId maybeSendSpeculativeRead(BitSet heardFromHostsBitSet) { // no speculative read return null; } @@ -293,7 +297,7 @@ class SequenceReadRequest extends ReadLACAndEntryRequest { final BitSet erroredReplicas; final BitSet emptyResponseReplicas; - SequenceReadRequest(List ensemble, long lId, long eId) { + SequenceReadRequest(List ensemble, long lId, long eId) { super(ensemble, lId, eId); this.sentReplicas = new BitSet(orderedEnsemble.size()); @@ -331,7 +335,7 @@ private boolean readsOutstanding() { * @return host we sent to if we sent. null otherwise. 
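Editor's note: `SequenceReadRequest` tracks per-replica state in three BitSets (`sentReplicas`, `erroredReplicas`, `emptyResponseReplicas`), and "is another response still possible?" becomes pure bit arithmetic. A sketch under the assumption that a replica is pending once it has been sent to and has neither errored nor returned empty:

```java
import java.util.BitSet;

final class ReplicaTracker {
    private final BitSet sent = new BitSet();
    private final BitSet errored = new BitSet();
    private final BitSet empty = new BitSet();

    void markSent(int replica)    { sent.set(replica); }
    void markErrored(int replica) { errored.set(replica); }
    void markEmpty(int replica)   { empty.set(replica); }

    /** True while at least one sent replica has neither failed nor come back empty. */
    boolean readsOutstanding() {
        BitSet pending = (BitSet) sent.clone();
        pending.andNot(errored);
        pending.andNot(empty);
        return !pending.isEmpty();
    }
}
```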
*/ @Override - synchronized BookieSocketAddress maybeSendSpeculativeRead(BitSet heardFrom) { + synchronized BookieId maybeSendSpeculativeRead(BitSet heardFrom) { if (nextReplicaIndexToReadFrom >= getLedgerMetadata().getEnsembleSize()) { return null; } @@ -353,7 +357,7 @@ void read() { sendNextRead(); } - synchronized BookieSocketAddress sendNextRead() { + synchronized BookieId sendNextRead() { if (nextReplicaIndexToReadFrom >= getLedgerMetadata().getEnsembleSize()) { // we are done, the read has failed from all replicas, just fail the // read @@ -374,7 +378,7 @@ synchronized BookieSocketAddress sendNextRead() { nextReplicaIndexToReadFrom++; try { - BookieSocketAddress to = ensemble.get(bookieIndex); + BookieId to = ensemble.get(bookieIndex); sendReadTo(bookieIndex, to, this); sentReplicas.set(replica); return to; @@ -387,7 +391,7 @@ synchronized BookieSocketAddress sendNextRead() { } @Override - synchronized void logErrorAndReattemptRead(int bookieIndex, BookieSocketAddress host, String errMsg, int rc) { + synchronized void logErrorAndReattemptRead(int bookieIndex, BookieId host, String errMsg, int rc) { super.logErrorAndReattemptRead(bookieIndex, host, errMsg, rc); int replica = getReplicaIndex(bookieIndex); @@ -408,7 +412,7 @@ synchronized void logErrorAndReattemptRead(int bookieIndex, BookieSocketAddress } @Override - boolean complete(int bookieIndex, BookieSocketAddress host, ByteBuf buffer, long entryId) { + boolean complete(int bookieIndex, BookieId host, ByteBuf buffer, long entryId) { boolean completed = super.complete(bookieIndex, host, buffer, entryId); if (completed) { int numReplicasTried = getNextReplicaIndexToReadFrom(); @@ -416,7 +420,7 @@ boolean complete(int bookieIndex, BookieSocketAddress host, ByteBuf buffer, long // first speculative read as slow for (int i = 0; i < numReplicasTried; i++) { int slowBookieIndex = orderedEnsemble.get(i); - BookieSocketAddress slowBookieSocketAddress = ensemble.get(slowBookieIndex); + BookieId slowBookieSocketAddress = ensemble.get(slowBookieIndex); clientCtx.getPlacementPolicy().registerSlowBookie(slowBookieSocketAddress, entryId); } } @@ -426,7 +430,7 @@ boolean complete(int bookieIndex, BookieSocketAddress host, ByteBuf buffer, long ReadLastConfirmedAndEntryOp(LedgerHandle lh, ClientContext clientCtx, - List ensemble, + List ensemble, LastConfirmedAndEntryCallback cb, long prevEntryId, long timeOutInMillis) { @@ -461,6 +465,12 @@ ReadLastConfirmedAndEntryOp parallelRead(boolean enabled) { return this; } + protected void cancelSpeculativeTask(boolean mayInterruptIfRunning) { + if (speculativeTask != null) { + speculativeTask.cancel(mayInterruptIfRunning); + speculativeTask = null; + } + } /** * Speculative Read Logic. 
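Editor's note: the `ScheduledFuture` threaded through this class exists so the speculative-read timer can be cancelled the moment the operation completes, instead of firing pointlessly afterwards. A self-contained sketch of that lifecycle; the scheduler, interval, and task body here are illustrative stand-ins, not the real `SpeculativeRequestPolicy` API:

```java
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.TimeUnit;

final class SpeculativeReadDemo {
    private ScheduledFuture<?> speculativeTask;

    void initiate(ScheduledExecutorService scheduler, Runnable sendNextRead, long intervalMs) {
        // periodically nudge the read toward the next replica until someone answers
        speculativeTask = scheduler.scheduleAtFixedRate(
                sendNextRead, intervalMs, intervalMs, TimeUnit.MILLISECONDS);
    }

    /** Mirrors cancelSpeculativeTask(true) in the patch: stop probing once complete. */
    void complete() {
        if (speculativeTask != null) {
            speculativeTask.cancel(true);
            speculativeTask = null;
        }
    }

    public static void main(String[] args) throws Exception {
        ScheduledExecutorService scheduler = Executors.newSingleThreadScheduledExecutor();
        SpeculativeReadDemo demo = new SpeculativeReadDemo();
        demo.initiate(scheduler, () -> System.out.println("speculative read"), 100);
        Thread.sleep(350);
        demo.complete();
        scheduler.shutdown();
    }
}
```

Cancelling in `submitCallback` (as the patch does) covers both success and failure paths with one call site.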
*/ @@ -491,12 +501,12 @@ public void initiate() { request.read(); if (!parallelRead && clientCtx.getConf().readLACSpeculativeRequestPolicy.isPresent()) { - clientCtx.getConf().readLACSpeculativeRequestPolicy.get() + speculativeTask = clientCtx.getConf().readLACSpeculativeRequestPolicy.get() .initiateSpeculativeRequest(clientCtx.getScheduler(), this); } } - void sendReadTo(int bookieIndex, BookieSocketAddress to, ReadLACAndEntryRequest entry) throws InterruptedException { + void sendReadTo(int bookieIndex, BookieId to, ReadLACAndEntryRequest entry) throws InterruptedException { if (LOG.isDebugEnabled()) { LOG.debug("Calling Read LAC and Entry with {} and long polling interval {} on Bookie {} - Parallel {}", prevEntryId, timeOutInMillis, to, parallelRead); @@ -521,6 +531,7 @@ interface LastConfirmedAndEntryCallback { private void submitCallback(int rc) { long latencyMicros = MathUtils.elapsedMicroSec(requestTimeNano); LedgerEntry entry; + cancelSpeculativeTask(true); if (BKException.Code.OK != rc) { clientCtx.getClientStats().getReadLacAndEntryOpLogger() .registerFailedEvent(latencyMicros, TimeUnit.MICROSECONDS); @@ -546,7 +557,7 @@ public void readEntryComplete(int rc, long ledgerId, long entryId, ByteBuf buffe getClass().getName(), ledgerId, entryId, rc); } ReadLastConfirmedAndEntryContext rCtx = (ReadLastConfirmedAndEntryContext) ctx; - BookieSocketAddress bookie = rCtx.getBookieAddress(); + BookieId bookie = rCtx.getBookieAddress(); numResponsesPending--; if (BKException.Code.OK == rc) { if (LOG.isTraceEnabled()) { diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/ReadLastConfirmedOp.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/ReadLastConfirmedOp.java index dfbc1672f00..4e30c7231eb 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/ReadLastConfirmedOp.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/ReadLastConfirmedOp.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file * distributed with this work for additional information @@ -17,14 +17,15 @@ */ package org.apache.bookkeeper.client; +import com.google.common.annotations.VisibleForTesting; import io.netty.buffer.ByteBuf; import java.util.List; - import org.apache.bookkeeper.client.BKException.BKDigestMatchException; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.proto.BookieClient; import org.apache.bookkeeper.proto.BookieProtocol; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.ReadEntryCallback; +import org.apache.bookkeeper.proto.checksum.DigestManager; import org.apache.bookkeeper.proto.checksum.DigestManager.RecoveryData; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -35,16 +36,18 @@ */ class ReadLastConfirmedOp implements ReadEntryCallback { static final Logger LOG = LoggerFactory.getLogger(ReadLastConfirmedOp.class); - LedgerHandle lh; - BookieClient bookieClient; - int numResponsesPending; - RecoveryData maxRecoveredData; - volatile boolean completed = false; - int lastSeenError = BKException.Code.ReadException; - - LastConfirmedDataCallback cb; - final DistributionSchedule.QuorumCoverageSet coverageSet; - final List currentEnsemble; + private final long ledgerId; + private final byte[] ledgerKey; + private final BookieClient bookieClient; + private final DigestManager digestManager; + private int numResponsesPending; + private RecoveryData maxRecoveredData; + private volatile boolean completed = false; + private int lastSeenError = BKException.Code.ReadException; + + private final LastConfirmedDataCallback cb; + private final DistributionSchedule.QuorumCoverageSet coverageSet; + private final List currentEnsemble; /** * Wrapper to get all recovered data from the request. 
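Editor's note: `ReadLastConfirmedOp` fans a LAST_ADD_CONFIRMED read out to every bookie in the ensemble and keeps the maximum digest-verified (lastAddConfirmed, length) pair it hears; it only reports that maximum once the quorum coverage set is satisfied. A sketch of the aggregation step alone, using plain longs where the real code uses `DigestManager.RecoveryData`:

```java
// INVALID mirrors LedgerHandle.INVALID_ENTRY_ID; only digest-verified
// responses should ever reach onVerifiedResponse.
final class RecoveryAggregate {
    static final long INVALID = -1L;

    private long maxLac = INVALID;
    private long length = 0L;

    void onVerifiedResponse(long lac, long len) {
        if (lac > maxLac) {
            maxLac = lac;
            length = len;
        }
    }

    long lastAddConfirmed() { return maxLac; }
    long length()           { return length; }
}
```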
@@ -53,21 +56,28 @@ interface LastConfirmedDataCallback { void readLastConfirmedDataComplete(int rc, RecoveryData data); } - public ReadLastConfirmedOp(LedgerHandle lh, BookieClient bookieClient, - List ensemble, LastConfirmedDataCallback cb) { + public ReadLastConfirmedOp(BookieClient bookieClient, + DistributionSchedule schedule, + DigestManager digestManager, + long ledgerId, + List ensemble, + byte[] ledgerKey, + LastConfirmedDataCallback cb) { this.cb = cb; this.bookieClient = bookieClient; this.maxRecoveredData = new RecoveryData(LedgerHandle.INVALID_ENTRY_ID, 0); - this.lh = lh; - this.numResponsesPending = lh.getLedgerMetadata().getEnsembleSize(); - this.coverageSet = lh.distributionSchedule.getCoverageSet(); + this.numResponsesPending = ensemble.size(); + this.coverageSet = schedule.getCoverageSet(); this.currentEnsemble = ensemble; + this.ledgerId = ledgerId; + this.ledgerKey = ledgerKey; + this.digestManager = digestManager; } public void initiate() { for (int i = 0; i < currentEnsemble.size(); i++) { bookieClient.readEntry(currentEnsemble.get(i), - lh.ledgerId, + ledgerId, BookieProtocol.LAST_ADD_CONFIRMED, this, i, BookieProtocol.FLAG_NONE); } @@ -76,13 +86,14 @@ public void initiate() { public void initiateWithFencing() { for (int i = 0; i < currentEnsemble.size(); i++) { bookieClient.readEntry(currentEnsemble.get(i), - lh.ledgerId, + ledgerId, BookieProtocol.LAST_ADD_CONFIRMED, this, i, BookieProtocol.FLAG_DO_FENCING, - lh.ledgerKey); + ledgerKey); } } + @Override public synchronized void readEntryComplete(final int rc, final long ledgerId, final long entryId, final ByteBuf buffer, final Object ctx) { int bookieIndex = (Integer) ctx; @@ -94,7 +105,7 @@ public synchronized void readEntryComplete(final int rc, final long ledgerId, fi boolean heardValidResponse = false; if (rc == BKException.Code.OK) { try { - RecoveryData recoveryData = lh.macManager.verifyDigestAndReturnLastConfirmed(buffer); + RecoveryData recoveryData = digestManager.verifyDigestAndReturnLastConfirmed(buffer); if (recoveryData.getLastAddConfirmed() > maxRecoveredData.getLastAddConfirmed()) { maxRecoveredData = recoveryData; } @@ -137,10 +148,15 @@ public synchronized void readEntryComplete(final int rc, final long ledgerId, fi } if (numResponsesPending == 0 && !completed) { - // Have got all responses back but was still not enough, just fail the operation - LOG.error("While readLastConfirmed ledger: {} did not hear success responses from all quorums", ledgerId); + LOG.error("While readLastConfirmed ledger: {} did not hear success responses from all quorums, {}", + ledgerId, coverageSet); cb.readLastConfirmedDataComplete(lastSeenError, maxRecoveredData); } } + + @VisibleForTesting + synchronized int getNumResponsesPending() { + return numResponsesPending; + } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/ReadOnlyLedgerHandle.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/ReadOnlyLedgerHandle.java index e9f790015fc..9e883a8246a 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/ReadOnlyLedgerHandle.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/ReadOnlyLedgerHandle.java @@ -20,19 +20,32 @@ */ package org.apache.bookkeeper.client; +import static com.google.common.base.Preconditions.checkState; + +import com.google.common.annotations.VisibleForTesting; import java.security.GeneralSecurityException; +import java.util.List; import java.util.Map; +import java.util.NavigableMap; +import java.util.Set; +import 
java.util.TreeMap; +import java.util.concurrent.CompletableFuture; import java.util.concurrent.RejectedExecutionException; - import org.apache.bookkeeper.client.AsyncCallback.AddCallback; import org.apache.bookkeeper.client.AsyncCallback.CloseCallback; import org.apache.bookkeeper.client.AsyncCallback.ReadCallback; import org.apache.bookkeeper.client.AsyncCallback.ReadLastConfirmedCallback; +import org.apache.bookkeeper.client.api.LedgerMetadata; import org.apache.bookkeeper.client.api.WriteFlag; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GenericCallback; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.LedgerMetadataListener; -import org.apache.bookkeeper.util.SafeRunnable; +import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.ReadEntryListener; +import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.TimedGenericCallback; import org.apache.bookkeeper.versioning.Version; +import org.apache.bookkeeper.versioning.Versioned; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * Read only ledger handle. This ledger handle allows you to @@ -41,28 +54,29 @@ * It should be returned for BookKeeper#openLedger operations. */ class ReadOnlyLedgerHandle extends LedgerHandle implements LedgerMetadataListener { + private static final Logger LOG = LoggerFactory.getLogger(ReadOnlyLedgerHandle.class); - class MetadataUpdater extends SafeRunnable { + private Object metadataLock = new Object(); + private final NavigableMap> newEnsemblesFromRecovery = new TreeMap<>(); - final LedgerMetadata newMetadata; + class MetadataUpdater implements Runnable { - MetadataUpdater(LedgerMetadata metadata) { + final Versioned newMetadata; + + MetadataUpdater(Versioned metadata) { this.newMetadata = metadata; } @Override - public void safeRun() { + public void run() { while (true) { - LedgerMetadata currentMetadata = getLedgerMetadata(); + Versioned currentMetadata = getVersionedLedgerMetadata(); Version.Occurred occurred = currentMetadata.getVersion().compare(newMetadata.getVersion()); if (Version.Occurred.BEFORE == occurred) { - LOG.info("Updated ledger metadata for ledger {} to {}.", ledgerId, newMetadata.toSafeString()); synchronized (ReadOnlyLedgerHandle.this) { - if (newMetadata.isClosed()) { - ReadOnlyLedgerHandle.this.lastAddConfirmed = newMetadata.getLastEntryId(); - ReadOnlyLedgerHandle.this.length = newMetadata.getLength(); - } if (setLedgerMetadata(currentMetadata, newMetadata)) { + LOG.info("Updated ledger metadata for ledger {} to {}, version {}.", + ledgerId, newMetadata.getValue().toSafeString(), newMetadata.getVersion()); break; } } @@ -79,7 +93,7 @@ public String toString() { } ReadOnlyLedgerHandle(ClientContext clientCtx, - long ledgerId, LedgerMetadata metadata, + long ledgerId, Versioned metadata, BookKeeper.DigestType digestType, byte[] password, boolean watch) throws GeneralSecurityException, NumberFormatException { @@ -128,30 +142,7 @@ public void asyncAddEntry(final byte[] data, final int offset, final int length, } @Override - void handleBookieFailure(final Map failedBookies) { - blockAddCompletions.incrementAndGet(); - synchronized (getLedgerMetadata()) { - try { - EnsembleInfo ensembleInfo = replaceBookieInMetadata(failedBookies, - numEnsembleChanges.incrementAndGet()); - if (ensembleInfo.replacedBookies.isEmpty()) { - blockAddCompletions.decrementAndGet(); - return; - } - blockAddCompletions.decrementAndGet(); - // the failed bookie has 
been replaced - unsetSuccessAndSendWriteRequest(ensembleInfo.newEnsemble, ensembleInfo.replacedBookies); - } catch (BKException.BKNotEnoughBookiesException e) { - LOG.error("Could not get additional bookie to " - + "remake ensemble, closing ledger: " + ledgerId); - handleUnrecoverableErrorDuringAdd(e.getCode()); - return; - } - } - } - - @Override - public void onChanged(long lid, LedgerMetadata newMetadata) { + public void onChanged(long lid, Versioned newMetadata) { if (LOG.isDebugEnabled()) { LOG.debug("Received ledger metadata update on {} : {}", lid, newMetadata); } @@ -161,7 +152,7 @@ public void onChanged(long lid, LedgerMetadata newMetadata) { if (null == newMetadata) { return; } - LedgerMetadata currentMetadata = getLedgerMetadata(); + Versioned currentMetadata = getVersionedLedgerMetadata(); Version.Occurred occurred = currentMetadata.getVersion().compare(newMetadata.getVersion()); if (LOG.isDebugEnabled()) { LOG.debug("Try to update metadata from {} to {} : {}", @@ -183,7 +174,8 @@ public String toString() { } @Override - protected void initializeExplicitLacFlushPolicy() { + protected void initializeWriteHandleState() { + // Essentially a noop, we don't want to set up write handle state here for a ReadOnlyLedgerHandle explicitLacFlushPolicy = ExplicitLacFlushPolicy.VOID_EXPLICITLAC_FLUSH_POLICY; } @@ -207,4 +199,153 @@ public void readLastConfirmedComplete(int rc, long lastConfirmed, Object ctx) { } }, ctx); } + + /** + * For a read only ledger handle, this method will only ever be called during recovery, + * when we are reading forward from LAC and writing back those entries. As such, + * unlike with LedgerHandle, we do not want to persist changes to the metadata as they occur, + * but rather, we want to defer the persistence until recovery has completed, and do it all + * on the close. + */ + @Override + void handleBookieFailure(final Map failedBookies) { + // handleBookieFailure should always run in the ordered executor thread for this + // ledger, so this synchronized should be unnecessary, but putting it here now + // just in case (can be removed when we validate threads) + synchronized (metadataLock) { + String logContext = String.format("[RecoveryEnsembleChange(ledger:%d)]", ledgerId); + + long lac = getLastAddConfirmed(); + LedgerMetadata metadata = getLedgerMetadata(); + List currentEnsemble = getCurrentEnsemble(); + try { + List newEnsemble = EnsembleUtils.replaceBookiesInEnsemble( + clientCtx.getBookieWatcher(), metadata, currentEnsemble, failedBookies, logContext); + Set replaced = EnsembleUtils.diffEnsemble(currentEnsemble, newEnsemble); + if (!replaced.isEmpty()) { + newEnsemblesFromRecovery.put(lac + 1, newEnsemble); + unsetSuccessAndSendWriteRequest(newEnsemble, replaced); + } + } catch (BKException.BKNotEnoughBookiesException e) { + LOG.error("Could not get additional bookie to remake ensemble, closing ledger: {}", ledgerId); + + handleUnrecoverableErrorDuringAdd(e.getCode()); + return; + } + } + } + + @Override + void handleUnrecoverableErrorDuringAdd(int rc) { + errorOutPendingAdds(rc); + } + + void recover(GenericCallback finalCb) { + recover(finalCb, null, false); + } + + /** + * Recover the ledger. + * + * @param finalCb + * callback after recovery is done. + * @param listener + * read entry listener on recovery reads. + * @param forceRecovery + * force the recovery procedure even the ledger metadata shows the ledger is closed. 
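Editor's note: the class comment added above captures the key design choice in this file. During recovery, ensemble changes are staged in `newEnsemblesFromRecovery`, a NavigableMap keyed by the first entry id each replacement ensemble covers, and only folded into the ledger metadata by `closeRecovered()`. A minimal sketch of that staging, with `String` standing in for `BookieId`:

```java
import java.util.List;
import java.util.NavigableMap;
import java.util.TreeMap;

final class StagedEnsembles {
    private final NavigableMap<Long, List<String>> staged = new TreeMap<>();

    void stage(long firstEntryId, List<String> newEnsemble) {
        staged.put(firstEntryId, newEnsemble);
    }

    /** The ensemble recovery writes to: the most recently staged one, if any. */
    List<String> current(List<String> fromMetadata) {
        return staged.isEmpty() ? fromMetadata : staged.lastEntry().getValue();
    }

    /** At close, the staged entries are folded into metadata and cleared. */
    NavigableMap<Long, List<String>> drainAtClose() {
        NavigableMap<Long, List<String>> copy = new TreeMap<>(staged);
        staged.clear();
        return copy;
    }
}
```

Having `getCurrentEnsemble()` consult the last staged entry first is what lets recovery writes target the replacement bookies before any metadata is persisted.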
+ */ + void recover(GenericCallback finalCb, + final @VisibleForTesting ReadEntryListener listener, + final boolean forceRecovery) { + final GenericCallback cb = new TimedGenericCallback( + finalCb, + BKException.Code.OK, + clientCtx.getClientStats().getRecoverOpLogger()); + + MetadataUpdateLoop.NeedsUpdatePredicate needsUpdate = + (metadata) -> metadata.getState() == LedgerMetadata.State.OPEN; + if (forceRecovery) { + // in the force recovery case, we want to update the metadata + // to IN_RECOVERY, even if the ledger is already closed + needsUpdate = (metadata) -> metadata.getState() != LedgerMetadata.State.IN_RECOVERY; + } + new MetadataUpdateLoop( + clientCtx.getLedgerManager(), getId(), + this::getVersionedLedgerMetadata, + needsUpdate, + (metadata) -> LedgerMetadataBuilder.from(metadata).withInRecoveryState().build(), + this::setLedgerMetadata) + .run() + .thenCompose((metadata) -> { + if (metadata.getValue().isClosed()) { + return CompletableFuture.completedFuture(ReadOnlyLedgerHandle.this); + } else { + return new LedgerRecoveryOp(ReadOnlyLedgerHandle.this, clientCtx) + .setEntryListener(listener) + .initiate(); + } + }) + .thenCompose((ignore) -> closeRecovered()) + .whenComplete((ignore, ex) -> { + if (ex != null) { + cb.operationComplete( + BKException.getExceptionCode(ex, BKException.Code.UnexpectedConditionException), null); + } else { + cb.operationComplete(BKException.Code.OK, null); + } + }); + } + + CompletableFuture> closeRecovered() { + long lac, len; + synchronized (this) { + lac = lastAddConfirmed; + len = length; + } + LOG.info("Closing recovered ledger {} at entry {}", getId(), lac); + CompletableFuture> f = new MetadataUpdateLoop( + clientCtx.getLedgerManager(), getId(), + this::getVersionedLedgerMetadata, + (metadata) -> metadata.getState() == LedgerMetadata.State.IN_RECOVERY, + (metadata) -> { + LedgerMetadataBuilder builder = LedgerMetadataBuilder.from(metadata); + Long lastEnsembleKey = LedgerMetadataUtils.getLastEnsembleKey(metadata); + synchronized (metadataLock) { + newEnsemblesFromRecovery.entrySet().forEach( + (e) -> { + checkState(e.getKey() >= lastEnsembleKey, + "Once a ledger is in recovery, noone can add ensembles without closing"); + // Occurs when a bookie need to be replaced at very start of recovery + if (lastEnsembleKey.equals(e.getKey())) { + builder.replaceEnsembleEntry(e.getKey(), e.getValue()); + } else { + builder.newEnsembleEntry(e.getKey(), e.getValue()); + } + }); + } + return builder.withClosedState().withLastEntryId(lac).withLength(len).build(); + }, + this::setLedgerMetadata).run(); + f.whenComplete((result, exception) -> { + synchronized (metadataLock) { + newEnsemblesFromRecovery.clear(); + } + if (exception != null) { + LOG.error("When closeRecovered,failed on clearing newEnsemblesFromRecovery.", exception); + } + }); + return f; + } + + @Override + List getCurrentEnsemble() { + synchronized (metadataLock) { + if (!newEnsemblesFromRecovery.isEmpty()) { + return newEnsemblesFromRecovery.lastEntry().getValue(); + } else { + return super.getCurrentEnsemble(); + } + } + } + } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/ReadOpBase.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/ReadOpBase.java new file mode 100644 index 00000000000..cbd68ec657a --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/ReadOpBase.java @@ -0,0 +1,293 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.client; + +import com.google.common.util.concurrent.ListenableFuture; +import java.util.BitSet; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.concurrent.Callable; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ScheduledFuture; +import java.util.concurrent.atomic.AtomicBoolean; +import org.apache.bookkeeper.client.api.LedgerEntries; +import org.apache.bookkeeper.client.api.LedgerMetadata; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public abstract class ReadOpBase implements Runnable { + + private static final Logger LOG = LoggerFactory.getLogger(ReadOpBase.class); + + protected ScheduledFuture speculativeTask = null; + protected final CompletableFuture future; + protected final Set heardFromHosts; + protected final BitSet heardFromHostsBitSet; + protected final Set sentToHosts = new HashSet(); + LedgerHandle lh; + protected ClientContext clientCtx; + + protected final long startEntryId; + protected long requestTimeNanos; + + protected final int requiredBookiesMissingEntryForRecovery; + protected final boolean isRecoveryRead; + + protected final AtomicBoolean complete = new AtomicBoolean(false); + protected boolean allowFailFast = false; + long numPendingEntries; + final long endEntryId; + protected ReadOpBase(LedgerHandle lh, ClientContext clientCtx, long startEntryId, long endEntryId, + boolean isRecoveryRead) { + this.lh = lh; + this.future = new CompletableFuture<>(); + this.startEntryId = startEntryId; + this.endEntryId = endEntryId; + this.isRecoveryRead = isRecoveryRead; + this.requiredBookiesMissingEntryForRecovery = getLedgerMetadata().getWriteQuorumSize() + - getLedgerMetadata().getAckQuorumSize() + 1; + this.heardFromHosts = new HashSet<>(); + this.heardFromHostsBitSet = new BitSet(getLedgerMetadata().getEnsembleSize()); + this.allowFailFast = false; + this.clientCtx = clientCtx; + } + + protected LedgerMetadata getLedgerMetadata() { + return lh.getLedgerMetadata(); + } + + protected void cancelSpeculativeTask(boolean mayInterruptIfRunning) { + if (speculativeTask != null) { + speculativeTask.cancel(mayInterruptIfRunning); + speculativeTask = null; + } + } + + public ScheduledFuture getSpeculativeTask() { + return speculativeTask; + } + + CompletableFuture future() { + return future; + } + + void allowFailFastOnUnwritableChannel() { + allowFailFast = true; + } + + public void submit() { + clientCtx.getMainWorkerPool().executeOrdered(lh.ledgerId, this); + } + + @Override + public void run() { + initiate(); + } + + abstract void initiate(); + + protected abstract void submitCallback(int code); + + abstract class LedgerEntryRequest implements 
SpeculativeRequestExecutor { + + final AtomicBoolean complete = new AtomicBoolean(false); + + int rc = BKException.Code.OK; + int firstError = BKException.Code.OK; + int numBookiesMissingEntry = 0; + + final long eId; + + final List ensemble; + final DistributionSchedule.WriteSet writeSet; + + + LedgerEntryRequest(List ensemble, final long eId) { + this.ensemble = ensemble; + this.eId = eId; + if (clientCtx.getConf().enableReorderReadSequence) { + writeSet = clientCtx.getPlacementPolicy() + .reorderReadSequence( + ensemble, + lh.getBookiesHealthInfo(), + lh.getWriteSetForReadOperation(eId)); + } else { + writeSet = lh.getWriteSetForReadOperation(eId); + } + } + + public void close() { + // this request has succeeded before, can't recycle writeSet again + if (complete.compareAndSet(false, true)) { + rc = BKException.Code.UnexpectedConditionException; + writeSet.recycle(); + } + } + + /** + * Execute the read request. + */ + abstract void read(); + + /** + * Fail the request with given result code rc. + * + * @param rc + * result code to fail the request. + * @return true if we managed to fail the entry; otherwise return false if it already failed or completed. + */ + boolean fail(int rc) { + if (complete.compareAndSet(false, true)) { + this.rc = rc; + writeSet.recycle(); + submitCallback(rc); + return true; + } else { + return false; + } + } + + /** + * Log error errMsg and reattempt read from host. + * + * @param bookieIndex + * bookie index + * @param host + * host that just respond + * @param errMsg + * error msg to log + * @param rc + * read result code + */ + synchronized void logErrorAndReattemptRead(int bookieIndex, BookieId host, String errMsg, int rc) { + if (BKException.Code.OK == firstError + || BKException.Code.NoSuchEntryException == firstError + || BKException.Code.NoSuchLedgerExistsException == firstError) { + firstError = rc; + } else if (BKException.Code.BookieHandleNotAvailableException == firstError + && BKException.Code.NoSuchEntryException != rc + && BKException.Code.NoSuchLedgerExistsException != rc) { + // if other exception rather than NoSuchEntryException or NoSuchLedgerExistsException is + // returned we need to update firstError to indicate that it might be a valid read but just + // failed. + firstError = rc; + } + if (BKException.Code.NoSuchEntryException == rc + || BKException.Code.NoSuchLedgerExistsException == rc) { + ++numBookiesMissingEntry; + if (LOG.isDebugEnabled()) { + LOG.debug("No such entry found on bookie. L{} E{} bookie: {}", + lh.ledgerId, eId, host); + } + } else { + if (LOG.isInfoEnabled()) { + LOG.info("{} while reading L{} E{} from bookie: {}", + errMsg, lh.ledgerId, eId, host); + } + } + + lh.recordReadErrorOnBookie(bookieIndex); + } + + /** + * Send to next replica speculatively, if required and possible. + * This returns the host we may have sent to for unit testing. + * + * @param heardFromHostsBitSet + * the set of hosts that we already received responses. + * @return host we sent to if we sent. null otherwise. + */ + abstract BookieId maybeSendSpeculativeRead(BitSet heardFromHostsBitSet); + + /** + * Whether the read request completed. + * + * @return true if the read request is completed. + */ + boolean isComplete() { + return complete.get(); + } + + /** + * Get result code of this entry. + * + * @return result code. 
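Editor's note: `logErrorAndReattemptRead` above encodes a precedence among error codes: "no such entry/ledger" responses are kept as `firstError` only until some other failure arrives, because a bookie that genuinely failed a read is more informative than one that simply never saw the entry. A sketch of that rule with placeholder constants; the real values live in `BKException.Code`:

```java
final class FirstErrorTracker {
    // illustrative placeholders, not the actual BKException.Code values
    static final int OK = 0;
    static final int NO_SUCH_ENTRY = 1;
    static final int NO_SUCH_LEDGER = 2;
    static final int BOOKIE_UNAVAILABLE = 3;

    private int firstError = OK;

    void record(int rc) {
        if (firstError == OK || firstError == NO_SUCH_ENTRY || firstError == NO_SUCH_LEDGER) {
            // anything beats "ok" or "entry simply missing"
            firstError = rc;
        } else if (firstError == BOOKIE_UNAVAILABLE
                && rc != NO_SUCH_ENTRY && rc != NO_SUCH_LEDGER) {
            // a concrete read failure is more informative than "bookie unavailable"
            firstError = rc;
        }
    }

    int get() { return firstError; }
}
```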
+ */ + int getRc() { + return rc; + } + + @Override + public String toString() { + return String.format("L%d-E%d", lh.getId(), eId); + } + + /** + * Issues a speculative request and indicates if more speculative + * requests should be issued. + * + * @return whether more speculative requests should be issued + */ + @Override + public ListenableFuture issueSpeculativeRequest() { + return clientCtx.getMainWorkerPool().submitOrdered(lh.getId(), new Callable() { + @Override + public Boolean call() throws Exception { + if (!isComplete() && null != maybeSendSpeculativeRead(heardFromHostsBitSet)) { + if (LOG.isDebugEnabled()) { + LOG.debug("Send speculative read for {}. Hosts sent are {}, " + + " Hosts heard are {}, ensemble is {}.", + this, sentToHosts, heardFromHostsBitSet, ensemble); + } + return true; + } + return false; + } + }); + } + } + + protected static class ReadContext implements BookkeeperInternalCallbacks.ReadEntryCallbackCtx { + final int bookieIndex; + final BookieId to; + final PendingReadOp.LedgerEntryRequest entry; + long lac = LedgerHandle.INVALID_ENTRY_ID; + + ReadContext(int bookieIndex, BookieId to, PendingReadOp.LedgerEntryRequest entry) { + this.bookieIndex = bookieIndex; + this.to = to; + this.entry = entry; + } + + @Override + public void setLastAddConfirmed(long lac) { + this.lac = lac; + } + + @Override + public long getLastAddConfirmed() { + return lac; + } + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/RegionAwareEnsemblePlacementPolicy.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/RegionAwareEnsemblePlacementPolicy.java index c52e0fee852..c742e62c04d 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/RegionAwareEnsemblePlacementPolicy.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/RegionAwareEnsemblePlacementPolicy.java @@ -18,8 +18,8 @@ package org.apache.bookkeeper.client; import io.netty.util.HashedWheelTimer; - import java.util.ArrayList; +import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -28,15 +28,17 @@ import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; - import org.apache.bookkeeper.conf.ClientConfiguration; import org.apache.bookkeeper.feature.Feature; import org.apache.bookkeeper.feature.FeatureProvider; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.net.BookieNode; import org.apache.bookkeeper.net.DNSToSwitchMapping; import org.apache.bookkeeper.net.NetworkTopology; +import org.apache.bookkeeper.net.NetworkTopologyImpl; import org.apache.bookkeeper.net.Node; import org.apache.bookkeeper.net.NodeBase; +import org.apache.bookkeeper.proto.BookieAddressResolver; import org.apache.bookkeeper.stats.StatsLogger; import org.apache.bookkeeper.util.BookKeeperConstants; import org.apache.commons.lang3.tuple.Pair; @@ -67,7 +69,7 @@ public class RegionAwareEnsemblePlacementPolicy extends RackawareEnsemblePlaceme static final int REMOTE_NODE_IN_REORDER_SEQUENCE = 2; protected final Map perRegionPlacement; - protected final ConcurrentMap address2Region; + protected final ConcurrentMap address2Region; protected FeatureProvider featureProvider; protected String disallowBookiePlacementInRegionFeatureName; protected String myRegion = null; @@ -75,41 +77,46 @@ public class RegionAwareEnsemblePlacementPolicy extends RackawareEnsemblePlaceme protected boolean enableValidation = 
true; protected boolean enforceDurabilityInReplace = false; protected Feature disableDurabilityFeature; + private int lastRegionIndex = 0; RegionAwareEnsemblePlacementPolicy() { super(); perRegionPlacement = new HashMap(); - address2Region = new ConcurrentHashMap(); + address2Region = new ConcurrentHashMap(); + } + + protected String getLocalRegion(BookieNode node) { + if (null == node || null == node.getAddr()) { + return UNKNOWN_REGION; + } + return getRegion(node.getAddr()); } - protected String getRegion(BookieSocketAddress addr) { + protected String getRegion(BookieId addr) { String region = address2Region.get(addr); if (null == region) { - String networkLocation = resolveNetworkLocation(addr); - if (NetworkTopology.DEFAULT_REGION_AND_RACK.equals(networkLocation)) { - region = UNKNOWN_REGION; - } else { - String[] parts = networkLocation.split(NodeBase.PATH_SEPARATOR_STR); - if (parts.length <= 1) { - region = UNKNOWN_REGION; - } else { - region = parts[1]; - } - } + region = parseBookieRegion(addr); address2Region.putIfAbsent(addr, region); } return region; } - protected String getLocalRegion(BookieNode node) { - if (null == node || null == node.getAddr()) { + protected String parseBookieRegion(BookieId addr) { + String networkLocation = resolveNetworkLocation(addr); + if (NetworkTopology.DEFAULT_REGION_AND_RACK.equals(networkLocation)) { return UNKNOWN_REGION; + } else { + String[] parts = networkLocation.split(NodeBase.PATH_SEPARATOR_STR); + if (parts.length <= 1) { + return UNKNOWN_REGION; + } else { + return parts[1]; + } } - return getRegion(node.getAddr()); } @Override - public void handleBookiesThatLeft(Set leftBookies) { + public void handleBookiesThatLeft(Set leftBookies) { super.handleBookiesThatLeft(leftBookies); for (TopologyAwareEnsemblePlacementPolicy policy: perRegionPlacement.values()) { @@ -118,26 +125,29 @@ public void handleBookiesThatLeft(Set leftBookies) { } @Override - public void handleBookiesThatJoined(Set joinedBookies) { - Map> perRegionClusterChange = new HashMap>(); + public void handleBookiesThatJoined(Set joinedBookies) { + Map> perRegionClusterChange = new HashMap>(); // node joined - for (BookieSocketAddress addr : joinedBookies) { + for (BookieId addr : joinedBookies) { BookieNode node = createBookieNode(addr); topology.add(node); knownBookies.put(addr, node); + historyBookies.put(addr, node); String region = getLocalRegion(node); if (null == perRegionPlacement.get(region)) { perRegionPlacement.put(region, new RackawareEnsemblePlacementPolicy() .initialize(dnsResolver, timer, this.reorderReadsRandom, this.stabilizePeriodSeconds, this.reorderThresholdPendingRequests, this.isWeighted, this.maxWeightMultiple, - this.minNumRacksPerWriteQuorum, this.enforceMinNumRacksPerWriteQuorum, statsLogger) + this.minNumRacksPerWriteQuorum, this.enforceMinNumRacksPerWriteQuorum, + this.ignoreLocalNodeInPlacementPolicy, + this.useHostnameResolveLocalNodePlacementPolicy, statsLogger, bookieAddressResolver) .withDefaultRack(NetworkTopology.DEFAULT_REGION_AND_RACK)); } - Set regionSet = perRegionClusterChange.get(region); + Set regionSet = perRegionClusterChange.get(region); if (null == regionSet) { - regionSet = new HashSet(); + regionSet = new HashSet(); regionSet.add(addr); perRegionClusterChange.put(region, regionSet); } else { @@ -150,21 +160,74 @@ public void handleBookiesThatJoined(Set joinedBookies) { } for (Map.Entry regionEntry : perRegionPlacement.entrySet()) { - Set regionSet = perRegionClusterChange.get(regionEntry.getKey()); + Set regionSet = 
perRegionClusterChange.get(regionEntry.getKey()); if (null == regionSet) { - regionSet = new HashSet(); + regionSet = new HashSet(); } regionEntry.getValue().handleBookiesThatJoined(regionSet); } } + @Override + public void onBookieRackChange(List bookieAddressList) { + rwLock.writeLock().lock(); + try { + bookieAddressList.forEach(bookieAddress -> { + try { + BookieNode node = knownBookies.get(bookieAddress); + if (node != null) { + // refresh the rack info if its a known bookie + BookieNode newNode = createBookieNode(bookieAddress); + if (!newNode.getNetworkLocation().equals(node.getNetworkLocation())) { + topology.remove(node); + topology.add(newNode); + knownBookies.put(bookieAddress, newNode); + historyBookies.put(bookieAddress, newNode); + } + //Handle per region placement policy. + String oldRegion = getRegion(bookieAddress); + String newRegion = parseBookieRegion(newNode.getAddr()); + if (oldRegion.equals(newRegion)) { + TopologyAwareEnsemblePlacementPolicy regionPlacement = perRegionPlacement.get(oldRegion); + regionPlacement.onBookieRackChange(Collections.singletonList(bookieAddress)); + } else { + address2Region.put(bookieAddress, newRegion); + TopologyAwareEnsemblePlacementPolicy oldRegionPlacement = perRegionPlacement.get(oldRegion); + oldRegionPlacement.handleBookiesThatLeft(Collections.singleton(bookieAddress)); + TopologyAwareEnsemblePlacementPolicy newRegionPlacement = perRegionPlacement.get( + newRegion); + if (newRegionPlacement == null) { + newRegionPlacement = new RackawareEnsemblePlacementPolicy() + .initialize(dnsResolver, timer, this.reorderReadsRandom, + this.stabilizePeriodSeconds, this.reorderThresholdPendingRequests, + this.isWeighted, this.maxWeightMultiple, + this.minNumRacksPerWriteQuorum, this.enforceMinNumRacksPerWriteQuorum, + this.ignoreLocalNodeInPlacementPolicy, + this.useHostnameResolveLocalNodePlacementPolicy, statsLogger, + bookieAddressResolver) + .withDefaultRack(NetworkTopology.DEFAULT_REGION_AND_RACK); + perRegionPlacement.put(newRegion, newRegionPlacement); + } + newRegionPlacement.handleBookiesThatJoined(Collections.singleton(bookieAddress)); + } + } + } catch (IllegalArgumentException | NetworkTopologyImpl.InvalidTopologyException e) { + LOG.error("Failed to update bookie rack info: {} ", bookieAddress, e); + } + }); + } finally { + rwLock.writeLock().unlock(); + } + } + @Override public RegionAwareEnsemblePlacementPolicy initialize(ClientConfiguration conf, Optional optionalDnsResolver, HashedWheelTimer timer, FeatureProvider featureProvider, - StatsLogger statsLogger) { - super.initialize(conf, optionalDnsResolver, timer, featureProvider, statsLogger) + StatsLogger statsLogger, + BookieAddressResolver bookieAddressResolver) { + super.initialize(conf, optionalDnsResolver, timer, featureProvider, statsLogger, bookieAddressResolver) .withDefaultRack(NetworkTopology.DEFAULT_REGION_AND_RACK); myRegion = getLocalRegion(localNode); enableValidation = conf.getBoolean(REPP_ENABLE_VALIDATION, true); @@ -180,7 +243,9 @@ public RegionAwareEnsemblePlacementPolicy initialize(ClientConfiguration conf, perRegionPlacement.put(region, new RackawareEnsemblePlacementPolicy(true) .initialize(dnsResolver, timer, this.reorderReadsRandom, this.stabilizePeriodSeconds, this.reorderThresholdPendingRequests, this.isWeighted, this.maxWeightMultiple, - this.minNumRacksPerWriteQuorum, this.enforceMinNumRacksPerWriteQuorum, statsLogger) + this.minNumRacksPerWriteQuorum, this.enforceMinNumRacksPerWriteQuorum, + this.ignoreLocalNodeInPlacementPolicy, 
this.useHostnameResolveLocalNodePlacementPolicy, + statsLogger, bookieAddressResolver) .withDefaultRack(NetworkTopology.DEFAULT_REGION_AND_RACK)); } minRegionsForDurability = conf.getInt(REPP_MINIMUM_REGIONS_FOR_DURABILITY, @@ -225,8 +290,8 @@ protected List selectRandomFromRegions(Set availableRegions, @Override - public List newEnsemble(int ensembleSize, int writeQuorumSize, int ackQuorumSize, - Map customMetadata, Set excludeBookies) + public PlacementResult> newEnsemble(int ensembleSize, int writeQuorumSize, + int ackQuorumSize, Map customMetadata, Set excludedBookies) throws BKException.BKNotEnoughBookiesException { int effectiveMinRegionsForDurability = disableDurabilityFeature.isAvailable() ? 1 : minRegionsForDurability; @@ -255,12 +320,15 @@ public List newEnsemble(int ensembleSize, int writeQuorumSi rwLock.readLock().lock(); try { - Set excludeNodes = convertBookiesToNodes(excludeBookies); - Set availableRegions = new HashSet(); - for (String region: perRegionPlacement.keySet()) { - if ((null == disallowBookiePlacementInRegionFeatureName) + Set comprehensiveExclusionBookiesSet = addDefaultRackBookiesIfMinNumRacksIsEnforced( + excludedBookies); + Set excludeNodes = convertBookiesToNodes(comprehensiveExclusionBookiesSet); + List availableRegions = new ArrayList<>(); + for (Map.Entry entry : perRegionPlacement.entrySet()) { + String region = entry.getKey(); + if (!entry.getValue().knownBookies.isEmpty() && (null == disallowBookiePlacementInRegionFeatureName || !featureProvider.scope(region).getFeature(disallowBookiePlacementInRegionFeatureName) - .isAvailable()) { + .isAvailable())) { availableRegions.add(region); } } @@ -275,11 +343,13 @@ public List newEnsemble(int ensembleSize, int writeQuorumSi } List bns = selectRandom(ensembleSize, excludeNodes, TruePredicate.INSTANCE, EnsembleForReplacementWithNoConstraints.INSTANCE); - ArrayList addrs = new ArrayList(ensembleSize); + ArrayList addrs = new ArrayList(ensembleSize); for (BookieNode bn : bns) { addrs.add(bn.getAddr()); } - return addrs; + return PlacementResult.of(addrs, + isEnsembleAdheringToPlacementPolicy( + addrs, writeQuorumSize, ackQuorumSize)); } // Single region, fall back to RackAwareEnsemblePlacement @@ -290,8 +360,8 @@ public List newEnsemble(int ensembleSize, int writeQuorumSi effectiveMinRegionsForDurability, minNumRacksPerWriteQuorum); TopologyAwareEnsemblePlacementPolicy nextPolicy = perRegionPlacement.get( availableRegions.iterator().next()); - return nextPolicy.newEnsemble(ensembleSize, writeQuorumSize, writeQuorumSize, excludeBookies, ensemble, - ensemble); + return nextPolicy.newEnsemble(ensembleSize, writeQuorumSize, writeQuorumSize, + comprehensiveExclusionBookiesSet, ensemble, ensemble); } int remainingEnsemble = ensembleSize; @@ -321,9 +391,22 @@ public List newEnsemble(int ensembleSize, int writeQuorumSi effectiveMinRegionsForDurability, minNumRacksPerWriteQuorum); remainingEnsembleBeforeIteration = remainingEnsemble; int regionsToAllocate = numRemainingRegions; - for (Map.Entry> regionEntry: regionsWiseAllocation.entrySet()) { - String region = regionEntry.getKey(); - final Pair currentAllocation = regionEntry.getValue(); + int startRegionIndex = lastRegionIndex % numRegionsAvailable; + int localRegionIndex = -1; + if (myRegion != null && !UNKNOWN_REGION.equals(myRegion)) { + localRegionIndex = availableRegions.indexOf(myRegion); + } + String region = myRegion; + for (int i = 0; i < numRegionsAvailable; ++i) { + // select the local region first, and for the rest region select, use round-robin selection. 
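Editor's note: the loop that follows replaces plain map iteration with a deliberate ordering: the writer's local region is tried first, and the remaining regions are visited round-robin starting from a rotating `lastRegionIndex`, so successive ensembles spread their allocations across regions. A standalone sketch of that ordering (names are illustrative):

```java
import java.util.ArrayList;
import java.util.List;

final class RegionOrder {
    private int lastRegionIndex = 0;

    List<String> order(List<String> regions, String localRegion) {
        int n = regions.size();
        List<String> out = new ArrayList<>(n);
        int local = regions.indexOf(localRegion);
        if (local >= 0) {
            out.add(regions.get(local));  // local region always comes first
        }
        int idx = lastRegionIndex;
        int added = out.size();
        while (added < n) {
            int i = idx % n;
            idx++;
            if (i == local) {
                continue;  // local region was already placed first
            }
            out.add(regions.get(i));
            added++;
        }
        lastRegionIndex = idx;  // rotate the starting point for the next call
        return out;
    }
}
```

Rotating the start index between calls is what prevents the first non-local region in the list from absorbing a disproportionate share of allocations.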
+ if (i > 0 || localRegionIndex == -1) { + if (startRegionIndex % numRegionsAvailable == localRegionIndex) { + startRegionIndex++; + } + region = availableRegions.get(startRegionIndex % numRegionsAvailable); + startRegionIndex++; + } + final Pair currentAllocation = regionsWiseAllocation.get(region); TopologyAwareEnsemblePlacementPolicy policyWithinRegion = perRegionPlacement.get(region); if (!regionsReachedMaxAllocation.contains(region)) { if (numRemainingRegions <= 0) { @@ -345,15 +428,17 @@ public List newEnsemble(int ensembleSize, int writeQuorumSi int newEnsembleSize = currentAllocation.getLeft() + addToEnsembleSize; int newWriteQuorumSize = currentAllocation.getRight() + addToWriteQuorum; try { - List allocated = policyWithinRegion.newEnsemble(newEnsembleSize, - newWriteQuorumSize, newWriteQuorumSize, excludeBookies, tempEnsemble, - tempEnsemble); + List allocated = policyWithinRegion + .newEnsemble(newEnsembleSize, newWriteQuorumSize, newWriteQuorumSize, + comprehensiveExclusionBookiesSet, tempEnsemble, tempEnsemble) + .getResult(); ensemble = tempEnsemble; remainingEnsemble -= addToEnsembleSize; remainingWriteQuorum -= addToWriteQuorum; regionsWiseAllocation.put(region, Pair.of(newEnsembleSize, newWriteQuorumSize)); success = true; regionsToAllocate--; + lastRegionIndex = startRegionIndex; LOG.info("Region {} allocating bookies with ensemble size {} " + "and write quorum size {} : {}", region, newEnsembleSize, newWriteQuorumSize, allocated); @@ -375,12 +460,12 @@ public List newEnsemble(int ensembleSize, int writeQuorumSi if (regionsReachedMaxAllocation.contains(region)) { if (currentAllocation.getLeft() > 0) { LOG.info("Allocating {} bookies in region {} : ensemble {} exclude {}", - currentAllocation.getLeft(), region, excludeBookies, ensemble); + currentAllocation.getLeft(), region, comprehensiveExclusionBookiesSet, ensemble); policyWithinRegion.newEnsemble( currentAllocation.getLeft(), currentAllocation.getRight(), currentAllocation.getRight(), - excludeBookies, + comprehensiveExclusionBookiesSet, ensemble, ensemble); LOG.info("Allocated {} bookies in region {} : {}", @@ -394,7 +479,7 @@ public List newEnsemble(int ensembleSize, int writeQuorumSi } } while ((remainingEnsemble > 0) && (remainingEnsemble < remainingEnsembleBeforeIteration)); - List bookieList = ensemble.toList(); + List bookieList = ensemble.toList(); if (ensembleSize != bookieList.size()) { LOG.error("Not enough {} bookies are available to form an ensemble : {}.", ensembleSize, bookieList); @@ -407,22 +492,26 @@ public List newEnsemble(int ensembleSize, int writeQuorumSi throw new BKException.BKNotEnoughBookiesException(); } LOG.info("Bookies allocated successfully {}", ensemble); - return ensemble.toList(); + List ensembleList = ensemble.toList(); + return PlacementResult.of(ensembleList, + isEnsembleAdheringToPlacementPolicy(ensembleList, writeQuorumSize, ackQuorumSize)); } finally { rwLock.readLock().unlock(); } } @Override - public BookieSocketAddress replaceBookie(int ensembleSize, int writeQuorumSize, int ackQuorumSize, - Map customMetadata, Set currentEnsemble, - BookieSocketAddress bookieToReplace, Set excludeBookies) + public PlacementResult replaceBookie(int ensembleSize, int writeQuorumSize, int ackQuorumSize, + Map customMetadata, List currentEnsemble, + BookieId bookieToReplace, Set excludeBookies) throws BKException.BKNotEnoughBookiesException { rwLock.readLock().lock(); try { boolean enforceDurability = enforceDurabilityInReplace && !disableDurabilityFeature.isAvailable(); int 
effectiveMinRegionsForDurability = enforceDurability ? minRegionsForDurability : 1; - Set excludeNodes = convertBookiesToNodes(excludeBookies); + Set comprehensiveExclusionBookiesSet = addDefaultRackBookiesIfMinNumRacksIsEnforced( + excludeBookies); + Set excludeNodes = convertBookiesToNodes(comprehensiveExclusionBookiesSet); RRTopologyAwareCoverageEnsemble ensemble = new RRTopologyAwareCoverageEnsemble(ensembleSize, writeQuorumSize, ackQuorumSize, @@ -436,7 +525,7 @@ public BookieSocketAddress replaceBookie(int ensembleSize, int writeQuorumSize, } excludeNodes.add(bookieNodeToReplace); - for (BookieSocketAddress bookieAddress: currentEnsemble) { + for (BookieId bookieAddress: currentEnsemble) { if (bookieAddress.equals(bookieToReplace)) { continue; } @@ -469,7 +558,19 @@ public BookieSocketAddress replaceBookie(int ensembleSize, int writeQuorumSize, if (LOG.isDebugEnabled()) { LOG.debug("Bookie {} is chosen to replace bookie {}.", candidate, bookieNodeToReplace); } - return candidate.getAddr(); + BookieId candidateAddr = candidate.getAddr(); + List newEnsemble = new ArrayList(currentEnsemble); + if (currentEnsemble.isEmpty()) { + /* + * in testing code there are test cases which pass an empty + * currentEnsemble + */ + newEnsemble.add(candidateAddr); + } else { + newEnsemble.set(currentEnsemble.indexOf(bookieToReplace), candidateAddr); + } + return PlacementResult.of(candidateAddr, + isEnsembleAdheringToPlacementPolicy(newEnsemble, writeQuorumSize, ackQuorumSize)); } finally { rwLock.readLock().unlock(); } @@ -522,7 +623,7 @@ protected BookieNode replaceFromRack(BookieNode bookieNodeToReplace, @Override public final DistributionSchedule.WriteSet reorderReadSequence( - List ensemble, + List ensemble, BookiesHealthInfo bookiesHealthInfo, DistributionSchedule.WriteSet writeSet) { if (UNKNOWN_REGION.equals(myRegion)) { @@ -540,7 +641,7 @@ public final DistributionSchedule.WriteSet reorderReadSequence( @Override public final DistributionSchedule.WriteSet reorderReadLACSequence( - List ensemble, + List ensemble, BookiesHealthInfo bookiesHealthInfo, DistributionSchedule.WriteSet writeSet) { if (UNKNOWN_REGION.equals(myRegion)) { @@ -550,4 +651,16 @@ public final DistributionSchedule.WriteSet reorderReadLACSequence( finalList.addMissingIndices(ensemble.size()); return finalList; } + + @Override + public PlacementPolicyAdherence isEnsembleAdheringToPlacementPolicy(List ensembleList, + int writeQuorumSize, int ackQuorumSize) { + /** + * TODO: have to implement actual logic for this method for + * RegionAwareEnsemblePlacementPolicy. For now, return MEETS_STRICT. + * + * - https://github.com/apache/bookkeeper/issues/1898 + */ + return PlacementPolicyAdherence.MEETS_STRICT; + } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/RoundRobinDistributionSchedule.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/RoundRobinDistributionSchedule.java index d07940823b9..62690129744 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/RoundRobinDistributionSchedule.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/RoundRobinDistributionSchedule.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements.
See the NOTICE file * distributed with this work for additional information @@ -20,15 +20,15 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.base.MoreObjects; import com.google.common.collect.ImmutableMap; - import io.netty.util.Recycler; import io.netty.util.Recycler.Handle; - import java.util.Arrays; import java.util.BitSet; import java.util.Map; - -import org.apache.bookkeeper.net.BookieSocketAddress; +import lombok.Getter; +import org.apache.bookkeeper.net.BookieId; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * A specific {@link DistributionSchedule} that places entries in round-robin @@ -37,7 +37,9 @@ * on. * */ -class RoundRobinDistributionSchedule implements DistributionSchedule { +public class RoundRobinDistributionSchedule implements DistributionSchedule { + private static final Logger LOG = LoggerFactory.getLogger(RoundRobinDistributionSchedule.class); + @Getter private final int writeQuorumSize; private final int ackQuorumSize; private final int ensembleSize; @@ -53,6 +55,11 @@ public WriteSet getWriteSet(long entryId) { return WriteSetImpl.create(ensembleSize, writeQuorumSize, entryId); } + @Override + public int getWriteSetBookieIndex(long entryId, int writeSetIndex) { + return (int) (entryId + writeSetIndex) % ensembleSize; + } + @Override public WriteSet getEnsembleSet(long entryId) { // for long poll reads and force ledger , we are trying all the bookies in the ensemble @@ -76,6 +83,7 @@ private static class WriteSetImpl implements WriteSet { private final Handle recyclerHandle; private static final Recycler RECYCLER = new Recycler() { + @Override protected WriteSetImpl newObject( Recycler.Handle handle) { return new WriteSetImpl(handle); @@ -262,10 +270,11 @@ private static class AckSetImpl implements AckSet { private int ackQuorumSize; private final BitSet ackSet = new BitSet(); // grows on reset() - private BookieSocketAddress[] failureMap = new BookieSocketAddress[0]; + private BookieId[] failureMap = new BookieId[0]; private final Handle recyclerHandle; private static final Recycler RECYCLER = new Recycler() { + @Override protected AckSetImpl newObject(Recycler.Handle handle) { return new AckSetImpl(handle); } @@ -290,7 +299,7 @@ private void reset(int ensembleSize, this.writeQuorumSize = writeQuorumSize; ackSet.clear(); if (failureMap.length < ensembleSize) { - failureMap = new BookieSocketAddress[ensembleSize]; + failureMap = new BookieId[ensembleSize]; } Arrays.fill(failureMap, null); } @@ -304,15 +313,15 @@ public boolean completeBookieAndCheck(int bookieIndexHeardFrom) { @Override public boolean failBookieAndCheck(int bookieIndexHeardFrom, - BookieSocketAddress address) { + BookieId address) { ackSet.clear(bookieIndexHeardFrom); failureMap[bookieIndexHeardFrom] = address; return failed() > (writeQuorumSize - ackQuorumSize); } @Override - public Map getFailedBookies() { - ImmutableMap.Builder builder = new ImmutableMap.Builder<>(); + public Map getFailedBookies() { + ImmutableMap.Builder builder = new ImmutableMap.Builder<>(); for (int i = 0; i < failureMap.length; i++) { if (failureMap[i] != null) { builder.put(i, failureMap[i]); @@ -370,28 +379,43 @@ public synchronized void addBookie(int bookieIndexHeardFrom, int rc) { public synchronized boolean checkCovered() { // now check if there are any write quorums, with |ackQuorum| nodes available for (int i = 0; i < ensembleSize; i++) { - int nodesNotCovered = 0; - int nodesOkay = 0; - int nodesUninitialized = 0; + /* Nodes which have either responded with an error 
other than NoSuch{Entry,Ledger}, + or have not responded at all. We cannot know if these nodes ever accepted an entry. */ + int nodesUnknown = 0; + for (int j = 0; j < writeQuorumSize; j++) { int nodeIndex = (i + j) % ensembleSize; - if (covered[nodeIndex] == BKException.Code.OK) { - nodesOkay++; - } else if (covered[nodeIndex] != BKException.Code.NoSuchEntryException - && covered[nodeIndex] != BKException.Code.NoSuchLedgerExistsException) { - nodesNotCovered++; - } else if (covered[nodeIndex] == BKException.Code.UNINITIALIZED) { - nodesUninitialized++; + if (covered[nodeIndex] != BKException.Code.OK + && covered[nodeIndex] != BKException.Code.NoSuchEntryException + && covered[nodeIndex] != BKException.Code.NoSuchLedgerExistsException) { + nodesUnknown++; } } - // if we haven't seen any OK responses and there are still nodes not heard from, - // let's wait until - if (nodesNotCovered >= ackQuorumSize || (nodesOkay == 0 && nodesUninitialized > 0)) { + + /* If nodesUnknown is at least the ack quorum size, then + it is possible those unknown nodes accepted an entry which + we do not know about */ + if (nodesUnknown >= ackQuorumSize) { return false; } } return true; } + + @Override + public String toString() { + StringBuilder buffer = new StringBuilder(); + buffer.append("QuorumCoverage(e:").append(ensembleSize) + .append(",w:").append(writeQuorumSize) + .append(",a:").append(ackQuorumSize) + .append(") = ["); + int i = 0; + for (; i < covered.length - 1; i++) { + buffer.append(covered[i]).append(", "); + } + buffer.append(covered[i]).append("]"); + return buffer.toString(); + } } @Override @@ -401,11 +425,39 @@ public QuorumCoverageSet getCoverageSet() { @Override public boolean hasEntry(long entryId, int bookieIndex) { - WriteSet w = getWriteSet(entryId); - try { - return w.contains(bookieIndex); - } finally { - w.recycle(); + for (int w = 0; w < writeQuorumSize; w++) { + if (bookieIndex == getWriteSetBookieIndex(entryId, w)) { + return true; + } + } + + return false; + } + + @Override + public BitSet getEntriesStripedToTheBookie(int bookieIndex, long startEntryId, long lastEntryId) { + if ((startEntryId < 0) || (lastEntryId < 0) || (bookieIndex < 0) || (bookieIndex >= ensembleSize) + || (lastEntryId < startEntryId)) { + LOG.error( + "Illegal arguments for getEntriesStripedToTheBookie, bookieIndex : {}," + + " ensembleSize : {}, startEntryId : {}, lastEntryId : {}", + bookieIndex, ensembleSize, startEntryId, lastEntryId); + throw new IllegalArgumentException("Illegal arguments for getEntriesStripedToTheBookie"); + } + BitSet entriesStripedToTheBookie = new BitSet((int) (lastEntryId - startEntryId + 1)); + for (long entryId = startEntryId; entryId <= lastEntryId; entryId++) { + int modValOfFirstReplica = (int) (entryId % ensembleSize); + int modValOfLastReplica = (int) ((entryId + writeQuorumSize - 1) % ensembleSize); + if (modValOfLastReplica >= modValOfFirstReplica) { + if ((bookieIndex >= modValOfFirstReplica) && (bookieIndex <= modValOfLastReplica)) { + entriesStripedToTheBookie.set((int) (entryId - startEntryId)); + } + } else { + if ((bookieIndex >= modValOfFirstReplica) || (bookieIndex <= modValOfLastReplica)) { + entriesStripedToTheBookie.set((int) (entryId - startEntryId)); + } + } } + return entriesStripedToTheBookie; } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/SpeculativeRequestExecutionPolicy.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/SpeculativeRequestExecutionPolicy.java index bff4bb3f8ea..226fae16871 100644 ---
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/SpeculativeRequestExecutionPolicy.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/SpeculativeRequestExecutionPolicy.java @@ -21,6 +21,7 @@ package org.apache.bookkeeper.client; import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.ScheduledFuture; /** * Define a policy for speculative request execution. @@ -36,7 +37,9 @@ public interface SpeculativeRequestExecutionPolicy { * Initialize the speculative request execution policy and initiate requests. * * @param scheduler The scheduler service to issue the speculative request - * @param requestExectuor The executor is used to issue the actual speculative requests + * @param requestExecutor The executor is used to issue the actual speculative requests + * @return ScheduledFuture, in case caller needs to cancel it. */ - void initiateSpeculativeRequest(ScheduledExecutorService scheduler, SpeculativeRequestExecutor requestExectuor); + ScheduledFuture initiateSpeculativeRequest(ScheduledExecutorService scheduler, + SpeculativeRequestExecutor requestExecutor); } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/SyncCallbackUtils.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/SyncCallbackUtils.java index 3c79203e821..a421a7547d7 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/SyncCallbackUtils.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/SyncCallbackUtils.java @@ -197,10 +197,8 @@ static class LastAddConfirmedCallback implements AsyncCallback.AddLacCallback { public void addLacComplete(int rc, LedgerHandle lh, Object ctx) { if (rc != BKException.Code.OK) { log.warn("LastAddConfirmedUpdate failed: {} ", BKException.getMessage(rc)); - } else { - if (log.isDebugEnabled()) { - log.debug("Callback LAC Updated for: {} ", lh.getId()); - } + } else if (log.isDebugEnabled()) { + log.debug("Callback LAC Updated for: {} ", lh.getId()); } } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/TopologyAwareEnsemblePlacementPolicy.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/TopologyAwareEnsemblePlacementPolicy.java index 19cb5050710..9eae79438fd 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/TopologyAwareEnsemblePlacementPolicy.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/TopologyAwareEnsemblePlacementPolicy.java @@ -17,42 +17,86 @@ */ package org.apache.bookkeeper.client; +import static com.google.common.base.Preconditions.checkNotNull; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.BOOKIES_JOINED; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.BOOKIES_LEFT; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.FAILED_TO_RESOLVE_NETWORK_LOCATION_COUNT; + +import com.google.common.collect.ImmutableSet; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.Sets; - import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.Set; - +import java.util.concurrent.ThreadLocalRandom; +import java.util.concurrent.locks.ReentrantReadWriteLock; +import java.util.function.Supplier; +import org.apache.bookkeeper.client.BookieInfoReader.BookieInfo; +import 
org.apache.bookkeeper.client.WeightedRandomSelection.WeightedObject; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.net.BookieNode; import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.DNSToSwitchMapping; +import org.apache.bookkeeper.net.NetUtils; import org.apache.bookkeeper.net.NetworkTopology; +import org.apache.bookkeeper.net.NetworkTopologyImpl; +import org.apache.bookkeeper.net.Node; import org.apache.bookkeeper.net.NodeBase; +import org.apache.bookkeeper.proto.BookieAddressResolver; +import org.apache.bookkeeper.stats.Counter; +import org.apache.bookkeeper.stats.OpStatsLogger; +import org.apache.bookkeeper.stats.annotations.StatsDoc; +import org.apache.commons.collections4.CollectionUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; abstract class TopologyAwareEnsemblePlacementPolicy implements - ITopologyAwareEnsemblePlacementPolicy { + ITopologyAwareEnsemblePlacementPolicy { static final Logger LOG = LoggerFactory.getLogger(TopologyAwareEnsemblePlacementPolicy.class); + public static final String REPP_DNS_RESOLVER_CLASS = "reppDnsResolverClass"; + protected final Map knownBookies = new HashMap(); + protected final Map historyBookies = new HashMap(); + protected final ReentrantReadWriteLock rwLock = new ReentrantReadWriteLock(); + protected Map bookieInfoMap = new HashMap(); + // Initialize to empty set + protected ImmutableSet readOnlyBookies = ImmutableSet.of(); + boolean isWeighted; + protected WeightedRandomSelection weightedSelection; + // for now, we just maintain the writable bookies' topology + protected NetworkTopology topology; + protected DNSToSwitchMapping dnsResolver; + protected BookieAddressResolver bookieAddressResolver; + @StatsDoc( + name = BOOKIES_JOINED, + help = "The distribution of number of bookies joined the cluster on each network topology change" + ) + protected OpStatsLogger bookiesJoinedCounter = null; + @StatsDoc( + name = BOOKIES_LEFT, + help = "The distribution of number of bookies left the cluster on each network topology change" + ) + protected OpStatsLogger bookiesLeftCounter = null; protected static class TruePredicate implements Predicate { - public static final TruePredicate INSTANCE = new TruePredicate(); @Override public boolean apply(BookieNode candidate, Ensemble chosenNodes) { return true; } - } protected static class EnsembleForReplacementWithNoConstraints implements Ensemble { public static final EnsembleForReplacementWithNoConstraints INSTANCE = new EnsembleForReplacementWithNoConstraints(); - static final List EMPTY_LIST = new ArrayList(0); + static final List EMPTY_LIST = new ArrayList(0); @Override public boolean addNode(BookieNode node) { @@ -61,7 +105,7 @@ public boolean addNode(BookieNode node) { } @Override - public List toList() { + public List toList() { return EMPTY_LIST; } @@ -77,40 +121,6 @@ public boolean validate() { } - protected static class BookieNode extends NodeBase { - - private final BookieSocketAddress addr; // identifier of a bookie node. 
- - BookieNode(BookieSocketAddress addr, String networkLoc) { - super(addr.toString(), networkLoc); - this.addr = addr; - } - - public BookieSocketAddress getAddr() { - return addr; - } - - @Override - public int hashCode() { - return name.hashCode(); - } - - @Override - public boolean equals(Object obj) { - if (!(obj instanceof BookieNode)) { - return false; - } - BookieNode other = (BookieNode) obj; - return getName().equals(other.getName()); - } - - @Override - public String toString() { - return String.format("", name); - } - - } - /** * A predicate checking the rack coverage for write quorum in {@link RoundRobinDistributionSchedule}, * which ensures that a write quorum should be covered by at least two racks. @@ -294,8 +304,6 @@ public void addBookie(BookieNode candidate) { } } - - final int distanceFromLeaves; final int ensembleSize; final int writeQuorumSize; @@ -454,8 +462,8 @@ public boolean addNode(BookieNode node) { } @Override - public List toList() { - ArrayList addresses = new ArrayList(ensembleSize); + public List toList() { + ArrayList addresses = new ArrayList(ensembleSize); for (BookieNode bn : chosenNodes) { addresses.add(bn.getAddr()); } @@ -469,7 +477,7 @@ public List toList() { */ @Override public boolean validate() { - HashSet addresses = new HashSet(ensembleSize); + HashSet addresses = new HashSet(ensembleSize); HashSet racksOrRegions = new HashSet(); for (BookieNode bn : chosenNodes) { if (addresses.contains(bn.getAddr())) { @@ -489,9 +497,138 @@ public String toString() { } } + static class DefaultResolver implements DNSToSwitchMapping { + + final Supplier defaultRackSupplier; + + public DefaultResolver(Supplier defaultRackSupplier) { + checkNotNull(defaultRackSupplier, "defaultRackSupplier should not be null"); + this.defaultRackSupplier = defaultRackSupplier; + } + + @Override + public List resolve(List names) { + List rNames = new ArrayList(names.size()); + for (@SuppressWarnings("unused") String name : names) { + final String defaultRack = defaultRackSupplier.get(); + checkNotNull(defaultRack, "defaultRack cannot be null"); + rNames.add(defaultRack); + } + return rNames; + } + + @Override + public void reloadCachedMappings() { + // nop + } + } + + /** + * Decorator for any existing DNS resolver. + * Backfills returned data with appropriate default rack info.
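DNSResolverDecorator, whose declaration continues below, substitutes the default rack whenever the wrapped resolver fails, either wholesale (a null or wrong-sized result) or for individual names. A compact sketch of that backfilling contract, written against a hypothetical Resolver interface rather than the real DNSToSwitchMapping:

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.function.Supplier;

public class BackfillingResolverSketch {
    interface Resolver {
        List<String> resolve(List<String> names); // may return null, or null entries
    }

    static Resolver withDefault(Resolver inner, Supplier<String> defaultRack) {
        return names -> {
            if (names == null) {
                return Collections.emptyList();
            }
            String fallback = defaultRack.get();
            List<String> resolved = inner.resolve(names);
            if (resolved == null || resolved.size() != names.size()) {
                // wholesale failure: every name falls back to the default rack
                List<String> all = new ArrayList<>(names.size());
                for (int i = 0; i < names.size(); i++) {
                    all.add(fallback);
                }
                return all;
            }
            List<String> out = new ArrayList<>(resolved); // copy in case the result is immutable
            for (int i = 0; i < out.size(); i++) {
                if (out.get(i) == null) {
                    out.set(i, fallback); // backfill an individual failure
                }
            }
            return out;
        };
    }
}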
+ */ + static class DNSResolverDecorator implements DNSToSwitchMapping { + + final Supplier defaultRackSupplier; + final DNSToSwitchMapping resolver; + @StatsDoc( + name = FAILED_TO_RESOLVE_NETWORK_LOCATION_COUNT, + help = "total number of times Resolver failed to resolve rack information of a node" + ) + final Counter failedToResolveNetworkLocationCounter; + + DNSResolverDecorator(DNSToSwitchMapping resolver, Supplier defaultRackSupplier, + Counter failedToResolveNetworkLocationCounter) { + checkNotNull(resolver, "Resolver cannot be null"); + checkNotNull(defaultRackSupplier, "defaultRackSupplier should not be null"); + this.defaultRackSupplier = defaultRackSupplier; + this.resolver = resolver; + this.failedToResolveNetworkLocationCounter = failedToResolveNetworkLocationCounter; + } + + @Override + public void setBookieAddressResolver(BookieAddressResolver bookieAddressResolver) { + this.resolver.setBookieAddressResolver(bookieAddressResolver); + } + + @Override + public List resolve(List names) { + if (names == null) { + return Collections.emptyList(); + } + final String defaultRack = defaultRackSupplier.get(); + checkNotNull(defaultRack, "Default rack cannot be null"); + + List rNames = resolver.resolve(names); + if (rNames != null && rNames.size() == names.size()) { + for (int i = 0; i < rNames.size(); ++i) { + if (rNames.get(i) == null) { + LOG.warn("Failed to resolve network location for {}, using default rack for it : {}.", + names.get(i), defaultRack); + failedToResolveNetworkLocationCounter.inc(); + rNames.set(i, defaultRack); + } + } + return rNames; + } + + LOG.warn("Failed to resolve network location for {}, using default rack for them : {}.", names, + defaultRack); + rNames = new ArrayList<>(names.size()); + + for (int i = 0; i < names.size(); ++i) { + failedToResolveNetworkLocationCounter.inc(); + rNames.add(defaultRack); + } + return rNames; + } + + @Override + public boolean useHostName() { + return resolver.useHostName(); + } + + @Override + public void reloadCachedMappings() { + resolver.reloadCachedMappings(); + } + } + + static Set getNetworkLocations(Set bookieNodes) { + Set networkLocs = new HashSet<>(); + for (Node bookieNode : bookieNodes) { + networkLocs.add(bookieNode.getNetworkLocation()); + } + return networkLocs; + } + + /** + * Shuffle all the entries of an array that matches a mask. + * It assumes all entries with the same mask are contiguous in the array. 
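The shuffleWithMask helper defined next finds the contiguous run of write-set entries whose tag bits match the mask and shuffles only that run. A sketch of the same idea over a plain int[], using a textbook Fisher-Yates restricted to the matching run (the WriteSet wrapper and its swap-via-set idiom are elided):

import java.util.concurrent.ThreadLocalRandom;

public class MaskedShuffleSketch {
    // Shuffle only the entries of 'a' whose (value & bits) == mask,
    // assuming all matching entries are contiguous.
    static void shuffleWithMask(int[] a, int mask, int bits) {
        int first = -1;
        int last = -1;
        for (int i = 0; i < a.length; i++) {
            if ((a[i] & bits) == mask) {
                if (first == -1) {
                    first = i;
                }
                last = i;
            }
        }
        if (first == -1) {
            return; // nothing matched
        }
        // Fisher-Yates over the run [first, last]
        for (int i = last; i > first; i--) {
            int swapWith = first + ThreadLocalRandom.current().nextInt(i - first + 1);
            int tmp = a[i];
            a[i] = a[swapWith];
            a[swapWith] = tmp;
        }
    }
}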
+ */ + static void shuffleWithMask(DistributionSchedule.WriteSet writeSet, + int mask, int bits) { + int first = -1; + int last = -1; + for (int i = 0; i < writeSet.size(); i++) { + if ((writeSet.get(i) & bits) == mask) { + if (first == -1) { + first = i; + } + last = i; + } + } + if (first != -1) { + for (int i = last + 1; i > first; i--) { + int swapWith = ThreadLocalRandom.current().nextInt(i); + writeSet.set(swapWith, writeSet.set(i, writeSet.get(swapWith))); + } + } + } + @Override public DistributionSchedule.WriteSet reorderReadSequence( - List ensemble, + List ensemble, BookiesHealthInfo bookiesHealthInfo, DistributionSchedule.WriteSet writeSet) { return writeSet; @@ -499,7 +636,7 @@ public DistributionSchedule.WriteSet reorderReadSequence( @Override public DistributionSchedule.WriteSet reorderReadLACSequence( - List ensemble, + List ensemble, BookiesHealthInfo bookiesHealthInfo, DistributionSchedule.WriteSet writeSet) { DistributionSchedule.WriteSet retList = reorderReadSequence( @@ -507,4 +644,210 @@ public DistributionSchedule.WriteSet reorderReadLACSequence( retList.addMissingIndices(ensemble.size()); return retList; } + + @Override + public Set onClusterChanged(Set writableBookies, + Set readOnlyBookies) { + rwLock.writeLock().lock(); + try { + ImmutableSet joinedBookies, leftBookies, deadBookies; + Set oldBookieSet = knownBookies.keySet(); + // left bookies : bookies in known bookies, but not in new writable bookie cluster. + leftBookies = Sets.difference(oldBookieSet, writableBookies).immutableCopy(); + // joined bookies : bookies in new writable bookie cluster, but not in known bookies + joinedBookies = Sets.difference(writableBookies, oldBookieSet).immutableCopy(); + // dead bookies. + deadBookies = Sets.difference(leftBookies, readOnlyBookies).immutableCopy(); + if (LOG.isDebugEnabled()) { + LOG.debug("Cluster changed : left bookies are {}, joined bookies are {}, while dead bookies are {}.", + leftBookies, joinedBookies, deadBookies); + } + handleBookiesThatLeft(leftBookies); + handleBookiesThatJoined(joinedBookies); + if (this.isWeighted && (leftBookies.size() > 0 || joinedBookies.size() > 0)) { + this.weightedSelection.updateMap(this.bookieInfoMap); + } + if (!readOnlyBookies.isEmpty()) { + this.readOnlyBookies = ImmutableSet.copyOf(readOnlyBookies); + } + + return deadBookies; + } finally { + rwLock.writeLock().unlock(); + } + } + + /* + * this method should be called in writelock scope of 'rwLock' + */ + @Override + public void handleBookiesThatLeft(Set leftBookies) { + for (BookieId addr : leftBookies) { + try { + BookieNode node = knownBookies.remove(addr); + if (null != node) { + topology.remove(node); + if (this.isWeighted) { + this.bookieInfoMap.remove(node); + } + + bookiesLeftCounter.registerSuccessfulValue(1L); + + if (LOG.isDebugEnabled()) { + LOG.debug("Cluster changed : bookie {} left from cluster.", addr); + } + } + } catch (Throwable t) { + LOG.error("Unexpected exception while handling leaving bookie {}", addr, t); + if (bookiesLeftCounter != null) { + bookiesLeftCounter.registerFailedValue(1L); + } + // no need to re-throw; we want to process the rest of the bookies + // exception anyways will be caught/logged/suppressed in the ZK's event handler + } + } + } + + /* + * this method should be called in writelock scope of 'rwLock' + */ + @Override + public void handleBookiesThatJoined(Set joinedBookies) { + // node joined + for (BookieId addr : joinedBookies) { + try { + BookieNode node = createBookieNode(addr); + topology.add(node); + 
knownBookies.put(addr, node); + historyBookies.put(addr, node); + if (this.isWeighted) { + this.bookieInfoMap.putIfAbsent(node, new BookieInfo()); + } + + bookiesJoinedCounter.registerSuccessfulValue(1L); + + if (LOG.isDebugEnabled()) { + LOG.debug("Cluster changed : bookie {} joined the cluster.", addr); + } + } catch (Throwable t) { + // topology.add() throws unchecked exception + LOG.error("Unexpected exception while handling joining bookie {}", addr, t); + + bookiesJoinedCounter.registerFailedValue(1L); + // no need to re-throw; we want to process the rest of the bookies + // exception anyways will be caught/logged/suppressed in the ZK's event handler + } + } + } + + @Override + public void onBookieRackChange(List bookieAddressList) { + rwLock.writeLock().lock(); + try { + bookieAddressList.forEach(bookieAddress -> { + try { + BookieNode node = knownBookies.get(bookieAddress); + if (node != null) { + // refresh the rack info if it's a known bookie + BookieNode newNode = createBookieNode(bookieAddress); + if (!newNode.getNetworkLocation().equals(node.getNetworkLocation())) { + topology.remove(node); + topology.add(newNode); + knownBookies.put(bookieAddress, newNode); + historyBookies.put(bookieAddress, newNode); + } + } + } catch (IllegalArgumentException | NetworkTopologyImpl.InvalidTopologyException e) { + LOG.error("Failed to update bookie rack info: {} ", bookieAddress, e); + } + }); + } finally { + rwLock.writeLock().unlock(); + } + } + + public static int differBetweenBookies(List bookiesA, List bookiesB) { + if (CollectionUtils.isEmpty(bookiesA) || CollectionUtils.isEmpty(bookiesB)) { + return Integer.MAX_VALUE; + } + if (bookiesA.size() != bookiesB.size()) { + return Integer.MAX_VALUE; + } + int differ = 0; + for (int i = 0; i < bookiesA.size(); i++) { + if (!bookiesA.get(i).equals(bookiesB.get(i))) { + differ++; + } + } + return differ; + } + + @Override + public void updateBookieInfo(Map bookieInfoMap) { + if (!isWeighted) { + LOG.info("bookieFreeDiskInfo callback called even without weighted placement policy being used."); + return; + } + rwLock.writeLock().lock(); + try { + List allBookies = new ArrayList(knownBookies.values()); + // create a new map to reflect the new mapping + Map map = new HashMap(); + for (BookieNode bookie : allBookies) { + if (bookieInfoMap.containsKey(bookie.getAddr())) { + map.put(bookie, bookieInfoMap.get(bookie.getAddr())); + } else { + map.put(bookie, new BookieInfo()); + } + } + this.bookieInfoMap = map; + this.weightedSelection.updateMap(this.bookieInfoMap); + } finally { + rwLock.writeLock().unlock(); + } + } + + protected BookieNode createBookieNode(BookieId addr) { + return new BookieNode(addr, resolveNetworkLocation(addr)); + } + + protected BookieNode createDummyLocalBookieNode(String hostname) { + return new BookieNode(BookieSocketAddress.createDummyBookieIdForHostname(hostname), + NetUtils.resolveNetworkLocation(dnsResolver, new BookieSocketAddress(hostname, 0))); + } + + protected String resolveNetworkLocation(BookieId addr) { + try { + return NetUtils.resolveNetworkLocation(dnsResolver, bookieAddressResolver.resolve(addr)); + } catch (BookieAddressResolver.BookieIdNotResolvedException err) { + BookieNode historyBookie = historyBookies.get(addr); + if (null != historyBookie) { + return historyBookie.getNetworkLocation(); + } + String defaultRack = getDefaultRack(); + LOG.error("Cannot resolve bookieId {} to a network address, resolving as {}.
{}", addr, + defaultRack, err.getMessage()); + return defaultRack; + } + } + + protected String getDefaultRack() { + return NetworkTopology.DEFAULT_REGION_AND_RACK; + } + + protected Set convertBookiesToNodes(Collection bookies) { + Set nodes = new HashSet(); + for (BookieId addr : bookies) { + nodes.add(convertBookieToNode(addr)); + } + return nodes; + } + + protected BookieNode convertBookieToNode(BookieId addr) { + BookieNode bn = knownBookies.get(addr); + if (null == bn) { + bn = createBookieNode(addr); + } + return bn; + } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/TryReadLastConfirmedOp.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/TryReadLastConfirmedOp.java index 1311d316c86..fb01843e64b 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/TryReadLastConfirmedOp.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/TryReadLastConfirmedOp.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -19,9 +19,8 @@ import io.netty.buffer.ByteBuf; import java.util.List; - import org.apache.bookkeeper.client.ReadLastConfirmedOp.LastConfirmedDataCallback; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.proto.BookieClient; import org.apache.bookkeeper.proto.BookieProtocol; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.ReadEntryCallback; @@ -45,10 +44,10 @@ class TryReadLastConfirmedOp implements ReadEntryCallback { volatile boolean hasValidResponse = false; volatile boolean completed = false; RecoveryData maxRecoveredData; - final List currentEnsemble; + final List currentEnsemble; TryReadLastConfirmedOp(LedgerHandle lh, BookieClient bookieClient, - List ensemble, LastConfirmedDataCallback cb, long lac) { + List ensemble, LastConfirmedDataCallback cb, long lac) { this.lh = lh; this.bookieClient = bookieClient; this.cb = cb; diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/UpdateLedgerOp.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/UpdateLedgerOp.java index befd4f37248..bcdb20464ad 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/UpdateLedgerOp.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/UpdateLedgerOp.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file * distributed with this work for additional information @@ -19,7 +19,6 @@ package org.apache.bookkeeper.client; import com.google.common.util.concurrent.RateLimiter; - import java.io.IOException; import java.util.Collection; import java.util.Collections; @@ -30,14 +29,15 @@ import java.util.concurrent.CompletableFuture; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ExecutionException; +import java.util.concurrent.Semaphore; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicReference; import java.util.stream.Collectors; - import org.apache.bookkeeper.bookie.BookieShell.UpdateLedgerNotifier; +import org.apache.bookkeeper.client.api.LedgerMetadata; import org.apache.bookkeeper.meta.LedgerManager; -import org.apache.bookkeeper.net.BookieSocketAddress; -import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GenericCallbackFuture; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.versioning.Versioned; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -73,8 +73,9 @@ public UpdateLedgerOp(final BookKeeper bkc, final BookKeeperAdmin admin) { * if there is an error when updating bookie id in ledger * metadata */ - public void updateBookieIdInLedgers(final BookieSocketAddress oldBookieId, final BookieSocketAddress newBookieId, - final int rate, final int limit, final UpdateLedgerNotifier progressable) + public void updateBookieIdInLedgers(final BookieId oldBookieId, final BookieId newBookieId, + final int rate, int maxOutstandingReads, final int limit, + final UpdateLedgerNotifier progressable) throws IOException, InterruptedException { final AtomicInteger issuedLedgerCnt = new AtomicInteger(); @@ -83,40 +84,39 @@ public void updateBookieIdInLedgers(final BookieSocketAddress oldBookieId, final final Set> outstanding = Collections.newSetFromMap(new ConcurrentHashMap, Boolean>()); final RateLimiter throttler = RateLimiter.create(rate); + final Semaphore outstandingReads = new Semaphore(maxOutstandingReads); final Iterator ledgerItr = admin.listLedgers().iterator(); // iterate through all the ledgers while (ledgerItr.hasNext() && !finalPromise.isDone() && (limit == Integer.MIN_VALUE || issuedLedgerCnt.get() < limit)) { - // throttler to control updates per second - throttler.acquire(); + // semaphore to control reads according to update throttling + outstandingReads.acquire(); final long ledgerId = ledgerItr.next(); issuedLedgerCnt.incrementAndGet(); - GenericCallbackFuture readPromise = new GenericCallbackFuture<>(); - lm.readLedgerMetadata(ledgerId, readPromise); - CompletableFuture writePromise = readPromise.thenCompose((readMetadata) -> { - AtomicReference ref = new AtomicReference<>(readMetadata); + CompletableFuture> writePromise = lm.readLedgerMetadata(ledgerId) + .thenCompose((readMetadata) -> { + AtomicReference> ref = new AtomicReference<>(readMetadata); return new MetadataUpdateLoop( lm, ledgerId, ref::get, (metadata) -> { - return metadata.getEnsembles().values().stream() + return metadata.getAllEnsembles().values().stream() .flatMap(Collection::stream) - .filter(b -> b.equals(oldBookieId)) - .count() > 0; + .anyMatch(b -> b.equals(oldBookieId)); }, (metadata) -> { return replaceBookieInEnsembles(metadata, oldBookieId, newBookieId); }, - ref::compareAndSet).run(); + ref::compareAndSet, throttler).run(); }); outstanding.add(writePromise); writePromise.whenComplete((metadata, ex) -> { if (ex != null - && !(ex instanceof BKException.BKNoSuchLedgerExistsException)) 
{ + && !(ex instanceof BKException.BKNoSuchLedgerExistsOnMetadataServerException)) { String error = String.format("Failed to update ledger metadata %s, replacing %s with %s", ledgerId, oldBookieId, newBookieId); LOG.error(error, ex); @@ -128,6 +128,7 @@ public void updateBookieIdInLedgers(final BookieSocketAddress oldBookieId, final updatedLedgerCnt.incrementAndGet(); progressable.progress(updatedLedgerCnt.get(), issuedLedgerCnt.get()); } + outstandingReads.release(); outstanding.remove(writePromise); }); } @@ -158,11 +159,11 @@ public void updateBookieIdInLedgers(final BookieSocketAddress oldBookieId, final } private static LedgerMetadata replaceBookieInEnsembles(LedgerMetadata metadata, - BookieSocketAddress oldBookieId, - BookieSocketAddress newBookieId) { + BookieId oldBookieId, + BookieId newBookieId) { LedgerMetadataBuilder builder = LedgerMetadataBuilder.from(metadata); - for (Map.Entry> e : metadata.getEnsembles().entrySet()) { - List newEnsemble = e.getValue().stream() + for (Map.Entry> e : metadata.getAllEnsembles().entrySet()) { + List newEnsemble = e.getValue().stream() .map(b -> b.equals(oldBookieId) ? newBookieId : b) .collect(Collectors.toList()); builder.replaceEnsembleEntry(e.getKey(), newEnsemble); diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/WeightedRandomSelection.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/WeightedRandomSelection.java index 040425075eb..8a44174a45d 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/WeightedRandomSelection.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/WeightedRandomSelection.java @@ -18,146 +18,19 @@ package org.apache.bookkeeper.client; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Comparator; -import java.util.HashMap; -import java.util.List; +import java.util.Collection; import java.util.Map; -import java.util.TreeMap; -import java.util.concurrent.locks.ReadWriteLock; -import java.util.concurrent.locks.ReentrantReadWriteLock; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -class WeightedRandomSelection { - static final Logger LOG = LoggerFactory.getLogger(WeightedRandomSelection.class); +interface WeightedRandomSelection { interface WeightedObject { long getWeight(); } - Double randomMax; - int maxProbabilityMultiplier; - Map map; - TreeMap cummulativeMap = new TreeMap(); - ReadWriteLock rwLock = new ReentrantReadWriteLock(true); - - WeightedRandomSelection() { - maxProbabilityMultiplier = -1; - } - - WeightedRandomSelection(int maxMultiplier) { - this.maxProbabilityMultiplier = maxMultiplier; - } - - public void setMaxProbabilityMultiplier(int max) { - this.maxProbabilityMultiplier = max; - } - - void updateMap(Map map) { - // get the sum total of all the values; this will be used to - // calculate the weighted probability later on - Long totalWeight = 0L, min = Long.MAX_VALUE; - List values = new ArrayList(map.values()); - Collections.sort(values, new Comparator() { - public int compare(WeightedObject o1, WeightedObject o2) { - long diff = o1.getWeight() - o2.getWeight(); - if (diff < 0L) { - return -1; - } else if (diff > 0L) { - return 1; - } else { - return 0; - } - } - }); - for (int i = 0; i < values.size(); i++) { - totalWeight += values.get(i).getWeight(); - if (values.get(i).getWeight() != 0 && min > values.get(i).getWeight()) { - min = values.get(i).getWeight(); - } - } - - double median = 0; - if (totalWeight == 0) { - // all the values are zeros; assign a value of 1 to all 
and the totalWeight equal - // to the size of the values - min = 1L; - median = 1; - totalWeight = (long) values.size(); - } else { - int mid = values.size() / 2; - if ((values.size() % 2) == 1) { - median = values.get(mid).getWeight(); - } else { - median = (double) (values.get(mid - 1).getWeight() + values.get(mid).getWeight()) / 2; - } - } - - double medianWeight, minWeight; - medianWeight = median / (double) totalWeight; - minWeight = (double) min / totalWeight; - if (LOG.isDebugEnabled()) { - LOG.debug("Updating weights map. MediaWeight: {} MinWeight: {}", medianWeight, minWeight); - } + void updateMap(Map map); - double maxWeight = maxProbabilityMultiplier * medianWeight; - Map weightMap = new HashMap(); - for (Map.Entry e : map.entrySet()) { - double weightedProbability; - if (e.getValue().getWeight() > 0) { - weightedProbability = (double) e.getValue().getWeight() / (double) totalWeight; - } else { - weightedProbability = minWeight; - } - if (maxWeight > 0 && weightedProbability > maxWeight) { - weightedProbability = maxWeight; - if (LOG.isDebugEnabled()) { - LOG.debug("Capping the probability to {} for {} Value: {}", - weightedProbability, e.getKey(), e.getValue()); - } - } - weightMap.put(e.getKey(), weightedProbability); - } + T getNextRandom(); - // The probability of picking a bookie randomly is defaultPickProbability - // but we change that priority by looking at the weight that each bookie - // carries. - TreeMap tmpCummulativeMap = new TreeMap(); - Double key = 0.0; - for (Map.Entry e : weightMap.entrySet()) { - tmpCummulativeMap.put(key, e.getKey()); - if (LOG.isDebugEnabled()) { - LOG.debug("Key: {} Value: {} AssignedKey: {} AssignedWeight: {}", - e.getKey(), e.getValue(), key, e.getValue()); - } - key += e.getValue(); - } + T getNextRandom(Collection selectedNodes); - rwLock.writeLock().lock(); - try { - this.map = map; - cummulativeMap = tmpCummulativeMap; - randomMax = key; - } finally { - rwLock.writeLock().unlock(); - } - } - - T getNextRandom() { - rwLock.readLock().lock(); - try { - // pick a random number between 0 and randMax - Double randomNum = randomMax * Math.random(); - // find the nearest key in the map corresponding to the randomNum - Double key = cummulativeMap.floorKey(randomNum); - //LOG.info("Random max: {} CummulativeMap size: {} selected key: {}", randomMax, cummulativeMap.size(), - // key); - return cummulativeMap.get(key); - } finally { - rwLock.readLock().unlock(); - } - } + void setMaxProbabilityMultiplier(int max); } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/WeightedRandomSelectionImpl.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/WeightedRandomSelectionImpl.java new file mode 100644 index 00000000000..1a2b9f0dcd5 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/WeightedRandomSelectionImpl.java @@ -0,0 +1,168 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.bookkeeper.client; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.TreeMap; +import java.util.concurrent.locks.ReadWriteLock; +import java.util.concurrent.locks.ReentrantReadWriteLock; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +class WeightedRandomSelectionImpl implements WeightedRandomSelection { + static final Logger LOG = LoggerFactory.getLogger(WeightedRandomSelectionImpl.class); + + Double randomMax; + int maxProbabilityMultiplier; + Map map; + TreeMap cumulativeMap = new TreeMap(); + ReadWriteLock rwLock = new ReentrantReadWriteLock(true); + + WeightedRandomSelectionImpl() { + maxProbabilityMultiplier = -1; + } + + WeightedRandomSelectionImpl(int maxMultiplier) { + this.maxProbabilityMultiplier = maxMultiplier; + } + + @Override + public void updateMap(Map map) { + // get the sum total of all the values; this will be used to + // calculate the weighted probability later on + Long totalWeight = 0L, min = Long.MAX_VALUE; + List values = new ArrayList(map.values()); + Collections.sort(values, new Comparator() { + @Override + public int compare(WeightedObject o1, WeightedObject o2) { + long diff = o1.getWeight() - o2.getWeight(); + if (diff < 0L) { + return -1; + } else if (diff > 0L) { + return 1; + } else { + return 0; + } + } + }); + for (int i = 0; i < values.size(); i++) { + totalWeight += values.get(i).getWeight(); + if (values.get(i).getWeight() != 0 && min > values.get(i).getWeight()) { + min = values.get(i).getWeight(); + } + } + + double median = 0; + if (totalWeight == 0) { + // all the values are zeros; assign a value of 1 to all and the totalWeight equal + // to the size of the values + min = 1L; + median = 1; + totalWeight = (long) values.size(); + } else { + int mid = values.size() / 2; + if ((values.size() % 2) == 1) { + median = values.get(mid).getWeight(); + } else { + median = (double) (values.get(mid - 1).getWeight() + values.get(mid).getWeight()) / 2; + } + } + + double medianWeight, minWeight; + medianWeight = median / (double) totalWeight; + minWeight = (double) min / totalWeight; + + if (LOG.isDebugEnabled()) { + LOG.debug("Updating weights map. 
MediaWeight: {} MinWeight: {}", medianWeight, minWeight); + } + + double maxWeight = maxProbabilityMultiplier * medianWeight; + Map weightMap = new HashMap(); + for (Map.Entry e : map.entrySet()) { + double weightedProbability; + if (e.getValue().getWeight() > 0) { + weightedProbability = (double) e.getValue().getWeight() / (double) totalWeight; + } else { + weightedProbability = minWeight; + } + if (maxWeight > 0 && weightedProbability > maxWeight) { + weightedProbability = maxWeight; + if (LOG.isDebugEnabled()) { + LOG.debug("Capping the probability to {} for {} Value: {}", + weightedProbability, e.getKey(), e.getValue()); + } + } + weightMap.put(e.getKey(), weightedProbability); + } + + // The probability of picking a bookie randomly is defaultPickProbability + // but we change that priority by looking at the weight that each bookie + // carries. + TreeMap tmpCumulativeMap = new TreeMap(); + Double key = 0.0; + for (Map.Entry e : weightMap.entrySet()) { + tmpCumulativeMap.put(key, e.getKey()); + if (LOG.isDebugEnabled()) { + LOG.debug("Key: {} Value: {} AssignedKey: {} AssignedWeight: {}", + e.getKey(), e.getValue(), key, e.getValue()); + } + key += e.getValue(); + } + + rwLock.writeLock().lock(); + try { + this.map = map; + cumulativeMap = tmpCumulativeMap; + randomMax = key; + } finally { + rwLock.writeLock().unlock(); + } + } + + @Override + public T getNextRandom() { + rwLock.readLock().lock(); + try { + // pick a random number between 0 and randMax + Double randomNum = randomMax * Math.random(); + // find the nearest key in the map corresponding to the randomNum + Double key = cumulativeMap.floorKey(randomNum); + return cumulativeMap.get(key); + } finally { + rwLock.readLock().unlock(); + } + } + + @Override + public void setMaxProbabilityMultiplier(int max) { + this.maxProbabilityMultiplier = max; + } + + @Override + public T getNextRandom(Collection selectedNodes) { + throw new UnsupportedOperationException("getNextRandom is not implemented for WeightedRandomSelectionImpl"); + } + +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/ZoneawareEnsemblePlacementPolicy.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/ZoneawareEnsemblePlacementPolicy.java new file mode 100644 index 00000000000..cf36c7d83fe --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/ZoneawareEnsemblePlacementPolicy.java @@ -0,0 +1,131 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.bookkeeper.client; + +import io.netty.util.HashedWheelTimer; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import org.apache.bookkeeper.conf.ClientConfiguration; +import org.apache.bookkeeper.feature.FeatureProvider; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.net.BookieNode; +import org.apache.bookkeeper.net.DNSToSwitchMapping; +import org.apache.bookkeeper.proto.BookieAddressResolver; +import org.apache.bookkeeper.stats.StatsLogger; + +/** + * A placement policy implementation use zone information for placing ensembles. + * + * @see EnsemblePlacementPolicy + */ +public class ZoneawareEnsemblePlacementPolicy extends ZoneawareEnsemblePlacementPolicyImpl + implements ITopologyAwareEnsemblePlacementPolicy { + ZoneawareEnsemblePlacementPolicyImpl slave = null; + + public ZoneawareEnsemblePlacementPolicy() { + super(); + } + + @Override + public EnsemblePlacementPolicy initialize(ClientConfiguration conf, + Optional optionalDnsResolver, HashedWheelTimer timer, + FeatureProvider featureProvider, + StatsLogger statsLogger, BookieAddressResolver bookieAddressResolver) { + if (conf.getNetworkTopologyStabilizePeriodSeconds() > 0) { + ClientConfiguration confClone = new ClientConfiguration(conf); + confClone.setNetworkTopologyStabilizePeriodSeconds(0); + super.initialize(confClone, optionalDnsResolver, timer, featureProvider, + statsLogger, bookieAddressResolver); + slave = new ZoneawareEnsemblePlacementPolicyImpl(); + slave.initialize(conf, optionalDnsResolver, timer, featureProvider, statsLogger, bookieAddressResolver); + } else { + super.initialize(conf, optionalDnsResolver, timer, featureProvider, statsLogger, bookieAddressResolver); + slave = null; + } + return this; + } + + @Override + public void uninitalize() { + super.uninitalize(); + if (null != slave) { + slave.uninitalize(); + } + } + + @Override + public Set onClusterChanged(Set writableBookies, + Set readOnlyBookies) { + Set deadBookies = super.onClusterChanged(writableBookies, readOnlyBookies); + if (null != slave) { + deadBookies = slave.onClusterChanged(writableBookies, readOnlyBookies); + } + return deadBookies; + } + + @Override + public PlacementResult> newEnsemble(int ensembleSize, int writeQuorumSize, + int ackQuorumSize, Map customMetadata, Set excludeBookies) + throws BKException.BKNotEnoughBookiesException { + try { + return super.newEnsemble(ensembleSize, writeQuorumSize, ackQuorumSize, customMetadata, excludeBookies); + } catch (BKException.BKNotEnoughBookiesException bnebe) { + if (slave == null) { + throw bnebe; + } else { + return slave.newEnsemble(ensembleSize, writeQuorumSize, ackQuorumSize, customMetadata, excludeBookies); + } + } + } + + @Override + public PlacementResult replaceBookie(int ensembleSize, int writeQuorumSize, int ackQuorumSize, + Map customMetadata, List currentEnsemble, + BookieId bookieToReplace, Set excludeBookies) + throws BKException.BKNotEnoughBookiesException { + try { + return super.replaceBookie(ensembleSize, writeQuorumSize, ackQuorumSize, customMetadata, + currentEnsemble, bookieToReplace, excludeBookies); + } catch (BKException.BKNotEnoughBookiesException bnebe) { + if (slave == null) { + throw bnebe; + } else { + return slave.replaceBookie(ensembleSize, writeQuorumSize, ackQuorumSize, customMetadata, + currentEnsemble, bookieToReplace, excludeBookies); + } + } + } + + @Override + public void handleBookiesThatLeft(Set leftBookies) { + super.handleBookiesThatLeft(leftBookies); + if 
(null != slave) { + slave.handleBookiesThatLeft(leftBookies); + } + } + + @Override + public void handleBookiesThatJoined(Set joinedBookies) { + super.handleBookiesThatJoined(joinedBookies); + if (null != slave) { + slave.handleBookiesThatJoined(joinedBookies); + } + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/ZoneawareEnsemblePlacementPolicyImpl.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/ZoneawareEnsemblePlacementPolicyImpl.java new file mode 100644 index 00000000000..1ce04c4be31 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/ZoneawareEnsemblePlacementPolicyImpl.java @@ -0,0 +1,1000 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bookkeeper.client; + +import static com.google.common.base.Preconditions.checkNotNull; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.BOOKIES_JOINED; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.BOOKIES_LEFT; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.FAILED_TO_RESOLVE_NETWORK_LOCATION_COUNT; +import static org.apache.bookkeeper.client.BookKeeperClientStats.NUM_WRITABLE_BOOKIES_IN_DEFAULT_FAULTDOMAIN; + +import com.google.common.cache.Cache; +import com.google.common.cache.CacheBuilder; +import com.google.common.cache.CacheLoader; +import io.netty.util.HashedWheelTimer; +import java.io.IOException; +import java.net.InetAddress; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Optional; +import java.util.Random; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.locks.ReentrantReadWriteLock; +import org.apache.bookkeeper.client.BKException.BKNotEnoughBookiesException; +import org.apache.bookkeeper.common.util.ReflectionUtils; +import org.apache.bookkeeper.conf.ClientConfiguration; +import org.apache.bookkeeper.conf.Configurable; +import org.apache.bookkeeper.feature.FeatureProvider; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.net.BookieNode; +import org.apache.bookkeeper.net.DNSToSwitchMapping; +import org.apache.bookkeeper.net.NetworkTopology; +import org.apache.bookkeeper.net.NetworkTopologyImpl; +import org.apache.bookkeeper.net.Node; +import org.apache.bookkeeper.net.NodeBase; +import org.apache.bookkeeper.net.ScriptBasedMapping; +import org.apache.bookkeeper.net.StabilizeNetworkTopology; +import org.apache.bookkeeper.proto.BookieAddressResolver; +import 
org.apache.bookkeeper.stats.Counter; +import org.apache.bookkeeper.stats.Gauge; +import org.apache.bookkeeper.stats.StatsLogger; +import org.apache.bookkeeper.stats.annotations.StatsDoc; +import org.apache.commons.collections4.CollectionUtils; +import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Simple zoneaware ensemble placement policy. + */ +public class ZoneawareEnsemblePlacementPolicyImpl extends TopologyAwareEnsemblePlacementPolicy { + + static final Logger LOG = LoggerFactory.getLogger(ZoneawareEnsemblePlacementPolicyImpl.class); + + public static final String UNKNOWN_ZONE = "UnknownZone"; + /* + * this defaultFaultDomain is used as placeholder network location for + * bookies for which network location can't be resolved. In + * ZoneawareEnsemblePlacementPolicyImpl zone is the fault domain and upgrade + * domain is logical concept to enable parallel patching by bringing down + * all the bookies in the upgrade domain. + */ + private String defaultFaultDomain = NetworkTopology.DEFAULT_ZONE_AND_UPGRADEDOMAIN; + protected ZoneAwareNodeLocation unresolvedNodeLocation = new ZoneAwareNodeLocation( + NetworkTopology.DEFAULT_ZONE, NetworkTopology.DEFAULT_UPGRADEDOMAIN); + private final Random rand; + protected StatsLogger statsLogger = null; + // Use a loading cache so slow bookies are expired. Use entryId as values. + protected Cache slowBookies; + protected BookieNode myNode = null; + protected String myZone = null; + protected boolean reorderReadsRandom = false; + protected int stabilizePeriodSeconds = 0; + protected int reorderThresholdPendingRequests = 0; + protected int maxWeightMultiple; + protected int minNumZonesPerWriteQuorum; + protected int desiredNumZonesPerWriteQuorum; + protected boolean enforceStrictZoneawarePlacement; + protected HashedWheelTimer timer; + protected final ConcurrentMap address2NodePlacement; + + @StatsDoc(name = FAILED_TO_RESOLVE_NETWORK_LOCATION_COUNT, help = "Counter for number of times" + + " DNSResolverDecorator failed to resolve Network Location") + protected Counter failedToResolveNetworkLocationCounter = null; + @StatsDoc(name = NUM_WRITABLE_BOOKIES_IN_DEFAULT_FAULTDOMAIN, help = "Gauge for the number of writable" + + " Bookies in default fault domain") + protected Gauge numWritableBookiesInDefaultFaultDomain; + + /** + * Zone and UpgradeDomain pair of a node. 
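ZoneAwareNodeLocation, declared just below, carries the two halves of a resolved network location of the form /zone/upgradeDomain; getZoneAwareNodeLocation later falls back to a placeholder location whenever the resolved string does not split into exactly two components. A minimal sketch of that parsing rule, with illustrative placeholder values:

public class ZoneParseSketch {
    static final String[] UNRESOLVED = {"/default-zone", "/default-upgradedomain"};

    // "/zone/ud" -> {"/zone", "/ud"}; anything else maps to the placeholder pair.
    static String[] parse(String networkLocation) {
        if (networkLocation == null || !networkLocation.startsWith("/")) {
            return UNRESOLVED;
        }
        String[] parts = networkLocation.substring(1).split("/");
        if (parts.length != 2 || parts[0].isEmpty() || parts[1].isEmpty()) {
            return UNRESOLVED;
        }
        return new String[] {"/" + parts[0], "/" + parts[1]};
    }

    public static void main(String[] args) {
        String[] loc = parse("/zone-a/ud-1");
        System.out.println(loc[0] + " " + loc[1]); // prints "/zone-a /ud-1"
    }
}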
+ */ + public static class ZoneAwareNodeLocation { + private final String zone; + private final String upgradeDomain; + private final String repString; + + public ZoneAwareNodeLocation(String zone, String upgradeDomain) { + this.zone = zone; + this.upgradeDomain = upgradeDomain; + repString = zone + upgradeDomain; + } + + public String getZone() { + return zone; + } + + public String getUpgradeDomain() { + return upgradeDomain; + } + + @Override + public int hashCode() { + return repString.hashCode(); + } + + @Override + public boolean equals(Object obj) { + return ((obj instanceof ZoneAwareNodeLocation) + && repString.equals(((ZoneAwareNodeLocation) obj).repString)); + } + } + + + ZoneawareEnsemblePlacementPolicyImpl() { + super(); + address2NodePlacement = new ConcurrentHashMap<BookieId, ZoneAwareNodeLocation>(); + rand = new Random(System.currentTimeMillis()); + } + + protected ZoneAwareNodeLocation getZoneAwareNodeLocation(BookieId addr) { + ZoneAwareNodeLocation nodeLocation = address2NodePlacement.get(addr); + if (null == nodeLocation) { + String networkLocation = resolveNetworkLocation(addr); + if (getDefaultFaultDomain().equals(networkLocation)) { + nodeLocation = unresolvedNodeLocation; + } else { + String[] parts = StringUtils.split(NodeBase.normalize(networkLocation), NodeBase.PATH_SEPARATOR); + if (parts.length != 2) { + nodeLocation = unresolvedNodeLocation; + } else { + nodeLocation = new ZoneAwareNodeLocation(NodeBase.PATH_SEPARATOR_STR + parts[0], + NodeBase.PATH_SEPARATOR_STR + parts[1]); + } + } + address2NodePlacement.putIfAbsent(addr, nodeLocation); + } + return nodeLocation; + } + + protected ZoneAwareNodeLocation getZoneAwareNodeLocation(BookieNode node) { + if (null == node || null == node.getAddr()) { + return unresolvedNodeLocation; + } + return getZoneAwareNodeLocation(node.getAddr()); + } + + @Override + public EnsemblePlacementPolicy initialize(ClientConfiguration conf, + Optional<DNSToSwitchMapping> optionalDnsResolver, HashedWheelTimer timer, FeatureProvider featureProvider, + StatsLogger statsLogger, BookieAddressResolver bookieAddressResolver) { + this.statsLogger = statsLogger; + this.bookieAddressResolver = bookieAddressResolver; + this.timer = timer; + this.bookiesJoinedCounter = statsLogger.getOpStatsLogger(BOOKIES_JOINED); + this.bookiesLeftCounter = statsLogger.getOpStatsLogger(BOOKIES_LEFT); + this.failedToResolveNetworkLocationCounter = statsLogger.getCounter(FAILED_TO_RESOLVE_NETWORK_LOCATION_COUNT); + this.numWritableBookiesInDefaultFaultDomain = new Gauge<Integer>() { + @Override + public Integer getDefaultValue() { + return 0; + } + + @Override + public Integer getSample() { + rwLock.readLock().lock(); + try { + return topology.countNumOfAvailableNodes(getDefaultFaultDomain(), Collections.emptySet()); + } finally { + rwLock.readLock().unlock(); + } + } + }; + this.statsLogger.registerGauge(NUM_WRITABLE_BOOKIES_IN_DEFAULT_FAULTDOMAIN, + numWritableBookiesInDefaultFaultDomain); + this.reorderThresholdPendingRequests = conf.getReorderThresholdPendingRequests(); + this.isWeighted = conf.getDiskWeightBasedPlacementEnabled(); + if (this.isWeighted) { + this.maxWeightMultiple = conf.getBookieMaxWeightMultipleForWeightBasedPlacement(); + this.weightedSelection = new DynamicWeightedRandomSelectionImpl<BookieNode>(this.maxWeightMultiple); + LOG.info("Weight based placement with max multiple of {}", this.maxWeightMultiple); + } else { + LOG.info("Not weighted"); + } + this.minNumZonesPerWriteQuorum = conf.getMinNumZonesPerWriteQuorum(); + this.desiredNumZonesPerWriteQuorum = conf.getDesiredNumZonesPerWriteQuorum(); + 
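// When enforceStrictZoneawarePlacement is false, newEnsemble and replaceBookie skip the zone/UD
// filtering below and fall back to random selection (see createNewEnsembleRandomly and
// selectBookieRandomly further down).
+ 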
this.enforceStrictZoneawarePlacement = conf.getEnforceStrictZoneawarePlacement(); + if (minNumZonesPerWriteQuorum > desiredNumZonesPerWriteQuorum) { + LOG.error( + "Misconfiguration: for ZoneawareEnsemblePlacementPolicy, minNumZonesPerWriteQuorum: {} can't be" + + " greater than desiredNumZonesPerWriteQuorum: {}", + minNumZonesPerWriteQuorum, desiredNumZonesPerWriteQuorum); + throw new IllegalArgumentException("minNumZonesPerWriteQuorum: " + minNumZonesPerWriteQuorum + + " can't be greater than desiredNumZonesPerWriteQuorum: " + desiredNumZonesPerWriteQuorum); + } + DNSToSwitchMapping actualDNSResolver; + if (optionalDnsResolver.isPresent()) { + actualDNSResolver = optionalDnsResolver.get(); + } else { + String dnsResolverName = conf.getString(REPP_DNS_RESOLVER_CLASS, ScriptBasedMapping.class.getName()); + actualDNSResolver = ReflectionUtils.newInstance(dnsResolverName, DNSToSwitchMapping.class); + actualDNSResolver.setBookieAddressResolver(bookieAddressResolver); + if (actualDNSResolver instanceof Configurable) { + ((Configurable) actualDNSResolver).setConf(conf); + } + } + + this.dnsResolver = new DNSResolverDecorator(actualDNSResolver, () -> this.getDefaultFaultDomain(), + failedToResolveNetworkLocationCounter); + dnsResolver.setBookieAddressResolver(bookieAddressResolver); + this.stabilizePeriodSeconds = conf.getNetworkTopologyStabilizePeriodSeconds(); + // create the network topology + if (stabilizePeriodSeconds > 0) { + this.topology = new StabilizeNetworkTopology(timer, stabilizePeriodSeconds); + } else { + this.topology = new NetworkTopologyImpl(); + } + try { + myNode = createDummyLocalBookieNode(InetAddress.getLocalHost().getHostAddress()); + myZone = getZoneAwareNodeLocation(myNode).getZone(); + } catch (IOException e) { + LOG.error("Failed to get local host address : ", e); + throw new RuntimeException(e); + } + LOG.info("Initialized zoneaware ensemble placement policy @ {} @ {} : {}.", myNode, + myNode.getNetworkLocation(), dnsResolver.getClass().getName()); + + slowBookies = CacheBuilder.newBuilder() + .expireAfterWrite(conf.getBookieFailureHistoryExpirationMSec(), TimeUnit.MILLISECONDS) + .build(new CacheLoader<BookieId, Long>() { + @Override + public Long load(BookieId key) throws Exception { + return -1L; + } + }); + return this; + } + + public ZoneawareEnsemblePlacementPolicyImpl withDefaultFaultDomain(String defaultFaultDomain) { + checkNotNull(defaultFaultDomain, "Default fault domain cannot be null"); + + String[] parts = StringUtils.split(NodeBase.normalize(defaultFaultDomain), NodeBase.PATH_SEPARATOR); + if (parts.length != 2) { + LOG.error("provided defaultFaultDomain: {} is not valid", defaultFaultDomain); + throw new IllegalArgumentException("invalid defaultFaultDomain"); + } else { + unresolvedNodeLocation = new ZoneAwareNodeLocation(NodeBase.PATH_SEPARATOR_STR + parts[0], + NodeBase.PATH_SEPARATOR_STR + parts[1]); + } + + this.defaultFaultDomain = defaultFaultDomain; + return this; + } + + public String getDefaultFaultDomain() { + return defaultFaultDomain; + } + + @Override + public PlacementResult<List<BookieId>> newEnsemble(int ensembleSize, int writeQuorumSize, + int ackQuorumSize, Set<BookieId> excludeBookies, + org.apache.bookkeeper.client.ITopologyAwareEnsemblePlacementPolicy.Ensemble<BookieNode> parentEnsemble, + org.apache.bookkeeper.client.ITopologyAwareEnsemblePlacementPolicy.Predicate<BookieNode> parentPredicate) + throws BKNotEnoughBookiesException { + throw new UnsupportedOperationException( + "newEnsemble method with parentEnsemble and parentPredicate is not supported for " + + 
"ZoneawareEnsemblePlacementPolicyImpl"); + } + + @Override + public BookieNode selectFromNetworkLocation(String networkLoc, Set excludeBookies, + org.apache.bookkeeper.client.ITopologyAwareEnsemblePlacementPolicy.Predicate predicate, + org.apache.bookkeeper.client.ITopologyAwareEnsemblePlacementPolicy.Ensemble ensemble, + boolean fallbackToRandom) throws BKNotEnoughBookiesException { + throw new UnsupportedOperationException( + "selectFromNetworkLocation is not supported for ZoneawareEnsemblePlacementPolicyImpl"); + } + + @Override + public BookieNode selectFromNetworkLocation(Set excludeRacks, Set excludeBookies, + org.apache.bookkeeper.client.ITopologyAwareEnsemblePlacementPolicy.Predicate predicate, + org.apache.bookkeeper.client.ITopologyAwareEnsemblePlacementPolicy.Ensemble ensemble, + boolean fallbackToRandom) throws BKNotEnoughBookiesException { + throw new UnsupportedOperationException( + "selectFromNetworkLocation is not supported for ZoneawareEnsemblePlacementPolicyImpl"); + } + + @Override + public BookieNode selectFromNetworkLocation(String networkLoc, Set excludeRacks, Set excludeBookies, + org.apache.bookkeeper.client.ITopologyAwareEnsemblePlacementPolicy.Predicate predicate, + org.apache.bookkeeper.client.ITopologyAwareEnsemblePlacementPolicy.Ensemble ensemble, + boolean fallbackToRandom) throws BKNotEnoughBookiesException { + throw new UnsupportedOperationException( + "selectFromNetworkLocation is not supported for ZoneawareEnsemblePlacementPolicyImpl"); + } + + @Override + public void uninitalize() { + } + + @Override + public PlacementResult> newEnsemble(int ensembleSize, int writeQuorumSize, + int ackQuorumSize, Map customMetadata, Set excludeBookies) + throws BKNotEnoughBookiesException { + if (enforceStrictZoneawarePlacement) { + if (ensembleSize % writeQuorumSize != 0) { + /* + * if ensembleSize is not multiple of writeQuorumSize, then the + * write quorums which are wrapped will have bookies from just + * minNumberOfZones though bookies are available from + * desiredNumZones. + * + * lets say for example - desiredZones = 3, minZones = 2, + * ensembleSize = 5, writeQuorumSize = 3, ackQuorumSize = 2 + * + * z1, z2, z3, z1, z2 is a legal ensemble. (lets assume here z1 + * represents a node belonging to zone z1) + * + * the writeQuorum for entry 3 will be z1, z2 and z1, since + * ackQuorumSize is 2, an entry could be written just to two + * bookies that belong to z1. If the zone z1 goes down then the + * entry could potentially be unavailable until the zone z1 has + * come back. + * + * Also, it is not ideal to allow combination which fallsback to + * minZones, when bookies are available from desiredNumZones. + * + * So prohibiting this combination of configuration. + */ + LOG.error("It is illegal for ensembleSize to be not multiple of" + + " writeQuorumSize When StrictZoneawarePlacement is enabled"); + throw new IllegalArgumentException("It is illegal for ensembleSize to be not multiple of" + + " writeQuorumSize When StrictZoneawarePlacement is enabled"); + } + if (writeQuorumSize <= minNumZonesPerWriteQuorum) { + /* + * if we allow writeQuorumSize <= minNumZonesPerWriteQuorum, + * then replaceBookie may fail to find a candidate to replace a + * node when a zone goes down. + * + * lets say for example - desiredZones = 3, minZones = 2, + * ensembleSize = 6, writeQuorumSize = 2, ackQuorumSize = 2 + * + * z1, z2, z3, z1, z2, z3 is a legal ensemble. 
(lets assume here + * z1 represents a node belonging to zone z1) + * + * Now if Zone z2 goes down, you need to replace Index 1 and 4. + * To replace index 1, you need to find a zone that is not z1 + * and Z3 which is not possible. + * + * So prohibiting this combination of configuration. + */ + LOG.error("It is illegal for writeQuorumSize to be lesser than or equal" + + " to minNumZonesPerWriteQuorum When StrictZoneawarePlacement is enabled"); + throw new IllegalArgumentException("It is illegal for writeQuorumSize to be lesser than or equal" + + " to minNumZonesPerWriteQuorum When StrictZoneawarePlacement is enabled"); + } + } + int desiredNumZonesPerWriteQuorumForThisEnsemble = Math.min(writeQuorumSize, desiredNumZonesPerWriteQuorum); + List newEnsemble = new ArrayList( + Collections.nCopies(ensembleSize, null)); + rwLock.readLock().lock(); + try { + if (!enforceStrictZoneawarePlacement) { + return createNewEnsembleRandomly(newEnsemble, writeQuorumSize, ackQuorumSize, customMetadata, + excludeBookies); + } + Set comprehensiveExclusionBookiesSet = addDefaultFaultDomainBookies(excludeBookies); + for (int index = 0; index < ensembleSize; index++) { + BookieId selectedBookie = setBookieInTheEnsemble(ensembleSize, writeQuorumSize, newEnsemble, + newEnsemble, index, desiredNumZonesPerWriteQuorumForThisEnsemble, + comprehensiveExclusionBookiesSet); + comprehensiveExclusionBookiesSet.add(selectedBookie); + } + return PlacementResult.of(newEnsemble, + isEnsembleAdheringToPlacementPolicy(newEnsemble, writeQuorumSize, ackQuorumSize)); + } finally { + rwLock.readLock().unlock(); + } + } + + @Override + public PlacementResult replaceBookie(int ensembleSize, int writeQuorumSize, int ackQuorumSize, + Map customMetadata, List currentEnsemble, + BookieId bookieToReplace, Set excludeBookies) + throws BKNotEnoughBookiesException { + int bookieToReplaceIndex = currentEnsemble.indexOf(bookieToReplace); + int desiredNumZonesPerWriteQuorumForThisEnsemble = (writeQuorumSize < desiredNumZonesPerWriteQuorum) + ? 
writeQuorumSize : desiredNumZonesPerWriteQuorum; + List<BookieId> newEnsemble = new ArrayList<BookieId>(currentEnsemble); + rwLock.readLock().lock(); + try { + if (!enforceStrictZoneawarePlacement) { + return selectBookieRandomly(newEnsemble, bookieToReplace, excludeBookies, writeQuorumSize, + ackQuorumSize); + } + Set<BookieId> comprehensiveExclusionBookiesSet = addDefaultFaultDomainBookies(excludeBookies); + comprehensiveExclusionBookiesSet.addAll(currentEnsemble); + BookieId candidateAddr = setBookieInTheEnsemble(ensembleSize, writeQuorumSize, currentEnsemble, + newEnsemble, bookieToReplaceIndex, desiredNumZonesPerWriteQuorumForThisEnsemble, + comprehensiveExclusionBookiesSet); + return PlacementResult.of(candidateAddr, + isEnsembleAdheringToPlacementPolicy(newEnsemble, writeQuorumSize, ackQuorumSize)); + } finally { + rwLock.readLock().unlock(); + } + } + + private PlacementResult<List<BookieId>> createNewEnsembleRandomly(List<BookieId> newEnsemble, + int writeQuorumSize, int ackQuorumSize, Map<String, byte[]> customMetadata, + Set<BookieId> excludeBookies) throws BKNotEnoughBookiesException { + int ensembleSize = newEnsemble.size(); + Set<BookieNode> bookiesToConsider = getBookiesToConsider(excludeBookies); + if (bookiesToConsider.size() < newEnsemble.size()) { + LOG.error("Not enough bookies are available to form ensemble of size: {}", newEnsemble.size()); + throw new BKNotEnoughBookiesException(); + } + + for (int i = 0; i < ensembleSize; i++) { + BookieNode candidateNode = selectCandidateNode(bookiesToConsider); + newEnsemble.set(i, candidateNode.getAddr()); + bookiesToConsider.remove(candidateNode); + } + return PlacementResult.of(newEnsemble, + isEnsembleAdheringToPlacementPolicy(newEnsemble, writeQuorumSize, ackQuorumSize)); + } + + private PlacementResult<BookieId> selectBookieRandomly(List<BookieId> newEnsemble, + BookieId bookieToReplace, Set<BookieId> excludeBookies, int writeQuorumSize, + int ackQuorumSize) throws BKNotEnoughBookiesException { + Set<BookieId> bookiesToExcludeIncludingEnsemble = new HashSet<BookieId>(excludeBookies); + bookiesToExcludeIncludingEnsemble.addAll(newEnsemble); + Set<BookieNode> bookiesToConsider = getBookiesToConsider(bookiesToExcludeIncludingEnsemble); + int bookieToReplaceIndex = newEnsemble.indexOf(bookieToReplace); + + if (bookiesToConsider.isEmpty()) { + LOG.error("There is no bookie available to replace a bookie"); + throw new BKNotEnoughBookiesException(); + } + BookieId candidateAddr = (selectCandidateNode(bookiesToConsider)).getAddr(); + newEnsemble.set(bookieToReplaceIndex, candidateAddr); + return PlacementResult.of(candidateAddr, + isEnsembleAdheringToPlacementPolicy(newEnsemble, writeQuorumSize, ackQuorumSize)); + } + + private Set<BookieNode> getBookiesToConsider(Set<BookieId> excludeBookies) { + Set<Node> leaves = topology.getLeaves(NodeBase.ROOT); + Set<BookieNode> bookiesToConsider = new HashSet<BookieNode>(); + BookieNode bookieNode; + for (Node leaf : leaves) { + if (leaf instanceof BookieNode) { + bookieNode = ((BookieNode) leaf); + if (excludeBookies.contains(bookieNode.getAddr())) { + continue; + } + bookiesToConsider.add(bookieNode); + } + } + return bookiesToConsider; + } + + /* + * This method finds the appropriate bookie for newEnsemble by finding the + * bookie to replace at bookieToReplaceIndex in the currentEnsemble. + * + * It goes through the following filtering process: 1) Exclude zones of + * desiredNumZonesPerWriteQuorumForThisEnsemble neighboring nodes. 2) Find + * bookies to consider by excluding zones (found in the previous step) and + * excluding UDs of the zones to consider. 3) If it can't find an eligible + * bookie, then keep reducing the number of neighboring nodes to + * minNumZonesPerWriteQuorum and repeat step 2. 4) If it still can't find + * eligible bookies, then find the zones to exclude such that in a write set + * there will be bookies from at least minNumZonesPerWriteQuorum zones, and + * repeat step 2. 5) After getting the list of eligible candidates, select a + * node randomly. 6) If step 4 couldn't find eligible candidates, then throw + * BKNotEnoughBookiesException. + * + * Example: Ensemble:6 Qw:6 desiredNumZonesPerWriteQuorumForThisEnsemble:3 + * minNumZonesPerWriteQuorum:2. The selection process is as follows: + * + * 1) Find bookies by excluding zones of + * (desiredNumZonesPerWriteQuorumForThisEnsemble - 1) neighboring bookies on + * the left and the right side of the bookieToReplaceIndex, i.e. zones of + * 2 bookies (3 - 1) on both sides of the index in question will be excluded to + * find bookies. 2) Get the set of zones of the bookies selected above. 3) + * Get the UpgradeDomains to exclude of each zone selected above to make + * sure bookies of write sets containing bookieToReplaceIndex are from + * different UDs if they belong to the same zone. 4) Now, from the zones selected + * in step 2, apply the filter of UDs to exclude found in the previous step and + * get the eligible bookies. 5) If no bookie matches this filter, then + * instead of aiming for unique UDs, fall back to UDs to exclude such that if + * bookies are from the same zone in the write sets containing + * bookieToReplaceIndex then they must be from at least 2 different UDs. 6) + * Now, from the zones selected in step 2, apply the filter of UDs to exclude + * found in the previous step and get the eligible bookies. 7) If no bookie + * matches this filter, repeat steps 1 to 6, decreasing the number of neighboring + * zones to exclude from (desiredNumZonesPerWriteQuorumForThisEnsemble - 1), + * which is 2, to (minNumZonesPerWriteQuorum - 1), which is 1. 8) If even + * after this no bookies match the criteria, fall back to + * minNumZonesPerWriteQuorum: find the zones to exclude such that + * in write sets containing this bookieToReplaceIndex there will be bookies + * from at least minNumZonesPerWriteQuorum zones, which is 2. 9) Get the set + * of the zones of the bookies by excluding the zones selected above. 10) Repeat + * steps 3 to 6. 11) After getting the list of eligible candidates, select + * a node randomly. 12) If even after step 10 there are no eligible + * candidates, then throw BKNotEnoughBookiesException.
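+ *
+ * In code terms this is a fallback ladder (see setBookieInTheEnsemble below): a loop from
+ * (desiredNumZonesPerWriteQuorumForThisEnsemble - 1) down to (minNumZonesPerWriteQuorum - 1)
+ * neighboring zones to exclude, then one final attempt that excludes only the zones needed to
+ * keep minNumZonesPerWriteQuorum zones per write quorum, and finally BKNotEnoughBookiesException.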
+ */ + private BookieId setBookieInTheEnsemble(int ensembleSize, int writeQuorumSize, + List<BookieId> currentEnsemble, List<BookieId> newEnsemble, int bookieToReplaceIndex, + int desiredNumZonesPerWriteQuorumForThisEnsemble, Set<BookieId> excludeBookies) + throws BKNotEnoughBookiesException { + BookieId bookieToReplace = currentEnsemble.get(bookieToReplaceIndex); + Set<String> zonesToExclude = null; + Set<BookieNode> bookiesToConsiderAfterExcludingZonesAndUDs = null; + for (int numberOfNeighborsToConsider = (desiredNumZonesPerWriteQuorumForThisEnsemble + - 1); numberOfNeighborsToConsider >= (minNumZonesPerWriteQuorum - 1); numberOfNeighborsToConsider--) { + zonesToExclude = getZonesOfNeighboringNodesInEnsemble(currentEnsemble, bookieToReplaceIndex, + (numberOfNeighborsToConsider)); + bookiesToConsiderAfterExcludingZonesAndUDs = getBookiesToConsiderAfterExcludingZonesAndUDs(ensembleSize, + writeQuorumSize, currentEnsemble, bookieToReplaceIndex, excludeBookies, zonesToExclude); + if (!bookiesToConsiderAfterExcludingZonesAndUDs.isEmpty()) { + break; + } + } + if (bookiesToConsiderAfterExcludingZonesAndUDs.isEmpty()) { + zonesToExclude = getZonesToExcludeToMaintainMinZones(currentEnsemble, bookieToReplaceIndex, + writeQuorumSize); + bookiesToConsiderAfterExcludingZonesAndUDs = getBookiesToConsiderAfterExcludingZonesAndUDs(ensembleSize, + writeQuorumSize, currentEnsemble, bookieToReplaceIndex, excludeBookies, zonesToExclude); + } + if (bookiesToConsiderAfterExcludingZonesAndUDs.isEmpty()) { + LOG.error("Not enough bookies are available to replaceBookie : {} in ensemble : {} with excludeBookies {}.", + bookieToReplace, currentEnsemble, excludeBookies); + throw new BKNotEnoughBookiesException(); + } + + BookieId candidateAddr = selectCandidateNode(bookiesToConsiderAfterExcludingZonesAndUDs).getAddr(); + newEnsemble.set(bookieToReplaceIndex, candidateAddr); + return candidateAddr; + } + + /* + * This method should be called in the read-lock scope of 'rwLock'. It + * returns a new set, built by adding excludeBookies and the bookies in the + * default fault domain. + */ + protected Set<BookieId> addDefaultFaultDomainBookies(Set<BookieId> excludeBookies) { + Set<BookieId> comprehensiveExclusionBookiesSet = new HashSet<BookieId>(excludeBookies); + Set<Node> defaultFaultDomainLeaves = topology.getLeaves(getDefaultFaultDomain()); + for (Node node : defaultFaultDomainLeaves) { + if (node instanceof BookieNode) { + comprehensiveExclusionBookiesSet.add(((BookieNode) node).getAddr()); + } else { + LOG.error("found non-BookieNode: {} as leaf of defaultFaultDomain: {}", node, getDefaultFaultDomain()); + } + } + return comprehensiveExclusionBookiesSet; + } + + /* + * Select a bookie randomly from the bookiesToConsiderAfterExcludingUDs set. + * If diskWeightBasedPlacement is enabled then it will select a node randomly + * based on node weight.
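+ * The unweighted branch below iterates to a uniformly random index because the candidate Set
+ * offers no positional access; the weighted branch delegates to the configured weighted
+ * selection, which biases the choice by bookie disk weight.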
+ */ + private BookieNode selectCandidateNode(Set<BookieNode> bookiesToConsiderAfterExcludingUDs) { + BookieNode candidate = null; + if (!this.isWeighted) { + int randSelIndex = rand.nextInt(bookiesToConsiderAfterExcludingUDs.size()); + int ind = 0; + for (BookieNode bookieNode : bookiesToConsiderAfterExcludingUDs) { + if (ind == randSelIndex) { + candidate = bookieNode; + break; + } + ind++; + } + } else { + candidate = weightedSelection.getNextRandom(bookiesToConsiderAfterExcludingUDs); + } + return candidate; + } + + private String getExcludedZonesString(Set<String> excludeZones) { + if (excludeZones.isEmpty()) { + return ""; + } + StringBuilder excludedZonesString = new StringBuilder(NetworkTopologyImpl.INVERSE); + boolean firstZone = true; + for (String excludeZone : excludeZones) { + if (!firstZone) { + excludedZonesString.append(NetworkTopologyImpl.NODE_SEPARATOR); + } + excludedZonesString.append(excludeZone); + firstZone = false; + } + return excludedZonesString.toString(); + } + + private Set<BookieNode> getBookiesToConsider(String excludedZonesString, Set<BookieId> excludeBookies) { + Set<BookieNode> bookiesToConsider = new HashSet<BookieNode>(); + Set<Node> leaves = topology.getLeaves(excludedZonesString); + for (Node leaf : leaves) { + BookieNode bookieNode = ((BookieNode) leaf); + if (excludeBookies.contains(bookieNode.getAddr())) { + continue; + } + bookiesToConsider.add(bookieNode); + } + return bookiesToConsider; + } + + /* + * For the position of 'bookieToReplaceIndex' in currentEnsemble, get the + * set of eligible bookies by excluding the 'excludeZones' and + * 'excludeBookies'. After excluding excludeZones and excludeBookies, it + * first tries to exclude upgrade domains of neighboring nodes + * (write set) so the bookie would be from a completely new upgrade domain + * of a zone, if a write set contains a bookie from the zone. If no bookie + * matching these criteria is found, then it falls back to maintaining a minimum of + * two upgrade domains from a zone, such that if multiple bookies in a + * write quorum are from the same zone then they will be spread across two + * upgrade domains.
+ */ + private Set<BookieNode> getBookiesToConsiderAfterExcludingZonesAndUDs(int ensembleSize, int writeQuorumSize, + List<BookieId> currentEnsemble, int bookieToReplaceIndex, + Set<BookieId> excludeBookies, Set<String> excludeZones) { + Set<BookieNode> bookiesToConsiderAfterExcludingZonesAndUDs = new HashSet<BookieNode>(); + HashMap<String, Set<String>> excludingUDsOfZonesToConsider = new HashMap<String, Set<String>>(); + Set<BookieNode> bookiesToConsiderAfterExcludingZones = getBookiesToConsider( + getExcludedZonesString(excludeZones), excludeBookies); + + if (!bookiesToConsiderAfterExcludingZones.isEmpty()) { + Set<String> zonesToConsider = getZonesOfBookies(bookiesToConsiderAfterExcludingZones); + for (String zoneToConsider : zonesToConsider) { + Set<String> upgradeDomainsOfAZoneInNeighboringNodes = getUpgradeDomainsOfAZoneInNeighboringNodes( + currentEnsemble, bookieToReplaceIndex, writeQuorumSize, zoneToConsider); + excludingUDsOfZonesToConsider.put(zoneToConsider, upgradeDomainsOfAZoneInNeighboringNodes); + } + + updateBookiesToConsiderAfterExcludingZonesAndUDs(bookiesToConsiderAfterExcludingZonesAndUDs, + bookiesToConsiderAfterExcludingZones, excludingUDsOfZonesToConsider); + + /* + * If no eligible bookie is found, then instead of aiming for unique + * UDs, fall back to UDs to exclude such that if bookies are from the + * same zone in the write sets containing bookieToReplaceIndex then + * they must be from at least 2 different UDs. + */ + if (bookiesToConsiderAfterExcludingZonesAndUDs.isEmpty()) { + excludingUDsOfZonesToConsider.clear(); + for (String zoneToConsider : zonesToConsider) { + Set<String> udsToExcludeToMaintainMinUDsInWriteQuorums = + getUDsToExcludeToMaintainMinUDsInWriteQuorums(currentEnsemble, bookieToReplaceIndex, + writeQuorumSize, zoneToConsider); + excludingUDsOfZonesToConsider.put(zoneToConsider, udsToExcludeToMaintainMinUDsInWriteQuorums); + } + + updateBookiesToConsiderAfterExcludingZonesAndUDs(bookiesToConsiderAfterExcludingZonesAndUDs, + bookiesToConsiderAfterExcludingZones, excludingUDsOfZonesToConsider); + } + } + return bookiesToConsiderAfterExcludingZonesAndUDs; + } + + /* + * Filter out bookies which belong to the excluded UDs of the zones to consider from the + * 'bookiesToConsider' set and add the remaining ones to the + * 'bookiesToConsiderAfterExcludingUDs' set. + */ + private void updateBookiesToConsiderAfterExcludingZonesAndUDs(Set<BookieNode> bookiesToConsiderAfterExcludingUDs, + Set<BookieNode> bookiesToConsider, HashMap<String, Set<String>> excludingUDsOfZonesToConsider) { + for (BookieNode bookieToConsider : bookiesToConsider) { + ZoneAwareNodeLocation nodeLocation = getZoneAwareNodeLocation(bookieToConsider); + if (excludingUDsOfZonesToConsider.get(nodeLocation.getZone()).contains(nodeLocation.getUpgradeDomain())) { + continue; + } + bookiesToConsiderAfterExcludingUDs.add(bookieToConsider); + } + } + + /* + * Gets the set of zones of neighboring nodes.
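+ * For example (hypothetical sizes): with an ensemble of 5, indexOfNode = 0 and
+ * numOfNeighboringNodes = 2, the inspected indices are 3, 4, 1 and 2, since each index is
+ * computed as (indexOfNode + i + ensembleSize) % ensembleSize for i in [-2, 2], i != 0.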
+ */ + private Set<String> getZonesOfNeighboringNodesInEnsemble(List<BookieId> currentEnsemble, int indexOfNode, + int numOfNeighboringNodes) { + Set<String> zonesOfNeighboringNodes = new HashSet<String>(); + int ensembleSize = currentEnsemble.size(); + for (int i = (-1 * numOfNeighboringNodes); i <= numOfNeighboringNodes; i++) { + if (i == 0) { + continue; + } + int index = (indexOfNode + i + ensembleSize) % ensembleSize; + BookieId addrofNode = currentEnsemble.get(index); + if (addrofNode == null) { + continue; + } + String zoneOfNode = getZoneAwareNodeLocation(addrofNode).getZone(); + zonesOfNeighboringNodes.add(zoneOfNode); + } + return zonesOfNeighboringNodes; + } + + /* + * This method returns the set of zones to exclude for the position of + * 'indexOfNode', so that write quorums containing this index would have + * bookies from at least minNumZonesPerWriteQuorum zones. + */ + private Set<String> getZonesToExcludeToMaintainMinZones(List<BookieId> currentEnsemble, int indexOfNode, + int writeQuorumSize) { + int ensSize = currentEnsemble.size(); + Set<String> zonesToExclude = new HashSet<String>(); + Set<String> zonesInWriteQuorum = new HashSet<String>(); + for (int i = -(writeQuorumSize - 1); i <= 0; i++) { + zonesInWriteQuorum.clear(); + for (int j = 0; j < writeQuorumSize; j++) { + int indexInEnsemble = (i + j + indexOfNode + ensSize) % ensSize; + if (indexInEnsemble == indexOfNode) { + continue; + } + BookieId bookieAddr = currentEnsemble.get(indexInEnsemble); + if (bookieAddr == null) { + continue; + } + ZoneAwareNodeLocation nodeLocation = getZoneAwareNodeLocation(bookieAddr); + zonesInWriteQuorum.add(nodeLocation.getZone()); + } + if (zonesInWriteQuorum.size() <= (minNumZonesPerWriteQuorum - 1)) { + zonesToExclude.addAll(zonesInWriteQuorum); + } + } + return zonesToExclude; + } + + private Set<String> getZonesOfBookies(Collection<BookieNode> bookieNodes) { + Set<String> zonesOfBookies = new HashSet<String>(); + for (BookieNode bookieNode : bookieNodes) { + ZoneAwareNodeLocation nodeLocation = getZoneAwareNodeLocation(bookieNode); + zonesOfBookies.add(nodeLocation.getZone()); + } + return zonesOfBookies; + } + + /* + * Gets the set of upgrade domains of neighboring nodes (writeQuorumSize) + * which belong to this 'zone'. + */ + private Set<String> getUpgradeDomainsOfAZoneInNeighboringNodes(List<BookieId> currentEnsemble, + int indexOfNode, int writeQuorumSize, String zone) { + int ensSize = currentEnsemble.size(); + Set<String> upgradeDomainsOfAZoneInNeighboringNodes = new HashSet<String>(); + for (int i = -(writeQuorumSize - 1); i <= (writeQuorumSize - 1); i++) { + if (i == 0) { + continue; + } + int indexInEnsemble = (indexOfNode + i + ensSize) % ensSize; + BookieId bookieAddr = currentEnsemble.get(indexInEnsemble); + if (bookieAddr == null) { + continue; + } + ZoneAwareNodeLocation nodeLocation = getZoneAwareNodeLocation(bookieAddr); + if (nodeLocation.getZone().equals(zone)) { + upgradeDomainsOfAZoneInNeighboringNodes.add(nodeLocation.getUpgradeDomain()); + } + } + return upgradeDomainsOfAZoneInNeighboringNodes; + } + + /* + * This method returns the set of UpgradeDomains to exclude so that, if a bookie from + * the 'zone' is selected for the position of 'indexOfNode' and + * there are multiple bookies from the 'zone' in a write quorum, they + * will be from at least two upgrade domains.
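+ * For example (hypothetical UDs): if, in some write quorum containing indexOfNode, every other
+ * bookie of 'zone' sits in upgrade domain /ud-0, then /ud-0 is excluded so that the selected
+ * bookie has to come from a second upgrade domain of that zone.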
+ */ + private Set<String> getUDsToExcludeToMaintainMinUDsInWriteQuorums(List<BookieId> currentEnsemble, + int indexOfNode, int writeQuorumSize, String zone) { + int ensSize = currentEnsemble.size(); + Set<String> upgradeDomainsToExclude = new HashSet<String>(); + Set<String> upgradeDomainsOfThisZoneInWriteQuorum = new HashSet<String>(); + for (int i = -(writeQuorumSize - 1); i <= 0; i++) { + upgradeDomainsOfThisZoneInWriteQuorum.clear(); + for (int j = 0; j < writeQuorumSize; j++) { + int indexInEnsemble = (i + j + indexOfNode + ensSize) % ensSize; + if (indexInEnsemble == indexOfNode) { + continue; + } + BookieId bookieAddr = currentEnsemble.get(indexInEnsemble); + if (bookieAddr == null) { + continue; + } + ZoneAwareNodeLocation nodeLocation = getZoneAwareNodeLocation(bookieAddr); + if (nodeLocation.getZone().equals(zone)) { + upgradeDomainsOfThisZoneInWriteQuorum.add(nodeLocation.getUpgradeDomain()); + } + } + if (upgradeDomainsOfThisZoneInWriteQuorum.size() == 1) { + upgradeDomainsToExclude.addAll(upgradeDomainsOfThisZoneInWriteQuorum); + } + } + return upgradeDomainsToExclude; + } + + @Override + public void registerSlowBookie(BookieId bookieSocketAddress, long entryId) { + // TODO Auto-generated method stub + } + + @Override + public DistributionSchedule.WriteSet reorderReadSequence(List<BookieId> ensemble, + BookiesHealthInfo bookiesHealthInfo, DistributionSchedule.WriteSet writeSet) { + return writeSet; + } + + @Override + public DistributionSchedule.WriteSet reorderReadLACSequence(List<BookieId> ensemble, + BookiesHealthInfo bookiesHealthInfo, DistributionSchedule.WriteSet writeSet) { + DistributionSchedule.WriteSet retList = reorderReadSequence(ensemble, bookiesHealthInfo, writeSet); + retList.addMissingIndices(ensemble.size()); + return retList; + } + + /* + * In ZoneAwareEnsemblePlacementPolicy, if the bookies in a write set are from + * 'desiredNumOfZones' then it is considered MEETS_STRICT; if they are + * from 'minNumOfZones' then it is considered MEETS_SOFT; otherwise it is + * considered FAIL. Also, if there are multiple bookies from + * the same zone in a write set, then they are expected to be from different upgrade + * domains.
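+ * For example (hypothetical zones z1..z3, desiredNumOfZones = 3, minNumOfZones = 2): a write set
+ * spanning z1, z2, z3 is MEETS_STRICT; one spanning z1, z2, z1 is MEETS_SOFT provided the two
+ * z1 bookies sit in different upgrade domains; a write set confined to a single zone is FAIL.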
+ */ + @Override + public PlacementPolicyAdherence isEnsembleAdheringToPlacementPolicy(List<BookieId> ensembleList, + int writeQuorumSize, int ackQuorumSize) { + if (CollectionUtils.isEmpty(ensembleList)) { + return PlacementPolicyAdherence.FAIL; + } + PlacementPolicyAdherence placementPolicyAdherence = PlacementPolicyAdherence.MEETS_STRICT; + rwLock.readLock().lock(); + try { + HashMap<String, Set<String>> bookiesLocationInWriteSet = new HashMap<String, Set<String>>(); + HashMap<String, Integer> numOfBookiesInZones = new HashMap<String, Integer>(); + BookieId bookieNode; + if (ensembleList.size() % writeQuorumSize != 0) { + placementPolicyAdherence = PlacementPolicyAdherence.FAIL; + if (LOG.isDebugEnabled()) { + LOG.debug( + "For ensemble: {}, ensembleSize: {} is not a multiple of writeQuorumSize: {}", + ensembleList, ensembleList.size(), writeQuorumSize); + } + return placementPolicyAdherence; + } + if (writeQuorumSize <= minNumZonesPerWriteQuorum) { + placementPolicyAdherence = PlacementPolicyAdherence.FAIL; + if (LOG.isDebugEnabled()) { + LOG.debug( + "For ensemble: {}, writeQuorumSize: {} is less than or equal to" + + " minNumZonesPerWriteQuorum: {}", + ensembleList, writeQuorumSize, minNumZonesPerWriteQuorum); + } + return placementPolicyAdherence; + } + int desiredNumZonesPerWriteQuorumForThisEnsemble = Math.min(writeQuorumSize, desiredNumZonesPerWriteQuorum); + for (int i = 0; i < ensembleList.size(); i++) { + bookiesLocationInWriteSet.clear(); + numOfBookiesInZones.clear(); + for (int j = 0; j < writeQuorumSize; j++) { + int indexOfNode = (i + j) % ensembleList.size(); + bookieNode = ensembleList.get(indexOfNode); + ZoneAwareNodeLocation nodeLocation = getZoneAwareNodeLocation(bookieNode); + if (nodeLocation.equals(unresolvedNodeLocation)) { + placementPolicyAdherence = PlacementPolicyAdherence.FAIL; + if (LOG.isDebugEnabled()) { + LOG.debug("ensemble: {} contains bookie: {} whose network location is unresolvable", + ensembleList, bookieNode); + } + return placementPolicyAdherence; + } + String zone = nodeLocation.getZone(); + String upgradeDomain = nodeLocation.getUpgradeDomain(); + Set<String> udsOfThisZoneInThisWriteSet = bookiesLocationInWriteSet.get(zone); + if (udsOfThisZoneInThisWriteSet == null) { + udsOfThisZoneInThisWriteSet = new HashSet<String>(); + udsOfThisZoneInThisWriteSet.add(upgradeDomain); + bookiesLocationInWriteSet.put(zone, udsOfThisZoneInThisWriteSet); + numOfBookiesInZones.put(zone, 1); + } else { + udsOfThisZoneInThisWriteSet.add(upgradeDomain); + Integer numOfNodesInAZone = numOfBookiesInZones.get(zone); + numOfBookiesInZones.put(zone, (numOfNodesInAZone + 1)); + } + } + if (numOfBookiesInZones.entrySet().size() < minNumZonesPerWriteQuorum) { + placementPolicyAdherence = PlacementPolicyAdherence.FAIL; + if (LOG.isDebugEnabled()) { + LOG.debug("in ensemble: {}, writeset starting at: {} doesn't contain bookies from" + + " minNumZonesPerWriteQuorum: {} zones", ensembleList, i, minNumZonesPerWriteQuorum); + } + return placementPolicyAdherence; + } else if (numOfBookiesInZones.entrySet().size() >= desiredNumZonesPerWriteQuorumForThisEnsemble) { + if (!validateMinUDsAreMaintained(numOfBookiesInZones, bookiesLocationInWriteSet)) { + placementPolicyAdherence = PlacementPolicyAdherence.FAIL; + if (LOG.isDebugEnabled()) { + LOG.debug("in ensemble: {}, writeset starting at: {} doesn't maintain min of 2 UDs" + + " when there are multiple bookies from the same zone.", ensembleList, i); + } + return placementPolicyAdherence; + } + } else { + if (!validateMinUDsAreMaintained(numOfBookiesInZones, bookiesLocationInWriteSet)) { + placementPolicyAdherence = PlacementPolicyAdherence.FAIL; + if (LOG.isDebugEnabled()) { + LOG.debug("in ensemble: {}, writeset starting at: {} doesn't maintain min of 2 UDs" + + " when there are multiple bookies from the same zone.", ensembleList, i); + } + return placementPolicyAdherence; + } + if (placementPolicyAdherence == PlacementPolicyAdherence.MEETS_STRICT) { + placementPolicyAdherence = PlacementPolicyAdherence.MEETS_SOFT; + } + } + } + } finally { + rwLock.readLock().unlock(); + } + return placementPolicyAdherence; + } + + private boolean validateMinUDsAreMaintained(HashMap<String, Integer> numOfNodesInZones, + HashMap<String, Set<String>> nodesLocationInWriteSet) { + for (Entry<String, Integer> numOfNodesInZone : numOfNodesInZones.entrySet()) { + String zone = numOfNodesInZone.getKey(); + Integer numOfNodesInThisZone = numOfNodesInZone.getValue(); + if (numOfNodesInThisZone > 1) { + Set<String> udsOfThisZone = nodesLocationInWriteSet.get(zone); + if (udsOfThisZone.size() < 2) { + return false; + } + } + } + return true; + } + + @Override + public boolean areAckedBookiesAdheringToPlacementPolicy(Set<BookieId> ackedBookies, int writeQuorumSize, + int ackQuorumSize) { + HashSet<String> zonesOfAckedBookies = new HashSet<>(); + int minNumZonesPerWriteQuorumForThisEnsemble = Math.min(writeQuorumSize, minNumZonesPerWriteQuorum); + boolean areAckedBookiesAdheringToPlacementPolicy = false; + ReentrantReadWriteLock.ReadLock readLock = rwLock.readLock(); + readLock.lock(); + try { + for (BookieId ackedBookie : ackedBookies) { + zonesOfAckedBookies.add(getZoneAwareNodeLocation(ackedBookie).getZone()); + } + areAckedBookiesAdheringToPlacementPolicy = ((zonesOfAckedBookies + .size() >= minNumZonesPerWriteQuorumForThisEnsemble) && (ackedBookies.size() >= ackQuorumSize)); + if (LOG.isDebugEnabled()) { + LOG.debug( + "areAckedBookiesAdheringToPlacementPolicy returning {}, because number of ackedBookies = {}," + + " number of zones of ackedBookies = {}," + + " minNumZonesPerWriteQuorumForThisEnsemble = {}", + areAckedBookiesAdheringToPlacementPolicy, ackedBookies.size(), zonesOfAckedBookies.size(), + minNumZonesPerWriteQuorumForThisEnsemble); + } + } finally { + readLock.unlock(); + } + return areAckedBookiesAdheringToPlacementPolicy; + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/BKException.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/BKException.java index 09bb8f36098..8969efc29ee 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/BKException.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/BKException.java @@ -17,7 +17,6 @@ import java.lang.reflect.Field; import java.util.function.Function; - import org.apache.bookkeeper.client.LedgerHandleAdv; import org.apache.bookkeeper.common.annotation.InterfaceAudience.Public; import org.apache.bookkeeper.common.annotation.InterfaceStability.Unstable; @@ -59,6 +58,17 @@ public BKException(int code) { this.code = code; } + /** + * Create a new exception with the cause. + * + * @param code exception code + * @param cause the exception cause + */ + public BKException(int code, Throwable cause) { + super(getMessage(code), cause); + this.code = code; + } + /** * Get the return code for the exception. 
* @@ -105,7 +115,9 @@ public static String getMessage(int code) { case Code.NotEnoughBookiesException: return "Not enough non-faulty bookies available"; case Code.NoSuchLedgerExistsException: - return "No such ledger exists"; + return "No such ledger exists on Bookies"; + case Code.NoSuchLedgerExistsOnMetadataServerException: + return "No such ledger exists on Metadata Server"; case Code.BookieHandleNotAvailableException: return "Bookie handle is not available"; case Code.ZKException: @@ -156,6 +168,10 @@ public static String getMessage(int code) { return "Bookie operation timeout"; case Code.SecurityException: return "Failed to establish a secure connection"; + case Code.MetadataSerializationException: + return "Failed to serialize metadata"; + case Code.DataUnknownException: + return "Ledger in limbo"; default: return "Unexpected condition"; } @@ -229,6 +245,9 @@ public interface Code { int TimeoutException = -23; int SecurityException = -24; + /** No such ledger exists on metadata server. */ + int NoSuchLedgerExistsOnMetadataServerException = -25; + /** * Operation is illegal. */ @@ -260,6 +279,18 @@ public interface Code { */ int LedgerIdOverflowException = -106; + /** + * Failure to serialize metadata. + * + * @since 4.9 + */ + int MetadataSerializationException = -107; + + /** + * Operations failed due to ledger data in an unknown state. + */ + int DataUnknownException = -108; + /** * Generic exception code used to propagate in replication pipeline. */ diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/BookKeeper.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/BookKeeper.java index 7c4b678de7c..1e7fa35670f 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/BookKeeper.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/BookKeeper.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -20,6 +20,7 @@ */ package org.apache.bookkeeper.client.api; +import java.util.concurrent.CompletableFuture; import org.apache.bookkeeper.client.impl.BookKeeperBuilderImpl; import org.apache.bookkeeper.common.annotation.InterfaceAudience.Public; import org.apache.bookkeeper.common.annotation.InterfaceStability.Unstable; @@ -65,6 +66,21 @@ static BookKeeperBuilder newBuilder(final ClientConfiguration clientConfiguratio */ DeleteBuilder newDeleteLedgerOp(); + /** + * List ledgers. + * + * @return a builder useful to list ledgers. + */ + ListLedgersResultBuilder newListLedgersOp(); + + /** + * Get ledger metadata of a given ledger id. + * + * @param ledgerId id of the ledger. + * @return a CompletableFuture instance containing ledger metadata. + */ + CompletableFuture<LedgerMetadata> getLedgerMetadata(long ledgerId); + /** * Close the client and release every resource. * diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/BookKeeperBuilder.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/BookKeeperBuilder.java index 0e147dd3011..b260ca33ac0 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/BookKeeperBuilder.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/BookKeeperBuilder.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file @@ -20,6 +20,7 @@ */ package org.apache.bookkeeper.client.api; +import io.netty.buffer.ByteBufAllocator; import io.netty.channel.EventLoopGroup; import io.netty.util.HashedWheelTimer; import java.io.IOException; @@ -47,6 +48,15 @@ public interface BookKeeperBuilder { */ BookKeeperBuilder eventLoopGroup(EventLoopGroup eventLoopGroup); + /** + * Configure the bookkeeper client with a provided {@link ByteBufAllocator}. + * + * @param allocator an external {@link ByteBufAllocator} to use by the bookkeeper client. + * @return client builder. + * @since 4.9 + */ + BookKeeperBuilder allocator(ByteBufAllocator allocator); + /** * Configure the bookkeeper client with a provided {@link StatsLogger}. * diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/CreateAdvBuilder.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/CreateAdvBuilder.java index 12372919e5d..9a316b1fb0f 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/CreateAdvBuilder.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/CreateAdvBuilder.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/CreateBuilder.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/CreateBuilder.java index f02eb84d413..183441690c3 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/CreateBuilder.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/CreateBuilder.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/DeleteBuilder.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/DeleteBuilder.java index ddbf41d67e1..d88126a7bc2 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/DeleteBuilder.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/DeleteBuilder.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/DigestType.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/DigestType.java index 14c8f1b81d0..71f7c14dbdd 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/DigestType.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/DigestType.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/ForceableHandle.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/ForceableHandle.java index 48d7acb9eb8..7b10619e7bf 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/ForceableHandle.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/ForceableHandle.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/Handle.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/Handle.java index 1f2f8ca511d..46c512d56b4 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/Handle.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/Handle.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/LastConfirmedAndEntry.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/LastConfirmedAndEntry.java index 8bbe58e21a7..ccc6c0086cb 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/LastConfirmedAndEntry.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/LastConfirmedAndEntry.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -50,6 +50,7 @@ public interface LastConfirmedAndEntry extends AutoCloseable { /** * {@inheritDoc} */ + @Override void close(); } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/LedgerEntries.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/LedgerEntries.java index 141aa6c8c70..621ffb894b9 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/LedgerEntries.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/LedgerEntries.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/LedgerEntry.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/LedgerEntry.java index 0f54961e45c..ce2fbecd163 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/LedgerEntry.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/LedgerEntry.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -98,6 +98,7 @@ public interface LedgerEntry extends AutoCloseable { /** * {@inheritDoc} */ + @Override void close(); } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/LedgerMetadata.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/LedgerMetadata.java index dc2deb6dc6f..91f736f7f21 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/LedgerMetadata.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/LedgerMetadata.java @@ -24,7 +24,7 @@ import java.util.NavigableMap; import org.apache.bookkeeper.common.annotation.InterfaceAudience.LimitedPrivate; import org.apache.bookkeeper.common.annotation.InterfaceStability.Unstable; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.BookieId; /** * Represents the client-side metadata of a ledger. It is immutable. @@ -34,6 +34,12 @@ @LimitedPrivate @Unstable public interface LedgerMetadata { + /** + * Returns the id of this ledger. + * + * @return the id of this ledger. + */ + long getLedgerId(); /** * Returns the ensemble size of this ledger. 
@@ -74,8 +80,25 @@ public interface LedgerMetadata { */ long getLength(); + /** + * Whether the metadata contains the password and digest type for the ledger. + * Ledgers created with version 4.1.0 clients or older do not have this information. + * + * @return true if the metadata contains the password and digest type, false otherwise. + */ + boolean hasPassword(); + + /** + * Get the password for the ledger. + * For ledgers created with version 4.1.0 or older, an empty byte array is returned. + * + * @return the password for the ledger. + */ + byte[] getPassword(); + /** * Returns the digest type used by this ledger. + * May return null if the ledger was created with version 4.1.0 or below. * * @return the digest type used by this ledger. */ @@ -108,14 +131,60 @@ public interface LedgerMetadata { * @param entryId the entry id to retrieve its ensemble information * @return the ensemble which contains the given {@code entryId}. */ - List getEnsembleAt(long entryId); + List getEnsembleAt(long entryId); + + /** + * Returns all the ensembles of this ledger. + * + * @return all the ensembles of this ledger. + */ + NavigableMap> getAllEnsembles(); + + /** + * Returns the state of the metadata. + * + * @return the state of the metadata. + */ + State getState(); + + /** + * Possible metadata states. + */ + enum State { + /** The ledger is open. New entry may be added to it. */ + OPEN, + + /** A reader has tried to, or may be trying to recover the ledger. + The writer may be able to add new entries if fencing hasn't already occurred, + but any attempt to change ensemble will fail and the write will be forced to + close the ledger. + */ + IN_RECOVERY, + + /** The ledger is closed. No new entries may be added to it. + The length and lastEntryId are fixed. Ensembles may change, but only for rereplication. + */ + CLOSED + } /** - * Returns all the ensembles of this entry. + * Similar to #toString(), but omits the password of the ledger, so that it is safe to log the output. * - * @return all the ensembles of this entry. + * @return a string representation of the metadata, omitting the password. */ - NavigableMap> getAllEnsembles(); + String toSafeString(); + /** + * Get the format version which should be used to serialize the metadata. + * + * @return the format version. + */ + int getMetadataFormatVersion(); + /** + * Get the unique creator token of the Ledger. + * + * @return the creator token + */ + long getCToken(); } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/LedgersIterator.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/LedgersIterator.java new file mode 100644 index 00000000000..98c99e12f45 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/LedgersIterator.java @@ -0,0 +1,41 @@ +/* + * Copyright 2020 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bookkeeper.client.api; + +import java.io.IOException; + +/** + * Iterator for ledgers. 
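+ * A minimal usage sketch (hypothetical {@code result}, obtained from a ListLedgersResult):
+ *   LedgersIterator ledgers = result.iterator();
+ *   while (ledgers.hasNext()) {
+ *       long ledgerId = ledgers.next();
+ *       // process ledgerId
+ *   }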
+ */ +public interface LedgersIterator { + + /** + * Return true if there is at least one ledger to visit. + * + * @return true if there is at least one ledger to visit. + * @throws IOException thrown when there is a problem accessing the ledger metadata store. + */ + boolean hasNext() throws IOException; + + /** + * Return next ledger id. + * + * @return next ledger id. + * @throws IOException thrown when there is a problem accessing the ledger metadata store. + */ + long next() throws IOException; + +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/ListLedgersResult.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/ListLedgersResult.java new file mode 100644 index 00000000000..e5d080b77db --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/ListLedgersResult.java @@ -0,0 +1,40 @@ +/* + * Copyright 2020 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bookkeeper.client.api; + +/** + * Utility container for listing ledgers. + */ +public interface ListLedgersResult extends AutoCloseable { + + /** + * Creates a LedgersIterator. + * This method must be called once per ListLedgersResult instance. + * @return a LedgersIterator instance. + */ + LedgersIterator iterator(); + + /** + * Creates an Iterable, which wraps a LedgersIterator. + * This method must be called once per ListLedgersResult instance. + *
 + * Metadata store access exceptions (IOException) are wrapped within a RuntimeException. + * If you want to handle these cases, it is better to use LedgersIterator. + * @return an Iterable instance, containing ledger ids. + */ + Iterable<Long> toIterable(); + +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/ListLedgersResultBuilder.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/ListLedgersResultBuilder.java new file mode 100644 index 00000000000..3d1acabc8a8 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/ListLedgersResultBuilder.java @@ -0,0 +1,33 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.client.api; + +import org.apache.bookkeeper.common.annotation.InterfaceAudience.Public; +import org.apache.bookkeeper.common.annotation.InterfaceStability.Unstable; + +/** + * Builder-style interface to list existing ledgers. + */ +@Public +@Unstable +public interface ListLedgersResultBuilder extends OpBuilder<ListLedgersResult> { + +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/OpBuilder.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/OpBuilder.java index 8a36fe685ab..c9962249a5a 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/OpBuilder.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/OpBuilder.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/OpenBuilder.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/OpenBuilder.java index f1e5ac825a9..9b843a1711a 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/OpenBuilder.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/OpenBuilder.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -53,7 +53,7 @@ public interface OpenBuilder extends OpBuilder<ReadHandle> { OpenBuilder withRecovery(boolean recovery); /** - * Sets the password to be used to open the ledger. It defauls to an empty password + * Sets the password to be used to open the ledger. 
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/OpBuilder.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/OpBuilder.java index 8a36fe685ab..c9962249a5a 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/OpBuilder.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/OpBuilder.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/OpenBuilder.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/OpenBuilder.java index f1e5ac825a9..9b843a1711a 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/OpenBuilder.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/OpenBuilder.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -53,7 +53,7 @@ public interface OpenBuilder extends OpBuilder { OpenBuilder withRecovery(boolean recovery); /** - * Sets the password to be used to open the ledger. It defauls to an empty password + * Sets the password to be used to open the ledger. It defaults to an empty password * * @param password the password to unlock the ledger * diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/ReadHandle.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/ReadHandle.java index 04533dc2400..8e2e633a35a 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/ReadHandle.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/ReadHandle.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -45,6 +45,23 @@ public interface ReadHandle extends Handle { */ CompletableFuture<LedgerEntries> readAsync(long firstEntry, long lastEntry); + /** + * Read a sequence of entries asynchronously. + * + * @param startEntry + * start entry id + * @param maxCount + * the maximum number of entries to read. + * @param maxSize + * the maximum total size of the entries to read, in bytes. + * @return an handle to the result of the operation + */ + default CompletableFuture<LedgerEntries> batchReadAsync(long startEntry, int maxCount, long maxSize) { + CompletableFuture<LedgerEntries> future = new CompletableFuture<>(); + future.completeExceptionally(new UnsupportedOperationException()); + return future; + } + /** * Read a sequence of entries synchronously. * @@ -59,11 +76,26 @@ default LedgerEntries read(long firstEntry, long lastEntry) throws BKException, BKException.HANDLER); } + /** + * Read a sequence of entries synchronously. + * + * @param startEntry + * start entry id + * @param maxCount + * the maximum number of entries to read. + * @param maxSize + * the maximum total size of the entries to read, in bytes. + * @return the result of the operation + */ + default LedgerEntries batchRead(long startEntry, int maxCount, long maxSize) + throws BKException, InterruptedException { + return FutureUtils.result(batchReadAsync(startEntry, maxCount, maxSize), BKException.HANDLER); + } + /** * Read a sequence of entries asynchronously, allowing to read after the LastAddConfirmed range. *
    This is the same of * {@link #read(long, long) } - * but it lets the client read without checking the local value of LastAddConfirmed, so that it is possibile to + * but it lets the client read without checking the local value of LastAddConfirmed, so that it is possible to * read entries for which the writer has not received the acknowledge yet.
    * For entries which are within the range 0..LastAddConfirmed BookKeeper guarantees that the writer has successfully * received the acknowledge.
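To make the batch read contract above concrete, a short hedged sketch follows. It assumes an open ReadHandle named reader, a server that supports batch reads (otherwise the returned future completes with UnsupportedOperationException), and a surrounding method that declares throws Exception:

```java
// Read up to 100 entries, capped at 1 MiB in total, starting from entry 0.
// Fewer entries than maxCount may be returned once maxSize is reached.
try (LedgerEntries entries = reader.batchRead(0, 100, 1024 * 1024)) {
    for (LedgerEntry entry : entries) {
        System.out.println("entry " + entry.getEntryId()
                + " (" + entry.getLength() + " bytes)");
    }
} // closing LedgerEntries releases the underlying entry buffers
```

Unlike read(firstEntry, lastEntry), the batch variant bounds the response by entry count and total size rather than by an explicit last entry id.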
@@ -103,7 +135,7 @@ default LedgerEntries readUnconfirmed(long firstEntry, long lastEntry) /** * Obtains asynchronously the last confirmed write from a quorum of bookies. This - * call obtains the the last add confirmed each bookie has received for this ledger + * call obtains the last add confirmed each bookie has received for this ledger * and returns the maximum. If the ledger has been closed, the value returned by this * call may not correspond to the id of the last entry of the ledger, since it reads * the hint of bookies. Consequently, in the case the ledger has been closed, it may diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/WriteAdvHandle.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/WriteAdvHandle.java index c24c6d0279a..9c33ecfbb18 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/WriteAdvHandle.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/WriteAdvHandle.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -29,7 +29,7 @@ import org.apache.bookkeeper.common.concurrent.FutureUtils; /** - * Provide write access to a ledger. Using WriteAdvHandler the writer MUST explictly set an entryId. Beware that the + * Provide write access to a ledger. Using WriteAdvHandle the writer MUST explicitly set an entryId. Beware that the * write for a given entryId will be acknowledged if and only if all entries up to entryId - 1 have been acknowledged * too (expected from entryId 0) * @@ -46,6 +46,7 @@ public interface WriteAdvHandle extends ReadHandle, ForceableHandle { * * @param entryId entryId to be added * @param data array of bytes to be written + * do not reuse the buffer; the BookKeeper client will release it appropriately. * @return an handle to the result, in case of success it will return the same value of param entryId. */ default CompletableFuture writeAsync(final long entryId, final ByteBuffer data) { @@ -57,6 +58,7 @@ default CompletableFuture writeAsync(final long entryId, final ByteBuffer * * @param entryId entryId to be added * @param data array of bytes to be written + * do not reuse the buffer; the BookKeeper client will release it appropriately. * @return the same value of param entryId. */ default long write(final long entryId, final ByteBuffer data) @@ -69,6 +71,7 @@ default long write(final long entryId, final ByteBuffer data) * * @param entryId entryId to be added. * @param data array of bytes to be written + * do not reuse the buffer; the BookKeeper client will release it appropriately. * @return an handle to the result, in case of success it will return the same value of param {@code entryId}. */ default CompletableFuture writeAsync(final long entryId, final byte[] data) { @@ -80,6 +83,7 @@ default CompletableFuture writeAsync(final long entryId, final byte[] data * * @param entryId entryId to be added. * @param data array of bytes to be written + * do not reuse the buffer; the BookKeeper client will release it appropriately. * @return same value of param {@code entryId}. */ default long write(final long entryId, final byte[] data) @@ -92,6 +96,7 @@ default long write(final long entryId, final byte[] data) * * @param entryId entryId to be added. * @param data array of bytes to be written + * do not reuse the buffer; the BookKeeper client will release it appropriately. * @param offset the offset of the bytes array * @param length the length to data to write * @return an handle to the result, in case of success it will return the same value of param {@code entryId}. @@ -105,6 +110,7 @@ default CompletableFuture writeAsync(final long entryId, final byte[] data * * @param entryId entryId to be added. * @param data array of bytes to be written + * do not reuse the buffer; the BookKeeper client will release it appropriately. * @param offset the offset of the bytes array * @param length the length to data to write * @return the same value of param {@code entryId}. @@ -119,6 +125,7 @@ default long write(final long entryId, final byte[] data, int offset, int length * * @param entryId entryId to be added * @param data array of bytes to be written + * do not reuse the buffer; the BookKeeper client will release it appropriately. * @return an handle to the result, in case of success it will return the same value of param entryId */ CompletableFuture writeAsync(long entryId, ByteBuf data); @@ -128,6 +135,7 @@ default long write(final long entryId, final byte[] data, int offset, int length * * @param entryId entryId to be added * @param data array of bytes to be written + * do not reuse the buffer; the BookKeeper client will release it appropriately. * @return the same value of param entryId */ default long write(long entryId, ByteBuf data) throws BKException, InterruptedException { diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/WriteFlag.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/WriteFlag.java index 6914abeb256..7a3c2b49d0b 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/WriteFlag.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/WriteFlag.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/WriteHandle.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/WriteHandle.java index edad5f46022..46b614982b1 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/WriteHandle.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/WriteHandle.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -24,7 +24,6 @@ import io.netty.buffer.Unpooled; import java.nio.ByteBuffer; import java.util.concurrent.CompletableFuture; - import org.apache.bookkeeper.common.annotation.InterfaceAudience.Public; import org.apache.bookkeeper.common.annotation.InterfaceStability.Unstable; import org.apache.bookkeeper.common.concurrent.FutureUtils; @@ -45,6 +44,7 @@ public interface WriteHandle extends ReadHandle, ForceableHandle { * * @param data a bytebuf to be written. The bytebuf's reference count will be decremented by 1 after the * completable future is returned + * do not reuse the buffer; the BookKeeper client will release it appropriately. * @return an handle to the result, in case of success it will return the id of the newly appended entry */ CompletableFuture appendAsync(ByteBuf data); @@ -54,6 +54,7 @@ public interface WriteHandle extends ReadHandle, ForceableHandle { * * @param data a bytebuf to be written. The bytebuf's reference count will be decremented by 1 after the * call completes. + * do not reuse the buffer; the BookKeeper client will release it appropriately. * @return the id of the newly appended entry */ default long append(ByteBuf data) throws BKException, InterruptedException { @@ -64,6 +65,7 @@ default long append(ByteBuf data) throws BKException, InterruptedException { * Add entry asynchronously to an open ledger. * * @param data array of bytes to be written + * do not reuse the buffer; the BookKeeper client will release it appropriately. * @return an handle to the result, in case of success it will return the id of the newly appended entry */ default CompletableFuture appendAsync(ByteBuffer data) { @@ -74,6 +76,7 @@ default CompletableFuture appendAsync(ByteBuffer data) { * Add entry synchronously to an open ledger. * * @param data array of bytes to be written + * do not reuse the buffer; the BookKeeper client will release it appropriately. * @return the id of the newly appended entry */ default long append(ByteBuffer data) throws BKException, InterruptedException { @@ -84,6 +87,7 @@ default long append(ByteBuffer data) throws BKException, InterruptedException { * Add an entry asynchronously to an open ledger. * * @param data array of bytes to be written + * do not reuse the buffer; the BookKeeper client will release it appropriately. * @return a completable future represents the add result, in case of success the future returns the entry id * of this newly appended entry */ @@ -95,6 +99,7 @@ default CompletableFuture appendAsync(byte[] data) { * Add an entry synchronously to an open ledger. * * @param data array of bytes to be written + * do not reuse the buffer; the BookKeeper client will release it appropriately. * @return the entry id of this newly appended entry */ default long append(byte[] data) throws BKException, InterruptedException { @@ -105,6 +110,7 @@ default long append(byte[] data) throws BKException, InterruptedException { * Add an entry asynchronously to an open ledger. * * @param data array of bytes to be written + * do not reuse the buffer; the BookKeeper client will release it appropriately. * @param offset the offset in the bytes array * @param length the length of the bytes to be appended * @return a completable future represents the add result, in case of success the future returns the entry id * @@ -118,6 +124,7 @@ default CompletableFuture appendAsync(byte[] data, int offset, int length) * Add an entry synchronously to an open ledger. * * @param data array of bytes to be written + * do not reuse the buffer; the BookKeeper client will release it appropriately. * @param offset the offset in the bytes array * @param length the length of the bytes to be appended * @return the entry id of this newly appended entry @@ -141,6 +148,14 @@ default long append(byte[] data, int offset, int length) throws BKException, Int * entry of the ledger is. Once the ledger has been closed, all reads from the * ledger will return the same set of entries. * +

The close operation can fail if it finds conflicting metadata when it + * tries to write to the metadata store. On close, the metadata state is set to + * closed and the lastEntry and length of the ledger are fixed in the metadata. A + * conflict occurs if the metadata in the metadata store has a different value for + * the lastEntry or length. If another process has updated the metadata, setting it + * to closed, but has fixed the lastEntry and length to the same values as this + * process is trying to write, the operation completes successfully. + * + * @return an handle to access the result of the operation */ @Override
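Because the buffer ownership and close semantics are easy to get wrong, here is a hedged usage sketch of the write path. It assumes an org.apache.bookkeeper.client.api.BookKeeper instance named client, a static import of java.nio.charset.StandardCharsets.UTF_8, and a surrounding method that declares throws Exception; the create-builder methods come from CreateBuilder, which is not part of this excerpt:

```java
// Append a few entries, then close: close() fixes lastEntry and length in the
// metadata store, so every future reader agrees on the ledger contents.
try (WriteHandle writer = client.newCreateLedgerOp()
        .withEnsembleSize(3)
        .withWriteQuorumSize(2)
        .withAckQuorumSize(2)
        .withPassword("secret".getBytes(UTF_8))
        .execute()
        .get()) {
    for (int i = 0; i < 10; i++) {
        // Ownership of the payload passes to the client; do not reuse it.
        long entryId = writer.append(("entry-" + i).getBytes(UTF_8));
        System.out.println("appended entry " + entryId);
    }
} // try-with-resources closes the handle, sealing the ledger
```

With WriteAdvHandle the pattern is the same, except that the writer supplies each entryId explicitly, starting from 0 with no gaps.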

@@ -152,6 +167,14 @@ default long append(byte[] data, int offset, int length) throws BKException, Int * Closing a ledger will ensure that all clients agree on what the last * entry of the ledger is. Once the ledger has been closed, all reads from the * ledger will return the same set of entries. + * + *

The close operation can fail if it finds conflicting metadata when it + * tries to write to the metadata store. On close, the metadata state is set to + * closed and the lastEntry and length of the ledger are fixed in the metadata. A + * conflict occurs if the metadata in the metadata store has a different value for + * the lastEntry or length. If another process has updated the metadata, setting it + * to closed, but has fixed the lastEntry and length to the same values as this + * process is trying to write, the operation completes successfully. */ @Override default void close() throws BKException, InterruptedException { diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/package-info.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/package-info.java index 71ad4f1976c..a66866c0e8b 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/package-info.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/api/package-info.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/impl/BookKeeperBuilderImpl.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/impl/BookKeeperBuilderImpl.java index 3a07d1bc55d..e0757b27d40 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/impl/BookKeeperBuilderImpl.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/impl/BookKeeperBuilderImpl.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -20,6 +20,7 @@ */ package org.apache.bookkeeper.client.impl; +import io.netty.buffer.ByteBufAllocator; import io.netty.channel.EventLoopGroup; import io.netty.util.HashedWheelTimer; import java.io.IOException; @@ -50,6 +51,12 @@ public BookKeeperBuilder eventLoopGroup(EventLoopGroup eventLoopGroup) { return this; } + @Override + public BookKeeperBuilder allocator(ByteBufAllocator allocator) { + builder.allocator(allocator); + return this; + } + @Override public BookKeeperBuilder statsLogger(StatsLogger statsLogger) { builder.statsLogger(statsLogger); diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/impl/BookKeeperClientStatsImpl.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/impl/BookKeeperClientStatsImpl.java new file mode 100644 index 00000000000..db1b44847dd --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/impl/BookKeeperClientStatsImpl.java @@ -0,0 +1,295 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
+ */ + +package org.apache.bookkeeper.client.impl; + +import static org.apache.bookkeeper.client.BookKeeperClientStats.CATEGORY_CLIENT; +import static org.apache.bookkeeper.client.BookKeeperClientStats.CLIENT_SCOPE; + +import org.apache.bookkeeper.client.BookKeeperClientStats; +import org.apache.bookkeeper.stats.Counter; +import org.apache.bookkeeper.stats.Gauge; +import org.apache.bookkeeper.stats.OpStatsLogger; +import org.apache.bookkeeper.stats.StatsLogger; +import org.apache.bookkeeper.stats.annotations.StatsDoc; + +/** + * The default implementation of {@link BookKeeperClientStats}. + */ +@StatsDoc( + name = CLIENT_SCOPE, + category = CATEGORY_CLIENT, + help = "BookKeeper client stats" +) +public class BookKeeperClientStatsImpl implements BookKeeperClientStats { + private final StatsLogger stats; + @StatsDoc( + name = CREATE_OP, + help = "operation stats of creating ledgers" + ) + private final OpStatsLogger createOpLogger; + @StatsDoc( + name = DELETE_OP, + help = "operation stats of deleting ledgers" + ) + private final OpStatsLogger deleteOpLogger; + @StatsDoc( + name = OPEN_OP, + help = "operation stats of opening ledgers" + ) + private final OpStatsLogger openOpLogger; + @StatsDoc( + name = RECOVER_OP, + help = "operation stats of recovering ledgers" + ) + private final OpStatsLogger recoverOpLogger; + @StatsDoc( + name = READ_OP, + help = "operation stats of reading entries requests" + ) + private final OpStatsLogger readOpLogger; + @StatsDoc( + name = READ_OP_DM, + help = "the number of read entries hitting DigestMismatch errors" + ) + private final Counter readOpDmCounter; + @StatsDoc( + name = READ_LAST_CONFIRMED_AND_ENTRY, + help = "operation stats of read_last_confirmed_and_entry requests" + ) + private final OpStatsLogger readLacAndEntryOpLogger; + @StatsDoc( + name = READ_LAST_CONFIRMED_AND_ENTRY_RESPONSE, + help = "operation stats of read_last_confirmed_and_entry responses" + ) + private final OpStatsLogger readLacAndEntryRespLogger; + @StatsDoc( + name = ADD_OP, + help = "operation stats of adding entries requests" + ) + private final OpStatsLogger addOpLogger; + @StatsDoc( + name = FORCE_OP, + help = "operation stats of force requests" + ) + private final OpStatsLogger forceOpLogger; + @StatsDoc( + name = ADD_OP_UR, + help = "the number of add entries under replication" + ) + private final Counter addOpUrCounter; + @StatsDoc( + name = WRITE_LAC_OP, + help = "operation stats of write_lac requests" + ) + private final OpStatsLogger writeLacOpLogger; + @StatsDoc( + name = READ_LAC_OP, + help = "operation stats of read_lac requests" + ) + private final OpStatsLogger readLacOpLogger; + @StatsDoc( + name = LEDGER_RECOVER_ADD_ENTRIES, + help = "the distribution of entries written in ledger recovery requests" + ) + private final OpStatsLogger recoverAddEntriesStats; + @StatsDoc( + name = LEDGER_RECOVER_READ_ENTRIES, + help = "the distribution of entries read in ledger recovery requests" + ) + private final OpStatsLogger recoverReadEntriesStats; + + @StatsDoc( + name = ENSEMBLE_CHANGES, + help = "The number of ensemble changes" + ) + private final Counter ensembleChangeCounter; + @StatsDoc( + name = LAC_UPDATE_HITS, + help = "The number of successful lac updates on piggybacked responses" + ) + private final Counter lacUpdateHitsCounter; + @StatsDoc( + name = LAC_UPDATE_MISSES, + help = "The number of unsuccessful lac updates on piggybacked responses" + ) + private final Counter lacUpdateMissesCounter; + @StatsDoc( + name = CLIENT_CHANNEL_WRITE_WAIT, + help = " The latency 
distribution of waiting time on channel being writable" + ) + private final OpStatsLogger clientChannelWriteWaitStats; + @StatsDoc( + name = SPECULATIVE_READ_COUNT, + help = "The number of speculative read requests" + ) + private final Counter speculativeReadCounter; + + @StatsDoc( + name = WRITE_DELAYED_DUE_TO_NOT_ENOUGH_FAULT_DOMAINS_LATENCY, + help = "The delay in write completion because min number of fault domains was not reached" + ) + private final OpStatsLogger writeDelayedDueToNotEnoughFaultDomainsLatency; + + @StatsDoc( + name = WRITE_DELAYED_DUE_TO_NOT_ENOUGH_FAULT_DOMAINS, + help = "The number of times write completion was delayed because min number of fault domains was not reached" + ) + private final Counter writeDelayedDueToNotEnoughFaultDomains; + + @StatsDoc( + name = WRITE_TIMED_OUT_DUE_TO_NOT_ENOUGH_FAULT_DOMAINS, + help = "The number of times write completion timed out because min number of fault domains was not reached" + ) + private final Counter writeTimedOutDueToNotEnoughFaultDomains; + + + public BookKeeperClientStatsImpl(StatsLogger stats) { + this.stats = stats; + this.createOpLogger = stats.getOpStatsLogger(CREATE_OP); + this.deleteOpLogger = stats.getOpStatsLogger(DELETE_OP); + this.openOpLogger = stats.getOpStatsLogger(OPEN_OP); + this.recoverOpLogger = stats.getOpStatsLogger(RECOVER_OP); + this.readOpLogger = stats.getOpStatsLogger(READ_OP); + this.readOpDmCounter = stats.getCounter(READ_OP_DM); + this.readLacAndEntryOpLogger = stats.getOpStatsLogger(READ_LAST_CONFIRMED_AND_ENTRY); + this.readLacAndEntryRespLogger = stats.getOpStatsLogger(READ_LAST_CONFIRMED_AND_ENTRY_RESPONSE); + this.addOpLogger = stats.getOpStatsLogger(ADD_OP); + this.forceOpLogger = stats.getOpStatsLogger(FORCE_OP); + this.addOpUrCounter = stats.getCounter(ADD_OP_UR); + this.writeLacOpLogger = stats.getOpStatsLogger(WRITE_LAC_OP); + this.readLacOpLogger = stats.getOpStatsLogger(READ_LAC_OP); + this.recoverAddEntriesStats = stats.getOpStatsLogger(LEDGER_RECOVER_ADD_ENTRIES); + this.recoverReadEntriesStats = stats.getOpStatsLogger(LEDGER_RECOVER_READ_ENTRIES); + + this.ensembleChangeCounter = stats.getCounter(ENSEMBLE_CHANGES); + this.lacUpdateHitsCounter = stats.getCounter(LAC_UPDATE_HITS); + this.lacUpdateMissesCounter = stats.getCounter(LAC_UPDATE_MISSES); + this.clientChannelWriteWaitStats = stats.getOpStatsLogger(CLIENT_CHANNEL_WRITE_WAIT); + + speculativeReadCounter = stats.getCounter(SPECULATIVE_READ_COUNT); + + this.writeDelayedDueToNotEnoughFaultDomainsLatency = + stats.getOpStatsLogger(WRITE_DELAYED_DUE_TO_NOT_ENOUGH_FAULT_DOMAINS_LATENCY); + this.writeDelayedDueToNotEnoughFaultDomains = stats.getCounter(WRITE_DELAYED_DUE_TO_NOT_ENOUGH_FAULT_DOMAINS); + this.writeTimedOutDueToNotEnoughFaultDomains = + stats.getCounter(WRITE_TIMED_OUT_DUE_TO_NOT_ENOUGH_FAULT_DOMAINS); + } + + @Override + public OpStatsLogger getCreateOpLogger() { + return createOpLogger; + } + @Override + public OpStatsLogger getOpenOpLogger() { + return openOpLogger; + } + @Override + public OpStatsLogger getDeleteOpLogger() { + return deleteOpLogger; + } + @Override + public OpStatsLogger getRecoverOpLogger() { + return recoverOpLogger; + } + @Override + public OpStatsLogger getReadOpLogger() { + return readOpLogger; + } + @Override + public OpStatsLogger getReadLacAndEntryOpLogger() { + return readLacAndEntryOpLogger; + } + @Override + public OpStatsLogger getReadLacAndEntryRespLogger() { + return readLacAndEntryRespLogger; + } + @Override + public OpStatsLogger getAddOpLogger() { + return addOpLogger; + } + 
@Override + public OpStatsLogger getForceOpLogger() { + return forceOpLogger; + } + @Override + public OpStatsLogger getWriteLacOpLogger() { + return writeLacOpLogger; + } + @Override + public OpStatsLogger getReadLacOpLogger() { + return readLacOpLogger; + } + @Override + public OpStatsLogger getRecoverAddCountLogger() { + return recoverAddEntriesStats; + } + @Override + public OpStatsLogger getRecoverReadCountLogger() { + return recoverReadEntriesStats; + } + @Override + public Counter getReadOpDmCounter() { + return readOpDmCounter; + } + @Override + public Counter getAddOpUrCounter() { + return addOpUrCounter; + } + @Override + public Counter getSpeculativeReadCounter() { + return speculativeReadCounter; + } + @Override + public Counter getEnsembleChangeCounter() { + return ensembleChangeCounter; + } + @Override + public Counter getLacUpdateHitsCounter() { + return lacUpdateHitsCounter; + } + @Override + public Counter getLacUpdateMissesCounter() { + return lacUpdateMissesCounter; + } + @Override + public OpStatsLogger getClientChannelWriteWaitLogger() { + return clientChannelWriteWaitStats; + } + @Override + public Counter getEnsembleBookieDistributionCounter(String bookie) { + return stats.scopeLabel(BOOKIE_LABEL, bookie).getCounter(LEDGER_ENSEMBLE_BOOKIE_DISTRIBUTION); + } + @Override + public OpStatsLogger getWriteDelayedDueToNotEnoughFaultDomainsLatency() { + return writeDelayedDueToNotEnoughFaultDomainsLatency; + } + @Override + public Counter getWriteDelayedDueToNotEnoughFaultDomains() { + return writeDelayedDueToNotEnoughFaultDomains; + } + @Override + public Counter getWriteTimedOutDueToNotEnoughFaultDomains() { + return writeTimedOutDueToNotEnoughFaultDomains; + } + @Override + public void registerPendingAddsGauge(Gauge gauge) { + stats.registerGauge(PENDING_ADDS, gauge); + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/impl/LastConfirmedAndEntryImpl.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/impl/LastConfirmedAndEntryImpl.java index 809021404d1..99064eec594 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/impl/LastConfirmedAndEntryImpl.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/impl/LastConfirmedAndEntryImpl.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/impl/LedgerEntriesImpl.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/impl/LedgerEntriesImpl.java index b89fce7a035..aeaacb1690b 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/impl/LedgerEntriesImpl.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/impl/LedgerEntriesImpl.java @@ -24,10 +24,8 @@ import static com.google.common.base.Preconditions.checkNotNull; import io.netty.util.Recycler; - import java.util.Iterator; import java.util.List; - import org.apache.bookkeeper.client.api.LedgerEntries; import org.apache.bookkeeper.client.api.LedgerEntry; diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/impl/OpenBuilderBase.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/impl/OpenBuilderBase.java index b22effc3d57..270f115d84b 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/impl/OpenBuilderBase.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/impl/OpenBuilderBase.java @@ -19,11 +19,10 @@ package org.apache.bookkeeper.client.impl; import java.util.Arrays; - import org.apache.bookkeeper.client.LedgerHandle; +import org.apache.bookkeeper.client.api.BKException.Code; import org.apache.bookkeeper.client.api.DigestType; import org.apache.bookkeeper.client.api.OpenBuilder; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -62,11 +61,11 @@ public OpenBuilder withDigestType(DigestType digestType) { return this; } - protected boolean validate() { + protected int validate() { if (ledgerId < 0) { LOG.error("invalid ledgerId {} < 0", ledgerId); - return false; + return Code.NoSuchLedgerExistsOnMetadataServerException; } - return true; + return Code.OK; } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/conf/AbstractConfiguration.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/conf/AbstractConfiguration.java index 1ce4cf359d5..ea1576a4c77 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/conf/AbstractConfiguration.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/conf/AbstractConfiguration.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file * distributed with this work for additional information @@ -19,17 +19,20 @@ import static org.apache.bookkeeper.conf.ClientConfiguration.CLIENT_AUTH_PROVIDER_FACTORY_CLASS; +import io.netty.buffer.PooledByteBufAllocator; import java.net.URL; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; - import javax.net.ssl.SSLEngine; - import lombok.extern.slf4j.Slf4j; +import org.apache.bookkeeper.common.allocator.LeakDetectionPolicy; +import org.apache.bookkeeper.common.allocator.OutOfMemoryPolicy; +import org.apache.bookkeeper.common.allocator.PoolingPolicy; import org.apache.bookkeeper.common.util.JsonUtil; import org.apache.bookkeeper.common.util.JsonUtil.ParseJsonException; +import org.apache.bookkeeper.common.util.ReflectionUtils; import org.apache.bookkeeper.feature.Feature; import org.apache.bookkeeper.meta.AbstractZkLedgerManagerFactory; import org.apache.bookkeeper.meta.HierarchicalLedgerManagerFactory; @@ -37,7 +40,6 @@ import org.apache.bookkeeper.meta.LongHierarchicalLedgerManagerFactory; import org.apache.bookkeeper.util.EntryFormatter; import org.apache.bookkeeper.util.LedgerIdFormatter; -import org.apache.bookkeeper.util.ReflectionUtils; import org.apache.bookkeeper.util.StringEntryFormatter; import org.apache.commons.configuration.CompositeConfiguration; import org.apache.commons.configuration.ConfigurationException; @@ -71,19 +73,26 @@ public abstract class AbstractConfiguration // Zookeeper Parameters protected static final String ZK_TIMEOUT = "zkTimeout"; protected static final String ZK_SERVERS = "zkServers"; + protected static final String ZK_RETRY_BACKOFF_MAX_RETRIES = "zkRetryBackoffMaxRetries"; // Ledger Manager protected static final String LEDGER_MANAGER_TYPE = "ledgerManagerType"; protected static final String LEDGER_MANAGER_FACTORY_CLASS = "ledgerManagerFactoryClass"; + protected static final String LEDGER_METADATA_FORMAT_VERSION = "ledgerMetadataVersion"; protected static final String ALLOW_SHADED_LEDGER_MANAGER_FACTORY_CLASS = "allowShadedLedgerManagerFactoryClass"; protected static final String SHADED_LEDGER_MANAGER_FACTORY_CLASS_PREFIX = "shadedLedgerManagerFactoryClassPrefix"; protected static final String METADATA_SERVICE_URI = "metadataServiceUri"; protected static final String ZK_LEDGERS_ROOT_PATH = "zkLedgersRootPath"; protected static final String ZK_REQUEST_RATE_LIMIT = "zkRequestRateLimit"; + protected static final String ZK_REPLICATION_TASK_RATE_LIMIT = "zkReplicationTaskRateLimit"; protected static final String AVAILABLE_NODE = "available"; protected static final String REREPLICATION_ENTRY_BATCH_SIZE = "rereplicationEntryBatchSize"; protected static final String STORE_SYSTEMTIME_AS_LEDGER_UNDERREPLICATED_MARK_TIME = "storeSystemTimeAsLedgerUnderreplicatedMarkTime"; + protected static final String STORE_SYSTEMTIME_AS_LEDGER_CREATION_TIME = "storeSystemTimeAsLedgerCreationTime"; + + protected static final String ENABLE_BUSY_WAIT = "enableBusyWait"; + protected static final String ENABLE_HEALTH_CHECK = "enableHealthCheck"; // Metastore settings, only being used when LEDGER_MANAGER_FACTORY_CLASS is MSLedgerManagerFactory protected static final String METASTORE_IMPL_CLASS = "metastoreImplClass"; @@ -102,11 +111,15 @@ public abstract class AbstractConfiguration // Enable authentication of the other connection end point (mutual authentication) protected static final String TLS_CLIENT_AUTHENTICATION = "tlsClientAuthentication"; + // Preserve MDC or not for tasks in executor + protected static 
final String PRESERVE_MDC_FOR_TASK_EXECUTION = "preserveMdcForTaskExecution"; + // Default formatter classes protected static final Class DEFAULT_ENTRY_FORMATTER = StringEntryFormatter.class; protected static final Class DEFAULT_LEDGERID_FORMATTER = - LedgerIdFormatter.UUIDLedgerIdFormatter.class; + LedgerIdFormatter.LongLedgerIdFormatter.class; + protected static final String TLS_CERT_FILES_REFRESH_DURATION_SECONDS = "tlsCertFilesRefreshDurationSeconds"; /** * This list will be passed to {@link SSLEngine#setEnabledCipherSuites(java.lang.String[]) }. * Please refer to official JDK JavaDocs @@ -149,6 +162,34 @@ public abstract class AbstractConfiguration // enforce minimum number of racks per write quorum public static final String ENFORCE_MIN_NUM_RACKS_PER_WRITE_QUORUM = "enforceMinNumRacksPerWriteQuorum"; + // enforce minimum number of fault domains for write + public static final String ENFORCE_MIN_NUM_FAULT_DOMAINS_FOR_WRITE = "enforceMinNumFaultDomainsForWrite"; + + // ignore usage of local node in the internal logic of placement policy + public static final String IGNORE_LOCAL_NODE_IN_PLACEMENT_POLICY = "ignoreLocalNodeInPlacementPolicy"; + + // minimum number of zones per write quorum in ZoneAwarePlacementPolicy + public static final String MIN_NUM_ZONES_PER_WRITE_QUORUM = "minNumZonesPerWriteQuorum"; + + // desired number of zones per write quorum in ZoneAwarePlacementPolicy + public static final String DESIRED_NUM_ZONES_PER_WRITE_QUORUM = "desiredNumZonesPerWriteQuorum"; + + // in ZoneawareEnsemblePlacementPolicy if strict placement is enabled then + // minZones/desiredZones in writeQuorum would be maintained otherwise it + // will pick nodes randomly. + public static final String ENFORCE_STRICT_ZONEAWARE_PLACEMENT = "enforceStrictZoneawarePlacement"; + + // Allocator configuration + protected static final String ALLOCATOR_POOLING_POLICY = "allocatorPoolingPolicy"; + protected static final String ALLOCATOR_POOLING_CONCURRENCY = "allocatorPoolingConcurrency"; + protected static final String ALLOCATOR_OOM_POLICY = "allocatorOutOfMemoryPolicy"; + protected static final String ALLOCATOR_LEAK_DETECTION_POLICY = "allocatorLeakDetectionPolicy"; + + // option to limit stats logging + public static final String LIMIT_STATS_LOGGING = "limitStatsLogging"; + + protected static final String REPLICATION_RATE_BY_BYTES = "replicationRateByBytes"; + protected AbstractConfiguration() { super(); if (READ_SYSTEM_PROPERTIES) { @@ -225,35 +266,12 @@ public String getMetadataServiceUriUnchecked() throws UncheckedConfigurationExce * @return metadata service uri. * @throws ConfigurationException if the metadata service uri is invalid. */ - @SuppressWarnings("deprecation") public String getMetadataServiceUri() throws ConfigurationException { String serviceUri = getString(METADATA_SERVICE_URI); - if (null == serviceUri) { + if (StringUtils.isBlank(serviceUri)) { // no service uri is defined, fallback to old settings String ledgerManagerType; - Class factoryClass = getLedgerManagerFactoryClass(); - if (factoryClass == null) { - // set the ledger manager type to "null", so the driver implementation knows that the type is not set. 
- ledgerManagerType = "null"; - } else { - if (!AbstractZkLedgerManagerFactory.class.isAssignableFrom(factoryClass)) { - // this is a non-zk implementation - throw new UnsupportedOperationException("metadata service uri is not supported for " - + factoryClass); - } - if (factoryClass == HierarchicalLedgerManagerFactory.class) { - ledgerManagerType = HierarchicalLedgerManagerFactory.NAME; - } else if (factoryClass == org.apache.bookkeeper.meta.FlatLedgerManagerFactory.class) { - ledgerManagerType = org.apache.bookkeeper.meta.FlatLedgerManagerFactory.NAME; - } else if (factoryClass == LongHierarchicalLedgerManagerFactory.class) { - ledgerManagerType = LongHierarchicalLedgerManagerFactory.NAME; - } else if (factoryClass == org.apache.bookkeeper.meta.MSLedgerManagerFactory.class) { - ledgerManagerType = org.apache.bookkeeper.meta.MSLedgerManagerFactory.NAME; - } else { - throw new IllegalArgumentException("Unknown zookeeper based ledger manager factory : " - + factoryClass); - } - } + ledgerManagerType = getLedgerManagerLayoutStringFromFactoryClass(); String zkServers = getZkServers(); if (null != zkServers) { // URI doesn't accept ',' @@ -330,6 +348,27 @@ public T setZkTimeout(int zkTimeout) { return getThis(); } + /** + * Get zookeeper client backoff max retry times. + * + * @return zk backoff max retry times. + */ + public int getZkRetryBackoffMaxRetries() { + return getInt(ZK_RETRY_BACKOFF_MAX_RETRIES, Integer.MAX_VALUE); + } + + /** + * Set zookeeper client backoff max retry times. + * + * @param maxRetries + * backoff max retry times + * @return server configuration. + */ + public T setZkRetryBackoffMaxRetries(int maxRetries) { + setProperty(ZK_RETRY_BACKOFF_MAX_RETRIES, Integer.toString(maxRetries)); + return getThis(); + } + /** * Set Ledger Manager Type. * @@ -426,6 +465,58 @@ public String getLedgerManagerFactoryClassName() { return getString(LEDGER_MANAGER_FACTORY_CLASS); } + /** + * Set Ledger metadata format version. + * + * @param metadataFormatVersion + * Ledger metadata format version. pass -1 to use default version + */ + public void setLedgerMetadataFormatVersion(int metadataFormatVersion) { + setProperty(LEDGER_METADATA_FORMAT_VERSION, metadataFormatVersion); + } + + /** + * Get Ledger metadata format version. + * + * @return ledger metadata format version. + */ + public int getLedgerMetadataFormatVersion() { + return getInt(LEDGER_METADATA_FORMAT_VERSION, -1); + } + + /** + * Get layout string ("null" if unconfigured). + * + * @return null, hierarchical, longhierarchical, or flat based on LEDGER_MANAGER_FACTORY_CLASS + */ + @SuppressWarnings("deprecation") + public String getLedgerManagerLayoutStringFromFactoryClass() throws ConfigurationException { + String ledgerManagerType; + Class factoryClass = getLedgerManagerFactoryClass(); + if (factoryClass == null) { + // set the ledger manager type to "null", so the driver implementation knows that the type is not set. 
+ ledgerManagerType = "null"; + } else { + if (!AbstractZkLedgerManagerFactory.class.isAssignableFrom(factoryClass)) { + // this is a non-zk implementation + throw new ConfigurationException("metadata service uri is not supported for " + factoryClass); + } + if (factoryClass == HierarchicalLedgerManagerFactory.class) { + ledgerManagerType = HierarchicalLedgerManagerFactory.NAME; + } else if (factoryClass == org.apache.bookkeeper.meta.FlatLedgerManagerFactory.class) { + ledgerManagerType = org.apache.bookkeeper.meta.FlatLedgerManagerFactory.NAME; + } else if (factoryClass == LongHierarchicalLedgerManagerFactory.class) { + ledgerManagerType = LongHierarchicalLedgerManagerFactory.NAME; + } else if (factoryClass == org.apache.bookkeeper.meta.MSLedgerManagerFactory.class) { + ledgerManagerType = org.apache.bookkeeper.meta.MSLedgerManagerFactory.NAME; + } else { + throw new IllegalArgumentException("Unknown zookeeper based ledger manager factory : " + + factoryClass); + } + } + return ledgerManagerType; + } + /** * Set Ledger Manager Factory Class. * @@ -736,6 +827,30 @@ public T setTLSClientAuthentication(boolean enabled) { return getThis(); } + /** + * Set tls certificate files refresh duration in seconds. + * + * @param certFilesRefreshSec + * tls certificate files refresh duration in seconds (set 0 to + * disable auto refresh) + * @return current configuration + */ + public T setTLSCertFilesRefreshDurationSeconds(long certFilesRefreshSec) { + setProperty(TLS_CERT_FILES_REFRESH_DURATION_SECONDS, certFilesRefreshSec); + return getThis(); + } + + /** + * Get tls certificate files refresh duration in seconds. + * + * @return tls certificate files refresh duration in seconds. Default 0 + * to disable auto refresh. + * + */ + public long getTLSCertFilesRefreshDurationSeconds() { + return getLong(TLS_CERT_FILES_REFRESH_DURATION_SECONDS, 0); + } + /** * Set the list of enabled TLS cipher suites. Leave null not to override default JDK list. This list will be passed * to {@link SSLEngine#setEnabledCipherSuites(java.lang.String[]) }. Please refer to official JDK JavaDocs @@ -800,6 +915,60 @@ public int getMinNumRacksPerWriteQuorum() { return getInteger(MIN_NUM_RACKS_PER_WRITE_QUORUM, 2); } + /** + * Set the minimum number of zones per write quorum in + * ZoneAwarePlacementPolicy. + */ + public void setMinNumZonesPerWriteQuorum(int minNumZonesPerWriteQuorum) { + setProperty(MIN_NUM_ZONES_PER_WRITE_QUORUM, minNumZonesPerWriteQuorum); + } + + /** + * Get the minimum number of zones per write quorum in + * ZoneAwarePlacementPolicy. + */ + public int getMinNumZonesPerWriteQuorum() { + return getInteger(MIN_NUM_ZONES_PER_WRITE_QUORUM, 2); + } + + /** + * Set the desired number of zones per write quorum in + * ZoneAwarePlacementPolicy. + */ + public void setDesiredNumZonesPerWriteQuorum(int desiredNumZonesPerWriteQuorum) { + setProperty(DESIRED_NUM_ZONES_PER_WRITE_QUORUM, desiredNumZonesPerWriteQuorum); + } + + /** + * Get the desired number of zones per write quorum in + * ZoneAwarePlacementPolicy. + */ + public int getDesiredNumZonesPerWriteQuorum() { + return getInteger(DESIRED_NUM_ZONES_PER_WRITE_QUORUM, 3); + } + + /** + * Set the flag to enforce strict zoneaware placement. + * + *

In ZoneawareEnsemblePlacementPolicy, if strict placement is enabled, then + * minZones/desiredZones in the write quorum will be maintained; otherwise it + * will pick nodes randomly. + */ + public void setEnforceStrictZoneawarePlacement(boolean enforceStrictZoneawarePlacement) { + setProperty(ENFORCE_STRICT_ZONEAWARE_PLACEMENT, enforceStrictZoneawarePlacement); + }
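A brief hedged sketch of how the zone settings combine (ClientConfiguration is used here only as a concrete AbstractConfiguration subclass; it inherits these setters):

```java
// Require at least 2 zones per write quorum and prefer 3; with strict
// placement enabled, ensemble creation fails rather than falling back
// to randomly picked nodes when the zone requirements cannot be met.
ClientConfiguration conf = new ClientConfiguration();
conf.setMinNumZonesPerWriteQuorum(2);
conf.setDesiredNumZonesPerWriteQuorum(3);
conf.setEnforceStrictZoneawarePlacement(true);
```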

+ + /** + * Get the flag to enforce strict zoneaware placement. + * + In ZoneawareEnsemblePlacementPolicy, if strict placement is enabled, then + * minZones/desiredZones in the write quorum will be maintained; otherwise it + * will pick nodes randomly. + */ + public boolean getEnforceStrictZoneawarePlacement() { + return getBoolean(ENFORCE_STRICT_ZONEAWARE_PLACEMENT, true); + } + /** * Set the flag to enforce minimum number of racks per write quorum. */ @@ -814,6 +983,34 @@ public boolean getEnforceMinNumRacksPerWriteQuorum() { return getBoolean(ENFORCE_MIN_NUM_RACKS_PER_WRITE_QUORUM, false); } + /** + * Set the flag to enforce minimum number of fault domains for write. + */ + public void setEnforceMinNumFaultDomainsForWrite(boolean enforceMinNumFaultDomainsForWrite) { + setProperty(ENFORCE_MIN_NUM_FAULT_DOMAINS_FOR_WRITE, enforceMinNumFaultDomainsForWrite); + } + + /** + * Get the flag to enforce minimum number of fault domains for write. + */ + public boolean getEnforceMinNumFaultDomainsForWrite() { + return getBoolean(ENFORCE_MIN_NUM_FAULT_DOMAINS_FOR_WRITE, false); + } + + /** + * Sets the flag to ignore usage of the local node in the placement policy. + */ + public void setIgnoreLocalNodeInPlacementPolicy(boolean ignoreLocalNodeInPlacementPolicy) { + setProperty(IGNORE_LOCAL_NODE_IN_PLACEMENT_POLICY, ignoreLocalNodeInPlacementPolicy); + } + + /** + * Whether to ignore the local node in the placement policy. + */ + public boolean getIgnoreLocalNodeInPlacementPolicy() { + return getBoolean(IGNORE_LOCAL_NODE_IN_PLACEMENT_POLICY, false); + } + /** * Enable the Auditor to use system time as underreplicated ledger mark * time. @@ -838,7 +1035,226 @@ public T setStoreSystemTimeAsLedgerUnderreplicatedMarkTime(boolean enabled) { * underreplicated ledger mark time. */ public boolean getStoreSystemTimeAsLedgerUnderreplicatedMarkTime() { - return getBoolean(STORE_SYSTEMTIME_AS_LEDGER_UNDERREPLICATED_MARK_TIME, false); + return getBoolean(STORE_SYSTEMTIME_AS_LEDGER_UNDERREPLICATED_MARK_TIME, true); + } + + /** + * Whether to preserve MDC for tasks in Executor. + * + * @return flag to enable/disable MDC preservation in Executor. + */ + public boolean getPreserveMdcForTaskExecution() { + return getBoolean(PRESERVE_MDC_FOR_TASK_EXECUTION, false); + } + + /** + * Whether to preserve MDC for tasks in Executor. + * + * @param enabled + * flag to enable/disable MDC preservation in Executor. + * @return configuration. + */ + public T setPreserveMdcForTaskExecution(boolean enabled) { + setProperty(PRESERVE_MDC_FOR_TASK_EXECUTION, enabled); + return getThis(); + } + + /** + * @return the configured pooling policy for the allocator. + */ + public PoolingPolicy getAllocatorPoolingPolicy() { + return PoolingPolicy.valueOf(this.getString(ALLOCATOR_POOLING_POLICY, PoolingPolicy.PooledDirect.toString())); + } + + /** + * Define the memory pooling policy. + * +

    Default is {@link PoolingPolicy#PooledDirect} + * + * @param poolingPolicy + * the memory pooling policy + * @return configuration object. + */ + public T setAllocatorPoolingPolicy(PoolingPolicy poolingPolicy) { + this.setProperty(ALLOCATOR_POOLING_POLICY, poolingPolicy.toString()); + return getThis(); + } + + /** + * @return the configured pooling concurrency for the allocator. + */ + public int getAllocatorPoolingConcurrency() { + return this.getInteger(ALLOCATOR_POOLING_CONCURRENCY, PooledByteBufAllocator.defaultNumDirectArena()); + } + + /** + * Controls the amount of concurrency for the memory pool. + * + *

Default is to have a number of allocator arenas equal to 2 * CPUs. + * +

Decreasing this number will reduce the amount of memory overhead, at the + * expense of increased allocation contention. + * + * @param concurrency + * the concurrency level to use for the allocator pool + * @return configuration object. + */ + public T setAllocatorPoolingConcurrency(int concurrency) { + this.setProperty(ALLOCATOR_POOLING_CONCURRENCY, concurrency); + return getThis(); + } + + /** + * @return the configured out of memory policy for the allocator. + */ + public OutOfMemoryPolicy getAllocatorOutOfMemoryPolicy() { + return OutOfMemoryPolicy + .valueOf(this.getString(ALLOCATOR_OOM_POLICY, OutOfMemoryPolicy.FallbackToHeap.toString())); + } + + /** + * Define the memory allocator out of memory policy. + * +

    Default is {@link OutOfMemoryPolicy#FallbackToHeap} + * + * @param oomPolicy + * the "out-of-memory" policy for the memory allocator + * @return configuration object. + */ + public T setAllocatorOutOfMemoryPolicy(OutOfMemoryPolicy oomPolicy) { + this.setProperty(ALLOCATOR_OOM_POLICY, oomPolicy.toString()); + return getThis(); + } + + /** + * Return the configured leak detection policy for the allocator. + */ + public LeakDetectionPolicy getAllocatorLeakDetectionPolicy() { + //see: https://lists.apache.org/thread/d3zw8bxhlg0wxfhocyjglq0nbxrww3sg + String nettyLevelStr = System.getProperty("io.netty.leakDetectionLevel", LeakDetectionPolicy.Disabled.name()); + nettyLevelStr = System.getProperty("io.netty.leakDetection.level", nettyLevelStr); + String bkLevelStr = getString(ALLOCATOR_LEAK_DETECTION_POLICY, LeakDetectionPolicy.Disabled.name()); + LeakDetectionPolicy nettyLevel = LeakDetectionPolicy.parseLevel(nettyLevelStr); + LeakDetectionPolicy bkLevel = LeakDetectionPolicy.parseLevel(bkLevelStr); + if (nettyLevel.ordinal() >= bkLevel.ordinal()) { + return nettyLevel; + } else { + return bkLevel; + } + } + + /** + * Enable the leak detection for the allocator. + * + *

    Default is {@link LeakDetectionPolicy#Disabled} + * + * @param leakDetectionPolicy + * the leak detection policy for the memory allocator + * @return configuration object. + */ + public T setAllocatorLeakDetectionPolicy(LeakDetectionPolicy leakDetectionPolicy) { + this.setProperty(ALLOCATOR_LEAK_DETECTION_POLICY, leakDetectionPolicy.toString()); + return getThis(); + } + + /** + * Return whether the busy-wait is enabled for BookKeeper and Netty IO threads. + * + *

    Default is false + * + * @return the value of the option + */ + public boolean isBusyWaitEnabled() { + return getBoolean(ENABLE_BUSY_WAIT, false); + } + + /** + * Option to enable busy-wait settings. + * + *

    Default is false. + * + *

WARNING: This option will enable spin-waiting on executors and IO threads + * in order to reduce latency during context switches. The spinning will + * consume 100% CPU even when the bookie is not doing any work. It is + * recommended to reduce the number of threads in the main workers pool + * ({@link ClientConfiguration#setNumWorkerThreads(int)}) and the Netty event + * loop {@link ClientConfiguration#setNumIOThreads(int)} so that only a few + * CPU cores are kept busy. +

+ * + * @param busyWaitEnabled + * if enabled, use spin-waiting strategy to reduce latency in + * context switches + * + * @see #isBusyWaitEnabled() + */ + public T setBusyWaitEnabled(boolean busyWaitEnabled) { + setProperty(ENABLE_BUSY_WAIT, busyWaitEnabled); + return getThis(); + } + + /** + * Return the flag indicating whether to limit stats logging. + * + * @return + * the boolean flag indicating whether to limit stats logging + */ + public boolean getLimitStatsLogging() { + return getBoolean(LIMIT_STATS_LOGGING, true); + } + + /** + * Sets flag to limit the stats logging. + * + * @param limitStatsLogging + * flag to limit the stats logging. + * @return configuration. + */ + public T setLimitStatsLogging(boolean limitStatsLogging) { + setProperty(LIMIT_STATS_LOGGING, limitStatsLogging); + return getThis(); + } + + /** + * Get the bytes rate of re-replication. + * Default value is -1, which means entries will be replicated without any throttling. + * + * @return bytes rate of re-replication. + */ + public int getReplicationRateByBytes() { + return getInt(REPLICATION_RATE_BY_BYTES, -1); + } + + /** + * Set the bytes rate of re-replication. + * + * @param rate bytes rate of re-replication. + * + * @return ClientConfiguration + */ + public T setReplicationRateByBytes(int rate) { + this.setProperty(REPLICATION_RATE_BY_BYTES, rate); + return getThis(); + } + + /** + * Get the maximum number of re-replication tasks that can be acquired per second. + * @return the maximum number of re-replication tasks that can be acquired per second. + */ + public double getZkReplicationTaskRateLimit() { + return getDouble(ZK_REPLICATION_TASK_RATE_LIMIT, 0); + } + + /** + * Set the maximum number of re-replication tasks that can be acquired per second. Default is 0, which means no limit. + * Values greater than 0 enable the rate limiting. Decimal values are allowed. + * For example, 0.5 means 1 task per 2 seconds; 1 means 1 task per second. + * @param zkReplicationTaskRateLimit + * @return ClientConfiguration + */ + public T setZkReplicationTaskRateLimit(double zkReplicationTaskRateLimit) { + setProperty(ZK_REPLICATION_TASK_RATE_LIMIT, zkReplicationTaskRateLimit); + return getThis(); } /** @@ -860,7 +1276,7 @@ private Map<String, Object> toMap() { Map<String, Object> configMap = new HashMap<>(); Iterator<String> iterator = this.getKeys(); while (iterator.hasNext()) { - String key = iterator.next().toString(); + String key = iterator.next(); Object property = this.getProperty(key); if (property != null) { configMap.put(key, property.toString());
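Putting the new AbstractConfiguration knobs together, a hedged configuration sketch follows (ServerConfiguration is assumed only as a concrete subclass; every value shown is optional and has the documented default):

```java
// Tune allocator behaviour and throttle re-replication on a bookie.
ServerConfiguration conf = new ServerConfiguration();
conf.setAllocatorPoolingPolicy(PoolingPolicy.PooledDirect)
    .setAllocatorOutOfMemoryPolicy(OutOfMemoryPolicy.FallbackToHeap)
    .setAllocatorLeakDetectionPolicy(LeakDetectionPolicy.Disabled)
    .setZkRetryBackoffMaxRetries(10)               // cap ZooKeeper retries
    .setReplicationRateByBytes(10 * 1024 * 1024)   // ~10 MiB/s re-replication
    .setZkReplicationTaskRateLimit(0.5);           // one task every 2 seconds
```

The fluent chaining works because each setter returns getThis(). Note that for leak detection the effective level is the stricter of this setting and Netty's io.netty.leakDetection.level system property, as implemented in getAllocatorLeakDetectionPolicy() above.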
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/conf/ClientConfiguration.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/conf/ClientConfiguration.java index c902db823bb..34aadd8ef79 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/conf/ClientConfiguration.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/conf/ClientConfiguration.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -17,19 +17,21 @@ */ package org.apache.bookkeeper.conf; -import static com.google.common.base.Charsets.UTF_8; +import static java.nio.charset.StandardCharsets.UTF_8; import static org.apache.bookkeeper.util.BookKeeperConstants.FEATURE_DISABLE_ENSEMBLE_CHANGE; import io.netty.buffer.ByteBuf; +import java.util.NoSuchElementException; import java.util.concurrent.TimeUnit; import org.apache.bookkeeper.client.BookKeeper.DigestType; import org.apache.bookkeeper.client.EnsemblePlacementPolicy; import org.apache.bookkeeper.client.LedgerHandle; import org.apache.bookkeeper.client.RackawareEnsemblePlacementPolicy; +import org.apache.bookkeeper.client.api.BookKeeperBuilder; +import org.apache.bookkeeper.common.util.ReflectionUtils; import org.apache.bookkeeper.discover.RegistrationClient; import org.apache.bookkeeper.discover.ZKRegistrationClient; import org.apache.bookkeeper.replication.Auditor; -import org.apache.bookkeeper.util.ReflectionUtils; import org.apache.commons.configuration.ConfigurationException; @@ -93,6 +95,7 @@ public class ClientConfiguration extends AbstractConfigurationIf this flag is enabled, the client will use - * {@link EnsemblePlacementPolicy#reorderReadSequence(java.util.ArrayList, + * {@link EnsemblePlacementPolicy#reorderReadSequence(java.util.List, * org.apache.bookkeeper.client.BookiesHealthInfo, org.apache.bookkeeper.client.DistributionSchedule.WriteSet)} * to figure out a better read sequence to attempt reads from replicas and use - * {@link EnsemblePlacementPolicy#reorderReadLACSequence(java.util.ArrayList, + * {@link EnsemblePlacementPolicy#reorderReadLACSequence(java.util.List, * org.apache.bookkeeper.client.BookiesHealthInfo, org.apache.bookkeeper.client.DistributionSchedule.WriteSet)} * to figure out a better read sequence to attempt long poll reads from replicas. * @@ -1135,6 +1174,51 @@ public ClientConfiguration setReorderReadSequenceEnabled(boolean enabled) { return this; } + /** + * Whether read operations should be sticky to a single bookie or not. + * + * @return true if sticky reads are enabled, otherwise false. + */ + public boolean isStickyReadsEnabled() { + return getBoolean(STICKY_READS_ENABLED, false); + } + + /** + * Enable/disable having read operations for a ledger to be sticky to + * a single bookie. + * +

    If this flag is enabled, the client will use one single bookie (by + * preference) to read all entries for a ledger. + * + *

Having all the reads go to a single bookie increases the chances that + * a read request will be fulfilled by the bookie read cache (or OS file + * system cache) when doing sequential reads. + * + * @param enabled the flag to enable/disable sticky reads. + * @return client configuration instance. + */ + public ClientConfiguration setStickyReadsEnabled(boolean enabled) { + setProperty(STICKY_READS_ENABLED, enabled); + return this; + } + + /** + * Whether recovery batch read is enabled or not. + * @return true if recovery batch read is enabled, otherwise false. + */ + public boolean isRecoveryBatchReadEnabled() { + return getBoolean(RECOVERY_BATCH_READ_ENABLED, false); + } + + /** + * Enable/disable recovery batch read. + * @param enabled flag to enable/disable recovery batch read. + * @return client configuration instance. + */ + public ClientConfiguration setRecoveryBatchReadEnabled(boolean enabled) { + setProperty(RECOVERY_BATCH_READ_ENABLED, enabled); + return this; + }
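A short hedged sketch combining the two read options just introduced (both setters return the same ClientConfiguration instance, so they chain):

```java
// Prefer one bookie per ledger for sequential reads to exploit its read
// cache, and let ledger recovery fetch entries in batches.
ClientConfiguration conf = new ClientConfiguration();
conf.setStickyReadsEnabled(true)
    .setRecoveryBatchReadEnabled(true);
```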

/** * Get Ensemble Placement Policy Class. * @@ -1205,26 +1289,48 @@ public ClientConfiguration setNetworkTopologyStabilizePeriodSeconds(int seconds) } /** - * Whether to order slow bookies in placement policy. * - * @return flag of whether to order slow bookies in placement policy or not. + * Whether to enable BookieAddressResolver. * + * @return flag to enable/disable BookieAddressResolver. */ - public boolean getEnsemblePlacementPolicySlowBookies() { - return getBoolean(ENSEMBLE_PLACEMENT_POLICY_ORDER_SLOW_BOOKIES, false); + public boolean getBookieAddressResolverEnabled() { + return getBoolean(BOOKIE_ADDRESS_RESOLVER_ENABLED, true); } /** - * Enable/Disable ordering slow bookies in placement policy. + * Enable/Disable BookieAddressResolver. + * + + * If this flag is true, read bookie information from the metadata service (e.g. ZooKeeper) to resolve the address + * from each bookie ID. If all bookie IDs in the cluster are "address:port" or "hostname:port", you can set this + * flag to false to reduce requests to the metadata service. +

* * @param enabled - * flag to enable/disable ordering slow bookies in placement policy. + * flag to enable/disable BookieAddressResolver. * @return client configuration. */ - public ClientConfiguration setEnsemblePlacementPolicySlowBookies(boolean enabled) { - setProperty(ENSEMBLE_PLACEMENT_POLICY_ORDER_SLOW_BOOKIES, enabled); + public ClientConfiguration setBookieAddressResolverEnabled(boolean enabled) { + setProperty(BOOKIE_ADDRESS_RESOLVER_ENABLED, enabled); return this; } + /** + * Set the flag to use hostname to resolve local node placement policy. + * @param useHostnameResolveLocalNodePlacementPolicy + */ + public void setUseHostnameResolveLocalNodePlacementPolicy(boolean useHostnameResolveLocalNodePlacementPolicy) { + setProperty(USE_HOSTNAME_RESOLVE_LOCAL_NODE_PLACEMENT_POLICY, useHostnameResolveLocalNodePlacementPolicy); + } + + /** + * Get whether to use hostname to resolve local node placement policy. + * @return true if the hostname is used to resolve the local node in the placement policy. + */ + public boolean getUseHostnameResolveLocalNodePlacementPolicy() { + return getBoolean(USE_HOSTNAME_RESOLVE_LOCAL_NODE_PLACEMENT_POLICY, false); + } + /** * Whether to enable recording task execution stats. * @@ -1370,6 +1476,26 @@ public ClientConfiguration setBookieQuarantineTime(int quarantineTime, TimeUnit return this; } + /** + * Get the bookie quarantine ratio. + * + * @return the bookie quarantine ratio. + */ + public double getBookieQuarantineRatio() { + return getDouble(BOOKIE_QUARANTINE_RATIO, 1.0); + } + + /** + * Set the bookie quarantine ratio. Default is 1.0. + * + * @param ratio the bookie quarantine ratio. + * @return client configuration + */ + public ClientConfiguration setBookieQuarantineRatio(double ratio) { + setProperty(BOOKIE_QUARANTINE_RATIO, ratio); + return this; + } + /** * {@inheritDoc} */ @@ -1497,6 +1623,27 @@ public ClientConfiguration setStartTLSTimeout(int timeoutSecs) { return this; } + /** + * Whether hostname verification is enabled? + * + * @return true if hostname verification is enabled, otherwise false. + */ + public boolean getHostnameVerificationEnabled() { + return getBoolean(TLS_HOSTNAME_VERIFICATION_ENABLED, false); + } + + /** + * Enable/Disable hostname verification for tls connection. + * + * @param enabled + * flag to enable/disable tls hostname verification. + * @return client configuration. + */ + public ClientConfiguration setHostnameVerificationEnabled(boolean enabled) { + setProperty(TLS_HOSTNAME_VERIFICATION_ENABLED, enabled); + return this; + } + /** * Set the client role. * @@ -1662,6 +1809,36 @@ public ClientConfiguration setTLSCertificatePath(String arg) { return this; } + /** + * Whether to allow opportunistic striping. + * + * @return true if opportunistic striping is enabled + */ + public boolean getOpportunisticStriping() { + return getBoolean(OPPORTUNISTIC_STRIPING, false); + } + + /** + * Enable/Disable opportunistic striping. +
+ + /** + * Enable/Disable opportunistic striping. + * + * <p>
+ * If set to true, when you create a ledger with a given + * ensemble size, the client automatically handles a shortage + * of bookies by reducing the ensemble size, down to the write + * quorum size at minimum. This way, on small clusters you can + * attempt striping (ensemble size > write quorum size) whenever + * enough bookies are up and running, and degrade automatically + * to the minimum requested replication count otherwise. + *
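 * <p>A minimal sketch (cluster size hypothetical): on a cluster with only four
 * bookies alive,
 * <pre>{@code
 * ClientConfiguration conf = new ClientConfiguration();
 * conf.setOpportunisticStriping(true);
 * // a create with ensemble=5, writeQuorum=3, ackQuorum=2 can now proceed with
 * // an effective ensemble of 4 instead of failing with
 * // BKException.BKNotEnoughBookiesException
 * }</pre>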
    + * + * @param enabled + * flag to enable/disable opportunistic striping. + * @return client configuration. + */ + public ClientConfiguration setOpportunisticStriping(boolean enabled) { + setProperty(OPPORTUNISTIC_STRIPING, enabled); + return this; + } + /** * Whether to delay ensemble change or not? * @@ -1688,6 +1865,27 @@ public ClientConfiguration setDelayEnsembleChange(boolean enabled) { return this; } + /** + * Whether to enable bookie address changes tracking. + * + * @return flag to enable/disable bookie address changes tracking + */ + public boolean getEnableBookieAddressTracking() { + return getBoolean(FOLLOW_BOOKIE_ADDRESS_TRACKING, true); + } + + /** + * Enable/Disable bookie address changes tracking. + * + * @param value + * flag to enable/disable bookie address changes tracking + * @return client configuration. + */ + public ClientConfiguration setEnableBookieAddressTracking(boolean value) { + setProperty(FOLLOW_BOOKIE_ADDRESS_TRACKING, value); + return this; + } + /** * Whether to enable bookie failure tracking. * @@ -1775,8 +1973,11 @@ public ClientConfiguration setMaxAllowedEnsembleChanges(int num) { /** * Option to use Netty Pooled ByteBufs. * + * @deprecated see {@link BookKeeperBuilder#allocator(io.netty.buffer.ByteBufAllocator)} + * * @return the value of the option */ + @Deprecated public boolean isNettyUsePooledBuffers() { return getBoolean(NETTY_USE_POOLED_BUFFERS, true); } @@ -1788,6 +1989,8 @@ public boolean isNettyUsePooledBuffers() { * @param enabled * if enabled BookKeeper will use default Pooled Netty Buffer allocator * + * @deprecated see {@link BookKeeperBuilder#allocator(io.netty.buffer.ByteBufAllocator)} + * * @see #setUseV2WireProtocol(boolean) * @see ByteBuf#release() * @see LedgerHandle#readEntries(long, long) @@ -1850,6 +2053,37 @@ public boolean getStoreSystemtimeAsLedgerCreationTime() { return getBoolean(STORE_SYSTEMTIME_AS_LEDGER_CREATION_TIME, false); } + /** + * Set the log frequency when a bookie is unavailable, in order to limit log filesize. + * + * @param throttleValue + * @param unit + * @return client configuration. + */ + public ClientConfiguration setClientConnectBookieUnavailableLogThrottling( + int throttleValue, TimeUnit unit) { + setProperty(CLIENT_CONNECT_BOOKIE_UNAVAILABLE_LOG_THROTTLING, unit.toMillis(throttleValue)); + return this; + } + + /** + * Get the log frequency when a bookie is unavailable, in milliseconds. + * + * @return log frequency when a bookie is unavailable, in milliseconds. + */ + public long getClientConnectBookieUnavailableLogThrottlingMs() { + return getLong(CLIENT_CONNECT_BOOKIE_UNAVAILABLE_LOG_THROTTLING, 5_000L); + } + + public ClientConfiguration setBatchReadEnabled(boolean enable) { + setProperty(BATCH_READ_ENABLED, enable); + return this; + } + + public boolean isBatchReadEnabled() { + return getBoolean(BATCH_READ_ENABLED, true); + } + @Override protected ClientConfiguration getThis() { return this; diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/conf/Configurable.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/conf/Configurable.java index 6b4adb3a7fb..60ed1490ad8 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/conf/Configurable.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/conf/Configurable.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/conf/ServerConfiguration.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/conf/ServerConfiguration.java index db40e0f5021..cb731060c4c 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/conf/ServerConfiguration.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/conf/ServerConfiguration.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -17,32 +17,90 @@ */ package org.apache.bookkeeper.conf; +import static org.apache.bookkeeper.util.BookKeeperConstants.MAX_LOG_SIZE_LIMIT; + import com.google.common.annotations.Beta; import com.google.common.base.Strings; +import com.google.common.collect.Lists; import java.io.File; +import java.net.URL; +import java.nio.file.Paths; import java.util.concurrent.TimeUnit; +import org.apache.bookkeeper.bookie.FileChannelProvider; import org.apache.bookkeeper.bookie.InterleavedLedgerStorage; import org.apache.bookkeeper.bookie.LedgerStorage; import org.apache.bookkeeper.bookie.SortedLedgerStorage; +import org.apache.bookkeeper.bookie.storage.ldb.DbLedgerStorage; +import org.apache.bookkeeper.common.conf.ConfigDef; +import org.apache.bookkeeper.common.conf.ConfigException; +import org.apache.bookkeeper.common.conf.ConfigKey; +import org.apache.bookkeeper.common.conf.ConfigKeyGroup; +import org.apache.bookkeeper.common.conf.Type; +import org.apache.bookkeeper.common.conf.validators.ClassValidator; +import org.apache.bookkeeper.common.conf.validators.RangeValidator; +import org.apache.bookkeeper.common.util.ReflectionUtils; import org.apache.bookkeeper.discover.RegistrationManager; import org.apache.bookkeeper.discover.ZKRegistrationManager; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.stats.NullStatsProvider; import org.apache.bookkeeper.stats.StatsProvider; -import org.apache.bookkeeper.util.BookKeeperConstants; -import org.apache.bookkeeper.util.ReflectionUtils; import org.apache.commons.configuration.ConfigurationException; +import org.apache.commons.lang3.StringUtils; /** * Configuration manages server-side settings. 
*/ public class ServerConfiguration extends AbstractConfiguration { + + private static final int SECOND = 1000; + // Ledger Storage Settings + + private static final ConfigKeyGroup GROUP_LEDGER_STORAGE = ConfigKeyGroup.builder("ledgerstorage") + .description("Ledger Storage related settings") + .order(10) // place a place holder here + .build(); + + protected static final String LEDGER_STORAGE_CLASS = "ledgerStorageClass"; + protected static final ConfigKey LEDGER_STORAGE_CLASS_KEY = ConfigKey.builder(LEDGER_STORAGE_CLASS) + .type(Type.CLASS) + .description("Ledger storage implementation class") + .defaultValue(SortedLedgerStorage.class) + .optionValues(Lists.newArrayList( + InterleavedLedgerStorage.class.getName(), + SortedLedgerStorage.class.getName(), + DbLedgerStorage.class.getName() + )) + .validator(ClassValidator.of(LedgerStorage.class)) + .group(GROUP_LEDGER_STORAGE) + .build(); + // Entry Log Parameters + + private static final ConfigKeyGroup GROUP_LEDGER_STORAGE_ENTRY_LOGGER = ConfigKeyGroup.builder("entrylogger") + .description("EntryLogger related settings") + .order(11) + .build(); + protected static final String ENTRY_LOG_SIZE_LIMIT = "logSizeLimit"; + protected static final ConfigKey ENTRY_LOG_SIZE_LIMIT_KEY = ConfigKey.builder(ENTRY_LOG_SIZE_LIMIT) + .type(Type.LONG) + .description("Max file size of entry logger, in bytes") + .documentation("A new entry log file will be created when the old one reaches this file size limitation") + .defaultValue(MAX_LOG_SIZE_LIMIT) + .validator(RangeValidator.between(0, MAX_LOG_SIZE_LIMIT)) + .group(GROUP_LEDGER_STORAGE_ENTRY_LOGGER) + .build(); + protected static final String ENTRY_LOG_FILE_PREALLOCATION_ENABLED = "entryLogFilePreallocationEnabled"; + + + protected static final String FORCE_ALLOW_COMPACTION = "forceAllowCompaction"; protected static final String MINOR_COMPACTION_INTERVAL = "minorCompactionInterval"; protected static final String MINOR_COMPACTION_THRESHOLD = "minorCompactionThreshold"; + protected static final String MINOR_COMPACTION_MAX_TIME_MILLIS = "minorCompactionMaxTimeMillis"; protected static final String MAJOR_COMPACTION_INTERVAL = "majorCompactionInterval"; protected static final String MAJOR_COMPACTION_THRESHOLD = "majorCompactionThreshold"; + protected static final String MAJOR_COMPACTION_MAX_TIME_MILLIS = "majorCompactionMaxTimeMillis"; protected static final String IS_THROTTLE_BY_BYTES = "isThrottleByBytes"; protected static final String COMPACTION_MAX_OUTSTANDING_REQUESTS = "compactionMaxOutstandingRequests"; protected static final String COMPACTION_RATE = "compactionRate"; @@ -53,8 +111,16 @@ public class ServerConfiguration extends AbstractConfiguration 0; + } + + /** + * Get local scrub interval. + * + * @return Number of seconds between scrubs, {@literal <=}0 for disabled. + */ + public long getLocalScrubPeriod() { + return this.getLong(LOCAL_SCRUB_PERIOD, 0); + } + + /** + * Set local scrub period in seconds ({@literal <=}0 for disabled). Scrub will be scheduled at delays + * chosen from the interval (.5 * interval, 1.5 * interval) + */ + public void setLocalScrubPeriod(long period) { + this.setProperty(LOCAL_SCRUB_PERIOD, period); + } + + /** + * Get local scrub rate limit (entries/second). + * + * @return Max number of entries to scrub per second, 0 for disabled. + */ + public double getLocalScrubRateLimit() { + return this.getDouble(LOCAL_SCRUB_RATE_LIMIT, 60); + } + + /** + * Get local scrub rate limit (entries/second). + * + * @param scrubRateLimit Max number of entries per second to scan. 
+ */ + public void setLocalScrubRateLimit(double scrubRateLimit) { + this.setProperty(LOCAL_SCRUB_RATE_LIMIT, scrubRateLimit); + } + /** * Get flush interval. Default value is 10 second. It isn't useful to decrease * this value, since ledger storage only checkpoints when an entry logger file @@ -554,7 +798,7 @@ public ServerConfiguration setFileInfoMaxIdleTime(long idleTime) { * @return fileinfo format version to write. */ public int getFileInfoFormatVersionToWrite() { - return this.getInt(FILEINFO_FORMAT_VERSION_TO_WRITE, 0); + return this.getInt(FILEINFO_FORMAT_VERSION_TO_WRITE, 1); } /** @@ -608,6 +852,17 @@ public int getJournalWriteBufferSizeKB() { return this.getInt(JOURNAL_WRITE_BUFFER_SIZE, 64); } + /** + * Set the size of the write buffers used for the journal. + * + * @param bufferSizeKB the size of the write buffer used for the journal, in KB. + * @return server configuration + */ + public ServerConfiguration setJournalWriteBufferSizeKB(int bufferSizeKB) { + setProperty(JOURNAL_WRITE_BUFFER_SIZE, bufferSizeKB); + return this; + } + /** * Max number of older journal files kept. * @@ -657,7 +912,7 @@ public ServerConfiguration setJournalAlignmentSize(int size) { * @return journal format version to write. */ public int getJournalFormatVersionToWrite() { - return this.getInt(JOURNAL_FORMAT_VERSION_TO_WRITE, 4); + return this.getInt(JOURNAL_FORMAT_VERSION_TO_WRITE, 6); } /** @@ -672,6 +927,110 @@ public ServerConfiguration setJournalFormatVersionToWrite(int version) { return this; } + /** + * Set the size of the journal queue. + * + * @param journalQueueSize + * the max size of journal queue + * @return server configuration. + */ + public ServerConfiguration setJournalQueueSize(int journalQueueSize) { + this.setProperty(JOURNAL_QUEUE_SIZE, journalQueueSize); + return this; + } + + /** + * Get size of journal queue. + * + * @return the max size of journal queue. + */ + public int getJournalQueueSize() { + return this.getInt(JOURNAL_QUEUE_SIZE, 10_000); + } + + /** + * Set the max amount of memory that can be used by the journal. + * + * @param journalMaxMemorySizeMb + * the max amount of memory for the journal + * @return server configuration. + */ + public ServerConfiguration setJournalMaxMemorySizeMb(long journalMaxMemorySizeMb) { + this.setProperty(JOURNAL_MAX_MEMORY_SIZE_MB, journalMaxMemorySizeMb); + return this; + } + + /** + * Get the max amount of memory that can be used by the journal. + * + * @return the max amount of memory for the journal + */ + public long getJournalMaxMemorySizeMb() { + // Default is taking 5% of max direct memory (and convert to MB). + long estimateMaxDirectMemory = io.netty.util.internal.PlatformDependent.estimateMaxDirectMemory(); + long defaultValue = (long) (estimateMaxDirectMemory * 0.05 / 1024 / 1024); + return this.getLong(JOURNAL_MAX_MEMORY_SIZE_MB, defaultValue); + } + + /** + * Set PageCache flush interval in second. + * + * @Param journalPageCacheFlushInterval + * journal pageCache flush interval when journalSyncData closed + * @return server configuration. + */ + public ServerConfiguration setJournalPageCacheFlushIntervalMSec(long journalPageCacheFlushIntervalMSec) { + this.setProperty(JOURNAL_PAGECACHE_FLUSH_INTERVAL_MSEC, journalPageCacheFlushIntervalMSec); + return this; + } + + /** + * Get journal pageCache flush interval. + * + * @return journal pageCache flush interval. 
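 * <p>A relaxed-durability sketch tying the journal settings above together
 * (values purely illustrative); with journalSyncData disabled, acknowledged
 * entries are only as durable as the periodic page cache flush:
 * <pre>{@code
 * ServerConfiguration conf = new ServerConfiguration();
 * conf.setJournalSyncData(false);                  // no fsync per add
 * conf.setJournalPageCacheFlushIntervalMSec(1000); // flush once a second
 * conf.setJournalQueueSize(10_000);
 * conf.setJournalMaxMemorySizeMb(512);             // cap journal memory
 * }</pre>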
+ */ + public long getJournalPageCacheFlushIntervalMSec() { + return this.getLong(JOURNAL_PAGECACHE_FLUSH_INTERVAL_MSEC, 1000); + } + + /** + * Set the JournalChannelProvider classname. + * @param journalChannelProvider + * The JournalChannelProvider classname. The class must implement {@link FileChannelProvider} and + * provide a no-args constructor. + * @return server configuration + */ + public ServerConfiguration setJournalChannelProvider(String journalChannelProvider) { + this.setProperty(JOURNAL_CHANNEL_PROVIDER, journalChannelProvider); + return this; + } + + /** + * Get the JournalChannelProvider classname. + * + * @return the JournalChannelProvider classname. + */ + public String getJournalChannelProvider() { + return this.getString(JOURNAL_CHANNEL_PROVIDER, "org.apache.bookkeeper.bookie.DefaultFileChannelProvider"); + } + + /** + * Get whether journal files are reused. + * @return true if journal file reuse is enabled. + */ + public boolean getJournalReuseFiles() { + return this.getBoolean(JOURNAL_REUSE_FILES, false); + } + + /** + * Set whether to reuse journal files. + * @param journalReuseFiles flag to enable/disable journal file reuse. + * @return server configuration + */ + public ServerConfiguration setJournalReuseFiles(boolean journalReuseFiles) { + setProperty(JOURNAL_REUSE_FILES, journalReuseFiles); + return this; + } /** * Get max number of adds in progress. 0 == unlimited. * @@ -847,6 +1206,38 @@ public ServerConfiguration setAllowLoopback(boolean allow) { return this; } + /** + * Get the configured BookieId for the bookie. + * + *
<p>
If present, this setting takes precedence over the + * automatic BookieId generation based on network addresses. + * + * @see #setBookieId(java.lang.String) + * @see #getAdvertisedAddress() + * @return the configured BookieId to advertise + */ + public String getBookieId() { + return this.getString(BOOKIE_ID, null); + } + + /** + * Configure the bookie to advertise a specific BookieId. + * + *
<p>
    By default, a bookie will advertise a BookieId computed + * from the primary network endpoint address. + * + * @see #getBookieId() + * @see #setAdvertisedAddress(java.lang.String) + * @param bookieId the bookie id + * + * @return server configuration + */ + public ServerConfiguration setBookieId(String bookieId) { + BookieId.parse(bookieId); + this.setProperty(BOOKIE_ID, bookieId); + return this; + } + /** * Get the configured advertised address for the bookie. * @@ -865,7 +1256,7 @@ public String getAdvertisedAddress() { * Configure the bookie to advertise a specific address. * *
<p>
    By default, a bookie will advertise either its own IP or hostname, - * depending on the {@link getUseHostNameAsBookieID()} setting. + * depending on the {@link #getUseHostNameAsBookieID()} setting. * *
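 * <p>A small sketch combining the BookieId and advertised-address settings above
 * (host names hypothetical), e.g. for a bookie behind NAT:
 * <pre>{@code
 * ServerConfiguration conf = new ServerConfiguration();
 * conf.setBookieId("bookie-1");                      // stable id, validated by BookieId.parse
 * conf.setAdvertisedAddress("bookie-1.example.com"); // the address clients should dial
 * }</pre>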
<p>
    When the advertised is set to a non-empty string, the bookie will * register and advertise using this address. @@ -938,8 +1329,9 @@ public ServerConfiguration setAllowStorageExpansion(boolean val) { */ public String[] getJournalDirNames() { String[] journalDirs = this.getStringArray(JOURNAL_DIRS); - if (journalDirs == null || journalDirs.length == 0) { - return new String[] {getJournalDirName()}; + if (journalDirs == null || journalDirs.length == 0 + || (journalDirs.length == 1 && StringUtils.isEmpty(journalDirs[0]))) { + return new String[] { getJournalDirName() }; } return journalDirs; } @@ -1126,6 +1518,15 @@ public int getServerNumIOThreads() { return getInt(SERVER_NUM_IO_THREADS, 2 * Runtime.getRuntime().availableProcessors()); } + /** + * Get the number of Acceptor threads. + * + * @return the number of Acceptor threads + */ + public int getServerNumAcceptorThreads() { + return getInt(SERVER_NUM_ACCEPTOR_THREADS, 1); + } + /** * Set the number of IO threads. * @@ -1133,7 +1534,7 @@ public int getServerNumIOThreads() { * This is the number of threads used by Netty to handle TCP connections. *
    * - * @see #getNumIOThreads() + * @see #getServerNumIOThreads() * @param numThreads number of IO threads used for bookkeeper * @return client configuration */ @@ -1253,6 +1654,27 @@ public ServerConfiguration setStatisticsEnabled(boolean enabled) { return this; } + /** + * Allow manually force compact the entry log or not. + * + * @param enable + * whether allow manually force compact the entry log or not. + * @return service configuration. + */ + public ServerConfiguration setForceAllowCompaction(boolean enable) { + setProperty(FORCE_ALLOW_COMPACTION, enable); + return this; + } + + /** + * The force compaction is allowed or not when disabling the entry log compaction. + * + * @return the force compaction is allowed or not when disabling the entry log compaction. + */ + public boolean isForceAllowCompaction() { + return getBoolean(FORCE_ALLOW_COMPACTION, false); + } + /** * Get threshold of minor compaction. * @@ -1264,7 +1686,7 @@ public ServerConfiguration setStatisticsEnabled(boolean enabled) { * @return threshold of minor compaction */ public double getMinorCompactionThreshold() { - return getDouble(MINOR_COMPACTION_THRESHOLD, 0.2f); + return getDouble(MINOR_COMPACTION_THRESHOLD, 0.2d); } /** @@ -1292,7 +1714,7 @@ public ServerConfiguration setMinorCompactionThreshold(double threshold) { * @return threshold of major compaction */ public double getMajorCompactionThreshold() { - return getDouble(MAJOR_COMPACTION_THRESHOLD, 0.8f); + return getDouble(MAJOR_COMPACTION_THRESHOLD, 0.8d); } /** @@ -1309,6 +1731,33 @@ public ServerConfiguration setMajorCompactionThreshold(double threshold) { return this; } + /** + * Get the maximum milliseconds to run major compaction. If {@literal <=}0 the + * thread will run until all compaction is completed. + * + * @return limit + * The number of milliseconds to run compaction. + */ + public long getMajorCompactionMaxTimeMillis() { + return getLong(MAJOR_COMPACTION_MAX_TIME_MILLIS, -1); + } + + /** + * Set the maximum milliseconds to run major compaction. If {@literal <=}0 the + * thread will run until all compaction is completed. + * + * @see #getMajorCompactionMaxTimeMillis() + * + * @param majorCompactionMaxTimeMillis + * The number of milliseconds to run compaction. + * + * @return server configuration + */ + public ServerConfiguration setMajorCompactionMaxTimeMillis(long majorCompactionMaxTimeMillis) { + setProperty(MAJOR_COMPACTION_MAX_TIME_MILLIS, majorCompactionMaxTimeMillis); + return this; + } + /** * Get interval to run minor compaction, in seconds. * @@ -1359,6 +1808,33 @@ public ServerConfiguration setMajorCompactionInterval(long interval) { return this; } + /** + * Get the maximum milliseconds to run minor compaction. If {@literal <=}0 the + * thread will run until all compaction is completed. + * + * @return limit + * The number of milliseconds to run compaction. + */ + public long getMinorCompactionMaxTimeMillis() { + return getLong(MINOR_COMPACTION_MAX_TIME_MILLIS, -1); + } + + /** + * Set the maximum milliseconds to run minor compaction. If {@literal <=}0 the + * thread will run until all compaction is completed. + * + * @see #getMinorCompactionMaxTimeMillis() + * + * @param minorCompactionMaxTimeMillis + * The number of milliseconds to run compaction. 
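 * <p>The minor and major bounds can be combined to cap each GC pass; a sketch
 * with illustrative durations (assuming java.util.concurrent.TimeUnit):
 * <pre>{@code
 * ServerConfiguration conf = new ServerConfiguration();
 * conf.setMinorCompactionMaxTimeMillis(TimeUnit.MINUTES.toMillis(5));
 * conf.setMajorCompactionMaxTimeMillis(TimeUnit.MINUTES.toMillis(30));
 * }</pre>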
+ * + * @return server configuration + */ + public ServerConfiguration setMinorCompactionMaxTimeMillis(long minorCompactionMaxTimeMillis) { + setProperty(MINOR_COMPACTION_MAX_TIME_MILLIS, minorCompactionMaxTimeMillis); + return this; + } + /** * Get whether force compaction is allowed when disk full or almost full. * @@ -1416,12 +1892,15 @@ public long getOpenLedgerRereplicationGracePeriod() { } /** - * Set the grace period so that if the replication worker fails to replicate - * a underreplicatedledger for more than - * ReplicationWorker.MAXNUMBER_REPLICATION_FAILURES_ALLOWED_BEFORE_DEFERRING - * number of times, then instead of releasing the lock immediately after - * failed attempt, it will hold under replicated ledger lock for this grace - * period and then it will release the lock. + * Set the grace period, in milliseconds, which the replication worker has + * to wait before releasing the lock after it failed to replicate a ledger. + * For the first ReplicationWorker.NUM_OF_EXPONENTIAL_BACKOFF_RETRIALS + * failures it will do exponential backoff then it will bound at + * LOCK_RELEASE_OF_FAILED_LEDGER_GRACE_PERIOD. + * + *
<p>
    On replication failure, instead of releasing the lock immediately + * after failed attempt, it will hold under replicated ledger lock for the + * grace period and then it will release the lock. * * @param waitTime */ @@ -1430,16 +1909,16 @@ public void setLockReleaseOfFailedLedgerGracePeriod(String waitTime) { } /** - * Get the grace period which the replication worker to wait before - * releasing the lock after replication worker failing to replicate for more - * than - * ReplicationWorker.MAXNUMBER_REPLICATION_FAILURES_ALLOWED_BEFORE_DEFERRING - * number of times. + * Get the grace period, in milliseconds, which the replication worker has + * to wait before releasing the lock after it failed to replicate a ledger. + * For the first ReplicationWorker.NUM_OF_EXPONENTIAL_BACKOFF_RETRIALS + * failures it will do exponential backoff then it will bound at + * LOCK_RELEASE_OF_FAILED_LEDGER_GRACE_PERIOD. * * @return */ public long getLockReleaseOfFailedLedgerGracePeriod() { - return getLong(LOCK_RELEASE_OF_FAILED_LEDGER_GRACE_PERIOD, 60000); + return getLong(LOCK_RELEASE_OF_FAILED_LEDGER_GRACE_PERIOD, 300000); } /** @@ -1533,6 +2012,29 @@ public int getNumHighPriorityWorkerThreads() { return getInt(NUM_HIGH_PRIORITY_WORKER_THREADS, 8); } + /** + * Use auto-throttling of the read-worker threads. This is done + * to ensure the bookie is not using unlimited amount of memory + * to respond to read-requests. + * + * @param throttle + * whether to throttle the read workers threads + * @return server configuration + */ + public ServerConfiguration setReadWorkerThreadsThrottlingEnabled(boolean throttle) { + setProperty(READ_WORKER_THREADS_THROTTLING_ENABLED, throttle); + return this; + } + + /** + * Get the auto-throttling status of the read-worker threads. + * @return + */ + public boolean isReadWorkerThreadsThrottlingEnabled() { + return getBoolean(READ_WORKER_THREADS_THROTTLING_ENABLED, true); + } + + /** * Set the number of threads that would handle read requests. @@ -1664,6 +2166,7 @@ public ServerConfiguration setWriteBufferBytes(int writeBufferBytes) { * number of threads to handle journal callbacks. * @return server configuration */ + @Deprecated public ServerConfiguration setNumJournalCallbackThreads(int numThreads) { setProperty(NUM_JOURNAL_CALLBACK_THREADS, numThreads); return this; @@ -1674,6 +2177,7 @@ public ServerConfiguration setNumJournalCallbackThreads(int numThreads) { * * @return the number of threads that handle journal callbacks. */ + @Deprecated public int getNumJournalCallbackThreads() { return getInt(NUM_JOURNAL_CALLBACK_THREADS, 1); } @@ -1755,6 +2259,18 @@ public int getSkipListArenaMaxAllocSize() { return getInt(SKIP_LIST_MAX_ALLOC_ENTRY, 128 * 1024); } + /** + * Set the max size we should allocate from the skiplist arena. Allocations + * larger than this should be allocated directly by the VM to avoid fragmentation. + * + * @param size max alloc size. + * @return server configuration object. + */ + public ServerConfiguration setSkipListArenaMaxAllocSize(int size) { + setProperty(SKIP_LIST_MAX_ALLOC_ENTRY, size); + return this; + } + /** * Should the data be fsynced on journal before acknowledgment. * @@ -1766,6 +2282,29 @@ public boolean getJournalSyncData() { return getBoolean(JOURNAL_SYNC_DATA, true); } + /** + * Should the data be written to journal before acknowledgment. + * + *
<p>
    Default is true + * + * @return + */ + public boolean getJournalWriteData() { + return getBoolean(JOURNAL_WRITE_DATA, true); + } + + /** + * Should the data be written to journal before acknowledgment. + * + *
<p>
Default is true. + * + * @return server configuration + */ + public ServerConfiguration setJournalWriteData(boolean journalWriteData) { + setProperty(JOURNAL_WRITE_DATA, journalWriteData); + return this; + } + /** * Enable or disable journal syncs. * @@ -1835,9 +2374,20 @@ public long getJournalBufferedWritesThreshold() { } /** - * Maximum entries to buffer to impose on a journal write to achieve grouping. - * Use {@link #getJournalBufferedWritesThreshold()} if this is set to zero or - * less than zero. + * Set the maximum number of bytes to buffer on a journal write to achieve grouping. + * + * @param maxBytes maximum bytes to buffer on a journal write + * @return server configuration + */ + public ServerConfiguration setJournalBufferedWritesThreshold(long maxBytes) { + setProperty(JOURNAL_BUFFERED_WRITES_THRESHOLD, maxBytes); + return this; + } + + /** + * Maximum entries to buffer to impose on a journal write to achieve grouping. + * Use {@link #getJournalBufferedWritesThreshold()} if this is set to zero or + * less than zero. * * @return max entries to buffer. */ @@ -1899,6 +2449,27 @@ public boolean isReadOnlyModeEnabled() { return getBoolean(READ_ONLY_MODE_ENABLED, true); } + /** + * Set whether the bookie is able to go into read-only mode when any disk is full. + * If this is set to false, the behavior falls back to the READ_ONLY_MODE_ENABLED flag. + * + * @param enabled whether to enable read-only mode when any disk is full. + * @return server configuration + */ + public ServerConfiguration setReadOnlyModeOnAnyDiskFullEnabled(boolean enabled) { + setProperty(READ_ONLY_MODE_ON_ANY_DISK_FULL_ENABLED, enabled); + return this; + } + + /** + * Get whether read-only mode is enabled when any disk is full. The default is false. + * + * @return true if read-only mode on any full disk is enabled. + */ + public boolean isReadOnlyModeOnAnyDiskFullEnabled() { + return getBoolean(READ_ONLY_MODE_ON_ANY_DISK_FULL_ENABLED, false); + } + /** * Set the warning threshold for disk usage. * @@ -2056,6 +2627,132 @@ public long getAuditorPeriodicBookieCheckInterval() { return getLong(AUDITOR_PERIODIC_BOOKIE_CHECK_INTERVAL, 86400); } + /** + * Sets the interval at which the auditor will run a placement policy check + * of all closed ledgers. This should not be run very often, and at most + * once a day. Setting this to 0 will completely disable the periodic + * metadata check. + * + * @param interval + * The interval in seconds. e.g. 86400 = 1 day, 604800 = 1 week + */ + public void setAuditorPeriodicPlacementPolicyCheckInterval(long interval) { + setProperty(AUDITOR_PERIODIC_PLACEMENT_POLICY_CHECK_INTERVAL, interval); + } + + /** + * Get the interval at which the auditor runs the placement policy check of + * all closed ledgers. + * + * @return The interval in seconds. By default, it is disabled. + */ + public long getAuditorPeriodicPlacementPolicyCheckInterval() { + return getLong(AUDITOR_PERIODIC_PLACEMENT_POLICY_CHECK_INTERVAL, 0); + } + + public void setRepairedPlacementPolicyNotAdheringBookieEnable(boolean enabled) { + setProperty(REPAIRED_PLACEMENT_POLICY_NOT_ADHERING_BOOKIE_ENABLED, enabled); + }
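A sketch of enabling the full check-and-repair pipeline described below (the interval is illustrative):

    ServerConfiguration conf = new ServerConfiguration();
    conf.setAuditorPeriodicPlacementPolicyCheckInterval(86400); // daily, in seconds
    conf.setRepairedPlacementPolicyNotAdheringBookieEnable(true);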
+ + /** + * Currently this feature is supported only by RackawareEnsemblePlacementPolicy. + * + * <p>In the Auditor it combines with {@link #getAuditorPeriodicPlacementPolicyCheckInterval} to control whether + * a ledger whose ensemble does not adhere to the placement policy is marked as under-replicated. In the + * ReplicationWorker it controls whether such ledgers are repaired. By default, it is disabled. + * + * <p>Enabling this feature may mark a large number of ledgers as under-replicated at once. The replication + * worker will then re-replicate them, increasing the read and write load on the bookie servers, so set a + * suitable rereplicationEntryBatchSize to keep that pressure manageable. + * + */ + public boolean isRepairedPlacementPolicyNotAdheringBookieEnable() { + return getBoolean(REPAIRED_PLACEMENT_POLICY_NOT_ADHERING_BOOKIE_ENABLED, false); + } + + /** + * Sets the grace period (in seconds) for underreplicated ledgers recovery. + * If a ledger is marked underreplicated for more than this period then it + * will be reported by placementPolicyCheck in Auditor. Setting this to 0 + * will disable this check. + * + * @param gracePeriod + * The interval in seconds. e.g. 3600 = 1 hour + */ + public void setUnderreplicatedLedgerRecoveryGracePeriod(long gracePeriod) { + setProperty(UNDERREPLICATED_LEDGER_RECOVERY_GRACE_PERIOD, gracePeriod); + } + + /** + * Gets the grace period (in seconds) for underreplicated ledgers recovery. + * If a ledger is marked underreplicated for more than this period then it + * will be reported by placementPolicyCheck in Auditor. Setting this to 0 + * will disable this check. + * + * @return The interval in seconds. By default it is disabled. + */ + public long getUnderreplicatedLedgerRecoveryGracePeriod() { + return getLong(UNDERREPLICATED_LEDGER_RECOVERY_GRACE_PERIOD, 0); + } + + /** + * Sets the interval at which the auditor will run a replicas check of all + * ledgers. This should not be run very often since it validates + * availability of replicas of all ledgers by querying bookies. Setting this + * to 0 will disable the periodic replicas check. + * + * @param interval + * The interval in seconds. e.g. 86400 = 1 day, 604800 = 1 week + */ + public void setAuditorPeriodicReplicasCheckInterval(long interval) { + setProperty(AUDITOR_REPLICAS_CHECK_INTERVAL, interval); + } + + /** + * Get the interval at which the auditor does replicas check of all ledgers. + * + * @return The interval in seconds. By default it is disabled. + */ + public long getAuditorPeriodicReplicasCheckInterval() { + return getLong(AUDITOR_REPLICAS_CHECK_INTERVAL, 0); + } + + /** + * Get the semaphore limit on concurrent open ledger operations against ZooKeeper in auto recovery. + * + * @return The semaphore value. By default it is 500. + */ + public int getAuditorMaxNumberOfConcurrentOpenLedgerOperations() { + return getInt(AUDITOR_MAX_NUMBER_OF_CONCURRENT_OPEN_LEDGER_OPERATIONS, 500); + } + + /** + * Set the semaphore limit on concurrent open ledger operations against ZooKeeper in auto recovery. + * @param semaphore + */ + public void setAuditorMaxNumberOfConcurrentOpenLedgerOperations(int semaphore) { + setProperty(AUDITOR_MAX_NUMBER_OF_CONCURRENT_OPEN_LEDGER_OPERATIONS, semaphore); + } + + /** + * Get the timeout for acquiring a concurrent open ledger operations permit. + * + * @return The timeout value in milliseconds. By default it is 120000ms. + */ + public int getAuditorAcquireConcurrentOpenLedgerOperationsTimeoutMSec() { + return getInt(AUDITOR_ACQUIRE_CONCURRENT_OPEN_LEDGER_OPERATIONS_TIMEOUT_MSEC, 120000); + } + + /** + * Set the timeout for acquiring a concurrent open ledger operations permit. + * @param timeoutMs + */ + public void setAuditorAcquireConcurrentOpenLedgerOperationsTimeoutMSec(int timeoutMs) { + setProperty(AUDITOR_ACQUIRE_CONCURRENT_OPEN_LEDGER_OPERATIONS_TIMEOUT_MSEC, timeoutMs); + } + + + /** * Set what percentage of a ledger (fragment)'s entries will be verified.
* 0 - only the first and last entry of each ledger fragment would be verified @@ -2305,7 +3002,7 @@ public ServerConfiguration setJournalRemovePagesFromCache(boolean enabled) { * @return the class name */ public String getLedgerStorageClass() { - String ledgerStorageClass = getString(LEDGER_STORAGE_CLASS, SortedLedgerStorage.class.getName()); + String ledgerStorageClass = LEDGER_STORAGE_CLASS_KEY.getString(this); if (ledgerStorageClass.equals(SortedLedgerStorage.class.getName()) && !getSortedLedgerStorageEnabled()) { // This is to retain compatibility with BK-4.3 configuration @@ -2328,7 +3025,7 @@ public String getLedgerStorageClass() { * @return ServerConfiguration */ public ServerConfiguration setLedgerStorageClass(String ledgerStorageClass) { - setProperty(LEDGER_STORAGE_CLASS, ledgerStorageClass); + LEDGER_STORAGE_CLASS_KEY.set(this, ledgerStorageClass); return this; } @@ -2456,11 +3153,41 @@ public ServerConfiguration setStatsProviderClass(Class return this; } + + /** + * Flag to enable sanity check metrics in bookie stats. Defaults to false/disabled. + * + * @return true, if bookie collects sanity check metrics in stats + */ + public boolean isSanityCheckMetricsEnabled() { + return getBoolean(SANITY_CHECK_METRICS_ENABLED, false); + } + + /** + * Enable sanity check metrics in bookie stats. + * + * @param sanityCheckMetricsEnabled + * flag to enable sanity check metrics + * @return server configuration + */ + public ServerConfiguration setSanityCheckMetricsEnabled(boolean sanityCheckMetricsEnabled) { + setProperty(SANITY_CHECK_METRICS_ENABLED, sanityCheckMetricsEnabled); + return this; + } + /** * Validate the configuration. * @throws ConfigurationException */ public void validate() throws ConfigurationException { + // generate config def + ConfigDef configDef = ConfigDef.of(ServerConfiguration.class); + try { + configDef.validate(this); + } catch (ConfigException e) { + throw new ConfigurationException(e.getMessage(), e.getCause()); + } + if (getSkipListArenaChunkSize() < getSkipListArenaMaxAllocSize()) { throw new ConfigurationException("Arena max allocation size should be smaller than the chunk size."); } @@ -2470,10 +3197,6 @@ public void validate() throws ConfigurationException { if (getJournalAlignmentSize() > getJournalPreAllocSizeMB() * 1024 * 1024) { throw new ConfigurationException("Invalid preallocation size : " + getJournalPreAllocSizeMB() + " MB"); } - if (getEntryLogSizeLimit() > BookKeeperConstants.MAX_LOG_SIZE_LIMIT) { - throw new ConfigurationException("Entry log file size should not be larger than " - + BookKeeperConstants.MAX_LOG_SIZE_LIMIT); - } if (0 == getBookiePort() && !getAllowEphemeralPorts()) { throw new ConfigurationException("Invalid port specified, using ephemeral ports accidentally?"); } @@ -2485,6 +3208,12 @@ public void validate() throws ConfigurationException { throw new ConfigurationException("For persisiting explicitLac, journalFormatVersionToWrite should be >= 6" + "and FileInfoFormatVersionToWrite should be >= 1"); } + if (getMinorCompactionInterval() > 0 && getMinorCompactionInterval() * SECOND < getGcWaitTime()) { + throw new ConfigurationException("minorCompactionInterval should be >= gcWaitTime."); + } + if (getMajorCompactionInterval() > 0 && getMajorCompactionInterval() * SECOND < getGcWaitTime()) { + throw new ConfigurationException("majorCompactionInterval should be >= gcWaitTime."); + } } /** @@ -2870,6 +3599,135 @@ public ServerConfiguration setHttpServerPort(int port) { return this; } + /** + * Get the http server host. 
+ * + * @return http server host + */ + public String getHttpServerHost() { + return getString(HTTP_SERVER_HOST, "0.0.0.0"); + } + + /** + * Set Http server host listening on. + * + * @param host + * host to listen on + * @return server configuration + */ + public ServerConfiguration setHttpServerHost(String host) { + setProperty(HTTP_SERVER_HOST, host); + return this; + } + + /** + * Get if Http Server Tls enable. + * @return + */ + public boolean isHttpServerTlsEnable() { + return getBoolean(HTTP_SERVER_TLS_ENABLE, false); + } + + /** + * Set if Http Server Tls enable. + * @param tlsEnable + * @return server configuration + */ + public ServerConfiguration setHttpServerTlsEnable(boolean tlsEnable) { + setProperty(HTTP_SERVER_TLS_ENABLE, tlsEnable); + return this; + } + + /** + * Get the http server keystore path. + * + * @return http server keystore path + */ + public String getHttpServerKeystorePath() { + return getString(HTTP_SERVER_KEY_STORE_PATH); + } + + /** + * Set Http server keystore path. + * + * @param keystorePath + * http server keystore path + * @return server configuration + */ + public ServerConfiguration setHttpServerKeystorePath(String keystorePath) { + setProperty(HTTP_SERVER_KEY_STORE_PATH, keystorePath); + return this; + } + + /** + * Get the http server keyStore password. + * + * @return http server keyStore password + */ + public String getHttpServerKeystorePassword() { + return getString(HTTP_SERVER_KEY_STORE_PASSWORD); + } + + /** + * Set Http server keyStore password. + * + * @param keyStorePassword + * http server keyStore password + * @return server configuration + */ + public ServerConfiguration setHttpServerKeyStorePassword(String keyStorePassword) { + setProperty(HTTP_SERVER_KEY_STORE_PASSWORD, keyStorePassword); + return this; + } + + /** + * Get the http server trustStore path. + * + * @return http server trustStore path + */ + public String getHttpServerTrustStorePath() { + return getString(HTTP_SERVER_TRUST_STORE_PATH); + } + + /** + * Set Http server trustStore path. + * + * @param trustStorePath + * http server trustStore path + * @return server configuration + */ + public ServerConfiguration setHttpServerTrustStorePath(String trustStorePath) { + setProperty(HTTP_SERVER_TRUST_STORE_PATH, trustStorePath); + return this; + } + + /** + * Get the http server trustStore password. + * + * @return http server trustStore password + */ + public String getHttpServerTrustStorePassword() { + String serverTrustStorePassword = getString(HTTP_SERVER_TRUST_STORE_PASSWORD); + if (serverTrustStorePassword != null) { + return serverTrustStorePassword; + } + // mistake introduced in https://github.com/apache/bookkeeper/pull/2995 + // will remove in next major version + return getString(HTTP_SERVER_KEY_STORE_PASSWORD); + } + + /** + * Set Http server trustStore password. + * + * @param trustStorePassword + * http server trustStore password + * @return server configuration + */ + public ServerConfiguration setHttpServerTrustStorePasswordPassword(String trustStorePassword) { + setProperty(HTTP_SERVER_TRUST_STORE_PASSWORD, trustStorePassword); + return this; + } + /** * Get the extra list of server lifecycle components to enable on a bookie server. * @@ -3081,4 +3939,223 @@ public ServerConfiguration setEntryLogPerLedgerCounterLimitsMultFactor( Integer.toString(entryLogPerLedgerCounterLimitsMultFactor)); return this; } + + /** + * True if a local consistency check should be performed on startup. 
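 * <p>For the HTTP server settings above, a TLS-enabled admin endpoint could be
 * configured as follows (paths and passwords hypothetical):
 * <pre>{@code
 * ServerConfiguration conf = new ServerConfiguration();
 * conf.setHttpServerHost("0.0.0.0");
 * conf.setHttpServerPort(8443);
 * conf.setHttpServerTlsEnable(true);
 * conf.setHttpServerKeystorePath("/etc/bookkeeper/tls/keystore.jks");
 * conf.setHttpServerKeyStorePassword("changeit");
 * conf.setHttpServerTrustStorePath("/etc/bookkeeper/tls/truststore.jks");
 * conf.setHttpServerTrustStorePasswordPassword("changeit");
 * }</pre>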
+ */ + public boolean isLocalConsistencyCheckOnStartup() { + return this.getBoolean(LOCAL_CONSISTENCY_CHECK_ON_STARTUP, false); + } + + /** + * Get the authorized roles. + * + * @return String array of configured auth roles. + */ + public String[] getAuthorizedRoles() { + return getStringArray(AUTHORIZED_ROLES); + } + + /** + * Set authorized roles. + * + * @param roles the roles to authorize. + * @return server configuration with the roles set + */ + public ServerConfiguration setAuthorizedRoles(String roles) { + this.setProperty(AUTHORIZED_ROLES, roles); + return this; + } + + /** + * Get the number of in-flight entry reads permitted in the ledger checker. + * The default value is -1, which leaves the ledger checker unlimited. + * + * @return number of in-flight entry reads. + */ + public int getInFlightReadEntryNumInLedgerChecker(){ + return getInt(IN_FLIGHT_READ_ENTRY_NUM_IN_LEDGER_CHECKER, -1); + } + + /** + * Enable the data integrity checker. + * The data integrity checker checks that the bookie has all the entries which + * ledger metadata asserts it has. + * The checker runs on startup (periodic runs will be added later). + * This changes how cookies are handled. If a directory is found to be missing a cookie, + * the check runs. The check is divided into two parts, preboot and full. + * The preboot check ensures that it is safe to boot the bookie; the bookie will not + * vote in any operation that contradicts a previous vote. + * The full check ensures that any ledger that claims to have entries on the bookie + * truly does have data on the bookie. Any missing entries are copied from available + * replicas. + */ + public ServerConfiguration setDataIntegrityCheckingEnabled(boolean enabled) { + this.setProperty(DATA_INTEGRITY_CHECKING_ENABLED, + Boolean.toString(enabled)); + return this; + } + + /** + * @see #setDataIntegrityCheckingEnabled + */ + public boolean isDataIntegrityCheckingEnabled() { + return this.getBoolean(DATA_INTEGRITY_CHECKING_ENABLED, false); + } + + /** + * When this config is set to true and the data integrity checker is also enabled then + * any missing cookie files in the ledger directories do not prevent the bookie from + * booting. Missing cookie files usually indicate an empty disk has been mounted, which + * might be after a disk failure (all data lost) or a provisioning error (wrong disk mounted). + * If there are missing cookie files then: + * - a new cookie is stamped (written to each ledger directory and to the co-ordination service, eg: zookeeper). + * - the data integrity checker will attempt to repair any lost data by sourcing the lost entries from other bookies + * If any cookies do not match the master cookie, then cookie validation still fails as normal. + */ + public ServerConfiguration setDataIntegrityStampMissingCookiesEnabled(boolean enabled) { + this.setProperty(DATA_INTEGRITY_COOKIE_STAMPING_ENABLED, + Boolean.toString(enabled)); + return this; + } + + /** + * @see #setDataIntegrityStampMissingCookiesEnabled + */ + public boolean isDataIntegrityStampMissingCookiesEnabled() { + return this.getBoolean(DATA_INTEGRITY_COOKIE_STAMPING_ENABLED, false); + } + + + /** + * When this config is set to true, invalid records encountered while replaying the journal + * are skipped instead of failing bookie startup. + * @param skipReplayJournalInvalidRecord flag to skip invalid journal records on replay. + * @return server configuration + */ + public ServerConfiguration setSkipReplayJournalInvalidRecord(boolean skipReplayJournalInvalidRecord) { + this.setProperty(SKIP_REPLAY_JOURNAL_INVALID_RECORD, + Boolean.toString(skipReplayJournalInvalidRecord)); + return this; + } + + /** + * @see #setSkipReplayJournalInvalidRecord
+ */ + public boolean isSkipReplayJournalInvalidRecord() { + return this.getBoolean(SKIP_REPLAY_JOURNAL_INVALID_RECORD, false); + } + + /** + * Get default rocksdb conf. + * + * @return String configured default rocksdb conf. + */ + public String getDefaultRocksDBConf() { + String filePath = getFilePath("conf/default_rocksdb.conf"); + return getString(DEFAULT_ROCKSDB_CONF, filePath); + } + + /** + * Set default rocksdb conf. + * + * @return Configuration Object with default rocksdb conf + */ + public ServerConfiguration setDefaultRocksDBConf(String defaultRocksdbConf) { + this.setProperty(DEFAULT_ROCKSDB_CONF, defaultRocksdbConf); + return this; + } + + /** + * Get entry Location rocksdb conf. + * + * @return String configured entry Location rocksdb conf. + */ + public String getEntryLocationRocksdbConf() { + String filePath = getFilePath("conf/entry_location_rocksdb.conf"); + return getString(ENTRY_LOCATION_ROCKSDB_CONF, filePath); + } + + /** + * Set entry Location rocksdb conf. + * + * @return Configuration Object with entry Location rocksdb conf + */ + public ServerConfiguration setEntryLocationRocksdbConf(String entryLocationRocksdbConf) { + this.setProperty(ENTRY_LOCATION_ROCKSDB_CONF, entryLocationRocksdbConf); + return this; + } + + /** + * Get ledger metadata rocksdb conf. + * + * @return String configured ledger metadata rocksdb conf. + */ + public String getLedgerMetadataRocksdbConf() { + String filePath = getFilePath("conf/ledger_metadata_rocksdb.conf"); + return getString(LEDGER_METADATA_ROCKSDB_CONF, filePath); + } + + /** + * Set ledger metadata rocksdb conf. + * + * @return Configuration Object with ledger metadata rocksdb conf + */ + public ServerConfiguration setLedgerMetadataRocksdbConf(String ledgerMetadataRocksdbConf) { + this.setProperty(LEDGER_METADATA_ROCKSDB_CONF, ledgerMetadataRocksdbConf); + return this; + } + + /** + * Set the max operation numbers in a single rocksdb write batch. + * The rocksdb write batch is related to the memory usage. If the batch is too large, it will cause the OOM. + * + * @param maxNumbersInSingleRocksDBBatch + * @return + */ + public ServerConfiguration setOperationMaxNumbersInSingleRocksDBWriteBatch(int maxNumbersInSingleRocksDBBatch) { + this.setProperty(MAX_OPERATION_NUMBERS_IN_SINGLE_ROCKSDB_WRITE_BATCH, maxNumbersInSingleRocksDBBatch); + return this; + } + + /** + * Get the max operation numbers in a single rocksdb write batch. + * + * @return + */ + public int getMaxOperationNumbersInSingleRocksDBBatch() { + return getInt(MAX_OPERATION_NUMBERS_IN_SINGLE_ROCKSDB_WRITE_BATCH, 100000); + } + + /** + * Set the max batch read size. + * + * @param maxBatchReadSize + * @return + */ + public ServerConfiguration setMaxBatchReadSize(long maxBatchReadSize) { + this.setProperty(MAX_BATCH_READ_SIZE, maxBatchReadSize); + return this; + } + + /** + * Get the max batch read size. + * + * @return + */ + public long getMaxBatchReadSize() { + return this.getLong(MAX_BATCH_READ_SIZE, DEFAULT_MAX_BATCH_READ_SIZE); + } + + /** + * Get the path of a file from resources. + * + * @param fileName the name of the file to get the path for. + * @return String the absolute path of the file. 
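 * <p>Putting the storage settings above together, a DbLedgerStorage bookie with
 * externally managed RocksDB tuning files might look like this (paths hypothetical):
 * <pre>{@code
 * ServerConfiguration conf = new ServerConfiguration();
 * conf.setLedgerStorageClass(DbLedgerStorage.class.getName());
 * conf.setEntryLocationRocksdbConf("/etc/bookkeeper/entry_location_rocksdb.conf");
 * conf.setLedgerMetadataRocksdbConf("/etc/bookkeeper/ledger_metadata_rocksdb.conf");
 * conf.setOperationMaxNumbersInSingleRocksDBWriteBatch(50_000); // bound write-batch memory
 * }</pre>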
+ */ + private String getFilePath(String fileName) { + URL resourceURL = getClass().getClassLoader().getResource(fileName); + if (resourceURL != null) { + return Paths.get(resourceURL.getPath()).toString(); + } + return ""; + } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/discover/BookieServiceInfo.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/discover/BookieServiceInfo.java new file mode 100644 index 00000000000..8b23a70143b --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/discover/BookieServiceInfo.java @@ -0,0 +1,166 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bookkeeper.discover; + +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.function.Supplier; + +/** + * Information about services exposed by a Bookie. + */ +public final class BookieServiceInfo { + + /** + * Default empty implementation. + */ + public static final BookieServiceInfo EMPTY = new BookieServiceInfo( + Collections.emptyMap(), + Collections.emptyList() + ); + + /** + * Default empty implementation. + */ + public static final Supplier NO_INFO = () -> EMPTY; + + private Map properties; + private List endpoints; + + public BookieServiceInfo(Map properties, List endpoints) { + this.properties = Collections.unmodifiableMap(properties); + this.endpoints = Collections.unmodifiableList(endpoints); + } + + public BookieServiceInfo() { + this(Collections.emptyMap(), Collections.emptyList()); + } + + /** + * Unmodifiable map with bookie wide information. + * + * @return the map + */ + public Map getProperties() { + return properties; + } + + /** + * Unmodifieable structure with the list of exposed endpoints. + * + * @return the list. + */ + public List getEndpoints() { + return endpoints; + } + + public void setProperties(Map properties) { + this.properties = properties; + } + + public void setEndpoints(List endpoints) { + this.endpoints = endpoints; + } + + /** + * Information about an endpoint. 
+ */ + public static final class Endpoint { + + private String id; + private int port; + private String host; + private String protocol; + private List auth; + private List extensions; + + public Endpoint(String id, int port, String host, String protocol, List auth, List extensions) { + this.id = id; + this.port = port; + this.host = host; + this.protocol = protocol; + this.auth = auth; + this.extensions = extensions; + } + + public Endpoint() { + } + + public String getId() { + return id; + } + + public void setId(String id) { + this.id = id; + } + + public int getPort() { + return port; + } + + public void setPort(int port) { + this.port = port; + } + + public String getHost() { + return host; + } + + public void setHost(String host) { + this.host = host; + } + + public String getProtocol() { + return protocol; + } + + public void setProtocol(String protocol) { + this.protocol = protocol; + } + + public List getAuth() { + return auth; + } + + public void setAuth(List auth) { + this.auth = auth; + } + + public List getExtensions() { + return extensions; + } + + public void setExtensions(List extensions) { + this.extensions = extensions; + } + + @Override + public String toString() { + return "EndpointInfo{" + "id=" + id + ", port=" + port + ", host=" + host + ", protocol=" + protocol + ", " + + "auth=" + auth + ", extensions=" + extensions + '}'; + } + + } + + @Override + public String toString() { + return "BookieServiceInfo{" + "properties=" + properties + ", endpoints=" + endpoints + '}'; + } + +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/discover/BookieServiceInfoUtils.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/discover/BookieServiceInfoUtils.java new file mode 100644 index 00000000000..252cb3bceae --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/discover/BookieServiceInfoUtils.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bookkeeper.discover; + +import java.net.UnknownHostException; +import java.util.Arrays; +import java.util.Collections; +import org.apache.bookkeeper.net.BookieSocketAddress; + +/** + * Utility class for {@link BookieServiceInfo}. + */ +public final class BookieServiceInfoUtils { + + /** + * Creates a default legacy bookie info implementation. + * In the default implementation there is one endpoint with + * bookie-rpc protocol and the bookie id in the host port. 
+ * + * @param bookieId bookie id + * @return default implementation of a BookieServiceInfo + * @throws UnknownHostException if the given bookieId is invalid + */ + public static BookieServiceInfo buildLegacyBookieServiceInfo(String bookieId) throws UnknownHostException { + BookieSocketAddress address = new BookieSocketAddress(bookieId); + BookieServiceInfo.Endpoint endpoint = new BookieServiceInfo.Endpoint(); + endpoint.setId(bookieId); + endpoint.setHost(address.getHostName()); + endpoint.setPort(address.getPort()); + endpoint.setProtocol("bookie-rpc"); + endpoint.setAuth(Collections.emptyList()); + endpoint.setExtensions(Collections.emptyList()); + return new BookieServiceInfo(Collections.emptyMap(), Arrays.asList(endpoint)); + } + +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/discover/RegistrationClient.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/discover/RegistrationClient.java index b53fd240545..e9fbc4e6253 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/discover/RegistrationClient.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/discover/RegistrationClient.java @@ -18,16 +18,20 @@ package org.apache.bookkeeper.discover; +import java.net.UnknownHostException; import java.util.Set; import java.util.concurrent.CompletableFuture; import org.apache.bookkeeper.common.annotation.InterfaceAudience.LimitedPrivate; import org.apache.bookkeeper.common.annotation.InterfaceStability.Evolving; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.common.concurrent.FutureUtils; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.versioning.LongVersion; import org.apache.bookkeeper.versioning.Versioned; /** * A registration client, which the bookkeeper client will use to interact with registration service. */ + @LimitedPrivate @Evolving public interface RegistrationClient extends AutoCloseable { @@ -37,7 +41,7 @@ public interface RegistrationClient extends AutoCloseable { */ interface RegistrationListener { - void onBookiesChanged(Versioned> bookies); + void onBookiesChanged(Versioned> bookies); } @@ -49,14 +53,40 @@ interface RegistrationListener { * * @return a future represents the list of writable bookies. */ - CompletableFuture>> getWritableBookies(); + CompletableFuture>> getWritableBookies(); + + /** + * Get the list of all bookies identifiers. + * + * @return a future represents the list of all bookies. + */ + CompletableFuture>> getAllBookies(); /** * Get the list of readonly bookie identifiers. * * @return a future represents the list of readonly bookies. */ - CompletableFuture>> getReadOnlyBookies(); + CompletableFuture>> getReadOnlyBookies(); + + /** + * Get detailed information about the services exposed by a Bookie. + * For old bookies it is expected to return an empty BookieServiceInfo structure. + * + * @param bookieId this is the id of the bookie, it can be computed from a {@link BookieId} + * @return a future represents the available information. + * + * @since 4.11 + */ + default CompletableFuture> getBookieServiceInfo(BookieId bookieId) { + try { + BookieServiceInfo bookieServiceInfo = BookieServiceInfoUtils + .buildLegacyBookieServiceInfo(bookieId.toString()); + return FutureUtils.value(new Versioned<>(bookieServiceInfo, new LongVersion(-1))); + } catch (UnknownHostException e) { + return FutureUtils.exception(e); + } + } /** * Watch the changes of bookies. 
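For context, the new discovery structures introduced above compose as follows (all values hypothetical; assumes java.util.Collections):

    BookieServiceInfo.Endpoint rpc = new BookieServiceInfo.Endpoint(
            "bookie-1", 3181, "bookie-1.example.com", "bookie-rpc",
            Collections.emptyList(), Collections.emptyList());
    BookieServiceInfo info = new BookieServiceInfo(
            Collections.singletonMap("region", "eu-west-1"),
            Collections.singletonList(rpc));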
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/discover/RegistrationManager.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/discover/RegistrationManager.java index 3d357d4dbe8..ee8bd2f9567 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/discover/RegistrationManager.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/discover/RegistrationManager.java @@ -21,6 +21,7 @@ import org.apache.bookkeeper.bookie.BookieException; import org.apache.bookkeeper.common.annotation.InterfaceAudience.LimitedPrivate; import org.apache.bookkeeper.common.annotation.InterfaceStability.Evolving; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.versioning.Version; import org.apache.bookkeeper.versioning.Versioned; @@ -59,9 +60,10 @@ interface RegistrationListener { * * @param bookieId bookie id * @param readOnly whether to register it as writable or readonly + * @param serviceInfo information about services exposed by the Bookie * @throws BookieException when fail to register a bookie. */ - void registerBookie(String bookieId, boolean readOnly) throws BookieException; + void registerBookie(BookieId bookieId, boolean readOnly, BookieServiceInfo serviceInfo) throws BookieException; /** * Unregistering the bookie server as bookieId. @@ -70,7 +72,7 @@ interface RegistrationListener { * @param readOnly whether to register it as writable or readonly * @throws BookieException when fail to unregister a bookie. */ - void unregisterBookie(String bookieId, boolean readOnly) throws BookieException; + void unregisterBookie(BookieId bookieId, boolean readOnly) throws BookieException; /** * Checks if Bookie with the given BookieId is registered as readwrite or @@ -81,7 +83,7 @@ interface RegistrationListener { * readwrite or readonly bookie. * @throws BookieException */ - boolean isBookieRegistered(String bookieId) throws BookieException; + boolean isBookieRegistered(BookieId bookieId) throws BookieException; /** * Write the cookie data, which will be used for verifying the integrity of the bookie environment. @@ -90,7 +92,7 @@ interface RegistrationListener { * @param cookieData cookie data * @throws BookieException when fail to write cookie */ - void writeCookie(String bookieId, Versioned cookieData) throws BookieException; + void writeCookie(BookieId bookieId, Versioned cookieData) throws BookieException; /** * Read the cookie data, which will be used for verifying the integrity of the bookie environment. @@ -99,7 +101,7 @@ interface RegistrationListener { * @return versioned cookie data * @throws BookieException when fail to read cookie */ - Versioned readCookie(String bookieId) throws BookieException; + Versioned readCookie(BookieId bookieId) throws BookieException; /** * Remove the cookie data. @@ -108,7 +110,7 @@ interface RegistrationListener { * @param version version of the cookie data * @throws BookieException when fail to remove cookie */ - void removeCookie(String bookieId, Version version) throws BookieException; + void removeCookie(BookieId bookieId, Version version) throws BookieException; /** * Prepare ledgers root node, availableNode, readonly node.. @@ -141,4 +143,11 @@ interface RegistrationListener { * @throws Exception */ boolean nukeExistingCluster() throws Exception; + + /** + * Add a listener to be triggered when an registration event occurs. 
+ * + * @param listener the listener to be added + */ + void addRegistrationListener(RegistrationListener listener); } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/discover/ZKRegistrationClient.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/discover/ZKRegistrationClient.java index ce19751c9d3..cca631086b0 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/discover/ZKRegistrationClient.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/discover/ZKRegistrationClient.java @@ -19,55 +19,63 @@ package org.apache.bookkeeper.discover; import static org.apache.bookkeeper.util.BookKeeperConstants.AVAILABLE_NODE; +import static org.apache.bookkeeper.util.BookKeeperConstants.COOKIE_NODE; import static org.apache.bookkeeper.util.BookKeeperConstants.READONLY; +import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.Sets; import java.io.IOException; +import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Set; import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.CopyOnWriteArraySet; import java.util.concurrent.RejectedExecutionException; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; import java.util.function.BiConsumer; +import java.util.stream.Collectors; import lombok.AccessLevel; import lombok.Getter; import lombok.extern.slf4j.Slf4j; -import org.apache.bookkeeper.client.BKException.Code; +import org.apache.bookkeeper.client.BKException; import org.apache.bookkeeper.client.BKException.ZKException; import org.apache.bookkeeper.common.concurrent.FutureUtils; -import org.apache.bookkeeper.common.util.SafeRunnable; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.proto.DataFormats.BookieServiceInfoFormat; import org.apache.bookkeeper.versioning.LongVersion; import org.apache.bookkeeper.versioning.Version; import org.apache.bookkeeper.versioning.Version.Occurred; import org.apache.bookkeeper.versioning.Versioned; +import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.WatchedEvent; import org.apache.zookeeper.Watcher; import org.apache.zookeeper.Watcher.Event.EventType; import org.apache.zookeeper.Watcher.Event.KeeperState; import org.apache.zookeeper.ZooKeeper; +import org.apache.zookeeper.data.Stat; /** * ZooKeeper based {@link RegistrationClient}. 
*/ + @Slf4j public class ZKRegistrationClient implements RegistrationClient { static final int ZK_CONNECT_BACKOFF_MS = 200; class WatchTask - implements SafeRunnable, + implements Runnable, Watcher, - BiConsumer>, Throwable>, + BiConsumer>, Throwable>, AutoCloseable { private final String regPath; private final Set listeners; private volatile boolean closed = false; - private Set bookies = null; + private Set bookies = null; private Version version = Version.NEW; private final CompletableFuture firstRunFuture; @@ -110,7 +118,7 @@ private void scheduleWatchTask(long delayMs) { } @Override - public void safeRun() { + public void run() { if (isClosed()) { return; } @@ -120,7 +128,7 @@ public void safeRun() { } @Override - public void accept(Versioned> bookieSet, Throwable throwable) { + public void accept(Versioned> bookieSet, Throwable throwable) { if (throwable != null) { if (firstRunFuture.isDone()) { scheduleWatchTask(ZK_CONNECT_BACKOFF_MS); @@ -133,9 +141,10 @@ public void accept(Versioned> bookieSet, Throwable thro if (this.version.compare(bookieSet.getVersion()) == Occurred.BEFORE) { this.version = bookieSet.getVersion(); this.bookies = bookieSet.getValue(); - - for (RegistrationListener listener : listeners) { - listener.onBookiesChanged(bookieSet); + if (!listeners.isEmpty()) { + for (RegistrationListener listener : listeners) { + listener.onBookiesChanged(bookieSet); + } } } FutureUtils.complete(firstRunFuture, null); @@ -170,18 +179,30 @@ public void close() { private WatchTask watchWritableBookiesTask = null; @Getter(AccessLevel.PACKAGE) private WatchTask watchReadOnlyBookiesTask = null; - + private final ConcurrentHashMap> bookieServiceInfoCache = + new ConcurrentHashMap<>(); + private final Watcher bookieServiceInfoCacheInvalidation; + private final boolean bookieAddressTracking; // registration paths private final String bookieRegistrationPath; + private final String bookieAllRegistrationPath; private final String bookieReadonlyRegistrationPath; public ZKRegistrationClient(ZooKeeper zk, String ledgersRootPath, - ScheduledExecutorService scheduler) { + ScheduledExecutorService scheduler, + boolean bookieAddressTracking) { this.zk = zk; this.scheduler = scheduler; - + // Following Bookie Network Address Changes is an expensive operation + // as it requires additional ZooKeeper watches + // we can disable this feature, in case the BK cluster has only + // static addresses + this.bookieAddressTracking = bookieAddressTracking; + this.bookieServiceInfoCacheInvalidation = bookieAddressTracking + ? 
new BookieServiceInfoCacheInvalidationWatcher() : null; this.bookieRegistrationPath = ledgersRootPath + "/" + AVAILABLE_NODE; + this.bookieAllRegistrationPath = ledgersRootPath + "/" + COOKIE_NODE; this.bookieReadonlyRegistrationPath = this.bookieRegistrationPath + "/" + READONLY; } @@ -190,33 +211,169 @@ public void close() { // no-op } + public boolean isBookieAddressTracking() { + return bookieAddressTracking; + } + public ZooKeeper getZk() { return zk; } @Override - public CompletableFuture<Versioned<Set<BookieSocketAddress>>> getWritableBookies() { + public CompletableFuture<Versioned<Set<BookieId>>> getWritableBookies() { return getChildren(bookieRegistrationPath, null); } @Override - public CompletableFuture<Versioned<Set<BookieSocketAddress>>> getReadOnlyBookies() { + public CompletableFuture<Versioned<Set<BookieId>>> getAllBookies() { + return getChildren(bookieAllRegistrationPath, null); + } + + @Override + public CompletableFuture<Versioned<Set<BookieId>>> getReadOnlyBookies() { return getChildren(bookieReadonlyRegistrationPath, null); } - private CompletableFuture<Versioned<Set<BookieSocketAddress>>> getChildren(String regPath, Watcher watcher) { - CompletableFuture<Versioned<Set<BookieSocketAddress>>> future = FutureUtils.createFuture(); + @Override + public CompletableFuture<Versioned<BookieServiceInfo>> getBookieServiceInfo(BookieId bookieId) { + // we can only serve data from cache here, + // because it can happen that this method is called inside the main + // zookeeper client event loop thread + Versioned<BookieServiceInfo> resultFromCache = bookieServiceInfoCache.get(bookieId); + if (log.isDebugEnabled()) { + log.debug("getBookieServiceInfo {} -> {}", bookieId, resultFromCache); + } + if (resultFromCache != null) { + return CompletableFuture.completedFuture(resultFromCache); + } else { + return FutureUtils.exception(new BKException.BKBookieHandleNotAvailableException()); + } + } + + /** + * Reads BookieServiceInfo from ZooKeeper and updates the local cache. + * + * @param bookieId + * @return a handle to the result of the operation.
+ */ + private CompletableFuture<Versioned<BookieServiceInfo>> readBookieServiceInfoAsync(BookieId bookieId) { + String pathAsWritable = bookieRegistrationPath + "/" + bookieId; + String pathAsReadonly = bookieReadonlyRegistrationPath + "/" + bookieId; + + CompletableFuture<Versioned<BookieServiceInfo>> promise = new CompletableFuture<>(); + zk.getData(pathAsWritable, bookieServiceInfoCacheInvalidation, + (int rc, String path, Object o, byte[] bytes, Stat stat) -> { + if (KeeperException.Code.OK.intValue() == rc) { + try { + BookieServiceInfo bookieServiceInfo = deserializeBookieServiceInfo(bookieId, bytes); + Versioned<BookieServiceInfo> result = new Versioned<>(bookieServiceInfo, + new LongVersion(stat.getCversion())); + log.info("Update BookieInfoCache (writable bookie) {} -> {}", bookieId, result.getValue()); + bookieServiceInfoCache.put(bookieId, result); + promise.complete(result); + } catch (IOException ex) { + log.error("Cannot update BookieInfo for {}", bookieId, ex); + promise.completeExceptionally(KeeperException.create(KeeperException.Code.get(rc), path) + .initCause(ex)); + return; + } + } else if (KeeperException.Code.NONODE.intValue() == rc) { + // not found, looking for a readonly bookie + zk.getData(pathAsReadonly, bookieServiceInfoCacheInvalidation, + (int rc2, String path2, Object o2, byte[] bytes2, Stat stat2) -> { + if (KeeperException.Code.OK.intValue() == rc2) { + try { + BookieServiceInfo bookieServiceInfo = deserializeBookieServiceInfo(bookieId, bytes2); + Versioned<BookieServiceInfo> result = + new Versioned<>(bookieServiceInfo, new LongVersion(stat2.getCversion())); + log.info("Update BookieInfoCache (readonly bookie) {} -> {}", bookieId, result.getValue()); + bookieServiceInfoCache.put(bookieId, result); + promise.complete(result); + } catch (IOException ex) { + log.error("Cannot update BookieInfo for {}", bookieId, ex); + promise.completeExceptionally(KeeperException.create(KeeperException.Code.get(rc2), path2) + .initCause(ex)); + return; + } + } else { + // not found as writable and readonly, the bookie is offline + promise.completeExceptionally(BKException.create(BKException.Code.NoBookieAvailableException)); + } + }, null); + } else { + promise.completeExceptionally(KeeperException.create(KeeperException.Code.get(rc), path)); + } + }, null); + return promise; + } + + @SuppressWarnings("unchecked") + @VisibleForTesting + static BookieServiceInfo deserializeBookieServiceInfo(BookieId bookieId, byte[] bookieServiceInfo) + throws IOException { + if (bookieServiceInfo == null || bookieServiceInfo.length == 0) { + return BookieServiceInfoUtils.buildLegacyBookieServiceInfo(bookieId.toString()); + } + + BookieServiceInfoFormat builder = BookieServiceInfoFormat.parseFrom(bookieServiceInfo); + BookieServiceInfo bsi = new BookieServiceInfo(); + List<BookieServiceInfo.Endpoint> endpoints = builder.getEndpointsList().stream() + .map(e -> { + BookieServiceInfo.Endpoint endpoint = new BookieServiceInfo.Endpoint(); + endpoint.setId(e.getId()); + endpoint.setPort(e.getPort()); + endpoint.setHost(e.getHost()); + endpoint.setProtocol(e.getProtocol()); + endpoint.setAuth(e.getAuthList()); + endpoint.setExtensions(e.getExtensionsList()); + return endpoint; + }) + .collect(Collectors.toList()); + + bsi.setEndpoints(endpoints); + bsi.setProperties(builder.getPropertiesMap()); + + return bsi; + } + + /** + * Reads the list of bookies at the given path and eagerly caches the BookieServiceInfo + * structure.
+ * + * @param regPath the path on ZooKeeper + * @param watcher an optional watcher + * @return a handle to the operation + */ + private CompletableFuture<Versioned<Set<BookieId>>> getChildren(String regPath, Watcher watcher) { + CompletableFuture<Versioned<Set<BookieId>>> future = FutureUtils.createFuture(); zk.getChildren(regPath, watcher, (rc, path, ctx, children, stat) -> { - if (Code.OK != rc) { - ZKException zke = new ZKException(); - zke.fillInStackTrace(); - future.completeExceptionally(zke); + if (KeeperException.Code.OK.intValue() != rc) { + ZKException zke = new ZKException(KeeperException.create(KeeperException.Code.get(rc), path)); + future.completeExceptionally(zke.fillInStackTrace()); return; } Version version = new LongVersion(stat.getCversion()); - Set<BookieSocketAddress> bookies = convertToBookieAddresses(children); - future.complete(new Versioned<>(bookies, version)); + Set<BookieId> bookies = convertToBookieAddresses(children); + List<CompletableFuture<Versioned<BookieServiceInfo>>> bookieInfoUpdated = new ArrayList<>(bookies.size()); + for (BookieId id : bookies) { + // update the cache for new bookies + if (!bookieServiceInfoCache.containsKey(id)) { + bookieInfoUpdated.add(readBookieServiceInfoAsync(id)); + } + } + if (bookieInfoUpdated.isEmpty()) { + future.complete(new Versioned<>(bookies, version)); + } else { + FutureUtils + .collect(bookieInfoUpdated) + .whenComplete((List<Versioned<BookieServiceInfo>> info, Throwable error) -> { + // we are ignoring errors intentionally + // there could be bookies that publish unparsable information + // or other temporary/permanent errors + future.complete(new Versioned<>(bookies, version)); + }); + } }, null); return future; } @@ -292,24 +449,66 @@ public synchronized void unwatchReadOnlyBookies(RegistrationListener listener) { } } - private static HashSet<BookieSocketAddress> convertToBookieAddresses(List<String> children) { + private static HashSet<BookieId> convertToBookieAddresses(List<String> children) { // Read the bookie addresses into a set for efficient lookup - HashSet<BookieSocketAddress> newBookieAddrs = Sets.newHashSet(); + HashSet<BookieId> newBookieAddrs = Sets.newHashSet(); for (String bookieAddrString : children) { if (READONLY.equals(bookieAddrString)) { continue; } + BookieId bookieAddr = BookieId.parse(bookieAddrString); + newBookieAddrs.add(bookieAddr); + } + return newBookieAddrs; + } - BookieSocketAddress bookieAddr; + private static BookieId stripBookieIdFromPath(String path) { + if (path == null) { + return null; + } + final int slash = path.lastIndexOf('/'); + if (slash >= 0) { try { - bookieAddr = new BookieSocketAddress(bookieAddrString); - } catch (IOException e) { - log.error("Could not parse bookie address: " + bookieAddrString + ", ignoring this bookie"); - continue; + return BookieId.parse(path.substring(slash + 1)); + } catch (IllegalArgumentException e) { + log.warn("Cannot decode bookieId from {}", path, e); + } + } + return null; + } + + private class BookieServiceInfoCacheInvalidationWatcher implements Watcher { + + @Override + public void process(WatchedEvent we) { + if (log.isDebugEnabled()) { + log.debug("zk event {} for {} state {}", we.getType(), we.getPath(), we.getState()); + } + if (we.getState() == KeeperState.Expired) { + log.info("zk session expired, invalidating cache"); + bookieServiceInfoCache.clear(); + return; + } + BookieId bookieId = stripBookieIdFromPath(we.getPath()); + if (bookieId == null) { + return; + } + switch (we.getType()) { + case NodeDeleted: + log.info("Invalidate cache for {}", bookieId); + bookieServiceInfoCache.remove(bookieId); + break; + case NodeDataChanged: + log.info("refresh cache for {}", bookieId); + readBookieServiceInfoAsync(bookieId); + break; + default: + if
(log.isDebugEnabled()) { + log.debug("ignore cache event {} for {}", we.getType(), bookieId); + } + break; } - newBookieAddrs.add(bookieAddr); } - return newBookieAddrs; } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/discover/ZKRegistrationManager.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/discover/ZKRegistrationManager.java index eab9e4f1864..58263614073 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/discover/ZKRegistrationManager.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/discover/ZKRegistrationManager.java @@ -25,18 +25,23 @@ import static org.apache.bookkeeper.util.BookKeeperConstants.INSTANCEID; import static org.apache.bookkeeper.util.BookKeeperConstants.READONLY; -import com.google.common.base.Charsets; +import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.Lists; +import java.io.ByteArrayOutputStream; import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; import java.util.Collection; import java.util.List; import java.util.UUID; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; import java.util.function.Function; +import java.util.stream.Collectors; import lombok.extern.slf4j.Slf4j; import org.apache.bookkeeper.bookie.BookieException; import org.apache.bookkeeper.bookie.BookieException.BookieIllegalOpException; +import org.apache.bookkeeper.bookie.BookieException.CookieExistException; import org.apache.bookkeeper.bookie.BookieException.CookieNotFoundException; import org.apache.bookkeeper.bookie.BookieException.MetadataStoreException; import org.apache.bookkeeper.client.BKException; @@ -50,7 +55,8 @@ import org.apache.bookkeeper.meta.ZkLayoutManager; import org.apache.bookkeeper.meta.ZkLedgerUnderreplicationManager; import org.apache.bookkeeper.meta.zk.ZKMetadataDriverBase; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.proto.DataFormats.BookieServiceInfoFormat; import org.apache.bookkeeper.util.BookKeeperConstants; import org.apache.bookkeeper.util.ZkUtils; import org.apache.bookkeeper.versioning.LongVersion; @@ -104,17 +110,16 @@ public class ZKRegistrationManager implements RegistrationManager { protected final String bookieReadonlyRegistrationPath; // session timeout in milliseconds private final int zkTimeoutMs; + private final List listeners = new ArrayList<>(); public ZKRegistrationManager(ServerConfiguration conf, - ZooKeeper zk, - RegistrationListener listener) { - this(conf, zk, ZKMetadataDriverBase.resolveZkLedgersRootPath(conf), listener); + ZooKeeper zk) { + this(conf, zk, ZKMetadataDriverBase.resolveZkLedgersRootPath(conf)); } public ZKRegistrationManager(ServerConfiguration conf, ZooKeeper zk, - String ledgersRootPath, - RegistrationListener listener) { + String ledgersRootPath) { this.conf = conf; this.zk = zk; this.zkAcls = ZkUtils.getACLs(conf); @@ -137,7 +142,7 @@ public ZKRegistrationManager(ServerConfiguration conf, // Check for expired connection. 
if (event.getType().equals(EventType.None) && event.getState().equals(KeeperState.Expired)) { - listener.onRegistrationExpired(); + listeners.forEach(RegistrationListener::onRegistrationExpired); } }); } @@ -153,7 +158,7 @@ public void close() { * @param bookieId bookie id * @return */ - public String getCookiePath(String bookieId) { + public String getCookiePath(BookieId bookieId) { return this.cookiePath + "/" + bookieId; } @@ -212,21 +217,53 @@ public void process(WatchedEvent event) { } @Override - public void registerBookie(String bookieId, boolean readOnly) throws BookieException { + public void registerBookie(BookieId bookieId, boolean readOnly, + BookieServiceInfo bookieServiceInfo) throws BookieException { if (!readOnly) { String regPath = bookieRegistrationPath + "/" + bookieId; - doRegisterBookie(regPath); + doRegisterBookie(regPath, bookieServiceInfo); } else { - doRegisterReadOnlyBookie(bookieId); + doRegisterReadOnlyBookie(bookieId, bookieServiceInfo); + } + } + + @VisibleForTesting + static byte[] serializeBookieServiceInfo(BookieServiceInfo bookieServiceInfo) { + if (log.isDebugEnabled()) { + log.debug("serialize BookieServiceInfo {}", bookieServiceInfo); + } + try (ByteArrayOutputStream os = new ByteArrayOutputStream()) { + BookieServiceInfoFormat.Builder builder = BookieServiceInfoFormat.newBuilder(); + List bsiEndpoints = bookieServiceInfo.getEndpoints().stream() + .map(e -> { + return BookieServiceInfoFormat.Endpoint.newBuilder() + .setId(e.getId()) + .setPort(e.getPort()) + .setHost(e.getHost()) + .setProtocol(e.getProtocol()) + .addAllAuth(e.getAuth()) + .addAllExtensions(e.getExtensions()) + .build(); + }) + .collect(Collectors.toList()); + + builder.addAllEndpoints(bsiEndpoints); + builder.putAllProperties(bookieServiceInfo.getProperties()); + + builder.build().writeTo(os); + return os.toByteArray(); + } catch (IOException err) { + log.error("Cannot serialize bookieServiceInfo from " + bookieServiceInfo); + throw new RuntimeException(err); } } - private void doRegisterBookie(String regPath) throws BookieException { + private void doRegisterBookie(String regPath, BookieServiceInfo bookieServiceInfo) throws BookieException { // ZK ephemeral node for this Bookie. try { if (!checkRegNodeAndWaitExpired(regPath)) { // Create the ZK ephemeral node for this Bookie. - zk.create(regPath, new byte[0], zkAcls, CreateMode.EPHEMERAL); + zk.create(regPath, serializeBookieServiceInfo(bookieServiceInfo), zkAcls, CreateMode.EPHEMERAL); zkRegManagerInitialized = true; } } catch (KeeperException ke) { @@ -247,11 +284,12 @@ private void doRegisterBookie(String regPath) throws BookieException { } } - private void doRegisterReadOnlyBookie(String bookieId) throws BookieException { + private void doRegisterReadOnlyBookie(BookieId bookieId, BookieServiceInfo bookieServiceInfo) + throws BookieException { try { if (null == zk.exists(this.bookieReadonlyRegistrationPath, false)) { try { - zk.create(this.bookieReadonlyRegistrationPath, new byte[0], + zk.create(this.bookieReadonlyRegistrationPath, serializeBookieServiceInfo(bookieServiceInfo), zkAcls, CreateMode.PERSISTENT); } catch (NodeExistsException e) { // this node is just now created by someone. 
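Taken together, `serializeBookieServiceInfo` here and `deserializeBookieServiceInfo` in `ZKRegistrationClient` form a protobuf round trip through the registration znode payload. A minimal sketch of that round trip; the class name, bookie id, and endpoint values are invented for illustration, and it assumes placement in the `org.apache.bookkeeper.discover` package, since both helpers are package-private `@VisibleForTesting` methods:

```java
package org.apache.bookkeeper.discover;

import java.util.Arrays;
import java.util.Collections;
import org.apache.bookkeeper.net.BookieId;

public class ServiceInfoRoundTripDemo {
    public static void main(String[] args) throws Exception {
        BookieServiceInfo.Endpoint ep = new BookieServiceInfo.Endpoint();
        ep.setId("bookie-1");                      // hypothetical endpoint values
        ep.setHost("bookie-1.example.com");
        ep.setPort(3181);
        ep.setProtocol("bookie-rpc");
        ep.setAuth(Collections.emptyList());
        ep.setExtensions(Collections.emptyList());
        BookieServiceInfo info = new BookieServiceInfo(Collections.emptyMap(), Arrays.asList(ep));

        // serialize to the BookieServiceInfoFormat protobuf bytes stored in the znode...
        byte[] wire = ZKRegistrationManager.serializeBookieServiceInfo(info);
        // ...and parse them back the way the client side does
        BookieServiceInfo read = ZKRegistrationClient.deserializeBookieServiceInfo(
                BookieId.parse("bookie-1.example.com:3181"), wire);
        System.out.println(read.getEndpoints().get(0).getHost()); // bookie-1.example.com
    }
}
```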
@@ -259,7 +297,7 @@ private void doRegisterReadOnlyBookie(String bookieId) throws BookieException { } String regPath = bookieReadonlyRegistrationPath + "/" + bookieId; - doRegisterBookie(regPath); + doRegisterBookie(regPath, bookieServiceInfo); // clear the write state regPath = bookieRegistrationPath + "/" + bookieId; try { @@ -275,7 +313,7 @@ private void doRegisterReadOnlyBookie(String bookieId) throws BookieException { } @Override - public void unregisterBookie(String bookieId, boolean readOnly) throws BookieException { + public void unregisterBookie(BookieId bookieId, boolean readOnly) throws BookieException { String regPath; if (!readOnly) { regPath = bookieRegistrationPath + "/" + bookieId; @@ -301,7 +339,7 @@ private void doUnregisterBookie(String regPath) throws BookieException { // @Override - public void writeCookie(String bookieId, + public void writeCookie(BookieId bookieId, Versioned cookieData) throws BookieException { String zkPath = getCookiePath(bookieId); try { @@ -327,13 +365,17 @@ public void writeCookie(String bookieId, } catch (InterruptedException ie) { Thread.currentThread().interrupt(); throw new MetadataStoreException("Interrupted writing cookie for bookie " + bookieId, ie); + } catch (NoNodeException nne) { + throw new CookieNotFoundException(bookieId.toString()); + } catch (NodeExistsException nee) { + throw new CookieExistException(bookieId.toString()); } catch (KeeperException e) { throw new MetadataStoreException("Failed to write cookie for bookie " + bookieId); } } @Override - public Versioned readCookie(String bookieId) throws BookieException { + public Versioned readCookie(BookieId bookieId) throws BookieException { String zkPath = getCookiePath(bookieId); try { Stat stat = zk.exists(zkPath, false); @@ -342,19 +384,19 @@ public Versioned readCookie(String bookieId) throws BookieException { LongVersion version = new LongVersion(stat.getVersion()); return new Versioned<>(data, version); } catch (NoNodeException nne) { - throw new CookieNotFoundException(bookieId); + throw new CookieNotFoundException(bookieId.toString()); } catch (KeeperException | InterruptedException e) { throw new MetadataStoreException("Failed to read cookie for bookie " + bookieId); } } @Override - public void removeCookie(String bookieId, Version version) throws BookieException { + public void removeCookie(BookieId bookieId, Version version) throws BookieException { String zkPath = getCookiePath(bookieId); try { zk.delete(zkPath, (int) ((LongVersion) version).getLongVersion()); } catch (NoNodeException e) { - throw new CookieNotFoundException(bookieId); + throw new CookieNotFoundException(bookieId.toString()); } catch (InterruptedException e) { Thread.currentThread().interrupt(); throw new MetadataStoreException("Interrupted deleting cookie for bookie " + bookieId, e); @@ -395,11 +437,12 @@ public boolean prepareFormat() throws Exception { boolean availableNodeExists = null != zk.exists(bookieRegistrationPath, false); // Create ledgers root node if not exists if (!ledgerRootExists) { - zk.create(ledgersRootPath, "".getBytes(Charsets.UTF_8), zkAcls, CreateMode.PERSISTENT); + ZkUtils.createFullPathOptimistic(zk, ledgersRootPath, "".getBytes(StandardCharsets.UTF_8), zkAcls, + CreateMode.PERSISTENT); } // create available bookies node if not exists if (!availableNodeExists) { - zk.create(bookieRegistrationPath, "".getBytes(Charsets.UTF_8), zkAcls, CreateMode.PERSISTENT); + zk.create(bookieRegistrationPath, "".getBytes(StandardCharsets.UTF_8), zkAcls, CreateMode.PERSISTENT); } // create 
readonly bookies node if not exists @@ -472,10 +515,11 @@ public boolean nukeExistingCluster() throws Exception { try (RegistrationClient regClient = new ZKRegistrationClient( zk, ledgersRootPath, - null + null, + false )) { if (availableNodeExists) { - Collection rwBookies = FutureUtils + Collection rwBookies = FutureUtils .result(regClient.getWritableBookies(), EXCEPTION_FUNC).getValue(); if (rwBookies != null && !rwBookies.isEmpty()) { log.error("Bookies are still up and connected to this cluster, " @@ -485,7 +529,7 @@ public boolean nukeExistingCluster() throws Exception { boolean readonlyNodeExists = null != zk.exists(bookieReadonlyRegistrationPath, false); if (readonlyNodeExists) { - Collection roBookies = FutureUtils + Collection roBookies = FutureUtils .result(regClient.getReadOnlyBookies(), EXCEPTION_FUNC).getValue(); if (roBookies != null && !roBookies.isEmpty()) { log.error("Readonly Bookies are still up and connected to this cluster, " @@ -544,14 +588,14 @@ public boolean format() throws Exception { // create INSTANCEID String instanceId = UUID.randomUUID().toString(); zk.create(ledgersRootPath + "/" + BookKeeperConstants.INSTANCEID, - instanceId.getBytes(Charsets.UTF_8), zkAcls, CreateMode.PERSISTENT); + instanceId.getBytes(StandardCharsets.UTF_8), zkAcls, CreateMode.PERSISTENT); log.info("Successfully formatted BookKeeper metadata"); return true; } @Override - public boolean isBookieRegistered(String bookieId) throws BookieException { + public boolean isBookieRegistered(BookieId bookieId) throws BookieException { String regPath = bookieRegistrationPath + "/" + bookieId; String readonlyRegPath = bookieReadonlyRegistrationPath + "/" + bookieId; try { @@ -566,4 +610,9 @@ public boolean isBookieRegistered(String bookieId) throws BookieException { throw new MetadataStoreException(e); } } + + @Override + public void addRegistrationListener(RegistrationListener listener) { + listeners.add(listener); + } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/feature/SettableFeature.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/feature/SettableFeature.java index bb6d1db091e..797f5badee7 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/feature/SettableFeature.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/feature/SettableFeature.java @@ -28,16 +28,16 @@ public SettableFeature(String name, int initialAvailability) { super(name, initialAvailability); } - public SettableFeature(String name, boolean isAvailabile) { - super(name, isAvailabile); + public SettableFeature(String name, boolean isAvailable) { + super(name, isAvailable); } public void set(int availability) { this.availability = availability; } - public void set(boolean isAvailabile) { - this.availability = isAvailabile ? FEATURE_AVAILABILITY_MAX_VALUE : 0; + public void set(boolean isAvailable) { + this.availability = isAvailable ? FEATURE_AVAILABILITY_MAX_VALUE : 0; } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/feature/package-info.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/feature/package-info.java index cd911cc12f0..f21d44ad487 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/feature/package-info.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/feature/package-info.java @@ -22,7 +22,7 @@ * that is used to proportionally control what features are enabled for the system. * *

    In other words, it is a way of altering the control in a system without restarting it. - * It can be used during all stages of developement, its most visible use case is on production. + * It can be used during all stages of development, its most visible use case is on production. * For instance, during a production release, you can enable or disable individual features, * control the data flow through the system, thereby minimizing risk of system failures * in real time. diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/AbstractHierarchicalLedgerManager.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/AbstractHierarchicalLedgerManager.java index 1aa8e151d2c..4a1ad27bb5e 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/AbstractHierarchicalLedgerManager.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/AbstractHierarchicalLedgerManager.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -23,7 +23,6 @@ import java.util.TreeSet; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.atomic.AtomicInteger; - import org.apache.bookkeeper.conf.AbstractConfiguration; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.Processor; import org.apache.bookkeeper.util.StringUtils; @@ -68,7 +67,7 @@ public void processResult(int rc, String path, Object ctx) { finalCb.processResult(successRc, null, context); return; } else if (rc != Code.OK.intValue()) { - LOG.error("Error syncing path " + path + " when getting its chidren: ", + LOG.error("Error syncing path " + path + " when getting its children: ", KeeperException.create(KeeperException.Code.get(rc), path)); finalCb.processResult(failureRc, null, context); return; @@ -100,7 +99,7 @@ public void processResult(int rc, String path, Object ctx, } /** - * Process list one by one in asynchronize way. Process will be stopped immediately + * Process list one by one in asynchronous way. Process will be stopped immediately * when error occurred. */ private static class AsyncListProcessor { @@ -191,7 +190,7 @@ protected NavigableSet ledgerListToSet(List ledgerNodes, String pa NavigableSet zkActiveLedgers = new TreeSet(); if (!path.startsWith(ledgerRootPath)) { - LOG.warn("Ledger path [{}] is not a valid path name, it should start wth {}", path, ledgerRootPath); + LOG.warn("Ledger path [{}] is not a valid path name, it should start with {}", path, ledgerRootPath); return zkActiveLedgers; } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/AbstractZkLedgerManager.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/AbstractZkLedgerManager.java index ccfbb52ecfb..59b17be922b 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/AbstractZkLedgerManager.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/AbstractZkLedgerManager.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file * distributed with this work for additional information @@ -18,24 +18,25 @@ package org.apache.bookkeeper.meta; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Optional; - import io.netty.util.concurrent.DefaultThreadFactory; - import java.io.IOException; import java.util.HashSet; import java.util.List; import java.util.NavigableSet; +import java.util.Optional; import java.util.Set; import java.util.TreeSet; +import java.util.concurrent.CompletableFuture; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.TimeUnit; - import org.apache.bookkeeper.client.BKException; -import org.apache.bookkeeper.client.LedgerMetadata; +import org.apache.bookkeeper.client.LedgerMetadataBuilder; +import org.apache.bookkeeper.client.api.LedgerMetadata; +import org.apache.bookkeeper.common.concurrent.FutureUtils; import org.apache.bookkeeper.conf.AbstractConfiguration; import org.apache.bookkeeper.meta.zk.ZKMetadataDriverBase; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GenericCallback; @@ -46,6 +47,8 @@ import org.apache.bookkeeper.util.ZkUtils; import org.apache.bookkeeper.versioning.LongVersion; import org.apache.bookkeeper.versioning.Version; +import org.apache.bookkeeper.versioning.Versioned; +import org.apache.commons.collections4.CollectionUtils; import org.apache.zookeeper.AsyncCallback; import org.apache.zookeeper.AsyncCallback.DataCallback; import org.apache.zookeeper.AsyncCallback.StatCallback; @@ -72,6 +75,7 @@ public abstract class AbstractZkLedgerManager implements LedgerManager, Watcher @VisibleForTesting static final int ZK_CONNECT_BACKOFF_MS = 200; + private final LedgerMetadataSerDe serDe; protected final AbstractConfiguration conf; protected final ZooKeeper zk; protected final String ledgerRootPath; @@ -85,7 +89,7 @@ public abstract class AbstractZkLedgerManager implements LedgerManager, Watcher /** * ReadLedgerMetadataTask class. 
*/ - protected class ReadLedgerMetadataTask implements Runnable, GenericCallback { + protected class ReadLedgerMetadataTask implements Runnable { final long ledgerId; @@ -99,34 +103,30 @@ public void run() { if (LOG.isDebugEnabled()) { LOG.debug("Re-read ledger metadata for {}.", ledgerId); } - readLedgerMetadata(ledgerId, this, AbstractZkLedgerManager.this); - } else { - if (LOG.isDebugEnabled()) { - LOG.debug("Ledger metadata listener for ledger {} is already removed.", ledgerId); - } + readLedgerMetadata(ledgerId, AbstractZkLedgerManager.this) + .whenComplete((metadata, exception) -> handleMetadata(metadata, exception)); + } else if (LOG.isDebugEnabled()) { + LOG.debug("Ledger metadata listener for ledger {} is already removed.", ledgerId); } } - @Override - public void operationComplete(int rc, final LedgerMetadata result) { - if (BKException.Code.OK == rc) { + private void handleMetadata(Versioned result, Throwable exception) { + if (exception == null) { final Set listenerSet = listeners.get(ledgerId); if (null != listenerSet) { if (LOG.isDebugEnabled()) { LOG.debug("Ledger metadata is changed for {} : {}.", ledgerId, result); } - scheduler.submit(new Runnable() { - @Override - public void run() { + scheduler.submit(() -> { synchronized (listenerSet) { for (LedgerMetadataListener listener : listenerSet) { listener.onChanged(ledgerId, result); } } - } - }); + }); } - } else if (BKException.Code.NoSuchLedgerExistsException == rc) { + } else if (BKException.getExceptionCode(exception) + == BKException.Code.NoSuchLedgerExistsOnMetadataServerException) { // the ledger is removed, do nothing Set listenerSet = listeners.remove(ledgerId); if (null != listenerSet) { @@ -143,12 +143,37 @@ public void run() { } } } else { - LOG.warn("Failed on read ledger metadata of ledger {} : {}", ledgerId, rc); + LOG.warn("Failed on read ledger metadata of ledger {}: {}", + ledgerId, BKException.getExceptionCode(exception)); scheduler.schedule(this, ZK_CONNECT_BACKOFF_MS, TimeUnit.MILLISECONDS); } } } + /** + * CancelWatchLedgerMetadataTask class. + */ + protected class CancelWatchLedgerMetadataTask implements Runnable { + + final long ledgerId; + + CancelWatchLedgerMetadataTask(long ledgerId) { + this.ledgerId = ledgerId; + } + + @Override + public void run() { + Set listeners = AbstractZkLedgerManager.this.listeners.get(ledgerId); + if (!CollectionUtils.isEmpty(listeners)) { + if (LOG.isDebugEnabled()) { + LOG.debug("Still watch ledgerId: {}, ignore this unwatch task.", ledgerId); + } + return; + } + cancelMetadataWatch(ledgerId, AbstractZkLedgerManager.this); + } + } + /** * ZooKeeper-based Ledger Manager Constructor. * @@ -158,6 +183,7 @@ public void run() { * ZooKeeper Client Handle */ protected AbstractZkLedgerManager(AbstractConfiguration conf, ZooKeeper zk) { + this.serDe = new LedgerMetadataSerDe(); this.conf = conf; this.zk = zk; this.ledgerRootPath = ZKMetadataDriverBase.resolveZkLedgersRootPath(conf); @@ -175,7 +201,7 @@ protected AbstractZkLedgerManager(AbstractConfiguration conf, ZooKeeper zk) { * Ledger ID * @return ledger node path */ - protected abstract String getLedgerPath(long ledgerId); + public abstract String getLedgerPath(long ledgerId); /** * Get ledger id from its znode ledger path. 
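ReadLedgerMetadataTask and CancelWatchLedgerMetadataTask are the two ends of one lifecycle: the first re-reads metadata (re-installing the ZooKeeper data watch) whenever a watched ledger changes, and the second removes the watch once the last listener is gone. From the caller's side the lifecycle is just register/unregister; a small sketch, where the ledger id is arbitrary and `ledgerManager` is assumed to be an already-built LedgerManager:

```java
import org.apache.bookkeeper.meta.LedgerManager;
import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.LedgerMetadataListener;

public class MetadataListenerDemo {
    static void follow(LedgerManager ledgerManager) {
        // after this patch the listener receives a Versioned<LedgerMetadata>
        LedgerMetadataListener listener = (ledgerId, metadata) ->
                System.out.println("ledger " + ledgerId + " changed: " + metadata);

        ledgerManager.registerLedgerMetadataListener(42L, listener);
        // ... later: unregistering the last listener also cancels the ZK data watch
        ledgerManager.unregisterLedgerMetadataListener(42L, listener);
    }
}
```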
@@ -189,7 +215,9 @@ protected AbstractZkLedgerManager(AbstractConfiguration conf, ZooKeeper zk) { @Override public void process(WatchedEvent event) { - LOG.debug("Received watched event {} from zookeeper based ledger manager.", event); + if (LOG.isDebugEnabled()) { + LOG.debug("Received watched event {} from zookeeper based ledger manager.", event); + } if (Event.EventType.None == event.getType()) { if (Event.KeeperState.Expired == event.getState()) { LOG.info("ZooKeeper client expired on ledger manager."); @@ -226,10 +254,9 @@ public void process(WatchedEvent event) { } listeners.remove(ledgerId, listenerSet); } - } else { - if (LOG.isDebugEnabled()) { - LOG.debug("No ledger metadata listeners to remove from ledger {} after it's deleted.", ledgerId); - } + } else if (LOG.isDebugEnabled()) { + LOG.debug("No ledger metadata listeners to remove from ledger {} after it's deleted.", + ledgerId); } break; case NodeDataChanged: @@ -244,55 +271,94 @@ public void process(WatchedEvent event) { } @Override - public void createLedgerMetadata(final long ledgerId, final LedgerMetadata metadata, - final GenericCallback ledgerCb) { + public CompletableFuture> createLedgerMetadata(long ledgerId, + LedgerMetadata inputMetadata) { + CompletableFuture> promise = new CompletableFuture<>(); + /* + * Create a random number and use it as creator token. + */ + final long cToken = ThreadLocalRandom.current().nextLong(Long.MAX_VALUE); + final LedgerMetadata metadata; + if (inputMetadata.getMetadataFormatVersion() > LedgerMetadataSerDe.METADATA_FORMAT_VERSION_2) { + metadata = LedgerMetadataBuilder.from(inputMetadata).withId(ledgerId).withCToken(cToken).build(); + } else { + metadata = inputMetadata; + } String ledgerPath = getLedgerPath(ledgerId); StringCallback scb = new StringCallback() { @Override public void processResult(int rc, String path, Object ctx, String name) { if (rc == Code.OK.intValue()) { - // update version - metadata.setVersion(new LongVersion(0)); - ledgerCb.operationComplete(BKException.Code.OK, metadata); + promise.complete(new Versioned<>(metadata, new LongVersion(0))); } else if (rc == Code.NODEEXISTS.intValue()) { - LOG.warn("Failed to create ledger metadata for {} which already exist", ledgerId); - ledgerCb.operationComplete(BKException.Code.LedgerExistException, null); + LOG.info("Ledger metadata for {} appears to already exist, checking cToken", + ledgerId); + if (metadata.getMetadataFormatVersion() > 2) { + CompletableFuture> readFuture = readLedgerMetadata(ledgerId); + readFuture.handle((readMetadata, exception) -> { + if (exception == null) { + if (readMetadata.getValue().getCToken() == cToken) { + FutureUtils.complete(promise, new Versioned<>(metadata, new LongVersion(0))); + } else { + LOG.warn("Failed to create ledger metadata for {} which already exists", ledgerId); + promise.completeExceptionally(new BKException.BKLedgerExistException()); + } + } else if (exception instanceof KeeperException.NoNodeException) { + // This is a pretty strange case. We tried to create the node, found that it + // already exists, but failed to find it when we reread it. It's possible that + // we successfully created it, got an erroneous NODEEXISTS due to a resend, + // and then it got removed. It's also possible that we actually lost the race + // and then it got removed. I'd argue that returning an error here is the right + // path since recreating it is likely to cause problems. 
+ LOG.warn("Ledger {} appears to have already existed and then been removed, failing" + + " with LedgerExistException", ledgerId); + promise.completeExceptionally(new BKException.BKLedgerExistException()); + } else { + LOG.error("Could not validate node for ledger {} after LedgerExistsException", ledgerId, + exception); + promise.completeExceptionally(new BKException.ZKException(exception)); + } + return null; + }); + } else { + LOG.warn("Failed to create ledger metadata for {} which already exists", ledgerId); + promise.completeExceptionally(new BKException.BKLedgerExistException()); + } } else { LOG.error("Could not create node for ledger {}", ledgerId, KeeperException.create(Code.get(rc), path)); - ledgerCb.operationComplete(BKException.Code.ZKException, null); + promise.completeExceptionally( + new BKException.ZKException(KeeperException.create(Code.get(rc), path))); } } }; + final byte[] data; + try { + data = serDe.serialize(metadata); + } catch (IOException ioe) { + promise.completeExceptionally(new BKException.BKMetadataSerializationException(ioe)); + return promise; + } + List zkAcls = ZkUtils.getACLs(conf); - ZkUtils.asyncCreateFullPathOptimistic(zk, ledgerPath, metadata.serialize(), zkAcls, - CreateMode.PERSISTENT, scb, null); + ZkUtils.asyncCreateFullPathOptimistic(zk, ledgerPath, data, zkAcls, + CreateMode.PERSISTENT, scb, null); + return promise; } - /** - * Removes ledger metadata from ZooKeeper and deletes its parent znodes - * recursively if they dont have anymore children. - * - * @param ledgerId - * ledger identifier - * @param version - * local version of metadata znode - * @param cb - * callback object - */ @Override - public void removeLedgerMetadata(final long ledgerId, final Version version, - final GenericCallback cb) { + public CompletableFuture removeLedgerMetadata(final long ledgerId, final Version version) { + CompletableFuture promise = new CompletableFuture<>(); int znodeVersion = -1; if (Version.NEW == version) { LOG.error("Request to delete ledger {} metadata with version set to the initial one", ledgerId); - cb.operationComplete(BKException.Code.MetadataVersionException, (Void) null); - return; + promise.completeExceptionally(new BKException.BKMetadataVersionException()); + return promise; } else if (Version.ANY != version) { if (!(version instanceof LongVersion)) { LOG.info("Not an instance of ZKVersion: {}", ledgerId); - cb.operationComplete(BKException.Code.MetadataVersionException, (Void) null); - return; + promise.completeExceptionally(new BKException.BKMetadataVersionException()); + return promise; } else { znodeVersion = (int) ((LongVersion) version).getLongVersion(); } @@ -301,10 +367,9 @@ public void removeLedgerMetadata(final long ledgerId, final Version version, VoidCallback callbackForDelete = new VoidCallback() { @Override public void processResult(int rc, String path, Object ctx) { - int bkRc; if (rc == KeeperException.Code.NONODE.intValue()) { - LOG.warn("Ledger node does not exist in ZooKeeper: ledgerId={}", ledgerId); - bkRc = BKException.Code.NoSuchLedgerExistsException; + LOG.warn("Ledger node does not exist in ZooKeeper: ledgerId={}. 
Returning success.", ledgerId); + FutureUtils.complete(promise, null); } else if (rc == KeeperException.Code.OK.intValue()) { // removed listener on ledgerId Set listenerSet = listeners.remove(ledgerId); @@ -312,19 +377,17 @@ public void processResult(int rc, String path, Object ctx) { if (LOG.isDebugEnabled()) { LOG.debug( "Remove registered ledger metadata listeners on ledger {} after ledger is deleted.", - ledgerId, listenerSet); - } - } else { - if (LOG.isDebugEnabled()) { - LOG.debug("No ledger metadata listeners to remove from ledger {} when it's being deleted.", ledgerId); } + } else if (LOG.isDebugEnabled()) { + LOG.debug("No ledger metadata listeners to remove from ledger {} when it's being deleted.", + ledgerId); } - bkRc = BKException.Code.OK; + FutureUtils.complete(promise, null); } else { - bkRc = BKException.Code.ZKException; + promise.completeExceptionally( + new BKException.ZKException(KeeperException.create(Code.get(rc), path))); } - cb.operationComplete(bkRc, (Void) null); } }; String ledgerZnodePath = getLedgerPath(ledgerId); @@ -338,6 +401,7 @@ public void processResult(int rc, String path, Object ctx) { } else { zk.delete(ledgerZnodePath, znodeVersion, callbackForDelete, null); } + return promise; } @Override @@ -375,18 +439,35 @@ public void unregisterLedgerMetadataListener(long ledgerId, LedgerMetadataListen } if (listenerSet.isEmpty()) { listeners.remove(ledgerId, listenerSet); + new CancelWatchLedgerMetadataTask(ledgerId).run(); } } } } + private void cancelMetadataWatch(long ledgerId, Watcher watcher) { + zk.removeWatches(getLedgerPath(ledgerId), watcher, WatcherType.Data, true, new VoidCallback() { + @Override + public void processResult(int rc, String path, Object o) { + if (rc != KeeperException.Code.OK.intValue()) { + LOG.error("Cancel watch ledger {} metadata failed.", ledgerId, + KeeperException.create(KeeperException.Code.get(rc), path)); + return; + } + if (LOG.isDebugEnabled()) { + LOG.debug("Cancel watch ledger {} metadata succeed.", ledgerId); + } + } + }, null); + } + @Override - public void readLedgerMetadata(final long ledgerId, final GenericCallback readCb) { - readLedgerMetadata(ledgerId, readCb, null); + public CompletableFuture> readLedgerMetadata(long ledgerId) { + return readLedgerMetadata(ledgerId, null); } - protected void readLedgerMetadata(final long ledgerId, final GenericCallback readCb, - Watcher watcher) { + protected CompletableFuture> readLedgerMetadata(final long ledgerId, Watcher watcher) { + CompletableFuture> promise = new CompletableFuture<>(); zk.getData(getLedgerPath(ledgerId), watcher, new DataCallback() { @Override public void processResult(int rc, String path, Object ctx, byte[] data, Stat stat) { @@ -395,70 +476,84 @@ public void processResult(int rc, String path, Object ctx, byte[] data, Stat sta LOG.debug("No such ledger: " + ledgerId, KeeperException.create(KeeperException.Code.get(rc), path)); } - readCb.operationComplete(BKException.Code.NoSuchLedgerExistsException, null); + promise.completeExceptionally(new BKException.BKNoSuchLedgerExistsOnMetadataServerException()); return; } if (rc != KeeperException.Code.OK.intValue()) { LOG.error("Could not read metadata for ledger: " + ledgerId, KeeperException.create(KeeperException.Code.get(rc), path)); - readCb.operationComplete(BKException.Code.ZKException, null); + promise.completeExceptionally( + new BKException.ZKException(KeeperException.create(Code.get(rc), path))); return; } if (stat == null) { LOG.error("Could not parse ledger metadata for ledger: {}. 
Stat object is null", ledgerId); - readCb.operationComplete(BKException.Code.ZKException, null); + promise.completeExceptionally(new BKException.ZKException( + new Exception("Could not parse ledger metadata for ledger: " + + ledgerId + " . Stat object is null").fillInStackTrace())); return; } - LedgerMetadata metadata; + try { - metadata = LedgerMetadata.parseConfig(data, new LongVersion(stat.getVersion()), - Optional.of(stat.getCtime())); - } catch (IOException e) { - LOG.error("Could not parse ledger metadata for ledger: " + ledgerId, e); - readCb.operationComplete(BKException.Code.ZKException, null); - return; + LongVersion version = new LongVersion(stat.getVersion()); + LedgerMetadata metadata = serDe.parseConfig(data, ledgerId, Optional.of(stat.getCtime())); + promise.complete(new Versioned<>(metadata, version)); + } catch (Throwable t) { + LOG.error("Could not parse ledger metadata for ledger: {}", ledgerId, t); + promise.completeExceptionally(new BKException.ZKException( + new Exception("Could not parse ledger metadata for ledger: " + + ledgerId, t).fillInStackTrace())); } - readCb.operationComplete(BKException.Code.OK, metadata); } }, null); + return promise; } @Override - public void writeLedgerMetadata(final long ledgerId, final LedgerMetadata metadata, - final GenericCallback cb) { - Version v = metadata.getVersion(); - if (!(v instanceof LongVersion)) { - cb.operationComplete(BKException.Code.MetadataVersionException, null); - return; + public CompletableFuture> writeLedgerMetadata(long ledgerId, LedgerMetadata metadata, + Version currentVersion) { + CompletableFuture> promise = new CompletableFuture<>(); + if (!(currentVersion instanceof LongVersion)) { + promise.completeExceptionally(new BKException.BKMetadataVersionException()); + return promise; + } + final LongVersion zv = (LongVersion) currentVersion; + + final byte[] data; + try { + data = serDe.serialize(metadata); + } catch (IOException ioe) { + promise.completeExceptionally(new BKException.BKMetadataSerializationException(ioe)); + return promise; } - final LongVersion zv = (LongVersion) v; zk.setData(getLedgerPath(ledgerId), - metadata.serialize(), (int) zv.getLongVersion(), + data, (int) zv.getLongVersion(), new StatCallback() { @Override public void processResult(int rc, String path, Object ctx, Stat stat) { if (KeeperException.Code.BADVERSION.intValue() == rc) { - cb.operationComplete(BKException.Code.MetadataVersionException, null); + promise.completeExceptionally(new BKException.BKMetadataVersionException()); } else if (KeeperException.Code.OK.intValue() == rc) { // update metadata version - metadata.setVersion(zv.setLongVersion(stat.getVersion())); - cb.operationComplete(BKException.Code.OK, metadata); + promise.complete(new Versioned<>(metadata, new LongVersion(stat.getVersion()))); } else if (KeeperException.Code.NONODE.intValue() == rc) { LOG.warn("Ledger node does not exist in ZooKeeper: ledgerId={}", ledgerId); - cb.operationComplete(BKException.Code.NoSuchLedgerExistsException, null); + promise.completeExceptionally(new BKException.BKNoSuchLedgerExistsOnMetadataServerException()); } else { LOG.warn("Conditional update ledger metadata failed: {}", KeeperException.Code.get(rc)); - cb.operationComplete(BKException.Code.ZKException, null); + promise.completeExceptionally( + new BKException.ZKException(KeeperException.create(Code.get(rc), path))); } } }, null); + return promise; } /** * Process ledgers in a single zk node. * *

    - * for each ledger found in this zk node, processor#process(ledgerId) will be triggerred + * for each ledger found in this zk node, processor#process(ledgerId) will be triggered * to process a specific ledger. after all ledgers has been processed, the finalCb will * be called with provided context object. The RC passed to finalCb is decided by : *

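The contract spelled out in the javadoc above, where processor#process is invoked once per ledger and the final callback then fires with successRc or failureRc, is exercised through the public asyncProcessLedgers entry point that builds on this method. A sketch with a purely illustrative counting processor; `ledgerManager` is assumed to be an already-built LedgerManager:

```java
import java.util.concurrent.atomic.AtomicLong;
import org.apache.bookkeeper.client.BKException;
import org.apache.bookkeeper.meta.LedgerManager;

public class LedgerScanDemo {
    static void scan(LedgerManager ledgerManager) {
        AtomicLong count = new AtomicLong();
        ledgerManager.asyncProcessLedgers(
                (ledgerId, cb) -> {
                    // per-ledger work; completing the callback lets the scan advance
                    count.incrementAndGet();
                    cb.processResult(BKException.Code.OK, null, null);
                },
                (rc, path, ctx) -> System.out.println("scanned " + count.get() + " ledgers, rc=" + rc),
                null /* context */, BKException.Code.OK, BKException.Code.ZKException);
    }
}
```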
      @@ -509,9 +604,11 @@ public void operationComplete(int rc, List ledgerNodes) { MultiCallback mcb = new MultiCallback(zkActiveLedgers.size(), finalCb, ctx, successRc, failureRc); // start loop over all ledgers - for (Long ledger : zkActiveLedgers) { - processor.process(ledger, mcb); - } + scheduler.submit(() -> { + for (Long ledger : zkActiveLedgers) { + processor.process(ledger, mcb); + } + }); } }); } @@ -523,18 +620,19 @@ public void operationComplete(int rc, List ledgerNodes) { * Znode Name * @return true if the znode is a special znode otherwise false */ - public static boolean isSpecialZnode(String znode) { - if (BookKeeperConstants.AVAILABLE_NODE.equals(znode) - || BookKeeperConstants.COOKIE_NODE.equals(znode) - || BookKeeperConstants.LAYOUT_ZNODE.equals(znode) - || BookKeeperConstants.INSTANCEID.equals(znode) - || BookKeeperConstants.UNDER_REPLICATION_NODE.equals(znode) - || LegacyHierarchicalLedgerManager.IDGEN_ZNODE.equals(znode) - || LongHierarchicalLedgerManager.IDGEN_ZNODE.equals(znode) - || znode.startsWith(ZkLedgerIdGenerator.LEDGER_ID_GEN_PREFIX)) { - return true; - } - return false; + public static boolean isSpecialZnode(String znode) { + return BookKeeperConstants.AVAILABLE_NODE.equals(znode) + || BookKeeperConstants.COOKIE_NODE.equals(znode) + || BookKeeperConstants.LAYOUT_ZNODE.equals(znode) + || BookKeeperConstants.INSTANCEID.equals(znode) + || BookKeeperConstants.UNDER_REPLICATION_NODE.equals(znode) + || isLeadgerIdGeneratorZnode(znode); + } + + public static boolean isLeadgerIdGeneratorZnode(String znode) { + return LegacyHierarchicalLedgerManager.IDGEN_ZNODE.equals(znode) + || LongHierarchicalLedgerManager.IDGEN_ZNODE.equals(znode) + || znode.startsWith(ZkLedgerIdGenerator.LEDGER_ID_GEN_PREFIX); } /** diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/AbstractZkLedgerManagerFactory.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/AbstractZkLedgerManagerFactory.java index 42ec7344b5f..59fd561efc3 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/AbstractZkLedgerManagerFactory.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/AbstractZkLedgerManagerFactory.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file * distributed with this work for additional information @@ -17,15 +17,18 @@ */ package org.apache.bookkeeper.meta; +import static org.apache.bookkeeper.meta.AbstractZkLedgerManager.isLeadgerIdGeneratorZnode; +import static org.apache.bookkeeper.meta.AbstractZkLedgerManager.isSpecialZnode; + import java.io.IOException; import java.net.URI; import java.util.List; import java.util.Objects; import lombok.extern.slf4j.Slf4j; +import org.apache.bookkeeper.common.util.ReflectionUtils; import org.apache.bookkeeper.conf.AbstractConfiguration; import org.apache.bookkeeper.meta.LayoutManager.LedgerLayoutExistsException; import org.apache.bookkeeper.meta.zk.ZKMetadataDriverBase; -import org.apache.bookkeeper.util.ReflectionUtils; import org.apache.commons.configuration.ConfigurationException; import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.ZKUtil; @@ -48,7 +51,10 @@ public void format(AbstractConfiguration conf, LayoutManager layoutManager) String ledgersRootPath = ZKMetadataDriverBase.resolveZkLedgersRootPath(conf); List children = zk.getChildren(ledgersRootPath, false); for (String child : children) { - if (!AbstractZkLedgerManager.isSpecialZnode(child) && ledgerManager.isLedgerParentNode(child)) { + boolean lParentNode = !isSpecialZnode(child) && ledgerManager.isLedgerParentNode(child); + boolean lIdGenerator = isLeadgerIdGeneratorZnode(child); + + if (lParentNode || lIdGenerator) { ZKUtil.deleteRecursive(zk, ledgersRootPath + "/" + child); } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/CleanupLedgerManager.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/CleanupLedgerManager.java index 36f9d8c7412..bc4cefe6fc2 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/CleanupLedgerManager.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/CleanupLedgerManager.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -18,25 +18,27 @@ package org.apache.bookkeeper.meta; import com.google.common.annotations.VisibleForTesting; - import java.io.IOException; import java.util.HashSet; import java.util.Set; +import java.util.concurrent.CompletableFuture; import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ConcurrentMap; import java.util.concurrent.locks.ReentrantReadWriteLock; - +import lombok.extern.slf4j.Slf4j; import org.apache.bookkeeper.client.BKException; -import org.apache.bookkeeper.client.LedgerMetadata; +import org.apache.bookkeeper.client.api.LedgerMetadata; +import org.apache.bookkeeper.common.concurrent.FutureUtils; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GenericCallback; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.LedgerMetadataListener; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.Processor; import org.apache.bookkeeper.versioning.Version; +import org.apache.bookkeeper.versioning.Versioned; import org.apache.zookeeper.AsyncCallback; /** * A ledger manager that cleans up resources upon closing. 
*/ +@Slf4j public class CleanupLedgerManager implements LedgerManager { private class CleanupGenericCallback implements GenericCallback { @@ -52,7 +54,7 @@ private class CleanupGenericCallback implements GenericCallback { public void operationComplete(int rc, T result) { closeLock.readLock().lock(); try { - if (!closed && null != removeCallback(cb)) { + if (!closed && removeCallback(cb)) { cb.operationComplete(rc, result); } } finally { @@ -75,10 +77,10 @@ public LedgerRange next() throws IOException { } private final LedgerManager underlying; - private final ConcurrentMap callbacks = - new ConcurrentHashMap(); + private final Set callbacks = ConcurrentHashMap.newKeySet(); private boolean closed = false; private final ReentrantReadWriteLock closeLock = new ReentrantReadWriteLock(); + private final Set> futures = ConcurrentHashMap.newKeySet(); public CleanupLedgerManager(LedgerManager lm) { this.underlying = lm; @@ -90,7 +92,7 @@ public LedgerManager getUnderlying() { } private void addCallback(GenericCallback callback) { - callbacks.put(callback, callback); + callbacks.add(callback); } @Override @@ -103,67 +105,80 @@ public void unregisterLedgerMetadataListener(long ledgerId, LedgerMetadataListen underlying.unregisterLedgerMetadataListener(ledgerId, listener); } - private GenericCallback removeCallback(GenericCallback callback) { + private boolean removeCallback(GenericCallback callback) { return callbacks.remove(callback); } + private void recordPromise(CompletableFuture promise) { + futures.add(promise); + promise.whenComplete((result, exception) -> { + futures.remove(promise); + }); + } + + @VisibleForTesting + int getCurrentFuturePromiseSize() { + return futures.size(); + } + @Override - public void createLedgerMetadata(long lid, LedgerMetadata metadata, - GenericCallback cb) { + public CompletableFuture> createLedgerMetadata(long lid, LedgerMetadata metadata) { closeLock.readLock().lock(); try { if (closed) { - cb.operationComplete(BKException.Code.ClientClosedException, null); - return; + return closedPromise(); + } else { + CompletableFuture> promise = underlying.createLedgerMetadata(lid, metadata); + recordPromise(promise); + return promise; } - underlying.createLedgerMetadata(lid, metadata, new CleanupGenericCallback(cb)); } finally { closeLock.readLock().unlock(); } } @Override - public void removeLedgerMetadata(long ledgerId, Version version, - GenericCallback vb) { + public CompletableFuture removeLedgerMetadata(long ledgerId, Version version) { closeLock.readLock().lock(); try { if (closed) { - vb.operationComplete(BKException.Code.ClientClosedException, null); - return; + return closedPromise(); } - underlying.removeLedgerMetadata(ledgerId, version, - new CleanupGenericCallback(vb)); + CompletableFuture promise = underlying.removeLedgerMetadata(ledgerId, version); + recordPromise(promise); + return promise; } finally { closeLock.readLock().unlock(); } } @Override - public void readLedgerMetadata(long ledgerId, - GenericCallback readCb) { + public CompletableFuture> readLedgerMetadata(long ledgerId) { closeLock.readLock().lock(); try { if (closed) { - readCb.operationComplete(BKException.Code.ClientClosedException, null); - return; + return closedPromise(); } - underlying.readLedgerMetadata(ledgerId, new CleanupGenericCallback(readCb)); + CompletableFuture> promise = underlying.readLedgerMetadata(ledgerId); + recordPromise(promise); + return promise; } finally { closeLock.readLock().unlock(); } } @Override - public void writeLedgerMetadata(long ledgerId, LedgerMetadata 
metadata, - GenericCallback cb) { + public CompletableFuture> writeLedgerMetadata(long ledgerId, LedgerMetadata metadata, + Version currentVersion) { closeLock.readLock().lock(); try { if (closed) { - cb.operationComplete(BKException.Code.ClientClosedException, null); - return; + return closedPromise(); } - underlying.writeLedgerMetadata(ledgerId, metadata, - new CleanupGenericCallback(cb)); + CompletableFuture> promise = + underlying.writeLedgerMetadata(ledgerId, metadata, currentVersion); + recordPromise(promise); + return promise; } finally { closeLock.readLock().unlock(); } @@ -189,7 +204,7 @@ public void operationComplete(int rc, Void result) { underlying.asyncProcessLedgers(processor, new AsyncCallback.VoidCallback() { @Override public void processResult(int rc, String path, Object ctx) { - if (null != removeCallback(stub)) { + if (removeCallback(stub)) { finalCb.processResult(rc, path, ctx); } } @@ -200,13 +215,13 @@ public void processResult(int rc, String path, Object ctx) { } @Override - public LedgerRangeIterator getLedgerRanges() { + public LedgerRangeIterator getLedgerRanges(long zkOpTimeoutMs) { closeLock.readLock().lock(); try { if (closed) { return new ClosedLedgerRangeIterator(); } - return underlying.getLedgerRanges(); + return underlying.getLedgerRanges(zkOpTimeoutMs); } finally { closeLock.readLock().unlock(); } @@ -222,16 +237,22 @@ public void close() throws IOException { return; } closed = true; - keys = new HashSet(callbacks.keySet()); + keys = new HashSet<>(callbacks); } finally { closeLock.writeLock().unlock(); } for (GenericCallback key : keys) { - GenericCallback callback = callbacks.remove(key); - if (null != callback) { - callback.operationComplete(BKException.Code.ClientClosedException, null); + if (callbacks.remove(key)) { + key.operationComplete(BKException.Code.ClientClosedException, null); } } + BKException exception = new BKException.BKClientClosedException(); + futures.forEach((f) -> f.completeExceptionally(exception)); + futures.clear(); underlying.close(); } + + private static CompletableFuture closedPromise() { + return FutureUtils.exception(new BKException.BKClientClosedException()); + } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/FlatLedgerManager.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/FlatLedgerManager.java index 7ee2e2289ea..a40a775ed41 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/FlatLedgerManager.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/FlatLedgerManager.java @@ -1,6 +1,6 @@ package org.apache.bookkeeper.meta; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
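For orientation, the close() path in CleanupLedgerManager above relies on a small promise-tracking idiom: every future handed out by the manager is recorded, dropped again when it completes, and failed if it is still outstanding at shutdown so callers never hang. A minimal standalone sketch of that idiom; the class name and the plain IOException are placeholders, and the read/write lock the real class uses around its `closed` flag is elided:

```java
import java.io.IOException;
import java.util.Set;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ConcurrentHashMap;

// Sketch of the pattern behind recordPromise()/close(): track in-flight
// futures, let them remove themselves on completion, and fail the rest
// when the tracker is closed.
public class PromiseTracker implements AutoCloseable {
    private final Set<CompletableFuture<?>> futures = ConcurrentHashMap.newKeySet();

    public <T> CompletableFuture<T> record(CompletableFuture<T> promise) {
        futures.add(promise);
        // Self-removal keeps the set bounded by the number of in-flight calls.
        promise.whenComplete((result, exception) -> futures.remove(promise));
        return promise;
    }

    @Override
    public void close() {
        Exception closed = new IOException("client closed");
        futures.forEach(f -> f.completeExceptionally(closed));
        futures.clear();
    }
}
```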
See the NOTICE file * distributed with this work for additional information @@ -21,7 +21,6 @@ import java.io.IOException; import java.util.NoSuchElementException; import java.util.Set; - import org.apache.bookkeeper.conf.AbstractConfiguration; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.Processor; import org.apache.bookkeeper.util.StringUtils; @@ -73,7 +72,7 @@ public String getLedgerPath(long ledgerId) { public long getLedgerId(String nodeName) throws IOException { long ledgerId; try { - String parts[] = nodeName.split(ledgerPrefix); + String[] parts = nodeName.split(ledgerPrefix); ledgerId = Long.parseLong(parts[parts.length - 1]); } catch (NumberFormatException e) { throw new IOException(e); @@ -89,7 +88,7 @@ public void asyncProcessLedgers(final Processor processor, } @Override - public LedgerRangeIterator getLedgerRanges() { + public LedgerRangeIterator getLedgerRanges(long zkOpTimeoutMs) { return new LedgerRangeIterator() { // single iterator, can visit only one time boolean nextCalled = false; @@ -103,7 +102,8 @@ private synchronized void preload() throws IOException { try { zkActiveLedgers = ledgerListToSet( - ZkUtils.getChildrenInSingleNode(zk, ledgerRootPath), ledgerRootPath); + ZkUtils.getChildrenInSingleNode(zk, ledgerRootPath, zkOpTimeoutMs), + ledgerRootPath); nextRange = new LedgerRange(zkActiveLedgers); } catch (KeeperException.NoNodeException e) { throw new IOException("Path does not exist: " + ledgerRootPath, e); diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/FlatLedgerManagerFactory.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/FlatLedgerManagerFactory.java index 19ac418eeae..acd23f4ae36 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/FlatLedgerManagerFactory.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/FlatLedgerManagerFactory.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
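With this change every getLedgerRanges implementation threads a ZooKeeper operation timeout down to ZkUtils.getChildrenInSingleNode instead of waiting indefinitely. A sketch of how a caller might drain the iterator under the new signature; the LedgerManager instance and the 30s timeout are assumed for illustration:

```java
import java.io.IOException;
import org.apache.bookkeeper.meta.LedgerManager;

public class LedgerRangeWalk {
    // Walks every ledger id known to the metadata store, one range at a time.
    public static void walk(LedgerManager lm) throws IOException {
        long zkOpTimeoutMs = 30_000; // arbitrary example timeout
        LedgerManager.LedgerRangeIterator it = lm.getLedgerRanges(zkOpTimeoutMs);
        while (it.hasNext()) {
            // next() surfaces zk failures (including timeouts) as IOException
            LedgerManager.LedgerRange range = it.next();
            for (long ledgerId : range.getLedgers()) {
                System.out.println("found ledger " + ledgerId);
            }
        }
    }
}
```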
See the NOTICE file * distributed with this work for additional information @@ -21,12 +21,12 @@ import java.io.IOException; import java.util.List; - import org.apache.bookkeeper.conf.AbstractConfiguration; +import org.apache.bookkeeper.conf.ServerConfiguration; import org.apache.bookkeeper.meta.zk.ZKMetadataDriverBase; import org.apache.bookkeeper.replication.ReplicationException; +import org.apache.bookkeeper.stats.NullStatsLogger; import org.apache.bookkeeper.util.ZkUtils; -import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.data.ACL; /** @@ -84,7 +84,14 @@ public LedgerManager newLedgerManager() { @Override public LedgerUnderreplicationManager newLedgerUnderreplicationManager() - throws KeeperException, InterruptedException, ReplicationException.CompatibilityException { + throws ReplicationException.UnavailableException, InterruptedException, + ReplicationException.CompatibilityException { return new ZkLedgerUnderreplicationManager(conf, zk); } + + @Override + public LedgerAuditorManager newLedgerAuditorManager() { + ServerConfiguration serverConfiguration = new ServerConfiguration(conf); + return new ZkLedgerAuditorManager(zk, serverConfiguration, NullStatsLogger.INSTANCE); + } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/HierarchicalLedgerManager.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/HierarchicalLedgerManager.java index 946ed2a5d32..d5c0fa1ff9f 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/HierarchicalLedgerManager.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/HierarchicalLedgerManager.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -18,7 +18,6 @@ package org.apache.bookkeeper.meta; import java.io.IOException; - import org.apache.bookkeeper.conf.AbstractConfiguration; import org.apache.bookkeeper.meta.LedgerManager.LedgerRangeIterator; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.Processor; @@ -73,7 +72,7 @@ public void processResult(int rc, String path, Object ctx) { } @Override - protected String getLedgerPath(long ledgerId) { + public String getLedgerPath(long ledgerId) { return ledgerRootPath + StringUtils.getHybridHierarchicalLedgerPath(ledgerId); } @@ -87,9 +86,9 @@ protected long getLedgerId(String ledgerPath) throws IOException { } @Override - public LedgerRangeIterator getLedgerRanges() { - LedgerRangeIterator legacyLedgerRangeIterator = legacyLM.getLedgerRanges(); - LedgerRangeIterator longLedgerRangeIterator = longLM.getLedgerRanges(); + public LedgerRangeIterator getLedgerRanges(long zkOpTimeoutMs) { + LedgerRangeIterator legacyLedgerRangeIterator = legacyLM.getLedgerRanges(zkOpTimeoutMs); + LedgerRangeIterator longLedgerRangeIterator = longLM.getLedgerRanges(zkOpTimeoutMs); return new HierarchicalLedgerRangeIterator(legacyLedgerRangeIterator, longLedgerRangeIterator); } @@ -123,4 +122,15 @@ public LedgerRange next() throws IOException { protected String getLedgerParentNodeRegex() { return StringUtils.HIERARCHICAL_LEDGER_PARENT_NODE_REGEX; } + + @Override + public void close() { + super.close(); + if (legacyLM != null) { + legacyLM.close(); + } + if (longLM != null) { + longLM.close(); + } + } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/HierarchicalLedgerManagerFactory.java 
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/HierarchicalLedgerManagerFactory.java index 4a9d6cfbf6e..f28763f45fb 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/HierarchicalLedgerManagerFactory.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/HierarchicalLedgerManagerFactory.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -18,7 +18,6 @@ package org.apache.bookkeeper.meta; import java.util.List; - import org.apache.bookkeeper.meta.zk.ZKMetadataDriverBase; import org.apache.bookkeeper.util.ZkUtils; import org.apache.zookeeper.data.ACL; diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LedgerAuditorManager.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LedgerAuditorManager.java new file mode 100644 index 00000000000..492433ed1b5 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LedgerAuditorManager.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bookkeeper.meta; + +import java.io.IOException; +import java.util.function.Consumer; +import org.apache.bookkeeper.net.BookieId; + +/** + * Interface to handle the ledger auditor election. + */ +public interface LedgerAuditorManager extends AutoCloseable { + + /** + * Events that can be triggered by the LedgerAuditorManager. + */ + enum AuditorEvent { + SessionLost, + VoteWasDeleted, + } + + /** + * Try to become the auditor. If there's already another auditor, it will wait until this + * current instance has become the auditor. + * + * @param bookieId the identifier for current bookie + * @param listener listener that will receive AuditorEvent notifications + * @return + */ + void tryToBecomeAuditor(String bookieId, Consumer listener) throws IOException, InterruptedException; + + /** + * Return the information regarding the current auditor. + * @return + */ + BookieId getCurrentAuditor() throws IOException, InterruptedException; +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LedgerIdGenerator.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LedgerIdGenerator.java index a110334fb01..32298f57f7c 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LedgerIdGenerator.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LedgerIdGenerator.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
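A rough sketch of how a bookie-side caller might drive the LedgerAuditorManager contract introduced above; the re-election loop and shutdown handling are elided, the class here is illustrative only, and `lam` would come from LedgerManagerFactory#newLedgerAuditorManager() as added elsewhere in this patch:

```java
import java.io.IOException;
import org.apache.bookkeeper.meta.LedgerAuditorManager;
import org.apache.bookkeeper.net.BookieId;

public class AuditorElectionExample {
    // Blocks inside tryToBecomeAuditor until this instance wins the election,
    // then reports who the metadata store currently lists as auditor.
    public static void run(LedgerAuditorManager lam, String myBookieId)
            throws IOException, InterruptedException {
        lam.tryToBecomeAuditor(myBookieId, event -> {
            // SessionLost / VoteWasDeleted mean the auditorship was lost and
            // would normally trigger a shutdown or a new election here.
            System.out.println("auditor event: " + event);
        });
        BookieId current = lam.getCurrentAuditor();
        System.out.println("current auditor: " + current);
    }
}
```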
See the NOTICE file * distributed with this work for additional information @@ -18,7 +18,6 @@ package org.apache.bookkeeper.meta; import java.io.Closeable; - import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GenericCallback; /** diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LedgerLayout.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LedgerLayout.java index eb5b705c16b..06b2998d949 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LedgerLayout.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LedgerLayout.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -18,6 +18,7 @@ package org.apache.bookkeeper.meta; import java.io.IOException; +import java.nio.charset.StandardCharsets; import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.ToString; @@ -34,7 +35,7 @@ @ToString public class LedgerLayout { - // version of compability layout version + // version of compatibility layout version public static final int LAYOUT_MIN_COMPAT_VERSION = 1; // version of ledger layout metadata public static final int LAYOUT_FORMAT_VERSION = 2; @@ -82,7 +83,7 @@ public byte[] serialize() throws IOException { if (log.isDebugEnabled()) { log.debug("Serialized layout info: {}", s); } - return s.getBytes("UTF-8"); + return s.getBytes(StandardCharsets.UTF_8); } /** @@ -95,12 +96,12 @@ public byte[] serialize() throws IOException { * if the given byte[] cannot be parsed */ public static LedgerLayout parseLayout(byte[] bytes) throws IOException { - String layout = new String(bytes, "UTF-8"); + String layout = new String(bytes, StandardCharsets.UTF_8); if (log.isDebugEnabled()) { log.debug("Parsing Layout: {}", layout); } - String lines[] = layout.split(lSplitter); + String[] lines = layout.split(lSplitter); try { int layoutFormatVersion = Integer.parseInt(lines[0]); diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LedgerManager.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LedgerManager.java index 84b9cb69ab0..db197ce02e6 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LedgerManager.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LedgerManager.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -22,12 +22,12 @@ import java.util.Set; import java.util.SortedSet; import java.util.TreeSet; - -import org.apache.bookkeeper.client.LedgerMetadata; -import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GenericCallback; +import java.util.concurrent.CompletableFuture; +import org.apache.bookkeeper.client.api.LedgerMetadata; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.LedgerMetadataListener; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.Processor; import org.apache.bookkeeper.versioning.Version; +import org.apache.bookkeeper.versioning.Versioned; import org.apache.zookeeper.AsyncCallback; /** @@ -46,16 +46,14 @@ public interface LedgerManager extends Closeable { * Ledger id provided to be created * @param metadata * Metadata provided when creating the new ledger - * @param cb - * Callback when creating a new ledger, returning the written metadata. - * Return code:
<ul>
-     *        <li>{@link BKException.Code.OK} if success</li>
-     *        <li>{@link BKException.Code.LedgerExistException} if given ledger id exist</li>
-     *        <li>{@link BKException.Code.ZKException}/{@link BKException.Code.MetaStoreException}
-     *        for other issue</li>
-     *        </ul>
+     * @return Future which, when completed, returns the metadata of the newly created ledger.
+     *         Completed with an exception:<ul>
+     *         <li>{@link org.apache.bookkeeper.client.BKException.BKLedgerExistException} if given ledger id exist</li>
+     *         <li>{@link org.apache.bookkeeper.client.BKException.ZKException}
+     *         /{@link org.apache.bookkeeper.client.BKException.BKMetadataSerializationException} for other issues</li>
+     *         </ul>
      */
-    void createLedgerMetadata(long ledgerId, LedgerMetadata metadata, GenericCallback<LedgerMetadata> cb);
+    CompletableFuture<Versioned<LedgerMetadata>> createLedgerMetadata(long ledgerId, LedgerMetadata metadata);

     /**
      * Remove a specified ledger metadata by ledgerId and version.
@@ -64,29 +62,30 @@ public interface LedgerManager extends Closeable {
      *          Ledger Id
      * @param version
      *          Ledger metadata version
-     * @param cb
-     *          Callback when remove ledger metadata. Return code:<ul>
-     *          <li>{@link BKException.Code.OK} if success</li>
-     *          <li>{@link BKException.Code.MetadataVersionException} if version doesn't match</li>
-     *          <li>{@link BKException.Code.NoSuchLedgerExistsException} if ledger not exist</li>
-     *          <li>{@link BKException.Code.ZKException} for other issue</li>
-     *          </ul>
+     * @return Future which, when completed, denotes that the ledger metadata has been removed.
+     *         Completed with an exception:<ul>
+     *         <li>{@link org.apache.bookkeeper.client.BKException.BKMetadataVersionException}
+     *         if version doesn't match</li>
+     *         <li>{@link org.apache.bookkeeper.client.BKException.BKNoSuchLedgerExistsOnMetadataServerException}
+     *         if ledger not exist</li>
+     *         <li>{@link org.apache.bookkeeper.client.BKException.ZKException} for other issues</li>
+     *         </ul>
      */
-    void removeLedgerMetadata(long ledgerId, Version version, GenericCallback<Void> cb);
+    CompletableFuture<Void> removeLedgerMetadata(long ledgerId, Version version);

     /**
      * Read ledger metadata of a specified ledger.
      *
      * @param ledgerId
      *          Ledger Id
-     * @param readCb
-     *          Callback when read ledger metadata. Return code:<ul>
-     *          <li>{@link BKException.Code.OK} if success</li>
-     *          <li>{@link BKException.Code.NoSuchLedgerExistsException} if ledger not exist</li>
-     *          <li>{@link BKException.Code.ZKException} for other issue</li>
+     * @return Future which, when completed, contains the requested versioned metadata.
+     *         Completed with an exception:<ul>
+     *         <li>{@link org.apache.bookkeeper.client.BKException.BKNoSuchLedgerExistsOnMetadataServerException}
+     *         if ledger not exist</li>
+     *         <li>{@link org.apache.bookkeeper.client.BKException.ZKException} for other issues</li>
      *         </ul>
      */
-    void readLedgerMetadata(long ledgerId, GenericCallback<LedgerMetadata> readCb);
+    CompletableFuture<Versioned<LedgerMetadata>> readLedgerMetadata(long ledgerId);

     /**
      * Write ledger metadata.
@@ -95,15 +94,17 @@ public interface LedgerManager extends Closeable {
      *          Ledger Id
      * @param metadata
      *          Ledger Metadata to write
-     * @param cb
-     *          Callback when finished writing ledger metadata, returning the written metadata.
-     *          Return code:<ul>
-     *          <li>{@link BKException.Code.OK} if success</li>
-     *          <li>{@link BKException.Code.MetadataVersionException} if version in metadata doesn't match</li>
-     *          <li>{@link BKException.Code.ZKException} for other issue</li>
+     * @param currentVersion
+     *          The version of the metadata we expect to be overwriting.
+     * @return Future which, when completed, contains the newly written metadata.
+     *         Completed with an exception:<ul>
+     *         <li>{@link org.apache.bookkeeper.client.BKException.BKMetadataVersionException}
+     *         if version in metadata doesn't match</li>
+     *         <li>{@link org.apache.bookkeeper.client.BKException.ZKException} for other issue</li>
      *         </ul>
      */
-    void writeLedgerMetadata(long ledgerId, LedgerMetadata metadata, GenericCallback<LedgerMetadata> cb);
+    CompletableFuture<Versioned<LedgerMetadata>> writeLedgerMetadata(long ledgerId, LedgerMetadata metadata,
+                                                                     Version currentVersion);

     /**
      * Register the ledger metadata listener on ledgerId.
@@ -129,9 +130,9 @@ public interface LedgerManager extends Closeable {
      * Loop to process all ledgers.
      * <p>
      * <ul>
-     * After all ledgers were processed, finalCb will be triggerred:
+     * After all ledgers were processed, finalCb will be triggered:
      * <li>if all ledgers are processed done with OK, success rc will be passed to finalCb.</li>
-     * <li>if some ledgers are prcoessed failed, failure rc will be passed to finalCb.</li>
+     * <li>if some ledgers are processed failed, failure rc will be passed to finalCb.</li>
      * </ul>
      * </p>
      *
@@ -144,7 +145,7 @@ public interface LedgerManager extends Closeable {
      * @param successRc
      *          Success RC code passed to finalCb when callback
      * @param failureRc
-     *          Failure RC code passed to finalCb when exceptions occured.
+     *          Failure RC code passed to finalCb when exceptions occurred.
      */
     void asyncProcessLedgers(Processor<Long> processor, AsyncCallback.VoidCallback finalCb, Object context,
                              int successRc, int failureRc);

@@ -152,9 +153,12 @@ void asyncProcessLedgers(Processor<Long> processor, AsyncCallback.VoidCallback f
     /**
      * Loop to scan a range of metadata from metadata storage.
      *
+     * @param zkOpTimeOutMs
+     *          Iterator considers timeout while fetching ledger-range from
+     *          zk.
      * @return will return a iterator of the Ranges
      */
-    LedgerRangeIterator getLedgerRanges();
+    LedgerRangeIterator getLedgerRanges(long zkOpTimeOutMs);

     /**
      * Used to represent the Ledgers range returned from the
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LedgerManagerFactory.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LedgerManagerFactory.java
index 80d3a6526f9..f1244703f87 100644
--- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LedgerManagerFactory.java
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LedgerManagerFactory.java
@@ -1,4 +1,4 @@
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements. See the NOTICE file
  * distributed with this work for additional information
@@ -84,7 +84,18 @@ LedgerManagerFactory initialize(AbstractConfiguration conf,
      * @see LedgerUnderreplicationManager
      */
     LedgerUnderreplicationManager newLedgerUnderreplicationManager()
-            throws KeeperException, InterruptedException, ReplicationException.CompatibilityException;
+            throws ReplicationException.UnavailableException,
+                   InterruptedException, ReplicationException.CompatibilityException;
+
+
+    /**
+     * Return a ledger auditor manager, which is used to
+     * coordinate the auto-recovery process.
+     *
+     * @return ledger auditor manager
+     * @see LedgerAuditorManager
+     */
+    LedgerAuditorManager newLedgerAuditorManager() throws IOException, InterruptedException;

     /**
      * Format the ledger metadata for LedgerManager.
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LedgerMetadataSerDe.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LedgerMetadataSerDe.java
new file mode 100644
index 00000000000..5aa0dd7c339
--- /dev/null
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LedgerMetadataSerDe.java
@@ -0,0 +1,520 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
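Taken together, the refactored LedgerManager methods compose naturally with CompletableFuture. A hedged sketch of a compare-and-set style metadata update against the new interface; the `rebuild` helper is a placeholder, not part of this patch:

```java
import java.util.concurrent.CompletableFuture;
import org.apache.bookkeeper.client.api.LedgerMetadata;
import org.apache.bookkeeper.meta.LedgerManager;
import org.apache.bookkeeper.versioning.Version;
import org.apache.bookkeeper.versioning.Versioned;

public class MetadataUpdateExample {
    // Read the current metadata, derive an updated copy, and write it back
    // conditioned on the version we read; a racing writer surfaces as a
    // BKMetadataVersionException on the returned future.
    static CompletableFuture<Versioned<LedgerMetadata>> update(LedgerManager lm, long ledgerId) {
        return lm.readLedgerMetadata(ledgerId)
                 .thenCompose(current -> {
                     LedgerMetadata updated = rebuild(current.getValue());
                     Version expected = current.getVersion();
                     return lm.writeLedgerMetadata(ledgerId, updated, expected);
                 });
    }

    // Placeholder transformation; a real caller would build a modified copy,
    // for example via LedgerMetadataBuilder.
    private static LedgerMetadata rebuild(LedgerMetadata current) {
        return current;
    }
}
```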
+ */
+package org.apache.bookkeeper.meta;
+
+import static com.google.common.base.Preconditions.checkArgument;
+import static com.google.common.base.Preconditions.checkState;
+import static java.nio.charset.StandardCharsets.UTF_8;
+
+import com.google.protobuf.ByteString;
+import com.google.protobuf.TextFormat;
+import java.io.BufferedReader;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.io.PrintWriter;
+import java.io.UnsupportedEncodingException;
+import java.util.ArrayList;
+import java.util.Base64;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.stream.Collectors;
+import org.apache.bookkeeper.client.LedgerMetadataBuilder;
+import org.apache.bookkeeper.client.LedgerMetadataUtils;
+import org.apache.bookkeeper.client.api.DigestType;
+import org.apache.bookkeeper.client.api.LedgerMetadata;
+import org.apache.bookkeeper.client.api.LedgerMetadata.State;
+import org.apache.bookkeeper.net.BookieId;
+import org.apache.bookkeeper.proto.DataFormats.LedgerMetadataFormat;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Serialization and deserialization for LedgerMetadata.
+ */
+public class LedgerMetadataSerDe {
+    private static final Logger log = LoggerFactory.getLogger(LedgerMetadataSerDe.class);
+
+    /**
+     * Text based manual serialization.
+     * Available from v4.0.x onwards.
+     */
+    public static final int METADATA_FORMAT_VERSION_1 = 1;
+
+    /**
+     * Protobuf based, serialized using TextFormat.
+     * Available from v4.2.x onwards.
+     * Can contain ctime or not, but if it contains ctime it can only be parsed by v4.4.x onwards.
+     */
+    public static final int METADATA_FORMAT_VERSION_2 = 2;
+
+    /**
+     * Protobuf based, serialized in binary format.
+     * Available from v4.9.x onwards.
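All three format versions written by this class share a small text preamble: the literal key, a tab, the decimal version number, and a single newline, as implemented by writeHeader/readHeader below. A tiny sketch of producing that preamble under those assumptions (class name is illustrative):

```java
import static java.nio.charset.StandardCharsets.UTF_8;

import java.io.ByteArrayOutputStream;
import java.io.IOException;

public class HeaderSketch {
    // Builds the preamble: "BookieMetadataFormatVersion" TAB <version> '\n'.
    public static byte[] header(int version) throws IOException {
        ByteArrayOutputStream os = new ByteArrayOutputStream();
        os.write("BookieMetadataFormatVersion\t".getBytes(UTF_8));
        os.write(String.valueOf(version).getBytes(UTF_8));
        os.write('\n');
        return os.toByteArray();
    }

    public static void main(String[] args) throws IOException {
        // Prints: BookieMetadataFormatVersion<TAB>3 followed by a newline
        System.out.print(new String(header(3), UTF_8));
    }
}
```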
+ */ + public static final int METADATA_FORMAT_VERSION_3 = 3; + + public static final int MAXIMUM_METADATA_FORMAT_VERSION = METADATA_FORMAT_VERSION_3; + public static final int CURRENT_METADATA_FORMAT_VERSION = METADATA_FORMAT_VERSION_3; + private static final int LOWEST_COMPAT_METADATA_FORMAT_VERSION = METADATA_FORMAT_VERSION_1; + + // for pulling the version + private static final int MAX_VERSION_DIGITS = 10; + private static final byte[] VERSION_KEY_BYTES = "BookieMetadataFormatVersion\t".getBytes(UTF_8); + private static final String LINE_SPLITTER = "\n"; + private static final byte[] LINE_SPLITTER_BYTES = LINE_SPLITTER.getBytes(UTF_8); + private static final String FIELD_SPLITTER = "\t"; + + // old V1 constants + private static final String V1_CLOSED_TAG = "CLOSED"; + private static final int V1_IN_RECOVERY_ENTRY_ID = -102; + + private static void writeHeader(OutputStream os, int version) throws IOException { + os.write(VERSION_KEY_BYTES); + os.write(String.valueOf(version).getBytes(UTF_8)); + os.write(LINE_SPLITTER_BYTES); + } + + private static int readHeader(InputStream is) throws IOException { + checkState(LINE_SPLITTER_BYTES.length == 1, "LINE_SPLITTER must be single byte"); + + for (int i = 0; i < VERSION_KEY_BYTES.length; i++) { + int b = is.read(); + if (b < 0 || ((byte) b) != VERSION_KEY_BYTES[i]) { + throw new IOException("Ledger metadata header corrupt at index " + i); + } + } + byte[] versionBuf = new byte[MAX_VERSION_DIGITS]; + int i = 0; + while (i < MAX_VERSION_DIGITS) { + int b = is.read(); + if (b == LINE_SPLITTER_BYTES[0]) { + String versionStr = new String(versionBuf, 0, i, UTF_8); + try { + return Integer.parseInt(versionStr); + } catch (NumberFormatException nfe) { + throw new IOException("Unable to parse version number from " + versionStr); + } + } else if (b < 0) { + break; + } else { + versionBuf[i++] = (byte) b; + } + } + throw new IOException("Unable to find end of version number, metadata appears corrupt"); + } + + public byte[] serialize(LedgerMetadata metadata) throws IOException { + int formatVersion = metadata.getMetadataFormatVersion(); + final byte[] serialized; + switch (formatVersion) { + case METADATA_FORMAT_VERSION_3: + serialized = serializeVersion3(metadata); + break; + case METADATA_FORMAT_VERSION_2: + serialized = serializeVersion2(metadata); + break; + case METADATA_FORMAT_VERSION_1: + serialized = serializeVersion1(metadata); + break; + default: + throw new IllegalArgumentException("Invalid format version " + formatVersion); + } + if (log.isDebugEnabled()) { + String serializedStr; + if (formatVersion > METADATA_FORMAT_VERSION_2) { + serializedStr = Base64.getEncoder().encodeToString(serialized); + } else { + serializedStr = new String(serialized, UTF_8); + } + log.debug("Serialized with format {}: {}", formatVersion, serializedStr); + } + return serialized; + } + + private static byte[] serializeVersion3(LedgerMetadata metadata) throws IOException { + try (ByteArrayOutputStream os = new ByteArrayOutputStream()) { + writeHeader(os, METADATA_FORMAT_VERSION_3); + LedgerMetadataFormat.Builder builder = LedgerMetadataFormat.newBuilder(); + builder.setQuorumSize(metadata.getWriteQuorumSize()) + .setAckQuorumSize(metadata.getAckQuorumSize()) + .setEnsembleSize(metadata.getEnsembleSize()) + .setLength(metadata.getLength()) + .setLastEntryId(metadata.getLastEntryId()); + + switch (metadata.getState()) { + case CLOSED: + builder.setState(LedgerMetadataFormat.State.CLOSED); + break; + case IN_RECOVERY: + 
builder.setState(LedgerMetadataFormat.State.IN_RECOVERY); + break; + case OPEN: + builder.setState(LedgerMetadataFormat.State.OPEN); + break; + default: + checkArgument(false, + String.format("Unknown state %s for protobuf serialization", metadata.getState())); + break; + } + + /** Hack to get around fact that ctime was never versioned correctly */ + if (LedgerMetadataUtils.shouldStoreCtime(metadata)) { + builder.setCtime(metadata.getCtime()); + } + + + builder.setDigestType(apiToProtoDigestType(metadata.getDigestType())); + + serializePassword(metadata.getPassword(), builder); + + Map customMetadata = metadata.getCustomMetadata(); + if (customMetadata.size() > 0) { + LedgerMetadataFormat.cMetadataMapEntry.Builder cMetadataBuilder = + LedgerMetadataFormat.cMetadataMapEntry.newBuilder(); + for (Map.Entry entry : customMetadata.entrySet()) { + cMetadataBuilder.setKey(entry.getKey()).setValue(ByteString.copyFrom(entry.getValue())); + builder.addCustomMetadata(cMetadataBuilder.build()); + } + } + + for (Map.Entry> entry : metadata.getAllEnsembles().entrySet()) { + LedgerMetadataFormat.Segment.Builder segmentBuilder = LedgerMetadataFormat.Segment.newBuilder(); + segmentBuilder.setFirstEntryId(entry.getKey()); + for (BookieId addr : entry.getValue()) { + segmentBuilder.addEnsembleMember(addr.toString()); + } + builder.addSegment(segmentBuilder.build()); + } + + builder.setCToken(metadata.getCToken()); + + builder.build().writeDelimitedTo(os); + return os.toByteArray(); + } + } + + private static byte[] serializeVersion2(LedgerMetadata metadata) throws IOException { + try (ByteArrayOutputStream os = new ByteArrayOutputStream()) { + writeHeader(os, METADATA_FORMAT_VERSION_2); + try (PrintWriter writer = new PrintWriter(new OutputStreamWriter(os, UTF_8.name()))) { + /*********************************************************************** + * WARNING: Do not modify to add fields. + * This code is purposefully duplicated, as version 2 does not support adding + * fields, and if this code was shared with version 3, it would be easy to + * accidently add new fields and create BC issues. 
+ **********************************************************************/ + LedgerMetadataFormat.Builder builder = LedgerMetadataFormat.newBuilder(); + builder.setQuorumSize(metadata.getWriteQuorumSize()) + .setAckQuorumSize(metadata.getAckQuorumSize()) + .setEnsembleSize(metadata.getEnsembleSize()) + .setLength(metadata.getLength()) + .setLastEntryId(metadata.getLastEntryId()); + + switch (metadata.getState()) { + case CLOSED: + builder.setState(LedgerMetadataFormat.State.CLOSED); + break; + case IN_RECOVERY: + builder.setState(LedgerMetadataFormat.State.IN_RECOVERY); + break; + case OPEN: + builder.setState(LedgerMetadataFormat.State.OPEN); + break; + default: + checkArgument(false, + String.format("Unknown state %s for protobuf serialization", metadata.getState())); + break; + } + + /** Hack to get around fact that ctime was never versioned correctly */ + if (LedgerMetadataUtils.shouldStoreCtime(metadata)) { + builder.setCtime(metadata.getCtime()); + } + + builder.setDigestType(apiToProtoDigestType(metadata.getDigestType())); + serializePassword(metadata.getPassword(), builder); + + Map customMetadata = metadata.getCustomMetadata(); + if (customMetadata.size() > 0) { + LedgerMetadataFormat.cMetadataMapEntry.Builder cMetadataBuilder = + LedgerMetadataFormat.cMetadataMapEntry.newBuilder(); + for (Map.Entry entry : customMetadata.entrySet()) { + cMetadataBuilder.setKey(entry.getKey()).setValue(ByteString.copyFrom(entry.getValue())); + builder.addCustomMetadata(cMetadataBuilder.build()); + } + } + + for (Map.Entry> entry : + metadata.getAllEnsembles().entrySet()) { + LedgerMetadataFormat.Segment.Builder segmentBuilder = LedgerMetadataFormat.Segment.newBuilder(); + segmentBuilder.setFirstEntryId(entry.getKey()); + for (BookieId addr : entry.getValue()) { + segmentBuilder.addEnsembleMember(addr.toString()); + } + builder.addSegment(segmentBuilder.build()); + } + + TextFormat.printer().print(builder.build(), writer); + writer.flush(); + } + return os.toByteArray(); + } + } + + private static byte[] serializeVersion1(LedgerMetadata metadata) throws IOException { + try (ByteArrayOutputStream os = new ByteArrayOutputStream()) { + writeHeader(os, METADATA_FORMAT_VERSION_1); + + try (PrintWriter writer = new PrintWriter(new OutputStreamWriter(os, UTF_8.name()))) { + writer.append(String.valueOf(metadata.getWriteQuorumSize())).append(LINE_SPLITTER); + writer.append(String.valueOf(metadata.getEnsembleSize())).append(LINE_SPLITTER); + writer.append(String.valueOf(metadata.getLength())).append(LINE_SPLITTER); + + for (Map.Entry> entry : + metadata.getAllEnsembles().entrySet()) { + writer.append(String.valueOf(entry.getKey())); + for (BookieId addr : entry.getValue()) { + writer.append(FIELD_SPLITTER).append(addr.toString()); + } + writer.append(LINE_SPLITTER); + } + + if (metadata.getState() == State.IN_RECOVERY) { + writer.append(String.valueOf(V1_IN_RECOVERY_ENTRY_ID)).append(FIELD_SPLITTER).append(V1_CLOSED_TAG); + } else if (metadata.getState() == State.CLOSED) { + writer.append(String.valueOf(metadata.getLastEntryId())) + .append(FIELD_SPLITTER).append(V1_CLOSED_TAG); + } else { + checkArgument(metadata.getState() == State.OPEN, + String.format("Unknown state %s for V1 serialization", metadata.getState())); + } + writer.flush(); + } catch (UnsupportedEncodingException uee) { + throw new RuntimeException("UTF_8 should be supported everywhere"); + } + return os.toByteArray(); + } + } + + private static void serializePassword(byte[] password, LedgerMetadataFormat.Builder builder) { + if (password == 
null || password.length == 0) { + builder.setPassword(ByteString.EMPTY); + } else { + builder.setPassword(ByteString.copyFrom(password)); + } + } + + /** + * Parses a given byte array and transforms into a LedgerConfig object. + * + * @param bytes + * byte array to parse + * @param metadataStoreCtime + * metadata store creation time, used for legacy ledgers + * @return LedgerConfig + * @throws IOException + * if the given byte[] cannot be parsed + */ + public LedgerMetadata parseConfig(byte[] bytes, + long ledgerId, + Optional metadataStoreCtime) throws IOException { + if (log.isDebugEnabled()) { + log.debug("Deserializing {}", Base64.getEncoder().encodeToString(bytes)); + } + try (ByteArrayInputStream is = new ByteArrayInputStream(bytes)) { + int metadataFormatVersion = readHeader(is); + if (log.isDebugEnabled()) { + String contentStr = ""; + if (metadataFormatVersion <= METADATA_FORMAT_VERSION_2) { + contentStr = ", content: " + new String(bytes, UTF_8); + } + log.debug("Format version {} detected{}", metadataFormatVersion, contentStr); + } + + switch (metadataFormatVersion) { + case METADATA_FORMAT_VERSION_3: + return parseVersion3Config(ledgerId, is, metadataStoreCtime); + case METADATA_FORMAT_VERSION_2: + return parseVersion2Config(ledgerId, is, metadataStoreCtime); + case METADATA_FORMAT_VERSION_1: + return parseVersion1Config(ledgerId, is); + default: + throw new IOException( + String.format("Metadata version not compatible. Expected between %d and %d, but got %d", + LOWEST_COMPAT_METADATA_FORMAT_VERSION, CURRENT_METADATA_FORMAT_VERSION, + metadataFormatVersion)); + } + } + } + + private static LedgerMetadata parseVersion3Config(long ledgerId, InputStream is, Optional metadataStoreCtime) + throws IOException { + LedgerMetadataBuilder builder = LedgerMetadataBuilder.create() + .withId(ledgerId) + .withMetadataFormatVersion(METADATA_FORMAT_VERSION_3); + LedgerMetadataFormat.Builder formatBuilder = LedgerMetadataFormat.newBuilder(); + formatBuilder.mergeDelimitedFrom(is); + LedgerMetadataFormat data = formatBuilder.build(); + decodeFormat(data, builder); + if (data.hasCtime()) { + builder.storingCreationTime(true); + } else if (metadataStoreCtime.isPresent()) { + builder.withCreationTime(metadataStoreCtime.get()).storingCreationTime(false); + } + return builder.build(); + } + + private static LedgerMetadata parseVersion2Config(long ledgerId, InputStream is, Optional metadataStoreCtime) + throws IOException { + LedgerMetadataBuilder builder = LedgerMetadataBuilder.create() + .withId(ledgerId) + .withMetadataFormatVersion(METADATA_FORMAT_VERSION_2); + + LedgerMetadataFormat.Builder formatBuilder = LedgerMetadataFormat.newBuilder(); + try (InputStreamReader reader = new InputStreamReader(is, UTF_8.name())) { + TextFormat.merge(reader, formatBuilder); + } + LedgerMetadataFormat data = formatBuilder.build(); + decodeFormat(data, builder); + if (data.hasCtime()) { + // 'storingCreationTime' is only ever taken into account for serializing version 2 + builder.storingCreationTime(true); + } else if (metadataStoreCtime.isPresent()) { + builder.withCreationTime(metadataStoreCtime.get()).storingCreationTime(false); + } + return builder.build(); + } + + private static void decodeFormat(LedgerMetadataFormat data, LedgerMetadataBuilder builder) throws IOException { + builder.withEnsembleSize(data.getEnsembleSize()); + builder.withWriteQuorumSize(data.getQuorumSize()); + if (data.hasAckQuorumSize()) { + builder.withAckQuorumSize(data.getAckQuorumSize()); + } else { + 
builder.withAckQuorumSize(data.getQuorumSize()); + } + + if (data.hasCtime()) { + builder.withCreationTime(data.getCtime()); + } + + if (data.getState() == LedgerMetadataFormat.State.IN_RECOVERY) { + builder.withInRecoveryState(); + } else if (data.getState() == LedgerMetadataFormat.State.CLOSED) { + builder.withClosedState().withLastEntryId(data.getLastEntryId()).withLength(data.getLength()); + } + + if (data.hasPassword()) { + builder.withPassword(data.getPassword().toByteArray()) + .withDigestType(protoToApiDigestType(data.getDigestType())); + } + + for (LedgerMetadataFormat.Segment s : data.getSegmentList()) { + List addrs = new ArrayList<>(); + for (String addr : s.getEnsembleMemberList()) { + addrs.add(BookieId.parse(addr)); + } + builder.newEnsembleEntry(s.getFirstEntryId(), addrs); + } + + if (data.getCustomMetadataCount() > 0) { + builder.withCustomMetadata(data.getCustomMetadataList().stream().collect( + Collectors.toMap(e -> e.getKey(), + e -> e.getValue().toByteArray()))); + } + + if (data.hasCToken()) { + builder.withCToken(data.getCToken()); + } + } + + private static LedgerMetadata parseVersion1Config(long ledgerId, InputStream is) throws IOException { + try (BufferedReader reader = new BufferedReader(new InputStreamReader(is, UTF_8.name()))) { + LedgerMetadataBuilder builder = LedgerMetadataBuilder.create() + .withId(ledgerId) + .withMetadataFormatVersion(1); + int quorumSize = Integer.parseInt(reader.readLine()); + int ensembleSize = Integer.parseInt(reader.readLine()); + long length = Long.parseLong(reader.readLine()); + + builder.withEnsembleSize(ensembleSize).withWriteQuorumSize(quorumSize).withAckQuorumSize(quorumSize); + + String line = reader.readLine(); + while (line != null) { + String[] parts = line.split(FIELD_SPLITTER); + + if (parts[1].equals(V1_CLOSED_TAG)) { + Long l = Long.parseLong(parts[0]); + if (l == V1_IN_RECOVERY_ENTRY_ID) { + builder.withInRecoveryState(); + } else { + builder.withClosedState().withLastEntryId(l).withLength(length); + } + break; + } + + ArrayList addrs = new ArrayList(); + for (int j = 1; j < parts.length; j++) { + addrs.add(BookieId.parse(parts[j])); + } + builder.newEnsembleEntry(Long.parseLong(parts[0]), addrs); + + line = reader.readLine(); + } + return builder.build(); + } catch (NumberFormatException e) { + throw new IOException(e); + } + } + + private static LedgerMetadataFormat.DigestType apiToProtoDigestType(DigestType digestType) { + switch (digestType) { + case MAC: + return LedgerMetadataFormat.DigestType.HMAC; + case CRC32: + return LedgerMetadataFormat.DigestType.CRC32; + case CRC32C: + return LedgerMetadataFormat.DigestType.CRC32C; + case DUMMY: + return LedgerMetadataFormat.DigestType.DUMMY; + default: + throw new IllegalArgumentException("Unable to convert digest type " + digestType); + } + } + + private static DigestType protoToApiDigestType(LedgerMetadataFormat.DigestType digestType) { + switch (digestType) { + case HMAC: + return DigestType.MAC; + case CRC32: + return DigestType.CRC32; + case CRC32C: + return DigestType.CRC32C; + case DUMMY: + return DigestType.DUMMY; + default: + throw new IllegalArgumentException("Unable to convert digest type " + digestType); + } + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LedgerUnderreplicationManager.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LedgerUnderreplicationManager.java index b5447beee2f..64548fbd131 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LedgerUnderreplicationManager.java +++ 
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LedgerUnderreplicationManager.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -23,7 +23,6 @@ import java.util.List; import java.util.concurrent.CompletableFuture; import java.util.function.Predicate; - import org.apache.bookkeeper.common.concurrent.FutureUtils; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GenericCallback; import org.apache.bookkeeper.replication.ReplicationException; @@ -43,9 +42,14 @@ default void markLedgerUnderreplicated(long ledgerId, String missingReplica) thr ledgerId, Lists.newArrayList(missingReplica)), ReplicationException.EXCEPTION_HANDLER); } + /** + * Check whether the ledger is being replicated by any bookie. + */ + boolean isLedgerBeingReplicated(long ledgerId) throws ReplicationException; + /** * Mark a ledger as underreplicated with missing bookies. The replication should then - * check which fragements are underreplicated and rereplicate them. + * check which fragments are underreplicated and rereplicate them. * * @param ledgerId ledger id * @param missingReplicas missing replicas @@ -60,6 +64,18 @@ default void markLedgerUnderreplicated(long ledgerId, String missingReplica) thr void markLedgerReplicated(long ledgerId) throws ReplicationException.UnavailableException; + /** + * Get the UnderreplicatedLedger info if this ledger is marked + * underreplicated otherwise it returns null. + * + * @param ledgerId + * ledger id + * @return the UnderreplicatedLedger info instance if this ledger is marked + * underreplicated otherwise it returns null. + * @throws ReplicationException.UnavailableException + */ + UnderreplicatedLedger getLedgerUnreplicationInfo(long ledgerId) throws ReplicationException.UnavailableException; + /** * Get a list of all the underreplicated ledgers which have been * marked for rereplication, filtered by the predicate on the missing replicas list. @@ -92,6 +108,7 @@ long getLedgerToRereplicate() long pollLedgerToRereplicate() throws ReplicationException.UnavailableException; + void acquireUnderreplicatedLedger(long ledgerId) throws ReplicationException; /** * Release a previously acquired ledger. This allows others to acquire the ledger. @@ -102,6 +119,7 @@ void releaseUnderreplicatedLedger(long ledgerId) /** * Release all resources held by the ledger underreplication manager. */ + @Override void close() throws ReplicationException.UnavailableException; @@ -168,6 +186,64 @@ boolean initializeLostBookieRecoveryDelay(int lostBookieRecoveryDelay) */ int getLostBookieRecoveryDelay() throws ReplicationException.UnavailableException; + /** + * Setter for the CheckAllLedgers last executed ctime. + * + * @param checkAllLedgersCTime + * @throws ReplicationException.UnavailableException + */ + void setCheckAllLedgersCTime(long checkAllLedgersCTime) throws ReplicationException.UnavailableException; + + /** + * Getter for the CheckAllLedgers last executed ctime. + * + * @return the long value of checkAllLedgersCTime + * @throws ReplicationException.UnavailableException + */ + long getCheckAllLedgersCTime() throws ReplicationException.UnavailableException; + + /** + * Setter for the PlacementPolicyCheck last executed ctime. 
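The acquire/mark/release contract in this interface is what a replication worker loops over. A minimal hypothetical pass, with the actual fragment copying elided and exception handling simplified:

```java
import org.apache.bookkeeper.meta.LedgerUnderreplicationManager;
import org.apache.bookkeeper.replication.ReplicationException;

public class ReplicationWorkerLoop {
    // One pass of a sketched replication worker: claim an underreplicated
    // ledger, re-replicate it, then mark it replicated and release the claim.
    public static void onePass(LedgerUnderreplicationManager urm)
            throws ReplicationException.UnavailableException {
        long ledgerId = urm.getLedgerToRereplicate(); // blocks until one is available
        try {
            // ... copy the underreplicated fragments to new bookies ...
            urm.markLedgerReplicated(ledgerId);
        } finally {
            urm.releaseUnderreplicatedLedger(ledgerId);
        }
    }
}
```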
+ * + * @param placementPolicyCheckCTime + * @throws ReplicationException.UnavailableException + */ + void setPlacementPolicyCheckCTime(long placementPolicyCheckCTime) throws ReplicationException.UnavailableException; + + /** + * Getter for the PlacementPolicyCheck last executed ctime. + * + * @return the long value of placementPolicyCheckCTime + * @throws ReplicationException.UnavailableException + */ + long getPlacementPolicyCheckCTime() throws ReplicationException.UnavailableException; + + /** + * Setter for the ReplicasCheck last executed ctime. + * + * @param replicasCheckCTime + * @throws ReplicationException.UnavailableException + */ + void setReplicasCheckCTime(long replicasCheckCTime) throws ReplicationException.UnavailableException; + + /** + * Getter for the ReplicasCheck last executed ctime. + * + * @return the long value of replicasCheckCTime + * @throws ReplicationException.UnavailableException + */ + long getReplicasCheckCTime() throws ReplicationException.UnavailableException; + + /** + * Receive notification asynchronously when the num of under-replicated ledgers Changed. + * + * @param cb + * @throws ReplicationException.UnavailableException + */ + @Deprecated + default void notifyUnderReplicationLedgerChanged(GenericCallback cb) + throws ReplicationException.UnavailableException {} + /** * Receive notification asynchronously when the lostBookieRecoveryDelay value is Changed. * diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LegacyHierarchicalLedgerManager.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LegacyHierarchicalLedgerManager.java index 76ecc9d68de..52bd4588817 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LegacyHierarchicalLedgerManager.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LegacyHierarchicalLedgerManager.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -24,7 +24,6 @@ import java.util.List; import java.util.NavigableSet; import java.util.NoSuchElementException; - import org.apache.bookkeeper.conf.AbstractConfiguration; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.Processor; import org.apache.bookkeeper.util.StringUtils; @@ -153,8 +152,8 @@ protected String getLedgerParentNodeRegex() { } @Override - public LedgerRangeIterator getLedgerRanges() { - return new LegacyHierarchicalLedgerRangeIterator(); + public LedgerRangeIterator getLedgerRanges(long zkOpTimeoutMs) { + return new LegacyHierarchicalLedgerRangeIterator(zkOpTimeoutMs); } /** @@ -166,6 +165,11 @@ private class LegacyHierarchicalLedgerRangeIterator implements LedgerRangeIterat private String curL1Nodes = ""; private boolean iteratorDone = false; private LedgerRange nextRange = null; + private final long zkOpTimeoutMs; + + public LegacyHierarchicalLedgerRangeIterator(long zkOpTimeoutMs) { + this.zkOpTimeoutMs = zkOpTimeoutMs; + } /** * Iterate next level1 znode. @@ -261,7 +265,7 @@ LedgerRange getLedgerRangeByLevel(final String level1, final String level2) String nodePath = nodeBuilder.toString(); List ledgerNodes = null; try { - ledgerNodes = ZkUtils.getChildrenInSingleNode(zk, nodePath); + ledgerNodes = ZkUtils.getChildrenInSingleNode(zk, nodePath, zkOpTimeoutMs); } catch (KeeperException.NoNodeException e) { /* If the node doesn't exist, we must have raced with a recursive node removal, just * return an empty list. 
*/ diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LegacyHierarchicalLedgerManagerFactory.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LegacyHierarchicalLedgerManagerFactory.java index 91579735b05..1c4ec6021fe 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LegacyHierarchicalLedgerManagerFactory.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LegacyHierarchicalLedgerManagerFactory.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -21,12 +21,12 @@ import java.io.IOException; import java.util.List; - import org.apache.bookkeeper.conf.AbstractConfiguration; +import org.apache.bookkeeper.conf.ServerConfiguration; import org.apache.bookkeeper.meta.zk.ZKMetadataDriverBase; import org.apache.bookkeeper.replication.ReplicationException; +import org.apache.bookkeeper.stats.NullStatsLogger; import org.apache.bookkeeper.util.ZkUtils; -import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.data.ACL; /** @@ -78,6 +78,12 @@ public LedgerIdGenerator newLedgerIdGenerator() { zkAcls); } + @Override + public LedgerAuditorManager newLedgerAuditorManager() { + ServerConfiguration serverConfiguration = new ServerConfiguration(conf); + return new ZkLedgerAuditorManager(zk, serverConfiguration, NullStatsLogger.INSTANCE); + } + @Override public LedgerManager newLedgerManager() { return new LegacyHierarchicalLedgerManager(conf, zk); @@ -85,7 +91,8 @@ public LedgerManager newLedgerManager() { @Override public LedgerUnderreplicationManager newLedgerUnderreplicationManager() - throws KeeperException, InterruptedException, ReplicationException.CompatibilityException{ + throws ReplicationException.UnavailableException, InterruptedException, + ReplicationException.CompatibilityException{ return new ZkLedgerUnderreplicationManager(conf, zk); } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LongHierarchicalLedgerManager.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LongHierarchicalLedgerManager.java index 2e69e90a5c1..ca9216d8baa 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LongHierarchicalLedgerManager.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LongHierarchicalLedgerManager.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -24,7 +24,6 @@ import java.util.List; import java.util.NoSuchElementException; import java.util.Set; - import org.apache.bookkeeper.conf.AbstractConfiguration; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.Processor; import org.apache.bookkeeper.util.StringUtils; @@ -139,8 +138,8 @@ public void process(String lNode, VoidCallback cb) { } @Override - public LedgerRangeIterator getLedgerRanges() { - return new LongHierarchicalLedgerRangeIterator(); + public LedgerRangeIterator getLedgerRanges(long zkOpTimeoutMs) { + return new LongHierarchicalLedgerRangeIterator(zkOpTimeoutMs); } @@ -149,6 +148,7 @@ public LedgerRangeIterator getLedgerRanges() { */ private class LongHierarchicalLedgerRangeIterator implements LedgerRangeIterator { LedgerRangeIterator rootIterator; + final long zkOpTimeoutMs; /** * Returns all children with path as a parent. 
If path is non-existent, @@ -162,7 +162,7 @@ private class LongHierarchicalLedgerRangeIterator implements LedgerRangeIterator */ List getChildrenAt(String path) throws IOException { try { - List children = ZkUtils.getChildrenInSingleNode(zk, path); + List children = ZkUtils.getChildrenInSingleNode(zk, path, zkOpTimeoutMs); Collections.sort(children); return children; } catch (KeeperException.NoNodeException e) { @@ -284,7 +284,9 @@ public LedgerRange next() throws IOException { } } - private LongHierarchicalLedgerRangeIterator() {} + private LongHierarchicalLedgerRangeIterator(long zkOpTimeoutMs) { + this.zkOpTimeoutMs = zkOpTimeoutMs; + } private void bootstrap() throws IOException { if (rootIterator == null) { diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LongHierarchicalLedgerManagerFactory.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LongHierarchicalLedgerManagerFactory.java index 93ad9ddc681..a6978628665 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LongHierarchicalLedgerManagerFactory.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LongHierarchicalLedgerManagerFactory.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LongZkLedgerIdGenerator.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LongZkLedgerIdGenerator.java index d959c337fb9..c42764a0f6d 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LongZkLedgerIdGenerator.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LongZkLedgerIdGenerator.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -21,7 +21,6 @@ import java.util.ArrayList; import java.util.List; import java.util.Optional; - import org.apache.bookkeeper.client.BKException; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GenericCallback; import org.apache.bookkeeper.util.ZkUtils; diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/MSLedgerManagerFactory.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/MSLedgerManagerFactory.java index 5b28a0be487..bc9ae3a35a2 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/MSLedgerManagerFactory.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/MSLedgerManagerFactory.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file * distributed with this work for additional information @@ -21,15 +21,16 @@ import static org.apache.bookkeeper.metastore.MetastoreTable.ALL_FIELDS; import static org.apache.bookkeeper.metastore.MetastoreTable.NON_FIELDS; -import com.google.common.base.Optional; import com.google.common.util.concurrent.ThreadFactoryBuilder; import java.io.IOException; import java.util.HashSet; import java.util.Iterator; import java.util.List; +import java.util.Optional; import java.util.Set; import java.util.SortedSet; import java.util.TreeSet; +import java.util.concurrent.CompletableFuture; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; import java.util.concurrent.CountDownLatch; @@ -38,8 +39,10 @@ import java.util.concurrent.TimeUnit; import lombok.extern.slf4j.Slf4j; import org.apache.bookkeeper.client.BKException; -import org.apache.bookkeeper.client.LedgerMetadata; +import org.apache.bookkeeper.client.api.LedgerMetadata; +import org.apache.bookkeeper.common.concurrent.FutureUtils; import org.apache.bookkeeper.conf.AbstractConfiguration; +import org.apache.bookkeeper.conf.ServerConfiguration; import org.apache.bookkeeper.meta.zk.ZKMetadataDriverBase; import org.apache.bookkeeper.metastore.MSException; import org.apache.bookkeeper.metastore.MSWatchedEvent; @@ -55,10 +58,10 @@ import org.apache.bookkeeper.metastore.MetastoreUtils; import org.apache.bookkeeper.metastore.MetastoreWatcher; import org.apache.bookkeeper.metastore.Value; -import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GenericCallback; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.LedgerMetadataListener; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.Processor; import org.apache.bookkeeper.replication.ReplicationException; +import org.apache.bookkeeper.stats.NullStatsLogger; import org.apache.bookkeeper.util.BookKeeperConstants; import org.apache.bookkeeper.util.StringUtils; import org.apache.bookkeeper.util.ZkUtils; @@ -209,7 +212,7 @@ public LedgerIdGenerator newLedgerIdGenerator() { static class MsLedgerManager implements LedgerManager, MetastoreWatcher { final ZooKeeper zk; final AbstractConfiguration conf; - + private final LedgerMetadataSerDe serDe; final MetaStore metastore; final MetastoreScannableTable ledgerTable; final int maxEntriesPerScan; @@ -224,7 +227,7 @@ static class MsLedgerManager implements LedgerManager, MetastoreWatcher { // callbacks ScheduledExecutorService scheduler; - protected class ReadLedgerMetadataTask implements Runnable, GenericCallback { + protected class ReadLedgerMetadataTask implements Runnable { final long ledgerId; @@ -238,7 +241,8 @@ public void run() { if (LOG.isDebugEnabled()) { LOG.debug("Re-read ledger metadata for {}.", ledgerId); } - readLedgerMetadata(ledgerId, ReadLedgerMetadataTask.this); + readLedgerMetadata(ledgerId).whenComplete( + (metadata, exception) -> handleMetadata(metadata, exception)); } else { if (LOG.isDebugEnabled()) { LOG.debug("Ledger metadata listener for ledger {} is already removed.", ledgerId); @@ -246,26 +250,23 @@ public void run() { } } - @Override - public void operationComplete(int rc, final LedgerMetadata result) { - if (BKException.Code.OK == rc) { + private void handleMetadata(Versioned metadata, Throwable exception) { + if (exception == null) { final Set listenerSet = listeners.get(ledgerId); if (null != listenerSet) { if (LOG.isDebugEnabled()) { - LOG.debug("Ledger metadata is changed for {} : {}.", ledgerId, result); + LOG.debug("Ledger metadata is changed 
for {} : {}.", ledgerId, metadata.getValue()); } - scheduler.submit(new Runnable() { - @Override - public void run() { + scheduler.submit(() -> { synchronized (listenerSet) { for (LedgerMetadataListener listener : listenerSet) { - listener.onChanged(ledgerId, result); + listener.onChanged(ledgerId, metadata); } } - } - }); + }); } - } else if (BKException.Code.NoSuchLedgerExistsException == rc) { + } else if (BKException.getExceptionCode(exception) + == BKException.Code.NoSuchLedgerExistsOnMetadataServerException) { // the ledger is removed, do nothing Set listenerSet = listeners.remove(ledgerId); if (null != listenerSet) { @@ -275,7 +276,8 @@ public void run() { } } } else { - LOG.warn("Failed on read ledger metadata of ledger {} : {}", ledgerId, rc); + LOG.warn("Failed on read ledger metadata of ledger {}: {}", + ledgerId, BKException.getExceptionCode(exception)); scheduler.schedule(this, MS_CONNECT_BACKOFF_MS, TimeUnit.MILLISECONDS); } } @@ -285,6 +287,7 @@ public void run() { this.conf = conf; this.zk = zk; this.metastore = metastore; + this.serDe = new LedgerMetadataSerDe(); try { ledgerTable = metastore.createScannableTable(TABLE_NAME); @@ -375,119 +378,132 @@ public void close() { } @Override - public void createLedgerMetadata(final long lid, final LedgerMetadata metadata, - final GenericCallback ledgerCb) { + public CompletableFuture> createLedgerMetadata(long lid, LedgerMetadata metadata) { + CompletableFuture> promise = new CompletableFuture<>(); MetastoreCallback msCallback = new MetastoreCallback() { @Override public void complete(int rc, Version version, Object ctx) { if (MSException.Code.BadVersion.getCode() == rc) { - ledgerCb.operationComplete(BKException.Code.MetadataVersionException, null); + promise.completeExceptionally(new BKException.BKMetadataVersionException()); return; } if (MSException.Code.OK.getCode() != rc) { - ledgerCb.operationComplete(BKException.Code.MetaStoreException, null); + promise.completeExceptionally(new BKException.MetaStoreException()); return; } if (LOG.isDebugEnabled()) { LOG.debug("Create ledger {} with version {} successfully.", lid, version); } - // update version - metadata.setVersion(version); - ledgerCb.operationComplete(BKException.Code.OK, metadata); + promise.complete(new Versioned<>(metadata, version)); } }; - ledgerTable.put(ledgerId2Key(lid), new Value().setField(META_FIELD, metadata.serialize()), - Version.NEW, msCallback, null); + final byte[] bytes; + try { + bytes = serDe.serialize(metadata); + } catch (IOException ioe) { + promise.completeExceptionally(new BKException.BKMetadataSerializationException(ioe)); + return promise; + } + ledgerTable.put(ledgerId2Key(lid), new Value().setField(META_FIELD, bytes), + Version.NEW, msCallback, null); + return promise; } @Override - public void removeLedgerMetadata(final long ledgerId, final Version version, - final GenericCallback cb) { + public CompletableFuture removeLedgerMetadata(final long ledgerId, final Version version) { + CompletableFuture promise = new CompletableFuture<>(); MetastoreCallback msCallback = new MetastoreCallback() { @Override public void complete(int rc, Void value, Object ctx) { int bkRc; if (MSException.Code.NoKey.getCode() == rc) { LOG.warn("Ledger entry does not exist in meta table: ledgerId={}", ledgerId); - bkRc = BKException.Code.NoSuchLedgerExistsException; + promise.completeExceptionally(new BKException.BKNoSuchLedgerExistsOnMetadataServerException()); } else if (MSException.Code.OK.getCode() == rc) { - bkRc = BKException.Code.OK; + 
FutureUtils.complete(promise, null); } else { - bkRc = BKException.Code.MetaStoreException; + promise.completeExceptionally(new BKException.BKMetadataSerializationException()); } - cb.operationComplete(bkRc, (Void) null); } }; ledgerTable.remove(ledgerId2Key(ledgerId), version, msCallback, null); + return promise; } @Override - public void readLedgerMetadata(final long ledgerId, final GenericCallback readCb) { + public CompletableFuture> readLedgerMetadata(final long ledgerId) { final String key = ledgerId2Key(ledgerId); + CompletableFuture> promise = new CompletableFuture<>(); MetastoreCallback> msCallback = new MetastoreCallback>() { @Override public void complete(int rc, Versioned value, Object ctx) { if (MSException.Code.NoKey.getCode() == rc) { LOG.error("No ledger metadata found for ledger " + ledgerId + " : ", MSException.create(MSException.Code.get(rc), "No key " + key + " found.")); - readCb.operationComplete(BKException.Code.NoSuchLedgerExistsException, null); + promise.completeExceptionally(new BKException.BKNoSuchLedgerExistsOnMetadataServerException()); return; } if (MSException.Code.OK.getCode() != rc) { LOG.error("Could not read metadata for ledger " + ledgerId + " : ", MSException.create(MSException.Code.get(rc), "Failed to get key " + key)); - readCb.operationComplete(BKException.Code.MetaStoreException, null); + promise.completeExceptionally(new BKException.MetaStoreException()); return; } - LedgerMetadata metadata; try { - metadata = LedgerMetadata.parseConfig(value.getValue().getField(META_FIELD), - value.getVersion(), Optional.absent()); + LedgerMetadata metadata = serDe.parseConfig( + value.getValue().getField(META_FIELD), ledgerId, Optional.empty()); + promise.complete(new Versioned<>(metadata, value.getVersion())); } catch (IOException e) { LOG.error("Could not parse ledger metadata for ledger " + ledgerId + " : ", e); - readCb.operationComplete(BKException.Code.MetaStoreException, null); - return; + promise.completeExceptionally(new BKException.MetaStoreException()); } - readCb.operationComplete(BKException.Code.OK, metadata); } }; ledgerTable.get(key, this, msCallback, ALL_FIELDS); + return promise; } @Override - public void writeLedgerMetadata(final long ledgerId, final LedgerMetadata metadata, - final GenericCallback cb) { - Value data = new Value().setField(META_FIELD, metadata.serialize()); + public CompletableFuture> writeLedgerMetadata(long ledgerId, LedgerMetadata metadata, + Version currentVersion) { + + CompletableFuture> promise = new CompletableFuture<>(); + final byte[] bytes; + try { + bytes = serDe.serialize(metadata); + } catch (IOException ioe) { + promise.completeExceptionally(new BKException.MetaStoreException(ioe)); + return promise; + } + + Value data = new Value().setField(META_FIELD, bytes); if (LOG.isDebugEnabled()) { - LOG.debug("Writing ledger {} metadata, version {}", new Object[] { ledgerId, metadata.getVersion() }); + LOG.debug("Writing ledger {} metadata, version {}", new Object[] { ledgerId, currentVersion }); } final String key = ledgerId2Key(ledgerId); MetastoreCallback msCallback = new MetastoreCallback() { @Override public void complete(int rc, Version version, Object ctx) { - int bkRc; if (MSException.Code.BadVersion.getCode() == rc) { - LOG.info("Bad version provided to updat metadata for ledger {}", ledgerId); - bkRc = BKException.Code.MetadataVersionException; + LOG.info("Bad version provided to update metadata for ledger {}", ledgerId); + promise.completeExceptionally(new BKException.BKMetadataVersionException()); } else if 
(MSException.Code.NoKey.getCode() == rc) { LOG.warn("Ledger {} doesn't exist when writing its ledger metadata.", ledgerId); - bkRc = BKException.Code.NoSuchLedgerExistsException; + promise.completeExceptionally(new BKException.BKNoSuchLedgerExistsOnMetadataServerException()); } else if (MSException.Code.OK.getCode() == rc) { - metadata.setVersion(version); - bkRc = BKException.Code.OK; + promise.complete(new Versioned<>(metadata, version)); } else { LOG.warn("Conditional update ledger metadata failed: ", MSException.create(MSException.Code.get(rc), "Failed to put key " + key)); - bkRc = BKException.Code.MetaStoreException; + promise.completeExceptionally(new BKException.MetaStoreException()); } - - cb.operationComplete(bkRc, metadata); } }; - ledgerTable.put(key, data, metadata.getVersion(), msCallback, null); + ledgerTable.put(key, data, currentVersion, msCallback, null); + return promise; } @Override @@ -628,7 +644,7 @@ public LedgerRange next() throws IOException { } @Override - public LedgerRangeIterator getLedgerRanges() { + public LedgerRangeIterator getLedgerRanges(long zkOpTimeoutMs) { return new MSLedgerRangeIterator(); } @@ -639,16 +655,13 @@ public LedgerRangeIterator getLedgerRanges() { * Znode Name * @return true if the znode is a special znode otherwise false */ - public static boolean isSpecialZnode(String znode) { - if (BookKeeperConstants.AVAILABLE_NODE.equals(znode) - || BookKeeperConstants.COOKIE_NODE.equals(znode) - || BookKeeperConstants.LAYOUT_ZNODE.equals(znode) - || BookKeeperConstants.INSTANCEID.equals(znode) - || BookKeeperConstants.UNDER_REPLICATION_NODE.equals(znode) - || MsLedgerManager.IDGEN_ZNODE.equals(znode)) { - return true; - } - return false; + public static boolean isSpecialZnode(String znode) { + return BookKeeperConstants.AVAILABLE_NODE.equals(znode) + || BookKeeperConstants.COOKIE_NODE.equals(znode) + || BookKeeperConstants.LAYOUT_ZNODE.equals(znode) + || BookKeeperConstants.INSTANCEID.equals(znode) + || BookKeeperConstants.UNDER_REPLICATION_NODE.equals(znode) + || MsLedgerManager.IDGEN_ZNODE.equals(znode); } } @@ -658,7 +671,8 @@ public LedgerManager newLedgerManager() { } @Override - public LedgerUnderreplicationManager newLedgerUnderreplicationManager() throws KeeperException, + public LedgerUnderreplicationManager newLedgerUnderreplicationManager() + throws ReplicationException.UnavailableException, InterruptedException, ReplicationException.CompatibilityException { // TODO: currently just use zk ledger underreplication manager return new ZkLedgerUnderreplicationManager(conf, zk); @@ -747,7 +761,7 @@ public void format(AbstractConfiguration conf, LayoutManager layoutManager) try { MetastoreUtils.cleanTable(ledgerTable, conf.getMetastoreMaxEntriesPerScan()); } catch (MSException mse) { - throw new IOException("Exception when cleanning up table " + TABLE_NAME, mse); + throw new IOException("Exception when cleaning up table " + TABLE_NAME, mse); } LOG.info("Finished cleaning up table {}.", TABLE_NAME); // Delete and recreate the LAYOUT information. 
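Since the hunks above convert MsLedgerManager from GenericCallback-style completions to CompletableFuture results, a short caller-side sketch may help. This is a hypothetical illustration (the ExampleReader class is not part of this patch); it uses only APIs visible in the diff: readLedgerMetadata now returns a CompletableFuture<Versioned<LedgerMetadata>>, and failures surface as BKException subclasses rather than integer return codes.

```java
import java.util.concurrent.CompletableFuture;
import org.apache.bookkeeper.client.BKException;
import org.apache.bookkeeper.client.api.LedgerMetadata;
import org.apache.bookkeeper.meta.LedgerManager;
import org.apache.bookkeeper.versioning.Versioned;

public class ExampleReader {
    // Mirrors the three outcomes ReadLedgerMetadataTask distinguishes above:
    // success, ledger removed from the metadata store, and retryable failure.
    static void readAndHandle(LedgerManager ledgerManager, long ledgerId) {
        CompletableFuture<Versioned<LedgerMetadata>> f =
                ledgerManager.readLedgerMetadata(ledgerId);
        f.whenComplete((versionedMetadata, exception) -> {
            if (exception == null) {
                LedgerMetadata metadata = versionedMetadata.getValue();
                System.out.println("ledger " + ledgerId + " metadata: " + metadata);
            } else if (BKException.getExceptionCode(exception)
                    == BKException.Code.NoSuchLedgerExistsOnMetadataServerException) {
                System.out.println("ledger " + ledgerId + " no longer exists");
            } else {
                System.err.println("transient failure, schedule a retry: " + exception);
            }
        });
    }
}
```

The version carried inside Versioned<LedgerMetadata> replaces the old mutable LedgerMetadata.setVersion(...) call: writeLedgerMetadata now takes the expected Version explicitly and returns the new one, which is what lets the serialization step (LedgerMetadataSerDe) stay free of side effects.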
@@ -804,4 +818,10 @@ public boolean validateAndNukeExistingCluster(AbstractConfiguration conf, Lay zkServers, zkLedgersRootPath); return true; } + + @Override + public LedgerAuditorManager newLedgerAuditorManager() { + ServerConfiguration serverConfiguration = new ServerConfiguration(conf); + return new ZkLedgerAuditorManager(zk, serverConfiguration, NullStatsLogger.INSTANCE); + } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/MetadataBookieDriver.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/MetadataBookieDriver.java index b4d0e0d1b27..c9ad734c29e 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/MetadataBookieDriver.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/MetadataBookieDriver.java @@ -18,9 +18,10 @@ */ package org.apache.bookkeeper.meta; +import java.util.concurrent.CompletableFuture; +import org.apache.bookkeeper.common.concurrent.FutureUtils; import org.apache.bookkeeper.conf.ServerConfiguration; import org.apache.bookkeeper.discover.RegistrationManager; -import org.apache.bookkeeper.discover.RegistrationManager.RegistrationListener; import org.apache.bookkeeper.meta.exceptions.MetadataException; import org.apache.bookkeeper.stats.StatsLogger; @@ -33,12 +34,10 @@ public interface MetadataBookieDriver extends AutoCloseable { * Initialize the metadata driver. * * @param conf configuration - * @param listener registration listener listening on registration state changes. * @param statsLogger stats logger * @return metadata driver */ MetadataBookieDriver initialize(ServerConfiguration conf, - RegistrationListener listener, StatsLogger statsLogger) throws MetadataException; @@ -50,11 +49,11 @@ MetadataBookieDriver initialize(ServerConfiguration conf, String getScheme(); /** - * Return the registration manager used for registering/unregistering bookies. + * Create the registration manager used for registering/unregistering bookies. * * @return the registration manager used for registering/unregistering bookies. */ - RegistrationManager getRegistrationManager(); + RegistrationManager createRegistrationManager(); /** * Return the ledger manager factory used for accessing ledger metadata. @@ -71,6 +70,31 @@ LedgerManagerFactory getLedgerManagerFactory() */ LayoutManager getLayoutManager(); + /** + * Return health check is enable or disable. + * + * @return true if health check is enable, otherwise false. + */ + default CompletableFuture isHealthCheckEnabled() { + return FutureUtils.value(true); + } + + /** + * Disable health check. + */ + default CompletableFuture disableHealthCheck() { + CompletableFuture result = new CompletableFuture<>(); + result.completeExceptionally(new Exception("disableHealthCheck is not supported by this metadata driver")); + return result; + } + + /** + * Enable health check. 
+ */ + default CompletableFuture enableHealthCheck() { + return FutureUtils.Void(); + } + @Override void close(); diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/MetadataClientDriver.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/MetadataClientDriver.java index b53836790cc..1615f04ecdc 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/MetadataClientDriver.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/MetadataClientDriver.java @@ -19,9 +19,11 @@ package org.apache.bookkeeper.meta; import java.util.Optional; +import java.util.concurrent.CompletableFuture; import java.util.concurrent.ScheduledExecutorService; import org.apache.bookkeeper.common.annotation.InterfaceAudience.LimitedPrivate; import org.apache.bookkeeper.common.annotation.InterfaceStability.Evolving; +import org.apache.bookkeeper.common.concurrent.FutureUtils; import org.apache.bookkeeper.conf.ClientConfiguration; import org.apache.bookkeeper.discover.RegistrationClient; import org.apache.bookkeeper.meta.exceptions.MetadataException; @@ -84,4 +86,32 @@ LedgerManagerFactory getLedgerManagerFactory() @Override void close(); + /** + * State Listener on listening the metadata client session states. + */ + @FunctionalInterface + interface SessionStateListener { + + /** + * Signal when client session is expired. + */ + void onSessionExpired(); + } + + /** + * sets session state listener. + * + * @param sessionStateListener + * listener listening on metadata client session states. + */ + void setSessionStateListener(SessionStateListener sessionStateListener); + + /** + * Return health check is enable or disable. + * + * @return true if health check is enable, otherwise false. + */ + default CompletableFuture isHealthCheckEnabled() { + return FutureUtils.value(true); + } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/MetadataDrivers.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/MetadataDrivers.java index bab6d8423bb..e85b4ec2228 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/MetadataDrivers.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/MetadataDrivers.java @@ -24,7 +24,9 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.Sets; import com.google.common.util.concurrent.UncheckedExecutionException; +import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; import java.net.URI; +import java.util.Collections; import java.util.Optional; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; @@ -37,13 +39,13 @@ import lombok.NoArgsConstructor; import lombok.ToString; import lombok.extern.slf4j.Slf4j; +import org.apache.bookkeeper.common.util.ReflectionUtils; import org.apache.bookkeeper.conf.ClientConfiguration; import org.apache.bookkeeper.conf.ServerConfiguration; import org.apache.bookkeeper.discover.RegistrationManager; import org.apache.bookkeeper.meta.exceptions.Code; import org.apache.bookkeeper.meta.exceptions.MetadataException; import org.apache.bookkeeper.stats.NullStatsLogger; -import org.apache.bookkeeper.util.ReflectionUtils; import org.apache.commons.configuration.ConfigurationException; import org.apache.commons.lang3.StringUtils; @@ -90,27 +92,18 @@ static class MetadataBookieDriverInfo { private static final ConcurrentMap clientDrivers; @Getter(AccessLevel.PACKAGE) private static final ConcurrentMap bookieDrivers; - private static boolean initialized = false; static { clientDrivers = new 
ConcurrentHashMap<>(); bookieDrivers = new ConcurrentHashMap<>(); - initialize(); - } - - static void initialize() { - if (initialized) { - return; - } loadInitialDrivers(); - initialized = true; - log.info("BookKeeper metadata driver manager initialized"); } @VisibleForTesting static void loadInitialDrivers() { loadInitialClientDrivers(); loadInitialBookieDrivers(); + log.info("BookKeeper metadata driver manager initialized"); } private static void loadInitialClientDrivers() { @@ -123,9 +116,7 @@ private static void loadInitialClientDrivers() { String driversStr = System.getProperty(BK_METADATA_CLIENT_DRIVERS_PROPERTY); if (null != driversStr) { String[] driversArray = StringUtils.split(driversStr, ':'); - for (String driver : driversArray) { - driverList.add(driver); - } + Collections.addAll(driverList, driversArray); } // initialize the drivers @@ -152,9 +143,7 @@ private static void loadInitialBookieDrivers() { String driversStr = System.getProperty(BK_METADATA_BOOKIE_DRIVERS_PROPERTY); if (null != driversStr) { String[] driversArray = StringUtils.split(driversStr, ':'); - for (String driver : driversArray) { - driverList.add(driver); - } + Collections.addAll(driverList, driversArray); } // initialize the drivers @@ -186,10 +175,6 @@ public static void registerClientDriver(String metadataBackendScheme, public static void registerClientDriver(String metadataBackendScheme, Class driver, boolean allowOverride) { - if (!initialized) { - initialize(); - } - String scheme = metadataBackendScheme.toLowerCase(); MetadataClientDriverInfo oldDriverInfo = clientDrivers.get(scheme); if (null != oldDriverInfo && !allowOverride) { @@ -198,9 +183,13 @@ public static void registerClientDriver(String metadataBackendScheme, MetadataClientDriverInfo newDriverInfo = new MetadataClientDriverInfo(driver); oldDriverInfo = clientDrivers.putIfAbsent(scheme, newDriverInfo); if (null != oldDriverInfo) { - log.debug("Metadata client driver for {} is already there.", scheme); + if (log.isDebugEnabled()) { + log.debug("Metadata client driver for {} is already there.", scheme); + } if (allowOverride) { - log.debug("Overriding client driver for {}", scheme); + if (log.isDebugEnabled()) { + log.debug("Overriding client driver for {}", scheme); + } clientDrivers.put(scheme, newDriverInfo); } } @@ -221,10 +210,6 @@ public static void registerBookieDriver(String metadataBackendScheme, public static void registerBookieDriver(String metadataBackendScheme, Class driver, boolean allowOverride) { - if (!initialized) { - initialize(); - } - String scheme = metadataBackendScheme.toLowerCase(); MetadataBookieDriverInfo oldDriverInfo = bookieDrivers.get(scheme); if (null != oldDriverInfo && !allowOverride) { @@ -233,9 +218,13 @@ public static void registerBookieDriver(String metadataBackendScheme, MetadataBookieDriverInfo newDriverInfo = new MetadataBookieDriverInfo(driver); oldDriverInfo = bookieDrivers.putIfAbsent(scheme, newDriverInfo); if (null != oldDriverInfo) { - log.debug("Metadata bookie driver for {} is already there.", scheme); + if (log.isDebugEnabled()) { + log.debug("Metadata bookie driver for {} is already there.", scheme); + } if (allowOverride) { - log.debug("Overriding bookie driver for {}", scheme); + if (log.isDebugEnabled()) { + log.debug("Overriding bookie driver for {}", scheme); + } bookieDrivers.put(scheme, newDriverInfo); } } @@ -250,9 +239,6 @@ public static void registerBookieDriver(String metadataBackendScheme, */ public static MetadataClientDriver getClientDriver(String scheme) { checkNotNull(scheme, 
"Client Driver Scheme is null"); - if (!initialized) { - initialize(); - } MetadataClientDriverInfo driverInfo = clientDrivers.get(scheme.toLowerCase()); if (null == driverInfo) { throw new IllegalArgumentException("Unknown backend " + scheme); @@ -290,9 +276,6 @@ public static MetadataClientDriver getClientDriver(URI uri) { */ public static MetadataBookieDriver getBookieDriver(String scheme) { checkNotNull(scheme, "Bookie Driver Scheme is null"); - if (!initialized) { - initialize(); - } MetadataBookieDriverInfo driverInfo = bookieDrivers.get(scheme.toLowerCase()); if (null == driverInfo) { throw new IllegalArgumentException("Unknown backend " + scheme); @@ -331,6 +314,7 @@ public static MetadataBookieDriver getBookieDriver(URI uri) { * @throws MetadataException when failed to access metadata store * @throws ExecutionException exception thrown when processing function. */ + @SuppressFBWarnings("RCN_REDUNDANT_NULLCHECK_WOULD_HAVE_BEEN_A_NPE") public static T runFunctionWithMetadataClientDriver(ClientConfiguration conf, Function function, ScheduledExecutorService executorService) @@ -362,13 +346,14 @@ public static T runFunctionWithMetadataClientDriver(ClientConfiguration conf * @throws MetadataException when failed to access metadata store * @throws ExecutionException exception thrown when processing function. */ + @SuppressFBWarnings("RCN_REDUNDANT_NULLCHECK_WOULD_HAVE_BEEN_A_NPE") public static T runFunctionWithMetadataBookieDriver(ServerConfiguration conf, Function function) throws MetadataException, ExecutionException { try (MetadataBookieDriver driver = MetadataDrivers.getBookieDriver( URI.create(conf.getMetadataServiceUri()) )) { - driver.initialize(conf, () -> {}, NullStatsLogger.INSTANCE); + driver.initialize(conf, NullStatsLogger.INSTANCE); try { return function.apply(driver); } catch (Exception uee) { @@ -395,7 +380,12 @@ public static T runFunctionWithMetadataBookieDriver(ServerConfiguration conf public static T runFunctionWithRegistrationManager(ServerConfiguration conf, Function function) throws MetadataException, ExecutionException { - return runFunctionWithMetadataBookieDriver(conf, driver -> function.apply(driver.getRegistrationManager())); + return runFunctionWithMetadataBookieDriver( + conf, driver -> { + try (RegistrationManager rm = driver.createRegistrationManager()) { + return function.apply(rm); + } + }); } /** diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/NullMetadataBookieDriver.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/NullMetadataBookieDriver.java new file mode 100644 index 00000000000..7d2d84381da --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/NullMetadataBookieDriver.java @@ -0,0 +1,399 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bookkeeper.meta; + +import java.io.IOException; +import java.util.Collection; +import java.util.Iterator; +import java.util.List; +import java.util.NoSuchElementException; +import java.util.concurrent.CompletableFuture; +import java.util.function.Consumer; +import java.util.function.Predicate; +import org.apache.bookkeeper.bookie.BookieException; +import org.apache.bookkeeper.client.BKException; +import org.apache.bookkeeper.client.api.LedgerMetadata; +import org.apache.bookkeeper.conf.AbstractConfiguration; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.discover.BookieServiceInfo; +import org.apache.bookkeeper.discover.RegistrationManager; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GenericCallback; +import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.LedgerMetadataListener; +import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.Processor; +import org.apache.bookkeeper.replication.ReplicationException; +import org.apache.bookkeeper.stats.StatsLogger; +import org.apache.bookkeeper.versioning.Version; +import org.apache.bookkeeper.versioning.Versioned; +import org.apache.zookeeper.AsyncCallback; + +/** + * A no-op implementation of MetadataBookieDriver. + */ +public class NullMetadataBookieDriver implements MetadataBookieDriver { + @Override + public MetadataBookieDriver initialize(ServerConfiguration conf, + StatsLogger statsLogger) { + return this; + } + + @Override + public String getScheme() { + return "null"; + } + + @Override + public RegistrationManager createRegistrationManager() { + return new NullRegistrationManager(); + } + + @Override + public LedgerManagerFactory getLedgerManagerFactory() { + return new NullLedgerManagerFactory(); + } + + @Override + public LayoutManager getLayoutManager() { + return new NullLayoutManager(); + } + + @Override + public void close() {} + + /** + * A no-op implementation of LedgerManagerFactory. + */ + public static class NullLedgerManagerFactory implements LedgerManagerFactory { + @Override + public int getCurrentVersion() { + return 1; + } + @Override + public LedgerManagerFactory initialize(AbstractConfiguration conf, + LayoutManager layoutManager, + int factoryVersion) { + return this; + } + @Override + public void close() {} + @Override + public LedgerIdGenerator newLedgerIdGenerator() { + return new NullLedgerIdGenerator(); + } + + @Override + public LedgerManager newLedgerManager() { + return new NullLedgerManager(); + } + + @Override + public LedgerUnderreplicationManager newLedgerUnderreplicationManager() { + return new NullLedgerUnderreplicationManager(); + } + + @Override + public LedgerAuditorManager newLedgerAuditorManager() throws IOException, InterruptedException { + return new NullLedgerAuditorManager(); + } + + @Override + public void format(AbstractConfiguration conf, LayoutManager lm) {} + @Override + public boolean validateAndNukeExistingCluster(AbstractConfiguration conf, + LayoutManager lm) { + return false; + } + } + + /** + * A no-op implementation of LedgerAuditorManager. 
+ */ + public static class NullLedgerAuditorManager implements LedgerAuditorManager { + + @Override + public void tryToBecomeAuditor(String bookieId, Consumer listener) + throws IOException, InterruptedException { + // no-op + } + + @Override + public BookieId getCurrentAuditor() throws IOException, InterruptedException { + return BookieId.parse("127.0.0.1:3181"); + } + + @Override + public void close() throws Exception { + // no-op + } + } + + /** + * A no-op implementation of LayoutManager. + */ + public static class NullLayoutManager implements LayoutManager { + @Override + public LedgerLayout readLedgerLayout() { + return new LedgerLayout("null", -1); + } + + @Override + public void storeLedgerLayout(LedgerLayout layout) { } + + @Override + public void deleteLedgerLayout() { } + } + + /** + * A no-op implementation of RegistrationManager. + */ + public static class NullRegistrationManager implements RegistrationManager { + @Override + public void close() {} + + @Override + public String getClusterInstanceId() { + return "null"; + } + + @Override + public void registerBookie(BookieId bookieId, boolean readOnly, BookieServiceInfo bookieService) {} + + @Override + public void unregisterBookie(BookieId bookieId, boolean readOnly) {} + + @Override + public boolean isBookieRegistered(BookieId bookieId) { + return false; + } + + @Override + public void writeCookie(BookieId bookieId, Versioned cookieData) throws BookieException { + + } + + @Override + public Versioned readCookie(BookieId bookieId) throws BookieException { + return null; + } + + @Override + public void removeCookie(BookieId bookieId, Version version) {} + + @Override + public boolean prepareFormat() { + return false; + } + + @Override + public boolean initNewCluster() { + return false; + } + + @Override + public boolean format() { + return false; + } + + @Override + public boolean nukeExistingCluster() { + return false; + } + + @Override + public void addRegistrationListener(RegistrationListener listener) {} + } + + /** + * A no-op implementation of LedgerIdGenerator. + */ + public static class NullLedgerIdGenerator implements LedgerIdGenerator { + @Override + public void close() {} + @Override + public void generateLedgerId(GenericCallback cb) { + cb.operationComplete(BKException.Code.IllegalOpException, null); + } + } + + /** + * A no-op implementation of LedgerManager. 
+ */ + public static class NullLedgerManager implements LedgerManager { + private CompletableFuture> illegalOp() { + CompletableFuture> promise = new CompletableFuture<>(); + promise.completeExceptionally(new BKException.BKIllegalOpException()); + return promise; + } + + @Override + public CompletableFuture> createLedgerMetadata(long ledgerId, + LedgerMetadata metadata) { + return illegalOp(); + } + @Override + public CompletableFuture removeLedgerMetadata(long ledgerId, Version version) { + CompletableFuture promise = new CompletableFuture<>(); + promise.completeExceptionally(new BKException.BKIllegalOpException()); + return promise; + } + + @Override + public CompletableFuture> readLedgerMetadata(long ledgerId) { + return illegalOp(); + } + + @Override + public CompletableFuture> writeLedgerMetadata( + long ledgerId, LedgerMetadata metadata, Version currentVersion) { + return illegalOp(); + } + + @Override + public void registerLedgerMetadataListener(long ledgerId, + LedgerMetadataListener listener) {} + @Override + public void unregisterLedgerMetadataListener(long ledgerId, + LedgerMetadataListener listener) {} + @Override + public void asyncProcessLedgers(Processor processor, + AsyncCallback.VoidCallback finalCb, + Object context, int successRc, int failureRc) {} + @Override + public LedgerManager.LedgerRangeIterator getLedgerRanges(long zkOpTimeOutMs) { + return new LedgerManager.LedgerRangeIterator() { + @Override + public boolean hasNext() { + return false; + } + @Override + public LedgerManager.LedgerRange next() { + throw new NoSuchElementException(); + } + }; + } + + @Override + public void close() {} + } + + /** + * A no-op implementation of LedgerUnderreplicationManager. + */ + public static class NullLedgerUnderreplicationManager implements LedgerUnderreplicationManager { + @Override + public boolean isLedgerBeingReplicated(long ledgerId) throws ReplicationException { + return false; + } + + @Override + public CompletableFuture markLedgerUnderreplicatedAsync(long ledgerId, + Collection missingReplicas) { + CompletableFuture promise = new CompletableFuture<>(); + promise.completeExceptionally(new ReplicationException.UnavailableException("null")); + return promise; + } + @Override + public void markLedgerReplicated(long ledgerId) + throws ReplicationException.UnavailableException { + throw new ReplicationException.UnavailableException("null"); + } + @Override + public UnderreplicatedLedger getLedgerUnreplicationInfo(long ledgerId) + throws ReplicationException.UnavailableException { + throw new ReplicationException.UnavailableException("null"); + } + @Override + public Iterator listLedgersToRereplicate(Predicate> predicate) { + return new Iterator() { + @Override + public boolean hasNext() { + return false; + } + @Override + public UnderreplicatedLedger next() { + throw new NoSuchElementException(); + } + }; + } + @Override + public long getLedgerToRereplicate() throws ReplicationException.UnavailableException { + throw new ReplicationException.UnavailableException("null"); + } + @Override + public long pollLedgerToRereplicate() throws ReplicationException.UnavailableException { + throw new ReplicationException.UnavailableException("null"); + } + + @Override + public void acquireUnderreplicatedLedger(long ledgerId) throws ReplicationException { + // no-op + } + + @Override + public void releaseUnderreplicatedLedger(long ledgerId) {} + @Override + public void close() {} + @Override + public void disableLedgerReplication() {} + @Override + public void enableLedgerReplication() {} 
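A brief usage note on the null-object pattern this class implements: NullLedgerManager (defined just above) never contacts a backend; each metadata operation completes its future exceptionally with BKIllegalOpException. A hypothetical test-style sketch of that behavior (the class name and printout are illustrative only, not part of this patch):

```java
import java.util.concurrent.ExecutionException;
import org.apache.bookkeeper.meta.LedgerManager;
import org.apache.bookkeeper.meta.NullMetadataBookieDriver;

public class NullLedgerManagerSketch {
    public static void main(String[] args) throws Exception {
        LedgerManager lm = new NullMetadataBookieDriver.NullLedgerManagerFactory()
                .newLedgerManager();
        try {
            // No backend is contacted; the returned future is already failed.
            lm.readLedgerMetadata(1L).get();
        } catch (ExecutionException ee) {
            // Cause is BKException.BKIllegalOpException, per the illegalOp() helper above.
            System.out.println("expected failure: " + ee.getCause());
        } finally {
            lm.close();
        }
    }
}
```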
+ @Override + public boolean isLedgerReplicationEnabled() { + return false; + } + @Override + public void notifyLedgerReplicationEnabled(GenericCallback cb) {} + @Override + public boolean initializeLostBookieRecoveryDelay(int lostBookieRecoveryDelay) { + return false; + } + @Override + public void setLostBookieRecoveryDelay(int lostBookieRecoveryDelay) {} + @Override + public int getLostBookieRecoveryDelay() { + return Integer.MAX_VALUE; + } + @Override + public void setCheckAllLedgersCTime(long checkAllLedgersCTime) {} + @Override + public long getCheckAllLedgersCTime() { + return Integer.MAX_VALUE; + } + @Override + public void setPlacementPolicyCheckCTime(long placementPolicyCheckCTime) {} + @Override + public long getPlacementPolicyCheckCTime() { + return Long.MAX_VALUE; + } + @Override + public void setReplicasCheckCTime(long replicasCheckCTime) {} + @Override + public long getReplicasCheckCTime() { + return Long.MAX_VALUE; + } + @Override + public void notifyLostBookieRecoveryDelayChanged(GenericCallback cb) {} + @Override + public String getReplicationWorkerIdRereplicatingLedger(long ledgerId) + throws ReplicationException.UnavailableException { + throw new ReplicationException.UnavailableException("null"); + } + @Override + public void notifyUnderReplicationLedgerChanged(GenericCallback cb) {} + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/UnderreplicatedLedger.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/UnderreplicatedLedger.java index 6ad3036de5b..81c92d03521 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/UnderreplicatedLedger.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/UnderreplicatedLedger.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/ZkLedgerAuditorManager.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/ZkLedgerAuditorManager.java new file mode 100644 index 00000000000..4f52245514c --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/ZkLedgerAuditorManager.java @@ -0,0 +1,280 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.bookkeeper.meta; + +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.apache.bookkeeper.proto.DataFormats.AuditorVoteFormat; +import static org.apache.bookkeeper.replication.ReplicationStats.ELECTION_ATTEMPTS; + +import com.google.protobuf.TextFormat; +import java.io.IOException; +import java.io.Serializable; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; +import java.util.concurrent.CountDownLatch; +import java.util.function.Consumer; +import lombok.extern.slf4j.Slf4j; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.meta.zk.ZKMetadataDriverBase; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.stats.Counter; +import org.apache.bookkeeper.stats.StatsLogger; +import org.apache.bookkeeper.stats.annotations.StatsDoc; +import org.apache.bookkeeper.util.BookKeeperConstants; +import org.apache.bookkeeper.util.ZkUtils; +import org.apache.commons.lang.StringUtils; +import org.apache.zookeeper.CreateMode; +import org.apache.zookeeper.KeeperException; +import org.apache.zookeeper.WatchedEvent; +import org.apache.zookeeper.Watcher; +import org.apache.zookeeper.ZooKeeper; +import org.apache.zookeeper.data.ACL; + +/** + * ZK based implementation of LedgerAuditorManager. + */ +@Slf4j +public class ZkLedgerAuditorManager implements LedgerAuditorManager { + + private final ZooKeeper zkc; + private final ServerConfiguration conf; + private final String basePath; + private final String electionPath; + + private String myVote; + + private static final String ELECTION_ZNODE = "auditorelection"; + + // Represents the index of the auditor node + private static final int AUDITOR_INDEX = 0; + // Represents vote prefix + private static final String VOTE_PREFIX = "V_"; + // Represents path Separator + private static final String PATH_SEPARATOR = "/"; + + private volatile Consumer listener; + private volatile boolean isClosed = false; + + // Expose Stats + @StatsDoc( + name = ELECTION_ATTEMPTS, + help = "The number of auditor election attempts" + ) + private final Counter electionAttempts; + + public ZkLedgerAuditorManager(ZooKeeper zkc, ServerConfiguration conf, StatsLogger statsLogger) { + this.zkc = zkc; + this.conf = conf; + + this.basePath = ZKMetadataDriverBase.resolveZkLedgersRootPath(conf) + '/' + + BookKeeperConstants.UNDER_REPLICATION_NODE; + this.electionPath = basePath + '/' + ELECTION_ZNODE; + this.electionAttempts = statsLogger.getCounter(ELECTION_ATTEMPTS); + } + + @Override + public void tryToBecomeAuditor(String bookieId, Consumer listener) + throws IOException, InterruptedException { + this.listener = listener; + createElectorPath(); + + try { + while (!isClosed) { + createMyVote(bookieId); + + List children = zkc.getChildren(getVotePath(""), false); + if (0 >= children.size()) { + throw new IllegalArgumentException( + "At least one bookie server should present to elect the Auditor!"); + } + + // sorting in ascending order of sequential number + Collections.sort(children, new ElectionComparator()); + String voteNode = StringUtils.substringAfterLast(myVote, PATH_SEPARATOR); + + if (children.get(AUDITOR_INDEX).equals(voteNode)) { + // We have been elected as the auditor + // update the auditor bookie id in the election path. 
This is + // done for debugging purpose + AuditorVoteFormat.Builder builder = AuditorVoteFormat.newBuilder() + .setBookieId(bookieId); + + zkc.setData(getVotePath(""), + builder.build().toString().getBytes(UTF_8), -1); + return; + } else { + // If not an auditor, will be watching to my predecessor and + // looking the previous node deletion. + int myIndex = children.indexOf(voteNode); + if (myIndex < 0) { + throw new IllegalArgumentException("My vote has disappeared"); + } + + int prevNodeIndex = myIndex - 1; + + CountDownLatch latch = new CountDownLatch(1); + + if (null == zkc.exists(getVotePath(PATH_SEPARATOR) + + children.get(prevNodeIndex), event -> latch.countDown())) { + // While adding, the previous znode doesn't exists. + // Again going to election. + continue; + } + + // Wait for the previous auditor in line to be deleted + latch.await(); + } + + electionAttempts.inc(); + } + } catch (KeeperException e) { + throw new IOException(e); + } + } + + @Override + public BookieId getCurrentAuditor() throws IOException, InterruptedException { + String electionRoot = ZKMetadataDriverBase.resolveZkLedgersRootPath(conf) + '/' + + BookKeeperConstants.UNDER_REPLICATION_NODE + '/' + ELECTION_ZNODE; + + try { + List children = zkc.getChildren(electionRoot, false); + Collections.sort(children, new ElectionComparator()); + if (children.size() < 1) { + return null; + } + String ledger = electionRoot + "/" + children.get(AUDITOR_INDEX); + byte[] data = zkc.getData(ledger, false, null); + + AuditorVoteFormat.Builder builder = AuditorVoteFormat.newBuilder(); + TextFormat.merge(new String(data, UTF_8), builder); + AuditorVoteFormat v = builder.build(); + return BookieId.parse(v.getBookieId()); + } catch (KeeperException e) { + throw new IOException(e); + } + } + + @Override + public void close() throws Exception { + log.info("Shutting down AuditorElector"); + isClosed = true; + if (myVote != null) { + try { + zkc.delete(myVote, -1); + } catch (KeeperException.NoNodeException nne) { + // Ok + } catch (InterruptedException ie) { + Thread.currentThread().interrupt(); + log.warn("InterruptedException while deleting myVote: " + myVote, + ie); + } catch (KeeperException ke) { + log.error("Exception while deleting myVote:" + myVote, ke); + } + } + } + + private void createMyVote(String bookieId) throws IOException, InterruptedException { + List zkAcls = ZkUtils.getACLs(conf); + AuditorVoteFormat.Builder builder = AuditorVoteFormat.newBuilder() + .setBookieId(bookieId); + + try { + if (null == myVote || null == zkc.exists(myVote, false)) { + myVote = zkc.create(getVotePath(PATH_SEPARATOR + VOTE_PREFIX), + builder.build().toString().getBytes(UTF_8), zkAcls, + CreateMode.EPHEMERAL_SEQUENTIAL); + } + } catch (KeeperException e) { + throw new IOException(e); + } + } + + private void createElectorPath() throws IOException { + try { + List zkAcls = ZkUtils.getACLs(conf); + if (zkc.exists(basePath, false) == null) { + try { + zkc.create(basePath, new byte[0], zkAcls, + CreateMode.PERSISTENT); + } catch (KeeperException.NodeExistsException nee) { + // do nothing, someone else could have created it + } + } + if (zkc.exists(getVotePath(""), false) == null) { + try { + zkc.create(getVotePath(""), new byte[0], + zkAcls, CreateMode.PERSISTENT); + } catch (KeeperException.NodeExistsException nee) { + // do nothing, someone else could have created it + } + } + } catch (KeeperException ke) { + throw new IOException("Failed to initialize Auditor Elector", ke); + } catch (InterruptedException ie) { + 
Thread.currentThread().interrupt(); + throw new IOException("Failed to initialize Auditor Elector", ie); + } + } + + private String getVotePath(String vote) { + return electionPath + vote; + } + + private void handleZkWatch(WatchedEvent event) { + if (isClosed) { + return; + } + + if (event.getState() == Watcher.Event.KeeperState.Expired) { + log.error("Lost ZK connection, shutting down"); + + listener.accept(AuditorEvent.SessionLost); + } else if (event.getType() == Watcher.Event.EventType.NodeDeleted) { + listener.accept(AuditorEvent.VoteWasDeleted); + } + } + + /** + * Compare the votes in the ascending order of the sequence number. Vote + * format is 'V_sequencenumber', comparator will do sorting based on the + * numeric sequence value. + */ + private static class ElectionComparator + implements Comparator, Serializable { + /** + * Return -1 if the first vote is less than second. Return 1 if the + * first vote is greater than second. Return 0 if the votes are equal. + */ + @Override + public int compare(String vote1, String vote2) { + long voteSeqId1 = getVoteSequenceId(vote1); + long voteSeqId2 = getVoteSequenceId(vote2); + int result = voteSeqId1 < voteSeqId2 ? -1 + : (voteSeqId1 > voteSeqId2 ? 1 : 0); + return result; + } + + private long getVoteSequenceId(String vote) { + String voteId = StringUtils.substringAfter(vote, VOTE_PREFIX); + return Long.parseLong(voteId); + } + } + +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/ZkLedgerIdGenerator.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/ZkLedgerIdGenerator.java index c5d53465085..489be4742bc 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/ZkLedgerIdGenerator.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/ZkLedgerIdGenerator.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -19,7 +19,6 @@ import java.io.IOException; import java.util.List; - import org.apache.bookkeeper.client.BKException; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GenericCallback; import org.apache.bookkeeper.util.ZkUtils; @@ -126,7 +125,7 @@ public void processResult(int rc, String path, Object ctx) { private static long getLedgerIdFromGenPath(String nodeName, String ledgerPrefix) throws IOException { long ledgerId; try { - String parts[] = nodeName.split(ledgerPrefix); + String[] parts = nodeName.split(ledgerPrefix); ledgerId = Long.parseLong(parts[parts.length - 1]); } catch (NumberFormatException e) { throw new IOException(e); diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/ZkLedgerUnderreplicationManager.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/ZkLedgerUnderreplicationManager.java index 32723706fd0..3eaf63a2450 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/ZkLedgerUnderreplicationManager.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/ZkLedgerUnderreplicationManager.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file * distributed with this work for additional information @@ -18,13 +18,14 @@ package org.apache.bookkeeper.meta; -import static com.google.common.base.Charsets.UTF_8; +import static java.nio.charset.StandardCharsets.UTF_8; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Joiner; +import com.google.common.util.concurrent.RateLimiter; +import com.google.protobuf.InvalidProtocolBufferException; import com.google.protobuf.TextFormat; import com.google.protobuf.TextFormat.ParseException; - import java.net.UnknownHostException; import java.util.Arrays; import java.util.Collection; @@ -33,6 +34,7 @@ import java.util.LinkedList; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.Queue; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ConcurrentHashMap; @@ -40,14 +42,16 @@ import java.util.function.Predicate; import java.util.regex.Matcher; import java.util.regex.Pattern; - import org.apache.bookkeeper.common.concurrent.FutureUtils; import org.apache.bookkeeper.conf.AbstractConfiguration; import org.apache.bookkeeper.meta.zk.ZKMetadataDriverBase; import org.apache.bookkeeper.net.DNS; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GenericCallback; +import org.apache.bookkeeper.proto.DataFormats.CheckAllLedgersFormat; import org.apache.bookkeeper.proto.DataFormats.LedgerRereplicationLayoutFormat; import org.apache.bookkeeper.proto.DataFormats.LockDataFormat; +import org.apache.bookkeeper.proto.DataFormats.PlacementPolicyCheckFormat; +import org.apache.bookkeeper.proto.DataFormats.ReplicasCheckFormat; import org.apache.bookkeeper.proto.DataFormats.UnderreplicatedLedgerFormat; import org.apache.bookkeeper.replication.ReplicationEnableCb; import org.apache.bookkeeper.replication.ReplicationException; @@ -55,6 +59,7 @@ import org.apache.bookkeeper.util.BookKeeperConstants; import org.apache.bookkeeper.util.SubTreeCache; import org.apache.bookkeeper.util.ZkUtils; +import org.apache.zookeeper.AddWatchMode; import org.apache.zookeeper.CreateMode; import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.KeeperException.Code; @@ -69,7 +74,7 @@ /** * ZooKeeper implementation of underreplication manager. - * This is implemented in a heirarchical fashion, so it'll work with + * This is implemented in a hierarchical fashion, so it'll work with * FlatLedgerManagerFactory and HierarchicalLedgerManagerFactory. * *
 * <pre>
 *           Layout is:
@@ -78,7 +83,7 @@
 *           locks/(ledgerId)
 * </pre>
 *
 * <pre>
          The hierarchical path is created by splitting the ledger into 4 2byte - * segments which are represented in hexidecimal. + * segments which are represented in hexadecimal. * e.g. For ledger id 0xcafebeef0000feed, the path is * cafe/beef/0000/feed/ */ @@ -91,9 +96,9 @@ public class ZkLedgerUnderreplicationManager implements LedgerUnderreplicationMa private static class Lock { private final String lockZNode; - private final int ledgerZNodeVersion; + private final Optional ledgerZNodeVersion; - Lock(String lockZNode, int ledgerZNodeVersion) { + Lock(String lockZNode, Optional ledgerZNodeVersion) { this.lockZNode = lockZNode; this.ledgerZNodeVersion = ledgerZNodeVersion; } @@ -102,32 +107,48 @@ String getLockZNode() { return lockZNode; } - int getLedgerZNodeVersion() { + Optional getLedgerZNodeVersion() { return ledgerZNodeVersion; } } private final Map heldLocks = new ConcurrentHashMap(); private final Pattern idExtractionPattern; + private final String rootPath; private final String basePath; private final String urLedgerPath; private final String urLockPath; private final String layoutZNode; private final AbstractConfiguration conf; private final String lostBookieRecoveryDelayZnode; + private final String checkAllLedgersCtimeZnode; + private final String placementPolicyCheckCtimeZnode; + private final String replicasCheckCtimeZnode; private final ZooKeeper zkc; private final SubTreeCache subTreeCache; + private final RateLimiter rateLimiter; public ZkLedgerUnderreplicationManager(AbstractConfiguration conf, ZooKeeper zkc) - throws KeeperException, InterruptedException, ReplicationException.CompatibilityException { + throws UnavailableException, InterruptedException, ReplicationException.CompatibilityException { this.conf = conf; - basePath = getBasePath(ZKMetadataDriverBase.resolveZkLedgersRootPath(conf)); + if (conf.getZkReplicationTaskRateLimit() > 0) { + LOG.info("Throttling acquire task rate is configured to {} permits per second", + conf.getZkReplicationTaskRateLimit()); + rateLimiter = RateLimiter.create(conf.getZkReplicationTaskRateLimit()); + } else { + LOG.info("Throttling acquire task rate is disabled"); + rateLimiter = null; + } + rootPath = ZKMetadataDriverBase.resolveZkLedgersRootPath(conf); + basePath = getBasePath(rootPath); layoutZNode = basePath + '/' + BookKeeperConstants.LAYOUT_ZNODE; urLedgerPath = basePath + BookKeeperConstants.DEFAULT_ZK_LEDGERS_ROOT_PATH; urLockPath = basePath + '/' + BookKeeperConstants.UNDER_REPLICATION_LOCK; lostBookieRecoveryDelayZnode = basePath + '/' + BookKeeperConstants.LOSTBOOKIERECOVERYDELAY_NODE; - + checkAllLedgersCtimeZnode = basePath + '/' + BookKeeperConstants.CHECK_ALL_LEDGERS_CTIME; + placementPolicyCheckCtimeZnode = basePath + '/' + BookKeeperConstants.PLACEMENT_POLICY_CHECK_CTIME; + replicasCheckCtimeZnode = basePath + '/' + BookKeeperConstants.REPLICAS_CHECK_CTIME; idExtractionPattern = Pattern.compile("urL(\\d+)$"); this.zkc = zkc; this.subTreeCache = new SubTreeCache(new SubTreeCache.TreeProvider() { @@ -137,7 +158,11 @@ public List getChildren(String path, Watcher watcher) throws Interrupted } }); - checkLayout(); + try { + checkLayout(); + } catch (KeeperException ke) { + throw ReplicationException.fromKeeperException("", ke); + } } public static String getBasePath(String rootPath) { @@ -156,7 +181,7 @@ public static byte[] getLockData() { // if we cant get the address, ignore. 
it's optional // in the data structure in any case } - return TextFormat.printToString(lockDataBuilder.build()).getBytes(UTF_8); + return lockDataBuilder.build().toString().getBytes(UTF_8); } private void checkLayout() @@ -174,11 +199,11 @@ private void checkLayout() LedgerRereplicationLayoutFormat.Builder builder = LedgerRereplicationLayoutFormat.newBuilder(); builder.setType(LAYOUT).setVersion(LAYOUT_VERSION); try { - zkc.create(layoutZNode, TextFormat.printToString(builder.build()).getBytes(UTF_8), + zkc.create(layoutZNode, builder.build().toString().getBytes(UTF_8), zkAcls, CreateMode.PERSISTENT); + break; } catch (KeeperException.NodeExistsException nne) { // someone else managed to create it - continue; } } else { byte[] layoutData = zkc.getData(layoutZNode, false, null); @@ -243,18 +268,43 @@ public static String getUrLedgerLockZnode(String base, long ledgerId) { return String.format("%s/urL%010d", base, ledgerId); } - private String getUrLedgerZnode(long ledgerId) { + @VisibleForTesting + String getUrLedgerZnode(long ledgerId) { return getUrLedgerZnode(urLedgerPath, ledgerId); } - @VisibleForTesting - public UnderreplicatedLedgerFormat getLedgerUnreplicationInfo(long ledgerId) - throws KeeperException, TextFormat.ParseException, InterruptedException { - String znode = getUrLedgerZnode(ledgerId); - UnderreplicatedLedgerFormat.Builder builder = UnderreplicatedLedgerFormat.newBuilder(); - byte[] data = zkc.getData(znode, false, null); - TextFormat.merge(new String(data, UTF_8), builder); - return builder.build(); + @Override + public UnderreplicatedLedger getLedgerUnreplicationInfo(long ledgerId) + throws ReplicationException.UnavailableException { + try { + String znode = getUrLedgerZnode(ledgerId); + UnderreplicatedLedgerFormat.Builder builder = UnderreplicatedLedgerFormat.newBuilder(); + byte[] data = null; + try { + data = zkc.getData(znode, false, null); + } catch (KeeperException.NoNodeException nne) { + if (LOG.isDebugEnabled()) { + LOG.debug("Ledger: {} is not marked underreplicated", ledgerId); + } + return null; + } + TextFormat.merge(new String(data, UTF_8), builder); + UnderreplicatedLedgerFormat underreplicatedLedgerFormat = builder.build(); + UnderreplicatedLedger underreplicatedLedger = new UnderreplicatedLedger(ledgerId); + List replicaList = underreplicatedLedgerFormat.getReplicaList(); + long ctime = (underreplicatedLedgerFormat.hasCtime() ? 
underreplicatedLedgerFormat.getCtime() + : UnderreplicatedLedger.UNASSIGNED_CTIME); + underreplicatedLedger.setCtime(ctime); + underreplicatedLedger.setReplicaList(replicaList); + return underreplicatedLedger; + } catch (KeeperException ke) { + throw ReplicationException.fromKeeperException("Error contacting zookeeper", ke); + } catch (InterruptedException ie) { + Thread.currentThread().interrupt(); + throw new ReplicationException.UnavailableException("Interrupted while connecting zookeeper", ie); + } catch (TextFormat.ParseException pe) { + throw new ReplicationException.UnavailableException("Error parsing proto message", pe); + } } @Override @@ -278,7 +328,7 @@ private void tryMarkLedgerUnderreplicatedAsync(final String znode, builder.setCtime(System.currentTimeMillis()); } missingReplicas.forEach(builder::addReplica); - final byte[] urLedgerData = TextFormat.printToString(builder.build()).getBytes(UTF_8); + final byte[] urLedgerData = builder.build().toString().getBytes(UTF_8); ZkUtils.asyncCreateFullPathOptimistic( zkc, znode, urLedgerData, zkAcls, CreateMode.PERSISTENT, (rc, path, ctx, name) -> { @@ -329,7 +379,7 @@ private void handleLedgerUnderreplicatedAlreadyMarked(final String znode, if (conf.getStoreSystemTimeAsLedgerUnderreplicatedMarkTime()) { builder.setCtime(System.currentTimeMillis()); } - final byte[] newUrLedgerData = TextFormat.printToString(builder.build()).getBytes(UTF_8); + final byte[] newUrLedgerData = builder.build().toString().getBytes(UTF_8); zkc.setData(znode, newUrLedgerData, getStat.getVersion(), (setRc, setPath, setCtx, setStat) -> { if (Code.OK.intValue() == setRc) { FutureUtils.complete(finalFuture, null); @@ -357,24 +407,27 @@ public void markLedgerReplicated(long ledgerId) throws ReplicationException.Unav try { Lock l = heldLocks.get(ledgerId); if (l != null) { - zkc.delete(getUrLedgerZnode(ledgerId), l.getLedgerZNodeVersion()); + final Optional ledgerZNodeVersion = l.getLedgerZNodeVersion(); + if (ledgerZNodeVersion.isPresent()) { + zkc.delete(getUrLedgerZnode(ledgerId), ledgerZNodeVersion.get()); - try { - // clean up the hierarchy - String parts[] = getUrLedgerZnode(ledgerId).split("/"); - for (int i = 1; i <= 4; i++) { - String p[] = Arrays.copyOf(parts, parts.length - i); - String path = Joiner.on("/").join(p); - Stat s = zkc.exists(path, null); - if (s != null) { - zkc.delete(path, s.getVersion()); + try { + // clean up the hierarchy + String[] parts = getUrLedgerZnode(ledgerId).split("/"); + for (int i = 1; i <= 4; i++) { + String[] p = Arrays.copyOf(parts, parts.length - i); + String path = Joiner.on("/").join(p); + Stat s = zkc.exists(path, null); + if (s != null) { + zkc.delete(path, s.getVersion()); + } } + } catch (KeeperException.NotEmptyException nee) { + // This can happen when cleaning up the hierarchy. + // It's safe to ignore, it simply means another + // ledger in the same hierarchy has been marked as + // underreplicated. } - } catch (KeeperException.NotEmptyException nee) { - // This can happen when cleaning up the hierarchy. - // It's safe to ignore, it simply means another - // ledger in the same hierarchy has been marked as - // underreplicated. } } } catch (KeeperException.NoNodeException nne) { @@ -385,7 +438,7 @@ public void markLedgerReplicated(long ledgerId) throws ReplicationException.Unav // znode in place, so the ledger is checked. 
} catch (KeeperException ke) { LOG.error("Error deleting underreplicated ledger znode", ke); - throw new ReplicationException.UnavailableException("Error contacting zookeeper", ke); + throw ReplicationException.fromKeeperException("Error contacting zookeeper", ke); } catch (InterruptedException ie) { Thread.currentThread().interrupt(); throw new ReplicationException.UnavailableException("Interrupted while contacting zookeeper", ie); @@ -427,28 +480,18 @@ public boolean hasNext() { String parent = queue.remove(); try { for (String c : zkc.getChildren(parent, false)) { - try { - String child = parent + "/" + c; - if (c.startsWith("urL")) { - long ledgerId = getLedgerId(child); - UnderreplicatedLedgerFormat underreplicatedLedgerFormat = - getLedgerUnreplicationInfo(ledgerId); - List replicaList = underreplicatedLedgerFormat.getReplicaList(); - long ctime = (underreplicatedLedgerFormat.hasCtime() - ? underreplicatedLedgerFormat.getCtime() - : UnderreplicatedLedger.UNASSIGNED_CTIME); + String child = parent + "/" + c; + if (c.startsWith("urL")) { + long ledgerId = getLedgerId(child); + UnderreplicatedLedger underreplicatedLedger = getLedgerUnreplicationInfo(ledgerId); + if (underreplicatedLedger != null) { + List replicaList = underreplicatedLedger.getReplicaList(); if ((predicate == null) || predicate.test(replicaList)) { - UnderreplicatedLedger underreplicatedLedger = new UnderreplicatedLedger( - ledgerId); - underreplicatedLedger.setCtime(ctime); - underreplicatedLedger.setReplicaList(replicaList); curBatch.add(underreplicatedLedger); } - } else { - queue.add(child); } - } catch (KeeperException.NoNodeException nne) { - // ignore + } else { + queue.add(child); } } } catch (InterruptedException ie) { @@ -506,7 +549,7 @@ private long getLedgerToRereplicateFromHierarchy(String parent, long depth) String lockPath = urLockPath + "/" + tryChild; long ledgerId = getLedgerId(tryChild); zkc.create(lockPath, LOCK_DATA, zkAcls, CreateMode.EPHEMERAL); - heldLocks.put(ledgerId, new Lock(lockPath, stat.getVersion())); + heldLocks.put(ledgerId, new Lock(lockPath, Optional.of(stat.getVersion()))); return ledgerId; } catch (KeeperException.NodeExistsException nee) { children.remove(tryChild); @@ -549,7 +592,7 @@ public long pollLedgerToRereplicate() throws ReplicationException.UnavailableExc try { return getLedgerToRereplicateFromHierarchy(urLedgerPath, 0); } catch (KeeperException ke) { - throw new ReplicationException.UnavailableException("Error contacting zookeeper", ke); + throw ReplicationException.fromKeeperException("Error contacting zookeeper", ke); } catch (InterruptedException ie) { Thread.currentThread().interrupt(); throw new ReplicationException.UnavailableException("Interrupted while connecting zookeeper", ie); @@ -562,16 +605,15 @@ public long getLedgerToRereplicate() throws ReplicationException.UnavailableExce LOG.debug("getLedgerToRereplicate()"); } while (true) { + if (rateLimiter != null) { + rateLimiter.acquire(); + } final CountDownLatch changedLatch = new CountDownLatch(1); Watcher w = new Watcher() { + @Override public void process(WatchedEvent e) { - if (e.getType() == Watcher.Event.EventType.NodeChildrenChanged - || e.getType() == Watcher.Event.EventType.NodeDeleted - || e.getType() == Watcher.Event.EventType.NodeCreated - || e.getState() == Watcher.Event.KeeperState.Expired - || e.getState() == Watcher.Event.KeeperState.Disconnected) { - changedLatch.countDown(); - } + LOG.info("Latch countdown due to ZK event: " + e); + changedLatch.countDown(); } }; try (SubTreeCache.WatchGuard wg 
= subTreeCache.registerWatcherWithGuard(w)) { @@ -583,7 +625,7 @@ public void process(WatchedEvent e) { // nothing found, wait for a watcher to trigger changedLatch.await(); } catch (KeeperException ke) { - throw new ReplicationException.UnavailableException("Error contacting zookeeper", ke); + throw ReplicationException.fromKeeperException("Error contacting zookeeper", ke); } catch (InterruptedException ie) { Thread.currentThread().interrupt(); throw new ReplicationException.UnavailableException("Interrupted while connecting zookeeper", ie); @@ -593,8 +635,8 @@ public void process(WatchedEvent e) { private void waitIfLedgerReplicationDisabled() throws UnavailableException, InterruptedException { - ReplicationEnableCb cb = new ReplicationEnableCb(); if (!this.isLedgerReplicationEnabled()) { + ReplicationEnableCb cb = new ReplicationEnableCb(); this.notifyLedgerReplicationEnabled(cb); cb.await(); } @@ -606,7 +648,7 @@ public void releaseUnderreplicatedLedger(long ledgerId) throws ReplicationExcept LOG.debug("releaseLedger(ledgerId={})", ledgerId); } try { - Lock l = heldLocks.remove(ledgerId); + Lock l = heldLocks.get(ledgerId); if (l != null) { zkc.delete(l.getLockZNode(), -1); } @@ -614,11 +656,12 @@ public void releaseUnderreplicatedLedger(long ledgerId) throws ReplicationExcept // this is ok } catch (KeeperException ke) { LOG.error("Error deleting underreplicated ledger lock", ke); - throw new ReplicationException.UnavailableException("Error contacting zookeeper", ke); + throw ReplicationException.fromKeeperException("Error contacting zookeeper", ke); } catch (InterruptedException ie) { Thread.currentThread().interrupt(); throw new ReplicationException.UnavailableException("Interrupted while connecting zookeeper", ie); } + heldLocks.remove(ledgerId); } @Override @@ -634,7 +677,7 @@ public void close() throws ReplicationException.UnavailableException { // this is ok } catch (KeeperException ke) { LOG.error("Error deleting underreplicated ledger lock", ke); - throw new ReplicationException.UnavailableException("Error contacting zookeeper", ke); + throw ReplicationException.fromKeeperException("Error contacting zookeeper", ke); } catch (InterruptedException ie) { Thread.currentThread().interrupt(); throw new ReplicationException.UnavailableException("Interrupted while connecting zookeeper", ie); @@ -658,8 +701,7 @@ public void disableLedgerReplication() "AutoRecovery is already disabled!", ke); } catch (KeeperException ke) { LOG.error("Exception while stopping auto ledger re-replication", ke); - throw new ReplicationException.UnavailableException( - "Exception while stopping auto ledger re-replication", ke); + throw ReplicationException.fromKeeperException("Exception while stopping auto ledger re-replication", ke); } catch (InterruptedException ie) { Thread.currentThread().interrupt(); throw new ReplicationException.UnavailableException( @@ -682,8 +724,7 @@ public void enableLedgerReplication() "AutoRecovery is already enabled!", ke); } catch (KeeperException ke) { LOG.error("Exception while resuming ledger replication", ke); - throw new ReplicationException.UnavailableException( - "Exception while resuming auto ledger re-replication", ke); + throw ReplicationException.fromKeeperException("Exception while resuming auto ledger re-replication", ke); } catch (InterruptedException ie) { Thread.currentThread().interrupt(); throw new ReplicationException.UnavailableException( @@ -698,16 +739,12 @@ public boolean isLedgerReplicationEnabled() LOG.debug("isLedgerReplicationEnabled()"); } try { - if 
(null != zkc.exists(basePath + '/' - + BookKeeperConstants.DISABLE_NODE, false)) { - return false; - } - return true; + return null == zkc.exists(basePath + '/' + + BookKeeperConstants.DISABLE_NODE, false); } catch (KeeperException ke) { LOG.error("Error while checking the state of " + "ledger re-replication", ke); - throw new ReplicationException.UnavailableException( - "Error contacting zookeeper", ke); + throw ReplicationException.fromKeeperException("Error contacting zookeeper", ke); } catch (InterruptedException ie) { Thread.currentThread().interrupt(); throw new ReplicationException.UnavailableException( @@ -722,6 +759,7 @@ public void notifyLedgerReplicationEnabled(final GenericCallback cb) LOG.debug("notifyLedgerReplicationEnabled()"); } Watcher w = new Watcher() { + @Override public void process(WatchedEvent e) { if (e.getType() == Watcher.Event.EventType.NodeDeleted) { LOG.info("LedgerReplication is enabled externally through Zookeeper, " @@ -741,8 +779,7 @@ public void process(WatchedEvent e) { } catch (KeeperException ke) { LOG.error("Error while checking the state of " + "ledger re-replication", ke); - throw new ReplicationException.UnavailableException( - "Error contacting zookeeper", ke); + throw ReplicationException.fromKeeperException("Error contacting zookeeper", ke); } catch (InterruptedException ie) { Thread.currentThread().interrupt(); throw new ReplicationException.UnavailableException( @@ -753,35 +790,47 @@ public void process(WatchedEvent e) { /** * Check whether the ledger is being replicated by any bookie. */ - public static boolean isLedgerBeingReplicated(ZooKeeper zkc, String zkLedgersRootPath, long ledgerId) - throws KeeperException, - InterruptedException { - return zkc.exists(getUrLedgerLockZnode(getUrLockPath(zkLedgersRootPath), ledgerId), false) != null; + @Override + public boolean isLedgerBeingReplicated(long ledgerId) throws ReplicationException { + try { + return zkc.exists(getUrLedgerLockZnode(urLockPath, ledgerId), false) != null; + } catch (Exception e) { + throw new ReplicationException.UnavailableException("Failed to check ledger lock", e); + } } /** * Acquire the underreplicated ledger lock. */ - public static void acquireUnderreplicatedLedgerLock(ZooKeeper zkc, String zkLedgersRootPath, + public static String acquireUnderreplicatedLedgerLock(ZooKeeper zkc, String zkLedgersRootPath, long ledgerId, List zkAcls) - throws KeeperException, InterruptedException { - ZkUtils.createFullPathOptimistic(zkc, getUrLedgerLockZnode(getUrLockPath(zkLedgersRootPath), ledgerId), - LOCK_DATA, zkAcls, CreateMode.EPHEMERAL); + throws UnavailableException, InterruptedException { + try { + final String lockPath = getUrLedgerLockZnode(getUrLockPath(zkLedgersRootPath), ledgerId); + ZkUtils.createFullPathOptimistic(zkc, lockPath, LOCK_DATA, zkAcls, CreateMode.EPHEMERAL); + return lockPath; + } catch (KeeperException ke) { + throw ReplicationException.fromKeeperException("Error contacting zookeeper", ke); + } } - /** - * Release the underreplicated ledger lock if it exists. 
- */ - public static void releaseUnderreplicatedLedgerLock(ZooKeeper zkc, String zkLedgersRootPath, long ledgerId) - throws InterruptedException, KeeperException { - if (isLedgerBeingReplicated(zkc, zkLedgersRootPath, ledgerId)) { - zkc.delete(getUrLedgerLockZnode(getUrLockPath(zkLedgersRootPath), ledgerId), -1); + @Override + public void acquireUnderreplicatedLedger(long ledgerId) + throws ReplicationException { + try { + final String lockPath = acquireUnderreplicatedLedgerLock(zkc, rootPath, ledgerId, ZkUtils.getACLs(conf)); + heldLocks.put(ledgerId, new Lock(lockPath, Optional.empty())); + } catch (Exception e) { + throw new ReplicationException.UnavailableException( + "Failed to acquire underreplicated ledger lock for " + ledgerId, e); } } @Override public boolean initializeLostBookieRecoveryDelay(int lostBookieRecoveryDelay) throws UnavailableException { - LOG.debug("initializeLostBookieRecoveryDelay()"); + if (LOG.isDebugEnabled()) { + LOG.debug("initializeLostBookieRecoveryDelay()"); + } try { zkc.create(lostBookieRecoveryDelayZnode, Integer.toString(lostBookieRecoveryDelay).getBytes(UTF_8), Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); @@ -789,9 +838,12 @@ public boolean initializeLostBookieRecoveryDelay(int lostBookieRecoveryDelay) th LOG.info("lostBookieRecoveryDelay Znode is already present, so using " + "existing lostBookieRecoveryDelay Znode value"); return false; + } catch (KeeperException.NoNodeException nne) { + LOG.error("lostBookieRecoveryDelay Znode not found. Please verify if Auditor has been initialized.", nne); + return false; } catch (KeeperException ke) { LOG.error("Error while initializing LostBookieRecoveryDelay", ke); - throw new ReplicationException.UnavailableException("Error contacting zookeeper", ke); + throw ReplicationException.fromKeeperException("Error contacting zookeeper", ke); } catch (InterruptedException ie) { Thread.currentThread().interrupt(); throw new ReplicationException.UnavailableException("Interrupted while contacting zookeeper", ie); @@ -801,7 +853,9 @@ public boolean initializeLostBookieRecoveryDelay(int lostBookieRecoveryDelay) th @Override public void setLostBookieRecoveryDelay(int lostBookieRecoveryDelay) throws UnavailableException { - LOG.debug("setLostBookieRecoveryDelay()"); + if (LOG.isDebugEnabled()) { + LOG.debug("setLostBookieRecoveryDelay()"); + } try { if (zkc.exists(lostBookieRecoveryDelayZnode, false) != null) { zkc.setData(lostBookieRecoveryDelayZnode, Integer.toString(lostBookieRecoveryDelay).getBytes(UTF_8), @@ -812,7 +866,7 @@ public void setLostBookieRecoveryDelay(int lostBookieRecoveryDelay) throws Unava } } catch (KeeperException ke) { LOG.error("Error while setting LostBookieRecoveryDelay ", ke); - throw new ReplicationException.UnavailableException("Error contacting zookeeper", ke); + throw ReplicationException.fromKeeperException("Error contacting zookeeper", ke); } catch (InterruptedException ie) { Thread.currentThread().interrupt(); throw new ReplicationException.UnavailableException("Interrupted while contacting zookeeper", ie); @@ -821,13 +875,39 @@ public void setLostBookieRecoveryDelay(int lostBookieRecoveryDelay) throws Unava @Override public int getLostBookieRecoveryDelay() throws UnavailableException { - LOG.debug("getLostBookieRecoveryDelay()"); + if (LOG.isDebugEnabled()) { + LOG.debug("getLostBookieRecoveryDelay()"); + } try { byte[] data = zkc.getData(lostBookieRecoveryDelayZnode, false, null); return Integer.parseInt(new String(data, UTF_8)); } catch (KeeperException ke) { LOG.error("Error while getting 
LostBookieRecoveryDelay ", ke); - throw new ReplicationException.UnavailableException("Error contacting zookeeper", ke); + throw ReplicationException.fromKeeperException("Error contacting zookeeper", ke); + } catch (InterruptedException ie) { + Thread.currentThread().interrupt(); + throw new ReplicationException.UnavailableException("Interrupted while contacting zookeeper", ie); + } + } + + @Override + public void notifyUnderReplicationLedgerChanged(GenericCallback cb) throws UnavailableException { + if (LOG.isDebugEnabled()) { + LOG.debug("notifyUnderReplicationLedgerChanged()"); + } + Watcher w = new Watcher() { + @Override + public void process(WatchedEvent e) { + if (e.getType() == Event.EventType.NodeDeleted && idExtractionPattern.matcher(e.getPath()).find()) { + cb.operationComplete(0, null); + } + } + }; + try { + zkc.addWatch(urLedgerPath, w, AddWatchMode.PERSISTENT_RECURSIVE); + } catch (KeeperException ke) { + LOG.error("Error while checking the state of underReplicated ledgers", ke); + throw ReplicationException.fromKeeperException("Error contacting zookeeper", ke); } catch (InterruptedException ie) { Thread.currentThread().interrupt(); throw new ReplicationException.UnavailableException("Interrupted while contacting zookeeper", ie); @@ -836,8 +916,11 @@ public int getLostBookieRecoveryDelay() throws UnavailableException { @Override public void notifyLostBookieRecoveryDelayChanged(GenericCallback cb) throws UnavailableException { - LOG.debug("notifyLostBookieRecoveryDelayChanged()"); + if (LOG.isDebugEnabled()) { + LOG.debug("notifyLostBookieRecoveryDelayChanged()"); + } Watcher w = new Watcher() { + @Override public void process(WatchedEvent e) { if (e.getType() == Watcher.Event.EventType.NodeDataChanged) { cb.operationComplete(0, null); @@ -851,7 +934,7 @@ public void process(WatchedEvent e) { } } catch (KeeperException ke) { LOG.error("Error while checking the state of lostBookieRecoveryDelay", ke); - throw new ReplicationException.UnavailableException("Error contacting zookeeper", ke); + throw ReplicationException.fromKeeperException("Error contacting zookeeper", ke); } catch (InterruptedException ie) { Thread.currentThread().interrupt(); throw new ReplicationException.UnavailableException("Interrupted while contacting zookeeper", ie); @@ -872,7 +955,7 @@ public String getReplicationWorkerIdRereplicatingLedger(long ledgerId) // this is ok. 
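Note: notifyUnderReplicationLedgerChanged above relies on ZooKeeper 3.6+ persistent recursive watches: a single addWatch call covers the whole urLedgerPath subtree and keeps firing without re-registration, unlike the classic one-shot watches used elsewhere in this class. A self-contained sketch of the primitive; the connection string and path below are illustrative:

    import java.util.concurrent.CountDownLatch;
    import org.apache.zookeeper.AddWatchMode;
    import org.apache.zookeeper.Watcher;
    import org.apache.zookeeper.ZooKeeper;

    public class PersistentWatchDemo {
        public static void main(String[] args) throws Exception {
            CountDownLatch connected = new CountDownLatch(1);
            ZooKeeper zk = new ZooKeeper("localhost:2181", 30000, event -> {
                if (event.getState() == Watcher.Event.KeeperState.SyncConnected) {
                    connected.countDown();
                }
            });
            connected.await();
            // One registration, many notifications: fires for creates, deletes
            // and data changes anywhere under the subtree, with no re-arming.
            zk.addWatch("/ledgers/underreplication/ledgers", event -> {
                if (event.getType() == Watcher.Event.EventType.NodeDeleted) {
                    System.out.println("node removed: " + event.getPath());
                }
            }, AddWatchMode.PERSISTENT_RECURSIVE);
            Thread.sleep(60000); // keep the session alive while watching
        }
    }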
} catch (KeeperException e) { LOG.error("Error while getting ReplicationWorkerId rereplicating Ledger", e); - throw new ReplicationException.UnavailableException( + throw ReplicationException.fromKeeperException( "Error while getting ReplicationWorkerId rereplicating Ledger", e); } catch (InterruptedException e) { LOG.error("Got interrupted while getting ReplicationWorkerId rereplicating Ledger", e); @@ -884,4 +967,142 @@ public String getReplicationWorkerIdRereplicatingLedger(long ledgerId) } return replicationWorkerId; } + + @Override + public void setCheckAllLedgersCTime(long checkAllLedgersCTime) throws UnavailableException { + if (LOG.isDebugEnabled()) { + LOG.debug("setCheckAllLedgersCTime"); + } + try { + List<ACL> zkAcls = ZkUtils.getACLs(conf); + CheckAllLedgersFormat.Builder builder = CheckAllLedgersFormat.newBuilder(); + builder.setCheckAllLedgersCTime(checkAllLedgersCTime); + byte[] checkAllLedgersFormatByteArray = builder.build().toByteArray(); + if (zkc.exists(checkAllLedgersCtimeZnode, false) != null) { + zkc.setData(checkAllLedgersCtimeZnode, checkAllLedgersFormatByteArray, -1); + } else { + zkc.create(checkAllLedgersCtimeZnode, checkAllLedgersFormatByteArray, zkAcls, CreateMode.PERSISTENT); + } + } catch (KeeperException ke) { + throw ReplicationException.fromKeeperException("Error contacting zookeeper", ke); + } catch (InterruptedException ie) { + Thread.currentThread().interrupt(); + throw new ReplicationException.UnavailableException("Interrupted while contacting zookeeper", ie); + } + } + + @Override + public long getCheckAllLedgersCTime() throws UnavailableException { + if (LOG.isDebugEnabled()) { + LOG.debug("getCheckAllLedgersCTime"); + } + try { + byte[] data = zkc.getData(checkAllLedgersCtimeZnode, false, null); + CheckAllLedgersFormat checkAllLedgersFormat = CheckAllLedgersFormat.parseFrom(data); + return checkAllLedgersFormat.hasCheckAllLedgersCTime() ?
checkAllLedgersFormat.getCheckAllLedgersCTime() + : -1; + } catch (KeeperException.NoNodeException ne) { + LOG.warn("checkAllLedgersCtimeZnode is not yet available"); + return -1; + } catch (KeeperException ke) { + throw ReplicationException.fromKeeperException("Error contacting zookeeper", ke); + } catch (InterruptedException ie) { + Thread.currentThread().interrupt(); + throw new ReplicationException.UnavailableException("Interrupted while contacting zookeeper", ie); + } catch (InvalidProtocolBufferException ipbe) { + throw new ReplicationException.UnavailableException("Error while parsing ZK protobuf binary data", ipbe); + } + } + + @Override + public void setPlacementPolicyCheckCTime(long placementPolicyCheckCTime) throws UnavailableException { + if (LOG.isDebugEnabled()) { + LOG.debug("setPlacementPolicyCheckCTime"); + } + try { + List zkAcls = ZkUtils.getACLs(conf); + PlacementPolicyCheckFormat.Builder builder = PlacementPolicyCheckFormat.newBuilder(); + builder.setPlacementPolicyCheckCTime(placementPolicyCheckCTime); + byte[] placementPolicyCheckFormatByteArray = builder.build().toByteArray(); + if (zkc.exists(placementPolicyCheckCtimeZnode, false) != null) { + zkc.setData(placementPolicyCheckCtimeZnode, placementPolicyCheckFormatByteArray, -1); + } else { + zkc.create(placementPolicyCheckCtimeZnode, placementPolicyCheckFormatByteArray, zkAcls, + CreateMode.PERSISTENT); + } + } catch (KeeperException ke) { + throw ReplicationException.fromKeeperException("Error contacting zookeeper", ke); + } catch (InterruptedException ie) { + Thread.currentThread().interrupt(); + throw new ReplicationException.UnavailableException("Interrupted while contacting zookeeper", ie); + } + } + + @Override + public long getPlacementPolicyCheckCTime() throws UnavailableException { + if (LOG.isDebugEnabled()) { + LOG.debug("getPlacementPolicyCheckCTime"); + } + try { + byte[] data = zkc.getData(placementPolicyCheckCtimeZnode, false, null); + PlacementPolicyCheckFormat placementPolicyCheckFormat = PlacementPolicyCheckFormat.parseFrom(data); + return placementPolicyCheckFormat.hasPlacementPolicyCheckCTime() + ? 
placementPolicyCheckFormat.getPlacementPolicyCheckCTime() : -1; + } catch (KeeperException.NoNodeException ne) { + LOG.warn("placementPolicyCheckCtimeZnode is not yet available"); + return -1; + } catch (KeeperException ke) { + throw ReplicationException.fromKeeperException("Error contacting zookeeper", ke); + } catch (InterruptedException ie) { + Thread.currentThread().interrupt(); + throw new ReplicationException.UnavailableException("Interrupted while contacting zookeeper", ie); + } catch (InvalidProtocolBufferException ipbe) { + throw new ReplicationException.UnavailableException("Error while parsing ZK protobuf binary data", ipbe); + } + } + + @Override + public void setReplicasCheckCTime(long replicasCheckCTime) throws UnavailableException { + try { + List zkAcls = ZkUtils.getACLs(conf); + ReplicasCheckFormat.Builder builder = ReplicasCheckFormat.newBuilder(); + builder.setReplicasCheckCTime(replicasCheckCTime); + byte[] replicasCheckFormatByteArray = builder.build().toByteArray(); + if (zkc.exists(replicasCheckCtimeZnode, false) != null) { + zkc.setData(replicasCheckCtimeZnode, replicasCheckFormatByteArray, -1); + } else { + zkc.create(replicasCheckCtimeZnode, replicasCheckFormatByteArray, zkAcls, CreateMode.PERSISTENT); + } + if (LOG.isDebugEnabled()) { + LOG.debug("setReplicasCheckCTime completed successfully"); + } + } catch (KeeperException ke) { + throw ReplicationException.fromKeeperException("Error contacting zookeeper", ke); + } catch (InterruptedException ie) { + Thread.currentThread().interrupt(); + throw new ReplicationException.UnavailableException("Interrupted while contacting zookeeper", ie); + } + } + + @Override + public long getReplicasCheckCTime() throws UnavailableException { + try { + byte[] data = zkc.getData(replicasCheckCtimeZnode, false, null); + ReplicasCheckFormat replicasCheckFormat = ReplicasCheckFormat.parseFrom(data); + if (LOG.isDebugEnabled()) { + LOG.debug("getReplicasCheckCTime completed successfully"); + } + return replicasCheckFormat.hasReplicasCheckCTime() ? 
replicasCheckFormat.getReplicasCheckCTime() : -1; + } catch (KeeperException.NoNodeException ne) { + LOG.warn("replicasCheckCtimeZnode is not yet available"); + return -1; + } catch (KeeperException ke) { + throw ReplicationException.fromKeeperException("Error contacting zookeeper", ke); + } catch (InterruptedException ie) { + Thread.currentThread().interrupt(); + throw new ReplicationException.UnavailableException("Interrupted while contacting zookeeper", ie); + } catch (InvalidProtocolBufferException ipbe) { + throw new ReplicationException.UnavailableException("Error while parsing ZK protobuf binary data", ipbe); + } + } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/zk/ZKMetadataBookieDriver.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/zk/ZKMetadataBookieDriver.java index e7b1ad0d596..252f4b7c1dd 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/zk/ZKMetadataBookieDriver.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/zk/ZKMetadataBookieDriver.java @@ -25,13 +25,13 @@ import lombok.extern.slf4j.Slf4j; import org.apache.bookkeeper.conf.ServerConfiguration; import org.apache.bookkeeper.discover.RegistrationManager; -import org.apache.bookkeeper.discover.RegistrationManager.RegistrationListener; import org.apache.bookkeeper.discover.ZKRegistrationManager; import org.apache.bookkeeper.meta.MetadataBookieDriver; import org.apache.bookkeeper.meta.MetadataDrivers; import org.apache.bookkeeper.meta.exceptions.MetadataException; import org.apache.bookkeeper.stats.StatsLogger; import org.apache.bookkeeper.zookeeper.BoundExponentialBackoffRetryPolicy; +import org.apache.zookeeper.ZooKeeper; /** * ZooKeeper based metadata bookie driver. @@ -49,53 +49,35 @@ public class ZKMetadataBookieDriver } ServerConfiguration serverConf; - RegistrationManager regManager; - RegistrationListener listener; @Override public synchronized MetadataBookieDriver initialize(ServerConfiguration conf, - RegistrationListener listener, StatsLogger statsLogger) throws MetadataException { super.initialize( conf, statsLogger.scope(BOOKIE_SCOPE), new BoundExponentialBackoffRetryPolicy(conf.getZkRetryBackoffStartMs(), - conf.getZkRetryBackoffMaxMs(), Integer.MAX_VALUE), + conf.getZkRetryBackoffMaxMs(), conf.getZkRetryBackoffMaxRetries()), Optional.empty()); this.serverConf = conf; - this.listener = listener; this.statsLogger = statsLogger; return this; } - @VisibleForTesting - public synchronized void setRegManager(RegistrationManager regManager) { - this.regManager = regManager; + @Override + public synchronized RegistrationManager createRegistrationManager() { + ZKRegistrationManager zkRegistrationManager = newZKRegistrationManager(serverConf, zk); + return zkRegistrationManager; } - @Override - public synchronized RegistrationManager getRegistrationManager() { - if (null == regManager) { - regManager = new ZKRegistrationManager( - serverConf, - zk, - listener - ); - } - return regManager; + @VisibleForTesting + ZKRegistrationManager newZKRegistrationManager(ServerConfiguration serverConf, ZooKeeper zk) { + return new ZKRegistrationManager(serverConf, zk); } @Override public void close() { - RegistrationManager rmToClose; - synchronized (this) { - rmToClose = regManager; - regManager = null; - } - if (null != rmToClose) { - rmToClose.close(); - } super.close(); } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/zk/ZKMetadataClientDriver.java 
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/zk/ZKMetadataClientDriver.java index a5dcaa740cf..1fe6210ce9b 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/zk/ZKMetadataClientDriver.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/zk/ZKMetadataClientDriver.java @@ -18,6 +18,7 @@ */ package org.apache.bookkeeper.meta.zk; +import com.google.common.annotations.VisibleForTesting; import java.util.Optional; import java.util.concurrent.ScheduledExecutorService; import lombok.extern.slf4j.Slf4j; @@ -29,6 +30,9 @@ import org.apache.bookkeeper.meta.exceptions.MetadataException; import org.apache.bookkeeper.stats.StatsLogger; import org.apache.bookkeeper.zookeeper.BoundExponentialBackoffRetryPolicy; +import org.apache.zookeeper.Watcher.Event.EventType; +import org.apache.zookeeper.Watcher.Event.KeeperState; +import org.apache.zookeeper.ZooKeeper; /** * ZooKeeper based metadata client driver. @@ -47,6 +51,7 @@ public class ZKMetadataClientDriver ClientConfiguration clientConf; ScheduledExecutorService scheduler; RegistrationClient regClient; + boolean bookieAddressTracking = true; @Override public synchronized MetadataClientDriver initialize(ClientConfiguration conf, @@ -61,25 +66,33 @@ public synchronized MetadataClientDriver initialize(ClientConfiguration conf, new BoundExponentialBackoffRetryPolicy( conf.getZkTimeout(), conf.getZkTimeout(), - 0), + conf.getZkRetryBackoffMaxRetries()), optionalCtx); this.statsLogger = statsLogger; this.clientConf = conf; this.scheduler = scheduler; + this.bookieAddressTracking = conf.getEnableBookieAddressTracking(); return this; } @Override public synchronized RegistrationClient getRegistrationClient() { if (null == regClient) { - regClient = new ZKRegistrationClient( - zk, - ledgersRootPath, - scheduler); + regClient = newZKRegistrationClient( + zk, + ledgersRootPath, + scheduler, + bookieAddressTracking); } return regClient; } + @VisibleForTesting + ZKRegistrationClient newZKRegistrationClient(ZooKeeper zk, String ledgersRootPath, + ScheduledExecutorService scheduler, boolean bookieAddressTracking) { + return new ZKRegistrationClient(zk, ledgersRootPath, scheduler, bookieAddressTracking); + } + @Override public synchronized void close() { if (null != regClient) { @@ -88,4 +101,14 @@ public synchronized void close() { } super.close(); } + + @Override + public void setSessionStateListener(SessionStateListener sessionStateListener) { + zk.register((event) -> { + // Check for expired connection. 
+ if (event.getType().equals(EventType.None) && event.getState().equals(KeeperState.Expired)) { + sessionStateListener.onSessionExpired(); + } + }); + } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/zk/ZKMetadataDriverBase.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/zk/ZKMetadataDriverBase.java index a9e1a13966d..643ddd3ced2 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/zk/ZKMetadataDriverBase.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/zk/ZKMetadataDriverBase.java @@ -21,6 +21,7 @@ import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Preconditions.checkNotNull; import static org.apache.bookkeeper.util.BookKeeperConstants.AVAILABLE_NODE; +import static org.apache.bookkeeper.util.BookKeeperConstants.DISABLE_HEALTH_CHECK; import static org.apache.bookkeeper.util.BookKeeperConstants.EMPTY_BYTE_ARRAY; import static org.apache.bookkeeper.util.BookKeeperConstants.READONLY; @@ -28,6 +29,7 @@ import java.net.URI; import java.util.List; import java.util.Optional; +import java.util.concurrent.CompletableFuture; import lombok.Getter; import lombok.Setter; import lombok.SneakyThrows; @@ -42,15 +44,18 @@ import org.apache.bookkeeper.meta.exceptions.Code; import org.apache.bookkeeper.meta.exceptions.MetadataException; import org.apache.bookkeeper.stats.StatsLogger; +import org.apache.bookkeeper.util.BookKeeperConstants; import org.apache.bookkeeper.util.ZkUtils; import org.apache.bookkeeper.zookeeper.RetryPolicy; import org.apache.bookkeeper.zookeeper.ZooKeeperClient; import org.apache.commons.configuration.ConfigurationException; import org.apache.commons.lang3.StringUtils; +import org.apache.zookeeper.AsyncCallback; import org.apache.zookeeper.CreateMode; import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.ZooKeeper; import org.apache.zookeeper.data.ACL; +import org.apache.zookeeper.data.Stat; /** * This is a mixin class for supporting zookeeper based metadata driver. 
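Note: the setSessionStateListener hook added to ZKMetadataClientDriver above surfaces session expiry (EventType.None plus KeeperState.Expired) to the owner of the driver. A short usage sketch; the driver variable and the restart() recovery hook are hypothetical:

    // SessionStateListener exposes a single onSessionExpired() method,
    // so a lambda is enough here.
    driver.setSessionStateListener(() -> {
        // An expired session invalidates ephemeral nodes and watches, so the
        // typical reaction is to tear down and rebuild the metadata client.
        restart(); // hypothetical recovery hook
    });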
@@ -59,9 +64,14 @@ public class ZKMetadataDriverBase implements AutoCloseable { protected static final String SCHEME = "zk"; + private static final int ZK_CLIENT_WAIT_FOR_SHUTDOWN_TIMEOUT_MS = 5000; public static String getZKServersFromServiceUri(URI uri) { - return uri.getAuthority().replace(";", ","); + String authority = uri.getAuthority(); + if (authority == null) { + throw new IllegalArgumentException("Invalid metadata service URI format: " + uri); + } + return authority.replace(";", ","); } @SuppressWarnings("deprecation") @@ -117,7 +127,8 @@ public static Class<? extends LedgerManagerFactory> resolveLedgerManagerFactory( + schemeParts[1] + "' at uri : " + metadataServiceUri); } } else { - ledgerManagerFactoryClass = HierarchicalLedgerManagerFactory.class; + // behave as in the null case: infer the layout from the store or fall back to the default + ledgerManagerFactoryClass = null; } return ledgerManagerFactoryClass; } @@ -135,6 +146,9 @@ public static Class<? extends LedgerManagerFactory> resolveLedgerManagerFactory( // instantiated us protected boolean ownZKHandle = false; + // disable health check path + String disableHealthCheckPath; + // ledgers root path protected String ledgersRootPath; @@ -182,7 +196,13 @@ protected void initialize(AbstractConfiguration conf, final String bookieReadonlyRegistrationPath = bookieRegistrationPath + "/" + READONLY; // construct the zookeeper - final String zkServers = getZKServersFromServiceUri(metadataServiceUri); + final String zkServers; + try { + zkServers = getZKServersFromServiceUri(metadataServiceUri); + } catch (IllegalArgumentException ex) { + throw new MetadataException( + Code.INVALID_METADATA_SERVICE_URI, ex); + } log.info("Initialize zookeeper metadata driver at metadata service uri {} :" + " zkServers = {}, ledgersRootPath = {}.", metadataServiceUriStr, zkServers, ledgersRootPath); @@ -219,6 +239,7 @@ protected void initialize(AbstractConfiguration conf, this.ownZKHandle = true; } + disableHealthCheckPath = ledgersRootPath + "/" + DISABLE_HEALTH_CHECK; // once created the zookeeper client, create the layout manager and registration client this.layoutManager = new ZkLayoutManager( zk, @@ -249,6 +270,66 @@ public synchronized LedgerManagerFactory getLedgerManagerFactory() return lmFactory; } + public CompletableFuture<Void> disableHealthCheck() { + CompletableFuture<Void> createResult = new CompletableFuture<>(); + zk.create(disableHealthCheckPath, BookKeeperConstants.EMPTY_BYTE_ARRAY, acls, + CreateMode.PERSISTENT, new AsyncCallback.StringCallback() { + @Override + public void processResult(int rc, String path, Object ctx, String name) { + if (KeeperException.Code.OK.intValue() == rc) { + createResult.complete(null); + } else if (KeeperException.Code.NODEEXISTS.intValue() == rc) { + if (log.isDebugEnabled()) { + log.debug("health check already disabled!"); + } + createResult.complete(null); + } else { + createResult.completeExceptionally(KeeperException.create(KeeperException.Code.get(rc), path)); + } + } + }, null); + + return createResult; + } + + public CompletableFuture<Void> enableHealthCheck() { + CompletableFuture<Void> deleteResult = new CompletableFuture<>(); + + zk.delete(disableHealthCheckPath, -1, new AsyncCallback.VoidCallback() { + @Override + public void processResult(int rc, String path, Object ctx) { + if (KeeperException.Code.OK.intValue() == rc) { + deleteResult.complete(null); + } else if (KeeperException.Code.NONODE.intValue() == rc) { + if (log.isDebugEnabled()) { + log.debug("health check already enabled!"); + } + deleteResult.complete(null); + } else { +
deleteResult.completeExceptionally(KeeperException.create(KeeperException.Code.get(rc), path)); + } + } + }, null); + + return deleteResult; + } + + public CompletableFuture isHealthCheckEnabled() { + CompletableFuture enableResult = new CompletableFuture<>(); + zk.exists(disableHealthCheckPath, false, new AsyncCallback.StatCallback() { + @Override + public void processResult(int rc, String path, Object ctx, Stat stat) { + if (KeeperException.Code.OK.intValue() == rc) { + enableResult.complete(false); + } else { + enableResult.complete(true); + } + } + }, null); + + return enableResult; + } + @Override public void close() { if (null != lmFactory) { @@ -261,7 +342,7 @@ public void close() { } if (ownZKHandle && null != zk) { try { - zk.close(); + zk.close(ZK_CLIENT_WAIT_FOR_SHUTDOWN_TIMEOUT_MS); } catch (InterruptedException e) { Thread.currentThread().interrupt(); log.warn("Interrupted on closing zookeeper client", e); diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/metastore/InMemoryMetaStore.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/metastore/InMemoryMetaStore.java index b792399aa83..90cb4501817 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/metastore/InMemoryMetaStore.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/metastore/InMemoryMetaStore.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -19,7 +19,6 @@ import java.util.HashMap; import java.util.Map; - import org.apache.commons.configuration.Configuration; /** diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/metastore/InMemoryMetastoreCursor.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/metastore/InMemoryMetastoreCursor.java index 17e75b3c588..98410d2354c 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/metastore/InMemoryMetastoreCursor.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/metastore/InMemoryMetastoreCursor.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -20,7 +20,6 @@ import static org.apache.bookkeeper.metastore.InMemoryMetastoreTable.cloneValue; import com.google.common.collect.ImmutableSortedMap; - import java.io.IOException; import java.util.ArrayList; import java.util.Iterator; @@ -29,7 +28,6 @@ import java.util.Set; import java.util.SortedMap; import java.util.concurrent.ScheduledExecutorService; - import org.apache.bookkeeper.metastore.MSException.Code; import org.apache.bookkeeper.versioning.Versioned; diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/metastore/InMemoryMetastoreTable.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/metastore/InMemoryMetastoreTable.java index 25cb3553bbb..6b1deee915e 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/metastore/InMemoryMetastoreTable.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/metastore/InMemoryMetastoreTable.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file * distributed with this work for additional information @@ -18,13 +18,11 @@ package org.apache.bookkeeper.metastore; import com.google.common.util.concurrent.ThreadFactoryBuilder; - import java.util.NavigableMap; import java.util.Set; import java.util.TreeMap; import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; - import org.apache.bookkeeper.metastore.MSException.Code; import org.apache.bookkeeper.versioning.Version; import org.apache.bookkeeper.versioning.Versioned; diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/metastore/MSException.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/metastore/MSException.java index 6d16c1f356a..f0d2c3bc74d 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/metastore/MSException.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/metastore/MSException.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -40,7 +40,7 @@ public enum Code { InterruptedException (-100, "Operation interrupted"), IllegalOp (-101, "Illegal operation"), ServiceDown (-102, "Metadata service is down"), - OperationFailure(-103, "Operaion failed on metadata storage server side"); + OperationFailure(-103, "Operation failed on metadata storage server side"); private static final Map codes = new HashMap(); diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/metastore/MSWatchedEvent.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/metastore/MSWatchedEvent.java index 08c17214333..84d2e92ab30 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/metastore/MSWatchedEvent.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/metastore/MSWatchedEvent.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/metastore/MetaStore.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/metastore/MetaStore.java index 62d7a32395d..04eb74f2eb7 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/metastore/MetaStore.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/metastore/MetaStore.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -31,7 +31,7 @@ public interface MetaStore { String getName(); /** - * Get the plugin verison. + * Get the plugin version. * * @return the plugin version. */ diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/metastore/MetastoreCallback.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/metastore/MetastoreCallback.java index b25311a6638..f1679ed5310 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/metastore/MetastoreCallback.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/metastore/MetastoreCallback.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file * distributed with this work for additional information diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/metastore/MetastoreCursor.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/metastore/MetastoreCursor.java index abd1cd47821..a2a2211a231 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/metastore/MetastoreCursor.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/metastore/MetastoreCursor.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/metastore/MetastoreException.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/metastore/MetastoreException.java index f66fe00e840..f1f8d04d30b 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/metastore/MetastoreException.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/metastore/MetastoreException.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/metastore/MetastoreFactory.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/metastore/MetastoreFactory.java index 92f2eadcca9..30a36c7ac7d 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/metastore/MetastoreFactory.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/metastore/MetastoreFactory.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -17,7 +17,7 @@ */ package org.apache.bookkeeper.metastore; -import org.apache.bookkeeper.util.ReflectionUtils; +import org.apache.bookkeeper.common.util.ReflectionUtils; /** * Metastore Factory. diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/metastore/MetastoreScannableTable.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/metastore/MetastoreScannableTable.java index 1e073d4f0ff..bc92a7938a7 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/metastore/MetastoreScannableTable.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/metastore/MetastoreScannableTable.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/metastore/MetastoreTable.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/metastore/MetastoreTable.java index d302b681e77..562fcda1aa8 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/metastore/MetastoreTable.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/metastore/MetastoreTable.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file * distributed with this work for additional information diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/metastore/MetastoreTableItem.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/metastore/MetastoreTableItem.java index 349548b1876..0155bf48127 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/metastore/MetastoreTableItem.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/metastore/MetastoreTableItem.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/metastore/MetastoreUtils.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/metastore/MetastoreUtils.java index 6e26f4d5da8..1f326b1eec1 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/metastore/MetastoreUtils.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/metastore/MetastoreUtils.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -22,7 +22,6 @@ import java.util.List; import java.util.concurrent.CountDownLatch; import java.util.concurrent.atomic.AtomicInteger; - import org.apache.bookkeeper.metastore.MSException.Code; import org.apache.bookkeeper.versioning.Version; import org.slf4j.Logger; diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/metastore/MetastoreWatcher.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/metastore/MetastoreWatcher.java index b751dfa7fc0..ebd0dd3be61 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/metastore/MetastoreWatcher.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/metastore/MetastoreWatcher.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/metastore/Value.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/metastore/Value.java index fef83dbd19c..3936d77bd86 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/metastore/Value.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/metastore/Value.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file * distributed with this work for additional information @@ -17,14 +17,13 @@ */ package org.apache.bookkeeper.metastore; -import static com.google.common.base.Charsets.UTF_8; +import static java.nio.charset.StandardCharsets.UTF_8; import static org.apache.bookkeeper.metastore.MetastoreTable.ALL_FIELDS; import com.google.common.hash.HashFunction; import com.google.common.hash.Hasher; import com.google.common.hash.Hashing; import com.google.common.primitives.UnsignedBytes; - import java.nio.charset.Charset; import java.util.Collections; import java.util.Comparator; @@ -92,7 +91,7 @@ public Value project(Set<String> fields) { @Override public int hashCode() { - HashFunction hf = Hashing.murmur3_32(); + HashFunction hf = Hashing.murmur3_32_fixed(); Hasher hc = hf.newHasher(); for (String key : fields.keySet()) { hc.putString(key, Charset.defaultCharset()); diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/net/AbstractDNSToSwitchMapping.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/net/AbstractDNSToSwitchMapping.java index 84e9bd47670..3060b86bbb3 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/net/AbstractDNSToSwitchMapping.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/net/AbstractDNSToSwitchMapping.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -20,9 +20,10 @@ import java.util.HashSet; import java.util.Map; import java.util.Set; - import org.apache.bookkeeper.conf.Configurable; +import org.apache.bookkeeper.proto.BookieAddressResolver; import org.apache.commons.configuration.Configuration; +import org.apache.commons.lang.StringUtils; /** * This is a base class for DNS to Switch mappings. @@ -41,6 +42,7 @@ public abstract class AbstractDNSToSwitchMapping implements DNSToSwitchMapping, Configurable { private Configuration conf; + private BookieAddressResolver bookieAddressResolver; /** * Create an unconfigured instance. @@ -58,12 +60,24 @@ protected AbstractDNSToSwitchMapping(Configuration conf) { this.conf = conf; } + public BookieAddressResolver getBookieAddressResolver() { + return bookieAddressResolver; + } + + @Override + public void setBookieAddressResolver(BookieAddressResolver bookieAddressResolver) { + this.bookieAddressResolver = bookieAddressResolver; + } + + @Override public Configuration getConf() { return conf; } + @Override public void setConf(Configuration conf) { this.conf = conf; + validateConf(); } /** @@ -100,7 +114,7 @@ public Map<String, String> getSwitchMap() { public String dumpTopology() { Map<String, String> rack = getSwitchMap(); StringBuilder builder = new StringBuilder(); - builder.append("Mapping: ").append(toString()).append("\n"); + builder.append("Mapping: ").append(this).append("\n"); if (rack != null) { builder.append("Map:\n"); Set<String> switches = new HashSet<String>(); @@ -117,7 +131,8 @@ public String dumpTopology() { } protected boolean isSingleSwitchByScriptPolicy() { - return conf != null && conf.getString(CommonConfigurationKeys.NET_TOPOLOGY_SCRIPT_FILE_NAME_KEY) == null; + return conf != null + && StringUtils.isBlank(conf.getString(CommonConfigurationKeys.NET_TOPOLOGY_SCRIPT_FILE_NAME_KEY)); } /** @@ -136,4 +151,10 @@ public static boolean isMappingSingleSwitch(DNSToSwitchMapping mapping) { && ((AbstractDNSToSwitchMapping) mapping).isSingleSwitch(); } + /** + * When setConf is called it should sanity-check the conf/env and + * throw a RuntimeException if things are not valid. + */ + protected void validateConf() { + } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/net/BookieId.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/net/BookieId.java new file mode 100644 index 00000000000..31fc8f5e926 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/net/BookieId.java @@ -0,0 +1,90 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.net; + +import java.util.Objects; + +/** + * An identifier for a Bookie. + */ +public final class BookieId { + + private final String id; + + private BookieId(String id) { + validateBookieId(id); + this.id = id; + } + + /** + * Returns the serialized version of this object. + * @return the bookieId + */ + @Override + public String toString() { + return id; + } + + /** + * Parses the given serialized representation of a BookieId. + * @param serialized the serialized BookieId + * @return the parsed BookieId + */ + public static BookieId parse(String serialized) { + return new BookieId(serialized); + } + + public String getId() { + return id; + } + + @Override + public int hashCode() { + return this.id.hashCode(); + } + + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + if (obj == null) { + return false; + } + if (getClass() != obj.getClass()) { + return false; + } + final BookieId other = (BookieId) obj; + if (!Objects.equals(this.id, other.id)) { + return false; + } + return true; + } + + private static void validateBookieId(String id) { + Objects.requireNonNull(id, "BookieId cannot be null"); + if (!(id.matches("[a-zA-Z0-9:_.\\-]+")) + || "readonly".equalsIgnoreCase(id)) { + throw new IllegalArgumentException("BookieId " + id + " is not valid"); + } + } + +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/net/BookieNode.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/net/BookieNode.java new file mode 100644 index 00000000000..abf22c04e36 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/net/BookieNode.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bookkeeper.net; + +/** + * Bookie Node implementation. + */ +public class BookieNode extends NodeBase { + private final BookieId addr; // identifier of a bookie node. + + public BookieNode(BookieId addr, String networkLoc) { + super(addr.toString(), networkLoc); + this.addr = addr; + } + + public BookieId getAddr() { + return addr; + } + + @Override + public int hashCode() { + return name.hashCode(); + } + + @Override + public boolean equals(Object obj) { + if (!(obj instanceof BookieNode)) { + return false; + } + BookieNode other = (BookieNode) obj; + return getName().equals(other.getName()); + } + + @Override + public String toString() { + return String.format("<Bookie:%s>", name); + } +} \ No newline at end of file diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/net/BookieSocketAddress.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/net/BookieSocketAddress.java index 4e8f3246c66..b426ab4f9d7 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/net/BookieSocketAddress.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/net/BookieSocketAddress.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -22,10 +22,12 @@ import static org.apache.bookkeeper.util.BookKeeperConstants.COLON; -import io.netty.channel.local.LocalAddress; - +import com.fasterxml.jackson.annotation.JsonIgnore; +import com.google.common.net.InetAddresses; import java.net.InetSocketAddress; import java.net.UnknownHostException; +import java.util.Optional; +import org.apache.bookkeeper.proto.BookieAddressResolver; /** * This is a data wrapper class that is an InetSocketAddress, it would use the hostname @@ -38,14 +40,24 @@ public class BookieSocketAddress { // Member fields that make up this class. private final String hostname; private final int port; - - private final InetSocketAddress socketAddress; + private final Optional<InetSocketAddress> socketAddress; // Constructor that takes in a hostname and a port. public BookieSocketAddress(String hostname, int port) { this.hostname = hostname; this.port = port; - socketAddress = new InetSocketAddress(hostname, port); + /* + * If an IP address is used as the bookie id, cache the + * InetSocketAddress; otherwise do not cache it. When a hostname is + * used as the bookie id it is fine for the node to change its IP + * address, but when an IP address is used as the bookie id a change + * of IP is an invalid scenario, since the node's hostname is + * considered static. + */ + if (InetAddresses.isInetAddress(hostname)) { + socketAddress = Optional.of(new InetSocketAddress(hostname, port)); + } else { + socketAddress = Optional.empty(); + } } + + // Constructor from a String "serialized" version of this class.
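Note: BookieId above is deliberately just a validated string with no network semantics, while BookieSocketAddress keeps the legacy host:port form; the helpers in the hunks below (toBookieId, LEGACY_BOOKIEID_RESOLVER, resolveLegacyBookieId) convert between the two. A round-trip sketch; the host name and port are illustrative:

    import org.apache.bookkeeper.net.BookieId;
    import org.apache.bookkeeper.net.BookieSocketAddress;

    public class BookieIdRoundTrip {
        public static void main(String[] args) {
            BookieSocketAddress addr = new BookieSocketAddress("bookie-1.example.com", 3181);
            BookieId id = addr.toBookieId(); // serialized as "bookie-1.example.com:3181"
            // The legacy resolver parses host:port style ids back into addresses.
            BookieSocketAddress resolved = BookieSocketAddress.resolveLegacyBookieId(id);
            System.out.println(id + " -> " + resolved.getSocketAddress());
        }
    }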
@@ -60,9 +72,15 @@ public BookieSocketAddress(String addr) throws UnknownHostException { } catch (NumberFormatException nfe) { throw new UnknownHostException(addr); } - socketAddress = new InetSocketAddress(hostname, port); + if (InetAddresses.isInetAddress(hostname)) { + socketAddress = Optional.of(new InetSocketAddress(hostname, port)); + } else { + socketAddress = Optional.empty(); + } } + + // Public getters public String getHostName() { return hostname; @@ -73,24 +91,23 @@ public int getPort() { } // Method to return an InetSocketAddress for the regular port. + @JsonIgnore public InetSocketAddress getSocketAddress() { - return socketAddress; - } - - /** - * Maps the socketAddress to a "local" address. - */ - public LocalAddress getLocalAddress() { - // for local address, we just need "port" to differentiate different addresses. - return new LocalAddress("" + port); + /* + * Return a new InetSocketAddress instance each time when a hostname + * is used as the bookie id. If we kept reusing the same + * InetSocketAddress instance and a bookie advertising a hostname + * changed its IP, the BK client would keep trying to connect to the + * old IP forever. + */ + return socketAddress.orElseGet(() -> new InetSocketAddress(hostname, port)); } // Return the String "serialized" version of this object. @Override public String toString() { - StringBuilder sb = new StringBuilder(); - sb.append(hostname).append(COLON).append(port); - return sb.toString(); + return hostname + COLON + port; } // Implement an equals method comparing two BookieSocketAddress objects. @@ -108,4 +125,53 @@ public int hashCode() { return this.hostname.hashCode() + 13 * this.port; } + /** + * Create a BookieId in the legacy hostname:port format. + * @return the BookieId + */ + public BookieId toBookieId() { + return BookieId.parse(this.toString()); + } + + /** + * Simple converter from legacy BookieId to a real network address. + */ + public static final BookieAddressResolver LEGACY_BOOKIEID_RESOLVER = (BookieId b) -> { + try { + return new BookieSocketAddress(b.toString()); + } catch (UnknownHostException err) { + throw new BookieAddressResolver.BookieIdNotResolvedException(b, err); + } + }; + + /** + * Utility for Placement Policies that need to create a dummy BookieId that represents + * a given host. + * @param hostname the hostname + * @return a dummy bookie id, compatible with the BookieSocketAddress#toBookieId, with a 0 tcp port. + */ + public static BookieId createDummyBookieIdForHostname(String hostname) { + return BookieId.parse(hostname + ":0"); + } + + /** + * Tells whether a BookieId may be a dummy id. + * @param bookieId the id to test + * @return true if the BookieId looks like it has been generated by + * {@link #createDummyBookieIdForHostname(java.lang.String)} + */ + public static boolean isDummyBookieIdForHostname(BookieId bookieId) { + return bookieId.getId().endsWith(":0"); + } + + /** + * Use legacy resolver to resolve a bookieId.
+ * @param bookieId legacy style bookie ID consisting of address (or hostname) and port + * @return the BookieSocketAddress + */ + public static BookieSocketAddress resolveLegacyBookieId(BookieId bookieId) + throws BookieAddressResolver.BookieIdNotResolvedException { + return LEGACY_BOOKIEID_RESOLVER.resolve(bookieId); + } + } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/net/CachedDNSToSwitchMapping.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/net/CachedDNSToSwitchMapping.java index ac9aa327885..ae383d1525d 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/net/CachedDNSToSwitchMapping.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/net/CachedDNSToSwitchMapping.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/net/CommonConfigurationKeys.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/net/CommonConfigurationKeys.java index 4a8fa568997..c525b8a6dff 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/net/CommonConfigurationKeys.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/net/CommonConfigurationKeys.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/net/DNS.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/net/DNS.java index 308728e7beb..68ff6621265 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/net/DNS.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/net/DNS.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -26,13 +26,11 @@ import java.util.Enumeration; import java.util.LinkedHashSet; import java.util.Vector; - import javax.naming.NamingException; import javax.naming.directory.Attribute; import javax.naming.directory.Attributes; import javax.naming.directory.DirContext; import javax.naming.directory.InitialDirContext; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -194,7 +192,7 @@ public static String[] getIPs(String strInterface, allAddrs.removeAll(getSubinterfaceInetAddrs(netIf)); } - String ips[] = new String[allAddrs.size()]; + String[] ips = new String[allAddrs.size()]; int i = 0; for (InetAddress addr : allAddrs) { ips[i++] = addr.getHostAddress(); diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/net/DNSToSwitchMapping.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/net/DNSToSwitchMapping.java index bdf9ce58d79..0083f8d63a7 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/net/DNSToSwitchMapping.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/net/DNSToSwitchMapping.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
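A short usage sketch of the legacy-id utilities introduced above (not part of the patch; the bookie host name is hypothetical): toBookieId round-trips through the legacy host:port format, resolveLegacyBookieId parses it back into a network address, and the dummy-id helpers mark host-only ids with a 0 port.

    import org.apache.bookkeeper.net.BookieId;
    import org.apache.bookkeeper.net.BookieSocketAddress;

    public class LegacyIdSketch {
        public static void main(String[] args) {
            // A legacy-format BookieId is just "hostname:port".
            BookieSocketAddress addr = new BookieSocketAddress("bookie-1.example.com", 3181);
            BookieId id = addr.toBookieId(); // "bookie-1.example.com:3181"

            // The legacy resolver parses the id back into a network address; it throws
            // BookieIdNotResolvedException for ids that are not in host:port form.
            BookieSocketAddress back = BookieSocketAddress.resolveLegacyBookieId(id);
            System.out.println(back.getHostName() + " " + back.getPort());

            // Placement policies can fabricate a host-only id; port 0 marks it as dummy.
            BookieId dummy = BookieSocketAddress.createDummyBookieIdForHostname("bookie-1.example.com");
            System.out.println(BookieSocketAddress.isDummyBookieIdForHostname(dummy)); // true
        }
    }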
See the NOTICE file * distributed with this work for additional information @@ -18,8 +18,8 @@ package org.apache.bookkeeper.net; import com.google.common.annotations.Beta; - import java.util.List; +import org.apache.bookkeeper.proto.BookieAddressResolver; /** * An interface that must be implemented to allow pluggable @@ -67,4 +67,11 @@ public interface DNSToSwitchMapping { default boolean useHostName() { return true; } + + /** + * Receives the current BookieAddressResolver. + * @param bookieAddressResolver the resolver to use + */ + default void setBookieAddressResolver(BookieAddressResolver bookieAddressResolver) { + } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/net/NetUtils.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/net/NetUtils.java index 97762930676..64e3d810d09 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/net/NetUtils.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/net/NetUtils.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -27,14 +27,10 @@ import java.util.Collection; import java.util.List; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - /** * Network Utilities. */ public class NetUtils { - private static final Logger logger = LoggerFactory.getLogger(NetUtils.class); /** * Given a string representation of a host, return its ip address @@ -68,13 +64,20 @@ public static List<String> normalizeHostNames(Collection<String> names) { return hostNames; } - public static String resolveNetworkLocation(DNSToSwitchMapping dnsResolver, InetSocketAddress addr) { + public static String resolveNetworkLocation(DNSToSwitchMapping dnsResolver, + BookieSocketAddress addr) { List<String> names = new ArrayList<String>(1); + InetSocketAddress inetSocketAddress = addr.getSocketAddress(); if (dnsResolver.useHostName()) { names.add(addr.getHostName()); } else { - names.add(addr.getAddress().getHostAddress()); + InetAddress inetAddress = inetSocketAddress.getAddress(); + if (null == inetAddress) { + names.add(addr.getHostName()); + } else { + names.add(inetAddress.getHostAddress()); + } } // resolve network addresses diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/net/NetworkTopology.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/net/NetworkTopology.java index a6bcf77a7c8..bb686dd9e95 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/net/NetworkTopology.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/net/NetworkTopology.java @@ -17,6 +17,7 @@ */ package org.apache.bookkeeper.net; +import java.util.Collection; import java.util.Set; /** @@ -26,6 +27,9 @@ public interface NetworkTopology { String DEFAULT_REGION = "/default-region"; String DEFAULT_RACK = "/default-rack"; + String DEFAULT_ZONE = "/default-zone"; + String DEFAULT_UPGRADEDOMAIN = "/default-upgradedomain"; + String DEFAULT_ZONE_AND_UPGRADEDOMAIN = DEFAULT_ZONE + DEFAULT_UPGRADEDOMAIN; String DEFAULT_REGION_AND_RACK = DEFAULT_REGION + DEFAULT_RACK; /** @@ -37,7 +41,7 @@ public interface NetworkTopology { void add(Node node); /** - * Remove a node from nework topology. + * Remove a node from network topology. * * @param node * remove the node from network topology @@ -76,4 +80,14 @@ public interface NetworkTopology { */ Set<Node> getLeaves(String loc); + /** + * Return the number of leaves in scope but not in excludedNodes. + * + * <p>
If scope starts with ~, return the number of nodes that are not + * in scope and excludedNodes; + * @param scope a path string that may start with ~ + * @param excludedNodes a list of nodes + * @return number of available nodes + */ + int countNumOfAvailableNodes(String scope, Collection<Node> excludedNodes); } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/net/NetworkTopologyImpl.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/net/NetworkTopologyImpl.java index d6756f8cc9d..82890dedc18 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/net/NetworkTopologyImpl.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/net/NetworkTopologyImpl.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -17,6 +17,7 @@ */ package org.apache.bookkeeper.net; +import com.google.common.base.Strings; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -26,7 +27,6 @@ import java.util.Set; import java.util.concurrent.locks.ReadWriteLock; import java.util.concurrent.locks.ReentrantReadWriteLock; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -45,6 +45,7 @@ public class NetworkTopologyImpl implements NetworkTopology { public static final int DEFAULT_HOST_LEVEL = 2; public static final Logger LOG = LoggerFactory.getLogger(NetworkTopologyImpl.class); public static final String NODE_SEPARATOR = ","; + public static final String INVERSE = "~"; /** * A marker for an InvalidTopology Exception. @@ -109,11 +110,7 @@ boolean isRack() { } Node firstChild = children.get(0); - if (firstChild instanceof InnerNode) { - return false; - } - - return true; + return !(firstChild instanceof InnerNode); } /** @@ -123,9 +120,10 @@ boolean isRack() { * @return true if this node is an ancestor of n */ boolean isAncestor(Node n) { - return getPath(this).equals(NodeBase.PATH_SEPARATOR_STR) + return !Strings.isNullOrEmpty(n.getNetworkLocation()) + && (getPath(this).equals(NodeBase.PATH_SEPARATOR_STR) || (n.getNetworkLocation() + NodeBase.PATH_SEPARATOR_STR).startsWith(getPath(this) - + NodeBase.PATH_SEPARATOR_STR); + + NodeBase.PATH_SEPARATOR_STR)); } /** @@ -164,7 +162,7 @@ private String getNextAncestorName(Node n) { boolean add(Node n) { if (!isAncestor(n)) { throw new IllegalArgumentException(n.getName() + ", which is located at " + n.getNetworkLocation() - + ", is not a decendent of " + getPath(this)); + + ", is not a descendant of " + getPath(this)); } if (isParent(n)) { // this node is the parent of n; add n directly @@ -403,6 +401,7 @@ public NetworkTopologyImpl() { * @exception IllegalArgumentException if adding a node to a leaf, or if the node to be added is not a leaf */ + @Override public void add(Node node) { if (node == null) { return; } @@ -422,7 +421,7 @@ public void add(Node node) { Node rack = getNodeForNetworkLocation(node); if (rack != null && !(rack instanceof InnerNode)) { LOG.error("Unexpected data node {} at an illegal network location", node); - throw new IllegalArgumentException("Unexpected data node " + node.toString() + throw new IllegalArgumentException("Unexpected data node " + node + " at an illegal network location"); } if (clusterMap.add(node)) { } } if (LOG.isDebugEnabled()) { - LOG.debug("NetworkTopology became:\n" + this.toString()); + LOG.debug("NetworkTopology became:\n" + this); } } finally {
netlock.writeLock().unlock(); @@ -506,9 +505,12 @@ public void remove(Node node) { if (rack == null) { numOfRacks--; } + if (clusterMap.numOfLeaves == 0) { + depthOfAllLeaves = -1; + } } if (LOG.isDebugEnabled()) { - LOG.debug("NetworkTopology became:\n" + this.toString()); + LOG.debug("NetworkTopology became:\n" + this); } } finally { netlock.writeLock().unlock(); } @@ -710,7 +712,7 @@ protected boolean isSameParents(Node node1, Node node2) { public Node chooseRandom(String scope) { netlock.readLock().lock(); try { - if (scope.startsWith("~")) { + if (scope.startsWith(INVERSE)) { return chooseRandom(NodeBase.ROOT, scope.substring(1)); } else { return chooseRandom(scope, null); @@ -757,6 +759,10 @@ private Node chooseRandom(String scope, String excludedScope) { private Set<Node> doGetLeaves(String scope) { Node node = getNode(scope); Set<Node> leafNodes = new HashSet<Node>(); + if (node == null) { + return leafNodes; + } + if (!(node instanceof InnerNode)) { leafNodes.add(node); } else { @@ -772,7 +778,7 @@ private Set<Node> doGetLeaves(String scope) { public Set<Node> getLeaves(String scope) { netlock.readLock().lock(); try { - if (scope.startsWith("~")) { + if (scope.startsWith(INVERSE)) { Set<Node> allNodes = doGetLeaves(NodeBase.ROOT); String[] excludeScopes = scope.substring(1).split(NODE_SEPARATOR); Set<Node> excludeNodes = new HashSet<Node>(); @@ -789,18 +795,10 @@ public Set<Node> getLeaves(String scope) { } } - /** - * Return the number of leaves in scope but not in excludedNodes. - * - * <p>
If scope starts with ~, return the number of nodes that are not - * in scope and excludedNodes; - * @param scope a path string that may start with ~ - * @param excludedNodes a list of nodes - * @return number of available nodes - */ + @Override public int countNumOfAvailableNodes(String scope, Collection<Node> excludedNodes) { boolean isExcluded = false; - if (scope.startsWith("~")) { + if (scope.startsWith(INVERSE)) { isExcluded = true; scope = scope.substring(1); } @@ -815,7 +813,7 @@ public int countNumOfAvailableNodes(String scope, Collection<Node> excludedNodes } } Node n = getNode(scope); - int scopeNodeCount = 1; + int scopeNodeCount = 0; if (n instanceof InnerNode) { scopeNodeCount = ((InnerNode) n).getNumOfLeaves(); } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/net/Node.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/net/Node.java index fe2d9ce199f..e160ef8fb09 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/net/Node.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/net/Node.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -18,7 +18,6 @@ package org.apache.bookkeeper.net; import com.google.common.annotations.Beta; - /** The interface defines a node in a network topology. * A node may be a leaf representing a data node or an inner * node representing a datacenter or rack. diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/net/NodeBase.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/net/NodeBase.java index 3373b2e02c9..8e0233ad163 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/net/NodeBase.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/net/NodeBase.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -48,7 +48,7 @@ public NodeBase() { /** * Construct a node from its path. * @param path - * a concatenation of this node's location, the path seperator, and its name + * a concatenation of this node's location, the path separator, and its name */ public NodeBase(String path) { path = normalize(path); diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/net/ScriptBasedMapping.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/net/ScriptBasedMapping.java index 8c8350c3fc7..c4787119af4 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/net/ScriptBasedMapping.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/net/ScriptBasedMapping.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements.
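The scope semantics of countNumOfAvailableNodes can be seen with a tiny topology. A sketch assuming the classes from this patch (rack paths and bookie ids are made up): a plain scope counts the leaves under that path minus the excluded nodes, while a leading ~ counts the leaves outside the path instead.

    import java.util.Collections;
    import org.apache.bookkeeper.net.BookieId;
    import org.apache.bookkeeper.net.BookieNode;
    import org.apache.bookkeeper.net.NetworkTopologyImpl;
    import org.apache.bookkeeper.net.Node;

    public class ScopeCountSketch {
        public static void main(String[] args) {
            NetworkTopologyImpl topology = new NetworkTopologyImpl();
            Node b1 = new BookieNode(BookieId.parse("bookie-1:3181"), "/region-a/rack-1");
            Node b2 = new BookieNode(BookieId.parse("bookie-2:3181"), "/region-a/rack-2");
            topology.add(b1);
            topology.add(b2);

            // Plain scope: leaves under /region-a/rack-1, nothing excluded -> just bookie-1.
            int inRack = topology.countNumOfAvailableNodes("/region-a/rack-1",
                    Collections.emptyList());
            // Inverse scope: leaves NOT under /region-a/rack-1, with bookie-2 excluded -> none left.
            int offRack = topology.countNumOfAvailableNodes("~/region-a/rack-1",
                    Collections.singletonList(b2));
            System.out.println(inRack + " " + offRack); // 1 0
        }
    }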
See the NOTICE file * distributed with this work for additional information @@ -22,11 +22,11 @@ import java.util.ArrayList; import java.util.List; import java.util.StringTokenizer; - import org.apache.bookkeeper.util.Shell.ShellCommandExecutor; import org.apache.commons.configuration.Configuration; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.apache.commons.lang.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * This class implements the {@link DNSToSwitchMapping} interface using a @@ -129,25 +129,54 @@ public void setConf(Configuration conf) { private static final class RawScriptBasedMapping extends AbstractDNSToSwitchMapping { private String scriptName; private int maxArgs; //max hostnames per call of the script - private static final Log LOG = LogFactory.getLog(ScriptBasedMapping.class); + private static final Logger LOG = LoggerFactory.getLogger(RawScriptBasedMapping.class); - /** - * Set the configuration and extract the configuration parameters of interest. - * @param conf the new configuration + /* + * extract 'scriptName' and 'maxArgs' parameters from the conf and throw + * RuntimeException if 'scriptName' is null. Also for sanity check + * purpose try executing the script with no arguments. Here it is + * expected that running script with no arguments would do sanity check + * of the script and the env, and return successfully if script and env. + * are valid. If sanity check of the script with no argument fails then + * throw RuntimeException. + * */ @Override - public void setConf(Configuration conf) { - super.setConf(conf); + protected void validateConf() { + Configuration conf = getConf(); if (conf != null) { - scriptName = conf.getString(SCRIPT_FILENAME_KEY); - maxArgs = conf.getInt(SCRIPT_ARG_COUNT_KEY, DEFAULT_ARG_COUNT); + String scriptNameConfValue = conf.getString(SCRIPT_FILENAME_KEY); + if (StringUtils.isNotBlank(scriptNameConfValue)) { + scriptName = scriptNameConfValue; + maxArgs = conf.getInt(SCRIPT_ARG_COUNT_KEY, DEFAULT_ARG_COUNT); + } else { + scriptName = null; + maxArgs = 0; + } } else { scriptName = null; maxArgs = 0; } if (null == scriptName) { - throw new RuntimeException("No network topology script is found when using script based DNS resolver."); + throw new RuntimeException("No network topology script is found when using script" + + " based DNS resolver."); + } else { + File dir = null; + String userDir; + if ((userDir = System.getProperty("user.dir")) != null) { + dir = new File(userDir); + } + String[] execString = { this.scriptName }; + ShellCommandExecutor s = new ShellCommandExecutor(execString, dir); + try { + s.execute(); + } catch (Exception e) { + LOG.error("Conf validation failed. Got exception for sanity check of script: " + this.scriptName, + e); + throw new RuntimeException( + "Conf validation failed. 
Got exception for sanity check of script: " + this.scriptName, e); + } } } @@ -180,8 +209,8 @@ public List<String> resolve(List<String> names) { if (m.size() != names.size()) { // invalid number of entries returned by the script - LOG.error("Script " + scriptName + " returned " + Integer.toString(m.size()) + " values when " - + Integer.toString(names.size()) + " were expected."); + LOG.error("Script " + scriptName + " returned " + m.size() + " values when " + + names.size() + " were expected."); return null; } } else { @@ -209,8 +238,8 @@ private String runResolveCommand(List<String> args) { StringBuilder allOutput = new StringBuilder(); int numProcessed = 0; if (maxArgs < MIN_ALLOWABLE_ARGS) { - LOG.warn("Invalid value " + Integer.toString(maxArgs) + " for " + SCRIPT_ARG_COUNT_KEY - + "; must be >= " + Integer.toString(MIN_ALLOWABLE_ARGS)); + LOG.warn("Invalid value " + maxArgs + " for " + SCRIPT_ARG_COUNT_KEY + + "; must be >= " + MIN_ALLOWABLE_ARGS); return null; } @@ -233,7 +262,7 @@ private String runResolveCommand(List<String> args) { s.execute(); allOutput.append(s.getOutput()).append(" "); } catch (Exception e) { - LOG.warn("Exception running " + s, e); + LOG.warn("Exception running: {} Exception message: {}", s, e.getMessage()); return null; } loopCount++; diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/net/StabilizeNetworkTopology.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/net/StabilizeNetworkTopology.java index 0bac3bfd4a9..1319a8a802b 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/net/StabilizeNetworkTopology.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/net/StabilizeNetworkTopology.java @@ -20,13 +20,11 @@ import io.netty.util.HashedWheelTimer; import io.netty.util.Timeout; import io.netty.util.TimerTask; - +import java.util.Collection; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; import java.util.concurrent.TimeUnit; - -import org.apache.bookkeeper.util.MathUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -42,7 +40,7 @@ static class NodeStatus { boolean tentativeToRemove; NodeStatus() { - this.lastPresentTime = MathUtils.now(); + this.lastPresentTime = System.currentTimeMillis(); } synchronized boolean isTentativeToRemove() { @@ -52,7 +50,7 @@ synchronized boolean isTentativeToRemove() { synchronized NodeStatus updateStatus(boolean tentativeToRemove) { this.tentativeToRemove = tentativeToRemove; if (!this.tentativeToRemove) { - this.lastPresentTime = MathUtils.now(); + this.lastPresentTime = System.currentTimeMillis(); } return this; } @@ -88,7 +86,7 @@ public void run(Timeout timeout) throws Exception { // no status of this node, remove this node from topology impl.remove(node); } else if (status.isTentativeToRemove()) { - long millisSinceLastSeen = MathUtils.now() - status.getLastPresentTime(); + long millisSinceLastSeen = System.currentTimeMillis() - status.getLastPresentTime(); if (millisSinceLastSeen >= stabilizePeriodMillis) { logger.info("Node {} (seen @ {}) becomes stale for {} ms, remove it from the topology.", node, status.getLastPresentTime(), millisSinceLastSeen); @@ -152,4 +150,9 @@ public int getNumOfRacks() { public Set<Node> getLeaves(String loc) { return impl.getLeaves(loc); } + + @Override + public int countNumOfAvailableNodes(String scope, Collection<Node> excludedNodes) { + return impl.countNumOfAvailableNodes(scope, excludedNodes); + } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/processor/RequestProcessor.java
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/processor/RequestProcessor.java index 8b328ef7db0..5a4238e64d6 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/processor/RequestProcessor.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/processor/RequestProcessor.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -20,7 +20,7 @@ */ package org.apache.bookkeeper.processor; -import io.netty.channel.Channel; +import org.apache.bookkeeper.proto.BookieRequestHandler; /** * A request processor that is used for processing requests at bookie side. @@ -30,6 +30,7 @@ public interface RequestProcessor extends AutoCloseable { /** * Close the request processor. */ + @Override void close(); /** @@ -40,5 +41,5 @@ public interface RequestProcessor extends AutoCloseable { * @param channel * channel received the given request r */ - void processRequest(Object r, Channel channel); + void processRequest(Object r, BookieRequestHandler channel); } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/AuthHandler.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/AuthHandler.java index 1b1f60fc592..a11fad102dd 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/AuthHandler.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/AuthHandler.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -23,25 +23,25 @@ import static org.apache.bookkeeper.auth.AuthProviderFactoryFactory.AUTHENTICATION_DISABLED_PLUGIN_NAME; import com.google.protobuf.ByteString; - +import io.netty.buffer.ByteBuf; import io.netty.channel.Channel; import io.netty.channel.ChannelDuplexHandler; import io.netty.channel.ChannelHandlerContext; import io.netty.channel.ChannelInboundHandlerAdapter; import io.netty.channel.ChannelPromise; - import java.io.IOException; import java.net.SocketAddress; import java.util.Queue; import java.util.concurrent.ConcurrentLinkedQueue; import java.util.concurrent.atomic.AtomicLong; - import org.apache.bookkeeper.auth.AuthCallbacks; import org.apache.bookkeeper.auth.AuthToken; import org.apache.bookkeeper.auth.BookieAuthProvider; import org.apache.bookkeeper.auth.ClientAuthProvider; import org.apache.bookkeeper.client.BKException; import org.apache.bookkeeper.proto.BookkeeperProtocol.AuthMessage; +import org.apache.bookkeeper.util.ByteBufList; +import org.apache.bookkeeper.util.NettyChannelUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -103,15 +103,15 @@ public void channelRead(ChannelHandlerContext ctx, Object msg) throws Exception } else if (msg instanceof BookieProtocol.Request) { BookieProtocol.Request req = (BookieProtocol.Request) msg; if (req.getOpCode() == BookieProtocol.ADDENTRY) { - ctx.channel().writeAndFlush( - BookieProtocol.AddResponse.create( - req.getProtocolVersion(), BookieProtocol.EUA, - req.getLedgerId(), req.getEntryId())); + final BookieProtocol.AddResponse response = BookieProtocol.AddResponse.create( + req.getProtocolVersion(), BookieProtocol.EUA, + req.getLedgerId(), req.getEntryId()); + NettyChannelUtil.writeAndFlushWithVoidPromise(ctx.channel(), response); } else if (req.getOpCode() == BookieProtocol.READENTRY) { - ctx.channel().writeAndFlush( - new BookieProtocol.ReadResponse( - req.getProtocolVersion(), BookieProtocol.EUA, - req.getLedgerId(), 
req.getEntryId())); + final BookieProtocol.ReadResponse response = new BookieProtocol.ReadResponse( + req.getProtocolVersion(), BookieProtocol.EUA, + req.getLedgerId(), req.getEntryId()); + NettyChannelUtil.writeAndFlushWithVoidPromise(ctx.channel(), response); } else { ctx.channel().close(); } @@ -134,7 +134,7 @@ && checkAuthPlugin(req.getAuthRequest(), ctx.channel())) { .setHeader(req.getHeader()) .setStatus(BookkeeperProtocol.StatusCode.EUA); - ctx.channel().writeAndFlush(builder.build()); + NettyChannelUtil.writeAndFlushWithVoidPromise(ctx.channel(), builder.build()); } } else { // close the channel, junk coming over it @@ -144,13 +144,17 @@ && checkAuthPlugin(req.getAuthRequest(), ctx.channel())) { private boolean checkAuthPlugin(AuthMessage am, final Channel src) { if (!am.hasAuthPluginName() || !am.getAuthPluginName().equals(authProviderFactory.getPluginName())) { - LOG.error("Received message from incompatible auth plugin. Local = {}," + " Remote = {}, Channel = {}", - authProviderFactory.getPluginName(), am.getAuthPluginName()); + LOG.error("Received message from incompatible auth plugin. Local = {}, Remote = {}, Channel = {}", + authProviderFactory.getPluginName(), am.getAuthPluginName(), src); return false; } return true; } + public boolean isAuthenticated() { + return authenticated; + } + static class AuthResponseCallbackLegacy implements AuthCallbacks.GenericCallback<AuthToken> { final BookieProtocol.AuthRequest req; final Channel channel; @@ -160,6 +164,7 @@ static class AuthResponseCallbackLegacy implements AuthCallbacks.GenericCallback<AuthToken> this.channel = channel; } + @Override public void operationComplete(int rc, AuthToken newam) { if (rc != BKException.Code.OK) { LOG.error("Error processing auth message, closing connection"); @@ -168,7 +173,9 @@ public void operationComplete(int rc, AuthToken newam) { } AuthMessage message = AuthMessage.newBuilder().setAuthPluginName(req.authMessage.getAuthPluginName()) .setPayload(ByteString.copyFrom(newam.getData())).build(); - channel.writeAndFlush(new BookieProtocol.AuthResponse(req.getProtocolVersion(), message)); + final BookieProtocol.AuthResponse response = + new BookieProtocol.AuthResponse(req.getProtocolVersion(), message); + NettyChannelUtil.writeAndFlushWithVoidPromise(channel, response); } } @@ -183,6 +190,7 @@ static class AuthResponseCallback implements AuthCallbacks.GenericCallback<AuthToken> { final Queue<Object> waitingForAuth = new ConcurrentLinkedQueue<>(); final ClientConnectionPeer connectionPeer; + private final boolean isUsingV2Protocol; + public ClientAuthProvider getAuthProvider() { return authProvider; } ClientSideHandler(ClientAuthProvider.Factory authProviderFactory, AtomicLong transactionIdGenerator, - ClientConnectionPeer connectionPeer) { + ClientConnectionPeer connectionPeer, boolean isUsingV2Protocol) { this.authProviderFactory = authProviderFactory; this.transactionIdGenerator = transactionIdGenerator; this.connectionPeer = connectionPeer; authProvider = null; + this.isUsingV2Protocol = isUsingV2Protocol; } @Override @@ -279,7 +293,7 @@ public void channelRead(ChannelHandlerContext ctx, Object msg) throws Exception if (AUTHENTICATION_DISABLED_PLUGIN_NAME.equals(am.getAuthPluginName())){ SocketAddress remote = ctx.channel().remoteAddress(); LOG.info("Authentication is not enabled."
- + "Considering this client {0} authenticated", remote); + + "Considering this client {} authenticated", remote); AuthHandshakeCompleteCallback cb = new AuthHandshakeCompleteCallback(ctx); cb.operationComplete(BKException.Code.OK, null); return; @@ -296,6 +310,33 @@ public void channelRead(ChannelHandlerContext ctx, Object msg) throws Exception break; } } + } else if (msg instanceof BookieProtocol.Response) { + BookieProtocol.Response resp = (BookieProtocol.Response) msg; + switch (resp.opCode) { + case BookieProtocol.AUTH: + if (resp.errorCode != BookieProtocol.EOK) { + authenticationError(ctx, resp.errorCode); + } else { + BookkeeperProtocol.AuthMessage am = ((BookieProtocol.AuthResponse) resp).authMessage; + if (AUTHENTICATION_DISABLED_PLUGIN_NAME.equals(am.getAuthPluginName())) { + SocketAddress remote = ctx.channel().remoteAddress(); + LOG.info("Authentication is not enabled." + + "Considering this client {} authenticated", remote); + AuthHandshakeCompleteCallback cb = new AuthHandshakeCompleteCallback(ctx); + cb.operationComplete(BKException.Code.OK, null); + return; + } + byte[] payload = am.getPayload().toByteArray(); + authProvider.process(AuthToken.wrap(payload), new AuthRequestCallback(ctx, + authProviderFactory.getPluginName())); + } + break; + default: + LOG.warn("dropping received message {} from bookie {}", msg, ctx.channel()); + // else just drop the message, we're not authenticated so nothing should be coming + // through + break; + } } } @@ -314,23 +355,35 @@ public void write(ChannelHandlerContext ctx, Object msg, ChannelPromise promise) super.write(ctx, msg, promise); super.flush(ctx); } else { - waitingForAuth.add(msg); + addMsgAndPromiseToQueue(msg, promise); } } else if (msg instanceof BookieProtocol.Request) { // let auth messages through, queue the rest BookieProtocol.Request req = (BookieProtocol.Request) msg; - if (BookkeeperProtocol.OperationType.AUTH.getNumber() == req.getOpCode()) { + if (BookieProtocol.AUTH == req.getOpCode()) { super.write(ctx, msg, promise); super.flush(ctx); } else { - waitingForAuth.add(msg); + addMsgAndPromiseToQueue(msg, promise); } + } else if (msg instanceof ByteBuf || msg instanceof ByteBufList) { + addMsgAndPromiseToQueue(msg, promise); } else { - LOG.info("dropping write of message {}", msg); + LOG.info("[{}] dropping write of message {}", ctx.channel(), msg); } } } + // Add the message and the associated promise to the queue. + // The promise is added to the same queue as the message without an additional wrapper object so + // that object allocations can be avoided. A similar solution is used in Netty codebase. 
+ private void addMsgAndPromiseToQueue(Object msg, ChannelPromise promise) { + waitingForAuth.add(msg); + if (promise != null && !promise.isVoid()) { + waitingForAuth.add(promise); + } + } + long newTxnId() { return transactionIdGenerator.incrementAndGet(); } @@ -351,21 +404,30 @@ class AuthRequestCallback implements AuthCallbacks.GenericCallback<AuthToken> { this.pluginName = pluginName; } + @Override public void operationComplete(int rc, AuthToken newam) { if (rc != BKException.Code.OK) { authenticationError(ctx, rc); return; } + AuthMessage message = AuthMessage.newBuilder().setAuthPluginName(pluginName) .setPayload(ByteString.copyFrom(newam.getData())).build(); - BookkeeperProtocol.BKPacketHeader header = BookkeeperProtocol.BKPacketHeader.newBuilder() - .setVersion(BookkeeperProtocol.ProtocolVersion.VERSION_THREE) - .setOperation(BookkeeperProtocol.OperationType.AUTH).setTxnId(newTxnId()).build(); - BookkeeperProtocol.Request.Builder builder = BookkeeperProtocol.Request.newBuilder().setHeader(header) - .setAuthRequest(message); - - channel.writeAndFlush(builder.build()); + if (isUsingV2Protocol) { + final BookieProtocol.AuthRequest msg = + new BookieProtocol.AuthRequest(BookieProtocol.CURRENT_PROTOCOL_VERSION, message); + NettyChannelUtil.writeAndFlushWithVoidPromise(channel, msg); + } else { + // V3 protocol + BookkeeperProtocol.BKPacketHeader header = BookkeeperProtocol.BKPacketHeader.newBuilder() + .setVersion(BookkeeperProtocol.ProtocolVersion.VERSION_THREE) + .setOperation(BookkeeperProtocol.OperationType.AUTH).setTxnId(newTxnId()).build(); + BookkeeperProtocol.Request.Builder builder = BookkeeperProtocol.Request.newBuilder() + .setHeader(header) + .setAuthRequest(message); + NettyChannelUtil.writeAndFlushWithVoidPromise(channel, builder.build()); + } } } @@ -381,10 +443,19 @@ public void operationComplete(int rc, Void v) { if (rc == BKException.Code.OK) { synchronized (this) { authenticated = true; - Object msg = waitingForAuth.poll(); - while (msg != null) { - ctx.writeAndFlush(msg); - msg = waitingForAuth.poll(); + while (true) { + Object msg = waitingForAuth.poll(); + if (msg == null) { + break; + } + ChannelPromise promise; + // check if the message has an associated promise as the next element in the queue + if (waitingForAuth.peek() instanceof ChannelPromise) { + promise = (ChannelPromise) waitingForAuth.poll(); + } else { + promise = ctx.voidPromise(); + } + ctx.writeAndFlush(msg, promise); } } } else { diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/BKStats.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/BKStats.java index 144767d7f35..ec7a3ac55ad 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/BKStats.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/BKStats.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements.
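The promise-pairing queue used by ClientSideHandler above can be reproduced with plain JDK types. A sketch (not part of the patch; the Promise class is a stand-in for Netty's ChannelPromise): each message is followed in the queue by its promise unless the promise is void, and the drain loop peeks to decide which case it is in.

    import java.util.ArrayDeque;
    import java.util.Queue;

    public class PairedQueueSketch {
        // Stand-in for io.netty.channel.ChannelPromise in this sketch.
        static final class Promise {
            final String name;
            Promise(String name) {
                this.name = name;
            }
            @Override
            public String toString() {
                return name;
            }
        }

        public static void main(String[] args) {
            Queue<Object> waitingForAuth = new ArrayDeque<>();
            // Enqueue: the message first, then its promise; a void promise is skipped,
            // so no wrapper object is ever allocated per message.
            waitingForAuth.add("addEntry-1");
            waitingForAuth.add(new Promise("p1"));
            waitingForAuth.add("addEntry-2"); // written with the void promise

            // Drain, as the auth-complete callback does: after each message, peek to
            // see whether the next element is its promise.
            Object msg;
            while ((msg = waitingForAuth.poll()) != null) {
                Object promise = waitingForAuth.peek() instanceof Promise
                        ? waitingForAuth.poll() : "voidPromise";
                System.out.println("flush " + msg + " with " + promise);
            }
        }
    }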
See the NOTICE file @@ -22,7 +22,6 @@ package org.apache.bookkeeper.proto; import java.beans.ConstructorProperties; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/BatchedReadEntryProcessor.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/BatchedReadEntryProcessor.java new file mode 100644 index 00000000000..6db3e143519 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/BatchedReadEntryProcessor.java @@ -0,0 +1,116 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.proto; + +import io.netty.buffer.ByteBuf; +import io.netty.util.Recycler; +import io.netty.util.ReferenceCounted; +import java.util.concurrent.ExecutorService; +import org.apache.bookkeeper.proto.BookieProtocol.BatchedReadRequest; +import org.apache.bookkeeper.util.ByteBufList; + +public class BatchedReadEntryProcessor extends ReadEntryProcessor { + + private long maxBatchReadSize; + + public static BatchedReadEntryProcessor create(BatchedReadRequest request, + BookieRequestHandler requestHandler, + BookieRequestProcessor requestProcessor, + ExecutorService fenceThreadPool, + boolean throttleReadResponses, + long maxBatchReadSize) { + BatchedReadEntryProcessor rep = RECYCLER.get(); + rep.init(request, requestHandler, requestProcessor); + rep.fenceThreadPool = fenceThreadPool; + rep.throttleReadResponses = throttleReadResponses; + rep.maxBatchReadSize = maxBatchReadSize; + requestProcessor.onReadRequestStart(requestHandler.ctx().channel()); + return rep; + } + + @Override + protected ReferenceCounted readData() throws Exception { + ByteBufList data = null; + BatchedReadRequest batchRequest = (BatchedReadRequest) request; + int maxCount = batchRequest.getMaxCount(); + if (maxCount <= 0) { + maxCount = Integer.MAX_VALUE; + } + long maxSize = Math.min(batchRequest.getMaxSize(), maxBatchReadSize); + //See BookieProtoEncoding.ResponseEnDeCoderPreV3#encode on BatchedReadResponse case. 
+ long frameSize = 24 + 8 + 4; + for (int i = 0; i < maxCount; i++) { + try { + ByteBuf entry = requestProcessor.getBookie().readEntry(request.getLedgerId(), request.getEntryId() + i); + frameSize += entry.readableBytes() + 4; + if (data == null) { + data = ByteBufList.get(entry); + } else { + if (frameSize > maxSize) { + entry.release(); + break; + } + data.add(entry); + } + } catch (Throwable e) { + if (data == null) { + throw e; + } + break; + } + } + return data; + } + + @Override + protected BookieProtocol.Response buildReadResponse(ReferenceCounted data) { + return ResponseBuilder.buildBatchedReadResponse((ByteBufList) data, (BatchedReadRequest) request); + } + + @Override + public String toString() { + BatchedReadRequest br = (BatchedReadRequest) request; + return String.format("BatchedReadEntry(%d, %d, %d, %d)", br.getLedgerId(), br.getEntryId(), br.getMaxCount(), + br.getMaxSize()); + } + + protected void recycle() { + request.recycle(); + super.reset(); + if (this.recyclerHandle != null) { + this.recyclerHandle.recycle(this); + } + } + + private final Recycler.Handle<BatchedReadEntryProcessor> recyclerHandle; + + private BatchedReadEntryProcessor(Recycler.Handle<BatchedReadEntryProcessor> recyclerHandle) { + this.recyclerHandle = recyclerHandle; + } + + private static final Recycler<BatchedReadEntryProcessor> RECYCLER = new Recycler<BatchedReadEntryProcessor>() { + @Override + protected BatchedReadEntryProcessor newObject(Recycler.Handle<BatchedReadEntryProcessor> handle) { + return new BatchedReadEntryProcessor(handle); + } + }; + +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/BookieAddressResolver.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/BookieAddressResolver.java new file mode 100644 index 00000000000..f70740b1e68 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/BookieAddressResolver.java @@ -0,0 +1,53 @@ +/* + * Copyright 2020 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bookkeeper.proto; + +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.net.BookieSocketAddress; + +/** + * Maps a logical BookieId to a resolved BookieSocketAddress, + * that is, to a network address. + */ +public interface BookieAddressResolver { + + /** + * Maps a logical address to a network address. + * @param bookieId the logical bookie id + * @return a mapped address. + * @throws BookieIdNotResolvedException if it is not possible to resolve the address of the BookieId + */ + BookieSocketAddress resolve(BookieId bookieId) throws BookieIdNotResolvedException; + + /** + * This error happens when there is not enough information to resolve a BookieId + * to a BookieSocketAddress, this can happen when the Bookie is down + * and it is not publishing its EndpointInfo.
+ */ + class BookieIdNotResolvedException extends RuntimeException { + private final BookieId bookieId; + + public BookieIdNotResolvedException(BookieId bookieId, Throwable cause) { + super("Cannot resolve bookieId " + bookieId + ", bookie does not exist or it is not running", cause); + this.bookieId = bookieId; + } + + public BookieId getBookieId() { + return bookieId; + } + + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/BookieClient.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/BookieClient.java index 85a4ef9c164..5d20e1b22d1 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/BookieClient.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/BookieClient.java @@ -20,17 +20,20 @@ */ package org.apache.bookkeeper.proto; +import io.netty.util.ReferenceCounted; import java.util.EnumSet; import java.util.List; - +import java.util.concurrent.CompletableFuture; import org.apache.bookkeeper.client.api.WriteFlag; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.BatchedReadEntryCallback; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.ForceLedgerCallback; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GetBookieInfoCallback; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.ReadEntryCallback; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.ReadLacCallback; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.WriteCallback; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.WriteLacCallback; +import org.apache.bookkeeper.util.AvailabilityOfEntriesOfLedger; import org.apache.bookkeeper.util.ByteBufList; /** @@ -45,7 +48,7 @@ public interface BookieClient { * * @return the list of faulty bookies */ - List<BookieSocketAddress> getFaultyBookies(); + List<BookieId> getFaultyBookies(); /** * Check whether the channel used to write to a bookie channel is writable. @@ -65,14 +68,14 @@ public interface BookieClient { * @param ledgerId the ledger we wish to send a request to * */ - boolean isWritable(BookieSocketAddress address, long ledgerId); + boolean isWritable(BookieId address, long ledgerId); /** * Get the number of outstanding requests on the channel used to connect * to a bookie at {@code address} for a ledger with {@code ledgerId}. * It is necessary to specify the ledgerId as there may be multiple * channels for a single bookie if pooling is in use. - * If the bookie is not {@link #isWritable(BookieSocketAddress,long) writable}, + * If the bookie is not {@link #isWritable(BookieId,long) writable}, * then the {@link #PENDINGREQ_NOTWRITABLE_MASK} will be logically or'd with * the returned value. * * @param address the address of the bookie * @param ledgerId the ledger whose channel we wish to query * @return the number of requests currently outstanding */ - long getNumPendingRequests(BookieSocketAddress address, long ledgerId); + long getNumPendingRequests(BookieId address, long ledgerId); /** * Send a force request to the server.
When complete all entries which have @@ -92,7 +95,7 @@ * @param cb the callback notified when the request completes * @param ctx a context object passed to the callback on completion */ - void forceLedger(BookieSocketAddress address, long ledgerId, + void forceLedger(BookieId address, long ledgerId, ForceLedgerCallback cb, Object ctx); /** @@ -104,7 +107,7 @@ void forceLedger(BookieSocketAddress address, long ledgerId, * @param cb the callback notified when the request completes * @param ctx a context object passed to the callback on completion */ - void readLac(BookieSocketAddress address, long ledgerId, ReadLacCallback cb, Object ctx); + void readLac(BookieId address, long ledgerId, ReadLacCallback cb, Object ctx); /** * Explicitly write the last add confirmed for ledger {@code ledgerId} to the bookie at @@ -118,7 +121,7 @@ * @param cb the callback notified when the request completes * @param ctx a context object passed to the callback on completion */ - void writeLac(BookieSocketAddress address, long ledgerId, byte[] masterKey, + void writeLac(BookieId address, long ledgerId, byte[] masterKey, long lac, ByteBufList toSend, WriteLacCallback cb, Object ctx); /** @@ -133,28 +136,28 @@ void writeLac(BookieSocketAddress address, long ledgerId, byte[] masterKey, * @param options a bit mask of flags from BookieProtocol.FLAG_* * {@link org.apache.bookkeeper.proto.BookieProtocol} * @param allowFastFail fail the add immediately if the channel is non-writable - * {@link #isWritable(BookieSocketAddress,long)} + * {@link #isWritable(BookieId,long)} * @param writeFlags a set of write flags - * {@link org.apache.bookkeeper.client.api.WriteFlags} + * {@link org.apache.bookkeeper.client.api.WriteFlag} */ - void addEntry(BookieSocketAddress address, long ledgerId, byte[] masterKey, - long entryId, ByteBufList toSend, WriteCallback cb, Object ctx, + void addEntry(BookieId address, long ledgerId, byte[] masterKey, + long entryId, ReferenceCounted toSend, WriteCallback cb, Object ctx, int options, boolean allowFastFail, EnumSet<WriteFlag> writeFlags); /** * Read entry with a null masterkey, disallowing failfast. - * @see #readEntry(BookieSocketAddress,long,long,ReadEntryCallback,Object,int,byte[],boolean) + * @see #readEntry(BookieId,long,long,ReadEntryCallback,Object,int,byte[],boolean) */ - default void readEntry(BookieSocketAddress address, long ledgerId, long entryId, + default void readEntry(BookieId address, long ledgerId, long entryId, ReadEntryCallback cb, Object ctx, int flags) { readEntry(address, ledgerId, entryId, cb, ctx, flags, null); } /** * Read entry, disallowing failfast. - * @see #readEntry(BookieSocketAddress,long,long,ReadEntryCallback,Object,int,byte[],boolean) + * @see #readEntry(BookieId,long,long,ReadEntryCallback,Object,int,byte[],boolean) */ - default void readEntry(BookieSocketAddress address, long ledgerId, long entryId, + default void readEntry(BookieId address, long ledgerId, long entryId, ReadEntryCallback cb, Object ctx, int flags, byte[] masterKey) { readEntry(address, ledgerId, entryId, cb, ctx, flags, masterKey, false); } @@ -172,12 +175,53 @@ default void readEntry(BookieSocketAddress address, long ledgerId, long entryId, * @param masterKey the master key of the ledger being read from. This is only required * if the FLAG_DO_FENCING is specified.
* @param allowFastFail fail the read immediately if the channel is non-writable - * {@link #isWritable(BookieSocketAddress,long)} + * {@link #isWritable(BookieId,long)} */ - void readEntry(BookieSocketAddress address, long ledgerId, long entryId, + void readEntry(BookieId address, long ledgerId, long entryId, ReadEntryCallback cb, Object ctx, int flags, byte[] masterKey, boolean allowFastFail); + /** + * Batch read entries with a null masterkey, disallowing failfast. + * @see #batchReadEntries(BookieId,long,long,int,long,BatchedReadEntryCallback,Object,int,byte[],boolean) + */ + default void batchReadEntries(BookieId address, long ledgerId, long startEntryId, + int maxCount, long maxSize, BatchedReadEntryCallback cb, Object ctx, + int flags) { + batchReadEntries(address, ledgerId, startEntryId, maxCount, maxSize, cb, ctx, flags, null); + } + + /** + * Batch read entries, disallowing failfast. + * @see #batchReadEntries(BookieId,long,long,int,long,BatchedReadEntryCallback,Object,int,byte[],boolean) + */ + default void batchReadEntries(BookieId address, long ledgerId, long startEntryId, + int maxCount, long maxSize, BatchedReadEntryCallback cb, Object ctx, + int flags, byte[] masterKey) { + batchReadEntries(address, ledgerId, startEntryId, maxCount, maxSize, cb, ctx, flags, masterKey, false); + } + + /** + * Batch read entries from bookie at address {@code address}. + * + * @param address address of the bookie to read from + * @param ledgerId id of the ledger the entry belongs to + * @param startEntryId id of the entry started + * @param maxCount the total entries count in this batch + * @param maxSize the total entries size in this batch + * @param cb the callback notified when the request completes + * @param ctx a context object passed to the callback on completion + * @param flags a bit mask of flags from BookieProtocol.FLAG_* + * {@link org.apache.bookkeeper.proto.BookieProtocol} + * @param masterKey the master key of the ledger being read from. This is only required + * if the FLAG_DO_FENCING is specified. + * @param allowFastFail fail the read immediately if the channel is non-writable + * {@link #isWritable(BookieId,long)} + */ + void batchReadEntries(BookieId address, long ledgerId, long startEntryId, + int maxCount, long maxSize, BatchedReadEntryCallback cb, Object ctx, + int flags, byte[] masterKey, boolean allowFastFail); + /** * Send a long poll request to bookie, waiting for the last add confirmed * to be updated. The client can also request that the full entry is returned @@ -192,7 +236,7 @@ void readEntry(BookieSocketAddress address, long ledgerId, long entryId, * @param cb the callback notified when the request completes * @param ctx a context object passed to the callback on completion */ - void readEntryWaitForLACUpdate(BookieSocketAddress address, + void readEntryWaitForLACUpdate(BookieId address, long ledgerId, long entryId, long previousLAC, @@ -212,9 +256,22 @@ void readEntryWaitForLACUpdate(BookieSocketAddress address, * * @see org.apache.bookkeeper.client.BookieInfoReader.BookieInfo */ - void getBookieInfo(BookieSocketAddress address, long requested, + void getBookieInfo(BookieId address, long requested, GetBookieInfoCallback cb, Object ctx); + /** + * Makes async request for getting list of entries of ledger from a bookie + * and returns Future for the result. 
+ * + * @param address + * BookieId of the bookie + * @param ledgerId + * ledgerId + * @return returns Future<AvailabilityOfEntriesOfLedger> + */ + CompletableFuture<AvailabilityOfEntriesOfLedger> getListOfEntriesOfLedger(BookieId address, + long ledgerId); + /** * @return whether bookie client object has been closed */ diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/BookieClientImpl.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/BookieClientImpl.java index 50dd85fd83e..a12d9fd64d5 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/BookieClientImpl.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/BookieClientImpl.java @@ -20,23 +20,24 @@ */ package org.apache.bookkeeper.proto; -import static com.google.common.base.Charsets.UTF_8; -import static org.apache.bookkeeper.util.SafeRunnable.safeRun; +import static java.nio.charset.StandardCharsets.UTF_8; import com.google.common.collect.Lists; import com.google.protobuf.ExtensionRegistry; - import io.netty.buffer.ByteBuf; +import io.netty.buffer.ByteBufAllocator; import io.netty.buffer.Unpooled; import io.netty.channel.EventLoopGroup; import io.netty.channel.nio.NioEventLoopGroup; import io.netty.util.Recycler; import io.netty.util.Recycler.Handle; +import io.netty.util.ReferenceCountUtil; +import io.netty.util.ReferenceCounted; import io.netty.util.concurrent.DefaultThreadFactory; - import java.io.IOException; import java.util.EnumSet; import java.util.List; +import java.util.concurrent.CompletableFuture; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.Executors; import java.util.concurrent.RejectedExecutionException; @@ -44,17 +45,18 @@ import java.util.concurrent.ScheduledFuture; import java.util.concurrent.TimeUnit; import java.util.concurrent.locks.ReentrantReadWriteLock; - import org.apache.bookkeeper.auth.AuthProviderFactoryFactory; import org.apache.bookkeeper.auth.ClientAuthProvider; import org.apache.bookkeeper.client.BKException; import org.apache.bookkeeper.client.BookieInfoReader.BookieInfo; import org.apache.bookkeeper.client.api.WriteFlag; import org.apache.bookkeeper.common.util.OrderedExecutor; -import org.apache.bookkeeper.common.util.SafeRunnable; import org.apache.bookkeeper.conf.ClientConfiguration; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.BatchedReadEntryCallback; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.ForceLedgerCallback; +import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.FutureGetListOfEntriesOfLedger; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GenericCallback; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GetBookieInfoCallback; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.ReadEntryCallback; @@ -65,6 +67,7 @@ import org.apache.bookkeeper.stats.StatsLogger; import org.apache.bookkeeper.tls.SecurityException; import org.apache.bookkeeper.tls.SecurityHandlerFactory; +import org.apache.bookkeeper.util.AvailabilityOfEntriesOfLedger; import org.apache.bookkeeper.util.ByteBufList; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -74,36 +77,45 @@ * */ public class BookieClientImpl implements BookieClient, PerChannelBookieClientFactory { - static final Logger LOG = LoggerFactory.getLogger(BookieClient.class); + static final Logger LOG = LoggerFactory.getLogger(BookieClientImpl.class); - OrderedExecutor executor; - ScheduledExecutorService scheduler; -
ScheduledFuture<?> timeoutFuture; + private final OrderedExecutor executor; + private final ScheduledExecutorService scheduler; + private final ScheduledFuture<?> timeoutFuture; - EventLoopGroup eventLoopGroup; - final ConcurrentHashMap<BookieSocketAddress, PerChannelBookieClientPool> channels = - new ConcurrentHashMap<BookieSocketAddress, PerChannelBookieClientPool>(); + private final EventLoopGroup eventLoopGroup; + private final ByteBufAllocator allocator; + final ConcurrentHashMap<BookieId, PerChannelBookieClientPool> channels = + new ConcurrentHashMap<BookieId, PerChannelBookieClientPool>(); private final ClientAuthProvider.Factory authProviderFactory; private final ExtensionRegistry registry; private final ClientConfiguration conf; + private final ClientConfiguration v3Conf; + private final boolean useV3Enforced; private volatile boolean closed; private final ReentrantReadWriteLock closeLock; private final StatsLogger statsLogger; private final int numConnectionsPerBookie; + private final BookieAddressResolver bookieAddressResolver; private final long bookieErrorThresholdPerInterval; public BookieClientImpl(ClientConfiguration conf, EventLoopGroup eventLoopGroup, + ByteBufAllocator allocator, OrderedExecutor executor, ScheduledExecutorService scheduler, - StatsLogger statsLogger) throws IOException { + StatsLogger statsLogger, BookieAddressResolver bookieAddressResolver) throws IOException { this.conf = conf; + this.v3Conf = new ClientConfiguration(conf); + this.v3Conf.setUseV2WireProtocol(false); + this.useV3Enforced = conf.getUseV2WireProtocol(); this.eventLoopGroup = eventLoopGroup; + this.allocator = allocator; this.executor = executor; this.closed = false; this.closeLock = new ReentrantReadWriteLock(); - + this.bookieAddressResolver = bookieAddressResolver; this.registry = ExtensionRegistry.newInstance(); this.authProviderFactory = AuthProviderFactoryFactory.newClientAuthProviderFactory(conf); @@ -113,13 +125,13 @@ public BookieClientImpl(ClientConfiguration conf, EventLoopGroup eventLoopGroup, this.scheduler = scheduler; if (conf.getAddEntryTimeout() > 0 || conf.getReadEntryTimeout() > 0) { - SafeRunnable monitor = safeRun(() -> { - monitorPendingOperations(); - }); - this.timeoutFuture = this.scheduler.scheduleAtFixedRate(monitor, - conf.getTimeoutMonitorIntervalSec(), - conf.getTimeoutMonitorIntervalSec(), - TimeUnit.SECONDS); + this.timeoutFuture = this.scheduler.scheduleAtFixedRate( + () -> monitorPendingOperations(), + conf.getTimeoutMonitorIntervalSec(), + conf.getTimeoutMonitorIntervalSec(), + TimeUnit.SECONDS); + } else { + this.timeoutFuture = null; } } @@ -136,8 +148,8 @@ private int getRc(int rc) { } @Override - public List<BookieSocketAddress> getFaultyBookies() { - List<BookieSocketAddress> faultyBookies = Lists.newArrayList(); + public List<BookieId> getFaultyBookies() { + List<BookieId> faultyBookies = Lists.newArrayList(); for (PerChannelBookieClientPool channelPool : channels.values()) { if (channelPool instanceof DefaultPerChannelBookieClientPool) { DefaultPerChannelBookieClientPool pool = (DefaultPerChannelBookieClientPool) channelPool; @@ -150,14 +162,14 @@ public List<BookieSocketAddress> getFaultyBookies() { } @Override - public boolean isWritable(BookieSocketAddress address, long key) { + public boolean isWritable(BookieId address, long key) { final PerChannelBookieClientPool pcbcPool = lookupClient(address); // if null, let the write initiate connect or fail with whatever error it produces return pcbcPool == null || pcbcPool.isWritable(key); } @Override - public long getNumPendingRequests(BookieSocketAddress address, long ledgerId) { + public long getNumPendingRequests(BookieId address, long ledgerId) { PerChannelBookieClientPool pcbcPool = lookupClient(address); if (pcbcPool == null) { return 0; @@ -169,13
+181,22 @@ public long getNumPendingRequests(BookieSocketAddress address, long ledgerId) { } @Override - public PerChannelBookieClient create(BookieSocketAddress address, PerChannelBookieClientPool pcbcPool, - SecurityHandlerFactory shFactory) throws SecurityException { - return new PerChannelBookieClient(conf, executor, eventLoopGroup, address, statsLogger, - authProviderFactory, registry, pcbcPool, shFactory); + public PerChannelBookieClient create(BookieId address, PerChannelBookieClientPool pcbcPool, + SecurityHandlerFactory shFactory, boolean forceUseV3) throws SecurityException { + StatsLogger statsLoggerForPCBC = statsLogger; + if (conf.getLimitStatsLogging()) { + statsLoggerForPCBC = NullStatsLogger.INSTANCE; + } + ClientConfiguration clientConfiguration = conf; + if (forceUseV3) { + clientConfiguration = v3Conf; + } + return new PerChannelBookieClient(clientConfiguration, executor, eventLoopGroup, allocator, address, + statsLoggerForPCBC, authProviderFactory, registry, pcbcPool, + shFactory, bookieAddressResolver); } - public PerChannelBookieClientPool lookupClient(BookieSocketAddress addr) { + public PerChannelBookieClientPool lookupClient(BookieId addr) { PerChannelBookieClientPool clientPool = channels.get(addr); if (null == clientPool) { closeLock.readLock().lock(); @@ -189,7 +210,7 @@ public PerChannelBookieClientPool lookupClient(BookieSocketAddress addr) { if (null == oldClientPool) { clientPool = newClientPool; // initialize the pool only after we put the pool into the map - clientPool.intialize(); + clientPool.initialize(); } else { clientPool = oldClientPool; newClientPool.close(false); @@ -205,7 +226,7 @@ public PerChannelBookieClientPool lookupClient(BookieSocketAddress addr) { } @Override - public void forceLedger(final BookieSocketAddress addr, final long ledgerId, + public void forceLedger(final BookieId addr, final long ledgerId, final ForceLedgerCallback cb, final Object ctx) { final PerChannelBookieClientPool client = lookupClient(addr); if (client == null) { @@ -217,9 +238,8 @@ public void forceLedger(final BookieSocketAddress addr, final long ledgerId, client.obtain((rc, pcbc) -> { if (rc != BKException.Code.OK) { try { - executor.executeOrdered(ledgerId, safeRun(() -> { - cb.forceLedgerComplete(rc, ledgerId, addr, ctx); - })); + executor.executeOrdered(ledgerId, + () -> cb.forceLedgerComplete(rc, ledgerId, addr, ctx)); } catch (RejectedExecutionException re) { cb.forceLedgerComplete(getRc(BKException.Code.InterruptedException), ledgerId, addr, ctx); } @@ -230,7 +250,7 @@ public void forceLedger(final BookieSocketAddress addr, final long ledgerId, } @Override - public void writeLac(final BookieSocketAddress addr, final long ledgerId, final byte[] masterKey, + public void writeLac(final BookieId addr, final long ledgerId, final byte[] masterKey, final long lac, final ByteBufList toSend, final WriteLacCallback cb, final Object ctx) { final PerChannelBookieClientPool client = lookupClient(addr); if (client == null) { @@ -241,50 +261,38 @@ public void writeLac(final BookieSocketAddress addr, final long ledgerId, final toSend.retain(); client.obtain((rc, pcbc) -> { - if (rc != BKException.Code.OK) { - try { - executor.executeOrdered(ledgerId, safeRun(() -> { - cb.writeLacComplete(rc, ledgerId, addr, ctx); - })); - } catch (RejectedExecutionException re) { - cb.writeLacComplete(getRc(BKException.Code.InterruptedException), ledgerId, addr, ctx); + try { + if (rc != BKException.Code.OK) { + try { + executor.executeOrdered(ledgerId, + () -> cb.writeLacComplete(rc, 
ledgerId, addr, ctx)); + } catch (RejectedExecutionException re) { + cb.writeLacComplete(getRc(BKException.Code.InterruptedException), ledgerId, addr, ctx); + } + } else { + pcbc.writeLac(ledgerId, masterKey, lac, toSend, cb, ctx); } - } else { - pcbc.writeLac(ledgerId, masterKey, lac, toSend, cb, ctx); + } finally { + ReferenceCountUtil.release(toSend); } - - toSend.release(); - }, ledgerId); + }, ledgerId, useV3Enforced); } private void completeAdd(final int rc, final long ledgerId, final long entryId, - final BookieSocketAddress addr, + final BookieId addr, final WriteCallback cb, final Object ctx) { - try { - executor.executeOrdered(ledgerId, new SafeRunnable() { - @Override - public void safeRun() { - cb.writeComplete(rc, ledgerId, entryId, addr, ctx); - } - @Override - public String toString() { - return String.format("CompleteWrite(ledgerId=%d, entryId=%d, addr=%s)", ledgerId, entryId, addr); - } - }); - } catch (RejectedExecutionException ree) { - cb.writeComplete(getRc(BKException.Code.InterruptedException), ledgerId, entryId, addr, ctx); - } + cb.writeComplete(rc, ledgerId, entryId, addr, ctx); } @Override - public void addEntry(final BookieSocketAddress addr, + public void addEntry(final BookieId addr, final long ledgerId, final byte[] masterKey, final long entryId, - final ByteBufList toSend, + final ReferenceCounted toSend, final WriteCallback cb, final Object ctx, final int options, @@ -307,6 +315,33 @@ public void addEntry(final BookieSocketAddress addr, ledgerId); } + @Override + public CompletableFuture<AvailabilityOfEntriesOfLedger> getListOfEntriesOfLedger(BookieId address, + long ledgerId) { + FutureGetListOfEntriesOfLedger futureResult = new FutureGetListOfEntriesOfLedger(ledgerId); + final PerChannelBookieClientPool client = lookupClient(address); + if (client == null) { + futureResult.getListOfEntriesOfLedgerComplete(getRc(BKException.Code.BookieHandleNotAvailableException), + ledgerId, null); + return futureResult; + } + client.obtain((rc, pcbc) -> { + if (rc != BKException.Code.OK) { + try { + executor.executeOrdered(ledgerId, () -> + futureResult.getListOfEntriesOfLedgerComplete(rc, ledgerId, null) + ); + } catch (RejectedExecutionException re) { + futureResult.getListOfEntriesOfLedgerComplete(getRc(BKException.Code.InterruptedException), + ledgerId, null); + } + } else { + pcbc.getListOfEntriesOfLedger(ledgerId, futureResult); + } + }, ledgerId); + return futureResult; + } + private void completeRead(final int rc, final long ledgerId, final long entryId, @@ -314,27 +349,36 @@ private void completeRead(final int rc, final ReadEntryCallback cb, final Object ctx) { try { - executor.executeOrdered(ledgerId, new SafeRunnable() { - @Override - public void safeRun() { - cb.readEntryComplete(rc, ledgerId, entryId, entry, ctx); - } - }); + executor.executeOrdered(ledgerId, () -> cb.readEntryComplete(rc, ledgerId, entryId, entry, ctx)); } catch (RejectedExecutionException ree) { cb.readEntryComplete(getRc(BKException.Code.InterruptedException), ledgerId, entryId, entry, ctx); } } + private void completeBatchRead(final int rc, + final long ledgerId, + final long startEntryId, + final ByteBufList bufList, + final BatchedReadEntryCallback cb, + final Object ctx) { + try { + executor.executeOrdered(ledgerId, () -> cb.readEntriesComplete(rc, ledgerId, startEntryId, bufList, ctx)); + } catch (RejectedExecutionException ree) { + cb.readEntriesComplete(getRc(BKException.Code.InterruptedException), + ledgerId, startEntryId, bufList, ctx); + } + } + private static class ChannelReadyForAddEntryCallback implements 
GenericCallback<PerChannelBookieClient> { private final Handle<ChannelReadyForAddEntryCallback> recyclerHandle; private BookieClientImpl bookieClient; - private ByteBufList toSend; + private ReferenceCounted toSend; private long ledgerId; private long entryId; - private BookieSocketAddress addr; + private BookieId addr; private Object ctx; private WriteCallback cb; private int options; @@ -343,8 +387,8 @@ private static class ChannelReadyForAddEntryCallback private EnumSet<WriteFlag> writeFlags; static ChannelReadyForAddEntryCallback create( - BookieClientImpl bookieClient, ByteBufList toSend, long ledgerId, - long entryId, BookieSocketAddress addr, Object ctx, + BookieClientImpl bookieClient, ReferenceCounted toSend, long ledgerId, + long entryId, BookieId addr, Object ctx, WriteCallback cb, int options, byte[] masterKey, boolean allowFastFail, EnumSet<WriteFlag> writeFlags) { ChannelReadyForAddEntryCallback callback = RECYCLER.get(); @@ -365,14 +409,16 @@ static ChannelReadyForAddEntryCallback create( @Override public void operationComplete(final int rc, PerChannelBookieClient pcbc) { - if (rc != BKException.Code.OK) { - bookieClient.completeAdd(rc, ledgerId, entryId, addr, cb, ctx); - } else { - pcbc.addEntry(ledgerId, masterKey, entryId, - toSend, cb, ctx, options, allowFastFail, writeFlags); + try { + if (rc != BKException.Code.OK) { + bookieClient.completeAdd(rc, ledgerId, entryId, addr, cb, ctx); + } else { + pcbc.addEntry(ledgerId, masterKey, entryId, + toSend, cb, ctx, options, allowFastFail, writeFlags); + } + } finally { + ReferenceCountUtil.release(toSend); } - - toSend.release(); recycle(); } @@ -383,6 +429,7 @@ private ChannelReadyForAddEntryCallback( private static final Recycler<ChannelReadyForAddEntryCallback> RECYCLER = new Recycler<ChannelReadyForAddEntryCallback>() { + @Override protected ChannelReadyForAddEntryCallback newObject( Recycler.Handle<ChannelReadyForAddEntryCallback> recyclerHandle) { return new ChannelReadyForAddEntryCallback(recyclerHandle); @@ -405,7 +452,8 @@ public void recycle() { } } - public void readLac(final BookieSocketAddress addr, final long ledgerId, final ReadLacCallback cb, + @Override + public void readLac(final BookieId addr, final long ledgerId, final ReadLacCallback cb, final Object ctx) { final PerChannelBookieClientPool client = lookupClient(addr); if (client == null) { @@ -416,9 +464,8 @@ public void readLac(final BookieSocketAddress addr, final long ledgerId, final R client.obtain((rc, pcbc) -> { if (rc != BKException.Code.OK) { try { - executor.executeOrdered(ledgerId, safeRun(() -> { - cb.readLacComplete(rc, ledgerId, null, null, ctx); - })); + executor.executeOrdered(ledgerId, + () -> cb.readLacComplete(rc, ledgerId, null, null, ctx)); } catch (RejectedExecutionException re) { cb.readLacComplete(getRc(BKException.Code.InterruptedException), ledgerId, null, null, ctx); @@ -426,20 +473,23 @@ public void readLac(final BookieSocketAddress addr, final long ledgerId, final R } else { pcbc.readLac(ledgerId, cb, ctx); } - }, ledgerId); + }, ledgerId, useV3Enforced); } - public void readEntry(BookieSocketAddress addr, long ledgerId, long entryId, + @Override + public void readEntry(BookieId addr, long ledgerId, long entryId, ReadEntryCallback cb, Object ctx, int flags) { readEntry(addr, ledgerId, entryId, cb, ctx, flags, null); } - public void readEntry(final BookieSocketAddress addr, final long ledgerId, final long entryId, + @Override + public void readEntry(final BookieId addr, final long ledgerId, final long entryId, final ReadEntryCallback cb, final Object ctx, int flags, byte[] masterKey) { readEntry(addr, ledgerId, entryId, cb, ctx, flags, masterKey, false); } - public void readEntry(final 
BookieSocketAddress addr, final long ledgerId, final long entryId, + @Override + public void readEntry(final BookieId addr, final long ledgerId, final long entryId, final ReadEntryCallback cb, final Object ctx, int flags, byte[] masterKey, final boolean allowFastFail) { final PerChannelBookieClientPool client = lookupClient(addr); @@ -458,8 +508,29 @@ public void readEntry(final BookieSocketAddress addr, final long ledgerId, final }, ledgerId); } + @Override + public void batchReadEntries(final BookieId address, final long ledgerId, final long startEntryId, + final int maxCount, final long maxSize, final BatchedReadEntryCallback cb, final Object ctx, + final int flags, final byte[] masterKey, final boolean allowFastFail) { + final PerChannelBookieClientPool client = lookupClient(address); + if (client == null) { + cb.readEntriesComplete(getRc(BKException.Code.BookieHandleNotAvailableException), + ledgerId, startEntryId, null, ctx); + return; + } - public void readEntryWaitForLACUpdate(final BookieSocketAddress addr, + client.obtain((rc, pcbc) -> { + if (rc != BKException.Code.OK) { + completeBatchRead(rc, ledgerId, startEntryId, null, cb, ctx); + } else { + pcbc.batchReadEntries(ledgerId, startEntryId, maxCount, maxSize, cb, ctx, flags, masterKey, + allowFastFail); + } + }, ledgerId); + } + + @Override + public void readEntryWaitForLACUpdate(final BookieId addr, final long ledgerId, final long entryId, final long previousLAC, @@ -484,7 +555,8 @@ public void readEntryWaitForLACUpdate(final BookieSocketAddress addr, }, ledgerId); } - public void getBookieInfo(final BookieSocketAddress addr, final long requested, final GetBookieInfoCallback cb, + @Override + public void getBookieInfo(final BookieId addr, final long requested, final GetBookieInfoCallback cb, final Object ctx) { final PerChannelBookieClientPool client = lookupClient(addr); if (client == null) { @@ -495,9 +567,7 @@ public void getBookieInfo(final BookieSocketAddress addr, final long requested, client.obtain((rc, pcbc) -> { if (rc != BKException.Code.OK) { try { - executor.submit(safeRun(() -> { - cb.getBookieInfoComplete(rc, new BookieInfo(), ctx); - })); + executor.execute(() -> cb.getBookieInfoComplete(rc, new BookieInfo(), ctx)); } catch (RejectedExecutionException re) { cb.getBookieInfoComplete(getRc(BKException.Code.InterruptedException), new BookieInfo(), ctx); @@ -505,7 +575,7 @@ public void getBookieInfo(final BookieSocketAddress addr, final long requested, } else { pcbc.getBookieInfo(requested, cb, ctx); } - }, requested); + }, requested, useV3Enforced); } private void monitorPendingOperations() { @@ -514,10 +584,12 @@ private void monitorPendingOperations() { } } + @Override public boolean isClosed() { return closed; } + @Override public void close() { closeLock.writeLock().lock(); try { @@ -574,7 +646,8 @@ public static void main(String[] args) throws NumberFormatException, IOException } WriteCallback cb = new WriteCallback() { - public void writeComplete(int rc, long ledger, long entry, BookieSocketAddress addr, Object ctx) { + @Override + public void writeComplete(int rc, long ledger, long entry, BookieId addr, Object ctx) { Counter counter = (Counter) ctx; counter.dec(); if (rc != 0) { @@ -583,7 +656,7 @@ public void writeComplete(int rc, long ledger, long entry, BookieSocketAddress a } }; Counter counter = new Counter(); - byte hello[] = "hello".getBytes(UTF_8); + byte[] hello = "hello".getBytes(UTF_8); long ledger = Long.parseLong(args[2]); EventLoopGroup eventLoopGroup = new NioEventLoopGroup(1); OrderedExecutor 
executor = OrderedExecutor.newBuilder() @@ -592,9 +665,9 @@ public void writeComplete(int rc, long ledger, long entry, BookieSocketAddress a .build(); ScheduledExecutorService scheduler = Executors.newSingleThreadScheduledExecutor( new DefaultThreadFactory("BookKeeperClientScheduler")); - BookieClientImpl bc = new BookieClientImpl(new ClientConfiguration(), eventLoopGroup, executor, - scheduler, NullStatsLogger.INSTANCE); - BookieSocketAddress addr = new BookieSocketAddress(args[0], Integer.parseInt(args[1])); + BookieClientImpl bc = new BookieClientImpl(new ClientConfiguration(), eventLoopGroup, + null, executor, scheduler, NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); + BookieId addr = new BookieSocketAddress(args[0], Integer.parseInt(args[1])).toBookieId(); for (int i = 0; i < 100000; i++) { counter.inc(); diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/BookieConnectionPeer.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/BookieConnectionPeer.java index b28c847b0f0..58b4ecf3222 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/BookieConnectionPeer.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/BookieConnectionPeer.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/BookieNettyServer.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/BookieNettyServer.java index 1cbb345a259..98fe6c613ff 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/BookieNettyServer.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/BookieNettyServer.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file @@ -22,8 +22,8 @@ import com.google.common.annotations.VisibleForTesting; import com.google.protobuf.ExtensionRegistry; - import io.netty.bootstrap.ServerBootstrap; +import io.netty.buffer.ByteBufAllocator; import io.netty.buffer.PooledByteBufAllocator; import io.netty.channel.AdaptiveRecvByteBufAllocator; import io.netty.channel.Channel; @@ -33,7 +33,9 @@ import io.netty.channel.ChannelInitializer; import io.netty.channel.ChannelOption; import io.netty.channel.ChannelPipeline; +import io.netty.channel.DefaultEventLoop; import io.netty.channel.DefaultEventLoopGroup; +import io.netty.channel.EventLoop; import io.netty.channel.EventLoopGroup; import io.netty.channel.WriteBufferWaterMark; import io.netty.channel.epoll.EpollEventLoopGroup; @@ -41,16 +43,17 @@ import io.netty.channel.group.ChannelGroup; import io.netty.channel.group.ChannelGroupFuture; import io.netty.channel.group.DefaultChannelGroup; +import io.netty.channel.local.LocalAddress; import io.netty.channel.local.LocalChannel; import io.netty.channel.local.LocalServerChannel; -import io.netty.channel.nio.NioEventLoopGroup; import io.netty.channel.socket.SocketChannel; import io.netty.channel.socket.nio.NioServerSocketChannel; import io.netty.handler.codec.LengthFieldBasedFrameDecoder; -import io.netty.handler.codec.LengthFieldPrepender; +import io.netty.handler.flush.FlushConsolidationHandler; import io.netty.handler.ssl.SslHandler; +import io.netty.incubator.channel.uring.IOUringEventLoopGroup; +import io.netty.incubator.channel.uring.IOUringServerSocketChannel; import io.netty.util.concurrent.DefaultThreadFactory; - import java.io.IOException; import java.net.InetSocketAddress; import java.net.SocketAddress; @@ -60,22 +63,25 @@ import java.util.Collection; import java.util.Collections; import java.util.List; -import java.util.concurrent.ThreadFactory; +import java.util.Queue; +import java.util.concurrent.Executor; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; - import javax.net.ssl.SSLPeerUnverifiedException; - import org.apache.bookkeeper.auth.AuthProviderFactoryFactory; import org.apache.bookkeeper.auth.BookKeeperPrincipal; import org.apache.bookkeeper.auth.BookieAuthProvider; -import org.apache.bookkeeper.bookie.Bookie; import org.apache.bookkeeper.bookie.BookieException; +import org.apache.bookkeeper.bookie.BookieImpl; +import org.apache.bookkeeper.common.collections.BlockingMpscQueue; +import org.apache.bookkeeper.common.util.affinity.CpuAffinity; import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.net.BookieSocketAddress; import org.apache.bookkeeper.processor.RequestProcessor; +import org.apache.bookkeeper.stats.ThreadRegistry; import org.apache.bookkeeper.util.ByteBufList; -import org.apache.commons.lang.SystemUtils; +import org.apache.bookkeeper.util.EventLoopUtil; import org.apache.zookeeper.KeeperException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -86,10 +92,12 @@ class BookieNettyServer { private static final Logger LOG = LoggerFactory.getLogger(BookieNettyServer.class); + public static final String CONSOLIDATION_HANDLER_NAME = "consolidation"; final int maxFrameSize; final ServerConfiguration conf; final EventLoopGroup eventLoopGroup; + final EventLoopGroup acceptorGroup; final EventLoopGroup jvmEventLoopGroup; RequestProcessor requestProcessor; final AtomicBoolean isRunning = new AtomicBoolean(false); @@ -98,48 +106,75 @@ class BookieNettyServer { volatile 
boolean suspended = false; ChannelGroup allChannels; final BookieSocketAddress bookieAddress; + final BookieId bookieId; final InetSocketAddress bindAddress; final BookieAuthProvider.Factory authProviderFactory; final ExtensionRegistry registry = ExtensionRegistry.newInstance(); - BookieNettyServer(ServerConfiguration conf, RequestProcessor processor) + private final ByteBufAllocator allocator; + + BookieNettyServer(ServerConfiguration conf, RequestProcessor processor, ByteBufAllocator allocator) throws IOException, KeeperException, InterruptedException, BookieException { + this.allocator = allocator; this.maxFrameSize = conf.getNettyMaxFrameSizeBytes(); this.conf = conf; this.requestProcessor = processor; this.authProviderFactory = AuthProviderFactoryFactory.newBookieAuthProviderFactory(conf); if (!conf.isDisableServerSocketBind()) { - ThreadFactory threadFactory = new DefaultThreadFactory("bookie-io"); - final int numThreads = conf.getServerNumIOThreads(); - - EventLoopGroup eventLoopGroup; - if (SystemUtils.IS_OS_LINUX) { - try { - eventLoopGroup = new EpollEventLoopGroup(numThreads, threadFactory); - } catch (ExceptionInInitializerError | NoClassDefFoundError | UnsatisfiedLinkError e) { - LOG.warn("Could not use Netty Epoll event loop for bookie server: {}", e.getMessage()); - eventLoopGroup = new NioEventLoopGroup(numThreads, threadFactory); - } - } else { - eventLoopGroup = new NioEventLoopGroup(numThreads, threadFactory); - } - - this.eventLoopGroup = eventLoopGroup; + this.eventLoopGroup = EventLoopUtil.getServerEventLoopGroup(conf, + new DefaultThreadFactory("bookie-io") { + @Override + protected Thread newThread(Runnable r, String name) { + return super.newThread(ThreadRegistry.registerThread(r, "bookie-id"), name); + } + }); + this.acceptorGroup = EventLoopUtil.getServerAcceptorGroup(conf, + new DefaultThreadFactory("bookie-acceptor")); allChannels = new CleanupChannelGroup(eventLoopGroup); } else { this.eventLoopGroup = null; + this.acceptorGroup = null; } if (conf.isEnableLocalTransport()) { - jvmEventLoopGroup = new DefaultEventLoopGroup(); + jvmEventLoopGroup = new DefaultEventLoopGroup(conf.getServerNumIOThreads()) { + @Override + protected EventLoop newChild(Executor executor, Object... 
args) throws Exception { + return new DefaultEventLoop(this, executor) { + @Override + protected Queue<Runnable> newTaskQueue(int maxPendingTasks) { + if (conf.isBusyWaitEnabled()) { + return new BlockingMpscQueue<>(Math.min(maxPendingTasks, 10_000)); + } else { + return super.newTaskQueue(maxPendingTasks); + } + } + }; + } + }; + + // Enable CPU affinity on IO threads + if (conf.isBusyWaitEnabled()) { + for (int i = 0; i < conf.getServerNumIOThreads(); i++) { + jvmEventLoopGroup.next().submit(() -> { + try { + CpuAffinity.acquireCore(); + } catch (Throwable t) { + LOG.warn("Failed to acquire CPU core for thread {} {}", + Thread.currentThread().getName(), t.getMessage(), t); + } + }); + } + } + + allChannels = new CleanupChannelGroup(jvmEventLoopGroup); } else { jvmEventLoopGroup = null; } - - bookieAddress = Bookie.getBookieAddress(conf); + bookieId = BookieImpl.getBookieId(conf); + bookieAddress = BookieImpl.getBookieAddress(conf); if (conf.getListeningInterface() == null) { bindAddress = new InetSocketAddress(conf.getBookiePort()); } else { @@ -164,7 +199,7 @@ void suspendProcessing() { for (Channel channel : allChannels) { // To suspend processing in the bookie, submit a task // that keeps the event loop busy until resume is - // explicitely invoked + // explicitly invoked channel.eventLoop().submit(() -> { while (suspended && isRunning()) { try { @@ -278,17 +313,20 @@ public void channelActive(ChannelHandlerContext ctx) throws Exception { private void listenOn(InetSocketAddress address, BookieSocketAddress bookieAddress) throws InterruptedException { if (!conf.isDisableServerSocketBind()) { ServerBootstrap bootstrap = new ServerBootstrap(); - bootstrap.childOption(ChannelOption.ALLOCATOR, new PooledByteBufAllocator(true)); - bootstrap.group(eventLoopGroup, eventLoopGroup); + bootstrap.option(ChannelOption.ALLOCATOR, allocator); + bootstrap.childOption(ChannelOption.ALLOCATOR, allocator); + bootstrap.group(acceptorGroup, eventLoopGroup); bootstrap.childOption(ChannelOption.TCP_NODELAY, conf.getServerTcpNoDelay()); bootstrap.childOption(ChannelOption.SO_LINGER, conf.getServerSockLinger()); bootstrap.childOption(ChannelOption.RCVBUF_ALLOCATOR, new AdaptiveRecvByteBufAllocator(conf.getRecvByteBufAllocatorSizeMin(), conf.getRecvByteBufAllocatorSizeInitial(), conf.getRecvByteBufAllocatorSizeMax())); - bootstrap.option(ChannelOption.WRITE_BUFFER_WATER_MARK, new WriteBufferWaterMark( + bootstrap.childOption(ChannelOption.WRITE_BUFFER_WATER_MARK, new WriteBufferWaterMark( conf.getServerWriteBufferLowWaterMark(), conf.getServerWriteBufferHighWaterMark())); - if (eventLoopGroup instanceof EpollEventLoopGroup) { + if (eventLoopGroup instanceof IOUringEventLoopGroup) { + bootstrap.channel(IOUringServerSocketChannel.class); + } else if (eventLoopGroup instanceof EpollEventLoopGroup) { bootstrap.channel(EpollServerSocketChannel.class); } else { bootstrap.channel(NioServerSocketChannel.class); @@ -307,11 +345,11 @@ protected void initChannel(SocketChannel ch) throws Exception { new BookieSideConnectionPeerContextHandler(); ChannelPipeline pipeline = ch.pipeline(); - // For ByteBufList, skip the usual LengthFieldPrepender and have the encoder itself to add it - pipeline.addLast("bytebufList", ByteBufList.ENCODER_WITH_SIZE); + pipeline.addLast(CONSOLIDATION_HANDLER_NAME, new FlushConsolidationHandler(1024, true)); + + pipeline.addLast("bytebufList", ByteBufList.ENCODER); pipeline.addLast("lengthbaseddecoder", new LengthFieldBasedFrameDecoder(maxFrameSize, 0, 4, 0, 4)); - pipeline.addLast("lengthprepender", new 
LengthFieldPrepender(4)); pipeline.addLast("bookieProtoDecoder", new BookieProtoEncoding.RequestDecoder(registry)); pipeline.addLast("bookieProtoEncoder", new BookieProtoEncoding.ResponseEncoder(registry)); @@ -328,12 +366,17 @@ protected void initChannel(SocketChannel ch) throws Exception { }); // Bind and start to accept incoming connections + LOG.info("Binding bookie-rpc endpoint to {}", address); Channel listen = bootstrap.bind(address.getAddress(), address.getPort()).sync().channel(); + if (listen.localAddress() instanceof InetSocketAddress) { if (conf.getBookiePort() == 0) { + // this is really really nasty. It's using the configuration object as a notification + // bus. We should get rid of this at some point conf.setBookiePort(((InetSocketAddress) listen.localAddress()).getPort()); } } + } if (conf.isEnableLocalTransport()) { @@ -351,6 +394,8 @@ protected void initChannel(SocketChannel ch) throws Exception { if (jvmEventLoopGroup instanceof DefaultEventLoopGroup) { jvmBootstrap.channel(LocalServerChannel.class); + } else if (jvmEventLoopGroup instanceof IOUringEventLoopGroup) { + jvmBootstrap.channel(IOUringServerSocketChannel.class); } else if (jvmEventLoopGroup instanceof EpollEventLoopGroup) { jvmBootstrap.channel(EpollServerSocketChannel.class); } else { @@ -371,7 +416,6 @@ protected void initChannel(LocalChannel ch) throws Exception { ChannelPipeline pipeline = ch.pipeline(); pipeline.addLast("lengthbaseddecoder", new LengthFieldBasedFrameDecoder(maxFrameSize, 0, 4, 0, 4)); - pipeline.addLast("lengthprepender", new LengthFieldPrepender(4)); pipeline.addLast("bookieProtoDecoder", new BookieProtoEncoding.RequestDecoder(registry)); pipeline.addLast("bookieProtoEncoder", new BookieProtoEncoding.ResponseEncoder(registry)); @@ -386,10 +430,10 @@ protected void initChannel(LocalChannel ch) throws Exception { pipeline.addLast("contextHandler", contextHandler); } }); - + LOG.info("Binding jvm bookie-rpc endpoint to {}", bookieId.toString()); // use the same address 'name', so clients can still find the local Bookie while discovering it using ZK - jvmBootstrap.bind(bookieAddress.getLocalAddress()).sync(); - LocalBookiesRegistry.registerLocalBookieAddress(bookieAddress); + jvmBootstrap.bind(new LocalAddress(bookieId.toString())).sync(); + LocalBookiesRegistry.registerLocalBookieAddress(bookieId); } } @@ -408,6 +452,14 @@ void shutdown() { allChannels.close().awaitUninterruptibly(); + if (acceptorGroup != null) { + try { + acceptorGroup.shutdownGracefully(0, 10, TimeUnit.MILLISECONDS).await(); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + if (eventLoopGroup != null) { try { eventLoopGroup.shutdownGracefully(0, 10, TimeUnit.MILLISECONDS).await(); @@ -417,7 +469,7 @@ void shutdown() { } } if (jvmEventLoopGroup != null) { - LocalBookiesRegistry.unregisterLocalBookieAddress(bookieAddress); + LocalBookiesRegistry.unregisterLocalBookieAddress(bookieAddress.toBookieId()); jvmEventLoopGroup.shutdownGracefully(); }
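With LengthFieldPrepender gone from both the socket and in-JVM pipelines, each encoder in BookieProtoEncoding.java (the next file) now writes its own 4-byte frame-length prefix, while inbound frames are still re-assembled by LengthFieldBasedFrameDecoder. A minimal sketch of that framing contract, using only netty (the 5 MB limit is an arbitrary stand-in for conf.getNettyMaxFrameSizeBytes()):

    import io.netty.buffer.ByteBuf;
    import io.netty.buffer.Unpooled;
    import io.netty.channel.embedded.EmbeddedChannel;
    import io.netty.handler.codec.LengthFieldBasedFrameDecoder;
    import java.nio.charset.StandardCharsets;

    public class FramingContractSketch {
        public static void main(String[] args) {
            // Same decoder parameters as the bookie pipeline: length field at offset 0,
            // 4 bytes wide, no adjustment, strip the 4 length bytes from the output.
            EmbeddedChannel ch = new EmbeddedChannel(
                    new LengthFieldBasedFrameDecoder(5 * 1024 * 1024, 0, 4, 0, 4));

            byte[] payload = "entry".getBytes(StandardCharsets.UTF_8);
            ByteBuf frame = Unpooled.buffer();
            frame.writeInt(payload.length); // the prefix the encoders now write themselves
            frame.writeBytes(payload);

            ch.writeInbound(frame);
            ByteBuf decoded = ch.readInbound();
            assert decoded.readableBytes() == payload.length; // prefix already stripped
        }
    }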
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/BookieProtoEncoding.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/BookieProtoEncoding.java index 151a799f584..0c3b7bf8e8e 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/BookieProtoEncoding.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/BookieProtoEncoding.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -24,23 +24,21 @@ import com.google.protobuf.ExtensionRegistry; import com.google.protobuf.InvalidProtocolBufferException; import com.google.protobuf.MessageLite; - import io.netty.buffer.ByteBuf; import io.netty.buffer.ByteBufAllocator; import io.netty.buffer.ByteBufInputStream; import io.netty.buffer.ByteBufOutputStream; -import io.netty.buffer.Unpooled; import io.netty.channel.ChannelHandler.Sharable; import io.netty.channel.ChannelHandlerContext; import io.netty.channel.ChannelInboundHandlerAdapter; import io.netty.channel.ChannelOutboundHandlerAdapter; import io.netty.channel.ChannelPromise; import io.netty.util.ReferenceCountUtil; - import java.io.IOException; import java.security.NoSuchAlgorithmException; - import org.apache.bookkeeper.proto.BookieProtocol.PacketHeader; +import org.apache.bookkeeper.proto.BookkeeperProtocol.OperationType; +import org.apache.bookkeeper.proto.BookkeeperProtocol.Response; import org.apache.bookkeeper.proto.checksum.MacDigestManager; import org.apache.bookkeeper.util.ByteBufList; import org.slf4j.Logger; @@ -52,6 +50,13 @@ public class BookieProtoEncoding { private static final Logger LOG = LoggerFactory.getLogger(BookieProtoEncoding.class); + /** + * Threshold under which an entry is considered to be "small". + * + * Small entry payloads are copied instead of being passed around as references. + */ + public static final int SMALL_ENTRY_SIZE_THRESHOLD = 16 * 1024; + /** * An encoder/decoder interface for the Bookkeeper protocol. */ @@ -105,17 +110,29 @@ public Object encode(Object msg, ByteBufAllocator allocator) return msg; } BookieProtocol.Request r = (BookieProtocol.Request) msg; - if (r instanceof BookieProtocol.AddRequest) { - BookieProtocol.AddRequest ar = (BookieProtocol.AddRequest) r; - int totalHeaderSize = 4 // for the header - + BookieProtocol.MASTER_KEY_LENGTH; // for the master key - ByteBuf buf = allocator.buffer(totalHeaderSize); + if (r instanceof BookieProtocol.BatchedReadRequest) { + int totalHeaderSize = 4 // for request type + + 8 // for ledger id + + 8 // for entry id + + 8 // for request id + + 4 // for max count + + 8; // for max size + if (r.hasMasterKey()) { + totalHeaderSize += BookieProtocol.MASTER_KEY_LENGTH; + } + ByteBuf buf = allocator.buffer(totalHeaderSize + 4 /* frame size */); + buf.writeInt(totalHeaderSize); buf.writeInt(PacketHeader.toInt(r.getProtocolVersion(), r.getOpCode(), r.getFlags())); - buf.writeBytes(r.getMasterKey(), 0, BookieProtocol.MASTER_KEY_LENGTH); - ByteBufList data = ar.getData(); - ar.recycle(); - data.prepend(buf); - return data; + buf.writeLong(r.getLedgerId()); + buf.writeLong(r.getEntryId()); + buf.writeLong(((BookieProtocol.BatchedReadRequest) r).getRequestId()); + buf.writeInt(((BookieProtocol.BatchedReadRequest) r).getMaxCount()); + buf.writeLong(((BookieProtocol.BatchedReadRequest) r).getMaxSize()); + if (r.hasMasterKey()) { + buf.writeBytes(r.getMasterKey(), 0, BookieProtocol.MASTER_KEY_LENGTH); + } + r.recycle(); + return buf; } else if (r instanceof BookieProtocol.ReadRequest) { int totalHeaderSize = 4 // for request type + 8 // for ledgerId @@ -124,20 +141,22 @@ public Object encode(Object msg, ByteBufAllocator allocator) totalHeaderSize += BookieProtocol.MASTER_KEY_LENGTH; } - ByteBuf buf = allocator.buffer(totalHeaderSize); + ByteBuf buf = allocator.buffer(totalHeaderSize + 4 /* frame size */); + buf.writeInt(totalHeaderSize); buf.writeInt(PacketHeader.toInt(r.getProtocolVersion(), r.getOpCode(), r.getFlags())); buf.writeLong(r.getLedgerId()); 
buf.writeLong(r.getEntryId()); if (r.hasMasterKey()) { buf.writeBytes(r.getMasterKey(), 0, BookieProtocol.MASTER_KEY_LENGTH); } - + r.recycle(); return buf; } else if (r instanceof BookieProtocol.AuthRequest) { BookkeeperProtocol.AuthMessage am = ((BookieProtocol.AuthRequest) r).getAuthMessage(); int totalHeaderSize = 4; // for request type int totalSize = totalHeaderSize + am.getSerializedSize(); - ByteBuf buf = allocator.buffer(totalSize); + ByteBuf buf = allocator.buffer(totalSize + 4 /* frame size */); + buf.writeInt(totalSize); buf.writeInt(PacketHeader.toInt(r.getProtocolVersion(), r.getOpCode(), r.getFlags())); ByteBufOutputStream bufStream = new ByteBufOutputStream(buf); am.writeTo(bufStream); @@ -171,7 +190,7 @@ public Object decode(ByteBuf packet) packet.markReaderIndex(); return BookieProtocol.ParsedAddRequest.create( version, ledgerId, entryId, flags, - masterKey, packet.retain()); + masterKey, packet); } case BookieProtocol.READENTRY: @@ -181,9 +200,24 @@ public Object decode(ByteBuf packet) if ((flags & BookieProtocol.FLAG_DO_FENCING) == BookieProtocol.FLAG_DO_FENCING && version >= 2) { byte[] masterKey = readMasterKey(packet); - return new BookieProtocol.ReadRequest(version, ledgerId, entryId, flags, masterKey); + return BookieProtocol.ReadRequest.create(version, ledgerId, entryId, flags, masterKey); } else { - return new BookieProtocol.ReadRequest(version, ledgerId, entryId, flags, null); + return BookieProtocol.ReadRequest.create(version, ledgerId, entryId, flags, null); + } + case BookieProtocol.BATCH_READ_ENTRY: + ledgerId = packet.readLong(); + entryId = packet.readLong(); + long requestId = packet.readLong(); + int maxCount = packet.readInt(); + long maxSize = packet.readLong(); + if ((flags & BookieProtocol.FLAG_DO_FENCING) == BookieProtocol.FLAG_DO_FENCING + && version >= 2) { + byte[] masterKey = readMasterKey(packet); + return BookieProtocol.BatchedReadRequest.create(version, ledgerId, entryId, flags, masterKey, + requestId, maxCount, maxSize); + } else { + return BookieProtocol.BatchedReadRequest.create(version, ledgerId, entryId, flags, null, + requestId, maxCount, maxSize); } case BookieProtocol.AUTH: BookkeeperProtocol.AuthMessage.Builder builder = BookkeeperProtocol.AuthMessage.newBuilder(); @@ -231,6 +265,8 @@ public ResponseEnDeCoderPreV3(ExtensionRegistry extensionRegistry) { this.extensionRegistry = extensionRegistry; } + private static final int RESPONSE_HEADERS_SIZE = 24; + @Override public Object encode(Object msg, ByteBufAllocator allocator) throws Exception { @@ -238,30 +274,82 @@ public Object encode(Object msg, ByteBufAllocator allocator) return msg; } BookieProtocol.Response r = (BookieProtocol.Response) msg; - ByteBuf buf = allocator.buffer(24); - buf.writeInt(PacketHeader.toInt(r.getProtocolVersion(), r.getOpCode(), (short) 0)); try { if (msg instanceof BookieProtocol.ReadResponse) { + BookieProtocol.ReadResponse rr = (BookieProtocol.ReadResponse) r; + int payloadSize = rr.getData().readableBytes(); + int responseSize = RESPONSE_HEADERS_SIZE + payloadSize; + boolean isSmallEntry = payloadSize < SMALL_ENTRY_SIZE_THRESHOLD; + + int bufferSize = 4 /* frame size */ + RESPONSE_HEADERS_SIZE + + (isSmallEntry ? 
payloadSize : 0); + ByteBuf buf = allocator.buffer(bufferSize); + buf.writeInt(responseSize); + buf.writeInt(PacketHeader.toInt(r.getProtocolVersion(), r.getOpCode(), (short) 0)); buf.writeInt(r.getErrorCode()); buf.writeLong(r.getLedgerId()); buf.writeLong(r.getEntryId()); - BookieProtocol.ReadResponse rr = (BookieProtocol.ReadResponse) r; - if (rr.hasData()) { - return ByteBufList.get(buf, rr.getData()); + if (isSmallEntry) { + buf.writeBytes(rr.getData()); + rr.release(); + return buf; } else { + return ByteBufList.get(buf, rr.getData()); + } + } else if (msg instanceof BookieProtocol.BatchedReadResponse) { + BookieProtocol.BatchedReadResponse brr = (BookieProtocol.BatchedReadResponse) r; + int payloadSize = brr.getData().readableBytes(); + int delimiterSize = brr.getData().size() * 4; // The size of each entry. + boolean isSmallEntry = (payloadSize + delimiterSize) < SMALL_ENTRY_SIZE_THRESHOLD; + + int responseSize = RESPONSE_HEADERS_SIZE + 8 /* request_id */ + payloadSize + delimiterSize; + int bufferSize = 4 /* frame size */ + responseSize; + ByteBuf buf = allocator.buffer(bufferSize); + buf.writeInt(responseSize); + buf.writeInt(PacketHeader.toInt(r.getProtocolVersion(), r.getOpCode(), (short) 0)); + buf.writeInt(r.getErrorCode()); + buf.writeLong(r.getLedgerId()); + buf.writeLong(r.getEntryId()); + buf.writeLong(((BookieProtocol.BatchedReadResponse) r).getRequestId()); + if (isSmallEntry) { + for (int i = 0; i < brr.getData().size(); i++) { + ByteBuf entryData = brr.getData().getBuffer(i); + buf.writeInt(entryData.readableBytes()); + buf.writeBytes(entryData); + } + brr.release(); return buf; + } else { + ByteBufList byteBufList = ByteBufList.get(buf); + for (int i = 0; i < brr.getData().size(); i++) { + ByteBuf entryData = brr.getData().getBuffer(i); + ByteBuf entryLengthBuf = allocator.buffer(4); + entryLengthBuf.writeInt(entryData.readableBytes()); + byteBufList.add(entryLengthBuf); + byteBufList.add(entryData); + } + return byteBufList; } } else if (msg instanceof BookieProtocol.AddResponse) { + ByteBuf buf = allocator.buffer(RESPONSE_HEADERS_SIZE + 4 /* frame size */); + buf.writeInt(RESPONSE_HEADERS_SIZE); + buf.writeInt(PacketHeader.toInt(r.getProtocolVersion(), r.getOpCode(), (short) 0)); buf.writeInt(r.getErrorCode()); buf.writeLong(r.getLedgerId()); buf.writeLong(r.getEntryId()); - return buf; } else if (msg instanceof BookieProtocol.AuthResponse) { BookkeeperProtocol.AuthMessage am = ((BookieProtocol.AuthResponse) r).getAuthMessage(); - return ByteBufList.get(buf, Unpooled.wrappedBuffer(am.toByteArray())); + int payloadSize = 4 + am.getSerializedSize(); + int bufferSize = payloadSize + 4 /* frame size */; + + ByteBuf buf = allocator.buffer(bufferSize); + buf.writeInt(payloadSize); + buf.writeInt(PacketHeader.toInt(r.getProtocolVersion(), r.getOpCode(), (short) 0)); + buf.writeBytes(am.toByteArray()); + return buf; } else { LOG.error("Cannot encode unknown response type {}", msg.getClass().getName()); return msg; @@ -293,6 +381,25 @@ public Object decode(ByteBuf buffer) return new BookieProtocol.ReadResponse( version, rc, ledgerId, entryId, buffer.retainedSlice()); + case BookieProtocol.BATCH_READ_ENTRY: + rc = buffer.readInt(); + ledgerId = buffer.readLong(); + entryId = buffer.readLong(); + long requestId = buffer.readLong(); + ByteBufList data = null; + while (buffer.readableBytes() > 0) { + int entrySize = buffer.readInt(); + int entryPos = buffer.readerIndex(); + if (data == null) { + data = ByteBufList.get(buffer.retainedSlice(entryPos, entrySize)); + 
buffer.readerIndex(entryPos + entrySize); + } else { + data.add(buffer.retainedSlice(entryPos, entrySize)); + buffer.readerIndex(entryPos + entrySize); + } + } + return new BookieProtocol.BatchedReadResponse(version, rc, ledgerId, entryId, requestId, data == null + ? ByteBufList.get() : data.retain()); case BookieProtocol.AUTH: ByteBufInputStream bufStream = new ByteBufInputStream(buffer); BookkeeperProtocol.AuthMessage.Builder builder = BookkeeperProtocol.AuthMessage.newBuilder(); @@ -303,6 +410,14 @@ public Object decode(ByteBuf buffer) throw new IllegalStateException("Received unknown response : op code = " + opCode); } } + + public static void serializeAddResponseInto(int rc, BookieProtocol.ParsedAddRequest req, ByteBuf buf) { + buf.writeInt(RESPONSE_HEADERS_SIZE); // Frame size + buf.writeInt(PacketHeader.toInt(req.getProtocolVersion(), req.getOpCode(), (short) 0)); + buf.writeInt(rc); // rc-code + buf.writeLong(req.getLedgerId()); + buf.writeLong(req.getEntryId()); + } } /** @@ -354,17 +469,27 @@ public Object encode(Object msg, ByteBufAllocator allocator) throws Exception { private static ByteBuf serializeProtobuf(MessageLite msg, ByteBufAllocator allocator) { int size = msg.getSerializedSize(); - ByteBuf buf = allocator.heapBuffer(size, size); + int frameSize = size + 4; + + // Protobuf serialization is the last step of the netty pipeline. We used to allocate + // a heap buffer while serializing and pass it down to the netty library. + // In AbstractChannel#filterOutboundMessage(), netty copies that data to a direct buffer if + // it is currently in heap (otherwise skips it and uses it directly). + // Allocating a direct buffer reduces unnecessary CPU cycles for buffer copies in BK client + // and also helps take pressure off the GC, since there is less memory churn. + // Bookies aren't usually CPU bound. This change improves READ_ENTRY code paths by a small factor as well. + ByteBuf buf = allocator.directBuffer(frameSize, frameSize); + buf.writeInt(size); try { - msg.writeTo(CodedOutputStream.newInstance(buf.array(), buf.arrayOffset() + buf.writerIndex(), size)); + msg.writeTo(CodedOutputStream.newInstance(buf.nioBuffer(buf.writerIndex(), size))); } catch (IOException e) { // This is in-memory serialization, should not fail throw new RuntimeException(e); } // Advance writer idx - buf.writerIndex(buf.capacity()); + buf.writerIndex(frameSize); return buf; }
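One subtlety in serializeProtobuf() above: CodedOutputStream writes through the buffer's NIO view, which shares memory with the ByteBuf but does not advance netty's writerIndex, hence the explicit writerIndex(frameSize) at the end. A small standalone sketch of that behavior (plain netty, no bookkeeper types):

    import io.netty.buffer.ByteBuf;
    import io.netty.buffer.PooledByteBufAllocator;
    import java.nio.ByteBuffer;

    public class WriterIndexSketch {
        public static void main(String[] args) {
            ByteBuf buf = PooledByteBufAllocator.DEFAULT.directBuffer(16, 16);
            buf.writeInt(12); // frame-length prefix, writerIndex is now 4

            ByteBuffer view = buf.nioBuffer(buf.writerIndex(), 12);
            view.putLong(42L); // writes through the shared memory...

            assert buf.writerIndex() == 4; // ...but the ByteBuf's writerIndex is unchanged,
            buf.writerIndex(4 + 12);       // so it must be advanced by hand
            buf.release();
        }
    }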
@@ -387,7 +512,9 @@ public void write(ChannelHandlerContext ctx, Object msg, ChannelPromise promise) if (LOG.isTraceEnabled()) { LOG.trace("Encode request {} to channel {}.", msg, ctx.channel()); } - if (msg instanceof BookkeeperProtocol.Request) { + if (msg instanceof ByteBuf || msg instanceof ByteBufList) { + ctx.write(msg, promise); + } else if (msg instanceof BookkeeperProtocol.Request) { ctx.write(reqV3.encode(msg, ctx.alloc()), promise); } else if (msg instanceof BookieProtocol.Request) { ctx.write(reqPreV3.encode(msg, ctx.alloc()), promise); @@ -463,7 +590,10 @@ public void write(ChannelHandlerContext ctx, Object msg, ChannelPromise promise) if (LOG.isTraceEnabled()) { LOG.trace("Encode response {} to channel {}.", msg, ctx.channel()); } - if (msg instanceof BookkeeperProtocol.Response) { + + if (msg instanceof ByteBuf) { + ctx.write(msg, promise); + } else if (msg instanceof BookkeeperProtocol.Response) { ctx.write(repV3.encode(msg, ctx.alloc()), promise); } else if (msg instanceof BookieProtocol.Response) { ctx.write(repPreV3.encode(msg, ctx.alloc()), promise); @@ -479,11 +609,20 @@ public void write(ChannelHandlerContext ctx, Object msg, ChannelPromise promise) */ @Sharable public static class ResponseDecoder extends ChannelInboundHandlerAdapter { - final EnDecoder rep; + final EnDecoder repPreV3; + final EnDecoder repV3; + final boolean useV2Protocol; + final boolean tlsEnabled; + boolean usingV3Protocol; - ResponseDecoder(ExtensionRegistry extensionRegistry, boolean useV2Protocol) { - rep = useV2Protocol - ? 
new ResponseEnDeCoderPreV3(extensionRegistry) : new ResponseEnDecoderV3(extensionRegistry); + ResponseDecoder(ExtensionRegistry extensionRegistry, + boolean useV2Protocol, + boolean tlsEnabled) { + this.repPreV3 = new ResponseEnDeCoderPreV3(extensionRegistry); + this.repV3 = new ResponseEnDecoderV3(extensionRegistry); + this.useV2Protocol = useV2Protocol; + this.tlsEnabled = tlsEnabled; + usingV3Protocol = true; } @Override @@ -499,7 +638,32 @@ public void channelRead(ChannelHandlerContext ctx, Object msg) throws Exception } ByteBuf buffer = (ByteBuf) msg; buffer.markReaderIndex(); - ctx.fireChannelRead(rep.decode(buffer)); + + Object result; + if (!useV2Protocol) { // always use v3 protocol + result = repV3.decode(buffer); + } else { // use v2 protocol, but + // if TLS is enabled, the first message `startTLS` is a protobuf message + if (tlsEnabled && usingV3Protocol) { + try { + result = repV3.decode(buffer); + if (result instanceof Response + && OperationType.START_TLS == ((Response) result).getHeader().getOperation()) { + usingV3Protocol = false; + if (LOG.isDebugEnabled()) { + LOG.debug("Degrade bookkeeper to v2 after starting TLS."); + } + } + } catch (InvalidProtocolBufferException e) { + usingV3Protocol = false; + buffer.resetReaderIndex(); + result = repPreV3.decode(buffer); + } + } else { + result = repPreV3.decode(buffer); + } + } + ctx.fireChannelRead(result); } finally { ReferenceCountUtil.release(msg); }
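Both the BATCH_READ_ENTRY encoder and decoder above agree on a simple sub-format inside the frame: each entry is preceded by its own 4-byte length (the delimiterSize accounted for on the encode side). A minimal round-trip sketch of that per-entry layout, with hypothetical entry contents:

    import io.netty.buffer.ByteBuf;
    import io.netty.buffer.Unpooled;
    import java.nio.charset.StandardCharsets;
    import java.util.ArrayList;
    import java.util.List;

    public class BatchedEntryLayoutSketch {
        public static void main(String[] args) {
            // Encode: length-prefix each entry, as the batched-read encoder does.
            ByteBuf body = Unpooled.buffer();
            for (String s : new String[] {"entry-0", "entry-1"}) {
                byte[] bytes = s.getBytes(StandardCharsets.UTF_8);
                body.writeInt(bytes.length); // 4-byte per-entry delimiter
                body.writeBytes(bytes);
            }

            // Decode: mirrors the BATCH_READ_ENTRY loop in the v2 response decoder.
            List<ByteBuf> entries = new ArrayList<>();
            while (body.readableBytes() > 0) {
                int entrySize = body.readInt();
                entries.add(body.readRetainedSlice(entrySize));
            }
            assert entries.size() == 2;
        }
    }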
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/BookieProtocol.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/BookieProtocol.java index 9982cca712a..6a93f8d2cc6 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/BookieProtocol.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/BookieProtocol.java @@ -25,7 +25,7 @@ import io.netty.util.Recycler; import io.netty.util.Recycler.Handle; import io.netty.util.ReferenceCountUtil; - +import io.netty.util.ReferenceCounted; import org.apache.bookkeeper.proto.BookkeeperProtocol.AuthMessage; import org.apache.bookkeeper.util.ByteBufList; @@ -133,6 +133,7 @@ public static short getFlags(int packetHeader) { byte READ_LAC = 4; byte WRITE_LAC = 5; byte GET_BOOKIE_INFO = 6; + byte BATCH_READ_ENTRY = 7; /** * The error code that indicates success. @@ -180,6 +181,11 @@ public static short getFlags(int packetHeader) { */ int ETOOMANYREQUESTS = 106; + /** + * Ledger in unknown state. + */ + int EUNKNOWNLEDGERSTATE = 107; + short FLAG_NONE = 0x0; short FLAG_DO_FENCING = 0x0001; short FLAG_RECOVERY_ADD = 0x0002; @@ -248,15 +254,14 @@ public void recycle() {} } /** - * A Request that adds data. + * This is similar to add request, but it is used when processing the request on the bookie side. */ - class AddRequest extends Request { - ByteBufList data; + class ParsedAddRequest extends Request { + ByteBuf data; - static AddRequest create(byte protocolVersion, long ledgerId, - long entryId, short flags, byte[] masterKey, - ByteBufList data) { - AddRequest add = RECYCLER.get(); + static ParsedAddRequest create(byte protocolVersion, long ledgerId, long entryId, short flags, byte[] masterKey, + ByteBuf data) { + ParsedAddRequest add = RECYCLER.get(); add.protocolVersion = protocolVersion; add.opCode = ADDENTRY; add.ledgerId = ledgerId; @@ -267,23 +272,28 @@ static AddRequest create(byte protocolVersion, long ledgerId, return add; } - ByteBufList getData() { + ByteBuf getData() { // We need to have different ByteBufList instances for each bookie write - return ByteBufList.clone(data); + return data; } boolean isRecoveryAdd() { return (flags & FLAG_RECOVERY_ADD) == FLAG_RECOVERY_ADD; } - private final Handle<AddRequest> recyclerHandle; - private AddRequest(Handle<AddRequest> recyclerHandle) { + void release() { + ReferenceCountUtil.release(data); + } + + private final Handle<ParsedAddRequest> recyclerHandle; + private ParsedAddRequest(Handle<ParsedAddRequest> recyclerHandle) { this.recyclerHandle = recyclerHandle; } - private static final Recycler<AddRequest> RECYCLER = new Recycler<AddRequest>() { - protected AddRequest newObject(Handle<AddRequest> handle) { - return new AddRequest(handle); + private static final Recycler<ParsedAddRequest> RECYCLER = new Recycler<ParsedAddRequest>() { + @Override + protected ParsedAddRequest newObject(Handle<ParsedAddRequest> handle) { + return new ParsedAddRequest(handle); } }; @@ -292,52 +302,46 @@ public void recycle() { ledgerId = -1; entryId = -1; masterKey = null; - ReferenceCountUtil.safeRelease(data); data = null; recyclerHandle.recycle(this); } } /** - * This is similar to add request, but it is used when processing the request on the bookie side. + * A Request that reads data. 
*/ - class ParsedAddRequest extends Request { - ByteBuf data; + class ReadRequest extends Request { - static ParsedAddRequest create(byte protocolVersion, long ledgerId, long entryId, short flags, byte[] masterKey, - ByteBuf data) { - ParsedAddRequest add = RECYCLER.get(); - add.protocolVersion = protocolVersion; - add.opCode = ADDENTRY; - add.ledgerId = ledgerId; - add.entryId = entryId; - add.flags = flags; - add.masterKey = masterKey; - add.data = data.retain(); - return add; + static ReadRequest create(byte protocolVersion, long ledgerId, long entryId, + short flags, byte[] masterKey) { + ReadRequest read = RECYCLER.get(); + read.protocolVersion = protocolVersion; + read.opCode = READENTRY; + read.ledgerId = ledgerId; + read.entryId = entryId; + read.flags = flags; + read.masterKey = masterKey; + return read; } - ByteBuf getData() { - // We need to have different ByteBufList instances for each bookie write - return data; + boolean isFencing() { + return (flags & FLAG_DO_FENCING) == FLAG_DO_FENCING; } - boolean isRecoveryAdd() { - return (flags & FLAG_RECOVERY_ADD) == FLAG_RECOVERY_ADD; - } + private final Handle<ReadRequest> recyclerHandle; - void release() { - data.release(); + protected ReadRequest() { + recyclerHandle = null; } - private final Handle<ParsedAddRequest> recyclerHandle; - private ParsedAddRequest(Handle<ParsedAddRequest> recyclerHandle) { + private ReadRequest(Handle<ReadRequest> recyclerHandle) { this.recyclerHandle = recyclerHandle; } - private static final Recycler<ParsedAddRequest> RECYCLER = new Recycler<ParsedAddRequest>() { - protected ParsedAddRequest newObject(Handle<ParsedAddRequest> handle) { - return new ParsedAddRequest(handle); + private static final Recycler<ReadRequest> RECYCLER = new Recycler<ReadRequest>() { + @Override + protected ReadRequest newObject(Handle<ReadRequest> handle) { + return new ReadRequest(handle); } }; @@ -346,22 +350,74 @@ public void recycle() { ledgerId = -1; entryId = -1; masterKey = null; - data = null; - recyclerHandle.recycle(this); + if (recyclerHandle != null) { + recyclerHandle.recycle(this); + } } } /** - * A Request that reads data. + * The request for reading data with batch optimization. + * The ledger_id and entry_id will be used as start_ledger_id and start_entry_id. + * A batch read operation can only happen on one ledger. 
*/ - class ReadRequest extends Request { - ReadRequest(byte protocolVersion, long ledgerId, long entryId, - short flags, byte[] masterKey) { - init(protocolVersion, READENTRY, ledgerId, entryId, flags, masterKey); + class BatchedReadRequest extends ReadRequest { + + long requestId; + int maxCount; + long maxSize; + + static BatchedReadRequest create(byte protocolVersion, long ledgerId, long entryId, + short flags, byte[] masterKey, long requestId, int maxCount, long maxSize) { + BatchedReadRequest request = RECYCLER.get(); + request.protocolVersion = protocolVersion; + request.ledgerId = ledgerId; + request.entryId = entryId; + request.flags = flags; + request.masterKey = masterKey; + request.requestId = requestId; + request.maxCount = maxCount; + request.maxSize = maxSize; + request.opCode = BATCH_READ_ENTRY; + return request; } - boolean isFencing() { - return (flags & FLAG_DO_FENCING) == FLAG_DO_FENCING; + int getMaxCount() { + return maxCount; + } + + long getMaxSize() { + return maxSize; + } + + long getRequestId() { + return requestId; + } + + private final Handle<BatchedReadRequest> recyclerHandle; + + private BatchedReadRequest(Handle<BatchedReadRequest> recyclerHandle) { + this.recyclerHandle = recyclerHandle; + } + + private static final Recycler<BatchedReadRequest> RECYCLER = new Recycler<BatchedReadRequest>() { + @Override + protected BatchedReadRequest newObject(Handle<BatchedReadRequest> handle) { + return new BatchedReadRequest(handle); + } + }; + + @Override + public void recycle() { + ledgerId = -1; + entryId = -1; + masterKey = null; + maxCount = -1; + maxSize = -1; + requestId = -1; + if (recyclerHandle != null) { + recyclerHandle.recycle(this); + } } } @@ -426,10 +482,8 @@ public String toString() { opCode, ledgerId, entryId, errorCode); } - void retain() { - } - - void release() { + boolean release() { + return true; } void recycle() { @@ -439,7 +493,7 @@ void recycle() { /** * A request that reads data. */ - class ReadResponse extends Response { + class ReadResponse extends Response implements ReferenceCounted { final ByteBuf data; ReadResponse(byte protocolVersion, int errorCode, long ledgerId, long entryId) { @@ -460,13 +514,109 @@ ByteBuf getData() { } @Override - public void retain() { + public int refCnt() { + return data.refCnt(); + } + + @Override + public ReferenceCounted retain() { data.retain(); + return this; + } + + @Override + public ReferenceCounted retain(int increment) { + return data.retain(increment); + } + + @Override + public ReferenceCounted touch() { + data.touch(); + return this; + } + + @Override + public ReferenceCounted touch(Object hint) { + data.touch(hint); + return this; + } + + @Override + public boolean release() { + return data.release(); + } + + @Override + public boolean release(int decrement) { + return data.release(decrement); + } + } + + /** + * The response for batched read. + * The ledger_id and entry_id will be used as start_ledger_id and start_entry_id. + * All the returned data is from one ledger. 
+ */ + class BatchedReadResponse extends Response implements ReferenceCounted { + + final long requestId; + final ByteBufList data; + + BatchedReadResponse(byte protocolVersion, int errorCode, long ledgerId, long entryId, long requestId) { + this(protocolVersion, errorCode, ledgerId, entryId, requestId, ByteBufList.get()); + } + + BatchedReadResponse(byte protocolVersion, int errorCode, long ledgerId, long entryId, long requestId, + ByteBufList data) { + init(protocolVersion, BATCH_READ_ENTRY, errorCode, ledgerId, entryId); + this.requestId = requestId; + this.data = data; + } + + ByteBufList getData() { + return data; + } + + long getRequestId() { + return requestId; + } + + @Override + public int refCnt() { + return data.refCnt(); + } + + @Override + public ReferenceCounted retain() { + data.retain(); + return this; + } + + @Override + public ReferenceCounted retain(int increment) { + return data.retain(increment); + } + + @Override + public ReferenceCounted touch() { + data.touch(); + return this; + } + + @Override + public ReferenceCounted touch(Object hint) { + data.touch(hint); + return this; + } + + @Override + public boolean release() { + return data.release(); } @Override - public void release() { - data.release(); + public boolean release(int decrement) { + return data.release(decrement); } } @@ -486,6 +636,7 @@ private AddResponse(Handle<AddResponse> recyclerHandle) { } private static final Recycler<AddResponse> RECYCLER = new Recycler<AddResponse>() { + @Override protected AddResponse newObject(Handle<AddResponse> handle) { return new AddResponse(handle); }
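The request and response classes above all follow the same netty Recycler idiom: pooled instances come from RECYCLER.get(), fields are reset in recycle(), and classes that are also instantiated directly (such as ReadRequest's protected constructor) keep a nullable handle. A self-contained sketch of that idiom with a hypothetical pooled class:

    import io.netty.util.Recycler;

    public final class PooledMsgSketch {
        static final class PooledMsg {
            private final Recycler.Handle<PooledMsg> recyclerHandle;
            long ledgerId;

            private PooledMsg(Recycler.Handle<PooledMsg> recyclerHandle) {
                this.recyclerHandle = recyclerHandle;
            }

            private static final Recycler<PooledMsg> RECYCLER = new Recycler<PooledMsg>() {
                @Override
                protected PooledMsg newObject(Recycler.Handle<PooledMsg> handle) {
                    return new PooledMsg(handle);
                }
            };

            static PooledMsg create(long ledgerId) {
                PooledMsg msg = RECYCLER.get(); // reuses a recycled instance when available
                msg.ledgerId = ledgerId;
                return msg;
            }

            void recycle() {
                ledgerId = -1; // reset state before returning the object to the pool
                recyclerHandle.recycle(this);
            }
        }

        public static void main(String[] args) {
            PooledMsg msg = PooledMsg.create(42L);
            msg.recycle();
        }
    }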
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/BookieRequestHandler.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/BookieRequestHandler.java index d2e954885af..3d906dba449 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/BookieRequestHandler.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/BookieRequestHandler.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -20,34 +20,44 @@ */ package org.apache.bookkeeper.proto; +import io.netty.buffer.ByteBuf; import io.netty.channel.ChannelHandlerContext; import io.netty.channel.ChannelInboundHandlerAdapter; import io.netty.channel.group.ChannelGroup; - import java.nio.channels.ClosedChannelException; - +import lombok.extern.slf4j.Slf4j; import org.apache.bookkeeper.conf.ServerConfiguration; import org.apache.bookkeeper.processor.RequestProcessor; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; /** * Server-side handler for bookkeeper requests. */ -class BookieRequestHandler extends ChannelInboundHandlerAdapter { +@Slf4j +public class BookieRequestHandler extends ChannelInboundHandlerAdapter { + + private static final int DEFAULT_PENDING_RESPONSE_SIZE = 256; - private static final Logger LOG = LoggerFactory.getLogger(BookieRequestHandler.class); private final RequestProcessor requestProcessor; private final ChannelGroup allChannels; + private ChannelHandlerContext ctx; + + private ByteBuf pendingSendResponses = null; + private int maxPendingResponsesSize = DEFAULT_PENDING_RESPONSE_SIZE; + BookieRequestHandler(ServerConfiguration conf, RequestProcessor processor, ChannelGroup allChannels) { this.requestProcessor = processor; this.allChannels = allChannels; } + public ChannelHandlerContext ctx() { + return ctx; + } + @Override public void channelActive(ChannelHandlerContext ctx) throws Exception { - LOG.info("Channel connected {}", ctx.channel()); + log.info("Channel connected {}", ctx.channel()); + this.ctx = ctx; super.channelActive(ctx); } @@ -58,16 +68,16 @@ public void channelRegistered(ChannelHandlerContext ctx) throws Exception { @Override public void channelInactive(ChannelHandlerContext ctx) throws Exception { - LOG.info("Channels disconnected: {}", ctx.channel()); + log.info("Channels disconnected: {}", ctx.channel()); } @Override public void exceptionCaught(ChannelHandlerContext ctx, Throwable cause) throws Exception { if (cause instanceof ClosedChannelException) { - LOG.info("Client died before request could be completed", cause); + log.info("Client died before request could be completed on {}", ctx.channel(), cause); return; } - LOG.error("Unhandled exception occurred in I/O thread or handler", cause); + log.error("Unhandled exception occurred in I/O thread or handler on {}", ctx.channel(), cause); ctx.close(); } @@ -77,6 +87,27 @@ public void channelRead(ChannelHandlerContext ctx, Object msg) throws Exception { ctx.fireChannelRead(msg); return; } - requestProcessor.processRequest(msg, ctx.channel()); + requestProcessor.processRequest(msg, this); + } + + public synchronized void prepareSendResponseV2(int rc, BookieProtocol.ParsedAddRequest req) { + if (pendingSendResponses == null) { + pendingSendResponses = ctx().alloc().directBuffer(maxPendingResponsesSize); + } + BookieProtoEncoding.ResponseEnDeCoderPreV3.serializeAddResponseInto(rc, req, pendingSendResponses); + } + + public synchronized void flushPendingResponse() { + if (pendingSendResponses != null) { + maxPendingResponsesSize = (int) Math.max( + maxPendingResponsesSize * 0.5 + 0.5 * pendingSendResponses.readableBytes(), + DEFAULT_PENDING_RESPONSE_SIZE); + if (ctx().channel().isActive()) { + ctx().writeAndFlush(pendingSendResponses, ctx.voidPromise()); + } else { + pendingSendResponses.release(); + } + pendingSendResponses = null; + } } }
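flushPendingResponse() above sizes the next pending-response buffer with an exponential moving average (equal weight on the previous estimate and the latest observation), floored at the 256-byte default, so the pre-allocated buffer tracks the typical batch size without collapsing on an idle channel. The sizing rule in isolation, extracted for clarity:

    // Same constants and arithmetic as the handler above, as a standalone estimator.
    final class ResponseBufferSizeEstimator {
        private static final int DEFAULT_PENDING_RESPONSE_SIZE = 256;
        private int maxPendingResponsesSize = DEFAULT_PENDING_RESPONSE_SIZE;

        int nextBufferSize(int observedBatchBytes) {
            maxPendingResponsesSize = (int) Math.max(
                    maxPendingResponsesSize * 0.5 + 0.5 * observedBatchBytes,
                    DEFAULT_PENDING_RESPONSE_SIZE);
            return maxPendingResponsesSize;
        }
    }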
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/BookieRequestProcessor.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/BookieRequestProcessor.java index 8ee363fc78f..7b55545b5d2 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/BookieRequestProcessor.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/BookieRequestProcessor.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -21,34 +21,6 @@ package org.apache.bookkeeper.proto; import static com.google.common.base.Preconditions.checkArgument; -import static org.apache.bookkeeper.bookie.BookKeeperServerStats.ADD_ENTRY; -import static org.apache.bookkeeper.bookie.BookKeeperServerStats.ADD_ENTRY_BLOCKED; -import static org.apache.bookkeeper.bookie.BookKeeperServerStats.ADD_ENTRY_BLOCKED_WAIT; -import static org.apache.bookkeeper.bookie.BookKeeperServerStats.ADD_ENTRY_IN_PROGRESS; -import static org.apache.bookkeeper.bookie.BookKeeperServerStats.ADD_ENTRY_REQUEST; -import static org.apache.bookkeeper.bookie.BookKeeperServerStats.CHANNEL_WRITE; -import static org.apache.bookkeeper.bookie.BookKeeperServerStats.FORCE_LEDGER; -import static org.apache.bookkeeper.bookie.BookKeeperServerStats.FORCE_LEDGER_REQUEST; -import static org.apache.bookkeeper.bookie.BookKeeperServerStats.GET_BOOKIE_INFO; -import static org.apache.bookkeeper.bookie.BookKeeperServerStats.GET_BOOKIE_INFO_REQUEST; -import static org.apache.bookkeeper.bookie.BookKeeperServerStats.READ_ENTRY; -import static org.apache.bookkeeper.bookie.BookKeeperServerStats.READ_ENTRY_BLOCKED; -import static org.apache.bookkeeper.bookie.BookKeeperServerStats.READ_ENTRY_BLOCKED_WAIT; -import static org.apache.bookkeeper.bookie.BookKeeperServerStats.READ_ENTRY_FENCE_READ; -import static org.apache.bookkeeper.bookie.BookKeeperServerStats.READ_ENTRY_FENCE_REQUEST; -import static org.apache.bookkeeper.bookie.BookKeeperServerStats.READ_ENTRY_FENCE_WAIT; -import static org.apache.bookkeeper.bookie.BookKeeperServerStats.READ_ENTRY_IN_PROGRESS; -import static org.apache.bookkeeper.bookie.BookKeeperServerStats.READ_ENTRY_LONG_POLL_PRE_WAIT; -import static org.apache.bookkeeper.bookie.BookKeeperServerStats.READ_ENTRY_LONG_POLL_READ; -import static org.apache.bookkeeper.bookie.BookKeeperServerStats.READ_ENTRY_LONG_POLL_REQUEST; -import static org.apache.bookkeeper.bookie.BookKeeperServerStats.READ_ENTRY_LONG_POLL_WAIT; -import static org.apache.bookkeeper.bookie.BookKeeperServerStats.READ_ENTRY_REQUEST; -import static org.apache.bookkeeper.bookie.BookKeeperServerStats.READ_ENTRY_SCHEDULING_DELAY; -import static org.apache.bookkeeper.bookie.BookKeeperServerStats.READ_LAC; -import static org.apache.bookkeeper.bookie.BookKeeperServerStats.READ_LAC_REQUEST; -import static org.apache.bookkeeper.bookie.BookKeeperServerStats.READ_LAST_ENTRY_NOENTRY_ERROR; -import static org.apache.bookkeeper.bookie.BookKeeperServerStats.WRITE_LAC; -import static org.apache.bookkeeper.bookie.BookKeeperServerStats.WRITE_LAC_REQUEST; import static org.apache.bookkeeper.proto.RequestUtils.hasFlag; import com.google.common.annotations.VisibleForTesting; @@ -56,21 +28,19 @@ import com.google.common.cache.CacheBuilder; import com.google.common.util.concurrent.ThreadFactoryBuilder; import com.google.protobuf.ByteString; - +import io.netty.buffer.ByteBufAllocator; import io.netty.channel.Channel; +import io.netty.channel.group.ChannelGroup; import io.netty.handler.ssl.SslHandler; import io.netty.util.HashedWheelTimer; import io.netty.util.concurrent.Future; import io.netty.util.concurrent.GenericFutureListener; - import java.util.Optional; import java.util.concurrent.ExecutorService; import java.util.concurrent.RejectedExecutionException; import java.util.concurrent.Semaphore; import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicInteger; import java.util.function.Consumer; - import lombok.AccessLevel; import lombok.Getter; import 
org.apache.bookkeeper.auth.AuthProviderFactoryFactory; @@ -80,15 +50,14 @@ import org.apache.bookkeeper.common.util.OrderedExecutor; import org.apache.bookkeeper.conf.ServerConfiguration; import org.apache.bookkeeper.processor.RequestProcessor; -import org.apache.bookkeeper.stats.Counter; -import org.apache.bookkeeper.stats.Gauge; -import org.apache.bookkeeper.stats.OpStatsLogger; import org.apache.bookkeeper.stats.StatsLogger; import org.apache.bookkeeper.tls.SecurityException; import org.apache.bookkeeper.tls.SecurityHandlerFactory; import org.apache.bookkeeper.tls.SecurityHandlerFactory.NodeType; +import org.apache.bookkeeper.util.NettyChannelUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.slf4j.MDC; /** * An implementation of the RequestProcessor interface. @@ -97,6 +66,7 @@ public class BookieRequestProcessor implements RequestProcessor { private static final Logger LOG = LoggerFactory.getLogger(BookieRequestProcessor.class); + public static final String TLS_HANDLER_NAME = "tls"; /** * The server configuration. We use this for getting the number of add and read @@ -104,6 +74,7 @@ public class BookieRequestProcessor implements RequestProcessor { */ private final ServerConfiguration serverCfg; private final long waitTimeoutOnBackpressureMillis; + private final boolean preserveMdcForTaskExecution; /** * This is the Bookie instance that is used to handle all read and write requests. @@ -144,50 +115,32 @@ public class BookieRequestProcessor implements RequestProcessor { // Expose Stats private final BKStats bkStats = BKStats.getInstance(); private final boolean statsEnabled; - private final OpStatsLogger addRequestStats; - private final OpStatsLogger addEntryStats; - final OpStatsLogger readRequestStats; - final OpStatsLogger readEntryStats; - final OpStatsLogger forceLedgerStats; - final OpStatsLogger forceLedgerRequestStats; - final OpStatsLogger fenceReadRequestStats; - final OpStatsLogger fenceReadEntryStats; - final OpStatsLogger fenceReadWaitStats; - final OpStatsLogger readEntrySchedulingDelayStats; - final OpStatsLogger longPollPreWaitStats; - final OpStatsLogger longPollWaitStats; - final OpStatsLogger longPollReadStats; - final OpStatsLogger longPollReadRequestStats; - final Counter readLastEntryNoEntryErrorCounter; - final OpStatsLogger writeLacRequestStats; - final OpStatsLogger writeLacStats; - final OpStatsLogger readLacRequestStats; - final OpStatsLogger readLacStats; - final OpStatsLogger getBookieInfoRequestStats; - final OpStatsLogger getBookieInfoStats; - final OpStatsLogger channelWriteStats; - final OpStatsLogger addEntryBlockedStats; - final OpStatsLogger readEntryBlockedStats; - - final AtomicInteger addsInProgress = new AtomicInteger(0); - final AtomicInteger maxAddsInProgress = new AtomicInteger(0); - final AtomicInteger addsBlocked = new AtomicInteger(0); - final AtomicInteger readsInProgress = new AtomicInteger(0); - final AtomicInteger readsBlocked = new AtomicInteger(0); - final AtomicInteger maxReadsInProgress = new AtomicInteger(0); + + private final RequestStats requestStats; final Semaphore addsSemaphore; final Semaphore readsSemaphore; + final ChannelGroup allChannels; + // to temporarily blacklist channels final Optional<Cache<Channel, Boolean>> blacklistedChannels; final Consumer<Channel> onResponseTimeout; - public BookieRequestProcessor(ServerConfiguration serverCfg, Bookie bookie, - StatsLogger statsLogger, SecurityHandlerFactory shFactory) throws SecurityException { + private final ByteBufAllocator allocator; + + private final boolean throttleReadResponses;
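The preserveMdcForTaskExecution flag declared above drives restoreMdcContextFromRequest further down in this class: the context pairs shipped with the request are copied into the SLF4J MDC before dispatch and cleared afterwards, so bookie-side log lines carry the client's correlation keys. A minimal sketch of that pattern, with a hypothetical requestId key:

import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.slf4j.MDC;

public class MdcScope {
    private static final Logger log = LoggerFactory.getLogger(MdcScope.class);

    // Populate the MDC from request-scoped key/value pairs, run the task,
    // and always clear the MDC so the worker thread does not leak context.
    static void runWithContext(Map<String, String> requestContext, Runnable task) {
        MDC.clear();
        requestContext.forEach(MDC::put);
        try {
            task.run();
        } finally {
            MDC.clear();
        }
    }

    public static void main(String[] args) {
        // "requestId" is a hypothetical key, not one mandated by the protocol.
        runWithContext(Map.of("requestId", "42"),
                () -> log.info("processing add request")); // carries requestId=42 if the log pattern includes %X
    }
}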
+ + public BookieRequestProcessor(ServerConfiguration serverCfg, Bookie bookie, StatsLogger statsLogger, + SecurityHandlerFactory shFactory, ByteBufAllocator allocator, + ChannelGroup allChannels) throws SecurityException { this.serverCfg = serverCfg; + this.allocator = allocator; + this.allChannels = allChannels; this.waitTimeoutOnBackpressureMillis = serverCfg.getWaitTimeoutOnResponseBackpressureMillis(); + this.preserveMdcForTaskExecution = serverCfg.getPreserveMdcForTaskExecution(); this.bookie = bookie; + this.throttleReadResponses = serverCfg.isReadWorkerThreadsThrottlingEnabled(); this.readThreadPool = createExecutor( this.serverCfg.getNumReadWorkerThreads(), "BookieReadThreadPool", @@ -207,16 +160,16 @@ public BookieRequestProcessor(ServerConfiguration serverCfg, Bookie bookie, } this.longPollThreadPool = createExecutor( numThreads, - "BookieLongPollThread-" + serverCfg.getBookiePort(), + "BookieLongPollThread", OrderedExecutor.NO_TASK_LIMIT, statsLogger); } this.highPriorityThreadPool = createExecutor( this.serverCfg.getNumHighPriorityWorkerThreads(), - "BookieHighPriorityThread-" + serverCfg.getBookiePort(), + "BookieHighPriorityThread", OrderedExecutor.NO_TASK_LIMIT, statsLogger); this.shFactory = shFactory; if (shFactory != null) { - shFactory.init(NodeType.Server, serverCfg); + shFactory.init(NodeType.Server, serverCfg, allocator); } this.requestTimer = new HashedWheelTimer( @@ -245,86 +198,13 @@ public BookieRequestProcessor(ServerConfiguration serverCfg, Bookie bookie, // Expose Stats this.statsEnabled = serverCfg.isStatisticsEnabled(); - this.addEntryStats = statsLogger.getOpStatsLogger(ADD_ENTRY); - this.addRequestStats = statsLogger.getOpStatsLogger(ADD_ENTRY_REQUEST); - this.readEntryStats = statsLogger.getOpStatsLogger(READ_ENTRY); - this.forceLedgerStats = statsLogger.getOpStatsLogger(FORCE_LEDGER); - this.forceLedgerRequestStats = statsLogger.getOpStatsLogger(FORCE_LEDGER_REQUEST); - this.readRequestStats = statsLogger.getOpStatsLogger(READ_ENTRY_REQUEST); - this.fenceReadEntryStats = statsLogger.getOpStatsLogger(READ_ENTRY_FENCE_READ); - this.fenceReadRequestStats = statsLogger.getOpStatsLogger(READ_ENTRY_FENCE_REQUEST); - this.fenceReadWaitStats = statsLogger.getOpStatsLogger(READ_ENTRY_FENCE_WAIT); - this.readEntrySchedulingDelayStats = statsLogger.getOpStatsLogger(READ_ENTRY_SCHEDULING_DELAY); - this.longPollPreWaitStats = statsLogger.getOpStatsLogger(READ_ENTRY_LONG_POLL_PRE_WAIT); - this.longPollWaitStats = statsLogger.getOpStatsLogger(READ_ENTRY_LONG_POLL_WAIT); - this.longPollReadStats = statsLogger.getOpStatsLogger(READ_ENTRY_LONG_POLL_READ); - this.longPollReadRequestStats = statsLogger.getOpStatsLogger(READ_ENTRY_LONG_POLL_REQUEST); - this.readLastEntryNoEntryErrorCounter = statsLogger.getCounter(READ_LAST_ENTRY_NOENTRY_ERROR); - this.writeLacStats = statsLogger.getOpStatsLogger(WRITE_LAC); - this.writeLacRequestStats = statsLogger.getOpStatsLogger(WRITE_LAC_REQUEST); - this.readLacStats = statsLogger.getOpStatsLogger(READ_LAC); - this.readLacRequestStats = statsLogger.getOpStatsLogger(READ_LAC_REQUEST); - this.getBookieInfoStats = statsLogger.getOpStatsLogger(GET_BOOKIE_INFO); - this.getBookieInfoRequestStats = statsLogger.getOpStatsLogger(GET_BOOKIE_INFO_REQUEST); - this.channelWriteStats = statsLogger.getOpStatsLogger(CHANNEL_WRITE); - - this.addEntryBlockedStats = statsLogger.getOpStatsLogger(ADD_ENTRY_BLOCKED_WAIT); - this.readEntryBlockedStats = statsLogger.getOpStatsLogger(READ_ENTRY_BLOCKED_WAIT); + this.requestStats = new RequestStats(statsLogger); 
int maxAdds = serverCfg.getMaxAddsInProgressLimit(); addsSemaphore = maxAdds > 0 ? new Semaphore(maxAdds, true) : null; int maxReads = serverCfg.getMaxReadsInProgressLimit(); readsSemaphore = maxReads > 0 ? new Semaphore(maxReads, true) : null; - - statsLogger.registerGauge(ADD_ENTRY_IN_PROGRESS, new Gauge() { - @Override - public Number getDefaultValue() { - return 0; - } - - @Override - public Number getSample() { - return addsInProgress; - } - }); - - statsLogger.registerGauge(ADD_ENTRY_BLOCKED, new Gauge() { - @Override - public Number getDefaultValue() { - return 0; - } - - @Override - public Number getSample() { - return addsBlocked; - } - }); - - statsLogger.registerGauge(READ_ENTRY_IN_PROGRESS, new Gauge() { - @Override - public Number getDefaultValue() { - return 0; - } - - @Override - public Number getSample() { - return readsInProgress; - } - }); - - statsLogger.registerGauge(READ_ENTRY_BLOCKED, new Gauge() { - @Override - public Number getDefaultValue() { - return 0; - } - - @Override - public Number getSample() { - return readsBlocked; - } - }); - } protected void onAddRequestStart(Channel channel) { @@ -333,21 +213,19 @@ protected void onAddRequestStart(Channel channel) { final long throttlingStartTimeNanos = MathUtils.nowInNano(); channel.config().setAutoRead(false); LOG.info("Too many add requests in progress, disabling autoread on channel {}", channel); - addsBlocked.incrementAndGet(); + requestStats.blockAddRequest(); addsSemaphore.acquireUninterruptibly(); channel.config().setAutoRead(true); final long delayNanos = MathUtils.elapsedNanos(throttlingStartTimeNanos); LOG.info("Re-enabled autoread on channel {} after AddRequest delay of {} nanos", channel, delayNanos); - addEntryBlockedStats.registerSuccessfulEvent(delayNanos, TimeUnit.NANOSECONDS); - addsBlocked.decrementAndGet(); + requestStats.unblockAddRequest(delayNanos); } } - final int curr = addsInProgress.incrementAndGet(); - maxAddsInProgress.accumulateAndGet(curr, Integer::max); + requestStats.trackAddRequest(); } protected void onAddRequestFinish() { - addsInProgress.decrementAndGet(); + requestStats.untrackAddRequest(); if (addsSemaphore != null) { addsSemaphore.release(); } @@ -359,21 +237,19 @@ protected void onReadRequestStart(Channel channel) { final long throttlingStartTimeNanos = MathUtils.nowInNano(); channel.config().setAutoRead(false); LOG.info("Too many read requests in progress, disabling autoread on channel {}", channel); - readsBlocked.incrementAndGet(); + requestStats.blockReadRequest(); readsSemaphore.acquireUninterruptibly(); channel.config().setAutoRead(true); final long delayNanos = MathUtils.elapsedNanos(throttlingStartTimeNanos); LOG.info("Re-enabled autoread on channel {} after ReadRequest delay of {} nanos", channel, delayNanos); - readEntryBlockedStats.registerSuccessfulEvent(delayNanos, TimeUnit.NANOSECONDS); - readsBlocked.decrementAndGet(); + requestStats.unblockReadRequest(delayNanos); } } - final int curr = readsInProgress.incrementAndGet(); - maxReadsInProgress.accumulateAndGet(curr, Integer::max); + requestStats.trackReadRequest(); } protected void onReadRequestFinish() { - readsInProgress.decrementAndGet(); + requestStats.untrackReadRequest(); if (readsSemaphore != null) { readsSemaphore.release(); } @@ -381,16 +257,17 @@ protected void onReadRequestFinish() { @VisibleForTesting int maxAddsInProgressCount() { - return maxAddsInProgress.get(); + return requestStats.maxAddsInProgressCount(); } @VisibleForTesting int maxReadsInProgressCount() { - return maxReadsInProgress.get(); + 
return requestStats.maxReadsInProgressCount(); } @Override public void close() { + LOG.info("Closing RequestProcessor"); shutdownExecutor(writeThreadPool); shutdownExecutor(readThreadPool); if (serverCfg.getNumLongPollWorkerThreads() > 0 || readThreadPool == null) { @@ -398,6 +275,7 @@ public void close() { } shutdownExecutor(highPriorityThreadPool); requestTimer.stop(); + LOG.info("Closed RequestProcessor"); } private OrderedExecutor createExecutor( @@ -412,8 +290,10 @@ private OrderedExecutor createExecutor( .numThreads(numThreads) .name(nameFormat) .traceTaskExecution(serverCfg.getEnableTaskExecutionStats()) + .preserveMdcForTaskExecution(serverCfg.getPreserveMdcForTaskExecution()) .statsLogger(statsLogger) .maxTasksInQueue(maxTasksInQueue) + .enableThreadScopedMetrics(true) .build(); } } @@ -421,61 +301,73 @@ private OrderedExecutor createExecutor( private void shutdownExecutor(OrderedExecutor service) { if (null != service) { service.shutdown(); + service.forceShutdown(10, TimeUnit.SECONDS); } } @Override - public void processRequest(Object msg, Channel c) { + public void processRequest(Object msg, BookieRequestHandler requestHandler) { + Channel channel = requestHandler.ctx().channel(); // If we can decode this packet as a Request protobuf packet, process // it as a version 3 packet. Else, just use the old protocol. if (msg instanceof BookkeeperProtocol.Request) { BookkeeperProtocol.Request r = (BookkeeperProtocol.Request) msg; - BookkeeperProtocol.BKPacketHeader header = r.getHeader(); - switch (header.getOperation()) { - case ADD_ENTRY: - processAddRequestV3(r, c); - break; - case READ_ENTRY: - processReadRequestV3(r, c); - break; - case FORCE_LEDGER: - processForceLedgerRequestV3(r, c); - break; - case AUTH: - LOG.info("Ignoring auth operation from client {}", c.remoteAddress()); - BookkeeperProtocol.AuthMessage message = BookkeeperProtocol.AuthMessage - .newBuilder() - .setAuthPluginName(AuthProviderFactoryFactory.AUTHENTICATION_DISABLED_PLUGIN_NAME) - .setPayload(ByteString.copyFrom(AuthToken.NULL.getData())) - .build(); - BookkeeperProtocol.Response.Builder authResponse = BookkeeperProtocol.Response - .newBuilder().setHeader(r.getHeader()) - .setStatus(BookkeeperProtocol.StatusCode.EOK) - .setAuthResponse(message); - c.writeAndFlush(authResponse.build()); - break; - case WRITE_LAC: - processWriteLacRequestV3(r, c); - break; - case READ_LAC: - processReadLacRequestV3(r, c); - break; - case GET_BOOKIE_INFO: - processGetBookieInfoRequestV3(r, c); - break; - case START_TLS: - processStartTLSRequestV3(r, c); - break; - default: - LOG.info("Unknown operation type {}", header.getOperation()); - BookkeeperProtocol.Response.Builder response = - BookkeeperProtocol.Response.newBuilder().setHeader(r.getHeader()) - .setStatus(BookkeeperProtocol.StatusCode.EBADREQ); - c.writeAndFlush(response.build()); - if (statsEnabled) { - bkStats.getOpStats(BKStats.STATS_UNKNOWN).incrementFailedOps(); - } - break; + restoreMdcContextFromRequest(r); + try { + BookkeeperProtocol.BKPacketHeader header = r.getHeader(); + switch (header.getOperation()) { + case ADD_ENTRY: + processAddRequestV3(r, requestHandler); + break; + case READ_ENTRY: + processReadRequestV3(r, requestHandler); + break; + case FORCE_LEDGER: + processForceLedgerRequestV3(r, requestHandler); + break; + case AUTH: + LOG.info("Ignoring auth operation from client {}", channel.remoteAddress()); + BookkeeperProtocol.AuthMessage message = BookkeeperProtocol.AuthMessage + .newBuilder() + 
.setAuthPluginName(AuthProviderFactoryFactory.AUTHENTICATION_DISABLED_PLUGIN_NAME) + .setPayload(ByteString.copyFrom(AuthToken.NULL.getData())) + .build(); + final BookkeeperProtocol.Response authResponse = BookkeeperProtocol.Response + .newBuilder().setHeader(r.getHeader()) + .setStatus(BookkeeperProtocol.StatusCode.EOK) + .setAuthResponse(message) + .build(); + writeAndFlush(channel, authResponse); + break; + case WRITE_LAC: + processWriteLacRequestV3(r, requestHandler); + break; + case READ_LAC: + processReadLacRequestV3(r, requestHandler); + break; + case GET_BOOKIE_INFO: + processGetBookieInfoRequestV3(r, requestHandler); + break; + case START_TLS: + processStartTLSRequestV3(r, requestHandler); + break; + case GET_LIST_OF_ENTRIES_OF_LEDGER: + processGetListOfEntriesOfLedgerProcessorV3(r, requestHandler); + break; + default: + LOG.info("Unknown operation type {}", header.getOperation()); + final BookkeeperProtocol.Response response = + BookkeeperProtocol.Response.newBuilder().setHeader(r.getHeader()) + .setStatus(BookkeeperProtocol.StatusCode.EBADREQ) + .build(); + writeAndFlush(channel, response); + if (statsEnabled) { + bkStats.getOpStats(BKStats.STATS_UNKNOWN).incrementFailedOps(); + } + break; + } + } finally { + MDC.clear(); } } else { BookieProtocol.Request r = (BookieProtocol.Request) msg; @@ -483,15 +375,34 @@ public void processRequest(Object msg, Channel c) { switch (r.getOpCode()) { case BookieProtocol.ADDENTRY: checkArgument(r instanceof BookieProtocol.ParsedAddRequest); - processAddRequest((BookieProtocol.ParsedAddRequest) r, c); + processAddRequest((BookieProtocol.ParsedAddRequest) r, requestHandler); break; case BookieProtocol.READENTRY: checkArgument(r instanceof BookieProtocol.ReadRequest); - processReadRequest((BookieProtocol.ReadRequest) r, c); + processReadRequest((BookieProtocol.ReadRequest) r, requestHandler); + break; + case BookieProtocol.BATCH_READ_ENTRY: + checkArgument(r instanceof BookieProtocol.BatchedReadRequest); + processReadRequest((BookieProtocol.BatchedReadRequest) r, requestHandler); + break; + case BookieProtocol.AUTH: + LOG.info("Ignoring auth operation from client {}", + requestHandler.ctx().channel().remoteAddress()); + BookkeeperProtocol.AuthMessage message = BookkeeperProtocol.AuthMessage + .newBuilder() + .setAuthPluginName(AuthProviderFactoryFactory.AUTHENTICATION_DISABLED_PLUGIN_NAME) + .setPayload(ByteString.copyFrom(AuthToken.NULL.getData())) + .build(); + + final BookieProtocol.AuthResponse response = new BookieProtocol.AuthResponse( + BookieProtocol.CURRENT_PROTOCOL_VERSION, message); + writeAndFlush(channel, response); break; default: LOG.error("Unknown op type {}, sending error", r.getOpCode()); - c.writeAndFlush(ResponseBuilder.buildErrorResponse(BookieProtocol.EBADREQ, r)); + final BookieProtocol.Response errResponse = ResponseBuilder + .buildErrorResponse(BookieProtocol.EBADREQ, r); + writeAndFlush(channel, errResponse); if (statsEnabled) { bkStats.getOpStats(BKStats.STATS_UNKNOWN).incrementFailedOps(); } @@ -500,8 +411,18 @@ public void processRequest(Object msg, Channel c) { } } - private void processWriteLacRequestV3(final BookkeeperProtocol.Request r, final Channel c) { - WriteLacProcessorV3 writeLac = new WriteLacProcessorV3(r, c, this); + private void restoreMdcContextFromRequest(BookkeeperProtocol.Request req) { + if (preserveMdcForTaskExecution) { + MDC.clear(); + for (BookkeeperProtocol.ContextPair pair: req.getRequestContextList()) { + MDC.put(pair.getKey(), pair.getValue()); + } + } + } + + private void 
processWriteLacRequestV3(final BookkeeperProtocol.Request r, + final BookieRequestHandler requestHandler) { + WriteLacProcessorV3 writeLac = new WriteLacProcessorV3(r, requestHandler, this); if (null == writeThreadPool) { writeLac.run(); } else { @@ -509,8 +430,9 @@ private void processWriteLacRequestV3(final BookkeeperProtocol.Request r, final } } - private void processReadLacRequestV3(final BookkeeperProtocol.Request r, final Channel c) { - ReadLacProcessorV3 readLac = new ReadLacProcessorV3(r, c, this); + private void processReadLacRequestV3(final BookkeeperProtocol.Request r, + final BookieRequestHandler requestHandler) { + ReadLacProcessorV3 readLac = new ReadLacProcessorV3(r, requestHandler, this); if (null == readThreadPool) { readLac.run(); } else { @@ -518,8 +440,8 @@ private void processReadLacRequestV3(final BookkeeperProtocol.Request r, final C } } - private void processAddRequestV3(final BookkeeperProtocol.Request r, final Channel c) { - WriteEntryProcessorV3 write = new WriteEntryProcessorV3(r, c, this); + private void processAddRequestV3(final BookkeeperProtocol.Request r, final BookieRequestHandler requestHandler) { + WriteEntryProcessorV3 write = new WriteEntryProcessorV3(r, requestHandler, this); final OrderedExecutor threadPool; if (RequestUtils.isHighPriority(r)) { @@ -538,6 +460,7 @@ private void processAddRequestV3(final BookkeeperProtocol.Request r, final Chann LOG.debug("Failed to process request to add entry at {}:{}. Too many pending requests", r.getAddRequest().getLedgerId(), r.getAddRequest().getEntryId()); } + getRequestStats().getAddEntryRejectedCounter().inc(); BookkeeperProtocol.AddResponse.Builder addResponse = BookkeeperProtocol.AddResponse.newBuilder() .setLedgerId(r.getAddRequest().getLedgerId()) .setEntryId(r.getAddRequest().getEntryId()) @@ -547,13 +470,14 @@ private void processAddRequestV3(final BookkeeperProtocol.Request r, final Chann .setStatus(addResponse.getStatus()) .setAddResponse(addResponse); BookkeeperProtocol.Response resp = response.build(); - write.sendResponse(addResponse.getStatus(), resp, addRequestStats); + write.sendResponse(addResponse.getStatus(), resp, requestStats.getAddRequestStats()); } } } - private void processForceLedgerRequestV3(final BookkeeperProtocol.Request r, final Channel c) { - ForceLedgerProcessorV3 forceLedger = new ForceLedgerProcessorV3(r, c, this); + private void processForceLedgerRequestV3(final BookkeeperProtocol.Request r, + final BookieRequestHandler requestHandler) { + ForceLedgerProcessorV3 forceLedger = new ForceLedgerProcessorV3(r, requestHandler, this); final OrderedExecutor threadPool; if (RequestUtils.isHighPriority(r)) { @@ -581,24 +505,28 @@ private void processForceLedgerRequestV3(final BookkeeperProtocol.Request r, fin .setStatus(forceLedgerResponse.getStatus()) .setForceLedgerResponse(forceLedgerResponse); BookkeeperProtocol.Response resp = response.build(); - forceLedger.sendResponse(forceLedgerResponse.getStatus(), resp, forceLedgerRequestStats); + forceLedger.sendResponse( + forceLedgerResponse.getStatus(), + resp, + requestStats.getForceLedgerRequestStats()); } } } - private void processReadRequestV3(final BookkeeperProtocol.Request r, final Channel c) { - ExecutorService fenceThread = null == highPriorityThreadPool ? null : highPriorityThreadPool.chooseThread(c); + private void processReadRequestV3(final BookkeeperProtocol.Request r, final BookieRequestHandler requestHandler) { + ExecutorService fenceThread = null == highPriorityThreadPool ? 
null : + highPriorityThreadPool.chooseThread(requestHandler.ctx()); final ReadEntryProcessorV3 read; final OrderedExecutor threadPool; if (RequestUtils.isLongPollReadRequest(r.getReadRequest())) { - ExecutorService lpThread = longPollThreadPool.chooseThread(c); + ExecutorService lpThread = longPollThreadPool.chooseThread(requestHandler.ctx()); - read = new LongPollReadEntryProcessorV3(r, c, this, fenceThread, + read = new LongPollReadEntryProcessorV3(r, requestHandler, this, fenceThread, lpThread, requestTimer); threadPool = longPollThreadPool; } else { - read = new ReadEntryProcessorV3(r, c, this, fenceThread); + read = new ReadEntryProcessorV3(r, requestHandler, this, fenceThread); // If it's a high priority read (fencing or as part of recovery process), we want to make sure it // gets executed as fast as possible, so bypass the normal readThreadPool @@ -622,6 +550,7 @@ private void processReadRequestV3(final BookkeeperProtocol.Request r, final Chan LOG.debug("Failed to process request to read entry at {}:{}. Too many pending requests", r.getReadRequest().getLedgerId(), r.getReadRequest().getEntryId()); } + getRequestStats().getReadEntryRejectedCounter().inc(); BookkeeperProtocol.ReadResponse.Builder readResponse = BookkeeperProtocol.ReadResponse.newBuilder() .setLedgerId(r.getReadRequest().getLedgerId()) .setEntryId(r.getReadRequest().getEntryId()) @@ -631,26 +560,36 @@ private void processReadRequestV3(final BookkeeperProtocol.Request r, final Chan .setStatus(readResponse.getStatus()) .setReadResponse(readResponse); BookkeeperProtocol.Response resp = response.build(); - read.sendResponse(readResponse.getStatus(), resp, readRequestStats); + read.sendResponse(readResponse.getStatus(), resp, requestStats.getReadRequestStats()); + onReadRequestFinish(); } } } - private void processStartTLSRequestV3(final BookkeeperProtocol.Request r, final Channel c) { + private void processStartTLSRequestV3(final BookkeeperProtocol.Request r, + final BookieRequestHandler requestHandler) { BookkeeperProtocol.Response.Builder response = BookkeeperProtocol.Response.newBuilder(); BookkeeperProtocol.BKPacketHeader.Builder header = BookkeeperProtocol.BKPacketHeader.newBuilder(); header.setVersion(BookkeeperProtocol.ProtocolVersion.VERSION_THREE); header.setOperation(r.getHeader().getOperation()); header.setTxnId(r.getHeader().getTxnId()); response.setHeader(header.build()); + final Channel c = requestHandler.ctx().channel(); + if (shFactory == null) { LOG.error("Got StartTLS request but TLS not configured"); response.setStatus(BookkeeperProtocol.StatusCode.EBADREQ); - c.writeAndFlush(response.build()); + writeAndFlush(c, response.build()); } else { + LOG.info("Starting TLS handshake with client on channel {}", c); // there is no need to execute in a different thread as this operation is light SslHandler sslHandler = shFactory.newTLSHandler(); - c.pipeline().addFirst("tls", sslHandler); + if (c.pipeline().names().contains(BookieNettyServer.CONSOLIDATION_HANDLER_NAME)) { + c.pipeline().addAfter(BookieNettyServer.CONSOLIDATION_HANDLER_NAME, TLS_HANDLER_NAME, sslHandler); + } else { + // local transport doesn't contain FlushConsolidationHandler + c.pipeline().addFirst(TLS_HANDLER_NAME, sslHandler); + } response.setStatus(BookkeeperProtocol.StatusCode.EOK); BookkeeperProtocol.StartTLSResponse.Builder builder = BookkeeperProtocol.StartTLSResponse.newBuilder(); @@ -662,25 +601,37 @@ public void operationComplete(Future future) throws Exception { AuthHandler.ServerSideHandler authHandler = c.pipeline() 
.get(AuthHandler.ServerSideHandler.class); authHandler.authProvider.onProtocolUpgrade(); - if (future.isSuccess()) { + + /* + * Success of the future doesn't guarantee success in authentication + * future.isSuccess() only checks if the result field is not null + */ + if (future.isSuccess() && authHandler.isAuthenticated()) { LOG.info("Session is protected by: {}", sslHandler.engine().getSession().getCipherSuite()); } else { - LOG.error("TLS Handshake failure: {}", future.cause()); - BookkeeperProtocol.Response.Builder errResponse = BookkeeperProtocol.Response.newBuilder() - .setHeader(r.getHeader()).setStatus(BookkeeperProtocol.StatusCode.EIO); - c.writeAndFlush(errResponse.build()); + if (future.isSuccess()) { + LOG.error("TLS Handshake failed: Could not authenticate."); + } else { + LOG.error("TLS Handshake failure: ", future.cause()); + } + final BookkeeperProtocol.Response errResponse = BookkeeperProtocol.Response.newBuilder() + .setHeader(r.getHeader()) + .setStatus(BookkeeperProtocol.StatusCode.EIO) + .build(); + writeAndFlush(c, errResponse); if (statsEnabled) { bkStats.getOpStats(BKStats.STATS_UNKNOWN).incrementFailedOps(); } } } }); - c.writeAndFlush(response.build()); + writeAndFlush(c, response.build()); } } - private void processGetBookieInfoRequestV3(final BookkeeperProtocol.Request r, final Channel c) { - GetBookieInfoProcessorV3 getBookieInfo = new GetBookieInfoProcessorV3(r, c, this); + private void processGetBookieInfoRequestV3(final BookkeeperProtocol.Request r, + final BookieRequestHandler requestHandler) { + GetBookieInfoProcessorV3 getBookieInfo = new GetBookieInfoProcessorV3(r, requestHandler, this); if (null == readThreadPool) { getBookieInfo.run(); } else { @@ -688,8 +639,19 @@ private void processGetBookieInfoRequestV3(final BookkeeperProtocol.Request r, f } } - private void processAddRequest(final BookieProtocol.ParsedAddRequest r, final Channel c) { - WriteEntryProcessor write = WriteEntryProcessor.create(r, c, this); + private void processGetListOfEntriesOfLedgerProcessorV3(final BookkeeperProtocol.Request r, + final BookieRequestHandler requestHandler) { + GetListOfEntriesOfLedgerProcessorV3 getListOfEntriesOfLedger = + new GetListOfEntriesOfLedgerProcessorV3(r, requestHandler, this); + if (null == readThreadPool) { + getListOfEntriesOfLedger.run(); + } else { + readThreadPool.submit(getListOfEntriesOfLedger); + } + } + + private void processAddRequest(final BookieProtocol.ParsedAddRequest r, final BookieRequestHandler requestHandler) { + WriteEntryProcessor write = WriteEntryProcessor.create(r, requestHandler, this); // If it's a high priority add (usually as part of recovery process), we want to make sure it gets // executed as fast as possible, so bypass the normal writeThreadPool and execute in highPriorityThreadPool @@ -710,15 +672,27 @@ private void processAddRequest(final BookieProtocol.ParsedAddRequest r, final Ch LOG.debug("Failed to process request to add entry at {}:{}. 
Too many pending requests", r.ledgerId, r.entryId); } - - write.sendResponse(BookieProtocol.ETOOMANYREQUESTS, - ResponseBuilder.buildErrorResponse(BookieProtocol.ETOOMANYREQUESTS, r), addRequestStats); + getRequestStats().getAddEntryRejectedCounter().inc(); + + write.sendWriteReqResponse( + BookieProtocol.ETOOMANYREQUESTS, + ResponseBuilder.buildErrorResponse(BookieProtocol.ETOOMANYREQUESTS, r), + requestStats.getAddRequestStats()); + r.release(); + r.recycle(); + write.recycle(); } } } - private void processReadRequest(final BookieProtocol.ReadRequest r, final Channel c) { - ReadEntryProcessor read = ReadEntryProcessor.create(r, c, this); + private void processReadRequest(final BookieProtocol.ReadRequest r, final BookieRequestHandler requestHandler) { + ExecutorService fenceThreadPool = + null == highPriorityThreadPool ? null : highPriorityThreadPool.chooseThread(requestHandler.ctx()); + ReadEntryProcessor read = r instanceof BookieProtocol.BatchedReadRequest + ? BatchedReadEntryProcessor.create((BookieProtocol.BatchedReadRequest) r, requestHandler, + this, fenceThreadPool, throttleReadResponses, serverCfg.getMaxBatchReadSize()) + : ReadEntryProcessor.create(r, requestHandler, + this, fenceThreadPool, throttleReadResponses); // If it's a high priority read (fencing or as part of recovery process), we want to make sure it // gets executed as fast as possible, so bypass the normal readThreadPool @@ -740,9 +714,13 @@ private void processReadRequest(final BookieProtocol.ReadRequest r, final Channe LOG.debug("Failed to process request to read entry at {}:{}. Too many pending requests", r.ledgerId, r.entryId); } - - read.sendResponse(BookieProtocol.ETOOMANYREQUESTS, - ResponseBuilder.buildErrorResponse(BookieProtocol.ETOOMANYREQUESTS, r), readRequestStats); + getRequestStats().getReadEntryRejectedCounter().inc(); + read.sendResponse( + BookieProtocol.ETOOMANYREQUESTS, + ResponseBuilder.buildErrorResponse(BookieProtocol.ETOOMANYREQUESTS, r), + requestStats.getReadRequestStats()); + onReadRequestFinish(); + read.recycle(); } } } @@ -770,4 +748,8 @@ public boolean isBlacklisted(Channel channel) { public void handleNonWritableChannel(Channel channel) { onResponseTimeout.accept(channel); } + + private static void writeAndFlush(Channel channel, Object msg) { + NettyChannelUtil.writeAndFlushWithVoidPromise(channel, msg); + } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/BookieServer.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/BookieServer.java index 27e8ab2eef5..da2cbd86e1f 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/BookieServer.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/BookieServer.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
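onAddRequestStart and onReadRequestStart in the processor above apply backpressure by pairing a fair Semaphore with Netty's autoread switch: once maxAddsInProgressLimit (or the read-side equivalent) is reached, the channel stops reading new requests until a permit frees up. A condensed sketch of the add-side mechanics (stats and logging elided; maxAdds is a hypothetical limit):

import java.util.concurrent.Semaphore;

public class AddBackpressure {
    private final Semaphore addsSemaphore;

    AddBackpressure(int maxAdds) {
        // Fair, like the processor's semaphore, so stalled channels resume in order.
        this.addsSemaphore = maxAdds > 0 ? new Semaphore(maxAdds, true) : null;
    }

    void onAddRequestStart(io.netty.channel.Channel channel) {
        if (addsSemaphore != null && !addsSemaphore.tryAcquire()) {
            channel.config().setAutoRead(false);    // stop reading new requests
            addsSemaphore.acquireUninterruptibly(); // wait until a permit frees up
            channel.config().setAutoRead(true);     // resume reads
        }
    }

    void onAddRequestFinish() {
        if (addsSemaphore != null) {
            addsSemaphore.release();
        }
    }
}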
See the NOTICE file @@ -20,32 +20,29 @@ */ package org.apache.bookkeeper.proto; -import static org.apache.bookkeeper.bookie.BookKeeperServerStats.BOOKIE_SCOPE; import static org.apache.bookkeeper.bookie.BookKeeperServerStats.SERVER_SCOPE; import static org.apache.bookkeeper.conf.AbstractConfiguration.PERMITTED_STARTUP_USERS; import com.google.common.annotations.VisibleForTesting; +import io.netty.buffer.ByteBufAllocator; import java.io.IOException; import java.lang.Thread.UncaughtExceptionHandler; import java.net.UnknownHostException; -import java.security.AccessControlException; import java.util.Arrays; -import java.util.concurrent.TimeUnit; - import org.apache.bookkeeper.bookie.Bookie; import org.apache.bookkeeper.bookie.BookieCriticalThread; import org.apache.bookkeeper.bookie.BookieException; +import org.apache.bookkeeper.bookie.BookieImpl; import org.apache.bookkeeper.bookie.ExitCode; -import org.apache.bookkeeper.bookie.ReadOnlyBookie; -import org.apache.bookkeeper.client.BKException; +import org.apache.bookkeeper.bookie.UncleanShutdownDetection; import org.apache.bookkeeper.common.util.JsonUtil.ParseJsonException; import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.net.BookieSocketAddress; import org.apache.bookkeeper.processor.RequestProcessor; import org.apache.bookkeeper.replication.ReplicationException.CompatibilityException; import org.apache.bookkeeper.replication.ReplicationException.UnavailableException; import org.apache.bookkeeper.server.Main; -import org.apache.bookkeeper.stats.NullStatsLogger; import org.apache.bookkeeper.stats.StatsLogger; import org.apache.bookkeeper.tls.SecurityException; import org.apache.bookkeeper.tls.SecurityHandlerFactory; @@ -62,8 +59,9 @@ public class BookieServer { final ServerConfiguration conf; BookieNettyServer nettyServer; private volatile boolean running = false; - Bookie bookie; + private final Bookie bookie; DeathWatcher deathWatcher; + UncleanShutdownDetection uncleanShutdownDetection; private static final Logger LOG = LoggerFactory.getLogger(BookieServer.class); int exitCode = ExitCode.OK; @@ -77,13 +75,11 @@ public class BookieServer { // Exception handler private volatile UncaughtExceptionHandler uncaughtExceptionHandler = null; - public BookieServer(ServerConfiguration conf) throws IOException, - KeeperException, InterruptedException, BookieException, - UnavailableException, CompatibilityException, SecurityException { - this(conf, NullStatsLogger.INSTANCE); - } - - public BookieServer(ServerConfiguration conf, StatsLogger statsLogger) + public BookieServer(ServerConfiguration conf, + Bookie bookie, + StatsLogger statsLogger, + ByteBufAllocator allocator, + UncleanShutdownDetection uncleanShutdownDetection) throws IOException, KeeperException, InterruptedException, BookieException, UnavailableException, CompatibilityException, SecurityException { this.conf = conf; @@ -97,23 +93,31 @@ public BookieServer(ServerConfiguration conf, StatsLogger statsLogger) } this.statsLogger = statsLogger; - this.nettyServer = new BookieNettyServer(this.conf, null); - try { - this.bookie = newBookie(conf); - } catch (IOException | KeeperException | InterruptedException | BookieException e) { - // interrupted on constructing a bookie - this.nettyServer.shutdown(); - throw e; - } + this.bookie = bookie; + this.nettyServer = new BookieNettyServer(this.conf, null, allocator); + this.uncleanShutdownDetection = uncleanShutdownDetection; + final SecurityHandlerFactory shFactory; 
shFactory = SecurityProviderFactoryFactory .getSecurityProviderFactory(conf.getTLSProviderFactoryClass()); + this.requestProcessor = new BookieRequestProcessor(conf, bookie, - statsLogger.scope(SERVER_SCOPE), shFactory); + statsLogger.scope(SERVER_SCOPE), shFactory, allocator, nettyServer.allChannels); this.nettyServer.setRequestProcessor(this.requestProcessor); } + @VisibleForTesting + public static BookieServer newBookieServer(ServerConfiguration conf, + Bookie bookie, + StatsLogger statsLogger, + ByteBufAllocator allocator, + UncleanShutdownDetection uncleanShutdownDetection) + throws CompatibilityException, UnavailableException, SecurityException, IOException, + InterruptedException, KeeperException, BookieException { + return new BookieServer(conf, bookie, statsLogger, allocator, uncleanShutdownDetection); + } + /** * Currently the uncaught exception handler is used for DeathWatcher to notify * lifecycle management that a bookie is dead for some reasons. @@ -126,21 +130,17 @@ public void setExceptionHandler(UncaughtExceptionHandler exceptionHandler) { this.uncaughtExceptionHandler = exceptionHandler; } - protected Bookie newBookie(ServerConfiguration conf) - throws IOException, KeeperException, InterruptedException, BookieException { - return conf.isForceReadOnlyBookie() - ? new ReadOnlyBookie(conf, statsLogger.scope(BOOKIE_SCOPE)) - : new Bookie(conf, statsLogger.scope(BOOKIE_SCOPE)); - } - - public void start() throws IOException, UnavailableException, InterruptedException, BKException { + public void start() throws InterruptedException, IOException { this.bookie.start(); + // fail fast, when bookie startup is not successful if (!this.bookie.isRunning()) { exitCode = bookie.getExitCode(); this.requestProcessor.close(); return; } + + this.uncleanShutdownDetection.registerStartUp(); this.nettyServer.start(); running = true; @@ -149,15 +149,16 @@ public void start() throws IOException, UnavailableException, InterruptedExcepti deathWatcher.setUncaughtExceptionHandler(uncaughtExceptionHandler); } deathWatcher.start(); - - // fixes test flappers at random places until ISSUE#1400 is resolved - // https://github.com/apache/bookkeeper/issues/1400 - TimeUnit.MILLISECONDS.sleep(250); } @VisibleForTesting public BookieSocketAddress getLocalAddress() throws UnknownHostException { - return Bookie.getBookieAddress(conf); + return BookieImpl.getBookieAddress(conf); + } + + @VisibleForTesting + public BookieId getBookieId() throws UnknownHostException { + return BookieImpl.getBookieId(conf); } @VisibleForTesting @@ -198,15 +199,16 @@ public synchronized void shutdown() { if (!running) { return; } - exitCode = bookie.shutdown(); this.requestProcessor.close(); + exitCode = bookie.shutdown(); + uncleanShutdownDetection.registerCleanShutdown(); running = false; } /** * Ensure the current user can start-up the process if it's restricted. 
*/ - private void validateUser(ServerConfiguration conf) throws AccessControlException { + private void validateUser(ServerConfiguration conf) throws BookieException { if (conf.containsKey(PERMITTED_STARTUP_USERS)) { String currentUser = System.getProperty("user.name"); String[] propertyValue = conf.getPermittedStartupUsers(); @@ -220,7 +222,7 @@ private void validateUser(ServerConfiguration conf) throws AccessControlExceptio + " Current user: " + currentUser + " permittedStartupUsers: " + Arrays.toString(propertyValue); LOG.error(errorMsg); - throw new AccessControlException(errorMsg); + throw new BookieException.BookieUnauthorizedAccessException(errorMsg); } } @@ -285,6 +287,7 @@ public void run() { } } + /** * Legacy Method to run bookie server. */ @@ -294,13 +297,14 @@ public static void main(String[] args) { @Override public String toString() { - String id = "UNKNOWN"; - + String addr = "UNKNOWN"; + String id = "?"; try { - id = Bookie.getBookieAddress(conf).toString(); + addr = BookieImpl.getBookieAddress(conf).toString(); + id = getBookieId().toString(); } catch (UnknownHostException e) { //Ignored... } - return "Bookie Server listening on " + id; + return "Bookie Server listening on " + addr + " with id " + id; } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/BookkeeperInternalCallbacks.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/BookkeeperInternalCallbacks.java index d5ed5aeb8d6..6760003521a 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/BookkeeperInternalCallbacks.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/BookkeeperInternalCallbacks.java @@ -22,22 +22,23 @@ package org.apache.bookkeeper.proto; import io.netty.buffer.ByteBuf; - import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutorService; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.RejectedExecutionException; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; - import org.apache.bookkeeper.client.BKException; import org.apache.bookkeeper.client.BookieInfoReader.BookieInfo; import org.apache.bookkeeper.client.LedgerEntry; import org.apache.bookkeeper.client.LedgerHandle; -import org.apache.bookkeeper.client.LedgerMetadata; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.client.api.LedgerMetadata; +import org.apache.bookkeeper.common.util.MathUtils; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.stats.OpStatsLogger; -import org.apache.bookkeeper.util.MathUtils; +import org.apache.bookkeeper.util.AvailabilityOfEntriesOfLedger; +import org.apache.bookkeeper.util.ByteBufList; +import org.apache.bookkeeper.versioning.Versioned; import org.apache.zookeeper.AsyncCallback; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -68,14 +69,14 @@ public interface LedgerMetadataListener { * @param metadata * new ledger metadata. */ - void onChanged(long ledgerId, LedgerMetadata metadata); + void onChanged(long ledgerId, Versioned<LedgerMetadata> metadata); } /** * A writer callback interface. */ public interface WriteCallback { - void writeComplete(int rc, long ledgerId, long entryId, BookieSocketAddress addr, Object ctx); + void writeComplete(int rc, long ledgerId, long entryId, BookieId addr, Object ctx); } /** @@ -89,14 +90,14 @@ public interface ReadLacCallback { * A last-add-confirmed (LAC) writer callback interface.
*/ public interface WriteLacCallback { - void writeLacComplete(int rc, long ledgerId, BookieSocketAddress addr, Object ctx); + void writeLacComplete(int rc, long ledgerId, BookieId addr, Object ctx); } /** * Force callback interface. */ public interface ForceLedgerCallback { - void forceLedgerComplete(int rc, long ledgerId, BookieSocketAddress addr, Object ctx); + void forceLedgerComplete(int rc, long ledgerId, BookieId addr, Object ctx); } /** @@ -106,6 +107,53 @@ public interface StartTLSCallback { void startTLSComplete(int rc, Object ctx); } + /** + * A callback interface for the GetListOfEntriesOfLedger command. + */ + public interface GetListOfEntriesOfLedgerCallback { + void getListOfEntriesOfLedgerComplete(int rc, long ledgerId, + AvailabilityOfEntriesOfLedger availabilityOfEntriesOfLedger); + } + + /** + * Handle the response code and complete the future, transforming a non-OK code into a BKException. + * + * @param <T> + * @param rc + * @param result + * @param future + */ + public static <T> void finish(int rc, T result, CompletableFuture<T> future) { + if (rc != BKException.Code.OK) { + future.completeExceptionally(BKException.create(rc).fillInStackTrace()); + } else { + future.complete(result); + } + } + + /** + * Future for GetListOfEntriesOfLedger. + */ + public static class FutureGetListOfEntriesOfLedger extends CompletableFuture<AvailabilityOfEntriesOfLedger> + implements GetListOfEntriesOfLedgerCallback { + private final long ledgerIdOfTheRequest; + + FutureGetListOfEntriesOfLedger(long ledgerId) { + this.ledgerIdOfTheRequest = ledgerId; + } + + @Override + public void getListOfEntriesOfLedgerComplete(int rc, long ledgerIdOfTheResponse, + AvailabilityOfEntriesOfLedger availabilityOfEntriesOfLedger) { + if ((rc == BKException.Code.OK) && (ledgerIdOfTheRequest != ledgerIdOfTheResponse)) { + LOG.error("For getListOfEntriesOfLedger expected ledgerId in the response: {} actual ledgerId: {}", + ledgerIdOfTheRequest, ledgerIdOfTheResponse); + rc = BKException.Code.ReadException; + } + finish(rc, availabilityOfEntriesOfLedger, this); + } + } + /** * A generic callback interface. */ @@ -174,6 +222,16 @@ public interface ReadEntryCallback { void readEntryComplete(int rc, long ledgerId, long entryId, ByteBuf buffer, Object ctx); } + /** + * Declaration of a callback implementation for calls from BookieClient objects. + * Such calls are for replies of batched read operations (operations to read multiple entries + * from a ledger). + * + */ + public interface BatchedReadEntryCallback { + void readEntriesComplete(int rc, long ledgerId, long startEntryId, ByteBufList bufList, Object ctx); + } + /** * Listener on entries responded. */ diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/ByteStringUtil.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/ByteStringUtil.java new file mode 100644 index 00000000000..b26ac7b36ab --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/ByteStringUtil.java @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bookkeeper.proto; + +import com.google.protobuf.ByteString; +import com.google.protobuf.UnsafeByteOperations; +import io.netty.buffer.ByteBuf; +import java.nio.ByteBuffer; +import org.apache.bookkeeper.util.ByteBufList; + +public class ByteStringUtil { + + /** + * Wrap the internal buffers of a ByteBufList into a single ByteString. + * The lifecycle of the wrapped ByteString is tied to the ByteBufList. + * + * @param bufList ByteBufList to wrap + * @return ByteString wrapping the internal buffers of the ByteBufList + */ + public static ByteString byteBufListToByteString(ByteBufList bufList) { + ByteString aggregated = null; + for (int i = 0; i < bufList.size(); i++) { + ByteBuf buffer = bufList.getBuffer(i); + if (buffer.readableBytes() > 0) { + aggregated = byteBufToByteString(aggregated, buffer); + } + } + return aggregated != null ? aggregated : ByteString.EMPTY; + } + + /** + * Wrap the internal buffers of a ByteBuf into a single ByteString. + * The lifecycle of the wrapped ByteString is tied to the ByteBuf. + * + * @param byteBuf ByteBuf to wrap + * @return ByteString wrapping the internal buffers of the ByteBuf + */ + public static ByteString byteBufToByteString(ByteBuf byteBuf) { + return byteBufToByteString(null, byteBuf); + } + + // internal method to aggregate a ByteBuf into a single aggregated ByteString + private static ByteString byteBufToByteString(ByteString aggregated, ByteBuf byteBuf) { + if (byteBuf.readableBytes() == 0) { + return ByteString.EMPTY; + } + if (byteBuf.nioBufferCount() > 1) { + for (ByteBuffer nioBuffer : byteBuf.nioBuffers()) { + ByteString piece = UnsafeByteOperations.unsafeWrap(nioBuffer); + aggregated = (aggregated == null) ? piece : aggregated.concat(piece); + } + } else { + ByteString piece; + if (byteBuf.hasArray()) { + piece = UnsafeByteOperations.unsafeWrap(byteBuf.array(), byteBuf.arrayOffset() + byteBuf.readerIndex(), + byteBuf.readableBytes()); + } else { + piece = UnsafeByteOperations.unsafeWrap(byteBuf.nioBuffer()); + } + aggregated = (aggregated == null) ? piece : aggregated.concat(piece); + } + return aggregated; + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/ClientConnectionPeer.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/ClientConnectionPeer.java index 1c45b67790c..f45c7b6dad6 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/ClientConnectionPeer.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/ClientConnectionPeer.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
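ByteStringUtil above builds protobuf ByteStrings without copying entry payloads: each readable region of the source buffer is wrapped via UnsafeByteOperations.unsafeWrap and concatenated, so the result shares memory with the ByteBuf and is only valid while that buffer stays retained. A small usage sketch:

import com.google.protobuf.ByteString;
import io.netty.buffer.ByteBuf;
import io.netty.buffer.Unpooled;
import java.nio.charset.StandardCharsets;
import org.apache.bookkeeper.proto.ByteStringUtil;

public class ZeroCopyExample {
    public static void main(String[] args) {
        ByteBuf entry = Unpooled.copiedBuffer("payload", StandardCharsets.UTF_8);
        try {
            // No copy: the ByteString reads through to the ByteBuf's memory.
            ByteString wrapped = ByteStringUtil.byteBufToByteString(entry);
            System.out.println(wrapped.toStringUtf8()); // "payload"
        } finally {
            entry.release(); // only after every use of the wrapped ByteString
        }
    }
}

This is why the javadoc ties the wrapped ByteString's lifecycle to the ByteBufList: releasing the buffers invalidates the ByteString.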
See the NOTICE file diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/ConnectionPeer.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/ConnectionPeer.java index 7d27e72907e..b04ff6257f4 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/ConnectionPeer.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/ConnectionPeer.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/DefaultPerChannelBookieClientPool.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/DefaultPerChannelBookieClientPool.java index 04471d5f55e..ed2be1088c5 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/DefaultPerChannelBookieClientPool.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/DefaultPerChannelBookieClientPool.java @@ -24,14 +24,13 @@ import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; - +import org.apache.bookkeeper.common.util.MathUtils; import org.apache.bookkeeper.conf.ClientConfiguration; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GenericCallback; import org.apache.bookkeeper.tls.SecurityException; import org.apache.bookkeeper.tls.SecurityHandlerFactory; import org.apache.bookkeeper.tls.SecurityProviderFactoryFactory; -import org.apache.bookkeeper.util.MathUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -45,9 +44,10 @@ class DefaultPerChannelBookieClientPool implements PerChannelBookieClientPool, static final Logger LOG = LoggerFactory.getLogger(DefaultPerChannelBookieClientPool.class); final PerChannelBookieClientFactory factory; - final BookieSocketAddress address; + final BookieId address; final PerChannelBookieClient[] clients; + final PerChannelBookieClient[] clientsV3Enforced; final ClientConfiguration conf; SecurityHandlerFactory shFactory; @@ -56,19 +56,27 @@ class DefaultPerChannelBookieClientPool implements PerChannelBookieClientPool, final AtomicLong errorCounter = new AtomicLong(0); DefaultPerChannelBookieClientPool(ClientConfiguration conf, PerChannelBookieClientFactory factory, - BookieSocketAddress address, + BookieId address, int coreSize) throws SecurityException { checkArgument(coreSize > 0); this.factory = factory; this.address = address; this.conf = conf; - this.shFactory = SecurityProviderFactoryFactory - .getSecurityProviderFactory(conf.getTLSProviderFactoryClass()); + this.shFactory = SecurityProviderFactoryFactory.getSecurityProviderFactory(conf.getTLSProviderFactoryClass()); this.clients = new PerChannelBookieClient[coreSize]; for (int i = 0; i < coreSize; i++) { - this.clients[i] = factory.create(address, this, shFactory); + this.clients[i] = factory.create(address, this, shFactory, false); + } + + if (conf.getUseV2WireProtocol()) { + this.clientsV3Enforced = new PerChannelBookieClient[coreSize]; + for (int i = 0; i < coreSize; i++) { + this.clientsV3Enforced[i] = factory.create(address, this, shFactory, true); + } + } else { + this.clientsV3Enforced = this.clients; } } @@ -78,23 +86,38 @@ public void operationComplete(int rc, PerChannelBookieClient pcbc) { } @Override - public void intialize() { + public void initialize() { for (PerChannelBookieClient pcbc : this.clients) { 
pcbc.connectIfNeededAndDoOp(this); } } private PerChannelBookieClient getClient(long key) { - if (1 == clients.length) { - return clients[0]; + return getClient(key, false); + } + + private PerChannelBookieClient getClient(long key, PerChannelBookieClient[] pcbc) { + if (1 == pcbc.length) { + return pcbc[0]; + } + int idx = MathUtils.signSafeMod(key, pcbc.length); + return pcbc[idx]; + } + private PerChannelBookieClient getClient(long key, boolean forceUseV3) { + if (forceUseV3) { + return getClient(key, clientsV3Enforced); } - int idx = MathUtils.signSafeMod(key, clients.length); - return clients[idx]; + return getClient(key, clients); } @Override public void obtain(GenericCallback<PerChannelBookieClient> callback, long key) { - getClient(key).connectIfNeededAndDoOp(callback); + obtain(callback, key, false); + } + + @Override + public void obtain(GenericCallback<PerChannelBookieClient> callback, long key, boolean forceUseV3) { + getClient(key, forceUseV3).connectIfNeededAndDoOp(callback); } @Override @@ -106,6 +129,9 @@ public boolean isWritable(long key) { public void checkTimeoutOnPendingOperations() { for (int i = 0; i < clients.length; i++) { clients[i].checkTimeoutOnPendingOperations(); + if (clients != clientsV3Enforced) { + clientsV3Enforced[i].checkTimeoutOnPendingOperations(); + } } } @@ -116,15 +142,21 @@ public void recordError() { @Override public void disconnect(boolean wait) { - for (PerChannelBookieClient pcbc : clients) { - pcbc.disconnect(wait); + for (int i = 0; i < clients.length; i++) { + clients[i].disconnect(); + if (clients != clientsV3Enforced) { + clientsV3Enforced[i].disconnect(); + } } } @Override public void close(boolean wait) { - for (PerChannelBookieClient pcbc : clients) { - pcbc.close(wait); + for (int i = 0; i < clients.length; i++) { + clients[i].close(wait); + if (clients != clientsV3Enforced) { + clientsV3Enforced[i].close(wait); + } } } @@ -134,6 +166,11 @@ public long getNumPendingCompletionRequests() { for (PerChannelBookieClient pcbc : clients) { numPending += pcbc.getNumPendingCompletionRequests(); } + if (clients != clientsV3Enforced) { + for (PerChannelBookieClient pcbc : clientsV3Enforced) { + numPending += pcbc.getNumPendingCompletionRequests(); + } + } return numPending; } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/ForceLedgerProcessorV3.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/ForceLedgerProcessorV3.java index 0c8ef01fa87..d6b6ae3b086 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/ForceLedgerProcessorV3.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/ForceLedgerProcessorV3.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements.
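The client pool above keeps two connection arrays when the V2 wire protocol is configured: regular traffic goes through clients, while operations that must use the V3 protocol even under a V2 configuration are routed to clientsV3Enforced; both pick a connection by hashing the key with signSafeMod. A minimal sketch of that selection (the string arrays stand in for hypothetical connection objects):

public class PooledSelection {
    // Sign-safe modulo, as in org.apache.bookkeeper.common.util.MathUtils.
    static int signSafeMod(long key, int len) {
        int mod = (int) (key % len);
        return mod < 0 ? mod + len : mod;
    }

    public static void main(String[] args) {
        String[] clients = {"conn-0", "conn-1", "conn-2"};
        String[] clientsV3Enforced = {"v3-0", "v3-1", "v3-2"};
        long key = -7;
        boolean forceUseV3 = true;
        String[] pool = forceUseV3 ? clientsV3Enforced : clients;
        // -7 mod 3, sign-safe, picks index 2
        System.out.println(pool[signSafeMod(key, pool.length)]); // "v3-2"
    }
}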
See the NOTICE file @@ -22,16 +22,15 @@ import static com.google.common.base.Preconditions.checkArgument; -import io.netty.channel.Channel; import java.util.concurrent.TimeUnit; -import org.apache.bookkeeper.bookie.Bookie; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.bookie.BookieImpl; +import org.apache.bookkeeper.common.util.MathUtils; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.proto.BookkeeperProtocol.ForceLedgerRequest; import org.apache.bookkeeper.proto.BookkeeperProtocol.ForceLedgerResponse; import org.apache.bookkeeper.proto.BookkeeperProtocol.Request; import org.apache.bookkeeper.proto.BookkeeperProtocol.Response; import org.apache.bookkeeper.proto.BookkeeperProtocol.StatusCode; -import org.apache.bookkeeper.util.MathUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -39,9 +38,9 @@ class ForceLedgerProcessorV3 extends PacketProcessorBaseV3 implements Runnable { private static final Logger logger = LoggerFactory.getLogger(ForceLedgerProcessorV3.class); - public ForceLedgerProcessorV3(Request request, Channel channel, + public ForceLedgerProcessorV3(Request request, BookieRequestHandler requestHandler, BookieRequestProcessor requestProcessor) { - super(request, channel, requestProcessor); + super(request, requestHandler, requestProcessor); } // Returns null if there is no exception thrown @@ -58,20 +57,20 @@ private ForceLedgerResponse getForceLedgerResponse() { } BookkeeperInternalCallbacks.WriteCallback wcb = - (int rc, long ledgerId1, long entryId, BookieSocketAddress addr, Object ctx) -> { + (int rc, long ledgerId1, long entryId, BookieId addr, Object ctx) -> { - checkArgument(entryId == Bookie.METAENTRY_ID_FORCE_LEDGER, + checkArgument(entryId == BookieImpl.METAENTRY_ID_FORCE_LEDGER, "entryId must be METAENTRY_ID_FORCE_LEDGER but was {}", entryId); checkArgument(ledgerId1 == ledgerId, "ledgerId must be {} but was {}", ledgerId, ledgerId1); if (BookieProtocol.EOK == rc) { - requestProcessor.getForceLedgerStats() + requestProcessor.getRequestStats().getForceLedgerStats() .registerSuccessfulEvent(MathUtils.elapsedNanos(startTimeNanos), TimeUnit.NANOSECONDS); } else { - requestProcessor.getForceLedgerStats() + requestProcessor.getRequestStats().getForceLedgerStats() .registerFailedEvent(MathUtils.elapsedNanos(startTimeNanos), TimeUnit.NANOSECONDS); } @@ -94,11 +93,11 @@ private ForceLedgerResponse getForceLedgerResponse() { .setStatus(forceLedgerResponse.getStatus()) .setForceLedgerResponse(forceLedgerResponse); Response resp = response.build(); - sendResponse(status, resp, requestProcessor.getForceLedgerRequestStats()); + sendResponse(status, resp, requestProcessor.getRequestStats().getForceLedgerRequestStats()); }; StatusCode status = null; try { - requestProcessor.getBookie().forceLedger(ledgerId, wcb, channel); + requestProcessor.getBookie().forceLedger(ledgerId, wcb, requestHandler); status = StatusCode.EOK; } catch (Throwable t) { logger.error("Unexpected exception while forcing ledger {} : ", ledgerId, t); @@ -116,7 +115,7 @@ private ForceLedgerResponse getForceLedgerResponse() { } @Override - public void safeRun() { + public void run() { ForceLedgerResponse forceLedgerResponse = getForceLedgerResponse(); if (null != forceLedgerResponse) { Response.Builder response = Response.newBuilder() @@ -124,7 +123,10 @@ public void safeRun() { .setStatus(forceLedgerResponse.getStatus()) .setForceLedgerResponse(forceLedgerResponse); Response resp = response.build(); - 
sendResponse(forceLedgerResponse.getStatus(), resp, requestProcessor.getForceLedgerRequestStats()); + sendResponse( + forceLedgerResponse.getStatus(), + resp, + requestProcessor.getRequestStats().getForceLedgerRequestStats()); } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/GetBookieInfoProcessorV3.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/GetBookieInfoProcessorV3.java index d964957488b..cc223a1831a 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/GetBookieInfoProcessorV3.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/GetBookieInfoProcessorV3.java @@ -20,17 +20,14 @@ */ package org.apache.bookkeeper.proto; -import io.netty.channel.Channel; - import java.io.IOException; import java.util.concurrent.TimeUnit; - +import org.apache.bookkeeper.common.util.MathUtils; import org.apache.bookkeeper.proto.BookkeeperProtocol.GetBookieInfoRequest; import org.apache.bookkeeper.proto.BookkeeperProtocol.GetBookieInfoResponse; import org.apache.bookkeeper.proto.BookkeeperProtocol.Request; import org.apache.bookkeeper.proto.BookkeeperProtocol.Response; import org.apache.bookkeeper.proto.BookkeeperProtocol.StatusCode; -import org.apache.bookkeeper.util.MathUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -40,9 +37,9 @@ public class GetBookieInfoProcessorV3 extends PacketProcessorBaseV3 implements Runnable { private static final Logger LOG = LoggerFactory.getLogger(GetBookieInfoProcessorV3.class); - public GetBookieInfoProcessorV3(Request request, Channel channel, + public GetBookieInfoProcessorV3(Request request, BookieRequestHandler requestHandler, BookieRequestProcessor requestProcessor) { - super(request, channel, requestProcessor); + super(request, requestHandler, requestProcessor); } private GetBookieInfoResponse getGetBookieInfoResponse() { @@ -54,8 +51,8 @@ private GetBookieInfoResponse getGetBookieInfoResponse() { if (!isVersionCompatible()) { getBookieInfoResponse.setStatus(StatusCode.EBADVERSION); - requestProcessor.getGetBookieInfoStats().registerFailedEvent(MathUtils.elapsedNanos(startTimeNanos), - TimeUnit.NANOSECONDS); + requestProcessor.getRequestStats().getGetBookieInfoStats() + .registerFailedEvent(MathUtils.elapsedNanos(startTimeNanos), TimeUnit.NANOSECONDS); return getBookieInfoResponse.build(); } @@ -73,20 +70,24 @@ private GetBookieInfoResponse getGetBookieInfoResponse() { totalDiskSpace = requestProcessor.getBookie().getTotalDiskSpace(); getBookieInfoResponse.setTotalDiskCapacity(totalDiskSpace); } - LOG.debug("FreeDiskSpace info is " + freeDiskSpace + " totalDiskSpace is: " + totalDiskSpace); + if (LOG.isDebugEnabled()) { + LOG.debug("FreeDiskSpace info is " + freeDiskSpace + " totalDiskSpace is: " + totalDiskSpace); + } + requestProcessor.getRequestStats().getGetBookieInfoStats() + .registerSuccessfulEvent(MathUtils.elapsedNanos(startTimeNanos), TimeUnit.NANOSECONDS); } catch (IOException e) { status = StatusCode.EIO; LOG.error("IOException while getting freespace/totalspace", e); + requestProcessor.getRequestStats().getGetBookieInfoStats() + .registerFailedEvent(MathUtils.elapsedNanos(startTimeNanos), TimeUnit.NANOSECONDS); } getBookieInfoResponse.setStatus(status); - requestProcessor.getGetBookieInfoStats().registerSuccessfulEvent(MathUtils.elapsedNanos(startTimeNanos), - TimeUnit.NANOSECONDS); return getBookieInfoResponse.build(); } @Override - public void safeRun() { + public void run() { GetBookieInfoResponse getBookieInfoResponse = getGetBookieInfoResponse(); 
sendResponse(getBookieInfoResponse); } @@ -98,6 +99,6 @@ private void sendResponse(GetBookieInfoResponse getBookieInfoResponse) { .setGetBookieInfoResponse(getBookieInfoResponse); sendResponse(response.getStatus(), response.build(), - requestProcessor.getGetBookieInfoRequestStats()); + requestProcessor.getRequestStats().getGetBookieInfoRequestStats()); } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/GetListOfEntriesOfLedgerProcessorV3.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/GetListOfEntriesOfLedgerProcessorV3.java new file mode 100644 index 00000000000..e5897574a02 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/GetListOfEntriesOfLedgerProcessorV3.java @@ -0,0 +1,108 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.proto; + +import com.google.protobuf.ByteString; +import java.io.IOException; +import java.util.concurrent.TimeUnit; +import org.apache.bookkeeper.bookie.Bookie; +import org.apache.bookkeeper.common.util.MathUtils; +import org.apache.bookkeeper.proto.BookkeeperProtocol.GetListOfEntriesOfLedgerRequest; +import org.apache.bookkeeper.proto.BookkeeperProtocol.GetListOfEntriesOfLedgerResponse; +import org.apache.bookkeeper.proto.BookkeeperProtocol.Request; +import org.apache.bookkeeper.proto.BookkeeperProtocol.Response; +import org.apache.bookkeeper.proto.BookkeeperProtocol.StatusCode; +import org.apache.bookkeeper.util.AvailabilityOfEntriesOfLedger; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * A processor class for v3 entries of a ledger packets. 
+ */ +public class GetListOfEntriesOfLedgerProcessorV3 extends PacketProcessorBaseV3 implements Runnable { + + private static final Logger LOG = LoggerFactory.getLogger(GetListOfEntriesOfLedgerProcessorV3.class); + protected final GetListOfEntriesOfLedgerRequest getListOfEntriesOfLedgerRequest; + protected final long ledgerId; + + public GetListOfEntriesOfLedgerProcessorV3(Request request, BookieRequestHandler requestHandler, + BookieRequestProcessor requestProcessor) { + super(request, requestHandler, requestProcessor); + this.getListOfEntriesOfLedgerRequest = request.getGetListOfEntriesOfLedgerRequest(); + this.ledgerId = getListOfEntriesOfLedgerRequest.getLedgerId(); + } + + private GetListOfEntriesOfLedgerResponse getListOfEntriesOfLedgerResponse() { + long startTimeNanos = MathUtils.nowInNano(); + + GetListOfEntriesOfLedgerResponse.Builder getListOfEntriesOfLedgerResponse = GetListOfEntriesOfLedgerResponse + .newBuilder(); + getListOfEntriesOfLedgerResponse.setLedgerId(ledgerId); + + if (!isVersionCompatible()) { + getListOfEntriesOfLedgerResponse.setStatus(StatusCode.EBADVERSION); + requestProcessor.getRequestStats().getGetListOfEntriesOfLedgerStats() + .registerFailedEvent(MathUtils.elapsedNanos(startTimeNanos), TimeUnit.NANOSECONDS); + return getListOfEntriesOfLedgerResponse.build(); + } + + if (LOG.isDebugEnabled()) { + LOG.debug("Received new getListOfEntriesOfLedger request: {}", request); + } + StatusCode status = StatusCode.EOK; + AvailabilityOfEntriesOfLedger availabilityOfEntriesOfLedger = null; + try { + availabilityOfEntriesOfLedger = new AvailabilityOfEntriesOfLedger( + requestProcessor.bookie.getListOfEntriesOfLedger(ledgerId)); + getListOfEntriesOfLedgerResponse.setAvailabilityOfEntriesOfLedger( + ByteString.copyFrom(availabilityOfEntriesOfLedger.serializeStateOfEntriesOfLedger())); + + } catch (Bookie.NoLedgerException e) { + status = StatusCode.ENOLEDGER; + LOG.error("No ledger found while performing getListOfEntriesOfLedger from ledger: {}", ledgerId, e); + } catch (IOException e) { + status = StatusCode.EIO; + LOG.error("IOException while performing getListOfEntriesOfLedger from ledger: {}", ledgerId); + } + + if (status == StatusCode.EOK) { + requestProcessor.getRequestStats().getListOfEntriesOfLedgerStats + .registerSuccessfulEvent(MathUtils.elapsedNanos(startTimeNanos), TimeUnit.NANOSECONDS); + } else { + requestProcessor.getRequestStats().getListOfEntriesOfLedgerStats + .registerFailedEvent(MathUtils.elapsedNanos(startTimeNanos), TimeUnit.NANOSECONDS); + } + // Finally set the status and return + getListOfEntriesOfLedgerResponse.setStatus(status); + return getListOfEntriesOfLedgerResponse.build(); + } + + @Override + public void run() { + GetListOfEntriesOfLedgerResponse listOfEntriesOfLedgerResponse = getListOfEntriesOfLedgerResponse(); + Response.Builder response = Response.newBuilder().setHeader(getHeader()) + .setStatus(listOfEntriesOfLedgerResponse.getStatus()) + .setGetListOfEntriesOfLedgerResponse(listOfEntriesOfLedgerResponse); + Response resp = response.build(); + sendResponse(listOfEntriesOfLedgerResponse.getStatus(), resp, + requestProcessor.getRequestStats().getListOfEntriesOfLedgerRequestStats); + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/LocalBookiesRegistry.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/LocalBookiesRegistry.java index 59b6ed0ae6f..a69824d507f 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/LocalBookiesRegistry.java +++ 
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/LocalBookiesRegistry.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -21,25 +21,25 @@ package org.apache.bookkeeper.proto; import java.util.concurrent.ConcurrentHashMap; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.BookieId; /** * Local registry for embedded Bookies. */ public class LocalBookiesRegistry { - private static final ConcurrentHashMap localBookiesRegistry = + private static final ConcurrentHashMap localBookiesRegistry = new ConcurrentHashMap<>(); - static void registerLocalBookieAddress(BookieSocketAddress address) { + static void registerLocalBookieAddress(BookieId address) { localBookiesRegistry.put(address, Boolean.TRUE); } - static void unregisterLocalBookieAddress(BookieSocketAddress address) { + static void unregisterLocalBookieAddress(BookieId address) { if (address != null) { localBookiesRegistry.remove(address); } } - public static boolean isLocalBookie(BookieSocketAddress address) { + public static boolean isLocalBookie(BookieId address) { return localBookiesRegistry.containsKey(address); } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/LongPollReadEntryProcessorV3.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/LongPollReadEntryProcessorV3.java index fdbbd353043..658c37c5949 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/LongPollReadEntryProcessorV3.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/LongPollReadEntryProcessorV3.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file * distributed with this work for additional information @@ -17,17 +17,17 @@ */ package org.apache.bookkeeper.proto; -import com.google.common.base.Optional; import com.google.common.base.Stopwatch; -import io.netty.channel.Channel; import io.netty.util.HashedWheelTimer; import io.netty.util.Timeout; import java.io.IOException; +import java.util.Optional; import java.util.concurrent.ExecutorService; import java.util.concurrent.Future; import java.util.concurrent.RejectedExecutionException; import java.util.concurrent.TimeUnit; import org.apache.bookkeeper.bookie.Bookie; +import org.apache.bookkeeper.bookie.BookieException; import org.apache.bookkeeper.bookie.LastAddConfirmedUpdateNotification; import org.apache.bookkeeper.common.util.Watcher; import org.apache.bookkeeper.proto.BookkeeperProtocol.ReadResponse; @@ -44,7 +44,7 @@ class LongPollReadEntryProcessorV3 extends ReadEntryProcessorV3 implements Watch private static final Logger logger = LoggerFactory.getLogger(LongPollReadEntryProcessorV3.class); private final Long previousLAC; - private Optional lastAddConfirmedUpdateTime = Optional.absent(); + private Optional lastAddConfirmedUpdateTime = Optional.empty(); // long poll execution state private final ExecutorService longPollThreadPool; @@ -54,12 +54,12 @@ class LongPollReadEntryProcessorV3 extends ReadEntryProcessorV3 implements Watch private boolean shouldReadEntry = false; LongPollReadEntryProcessorV3(Request request, - Channel channel, + BookieRequestHandler requestHandler, BookieRequestProcessor requestProcessor, ExecutorService fenceThreadPool, ExecutorService longPollThreadPool, HashedWheelTimer requestTimer) { - super(request, channel, requestProcessor, fenceThreadPool); + super(request, requestHandler, requestProcessor, fenceThreadPool); this.previousLAC = readRequest.getPreviousLAC(); this.longPollThreadPool = longPollThreadPool; this.requestTimer = requestTimer; @@ -79,7 +79,7 @@ private synchronized boolean shouldReadEntry() { protected ReadResponse readEntry(ReadResponse.Builder readResponseBuilder, long entryId, Stopwatch startTimeSw) - throws IOException { + throws IOException, BookieException { if (RequestUtils.shouldPiggybackEntry(readRequest)) { if (!readRequest.hasPreviousLAC() || (BookieProtocol.LAST_ADD_CONFIRMED != entryId)) { // This is not a valid request - client bug? 
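The LongPollReadEntryProcessorV3 hunks above migrate from Guava's com.google.common.base.Optional to java.util.Optional; the only API change the diff depends on is Optional.absent() becoming Optional.empty(). A small illustrative sketch of the replacement (field and method names are made up, not from the patch):

    import java.util.Optional;

    class OptionalMigrationSketch {
        // Guava:  Optional<Long> t = Optional.absent();
        // JDK:
        private Optional<Long> lastUpdateTimeMillis = Optional.empty();

        void onUpdate(long nowMillis) {
            lastUpdateTimeMillis = Optional.of(nowMillis);
        }

        long millisSinceLastUpdate(long nowMillis) {
            // java.util.Optional#orElse replaces Guava's Optional#or.
            return nowMillis - lastUpdateTimeMillis.orElse(nowMillis);
        }
    }
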
@@ -101,7 +101,7 @@ protected ReadResponse readEntry(ReadResponse.Builder readResponseBuilder, try { return super.readEntry(readResponseBuilder, entryId, true, startTimeSw); } catch (Bookie.NoEntryException e) { - requestProcessor.readLastEntryNoEntryErrorCounter.inc(); + requestProcessor.getRequestStats().getReadLastEntryNoEntryErrorCounter().inc(); logger.info( "No entry found while piggyback reading entry {} from ledger {} : previous lac = {}", entryId, ledgerId, previousLAC); @@ -142,7 +142,7 @@ private ReadResponse getLongPollReadResponse() { final boolean watched; try { - watched = requestProcessor.bookie.waitForLastAddConfirmedUpdate(ledgerId, previousLAC, this); + watched = requestProcessor.getBookie().waitForLastAddConfirmedUpdate(ledgerId, previousLAC, this); } catch (Bookie.NoLedgerException e) { logger.info("No ledger found while longpoll reading ledger {}, previous lac = {}.", ledgerId, previousLAC); @@ -153,7 +153,7 @@ private ReadResponse getLongPollReadResponse() { return buildErrorResponse(StatusCode.EIO, startTimeSw); } - registerSuccessfulEvent(requestProcessor.longPollPreWaitStats, startTimeSw); + registerSuccessfulEvent(requestProcessor.getRequestStats().getLongPollPreWaitStats(), startTimeSw); lastPhaseStartTime.reset().start(); if (watched) { @@ -163,9 +163,10 @@ private ReadResponse getLongPollReadResponse() { } synchronized (this) { expirationTimerTask = requestTimer.newTimeout(timeout -> { - // When the timeout expires just get whatever is the current - // readLastConfirmed - LongPollReadEntryProcessorV3.this.scheduleDeferredRead(true); + requestProcessor.getBookie().cancelWaitForLastAddConfirmedUpdate(ledgerId, this); + // When the timeout expires just get whatever is the current + // readLastConfirmed + LongPollReadEntryProcessorV3.this.scheduleDeferredRead(true); }, readRequest.getTimeOut(), TimeUnit.MILLISECONDS); } return null; @@ -213,7 +214,7 @@ private synchronized void scheduleDeferredRead(boolean timeout) { expirationTimerTask.cancel(); } - registerEvent(timeout, requestProcessor.longPollWaitStats, lastPhaseStartTime); + registerEvent(timeout, requestProcessor.getRequestStats().getLongPollWaitStats(), lastPhaseStartTime); lastPhaseStartTime.reset().start(); } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/PacketProcessorBase.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/PacketProcessorBase.java index b7dee2d4a8d..f0c079de0b0 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/PacketProcessorBase.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/PacketProcessorBase.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -18,36 +18,37 @@ package org.apache.bookkeeper.proto; import io.netty.channel.Channel; - +import io.netty.channel.ChannelFuture; +import io.netty.channel.ChannelPromise; +import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; - +import org.apache.bookkeeper.common.util.MathUtils; import org.apache.bookkeeper.proto.BookieProtocol.Request; import org.apache.bookkeeper.stats.OpStatsLogger; -import org.apache.bookkeeper.util.MathUtils; -import org.apache.bookkeeper.util.SafeRunnable; +import org.apache.bookkeeper.util.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * A base class for bookeeper packet processors. 
*/ -abstract class PacketProcessorBase extends SafeRunnable { +abstract class PacketProcessorBase implements Runnable { private static final Logger logger = LoggerFactory.getLogger(PacketProcessorBase.class); T request; - Channel channel; + BookieRequestHandler requestHandler; BookieRequestProcessor requestProcessor; long enqueueNanos; - protected void init(T request, Channel channel, BookieRequestProcessor requestProcessor) { + protected void init(T request, BookieRequestHandler requestHandler, BookieRequestProcessor requestProcessor) { this.request = request; - this.channel = channel; + this.requestHandler = requestHandler; this.requestProcessor = requestProcessor; this.enqueueNanos = MathUtils.nowInNano(); } protected void reset() { request = null; - channel = null; + requestHandler = null; requestProcessor = null; enqueueNanos = -1; } @@ -65,8 +66,107 @@ protected boolean isVersionCompatible() { return true; } + protected void sendWriteReqResponse(int rc, Object response, OpStatsLogger statsLogger) { + sendResponse(rc, response, statsLogger); + requestProcessor.onAddRequestFinish(); + } + + protected void sendReadReqResponse(int rc, Object response, OpStatsLogger statsLogger, boolean throttle) { + if (throttle) { + sendResponseAndWait(rc, response, statsLogger); + } else { + sendResponse(rc, response, statsLogger); + } + requestProcessor.onReadRequestFinish(); + } + protected void sendResponse(int rc, Object response, OpStatsLogger statsLogger) { - channel.writeAndFlush(response, channel.voidPromise()); + final long writeNanos = MathUtils.nowInNano(); + final long timeOut = requestProcessor.getWaitTimeoutOnBackpressureMillis(); + + Channel channel = requestHandler.ctx().channel(); + + if (timeOut >= 0 && !channel.isWritable()) { + if (!requestProcessor.isBlacklisted(channel)) { + synchronized (channel) { + if (!channel.isWritable() && !requestProcessor.isBlacklisted(channel)) { + final long waitUntilNanos = writeNanos + TimeUnit.MILLISECONDS.toNanos(timeOut); + while (!channel.isWritable() && MathUtils.nowInNano() < waitUntilNanos) { + try { + TimeUnit.MILLISECONDS.sleep(1); + } catch (InterruptedException e) { + break; + } + } + if (!channel.isWritable()) { + requestProcessor.blacklistChannel(channel); + requestProcessor.handleNonWritableChannel(channel); + } + } + } + } + + if (!channel.isWritable()) { + logger.warn("cannot write response to non-writable channel {} for request {}", channel, + StringUtils.requestToString(request)); + requestProcessor.getRequestStats().getChannelWriteStats() + .registerFailedEvent(MathUtils.elapsedNanos(writeNanos), TimeUnit.NANOSECONDS); + statsLogger.registerFailedEvent(MathUtils.elapsedNanos(enqueueNanos), TimeUnit.NANOSECONDS); + if (response instanceof BookieProtocol.Response) { + ((BookieProtocol.Response) response).release(); + } + return; + } else { + requestProcessor.invalidateBlacklist(channel); + } + } + + if (channel.isActive()) { + final ChannelPromise promise; + if (logger.isDebugEnabled()) { + promise = channel.newPromise().addListener(future -> { + if (!future.isSuccess()) { + logger.debug("Netty channel write exception. 
", future.cause()); + } + }); + } else { + promise = channel.voidPromise(); + } + channel.writeAndFlush(response, promise); + } else { + if (response instanceof BookieProtocol.Response) { + ((BookieProtocol.Response) response).release(); + } + if (logger.isDebugEnabled()) { + logger.debug("Netty channel {} is inactive, " + + "hence bypassing netty channel writeAndFlush during sendResponse", channel); + } + } + if (BookieProtocol.EOK == rc) { + statsLogger.registerSuccessfulEvent(MathUtils.elapsedNanos(enqueueNanos), TimeUnit.NANOSECONDS); + } else { + statsLogger.registerFailedEvent(MathUtils.elapsedNanos(enqueueNanos), TimeUnit.NANOSECONDS); + } + } + + /** + * Write on the channel and wait until the write is completed. + * + *
<p>
          That will make the thread to get blocked until we're able to + * write everything on the TCP stack, providing auto-throttling + * and avoiding using too much memory when handling read-requests. + */ + protected void sendResponseAndWait(int rc, Object response, OpStatsLogger statsLogger) { + try { + Channel channel = requestHandler.ctx().channel(); + ChannelFuture future = channel.writeAndFlush(response); + if (!channel.eventLoop().inEventLoop()) { + future.get(); + } + } catch (ExecutionException | InterruptedException e) { + logger.debug("Netty channel write exception. ", e); + return; + } if (BookieProtocol.EOK == rc) { statsLogger.registerSuccessfulEvent(MathUtils.elapsedNanos(enqueueNanos), TimeUnit.NANOSECONDS); } else { @@ -75,11 +175,28 @@ protected void sendResponse(int rc, Object response, OpStatsLogger statsLogger) } @Override - public void safeRun() { + public void run() { + if (request instanceof BookieProtocol.ReadRequest) { + requestProcessor.getRequestStats().getReadEntrySchedulingDelayStats() + .registerSuccessfulEvent(MathUtils.elapsedNanos(enqueueNanos), TimeUnit.NANOSECONDS); + } + if (request instanceof BookieProtocol.ParsedAddRequest) { + requestProcessor.getRequestStats().getWriteThreadQueuedLatency() + .registerSuccessfulEvent(MathUtils.elapsedNanos(enqueueNanos), TimeUnit.NANOSECONDS); + } + if (!isVersionCompatible()) { sendResponse(BookieProtocol.EBADVERSION, ResponseBuilder.buildErrorResponse(BookieProtocol.EBADVERSION, request), - requestProcessor.readRequestStats); + requestProcessor.getRequestStats().getReadRequestStats()); + if (request instanceof BookieProtocol.ReadRequest) { + requestProcessor.onReadRequestFinish(); + } + if (request instanceof BookieProtocol.ParsedAddRequest) { + ((BookieProtocol.ParsedAddRequest) request).release(); + request.recycle(); + requestProcessor.onAddRequestFinish(); + } return; } processPacket(); diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/PacketProcessorBaseV3.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/PacketProcessorBaseV3.java index 7dc29a38bf1..96f76bef7e0 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/PacketProcessorBaseV3.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/PacketProcessorBaseV3.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -24,30 +24,30 @@ import io.netty.channel.ChannelFuture; import io.netty.channel.ChannelFutureListener; import java.util.concurrent.TimeUnit; - +import lombok.extern.slf4j.Slf4j; +import org.apache.bookkeeper.common.util.MathUtils; import org.apache.bookkeeper.proto.BookkeeperProtocol.BKPacketHeader; import org.apache.bookkeeper.proto.BookkeeperProtocol.ProtocolVersion; import org.apache.bookkeeper.proto.BookkeeperProtocol.Request; import org.apache.bookkeeper.proto.BookkeeperProtocol.StatusCode; import org.apache.bookkeeper.stats.OpStatsLogger; -import org.apache.bookkeeper.util.MathUtils; -import org.apache.bookkeeper.util.SafeRunnable; import org.apache.bookkeeper.util.StringUtils; /** * A base class for bookkeeper protocol v3 packet processors. 
*/ -public abstract class PacketProcessorBaseV3 extends SafeRunnable { +@Slf4j +public abstract class PacketProcessorBaseV3 implements Runnable { final Request request; - final Channel channel; + final BookieRequestHandler requestHandler; final BookieRequestProcessor requestProcessor; final long enqueueNanos; - public PacketProcessorBaseV3(Request request, Channel channel, + public PacketProcessorBaseV3(Request request, BookieRequestHandler requestHandler, BookieRequestProcessor requestProcessor) { this.request = request; - this.channel = channel; + this.requestHandler = requestHandler; this.requestProcessor = requestProcessor; this.enqueueNanos = MathUtils.nowInNano(); } @@ -55,6 +55,7 @@ public PacketProcessorBaseV3(Request request, Channel channel, protected void sendResponse(StatusCode code, Object response, OpStatsLogger statsLogger) { final long writeNanos = MathUtils.nowInNano(); + Channel channel = requestHandler.ctx().channel(); final long timeOut = requestProcessor.getWaitTimeoutOnBackpressureMillis(); if (timeOut >= 0 && !channel.isWritable()) { if (!requestProcessor.isBlacklisted(channel)) { @@ -77,9 +78,9 @@ protected void sendResponse(StatusCode code, Object response, OpStatsLogger stat } if (!channel.isWritable()) { - LOGGER.warn("cannot write response to non-writable channel {} for request {}", channel, + log.warn("cannot write response to non-writable channel {} for request {}", channel, StringUtils.requestToString(request)); - requestProcessor.getChannelWriteStats() + requestProcessor.getRequestStats().getChannelWriteStats() .registerFailedEvent(MathUtils.elapsedNanos(writeNanos), TimeUnit.NANOSECONDS); statsLogger.registerFailedEvent(MathUtils.elapsedNanos(enqueueNanos), TimeUnit.NANOSECONDS); return; @@ -87,25 +88,29 @@ protected void sendResponse(StatusCode code, Object response, OpStatsLogger stat requestProcessor.invalidateBlacklist(channel); } } - - channel.writeAndFlush(response).addListener(new ChannelFutureListener() { - @Override - public void operationComplete(ChannelFuture future) throws Exception { - long writeElapsedNanos = MathUtils.elapsedNanos(writeNanos); - if (!future.isSuccess()) { - requestProcessor.getChannelWriteStats() - .registerFailedEvent(writeElapsedNanos, TimeUnit.NANOSECONDS); - } else { - requestProcessor.getChannelWriteStats() - .registerSuccessfulEvent(writeElapsedNanos, TimeUnit.NANOSECONDS); - } - if (StatusCode.EOK == code) { - statsLogger.registerSuccessfulEvent(MathUtils.elapsedNanos(enqueueNanos), TimeUnit.NANOSECONDS); - } else { - statsLogger.registerFailedEvent(MathUtils.elapsedNanos(enqueueNanos), TimeUnit.NANOSECONDS); + if (channel.isActive()) { + channel.writeAndFlush(response).addListener(new ChannelFutureListener() { + @Override + public void operationComplete(ChannelFuture future) throws Exception { + long writeElapsedNanos = MathUtils.elapsedNanos(writeNanos); + if (!future.isSuccess()) { + requestProcessor.getRequestStats().getChannelWriteStats() + .registerFailedEvent(writeElapsedNanos, TimeUnit.NANOSECONDS); + } else { + requestProcessor.getRequestStats().getChannelWriteStats() + .registerSuccessfulEvent(writeElapsedNanos, TimeUnit.NANOSECONDS); + } + if (StatusCode.EOK == code) { + statsLogger.registerSuccessfulEvent(MathUtils.elapsedNanos(enqueueNanos), TimeUnit.NANOSECONDS); + } else { + statsLogger.registerFailedEvent(MathUtils.elapsedNanos(enqueueNanos), TimeUnit.NANOSECONDS); + } } - } - }); + }); + } else { + log.debug("Netty channel {} is inactive, " + + "hence bypassing netty channel writeAndFlush during 
sendResponse", channel); + } } protected boolean isVersionCompatible() { diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/PerChannelBookieClient.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/PerChannelBookieClient.java index 860079737fd..a77be0ca3fe 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/PerChannelBookieClient.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/PerChannelBookieClient.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -25,11 +25,9 @@ import com.google.protobuf.ByteString; import com.google.protobuf.ExtensionRegistry; import com.google.protobuf.UnsafeByteOperations; - import io.netty.bootstrap.Bootstrap; import io.netty.buffer.ByteBuf; import io.netty.buffer.ByteBufAllocator; -import io.netty.buffer.PooledByteBufAllocator; import io.netty.buffer.Unpooled; import io.netty.buffer.UnpooledByteBufAllocator; import io.netty.channel.Channel; @@ -45,23 +43,32 @@ import io.netty.channel.DefaultEventLoopGroup; import io.netty.channel.EventLoopGroup; import io.netty.channel.WriteBufferWaterMark; +import io.netty.channel.epoll.EpollChannelOption; import io.netty.channel.epoll.EpollEventLoopGroup; import io.netty.channel.epoll.EpollSocketChannel; +import io.netty.channel.local.LocalAddress; import io.netty.channel.local.LocalChannel; import io.netty.channel.socket.nio.NioSocketChannel; +import io.netty.channel.unix.Errors.NativeIoException; import io.netty.handler.codec.CorruptedFrameException; import io.netty.handler.codec.DecoderException; import io.netty.handler.codec.LengthFieldBasedFrameDecoder; -import io.netty.handler.codec.LengthFieldPrepender; import io.netty.handler.codec.TooLongFrameException; +import io.netty.handler.flush.FlushConsolidationHandler; import io.netty.handler.ssl.SslHandler; +import io.netty.incubator.channel.uring.IOUringChannelOption; +import io.netty.incubator.channel.uring.IOUringEventLoopGroup; +import io.netty.incubator.channel.uring.IOUringSocketChannel; import io.netty.util.Recycler; import io.netty.util.Recycler.Handle; +import io.netty.util.ReferenceCountUtil; +import io.netty.util.ReferenceCounted; import io.netty.util.concurrent.Future; import io.netty.util.concurrent.GenericFutureListener; - import java.io.IOException; +import java.net.InetSocketAddress; import java.net.SocketAddress; +import java.net.UnknownHostException; import java.security.cert.Certificate; import java.util.ArrayDeque; import java.util.ArrayList; @@ -70,29 +77,37 @@ import java.util.Collections; import java.util.EnumSet; import java.util.List; +import java.util.Map; +import java.util.NoSuchElementException; import java.util.Optional; import java.util.Queue; import java.util.Set; +import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.locks.ReentrantReadWriteLock; import java.util.function.BiPredicate; - -import javax.net.ssl.SSLHandshakeException; +import javax.net.ssl.SSLException; import javax.net.ssl.SSLPeerUnverifiedException; - +import lombok.SneakyThrows; import org.apache.bookkeeper.auth.BookKeeperPrincipal; import org.apache.bookkeeper.auth.ClientAuthProvider; import org.apache.bookkeeper.client.BKException; import org.apache.bookkeeper.client.BookKeeperClientStats; 
import org.apache.bookkeeper.client.BookieInfoReader.BookieInfo; import org.apache.bookkeeper.client.api.WriteFlag; +import org.apache.bookkeeper.common.util.MathUtils; +import org.apache.bookkeeper.common.util.MdcUtils; import org.apache.bookkeeper.common.util.OrderedExecutor; import org.apache.bookkeeper.conf.ClientConfiguration; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.BatchedReadEntryCallback; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.ForceLedgerCallback; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GenericCallback; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GetBookieInfoCallback; +import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GetListOfEntriesOfLedgerCallback; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.ReadEntryCallback; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.ReadEntryCallbackCtx; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.ReadLacCallback; @@ -106,6 +121,8 @@ import org.apache.bookkeeper.proto.BookkeeperProtocol.ForceLedgerResponse; import org.apache.bookkeeper.proto.BookkeeperProtocol.GetBookieInfoRequest; import org.apache.bookkeeper.proto.BookkeeperProtocol.GetBookieInfoResponse; +import org.apache.bookkeeper.proto.BookkeeperProtocol.GetListOfEntriesOfLedgerRequest; +import org.apache.bookkeeper.proto.BookkeeperProtocol.GetListOfEntriesOfLedgerResponse; import org.apache.bookkeeper.proto.BookkeeperProtocol.OperationType; import org.apache.bookkeeper.proto.BookkeeperProtocol.ProtocolVersion; import org.apache.bookkeeper.proto.BookkeeperProtocol.ReadLacRequest; @@ -121,22 +138,27 @@ import org.apache.bookkeeper.stats.NullStatsLogger; import org.apache.bookkeeper.stats.OpStatsLogger; import org.apache.bookkeeper.stats.StatsLogger; +import org.apache.bookkeeper.stats.annotations.StatsDoc; import org.apache.bookkeeper.tls.SecurityException; import org.apache.bookkeeper.tls.SecurityHandlerFactory; import org.apache.bookkeeper.tls.SecurityHandlerFactory.NodeType; +import org.apache.bookkeeper.util.AvailabilityOfEntriesOfLedger; import org.apache.bookkeeper.util.ByteBufList; -import org.apache.bookkeeper.util.MathUtils; -import org.apache.bookkeeper.util.SafeRunnable; import org.apache.bookkeeper.util.StringUtils; import org.apache.bookkeeper.util.collections.ConcurrentOpenHashMap; import org.apache.bookkeeper.util.collections.SynchronizedHashMultiMap; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.slf4j.MDC; /** * This class manages all details of connection to a particular bookie. It also * has reconnect logic if a connection to a bookie fails. */ +@StatsDoc( + name = BookKeeperClientStats.CHANNEL_SCOPE, + help = "Per channel bookie client stats" +) @Sharable public class PerChannelBookieClient extends ChannelInboundHandlerAdapter { @@ -153,18 +175,21 @@ public class PerChannelBookieClient extends ChannelInboundHandlerAdapter { BKException.Code.WriteOnReadOnlyBookieException)); private static final int DEFAULT_HIGH_PRIORITY_VALUE = 100; // We may add finer grained priority later. 
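The block of @StatsDoc annotations that follows documents each per-channel metric for BookKeeper's generated stats reference; the annotation carries no runtime wiring, so the field must still be initialized from a StatsLogger scope. A condensed sketch of the pattern, using an illustrative metric name rather than a real BookKeeperClientStats constant:

    import org.apache.bookkeeper.stats.OpStatsLogger;
    import org.apache.bookkeeper.stats.StatsLogger;
    import org.apache.bookkeeper.stats.annotations.StatsDoc;

    class AnnotatedStatsSketch {
        @StatsDoc(
            name = "example_read_op",
            help = "channel stats of read entries requests"
        )
        private final OpStatsLogger readEntryOpLogger;

        AnnotatedStatsSketch(StatsLogger statsLogger) {
            // The name passed here should match the annotated name constant.
            this.readEntryOpLogger = statsLogger.getOpStatsLogger("example_read_op");
        }
    }
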
private static final AtomicLong txnIdGenerator = new AtomicLong(0); + static final String CONSOLIDATION_HANDLER_NAME = "consolidation"; - final BookieSocketAddress addr; + final BookieId bookieId; + final BookieAddressResolver bookieAddressResolver; final EventLoopGroup eventLoopGroup; + final ByteBufAllocator allocator; final OrderedExecutor executor; final long addEntryTimeoutNanos; final long readEntryTimeoutNanos; final int maxFrameSize; - final int getBookieInfoTimeout; + final long getBookieInfoTimeoutNanos; final int startTLSTimeout; private final ConcurrentOpenHashMap completionObjects = - new ConcurrentOpenHashMap(); + ConcurrentOpenHashMap.newBuilder().autoShrink(true).build(); // Map that hold duplicated read requests. The idea is to only use this map (synchronized) when there is a duplicate // read request for the same ledgerId/entryId @@ -172,32 +197,127 @@ public class PerChannelBookieClient extends ChannelInboundHandlerAdapter { new SynchronizedHashMultiMap<>(); private final StatsLogger statsLogger; + @StatsDoc( + name = BookKeeperClientStats.CHANNEL_READ_OP, + help = "channel stats of read entries requests" + ) private final OpStatsLogger readEntryOpLogger; + @StatsDoc( + name = BookKeeperClientStats.CHANNEL_TIMEOUT_READ, + help = "timeout stats of read entries requests" + ) private final OpStatsLogger readTimeoutOpLogger; + @StatsDoc( + name = BookKeeperClientStats.CHANNEL_ADD_OP, + help = "channel stats of add entries requests" + ) private final OpStatsLogger addEntryOpLogger; + @StatsDoc( + name = BookKeeperClientStats.CHANNEL_WRITE_LAC_OP, + help = "channel stats of write_lac requests" + ) private final OpStatsLogger writeLacOpLogger; + @StatsDoc( + name = BookKeeperClientStats.CHANNEL_FORCE_OP, + help = "channel stats of force requests" + ) private final OpStatsLogger forceLedgerOpLogger; + @StatsDoc( + name = BookKeeperClientStats.CHANNEL_READ_LAC_OP, + help = "channel stats of read_lac requests" + ) private final OpStatsLogger readLacOpLogger; + @StatsDoc( + name = BookKeeperClientStats.CHANNEL_TIMEOUT_ADD, + help = "timeout stats of add entries requests" + ) private final OpStatsLogger addTimeoutOpLogger; + @StatsDoc( + name = BookKeeperClientStats.CHANNEL_TIMEOUT_WRITE_LAC, + help = "timeout stats of write_lac requests" + ) private final OpStatsLogger writeLacTimeoutOpLogger; + @StatsDoc( + name = BookKeeperClientStats.CHANNEL_TIMEOUT_FORCE, + help = "timeout stats of force requests" + ) private final OpStatsLogger forceLedgerTimeoutOpLogger; + @StatsDoc( + name = BookKeeperClientStats.CHANNEL_TIMEOUT_READ_LAC, + help = "timeout stats of read_lac requests" + ) private final OpStatsLogger readLacTimeoutOpLogger; + @StatsDoc( + name = BookKeeperClientStats.GET_BOOKIE_INFO_OP, + help = "channel stats of get_bookie_info requests" + ) private final OpStatsLogger getBookieInfoOpLogger; + @StatsDoc( + name = BookKeeperClientStats.TIMEOUT_GET_BOOKIE_INFO, + help = "timeout stats of get_bookie_info requests" + ) private final OpStatsLogger getBookieInfoTimeoutOpLogger; + @StatsDoc( + name = BookKeeperClientStats.CHANNEL_START_TLS_OP, + help = "channel stats of start_tls requests" + ) private final OpStatsLogger startTLSOpLogger; + @StatsDoc( + name = BookKeeperClientStats.CHANNEL_TIMEOUT_START_TLS_OP, + help = "timeout stats of start_tls requests" + ) private final OpStatsLogger startTLSTimeoutOpLogger; + @StatsDoc( + name = BookKeeperClientStats.CLIENT_CONNECT_TIMER, + help = "channel stats of connect requests" + ) private final OpStatsLogger connectTimer; + private final 
OpStatsLogger getListOfEntriesOfLedgerCompletionOpLogger; + private final OpStatsLogger getListOfEntriesOfLedgerCompletionTimeoutOpLogger; + @StatsDoc( + name = BookKeeperClientStats.NETTY_EXCEPTION_CNT, + help = "the number of exceptions received from this channel" + ) private final Counter exceptionCounter; + @StatsDoc( + name = BookKeeperClientStats.ADD_OP_OUTSTANDING, + help = "the number of outstanding add_entry requests" + ) private final Counter addEntryOutstanding; + @StatsDoc( + name = BookKeeperClientStats.READ_OP_OUTSTANDING, + help = "the number of outstanding add_entry requests" + ) private final Counter readEntryOutstanding; /* collect stats on all Ops that flows through netty pipeline */ + @StatsDoc( + name = BookKeeperClientStats.NETTY_OPS, + help = "channel stats for all operations flowing through netty pipeline" + ) private final OpStatsLogger nettyOpLogger; + @StatsDoc( + name = BookKeeperClientStats.ACTIVE_NON_TLS_CHANNEL_COUNTER, + help = "the number of active non-tls channels" + ) private final Counter activeNonTlsChannelCounter; + @StatsDoc( + name = BookKeeperClientStats.ACTIVE_TLS_CHANNEL_COUNTER, + help = "the number of active tls channels" + ) private final Counter activeTlsChannelCounter; + @StatsDoc( + name = BookKeeperClientStats.FAILED_CONNECTION_COUNTER, + help = "the number of failed connections" + ) private final Counter failedConnectionCounter; + @StatsDoc( + name = BookKeeperClientStats.FAILED_TLS_HANDSHAKE_COUNTER, + help = "the number of failed tls handshakes" + ) private final Counter failedTlsHandshakeCounter; private final boolean useV2WireProtocol; + private final boolean preserveMdcForTaskExecution; /** * The following member variables do not need to be concurrent, or volatile @@ -209,6 +329,14 @@ public class PerChannelBookieClient extends ChannelInboundHandlerAdapter { private final ClientConnectionPeer connectionPeer; private volatile BookKeeperPrincipal authorizedId = BookKeeperPrincipal.ANONYMOUS; + @SneakyThrows + private FailedChannelFutureImpl processBookieNotResolvedError(long startTime, + BookieAddressResolver.BookieIdNotResolvedException err) { + FailedChannelFutureImpl failedFuture = new FailedChannelFutureImpl(err); + contextPreservingListener(new ConnectionFutureListener(startTime)).operationComplete(failedFuture); + return failedFuture; + } + enum ConnectionState { DISCONNECTED, CONNECTING, CONNECTED, CLOSED, START_TLS } @@ -222,65 +350,71 @@ enum ConnectionState { private final ExtensionRegistry extRegistry; private final SecurityHandlerFactory shFactory; private volatile boolean isWritable = true; + private long lastBookieUnavailableLogTimestamp = 0; public PerChannelBookieClient(OrderedExecutor executor, EventLoopGroup eventLoopGroup, - BookieSocketAddress addr) throws SecurityException { + BookieId addr, BookieAddressResolver bookieAddressResolver) throws SecurityException { this(new ClientConfiguration(), executor, eventLoopGroup, addr, NullStatsLogger.INSTANCE, null, null, - null); + null, bookieAddressResolver); } public PerChannelBookieClient(OrderedExecutor executor, EventLoopGroup eventLoopGroup, - BookieSocketAddress addr, + BookieId bookieId, ClientAuthProvider.Factory authProviderFactory, - ExtensionRegistry extRegistry) throws SecurityException { - this(new ClientConfiguration(), executor, eventLoopGroup, addr, NullStatsLogger.INSTANCE, - authProviderFactory, extRegistry, null); + ExtensionRegistry extRegistry, BookieAddressResolver bookieAddressResolver) + throws SecurityException { + this(new ClientConfiguration(), 
executor, eventLoopGroup, bookieId, + NullStatsLogger.INSTANCE, + authProviderFactory, extRegistry, null, bookieAddressResolver); } public PerChannelBookieClient(ClientConfiguration conf, OrderedExecutor executor, - EventLoopGroup eventLoopGroup, BookieSocketAddress addr, + EventLoopGroup eventLoopGroup, BookieId bookieId, StatsLogger parentStatsLogger, ClientAuthProvider.Factory authProviderFactory, ExtensionRegistry extRegistry, - PerChannelBookieClientPool pcbcPool) throws SecurityException { - this(conf, executor, eventLoopGroup, addr, NullStatsLogger.INSTANCE, - authProviderFactory, extRegistry, pcbcPool, null); + PerChannelBookieClientPool pcbcPool, BookieAddressResolver bookieAddressResolver) + throws SecurityException { + this(conf, executor, eventLoopGroup, UnpooledByteBufAllocator.DEFAULT, bookieId, NullStatsLogger.INSTANCE, + authProviderFactory, extRegistry, pcbcPool, null, bookieAddressResolver); } public PerChannelBookieClient(ClientConfiguration conf, OrderedExecutor executor, - EventLoopGroup eventLoopGroup, BookieSocketAddress addr, + EventLoopGroup eventLoopGroup, + ByteBufAllocator allocator, + BookieId bookieId, StatsLogger parentStatsLogger, ClientAuthProvider.Factory authProviderFactory, ExtensionRegistry extRegistry, PerChannelBookieClientPool pcbcPool, - SecurityHandlerFactory shFactory) throws SecurityException { + SecurityHandlerFactory shFactory, + BookieAddressResolver bookieAddressResolver) throws SecurityException { this.maxFrameSize = conf.getNettyMaxFrameSizeBytes(); this.conf = conf; - this.addr = addr; + this.bookieId = bookieId; + this.bookieAddressResolver = bookieAddressResolver; this.executor = executor; - if (LocalBookiesRegistry.isLocalBookie(addr)) { + if (LocalBookiesRegistry.isLocalBookie(bookieId)) { this.eventLoopGroup = new DefaultEventLoopGroup(); } else { this.eventLoopGroup = eventLoopGroup; } + this.allocator = allocator; this.state = ConnectionState.DISCONNECTED; this.addEntryTimeoutNanos = TimeUnit.SECONDS.toNanos(conf.getAddEntryTimeout()); this.readEntryTimeoutNanos = TimeUnit.SECONDS.toNanos(conf.getReadEntryTimeout()); - this.getBookieInfoTimeout = conf.getBookieInfoTimeout(); + this.getBookieInfoTimeoutNanos = TimeUnit.SECONDS.toNanos(conf.getBookieInfoTimeout()); this.startTLSTimeout = conf.getStartTLSTimeout(); this.useV2WireProtocol = conf.getUseV2WireProtocol(); + this.preserveMdcForTaskExecution = conf.getPreserveMdcForTaskExecution(); this.authProviderFactory = authProviderFactory; this.extRegistry = extRegistry; this.shFactory = shFactory; if (shFactory != null) { - shFactory.init(NodeType.Client, conf); + shFactory.init(NodeType.Client, conf, allocator); } - StringBuilder nameBuilder = new StringBuilder(); - nameBuilder.append(addr.getHostName().replace('.', '_').replace('-', '_')) - .append("_").append(addr.getPort()); - this.statsLogger = parentStatsLogger.scope(BookKeeperClientStats.CHANNEL_SCOPE) - .scope(nameBuilder.toString()); + .scopeLabel(BookKeeperClientStats.BOOKIE_LABEL, bookieId.toString()); readEntryOpLogger = statsLogger.getOpStatsLogger(BookKeeperClientStats.CHANNEL_READ_OP); addEntryOpLogger = statsLogger.getOpStatsLogger(BookKeeperClientStats.CHANNEL_ADD_OP); @@ -288,6 +422,8 @@ public PerChannelBookieClient(ClientConfiguration conf, OrderedExecutor executor forceLedgerOpLogger = statsLogger.getOpStatsLogger(BookKeeperClientStats.CHANNEL_FORCE_OP); readLacOpLogger = statsLogger.getOpStatsLogger(BookKeeperClientStats.CHANNEL_READ_LAC_OP); getBookieInfoOpLogger = 
statsLogger.getOpStatsLogger(BookKeeperClientStats.GET_BOOKIE_INFO_OP); + getListOfEntriesOfLedgerCompletionOpLogger = statsLogger + .getOpStatsLogger(BookKeeperClientStats.GET_LIST_OF_ENTRIES_OF_LEDGER_OP); readTimeoutOpLogger = statsLogger.getOpStatsLogger(BookKeeperClientStats.CHANNEL_TIMEOUT_READ); addTimeoutOpLogger = statsLogger.getOpStatsLogger(BookKeeperClientStats.CHANNEL_TIMEOUT_ADD); writeLacTimeoutOpLogger = statsLogger.getOpStatsLogger(BookKeeperClientStats.CHANNEL_TIMEOUT_WRITE_LAC); @@ -296,6 +432,8 @@ public PerChannelBookieClient(ClientConfiguration conf, OrderedExecutor executor getBookieInfoTimeoutOpLogger = statsLogger.getOpStatsLogger(BookKeeperClientStats.TIMEOUT_GET_BOOKIE_INFO); startTLSOpLogger = statsLogger.getOpStatsLogger(BookKeeperClientStats.CHANNEL_START_TLS_OP); startTLSTimeoutOpLogger = statsLogger.getOpStatsLogger(BookKeeperClientStats.CHANNEL_TIMEOUT_START_TLS_OP); + getListOfEntriesOfLedgerCompletionTimeoutOpLogger = statsLogger + .getOpStatsLogger(BookKeeperClientStats.TIMEOUT_GET_LIST_OF_ENTRIES_OF_LEDGER); exceptionCounter = statsLogger.getCounter(BookKeeperClientStats.NETTY_EXCEPTION_CNT); connectTimer = statsLogger.getOpStatsLogger(BookKeeperClientStats.CLIENT_CONNECT_TIMER); addEntryOutstanding = statsLogger.getCounter(BookKeeperClientStats.ADD_OP_OUTSTANDING); @@ -377,7 +515,6 @@ public boolean isSecure() { } private void completeOperation(GenericCallback op, int rc) { - //Thread.dumpStack(); closeLock.readLock().lock(); try { if (ConnectionState.CLOSED == state) { @@ -397,28 +534,42 @@ protected long getNumPendingCompletionRequests() { protected ChannelFuture connect() { final long startTime = MathUtils.nowInNano(); if (LOG.isDebugEnabled()) { - LOG.debug("Connecting to bookie: {}", addr); + LOG.debug("Connecting to bookie: {}", bookieId); + } + BookieSocketAddress addr; + try { + addr = bookieAddressResolver.resolve(bookieId); + } catch (BookieAddressResolver.BookieIdNotResolvedException err) { + LOG.error("Cannot connect to {} as endpoint resolution failed (probably bookie is down) err {}", + bookieId, err.toString()); + return processBookieNotResolvedError(startTime, err); } // Set up the ClientBootStrap so we can create a new Channel connection to the bookie. Bootstrap bootstrap = new Bootstrap(); bootstrap.group(eventLoopGroup); - if (eventLoopGroup instanceof EpollEventLoopGroup) { + if (eventLoopGroup instanceof IOUringEventLoopGroup) { + bootstrap.channel(IOUringSocketChannel.class); + try { + bootstrap.option(IOUringChannelOption.TCP_USER_TIMEOUT, conf.getTcpUserTimeoutMillis()); + } catch (NoSuchElementException e) { + // Property not set, so keeping default value. + } + } else if (eventLoopGroup instanceof EpollEventLoopGroup) { bootstrap.channel(EpollSocketChannel.class); + try { + // For Epoll channels, configure the TCP user timeout. + bootstrap.option(EpollChannelOption.TCP_USER_TIMEOUT, conf.getTcpUserTimeoutMillis()); + } catch (NoSuchElementException e) { + // Property not set, so keeping default value. 
+ } } else if (eventLoopGroup instanceof DefaultEventLoopGroup) { bootstrap.channel(LocalChannel.class); } else { bootstrap.channel(NioSocketChannel.class); } - ByteBufAllocator allocator; - if (this.conf.isNettyUsePooledBuffers()) { - allocator = PooledByteBufAllocator.DEFAULT; - } else { - allocator = UnpooledByteBufAllocator.DEFAULT; - } - - bootstrap.option(ChannelOption.ALLOCATOR, allocator); + bootstrap.option(ChannelOption.ALLOCATOR, this.allocator); bootstrap.option(ChannelOption.CONNECT_TIMEOUT_MILLIS, conf.getClientConnectTimeoutMillis()); bootstrap.option(ChannelOption.WRITE_BUFFER_WATER_MARK, new WriteBufferWaterMark( conf.getClientWriteBufferLowWaterMark(), conf.getClientWriteBufferHighWaterMark())); @@ -438,35 +589,34 @@ protected ChannelFuture connect() { } // In the netty pipeline, we need to split packets based on length, so we - // use the {@link LengthFieldBasedFramDecoder}. Other than that all actions + // use the {@link LengthFieldBasedFrameDecoder}. Other than that all actions // are carried out in this class, e.g., making sense of received messages, // prepending the length to outgoing packets etc. bootstrap.handler(new ChannelInitializer() { @Override protected void initChannel(Channel ch) throws Exception { ChannelPipeline pipeline = ch.pipeline(); - - pipeline.addLast("bytebufList", ByteBufList.ENCODER_WITH_SIZE); + pipeline.addLast(CONSOLIDATION_HANDLER_NAME, new FlushConsolidationHandler(1024, true)); + pipeline.addLast("bytebufList", ByteBufList.ENCODER); pipeline.addLast("lengthbasedframedecoder", new LengthFieldBasedFrameDecoder(maxFrameSize, 0, 4, 0, 4)); - pipeline.addLast("lengthprepender", new LengthFieldPrepender(4)); pipeline.addLast("bookieProtoEncoder", new BookieProtoEncoding.RequestEncoder(extRegistry)); pipeline.addLast( "bookieProtoDecoder", - new BookieProtoEncoding.ResponseDecoder(extRegistry, useV2WireProtocol)); + new BookieProtoEncoding.ResponseDecoder(extRegistry, useV2WireProtocol, shFactory != null)); pipeline.addLast("authHandler", new AuthHandler.ClientSideHandler(authProviderFactory, txnIdGenerator, - connectionPeer)); + connectionPeer, useV2WireProtocol)); pipeline.addLast("mainhandler", PerChannelBookieClient.this); } }); SocketAddress bookieAddr = addr.getSocketAddress(); if (eventLoopGroup instanceof DefaultEventLoopGroup) { - bookieAddr = addr.getLocalAddress(); + bookieAddr = new LocalAddress(bookieId.toString()); } ChannelFuture future = bootstrap.connect(bookieAddr); - future.addListener(new ConnectionFutureListener(startTime)); + future.addListener(contextPreservingListener(new ConnectionFutureListener(startTime))); future.addListener(x -> makeWritable()); return future; } @@ -539,49 +689,46 @@ void connectIfNeededAndDoOp(GenericCallback op) { void writeLac(final long ledgerId, final byte[] masterKey, final long lac, ByteBufList toSend, WriteLacCallback cb, Object ctx) { final long txnId = getTxnId(); - final CompletionKey completionKey = new V3CompletionKey(txnId, + final CompletionKey completionKey = new TxnCompletionKey(txnId, OperationType.WRITE_LAC); // writeLac is mostly like addEntry hence uses addEntryTimeout completionObjects.put(completionKey, new WriteLacCompletion(completionKey, cb, - ctx, lac)); + ctx, ledgerId)); // Build the request BKPacketHeader.Builder headerBuilder = BKPacketHeader.newBuilder() .setVersion(ProtocolVersion.VERSION_THREE) .setOperation(OperationType.WRITE_LAC) .setTxnId(txnId); - ByteString body; - if (toSend.hasArray()) { - body = UnsafeByteOperations.unsafeWrap(toSend.array(), 
toSend.arrayOffset(), toSend.readableBytes()); - } else if (toSend.size() == 1) { - body = UnsafeByteOperations.unsafeWrap(toSend.getBuffer(0).nioBuffer()); - } else { - body = UnsafeByteOperations.unsafeWrap(toSend.toArray()); - } + ByteString body = ByteStringUtil.byteBufListToByteString(toSend); + toSend.retain(); + Runnable cleanupActionFailedBeforeWrite = toSend::release; + Runnable cleanupActionAfterWrite = cleanupActionFailedBeforeWrite; WriteLacRequest.Builder writeLacBuilder = WriteLacRequest.newBuilder() .setLedgerId(ledgerId) .setLac(lac) .setMasterKey(UnsafeByteOperations.unsafeWrap(masterKey)) .setBody(body); - final Request writeLacRequest = Request.newBuilder() + final Request writeLacRequest = withRequestContext(Request.newBuilder()) .setHeader(headerBuilder) .setWriteLacRequest(writeLacBuilder) .build(); - writeAndFlush(channel, completionKey, writeLacRequest); + writeAndFlush(channel, completionKey, writeLacRequest, false, cleanupActionFailedBeforeWrite, + cleanupActionAfterWrite); } void forceLedger(final long ledgerId, ForceLedgerCallback cb, Object ctx) { if (useV2WireProtocol) { LOG.error("force is not allowed with v2 protocol"); executor.executeOrdered(ledgerId, () -> { - cb.forceLedgerComplete(BKException.Code.IllegalOpException, ledgerId, addr, ctx); + cb.forceLedgerComplete(BKException.Code.IllegalOpException, ledgerId, bookieId, ctx); }); return; } final long txnId = getTxnId(); - final CompletionKey completionKey = new V3CompletionKey(txnId, + final CompletionKey completionKey = new TxnCompletionKey(txnId, OperationType.FORCE_LEDGER); // force is mostly like addEntry hence uses addEntryTimeout completionObjects.put(completionKey, @@ -596,7 +743,7 @@ void forceLedger(final long ledgerId, ForceLedgerCallback cb, Object ctx) { ForceLedgerRequest.Builder writeLacBuilder = ForceLedgerRequest.newBuilder() .setLedgerId(ledgerId); - final Request forceLedgerRequest = Request.newBuilder() + final Request forceLedgerRequest = withRequestContext(Request.newBuilder()) .setHeader(headerBuilder) .setForceLedgerRequest(writeLacBuilder) .build(); @@ -624,25 +771,33 @@ void forceLedger(final long ledgerId, ForceLedgerCallback cb, Object ctx) { * @param writeFlags * WriteFlags */ - void addEntry(final long ledgerId, byte[] masterKey, final long entryId, ByteBufList toSend, WriteCallback cb, + void addEntry(final long ledgerId, byte[] masterKey, final long entryId, ReferenceCounted toSend, WriteCallback cb, Object ctx, final int options, boolean allowFastFail, final EnumSet writeFlags) { Object request = null; CompletionKey completionKey = null; + Runnable cleanupActionFailedBeforeWrite = null; + Runnable cleanupActionAfterWrite = null; if (useV2WireProtocol) { if (writeFlags.contains(WriteFlag.DEFERRED_SYNC)) { LOG.error("invalid writeflags {} for v2 protocol", writeFlags); - executor.executeOrdered(ledgerId, () -> { - cb.writeComplete(BKException.Code.IllegalOpException, ledgerId, entryId, addr, ctx); - }); + cb.writeComplete(BKException.Code.IllegalOpException, ledgerId, entryId, bookieId, ctx); return; } completionKey = acquireV2Key(ledgerId, entryId, OperationType.ADD_ENTRY); - request = BookieProtocol.AddRequest.create( - BookieProtocol.CURRENT_PROTOCOL_VERSION, ledgerId, entryId, - (short) options, masterKey, toSend); + + if (toSend instanceof ByteBuf) { + ByteBuf byteBuf = ((ByteBuf) toSend).retainedDuplicate(); + request = byteBuf; + cleanupActionFailedBeforeWrite = byteBuf::release; + } else { + ByteBufList byteBufList = (ByteBufList) toSend; + byteBufList.retain(); + 
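The addEntry hunk above pairs every retain() on the outgoing buffer with a cleanup Runnable, run either when the request fails before the channel write or from the write-completion listener afterwards, so the reference count is balanced on every path. A simplified sketch of that discipline (stubbed write method; not the patch's actual flow):

    import io.netty.buffer.ByteBuf;
    import io.netty.buffer.Unpooled;

    class RetainReleaseSketch {
        void send(ByteBuf payload) {
            payload.retain(); // hold a reference for the in-flight write
            Runnable cleanup = payload::release;
            try {
                write(payload);
            } catch (Throwable t) {
                cleanup.run(); // failed before the write was issued
                return;
            }
            cleanup.run(); // in the real code this runs in the write listener
        }

        private void write(ByteBuf buf) {
            // stub standing in for channel.writeAndFlush(...)
        }

        public static void main(String[] args) {
            ByteBuf buf = Unpooled.wrappedBuffer(new byte[]{1, 2, 3});
            new RetainReleaseSketch().send(buf);
            buf.release(); // caller drops its own reference
        }
    }
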
request = byteBufList; + cleanupActionFailedBeforeWrite = byteBufList::release; + } } else { final long txnId = getTxnId(); - completionKey = new V3CompletionKey(txnId, OperationType.ADD_ENTRY); + completionKey = new TxnCompletionKey(txnId, OperationType.ADD_ENTRY); // Build the request and calculate the total size to be included in the packet. BKPacketHeader.Builder headerBuilder = BKPacketHeader.newBuilder() @@ -653,14 +808,11 @@ void addEntry(final long ledgerId, byte[] masterKey, final long entryId, ByteBuf headerBuilder.setPriority(DEFAULT_HIGH_PRIORITY_VALUE); } - ByteString body; - if (toSend.hasArray()) { - body = UnsafeByteOperations.unsafeWrap(toSend.array(), toSend.arrayOffset(), toSend.readableBytes()); - } else if (toSend.size() == 1) { - body = UnsafeByteOperations.unsafeWrap(toSend.getBuffer(0).nioBuffer()); - } else { - body = UnsafeByteOperations.unsafeWrap(toSend.toArray()); - } + ByteBufList bufToSend = (ByteBufList) toSend; + ByteString body = ByteStringUtil.byteBufListToByteString(bufToSend); + bufToSend.retain(); + cleanupActionFailedBeforeWrite = bufToSend::release; + cleanupActionAfterWrite = cleanupActionFailedBeforeWrite; AddRequest.Builder addBuilder = AddRequest.newBuilder() .setLedgerId(ledgerId) .setEntryId(entryId) @@ -676,7 +828,7 @@ void addEntry(final long ledgerId, byte[] masterKey, final long entryId, ByteBuf addBuilder.setWriteFlags(WriteFlag.getWriteFlagsValue(writeFlags)); } - request = Request.newBuilder() + request = withRequestContext(Request.newBuilder()) .setHeader(headerBuilder) .setAddRequest(addBuilder) .build(); @@ -685,29 +837,21 @@ void addEntry(final long ledgerId, byte[] masterKey, final long entryId, ByteBuf putCompletionKeyValue(completionKey, acquireAddCompletion(completionKey, cb, ctx, ledgerId, entryId)); - final Channel c = channel; - if (c == null) { - // usually checked in writeAndFlush, but we have extra check - // because we need to release toSend. - errorOut(completionKey); - toSend.release(); - return; - } else { - // addEntry times out on backpressure - writeAndFlush(c, completionKey, request, allowFastFail); - } + // addEntry times out on backpressure + writeAndFlush(channel, completionKey, request, allowFastFail, cleanupActionFailedBeforeWrite, + cleanupActionAfterWrite); } public void readLac(final long ledgerId, ReadLacCallback cb, Object ctx) { Object request = null; CompletionKey completionKey = null; if (useV2WireProtocol) { - request = new BookieProtocol.ReadRequest(BookieProtocol.CURRENT_PROTOCOL_VERSION, + request = BookieProtocol.ReadRequest.create(BookieProtocol.CURRENT_PROTOCOL_VERSION, ledgerId, 0, (short) 0, null); completionKey = acquireV2Key(ledgerId, 0, OperationType.READ_LAC); } else { final long txnId = getTxnId(); - completionKey = new V3CompletionKey(txnId, OperationType.READ_LAC); + completionKey = new TxnCompletionKey(txnId, OperationType.READ_LAC); // Build the request and calculate the total size to be included in the packet. 
BKPacketHeader.Builder headerBuilder = BKPacketHeader.newBuilder() @@ -716,7 +860,7 @@ public void readLac(final long ledgerId, ReadLacCallback cb, Object ctx) { .setTxnId(txnId); ReadLacRequest.Builder readLacBuilder = ReadLacRequest.newBuilder() .setLedgerId(ledgerId); - request = Request.newBuilder() + request = withRequestContext(Request.newBuilder()) .setHeader(headerBuilder) .setReadLacRequest(readLacBuilder) .build(); @@ -727,6 +871,24 @@ public void readLac(final long ledgerId, ReadLacCallback cb, Object ctx) { writeAndFlush(channel, completionKey, request); } + public void getListOfEntriesOfLedger(final long ledgerId, GetListOfEntriesOfLedgerCallback cb) { + final long txnId = getTxnId(); + final CompletionKey completionKey = new TxnCompletionKey(txnId, OperationType.GET_LIST_OF_ENTRIES_OF_LEDGER); + completionObjects.put(completionKey, new GetListOfEntriesOfLedgerCompletion(completionKey, cb, ledgerId)); + + // Build the request. + BKPacketHeader.Builder headerBuilder = BKPacketHeader.newBuilder().setVersion(ProtocolVersion.VERSION_THREE) + .setOperation(OperationType.GET_LIST_OF_ENTRIES_OF_LEDGER).setTxnId(txnId); + + GetListOfEntriesOfLedgerRequest.Builder getListOfEntriesOfLedgerRequestBuilder = + GetListOfEntriesOfLedgerRequest.newBuilder().setLedgerId(ledgerId); + + final Request getListOfEntriesOfLedgerRequest = Request.newBuilder().setHeader(headerBuilder) + .setGetListOfEntriesOfLedgerRequest(getListOfEntriesOfLedgerRequestBuilder).build(); + + writeAndFlush(channel, completionKey, getListOfEntriesOfLedgerRequest); + } + /** * Long Poll Reads. */ @@ -768,12 +930,12 @@ private void readEntryInternal(final long ledgerId, Object request = null; CompletionKey completionKey = null; if (useV2WireProtocol) { - request = new BookieProtocol.ReadRequest(BookieProtocol.CURRENT_PROTOCOL_VERSION, + request = BookieProtocol.ReadRequest.create(BookieProtocol.CURRENT_PROTOCOL_VERSION, ledgerId, entryId, (short) flags, masterKey); completionKey = acquireV2Key(ledgerId, entryId, OperationType.READ_ENTRY); } else { final long txnId = getTxnId(); - completionKey = new V3CompletionKey(txnId, OperationType.READ_ENTRY); + completionKey = new TxnCompletionKey(txnId, OperationType.READ_ENTRY); // Build the request and calculate the total size to be included in the packet. 
BKPacketHeader.Builder headerBuilder = BKPacketHeader.newBuilder() @@ -823,7 +985,7 @@ private void readEntryInternal(final long ledgerId, readBuilder.setMasterKey(ByteString.copyFrom(masterKey)); } - request = Request.newBuilder() + request = withRequestContext(Request.newBuilder()) .setHeader(headerBuilder) .setReadRequest(readBuilder) .build(); @@ -832,12 +994,55 @@ private void readEntryInternal(final long ledgerId, ReadCompletion readCompletion = new ReadCompletion(completionKey, cb, ctx, ledgerId, entryId); putCompletionKeyValue(completionKey, readCompletion); - writeAndFlush(channel, completionKey, request, allowFastFail); + writeAndFlush(channel, completionKey, request, allowFastFail, null, null); + } + + public void batchReadEntries(final long ledgerId, + final long startEntryId, + final int maxCount, + final long maxSize, + BatchedReadEntryCallback cb, + Object ctx, + int flags, + byte[] masterKey, + boolean allowFastFail) { + + batchReadEntriesInternal(ledgerId, startEntryId, maxCount, maxSize, null, null, false, + cb, ctx, (short) flags, masterKey, allowFastFail); + } + + private void batchReadEntriesInternal(final long ledgerId, + final long startEntryId, + final int maxCount, + final long maxSize, + final Long previousLAC, + final Long timeOutInMillis, + final boolean piggyBackEntry, + final BatchedReadEntryCallback cb, + final Object ctx, + int flags, + byte[] masterKey, + boolean allowFastFail) { + Object request; + CompletionKey completionKey; + final long txnId = getTxnId(); + if (useV2WireProtocol) { + request = BookieProtocol.BatchedReadRequest.create(BookieProtocol.CURRENT_PROTOCOL_VERSION, + ledgerId, startEntryId, (short) flags, masterKey, txnId, maxCount, maxSize); + completionKey = new TxnCompletionKey(txnId, OperationType.BATCH_READ_ENTRY); + } else { + throw new UnsupportedOperationException("Unsupported batch read entry operation for v3 protocol."); + } + BatchedReadCompletion readCompletion = new BatchedReadCompletion( + completionKey, cb, ctx, ledgerId, startEntryId); + putCompletionKeyValue(completionKey, readCompletion); + + writeAndFlush(channel, completionKey, request, allowFastFail, null, null); } public void getBookieInfo(final long requested, GetBookieInfoCallback cb, Object ctx) { final long txnId = getTxnId(); - final CompletionKey completionKey = new V3CompletionKey(txnId, OperationType.GET_BOOKIE_INFO); + final CompletionKey completionKey = new TxnCompletionKey(txnId, OperationType.GET_BOOKIE_INFO); completionObjects.put(completionKey, new GetBookieInfoCompletion( completionKey, cb, ctx)); @@ -851,7 +1056,7 @@ public void getBookieInfo(final long requested, GetBookieInfoCallback cb, Object GetBookieInfoRequest.Builder getBookieInfoBuilder = GetBookieInfoRequest.newBuilder() .setRequested(requested); - final Request getBookieInfoRequest = Request.newBuilder() + final Request getBookieInfoRequest = withRequestContext(Request.newBuilder()) .setHeader(headerBuilder) .setGetBookieInfoRequest(getBookieInfoBuilder) .build(); @@ -870,7 +1075,7 @@ public void checkTimeoutOnPendingOperations() { if (timedOutOperations > 0) { LOG.info("Timed-out {} operations to channel {} for {}", - timedOutOperations, channel, addr); + timedOutOperations, channel, bookieId); } } @@ -882,7 +1087,7 @@ public void disconnect() { } public void disconnect(boolean wait) { - LOG.info("Disconnecting the per channel bookie client for {}", addr); + LOG.info("Disconnecting the per channel bookie client for {}", bookieId); closeInternal(false, wait); } @@ -894,7 +1099,7 @@ public void 
close() { } public void close(boolean wait) { - LOG.info("Closing the per channel bookie client for {}", addr); + LOG.info("Closing the per channel bookie client for {}", bookieId); closeLock.writeLock().lock(); try { if (ConnectionState.CLOSED == state) { @@ -954,16 +1159,20 @@ public void channelWritabilityChanged(ChannelHandlerContext ctx) throws Exceptio private void writeAndFlush(final Channel channel, final CompletionKey key, final Object request) { - writeAndFlush(channel, key, request, false); + writeAndFlush(channel, key, request, false, null, null); } private void writeAndFlush(final Channel channel, final CompletionKey key, final Object request, - final boolean allowFastFail) { + final boolean allowFastFail, final Runnable cleanupActionFailedBeforeWrite, + final Runnable cleanupActionAfterWrite) { if (channel == null) { LOG.warn("Operation {} failed: channel == null", StringUtils.requestToString(request)); errorOut(key); + if (cleanupActionFailedBeforeWrite != null) { + cleanupActionFailedBeforeWrite.run(); + } return; } @@ -978,6 +1187,9 @@ private void writeAndFlush(final Channel channel, StringUtils.requestToString(request)); errorOut(key, BKException.Code.TooManyRequestsException); + if (cleanupActionFailedBeforeWrite != null) { + cleanupActionFailedBeforeWrite.run(); + } return; } @@ -985,21 +1197,30 @@ private void writeAndFlush(final Channel channel, final long startTime = MathUtils.nowInNano(); ChannelPromise promise = channel.newPromise().addListener(future -> { - if (future.isSuccess()) { - nettyOpLogger.registerSuccessfulEvent(MathUtils.elapsedNanos(startTime), TimeUnit.NANOSECONDS); - CompletionValue completion = completionObjects.get(key); - if (completion != null) { - completion.setOutstanding(); + try { + if (future.isSuccess()) { + nettyOpLogger.registerSuccessfulEvent(MathUtils.elapsedNanos(startTime), TimeUnit.NANOSECONDS); + CompletionValue completion = completionObjects.get(key); + if (completion != null) { + completion.setOutstanding(); + } + } else { + nettyOpLogger.registerFailedEvent(MathUtils.elapsedNanos(startTime), TimeUnit.NANOSECONDS); + errorOut(key); + } + } finally { + if (cleanupActionAfterWrite != null) { + cleanupActionAfterWrite.run(); } - } else { - nettyOpLogger.registerFailedEvent(MathUtils.elapsedNanos(startTime), TimeUnit.NANOSECONDS); } }); - channel.writeAndFlush(request, promise); } catch (Throwable e) { LOG.warn("Operation {} failed", StringUtils.requestToString(request), e); errorOut(key); + if (cleanupActionFailedBeforeWrite != null) { + cleanupActionFailedBeforeWrite.run(); + } } } @@ -1092,6 +1313,7 @@ public void channelInactive(ChannelHandlerContext ctx) throws Exception { if (this.channel == ctx.channel() && state != ConnectionState.CLOSED) { state = ConnectionState.DISCONNECTED; + channel = null; } } @@ -1107,7 +1329,7 @@ public void channelInactive(ChannelHandlerContext ctx) throws Exception { public void exceptionCaught(ChannelHandlerContext ctx, Throwable cause) throws Exception { exceptionCounter.inc(); if (cause instanceof CorruptedFrameException || cause instanceof TooLongFrameException) { - LOG.error("Corrupted frame received from bookie: {}", ctx.channel().remoteAddress()); + LOG.error("Corrupted frame received from bookie: {}", ctx.channel()); ctx.close(); return; } @@ -1122,17 +1344,27 @@ public void exceptionCaught(ChannelHandlerContext ctx, Throwable cause) throws E return; } - if (cause instanceof DecoderException && cause.getCause() instanceof SSLHandshakeException) { + // TLSv1.3 doesn't throw 
SSLHandshakeException for certificate issues + // see https://stackoverflow.com/a/62465859 for details about the reason + // therefore catch SSLException to also cover TLSv1.3 + if (cause instanceof DecoderException && cause.getCause() instanceof SSLException) { LOG.error("TLS handshake failed", cause); errorOutPendingOps(BKException.Code.SecurityException); Channel c = ctx.channel(); if (c != null) { closeChannel(c); } + return; } if (cause instanceof IOException) { - LOG.warn("Exception caught on:{} cause:", ctx.channel(), cause); + if (cause instanceof NativeIoException) { + // Stack trace is not very interesting for native IO exceptions; the important part is in + // the exception message + LOG.warn("Exception caught on:{} cause: {}", ctx.channel(), cause.getMessage()); + } else { + LOG.warn("Exception caught on:{} cause:", ctx.channel(), cause); + } ctx.close(); return; } @@ -1173,14 +1405,19 @@ private void readV2Response(final BookieProtocol.Response response) { OperationType operationType = getOperationType(response.getOpCode()); StatusCode status = getStatusCodeFromErrorCode(response.errorCode); - CompletionKey key = acquireV2Key(response.ledgerId, response.entryId, operationType); + CompletionKey key; + if (OperationType.BATCH_READ_ENTRY == operationType) { + key = new TxnCompletionKey(((BookieProtocol.BatchedReadResponse) response).getRequestId(), operationType); + } else { + key = acquireV2Key(response.ledgerId, response.entryId, operationType); + } CompletionValue completionValue = getCompletionValue(key); key.release(); if (null == completionValue) { // Unexpected response, so log it. The txnId should have been present. if (LOG.isDebugEnabled()) { - LOG.debug("Unexpected response received from bookie : " + addr + " for type : " + operationType + LOG.debug("Unexpected response received from bookie : " + bookieId + " for type : " + operationType + " and ledger:entry : " + response.ledgerId + ":" + response.entryId); } response.release(); @@ -1192,7 +1429,7 @@ private void readV2Response(final BookieProtocol.Response response) { } } - private static class ReadV2ResponseCallback extends SafeRunnable { + private static class ReadV2ResponseCallback implements Runnable { CompletionValue completionValue; long ledgerId; long entryId; @@ -1211,7 +1448,7 @@ static ReadV2ResponseCallback create(CompletionValue completionValue, long ledge } @Override - public void safeRun() { + public void run() { completionValue.handleV2Response(ledgerId, entryId, status, response); response.release(); response.recycle(); @@ -1255,6 +1492,8 @@ private static OperationType getOperationType(byte opCode) { return OperationType.WRITE_LAC; case BookieProtocol.GET_BOOKIE_INFO: return OperationType.GET_BOOKIE_INFO; + case BookieProtocol.BATCH_READ_ENTRY: + return OperationType.BATCH_READ_ENTRY; default: throw new IllegalArgumentException("Invalid operation type " + opCode); } @@ -1290,20 +1529,21 @@ private static StatusCode getStatusCodeFromErrorCode(int errorCode) { private void readV3Response(final Response response) { final BKPacketHeader header = response.getHeader(); - final CompletionValue completionValue = completionObjects.get(newCompletionKey(header.getTxnId(), - header.getOperation())); + final CompletionKey key = newCompletionKey(header.getTxnId(), header.getOperation()); + final CompletionValue completionValue = completionObjects.get(key); if (null == completionValue) { // Unexpected response, so log it. The txnId should have been present. 
if (LOG.isDebugEnabled()) { - LOG.debug("Unexpected response received from bookie : " + addr + " for type : " + LOG.debug("Unexpected response received from bookie : " + bookieId + " for type : " + header.getOperation() + " and txnId : " + header.getTxnId()); } } else { long orderingKey = completionValue.ledgerId; - executor.executeOrdered(orderingKey, new SafeRunnable() { + executor.executeOrdered(orderingKey, new Runnable() { @Override - public void safeRun() { + public void run() { + completionValue.restoreMdcContext(); completionValue.handleV3Response(response); } @@ -1316,15 +1556,35 @@ public String toString() { }); } - completionObjects.remove(newCompletionKey(header.getTxnId(), header.getOperation())); + completionObjects.remove(key); } void initTLSHandshake() { // create TLS handler PerChannelBookieClient parentObj = PerChannelBookieClient.this; - SslHandler handler = parentObj.shFactory.newTLSHandler(); - channel.pipeline().addFirst(parentObj.shFactory.getHandlerName(), handler); - handler.handshakeFuture().addListener(new GenericFutureListener<Future<Channel>>() { + SocketAddress socketAddress = channel.remoteAddress(); + InetSocketAddress address; + if (socketAddress instanceof LocalAddress) { + // if it is a local address, it looks like this: local:hostname:port + String[] addr = socketAddress.toString().split(":"); + String hostname = addr[1]; + int port = Integer.parseInt(addr[2]); + address = new InetSocketAddress(hostname, port); + } else if (socketAddress instanceof InetSocketAddress) { + address = (InetSocketAddress) socketAddress; + } else { + throw new RuntimeException("Unexpected socket address type"); + } + LOG.info("Starting TLS handshake with {}:{}", address.getHostString(), address.getPort()); + SslHandler sslHandler = parentObj.shFactory.newTLSHandler(address.getHostName(), address.getPort()); + String sslHandlerName = parentObj.shFactory.getHandlerName(); + if (channel.pipeline().names().contains(CONSOLIDATION_HANDLER_NAME)) { + channel.pipeline().addAfter(CONSOLIDATION_HANDLER_NAME, sslHandlerName, sslHandler); + } else { + // local transport doesn't contain FlushConsolidationHandler + channel.pipeline().addFirst(sslHandlerName, sslHandler); + } + sslHandler.handshakeFuture().addListener(new GenericFutureListener<Future<Channel>>() { @Override public void operationComplete(Future<Channel> future) throws Exception { int rc; @@ -1332,7 +1592,7 @@ public void operationComplete(Future<Channel> future) throws Exception { synchronized (PerChannelBookieClient.this) { if (future.isSuccess() && state == ConnectionState.CONNECTING) { - LOG.error("Connection state changed before TLS handshake completed {}/{}", addr, state); + LOG.error("Connection state changed before TLS handshake completed {}/{}", bookieId, state); rc = BKException.Code.BookieHandleNotAvailableException; closeChannel(channel); channel = null; @@ -1341,7 +1601,7 @@ public void operationComplete(Future<Channel> future) throws Exception { } } else if (future.isSuccess() && state == ConnectionState.START_TLS) { rc = BKException.Code.OK; - LOG.info("Successfully connected to bookie using TLS: " + addr); + LOG.info("Successfully connected to bookie using TLS: " + bookieId); state = ConnectionState.CONNECTED; AuthHandler.ClientSideHandler authHandler = future.get().pipeline() @@ -1356,13 +1616,16 @@ public void operationComplete(Future<Channel> future) throws Exception { rc = BKException.Code.BookieHandleNotAvailableException; channel = null; } else if (future.isSuccess() && state == ConnectionState.CONNECTED) { - LOG.debug("Already connected with another channel({}), so close 
the new channel({})", - channel, channel); + if (LOG.isDebugEnabled()) { + LOG.debug("Already connected with another channel({}), " + + "so close the new channel({})", + channel, channel); + } closeChannel(channel); return; // pendingOps should have been completed when other channel connected } else { LOG.error("TLS handshake failed with bookie: {}/{}, current state {} : ", - channel, addr, state, future.cause()); + channel, bookieId, state, future.cause()); rc = BKException.Code.SecurityException; closeChannel(channel); channel = null; @@ -1399,6 +1662,7 @@ abstract class CompletionValue { private final OpStatsLogger opLogger; private final OpStatsLogger timeoutOpLogger; private final String operationName; + private final Map mdcContextMap; protected Object ctx; protected long ledgerId; protected long entryId; @@ -1416,6 +1680,7 @@ public CompletionValue(String operationName, this.startTime = MathUtils.nowInNano(); this.opLogger = opLogger; this.timeoutOpLogger = timeoutOpLogger; + this.mdcContextMap = preserveMdcForTaskExecution ? MDC.getCopyOfContextMap() : null; } private long latency() { @@ -1452,7 +1717,7 @@ void timeout() { protected void logResponse(StatusCode status, Object... extraInfo) { if (LOG.isDebugEnabled()) { - LOG.debug("Got {} response from bookie:{} rc:{}, {}", operationName, addr, status, + LOG.debug("Got {} response from bookie:{} rc:{}, {}", operationName, bookieId, status, Joiner.on(":").join(extraInfo)); } } @@ -1462,13 +1727,16 @@ protected int convertStatus(StatusCode status, int defaultStatus) { int rcToRet = statusCodeToExceptionCode(status); if (rcToRet == BKException.Code.UNINITIALIZED) { LOG.error("{} for failed on bookie {} code {}", - operationName, addr, status); + operationName, bookieId, status); return defaultStatus; } else { return rcToRet; } } + public void restoreMdcContext() { + MdcUtils.restoreContext(mdcContextMap); + } public abstract void errorOut(); public abstract void errorOut(int rc); @@ -1477,23 +1745,19 @@ public void setOutstanding() { } protected void errorOutAndRunCallback(final Runnable callback) { - executor.executeOrdered(ledgerId, - new SafeRunnable() { - @Override - public void safeRun() { - String bAddress = "null"; - Channel c = channel; - if (c != null && c.remoteAddress() != null) { - bAddress = c.remoteAddress().toString(); - } - if (LOG.isDebugEnabled()) { - LOG.debug("Could not write {} request to bookie {} for ledger {}, entry {}", - operationName, bAddress, - ledgerId, entryId); - } - callback.run(); - } - }); + executor.executeOrdered(ledgerId, () -> { + String bAddress = "null"; + Channel c = channel; + if (c != null && c.remoteAddress() != null) { + bAddress = c.remoteAddress().toString(); + } + if (LOG.isDebugEnabled()) { + LOG.debug("Could not write {} request to bookie {} for ledger {}, entry {}", + operationName, bAddress, + ledgerId, entryId); + } + callback.run(); + }); } public void handleV2Response( @@ -1520,7 +1784,7 @@ public WriteLacCompletion(final CompletionKey key, this.cb = new WriteLacCallback() { @Override public void writeLacComplete(int rc, long ledgerId, - BookieSocketAddress addr, + BookieId addr, Object ctx) { logOpResult(rc); originalCallback.writeLacComplete(rc, ledgerId, @@ -1538,7 +1802,7 @@ public void errorOut() { @Override public void errorOut(final int rc) { errorOutAndRunCallback( - () -> cb.writeLacComplete(rc, ledgerId, addr, ctx)); + () -> cb.writeLacComplete(rc, ledgerId, bookieId, ctx)); } @Override @@ -1552,7 +1816,7 @@ public void handleV3Response(BookkeeperProtocol.Response 
response) { logResponse(status, "ledger", ledgerId); } int rc = convertStatus(status, BKException.Code.WriteException); - cb.writeLacComplete(rc, ledgerId, addr, ctx); + cb.writeLacComplete(rc, ledgerId, bookieId, ctx); } } @@ -1569,7 +1833,7 @@ public ForceLedgerCompletion(final CompletionKey key, this.cb = new ForceLedgerCallback() { @Override public void forceLedgerComplete(int rc, long ledgerId, - BookieSocketAddress addr, + BookieId addr, Object ctx) { logOpResult(rc); originalCallback.forceLedgerComplete(rc, ledgerId, @@ -1587,7 +1851,7 @@ public void errorOut() { @Override public void errorOut(final int rc) { errorOutAndRunCallback( - () -> cb.forceLedgerComplete(rc, ledgerId, addr, ctx)); + () -> cb.forceLedgerComplete(rc, ledgerId, bookieId, ctx)); } @Override @@ -1601,7 +1865,7 @@ public void handleV3Response(BookkeeperProtocol.Response response) { logResponse(status, "ledger", ledgerId); } int rc = convertStatus(status, BKException.Code.WriteException); - cb.forceLedgerComplete(rc, ledgerId, addr, ctx); + cb.forceLedgerComplete(rc, ledgerId, bookieId, ctx); } } @@ -1741,7 +2005,8 @@ public void handleV3Response(BookkeeperProtocol.Response response) { handleReadResponse(readResponse.getLedgerId(), readResponse.getEntryId(), status, buffer, maxLAC, lacUpdateTimestamp); - buffer.release(); // meaningless using unpooled, but client may expect to hold the last reference + ReferenceCountUtil.release( + buffer); // meaningless using unpooled, but client may expect to hold the last reference } private void handleReadResponse(long ledgerId, @@ -1767,6 +2032,83 @@ private void handleReadResponse(long ledgerId, } } + class BatchedReadCompletion extends CompletionValue { + + final BatchedReadEntryCallback cb; + + public BatchedReadCompletion(final CompletionKey key, + final BatchedReadEntryCallback originalCallback, + final Object originalCtx, + long ledgerId, final long entryId) { + super("BatchedRead", originalCtx, ledgerId, entryId, + readEntryOpLogger, readTimeoutOpLogger); + this.cb = new BatchedReadEntryCallback() { + + @Override + public void readEntriesComplete(int rc, + long ledgerId, + long startEntryId, + ByteBufList bufList, + Object ctx) { + logOpResult(rc); + originalCallback.readEntriesComplete(rc, + ledgerId, entryId, + bufList, originalCtx); + key.release(); + } + }; + } + + @Override + public void errorOut() { + errorOut(BKException.Code.BookieHandleNotAvailableException); + } + + @Override + public void errorOut(final int rc) { + errorOutAndRunCallback( + () -> cb.readEntriesComplete(rc, ledgerId, + entryId, null, ctx)); + } + + @Override + public void handleV2Response(long ledgerId, + long entryId, + StatusCode status, + BookieProtocol.Response response) { + + readEntryOutstanding.dec(); + if (!(response instanceof BookieProtocol.BatchedReadResponse)) { + return; + } + BookieProtocol.BatchedReadResponse readResponse = (BookieProtocol.BatchedReadResponse) response; + handleBatchedReadResponse(ledgerId, entryId, status, readResponse.getData(), + INVALID_ENTRY_ID, -1L); + } + + @Override + public void handleV3Response(Response response) { + // The V3 protocol doesn't support batched read yet. + } + + private void handleBatchedReadResponse(long ledgerId, + long entryId, + StatusCode status, + ByteBufList buffers, + long maxLAC, // max known lac piggy-back from bookies + long lacUpdateTimestamp) { // the timestamp when the lac is updated. 
+ int rc = convertStatus(status, BKException.Code.ReadException); + + if (maxLAC > INVALID_ENTRY_ID && (ctx instanceof ReadEntryCallbackCtx)) { + ((ReadEntryCallbackCtx) ctx).setLastAddConfirmed(maxLAC); + } + if (lacUpdateTimestamp > -1L && (ctx instanceof ReadLastConfirmedAndEntryContext)) { + ((ReadLastConfirmedAndEntryContext) ctx).setLacUpdateTimestamp(lacUpdateTimestamp); + } + cb.readEntriesComplete(rc, ledgerId, entryId, buffers, ctx); + } + } + class StartTLSCompletion extends CompletionValue { final StartTLSCallback cb; @@ -1838,6 +2180,16 @@ public void getBookieInfoComplete(int rc, BookieInfo bInfo, }; } + @Override + boolean maybeTimeout() { + if (MathUtils.elapsedNanos(startTime) >= getBookieInfoTimeoutNanos) { + timeout(); + return true; + } else { + return false; + } + } + @Override public void errorOut() { errorOut(BKException.Code.BookieHandleNotAvailableException); @@ -1869,7 +2221,63 @@ public void handleV3Response(BookkeeperProtocol.Response response) { } } + class GetListOfEntriesOfLedgerCompletion extends CompletionValue { + final GetListOfEntriesOfLedgerCallback cb; + + public GetListOfEntriesOfLedgerCompletion(final CompletionKey key, + final GetListOfEntriesOfLedgerCallback origCallback, final long ledgerId) { + super("GetListOfEntriesOfLedger", null, ledgerId, 0L, getListOfEntriesOfLedgerCompletionOpLogger, + getListOfEntriesOfLedgerCompletionTimeoutOpLogger); + this.cb = new GetListOfEntriesOfLedgerCallback() { + @Override + public void getListOfEntriesOfLedgerComplete(int rc, long ledgerId, + AvailabilityOfEntriesOfLedger availabilityOfEntriesOfLedger) { + logOpResult(rc); + origCallback.getListOfEntriesOfLedgerComplete(rc, ledgerId, availabilityOfEntriesOfLedger); + key.release(); + } + }; + } + + @Override + public void errorOut() { + errorOut(BKException.Code.BookieHandleNotAvailableException); + } + + @Override + public void errorOut(final int rc) { + errorOutAndRunCallback(() -> cb.getListOfEntriesOfLedgerComplete(rc, ledgerId, null)); + } + + @Override + public void handleV3Response(BookkeeperProtocol.Response response) { + GetListOfEntriesOfLedgerResponse getListOfEntriesOfLedgerResponse = response + .getGetListOfEntriesOfLedgerResponse(); + ByteBuf availabilityOfEntriesOfLedgerBuffer = Unpooled.EMPTY_BUFFER; + StatusCode status = response.getStatus() == StatusCode.EOK ? 
getListOfEntriesOfLedgerResponse.getStatus() + : response.getStatus(); + + if (getListOfEntriesOfLedgerResponse.hasAvailabilityOfEntriesOfLedger()) { + availabilityOfEntriesOfLedgerBuffer = Unpooled.wrappedBuffer( + getListOfEntriesOfLedgerResponse.getAvailabilityOfEntriesOfLedger().asReadOnlyByteBuffer()); + } + + if (LOG.isDebugEnabled()) { + logResponse(status, "ledgerId", ledgerId); + } + + int rc = convertStatus(status, BKException.Code.ReadException); + AvailabilityOfEntriesOfLedger availabilityOfEntriesOfLedger = null; + if (rc == BKException.Code.OK) { + availabilityOfEntriesOfLedger = new AvailabilityOfEntriesOfLedger( + availabilityOfEntriesOfLedgerBuffer.slice()); + } + cb.getListOfEntriesOfLedgerComplete(rc, ledgerId, availabilityOfEntriesOfLedger); + } + } + private final Recycler addCompletionRecycler = new Recycler() { + @Override protected AddCompletion newObject(Recycler.Handle handle) { return new AddCompletion(handle); } @@ -1910,7 +2318,7 @@ void reset(final CompletionKey key, @Override public void writeComplete(int rc, long ledgerId, long entryId, - BookieSocketAddress addr, + BookieId addr, Object ctx) { logOpResult(rc); originalCallback.writeComplete(rc, ledgerId, entryId, addr, ctx); @@ -1936,7 +2344,7 @@ public void errorOut() { @Override public void errorOut(final int rc) { errorOutAndRunCallback( - () -> writeComplete(rc, ledgerId, entryId, addr, ctx)); + () -> writeComplete(rc, ledgerId, entryId, bookieId, ctx)); } @Override @@ -1970,27 +2378,29 @@ private void handleResponse(long ledgerId, long entryId, } int rc = convertStatus(status, BKException.Code.WriteException); - writeComplete(rc, ledgerId, entryId, addr, ctx); + writeComplete(rc, ledgerId, entryId, bookieId, ctx); } } - // visable for testing + // visible for testing CompletionKey newCompletionKey(long txnId, OperationType operationType) { - return new V3CompletionKey(txnId, operationType); + return new TxnCompletionKey(txnId, operationType); } - class V3CompletionKey extends CompletionKey { + class TxnCompletionKey extends CompletionKey { + final long txnId; - public V3CompletionKey(long txnId, OperationType operationType) { - super(txnId, operationType); + public TxnCompletionKey(long txnId, OperationType operationType) { + super(operationType); + this.txnId = txnId; } @Override public boolean equals(Object obj) { - if (!(obj instanceof V3CompletionKey)) { + if (!(obj instanceof TxnCompletionKey)) { return false; } - V3CompletionKey that = (V3CompletionKey) obj; + TxnCompletionKey that = (TxnCompletionKey) obj; return this.txnId == that.txnId && this.operationType == that.operationType; } @@ -2007,12 +2417,9 @@ public String toString() { } abstract class CompletionKey { - final long txnId; OperationType operationType; - CompletionKey(long txnId, - OperationType operationType) { - this.txnId = txnId; + CompletionKey(OperationType operationType) { this.operationType = operationType; } @@ -2045,6 +2452,8 @@ private int statusCodeToExceptionCode(StatusCode status) { return BKException.Code.WriteOnReadOnlyBookieException; case ETOOMANYREQUESTS: return BKException.Code.TooManyRequestsException; + case EUNKNOWNLEDGERSTATE: + return BKException.Code.DataUnknownException; default: return BKException.Code.UNINITIALIZED; } @@ -2071,27 +2480,28 @@ private long getTxnId() { return txnIdGenerator.incrementAndGet(); } - private final Recycler v2KeyRecycler = new Recycler() { - protected V2CompletionKey newObject( - Recycler.Handle handle) { - return new V2CompletionKey(handle); + private final Recycler 
v2KeyRecycler = new Recycler() { + @Override + protected EntryCompletionKey newObject( + Recycler.Handle handle) { + return new EntryCompletionKey(handle); } }; - V2CompletionKey acquireV2Key(long ledgerId, long entryId, + EntryCompletionKey acquireV2Key(long ledgerId, long entryId, OperationType operationType) { - V2CompletionKey key = v2KeyRecycler.get(); + EntryCompletionKey key = v2KeyRecycler.get(); key.reset(ledgerId, entryId, operationType); return key; } - private class V2CompletionKey extends CompletionKey { - private final Handle recyclerHandle; + private class EntryCompletionKey extends CompletionKey { + private final Handle recyclerHandle; long ledgerId; long entryId; - private V2CompletionKey(Handle handle) { - super(-1, null); + private EntryCompletionKey(Handle handle) { + super(null); this.recyclerHandle = handle; } @@ -2103,10 +2513,10 @@ void reset(long ledgerId, long entryId, OperationType operationType) { @Override public boolean equals(Object object) { - if (!(object instanceof V2CompletionKey)) { + if (!(object instanceof EntryCompletionKey)) { return false; } - V2CompletionKey that = (V2CompletionKey) object; + EntryCompletionKey that = (EntryCompletionKey) object; return this.entryId == that.entryId && this.ledgerId == that.ledgerId && this.operationType == that.operationType; @@ -2128,6 +2538,55 @@ public void release() { } } + Request.Builder withRequestContext(Request.Builder builder) { + if (preserveMdcForTaskExecution) { + return appendRequestContext(builder); + } + return builder; + } + + static Request.Builder appendRequestContext(Request.Builder builder) { + final Map mdcContextMap = MDC.getCopyOfContextMap(); + if (mdcContextMap == null || mdcContextMap.isEmpty()) { + return builder; + } + for (Map.Entry kv : mdcContextMap.entrySet()) { + final BookkeeperProtocol.ContextPair context = BookkeeperProtocol.ContextPair.newBuilder() + .setKey(kv.getKey()) + .setValue(kv.getValue()) + .build(); + builder.addRequestContext(context); + } + return builder; + } + + ChannelFutureListener contextPreservingListener(ChannelFutureListener listener) { + return preserveMdcForTaskExecution ? new ContextPreservingFutureListener(listener) : listener; + } + + /** + * Decorator to preserve MDC for connection listener. + */ + static class ContextPreservingFutureListener implements ChannelFutureListener { + private final ChannelFutureListener listener; + private final Map mdcContextMap; + + ContextPreservingFutureListener(ChannelFutureListener listener) { + this.listener = listener; + this.mdcContextMap = MDC.getCopyOfContextMap(); + } + + @Override + public void operationComplete(ChannelFuture future) throws Exception { + MdcUtils.restoreContext(mdcContextMap); + try { + listener.operationComplete(future); + } finally { + MDC.clear(); + } + } + } + /** * Connection listener. 
*/ @@ -2139,7 +2598,7 @@ class ConnectionFutureListener implements ChannelFutureListener { } @Override - public void operationComplete(ChannelFuture future) throws Exception { + public void operationComplete(ChannelFuture future) { if (LOG.isDebugEnabled()) { LOG.debug("Channel connected ({}) {}", future.isSuccess(), future.channel()); } @@ -2158,21 +2617,21 @@ public void operationComplete(ChannelFuture future) throws Exception { synchronized (PerChannelBookieClient.this) { if (future.isSuccess() && state == ConnectionState.CONNECTING && future.channel().isActive()) { - LOG.info("Successfully connected to bookie: {}", future.channel()); rc = BKException.Code.OK; channel = future.channel(); if (shFactory != null) { + LOG.info("Successfully connected to bookie: {} {} initiate TLS", bookieId, future.channel()); makeWritable(); initiateTLS(); return; } else { - LOG.info("Successfully connected to bookie: " + addr); + LOG.info("Successfully connected to bookie: {} {}", bookieId, future.channel()); state = ConnectionState.CONNECTED; activeNonTlsChannelCounter.inc(); } } else if (future.isSuccess() && state == ConnectionState.START_TLS) { rc = BKException.Code.OK; - LOG.info("Successfully connected to bookie using TLS: " + addr); + LOG.info("Successfully connected to bookie using TLS: " + bookieId); state = ConnectionState.CONNECTED; AuthHandler.ClientSideHandler authHandler = future.channel().pipeline() @@ -2194,10 +2653,22 @@ public void operationComplete(ChannelFuture future) throws Exception { closeChannel(future.channel()); return; // pendingOps should have been completed when other channel connected } else { - LOG.error("Could not connect to bookie: {}/{}, current state {} : ", - future.channel(), addr, state, future.cause()); + Throwable cause = future.cause(); + if (cause instanceof UnknownHostException || cause instanceof NativeIoException) { + // Don't log stack trace for common errors + logBookieUnavailable(() -> LOG.warn("Could not connect to bookie: {}/{}, current state {} : {}", + future.channel(), bookieId, state, future.cause().getMessage())); + } else { + // Regular exceptions, include stack trace + logBookieUnavailable(() -> LOG.error("Could not connect to bookie: {}/{}, current state {} : ", + future.channel(), bookieId, state, future.cause())); + } + rc = BKException.Code.BookieHandleNotAvailableException; - closeChannel(future.channel()); + Channel failedChannel = future.channel(); + if (failedChannel != null) { // can be null in case of dummy failed ChannelFuture + closeChannel(failedChannel); + } channel = null; if (state != ConnectionState.CLOSED) { state = ConnectionState.DISCONNECTED; @@ -2219,16 +2690,24 @@ public void operationComplete(ChannelFuture future) throws Exception { makeWritable(); } + + private void logBookieUnavailable(Runnable logger) { + final long now = System.currentTimeMillis(); + if ((now - lastBookieUnavailableLogTimestamp) > conf.getClientConnectBookieUnavailableLogThrottlingMs()) { + logger.run(); + lastBookieUnavailableLogTimestamp = now; + } + } } private void initiateTLS() { LOG.info("Initializing TLS to {}", channel); assert state == ConnectionState.CONNECTING; final long txnId = getTxnId(); - final CompletionKey completionKey = new V3CompletionKey(txnId, OperationType.START_TLS); + final CompletionKey completionKey = new TxnCompletionKey(txnId, OperationType.START_TLS); completionObjects.put(completionKey, new StartTLSCompletion(completionKey)); - BookkeeperProtocol.Request.Builder h = BookkeeperProtocol.Request.newBuilder(); + 
BookkeeperProtocol.Request.Builder h = withRequestContext(BookkeeperProtocol.Request.newBuilder()); BKPacketHeader.Builder headerBuilder = BKPacketHeader.newBuilder() .setVersion(ProtocolVersion.VERSION_THREE) .setOperation(OperationType.START_TLS) @@ -2252,4 +2731,130 @@ private void failTLS(int rc) { } failedTlsHandshakeCounter.inc(); } + + private static class FailedChannelFutureImpl implements ChannelFuture { + + private final Throwable failureCause; + public FailedChannelFutureImpl(Throwable failureCause) { + this.failureCause = failureCause; + } + + @Override + public Channel channel() { + // used only for log + return null; + } + + @Override + public ChannelFuture addListener(GenericFutureListener> listener) { + throw new UnsupportedOperationException("Not supported"); + } + + @Override + @SuppressWarnings({"unchecked", "varargs"}) + public ChannelFuture addListeners(GenericFutureListener>... listeners) { + throw new UnsupportedOperationException("Not supported"); + } + + @Override + public ChannelFuture removeListener(GenericFutureListener> listener) { + throw new UnsupportedOperationException("Not supported"); + } + + @Override + @SuppressWarnings({"unchecked", "varargs"}) + public ChannelFuture removeListeners(GenericFutureListener>... listeners) { + throw new UnsupportedOperationException("Not supported"); + } + + @Override + public ChannelFuture sync() throws InterruptedException { + throw new UnsupportedOperationException("Not supported"); + } + + @Override + public ChannelFuture syncUninterruptibly() { + throw new UnsupportedOperationException("Not supported"); + } + + @Override + public ChannelFuture await() throws InterruptedException { + throw new UnsupportedOperationException("Not supported"); + } + + @Override + public ChannelFuture awaitUninterruptibly() { + throw new UnsupportedOperationException("Not supported"); + } + + @Override + public boolean isVoid() { + throw new UnsupportedOperationException("Not supported"); + } + + @Override + public boolean isSuccess() { + return false; + } + + @Override + public boolean isCancellable() { + return false; + } + + @Override + public Throwable cause() { + return failureCause; + } + + @Override + public boolean await(long timeout, TimeUnit unit) throws InterruptedException { + return true; + } + + @Override + public boolean await(long timeoutMillis) throws InterruptedException { + return true; + } + + @Override + public boolean awaitUninterruptibly(long timeout, TimeUnit unit) { + return true; + } + + @Override + public boolean awaitUninterruptibly(long timeoutMillis) { + return true; + } + + @Override + public Void getNow() { + throw new UnsupportedOperationException("Not supported"); + } + + @Override + public boolean cancel(boolean mayInterruptIfRunning) { + return false; + } + + @Override + public boolean isCancelled() { + return false; + } + + @Override + public boolean isDone() { + return true; + } + + @Override + public Void get() throws InterruptedException, ExecutionException { + throw new ExecutionException(failureCause); + } + + @Override + public Void get(long timeout, TimeUnit unit) throws InterruptedException, ExecutionException, TimeoutException { + throw new ExecutionException(failureCause); + } + } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/PerChannelBookieClientFactory.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/PerChannelBookieClientFactory.java index 17abb565827..d4fd5750a60 100644 --- 
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/PerChannelBookieClientFactory.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/PerChannelBookieClientFactory.java @@ -20,7 +20,7 @@ */ package org.apache.bookkeeper.proto; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.tls.SecurityException; import org.apache.bookkeeper.tls.SecurityHandlerFactory; @@ -36,6 +36,7 @@ interface PerChannelBookieClientFactory { * @return the client connected to address. * @throws SecurityException */ - PerChannelBookieClient create(BookieSocketAddress address, - PerChannelBookieClientPool pcbcPool, SecurityHandlerFactory shFactory) throws SecurityException; + PerChannelBookieClient create(BookieId address, PerChannelBookieClientPool pcbcPool, + SecurityHandlerFactory shFactory, + boolean forceUseV3) throws SecurityException; } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/PerChannelBookieClientPool.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/PerChannelBookieClientPool.java index aa7a5e94492..e8ba5f50201 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/PerChannelBookieClientPool.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/PerChannelBookieClientPool.java @@ -28,9 +28,9 @@ public interface PerChannelBookieClientPool { /** - * intialize the pool. the implementation should not be blocked. + * Initialize the pool. The implementation should not block. */ - void intialize(); + void initialize(); /** * Obtain a channel from channel pool to execute operations. @@ -40,6 +40,16 @@ public interface PerChannelBookieClientPool { */ void obtain(GenericCallback<PerChannelBookieClient> callback, long key); + /** + * Obtain a channel from the channel pool, by protocol version, to execute operations. + * + * @param callback + * callback to return a channel from the channel pool + * @param forceUseV3 + * whether or not to use the v3 protocol for the connection + */ + void obtain(GenericCallback<PerChannelBookieClient> callback, long key, boolean forceUseV3); + /** * Returns status of a client. * It is suggested to delay/throttle requests to this channel if isWritable is false. diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/ReadEntryProcessor.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/ReadEntryProcessor.java index edeb8a674f7..3930b8ea752 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/ReadEntryProcessor.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/ReadEntryProcessor.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file * distributed with this work for additional information @@ -18,31 +18,41 @@ package org.apache.bookkeeper.proto; import io.netty.buffer.ByteBuf; -import io.netty.channel.Channel; import io.netty.util.Recycler; -import io.netty.util.Recycler.Handle; import io.netty.util.ReferenceCountUtil; - +import io.netty.util.ReferenceCounted; import java.io.IOException; +import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutionException; -import java.util.concurrent.Future; +import java.util.concurrent.ExecutorService; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; - import org.apache.bookkeeper.bookie.Bookie; import org.apache.bookkeeper.bookie.BookieException; +import org.apache.bookkeeper.common.concurrent.FutureEventListener; +import org.apache.bookkeeper.common.util.MathUtils; import org.apache.bookkeeper.proto.BookieProtocol.ReadRequest; -import org.apache.bookkeeper.util.MathUtils; +import org.apache.bookkeeper.stats.OpStatsLogger; import org.slf4j.Logger; import org.slf4j.LoggerFactory; + class ReadEntryProcessor extends PacketProcessorBase { private static final Logger LOG = LoggerFactory.getLogger(ReadEntryProcessor.class); - public static ReadEntryProcessor create(ReadRequest request, Channel channel, - BookieRequestProcessor requestProcessor) { + protected ExecutorService fenceThreadPool; + protected boolean throttleReadResponses; + + public static ReadEntryProcessor create(ReadRequest request, + BookieRequestHandler requestHandler, + BookieRequestProcessor requestProcessor, + ExecutorService fenceThreadPool, + boolean throttleReadResponses) { ReadEntryProcessor rep = RECYCLER.get(); - rep.init(request, channel, requestProcessor); + rep.init(request, requestHandler, requestProcessor); + rep.fenceThreadPool = fenceThreadPool; + rep.throttleReadResponses = throttleReadResponses; + requestProcessor.onReadRequestStart(requestHandler.ctx().channel()); return rep; } @@ -51,55 +61,38 @@ protected void processPacket() { if (LOG.isDebugEnabled()) { LOG.debug("Received new read request: {}", request); } - int errorCode = BookieProtocol.EIO; + if (!requestHandler.ctx().channel().isOpen()) { + if (LOG.isDebugEnabled()) { + LOG.debug("Dropping read request for closed channel: {}", requestHandler.ctx().channel()); + } + requestProcessor.onReadRequestFinish(); + recycle(); + return; + } + int errorCode = BookieProtocol.EOK; long startTimeNanos = MathUtils.nowInNano(); - ByteBuf data = null; + ReferenceCounted data = null; try { - Future fenceResult = null; + CompletableFuture fenceResult = null; if (request.isFencing()) { - LOG.warn("Ledger: {} fenced by: {}", request.getLedgerId(), channel.remoteAddress()); + LOG.warn("Ledger: {} fenced by: {}", request.getLedgerId(), + requestHandler.ctx().channel().remoteAddress()); if (request.hasMasterKey()) { - fenceResult = requestProcessor.bookie.fenceLedger(request.getLedgerId(), request.getMasterKey()); + fenceResult = requestProcessor.getBookie().fenceLedger(request.getLedgerId(), + request.getMasterKey()); } else { LOG.error("Password not provided, Not safe to fence {}", request.getLedgerId()); throw BookieException.create(BookieException.Code.UnauthorizedAccessException); } } - data = requestProcessor.bookie.readEntry(request.getLedgerId(), request.getEntryId()); + data = readData(); if (LOG.isDebugEnabled()) { - LOG.debug("##### Read entry ##### {} -- ref-count: {}", data.readableBytes(), data.refCnt()); + LOG.debug("##### Read entry ##### -- ref-count: {}", data.refCnt()); } 
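
The removed block below is the old inline fence wait; the reworked path defers to handleReadResultForFenceRead, which blocks with the bounded one-second get() only as a fallback and otherwise completes the response asynchronously on the fence thread pool. A compact sketch of the two modes, assuming a CompletableFuture<Boolean> fence result (respond, EOK and EIO are illustrative stand-ins, not the real protocol plumbing):

    import java.util.concurrent.CompletableFuture;
    import java.util.concurrent.ExecutorService;
    import java.util.concurrent.Executors;
    import java.util.concurrent.TimeUnit;

    public class FenceResponseSketch {
        static final int EOK = 0;   // stand-ins for the bookie protocol error codes
        static final int EIO = 101;

        static void respond(int rc) {
            System.out.println("rc=" + rc);
        }

        public static void main(String[] args) throws Exception {
            CompletableFuture<Boolean> fenceResult = new CompletableFuture<>();
            ExecutorService fencePool = Executors.newSingleThreadExecutor();

            // Preferred mode: never block the request thread; respond from the fence pool.
            fenceResult.whenCompleteAsync((fenced, error) ->
                    respond(error == null && Boolean.TRUE.equals(fenced) ? EOK : EIO),
                    fencePool);

            fenceResult.complete(true);

            // Fallback mode: bounded wait; failure or timeout maps to EIO so the client
            // retries the read.
            Boolean fenced = fenceResult.get(1000, TimeUnit.MILLISECONDS);
            respond(Boolean.TRUE.equals(fenced) ? EOK : EIO);

            fencePool.shutdown();
        }
    }
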
- if (null != fenceResult) { - // TODO: {@link https://github.com/apache/bookkeeper/issues/283} - // currently we don't have readCallback to run in separated read - // threads. after BOOKKEEPER-429 is complete, we could improve - // following code to make it not wait here - // - // For now, since we only try to wait after read entry. so writing - // to journal and read entry are executed in different thread - // it would be fine. - try { - Boolean fenced = fenceResult.get(1000, TimeUnit.MILLISECONDS); - if (null == fenced || !fenced) { - // if failed to fence, fail the read request to make it retry. - errorCode = BookieProtocol.EIO; - } else { - errorCode = BookieProtocol.EOK; - } - } catch (InterruptedException ie) { - Thread.currentThread().interrupt(); - LOG.error("Interrupting fence read entry {}", request, ie); - errorCode = BookieProtocol.EIO; - } catch (ExecutionException ee) { - LOG.error("Failed to fence read entry {}", request, ee); - errorCode = BookieProtocol.EIO; - } catch (TimeoutException te) { - LOG.error("Timeout to fence read entry {}", request, te); - errorCode = BookieProtocol.EIO; - } - } else { - errorCode = BookieProtocol.EOK; + if (fenceResult != null) { + handleReadResultForFenceRead(fenceResult, data, startTimeNanos); + return; } } catch (Bookie.NoLedgerException e) { if (LOG.isDebugEnabled()) { @@ -116,6 +109,9 @@ protected void processPacket() { LOG.debug("Error reading {}", request, e); } errorCode = BookieProtocol.EIO; + } catch (BookieException.DataUnknownException e) { + LOG.error("Ledger {} is in an unknown state", request.getLedgerId(), e); + errorCode = BookieProtocol.EUNKNOWNLEDGERSTATE; } catch (BookieException e) { LOG.error("Unauthorized access to ledger {}", request.getLedgerId(), e); errorCode = BookieProtocol.EUA; @@ -128,30 +124,86 @@ protected void processPacket() { if (LOG.isTraceEnabled()) { LOG.trace("Read entry rc = {} for {}", errorCode, request); } + sendResponse(data, errorCode, startTimeNanos); + } + + protected ReferenceCounted readData() throws Exception { + return requestProcessor.getBookie().readEntry(request.getLedgerId(), request.getEntryId()); + } + + private void sendResponse(ReferenceCounted data, int errorCode, long startTimeNanos) { + final RequestStats stats = requestProcessor.getRequestStats(); + final OpStatsLogger logger = stats.getReadEntryStats(); + BookieProtocol.Response response; if (errorCode == BookieProtocol.EOK) { - requestProcessor.readEntryStats.registerSuccessfulEvent(MathUtils.elapsedNanos(startTimeNanos), - TimeUnit.NANOSECONDS); - sendResponse(errorCode, ResponseBuilder.buildReadResponse(data, request), - requestProcessor.readRequestStats); + logger.registerSuccessfulEvent(MathUtils.elapsedNanos(startTimeNanos), TimeUnit.NANOSECONDS); + response = buildReadResponse(data); } else { - ReferenceCountUtil.release(data); - - requestProcessor.readEntryStats.registerFailedEvent(MathUtils.elapsedNanos(startTimeNanos), - TimeUnit.NANOSECONDS); - sendResponse(errorCode, ResponseBuilder.buildErrorResponse(errorCode, request), - requestProcessor.readRequestStats); + if (data != null) { + ReferenceCountUtil.release(data); + } + logger.registerFailedEvent(MathUtils.elapsedNanos(startTimeNanos), TimeUnit.NANOSECONDS); + response = ResponseBuilder.buildErrorResponse(errorCode, request); } + + sendReadReqResponse(errorCode, response, stats.getReadRequestStats(), throttleReadResponses); recycle(); } + protected BookieProtocol.Response buildReadResponse(ReferenceCounted data) { + return 
ResponseBuilder.buildReadResponse((ByteBuf) data, request); + } + + private void sendFenceResponse(Boolean result, ReferenceCounted data, long startTimeNanos) { + final int retCode = result != null && result ? BookieProtocol.EOK : BookieProtocol.EIO; + sendResponse(data, retCode, startTimeNanos); + } + + private void handleReadResultForFenceRead(CompletableFuture fenceResult, + ReferenceCounted data, + long startTimeNanos) { + if (null != fenceThreadPool) { + fenceResult.whenCompleteAsync(new FutureEventListener() { + @Override + public void onSuccess(Boolean result) { + sendFenceResponse(result, data, startTimeNanos); + } + + @Override + public void onFailure(Throwable t) { + LOG.error("Error processing fence request", t); + // if failed to fence, fail the read request to make it retry. + sendResponse(data, BookieProtocol.EIO, startTimeNanos); + } + }, fenceThreadPool); + } else { + try { + Boolean fenced = fenceResult.get(1000, TimeUnit.MILLISECONDS); + sendFenceResponse(fenced, data, startTimeNanos); + return; + } catch (InterruptedException ie) { + Thread.currentThread().interrupt(); + LOG.error("Interrupting fence read entry {}", request, ie); + } catch (ExecutionException ee) { + LOG.error("Failed to fence read entry {}", request, ee.getCause()); + } catch (TimeoutException te) { + LOG.error("Timeout to fence read entry {}", request, te); + } + sendResponse(data, BookieProtocol.EIO, startTimeNanos); + } + } + @Override public String toString() { return String.format("ReadEntry(%d, %d)", request.getLedgerId(), request.getEntryId()); } - private void recycle() { + void recycle() { + request.recycle(); super.reset(); - this.recyclerHandle.recycle(this); + if (this.recyclerHandle != null) { + this.recyclerHandle.recycle(this); + } } private final Recycler.Handle recyclerHandle; @@ -160,6 +212,10 @@ private ReadEntryProcessor(Recycler.Handle recyclerHandle) { this.recyclerHandle = recyclerHandle; } + protected ReadEntryProcessor() { + this.recyclerHandle = null; + } + private static final Recycler RECYCLER = new Recycler() { @Override protected ReadEntryProcessor newObject(Recycler.Handle handle) { diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/ReadEntryProcessorV3.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/ReadEntryProcessorV3.java index e7e56533317..3a85ca0949a 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/ReadEntryProcessorV3.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/ReadEntryProcessorV3.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file * distributed with this work for additional information @@ -18,28 +18,24 @@ package org.apache.bookkeeper.proto; import com.google.common.base.Stopwatch; -import com.google.common.util.concurrent.FutureCallback; -import com.google.common.util.concurrent.Futures; -import com.google.common.util.concurrent.SettableFuture; import com.google.protobuf.ByteString; - import io.netty.buffer.ByteBuf; import io.netty.channel.Channel; import io.netty.util.ReferenceCountUtil; - import java.io.IOException; +import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutorService; import java.util.concurrent.TimeUnit; - import org.apache.bookkeeper.bookie.Bookie; import org.apache.bookkeeper.bookie.BookieException; +import org.apache.bookkeeper.common.concurrent.FutureEventListener; +import org.apache.bookkeeper.common.util.MathUtils; import org.apache.bookkeeper.proto.BookkeeperProtocol.ReadRequest; import org.apache.bookkeeper.proto.BookkeeperProtocol.ReadResponse; import org.apache.bookkeeper.proto.BookkeeperProtocol.Request; import org.apache.bookkeeper.proto.BookkeeperProtocol.Response; import org.apache.bookkeeper.proto.BookkeeperProtocol.StatusCode; import org.apache.bookkeeper.stats.OpStatsLogger; -import org.apache.bookkeeper.util.MathUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -50,7 +46,7 @@ class ReadEntryProcessorV3 extends PacketProcessorBaseV3 { protected Stopwatch lastPhaseStartTime; private final ExecutorService fenceThreadPool; - private SettableFuture fenceResult = null; + private CompletableFuture fenceResult = null; protected final ReadRequest readRequest; protected final long ledgerId; @@ -61,24 +57,24 @@ class ReadEntryProcessorV3 extends PacketProcessorBaseV3 { protected final OpStatsLogger reqStats; public ReadEntryProcessorV3(Request request, - Channel channel, + BookieRequestHandler requestHandler, BookieRequestProcessor requestProcessor, ExecutorService fenceThreadPool) { - super(request, channel, requestProcessor); - requestProcessor.onReadRequestStart(channel); + super(request, requestHandler, requestProcessor); + requestProcessor.onReadRequestStart(requestHandler.ctx().channel()); this.readRequest = request.getReadRequest(); this.ledgerId = readRequest.getLedgerId(); this.entryId = readRequest.getEntryId(); if (RequestUtils.isFenceRequest(this.readRequest)) { - this.readStats = requestProcessor.fenceReadEntryStats; - this.reqStats = requestProcessor.fenceReadRequestStats; + this.readStats = requestProcessor.getRequestStats().getFenceReadEntryStats(); + this.reqStats = requestProcessor.getRequestStats().getFenceReadRequestStats(); } else if (readRequest.hasPreviousLAC()) { - this.readStats = requestProcessor.longPollReadStats; - this.reqStats = requestProcessor.longPollReadRequestStats; + this.readStats = requestProcessor.getRequestStats().getLongPollReadStats(); + this.reqStats = requestProcessor.getRequestStats().getLongPollReadRequestStats(); } else { - this.readStats = requestProcessor.readEntryStats; - this.reqStats = requestProcessor.readRequestStats; + this.readStats = requestProcessor.getRequestStats().getReadEntryStats(); + this.reqStats = requestProcessor.getRequestStats().getReadRequestStats(); } this.fenceThreadPool = fenceThreadPool; @@ -113,7 +109,7 @@ protected void handleReadResultForFenceRead( // reset last phase start time to measure fence result waiting time lastPhaseStartTime.reset().start(); if (null != fenceThreadPool) { - Futures.addCallback(fenceResult, new FutureCallback() { + 
fenceResult.whenCompleteAsync(new FutureEventListener() { @Override public void onSuccess(Boolean result) { sendFenceResponse(readResponseBuilder, entryBody, result, startTimeSw); @@ -153,7 +149,7 @@ public void onFailure(Throwable t) { protected ReadResponse readEntry(ReadResponse.Builder readResponseBuilder, long entryId, Stopwatch startTimeSw) - throws IOException { + throws IOException, BookieException { return readEntry(readResponseBuilder, entryId, false, startTimeSw); } @@ -173,8 +169,8 @@ protected ReadResponse readEntry(ReadResponse.Builder readResponseBuilder, long entryId, boolean readLACPiggyBack, Stopwatch startTimeSw) - throws IOException { - ByteBuf entryBody = requestProcessor.bookie.readEntry(ledgerId, entryId); + throws IOException, BookieException { + ByteBuf entryBody = requestProcessor.getBookie().readEntry(ledgerId, entryId); if (null != fenceResult) { handleReadResultForFenceRead(entryBody, readResponseBuilder, entryId, startTimeSw); return null; @@ -184,7 +180,7 @@ protected ReadResponse readEntry(ReadResponse.Builder readResponseBuilder, if (readLACPiggyBack) { readResponseBuilder.setEntryId(entryId); } else { - long knownLAC = requestProcessor.bookie.readLastAddConfirmed(ledgerId); + long knownLAC = requestProcessor.getBookie().readLastAddConfirmed(ledgerId); readResponseBuilder.setMaxLAC(knownLAC); } registerSuccessfulEvent(readStats, startTimeSw); @@ -198,12 +194,13 @@ protected ReadResponse readEntry(ReadResponse.Builder readResponseBuilder, protected ReadResponse getReadResponse() { final Stopwatch startTimeSw = Stopwatch.createStarted(); + final Channel channel = requestHandler.ctx().channel(); final ReadResponse.Builder readResponse = ReadResponse.newBuilder() .setLedgerId(ledgerId) .setEntryId(entryId); try { - // handle fence reqest + // handle fence request if (RequestUtils.isFenceRequest(readRequest)) { LOG.info("Ledger fence request received for ledger: {} from address: {}", ledgerId, channel.remoteAddress()); @@ -236,6 +233,11 @@ protected ReadResponse getReadResponse() { } catch (IOException e) { LOG.error("IOException while reading entry: {} from ledger {} ", entryId, ledgerId, e); return buildResponse(readResponse, StatusCode.EIO, startTimeSw); + } catch (BookieException.DataUnknownException e) { + if (LOG.isDebugEnabled()) { + LOG.debug("Ledger has unknown state for entry: {} from ledger {}", entryId, ledgerId); + } + return buildResponse(readResponse, StatusCode.EUNKNOWNLEDGERSTATE, startTimeSw); } catch (BookieException e) { LOG.error( "Unauthorized access to ledger:{} while reading entry:{} in request from address: {}", @@ -245,9 +247,16 @@ protected ReadResponse getReadResponse() { } @Override - public void safeRun() { - requestProcessor.readEntrySchedulingDelayStats.registerSuccessfulEvent( + public void run() { + requestProcessor.getRequestStats().getReadEntrySchedulingDelayStats().registerSuccessfulEvent( MathUtils.elapsedNanos(enqueueNanos), TimeUnit.NANOSECONDS); + if (!requestHandler.ctx().channel().isOpen()) { + if (LOG.isDebugEnabled()) { + LOG.debug("Dropping read request for closed channel: {}", requestHandler.ctx().channel()); + } + requestProcessor.onReadRequestFinish(); + return; + } if (!isVersionCompatible()) { ReadResponse readResponse = ReadResponse.newBuilder() @@ -275,11 +284,11 @@ private void getFenceResponse(ReadResponse.Builder readResponse, StatusCode status; if (!fenceResult) { status = StatusCode.EIO; - registerFailedEvent(requestProcessor.fenceReadWaitStats, lastPhaseStartTime); + 
registerFailedEvent(requestProcessor.getRequestStats().getFenceReadWaitStats(), lastPhaseStartTime); } else { status = StatusCode.EOK; readResponse.setBody(ByteString.copyFrom(entryBody.nioBuffer())); - registerSuccessfulEvent(requestProcessor.fenceReadWaitStats, lastPhaseStartTime); + registerSuccessfulEvent(requestProcessor.getRequestStats().getFenceReadWaitStats(), lastPhaseStartTime); } if (null != entryBody) { @@ -296,7 +305,7 @@ private void sendFenceResponse(ReadResponse.Builder readResponse, // build the fence read response getFenceResponse(readResponse, entryBody, fenceResult); // register fence read stat - registerEvent(!fenceResult, requestProcessor.fenceReadEntryStats, startTimeSw); + registerEvent(!fenceResult, requestProcessor.getRequestStats().getFenceReadEntryStats(), startTimeSw); // send the fence read response sendResponse(readResponse.build()); } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/ReadLacProcessorV3.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/ReadLacProcessorV3.java index 898ddb0413b..66411819976 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/ReadLacProcessorV3.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/ReadLacProcessorV3.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -21,21 +21,18 @@ package org.apache.bookkeeper.proto; import com.google.protobuf.ByteString; - import io.netty.buffer.ByteBuf; -import io.netty.channel.Channel; import io.netty.util.ReferenceCountUtil; - import java.io.IOException; import java.util.concurrent.TimeUnit; - import org.apache.bookkeeper.bookie.Bookie; +import org.apache.bookkeeper.bookie.BookieException; +import org.apache.bookkeeper.common.util.MathUtils; import org.apache.bookkeeper.proto.BookkeeperProtocol.ReadLacRequest; import org.apache.bookkeeper.proto.BookkeeperProtocol.ReadLacResponse; import org.apache.bookkeeper.proto.BookkeeperProtocol.Request; import org.apache.bookkeeper.proto.BookkeeperProtocol.Response; import org.apache.bookkeeper.proto.BookkeeperProtocol.StatusCode; -import org.apache.bookkeeper.util.MathUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -45,9 +42,9 @@ class ReadLacProcessorV3 extends PacketProcessorBaseV3 implements Runnable { private static final Logger logger = LoggerFactory.getLogger(ReadLacProcessorV3.class); - public ReadLacProcessorV3(Request request, Channel channel, + public ReadLacProcessorV3(Request request, BookieRequestHandler requestHandler, BookieRequestProcessor requestProcessor) { - super(request, channel, requestProcessor); + super(request, requestHandler, requestProcessor); } // Returns null if there is no exception thrown @@ -74,10 +71,13 @@ private ReadLacResponse getReadLacResponse() { } } catch (Bookie.NoLedgerException e) { status = StatusCode.ENOLEDGER; - logger.error("No ledger found while performing readLac from ledger: {}", ledgerId, e); - } catch (IOException e) { + logger.debug("No ledger found while performing readLac from ledger: {}", ledgerId, e); + } catch (BookieException.DataUnknownException e) { + status = StatusCode.EUNKNOWNLEDGERSTATE; + logger.error("Ledger {} is in an unknown state and cannot serve readLac requests", ledgerId, e); + } catch (BookieException | IOException e) { status = StatusCode.EIO; - logger.error("IOException while performing readLac from ledger: {}", ledgerId); + logger.error("IOException while performing readLac from 
ledger: {}", ledgerId, e); } finally { ReferenceCountUtil.release(lac); } @@ -89,8 +89,11 @@ private ReadLacResponse getReadLacResponse() { } } catch (Bookie.NoLedgerException e) { status = StatusCode.ENOLEDGER; - logger.error("No ledger found while trying to read last entry: {}", ledgerId, e); - } catch (IOException e) { + logger.debug("No ledger found while trying to read last entry: {}", ledgerId, e); + } catch (BookieException.DataUnknownException e) { + status = StatusCode.EUNKNOWNLEDGERSTATE; + logger.error("Ledger in an unknown state while trying to read last entry: {}", ledgerId, e); + } catch (BookieException | IOException e) { status = StatusCode.EIO; logger.error("IOException while trying to read last entry: {}", ledgerId, e); } finally { @@ -102,11 +105,11 @@ private ReadLacResponse getReadLacResponse() { } if (status == StatusCode.EOK) { - requestProcessor.readLacStats.registerSuccessfulEvent(MathUtils.elapsedNanos(startTimeNanos), - TimeUnit.NANOSECONDS); + requestProcessor.getRequestStats().getReadLacStats() + .registerSuccessfulEvent(MathUtils.elapsedNanos(startTimeNanos), TimeUnit.NANOSECONDS); } else { - requestProcessor.readLacStats.registerFailedEvent(MathUtils.elapsedNanos(startTimeNanos), - TimeUnit.NANOSECONDS); + requestProcessor.getRequestStats().getReadLacStats() + .registerFailedEvent(MathUtils.elapsedNanos(startTimeNanos), TimeUnit.NANOSECONDS); } // Finally set the status and return readLacResponse.setStatus(status); @@ -114,7 +117,7 @@ private ReadLacResponse getReadLacResponse() { } @Override - public void safeRun() { + public void run() { ReadLacResponse readLacResponse = getReadLacResponse(); sendResponse(readLacResponse); } @@ -126,6 +129,6 @@ private void sendResponse(ReadLacResponse readLacResponse) { .setReadLacResponse(readLacResponse); sendResponse(response.getStatus(), response.build(), - requestProcessor.readLacRequestStats); + requestProcessor.getRequestStats().getReadLacRequestStats()); } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/ReadLastConfirmedAndEntryContext.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/ReadLastConfirmedAndEntryContext.java index 1bf3685b6c1..dea6f6dd6a7 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/ReadLastConfirmedAndEntryContext.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/ReadLastConfirmedAndEntryContext.java @@ -17,9 +17,9 @@ */ package org.apache.bookkeeper.proto; -import com.google.common.base.Optional; +import java.util.Optional; import org.apache.bookkeeper.client.LedgerHandle; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.ReadEntryCallbackCtx; /** @@ -28,11 +28,11 @@ public class ReadLastConfirmedAndEntryContext implements ReadEntryCallbackCtx { final int bookieIndex; - final BookieSocketAddress bookie; + final BookieId bookie; long lac = LedgerHandle.INVALID_ENTRY_ID; - Optional lacUpdateTimestamp = Optional.absent(); + Optional lacUpdateTimestamp = Optional.empty(); - public ReadLastConfirmedAndEntryContext(int bookieIndex, BookieSocketAddress bookie) { + public ReadLastConfirmedAndEntryContext(int bookieIndex, BookieId bookie) { this.bookieIndex = bookieIndex; this.bookie = bookie; } @@ -41,7 +41,7 @@ public int getBookieIndex() { return bookieIndex; } - public BookieSocketAddress getBookieAddress() { + public BookieId getBookieAddress() { return bookie; } diff --git 
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/RequestStats.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/RequestStats.java new file mode 100644 index 00000000000..e31963edd14 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/RequestStats.java @@ -0,0 +1,385 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bookkeeper.proto; + +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.ADD_ENTRY; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.ADD_ENTRY_BLOCKED; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.ADD_ENTRY_BLOCKED_WAIT; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.ADD_ENTRY_IN_PROGRESS; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.ADD_ENTRY_REJECTED; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.ADD_ENTRY_REQUEST; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.CATEGORY_SERVER; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.CHANNEL_WRITE; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.FORCE_LEDGER; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.FORCE_LEDGER_REQUEST; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.GET_BOOKIE_INFO; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.GET_BOOKIE_INFO_REQUEST; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.GET_LIST_OF_ENTRIES_OF_LEDGER; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.GET_LIST_OF_ENTRIES_OF_LEDGER_REQUEST; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.READ_ENTRY; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.READ_ENTRY_BLOCKED; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.READ_ENTRY_BLOCKED_WAIT; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.READ_ENTRY_FENCE_READ; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.READ_ENTRY_FENCE_REQUEST; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.READ_ENTRY_FENCE_WAIT; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.READ_ENTRY_IN_PROGRESS; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.READ_ENTRY_LONG_POLL_PRE_WAIT; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.READ_ENTRY_LONG_POLL_READ; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.READ_ENTRY_LONG_POLL_REQUEST; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.READ_ENTRY_LONG_POLL_WAIT; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.READ_ENTRY_REJECTED; +import static 
org.apache.bookkeeper.bookie.BookKeeperServerStats.READ_ENTRY_REQUEST; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.READ_ENTRY_SCHEDULING_DELAY; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.READ_LAC; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.READ_LAC_REQUEST; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.READ_LAST_ENTRY_NOENTRY_ERROR; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.SERVER_SCOPE; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.WRITE_LAC; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.WRITE_LAC_REQUEST; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.WRITE_THREAD_QUEUED_LATENCY; + +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; +import lombok.Getter; +import org.apache.bookkeeper.stats.Counter; +import org.apache.bookkeeper.stats.Gauge; +import org.apache.bookkeeper.stats.OpStatsLogger; +import org.apache.bookkeeper.stats.StatsLogger; +import org.apache.bookkeeper.stats.annotations.StatsDoc; + +/** + * An umbrella class for request related stats. + */ +@StatsDoc( + name = SERVER_SCOPE, + category = CATEGORY_SERVER, + help = "Bookie request stats" +) +@Getter +public class RequestStats { + + final AtomicInteger addsInProgress = new AtomicInteger(0); + final AtomicInteger maxAddsInProgress = new AtomicInteger(0); + final AtomicInteger addsBlocked = new AtomicInteger(0); + final AtomicInteger readsInProgress = new AtomicInteger(0); + final AtomicInteger readsBlocked = new AtomicInteger(0); + final AtomicInteger maxReadsInProgress = new AtomicInteger(0); + + @StatsDoc( + name = ADD_ENTRY_REQUEST, + help = "request stats of AddEntry on a bookie" + ) + private final OpStatsLogger addRequestStats; + @StatsDoc( + name = ADD_ENTRY, + help = "operation stats of AddEntry on a bookie", + parent = ADD_ENTRY_REQUEST + ) + private final OpStatsLogger addEntryStats; + + @StatsDoc( + name = WRITE_THREAD_QUEUED_LATENCY, + help = "operation stats of enqueuing requests to write threadpool queue", + parent = ADD_ENTRY_REQUEST + ) + private final OpStatsLogger writeThreadQueuedLatency; + + @StatsDoc( + name = ADD_ENTRY_REJECTED, + help = "Counter for rejected adds on a bookie", + parent = ADD_ENTRY_REQUEST + ) + private final Counter addEntryRejectedCounter; + @StatsDoc( + name = READ_ENTRY_REQUEST, + help = "request stats of ReadEntry on a bookie" + ) + final OpStatsLogger readRequestStats; + @StatsDoc( + name = READ_ENTRY, + help = "operation stats of ReadEntry on a bookie", + parent = READ_ENTRY_REQUEST + ) + final OpStatsLogger readEntryStats; + @StatsDoc( + name = READ_ENTRY_REJECTED, + help = "Counter for rejected reads on a bookie", + parent = READ_ENTRY_REQUEST + ) + private final Counter readEntryRejectedCounter; + @StatsDoc( + name = FORCE_LEDGER, + help = "operation stats of ForceLedger on a bookie", + parent = FORCE_LEDGER_REQUEST + ) + final OpStatsLogger forceLedgerStats; + @StatsDoc( + name = FORCE_LEDGER_REQUEST, + help = "request stats of ForceLedger on a bookie" + ) + final OpStatsLogger forceLedgerRequestStats; + @StatsDoc( + name = READ_ENTRY_FENCE_REQUEST, + help = "request stats of FenceRead on a bookie" + ) + final OpStatsLogger fenceReadRequestStats; + @StatsDoc( + name = READ_ENTRY_FENCE_READ, + help = "operation stats of FenceRead on a bookie", + parent = READ_ENTRY_FENCE_REQUEST, + happensAfter = READ_ENTRY_FENCE_WAIT + ) + final OpStatsLogger fenceReadEntryStats; +
@StatsDoc( + name = READ_ENTRY_FENCE_WAIT, + help = "operation stats of FenceReadWait on a bookie", + parent = READ_ENTRY_FENCE_REQUEST + ) + final OpStatsLogger fenceReadWaitStats; + @StatsDoc( + name = READ_ENTRY_SCHEDULING_DELAY, + help = "operation stats of ReadEntry scheduling delays on a bookie" + ) + final OpStatsLogger readEntrySchedulingDelayStats; + @StatsDoc( + name = READ_ENTRY_LONG_POLL_PRE_WAIT, + help = "operation stats of LongPoll Reads pre wait time on a bookie", + parent = READ_ENTRY_LONG_POLL_REQUEST + ) + final OpStatsLogger longPollPreWaitStats; + @StatsDoc( + name = READ_ENTRY_LONG_POLL_WAIT, + help = "operation stats of LongPoll Reads wait time on a bookie", + happensAfter = READ_ENTRY_LONG_POLL_PRE_WAIT, + parent = READ_ENTRY_LONG_POLL_REQUEST + ) + final OpStatsLogger longPollWaitStats; + @StatsDoc( + name = READ_ENTRY_LONG_POLL_READ, + help = "operation stats of LongPoll Reads on a bookie", + happensAfter = READ_ENTRY_LONG_POLL_WAIT, + parent = READ_ENTRY_LONG_POLL_REQUEST + ) + final OpStatsLogger longPollReadStats; + @StatsDoc( + name = READ_ENTRY_LONG_POLL_REQUEST, + help = "request stats of LongPoll Reads on a bookie" + ) + final OpStatsLogger longPollReadRequestStats; + @StatsDoc( + name = READ_LAST_ENTRY_NOENTRY_ERROR, + help = "total NOENTRY errors of reading last entry on a bookie" + ) + final Counter readLastEntryNoEntryErrorCounter; + @StatsDoc( + name = WRITE_LAC_REQUEST, + help = "request stats of WriteLac on a bookie" + ) + final OpStatsLogger writeLacRequestStats; + @StatsDoc( + name = WRITE_LAC, + help = "operation stats of WriteLac on a bookie", + parent = WRITE_LAC_REQUEST + ) + final OpStatsLogger writeLacStats; + @StatsDoc( + name = READ_LAC_REQUEST, + help = "request stats of ReadLac on a bookie" + ) + final OpStatsLogger readLacRequestStats; + @StatsDoc( + name = READ_LAC, + help = "operation stats of ReadLac on a bookie", + parent = READ_LAC_REQUEST + ) + final OpStatsLogger readLacStats; + @StatsDoc( + name = GET_BOOKIE_INFO_REQUEST, + help = "request stats of GetBookieInfo on a bookie" + ) + final OpStatsLogger getBookieInfoRequestStats; + @StatsDoc( + name = GET_BOOKIE_INFO, + help = "operation stats of GetBookieInfo on a bookie" + ) + final OpStatsLogger getBookieInfoStats; + @StatsDoc( + name = CHANNEL_WRITE, + help = "channel write stats on a bookie" + ) + final OpStatsLogger channelWriteStats; + @StatsDoc( + name = ADD_ENTRY_BLOCKED, + help = "operation stats of AddEntry blocked on a bookie" + ) + final OpStatsLogger addEntryBlockedStats; + @StatsDoc( + name = READ_ENTRY_BLOCKED, + help = "operation stats of ReadEntry blocked on a bookie" + ) + final OpStatsLogger readEntryBlockedStats; + @StatsDoc( + name = GET_LIST_OF_ENTRIES_OF_LEDGER_REQUEST, + help = "request stats of GetListOfEntriesOfLedger on a bookie" + ) + final OpStatsLogger getListOfEntriesOfLedgerRequestStats; + @StatsDoc( + name = "GET_LIST_OF_ENTRIES_OF_LEDGER", + help = "operation stats of GetListOfEntriesOfLedger", + parent = GET_LIST_OF_ENTRIES_OF_LEDGER_REQUEST + ) + final OpStatsLogger getListOfEntriesOfLedgerStats; + + public RequestStats(StatsLogger statsLogger) { + this.addEntryStats = statsLogger.getThreadScopedOpStatsLogger(ADD_ENTRY); + this.writeThreadQueuedLatency = statsLogger.getThreadScopedOpStatsLogger(WRITE_THREAD_QUEUED_LATENCY); + this.addRequestStats = statsLogger.getOpStatsLogger(ADD_ENTRY_REQUEST); + this.addEntryRejectedCounter = statsLogger.getCounter(ADD_ENTRY_REJECTED); + this.readEntryStats = 
statsLogger.getThreadScopedOpStatsLogger(READ_ENTRY); + this.readEntryRejectedCounter = statsLogger.getCounter(READ_ENTRY_REJECTED); + this.forceLedgerStats = statsLogger.getOpStatsLogger(FORCE_LEDGER); + this.forceLedgerRequestStats = statsLogger.getOpStatsLogger(FORCE_LEDGER_REQUEST); + this.readRequestStats = statsLogger.getOpStatsLogger(READ_ENTRY_REQUEST); + this.fenceReadEntryStats = statsLogger.getOpStatsLogger(READ_ENTRY_FENCE_READ); + this.fenceReadRequestStats = statsLogger.getOpStatsLogger(READ_ENTRY_FENCE_REQUEST); + this.fenceReadWaitStats = statsLogger.getOpStatsLogger(READ_ENTRY_FENCE_WAIT); + this.readEntrySchedulingDelayStats = statsLogger.getOpStatsLogger(READ_ENTRY_SCHEDULING_DELAY); + this.longPollPreWaitStats = statsLogger.getOpStatsLogger(READ_ENTRY_LONG_POLL_PRE_WAIT); + this.longPollWaitStats = statsLogger.getOpStatsLogger(READ_ENTRY_LONG_POLL_WAIT); + this.longPollReadStats = statsLogger.getOpStatsLogger(READ_ENTRY_LONG_POLL_READ); + this.longPollReadRequestStats = statsLogger.getOpStatsLogger(READ_ENTRY_LONG_POLL_REQUEST); + this.readLastEntryNoEntryErrorCounter = statsLogger.getCounter(READ_LAST_ENTRY_NOENTRY_ERROR); + this.writeLacStats = statsLogger.getOpStatsLogger(WRITE_LAC); + this.writeLacRequestStats = statsLogger.getOpStatsLogger(WRITE_LAC_REQUEST); + this.readLacStats = statsLogger.getOpStatsLogger(READ_LAC); + this.readLacRequestStats = statsLogger.getOpStatsLogger(READ_LAC_REQUEST); + this.getBookieInfoStats = statsLogger.getOpStatsLogger(GET_BOOKIE_INFO); + this.getBookieInfoRequestStats = statsLogger.getOpStatsLogger(GET_BOOKIE_INFO_REQUEST); + this.channelWriteStats = statsLogger.getOpStatsLogger(CHANNEL_WRITE); + + this.addEntryBlockedStats = statsLogger.getOpStatsLogger(ADD_ENTRY_BLOCKED_WAIT); + this.readEntryBlockedStats = statsLogger.getOpStatsLogger(READ_ENTRY_BLOCKED_WAIT); + + this.getListOfEntriesOfLedgerStats = statsLogger.getOpStatsLogger(GET_LIST_OF_ENTRIES_OF_LEDGER); + this.getListOfEntriesOfLedgerRequestStats = + statsLogger.getOpStatsLogger(GET_LIST_OF_ENTRIES_OF_LEDGER_REQUEST); + + statsLogger.registerGauge(ADD_ENTRY_IN_PROGRESS, new Gauge<Number>() { + @Override + public Number getDefaultValue() { + return 0; + } + + @Override + public Number getSample() { + return addsInProgress; + } + }); + + statsLogger.registerGauge(ADD_ENTRY_BLOCKED, new Gauge<Number>() { + @Override + public Number getDefaultValue() { + return 0; + } + + @Override + public Number getSample() { + return addsBlocked; + } + }); + + statsLogger.registerGauge(READ_ENTRY_IN_PROGRESS, new Gauge<Number>() { + @Override + public Number getDefaultValue() { + return 0; + } + + @Override + public Number getSample() { + return readsInProgress; + } + }); + + statsLogger.registerGauge(READ_ENTRY_BLOCKED, new Gauge<Number>() { + @Override + public Number getDefaultValue() { + return 0; + } + + @Override + public Number getSample() { + return readsBlocked; + } + }); + } + + // + // Add requests + // + + void blockAddRequest() { + addsBlocked.incrementAndGet(); + } + + void unblockAddRequest(long delayNanos) { + addEntryBlockedStats.registerSuccessfulEvent(delayNanos, TimeUnit.NANOSECONDS); + addsBlocked.decrementAndGet(); + } + + void trackAddRequest() { + final int curr = addsInProgress.incrementAndGet(); + maxAddsInProgress.accumulateAndGet(curr, Integer::max); + } + + void untrackAddRequest() { + addsInProgress.decrementAndGet(); + } + + int maxAddsInProgressCount() { + return maxAddsInProgress.get(); + } + + // + // Read requests + // + + void blockReadRequest() { + readsBlocked.incrementAndGet(); + }
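+
+    // Editor's note: an illustrative sketch, not part of the upstream patch. The
+    // in-progress/blocked counters above are meant to be driven in matching pairs
+    // by the request processor; a hypothetical caller would look roughly like:
+    //
+    //     requestStats.trackAddRequest();          // bumps the ADD_ENTRY_IN_PROGRESS gauge
+    //     try {
+    //         processAdd(request);                 // hypothetical handler
+    //     } finally {
+    //         requestStats.untrackAddRequest();    // always decrement, even on failure
+    //     }
+    //
+    // Likewise, blockAddRequest()/unblockAddRequest(delayNanos) bracket the time a
+    // throttled add spends queued; the measured delay feeds the ADD_ENTRY_BLOCKED_WAIT
+    // histogram, while the ADD_ENTRY_BLOCKED gauge samples how many adds are waiting.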
+ + void unblockReadRequest(long delayNanos) { + readEntryBlockedStats.registerSuccessfulEvent(delayNanos, TimeUnit.NANOSECONDS); + readsBlocked.decrementAndGet(); + } + + void trackReadRequest() { + final int curr = readsInProgress.incrementAndGet(); + maxReadsInProgress.accumulateAndGet(curr, Integer::max); + } + + void untrackReadRequest() { + readsInProgress.decrementAndGet(); + } + + int maxReadsInProgressCount() { + return maxReadsInProgress.get(); + } + +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/RequestUtils.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/RequestUtils.java index d384c817a43..9b8533da0c6 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/RequestUtils.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/RequestUtils.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/ResponseBuilder.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/ResponseBuilder.java index 342acd5df31..4faa3dbc340 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/ResponseBuilder.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/ResponseBuilder.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -21,16 +21,20 @@ package org.apache.bookkeeper.proto; import io.netty.buffer.ByteBuf; +import org.apache.bookkeeper.util.ByteBufList; class ResponseBuilder { static BookieProtocol.Response buildErrorResponse(int errorCode, BookieProtocol.Request r) { if (r.getOpCode() == BookieProtocol.ADDENTRY) { return BookieProtocol.AddResponse.create(r.getProtocolVersion(), errorCode, r.getLedgerId(), r.getEntryId()); - } else { - assert(r.getOpCode() == BookieProtocol.READENTRY); + } else if (r.getOpCode() == BookieProtocol.READENTRY) { return new BookieProtocol.ReadResponse(r.getProtocolVersion(), errorCode, r.getLedgerId(), r.getEntryId()); + } else { + assert(r.getOpCode() == BookieProtocol.BATCH_READ_ENTRY); + return new BookieProtocol.BatchedReadResponse(r.getProtocolVersion(), errorCode, + r.getLedgerId(), r.getEntryId(), ((BookieProtocol.BatchedReadRequest) r).getRequestId()); } } @@ -43,4 +47,9 @@ static BookieProtocol.Response buildReadResponse(ByteBuf data, BookieProtocol.Re return new BookieProtocol.ReadResponse(r.getProtocolVersion(), BookieProtocol.EOK, r.getLedgerId(), r.getEntryId(), data); } + + static BookieProtocol.Response buildBatchedReadResponse(ByteBufList data, BookieProtocol.BatchedReadRequest r) { + return new BookieProtocol.BatchedReadResponse(r.getProtocolVersion(), BookieProtocol.EOK, + r.getLedgerId(), r.getEntryId(), r.getRequestId(), data); + } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/SimpleBookieServiceInfoProvider.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/SimpleBookieServiceInfoProvider.java new file mode 100644 index 00000000000..4d2f69c4e25 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/SimpleBookieServiceInfoProvider.java @@ -0,0 +1,54 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.proto; + +import java.net.UnknownHostException; +import java.util.function.Supplier; +import org.apache.bookkeeper.bookie.BookieImpl; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.discover.BookieServiceInfo; +import org.apache.bookkeeper.discover.BookieServiceInfoUtils; +import org.apache.bookkeeper.net.BookieSocketAddress; + +/** + * Simple implementation of a BookieServiceInfo supplier. + */ +public class SimpleBookieServiceInfoProvider implements Supplier<BookieServiceInfo> { + private final BookieSocketAddress bookieSocketAddress; + + public SimpleBookieServiceInfoProvider(ServerConfiguration serverConfiguration) { + try { + this.bookieSocketAddress = BookieImpl.getBookieAddress(serverConfiguration); + } catch (UnknownHostException err) { + throw new RuntimeException(err); + } + } + + @Override + public BookieServiceInfo get() { + try { + return BookieServiceInfoUtils.buildLegacyBookieServiceInfo(bookieSocketAddress.toBookieId().toString()); + } catch (UnknownHostException err) { + throw new RuntimeException(err); + } + } + +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/WriteEntryProcessor.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/WriteEntryProcessor.java index f5af75ac4a6..2fbc8ffb0f7 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/WriteEntryProcessor.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/WriteEntryProcessor.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements.
See the NOTICE file * distributed with this work for additional information @@ -19,18 +19,15 @@ import com.google.common.annotations.VisibleForTesting; import io.netty.buffer.ByteBuf; -import io.netty.channel.Channel; import io.netty.util.Recycler; - import java.io.IOException; import java.util.concurrent.TimeUnit; - import org.apache.bookkeeper.bookie.BookieException; import org.apache.bookkeeper.bookie.BookieException.OperationRejectedException; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.common.util.MathUtils; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.proto.BookieProtocol.ParsedAddRequest; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.WriteCallback; -import org.apache.bookkeeper.util.MathUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -43,15 +40,17 @@ class WriteEntryProcessor extends PacketProcessorBase implemen long startTimeNanos; + @Override protected void reset() { super.reset(); startTimeNanos = -1L; } - public static WriteEntryProcessor create(ParsedAddRequest request, Channel channel, + public static WriteEntryProcessor create(ParsedAddRequest request, BookieRequestHandler requestHandler, BookieRequestProcessor requestProcessor) { WriteEntryProcessor wep = RECYCLER.get(); - wep.init(request, channel, requestProcessor); + wep.init(request, requestHandler, requestProcessor); + requestProcessor.onAddRequestStart(requestHandler.ctx().channel()); return wep; } @@ -61,11 +60,12 @@ protected void processPacket() { && !(request.isHighPriority() && requestProcessor.getBookie().isAvailableForHighPriorityWrites())) { LOG.warn("BookieServer is running in readonly mode," + " so rejecting the request from the client!"); - sendResponse(BookieProtocol.EREADONLY, + sendWriteReqResponse(BookieProtocol.EREADONLY, ResponseBuilder.buildErrorResponse(BookieProtocol.EREADONLY, request), - requestProcessor.getAddRequestStats()); + requestProcessor.getRequestStats().getAddRequestStats()); request.release(); request.recycle(); + recycle(); return; } @@ -74,22 +74,25 @@ protected void processPacket() { ByteBuf addData = request.getData(); try { if (request.isRecoveryAdd()) { - requestProcessor.getBookie().recoveryAddEntry(addData, this, channel, request.getMasterKey()); + requestProcessor.getBookie().recoveryAddEntry(addData, this, requestHandler, request.getMasterKey()); } else { - requestProcessor.getBookie().addEntry(addData, false, this, channel, request.getMasterKey()); + requestProcessor.getBookie().addEntry(addData, false, this, + requestHandler, request.getMasterKey()); } } catch (OperationRejectedException e) { - // Avoid to log each occurence of this exception as this can happen when the ledger storage is + requestProcessor.getRequestStats().getAddEntryRejectedCounter().inc(); + // Avoid logging each occurrence of this exception, as this can happen when the ledger storage is // unable to keep up with the write rate.
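// Editor's note: an illustrative aside, not part of the upstream patch. Reporting
// ETOOMANYREQUESTS below (instead of the old EIO) lets a client tell transient
// overload apart from a real storage failure; a hypothetical client-side handler
// could react roughly like this:
//
//     switch (rc) {
//         case BookieProtocol.ETOOMANYREQUESTS:
//             retryWithBackoff(op);  // hypothetical helper: back off, then retry
//             break;
//         case BookieProtocol.EIO:
//             failAndEscalate(op);   // hypothetical helper: surface the error
//             break;
//     }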
if (LOG.isDebugEnabled()) { LOG.debug("Operation rejected while writing {}", request, e); } - rc = BookieProtocol.EIO; + rc = BookieProtocol.ETOOMANYREQUESTS; } catch (IOException e) { LOG.error("Error writing {}", request, e); rc = BookieProtocol.EIO; } catch (BookieException.LedgerFencedException lfe) { - LOG.error("Attempt to write to fenced ledger", lfe); + LOG.warn("Write attempt on fenced ledger {} by client {}", request.getLedgerId(), + requestHandler.ctx().channel().remoteAddress()); rc = BookieProtocol.EFENCED; } catch (BookieException e) { LOG.error("Unauthorized access to ledger {}", request.getLedgerId(), e); @@ -99,33 +102,33 @@ protected void processPacket() { request.ledgerId, request.entryId, t.getMessage(), t); // some bad request which cause unexpected exception rc = BookieProtocol.EBADREQ; - } finally { - addData.release(); } if (rc != BookieProtocol.EOK) { - requestProcessor.getAddEntryStats().registerFailedEvent(MathUtils.elapsedNanos(startTimeNanos), - TimeUnit.NANOSECONDS); - sendResponse(rc, + requestProcessor.getRequestStats().getAddEntryStats() + .registerFailedEvent(MathUtils.elapsedNanos(startTimeNanos), TimeUnit.NANOSECONDS); + sendWriteReqResponse(rc, ResponseBuilder.buildErrorResponse(rc, request), - requestProcessor.getAddRequestStats()); + requestProcessor.getRequestStats().getAddRequestStats()); request.recycle(); + recycle(); } } @Override public void writeComplete(int rc, long ledgerId, long entryId, - BookieSocketAddress addr, Object ctx) { + BookieId addr, Object ctx) { if (BookieProtocol.EOK == rc) { - requestProcessor.getAddEntryStats().registerSuccessfulEvent(MathUtils.elapsedNanos(startTimeNanos), - TimeUnit.NANOSECONDS); + requestProcessor.getRequestStats().getAddEntryStats() + .registerSuccessfulEvent(MathUtils.elapsedNanos(startTimeNanos), TimeUnit.NANOSECONDS); } else { - requestProcessor.getAddEntryStats().registerFailedEvent(MathUtils.elapsedNanos(startTimeNanos), - TimeUnit.NANOSECONDS); + requestProcessor.getRequestStats().getAddEntryStats() + .registerFailedEvent(MathUtils.elapsedNanos(startTimeNanos), TimeUnit.NANOSECONDS); } - sendResponse(rc, - ResponseBuilder.buildAddResponse(request), - requestProcessor.getAddRequestStats()); + + requestHandler.prepareSendResponseV2(rc, request); + requestProcessor.onAddRequestFinish(); + request.recycle(); recycle(); } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/WriteEntryProcessorV3.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/WriteEntryProcessorV3.java index 7747e5c0e1a..3aaa9219f9a 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/WriteEntryProcessorV3.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/WriteEntryProcessorV3.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file @@ -22,33 +22,30 @@ import io.netty.buffer.ByteBuf; import io.netty.buffer.Unpooled; -import io.netty.channel.Channel; - import java.io.IOException; import java.util.EnumSet; import java.util.concurrent.TimeUnit; - import org.apache.bookkeeper.bookie.BookieException; import org.apache.bookkeeper.bookie.BookieException.OperationRejectedException; import org.apache.bookkeeper.client.api.WriteFlag; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.common.util.MathUtils; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.proto.BookkeeperProtocol.AddRequest; import org.apache.bookkeeper.proto.BookkeeperProtocol.AddResponse; import org.apache.bookkeeper.proto.BookkeeperProtocol.Request; import org.apache.bookkeeper.proto.BookkeeperProtocol.Response; import org.apache.bookkeeper.proto.BookkeeperProtocol.StatusCode; import org.apache.bookkeeper.stats.OpStatsLogger; -import org.apache.bookkeeper.util.MathUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; class WriteEntryProcessorV3 extends PacketProcessorBaseV3 { private static final Logger logger = LoggerFactory.getLogger(WriteEntryProcessorV3.class); - public WriteEntryProcessorV3(Request request, Channel channel, + public WriteEntryProcessorV3(Request request, BookieRequestHandler requestHandler, BookieRequestProcessor requestProcessor) { - super(request, channel, requestProcessor); - requestProcessor.onAddRequestStart(channel); + super(request, requestHandler, requestProcessor); + requestProcessor.onAddRequestStart(requestHandler.ctx().channel()); } // Returns null if there is no exception thrown @@ -78,13 +75,13 @@ private AddResponse getAddResponse() { BookkeeperInternalCallbacks.WriteCallback wcb = new BookkeeperInternalCallbacks.WriteCallback() { @Override public void writeComplete(int rc, long ledgerId, long entryId, - BookieSocketAddress addr, Object ctx) { + BookieId addr, Object ctx) { if (BookieProtocol.EOK == rc) { - requestProcessor.getAddEntryStats().registerSuccessfulEvent(MathUtils.elapsedNanos(startTimeNanos), - TimeUnit.NANOSECONDS); + requestProcessor.getRequestStats().getAddEntryStats() + .registerSuccessfulEvent(MathUtils.elapsedNanos(startTimeNanos), TimeUnit.NANOSECONDS); } else { - requestProcessor.getAddEntryStats().registerFailedEvent(MathUtils.elapsedNanos(startTimeNanos), - TimeUnit.NANOSECONDS); + requestProcessor.getRequestStats().getAddEntryStats() + .registerFailedEvent(MathUtils.elapsedNanos(startTimeNanos), TimeUnit.NANOSECONDS); } StatusCode status; @@ -105,7 +102,7 @@ public void writeComplete(int rc, long ledgerId, long entryId, .setStatus(addResponse.getStatus()) .setAddResponse(addResponse); Response resp = response.build(); - sendResponse(status, resp, requestProcessor.getAddRequestStats()); + sendResponse(status, resp, requestProcessor.getRequestStats().getAddRequestStats()); } }; final EnumSet<WriteFlag> writeFlags; @@ -120,18 +117,21 @@ public void writeComplete(int rc, long ledgerId, long entryId, ByteBuf entryToAdd = Unpooled.wrappedBuffer(addRequest.getBody().asReadOnlyByteBuffer()); try { if (RequestUtils.hasFlag(addRequest, AddRequest.Flag.RECOVERY_ADD)) { - requestProcessor.getBookie().recoveryAddEntry(entryToAdd, wcb, channel, masterKey); + requestProcessor.getBookie().recoveryAddEntry(entryToAdd, wcb, + requestHandler.ctx().channel(), masterKey); } else { - requestProcessor.getBookie().addEntry(entryToAdd, ackBeforeSync, wcb, channel, masterKey); + requestProcessor.getBookie().addEntry(entryToAdd, ackBeforeSync, wcb, +
requestHandler.ctx().channel(), masterKey); } status = StatusCode.EOK; } catch (OperationRejectedException e) { - // Avoid to log each occurence of this exception as this can happen when the ledger storage is + requestProcessor.getRequestStats().getAddEntryRejectedCounter().inc(); + // Avoid logging each occurrence of this exception, as this can happen when the ledger storage is // unable to keep up with the write rate. if (logger.isDebugEnabled()) { logger.debug("Operation rejected while writing {}", request, e); } - status = StatusCode.EIO; + status = StatusCode.ETOOMANYREQUESTS; } catch (IOException e) { logger.error("Error writing entry:{} to ledger:{}", entryId, ledgerId, e); @@ -161,7 +161,9 @@ public void writeComplete(int rc, long ledgerId, long entryId, } @Override - public void safeRun() { + public void run() { + requestProcessor.getRequestStats().getWriteThreadQueuedLatency() + .registerSuccessfulEvent(MathUtils.elapsedNanos(enqueueNanos), TimeUnit.NANOSECONDS); AddResponse addResponse = getAddResponse(); if (null != addResponse) { // This means there was an error and we should send this back. @@ -171,7 +173,7 @@ public void writeComplete(int rc, long ledgerId, long entryId, .setAddResponse(addResponse); Response resp = response.build(); sendResponse(addResponse.getStatus(), resp, - requestProcessor.getAddRequestStats()); + requestProcessor.getRequestStats().getAddRequestStats()); } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/WriteLacProcessorV3.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/WriteLacProcessorV3.java index 2f018ff9610..5590f5175ce 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/WriteLacProcessorV3.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/WriteLacProcessorV3.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements.
See the NOTICE file @@ -21,20 +21,17 @@ package org.apache.bookkeeper.proto; import io.netty.buffer.Unpooled; -import io.netty.channel.Channel; - import java.io.IOException; import java.nio.ByteBuffer; import java.util.concurrent.TimeUnit; - import org.apache.bookkeeper.bookie.BookieException; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.common.util.MathUtils; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.proto.BookkeeperProtocol.Request; import org.apache.bookkeeper.proto.BookkeeperProtocol.Response; import org.apache.bookkeeper.proto.BookkeeperProtocol.StatusCode; import org.apache.bookkeeper.proto.BookkeeperProtocol.WriteLacRequest; import org.apache.bookkeeper.proto.BookkeeperProtocol.WriteLacResponse; -import org.apache.bookkeeper.util.MathUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -42,9 +39,9 @@ class WriteLacProcessorV3 extends PacketProcessorBaseV3 implements Runnable { private static final Logger logger = LoggerFactory.getLogger(WriteLacProcessorV3.class); - public WriteLacProcessorV3(Request request, Channel channel, + public WriteLacProcessorV3(Request request, BookieRequestHandler requestHandler, BookieRequestProcessor requestProcessor) { - super(request, channel, requestProcessor); + super(request, requestHandler, requestProcessor); } // Returns null if there is no exception thrown @@ -69,13 +66,13 @@ private WriteLacResponse getWriteLacResponse() { BookkeeperInternalCallbacks.WriteCallback writeCallback = new BookkeeperInternalCallbacks.WriteCallback() { @Override - public void writeComplete(int rc, long ledgerId, long entryId, BookieSocketAddress addr, Object ctx) { + public void writeComplete(int rc, long ledgerId, long entryId, BookieId addr, Object ctx) { if (BookieProtocol.EOK == rc) { - requestProcessor.writeLacStats.registerSuccessfulEvent(MathUtils.elapsedNanos(startTimeNanos), - TimeUnit.NANOSECONDS); + requestProcessor.getRequestStats().getWriteLacStats() + .registerSuccessfulEvent(MathUtils.elapsedNanos(startTimeNanos), TimeUnit.NANOSECONDS); } else { - requestProcessor.writeLacStats.registerFailedEvent(MathUtils.elapsedNanos(startTimeNanos), - TimeUnit.NANOSECONDS); + requestProcessor.getRequestStats().getWriteLacStats() + .registerFailedEvent(MathUtils.elapsedNanos(startTimeNanos), TimeUnit.NANOSECONDS); } StatusCode status; @@ -96,7 +93,7 @@ public void writeComplete(int rc, long ledgerId, long entryId, BookieSocketAddre .setStatus(writeLacResponse.getStatus()) .setWriteLacResponse(writeLacResponse); Response resp = response.build(); - sendResponse(status, resp, requestProcessor.writeLacRequestStats); + sendResponse(status, resp, requestProcessor.getRequestStats().getWriteLacRequestStats()); } }; @@ -105,12 +102,18 @@ public void writeComplete(int rc, long ledgerId, long entryId, BookieSocketAddre byte[] masterKey = writeLacRequest.getMasterKey().toByteArray(); try { - requestProcessor.bookie.setExplicitLac(Unpooled.wrappedBuffer(lacToAdd), writeCallback, channel, masterKey); + requestProcessor.bookie.setExplicitLac(Unpooled.wrappedBuffer(lacToAdd), + writeCallback, requestHandler, masterKey); status = StatusCode.EOK; } catch (IOException e) { logger.error("Error saving lac {} for ledger:{}", lac, ledgerId, e); status = StatusCode.EIO; + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + logger.error("Interrupted while saving lac {} for ledger:{}", + lac, ledgerId, e); + status = StatusCode.EIO; } catch (BookieException e) { 
logger.error("Unauthorized access to ledger:{} while adding lac:{}", ledgerId, lac, e); @@ -125,8 +128,8 @@ public void writeComplete(int rc, long ledgerId, long entryId, BookieSocketAddre // If everything is okay, we return null so that the calling function // dosn't return a response back to the caller. if (!status.equals(StatusCode.EOK)) { - requestProcessor.writeLacStats.registerFailedEvent(MathUtils.elapsedNanos(startTimeNanos), - TimeUnit.NANOSECONDS); + requestProcessor.getRequestStats().getWriteLacStats() + .registerFailedEvent(MathUtils.elapsedNanos(startTimeNanos), TimeUnit.NANOSECONDS); writeLacResponse.setStatus(status); return writeLacResponse.build(); } @@ -134,7 +137,7 @@ public void writeComplete(int rc, long ledgerId, long entryId, BookieSocketAddre } @Override - public void safeRun() { + public void run() { WriteLacResponse writeLacResponse = getWriteLacResponse(); if (null != writeLacResponse) { Response.Builder response = Response.newBuilder() @@ -142,7 +145,10 @@ public void safeRun() { .setStatus(writeLacResponse.getStatus()) .setWriteLacResponse(writeLacResponse); Response resp = response.build(); - sendResponse(writeLacResponse.getStatus(), resp, requestProcessor.writeLacRequestStats); + sendResponse( + writeLacResponse.getStatus(), + resp, + requestProcessor.getRequestStats().getWriteLacRequestStats()); } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/checksum/CRC32CDigestManager.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/checksum/CRC32CDigestManager.java index d6d1949b47f..5343357198a 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/checksum/CRC32CDigestManager.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/checksum/CRC32CDigestManager.java @@ -19,30 +19,15 @@ */ import com.scurrilous.circe.checksum.Crc32cIntChecksum; -import com.scurrilous.circe.crc.Sse42Crc32C; - import io.netty.buffer.ByteBuf; -import io.netty.util.concurrent.FastThreadLocal; - +import io.netty.buffer.ByteBufAllocator; import lombok.extern.slf4j.Slf4j; -import org.apache.commons.lang3.mutable.MutableInt; - @Slf4j class CRC32CDigestManager extends DigestManager { - private static final FastThreadLocal currentCrc = new FastThreadLocal() { - @Override - protected MutableInt initialValue() throws Exception { - return new MutableInt(0); - } - }; - - public CRC32CDigestManager(long ledgerId, boolean useV2Protocol) { - super(ledgerId, useV2Protocol); - if (!Sse42Crc32C.isSupported()) { - log.error("Sse42Crc32C is not supported, will use less slower CRC32C implementation."); - } + public CRC32CDigestManager(long ledgerId, boolean useV2Protocol, ByteBufAllocator allocator) { + super(ledgerId, useV2Protocol, allocator); } @Override @@ -51,16 +36,27 @@ int getMacCodeLength() { } @Override - void populateValueAndReset(ByteBuf buf) { - MutableInt current = currentCrc.get(); - buf.writeInt(current.intValue()); - current.setValue(0); + boolean isInt32Digest() { + return true; + } + + @Override + void populateValueAndReset(int digest, ByteBuf buf) { + buf.writeInt(digest); + } + + @Override + int internalUpdate(int digest, ByteBuf data, int offset, int len) { + return Crc32cIntChecksum.resumeChecksum(digest, data, offset, len); + } + + @Override + int internalUpdate(int digest, byte[] buffer, int offset, int len) { + return Crc32cIntChecksum.resumeChecksum(digest, buffer, offset, len); } @Override - void update(ByteBuf data) { - MutableInt current = currentCrc.get(); - final int lastCrc = current.intValue(); - 
current.setValue(Crc32cIntChecksum.resumeChecksum(lastCrc, data)); + boolean acceptsMemoryAddressBuffer() { + return Crc32cIntChecksum.acceptsMemoryAddressBuffer(); } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/checksum/CRC32DigestManager.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/checksum/CRC32DigestManager.java index b71ab596262..0d18312cfc8 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/checksum/CRC32DigestManager.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/checksum/CRC32DigestManager.java @@ -19,6 +19,7 @@ */ import io.netty.buffer.ByteBuf; +import io.netty.buffer.ByteBufAllocator; import io.netty.util.concurrent.FastThreadLocal; /** @@ -32,7 +33,8 @@ class CRC32DigestManager extends DigestManager { interface CRC32Digest { long getValueAndReset(); - void update(ByteBuf buf); + void update(ByteBuf buf, int offset, int len); + void update(byte[] buffer, int offset, int len); } private static final FastThreadLocal<CRC32Digest> crc = new FastThreadLocal<CRC32Digest>() { @@ -46,8 +48,8 @@ protected CRC32Digest initialValue() { } }; - public CRC32DigestManager(long ledgerId, boolean useV2Protocol) { - super(ledgerId, useV2Protocol); + public CRC32DigestManager(long ledgerId, boolean useV2Protocol, ByteBufAllocator allocator) { + super(ledgerId, useV2Protocol, allocator); } @Override @@ -56,12 +58,30 @@ int getMacCodeLength() { } @Override - void populateValueAndReset(ByteBuf buf) { + void populateValueAndReset(int digest, ByteBuf buf) { buf.writeLong(crc.get().getValueAndReset()); } @Override - void update(ByteBuf data) { - crc.get().update(data); + int internalUpdate(int digest, ByteBuf data, int offset, int len) { + crc.get().update(data, offset, len); + return 0; + } + + @Override + int internalUpdate(int digest, byte[] buffer, int offset, int len) { + crc.get().update(buffer, offset, len); + return 0; + } + + @Override + boolean isInt32Digest() { + // This is stored as 8 bytes + return false; + } + + @Override + boolean acceptsMemoryAddressBuffer() { + return DirectMemoryCRC32Digest.isSupported(); } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/checksum/DigestManager.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/checksum/DigestManager.java index 4c174a8df16..1e78e4075eb 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/checksum/DigestManager.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/checksum/DigestManager.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements.
See the NOTICE file * distributed with this work for additional information @@ -18,16 +18,21 @@ package org.apache.bookkeeper.proto.checksum; import io.netty.buffer.ByteBuf; +import io.netty.buffer.ByteBufAllocator; +import io.netty.buffer.ByteBufUtil; import io.netty.buffer.PooledByteBufAllocator; import io.netty.buffer.Unpooled; -import io.netty.util.ReferenceCountUtil; - +import io.netty.util.ReferenceCounted; +import io.netty.util.concurrent.FastThreadLocal; import java.security.GeneralSecurityException; - +import java.security.NoSuchAlgorithmException; import org.apache.bookkeeper.client.BKException.BKDigestMatchException; import org.apache.bookkeeper.client.LedgerHandle; +import org.apache.bookkeeper.proto.BookieProtoEncoding; +import org.apache.bookkeeper.proto.BookieProtocol; import org.apache.bookkeeper.proto.DataFormats.LedgerMetadataFormat.DigestType; import org.apache.bookkeeper.util.ByteBufList; +import org.apache.bookkeeper.util.ByteBufVisitor; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -46,46 +51,61 @@ public abstract class DigestManager { final long ledgerId; final boolean useV2Protocol; + private final ByteBufAllocator allocator; + private final DigestUpdaterByteBufVisitorCallback byteBufVisitorCallback; abstract int getMacCodeLength(); - void update(byte[] data) { - update(Unpooled.wrappedBuffer(data, 0, data.length)); + abstract int internalUpdate(int digest, ByteBuf buffer, int offset, int len); + + abstract int internalUpdate(int digest, byte[] buffer, int offset, int len); + + final int update(int digest, ByteBuf buffer, int offset, int len) { + if (buffer.hasMemoryAddress() && acceptsMemoryAddressBuffer()) { + return internalUpdate(digest, buffer, offset, len); + } else if (buffer.hasArray()) { + return internalUpdate(digest, buffer.array(), buffer.arrayOffset() + offset, len); + } else { + UpdateContext updateContext = new UpdateContext(digest); + ByteBufVisitor.visitBuffers(buffer, offset, len, byteBufVisitorCallback, updateContext); + return updateContext.digest; + } } - abstract void update(ByteBuf buffer); + abstract void populateValueAndReset(int digest, ByteBuf buffer); - abstract void populateValueAndReset(ByteBuf buffer); + abstract boolean isInt32Digest(); final int macCodeLength; - public DigestManager(long ledgerId, boolean useV2Protocol) { + public DigestManager(long ledgerId, boolean useV2Protocol, ByteBufAllocator allocator) { this.ledgerId = ledgerId; this.useV2Protocol = useV2Protocol; - macCodeLength = getMacCodeLength(); - } - - public static DigestManager instantiate(long ledgerId, byte[] passwd, DigestType digestType) - throws GeneralSecurityException { - return instantiate(ledgerId, passwd, digestType, false); + this.macCodeLength = getMacCodeLength(); + this.allocator = allocator; + this.byteBufVisitorCallback = new DigestUpdaterByteBufVisitorCallback(); } public static DigestManager instantiate(long ledgerId, byte[] passwd, DigestType digestType, - boolean useV2Protocol) throws GeneralSecurityException { + ByteBufAllocator allocator, boolean useV2Protocol) throws GeneralSecurityException { switch(digestType) { case HMAC: - return new MacDigestManager(ledgerId, passwd, useV2Protocol); + return new MacDigestManager(ledgerId, passwd, useV2Protocol, allocator); case CRC32: - return new CRC32DigestManager(ledgerId, useV2Protocol); + return new CRC32DigestManager(ledgerId, useV2Protocol, allocator); case CRC32C: - return new CRC32CDigestManager(ledgerId, useV2Protocol); + return new CRC32CDigestManager(ledgerId, useV2Protocol, 
allocator); case DUMMY: - return new DummyDigestManager(ledgerId, useV2Protocol); + return new DummyDigestManager(ledgerId, useV2Protocol, allocator); default: throw new GeneralSecurityException("Unknown checksum type: " + digestType); } } + public static byte[] generateMasterKey(byte[] password) throws NoSuchAlgorithmException { + return password.length > 0 ? MacDigestManager.genDigest("ledger", password) : MacDigestManager.EMPTY_LEDGER_KEY; + } + /** * Computes the digest for an entry and puts bytes together for sending. * @@ -95,45 +115,72 @@ public static DigestManager instantiate(long ledgerId, byte[] passwd, DigestType * @param data * @return */ - public ByteBufList computeDigestAndPackageForSending(long entryId, long lastAddConfirmed, long length, - ByteBuf data) { + public ReferenceCounted computeDigestAndPackageForSending(long entryId, long lastAddConfirmed, long length, + ByteBuf data, byte[] masterKey, int flags) { if (this.useV2Protocol) { - /* - * For V2 protocol, use pooled direct ByteBuf's to avoid object allocation in DigestManager. - */ - ByteBuf headersBuffer = PooledByteBufAllocator.DEFAULT.buffer(METADATA_LENGTH + macCodeLength); - headersBuffer.writeLong(ledgerId); - headersBuffer.writeLong(entryId); - headersBuffer.writeLong(lastAddConfirmed); - headersBuffer.writeLong(length); - - update(headersBuffer); - update(data); - populateValueAndReset(headersBuffer); - - return ByteBufList.get(headersBuffer, data); + return computeDigestAndPackageForSendingV2(entryId, lastAddConfirmed, length, data, masterKey, flags); } else { - /* - * For V3 protocol, use unpooled heap ByteBuf's (backed by accessible array): The one object - * allocation here saves us later allocations when converting to protobuf ByteString. - */ - ByteBuf sendBuffer = Unpooled.buffer(METADATA_LENGTH + macCodeLength + data.readableBytes()); - sendBuffer.writeLong(ledgerId); - sendBuffer.writeLong(entryId); - sendBuffer.writeLong(lastAddConfirmed); - sendBuffer.writeLong(length); - - update(sendBuffer); - update(data); - populateValueAndReset(sendBuffer); - - sendBuffer.writeBytes(data, data.readerIndex(), data.readableBytes()); - ReferenceCountUtil.release(data); - - return ByteBufList.get(sendBuffer); + return computeDigestAndPackageForSendingV3(entryId, lastAddConfirmed, length, data); } } + private ReferenceCounted computeDigestAndPackageForSendingV2(long entryId, long lastAddConfirmed, long length, + ByteBuf data, byte[] masterKey, int flags) { + boolean isSmallEntry = data.readableBytes() < BookieProtoEncoding.SMALL_ENTRY_SIZE_THRESHOLD; + + int headersSize = 4 // Request header + + BookieProtocol.MASTER_KEY_LENGTH // for the master key + + METADATA_LENGTH // + + macCodeLength; + int payloadSize = data.readableBytes(); + int bufferSize = 4 + headersSize + (isSmallEntry ?
payloadSize : 0); + + ByteBuf buf = allocator.buffer(bufferSize, bufferSize); + buf.writeInt(headersSize + payloadSize); + buf.writeInt( + BookieProtocol.PacketHeader.toInt( + BookieProtocol.CURRENT_PROTOCOL_VERSION, BookieProtocol.ADDENTRY, (short) flags)); + buf.writeBytes(masterKey, 0, BookieProtocol.MASTER_KEY_LENGTH); + + // The checksum is computed on the next part of the buffer only + buf.readerIndex(buf.writerIndex()); + buf.writeLong(ledgerId); + buf.writeLong(entryId); + buf.writeLong(lastAddConfirmed); + buf.writeLong(length); + + // Compute checksum over the headers + int digest = update(0, buf, buf.readerIndex(), buf.readableBytes()); + digest = update(digest, data, data.readerIndex(), data.readableBytes()); + + populateValueAndReset(digest, buf); + + // Reset the reader index to the beginning + buf.readerIndex(0); + + if (isSmallEntry) { + buf.writeBytes(data, data.readerIndex(), data.readableBytes()); + data.release(); + return buf; + } else { + return ByteBufList.get(buf, data); + } + } + + private ByteBufList computeDigestAndPackageForSendingV3(long entryId, long lastAddConfirmed, long length, + ByteBuf data) { + ByteBuf headersBuffer = Unpooled.buffer(METADATA_LENGTH + macCodeLength); + headersBuffer.writeLong(ledgerId); + headersBuffer.writeLong(entryId); + headersBuffer.writeLong(lastAddConfirmed); + headersBuffer.writeLong(length); + + int digest = update(0, headersBuffer, 0, METADATA_LENGTH); + digest = update(digest, data, data.readerIndex(), data.readableBytes()); + populateValueAndReset(digest, headersBuffer); + return ByteBufList.get(headersBuffer, data); + } + /** * Computes the digest for writeLac for sending. * @@ -144,15 +191,15 @@ public ByteBufList computeDigestAndPackageForSendingLac(long lac) { ByteBuf headersBuffer; if (this.useV2Protocol) { - headersBuffer = PooledByteBufAllocator.DEFAULT.buffer(LAC_METADATA_LENGTH + macCodeLength); + headersBuffer = allocator.buffer(LAC_METADATA_LENGTH + macCodeLength); } else { headersBuffer = Unpooled.buffer(LAC_METADATA_LENGTH + macCodeLength); } headersBuffer.writeLong(ledgerId); headersBuffer.writeLong(lac); - update(headersBuffer); - populateValueAndReset(headersBuffer); + int digest = update(0, headersBuffer, 0, LAC_METADATA_LENGTH); + populateValueAndReset(digest, headersBuffer); return ByteBufList.get(headersBuffer); } @@ -165,6 +212,18 @@ private void verifyDigest(long entryId, ByteBuf dataReceived) throws BKDigestMat verifyDigest(entryId, dataReceived, false); } + private static final FastThreadLocal<ByteBuf> DIGEST_BUFFER = new FastThreadLocal<ByteBuf>() { + @Override + protected ByteBuf initialValue() throws Exception { + return PooledByteBufAllocator.DEFAULT.directBuffer(1024); + } + + @Override + protected void onRemoval(ByteBuf value) throws Exception { + value.release(); + } + }; + private void verifyDigest(long entryId, ByteBuf dataReceived, boolean skipEntryIdCheck) throws BKDigestMatchException { @@ -175,21 +234,26 @@ private void verifyDigest(long entryId, ByteBuf dataReceived, boolean skipEntryI this.getClass().getName(), dataReceived.readableBytes()); throw new BKDigestMatchException(); } - update(dataReceived.slice(0, METADATA_LENGTH)); + int digest = update(0, dataReceived, 0, METADATA_LENGTH); int offset = METADATA_LENGTH + macCodeLength; - update(dataReceived.slice(offset, dataReceived.readableBytes() - offset)); + digest = update(digest, dataReceived, offset, dataReceived.readableBytes() - offset); - ByteBuf digest =
PooledByteBufAllocator.DEFAULT.buffer(macCodeLength); - populateValueAndReset(digest); + if (isInt32Digest()) { + int receivedDigest = dataReceived.getInt(METADATA_LENGTH); + if (receivedDigest != digest) { + logger.error("Digest mismatch for ledger-id: " + ledgerId + ", entry-id: " + entryId); + throw new BKDigestMatchException(); + } + } else { + ByteBuf digestBuf = DIGEST_BUFFER.get(); + digestBuf.clear(); + populateValueAndReset(digest, digestBuf); - try { - if (digest.compareTo(dataReceived.slice(METADATA_LENGTH, macCodeLength)) != 0) { + if (!ByteBufUtil.equals(digestBuf, 0, dataReceived, METADATA_LENGTH, macCodeLength)) { logger.error("Mac mismatch for ledger-id: " + ledgerId + ", entry-id: " + entryId); throw new BKDigestMatchException(); } - } finally { - digest.release(); } long actualLedgerId = dataReceived.readLong(); @@ -218,18 +282,23 @@ public long verifyDigestAndReturnLac(ByteBuf dataReceived) throws BKDigestMatchE throw new BKDigestMatchException(); } - update(dataReceived.slice(0, LAC_METADATA_LENGTH)); + int digest = update(0, dataReceived, 0, LAC_METADATA_LENGTH); - ByteBuf digest = PooledByteBufAllocator.DEFAULT.buffer(macCodeLength); - try { - populateValueAndReset(digest); + if (isInt32Digest()) { + int receivedDigest = dataReceived.getInt(LAC_METADATA_LENGTH); + if (receivedDigest != digest) { + logger.error("Digest mismatch for ledger-id LAC: " + ledgerId); + throw new BKDigestMatchException(); + } + } else { + ByteBuf digestBuf = DIGEST_BUFFER.get(); + digestBuf.clear(); + populateValueAndReset(digest, digestBuf); - if (digest.compareTo(dataReceived.slice(LAC_METADATA_LENGTH, macCodeLength)) != 0) { + if (!ByteBufUtil.equals(digestBuf, 0, dataReceived, LAC_METADATA_LENGTH, macCodeLength)) { logger.error("Mac mismatch for ledger-id LAC: " + ledgerId); throw new BKDigestMatchException(); } - } finally { - digest.release(); } long actualLedgerId = dataReceived.readLong(); @@ -288,4 +357,34 @@ public RecoveryData verifyDigestAndReturnLastConfirmed(ByteBuf dataReceived) thr long length = dataReceived.readLong(); return new RecoveryData(lastAddConfirmed, length); } + + private static class UpdateContext { + int digest; + + UpdateContext(int digest) { + this.digest = digest; + } + } + + private class DigestUpdaterByteBufVisitorCallback implements ByteBufVisitor.ByteBufVisitorCallback<UpdateContext> { + + @Override + public void visitBuffer(UpdateContext context, ByteBuf visitBuffer, int visitIndex, int visitLength) { + // recursively visit the sub buffer and update the digest + context.digest = internalUpdate(context.digest, visitBuffer, visitIndex, visitLength); + } + + @Override + public void visitArray(UpdateContext context, byte[] visitArray, int visitIndex, int visitLength) { + // update the digest with the array + context.digest = internalUpdate(context.digest, visitArray, visitIndex, visitLength); + } + + @Override + public boolean acceptsMemoryAddress(UpdateContext context) { + return DigestManager.this.acceptsMemoryAddressBuffer(); + } + } + + abstract boolean acceptsMemoryAddressBuffer(); } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/checksum/DirectMemoryCRC32Digest.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/checksum/DirectMemoryCRC32Digest.java index a895153f5fa..07a2bdf464f 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/checksum/DirectMemoryCRC32Digest.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/checksum/DirectMemoryCRC32Digest.java @@ -18,11 +18,9 @@ package
org.apache.bookkeeper.proto.checksum; import io.netty.buffer.ByteBuf; - import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; import java.util.zip.CRC32; - import org.apache.bookkeeper.proto.checksum.CRC32DigestManager.CRC32Digest; /** @@ -45,28 +43,39 @@ public long getValueAndReset() { } @Override - public void update(ByteBuf buf) { - int index = buf.readerIndex(); - int length = buf.readableBytes(); - + public void update(ByteBuf buf, int index, int length) { try { if (buf.hasMemoryAddress()) { // Calculate CRC directly from the direct memory pointer crcValue = (int) updateByteBuffer.invoke(null, crcValue, buf.memoryAddress(), index, length); } else if (buf.hasArray()) { // Use the internal method to update from array based - crcValue = (int) updateBytes.invoke(null, crcValue, buf.array(), buf.arrayOffset() + index, length); + crcValue = updateArray(crcValue, buf.array(), buf.arrayOffset() + index, length); } else { // Fallback to data copy if buffer is not contiguous byte[] b = new byte[length]; buf.getBytes(index, b, 0, length); - crcValue = (int) updateBytes.invoke(null, crcValue, b, 0, b.length); + crcValue = updateArray(crcValue, b, 0, length); } } catch (IllegalAccessException | InvocationTargetException e) { throw new RuntimeException(e); } } + private static int updateArray(int crcValue, byte[] buf, int offset, int length) + throws IllegalAccessException, InvocationTargetException { + return (int) updateBytes.invoke(null, crcValue, buf, offset, length); + } + + @Override + public void update(byte[] buffer, int offset, int len) { + try { + crcValue = updateArray(crcValue, buffer, offset, len); + } catch (IllegalAccessException | InvocationTargetException e) { + throw new RuntimeException(e); + } + } + private static final Method updateByteBuffer; private static final Method updateBytes; @@ -83,7 +92,7 @@ public void update(ByteBuf buf) { updateBytesMethod = CRC32.class.getDeclaredMethod("updateBytes", int.class, byte[].class, int.class, int.class); updateBytesMethod.setAccessible(true); - } catch (NoSuchMethodException | SecurityException e) { + } catch (Throwable e) { updateByteBufferMethod = null; updateBytesMethod = null; } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/checksum/DummyDigestManager.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/checksum/DummyDigestManager.java index 1b771f0785b..e2fff9bd7ca 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/checksum/DummyDigestManager.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/checksum/DummyDigestManager.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -21,14 +21,15 @@ package org.apache.bookkeeper.proto.checksum; import io.netty.buffer.ByteBuf; +import io.netty.buffer.ByteBufAllocator; /** * This class provides a noop digest implementation. 
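
The reflective lookup above now catches Throwable because on newer JDKs setAccessible can fail with exceptions that are neither NoSuchMethodException nor SecurityException (for example java.lang.reflect.InaccessibleObjectException), which must still leave the class usable. A hedged sketch of that guarded-lookup-with-fallback shape; illustrative only, not the class above:

import java.lang.reflect.Method;
import java.util.zip.CRC32;

public class GuardedCrcLookupSketch {

    // Try to grab the JDK-internal static CRC32.updateBytes(int, byte[], int, int).
    // Any failure (missing method, module access denial, security manager)
    // leaves the handle null and we fall back to the public API.
    private static final Method UPDATE_BYTES;

    static {
        Method m;
        try {
            m = CRC32.class.getDeclaredMethod(
                    "updateBytes", int.class, byte[].class, int.class, int.class);
            m.setAccessible(true);
        } catch (Throwable t) {
            m = null;
        }
        UPDATE_BYTES = m;
    }

    static long checksum(byte[] buf) throws Exception {
        if (UPDATE_BYTES != null) {
            // Seedable, allocation-free internal path.
            return ((int) UPDATE_BYTES.invoke(null, 0, buf, 0, buf.length)) & 0xFFFFFFFFL;
        }
        // Public-API fallback: stateful and not seedable with a prior value,
        // which is why the real digest classes keep a CRC32 instance instead.
        CRC32 crc = new CRC32();
        crc.update(buf, 0, buf.length);
        return crc.getValue();
    }
}
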
 */
public class DummyDigestManager extends DigestManager {
-    public DummyDigestManager(long ledgerId, boolean useV2Protocol) {
-        super(ledgerId, useV2Protocol);
+    public DummyDigestManager(long ledgerId, boolean useV2Protocol, ByteBufAllocator allocator) {
+        super(ledgerId, useV2Protocol, allocator);
     }
 
     @Override
@@ -37,8 +38,25 @@ int getMacCodeLength() {
     }
 
     @Override
-    void update(ByteBuf buffer) {}
+    int internalUpdate(int digest, ByteBuf buffer, int offset, int len) {
+        return 0;
+    }
+
+    @Override
+    int internalUpdate(int digest, byte[] buffer, int offset, int len) {
+        return 0;
+    }
 
     @Override
-    void populateValueAndReset(ByteBuf buffer) {}
+    void populateValueAndReset(int digest, ByteBuf buffer) {}
+
+    @Override
+    boolean isInt32Digest() {
+        return false;
+    }
+
+    @Override
+    boolean acceptsMemoryAddressBuffer() {
+        return false;
+    }
 }
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/checksum/MacDigestManager.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/checksum/MacDigestManager.java
index 8d830a488d5..f9fda5a531d 100644
--- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/checksum/MacDigestManager.java
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/checksum/MacDigestManager.java
@@ -18,10 +18,10 @@
 package org.apache.bookkeeper.proto.checksum;
 
-import static com.google.common.base.Charsets.UTF_8;
+import static java.nio.charset.StandardCharsets.UTF_8;
 
 import io.netty.buffer.ByteBuf;
-
+import io.netty.buffer.ByteBufAllocator;
 import java.security.GeneralSecurityException;
 import java.security.MessageDigest;
 import java.security.NoSuchAlgorithmException;
@@ -46,6 +46,15 @@ public class MacDigestManager extends DigestManager {
 
     final byte[] passwd;
 
+    static final byte[] EMPTY_LEDGER_KEY;
+    static {
+        try {
+            EMPTY_LEDGER_KEY = MacDigestManager.genDigest("ledger", new byte[0]);
+        } catch (NoSuchAlgorithmException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
     private final ThreadLocal<Mac> mac = new ThreadLocal<Mac>() {
         @Override
         protected Mac initialValue() {
@@ -62,9 +71,9 @@ protected Mac initialValue() {
         }
     };
 
-    public MacDigestManager(long ledgerId, byte[] passwd, boolean useV2Protocol)
+    public MacDigestManager(long ledgerId, byte[] passwd, boolean useV2Protocol, ByteBufAllocator allocator)
             throws GeneralSecurityException {
-        super(ledgerId, useV2Protocol);
+        super(ledgerId, useV2Protocol, allocator);
         this.passwd = Arrays.copyOf(passwd, passwd.length);
     }
 
@@ -82,14 +91,29 @@ int getMacCodeLength() {
 
     @Override
-    void populateValueAndReset(ByteBuf buffer) {
+    void populateValueAndReset(int digest, ByteBuf buffer) {
         buffer.writeBytes(mac.get().doFinal());
     }
 
     @Override
-    void update(ByteBuf data) {
-        mac.get().update(data.nioBuffer());
+    int internalUpdate(int digest, ByteBuf data, int offset, int len) {
+        mac.get().update(data.slice(offset, len).nioBuffer());
+        return 0;
     }
 
+    @Override
+    int internalUpdate(int digest, byte[] buffer, int offset, int len) {
+        mac.get().update(buffer, offset, len);
+        return 0;
+    }
+
+    @Override
+    boolean isInt32Digest() {
+        return false;
+    }
+
+    @Override
+    boolean acceptsMemoryAddressBuffer() {
+        return false;
+    }
 }
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/checksum/StandardCRC32Digest.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/checksum/StandardCRC32Digest.java
index f103b14c4f5..7635e3e9f20 100644
--- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/checksum/StandardCRC32Digest.java
+++
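
EMPTY_LEDGER_KEY above precomputes the MAC key for the common empty-password case so the hash is not redone per ledger. Assuming genDigest is, as in MacDigestManager, a SHA-1 over a pad string followed by the password bytes (the authoritative definition lives in that class), the derivation is roughly:

import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;

public class LedgerKeySketch {

    // Sketch of the key derivation: SHA-1 over a fixed pad plus the password.
    // Illustrative; MacDigestManager.genDigest is the real implementation.
    static byte[] genDigest(String pad, byte[] passwd) throws NoSuchAlgorithmException {
        MessageDigest digest = MessageDigest.getInstance("SHA-1");
        digest.update(pad.getBytes(StandardCharsets.UTF_8));
        digest.update(passwd);
        return digest.digest();
    }

    // Computed once: every ledger opened without a password shares this key.
    static final byte[] EMPTY_LEDGER_KEY;
    static {
        try {
            EMPTY_LEDGER_KEY = genDigest("ledger", new byte[0]);
        } catch (NoSuchAlgorithmException e) {
            throw new RuntimeException(e);
        }
    }
}
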
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/checksum/StandardCRC32Digest.java @@ -18,9 +18,7 @@ package org.apache.bookkeeper.proto.checksum; import io.netty.buffer.ByteBuf; - import java.util.zip.CRC32; - import org.apache.bookkeeper.proto.checksum.CRC32DigestManager.CRC32Digest; /** @@ -38,7 +36,12 @@ public long getValueAndReset() { } @Override - public void update(ByteBuf buf) { - crc.update(buf.nioBuffer()); + public void update(ByteBuf buf, int offset, int len) { + crc.update(buf.slice(offset, len).nioBuffer()); + } + + @Override + public void update(byte[] buffer, int offset, int len) { + crc.update(buffer, offset, len); } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/Auditor.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/Auditor.java index 788aaa2764d..9c6be197550 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/Auditor.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/Auditor.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -21,7 +21,6 @@ package org.apache.bookkeeper.replication; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Stopwatch; import com.google.common.collect.Lists; import com.google.common.collect.Sets; import com.google.common.util.concurrent.SettableFuture; @@ -29,44 +28,31 @@ import java.util.ArrayList; import java.util.Collection; import java.util.List; -import java.util.Map; import java.util.Set; -import java.util.concurrent.CompletableFuture; import java.util.concurrent.Executors; import java.util.concurrent.Future; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.ThreadFactory; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.function.BiConsumer; import java.util.stream.Collectors; import org.apache.bookkeeper.client.BKException; -import org.apache.bookkeeper.client.BKException.Code; import org.apache.bookkeeper.client.BookKeeper; import org.apache.bookkeeper.client.BookKeeperAdmin; -import org.apache.bookkeeper.client.LedgerChecker; -import org.apache.bookkeeper.client.LedgerFragment; -import org.apache.bookkeeper.client.LedgerHandle; -import org.apache.bookkeeper.common.concurrent.FutureUtils; import org.apache.bookkeeper.conf.ClientConfiguration; import org.apache.bookkeeper.conf.ServerConfiguration; -import org.apache.bookkeeper.meta.AbstractZkLedgerManagerFactory; import org.apache.bookkeeper.meta.LedgerManager; import org.apache.bookkeeper.meta.LedgerManagerFactory; import org.apache.bookkeeper.meta.LedgerUnderreplicationManager; -import org.apache.bookkeeper.meta.zk.ZKMetadataDriverBase; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GenericCallback; -import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.Processor; import org.apache.bookkeeper.replication.ReplicationException.BKAuditException; import org.apache.bookkeeper.replication.ReplicationException.CompatibilityException; import org.apache.bookkeeper.replication.ReplicationException.UnavailableException; -import org.apache.bookkeeper.stats.Counter; -import org.apache.bookkeeper.stats.OpStatsLogger; +import org.apache.bookkeeper.stats.NullStatsLogger; import org.apache.bookkeeper.stats.StatsLogger; -import 
org.apache.bookkeeper.zookeeper.ZooKeeperClient; import org.apache.commons.collections4.CollectionUtils; -import org.apache.zookeeper.AsyncCallback; -import org.apache.zookeeper.KeeperException; -import org.apache.zookeeper.ZooKeeper; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -82,81 +68,143 @@ public class Auditor implements AutoCloseable { private static final Logger LOG = LoggerFactory.getLogger(Auditor.class); private final ServerConfiguration conf; - private BookKeeper bkc; - private BookKeeperAdmin admin; + private final BookKeeper bkc; + private final boolean ownBkc; + private final BookKeeperAdmin admin; + private final boolean ownAdmin; private BookieLedgerIndexer bookieLedgerIndexer; private LedgerManager ledgerManager; private LedgerUnderreplicationManager ledgerUnderreplicationManager; private final ScheduledExecutorService executor; private List knownBookies = new ArrayList(); private final String bookieIdentifier; - private final StatsLogger statsLogger; - private final OpStatsLogger numUnderReplicatedLedger; - private final OpStatsLogger uRLPublishTimeForLostBookies; - private final OpStatsLogger bookieToLedgersMapCreationTime; - private final OpStatsLogger checkAllLedgersTime; - private final Counter numLedgersChecked; - private final OpStatsLogger numFragmentsPerLedger; - private final OpStatsLogger numBookiesPerLedger; - private final Counter numBookieAuditsDelayed; - private final Counter numDelayedBookieAuditsCancelled; - private volatile Future auditTask; - private Set bookiesToBeAudited = Sets.newHashSet(); + protected volatile Future auditTask; + private final Set bookiesToBeAudited = Sets.newHashSet(); private volatile int lostBookieRecoveryDelayBeforeChange; + protected AuditorBookieCheckTask auditorBookieCheckTask; + protected AuditorTask auditorCheckAllLedgersTask; + protected AuditorTask auditorPlacementPolicyCheckTask; + protected AuditorTask auditorReplicasCheckTask; + private final List allAuditorTasks = Lists.newArrayList(); + + private final AuditorStats auditorStats; + + static BookKeeper createBookKeeperClient(ServerConfiguration conf) throws InterruptedException, IOException { + return createBookKeeperClient(conf, NullStatsLogger.INSTANCE); + } + + static BookKeeper createBookKeeperClient(ServerConfiguration conf, StatsLogger statsLogger) + throws InterruptedException, IOException { + ClientConfiguration clientConfiguration = new ClientConfiguration(conf); + clientConfiguration.setClientRole(ClientConfiguration.CLIENT_ROLE_SYSTEM); + try { + return BookKeeper.forConfig(clientConfiguration).statsLogger(statsLogger).build(); + } catch (BKException e) { + throw new IOException("Failed to create bookkeeper client", e); + } + } + + static BookKeeper createBookKeeperClientThrowUnavailableException(ServerConfiguration conf) + throws UnavailableException { + try { + return createBookKeeperClient(conf); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new UnavailableException("Failed to create bookkeeper client", e); + } catch (IOException e) { + throw new UnavailableException("Failed to create bookkeeper client", e); + } + } + + public Auditor(final String bookieIdentifier, + ServerConfiguration conf, + StatsLogger statsLogger) + throws UnavailableException { + this( + bookieIdentifier, + conf, + createBookKeeperClientThrowUnavailableException(conf), + true, + statsLogger); + } + + public Auditor(final String bookieIdentifier, + ServerConfiguration conf, + BookKeeper bkc, + boolean ownBkc, + StatsLogger 
statsLogger) + throws UnavailableException { + this(bookieIdentifier, + conf, + bkc, + ownBkc, + new BookKeeperAdmin(bkc, statsLogger, new ClientConfiguration(conf)), + true, + statsLogger); + } - public Auditor(final String bookieIdentifier, ServerConfiguration conf, - ZooKeeper zkc, StatsLogger statsLogger) throws UnavailableException { + public Auditor(final String bookieIdentifier, + ServerConfiguration conf, + BookKeeper bkc, + boolean ownBkc, + BookKeeperAdmin admin, + boolean ownAdmin, + StatsLogger statsLogger) + throws UnavailableException { this.conf = conf; this.bookieIdentifier = bookieIdentifier; - this.statsLogger = statsLogger; - - numUnderReplicatedLedger = this.statsLogger.getOpStatsLogger(ReplicationStats.NUM_UNDER_REPLICATED_LEDGERS); - uRLPublishTimeForLostBookies = this.statsLogger - .getOpStatsLogger(ReplicationStats.URL_PUBLISH_TIME_FOR_LOST_BOOKIE); - bookieToLedgersMapCreationTime = this.statsLogger - .getOpStatsLogger(ReplicationStats.BOOKIE_TO_LEDGERS_MAP_CREATION_TIME); - checkAllLedgersTime = this.statsLogger.getOpStatsLogger(ReplicationStats.CHECK_ALL_LEDGERS_TIME); - numLedgersChecked = this.statsLogger.getCounter(ReplicationStats.NUM_LEDGERS_CHECKED); - numFragmentsPerLedger = statsLogger.getOpStatsLogger(ReplicationStats.NUM_FRAGMENTS_PER_LEDGER); - numBookiesPerLedger = statsLogger.getOpStatsLogger(ReplicationStats.NUM_BOOKIES_PER_LEDGER); - numBookieAuditsDelayed = this.statsLogger.getCounter(ReplicationStats.NUM_BOOKIE_AUDITS_DELAYED); - numDelayedBookieAuditsCancelled = this.statsLogger - .getCounter(ReplicationStats.NUM_DELAYED_BOOKIE_AUDITS_DELAYES_CANCELLED); - - initialize(conf, zkc); - + this.auditorStats = new AuditorStats(statsLogger); + + this.bkc = bkc; + this.ownBkc = ownBkc; + this.admin = admin; + this.ownAdmin = ownAdmin; + initialize(conf, bkc); + + AuditorTask.ShutdownTaskHandler shutdownTaskHandler = this::submitShutdownTask; + BiConsumer submitBookieCheckTask = (ignore, throwable) -> this.submitBookieCheckTask(); + BiConsumer hasAuditCheckTask = (flag, throwable) -> flag.set(auditTask != null); + this.auditorBookieCheckTask = new AuditorBookieCheckTask( + conf, auditorStats, admin, ledgerManager, + ledgerUnderreplicationManager, shutdownTaskHandler, + bookieLedgerIndexer, hasAuditCheckTask, submitBookieCheckTask); + allAuditorTasks.add(auditorBookieCheckTask); + this.auditorCheckAllLedgersTask = new AuditorCheckAllLedgersTask( + conf, auditorStats, admin, ledgerManager, + ledgerUnderreplicationManager, shutdownTaskHandler, hasAuditCheckTask); + allAuditorTasks.add(auditorCheckAllLedgersTask); + this.auditorPlacementPolicyCheckTask = new AuditorPlacementPolicyCheckTask( + conf, auditorStats, admin, ledgerManager, + ledgerUnderreplicationManager, shutdownTaskHandler, hasAuditCheckTask); + allAuditorTasks.add(auditorPlacementPolicyCheckTask); + this.auditorReplicasCheckTask = new AuditorReplicasCheckTask( + conf, auditorStats, admin, ledgerManager, + ledgerUnderreplicationManager, shutdownTaskHandler, hasAuditCheckTask); + allAuditorTasks.add(auditorReplicasCheckTask); executor = Executors.newSingleThreadScheduledExecutor(new ThreadFactory() { - @Override - public Thread newThread(Runnable r) { - Thread t = new Thread(r, "AuditorBookie-" + bookieIdentifier); - t.setDaemon(true); - return t; - } - }); + @Override + public Thread newThread(Runnable r) { + Thread t = new Thread(r, "AuditorBookie-" + bookieIdentifier); + t.setDaemon(true); + return t; + } + }); } - private void initialize(ServerConfiguration conf, ZooKeeper zkc) + private void 
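
The constructor chain above threads ownBkc/ownAdmin flags so that shutdown closes only the clients this Auditor created itself, while caller-supplied clients stay open. The ownership-flag pattern in isolation, with hypothetical names:

import java.io.Closeable;
import java.io.IOException;

// Hypothetical illustration: close a dependency on shutdown only if this
// component created it, so shared clients are never closed out from under
// their real owner.
public class OwnedClientHolder implements Closeable {

    private final Closeable client;
    private final boolean ownClient;

    // Shared client supplied by the caller: the caller keeps ownership.
    OwnedClientHolder(Closeable client) {
        this(client, false);
    }

    OwnedClientHolder(Closeable client, boolean ownClient) {
        this.client = client;
        this.ownClient = ownClient;
    }

    @Override
    public void close() throws IOException {
        if (ownClient) {
            client.close();
        }
    }
}
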
initialize(ServerConfiguration conf, BookKeeper bkc) throws UnavailableException { try { - ClientConfiguration clientConfiguration = new ClientConfiguration(conf); - clientConfiguration.setClientRole(ClientConfiguration.CLIENT_ROLE_SYSTEM); - LOG.info("AuthProvider used by the Auditor is {}", - clientConfiguration.getClientAuthProviderFactoryClass()); - this.bkc = new BookKeeper(clientConfiguration, zkc); - - LedgerManagerFactory ledgerManagerFactory = AbstractZkLedgerManagerFactory - .newLedgerManagerFactory( - conf, - bkc.getMetadataClientDriver().getLayoutManager()); + LedgerManagerFactory ledgerManagerFactory = bkc.getLedgerManagerFactory(); ledgerManager = ledgerManagerFactory.newLedgerManager(); this.bookieLedgerIndexer = new BookieLedgerIndexer(ledgerManager); this.ledgerUnderreplicationManager = ledgerManagerFactory .newLedgerUnderreplicationManager(); - this.admin = new BookKeeperAdmin(bkc, statsLogger); + LOG.info("AuthProvider used by the Auditor is {}", + admin.getConf().getClientAuthProviderFactoryClass()); if (this.ledgerUnderreplicationManager .initializeLostBookieRecoveryDelay(conf.getLostBookieRecoveryDelay())) { - LOG.info("Initializing lostBookieRecoveryDelay zNode to the conif value: {}", + LOG.info("Initializing lostBookieRecoveryDelay zNode to the conf value: {}", conf.getLostBookieRecoveryDelay()); } else { LOG.info("Valid lostBookieRecoveryDelay zNode is available, so not creating " @@ -166,9 +214,6 @@ private void initialize(ServerConfiguration conf, ZooKeeper zkc) } catch (CompatibilityException ce) { throw new UnavailableException( "CompatibilityException while initializing Auditor", ce); - } catch (IOException | BKException | KeeperException ioe) { - throw new UnavailableException( - "Exception while initializing Auditor", ioe); } catch (InterruptedException ie) { Thread.currentThread().interrupt(); throw new UnavailableException( @@ -178,16 +223,17 @@ private void initialize(ServerConfiguration conf, ZooKeeper zkc) private void submitShutdownTask() { synchronized (this) { + LOG.info("Executing submitShutdownTask"); if (executor.isShutdown()) { + LOG.info("executor is already shutdown"); return; } - executor.submit(new Runnable() { - public void run() { - synchronized (Auditor.this) { - executor.shutdown(); - } - } - }); + executor.submit(() -> { + synchronized (Auditor.this) { + LOG.info("Shutting down Auditor's Executor"); + executor.shutdown(); + } + }); } } @@ -198,139 +244,133 @@ synchronized Future submitAuditTask() { f.setException(new BKAuditException("Auditor shutting down")); return f; } - return executor.submit(new Runnable() { - @SuppressWarnings("unchecked") - public void run() { - try { - waitIfLedgerReplicationDisabled(); - int lostBookieRecoveryDelay = Auditor.this.ledgerUnderreplicationManager - .getLostBookieRecoveryDelay(); - List availableBookies = getAvailableBookies(); - - // casting to String, as knownBookies and availableBookies - // contains only String values - // find new bookies(if any) and update the known bookie list - Collection newBookies = CollectionUtils.subtract( - availableBookies, knownBookies); - knownBookies.addAll(newBookies); - if (!bookiesToBeAudited.isEmpty() && knownBookies.containsAll(bookiesToBeAudited)) { - // the bookie, which went down earlier and had an audit scheduled for, - // has come up. So let us stop tracking it and cancel the audit. 
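
submitShutdownTask above stops the executor from a task submitted to that same executor, so work that was already queued drains first; the synchronized blocks keep the isShutdown check and the submit atomic with respect to concurrent shutdown attempts. Reduced to its skeleton:

import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

// Sketch of shutting an executor down "from within": queue the shutdown
// as an ordinary task so previously submitted work still completes.
public class SelfShutdownSketch {

    private final ExecutorService executor = Executors.newSingleThreadExecutor();

    synchronized void submitShutdownTask() {
        if (executor.isShutdown()) {
            return; // someone already queued or performed the shutdown
        }
        executor.submit(() -> {
            synchronized (SelfShutdownSketch.this) {
                executor.shutdown(); // no new tasks; queued ones still run
            }
        });
    }
}
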
Since - // we allow delaying of audit when there is only one failed bookie, - // bookiesToBeAudited should just have 1 element and hence containsAll - // check should be ok - if (auditTask != null && auditTask.cancel(false)) { - auditTask = null; - numDelayedBookieAuditsCancelled.inc(); - } - bookiesToBeAudited.clear(); - } - - // find lost bookies(if any) - bookiesToBeAudited.addAll(CollectionUtils.subtract(knownBookies, availableBookies)); - if (bookiesToBeAudited.size() == 0) { - return; - } - - knownBookies.removeAll(bookiesToBeAudited); - if (lostBookieRecoveryDelay == 0) { - startAudit(false); - bookiesToBeAudited.clear(); - return; - } - if (bookiesToBeAudited.size() > 1) { - // if more than one bookie is down, start the audit immediately; - LOG.info("Multiple bookie failure; not delaying bookie audit. " + return executor.submit(() -> { + try { + waitIfLedgerReplicationDisabled(); + int lostBookieRecoveryDelay = Auditor.this.ledgerUnderreplicationManager + .getLostBookieRecoveryDelay(); + List availableBookies = getAvailableBookies(); + + // casting to String, as knownBookies and availableBookies + // contains only String values + // find new bookies(if any) and update the known bookie list + Collection newBookies = CollectionUtils.subtract( + availableBookies, knownBookies); + knownBookies.addAll(newBookies); + if (!bookiesToBeAudited.isEmpty() && knownBookies.containsAll(bookiesToBeAudited)) { + // the bookie, which went down earlier and had an audit scheduled for, + // has come up. So let us stop tracking it and cancel the audit. Since + // we allow delaying of audit when there is only one failed bookie, + // bookiesToBeAudited should just have 1 element and hence containsAll + // check should be ok + if (auditTask != null && auditTask.cancel(false)) { + auditTask = null; + auditorStats.getNumDelayedBookieAuditsCancelled().inc(); + } + bookiesToBeAudited.clear(); + } + + // find lost bookies(if any) + bookiesToBeAudited.addAll(CollectionUtils.subtract(knownBookies, availableBookies)); + if (bookiesToBeAudited.size() == 0) { + return; + } + + knownBookies.removeAll(bookiesToBeAudited); + if (lostBookieRecoveryDelay == 0) { + auditorBookieCheckTask.startAudit(false); + bookiesToBeAudited.clear(); + return; + } + if (bookiesToBeAudited.size() > 1) { + // if more than one bookie is down, start the audit immediately; + LOG.info("Multiple bookie failure; not delaying bookie audit. 
" + "Bookies lost now: {}; All lost bookies: {}", - CollectionUtils.subtract(knownBookies, availableBookies), - bookiesToBeAudited); - if (auditTask != null && auditTask.cancel(false)) { - auditTask = null; - numDelayedBookieAuditsCancelled.inc(); - } - startAudit(false); - bookiesToBeAudited.clear(); - return; - } - if (auditTask == null) { - // if there is no scheduled audit, schedule one - auditTask = executor.schedule(new Runnable() { - public void run() { - startAudit(false); - auditTask = null; - bookiesToBeAudited.clear(); - } - }, lostBookieRecoveryDelay, TimeUnit.SECONDS); - numBookieAuditsDelayed.inc(); - LOG.info("Delaying bookie audit by {} secs for {}", lostBookieRecoveryDelay, - bookiesToBeAudited); - } - } catch (BKException bke) { - LOG.error("Exception getting bookie list", bke); - } catch (InterruptedException ie) { - Thread.currentThread().interrupt(); - LOG.error("Interrupted while watching available bookies ", ie); - } catch (UnavailableException ue) { - LOG.error("Exception while watching available bookies", ue); + CollectionUtils.subtract(knownBookies, availableBookies), + bookiesToBeAudited); + if (auditTask != null && auditTask.cancel(false)) { + auditTask = null; + auditorStats.getNumDelayedBookieAuditsCancelled().inc(); } + auditorBookieCheckTask.startAudit(false); + bookiesToBeAudited.clear(); + return; } - }); + if (auditTask == null) { + // if there is no scheduled audit, schedule one + auditTask = executor.schedule(() -> { + auditorBookieCheckTask.startAudit(false); + auditTask = null; + bookiesToBeAudited.clear(); + }, lostBookieRecoveryDelay, TimeUnit.SECONDS); + auditorStats.getNumBookieAuditsDelayed().inc(); + LOG.info("Delaying bookie audit by {} secs for {}", lostBookieRecoveryDelay, + bookiesToBeAudited); + } + } catch (BKException bke) { + LOG.error("Exception getting bookie list", bke); + } catch (InterruptedException ie) { + Thread.currentThread().interrupt(); + LOG.error("Interrupted while watching available bookies ", ie); + } catch (UnavailableException ue) { + LOG.error("Exception while watching available bookies", ue); + } + }); } synchronized Future submitLostBookieRecoveryDelayChangedEvent() { if (executor.isShutdown()) { - SettableFuture f = SettableFuture. create(); + SettableFuture f = SettableFuture.create(); f.setException(new BKAuditException("Auditor shutting down")); return f; } - return executor.submit(new Runnable() { + return executor.submit(() -> { int lostBookieRecoveryDelay = -1; - public void run() { - try { - waitIfLedgerReplicationDisabled(); - lostBookieRecoveryDelay = Auditor.this.ledgerUnderreplicationManager - .getLostBookieRecoveryDelay(); - // if there is pending auditTask, cancel the task. So that it can be rescheduled - // after new lostBookieRecoveryDelay period - if (auditTask != null) { - LOG.info("lostBookieRecoveryDelay period has been changed so canceling the pending AuditTask"); - auditTask.cancel(false); - numDelayedBookieAuditsCancelled.inc(); - } + try { + waitIfLedgerReplicationDisabled(); + lostBookieRecoveryDelay = Auditor.this.ledgerUnderreplicationManager + .getLostBookieRecoveryDelay(); + // if there is pending auditTask, cancel the task. 
So that it can be rescheduled + // after new lostBookieRecoveryDelay period + if (auditTask != null) { + LOG.info("lostBookieRecoveryDelay period has been changed so canceling the pending AuditTask"); + auditTask.cancel(false); + auditorStats.getNumDelayedBookieAuditsCancelled().inc(); + } - // if lostBookieRecoveryDelay is set to its previous value then consider it as - // signal to trigger the Audit immediately. - if ((lostBookieRecoveryDelay == 0) - || (lostBookieRecoveryDelay == lostBookieRecoveryDelayBeforeChange)) { - LOG.info( - "lostBookieRecoveryDelay has been set to 0 or reset to its previous value, " - + "so starting AuditTask. Current lostBookieRecoveryDelay: {}, " - + "previous lostBookieRecoveryDelay: {}", - lostBookieRecoveryDelay, lostBookieRecoveryDelayBeforeChange); - startAudit(false); + // if lostBookieRecoveryDelay is set to its previous value then consider it as + // signal to trigger the Audit immediately. + if ((lostBookieRecoveryDelay == 0) + || (lostBookieRecoveryDelay == lostBookieRecoveryDelayBeforeChange)) { + LOG.info( + "lostBookieRecoveryDelay has been set to 0 or reset to its previous value, " + + "so starting AuditTask. Current lostBookieRecoveryDelay: {}, " + + "previous lostBookieRecoveryDelay: {}", + lostBookieRecoveryDelay, lostBookieRecoveryDelayBeforeChange); + auditorBookieCheckTask.startAudit(false); + auditTask = null; + bookiesToBeAudited.clear(); + } else if (auditTask != null) { + LOG.info("lostBookieRecoveryDelay has been set to {}, so rescheduling AuditTask accordingly", + lostBookieRecoveryDelay); + auditTask = executor.schedule(() -> { + auditorBookieCheckTask.startAudit(false); auditTask = null; bookiesToBeAudited.clear(); - } else if (auditTask != null) { - LOG.info("lostBookieRecoveryDelay has been set to {}, so rescheduling AuditTask accordingly", - lostBookieRecoveryDelay); - auditTask = executor.schedule(new Runnable() { - public void run() { - startAudit(false); - auditTask = null; - bookiesToBeAudited.clear(); - } - }, lostBookieRecoveryDelay, TimeUnit.SECONDS); - numBookieAuditsDelayed.inc(); - } - } catch (InterruptedException ie) { - Thread.currentThread().interrupt(); - LOG.error("Interrupted while for LedgersReplication to be enabled ", ie); - } catch (UnavailableException ue) { - LOG.error("Exception while reading from ZK", ue); - } finally { - if (lostBookieRecoveryDelay != -1) { - lostBookieRecoveryDelayBeforeChange = lostBookieRecoveryDelay; - } + }, lostBookieRecoveryDelay, TimeUnit.SECONDS); + auditorStats.getNumBookieAuditsDelayed().inc(); + } + } catch (InterruptedException ie) { + Thread.currentThread().interrupt(); + LOG.error("Interrupted while for LedgersReplication to be enabled ", ie); + } catch (ReplicationException.NonRecoverableReplicationException nre) { + LOG.error("Non Recoverable Exception while reading from ZK", nre); + submitShutdownTask(); + } catch (UnavailableException ue) { + LOG.error("Exception while reading from ZK", ue); + } finally { + if (lostBookieRecoveryDelay != -1) { + lostBookieRecoveryDelayBeforeChange = lostBookieRecoveryDelay; } } }); @@ -345,67 +385,176 @@ public void start() { return; } - long interval = conf.getAuditorPeriodicCheckInterval(); - - if (interval > 0) { - LOG.info("Auditor periodic ledger checking enabled" - + " 'auditorPeriodicCheckInterval' {} seconds", interval); - executor.scheduleAtFixedRate(new Runnable() { - public void run() { - try { - if (!ledgerUnderreplicationManager.isLedgerReplicationEnabled()) { - LOG.info("Ledger replication disabled, skipping"); - return; 
- } - - Stopwatch stopwatch = Stopwatch.createStarted(); - checkAllLedgers(); - checkAllLedgersTime.registerSuccessfulEvent(stopwatch.stop() - .elapsed(TimeUnit.MILLISECONDS), - TimeUnit.MILLISECONDS); - } catch (KeeperException ke) { - LOG.error("Exception while running periodic check", ke); - } catch (InterruptedException ie) { - Thread.currentThread().interrupt(); - LOG.error("Interrupted while running periodic check", ie); - } catch (BKException bke) { - LOG.error("Exception running periodic check", bke); - } catch (IOException ioe) { - LOG.error("I/O exception running periodic check", ioe); - } catch (ReplicationException.UnavailableException ue) { - LOG.error("Underreplication manager unavailable running periodic check", ue); - } - } - }, interval, interval, TimeUnit.SECONDS); - } else { - LOG.info("Periodic checking disabled"); - } try { watchBookieChanges(); - knownBookies = getAvailableBookies(); + // Start with all available bookies + // to handle situations where the auditor + // is started after some bookies have already failed + knownBookies = admin.getAllBookies().stream() + .map(BookieId::toString) + .collect(Collectors.toList()); + this.ledgerUnderreplicationManager + .notifyLostBookieRecoveryDelayChanged(new LostBookieRecoveryDelayChangedCb()); } catch (BKException bke) { - LOG.error("Couldn't get bookie list, exiting", bke); + LOG.error("Couldn't get bookie list, so exiting", bke); + submitShutdownTask(); + return; + } catch (UnavailableException ue) { + LOG.error("Exception while registering for change notification, so exiting", ue); submitShutdownTask(); + return; } + scheduleBookieCheckTask(); + scheduleCheckAllLedgersTask(); + schedulePlacementPolicyCheckTask(); + scheduleReplicasCheckTask(); + } + } + + protected void submitBookieCheckTask() { + executor.submit(auditorBookieCheckTask); + } + + private void scheduleBookieCheckTask() { + long bookieCheckInterval = conf.getAuditorPeriodicBookieCheckInterval(); + if (bookieCheckInterval == 0) { + LOG.info("Auditor periodic bookie checking disabled, running once check now anyhow"); + submitBookieCheckTask(); + } else { + LOG.info("Auditor periodic bookie checking enabled" + " 'auditorPeriodicBookieCheckInterval' {} seconds", + bookieCheckInterval); + executor.scheduleAtFixedRate(auditorBookieCheckTask, 0, bookieCheckInterval, TimeUnit.SECONDS); + } + } + private void scheduleCheckAllLedgersTask() { + long interval = conf.getAuditorPeriodicCheckInterval(); + + if (interval > 0) { + LOG.info("Auditor periodic ledger checking enabled" + " 'auditorPeriodicCheckInterval' {} seconds", + interval); + + long checkAllLedgersLastExecutedCTime; + long durationSinceLastExecutionInSecs; + long initialDelay; try { - this.ledgerUnderreplicationManager - .notifyLostBookieRecoveryDelayChanged(new LostBookieRecoveryDelayChangedCb()); - } catch (UnavailableException ue) { - LOG.error("Exception while registering for LostBookieRecoveryDelay change notification", ue); + checkAllLedgersLastExecutedCTime = ledgerUnderreplicationManager.getCheckAllLedgersCTime(); + } catch (ReplicationException.NonRecoverableReplicationException nre) { + LOG.error("Non Recoverable Exception while reading from ZK", nre); submitShutdownTask(); + return; + } catch (UnavailableException ue) { + LOG.error("Got UnavailableException while trying to get checkAllLedgersCTime", ue); + checkAllLedgersLastExecutedCTime = -1; + } + if (checkAllLedgersLastExecutedCTime == -1) { + durationSinceLastExecutionInSecs = -1; + initialDelay = 0; + } else { + 
durationSinceLastExecutionInSecs = (System.currentTimeMillis() - checkAllLedgersLastExecutedCTime) + / 1000; + if (durationSinceLastExecutionInSecs < 0) { + // this can happen if there is no strict time ordering + durationSinceLastExecutionInSecs = 0; + } + initialDelay = durationSinceLastExecutionInSecs > interval ? 0 + : (interval - durationSinceLastExecutionInSecs); } + LOG.info( + "checkAllLedgers scheduling info. checkAllLedgersLastExecutedCTime: {} " + + "durationSinceLastExecutionInSecs: {} initialDelay: {} interval: {}", + checkAllLedgersLastExecutedCTime, durationSinceLastExecutionInSecs, initialDelay, interval); + + executor.scheduleAtFixedRate(auditorCheckAllLedgersTask, initialDelay, interval, TimeUnit.SECONDS); + } else { + LOG.info("Periodic checking disabled"); + } + } - long bookieCheckInterval = conf.getAuditorPeriodicBookieCheckInterval(); - if (bookieCheckInterval == 0) { - LOG.info("Auditor periodic bookie checking disabled, running once check now anyhow"); - executor.submit(bookieCheck); + private void schedulePlacementPolicyCheckTask() { + long interval = conf.getAuditorPeriodicPlacementPolicyCheckInterval(); + + if (interval > 0) { + LOG.info("Auditor periodic placement policy check enabled" + + " 'auditorPeriodicPlacementPolicyCheckInterval' {} seconds", interval); + + long placementPolicyCheckLastExecutedCTime; + long durationSinceLastExecutionInSecs; + long initialDelay; + try { + placementPolicyCheckLastExecutedCTime = ledgerUnderreplicationManager.getPlacementPolicyCheckCTime(); + } catch (ReplicationException.NonRecoverableReplicationException nre) { + LOG.error("Non Recoverable Exception while reading from ZK", nre); + submitShutdownTask(); + return; + } catch (UnavailableException ue) { + LOG.error("Got UnavailableException while trying to get placementPolicyCheckCTime", ue); + placementPolicyCheckLastExecutedCTime = -1; + } + if (placementPolicyCheckLastExecutedCTime == -1) { + durationSinceLastExecutionInSecs = -1; + initialDelay = 0; } else { - LOG.info("Auditor periodic bookie checking enabled" - + " 'auditorPeriodicBookieCheckInterval' {} seconds", bookieCheckInterval); - executor.scheduleAtFixedRate(bookieCheck, 0, bookieCheckInterval, TimeUnit.SECONDS); + durationSinceLastExecutionInSecs = (System.currentTimeMillis() - placementPolicyCheckLastExecutedCTime) + / 1000; + if (durationSinceLastExecutionInSecs < 0) { + // this can happen if there is no strict time ordering + durationSinceLastExecutionInSecs = 0; + } + initialDelay = durationSinceLastExecutionInSecs > interval ? 0 + : (interval - durationSinceLastExecutionInSecs); + } + LOG.info( + "placementPolicyCheck scheduling info. 
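
Each periodic check now resumes its cadence from the last recorded run (a ctime stored in replication metadata) instead of waiting a full interval after every auditor restart; the same arithmetic repeats for the placement-policy and replicas checks below. The computation extracted on its own:

public class InitialDelaySketch {

    // Run immediately if the task never ran (ctime == -1) or is overdue,
    // otherwise wait out only the remainder of the interval.
    static long initialDelaySecs(long lastExecutedCTimeMs, long intervalSecs, long nowMs) {
        if (lastExecutedCTimeMs == -1) {
            return 0;
        }
        long elapsedSecs = (nowMs - lastExecutedCTimeMs) / 1000;
        if (elapsedSecs < 0) {
            elapsedSecs = 0; // clocks are not strictly ordered across restarts
        }
        return elapsedSecs > intervalSecs ? 0 : intervalSecs - elapsedSecs;
    }

    public static void main(String[] args) {
        long now = System.currentTimeMillis();
        // Last ran 100s ago with a 300s interval: next run in 200s.
        System.out.println(initialDelaySecs(now - 100_000L, 300, now));
    }
}
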
placementPolicyCheckLastExecutedCTime: {} " + + "durationSinceLastExecutionInSecs: {} initialDelay: {} interval: {}", + placementPolicyCheckLastExecutedCTime, durationSinceLastExecutionInSecs, initialDelay, interval); + + executor.scheduleAtFixedRate(auditorPlacementPolicyCheckTask, initialDelay, interval, TimeUnit.SECONDS); + } else { + LOG.info("Periodic placementPolicy check disabled"); + } + } + + private void scheduleReplicasCheckTask() { + long interval = conf.getAuditorPeriodicReplicasCheckInterval(); + + if (interval <= 0) { + LOG.info("Periodic replicas check disabled"); + return; + } + + LOG.info("Auditor periodic replicas check enabled" + " 'auditorReplicasCheckInterval' {} seconds", interval); + long replicasCheckLastExecutedCTime; + long durationSinceLastExecutionInSecs; + long initialDelay; + try { + replicasCheckLastExecutedCTime = ledgerUnderreplicationManager.getReplicasCheckCTime(); + } catch (ReplicationException.NonRecoverableReplicationException nre) { + LOG.error("Non Recoverable Exception while reading from ZK", nre); + submitShutdownTask(); + return; + } catch (UnavailableException ue) { + LOG.error("Got UnavailableException while trying to get replicasCheckCTime", ue); + replicasCheckLastExecutedCTime = -1; + } + if (replicasCheckLastExecutedCTime == -1) { + durationSinceLastExecutionInSecs = -1; + initialDelay = 0; + } else { + durationSinceLastExecutionInSecs = (System.currentTimeMillis() - replicasCheckLastExecutedCTime) / 1000; + if (durationSinceLastExecutionInSecs < 0) { + // this can happen if there is no strict time ordering + durationSinceLastExecutionInSecs = 0; } + initialDelay = durationSinceLastExecutionInSecs > interval ? 0 + : (interval - durationSinceLastExecutionInSecs); } + LOG.info( + "replicasCheck scheduling info. 
replicasCheckLastExecutedCTime: {} " + + "durationSinceLastExecutionInSecs: {} initialDelay: {} interval: {}", + replicasCheckLastExecutedCTime, durationSinceLastExecutionInSecs, initialDelay, interval); + + executor.scheduleAtFixedRate(auditorReplicasCheckTask, initialDelay, interval, TimeUnit.SECONDS); } private class LostBookieRecoveryDelayChangedCb implements GenericCallback { @@ -414,6 +563,9 @@ public void operationComplete(int rc, Void result) { try { Auditor.this.ledgerUnderreplicationManager .notifyLostBookieRecoveryDelayChanged(LostBookieRecoveryDelayChangedCb.this); + } catch (ReplicationException.NonRecoverableReplicationException nre) { + LOG.error("Non Recoverable Exception while reading from ZK", nre); + submitShutdownTask(); } catch (UnavailableException ae) { LOG.error("Exception while registering for a LostBookieRecoveryDelay notification", ae); } @@ -423,23 +575,23 @@ public void operationComplete(int rc, Void result) { private void waitIfLedgerReplicationDisabled() throws UnavailableException, InterruptedException { - ReplicationEnableCb cb = new ReplicationEnableCb(); if (!ledgerUnderreplicationManager.isLedgerReplicationEnabled()) { + ReplicationEnableCb cb = new ReplicationEnableCb(); LOG.info("LedgerReplication is disabled externally through Zookeeper, " - + "since DISABLE_NODE ZNode is created, so waiting untill it is enabled"); + + "since DISABLE_NODE ZNode is created, so waiting until it is enabled"); ledgerUnderreplicationManager.notifyLedgerReplicationEnabled(cb); cb.await(); } } - private List getAvailableBookies() throws BKException { + protected List getAvailableBookies() throws BKException { // Get the available bookies - Collection availableBkAddresses = admin.getAvailableBookies(); - Collection readOnlyBkAddresses = admin.getReadOnlyBookies(); + Collection availableBkAddresses = admin.getAvailableBookies(); + Collection readOnlyBkAddresses = admin.getReadOnlyBookies(); availableBkAddresses.addAll(readOnlyBkAddresses); List availableBookies = new ArrayList(); - for (BookieSocketAddress addr : availableBkAddresses) { + for (BookieId addr : availableBkAddresses) { availableBookies.add(addr.toString()); } return availableBookies; @@ -450,224 +602,6 @@ private void watchBookieChanges() throws BKException { admin.watchReadOnlyBookiesChanged(bookies -> submitAuditTask()); } - /** - * Start running the actual audit task. - * - * @param shutDownTask - * A boolean that indicates whether or not to schedule shutdown task on any failure - */ - private void startAudit(boolean shutDownTask) { - try { - auditBookies(); - shutDownTask = false; - } catch (BKException bke) { - LOG.error("Exception getting bookie list", bke); - } catch (InterruptedException ie) { - Thread.currentThread().interrupt(); - LOG.error("Interrupted while watching available bookies ", ie); - } catch (BKAuditException bke) { - LOG.error("Exception while watching available bookies", bke); - } - if (shutDownTask) { - submitShutdownTask(); - } - } - - @SuppressWarnings("unchecked") - private void auditBookies() - throws BKAuditException, InterruptedException, BKException { - try { - waitIfLedgerReplicationDisabled(); - } catch (UnavailableException ue) { - LOG.error("Underreplication unavailable, skipping audit." 
- + "Will retry after a period"); - return; - } - - Stopwatch stopwatch = Stopwatch.createStarted(); - // put exit cases here - Map> ledgerDetails = generateBookie2LedgersIndex(); - try { - if (!ledgerUnderreplicationManager.isLedgerReplicationEnabled()) { - // has been disabled while we were generating the index - // discard this run, and schedule a new one - executor.submit(bookieCheck); - return; - } - } catch (UnavailableException ue) { - LOG.error("Underreplication unavailable, skipping audit." - + "Will retry after a period"); - return; - } - - List availableBookies = getAvailableBookies(); - // find lost bookies - Set knownBookies = ledgerDetails.keySet(); - Collection lostBookies = CollectionUtils.subtract(knownBookies, - availableBookies); - - bookieToLedgersMapCreationTime.registerSuccessfulEvent(stopwatch.elapsed(TimeUnit.MILLISECONDS), - TimeUnit.MILLISECONDS); - if (lostBookies.size() > 0) { - try { - FutureUtils.result( - handleLostBookiesAsync(lostBookies, ledgerDetails), ReplicationException.EXCEPTION_HANDLER); - } catch (ReplicationException e) { - throw new BKAuditException(e.getMessage(), e.getCause()); - } - uRLPublishTimeForLostBookies.registerSuccessfulEvent(stopwatch.stop().elapsed(TimeUnit.MILLISECONDS), - TimeUnit.MILLISECONDS); - } - - } - - private Map> generateBookie2LedgersIndex() - throws BKAuditException { - return bookieLedgerIndexer.getBookieToLedgerIndex(); - } - - private CompletableFuture handleLostBookiesAsync(Collection lostBookies, - Map> ledgerDetails) { - LOG.info("Following are the failed bookies: {}," - + " and searching its ledgers for re-replication", lostBookies); - - return FutureUtils.processList( - Lists.newArrayList(lostBookies), - bookieIP -> publishSuspectedLedgersAsync( - Lists.newArrayList(bookieIP), ledgerDetails.get(bookieIP)), - null - ); - } - - private CompletableFuture publishSuspectedLedgersAsync(Collection missingBookies, Set ledgers) { - if (null == ledgers || ledgers.size() == 0) { - // there is no ledgers available for this bookie and just - // ignoring the bookie failures - LOG.info("There is no ledgers for the failed bookie: {}", missingBookies); - return FutureUtils.Void(); - } - LOG.info("Following ledgers: {} of bookie: {} are identified as underreplicated", ledgers, missingBookies); - numUnderReplicatedLedger.registerSuccessfulValue(ledgers.size()); - return FutureUtils.processList( - Lists.newArrayList(ledgers), - ledgerId -> ledgerUnderreplicationManager.markLedgerUnderreplicatedAsync(ledgerId, missingBookies), - null - ); - } - - /** - * Process the result returned from checking a ledger. 
- */ - private class ProcessLostFragmentsCb implements GenericCallback> { - final LedgerHandle lh; - final AsyncCallback.VoidCallback callback; - - ProcessLostFragmentsCb(LedgerHandle lh, AsyncCallback.VoidCallback callback) { - this.lh = lh; - this.callback = callback; - } - - public void operationComplete(int rc, Set fragments) { - if (rc == BKException.Code.OK) { - Set bookies = Sets.newHashSet(); - for (LedgerFragment f : fragments) { - bookies.addAll(f.getAddresses()); - } - publishSuspectedLedgersAsync( - bookies.stream().map(BookieSocketAddress::toString).collect(Collectors.toList()), - Sets.newHashSet(lh.getId()) - ).whenComplete((result, cause) -> { - if (null != cause) { - LOG.error("Auditor exception publishing suspected ledger {} with lost bookies {}", - lh.getId(), bookies, cause); - callback.processResult(Code.ReplicationException, null, null); - } else { - callback.processResult(Code.OK, null, null); - } - }); - } else { - callback.processResult(rc, null, null); - } - lh.closeAsync().whenComplete((result, cause) -> { - if (null != cause) { - LOG.warn("Error closing ledger {} : {}", lh.getId(), cause.getMessage()); - } - }); - } - } - - /** - * List all the ledgers and check them individually. This should not - * be run very often. - */ - void checkAllLedgers() throws BKException, IOException, InterruptedException, KeeperException { - ZooKeeper newzk = ZooKeeperClient.newBuilder() - .connectString(ZKMetadataDriverBase.resolveZkServers(conf)) - .sessionTimeoutMs(conf.getZkTimeout()) - .build(); - - final BookKeeper client = new BookKeeper(new ClientConfiguration(conf), - newzk); - final BookKeeperAdmin admin = new BookKeeperAdmin(client, statsLogger); - - try { - final LedgerChecker checker = new LedgerChecker(client); - - final CompletableFuture processFuture = new CompletableFuture<>(); - - Processor checkLedgersProcessor = (ledgerId, callback) -> { - try { - if (!ledgerUnderreplicationManager.isLedgerReplicationEnabled()) { - LOG.info("Ledger rereplication has been disabled, aborting periodic check"); - FutureUtils.complete(processFuture, null); - return; - } - } catch (UnavailableException ue) { - LOG.error("Underreplication manager unavailable running periodic check", ue); - FutureUtils.complete(processFuture, null); - return; - } - - admin.asyncOpenLedgerNoRecovery(ledgerId, (rc, lh, ctx) -> { - if (Code.OK == rc) { - checker.checkLedger(lh, - // the ledger handle will be closed after checkLedger is done. 
- new ProcessLostFragmentsCb(lh, callback), - conf.getAuditorLedgerVerificationPercentage()); - // we collect the following stats to get a measure of the - // distribution of a single ledger within the bk cluster - // the higher the number of fragments/bookies, the more distributed it is - numFragmentsPerLedger.registerSuccessfulValue(lh.getNumFragments()); - numBookiesPerLedger.registerSuccessfulValue(lh.getNumBookies()); - numLedgersChecked.inc(); - } else if (Code.NoSuchLedgerExistsException == rc) { - if (LOG.isDebugEnabled()) { - LOG.debug("Ledger {} was deleted before we could check it", ledgerId); - } - callback.processResult(Code.OK, null, null); - } else { - LOG.error("Couldn't open ledger {} to check : {}", ledgerId, BKException.getMessage(rc)); - callback.processResult(rc, null, null); - } - }, null); - }; - - ledgerManager.asyncProcessLedgers(checkLedgersProcessor, - (rc, path, ctx) -> { - if (Code.OK == rc) { - FutureUtils.complete(processFuture, null); - } else { - FutureUtils.completeExceptionally(processFuture, BKException.create(rc)); - } - }, null, BKException.Code.OK, BKException.Code.ReadException); - FutureUtils.result(processFuture, BKException.HANDLER); - } finally { - admin.close(); - client.close(); - newzk.close(); - } - } - /** * Shutdown the auditor. */ @@ -679,12 +613,27 @@ public void shutdown() { LOG.warn("Executor not shutting down, interrupting"); executor.shutdownNow(); } - admin.close(); - bkc.close(); + + // shutdown all auditorTasks to clean some resource + allAuditorTasks.forEach(AuditorTask::shutdown); + allAuditorTasks.clear(); + + if (ownAdmin) { + admin.close(); + } + if (ownBkc) { + bkc.close(); + } + if (ledgerManager != null) { + ledgerManager.close(); + } + if (ledgerUnderreplicationManager != null) { + ledgerUnderreplicationManager.close(); + } } catch (InterruptedException ie) { Thread.currentThread().interrupt(); LOG.warn("Interrupted while shutting down auditor bookie", ie); - } catch (BKException bke) { + } catch (UnavailableException | IOException | BKException bke) { LOG.warn("Exception while shutting down auditor bookie", bke); } } @@ -703,20 +652,6 @@ public boolean isRunning() { return !executor.isShutdown(); } - private final Runnable bookieCheck = new Runnable() { - public void run() { - if (auditTask == null) { - startAudit(true); - } else { - // if due to a lost bookie an audit task was scheduled, - // let us not run this periodic bookie check now, if we - // went ahead, we'll report under replication and the user - // wanted to avoid that(with lostBookieRecoveryDelay option) - LOG.info("Audit already scheduled; skipping periodic bookie check"); - } - } - }; - int getLostBookieRecoveryDelayBeforeChange() { return lostBookieRecoveryDelayBeforeChange; } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/AuditorBookieCheckTask.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/AuditorBookieCheckTask.java new file mode 100644 index 00000000000..0325e56b44d --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/AuditorBookieCheckTask.java @@ -0,0 +1,185 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bookkeeper.replication; + +import com.google.common.base.Stopwatch; +import com.google.common.collect.Lists; +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.function.BiConsumer; +import org.apache.bookkeeper.client.BKException; +import org.apache.bookkeeper.client.BookKeeperAdmin; +import org.apache.bookkeeper.common.concurrent.FutureUtils; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.meta.LedgerManager; +import org.apache.bookkeeper.meta.LedgerUnderreplicationManager; +import org.apache.commons.collections4.CollectionUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class AuditorBookieCheckTask extends AuditorTask { + private static final Logger LOG = LoggerFactory.getLogger(AuditorBookieCheckTask.class); + + private final BookieLedgerIndexer bookieLedgerIndexer; + private final BiConsumer submitCheckTask; + + public AuditorBookieCheckTask(ServerConfiguration conf, + AuditorStats auditorStats, + BookKeeperAdmin admin, + LedgerManager ledgerManager, + LedgerUnderreplicationManager ledgerUnderreplicationManager, + ShutdownTaskHandler shutdownTaskHandler, + BookieLedgerIndexer bookieLedgerIndexer, + BiConsumer hasAuditCheckTask, + BiConsumer submitCheckTask) { + super(conf, auditorStats, admin, ledgerManager, + ledgerUnderreplicationManager, shutdownTaskHandler, hasAuditCheckTask); + this.bookieLedgerIndexer = bookieLedgerIndexer; + this.submitCheckTask = submitCheckTask; + } + + @Override + protected void runTask() { + if (!hasBookieCheckTask()) { + startAudit(true); + } else { + // if due to a lost bookie an audit task was scheduled, + // let us not run this periodic bookie check now, if we + // went ahead, we'll report under replication and the user + // wanted to avoid that(with lostBookieRecoveryDelay option) + LOG.info("Audit already scheduled; skipping periodic bookie check"); + auditorStats.getNumSkippingCheckTaskTimes().inc(); + } + } + + @Override + public void shutdown() { + + } + + /** + * Start running the actual audit task. 
+ * + * @param shutDownTask A boolean that indicates whether or not to schedule shutdown task on any failure + */ + void startAudit(boolean shutDownTask) { + try { + auditBookies(); + shutDownTask = false; + } catch (BKException bke) { + LOG.error("Exception getting bookie list", bke); + } catch (InterruptedException ie) { + Thread.currentThread().interrupt(); + LOG.error("Interrupted while watching available bookies ", ie); + } catch (ReplicationException.BKAuditException bke) { + LOG.error("Exception while watching available bookies", bke); + } + if (shutDownTask) { + submitShutdownTask(); + } + } + + void auditBookies() + throws ReplicationException.BKAuditException, InterruptedException, BKException { + try { + waitIfLedgerReplicationDisabled(); + } catch (ReplicationException.NonRecoverableReplicationException nre) { + LOG.error("Non Recoverable Exception while reading from ZK", nre); + submitShutdownTask(); + return; + } catch (ReplicationException.UnavailableException ue) { + LOG.error("Underreplication unavailable, skipping audit." + + "Will retry after a period"); + return; + } + LOG.info("Starting auditBookies"); + Stopwatch stopwatch = Stopwatch.createStarted(); + // put exit cases here + Map> ledgerDetails = generateBookie2LedgersIndex(); + try { + if (!isLedgerReplicationEnabled()) { + // has been disabled while we were generating the index + // discard this run, and schedule a new one + submitCheckTask.accept(null, null); + return; + } + } catch (ReplicationException.UnavailableException ue) { + LOG.error("Underreplication unavailable, skipping audit." + + "Will retry after a period"); + return; + } + + List availableBookies = getAvailableBookies(); + // find lost bookies + Set knownBookies = ledgerDetails.keySet(); + Collection lostBookies = CollectionUtils.subtract(knownBookies, + availableBookies); + + auditorStats.getBookieToLedgersMapCreationTime() + .registerSuccessfulEvent(stopwatch.elapsed(TimeUnit.MILLISECONDS), + TimeUnit.MILLISECONDS); + if (lostBookies.size() > 0) { + try { + FutureUtils.result( + handleLostBookiesAsync(lostBookies, ledgerDetails), ReplicationException.EXCEPTION_HANDLER); + } catch (ReplicationException e) { + throw new ReplicationException.BKAuditException(e.getMessage(), e.getCause()); + } + auditorStats.getURLPublishTimeForLostBookies() + .registerSuccessfulEvent(stopwatch.elapsed(TimeUnit.MILLISECONDS), + TimeUnit.MILLISECONDS); + } + LOG.info("Completed auditBookies"); + auditorStats.getAuditBookiesTime().registerSuccessfulEvent(stopwatch.stop().elapsed(TimeUnit.MILLISECONDS), + TimeUnit.MILLISECONDS); + } + + private Map> generateBookie2LedgersIndex() + throws ReplicationException.BKAuditException { + return bookieLedgerIndexer.getBookieToLedgerIndex(); + } + + private CompletableFuture handleLostBookiesAsync(Collection lostBookies, + Map> ledgerDetails) { + LOG.info("Following are the failed bookies: {}," + + " and searching its ledgers for re-replication", lostBookies); + + return FutureUtils.processList( + Lists.newArrayList(lostBookies), + bookieIP -> publishSuspectedLedgersAsync( + Lists.newArrayList(bookieIP), ledgerDetails.get(bookieIP)), + null + ); + } + + protected void waitIfLedgerReplicationDisabled() throws ReplicationException.UnavailableException, + InterruptedException { + if (!isLedgerReplicationEnabled()) { + LOG.info("LedgerReplication is disabled externally through Zookeeper, " + + "since DISABLE_NODE ZNode is created, so waiting until it is enabled"); + ReplicationEnableCb cb = new ReplicationEnableCb(); + 
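
auditBookies reduces lost-bookie detection to set arithmetic: whatever appears in the bookie-to-ledgers index but not in the current registration set is suspect. The same computation with plain java.util collections (the patch itself uses commons-collections' CollectionUtils.subtract):

import java.util.HashSet;
import java.util.Map;
import java.util.Set;

// Sketch of the lost-bookie computation: known minus available.
public class LostBookieSketch {

    static Set<String> lostBookies(Map<String, Set<Long>> bookieToLedgers,
                                   Set<String> availableBookies) {
        Set<String> lost = new HashSet<>(bookieToLedgers.keySet());
        lost.removeAll(availableBookies);
        return lost; // each entry's ledgers get marked under-replicated
    }
}
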
ledgerUnderreplicationManager.notifyLedgerReplicationEnabled(cb); + cb.await(); + } + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/AuditorCheckAllLedgersTask.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/AuditorCheckAllLedgersTask.java new file mode 100644 index 00000000000..299d47a2810 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/AuditorCheckAllLedgersTask.java @@ -0,0 +1,295 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bookkeeper.replication; + +import com.google.common.base.Stopwatch; +import com.google.common.collect.Sets; +import java.io.IOException; +import java.util.Set; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Semaphore; +import java.util.concurrent.ThreadFactory; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.function.BiConsumer; +import java.util.stream.Collectors; +import org.apache.bookkeeper.client.BKException; +import org.apache.bookkeeper.client.BookKeeper; +import org.apache.bookkeeper.client.BookKeeperAdmin; +import org.apache.bookkeeper.client.LedgerChecker; +import org.apache.bookkeeper.client.LedgerFragment; +import org.apache.bookkeeper.client.LedgerHandle; +import org.apache.bookkeeper.common.concurrent.FutureUtils; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.meta.LedgerManager; +import org.apache.bookkeeper.meta.LedgerUnderreplicationManager; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks; +import org.apache.bookkeeper.replication.ReplicationException.UnavailableException; +import org.apache.zookeeper.AsyncCallback; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class AuditorCheckAllLedgersTask extends AuditorTask { + private static final Logger LOG = LoggerFactory.getLogger(AuditorBookieCheckTask.class); + + private final Semaphore openLedgerNoRecoverySemaphore; + private final int openLedgerNoRecoverySemaphoreWaitTimeoutMSec; + private final ExecutorService ledgerCheckerExecutor; + + AuditorCheckAllLedgersTask(ServerConfiguration conf, + AuditorStats auditorStats, + BookKeeperAdmin admin, + LedgerManager ledgerManager, + LedgerUnderreplicationManager ledgerUnderreplicationManager, + ShutdownTaskHandler shutdownTaskHandler, + BiConsumer hasAuditCheckTask) + throws UnavailableException { + super(conf, auditorStats, admin, ledgerManager, + ledgerUnderreplicationManager, shutdownTaskHandler, hasAuditCheckTask); + + if (conf.getAuditorMaxNumberOfConcurrentOpenLedgerOperations() <= 0) 
{ + LOG.error("auditorMaxNumberOfConcurrentOpenLedgerOperations should be greater than 0"); + throw new UnavailableException("auditorMaxNumberOfConcurrentOpenLedgerOperations should be greater than 0"); + } + this.openLedgerNoRecoverySemaphore = + new Semaphore(conf.getAuditorMaxNumberOfConcurrentOpenLedgerOperations()); + + if (conf.getAuditorAcquireConcurrentOpenLedgerOperationsTimeoutMSec() < 0) { + LOG.error("auditorAcquireConcurrentOpenLedgerOperationsTimeoutMSec should be greater than or equal to 0"); + throw new UnavailableException("auditorAcquireConcurrentOpenLedgerOperationsTimeoutMSec " + + "should be greater than or equal to 0"); + } + this.openLedgerNoRecoverySemaphoreWaitTimeoutMSec = + conf.getAuditorAcquireConcurrentOpenLedgerOperationsTimeoutMSec(); + + this.ledgerCheckerExecutor = Executors.newSingleThreadExecutor(new ThreadFactory() { + @Override + public Thread newThread(Runnable r) { + Thread t = new Thread(r, "AuditorCheckAllLedgers-LedgerChecker"); + t.setDaemon(true); + return t; + } + }); + } + + @Override + protected void runTask() { + if (hasBookieCheckTask()) { + LOG.info("Audit bookie task already scheduled; skipping periodic all ledgers check task"); + auditorStats.getNumSkippingCheckTaskTimes().inc(); + return; + } + + Stopwatch stopwatch = Stopwatch.createStarted(); + boolean checkSuccess = false; + try { + if (!isLedgerReplicationEnabled()) { + LOG.info("Ledger replication disabled, skipping checkAllLedgers"); + checkSuccess = true; + return; + } + + LOG.info("Starting checkAllLedgers"); + checkAllLedgers(); + long checkAllLedgersDuration = stopwatch.stop().elapsed(TimeUnit.MILLISECONDS); + LOG.info("Completed checkAllLedgers in {} milliSeconds", checkAllLedgersDuration); + auditorStats.getCheckAllLedgersTime() + .registerSuccessfulEvent(checkAllLedgersDuration, TimeUnit.MILLISECONDS); + checkSuccess = true; + } catch (InterruptedException ie) { + Thread.currentThread().interrupt(); + LOG.error("Interrupted while running periodic check", ie); + } catch (BKException bke) { + LOG.error("Exception running periodic check", bke); + } catch (IOException ioe) { + LOG.error("I/O exception running periodic check", ioe); + } catch (ReplicationException.NonRecoverableReplicationException nre) { + LOG.error("Non Recoverable Exception while reading from ZK", nre); + submitShutdownTask(); + } catch (ReplicationException.UnavailableException ue) { + LOG.error("Underreplication manager unavailable running periodic check", ue); + } finally { + if (!checkSuccess) { + long checkAllLedgersDuration = stopwatch.stop().elapsed(TimeUnit.MILLISECONDS); + auditorStats.getCheckAllLedgersTime() + .registerFailedEvent(checkAllLedgersDuration, TimeUnit.MILLISECONDS); + } + } + } + + @Override + public void shutdown() { + LOG.info("Shutting down AuditorCheckAllLedgersTask"); + ledgerCheckerExecutor.shutdown(); + try { + while (!ledgerCheckerExecutor.awaitTermination(30, TimeUnit.SECONDS)) { + LOG.warn("Executor for ledger checker not shutting down, interrupting"); + ledgerCheckerExecutor.shutdownNow(); + } + + } catch (InterruptedException ie) { + Thread.currentThread().interrupt(); + LOG.warn("Interrupted while shutting down AuditorCheckAllLedgersTask", ie); + } + } + + /** + * List all the ledgers and check them individually. This should not + * be run very often. 
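The checkAllLedgers() method that follows throttles ledger opens: a Semaphore sized by auditorMaxNumberOfConcurrentOpenLedgerOperations caps in-flight asyncOpenLedgerNoRecovery calls, tryAcquire bounds the wait, and the permit is released from the open callback. A minimal standalone sketch of that back-pressure pattern (the class, method, and openLedgerAsync names here are illustrative stand-ins, not part of the patch):

```java
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.Semaphore;
import java.util.concurrent.TimeUnit;

/** Sketch: cap the number of in-flight async ledger opens with a semaphore. */
public class BoundedAsyncOpen {
    private final Semaphore permits;
    private final long waitTimeoutMs;

    BoundedAsyncOpen(int maxInFlight, long waitTimeoutMs) {
        this.permits = new Semaphore(maxInFlight);
        this.waitTimeoutMs = waitTimeoutMs;
    }

    /** Returns false if no permit became available in time; the caller aborts the scan. */
    boolean tryOpen(long ledgerId) throws InterruptedException {
        if (!permits.tryAcquire(waitTimeoutMs, TimeUnit.MILLISECONDS)) {
            return false;
        }
        // Release the permit from the completion callback, success or failure.
        openLedgerAsync(ledgerId).whenComplete((handle, error) -> permits.release());
        return true;
    }

    // Placeholder for BookKeeperAdmin#asyncOpenLedgerNoRecovery.
    private CompletableFuture<Object> openLedgerAsync(long ledgerId) {
        return CompletableFuture.completedFuture(new Object());
    }
}
```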
+ */ + void checkAllLedgers() throws BKException, IOException, InterruptedException { + final BookKeeper localClient = getBookKeeper(conf); + final BookKeeperAdmin localAdmin = getBookKeeperAdmin(localClient); + try { + final LedgerChecker checker = new LedgerChecker(localClient, conf.getInFlightReadEntryNumInLedgerChecker()); + + final CompletableFuture processFuture = new CompletableFuture<>(); + + BookkeeperInternalCallbacks.Processor checkLedgersProcessor = (ledgerId, callback) -> { + try { + if (!ledgerUnderreplicationManager.isLedgerReplicationEnabled()) { + LOG.info("Ledger rereplication has been disabled, aborting periodic check"); + FutureUtils.complete(processFuture, null); + return; + } + } catch (ReplicationException.NonRecoverableReplicationException nre) { + LOG.error("Non Recoverable Exception while reading from ZK", nre); + submitShutdownTask(); + return; + } catch (ReplicationException.UnavailableException ue) { + LOG.error("Underreplication manager unavailable running periodic check", ue); + FutureUtils.complete(processFuture, null); + return; + } + + try { + if (!openLedgerNoRecoverySemaphore.tryAcquire(openLedgerNoRecoverySemaphoreWaitTimeoutMSec, + TimeUnit.MILLISECONDS)) { + LOG.warn("Failed to acquire semaphore for {} ms, ledgerId: {}", + openLedgerNoRecoverySemaphoreWaitTimeoutMSec, ledgerId); + FutureUtils.complete(processFuture, null); + return; + } + } catch (InterruptedException e) { + LOG.error("Unable to acquire open ledger operation semaphore ", e); + Thread.currentThread().interrupt(); + FutureUtils.complete(processFuture, null); + return; + } + + localAdmin.asyncOpenLedgerNoRecovery(ledgerId, (rc, lh, ctx) -> { + openLedgerNoRecoverySemaphore.release(); + if (BKException.Code.OK == rc) { + // BookKeeperClientWorker-OrderedExecutor threads should not execute LedgerChecker#checkLedger + // as this can lead to deadlocks + ledgerCheckerExecutor.execute(() -> { + checker.checkLedger(lh, + // the ledger handle will be closed after checkLedger is done. 
+ new ProcessLostFragmentsCb(lh, callback), + conf.getAuditorLedgerVerificationPercentage()); + // we collect the following stats to get a measure of the + // distribution of a single ledger within the bk cluster + // the higher the number of fragments/bookies, the more distributed it is + auditorStats.getNumFragmentsPerLedger().registerSuccessfulValue(lh.getNumFragments()); + auditorStats.getNumBookiesPerLedger().registerSuccessfulValue(lh.getNumBookies()); + auditorStats.getNumLedgersChecked().inc(); + }); + } else if (BKException.Code.NoSuchLedgerExistsOnMetadataServerException == rc) { + if (LOG.isDebugEnabled()) { + LOG.debug("Ledger {} was deleted before we could check it", ledgerId); + } + callback.processResult(BKException.Code.OK, null, null); + } else { + LOG.error("Couldn't open ledger {} to check : {}", ledgerId, BKException.getMessage(rc)); + callback.processResult(rc, null, null); + } + }, null); + }; + + ledgerManager.asyncProcessLedgers(checkLedgersProcessor, + (rc, path, ctx) -> { + if (BKException.Code.OK == rc) { + FutureUtils.complete(processFuture, null); + } else { + FutureUtils.completeExceptionally(processFuture, BKException.create(rc)); + } + }, null, BKException.Code.OK, BKException.Code.ReadException); + FutureUtils.result(processFuture, BKException.HANDLER); + try { + ledgerUnderreplicationManager.setCheckAllLedgersCTime(System.currentTimeMillis()); + } catch (ReplicationException.NonRecoverableReplicationException nre) { + LOG.error("Non Recoverable Exception while reading from ZK", nre); + submitShutdownTask(); + } catch (ReplicationException.UnavailableException ue) { + LOG.error("Got exception while trying to set checkAllLedgersCTime", ue); + } + } finally { + localAdmin.close(); + localClient.close(); + } + } + + /** + * Process the result returned from checking a ledger. 
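ProcessLostFragmentsCb, defined next, reduces the checker's result to a simple decision: no lost fragments completes the per-ledger callback with OK, otherwise the ledger is published as suspected and the callback result depends on whether that publish succeeded. A compact sketch of that decision, where publishSuspected stands in for publishSuspectedLedgersAsync and 0/-1 stand in for the OK and ReplicationException result codes:

```java
import java.util.Set;
import java.util.concurrent.CompletableFuture;

/** Sketch: map the set of lost-fragment bookies to a per-ledger result code. */
public class LostFragmentDecision {
    static CompletableFuture<Integer> resolve(long ledgerId, Set<String> lostBookies) {
        if (lostBookies.isEmpty()) {
            return CompletableFuture.completedFuture(0); // nothing missing -> OK
        }
        // Suspected ledgers are published asynchronously; failure to publish
        // surfaces as a replication error on the callback.
        return publishSuspected(ledgerId, lostBookies)
                .handle((ok, cause) -> cause == null ? 0 : -1);
    }

    static CompletableFuture<Void> publishSuspected(long ledgerId, Set<String> bookies) {
        System.out.println("ledger " + ledgerId + " suspected on " + bookies);
        return CompletableFuture.completedFuture(null);
    }
}
```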
+     */
+    private class ProcessLostFragmentsCb implements BookkeeperInternalCallbacks.GenericCallback<Set<LedgerFragment>> {
+        final LedgerHandle lh;
+        final AsyncCallback.VoidCallback callback;
+
+        ProcessLostFragmentsCb(LedgerHandle lh, AsyncCallback.VoidCallback callback) {
+            this.lh = lh;
+            this.callback = callback;
+        }
+
+        @Override
+        public void operationComplete(int rc, Set<LedgerFragment> fragments) {
+            if (rc == BKException.Code.OK) {
+                Set<BookieId> bookies = Sets.newHashSet();
+                for (LedgerFragment f : fragments) {
+                    bookies.addAll(f.getAddresses());
+                }
+                if (bookies.isEmpty()) {
+                    // no missing fragments
+                    callback.processResult(BKException.Code.OK, null, null);
+                } else {
+                    publishSuspectedLedgersAsync(bookies.stream().map(BookieId::toString).collect(Collectors.toList()),
+                            Sets.newHashSet(lh.getId())
+                    ).whenComplete((result, cause) -> {
+                        if (null != cause) {
+                            LOG.error("Auditor exception publishing suspected ledger {} with lost bookies {}",
+                                    lh.getId(), bookies, cause);
+                            callback.processResult(BKException.Code.ReplicationException, null, null);
+                        } else {
+                            callback.processResult(BKException.Code.OK, null, null);
+                        }
+                    });
+                }
+            } else {
+                callback.processResult(rc, null, null);
+            }
+            lh.closeAsync().whenComplete((result, cause) -> {
+                if (null != cause) {
+                    LOG.warn("Error closing ledger {} : {}", lh.getId(), cause.getMessage());
+                }
+            });
+        }
+    }
+}
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/AuditorElector.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/AuditorElector.java
index 8cdf3b36570..f6b3a3a04f2 100644
--- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/AuditorElector.java
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/AuditorElector.java
@@ -1,4 +1,4 @@
-/**
+/*
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.
See the NOTICE file @@ -20,41 +20,29 @@ */ package org.apache.bookkeeper.replication; -import static com.google.common.base.Charsets.UTF_8; -import static org.apache.bookkeeper.replication.ReplicationStats.ELECTION_ATTEMPTS; +import static org.apache.bookkeeper.replication.ReplicationStats.AUDITOR_SCOPE; import com.google.common.annotations.VisibleForTesting; -import com.google.protobuf.TextFormat; - import java.io.IOException; -import java.io.Serializable; -import java.util.Collections; -import java.util.Comparator; -import java.util.List; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.RejectedExecutionException; import java.util.concurrent.ThreadFactory; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicBoolean; - +import org.apache.bookkeeper.client.BKException; +import org.apache.bookkeeper.client.BookKeeper; import org.apache.bookkeeper.conf.ServerConfiguration; -import org.apache.bookkeeper.meta.zk.ZKMetadataDriverBase; -import org.apache.bookkeeper.net.BookieSocketAddress; -import org.apache.bookkeeper.proto.DataFormats.AuditorVoteFormat; +import org.apache.bookkeeper.meta.LedgerAuditorManager; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.replication.ReplicationException.UnavailableException; -import org.apache.bookkeeper.stats.Counter; import org.apache.bookkeeper.stats.NullStatsLogger; import org.apache.bookkeeper.stats.StatsLogger; -import org.apache.bookkeeper.util.BookKeeperConstants; -import org.apache.bookkeeper.util.ZkUtils; -import org.apache.commons.lang.StringUtils; -import org.apache.zookeeper.CreateMode; -import org.apache.zookeeper.KeeperException; -import org.apache.zookeeper.WatchedEvent; -import org.apache.zookeeper.Watcher; -import org.apache.zookeeper.Watcher.Event.EventType; -import org.apache.zookeeper.Watcher.Event.KeeperState; -import org.apache.zookeeper.ZooKeeper; -import org.apache.zookeeper.data.ACL; +import org.apache.bookkeeper.stats.annotations.StatsDoc; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -67,35 +55,37 @@ * will be elected as Auditor. All the other bookies will be watching on their * predecessor znode according to the ephemeral sequence numbers. 
*/ +@StatsDoc( + name = AUDITOR_SCOPE, + help = "Auditor related stats" +) public class AuditorElector { private static final Logger LOG = LoggerFactory .getLogger(AuditorElector.class); - // Represents the index of the auditor node - private static final int AUDITOR_INDEX = 0; - // Represents vote prefix - private static final String VOTE_PREFIX = "V_"; - // Represents path Separator - private static final String PATH_SEPARATOR = "/"; - private static final String ELECTION_ZNODE = "auditorelection"; - // Represents urLedger path in zk - private final String basePath; - // Represents auditor election path in zk - private final String electionPath; private final String bookieId; private final ServerConfiguration conf; - private final ZooKeeper zkc; + private final BookKeeper bkc; + private final boolean ownBkc; private final ExecutorService executor; + private final LedgerAuditorManager ledgerAuditorManager; - private String myVote; Auditor auditor; private AtomicBoolean running = new AtomicBoolean(false); - // Expose Stats - private final Counter electionAttempts; + private final StatsLogger statsLogger; + @VisibleForTesting + public AuditorElector(final String bookieId, ServerConfiguration conf) throws UnavailableException { + this( + bookieId, + conf, + Auditor.createBookKeeperClientThrowUnavailableException(conf), + true); + } + /** * AuditorElector for performing the auditor election. * @@ -103,14 +93,16 @@ public class AuditorElector { * - bookie identifier, comprises HostAddress:Port * @param conf * - configuration - * @param zkc - * - ZK instance + * @param bkc + * - bookkeeper instance * @throws UnavailableException * throws unavailable exception while initializing the elector */ - public AuditorElector(final String bookieId, ServerConfiguration conf, - ZooKeeper zkc) throws UnavailableException { - this(bookieId, conf, zkc, NullStatsLogger.INSTANCE); + public AuditorElector(final String bookieId, + ServerConfiguration conf, + BookKeeper bkc, + boolean ownBkc) throws UnavailableException { + this(bookieId, conf, bkc, NullStatsLogger.INSTANCE, ownBkc); } /** @@ -120,24 +112,28 @@ public AuditorElector(final String bookieId, ServerConfiguration conf, * - bookie identifier, comprises HostAddress:Port * @param conf * - configuration - * @param zkc - * - ZK instance + * @param bkc + * - bookkeeper instance * @param statsLogger * - stats logger * @throws UnavailableException * throws unavailable exception while initializing the elector */ - public AuditorElector(final String bookieId, ServerConfiguration conf, - ZooKeeper zkc, StatsLogger statsLogger) throws UnavailableException { + public AuditorElector(final String bookieId, + ServerConfiguration conf, + BookKeeper bkc, + StatsLogger statsLogger, + boolean ownBkc) throws UnavailableException { this.bookieId = bookieId; this.conf = conf; - this.zkc = zkc; + this.bkc = bkc; + this.ownBkc = ownBkc; this.statsLogger = statsLogger; - this.electionAttempts = statsLogger.getCounter(ELECTION_ATTEMPTS); - basePath = ZKMetadataDriverBase.resolveZkLedgersRootPath(conf) + '/' - + BookKeeperConstants.UNDER_REPLICATION_NODE; - electionPath = basePath + '/' + ELECTION_ZNODE; - createElectorPath(); + try { + this.ledgerAuditorManager = bkc.getLedgerManagerFactory().newLedgerAuditorManager(); + } catch (Exception e) { + throw new UnavailableException("Failed to instantiate the ledger auditor manager", e); + } executor = Executors.newSingleThreadExecutor(new ThreadFactory() { @Override public Thread newThread(Runnable r) { @@ -146,95 +142,35 @@ public 
Thread newThread(Runnable r) { }); } - private void createMyVote() throws KeeperException, InterruptedException { - if (null == myVote || null == zkc.exists(myVote, false)) { - List zkAcls = ZkUtils.getACLs(conf); - AuditorVoteFormat.Builder builder = AuditorVoteFormat.newBuilder() - .setBookieId(bookieId); - myVote = zkc.create(getVotePath(PATH_SEPARATOR + VOTE_PREFIX), - TextFormat.printToString(builder.build()).getBytes(UTF_8), zkAcls, - CreateMode.EPHEMERAL_SEQUENTIAL); - } + public Future start() { + running.set(true); + return submitElectionTask(); } - private String getVotePath(String vote) { - return electionPath + vote; + /** + * Run cleanup operations for the auditor elector. + */ + private Future submitShutdownTask() { + return executor.submit(shutdownTask); } - private void createElectorPath() throws UnavailableException { - try { - List zkAcls = ZkUtils.getACLs(conf); - if (zkc.exists(basePath, false) == null) { - try { - zkc.create(basePath, new byte[0], zkAcls, - CreateMode.PERSISTENT); - } catch (KeeperException.NodeExistsException nee) { - // do nothing, someone else could have created it - } - } - if (zkc.exists(getVotePath(""), false) == null) { - try { - zkc.create(getVotePath(""), new byte[0], - zkAcls, CreateMode.PERSISTENT); - } catch (KeeperException.NodeExistsException nee) { - // do nothing, someone else could have created it - } + Runnable shutdownTask = new Runnable() { + @Override + public void run() { + if (!running.compareAndSet(true, false)) { + return; } - } catch (KeeperException ke) { - throw new UnavailableException( - "Failed to initialize Auditor Elector", ke); - } catch (InterruptedException ie) { - Thread.currentThread().interrupt(); - throw new UnavailableException( - "Failed to initialize Auditor Elector", ie); - } - } - /** - * Watching the predecessor bookies and will do election on predecessor node - * deletion or expiration. - */ - private class ElectionWatcher implements Watcher { - @Override - public void process(WatchedEvent event) { - if (event.getState() == KeeperState.Expired) { - LOG.error("Lost ZK connection, shutting down"); - submitShutdownTask(); - } else if (event.getType() == EventType.NodeDeleted) { - submitElectionTask(); + try { + ledgerAuditorManager.close(); + } catch (InterruptedException ie) { + Thread.currentThread().interrupt(); + LOG.warn("InterruptedException while closing ledger auditor manager", ie); + } catch (Exception ke) { + LOG.error("Exception while closing ledger auditor manager", ke); } } - } - - public void start() { - running.set(true); - submitElectionTask(); - } - - /** - * Run cleanup operations for the auditor elector. - */ - private void submitShutdownTask() { - executor.submit(new Runnable() { - public void run() { - if (!running.compareAndSet(true, false)) { - return; - } - LOG.info("Shutting down AuditorElector"); - if (myVote != null) { - try { - zkc.delete(myVote, -1); - } catch (InterruptedException ie) { - Thread.currentThread().interrupt(); - LOG.warn("InterruptedException while deleting myVote: " + myVote, - ie); - } catch (KeeperException ke) { - LOG.error("Exception while deleting myVote:" + myVote, ke); - } - } - } - }); - } + }; /** * Performing the auditor election using the ZooKeeper ephemeral sequential @@ -242,67 +178,50 @@ public void run() { * Auditor. */ @VisibleForTesting - void submitElectionTask() { + Future submitElectionTask() { Runnable r = new Runnable() { + @Override public void run() { if (!running.get()) { return; } try { - // creating my vote in zk. 
Vote format is 'V_numeric' - createMyVote(); - List children = zkc.getChildren(getVotePath(""), false); - - if (0 >= children.size()) { - throw new IllegalArgumentException( - "Atleast one bookie server should present to elect the Auditor!"); - } - - // sorting in ascending order of sequential number - Collections.sort(children, new ElectionComparator()); - String voteNode = StringUtils.substringAfterLast(myVote, - PATH_SEPARATOR); - - // starting Auditing service - if (children.get(AUDITOR_INDEX).equals(voteNode)) { - // update the auditor bookie id in the election path. This is - // done for debugging purpose - AuditorVoteFormat.Builder builder = AuditorVoteFormat.newBuilder() - .setBookieId(bookieId); + ledgerAuditorManager.tryToBecomeAuditor(bookieId, e -> handleAuditorEvent(e)); - zkc.setData(getVotePath(""), - TextFormat.printToString(builder.build()).getBytes(UTF_8), -1); - auditor = new Auditor(bookieId, conf, zkc, statsLogger); - auditor.start(); - } else { - // If not an auditor, will be watching to my predecessor and - // looking the previous node deletion. - Watcher electionWatcher = new ElectionWatcher(); - int myIndex = children.indexOf(voteNode); - int prevNodeIndex = myIndex - 1; - if (null == zkc.exists(getVotePath(PATH_SEPARATOR) - + children.get(prevNodeIndex), electionWatcher)) { - // While adding, the previous znode doesn't exists. - // Again going to election. - submitElectionTask(); - } - electionAttempts.inc(); - } - } catch (KeeperException e) { - LOG.error("Exception while performing auditor election", e); - submitShutdownTask(); + auditor = new Auditor(bookieId, conf, bkc, false, statsLogger); + auditor.start(); } catch (InterruptedException e) { LOG.error("Interrupted while performing auditor election", e); Thread.currentThread().interrupt(); submitShutdownTask(); - } catch (UnavailableException e) { - LOG.error("Ledger underreplication manager unavailable during election", e); + } catch (Exception e) { + LOG.error("Exception while performing auditor election", e); submitShutdownTask(); } } }; - executor.submit(r); + try { + return executor.submit(r); + } catch (RejectedExecutionException e) { + if (LOG.isDebugEnabled()) { + LOG.debug("Executor was already closed"); + } + return CompletableFuture.completedFuture(null); + } + } + + private void handleAuditorEvent(LedgerAuditorManager.AuditorEvent e) { + switch (e) { + case SessionLost: + LOG.error("Lost ZK connection, shutting down"); + submitShutdownTask(); + break; + + case VoteWasDeleted: + submitElectionTask(); + break; + } } @VisibleForTesting @@ -310,29 +229,9 @@ Auditor getAuditor() { return auditor; } - /** - * Query zookeeper for the currently elected auditor. 
- * @return the bookie id of the current auditor - */ - public static BookieSocketAddress getCurrentAuditor(ServerConfiguration conf, ZooKeeper zk) - throws KeeperException, InterruptedException, IOException { - String electionRoot = ZKMetadataDriverBase.resolveZkLedgersRootPath(conf) + '/' - + BookKeeperConstants.UNDER_REPLICATION_NODE + '/' + ELECTION_ZNODE; - - List children = zk.getChildren(electionRoot, false); - Collections.sort(children, new AuditorElector.ElectionComparator()); - if (children.size() < 1) { - return null; - } - String ledger = electionRoot + "/" + children.get(AUDITOR_INDEX); - byte[] data = zk.getData(ledger, false, null); - AuditorVoteFormat.Builder builder = AuditorVoteFormat.newBuilder(); - TextFormat.merge(new String(data, UTF_8), builder); - AuditorVoteFormat v = builder.build(); - String[] parts = v.getBookieId().split(":"); - return new BookieSocketAddress(parts[0], - Integer.parseInt(parts[1])); + public BookieId getCurrentAuditor() throws IOException, InterruptedException { + return ledgerAuditorManager.getCurrentAuditor(); } /** @@ -343,14 +242,32 @@ public void shutdown() throws InterruptedException { if (executor.isShutdown()) { return; } - submitShutdownTask(); - executor.shutdown(); + // close auditor manager + try { + submitShutdownTask().get(10, TimeUnit.SECONDS); + executor.shutdown(); + } catch (ExecutionException e) { + LOG.warn("Failed to close auditor manager", e); + executor.shutdownNow(); + shutdownTask.run(); + } catch (TimeoutException e) { + LOG.warn("Failed to close auditor manager in 10 seconds", e); + executor.shutdownNow(); + shutdownTask.run(); + } } if (auditor != null) { auditor.shutdown(); auditor = null; } + if (ownBkc) { + try { + bkc.close(); + } catch (BKException e) { + LOG.warn("Failed to close bookkeeper client", e); + } + } } /** @@ -370,29 +287,4 @@ public boolean isRunning() { public String toString() { return "AuditorElector for " + bookieId; } - - /** - * Compare the votes in the ascending order of the sequence number. Vote - * format is 'V_sequencenumber', comparator will do sorting based on the - * numeric sequence value. - */ - private static class ElectionComparator - implements Comparator, Serializable { - /** - * Return -1 if the first vote is less than second. Return 1 if the - * first vote is greater than second. Return 0 if the votes are equal. - */ - public int compare(String vote1, String vote2) { - long voteSeqId1 = getVoteSequenceId(vote1); - long voteSeqId2 = getVoteSequenceId(vote2); - int result = voteSeqId1 < voteSeqId2 ? -1 - : (voteSeqId1 > voteSeqId2 ? 1 : 0); - return result; - } - - private long getVoteSequenceId(String vote) { - String voteId = StringUtils.substringAfter(vote, VOTE_PREFIX); - return Long.parseLong(voteId); - } - } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/AuditorPlacementPolicyCheckTask.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/AuditorPlacementPolicyCheckTask.java new file mode 100644 index 00000000000..7479e195536 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/AuditorPlacementPolicyCheckTask.java @@ -0,0 +1,322 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bookkeeper.replication; + +import com.google.common.base.Stopwatch; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.BiConsumer; +import lombok.Getter; +import org.apache.bookkeeper.client.BKException; +import org.apache.bookkeeper.client.BookKeeperAdmin; +import org.apache.bookkeeper.client.EnsemblePlacementPolicy; +import org.apache.bookkeeper.client.api.LedgerMetadata; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.meta.LedgerManager; +import org.apache.bookkeeper.meta.LedgerUnderreplicationManager; +import org.apache.bookkeeper.meta.UnderreplicatedLedger; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks; +import org.apache.bookkeeper.versioning.Versioned; +import org.apache.zookeeper.AsyncCallback; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +@Getter +public class AuditorPlacementPolicyCheckTask extends AuditorTask { + private static final Logger LOG = LoggerFactory.getLogger(AuditorPlacementPolicyCheckTask.class); + + private final long underreplicatedLedgerRecoveryGracePeriod; + + private final AtomicInteger numOfLedgersFoundNotAdheringInPlacementPolicyCheck; + private final AtomicInteger numOfLedgersFoundSoftlyAdheringInPlacementPolicyCheck; + private final AtomicInteger numOfClosedLedgersAuditedInPlacementPolicyCheck; + private final AtomicInteger numOfURLedgersElapsedRecoveryGracePeriod; + + AuditorPlacementPolicyCheckTask(ServerConfiguration conf, + AuditorStats auditorStats, + BookKeeperAdmin admin, + LedgerManager ledgerManager, + LedgerUnderreplicationManager ledgerUnderreplicationManager, + ShutdownTaskHandler shutdownTaskHandler, + BiConsumer hasAuditCheckTask) { + super(conf, auditorStats, admin, ledgerManager, + ledgerUnderreplicationManager, shutdownTaskHandler, hasAuditCheckTask); + this.underreplicatedLedgerRecoveryGracePeriod = conf.getUnderreplicatedLedgerRecoveryGracePeriod(); + this.numOfLedgersFoundNotAdheringInPlacementPolicyCheck = new AtomicInteger(0); + this.numOfLedgersFoundSoftlyAdheringInPlacementPolicyCheck = new AtomicInteger(0); + this.numOfClosedLedgersAuditedInPlacementPolicyCheck = new AtomicInteger(0); + this.numOfURLedgersElapsedRecoveryGracePeriod = new AtomicInteger(0); + } + + @Override + protected void runTask() { + if (hasBookieCheckTask()) { + LOG.info("Audit bookie task already scheduled; skipping periodic placement policy check task"); + auditorStats.getNumSkippingCheckTaskTimes().inc(); + return; + } + + try { + if (!isLedgerReplicationEnabled()) { + LOG.info("Ledger replication disabled, skipping placementPolicyCheck"); + return; + } 
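Each auditor task brackets its check with a Guava Stopwatch and records the elapsed time as either a success or a failure event, as the code below does for placementPolicyCheck. A minimal sketch of that timing discipline (recordSuccess and recordFailure are illustrative stand-ins for the OpStatsLogger calls on auditorStats):

```java
import com.google.common.base.Stopwatch;
import java.util.concurrent.TimeUnit;

/** Sketch: time a periodic check and record the duration as success or failure. */
public class TimedCheck {
    interface Check {
        void run() throws Exception;
    }

    static void runTimed(Check check) {
        Stopwatch stopwatch = Stopwatch.createStarted();
        boolean success = false;
        try {
            check.run();
            success = true;
        } catch (Exception e) {
            // a real task logs and classifies the exception here
        } finally {
            long elapsedMs = stopwatch.stop().elapsed(TimeUnit.MILLISECONDS);
            if (success) {
                recordSuccess(elapsedMs);
            } else {
                recordFailure(elapsedMs);
            }
        }
    }

    static void recordSuccess(long ms) { System.out.println("check ok in " + ms + "ms"); }
    static void recordFailure(long ms) { System.out.println("check failed in " + ms + "ms"); }
}
```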
+ + Stopwatch stopwatch = Stopwatch.createStarted(); + LOG.info("Starting PlacementPolicyCheck"); + placementPolicyCheck(); + long placementPolicyCheckDuration = stopwatch.stop().elapsed(TimeUnit.MILLISECONDS); + int numOfLedgersFoundNotAdheringInPlacementPolicyCheckValue = + numOfLedgersFoundNotAdheringInPlacementPolicyCheck.get(); + int numOfLedgersFoundSoftlyAdheringInPlacementPolicyCheckValue = + numOfLedgersFoundSoftlyAdheringInPlacementPolicyCheck.get(); + int numOfClosedLedgersAuditedInPlacementPolicyCheckValue = + numOfClosedLedgersAuditedInPlacementPolicyCheck.get(); + int numOfURLedgersElapsedRecoveryGracePeriodValue = + numOfURLedgersElapsedRecoveryGracePeriod.get(); + LOG.info( + "Completed placementPolicyCheck in {} milliSeconds." + + " numOfClosedLedgersAuditedInPlacementPolicyCheck {}" + + " numOfLedgersNotAdheringToPlacementPolicy {}" + + " numOfLedgersSoftlyAdheringToPlacementPolicy {}" + + " numOfURLedgersElapsedRecoveryGracePeriod {}", + placementPolicyCheckDuration, numOfClosedLedgersAuditedInPlacementPolicyCheckValue, + numOfLedgersFoundNotAdheringInPlacementPolicyCheckValue, + numOfLedgersFoundSoftlyAdheringInPlacementPolicyCheckValue, + numOfURLedgersElapsedRecoveryGracePeriodValue); + auditorStats.getLedgersNotAdheringToPlacementPolicyGuageValue() + .set(numOfLedgersFoundNotAdheringInPlacementPolicyCheckValue); + auditorStats.getLedgersSoftlyAdheringToPlacementPolicyGuageValue() + .set(numOfLedgersFoundSoftlyAdheringInPlacementPolicyCheckValue); + auditorStats.getNumOfURLedgersElapsedRecoveryGracePeriodGuageValue() + .set(numOfURLedgersElapsedRecoveryGracePeriodValue); + auditorStats.getPlacementPolicyCheckTime().registerSuccessfulEvent(placementPolicyCheckDuration, + TimeUnit.MILLISECONDS); + } catch (ReplicationException.BKAuditException e) { + int numOfLedgersFoundInPlacementPolicyCheckValue = + numOfLedgersFoundNotAdheringInPlacementPolicyCheck.get(); + if (numOfLedgersFoundInPlacementPolicyCheckValue > 0) { + /* + * Though there is BKAuditException while doing + * placementPolicyCheck, it found few ledgers not + * adhering to placement policy. So reporting it. + */ + auditorStats.getLedgersNotAdheringToPlacementPolicyGuageValue() + .set(numOfLedgersFoundInPlacementPolicyCheckValue); + } + + int numOfLedgersFoundSoftlyAdheringInPlacementPolicyCheckValue = + numOfLedgersFoundSoftlyAdheringInPlacementPolicyCheck.get(); + if (numOfLedgersFoundSoftlyAdheringInPlacementPolicyCheckValue > 0) { + /* + * Though there is BKAuditException while doing + * placementPolicyCheck, it found few ledgers softly + * adhering to placement policy. So reporting it. + */ + auditorStats.getLedgersSoftlyAdheringToPlacementPolicyGuageValue() + .set(numOfLedgersFoundSoftlyAdheringInPlacementPolicyCheckValue); + } + + int numOfURLedgersElapsedRecoveryGracePeriodValue = + numOfURLedgersElapsedRecoveryGracePeriod.get(); + if (numOfURLedgersElapsedRecoveryGracePeriodValue > 0) { + /* + * Though there is BKAuditException while doing + * placementPolicyCheck, it found few urledgers have + * elapsed recovery graceperiod. So reporting it. + */ + auditorStats.getNumOfURLedgersElapsedRecoveryGracePeriodGuageValue() + .set(numOfURLedgersElapsedRecoveryGracePeriodValue); + } + + LOG.error( + "BKAuditException running periodic placementPolicy check." 
+ + "numOfLedgersNotAdheringToPlacementPolicy {}, " + + "numOfLedgersSoftlyAdheringToPlacementPolicy {}," + + "numOfURLedgersElapsedRecoveryGracePeriod {}", + numOfLedgersFoundInPlacementPolicyCheckValue, + numOfLedgersFoundSoftlyAdheringInPlacementPolicyCheckValue, + numOfURLedgersElapsedRecoveryGracePeriodValue, e); + } catch (ReplicationException.UnavailableException ue) { + LOG.error("Underreplication manager unavailable running periodic check", ue); + } + } + + @Override + public void shutdown() { + + } + + void placementPolicyCheck() throws ReplicationException.BKAuditException { + final CountDownLatch placementPolicyCheckLatch = new CountDownLatch(1); + numOfLedgersFoundNotAdheringInPlacementPolicyCheck.set(0); + numOfLedgersFoundSoftlyAdheringInPlacementPolicyCheck.set(0); + numOfClosedLedgersAuditedInPlacementPolicyCheck.set(0); + numOfURLedgersElapsedRecoveryGracePeriod.set(0); + if (this.underreplicatedLedgerRecoveryGracePeriod > 0) { + Iterator underreplicatedLedgersInfo = ledgerUnderreplicationManager + .listLedgersToRereplicate(null); + List urLedgersElapsedRecoveryGracePeriod = new ArrayList(); + while (underreplicatedLedgersInfo.hasNext()) { + UnderreplicatedLedger underreplicatedLedger = underreplicatedLedgersInfo.next(); + long underreplicatedLedgerMarkTimeInMilSecs = underreplicatedLedger.getCtime(); + if (underreplicatedLedgerMarkTimeInMilSecs != UnderreplicatedLedger.UNASSIGNED_CTIME) { + long elapsedTimeInSecs = + (System.currentTimeMillis() - underreplicatedLedgerMarkTimeInMilSecs) / 1000; + if (elapsedTimeInSecs > this.underreplicatedLedgerRecoveryGracePeriod) { + urLedgersElapsedRecoveryGracePeriod.add(underreplicatedLedger.getLedgerId()); + numOfURLedgersElapsedRecoveryGracePeriod.incrementAndGet(); + } + } + } + if (urLedgersElapsedRecoveryGracePeriod.isEmpty()) { + LOG.info("No Underreplicated ledger has elapsed recovery graceperiod: {}", + urLedgersElapsedRecoveryGracePeriod); + } else { + LOG.error("Following Underreplicated ledgers have elapsed recovery graceperiod: {}", + urLedgersElapsedRecoveryGracePeriod); + } + } + BookkeeperInternalCallbacks.Processor ledgerProcessor = + new BookkeeperInternalCallbacks.Processor() { + @Override + public void process(Long ledgerId, AsyncCallback.VoidCallback iterCallback) { + ledgerManager.readLedgerMetadata(ledgerId).whenComplete((metadataVer, exception) -> { + if (exception == null) { + doPlacementPolicyCheck(ledgerId, iterCallback, metadataVer); + } else if (BKException.getExceptionCode(exception) + == BKException.Code.NoSuchLedgerExistsOnMetadataServerException) { + if (LOG.isDebugEnabled()) { + LOG.debug("Ignoring replication of already deleted ledger {}", + ledgerId); + } + iterCallback.processResult(BKException.Code.OK, null, null); + } else { + LOG.warn("Unable to read the ledger: {} information", ledgerId); + iterCallback.processResult(BKException.getExceptionCode(exception), null, null); + } + }); + } + }; + // Reading the result after processing all the ledgers + final List resultCode = new ArrayList(1); + ledgerManager.asyncProcessLedgers(ledgerProcessor, new AsyncCallback.VoidCallback() { + + @Override + public void processResult(int rc, String s, Object obj) { + resultCode.add(rc); + placementPolicyCheckLatch.countDown(); + } + }, null, BKException.Code.OK, BKException.Code.ReadException); + try { + placementPolicyCheckLatch.await(); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new ReplicationException.BKAuditException("Exception while doing placementPolicy check", e); 
+        }
+        if (!resultCode.contains(BKException.Code.OK)) {
+            throw new ReplicationException.BKAuditException("Exception while doing placementPolicy check",
+                    BKException.create(resultCode.get(0)));
+        }
+        try {
+            ledgerUnderreplicationManager.setPlacementPolicyCheckCTime(System.currentTimeMillis());
+        } catch (ReplicationException.NonRecoverableReplicationException nre) {
+            LOG.error("Non Recoverable Exception while reading from ZK", nre);
+            submitShutdownTask();
+        } catch (ReplicationException.UnavailableException ue) {
+            LOG.error("Got exception while trying to set PlacementPolicyCheckCTime", ue);
+        }
+    }
+
+    void doPlacementPolicyCheck(Long ledgerId,
+                                AsyncCallback.VoidCallback iterCallback,
+                                Versioned<LedgerMetadata> metadataVer) {
+        LedgerMetadata metadata = metadataVer.getValue();
+        int writeQuorumSize = metadata.getWriteQuorumSize();
+        int ackQuorumSize = metadata.getAckQuorumSize();
+        if (metadata.isClosed()) {
+            boolean foundSegmentNotAdheringToPlacementPolicy = false;
+            boolean foundSegmentSoftlyAdheringToPlacementPolicy = false;
+            for (Map.Entry<Long, ? extends List<BookieId>> ensemble : metadata
+                    .getAllEnsembles().entrySet()) {
+                long startEntryIdOfSegment = ensemble.getKey();
+                List<BookieId> ensembleOfSegment = ensemble.getValue();
+                EnsemblePlacementPolicy.PlacementPolicyAdherence segmentAdheringToPlacementPolicy = admin
+                        .isEnsembleAdheringToPlacementPolicy(ensembleOfSegment, writeQuorumSize,
+                                ackQuorumSize);
+                if (segmentAdheringToPlacementPolicy == EnsemblePlacementPolicy.PlacementPolicyAdherence.FAIL) {
+                    foundSegmentNotAdheringToPlacementPolicy = true;
+                    LOG.warn(
+                            "For ledger: {}, Segment starting at entry: {}, with ensemble: {} having "
+                                    + "writeQuorumSize: {} and ackQuorumSize: {} is not adhering to "
+                                    + "EnsemblePlacementPolicy",
+                            ledgerId, startEntryIdOfSegment, ensembleOfSegment, writeQuorumSize,
+                            ackQuorumSize);
+                } else if (segmentAdheringToPlacementPolicy
+                        == EnsemblePlacementPolicy.PlacementPolicyAdherence.MEETS_SOFT) {
+                    foundSegmentSoftlyAdheringToPlacementPolicy = true;
+                    if (LOG.isDebugEnabled()) {
+                        LOG.debug(
+                                "For ledger: {}, Segment starting at entry: {}, with ensemble: {}"
+                                        + " having writeQuorumSize: {} and ackQuorumSize: {} is"
+                                        + " softly adhering to EnsemblePlacementPolicy",
+                                ledgerId, startEntryIdOfSegment, ensembleOfSegment, writeQuorumSize,
+                                ackQuorumSize);
+                    }
+                }
+            }
+            if (foundSegmentNotAdheringToPlacementPolicy) {
+                numOfLedgersFoundNotAdheringInPlacementPolicyCheck.incrementAndGet();
+                // If repairing of placement-policy violations is enabled, mark this ledger
+                // with the under replication manager.
+ if (conf.isRepairedPlacementPolicyNotAdheringBookieEnable()) { + ledgerUnderreplicationManager.markLedgerUnderreplicatedAsync(ledgerId, + Collections.emptyList()).whenComplete((res, e) -> { + if (e != null) { + LOG.error("For ledger: {}, the placement policy not adhering bookie " + + "storage, mark it to under replication manager failed.", + ledgerId, e); + return; + } + if (LOG.isDebugEnabled()) { + LOG.debug("For ledger: {}, the placement policy not adhering bookie" + + " storage, mark it to under replication manager", ledgerId); + } + }); + } + } else if (foundSegmentSoftlyAdheringToPlacementPolicy) { + numOfLedgersFoundSoftlyAdheringInPlacementPolicyCheck + .incrementAndGet(); + } + numOfClosedLedgersAuditedInPlacementPolicyCheck.incrementAndGet(); + } else { + if (LOG.isDebugEnabled()) { + LOG.debug("Ledger: {} is not yet closed, so skipping the placementPolicy" + + "check analysis for now", ledgerId); + } + } + iterCallback.processResult(BKException.Code.OK, null, null); + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/AuditorReplicasCheckTask.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/AuditorReplicasCheckTask.java new file mode 100644 index 00000000000..b4d98c39398 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/AuditorReplicasCheckTask.java @@ -0,0 +1,767 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.bookkeeper.replication; + +import com.google.common.base.Stopwatch; +import com.google.common.collect.HashMultiset; +import com.google.common.collect.Multiset; +import java.io.IOException; +import java.util.ArrayList; +import java.util.BitSet; +import java.util.Collections; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.Semaphore; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.BiConsumer; +import org.apache.bookkeeper.client.BKException; +import org.apache.bookkeeper.client.BookKeeperAdmin; +import org.apache.bookkeeper.client.RoundRobinDistributionSchedule; +import org.apache.bookkeeper.client.api.LedgerMetadata; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.meta.LedgerManager; +import org.apache.bookkeeper.meta.LedgerUnderreplicationManager; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.MultiCallback; +import org.apache.bookkeeper.util.AvailabilityOfEntriesOfLedger; +import org.apache.bookkeeper.versioning.Versioned; +import org.apache.zookeeper.AsyncCallback; +import org.apache.zookeeper.AsyncCallback.VoidCallback; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class AuditorReplicasCheckTask extends AuditorTask { + private static final Logger LOG = LoggerFactory.getLogger(AuditorReplicasCheckTask.class); + + private static final int MAX_CONCURRENT_REPLICAS_CHECK_LEDGER_REQUESTS = 100; + private static final int REPLICAS_CHECK_TIMEOUT_IN_SECS = 120; + private static final BitSet EMPTY_BITSET = new BitSet(); + + private final int zkOpTimeoutMs; + + private final AtomicInteger numLedgersFoundHavingNoReplicaOfAnEntry; + private final AtomicInteger numLedgersFoundHavingLessThanAQReplicasOfAnEntry; + private final AtomicInteger numLedgersFoundHavingLessThanWQReplicasOfAnEntry; + + AuditorReplicasCheckTask(ServerConfiguration conf, + AuditorStats auditorStats, BookKeeperAdmin admin, + LedgerManager ledgerManager, + LedgerUnderreplicationManager ledgerUnderreplicationManager, + ShutdownTaskHandler shutdownTaskHandler, + BiConsumer hasAuditCheckTask) { + super(conf, auditorStats, admin, ledgerManager, + ledgerUnderreplicationManager, shutdownTaskHandler, hasAuditCheckTask); + this.zkOpTimeoutMs = conf.getZkTimeout() * 2; + this.numLedgersFoundHavingNoReplicaOfAnEntry = new AtomicInteger(0); + this.numLedgersFoundHavingLessThanAQReplicasOfAnEntry = new AtomicInteger(0); + this.numLedgersFoundHavingLessThanWQReplicasOfAnEntry = new AtomicInteger(0); + } + + @Override + protected void runTask() { + if (hasBookieCheckTask()) { + LOG.info("Audit bookie task already scheduled; skipping periodic replicas check task"); + auditorStats.getNumSkippingCheckTaskTimes().inc(); + return; + } + + try { + if (!ledgerUnderreplicationManager.isLedgerReplicationEnabled()) { + LOG.info("Ledger replication disabled, skipping replicasCheck task."); + return; + } + Stopwatch stopwatch = Stopwatch.createStarted(); + LOG.info("Starting ReplicasCheck"); + replicasCheck(); + long replicasCheckDuration = stopwatch.stop().elapsed(TimeUnit.MILLISECONDS); + int numLedgersFoundHavingNoReplicaOfAnEntryValue = + 
numLedgersFoundHavingNoReplicaOfAnEntry.get(); + int numLedgersFoundHavingLessThanAQReplicasOfAnEntryValue = + numLedgersFoundHavingLessThanAQReplicasOfAnEntry.get(); + int numLedgersFoundHavingLessThanWQReplicasOfAnEntryValue = + numLedgersFoundHavingLessThanWQReplicasOfAnEntry.get(); + LOG.info( + "Completed ReplicasCheck in {} milliSeconds numLedgersFoundHavingNoReplicaOfAnEntry {}" + + " numLedgersFoundHavingLessThanAQReplicasOfAnEntry {}" + + " numLedgersFoundHavingLessThanWQReplicasOfAnEntry {}.", + replicasCheckDuration, numLedgersFoundHavingNoReplicaOfAnEntryValue, + numLedgersFoundHavingLessThanAQReplicasOfAnEntryValue, + numLedgersFoundHavingLessThanWQReplicasOfAnEntryValue); + auditorStats.getNumLedgersHavingNoReplicaOfAnEntryGuageValue() + .set(numLedgersFoundHavingNoReplicaOfAnEntryValue); + auditorStats.getNumLedgersHavingLessThanAQReplicasOfAnEntryGuageValue() + .set(numLedgersFoundHavingLessThanAQReplicasOfAnEntryValue); + auditorStats.getNumLedgersHavingLessThanWQReplicasOfAnEntryGuageValue() + .set(numLedgersFoundHavingLessThanWQReplicasOfAnEntryValue); + auditorStats.getReplicasCheckTime().registerSuccessfulEvent( + replicasCheckDuration, TimeUnit.MILLISECONDS); + } catch (ReplicationException.BKAuditException e) { + LOG.error("BKAuditException running periodic replicas check.", e); + int numLedgersFoundHavingNoReplicaOfAnEntryValue = + numLedgersFoundHavingNoReplicaOfAnEntry.get(); + if (numLedgersFoundHavingNoReplicaOfAnEntryValue > 0) { + /* + * Though there is BKAuditException while doing + * replicasCheck, it found few ledgers having no replica + * of an entry. So reporting it. + */ + auditorStats.getNumLedgersHavingNoReplicaOfAnEntryGuageValue() + .set(numLedgersFoundHavingNoReplicaOfAnEntryValue); + } + int numLedgersFoundHavingLessThanAQReplicasOfAnEntryValue = + numLedgersFoundHavingLessThanAQReplicasOfAnEntry.get(); + if (numLedgersFoundHavingLessThanAQReplicasOfAnEntryValue > 0) { + /* + * Though there is BKAuditException while doing + * replicasCheck, it found few ledgers having an entry + * less than AQ num of Replicas. So reporting it. + */ + auditorStats.getNumLedgersHavingLessThanAQReplicasOfAnEntryGuageValue() + .set(numLedgersFoundHavingLessThanAQReplicasOfAnEntryValue); + } + int numLedgersFoundHavingLessThanWQReplicasOfAnEntryValue = + numLedgersFoundHavingLessThanWQReplicasOfAnEntry.get(); + if (numLedgersFoundHavingLessThanWQReplicasOfAnEntryValue > 0) { + /* + * Though there is BKAuditException while doing + * replicasCheck, it found few ledgers having an entry + * less than WQ num of Replicas. So reporting it. 
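A detail worth noting in this catch block: even when the replicas check dies with a BKAuditException, any counters it accumulated first are still pushed to their gauges, so a partial run remains visible in metrics. A sketch of that report-partial-results pattern (setGauge is an illustrative stand-in for the auditorStats gauge setters):

```java
import java.util.concurrent.atomic.AtomicInteger;

/** Sketch: publish whatever a failed check managed to count before it failed. */
public class PartialResultReporting {
    static void reportIfNonZero(AtomicInteger counter, String gaugeName) {
        int value = counter.get();
        if (value > 0) {
            setGauge(gaugeName, value); // partial findings still reach metrics
        }
    }

    static void setGauge(String name, int value) {
        System.out.println(name + " = " + value);
    }
}
```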
+ */ + auditorStats.getNumLedgersHavingLessThanWQReplicasOfAnEntryGuageValue() + .set(numLedgersFoundHavingLessThanWQReplicasOfAnEntryValue); + } + } catch (ReplicationException.UnavailableException ue) { + LOG.error("Underreplication manager unavailable running periodic check", ue); + } + } + + @Override + public void shutdown() { + + } + + void replicasCheck() throws ReplicationException.BKAuditException { + ConcurrentHashMap ledgersWithMissingEntries = + new ConcurrentHashMap(); + ConcurrentHashMap ledgersWithUnavailableBookies = + new ConcurrentHashMap(); + LedgerManager.LedgerRangeIterator ledgerRangeIterator = ledgerManager.getLedgerRanges(zkOpTimeoutMs); + final Semaphore maxConcurrentSemaphore = new Semaphore(MAX_CONCURRENT_REPLICAS_CHECK_LEDGER_REQUESTS); + while (true) { + LedgerManager.LedgerRange ledgerRange = null; + try { + if (ledgerRangeIterator.hasNext()) { + ledgerRange = ledgerRangeIterator.next(); + } else { + break; + } + } catch (IOException ioe) { + LOG.error("Got IOException while iterating LedgerRangeIterator", ioe); + throw new ReplicationException.BKAuditException( + "Got IOException while iterating LedgerRangeIterator", ioe); + } + ledgersWithMissingEntries.clear(); + ledgersWithUnavailableBookies.clear(); + numLedgersFoundHavingNoReplicaOfAnEntry.set(0); + numLedgersFoundHavingLessThanAQReplicasOfAnEntry.set(0); + numLedgersFoundHavingLessThanWQReplicasOfAnEntry.set(0); + Set ledgersInRange = ledgerRange.getLedgers(); + int numOfLedgersInRange = ledgersInRange.size(); + // Final result after processing all the ledgers + final AtomicInteger resultCode = new AtomicInteger(); + final CountDownLatch replicasCheckLatch = new CountDownLatch(1); + + ReplicasCheckFinalCallback finalCB = new ReplicasCheckFinalCallback(resultCode, replicasCheckLatch); + MultiCallback mcbForThisLedgerRange = new MultiCallback(numOfLedgersInRange, finalCB, null, + BKException.Code.OK, BKException.Code.ReadException) { + @Override + public void processResult(int rc, String path, Object ctx) { + try { + super.processResult(rc, path, ctx); + } finally { + maxConcurrentSemaphore.release(); + } + } + }; + if (LOG.isDebugEnabled()) { + LOG.debug("Number of ledgers in the current LedgerRange : {}", + numOfLedgersInRange); + } + for (Long ledgerInRange : ledgersInRange) { + try { + if (!maxConcurrentSemaphore.tryAcquire(REPLICAS_CHECK_TIMEOUT_IN_SECS, TimeUnit.SECONDS)) { + LOG.error("Timedout ({} secs) while waiting for acquiring semaphore", + REPLICAS_CHECK_TIMEOUT_IN_SECS); + throw new ReplicationException.BKAuditException( + "Timedout while waiting for acquiring semaphore"); + } + } catch (InterruptedException ie) { + Thread.currentThread().interrupt(); + LOG.error("Got InterruptedException while acquiring semaphore for replicascheck", ie); + throw new ReplicationException.BKAuditException( + "Got InterruptedException while acquiring semaphore for replicascheck", ie); + } + if (checkUnderReplicationForReplicasCheck(ledgerInRange, mcbForThisLedgerRange)) { + /* + * if ledger is marked underreplicated, then ignore this + * ledger for replicascheck. 
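The ledger-range loop above fans out one asynchronous check per ledger and collapses the results through a MultiCallback: the final callback fires once, after the expected number of per-ledger results, and any non-OK child result fails the aggregate. A simplified standalone stand-in for BookkeeperInternalCallbacks.MultiCallback, under those assumptions:

```java
import java.util.concurrent.atomic.AtomicInteger;

/** Sketch: aggregate N child results into one terminal callback. */
public class AggregateCallback {
    private final AtomicInteger remaining;
    private final AtomicInteger worstRc = new AtomicInteger(0); // 0 == OK
    private final Runnable onDone;

    AggregateCallback(int expected, Runnable onDone) {
        this.remaining = new AtomicInteger(expected);
        this.onDone = onDone;
    }

    void childResult(int rc) {
        if (rc != 0) {
            worstRc.compareAndSet(0, rc); // remember the first failure
        }
        if (remaining.decrementAndGet() == 0) {
            onDone.run(); // exactly one terminal invocation
        }
    }

    int resultCode() {
        return worstRc.get();
    }
}
```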
+ */ + continue; + } + ledgerManager.readLedgerMetadata(ledgerInRange) + .whenComplete(new ReadLedgerMetadataCallbackForReplicasCheck(ledgerInRange, + mcbForThisLedgerRange, ledgersWithMissingEntries, ledgersWithUnavailableBookies)); + } + try { + /* + * if mcbForThisLedgerRange is not calledback within + * REPLICAS_CHECK_TIMEOUT_IN_SECS secs then better give up + * doing replicascheck, since there could be an issue and + * blocking the single threaded auditor executor thread is not + * expected. + */ + if (!replicasCheckLatch.await(REPLICAS_CHECK_TIMEOUT_IN_SECS, TimeUnit.SECONDS)) { + LOG.error( + "For LedgerRange with num of ledgers : {} it didn't complete replicascheck" + + " in {} secs, so giving up", + numOfLedgersInRange, REPLICAS_CHECK_TIMEOUT_IN_SECS); + throw new ReplicationException.BKAuditException( + "Got InterruptedException while doing replicascheck"); + } + } catch (InterruptedException ie) { + Thread.currentThread().interrupt(); + LOG.error("Got InterruptedException while doing replicascheck", ie); + throw new ReplicationException.BKAuditException( + "Got InterruptedException while doing replicascheck", ie); + } + reportLedgersWithMissingEntries(ledgersWithMissingEntries); + reportLedgersWithUnavailableBookies(ledgersWithUnavailableBookies); + int resultCodeIntValue = resultCode.get(); + if (resultCodeIntValue != BKException.Code.OK) { + throw new ReplicationException.BKAuditException("Exception while doing replicas check", + BKException.create(resultCodeIntValue)); + } + } + try { + ledgerUnderreplicationManager.setReplicasCheckCTime(System.currentTimeMillis()); + } catch (ReplicationException.NonRecoverableReplicationException nre) { + LOG.error("Non Recoverable Exception while reading from ZK", nre); + submitShutdownTask(); + } catch (ReplicationException.UnavailableException ue) { + LOG.error("Got exception while trying to set ReplicasCheckCTime", ue); + } + } + + private static class MissingEntriesInfo { + // ledger id of missing entries + private final long ledgerId; + /* + * segment details, like start entryid of the segment and ensemble List. + */ + private final Entry> segmentEnsemble; + // bookie missing these entries + private final BookieId bookieMissingEntries; + /* + * entries of this segment which are supposed to contain in this bookie + * but missing in this bookie. 
+     */
+        private final List<Long> unavailableEntriesList;
+
+        private MissingEntriesInfo(long ledgerId, Entry<Long, ? extends List<BookieId>> segmentEnsemble,
+                                   BookieId bookieMissingEntries, List<Long> unavailableEntriesList) {
+            this.ledgerId = ledgerId;
+            this.segmentEnsemble = segmentEnsemble;
+            this.bookieMissingEntries = bookieMissingEntries;
+            this.unavailableEntriesList = unavailableEntriesList;
+        }
+
+        private long getLedgerId() {
+            return ledgerId;
+        }
+
+        private Entry<Long, ? extends List<BookieId>> getSegmentEnsemble() {
+            return segmentEnsemble;
+        }
+
+        private BookieId getBookieMissingEntries() {
+            return bookieMissingEntries;
+        }
+
+        private List<Long> getUnavailableEntriesList() {
+            return unavailableEntriesList;
+        }
+    }
+
+    private static class MissingEntriesInfoOfLedger {
+        private final long ledgerId;
+        private final int ensembleSize;
+        private final int writeQuorumSize;
+        private final int ackQuorumSize;
+        private final List<MissingEntriesInfo> missingEntriesInfoList;
+
+        private MissingEntriesInfoOfLedger(long ledgerId, int ensembleSize, int writeQuorumSize, int ackQuorumSize,
+                                           List<MissingEntriesInfo> missingEntriesInfoList) {
+            this.ledgerId = ledgerId;
+            this.ensembleSize = ensembleSize;
+            this.writeQuorumSize = writeQuorumSize;
+            this.ackQuorumSize = ackQuorumSize;
+            this.missingEntriesInfoList = missingEntriesInfoList;
+        }
+
+        private long getLedgerId() {
+            return ledgerId;
+        }
+
+        private int getEnsembleSize() {
+            return ensembleSize;
+        }
+
+        private int getWriteQuorumSize() {
+            return writeQuorumSize;
+        }
+
+        private int getAckQuorumSize() {
+            return ackQuorumSize;
+        }
+
+        private List<MissingEntriesInfo> getMissingEntriesInfoList() {
+            return missingEntriesInfoList;
+        }
+    }
+
+    private class ReadLedgerMetadataCallbackForReplicasCheck
+            implements BiConsumer<Versioned<LedgerMetadata>, Throwable> {
+        private final long ledgerInRange;
+        private final MultiCallback mcbForThisLedgerRange;
+        private final ConcurrentHashMap<Long, MissingEntriesInfoOfLedger> ledgersWithMissingEntries;
+        private final ConcurrentHashMap<Long, MissingEntriesInfoOfLedger> ledgersWithUnavailableBookies;
+
+        ReadLedgerMetadataCallbackForReplicasCheck(
+                long ledgerInRange,
+                MultiCallback mcbForThisLedgerRange,
+                ConcurrentHashMap<Long, MissingEntriesInfoOfLedger> ledgersWithMissingEntries,
+                ConcurrentHashMap<Long, MissingEntriesInfoOfLedger> ledgersWithUnavailableBookies) {
+            this.ledgerInRange = ledgerInRange;
+            this.mcbForThisLedgerRange = mcbForThisLedgerRange;
+            this.ledgersWithMissingEntries = ledgersWithMissingEntries;
+            this.ledgersWithUnavailableBookies = ledgersWithUnavailableBookies;
+        }
+
+        @Override
+        public void accept(Versioned<LedgerMetadata> metadataVer, Throwable exception) {
+            if (exception != null) {
+                if (BKException
+                        .getExceptionCode(exception) == BKException.Code.NoSuchLedgerExistsOnMetadataServerException) {
+                    if (LOG.isDebugEnabled()) {
+                        LOG.debug("Ignoring replicas check of already deleted ledger {}",
+                                ledgerInRange);
+                    }
+                    mcbForThisLedgerRange.processResult(BKException.Code.OK, null, null);
+                    return;
+                } else {
+                    LOG.warn("Unable to read the ledger: {} information", ledgerInRange, exception);
+                    mcbForThisLedgerRange.processResult(BKException.getExceptionCode(exception), null, null);
+                    return;
+                }
+            }
+
+            LedgerMetadata metadata = metadataVer.getValue();
+            if (!metadata.isClosed()) {
+                if (LOG.isDebugEnabled()) {
+                    LOG.debug("Ledger: {} is not yet closed, "
+                            + "so skipping the replicas check analysis for now",
+                            ledgerInRange);
+                }
+                mcbForThisLedgerRange.processResult(BKException.Code.OK, null, null);
+                return;
+            }
+
+            final long lastEntryId = metadata.getLastEntryId();
+            if (lastEntryId == -1) {
+                if (LOG.isDebugEnabled()) {
+                    LOG.debug("Ledger: {} is closed but it doesn't have any entries, "
+                            + "so skipping the replicas check", ledgerInRange);
+                }
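The rest of this callback, continuing below, derives per-segment boundaries from the ensemble map: each ensemble change starts a new segment, the last segment ends at the ledger's lastEntryId, and a segment is empty when its start entry lies past its end. A worked sketch of that boundary arithmetic, with illustrative start entry ids:

```java
import java.util.List;

/** Sketch: compute segment [start, end] bounds and detect empty segments. */
public class SegmentBounds {
    public static void main(String[] args) {
        List<Long> segmentStarts = List.of(0L, 100L, 100L); // middle segment is empty
        long lastEntryId = 150L;
        for (int i = 0; i < segmentStarts.size(); i++) {
            boolean lastSegment = (i == segmentStarts.size() - 1);
            long start = segmentStarts.get(i);
            long end = lastSegment ? lastEntryId : segmentStarts.get(i + 1) - 1;
            boolean empty = lastSegment ? start > lastEntryId
                                        : start == segmentStarts.get(i + 1);
            System.out.printf("segment %d: [%d, %d] empty=%b%n", i, start, end, empty);
        }
    }
}
```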
mcbForThisLedgerRange.processResult(BKException.Code.OK, null, null); + return; + } + + int writeQuorumSize = metadata.getWriteQuorumSize(); + int ackQuorumSize = metadata.getAckQuorumSize(); + int ensembleSize = metadata.getEnsembleSize(); + RoundRobinDistributionSchedule distributionSchedule = new RoundRobinDistributionSchedule(writeQuorumSize, + ackQuorumSize, ensembleSize); + List>> segments = new LinkedList<>( + metadata.getAllEnsembles().entrySet()); + /* + * since there are multiple segments, MultiCallback should be + * created for (ensembleSize * segments.size()) calls. + */ + MultiCallback mcbForThisLedger = new MultiCallback(ensembleSize * segments.size(), + mcbForThisLedgerRange, null, BKException.Code.OK, BKException.Code.ReadException); + HashMap> bookiesSegmentInfoMap = + new HashMap>(); + for (int segmentNum = 0; segmentNum < segments.size(); segmentNum++) { + final Entry> segmentEnsemble = segments.get(segmentNum); + final List ensembleOfSegment = segmentEnsemble.getValue(); + final long startEntryIdOfSegment = segmentEnsemble.getKey(); + final boolean lastSegment = (segmentNum == (segments.size() - 1)); + final long lastEntryIdOfSegment = lastSegment ? lastEntryId + : segments.get(segmentNum + 1).getKey() - 1; + /* + * Segment can be empty. If last segment is empty, then + * startEntryIdOfSegment of it will be greater than lastEntryId + * of the ledger. If the segment in middle is empty, then its + * startEntry will be same as startEntry of the following + * segment. + */ + final boolean emptySegment = lastSegment ? (startEntryIdOfSegment > lastEntryId) + : (startEntryIdOfSegment == segments.get(segmentNum + 1).getKey()); + for (int bookieIndex = 0; bookieIndex < ensembleOfSegment.size(); bookieIndex++) { + final BookieId bookieInEnsemble = ensembleOfSegment.get(bookieIndex); + final BitSet entriesStripedToThisBookie = emptySegment ? EMPTY_BITSET + : distributionSchedule.getEntriesStripedToTheBookie(bookieIndex, startEntryIdOfSegment, + lastEntryIdOfSegment); + if (entriesStripedToThisBookie.cardinality() == 0) { + /* + * if no entry is expected to contain in this bookie, + * then there is no point in making + * getListOfEntriesOfLedger call for this bookie. So + * instead callback with success result. + */ + if (LOG.isDebugEnabled()) { + LOG.debug( + "For ledger: {}, in Segment: {}, no entry is expected to contain in" + + " this bookie: {}. 
So skipping getListOfEntriesOfLedger call", + ledgerInRange, segmentEnsemble, bookieInEnsemble); + } + mcbForThisLedger.processResult(BKException.Code.OK, null, null); + continue; + } + List bookieSegmentInfoList = bookiesSegmentInfoMap + .get(bookieInEnsemble); + if (bookieSegmentInfoList == null) { + bookieSegmentInfoList = new ArrayList(); + bookiesSegmentInfoMap.put(bookieInEnsemble, bookieSegmentInfoList); + } + bookieSegmentInfoList.add(new BookieExpectedToContainSegmentInfo(startEntryIdOfSegment, + lastEntryIdOfSegment, segmentEnsemble, entriesStripedToThisBookie)); + } + } + for (Entry> bookiesSegmentInfoTuple : + bookiesSegmentInfoMap.entrySet()) { + final BookieId bookieInEnsemble = bookiesSegmentInfoTuple.getKey(); + final List bookieSegmentInfoList = bookiesSegmentInfoTuple + .getValue(); + admin.asyncGetListOfEntriesOfLedger(bookieInEnsemble, ledgerInRange) + .whenComplete(new GetListOfEntriesOfLedgerCallbackForReplicasCheck(ledgerInRange, ensembleSize, + writeQuorumSize, ackQuorumSize, bookieInEnsemble, bookieSegmentInfoList, + ledgersWithMissingEntries, ledgersWithUnavailableBookies, mcbForThisLedger)); + } + } + } + + private static class BookieExpectedToContainSegmentInfo { + private final long startEntryIdOfSegment; + private final long lastEntryIdOfSegment; + private final Entry> segmentEnsemble; + private final BitSet entriesOfSegmentStripedToThisBookie; + + private BookieExpectedToContainSegmentInfo(long startEntryIdOfSegment, long lastEntryIdOfSegment, + Entry> segmentEnsemble, + BitSet entriesOfSegmentStripedToThisBookie) { + this.startEntryIdOfSegment = startEntryIdOfSegment; + this.lastEntryIdOfSegment = lastEntryIdOfSegment; + this.segmentEnsemble = segmentEnsemble; + this.entriesOfSegmentStripedToThisBookie = entriesOfSegmentStripedToThisBookie; + } + + public long getStartEntryIdOfSegment() { + return startEntryIdOfSegment; + } + + public long getLastEntryIdOfSegment() { + return lastEntryIdOfSegment; + } + + public Entry> getSegmentEnsemble() { + return segmentEnsemble; + } + + public BitSet getEntriesOfSegmentStripedToThisBookie() { + return entriesOfSegmentStripedToThisBookie; + } + } + + private static class GetListOfEntriesOfLedgerCallbackForReplicasCheck + implements BiConsumer { + private final long ledgerInRange; + private final int ensembleSize; + private final int writeQuorumSize; + private final int ackQuorumSize; + private final BookieId bookieInEnsemble; + private final List bookieExpectedToContainSegmentInfoList; + private final ConcurrentHashMap ledgersWithMissingEntries; + private final ConcurrentHashMap ledgersWithUnavailableBookies; + private final MultiCallback mcbForThisLedger; + + private GetListOfEntriesOfLedgerCallbackForReplicasCheck( + long ledgerInRange, + int ensembleSize, + int writeQuorumSize, + int ackQuorumSize, + BookieId bookieInEnsemble, + List bookieExpectedToContainSegmentInfoList, + ConcurrentHashMap ledgersWithMissingEntries, + ConcurrentHashMap ledgersWithUnavailableBookies, + MultiCallback mcbForThisLedger) { + this.ledgerInRange = ledgerInRange; + this.ensembleSize = ensembleSize; + this.writeQuorumSize = writeQuorumSize; + this.ackQuorumSize = ackQuorumSize; + this.bookieInEnsemble = bookieInEnsemble; + this.bookieExpectedToContainSegmentInfoList = bookieExpectedToContainSegmentInfoList; + this.ledgersWithMissingEntries = ledgersWithMissingEntries; + this.ledgersWithUnavailableBookies = ledgersWithUnavailableBookies; + this.mcbForThisLedger = mcbForThisLedger; + } + + @Override + public void 
+        public void accept(AvailabilityOfEntriesOfLedger availabilityOfEntriesOfLedger,
+                Throwable listOfEntriesException) {
+
+            if (listOfEntriesException != null) {
+                if (BKException
+                        .getExceptionCode(listOfEntriesException) == BKException.Code.NoSuchLedgerExistsException) {
+                    if (LOG.isDebugEnabled()) {
+                        LOG.debug("Got NoSuchLedgerExistsException for ledger: {} from bookie: {}",
+                                ledgerInRange, bookieInEnsemble);
+                    }
+                    /*
+                     * in the case of NoSuchLedgerExistsException, it should be
+                     * considered as an empty AvailabilityOfEntriesOfLedger.
+                     */
+                    availabilityOfEntriesOfLedger = AvailabilityOfEntriesOfLedger.EMPTY_AVAILABILITYOFENTRIESOFLEDGER;
+                } else {
+                    LOG.warn("Unable to GetListOfEntriesOfLedger for ledger: {} from: {}", ledgerInRange,
+                            bookieInEnsemble, listOfEntriesException);
+                    MissingEntriesInfoOfLedger unavailableBookiesInfoOfThisLedger = ledgersWithUnavailableBookies
+                            .get(ledgerInRange);
+                    if (unavailableBookiesInfoOfThisLedger == null) {
+                        ledgersWithUnavailableBookies.putIfAbsent(ledgerInRange,
+                                new MissingEntriesInfoOfLedger(ledgerInRange, ensembleSize, writeQuorumSize,
+                                        ackQuorumSize,
+                                        Collections.synchronizedList(new ArrayList<MissingEntriesInfo>())));
+                        unavailableBookiesInfoOfThisLedger = ledgersWithUnavailableBookies.get(ledgerInRange);
+                    }
+                    List<MissingEntriesInfo> missingEntriesInfoList =
+                            unavailableBookiesInfoOfThisLedger.getMissingEntriesInfoList();
+                    for (BookieExpectedToContainSegmentInfo bookieExpectedToContainSegmentInfo
+                            : bookieExpectedToContainSegmentInfoList) {
+                        missingEntriesInfoList.add(
+                                new MissingEntriesInfo(ledgerInRange,
+                                        bookieExpectedToContainSegmentInfo.getSegmentEnsemble(),
+                                        bookieInEnsemble, null));
+                        /*
+                         * though GetListOfEntriesOfLedger has failed with an
+                         * exception, mcbForThisLedger should be called back
+                         * with an OK response, because we don't consider this
+                         * a fatal error in replicasCheck and don't want
+                         * replicasCheck to exit just because of this issue.
+                         * So instead maintain the state in
+                         * ledgersWithUnavailableBookies, so that replicasCheck
+                         * will report these ledgers/bookies appropriately.
+                         */
+                        mcbForThisLedger.processResult(BKException.Code.OK, null, null);
+                    }
+                    return;
+                }
+            }
+
+            for (BookieExpectedToContainSegmentInfo bookieExpectedToContainSegmentInfo
+                    : bookieExpectedToContainSegmentInfoList) {
+                final long startEntryIdOfSegment = bookieExpectedToContainSegmentInfo.getStartEntryIdOfSegment();
+                final long lastEntryIdOfSegment = bookieExpectedToContainSegmentInfo.getLastEntryIdOfSegment();
+                final BitSet entriesStripedToThisBookie = bookieExpectedToContainSegmentInfo
+                        .getEntriesOfSegmentStripedToThisBookie();
+                final Entry<Long, ? extends List<BookieId>> segmentEnsemble =
+                        bookieExpectedToContainSegmentInfo.getSegmentEnsemble();
+                final List<Long> unavailableEntriesList = availabilityOfEntriesOfLedger
+                        .getUnavailableEntries(startEntryIdOfSegment,
+                                lastEntryIdOfSegment, entriesStripedToThisBookie);
+                if ((unavailableEntriesList != null) && (!unavailableEntriesList.isEmpty())) {
+                    MissingEntriesInfoOfLedger missingEntriesInfoOfThisLedger = ledgersWithMissingEntries
+                            .get(ledgerInRange);
+                    if (missingEntriesInfoOfThisLedger == null) {
+                        ledgersWithMissingEntries.putIfAbsent(ledgerInRange,
+                                new MissingEntriesInfoOfLedger(ledgerInRange, ensembleSize, writeQuorumSize,
+                                        ackQuorumSize,
+                                        Collections.synchronizedList(new ArrayList<MissingEntriesInfo>())));
+                        missingEntriesInfoOfThisLedger = ledgersWithMissingEntries.get(ledgerInRange);
+                    }
+                    missingEntriesInfoOfThisLedger.getMissingEntriesInfoList().add(
+                            new MissingEntriesInfo(ledgerInRange, segmentEnsemble,
+                                    bookieInEnsemble, unavailableEntriesList));
+                }
+                /*
+                 * here, even when unavailableEntriesList is not empty,
+                 * mcbForThisLedger should be called back with an OK response,
+                 * because we don't consider this a fatal error in
+                 * replicasCheck and don't want replicasCheck to exit just
+                 * because of this issue. So instead maintain the state in
+                 * missingEntriesInfoOfThisLedger, so that replicasCheck will
+                 * report these ledgers/bookies/missing entries appropriately.
+                 */
+                mcbForThisLedger.processResult(BKException.Code.OK, null, null);
+            }
+        }
+    }
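+    /*
+     * A side note on the get/putIfAbsent/get sequences above: on a
+     * ConcurrentHashMap the same effect can be had atomically with a single
+     * computeIfAbsent call, e.g. (sketch):
+     *
+     *   MissingEntriesInfoOfLedger info = ledgersWithMissingEntries
+     *           .computeIfAbsent(ledgerInRange,
+     *                   id -> new MissingEntriesInfoOfLedger(id, ensembleSize,
+     *                           writeQuorumSize, ackQuorumSize,
+     *                           Collections.synchronizedList(new ArrayList<>())));
+     */
+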
+    private static class ReplicasCheckFinalCallback implements AsyncCallback.VoidCallback {
+        final AtomicInteger resultCode;
+        final CountDownLatch replicasCheckLatch;
+
+        private ReplicasCheckFinalCallback(AtomicInteger resultCode, CountDownLatch replicasCheckLatch) {
+            this.resultCode = resultCode;
+            this.replicasCheckLatch = replicasCheckLatch;
+        }
+
+        @Override
+        public void processResult(int rc, String s, Object obj) {
+            resultCode.set(rc);
+            replicasCheckLatch.countDown();
+        }
+    }
+
+    private void reportLedgersWithMissingEntries(
+            ConcurrentHashMap<Long, MissingEntriesInfoOfLedger> ledgersWithMissingEntries) {
+        StringBuilder errMessage = new StringBuilder();
+        HashMultiset<Long> missingEntries = HashMultiset.create();
+        int writeQuorumSize;
+        int ackQuorumSize;
+        for (Map.Entry<Long, MissingEntriesInfoOfLedger> missingEntriesInfoOfLedgerEntry : ledgersWithMissingEntries
+                .entrySet()) {
+            missingEntries.clear();
+            errMessage.setLength(0);
+            long ledgerWithMissingEntries = missingEntriesInfoOfLedgerEntry.getKey();
+            MissingEntriesInfoOfLedger missingEntriesInfoOfLedger = missingEntriesInfoOfLedgerEntry.getValue();
+            List<MissingEntriesInfo> missingEntriesInfoList = missingEntriesInfoOfLedger.getMissingEntriesInfoList();
+            writeQuorumSize = missingEntriesInfoOfLedger.getWriteQuorumSize();
+            ackQuorumSize = missingEntriesInfoOfLedger.getAckQuorumSize();
+            errMessage.append("Ledger : " + ledgerWithMissingEntries + " has the following missing entries : ");
+            for (int listInd = 0; listInd < missingEntriesInfoList.size(); listInd++) {
+                MissingEntriesInfo missingEntriesInfo = missingEntriesInfoList.get(listInd);
+                List<Long> unavailableEntriesList = missingEntriesInfo.getUnavailableEntriesList();
+                Entry<Long, ? extends List<BookieId>> segmentEnsemble =
+                        missingEntriesInfo.getSegmentEnsemble();
+                missingEntries.addAll(unavailableEntriesList);
+                errMessage.append("In segment starting at " + segmentEnsemble.getKey() + " with ensemble "
+                        + segmentEnsemble.getValue() + ", the following entries " + unavailableEntriesList
+                        + " are missing in bookie: " + missingEntriesInfo.getBookieMissingEntries());
+                if (listInd < (missingEntriesInfoList.size() - 1)) {
+                    errMessage.append(", ");
+                }
+            }
+            LOG.error(errMessage.toString());
+            Set<Multiset.Entry<Long>> missingEntriesSet = missingEntries.entrySet();
+            int maxNumOfMissingReplicas = 0;
+            long entryWithMaxNumOfMissingReplicas = -1L;
+            for (Multiset.Entry<Long> missingEntryWithCount : missingEntriesSet) {
+                if (missingEntryWithCount.getCount() > maxNumOfMissingReplicas) {
+                    maxNumOfMissingReplicas = missingEntryWithCount.getCount();
+                    entryWithMaxNumOfMissingReplicas = missingEntryWithCount.getElement();
+                }
+            }
+            int leastNumOfReplicasOfAnEntry = writeQuorumSize - maxNumOfMissingReplicas;
+            if (leastNumOfReplicasOfAnEntry == 0) {
+                numLedgersFoundHavingNoReplicaOfAnEntry.incrementAndGet();
+                LOG.error("Ledger : {} entryId : {} is missing all replicas", ledgerWithMissingEntries,
+                        entryWithMaxNumOfMissingReplicas);
+            } else if (leastNumOfReplicasOfAnEntry < ackQuorumSize) {
+                numLedgersFoundHavingLessThanAQReplicasOfAnEntry.incrementAndGet();
+                LOG.error("Ledger : {} entryId : {} has {} replicas, fewer than the ack quorum of {} replicas",
+                        ledgerWithMissingEntries, entryWithMaxNumOfMissingReplicas, leastNumOfReplicasOfAnEntry,
+                        ackQuorumSize);
+            } else if (leastNumOfReplicasOfAnEntry < writeQuorumSize) {
+                numLedgersFoundHavingLessThanWQReplicasOfAnEntry.incrementAndGet();
+                LOG.error("Ledger : {} entryId : {} has {} replicas, fewer than the write quorum of {} replicas",
+                        ledgerWithMissingEntries, entryWithMaxNumOfMissingReplicas, leastNumOfReplicasOfAnEntry,
+                        writeQuorumSize);
+            }
+        }
+    }
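+    /*
+     * Worked example of the classification above, assuming writeQuorumSize = 3
+     * and ackQuorumSize = 2 (illustrative values): if the worst entry is
+     * missing all 3 replicas, leastNumOfReplicasOfAnEntry = 3 - 3 = 0 and the
+     * ledger is counted as having no replica of an entry; missing 2 replicas
+     * gives 3 - 2 = 1 < ackQuorumSize, so it falls in the less-than-AQ bucket;
+     * missing 1 replica gives 3 - 1 = 2 < writeQuorumSize, so it falls in the
+     * less-than-WQ bucket. Each ledger is counted in at most one bucket.
+     */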
+
+    private void reportLedgersWithUnavailableBookies(
+            ConcurrentHashMap<Long, MissingEntriesInfoOfLedger> ledgersWithUnavailableBookies) {
+        StringBuilder errMessage = new StringBuilder();
+        for (Map.Entry<Long, MissingEntriesInfoOfLedger> ledgerWithUnavailableBookiesInfo :
+                ledgersWithUnavailableBookies.entrySet()) {
+            errMessage.setLength(0);
+            long ledgerWithUnavailableBookies = ledgerWithUnavailableBookiesInfo.getKey();
+            List<MissingEntriesInfo> missingBookiesInfoList = ledgerWithUnavailableBookiesInfo.getValue()
+                    .getMissingEntriesInfoList();
+            errMessage.append("Ledger : " + ledgerWithUnavailableBookies + " has the following unavailable bookies : ");
+            for (int listInd = 0; listInd < missingBookiesInfoList.size(); listInd++) {
+                MissingEntriesInfo missingBookieInfo = missingBookiesInfoList.get(listInd);
+                Entry<Long, ? extends List<BookieId>> segmentEnsemble =
+                        missingBookieInfo.getSegmentEnsemble();
+                errMessage.append("In segment starting at " + segmentEnsemble.getKey() + " with ensemble "
+                        + segmentEnsemble.getValue() + ", the following bookie has not responded: "
+                        + missingBookieInfo.getBookieMissingEntries());
+                if (listInd < (missingBookiesInfoList.size() - 1)) {
+                    errMessage.append(", ");
+                }
+            }
+            LOG.error(errMessage.toString());
+        }
+    }
+
+    boolean checkUnderReplicationForReplicasCheck(long ledgerInRange, VoidCallback mcbForThisLedgerRange) {
+        try {
+            if (ledgerUnderreplicationManager.getLedgerUnreplicationInfo(ledgerInRange) == null) {
+                return false;
+            }
+            /*
+             * this ledger is marked underreplicated, so ignore it for
+             * replicasCheck.
+             */
+            if (LOG.isDebugEnabled()) {
+                LOG.debug("Ledger: {} is marked underreplicated, ignore this ledger for replicasCheck",
+                        ledgerInRange);
+            }
+            mcbForThisLedgerRange.processResult(BKException.Code.OK, null, null);
+            return true;
+        } catch (ReplicationException.NonRecoverableReplicationException nre) {
+            LOG.error("Non Recoverable Exception while reading from ZK", nre);
+            submitShutdownTask();
+            return true;
+        } catch (ReplicationException.UnavailableException une) {
+            LOG.error("Got exception while trying to check if ledger: {} is underreplicated", ledgerInRange, une);
+            mcbForThisLedgerRange.processResult(BKException.getExceptionCode(une), null, null);
+            return true;
+        }
+    }
+}
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/AuditorStats.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/AuditorStats.java
new file mode 100644
index 00000000000..6b973dc403a
--- /dev/null
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/AuditorStats.java
@@ -0,0 +1,276 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ +package org.apache.bookkeeper.replication; + +import static org.apache.bookkeeper.replication.ReplicationStats.AUDITOR_SCOPE; +import static org.apache.bookkeeper.replication.ReplicationStats.AUDIT_BOOKIES_TIME; +import static org.apache.bookkeeper.replication.ReplicationStats.BOOKIE_TO_LEDGERS_MAP_CREATION_TIME; +import static org.apache.bookkeeper.replication.ReplicationStats.CHECK_ALL_LEDGERS_TIME; +import static org.apache.bookkeeper.replication.ReplicationStats.NUM_BOOKIES_PER_LEDGER; +import static org.apache.bookkeeper.replication.ReplicationStats.NUM_BOOKIE_AUDITS_DELAYED; +import static org.apache.bookkeeper.replication.ReplicationStats.NUM_DELAYED_BOOKIE_AUDITS_DELAYES_CANCELLED; +import static org.apache.bookkeeper.replication.ReplicationStats.NUM_FRAGMENTS_PER_LEDGER; +import static org.apache.bookkeeper.replication.ReplicationStats.NUM_LEDGERS_CHECKED; +import static org.apache.bookkeeper.replication.ReplicationStats.NUM_LEDGERS_HAVING_LESS_THAN_AQ_REPLICAS_OF_AN_ENTRY; +import static org.apache.bookkeeper.replication.ReplicationStats.NUM_LEDGERS_HAVING_LESS_THAN_WQ_REPLICAS_OF_AN_ENTRY; +import static org.apache.bookkeeper.replication.ReplicationStats.NUM_LEDGERS_HAVING_NO_REPLICA_OF_AN_ENTRY; +import static org.apache.bookkeeper.replication.ReplicationStats.NUM_LEDGERS_NOT_ADHERING_TO_PLACEMENT_POLICY; +import static org.apache.bookkeeper.replication.ReplicationStats.NUM_LEDGERS_SOFTLY_ADHERING_TO_PLACEMENT_POLICY; +import static org.apache.bookkeeper.replication.ReplicationStats.NUM_SKIPPING_CHECK_TASK_TIMES; +import static org.apache.bookkeeper.replication.ReplicationStats.NUM_UNDERREPLICATED_LEDGERS_ELAPSED_RECOVERY_GRACE_PERIOD; +import static org.apache.bookkeeper.replication.ReplicationStats.NUM_UNDER_REPLICATED_LEDGERS; +import static org.apache.bookkeeper.replication.ReplicationStats.PLACEMENT_POLICY_CHECK_TIME; +import static org.apache.bookkeeper.replication.ReplicationStats.REPLICAS_CHECK_TIME; +import static org.apache.bookkeeper.replication.ReplicationStats.UNDER_REPLICATED_LEDGERS_TOTAL_SIZE; +import static org.apache.bookkeeper.replication.ReplicationStats.URL_PUBLISH_TIME_FOR_LOST_BOOKIE; + +import java.util.concurrent.atomic.AtomicInteger; +import lombok.Getter; +import org.apache.bookkeeper.stats.Counter; +import org.apache.bookkeeper.stats.Gauge; +import org.apache.bookkeeper.stats.OpStatsLogger; +import org.apache.bookkeeper.stats.StatsLogger; +import org.apache.bookkeeper.stats.annotations.StatsDoc; + +@StatsDoc( + name = AUDITOR_SCOPE, + help = "Auditor related stats" +) +@Getter +public class AuditorStats { + + private final AtomicInteger ledgersNotAdheringToPlacementPolicyGuageValue; + private final AtomicInteger ledgersSoftlyAdheringToPlacementPolicyGuageValue; + private final AtomicInteger numOfURLedgersElapsedRecoveryGracePeriodGuageValue; + private final AtomicInteger numLedgersHavingNoReplicaOfAnEntryGuageValue; + private final AtomicInteger numLedgersHavingLessThanAQReplicasOfAnEntryGuageValue; + private final AtomicInteger numLedgersHavingLessThanWQReplicasOfAnEntryGuageValue; + private final AtomicInteger underReplicatedLedgersGuageValue; + private final StatsLogger statsLogger; + @StatsDoc( + name = NUM_UNDER_REPLICATED_LEDGERS, + help = "the distribution of num under_replicated ledgers on each auditor run" + ) + private final OpStatsLogger numUnderReplicatedLedger; + + @StatsDoc( + name = UNDER_REPLICATED_LEDGERS_TOTAL_SIZE, + help = "the distribution of under_replicated ledgers total size on each auditor run" + ) + private final 
OpStatsLogger underReplicatedLedgerTotalSize;
+    @StatsDoc(
+            name = URL_PUBLISH_TIME_FOR_LOST_BOOKIE,
+            help = "the latency distribution of publishing under replicated ledgers for lost bookies"
+    )
+    private final OpStatsLogger uRLPublishTimeForLostBookies;
+    @StatsDoc(
+            name = BOOKIE_TO_LEDGERS_MAP_CREATION_TIME,
+            help = "the latency distribution of creating bookies-to-ledgers map"
+    )
+    private final OpStatsLogger bookieToLedgersMapCreationTime;
+    @StatsDoc(
+            name = CHECK_ALL_LEDGERS_TIME,
+            help = "the latency distribution of checking all ledgers"
+    )
+    private final OpStatsLogger checkAllLedgersTime;
+    @StatsDoc(
+            name = PLACEMENT_POLICY_CHECK_TIME,
+            help = "the latency distribution of placementPolicy check"
+    )
+    private final OpStatsLogger placementPolicyCheckTime;
+    @StatsDoc(
+            name = REPLICAS_CHECK_TIME,
+            help = "the latency distribution of replicas check"
+    )
+    private final OpStatsLogger replicasCheckTime;
+    @StatsDoc(
+            name = AUDIT_BOOKIES_TIME,
+            help = "the latency distribution of auditing all the bookies"
+    )
+    private final OpStatsLogger auditBookiesTime;
+    @StatsDoc(
+            name = NUM_LEDGERS_CHECKED,
+            help = "the number of ledgers checked by the auditor"
+    )
+    private final Counter numLedgersChecked;
+    @StatsDoc(
+            name = NUM_FRAGMENTS_PER_LEDGER,
+            help = "the distribution of number of fragments per ledger"
+    )
+    private final OpStatsLogger numFragmentsPerLedger;
+    @StatsDoc(
+            name = NUM_BOOKIES_PER_LEDGER,
+            help = "the distribution of number of bookies per ledger"
+    )
+    private final OpStatsLogger numBookiesPerLedger;
+    @StatsDoc(
+            name = NUM_BOOKIE_AUDITS_DELAYED,
+            help = "the number of bookie-audits delayed"
+    )
+    private final Counter numBookieAuditsDelayed;
+    @StatsDoc(
+            name = NUM_DELAYED_BOOKIE_AUDITS_DELAYES_CANCELLED,
+            help = "the number of delayed-bookie-audits cancelled"
+    )
+    private final Counter numDelayedBookieAuditsCancelled;
+    @StatsDoc(
+            name = NUM_LEDGERS_NOT_ADHERING_TO_PLACEMENT_POLICY,
+            help = "Gauge for number of ledgers not adhering to placement policy found in placement policy check"
+    )
+    private final Gauge<Integer> numLedgersNotAdheringToPlacementPolicy;
+    @StatsDoc(
+            name = NUM_LEDGERS_SOFTLY_ADHERING_TO_PLACEMENT_POLICY,
+            help = "Gauge for number of ledgers softly adhering to placement policy found in placement policy check"
+    )
+    private final Gauge<Integer> numLedgersSoftlyAdheringToPlacementPolicy;
+    @StatsDoc(
+            name = NUM_UNDERREPLICATED_LEDGERS_ELAPSED_RECOVERY_GRACE_PERIOD,
+            help = "Gauge for number of underreplicated ledgers elapsed recovery grace period"
+    )
+    private final Gauge<Integer> numUnderreplicatedLedgersElapsedRecoveryGracePeriod;
+    @StatsDoc(
+            name = NUM_LEDGERS_HAVING_NO_REPLICA_OF_AN_ENTRY,
+            help = "Gauge for number of ledgers having an entry with all the replicas missing"
+    )
+    private final Gauge<Integer> numLedgersHavingNoReplicaOfAnEntry;
+    @StatsDoc(
+            name = NUM_LEDGERS_HAVING_LESS_THAN_AQ_REPLICAS_OF_AN_ENTRY,
+            help = "Gauge for number of ledgers having an entry with less than AQ number of replicas"
+                    + ", this doesn't include ledgers counted towards numLedgersHavingNoReplicaOfAnEntry"
+    )
+    private final Gauge<Integer> numLedgersHavingLessThanAQReplicasOfAnEntry;
+    @StatsDoc(
+            name = NUM_LEDGERS_HAVING_LESS_THAN_WQ_REPLICAS_OF_AN_ENTRY,
+            help = "Gauge for number of ledgers having an entry with less than WQ number of replicas"
+                    + ", this doesn't include ledgers counted towards numLedgersHavingLessThanAQReplicasOfAnEntry"
+    )
+    private final Gauge<Integer> numLedgersHavingLessThanWQReplicasOfAnEntry;
+    @StatsDoc(
+            name = NUM_SKIPPING_CHECK_TASK_TIMES,
+            help = "the number of times the auditor check task was skipped"
+    )
+    private final Counter numSkippingCheckTaskTimes;
+
+    public AuditorStats(StatsLogger statsLogger) {
+        this.statsLogger = statsLogger;
+        this.ledgersNotAdheringToPlacementPolicyGuageValue = new AtomicInteger(0);
+        this.ledgersSoftlyAdheringToPlacementPolicyGuageValue = new AtomicInteger(0);
+        this.numOfURLedgersElapsedRecoveryGracePeriodGuageValue = new AtomicInteger(0);
+        this.numLedgersHavingNoReplicaOfAnEntryGuageValue = new AtomicInteger(0);
+        this.numLedgersHavingLessThanAQReplicasOfAnEntryGuageValue = new AtomicInteger(0);
+        this.numLedgersHavingLessThanWQReplicasOfAnEntryGuageValue = new AtomicInteger(0);
+        this.underReplicatedLedgersGuageValue = new AtomicInteger(0);
+        numUnderReplicatedLedger = this.statsLogger.getOpStatsLogger(ReplicationStats.NUM_UNDER_REPLICATED_LEDGERS);
+        underReplicatedLedgerTotalSize = this.statsLogger.getOpStatsLogger(UNDER_REPLICATED_LEDGERS_TOTAL_SIZE);
+        uRLPublishTimeForLostBookies = this.statsLogger
+                .getOpStatsLogger(ReplicationStats.URL_PUBLISH_TIME_FOR_LOST_BOOKIE);
+        bookieToLedgersMapCreationTime = this.statsLogger
+                .getOpStatsLogger(ReplicationStats.BOOKIE_TO_LEDGERS_MAP_CREATION_TIME);
+        checkAllLedgersTime = this.statsLogger.getOpStatsLogger(ReplicationStats.CHECK_ALL_LEDGERS_TIME);
+        placementPolicyCheckTime = this.statsLogger.getOpStatsLogger(ReplicationStats.PLACEMENT_POLICY_CHECK_TIME);
+        replicasCheckTime = this.statsLogger.getOpStatsLogger(ReplicationStats.REPLICAS_CHECK_TIME);
+        auditBookiesTime = this.statsLogger.getOpStatsLogger(ReplicationStats.AUDIT_BOOKIES_TIME);
+        numLedgersChecked = this.statsLogger.getCounter(ReplicationStats.NUM_LEDGERS_CHECKED);
+        numFragmentsPerLedger = this.statsLogger.getOpStatsLogger(ReplicationStats.NUM_FRAGMENTS_PER_LEDGER);
+        numBookiesPerLedger = this.statsLogger.getOpStatsLogger(ReplicationStats.NUM_BOOKIES_PER_LEDGER);
+        numBookieAuditsDelayed = this.statsLogger.getCounter(ReplicationStats.NUM_BOOKIE_AUDITS_DELAYED);
+        numDelayedBookieAuditsCancelled = this.statsLogger
+                .getCounter(ReplicationStats.NUM_DELAYED_BOOKIE_AUDITS_DELAYES_CANCELLED);
+        numSkippingCheckTaskTimes = this.statsLogger.getCounter(NUM_SKIPPING_CHECK_TASK_TIMES);
+        numLedgersNotAdheringToPlacementPolicy = new Gauge<Integer>() {
+            @Override
+            public Integer getDefaultValue() {
+                return 0;
+            }
+
+            @Override
+            public Integer getSample() {
+                return ledgersNotAdheringToPlacementPolicyGuageValue.get();
+            }
+        };
+        this.statsLogger.registerGauge(ReplicationStats.NUM_LEDGERS_NOT_ADHERING_TO_PLACEMENT_POLICY,
+                numLedgersNotAdheringToPlacementPolicy);
+        numLedgersSoftlyAdheringToPlacementPolicy = new Gauge<Integer>() {
+            @Override
+            public Integer getDefaultValue() {
+                return 0;
+            }
+
+            @Override
+            public Integer getSample() {
+                return ledgersSoftlyAdheringToPlacementPolicyGuageValue.get();
+            }
+        };
+        this.statsLogger.registerGauge(ReplicationStats.NUM_LEDGERS_SOFTLY_ADHERING_TO_PLACEMENT_POLICY,
+                numLedgersSoftlyAdheringToPlacementPolicy);
+
+        numUnderreplicatedLedgersElapsedRecoveryGracePeriod = new Gauge<Integer>() {
+            @Override
+            public Integer getDefaultValue() {
+                return 0;
+            }
+
+            @Override
+            public Integer getSample() {
+                return numOfURLedgersElapsedRecoveryGracePeriodGuageValue.get();
+            }
+        };
+        this.statsLogger.registerGauge(ReplicationStats.NUM_UNDERREPLICATED_LEDGERS_ELAPSED_RECOVERY_GRACE_PERIOD,
+                numUnderreplicatedLedgersElapsedRecoveryGracePeriod);
+
+        numLedgersHavingNoReplicaOfAnEntry = new Gauge<Integer>() {
+            @Override
+            public Integer getDefaultValue() {
+                return 0;
+            }
+
+            @Override
+            public Integer getSample() {
+                return numLedgersHavingNoReplicaOfAnEntryGuageValue.get();
+            }
+        };
+        this.statsLogger.registerGauge(ReplicationStats.NUM_LEDGERS_HAVING_NO_REPLICA_OF_AN_ENTRY,
+                numLedgersHavingNoReplicaOfAnEntry);
+        numLedgersHavingLessThanAQReplicasOfAnEntry = new Gauge<Integer>() {
+            @Override
+            public Integer getDefaultValue() {
+                return 0;
+            }
+
+            @Override
+            public Integer getSample() {
+                return numLedgersHavingLessThanAQReplicasOfAnEntryGuageValue.get();
+            }
+        };
+        this.statsLogger.registerGauge(ReplicationStats.NUM_LEDGERS_HAVING_LESS_THAN_AQ_REPLICAS_OF_AN_ENTRY,
+                numLedgersHavingLessThanAQReplicasOfAnEntry);
+        numLedgersHavingLessThanWQReplicasOfAnEntry = new Gauge<Integer>() {
+            @Override
+            public Integer getDefaultValue() {
+                return 0;
+            }
+
+            @Override
+            public Integer getSample() {
+                return numLedgersHavingLessThanWQReplicasOfAnEntryGuageValue.get();
+            }
+        };
+        this.statsLogger.registerGauge(ReplicationStats.NUM_LEDGERS_HAVING_LESS_THAN_WQ_REPLICAS_OF_AN_ENTRY,
+                numLedgersHavingLessThanWQReplicasOfAnEntry);
+    }
+}
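The AuditorStats constructor above wires each AtomicInteger to a registered Gauge, so the stats provider samples the auditor's latest counts on its export interval. A minimal standalone sketch of the same pattern (NullStatsLogger stands in for a real provider; the gauge name is illustrative):

    import java.util.concurrent.atomic.AtomicInteger;
    import org.apache.bookkeeper.stats.Gauge;
    import org.apache.bookkeeper.stats.NullStatsLogger;
    import org.apache.bookkeeper.stats.StatsLogger;

    class GaugeSketch {
        public static void main(String[] args) {
            StatsLogger statsLogger = NullStatsLogger.INSTANCE;
            final AtomicInteger value = new AtomicInteger(0);
            statsLogger.registerGauge("example_gauge", new Gauge<Integer>() {
                @Override
                public Integer getDefaultValue() {
                    return 0;
                }

                @Override
                public Integer getSample() {
                    // polled by the stats provider; always reflects the latest value
                    return value.get();
                }
            });
            value.set(42); // the next sample observes 42
        }
    }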
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/AuditorTask.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/AuditorTask.java
new file mode 100644
index 00000000000..0ecfc2ffb29
--- /dev/null
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/AuditorTask.java
@@ -0,0 +1,163 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bookkeeper.replication;
+
+import com.google.common.collect.Lists;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.Set;
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.atomic.LongAdder;
+import java.util.function.BiConsumer;
+import org.apache.bookkeeper.client.BKException;
+import org.apache.bookkeeper.client.BookKeeper;
+import org.apache.bookkeeper.client.BookKeeperAdmin;
+import org.apache.bookkeeper.common.concurrent.FutureUtils;
+import org.apache.bookkeeper.conf.ClientConfiguration;
+import org.apache.bookkeeper.conf.ServerConfiguration;
+import org.apache.bookkeeper.meta.LedgerManager;
+import org.apache.bookkeeper.meta.LedgerUnderreplicationManager;
+import org.apache.bookkeeper.net.BookieId;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+abstract class AuditorTask implements Runnable {
+    private static final Logger LOG = LoggerFactory.getLogger(AuditorTask.class);
+
+    protected final ServerConfiguration conf;
+    protected AuditorStats auditorStats;
+    protected BookKeeperAdmin admin;
+    protected LedgerManager ledgerManager;
+    protected LedgerUnderreplicationManager ledgerUnderreplicationManager;
+    private final ShutdownTaskHandler shutdownTaskHandler;
+    private final BiConsumer<AtomicBoolean, Throwable> hasAuditCheckTask;
+    private final AtomicBoolean hasTask = new AtomicBoolean(false);
+
+    AuditorTask(ServerConfiguration conf,
+                AuditorStats auditorStats,
+                BookKeeperAdmin admin,
+                LedgerManager ledgerManager,
+                LedgerUnderreplicationManager ledgerUnderreplicationManager,
+                ShutdownTaskHandler shutdownTaskHandler,
+                BiConsumer<AtomicBoolean, Throwable> hasAuditCheckTask) {
+        this.conf = conf;
+        this.auditorStats = auditorStats;
+        this.admin = admin;
+        this.ledgerManager = ledgerManager;
+        this.ledgerUnderreplicationManager = ledgerUnderreplicationManager;
+        this.shutdownTaskHandler = shutdownTaskHandler;
+        this.hasAuditCheckTask = hasAuditCheckTask;
+    }
+
+    @Override
+    public void run() {
+        runTask();
+    }
+
+    protected abstract void runTask();
+
+    protected boolean isLedgerReplicationEnabled() throws ReplicationException.UnavailableException {
+        return ledgerUnderreplicationManager.isLedgerReplicationEnabled();
+    }
+
+    protected CompletableFuture<?> publishSuspectedLedgersAsync(Collection<String> missingBookies, Set<Long> ledgers) {
+        if (null == ledgers || ledgers.size() == 0) {
+            // there are no ledgers on the failed bookies, so just
+            // ignore the bookie failures
+            LOG.info("There are no ledgers for the failed bookies: {}", missingBookies);
+            return FutureUtils.Void();
+        }
+        LOG.info("The following ledgers: {} of bookies: {} are identified as underreplicated", ledgers, missingBookies);
+        auditorStats.getNumUnderReplicatedLedger().registerSuccessfulValue(ledgers.size());
+        LongAdder underReplicatedSize = new LongAdder();
+        FutureUtils.processList(
+                Lists.newArrayList(ledgers),
+                ledgerId ->
+                        ledgerManager.readLedgerMetadata(ledgerId).whenComplete((metadata, exception) -> {
+                            if (exception == null) {
+                                underReplicatedSize.add(metadata.getValue().getLength());
+                            }
+                        }), null).whenComplete((res, e) -> {
+                            auditorStats.getUnderReplicatedLedgerTotalSize()
+                                    .registerSuccessfulValue(underReplicatedSize.longValue());
+                        });
+
+        return FutureUtils.processList(
+                Lists.newArrayList(ledgers),
+                ledgerId -> ledgerUnderreplicationManager.markLedgerUnderreplicatedAsync(ledgerId, missingBookies),
+                null
+        );
+    }
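+    /*
+     * Note on the two passes above: the first processList run only
+     * accumulates the total size of the suspected ledgers for stats and is
+     * not awaited; the returned future tracks only the second run, which
+     * marks each ledger underreplicated. A caller that must observe the
+     * markings would therefore wait on the returned future, e.g. (sketch):
+     *
+     *   publishSuspectedLedgersAsync(missingBookies, ledgers).get();
+     */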
+
+    protected List<String> getAvailableBookies() throws BKException {
+        // Get the available bookies
+        Collection<BookieId> availableBkAddresses = admin.getAvailableBookies();
+        Collection<BookieId> readOnlyBkAddresses = admin.getReadOnlyBookies();
+        availableBkAddresses.addAll(readOnlyBkAddresses);
+
+        List<String> availableBookies = new ArrayList<String>();
+        for (BookieId addr : availableBkAddresses) {
+            availableBookies.add(addr.toString());
+        }
+        return availableBookies;
+    }
+
+    /**
+     * Get a BookKeeper client according to the given configuration.
+     * @param conf server configuration to build the client from
+     * @return a new BookKeeper client
+     * @throws IOException
+     * @throws InterruptedException
+     */
+    BookKeeper getBookKeeper(ServerConfiguration conf) throws IOException, InterruptedException {
+        return Auditor.createBookKeeperClient(conf);
+    }
+
+    /**
+     * Get a BookKeeperAdmin based on the given BookKeeper client.
+     * @param bookKeeper the client to wrap
+     * @return a new BookKeeperAdmin
+     */
+    BookKeeperAdmin getBookKeeperAdmin(final BookKeeper bookKeeper) {
+        return new BookKeeperAdmin(bookKeeper, auditorStats.getStatsLogger(), new ClientConfiguration(conf));
+    }
+
+    protected void submitShutdownTask() {
+        if (shutdownTaskHandler != null) {
+            shutdownTaskHandler.submitShutdownTask();
+        }
+    }
+
+    public abstract void shutdown();
+
+    protected boolean hasBookieCheckTask() {
+        hasTask.set(false);
+        hasAuditCheckTask.accept(hasTask, null);
+        return hasTask.get();
+    }
+
+    /**
+     * ShutdownTaskHandler used to shut down the auditor executor.
+     */
+    interface ShutdownTaskHandler {
+        void submitShutdownTask();
+    }
+
+}
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/AutoRecoveryMain.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/AutoRecoveryMain.java
index 9830c592904..ab074933fff 100644
--- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/AutoRecoveryMain.java
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/AutoRecoveryMain.java
@@ -1,4 +1,4 @@
-/**
+/*
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.
See the NOTICE file @@ -24,26 +24,32 @@ import static org.apache.bookkeeper.replication.ReplicationStats.REPLICATION_WORKER_SCOPE; import com.google.common.annotations.VisibleForTesting; - import java.io.File; import java.io.IOException; +import java.lang.Thread.UncaughtExceptionHandler; import java.net.MalformedURLException; -import java.util.HashSet; -import java.util.Set; - -import org.apache.bookkeeper.bookie.Bookie; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutionException; import org.apache.bookkeeper.bookie.BookieCriticalThread; +import org.apache.bookkeeper.bookie.BookieImpl; import org.apache.bookkeeper.bookie.ExitCode; +import org.apache.bookkeeper.client.BKException; +import org.apache.bookkeeper.client.BookKeeper; +import org.apache.bookkeeper.client.BookKeeperClientStats; +import org.apache.bookkeeper.common.component.ComponentStarter; +import org.apache.bookkeeper.common.component.LifecycleComponent; +import org.apache.bookkeeper.common.component.LifecycleComponentStack; import org.apache.bookkeeper.conf.ServerConfiguration; -import org.apache.bookkeeper.http.HttpServer; -import org.apache.bookkeeper.http.HttpServerLoader; -import org.apache.bookkeeper.meta.zk.ZKMetadataDriverBase; +import org.apache.bookkeeper.meta.MetadataClientDriver; import org.apache.bookkeeper.replication.ReplicationException.CompatibilityException; import org.apache.bookkeeper.replication.ReplicationException.UnavailableException; +import org.apache.bookkeeper.server.conf.BookieConfiguration; import org.apache.bookkeeper.server.http.BKHttpServiceProvider; +import org.apache.bookkeeper.server.service.AutoRecoveryService; +import org.apache.bookkeeper.server.service.HttpService; +import org.apache.bookkeeper.server.service.StatsProviderService; import org.apache.bookkeeper.stats.NullStatsLogger; import org.apache.bookkeeper.stats.StatsLogger; -import org.apache.bookkeeper.zookeeper.ZooKeeperClient; import org.apache.commons.cli.BasicParser; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.HelpFormatter; @@ -51,9 +57,6 @@ import org.apache.commons.cli.ParseException; import org.apache.commons.configuration.ConfigurationException; import org.apache.zookeeper.KeeperException; -import org.apache.zookeeper.WatchedEvent; -import org.apache.zookeeper.Watcher; -import org.apache.zookeeper.ZooKeeper; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -66,15 +69,18 @@ public class AutoRecoveryMain { private static final Logger LOG = LoggerFactory .getLogger(AutoRecoveryMain.class); - private ServerConfiguration conf; - ZooKeeper zk; - AuditorElector auditorElector; - ReplicationWorker replicationWorker; - private AutoRecoveryDeathWatcher deathWatcher; - private int exitCode; + private final ServerConfiguration conf; + final BookKeeper bkc; + final AuditorElector auditorElector; + final ReplicationWorker replicationWorker; + final AutoRecoveryDeathWatcher deathWatcher; + int exitCode; private volatile boolean shuttingDown = false; private volatile boolean running = false; + // Exception handler + private volatile UncaughtExceptionHandler uncaughtExceptionHandler = null; + public AutoRecoveryMain(ServerConfiguration conf) throws IOException, InterruptedException, KeeperException, UnavailableException, CompatibilityException { @@ -85,48 +91,40 @@ public AutoRecoveryMain(ServerConfiguration conf, StatsLogger statsLogger) throws IOException, InterruptedException, KeeperException, UnavailableException, CompatibilityException { this.conf = conf; - Set 
watchers = new HashSet(); - // TODO: better session handling for auto recovery daemon https://issues.apache.org/jira/browse/BOOKKEEPER-594 - // since {@link org.apache.bookkeeper.meta.ZkLedgerUnderreplicationManager} - // use Watcher, need to ensure the logic works correctly after recreating - // a new zookeeper client when session expired. - // for now just shutdown it. - watchers.add(new Watcher() { - @Override - public void process(WatchedEvent event) { - // Check for expired connection. - if (event.getState().equals(Watcher.Event.KeeperState.Expired)) { - LOG.error("ZK client connection to the ZK server has expired!"); - shutdown(ExitCode.ZK_EXPIRED); - } - } + this.bkc = Auditor.createBookKeeperClient(conf, statsLogger.scope(BookKeeperClientStats.CLIENT_SCOPE)); + MetadataClientDriver metadataClientDriver = bkc.getMetadataClientDriver(); + metadataClientDriver.setSessionStateListener(() -> { + LOG.error("Client connection to the Metadata server has expired, so shutting down AutoRecoveryMain!"); + // do not run "shutdown" in the main ZooKeeper client thread + // as it performs some blocking operations + CompletableFuture.runAsync(() -> { + shutdown(ExitCode.ZK_EXPIRED); + }); }); - zk = ZooKeeperClient.newBuilder() - .connectString(ZKMetadataDriverBase.resolveZkServers(conf)) - .sessionTimeoutMs(conf.getZkTimeout()) - .watchers(watchers) - .build(); - auditorElector = new AuditorElector(Bookie.getBookieAddress(conf).toString(), conf, - zk, statsLogger.scope(AUDITOR_SCOPE)); - replicationWorker = new ReplicationWorker(zk, conf, statsLogger.scope(REPLICATION_WORKER_SCOPE)); - deathWatcher = new AutoRecoveryDeathWatcher(this); - } - public AutoRecoveryMain(ServerConfiguration conf, ZooKeeper zk) throws IOException, InterruptedException, - KeeperException, UnavailableException, CompatibilityException { - this.conf = conf; - this.zk = zk; - auditorElector = new AuditorElector(Bookie.getBookieAddress(conf).toString(), conf, zk); - replicationWorker = new ReplicationWorker(zk, conf); + auditorElector = new AuditorElector( + BookieImpl.getBookieId(conf).toString(), + conf, + bkc, + statsLogger.scope(AUDITOR_SCOPE), + false); + replicationWorker = new ReplicationWorker( + conf, + bkc, + false, + statsLogger.scope(REPLICATION_WORKER_SCOPE)); deathWatcher = new AutoRecoveryDeathWatcher(this); } /* * Start daemons */ - public void start() throws UnavailableException { + public void start() { auditorElector.start(); replicationWorker.start(); + if (null != uncaughtExceptionHandler) { + deathWatcher.setUncaughtExceptionHandler(uncaughtExceptionHandler); + } deathWatcher.start(); running = true; } @@ -154,13 +152,6 @@ private void shutdown(int exitCode) { shuttingDown = true; running = false; this.exitCode = exitCode; - try { - deathWatcher.interrupt(); - deathWatcher.join(); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - LOG.warn("Interrupted shutting down auto recovery", e); - } try { auditorElector.shutdown(); @@ -170,10 +161,12 @@ private void shutdown(int exitCode) { } replicationWorker.shutdown(); try { - zk.close(); + bkc.close(); + } catch (BKException e) { + LOG.warn("Failed to close bookkeeper client for auto recovery", e); } catch (InterruptedException e) { Thread.currentThread().interrupt(); - LOG.warn("Interrupted shutting down auto recovery", e); + LOG.warn("Interrupted closing bookkeeper client for auto recovery", e); } } @@ -181,11 +174,28 @@ private int getExitCode() { return exitCode; } + /** + * Currently the uncaught exception handler is used for 
DeathWatcher to notify
+     * lifecycle management that a bookie is dead for some reason.
+     *
+     * <p>In the future, we can register this exceptionHandler to critical threads
+     * so that when those threads die, it will automatically trigger lifecycle
+     * management to shut down the process.
+     */
+    public void setExceptionHandler(UncaughtExceptionHandler exceptionHandler) {
+        this.uncaughtExceptionHandler = exceptionHandler;
+    }
+
     @VisibleForTesting
     public Auditor getAuditor() {
         return auditorElector.getAuditor();
     }

+    @VisibleForTesting
+    public ReplicationWorker getReplicationWorker() {
+        return replicationWorker;
+    }
+
     /** Is auto-recovery service running? */
     public boolean isAutoRecoveryRunning() {
         return running;
@@ -194,7 +204,7 @@ public boolean isAutoRecoveryRunning() {
     /*
     * DeathWatcher for AutoRecovery daemons.
     */
-    private static class AutoRecoveryDeathWatcher extends BookieCriticalThread {
+    private class AutoRecoveryDeathWatcher extends BookieCriticalThread {
        private int watchInterval;
        private AutoRecoveryMain autoRecoveryMain;

@@ -203,6 +213,13 @@ public AutoRecoveryDeathWatcher(AutoRecoveryMain autoRecoveryMain) {
                + autoRecoveryMain.conf.getBookiePort());
            this.autoRecoveryMain = autoRecoveryMain;
            watchInterval = autoRecoveryMain.conf.getDeathWatchInterval();
+            // set a default uncaught exception handler to shut down AutoRecovery
+            // when it notices that AutoRecovery is not running any more.
+            setUncaughtExceptionHandler((thread, cause) -> {
+                LOG.info("AutoRecoveryDeathWatcher exited loop due to uncaught exception from thread {}",
+                        thread.getName(), cause);
+                shutdown();
+            });
        }

        @Override
@@ -212,13 +229,20 @@ public void run() {
                Thread.sleep(watchInterval);
            } catch (InterruptedException ie) {
                Thread.currentThread().interrupt();
-                break;
            }
            // If any one service not running, then shutdown peer.
-            if (!autoRecoveryMain.auditorElector.isRunning()
-                || !autoRecoveryMain.replicationWorker.isRunning()) {
-                autoRecoveryMain.shutdown(ExitCode.SERVER_EXCEPTION);
-                break;
+            if (!autoRecoveryMain.auditorElector.isRunning() || !autoRecoveryMain.replicationWorker.isRunning()) {
+                LOG.info(
+                    "AutoRecoveryDeathWatcher noticed the AutoRecovery is not running any more, "
+                        + "exiting the watch loop!");
+                /*
+                 * The death watcher has noticed that AutoRecovery is not
+                 * running any more. Throw an exception to fail the death
+                 * watcher thread; that triggers the uncaught exception
+                 * handler, which handles this "AutoRecovery not running"
+                 * situation.
+                 */
+                throw new RuntimeException("AutoRecovery is not running any more");
            }
        }
    }
@@ -273,14 +297,14 @@ private static ServerConfiguration parseArgs(String[] args)

            if (cmdLine.hasOption('c')) {
                if (null != leftArgs && leftArgs.length > 0) {
-                    throw new IllegalArgumentException();
+                    throw new IllegalArgumentException("unexpected arguments [" + String.join(" ", leftArgs) + "]");
                }
                String confFile = cmdLine.getOptionValue("c");
                loadConfFile(conf, confFile);
            }

            if (null != leftArgs && leftArgs.length > 0) {
-                throw new IllegalArgumentException();
+                throw new IllegalArgumentException("unexpected arguments [" + String.join(" ", leftArgs) + "]");
            }
            return conf;
        } catch (ParseException e) {
@@ -289,45 +313,78 @@ private static ServerConfiguration parseArgs(String[] args)
    }

    public static void main(String[] args) {
-        ServerConfiguration conf = null;
+        int retCode = doMain(args);
+        Runtime.getRuntime().exit(retCode);
+    }
+
+    static int doMain(String[] args) {
+
+        ServerConfiguration conf;
+
+        // 0.
parse command line try { conf = parseArgs(args); } catch (IllegalArgumentException iae) { LOG.error("Error parsing command line arguments : ", iae); - System.err.println(iae.getMessage()); + if (iae.getMessage() != null) { + System.err.println(iae.getMessage()); + } printUsage(); - System.exit(ExitCode.INVALID_CONF); + return ExitCode.INVALID_CONF; } + // 1. building the component stack: + LifecycleComponent server; try { - final AutoRecoveryMain autoRecoveryMain = new AutoRecoveryMain(conf); - autoRecoveryMain.start(); - HttpServerLoader.loadHttpServer(conf); - final HttpServer httpServer = HttpServerLoader.get(); - if (conf.isHttpServerEnabled() && httpServer != null) { - BKHttpServiceProvider serviceProvider = new BKHttpServiceProvider.Builder() - .setAutoRecovery(autoRecoveryMain) - .setServerConfiguration(conf) - .build(); - httpServer.initialize(serviceProvider); - httpServer.startServer(conf.getHttpServerPort()); - } - Runtime.getRuntime().addShutdownHook(new Thread() { - @Override - public void run() { - autoRecoveryMain.shutdown(); - if (httpServer != null && httpServer.isRunning()) { - httpServer.stopServer(); - } - LOG.info("Shutdown AutoRecoveryMain successfully"); - } - }); - LOG.info("Register shutdown hook successfully"); - autoRecoveryMain.join(); - System.exit(autoRecoveryMain.getExitCode()); + server = buildAutoRecoveryServer(new BookieConfiguration(conf)); } catch (Exception e) { - LOG.error("Exception running AutoRecoveryMain : ", e); - System.exit(ExitCode.SERVER_EXCEPTION); + LOG.error("Failed to build AutoRecovery Server", e); + return ExitCode.SERVER_EXCEPTION; + } + + // 2. start the server + try { + ComponentStarter.startComponent(server).get(); + } catch (InterruptedException ie) { + Thread.currentThread().interrupt(); + // the server is interrupted + LOG.info("AutoRecovery server is interrupted. Exiting ..."); + } catch (ExecutionException ee) { + LOG.error("Error in bookie shutdown", ee.getCause()); + return ExitCode.SERVER_EXCEPTION; } + return ExitCode.OK; + } + + public static LifecycleComponentStack buildAutoRecoveryServer(BookieConfiguration conf) throws Exception { + LifecycleComponentStack.Builder serverBuilder = LifecycleComponentStack.newBuilder() + .withName("autorecovery-server"); + + // 1. build stats provider + StatsProviderService statsProviderService = new StatsProviderService(conf); + StatsLogger rootStatsLogger = statsProviderService.getStatsProvider().getStatsLogger(""); + + serverBuilder.addComponent(statsProviderService); + LOG.info("Load lifecycle component : {}", StatsProviderService.class.getName()); + + // 2. build AutoRecovery server + AutoRecoveryService autoRecoveryService = new AutoRecoveryService(conf, rootStatsLogger); + + serverBuilder.addComponent(autoRecoveryService); + LOG.info("Load lifecycle component : {}", AutoRecoveryService.class.getName()); + + // 4. 
build http service
+        if (conf.getServerConf().isHttpServerEnabled()) {
+            BKHttpServiceProvider provider = new BKHttpServiceProvider.Builder()
+                .setAutoRecovery(autoRecoveryService.getAutoRecoveryServer())
+                .setServerConfiguration(conf.getServerConf())
+                .setStatsProvider(statsProviderService.getStatsProvider()).build();
+            HttpService httpService = new HttpService(provider, conf, rootStatsLogger);
+
+            serverBuilder.addComponent(httpService);
+            LOG.info("Load lifecycle component : {}", HttpService.class.getName());
+        }
+
+        return serverBuilder.build();
    }
}
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/BookieLedgerIndexer.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/BookieLedgerIndexer.java
index 027081a08f6..38cc79e56e2 100644
--- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/BookieLedgerIndexer.java
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/BookieLedgerIndexer.java
@@ -1,4 +1,4 @@
-/**
+/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
@@ -25,12 +25,9 @@
 import java.util.Set;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.CountDownLatch;
-
 import org.apache.bookkeeper.client.BKException;
-import org.apache.bookkeeper.client.LedgerMetadata;
 import org.apache.bookkeeper.meta.LedgerManager;
-import org.apache.bookkeeper.net.BookieSocketAddress;
-import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GenericCallback;
+import org.apache.bookkeeper.net.BookieId;
 import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.Processor;
 import org.apache.bookkeeper.replication.ReplicationException.BKAuditException;
 import org.apache.zookeeper.AsyncCallback;
@@ -65,37 +62,28 @@ public Map<String, Set<Long>> getBookieToLedgerIndex()
        final CountDownLatch ledgerCollectorLatch = new CountDownLatch(1);

        Processor<Long> ledgerProcessor = new Processor<Long>() {
-            @Override
-            public void process(final Long ledgerId,
-                    final AsyncCallback.VoidCallback iterCallback) {
-                GenericCallback<LedgerMetadata> genericCallback = new GenericCallback<LedgerMetadata>() {
-                    @Override
-                    public void operationComplete(int rc,
-                            LedgerMetadata ledgerMetadata) {
-                        if (rc == BKException.Code.OK) {
-                            for (Map.Entry<Long, ArrayList<BookieSocketAddress>> ensemble : ledgerMetadata
-                                    .getEnsembles().entrySet()) {
-                                for (BookieSocketAddress bookie : ensemble
-                                        .getValue()) {
-                                    putLedger(bookie2ledgersMap,
-                                            bookie.toString(),
-                                            ledgerId);
+            @Override
+            public void process(Long ledgerId, AsyncCallback.VoidCallback iterCallback) {
+                ledgerManager.readLedgerMetadata(ledgerId).whenComplete((metadata, exception) -> {
+                    if (exception == null) {
+                        for (Map.Entry<Long, ? extends List<BookieId>> ensemble
+                                : metadata.getValue().getAllEnsembles().entrySet()) {
+                            for (BookieId bookie : ensemble.getValue()) {
+                                putLedger(bookie2ledgersMap, bookie.toString(), ledgerId);
+                            }
+                        }
+                        iterCallback.processResult(BKException.Code.OK, null, null);
+                    } else if (BKException.getExceptionCode(exception)
+                            == BKException.Code.NoSuchLedgerExistsOnMetadataServerException) {
+                        LOG.info("Ignoring replication of already deleted ledger {}", ledgerId);
+                        iterCallback.processResult(BKException.Code.OK, null, null);
+                    } else {
+                        LOG.warn("Unable to read the ledger: {} information", ledgerId);
+                        iterCallback.processResult(BKException.getExceptionCode(exception), null, null);
                    }
-                                }
-                            }
-                        } else if (rc == BKException.Code.NoSuchLedgerExistsException) {
-                            LOG.info("Ignoring replication of already deleted ledger {}",
ledgerId); - rc = BKException.Code.OK; - } else { - LOG.warn("Unable to read the ledger:" + ledgerId - + " information"); - } - iterCallback.processResult(rc, null, null); - } - }; - ledgerManager.readLedgerMetadata(ledgerId, genericCallback); - } - }; + }); + } + }; // Reading the result after processing all the ledgers final List resultCode = new ArrayList(1); ledgerManager.asyncProcessLedgers(ledgerProcessor, diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/ReplicationEnableCb.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/ReplicationEnableCb.java index 36672b366b0..af94fbf42dd 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/ReplicationEnableCb.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/ReplicationEnableCb.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -21,7 +21,6 @@ package org.apache.bookkeeper.replication; import java.util.concurrent.CountDownLatch; - import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GenericCallback; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/ReplicationException.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/ReplicationException.java index 733f63bde80..1c1bcde6984 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/ReplicationException.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/ReplicationException.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -19,12 +19,22 @@ package org.apache.bookkeeper.replication; import java.util.function.Function; +import org.apache.zookeeper.KeeperException; /** * Exceptions for use within the replication service. */ public abstract class ReplicationException extends Exception { + public static UnavailableException fromKeeperException(String message, KeeperException ke) { + if (ke instanceof KeeperException.ConnectionLossException + || ke instanceof KeeperException.SessionExpiredException) { + return new NonRecoverableReplicationException(message, ke); + } + return new UnavailableException(message, ke); + } + + public static final Function EXCEPTION_HANDLER = cause -> { if (cause instanceof ReplicationException) { return (ReplicationException) cause; @@ -56,6 +66,21 @@ public UnavailableException(String message) { } } + /** + * The replication service encountered an error that requires service restart. + */ + public static class NonRecoverableReplicationException extends UnavailableException { + private static final long serialVersionUID = 31872211L; + + public NonRecoverableReplicationException(String message, Throwable cause) { + super(message, cause); + } + + public NonRecoverableReplicationException(String message) { + super(message); + } + } + /** * Compatibility error. This version of the code, doesn't know how to * deal with the metadata it has found. 
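The new fromKeeperException factory above routes connection-loss and session-expiry errors to NonRecoverableReplicationException, letting callers distinguish "restart the service" failures from transient ones. A minimal sketch of a call site under that contract (the handler method is hypothetical):

    import org.apache.bookkeeper.replication.ReplicationException;
    import org.apache.zookeeper.KeeperException;

    class ZkErrorHandlingSketch {
        void onZkError(KeeperException ke) {
            ReplicationException.UnavailableException ue =
                    ReplicationException.fromKeeperException("ZK operation failed", ke);
            if (ue instanceof ReplicationException.NonRecoverableReplicationException) {
                // the session is gone: shut the auditor/replication worker down,
                // as checkUnderReplicationForReplicasCheck does via submitShutdownTask()
            } else {
                // transient unavailability: surface the error and retry later
            }
        }
    }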
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/ReplicationStats.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/ReplicationStats.java index b1afa816d04..78231244833 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/ReplicationStats.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/ReplicationStats.java @@ -30,14 +30,27 @@ public interface ReplicationStats { String AUDITOR_SCOPE = "auditor"; String ELECTION_ATTEMPTS = "election_attempts"; String NUM_UNDER_REPLICATED_LEDGERS = "NUM_UNDER_REPLICATED_LEDGERS"; + String UNDER_REPLICATED_LEDGERS_TOTAL_SIZE = "UNDER_REPLICATED_LEDGERS_TOTAL_SIZE"; String URL_PUBLISH_TIME_FOR_LOST_BOOKIE = "URL_PUBLISH_TIME_FOR_LOST_BOOKIE"; String BOOKIE_TO_LEDGERS_MAP_CREATION_TIME = "BOOKIE_TO_LEDGERS_MAP_CREATION_TIME"; String CHECK_ALL_LEDGERS_TIME = "CHECK_ALL_LEDGERS_TIME"; + String PLACEMENT_POLICY_CHECK_TIME = "PLACEMENT_POLICY_CHECK_TIME"; + String REPLICAS_CHECK_TIME = "REPLICAS_CHECK_TIME"; + String AUDIT_BOOKIES_TIME = "AUDIT_BOOKIES_TIME"; String NUM_FRAGMENTS_PER_LEDGER = "NUM_FRAGMENTS_PER_LEDGER"; String NUM_BOOKIES_PER_LEDGER = "NUM_BOOKIES_PER_LEDGER"; String NUM_LEDGERS_CHECKED = "NUM_LEDGERS_CHECKED"; String NUM_BOOKIE_AUDITS_DELAYED = "NUM_BOOKIE_AUDITS_DELAYED"; String NUM_DELAYED_BOOKIE_AUDITS_DELAYES_CANCELLED = "NUM_DELAYED_BOOKIE_AUDITS_CANCELLED"; + String NUM_LEDGERS_NOT_ADHERING_TO_PLACEMENT_POLICY = "NUM_LEDGERS_NOT_ADHERING_TO_PLACEMENT_POLICY"; + String NUM_LEDGERS_SOFTLY_ADHERING_TO_PLACEMENT_POLICY = "NUM_LEDGERS_SOFTLY_ADHERING_TO_PLACEMENT_POLICY"; + String NUM_UNDERREPLICATED_LEDGERS_ELAPSED_RECOVERY_GRACE_PERIOD = + "NUM_UNDERREPLICATED_LEDGERS_ELAPSED_RECOVERY_GRACE_PERIOD"; + String NUM_LEDGERS_HAVING_NO_REPLICA_OF_AN_ENTRY = "NUM_LEDGERS_HAVING_NO_REPLICA_OF_AN_ENTRY"; + String NUM_LEDGERS_HAVING_LESS_THAN_AQ_REPLICAS_OF_AN_ENTRY = + "NUM_LEDGERS_HAVING_LESS_THAN_AQ_REPLICAS_OF_AN_ENTRY"; + String NUM_LEDGERS_HAVING_LESS_THAN_WQ_REPLICAS_OF_AN_ENTRY = + "NUM_LEDGERS_HAVING_LESS_THAN_WQ_REPLICAS_OF_AN_ENTRY"; String REPLICATION_WORKER_SCOPE = "replication_worker"; String REREPLICATE_OP = "rereplicate"; @@ -46,9 +59,11 @@ public interface ReplicationStats { String NUM_BYTES_READ = "NUM_BYTES_READ"; String NUM_ENTRIES_WRITTEN = "NUM_ENTRIES_WRITTEN"; String NUM_BYTES_WRITTEN = "NUM_BYTES_WRITTEN"; + String READ_DATA_LATENCY = "READ_DATA_LATENCY"; + String WRITE_DATA_LATENCY = "WRITE_DATA_LATENCY"; String REPLICATE_EXCEPTION = "exceptions"; String NUM_DEFER_LEDGER_LOCK_RELEASE_OF_FAILED_LEDGER = "NUM_DEFER_LEDGER_LOCK_RELEASE_OF_FAILED_LEDGER"; - - String BK_CLIENT_SCOPE = "bk_client"; - + String NUM_ENTRIES_UNABLE_TO_READ_FOR_REPLICATION = "NUM_ENTRIES_UNABLE_TO_READ_FOR_REPLICATION"; + String NUM_NOT_ADHERING_PLACEMENT_LEDGERS_REPLICATED = "NUM_NOT_ADHERING_PLACEMENT_LEDGERS_REPLICATED"; + String NUM_SKIPPING_CHECK_TASK_TIMES = "NUM_SKIPPING_CHECK_TASK_TIMES"; } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/ReplicationWorker.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/ReplicationWorker.java index d5c85f16107..c32487a30bc 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/ReplicationWorker.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/ReplicationWorker.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file * distributed with this work for additional information @@ -19,46 +19,57 @@ */ package org.apache.bookkeeper.replication; -import static org.apache.bookkeeper.replication.ReplicationStats.BK_CLIENT_SCOPE; import static org.apache.bookkeeper.replication.ReplicationStats.NUM_DEFER_LEDGER_LOCK_RELEASE_OF_FAILED_LEDGER; +import static org.apache.bookkeeper.replication.ReplicationStats.NUM_ENTRIES_UNABLE_TO_READ_FOR_REPLICATION; import static org.apache.bookkeeper.replication.ReplicationStats.NUM_FULL_OR_PARTIAL_LEDGERS_REPLICATED; +import static org.apache.bookkeeper.replication.ReplicationStats.NUM_NOT_ADHERING_PLACEMENT_LEDGERS_REPLICATED; import static org.apache.bookkeeper.replication.ReplicationStats.REPLICATE_EXCEPTION; +import static org.apache.bookkeeper.replication.ReplicationStats.REPLICATION_WORKER_SCOPE; import static org.apache.bookkeeper.replication.ReplicationStats.REREPLICATE_OP; +import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Stopwatch; import com.google.common.cache.CacheBuilder; import com.google.common.cache.CacheLoader; import com.google.common.cache.LoadingCache; - +import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; import java.io.IOException; +import java.util.ArrayList; import java.util.Collection; +import java.util.Collections; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.NavigableSet; import java.util.Set; import java.util.SortedMap; import java.util.Timer; import java.util.TimerTask; +import java.util.TreeMap; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ConcurrentSkipListSet; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; - +import java.util.function.BiConsumer; import org.apache.bookkeeper.bookie.BookieThread; import org.apache.bookkeeper.client.BKException; -import org.apache.bookkeeper.client.BKException.BKNoSuchLedgerExistsException; +import org.apache.bookkeeper.client.BKException.BKNoSuchLedgerExistsOnMetadataServerException; import org.apache.bookkeeper.client.BKException.BKNotEnoughBookiesException; import org.apache.bookkeeper.client.BookKeeper; import org.apache.bookkeeper.client.BookKeeperAdmin; +import org.apache.bookkeeper.client.EnsemblePlacementPolicy; import org.apache.bookkeeper.client.LedgerChecker; import org.apache.bookkeeper.client.LedgerFragment; import org.apache.bookkeeper.client.LedgerHandle; -import org.apache.bookkeeper.client.LedgerMetadata; +import org.apache.bookkeeper.client.api.LedgerMetadata; +import org.apache.bookkeeper.common.concurrent.FutureUtils; import org.apache.bookkeeper.conf.ClientConfiguration; import org.apache.bookkeeper.conf.ServerConfiguration; -import org.apache.bookkeeper.meta.AbstractZkLedgerManagerFactory; -import org.apache.bookkeeper.meta.LedgerManagerFactory; +import org.apache.bookkeeper.meta.LedgerManager; import org.apache.bookkeeper.meta.LedgerUnderreplicationManager; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GenericCallback; import org.apache.bookkeeper.replication.ReplicationException.CompatibilityException; import org.apache.bookkeeper.replication.ReplicationException.UnavailableException; @@ -66,8 +77,8 @@ import org.apache.bookkeeper.stats.NullStatsLogger; import org.apache.bookkeeper.stats.OpStatsLogger; import 
org.apache.bookkeeper.stats.StatsLogger; -import org.apache.zookeeper.KeeperException; -import org.apache.zookeeper.ZooKeeper; +import org.apache.bookkeeper.stats.annotations.StatsDoc; +import org.apache.bookkeeper.versioning.Versioned; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -75,48 +86,80 @@ * ReplicationWorker will take the fragments one by one from * ZKLedgerUnderreplicationManager and replicates to it. */ +@StatsDoc( + name = REPLICATION_WORKER_SCOPE, + help = "replication worker related stats" +) public class ReplicationWorker implements Runnable { private static final Logger LOG = LoggerFactory .getLogger(ReplicationWorker.class); - private static final int REPLICATED_FAILED_LEDGERS_MAXSIZE = 100; - static final int MAXNUMBER_REPLICATION_FAILURES_ALLOWED_BEFORE_DEFERRING = 10; + private static final int REPLICATED_FAILED_LEDGERS_MAXSIZE = 2000; + public static final int NUM_OF_EXPONENTIAL_BACKOFF_RETRIALS = 5; private final LedgerUnderreplicationManager underreplicationManager; private final ServerConfiguration conf; - private final ZooKeeper zkc; private volatile boolean workerRunning = false; private final BookKeeperAdmin admin; private final LedgerChecker ledgerChecker; private final BookKeeper bkc; + private final boolean ownBkc; private final Thread workerThread; private final long rwRereplicateBackoffMs; private final long openLedgerRereplicationGracePeriod; private final Timer pendingReplicationTimer; private final long lockReleaseOfFailedLedgerGracePeriod; + private final long baseBackoffForLockReleaseOfFailedLedger; + private final BiConsumer onReadEntryFailureCallback; + private final LedgerManager ledgerManager; // Expose Stats private final StatsLogger statsLogger; + @StatsDoc( + name = REPLICATE_EXCEPTION, + help = "replication related exceptions" + ) + private final StatsLogger exceptionLogger; + @StatsDoc( + name = REREPLICATE_OP, + help = "operation stats of re-replicating ledgers" + ) private final OpStatsLogger rereplicateOpStats; + @StatsDoc( + name = NUM_FULL_OR_PARTIAL_LEDGERS_REPLICATED, + help = "the number of ledgers re-replicated" + ) private final Counter numLedgersReplicated; + @StatsDoc( + name = NUM_DEFER_LEDGER_LOCK_RELEASE_OF_FAILED_LEDGER, + help = "the number of defer-ledger-lock-releases of failed ledgers" + ) private final Counter numDeferLedgerLockReleaseOfFailedLedger; + @StatsDoc( + name = NUM_ENTRIES_UNABLE_TO_READ_FOR_REPLICATION, + help = "the number of entries ReplicationWorker unable to read" + ) + private final Counter numEntriesUnableToReadForReplication; + @StatsDoc( + name = NUM_NOT_ADHERING_PLACEMENT_LEDGERS_REPLICATED, + help = "the number of not adhering placement policy ledgers re-replicated" + ) + private final Counter numNotAdheringPlacementLedgersReplicated; private final Map exceptionCounters; final LoadingCache replicationFailedLedgers; + final LoadingCache> unableToReadEntriesForReplication; /** * Replication worker for replicating the ledger fragments from * UnderReplicationManager to the targetBookie. This target bookie will be a * local bookie. 
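The @StatsDoc annotations introduced above pair each metric field with its name and help text, so documentation can be generated straight from the code. A minimal sketch of the same pattern; the component and metric names here are hypothetical, not part of the patch:

```java
import org.apache.bookkeeper.stats.Counter;
import org.apache.bookkeeper.stats.StatsLogger;
import org.apache.bookkeeper.stats.annotations.StatsDoc;

// Hypothetical component showing the @StatsDoc pattern used in this patch.
@StatsDoc(name = "my_component", help = "stats for a custom replication-side component")
class MyComponentStats {

    @StatsDoc(name = "events_handled", help = "number of events handled by MyComponent")
    private final Counter eventsHandled;

    MyComponentStats(StatsLogger statsLogger) {
        // The string passed here should match the @StatsDoc name above.
        this.eventsHandled = statsLogger.getCounter("events_handled");
    }

    void onEvent() {
        eventsHandled.inc();
    }
}
```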
* - * @param zkc - * - ZK instance * @param conf * - configurations */ - public ReplicationWorker(final ZooKeeper zkc, - final ServerConfiguration conf) - throws CompatibilityException, KeeperException, + public ReplicationWorker(final ServerConfiguration conf) + throws CompatibilityException, UnavailableException, InterruptedException, IOException { - this(zkc, conf, NullStatsLogger.INSTANCE); + this(conf, NullStatsLogger.INSTANCE); } /** @@ -124,39 +167,37 @@ public ReplicationWorker(final ZooKeeper zkc, * UnderReplicationManager to the targetBookie. This target bookie will be a * local bookie. * - * @param zkc - * - ZK instance * @param conf * - configurations * @param statsLogger * - stats logger */ - public ReplicationWorker(final ZooKeeper zkc, - final ServerConfiguration conf, + public ReplicationWorker(final ServerConfiguration conf, StatsLogger statsLogger) - throws CompatibilityException, KeeperException, + throws CompatibilityException, UnavailableException, InterruptedException, IOException { - this.zkc = zkc; + this(conf, Auditor.createBookKeeperClient(conf), true, statsLogger); + } + + ReplicationWorker(final ServerConfiguration conf, + BookKeeper bkc, + boolean ownBkc, + StatsLogger statsLogger) + throws CompatibilityException, InterruptedException, UnavailableException { this.conf = conf; - try { - this.bkc = BookKeeper.forConfig(new ClientConfiguration(conf)) - .statsLogger(statsLogger.scope(BK_CLIENT_SCOPE)) - .build(); - } catch (BKException e) { - throw new IOException("Failed to instantiate replication worker", e); - } - LedgerManagerFactory mFactory = AbstractZkLedgerManagerFactory - .newLedgerManagerFactory( - this.conf, - bkc.getMetadataClientDriver().getLayoutManager()); - this.underreplicationManager = mFactory - .newLedgerUnderreplicationManager(); - this.admin = new BookKeeperAdmin(bkc, statsLogger); + this.bkc = bkc; + this.ownBkc = ownBkc; + + this.underreplicationManager = bkc.getLedgerManagerFactory().newLedgerUnderreplicationManager(); + this.ledgerManager = bkc.getLedgerManagerFactory().newLedgerManager(); + this.admin = new BookKeeperAdmin(bkc, statsLogger, new ClientConfiguration(conf)); this.ledgerChecker = new LedgerChecker(bkc); this.workerThread = new BookieThread(this, "ReplicationWorker"); this.openLedgerRereplicationGracePeriod = conf .getOpenLedgerRereplicationGracePeriod(); this.lockReleaseOfFailedLedgerGracePeriod = conf.getLockReleaseOfFailedLedgerGracePeriod(); + this.baseBackoffForLockReleaseOfFailedLedger = this.lockReleaseOfFailedLedgerGracePeriod + / (long) (Math.pow(2, NUM_OF_EXPONENTIAL_BACKOFF_RETRIALS)); this.rwRereplicateBackoffMs = conf.getRwRereplicateBackoffMs(); this.pendingReplicationTimer = new Timer("PendingReplicationTimer"); this.replicationFailedLedgers = CacheBuilder.newBuilder().maximumSize(REPLICATED_FAILED_LEDGERS_MAXSIZE) @@ -166,14 +207,31 @@ public AtomicInteger load(Long key) throws Exception { return new AtomicInteger(); } }); + this.unableToReadEntriesForReplication = CacheBuilder.newBuilder() + .maximumSize(REPLICATED_FAILED_LEDGERS_MAXSIZE) + .build(new CacheLoader>() { + @Override + public ConcurrentSkipListSet load(Long key) throws Exception { + return new ConcurrentSkipListSet(); + } + }); // Expose Stats this.statsLogger = statsLogger; + this.exceptionLogger = statsLogger.scope(REPLICATE_EXCEPTION); this.rereplicateOpStats = this.statsLogger.getOpStatsLogger(REREPLICATE_OP); this.numLedgersReplicated = this.statsLogger.getCounter(NUM_FULL_OR_PARTIAL_LEDGERS_REPLICATED); 
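With this refactor callers no longer pass a ZooKeeper handle: the worker either builds its own BookKeeper client via Auditor.createBookKeeperClient(conf) and then owns its lifecycle (ownBkc = true), or wraps one supplied by the caller. A usage sketch, assuming a reachable metadata service at the given URI; start() refers to the worker's existing thread-start entry point, which lies outside this hunk:

```java
import org.apache.bookkeeper.conf.ServerConfiguration;
import org.apache.bookkeeper.replication.ReplicationWorker;
import org.apache.bookkeeper.stats.NullStatsLogger;

public class ReplicationWorkerSketch {
    public static void main(String[] args) throws Exception {
        ServerConfiguration conf = new ServerConfiguration();
        // Assumed environment: a metadata service (e.g. ZooKeeper) at this URI.
        conf.setMetadataServiceUri("zk+hierarchical://localhost:2181/ledgers");

        // No ZooKeeper handle any more; the worker creates and owns its own client.
        ReplicationWorker worker = new ReplicationWorker(conf, NullStatsLogger.INSTANCE);
        worker.start();
        // ... later, on shutdown, the owned client is closed as well:
        worker.shutdown();
    }
}
```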
this.numDeferLedgerLockReleaseOfFailedLedger = this.statsLogger .getCounter(NUM_DEFER_LEDGER_LOCK_RELEASE_OF_FAILED_LEDGER); + this.numEntriesUnableToReadForReplication = this.statsLogger + .getCounter(NUM_ENTRIES_UNABLE_TO_READ_FOR_REPLICATION); + this.numNotAdheringPlacementLedgersReplicated = this.statsLogger + .getCounter(NUM_NOT_ADHERING_PLACEMENT_LEDGERS_REPLICATED); this.exceptionCounters = new HashMap(); + this.onReadEntryFailureCallback = (ledgerid, entryid) -> { + numEntriesUnableToReadForReplication.inc(); + unableToReadEntriesForReplication.getUnchecked(ledgerid).add(entryid); + }; } /** @@ -188,9 +246,12 @@ public void run() { workerRunning = true; while (workerRunning) { try { - rereplicate(); + if (!rereplicate()) { + LOG.warn("failed while replicating fragments"); + waitBackOffTime(rwRereplicateBackoffMs); + } } catch (InterruptedException e) { - LOG.info("InterruptedException " + LOG.error("InterruptedException " + "while replicating fragments", e); shutdown(); Thread.currentThread().interrupt(); @@ -198,10 +259,20 @@ public void run() { } catch (BKException e) { LOG.error("BKException while replicating fragments", e); waitBackOffTime(rwRereplicateBackoffMs); + } catch (ReplicationException.NonRecoverableReplicationException nre) { + LOG.error("NonRecoverableReplicationException " + + "while replicating fragments", nre); + shutdown(); + return; } catch (UnavailableException e) { LOG.error("UnavailableException " + "while replicating fragments", e); waitBackOffTime(rwRereplicateBackoffMs); + if (Thread.currentThread().isInterrupted()) { + LOG.error("Interrupted while replicating fragments"); + shutdown(); + return; + } } } LOG.info("ReplicationWorker exited loop!"); @@ -219,7 +290,7 @@ private static void waitBackOffTime(long backoffMs) { * Replicates the under replicated fragments from failed bookie ledger to * targetBookie. 
*/ - private void rereplicate() throws InterruptedException, BKException, + private boolean rereplicate() throws InterruptedException, BKException, UnavailableException { long ledgerIdToReplicate = underreplicationManager .getLedgerToRereplicate(); @@ -236,6 +307,7 @@ private void rereplicate() throws InterruptedException, BKException, rereplicateOpStats.registerFailedEvent(latencyMillis, TimeUnit.MILLISECONDS); } } + return success; } private void logBKExceptionAndReleaseLedger(BKException e, long ledgerIdToReplicate) @@ -251,6 +323,123 @@ private void logBKExceptionAndReleaseLedger(BKException e, long ledgerIdToReplic getExceptionCounter(e.getClass().getSimpleName()).inc(); } + private boolean tryReadingFaultyEntries(LedgerHandle lh, LedgerFragment ledgerFragment) { + long ledgerId = lh.getId(); + ConcurrentSkipListSet entriesUnableToReadForThisLedger = unableToReadEntriesForReplication + .getIfPresent(ledgerId); + if (entriesUnableToReadForThisLedger == null) { + return true; + } + long firstEntryIdOfFragment = ledgerFragment.getFirstEntryId(); + long lastEntryIdOfFragment = ledgerFragment.getLastKnownEntryId(); + NavigableSet entriesOfThisFragmentUnableToRead = entriesUnableToReadForThisLedger + .subSet(firstEntryIdOfFragment, true, lastEntryIdOfFragment, true); + if (entriesOfThisFragmentUnableToRead.isEmpty()) { + return true; + } + final CountDownLatch multiReadComplete = new CountDownLatch(1); + final AtomicInteger numOfResponsesToWaitFor = new AtomicInteger(entriesOfThisFragmentUnableToRead.size()); + final AtomicInteger returnRCValue = new AtomicInteger(BKException.Code.OK); + for (long entryIdToRead : entriesOfThisFragmentUnableToRead) { + if (multiReadComplete.getCount() == 0) { + /* + * if an asyncRead request has already failed, then break the + * loop. + */ + break; + } + lh.asyncReadEntries(entryIdToRead, entryIdToRead, (rc, ledHan, seq, ctx) -> { + long thisEntryId = (Long) ctx; + if (rc == BKException.Code.OK) { + entriesUnableToReadForThisLedger.remove(thisEntryId); + if (numOfResponsesToWaitFor.decrementAndGet() == 0) { + multiReadComplete.countDown(); + } + } else { + LOG.error("Received error: {} while trying to read entry: {} of ledger: {} in ReplicationWorker", + rc, entryIdToRead, ledgerId); + returnRCValue.compareAndSet(BKException.Code.OK, rc); + /* + * on receiving a failure error response, multiRead can be + * marked completed, since there is no need to wait for + * other responses.
+ */ + multiReadComplete.countDown(); + } + }, entryIdToRead); + } + try { + multiReadComplete.await(); + } catch (InterruptedException e) { + LOG.error("Got interrupted exception while trying to read entries", e); + Thread.currentThread().interrupt(); // set interrupt flag + return false; + } + return (returnRCValue.get() == BKException.Code.OK); + } + + private Set getNeedRepairedPlacementNotAdheringFragments(LedgerHandle lh) { + if (!conf.isRepairedPlacementPolicyNotAdheringBookieEnable()) { + return Collections.emptySet(); + } + long ledgerId = lh.getId(); + Set placementNotAdheringFragments = new HashSet<>(); + CompletableFuture> future = ledgerManager.readLedgerMetadata( + ledgerId).whenComplete((metadataVer, exception) -> { + if (exception == null) { + LedgerMetadata metadata = metadataVer.getValue(); + int writeQuorumSize = metadata.getWriteQuorumSize(); + int ackQuorumSize = metadata.getAckQuorumSize(); + if (!metadata.isClosed()) { + return; + } + Long curEntryId = null; + EnsemblePlacementPolicy.PlacementPolicyAdherence previousSegmentAdheringToPlacementPolicy = null; + + for (Map.Entry> entry : metadata.getAllEnsembles().entrySet()) { + if (curEntryId != null) { + if (EnsemblePlacementPolicy.PlacementPolicyAdherence.FAIL + == previousSegmentAdheringToPlacementPolicy) { + LedgerFragment ledgerFragment = new LedgerFragment(lh, curEntryId, + entry.getKey() - 1, new HashSet<>()); + ledgerFragment.setReplicateType(LedgerFragment.ReplicateType.DATA_NOT_ADHERING_PLACEMENT); + placementNotAdheringFragments.add(ledgerFragment); + } + } + previousSegmentAdheringToPlacementPolicy = + admin.isEnsembleAdheringToPlacementPolicy(entry.getValue(), + writeQuorumSize, ackQuorumSize); + curEntryId = entry.getKey(); + } + if (curEntryId != null) { + if (EnsemblePlacementPolicy.PlacementPolicyAdherence.FAIL + == previousSegmentAdheringToPlacementPolicy) { + long lastEntry = lh.getLedgerMetadata().getLastEntryId(); + LedgerFragment ledgerFragment = new LedgerFragment(lh, curEntryId, lastEntry, + new HashSet<>()); + ledgerFragment.setReplicateType(LedgerFragment.ReplicateType.DATA_NOT_ADHERING_PLACEMENT); + placementNotAdheringFragments.add(ledgerFragment); + } + } + } else if (BKException.getExceptionCode(exception) + == BKException.Code.NoSuchLedgerExistsOnMetadataServerException) { + if (LOG.isDebugEnabled()) { + LOG.debug("Ignoring replication of already deleted ledger {}", ledgerId); + } + } else { + LOG.warn("Unable to read the ledger: {} information", ledgerId); + } + }); + try { + FutureUtils.result(future); + } catch (Exception e) { + LOG.warn("Check ledger need repaired placement not adhering bookie failed", e); + return Collections.emptySet(); + } + return placementNotAdheringFragments; + } + + @SuppressFBWarnings("RCN_REDUNDANT_NULLCHECK_WOULD_HAVE_BEEN_A_NPE") private boolean rereplicate(long ledgerIdToReplicate) throws InterruptedException, BKException, UnavailableException { if (LOG.isDebugEnabled()) { @@ -260,21 +449,33 @@ private boolean rereplicate(long ledgerIdToReplicate) throws InterruptedExceptio boolean deferLedgerLockRelease = false; try (LedgerHandle lh = admin.openLedgerNoRecovery(ledgerIdToReplicate)) { - Set fragments = - getUnderreplicatedFragments(lh, conf.getAuditorLedgerVerificationPercentage()); + Set fragments = getUnderreplicatedFragments(lh, + conf.getAuditorLedgerVerificationPercentage()); if (LOG.isDebugEnabled()) { LOG.debug("Founds fragments {} for replication from ledger: {}", fragments, ledgerIdToReplicate); } boolean foundOpenFragments = false; + long 
numFragsReplicated = 0; + long numNotAdheringPlacementFragsReplicated = 0; for (LedgerFragment ledgerFragment : fragments) { if (!ledgerFragment.isClosed()) { foundOpenFragments = true; continue; } + if (!tryReadingFaultyEntries(lh, ledgerFragment)) { + LOG.error("Failed to read faulty entries, so giving up replicating ledgerFragment {}", + ledgerFragment); + continue; + } try { - admin.replicateLedgerFragment(lh, ledgerFragment); + admin.replicateLedgerFragment(lh, ledgerFragment, onReadEntryFailureCallback); + numFragsReplicated++; + if (ledgerFragment.getReplicateType() == LedgerFragment + .ReplicateType.DATA_NOT_ADHERING_PLACEMENT) { + numNotAdheringPlacementFragsReplicated++; + } } catch (BKException.BKBookieHandleNotAvailableException e) { LOG.warn("BKBookieHandleNotAvailableException while replicating the fragment", e); } catch (BKException.BKLedgerRecoveryException e) { @@ -284,6 +485,13 @@ private boolean rereplicate(long ledgerIdToReplicate) throws InterruptedExceptio } } + if (numFragsReplicated > 0) { + numLedgersReplicated.inc(); + } + if (numNotAdheringPlacementFragsReplicated > 0) { + numNotAdheringPlacementLedgersReplicated.inc(); + } + if (foundOpenFragments || isLastSegmentOpenAndMissingBookies(lh)) { deferLedgerLockRelease = true; deferLedgerLockRelease(ledgerIdToReplicate); @@ -296,29 +504,22 @@ private boolean rereplicate(long ledgerIdToReplicate) throws InterruptedExceptio underreplicationManager.markLedgerReplicated(ledgerIdToReplicate); return true; } else { - if (replicationFailedLedgers.getUnchecked(ledgerIdToReplicate) - .incrementAndGet() == MAXNUMBER_REPLICATION_FAILURES_ALLOWED_BEFORE_DEFERRING) { - deferLedgerLockRelease = true; - LOG.error( - "ReplicationWorker failed to replicate Ledger : {} for {} number of times, " - + "so deferring the ledger lock release", - ledgerIdToReplicate, MAXNUMBER_REPLICATION_FAILURES_ALLOWED_BEFORE_DEFERRING); - deferLedgerLockReleaseOfFailedLedger(ledgerIdToReplicate); - numDeferLedgerLockReleaseOfFailedLedger.inc(); - } + deferLedgerLockRelease = true; + deferLedgerLockReleaseOfFailedLedger(ledgerIdToReplicate); + numDeferLedgerLockReleaseOfFailedLedger.inc(); // Releasing the underReplication ledger lock and compete // for the replication again for the pending fragments return false; } - } catch (BKNoSuchLedgerExistsException e) { + } catch (BKNoSuchLedgerExistsOnMetadataServerException e) { // Ledger might have been deleted by user - LOG.info("BKNoSuchLedgerExistsException while opening " + LOG.info("BKNoSuchLedgerExistsOnMetadataServerException while opening " + "ledger {} for replication. Other clients " + "might have deleted the ledger. " + "So, no harm to continue", ledgerIdToReplicate); underreplicationManager.markLedgerReplicated(ledgerIdToReplicate); - getExceptionCounter("BKNoSuchLedgerExistsException").inc(); + getExceptionCounter("BKNoSuchLedgerExistsOnMetadataServerException").inc(); return false; } catch (BKNotEnoughBookiesException e) { logBKExceptionAndReleaseLedger(e, ledgerIdToReplicate); @@ -357,11 +558,19 @@ private boolean rereplicate(long ledgerIdToReplicate) throws InterruptedExceptio * *

          To avoid this situation, we need to check if bookies in the final open ensemble * are unavailable, and take action if so. The action to take is to close the ledger, - * after a grace period as the writting client may replace the faulty bookie on its + * after a grace period as the writing client may replace the faulty bookie on its * own. * *

          Missing bookies in closed ledgers are fine, as we know the last confirmed add, so * we can tell which entries are supposed to exist and rereplicate them if necessary. + * + *

Another corner case is that there are multiple ensembles in the ledger and the last + * segment/ensemble is open, but nothing has been written to some quorums in the ensemble. + * For the v2 protocol, this ledger's lastAddConfirmed entry is the last segment/ensemble's `key - 2`, + * not `key - 1`; for the explanation, please refer to: https://github.com/apache/bookkeeper/pull/3917. + * If we treat the penultimate segment/ensemble as closed, we will not be able to replicate + * the last entry in the segment. So in this case, we should also check if the penultimate + * segment/ensemble has missing bookies. */ private boolean isLastSegmentOpenAndMissingBookies(LedgerHandle lh) throws BKException { LedgerMetadata md = admin.getLedgerMetadata(lh); @@ -369,10 +578,14 @@ private boolean isLastSegmentOpenAndMissingBookies(LedgerHandle lh) throws BKExc return false; } - SortedMap> ensembles = admin.getLedgerMetadata(lh).getEnsembles(); - List finalEnsemble = ensembles.get(ensembles.lastKey()); - Collection available = admin.getAvailableBookies(); - for (BookieSocketAddress b : finalEnsemble) { + SortedMap> ensembles = admin.getLedgerMetadata(lh).getAllEnsembles(); + List finalEnsemble = ensembles.get(ensembles.lastKey()); + if (ensembles.size() > 1 && lh.getLastAddConfirmed() < ensembles.lastKey() - 1) { + finalEnsemble = new ArrayList<>(finalEnsemble); + finalEnsemble.addAll((new TreeMap<>(ensembles)).floorEntry(ensembles.lastKey() - 1).getValue()); + } + Collection available = admin.getAvailableBookies(); + for (BookieId b : finalEnsemble) { if (!available.contains(b)) { if (LOG.isDebugEnabled()) { LOG.debug("Bookie {} is missing from the list of Available Bookies. ledger {}:ensemble {}.", @@ -387,12 +600,46 @@ private boolean isLastSegmentOpenAndMissingBookies(LedgerHandle lh) throws BKExc /** * Gets the under replicated fragments. */ - private Set getUnderreplicatedFragments(LedgerHandle lh, Long ledgerVerificationPercentage) + Set getUnderreplicatedFragments(LedgerHandle lh, Long ledgerVerificationPercentage) + throws InterruptedException { + // Data-loss fragments are repaired first. If a fragment is both data_loss and not_adhering_placement + // at the same time, we only fix data_loss this time. After the data_loss repair, the fragment is still + // not_adhering_placement, and the Auditor will mark this ledger again.
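To make the v2-protocol corner case above concrete, here is a self-contained sketch of the widened availability check, not from the patch, with the ensemble map simplified to start-entry-id keys and bookie-name lists (all values illustrative):

```java
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.TreeMap;

public class PenultimateEnsembleSketch {
    // Returns the bookies whose availability must be checked before deferring replication.
    static List<String> bookiesToCheck(TreeMap<Long, List<String>> ensembles, long lastAddConfirmed) {
        List<String> result = new ArrayList<>(ensembles.get(ensembles.lastKey()));
        // v2 corner case: nothing acknowledged in the open last ensemble yet,
        // so the penultimate ensemble may still hold the ledger's last real entry.
        if (ensembles.size() > 1 && lastAddConfirmed < ensembles.lastKey() - 1) {
            result.addAll(ensembles.floorEntry(ensembles.lastKey() - 1).getValue());
        }
        return result;
    }

    public static void main(String[] args) {
        TreeMap<Long, List<String>> ensembles = new TreeMap<>();
        ensembles.put(0L, Arrays.asList("bookie-1", "bookie-2"));
        ensembles.put(10L, Arrays.asList("bookie-3", "bookie-4")); // open, nothing written yet
        // LAC = 8 (that is, key - 2), so bookies of both ensembles are checked.
        System.out.println(bookiesToCheck(ensembles, 8L));
    }
}
```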
+ Set underreplicatedFragments = new HashSet<>(); + + Set dataLossFragments = getDataLossFragments(lh, ledgerVerificationPercentage); + underreplicatedFragments.addAll(dataLossFragments); + + Set notAdheringFragments = getNeedRepairedPlacementNotAdheringFragments(lh); + + for (LedgerFragment notAdheringFragment : notAdheringFragments) { + if (!checkFragmentRepeat(underreplicatedFragments, notAdheringFragment)) { + underreplicatedFragments.add(notAdheringFragment); + } + } + return underreplicatedFragments; + } + + private Set getDataLossFragments(LedgerHandle lh, Long ledgerVerificationPercentage) throws InterruptedException { CheckerCallback checkerCb = new CheckerCallback(); ledgerChecker.checkLedger(lh, checkerCb, ledgerVerificationPercentage); - Set fragments = checkerCb.waitAndGetResult(); - return fragments; + return checkerCb.waitAndGetResult(); + } + + private boolean checkFragmentRepeat(Set fragments, LedgerFragment needChecked) { + for (LedgerFragment fragment : fragments) { + if (fragment.getLedgerId() == needChecked.getLedgerId() + && fragment.getFirstEntryId() == needChecked.getFirstEntryId() + && fragment.getLastKnownEntryId() == needChecked.getLastKnownEntryId()) { + return true; + } + } + return false; + } + + void scheduleTaskWithDelay(TimerTask timerTask, long delayPeriod) { + pendingReplicationTimer.schedule(timerTask, delayPeriod); } /** @@ -413,7 +660,7 @@ public void run() { // Need recovery open, close the old ledger handle. lh.close(); // Recovery open could result in client write failure. - LOG.warn("Missing bookie(s) from last segment. Opening Ledger{} for Recovery.", ledgerId); + LOG.warn("Missing bookie(s) from last segment. Opening Ledger {} for Recovery.", ledgerId); lh = admin.openLedger(ledgerId); isRecoveryOpen = true; } @@ -425,7 +672,7 @@ public void run() { // Need recovery open, close the old ledger handle. lh.close(); // Recovery open could result in client write failure. - LOG.warn("Open Fragment{}. Opening Ledger{} for Recovery.", + LOG.warn("Open Fragment{}. Opening Ledger {} for Recovery.", fragment.getEnsemble(), ledgerId); lh = admin.openLedger(ledgerId); isRecoveryOpen = true; @@ -437,7 +684,7 @@ public void run() { Thread.currentThread().interrupt(); LOG.info("InterruptedException while fencing the ledger {}" + " for rereplication of postponed ledgers", ledgerId, e); - } catch (BKNoSuchLedgerExistsException bknsle) { + } catch (BKNoSuchLedgerExistsOnMetadataServerException bknsle) { if (LOG.isDebugEnabled()) { LOG.debug("Ledger {} was deleted, safe to continue", ledgerId, bknsle); } @@ -470,18 +717,30 @@ public void run() { } } }; - pendingReplicationTimer.schedule(timerTask, gracePeriod); + scheduleTaskWithDelay(timerTask, gracePeriod); } /** * Schedules a timer task for releasing the lock. */ private void deferLedgerLockReleaseOfFailedLedger(final long ledgerId) { + int numOfTimesFailedSoFar = replicationFailedLedgers.getUnchecked(ledgerId).getAndIncrement(); + /* + * for the first NUM_OF_EXPONENTIAL_BACKOFF_RETRIALS retrials do + * exponential backoff, starting from + * baseBackoffForLockReleaseOfFailedLedger + */ + long delayOfLedgerLockReleaseInMSecs = (numOfTimesFailedSoFar >= NUM_OF_EXPONENTIAL_BACKOFF_RETRIALS) + ? 
this.lockReleaseOfFailedLedgerGracePeriod + : this.baseBackoffForLockReleaseOfFailedLedger * (int) Math.pow(2, numOfTimesFailedSoFar); + LOG.error( + "ReplicationWorker failed to replicate Ledger : {} for {} number of times, " + + "so deferring the ledger lock release by {} msecs", + ledgerId, numOfTimesFailedSoFar, delayOfLedgerLockReleaseInMSecs); TimerTask timerTask = new TimerTask() { @Override public void run() { try { - replicationFailedLedgers.invalidate(ledgerId); underreplicationManager.releaseUnderreplicatedLedger(ledgerId); } catch (UnavailableException e) { LOG.error("UnavailableException while replicating fragments of ledger {}", ledgerId, e); @@ -489,7 +748,7 @@ public void run() { } } }; - pendingReplicationTimer.schedule(timerTask, lockReleaseOfFailedLedgerGracePeriod); + scheduleTaskWithDelay(timerTask, delayOfLedgerLockReleaseInMSecs); } /** @@ -514,13 +773,15 @@ public void shutdown() { e); Thread.currentThread().interrupt(); } - try { - bkc.close(); - } catch (InterruptedException e) { - LOG.warn("Interrupted while closing the Bookie client", e); - Thread.currentThread().interrupt(); - } catch (BKException e) { - LOG.warn("Exception while closing the Bookie client", e); + if (ownBkc) { + try { + bkc.close(); + } catch (InterruptedException e) { + LOG.warn("Interrupted while closing the Bookie client", e); + Thread.currentThread().interrupt(); + } catch (BKException e) { + LOG.warn("Exception while closing the Bookie client", e); + } } try { underreplicationManager.close(); @@ -533,7 +794,8 @@ public void shutdown() { /** * Gives the running status of ReplicationWorker. */ - boolean isRunning() { + @VisibleForTesting + public boolean isRunning() { return workerRunning && workerThread.isAlive(); } @@ -564,7 +826,7 @@ Set waitAndGetResult() throws InterruptedException { private Counter getExceptionCounter(String name) { Counter counter = this.exceptionCounters.get(name); if (counter == null) { - counter = this.statsLogger.scope(REPLICATE_EXCEPTION).getCounter(name); + counter = this.exceptionLogger.getCounter(name); this.exceptionCounters.put(name, counter); } return counter; diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/sasl/JAASCredentialsContainer.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/sasl/JAASCredentialsContainer.java index 7523e40b5aa..f7b681009df 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/sasl/JAASCredentialsContainer.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/sasl/JAASCredentialsContainer.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/sasl/SASLBookieAuthProvider.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/sasl/SASLBookieAuthProvider.java index 6a81219c248..eb0c817b5c9 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/sasl/SASLBookieAuthProvider.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/sasl/SASLBookieAuthProvider.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
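Looping back to deferLedgerLockReleaseOfFailedLedger above: the patch replaces the old fixed ten-failure threshold with per-failure exponential backoff. The base delay is the lock-release grace period divided by 2^NUM_OF_EXPONENTIAL_BACKOFF_RETRIALS, and it doubles per failure until it caps at the full grace period. A sketch of the resulting schedule; the five-minute grace period is an arbitrary example, not a default:

```java
public class LockReleaseBackoffSketch {
    static final int NUM_OF_EXPONENTIAL_BACKOFF_RETRIALS = 5; // same constant as in the patch

    public static void main(String[] args) {
        long gracePeriodMs = 300_000; // assumed configured grace period: 5 minutes
        long baseBackoffMs = gracePeriodMs / (long) Math.pow(2, NUM_OF_EXPONENTIAL_BACKOFF_RETRIALS);

        // Mirrors the delay computation in deferLedgerLockReleaseOfFailedLedger.
        for (int failures = 0; failures <= 6; failures++) {
            long delayMs = (failures >= NUM_OF_EXPONENTIAL_BACKOFF_RETRIALS)
                    ? gracePeriodMs
                    : baseBackoffMs * (int) Math.pow(2, failures);
            System.out.printf("failure #%d -> release lock after %d ms%n", failures, delayMs);
        }
    }
}
```

With these numbers the lock is released after 9375 ms on the first failure, doubling up to the 300000 ms cap from the fifth failure onward.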
See the NOTICE file @@ -25,7 +25,6 @@ import javax.security.auth.Subject; import javax.security.auth.login.LoginException; import javax.security.sasl.SaslException; - import org.apache.bookkeeper.auth.AuthCallbacks; import org.apache.bookkeeper.auth.AuthToken; import org.apache.bookkeeper.auth.BookieAuthProvider; diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/sasl/SASLBookieAuthProviderFactory.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/sasl/SASLBookieAuthProviderFactory.java index 25062868639..a899b285edb 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/sasl/SASLBookieAuthProviderFactory.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/sasl/SASLBookieAuthProviderFactory.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -23,7 +23,6 @@ import java.io.IOException; import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; - import javax.security.auth.Subject; import javax.security.auth.callback.Callback; import javax.security.auth.callback.CallbackHandler; @@ -38,7 +37,6 @@ import javax.security.sasl.AuthorizeCallback; import javax.security.sasl.RealmCallback; import javax.security.sasl.SaslException; - import org.apache.bookkeeper.auth.AuthCallbacks; import org.apache.bookkeeper.conf.AbstractConfiguration; import org.apache.bookkeeper.conf.ServerConfiguration; diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/sasl/SASLClientAuthProvider.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/sasl/SASLClientAuthProvider.java index 052c34ccf28..d730c05ee03 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/sasl/SASLClientAuthProvider.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/sasl/SASLClientAuthProvider.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -26,7 +26,6 @@ import java.net.SocketAddress; import javax.security.auth.Subject; import javax.security.sasl.SaslException; - import org.apache.bookkeeper.auth.AuthCallbacks; import org.apache.bookkeeper.auth.AuthToken; import org.apache.bookkeeper.auth.ClientAuthProvider; diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/sasl/SASLClientProviderFactory.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/sasl/SASLClientProviderFactory.java index 7fd9b7dcb77..2d877fc9938 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/sasl/SASLClientProviderFactory.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/sasl/SASLClientProviderFactory.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file @@ -27,7 +27,6 @@ import static org.apache.bookkeeper.sasl.SaslConstants.JAAS_DEFAULT_CLIENT_SECTION_NAME; import java.io.IOException; - import javax.security.auth.Subject; import javax.security.auth.kerberos.KerberosTicket; import javax.security.auth.login.AppConfigurationEntry; @@ -35,7 +34,6 @@ import javax.security.auth.login.LoginContext; import javax.security.auth.login.LoginException; import javax.security.sasl.SaslException; - import org.apache.bookkeeper.auth.AuthCallbacks; import org.apache.bookkeeper.auth.ClientAuthProvider; import org.apache.bookkeeper.conf.AbstractConfiguration; diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/sasl/SaslClientState.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/sasl/SaslClientState.java index 324480c4b49..4da35d9289c 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/sasl/SaslClientState.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/sasl/SaslClientState.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -50,7 +50,9 @@ public class SaslClientState { private String password; public SaslClientState(String serverHostname, Subject subject) throws SaslException { - String serverPrincipal = SaslConstants.SASL_BOOKKEEPER_PROTOCOL + "/" + serverHostname; + String saslServiceName = System.getProperty(SaslConstants.SASL_SERVICE_NAME, + SaslConstants.SASL_SERVICE_NAME_DEFAULT); + String serverPrincipal = saslServiceName + "/" + serverHostname; this.clientSubject = subject; if (clientSubject == null) { throw new SaslException("Cannot create JAAS Sujbect for SASL"); diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/sasl/SaslConstants.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/sasl/SaslConstants.java index 98a83b7b941..83548f0dfef 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/sasl/SaslConstants.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/sasl/SaslConstants.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file @@ -52,6 +52,8 @@ public class SaslConstants { static final String SASL_BOOKKEEPER_PROTOCOL = "bookkeeper"; static final String SASL_BOOKKEEPER_REALM = "bookkeeper"; + static final String SASL_SERVICE_NAME = "bookkeeper.sasl.servicename"; + static final String SASL_SERVICE_NAME_DEFAULT = "bookkeeper"; static final String SASL_MD5_DUMMY_HOSTNAME = "bookkeeper"; diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/sasl/SaslServerState.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/sasl/SaslServerState.java index 2291526c72f..d8c6a62ed90 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/sasl/SaslServerState.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/sasl/SaslServerState.java @@ -26,7 +26,6 @@ import java.util.HashMap; import java.util.Map; import java.util.regex.Pattern; - import javax.security.auth.Subject; import javax.security.auth.callback.Callback; import javax.security.auth.callback.CallbackHandler; @@ -41,7 +40,6 @@ import javax.security.sasl.Sasl; import javax.security.sasl.SaslException; import javax.security.sasl.SaslServer; - import org.apache.bookkeeper.conf.ServerConfiguration; import org.apache.zookeeper.server.auth.KerberosName; import org.slf4j.LoggerFactory; @@ -78,8 +76,7 @@ private SaslServer createSaslServer(final Subject subject, ServerConfiguration s final String servicePrincipalNameAndHostname = servicePrincipal.getName(); int indexOf = servicePrincipalNameAndHostname.indexOf("/"); - final String serviceHostnameAndKerbDomain = servicePrincipalNameAndHostname.substring(indexOf + 1, - servicePrincipalNameAndHostname.length()); + final String serviceHostnameAndKerbDomain = servicePrincipalNameAndHostname.substring(indexOf + 1); int indexOfAt = serviceHostnameAndKerbDomain.indexOf("@"); final String servicePrincipalName, serviceHostname; @@ -150,7 +147,7 @@ public SaslServerCallbackHandler(Configuration configuration, ServerConfiguratio throws IOException { String configurationEntry = serverConfiguration.getString(SaslConstants.JAAS_BOOKIE_SECTION_NAME, SaslConstants.JAAS_DEFAULT_BOOKIE_SECTION_NAME); - AppConfigurationEntry configurationEntries[] = configuration.getAppConfigurationEntry(configurationEntry); + AppConfigurationEntry[] configurationEntries = configuration.getAppConfigurationEntry(configurationEntry); if (configurationEntries == null) { String errorMessage = "Could not find a '" + configurationEntry diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/sasl/TGTRefreshThread.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/sasl/TGTRefreshThread.java index f4a2508f786..2d8c9efa759 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/sasl/TGTRefreshThread.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/sasl/TGTRefreshThread.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
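The SaslConstants/SaslClientState change just above makes the SASL service name configurable: the client reads the bookkeeper.sasl.servicename system property (default "bookkeeper") when building the server principal. A sketch of the effect, not from the patch; the hostname is illustrative, and in practice the property would usually be passed as a -D JVM flag:

```java
public class SaslServiceNameSketch {
    public static void main(String[] args) {
        // Must be set before the SASL client state is created.
        System.setProperty("bookkeeper.sasl.servicename", "bk");

        // Mirrors the principal construction in SaslClientState above.
        String saslServiceName = System.getProperty("bookkeeper.sasl.servicename", "bookkeeper");
        String serverPrincipal = saslServiceName + "/" + "bookie1.example.com";
        System.out.println(serverPrincipal); // prints: bk/bookie1.example.com
    }
}
```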
See the NOTICE file @@ -162,7 +162,7 @@ public void run() { LOG.info("refreshing now because expiry is before next scheduled refresh time."); } else if (now < nextRefresh) { Date until = new Date(nextRefresh); - LOG.info("TGT refresh sleeping until: {}", until.toString()); + LOG.info("TGT refresh sleeping until: {}", until); try { Thread.sleep(nextRefresh - now); } catch (InterruptedException ie) { diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/EmbeddedServer.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/EmbeddedServer.java new file mode 100644 index 00000000000..b2ac638db86 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/EmbeddedServer.java @@ -0,0 +1,642 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +package org.apache.bookkeeper.server; + +import static com.google.common.base.Preconditions.checkNotNull; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.BOOKIE_SCOPE; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.LD_INDEX_SCOPE; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.LD_LEDGER_SCOPE; +import static org.apache.bookkeeper.bookie.BookieImpl.newBookieImpl; +import static org.apache.bookkeeper.bookie.LegacyCookieValidation.newLegacyCookieValidation; +import static org.apache.bookkeeper.client.BookKeeperClientStats.CLIENT_SCOPE; +import static org.apache.bookkeeper.replication.ReplicationStats.REPLICATION_SCOPE; +import static org.apache.bookkeeper.server.Main.storageDirectoriesFromConf; +import static org.apache.bookkeeper.server.component.ServerLifecycleComponent.loadServerComponents; + +import com.google.common.base.Ticker; +import com.google.common.util.concurrent.ThreadFactoryBuilder; +import io.netty.buffer.ByteBuf; +import io.netty.buffer.ByteBufAllocator; +import io.netty.buffer.CompositeByteBuf; +import io.reactivex.rxjava3.core.Scheduler; +import io.reactivex.rxjava3.schedulers.Schedulers; +import java.util.List; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.function.Consumer; +import java.util.function.Supplier; +import java.util.stream.Collectors; +import lombok.extern.slf4j.Slf4j; +import org.apache.bookkeeper.bookie.Bookie; +import org.apache.bookkeeper.bookie.BookieImpl; +import org.apache.bookkeeper.bookie.BookieResources; +import org.apache.bookkeeper.bookie.CookieValidation; +import org.apache.bookkeeper.bookie.LedgerDirsManager; +import org.apache.bookkeeper.bookie.LedgerStorage; +import org.apache.bookkeeper.bookie.ReadOnlyBookie; +import org.apache.bookkeeper.bookie.ScrubberStats; +import org.apache.bookkeeper.bookie.UncleanShutdownDetection; +import 
org.apache.bookkeeper.bookie.UncleanShutdownDetectionImpl; +import org.apache.bookkeeper.bookie.datainteg.DataIntegrityCheck; +import org.apache.bookkeeper.bookie.datainteg.DataIntegrityCheckImpl; +import org.apache.bookkeeper.bookie.datainteg.DataIntegrityCookieValidation; +import org.apache.bookkeeper.bookie.datainteg.DataIntegrityService; +import org.apache.bookkeeper.bookie.datainteg.EntryCopier; +import org.apache.bookkeeper.bookie.datainteg.EntryCopierImpl; +import org.apache.bookkeeper.client.BookKeeper; +import org.apache.bookkeeper.client.BookKeeperAdmin; +import org.apache.bookkeeper.common.allocator.ByteBufAllocatorWithOomHandler; +import org.apache.bookkeeper.common.component.AutoCloseableLifecycleComponent; +import org.apache.bookkeeper.common.component.ComponentInfoPublisher; +import org.apache.bookkeeper.common.component.LifecycleComponentStack; +import org.apache.bookkeeper.common.component.RxSchedulerLifecycleComponent; +import org.apache.bookkeeper.conf.ClientConfiguration; +import org.apache.bookkeeper.discover.BookieServiceInfo; +import org.apache.bookkeeper.discover.RegistrationManager; +import org.apache.bookkeeper.meta.LedgerManager; +import org.apache.bookkeeper.meta.LedgerManagerFactory; +import org.apache.bookkeeper.meta.MetadataBookieDriver; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.server.component.ServerLifecycleComponent; +import org.apache.bookkeeper.server.conf.BookieConfiguration; +import org.apache.bookkeeper.server.http.BKHttpServiceProvider; +import org.apache.bookkeeper.server.service.AutoRecoveryService; +import org.apache.bookkeeper.server.service.BookieService; +import org.apache.bookkeeper.server.service.HttpService; +import org.apache.bookkeeper.server.service.ScrubberService; +import org.apache.bookkeeper.server.service.StatsProviderService; +import org.apache.bookkeeper.stats.StatsLogger; +import org.apache.bookkeeper.stats.StatsProvider; +import org.apache.bookkeeper.util.DiskChecker; +import org.apache.commons.lang3.StringUtils; + +/** + * An embedded server is a server that run bookie and serving rpc requests. + * + *

          + * It is a rewritten server using {@link org.apache.bookkeeper.common.component.LifecycleComponent}, replacing the + * legacy server {@link org.apache.bookkeeper.proto.BookieServer}. + */ +public class EmbeddedServer { + + private final LifecycleComponentStack lifecycleComponentStack; + + private final StatsProvider statsProvider; + + private final RegistrationManager registrationManager; + + private final LedgerManagerFactory ledgerManagerFactory; + + private final DiskChecker diskChecker; + private final LedgerDirsManager ledgerDirsManager; + private final LedgerDirsManager indexDirsManager; + + private final BookieService bookieService; + private final AutoRecoveryService autoRecoveryService; + private final DataIntegrityService dataIntegrityService; + private final HttpService httpService; + + private EmbeddedServer(LifecycleComponentStack lifecycleComponentStack, StatsProvider statsProvider, + RegistrationManager registrationManager, LedgerManagerFactory ledgerManagerFactory, + DiskChecker diskChecker, LedgerDirsManager ledgerDirsManager, + LedgerDirsManager indexDirsManager, BookieService bookieService, + AutoRecoveryService autoRecoveryService, DataIntegrityService dataIntegrityService, + HttpService httpService) { + this.lifecycleComponentStack = lifecycleComponentStack; + this.statsProvider = statsProvider; + this.registrationManager = registrationManager; + this.ledgerManagerFactory = ledgerManagerFactory; + this.diskChecker = diskChecker; + this.ledgerDirsManager = ledgerDirsManager; + this.indexDirsManager = indexDirsManager; + this.bookieService = bookieService; + this.autoRecoveryService = autoRecoveryService; + this.dataIntegrityService = dataIntegrityService; + this.httpService = httpService; + } + + public LifecycleComponentStack getLifecycleComponentStack() { + return lifecycleComponentStack; + } + + public StatsProvider getStatsProvider() { + return statsProvider; + } + + public RegistrationManager getRegistrationManager() { + return registrationManager; + } + + public LedgerManagerFactory getLedgerManagerFactory() { + return ledgerManagerFactory; + } + + public DiskChecker getDiskChecker() { + return diskChecker; + } + + public LedgerDirsManager getLedgerDirsManager() { + return ledgerDirsManager; + } + + public LedgerDirsManager getIndexDirsManager() { + return indexDirsManager; + } + + public BookieService getBookieService() { + return bookieService; + } + + public AutoRecoveryService getAutoRecoveryService() { + return autoRecoveryService; + } + + public DataIntegrityService getDataIntegrityService() { + return dataIntegrityService; + } + + public HttpService getHttpService() { + return httpService; + } + + /** + * Create a new builder from given configuration. Actual services implementations can be provided to the builder and + * will override ones defined in the configuration. + *

          + * Invoker is responsible to start and stop provided services implementations, components from + * {@link EmbeddedServer#getLifecycleComponentStack()} will reflect only those created from provided configuration. + * + * @param conf bookie configuration + * @return a new embedded server builder + */ + public static final Builder builder(BookieConfiguration conf) { + return new Builder(conf); + } + + @Slf4j + public static class Builder { + + private BookieConfiguration conf; + + private StatsProvider statsProvider; + + private MetadataBookieDriver metadataDriver; + + private RegistrationManager registrationManager; + + private LedgerManagerFactory ledgerManagerFactory; + + private DiskChecker diskChecker; + private LedgerDirsManager ledgerDirsManager; + private LedgerDirsManager indexDirsManager; + + private ByteBufAllocator allocator; + private UncleanShutdownDetection uncleanShutdownDetection; + + private Builder(BookieConfiguration conf) { + checkNotNull(conf, "bookieConfiguration cannot be null"); + + this.conf = conf; + } + + public Builder statsProvider(StatsProvider statsProvider) { + this.statsProvider = statsProvider; + return this; + } + + public Builder metadataDriver(MetadataBookieDriver metadataDriver) { + this.metadataDriver = metadataDriver; + return this; + } + + public Builder registrationManager(RegistrationManager registrationManager) { + this.registrationManager = registrationManager; + return this; + } + + public Builder ledgerManagerFactory(LedgerManagerFactory ledgerManagerFactory) { + this.ledgerManagerFactory = ledgerManagerFactory; + return this; + } + + public Builder diskChecker(DiskChecker diskChecker) { + this.diskChecker = diskChecker; + return this; + } + + public Builder ledgerDirsManager(LedgerDirsManager ledgerDirsManager) { + this.ledgerDirsManager = ledgerDirsManager; + return this; + } + + public Builder indexDirsManager(LedgerDirsManager indexDirsManager) { + this.indexDirsManager = indexDirsManager; + return this; + } + + public Builder allocator(ByteBufAllocator allocator) { + this.allocator = allocator; + return this; + } + + public Builder uncleanShutdownDetection(UncleanShutdownDetection uncleanShutdownDetection) { + this.uncleanShutdownDetection = uncleanShutdownDetection; + return this; + } + + /** + * Build the bookie server. + * + *

          + * The sequence of the components is: + * + *

          +         * - stats provider
          +         * - bookie server
          +         * - autorecovery daemon
          +         * - http service
          +         * 
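The sequence listed above is realized by adding components to a LifecycleComponentStack in that order; they start in add-order. A minimal sketch, not from the patch, using the same builder plus the AutoCloseableLifecycleComponent wrapper the patch itself uses for the metadata driver; the wrapped resources here are stand-in lambdas, and the exact stop/close semantics should be checked against the component classes:

```java
import org.apache.bookkeeper.common.component.AutoCloseableLifecycleComponent;
import org.apache.bookkeeper.common.component.LifecycleComponentStack;

public class ComponentStackSketch {
    public static void main(String[] args) {
        // Order matters: components are started in the order they were added.
        LifecycleComponentStack stack = LifecycleComponentStack.newBuilder()
                .withName("sketch-server")
                // Stand-in AutoCloseable resources, wrapped the same way the
                // patch wraps metadataDriver and registrationManager.
                .addComponent(new AutoCloseableLifecycleComponent("stats",
                        () -> System.out.println("stats closed")))
                .addComponent(new AutoCloseableLifecycleComponent("bookie",
                        () -> System.out.println("bookie closed")))
                .build();
        stack.start();
        stack.close();
    }
}
```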
          + * + * @return lifecycle stack + * @throws java.lang.Exception + */ + public EmbeddedServer build() throws Exception { + + final ComponentInfoPublisher componentInfoPublisher = new ComponentInfoPublisher(); + + final Supplier bookieServiceInfoProvider = + () -> buildBookieServiceInfo(componentInfoPublisher); + + LifecycleComponentStack.Builder serverBuilder = LifecycleComponentStack + .newBuilder() + .withComponentInfoPublisher(componentInfoPublisher) + .withName("bookie-server"); + + // 1. build stats provider + if (statsProvider == null) { + StatsProviderService statsProviderService = new StatsProviderService(conf); + statsProvider = statsProviderService.getStatsProvider(); + serverBuilder.addComponent(statsProviderService); + log.info("Load lifecycle component : {}", statsProviderService.getName()); + } + + StatsLogger rootStatsLogger = statsProvider.getStatsLogger(""); + + // 2. Build metadata driver + if (metadataDriver == null) { + if (ledgerManagerFactory == null || registrationManager == null) { + metadataDriver = BookieResources.createMetadataDriver(conf.getServerConf(), rootStatsLogger); + serverBuilder.addComponent(new AutoCloseableLifecycleComponent("metadataDriver", metadataDriver)); + } + } + + if (registrationManager == null) { + registrationManager = metadataDriver.createRegistrationManager(); + serverBuilder.addComponent( + new AutoCloseableLifecycleComponent("registrationManager", registrationManager)); + } + + // 3. Build ledger manager + if (ledgerManagerFactory == null) { + ledgerManagerFactory = metadataDriver.getLedgerManagerFactory(); + serverBuilder.addComponent(new AutoCloseableLifecycleComponent("lmFactory", ledgerManagerFactory)); + } + LedgerManager ledgerManager = ledgerManagerFactory.newLedgerManager(); + serverBuilder.addComponent(new AutoCloseableLifecycleComponent("ledgerManager", ledgerManager)); + + // 4. Build bookie + StatsLogger bookieStats = rootStatsLogger.scope(BOOKIE_SCOPE); + + if (diskChecker == null) { + diskChecker = BookieResources.createDiskChecker(conf.getServerConf()); + } + + if (ledgerDirsManager == null) { + ledgerDirsManager = BookieResources.createLedgerDirsManager( + conf.getServerConf(), diskChecker, bookieStats.scope(LD_LEDGER_SCOPE)); + } + + if (indexDirsManager == null) { + indexDirsManager = BookieResources.createIndexDirsManager( + conf.getServerConf(), diskChecker, bookieStats.scope(LD_INDEX_SCOPE), ledgerDirsManager); + } + + ByteBufAllocatorWithOomHandler allocatorWithOomHandler; + if (allocator == null) { + allocatorWithOomHandler = BookieResources.createAllocator(conf.getServerConf()); + allocator = allocatorWithOomHandler; + } else { + if (allocator instanceof ByteBufAllocatorWithOomHandler) { + allocatorWithOomHandler = (ByteBufAllocatorWithOomHandler) allocator; + } else { + allocatorWithOomHandler = new ByteBuffAllocatorWrapper(allocator); + } + } + + if (uncleanShutdownDetection == null) { + uncleanShutdownDetection = new UncleanShutdownDetectionImpl(ledgerDirsManager); + } + if (uncleanShutdownDetection.lastShutdownWasUnclean()) { + log.info("Unclean shutdown detected. 
" + + "The bookie did not register a graceful shutdown prior to this boot."); + } + + // bookie takes ownership of storage, so shuts it down + LedgerStorage storage = null; + DataIntegrityCheck integCheck = null; + + if (conf.getServerConf().isDataIntegrityCheckingEnabled()) { + StatsLogger clientStats = bookieStats.scope(CLIENT_SCOPE); + ClientConfiguration clientConfiguration = new ClientConfiguration(conf.getServerConf()); + clientConfiguration.setClientRole(ClientConfiguration.CLIENT_ROLE_SYSTEM); + BookKeeper bkc = BookKeeper.forConfig(clientConfiguration).statsLogger(clientStats).build(); + serverBuilder.addComponent(new AutoCloseableLifecycleComponent("bkc", bkc)); + + BookieId bookieId = BookieImpl.getBookieId(conf.getServerConf()); + ExecutorService rxExecutor = Executors.newFixedThreadPool( + 2, new ThreadFactoryBuilder().setNameFormat("rx-schedule-%d") + .setUncaughtExceptionHandler( + (t, ex) -> log.error("Uncaught exception on thread {}", t.getName(), ex)) + .build()); + Scheduler rxScheduler = Schedulers.from(rxExecutor); + serverBuilder.addComponent( + new RxSchedulerLifecycleComponent("rx-scheduler", conf, bookieStats, + rxScheduler, rxExecutor)); + + storage = BookieResources.createLedgerStorage(conf.getServerConf(), ledgerManager, + ledgerDirsManager, indexDirsManager, bookieStats, allocator); + + EntryCopier copier = new EntryCopierImpl(bookieId, + ((org.apache.bookkeeper.client.BookKeeper) bkc).getClientCtx().getBookieClient(), + storage, Ticker.systemTicker()); + + integCheck = new DataIntegrityCheckImpl(bookieId, + ledgerManager, storage, copier, + new BookKeeperAdmin(bkc, clientStats, clientConfiguration), + rxScheduler); + + // if we're running with journal writes disabled and an unclean shutdown occurred then + // run the preboot check to protect against data loss and to perform data repair + if (!conf.getServerConf().getJournalWriteData() + && uncleanShutdownDetection.lastShutdownWasUnclean()) { + integCheck.runPreBootCheck("UNCLEAN_SHUTDOWN"); + } + CookieValidation cookieValidation = new DataIntegrityCookieValidation(conf.getServerConf(), + registrationManager, integCheck); + cookieValidation.checkCookies(storageDirectoriesFromConf(conf.getServerConf())); + } else { + CookieValidation cookieValidation = newLegacyCookieValidation(conf.getServerConf(), + registrationManager); + cookieValidation.checkCookies(storageDirectoriesFromConf(conf.getServerConf())); + // storage should be created after legacy validation or it will fail (it would find ledger dirs) + storage = BookieResources.createLedgerStorage(conf.getServerConf(), ledgerManager, + ledgerDirsManager, indexDirsManager, bookieStats, allocator); + } + + Bookie bookie; + if (conf.getServerConf().isForceReadOnlyBookie()) { + bookie = new ReadOnlyBookie(conf.getServerConf(), registrationManager, storage, + diskChecker, + ledgerDirsManager, indexDirsManager, + bookieStats, allocator, + bookieServiceInfoProvider); + } else { + bookie = newBookieImpl(conf.getServerConf(), registrationManager, storage, + diskChecker, + ledgerDirsManager, indexDirsManager, + bookieStats, allocator, + bookieServiceInfoProvider); + } + + // 5. 
build bookie server + BookieService bookieService = + new BookieService(conf, bookie, rootStatsLogger, allocatorWithOomHandler, uncleanShutdownDetection); + + serverBuilder.addComponent(bookieService); + log.info("Load lifecycle component : {}", bookieService.getName()); + + if (conf.getServerConf().isLocalScrubEnabled()) { + serverBuilder.addComponent( + new ScrubberService( + rootStatsLogger.scope(ScrubberStats.SCOPE), + conf, bookieService.getServer().getBookie().getLedgerStorage())); + } + + // 6. build auto recovery + AutoRecoveryService autoRecoveryService = null; + if (conf.getServerConf().isAutoRecoveryDaemonEnabled()) { + autoRecoveryService = new AutoRecoveryService(conf, rootStatsLogger.scope(REPLICATION_SCOPE)); + + serverBuilder.addComponent(autoRecoveryService); + log.info("Load lifecycle component : {}", autoRecoveryService.getName()); + } + + // 7. build data integrity check service + DataIntegrityService dataIntegrityService = null; + if (conf.getServerConf().isDataIntegrityCheckingEnabled()) { + checkNotNull(integCheck, "integCheck should have been initialized with the cookie validation"); + dataIntegrityService = + new DataIntegrityService(conf, rootStatsLogger.scope(REPLICATION_SCOPE), integCheck); + serverBuilder.addComponent(dataIntegrityService); + log.info("Load lifecycle component : {}", dataIntegrityService.getName()); + } + + // 8. build http service + HttpService httpService = null; + if (conf.getServerConf().isHttpServerEnabled()) { + BKHttpServiceProvider provider = new BKHttpServiceProvider.Builder() + .setBookieServer(bookieService.getServer()) + .setServerConfiguration(conf.getServerConf()) + .setStatsProvider(statsProvider) + .setLedgerManagerFactory(ledgerManagerFactory) + .build(); + httpService = new HttpService(provider, conf, rootStatsLogger); + serverBuilder.addComponent(httpService); + log.info("Load lifecycle component : {}", httpService.getName()); + } + + // 9. build extra services + String[] extraComponents = conf.getServerConf().getExtraServerComponents(); + if (null != extraComponents) { + try { + List components = loadServerComponents( + extraComponents, + conf, + rootStatsLogger); + for (ServerLifecycleComponent component : components) { + serverBuilder.addComponent(component); + log.info("Load lifecycle component : {}", component.getName()); + } + } catch (Exception e) { + if (conf.getServerConf().getIgnoreExtraServerComponentsStartupFailures()) { + log.info("Failed to load extra components '{}' - {}. Continuing without those components.", + StringUtils.join(extraComponents), e.getMessage()); + } else { + throw e; + } + } + } + + return new EmbeddedServer(serverBuilder.build(), statsProvider, registrationManager, ledgerManagerFactory, + diskChecker, ledgerDirsManager, indexDirsManager, bookieService, autoRecoveryService, + dataIntegrityService, httpService); + + } + + /** + * Create the {@link BookieServiceInfo} starting from the published endpoints. 
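Taken together, embedding a server reduces to building the stack and starting it. A minimal sketch, assuming a ServerConfiguration that already points at a metadata service and valid local directories; the BookieConfiguration wrapper constructor is assumed from the existing server module and is not shown in this patch:

```java
import org.apache.bookkeeper.conf.ServerConfiguration;
import org.apache.bookkeeper.server.EmbeddedServer;
import org.apache.bookkeeper.server.conf.BookieConfiguration;

public class EmbeddedServerSketch {
    public static void main(String[] args) throws Exception {
        ServerConfiguration serverConf = new ServerConfiguration();
        // Assumed environment: a metadata service at this URI plus usable journal/ledger dirs.
        serverConf.setMetadataServiceUri("zk+hierarchical://localhost:2181/ledgers");

        EmbeddedServer server = EmbeddedServer.builder(new BookieConfiguration(serverConf)).build();
        server.getLifecycleComponentStack().start(); // stats, bookie, autorecovery, http, ...
        // ... serve traffic ...
        server.getLifecycleComponentStack().close(); // shut the whole stack down
    }
}
```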
+ * + * @see ComponentInfoPublisher + * @param componentInfoPublisher the endpoint publisher + * @return the created bookie service info + */ + private static BookieServiceInfo buildBookieServiceInfo(ComponentInfoPublisher componentInfoPublisher) { + List endpoints = componentInfoPublisher.getEndpoints().values() + .stream().map(e -> { + return new BookieServiceInfo.Endpoint( + e.getId(), + e.getPort(), + e.getHost(), + e.getProtocol(), + e.getAuth(), + e.getExtensions() + ); + }).collect(Collectors.toList()); + return new BookieServiceInfo(componentInfoPublisher.getProperties(), endpoints); + } + } + + private static final class ByteBuffAllocatorWrapper implements ByteBufAllocatorWithOomHandler { + + private final ByteBufAllocator allocator; + + @Override + public ByteBuf buffer() { + return allocator.buffer(); + } + + @Override + public ByteBuf buffer(int i) { + return allocator.buffer(i); + } + + @Override + public ByteBuf buffer(int i, int i1) { + return allocator.buffer(i, i1); + } + + @Override + public ByteBuf ioBuffer() { + return allocator.ioBuffer(); + } + + @Override + public ByteBuf ioBuffer(int i) { + return allocator.ioBuffer(i); + } + + @Override + public ByteBuf ioBuffer(int i, int i1) { + return allocator.ioBuffer(i, i1); + } + + @Override + public ByteBuf heapBuffer() { + return allocator.heapBuffer(); + } + + @Override + public ByteBuf heapBuffer(int i) { + return allocator.heapBuffer(i); + } + + @Override + public ByteBuf heapBuffer(int i, int i1) { + return allocator.heapBuffer(i, i1); + } + + @Override + public ByteBuf directBuffer() { + return allocator.directBuffer(); + } + + @Override + public ByteBuf directBuffer(int i) { + return allocator.directBuffer(i); + } + + @Override + public ByteBuf directBuffer(int i, int i1) { + return allocator.directBuffer(i, i1); + } + + @Override + public CompositeByteBuf compositeBuffer() { + return allocator.compositeBuffer(); + } + + @Override + public CompositeByteBuf compositeBuffer(int i) { + return allocator.compositeBuffer(i); + } + + @Override + public CompositeByteBuf compositeHeapBuffer() { + return allocator.compositeHeapBuffer(); + } + + @Override + public CompositeByteBuf compositeHeapBuffer(int i) { + return allocator.compositeHeapBuffer(i); + } + + @Override + public CompositeByteBuf compositeDirectBuffer() { + return allocator.compositeDirectBuffer(); + } + + @Override + public CompositeByteBuf compositeDirectBuffer(int i) { + return allocator.compositeDirectBuffer(i); + } + + @Override + public boolean isDirectBufferPooled() { + return allocator.isDirectBufferPooled(); + } + + @Override + public int calculateNewCapacity(int i, int i1) { + return allocator.calculateNewCapacity(i, i1); + } + + public ByteBuffAllocatorWrapper(ByteBufAllocator allocator) { + this.allocator = allocator; + } + + @Override + public void setOomHandler(Consumer handler) { + // NOP + } + } + +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/Main.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/Main.java index ae92955a326..3eff455a840 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/Main.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/Main.java @@ -18,29 +18,22 @@ package org.apache.bookkeeper.server; -import static org.apache.bookkeeper.replication.ReplicationStats.REPLICATION_SCOPE; -import static org.apache.bookkeeper.server.component.ServerLifecycleComponent.loadServerComponents; - import java.io.File; +import java.io.IOException; import 
java.net.MalformedURLException; +import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.concurrent.ExecutionException; import lombok.extern.slf4j.Slf4j; +import org.apache.bookkeeper.bookie.BookieImpl; import org.apache.bookkeeper.bookie.ExitCode; import org.apache.bookkeeper.common.component.ComponentStarter; import org.apache.bookkeeper.common.component.LifecycleComponent; import org.apache.bookkeeper.common.component.LifecycleComponentStack; import org.apache.bookkeeper.conf.ServerConfiguration; import org.apache.bookkeeper.conf.UncheckedConfigurationException; -import org.apache.bookkeeper.server.component.ServerLifecycleComponent; import org.apache.bookkeeper.server.conf.BookieConfiguration; -import org.apache.bookkeeper.server.http.BKHttpServiceProvider; -import org.apache.bookkeeper.server.service.AutoRecoveryService; -import org.apache.bookkeeper.server.service.BookieService; -import org.apache.bookkeeper.server.service.HttpService; -import org.apache.bookkeeper.server.service.StatsProviderService; -import org.apache.bookkeeper.stats.StatsLogger; import org.apache.commons.cli.BasicParser; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.HelpFormatter; @@ -48,7 +41,6 @@ import org.apache.commons.cli.Options; import org.apache.commons.cli.ParseException; import org.apache.commons.configuration.ConfigurationException; -import org.apache.commons.lang3.StringUtils; /** * A bookie server is a server that run bookie and serving rpc requests. @@ -58,7 +50,6 @@ */ @Slf4j public class Main { - static final Options BK_OPTS = new Options(); static { BK_OPTS.addOption("c", "conf", true, "Configuration for Bookie Server"); @@ -69,6 +60,7 @@ public class Main { BK_OPTS.addOption("z", "zkserver", true, "Zookeeper Server"); BK_OPTS.addOption("m", "zkledgerpath", true, "Zookeeper ledgers root path"); BK_OPTS.addOption("p", "bookieport", true, "bookie port exported"); + BK_OPTS.addOption("hp", "httpport", true, "bookie http port exported"); BK_OPTS.addOption("j", "journal", true, "bookie journal directory"); Option indexDirs = new Option ("i", "indexdirs", true, "bookie index directories"); indexDirs.setArgs(10); @@ -117,12 +109,13 @@ private static ServerConfiguration parseArgs(String[] args) BasicParser parser = new BasicParser(); CommandLine cmdLine = parser.parse(BK_OPTS, args); + ServerConfiguration conf = new ServerConfiguration(); + if (cmdLine.hasOption('h')) { - throw new IllegalArgumentException(); + conf.setProperty("help", true); + return conf; } - ServerConfiguration conf = new ServerConfiguration(); - if (cmdLine.hasOption('c')) { String confFile = cmdLine.getOptionValue("c"); loadConfFile(conf, confFile); @@ -162,8 +155,14 @@ private static ServerConfiguration parseArgs(String[] args) if (cmdLine.hasOption('p')) { String sPort = cmdLine.getOptionValue('p'); log.info("Get cmdline bookie port: {}", sPort); + conf.setBookiePort(Integer.parseInt(sPort)); + } + + if (cmdLine.hasOption("httpport")) { + String sPort = cmdLine.getOptionValue("httpport"); + log.info("Get cmdline http port: {}", sPort); Integer iPort = Integer.parseInt(sPort); - conf.setBookiePort(iPort.intValue()); + conf.setHttpServerPort(iPort.intValue()); } if (cmdLine.hasOption('j')) { @@ -203,7 +202,6 @@ public static void main(String[] args) { } static int doMain(String[] args) { - ServerConfiguration conf; // 0. 
parse command line @@ -213,6 +211,11 @@ static int doMain(String[] args) { return ExitCode.INVALID_CONF; } + if (conf.getBoolean("help", false)) { + printUsage(); + return ExitCode.OK; + } + // 1. building the component stack: LifecycleComponent server; try { @@ -248,22 +251,19 @@ private static ServerConfiguration parseCommandLine(String[] args) throw iae; } - StringBuilder sb = new StringBuilder(); - String[] ledgerDirNames = conf.getLedgerDirNames(); - for (int i = 0; i < ledgerDirNames.length; i++) { - if (i != 0) { - sb.append(','); - } - sb.append(ledgerDirNames[i]); + if (conf.getBoolean("help", false)) { + return conf; } String hello = String.format( - "Hello, I'm your bookie, listening on port %1$s. Metadata service uri is %2$s." - + " Journals are in %3$s. Ledgers are stored in %4$s.", + "Hello, I'm your bookie, bookieId is %1$s, listening on port %2$s. Metadata service uri is %3$s." + + " Journals are in %4$s. Ledgers are stored in %5$s. Indexes are stored in %6$s.", + conf.getBookieId() != null ? conf.getBookieId() : "", conf.getBookiePort(), conf.getMetadataServiceUriUnchecked(), Arrays.asList(conf.getJournalDirNames()), - sb); + Arrays.asList(conf.getLedgerDirNames()), + Arrays.asList(conf.getIndexDirNames() != null ? conf.getIndexDirNames() : conf.getLedgerDirNames())); log.info(hello); return conf; @@ -284,70 +284,45 @@ private static ServerConfiguration parseCommandLine(String[] args) * @param conf bookie server configuration * @return lifecycle stack */ - static LifecycleComponentStack buildBookieServer(BookieConfiguration conf) throws Exception { - LifecycleComponentStack.Builder serverBuilder = LifecycleComponentStack.newBuilder().withName("bookie-server"); - - // 1. build stats provider - StatsProviderService statsProviderService = - new StatsProviderService(conf); - StatsLogger rootStatsLogger = statsProviderService.getStatsProvider().getStatsLogger(""); - - serverBuilder.addComponent(statsProviderService); - log.info("Load lifecycle component : {}", StatsProviderService.class.getName()); - - // 2. build bookie server - BookieService bookieService = - new BookieService(conf, rootStatsLogger); - - serverBuilder.addComponent(bookieService); - log.info("Load lifecycle component : {}", BookieService.class.getName()); + public static LifecycleComponentStack buildBookieServer(BookieConfiguration conf) throws Exception { + return EmbeddedServer.builder(conf).build().getLifecycleComponentStack(); + } - // 3. build auto recovery - if (conf.getServerConf().isAutoRecoveryDaemonEnabled()) { - AutoRecoveryService autoRecoveryService = - new AutoRecoveryService(conf, rootStatsLogger.scope(REPLICATION_SCOPE)); + public static List storageDirectoriesFromConf(ServerConfiguration conf) throws IOException { + List dirs = new ArrayList<>(); - serverBuilder.addComponent(autoRecoveryService); - log.info("Load lifecycle component : {}", AutoRecoveryService.class.getName()); - } - - // 4. 
build http service - if (conf.getServerConf().isHttpServerEnabled()) { - BKHttpServiceProvider provider = new BKHttpServiceProvider.Builder() - .setBookieServer(bookieService.getServer()) - .setServerConfiguration(conf.getServerConf()) - .setStatsProvider(statsProviderService.getStatsProvider()) - .build(); - HttpService httpService = - new HttpService(provider, conf, rootStatsLogger); - - serverBuilder.addComponent(httpService); - log.info("Load lifecycle component : {}", HttpService.class.getName()); + File[] journalDirs = conf.getJournalDirs(); + if (journalDirs != null) { + for (File j : journalDirs) { + File cur = BookieImpl.getCurrentDirectory(j); + if (!dirs.stream().anyMatch(f -> f.equals(cur))) { + BookieImpl.checkDirectoryStructure(cur); + dirs.add(cur); + } + } } - // 5. build extra services - String[] extraComponents = conf.getServerConf().getExtraServerComponents(); - if (null != extraComponents) { - try { - List components = loadServerComponents( - extraComponents, - conf, - rootStatsLogger); - for (ServerLifecycleComponent component : components) { - serverBuilder.addComponent(component); - log.info("Load lifecycle component : {}", component.getClass().getName()); + File[] ledgerDirs = conf.getLedgerDirs(); + if (ledgerDirs != null) { + for (File l : ledgerDirs) { + File cur = BookieImpl.getCurrentDirectory(l); + if (!dirs.stream().anyMatch(f -> f.equals(cur))) { + BookieImpl.checkDirectoryStructure(cur); + dirs.add(cur); } - } catch (Exception e) { - if (conf.getServerConf().getIgnoreExtraServerComponentsStartupFailures()) { - log.info("Failed to load extra components '{}' - {}. Continuing without those components.", - StringUtils.join(extraComponents), e.getMessage()); - } else { - throw e; + } + } + File[] indexDirs = conf.getIndexDirs(); + if (indexDirs != null) { + for (File i : indexDirs) { + File cur = BookieImpl.getCurrentDirectory(i); + if (!dirs.stream().anyMatch(f -> f.equals(cur))) { + BookieImpl.checkDirectoryStructure(cur); + dirs.add(cur); } } } - - return serverBuilder.build(); + return dirs; } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/component/ServerLifecycleComponent.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/component/ServerLifecycleComponent.java index 281bb7d756d..cc8ee6ef7bf 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/component/ServerLifecycleComponent.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/component/ServerLifecycleComponent.java @@ -26,9 +26,9 @@ import org.apache.bookkeeper.common.annotation.InterfaceStability.Evolving; import org.apache.bookkeeper.common.component.AbstractLifecycleComponent; import org.apache.bookkeeper.common.component.LifecycleComponent; +import org.apache.bookkeeper.common.util.ReflectionUtils; import org.apache.bookkeeper.server.conf.BookieConfiguration; import org.apache.bookkeeper.stats.StatsLogger; -import org.apache.bookkeeper.util.ReflectionUtils; /** * A {@link LifecycleComponent} that runs on a bookie server. It can be loaded via reflections. 
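For context on the Main.java refactor above: buildBookieServer(BookieConfiguration) now delegates to the EmbeddedServer builder, so a bookie can also be embedded programmatically. A minimal start-up sketch, using only classes visible in this diff; the port and metadata service URI are placeholder values:

    import org.apache.bookkeeper.common.component.LifecycleComponentStack;
    import org.apache.bookkeeper.conf.ServerConfiguration;
    import org.apache.bookkeeper.server.Main;
    import org.apache.bookkeeper.server.conf.BookieConfiguration;

    public class EmbeddedBookieExample {
        public static void main(String[] args) throws Exception {
            ServerConfiguration serverConf = new ServerConfiguration();
            serverConf.setBookiePort(3181);                                  // placeholder port
            serverConf.setMetadataServiceUri("zk://localhost:2181/ledgers"); // placeholder URI
            // buildBookieServer() now returns EmbeddedServer.builder(conf).build().getLifecycleComponentStack()
            LifecycleComponentStack stack =
                    Main.buildBookieServer(new BookieConfiguration(serverConf));
            stack.start(); // starts stats provider, bookie, and any optional components in order
            // ... serve traffic ...
            stack.stop();  // stops components in reverse start order
        }
    }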
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/BKHttpServiceProvider.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/BKHttpServiceProvider.java index 052b50ecf62..823cf486524 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/BKHttpServiceProvider.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/BKHttpServiceProvider.java @@ -33,14 +33,22 @@ import org.apache.bookkeeper.http.service.ErrorHttpService; import org.apache.bookkeeper.http.service.HeartbeatService; import org.apache.bookkeeper.http.service.HttpEndpointService; -import org.apache.bookkeeper.meta.zk.ZKMetadataDriverBase; +import org.apache.bookkeeper.meta.LedgerManagerFactory; import org.apache.bookkeeper.proto.BookieServer; import org.apache.bookkeeper.replication.Auditor; import org.apache.bookkeeper.replication.AutoRecoveryMain; +import org.apache.bookkeeper.server.http.service.AutoRecoveryStatusService; +import org.apache.bookkeeper.server.http.service.BookieInfoService; +import org.apache.bookkeeper.server.http.service.BookieIsReadyService; +import org.apache.bookkeeper.server.http.service.BookieSanityService; +import org.apache.bookkeeper.server.http.service.BookieStateReadOnlyService; +import org.apache.bookkeeper.server.http.service.BookieStateService; +import org.apache.bookkeeper.server.http.service.ClusterInfoService; import org.apache.bookkeeper.server.http.service.ConfigurationService; import org.apache.bookkeeper.server.http.service.DecommissionService; import org.apache.bookkeeper.server.http.service.DeleteLedgerService; import org.apache.bookkeeper.server.http.service.ExpandStorageService; +import org.apache.bookkeeper.server.http.service.GCDetailsService; import org.apache.bookkeeper.server.http.service.GetLastLogMarkService; import org.apache.bookkeeper.server.http.service.GetLedgerMetaService; import org.apache.bookkeeper.server.http.service.ListBookieInfoService; @@ -52,19 +60,19 @@ import org.apache.bookkeeper.server.http.service.MetricsService; import org.apache.bookkeeper.server.http.service.ReadLedgerEntryService; import org.apache.bookkeeper.server.http.service.RecoveryBookieService; +import org.apache.bookkeeper.server.http.service.ResumeCompactionService; +import org.apache.bookkeeper.server.http.service.SuspendCompactionService; import org.apache.bookkeeper.server.http.service.TriggerAuditService; +import org.apache.bookkeeper.server.http.service.TriggerGCService; +import org.apache.bookkeeper.server.http.service.TriggerLocationCompactService; import org.apache.bookkeeper.server.http.service.WhoIsAuditorService; import org.apache.bookkeeper.stats.StatsProvider; -import org.apache.bookkeeper.zookeeper.ZooKeeperClient; import org.apache.zookeeper.KeeperException; -import org.apache.zookeeper.ZooKeeper; /** * Bookkeeper based implementation of HttpServiceProvider, * which provide bookkeeper services to handle http requests * from different http endpoints. - * - *
<p>
          TODO: eliminate the direct usage of zookeeper here {@link https://github.com/apache/bookkeeper/issues/1332} */ @Slf4j public class BKHttpServiceProvider implements HttpServiceProvider { @@ -72,26 +80,22 @@ public class BKHttpServiceProvider implements HttpServiceProvider { private final StatsProvider statsProvider; private final BookieServer bookieServer; private final AutoRecoveryMain autoRecovery; + private final LedgerManagerFactory ledgerManagerFactory; private final ServerConfiguration serverConf; - private final ZooKeeper zk; private final BookKeeperAdmin bka; private final ExecutorService executor; private BKHttpServiceProvider(BookieServer bookieServer, AutoRecoveryMain autoRecovery, + LedgerManagerFactory ledgerManagerFactory, ServerConfiguration serverConf, StatsProvider statsProvider) throws IOException, KeeperException, InterruptedException, BKException { this.bookieServer = bookieServer; this.autoRecovery = autoRecovery; + this.ledgerManagerFactory = ledgerManagerFactory; this.serverConf = serverConf; this.statsProvider = statsProvider; - String zkServers = ZKMetadataDriverBase.resolveZkServers(serverConf); - this.zk = ZooKeeperClient.newBuilder() - .connectString(zkServers) - .sessionTimeoutMs(serverConf.getZkTimeout()) - .build(); - ClientConfiguration clientConfiguration = new ClientConfiguration(serverConf); this.bka = new BookKeeperAdmin(clientConfiguration); @@ -106,9 +110,6 @@ public void close() throws IOException { if (bka != null) { bka.close(); } - if (zk != null) { - zk.close(); - } } catch (InterruptedException ie) { Thread.currentThread().interrupt(); log.error("Interruption while closing BKHttpServiceProvider", ie); @@ -138,6 +139,7 @@ public static class Builder { BookieServer bookieServer = null; AutoRecoveryMain autoRecovery = null; + LedgerManagerFactory ledgerManagerFactory = null; ServerConfiguration serverConf = null; StatsProvider statsProvider = null; @@ -161,11 +163,17 @@ public Builder setStatsProvider(StatsProvider statsProvider) { return this; } + public Builder setLedgerManagerFactory(LedgerManagerFactory ledgerManagerFactory) { + this.ledgerManagerFactory = ledgerManagerFactory; + return this; + } + public BKHttpServiceProvider build() throws IOException, KeeperException, InterruptedException, BKException { return new BKHttpServiceProvider( bookieServer, autoRecovery, + ledgerManagerFactory, serverConf, statsProvider ); @@ -191,9 +199,9 @@ public HttpEndpointService provideHttpEndpointService(ApiType type) { case DELETE_LEDGER: return new DeleteLedgerService(configuration); case LIST_LEDGER: - return new ListLedgerService(configuration, bookieServer); + return new ListLedgerService(configuration, ledgerManagerFactory); case GET_LEDGER_META: - return new GetLedgerMetaService(configuration, bookieServer); + return new GetLedgerMetaService(configuration, ledgerManagerFactory); case READ_LEDGER_ENTRY: return new ReadLedgerEntryService(configuration, bka); @@ -208,14 +216,38 @@ public HttpEndpointService provideHttpEndpointService(ApiType type) { return new ListDiskFilesService(configuration); case EXPAND_STORAGE: return new ExpandStorageService(configuration); + case GC: + return new TriggerGCService(configuration, bookieServer); + case GC_DETAILS: + return new GCDetailsService(configuration, bookieServer); + case BOOKIE_STATE: + return new BookieStateService(bookieServer.getBookie()); + case BOOKIE_SANITY: + return new BookieSanityService(configuration); + case BOOKIE_STATE_READONLY: + return new 
BookieStateReadOnlyService(bookieServer.getBookie()); + case BOOKIE_IS_READY: + return new BookieIsReadyService(bookieServer.getBookie()); + case BOOKIE_INFO: + return new BookieInfoService(bookieServer.getBookie()); + case CLUSTER_INFO: + return new ClusterInfoService(bka, ledgerManagerFactory); + case SUSPEND_GC_COMPACTION: + return new SuspendCompactionService(bookieServer); + case RESUME_GC_COMPACTION: + return new ResumeCompactionService(bookieServer); + case TRIGGER_ENTRY_LOCATION_COMPACT: + return new TriggerLocationCompactService(bookieServer); // autorecovery + case AUTORECOVERY_STATUS: + return new AutoRecoveryStatusService(configuration); case RECOVERY_BOOKIE: return new RecoveryBookieService(configuration, bka, executor); case LIST_UNDER_REPLICATED_LEDGER: - return new ListUnderReplicatedLedgerService(configuration, bookieServer); + return new ListUnderReplicatedLedgerService(configuration, ledgerManagerFactory); case WHO_IS_AUDITOR: - return new WhoIsAuditorService(configuration, zk); + return new WhoIsAuditorService(configuration, bka); case TRIGGER_AUDIT: return new TriggerAuditService(configuration, bka); case LOST_BOOKIE_RECOVERY_DELAY: diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/AutoRecoveryStatusService.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/AutoRecoveryStatusService.java new file mode 100644 index 00000000000..463d9bc8759 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/AutoRecoveryStatusService.java @@ -0,0 +1,109 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.bookkeeper.server.http.service; + +import com.google.common.collect.ImmutableMap; +import com.google.common.util.concurrent.UncheckedExecutionException; +import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; +import java.util.Collections; +import java.util.Map; +import org.apache.bookkeeper.common.util.JsonUtil; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.http.HttpServer; +import org.apache.bookkeeper.http.service.HttpEndpointService; +import org.apache.bookkeeper.http.service.HttpServiceRequest; +import org.apache.bookkeeper.http.service.HttpServiceResponse; +import org.apache.bookkeeper.meta.LedgerUnderreplicationManager; +import org.apache.bookkeeper.meta.MetadataDrivers; +import org.apache.commons.lang3.ObjectUtils; + +/** + * HttpEndpointService that handles Autorecovery status related http requests. + * + *

<p>The GET method returns the current status of Autorecovery. The output would be like {"enabled" : true}.
+ *
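+ * <p>Example invocations, assuming the default HTTP server port (8080) and an /api/v1/autorecovery/status route (verify the path against the HttpRouter mapping in your version):
+ * <pre>
+ * curl -s localhost:8080/api/v1/autorecovery/status
+ * curl -s -X PUT 'localhost:8080/api/v1/autorecovery/status?enabled=false'
+ * </pre>
+ *
+ * <p>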
          The PUT method requires a parameter 'enabled', and enables Autorecovery if its value is 'true', + * and disables Autorecovery otherwise. The behaviour is idempotent if Autorecovery status is already + * the same as desired. The output would be the current status after the action. + * + */ +@SuppressFBWarnings("RCN_REDUNDANT_NULLCHECK_WOULD_HAVE_BEEN_A_NPE") +public class AutoRecoveryStatusService implements HttpEndpointService { + protected final ServerConfiguration conf; + + public AutoRecoveryStatusService(ServerConfiguration conf) { + this.conf = conf; + } + + @Override + public HttpServiceResponse handle(HttpServiceRequest request) throws Exception { + return MetadataDrivers.runFunctionWithLedgerManagerFactory(conf, + ledgerManagerFactory -> { + try (LedgerUnderreplicationManager ledgerUnderreplicationManager = ledgerManagerFactory + .newLedgerUnderreplicationManager()) { + switch (request.getMethod()) { + case GET: + return handleGetStatus(ledgerUnderreplicationManager); + case PUT: + return handlePutStatus(request, ledgerUnderreplicationManager); + default: + return new HttpServiceResponse("Not found method. Should be GET or PUT method", + HttpServer.StatusCode.NOT_FOUND); + } + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new UncheckedExecutionException(e); + } catch (Exception e) { + throw new UncheckedExecutionException(e); + } + }); + } + + private HttpServiceResponse handleGetStatus(LedgerUnderreplicationManager ledgerUnderreplicationManager) + throws Exception { + String body = JsonUtil.toJson(ImmutableMap.of("enabled", + ledgerUnderreplicationManager.isLedgerReplicationEnabled())); + return new HttpServiceResponse(body, HttpServer.StatusCode.OK); + } + + private HttpServiceResponse handlePutStatus(HttpServiceRequest request, + LedgerUnderreplicationManager ledgerUnderreplicationManager) + throws Exception { + Map params = ObjectUtils.defaultIfNull(request.getParams(), Collections.emptyMap()); + String enabled = params.get("enabled"); + if (enabled == null) { + return new HttpServiceResponse("Param 'enabled' not found in " + params, + HttpServer.StatusCode.BAD_REQUEST); + } + if (Boolean.parseBoolean(enabled)) { + if (!ledgerUnderreplicationManager.isLedgerReplicationEnabled()) { + ledgerUnderreplicationManager.enableLedgerReplication(); + } + } else { + if (ledgerUnderreplicationManager.isLedgerReplicationEnabled()) { + ledgerUnderreplicationManager.disableLedgerReplication(); + } + } + + // use the current status as the response + String body = JsonUtil.toJson(ImmutableMap.of("enabled", + ledgerUnderreplicationManager.isLedgerReplicationEnabled())); + return new HttpServiceResponse(body, HttpServer.StatusCode.OK); + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/BookieInfoService.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/BookieInfoService.java new file mode 100644 index 00000000000..62b118662d5 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/BookieInfoService.java @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.bookkeeper.server.http.service; + +import lombok.AllArgsConstructor; +import lombok.Data; +import lombok.NoArgsConstructor; +import lombok.NonNull; +import org.apache.bookkeeper.bookie.Bookie; +import org.apache.bookkeeper.common.util.JsonUtil; +import org.apache.bookkeeper.http.HttpServer; +import org.apache.bookkeeper.http.service.HttpEndpointService; +import org.apache.bookkeeper.http.service.HttpServiceRequest; +import org.apache.bookkeeper.http.service.HttpServiceResponse; + +/** + * HttpEndpointService that exposes the current info of the bookie. + * + *

+ * <pre>
+ * <code>
+ * {
+ *  "freeSpace" : 0,
+ *  "totalSpace" : 0
+ * }
+ * </code>
+ * </pre>
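+ *
+ * <p>Both fields are byte counts, taken from Bookie#getTotalFreeSpace() and Bookie#getTotalDiskSpace()
+ * in the handler below. Example request, assuming an /api/v1/bookie/info route on the default port:
+ * <pre>
+ * curl -s localhost:8080/api/v1/bookie/info
+ * </pre>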
          + */ +@AllArgsConstructor +public class BookieInfoService implements HttpEndpointService { + @NonNull private final Bookie bookie; + + /** + * POJO definition for the bookie info response. + */ + @Data + @NoArgsConstructor + @AllArgsConstructor + public static class BookieInfo { + private long freeSpace; + private long totalSpace; + } + + @Override + public HttpServiceResponse handle(HttpServiceRequest request) throws Exception { + HttpServiceResponse response = new HttpServiceResponse(); + + if (HttpServer.Method.GET != request.getMethod()) { + response.setCode(HttpServer.StatusCode.NOT_FOUND); + response.setBody("Only GET is supported."); + return response; + } + + BookieInfo bi = new BookieInfo(bookie.getTotalFreeSpace(), bookie.getTotalDiskSpace()); + + String jsonResponse = JsonUtil.toJson(bi); + response.setBody(jsonResponse); + response.setCode(HttpServer.StatusCode.OK); + return response; + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/BookieIsReadyService.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/BookieIsReadyService.java new file mode 100644 index 00000000000..f7cca4a5e66 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/BookieIsReadyService.java @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.bookkeeper.server.http.service; + +import static com.google.common.base.Preconditions.checkNotNull; + +import org.apache.bookkeeper.bookie.Bookie; +import org.apache.bookkeeper.bookie.StateManager; +import org.apache.bookkeeper.http.HttpServer; +import org.apache.bookkeeper.http.service.HttpEndpointService; +import org.apache.bookkeeper.http.service.HttpServiceRequest; +import org.apache.bookkeeper.http.service.HttpServiceResponse; + +/** + * HttpEndpointService that returns 200 if the bookie is ready. 
+ */ +public class BookieIsReadyService implements HttpEndpointService { + + private final Bookie bookie; + + public BookieIsReadyService(Bookie bookie) { + this.bookie = checkNotNull(bookie); + } + + @Override + public HttpServiceResponse handle(HttpServiceRequest request) throws Exception { + HttpServiceResponse response = new HttpServiceResponse(); + + if (HttpServer.Method.GET != request.getMethod()) { + response.setCode(HttpServer.StatusCode.NOT_FOUND); + response.setBody("Only support GET method check if bookie is ready."); + return response; + } + + StateManager sm = bookie.getStateManager(); + if (sm.isRunning() && !sm.isShuttingDown()) { + response.setCode(HttpServer.StatusCode.OK); + response.setBody("OK"); + } else { + response.setCode(HttpServer.StatusCode.SERVICE_UNAVAILABLE); + response.setBody("Bookie is not fully started yet"); + } + return response; + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/BookieSanityService.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/BookieSanityService.java new file mode 100644 index 00000000000..dd9c1dd6d54 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/BookieSanityService.java @@ -0,0 +1,111 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.bookkeeper.server.http.service; + +import static com.google.common.base.Preconditions.checkNotNull; + +import java.util.concurrent.Semaphore; +import java.util.concurrent.TimeUnit; +import lombok.Data; +import lombok.NoArgsConstructor; +import org.apache.bookkeeper.common.util.JsonUtil; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.http.HttpServer; +import org.apache.bookkeeper.http.service.HttpEndpointService; +import org.apache.bookkeeper.http.service.HttpServiceRequest; +import org.apache.bookkeeper.http.service.HttpServiceResponse; +import org.apache.bookkeeper.tools.cli.commands.bookie.SanityTestCommand; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * HttpEndpointService that exposes the bookie sanity state. + * + *

+ * <p>Get the current bookie sanity response:
+ *
+ * <pre>
+ * <code>
+ * {
+ *  "passed" : true,
+ *  "readOnly" : false
+ * }
+ * </code>
+ * </pre>
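+ *
+ * <p>At most one sanity check runs at a time (see the semaphore below), and the check itself
+ * writes and reads back entries via SanityTestCommand, so a response may take several seconds.
+ * Example request, assuming an /api/v1/bookie/sanity route:
+ * <pre>
+ * curl -s localhost:8080/api/v1/bookie/sanity
+ * </pre>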
          + */ +public class BookieSanityService implements HttpEndpointService { + + static final Logger LOG = LoggerFactory.getLogger(BookieSanityService.class); + private final ServerConfiguration config; + private Semaphore lock = new Semaphore(1); + private static final int TIMEOUT_MS = 5000; + private static final int MAX_CONCURRENT_REQUESTS = 1; + + public BookieSanityService(ServerConfiguration config) { + this.config = checkNotNull(config); + } + + /** + * POJO definition for the bookie sanity response. + */ + @Data + @NoArgsConstructor + public static class BookieSanity { + private boolean passed; + private boolean readOnly; + } + + @Override + public HttpServiceResponse handle(HttpServiceRequest request) throws Exception { + HttpServiceResponse response = new HttpServiceResponse(); + + if (HttpServer.Method.GET != request.getMethod()) { + response.setCode(HttpServer.StatusCode.NOT_FOUND); + response.setBody("Only support GET method to retrieve bookie sanity state."); + return response; + } + + BookieSanity bs = new BookieSanity(); + if (config.isForceReadOnlyBookie()) { + bs.readOnly = true; + } else { + try { + // allow max concurrent request as sanity-test check relatively + // longer time to complete + try { + lock.tryAcquire(MAX_CONCURRENT_REQUESTS, TIMEOUT_MS, TimeUnit.MILLISECONDS); + } catch (InterruptedException e) { + LOG.error("Timing out due to max {} of sanity request are running concurrently", + MAX_CONCURRENT_REQUESTS); + response.setCode(HttpServer.StatusCode.INTERNAL_ERROR); + response.setBody("Timing out due to max number of sanity request are running concurrently"); + return response; + } + SanityTestCommand sanity = new SanityTestCommand(); + bs.passed = sanity.apply(config, new SanityTestCommand.SanityFlags()); + } finally { + lock.release(); + } + } + String jsonResponse = JsonUtil.toJson(bs); + response.setBody(jsonResponse); + response.setCode(HttpServer.StatusCode.OK); + return response; + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/BookieStateReadOnlyService.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/BookieStateReadOnlyService.java new file mode 100644 index 00000000000..d32074e2bb3 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/BookieStateReadOnlyService.java @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.bookkeeper.server.http.service; + +import static com.google.common.base.Preconditions.checkNotNull; + +import lombok.AllArgsConstructor; +import lombok.Data; +import lombok.NoArgsConstructor; +import org.apache.bookkeeper.bookie.Bookie; +import org.apache.bookkeeper.bookie.StateManager; +import org.apache.bookkeeper.common.util.JsonUtil; +import org.apache.bookkeeper.http.HttpServer; +import org.apache.bookkeeper.http.service.HttpEndpointService; +import org.apache.bookkeeper.http.service.HttpServiceRequest; +import org.apache.bookkeeper.http.service.HttpServiceResponse; + +/** + * HttpEndpointService that handles readOnly state related http requests. + * The GET method will get the current readOnly state of the bookie. + * The PUT method will change the current readOnly state of the bookie if the desired state is + * different from the current. The request body could be {"readOnly":true/false}. The current + * or the updated state will be included in the response. + */ +public class BookieStateReadOnlyService implements HttpEndpointService { + private final Bookie bookie; + + public BookieStateReadOnlyService(Bookie bookie) { + this.bookie = checkNotNull(bookie); + } + + @Override + public HttpServiceResponse handle(HttpServiceRequest request) throws Exception { + HttpServiceResponse response = new HttpServiceResponse(); + StateManager stateManager = this.bookie.getStateManager(); + + if (HttpServer.Method.PUT.equals(request.getMethod())) { + ReadOnlyState inState = JsonUtil.fromJson(request.getBody(), ReadOnlyState.class); + if (stateManager.isReadOnly() && !inState.isReadOnly()) { + if (stateManager.isForceReadOnly()) { + response.setCode(HttpServer.StatusCode.BAD_REQUEST); + response.setBody("Bookie is in forceReadOnly mode, cannot transit to writable mode"); + return response; + } + stateManager.transitionToWritableMode().get(); + } else if (!stateManager.isReadOnly() && inState.isReadOnly()) { + if (!stateManager.isReadOnlyModeEnabled()) { + response.setCode(HttpServer.StatusCode.BAD_REQUEST); + response.setBody("Bookie is disabled ReadOnly mode, cannot transit to readOnly mode"); + return response; + } + stateManager.transitionToReadOnlyMode().get(); + } + } else if (!HttpServer.Method.GET.equals(request.getMethod())) { + response.setCode(HttpServer.StatusCode.NOT_FOUND); + response.setBody("Unsupported method. Should be GET or PUT method"); + return response; + } + + ReadOnlyState outState = new ReadOnlyState(stateManager.isReadOnly()); + response.setBody(JsonUtil.toJson(outState)); + response.setCode(HttpServer.StatusCode.OK); + + return response; + } + + /** + * The object represent the readOnly state. + */ + @AllArgsConstructor + @NoArgsConstructor + @Data + public static class ReadOnlyState { + private boolean readOnly; + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/BookieStateService.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/BookieStateService.java new file mode 100644 index 00000000000..072ebd74c80 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/BookieStateService.java @@ -0,0 +1,91 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.bookkeeper.server.http.service; + +import static com.google.common.base.Preconditions.checkNotNull; + +import lombok.Data; +import lombok.NoArgsConstructor; +import org.apache.bookkeeper.bookie.Bookie; +import org.apache.bookkeeper.bookie.StateManager; +import org.apache.bookkeeper.common.util.JsonUtil; +import org.apache.bookkeeper.http.HttpServer; +import org.apache.bookkeeper.http.service.HttpEndpointService; +import org.apache.bookkeeper.http.service.HttpServiceRequest; +import org.apache.bookkeeper.http.service.HttpServiceResponse; + +/** + * HttpEndpointService that exposes the current state of the bookie. + * + *

<p>Get the current bookie status:
+ *
+ * <pre>
+ * <code>
+ * {
+ *  "running" : true,
+ *  "readOnly" : false,
+ *  "shuttingDown" : false,
+ *  "availableForHighPriorityWrites" : true
+ * }
+ * </code>
+ * </pre>
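+ *
+ * <p>This endpoint reports the full StateManager view; for a bare readiness probe, the
+ * BookieIsReadyService endpoint above answers with just 200 or 503. Example request, assuming
+ * an /api/v1/bookie/state route:
+ * <pre>
+ * curl -s localhost:8080/api/v1/bookie/state
+ * </pre>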
          + */ +public class BookieStateService implements HttpEndpointService { + + private final Bookie bookie; + + public BookieStateService(Bookie bookie) { + this.bookie = checkNotNull(bookie); + } + + /** + * POJO definition for the bookie state response. + */ + @Data + @NoArgsConstructor + public static class BookieState { + private boolean running; + private boolean readOnly; + private boolean shuttingDown; + private boolean availableForHighPriorityWrites; + } + + @Override + public HttpServiceResponse handle(HttpServiceRequest request) throws Exception { + HttpServiceResponse response = new HttpServiceResponse(); + + if (HttpServer.Method.GET != request.getMethod()) { + response.setCode(HttpServer.StatusCode.NOT_FOUND); + response.setBody("Only support GET method to retrieve bookie state."); + return response; + } + + StateManager sm = bookie.getStateManager(); + BookieState bs = new BookieState(); + bs.running = sm.isRunning(); + bs.readOnly = sm.isReadOnly(); + bs.shuttingDown = sm.isShuttingDown(); + bs.availableForHighPriorityWrites = sm.isAvailableForHighPriorityWrites(); + + String jsonResponse = JsonUtil.toJson(bs); + response.setBody(jsonResponse); + response.setCode(HttpServer.StatusCode.OK); + return response; + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/ClusterInfoService.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/ClusterInfoService.java new file mode 100644 index 00000000000..99665eae3ff --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/ClusterInfoService.java @@ -0,0 +1,137 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.bookkeeper.server.http.service; + +import java.util.Iterator; +import lombok.AllArgsConstructor; +import lombok.Data; +import lombok.NonNull; +import lombok.SneakyThrows; +import lombok.extern.slf4j.Slf4j; +import org.apache.bookkeeper.client.BookKeeperAdmin; +import org.apache.bookkeeper.common.util.JsonUtil; +import org.apache.bookkeeper.http.HttpServer; +import org.apache.bookkeeper.http.service.HttpEndpointService; +import org.apache.bookkeeper.http.service.HttpServiceRequest; +import org.apache.bookkeeper.http.service.HttpServiceResponse; +import org.apache.bookkeeper.meta.LedgerManagerFactory; +import org.apache.bookkeeper.meta.LedgerUnderreplicationManager; +import org.apache.bookkeeper.meta.UnderreplicatedLedger; +import org.apache.bookkeeper.net.BookieId; + +/** + * HttpEndpointService that exposes the current info about the cluster of bookies. + * + *
+ * <pre>
+ * <code>
+ * {
+ *  "hasAuditorElected" : true,
+ *  "auditorId" : "blah",
+ *  "hasUnderReplicatedLedgers" : false,
+ *  "isLedgerReplicationEnabled" : true,
+ *  "totalBookiesCount" : 10,
+ *  "writableBookiesCount" : 6,
+ *  "readonlyBookiesCount" : 3,
+ *  "unavailableBookiesCount" : 1
+ * }
+ * </code>
+ * </pre>
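+ *
+ * <p>The bookie counts are derived from BookKeeperAdmin#getAllBookies(), #getAvailableBookies()
+ * and #getReadOnlyBookies(), with unavailableBookiesCount computed as
+ * total - writable - readonly (see fillBookiesInfo below).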
          + */ +@AllArgsConstructor +@Slf4j +public class ClusterInfoService implements HttpEndpointService { + + @NonNull + private final BookKeeperAdmin bka; + @NonNull + private final LedgerManagerFactory ledgerManagerFactory; + + /** + * POJO definition for the cluster info response. + */ + @Data + public static class ClusterInfo { + private boolean auditorElected; + private String auditorId; + private boolean clusterUnderReplicated; + private boolean ledgerReplicationEnabled; + private int totalBookiesCount; + private int writableBookiesCount; + private int readonlyBookiesCount; + private int unavailableBookiesCount; + } + + @Override + public HttpServiceResponse handle(HttpServiceRequest request) throws Exception { + final HttpServiceResponse response = new HttpServiceResponse(); + + if (HttpServer.Method.GET != request.getMethod()) { + response.setCode(HttpServer.StatusCode.NOT_FOUND); + response.setBody("Only GET is supported."); + return response; + } + + final ClusterInfo info = new ClusterInfo(); + fillUReplicatedInfo(info); + fillAuditorInfo(info); + fillBookiesInfo(info); + + String jsonResponse = JsonUtil.toJson(info); + response.setBody(jsonResponse); + response.setCode(HttpServer.StatusCode.OK); + return response; + } + + @SneakyThrows + private void fillBookiesInfo(ClusterInfo info) { + int totalBookiesCount = bka.getAllBookies().size(); + int writableBookiesCount = bka.getAvailableBookies().size(); + int readonlyBookiesCount = bka.getReadOnlyBookies().size(); + int unavailableBookiesCount = totalBookiesCount - writableBookiesCount - readonlyBookiesCount; + + info.setTotalBookiesCount(totalBookiesCount); + info.setWritableBookiesCount(writableBookiesCount); + info.setReadonlyBookiesCount(readonlyBookiesCount); + info.setUnavailableBookiesCount(unavailableBookiesCount); + } + + private void fillAuditorInfo(ClusterInfo info) { + try { + BookieId currentAuditor = bka.getCurrentAuditor(); + info.setAuditorElected(currentAuditor != null); + info.setAuditorId(currentAuditor == null ? 
"" : currentAuditor.getId()); + } catch (Exception e) { + log.error("Could not get Auditor info", e); + info.setAuditorElected(false); + info.setAuditorId(""); + } + } + + @SneakyThrows + private void fillUReplicatedInfo(ClusterInfo info) { + try (LedgerUnderreplicationManager underreplicationManager = + ledgerManagerFactory.newLedgerUnderreplicationManager()) { + Iterator iter = underreplicationManager.listLedgersToRereplicate(null); + + info.setClusterUnderReplicated(iter.hasNext()); + info.setLedgerReplicationEnabled(underreplicationManager.isLedgerReplicationEnabled()); + } + } + +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/ConfigurationService.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/ConfigurationService.java index bcf55781d30..de166eeacf2 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/ConfigurationService.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/ConfigurationService.java @@ -22,7 +22,6 @@ import java.util.HashMap; import java.util.Map; - import org.apache.bookkeeper.common.util.JsonUtil; import org.apache.bookkeeper.conf.ServerConfiguration; import org.apache.bookkeeper.http.HttpServer; diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/DecommissionService.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/DecommissionService.java index 0091ce9ee24..225afd346cd 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/DecommissionService.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/DecommissionService.java @@ -22,7 +22,6 @@ import java.util.HashMap; import java.util.concurrent.ExecutorService; - import org.apache.bookkeeper.client.BookKeeperAdmin; import org.apache.bookkeeper.common.util.JsonUtil; import org.apache.bookkeeper.conf.ServerConfiguration; @@ -30,7 +29,7 @@ import org.apache.bookkeeper.http.service.HttpEndpointService; import org.apache.bookkeeper.http.service.HttpServiceRequest; import org.apache.bookkeeper.http.service.HttpServiceResponse; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.BookieId; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -74,9 +73,7 @@ public HttpServiceResponse handle(HttpServiceRequest request) throws Exception { HashMap configMap = JsonUtil.fromJson(requestBody, HashMap.class); if (configMap != null && configMap.containsKey("bookie_src")) { try { - String bookieSrcString[] = configMap.get("bookie_src").split(":"); - BookieSocketAddress bookieSrc = new BookieSocketAddress( - bookieSrcString[0], Integer.parseInt(bookieSrcString[1])); + BookieId bookieSrc = BookieId.parse(configMap.get("bookie_src")); executor.execute(() -> { try { @@ -84,12 +81,12 @@ public HttpServiceResponse handle(HttpServiceRequest request) throws Exception { bka.decommissionBookie(bookieSrc); LOG.info("Complete decommissioning bookie."); } catch (Exception e) { - LOG.error("Error handling decommissionBookie: {} with exception {}", bookieSrc, e); + LOG.error("Error handling decommissionBookie: {}.", bookieSrc, e); } }); response.setCode(HttpServer.StatusCode.OK); - response.setBody("Success send decommission Bookie command " + bookieSrc.toString()); + response.setBody("Success send decommission Bookie command " + bookieSrc); return response; } catch (Exception e) { LOG.error("Exception occurred while decommissioning bookie: 
", e); diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/DeleteLedgerService.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/DeleteLedgerService.java index 626b89f9dd9..57cc127908d 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/DeleteLedgerService.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/DeleteLedgerService.java @@ -56,17 +56,20 @@ public HttpServiceResponse handle(HttpServiceRequest request) throws Exception { if (params != null && params.containsKey("ledger_id")) { ClientConfiguration clientConf = new ClientConfiguration(); clientConf.addConfiguration(conf); - BookKeeper bk = new BookKeeper(clientConf); - Long ledgerId = Long.parseLong(params.get("ledger_id")); + try (BookKeeper bk = new BookKeeper(clientConf)) { + Long ledgerId = Long.parseLong(params.get("ledger_id")); - bk.deleteLedger(ledgerId); + bk.deleteLedger(ledgerId); - String output = "Deleted ledger: " + ledgerId; - String jsonResponse = JsonUtil.toJson(output); - LOG.debug("output body:" + jsonResponse); - response.setBody(jsonResponse); - response.setCode(HttpServer.StatusCode.OK); - return response; + String output = "Deleted ledger: " + ledgerId; + String jsonResponse = JsonUtil.toJson(output); + if (LOG.isDebugEnabled()) { + LOG.debug("output body:" + jsonResponse); + } + response.setBody(jsonResponse); + response.setCode(HttpServer.StatusCode.OK); + return response; + } } else { response.setCode(HttpServer.StatusCode.NOT_FOUND); response.setBody("Not ledger found. Should provide ledger_id="); diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/ExpandStorageService.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/ExpandStorageService.java index 2d59661b252..667904fb3a3 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/ExpandStorageService.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/ExpandStorageService.java @@ -21,13 +21,16 @@ import static com.google.common.base.Preconditions.checkNotNull; import com.google.common.collect.Lists; +import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; import java.io.File; import java.net.URI; import java.util.Arrays; import java.util.List; -import org.apache.bookkeeper.bookie.Bookie; import org.apache.bookkeeper.bookie.BookieException; +import org.apache.bookkeeper.bookie.BookieImpl; +import org.apache.bookkeeper.bookie.LegacyCookieValidation; import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.discover.RegistrationManager; import org.apache.bookkeeper.http.HttpServer; import org.apache.bookkeeper.http.service.HttpEndpointService; import org.apache.bookkeeper.http.service.HttpServiceRequest; @@ -60,17 +63,18 @@ public ExpandStorageService(ServerConfiguration conf) { * Update the directories info in the conf file before running the command. 
*/ @Override + @SuppressFBWarnings("RCN_REDUNDANT_NULLCHECK_WOULD_HAVE_BEEN_A_NPE") public HttpServiceResponse handle(HttpServiceRequest request) throws Exception { HttpServiceResponse response = new HttpServiceResponse(); if (HttpServer.Method.PUT == request.getMethod()) { - File[] ledgerDirectories = Bookie.getCurrentDirectories(conf.getLedgerDirs()); - File[] journalDirectories = Bookie.getCurrentDirectories(conf.getJournalDirs()); + File[] ledgerDirectories = BookieImpl.getCurrentDirectories(conf.getLedgerDirs()); + File[] journalDirectories = BookieImpl.getCurrentDirectories(conf.getJournalDirs()); File[] indexDirectories; if (null == conf.getIndexDirs()) { indexDirectories = ledgerDirectories; } else { - indexDirectories = Bookie.getCurrentDirectories(conf.getIndexDirs()); + indexDirectories = BookieImpl.getCurrentDirectories(conf.getIndexDirs()); } List allLedgerDirs = Lists.newArrayList(); @@ -80,11 +84,15 @@ public HttpServiceResponse handle(HttpServiceRequest request) throws Exception { } try (MetadataBookieDriver driver = MetadataDrivers.getBookieDriver( - URI.create(conf.getMetadataServiceUri()) - )) { - driver.initialize(conf, () -> { }, NullStatsLogger.INSTANCE); - Bookie.checkEnvironmentWithStorageExpansion(conf, driver, - Lists.newArrayList(journalDirectories), allLedgerDirs); + URI.create(conf.getMetadataServiceUri()))) { + driver.initialize(conf, NullStatsLogger.INSTANCE); + + try (RegistrationManager registrationManager = driver.createRegistrationManager()) { + LegacyCookieValidation validation = new LegacyCookieValidation(conf, registrationManager); + List dirs = Lists.newArrayList(journalDirectories); + dirs.addAll(allLedgerDirs); + validation.checkCookies(dirs); + } } catch (BookieException e) { LOG.error("Exception occurred while updating cookie for storage expansion", e); response.setCode(HttpServer.StatusCode.INTERNAL_ERROR); @@ -93,7 +101,9 @@ public HttpServiceResponse handle(HttpServiceRequest request) throws Exception { } String jsonResponse = "Success expand storage"; - LOG.debug("output body:" + jsonResponse); + if (LOG.isDebugEnabled()) { + LOG.debug("output body:" + jsonResponse); + } response.setBody(jsonResponse); response.setCode(HttpServer.StatusCode.OK); return response; diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/GCDetailsService.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/GCDetailsService.java new file mode 100644 index 00000000000..29b59e271ba --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/GCDetailsService.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.bookkeeper.server.http.service; + +import static com.google.common.base.Preconditions.checkNotNull; + +import java.util.List; +import org.apache.bookkeeper.bookie.GarbageCollectionStatus; +import org.apache.bookkeeper.common.util.JsonUtil; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.http.HttpServer; +import org.apache.bookkeeper.http.service.HttpEndpointService; +import org.apache.bookkeeper.http.service.HttpServiceRequest; +import org.apache.bookkeeper.http.service.HttpServiceResponse; +import org.apache.bookkeeper.proto.BookieServer; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * HttpEndpointService that handle get garbage collection details service. + * + *
<p>
          Get Garbage Collection status, the output would be like: + * [ { + * "forceCompacting" : false, + * "majorCompacting" : false, + * "minorCompacting" : false, + * "lastMajorCompactionTime" : 1544578144944, + * "lastMinorCompactionTime" : 1544578144944, + * "majorCompactionCounter" : 1, + * "minorCompactionCounter" : 0 + * } ] + */ +public class GCDetailsService implements HttpEndpointService { + + static final Logger LOG = LoggerFactory.getLogger(GCDetailsService.class); + + protected ServerConfiguration conf; + protected BookieServer bookieServer; + + public GCDetailsService(ServerConfiguration conf, BookieServer bookieServer) { + checkNotNull(conf); + checkNotNull(bookieServer); + this.conf = conf; + this.bookieServer = bookieServer; + } + + @Override + public HttpServiceResponse handle(HttpServiceRequest request) throws Exception { + HttpServiceResponse response = new HttpServiceResponse(); + + if (HttpServer.Method.GET == request.getMethod()) { + List details = bookieServer.getBookie() + .getLedgerStorage().getGarbageCollectionStatus(); + + String jsonResponse = JsonUtil.toJson(details); + if (LOG.isDebugEnabled()) { + LOG.debug("output body:" + jsonResponse); + } + response.setBody(jsonResponse); + response.setCode(HttpServer.StatusCode.OK); + return response; + } else { + response.setCode(HttpServer.StatusCode.NOT_FOUND); + response.setBody("Only support GET method to retrieve GC details." + + " If you want to trigger gc, send a POST to gc endpoint."); + return response; + } + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/GetLastLogMarkService.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/GetLastLogMarkService.java index 0ffa9bbfb21..30a0f6322b1 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/GetLastLogMarkService.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/GetLastLogMarkService.java @@ -22,11 +22,9 @@ import com.google.common.collect.Lists; import com.google.common.collect.Maps; - import java.io.File; import java.util.List; import java.util.Map; - import org.apache.bookkeeper.bookie.Journal; import org.apache.bookkeeper.bookie.LedgerDirsManager; import org.apache.bookkeeper.bookie.LogMark; @@ -84,20 +82,24 @@ public HttpServiceResponse handle(HttpServiceRequest request) throws Exception { } for (Journal journal : journals) { LogMark lastLogMark = journal.getLastLogMark().getCurMark(); - LOG.debug("LastLogMark: Journal Id - " + lastLogMark.getLogFileId() + "(" - + Long.toHexString(lastLogMark.getLogFileId()) + ".txn), Pos - " - + lastLogMark.getLogFileOffset()); + if (LOG.isDebugEnabled()) { + LOG.debug("LastLogMark: Journal Id - " + lastLogMark.getLogFileId() + "(" + + Long.toHexString(lastLogMark.getLogFileId()) + ".txn), Pos - " + + lastLogMark.getLogFileOffset()); + } output.put("LastLogMark: Journal Id - " + lastLogMark.getLogFileId() + "(" + Long.toHexString(lastLogMark.getLogFileId()) + ".txn)", "Pos - " + lastLogMark.getLogFileOffset()); } String jsonResponse = JsonUtil.toJson(output); - LOG.debug("output body:" + jsonResponse); + if (LOG.isDebugEnabled()) { + LOG.debug("output body:" + jsonResponse); + } response.setBody(jsonResponse); response.setCode(HttpServer.StatusCode.OK); return response; - } catch (Exception e) { + } catch (Throwable e) { LOG.error("Exception occurred while getting last log mark", e); response.setCode(HttpServer.StatusCode.NOT_FOUND); response.setBody("ERROR handling request: " + 
e.getMessage()); diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/GetLedgerMetaService.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/GetLedgerMetaService.java index 43b49943f41..a8bda6bbe12 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/GetLedgerMetaService.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/GetLedgerMetaService.java @@ -18,12 +18,11 @@ */ package org.apache.bookkeeper.server.http.service; -import static com.google.common.base.Charsets.UTF_8; import static com.google.common.base.Preconditions.checkNotNull; import com.google.common.collect.Maps; import java.util.Map; -import org.apache.bookkeeper.client.LedgerMetadata; +import org.apache.bookkeeper.client.api.LedgerMetadata; import org.apache.bookkeeper.common.util.JsonUtil; import org.apache.bookkeeper.conf.ServerConfiguration; import org.apache.bookkeeper.http.HttpServer; @@ -32,7 +31,7 @@ import org.apache.bookkeeper.http.service.HttpServiceResponse; import org.apache.bookkeeper.meta.LedgerManager; import org.apache.bookkeeper.meta.LedgerManagerFactory; -import org.apache.bookkeeper.proto.BookieServer; +import org.apache.bookkeeper.meta.LedgerMetadataSerDe; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -45,11 +44,14 @@ public class GetLedgerMetaService implements HttpEndpointService { static final Logger LOG = LoggerFactory.getLogger(GetLedgerMetaService.class); protected ServerConfiguration conf; - protected BookieServer bookieServer; - public GetLedgerMetaService(ServerConfiguration conf, BookieServer bookieServer) { + private final LedgerManagerFactory ledgerManagerFactory; + private final LedgerMetadataSerDe serDe; + + public GetLedgerMetaService(ServerConfiguration conf, LedgerManagerFactory ledgerManagerFactory) { checkNotNull(conf); this.conf = conf; - this.bookieServer = bookieServer; + this.ledgerManagerFactory = ledgerManagerFactory; + this.serDe = new LedgerMetadataSerDe(); } @Override @@ -60,21 +62,19 @@ public HttpServiceResponse handle(HttpServiceRequest request) throws Exception { if (HttpServer.Method.GET == request.getMethod() && (params != null) && params.containsKey("ledger_id")) { Long ledgerId = Long.parseLong(params.get("ledger_id")); - LedgerManagerFactory mFactory = bookieServer.getBookie().getLedgerManagerFactory(); - LedgerManager manager = mFactory.newLedgerManager(); + LedgerManager manager = ledgerManagerFactory.newLedgerManager(); // output - Map output = Maps.newHashMap(); - ListLedgerService.ReadLedgerMetadataCallback cb = - new ListLedgerService.ReadLedgerMetadataCallback(ledgerId); - manager.readLedgerMetadata(ledgerId, cb); - LedgerMetadata md = cb.get(); - output.put(ledgerId.toString(), new String(md.serialize(), UTF_8)); + Map output = Maps.newHashMap(); + LedgerMetadata md = manager.readLedgerMetadata(ledgerId).get().getValue(); + output.put(ledgerId.toString(), md); manager.close(); String jsonResponse = JsonUtil.toJson(output); - LOG.debug("output body:" + jsonResponse); + if (LOG.isDebugEnabled()) { + LOG.debug("output body:" + jsonResponse); + } response.setBody(jsonResponse); response.setCode(HttpServer.StatusCode.OK); return response; diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/ListBookieInfoService.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/ListBookieInfoService.java index 14439be86c5..b62a8dedde8 100644 --- 
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/ListBookieInfoService.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/ListBookieInfoService.java @@ -21,12 +21,10 @@ import static com.google.common.base.Preconditions.checkNotNull; import com.google.common.collect.Maps; - import java.math.RoundingMode; import java.text.DecimalFormat; import java.util.LinkedHashMap; import java.util.Map; - import org.apache.bookkeeper.client.BookKeeper; import org.apache.bookkeeper.client.BookieInfoReader; import org.apache.bookkeeper.common.util.JsonUtil; @@ -36,7 +34,7 @@ import org.apache.bookkeeper.http.service.HttpEndpointService; import org.apache.bookkeeper.http.service.HttpServiceRequest; import org.apache.bookkeeper.http.service.HttpServiceResponse; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.BookieId; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -64,7 +62,7 @@ public ListBookieInfoService(ServerConfiguration conf) { } String getReadable(long val) { - String unit[] = {"", "KB", "MB", "GB", "TB" }; + String[] unit = {"", "KB", "MB", "GB", "TB" }; int cnt = 0; double d = val; while (d >= 1000 && cnt < unit.length - 1) { @@ -85,7 +83,7 @@ public HttpServiceResponse handle(HttpServiceRequest request) throws Exception { clientConf.setDiskWeightBasedPlacementEnabled(true); BookKeeper bk = new BookKeeper(clientConf); - Map map = bk.getBookieInfo(); + Map map = bk.getBookieInfo(); if (map.size() == 0) { bk.close(); response.setCode(HttpServer.StatusCode.NOT_FOUND); @@ -104,7 +102,7 @@ public HttpServiceResponse handle(HttpServiceRequest request) throws Exception { */ LinkedHashMap output = Maps.newLinkedHashMapWithExpectedSize(map.size()); Long totalFree = 0L, total = 0L; - for (Map.Entry infoEntry : map.entrySet()) { + for (Map.Entry infoEntry : map.entrySet()) { BookieInfoReader.BookieInfo bInfo = infoEntry.getValue(); output.put(infoEntry.getKey().toString(), ": {Free: " + bInfo.getFreeDiskSpace() + getReadable(bInfo.getFreeDiskSpace()) @@ -119,7 +117,9 @@ public HttpServiceResponse handle(HttpServiceRequest request) throws Exception { bk.close(); String jsonResponse = JsonUtil.toJson(output); - LOG.debug("output body:" + jsonResponse); + if (LOG.isDebugEnabled()) { + LOG.debug("output body:" + jsonResponse); + } response.setBody(jsonResponse); response.setCode(HttpServer.StatusCode.OK); return response; diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/ListBookiesService.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/ListBookiesService.java index f969ff77978..6f134f6b857 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/ListBookiesService.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/ListBookiesService.java @@ -21,11 +21,9 @@ import static com.google.common.base.Preconditions.checkNotNull; import com.google.common.collect.Maps; - import java.util.ArrayList; import java.util.Collection; import java.util.Map; - import org.apache.bookkeeper.client.BookKeeperAdmin; import org.apache.bookkeeper.common.util.JsonUtil; import org.apache.bookkeeper.conf.ServerConfiguration; @@ -33,6 +31,7 @@ import org.apache.bookkeeper.http.service.HttpEndpointService; import org.apache.bookkeeper.http.service.HttpServiceRequest; import org.apache.bookkeeper.http.service.HttpServiceResponse; +import org.apache.bookkeeper.net.BookieId; import 
org.apache.bookkeeper.net.BookieSocketAddress; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -59,7 +58,7 @@ public HttpServiceResponse handle(HttpServiceRequest request) throws Exception { HttpServiceResponse response = new HttpServiceResponse(); // GET if (HttpServer.Method.GET == request.getMethod()) { - Collection bookies = new ArrayList(); + Collection bookies = new ArrayList(); Map params = request.getParams(); // default print rw @@ -79,9 +78,16 @@ public HttpServiceResponse handle(HttpServiceRequest request) throws Exception { // output Map output = Maps.newHashMap(); - for (BookieSocketAddress b : bookies) { - output.putIfAbsent(b.toString(), printHostname ? b.getHostName() : null); - LOG.debug("bookie: " + b.toString() + " hostname:" + b.getHostName()); + for (BookieId b : bookies) { + String hostname = null; + if (printHostname) { + BookieSocketAddress resolved = bka.getBookieAddressResolver().resolve(b); + hostname = resolved.getHostName(); + } + output.putIfAbsent(b.toString(), hostname); + if (LOG.isDebugEnabled()) { + LOG.debug("bookie: " + b + " hostname:" + hostname); + } } String jsonResponse = JsonUtil.toJson(output); diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/ListDiskFilesService.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/ListDiskFilesService.java index 330ea7f0162..dd3c85b4c38 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/ListDiskFilesService.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/ListDiskFilesService.java @@ -22,11 +22,9 @@ import static org.apache.bookkeeper.bookie.BookieShell.listFilesAndSort; import com.google.common.collect.Maps; - import java.io.File; import java.util.List; import java.util.Map; - import org.apache.bookkeeper.common.util.JsonUtil; import org.apache.bookkeeper.conf.ServerConfiguration; import org.apache.bookkeeper.http.HttpServer; @@ -92,9 +90,9 @@ public HttpServiceResponse handle(HttpServiceRequest request) throws Exception { if (all || journal) { File[] journalDirs = conf.getJournalDirs(); List journalFiles = listFilesAndSort(journalDirs, "txn"); - StringBuffer files = new StringBuffer(); + StringBuilder files = new StringBuilder(); for (File journalFile : journalFiles) { - files.append(journalFile.getName() + "\t"); + files.append(journalFile.getName()).append("\t"); } output.put("journal files", files.toString()); } @@ -102,9 +100,9 @@ public HttpServiceResponse handle(HttpServiceRequest request) throws Exception { if (all || entrylog) { File[] ledgerDirs = conf.getLedgerDirs(); List ledgerFiles = listFilesAndSort(ledgerDirs, "log"); - StringBuffer files = new StringBuffer(); + StringBuilder files = new StringBuilder(); for (File ledgerFile : ledgerFiles) { - files.append(ledgerFile.getName() + "\t"); + files.append(ledgerFile.getName()).append("\t"); } output.put("entrylog files", files.toString()); } @@ -112,15 +110,17 @@ public HttpServiceResponse handle(HttpServiceRequest request) throws Exception { if (all || index) { File[] indexDirs = (conf.getIndexDirs() == null) ? 
conf.getLedgerDirs() : conf.getIndexDirs(); List indexFiles = listFilesAndSort(indexDirs, "idx"); - StringBuffer files = new StringBuffer(); + StringBuilder files = new StringBuilder(); for (File indexFile : indexFiles) { - files.append(indexFile.getName() + "\t"); + files.append(indexFile.getName()).append("\t"); } output.put("index files", files.toString()); } String jsonResponse = JsonUtil.toJson(output); - LOG.debug("output body:" + jsonResponse); + if (LOG.isDebugEnabled()) { + LOG.debug("output body:" + jsonResponse); + } response.setBody(jsonResponse); response.setCode(HttpServer.StatusCode.OK); return response; diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/ListLedgerService.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/ListLedgerService.java index efd00e7341f..db650224945 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/ListLedgerService.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/ListLedgerService.java @@ -18,17 +18,14 @@ */ package org.apache.bookkeeper.server.http.service; -import static com.google.common.base.Charsets.UTF_8; import static com.google.common.base.Preconditions.checkNotNull; +import static java.nio.charset.StandardCharsets.UTF_8; -import com.google.common.collect.Lists; import com.google.common.collect.Maps; -import com.google.common.util.concurrent.AbstractFuture; import java.util.LinkedHashMap; -import java.util.List; import java.util.Map; -import org.apache.bookkeeper.client.BKException; -import org.apache.bookkeeper.client.LedgerMetadata; +import java.util.concurrent.CompletableFuture; +import org.apache.bookkeeper.client.api.LedgerMetadata; import org.apache.bookkeeper.common.util.JsonUtil; import org.apache.bookkeeper.conf.ServerConfiguration; import org.apache.bookkeeper.http.HttpServer; @@ -37,8 +34,8 @@ import org.apache.bookkeeper.http.service.HttpServiceResponse; import org.apache.bookkeeper.meta.LedgerManager; import org.apache.bookkeeper.meta.LedgerManagerFactory; -import org.apache.bookkeeper.proto.BookieServer; -import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks; +import org.apache.bookkeeper.meta.LedgerMetadataSerDe; +import org.apache.bookkeeper.versioning.Versioned; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -53,44 +50,29 @@ public class ListLedgerService implements HttpEndpointService { static final Logger LOG = LoggerFactory.getLogger(ListLedgerService.class); protected ServerConfiguration conf; - protected BookieServer bookieServer; + protected LedgerManagerFactory ledgerManagerFactory; + private final LedgerMetadataSerDe serDe; - public ListLedgerService(ServerConfiguration conf, BookieServer bookieServer) { + public ListLedgerService(ServerConfiguration conf, LedgerManagerFactory ledgerManagerFactory) { checkNotNull(conf); this.conf = conf; - this.bookieServer = bookieServer; + this.ledgerManagerFactory = ledgerManagerFactory; + this.serDe = new LedgerMetadataSerDe(); + } // Number of LedgerMetadata contains in each page static final int LIST_LEDGER_BATCH_SIZE = 100; - /** - * Callback for reading ledger metadata. 
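The deleted ReadLedgerMetadataCallback is superseded by the CompletableFuture-based LedgerManager API that GetLedgerMetaService and ListLedgerService now call. A minimal sketch of that read pattern (synchronous for brevity; the services instead batch these futures a page at a time):

import java.util.concurrent.CompletableFuture;
import org.apache.bookkeeper.client.api.LedgerMetadata;
import org.apache.bookkeeper.meta.LedgerManager;
import org.apache.bookkeeper.versioning.Versioned;

class ReadMetadataSketch {
    // readLedgerMetadata() now returns a future of versioned metadata;
    // blocking on it is the simplest consumption, as GetLedgerMetaService does.
    static LedgerMetadata read(LedgerManager manager, long ledgerId) throws Exception {
        CompletableFuture<Versioned<LedgerMetadata>> future = manager.readLedgerMetadata(ledgerId);
        return future.get().getValue();
    }
}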
- */ - public static class ReadLedgerMetadataCallback extends AbstractFuture - implements BookkeeperInternalCallbacks.GenericCallback { - final long ledgerId; - - ReadLedgerMetadataCallback(long ledgerId) { - this.ledgerId = ledgerId; - } - - long getLedgerId() { - return ledgerId; - } - - public void operationComplete(int rc, LedgerMetadata result) { - if (rc != 0) { - setException(BKException.create(rc)); - } else { - set(result); - } - } - } - static void keepLedgerMetadata(ReadLedgerMetadataCallback cb, LinkedHashMap output) + private void keepLedgerMetadata(long ledgerId, CompletableFuture> future, + LinkedHashMap output, boolean decodeMeta) throws Exception { - LedgerMetadata md = cb.get(); - output.put(Long.valueOf(cb.getLedgerId()).toString(), new String(md.serialize(), UTF_8)); + LedgerMetadata md = future.get().getValue(); + if (decodeMeta) { + output.put(Long.valueOf(ledgerId).toString(), md); + } else { + output.put(Long.valueOf(ledgerId).toString(), new String(serDe.serialize(md), UTF_8)); + } } @Override @@ -105,18 +87,22 @@ public HttpServiceResponse handle(HttpServiceRequest request) throws Exception { && params.containsKey("print_metadata") && params.get("print_metadata").equals("true"); + // do not decode meta by default for backward compatibility + boolean decodeMeta = (params != null) + && params.getOrDefault("decode_meta", "false").equals("true"); + // Page index should start from 1; int pageIndex = (printMeta && params.containsKey("page")) ? Integer.parseInt(params.get("page")) : -1; - LedgerManagerFactory mFactory = bookieServer.getBookie().getLedgerManagerFactory(); - LedgerManager manager = mFactory.newLedgerManager(); - LedgerManager.LedgerRangeIterator iter = manager.getLedgerRanges(); + LedgerManager manager = ledgerManagerFactory.newLedgerManager(); + LedgerManager.LedgerRangeIterator iter = manager.getLedgerRanges(0); // output - LinkedHashMap output = Maps.newLinkedHashMap(); + LinkedHashMap output = Maps.newLinkedHashMap(); // futures for readLedgerMetadata for each page. 
- List futures = Lists.newArrayListWithExpectedSize(LIST_LEDGER_BATCH_SIZE); + Map>> futures = + new LinkedHashMap<>(LIST_LEDGER_BATCH_SIZE); if (printMeta) { int ledgerIndex = 0; @@ -136,22 +122,20 @@ public HttpServiceResponse handle(HttpServiceRequest request) throws Exception { ledgerIndex++; if (endLedgerIndex == 0 // no actual page parameter provided || (ledgerIndex >= startLedgerIndex && ledgerIndex <= endLedgerIndex)) { - ReadLedgerMetadataCallback cb = new ReadLedgerMetadataCallback(lid); - manager.readLedgerMetadata(lid, cb); - futures.add(cb); + futures.put(lid, manager.readLedgerMetadata(lid)); } } if (futures.size() >= LIST_LEDGER_BATCH_SIZE) { - while (!futures.isEmpty()) { - ReadLedgerMetadataCallback cb = futures.remove(0); - keepLedgerMetadata(cb, output); + for (Map.Entry> > e : futures.entrySet()) { + keepLedgerMetadata(e.getKey(), e.getValue(), output, decodeMeta); } + futures.clear(); } } - while (!futures.isEmpty()) { - ReadLedgerMetadataCallback cb = futures.remove(0); - keepLedgerMetadata(cb, output); + for (Map.Entry> > e : futures.entrySet()) { + keepLedgerMetadata(e.getKey(), e.getValue(), output, decodeMeta); } + futures.clear(); } else { while (iter.hasNext()) { LedgerManager.LedgerRange r = iter.next(); @@ -164,7 +148,9 @@ public HttpServiceResponse handle(HttpServiceRequest request) throws Exception { manager.close(); String jsonResponse = JsonUtil.toJson(output); - LOG.debug("output body:" + jsonResponse); + if (LOG.isDebugEnabled()) { + LOG.debug("output body:" + jsonResponse); + } response.setBody(jsonResponse); response.setCode(HttpServer.StatusCode.OK); return response; diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/ListUnderReplicatedLedgerService.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/ListUnderReplicatedLedgerService.java index ef71a68b688..efac8a7e616 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/ListUnderReplicatedLedgerService.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/ListUnderReplicatedLedgerService.java @@ -35,7 +35,6 @@ import org.apache.bookkeeper.meta.LedgerManagerFactory; import org.apache.bookkeeper.meta.LedgerUnderreplicationManager; import org.apache.bookkeeper.meta.UnderreplicatedLedger; -import org.apache.bookkeeper.proto.BookieServer; import org.apache.commons.lang.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -51,12 +50,12 @@ public class ListUnderReplicatedLedgerService implements HttpEndpointService { static final Logger LOG = LoggerFactory.getLogger(ListUnderReplicatedLedgerService.class); protected ServerConfiguration conf; - protected BookieServer bookieServer; + private final LedgerManagerFactory ledgerManagerFactory; - public ListUnderReplicatedLedgerService(ServerConfiguration conf, BookieServer bookieServer) { + public ListUnderReplicatedLedgerService(ServerConfiguration conf, LedgerManagerFactory ledgerManagerFactory) { checkNotNull(conf); this.conf = conf; - this.bookieServer = bookieServer; + this.ledgerManagerFactory = ledgerManagerFactory; } /* @@ -100,8 +99,8 @@ public HttpServiceResponse handle(HttpServiceRequest request) throws Exception { boolean hasURLedgers = false; List outputLedgers = null; Map> outputLedgersWithMissingReplica = null; - LedgerManagerFactory mFactory = bookieServer.getBookie().getLedgerManagerFactory(); - LedgerUnderreplicationManager underreplicationManager = mFactory.newLedgerUnderreplicationManager(); + 
LedgerUnderreplicationManager underreplicationManager = + ledgerManagerFactory.newLedgerUnderreplicationManager(); Iterator iter = underreplicationManager.listLedgersToRereplicate(predicate); hasURLedgers = iter.hasNext(); @@ -129,7 +128,9 @@ public HttpServiceResponse handle(HttpServiceRequest request) throws Exception { response.setCode(HttpServer.StatusCode.OK); String jsonResponse = JsonUtil .toJson(printMissingReplica ? outputLedgersWithMissingReplica : outputLedgers); - LOG.debug("output body: " + jsonResponse); + if (LOG.isDebugEnabled()) { + LOG.debug("output body: " + jsonResponse); + } response.setBody(jsonResponse); return response; } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/LostBookieRecoveryDelayService.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/LostBookieRecoveryDelayService.java index 6442daaa56b..20adedc5970 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/LostBookieRecoveryDelayService.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/LostBookieRecoveryDelayService.java @@ -21,7 +21,6 @@ import static com.google.common.base.Preconditions.checkNotNull; import java.util.HashMap; - import org.apache.bookkeeper.client.BookKeeperAdmin; import org.apache.bookkeeper.common.util.JsonUtil; import org.apache.bookkeeper.conf.ServerConfiguration; @@ -86,7 +85,9 @@ public HttpServiceResponse handle(HttpServiceRequest request) throws Exception { int delaySeconds = bka.getLostBookieRecoveryDelay(); response.setCode(HttpServer.StatusCode.OK); response.setBody("lostBookieRecoveryDelay value: " + delaySeconds); - LOG.debug("response body:" + response.getBody()); + if (LOG.isDebugEnabled()) { + LOG.debug("response body:" + response.getBody()); + } return response; } catch (Exception e) { // may get noNode exception diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/MetricsService.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/MetricsService.java index 0d62ca9dafe..d3d57f72fa4 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/MetricsService.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/MetricsService.java @@ -35,6 +35,8 @@ */ public class MetricsService implements HttpEndpointService { + public static final String PROMETHEUS_CONTENT_TYPE_004 = "text/plain; version=0.0.4; charset=utf-8"; + private final ServerConfiguration conf; private final StatsProvider statsProvider; @@ -65,6 +67,7 @@ public HttpServiceResponse handle(HttpServiceRequest request) throws Exception { statsProvider.writeAllMetrics(writer); writer.flush(); response.setCode(StatusCode.OK); + response.setContentType(PROMETHEUS_CONTENT_TYPE_004); response.setBody(writer.getBuffer().toString()); } catch (UnsupportedOperationException uoe) { response.setCode(StatusCode.INTERNAL_ERROR); diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/ReadLedgerEntryService.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/ReadLedgerEntryService.java index 4c2141dc1bd..63999b08528 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/ReadLedgerEntryService.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/ReadLedgerEntryService.java @@ -18,11 +18,10 @@ */ package org.apache.bookkeeper.server.http.service; -import 
static com.google.common.base.Charsets.US_ASCII; import static com.google.common.base.Preconditions.checkNotNull; +import static java.nio.charset.StandardCharsets.US_ASCII; import com.google.common.collect.Maps; - import java.util.Iterator; import java.util.Map; import org.apache.bookkeeper.client.BookKeeperAdmin; @@ -107,7 +106,9 @@ public HttpServiceResponse handle(HttpServiceRequest request) throws Exception { } String jsonResponse = JsonUtil.toJson(output); - LOG.debug("output body:" + jsonResponse); + if (LOG.isDebugEnabled()) { + LOG.debug("output body:" + jsonResponse); + } response.setBody(jsonResponse); response.setCode(HttpServer.StatusCode.OK); return response; diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/RecoveryBookieService.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/RecoveryBookieService.java index 3a08bafc5b1..27f3610a697 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/RecoveryBookieService.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/RecoveryBookieService.java @@ -22,10 +22,8 @@ import static org.apache.bookkeeper.meta.MetadataDrivers.runFunctionWithRegistrationManager; import com.fasterxml.jackson.annotation.JsonProperty; - import java.util.List; import java.util.concurrent.ExecutorService; - import org.apache.bookkeeper.bookie.Cookie; import org.apache.bookkeeper.client.BookKeeperAdmin; import org.apache.bookkeeper.common.util.JsonUtil; @@ -34,7 +32,7 @@ import org.apache.bookkeeper.http.service.HttpEndpointService; import org.apache.bookkeeper.http.service.HttpServiceRequest; import org.apache.bookkeeper.http.service.HttpServiceResponse; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.versioning.Versioned; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -46,7 +44,6 @@ * The parameter of input body should be like this format: * { * "bookie_src": [ "bookie_src1", "bookie_src2"... ], - * "bookie_dest": [ "bookie_dest1", "bookie_dest2"... 
], * "delete_cookie": <bool_value> * } */ @@ -94,8 +91,10 @@ public HttpServiceResponse handle(HttpServiceRequest request) throws Exception { try { requestJsonBody = JsonUtil.fromJson(requestBody, RecoveryRequestJsonBody.class); - LOG.debug("bookie_src: [" + requestJsonBody.bookieSrc.get(0) - + "], delete_cookie: [" + requestJsonBody.deleteCookie + "]"); + if (LOG.isDebugEnabled()) { + LOG.debug("bookie_src: [" + requestJsonBody.bookieSrc.get(0) + + "], delete_cookie: [" + requestJsonBody.deleteCookie + "]"); + } } catch (JsonUtil.ParseJsonException e) { LOG.error("Meet Exception: ", e); response.setCode(HttpServer.StatusCode.NOT_FOUND); @@ -105,12 +104,11 @@ public HttpServiceResponse handle(HttpServiceRequest request) throws Exception { if (HttpServer.Method.PUT == request.getMethod() && !requestJsonBody.bookieSrc.isEmpty()) { runFunctionWithRegistrationManager(conf, rm -> { - String bookieSrcString[] = requestJsonBody.bookieSrc.get(0).split(":"); - BookieSocketAddress bookieSrc = new BookieSocketAddress( - bookieSrcString[0], Integer.parseInt(bookieSrcString[1])); - boolean deleteCookie = requestJsonBody.deleteCookie; + final String bookieSrcSerialized = requestJsonBody.bookieSrc.get(0); executor.execute(() -> { try { + BookieId bookieSrc = BookieId.parse(bookieSrcSerialized); + boolean deleteCookie = requestJsonBody.deleteCookie; LOG.info("Start recovering bookie."); bka.recoverBookieData(bookieSrc); if (deleteCookie) { diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/ResumeCompactionService.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/ResumeCompactionService.java new file mode 100644 index 00000000000..92d66fbcf7d --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/ResumeCompactionService.java @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
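RecoveryBookieService above now parses the first bookie_src entry with BookieId.parse and runs the recovery asynchronously. A usage sketch with the JDK 11 HttpClient; the endpoint path (/api/v1/autorecovery/bookie), port 8080, and the bookie id are illustrative assumptions:

import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

public class RecoverBookieExample {
    public static void main(String[] args) throws Exception {
        // Body follows the format documented in the class Javadoc above.
        String body = "{\"bookie_src\": [\"192.0.2.10:3181\"], \"delete_cookie\": true}";
        HttpRequest request = HttpRequest.newBuilder(
                URI.create("http://localhost:8080/api/v1/autorecovery/bookie")) // path assumed
            .header("Content-Type", "application/json")
            .PUT(HttpRequest.BodyPublishers.ofString(body))
            .build();
        HttpResponse<String> response = HttpClient.newHttpClient()
            .send(request, HttpResponse.BodyHandlers.ofString());
        System.out.println(response.statusCode() + " " + response.body());
    }
}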
+ */ +package org.apache.bookkeeper.server.http.service; + +import static com.google.common.base.Preconditions.checkNotNull; + +import java.util.HashMap; +import java.util.Map; +import org.apache.bookkeeper.common.util.JsonUtil; +import org.apache.bookkeeper.http.HttpServer; +import org.apache.bookkeeper.http.service.HttpEndpointService; +import org.apache.bookkeeper.http.service.HttpServiceRequest; +import org.apache.bookkeeper.http.service.HttpServiceResponse; +import org.apache.bookkeeper.proto.BookieServer; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class ResumeCompactionService implements HttpEndpointService { + + static final Logger LOG = LoggerFactory.getLogger(ResumeCompactionService.class); + + protected BookieServer bookieServer; + + public ResumeCompactionService(BookieServer bookieServer) { + checkNotNull(bookieServer); + this.bookieServer = bookieServer; + } + + @Override + public HttpServiceResponse handle(HttpServiceRequest request) throws Exception { + HttpServiceResponse response = new HttpServiceResponse(); + + if (HttpServer.Method.PUT == request.getMethod()) { + String requestBody = request.getBody(); + if (null == requestBody) { + return new HttpServiceResponse("Empty request body", HttpServer.StatusCode.BAD_REQUEST); + } else { + @SuppressWarnings("unchecked") + Map configMap = JsonUtil.fromJson(requestBody, HashMap.class); + Boolean resumeMajor = (Boolean) configMap.get("resumeMajor"); + Boolean resumeMinor = (Boolean) configMap.get("resumeMinor"); + if (resumeMajor == null && resumeMinor == null) { + return new HttpServiceResponse("No resumeMajor or resumeMinor params found", + HttpServer.StatusCode.BAD_REQUEST); + } + String output = ""; + if (resumeMajor != null && resumeMajor) { + output = "Resume majorGC on BookieServer: " + bookieServer.toString(); + bookieServer.getBookie().getLedgerStorage().resumeMajorGC(); + } + if (resumeMinor != null && resumeMinor) { + output += ", Resume minorGC on BookieServer: " + bookieServer.toString(); + bookieServer.getBookie().getLedgerStorage().resumeMinorGC(); + } + String jsonResponse = JsonUtil.toJson(output); + if (LOG.isDebugEnabled()) { + LOG.debug("output body:" + jsonResponse); + } + response.setBody(jsonResponse); + response.setCode(HttpServer.StatusCode.OK); + return response; + } + } else { + response.setCode(HttpServer.StatusCode.NOT_FOUND); + response.setBody("Not found method. Should be PUT to resume major or minor compaction, Or GET to get " + + "compaction state."); + return response; + } + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/SuspendCompactionService.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/SuspendCompactionService.java new file mode 100644 index 00000000000..74e284ad960 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/SuspendCompactionService.java @@ -0,0 +1,99 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
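ResumeCompactionService above accepts a JSON body with resumeMajor/resumeMinor flags; the SuspendCompactionService that follows mirrors it with suspendMajor/suspendMinor and adds a GET for the current suspension state. A sketch of the PUT side; the endpoint path and port are assumptions for illustration:

import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

public class ResumeCompactionExample {
    public static void main(String[] args) throws Exception {
        // Resume major compaction only; field names come from the handler above.
        String body = "{\"resumeMajor\": true, \"resumeMinor\": false}";
        HttpRequest request = HttpRequest.newBuilder(
                URI.create("http://localhost:8080/api/v1/bookie/gc/resume_compaction")) // path assumed
            .header("Content-Type", "application/json")
            .PUT(HttpRequest.BodyPublishers.ofString(body))
            .build();
        System.out.println(HttpClient.newHttpClient()
            .send(request, HttpResponse.BodyHandlers.ofString()).body());
    }
}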
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.bookkeeper.server.http.service; + +import static com.google.common.base.Preconditions.checkNotNull; + +import java.util.HashMap; +import java.util.Map; +import org.apache.bookkeeper.common.util.JsonUtil; +import org.apache.bookkeeper.http.HttpServer; +import org.apache.bookkeeper.http.service.HttpEndpointService; +import org.apache.bookkeeper.http.service.HttpServiceRequest; +import org.apache.bookkeeper.http.service.HttpServiceResponse; +import org.apache.bookkeeper.proto.BookieServer; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class SuspendCompactionService implements HttpEndpointService { + + static final Logger LOG = LoggerFactory.getLogger(SuspendCompactionService.class); + + protected BookieServer bookieServer; + + public SuspendCompactionService(BookieServer bookieServer) { + checkNotNull(bookieServer); + this.bookieServer = bookieServer; + } + + @Override + public HttpServiceResponse handle(HttpServiceRequest request) throws Exception { + HttpServiceResponse response = new HttpServiceResponse(); + + if (HttpServer.Method.PUT == request.getMethod()) { + String requestBody = request.getBody(); + if (null == requestBody) { + return new HttpServiceResponse("Empty request body", HttpServer.StatusCode.BAD_REQUEST); + } else { + @SuppressWarnings("unchecked") + Map configMap = JsonUtil.fromJson(requestBody, HashMap.class); + Boolean suspendMajor = (Boolean) configMap.get("suspendMajor"); + Boolean suspendMinor = (Boolean) configMap.get("suspendMinor"); + if (suspendMajor == null && suspendMinor == null) { + return new HttpServiceResponse("No suspendMajor or suspendMinor params found", + HttpServer.StatusCode.BAD_REQUEST); + } + String output = ""; + if (suspendMajor != null && suspendMajor) { + output = "Suspend majorGC on BookieServer: " + bookieServer.toString(); + bookieServer.getBookie().getLedgerStorage().suspendMajorGC(); + } + if (suspendMinor != null && suspendMinor) { + output += ", Suspend minorGC on BookieServer: " + bookieServer.toString(); + bookieServer.getBookie().getLedgerStorage().suspendMinorGC(); + } + String jsonResponse = JsonUtil.toJson(output); + if (LOG.isDebugEnabled()) { + LOG.debug("output body:" + jsonResponse); + } + response.setBody(jsonResponse); + response.setCode(HttpServer.StatusCode.OK); + return response; + } + } else if (HttpServer.Method.GET == request.getMethod()) { + boolean isMajorGcSuspend = bookieServer.getBookie().getLedgerStorage().isMajorGcSuspended(); + boolean isMinorGcSuspend = bookieServer.getBookie().getLedgerStorage().isMinorGcSuspended(); + Map output = new HashMap<>(); + output.put("isMajorGcSuspended", Boolean.toString(isMajorGcSuspend)); + output.put("isMinorGcSuspended", Boolean.toString(isMinorGcSuspend)); + String jsonResponse = JsonUtil.toJson(output); + if (LOG.isDebugEnabled()) { + LOG.debug("output body:" + jsonResponse); + } + response.setBody(jsonResponse); + response.setCode(HttpServer.StatusCode.OK); + return response; + } else { + response.setCode(HttpServer.StatusCode.NOT_FOUND); + response.setBody("Not found method. 
Should be PUT to suspend major or minor compaction, " + + "Or GET to get compaction state."); + return response; + } + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/TriggerAuditService.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/TriggerAuditService.java index f5c2b4e7f90..96286a87fa2 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/TriggerAuditService.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/TriggerAuditService.java @@ -65,7 +65,9 @@ public HttpServiceResponse handle(HttpServiceRequest request) throws Exception { response.setCode(HttpServer.StatusCode.OK); response.setBody("Success trigger audit."); - LOG.debug("response body:" + response.getBody()); + if (LOG.isDebugEnabled()) { + LOG.debug("response body:" + response.getBody()); + } return response; } else { response.setCode(HttpServer.StatusCode.NOT_FOUND); diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/TriggerGCService.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/TriggerGCService.java new file mode 100644 index 00000000000..1f4eea7fb1c --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/TriggerGCService.java @@ -0,0 +1,113 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.bookkeeper.server.http.service; + +import static com.google.common.base.Preconditions.checkNotNull; + +import java.util.HashMap; +import java.util.Map; +import org.apache.bookkeeper.bookie.LedgerStorage; +import org.apache.bookkeeper.common.util.JsonUtil; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.http.HttpServer; +import org.apache.bookkeeper.http.service.HttpEndpointService; +import org.apache.bookkeeper.http.service.HttpServiceRequest; +import org.apache.bookkeeper.http.service.HttpServiceResponse; +import org.apache.bookkeeper.proto.BookieServer; +import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.tuple.Pair; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * HttpEndpointService that handle force trigger GC requests. + * + *
<p>
The PUT method will force trigger GC on the current bookie, and make GC run in the background. + * + *
<p>
          The GET method will get the force triggered GC running or not. + * Output would be like: + * { + * "is_in_force_gc" : "false" + * } + */ +public class TriggerGCService implements HttpEndpointService { + + static final Logger LOG = LoggerFactory.getLogger(TriggerGCService.class); + + protected ServerConfiguration conf; + protected BookieServer bookieServer; + + public TriggerGCService(ServerConfiguration conf, BookieServer bookieServer) { + checkNotNull(conf); + checkNotNull(bookieServer); + this.conf = conf; + this.bookieServer = bookieServer; + } + + @Override + public HttpServiceResponse handle(HttpServiceRequest request) throws Exception { + HttpServiceResponse response = new HttpServiceResponse(); + try { + if (HttpServer.Method.PUT == request.getMethod()) { + String requestBody = request.getBody(); + if (StringUtils.isBlank(requestBody)) { + bookieServer.getBookie().getLedgerStorage().forceGC(); + } else { + @SuppressWarnings("unchecked") + Map configMap = JsonUtil.fromJson(requestBody, HashMap.class); + LedgerStorage ledgerStorage = bookieServer.getBookie().getLedgerStorage(); + boolean forceMajor = !ledgerStorage.isMajorGcSuspended(); + boolean forceMinor = !ledgerStorage.isMinorGcSuspended(); + + forceMajor = Boolean.parseBoolean(configMap.getOrDefault("forceMajor", forceMajor).toString()); + forceMinor = Boolean.parseBoolean(configMap.getOrDefault("forceMinor", forceMinor).toString()); + ledgerStorage.forceGC(forceMajor, forceMinor); + } + + String output = "Triggered GC on BookieServer: " + bookieServer.getBookieId(); + String jsonResponse = JsonUtil.toJson(output); + if (LOG.isDebugEnabled()) { + LOG.debug("output body:" + jsonResponse); + } + response.setBody(jsonResponse); + response.setCode(HttpServer.StatusCode.OK); + return response; + } else if (HttpServer.Method.GET == request.getMethod()) { + Boolean isInForceGC = bookieServer.getBookie().getLedgerStorage().isInForceGC(); + Pair output = Pair.of("is_in_force_gc", isInForceGC.toString()); + String jsonResponse = JsonUtil.toJson(output); + if (LOG.isDebugEnabled()) { + LOG.debug("output body:" + jsonResponse); + } + response.setBody(jsonResponse); + response.setCode(HttpServer.StatusCode.OK); + return response; + } else { + response.setCode(HttpServer.StatusCode.METHOD_NOT_ALLOWED); + response.setBody("Not allowed method. Should be PUT to trigger GC, Or GET to get Force GC state."); + return response; + } + } catch (Exception e) { + LOG.error("Failed to handle the request, method: {}, body: {} ", request.getMethod(), request.getBody(), e); + response.setCode(HttpServer.StatusCode.BAD_REQUEST); + response.setBody("Failed to handle the request, exception: " + e.getMessage()); + return response; + } + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/TriggerLocationCompactService.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/TriggerLocationCompactService.java new file mode 100644 index 00000000000..95b7f277506 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/TriggerLocationCompactService.java @@ -0,0 +1,143 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
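TriggerGCService above can be driven the same way: PUT with an optional forceMajor/forceMinor body to start a collection, then GET to poll is_in_force_gc. A sketch, assuming the gc endpoint is mounted at /api/v1/bookie/gc on port 8080:

import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

public class TriggerGcExample {
    public static void main(String[] args) throws Exception {
        HttpClient client = HttpClient.newHttpClient();
        URI gc = URI.create("http://localhost:8080/api/v1/bookie/gc"); // path/port assumed
        // Kick off a major-only GC; field names come from the handler above.
        client.send(HttpRequest.newBuilder(gc)
                .header("Content-Type", "application/json")
                .PUT(HttpRequest.BodyPublishers.ofString("{\"forceMajor\": true, \"forceMinor\": false}"))
                .build(), HttpResponse.BodyHandlers.ofString());
        // Poll whether a forced GC is still running: {"is_in_force_gc" : "..."}
        HttpResponse<String> state = client.send(
                HttpRequest.newBuilder(gc).GET().build(), HttpResponse.BodyHandlers.ofString());
        System.out.println(state.body());
    }
}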
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.bookkeeper.server.http.service; + +import static com.google.common.base.Preconditions.checkNotNull; + +import com.google.common.collect.Lists; +import com.google.common.collect.Sets; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import org.apache.bookkeeper.bookie.LedgerStorage; +import org.apache.bookkeeper.common.util.JsonUtil; +import org.apache.bookkeeper.http.HttpServer; +import org.apache.bookkeeper.http.service.HttpEndpointService; +import org.apache.bookkeeper.http.service.HttpServiceRequest; +import org.apache.bookkeeper.http.service.HttpServiceResponse; +import org.apache.bookkeeper.proto.BookieServer; +import org.apache.commons.collections4.CollectionUtils; +import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +/** + * HttpEndpointService that handle force trigger entry location compact requests. + * + *
<p>
The PUT method will trigger an entry location compaction on the current bookie. + * + *
<p>
          The GET method will get the entry location compact running or not. + * Output would be like: + * { + * "/data1/bookkeeper/ledgers/current/locations" : "false", + * "/data2/bookkeeper/ledgers/current/locations" : "true", + * } + */ + +public class TriggerLocationCompactService implements HttpEndpointService { + + static final Logger LOG = LoggerFactory.getLogger(TriggerLocationCompactService.class); + + private final BookieServer bookieServer; + private final List entryLocationDBPath; + + public TriggerLocationCompactService(BookieServer bookieServer) { + this.bookieServer = checkNotNull(bookieServer); + this.entryLocationDBPath = bookieServer.getBookie().getLedgerStorage().getEntryLocationDBPath(); + } + + @Override + public HttpServiceResponse handle(HttpServiceRequest request) throws Exception { + HttpServiceResponse response = new HttpServiceResponse(); + LedgerStorage ledgerStorage = bookieServer.getBookie().getLedgerStorage(); + + if (HttpServer.Method.PUT.equals(request.getMethod())) { + String requestBody = request.getBody(); + String output = "Not trigger Entry Location RocksDB compact."; + + if (StringUtils.isBlank(requestBody)) { + output = "Empty request body"; + response.setBody(output); + response.setCode(HttpServer.StatusCode.BAD_REQUEST); + return response; + } + + try { + @SuppressWarnings("unchecked") + Map configMap = JsonUtil.fromJson(requestBody, HashMap.class); + Boolean isEntryLocationCompact = (Boolean) configMap + .getOrDefault("entryLocationRocksDBCompact", false); + String entryLocations = (String) configMap.getOrDefault("entryLocations", ""); + + if (!isEntryLocationCompact) { + // If entryLocationRocksDBCompact is false, doing nothing. + response.setBody(output); + response.setCode(HttpServer.StatusCode.OK); + return response; + } + if (StringUtils.isNotBlank(entryLocations)) { + // Specified trigger RocksDB compact entryLocations. + Set locations = Sets.newHashSet(entryLocations.trim().split(",")); + if (CollectionUtils.isSubCollection(locations, entryLocationDBPath)) { + ledgerStorage.entryLocationCompact(Lists.newArrayList(locations)); + output = String.format("Triggered entry Location RocksDB: %s compact on bookie:%s.", + entryLocations, bookieServer.getBookieId()); + response.setCode(HttpServer.StatusCode.OK); + } else { + output = String.format("Specified trigger compact entryLocations: %s is invalid. " + + "Bookie entry location RocksDB path: %s.", entryLocations, entryLocationDBPath); + response.setCode(HttpServer.StatusCode.BAD_REQUEST); + } + } else { + // Not specified trigger compact entryLocations, trigger compact for all entry location. 
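// Illustrative request bodies for this handler (paths reuse the examples in the
// class Javadoc; both forms are PUT bodies):
//   compact every entry location DB:  {"entryLocationRocksDBCompact": true}
//   compact selected locations only:  {"entryLocationRocksDBCompact": true,
//    "entryLocations": "/data1/bookkeeper/ledgers/current/locations,/data2/bookkeeper/ledgers/current/locations"}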
+ ledgerStorage.entryLocationCompact(); + output = "Triggered entry Location RocksDB compact on bookie:" + bookieServer.getBookieId(); + response.setCode(HttpServer.StatusCode.OK); + } + } catch (JsonUtil.ParseJsonException ex) { + output = ex.getMessage(); + response.setCode(HttpServer.StatusCode.BAD_REQUEST); + LOG.warn("Trigger entry location index RocksDB compact failed, caused by: " + ex.getMessage()); + } + + String jsonResponse = JsonUtil.toJson(output); + if (LOG.isDebugEnabled()) { + LOG.debug("output body:" + jsonResponse); + } + response.setBody(jsonResponse); + return response; + } else if (HttpServer.Method.GET == request.getMethod()) { + Map compactStatus = ledgerStorage.isEntryLocationCompacting(entryLocationDBPath); + String jsonResponse = JsonUtil.toJson(compactStatus); + if (LOG.isDebugEnabled()) { + LOG.debug("output body:" + jsonResponse); + } + response.setBody(jsonResponse); + response.setCode(HttpServer.StatusCode.OK); + return response; + } else { + response.setCode(HttpServer.StatusCode.METHOD_NOT_ALLOWED); + response.setBody("Not found method. Should be PUT to trigger entry location compact," + + " Or GET to get entry location compact state."); + return response; + } + } +} \ No newline at end of file diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/WhoIsAuditorService.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/WhoIsAuditorService.java index 9154ccd6433..636fef84685 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/WhoIsAuditorService.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/http/service/WhoIsAuditorService.java @@ -20,14 +20,13 @@ import static com.google.common.base.Preconditions.checkNotNull; +import org.apache.bookkeeper.client.BookKeeperAdmin; import org.apache.bookkeeper.conf.ServerConfiguration; import org.apache.bookkeeper.http.HttpServer; import org.apache.bookkeeper.http.service.HttpEndpointService; import org.apache.bookkeeper.http.service.HttpServiceRequest; import org.apache.bookkeeper.http.service.HttpServiceResponse; -import org.apache.bookkeeper.net.BookieSocketAddress; -import org.apache.bookkeeper.replication.AuditorElector; -import org.apache.zookeeper.ZooKeeper; +import org.apache.bookkeeper.net.BookieId; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -41,12 +40,12 @@ public class WhoIsAuditorService implements HttpEndpointService { static final Logger LOG = LoggerFactory.getLogger(WhoIsAuditorService.class); protected ServerConfiguration conf; - protected ZooKeeper zk; + protected BookKeeperAdmin bka; - public WhoIsAuditorService(ServerConfiguration conf, ZooKeeper zk) { + public WhoIsAuditorService(ServerConfiguration conf, BookKeeperAdmin bka) { checkNotNull(conf); this.conf = conf; - this.zk = zk; + this.bka = bka; } /* @@ -57,9 +56,9 @@ public HttpServiceResponse handle(HttpServiceRequest request) throws Exception { HttpServiceResponse response = new HttpServiceResponse(); if (HttpServer.Method.GET == request.getMethod()) { - BookieSocketAddress bookieId = null; + BookieId bookieId; try { - bookieId = AuditorElector.getCurrentAuditor(conf, zk); + bookieId = bka.getCurrentAuditor(); if (bookieId == null) { response.setCode(HttpServer.StatusCode.NOT_FOUND); @@ -74,11 +73,10 @@ public HttpServiceResponse handle(HttpServiceRequest request) throws Exception { } response.setCode(HttpServer.StatusCode.OK); - response.setBody("Auditor: " - + 
bookieId.getSocketAddress().getAddress().getCanonicalHostName() + "/" - + bookieId.getSocketAddress().getAddress().getHostAddress() + ":" - + bookieId.getSocketAddress().getPort()); - LOG.debug("response body:" + response.getBody()); + response.setBody("Auditor: " + bookieId); + if (LOG.isDebugEnabled()) { + LOG.debug("response body:" + response.getBody()); + } return response; } else { response.setCode(HttpServer.StatusCode.NOT_FOUND); diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/service/AutoRecoveryService.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/service/AutoRecoveryService.java index b2b8f07d0c9..61681f010c1 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/service/AutoRecoveryService.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/service/AutoRecoveryService.java @@ -19,8 +19,8 @@ package org.apache.bookkeeper.server.service; import java.io.IOException; +import java.lang.Thread.UncaughtExceptionHandler; import org.apache.bookkeeper.replication.AutoRecoveryMain; -import org.apache.bookkeeper.replication.ReplicationException.UnavailableException; import org.apache.bookkeeper.server.component.ServerLifecycleComponent; import org.apache.bookkeeper.server.conf.BookieConfiguration; import org.apache.bookkeeper.stats.StatsLogger; @@ -41,13 +41,19 @@ public AutoRecoveryService(BookieConfiguration conf, StatsLogger statsLogger) th statsLogger); } + @Override + public void setExceptionHandler(UncaughtExceptionHandler handler) { + super.setExceptionHandler(handler); + main.setExceptionHandler(handler); + } + + public AutoRecoveryMain getAutoRecoveryServer() { + return main; + } + @Override protected void doStart() { - try { - this.main.start(); - } catch (UnavailableException e) { - throw new RuntimeException("Can't not start '" + NAME + "' component.", e); - } + this.main.start(); } @Override diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/service/BookieService.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/service/BookieService.java index d6837f6f90f..7fcb60db59a 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/service/BookieService.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/service/BookieService.java @@ -18,34 +18,65 @@ package org.apache.bookkeeper.server.service; +import static org.apache.bookkeeper.proto.BookieServer.newBookieServer; + import java.io.IOException; import java.lang.Thread.UncaughtExceptionHandler; -import org.apache.bookkeeper.client.BKException; +import java.net.UnknownHostException; +import java.util.ArrayList; +import java.util.List; +import org.apache.bookkeeper.bookie.Bookie; +import org.apache.bookkeeper.bookie.UncleanShutdownDetection; +import org.apache.bookkeeper.common.allocator.ByteBufAllocatorWithOomHandler; +import org.apache.bookkeeper.common.component.ComponentInfoPublisher; +import org.apache.bookkeeper.common.component.ComponentInfoPublisher.EndpointInfo; +import org.apache.bookkeeper.net.BookieSocketAddress; import org.apache.bookkeeper.proto.BookieServer; -import org.apache.bookkeeper.replication.ReplicationException.UnavailableException; import org.apache.bookkeeper.server.component.ServerLifecycleComponent; import org.apache.bookkeeper.server.conf.BookieConfiguration; import org.apache.bookkeeper.stats.StatsLogger; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * A {@link ServerLifecycleComponent} that starts the core bookie server. 
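WhoIsAuditorService above now resolves the auditor through BookKeeperAdmin.getCurrentAuditor() and returns the BookieId verbatim, so over HTTP the lookup is a one-line GET. A sketch; the path (/api/v1/autorecovery/who_is_auditor) and port are assumptions:

import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

public class WhoIsAuditorExample {
    public static void main(String[] args) throws Exception {
        HttpRequest request = HttpRequest.newBuilder(
                URI.create("http://localhost:8080/api/v1/autorecovery/who_is_auditor")) // path assumed
            .GET().build();
        // Response body is now "Auditor: <bookieId>" rather than a resolved host/IP/port triple.
        System.out.println(HttpClient.newHttpClient()
            .send(request, HttpResponse.BodyHandlers.ofString()).body());
    }
}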
*/ -public class BookieService extends ServerLifecycleComponent { +public class BookieService extends ServerLifecycleComponent { + private static final Logger log = LoggerFactory.getLogger(BookieService.class); public static final String NAME = "bookie-server"; private final BookieServer server; + private final ByteBufAllocatorWithOomHandler allocator; public BookieService(BookieConfiguration conf, - StatsLogger statsLogger) + Bookie bookie, + StatsLogger statsLogger, + ByteBufAllocatorWithOomHandler allocator, + UncleanShutdownDetection uncleanShutdownDetection) throws Exception { super(NAME, conf, statsLogger); - this.server = new BookieServer(conf.getServerConf(), statsLogger); + this.server = newBookieServer(conf.getServerConf(), + bookie, + statsLogger, + allocator, + uncleanShutdownDetection); + this.allocator = allocator; } @Override public void setExceptionHandler(UncaughtExceptionHandler handler) { + super.setExceptionHandler(handler); server.setExceptionHandler(handler); + allocator.setOomHandler((ex) -> { + try { + log.error("Unable to allocate memory, exiting bookie", ex); + } finally { + if (uncaughtExceptionHandler != null) { + uncaughtExceptionHandler.uncaughtException(Thread.currentThread(), ex); + } + } + }); } public BookieServer getServer() { @@ -56,8 +87,8 @@ public BookieServer getServer() { protected void doStart() { try { this.server.start(); - } catch (IOException | UnavailableException | InterruptedException | BKException e) { - throw new RuntimeException("Failed to start bookie server", e); + } catch (InterruptedException | IOException exc) { + throw new RuntimeException("Failed to start bookie server", exc); } } @@ -70,4 +101,24 @@ protected void doStop() { protected void doClose() throws IOException { this.server.shutdown(); } + + @Override + public void publishInfo(ComponentInfoPublisher componentInfoPublisher) { + try { + BookieSocketAddress localAddress = getServer().getLocalAddress(); + List extensions = new ArrayList<>(); + if (conf.getServerConf().getTLSProviderFactoryClass() != null) { + extensions.add("tls"); + } + EndpointInfo endpoint = new EndpointInfo("bookie", + localAddress.getPort(), + localAddress.getHostName(), + "bookie-rpc", null, extensions); + componentInfoPublisher.publishEndpoint(endpoint); + + } catch (UnknownHostException err) { + log.error("Cannot compute local address", err); + } + } + } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/service/HttpService.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/service/HttpService.java index 8a86ffa0260..cee00f84c91 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/service/HttpService.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/service/HttpService.java @@ -21,8 +21,11 @@ import static com.google.common.base.Preconditions.checkNotNull; import java.io.IOException; - +import org.apache.bookkeeper.common.component.ComponentInfoPublisher; +import org.apache.bookkeeper.common.component.ComponentInfoPublisher.EndpointInfo; +import org.apache.bookkeeper.conf.ServerConfiguration; import org.apache.bookkeeper.http.HttpServer; +import org.apache.bookkeeper.http.HttpServerConfiguration; import org.apache.bookkeeper.http.HttpServerLoader; import org.apache.bookkeeper.server.component.ServerLifecycleComponent; import org.apache.bookkeeper.server.conf.BookieConfiguration; @@ -45,13 +48,18 @@ public HttpService(BKHttpServiceProvider provider, HttpServerLoader.loadHttpServer(conf.getServerConf()); server = 
HttpServerLoader.get(); - checkNotNull(server); + checkNotNull(server, "httpServerClass is not configured or it could not be started," + + " please check your configuration and logs"); server.initialize(provider); } @Override protected void doStart() { - server.startServer(conf.getServerConf().getHttpServerPort()); + ServerConfiguration serverConf = conf.getServerConf(); + HttpServerConfiguration tlsOption = new HttpServerConfiguration(serverConf.isHttpServerTlsEnable(), + serverConf.getHttpServerKeystorePath(), serverConf.getHttpServerKeystorePassword(), + serverConf.getHttpServerTrustStorePath(), serverConf.getHttpServerTrustStorePassword()); + server.startServer(serverConf.getHttpServerPort(), serverConf.getHttpServerHost(), tlsOption); } @Override @@ -63,4 +71,16 @@ protected void doStop() { protected void doClose() throws IOException { server.stopServer(); } + + @Override + public void publishInfo(ComponentInfoPublisher componentInfoPublisher) { + if (conf.getServerConf().isHttpServerEnabled()) { + EndpointInfo endpoint = new EndpointInfo("httpserver", + conf.getServerConf().getHttpServerPort(), + conf.getServerConf().getHttpServerHost(), + "http", null, null); + componentInfoPublisher.publishEndpoint(endpoint); + } + } + } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/service/ScrubberService.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/service/ScrubberService.java new file mode 100644 index 00000000000..63b9ec08d0b --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/service/ScrubberService.java @@ -0,0 +1,145 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
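HttpService now passes host and TLS settings from ServerConfiguration into an HttpServerConfiguration. A configuration sketch; the setter names are assumed counterparts of the getters used above, and all values are placeholders:

import org.apache.bookkeeper.conf.ServerConfiguration;

class HttpServerConfigSketch {
    static ServerConfiguration configure() {
        ServerConfiguration conf = new ServerConfiguration();
        conf.setHttpServerEnabled(true);
        conf.setHttpServerPort(8443);
        // Setters below are assumed to mirror the getters referenced in HttpService.
        conf.setHttpServerHost("0.0.0.0");
        conf.setHttpServerTlsEnable(true);
        conf.setHttpServerKeystorePath("/path/to/keystore.jks");
        conf.setHttpServerKeystorePassword("changeit");
        conf.setHttpServerTrustStorePath("/path/to/truststore.jks");
        conf.setHttpServerTrustStorePassword("changeit");
        return conf;
    }
}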
+ * + */ +package org.apache.bookkeeper.server.service; + +import static com.google.common.base.Preconditions.checkArgument; +import static org.apache.bookkeeper.bookie.ScrubberStats.DETECTED_FATAL_SCRUB_ERRORS; +import static org.apache.bookkeeper.bookie.ScrubberStats.DETECTED_SCRUB_ERRORS; +import static org.apache.bookkeeper.bookie.ScrubberStats.RUN_DURATION; + +import com.google.common.util.concurrent.RateLimiter; +import io.netty.util.concurrent.DefaultThreadFactory; +import java.io.IOException; +import java.util.List; +import java.util.Optional; +import java.util.Random; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import org.apache.bookkeeper.bookie.ExitCode; +import org.apache.bookkeeper.bookie.LedgerStorage; +import org.apache.bookkeeper.common.util.MathUtils; +import org.apache.bookkeeper.server.component.ServerLifecycleComponent; +import org.apache.bookkeeper.server.conf.BookieConfiguration; +import org.apache.bookkeeper.stats.Counter; +import org.apache.bookkeeper.stats.OpStatsLogger; +import org.apache.bookkeeper.stats.StatsLogger; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * A {@link org.apache.bookkeeper.common.component.LifecycleComponent} that runs the scrubber background service. + */ +public class ScrubberService extends ServerLifecycleComponent { + private static final Logger LOG = LoggerFactory.getLogger(ScrubberService.class); + + private static final String NAME = "scrubber"; + private final ScheduledExecutorService executor; + private final Random rng = new Random(); + private final long scrubPeriod; + private final Optional<RateLimiter> scrubRateLimiter; + private final AtomicBoolean stop = new AtomicBoolean(false); + private final LedgerStorage ledgerStorage; + + private final OpStatsLogger scrubCounter; + private final Counter errorCounter; + private final Counter fatalErrorCounter; + + public ScrubberService( + StatsLogger logger, + BookieConfiguration conf, + LedgerStorage ledgerStorage) { + super(NAME, conf, logger); + this.executor = Executors.newSingleThreadScheduledExecutor( + new DefaultThreadFactory("ScrubThread")); + + this.scrubPeriod = conf.getServerConf().getLocalScrubPeriod(); + checkArgument( + scrubPeriod > 0, + "localScrubInterval must be > 0 for ScrubberService to be used"); + + double rateLimit = conf.getServerConf().getLocalScrubRateLimit(); + this.scrubRateLimiter = rateLimit == 0 ?
Optional.empty() : Optional.of(RateLimiter.create(rateLimit)); + + this.ledgerStorage = ledgerStorage; + + this.scrubCounter = logger.getOpStatsLogger(RUN_DURATION); + this.errorCounter = logger.getCounter(DETECTED_SCRUB_ERRORS); + this.fatalErrorCounter = logger.getCounter(DETECTED_FATAL_SCRUB_ERRORS); + } + + private long getNextPeriodMS() { + return (long) (((double) scrubPeriod) * (1.5 - rng.nextDouble()) * 1000); + } + + private void doSchedule() { + executor.schedule( + this::run, + getNextPeriodMS(), + TimeUnit.MILLISECONDS); + + } + + private void run() { + boolean success = false; + long start = MathUtils.nowInNano(); + try { + List<LedgerStorage.DetectedInconsistency> errors = ledgerStorage.localConsistencyCheck(scrubRateLimiter); + if (errors.size() > 0) { + errorCounter.addCount(errors.size()); + LOG.error("Found inconsistency during localConsistencyCheck:"); + for (LedgerStorage.DetectedInconsistency error : errors) { + LOG.error("Ledger {}, entry {}: ", error.getLedgerId(), error.getEntryId(), error.getException()); + } + } + success = true; + } catch (IOException e) { + fatalErrorCounter.inc(); + LOG.error("Got fatal exception {} running localConsistencyCheck", e.toString()); + } + if (success) { + scrubCounter.registerSuccessfulEvent(MathUtils.elapsedNanos(start), TimeUnit.NANOSECONDS); + } else { + scrubCounter.registerFailedEvent(MathUtils.elapsedNanos(start), TimeUnit.NANOSECONDS); + Runtime.getRuntime().exit(ExitCode.BOOKIE_EXCEPTION); + } + if (!stop.get()) { + doSchedule(); + } + } + + @Override + protected void doStart() { + doSchedule(); + } + + @Override + protected void doStop() { + stop.set(true); + executor.shutdown(); + } + + @Override + protected void doClose() throws IOException { + // no-op + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/service/StatsProviderService.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/service/StatsProviderService.java index ab5d894bfaa..ad734af7ce5 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/service/StatsProviderService.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/server/service/StatsProviderService.java @@ -19,11 +19,11 @@ package org.apache.bookkeeper.server.service; import java.io.IOException; +import org.apache.bookkeeper.common.util.ReflectionUtils; import org.apache.bookkeeper.server.component.ServerLifecycleComponent; import org.apache.bookkeeper.server.conf.BookieConfiguration; import org.apache.bookkeeper.stats.NullStatsLogger; import org.apache.bookkeeper.stats.StatsProvider; -import org.apache.bookkeeper.util.ReflectionUtils; /** * A {@link org.apache.bookkeeper.common.component.LifecycleComponent} that runs stats provider. diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/stats/package-info.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/stats/package-info.java deleted file mode 100644 index df77c581c35..00000000000 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/stats/package-info.java +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License.
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/** - * The bookkeeper stats related classes. - */ -package org.apache.bookkeeper.stats; \ No newline at end of file diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/streaming/LedgerOutputStream.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/streaming/LedgerOutputStream.java index 0d2720428a4..94866877dc8 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/streaming/LedgerOutputStream.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/streaming/LedgerOutputStream.java @@ -89,7 +89,7 @@ public synchronized void flush() { try { lh.addEntry(b); } catch (InterruptedException ie) { - LOG.warn("Interrupted while flusing " + ie); + LOG.warn("Interrupted while flushing " + ie); Thread.currentThread().interrupt(); } catch (BKException bke) { LOG.warn("BookKeeper exception ", bke); @@ -107,9 +107,7 @@ private boolean makeSpace(int len) { if (bytebuff.remaining() < len) { flush(); bytebuff.clear(); - if (bytebuff.capacity() < len) { - return false; - } + return bytebuff.capacity() >= len; } return true; } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tls/BookieAuthZFactory.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tls/BookieAuthZFactory.java new file mode 100644 index 00000000000..5cef655c50b --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tls/BookieAuthZFactory.java @@ -0,0 +1,131 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bookkeeper.tls; + +import com.google.common.base.Strings; +import java.io.IOException; +import java.security.cert.X509Certificate; +import java.util.Collection; +import lombok.extern.slf4j.Slf4j; +import org.apache.bookkeeper.auth.AuthCallbacks; +import org.apache.bookkeeper.auth.AuthToken; +import org.apache.bookkeeper.auth.BookKeeperPrincipal; +import org.apache.bookkeeper.auth.BookieAuthProvider; +import org.apache.bookkeeper.client.BKException; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.proto.BookieConnectionPeer; +import org.apache.bookkeeper.util.CertUtils; + + +/** + * Authorization factory class. 
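+ *
+ * <p>A minimal sketch of enabling this factory through server configuration;
+ * the {@code authorizedRoles} key mirrors the getter used in {@code init},
+ * while the setter name on the first line is assumed for illustration:
+ * <pre>{@code
+ * ServerConfiguration conf = new ServerConfiguration();
+ * conf.setBookieAuthProviderFactoryClass(BookieAuthZFactory.class.getName());
+ * conf.setProperty("authorizedRoles", "bookkeeper-client");
+ * }</pre>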
+ */ +@Slf4j +public class BookieAuthZFactory implements BookieAuthProvider.Factory { + + public String[] allowedRoles; + + @Override + public String getPluginName() { + return "BookieAuthZFactory"; + } + + @Override + public void init(ServerConfiguration conf) throws IOException { + // Read from config + allowedRoles = conf.getAuthorizedRoles(); + + if (allowedRoles == null || allowedRoles.length == 0) { + throw new RuntimeException("Configuration option \'bookieAuthProviderFactoryClass\' is set to" + + " \'BookieAuthZFactory\' but no roles set for configuration field \'authorizedRoles\'."); + } + + // Exit if any configured role is null or empty + for (String allowedRole : allowedRoles) { + if (Strings.isNullOrEmpty(allowedRole)) { + throw new RuntimeException("Configuration option \'bookieAuthProviderFactoryClass\' is set to" + + " \'BookieAuthZFactory\' but no roles set for configuration field \'authorizedRoles\'."); + } + } + } + + @Override + public BookieAuthProvider newProvider(BookieConnectionPeer addr, + final AuthCallbacks.GenericCallback<Void> completeCb) { + return new BookieAuthProvider() { + + AuthCallbacks.GenericCallback<Void> completeCallback = completeCb; + + @Override + public void onProtocolUpgrade() { + + try { + boolean secureBookieSideChannel = addr.isSecure(); + Collection<Object> certificates = addr.getProtocolPrincipals(); + if (secureBookieSideChannel && !certificates.isEmpty() + && certificates.iterator().next() instanceof X509Certificate) { + X509Certificate tempCert = (X509Certificate) certificates.iterator().next(); + String[] certRole = CertUtils.getRolesFromOU(tempCert); + if (certRole == null || certRole.length == 0) { + log.error("AuthZ failed: No cert role in OU field of certificate. Must have a role from " + + "allowedRoles list {} host: {}", + allowedRoles, addr.getRemoteAddr()); + completeCallback.operationComplete(BKException.Code.UnauthorizedAccessException, null); + return; + } + boolean authorized = false; + for (String allowedRole : allowedRoles) { + if (certRole[0].equals(allowedRole)) { + authorized = true; + break; + } + } + if (authorized) { + addr.setAuthorizedId(new BookKeeperPrincipal(certRole[0])); + completeCallback.operationComplete(BKException.Code.OK, null); + } else { + log.error("AuthZ failed: Cert role {} doesn't match allowedRoles list {}; host: {}", + certRole, allowedRoles, addr.getRemoteAddr()); + completeCallback.operationComplete(BKException.Code.UnauthorizedAccessException, null); + } + } else { + if (!secureBookieSideChannel) { + log.error("AuthZ failed: Bookie side channel is not secured; host: {}", + addr.getRemoteAddr()); + } else if (certificates.isEmpty()) { + log.error("AuthZ failed: Certificate missing; host: {}", addr.getRemoteAddr()); + } else { + log.error("AuthZ failed: Certs are missing or not X509 type; host: {}", + addr.getRemoteAddr()); + } + completeCallback.operationComplete(BKException.Code.UnauthorizedAccessException, null); + } + } catch (Exception e) { + log.error("AuthZ failed: Failed to parse certificate; host: {}, {}", addr.getRemoteAddr(), e); + completeCallback.operationComplete(BKException.Code.UnauthorizedAccessException, null); + } + } + + @Override + public void process(AuthToken m, AuthCallbacks.GenericCallback<AuthToken> cb) { + } + }; + } + + +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tls/FileModifiedTimeUpdater.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tls/FileModifiedTimeUpdater.java new file mode 100644 index 00000000000..e41b507a584 --- /dev/null +++
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tls/FileModifiedTimeUpdater.java @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bookkeeper.tls; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.attribute.FileTime; +import lombok.Getter; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Holder class to validate file modification. + */ +public class FileModifiedTimeUpdater { + @Getter + String fileName; + @Getter + FileTime lastModifiedTime; + + public FileModifiedTimeUpdater(String fileName) { + this.fileName = fileName; + this.lastModifiedTime = updateLastModifiedTime(); + } + + private FileTime updateLastModifiedTime() { + if (fileName != null) { + Path p = Paths.get(fileName); + try { + return Files.getLastModifiedTime(p); + } catch (IOException e) { + LOG.error("Unable to fetch lastModified time for file {}: ", fileName, e); + } + } + return null; + } + + public boolean checkAndRefresh() { + FileTime newLastModifiedTime = updateLastModifiedTime(); + if (newLastModifiedTime != null && !newLastModifiedTime.equals(lastModifiedTime)) { + this.lastModifiedTime = newLastModifiedTime; + return true; + } + return false; + } + + private static final Logger LOG = LoggerFactory.getLogger(FileModifiedTimeUpdater.class); +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tls/SecurityException.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tls/SecurityException.java index 67fa1980a76..d4c3ea97259 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tls/SecurityException.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tls/SecurityException.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tls/SecurityHandlerFactory.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tls/SecurityHandlerFactory.java index 59be8847dd9..a545b7ef04c 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tls/SecurityHandlerFactory.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tls/SecurityHandlerFactory.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file * distributed with this work for additional information @@ -17,8 +17,8 @@ */ package org.apache.bookkeeper.tls; +import io.netty.buffer.ByteBufAllocator; import io.netty.handler.ssl.SslHandler; - import org.apache.bookkeeper.conf.AbstractConfiguration; /** @@ -37,7 +37,11 @@ enum NodeType { String getHandlerName(); - void init(NodeType type, AbstractConfiguration conf) throws SecurityException; + void init(NodeType type, AbstractConfiguration conf, ByteBufAllocator allocator) throws SecurityException; SslHandler newTLSHandler(); + + default SslHandler newTLSHandler(String host, int port) { + return this.newTLSHandler(); + } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tls/SecurityProviderFactoryFactory.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tls/SecurityProviderFactoryFactory.java index a6dad0b0059..63bc87e2a43 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tls/SecurityProviderFactoryFactory.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tls/SecurityProviderFactoryFactory.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -17,7 +17,7 @@ */ package org.apache.bookkeeper.tls; -import org.apache.bookkeeper.util.ReflectionUtils; +import org.apache.bookkeeper.common.util.ReflectionUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tls/TLSContextFactory.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tls/TLSContextFactory.java index 17aea85fafa..7570436b952 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tls/TLSContextFactory.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tls/TLSContextFactory.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file * distributed with this work for additional information @@ -18,42 +18,106 @@ package org.apache.bookkeeper.tls; import com.google.common.base.Strings; - import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; -import io.netty.buffer.PooledByteBufAllocator; +import io.netty.buffer.ByteBufAllocator; import io.netty.handler.ssl.ClientAuth; import io.netty.handler.ssl.OpenSsl; import io.netty.handler.ssl.SslContext; import io.netty.handler.ssl.SslContextBuilder; import io.netty.handler.ssl.SslHandler; import io.netty.handler.ssl.SslProvider; - import java.io.File; import java.io.FileInputStream; import java.io.IOException; +import java.nio.charset.StandardCharsets; import java.security.KeyStore; import java.security.KeyStoreException; import java.security.NoSuchAlgorithmException; import java.security.NoSuchProviderException; +import java.security.Provider; +import java.security.Security; import java.security.UnrecoverableKeyException; import java.security.cert.CertificateException; import java.security.spec.InvalidKeySpecException; import java.util.Arrays; - +import java.util.concurrent.TimeUnit; import javax.net.ssl.KeyManagerFactory; +import javax.net.ssl.SSLParameters; import javax.net.ssl.TrustManagerFactory; - +import lombok.extern.slf4j.Slf4j; import org.apache.bookkeeper.conf.AbstractConfiguration; import org.apache.bookkeeper.conf.ClientConfiguration; import org.apache.bookkeeper.conf.ServerConfiguration; import org.apache.commons.io.FileUtils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; /** * A factory to manage TLS contexts. */ +@Slf4j public class TLSContextFactory implements SecurityHandlerFactory { + + public static final Provider BC_PROVIDER = getProvider(); + public static final String BC_FIPS_PROVIDER_CLASS = "org.bouncycastle.jcajce.provider.BouncyCastleFipsProvider"; + public static final String BC_NON_FIPS_PROVIDER_CLASS = "org.bouncycastle.jce.provider.BouncyCastleProvider"; + + // Security.getProvider("BC") / Security.getProvider("BCFIPS"). + // also used to get Factories. e.g. CertificateFactory.getInstance("X.509", "BCFIPS") + public static final String BC_FIPS = "BCFIPS"; + public static final String BC = "BC"; + + /** + * Get Bouncy Castle provider, and call Security.addProvider(provider) if successful. + */ + public static Provider getProvider() { + boolean isProviderInstalled = + Security.getProvider(BC) != null || Security.getProvider(BC_FIPS) != null; + + if (isProviderInstalled) { + Provider provider = Security.getProvider(BC) != null + ? Security.getProvider(BC) + : Security.getProvider(BC_FIPS); + if (log.isDebugEnabled()) { + log.debug("Already instantiated Bouncy Castle provider {}", provider.getName()); + } + return provider; + } + + // Not installed; try to load it from the class path + try { + return getBCProviderFromClassPath(); + } catch (Exception e) { + log.warn("Not able to get Bouncy Castle provider for both FIPS and Non-FIPS from class path:", e); + throw new RuntimeException(e); + } + } + + /** + * Get Bouncy Castle provider from classpath, and call Security.addProvider. + * Throw Exception if failed. + */ + public static Provider getBCProviderFromClassPath() throws Exception { + Class<?> clazz; + try { + clazz = Class.forName(BC_FIPS_PROVIDER_CLASS); + } catch (ClassNotFoundException cnf) { + if (log.isDebugEnabled()) { + log.debug("Not able to load Bouncy Castle FIPS provider {}, falling back to non-FIPS provider {}", + BC_FIPS_PROVIDER_CLASS, BC_NON_FIPS_PROVIDER_CLASS); + } + // attempt to use the NON_FIPS provider.
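+ // Both provider classes are expected to expose a public no-arg
+ // constructor, so the reflective instantiation below works for
+ // whichever implementation is found on the class path.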
+ clazz = Class.forName(BC_NON_FIPS_PROVIDER_CLASS); + + } + + @SuppressWarnings("unchecked") + Provider provider = (Provider) clazz.getDeclaredConstructor().newInstance(); + Security.addProvider(provider); + if (log.isDebugEnabled()) { + log.debug("Found and Instantiated Bouncy Castle provider in classpath {}", provider.getName()); + } + return provider; + } + /** * Supported Key File Types. */ @@ -74,11 +138,18 @@ public String toString() { } } - private static final Logger LOG = LoggerFactory.getLogger(TLSContextFactory.class); private static final String TLSCONTEXT_HANDLER_NAME = "tls"; + private NodeType type; private String[] protocols; private String[] ciphers; - private SslContext sslContext; + private volatile SslContext sslContext; + private ByteBufAllocator allocator; + private AbstractConfiguration config; + private FileModifiedTimeUpdater tlsCertificateFilePath, tlsKeyStoreFilePath, tlsKeyStorePasswordFilePath, + tlsTrustStoreFilePath, tlsTrustStorePasswordFilePath; + private long certRefreshTime; + private volatile long certLastRefreshTime; + private boolean isServerCtx; private String getPasswordFromFile(String path) throws IOException { byte[] pwd; @@ -87,7 +158,7 @@ private String getPasswordFromFile(String path) throws IOException { return ""; } pwd = FileUtils.readFileToByteArray(passwdFile); - return new String(pwd, "UTF-8"); + return new String(pwd, StandardCharsets.UTF_8); } @SuppressFBWarnings( @@ -103,6 +174,7 @@ private KeyStore loadKeyStore(String keyStoreType, String keyStoreLocation, Stri return ks; } + @Override public String getHandlerName() { return TLSCONTEXT_HANDLER_NAME; } @@ -113,7 +185,7 @@ private KeyManagerFactory initKeyManagerFactory(String keyStoreType, String keyS KeyManagerFactory kmf = null; if (Strings.isNullOrEmpty(keyStoreLocation)) { - LOG.error("Key store location cannot be empty when Mutual Authentication is enabled!"); + log.error("Key store location cannot be empty when Mutual Authentication is enabled!"); throw new SecurityException("Key store location cannot be empty when Mutual Authentication is enabled!"); } @@ -136,7 +208,7 @@ private TrustManagerFactory initTrustManagerFactory(String trustStoreType, Strin TrustManagerFactory tmf; if (Strings.isNullOrEmpty(trustStoreLocation)) { - LOG.error("Trust Store location cannot be empty!"); + log.error("Trust Store location cannot be empty!"); throw new SecurityException("Trust Store location cannot be empty!"); } @@ -156,22 +228,32 @@ private TrustManagerFactory initTrustManagerFactory(String trustStoreType, Strin private SslProvider getTLSProvider(String sslProvider) { if (sslProvider.trim().equalsIgnoreCase("OpenSSL")) { if (OpenSsl.isAvailable()) { - LOG.info("Security provider - OpenSSL"); + log.info("Security provider - OpenSSL"); return SslProvider.OPENSSL; } Throwable causeUnavailable = OpenSsl.unavailabilityCause(); - LOG.warn("OpenSSL Unavailable: ", causeUnavailable); + log.warn("OpenSSL Unavailable: ", causeUnavailable); - LOG.info("Security provider - JDK"); + log.info("Security provider - JDK"); return SslProvider.JDK; } - LOG.info("Security provider - JDK"); + log.info("Security provider - JDK"); return SslProvider.JDK; } - private void createClientContext(AbstractConfiguration conf) + private void createClientContext() + throws SecurityException, KeyStoreException, NoSuchAlgorithmException, CertificateException, IOException, + UnrecoverableKeyException, InvalidKeySpecException, NoSuchProviderException { + ClientConfiguration clientConf = (ClientConfiguration) config; + 
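// Snapshot the modification times of the cert/keystore files so that
+ // getSSLContext() can later detect rotation and rebuild the context.
+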
markAutoCertRefresh(clientConf.getTLSCertificatePath(), clientConf.getTLSKeyStore(), + clientConf.getTLSKeyStorePasswordPath(), clientConf.getTLSTrustStore(), + clientConf.getTLSTrustStorePasswordPath()); + updateClientContext(); + } + + private synchronized void updateClientContext() throws SecurityException, KeyStoreException, NoSuchAlgorithmException, CertificateException, IOException, UnrecoverableKeyException, InvalidKeySpecException, NoSuchProviderException { final SslContextBuilder sslContextBuilder; @@ -180,11 +262,11 @@ private void createClientContext(AbstractConfiguration conf) final boolean clientAuthentication; // get key-file and trust-file locations and passwords - if (!(conf instanceof ClientConfiguration)) { - throw new SecurityException("Client configruation not provided"); + if (!(config instanceof ClientConfiguration)) { + throw new SecurityException("Client configuration not provided"); } - clientConf = (ClientConfiguration) conf; + clientConf = (ClientConfiguration) config; provider = getTLSProvider(clientConf.getTLSProvider()); clientAuthentication = clientConf.getTLSClientAuthentication(); @@ -258,9 +340,44 @@ private void createClientContext(AbstractConfiguration conf) } sslContext = sslContextBuilder.build(); + certLastRefreshTime = System.currentTimeMillis(); + } + + private void createServerContext() + throws SecurityException, KeyStoreException, NoSuchAlgorithmException, CertificateException, IOException, + UnrecoverableKeyException, InvalidKeySpecException, IllegalArgumentException { + isServerCtx = true; + ServerConfiguration serverConf = (ServerConfiguration) config; + markAutoCertRefresh(serverConf.getTLSCertificatePath(), serverConf.getTLSKeyStore(), + serverConf.getTLSKeyStorePasswordPath(), serverConf.getTLSTrustStore(), + serverConf.getTLSTrustStorePasswordPath()); + updateServerContext(); + } + + private synchronized SslContext getSSLContext() { + long now = System.currentTimeMillis(); + if (certRefreshTime > 0 && now > (certLastRefreshTime + certRefreshTime)) { + if (tlsCertificateFilePath.checkAndRefresh() || tlsKeyStoreFilePath.checkAndRefresh() + || tlsKeyStorePasswordFilePath.checkAndRefresh() || tlsTrustStoreFilePath.checkAndRefresh() + || tlsTrustStorePasswordFilePath.checkAndRefresh()) { + try { + log.info("Updating tls certs certFile={}, keyStoreFile={}, trustStoreFile={}", + tlsCertificateFilePath.getFileName(), tlsKeyStoreFilePath.getFileName(), + tlsTrustStoreFilePath.getFileName()); + if (isServerCtx) { + updateServerContext(); + } else { + updateClientContext(); + } + } catch (Exception e) { + log.warn("Failed to refresh tls certs", e); + } + } + } + return sslContext; } - private void createServerContext(AbstractConfiguration conf) throws SecurityException, KeyStoreException, + private synchronized void updateServerContext() throws SecurityException, KeyStoreException, NoSuchAlgorithmException, CertificateException, IOException, UnrecoverableKeyException, InvalidKeySpecException, IllegalArgumentException { final SslContextBuilder sslContextBuilder; @@ -269,11 +386,11 @@ private void createServerContext(AbstractConfiguration conf) throws SecurityExce final boolean clientAuthentication; // get key-file and trust-file locations and passwords - if (!(conf instanceof ServerConfiguration)) { - throw new SecurityException("Server configruation not provided"); + if (!(config instanceof ServerConfiguration)) { + throw new SecurityException("Server configuration not provided"); } - serverConf = (ServerConfiguration) conf; + serverConf =
(ServerConfiguration) config; provider = getTLSProvider(serverConf.getTLSProvider()); clientAuthentication = serverConf.getTLSClientAuthentication(); @@ -347,12 +464,18 @@ private void createServerContext(AbstractConfiguration conf) throws SecurityExce } sslContext = sslContextBuilder.build(); + certLastRefreshTime = System.currentTimeMillis(); } @Override - public synchronized void init(NodeType type, AbstractConfiguration conf) throws SecurityException { + public synchronized void init(NodeType type, AbstractConfiguration conf, ByteBufAllocator allocator) + throws SecurityException { + this.allocator = allocator; + this.config = conf; + this.type = type; final String enabledProtocols; final String enabledCiphers; + certRefreshTime = TimeUnit.SECONDS.toMillis(conf.getTLSCertFilesRefreshDurationSeconds()); enabledCiphers = conf.getTLSEnabledCipherSuites(); enabledProtocols = conf.getTLSEnabledProtocols(); @@ -360,10 +483,10 @@ public synchronized void init(NodeType type, AbstractConfiguration conf) throws try { switch (type) { case Client: - createClientContext(conf); + createClientContext(); break; case Server: - createServerContext(conf); + createServerContext(); break; default: throw new SecurityException(new IllegalArgumentException("Invalid NodeType")); @@ -397,22 +520,45 @@ public synchronized void init(NodeType type, AbstractConfiguration conf) throws @Override public SslHandler newTLSHandler() { - SslHandler sslHandler = sslContext.newHandler(PooledByteBufAllocator.DEFAULT); + return this.newTLSHandler(null, -1); + } + + @Override + public SslHandler newTLSHandler(String peer, int port) { + SslHandler sslHandler = getSSLContext().newHandler(allocator, peer, port); if (protocols != null && protocols.length != 0) { sslHandler.engine().setEnabledProtocols(protocols); } - if (LOG.isDebugEnabled()) { - LOG.debug("Enabled cipher protocols: {} ", Arrays.toString(sslHandler.engine().getEnabledProtocols())); + if (log.isDebugEnabled()) { + log.debug("Enabled cipher protocols: {} ", Arrays.toString(sslHandler.engine().getEnabledProtocols())); } if (ciphers != null && ciphers.length != 0) { sslHandler.engine().setEnabledCipherSuites(ciphers); } - if (LOG.isDebugEnabled()) { - LOG.debug("Enabled cipher suites: {} ", Arrays.toString(sslHandler.engine().getEnabledCipherSuites())); + if (log.isDebugEnabled()) { + log.debug("Enabled cipher suites: {} ", Arrays.toString(sslHandler.engine().getEnabledCipherSuites())); + } + + if (type == NodeType.Client && ((ClientConfiguration) config).getHostnameVerificationEnabled()) { + SSLParameters sslParameters = sslHandler.engine().getSSLParameters(); + sslParameters.setEndpointIdentificationAlgorithm("HTTPS"); + sslHandler.engine().setSSLParameters(sslParameters); + if (log.isDebugEnabled()) { + log.debug("Enabled endpointIdentificationAlgorithm: HTTPS"); + } } return sslHandler; } + + private void markAutoCertRefresh(String tlsCertificatePath, String tlsKeyStore, String tlsKeyStorePasswordPath, + String tlsTrustStore, String tlsTrustStorePasswordPath) { + tlsCertificateFilePath = new FileModifiedTimeUpdater(tlsCertificatePath); + tlsKeyStoreFilePath = new FileModifiedTimeUpdater(tlsKeyStore); + tlsKeyStorePasswordFilePath = new FileModifiedTimeUpdater(tlsKeyStorePasswordPath); + tlsTrustStoreFilePath = new FileModifiedTimeUpdater(tlsTrustStore); + tlsTrustStorePasswordFilePath = new FileModifiedTimeUpdater(tlsTrustStorePasswordPath); + } } diff --git 
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/autorecovery/ListUnderReplicatedCommand.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/autorecovery/ListUnderReplicatedCommand.java new file mode 100644 index 00000000000..09077cef700 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/autorecovery/ListUnderReplicatedCommand.java @@ -0,0 +1,189 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.bookkeeper.tools.cli.commands.autorecovery; + +import static org.apache.bookkeeper.meta.MetadataDrivers.runFunctionWithLedgerManagerFactory; + +import com.beust.jcommander.Parameter; +import com.google.common.util.concurrent.UncheckedExecutionException; +import java.util.Iterator; +import java.util.List; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.Predicate; +import lombok.Setter; +import lombok.experimental.Accessors; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.meta.LedgerUnderreplicationManager; +import org.apache.bookkeeper.meta.UnderreplicatedLedger; +import org.apache.bookkeeper.meta.exceptions.MetadataException; +import org.apache.bookkeeper.replication.ReplicationException; +import org.apache.bookkeeper.tools.cli.helpers.BookieCommand; +import org.apache.bookkeeper.tools.framework.CliFlags; +import org.apache.bookkeeper.tools.framework.CliSpec; +import org.apache.bookkeeper.util.LedgerIdFormatter; +import org.apache.commons.lang.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Command to list under replicated ledgers. + */ +public class ListUnderReplicatedCommand extends BookieCommand<ListUnderReplicatedCommand.LURFlags> { + + static final Logger LOG = LoggerFactory.getLogger(ListUnderReplicatedCommand.class); + + private static final String NAME = "listunderreplicated"; + private static final String DESC = "List ledgers marked as underreplicated, with optional flags to specify " + + "missingreplica (BookieId) and to exclude missingreplica."; + private static final String DEFAULT = ""; + + private LedgerIdFormatter ledgerIdFormatter; + + public ListUnderReplicatedCommand() { + this(new LURFlags()); + } + + public ListUnderReplicatedCommand(LedgerIdFormatter ledgerIdFormatter) { + this(); + this.ledgerIdFormatter = ledgerIdFormatter; + } + + private ListUnderReplicatedCommand(LURFlags flags) { + super(CliSpec.<LURFlags>newBuilder() + .withName(NAME) + .withDescription(DESC) + .withFlags(flags) + .build()); + } + + /** + * Flags for list under replicated command.
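+ *
+ * <p>Illustrative programmatic use (see the flags defined below); the fluent
+ * setters come from Lombok's {@code @Accessors(fluent = true)}, and the
+ * bookie id is a placeholder:
+ * <pre>{@code
+ * new ListUnderReplicatedCommand().apply(conf,
+ *         new LURFlags().missingReplica("bookie-1:3181").printMissingReplica(true));
+ * }</pre>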
+ */ + @Accessors(fluent = true) + @Setter + public static class LURFlags extends CliFlags { + + @Parameter(names = { "-pmr", "--printmissingreplica" }, description = "Whether to print missingreplicas list?") + private boolean printMissingReplica; + + @Parameter(names = { "-prw", + "--printreplicationworkerid" }, description = "Whether to print replicationworkerid?") + private boolean printReplicationWorkerId; + + @Parameter(names = { "-mr", "--missingreplica" }, description = "Bookie Id of missing replica") + private String missingReplica = DEFAULT; + + @Parameter(names = { "-emr", "--excludingmissingreplica" }, description = "Bookie Id of missing replica to " + + "ignore") + private String excludingMissingReplica = DEFAULT; + + @Parameter(names = {"-l", "--ledgeridformatter"}, description = "Set ledger id formatter") + private String ledgerIdFormatter = DEFAULT; + + @Parameter(names = {"-c", "--onlydisplayledgercount"}, + description = "Only display underreplicated ledger count") + private boolean onlyDisplayLedgerCount; + } + + @Override + public boolean apply(ServerConfiguration conf, LURFlags cmdFlags) { + if (!cmdFlags.ledgerIdFormatter.equals(DEFAULT) && ledgerIdFormatter == null) { + ledgerIdFormatter = LedgerIdFormatter.newLedgerIdFormatter(cmdFlags.ledgerIdFormatter, conf); + } else if (ledgerIdFormatter == null) { + ledgerIdFormatter = LedgerIdFormatter.newLedgerIdFormatter(conf); + } + try { + return handler(conf, cmdFlags); + } catch (Exception e) { + throw new UncheckedExecutionException(e.getMessage(), e); + } + } + + public boolean handler(ServerConfiguration bkConf, LURFlags flags) throws MetadataException, ExecutionException { + final String includingBookieId = flags.missingReplica; + final String excludingBookieId = flags.excludingMissingReplica; + final boolean printMissingReplica = flags.printMissingReplica; + final boolean printReplicationWorkerId = flags.printReplicationWorkerId; + final boolean onlyDisplayLedgerCount = flags.onlyDisplayLedgerCount; + + final Predicate<List<String>> predicate; + if (!StringUtils.isBlank(includingBookieId) && !StringUtils.isBlank(excludingBookieId)) { + predicate = replicasList -> (replicasList.contains(includingBookieId) + && !replicasList.contains(excludingBookieId)); + } else if (!StringUtils.isBlank(includingBookieId)) { + predicate = replicasList -> replicasList.contains(includingBookieId); + } else if (!StringUtils.isBlank(excludingBookieId)) { + predicate = replicasList -> !replicasList.contains(excludingBookieId); + } else { + predicate = null; + } + + AtomicInteger underReplicatedLedgerCount = new AtomicInteger(0); + runFunctionWithLedgerManagerFactory(bkConf, mFactory -> { + LedgerUnderreplicationManager underreplicationManager; + try { + underreplicationManager = mFactory.newLedgerUnderreplicationManager(); + } catch (ReplicationException e) { + throw new UncheckedExecutionException("Failed to create ledger underreplication manager", e); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new UncheckedExecutionException("Interrupted while creating ledger underreplication manager", e); + } + Iterator<UnderreplicatedLedger> iter = underreplicationManager.listLedgersToRereplicate(predicate); + while (iter.hasNext()) { + UnderreplicatedLedger underreplicatedLedger = iter.next(); + underReplicatedLedgerCount.incrementAndGet(); + if (onlyDisplayLedgerCount) { + continue; + } + + long urLedgerId = underreplicatedLedger.getLedgerId(); + LOG.info("{}", ledgerIdFormatter.formatLedgerId(urLedgerId)); + long ctime = underreplicatedLedger.getCtime();
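+ // UNASSIGNED_CTIME is the sentinel for underreplication records that
+ // carry no creation time, so ctime is only printed when it was recorded.
+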
if (ctime != UnderreplicatedLedger.UNASSIGNED_CTIME) { + LOG.info("\tCtime : {}", ctime); + } + if (printMissingReplica) { + underreplicatedLedger.getReplicaList().forEach((missingReplica) -> { + LOG.info("\tMissingReplica : {}", missingReplica); + }); + } + if (printReplicationWorkerId) { + try { + String replicationWorkerId = underreplicationManager + .getReplicationWorkerIdRereplicatingLedger(urLedgerId); + if (replicationWorkerId != null) { + LOG.info("\tReplicationWorkerId : {}", replicationWorkerId); + } + } catch (ReplicationException.UnavailableException e) { + LOG.error("Failed to get ReplicationWorkerId rereplicating ledger {} -- {}", urLedgerId, + e.getMessage()); + } + } + } + + LOG.info("Under replicated ledger count: {}", underReplicatedLedgerCount.get()); + return null; + }); + return true; + } + +} + diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/autorecovery/LostBookieRecoveryDelayCommand.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/autorecovery/LostBookieRecoveryDelayCommand.java new file mode 100644 index 00000000000..defab6895fb --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/autorecovery/LostBookieRecoveryDelayCommand.java @@ -0,0 +1,119 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.bookkeeper.tools.cli.commands.autorecovery; + +import com.beust.jcommander.Parameter; +import com.google.common.util.concurrent.UncheckedExecutionException; +import java.io.IOException; +import lombok.Setter; +import lombok.experimental.Accessors; +import org.apache.bookkeeper.client.BKException; +import org.apache.bookkeeper.client.BookKeeperAdmin; +import org.apache.bookkeeper.conf.ClientConfiguration; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.replication.ReplicationException; +import org.apache.bookkeeper.tools.cli.helpers.BookieCommand; +import org.apache.bookkeeper.tools.framework.CliFlags; +import org.apache.bookkeeper.tools.framework.CliSpec; +import org.apache.zookeeper.KeeperException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Command to set or get the LostBookieRecoveryDelay value (in seconds) in the metadata store.
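+ *
+ * <p>Sketch of the two mutually exclusive modes (fluent setters via Lombok):
+ * <pre>{@code
+ * new LostBookieRecoveryDelayCommand().apply(conf, new LBRDFlags().get(true)); // read
+ * new LostBookieRecoveryDelayCommand().apply(conf, new LBRDFlags().set(300));  // write 300s
+ * }</pre>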
+ */ +public class LostBookieRecoveryDelayCommand extends BookieCommand<LostBookieRecoveryDelayCommand.LBRDFlags> { + + static final Logger LOG = LoggerFactory.getLogger(LostBookieRecoveryDelayCommand.class); + + private static final String NAME = "lostbookierecoverydelay"; + private static final String DESC = + "Setter and Getter for LostBookieRecoveryDelay value (in seconds) in metadata store"; + + private static final int DEFAULT = 0; + + public LostBookieRecoveryDelayCommand() { + this(new LBRDFlags()); + } + + private LostBookieRecoveryDelayCommand(LBRDFlags flags) { + super(CliSpec.<LBRDFlags>newBuilder() + .withName(NAME) + .withDescription(DESC) + .withFlags(flags) + .build()); + } + + /** + * Flags for command LostBookieRecoveryDelay. + */ + @Accessors(fluent = true) + @Setter + public static class LBRDFlags extends CliFlags { + + @Parameter(names = { "-g", "--get" }, description = "Get LostBookieRecoveryDelay value (in seconds)") + private boolean get; + + @Parameter(names = { "-s", "--set" }, description = "Set LostBookieRecoveryDelay value (in seconds)") + private int set = DEFAULT; + + } + + @Override + public boolean apply(ServerConfiguration conf, LBRDFlags cmdFlags) { + try { + return handler(conf, cmdFlags); + } catch (Exception e) { + throw new UncheckedExecutionException(e.getMessage(), e); + } + } + + public boolean handler(ServerConfiguration conf, LBRDFlags flags) + throws InterruptedException, BKException, IOException, ReplicationException.UnavailableException, + ReplicationException.CompatibilityException, KeeperException { + boolean getter = flags.get; + boolean setter = false; + if (flags.set != DEFAULT) { + setter = true; + } + + if ((!getter && !setter) || (getter && setter)) { + LOG.error("One and only one of --get and --set must be specified"); + return false; + } + ClientConfiguration adminConf = new ClientConfiguration(conf); + BookKeeperAdmin admin = new BookKeeperAdmin(adminConf); + try { + if (getter) { + int lostBookieRecoveryDelay = admin.getLostBookieRecoveryDelay(); + LOG.info("LostBookieRecoveryDelay value in ZK: {}", lostBookieRecoveryDelay); + } else { + int lostBookieRecoveryDelay = flags.set; + admin.setLostBookieRecoveryDelay(lostBookieRecoveryDelay); + LOG.info("Successfully set LostBookieRecoveryDelay value in ZK: {}", + lostBookieRecoveryDelay); + } + } finally { + if (admin != null) { + admin.close(); + } + } + return true; + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/autorecovery/QueryAutoRecoveryStatusCommand.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/autorecovery/QueryAutoRecoveryStatusCommand.java new file mode 100644 index 00000000000..c301a13a6a7 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/autorecovery/QueryAutoRecoveryStatusCommand.java @@ -0,0 +1,151 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied.
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.bookkeeper.tools.cli.commands.autorecovery; + +import static org.apache.bookkeeper.meta.MetadataDrivers.runFunctionWithLedgerManagerFactory; + +import com.beust.jcommander.Parameter; +import com.google.common.util.concurrent.UncheckedExecutionException; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import lombok.Setter; +import lombok.experimental.Accessors; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.meta.LedgerManager; +import org.apache.bookkeeper.meta.LedgerUnderreplicationManager; +import org.apache.bookkeeper.meta.UnderreplicatedLedger; +import org.apache.bookkeeper.replication.ReplicationException; +import org.apache.bookkeeper.tools.cli.helpers.BookieCommand; +import org.apache.bookkeeper.tools.framework.CliFlags; +import org.apache.bookkeeper.tools.framework.CliSpec; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +/** + * Command to query the current auto recovery status. + */ +public class QueryAutoRecoveryStatusCommand + extends BookieCommand<QueryAutoRecoveryStatusCommand.QFlags> { + static final Logger LOG = LoggerFactory. + getLogger(QueryAutoRecoveryStatusCommand.class); + private static final String NAME = "queryautorecoverystatus"; + private static final String DESC = "Query autorecovery status."; + + public QueryAutoRecoveryStatusCommand() { + super(CliSpec.<QFlags>newBuilder() + .withName(NAME) + .withDescription(DESC) + .withFlags(new QFlags()) + .build()); + } + + @Override + public boolean apply(ServerConfiguration conf, QFlags cmdFlags) { + try { + return handler(conf, cmdFlags); + } catch (Exception e) { + throw new UncheckedExecutionException(e.getMessage(), e); + } + } + + /** + * Flags for query auto recovery status command.
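+ *
+ * <p>Illustrative use; with {@code verbose} set, per-ledger sizes are also
+ * read from the ledger metadata:
+ * <pre>{@code
+ * new QueryAutoRecoveryStatusCommand().apply(conf, new QFlags().verbose(true));
+ * }</pre>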
+ */ + @Accessors(fluent = true) + @Setter + public static class QFlags extends CliFlags { + @Parameter(names = {"-v", "--verbose"}, description = "list detailed info for recovering ledgers") + private Boolean verbose = false; + } + + private static class LedgerRecoverInfo { + Long ledgerId; + String bookieId; + LedgerRecoverInfo(Long ledgerId, String bookieId) { + this.ledgerId = ledgerId; + this.bookieId = bookieId; + } + } + + /* + Print Message format is like this: + + CurrentRecoverLedgerInfo: + LedgerId: BookieId: LedgerSize:(detail) + LedgerId: BookieId: LedgerSize:(detail) + */ + public boolean handler(ServerConfiguration conf, QFlags flag) throws Exception { + runFunctionWithLedgerManagerFactory(conf, mFactory -> { + LedgerUnderreplicationManager underreplicationManager; + LedgerManager ledgerManager = mFactory.newLedgerManager(); + List<LedgerRecoverInfo> ledgerList = new LinkedList<>(); + try { + underreplicationManager = mFactory.newLedgerUnderreplicationManager(); + } catch (ReplicationException e) { + throw new UncheckedExecutionException("Failed to create ledger underreplication manager", e); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new UncheckedExecutionException("Interrupted while creating ledger underreplication manager", e); + } + Iterator<UnderreplicatedLedger> iter = underreplicationManager.listLedgersToRereplicate(null); + while (iter.hasNext()) { + UnderreplicatedLedger underreplicatedLedger = iter.next(); + long urLedgerId = underreplicatedLedger.getLedgerId(); + try { + String replicationWorkerId = underreplicationManager + .getReplicationWorkerIdRereplicatingLedger(urLedgerId); + if (replicationWorkerId != null) { + ledgerList.add(new LedgerRecoverInfo(urLedgerId, replicationWorkerId)); + } + } catch (ReplicationException.UnavailableException e) { + LOG.error("Failed to get ReplicationWorkerId rereplicating ledger {} -- {}", urLedgerId, + e.getMessage()); + } + } + + LOG.info("CurrentRecoverLedgerInfo:"); + if (!flag.verbose) { + for (int i = 0; i < ledgerList.size(); i++) { + LOG.info("\tLedgerId:{}\tBookieId:{}", ledgerList.get(i).ledgerId, ledgerList.get(i).bookieId); + } + } else { + for (int i = 0; i < ledgerList.size(); i++) { + LedgerRecoverInfo info = ledgerList.get(i); + ledgerManager.readLedgerMetadata(info.ledgerId).whenComplete((metadata, exception) -> { + if (exception == null) { + LOG.info("\tLedgerId:{}\tBookieId:{}\tLedgerSize:{}", + info.ledgerId, info.bookieId, metadata.getValue().getLength()); + } else { + LOG.error("Unable to read information for ledger {}", info.ledgerId); + throw new UncheckedExecutionException(exception); + } + }); + } + } + if (ledgerList.isEmpty()) { + // No ledger is currently being auto recovered + LOG.info("\t No Ledger is being recovered."); + } + return null; + }); + return true; + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/autorecovery/ToggleCommand.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/autorecovery/ToggleCommand.java new file mode 100644 index 00000000000..99185a8a97d --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/autorecovery/ToggleCommand.java @@ -0,0 +1,122 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.bookkeeper.tools.cli.commands.autorecovery; + +import com.beust.jcommander.Parameter; +import com.google.common.util.concurrent.UncheckedExecutionException; +import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; +import java.util.concurrent.ExecutionException; +import lombok.Setter; +import lombok.experimental.Accessors; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.meta.LedgerUnderreplicationManager; +import org.apache.bookkeeper.meta.MetadataDrivers; +import org.apache.bookkeeper.meta.exceptions.MetadataException; +import org.apache.bookkeeper.replication.ReplicationException; +import org.apache.bookkeeper.tools.cli.helpers.BookieCommand; +import org.apache.bookkeeper.tools.framework.CliFlags; +import org.apache.bookkeeper.tools.framework.CliSpec; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +/** + * Command to enable or disable auto recovery in the cluster. + */ +@SuppressFBWarnings("RCN_REDUNDANT_NULLCHECK_WOULD_HAVE_BEEN_A_NPE") +public class ToggleCommand extends BookieCommand<ToggleCommand.AutoRecoveryFlags> { + + static final Logger LOG = LoggerFactory.getLogger(ToggleCommand.class); + + private static final String NAME = "toggle"; + private static final String DESC = "Enable or disable auto recovery in the cluster. Default is disable."; + + public ToggleCommand() { + this(new AutoRecoveryFlags()); + } + + private ToggleCommand(AutoRecoveryFlags flags) { + super(CliSpec.<AutoRecoveryFlags>newBuilder() + .withName(NAME).withDescription(DESC) + .withFlags(flags).build()); + } + + /** + * Flags for auto recovery command. + */ + @Accessors(fluent = true) + @Setter + public static class AutoRecoveryFlags extends CliFlags { + + @Parameter(names = { "-e", "--enable" }, description = "Enable or disable auto recovery of under replicated " + + "ledgers.") + private boolean enable; + + @Parameter(names = {"-s", "--status"}, description = "Check the auto recovery status.") + private boolean status; + + } + + @Override + public boolean apply(ServerConfiguration conf, AutoRecoveryFlags cmdFlags) { + try { + return handler(conf, cmdFlags); + } catch (MetadataException | ExecutionException e) { + throw new UncheckedExecutionException(e.getMessage(), e); + } + } + + private boolean handler(ServerConfiguration conf, AutoRecoveryFlags flags) + throws MetadataException, ExecutionException { + MetadataDrivers.runFunctionWithLedgerManagerFactory(conf, mFactory -> { + try { + try (LedgerUnderreplicationManager underreplicationManager = mFactory + .newLedgerUnderreplicationManager()) { + if (flags.status) { + LOG.info("Autorecovery is {}", (underreplicationManager.isLedgerReplicationEnabled() + ? "enabled." : "disabled.")); + return null; + } + if (flags.enable) { + if (underreplicationManager.isLedgerReplicationEnabled()) { + LOG.warn("Autorecovery already enabled.
Doing nothing"); + } else { + LOG.info("Enabling autorecovery"); + underreplicationManager.enableLedgerReplication(); + } + } else { + if (!underreplicationManager.isLedgerReplicationEnabled()) { + LOG.warn("Autorecovery already disabled. Doing nothing"); + } else { + LOG.info("Disabling autorecovery"); + underreplicationManager.disableLedgerReplication(); + } + } + } + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new UncheckedExecutionException(e); + } catch (ReplicationException e) { + throw new UncheckedExecutionException(e); + } + return null; + }); + return true; + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/autorecovery/TriggerAuditCommand.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/autorecovery/TriggerAuditCommand.java new file mode 100644 index 00000000000..c645a5c9972 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/autorecovery/TriggerAuditCommand.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.bookkeeper.tools.cli.commands.autorecovery; + +import com.google.common.util.concurrent.UncheckedExecutionException; +import org.apache.bookkeeper.client.BookKeeperAdmin; +import org.apache.bookkeeper.conf.ClientConfiguration; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.tools.cli.helpers.BookieCommand; +import org.apache.bookkeeper.tools.framework.CliFlags; +import org.apache.bookkeeper.tools.framework.CliSpec; + +/** + * Command to trigger AuditTask by resetting lostBookieRecoveryDelay to its current value. 
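+ *
+ * <p>A sketch of the equivalent admin-API call that this command wraps
+ * (assuming BookKeeperAdmin can be used in a try-with-resources block):
+ * <pre>{@code
+ * try (BookKeeperAdmin admin = new BookKeeperAdmin(new ClientConfiguration(conf))) {
+ *     admin.triggerAudit();
+ * }
+ * }</pre>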
+ */ +public class TriggerAuditCommand extends BookieCommand { + + private static final String NAME = "triggeraudit"; + private static final String DESC = "Force trigger the Audit by resetting the lostBookieRecoveryDelay."; + + public TriggerAuditCommand() { + super(CliSpec.newBuilder() + .withName(NAME) + .withDescription(DESC) + .withFlags(new CliFlags()) + .build()); + } + + @Override + public boolean apply(ServerConfiguration conf, CliFlags cmdFlags) { + try { + return handler(conf); + } catch (Exception e) { + throw new UncheckedExecutionException(e.getMessage(), e); + } + } + + public boolean handler(ServerConfiguration configuration) throws Exception { + ClientConfiguration adminConf = new ClientConfiguration(configuration); + BookKeeperAdmin admin = new BookKeeperAdmin(adminConf); + + try { + admin.triggerAudit(); + } finally { + if (admin != null) { + admin.close(); + } + } + + return true; + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/autorecovery/WhoIsAuditorCommand.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/autorecovery/WhoIsAuditorCommand.java new file mode 100644 index 00000000000..853fca0b8f9 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/autorecovery/WhoIsAuditorCommand.java @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.bookkeeper.tools.cli.commands.autorecovery; + + +import com.google.common.util.concurrent.UncheckedExecutionException; +import java.io.IOException; +import lombok.Cleanup; +import org.apache.bookkeeper.client.BKException; +import org.apache.bookkeeper.client.BookKeeperAdmin; +import org.apache.bookkeeper.conf.ClientConfiguration; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.tools.cli.helpers.BookieCommand; +import org.apache.bookkeeper.tools.framework.CliFlags; +import org.apache.bookkeeper.tools.framework.CliSpec; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Command to print which node has the auditor lock. 
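+ * <p>Behaviour sketch (from the code below): apply() returns false and logs
+ * "No auditor elected" when no bookie currently holds the auditor lock, otherwise it
+ * logs the elected BookieId and returns true.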
+ */ +public class WhoIsAuditorCommand extends BookieCommand { + + static final Logger LOG = LoggerFactory.getLogger(WhoIsAuditorCommand.class); + + private static final String NAME = "whoisauditor"; + private static final String DESC = "Print the node which holds the auditor lock."; + + private BookKeeperAdmin bka; + + public WhoIsAuditorCommand() { + this(null); + } + + public WhoIsAuditorCommand(BookKeeperAdmin bka) { + super(CliSpec.newBuilder() + .withName(NAME) + .withDescription(DESC) + .withFlags(new CliFlags()) + .build()); + this.bka = bka; + } + + @Override + public boolean apply(ServerConfiguration conf, CliFlags cmdFlags) { + try { + return getAuditor(conf); + } catch (Exception e) { + throw new UncheckedExecutionException(e.getMessage(), e); + } + } + + private boolean getAuditor(ServerConfiguration conf) + throws BKException, InterruptedException, IOException { + ClientConfiguration clientConfiguration = new ClientConfiguration(conf); + + BookieId bookieId; + if (this.bka != null) { + bookieId = bka.getCurrentAuditor(); + } else { + @Cleanup + BookKeeperAdmin bka = new BookKeeperAdmin(clientConfiguration); + bookieId = bka.getCurrentAuditor(); + } + if (bookieId == null) { + LOG.info("No auditor elected"); + return false; + } + LOG.info("Auditor: " + bookieId); + return true; + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/autorecovery/package-info.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/autorecovery/package-info.java new file mode 100644 index 00000000000..6f19bae5695 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/autorecovery/package-info.java @@ -0,0 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.bookkeeper.tools.cli.commands.autorecovery; \ No newline at end of file diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookie/CheckDBLedgersIndexCommand.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookie/CheckDBLedgersIndexCommand.java new file mode 100644 index 00000000000..fe844e5be41 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookie/CheckDBLedgersIndexCommand.java @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.bookkeeper.tools.cli.commands.bookie; + +import com.beust.jcommander.Parameter; +import java.io.IOException; +import lombok.Setter; +import lombok.experimental.Accessors; +import org.apache.bookkeeper.bookie.storage.ldb.LedgersIndexCheckOp; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.tools.cli.helpers.BookieCommand; +import org.apache.bookkeeper.tools.framework.CliFlags; +import org.apache.bookkeeper.tools.framework.CliSpec; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Command to check the DBLedgerStorage ledgers index integrity. + */ +public class CheckDBLedgersIndexCommand extends BookieCommand { + + static final Logger LOG = LoggerFactory.getLogger(CheckDBLedgersIndexCommand.class); + + private static final String NAME = "check-db-ledgers-index"; + private static final String DESC = "Check the DBLedgerStorage ledgers index integrity by performing a read scan"; + + public CheckDBLedgersIndexCommand() { + this(new CheckLedgersIndexFlags()); + } + + public CheckDBLedgersIndexCommand(CheckLedgersIndexFlags flags) { + super(CliSpec.newBuilder().withName(NAME) + .withDescription(DESC).withFlags(flags).build()); + } + + @Override + public boolean apply(ServerConfiguration conf, CheckLedgersIndexFlags cmdFlags) { + LOG.info("=== Checking DBStorage ledgers index by running a read scan ==="); + ServerConfiguration serverConfiguration = new ServerConfiguration(conf); + try { + boolean success = new LedgersIndexCheckOp(serverConfiguration, cmdFlags.verbose).initiate(); + if (success) { + LOG.info("-- Done checking DBStorage ledgers index --"); + } else { + LOG.info("-- Aborted checking DBStorage ledgers index --"); + } + + return success; + } catch (IOException e) { + e.printStackTrace(); + return false; + } + + } + + /** + * Flags for read log command. + */ + @Accessors(fluent = true) + @Setter + public static class CheckLedgersIndexFlags extends CliFlags { + @Parameter(names = { "-v", "--verbose" }, description = "Verbose logging. Print each ledger.") + private boolean verbose; + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookie/ConvertToDBStorageCommand.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookie/ConvertToDBStorageCommand.java new file mode 100644 index 00000000000..602caf09c35 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookie/ConvertToDBStorageCommand.java @@ -0,0 +1,128 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.bookkeeper.tools.cli.commands.bookie; + +import com.beust.jcommander.Parameter; +import com.google.common.util.concurrent.UncheckedExecutionException; +import lombok.Setter; +import lombok.experimental.Accessors; +import org.apache.bookkeeper.bookie.BookieImpl; +import org.apache.bookkeeper.bookie.InterleavedLedgerStorage; +import org.apache.bookkeeper.bookie.LedgerCache; +import org.apache.bookkeeper.bookie.storage.ldb.DbLedgerStorage; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.tools.cli.helpers.BookieCommand; +import org.apache.bookkeeper.tools.framework.CliFlags; +import org.apache.bookkeeper.tools.framework.CliSpec; +import org.apache.bookkeeper.util.LedgerIdFormatter; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * A command to convert bookie indexes from InterleavedStorage to DbLedgerStorage format. + */ +public class ConvertToDBStorageCommand extends BookieCommand { + + private static final Logger LOG = LoggerFactory.getLogger(ConvertToDBStorageCommand.class); + private static final String NAME = "converttodbstorage"; + private static final String DESC = "Convert bookie indexes from InterleavedStorage to DbLedgerStorage format"; + private static final String NOT_INIT = "default formatter"; + + @Setter + private LedgerIdFormatter ledgerIdFormatter; + + public ConvertToDBStorageCommand() { + this(new CTDBFlags()); + } + public ConvertToDBStorageCommand(CTDBFlags flags) { + super(CliSpec.newBuilder().withFlags(flags).withName(NAME).withDescription(DESC).build()); + } + + /** + * Flags for this command. 
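+ * <p>Only one flag is defined: {@code -l/--ledgeridformatter}. When left at the
+ * "default formatter" sentinel the formatter is resolved from the ServerConfiguration,
+ * otherwise the given formatter name is looked up explicitly.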
+ */ + @Accessors(fluent = true) + @Setter + public static class CTDBFlags extends CliFlags { + @Parameter(names = { "-l", "--ledgeridformatter" }, description = "Set ledger id formatter") + private String ledgerIdFormatter = NOT_INIT; + } + + @Override + public boolean apply(ServerConfiguration conf, CTDBFlags cmdFlags) { + initLedgerIdFormatter(conf, cmdFlags); + try { + return handle(conf); + } catch (Exception e) { + throw new UncheckedExecutionException(e.getMessage(), e); + } + } + + private boolean handle(ServerConfiguration conf) throws Exception { + LOG.info("=== Converting to DbLedgerStorage ==="); + ServerConfiguration bkConf = new ServerConfiguration(conf); + + InterleavedLedgerStorage interleavedStorage = new InterleavedLedgerStorage(); + BookieImpl.mountLedgerStorageOffline(bkConf, interleavedStorage); + + DbLedgerStorage dbStorage = new DbLedgerStorage(); + BookieImpl.mountLedgerStorageOffline(bkConf, dbStorage); + + int convertedLedgers = 0; + for (long ledgerId : interleavedStorage.getActiveLedgersInRange(0, Long.MAX_VALUE)) { + if (LOG.isDebugEnabled()) { + LOG.debug("Converting ledger {}", ledgerIdFormatter.formatLedgerId(ledgerId)); + } + + LedgerCache.LedgerIndexMetadata fi = interleavedStorage.readLedgerIndexMetadata(ledgerId); + + LedgerCache.PageEntriesIterable pages = interleavedStorage.getIndexEntries(ledgerId); + + long numberOfEntries = dbStorage.addLedgerToIndex(ledgerId, fi.fenced, fi.masterKey, pages); + if (LOG.isDebugEnabled()) { + LOG.debug(" -- done. fenced={} entries={}", fi.fenced, numberOfEntries); + } + + // Remove index from old storage + interleavedStorage.deleteLedger(ledgerId); + + if (++convertedLedgers % 1000 == 0) { + LOG.info("Converted {} ledgers", convertedLedgers); + } + } + + dbStorage.shutdown(); + interleavedStorage.shutdown(); + + LOG.info("---- Done Converting ----"); + return true; + } + + private void initLedgerIdFormatter(ServerConfiguration conf, CTDBFlags flags) { + if (this.ledgerIdFormatter != null) { + return; + } + if (flags.ledgerIdFormatter.equals(NOT_INIT)) { + this.ledgerIdFormatter = LedgerIdFormatter.newLedgerIdFormatter(conf); + } else { + this.ledgerIdFormatter = LedgerIdFormatter.newLedgerIdFormatter(flags.ledgerIdFormatter, conf); + } + } + +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookie/ConvertToInterleavedStorageCommand.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookie/ConvertToInterleavedStorageCommand.java new file mode 100644 index 00000000000..0ffe9066df6 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookie/ConvertToInterleavedStorageCommand.java @@ -0,0 +1,188 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.bookkeeper.tools.cli.commands.bookie; + +import com.beust.jcommander.Parameter; +import com.google.common.util.concurrent.UncheckedExecutionException; +import io.netty.buffer.PooledByteBufAllocator; +import java.nio.file.FileSystems; +import java.nio.file.Files; +import lombok.Setter; +import lombok.experimental.Accessors; +import org.apache.bookkeeper.bookie.Bookie; +import org.apache.bookkeeper.bookie.BookieResources; +import org.apache.bookkeeper.bookie.CheckpointSource; +import org.apache.bookkeeper.bookie.Checkpointer; +import org.apache.bookkeeper.bookie.InterleavedLedgerStorage; +import org.apache.bookkeeper.bookie.LedgerCache; +import org.apache.bookkeeper.bookie.LedgerDirsManager; +import org.apache.bookkeeper.bookie.storage.ldb.DbLedgerStorage; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.stats.NullStatsLogger; +import org.apache.bookkeeper.tools.cli.helpers.BookieCommand; +import org.apache.bookkeeper.tools.framework.CliFlags; +import org.apache.bookkeeper.tools.framework.CliSpec; +import org.apache.bookkeeper.util.DiskChecker; +import org.apache.bookkeeper.util.LedgerIdFormatter; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +/** + * A command to convert bookie indexes from DbLedgerStorage to InterleavedStorage format. + */ +public class ConvertToInterleavedStorageCommand extends BookieCommand { + + private static final Logger LOG = LoggerFactory.getLogger(ConvertToInterleavedStorageCommand.class); + private static final String NAME = "converttointerleavedstorage"; + private static final String DESC = "Convert bookie indexes from DbLedgerStorage to InterleavedStorage format"; + private static final String NOT_INIT = "default formatter"; + + @Setter + private LedgerIdFormatter ledgerIdFormatter; + + public ConvertToInterleavedStorageCommand() { + this(new CTISFlags()); + } + + public ConvertToInterleavedStorageCommand(CTISFlags flags) { + super(CliSpec.newBuilder().withName(NAME).withDescription(DESC).withFlags(flags).build()); + } + + /** + * Flags for this command. 
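+ * <p>Note: this conversion runs in the opposite direction of ConvertToDBStorageCommand;
+ * index entries are copied page by page into the interleaved ledger cache, after which
+ * the DbLedgerStorage "ledgers" and "locations" index directories are renamed to
+ * *.backup rather than deleted.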
+ */ + @Accessors(fluent = true) + public static class CTISFlags extends CliFlags{ + + @Parameter(names = { "-l", "--ledgeridformatter" }, description = "Set ledger id formatter") + private String ledgerIdFormatter = NOT_INIT; + + } + + @Override + public boolean apply(ServerConfiguration conf, CTISFlags cmdFlags) { + initLedgerIdFormatter(conf, cmdFlags); + try { + return handle(conf); + } catch (Exception e) { + throw new UncheckedExecutionException(e.getMessage(), e); + } + } + + private boolean handle(ServerConfiguration bkConf) throws Exception { + LOG.info("=== Converting DbLedgerStorage ==="); + ServerConfiguration conf = new ServerConfiguration(bkConf); + DiskChecker diskChecker = new DiskChecker(bkConf.getDiskUsageThreshold(), bkConf.getDiskUsageWarnThreshold()); + LedgerDirsManager ledgerDirsManager = new LedgerDirsManager(bkConf, bkConf.getLedgerDirs(), diskChecker); + LedgerDirsManager indexDirsManager = BookieResources.createIndexDirsManager( + conf, diskChecker, NullStatsLogger.INSTANCE, ledgerDirsManager); + + DbLedgerStorage dbStorage = new DbLedgerStorage(); + InterleavedLedgerStorage interleavedStorage = new InterleavedLedgerStorage(); + + CheckpointSource checkpointSource = new CheckpointSource() { + @Override + public Checkpoint newCheckpoint() { + return Checkpoint.MAX; + } + + @Override + public void checkpointComplete(Checkpoint checkpoint, boolean compact) {} + }; + Checkpointer checkpointer = new Checkpointer() { + @Override + public void startCheckpoint(CheckpointSource.Checkpoint checkpoint) { + // No-op + } + + @Override + public void start() { + // no-op + } + }; + + dbStorage.initialize(conf, null, ledgerDirsManager, indexDirsManager, + NullStatsLogger.INSTANCE, PooledByteBufAllocator.DEFAULT); + dbStorage.setCheckpointSource(checkpointSource); + dbStorage.setCheckpointer(checkpointer); + interleavedStorage.initialize(conf, null, ledgerDirsManager, indexDirsManager, + NullStatsLogger.INSTANCE, PooledByteBufAllocator.DEFAULT); + interleavedStorage.setCheckpointSource(checkpointSource); + interleavedStorage.setCheckpointer(checkpointer); + LedgerCache interleavedLedgerCache = interleavedStorage.getLedgerCache(); + + int convertedLedgers = 0; + for (long ledgerId : dbStorage.getActiveLedgersInRange(0, Long.MAX_VALUE)) { + if (LOG.isDebugEnabled()) { + LOG.debug("Converting ledger {}", ledgerIdFormatter.formatLedgerId(ledgerId)); + } + + interleavedStorage.setMasterKey(ledgerId, dbStorage.readMasterKey(ledgerId)); + if (dbStorage.isFenced(ledgerId)) { + interleavedStorage.setFenced(ledgerId); + } + + long lastEntryInLedger = dbStorage.getLastEntryInLedger(ledgerId); + for (long entryId = 0; entryId <= lastEntryInLedger; entryId++) { + try { + long location = dbStorage.getLocation(ledgerId, entryId); + if (location != 0L) { + interleavedLedgerCache.putEntryOffset(ledgerId, entryId, location); + } + } catch (Bookie.NoEntryException e) { + // Ignore entry + } + } + + if (++convertedLedgers % 1000 == 0) { + LOG.info("Converted {} ledgers", convertedLedgers); + } + } + + dbStorage.shutdown(); + + interleavedLedgerCache.flushLedger(true); + interleavedStorage.flush(); + interleavedStorage.shutdown(); + + String baseDir = ledgerDirsManager.getAllLedgerDirs().get(0).toString(); + + // Rename databases and keep backup + Files.move(FileSystems.getDefault().getPath(baseDir, "ledgers"), + FileSystems.getDefault().getPath(baseDir, "ledgers.backup")); + + Files.move(FileSystems.getDefault().getPath(baseDir, "locations"), + FileSystems.getDefault().getPath(baseDir, 
"locations.backup")); + + LOG.info("---- Done Converting {} ledgers ----", convertedLedgers); + return true; + } + + private void initLedgerIdFormatter(ServerConfiguration conf, CTISFlags flags) { + if (this.ledgerIdFormatter != null) { + return; + } + if (flags.ledgerIdFormatter.equals(NOT_INIT)) { + this.ledgerIdFormatter = LedgerIdFormatter.newLedgerIdFormatter(conf); + } else { + this.ledgerIdFormatter = LedgerIdFormatter.newLedgerIdFormatter(flags.ledgerIdFormatter, conf); + } + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookie/FlipBookieIdCommand.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookie/FlipBookieIdCommand.java new file mode 100644 index 00000000000..67f1ec1c6c3 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookie/FlipBookieIdCommand.java @@ -0,0 +1,176 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.bookkeeper.tools.cli.commands.bookie; + +import com.beust.jcommander.Parameter; +import com.google.common.util.concurrent.UncheckedExecutionException; +import java.io.IOException; +import java.util.concurrent.TimeUnit; +import lombok.Setter; +import lombok.experimental.Accessors; +import org.apache.bookkeeper.bookie.BookieImpl; +import org.apache.bookkeeper.bookie.BookieShell; +import org.apache.bookkeeper.client.BKException; +import org.apache.bookkeeper.client.BookKeeper; +import org.apache.bookkeeper.client.BookKeeperAdmin; +import org.apache.bookkeeper.client.UpdateLedgerOp; +import org.apache.bookkeeper.common.util.MathUtils; +import org.apache.bookkeeper.conf.ClientConfiguration; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.tools.cli.helpers.BookieCommand; +import org.apache.bookkeeper.tools.framework.CliFlags; +import org.apache.bookkeeper.tools.framework.CliSpec; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Command to update ledger command. + */ +public class FlipBookieIdCommand extends BookieCommand { + + static final Logger LOG = LoggerFactory.getLogger(FlipBookieIdCommand.class); + + private static final String NAME = "flip-bookie-id"; + private static final String DESC = "Update bookie id in ledgers (this may take a long time)."; + + public FlipBookieIdCommand() { + this(new FlipBookieIdFlags()); + } + + private FlipBookieIdCommand(FlipBookieIdFlags flags) { + super(CliSpec.newBuilder() + .withName(NAME) + .withDescription(DESC) + .withFlags(flags) + .build()); + } + + /** + * Flags for update ledger command. 
+     */
+    @Accessors(fluent = true)
+    @Setter
+    public static class FlipBookieIdFlags extends CliFlags {
+
+        @Parameter(names = { "-host", "--hostname" },
+            description = "Expects configuration useHostNameAsBookieID=true as the option value (default: ip address)")
+        private boolean hostname;
+
+        @Parameter(names = { "-s", "--updatepersec" },
+            description = "Number of ledgers to update per second (default: 5 per sec)")
+        private int updatePerSec = 5;
+
+        @Parameter(names = { "-r", "--maxOutstandingReads" },
+            description = "Max outstanding reads (default: 5 * updatespersec)")
+        private int maxOutstandingReads = updatePerSec * 5;
+
+        @Parameter(names = { "-l", "--limit" },
+            description = "Maximum number of ledgers to update (default: no limit)")
+        private int limit = Integer.MIN_VALUE;
+
+        @Parameter(names = { "-v", "--verbose" },
+            description = "Print the status of the ledger update (default: false)")
+        private boolean verbose;
+
+        @Parameter(names = { "-p", "--printprogress" },
+            description = "Print progress every configured number of seconds if verbose is turned on (default: 10 secs)")
+        private long printProgress = 10;
+    }
+
+    @Override
+    public boolean apply(ServerConfiguration conf, FlipBookieIdFlags cmdFlags) {
+        try {
+            return updateLedger(conf, cmdFlags);
+        } catch (Exception e) {
+            throw new UncheckedExecutionException(e.getMessage(), e);
+        }
+    }
+
+    private boolean updateLedger(ServerConfiguration conf, FlipBookieIdFlags flags)
+        throws InterruptedException, BKException, IOException {
+
+        if (!conf.getUseHostNameAsBookieID() && flags.hostname) {
+            LOG.error("Expects configuration useHostNameAsBookieID=true as the option value");
+            return false;
+        } else if (conf.getUseHostNameAsBookieID() && !flags.hostname) {
+            LOG.error("Expects configuration useHostNameAsBookieID=false as the option value");
+            return false;
+        }
+
+        final int rate = flags.updatePerSec;
+        if (rate <= 0) {
+            LOG.error("Invalid updatespersec {}, should be > 0", rate);
+            return false;
+        }
+
+        final int maxOutstandingReads = flags.maxOutstandingReads;
+        if (maxOutstandingReads <= 0) {
+            LOG.error("Invalid maxOutstandingReads {}, should be > 0", maxOutstandingReads);
+            return false;
+        }
+
+        final int limit = flags.limit;
+        if (limit <= 0 && limit != Integer.MIN_VALUE) {
+            LOG.error("Invalid limit {}, should be > 0", limit);
+            return false;
+        }
+
+        final long printProgress;
+        if (flags.verbose) {
+            printProgress = 10;
+        } else {
+            printProgress = flags.printProgress;
+        }
+
+        final ClientConfiguration clientConfiguration = new ClientConfiguration();
+        clientConfiguration.addConfiguration(conf);
+        final BookKeeper bk = new BookKeeper(clientConfiguration);
+        final BookKeeperAdmin admin = new BookKeeperAdmin(bk);
+        final UpdateLedgerOp updateLedgerOp = new UpdateLedgerOp(bk, admin);
+        final ServerConfiguration serverConfiguration = new ServerConfiguration(conf);
+        final BookieId newBookieId = BookieImpl.getBookieId(serverConfiguration);
+        serverConfiguration.setUseHostNameAsBookieID(!flags.hostname);
+        final BookieId oldBookieId = BookieImpl.getBookieId(serverConfiguration);
+
+        BookieShell.UpdateLedgerNotifier progressable = new BookieShell.UpdateLedgerNotifier() {
+            long lastReport = System.nanoTime();
+
+            @Override
+            public void progress(long updated, long issued) {
+                if (printProgress <= 0) {
+                    return; // disabled
+                }
+                if (TimeUnit.MILLISECONDS.toSeconds(MathUtils.elapsedMSec(lastReport)) >= printProgress) {
+                    LOG.info("Number of ledgers issued={}, updated={}", issued, updated);
+                    lastReport = MathUtils.nowInNano();
+                }
+
} + }; + + try { + updateLedgerOp.updateBookieIdInLedgers(oldBookieId, newBookieId, rate, maxOutstandingReads, limit, + progressable); + } catch (IOException e) { + LOG.error("Failed to update ledger metadata", e); + return false; + } + + return true; + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookie/FormatCommand.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookie/FormatCommand.java new file mode 100644 index 00000000000..ecef51ed456 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookie/FormatCommand.java @@ -0,0 +1,109 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.bookkeeper.tools.cli.commands.bookie; + +import static org.apache.bookkeeper.meta.MetadataDrivers.runFunctionWithRegistrationManager; + +import com.beust.jcommander.Parameter; +import com.google.common.util.concurrent.UncheckedExecutionException; +import java.util.concurrent.ExecutionException; +import lombok.Setter; +import lombok.experimental.Accessors; +import org.apache.bookkeeper.bookie.BookieImpl; +import org.apache.bookkeeper.bookie.Cookie; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.meta.exceptions.MetadataException; +import org.apache.bookkeeper.tools.cli.helpers.BookieCommand; +import org.apache.bookkeeper.tools.framework.CliFlags; +import org.apache.bookkeeper.tools.framework.CliSpec; +import org.apache.bookkeeper.versioning.Versioned; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Command to format the current server contents. + */ +public class FormatCommand extends BookieCommand { + + static final Logger LOG = LoggerFactory.getLogger(FormatCommand.class); + + private static final String NAME = "format"; + private static final String DESC = "Format the current server contents."; + + public FormatCommand() { + this(new Flags()); + } + + public FormatCommand(Flags flags) { + super(CliSpec.newBuilder() + .withName(NAME) + .withDescription(DESC) + .withFlags(flags) + .build()); + } + + /** + * Flags for format bookie command. 
+     */
+    @Accessors(fluent = true)
+    @Setter
+    public static class Flags extends CliFlags {
+
+        @Parameter(names = {"-n", "--noninteractive"},
+            description = "Whether to prompt for confirmation if old data exists.")
+        private boolean nonInteractive;
+
+        @Parameter(names = {"-f", "--force"},
+            description = "If [noninteractive] is specified, whether"
+                + " to force delete the old data without prompting.")
+        private boolean force;
+
+        @Parameter(names = {"-d", "--deletecookie"},
+            description = "Delete the bookie's cookie from the metadata store.")
+        private boolean deleteCookie;
+
+    }
+
+    @Override
+    public boolean apply(ServerConfiguration conf, Flags cmdFlags) {
+
+        ServerConfiguration bfconf = new ServerConfiguration(conf);
+        boolean result = BookieImpl.format(bfconf, cmdFlags.nonInteractive, cmdFlags.force);
+
+        // delete cookie
+        if (cmdFlags.deleteCookie) {
+            try {
+                runFunctionWithRegistrationManager(conf, rm -> {
+
+                    try {
+                        Versioned<Cookie> cookie = Cookie.readFromRegistrationManager(rm, bfconf);
+                        cookie.getValue().deleteFromRegistrationManager(rm, bfconf, cookie.getVersion());
+                    } catch (Exception e) {
+                        throw new UncheckedExecutionException(e.getMessage(), e);
+                    }
+
+                    return null;
+                });
+            } catch (MetadataException | ExecutionException e) {
+                throw new UncheckedExecutionException(e.getMessage(), e);
+            }
+        }
+        return result;
+    }
+}
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookie/FormatUtil.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookie/FormatUtil.java
new file mode 100644
index 00000000000..62a229c2338
--- /dev/null
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookie/FormatUtil.java
@@ -0,0 +1,101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.bookkeeper.tools.cli.commands.bookie;
+
+import io.netty.buffer.ByteBuf;
+import java.util.Formatter;
+import org.apache.bookkeeper.bookie.BookieImpl;
+import org.apache.bookkeeper.util.EntryFormatter;
+import org.apache.bookkeeper.util.LedgerIdFormatter;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Utility methods to format bookie entries into a readable form.
+ */
+public class FormatUtil {
+
+    private static final Logger LOG = LoggerFactory.getLogger(FormatUtil.class);
+
+    /**
+     * Format the message into a readable format.
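+     * <p>Assumed buffer layout (mirroring the parsing code below): an 8-byte ledger id
+     * and 8-byte entry id; the reserved ids METAENTRY_ID_LEDGER_KEY and
+     * METAENTRY_ID_FENCE_KEY mark META entries, otherwise an 8-byte last-add-confirmed
+     * field and 8 digest bytes precede the payload.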
+ * @param pos + * File offset of the message stored in entry log file + * @param recBuff + * Entry Data + * @param printMsg + * Whether printing the message body + * @param ledgerIdFormatter + * @param entryFormatter + */ + public static void formatEntry(long pos, ByteBuf recBuff, boolean printMsg, LedgerIdFormatter ledgerIdFormatter, + EntryFormatter entryFormatter) { + int entrySize = recBuff.readableBytes(); + long ledgerId = recBuff.readLong(); + long entryId = recBuff.readLong(); + + LOG.info("--------- Lid={}, Eid={}, ByteOffset={}, EntrySize={} ---------", + ledgerIdFormatter.formatLedgerId(ledgerId), entryId, pos, entrySize); + if (entryId == BookieImpl.METAENTRY_ID_LEDGER_KEY) { + int masterKeyLen = recBuff.readInt(); + byte[] masterKey = new byte[masterKeyLen]; + recBuff.readBytes(masterKey); + LOG.info("Type: META"); + LOG.info("MasterKey: {}", bytes2Hex(masterKey)); + LOG.info(""); + return; + } + if (entryId == BookieImpl.METAENTRY_ID_FENCE_KEY) { + LOG.info("Type: META"); + LOG.info("Fenced"); + LOG.info(""); + return; + } + // process a data entry + long lastAddConfirmed = recBuff.readLong(); + LOG.info("Type: DATA"); + LOG.info("LastConfirmed: {}", lastAddConfirmed); + if (!printMsg) { + LOG.info(""); + return; + } + // skip digest checking + recBuff.skipBytes(8); + LOG.info("Data:"); + LOG.info(""); + try { + byte[] ret = new byte[recBuff.readableBytes()]; + recBuff.readBytes(ret); + entryFormatter.formatEntry(ret); + } catch (Exception e) { + LOG.info("N/A. Corrupted."); + } + LOG.info(""); + } + + public static String bytes2Hex(byte[] data) { + StringBuilder sb = new StringBuilder(data.length * 2); + Formatter formatter = new Formatter(sb); + for (byte b : data) { + formatter.format("%02x", b); + } + formatter.close(); + return sb.toString(); + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookie/InitCommand.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookie/InitCommand.java new file mode 100644 index 00000000000..bf052de0756 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookie/InitCommand.java @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.bookkeeper.tools.cli.commands.bookie; + +import com.google.common.util.concurrent.UncheckedExecutionException; +import org.apache.bookkeeper.client.BookKeeperAdmin; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.tools.cli.helpers.BookieCommand; +import org.apache.bookkeeper.tools.framework.CliFlags; +import org.apache.bookkeeper.tools.framework.CliSpec; + +/** + * A command to initialize new bookie. 
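+ * <p>Thin wrapper (see below): apply() simply delegates to
+ * {@code BookKeeperAdmin.initBookie(conf)} and rethrows any failure as an
+ * UncheckedExecutionException.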
+ */ +public class InitCommand extends BookieCommand { + + private static final String NAME = "init"; + private static final String DESC = "Initialize new bookie."; + + public InitCommand() { + super(CliSpec.newBuilder() + .withName(NAME) + .withDescription(DESC) + .withFlags(new CliFlags()) + .build()); + } + + @Override + public boolean apply(ServerConfiguration conf, CliFlags cmdFlags) { + + boolean result = false; + try { + result = BookKeeperAdmin.initBookie(conf); + } catch (Exception e) { + throw new UncheckedExecutionException(e.getMessage(), e); + } + return result; + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookie/LastMarkCommand.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookie/LastMarkCommand.java index 61c1f92657d..46fae40ae26 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookie/LastMarkCommand.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookie/LastMarkCommand.java @@ -18,8 +18,10 @@ */ package org.apache.bookkeeper.tools.cli.commands.bookie; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.util.concurrent.UncheckedExecutionException; import java.io.File; - +import java.io.IOException; import org.apache.bookkeeper.bookie.Journal; import org.apache.bookkeeper.bookie.LedgerDirsManager; import org.apache.bookkeeper.bookie.LogMark; @@ -28,6 +30,8 @@ import org.apache.bookkeeper.tools.framework.CliFlags; import org.apache.bookkeeper.tools.framework.CliSpec; import org.apache.bookkeeper.util.DiskChecker; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * A bookie command to print the last log marker. @@ -36,6 +40,7 @@ public class LastMarkCommand extends BookieCommand { private static final String NAME = "lastmark"; private static final String DESC = "Print last log marker"; + private static final Logger LOG = LoggerFactory.getLogger(LastMarkCommand.class); public LastMarkCommand() { super(CliSpec.newBuilder() @@ -45,20 +50,30 @@ public LastMarkCommand() { .build()); } + @VisibleForTesting + public static LastMarkCommand newLastMarkCommand(){ + return new LastMarkCommand(); + } + @Override public boolean apply(ServerConfiguration conf, CliFlags flags) { - LedgerDirsManager dirsManager = new LedgerDirsManager( - conf, conf.getJournalDirs(), - new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold())); - File[] journalDirs = conf.getJournalDirs(); + try { + LedgerDirsManager dirsManager = new LedgerDirsManager( + conf, conf.getLedgerDirs(), + new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold())); + File[] journalDirs = conf.getJournalDirs(); - for (int idx = 0; idx < journalDirs.length; idx++) { - Journal journal = new Journal(idx, journalDirs[idx], conf, dirsManager); - LogMark lastLogMark = journal.getLastLogMark().getCurMark(); - System.out.println("LastLogMark : Journal Id - " + lastLogMark.getLogFileId() + "(" - + Long.toHexString(lastLogMark.getLogFileId()) + ".txn), Pos - " - + lastLogMark.getLogFileOffset()); + for (int idx = 0; idx < journalDirs.length; idx++) { + Journal journal = new Journal(idx, journalDirs[idx], conf, dirsManager); + LogMark lastLogMark = journal.getLastLogMark().getCurMark(); + LOG.info("LastLogMark : Journal Id - {}({}.txn), Pos - {}", + lastLogMark.getLogFileId(), + Long.toHexString(lastLogMark.getLogFileId()), + lastLogMark.getLogFileOffset()); + } + return true; + } catch (IOException e) { + 
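+            // Reading the journal's last log mark may fail with an IOException; rethrow
+            // unchecked so the CLI framework surfaces the error uniformly.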
throw new UncheckedExecutionException(e.getMessage(), e); } - return true; } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookie/LedgerCommand.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookie/LedgerCommand.java new file mode 100644 index 00000000000..7434706b9fe --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookie/LedgerCommand.java @@ -0,0 +1,201 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.bookkeeper.tools.cli.commands.bookie; + +import com.beust.jcommander.Parameter; +import com.google.common.util.concurrent.UncheckedExecutionException; +import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; +import java.io.IOException; +import java.util.function.Consumer; +import lombok.Setter; +import lombok.experimental.Accessors; +import org.apache.bookkeeper.bookie.BookieImpl; +import org.apache.bookkeeper.bookie.InterleavedLedgerStorage; +import org.apache.bookkeeper.bookie.LedgerCache; +import org.apache.bookkeeper.bookie.LedgerEntryPage; +import org.apache.bookkeeper.bookie.SortedLedgerStorage; +import org.apache.bookkeeper.bookie.storage.ldb.DbLedgerStorage; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.tools.cli.helpers.BookieCommand; +import org.apache.bookkeeper.tools.framework.CliFlags; +import org.apache.bookkeeper.tools.framework.CliSpec; +import org.apache.bookkeeper.util.LedgerIdFormatter; +import org.apache.commons.lang.mutable.MutableLong; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Command to dump ledger index entries into readable format. + */ +public class LedgerCommand extends BookieCommand { + + static final Logger LOG = LoggerFactory.getLogger(LedgerCommand.class); + + private static final String NAME = "ledger"; + private static final String DESC = "Dump ledger index entries into readable format"; + + private LedgerIdFormatter ledgerIdFormatter; + + private Consumer print = this::printInfoLine; + + public void setPrint(Consumer print) { + this.print = print; + } + + public LedgerCommand() { + this(new LedgerFlags()); + } + + public LedgerCommand(LedgerIdFormatter ledgerIdFormatter) { + this(new LedgerFlags()); + this.ledgerIdFormatter = ledgerIdFormatter; + } + + private LedgerCommand(LedgerFlags flags) { + super(CliSpec.newBuilder().withName(NAME).withDescription(DESC).withFlags(flags).build()); + } + + /** + * Flags for ledger command. 
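+ * <p>Illustrative invocation (flag names from the declarations below):
+ * {@code ledger -id 26 -m} prints the index metadata (master key, size, entry count,
+ * fenced flag) before dumping the index pages; the -m flag only takes effect on the
+ * Interleaved/SortedLedgerStorage code path.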
+ */ + @Accessors(fluent = true) + @Setter + public static class LedgerFlags extends CliFlags { + + @Parameter(names = { "-id", "--ledgerId" }, description = "Specific ledger id", required = true) + private long ledgerId; + + @Parameter(names = { "-m", "--meta" }, description = "Print meta information") + private boolean meta; + + @Parameter(names = { "-l", "--ledgeridformatter" }, description = "Set ledger id formatter") + private String ledgerIdFormatter = ""; + } + + @Override + public boolean apply(ServerConfiguration conf, LedgerFlags cmdFlags) { + initLedgerIdFormatter(conf, cmdFlags); + long ledgerId = cmdFlags.ledgerId; + if (conf.getLedgerStorageClass().equals(DbLedgerStorage.class.getName())) { + // dump ledger info + return dumpLedgerInfo(ledgerId, conf); + } else if (conf.getLedgerStorageClass().equals(SortedLedgerStorage.class.getName()) + || conf.getLedgerStorageClass().equals(InterleavedLedgerStorage.class.getName())) { + ServerConfiguration tConf = new ServerConfiguration(conf); + InterleavedLedgerStorage interleavedLedgerStorage = new InterleavedLedgerStorage(); + try { + BookieImpl.mountLedgerStorageOffline(tConf, interleavedLedgerStorage); + } catch (IOException e) { + throw new UncheckedExecutionException(e.getMessage(), e); + } + + if (cmdFlags.meta) { + // print meta + printMeta(ledgerId, interleavedLedgerStorage); + } + + try { + print.accept("===== LEDGER: " + ledgerIdFormatter.formatLedgerId(ledgerId) + " ====="); + for (LedgerCache.PageEntries page : interleavedLedgerStorage.getIndexEntries(ledgerId)) { + if (printPageEntries(page)) { + return true; + } + } + } catch (IOException e) { + LOG.error("Failed to read index page"); + return true; + } + + } + return true; + } + + private void initLedgerIdFormatter(ServerConfiguration conf, LedgerFlags flags) { + if (flags.ledgerIdFormatter.equals("")) { + this.ledgerIdFormatter = LedgerIdFormatter.newLedgerIdFormatter(conf); + } else { + this.ledgerIdFormatter = LedgerIdFormatter.newLedgerIdFormatter(flags.ledgerIdFormatter, conf); + } + } + + private boolean dumpLedgerInfo(long ledgerId, ServerConfiguration conf) { + try { + DbLedgerStorage.readLedgerIndexEntries(ledgerId, conf, (currentEntry, entryLogId, position) -> System.out + .println("entry " + currentEntry + "\t:\t(log: " + entryLogId + ", pos: " + position + ")")); + } catch (IOException e) { + System.err.printf("ERROR: initializing dbLedgerStorage %s", e.getMessage()); + return false; + } + return true; + } + + private void printMeta(long ledgerId, InterleavedLedgerStorage interleavedLedgerStorage) { + print.accept("===== LEDGER: " + ledgerIdFormatter.formatLedgerId(ledgerId) + " ====="); + try { + LedgerCache.LedgerIndexMetadata meta = interleavedLedgerStorage.readLedgerIndexMetadata(ledgerId); + print.accept("master key : " + meta.getMasterKeyHex()); + long size = meta.size; + if (size % 8 == 0) { + print.accept("size : " + size); + } else { + print.accept("size : " + size + "(not aligned with 8, may be corrupted or under flushing now)"); + } + + print.accept("entries : " + (size / 8)); + print.accept("isFenced : " + meta.fenced); + } catch (IOException e) { + throw new UncheckedExecutionException(e.getMessage(), e); + } + } + + @SuppressFBWarnings("RCN_REDUNDANT_NULLCHECK_WOULD_HAVE_BEEN_A_NPE") + private boolean printPageEntries(LedgerCache.PageEntries page) { + final MutableLong curEntry = new MutableLong(page.getFirstEntry()); + try (LedgerEntryPage lep = page.getLEP()) { + lep.getEntries((entry, offset) -> { + while (curEntry.longValue() < entry) { + 
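+                    // entry ids between the previous offset and this one have no index
+                    // entry; print them as N/A so holes in the index are visible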
print.accept("entry " + curEntry + "\t:\tN/A"); + curEntry.increment(); + } + long entryLogId = offset >> 32L; + long pos = offset & 0xffffffffL; + print.accept("entry " + curEntry + "\t:\t(log:" + entryLogId + ", pos: " + pos + ")"); + curEntry.increment(); + return true; + }); + } catch (Exception e) { + print.accept( + "Failed to read index page @ " + page.getFirstEntry() + ", the index file may be corrupted : " + e + .getMessage()); + return true; + } + + while (curEntry.longValue() < page.getLastEntry()) { + print.accept("entry " + curEntry + "\t:\tN/A"); + curEntry.increment(); + } + + return false; + } + + + private void printInfoLine(String mes) { + System.out.println(mes); + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookie/ListActiveLedgersCommand.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookie/ListActiveLedgersCommand.java new file mode 100644 index 00000000000..88c29d52b55 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookie/ListActiveLedgersCommand.java @@ -0,0 +1,175 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.bookkeeper.tools.cli.commands.bookie;
+
+import static org.apache.bookkeeper.meta.MetadataDrivers.runFunctionWithLedgerManagerFactory;
+
+import com.beust.jcommander.Parameter;
+import com.google.common.util.concurrent.UncheckedExecutionException;
+import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
+import lombok.Setter;
+import lombok.experimental.Accessors;
+import org.apache.bookkeeper.bookie.DefaultEntryLogger;
+import org.apache.bookkeeper.bookie.EntryLogMetadata;
+import org.apache.bookkeeper.bookie.ReadOnlyDefaultEntryLogger;
+import org.apache.bookkeeper.client.BKException;
+import org.apache.bookkeeper.conf.ServerConfiguration;
+import org.apache.bookkeeper.meta.LedgerManager;
+import org.apache.bookkeeper.meta.exceptions.MetadataException;
+import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks;
+import org.apache.bookkeeper.tools.cli.commands.bookie.ListActiveLedgersCommand.ActiveLedgerFlags;
+import org.apache.bookkeeper.tools.cli.helpers.BookieCommand;
+import org.apache.bookkeeper.tools.framework.CliFlags;
+import org.apache.bookkeeper.tools.framework.CliSpec;
+import org.apache.bookkeeper.util.LedgerIdFormatter;
+import org.apache.zookeeper.AsyncCallback.VoidCallback;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * List the active ledgers (those still present in the metadata store) that are
+ * contained in a given entry log file.
+ */
+@SuppressFBWarnings("RCN_REDUNDANT_NULLCHECK_WOULD_HAVE_BEEN_A_NPE")
+public class ListActiveLedgersCommand extends BookieCommand<ActiveLedgerFlags> {
+    private static final Logger LOG = LoggerFactory.getLogger(ListActiveLedgersCommand.class);
+    private static final String NAME = "active ledger";
+    private static final String DESC = "Retrieve bookie active ledger info.";
+    private static final long DEFAULT_TIME_OUT = 1000;
+    private static final long DEFAULT_LOG_ID = 0;
+    private static final String DEFAULT_LEDGER_ID_FORMATTER = "";
+    private LedgerIdFormatter ledgerIdFormatter;
+
+    public ListActiveLedgersCommand() {
+        this(new ActiveLedgerFlags());
+    }
+
+    public ListActiveLedgersCommand(LedgerIdFormatter ledgerIdFormatter) {
+        this(new ActiveLedgerFlags());
+        this.ledgerIdFormatter = ledgerIdFormatter;
+    }
+
+    public ListActiveLedgersCommand(ActiveLedgerFlags ledgerFlags) {
+        super(CliSpec.<ActiveLedgerFlags>newBuilder()
+            .withName(NAME)
+            .withDescription(DESC)
+            .withFlags(ledgerFlags)
+            .build());
+    }
+
+    /**
+     * Flags for active ledger command.
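+     * <p>Defaults (from the constants above): log id 0 and a 1000 ms metadata read
+     * timeout; an empty formatter string falls back to the formatter configured on
+     * the ServerConfiguration.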
+     */
+    @Accessors(fluent = true)
+    @Setter
+    public static class ActiveLedgerFlags extends CliFlags {
+
+        @Parameter(names = { "-l", "--logid" }, description = "Entry log file id")
+        private long logId = DEFAULT_LOG_ID;
+
+        @Parameter(names = { "-t", "--timeout" }, description = "Read timeout (ms)")
+        private long timeout = DEFAULT_TIME_OUT;
+
+        @Parameter(names = { "-f", "--ledgerIdFormatter" }, description = "Ledger id formatter")
+        private String ledgerIdFormatter = DEFAULT_LEDGER_ID_FORMATTER;
+    }
+
+    @Override
+    public boolean apply(ServerConfiguration bkConf, ActiveLedgerFlags cmdFlags) {
+        initLedgerFormatter(bkConf, cmdFlags);
+        try {
+            handler(bkConf, cmdFlags);
+        } catch (MetadataException | ExecutionException e) {
+            throw new UncheckedExecutionException(e.getMessage(), e);
+        }
+        return true;
+    }
+
+    private void initLedgerFormatter(ServerConfiguration conf, ActiveLedgerFlags cmdFlags) {
+        if (!cmdFlags.ledgerIdFormatter.equals(DEFAULT_LEDGER_ID_FORMATTER)) {
+            this.ledgerIdFormatter = LedgerIdFormatter.newLedgerIdFormatter(cmdFlags.ledgerIdFormatter, conf);
+        } else if (ledgerIdFormatter == null) {
+            this.ledgerIdFormatter = LedgerIdFormatter.newLedgerIdFormatter(conf);
+        }
+    }
+
+    public void handler(ServerConfiguration bkConf, ActiveLedgerFlags cmdFlags)
+        throws ExecutionException, MetadataException {
+        runFunctionWithLedgerManagerFactory(bkConf, mFactory -> {
+            try (LedgerManager ledgerManager = mFactory.newLedgerManager()) {
+                Set<Long> activeLedgersOnMetadata = new HashSet<>();
+                BookkeeperInternalCallbacks.Processor<Long> ledgerProcessor = (ledger, cb) -> {
+                    activeLedgersOnMetadata.add(ledger);
+                    cb.processResult(BKException.Code.OK, null, null);
+                };
+                CountDownLatch done = new CountDownLatch(1);
+                AtomicInteger resultCode = new AtomicInteger(BKException.Code.OK);
+                VoidCallback endCallback = (rs, s, obj) -> {
+                    resultCode.set(rs);
+                    done.countDown();
+                };
+                ledgerManager.asyncProcessLedgers(ledgerProcessor, endCallback, null,
+                    BKException.Code.OK, BKException.Code.ReadException);
+                if (done.await(cmdFlags.timeout, TimeUnit.MILLISECONDS)) {
+                    if (resultCode.get() == BKException.Code.OK) {
+                        DefaultEntryLogger entryLogger = new ReadOnlyDefaultEntryLogger(bkConf);
+                        EntryLogMetadata entryLogMetadata = entryLogger.getEntryLogMetadata(cmdFlags.logId);
+                        List<Long> ledgersOnEntryLog = entryLogMetadata.getLedgersMap().keys();
+                        if (ledgersOnEntryLog.isEmpty()) {
+                            LOG.info("No ledgers found on log file {}", cmdFlags.logId);
+                        }
+                        List<Long> activeLedgersOnEntryLog = new ArrayList<>(ledgersOnEntryLog.size());
+                        for (long ledger : ledgersOnEntryLog) {
+                            if (activeLedgersOnMetadata.contains(ledger)) {
+                                activeLedgersOnEntryLog.add(ledger);
+                            }
+                        }
+                        printActiveLedgerOnEntryLog(cmdFlags.logId, activeLedgersOnEntryLog);
+                    } else {
+                        LOG.info("Reading active ledger ids from the metadata store failed, code {}",
+                            resultCode.get());
+                        throw BKException.create(resultCode.get());
+                    }
+                } else {
+                    LOG.info("Timed out reading active ledger ids from the metadata store");
+                }
+            } catch (BKException | InterruptedException | IOException e) {
+                LOG.error("Received Exception while processing ledgers", e);
+                throw new UncheckedExecutionException(e);
+            }
+            return null;
+        });
+    }
+
+    public void printActiveLedgerOnEntryLog(long logId, List<Long> activeLedgers) {
+        if (activeLedgers.isEmpty()) {
+            LOG.info("No active ledgers on log file {}", logId);
+        } else {
+            LOG.info("Active ledgers on entry log {} as follows:", logId);
+        }
+        Collections.sort(activeLedgers);
+        for (long a : activeLedgers) {
+            LOG.info("{} ", ledgerIdFormatter.formatLedgerId(a));
+        }
+    }
+}
diff
--git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookie/ListFilesOnDiscCommand.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookie/ListFilesOnDiscCommand.java new file mode 100644 index 00000000000..07472b29055 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookie/ListFilesOnDiscCommand.java @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.bookkeeper.tools.cli.commands.bookie; + +import com.beust.jcommander.Parameter; +import com.google.common.util.concurrent.UncheckedExecutionException; +import java.io.File; +import java.io.IOException; +import java.util.List; +import lombok.Setter; +import lombok.experimental.Accessors; +import org.apache.bookkeeper.bookie.BookieShell; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.tools.cli.helpers.BookieCommand; +import org.apache.bookkeeper.tools.framework.CliFlags; +import org.apache.bookkeeper.tools.framework.CliSpec; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Command to list the files in JournalDirectories/LedgerDirectories/IndexDirectories. + */ +public class ListFilesOnDiscCommand extends BookieCommand { + + private static final String NAME = "listfilesondisc"; + private static final String DESC = "List the files in JournalDirectories/LedgerDirectories/IndexDirectories."; + private static final Logger LOG = LoggerFactory.getLogger(ListFilesOnDiscCommand.class); + + public ListFilesOnDiscCommand() { + this(new LFODFlags()); + } + + public ListFilesOnDiscCommand(LFODFlags flags) { + super(CliSpec.newBuilder().withName(NAME).withDescription(DESC).withFlags(flags).build()); + } + + /** + * Flags for list files on disc command. 
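+ * <p>The three flags are independent and may be combined, e.g. (illustrative)
+ * {@code listfilesondisc -txn -log -idx} prints journal, entry log and index files in
+ * turn; when no index dirs are configured, index files are searched under the ledger dirs.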
+ */ + @Accessors(fluent = true) + @Setter + public static class LFODFlags extends CliFlags { + + @Parameter(names = {"-txn", "--journal"}, description = "Print list of Journal Files") + private boolean journal; + + @Parameter(names = {"-log", "--entrylog"}, description = "Print list of EntryLog Files") + private boolean entrylog; + + @Parameter(names = {"-idx", "--index"}, description = "Print list of Index Files") + private boolean index; + } + + @Override + public boolean apply(ServerConfiguration conf, LFODFlags cmdFlags) { + try { + return handler(conf, cmdFlags); + } catch (IOException e) { + throw new UncheckedExecutionException(e.getMessage(), e); + } + } + + private boolean handler(ServerConfiguration conf, LFODFlags cmd) throws IOException { + if (cmd.journal) { + File[] journalDirs = conf.getJournalDirs(); + List<File> journalFiles = BookieShell.listFilesAndSort(journalDirs, "txn"); + LOG.info("--------- Printing the list of Journal Files ---------"); + for (File journalFile : journalFiles) { + LOG.info("{}", journalFile.getCanonicalPath()); + } + LOG.info(""); + } + if (cmd.entrylog) { + File[] ledgerDirs = conf.getLedgerDirs(); + List<File> ledgerFiles = BookieShell.listFilesAndSort(ledgerDirs, "log"); + LOG.info("--------- Printing the list of EntryLog/Ledger Files ---------"); + for (File ledgerFile : ledgerFiles) { + LOG.info("{}", ledgerFile.getCanonicalPath()); + } + LOG.info(""); + } + if (cmd.index) { + File[] indexDirs = (conf.getIndexDirs() == null) ? conf.getLedgerDirs() : conf.getIndexDirs(); + List<File> indexFiles = BookieShell.listFilesAndSort(indexDirs, "idx"); + LOG.info("--------- Printing the list of Index Files ---------"); + for (File indexFile : indexFiles) { + LOG.info("{}", indexFile.getCanonicalPath()); + } + } + return true; + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookie/ListLedgersCommand.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookie/ListLedgersCommand.java new file mode 100644 index 00000000000..462c5abcbe7 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookie/ListLedgersCommand.java @@ -0,0 +1,199 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
+ */ +package org.apache.bookkeeper.tools.cli.commands.bookie; + +import static org.apache.bookkeeper.meta.MetadataDrivers.runFunctionWithLedgerManagerFactory; + +import com.beust.jcommander.Parameter; +import com.google.common.util.concurrent.UncheckedExecutionException; +import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; +import java.net.UnknownHostException; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.atomic.AtomicInteger; +import lombok.Setter; +import lombok.experimental.Accessors; +import org.apache.bookkeeper.client.BKException; +import org.apache.bookkeeper.client.BookKeeperAdmin; +import org.apache.bookkeeper.client.api.LedgerMetadata; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.meta.LedgerManager; +import org.apache.bookkeeper.meta.exceptions.MetadataException; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks; +import org.apache.bookkeeper.tools.cli.commands.bookie.ListLedgersCommand.ListLedgersFlags; +import org.apache.bookkeeper.tools.cli.helpers.BookieCommand; +import org.apache.bookkeeper.tools.framework.CliFlags; +import org.apache.bookkeeper.tools.framework.CliSpec; +import org.apache.bookkeeper.util.LedgerIdFormatter; +import org.apache.commons.lang.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Command to list all ledgers on the cluster. + */ +@SuppressFBWarnings("RCN_REDUNDANT_NULLCHECK_WOULD_HAVE_BEEN_A_NPE") +public class ListLedgersCommand extends BookieCommand<ListLedgersFlags> { + + private static final Logger LOG = LoggerFactory.getLogger(ListLedgersCommand.class); + + private static final String NAME = "listledgers"; + private static final String DESC = "List all ledgers on the cluster (this may take a long time)."; + private static final String DEFAULT = ""; + + private LedgerIdFormatter ledgerIdFormatter; + + public ListLedgersCommand() { + this(new ListLedgersFlags()); + } + + public ListLedgersCommand(LedgerIdFormatter ledgerIdFormatter) { + this(new ListLedgersFlags()); + this.ledgerIdFormatter = ledgerIdFormatter; + } + + public ListLedgersCommand(ListLedgersFlags flags) { + super(CliSpec.<ListLedgersFlags>newBuilder() + .withName(NAME) + .withDescription(DESC) + .withFlags(flags) + .build()); + } + + /** + * Flags for ListLedgers command.
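+ * <p>A usage sketch ({@code conf} is an existing ServerConfiguration; the bookie id below is illustrative): + * <pre>{@code + * ListLedgersFlags flags = new ListLedgersFlags().meta(true).bookieId("127.0.0.1:3181"); + * new ListLedgersCommand().apply(conf, flags); + * }</pre>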
+ */ + @Accessors(fluent = true) + @Setter + public static class ListLedgersFlags extends CliFlags { + + @Parameter(names = {"-m", "--meta"}, description = "Print metadata") + private boolean meta; + + @Parameter(names = { "-id", "--bookieid" }, description = "List ledgers residing in this bookie") + private String bookieId; + + @Parameter(names = { "-l", "--ledgerIdFormatter" }, description = "Set ledger id formatter") + private String ledgerIdFormatter = DEFAULT; + } + + @Override + public boolean apply(ServerConfiguration conf, ListLedgersFlags cmdFlags) { + initLedgerFormatter(conf, cmdFlags); + try { + handler(conf, cmdFlags); + } catch (UnknownHostException e) { + LOG.error("Invalid bookie id {}", cmdFlags.bookieId, e); + return false; + } catch (MetadataException | ExecutionException e) { + throw new UncheckedExecutionException(e.getMessage(), e); + } + + return true; + } + + private void initLedgerFormatter(ServerConfiguration conf, ListLedgersFlags cmdFlags) { + if (ledgerIdFormatter != null) { + return; + } + if (!cmdFlags.ledgerIdFormatter.equals(DEFAULT)) { + this.ledgerIdFormatter = LedgerIdFormatter.newLedgerIdFormatter(cmdFlags.ledgerIdFormatter, conf); + } else { + this.ledgerIdFormatter = LedgerIdFormatter.newLedgerIdFormatter(conf); + } + } + + public boolean handler(ServerConfiguration conf, ListLedgersFlags flags) + throws UnknownHostException, MetadataException, ExecutionException { + + final BookieId bookieAddress = StringUtils.isBlank(flags.bookieId) ? null : + BookieId.parse(flags.bookieId); + + runFunctionWithLedgerManagerFactory(conf, mFactory -> { + try (LedgerManager ledgerManager = mFactory.newLedgerManager()) { + + final AtomicInteger returnCode = new AtomicInteger(BKException.Code.OK); + final CountDownLatch processDone = new CountDownLatch(1); + + BookkeeperInternalCallbacks.Processor<Long> ledgerProcessor = (ledgerId, cb) -> { + if (!flags.meta && (bookieAddress == null)) { + printLedgerMetadata(ledgerId, null, false); + cb.processResult(BKException.Code.OK, null, null); + } else { + ledgerManager.readLedgerMetadata(ledgerId).whenComplete((metadata, exception) -> { + if (exception == null) { + if ((bookieAddress == null) + || BookKeeperAdmin.areEntriesOfLedgerStoredInTheBookie + (ledgerId, bookieAddress, metadata.getValue())) { + /* + * the print method has to be in + * synchronized scope, otherwise + * output of printLedgerMetadata + * could interleave since this + * callback for different + * ledgers can happen in + * different threads.
+ */ + synchronized (ListLedgersCommand.this) { + printLedgerMetadata(ledgerId, metadata.getValue(), flags.meta); + } + } + cb.processResult(BKException.Code.OK, null, null); + } else if (BKException.getExceptionCode(exception) + == BKException.Code.NoSuchLedgerExistsException + || BKException.getExceptionCode(exception) + == BKException.Code.NoSuchLedgerExistsOnMetadataServerException) { + cb.processResult(BKException.Code.OK, null, null); + } else { + LOG.error("Unable to read metadata of ledger {}", ledgerId, exception); + cb.processResult(BKException.getExceptionCode(exception), null, null); + } + }); + } + }; + + ledgerManager.asyncProcessLedgers(ledgerProcessor, (rc, s, obj) -> { + returnCode.set(rc); + processDone.countDown(); + }, null, BKException.Code.OK, BKException.Code.ReadException); + + processDone.await(); + if (returnCode.get() != BKException.Code.OK) { + LOG.error("Received error return value while processing ledgers: {}", returnCode.get()); + throw BKException.create(returnCode.get()); + } + + } catch (Exception ioe) { + LOG.error("Received Exception while processing ledgers", ioe); + throw new UncheckedExecutionException(ioe); + } + return null; + }); + + return true; + } + + private void printLedgerMetadata(long ledgerId, LedgerMetadata md, boolean printMeta) { + LOG.info("ledgerID: {}", ledgerIdFormatter.formatLedgerId(ledgerId)); + if (printMeta) { + LOG.info("{}", md); + } + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookie/LocalConsistencyCheckCommand.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookie/LocalConsistencyCheckCommand.java new file mode 100644 index 00000000000..181d1e661bd --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookie/LocalConsistencyCheckCommand.java @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.bookkeeper.tools.cli.commands.bookie; + +import com.google.common.util.concurrent.UncheckedExecutionException; +import java.io.IOException; +import java.util.List; +import org.apache.bookkeeper.bookie.BookieImpl; +import org.apache.bookkeeper.bookie.LedgerStorage; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.tools.cli.helpers.BookieCommand; +import org.apache.bookkeeper.tools.framework.CliFlags; +import org.apache.bookkeeper.tools.framework.CliSpec; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Command to check local storage for inconsistencies.
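+ * <p>A usage sketch ({@code conf} is an existing ServerConfiguration): + * {@code new LocalConsistencyCheckCommand().apply(conf, new CliFlags())} + * mounts the ledger storage offline and returns {@code false} if any inconsistency is detected.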
+ */ +public class LocalConsistencyCheckCommand extends BookieCommand<CliFlags> { + + static final Logger LOG = LoggerFactory.getLogger(LocalConsistencyCheckCommand.class); + + private static final String NAME = "localconsistencycheck"; + private static final String DESC = "Validate Ledger Storage internal metadata"; + + public LocalConsistencyCheckCommand() { + super(CliSpec.<CliFlags>newBuilder() + .withName(NAME) + .withDescription(DESC) + .withFlags(new CliFlags()) + .build()); + } + + @Override + public boolean apply(ServerConfiguration conf, CliFlags cmdFlags) { + try { + return check(conf); + } catch (IOException e) { + throw new UncheckedExecutionException(e.getMessage(), e); + } + } + + private boolean check(ServerConfiguration conf) throws IOException { + LOG.info("=== Performing local consistency check ==="); + ServerConfiguration serverConfiguration = new ServerConfiguration(conf); + LedgerStorage ledgerStorage = BookieImpl.mountLedgerStorageOffline(serverConfiguration, null); + List<LedgerStorage.DetectedInconsistency> errors = ledgerStorage.localConsistencyCheck( + java.util.Optional.empty()); + if (!errors.isEmpty()) { + LOG.info("=== Check returned errors: ==="); + for (LedgerStorage.DetectedInconsistency error : errors) { + LOG.error("Ledger {}, entry {}: ", error.getLedgerId(), error.getEntryId(), error.getException()); + } + return false; + } else { + LOG.info("=== Check passed ==="); + return true; + } + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookie/ReadJournalCommand.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookie/ReadJournalCommand.java new file mode 100644 index 00000000000..ab985070ff9 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookie/ReadJournalCommand.java @@ -0,0 +1,216 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
+ */ +package org.apache.bookkeeper.tools.cli.commands.bookie; + +import com.beust.jcommander.Parameter; +import com.google.common.collect.Lists; +import com.google.common.util.concurrent.UncheckedExecutionException; +import io.netty.buffer.Unpooled; +import java.io.File; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.List; +import lombok.Setter; +import lombok.experimental.Accessors; +import org.apache.bookkeeper.bookie.Journal; +import org.apache.bookkeeper.bookie.LedgerDirsManager; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.tools.cli.helpers.BookieCommand; +import org.apache.bookkeeper.tools.framework.CliFlags; +import org.apache.bookkeeper.tools.framework.CliSpec; +import org.apache.bookkeeper.util.BookKeeperConstants; +import org.apache.bookkeeper.util.DiskChecker; +import org.apache.bookkeeper.util.EntryFormatter; +import org.apache.bookkeeper.util.LedgerIdFormatter; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Command to scan a journal file and format the entries into readable format. + */ +public class ReadJournalCommand extends BookieCommand<ReadJournalCommand.ReadJournalFlags> { + + private static final String NAME = "readjournal"; + private static final String DESC = "Scan a journal file and format the entries into readable format."; + private static final long DEFAULT_JOURNALID = -1L; + private static final String DEFAULT = ""; + private LedgerIdFormatter ledgerIdFormatter; + private EntryFormatter entryFormatter; + private static final Logger LOG = LoggerFactory.getLogger(ReadJournalCommand.class); + + List<Journal> journals = null; + + public ReadJournalCommand() { + this(new ReadJournalFlags()); + } + + public ReadJournalCommand(LedgerIdFormatter idFormatter, EntryFormatter entryFormatter) { + this(new ReadJournalFlags()); + this.ledgerIdFormatter = idFormatter; + this.entryFormatter = entryFormatter; + } + + ReadJournalCommand(ReadJournalFlags flags) { + super(CliSpec.<ReadJournalFlags>newBuilder().withName(NAME).withDescription(DESC).withFlags(flags).build()); + } + + /** + * Flags for read journal command.
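+ * <p>A usage sketch ({@code conf} is an existing ServerConfiguration; the journal id is illustrative, + * and either a journal id or a journal file name is required): + * <pre>{@code + * ReadJournalFlags flags = new ReadJournalFlags().journalId(1L).msg(true); + * new ReadJournalCommand().apply(conf, flags); + * }</pre>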
+ */ + @Accessors(fluent = true) + @Setter + public static class ReadJournalFlags extends CliFlags { + + @Parameter(names = {"-m", "--msg"}, description = "Print message body") + private boolean msg; + + @Parameter(names = { "-d", "--dir" }, description = "Journal directory (needed if more than one journal " + "configured)") + private String dir = DEFAULT; + + @Parameter(names = {"-id", "--journalid"}, description = "Journal Id") + private long journalId = DEFAULT_JOURNALID; + + @Parameter(names = {"-f", "--filename"}, description = "Journal file name") + private String fileName = DEFAULT; + + @Parameter(names = {"-l", "--ledgerIdFormatter"}, description = "Set ledger id formatter") + private String ledgerIdFormatter = DEFAULT; + + @Parameter(names = {"-e", "--entryformatter"}, description = "Set entry formatter") + private String entryFormatter = DEFAULT; + + } + + @Override + public boolean apply(ServerConfiguration conf, ReadJournalFlags cmdFlags) { + initTools(conf, cmdFlags); + if (!checkArgs(cmdFlags)) { + return false; + } + try { + return handler(conf, cmdFlags); + } catch (IOException e) { + throw new UncheckedExecutionException(e.getMessage(), e); + } + } + + private void initTools(ServerConfiguration conf, ReadJournalFlags flags) { + if (!flags.ledgerIdFormatter.equals(DEFAULT)) { + ledgerIdFormatter = LedgerIdFormatter.newLedgerIdFormatter(flags.ledgerIdFormatter, conf); + } else { + ledgerIdFormatter = LedgerIdFormatter.newLedgerIdFormatter(conf); + } + + if (!flags.entryFormatter.equals(DEFAULT)) { + entryFormatter = EntryFormatter.newEntryFormatter(flags.entryFormatter, conf); + } else { + entryFormatter = EntryFormatter.newEntryFormatter(conf); + } + } + + private boolean handler(ServerConfiguration conf, ReadJournalFlags cmd) throws IOException { + Journal journal = null; + if (getJournals(conf).size() > 1) { + if (cmd.dir.equals(DEFAULT)) { + LOG.error("ERROR: invalid or missing journal directory"); + usage(); + return false; + } + File journalDirectory = new File(cmd.dir); + for (Journal j : getJournals(conf)) { + if (j.getJournalDirectory().equals(journalDirectory)) { + journal = j; + break; + } + } + + if (journal == null) { + LOG.error("ERROR: journal directory not found"); + usage(); + return false; + } + } else { + journal = getJournals(conf).get(0); + } + + long journalId = cmd.journalId; + if (cmd.journalId == DEFAULT_JOURNALID && !cmd.fileName.equals(DEFAULT)) { + File f = new File(cmd.fileName); + String name = f.getName(); + if (!name.endsWith(".txn")) { + LOG.error("ERROR: invalid journal file name {}", cmd.fileName); + usage(); + return false; + } + String idString = name.split("\\.")[0]; + journalId = Long.parseLong(idString, 16); + } + scanJournal(journal, journalId, cmd.msg); + return true; + } + + private boolean checkArgs(ReadJournalFlags flags) { + if ((flags.fileName.equals(DEFAULT) && flags.journalId == DEFAULT_JOURNALID)) { + LOG.error("ERROR: You should specify a journal id or a journal file name"); + return false; + } + + return true; + } + + private synchronized List<Journal> getJournals(ServerConfiguration conf) throws IOException { + if (null == journals) { + journals = Lists.newArrayListWithCapacity(conf.getJournalDirs().length); + int idx = 0; + for (File journalDir : conf.getJournalDirs()) { + journals.add(new Journal(idx++, new File(journalDir, BookKeeperConstants.CURRENT_DIR), conf, + new LedgerDirsManager(conf, conf.getLedgerDirs(), + new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold())))); + } + } + return journals; + } + + /** +
* Scan a journal file. + * + * @param journalId Journal File Id + * @param printMsg Whether printing the entry data. + */ + private void scanJournal(Journal journal, long journalId, final boolean printMsg) throws IOException { + LOG.info("Scan journal {} ({}.txn)", journalId, Long.toHexString(journalId)); + scanJournal(journal, journalId, new Journal.JournalScanner() { + boolean printJournalVersion = false; + + @Override + public void process(int journalVersion, long offset, ByteBuffer entry) throws IOException { + if (!printJournalVersion) { + LOG.info("Journal Version : {}", journalVersion); + printJournalVersion = true; + } + FormatUtil + .formatEntry(offset, Unpooled.wrappedBuffer(entry), printMsg, ledgerIdFormatter, entryFormatter); + } + }); + } + + private void scanJournal(Journal journal, long journalId, Journal.JournalScanner scanner) throws IOException { + journal.scanJournal(journalId, 0L, scanner, false); + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookie/ReadLedgerCommand.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookie/ReadLedgerCommand.java new file mode 100644 index 00000000000..e07dc9d9d9e --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookie/ReadLedgerCommand.java @@ -0,0 +1,248 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.bookkeeper.tools.cli.commands.bookie; + +import com.beust.jcommander.Parameter; +import com.google.common.util.concurrent.UncheckedExecutionException; +import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; +import io.netty.buffer.ByteBufUtil; +import io.netty.buffer.UnpooledByteBufAllocator; +import io.netty.channel.EventLoopGroup; +import io.netty.channel.nio.NioEventLoopGroup; +import io.netty.util.concurrent.DefaultThreadFactory; +import java.io.IOException; +import java.util.Iterator; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.stream.LongStream; +import lombok.Setter; +import lombok.experimental.Accessors; +import org.apache.bookkeeper.client.BKException; +import org.apache.bookkeeper.client.BookKeeperAdmin; +import org.apache.bookkeeper.client.LedgerEntry; +import org.apache.bookkeeper.client.LedgerHandle; +import org.apache.bookkeeper.common.util.OrderedExecutor; +import org.apache.bookkeeper.conf.ClientConfiguration; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.proto.BookieClient; +import org.apache.bookkeeper.proto.BookieClientImpl; +import org.apache.bookkeeper.proto.BookieProtocol; +import org.apache.bookkeeper.stats.NullStatsLogger; +import org.apache.bookkeeper.tools.cli.helpers.BookieCommand; +import org.apache.bookkeeper.tools.framework.CliFlags; +import org.apache.bookkeeper.tools.framework.CliSpec; +import org.apache.bookkeeper.util.EntryFormatter; +import org.apache.bookkeeper.util.LedgerIdFormatter; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Command to read ledger entries. + */ +public class ReadLedgerCommand extends BookieCommand<ReadLedgerCommand.ReadLedgerFlags> { + + private static final Logger LOG = LoggerFactory.getLogger(ReadLedgerCommand.class); + + private static final String NAME = "readledger"; + private static final String DESC = "Read a range of entries from a ledger."; + + EntryFormatter entryFormatter; + LedgerIdFormatter ledgerIdFormatter; + + public ReadLedgerCommand() { + this(new ReadLedgerFlags()); + } + + public ReadLedgerCommand(EntryFormatter entryFormatter, LedgerIdFormatter ledgerIdFormatter) { + this(new ReadLedgerFlags()); + this.ledgerIdFormatter = ledgerIdFormatter; + this.entryFormatter = entryFormatter; + } + + private ReadLedgerCommand(ReadLedgerFlags flags) { + super(CliSpec.<ReadLedgerFlags>newBuilder() + .withName(NAME) + .withDescription(DESC) + .withFlags(flags) + .build()); + } + + /** + * Flags for read ledger command.
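+ * <p>A usage sketch ({@code conf} is an existing ServerConfiguration; the ids are illustrative): + * <pre>{@code + * ReadLedgerFlags flags = new ReadLedgerFlags().ledgerId(1L).firstEntryId(0L).lastEntryId(10L); + * new ReadLedgerCommand().apply(conf, flags); + * }</pre>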
+ */ + @Accessors(fluent = true) + @Setter + public static class ReadLedgerFlags extends CliFlags { + + @Parameter(names = { "-m", "--msg" }, description = "Print message body") + private boolean msg; + + @Parameter(names = { "-l", "--ledgerid" }, description = "Ledger ID") + private long ledgerId = -1; + + @Parameter(names = { "-fe", "--firstentryid" }, description = "First Entry ID") + private long firstEntryId = -1; + + @Parameter(names = { "-le", "--lastentryid" }, description = "Last Entry ID") + private long lastEntryId = -1; + + @Parameter(names = { "-r", "--force-recovery" }, + description = "Ensure the ledger is properly closed before reading") + private boolean forceRecovery; + + @Parameter(names = { "-b", "--bookie" }, description = "Only read from a specific bookie") + private String bookieAddress; + + @Parameter(names = { "-lf", "--ledgeridformatter" }, description = "Set ledger id formatter") + private String ledgerIdFormatter; + + @Parameter(names = { "-ef", "--entryformatter" }, description = "Set entry formatter") + private String entryFormatter; + } + + @Override + public boolean apply(ServerConfiguration conf, ReadLedgerFlags cmdFlags) { + if (cmdFlags.ledgerIdFormatter != null && ledgerIdFormatter == null) { + this.ledgerIdFormatter = LedgerIdFormatter.newLedgerIdFormatter(cmdFlags.ledgerIdFormatter, conf); + } else if (ledgerIdFormatter == null) { + this.ledgerIdFormatter = LedgerIdFormatter.newLedgerIdFormatter(conf); + } + + if (cmdFlags.entryFormatter != null && entryFormatter == null) { + this.entryFormatter = EntryFormatter.newEntryFormatter(cmdFlags.entryFormatter, conf); + } else if (entryFormatter == null) { + this.entryFormatter = EntryFormatter.newEntryFormatter(conf); + } + + try { + return readLedger(conf, cmdFlags); + } catch (Exception e) { + throw new UncheckedExecutionException(e.getMessage(), e); + } + } + + @SuppressFBWarnings("RCN_REDUNDANT_NULLCHECK_WOULD_HAVE_BEEN_A_NPE") + private boolean readLedger(ServerConfiguration serverConf, ReadLedgerFlags flags) + throws InterruptedException, BKException, IOException { + + long lastEntry = flags.lastEntryId; + + final BookieId bookie; + if (flags.bookieAddress != null) { + // A particular bookie was specified + bookie = BookieId.parse(flags.bookieAddress); + } else { + bookie = null; + } + + ClientConfiguration conf = new ClientConfiguration(); + conf.addConfiguration(serverConf); + + try (BookKeeperAdmin bk = new BookKeeperAdmin(conf)) { + if (flags.forceRecovery) { + // Force the opening of the ledger to trigger recovery + try (LedgerHandle lh = bk.openLedger(flags.ledgerId)) { + if (lastEntry == -1 || lastEntry > lh.getLastAddConfirmed()) { + lastEntry = lh.getLastAddConfirmed(); + } + } + } + + if (bookie == null) { + // No bookie was specified, use normal bk client + Iterator<LedgerEntry> entries = bk.readEntries(flags.ledgerId, flags.firstEntryId, lastEntry) + .iterator(); + while (entries.hasNext()) { + LedgerEntry entry = entries.next(); + formatEntry(entry, flags.msg); + } + } else { + // Use BookieClient to target a specific bookie + EventLoopGroup eventLoopGroup = new NioEventLoopGroup(); + OrderedExecutor executor = OrderedExecutor.newBuilder() + .numThreads(1) + .name("BookieClientScheduler") + .build(); + + ScheduledExecutorService scheduler = Executors.newSingleThreadScheduledExecutor( + new DefaultThreadFactory("BookKeeperClientSchedulerPool")); + + BookieClient bookieClient = new BookieClientImpl(conf, eventLoopGroup, UnpooledByteBufAllocator.DEFAULT, + executor, scheduler, NullStatsLogger.INSTANCE,
bk.getBookieAddressResolver()); + + // lastEntry is inclusive, matching the readEntries() range above + LongStream.rangeClosed(flags.firstEntryId, lastEntry).forEach(entryId -> { + CompletableFuture<Void> future = new CompletableFuture<>(); + + bookieClient.readEntry(bookie, flags.ledgerId, entryId, + (rc, ledgerId1, entryId1, buffer, ctx) -> { + if (rc != BKException.Code.OK) { + LOG.error("Failed to read entry {} -- {}", entryId1, + BKException.getMessage(rc)); + future.completeExceptionally(BKException.create(rc)); + return; + } + + LOG.info("--------- Lid={}, Eid={} ---------", + ledgerIdFormatter.formatLedgerId(flags.ledgerId), entryId); + if (flags.msg) { + LOG.info("Data: {}", ByteBufUtil.prettyHexDump(buffer)); + } + + future.complete(null); + }, null, BookieProtocol.FLAG_NONE); + + try { + future.get(); + } catch (Exception e) { + LOG.error("Error while reading entry {} from ledger {}", entryId, flags.ledgerId, e); + } + }); + + eventLoopGroup.shutdownGracefully(); + executor.shutdown(); + bookieClient.close(); + } + } + return true; + } + + /** + * Format the entry into a readable format. + * + * @param entry + * ledger entry to print + * @param printMsg + * Whether printing the message body + */ + private void formatEntry(LedgerEntry entry, boolean printMsg) { + long ledgerId = entry.getLedgerId(); + long entryId = entry.getEntryId(); + long entrySize = entry.getLength(); + + LOG.info("--------- Lid={}, Eid={}, EntrySize={} ---------", + ledgerIdFormatter.formatLedgerId(ledgerId), entryId, entrySize); + + if (printMsg) { + entryFormatter.formatEntry(entry.getEntry()); + } + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookie/ReadLogCommand.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookie/ReadLogCommand.java new file mode 100644 index 00000000000..edef6609ff2 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookie/ReadLogCommand.java @@ -0,0 +1,297 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
+ */ +package org.apache.bookkeeper.tools.cli.commands.bookie; + +import com.beust.jcommander.Parameter; +import com.google.common.util.concurrent.UncheckedExecutionException; +import io.netty.buffer.ByteBuf; +import java.io.File; +import java.io.IOException; +import lombok.Setter; +import lombok.experimental.Accessors; +import org.apache.bookkeeper.bookie.ReadOnlyDefaultEntryLogger; +import org.apache.bookkeeper.bookie.storage.EntryLogScanner; +import org.apache.bookkeeper.bookie.storage.EntryLogger; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.tools.cli.helpers.BookieCommand; +import org.apache.bookkeeper.tools.framework.CliFlags; +import org.apache.bookkeeper.tools.framework.CliSpec; +import org.apache.bookkeeper.util.EntryFormatter; +import org.apache.bookkeeper.util.LedgerIdFormatter; +import org.apache.commons.lang.mutable.MutableBoolean; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Command to read entry log files. + */ +public class ReadLogCommand extends BookieCommand<ReadLogCommand.ReadLogFlags> { + + private static final String NAME = "readlog"; + private static final String DESC = "Scan an entry file and format the entries into readable format."; + private static final Logger LOG = LoggerFactory.getLogger(ReadLogCommand.class); + + private EntryLogger entryLogger; + private EntryFormatter entryFormatter; + private LedgerIdFormatter ledgerIdFormatter; + + public ReadLogCommand() { + this(new ReadLogFlags()); + } + + public ReadLogCommand(LedgerIdFormatter ledgerIdFormatter, EntryFormatter entryFormatter) { + this(new ReadLogFlags()); + this.ledgerIdFormatter = ledgerIdFormatter; + this.entryFormatter = entryFormatter; + } + + private ReadLogCommand(ReadLogFlags flags) { + super(CliSpec.<ReadLogFlags>newBuilder().withName(NAME).withDescription(DESC).withFlags(flags).build()); + } + + /** + * Flags for read log command.
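+ * <p>A usage sketch ({@code conf} is an existing ServerConfiguration; the entry log id is illustrative): + * <pre>{@code + * ReadLogFlags flags = new ReadLogFlags().entryLogId(0L).msg(true); + * new ReadLogCommand().apply(conf, flags); + * }</pre>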
+ */ + @Accessors(fluent = true) + @Setter + public static class ReadLogFlags extends CliFlags { + + @Parameter(names = { "-m", "--msg" }, description = "Print message body") + private boolean msg; + + @Parameter(names = { "-l", "--ledgerid" }, description = "Ledger ID") + private long ledgerId = -1; + + @Parameter(names = { "-e", "--entryid" }, description = "Entry ID") + private long entryId = -1; + + @Parameter(names = { "-sp", "--startpos" }, description = "Start Position") + private long startPos = -1; + + @Parameter(names = { "-ep", "--endpos" }, description = "End Position") + private long endPos = -1; + + @Parameter(names = { "-f", "--filename" }, description = "Entry log filename") + private String filename; + + @Parameter(names = { "-li", "--entrylogid" }, description = "Entry log id") + private long entryLogId = -1; + + @Parameter(names = {"-lf", "--ledgerIdFormatter"}, description = "Set ledger id formatter") + private String ledgerIdFormatter; + + @Parameter(names = {"-ef", "--entryformatter"}, description = "Set entry formatter") + private String entryFormatter; + } + + @Override + public boolean apply(ServerConfiguration conf, ReadLogFlags cmdFlags) { + + if (cmdFlags.ledgerIdFormatter != null && this.ledgerIdFormatter == null) { + this.ledgerIdFormatter = LedgerIdFormatter.newLedgerIdFormatter(cmdFlags.ledgerIdFormatter, conf); + } else if (this.ledgerIdFormatter == null) { + this.ledgerIdFormatter = LedgerIdFormatter.newLedgerIdFormatter(conf); + } + + if (cmdFlags.entryFormatter != null && this.entryFormatter == null) { + this.entryFormatter = EntryFormatter.newEntryFormatter(cmdFlags.entryFormatter, conf); + } else if (this.entryFormatter == null) { + this.entryFormatter = EntryFormatter.newEntryFormatter(conf); + } + + if (cmdFlags.entryLogId == -1 && cmdFlags.filename == null) { + LOG.error("Missing entry log id or entry log file name"); + usage(); + return false; + } + try { + return readLog(conf, cmdFlags); + } catch (Exception e) { + throw new UncheckedExecutionException(e.getMessage(), e); + } + } + + private boolean readLog(ServerConfiguration conf, ReadLogFlags flags) throws Exception { + long logId = flags.entryLogId; + if (logId == -1 && flags.filename != null) { + File f = new File(flags.filename); + String name = f.getName(); + if (!name.endsWith(".log")) { + LOG.error("Invalid entry log file name {}", flags.filename); + usage(); + return false; + } + String idString = name.split("\\.")[0]; + logId = Long.parseLong(idString, 16); + } + + final long lId = flags.ledgerId; + final long eId = flags.entryId; + final long startpos = flags.startPos; + final long endpos = flags.endPos; + + // scan entry log + if (startpos != -1) { + if ((endpos != -1) && (endpos < startpos)) { + LOG.error("ERROR: the start position of the range should be less than or equal to the end position"); + return false; + } + scanEntryLogForPositionRange(conf, logId, startpos, endpos, flags.msg); + } else if (lId != -1) { + scanEntryLogForSpecificEntry(conf, logId, lId, eId, flags.msg); + } else { + scanEntryLog(conf, logId, flags.msg); + } + return true; + } + + /** + * Scan over an entry log file for entries in the given position range. + * + * @param logId Entry Log File id. + * @param rangeStartPos Start position of the entry we are looking for + * @param rangeEndPos End position of the entry we are looking for (-1 for scanning till the end of the entry log) + * @param printMsg Whether printing the entry data.
* @throws Exception + */ + private void scanEntryLogForPositionRange(ServerConfiguration conf, long logId, final long rangeStartPos, + final long rangeEndPos, + final boolean printMsg) throws Exception { + LOG.info("Scan entry log {} ({}.log) for PositionRange: {} - {}", + logId, Long.toHexString(logId), rangeStartPos, rangeEndPos); + final MutableBoolean entryFound = new MutableBoolean(false); + scanEntryLog(conf, logId, new EntryLogScanner() { + private MutableBoolean stopScanning = new MutableBoolean(false); + + @Override + public boolean accept(long ledgerId) { + return !stopScanning.booleanValue(); + } + + @Override + public void process(long ledgerId, long entryStartPos, ByteBuf entry) throws IOException { + if (!stopScanning.booleanValue()) { + if ((rangeEndPos != -1) && (entryStartPos > rangeEndPos)) { + stopScanning.setValue(true); + } else { + int entrySize = entry.readableBytes(); + /* + * The size of an entry (payload plus header) is stored as an + * int value in the log file, but that 4-byte length field is + * not counted in entrySize itself. Hence, to calculate the + * end position of the entry we need to add 4 (the int size + * of the entrySize field). Please check + * EntryLogger.scanEntryLog. + */ + long entryEndPos = entryStartPos + entrySize + 4 - 1; + if (((rangeEndPos == -1) || (entryStartPos <= rangeEndPos)) && (rangeStartPos <= entryEndPos)) { + FormatUtil.formatEntry(entryStartPos, entry, printMsg, ledgerIdFormatter, entryFormatter); + entryFound.setValue(true); + } + } + } + } + }); + if (!entryFound.booleanValue()) { + LOG.info("Entry log {} ({}.log) doesn't have any entry in the range {} - {}. " + + "Probably the position range you have provided is less than the LOGFILE_HEADER_SIZE (1024) " + + "or greater than the current log file size.", + logId, Long.toHexString(logId), rangeStartPos, rangeEndPos); + } + } + + /** + * Scan over entry log. + * + * @param logId Entry Log Id + * @param scanner Entry Log Scanner + */ + private void scanEntryLog(ServerConfiguration conf, long logId, EntryLogScanner scanner) + throws IOException { + initEntryLogger(conf); + entryLogger.scanEntryLog(logId, scanner); + } + + private synchronized void initEntryLogger(ServerConfiguration conf) throws IOException { + if (null == entryLogger) { + // provide read only entry logger + entryLogger = new ReadOnlyDefaultEntryLogger(conf); + } + } + + /** + * Scan over an entry log file for a particular entry. + * + * @param logId Entry Log File id. + * @param ledgerId id of the ledger + * @param entryId entryId of the ledger we are looking for (-1 for all of the entries of the ledger) + * @param printMsg Whether printing the entry data. + * @throws Exception + */ + private void scanEntryLogForSpecificEntry(ServerConfiguration conf, long logId, final long ledgerId, + final long entryId, + final boolean printMsg) throws Exception { + LOG.info("Scan entry log {} ({}.log) for LedgerId {} {}", logId, Long.toHexString(logId), ledgerId, + ((entryId == -1) ?
"" : " for EntryId " + entryId)); + final MutableBoolean entryFound = new MutableBoolean(false); + scanEntryLog(conf, logId, new EntryLogScanner() { + @Override + public boolean accept(long candidateLedgerId) { + return ((candidateLedgerId == ledgerId) && ((!entryFound.booleanValue()) || (entryId == -1))); + } + + @Override + public void process(long candidateLedgerId, long startPos, ByteBuf entry) { + long entrysLedgerId = entry.getLong(entry.readerIndex()); + long entrysEntryId = entry.getLong(entry.readerIndex() + 8); + if ((candidateLedgerId == entrysLedgerId) && (candidateLedgerId == ledgerId) + && ((entrysEntryId == entryId) || (entryId == -1))) { + entryFound.setValue(true); + FormatUtil.formatEntry(startPos, entry, printMsg, ledgerIdFormatter, entryFormatter); + } + } + }); + if (!entryFound.booleanValue()) { + LOG.info("LedgerId {} {} is not available in the entry log {} ({}.log)", + ledgerId, ((entryId == -1) ? "" : " EntryId " + entryId), logId, Long.toHexString(logId)); + } + } + + /** + * Scan over an entry log file. + * + * @param logId + * Entry Log File id. + * @param printMsg + * Whether printing the entry data. + */ + private void scanEntryLog(ServerConfiguration conf, long logId, final boolean printMsg) throws Exception { + LOG.info("Scan entry log {} ({}.log)", logId, Long.toHexString(logId)); + scanEntryLog(conf, logId, new EntryLogScanner() { + @Override + public boolean accept(long ledgerId) { + return true; + } + + @Override + public void process(long ledgerId, long startPos, ByteBuf entry) { + FormatUtil.formatEntry(startPos, entry, printMsg, ledgerIdFormatter, entryFormatter); + } + }); + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookie/ReadLogMetadataCommand.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookie/ReadLogMetadataCommand.java new file mode 100644 index 00000000000..b3a88af7e05 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookie/ReadLogMetadataCommand.java @@ -0,0 +1,150 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.bookkeeper.tools.cli.commands.bookie; + +import com.beust.jcommander.Parameter; +import com.google.common.util.concurrent.UncheckedExecutionException; +import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; +import java.io.File; +import java.io.IOException; +import lombok.Setter; +import lombok.experimental.Accessors; +import org.apache.bookkeeper.bookie.EntryLogMetadata; +import org.apache.bookkeeper.bookie.ReadOnlyDefaultEntryLogger; +import org.apache.bookkeeper.bookie.storage.EntryLogger; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.tools.cli.commands.bookie.ReadLogMetadataCommand.ReadLogMetadataFlags; +import org.apache.bookkeeper.tools.cli.helpers.BookieCommand; +import org.apache.bookkeeper.tools.framework.CliFlags; +import org.apache.bookkeeper.tools.framework.CliSpec; +import org.apache.bookkeeper.util.LedgerIdFormatter; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Command to print metadata of entry log. + */ +public class ReadLogMetadataCommand extends BookieCommand<ReadLogMetadataFlags> { + + static final Logger LOG = LoggerFactory.getLogger(ReadLogMetadataCommand.class); + + private static final String NAME = "readlogmetadata"; + private static final String DESC = "Prints entry log metadata"; + + private static final long DEFAULT_LOGID = -1L; + private static final String DEFAULT_FILENAME = ""; + private static final String DEFAULT = ""; + + private LedgerIdFormatter ledgerIdFormatter; + + EntryLogger entryLogger = null; + + public ReadLogMetadataCommand() { + this(new ReadLogMetadataFlags()); + } + + public ReadLogMetadataCommand(LedgerIdFormatter ledgerIdFormatter) { + this(new ReadLogMetadataFlags()); + this.ledgerIdFormatter = ledgerIdFormatter; + } + + private ReadLogMetadataCommand(ReadLogMetadataFlags flags) { + super(CliSpec.<ReadLogMetadataFlags>newBuilder() + .withName(NAME) + .withDescription(DESC) + .withFlags(flags) + .build()); + } + + /** + * Flags for read log metadata command.
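+ * <p>A usage sketch ({@code conf} is an existing ServerConfiguration; the log id is illustrative): + * <pre>{@code + * new ReadLogMetadataCommand().apply(conf, new ReadLogMetadataFlags().logId(0L)); + * }</pre>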
+ */ + @Accessors(fluent = true) + @Setter + public static class ReadLogMetadataFlags extends CliFlags { + + @Parameter(names = { "-l", "--logid" }, description = "Entry log id") + private long logId = DEFAULT_LOGID; + + @Parameter(names = { "-f", "--filename" }, description = "Entry log filename") + private String logFilename = DEFAULT_FILENAME; + + @Parameter(names = { "-lf", "--ledgeridformatter" }, description = "Set ledger id formatter") + private String ledgerIdFormatter = DEFAULT; + + } + + @Override + public boolean apply(ServerConfiguration conf, ReadLogMetadataFlags cmdFlags) { + if (!cmdFlags.ledgerIdFormatter.equals(DEFAULT) && ledgerIdFormatter == null) { + this.ledgerIdFormatter = LedgerIdFormatter.newLedgerIdFormatter(cmdFlags.ledgerIdFormatter, conf); + } else if (ledgerIdFormatter == null) { + this.ledgerIdFormatter = LedgerIdFormatter.newLedgerIdFormatter(conf); + } + if (cmdFlags.logId == DEFAULT_LOGID && cmdFlags.logFilename.equals(DEFAULT_FILENAME)) { + LOG.error("Missing entry log id or entry log file name"); + return false; + } + try { + return readLogMetadata(conf, cmdFlags); + } catch (IOException e) { + throw new UncheckedExecutionException(e.getMessage(), e); + } + } + + public boolean readLogMetadata(ServerConfiguration conf, ReadLogMetadataFlags flags) throws IOException { + long logId = DEFAULT_LOGID; + if (flags.logId != DEFAULT_LOGID) { + logId = flags.logId; + } else if (!flags.logFilename.equals(DEFAULT_FILENAME)) { + File f = new File(flags.logFilename); + String name = f.getName(); + if (!name.endsWith(".log")) { + LOG.error("ERROR: invalid entry log file name {}", flags.logFilename); + return false; + } + String idString = name.split("\\.")[0]; + logId = Long.parseLong(idString, 16); + } + + printEntryLogMetadata(conf, logId); + return true; + } + + private void printEntryLogMetadata(ServerConfiguration conf, long logId) throws IOException { + LOG.info("Print entryLogMetadata of entrylog {} ({}.log)", logId, Long.toHexString(logId)); + initEntryLogger(conf); + EntryLogMetadata entryLogMetadata = entryLogger.getEntryLogMetadata(logId); + LOG.info("entryLogId: {}, remaining size: {}, total size: {}, usage: {}", entryLogMetadata.getEntryLogId(), + entryLogMetadata.getRemainingSize(), entryLogMetadata.getTotalSize(), entryLogMetadata.getUsage()); + + entryLogMetadata.getLedgersMap().forEach((ledgerId, size) -> { + LOG.info("--------- Lid={}, TotalSizeOfEntriesOfLedger={} ---------", + ledgerIdFormatter.formatLedgerId(ledgerId), size); + }); + } + + @SuppressFBWarnings("IS2_INCONSISTENT_SYNC") + private synchronized void initEntryLogger(ServerConfiguration conf) throws IOException { + // provide read only entry logger + if (null == entryLogger) { + entryLogger = new ReadOnlyDefaultEntryLogger(conf); + } + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookie/RebuildDBLedgerLocationsIndexCommand.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookie/RebuildDBLedgerLocationsIndexCommand.java new file mode 100644 index 00000000000..8cbb986633d --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookie/RebuildDBLedgerLocationsIndexCommand.java @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.bookkeeper.tools.cli.commands.bookie; + +import java.io.IOException; +import org.apache.bookkeeper.bookie.storage.ldb.LocationsIndexRebuildOp; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.tools.cli.helpers.BookieCommand; +import org.apache.bookkeeper.tools.framework.CliFlags; +import org.apache.bookkeeper.tools.framework.CliSpec; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Command to rebuild DBLedgerStorage locations index. + */ +public class RebuildDBLedgerLocationsIndexCommand extends BookieCommand<CliFlags> { + + static final Logger LOG = LoggerFactory.getLogger(RebuildDBLedgerLocationsIndexCommand.class); + + private static final String NAME = "rebuild-db-ledger-locations-index"; + private static final String DESC = "Rebuild DBLedgerStorage locations index by scanning the entry logs"; + + public RebuildDBLedgerLocationsIndexCommand() { + super(CliSpec.<CliFlags>newBuilder().withName(NAME).withDescription(DESC).withFlags(new CliFlags()).build()); + } + + @Override + public boolean apply(ServerConfiguration conf, CliFlags cmdFlags) { + LOG.info("=== Rebuilding DBStorage locations index ==="); + ServerConfiguration serverConfiguration = new ServerConfiguration(conf); + try { + new LocationsIndexRebuildOp(serverConfiguration).initiate(); + } catch (IOException e) { + LOG.error("Failed to rebuild DBStorage locations index", e); + return false; + } + LOG.info("-- Done rebuilding DBStorage locations index --"); + return true; + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookie/RebuildDBLedgersIndexCommand.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookie/RebuildDBLedgersIndexCommand.java new file mode 100644 index 00000000000..b37f968b0d3 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookie/RebuildDBLedgersIndexCommand.java @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
+ */ +package org.apache.bookkeeper.tools.cli.commands.bookie; + +import com.beust.jcommander.Parameter; +import lombok.Setter; +import lombok.experimental.Accessors; +import org.apache.bookkeeper.bookie.storage.ldb.LedgersIndexRebuildOp; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.tools.cli.helpers.BookieCommand; +import org.apache.bookkeeper.tools.framework.CliFlags; +import org.apache.bookkeeper.tools.framework.CliSpec; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Command to rebuild DBLedgerStorage ledgers index. + */ +public class RebuildDBLedgersIndexCommand extends BookieCommand<RebuildDBLedgersIndexCommand.RebuildLedgersIndexFlags> { + + static final Logger LOG = LoggerFactory.getLogger(RebuildDBLedgersIndexCommand.class); + + private static final String NAME = "rebuild-db-ledgers-index"; + private static final String DESC = "Rebuild DBLedgerStorage ledgers index by scanning the journal" + " and entry logs (sets all ledgers to fenced)"; + + public RebuildDBLedgersIndexCommand() { + this(new RebuildLedgersIndexFlags()); + } + + public RebuildDBLedgersIndexCommand(RebuildLedgersIndexFlags flags) { + super(CliSpec.<RebuildLedgersIndexFlags>newBuilder().withName(NAME) + .withDescription(DESC).withFlags(flags).build()); + } + + @Override + public boolean apply(ServerConfiguration conf, RebuildLedgersIndexFlags cmdFlags) { + LOG.info("=== Rebuilding DBStorage ledgers index ==="); + ServerConfiguration serverConfiguration = new ServerConfiguration(conf); + boolean success = new LedgersIndexRebuildOp(serverConfiguration, cmdFlags.verbose).initiate(); + if (success) { + LOG.info("-- Done rebuilding DBStorage ledgers index --"); + } else { + LOG.info("-- Aborted rebuilding DBStorage ledgers index --"); + } + + return success; + } + + /** + * Flags for rebuild DBLedgerStorage ledgers index command. + */ + @Accessors(fluent = true) + @Setter + public static class RebuildLedgersIndexFlags extends CliFlags { + @Parameter(names = { "-v", "--verbose" }, + description = "Verbose logging. Print each ledger id found and added to the rebuilt index") + private boolean verbose; + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookie/RegenerateInterleavedStorageIndexFileCommand.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookie/RegenerateInterleavedStorageIndexFileCommand.java new file mode 100644 index 00000000000..c50e808116e --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookie/RegenerateInterleavedStorageIndexFileCommand.java @@ -0,0 +1,133 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
+ */ +package org.apache.bookkeeper.tools.cli.commands.bookie; + +import com.beust.jcommander.Parameter; +import com.beust.jcommander.converters.CommaParameterSplitter; +import com.google.common.util.concurrent.UncheckedExecutionException; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.security.NoSuchAlgorithmException; +import java.util.Base64; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; +import lombok.Setter; +import lombok.experimental.Accessors; +import org.apache.bookkeeper.bookie.InterleavedStorageRegenerateIndexOp; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.tools.cli.helpers.BookieCommand; +import org.apache.bookkeeper.tools.framework.CliFlags; +import org.apache.bookkeeper.tools.framework.CliSpec; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Command to regenerate an index file for interleaved storage. + */ +public class RegenerateInterleavedStorageIndexFileCommand + extends BookieCommand<RegenerateInterleavedStorageIndexFileCommand.RISIFFlags> { + + static final Logger LOG = LoggerFactory.getLogger(RegenerateInterleavedStorageIndexFileCommand.class); + + private static final String NAME = "regenerate-interleaved-storage-index-file"; + private static final String DESC = + "Regenerate an interleaved storage index file from available entry logger files."; + private static final String DEFAULT = ""; + + public RegenerateInterleavedStorageIndexFileCommand() { + this(new RISIFFlags()); + } + + private RegenerateInterleavedStorageIndexFileCommand(RISIFFlags flags) { + super(CliSpec.<RISIFFlags>newBuilder() + .withName(NAME) + .withDescription(DESC) + .withFlags(flags) + .build()); + } + + /** + * Flags for regenerate interleaved storage index file command. + */ + @Accessors(fluent = true) + @Setter + public static class RISIFFlags extends CliFlags { + + @Parameter(names = { "-p", "--password" }, + description = "The bookie stores the password in the index file, so we need it to regenerate. " + + "This must match the value in the ledger metadata.") + private String password = DEFAULT; + + @Parameter(names = { "-b", "--b64password" }, + description = "The password in base64 encoding, for cases where the password is not UTF-8.") + private String b64Password = DEFAULT; + + @Parameter(names = { "-d", "--dryrun" }, description = "Process the entryLogger, but don't write anything.") + private boolean dryRun; + + @Parameter(names = { "-l", "--ledgerids" }, + description = "Ledger(s) whose index needs to be regenerated.
Multiple can be specified, comma separated.", + splitter = CommaParameterSplitter.class) + private List<Long> ledgerIds; + + } + + @Override + public boolean apply(ServerConfiguration conf, RISIFFlags cmdFlags) { + try { + return generate(conf, cmdFlags); + } catch (Exception e) { + throw new UncheckedExecutionException(e.getMessage(), e); + } + } + + private boolean generate(ServerConfiguration conf, RISIFFlags flags) throws NoSuchAlgorithmException, IOException { + validateFlags(flags); + byte[] password; + if (!flags.password.equals(DEFAULT)) { + password = flags.password.getBytes(StandardCharsets.UTF_8); + } else if (!flags.b64Password.equals(DEFAULT)) { + password = Base64.getDecoder().decode(flags.b64Password); + } else { + LOG.error("The password must be specified to regenerate the index file"); + return false; + } + + if (flags.ledgerIds == null || flags.ledgerIds.isEmpty()) { + LOG.error("The ledger ids must be specified to regenerate the index file"); + return false; + } + + Set<Long> ledgerIds = flags.ledgerIds.stream().collect(Collectors.toSet()); + + LOG.info("=== Rebuilding index file for {} ===", ledgerIds); + ServerConfiguration serverConfiguration = new ServerConfiguration(conf); + InterleavedStorageRegenerateIndexOp op = new InterleavedStorageRegenerateIndexOp(serverConfiguration, ledgerIds, + password); + op.initiate(flags.dryRun); + + LOG.info("-- Done rebuilding index file for {} --", ledgerIds); + return true; + } + + private void validateFlags(RISIFFlags flags) { + if (flags.password == null) { + flags.password = DEFAULT; + } + if (flags.b64Password == null) { + flags.b64Password = DEFAULT; + } + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookie/SanityTestCommand.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookie/SanityTestCommand.java new file mode 100644 index 00000000000..d8a1908fb90 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookie/SanityTestCommand.java @@ -0,0 +1,210 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
+ */
+package org.apache.bookkeeper.tools.cli.commands.bookie;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+
+import com.beust.jcommander.Parameter;
+import com.google.common.util.concurrent.UncheckedExecutionException;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.CompletableFuture;
+import lombok.Setter;
+import lombok.experimental.Accessors;
+import org.apache.bookkeeper.bookie.LocalBookieEnsemblePlacementPolicy;
+import org.apache.bookkeeper.client.BKException;
+import org.apache.bookkeeper.client.BookKeeper;
+import org.apache.bookkeeper.client.LedgerEntry;
+import org.apache.bookkeeper.client.LedgerHandle;
+import org.apache.bookkeeper.common.concurrent.FutureUtils;
+import org.apache.bookkeeper.conf.ClientConfiguration;
+import org.apache.bookkeeper.conf.ServerConfiguration;
+import org.apache.bookkeeper.tools.cli.commands.bookie.SanityTestCommand.SanityFlags;
+import org.apache.bookkeeper.tools.cli.helpers.BookieCommand;
+import org.apache.bookkeeper.tools.framework.CliFlags;
+import org.apache.bookkeeper.tools.framework.CliSpec;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * A bookie command to sanity test for local bookie.
+ */
+public class SanityTestCommand extends BookieCommand<SanityFlags> {
+
+    private static final Logger LOG = LoggerFactory.getLogger(SanityTestCommand.class);
+    private static final String NAME = "sanitytest";
+    private static final String DESC = "Sanity test for local bookie. "
+                                       + "Create ledger and write/read entries on local bookie.";
+
+    public SanityTestCommand() {
+        this(new SanityFlags());
+    }
+
+    public SanityTestCommand(SanityFlags flags) {
+        super(CliSpec.newBuilder().withFlags(flags).withName(NAME).withDescription(DESC).build());
+    }
+
+    /**
+     * Flags for sanity command.
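+     *
+     * <p>Hypothetical programmatic sketch (not part of this change); the metadata URI and
+     * values below are assumptions.
+     * <pre>{@code
+     * ServerConfiguration conf = new ServerConfiguration();
+     * conf.setMetadataServiceUri("zk://localhost:2181/ledgers"); // assumed metadata service
+     * SanityFlags flags = new SanityFlags().entries(100).timeout(5);
+     * boolean ok = new SanityTestCommand(flags).apply(conf, flags);
+     * }</pre>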
+     */
+    @Accessors(fluent = true)
+    @Setter
+    public static class SanityFlags extends CliFlags {
+
+        @Parameter(names = {"-e", "--entries"}, description = "Total entries to be added for the test (default 10)")
+        private int entries = 10;
+
+        @Parameter(names = { "-t",
+            "--timeout" }, description = "Timeout for write/read operations in seconds (default 1)")
+        private int timeout = 1;
+
+    }
+
+    @Override
+    public boolean apply(ServerConfiguration conf, SanityFlags cmdFlags) {
+        try {
+            return handle(conf, cmdFlags);
+        } catch (Exception e) {
+            throw new UncheckedExecutionException(e.getMessage(), e);
+        }
+    }
+
+    private static boolean handle(ServerConfiguration conf, SanityFlags cmdFlags) throws Exception {
+        try {
+            return handleAsync(conf, cmdFlags).get();
+        } catch (Exception e) {
+            LOG.warn("Error in bookie sanity test", e);
+            return false;
+        }
+    }
+
+    public static CompletableFuture<Boolean> handleAsync(ServerConfiguration conf, SanityFlags cmdFlags) {
+        CompletableFuture<Boolean> result = new CompletableFuture<>();
+        ClientConfiguration clientConf = new ClientConfiguration();
+        clientConf.addConfiguration(conf);
+        clientConf.setEnsemblePlacementPolicy(LocalBookieEnsemblePlacementPolicy.class);
+        clientConf.setAddEntryTimeout(cmdFlags.timeout);
+        clientConf.setReadEntryTimeout(cmdFlags.timeout);
+
+        BookKeeper bk;
+        try {
+            bk = new BookKeeper(clientConf);
+        } catch (BKException | IOException | InterruptedException e) {
+            LOG.warn("Failed to initialize bookkeeper client", e);
+            result.completeExceptionally(e);
+            return result;
+        }
+
+        bk.asyncCreateLedger(1, 1, BookKeeper.DigestType.MAC, new byte[0], (rc, lh, ctx) -> {
+            if (rc != BKException.Code.OK) {
+                LOG.warn("ledger creation failed for sanity command {}", rc);
+                result.completeExceptionally(BKException.create(rc));
+                return;
+            }
+            List<CompletableFuture<Void>> entriesFutures = new ArrayList<>();
+            for (int i = 0; i < cmdFlags.entries; i++) {
+                String content = "entry-" + i;
+                CompletableFuture<Void> entryFuture = new CompletableFuture<>();
+                entriesFutures.add(entryFuture);
+                lh.asyncAddEntry(content.getBytes(UTF_8), (arc, alh, entryId, actx) -> {
+                    if (arc != BKException.Code.OK) {
+                        LOG.warn("ledger add entry failed for {}-{}", alh.getId(), arc);
+                        entryFuture.completeExceptionally(BKException.create(arc));
+                        return;
+                    }
+                    entryFuture.complete(null);
+                }, null);
+            }
+            CompletableFuture<LedgerHandle> lhFuture = new CompletableFuture<>();
+            CompletableFuture<Void> readEntryFuture = new CompletableFuture<>();
+            FutureUtils.collect(entriesFutures).thenCompose(_r -> lh.closeAsync()).thenCompose(_r -> {
+                bk.asyncOpenLedger(lh.getId(), BookKeeper.DigestType.MAC, new byte[0], (orc, olh, octx) -> {
+                    if (orc != BKException.Code.OK) {
+                        LOG.warn("open sanity ledger failed for {}-{}", lh.getId(), orc);
+                        lhFuture.completeExceptionally(BKException.create(orc));
+                        return;
+                    }
+                    long lac = olh.getLastAddConfirmed();
+                    if (lac != (cmdFlags.entries - 1)) {
+                        lhFuture.completeExceptionally(new Exception("Invalid last entry found on ledger.
expecting: " + + (cmdFlags.entries - 1) + " -- found: " + lac)); + return; + } + lhFuture.complete(lh); + }, null); + return lhFuture; + }).thenCompose(rlh -> { + rlh.asyncReadEntries(0, cmdFlags.entries - 1, (rrc, rlh2, entries, rctx) -> { + if (rrc != BKException.Code.OK) { + LOG.warn("reading sanity ledger failed for {}-{}", lh.getId(), rrc); + readEntryFuture.completeExceptionally(BKException.create(rrc)); + return; + } + int i = 0; + while (entries.hasMoreElements()) { + LedgerEntry entry = entries.nextElement(); + String actualMsg = new String(entry.getEntry(), UTF_8); + String expectedMsg = "entry-" + (i++); + if (!expectedMsg.equals(actualMsg)) { + readEntryFuture.completeExceptionally( + new Exception("Failed validation of received message - Expected: " + expectedMsg + + ", Actual: " + actualMsg)); + return; + } + } + LOG.info("Read {} entries from ledger {}", i, lh.getId()); + LOG.info("Bookie sanity test succeeded"); + readEntryFuture.complete(null); + }, null); + return readEntryFuture; + }).thenAccept(_r -> { + close(bk, lh); + result.complete(true); + }).exceptionally(ex -> { + close(bk, lh); + result.completeExceptionally(ex.getCause()); + return null; + }); + }, null); + return result; + } + + public static void close(BookKeeper bk, LedgerHandle lh) { + if (lh != null) { + bk.asyncDeleteLedger(lh.getId(), (rc, ctx) -> { + if (rc != BKException.Code.OK) { + LOG.info("Failed to delete ledger id {}", lh.getId()); + } + close(bk); + }, null); + } else { + close(bk); + } + } + + private static void close(BookKeeper bk) { + try { + bk.close(); + } catch (Exception e) { + LOG.info("Failed to close bookkeeper client {}", e.getMessage(), e); + } + } + +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookie/UpdateBookieInLedgerCommand.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookie/UpdateBookieInLedgerCommand.java new file mode 100644 index 00000000000..eed3bf53948 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookie/UpdateBookieInLedgerCommand.java @@ -0,0 +1,186 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */
+package org.apache.bookkeeper.tools.cli.commands.bookie;
+
+import com.beust.jcommander.Parameter;
+import com.google.common.util.concurrent.UncheckedExecutionException;
+import java.io.IOException;
+import java.util.concurrent.TimeUnit;
+import lombok.Setter;
+import lombok.experimental.Accessors;
+import org.apache.bookkeeper.bookie.BookieShell;
+import org.apache.bookkeeper.client.BKException;
+import org.apache.bookkeeper.client.BookKeeper;
+import org.apache.bookkeeper.client.BookKeeperAdmin;
+import org.apache.bookkeeper.client.UpdateLedgerOp;
+import org.apache.bookkeeper.common.util.MathUtils;
+import org.apache.bookkeeper.conf.ClientConfiguration;
+import org.apache.bookkeeper.conf.ServerConfiguration;
+import org.apache.bookkeeper.net.BookieId;
+import org.apache.bookkeeper.tools.cli.helpers.BookieCommand;
+import org.apache.bookkeeper.tools.framework.CliFlags;
+import org.apache.bookkeeper.tools.framework.CliSpec;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Command to update bookie in ledgers metadata.
+ */
+public class UpdateBookieInLedgerCommand extends BookieCommand<UpdateBookieInLedgerCommand.UpdateBookieInLedgerFlags> {
+
+    static final Logger LOG = LoggerFactory.getLogger(UpdateBookieInLedgerCommand.class);
+
+    private static final String NAME = "update-bookie-ledger-cmd";
+    private static final String DESC = "Update bookie in ledgers metadata (this may take a long time).";
+
+    public UpdateBookieInLedgerCommand() {
+        this(new UpdateBookieInLedgerFlags());
+    }
+
+    private UpdateBookieInLedgerCommand(UpdateBookieInLedgerFlags flags) {
+        super(CliSpec.newBuilder()
+                  .withName(NAME)
+                  .withDescription(DESC)
+                  .withFlags(flags)
+                  .build());
+    }
+
+    /**
+     * Flags for update bookie in ledger command.
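+     *
+     * <p>Hedged sketch (bookie ids are placeholders): replace a retired bookie in all ledger
+     * metadata at a limited rate.
+     * <pre>{@code
+     * UpdateBookieInLedgerFlags flags = new UpdateBookieInLedgerFlags()
+     *         .srcBookie("old-bookie.example.com:3181")
+     *         .destBookie("new-bookie.example.com:3181")
+     *         .updatePerSec(10);
+     * boolean ok = new UpdateBookieInLedgerCommand().apply(conf, flags); // conf: a ServerConfiguration
+     * }</pre>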
") + private String destBookie; + + @Parameter(names = { "-s", "--updatepersec" }, + description = "Number of ledgers updating per second (default: 5 per sec)") + private int updatePerSec = 5; + + @Parameter(names = { "-r", + "--maxOutstandingReads" }, description = "Max outstanding reads (default: 5 * updatespersec)") + private int maxOutstandingReads = updatePerSec * 5; + + @Parameter(names = {"-l", "--limit"}, + description = "Maximum number of ledgers of ledgers to update (default: no limit)") + private int limit = Integer.MIN_VALUE; + + @Parameter(names = { "-v", "--verbose" }, description = "Print status of the ledger updation (default: false)") + private boolean verbose; + + @Parameter(names = { "-p", "--printprogress" }, + description = "Print messages on every configured seconds if verbose turned on (default: 10 secs)") + private long printProgress = 10; + } + + @Override + public boolean apply(ServerConfiguration conf, UpdateBookieInLedgerFlags cmdFlags) { + try { + return updateLedger(conf, cmdFlags); + } catch (Exception e) { + throw new UncheckedExecutionException(e.getMessage(), e); + } + } + + private boolean updateLedger(ServerConfiguration conf, UpdateBookieInLedgerFlags flags) + throws InterruptedException, BKException, IOException { + + BookieId srcBookieAddress; + BookieId destBookieAddress; + try { + String bookieAddress = flags.srcBookie; + srcBookieAddress = BookieId.parse(bookieAddress); + bookieAddress = flags.destBookie; + destBookieAddress = BookieId.parse(bookieAddress); + } catch (Exception e) { + LOG.error("Bookie address must in
          : format"); + return false; + } + + final int rate = flags.updatePerSec; + if (rate <= 0) { + LOG.error("Invalid updatespersec {}, should be > 0", rate); + return false; + } + + final int maxOutstandingReads = flags.maxOutstandingReads; + if (maxOutstandingReads <= 0) { + LOG.error("Invalid maxOutstandingReads {}, should be > 0", maxOutstandingReads); + return false; + } + + final int limit = flags.limit; + if (limit <= 0 && limit != Integer.MIN_VALUE) { + LOG.error("Invalid limit {}, should be > 0", limit); + return false; + } + + final long printProgress; + if (flags.verbose) { + printProgress = 10; + } else { + printProgress = flags.printProgress; + } + + final ClientConfiguration clientConfiguration = new ClientConfiguration(); + clientConfiguration.addConfiguration(conf); + final BookKeeper bk = new BookKeeper(clientConfiguration); + final BookKeeperAdmin admin = new BookKeeperAdmin(bk, clientConfiguration); + if (admin.getAvailableBookies().contains(srcBookieAddress) + || admin.getReadOnlyBookies().contains(srcBookieAddress)) { + bk.close(); + admin.close(); + LOG.error("Source bookie {} can't be active", srcBookieAddress); + return false; + } + final UpdateLedgerOp updateLedgerOp = new UpdateLedgerOp(bk, admin); + + BookieShell.UpdateLedgerNotifier progressable = new BookieShell.UpdateLedgerNotifier() { + long lastReport = System.nanoTime(); + + @Override + public void progress(long updated, long issued) { + if (printProgress <= 0) { + return; // disabled + } + if (TimeUnit.MILLISECONDS.toSeconds(MathUtils.elapsedMSec(lastReport)) >= printProgress) { + LOG.info("Number of ledgers issued={}, updated={}", issued, updated); + lastReport = MathUtils.nowInNano(); + } + } + }; + + try { + updateLedgerOp.updateBookieIdInLedgers(srcBookieAddress, destBookieAddress, rate, maxOutstandingReads, + limit, progressable); + } catch (IOException e) { + LOG.error("Failed to update ledger metadata", e); + return false; + } + + return true; + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookies/ClusterInfoCommand.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookies/ClusterInfoCommand.java new file mode 100644 index 00000000000..4ff9c5932e7 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookies/ClusterInfoCommand.java @@ -0,0 +1,142 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */
+package org.apache.bookkeeper.tools.cli.commands.bookies;
+
+import static org.apache.bookkeeper.meta.MetadataDrivers.runFunctionWithLedgerManagerFactory;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.util.concurrent.UncheckedExecutionException;
+import java.util.Iterator;
+import lombok.Data;
+import org.apache.bookkeeper.client.BKException;
+import org.apache.bookkeeper.client.BookKeeperAdmin;
+import org.apache.bookkeeper.common.util.JsonUtil;
+import org.apache.bookkeeper.conf.ClientConfiguration;
+import org.apache.bookkeeper.conf.ServerConfiguration;
+import org.apache.bookkeeper.meta.LedgerUnderreplicationManager;
+import org.apache.bookkeeper.meta.UnderreplicatedLedger;
+import org.apache.bookkeeper.net.BookieId;
+import org.apache.bookkeeper.tools.cli.helpers.BookieCommand;
+import org.apache.bookkeeper.tools.framework.CliFlags;
+import org.apache.bookkeeper.tools.framework.CliSpec;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * A bookie command to retrieve bookies cluster info.
+ */
+public class ClusterInfoCommand extends BookieCommand<CliFlags> {
+
+    private static final String NAME = "cluster-info";
+    private static final String DESC = "Exposes the current info about the cluster of bookies";
+    private static final Logger LOG = LoggerFactory.getLogger(ClusterInfoCommand.class);
+    private ClusterInfo info;
+
+    public ClusterInfoCommand() {
+        super(CliSpec.newBuilder()
+                  .withName(NAME)
+                  .withFlags(new CliFlags())
+                  .withDescription(DESC)
+                  .build());
+    }
+
+    @VisibleForTesting
+    public static ClusterInfoCommand newClusterInfoCommand() {
+        return new ClusterInfoCommand();
+    }
+
+    /**
+     * POJO definition for the cluster info response.
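+     *
+     * <p>Shape of the JSON this command logs (values are illustrative only):
+     * <pre>{@code
+     * {"auditorElected":true,"auditorId":"bookie-1.example.com:3181",
+     *  "clusterUnderReplicated":false,"ledgerReplicationEnabled":true,
+     *  "totalBookiesCount":3,"writableBookiesCount":3,
+     *  "readonlyBookiesCount":0,"unavailableBookiesCount":0}
+     * }</pre>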
"" : currentAuditor.getId()); + } catch (Exception e) { + LOG.error("Could not get Auditor info", e); + info.setAuditorElected(false); + info.setAuditorId(""); + } + } + + private void fillUReplicatedInfo(ClusterInfo info, ServerConfiguration conf) throws Exception { + runFunctionWithLedgerManagerFactory(conf, mFactory -> { + try (LedgerUnderreplicationManager underreplicationManager = + mFactory.newLedgerUnderreplicationManager()) { + Iterator iter = underreplicationManager.listLedgersToRereplicate(null); + + info.setClusterUnderReplicated(iter.hasNext()); + info.setLedgerReplicationEnabled(underreplicationManager.isLedgerReplicationEnabled()); + } catch (Exception e) { + throw new UncheckedExecutionException(e); + } + return null; + }); + } + + @VisibleForTesting + public ClusterInfo info() { + return info; + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookies/DecommissionCommand.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookies/DecommissionCommand.java new file mode 100644 index 00000000000..beb9fbcbf4e --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookies/DecommissionCommand.java @@ -0,0 +1,125 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.bookkeeper.tools.cli.commands.bookies; + +import static org.apache.bookkeeper.meta.MetadataDrivers.runFunctionWithRegistrationManager; + +import com.beust.jcommander.Parameter; +import com.google.common.util.concurrent.UncheckedExecutionException; +import java.io.IOException; +import lombok.Setter; +import lombok.experimental.Accessors; +import org.apache.bookkeeper.bookie.BookieException; +import org.apache.bookkeeper.bookie.BookieImpl; +import org.apache.bookkeeper.bookie.Cookie; +import org.apache.bookkeeper.client.BKException; +import org.apache.bookkeeper.client.BookKeeperAdmin; +import org.apache.bookkeeper.conf.ClientConfiguration; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.tools.cli.helpers.BookieCommand; +import org.apache.bookkeeper.tools.framework.CliFlags; +import org.apache.bookkeeper.tools.framework.CliSpec; +import org.apache.bookkeeper.versioning.Versioned; +import org.apache.commons.lang.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Command to trigger AuditTask by resetting lostBookieRecoveryDelay and + * then make sure the ledgers stored in the bookie are properly replicated + * and Cookie of the decommissioned bookie should be deleted from metadata + * server. 
+ */
+public class DecommissionCommand extends BookieCommand<DecommissionCommand.DecommissionFlags> {
+
+    static final Logger LOG = LoggerFactory.getLogger(DecommissionCommand.class);
+
+    private static final String NAME = "decommission";
+    private static final String DESC =
+        "Force trigger the AuditTask and make sure all the ledgers stored in the decommissioning bookie"
+        + " are replicated and cookie of the decommissioned bookie is deleted from metadata server.";
+
+    public DecommissionCommand() {
+        this(new DecommissionFlags());
+    }
+
+    private DecommissionCommand(DecommissionFlags flags) {
+        super(CliSpec.newBuilder().withName(NAME).withDescription(DESC).withFlags(flags).build());
+    }
+
+    /**
+     * Flags for decommission command.
+     */
+    @Accessors(fluent = true)
+    @Setter
+    public static class DecommissionFlags extends CliFlags {
+
+        @Parameter(names = { "-b", "--bookieid" }, description = "Decommission a remote bookie")
+        private String remoteBookieIdToDecommission;
+
+    }
+
+    @Override
+    public boolean apply(ServerConfiguration conf, DecommissionFlags cmdFlags) {
+        try {
+            return decommission(conf, cmdFlags);
+        } catch (Exception e) {
+            throw new UncheckedExecutionException(e.getMessage(), e);
+        }
+    }
+
+    private boolean decommission(ServerConfiguration conf, DecommissionFlags flags)
+        throws BKException, InterruptedException, IOException {
+        ClientConfiguration adminConf = new ClientConfiguration(conf);
+        BookKeeperAdmin admin = new BookKeeperAdmin(adminConf);
+        try {
+            final String remoteBookieidToDecommission = flags.remoteBookieIdToDecommission;
+            final BookieId bookieAddressToDecommission = (StringUtils.isBlank(remoteBookieidToDecommission)
+                                                              ? BookieImpl.getBookieId(conf)
+                                                              : BookieId.parse(remoteBookieidToDecommission));
+            admin.decommissionBookie(bookieAddressToDecommission);
+            LOG.info("The ledgers stored in the given decommissioning bookie: {} are properly replicated",
+                     bookieAddressToDecommission);
+            runFunctionWithRegistrationManager(conf, rm -> {
+                try {
+                    Versioned<Cookie> cookie = Cookie.readFromRegistrationManager(rm, bookieAddressToDecommission);
+                    cookie.getValue().deleteFromRegistrationManager(rm, bookieAddressToDecommission,
+                                                                    cookie.getVersion());
+                } catch (BookieException.CookieNotFoundException nne) {
+                    LOG.warn("No cookie to remove for the decommissioning bookie: {}, it could be deleted already",
+                             bookieAddressToDecommission, nne);
+                } catch (BookieException be) {
+                    throw new UncheckedExecutionException(be.getMessage(), be);
+                }
+                return true;
+            });
+            LOG.info("Cookie of the decommissioned bookie: {} is deleted successfully",
+                     bookieAddressToDecommission);
+            return true;
+        } catch (Exception e) {
+            LOG.error("Received exception in DecommissionBookieCmd ", e);
+            return false;
+        } finally {
+            if (admin != null) {
+                admin.close();
+            }
+        }
+    }
+}
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookies/EndpointInfoCommand.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookies/EndpointInfoCommand.java
new file mode 100644
index 00000000000..634eb8411ce
--- /dev/null
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookies/EndpointInfoCommand.java
@@ -0,0 +1,125 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.bookkeeper.tools.cli.commands.bookies;
+
+import com.beust.jcommander.Parameter;
+import com.google.common.util.concurrent.UncheckedExecutionException;
+import java.io.IOException;
+import java.util.Collection;
+import lombok.Setter;
+import lombok.experimental.Accessors;
+import org.apache.bookkeeper.client.BKException;
+import org.apache.bookkeeper.client.BookKeeperAdmin;
+import org.apache.bookkeeper.conf.ClientConfiguration;
+import org.apache.bookkeeper.conf.ServerConfiguration;
+import org.apache.bookkeeper.discover.BookieServiceInfo;
+import org.apache.bookkeeper.net.BookieId;
+import org.apache.bookkeeper.tools.cli.helpers.BookieCommand;
+import org.apache.bookkeeper.tools.framework.CliFlags;
+import org.apache.bookkeeper.tools.framework.CliSpec;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Get endpoint information about a Bookie.
+ */
+public class EndpointInfoCommand extends BookieCommand<EndpointInfoCommand.EndpointInfoFlags> {
+
+    static final Logger LOG = LoggerFactory.getLogger(EndpointInfoCommand.class);
+
+    private static final String NAME = "endpointinfo";
+    private static final String DESC = "Get all end point information about a given bookie.";
+
+    public EndpointInfoCommand() {
+        this(new EndpointInfoFlags());
+    }
+
+    private EndpointInfoCommand(EndpointInfoFlags flags) {
+        super(CliSpec.newBuilder().withName(NAME).withDescription(DESC).withFlags(flags).build());
+    }
+
+    /**
+     * Flags for this command.
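+     *
+     * <p>Illustrative only (the bookie id is a placeholder; {@code conf} is assumed to point
+     * at the cluster):
+     * <pre>{@code
+     * EndpointInfoFlags flags = new EndpointInfoFlags().bookie("bookie-1.example.com:3181");
+     * boolean found = new EndpointInfoCommand().apply(conf, flags);
+     * }</pre>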
+     */
+    @Accessors(fluent = true)
+    @Setter
+    public static class EndpointInfoFlags extends CliFlags {
+
+        @Parameter(required = true, names = {"-b", "--bookieid"}, description = "Get information about a remote bookie")
+        private String bookie;
+
+    }
+
+    @Override
+    public boolean apply(ServerConfiguration conf, EndpointInfoFlags cmdFlags) {
+        try {
+            return getEndpointInfo(conf, cmdFlags);
+        } catch (Exception e) {
+            throw new UncheckedExecutionException(e.getMessage(), e);
+        }
+    }
+
+    private boolean getEndpointInfo(ServerConfiguration conf, EndpointInfoFlags flags)
+        throws BKException, InterruptedException, IOException {
+        ClientConfiguration adminConf = new ClientConfiguration(conf);
+        BookKeeperAdmin admin = new BookKeeperAdmin(adminConf);
+        try {
+            final String bookieIdStr = flags.bookie;
+            if (bookieIdStr == null || bookieIdStr.isEmpty()) {
+                throw new IllegalArgumentException("BookieId is required");
+            }
+            BookieId bookieId = BookieId.parse(bookieIdStr);
+            Collection<BookieId> allBookies = admin.getAllBookies();
+            if (!allBookies.contains(bookieId)) {
+                LOG.info("Bookie {} does not exist, only {}", bookieId, allBookies);
+                return false;
+            }
+            BookieServiceInfo bookieServiceInfo = admin.getBookieServiceInfo(bookieId);
+
+            LOG.info("BookieId: {}", bookieId);
+            if (!bookieServiceInfo.getProperties().isEmpty()) {
+                LOG.info("Properties");
+                bookieServiceInfo.getProperties().forEach((k, v) -> {
+                    LOG.info("{} : {}", k, v);
+                });
+            }
+            if (!bookieServiceInfo.getEndpoints().isEmpty()) {
+                bookieServiceInfo.getEndpoints().forEach(e -> {
+                    LOG.info("Endpoint: {}", e.getId());
+                    LOG.info("Protocol: {}", e.getProtocol());
+                    LOG.info("Address: {} : {}", e.getHost(), e.getPort());
+                    LOG.info("Auth: {}", e.getAuth());
+                    LOG.info("Extensions: {}", e.getExtensions());
+                });
+            } else {
+                LOG.info("Bookie did not publish any endpoint info. Maybe it is down");
+                return false;
+            }
+
+            return true;
+        } catch (Exception e) {
+            LOG.error("Received exception in EndpointInfoCommand ", e);
+            return false;
+        } finally {
+            if (admin != null) {
+                admin.close();
+            }
+        }
+    }
+}
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookies/InfoCommand.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookies/InfoCommand.java
new file mode 100644
index 00000000000..c00a2b1b4d9
--- /dev/null
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookies/InfoCommand.java
@@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.bookkeeper.tools.cli.commands.bookies;
+
+import java.io.IOException;
+import java.math.RoundingMode;
+import java.text.DecimalFormat;
+import java.util.Map;
+import java.util.stream.Collectors;
+import org.apache.bookkeeper.client.BKException;
+import org.apache.bookkeeper.client.BookKeeper;
+import org.apache.bookkeeper.client.BookieInfoReader.BookieInfo;
+import org.apache.bookkeeper.conf.ClientConfiguration;
+import org.apache.bookkeeper.conf.ServerConfiguration;
+import org.apache.bookkeeper.net.BookieId;
+import org.apache.bookkeeper.tools.cli.helpers.BookieCommand;
+import org.apache.bookkeeper.tools.cli.helpers.CommandHelpers;
+import org.apache.bookkeeper.tools.framework.CliFlags;
+import org.apache.bookkeeper.tools.framework.CliSpec;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * A bookie command to retrieve bookie info.
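+ *
+ * <p>Example of the output shape (numbers and hostnames are illustrative only):
+ * <pre>
+ * Free disk space info:
+ * bookie-1.example.com:3181:   Free: 4980736000(4.98GB)   Total: 9961472000(9.961GB)
+ * Total free disk space in the cluster:  4980736000(4.98GB)
+ * Total disk capacity in the cluster:    9961472000(9.961GB)
+ * </pre>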
+ */
+public class InfoCommand extends BookieCommand<CliFlags> {
+
+    private static final String NAME = "info";
+    private static final String DESC = "Retrieve bookie info such as free and total disk space.";
+    private static final Logger LOG = LoggerFactory.getLogger(InfoCommand.class);
+
+    public InfoCommand() {
+        super(CliSpec.newBuilder()
+                  .withName(NAME)
+                  .withFlags(new CliFlags())
+                  .withDescription(DESC)
+                  .build());
+    }
+
+    String getReadable(long val) {
+        String[] unit = {"", "KB", "MB", "GB", "TB"};
+        int cnt = 0;
+        double d = val;
+        while (d >= 1000 && cnt < unit.length - 1) {
+            d = d / 1000;
+            cnt++;
+        }
+        DecimalFormat df = new DecimalFormat("#.###");
+        df.setRoundingMode(RoundingMode.DOWN);
+        return cnt > 0 ? "(" + df.format(d) + unit[cnt] + ")" : unit[cnt];
+    }
+
+
+    @Override
+    public boolean apply(ServerConfiguration conf, CliFlags cmdFlags) {
+
+        ClientConfiguration clientConf = new ClientConfiguration(conf);
+        clientConf.setDiskWeightBasedPlacementEnabled(true);
+        try (BookKeeper bk = new BookKeeper(clientConf)) {
+            Map<BookieId, BookieInfo> map = bk.getBookieInfo();
+            if (map.size() == 0) {
+                LOG.info("Failed to retrieve bookie information from any of the bookies");
+                bk.close();
+                return true;
+            }
+
+            LOG.info("Free disk space info:");
+            long totalFree = 0, total = 0;
+            for (Map.Entry<BookieId, BookieInfo> e : map.entrySet()) {
+                BookieInfo bInfo = e.getValue();
+                BookieId bookieId = e.getKey();
+                LOG.info("{}: \tFree: {}\tTotal: {}",
+                         CommandHelpers.getBookieSocketAddrStringRepresentation(bookieId, bk.getBookieAddressResolver()),
+                         bInfo.getFreeDiskSpace() + getReadable(bInfo.getFreeDiskSpace()),
+                         bInfo.getTotalDiskSpace() + getReadable(bInfo.getTotalDiskSpace()));
+            }
+
+            // group by hostname
+            Map<String, BookieInfo> dedupedMap = map.entrySet()
+                                                     .stream()
+                                                     .collect(Collectors.toMap(
+                                                         entry -> entry.getKey().toString(),
+                                                         entry -> entry.getValue(),
+                                                         (key1, key2) -> key2
+                                                     ));
+            for (BookieInfo bookieInfo : dedupedMap.values()) {
+                totalFree += bookieInfo.getFreeDiskSpace();
+                total += bookieInfo.getTotalDiskSpace();
+            }
+
+            LOG.info("Total free disk space in the cluster:\t{}", totalFree + getReadable(totalFree));
+            LOG.info("Total disk capacity in the cluster:\t{}", total + getReadable(total));
+            bk.close();
+
+            return true;
+        } catch (IOException | InterruptedException | BKException e) {
+            e.printStackTrace();
+        }
+        return true;
+    }
+}
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookies/InitCommand.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookies/InitCommand.java
new file mode 100644
index 00000000000..84f7c013e62
--- /dev/null
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookies/InitCommand.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.bookkeeper.tools.cli.commands.bookies;
+
+import com.google.common.util.concurrent.UncheckedExecutionException;
+import org.apache.bookkeeper.client.BookKeeperAdmin;
+import org.apache.bookkeeper.conf.ServerConfiguration;
+import org.apache.bookkeeper.tools.cli.helpers.BookieCommand;
+import org.apache.bookkeeper.tools.framework.CliFlags;
+import org.apache.bookkeeper.tools.framework.CliSpec;
+
+/**
+ * Initializes a new cluster by creating the required znodes. If the ledgers
+ * root path already exists, the command errors out. If it fails for any
+ * reason while creating the znodes, nuke the existing cluster by running
+ * nukeexistingcluster before retrying initnewcluster. This is required
+ * because the ledgers root path znode is created after verifying that it
+ * doesn't exist, so a retry of initnewcluster would otherwise complain that
+ * the ledgers root path already exists.
+ */
+public class InitCommand extends BookieCommand<CliFlags> {
+
+    private static final String NAME = "init";
+    private static final String DESC =
+        "Initializes a new bookkeeper cluster. If initnewcluster fails then try nuking "
+        + "existing cluster by running nukeexistingcluster before running initnewcluster again";
+
+    public InitCommand() {
+        super(CliSpec.newBuilder()
+                  .withName(NAME)
+                  .withDescription(DESC)
+                  .withFlags(new CliFlags())
+                  .build());
+    }
+
+    @Override
+    public boolean apply(ServerConfiguration conf, CliFlags cmdFlags) {
+        try {
+            return BookKeeperAdmin.initNewCluster(conf);
+        } catch (Exception e) {
+            throw new UncheckedExecutionException(e.getMessage(), e);
+        }
+    }
+}
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookies/InstanceIdCommand.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookies/InstanceIdCommand.java
new file mode 100644
index 00000000000..6fd60e35f08
--- /dev/null
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookies/InstanceIdCommand.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.bookkeeper.tools.cli.commands.bookies;
+
+import static org.apache.bookkeeper.meta.MetadataDrivers.runFunctionWithRegistrationManager;
+
+import com.google.common.util.concurrent.UncheckedExecutionException;
+import org.apache.bookkeeper.bookie.BookieException;
+import org.apache.bookkeeper.conf.ServerConfiguration;
+import org.apache.bookkeeper.tools.cli.helpers.BookieCommand;
+import org.apache.bookkeeper.tools.framework.CliFlags;
+import org.apache.bookkeeper.tools.framework.CliSpec;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Command to print instance id of the cluster.
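+ *
+ * <p>Illustrative programmatic use ({@code conf} is assumed to point at the cluster's
+ * metadata service):
+ * <pre>{@code
+ * boolean ok = new InstanceIdCommand().apply(conf, new CliFlags());
+ * }</pre>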
+ */
+public class InstanceIdCommand extends BookieCommand<CliFlags> {
+
+    static final Logger LOG = LoggerFactory.getLogger(InstanceIdCommand.class);
+
+    private static final String NAME = "instanceid";
+    private static final String DESC = "Print the instanceid of the cluster";
+
+    public InstanceIdCommand() {
+        super(CliSpec.newBuilder().withName(NAME).withDescription(DESC).withFlags(new CliFlags()).build());
+    }
+
+    @Override
+    public boolean apply(ServerConfiguration conf, CliFlags cmdFlags) {
+        try {
+            runFunctionWithRegistrationManager(conf, rm -> {
+                String readInstanceId = null;
+                try {
+                    readInstanceId = rm.getClusterInstanceId();
+                } catch (BookieException e) {
+                    throw new UncheckedExecutionException(e);
+                }
+                LOG.info("Metadata Service Uri: {} InstanceId: {}",
+                         conf.getMetadataServiceUriUnchecked(), readInstanceId);
+                return null;
+            });
+        } catch (Exception e) {
+            throw new UncheckedExecutionException(e.getMessage(), e);
+        }
+        return true;
+    }
+}
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookies/ListBookiesCommand.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookies/ListBookiesCommand.java
index 4563530e3e0..dcbfd9c2e50 100644
--- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookies/ListBookiesCommand.java
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookies/ListBookiesCommand.java
@@ -22,16 +22,22 @@
 import static org.apache.bookkeeper.tools.cli.helpers.CommandHelpers.getBookieSocketAddrStringRepresentation;
 
 import com.beust.jcommander.Parameter;
+import com.google.common.annotations.VisibleForTesting;
 import java.util.Collection;
 import java.util.Set;
 import lombok.Setter;
 import lombok.experimental.Accessors;
+import org.apache.bookkeeper.client.BookieAddressResolverDisabled;
+import org.apache.bookkeeper.client.DefaultBookieAddressResolver;
 import org.apache.bookkeeper.discover.RegistrationClient;
-import org.apache.bookkeeper.net.BookieSocketAddress;
+import org.apache.bookkeeper.net.BookieId;
+import org.apache.bookkeeper.proto.BookieAddressResolver;
 import org.apache.bookkeeper.tools.cli.commands.bookies.ListBookiesCommand.Flags;
 import org.apache.bookkeeper.tools.cli.helpers.DiscoveryCommand;
 import org.apache.bookkeeper.tools.framework.CliFlags;
 import org.apache.bookkeeper.tools.framework.CliSpec;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * Command to list available bookies.
@@ -40,9 +46,10 @@ public class ListBookiesCommand extends DiscoveryCommand<Flags> {
 
     private static final String NAME = "list";
     private static final String DESC = "List the bookies, which are running as either readwrite or readonly mode.";
+    private static final Logger LOG = LoggerFactory.getLogger(ListBookiesCommand.class);
 
     public ListBookiesCommand() {
-        this(new Flags());
+        this(Flags.newFlags());
     }
 
     public ListBookiesCommand(Flags flags) {
@@ -53,6 +60,11 @@ public ListBookiesCommand(Flags flags) {
             .build());
     }
 
+    @VisibleForTesting
+    public static ListBookiesCommand newListBookiesCommand(Flags flags) {
+        return new ListBookiesCommand(flags);
+    }
+
     /**
      * Flags for list bookies command.
      */
@@ -64,46 +76,69 @@ public static class Flags extends CliFlags {
         private boolean readwrite = false;
 
         @Parameter(names = { "-ro", "--readonly" }, description = "Print readonly bookies")
         private boolean readonly = false;
+        @Parameter(names = { "-a", "--all" }, description = "Print all bookies")
+        private boolean all = false;
+
+        @VisibleForTesting
+        public static Flags newFlags() {
+            return new Flags();
+        }
     }
 
     @Override
-    protected void run(RegistrationClient regClient, Flags flags) throws Exception {
-        if (!flags.readwrite && !flags.readonly) {
+    protected void run(RegistrationClient regClient, Flags flags, boolean bookieAddressResolverEnabled)
+            throws Exception {
+        if (!flags.readwrite && !flags.readonly && !flags.all) {
             // case: no args is provided. list all the bookies by default.
             flags.readwrite = true;
             flags.readonly = true;
+            flags.all = true;
         }
 
+        BookieAddressResolver bookieAddressResolver = bookieAddressResolverEnabled
+                ? new DefaultBookieAddressResolver(regClient)
+                : new BookieAddressResolverDisabled();
+
         boolean hasBookies = false;
         if (flags.readwrite) {
-            Set<BookieSocketAddress> bookies = result(
+            Set<BookieId> bookies = result(
                 regClient.getWritableBookies()
             ).getValue();
             if (!bookies.isEmpty()) {
-                System.out.println("ReadWrite Bookies :");
-                printBookies(bookies);
+                LOG.info("ReadWrite Bookies :");
+                printBookies(bookies, bookieAddressResolver);
                 hasBookies = true;
             }
         }
         if (flags.readonly) {
-            Set<BookieSocketAddress> bookies = result(
+            Set<BookieId> bookies = result(
                 regClient.getReadOnlyBookies()
            ).getValue();
             if (!bookies.isEmpty()) {
-                System.out.println("Readonly Bookies :");
-                printBookies(bookies);
+                LOG.info("Readonly Bookies :");
+                printBookies(bookies, bookieAddressResolver);
+                hasBookies = true;
+            }
+        }
+        if (flags.all) {
+            Set<BookieId> bookies = result(
+                regClient.getAllBookies()
+            ).getValue();
+            if (!bookies.isEmpty()) {
+                LOG.info("All Bookies :");
+                printBookies(bookies, bookieAddressResolver);
                 hasBookies = true;
             }
         }
         if (!hasBookies) {
-            System.err.println("No bookie exists!");
+            LOG.error("No bookie exists!");
         }
     }
 
-    private static void printBookies(Collection<BookieSocketAddress> bookies) {
-        for (BookieSocketAddress b : bookies) {
-            System.out.println(getBookieSocketAddrStringRepresentation(b));
+    private static void printBookies(Collection<BookieId> bookies, BookieAddressResolver bookieAddressResolver) {
+        for (BookieId b : bookies) {
+            LOG.info("{}", getBookieSocketAddrStringRepresentation(b, bookieAddressResolver));
         }
     }
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookies/MetaFormatCommand.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookies/MetaFormatCommand.java
new file mode 100644
index 00000000000..8ec247b662d
--- /dev/null
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookies/MetaFormatCommand.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.bookkeeper.tools.cli.commands.bookies;
+
+import com.beust.jcommander.Parameter;
+import com.google.common.util.concurrent.UncheckedExecutionException;
+import lombok.Setter;
+import lombok.experimental.Accessors;
+import org.apache.bookkeeper.client.BookKeeperAdmin;
+import org.apache.bookkeeper.conf.ServerConfiguration;
+import org.apache.bookkeeper.tools.cli.helpers.BookieCommand;
+import org.apache.bookkeeper.tools.framework.CliFlags;
+import org.apache.bookkeeper.tools.framework.CliSpec;
+
+/**
+ * Format the bookkeeper metadata present in zookeeper.
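+ *
+ * <p>Hedged sketch: a forced, non-prompting format (flag values are assumptions):
+ * <pre>{@code
+ * MetaFormatFlags flags = new MetaFormatFlags().interactive(false).force(true);
+ * boolean ok = new MetaFormatCommand().apply(conf, flags); // conf: a ServerConfiguration
+ * }</pre>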
+ */
+public class MetaFormatCommand extends BookieCommand<MetaFormatCommand.MetaFormatFlags> {
+
+    private static final String NAME = "metaformat";
+    private static final String DESC = "Format bookkeeper metadata in zookeeper.";
+
+    public MetaFormatCommand() {
+        this(new MetaFormatFlags());
+    }
+
+    private MetaFormatCommand(MetaFormatFlags flags) {
+        super(CliSpec.newBuilder()
+                  .withName(NAME)
+                  .withDescription(DESC)
+                  .withFlags(flags)
+                  .build());
+    }
+
+    /**
+     * Flags for command meta format.
+     */
+    @Accessors(fluent = true)
+    @Setter
+    public static class MetaFormatFlags extends CliFlags {
+
+        @Parameter(names = { "-n", "--nonInteractive" },
+            description = "Whether to prompt for confirmation when old data exists.")
+        private boolean interactive;
+
+        @Parameter(names = {"-f", "--force"},
+            description = "If [nonInteractive] is specified, then whether to force delete the old data without prompt.")
+        private boolean force;
+    }
+
+    @Override
+    public boolean apply(ServerConfiguration conf, MetaFormatFlags flags) {
+        try {
+            return BookKeeperAdmin.format(conf, flags.interactive, flags.force);
+        } catch (Exception e) {
+            throw new UncheckedExecutionException(e.getMessage(), e);
+        }
+    }
+}
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookies/NukeExistingClusterCommand.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookies/NukeExistingClusterCommand.java
new file mode 100644
index 00000000000..4e46ccbeb8f
--- /dev/null
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookies/NukeExistingClusterCommand.java
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.bookkeeper.tools.cli.commands.bookies;
+
+import com.beust.jcommander.Parameter;
+import com.google.common.util.concurrent.UncheckedExecutionException;
+import lombok.Setter;
+import lombok.experimental.Accessors;
+import org.apache.bookkeeper.client.BookKeeperAdmin;
+import org.apache.bookkeeper.conf.ServerConfiguration;
+import org.apache.bookkeeper.tools.cli.helpers.BookieCommand;
+import org.apache.bookkeeper.tools.framework.CliFlags;
+import org.apache.bookkeeper.tools.framework.CliSpec;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Nuke bookkeeper metadata of existing cluster in zookeeper.
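+ *
+ * <p>Hedged sketch (the ledgers root path and instance id are placeholders):
+ * <pre>{@code
+ * NukeExistingClusterFlags flags = new NukeExistingClusterFlags()
+ *         .zkLedgersRootPath("/ledgers")
+ *         .instanceId("00000000-0000-0000-0000-000000000000"); // from the instanceid command
+ * boolean ok = new NukeExistingClusterCommand().apply(conf, flags);
+ * }</pre>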
+ */
+public class NukeExistingClusterCommand extends BookieCommand<NukeExistingClusterCommand.NukeExistingClusterFlags> {
+
+    static final Logger LOG = LoggerFactory.getLogger(NukeExistingClusterCommand.class);
+
+    private static final String NAME = "nukeexistingcluster";
+    private static final String DESC = "Nuke bookkeeper cluster by deleting metadata.";
+
+    public NukeExistingClusterCommand() {
+        this(new NukeExistingClusterFlags());
+    }
+
+    private NukeExistingClusterCommand(NukeExistingClusterFlags flags) {
+        super(CliSpec.newBuilder()
+                  .withName(NAME)
+                  .withDescription(DESC)
+                  .withFlags(flags)
+                  .build());
+    }
+
+    /**
+     * Flags for nuke existing cluster command.
+     */
+    @Accessors(fluent = true)
+    @Setter
+    public static class NukeExistingClusterFlags extends CliFlags {
+
+        @Parameter(names = {"-f", "--force"},
+            description = "If instance id is not specified, then whether to force nuke "
+                          + "the metadata without " + "validating instance id")
+        private boolean force;
+
+        @Parameter(names = {"-p", "--zkledgersrootpath"}, description = "zookeeper ledgers root path", required = true)
+        private String zkLedgersRootPath;
+
+        @Parameter(names = {"-i", "--instanceid"}, description = "instance id")
+        private String instanceId;
+
+    }
+
+    @Override
+    public boolean apply(ServerConfiguration conf, NukeExistingClusterFlags cmdFlags) {
+        /*
+         * For the NukeExistingCluster command, 'zkledgersrootpath' must be provided, and
+         * either the force option or the instance id must be provided.
+         */
+        if (cmdFlags.force == (cmdFlags.instanceId != null)) {
+            LOG.error("Either the force option or the instance id should be specified (but not both)");
+            return false;
+        }
+        try {
+            return BookKeeperAdmin.nukeExistingCluster(conf, cmdFlags.zkLedgersRootPath,
+                                                       cmdFlags.instanceId, cmdFlags.force);
+        } catch (Exception e) {
+            throw new UncheckedExecutionException(e.getMessage(), e);
+        }
+    }
+}
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookies/RecoverCommand.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookies/RecoverCommand.java
new file mode 100644
index 00000000000..401d6a20630
--- /dev/null
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookies/RecoverCommand.java
@@ -0,0 +1,280 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.bookkeeper.tools.cli.commands.bookies;
+
+import static org.apache.bookkeeper.client.BookKeeperAdmin.newBookKeeperAdmin;
+import static org.apache.bookkeeper.meta.MetadataDrivers.runFunctionWithRegistrationManager;
+
+import com.beust.jcommander.Parameter;
+import com.google.common.util.concurrent.UncheckedExecutionException;
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.SortedMap;
+import java.util.TreeMap;
+import lombok.Setter;
+import lombok.experimental.Accessors;
+import org.apache.bookkeeper.bookie.BookieException;
+import org.apache.bookkeeper.bookie.Cookie;
+import org.apache.bookkeeper.client.BKException;
+import org.apache.bookkeeper.client.BookKeeperAdmin;
+import org.apache.bookkeeper.client.api.LedgerMetadata;
+import org.apache.bookkeeper.conf.ClientConfiguration;
+import org.apache.bookkeeper.conf.ServerConfiguration;
+import org.apache.bookkeeper.discover.RegistrationManager;
+import org.apache.bookkeeper.net.BookieId;
+import org.apache.bookkeeper.tools.cli.helpers.BookieCommand;
+import org.apache.bookkeeper.tools.framework.CliFlags;
+import org.apache.bookkeeper.tools.framework.CliSpec;
+import org.apache.bookkeeper.util.IOUtils;
+import org.apache.bookkeeper.versioning.Versioned;
+import org.apache.zookeeper.KeeperException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Command to recover ledger data of a failed bookie.
+ */
+public class RecoverCommand extends BookieCommand<RecoverCommand.RecoverFlags> {
+
+    private static final Logger LOG = LoggerFactory.getLogger(RecoverCommand.class);
+
+    private static final String NAME = "recover";
+    private static final String DESC = "Recover the ledger data of a failed bookie";
+
+    private static final long DEFAULT_ID = -1L;
+
+    public RecoverCommand() {
+        this(new RecoverFlags());
+    }
+
+    private RecoverCommand(RecoverFlags flags) {
+        super(CliSpec.newBuilder()
+                  .withName(NAME)
+                  .withDescription(DESC)
+                  .withFlags(flags)
+                  .build());
+    }
+
+    /**
+     * Flags for recover command.
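+     *
+     * <p>Sketch of a typical invocation (hedged; the failed bookie address and option choices
+     * are assumptions):
+     * <pre>{@code
+     * RecoverFlags flags = new RecoverFlags()
+     *         .bookieAddress("bookie-1.example.com:3181") // failed bookie, hypothetical
+     *         .dryRun(true)           // print the recovery plan without copying data
+     *         .skipOpenLedgers(true);
+     * boolean ok = new RecoverCommand().apply(conf, flags); // conf: a ServerConfiguration
+     * }</pre>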
+     */
+    @Accessors(fluent = true)
+    @Setter
+    public static class RecoverFlags extends CliFlags {
+
+        @Parameter(names = { "-l", "--ledger" }, description = "Recover a specific ledger")
+        private long ledger = DEFAULT_ID;
+
+        @Parameter(names = { "-f", "--force" }, description = "Force recovery without confirmation")
+        private boolean force;
+
+        @Parameter(names = { "-q", "--query" }, description = "Query the ledgers that contain given bookies")
+        private boolean query;
+
+        @Parameter(names = { "-dr", "--dryrun" }, description = "Print the recovery plan w/o doing actual recovery")
+        private boolean dryRun;
+
+        @Parameter(names = {"-sk", "--skipopenledgers"}, description = "Skip recovering open ledgers")
+        private boolean skipOpenLedgers;
+
+        @Parameter(names = { "-d", "--deletecookie" }, description = "Delete cookie node for the bookie")
+        private boolean deleteCookie;
+
+        @Parameter(names = { "-bs", "--bookiesrc" }, description = "Bookie address")
+        private String bookieAddress;
+
+        @Parameter(names = {"-sku", "--skipunrecoverableledgers"}, description = "Skip unrecoverable ledgers")
+        private boolean skipUnrecoverableLedgers;
+
+        @Parameter(names = { "-rate", "--replicationrate" }, description = "Replication rate in bytes")
+        private int replicateRate;
+    }
+
+    @Override
+    public boolean apply(ServerConfiguration conf, RecoverFlags cmdFlags) {
+        try {
+            return recover(conf, cmdFlags);
+        } catch (Exception e) {
+            throw new UncheckedExecutionException(e.getMessage(), e);
+        }
+    }
+
+    private boolean recover(ServerConfiguration conf, RecoverFlags flags)
+        throws IOException, BKException, InterruptedException, KeeperException {
+        boolean query = flags.query;
+        boolean dryrun = flags.dryRun;
+        boolean force = flags.force;
+        boolean skipOpenLedgers = flags.skipOpenLedgers;
+        boolean removeCookies = !dryrun && flags.deleteCookie;
+        boolean skipUnrecoverableLedgers = flags.skipUnrecoverableLedgers;
+
+        Long ledgerId = flags.ledger;
+        int replicateRate = flags.replicateRate;
+
+        // Get bookies list
+        final String[] bookieStrs = flags.bookieAddress.split(",");
+        final Set<BookieId> bookieAddrs = new HashSet<>();
+        for (String bookieStr : bookieStrs) {
+            try {
+                bookieAddrs.add(BookieId.parse(bookieStr));
+            } catch (IllegalArgumentException err) {
+                LOG.error("BookieSrcs has invalid bookie id format: {}", bookieStr);
+                return false;
+            }
+        }
+
+        if (!force) {
+            LOG.error("Bookies : {}", bookieAddrs);
+            if (!IOUtils.confirmPrompt("Are you sure to recover them : (Y/N)")) {
+                LOG.error("Give up!");
+                return false;
+            }
+        }
+
+        LOG.info("Constructing admin");
+        conf.setReplicationRateByBytes(replicateRate);
+        ClientConfiguration adminConf = new ClientConfiguration(conf);
+        BookKeeperAdmin admin = newBookKeeperAdmin(adminConf);
+        LOG.info("Construct admin : {}", admin);
+        try {
+            if (query) {
+                return bkQuery(admin, bookieAddrs);
+            }
+            if (DEFAULT_ID != ledgerId) {
+                return bkRecoveryLedger(admin, ledgerId, bookieAddrs, dryrun, skipOpenLedgers, removeCookies);
+            }
+            return bkRecovery(admin, bookieAddrs, dryrun, skipOpenLedgers, removeCookies, skipUnrecoverableLedgers);
+        } finally {
+            admin.close();
+        }
+    }
+
+    private boolean bkQuery(BookKeeperAdmin bkAdmin, Set<BookieId> bookieAddrs)
+        throws InterruptedException, BKException {
+        SortedMap<Long, LedgerMetadata> ledgersContainBookies =
+            bkAdmin.getLedgersContainBookies(bookieAddrs);
+        LOG.error("NOTE: Bookies in inspection list are marked with '*'.");
+        for (Map.Entry<Long, LedgerMetadata> ledger : ledgersContainBookies.entrySet()) {
+            LOG.info("ledger {} : {}", ledger.getKey(), ledger.getValue().getState());
numBookiesToReplacePerEnsemble = + inspectLedger(ledger.getValue(), bookieAddrs); + LOG.info("summary: ["); + for (Map.Entry<Long, Integer> entry : numBookiesToReplacePerEnsemble.entrySet()) { + LOG.info("{}={}, ", entry.getKey(), entry.getValue()); + } + LOG.info("]"); + LOG.info(""); + } + LOG.error("Done"); + return true; + } + + private Map<Long, Integer> inspectLedger(LedgerMetadata metadata, Set<BookieId> bookiesToInspect) { + Map<Long, Integer> numBookiesToReplacePerEnsemble = new TreeMap<>(); + for (Map.Entry<Long, ? extends List<BookieId>> ensemble : + metadata.getAllEnsembles().entrySet()) { + List<BookieId> bookieList = ensemble.getValue(); + LOG.info("{}:\t", ensemble.getKey()); + int numBookiesToReplace = 0; + for (BookieId bookie : bookieList) { + LOG.info("{}", bookie.toString()); + if (bookiesToInspect.contains(bookie)) { + LOG.info("*"); + ++numBookiesToReplace; + } else { + LOG.info(" "); + } + LOG.info(" "); + } + LOG.info(""); + numBookiesToReplacePerEnsemble.put(ensemble.getKey(), numBookiesToReplace); + } + return numBookiesToReplacePerEnsemble; + } + + private boolean bkRecoveryLedger(BookKeeperAdmin bkAdmin, + long lid, + Set<BookieId> bookieAddrs, + boolean dryrun, + boolean skipOpenLedgers, + boolean removeCookies) + throws InterruptedException, BKException { + bkAdmin.recoverBookieData(lid, bookieAddrs, dryrun, skipOpenLedgers); + if (removeCookies) { + deleteCookies(bkAdmin.getConf(), bookieAddrs); + } + return true; + } + + private void deleteCookies(ClientConfiguration conf, + Set<BookieId> bookieAddrs) throws BKException { + ServerConfiguration serverConf = new ServerConfiguration(conf); + try { + runFunctionWithRegistrationManager(serverConf, rm -> { + try { + for (BookieId addr : bookieAddrs) { + deleteCookie(rm, addr); + } + } catch (Exception e) { + throw new UncheckedExecutionException(e); + } + return null; + }); + } catch (Exception e) { + Throwable cause = e; + if (e instanceof UncheckedExecutionException) { + cause = e.getCause(); + } + if (cause instanceof BKException) { + throw (BKException) cause; + } else { + BKException bke = new BKException.MetaStoreException(); + bke.initCause(cause); + throw bke; + } + } + + } + + private void deleteCookie(RegistrationManager rm, BookieId bookieSrc) throws BookieException { + try { + Versioned<Cookie> cookie = Cookie.readFromRegistrationManager(rm, bookieSrc); + cookie.getValue().deleteFromRegistrationManager(rm, bookieSrc, cookie.getVersion()); + } catch (BookieException.CookieNotFoundException nne) { + LOG.warn("No cookie to remove for {} : ", bookieSrc, nne); + } + } + + private boolean bkRecovery(BookKeeperAdmin bkAdmin, + Set<BookieId> bookieAddrs, + boolean dryrun, + boolean skipOpenLedgers, + boolean removeCookies, + boolean skipUnrecoverableLedgers) + throws InterruptedException, BKException { + bkAdmin.recoverBookieData(bookieAddrs, dryrun, skipOpenLedgers, skipUnrecoverableLedgers); + if (removeCookies) { + deleteCookies(bkAdmin.getConf(), bookieAddrs); + } + return true; + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/client/DeleteLedgerCommand.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/client/DeleteLedgerCommand.java new file mode 100644 index 00000000000..f53b6650d9f --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/client/DeleteLedgerCommand.java @@ -0,0 +1,133 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.
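The recover command above is a thin wrapper over BookKeeperAdmin: it parses the comma-separated --bookiesrc list into BookieId values, optionally prompts for confirmation, and then dispatches to a query, a single-ledger recovery, or a full re-replication. A minimal sketch of driving the same recovery path programmatically; the metadata URI and bookie address are placeholders, and the surrounding wiring is an assumption rather than part of this patch:

    import java.util.Collections;
    import java.util.Set;
    import org.apache.bookkeeper.client.BookKeeperAdmin;
    import org.apache.bookkeeper.conf.ClientConfiguration;
    import org.apache.bookkeeper.net.BookieId;

    public class RecoverSketch {
        public static void main(String[] args) throws Exception {
            ClientConfiguration conf = new ClientConfiguration();
            conf.setMetadataServiceUri("zk://localhost:2181/ledgers"); // placeholder URI
            // Hypothetical failed bookie to re-replicate away from.
            Set<BookieId> failed = Collections.singleton(BookieId.parse("bookie-1:3181"));
            BookKeeperAdmin admin = new BookKeeperAdmin(conf);
            try {
                // Same call the command issues for a full recovery;
                // dryrun=true only prints the plan, mirroring --dryrun.
                admin.recoverBookieData(failed, true, false, false);
            } finally {
                admin.close();
            }
        }
    }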
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.bookkeeper.tools.cli.commands.client; + +import com.beust.jcommander.Parameter; +import com.google.common.util.concurrent.UncheckedExecutionException; +import java.io.IOException; +import lombok.Setter; +import lombok.experimental.Accessors; +import org.apache.bookkeeper.client.BKException; +import org.apache.bookkeeper.client.BookKeeper; +import org.apache.bookkeeper.conf.ClientConfiguration; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.tools.cli.helpers.BookieCommand; +import org.apache.bookkeeper.tools.framework.CliFlags; +import org.apache.bookkeeper.tools.framework.CliSpec; +import org.apache.bookkeeper.util.IOUtils; +import org.apache.bookkeeper.util.LedgerIdFormatter; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Command to delete a given ledger. + */ +public class DeleteLedgerCommand extends BookieCommand<DeleteLedgerCommand.DeleteLedgerFlags> { + + private static final String NAME = "delete"; + private static final String DESC = "Delete a ledger."; + private static final String DEFAULT = ""; + private static final Logger LOG = LoggerFactory.getLogger(DeleteLedgerCommand.class); + + private LedgerIdFormatter ledgerIdFormatter; + + public DeleteLedgerCommand() { + this(new DeleteLedgerFlags()); + } + + public DeleteLedgerCommand(LedgerIdFormatter ledgerIdFormatter) { + this(new DeleteLedgerFlags()); + this.ledgerIdFormatter = ledgerIdFormatter; + } + + private DeleteLedgerCommand(DeleteLedgerFlags flags) { + super(CliSpec.newBuilder() + .withName(NAME) + .withDescription(DESC) + .withFlags(flags) + .build()); + } + + /** + * Flags for delete ledger command.
+ */ + @Accessors(fluent = true) + @Setter + public static class DeleteLedgerFlags extends CliFlags { + + @Parameter(names = { "-l", "--ledgerid" }, description = "Ledger ID", required = true) + private long ledgerId; + + @Parameter(names = { "-f", "--force" }, description = "Force delete the ledger without confirmation prompt") + private boolean force; + + @Parameter(names = { "-lf", "--ledgeridformatter" }, description = "Set ledger id formatter") + private String ledgerIdFormatter = DEFAULT; + + } + + @Override + public boolean apply(ServerConfiguration conf, DeleteLedgerFlags cmdFlags) { + initLedgerIdFormatter(conf, cmdFlags); + try { + return deleteLedger(conf, cmdFlags); + } catch (Exception e) { + throw new UncheckedExecutionException(e.getMessage(), e); + } + } + + private void initLedgerIdFormatter(ServerConfiguration conf, DeleteLedgerFlags flags) { + if (null == ledgerIdFormatter && !flags.ledgerIdFormatter.equals(DEFAULT)) { + this.ledgerIdFormatter = LedgerIdFormatter.newLedgerIdFormatter(flags.ledgerIdFormatter, conf); + } else if (null == ledgerIdFormatter && flags.ledgerIdFormatter.equals(DEFAULT)) { + this.ledgerIdFormatter = LedgerIdFormatter.newLedgerIdFormatter(conf); + } + } + + private boolean deleteLedger(ServerConfiguration conf, DeleteLedgerFlags flags) + throws IOException, BKException, InterruptedException { + + if (flags.ledgerId < 0) { + LOG.error("Invalid ledger id: {}", flags.ledgerId); + return false; + } + + boolean confirm = false; + if (!flags.force) { + confirm = IOUtils.confirmPrompt( + "Are you sure you want to delete ledger " + ledgerIdFormatter.formatLedgerId(flags.ledgerId) + "?"); + } + + BookKeeper bookKeeper = null; + try { + if (flags.force || confirm) { + ClientConfiguration configuration = new ClientConfiguration(); + configuration.addConfiguration(conf); + bookKeeper = new BookKeeper(configuration); + bookKeeper.deleteLedger(flags.ledgerId); + } + } finally { + if (bookKeeper != null) { + bookKeeper.close(); + } + } + + return true; + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/client/LedgerMetaDataCommand.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/client/LedgerMetaDataCommand.java new file mode 100644 index 00000000000..430c0dde21d --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/client/LedgerMetaDataCommand.java @@ -0,0 +1,159 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
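The delete command reduces to a single client call once the prompt is confirmed; a condensed sketch of that path, with the ledger id and metadata URI as placeholders:

    import org.apache.bookkeeper.client.BookKeeper;
    import org.apache.bookkeeper.conf.ClientConfiguration;

    public class DeleteLedgerSketch {
        public static void main(String[] args) throws Exception {
            ClientConfiguration conf = new ClientConfiguration();
            conf.setMetadataServiceUri("zk://localhost:2181/ledgers"); // placeholder URI
            BookKeeper bk = new BookKeeper(conf);
            try {
                bk.deleteLedger(42L); // placeholder ledger id
            } finally {
                bk.close();
            }
        }
    }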
+ */ +package org.apache.bookkeeper.tools.cli.commands.client; + +import static org.apache.bookkeeper.meta.MetadataDrivers.runFunctionWithLedgerManagerFactory; + +import com.beust.jcommander.Parameter; +import com.google.common.util.concurrent.UncheckedExecutionException; +import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; +import java.nio.file.FileSystems; +import java.nio.file.Files; +import java.util.Optional; +import java.util.concurrent.ExecutionException; +import lombok.Setter; +import lombok.experimental.Accessors; +import org.apache.bookkeeper.client.BKException.BKLedgerExistException; +import org.apache.bookkeeper.client.api.LedgerMetadata; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.meta.LedgerManager; +import org.apache.bookkeeper.meta.LedgerMetadataSerDe; +import org.apache.bookkeeper.meta.exceptions.MetadataException; +import org.apache.bookkeeper.tools.cli.helpers.BookieCommand; +import org.apache.bookkeeper.tools.framework.CliFlags; +import org.apache.bookkeeper.tools.framework.CliSpec; +import org.apache.bookkeeper.util.LedgerIdFormatter; +import org.apache.bookkeeper.versioning.LongVersion; +import org.apache.bookkeeper.versioning.Versioned; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Print the metadata for a ledger. + */ +@SuppressFBWarnings("RCN_REDUNDANT_NULLCHECK_WOULD_HAVE_BEEN_A_NPE") +public class LedgerMetaDataCommand extends BookieCommand<LedgerMetaDataCommand.LedgerMetadataFlag> { + + private static final String NAME = "show"; + private static final String DESC = "Print the metadata for a ledger, or optionally dump to a file."; + private static final String DEFAULT = ""; + private static final long DEFAULT_ID = -1L; + private static final Logger LOG = LoggerFactory.getLogger(LedgerMetaDataCommand.class); + + private LedgerMetadataSerDe serDe = new LedgerMetadataSerDe(); + private LedgerIdFormatter ledgerIdFormatter; + + public LedgerMetaDataCommand() { + this(new LedgerMetadataFlag()); + } + + public LedgerMetaDataCommand(LedgerIdFormatter ledgerIdFormatter) { + this(); + this.ledgerIdFormatter = ledgerIdFormatter; + } + + public LedgerMetaDataCommand(LedgerMetadataFlag flag) { + super(CliSpec.newBuilder() + .withName(NAME) + .withDescription(DESC) + .withFlags(flag) + .build()); + } + + /** + * Flags for ledger metadata command.
+ */ + @Accessors(fluent = true) + @Setter + public static class LedgerMetadataFlag extends CliFlags { + + @Parameter(names = { "-l", "--ledgerid" }, description = "Ledger ID", required = true) + private long ledgerId = DEFAULT_ID; + + @Parameter(names = { "-d", "--dumptofile" }, description = "Dump metadata for ledger, to a file") + private String dumpToFile = DEFAULT; + + @Parameter(names = { "-r", "--restorefromfile" }, description = "Restore metadata for ledger, from a file") + private String restoreFromFile = DEFAULT; + + @Parameter(names = {"-lf", "--ledgeridformatter"}, description = "Set ledger id formatter") + private String ledgerIdFormatter = DEFAULT; + + @Parameter(names = { "-u", + "--update" }, description = "Update metadata if it already exists while restoring metadata") + private boolean update = false; + } + + @Override + public boolean apply(ServerConfiguration conf, LedgerMetadataFlag cmdFlags) { + if (!cmdFlags.ledgerIdFormatter.equals(DEFAULT) && ledgerIdFormatter == null) { + this.ledgerIdFormatter = LedgerIdFormatter.newLedgerIdFormatter(cmdFlags.ledgerIdFormatter, conf); + } else if (ledgerIdFormatter == null) { + this.ledgerIdFormatter = LedgerIdFormatter.newLedgerIdFormatter(conf); + } + try { + return handler(conf, cmdFlags); + } catch (Exception e) { + throw new UncheckedExecutionException(e.getMessage(), e); + } + } + + private boolean handler(ServerConfiguration conf, LedgerMetadataFlag flag) + throws MetadataException, ExecutionException { + if (flag.ledgerId == DEFAULT_ID) { + LOG.error("Must specify a ledger id"); + return false; + } + runFunctionWithLedgerManagerFactory(conf, mFactory -> { + try (LedgerManager m = mFactory.newLedgerManager()) { + if (!flag.dumpToFile.equals(DEFAULT)) { + Versioned<LedgerMetadata> md = m.readLedgerMetadata(flag.ledgerId).join(); + Files.write(FileSystems.getDefault().getPath(flag.dumpToFile), + serDe.serialize(md.getValue())); + } else if (!flag.restoreFromFile.equals(DEFAULT)) { + byte[] serialized = Files.readAllBytes( + FileSystems.getDefault().getPath(flag.restoreFromFile)); + LedgerMetadata md = serDe.parseConfig(serialized, flag.ledgerId, Optional.empty()); + try { + m.createLedgerMetadata(flag.ledgerId, md).join(); + } catch (Exception be) { + if (!flag.update || !(be.getCause() instanceof BKLedgerExistException)) { + throw be; + } + m.writeLedgerMetadata(flag.ledgerId, md, new LongVersion(-1L)).join(); + LOG.info("successfully updated ledger metadata {}", flag.ledgerId); + } + } else { + printLedgerMetadata(flag.ledgerId, m.readLedgerMetadata(flag.ledgerId).get().getValue(), true); + } + } catch (Exception e) { + throw new UncheckedExecutionException(e); + } + return null; + }); + return true; + } + + private void printLedgerMetadata(long ledgerId, LedgerMetadata md, boolean printMeta) { + LOG.info("ledgerID: {}", ledgerIdFormatter.formatLedgerId(ledgerId)); + if (printMeta) { + LOG.info("{}", md.toString()); + } + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/client/SimpleTestCommand.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/client/SimpleTestCommand.java index ea20a662779..3007c3b7d77 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/client/SimpleTestCommand.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/client/SimpleTestCommand.java @@ -21,16 +21,27 @@ import static org.apache.bookkeeper.common.concurrent.FutureUtils.result; import com.beust.jcommander.Parameter; +import
com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.ImmutableMap; +import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.Random; import java.util.concurrent.TimeUnit; import lombok.Setter; import lombok.experimental.Accessors; import org.apache.bookkeeper.client.api.BookKeeper; import org.apache.bookkeeper.client.api.DigestType; +import org.apache.bookkeeper.client.api.LedgerEntries; +import org.apache.bookkeeper.client.api.LedgerEntry; +import org.apache.bookkeeper.client.api.ReadHandle; import org.apache.bookkeeper.client.api.WriteHandle; import org.apache.bookkeeper.tools.cli.commands.client.SimpleTestCommand.Flags; import org.apache.bookkeeper.tools.cli.helpers.ClientCommand; import org.apache.bookkeeper.tools.framework.CliFlags; import org.apache.bookkeeper.tools.framework.CliSpec; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * A client command that simply tests if a cluster is healthy. @@ -38,7 +49,8 @@ public class SimpleTestCommand extends ClientCommand { private static final String NAME = "simpletest"; - private static final String DESC = "Simple test to create a ledger and write entries to it."; + private static final String DESC = "Simple test to create a ledger and write entries to it, then read it."; + private static final Logger LOG = LoggerFactory.getLogger(SimpleTestCommand.class); /** * Flags for simple test command. @@ -56,9 +68,14 @@ public static class Flags extends CliFlags { @Parameter(names = { "-n", "--num-entries" }, description = "Entries to write (default 100)") private int numEntries = 100; + @VisibleForTesting + public static Flags newFlags(){ + return new Flags(); + } + } public SimpleTestCommand() { - this(new Flags()); + this(Flags.newFlags()); } public SimpleTestCommand(Flags flags) { @@ -69,29 +86,84 @@ public SimpleTestCommand(Flags flags) { .build()); } + @VisibleForTesting + public static SimpleTestCommand newSimpleTestCommand(Flags flags) { + return new SimpleTestCommand(flags); + } + @Override + @SuppressFBWarnings({"RCN_REDUNDANT_NULLCHECK_WOULD_HAVE_BEEN_A_NPE", "DMI_RANDOM_USED_ONLY_ONCE"}) protected void run(BookKeeper bk, Flags flags) throws Exception { byte[] data = new byte[100]; // test data - + Random random = new Random(0); + for (int i = 0; i < data.length; i++) { + data[i] = (byte) (random.nextInt(26) + 65); + } + long ledgerId = -1L; + long lastEntryId = -1L; try (WriteHandle wh = result(bk.newCreateLedgerOp() - .withEnsembleSize(flags.ensembleSize) - .withWriteQuorumSize(flags.writeQuorumSize) - .withAckQuorumSize(flags.ackQuorumSize) - .withDigestType(DigestType.CRC32C) - .withPassword(new byte[0]) - .execute())) { - - System.out.println("Ledger ID: " + wh.getId()); + .withEnsembleSize(flags.ensembleSize) + .withWriteQuorumSize(flags.writeQuorumSize) + .withAckQuorumSize(flags.ackQuorumSize) + .withDigestType(DigestType.CRC32C) + .withCustomMetadata(ImmutableMap.of("Bookie", NAME.getBytes(StandardCharsets.UTF_8))) + .withPassword(new byte[0]) + .execute())) { + ledgerId = wh.getId(); + LOG.info("Ledger ID: {}", ledgerId); long lastReport = System.nanoTime(); for (int i = 0; i < flags.numEntries; i++) { wh.append(data); if (TimeUnit.SECONDS.convert(System.nanoTime() - lastReport, TimeUnit.NANOSECONDS) > 1) { - System.out.println(i + " entries written"); + LOG.info("{} entries written", i); lastReport = System.nanoTime(); } } - System.out.println(flags.numEntries + " entries written to ledger " + 
wh.getId()); + lastEntryId = wh.getLastAddPushed(); + LOG.info("{} entries written to ledger {}. Last entry Id {}", flags.numEntries, ledgerId, lastEntryId); + if (lastEntryId != flags.numEntries - 1) { + throw new IllegalStateException("Last entry id doesn't match the expected value"); + } + // check that all entries are readable + readEntries(bk, ledgerId, lastEntryId, flags.numEntries, true, data); + } + if (ledgerId != -1) { + try { + if (lastEntryId != -1) { + // check that all entries are readable and confirmed + readEntries(bk, ledgerId, lastEntryId, flags.numEntries, false, data); + } else { + throw new IllegalStateException("Last entry id is not set"); + } + } finally { + // delete the ledger + result(bk.newDeleteLedgerOp().withLedgerId(ledgerId).execute()); + } + } else { + throw new IllegalStateException("Ledger id is not set"); + } + } + + private static void readEntries(BookKeeper bk, long ledgerId, long lastEntryId, int expectedNumberOfEntries, + boolean readUnconfirmed, byte[] data) throws Exception { + int entriesRead = 0; + try (ReadHandle rh = result(bk.newOpenLedgerOp().withLedgerId(ledgerId).withDigestType(DigestType.CRC32C) + .withPassword(new byte[0]).execute())) { + try (LedgerEntries ledgerEntries = readUnconfirmed ? rh.readUnconfirmed(0, lastEntryId) : + rh.read(0, lastEntryId)) { + for (LedgerEntry ledgerEntry : ledgerEntries) { + if (!Arrays.equals(ledgerEntry.getEntryBytes(), data)) { + throw new IllegalStateException("Read data doesn't match the data written."); + } + entriesRead++; + } + } + } + if (entriesRead != expectedNumberOfEntries) { + throw new IllegalStateException( + String.format("Number of entries read (%d) doesn't match the expected value (%d).", + entriesRead, expectedNumberOfEntries)); } } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/cookie/AdminCommand.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/cookie/AdminCommand.java new file mode 100644 index 00000000000..7a755ac4806 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/cookie/AdminCommand.java @@ -0,0 +1,331 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
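The reworked simpletest now verifies what it wrote in two passes: readUnconfirmed() while the write handle is still open (such reads are allowed past the last-add-confirmed pointer), then a regular read() after the handle is closed and every entry is confirmed, and finally it deletes the test ledger. A condensed sketch of that read-back pattern, using only the API calls shown above; the digest type and empty password must match ledger creation:

    import static org.apache.bookkeeper.common.concurrent.FutureUtils.result;

    import org.apache.bookkeeper.client.api.BookKeeper;
    import org.apache.bookkeeper.client.api.DigestType;
    import org.apache.bookkeeper.client.api.LedgerEntries;
    import org.apache.bookkeeper.client.api.LedgerEntry;
    import org.apache.bookkeeper.client.api.ReadHandle;

    final class ReadBackSketch {
        // Counts the entries in [0, lastEntryId]; throws if the ledger cannot be opened.
        static long countEntries(BookKeeper bk, long ledgerId, long lastEntryId) throws Exception {
            long count = 0;
            try (ReadHandle rh = result(bk.newOpenLedgerOp()
                    .withLedgerId(ledgerId)
                    .withDigestType(DigestType.CRC32C)
                    .withPassword(new byte[0])
                    .execute());
                 LedgerEntries entries = rh.read(0, lastEntryId)) {
                for (LedgerEntry entry : entries) {
                    count++;
                }
            }
            return count;
        }
    }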
+ */ +package org.apache.bookkeeper.tools.cli.commands.cookie; + +import static org.apache.bookkeeper.meta.MetadataDrivers.runFunctionWithMetadataBookieDriver; +import static org.apache.bookkeeper.meta.MetadataDrivers.runFunctionWithRegistrationManager; + +import com.beust.jcommander.Parameter; +import com.google.common.collect.Lists; +import com.google.common.util.concurrent.UncheckedExecutionException; +import java.io.File; +import java.io.IOException; +import java.util.Arrays; +import java.util.LinkedList; +import java.util.List; +import lombok.Setter; +import lombok.experimental.Accessors; +import org.apache.bookkeeper.bookie.BookieException; +import org.apache.bookkeeper.bookie.BookieImpl; +import org.apache.bookkeeper.bookie.Cookie; +import org.apache.bookkeeper.bookie.LegacyCookieValidation; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.discover.RegistrationManager; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.tools.cli.helpers.BookieCommand; +import org.apache.bookkeeper.tools.framework.CliFlags; +import org.apache.bookkeeper.tools.framework.CliSpec; +import org.apache.bookkeeper.util.BookKeeperConstants; +import org.apache.bookkeeper.util.IOUtils; +import org.apache.bookkeeper.versioning.Version; +import org.apache.bookkeeper.versioning.Versioned; +import org.apache.commons.lang3.ArrayUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Command to update cookie. + */ +public class AdminCommand extends BookieCommand<AdminCommand.AdminFlags> { + + static final Logger LOG = LoggerFactory.getLogger(AdminCommand.class); + + private static final String NAME = "admin"; + private static final String DESC = "Command to update cookie"; + + private File[] journalDirectories; + private File[] ledgerDirectories; + private File[] indexDirectories; + + public AdminCommand() { + this(new AdminFlags()); + } + + private AdminCommand(AdminFlags flags) { + super(CliSpec.newBuilder().withName(NAME).withDescription(DESC).withFlags(flags).build()); + } + + /** + * Flags for admin command.
+ */ + @Accessors(fluent = true) + @Setter + public static class AdminFlags extends CliFlags { + + @Parameter(names = { "-host", + "--hostname" }, description = "Expects config useHostNameAsBookieID=true as the option value") + private boolean hostname; + + @Parameter(names = { "-p", "-ip" }, + description = "Expects config useHostNameAsBookieID=false as the option value") + private boolean ip; + + @Parameter(names = { "-e", "--expandstorage" }, description = "Add new empty ledger/index directories") + private boolean expandstorage; + + @Parameter(names = { "-l", "--list" }, description = "List paths of all the cookies present locally and on " + + "zooKeeper") + private boolean list; + + @Parameter(names = { "-d", "--delete" }, description = "Delete cookie both locally and in zooKeeper") + private boolean delete; + + @Parameter(names = {"-f", "--force"}, description = "Force delete cookie") + private boolean force; + + } + + @Override + public boolean apply(ServerConfiguration conf, AdminFlags cmdFlags) { + initDirectory(conf); + try { + return update(conf, cmdFlags); + } catch (Exception e) { + throw new UncheckedExecutionException(e.getMessage(), e); + } + } + + private void initDirectory(ServerConfiguration bkConf) { + this.journalDirectories = BookieImpl.getCurrentDirectories(bkConf.getJournalDirs()); + this.ledgerDirectories = BookieImpl.getCurrentDirectories(bkConf.getLedgerDirs()); + if (null == bkConf.getIndexDirs()) { + this.indexDirectories = this.ledgerDirectories; + } else { + this.indexDirectories = BookieImpl.getCurrentDirectories(bkConf.getIndexDirs()); + } + } + + private boolean update(ServerConfiguration conf, AdminFlags flags) throws Exception { + boolean useHostName = flags.hostname; + if (flags.hostname || flags.ip) { + if (!conf.getUseHostNameAsBookieID() && useHostName) { + LOG.error("Expects configuration useHostNameAsBookieID=true as the option value"); + return false; + } else if (conf.getUseHostNameAsBookieID() && !useHostName) { + LOG.error("Expects configuration useHostNameAsBookieID=false as the option value"); + return false; + } + return updateBookieIdInCookie(conf, flags.hostname); + } else if (flags.expandstorage) { + conf.setAllowStorageExpansion(true); + return expandStorage(conf); + } else if (flags.list) { + return listOrDeleteCookies(conf, false, false); + } else if (flags.delete) { + return listOrDeleteCookies(conf, true, flags.force); + } else { + LOG.error("Invalid command!"); + usage(); + return false; + } + } + + private boolean updateBookieIdInCookie(ServerConfiguration bkConf, final boolean useHostname) + throws Exception { + return runFunctionWithRegistrationManager(bkConf, rm -> { + try { + ServerConfiguration conf = new ServerConfiguration(bkConf); + String newBookieId = BookieImpl.getBookieId(conf).toString(); + // read oldcookie + Versioned<Cookie> oldCookie = null; + try { + conf.setUseHostNameAsBookieID(!useHostname); + oldCookie = Cookie.readFromRegistrationManager(rm, conf); + } catch (BookieException.CookieNotFoundException nne) { + LOG.error("Either cookie already updated with UseHostNameAsBookieID={} or no cookie exists!", + useHostname, nne); + return false; + } + Cookie newCookie = Cookie.newBuilder(oldCookie.getValue()).setBookieId(newBookieId).build(); + + boolean hasCookieUpdatedInDirs = verifyCookie(newCookie, journalDirectories[0]); + for (File dir : ledgerDirectories) { + hasCookieUpdatedInDirs &= verifyCookie(newCookie, dir); + } + if (indexDirectories != ledgerDirectories) { + for (File dir : indexDirectories) {
hasCookieUpdatedInDirs &= verifyCookie(newCookie, dir); + } + } + + if (hasCookieUpdatedInDirs) { + try { + conf.setUseHostNameAsBookieID(useHostname); + Cookie.readFromRegistrationManager(rm, conf); + // since newcookie exists, just do cleanup of oldcookie and return + conf.setUseHostNameAsBookieID(!useHostname); + oldCookie.getValue().deleteFromRegistrationManager(rm, conf, oldCookie.getVersion()); + return true; + } catch (BookieException.CookieNotFoundException nne) { + if (LOG.isDebugEnabled()) { + LOG.debug("Ignoring, cookie will be written to zookeeper"); + } + } + } else { + // writes newcookie to local dirs + for (File journalDirectory : journalDirectories) { + newCookie.writeToDirectory(journalDirectory); + LOG.info("Updated cookie file present in journalDirectory {}", journalDirectory); + } + for (File dir : ledgerDirectories) { + newCookie.writeToDirectory(dir); + } + LOG.info("Updated cookie file present in ledgerDirectories {}", (Object) ledgerDirectories); + if (ledgerDirectories != indexDirectories) { + for (File dir : indexDirectories) { + newCookie.writeToDirectory(dir); + } + LOG.info("Updated cookie file present in indexDirectories {}", (Object) indexDirectories); + } + } + // writes newcookie to zookeeper + conf.setUseHostNameAsBookieID(useHostname); + newCookie.writeToRegistrationManager(rm, conf, Version.NEW); + + // delete oldcookie + conf.setUseHostNameAsBookieID(!useHostname); + oldCookie.getValue().deleteFromRegistrationManager(rm, conf, oldCookie.getVersion()); + return true; + } catch (IOException | BookieException ioe) { + LOG.error("IOException during cookie update!", ioe); + return false; + } + }); + } + + private boolean verifyCookie(Cookie oldCookie, File dir) throws IOException { + try { + Cookie cookie = Cookie.readFromDirectory(dir); + cookie.verify(oldCookie); + } catch (BookieException.InvalidCookieException e) { + return false; + } + return true; + } + + private boolean expandStorage(ServerConfiguration bkConf) throws Exception { + return runFunctionWithMetadataBookieDriver(bkConf, driver -> { + List<File> allLedgerDirs = Lists.newArrayList(); + allLedgerDirs.addAll(Arrays.asList(ledgerDirectories)); + if (indexDirectories != ledgerDirectories) { + allLedgerDirs.addAll(Arrays.asList(indexDirectories)); + } + + try (RegistrationManager registrationManager = driver.createRegistrationManager()) { + LegacyCookieValidation validation = new LegacyCookieValidation(bkConf, registrationManager); + List<File> dirs = Lists.newArrayList(); + dirs.addAll(Arrays.asList(journalDirectories)); + dirs.addAll(allLedgerDirs); + validation.checkCookies(dirs); + return true; + } catch (BookieException e) { + LOG.error("Exception while updating cookie for storage expansion", e); + return false; + } + }); + } + + private boolean listOrDeleteCookies(ServerConfiguration bkConf, boolean delete, boolean force) throws Exception { + BookieId bookieAddress = BookieImpl.getBookieId(bkConf); + File[] journalDirs = bkConf.getJournalDirs(); + File[] ledgerDirs = bkConf.getLedgerDirs(); + File[] indexDirs = bkConf.getIndexDirs(); + File[] allDirs = ArrayUtils.addAll(journalDirs, ledgerDirs); + if (indexDirs != null) { + allDirs = ArrayUtils.addAll(allDirs, indexDirs); + } + + File[] allCurDirs = BookieImpl.getCurrentDirectories(allDirs); + List<File> allVersionFiles = new LinkedList<>(); + File versionFile; + for (File curDir : allCurDirs) { + versionFile = new File(curDir, BookKeeperConstants.VERSION_FILENAME); + if (versionFile.exists()) { + allVersionFiles.add(versionFile); + } + } + + if
(!allVersionFiles.isEmpty()) { + if (delete) { + boolean confirm = force; + if (!confirm) { + confirm = IOUtils.confirmPrompt("Are you sure you want to delete Cookies locally?"); + } + if (confirm) { + for (File verFile : allVersionFiles) { + if (!verFile.delete()) { + LOG.error("Failed to delete Local cookie file {}. So aborting deletecookie of Bookie: {}", + verFile, bookieAddress); + return false; + } + } + LOG.info("Deleted Local Cookies of Bookie: {}", bookieAddress); + } else { + LOG.info("Skipping deleting local Cookies of Bookie: {}", bookieAddress); + } + } else { + LOG.info("Listing local Cookie Files of Bookie: {}", bookieAddress); + for (File verFile : allVersionFiles) { + LOG.info(verFile.getCanonicalPath()); + } + } + } else { + LOG.info("No local cookies for Bookie: {}", bookieAddress); + } + + return runFunctionWithRegistrationManager(bkConf, rm -> { + try { + Versioned<Cookie> cookie = null; + try { + cookie = Cookie.readFromRegistrationManager(rm, bookieAddress); + } catch (BookieException.CookieNotFoundException nne) { + LOG.info("No cookie for {} in metadata store", bookieAddress); + return true; + } + + if (delete) { + boolean confirm = force; + if (!confirm) { + confirm = IOUtils.confirmPrompt("Are you sure you want to delete Cookies from metadata store?"); + } + + if (confirm) { + cookie.getValue().deleteFromRegistrationManager(rm, bkConf, cookie.getVersion()); + LOG.info("Deleted Cookie from metadata store for Bookie: {}", bookieAddress); + } else { + LOG.info("Skipping deleting cookie from metadata store for Bookie: {}", bookieAddress); + } + } + } catch (BookieException | IOException e) { + return false; + } + return true; + }); + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/cookie/CookieCommand.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/cookie/CookieCommand.java new file mode 100644 index 00000000000..d08f1f1eeff --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/cookie/CookieCommand.java @@ -0,0 +1,125 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
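Both the admin command above and the cookie commands that follow route metadata access through the MetadataDrivers helpers, which build the driver or RegistrationManager for the configured metadata service, invoke a closure, and clean up afterwards; checked exceptions have to be smuggled out as unchecked ones, which is why UncheckedExecutionException appears throughout. A minimal sketch of the pattern, with the metadata URI as a placeholder:

    import org.apache.bookkeeper.conf.ServerConfiguration;
    import org.apache.bookkeeper.meta.MetadataDrivers;

    public class RegistrationManagerSketch {
        public static void main(String[] args) throws Exception {
            ServerConfiguration conf = new ServerConfiguration();
            conf.setMetadataServiceUri("zk://localhost:2181/ledgers"); // placeholder URI
            Boolean reachable = MetadataDrivers.runFunctionWithRegistrationManager(conf, rm -> {
                // rm is only valid inside this closure; wrap any checked
                // exception in an unchecked one, as the commands above do.
                return rm != null;
            });
            System.out.println("registration manager reachable: " + reachable);
        }
    }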
+ */ + +package org.apache.bookkeeper.tools.cli.commands.cookie; + +import static com.google.common.base.Preconditions.checkArgument; + +import com.google.common.util.concurrent.UncheckedExecutionException; +import java.io.IOException; +import java.net.UnknownHostException; +import java.nio.file.Files; +import java.nio.file.NoSuchFileException; +import java.nio.file.Paths; +import java.util.concurrent.ExecutionException; +import lombok.extern.slf4j.Slf4j; +import org.apache.bookkeeper.common.net.ServiceURI; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.discover.RegistrationManager; +import org.apache.bookkeeper.meta.MetadataDrivers; +import org.apache.bookkeeper.meta.exceptions.MetadataException; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.tools.cli.helpers.BookieShellCommand; +import org.apache.bookkeeper.tools.common.BKCommand; +import org.apache.bookkeeper.tools.common.BKFlags; +import org.apache.bookkeeper.tools.framework.CliFlags; +import org.apache.bookkeeper.tools.framework.CliSpec; +import org.apache.commons.configuration.CompositeConfiguration; + +/** + * This is a mixin for cookie related commands to extend. + */ +@Slf4j +abstract class CookieCommand<CookieFlagsT extends CliFlags> + extends BKCommand<CookieFlagsT> { + + protected CookieCommand(CliSpec<CookieFlagsT> spec) { + super(spec); + } + + @Override + protected boolean apply(ServiceURI serviceURI, + CompositeConfiguration conf, + BKFlags globalFlags, + CookieFlagsT cmdFlags) { + ServerConfiguration serverConf = new ServerConfiguration(); + serverConf.loadConf(conf); + + if (null != serviceURI) { + serverConf.setMetadataServiceUri(serviceURI.getUri().toString()); + } + + try { + return MetadataDrivers.runFunctionWithRegistrationManager(serverConf, registrationManager -> { + try { + apply(registrationManager, cmdFlags); + return true; + } catch (Exception e) { + throw new UncheckedExecutionException(e); + } + }); + } catch (MetadataException | ExecutionException | UncheckedExecutionException e) { + Throwable cause = e; + if (!(e instanceof MetadataException) && null != e.getCause()) { + cause = e.getCause(); + } + spec.console().println("Failed to process cookie command '" + name() + "'"); + cause.printStackTrace(spec.console()); + return false; + } + } + + protected BookieId getBookieId(CookieFlagsT cmdFlags) throws UnknownHostException { + checkArgument( + cmdFlags.arguments.size() == 1, + "Exactly one bookie id must be specified"); + + String bookieId = cmdFlags.arguments.get(0); + try { + new BookieSocketAddress(bookieId); + } catch (UnknownHostException nhe) { + spec.console() + .println("Invalid bookie id '" + + bookieId + "' is used to create cookie."
+ + " Bookie id should be in the format of ':'"); + throw nhe; + } + return BookieId.parse(bookieId); + } + + protected byte[] readCookieDataFromFile(String cookieFile) throws IOException { + try { + return Files.readAllBytes(Paths.get(cookieFile)); + } catch (NoSuchFileException nfe) { + spec.console() + .println("Cookie file '" + cookieFile + "' doesn't exist."); + throw nfe; + } + } + + + protected abstract void apply(RegistrationManager rm, CookieFlagsT cmdFlags) + throws Exception; + + public org.apache.bookkeeper.bookie.BookieShell.Command asShellCommand(String shellCmdName, + CompositeConfiguration conf) { + return new BookieShellCommand<>(shellCmdName, this, conf); + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/cookie/CreateCookieCommand.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/cookie/CreateCookieCommand.java new file mode 100644 index 00000000000..c2e63594d19 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/cookie/CreateCookieCommand.java @@ -0,0 +1,109 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bookkeeper.tools.cli.commands.cookie; + +import com.beust.jcommander.Parameter; +import java.io.PrintStream; +import lombok.Setter; +import lombok.experimental.Accessors; +import lombok.extern.slf4j.Slf4j; +import org.apache.bookkeeper.bookie.BookieException; +import org.apache.bookkeeper.bookie.BookieException.CookieExistException; +import org.apache.bookkeeper.discover.RegistrationManager; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.tools.cli.commands.cookie.CreateCookieCommand.Flags; +import org.apache.bookkeeper.tools.framework.CliFlags; +import org.apache.bookkeeper.tools.framework.CliSpec; +import org.apache.bookkeeper.versioning.Version; +import org.apache.bookkeeper.versioning.Versioned; + +/** + * A command that create cookie. + */ +@Slf4j +public class CreateCookieCommand extends CookieCommand { + + private static final String NAME = "create"; + private static final String DESC = "Create a cookie for a given bookie"; + private static final String USAGE = "cookie_create Create a cookie for a given bookie\n" + + " Usage: cookie_create [options]\n" + + " Options:\n" + + " * -cf, --cookie-file\n" + + " The file to be uploaded as cookie (param format: `cookieFilePath`)"; + + /** + * Flags to create a cookie for a given bookie. 
+ */ + @Accessors(fluent = true) + @Setter + public static class Flags extends CliFlags { + + @Parameter( + names = { "-cf", "--cookie-file" }, + description = "The file to be uploaded as cookie", + required = true) + private String cookieFile; + + } + + public CreateCookieCommand() { + this(new Flags()); + } + + protected CreateCookieCommand(PrintStream console) { + this(new Flags(), console); + } + + public CreateCookieCommand(Flags flags) { + this(flags, System.out); + } + + private CreateCookieCommand(Flags flags, PrintStream console) { + super(CliSpec.newBuilder() + .withName(NAME) + .withUsage(USAGE) + .withDescription(DESC) + .withFlags(flags) + .withConsole(console) + .withArgumentsUsage("<bookie-id>") + .build()); + } + + @Override + protected void apply(RegistrationManager rm, Flags cmdFlags) throws Exception { + BookieId bookieId = getBookieId(cmdFlags); + + byte[] data = readCookieDataFromFile(cmdFlags.cookieFile); + Versioned<byte[]> cookie = new Versioned<>(data, Version.NEW); + try { + rm.writeCookie(bookieId, cookie); + } catch (CookieExistException cee) { + spec.console() + .println("Cookie already exists for bookie '" + bookieId + "'"); + throw cee; + } catch (BookieException be) { + spec.console() + .println("Exception on creating cookie for bookie '" + bookieId + "'"); + be.printStackTrace(spec.console()); + throw be; + } + } + +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/cookie/DeleteCookieCommand.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/cookie/DeleteCookieCommand.java new file mode 100644 index 00000000000..d7ce7584e2c --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/cookie/DeleteCookieCommand.java @@ -0,0 +1,99 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bookkeeper.tools.cli.commands.cookie; + +import java.io.PrintStream; +import lombok.Setter; +import lombok.experimental.Accessors; +import lombok.extern.slf4j.Slf4j; +import org.apache.bookkeeper.bookie.BookieException; +import org.apache.bookkeeper.bookie.BookieException.CookieNotFoundException; +import org.apache.bookkeeper.discover.RegistrationManager; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.tools.cli.commands.cookie.DeleteCookieCommand.Flags; +import org.apache.bookkeeper.tools.framework.CliFlags; +import org.apache.bookkeeper.tools.framework.CliSpec; +import org.apache.bookkeeper.versioning.LongVersion; + +/** + * A command that deletes a cookie.
+ */ +@Slf4j +public class DeleteCookieCommand extends CookieCommand<Flags> { + + private static final String NAME = "delete"; + private static final String DESC = "Delete a cookie for a given bookie"; + + private static final String USAGE = "cookie_delete Delete a cookie for a given bookie\n" + + " Usage: cookie_delete <bookie-id> [options]\n" + + " Options:\n" + + " * <bookie-id>\n" + + " The bookie-id to be deleted (param format: `address:port`)"; + + /** + * Flags to delete a cookie for a given bookie. + */ + @Accessors(fluent = true) + @Setter + public static class Flags extends CliFlags { + } + + public DeleteCookieCommand() { + this(new Flags()); + } + + DeleteCookieCommand(PrintStream console) { + this(new Flags(), console); + } + + public DeleteCookieCommand(Flags flags) { + this(flags, System.out); + } + + private DeleteCookieCommand(Flags flags, PrintStream console) { + super(CliSpec.newBuilder() + .withName(NAME) + .withUsage(USAGE) + .withDescription(DESC) + .withFlags(flags) + .withConsole(console) + .withArgumentsUsage("<bookie-id>") + .build()); + } + + @Override + protected void apply(RegistrationManager rm, Flags cmdFlags) throws Exception { + BookieId bookieId = getBookieId(cmdFlags); + + try { + rm.removeCookie(bookieId, new LongVersion(-1)); + } catch (CookieNotFoundException cee) { + spec.console() + .println("Cookie not found for bookie '" + bookieId + "'"); + throw cee; + } catch (BookieException be) { + spec.console() + .println("Exception on deleting cookie for bookie '" + bookieId + "'"); + be.printStackTrace(spec.console()); + throw be; + } + } + +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/cookie/GenerateCookieCommand.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/cookie/GenerateCookieCommand.java new file mode 100644 index 00000000000..827078c0192 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/cookie/GenerateCookieCommand.java @@ -0,0 +1,151 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bookkeeper.tools.cli.commands.cookie; + +import com.beust.jcommander.Parameter; +import java.io.File; +import java.io.PrintStream; +import lombok.Setter; +import lombok.experimental.Accessors; +import lombok.extern.slf4j.Slf4j; +import org.apache.bookkeeper.bookie.Cookie; +import org.apache.bookkeeper.bookie.Cookie.Builder; +import org.apache.bookkeeper.discover.RegistrationManager; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.tools.cli.commands.cookie.GenerateCookieCommand.Flags; +import org.apache.bookkeeper.tools.framework.CliFlags; +import org.apache.bookkeeper.tools.framework.CliSpec; +import org.apache.commons.lang3.StringUtils; + +/** + * A command that generates a cookie.
+ */ +@Slf4j +public class GenerateCookieCommand extends CookieCommand<Flags> { + + private static final String NAME = "generate"; + private static final String DESC = "Generate a cookie for a given bookie"; + + private static final String USAGE = "cookie_generate Generate a cookie for a given bookie\n" + + " Usage: cookie_generate <bookie-id> [options]\n" + + " Options:\n" + + " -i, --instance-id\n" + + " The instance id of the cluster that this bookie belongs to." + + " If omitted, it will use the instance id of the cluster that this cli connects to. \n" + + " * -j, --journal-dirs\n" + + " The journal directories used by this bookie " + + "(param format: `journalDir1,...,journalDirM`)\n" + + " * -l, --ledger-dirs\n" + + " The ledger directories used by this bookie (param format: `ledgerDir1,...,ledgerDirN`)\n" + + " * -o, --output-file\n" + + " The output file to save the generated cookie (param format: `cookieLocalFilePath`)\n" + + " -ix, --index-dirs\n" + + " The index directories used by this bookie (param format: `indexDir1,...,indexDirN`)"; + + /** + * Flags to generate a cookie for a given bookie. + */ + @Accessors(fluent = true) + @Setter + public static class Flags extends CliFlags { + + @Parameter( + names = { "-j", "--journal-dirs" }, + description = "The journal directories used by this bookie", + required = true) + private String journalDirs; + + @Parameter( + names = { "-l", "--ledger-dirs" }, + description = "The ledger directories used by this bookie", + required = true) + private String ledgerDirs; + + @Parameter( + names = { "-ix", "--index-dirs" }, + description = "The index directories used by this bookie") + private String indexDirs = null; + + @Parameter( + names = { "-i", "--instance-id" }, + description = "The instance id of the cluster that this bookie belongs to."
+ + " If omitted, it will used the instance id of the cluster that this cli connects to.") + private String instanceId = null; + + @Parameter( + names = { "-o", "--output-file" }, + description = "The output file to save the generated cookie.", + required = true) + private String outputFile; + + } + + public GenerateCookieCommand() { + this(new Flags()); + } + + GenerateCookieCommand(PrintStream console) { + this(new Flags(), console); + } + + public GenerateCookieCommand(Flags flags) { + this(flags, System.out); + } + + private GenerateCookieCommand(Flags flags, PrintStream console) { + super(CliSpec.newBuilder() + .withName(NAME) + .withUsage(USAGE) + .withDescription(DESC) + .withFlags(flags) + .withConsole(console) + .withArgumentsUsage("") + .build()); + } + + @Override + protected void apply(RegistrationManager rm, Flags cmdFlags) throws Exception { + BookieId bookieId = getBookieId(cmdFlags); + + String instanceId; + if (null == cmdFlags.instanceId) { + instanceId = rm.getClusterInstanceId(); + } else { + instanceId = cmdFlags.instanceId; + } + + Builder builder = Cookie.newBuilder(); + builder.setBookieId(bookieId.toString()); + if (StringUtils.isEmpty(instanceId)) { + builder.setInstanceId(null); + } else { + builder.setInstanceId(instanceId); + } + builder.setJournalDirs(cmdFlags.journalDirs); + builder.setLedgerDirs(Cookie.encodeDirPaths(cmdFlags.ledgerDirs.split(","))); + if (StringUtils.isNotBlank(cmdFlags.indexDirs)) { + builder.setIndexDirs(Cookie.encodeDirPaths(cmdFlags.indexDirs.split(","))); + } + Cookie cookie = builder.build(); + cookie.writeToFile(new File(cmdFlags.outputFile)); + spec.console().println("Successfully saved the generated cookie to " + cmdFlags.outputFile); + } + +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/cookie/GetCookieCommand.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/cookie/GetCookieCommand.java new file mode 100644 index 00000000000..372dcf4a94b --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/cookie/GetCookieCommand.java @@ -0,0 +1,106 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.bookkeeper.tools.cli.commands.cookie; + +import java.io.PrintStream; +import lombok.Setter; +import lombok.experimental.Accessors; +import lombok.extern.slf4j.Slf4j; +import org.apache.bookkeeper.bookie.BookieException; +import org.apache.bookkeeper.bookie.BookieException.CookieNotFoundException; +import org.apache.bookkeeper.bookie.Cookie; +import org.apache.bookkeeper.discover.RegistrationManager; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.tools.cli.commands.cookie.GetCookieCommand.Flags; +import org.apache.bookkeeper.tools.framework.CliFlags; +import org.apache.bookkeeper.tools.framework.CliSpec; +import org.apache.bookkeeper.versioning.Versioned; + +/** + * A command that retrieves a cookie. + */ +@Slf4j +public class GetCookieCommand extends CookieCommand<Flags> { + + private static final String NAME = "get"; + private static final String DESC = "Retrieve a cookie for a given bookie"; + + private static final String USAGE = "cookie_get Retrieve a cookie for a given bookie\n" + + " Usage: cookie_get <bookie-id> [options]\n" + + " Options:\n" + + " * <bookie-id>\n" + + " The bookie-id to get (param format: `address:port`)"; + + /** + * Flags to retrieve a cookie for a given bookie. + */ + @Accessors(fluent = true) + @Setter + public static class Flags extends CliFlags { + } + + public GetCookieCommand() { + this(new Flags()); + } + + GetCookieCommand(PrintStream console) { + this(new Flags(), console); + } + + public GetCookieCommand(Flags flags) { + this(flags, System.out); + } + + private GetCookieCommand(Flags flags, PrintStream console) { + super(CliSpec.newBuilder() + .withName(NAME) + .withUsage(USAGE) + .withDescription(DESC) + .withFlags(flags) + .withConsole(console) + .withArgumentsUsage("<bookie-id>") + .build()); + } + + @Override + protected void apply(RegistrationManager rm, Flags cmdFlags) throws Exception { + BookieId bookieId = getBookieId(cmdFlags); + + try { + Versioned<Cookie> cookie = Cookie.readFromRegistrationManager(rm, bookieId); + spec.console().println("Cookie for bookie '" + bookieId + "' is:"); + spec.console().println("---"); + spec.console().println( + cookie.getValue() + ); + spec.console().println("---"); + } catch (CookieNotFoundException cee) { + spec.console() + .println("Cookie not found for bookie '" + bookieId + "'"); + throw cee; + } catch (BookieException be) { + spec.console() + .println("Exception on getting cookie for bookie '" + bookieId + "'"); + be.printStackTrace(spec.console()); + throw be; + } + } + +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/cookie/UpdateCookieCommand.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/cookie/UpdateCookieCommand.java new file mode 100644 index 00000000000..f1fe3db455a --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/cookie/UpdateCookieCommand.java @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bookkeeper.tools.cli.commands.cookie; + +import com.beust.jcommander.Parameter; +import java.io.PrintStream; +import lombok.Setter; +import lombok.experimental.Accessors; +import lombok.extern.slf4j.Slf4j; +import org.apache.bookkeeper.bookie.BookieException; +import org.apache.bookkeeper.bookie.BookieException.CookieNotFoundException; +import org.apache.bookkeeper.discover.RegistrationManager; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.tools.cli.commands.cookie.UpdateCookieCommand.Flags; +import org.apache.bookkeeper.tools.framework.CliFlags; +import org.apache.bookkeeper.tools.framework.CliSpec; +import org.apache.bookkeeper.versioning.LongVersion; +import org.apache.bookkeeper.versioning.Versioned; + +/** + * A command that updates a cookie. + */ +@Slf4j +public class UpdateCookieCommand extends CookieCommand<Flags> { + + private static final String NAME = "update"; + private static final String DESC = "Update a cookie for a given bookie"; + + private static final String USAGE = "cookie_update Update a cookie for a given bookie\n" + + " Usage: cookie_update <bookie-id> [options]\n" + + " Options:\n" + + " * -cf, --cookie-file\n" + + " The file to be uploaded as cookie (param format: `cookieFilePath`)\n" + + " * <bookie-id>\n" + + " Bookie ID (param format: `address:port`)"; + + /** + * Flags to update a cookie for a given bookie.
+ */ + @Accessors(fluent = true) + @Setter + public static class Flags extends CliFlags { + + @Parameter( + names = { "-cf", "--cookie-file" }, + description = "The file to be uploaded as cookie", + required = true) + private String cookieFile; + + } + + public UpdateCookieCommand() { + this(new Flags()); + } + + UpdateCookieCommand(PrintStream console) { + this(new Flags(), console); + } + + public UpdateCookieCommand(Flags flags) { + this(flags, System.out); + } + + private UpdateCookieCommand(Flags flags, PrintStream console) { + super(CliSpec.newBuilder() + .withName(NAME) + .withUsage(USAGE) + .withDescription(DESC) + .withFlags(flags) + .withConsole(console) + .withArgumentsUsage("<bookie-id>") + .build()); + } + + @Override + protected void apply(RegistrationManager rm, Flags cmdFlags) throws Exception { + BookieId bookieId = getBookieId(cmdFlags); + + byte[] data = readCookieDataFromFile(cmdFlags.cookieFile); + Versioned<byte[]> cookie = new Versioned<>(data, new LongVersion(-1L)); + try { + rm.writeCookie(bookieId, cookie); + } catch (CookieNotFoundException cnfe) { + spec.console() + .println("Cookie not found for bookie '" + bookieId + "' to update"); + throw cnfe; + } catch (BookieException be) { + spec.console() + .println("Exception on updating cookie for bookie '" + bookieId + "'"); + be.printStackTrace(spec.console()); + throw be; + } + } + +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/cookie/package-info.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/cookie/package-info.java new file mode 100644 index 00000000000..248c49ad7a0 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/cookie/package-info.java @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/** + * Cookie related cli commands. + */ +package org.apache.bookkeeper.tools.cli.commands.cookie; \ No newline at end of file diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/health/SwitchOfHealthCheckCommand.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/health/SwitchOfHealthCheckCommand.java new file mode 100644 index 00000000000..fd7142590b7 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/health/SwitchOfHealthCheckCommand.java @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.bookkeeper.tools.cli.commands.health; + +import com.beust.jcommander.Parameter; +import com.google.common.util.concurrent.UncheckedExecutionException; +import java.util.concurrent.ExecutionException; +import lombok.Setter; +import lombok.experimental.Accessors; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.meta.MetadataDrivers; +import org.apache.bookkeeper.meta.exceptions.MetadataException; +import org.apache.bookkeeper.tools.cli.helpers.BookieCommand; +import org.apache.bookkeeper.tools.framework.CliFlags; +import org.apache.bookkeeper.tools.framework.CliSpec; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + + +/** + * Command to enable or disable health check in the cluster. + */ +public class SwitchOfHealthCheckCommand extends BookieCommand { + + static final Logger LOG = LoggerFactory.getLogger(SwitchOfHealthCheckCommand.class); + + private static final String NAME = "switch"; + private static final String DESC = "Enables or disables health check in the cluster. Default is enabled."; + + public SwitchOfHealthCheckCommand() { + this(new HealthCheckFlags()); + } + + private SwitchOfHealthCheckCommand(HealthCheckFlags flags) { + super(CliSpec.newBuilder() + .withName(NAME).withDescription(DESC) + .withFlags(flags).build()); + } + + /** + * Flags for health check command. + */ + @Accessors(fluent = true) + @Setter + public static class HealthCheckFlags extends CliFlags { + + @Parameter(names = { "-e", "--enable" }, description = "Enable or disable health check.") + private boolean enable; + + @Parameter(names = {"-s", "--status"}, description = "Check the health check status.") + private boolean status; + + } + + @Override + public boolean apply(ServerConfiguration conf, HealthCheckFlags cmdFlags) { + try { + return handler(conf, cmdFlags); + } catch (MetadataException | ExecutionException e) { + throw new UncheckedExecutionException(e.getMessage(), e); + } + } + + private boolean handler(ServerConfiguration conf, HealthCheckFlags flags) + throws MetadataException, ExecutionException { + + MetadataDrivers.runFunctionWithMetadataBookieDriver(conf, driver -> { + try { + boolean isEnable = driver.isHealthCheckEnabled().get(); + + if (flags.status) { + LOG.info("EnableHealthCheck is " + (isEnable ? "enabled." : "disabled.")); + return null; + } + + if (flags.enable) { + if (isEnable) { + LOG.warn("HealthCheck already enabled. Doing nothing"); + } else { + LOG.info("Enable HealthCheck"); + driver.enableHealthCheck().get(); + } + } else { + if (!isEnable) { + LOG.warn("HealthCheck already disabled. 
Doing nothing");
+                    } else {
+                        LOG.info("Disable HealthCheck");
+                        driver.disableHealthCheck().get();
+                    }
+                }
+            } catch (Exception e) {
+                LOG.error("exception", e);
+                throw new UncheckedExecutionException(e);
+            }
+            return null;
+        });
+        return true;
+    }
+}
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/health/package-info.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/health/package-info.java
new file mode 100644
index 00000000000..1f6b68a602a
--- /dev/null
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/health/package-info.java
@@ -0,0 +1,19 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.bookkeeper.tools.cli.commands.health;
\ No newline at end of file
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/helpers/BookieShellCommand.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/helpers/BookieShellCommand.java
new file mode 100644
index 00000000000..8c2068b6516
--- /dev/null
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/helpers/BookieShellCommand.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.bookkeeper.tools.cli.helpers;
+
+import org.apache.bookkeeper.bookie.BookieShell.Command;
+import org.apache.bookkeeper.tools.common.BKCommand;
+import org.apache.bookkeeper.tools.framework.CliFlags;
+import org.apache.commons.configuration.CompositeConfiguration;
+import org.apache.commons.lang3.StringUtils;
+
+/**
+ * A utility class that converts a new CLI command into an old shell command.
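+ *
+ * <p>A minimal adaptation sketch (illustrative only; the command name, flag and
+ * configuration values below are hypothetical):
+ * <pre>{@code
+ * BKCommand bkCmd = ...; // some new-style CLI command
+ * CompositeConfiguration conf = new CompositeConfiguration();
+ * Command shellCmd = new BookieShellCommand("cookie_update", bkCmd, conf);
+ * int rc = shellCmd.runCmd(new String[] { "-cf", "/path/to/cookie-file" });
+ * }</pre>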
+ */ +public class BookieShellCommand implements Command { + + protected final String shellCmdName; + protected final BKCommand bkCmd; + protected final CompositeConfiguration conf; + + public BookieShellCommand(String shellCmdName, + BKCommand bkCmd, + CompositeConfiguration conf) { + this.shellCmdName = shellCmdName; + this.bkCmd = bkCmd; + this.conf = conf; + } + + @Override + public int runCmd(String[] args) throws Exception { + return bkCmd.apply( + shellCmdName, + conf, + args + ); + } + + @Override + public String description() { + // format as org.apache.bookkeeper.bookie.BookieShell.MyCommand.description + return StringUtils.isBlank(bkCmd.getUsage()) ? shellCmdName + " [options]" : bkCmd.getUsage(); + } + + @Override + public void printUsage() { + bkCmd.usage(); + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/helpers/ClientCommand.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/helpers/ClientCommand.java index ae807dfb657..45cb40e43a3 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/helpers/ClientCommand.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/helpers/ClientCommand.java @@ -66,7 +66,7 @@ protected boolean apply(ClientConfiguration conf, run(bk, cmdFlags); return true; } catch (Exception e) { - log.error("Faild to process command '{}'", name(), e); + log.error("Failed to process command '{}'", name(), e); return false; } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/helpers/CommandHelpers.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/helpers/CommandHelpers.java index 88ac52e4abf..46980e14325 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/helpers/CommandHelpers.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/helpers/CommandHelpers.java @@ -19,9 +19,13 @@ package org.apache.bookkeeper.tools.cli.helpers; import com.google.common.net.InetAddresses; +import java.net.InetAddress; import lombok.AccessLevel; import lombok.NoArgsConstructor; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.proto.BookieAddressResolver; + /** * Helper classes used by the cli commands. 
@@ -29,25 +33,43 @@
 @NoArgsConstructor(access = AccessLevel.PRIVATE)
 public final class CommandHelpers {
+    private static final String UNKNOWN = "UNKNOWN";
     /*
      * The string returned is of the form:
-     * 'hostname'('otherformofhostname'):'port number'
-     *
-     * where hostname and otherformofhostname are ipaddress and
-     * canonicalhostname or viceversa
+     * BookieID:bookieId, IP:ip, Port:port, Hostname:hostname
+     * When using a hostname as the bookie id, it is possible that the host is no longer valid and
+     * an IP cannot be resolved from it, so UNKNOWN is used to indicate the IP is unknown for that hostname.
      */
-    public static String getBookieSocketAddrStringRepresentation(BookieSocketAddress bookieId) {
-        String hostname = bookieId.getHostName();
-        boolean isHostNameIpAddress = InetAddresses.isInetAddress(hostname);
-        String otherFormOfHostname = null;
-        if (isHostNameIpAddress) {
-            otherFormOfHostname = bookieId.getSocketAddress().getAddress().getCanonicalHostName();
-        } else {
-            otherFormOfHostname = bookieId.getSocketAddress().getAddress().getHostAddress();
+    public static String getBookieSocketAddrStringRepresentation(BookieId bookieId,
+                                                                 BookieAddressResolver bookieAddressResolver) {
+        try {
+            BookieSocketAddress networkAddress = bookieAddressResolver.resolve(bookieId);
+            String hostname = networkAddress.getHostName();
+            String realHostname;
+            String ip;
+            if (InetAddresses.isInetAddress(hostname)) {
+                ip = hostname;
+                realHostname = networkAddress.getSocketAddress().getAddress().getCanonicalHostName();
+            } else {
+                InetAddress ia = networkAddress.getSocketAddress().getAddress();
+                if (null != ia) {
+                    ip = ia.getHostAddress();
+                } else {
+                    ip = UNKNOWN;
+                }
+                realHostname = hostname;
+            }
+            return formatBookieSocketAddress(bookieId, ip, networkAddress.getPort(), realHostname);
+        } catch (BookieAddressResolver.BookieIdNotResolvedException bookieNotAvailable) {
+            return formatBookieSocketAddress(bookieId, UNKNOWN, 0, UNKNOWN);
         }
-        String bookieSocketAddrStringRepresentation = hostname + "(" + otherFormOfHostname + ")" + ":"
-                + bookieId.getSocketAddress().getPort();
-        return bookieSocketAddrStringRepresentation;
+    }
+
+    /**
+     * Format {@link BookieSocketAddress}.
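+     *
+     * <p>Illustrative output, derived from the format string below (the values are
+     * hypothetical): {@code BookieID:bookie-1.example.com:3181, IP:192.0.2.10, Port:3181,
+     * Hostname:bookie-1.example.com}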
+ **/ + private static String formatBookieSocketAddress(BookieId bookieId, String ip, int port, String hostName) { + return String.format("BookieID:%s, IP:%s, Port:%d, Hostname:%s", bookieId.toString(), ip, port, hostName); } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/helpers/DiscoveryCommand.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/helpers/DiscoveryCommand.java index 5af6af39211..f1a28496908 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/helpers/DiscoveryCommand.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/helpers/DiscoveryCommand.java @@ -18,6 +18,7 @@ */ package org.apache.bookkeeper.tools.cli.helpers; +import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; import java.net.URI; import java.util.Optional; import java.util.concurrent.Executors; @@ -44,6 +45,7 @@ protected DiscoveryCommand(CliSpec spec) { } @Override + @SuppressFBWarnings("RCN_REDUNDANT_NULLCHECK_WOULD_HAVE_BEEN_A_NPE") protected boolean apply(ClientConfiguration clientConf, DiscoveryFlagsT cmdFlags) { try { URI metadataServiceUri = URI.create(clientConf.getMetadataServiceUri()); @@ -54,7 +56,7 @@ protected boolean apply(ClientConfiguration clientConf, DiscoveryFlagsT cmdFlags executor, NullStatsLogger.INSTANCE, Optional.empty()); - run(driver.getRegistrationClient(), cmdFlags); + run(driver.getRegistrationClient(), cmdFlags, clientConf.getBookieAddressResolverEnabled()); return true; } } catch (Exception e) { @@ -68,7 +70,7 @@ protected void run(BookKeeper bk, DiscoveryFlagsT cmdFlags) throws Exception { throw new IllegalStateException("It should never be called."); } - protected abstract void run(RegistrationClient regClient, DiscoveryFlagsT cmdFlags) - throws Exception; + protected abstract void run(RegistrationClient regClient, DiscoveryFlagsT cmdFlags, + boolean bookieAddressResolverEnabled) throws Exception; } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/AvailabilityOfEntriesOfLedger.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/AvailabilityOfEntriesOfLedger.java new file mode 100644 index 00000000000..cf221f5dda7 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/AvailabilityOfEntriesOfLedger.java @@ -0,0 +1,438 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package org.apache.bookkeeper.util;
+
+import io.netty.buffer.ByteBuf;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.BitSet;
+import java.util.List;
+import java.util.Map.Entry;
+import java.util.PrimitiveIterator;
+import java.util.TreeMap;
+import org.apache.commons.lang3.mutable.MutableInt;
+import org.apache.commons.lang3.mutable.MutableLong;
+import org.apache.commons.lang3.mutable.MutableObject;
+
+/**
+ * Ordered collection of SequenceGroups will represent entries of the ledger
+ * residing in a bookie.
+ *
+ * <p>
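+ * For instance (an illustrative example, not part of the original javadoc): the entry ids
+ * {0, 1, 2, 8, 9, 10, 16, 17, 18} form three sequences of size 3 whose starts are 8 apart,
+ * so they collapse into a single SequenceGroup with firstSequenceStart 0, lastSequenceStart 16,
+ * sequenceSize 3 and sequencePeriod 8 (see the nomenclature comment further below).
+ *
+ * <p>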
          In the byte array representation of AvailabilityOfEntriesOfLedger, for the + * sake of future extensibility it would be helpful to have reserved space for + * header at the beginning. So the first 64 bytes will be used for header, with + * the first four bytes specifying the int version number, next four bytes + * specifying the number of sequencegroups for now and the rest of the bytes in + * the reserved space will be 0's. The encoded format will be represented after + * the first 64 bytes. The ordered collection of SequenceGroups will be appended + * sequentially to this byte array, with each SequenceGroup taking 24 bytes. + */ +public class AvailabilityOfEntriesOfLedger { + public static final long INVALID_ENTRYID = -1; + public static final AvailabilityOfEntriesOfLedger EMPTY_AVAILABILITYOFENTRIESOFLEDGER; + static { + long[] tmpArray = {}; + EMPTY_AVAILABILITYOFENTRIESOFLEDGER = new AvailabilityOfEntriesOfLedger(Arrays.stream(tmpArray).iterator()); + } + + /* + * + * Nomenclature: + * + * - Continuous entries are grouped as a ’Sequence’. - Number of continuous + * entries in a ‘Sequence’ is called ‘sequenceSize’. - Gap between + * Consecutive sequences is called ‘sequencePeriod’. - Consecutive sequences + * with same sequenceSize and same sequencePeriod in between consecutive + * sequences are grouped as a SequenceGroup. - ‘firstSequenceStart’ is the + * first entry in the first sequence of the SequenceGroup. - + * ‘lastSequenceStart’ is the first entry in the last sequence of the + * SequenceGroup. + * + * To represent a SequenceGroup, two long values and two int values are + * needed, so each SequenceGroup can be represented with (2 * 8 + 2 * 4 = 24 + * bytes). + */ + private static class SequenceGroup { + private static final int SEQUENCEGROUP_BYTES = 2 * Long.BYTES + 2 * Integer.BYTES; + private final long firstSequenceStart; + private final int sequenceSize; + private long lastSequenceStart = INVALID_ENTRYID; + private int sequencePeriod; + private boolean isSequenceGroupClosed = false; + private long numOfEntriesInSequenceGroup = 0; + + private SequenceGroup(long firstSequenceStart, int sequenceSize) { + this.firstSequenceStart = firstSequenceStart; + this.lastSequenceStart = firstSequenceStart; + this.sequenceSize = sequenceSize; + this.sequencePeriod = 0; + } + + private SequenceGroup(byte[] serializedSequenceGroup) { + ByteBuffer buffer = ByteBuffer.wrap(serializedSequenceGroup); + firstSequenceStart = buffer.getLong(); + lastSequenceStart = buffer.getLong(); + sequenceSize = buffer.getInt(); + sequencePeriod = buffer.getInt(); + setSequenceGroupClosed(); + } + + private boolean isSequenceGroupClosed() { + return isSequenceGroupClosed; + } + + private void setSequenceGroupClosed() { + this.isSequenceGroupClosed = true; + numOfEntriesInSequenceGroup = (lastSequenceStart - firstSequenceStart) == 0 ? 
sequenceSize + : (((lastSequenceStart - firstSequenceStart) / sequencePeriod) + 1) * sequenceSize; + } + + private long getNumOfEntriesInSequenceGroup() { + if (!isSequenceGroupClosed()) { + throw new IllegalStateException( + "SequenceGroup is not yet closed, it is illegal to call getNumOfEntriesInSequenceGroup"); + } + return numOfEntriesInSequenceGroup; + } + + private long getLastSequenceStart() { + return lastSequenceStart; + } + + private long getLastEntryInSequenceGroup() { + return lastSequenceStart + sequenceSize; + } + + private void setLastSequenceStart(long lastSequenceStart) { + this.lastSequenceStart = lastSequenceStart; + } + + private int getSequencePeriod() { + return sequencePeriod; + } + + private void setSequencePeriod(int sequencePeriod) { + this.sequencePeriod = sequencePeriod; + } + + private long getFirstSequenceStart() { + return firstSequenceStart; + } + + private void serializeSequenceGroup(byte[] byteArrayForSerialization) { + if (!isSequenceGroupClosed()) { + throw new IllegalStateException( + "SequenceGroup is not yet closed, it is illegal to call serializeSequenceGroup"); + } + ByteBuffer buffer = ByteBuffer.wrap(byteArrayForSerialization); + buffer.putLong(firstSequenceStart); + buffer.putLong(lastSequenceStart); + buffer.putInt(sequenceSize); + buffer.putInt(sequencePeriod); + } + + private boolean isEntryAvailable(long entryId) { + if (!isSequenceGroupClosed()) { + throw new IllegalStateException( + "SequenceGroup is not yet closed, it is illegal to call isEntryAvailable"); + } + + if ((entryId >= firstSequenceStart) && (entryId <= (lastSequenceStart + sequenceSize))) { + if (sequencePeriod == 0) { + return ((entryId - firstSequenceStart) < sequenceSize); + } else { + return (((entryId - firstSequenceStart) % sequencePeriod) < sequenceSize); + } + } else { + return false; + } + } + } + + public static final int HEADER_SIZE = 64; + public static final int V0 = 0; + // current version of AvailabilityOfEntriesOfLedger header is V0 + public static final int CURRENT_HEADER_VERSION = V0; + private final TreeMap sortedSequenceGroups = new TreeMap(); + private MutableObject curSequenceGroup = new MutableObject(null); + private MutableLong curSequenceStartEntryId = new MutableLong(INVALID_ENTRYID); + private MutableInt curSequenceSize = new MutableInt(0); + private boolean availabilityOfEntriesOfLedgerClosed = false; + private long totalNumOfAvailableEntries = 0; + + public AvailabilityOfEntriesOfLedger(PrimitiveIterator.OfLong entriesOfLedgerItr) { + while (entriesOfLedgerItr.hasNext()) { + this.addEntryToAvailableEntriesOfLedger(entriesOfLedgerItr.nextLong()); + } + this.closeStateOfEntriesOfALedger(); + } + + public AvailabilityOfEntriesOfLedger(long[] entriesOfLedger) { + for (long entry : entriesOfLedger) { + this.addEntryToAvailableEntriesOfLedger(entry); + } + this.closeStateOfEntriesOfALedger(); + } + + public AvailabilityOfEntriesOfLedger(byte[] serializeStateOfEntriesOfLedger) { + byte[] header = new byte[HEADER_SIZE]; + byte[] serializedSequenceGroupByteArray = new byte[SequenceGroup.SEQUENCEGROUP_BYTES]; + System.arraycopy(serializeStateOfEntriesOfLedger, 0, header, 0, HEADER_SIZE); + + ByteBuffer headerByteBuf = ByteBuffer.wrap(header); + int headerVersion = headerByteBuf.getInt(); + if (headerVersion > CURRENT_HEADER_VERSION) { + throw new IllegalArgumentException("Unsupported Header Version: " + headerVersion); + } + int numOfSequenceGroups = headerByteBuf.getInt(); + SequenceGroup newSequenceGroup; + for (int i = 0; i < numOfSequenceGroups; i++) { + 
Arrays.fill(serializedSequenceGroupByteArray, (byte) 0); + System.arraycopy(serializeStateOfEntriesOfLedger, HEADER_SIZE + (i * SequenceGroup.SEQUENCEGROUP_BYTES), + serializedSequenceGroupByteArray, 0, SequenceGroup.SEQUENCEGROUP_BYTES); + newSequenceGroup = new SequenceGroup(serializedSequenceGroupByteArray); + sortedSequenceGroups.put(newSequenceGroup.getFirstSequenceStart(), newSequenceGroup); + } + setAvailabilityOfEntriesOfLedgerClosed(); + } + + public AvailabilityOfEntriesOfLedger(ByteBuf byteBuf) { + byte[] header = new byte[HEADER_SIZE]; + byte[] serializedSequenceGroupByteArray = new byte[SequenceGroup.SEQUENCEGROUP_BYTES]; + int readerIndex = byteBuf.readerIndex(); + byteBuf.getBytes(readerIndex, header, 0, HEADER_SIZE); + + ByteBuffer headerByteBuf = ByteBuffer.wrap(header); + int headerVersion = headerByteBuf.getInt(); + if (headerVersion > CURRENT_HEADER_VERSION) { + throw new IllegalArgumentException("Unsupported Header Version: " + headerVersion); + } + int numOfSequenceGroups = headerByteBuf.getInt(); + SequenceGroup newSequenceGroup; + for (int i = 0; i < numOfSequenceGroups; i++) { + Arrays.fill(serializedSequenceGroupByteArray, (byte) 0); + byteBuf.getBytes(readerIndex + HEADER_SIZE + (i * SequenceGroup.SEQUENCEGROUP_BYTES), + serializedSequenceGroupByteArray, 0, SequenceGroup.SEQUENCEGROUP_BYTES); + newSequenceGroup = new SequenceGroup(serializedSequenceGroupByteArray); + sortedSequenceGroups.put(newSequenceGroup.getFirstSequenceStart(), newSequenceGroup); + } + setAvailabilityOfEntriesOfLedgerClosed(); + } + + private void initializeCurSequence(long curSequenceStartEntryIdValue) { + curSequenceStartEntryId.setValue(curSequenceStartEntryIdValue); + curSequenceSize.setValue(1); + } + + private void resetCurSequence() { + curSequenceStartEntryId.setValue(INVALID_ENTRYID); + curSequenceSize.setValue(0); + } + + private boolean isCurSequenceInitialized() { + return curSequenceStartEntryId.longValue() != INVALID_ENTRYID; + } + + private boolean isEntryExistingInCurSequence(long entryId) { + return (curSequenceStartEntryId.longValue() <= entryId) + && (entryId < (curSequenceStartEntryId.longValue() + curSequenceSize.intValue())); + } + + private boolean isEntryAppendableToCurSequence(long entryId) { + return ((curSequenceStartEntryId.longValue() + curSequenceSize.intValue()) == entryId); + } + + private void incrementCurSequenceSize() { + curSequenceSize.increment(); + } + + private void createNewSequenceGroupWithCurSequence() { + SequenceGroup curSequenceGroupValue = curSequenceGroup.getValue(); + curSequenceGroupValue.setSequenceGroupClosed(); + sortedSequenceGroups.put(curSequenceGroupValue.getFirstSequenceStart(), curSequenceGroupValue); + curSequenceGroup.setValue(new SequenceGroup(curSequenceStartEntryId.longValue(), curSequenceSize.intValue())); + } + + private boolean isCurSequenceGroupInitialized() { + return curSequenceGroup.getValue() != null; + } + + private void initializeCurSequenceGroupWithCurSequence() { + curSequenceGroup.setValue(new SequenceGroup(curSequenceStartEntryId.longValue(), curSequenceSize.intValue())); + } + + private boolean doesCurSequenceBelongToCurSequenceGroup() { + long curSequenceStartEntryIdValue = curSequenceStartEntryId.longValue(); + int curSequenceSizeValue = curSequenceSize.intValue(); + boolean belongsToThisSequenceGroup = false; + SequenceGroup curSequenceGroupValue = curSequenceGroup.getValue(); + if ((curSequenceGroupValue.sequenceSize == curSequenceSizeValue) + && ((curSequenceGroupValue.getLastSequenceStart() == 
INVALID_ENTRYID) || ((curSequenceStartEntryIdValue + - curSequenceGroupValue.getLastSequenceStart()) == curSequenceGroupValue + .getSequencePeriod()))) { + belongsToThisSequenceGroup = true; + } + return belongsToThisSequenceGroup; + } + + private void appendCurSequenceToCurSequenceGroup() { + SequenceGroup curSequenceGroupValue = curSequenceGroup.getValue(); + curSequenceGroupValue.setLastSequenceStart(curSequenceStartEntryId.longValue()); + if (curSequenceGroupValue.getSequencePeriod() == 0) { + curSequenceGroupValue.setSequencePeriod( + ((int) (curSequenceGroupValue.getLastSequenceStart() - curSequenceGroupValue.firstSequenceStart))); + } + } + + private void addCurSequenceToSequenceGroup() { + if (!isCurSequenceGroupInitialized()) { + initializeCurSequenceGroupWithCurSequence(); + } else if (doesCurSequenceBelongToCurSequenceGroup()) { + appendCurSequenceToCurSequenceGroup(); + } else { + createNewSequenceGroupWithCurSequence(); + } + } + + private void addEntryToAvailableEntriesOfLedger(long entryId) { + if (!isCurSequenceInitialized()) { + initializeCurSequence(entryId); + } else if (isEntryExistingInCurSequence(entryId)) { + /* this entry is already added so do nothing */ + } else if (isEntryAppendableToCurSequence(entryId)) { + incrementCurSequenceSize(); + } else { + addCurSequenceToSequenceGroup(); + initializeCurSequence(entryId); + } + } + + private void closeStateOfEntriesOfALedger() { + if (isCurSequenceInitialized()) { + addCurSequenceToSequenceGroup(); + resetCurSequence(); + } + SequenceGroup curSequenceGroupValue = curSequenceGroup.getValue(); + if (curSequenceGroupValue != null) { + curSequenceGroupValue.setSequenceGroupClosed(); + sortedSequenceGroups.put(curSequenceGroupValue.getFirstSequenceStart(), curSequenceGroupValue); + } + setAvailabilityOfEntriesOfLedgerClosed(); + } + + private boolean isAvailabilityOfEntriesOfLedgerClosed() { + return availabilityOfEntriesOfLedgerClosed; + } + + private void setAvailabilityOfEntriesOfLedgerClosed() { + this.availabilityOfEntriesOfLedgerClosed = true; + for (Entry seqGroupEntry : sortedSequenceGroups.entrySet()) { + totalNumOfAvailableEntries += seqGroupEntry.getValue().getNumOfEntriesInSequenceGroup(); + } + } + + public byte[] serializeStateOfEntriesOfLedger() { + if (!isAvailabilityOfEntriesOfLedgerClosed()) { + throw new IllegalStateException("AvailabilityOfEntriesOfLedger is not yet closed," + + "it is illegal to call serializeStateOfEntriesOfLedger"); + } + byte[] header = new byte[HEADER_SIZE]; + ByteBuffer headerByteBuf = ByteBuffer.wrap(header); + byte[] serializedSequenceGroupByteArray = new byte[SequenceGroup.SEQUENCEGROUP_BYTES]; + byte[] serializedStateByteArray = new byte[HEADER_SIZE + + (sortedSequenceGroups.size() * SequenceGroup.SEQUENCEGROUP_BYTES)]; + final int numOfSequenceGroups = sortedSequenceGroups.size(); + headerByteBuf.putInt(CURRENT_HEADER_VERSION); + headerByteBuf.putInt(numOfSequenceGroups); + System.arraycopy(header, 0, serializedStateByteArray, 0, HEADER_SIZE); + int seqNum = 0; + for (Entry seqGroupEntry : sortedSequenceGroups.entrySet()) { + SequenceGroup seqGroup = seqGroupEntry.getValue(); + Arrays.fill(serializedSequenceGroupByteArray, (byte) 0); + seqGroup.serializeSequenceGroup(serializedSequenceGroupByteArray); + System.arraycopy(serializedSequenceGroupByteArray, 0, serializedStateByteArray, + HEADER_SIZE + ((seqNum++) * SequenceGroup.SEQUENCEGROUP_BYTES), SequenceGroup.SEQUENCEGROUP_BYTES); + } + return serializedStateByteArray; + } + + public boolean isEntryAvailable(long entryId) { + 
if (!isAvailabilityOfEntriesOfLedgerClosed()) { + throw new IllegalStateException( + "AvailabilityOfEntriesOfLedger is not yet closed, it is illegal to call isEntryAvailable"); + } + Entry seqGroup = sortedSequenceGroups.floorEntry(entryId); + if (seqGroup == null) { + return false; + } + return seqGroup.getValue().isEntryAvailable(entryId); + } + + public List getUnavailableEntries(long startEntryId, long lastEntryId, BitSet availabilityOfEntries) { + if (!isAvailabilityOfEntriesOfLedgerClosed()) { + throw new IllegalStateException( + "AvailabilityOfEntriesOfLedger is not yet closed, it is illegal to call getUnavailableEntries"); + } + List unavailableEntries = new ArrayList(); + SequenceGroup curSeqGroup = null; + boolean noSeqGroupRemaining = false; + int bitSetIndex = 0; + for (long entryId = startEntryId; entryId <= lastEntryId; entryId++, bitSetIndex++) { + if (noSeqGroupRemaining) { + if (availabilityOfEntries.get(bitSetIndex)) { + unavailableEntries.add(entryId); + } + continue; + } + if ((curSeqGroup == null) || (entryId > curSeqGroup.getLastEntryInSequenceGroup())) { + Entry curSeqGroupEntry = sortedSequenceGroups.floorEntry(entryId); + if (curSeqGroupEntry == null) { + if (availabilityOfEntries.get(bitSetIndex)) { + unavailableEntries.add(entryId); + } + if (sortedSequenceGroups.ceilingEntry(entryId) == null) { + noSeqGroupRemaining = true; + } + continue; + } else { + curSeqGroup = curSeqGroupEntry.getValue(); + if (entryId > curSeqGroup.getLastEntryInSequenceGroup()) { + if (availabilityOfEntries.get(bitSetIndex)) { + unavailableEntries.add(entryId); + } + noSeqGroupRemaining = true; + continue; + } + } + } + if (availabilityOfEntries.get(bitSetIndex) && (!curSeqGroup.isEntryAvailable(entryId))) { + unavailableEntries.add(entryId); + } + } + return unavailableEntries; + } + + public long getTotalNumOfAvailableEntries() { + if (!isAvailabilityOfEntriesOfLedgerClosed()) { + throw new IllegalStateException("AvailabilityOfEntriesOfLedger is not yet closed," + + " it is illegal to call getTotalNumOfAvailableEntries"); + } + return totalNumOfAvailableEntries; + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/BookKeeperConstants.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/BookKeeperConstants.java index bd6801f9404..107708092f6 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/BookKeeperConstants.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/BookKeeperConstants.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file
 * distributed with this work for additional information
@@ -31,6 +31,7 @@ public class BookKeeperConstants {
     public static final String BOOKIE_STATUS_FILENAME = "BOOKIE_STATUS";
     public static final String PASSWD = "passwd";
     public static final String CURRENT_DIR = "current";
+    public static final String METADATA_CACHE = "metadata-cache";
     public static final String READONLY = "readonly";

     // //////////////////////////
@@ -42,9 +43,13 @@ public class BookKeeperConstants {
     public static final String UNDER_REPLICATION_LOCK = "locks";
     public static final String DISABLE_NODE = "disable";
     public static final String LOSTBOOKIERECOVERYDELAY_NODE = "lostBookieRecoveryDelay";
+    public static final String CHECK_ALL_LEDGERS_CTIME = "checkallledgersctime";
+    public static final String PLACEMENT_POLICY_CHECK_CTIME = "placementpolicycheckctime";
+    public static final String REPLICAS_CHECK_CTIME = "replicascheckctime";
     public static final String DEFAULT_ZK_LEDGERS_ROOT_PATH = "/ledgers";
     public static final String LAYOUT_ZNODE = "LAYOUT";
     public static final String INSTANCEID = "INSTANCEID";
+    public static final String DISABLE_HEALTH_CHECK = "disableHealthCheck";

     /**
      * Set the max log size limit to 1GB. It makes extra room for entry log file before
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/ByteBufList.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/ByteBufList.java
index f9e4cffce09..133a37f0379 100644
--- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/ByteBufList.java
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/ByteBufList.java
@@ -21,9 +21,7 @@
 package org.apache.bookkeeper.util;

 import com.google.common.annotations.VisibleForTesting;
-
 import io.netty.buffer.ByteBuf;
-import io.netty.buffer.PooledByteBufAllocator;
 import io.netty.buffer.Unpooled;
 import io.netty.channel.ChannelHandler.Sharable;
 import io.netty.channel.ChannelHandlerContext;
@@ -34,7 +32,6 @@
 import io.netty.util.Recycler.Handle;
 import io.netty.util.ReferenceCountUtil;
 import io.netty.util.ReferenceCounted;
-
 import java.util.ArrayList;

 /**
@@ -44,8 +41,6 @@
 * will need to be encoded on the channel. There are 2 utility encoders:
 * <ul>
 * <li>{@link #ENCODER}: regular encode that will write all the buffers in the {@link ByteBufList} on the channel</li>
- * <li>{@link #ENCODER_WITH_SIZE}: similar to the previous one, but also prepend a 4 bytes size header, once, carrying
- * the size of the readable bytes across all the buffers contained in the {@link ByteBufList}</li>
 * </ul>
 *
 * <p>
          Example: @@ -127,7 +122,7 @@ public static ByteBufList clone(ByteBufList other) { return buf; } - private static ByteBufList get() { + public static ByteBufList get() { ByteBufList buf = RECYCLER.get(); buf.setRefCnt(1); return buf; @@ -278,13 +273,7 @@ public ReferenceCounted touch(Object hint) { /** * Encoder for the {@link ByteBufList} that doesn't prepend any size header. */ - public static final Encoder ENCODER = new Encoder(false); - - /** - * Encoder for the {@link ByteBufList} that will prepend a 4 byte header with the size of the whole - * {@link ByteBufList} readable bytes. - */ - public static final Encoder ENCODER_WITH_SIZE = new Encoder(true); + public static final Encoder ENCODER = new Encoder(); /** * {@link ByteBufList} encoder. @@ -292,38 +281,34 @@ public ReferenceCounted touch(Object hint) { @Sharable public static class Encoder extends ChannelOutboundHandlerAdapter { - private final boolean prependSize; - - public Encoder(boolean prependSize) { - this.prependSize = prependSize; - } - @Override public void write(ChannelHandlerContext ctx, Object msg, ChannelPromise promise) throws Exception { if (msg instanceof ByteBufList) { ByteBufList b = (ByteBufList) msg; - try { - if (prependSize) { - // Prepend the frame size before writing the buffer list, so that we only have 1 single size - // header - ByteBuf sizeBuffer = PooledByteBufAllocator.DEFAULT.directBuffer(4, 4); - sizeBuffer.writeInt(b.readableBytes()); - ctx.write(sizeBuffer, ctx.voidPromise()); - } - - // Write each buffer individually on the socket. The retain() here is needed to preserve the fact - // that ByteBuf are automatically released after a write. If the ByteBufPair ref count is increased - // and it gets written multiple times, the individual buffers refcount should be reflected as well. - int buffersCount = b.buffers.size(); - for (int i = 0; i < buffersCount; i++) { - ByteBuf bx = b.buffers.get(i); - // Last buffer will carry on the final promise to notify when everything was written on the - // socket - ctx.write(bx.retainedDuplicate(), i == (buffersCount - 1) ? promise : ctx.voidPromise()); - } - } finally { + ChannelPromise compositePromise = ctx.newPromise(); + compositePromise.addListener(future -> { + // release the ByteBufList after the write operation is completed ReferenceCountUtil.safeRelease(b); + // complete the promise passed as an argument unless it's a void promise + if (promise != null && !promise.isVoid()) { + if (future.isSuccess()) { + promise.setSuccess(); + } else { + promise.setFailure(future.cause()); + } + } + }); + + // Write each buffer individually on the socket. The retain() here is needed to preserve the fact + // that ByteBuf are automatically released after a write. If the ByteBufPair ref count is increased + // and it gets written multiple times, the individual buffers refcount should be reflected as well. + int buffersCount = b.buffers.size(); + for (int i = 0; i < buffersCount; i++) { + ByteBuf bx = b.buffers.get(i); + // Last buffer will carry on the final promise to notify when everything was written on the + // socket + ctx.write(bx.retainedDuplicate(), i == (buffersCount - 1) ? 
compositePromise : ctx.voidPromise()); } } else { ctx.write(msg, promise); diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/ByteBufVisitor.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/ByteBufVisitor.java new file mode 100644 index 00000000000..4bb60f40c6e --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/ByteBufVisitor.java @@ -0,0 +1,1126 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bookkeeper.util; + +import io.netty.buffer.ByteBuf; +import io.netty.buffer.ByteBufAllocator; +import io.netty.util.ByteProcessor; +import io.netty.util.concurrent.FastThreadLocal; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.channels.FileChannel; +import java.nio.channels.GatheringByteChannel; +import java.nio.channels.ScatteringByteChannel; +import java.nio.charset.Charset; + +/** + * This class visits the possible wrapped child buffers of a Netty {@link ByteBuf} for a given offset and length. + *
+ * <p>
          + * The Netty ByteBuf API does not provide a method to visit the wrapped child buffers. The + * {@link ByteBuf#unwrap()} method is not suitable for this purpose as it loses the + * {@link ByteBuf#readerIndex()} state, resulting in incorrect offset and length information. + *
+ * <p>
          + * Despite Netty not having a public API for visiting the sub buffers, it is possible to achieve this using + * the {@link ByteBuf#getBytes(int, ByteBuf, int, int)} method. This class uses this method to visit the + * wrapped child buffers by providing a suitable {@link ByteBuf} implementation. This implementation supports + * the role of the destination buffer for the getBytes call. It requires implementing the + * {@link ByteBuf#setBytes(int, ByteBuf, int, int)} and {@link ByteBuf#setBytes(int, byte[], int, int)} methods + * and other methods required by getBytes such as {@link ByteBuf#hasArray()}, {@link ByteBuf#hasMemoryAddress()}, + * {@link ByteBuf#nioBufferCount()} and {@link ByteBuf#capacity()}. + * All other methods in the internal ByteBuf implementation are not supported and will throw an exception. + * This is to ensure correctness and to fail fast if some ByteBuf implementation is not following the expected + * and supported interface contract. + */ +public class ByteBufVisitor { + private static final int DEFAULT_VISIT_MAX_DEPTH = 10; + + private ByteBufVisitor() { + // prevent instantiation + } + + /** + * This method traverses the potential nested composite buffers of the provided buffer, given a specific offset and + * length. The traversal continues until it encounters a buffer that is backed by an array or a memory address, + * which allows for the inspection of individual buffer segments without the need for data duplication. + * If no such wrapped buffer is found, the callback function is invoked with the original buffer, offset, + * and length as parameters. + * + * @param buffer the buffer to visit + * @param offset the offset for the buffer + * @param length the length for the buffer + * @param callback the callback to call for each visited buffer + * @param context the context to pass to the callback + */ + public static void visitBuffers(ByteBuf buffer, int offset, int length, ByteBufVisitorCallback callback, + T context) { + visitBuffers(buffer, offset, length, callback, context, DEFAULT_VISIT_MAX_DEPTH); + } + + /** + * The callback interface for visiting buffers. + * In case of a heap buffer that is backed by an byte[] array, the visitArray method is called. This + * is due to the internal implementation detail of the {@link ByteBuf#getBytes(int, ByteBuf, int, int)} + * method for heap buffers. + */ + public interface ByteBufVisitorCallback { + void visitBuffer(T context, ByteBuf visitBuffer, int visitIndex, int visitLength); + void visitArray(T context, byte[] visitArray, int visitIndex, int visitLength); + default boolean preferArrayOrMemoryAddress(T context) { + return true; + } + default boolean acceptsMemoryAddress(T context) { + return false; + } + } + + /** + * See @{@link #visitBuffers(ByteBuf, int, int, ByteBufVisitorCallback, Object)}. This method + * allows to specify the maximum depth of recursion for visiting wrapped buffers. 
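+ *
+ * <p>A minimal usage sketch (illustrative only; it assumes the callback's generic context
+ * parameter, which is elided in this listing, and heap-backed component buffers so that
+ * only {@code visitArray} is expected to fire):
+ * <pre>{@code
+ * ByteBuf composite = Unpooled.wrappedBuffer(
+ *         Unpooled.wrappedBuffer(new byte[8]), Unpooled.wrappedBuffer(new byte[8]));
+ * int[] visited = new int[1];
+ * ByteBufVisitor.visitBuffers(composite, composite.readerIndex(), composite.readableBytes(),
+ *         new ByteBufVisitor.ByteBufVisitorCallback<Void>() {
+ *             @Override
+ *             public void visitBuffer(Void ctx, ByteBuf buf, int index, int length) {
+ *                 visited[0] += length; // a segment that is not heap-array backed
+ *             }
+ *             @Override
+ *             public void visitArray(Void ctx, byte[] array, int index, int length) {
+ *                 visited[0] += length; // a heap-array backed segment
+ *             }
+ *         }, null, 2);
+ * // visited[0] == 16 after the call, one visit per 8-byte component
+ * }</pre>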
+ */ + public static void visitBuffers(ByteBuf buffer, int offset, int length, ByteBufVisitorCallback callback, + T context, int maxDepth) { + if (length == 0) { + // skip visiting empty buffers + return; + } + InternalContext internalContext = new InternalContext<>(); + internalContext.maxDepth = maxDepth; + internalContext.callbackContext = context; + internalContext.callback = callback; + internalContext.recursivelyVisitBuffers(buffer, offset, length); + } + + private static final int TL_COPY_BUFFER_SIZE = 64 * 1024; + private static final FastThreadLocal TL_COPY_BUFFER = new FastThreadLocal() { + @Override + protected byte[] initialValue() { + return new byte[TL_COPY_BUFFER_SIZE]; + } + }; + + private static class InternalContext { + int depth; + int maxDepth; + ByteBuf parentBuffer; + int parentOffset; + int parentLength; + T callbackContext; + ByteBufVisitorCallback callback; + GetBytesCallbackByteBuf callbackByteBuf = new GetBytesCallbackByteBuf(this); + + void recursivelyVisitBuffers(ByteBuf visitBuffer, int visitIndex, int visitLength) { + // visit the wrapped buffers recursively if the buffer is not backed by an array or memory address + // and the max depth has not been reached + if (depth < maxDepth && !visitBuffer.hasMemoryAddress() && !visitBuffer.hasArray()) { + parentBuffer = visitBuffer; + parentOffset = visitIndex; + parentLength = visitLength; + depth++; + // call getBytes to trigger the wrapped buffer visit + visitBuffer.getBytes(visitIndex, callbackByteBuf, 0, visitLength); + depth--; + } else { + passBufferToCallback(visitBuffer, visitIndex, visitLength); + } + } + + void handleBuffer(ByteBuf visitBuffer, int visitIndex, int visitLength) { + if (visitLength == 0) { + // skip visiting empty buffers + return; + } + if (visitBuffer == parentBuffer && visitIndex == parentOffset && visitLength == parentLength) { + // further recursion would cause unnecessary recursion up to the max depth of recursion + passBufferToCallback(visitBuffer, visitIndex, visitLength); + } else { + // use the doRecursivelyVisitBuffers method to visit the wrapped buffer, possibly recursively + recursivelyVisitBuffers(visitBuffer, visitIndex, visitLength); + } + } + + private void passBufferToCallback(ByteBuf visitBuffer, int visitIndex, int visitLength) { + if (callback.preferArrayOrMemoryAddress(callbackContext)) { + if (visitBuffer.hasArray()) { + handleArray(visitBuffer.array(), visitBuffer.arrayOffset() + visitIndex, visitLength); + } else if (visitBuffer.hasMemoryAddress() && callback.acceptsMemoryAddress(callbackContext)) { + callback.visitBuffer(callbackContext, visitBuffer, visitIndex, visitLength); + } else { + // fallback to reading the visited buffer into the copy buffer in a loop + byte[] copyBuffer = TL_COPY_BUFFER.get(); + int remaining = visitLength; + int currentOffset = visitIndex; + while (remaining > 0) { + int readLen = Math.min(remaining, copyBuffer.length); + visitBuffer.getBytes(currentOffset, copyBuffer, 0, readLen); + handleArray(copyBuffer, 0, readLen); + remaining -= readLen; + currentOffset += readLen; + } + } + } else { + callback.visitBuffer(callbackContext, visitBuffer, visitIndex, visitLength); + } + } + + void handleArray(byte[] visitArray, int visitIndex, int visitLength) { + if (visitLength == 0) { + // skip visiting empty arrays + return; + } + // pass array to callback + callback.visitArray(callbackContext, visitArray, visitIndex, visitLength); + } + } + + /** + * A ByteBuf implementation that can be used as the destination buffer for + * a {@link 
ByteBuf#getBytes(int, ByteBuf)} for visiting the wrapped child buffers. + */ + static class GetBytesCallbackByteBuf extends ByteBuf { + private final InternalContext internalContext; + + GetBytesCallbackByteBuf(InternalContext internalContext) { + this.internalContext = internalContext; + } + + @Override + public ByteBuf setBytes(int index, ByteBuf src, int srcIndex, int length) { + internalContext.handleBuffer(src, srcIndex, length); + return this; + } + + @Override + public ByteBuf setBytes(int index, byte[] src, int srcIndex, int length) { + internalContext.handleArray(src, srcIndex, length); + return this; + } + + @Override + public boolean hasArray() { + // return false so that the wrapped buffer is visited + return false; + } + + @Override + public boolean hasMemoryAddress() { + // return false so that the wrapped buffer is visited + return false; + } + + @Override + public int nioBufferCount() { + // return 0 so that the wrapped buffer is visited + return 0; + } + + @Override + public int capacity() { + // should return sufficient capacity for the total length + return Integer.MAX_VALUE; + } + + @Override + public ByteBuf capacity(int newCapacity) { + throw new UnsupportedOperationException(); + } + + @Override + public int maxCapacity() { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBufAllocator alloc() { + throw new UnsupportedOperationException(); + } + + @Override + public ByteOrder order() { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf order(ByteOrder endianness) { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf unwrap() { + throw new UnsupportedOperationException(); + } + + @Override + public boolean isDirect() { + throw new UnsupportedOperationException(); + } + + @Override + public boolean isReadOnly() { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf asReadOnly() { + throw new UnsupportedOperationException(); + } + + @Override + public int readerIndex() { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf readerIndex(int readerIndex) { + throw new UnsupportedOperationException(); + } + + @Override + public int writerIndex() { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf writerIndex(int writerIndex) { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf setIndex(int readerIndex, int writerIndex) { + throw new UnsupportedOperationException(); + } + + @Override + public int readableBytes() { + throw new UnsupportedOperationException(); + } + + @Override + public int writableBytes() { + throw new UnsupportedOperationException(); + } + + @Override + public int maxWritableBytes() { + throw new UnsupportedOperationException(); + } + + @Override + public boolean isReadable() { + throw new UnsupportedOperationException(); + } + + @Override + public boolean isReadable(int size) { + throw new UnsupportedOperationException(); + } + + @Override + public boolean isWritable() { + throw new UnsupportedOperationException(); + } + + @Override + public boolean isWritable(int size) { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf clear() { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf markReaderIndex() { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf resetReaderIndex() { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf markWriterIndex() { + throw 
new UnsupportedOperationException(); + } + + @Override + public ByteBuf resetWriterIndex() { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf discardReadBytes() { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf discardSomeReadBytes() { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf ensureWritable(int minWritableBytes) { + throw new UnsupportedOperationException(); + } + + @Override + public int ensureWritable(int minWritableBytes, boolean force) { + throw new UnsupportedOperationException(); + } + + @Override + public boolean getBoolean(int index) { + throw new UnsupportedOperationException(); + } + + @Override + public byte getByte(int index) { + throw new UnsupportedOperationException(); + } + + @Override + public short getUnsignedByte(int index) { + throw new UnsupportedOperationException(); + } + + @Override + public short getShort(int index) { + throw new UnsupportedOperationException(); + } + + @Override + public short getShortLE(int index) { + throw new UnsupportedOperationException(); + } + + @Override + public int getUnsignedShort(int index) { + throw new UnsupportedOperationException(); + } + + @Override + public int getUnsignedShortLE(int index) { + throw new UnsupportedOperationException(); + } + + @Override + public int getMedium(int index) { + throw new UnsupportedOperationException(); + } + + @Override + public int getMediumLE(int index) { + throw new UnsupportedOperationException(); + } + + @Override + public int getUnsignedMedium(int index) { + throw new UnsupportedOperationException(); + } + + @Override + public int getUnsignedMediumLE(int index) { + throw new UnsupportedOperationException(); + } + + @Override + public int getInt(int index) { + throw new UnsupportedOperationException(); + } + + @Override + public int getIntLE(int index) { + throw new UnsupportedOperationException(); + } + + @Override + public long getUnsignedInt(int index) { + throw new UnsupportedOperationException(); + } + + @Override + public long getUnsignedIntLE(int index) { + throw new UnsupportedOperationException(); + } + + @Override + public long getLong(int index) { + throw new UnsupportedOperationException(); + } + + @Override + public long getLongLE(int index) { + throw new UnsupportedOperationException(); + } + + @Override + public char getChar(int index) { + throw new UnsupportedOperationException(); + } + + @Override + public float getFloat(int index) { + throw new UnsupportedOperationException(); + } + + @Override + public double getDouble(int index) { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf getBytes(int index, ByteBuf dst) { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf getBytes(int index, ByteBuf dst, int length) { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf getBytes(int index, ByteBuf dst, int dstIndex, int length) { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf getBytes(int index, byte[] dst) { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf getBytes(int index, byte[] dst, int dstIndex, int length) { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf getBytes(int index, ByteBuffer dst) { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf getBytes(int index, OutputStream out, int length) throws IOException { + throw new UnsupportedOperationException(); + } + + 
@Override + public int getBytes(int index, GatheringByteChannel out, int length) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public int getBytes(int index, FileChannel out, long position, int length) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public CharSequence getCharSequence(int index, int length, Charset charset) { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf setBoolean(int index, boolean value) { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf setByte(int index, int value) { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf setShort(int index, int value) { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf setShortLE(int index, int value) { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf setMedium(int index, int value) { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf setMediumLE(int index, int value) { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf setInt(int index, int value) { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf setIntLE(int index, int value) { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf setLong(int index, long value) { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf setLongLE(int index, long value) { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf setChar(int index, int value) { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf setFloat(int index, float value) { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf setDouble(int index, double value) { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf setBytes(int index, ByteBuf src) { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf setBytes(int index, ByteBuf src, int length) { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf setBytes(int index, byte[] src) { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf setBytes(int index, ByteBuffer src) { + throw new UnsupportedOperationException(); + } + + @Override + public int setBytes(int index, InputStream in, int length) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public int setBytes(int index, ScatteringByteChannel in, int length) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public int setBytes(int index, FileChannel in, long position, int length) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf setZero(int index, int length) { + throw new UnsupportedOperationException(); + } + + @Override + public int setCharSequence(int index, CharSequence sequence, Charset charset) { + throw new UnsupportedOperationException(); + } + + @Override + public boolean readBoolean() { + throw new UnsupportedOperationException(); + } + + @Override + public byte readByte() { + throw new UnsupportedOperationException(); + } + + @Override + public short readUnsignedByte() { + throw new UnsupportedOperationException(); + } + + @Override + public short readShort() { + throw new UnsupportedOperationException(); + } + + @Override + public short readShortLE() { 
+ throw new UnsupportedOperationException(); + } + + @Override + public int readUnsignedShort() { + throw new UnsupportedOperationException(); + } + + @Override + public int readUnsignedShortLE() { + throw new UnsupportedOperationException(); + } + + @Override + public int readMedium() { + throw new UnsupportedOperationException(); + } + + @Override + public int readMediumLE() { + throw new UnsupportedOperationException(); + } + + @Override + public int readUnsignedMedium() { + throw new UnsupportedOperationException(); + } + + @Override + public int readUnsignedMediumLE() { + throw new UnsupportedOperationException(); + } + + @Override + public int readInt() { + throw new UnsupportedOperationException(); + } + + @Override + public int readIntLE() { + throw new UnsupportedOperationException(); + } + + @Override + public long readUnsignedInt() { + throw new UnsupportedOperationException(); + } + + @Override + public long readUnsignedIntLE() { + throw new UnsupportedOperationException(); + } + + @Override + public long readLong() { + throw new UnsupportedOperationException(); + } + + @Override + public long readLongLE() { + throw new UnsupportedOperationException(); + } + + @Override + public char readChar() { + throw new UnsupportedOperationException(); + } + + @Override + public float readFloat() { + throw new UnsupportedOperationException(); + } + + @Override + public double readDouble() { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf readBytes(int length) { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf readSlice(int length) { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf readRetainedSlice(int length) { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf readBytes(ByteBuf dst) { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf readBytes(ByteBuf dst, int length) { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf readBytes(ByteBuf dst, int dstIndex, int length) { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf readBytes(byte[] dst) { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf readBytes(byte[] dst, int dstIndex, int length) { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf readBytes(ByteBuffer dst) { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf readBytes(OutputStream out, int length) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public int readBytes(GatheringByteChannel out, int length) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public CharSequence readCharSequence(int length, Charset charset) { + throw new UnsupportedOperationException(); + } + + @Override + public int readBytes(FileChannel out, long position, int length) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf skipBytes(int length) { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf writeBoolean(boolean value) { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf writeByte(int value) { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf writeShort(int value) { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf writeShortLE(int value) { + throw new 
UnsupportedOperationException(); + } + + @Override + public ByteBuf writeMedium(int value) { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf writeMediumLE(int value) { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf writeInt(int value) { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf writeIntLE(int value) { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf writeLong(long value) { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf writeLongLE(long value) { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf writeChar(int value) { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf writeFloat(float value) { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf writeDouble(double value) { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf writeBytes(ByteBuf src) { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf writeBytes(ByteBuf src, int length) { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf writeBytes(ByteBuf src, int srcIndex, int length) { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf writeBytes(byte[] src) { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf writeBytes(byte[] src, int srcIndex, int length) { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf writeBytes(ByteBuffer src) { + throw new UnsupportedOperationException(); + } + + @Override + public int writeBytes(InputStream in, int length) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public int writeBytes(ScatteringByteChannel in, int length) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public int writeBytes(FileChannel in, long position, int length) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf writeZero(int length) { + throw new UnsupportedOperationException(); + } + + @Override + public int writeCharSequence(CharSequence sequence, Charset charset) { + throw new UnsupportedOperationException(); + } + + @Override + public int indexOf(int fromIndex, int toIndex, byte value) { + throw new UnsupportedOperationException(); + } + + @Override + public int bytesBefore(byte value) { + throw new UnsupportedOperationException(); + } + + @Override + public int bytesBefore(int length, byte value) { + throw new UnsupportedOperationException(); + } + + @Override + public int bytesBefore(int index, int length, byte value) { + throw new UnsupportedOperationException(); + } + + @Override + public int forEachByte(ByteProcessor processor) { + throw new UnsupportedOperationException(); + } + + @Override + public int forEachByte(int index, int length, ByteProcessor processor) { + throw new UnsupportedOperationException(); + } + + @Override + public int forEachByteDesc(ByteProcessor processor) { + throw new UnsupportedOperationException(); + } + + @Override + public int forEachByteDesc(int index, int length, ByteProcessor processor) { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf copy() { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf copy(int index, int length) { + throw new UnsupportedOperationException(); + } + + @Override + 
public ByteBuf slice() { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf retainedSlice() { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf slice(int index, int length) { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf retainedSlice(int index, int length) { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf duplicate() { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf retainedDuplicate() { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuffer nioBuffer() { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuffer nioBuffer(int index, int length) { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuffer internalNioBuffer(int index, int length) { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuffer[] nioBuffers() { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuffer[] nioBuffers(int index, int length) { + throw new UnsupportedOperationException(); + } + + + @Override + public byte[] array() { + throw new UnsupportedOperationException(); + } + + @Override + public int arrayOffset() { + throw new UnsupportedOperationException(); + } + @Override + public long memoryAddress() { + throw new UnsupportedOperationException(); + } + + @Override + public String toString(Charset charset) { + throw new UnsupportedOperationException(); + } + + @Override + public String toString(int index, int length, Charset charset) { + throw new UnsupportedOperationException(); + } + + @Override + public int compareTo(ByteBuf buffer) { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf retain(int increment) { + throw new UnsupportedOperationException(); + } + + @Override + public int refCnt() { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf retain() { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf touch() { + throw new UnsupportedOperationException(); + } + + @Override + public ByteBuf touch(Object hint) { + throw new UnsupportedOperationException(); + } + + @Override + public boolean release() { + throw new UnsupportedOperationException(); + } + + @Override + public boolean release(int decrement) { + throw new UnsupportedOperationException(); + } + + @Override + public String toString() { + return getClass().getSimpleName() + '@' + Integer.toHexString(System.identityHashCode(this)); + } + + @Override + public int hashCode() { + return System.identityHashCode(this); + } + + @Override + public boolean equals(Object obj) { + return obj == this; + } + } + +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/CertUtils.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/CertUtils.java new file mode 100644 index 00000000000..ffdcbaf9478 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/CertUtils.java @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bookkeeper.util; + +import java.io.IOException; +import java.security.cert.X509Certificate; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import javax.naming.InvalidNameException; +import javax.naming.ldap.LdapName; +import javax.naming.ldap.Rdn; + +/** + * Certificate parsing utilities. + */ +public abstract class CertUtils { + + // OU values + public static final String OU_ROLE_NAME_CODE = "0"; + public static final String OU_CLUSTER_NAME_CODE = "1"; + + public static final String OU_VALUES_SEPARATOR = ";"; + public static final String OU_CODE_SEPARATOR = ":"; + public static final String OU_NAME_SEPARATOR = ","; + + static final Pattern OU_VALUES_SPLITTER = Pattern.compile(OU_VALUES_SEPARATOR); + static final Pattern OU_GENERAL_NAME_REGEX = Pattern.compile("^([0-9]+)" + OU_CODE_SEPARATOR + "(.*)$"); + static final Pattern OU_NAME_SPLITTER = Pattern.compile(OU_NAME_SEPARATOR); + + private CertUtils() { + } + + public static String getOUString(X509Certificate cert) throws IOException { + return getOUStringFromSubject(cert.getSubjectX500Principal().getName()); + } + + public static String getOUStringFromSubject(String subject) throws IOException { + try { + LdapName ldapDN = new LdapName(subject); + for (Rdn rdn : ldapDN.getRdns()) { + if ("OU".equalsIgnoreCase(rdn.getType())) { + return rdn.getValue().toString(); + } + } + return null; + } catch (InvalidNameException ine) { + throw new IOException(ine); + } + } + + public static Map<String, String> getOUMapFromOUString(String ou) throws IOException { + Map<String, String> ouMap = new HashMap<>(); + if (ou != null) { + String[] ouParts = OU_VALUES_SPLITTER.split(ou); + for (String ouPart : ouParts) { + Matcher matcher = OU_GENERAL_NAME_REGEX.matcher(ouPart); + if (matcher.find() && matcher.groupCount() == 2) { + ouMap.put(matcher.group(1).trim(), matcher.group(2).trim()); + } + } + } + return Collections.unmodifiableMap(ouMap); + } + + public static Map<String, String> getOUMap(X509Certificate cert) throws IOException { + return getOUMapFromOUString(getOUString(cert)); + } + + public static String[] getRolesFromOU(X509Certificate cert) throws IOException { + return getRolesFromOUMap(getOUMap(cert)); + } + + public static String[] getRolesFromOUMap(Map<String, String> ouMap) throws IOException { + String roleNames = ouMap.get(OU_ROLE_NAME_CODE); + if (roleNames != null) { + String[] roleParts = OU_NAME_SPLITTER.split(roleNames); + if (roleParts.length > 0) { + List<String> roles = new ArrayList<>(roleParts.length); + for (String role : roleParts) { + roles.add(role.trim()); + } + return roles.toArray(new String[roles.size()]); + } + } + return null; + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/DaemonThreadFactory.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/DaemonThreadFactory.java deleted file mode 100644 index cb8caae5751..00000000000 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/DaemonThreadFactory.java +++ /dev/null @@ -1,42 +0,0 @@ -/** - *
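An illustrative sketch, not part of the patch: how the OU scheme parsed by CertUtils above is meant to be consumed. The OU payload and the values "admin,replicator" and "test-cluster" are invented for demonstration; the codes "0" and "1" come from the class constants.

import java.io.IOException;
import java.util.Map;
import org.apache.bookkeeper.util.CertUtils;

public class CertUtilsSketch {
    public static void main(String[] args) throws IOException {
        // Hypothetical OU value: code 0 carries roles, code 1 the cluster name.
        String ou = "0:admin,replicator;1:test-cluster";

        Map<String, String> ouMap = CertUtils.getOUMapFromOUString(ou);
        System.out.println(ouMap.get(CertUtils.OU_CLUSTER_NAME_CODE)); // test-cluster

        // Roles are comma-separated under code 0; whitespace is trimmed.
        for (String role : CertUtils.getRolesFromOUMap(ouMap)) {
            System.out.println(role); // admin, then replicator
        }
    }
}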
Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.bookkeeper.util; - -import java.util.concurrent.Executors; -import java.util.concurrent.ThreadFactory; - -/** - * Daemon thread factory. - */ -public class DaemonThreadFactory implements ThreadFactory { - private ThreadFactory defaultThreadFactory = Executors.defaultThreadFactory(); - private int priority = Thread.NORM_PRIORITY; - public DaemonThreadFactory() { - } - public DaemonThreadFactory(int priority) { - assert priority >= Thread.MIN_PRIORITY && priority <= Thread.MAX_PRIORITY; - this.priority = priority; - } - public Thread newThread(Runnable r) { - Thread thread = defaultThreadFactory.newThread(r); - thread.setDaemon(true); - thread.setPriority(priority); - return thread; - } -} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/DirectMemoryUtils.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/DirectMemoryUtils.java deleted file mode 100644 index 8a5362ade20..00000000000 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/DirectMemoryUtils.java +++ /dev/null @@ -1,60 +0,0 @@ -/* - * - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - * - */ -package org.apache.bookkeeper.util; - -import static com.google.common.base.Preconditions.checkArgument; -import static com.google.common.base.Preconditions.checkNotNull; - -import java.lang.reflect.Method; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * DirectMemory Utility. - */ -public class DirectMemoryUtils { - /** - * Returns the max configured size of direct memory for the JVM process. - * - *

          Direct memory can be specified with the flag -XX:MaxDirectMemorySize=8G on the command line. - * If not specified, the default value will be set to the max size of the JVM heap. - */ - public static long maxDirectMemory() { - try { - - Class vm = Class.forName("sun.misc.VM"); - Method maxDirectMemory = vm.getDeclaredMethod("maxDirectMemory"); - Object result = maxDirectMemory.invoke(null, (Object[]) null); - - checkNotNull(result); - checkArgument(result instanceof Long); - return (Long) result; - } catch (Exception e) { - if (LOG.isDebugEnabled()) { - LOG.debug("Failed to get maxDirectMemory size from sun.misc.VM, falling back to max heap size", e); - } - return Runtime.getRuntime().maxMemory(); - } - } - - private static final Logger LOG = LoggerFactory.getLogger(DirectMemoryUtils.class); -} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/DiskChecker.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/DiskChecker.java index f03d47519d9..0cac7b80433 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/DiskChecker.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/DiskChecker.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -19,7 +19,6 @@ package org.apache.bookkeeper.util; import com.google.common.annotations.VisibleForTesting; - import java.io.File; import java.io.IOException; import java.nio.file.FileStore; @@ -27,7 +26,6 @@ import java.util.HashSet; import java.util.List; import java.util.Set; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -140,7 +138,7 @@ private static boolean mkdirsWithExistsCheck(File dir) { * Directory to check for the disk space * @throws DiskOutOfSpaceException * Throws {@link DiskOutOfSpaceException} if available space is - * less than threshhold. + * less than threshold. */ @VisibleForTesting float checkDiskFull(File dir) throws DiskOutOfSpaceException, DiskWarnThresholdException { @@ -161,7 +159,7 @@ float checkDiskFull(File dir) throws DiskOutOfSpaceException, DiskWarnThresholdE // Warn should be triggered only if disk usage threshold doesn't trigger first. if (used > diskUsageWarnThreshold) { LOG.warn("Space left on device {} : {}, Used space fraction: {} > WarnThreshold {}.", - dir, usableSpace, used, diskUsageThreshold); + dir, usableSpace, used, diskUsageWarnThreshold); throw new DiskWarnThresholdException("Space left on device:" + usableSpace + " Used space fraction:" + used + " > WarnThreshold:" + diskUsageWarnThreshold, used); @@ -177,7 +175,7 @@ float checkDiskFull(File dir) throws DiskOutOfSpaceException, DiskWarnThresholdE * Calculate the total amount of free space available * in all of the ledger directories put together. * - * @return totalDiskSpace in bytes + * @return freeDiskSpace in bytes * @throws IOException */ public long getTotalFreeSpace(List dirs) throws IOException { @@ -193,10 +191,10 @@ public long getTotalFreeSpace(List dirs) throws IOException { } /** - * Calculate the total amount of free space available + * Calculate the total amount of disk space * in all of the ledger directories put together. 
* - * @return freeDiskSpace in bytes + * @return totalDiskSpace in bytes * @throws IOException */ public long getTotalDiskSpace(List dirs) throws IOException { @@ -239,28 +237,28 @@ public float getTotalDiskUsage(List dirs) throws IOException { * @throws DiskWarnThresholdException * If disk has less than configured amount of free space. * @throws DiskOutOfSpaceException - * If disk is full or having less space than threshhold + * If disk is full or having less space than threshold */ public float checkDir(File dir) throws DiskErrorException, DiskOutOfSpaceException, DiskWarnThresholdException { float usage = checkDiskFull(dir); if (!mkdirsWithExistsCheck(dir)) { throw new DiskErrorException("can not create directory: " - + dir.toString()); + + dir); } if (!dir.isDirectory()) { - throw new DiskErrorException("not a directory: " + dir.toString()); + throw new DiskErrorException("not a directory: " + dir); } if (!dir.canRead()) { throw new DiskErrorException("directory is not readable: " - + dir.toString()); + + dir); } if (!dir.canWrite()) { throw new DiskErrorException("directory is not writable: " - + dir.toString()); + + dir); } return usage; } @@ -278,7 +276,7 @@ void setDiskSpaceThreshold(float diskSpaceThreshold, float diskUsageWarnThreshol private void validateThreshold(float diskSpaceThreshold, float diskSpaceWarnThreshold) { if (diskSpaceThreshold <= 0 || diskSpaceThreshold >= 1 || diskSpaceWarnThreshold - diskSpaceThreshold > 1e-6) { - throw new IllegalArgumentException("Disk space threashold: " + throw new IllegalArgumentException("Disk space threshold: " + diskSpaceThreshold + " and warn threshold: " + diskSpaceWarnThreshold + " are not valid. Should be > 0 and < 1 and diskSpaceThreshold >= diskSpaceWarnThreshold"); } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/EntryFormatter.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/EntryFormatter.java index c24e375d71f..4f2a3c393dd 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/EntryFormatter.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/EntryFormatter.java @@ -21,6 +21,7 @@ package org.apache.bookkeeper.util; +import org.apache.bookkeeper.common.util.ReflectionUtils; import org.apache.bookkeeper.conf.AbstractConfiguration; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/EventLoopUtil.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/EventLoopUtil.java new file mode 100644 index 00000000000..47eaf5c4c5c --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/EventLoopUtil.java @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
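For context, an illustrative sketch (not part of the patch) of how the two DiskChecker thresholds interact: the warn threshold fires while the directory is still writable, the hard threshold makes it unusable. The two-float constructor, the inner-class exception imports, and the directory path are assumptions beyond what the hunks above show.

import java.io.File;
import org.apache.bookkeeper.util.DiskChecker;
import org.apache.bookkeeper.util.DiskChecker.DiskErrorException;
import org.apache.bookkeeper.util.DiskChecker.DiskOutOfSpaceException;
import org.apache.bookkeeper.util.DiskChecker.DiskWarnThresholdException;

public class DiskCheckerSketch {
    public static void main(String[] args) {
        // Hard limit at 95% usage, warning at 90% (illustrative values);
        // validateThreshold() requires 0 < threshold < 1 and warn <= threshold.
        DiskChecker checker = new DiskChecker(0.95f, 0.90f);
        try {
            float usage = checker.checkDir(new File("/tmp/bk-ledgers"));
            System.out.println("used fraction: " + usage);
        } catch (DiskWarnThresholdException e) {
            System.out.println("above warn threshold, still writable: " + e.getMessage());
        } catch (DiskOutOfSpaceException | DiskErrorException e) {
            System.out.println("directory unusable: " + e.getMessage());
        }
    }
}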
+ */ +package org.apache.bookkeeper.util; + +import io.netty.channel.EventLoopGroup; +import io.netty.channel.SelectStrategy; +import io.netty.channel.epoll.EpollEventLoopGroup; +import io.netty.channel.nio.NioEventLoopGroup; +import io.netty.incubator.channel.uring.IOUring; +import io.netty.incubator.channel.uring.IOUringEventLoopGroup; +import java.util.concurrent.ThreadFactory; +import lombok.experimental.UtilityClass; +import lombok.extern.slf4j.Slf4j; +import org.apache.bookkeeper.common.util.affinity.CpuAffinity; +import org.apache.bookkeeper.conf.ClientConfiguration; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.commons.lang.SystemUtils; +import org.apache.commons.lang3.StringUtils; + + +/** + * Utility class to initialize Netty event loops. + */ +@Slf4j +@UtilityClass +public class EventLoopUtil { + + private static final String ENABLE_IO_URING = "enable.io_uring"; + + public static EventLoopGroup getClientEventLoopGroup(ClientConfiguration conf, ThreadFactory threadFactory) { + return getEventLoopGroup(threadFactory, conf.getNumIOThreads(), conf.isBusyWaitEnabled()); + } + + public static EventLoopGroup getServerEventLoopGroup(ServerConfiguration conf, ThreadFactory threadFactory) { + return getEventLoopGroup(threadFactory, conf.getServerNumIOThreads(), conf.isBusyWaitEnabled()); + } + + public static EventLoopGroup getServerAcceptorGroup(ServerConfiguration conf, ThreadFactory threadFactory) { + return getEventLoopGroup(threadFactory, conf.getServerNumAcceptorThreads(), false); + } + + private static EventLoopGroup getEventLoopGroup(ThreadFactory threadFactory, + int numThreads, boolean enableBusyWait) { + if (!SystemUtils.IS_OS_LINUX) { + return new NioEventLoopGroup(numThreads, threadFactory); + } + + String enableIoUring = System.getProperty(ENABLE_IO_URING); + + // By default, io_uring will not be enabled, even if available. 
The system property will be used: + // enable.io_uring=1 + if (StringUtils.equalsAnyIgnoreCase(enableIoUring, "1", "true")) { + // Throw exception if IOUring cannot be used + IOUring.ensureAvailability(); + return new IOUringEventLoopGroup(numThreads, threadFactory); + } else { + try { + if (!enableBusyWait) { + // Regular Epoll based event loop + return new EpollEventLoopGroup(numThreads, threadFactory); + } + + // With low latency setting, put the Netty event loop on busy-wait loop to reduce cost of + // context switches + EpollEventLoopGroup eventLoopGroup = new EpollEventLoopGroup(numThreads, threadFactory, + () -> (selectSupplier, hasTasks) -> SelectStrategy.BUSY_WAIT); + + // Enable CPU affinity on IO threads + for (int i = 0; i < numThreads; i++) { + eventLoopGroup.next().submit(() -> { + try { + CpuAffinity.acquireCore(); + } catch (Throwable t) { + log.warn("Failed to acquire CPU core for thread {} err {} {}", + Thread.currentThread().getName(), t.getMessage(), t); + } + }); + } + + return eventLoopGroup; + } catch (ExceptionInInitializerError | NoClassDefFoundError | UnsatisfiedLinkError e) { + log.warn("Could not use Netty Epoll event loop: {}", e.getMessage()); + return new NioEventLoopGroup(numThreads, threadFactory); + } + } + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/HardLink.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/HardLink.java index c8c7a71a3de..1d3a3645921 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/HardLink.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/HardLink.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -20,14 +20,20 @@ */ package org.apache.bookkeeper.util; -import static com.google.common.base.Charsets.UTF_8; +import static java.nio.charset.StandardCharsets.UTF_8; +import com.google.common.annotations.VisibleForTesting; import java.io.BufferedReader; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStreamReader; +import java.nio.file.Files; +import java.nio.file.Path; import java.util.Arrays; +import java.util.concurrent.atomic.AtomicBoolean; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * Class for creating hardlinks. @@ -42,7 +48,7 @@ * efficient - and minimizes the impact of the extra buffer creations. */ public class HardLink { - + private static final Logger LOG = LoggerFactory.getLogger(HardLink.class); /** * OS Types. */ @@ -395,12 +401,19 @@ protected static int getMaxAllowedCmdArgLength() { return getHardLinkCommand.getMaxAllowedCmdArgLength(); } + private static final AtomicBoolean CREATE_LINK_SUPPORTED = new AtomicBoolean(true); + /* * **************************************************** * Complexity is above. User-visible functionality is below * **************************************************** */ + @VisibleForTesting + static void enableJdkLinkApi(boolean enable) { + CREATE_LINK_SUPPORTED.set(enable); + } + + /** * Creates a hardlink. * @param file - existing source file * @param linkName @@ -416,6 +429,23 @@ public static void createHardLink(File file, File linkName) throw new IOException( "invalid arguments to createHardLink: link name is null"); } + + // If Files.createLink is available, try it first; otherwise fall back to the shell command.
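An illustrative sketch (not part of the patch) of driving the transport selection in EventLoopUtil above: epoll is the Linux default, io_uring is opt-in via the enable.io_uring system property, and NIO is the fallback elsewhere or when the native transport fails to load. The thread-factory name is an arbitrary choice.

import io.netty.channel.EventLoopGroup;
import io.netty.util.concurrent.DefaultThreadFactory;
import org.apache.bookkeeper.conf.ClientConfiguration;
import org.apache.bookkeeper.util.EventLoopUtil;

public class EventLoopSketch {
    public static void main(String[] args) {
        // Opt in to io_uring; IOUring.ensureAvailability() throws on kernels
        // without io_uring support rather than silently degrading.
        System.setProperty("enable.io_uring", "1");

        EventLoopGroup group = EventLoopUtil.getClientEventLoopGroup(
                new ClientConfiguration(), new DefaultThreadFactory("bk-client-io"));
        try {
            System.out.println("transport: " + group.getClass().getSimpleName());
        } finally {
            group.shutdownGracefully();
        }
    }
}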
+ if (CREATE_LINK_SUPPORTED.get()) { + try { + Path newFile = Files.createLink(linkName.toPath(), file.toPath()); + if (newFile.toFile().exists()) { + return; + } + } catch (UnsupportedOperationException e) { + LOG.error("createLink not supported", e); + CREATE_LINK_SUPPORTED.set(false); + } catch (IOException e) { + LOG.error("error creating hard link via Files.createLink", e); + CREATE_LINK_SUPPORTED.set(false); + } + } + + // construct and execute shell command String[] hardLinkCommand = getHardLinkCommand.linkOne(file, linkName); Process process = Runtime.getRuntime().exec(hardLinkCommand); diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/HexDumpEntryFormatter.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/HexDumpEntryFormatter.java index 7b831a2ca07..85dc4652e61 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/HexDumpEntryFormatter.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/HexDumpEntryFormatter.java @@ -22,7 +22,6 @@ package org.apache.bookkeeper.util; import java.io.IOException; - import org.apache.commons.io.HexDump; /** diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/IOUtils.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/IOUtils.java index 53ef2ac9f3d..c003fdf325c 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/IOUtils.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/IOUtils.java @@ -24,7 +24,6 @@ import java.io.IOException; import java.nio.ByteBuffer; import java.nio.channels.WritableByteChannel; - import org.slf4j.Logger; /** @@ -43,13 +42,26 @@ public class IOUtils { */ public static void close(Logger log, java.io.Closeable... closeables) { for (java.io.Closeable c : closeables) { - if (c != null) { - try { - c.close(); - } catch (IOException e) { - if (log != null && log.isDebugEnabled()) { - log.debug("Exception in closing " + c, e); - } + close(log, c); + } + } + + /** + * Close the Closeable object and ignore any {@link IOException} or + * null pointers. Must only be used for cleanup in exception handlers. + * + * @param log + * the log to record problems to at debug level. Can be null. + * @param closeable + * the object to close + */ + public static void close(Logger log, java.io.Closeable closeable) { + if (closeable != null) { + try { + closeable.close(); + } catch (IOException e) { + if (log != null && log.isDebugEnabled()) { + log.debug("Exception in closing " + closeable, e); } } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/IteratorUtility.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/IteratorUtility.java new file mode 100644 index 00000000000..701d31a6246 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/IteratorUtility.java @@ -0,0 +1,171 @@ +/** + * Copyright The Apache Software Foundation + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
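A minimal sketch (not part of the patch) exercising the JDK-first path that HardLink.createHardLink gains above; file names are invented. After the first UnsupportedOperationException or IOException from Files.createLink, CREATE_LINK_SUPPORTED flips and later calls go straight to the shell command.

import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import org.apache.bookkeeper.util.HardLink;

public class HardLinkSketch {
    public static void main(String[] args) throws IOException {
        File src = File.createTempFile("bk-hardlink", ".dat");
        Files.write(src.toPath(), "payload".getBytes(StandardCharsets.UTF_8));

        // Tries java.nio Files.createLink first, OS link command second.
        File link = new File(src.getParentFile(), src.getName() + ".link");
        HardLink.createHardLink(src, link);

        // Both names now resolve to the same content.
        System.out.println(new String(Files.readAllBytes(link.toPath()), StandardCharsets.UTF_8));
    }
}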
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bookkeeper.util; + +import java.util.Comparator; +import java.util.Iterator; +import java.util.NoSuchElementException; +import java.util.PrimitiveIterator; +import java.util.PrimitiveIterator.OfLong; +import java.util.function.ToLongFunction; + +/** + * Utility class to merge iterators. + */ +public class IteratorUtility { + + private static final long INVALID_ELEMENT = -1; + + /** + * Merges two long primitive sorted iterators and returns merged iterator. + * It expects + * - input iterators to be sorted + * - input iterators to be non-repetitive for merged iterator to be non-repetitive + * It removes duplicates from the input iterators. + * + * @param iter1 + * first primitive oflong input iterator + * @param iter2 + * second primitive oflong input iterator + * @return merged primitive oflong iterator. + */ + public static OfLong mergePrimitiveLongIterator(OfLong iter1, OfLong iter2) { + return new PrimitiveIterator.OfLong() { + private long curIter1Element = INVALID_ELEMENT; + private long curIter2Element = INVALID_ELEMENT; + private boolean hasToPreFetch = true; + + @Override + public boolean hasNext() { + if (hasToPreFetch) { + if (curIter1Element == INVALID_ELEMENT) { + curIter1Element = iter1.hasNext() ? iter1.nextLong() : INVALID_ELEMENT; + } + if (curIter2Element == INVALID_ELEMENT) { + curIter2Element = iter2.hasNext() ? iter2.nextLong() : INVALID_ELEMENT; + } + } + hasToPreFetch = false; + return (curIter1Element != INVALID_ELEMENT || curIter2Element != INVALID_ELEMENT); + } + + @Override + public long nextLong() { + if (!hasNext()) { + throw new NoSuchElementException(); + } + + long returnEntryId = INVALID_ELEMENT; + if (curIter1Element != INVALID_ELEMENT && curIter2Element != INVALID_ELEMENT) { + if (curIter1Element == curIter2Element) { + returnEntryId = curIter1Element; + curIter1Element = INVALID_ELEMENT; + curIter2Element = INVALID_ELEMENT; + } else if (curIter1Element < curIter2Element) { + returnEntryId = curIter1Element; + curIter1Element = INVALID_ELEMENT; + } else { + returnEntryId = curIter2Element; + curIter2Element = INVALID_ELEMENT; + } + } else if (curIter1Element != INVALID_ELEMENT) { + returnEntryId = curIter1Element; + curIter1Element = INVALID_ELEMENT; + } else { + returnEntryId = curIter2Element; + curIter2Element = INVALID_ELEMENT; + } + hasToPreFetch = true; + return returnEntryId; + } + }; + } + + /** + * Merges two sorted iterators and returns merged iterator sorted using + * comparator. It uses 'function' to convert T type to long, to return long + * iterator. + * It expects + * - input iterators to be sorted + * - input iterators to be non-repetitive for merged iterator to be non-repetitive + * It removes duplicates from the input iterators. 
+ * + * @param iter1 + * first iterator of type T + * @param iter2 + * second iterator of type T + * @param comparator + * comparator used to order elements of type T + * @param function + * function converting an element of type T to its long value + * @return merged primitive OfLong iterator + */ + public static <T> OfLong mergeIteratorsForPrimitiveLongIterator(Iterator<T> iter1, Iterator<T> iter2, + Comparator<T> comparator, ToLongFunction<T> function) { + return new PrimitiveIterator.OfLong() { + private T curIter1Entry = null; + private T curIter2Entry = null; + private boolean hasToPreFetch = true; + + @Override + public boolean hasNext() { + if (hasToPreFetch) { + if (curIter1Entry == null) { + curIter1Entry = iter1.hasNext() ? iter1.next() : null; + } + if (curIter2Entry == null) { + curIter2Entry = iter2.hasNext() ? iter2.next() : null; + } + } + hasToPreFetch = false; + return (curIter1Entry != null || curIter2Entry != null); + } + + @Override + public long nextLong() { + if (!hasNext()) { + throw new NoSuchElementException(); + } + + T returnEntry = null; + if (curIter1Entry != null && curIter2Entry != null) { + int compareValue = comparator.compare(curIter1Entry, curIter2Entry); + if (compareValue == 0) { + returnEntry = curIter1Entry; + curIter1Entry = null; + curIter2Entry = null; + } else if (compareValue < 0) { + returnEntry = curIter1Entry; + curIter1Entry = null; + } else { + returnEntry = curIter2Entry; + curIter2Entry = null; + } + } else if (curIter1Entry != null) { + returnEntry = curIter1Entry; + curIter1Entry = null; + } else { + returnEntry = curIter2Entry; + curIter2Entry = null; + } + hasToPreFetch = true; + return function.applyAsLong(returnEntry); + } + }; + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/LedgerDirUtil.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/LedgerDirUtil.java new file mode 100644 index 00000000000..d7711bbb1a3 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/LedgerDirUtil.java @@ -0,0 +1,104 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
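An illustrative use of the merge helpers above (not part of the patch). Inputs must be sorted and, because -1 serves internally as a sentinel, non-negative; an element present in both inputs is emitted once.

import java.util.PrimitiveIterator.OfLong;
import java.util.stream.LongStream;
import org.apache.bookkeeper.util.IteratorUtility;

public class MergeSketch {
    public static void main(String[] args) {
        OfLong left = LongStream.of(1, 3, 5).iterator();
        OfLong right = LongStream.of(2, 3, 6).iterator();

        OfLong merged = IteratorUtility.mergePrimitiveLongIterator(left, right);
        while (merged.hasNext()) {
            System.out.print(merged.nextLong() + " "); // 1 2 3 5 6
        }
    }
}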
+ * + */ +package org.apache.bookkeeper.util; + + +import java.io.File; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import org.apache.commons.lang3.tuple.Pair; + +public class LedgerDirUtil { + + public static final Pattern FILE_PATTERN = Pattern.compile("^([0-9a-fA-F]+)\\.log$"); + public static final Pattern COMPACTED_FILE_PATTERN = + Pattern.compile("^([0-9a-fA-F]+)\\.log\\.([0-9a-fA-F]+)\\.compacted$"); + + public static List<Integer> logIdsInDirectory(File directory) { + List<Integer> ids = new ArrayList<>(); + if (directory.exists() && directory.isDirectory()) { + File[] files = directory.listFiles(); + if (files != null && files.length > 0) { + for (File f : files) { + Matcher m = FILE_PATTERN.matcher(f.getName()); + if (m.matches()) { + int logId = Integer.parseUnsignedInt(m.group(1), 16); + ids.add(logId); + } + } + } + } + return ids; + } + + public static List<Integer> compactedLogIdsInDirectory(File directory) { + List<Integer> ids = new ArrayList<>(); + if (directory.exists() && directory.isDirectory()) { + File[] files = directory.listFiles(); + if (files != null && files.length > 0) { + for (File f : files) { + Matcher m = COMPACTED_FILE_PATTERN.matcher(f.getName()); + if (m.matches()) { + int logId = Integer.parseUnsignedInt(m.group(1), 16); + ids.add(logId); + } + } + } + } + return ids; + } + + /** + * O(nlogn) algorithm to find largest contiguous gap between + * integers in a passed list. n should be relatively small. + * Entry logs should be about 1GB in size, so even if the node + * stores a PB, there should be only 1000000 entry logs. + */ + public static Pair<Integer, Integer> findLargestGap(List<Integer> currentIds) { + if (currentIds.isEmpty()) { + return Pair.of(0, Integer.MAX_VALUE); + } + + Collections.sort(currentIds); + + int nextIdCandidate = 0; + int maxIdCandidate = currentIds.get(0); + int maxGap = maxIdCandidate - nextIdCandidate; + for (int i = 0; i < currentIds.size(); i++) { + int gapStart = currentIds.get(i) + 1; + int j = i + 1; + int gapEnd = Integer.MAX_VALUE; + if (j < currentIds.size()) { + gapEnd = currentIds.get(j); + } + int gapSize = gapEnd - gapStart; + if (gapSize > maxGap) { + maxGap = gapSize; + nextIdCandidate = gapStart; + maxIdCandidate = gapEnd; + } + } + return Pair.of(nextIdCandidate, maxIdCandidate); + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/LedgerIdFormatter.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/LedgerIdFormatter.java index ec52cdf5cc8..77c17946d6b 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/LedgerIdFormatter.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/LedgerIdFormatter.java @@ -22,7 +22,7 @@ package org.apache.bookkeeper.util; import java.util.UUID; - +import org.apache.bookkeeper.common.util.ReflectionUtils; import org.apache.bookkeeper.conf.AbstractConfiguration; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -62,8 +62,8 @@ public static LedgerIdFormatter newLedgerIdFormatter(AbstractConfiguration co formatter = ReflectionUtils.newInstance(ledgerIdFormatterClass); } catch (Exception e) { LOG.warn("No formatter class found", e); - LOG.warn("Using Default UUID Formatter."); - formatter = new UUIDLedgerIdFormatter(); + LOG.warn("Using Default Long Formatter."); + formatter = new LongLedgerIdFormatter(); } return formatter; } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/LocalBookKeeper.java
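A worked example for findLargestGap above (not part of the patch). With existing log ids {0, 1, 7, 9} the open-ended range past the highest id is the widest gap, so (10, Integer.MAX_VALUE) comes back and new log ids can be allocated from 10. The helper sorts the list in place, hence the mutable copy.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.bookkeeper.util.LedgerDirUtil;
import org.apache.commons.lang3.tuple.Pair;

public class GapSketch {
    public static void main(String[] args) {
        List<Integer> ids = new ArrayList<>(Arrays.asList(0, 1, 7, 9));
        Pair<Integer, Integer> gap = LedgerDirUtil.findLargestGap(ids);
        System.out.println(gap.getLeft() + ".." + gap.getRight()); // 10..2147483647
    }
}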
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/LocalBookKeeper.java index 72ba48a69fc..0c793eb4e51 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/LocalBookKeeper.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/LocalBookKeeper.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -17,7 +17,8 @@ */ package org.apache.bookkeeper.util; -import static com.google.common.base.Charsets.UTF_8; +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.apache.bookkeeper.server.Main.storageDirectoriesFromConf; import static org.apache.bookkeeper.util.BookKeeperConstants.AVAILABLE_NODE; import static org.apache.bookkeeper.util.BookKeeperConstants.READONLY; @@ -33,21 +34,34 @@ import java.util.ArrayList; import java.util.Iterator; import java.util.List; - +import java.util.function.Supplier; +import java.util.stream.Collectors; import org.apache.bookkeeper.bookie.Bookie; -import org.apache.bookkeeper.bookie.BookieException; -import org.apache.bookkeeper.client.BKException; -import org.apache.bookkeeper.conf.AbstractConfiguration; +import org.apache.bookkeeper.bookie.BookieImpl; +import org.apache.bookkeeper.bookie.BookieResources; +import org.apache.bookkeeper.bookie.CookieValidation; +import org.apache.bookkeeper.bookie.LedgerDirsManager; +import org.apache.bookkeeper.bookie.LedgerStorage; +import org.apache.bookkeeper.bookie.LegacyCookieValidation; +import org.apache.bookkeeper.bookie.UncleanShutdownDetection; +import org.apache.bookkeeper.bookie.UncleanShutdownDetectionImpl; +import org.apache.bookkeeper.common.allocator.ByteBufAllocatorWithOomHandler; +import org.apache.bookkeeper.common.component.ComponentInfoPublisher; import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.discover.BookieServiceInfo; +import org.apache.bookkeeper.discover.BookieServiceInfo.Endpoint; +import org.apache.bookkeeper.discover.RegistrationManager; +import org.apache.bookkeeper.meta.LedgerManager; +import org.apache.bookkeeper.meta.LedgerManagerFactory; +import org.apache.bookkeeper.meta.MetadataBookieDriver; import org.apache.bookkeeper.meta.zk.ZKMetadataDriverBase; import org.apache.bookkeeper.proto.BookieServer; -import org.apache.bookkeeper.replication.ReplicationException.CompatibilityException; -import org.apache.bookkeeper.replication.ReplicationException.UnavailableException; import org.apache.bookkeeper.shims.zk.ZooKeeperServerShim; import org.apache.bookkeeper.shims.zk.ZooKeeperServerShimFactory; -import org.apache.bookkeeper.tls.SecurityException; +import org.apache.bookkeeper.stats.NullStatsLogger; import org.apache.bookkeeper.zookeeper.ZooKeeperClient; import org.apache.commons.io.FileUtils; +import org.apache.commons.lang3.StringUtils; import org.apache.zookeeper.CreateMode; import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.Op; @@ -58,44 +72,52 @@ /** * Local Bookkeeper. 
*/ -public class LocalBookKeeper { +public class LocalBookKeeper implements AutoCloseable { protected static final Logger LOG = LoggerFactory.getLogger(LocalBookKeeper.class); public static final int CONNECTION_TIMEOUT = 30000; - private static String newMetadataServiceUri(String zkServers, int port) { - return "zk://" + zkServers + ":" + port + "/ledgers"; + private static String newMetadataServiceUri(String zkServers, int port, String layout, String ledgerPath) { + return "zk+" + layout + "://" + zkServers + ":" + port + ledgerPath; } int numberOfBookies; - public LocalBookKeeper() { - this(3); - } - - public LocalBookKeeper(int numberOfBookies) { - this(numberOfBookies, 5000, defaultLocalBookiesConfigDir); - } - - public LocalBookKeeper(int numberOfBookies, int initialPort, String localBookiesConfigDirName) { + public LocalBookKeeper( + int numberOfBookies, + ServerConfiguration baseConf, + String localBookiesConfigDirName, + boolean stopOnExit, String dirSuffix, + String zkHost, int zkPort) { this.numberOfBookies = numberOfBookies; - this.initialPort = initialPort; this.localBookiesConfigDir = new File(localBookiesConfigDirName); - LOG.info("Running {} bookie(s) on zkServer {}.", this.numberOfBookies); + this.baseConf = baseConf; + this.localBookies = new ArrayList<>(); + this.stopOnExit = stopOnExit; + this.dirSuffix = dirSuffix; + this.zkHost = zkHost; + this.zkPort = zkPort; + this.dirsToCleanUp = new ArrayList<>(); + LOG.info("Running {} bookie(s) on zk ensemble = '{}:{}'.", this.numberOfBookies, + zooKeeperDefaultHost, zooKeeperDefaultPort); } private static String zooKeeperDefaultHost = "127.0.0.1"; private static int zooKeeperDefaultPort = 2181; private static int zkSessionTimeOut = 5000; - private static Integer bookieDefaultInitialPort = 5000; private static String defaultLocalBookiesConfigDir = "/tmp/localbookies-config"; //BookKeeper variables - File journalDirs[]; - BookieServer bs[]; - ServerConfiguration bsConfs[]; - Integer initialPort = 5000; - + List localBookies; + ZooKeeperServerShim zks; + String zkHost; + int zkPort; + String dirSuffix; + ByteBufAllocatorWithOomHandler allocator; + private ServerConfiguration baseConf; File localBookiesConfigDir; + List dirsToCleanUp; + boolean stopOnExit; + /** * @param maxCC * Max Concurrency of Client @@ -120,25 +142,23 @@ public static ZooKeeperServerShim runZookeeper(int maxCC, int zookeeperPort, Fil return server; } - @SuppressWarnings("deprecation") - private void initializeZookeeper(AbstractConfiguration conf, String zkHost, int zkPort) throws IOException { + private void initializeZookeeper() throws IOException { LOG.info("Instantiate ZK Client"); //initialize the zk client with values try (ZooKeeperClient zkc = ZooKeeperClient.newBuilder() .connectString(zkHost + ":" + zkPort) .sessionTimeoutMs(zkSessionTimeOut) .build()) { - List multiOps = Lists.newArrayListWithExpectedSize(3); - String zkLedgersRootPath = ZKMetadataDriverBase.resolveZkLedgersRootPath(conf); - multiOps.add( - Op.create(zkLedgersRootPath, new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT)); + String zkLedgersRootPath = ZKMetadataDriverBase.resolveZkLedgersRootPath(baseConf); + ZkUtils.createFullPathOptimistic(zkc, zkLedgersRootPath, new byte[0], Ids.OPEN_ACL_UNSAFE, + CreateMode.PERSISTENT); + List multiOps = Lists.newArrayListWithExpectedSize(2); multiOps.add( Op.create(zkLedgersRootPath + "/" + AVAILABLE_NODE, new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT)); multiOps.add( Op.create(zkLedgersRootPath + "/" + AVAILABLE_NODE + "/" + 
READONLY, new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT)); - zkc.multi(multiOps); // No need to create an entry for each requested bookie anymore as the // BookieServers will register themselves with ZooKeeper on startup. @@ -158,46 +178,11 @@ private static void cleanupDirectories(List dirs) throws IOException { } } - private List runBookies(ServerConfiguration baseConf, String dirSuffix) - throws IOException, KeeperException, InterruptedException, BookieException, - UnavailableException, CompatibilityException, SecurityException, BKException { - List tempDirs = new ArrayList(); - try { - runBookies(baseConf, tempDirs, dirSuffix); - return tempDirs; - } catch (IOException ioe) { - cleanupDirectories(tempDirs); - throw ioe; - } catch (KeeperException ke) { - cleanupDirectories(tempDirs); - throw ke; - } catch (InterruptedException ie) { - Thread.currentThread().interrupt(); - cleanupDirectories(tempDirs); - throw ie; - } catch (BookieException be) { - cleanupDirectories(tempDirs); - throw be; - } catch (UnavailableException ue) { - cleanupDirectories(tempDirs); - throw ue; - } catch (CompatibilityException ce) { - cleanupDirectories(tempDirs); - throw ce; - } - } - - @SuppressWarnings("deprecation") - private void runBookies(ServerConfiguration baseConf, List tempDirs, String dirSuffix) - throws IOException, KeeperException, InterruptedException, BookieException, UnavailableException, - CompatibilityException, SecurityException, BKException { + private void runBookies() + throws Exception { LOG.info("Starting Bookie(s)"); // Create Bookie Servers (B1, B2, B3) - journalDirs = new File[numberOfBookies]; - bs = new BookieServer[numberOfBookies]; - bsConfs = new ServerConfiguration[numberOfBookies]; - if (localBookiesConfigDir.exists() && localBookiesConfigDir.isFile()) { throw new IOException("Unable to create LocalBookiesConfigDir, since there is a file at " + localBookiesConfigDir.getAbsolutePath()); @@ -206,69 +191,9 @@ private void runBookies(ServerConfiguration baseConf, List tempDirs, Strin throw new IOException( "Unable to create LocalBookiesConfigDir - " + localBookiesConfigDir.getAbsolutePath()); } - + allocator = BookieResources.createAllocator(baseConf); for (int i = 0; i < numberOfBookies; i++) { - if (null == baseConf.getJournalDirNameWithoutDefault()) { - journalDirs[i] = IOUtils.createTempDir("localbookkeeper" + Integer.toString(i), dirSuffix); - tempDirs.add(journalDirs[i]); - } else { - journalDirs[i] = new File(baseConf.getJournalDirName(), "bookie" + Integer.toString(i)); - } - if (journalDirs[i].exists()) { - if (journalDirs[i].isDirectory()) { - FileUtils.deleteDirectory(journalDirs[i]); - } else if (!journalDirs[i].delete()) { - throw new IOException("Couldn't cleanup bookie journal dir " + journalDirs[i]); - } - } - if (!journalDirs[i].mkdirs()) { - throw new IOException("Couldn't create bookie journal dir " + journalDirs[i]); - } - - String [] ledgerDirs = baseConf.getLedgerDirWithoutDefault(); - if ((null == ledgerDirs) || (0 == ledgerDirs.length)) { - ledgerDirs = new String[] { journalDirs[i].getPath() }; - } else { - for (int l = 0; l < ledgerDirs.length; l++) { - File dir = new File(ledgerDirs[l], "bookie" + Integer.toString(i)); - if (dir.exists()) { - if (dir.isDirectory()) { - FileUtils.deleteDirectory(dir); - } else if (!dir.delete()) { - throw new IOException("Couldn't cleanup bookie ledger dir " + dir); - } - } - if (!dir.mkdirs()) { - throw new IOException("Couldn't create bookie ledger dir " + dir); - } - ledgerDirs[l] = dir.getPath(); - } - } - 
- bsConfs[i] = new ServerConfiguration((ServerConfiguration) baseConf.clone()); - - // If the caller specified ephemeral ports then use ephemeral ports for all - // the bookies else use numBookie ports starting at initialPort - if (0 == initialPort) { - bsConfs[i].setBookiePort(0); - } else { - bsConfs[i].setBookiePort(initialPort + i); - } - - if (null == baseConf.getMetadataServiceUriUnchecked()) { - bsConfs[i].setMetadataServiceUri( - newMetadataServiceUri(InetAddress.getLocalHost().getHostAddress(), zooKeeperDefaultPort)); - } - - bsConfs[i].setJournalDirName(journalDirs[i].getPath()); - bsConfs[i].setLedgerDirNames(ledgerDirs); - - // write config into file before start so we can know what's wrong if start failed - String fileName = Bookie.getBookieAddress(bsConfs[i]).toString() + ".conf"; - serializeLocalBookieConfig(bsConfs[i], fileName); - - bs[i] = new BookieServer(bsConfs[i]); - bs[i].start(); + runBookie(i); } /* @@ -285,116 +210,121 @@ private void runBookies(ServerConfiguration baseConf, List tempDirs, Strin ServerConfiguration baseConfWithCorrectZKServers = new ServerConfiguration( (ServerConfiguration) baseConf.clone()); if (null == baseConf.getMetadataServiceUriUnchecked()) { - baseConfWithCorrectZKServers.setMetadataServiceUri( - newMetadataServiceUri(InetAddress.getLocalHost().getHostAddress(), zooKeeperDefaultPort)); + baseConfWithCorrectZKServers.setMetadataServiceUri(baseConf.getMetadataServiceUri()); } serializeLocalBookieConfig(baseConfWithCorrectZKServers, "baseconf.conf"); } - public static void startLocalBookies(String zkHost, - int zkPort, - int numBookies, - boolean shouldStartZK, - int initialBookiePort) - throws Exception { - ServerConfiguration conf = new ServerConfiguration(); - startLocalBookiesInternal( - conf, zkHost, zkPort, numBookies, shouldStartZK, - initialBookiePort, true, "test", null, defaultLocalBookiesConfigDir); - } - - public static void startLocalBookies(String zkHost, - int zkPort, - int numBookies, - boolean shouldStartZK, - int initialBookiePort, - ServerConfiguration conf) - throws Exception { - startLocalBookiesInternal( - conf, zkHost, zkPort, numBookies, shouldStartZK, - initialBookiePort, true, "test", null, defaultLocalBookiesConfigDir); - } - - public static void startLocalBookies(String zkHost, - int zkPort, - int numBookies, - boolean shouldStartZK, - int initialBookiePort, - String dirSuffix) - throws Exception { - ServerConfiguration conf = new ServerConfiguration(); - startLocalBookiesInternal( - conf, zkHost, zkPort, numBookies, shouldStartZK, - initialBookiePort, true, dirSuffix, null, defaultLocalBookiesConfigDir); - } - @SuppressWarnings("deprecation") - static void startLocalBookiesInternal(ServerConfiguration conf, - String zkHost, - int zkPort, - int numBookies, - boolean shouldStartZK, - int initialBookiePort, - boolean stopOnExit, - String dirSuffix, - String zkDataDir, - String localBookiesConfigDirName) - throws Exception { - LocalBookKeeper lb = new LocalBookKeeper(numBookies, initialBookiePort, localBookiesConfigDirName); + private void runBookie(int bookieIndex) throws Exception { + File journalDirs; + if (null == baseConf.getJournalDirNameWithoutDefault()) { + journalDirs = IOUtils.createTempDir("localbookkeeper" + bookieIndex, dirSuffix); + dirsToCleanUp.add(journalDirs); + } else { + journalDirs = new File(baseConf.getJournalDirName(), "bookie" + bookieIndex); + } + if (journalDirs.exists()) { + if (journalDirs.isDirectory()) { + FileUtils.deleteDirectory(journalDirs); + } else if (!journalDirs.delete()) { + 
throw new IOException("Couldn't cleanup bookie journal dir " + journalDirs); + } + } + if (!journalDirs.mkdirs()) { + throw new IOException("Couldn't create bookie journal dir " + journalDirs); + } - ZooKeeperServerShim zks = null; - File zkTmpDir = null; - List bkTmpDirs = null; - try { - if (shouldStartZK) { - File zkDataDirFile = null; - if (zkDataDir != null) { - zkDataDirFile = new File(zkDataDir); - if (zkDataDirFile.exists() && zkDataDirFile.isFile()) { - throw new IOException("Unable to create zkDataDir, since there is a file at " - + zkDataDirFile.getAbsolutePath()); - } - if (!zkDataDirFile.exists() && !zkDataDirFile.mkdirs()) { - throw new IOException("Unable to create zkDataDir - " + zkDataDirFile.getAbsolutePath()); + String[] ledgerDirs = baseConf.getLedgerDirWithoutDefault(); + if ((null == ledgerDirs) || (0 == ledgerDirs.length)) { + ledgerDirs = new String[]{journalDirs.getPath()}; + } else { + for (int l = 0; l < ledgerDirs.length; l++) { + File dir = new File(ledgerDirs[l], "bookie" + bookieIndex); + if (dir.exists()) { + if (dir.isDirectory()) { + FileUtils.deleteDirectory(dir); + } else if (!dir.delete()) { + throw new IOException("Couldn't cleanup bookie ledger dir " + dir); } } - zkTmpDir = IOUtils.createTempDir("zookeeper", dirSuffix, zkDataDirFile); - zkTmpDir.deleteOnExit(); - zks = LocalBookKeeper.runZookeeper(1000, zkPort, zkTmpDir); + if (!dir.mkdirs()) { + throw new IOException("Couldn't create bookie ledger dir " + dir); + } + dirsToCleanUp.add(dir); + ledgerDirs[l] = dir.getPath(); } + } + ServerConfiguration conf = new ServerConfiguration((ServerConfiguration) baseConf.clone()); - conf.setMetadataServiceUri(newMetadataServiceUri(zkHost, zkPort)); + conf.setBookiePort(PortManager.nextFreePort()); - lb.initializeZookeeper(conf, zkHost, zkPort); - bkTmpDirs = lb.runBookies(conf, dirSuffix); + if (null == baseConf.getMetadataServiceUriUnchecked()) { + conf.setMetadataServiceUri(baseConf.getMetadataServiceUri()); + } - try { - while (true) { - Thread.sleep(5000); - } - } catch (InterruptedException ie) { - Thread.currentThread().interrupt(); - if (stopOnExit) { - lb.shutdownBookies(); + conf.setJournalDirName(journalDirs.getPath()); + conf.setLedgerDirNames(ledgerDirs); - if (null != zks) { - zks.stop(); - } + // write config into file before start so we can know what's wrong if start failed + String fileName = BookieImpl.getBookieId(conf).toString() + ".conf"; + serializeLocalBookieConfig(conf, fileName); + + LocalBookie b = new LocalBookie(conf); + b.start(); + localBookies.add(b); + } + + private void setZooKeeperShim(ZooKeeperServerShim zks, File zkTmpDir) { + this.zks = zks; + this.dirsToCleanUp.add(zkTmpDir); + } + + public static LocalBookKeeper getLocalBookies(String zkHost, + int zkPort, + int numBookies, + boolean shouldStartZK, + ServerConfiguration conf) throws Exception { + return getLocalBookiesInternal( + conf, zkHost, zkPort, numBookies, shouldStartZK, + true, "test", null, defaultLocalBookiesConfigDir); + } + + @SuppressWarnings("deprecation") + private static LocalBookKeeper getLocalBookiesInternal(ServerConfiguration conf, + String zkHost, + int zkPort, + int numBookies, + boolean shouldStartZK, + boolean stopOnExit, + String dirSuffix, + String zkDataDir, + String localBookiesConfigDirName) throws Exception { + conf.setMetadataServiceUri( + newMetadataServiceUri( + zkHost, + zkPort, + conf.getLedgerManagerLayoutStringFromFactoryClass(), + conf.getZkLedgersRootPath())); + LocalBookKeeper lb = new LocalBookKeeper(numBookies, conf, 
localBookiesConfigDirName, stopOnExit, + dirSuffix, zkHost, zkPort); + if (shouldStartZK) { + File zkDataDirFile = null; + if (zkDataDir != null) { + zkDataDirFile = new File(zkDataDir); + if (zkDataDirFile.exists() && zkDataDirFile.isFile()) { + throw new IOException("Unable to create zkDataDir, since there is a file at " + + zkDataDirFile.getAbsolutePath()); } - throw ie; - } - } catch (Exception e) { - LOG.error("Failed to run {} bookies : zk ensemble = '{}:{}'", - numBookies, zkHost, zkPort, e); - throw e; - } finally { - if (stopOnExit) { - cleanupDirectories(bkTmpDirs); - if (null != zkTmpDir) { - FileUtils.deleteDirectory(zkTmpDir); + if (!zkDataDirFile.exists() && !zkDataDirFile.mkdirs()) { + throw new IOException("Unable to create zkDataDir - " + zkDataDirFile.getAbsolutePath()); } } + File zkTmpDir = IOUtils.createTempDir("zookeeper", dirSuffix, zkDataDirFile); + lb.setZooKeeperShim(LocalBookKeeper.runZookeeper(1000, zkPort, zkTmpDir), zkTmpDir); } + + return lb; } /** @@ -407,6 +337,13 @@ static void startLocalBookiesInternal(ServerConfiguration conf, * @throws IOException */ private void serializeLocalBookieConfig(ServerConfiguration localBookieConfig, String fileName) throws IOException { + if (StringUtils.isBlank(fileName) + || fileName.contains("..") + || fileName.contains("/") + || fileName.contains("\\")) { + throw new IllegalArgumentException("Invalid filename: " + fileName); + } + File localBookieConfFile = new File(localBookiesConfigDir, fileName); if (localBookieConfFile.exists() && !localBookieConfFile.delete()) { throw new IOException( @@ -418,25 +355,33 @@ private void serializeLocalBookieConfig(ServerConfiguration localBookieConfig, S Iterator keys = localBookieConfig.getKeys(); try (PrintWriter writer = new PrintWriter(localBookieConfFile, "UTF-8")) { while (keys.hasNext()) { - String key = keys.next().toString(); + String key = keys.next(); String[] values = localBookieConfig.getStringArray(key); StringBuilder concatenatedValue = new StringBuilder(values[0]); for (int i = 1; i < values.length; i++) { - concatenatedValue.append("," + values[i]); + concatenatedValue.append(",").append(values[i]); } - writer.println(key + "=" + concatenatedValue.toString()); + writer.println(key + "=" + concatenatedValue); } } } - public static void main(String[] args) throws Exception, SecurityException { + public static void main(String[] args) { + System.setProperty("zookeeper.4lw.commands.whitelist", "*"); try { if (args.length < 1) { usage(); System.exit(-1); } - int numBookies = Integer.parseInt(args[0]); + int numBookies = 0; + try { + numBookies = Integer.parseInt(args[0]); + } catch (NumberFormatException nfe) { + LOG.error("Unrecognized number-of-bookies: {}", args[0]); + usage(); + System.exit(-1); + } ServerConfiguration conf = new ServerConfiguration(); conf.setAllowLoopback(true); @@ -444,10 +389,10 @@ public static void main(String[] args) throws Exception, SecurityException { String confFile = args[1]; try { conf.loadConf(new File(confFile).toURI().toURL()); - LOG.info("Using configuration file " + confFile); + LOG.info("Using configuration file {}", confFile); } catch (Exception e) { // load conf failed - LOG.warn("Error loading configuration file " + confFile, e); + LOG.warn("Error loading configuration file {}", confFile, e); } } @@ -461,11 +406,21 @@ public static void main(String[] args) throws Exception, SecurityException { localBookiesConfigDirName = args[3]; } - startLocalBookiesInternal(conf, zooKeeperDefaultHost, zooKeeperDefaultPort, numBookies, true, - 
bookieDefaultInitialPort, false, "test", zkDataDir, localBookiesConfigDirName); + try (LocalBookKeeper lb = getLocalBookiesInternal(conf, zooKeeperDefaultHost, zooKeeperDefaultPort, + numBookies, true, false, "test", zkDataDir, + localBookiesConfigDirName)) { + try { + lb.start(); + while (true) { + Thread.sleep(1000); + } + } catch (InterruptedException ie) { + Thread.currentThread().interrupt(); + throw ie; + } + } } catch (Exception e) { LOG.error("Exiting LocalBookKeeper because of exception in main method", e); - e.printStackTrace(); /* * This is needed because, some non-daemon thread (probably in ZK or * some other dependent service) is preventing the JVM from exiting, though @@ -482,39 +437,28 @@ private static void usage() { } public static boolean waitForServerUp(String hp, long timeout) { - long start = MathUtils.now(); - String split[] = hp.split(":"); + long start = System.currentTimeMillis(); + String[] split = hp.split(":"); String host = split[0]; int port = Integer.parseInt(split[1]); while (true) { - try { - Socket sock = new Socket(host, port); - BufferedReader reader = null; - try { - OutputStream outstream = sock.getOutputStream(); - outstream.write("stat".getBytes(UTF_8)); - outstream.flush(); - - reader = - new BufferedReader( - new InputStreamReader(sock.getInputStream(), UTF_8)); - String line = reader.readLine(); - if (line != null && line.startsWith("Zookeeper version:")) { - LOG.info("Server UP"); - return true; - } - } finally { - sock.close(); - if (reader != null) { - reader.close(); - } + try (Socket sock = new Socket(host, port); + BufferedReader reader = new BufferedReader(new InputStreamReader(sock.getInputStream(), UTF_8))) { + OutputStream outstream = sock.getOutputStream(); + outstream.write("stat".getBytes(UTF_8)); + outstream.flush(); + + String line = reader.readLine(); + if (line != null && line.startsWith("Zookeeper version:")) { + LOG.info("Server UP"); + return true; } } catch (IOException e) { // ignore as this is expected LOG.info("server " + hp + " not up " + e); } - if (MathUtils.now() > start + timeout) { + if (System.currentTimeMillis() > start + timeout) { break; } try { @@ -527,10 +471,114 @@ public static boolean waitForServerUp(String hp, long timeout) { return false; } - public void shutdownBookies() { - for (BookieServer bookieServer: bs) { - bookieServer.shutdown(); + public void start() throws Exception { + initializeZookeeper(); + runBookies(); + } + + public void addBookie() throws Exception { + int bookieIndex = localBookies.size() + 1; + runBookie(bookieIndex); + } + + public void removeBookie() throws Exception { + int index = localBookies.size() - 1; + LocalBookie bookie = localBookies.get(index); + bookie.shutdown(); + localBookies.remove(index); + } + + public void shutdownBookies() throws Exception { + for (LocalBookie b : localBookies) { + b.shutdown(); + } + } + + @Override + public void close() throws Exception { + if (stopOnExit) { + shutdownBookies(); + + if (null != zks) { + zks.stop(); + } + } + + cleanupDirectories(dirsToCleanUp); + } + + private class LocalBookie { + final BookieServer server; + final Bookie bookie; + final MetadataBookieDriver metadataDriver; + final RegistrationManager registrationManager; + final LedgerManagerFactory lmFactory; + final LedgerManager ledgerManager; + + LocalBookie(ServerConfiguration conf) throws Exception { + metadataDriver = BookieResources.createMetadataDriver(conf, NullStatsLogger.INSTANCE); + registrationManager = metadataDriver.createRegistrationManager(); + lmFactory = 
metadataDriver.getLedgerManagerFactory(); + ledgerManager = lmFactory.newLedgerManager(); + + DiskChecker diskChecker = BookieResources.createDiskChecker(conf); + LedgerDirsManager ledgerDirsManager = BookieResources.createLedgerDirsManager( + conf, diskChecker, NullStatsLogger.INSTANCE); + LedgerDirsManager indexDirsManager = BookieResources.createIndexDirsManager( + conf, diskChecker, NullStatsLogger.INSTANCE, ledgerDirsManager); + LedgerStorage storage = BookieResources.createLedgerStorage( + conf, ledgerManager, ledgerDirsManager, indexDirsManager, + NullStatsLogger.INSTANCE, allocator); + + CookieValidation cookieValidation = new LegacyCookieValidation(conf, registrationManager); + cookieValidation.checkCookies(storageDirectoriesFromConf(conf)); + + UncleanShutdownDetection shutdownManager = new UncleanShutdownDetectionImpl(ledgerDirsManager); + + final ComponentInfoPublisher componentInfoPublisher = new ComponentInfoPublisher(); + final Supplier bookieServiceInfoProvider = + () -> buildBookieServiceInfo(componentInfoPublisher); + + componentInfoPublisher.startupFinished(); + bookie = new BookieImpl(conf, registrationManager, storage, diskChecker, + ledgerDirsManager, indexDirsManager, + NullStatsLogger.INSTANCE, allocator, bookieServiceInfoProvider); + server = new BookieServer(conf, bookie, NullStatsLogger.INSTANCE, allocator, + shutdownManager); + } + + void start() throws Exception { + server.start(); + } + + void shutdown() throws Exception { + server.shutdown(); + ledgerManager.close(); + lmFactory.close(); + registrationManager.close(); + metadataDriver.close(); } } + /** + * Create the {@link BookieServiceInfo} starting from the published endpoints. + * + * @see ComponentInfoPublisher + * @param componentInfoPublisher the endpoint publisher + * @return the created bookie service info + */ + private static BookieServiceInfo buildBookieServiceInfo(ComponentInfoPublisher componentInfoPublisher) { + List endpoints = componentInfoPublisher.getEndpoints().values() + .stream().map(e -> { + return new Endpoint( + e.getId(), + e.getPort(), + e.getHost(), + e.getProtocol(), + e.getAuth(), + e.getExtensions() + ); + }).collect(Collectors.toList()); + return new BookieServiceInfo(componentInfoPublisher.getProperties(), endpoints); + } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/NativeIO.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/NativeIO.java deleted file mode 100644 index 6f0bdf16f90..00000000000 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/NativeIO.java +++ /dev/null @@ -1,121 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
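The refactor above turns LocalBookKeeper into an AutoCloseable test harness with explicit lifecycle methods. A minimal usage sketch, assuming only the signatures visible in this patch; the host, port, and bookie count are illustrative:

import org.apache.bookkeeper.conf.ServerConfiguration;
import org.apache.bookkeeper.util.LocalBookKeeper;

public class LocalClusterSketch {
    public static void main(String[] args) throws Exception {
        ServerConfiguration conf = new ServerConfiguration();
        conf.setAllowLoopback(true); // same setting as the main() in this patch
        // try-with-resources works because close() now shuts down the bookies,
        // stops the embedded ZooKeeper, and removes the temp directories.
        try (LocalBookKeeper lb = LocalBookKeeper.getLocalBookies(
                "127.0.0.1", 2181, 3, true /* start ZooKeeper */, conf)) {
            lb.start();        // boots ZooKeeper (if requested) and all bookies
            lb.addBookie();    // grow the local cluster by one bookie
            lb.removeBookie(); // shut down and drop the last bookie
        }
    }
}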
- */ - -package org.apache.bookkeeper.util; - -import com.sun.jna.LastErrorException; -import com.sun.jna.Native; - -import java.io.FileDescriptor; -import java.lang.reflect.Field; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Native I/O operations. - */ -public final class NativeIO { - private static final Logger LOG = LoggerFactory.getLogger(NativeIO.class); - - private static final int POSIX_FADV_DONTNEED = 4; /* fadvise.h */ - - private static boolean initialized = false; - private static boolean fadvisePossible = true; - - static { - try { - Native.register("c"); - initialized = true; - } catch (NoClassDefFoundError e) { - LOG.info("JNA not found. Native methods will be disabled."); - } catch (UnsatisfiedLinkError e) { - LOG.info("Unable to link C library. Native methods will be disabled."); - } catch (NoSuchMethodError e) { - LOG.warn("Obsolete version of JNA present; unable to register C library"); - } - } - - // fadvice - public static native int posix_fadvise(int fd, long offset, long len, int flag) throws LastErrorException; - - private NativeIO() {} - - private static Field getFieldByReflection(Class cls, String fieldName) { - Field field = null; - - try { - field = cls.getDeclaredField(fieldName); - field.setAccessible(true); - } catch (Exception e) { - // We don't really expect this so throw an assertion to - // catch this during development - assert false; - LOG.warn("Unable to read {} field from {}", fieldName, cls.getName()); - } - - return field; - } - /** - * Get system file descriptor (int) from FileDescriptor object. - * @param descriptor - FileDescriptor object to get fd from - * @return file descriptor, -1 or error - */ - public static int getSysFileDescriptor(FileDescriptor descriptor) { - Field field = getFieldByReflection(descriptor.getClass(), "fd"); - try { - return field.getInt(descriptor); - } catch (Exception e) { - LOG.warn("Unable to read fd field from java.io.FileDescriptor"); - } - - return -1; - } - - /** - * Remove pages from the file system page cache when they wont - * be accessed again. - * - * @param fd The file descriptor of the source file. - * @param offset The offset within the file. - * @param len The length to be flushed. 
- */ - public static void bestEffortRemoveFromPageCache(int fd, long offset, long len) { - if (!initialized || !fadvisePossible || fd < 0) { - return; - } - try { - posix_fadvise(fd, offset, len, POSIX_FADV_DONTNEED); - } catch (UnsupportedOperationException uoe) { - LOG.warn("posix_fadvise is not supported : ", uoe); - fadvisePossible = false; - } catch (UnsatisfiedLinkError ule) { - // if JNA is unavailable just skipping Direct I/O - // instance of this class will act like normal RandomAccessFile - LOG.warn("Unsatisfied Link error: posix_fadvise failed on file descriptor {}, offset {} : ", - fd, offset, ule); - fadvisePossible = false; - } catch (Exception e) { - // This is best effort anyway so lets just log that there was an - // exception and forget - LOG.warn("Unknown exception: posix_fadvise failed on file descriptor {}, offset {} : ", - fd, offset, e); - } - } - -} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/NettyChannelUtil.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/NettyChannelUtil.java new file mode 100644 index 00000000000..7add100e748 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/NettyChannelUtil.java @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.bookkeeper.util; + +import io.netty.channel.ChannelFutureListener; +import io.netty.channel.ChannelOutboundInvoker; +import io.netty.channel.VoidChannelPromise; + +/** + * Contains utility methods for working with Netty Channels. + */ +public final class NettyChannelUtil { + + private NettyChannelUtil() { + } + + /** + * Write and flush the message to the channel. + * + * The promise is an instance of {@link VoidChannelPromise} that properly propagates exceptions up to the pipeline. + * Netty has many ad-hoc optimization if the promise is an instance of {@link VoidChannelPromise}. + * Lastly, it reduces pollution of useless {@link io.netty.channel.ChannelPromise} objects created + * by the default write and flush method {@link ChannelOutboundInvoker#writeAndFlush(Object)}. + * See https://stackoverflow.com/q/54169262 and https://stackoverflow.com/a/9030420 for more details. + * + * @param ctx channel's context + * @param msg buffer to write in the channel + */ + public static void writeAndFlushWithVoidPromise(ChannelOutboundInvoker ctx, Object msg) { + ctx.writeAndFlush(msg, ctx.voidPromise()); + } + + /** + * Write and flush the message to the channel and the close the channel. + * + * This method is particularly helpful when the connection is in an invalid state + * and therefore a new connection must be created to continue. 
+ * + * @param ctx channel's context + * @param msg buffer to write in the channel + */ + public static void writeAndFlushWithClosePromise(ChannelOutboundInvoker ctx, Object msg) { + ctx.writeAndFlush(msg).addListener(ChannelFutureListener.CLOSE); + } + +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/OrderedGenericCallback.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/OrderedGenericCallback.java index 73150ad0112..3d128480019 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/OrderedGenericCallback.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/OrderedGenericCallback.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -17,13 +17,14 @@ */ package org.apache.bookkeeper.util; +import java.util.Map; import java.util.concurrent.RejectedExecutionException; - +import org.apache.bookkeeper.common.util.MdcUtils; import org.apache.bookkeeper.common.util.OrderedExecutor; -import org.apache.bookkeeper.common.util.SafeRunnable; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GenericCallback; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.slf4j.MDC; /** * Generic callback implementation which will run the @@ -34,6 +35,7 @@ public abstract class OrderedGenericCallback implements GenericCallback { private final OrderedExecutor executor; private final long orderingKey; + private final Map mdcContextMap; /** * @param executor The executor on which to run the callback @@ -43,33 +45,40 @@ public abstract class OrderedGenericCallback implements GenericCallback { public OrderedGenericCallback(OrderedExecutor executor, long orderingKey) { this.executor = executor; this.orderingKey = orderingKey; + this.mdcContextMap = executor.preserveMdc() ? MDC.getCopyOfContextMap() : null; } @Override public final void operationComplete(final int rc, final T result) { - // during closing, callbacks that are error out might try to submit to - // the scheduler again. if the submission will go to same thread, we - // don't need to submit to executor again. this is also an optimization for - // callback submission - if (Thread.currentThread().getId() == executor.getThreadID(orderingKey)) { - safeOperationComplete(rc, result); - } else { - try { - executor.executeOrdered(orderingKey, new SafeRunnable() { - @Override - public void safeRun() { - safeOperationComplete(rc, result); - } - @Override - public String toString() { - return String.format("Callback(key=%s, name=%s)", - orderingKey, - OrderedGenericCallback.this); - } - }); - } catch (RejectedExecutionException re) { - LOG.warn("Failed to submit callback for {} : ", orderingKey, re); + MdcUtils.restoreContext(mdcContextMap); + try { + // during closing, callbacks that are error out might try to submit to + // the scheduler again. if the submission will go to same thread, we + // don't need to submit to executor again. 
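The NettyChannelUtil helpers added above encode two write patterns: a void promise for hot-path writes, and a close-on-complete listener for connections in an invalid state. A hypothetical handler using both; the handler and payload are invented, not part of this patch:

import io.netty.buffer.ByteBuf;
import io.netty.channel.ChannelHandlerContext;
import io.netty.channel.ChannelInboundHandlerAdapter;
import java.nio.charset.StandardCharsets;
import org.apache.bookkeeper.util.NettyChannelUtil;

public class EchoHandler extends ChannelInboundHandlerAdapter {
    @Override
    public void channelRead(ChannelHandlerContext ctx, Object msg) {
        // Hot path: a void promise avoids allocating a ChannelPromise per write.
        NettyChannelUtil.writeAndFlushWithVoidPromise(ctx, msg);
    }

    @Override
    public void exceptionCaught(ChannelHandlerContext ctx, Throwable cause) {
        // Connection is in an unknown state: send a last message, then close.
        ByteBuf reply = ctx.alloc().buffer()
                .writeBytes("bye".getBytes(StandardCharsets.UTF_8));
        NettyChannelUtil.writeAndFlushWithClosePromise(ctx, reply);
    }
}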
this is also an optimization for + // callback submission + if (Thread.currentThread().getId() == executor.getThreadID(orderingKey)) { + safeOperationComplete(rc, result); + } else { + try { + executor.executeOrdered(orderingKey, new Runnable() { + @Override + public void run() { + safeOperationComplete(rc, result); + } + + @Override + public String toString() { + return String.format("Callback(key=%s, name=%s)", + orderingKey, + OrderedGenericCallback.this); + } + }); + } catch (RejectedExecutionException re) { + LOG.warn("Failed to submit callback for {} : ", orderingKey, re); + } } + } finally { + MDC.clear(); } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/PageCacheUtil.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/PageCacheUtil.java new file mode 100644 index 00000000000..08bf9cfb385 --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/PageCacheUtil.java @@ -0,0 +1,103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.bookkeeper.util; + +import java.io.FileDescriptor; +import java.lang.reflect.Field; +import lombok.experimental.UtilityClass; +import lombok.extern.slf4j.Slf4j; +import org.apache.bookkeeper.common.util.nativeio.NativeIO; +import org.apache.bookkeeper.common.util.nativeio.NativeIOImpl; + +/** + * Native I/O operations. + */ +@UtilityClass +@Slf4j +public final class PageCacheUtil { + + private static final int POSIX_FADV_DONTNEED = 4; /* fadvise.h */ + + private static boolean fadvisePossible = true; + + private static final NativeIO NATIVE_IO; + + static { + NativeIO nativeIO = null; + try { + nativeIO = new NativeIOImpl(); + } catch (Exception e) { + log.warn("Unable to initialize NativeIO for posix_fdavise: {}", e.getMessage()); + fadvisePossible = false; + } + + NATIVE_IO = nativeIO; + } + + private static Field getFieldByReflection(Class cls, String fieldName) { + Field field = null; + + try { + field = cls.getDeclaredField(fieldName); + field.setAccessible(true); + } catch (Exception e) { + // We don't really expect this so throw an assertion to + // catch this during development + log.warn("Unable to read {} field from {}", fieldName, cls.getName()); + assert false; + } + + return field; + } + /** + * Get system file descriptor (int) from FileDescriptor object. 
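The OrderedGenericCallback change above captures the submitting thread's MDC at construction time and restores it around the callback. A stripped-down sketch of that capture/restore pattern, using plain slf4j MDC calls and a bare Runnable instead of the callback machinery:

import java.util.Map;
import org.slf4j.MDC;

public class MdcPropagationExample {
    static Runnable withMdc(Runnable task) {
        // Capture the submitting thread's context at creation time...
        final Map<String, String> context = MDC.getCopyOfContextMap();
        return () -> {
            // ...and restore it on whichever thread eventually runs the task.
            if (context != null) {
                MDC.setContextMap(context);
            }
            try {
                task.run();
            } finally {
                MDC.clear();
            }
        };
    }

    public static void main(String[] args) {
        MDC.put("requestId", "r-42");
        Runnable task = withMdc(() -> System.out.println(MDC.get("requestId")));
        new Thread(task).start(); // prints r-42 despite running on another thread
    }
}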
+ * @param descriptor - FileDescriptor object to get fd from + * @return file descriptor, -1 or error + */ + public static int getSysFileDescriptor(FileDescriptor descriptor) { + Field field = getFieldByReflection(descriptor.getClass(), "fd"); + try { + return field.getInt(descriptor); + } catch (Exception e) { + log.warn("Unable to read fd field from java.io.FileDescriptor"); + } + + return -1; + } + + /** + * Remove pages from the file system page cache when they won't + * be accessed again. + * + * @param fd The file descriptor of the source file. + * @param offset The offset within the file. + * @param len The length to be flushed. + */ + public static void bestEffortRemoveFromPageCache(int fd, long offset, long len) { + if (!fadvisePossible || fd < 0) { + return; + } + try { + NATIVE_IO.posix_fadvise(fd, offset, len, POSIX_FADV_DONTNEED); + } catch (Throwable e) { + log.warn("Failed to perform posix_fadvise: {}", e.getMessage()); + fadvisePossible = false; + } + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/PortManager.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/PortManager.java new file mode 100644 index 00000000000..cf17533780a --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/PortManager.java @@ -0,0 +1,49 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.util; + +import java.net.ServerSocket; + +/** + * Port manager provides free ports to allows multiple instances + * of the bookkeeper tests to run at once. + */ +public class PortManager { + + /** + * Return an available port. + * + * @return available port. + */ + public static synchronized int nextFreePort() { + int exceptionCount = 0; + while (true) { + try (ServerSocket ss = new ServerSocket(0)) { + return ss.getLocalPort(); + } catch (Exception e) { + exceptionCount++; + if (exceptionCount > 100) { + throw new RuntimeException("Unable to allocate socket port", e); + } + } + } + } +} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/SafeRunnable.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/SafeRunnable.java index a03f5fc14c1..b4ce368febd 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/SafeRunnable.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/SafeRunnable.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
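PageCacheUtil above is a best-effort wrapper around posix_fadvise(POSIX_FADV_DONTNEED). A sketch of how a caller might drop freshly written pages from the page cache, assuming a POSIX system with native support; the path and size are illustrative:

import java.io.FileOutputStream;
import java.io.IOException;
import org.apache.bookkeeper.util.PageCacheUtil;

public class PageCacheExample {
    public static void main(String[] args) throws IOException {
        byte[] block = new byte[1 << 20]; // 1 MiB stand-in for real data
        try (FileOutputStream out = new FileOutputStream("/tmp/bk-example.bin")) {
            out.write(block);
            out.getFD().sync(); // pages must be written back before dropping them
            int fd = PageCacheUtil.getSysFileDescriptor(out.getFD());
            // Best effort: silently does nothing if native support is unavailable.
            PageCacheUtil.bestEffortRemoveFromPageCache(fd, 0, block.length);
        }
    }
}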
See the NOTICE file * distributed with this work for additional information diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/Shell.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/Shell.java index 9e37614a749..7a0c68f6277 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/Shell.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/Shell.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -17,11 +17,11 @@ */ package org.apache.bookkeeper.util; -import com.google.common.base.Charsets; import java.io.BufferedReader; import java.io.File; import java.io.IOException; import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; import java.util.Map; import java.util.Timer; import java.util.TimerTask; @@ -93,7 +93,7 @@ protected void setWorkingDirectory(File dir) { * Check to see if a command needs to be executed and execute if needed. */ protected void run() throws IOException { - if (lastTime + interval > MathUtils.now()) { + if (lastTime + interval > System.currentTimeMillis()) { return; } exitCode = 0; // reset for next run @@ -130,8 +130,9 @@ private void runCommand() throws IOException { timeOutTimer.schedule(timeoutTimerTask, timeOutInterval); } final BufferedReader errReader = new BufferedReader(new InputStreamReader(process.getErrorStream(), - Charsets.UTF_8)); - BufferedReader inReader = new BufferedReader(new InputStreamReader(process.getInputStream(), Charsets.UTF_8)); + StandardCharsets.UTF_8)); + BufferedReader inReader = new BufferedReader(new InputStreamReader(process.getInputStream(), + StandardCharsets.UTF_8)); final StringBuffer errMsg = new StringBuffer(); // read error and input streams as this would free up the buffers @@ -176,6 +177,11 @@ public void run() { //taken care in finally block if (exitCode != 0) { throw new ExitCodeException(exitCode, errMsg.toString()); + } else { + String errMsgStr = errMsg.toString(); + if (!errMsgStr.isEmpty()) { + LOG.error(errMsgStr); + } } } catch (InterruptedException ie) { Thread.currentThread().interrupt(); @@ -199,7 +205,7 @@ public void run() { LOG.warn("Error while closing the error stream", ioe); } process.destroy(); - lastTime = MathUtils.now(); + lastTime = System.currentTimeMillis(); } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/SnapshotMap.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/SnapshotMap.java index 8902d069eaa..8afba5bd594 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/SnapshotMap.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/SnapshotMap.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
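The Shell changes above keep its watchdog pattern: a TimerTask destroys the child process if it outlives the timeout interval. The same pattern in isolation, with an invented command and timeout:

import java.util.Timer;
import java.util.TimerTask;

public class ProcessTimeoutExample {
    public static void main(String[] args) throws Exception {
        Process process = new ProcessBuilder("sleep", "60").start();
        Timer timer = new Timer(true); // daemon timer, like Shell's timeout timer
        timer.schedule(new TimerTask() {
            @Override
            public void run() {
                process.destroy(); // fires only if the process is still running
            }
        }, 1000 /* ms */);
        int exit = process.waitFor(); // returns early once the watchdog fires
        timer.cancel();
        System.out.println("exit=" + exit);
    }
}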
See the NOTICE file * distributed with this work for additional information diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/StringEntryFormatter.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/StringEntryFormatter.java index c9cd39cc548..14ef44b7812 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/StringEntryFormatter.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/StringEntryFormatter.java @@ -22,7 +22,6 @@ package org.apache.bookkeeper.util; import com.google.protobuf.ByteString; - import java.io.IOException; /** diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/StringUtils.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/StringUtils.java index 73bf0187c30..edbe49989d2 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/StringUtils.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/StringUtils.java @@ -1,6 +1,6 @@ package org.apache.bookkeeper.util; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -19,11 +19,10 @@ */ import java.io.IOException; - import org.apache.bookkeeper.proto.BookkeeperProtocol; /** - * Provided utilites for parsing network addresses, ledger-id from node paths + * Provided utilities for parsing network addresses, ledger-id from node paths * etc. * */ @@ -72,10 +71,10 @@ public static String getShortHierarchicalLedgerPath(long ledgerId) { // do 2-4-4 split StringBuilder sb = new StringBuilder(); sb.append("/") - .append(ledgerIdStr.substring(0, 2)).append("/") - .append(ledgerIdStr.substring(2, 6)).append("/") + .append(ledgerIdStr, 0, 2).append("/") + .append(ledgerIdStr, 2, 6).append("/") .append(LEDGER_NODE_PREFIX) - .append(ledgerIdStr.substring(6, 10)); + .append(ledgerIdStr, 6, 10); return sb.toString(); } @@ -91,12 +90,12 @@ public static String getLongHierarchicalLedgerPath(long ledgerId) { // do 3-4-4-4-4 split StringBuilder sb = new StringBuilder(); sb.append("/") - .append(ledgerIdStr.substring(0, 3)).append("/") - .append(ledgerIdStr.substring(3, 7)).append("/") - .append(ledgerIdStr.substring(7, 11)).append("/") - .append(ledgerIdStr.substring(11, 15)).append("/") + .append(ledgerIdStr, 0, 3).append("/") + .append(ledgerIdStr, 3, 7).append("/") + .append(ledgerIdStr, 7, 11).append("/") + .append(ledgerIdStr, 11, 15).append("/") .append(LEDGER_NODE_PREFIX) - .append(ledgerIdStr.substring(15, 19)); + .append(ledgerIdStr, 15, 19); return sb.toString(); } @@ -164,7 +163,7 @@ public static long stringToHierarchicalLedgerId(String...levelNodes) throws IOEx } /** - * Builds string representation of teh request without extra (i.e. binary) data + * Builds string representation of the request without extra (i.e. 
binary) data * * @param request * @return string representation of request diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/SubTreeCache.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/SubTreeCache.java index c5e44066062..aefb97a4add 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/SubTreeCache.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/SubTreeCache.java @@ -27,7 +27,6 @@ import java.util.List; import java.util.Map; import java.util.Set; - import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.WatchedEvent; import org.apache.zookeeper.Watcher; diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/Tool.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/Tool.java index 8a60af039a1..8bf25e51321 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/Tool.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/Tool.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/ZeroBuffer.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/ZeroBuffer.java index 8778d6e1900..968011fe469 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/ZeroBuffer.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/ZeroBuffer.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -29,7 +29,7 @@ */ public class ZeroBuffer { - private static final byte zeroBytes[] = new byte[64 * 1024]; + private static final byte[] zeroBytes = new byte[64 * 1024]; /** * Fill zeros into given buffer. diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/ZkUtils.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/ZkUtils.java index 5ba4a855496..48ea04a3f4d 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/ZkUtils.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/ZkUtils.java @@ -26,7 +26,6 @@ import java.util.List; import java.util.concurrent.CountDownLatch; import java.util.concurrent.atomic.AtomicInteger; - import org.apache.bookkeeper.conf.AbstractConfiguration; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GenericCallback; import org.apache.zookeeper.AsyncCallback; @@ -42,7 +41,7 @@ import org.slf4j.LoggerFactory; /** - * Provided utilites for zookeeper access, etc. + * Provided utilities for zookeeper access, etc. 
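As a worked example of the hierarchical ledger-path helpers edited above: assuming the zero-padded 10-digit id and the "L" ledger node prefix this class uses, the 2-4-4 split maps ledger id 1234567890 to /12/3456/L7890:

import org.apache.bookkeeper.util.StringUtils;

public class LedgerPathExample {
    public static void main(String[] args) {
        // "1234567890" splits as 12 / 3456 / 7890, with the ledger node prefix
        // in front of the last segment, so this prints /12/3456/L7890.
        System.out.println(StringUtils.getShortHierarchicalLedgerPath(1234567890L));
    }
}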
*/ public class ZkUtils { private static final Logger LOG = LoggerFactory.getLogger(ZkUtils.class); @@ -133,8 +132,17 @@ public static void asyncDeleteFullPathOptimistic(final ZooKeeper zk, final Strin public void processResult(int rc, String path, Object ctx) { if (rc == Code.OK.intValue()) { String parent = new File(originalPath).getParent().replace("\\", "/"); - asyncDeleteFullPathOptimistic(zk, parent, -1, callback, leafNodePath); + zk.getData(parent, false, (dRc, dPath, dCtx, data, stat) -> { + if (Code.OK.intValue() == dRc && (stat != null && stat.getNumChildren() == 0)) { + asyncDeleteFullPathOptimistic(zk, parent, -1, callback, leafNodePath); + } else { + // parent node is not empty so, complete the + // callback + callback.processResult(Code.OK.intValue(), path, leafNodePath); + } + }, null); } else { + // parent node deletion fails.. so, complete the callback if (path.equals(leafNodePath)) { callback.processResult(rc, path, leafNodePath); } else { @@ -221,7 +229,7 @@ private static class GetChildrenCtx { * @throws InterruptedException * @throws IOException */ - public static List getChildrenInSingleNode(final ZooKeeper zk, final String node) + public static List getChildrenInSingleNode(final ZooKeeper zk, final String node, long zkOpTimeoutMs) throws InterruptedException, IOException, KeeperException.NoNodeException { final GetChildrenCtx ctx = new GetChildrenCtx(); getChildrenInSingleNode(zk, node, new GenericCallback>() { @@ -239,8 +247,20 @@ public void operationComplete(int rc, List ledgers) { }); synchronized (ctx) { + long startTime = System.currentTimeMillis(); while (!ctx.done) { - ctx.wait(); + try { + ctx.wait(zkOpTimeoutMs > 0 ? zkOpTimeoutMs : 0); + } catch (InterruptedException e) { + ctx.rc = Code.OPERATIONTIMEOUT.intValue(); + ctx.done = true; + } + // timeout the process if get-children response not received + // zkOpTimeoutMs. + if (zkOpTimeoutMs > 0 && (System.currentTimeMillis() - startTime) >= zkOpTimeoutMs) { + ctx.rc = Code.OPERATIONTIMEOUT.intValue(); + ctx.done = true; + } } } if (Code.NONODE.intValue() == ctx.rc) { diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/collections/ConcurrentLongHashMap.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/collections/ConcurrentLongHashMap.java index d98d3a7722e..20094a106fd 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/collections/ConcurrentLongHashMap.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/collections/ConcurrentLongHashMap.java @@ -24,12 +24,10 @@ import static com.google.common.base.Preconditions.checkNotNull; import com.google.common.collect.Lists; - import java.util.Arrays; import java.util.List; import java.util.concurrent.locks.StampedLock; import java.util.function.LongFunction; - /** * Map from long to an Object. * @@ -39,6 +37,19 @@ *

        • Open hash map with linear probing, no node allocations to store the values * * + * WARN: the forEach method does not guarantee thread safety, nor do the keys and values methods. +
          + * The forEach method is designed for single-threaded usage. When iterating over a map + * with concurrent writes, newly written values may or may not be observed. + * There is no guarantee that if value1 is written and then value2, a reader that sees value2 will also see value1. + * In some cases it is even possible to encounter two mappings with the same key, + * causing the keys method to return a List containing two identical keys. + * + *
          + * It is crucial to understand that the results obtained from aggregate status methods such as keys and values + * are typically reliable only when the map is not undergoing concurrent updates from other threads. + * When concurrent updates are involved, the results of these methods reflect transient states + * that may be suitable for monitoring or estimation purposes, but not for program control. * @param */ @SuppressWarnings("unchecked") @@ -47,11 +58,76 @@ public class ConcurrentLongHashMap { private static final Object EmptyValue = null; private static final Object DeletedValue = new Object(); - private static final float MapFillFactor = 0.66f; - private static final int DefaultExpectedItems = 256; private static final int DefaultConcurrencyLevel = 16; + private static final float DefaultMapFillFactor = 0.66f; + private static final float DefaultMapIdleFactor = 0.15f; + + private static final float DefaultExpandFactor = 2; + private static final float DefaultShrinkFactor = 2; + + private static final boolean DefaultAutoShrink = false; + + + public static Builder newBuilder() { + return new Builder<>(); + } + + /** + * Builder of ConcurrentLongHashMap. + */ + public static class Builder { + int expectedItems = DefaultExpectedItems; + int concurrencyLevel = DefaultConcurrencyLevel; + float mapFillFactor = DefaultMapFillFactor; + float mapIdleFactor = DefaultMapIdleFactor; + float expandFactor = DefaultExpandFactor; + float shrinkFactor = DefaultShrinkFactor; + boolean autoShrink = DefaultAutoShrink; + + public Builder expectedItems(int expectedItems) { + this.expectedItems = expectedItems; + return this; + } + + public Builder concurrencyLevel(int concurrencyLevel) { + this.concurrencyLevel = concurrencyLevel; + return this; + } + + public Builder mapFillFactor(float mapFillFactor) { + this.mapFillFactor = mapFillFactor; + return this; + } + + public Builder mapIdleFactor(float mapIdleFactor) { + this.mapIdleFactor = mapIdleFactor; + return this; + } + + public Builder expandFactor(float expandFactor) { + this.expandFactor = expandFactor; + return this; + } + + public Builder shrinkFactor(float shrinkFactor) { + this.shrinkFactor = shrinkFactor; + return this; + } + + public Builder autoShrink(boolean autoShrink) { + this.autoShrink = autoShrink; + return this; + } + + public ConcurrentLongHashMap build() { + return new ConcurrentLongHashMap<>(expectedItems, concurrencyLevel, + mapFillFactor, mapIdleFactor, autoShrink, expandFactor, shrinkFactor); + } + } + + /** * Predicate specialization for (long, V) types. 
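The builder above replaces the now-deprecated constructors and exposes the new sizing knobs. A hypothetical construction; the values are illustrative, not recommendations:

import org.apache.bookkeeper.util.collections.ConcurrentLongHashMap;

public class MapBuilderExample {
    public static void main(String[] args) {
        ConcurrentLongHashMap<String> map = ConcurrentLongHashMap.<String>newBuilder()
                .expectedItems(1024)   // sized across all sections
                .concurrencyLevel(16)  // number of independent sections
                .mapFillFactor(0.66f)  // expand when a section is 66% full
                .mapIdleFactor(0.15f)  // shrink candidate below 15% occupancy
                .autoShrink(true)      // enable shrinking on remove/removeIf/clear
                .expandFactor(2)       // grow capacity 2x on rehash
                .shrinkFactor(2)       // halve capacity when shrinking
                .build();
        map.put(1L, "one");
        System.out.println(map.get(1L));
    }
}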
* @@ -63,26 +139,42 @@ public interface LongObjectPredicate { private final Section[] sections; + @Deprecated public ConcurrentLongHashMap() { this(DefaultExpectedItems); } + @Deprecated public ConcurrentLongHashMap(int expectedItems) { this(expectedItems, DefaultConcurrencyLevel); } + @Deprecated public ConcurrentLongHashMap(int expectedItems, int concurrencyLevel) { + this(expectedItems, concurrencyLevel, DefaultMapFillFactor, DefaultMapIdleFactor, + DefaultAutoShrink, DefaultExpandFactor, DefaultShrinkFactor); + } + + public ConcurrentLongHashMap(int expectedItems, int concurrencyLevel, + float mapFillFactor, float mapIdleFactor, + boolean autoShrink, float expandFactor, float shrinkFactor) { checkArgument(expectedItems > 0); checkArgument(concurrencyLevel > 0); checkArgument(expectedItems >= concurrencyLevel); + checkArgument(mapFillFactor > 0 && mapFillFactor < 1); + checkArgument(mapIdleFactor > 0 && mapIdleFactor < 1); + checkArgument(mapFillFactor > mapIdleFactor); + checkArgument(expandFactor > 1); + checkArgument(shrinkFactor > 1); int numSections = concurrencyLevel; int perSectionExpectedItems = expectedItems / numSections; - int perSectionCapacity = (int) (perSectionExpectedItems / MapFillFactor); + int perSectionCapacity = (int) (perSectionExpectedItems / mapFillFactor); this.sections = (Section[]) new Section[numSections]; for (int i = 0; i < numSections; i++) { - sections[i] = new Section<>(perSectionCapacity); + sections[i] = new Section<>(perSectionCapacity, mapFillFactor, mapIdleFactor, + autoShrink, expandFactor, shrinkFactor); } } @@ -181,6 +273,12 @@ public void clear() { } } + /** + * Iterate over all the entries in the map and apply the processor function to each of them. + *

          + * Warning: Do Not Guarantee Thread-Safety. + * @param processor the processor to apply to each entry + */ public void forEach(EntryProcessor processor) { for (Section s : sections) { s.forEach(processor); @@ -219,29 +317,47 @@ private static final class Section extends StampedLock { private volatile V[] values; private volatile int capacity; + private final int initCapacity; private volatile int size; private int usedBuckets; - private int resizeThreshold; - - Section(int capacity) { + private int resizeThresholdUp; + private int resizeThresholdBelow; + private final float mapFillFactor; + private final float mapIdleFactor; + private final float expandFactor; + private final float shrinkFactor; + private final boolean autoShrink; + + Section(int capacity, float mapFillFactor, float mapIdleFactor, boolean autoShrink, + float expandFactor, float shrinkFactor) { this.capacity = alignToPowerOfTwo(capacity); + this.initCapacity = this.capacity; this.keys = new long[this.capacity]; this.values = (V[]) new Object[this.capacity]; this.size = 0; this.usedBuckets = 0; - this.resizeThreshold = (int) (this.capacity * MapFillFactor); + this.autoShrink = autoShrink; + this.mapFillFactor = mapFillFactor; + this.mapIdleFactor = mapIdleFactor; + this.expandFactor = expandFactor; + this.shrinkFactor = shrinkFactor; + this.resizeThresholdUp = (int) (this.capacity * mapFillFactor); + this.resizeThresholdBelow = (int) (this.capacity * mapIdleFactor); } V get(long key, int keyHash) { - int bucket = keyHash; long stamp = tryOptimisticRead(); boolean acquiredLock = false; + // add local variable here, so OutOfBound won't happen + long[] keys = this.keys; + V[] values = this.values; + // calculate table.length as capacity to avoid rehash changing capacity + int bucket = signSafeMod(keyHash, values.length); + try { while (true) { - int capacity = this.capacity; - bucket = signSafeMod(bucket, capacity); // First try optimistic locking long storedKey = keys[bucket]; @@ -260,16 +376,15 @@ V get(long key, int keyHash) { if (!acquiredLock) { stamp = readLock(); acquiredLock = true; + + // update local variable + keys = this.keys; + values = this.values; + bucket = signSafeMod(keyHash, values.length); storedKey = keys[bucket]; storedValue = values[bucket]; } - if (capacity != this.capacity) { - // There has been a rehashing. We need to restart the search - bucket = keyHash; - continue; - } - if (storedKey == key) { return storedValue != DeletedValue ? 
storedValue : null; } else if (storedValue == EmptyValue) { @@ -278,7 +393,7 @@ V get(long key, int keyHash) { } } - ++bucket; + bucket = (bucket + 1) & (values.length - 1); } } finally { if (acquiredLock) { @@ -343,9 +458,10 @@ V put(long key, V value, int keyHash, boolean onlyIfAbsent, LongFunction valu ++bucket; } } finally { - if (usedBuckets > resizeThreshold) { + if (usedBuckets > resizeThresholdUp) { try { - rehash(); + int newCapacity = alignToPowerOfTwo((int) (capacity * expandFactor)); + rehash(newCapacity); } finally { unlockWrite(stamp); } @@ -355,6 +471,18 @@ V put(long key, V value, int keyHash, boolean onlyIfAbsent, LongFunction valu } } + private void cleanDeletedStatus(int startBucket) { + // Cleanup all the buckets that were in `DeletedValue` state, + // so that we can reduce unnecessary expansions + int lastBucket = signSafeMod(startBucket - 1, capacity); + while (values[lastBucket] == DeletedValue) { + values[lastBucket] = (V) EmptyValue; + --usedBuckets; + + lastBucket = signSafeMod(--lastBucket, capacity); + } + } + private V remove(long key, Object value, int keyHash) { int bucket = keyHash; long stamp = writeLock(); @@ -377,6 +505,8 @@ private V remove(long key, Object value, int keyHash) { if (nextValueInArray == EmptyValue) { values[bucket] = (V) EmptyValue; --usedBuckets; + + cleanDeletedStatus(bucket); } else { values[bucket] = (V) DeletedValue; } @@ -394,7 +524,24 @@ private V remove(long key, Object value, int keyHash) { } } finally { - unlockWrite(stamp); + if (autoShrink && size < resizeThresholdBelow) { + try { + // Shrinking must at least ensure initCapacity, + // so as to avoid frequent shrinking and expansion near initCapacity, + // frequent shrinking and expansion, + // additionally opened arrays will consume more memory and affect GC + int newCapacity = Math.max(alignToPowerOfTwo((int) (capacity / shrinkFactor)), initCapacity); + int newResizeThresholdUp = (int) (newCapacity * mapFillFactor); + if (newCapacity < capacity && newResizeThresholdUp > size) { + // shrink the hashmap + rehash(newCapacity); + } + } finally { + unlockWrite(stamp); + } + } else { + unlockWrite(stamp); + } } } @@ -405,7 +552,7 @@ int removeIf(LongObjectPredicate filter) { try { // Go through all the buckets for this section int capacity = this.capacity; - for (int bucket = 0; bucket < capacity; bucket++) { + for (int bucket = 0; size > 0 && bucket < capacity; bucket++) { long storedKey = keys[bucket]; V storedValue = values[bucket]; @@ -419,6 +566,8 @@ int removeIf(LongObjectPredicate filter) { if (nextValueInArray == EmptyValue) { values[bucket] = (V) EmptyValue; --usedBuckets; + + cleanDeletedStatus(bucket); } else { values[bucket] = (V) DeletedValue; } @@ -428,7 +577,24 @@ int removeIf(LongObjectPredicate filter) { return removedCount; } finally { - unlockWrite(stamp); + if (autoShrink && size < resizeThresholdBelow) { + try { + // Shrinking must at least ensure initCapacity, + // so as to avoid frequent shrinking and expansion near initCapacity, + // frequent shrinking and expansion, + // additionally opened arrays will consume more memory and affect GC + int newCapacity = Math.max(alignToPowerOfTwo((int) (capacity / shrinkFactor)), initCapacity); + int newResizeThresholdUp = (int) (newCapacity * mapFillFactor); + if (newCapacity < capacity && newResizeThresholdUp > size) { + // shrink the hashmap + rehash(newCapacity); + } + } finally { + unlockWrite(stamp); + } + } else { + unlockWrite(stamp); + } } } @@ -436,10 +602,14 @@ void clear() { long stamp = writeLock(); try { - 
Arrays.fill(keys, 0); - Arrays.fill(values, EmptyValue); - this.size = 0; - this.usedBuckets = 0; + if (autoShrink && capacity > initCapacity) { + shrinkToInitCapacity(); + } else { + Arrays.fill(keys, 0); + Arrays.fill(values, EmptyValue); + this.size = 0; + this.usedBuckets = 0; + } } finally { unlockWrite(stamp); } @@ -492,9 +662,8 @@ public void forEach(EntryProcessor processor) { } } - private void rehash() { + private void rehash(int newCapacity) { // Expand the hashmap - int newCapacity = capacity * 2; long[] newKeys = new long[newCapacity]; V[] newValues = (V[]) new Object[newCapacity]; @@ -513,7 +682,23 @@ private void rehash() { // Capacity needs to be updated after the values, so that we won't see // a capacity value bigger than the actual array size capacity = newCapacity; - resizeThreshold = (int) (capacity * MapFillFactor); + resizeThresholdUp = (int) (capacity * mapFillFactor); + resizeThresholdBelow = (int) (capacity * mapIdleFactor); + } + + private void shrinkToInitCapacity() { + long[] newKeys = new long[initCapacity]; + V[] newValues = (V[]) new Object[initCapacity]; + + keys = newKeys; + values = newValues; + size = 0; + usedBuckets = 0; + // Capacity needs to be updated after the values, so that we won't see + // a capacity value bigger than the actual array size + capacity = initCapacity; + resizeThresholdUp = (int) (capacity * mapFillFactor); + resizeThresholdBelow = (int) (capacity * mapIdleFactor); } private static void insertKeyValueNoLock(long[] keys, V[] values, long key, V value) { diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/collections/ConcurrentLongHashSet.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/collections/ConcurrentLongHashSet.java index b0edaadde6d..8383f5fac2c 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/collections/ConcurrentLongHashSet.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/collections/ConcurrentLongHashSet.java @@ -34,6 +34,19 @@ * no node allocations are required to store the values. * *

          Items MUST be >= 0. + * + *
          + * WARN: the forEach method does not guarantee thread safety, nor does the items method. +
          + * The forEach method is designed for single-threaded usage. When iterating over a set + * with concurrent writes, newly written values may or may not be observed. + * There is no guarantee that if value1 is written and then value2, a reader that sees value2 will also see value1. + *
          + * It is crucial to understand that the results obtained from aggregate status methods such as items + * are typically reliable only when the map is not undergoing concurrent updates from other threads. + * When concurrent updates are involved, the results of these methods reflect transient states + * that may be suitable for monitoring or estimation purposes, but not for program control. */ public class ConcurrentLongHashSet { @@ -45,8 +58,74 @@ public class ConcurrentLongHashSet { private static final int DefaultExpectedItems = 256; private static final int DefaultConcurrencyLevel = 16; + private static final float DefaultMapFillFactor = 0.66f; + private static final float DefaultMapIdleFactor = 0.15f; + + private static final float DefaultExpandFactor = 2; + private static final float DefaultShrinkFactor = 2; + + private static final boolean DefaultAutoShrink = false; + private final Section[] sections; + public static Builder newBuilder() { + return new Builder(); + } + + /** + * Builder of ConcurrentLongHashSet. + */ + public static class Builder { + int expectedItems = DefaultExpectedItems; + int concurrencyLevel = DefaultConcurrencyLevel; + float mapFillFactor = DefaultMapFillFactor; + float mapIdleFactor = DefaultMapIdleFactor; + float expandFactor = DefaultExpandFactor; + float shrinkFactor = DefaultShrinkFactor; + boolean autoShrink = DefaultAutoShrink; + + public Builder expectedItems(int expectedItems) { + this.expectedItems = expectedItems; + return this; + } + + public Builder concurrencyLevel(int concurrencyLevel) { + this.concurrencyLevel = concurrencyLevel; + return this; + } + + public Builder mapFillFactor(float mapFillFactor) { + this.mapFillFactor = mapFillFactor; + return this; + } + + public Builder mapIdleFactor(float mapIdleFactor) { + this.mapIdleFactor = mapIdleFactor; + return this; + } + + public Builder expandFactor(float expandFactor) { + this.expandFactor = expandFactor; + return this; + } + + public Builder shrinkFactor(float shrinkFactor) { + this.shrinkFactor = shrinkFactor; + return this; + } + + public Builder autoShrink(boolean autoShrink) { + this.autoShrink = autoShrink; + return this; + } + + public ConcurrentLongHashSet build() { + return new ConcurrentLongHashSet(expectedItems, concurrencyLevel, + mapFillFactor, mapIdleFactor, autoShrink, expandFactor, shrinkFactor); + } + } + + /** * A consumer of long values. 
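Per the caveat above, aggregate accessors such as size() and the new sizeInBytes() are best treated as estimates for monitoring rather than inputs to program control. A sketch:

import org.apache.bookkeeper.util.collections.ConcurrentLongHashSet;

public class SetMonitoringExample {
    public static void main(String[] args) {
        ConcurrentLongHashSet set = ConcurrentLongHashSet.newBuilder()
                .expectedItems(256)
                .autoShrink(true)
                .build();
        for (long i = 0; i < 1000; i++) {
            set.add(i); // items must be >= 0
        }
        // Both values are transient under concurrent updates; fine for metrics.
        System.out.println("items=" + set.size() + " tableBytes=" + set.sizeInBytes());
    }
}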
*/ @@ -54,18 +133,33 @@ public interface ConsumerLong { void accept(long item); } + @Deprecated public ConcurrentLongHashSet() { this(DefaultExpectedItems); } + @Deprecated public ConcurrentLongHashSet(int expectedItems) { this(expectedItems, DefaultConcurrencyLevel); } + @Deprecated public ConcurrentLongHashSet(int expectedItems, int concurrencyLevel) { + this(expectedItems, concurrencyLevel, DefaultMapFillFactor, DefaultMapIdleFactor, + DefaultAutoShrink, DefaultExpandFactor, DefaultShrinkFactor); + } + + public ConcurrentLongHashSet(int expectedItems, int concurrencyLevel, + float mapFillFactor, float mapIdleFactor, + boolean autoShrink, float expandFactor, float shrinkFactor) { checkArgument(expectedItems > 0); checkArgument(concurrencyLevel > 0); checkArgument(expectedItems >= concurrencyLevel); + checkArgument(mapFillFactor > 0 && mapFillFactor < 1); + checkArgument(mapIdleFactor > 0 && mapIdleFactor < 1); + checkArgument(mapFillFactor > mapIdleFactor); + checkArgument(expandFactor > 1); + checkArgument(shrinkFactor > 1); int numSections = concurrencyLevel; int perSectionExpectedItems = expectedItems / numSections; @@ -73,7 +167,8 @@ public ConcurrentLongHashSet(int expectedItems, int concurrencyLevel) { this.sections = new Section[numSections]; for (int i = 0; i < numSections; i++) { - sections[i] = new Section(perSectionCapacity); + sections[i] = new Section(perSectionCapacity, mapFillFactor, mapIdleFactor, + autoShrink, expandFactor, shrinkFactor); } } @@ -85,6 +180,14 @@ public long size() { return size; } + public long sizeInBytes() { + long size = 0; + for (Section s : sections) { + size += (long) s.table.length * Long.BYTES; + } + return size; + } + public long capacity() { long capacity = 0; for (Section s : sections) { @@ -169,23 +272,42 @@ private static final class Section extends StampedLock { private volatile long[] table; private volatile int capacity; + private final int initCapacity; private volatile int size; private int usedBuckets; - private int resizeThreshold; - - Section(int capacity) { + private int resizeThresholdUp; + private int resizeThresholdBelow; + private final float mapFillFactor; + private final float mapIdleFactor; + private final float expandFactor; + private final float shrinkFactor; + private final boolean autoShrink; + + Section(int capacity, float mapFillFactor, float mapIdleFactor, boolean autoShrink, + float expandFactor, float shrinkFactor) { this.capacity = alignToPowerOfTwo(capacity); + this.initCapacity = this.capacity; this.table = new long[this.capacity]; this.size = 0; this.usedBuckets = 0; - this.resizeThreshold = (int) (this.capacity * SetFillFactor); + this.autoShrink = autoShrink; + this.mapFillFactor = mapFillFactor; + this.mapIdleFactor = mapIdleFactor; + this.expandFactor = expandFactor; + this.shrinkFactor = shrinkFactor; + this.resizeThresholdUp = (int) (this.capacity * mapFillFactor); + this.resizeThresholdBelow = (int) (this.capacity * mapIdleFactor); Arrays.fill(table, EmptyItem); } boolean contains(long item, int hash) { long stamp = tryOptimisticRead(); boolean acquiredLock = false; - int bucket = signSafeMod(hash, capacity); + + // add local variable here, so OutOfBound won't happen + long[] table = this.table; + // calculate table.length as capacity to avoid rehash changing capacity + int bucket = signSafeMod(hash, table.length); try { while (true) { @@ -206,7 +328,9 @@ boolean contains(long item, int hash) { stamp = readLock(); acquiredLock = true; - bucket = signSafeMod(hash, capacity); + // update local variable + 
table = this.table; + bucket = signSafeMod(hash, table.length); storedItem = table[bucket]; } @@ -263,9 +387,11 @@ boolean add(long item, long hash) { bucket = (bucket + 1) & (table.length - 1); } } finally { - if (usedBuckets > resizeThreshold) { + if (usedBuckets > resizeThresholdUp) { try { - rehash(); + // Expand the hashmap + int newCapacity = alignToPowerOfTwo((int) (capacity * expandFactor)); + rehash(newCapacity); } finally { unlockWrite(stamp); } @@ -296,7 +422,24 @@ private boolean remove(long item, int hash) { bucket = (bucket + 1) & (table.length - 1); } } finally { - unlockWrite(stamp); + if (autoShrink && size < resizeThresholdBelow) { + try { + // Shrinking must at least ensure initCapacity, + // so as to avoid frequent shrinking and expansion near initCapacity, + // frequent shrinking and expansion, + // additionally opened arrays will consume more memory and affect GC + int newCapacity = Math.max(alignToPowerOfTwo((int) (capacity / shrinkFactor)), initCapacity); + int newResizeThresholdUp = (int) (newCapacity * mapFillFactor); + if (newCapacity < capacity && newResizeThresholdUp > size) { + // shrink the hashmap + rehash(newCapacity); + } + } finally { + unlockWrite(stamp); + } + } else { + unlockWrite(stamp); + } } } @@ -305,6 +448,16 @@ private void cleanBucket(int bucket) { if (table[nextInArray] == EmptyItem) { table[bucket] = EmptyItem; --usedBuckets; + + // Cleanup all the buckets that were in `DeletedKey` state, + // so that we can reduce unnecessary expansions + bucket = (bucket - 1) & (table.length - 1); + while (table[bucket] == DeletedItem) { + table[bucket] = EmptyItem; + --usedBuckets; + + bucket = (bucket - 1) & (table.length - 1); + } } else { table[bucket] = DeletedItem; } @@ -314,9 +467,13 @@ void clear() { long stamp = writeLock(); try { - Arrays.fill(table, EmptyItem); - this.size = 0; - this.usedBuckets = 0; + if (autoShrink && capacity > initCapacity) { + shrinkToInitCapacity(); + } else { + Arrays.fill(table, EmptyItem); + this.size = 0; + this.usedBuckets = 0; + } } finally { unlockWrite(stamp); } @@ -361,9 +518,8 @@ public void forEach(ConsumerLong processor) { } } - private void rehash() { + private void rehash(int newCapacity) { // Expand the hashmap - int newCapacity = capacity * 2; long[] newTable = new long[newCapacity]; Arrays.fill(newTable, EmptyItem); @@ -380,7 +536,22 @@ private void rehash() { // Capacity needs to be updated after the values, so that we won't see // a capacity value bigger than the actual array size capacity = newCapacity; - resizeThreshold = (int) (capacity * SetFillFactor); + resizeThresholdUp = (int) (capacity * mapFillFactor); + resizeThresholdBelow = (int) (capacity * mapIdleFactor); + } + + private void shrinkToInitCapacity() { + long[] newTable = new long[initCapacity]; + Arrays.fill(newTable, EmptyItem); + + table = newTable; + size = 0; + usedBuckets = 0; + // Capacity needs to be updated after the values, so that we won't see + // a capacity value bigger than the actual array size + capacity = initCapacity; + resizeThresholdUp = (int) (capacity * mapFillFactor); + resizeThresholdBelow = (int) (capacity * mapIdleFactor); } private static void insertKeyValueNoLock(long[] table, int capacity, long item) { diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/collections/ConcurrentLongLongHashMap.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/collections/ConcurrentLongLongHashMap.java index f3e3d56dd0b..2c8d7a07f5e 100644 --- 
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/collections/ConcurrentLongLongHashMap.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/collections/ConcurrentLongLongHashMap.java @@ -25,7 +25,6 @@ import com.google.common.collect.Lists; import com.google.common.collect.Maps; - import java.util.Arrays; import java.util.List; import java.util.Map; @@ -39,6 +38,21 @@ * no node allocations are required to store the values. * *

          Keys MUST be >= 0. + * + *
+ * WARN: the forEach method does not guarantee thread safety, nor do the keys, values and asMap methods. + *
+ * The forEach method is designed for single-threaded use. When iterating over a map + * that is being written to concurrently, new values may or may not be observed. + * There is no guarantee that if we write value1 and then value2, and can see value2, we will also see value1. + * In some cases it is even possible to encounter two mappings with the same key, + * so the keys method may return a List containing two identical keys. + * + *
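+ * A minimal usage sketch of the builder added by this change (illustrative only; it assumes
+ * the class's public single-key {@code put}/{@code get} wrappers, and the concrete numbers
+ * below merely walk through the default thresholds):
+ * <pre>{@code
+ * ConcurrentLongLongHashMap map = ConcurrentLongLongHashMap.newBuilder()
+ *         .expectedItems(1024)   // sizing hint, split across the 16 default sections
+ *         .autoShrink(true)      // allow rehashing down when a section becomes sparse
+ *         .mapIdleFactor(0.25f)  // shrink trigger; must stay below mapFillFactor (default 0.66f)
+ *         .build();
+ * map.put(1L, 100L);
+ * long value = map.get(1L);      // keys and values must be >= 0
+ * }</pre>
+ * With these settings each section starts at 1024 / 16 / 0.66 buckets, rounded up to a power
+ * of two (128); it expands once usedBuckets exceeds 128 * 0.66 = 84 and, after growing to 256,
+ * shrinks back when a removal leaves fewer than 256 * 0.25 = 64 entries. Shrinking never goes
+ * below the initial capacity.
+ *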
          + * It is crucial to understand that the results obtained from aggregate status methods such as keys, values, and asMap + * are typically reliable only when the map is not undergoing concurrent updates from other threads. + * When concurrent updates are involved, the results of these methods reflect transient states + * that may be suitable for monitoring or estimation purposes, but not for program control. */ public class ConcurrentLongLongHashMap { @@ -47,13 +61,76 @@ public class ConcurrentLongLongHashMap { private static final long ValueNotFound = -1L; - private static final float MapFillFactor = 0.66f; - private static final int DefaultExpectedItems = 256; private static final int DefaultConcurrencyLevel = 16; + private static final float DefaultMapFillFactor = 0.66f; + private static final float DefaultMapIdleFactor = 0.15f; + + private static final float DefaultExpandFactor = 2; + private static final float DefaultShrinkFactor = 2; + + private static final boolean DefaultAutoShrink = false; + private final Section[] sections; + public static Builder newBuilder() { + return new Builder(); + } + + /** + * Builder of ConcurrentLongLongHashMap. + */ + public static class Builder { + int expectedItems = DefaultExpectedItems; + int concurrencyLevel = DefaultConcurrencyLevel; + float mapFillFactor = DefaultMapFillFactor; + float mapIdleFactor = DefaultMapIdleFactor; + float expandFactor = DefaultExpandFactor; + float shrinkFactor = DefaultShrinkFactor; + boolean autoShrink = DefaultAutoShrink; + + public Builder expectedItems(int expectedItems) { + this.expectedItems = expectedItems; + return this; + } + + public Builder concurrencyLevel(int concurrencyLevel) { + this.concurrencyLevel = concurrencyLevel; + return this; + } + + public Builder mapFillFactor(float mapFillFactor) { + this.mapFillFactor = mapFillFactor; + return this; + } + + public Builder mapIdleFactor(float mapIdleFactor) { + this.mapIdleFactor = mapIdleFactor; + return this; + } + + public Builder expandFactor(float expandFactor) { + this.expandFactor = expandFactor; + return this; + } + + public Builder shrinkFactor(float shrinkFactor) { + this.shrinkFactor = shrinkFactor; + return this; + } + + public Builder autoShrink(boolean autoShrink) { + this.autoShrink = autoShrink; + return this; + } + + public ConcurrentLongLongHashMap build() { + return new ConcurrentLongLongHashMap(expectedItems, concurrencyLevel, + mapFillFactor, mapIdleFactor, autoShrink, expandFactor, shrinkFactor); + } + } + /** * A Long-Long BiConsumer. 
*/ @@ -75,26 +152,42 @@ public interface LongLongPredicate { boolean test(long key, long value); } + @Deprecated public ConcurrentLongLongHashMap() { this(DefaultExpectedItems); } + @Deprecated public ConcurrentLongLongHashMap(int expectedItems) { this(expectedItems, DefaultConcurrencyLevel); } + @Deprecated public ConcurrentLongLongHashMap(int expectedItems, int concurrencyLevel) { + this(expectedItems, concurrencyLevel, DefaultMapFillFactor, DefaultMapIdleFactor, + DefaultAutoShrink, DefaultExpandFactor, DefaultShrinkFactor); + } + + public ConcurrentLongLongHashMap(int expectedItems, int concurrencyLevel, + float mapFillFactor, float mapIdleFactor, + boolean autoShrink, float expandFactor, float shrinkFactor) { checkArgument(expectedItems > 0); checkArgument(concurrencyLevel > 0); checkArgument(expectedItems >= concurrencyLevel); + checkArgument(mapFillFactor > 0 && mapFillFactor < 1); + checkArgument(mapIdleFactor > 0 && mapIdleFactor < 1); + checkArgument(mapFillFactor > mapIdleFactor); + checkArgument(expandFactor > 1); + checkArgument(shrinkFactor > 1); int numSections = concurrencyLevel; int perSectionExpectedItems = expectedItems / numSections; - int perSectionCapacity = (int) (perSectionExpectedItems / MapFillFactor); + int perSectionCapacity = (int) (perSectionExpectedItems / mapFillFactor); this.sections = new Section[numSections]; for (int i = 0; i < numSections; i++) { - sections[i] = new Section(perSectionCapacity); + sections[i] = new Section(perSectionCapacity, mapFillFactor, mapIdleFactor, + autoShrink, expandFactor, shrinkFactor); } } @@ -106,6 +199,14 @@ public long size() { return size; } + public long sizeInBytes() { + long size = 0; + for (Section s : sections) { + size += (long) s.table.length * Long.BYTES; + } + return size; + } + public long capacity() { long capacity = 0; for (Section s : sections) { @@ -284,27 +385,48 @@ public Map asMap() { // A section is a portion of the hash map that is covered by a single @SuppressWarnings("serial") private static final class Section extends StampedLock { + // Each item take up 2 continuous array space. 
+ private static final int ITEM_SIZE = 2; + // Keys and values are stored interleaved in the table array private volatile long[] table; private volatile int capacity; + private final int initCapacity; private volatile int size; private int usedBuckets; - private int resizeThreshold; - - Section(int capacity) { + private int resizeThresholdUp; + private int resizeThresholdBelow; + private final float mapFillFactor; + private final float mapIdleFactor; + private final float expandFactor; + private final float shrinkFactor; + private final boolean autoShrink; + + Section(int capacity, float mapFillFactor, float mapIdleFactor, boolean autoShrink, + float expandFactor, float shrinkFactor) { this.capacity = alignToPowerOfTwo(capacity); - this.table = new long[2 * this.capacity]; + this.initCapacity = this.capacity; + this.table = new long[ITEM_SIZE * this.capacity]; this.size = 0; this.usedBuckets = 0; - this.resizeThreshold = (int) (this.capacity * MapFillFactor); + this.autoShrink = autoShrink; + this.mapFillFactor = mapFillFactor; + this.mapIdleFactor = mapIdleFactor; + this.expandFactor = expandFactor; + this.shrinkFactor = shrinkFactor; + this.resizeThresholdUp = (int) (this.capacity * mapFillFactor); + this.resizeThresholdBelow = (int) (this.capacity * mapIdleFactor); Arrays.fill(table, EmptyKey); } long get(long key, int keyHash) { long stamp = tryOptimisticRead(); boolean acquiredLock = false; - int bucket = signSafeMod(keyHash, capacity); + // add local variable here, so OutOfBound won't happen + long[] table = this.table; + // calculate table.length/2 as capacity to avoid rehash changing capacity + int bucket = signSafeMod(keyHash, table.length / ITEM_SIZE); try { while (true) { @@ -326,7 +448,9 @@ long get(long key, int keyHash) { stamp = readLock(); acquiredLock = true; - bucket = signSafeMod(keyHash, capacity); + // update local variable + table = this.table; + bucket = signSafeMod(keyHash, table.length / ITEM_SIZE); storedKey = table[bucket]; storedValue = table[bucket + 1]; } @@ -339,7 +463,7 @@ long get(long key, int keyHash) { } } - bucket = (bucket + 2) & (table.length - 1); + bucket = (bucket + ITEM_SIZE) & (table.length - 1); } } finally { if (acquiredLock) { @@ -392,12 +516,14 @@ long put(long key, long value, int keyHash, boolean onlyIfAbsent, LongLongFuncti } } - bucket = (bucket + 2) & (table.length - 1); + bucket = (bucket + ITEM_SIZE) & (table.length - 1); } } finally { - if (usedBuckets > resizeThreshold) { + if (usedBuckets > resizeThresholdUp) { try { - rehash(); + // Expand the hashmap + int newCapacity = alignToPowerOfTwo((int) (capacity * expandFactor)); + rehash(newCapacity); } finally { unlockWrite(stamp); } @@ -448,12 +574,14 @@ long addAndGet(long key, long delta, int keyHash) { } } - bucket = (bucket + 2) & (table.length - 1); + bucket = (bucket + ITEM_SIZE) & (table.length - 1); } } finally { - if (usedBuckets > resizeThreshold) { + if (usedBuckets > resizeThresholdUp) { try { - rehash(); + // Expand the hashmap + int newCapacity = alignToPowerOfTwo((int) (capacity * expandFactor)); + rehash(newCapacity); } finally { unlockWrite(stamp); } @@ -506,12 +634,14 @@ boolean compareAndSet(long key, long currentValue, long newValue, int keyHash) { } } - bucket = (bucket + 2) & (table.length - 1); + bucket = (bucket + ITEM_SIZE) & (table.length - 1); } } finally { - if (usedBuckets > resizeThreshold) { + if (usedBuckets > resizeThresholdUp) { try { - rehash(); + // Expand the hashmap + int newCapacity = alignToPowerOfTwo((int) (capacity * expandFactor)); + 
rehash(newCapacity); } finally { unlockWrite(stamp); } @@ -543,11 +673,28 @@ private long remove(long key, long value, int keyHash) { return ValueNotFound; } - bucket = (bucket + 2) & (table.length - 1); + bucket = (bucket + ITEM_SIZE) & (table.length - 1); } } finally { - unlockWrite(stamp); + if (autoShrink && size < resizeThresholdBelow) { + try { + // Shrinking must at least ensure initCapacity, + // so as to avoid frequent shrinking and expansion near initCapacity, + // frequent shrinking and expansion, + // additionally opened arrays will consume more memory and affect GC + int newCapacity = Math.max(alignToPowerOfTwo((int) (capacity / shrinkFactor)), initCapacity); + int newResizeThresholdUp = (int) (newCapacity * mapFillFactor); + if (newCapacity < capacity && newResizeThresholdUp > size) { + // shrink the hashmap + rehash(newCapacity); + } + } finally { + unlockWrite(stamp); + } + } else { + unlockWrite(stamp); + } } } @@ -557,7 +704,7 @@ int removeIf(LongPredicate filter) { int removedCount = 0; try { // Go through all the buckets for this section - for (int bucket = 0; bucket < table.length; bucket += 2) { + for (int bucket = 0; size > 0 && bucket < table.length; bucket += ITEM_SIZE) { long storedKey = table[bucket]; if (storedKey != DeletedKey && storedKey != EmptyKey) { @@ -572,7 +719,20 @@ int removeIf(LongPredicate filter) { return removedCount; } finally { - unlockWrite(stamp); + if (autoShrink && size < resizeThresholdBelow) { + try { + int newCapacity = Math.max(alignToPowerOfTwo((int) (capacity / shrinkFactor)), initCapacity); + int newResizeThresholdUp = (int) (newCapacity * mapFillFactor); + if (newCapacity < capacity && newResizeThresholdUp > size) { + // shrink the hashmap + rehash(newCapacity); + } + } finally { + unlockWrite(stamp); + } + } else { + unlockWrite(stamp); + } } } @@ -582,7 +742,7 @@ int removeIf(LongLongPredicate filter) { int removedCount = 0; try { // Go through all the buckets for this section - for (int bucket = 0; bucket < table.length; bucket += 2) { + for (int bucket = 0; size > 0 && bucket < table.length; bucket += ITEM_SIZE) { long storedKey = table[bucket]; long storedValue = table[bucket + 1]; @@ -598,16 +758,39 @@ int removeIf(LongLongPredicate filter) { return removedCount; } finally { - unlockWrite(stamp); + if (autoShrink && size < resizeThresholdBelow) { + try { + int newCapacity = Math.max(alignToPowerOfTwo((int) (capacity / shrinkFactor)), initCapacity); + int newResizeThresholdUp = (int) (newCapacity * mapFillFactor); + if (newCapacity < capacity && newResizeThresholdUp > size) { + // shrink the hashmap + rehash(newCapacity); + } + } finally { + unlockWrite(stamp); + } + } else { + unlockWrite(stamp); + } } } private void cleanBucket(int bucket) { - int nextInArray = (bucket + 2) & (table.length - 1); + int nextInArray = (bucket + ITEM_SIZE) & (table.length - 1); if (table[nextInArray] == EmptyKey) { table[bucket] = EmptyKey; table[bucket + 1] = ValueNotFound; --usedBuckets; + + // Cleanup all the buckets that were in `DeletedKey` state, so that we can reduce unnecessary expansions + bucket = (bucket - ITEM_SIZE) & (table.length - 1); + while (table[bucket] == DeletedKey) { + table[bucket] = EmptyKey; + table[bucket + 1] = ValueNotFound; + --usedBuckets; + + bucket = (bucket - ITEM_SIZE) & (table.length - 1); + } } else { table[bucket] = DeletedKey; table[bucket + 1] = ValueNotFound; @@ -618,9 +801,13 @@ void clear() { long stamp = writeLock(); try { - Arrays.fill(table, EmptyKey); - this.size = 0; - this.usedBuckets = 0; + if 
(autoShrink && capacity > initCapacity) { + shrinkToInitCapacity(); + } else { + Arrays.fill(table, EmptyKey); + this.size = 0; + this.usedBuckets = 0; + } } finally { unlockWrite(stamp); } @@ -643,7 +830,7 @@ public void forEach(BiConsumerLong processor) { } // Go through all the buckets for this section - for (int bucket = 0; bucket < table.length; bucket += 2) { + for (int bucket = 0; bucket < table.length; bucket += ITEM_SIZE) { long storedKey = table[bucket]; long storedValue = table[bucket + 1]; @@ -667,14 +854,13 @@ public void forEach(BiConsumerLong processor) { } } - private void rehash() { + private void rehash(int newCapacity) { // Expand the hashmap - int newCapacity = capacity * 2; - long[] newTable = new long[2 * newCapacity]; + long[] newTable = new long[ITEM_SIZE * newCapacity]; Arrays.fill(newTable, EmptyKey); // Re-hash table - for (int i = 0; i < table.length; i += 2) { + for (int i = 0; i < table.length; i += ITEM_SIZE) { long storedKey = table[i]; long storedValue = table[i + 1]; if (storedKey != EmptyKey && storedKey != DeletedKey) { @@ -687,7 +873,22 @@ private void rehash() { // Capacity needs to be updated after the values, so that we won't see // a capacity value bigger than the actual array size capacity = newCapacity; - resizeThreshold = (int) (capacity * MapFillFactor); + resizeThresholdUp = (int) (capacity * mapFillFactor); + resizeThresholdBelow = (int) (capacity * mapIdleFactor); + } + + private void shrinkToInitCapacity() { + long[] newTable = new long[ITEM_SIZE * initCapacity]; + Arrays.fill(newTable, EmptyKey); + + table = newTable; + size = 0; + usedBuckets = 0; + // Capacity needs to be updated after the values, so that we won't see + // a capacity value bigger than the actual array size + capacity = initCapacity; + resizeThresholdUp = (int) (capacity * mapFillFactor); + resizeThresholdBelow = (int) (capacity * mapIdleFactor); } private static void insertKeyValueNoLock(long[] table, int capacity, long key, long value) { @@ -703,7 +904,7 @@ private static void insertKeyValueNoLock(long[] table, int capacity, long key, l return; } - bucket = (bucket + 2) & (table.length - 1); + bucket = (bucket + ITEM_SIZE) & (table.length - 1); } } } @@ -719,6 +920,8 @@ static final long hash(long key) { } static final int signSafeMod(long n, int max) { + // as the ITEM_SIZE of Section is 2, so the index is the multiple of 2 + // that is to left shift 1 bit return (int) (n & (max - 1)) << 1; } @@ -731,4 +934,20 @@ private static void checkBiggerEqualZero(long n) { throw new IllegalArgumentException("Keys and values must be >= 0"); } } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("ConcurrentLongLongHashMap{"); + + int headerLen = sb.length(); + forEach((k, v) -> { + sb.append(k).append(" => ").append(v).append(", "); + }); + if (sb.length() > headerLen) { + sb.setLength(sb.length() - 2); + } + sb.append("}"); + return sb.toString(); + } } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/collections/ConcurrentLongLongPairHashMap.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/collections/ConcurrentLongLongPairHashMap.java index 42cb04bbf61..24259609a56 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/collections/ConcurrentLongLongPairHashMap.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/collections/ConcurrentLongLongPairHashMap.java @@ -24,7 +24,6 @@ import com.google.common.collect.Lists; import com.google.common.collect.Maps; - 
import java.util.Arrays; import java.util.List; import java.util.Map; @@ -39,6 +38,20 @@ * no node allocations are required to store the keys and values, and no boxing is required. * *

          Keys MUST be >= 0. + *
+ * WARN: the forEach method does not guarantee thread safety, nor do the keys, values and asMap methods. + *
+ * The forEach method is designed for single-threaded use. When iterating over a map + * that is being written to concurrently, new values may or may not be observed. + * There is no guarantee that if we write value1 and then value2, and can see value2, we will also see value1. + * In some cases it is even possible to encounter two mappings with the same key, + * so the keys method may return a List containing two identical keys. + * + *
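+ * A minimal usage sketch (illustrative only; {@code get} is assumed to return {@code null}
+ * for an absent key pair, matching the section-level lookup this patch modifies):
+ * <pre>{@code
+ * ConcurrentLongLongPairHashMap map = ConcurrentLongLongPairHashMap.newBuilder()
+ *         .expectedItems(256)
+ *         .autoShrink(true)    // shrink a section when size drops below capacity * mapIdleFactor
+ *         .build();
+ * map.put(1L, 2L, 100L, 200L);        // composite key (1, 2) -> value pair (100, 200)
+ * LongPair value = map.get(1L, 2L);   // all four longs must be >= 0
+ * }</pre>
+ *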
          + * It is crucial to understand that the results obtained from aggregate status methods such as keys, values, and asMap + * are typically reliable only when the map is not undergoing concurrent updates from other threads. + * When concurrent updates are involved, the results of these methods reflect transient states + * that may be suitable for monitoring or estimation purposes, but not for program control. */ public class ConcurrentLongLongPairHashMap { @@ -47,13 +60,77 @@ public class ConcurrentLongLongPairHashMap { private static final long ValueNotFound = -1L; - private static final float MapFillFactor = 0.66f; private static final int DefaultExpectedItems = 256; private static final int DefaultConcurrencyLevel = 16; + private static final float DefaultMapFillFactor = 0.66f; + private static final float DefaultMapIdleFactor = 0.15f; + + private static final float DefaultExpandFactor = 2; + private static final float DefaultShrinkFactor = 2; + + private static final boolean DefaultAutoShrink = false; + private final Section[] sections; + public static Builder newBuilder() { + return new Builder(); + } + + /** + * Builder of ConcurrentLongLongPairHashMap. + */ + public static class Builder { + int expectedItems = DefaultExpectedItems; + int concurrencyLevel = DefaultConcurrencyLevel; + float mapFillFactor = DefaultMapFillFactor; + float mapIdleFactor = DefaultMapIdleFactor; + float expandFactor = DefaultExpandFactor; + float shrinkFactor = DefaultShrinkFactor; + boolean autoShrink = DefaultAutoShrink; + + public Builder expectedItems(int expectedItems) { + this.expectedItems = expectedItems; + return this; + } + + public Builder concurrencyLevel(int concurrencyLevel) { + this.concurrencyLevel = concurrencyLevel; + return this; + } + + public Builder mapFillFactor(float mapFillFactor) { + this.mapFillFactor = mapFillFactor; + return this; + } + + public Builder mapIdleFactor(float mapIdleFactor) { + this.mapIdleFactor = mapIdleFactor; + return this; + } + + public Builder expandFactor(float expandFactor) { + this.expandFactor = expandFactor; + return this; + } + + public Builder shrinkFactor(float shrinkFactor) { + this.shrinkFactor = shrinkFactor; + return this; + } + + public Builder autoShrink(boolean autoShrink) { + this.autoShrink = autoShrink; + return this; + } + + public ConcurrentLongLongPairHashMap build() { + return new ConcurrentLongLongPairHashMap(expectedItems, concurrencyLevel, + mapFillFactor, mapIdleFactor, autoShrink, expandFactor, shrinkFactor); + } + } + /** * A BiConsumer Long pair. 
*/ @@ -75,26 +152,42 @@ public interface LongLongPairPredicate { boolean test(long key1, long key2, long value1, long value2); } + @Deprecated public ConcurrentLongLongPairHashMap() { this(DefaultExpectedItems); } + @Deprecated public ConcurrentLongLongPairHashMap(int expectedItems) { this(expectedItems, DefaultConcurrencyLevel); } + @Deprecated public ConcurrentLongLongPairHashMap(int expectedItems, int concurrencyLevel) { + this(expectedItems, concurrencyLevel, DefaultMapFillFactor, DefaultMapIdleFactor, + DefaultAutoShrink, DefaultExpandFactor, DefaultShrinkFactor); + } + + private ConcurrentLongLongPairHashMap(int expectedItems, int concurrencyLevel, + float mapFillFactor, float mapIdleFactor, + boolean autoShrink, float expandFactor, float shrinkFactor) { checkArgument(expectedItems > 0); checkArgument(concurrencyLevel > 0); checkArgument(expectedItems >= concurrencyLevel); + checkArgument(mapFillFactor > 0 && mapFillFactor < 1); + checkArgument(mapIdleFactor > 0 && mapIdleFactor < 1); + checkArgument(mapFillFactor > mapIdleFactor); + checkArgument(expandFactor > 1); + checkArgument(shrinkFactor > 1); int numSections = concurrencyLevel; int perSectionExpectedItems = expectedItems / numSections; - int perSectionCapacity = (int) (perSectionExpectedItems / MapFillFactor); + int perSectionCapacity = (int) (perSectionExpectedItems / mapFillFactor); this.sections = new Section[numSections]; for (int i = 0; i < numSections; i++) { - sections[i] = new Section(perSectionCapacity); + sections[i] = new Section(perSectionCapacity, mapFillFactor, mapIdleFactor, + autoShrink, expandFactor, shrinkFactor); } } @@ -106,6 +199,14 @@ public long size() { return size; } + public long sizeInBytes() { + long size = 0; + for (Section s : sections) { + size += (long) s.table.length * Long.BYTES; + } + return size; + } + public long capacity() { long capacity = 0; for (Section s : sections) { @@ -192,6 +293,12 @@ public void clear() { } } + /** + * Iterate over all the entries in the map and apply the processor function to each of them. + *

          + * Warning: Do Not Guarantee Thread-Safety. + * @param processor the processor to process the elements. + */ public void forEach(BiConsumerLongPair processor) { for (Section s : sections) { s.forEach(processor); @@ -222,27 +329,48 @@ public Map asMap() { // A section is a portion of the hash map that is covered by a single @SuppressWarnings("serial") private static final class Section extends StampedLock { + // Each item take up 4 continuous array space. + private static final int ITEM_SIZE = 4; + // Keys and values are stored interleaved in the table array private volatile long[] table; private volatile int capacity; + private final int initCapacity; private volatile int size; private int usedBuckets; - private int resizeThreshold; - - Section(int capacity) { + private int resizeThresholdUp; + private int resizeThresholdBelow; + private final float mapFillFactor; + private final float mapIdleFactor; + private final float expandFactor; + private final float shrinkFactor; + private final boolean autoShrink; + + Section(int capacity, float mapFillFactor, float mapIdleFactor, boolean autoShrink, + float expandFactor, float shrinkFactor) { this.capacity = alignToPowerOfTwo(capacity); - this.table = new long[4 * this.capacity]; + this.initCapacity = this.capacity; + this.table = new long[ITEM_SIZE * this.capacity]; this.size = 0; this.usedBuckets = 0; - this.resizeThreshold = (int) (this.capacity * MapFillFactor); + this.autoShrink = autoShrink; + this.mapFillFactor = mapFillFactor; + this.mapIdleFactor = mapIdleFactor; + this.expandFactor = expandFactor; + this.shrinkFactor = shrinkFactor; + this.resizeThresholdUp = (int) (this.capacity * mapFillFactor); + this.resizeThresholdBelow = (int) (this.capacity * mapIdleFactor); Arrays.fill(table, EmptyKey); } LongPair get(long key1, long key2, int keyHash) { long stamp = tryOptimisticRead(); boolean acquiredLock = false; - int bucket = signSafeMod(keyHash, capacity); + // add local variable here, so OutOfBound won't happen + long[] table = this.table; + // calculate table.length / 4 as capacity to avoid rehash changing capacity + int bucket = signSafeMod(keyHash, table.length / ITEM_SIZE); try { while (true) { @@ -265,8 +393,9 @@ LongPair get(long key1, long key2, int keyHash) { if (!acquiredLock) { stamp = readLock(); acquiredLock = true; - - bucket = signSafeMod(keyHash, capacity); + // update local variable + table = this.table; + bucket = signSafeMod(keyHash, table.length / ITEM_SIZE); storedKey1 = table[bucket]; storedKey2 = table[bucket + 1]; storedValue1 = table[bucket + 2]; @@ -281,7 +410,7 @@ LongPair get(long key1, long key2, int keyHash) { } } - bucket = (bucket + 4) & (table.length - 1); + bucket = (bucket + ITEM_SIZE) & (table.length - 1); } } finally { if (acquiredLock) { @@ -333,12 +462,14 @@ boolean put(long key1, long key2, long value1, long value2, int keyHash, boolean } } - bucket = (bucket + 4) & (table.length - 1); + bucket = (bucket + ITEM_SIZE) & (table.length - 1); } } finally { - if (usedBuckets > resizeThreshold) { + if (usedBuckets > resizeThresholdUp) { try { - rehash(); + // Expand the hashmap + int newCapacity = alignToPowerOfTwo((int) (capacity * expandFactor)); + rehash(newCapacity); } finally { unlockWrite(stamp); } @@ -372,22 +503,52 @@ private boolean remove(long key1, long key2, long value1, long value2, int keyHa return false; } - bucket = (bucket + 4) & (table.length - 1); + bucket = (bucket + ITEM_SIZE) & (table.length - 1); } } finally { - unlockWrite(stamp); + if (autoShrink && size < 
resizeThresholdBelow) { + try { + // Shrinking must at least ensure initCapacity, + // so as to avoid frequent shrinking and expansion near initCapacity, + // frequent shrinking and expansion, + // additionally opened arrays will consume more memory and affect GC + int newCapacity = Math.max(alignToPowerOfTwo((int) (capacity / shrinkFactor)), initCapacity); + int newResizeThresholdUp = (int) (newCapacity * mapFillFactor); + if (newCapacity < capacity && newResizeThresholdUp > size) { + // shrink the hashmap + rehash(newCapacity); + } + } finally { + unlockWrite(stamp); + } + } else { + unlockWrite(stamp); + } } } private void cleanBucket(int bucket) { - int nextInArray = (bucket + 4) & (table.length - 1); + int nextInArray = (bucket + ITEM_SIZE) & (table.length - 1); if (table[nextInArray] == EmptyKey) { table[bucket] = EmptyKey; table[bucket + 1] = EmptyKey; table[bucket + 2] = ValueNotFound; table[bucket + 3] = ValueNotFound; --usedBuckets; + + // Cleanup all the buckets that were in `DeletedKey` state, + // so that we can reduce unnecessary expansions + bucket = (bucket - ITEM_SIZE) & (table.length - 1); + while (table[bucket] == DeletedKey) { + table[bucket] = EmptyKey; + table[bucket + 1] = EmptyKey; + table[bucket + 2] = ValueNotFound; + table[bucket + 3] = ValueNotFound; + --usedBuckets; + + bucket = (bucket - ITEM_SIZE) & (table.length - 1); + } } else { table[bucket] = DeletedKey; table[bucket + 1] = DeletedKey; @@ -400,9 +561,13 @@ void clear() { long stamp = writeLock(); try { - Arrays.fill(table, EmptyKey); - this.size = 0; - this.usedBuckets = 0; + if (autoShrink && capacity > initCapacity) { + shrinkToInitCapacity(); + } else { + Arrays.fill(table, EmptyKey); + this.size = 0; + this.usedBuckets = 0; + } } finally { unlockWrite(stamp); } @@ -425,7 +590,7 @@ public void forEach(BiConsumerLongPair processor) { } // Go through all the buckets for this section - for (int bucket = 0; bucket < table.length; bucket += 4) { + for (int bucket = 0; bucket < table.length; bucket += ITEM_SIZE) { long storedKey1 = table[bucket]; long storedKey2 = table[bucket + 1]; long storedValue1 = table[bucket + 2]; @@ -453,14 +618,12 @@ public void forEach(BiConsumerLongPair processor) { } } - private void rehash() { - // Expand the hashmap - int newCapacity = capacity * 2; - long[] newTable = new long[4 * newCapacity]; + private void rehash(int newCapacity) { + long[] newTable = new long[ITEM_SIZE * newCapacity]; Arrays.fill(newTable, EmptyKey); // Re-hash table - for (int i = 0; i < table.length; i += 4) { + for (int i = 0; i < table.length; i += ITEM_SIZE) { long storedKey1 = table[i]; long storedKey2 = table[i + 1]; long storedValue1 = table[i + 2]; @@ -475,11 +638,26 @@ private void rehash() { // Capacity needs to be updated after the values, so that we won't see // a capacity value bigger than the actual array size capacity = newCapacity; - resizeThreshold = (int) (capacity * MapFillFactor); + resizeThresholdUp = (int) (capacity * mapFillFactor); + resizeThresholdBelow = (int) (capacity * mapIdleFactor); + } + + private void shrinkToInitCapacity() { + long[] newTable = new long[ITEM_SIZE * initCapacity]; + Arrays.fill(newTable, EmptyKey); + + table = newTable; + size = 0; + usedBuckets = 0; + // Capacity needs to be updated after the values, so that we won't see + // a capacity value bigger than the actual array size + capacity = initCapacity; + resizeThresholdUp = (int) (capacity * mapFillFactor); + resizeThresholdBelow = (int) (capacity * mapIdleFactor); } private static void 
insertKeyValueNoLock(long[] table, int capacity, long key1, long key2, long value1, - long value2) { + long value2) { int bucket = signSafeMod(hash(key1, key2), capacity); while (true) { @@ -494,7 +672,7 @@ private static void insertKeyValueNoLock(long[] table, int capacity, long key1, return; } - bucket = (bucket + 4) & (table.length - 1); + bucket = (bucket + ITEM_SIZE) & (table.length - 1); } } } @@ -513,6 +691,8 @@ static final long hash(long key1, long key2) { } static final int signSafeMod(long n, int max) { + // as the ITEM_SIZE of Section is 4, so the index is the multiple of 4 + // that is to left shift 2 bits return (int) (n & (max - 1)) << 2; } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/collections/ConcurrentOpenHashMap.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/collections/ConcurrentOpenHashMap.java index 475b70e13ee..44215c63d79 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/collections/ConcurrentOpenHashMap.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/collections/ConcurrentOpenHashMap.java @@ -24,20 +24,32 @@ import static com.google.common.base.Preconditions.checkNotNull; import com.google.common.collect.Lists; - import java.util.Arrays; import java.util.List; import java.util.concurrent.locks.StampedLock; import java.util.function.BiConsumer; import java.util.function.BiPredicate; import java.util.function.Function; - /** * Concurrent hash map. * *

          Provides similar methods as a {@code ConcurrentMap} but since it's an open hash map with linear probing, * no node allocations are required to store the values * + *
+ * WARN: the forEach method does not guarantee thread safety, nor do the keys and values methods. + *
+ * The forEach method is designed for single-threaded use. When iterating over a map + * that is being written to concurrently, new values may or may not be observed. + * There is no guarantee that if we write value1 and then value2, and can see value2, we will also see value1. + * In some cases it is even possible to encounter two mappings with the same key, + * so the keys method may return a List containing two identical keys. + * + *
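+ * A minimal migration sketch (illustrative only; type parameters are assumed on
+ * {@code newBuilder()}): the deprecated constructors still work, but the builder added by
+ * this change is the preferred way to configure the map.
+ * <pre>{@code
+ * // before this change:
+ * //   ConcurrentOpenHashMap<String, Integer> map = new ConcurrentOpenHashMap<>(256, 16);
+ * ConcurrentOpenHashMap<String, Integer> map =
+ *         ConcurrentOpenHashMap.<String, Integer>newBuilder()
+ *                 .expectedItems(256)
+ *                 .concurrencyLevel(16)
+ *                 .autoShrink(true)
+ *                 .build();
+ * map.put("ledger", 1);
+ * Integer value = map.get("ledger");
+ * }</pre>
+ *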
          + * It is crucial to understand that the results obtained from aggregate status methods such as keys and values + * are typically reliable only when the map is not undergoing concurrent updates from other threads. + * When concurrent updates are involved, the results of these methods reflect transient states + * that may be suitable for monitoring or estimation purposes, but not for program control. * @param */ @SuppressWarnings("unchecked") @@ -46,34 +58,121 @@ public class ConcurrentOpenHashMap { private static final Object EmptyKey = null; private static final Object DeletedKey = new Object(); - private static final float MapFillFactor = 0.66f; - private static final int DefaultExpectedItems = 256; private static final int DefaultConcurrencyLevel = 16; + private static final float DefaultMapFillFactor = 0.66f; + private static final float DefaultMapIdleFactor = 0.15f; + + private static final float DefaultExpandFactor = 2; + private static final float DefaultShrinkFactor = 2; + + private static final boolean DefaultAutoShrink = false; + private final Section[] sections; + public static Builder newBuilder() { + return new Builder<>(); + } + + /** + * Builder of ConcurrentOpenHashMap. + */ + public static class Builder { + int expectedItems = DefaultExpectedItems; + int concurrencyLevel = DefaultConcurrencyLevel; + float mapFillFactor = DefaultMapFillFactor; + float mapIdleFactor = DefaultMapIdleFactor; + float expandFactor = DefaultExpandFactor; + float shrinkFactor = DefaultShrinkFactor; + boolean autoShrink = DefaultAutoShrink; + + public Builder expectedItems(int expectedItems) { + this.expectedItems = expectedItems; + return this; + } + + public Builder concurrencyLevel(int concurrencyLevel) { + this.concurrencyLevel = concurrencyLevel; + return this; + } + + public Builder mapFillFactor(float mapFillFactor) { + this.mapFillFactor = mapFillFactor; + return this; + } + + public Builder mapIdleFactor(float mapIdleFactor) { + this.mapIdleFactor = mapIdleFactor; + return this; + } + + public Builder expandFactor(float expandFactor) { + this.expandFactor = expandFactor; + return this; + } + + public Builder shrinkFactor(float shrinkFactor) { + this.shrinkFactor = shrinkFactor; + return this; + } + + public Builder autoShrink(boolean autoShrink) { + this.autoShrink = autoShrink; + return this; + } + + public ConcurrentOpenHashMap build() { + return new ConcurrentOpenHashMap<>(expectedItems, concurrencyLevel, + mapFillFactor, mapIdleFactor, autoShrink, expandFactor, shrinkFactor); + } + } + + @Deprecated public ConcurrentOpenHashMap() { this(DefaultExpectedItems); } + @Deprecated public ConcurrentOpenHashMap(int expectedItems) { this(expectedItems, DefaultConcurrencyLevel); } + @Deprecated public ConcurrentOpenHashMap(int expectedItems, int concurrencyLevel) { + this(expectedItems, concurrencyLevel, DefaultMapFillFactor, DefaultMapIdleFactor, + DefaultAutoShrink, DefaultExpandFactor, DefaultShrinkFactor); + } + + public ConcurrentOpenHashMap(int expectedItems, int concurrencyLevel, + float mapFillFactor, float mapIdleFactor, + boolean autoShrink, float expandFactor, float shrinkFactor) { checkArgument(expectedItems > 0); checkArgument(concurrencyLevel > 0); checkArgument(expectedItems >= concurrencyLevel); + checkArgument(mapFillFactor > 0 && mapFillFactor < 1); + checkArgument(mapIdleFactor > 0 && mapIdleFactor < 1); + checkArgument(mapFillFactor > mapIdleFactor); + checkArgument(expandFactor > 1); + checkArgument(shrinkFactor > 1); int numSections = concurrencyLevel; int 
perSectionExpectedItems = expectedItems / numSections; - int perSectionCapacity = (int) (perSectionExpectedItems / MapFillFactor); + int perSectionCapacity = (int) (perSectionExpectedItems / mapFillFactor); this.sections = (Section[]) new Section[numSections]; for (int i = 0; i < numSections; i++) { - sections[i] = new Section<>(perSectionCapacity); + sections[i] = new Section<>(perSectionCapacity, mapFillFactor, mapIdleFactor, + autoShrink, expandFactor, shrinkFactor); + } + } + + long getUsedBucketCount() { + long usedBucketCount = 0; + for (Section s : sections) { + usedBucketCount += s.usedBuckets; } + return usedBucketCount; } public long size() { @@ -158,6 +257,12 @@ public void clear() { } } + /** + * Iterate over all the entries in the map and apply the processor function to each of them. + *

          + * Warning: Do Not Guarantee Thread-Safety. + * @param processor the function to apply to each entry + */ public void forEach(BiConsumer processor) { for (Section s : sections) { s.forEach(processor); @@ -193,26 +298,48 @@ public List values() { // A section is a portion of the hash map that is covered by a single @SuppressWarnings("serial") private static final class Section extends StampedLock { + // Each item take up 2 continuous array space. + private static final int ITEM_SIZE = 2; + // Keys and values are stored interleaved in the table array private volatile Object[] table; private volatile int capacity; + private final int initCapacity; private volatile int size; private int usedBuckets; - private int resizeThreshold; - - Section(int capacity) { + private int resizeThresholdUp; + private int resizeThresholdBelow; + private final float mapFillFactor; + private final float mapIdleFactor; + private final float expandFactor; + private final float shrinkFactor; + private final boolean autoShrink; + + Section(int capacity, float mapFillFactor, float mapIdleFactor, boolean autoShrink, + float expandFactor, float shrinkFactor) { this.capacity = alignToPowerOfTwo(capacity); - this.table = new Object[2 * this.capacity]; + this.initCapacity = this.capacity; + this.table = new Object[ITEM_SIZE * this.capacity]; this.size = 0; this.usedBuckets = 0; - this.resizeThreshold = (int) (this.capacity * MapFillFactor); + this.autoShrink = autoShrink; + this.mapFillFactor = mapFillFactor; + this.mapIdleFactor = mapIdleFactor; + this.expandFactor = expandFactor; + this.shrinkFactor = shrinkFactor; + this.resizeThresholdUp = (int) (this.capacity * mapFillFactor); + this.resizeThresholdBelow = (int) (this.capacity * mapIdleFactor); } V get(K key, int keyHash) { long stamp = tryOptimisticRead(); boolean acquiredLock = false; - int bucket = signSafeMod(keyHash, capacity); + + // add local variable here, so OutOfBound won't happen + Object[] table = this.table; + // calculate table.length / 2 as capacity to avoid rehash changing capacity + int bucket = signSafeMod(keyHash, table.length / ITEM_SIZE); try { while (true) { @@ -234,7 +361,9 @@ V get(K key, int keyHash) { stamp = readLock(); acquiredLock = true; - bucket = signSafeMod(keyHash, capacity); + // update local variable + table = this.table; + bucket = signSafeMod(keyHash, table.length / ITEM_SIZE); storedKey = (K) table[bucket]; storedValue = (V) table[bucket + 1]; } @@ -247,7 +376,7 @@ V get(K key, int keyHash) { } } - bucket = (bucket + 2) & (table.length - 1); + bucket = (bucket + ITEM_SIZE) & (table.length - 1); } } finally { if (acquiredLock) { @@ -300,12 +429,14 @@ V put(K key, V value, int keyHash, boolean onlyIfAbsent, Function valuePro } } - bucket = (bucket + 2) & (table.length - 1); + bucket = (bucket + ITEM_SIZE) & (table.length - 1); } } finally { - if (usedBuckets > resizeThreshold) { + if (usedBuckets > resizeThresholdUp) { try { - rehash(); + // Expand the hashmap + int newCapacity = alignToPowerOfTwo((int) (capacity * expandFactor)); + rehash(newCapacity); } finally { unlockWrite(stamp); } @@ -336,11 +467,28 @@ private V remove(K key, Object value, int keyHash) { return null; } - bucket = (bucket + 2) & (table.length - 1); + bucket = (bucket + ITEM_SIZE) & (table.length - 1); } } finally { - unlockWrite(stamp); + if (autoShrink && size < resizeThresholdBelow) { + try { + // Shrinking must at least ensure initCapacity, + // so as to avoid frequent shrinking and expansion near initCapacity, + // frequent shrinking and 
expansion, + // additionally opened arrays will consume more memory and affect GC + int newCapacity = Math.max(alignToPowerOfTwo((int) (capacity / shrinkFactor)), initCapacity); + int newResizeThresholdUp = (int) (newCapacity * mapFillFactor); + if (newCapacity < capacity && newResizeThresholdUp > size) { + // shrink the hashmap + rehash(newCapacity); + } + } finally { + unlockWrite(stamp); + } + } else { + unlockWrite(stamp); + } } } @@ -348,9 +496,13 @@ void clear() { long stamp = writeLock(); try { - Arrays.fill(table, EmptyKey); - this.size = 0; - this.usedBuckets = 0; + if (autoShrink && capacity > initCapacity) { + shrinkToInitCapacity(); + } else { + Arrays.fill(table, EmptyKey); + this.size = 0; + this.usedBuckets = 0; + } } finally { unlockWrite(stamp); } @@ -373,7 +525,7 @@ public void forEach(BiConsumer processor) { } // Go through all the buckets for this section - for (int bucket = 0; bucket < table.length; bucket += 2) { + for (int bucket = 0; bucket < table.length; bucket += ITEM_SIZE) { K storedKey = (K) table[bucket]; V storedValue = (V) table[bucket + 1]; @@ -403,7 +555,7 @@ int removeIf(BiPredicate filter) { int removedCount = 0; try { // Go through all the buckets for this section - for (int bucket = 0; bucket < table.length; bucket += 2) { + for (int bucket = 0; size > 0 && bucket < table.length; bucket += ITEM_SIZE) { K storedKey = (K) table[bucket]; V storedValue = (V) table[bucket + 1]; @@ -419,29 +571,56 @@ int removeIf(BiPredicate filter) { return removedCount; } finally { - unlockWrite(stamp); + if (autoShrink && size < resizeThresholdBelow) { + try { + // Shrinking must at least ensure initCapacity, + // so as to avoid frequent shrinking and expansion near initCapacity, + // frequent shrinking and expansion, + // additionally opened arrays will consume more memory and affect GC + int newCapacity = Math.max(alignToPowerOfTwo((int) (capacity / shrinkFactor)), initCapacity); + int newResizeThresholdUp = (int) (newCapacity * mapFillFactor); + if (newCapacity < capacity && newResizeThresholdUp > size) { + // shrink the hashmap + rehash(newCapacity); + } + } finally { + unlockWrite(stamp); + } + } else { + unlockWrite(stamp); + } } } private void cleanBucket(int bucket) { - int nextInArray = (bucket + 2) & (table.length - 1); + int nextInArray = (bucket + ITEM_SIZE) & (table.length - 1); if (table[nextInArray] == EmptyKey) { table[bucket] = EmptyKey; table[bucket + 1] = null; --usedBuckets; + + // Cleanup all the buckets that were in `DeletedKey` state, + // so that we can reduce unnecessary expansions + bucket = (bucket - ITEM_SIZE) & (table.length - 1); + while (table[bucket] == DeletedKey) { + table[bucket] = EmptyKey; + table[bucket + 1] = null; + --usedBuckets; + + bucket = (bucket - ITEM_SIZE) & (table.length - 1); + } } else { table[bucket] = DeletedKey; table[bucket + 1] = null; } } - private void rehash() { + private void rehash(int newCapacity) { // Expand the hashmap - int newCapacity = capacity * 2; - Object[] newTable = new Object[2 * newCapacity]; + Object[] newTable = new Object[ITEM_SIZE * newCapacity]; // Re-hash table - for (int i = 0; i < table.length; i += 2) { + for (int i = 0; i < table.length; i += ITEM_SIZE) { K storedKey = (K) table[i]; V storedValue = (V) table[i + 1]; if (storedKey != EmptyKey && storedKey != DeletedKey) { @@ -454,7 +633,21 @@ private void rehash() { // Capacity needs to be updated after the values, so that we won't see // a capacity value bigger than the actual array size capacity = newCapacity; - resizeThreshold = (int) 
(capacity * MapFillFactor); + resizeThresholdUp = (int) (capacity * mapFillFactor); + resizeThresholdBelow = (int) (capacity * mapIdleFactor); + } + + private void shrinkToInitCapacity() { + Object[] newTable = new Object[ITEM_SIZE * initCapacity]; + + table = newTable; + size = 0; + usedBuckets = 0; + // Capacity needs to be updated after the values, so that we won't see + // a capacity value bigger than the actual array size + capacity = initCapacity; + resizeThresholdUp = (int) (capacity * mapFillFactor); + resizeThresholdBelow = (int) (capacity * mapIdleFactor); } private static void insertKeyValueNoLock(Object[] table, int capacity, K key, V value) { @@ -470,7 +663,7 @@ private static void insertKeyValueNoLock(Object[] table, int capacity, K return; } - bucket = (bucket + 2) & (table.length - 1); + bucket = (bucket + ITEM_SIZE) & (table.length - 1); } } } @@ -486,6 +679,8 @@ static final long hash(K key) { } static final int signSafeMod(long n, int max) { + // as the ITEM_SIZE of Section is 2, so the index is the multiple of 2 + // that is to left shift 1 bit return (int) (n & (max - 1)) << 1; } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/collections/ConcurrentOpenHashSet.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/collections/ConcurrentOpenHashSet.java index 9cf6d221850..6aa5a389ce5 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/collections/ConcurrentOpenHashSet.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/collections/ConcurrentOpenHashSet.java @@ -24,7 +24,6 @@ import static com.google.common.base.Preconditions.checkNotNull; import com.google.common.collect.Lists; - import java.util.Arrays; import java.util.List; import java.util.concurrent.locks.StampedLock; @@ -36,6 +35,18 @@ *

          Provides similar methods as a {@code ConcurrentMap} but since it's an open hash map with linear probing, * no node allocations are required to store the values * + *
+ * WARN: the forEach method does not guarantee thread safety, nor does the values method. + *
+ * The forEach method is designed for single-threaded use. When iterating over a set + * that is being written to concurrently, new values may or may not be observed. + * There is no guarantee that if we write value1 and then value2, and can see value2, we will also see value1. + * + *
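+ * A minimal usage sketch (illustrative only; a type parameter is assumed on
+ * {@code newBuilder()}):
+ * <pre>{@code
+ * ConcurrentOpenHashSet<String> set = ConcurrentOpenHashSet.<String>newBuilder()
+ *         .expectedItems(256)
+ *         .autoShrink(true)
+ *         .build();
+ * set.add("bookie-1");
+ * boolean present = set.contains("bookie-1");
+ * set.remove("bookie-1");   // with autoShrink, removals may rehash to a smaller table
+ * }</pre>
+ *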
          + * It is crucial to understand that the results obtained from aggregate status methods such as values + * are typically reliable only when the map is not undergoing concurrent updates from other threads. + * When concurrent updates are involved, the results of these methods reflect transient states + * that may be suitable for monitoring or estimation purposes, but not for program control. * @param */ @SuppressWarnings("unchecked") @@ -44,33 +55,112 @@ public class ConcurrentOpenHashSet { private static final Object EmptyValue = null; private static final Object DeletedValue = new Object(); - private static final float MapFillFactor = 0.66f; - private static final int DefaultExpectedItems = 256; private static final int DefaultConcurrencyLevel = 16; + private static final float DefaultMapFillFactor = 0.66f; + private static final float DefaultMapIdleFactor = 0.15f; + + private static final float DefaultExpandFactor = 2; + private static final float DefaultShrinkFactor = 2; + + private static final boolean DefaultAutoShrink = false; + private final Section[] sections; + public static Builder newBuilder() { + return new Builder<>(); + } + + /** + * Builder of ConcurrentOpenHashSet. + */ + public static class Builder { + int expectedItems = DefaultExpectedItems; + int concurrencyLevel = DefaultConcurrencyLevel; + float mapFillFactor = DefaultMapFillFactor; + float mapIdleFactor = DefaultMapIdleFactor; + float expandFactor = DefaultExpandFactor; + float shrinkFactor = DefaultShrinkFactor; + boolean autoShrink = DefaultAutoShrink; + + public Builder expectedItems(int expectedItems) { + this.expectedItems = expectedItems; + return this; + } + + public Builder concurrencyLevel(int concurrencyLevel) { + this.concurrencyLevel = concurrencyLevel; + return this; + } + + public Builder mapFillFactor(float mapFillFactor) { + this.mapFillFactor = mapFillFactor; + return this; + } + + public Builder mapIdleFactor(float mapIdleFactor) { + this.mapIdleFactor = mapIdleFactor; + return this; + } + + public Builder expandFactor(float expandFactor) { + this.expandFactor = expandFactor; + return this; + } + + public Builder shrinkFactor(float shrinkFactor) { + this.shrinkFactor = shrinkFactor; + return this; + } + + public Builder autoShrink(boolean autoShrink) { + this.autoShrink = autoShrink; + return this; + } + + public ConcurrentOpenHashSet build() { + return new ConcurrentOpenHashSet<>(expectedItems, concurrencyLevel, + mapFillFactor, mapIdleFactor, autoShrink, expandFactor, shrinkFactor); + } + } + + @Deprecated public ConcurrentOpenHashSet() { this(DefaultExpectedItems); } + @Deprecated public ConcurrentOpenHashSet(int expectedItems) { this(expectedItems, DefaultConcurrencyLevel); } + @Deprecated public ConcurrentOpenHashSet(int expectedItems, int concurrencyLevel) { + this(expectedItems, concurrencyLevel, DefaultMapFillFactor, DefaultMapIdleFactor, + DefaultAutoShrink, DefaultExpandFactor, DefaultShrinkFactor); + } + + public ConcurrentOpenHashSet(int expectedItems, int concurrencyLevel, + float mapFillFactor, float mapIdleFactor, + boolean autoShrink, float expandFactor, float shrinkFactor) { checkArgument(expectedItems > 0); checkArgument(concurrencyLevel > 0); checkArgument(expectedItems >= concurrencyLevel); + checkArgument(mapFillFactor > 0 && mapFillFactor < 1); + checkArgument(mapIdleFactor > 0 && mapIdleFactor < 1); + checkArgument(mapFillFactor > mapIdleFactor); + checkArgument(expandFactor > 1); + checkArgument(shrinkFactor > 1); int numSections = concurrencyLevel; int 
perSectionExpectedItems = expectedItems / numSections; - int perSectionCapacity = (int) (perSectionExpectedItems / MapFillFactor); - this.sections = (Section[]) new Section[numSections]; + int perSectionCapacity = (int) (perSectionExpectedItems / mapFillFactor); + this.sections = new Section[numSections]; for (int i = 0; i < numSections; i++) { - sections[i] = new Section<>(perSectionCapacity); + sections[i] = new Section<>(perSectionCapacity, mapFillFactor, mapIdleFactor, + autoShrink, expandFactor, shrinkFactor); } } @@ -90,6 +180,14 @@ public long capacity() { return capacity; } + long getUsedBucketCount() { + long usedBucketCount = 0; + for (Section s : sections) { + usedBucketCount += s.usedBuckets; + } + return usedBucketCount; + } + public boolean isEmpty() { for (Section s : sections) { if (s.size != 0) { @@ -130,6 +228,12 @@ public void clear() { } } + /** + * Iterate over all the elements in the set and apply the provided function. + *

          + * Warning: Do Not Guarantee Thread-Safety. + * @param processor the function to apply to each element + */ public void forEach(Consumer processor) { for (Section s : sections) { s.forEach(processor); @@ -151,29 +255,44 @@ private static final class Section extends StampedLock { private volatile V[] values; private volatile int capacity; + private final int initCapacity; private volatile int size; private int usedBuckets; - private int resizeThreshold; - - Section(int capacity) { + private int resizeThresholdUp; + private int resizeThresholdBelow; + private final float mapFillFactor; + private final float mapIdleFactor; + private final float expandFactor; + private final float shrinkFactor; + private final boolean autoShrink; + + Section(int capacity, float mapFillFactor, float mapIdleFactor, boolean autoShrink, + float expandFactor, float shrinkFactor) { this.capacity = alignToPowerOfTwo(capacity); + this.initCapacity = this.capacity; this.values = (V[]) new Object[this.capacity]; this.size = 0; this.usedBuckets = 0; - this.resizeThreshold = (int) (this.capacity * MapFillFactor); + this.autoShrink = autoShrink; + this.mapFillFactor = mapFillFactor; + this.mapIdleFactor = mapIdleFactor; + this.expandFactor = expandFactor; + this.shrinkFactor = shrinkFactor; + this.resizeThresholdUp = (int) (this.capacity * mapFillFactor); + this.resizeThresholdBelow = (int) (this.capacity * mapIdleFactor); } boolean contains(V value, int keyHash) { - int bucket = keyHash; - long stamp = tryOptimisticRead(); boolean acquiredLock = false; + // add local variable here, so OutOfBound won't happen + V[] values = this.values; + // calculate table.length as capacity to avoid rehash changing capacity + int bucket = signSafeMod(keyHash, values.length); + try { while (true) { - int capacity = this.capacity; - bucket = signSafeMod(bucket, capacity); - // First try optimistic locking V storedValue = values[bucket]; @@ -191,15 +310,12 @@ boolean contains(V value, int keyHash) { stamp = readLock(); acquiredLock = true; + // update local variable + values = this.values; + bucket = signSafeMod(keyHash, values.length); storedValue = values[bucket]; } - if (capacity != this.capacity) { - // There has been a rehashing. 
We need to restart the search - bucket = keyHash; - continue; - } - if (value.equals(storedValue)) { return true; } else if (storedValue == EmptyValue) { @@ -207,8 +323,7 @@ boolean contains(V value, int keyHash) { return false; } } - - ++bucket; + bucket = (bucket + 1) & (values.length - 1); } } finally { if (acquiredLock) { @@ -256,9 +371,11 @@ boolean add(V value, int keyHash) { ++bucket; } } finally { - if (usedBuckets > resizeThreshold) { + if (usedBuckets > resizeThresholdUp) { try { - rehash(); + // Expand the hashmap + int newCapacity = alignToPowerOfTwo((int) (capacity * expandFactor)); + rehash(newCapacity); } finally { unlockWrite(stamp); } @@ -285,6 +402,16 @@ private boolean remove(V value, int keyHash) { if (values[nextInArray] == EmptyValue) { values[bucket] = (V) EmptyValue; --usedBuckets; + + // Cleanup all the buckets that were in `DeletedValue` state, + // so that we can reduce unnecessary expansions + int lastBucket = signSafeMod(bucket - 1, capacity); + while (values[lastBucket] == DeletedValue) { + values[lastBucket] = (V) EmptyValue; + --usedBuckets; + + lastBucket = signSafeMod(--lastBucket, capacity); + } } else { values[bucket] = (V) DeletedValue; } @@ -299,7 +426,24 @@ private boolean remove(V value, int keyHash) { } } finally { - unlockWrite(stamp); + if (autoShrink && size < resizeThresholdBelow) { + try { + // Shrinking must at least ensure initCapacity, + // so as to avoid frequent shrinking and expansion near initCapacity, + // frequent shrinking and expansion, + // additionally opened arrays will consume more memory and affect GC + int newCapacity = Math.max(alignToPowerOfTwo((int) (capacity / shrinkFactor)), initCapacity); + int newResizeThresholdUp = (int) (newCapacity * mapFillFactor); + if (newCapacity < capacity && newResizeThresholdUp > size) { + // shrink the hashmap + rehash(newCapacity); + } + } finally { + unlockWrite(stamp); + } + } else { + unlockWrite(stamp); + } } } @@ -307,9 +451,13 @@ void clear() { long stamp = writeLock(); try { - Arrays.fill(values, EmptyValue); - this.size = 0; - this.usedBuckets = 0; + if (autoShrink && capacity > initCapacity) { + shrinkToInitCapacity(); + } else { + Arrays.fill(values, EmptyValue); + this.size = 0; + this.usedBuckets = 0; + } } finally { unlockWrite(stamp); } @@ -358,9 +506,8 @@ public void forEach(Consumer processor) { } } - private void rehash() { + private void rehash(int newCapacity) { // Expand the hashmap - int newCapacity = capacity * 2; V[] newValues = (V[]) new Object[newCapacity]; // Re-hash table @@ -376,7 +523,21 @@ private void rehash() { // Capacity needs to be updated after the values, so that we won't see // a capacity value bigger than the actual array size capacity = newCapacity; - resizeThreshold = (int) (capacity * MapFillFactor); + resizeThresholdUp = (int) (capacity * mapFillFactor); + resizeThresholdBelow = (int) (capacity * mapIdleFactor); + } + + private void shrinkToInitCapacity() { + V[] newValues = (V[]) new Object[initCapacity]; + + values = newValues; + size = 0; + usedBuckets = 0; + // Capacity needs to be updated after the values, so that we won't see + // a capacity value bigger than the actual array size + capacity = initCapacity; + resizeThresholdUp = (int) (capacity * mapFillFactor); + resizeThresholdBelow = (int) (capacity * mapIdleFactor); } private static void insertValueNoLock(V[] values, V value) { diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/collections/GrowableArrayBlockingQueue.java 
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/collections/GrowableArrayBlockingQueue.java deleted file mode 100644 index e8143a39aa3..00000000000 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/collections/GrowableArrayBlockingQueue.java +++ /dev/null @@ -1,359 +0,0 @@ -/* - * - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - * - */ -package org.apache.bookkeeper.util.collections; - -import java.util.AbstractQueue; -import java.util.Collection; -import java.util.Iterator; -import java.util.NoSuchElementException; -import java.util.concurrent.BlockingQueue; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicIntegerFieldUpdater; -import java.util.concurrent.locks.Condition; -import java.util.concurrent.locks.ReentrantLock; - -import org.apache.bookkeeper.util.MathUtils; - - -/** - * This implements a {@link BlockingQueue} backed by an array with no fixed capacity. - * - *

          When the capacity is reached, data will be moved to a bigger array. - * - */ -public class GrowableArrayBlockingQueue extends AbstractQueue implements BlockingQueue { - - private final ReentrantLock headLock = new ReentrantLock(); - private final PaddedInt headIndex = new PaddedInt(); - private final PaddedInt tailIndex = new PaddedInt(); - private final ReentrantLock tailLock = new ReentrantLock(); - private final Condition isNotEmpty = headLock.newCondition(); - - private T[] data; - @SuppressWarnings("rawtypes") - private static final AtomicIntegerFieldUpdater SIZE_UPDATER = - AtomicIntegerFieldUpdater.newUpdater(GrowableArrayBlockingQueue.class, "size"); - @SuppressWarnings("unused") - private volatile int size = 0; - - public GrowableArrayBlockingQueue() { - this(64); - } - - @SuppressWarnings("unchecked") - public GrowableArrayBlockingQueue(int initialCapacity) { - headIndex.value = 0; - tailIndex.value = 0; - - int capacity = MathUtils.findNextPositivePowerOfTwo(initialCapacity); - data = (T[]) new Object[capacity]; - } - - @Override - public T remove() { - T item = poll(); - if (item == null) { - throw new NoSuchElementException(); - } - - return item; - } - - @Override - public T poll() { - headLock.lock(); - try { - if (SIZE_UPDATER.get(this) > 0) { - T item = data[headIndex.value]; - headIndex.value = (headIndex.value + 1) & (data.length - 1); - SIZE_UPDATER.decrementAndGet(this); - return item; - } else { - return null; - } - } finally { - headLock.unlock(); - } - } - - @Override - public T element() { - T item = peek(); - if (item == null) { - throw new NoSuchElementException(); - } - - return item; - } - - @Override - public T peek() { - headLock.lock(); - try { - if (SIZE_UPDATER.get(this) > 0) { - return data[headIndex.value]; - } else { - return null; - } - } finally { - headLock.unlock(); - } - } - - @Override - public boolean offer(T e) { - // Queue is unbounded and it will never reject new items - put(e); - return true; - } - - @Override - public void put(T e) { - tailLock.lock(); - - boolean wasEmpty = false; - - try { - if (SIZE_UPDATER.get(this) == data.length) { - expandArray(); - } - - data[tailIndex.value] = e; - tailIndex.value = (tailIndex.value + 1) & (data.length - 1); - if (SIZE_UPDATER.getAndIncrement(this) == 0) { - wasEmpty = true; - } - } finally { - tailLock.unlock(); - } - - if (wasEmpty) { - headLock.lock(); - try { - isNotEmpty.signal(); - } finally { - headLock.unlock(); - } - } - } - - @Override - public boolean add(T e) { - put(e); - return true; - } - - @Override - public boolean offer(T e, long timeout, TimeUnit unit) { - // Queue is unbounded and it will never reject new items - put(e); - return true; - } - - @Override - public T take() throws InterruptedException { - headLock.lockInterruptibly(); - - try { - while (SIZE_UPDATER.get(this) == 0) { - isNotEmpty.await(); - } - - T item = data[headIndex.value]; - data[headIndex.value] = null; - headIndex.value = (headIndex.value + 1) & (data.length - 1); - if (SIZE_UPDATER.decrementAndGet(this) > 0) { - // There are still entries to consume - isNotEmpty.signal(); - } - return item; - } finally { - headLock.unlock(); - } - } - - @Override - public T poll(long timeout, TimeUnit unit) throws InterruptedException { - headLock.lockInterruptibly(); - - try { - long timeoutNanos = unit.toNanos(timeout); - while (SIZE_UPDATER.get(this) == 0) { - if (timeoutNanos <= 0) { - return null; - } - - timeoutNanos = isNotEmpty.awaitNanos(timeoutNanos); - } - - T item = data[headIndex.value]; - 
data[headIndex.value] = null; - headIndex.value = (headIndex.value + 1) & (data.length - 1); - if (SIZE_UPDATER.decrementAndGet(this) > 0) { - // There are still entries to consume - isNotEmpty.signal(); - } - return item; - } finally { - headLock.unlock(); - } - } - - @Override - public int remainingCapacity() { - return Integer.MAX_VALUE; - } - - @Override - public int drainTo(Collection c) { - return drainTo(c, Integer.MAX_VALUE); - } - - @Override - public int drainTo(Collection c, int maxElements) { - headLock.lock(); - - try { - int drainedItems = 0; - int size = SIZE_UPDATER.get(this); - - while (size > 0 && drainedItems < maxElements) { - T item = data[headIndex.value]; - data[headIndex.value] = null; - c.add(item); - - headIndex.value = (headIndex.value + 1) & (data.length - 1); - --size; - ++drainedItems; - } - - if (SIZE_UPDATER.addAndGet(this, -drainedItems) > 0) { - // There are still entries to consume - isNotEmpty.signal(); - } - - return drainedItems; - } finally { - headLock.unlock(); - } - } - - @Override - public void clear() { - headLock.lock(); - - try { - int size = SIZE_UPDATER.get(this); - - for (int i = 0; i < size; i++) { - data[headIndex.value] = null; - headIndex.value = (headIndex.value + 1) & (data.length - 1); - } - - if (SIZE_UPDATER.addAndGet(this, -size) > 0) { - // There are still entries to consume - isNotEmpty.signal(); - } - } finally { - headLock.unlock(); - } - } - - @Override - public int size() { - return SIZE_UPDATER.get(this); - } - - @Override - public Iterator iterator() { - throw new UnsupportedOperationException(); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder(); - - tailLock.lock(); - headLock.lock(); - - try { - int headIndex = this.headIndex.value; - int size = SIZE_UPDATER.get(this); - - sb.append('['); - - for (int i = 0; i < size; i++) { - T item = data[headIndex]; - if (i > 0) { - sb.append(", "); - } - - sb.append(item); - - headIndex = (headIndex + 1) & (data.length - 1); - } - - sb.append(']'); - } finally { - headLock.unlock(); - tailLock.unlock(); - } - return sb.toString(); - } - - @SuppressWarnings("unchecked") - private void expandArray() { - // We already hold the tailLock - headLock.lock(); - - try { - int size = SIZE_UPDATER.get(this); - int newCapacity = data.length * 2; - T[] newData = (T[]) new Object[newCapacity]; - - int oldHeadIndex = headIndex.value; - int newTailIndex = 0; - - for (int i = 0; i < size; i++) { - newData[newTailIndex++] = data[oldHeadIndex]; - oldHeadIndex = (oldHeadIndex + 1) & (data.length - 1); - } - - data = newData; - headIndex.value = 0; - tailIndex.value = size; - } finally { - headLock.unlock(); - } - } - - static final class PaddedInt { - private int value; - - // Padding to avoid false sharing - public volatile int pi1 = 1; - public volatile long p1 = 1L, p2 = 2L, p3 = 3L, p4 = 4L, p5 = 5L, p6 = 6L; - - public long exposeToAvoidOptimization() { - return pi1 + p1 + p2 + p3 + p4 + p5 + p6; - } - } -} diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/collections/SynchronizedHashMultiMap.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/collections/SynchronizedHashMultiMap.java index 6e6e3c189a6..db7bd0bf53e 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/collections/SynchronizedHashMultiMap.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/collections/SynchronizedHashMultiMap.java @@ -28,13 +28,12 @@ import java.util.Set; import java.util.function.BiPredicate; import 
org.apache.commons.lang3.tuple.Pair; - /** * Simple multimap implementation that only stores a key reference once. * *

<p>Implementation is aimed at storing PerChannelBookieClient completions when there * are duplicates. If the key is a pooled object, it must not exist once the value - * has been removed from the map, which can happen with guava multimap implemenations. + * has been removed from the map, which can happen with guava multimap implementations. * *

<p>This map is implemented with pretty heavy locking, but this shouldn't be an * issue as the multimap only needs to be used in rare cases, i.e. when a user tries diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/collections/SynchronizedHashMultiMap.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/verifier/BookkeeperVerifier.java index 821d260c7be..f492fa8eb2a 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/verifier/BookkeeperVerifier.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/verifier/BookkeeperVerifier.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -15,10 +15,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package org.apache.bookkeeper.verifier; import static com.google.common.base.Preconditions.checkState; +import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -32,7 +34,6 @@ import java.util.TreeSet; import java.util.function.BiConsumer; import java.util.function.Consumer; - import org.apache.bookkeeper.client.BKException; /** @@ -180,6 +181,7 @@ private long getNextLedgerID() { /** * State required to regenerate an entry. */ + @SuppressFBWarnings("DMI_RANDOM_USED_ONLY_ONCE") class EntryInfo { private final long entryID; private final long seed; @@ -301,7 +303,7 @@ EntryIterator getIterator() { void openWrite(long entryID) { writesInProgress.add(entryID); - System.out.format("Open writes, %s%n", writesInProgress.toString()); + System.out.format("Open writes, %s%n", writesInProgress); } void incReads() { @@ -382,7 +384,7 @@ private void checkWriteComplete(Consumer cb) { System.out.format( "checkWriteComplete: ledger %d, writesInProgress %s%n", ledgerID, - writesInProgress.toString()); + writesInProgress); cb.accept(0); } } @@ -396,7 +398,7 @@ private void checkOpComplete(Consumer cb) { System.out.format( "checkOpComplete: ledger %d, writesInProgress %s, readsInProgress %d%n", ledgerID, - writesInProgress.toString(), readsInProgress); + writesInProgress, readsInProgress); cb.accept(0); } } @@ -471,7 +473,7 @@ private synchronized boolean startRead() { current, ledger.ledgerID, result.length, check.length) )); } - /* Verify contents */ + /* Verify contents */ if (!Arrays.equals(check, result)) { int i = 0; for (; i < check.length; ++i) { @@ -657,7 +659,7 @@ public synchronized void run() throws Exception { /* Wait for all in progress ops to complete, outstanding*Count is updated under the lock */ while ((System.currentTimeMillis() < testDrainEnd) - && (outstandingReadCount > 0 || outstandingWriteCount > 0)) { + && (outstandingReadCount > 0 || outstandingWriteCount > 0)) { System.out.format("reads: %d, writes: %d%n", outstandingReadCount, outstandingWriteCount); System.out.format("openingLedgers:%n"); for (LedgerInfo li: openingLedgers) { diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/verifier/BookkeeperVerifierMain.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/verifier/BookkeeperVerifierMain.java index 99b6afef550..c0d3f54c54e 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/verifier/BookkeeperVerifierMain.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/verifier/BookkeeperVerifierMain.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the
Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/verifier/DirectBookkeeperDriver.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/verifier/DirectBookkeeperDriver.java index 9611082cc00..772294a01af 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/verifier/DirectBookkeeperDriver.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/verifier/DirectBookkeeperDriver.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -25,7 +25,6 @@ import java.util.concurrent.ConcurrentHashMap; import java.util.function.BiConsumer; import java.util.function.Consumer; - import org.apache.bookkeeper.client.BKException; import org.apache.bookkeeper.client.BookKeeper; import org.apache.bookkeeper.client.LedgerHandle; diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/versioning/Versioned.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/versioning/Versioned.java index f6926248d02..85e695cd534 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/versioning/Versioned.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/versioning/Versioned.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/zookeeper/BoundExponentialBackoffRetryPolicy.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/zookeeper/BoundExponentialBackoffRetryPolicy.java index 7fe237a900a..50d0807d001 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/zookeeper/BoundExponentialBackoffRetryPolicy.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/zookeeper/BoundExponentialBackoffRetryPolicy.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/zookeeper/ExponentialBackOffWithDeadlinePolicy.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/zookeeper/ExponentialBackOffWithDeadlinePolicy.java new file mode 100644 index 00000000000..0cc6ad95e2c --- /dev/null +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/zookeeper/ExponentialBackOffWithDeadlinePolicy.java @@ -0,0 +1,73 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + */ +package org.apache.bookkeeper.zookeeper; + +import java.util.Arrays; +import java.util.Random; +import lombok.extern.slf4j.Slf4j; + +/** + * Backoff time is determined as a multiple of baseBackoffTime. + * The multiple value depends on retryCount. + * If the retry schedule exceeds the deadline, we schedule a final attempt exactly at the deadline. + */ +@Slf4j +public class ExponentialBackOffWithDeadlinePolicy implements RetryPolicy { + + static final int [] RETRY_BACKOFF = {0, 1, 2, 3, 5, 5, 5, 10, 10, 10, 20, 40, 100}; + public static final int JITTER_PERCENT = 10; + private final Random random; + + private final long baseBackoffTime; + private final long deadline; + private final int maxRetries; + + public ExponentialBackOffWithDeadlinePolicy(long baseBackoffTime, long deadline, int maxRetries) { + this.baseBackoffTime = baseBackoffTime; + this.deadline = deadline; + this.maxRetries = maxRetries; + this.random = new Random(System.currentTimeMillis()); + } + + @Override + public boolean allowRetry(int retryCount, long elapsedRetryTime) { + return retryCount <= maxRetries && elapsedRetryTime < deadline; + } + + @Override + public long nextRetryWaitTime(int retryCount, long elapsedRetryTime) { + int idx = retryCount; + if (idx >= RETRY_BACKOFF.length) { + idx = RETRY_BACKOFF.length - 1; + } + + long waitTime = (baseBackoffTime * RETRY_BACKOFF[idx]); + long jitter = (random.nextInt(JITTER_PERCENT) * waitTime / 100); + + if (elapsedRetryTime + waitTime + jitter > deadline) { + log.warn("Final retry attempt: {}, time left: {}, stacktrace: {}", + retryCount, (deadline - elapsedRetryTime), Arrays.toString(Thread.currentThread().getStackTrace())); + return deadline - elapsedRetryTime; + } + + return waitTime + jitter; + } +} \ No newline at end of file diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/zookeeper/ExponentialBackoffRetryPolicy.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/zookeeper/ExponentialBackoffRetryPolicy.java index bb66ee9e52c..bd246ab390d 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/zookeeper/ExponentialBackoffRetryPolicy.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/zookeeper/ExponentialBackoffRetryPolicy.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/zookeeper/RetryPolicy.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/zookeeper/RetryPolicy.java index fba8b1a879c..f7afeebee93 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/zookeeper/RetryPolicy.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/zookeeper/RetryPolicy.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/zookeeper/ZooKeeperClient.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/zookeeper/ZooKeeperClient.java index be037f506c5..3bddcb2f2ba 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/zookeeper/ZooKeeperClient.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/zookeeper/ZooKeeperClient.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
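For orientation, a worked example of the schedule implemented above; the constructor arguments here are illustrative, not taken from this patch:

    // base backoff 100 ms, 2 s deadline, at most 20 retries
    RetryPolicy policy = new ExponentialBackOffWithDeadlinePolicy(100, 2000, 20);
    // retry 4 waits baseBackoffTime * RETRY_BACKOFF[4] = 100 * 5 = 500 ms,
    // plus up to 10% jitter (so roughly 500-545 ms here)
    long wait = policy.nextRetryWaitTime(4, 600);
    // still allowed: 5 <= maxRetries and 1200 ms elapsed < the 2000 ms deadline
    boolean retryAllowed = policy.allowRetry(5, 1200);
    // once elapsed + wait + jitter would cross the deadline, nextRetryWaitTime
    // returns exactly (deadline - elapsedRetryTime) for one final attempt
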
See the NOTICE file @@ -228,8 +228,11 @@ public ZooKeeperClient build() throws IOException, KeeperException, InterruptedE checkArgument(retryExecThreadCount > 0); if (null == connectRetryPolicy) { + // Session expiry event is received by client only when zk quorum is well established. + // All other connection loss retries happen at zk client library transparently. + // Hence, we don't need to wait before retrying. connectRetryPolicy = - new BoundExponentialBackoffRetryPolicy(sessionTimeoutMs, sessionTimeoutMs, Integer.MAX_VALUE); + new BoundExponentialBackoffRetryPolicy(0, 0, Integer.MAX_VALUE); } if (null == operationRetryPolicy) { operationRetryPolicy = @@ -238,9 +241,9 @@ public ZooKeeperClient build() throws IOException, KeeperException, InterruptedE // Create a watcher manager StatsLogger watcherStatsLogger = statsLogger.scope("watcher"); - ZooKeeperWatcherBase watcherManager = - null == watchers ? new ZooKeeperWatcherBase(sessionTimeoutMs, watcherStatsLogger) : - new ZooKeeperWatcherBase(sessionTimeoutMs, watchers, watcherStatsLogger); + ZooKeeperWatcherBase watcherManager = (null == watchers) + ? new ZooKeeperWatcherBase(sessionTimeoutMs, allowReadOnlyMode, watcherStatsLogger) + : new ZooKeeperWatcherBase(sessionTimeoutMs, allowReadOnlyMode, watchers, watcherStatsLogger); ZooKeeperClient client = new ZooKeeperClient( connectString, sessionTimeoutMs, @@ -271,7 +274,7 @@ public static Builder newBuilder() { return new Builder(); } - ZooKeeperClient(String connectString, + protected ZooKeeperClient(String connectString, int sessionTimeoutMs, ZooKeeperWatcherBase watcherManager, RetryPolicy connectRetryPolicy, @@ -329,7 +332,7 @@ private void closeZkHandle() throws InterruptedException { } } - protected void waitForConnection() throws KeeperException, InterruptedException { + public void waitForConnection() throws KeeperException, InterruptedException { watcherManager.waitForConnection(); } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/zookeeper/ZooKeeperWatcherBase.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/zookeeper/ZooKeeperWatcherBase.java index f5816b0c331..e44a5f364cd 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/zookeeper/ZooKeeperWatcherBase.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/zookeeper/ZooKeeperWatcherBase.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
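To make the new connect-retry default concrete, a hedged sketch of building a client; the connect string and timeout are hypothetical, and the fluent setter names are assumed to mirror the builder fields used in build() above:

    // build() throws IOException, KeeperException and InterruptedException
    ZooKeeperClient zkc = ZooKeeperClient.newBuilder()
            .connectString("zk1:2181,zk2:2181")   // hypothetical ensemble
            .sessionTimeoutMs(10000)
            .operationRetryPolicy(
                    new BoundExponentialBackoffRetryPolicy(10, 1000, 5))
            .build();
    // With no connectRetryPolicy supplied, build() now falls back to
    // BoundExponentialBackoffRetryPolicy(0, 0, Integer.MAX_VALUE): session
    // re-establishment retries immediately, since the ZooKeeper client
    // library already paces connection-loss retries internally.
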
See the NOTICE file @@ -44,6 +44,7 @@ public class ZooKeeperWatcherBase implements Watcher { .getLogger(ZooKeeperWatcherBase.class); private final int zkSessionTimeOut; + private final boolean allowReadOnlyMode; private volatile CountDownLatch clientConnectLatch = new CountDownLatch(1); private final CopyOnWriteArraySet childWatchers = new CopyOnWriteArraySet(); @@ -53,18 +54,20 @@ public class ZooKeeperWatcherBase implements Watcher { private final ConcurrentHashMap eventCounters = new ConcurrentHashMap(); - public ZooKeeperWatcherBase(int zkSessionTimeOut) { - this(zkSessionTimeOut, NullStatsLogger.INSTANCE); + public ZooKeeperWatcherBase(int zkSessionTimeOut, boolean allowReadOnlyMode) { + this(zkSessionTimeOut, allowReadOnlyMode, NullStatsLogger.INSTANCE); } - public ZooKeeperWatcherBase(int zkSessionTimeOut, StatsLogger statsLogger) { - this(zkSessionTimeOut, new HashSet(), statsLogger); + public ZooKeeperWatcherBase(int zkSessionTimeOut, boolean allowReadOnlyMode, StatsLogger statsLogger) { + this(zkSessionTimeOut, allowReadOnlyMode, new HashSet(), statsLogger); } public ZooKeeperWatcherBase(int zkSessionTimeOut, + boolean allowReadOnlyMode, Set childWatchers, StatsLogger statsLogger) { this.zkSessionTimeOut = zkSessionTimeOut; + this.allowReadOnlyMode = allowReadOnlyMode; this.childWatchers.addAll(childWatchers); this.statsLogger = statsLogger; } @@ -130,6 +133,14 @@ public void process(WatchedEvent event) { LOG.info("ZooKeeper client is connected now."); clientConnectLatch.countDown(); break; + case ConnectedReadOnly: + if (allowReadOnlyMode) { + LOG.info("ZooKeeper client is connected in read-only mode now."); + clientConnectLatch.countDown(); + } else { + LOG.warn("ZooKeeper client is connected in read-only mode, which is not allowed."); + } + break; case Disconnected: LOG.info("ZooKeeper client is disconnected from zookeeper now," + " but it is OK unless we received EXPIRED event."); diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/zookeeper/ZooWorker.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/zookeeper/ZooWorker.java index c2ca0194785..48cd902ccc2 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/zookeeper/ZooWorker.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/zookeeper/ZooWorker.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
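A minimal sketch of the read-only gating added above (the session timeout value is arbitrary):

    // With allowReadOnlyMode = true, ConnectedReadOnly counts down the same
    // connect latch as SyncConnected; with false it only logs a warning and
    // waiters keep blocking.
    ZooKeeperWatcherBase watcher = new ZooKeeperWatcherBase(10000, true);
    watcher.waitForConnection(); // returns on SyncConnected or ConnectedReadOnly
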
See the NOTICE file @@ -23,8 +23,8 @@ import com.google.common.util.concurrent.RateLimiter; import java.util.concurrent.Callable; import java.util.concurrent.TimeUnit; +import org.apache.bookkeeper.common.util.MathUtils; import org.apache.bookkeeper.stats.OpStatsLogger; -import org.apache.bookkeeper.util.MathUtils; import org.apache.zookeeper.KeeperException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/bookkeeper-server/src/main/resources/log4j2.xml b/bookkeeper-server/src/main/resources/log4j2.xml new file mode 100644 index 00000000000..3e3e4bc8c8e --- /dev/null +++ b/bookkeeper-server/src/main/resources/log4j2.xml @@ -0,0 +1,33 @@ + + + + + + + + + + + + + + diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/auth/TestAuth.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/auth/TestAuth.java index cca97e8d404..5b4ed570002 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/auth/TestAuth.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/auth/TestAuth.java @@ -28,10 +28,10 @@ import java.io.IOException; import java.util.Arrays; +import java.util.Collection; import java.util.Enumeration; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; - import org.apache.bookkeeper.client.BKException; import org.apache.bookkeeper.client.BKException.BKNotEnoughBookiesException; import org.apache.bookkeeper.client.BookKeeper; @@ -45,12 +45,16 @@ import org.apache.bookkeeper.proto.ClientConnectionPeer; import org.apache.bookkeeper.test.BookKeeperClusterTestCase; import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * Test authentication. 
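* <p>Each test case now runs once per wire protocol version (v2 and v3), driven by the @Parameters factory below.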
*/ +@RunWith(Parameterized.class) public class TestAuth extends BookKeeperClusterTestCase { static final Logger LOG = LoggerFactory.getLogger(TestAuth.class); public static final String TEST_AUTH_PROVIDER_PLUGIN_NAME = "TestAuthProviderPlugin"; @@ -61,8 +65,29 @@ public class TestAuth extends BookKeeperClusterTestCase { private static final byte[] FAILURE_RESPONSE = {2}; private static final byte[] PAYLOAD_MESSAGE = {3}; - public TestAuth() { + enum ProtocolVersion { + ProtocolV2, ProtocolV3 + } + + @Parameters + public static Collection configs() { + return Arrays.asList(new Object[][] { + { ProtocolVersion.ProtocolV2 }, + { ProtocolVersion.ProtocolV3 }, + }); + } + + private final ProtocolVersion protocolVersion; + + public TestAuth(ProtocolVersion protocolVersion) { super(0); // start them later when auth providers are configured + this.protocolVersion = protocolVersion; + } + + protected ClientConfiguration newClientConfiguration() { + ClientConfiguration conf = super.newClientConfiguration(); + conf.setUseV2WireProtocol(protocolVersion == ProtocolVersion.ProtocolV2); + return conf; } // we pass in ledgerId because the method may throw exceptions @@ -74,7 +99,6 @@ private void connectAndWriteToBookie(ClientConfiguration conf, AtomicLong ledger PASSWD)) { ledgerWritten.set(l.getId()); l.addEntry(ENTRY); - l.close(); } } @@ -85,14 +109,14 @@ private void connectAndWriteToBookie(ClientConfiguration conf, AtomicLong ledger private int entryCount(long ledgerId, ServerConfiguration bookieConf, ClientConfiguration clientConf) throws Exception { LOG.info("Counting entries in {}", ledgerId); - for (ServerConfiguration conf : bsConfs) { - conf.setBookieAuthProviderFactoryClass( - AlwaysSucceedBookieAuthProviderFactory.class.getName()); - } clientConf.setClientAuthProviderFactoryClass( SendUntilCompleteClientAuthProviderFactory.class.getName()); - restartBookies(); + restartBookies(c -> { + c.setBookieAuthProviderFactoryClass( + AlwaysSucceedBookieAuthProviderFactory.class.getName()); + return c; + }); int count = 0; try (BookKeeper bkc = new BookKeeper(clientConf, zkc); @@ -136,6 +160,13 @@ public void testSingleMessageAuth() throws Exception { @Test public void testCloseMethodCalledOnAuthProvider() throws Exception { + LogCloseCallsBookieAuthProviderFactory.closeCountersOnFactory.set(0); + LogCloseCallsBookieAuthProviderFactory.closeCountersOnConnections.set(0); + LogCloseCallsBookieAuthProviderFactory.initCountersOnFactory.set(0); + LogCloseCallsBookieAuthProviderFactory.initCountersOnConnections.set(0); + LogCloseCallsClientAuthProviderFactory.initCountersOnFactory.set(0); + LogCloseCallsClientAuthProviderFactory.closeCountersOnFactory.set(0); + ServerConfiguration bookieConf = newServerConfiguration(); bookieConf.setBookieAuthProviderFactoryClass( LogCloseCallsBookieAuthProviderFactory.class.getName()); @@ -152,9 +183,7 @@ public void testCloseMethodCalledOnAuthProvider() throws Exception { assertFalse(ledgerId.get() == -1); assertEquals("Should have entry", 1, entryCount(ledgerId.get(), bookieConf, clientConf)); - for (BookieServer bks : bs) { - bks.shutdown(); - } + stopAllBookies(); assertEquals(LogCloseCallsBookieAuthProviderFactory.initCountersOnConnections.get(), LogCloseCallsBookieAuthProviderFactory.closeCountersOnConnections.get()); @@ -244,7 +273,7 @@ public void testMultiMessageAuthFailure() throws Exception { fail("Shouldn't get this far"); } catch (BKException.BKUnauthorizedAccessException bke) { // bookie should have sent a negative response before - // breaking the conneciton 
+ // breaking the connection } assertFalse(ledgerId.get() == -1); assertEquals("Shouldn't have entry", 0, entryCount(ledgerId.get(), bookieConf, clientConf)); @@ -271,7 +300,12 @@ public void testDifferentPluginFailure() throws Exception { fail("Shouldn't get this far"); } catch (BKException.BKUnauthorizedAccessException bke) { // bookie should have sent a negative response before - // breaking the conneciton + // breaking the connection + assertEquals(ProtocolVersion.ProtocolV3, protocolVersion); + } catch (BKException.BKNotEnoughBookiesException nebe) { + // With V2 we don't get the authorization error, but rather just + // fail to write to bookies. + assertEquals(ProtocolVersion.ProtocolV2, protocolVersion); } assertFalse(ledgerId.get() == -1); assertEquals("Shouldn't have entry", 0, entryCount(ledgerId.get(), bookieConf, clientConf)); @@ -282,7 +316,7 @@ public void testDifferentPluginFailure() throws Exception { * doesn't implement the interface, we fail predictably. */ @Test - public void testExistantButNotValidPlugin() throws Exception { + public void testExistentButNotValidPlugin() throws Exception { ServerConfiguration bookieConf = newServerConfiguration(); bookieConf.setBookieAuthProviderFactoryClass( "java.lang.String"); @@ -317,14 +351,14 @@ public void testExistantButNotValidPlugin() throws Exception { * break. */ @Test - public void testNonExistantPlugin() throws Exception { + public void testNonExistentPlugin() throws Exception { ServerConfiguration bookieConf = newServerConfiguration(); bookieConf.setBookieAuthProviderFactoryClass( - "NonExistantClassNameForTestingAuthPlugins"); + "NonExistentClassNameForTestingAuthPlugins"); ClientConfiguration clientConf = newClientConfiguration(); clientConf.setClientAuthProviderFactoryClass( - "NonExistantClassNameForTestingAuthPlugins"); + "NonExistentClassNameForTestingAuthPlugins"); try { startAndStoreBookie(bookieConf); fail("Shouldn't get this far"); @@ -444,10 +478,7 @@ public void testDropConnectionFromBookieAuthPlugin() throws Exception { } BookieServer startAndStoreBookie(ServerConfiguration conf) throws Exception { - bsConfs.add(conf); - BookieServer s = startBookie(conf); - bs.add(s); - return s; + return startAndAddBookie(conf).getServer(); } /** diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/AdvertisedAddressTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/AdvertisedAddressTest.java index 0f893ac7f80..e5671280cb2 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/AdvertisedAddressTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/AdvertisedAddressTest.java @@ -24,16 +24,15 @@ import static org.junit.Assert.assertEquals; import java.io.File; -import java.io.IOException; import java.util.Collection; - +import java.util.UUID; import org.apache.bookkeeper.client.BookKeeperAdmin; import org.apache.bookkeeper.conf.ServerConfiguration; import org.apache.bookkeeper.conf.TestBKConfiguration; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.net.BookieSocketAddress; import org.apache.bookkeeper.test.BookKeeperClusterTestCase; -import org.apache.bookkeeper.test.PortManager; -import org.apache.bookkeeper.util.IOUtils; +import org.apache.bookkeeper.util.PortManager; import org.junit.Test; /** @@ -46,12 +45,11 @@ public AdvertisedAddressTest() { super(0); } - private String newDirectory(boolean createCurDir) throws IOException { - File d = IOUtils.createTempDir("cookie", "tmpdir"); + private String 
newDirectory(boolean createCurDir) throws Exception { + File d = tmpDirs.createNew("cookie", "tmpdir"); if (createCurDir) { new File(d, "current").mkdirs(); } - tmpDirs.add(d); return d.getPath(); } @@ -70,17 +68,18 @@ public void testSetAdvertisedAddress() throws Exception { assertEquals("10.0.0.1", conf.getAdvertisedAddress()); BookieSocketAddress bkAddress = new BookieSocketAddress("10.0.0.1", bookiePort); - assertEquals(bkAddress, Bookie.getBookieAddress(conf)); + assertEquals(bkAddress, BookieImpl.getBookieAddress(conf)); + assertEquals(bkAddress.toBookieId(), BookieImpl.getBookieId(conf)); - Bookie b = new Bookie(conf); + Bookie b = new TestBookieImpl(conf); b.start(); BookKeeperAdmin bka = new BookKeeperAdmin(baseClientConf); - Collection bookies = bka.getAvailableBookies(); + Collection bookies = bka.getAvailableBookies(); assertEquals(1, bookies.size()); - BookieSocketAddress address = bookies.iterator().next(); - assertEquals(bkAddress, address); + BookieId address = bookies.iterator().next(); + assertEquals(bkAddress.toBookieId(), address); b.shutdown(); bka.close(); @@ -99,7 +98,42 @@ public void testBothUseHostnameAndAdvertisedAddress() throws Exception { assertEquals("10.0.0.1", conf.getAdvertisedAddress()); BookieSocketAddress bkAddress = new BookieSocketAddress("10.0.0.1", bookiePort); - assertEquals(bkAddress, Bookie.getBookieAddress(conf)); + assertEquals(bkAddress, BookieImpl.getBookieAddress(conf)); + assertEquals(bkAddress.toBookieId(), BookieImpl.getBookieId(conf)); } + /** + * Test starting bookie with a bookieId. + */ + @Test + public void testSetBookieId() throws Exception { + String uuid = UUID.randomUUID().toString(); + ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); + conf.setJournalDirName(newDirectory(false)) + .setLedgerDirNames(new String[] { newDirectory(false) }) + .setBookiePort(bookiePort) + .setBookieId(uuid) + .setMetadataServiceUri(zkUtil.getMetadataServiceUri()); + + conf.setAdvertisedAddress("10.0.0.1"); + assertEquals("10.0.0.1", conf.getAdvertisedAddress()); + assertEquals(uuid, conf.getBookieId()); + + BookieSocketAddress bkAddress = new BookieSocketAddress("10.0.0.1", bookiePort); + assertEquals(bkAddress, BookieImpl.getBookieAddress(conf)); + assertEquals(uuid, BookieImpl.getBookieId(conf).getId()); + + Bookie b = new TestBookieImpl(conf); + b.start(); + + BookKeeperAdmin bka = new BookKeeperAdmin(baseClientConf); + Collection bookies = bka.getAvailableBookies(); + + assertEquals(1, bookies.size()); + BookieId address = bookies.iterator().next(); + assertEquals(BookieId.parse(uuid), address); + + b.shutdown(); + bka.close(); + } } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieAccessor.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieAccessor.java index 4c8ad468ec1..d0c8ab6a81d 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieAccessor.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieAccessor.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -21,7 +21,6 @@ package org.apache.bookkeeper.bookie; import java.io.IOException; - import org.apache.bookkeeper.bookie.CheckpointSource.Checkpoint; /** @@ -31,7 +30,7 @@ public class BookieAccessor { /** * Force a bookie to flush its ledger storage. 
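* <p>Internally, a checkpoint is created from the bookie's journals and the ledger storage is then flushed.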
*/ - public static void forceFlush(Bookie b) throws IOException { + public static void forceFlush(BookieImpl b) throws IOException { CheckpointSourceList source = new CheckpointSourceList(b.journals); Checkpoint cp = source.newCheckpoint(); b.ledgerStorage.flush(); diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieDeferredSyncTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieDeferredSyncTest.java index cce445efb8b..dcac8f03d20 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieDeferredSyncTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieDeferredSyncTest.java @@ -20,7 +20,7 @@ */ package org.apache.bookkeeper.bookie; -import static com.google.common.base.Charsets.UTF_8; +import static java.nio.charset.StandardCharsets.UTF_8; import static org.apache.bookkeeper.common.concurrent.FutureUtils.result; import static org.junit.Assert.assertEquals; @@ -68,7 +68,7 @@ public void testWriteAndRecovery() throws Exception { .withLedgerId(ledgerId) .withRecovery(true) .withPassword(new byte[0]) - .execute());) { + .execute())) { try (LedgerEntries entries = readLh.read(0, n - 1)) { for (int i = 0; i < n; i++) { @@ -121,7 +121,7 @@ private void testClose(boolean force) throws Exception { .withLedgerId(ledgerId) .withRecovery(true) .withPassword(new byte[0]) - .execute());) { + .execute())) { try (LedgerEntries entries = readLh.read(0, n - 1)) { for (int i = 0; i < n; i++) { @@ -143,7 +143,7 @@ private void testClose(boolean force) throws Exception { .withLedgerId(ledgerId) .withRecovery(true) .withPassword(new byte[0]) - .execute());) { + .execute())) { assertEquals(-1, readLh.getLastAddConfirmed()); // entry will be readable with readUnconfirmed @@ -176,15 +176,13 @@ private void testForce(EnumSet writeFlags) throws Exception { .withWriteFlags(writeFlags) .withDigestType(DigestType.CRC32C) .withPassword(new byte[0]) - .execute());) { + .execute())) { int n = 10; for (int i = 0; i < n; i++) { lh.append(("entry-" + i).getBytes(UTF_8)); } result(lh.force()); assertEquals(n - 1, lh.getLastAddConfirmed()); - - lh.close(); } } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieImplTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieImplTest.java new file mode 100644 index 00000000000..4787ae8d36f --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieImplTest.java @@ -0,0 +1,195 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + */ +package org.apache.bookkeeper.bookie; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.spy; + +import com.google.protobuf.ByteString; +import com.google.protobuf.UnsafeByteOperations; +import io.netty.buffer.ByteBuf; +import io.netty.buffer.Unpooled; +import io.netty.buffer.UnpooledByteBufAllocator; +import java.nio.charset.StandardCharsets; +import java.util.concurrent.atomic.AtomicBoolean; +import org.apache.bookkeeper.client.BookKeeper; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.conf.TestBKConfiguration; +import org.apache.bookkeeper.discover.RegistrationManager; +import org.apache.bookkeeper.meta.MetadataBookieDriver; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks; +import org.apache.bookkeeper.proto.checksum.DigestManager; +import org.apache.bookkeeper.stats.NullStatsLogger; +import org.apache.bookkeeper.test.BookKeeperClusterTestCase; +import org.apache.bookkeeper.util.ByteBufList; +import org.apache.bookkeeper.util.PortManager; +import org.awaitility.Awaitility; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class BookieImplTest extends BookKeeperClusterTestCase { + private static final Logger log = LoggerFactory.getLogger(BookieImplTest.class); + + private static final int bookiePort = PortManager.nextFreePort(); + + private static final int ADD = 0; + private static final int RECOVERY_ADD = 1; + + public BookieImplTest() { + super(0); + } + + @Test + public void testWriteLac() throws Exception { + final String metadataServiceUri = zkUtil.getMetadataServiceUri(); + ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); + conf.setMetadataServiceUri(metadataServiceUri); + + MetadataBookieDriver metadataDriver = BookieResources.createMetadataDriver( + conf, NullStatsLogger.INSTANCE); + RegistrationManager rm = metadataDriver.createRegistrationManager(); + TestBookieImpl.Resources resources = new TestBookieImpl.ResourceBuilder(conf) + .withMetadataDriver(metadataDriver).withRegistrationManager(rm).build(); + BookieImpl b = new TestBookieImpl(resources); + b.start(); + + final BookieImpl spyBookie = spy(b); + + final long ledgerId = 10; + final long lac = 23; + + DigestManager digestManager = DigestManager.instantiate(ledgerId, "".getBytes(StandardCharsets.UTF_8), + BookKeeper.DigestType.toProtoDigestType(BookKeeper.DigestType.CRC32), UnpooledByteBufAllocator.DEFAULT, + baseClientConf.getUseV2WireProtocol()); + + final ByteBufList toSend = digestManager.computeDigestAndPackageForSendingLac(lac); + ByteString body = UnsafeByteOperations.unsafeWrap(toSend.array(), toSend.arrayOffset(), toSend.readableBytes()); + + final ByteBuf lacToAdd = Unpooled.wrappedBuffer(body.asReadOnlyByteBuffer()); + final byte[] masterKey = ByteString.copyFrom("masterKey".getBytes()).toByteArray(); + + final ByteBuf explicitLACEntry = b.createExplicitLACEntry(ledgerId, lacToAdd); + lacToAdd.resetReaderIndex(); + + doReturn(explicitLACEntry) + .when(spyBookie) + .createExplicitLACEntry(eq(ledgerId), eq(lacToAdd)); + + AtomicBoolean complete = new AtomicBoolean(false); + final BookkeeperInternalCallbacks.WriteCallback writeCallback = + new BookkeeperInternalCallbacks.WriteCallback() { + @Override + public void writeComplete(int rc, long ledgerId, long entryId, BookieId 
addr, Object ctx) { + complete.set(true); + } + }; + + spyBookie.setExplicitLac(lacToAdd, writeCallback, null, masterKey); + + Awaitility.await().untilAsserted(() -> assertTrue(complete.get())); + + assertEquals(0, lacToAdd.refCnt()); + assertEquals(0, explicitLACEntry.refCnt()); + + b.shutdown(); + + } + + @Test + public void testAddEntry() throws Exception { + mockAddEntryReleased(ADD); + } + + @Test + public void testRecoveryAddEntry() throws Exception { + mockAddEntryReleased(RECOVERY_ADD); + } + + public void mockAddEntryReleased(int flag) throws Exception { + final String metadataServiceUri = zkUtil.getMetadataServiceUri(); + ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); + conf.setMetadataServiceUri(metadataServiceUri); + + MetadataBookieDriver metadataDriver = BookieResources.createMetadataDriver( + conf, NullStatsLogger.INSTANCE); + RegistrationManager rm = metadataDriver.createRegistrationManager(); + TestBookieImpl.Resources resources = new TestBookieImpl.ResourceBuilder(conf) + .withMetadataDriver(metadataDriver).withRegistrationManager(rm).build(); + BookieImpl b = new TestBookieImpl(resources); + b.start(); + + final BookieImpl spyBookie = spy(b); + + final long ledgerId = 10; + + final byte[] masterKey = ByteString.copyFrom("masterKey".getBytes()).toByteArray(); + + final ByteBuf masterKeyEntry = b.createMasterKeyEntry(ledgerId, masterKey); + + doReturn(masterKeyEntry) + .when(spyBookie) + .createMasterKeyEntry(eq(ledgerId), eq(masterKey)); + + final ByteBuf entry = generateEntry(ledgerId, 0); + + AtomicBoolean complete = new AtomicBoolean(false); + final BookkeeperInternalCallbacks.WriteCallback writeCallback = + new BookkeeperInternalCallbacks.WriteCallback() { + @Override + public void writeComplete(int rc, long ledgerId, long entryId, BookieId addr, Object ctx) { + complete.set(true); + } + }; + + switch (flag) { + case ADD: + spyBookie.addEntry(entry, false, writeCallback, null, masterKey); + break; + case RECOVERY_ADD: + spyBookie.recoveryAddEntry(entry, writeCallback, null, masterKey); + break; + default: + throw new IllegalArgumentException("Only support ADD and RECOVERY_ADD flag."); + } + + Awaitility.await().untilAsserted(() -> assertTrue(complete.get())); + + assertEquals(0, entry.refCnt()); + assertEquals(0, masterKeyEntry.refCnt()); + + b.shutdown(); + + } + + private ByteBuf generateEntry(long ledger, long entry) { + byte[] data = ("ledger-" + ledger + "-" + entry).getBytes(); + ByteBuf bb = Unpooled.buffer(8 + 8 + data.length); + bb.writeLong(ledger); + bb.writeLong(entry); + bb.writeBytes(data); + return bb; + } +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieInitializationTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieInitializationTest.java index 48ea817f5ec..1b7aa371a82 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieInitializationTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieInitializationTest.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
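The generateEntry helper above lays an entry out as an 8-byte ledger id and an 8-byte entry id followed by the payload bytes. A hedged read-back sketch of that layout:

    ByteBuf bb = generateEntry(10L, 0L);
    long ledgerId = bb.readLong();                 // 10
    long entryId = bb.readLong();                  // 0
    byte[] payload = new byte[bb.readableBytes()];
    bb.readBytes(payload);                         // the bytes of "ledger-10-0"
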
See the NOTICE file @@ -20,69 +20,101 @@ */ package org.apache.bookkeeper.bookie; -import static com.google.common.base.Charsets.UTF_8; +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.apache.bookkeeper.bookie.BookieJournalTest.writeV5Journal; +import static org.apache.bookkeeper.meta.MetadataDrivers.runFunctionWithRegistrationManager; import static org.apache.bookkeeper.util.BookKeeperConstants.AVAILABLE_NODE; import static org.apache.bookkeeper.util.BookKeeperConstants.BOOKIE_STATUS_FILENAME; +import static org.apache.bookkeeper.util.TestUtils.countNumOfFiles; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.hasItem; +import static org.hamcrest.Matchers.hasProperty; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; +import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.anyBoolean; -import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.Mockito.doReturn; import static org.mockito.Mockito.doThrow; -import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.spy; +import com.fasterxml.jackson.databind.ObjectMapper; +import io.netty.buffer.ByteBuf; +import io.netty.buffer.UnpooledByteBufAllocator; +import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; +import java.io.InputStreamReader; import java.io.OutputStreamWriter; import java.net.BindException; import java.net.InetAddress; -import java.security.AccessControlException; +import java.net.URL; +import java.net.URLConnection; import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; import java.util.List; +import java.util.Map; import java.util.Random; - import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutionException; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.Supplier; import org.apache.bookkeeper.bookie.BookieException.DiskPartitionDuplicationException; import org.apache.bookkeeper.bookie.BookieException.MetadataStoreException; +import org.apache.bookkeeper.bookie.Journal.LastLogMark; import org.apache.bookkeeper.bookie.LedgerDirsManager.NoWritableLedgerDirException; import org.apache.bookkeeper.client.BookKeeper; import org.apache.bookkeeper.client.BookKeeper.DigestType; import org.apache.bookkeeper.client.BookKeeperAdmin; +import org.apache.bookkeeper.client.BookKeeperClientStats; import org.apache.bookkeeper.client.LedgerHandle; import org.apache.bookkeeper.common.component.ComponentStarter; +import org.apache.bookkeeper.common.component.Lifecycle; +import org.apache.bookkeeper.common.component.LifecycleComponent; import org.apache.bookkeeper.conf.ClientConfiguration; import org.apache.bookkeeper.conf.ServerConfiguration; import org.apache.bookkeeper.conf.TestBKConfiguration; +import org.apache.bookkeeper.discover.BookieServiceInfo; +import org.apache.bookkeeper.discover.BookieServiceInfo.Endpoint; import org.apache.bookkeeper.discover.RegistrationManager; +import org.apache.bookkeeper.http.HttpRouter; +import org.apache.bookkeeper.http.HttpServerLoader; import org.apache.bookkeeper.meta.MetadataBookieDriver; +import org.apache.bookkeeper.meta.exceptions.MetadataException; import 
org.apache.bookkeeper.meta.zk.ZKMetadataBookieDriver; import org.apache.bookkeeper.meta.zk.ZKMetadataDriverBase; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.proto.BookieServer; -import org.apache.bookkeeper.replication.ReplicationException.CompatibilityException; -import org.apache.bookkeeper.replication.ReplicationException.UnavailableException; +import org.apache.bookkeeper.proto.DataFormats.BookieServiceInfoFormat; +import org.apache.bookkeeper.replication.AutoRecoveryMain; +import org.apache.bookkeeper.replication.ReplicationStats; +import org.apache.bookkeeper.server.Main; import org.apache.bookkeeper.server.conf.BookieConfiguration; +import org.apache.bookkeeper.server.service.AutoRecoveryService; import org.apache.bookkeeper.server.service.BookieService; import org.apache.bookkeeper.stats.NullStatsLogger; -import org.apache.bookkeeper.stats.StatsLogger; +import org.apache.bookkeeper.stats.prometheus.PrometheusMetricsProvider; import org.apache.bookkeeper.test.BookKeeperClusterTestCase; -import org.apache.bookkeeper.test.PortManager; -import org.apache.bookkeeper.tls.SecurityException; import org.apache.bookkeeper.util.DiskChecker; +import org.apache.bookkeeper.util.LoggerOutput; +import org.apache.bookkeeper.util.PortManager; +import org.apache.bookkeeper.versioning.Version; +import org.apache.bookkeeper.versioning.Versioned; import org.apache.bookkeeper.zookeeper.ZooKeeperClient; import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.data.Stat; -import org.junit.Assert; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TestName; -import org.powermock.reflect.Whitebox; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.slf4j.event.LoggingEvent; /** * Testing bookie initialization cases. @@ -91,9 +123,12 @@ public class BookieInitializationTest extends BookKeeperClusterTestCase { private static final Logger LOG = LoggerFactory .getLogger(BookieInitializationTest.class); + private static ObjectMapper om = new ObjectMapper(); + @Rule public final TestName runtime = new TestName(); - ZKMetadataBookieDriver driver; + @Rule + public LoggerOutput loggerOutput = new LoggerOutput(); public BookieInitializationTest() { super(0); @@ -104,51 +139,173 @@ public void setUp() throws Exception { String ledgersPath = "/ledgers" + runtime.getMethodName(); super.setUp(ledgersPath); zkUtil.createBKEnsemble(ledgersPath); - driver = new ZKMetadataBookieDriver(); } @Override public void tearDown() throws Exception { - if (driver != null) { - driver.close(); - } super.tearDown(); } + @Test + public void testOneJournalReplayForBookieRestartInReadOnlyMode() throws Exception { + testJournalReplayForBookieRestartInReadOnlyMode(1); + } + + @Test + public void testMultipleJournalReplayForBookieRestartInReadOnlyMode() throws Exception { + testJournalReplayForBookieRestartInReadOnlyMode(4); + } + + /** + * Tests that journal replay works correctly when bookie crashes and starts up in RO mode. 
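+ * (A journal's LastLogMark is only persisted to the lastMark file on checkpoint, so a crash beforehand leaves it at (0, 0) and forces the journal to be replayed on the next start.)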
+ */ + private void testJournalReplayForBookieRestartInReadOnlyMode(int numOfJournalDirs) throws Exception { + File tmpLedgerDir = tmpDirs.createNew("DiskCheck", "test"); + File tmpJournalDir = tmpDirs.createNew("DiskCheck", "test"); + + String[] journalDirs = new String[numOfJournalDirs]; + for (int i = 0; i < numOfJournalDirs; i++) { + journalDirs[i] = tmpJournalDir.getAbsolutePath() + "/journal-" + i; + } + + final ServerConfiguration conf = newServerConfiguration() + .setJournalDirsName(journalDirs) + .setLedgerDirNames(new String[] { tmpLedgerDir.getPath() }) + .setDiskCheckInterval(1000) + .setLedgerStorageClass(SortedLedgerStorage.class.getName()) + .setAutoRecoveryDaemonEnabled(false) + .setZkTimeout(5000); + + BookieServer server = new MockBookieServer(conf); + server.start(); + + List lastLogMarkList = new ArrayList<>(journalDirs.length); + + for (int i = 0; i < journalDirs.length; i++) { + Journal journal = ((BookieImpl) server.getBookie()).journals.get(i); + // LastLogMark should be (0, 0) at the bookie clean start + journal.getLastLogMark().readLog(); + lastLogMarkList.add(journal.getLastLogMark().markLog()); + assertEquals(0, lastLogMarkList.get(i).getCurMark().compare(new LogMark(0, 0))); + } + + ClientConfiguration clientConf = new ClientConfiguration(); + clientConf.setMetadataServiceUri(metadataServiceUri); + BookKeeper bkClient = new BookKeeper(clientConf); + + // Create multiple ledgers for adding entries to multiple journals + for (int i = 0; i < journalDirs.length; i++) { + LedgerHandle lh = bkClient.createLedger(1, 1, 1, DigestType.CRC32, "passwd".getBytes()); + long entryId = -1; + // Ensure that we have non-zero number of entries + long numOfEntries = new Random().nextInt(10) + 3; + for (int j = 0; j < numOfEntries; j++) { + entryId = lh.addEntry("data".getBytes()); + } + assertEquals(entryId, (numOfEntries - 1)); + lh.close(); + } + + for (int i = 0; i < journalDirs.length; i++) { + Journal journal = ((BookieImpl) server.getBookie()).journals.get(i); + // In-memory LastLogMark should be updated with every write to journal + assertTrue(journal.getLastLogMark().getCurMark().compare(lastLogMarkList.get(i).getCurMark()) > 0); + lastLogMarkList.set(i, journal.getLastLogMark().markLog()); + } + + // Kill Bookie abruptly before entries are flushed to disk + server.shutdown(); + + conf.setDiskUsageThreshold(0.001f) + .setDiskUsageWarnThreshold(0.0f).setReadOnlyModeEnabled(true).setIsForceGCAllowWhenNoSpace(true) + .setMinUsableSizeForIndexFileCreation(5 * 1024); + server = new BookieServer( + conf, + TestBookieImpl.buildReadOnly(conf), + NullStatsLogger.INSTANCE, UnpooledByteBufAllocator.DEFAULT, + new MockUncleanShutdownDetection()); + + for (int i = 0; i < journalDirs.length; i++) { + Journal journal = ((BookieImpl) server.getBookie()).journals.get(i); + // LastLogMark should be (0, 0) before bookie restart since bookie crashed before persisting lastMark + assertEquals(0, journal.getLastLogMark().getCurMark().compare(new LogMark(0, 0))); + } + + int numOfRestarts = 3; + // Restart server multiple times to ensure that logs are never replayed and new files are not generated + for (int i = 0; i < numOfRestarts; i++) { + + int txnBefore = countNumOfFiles(conf.getJournalDirs(), "txn"); + int logBefore = countNumOfFiles(conf.getLedgerDirs(), "log"); + int idxBefore = countNumOfFiles(conf.getLedgerDirs(), "idx"); + + server.start(); + + for (int j = 0; j < journalDirs.length; j++) { + Journal journal = ((BookieImpl) server.getBookie()).journals.get(j); + 
assertTrue(journal.getLastLogMark().getCurMark().compare(lastLogMarkList.get(j).getCurMark()) > 0); + lastLogMarkList.set(j, journal.getLastLogMark().markLog()); + } + + server.shutdown(); + + // Every bookie restart initiates a new journal file + // Journals should not be replayed every time since lastMark gets updated every time + // New EntryLog files should not be generated. + assertEquals(journalDirs.length, (countNumOfFiles(conf.getJournalDirs(), "txn") - txnBefore)); + + // First restart should replay journal and generate new log/index files + // Subsequent runs should not generate new files (but can delete older ones) + if (i == 0) { + assertTrue((countNumOfFiles(conf.getLedgerDirs(), "log") - logBefore) > 0); + assertTrue((countNumOfFiles(conf.getLedgerDirs(), "idx") - idxBefore) > 0); + } else { + assertTrue((countNumOfFiles(conf.getLedgerDirs(), "log") - logBefore) <= 0); + assertTrue((countNumOfFiles(conf.getLedgerDirs(), "idx") - idxBefore) <= 0); + } + + server = new BookieServer( + conf, + TestBookieImpl.buildReadOnly(conf), + NullStatsLogger.INSTANCE, UnpooledByteBufAllocator.DEFAULT, + new MockUncleanShutdownDetection()); + } + bkClient.close(); + } + /** * Verify the bookie server exit code. On ZooKeeper exception, should return * exit code ZK_REG_FAIL = 4 */ @Test public void testExitCodeZK_REG_FAIL() throws Exception { - File tmpDir = createTempDir("bookie", "test"); + File tmpDir = tmpDirs.createNew("bookie", "test"); final ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); conf.setJournalDirName(tmpDir.getPath()) .setLedgerDirNames(new String[] { tmpDir.getPath() }) .setMetadataServiceUri(metadataServiceUri); - RegistrationManager rm = mock(RegistrationManager.class); - doThrow(new MetadataStoreException("mocked exception")) - .when(rm) - .registerBookie(anyString(), anyBoolean()); // simulating ZooKeeper exception by assigning a closed zk client to bk - BookieServer bkServer = new BookieServer(conf) { - protected Bookie newBookie(ServerConfiguration conf) - throws IOException, KeeperException, InterruptedException, - BookieException { - Bookie bookie = new Bookie(conf); - MetadataBookieDriver driver = Whitebox.getInternalState(bookie, "metadataDriver"); - ((ZKMetadataBookieDriver) driver).setRegManager(rm); - return bookie; - } - }; + MetadataBookieDriver metadataDriver = spy(BookieResources.createMetadataDriver(conf, NullStatsLogger.INSTANCE)); + RegistrationManager rm = spy(metadataDriver.createRegistrationManager()); + doThrow(new MetadataStoreException("mocked exception")) + .when(rm) + .registerBookie(any(BookieId.class), anyBoolean(), any(BookieServiceInfo.class)); + doReturn(rm) + .when(metadataDriver).createRegistrationManager(); + TestBookieImpl.Resources resources = new TestBookieImpl.ResourceBuilder(conf) + .withMetadataDriver(metadataDriver) + .withRegistrationManager(rm).build(); + BookieServer bkServer = new BookieServer(conf, new TestBookieImpl(resources), + NullStatsLogger.INSTANCE, UnpooledByteBufAllocator.DEFAULT, + new MockUncleanShutdownDetection()); bkServer.start(); bkServer.join(); assertEquals("Failed to return ExitCode.ZK_REG_FAIL", - ExitCode.ZK_REG_FAIL, bkServer.getExitCode()); + ExitCode.ZK_REG_FAIL, bkServer.getExitCode()); } @Test @@ -157,20 +314,21 @@ public void testBookieRegistrationWithSameZooKeeperClient() throws Exception { conf.setMetadataServiceUri(metadataServiceUri) .setListeningInterface(null); - String bookieId = Bookie.getBookieAddress(conf).toString(); - - driver.initialize(conf, () -> {}, 
NullStatsLogger.INSTANCE); - try (StateManager manager = new BookieStateManager(conf, driver)) { + BookieId bookieId = BookieImpl.getBookieId(conf); + MetadataBookieDriver metadataDriver = BookieResources.createMetadataDriver( + conf, NullStatsLogger.INSTANCE); + try (RegistrationManager rm = metadataDriver.createRegistrationManager(); + StateManager manager = new BookieStateManager(conf, rm)) { manager.registerBookie(true).get(); assertTrue( "Bookie registration node doesn't exists!", - driver.getRegistrationManager().isBookieRegistered(bookieId)); + rm.isBookieRegistered(bookieId)); - // test register bookie again if the registeration node is created by itself. + // test register bookie again if the registration node is created by itself. manager.registerBookie(true).get(); assertTrue( "Bookie registration node doesn't exists!", - driver.getRegistrationManager().isBookieRegistered(bookieId)); + rm.isBookieRegistered(bookieId)); } } @@ -185,12 +343,15 @@ public void testBookieRegistration() throws Exception { conf.setMetadataServiceUri(metadataServiceUri) .setListeningInterface(null); - String bookieId = Bookie.getBookieAddress(conf).toString(); + String bookieId = BookieImpl.getBookieAddress(conf).toString(); final String bkRegPath = ZKMetadataDriverBase.resolveZkLedgersRootPath(conf) + "/" + AVAILABLE_NODE + "/" + bookieId; + MetadataBookieDriver metadataDriver = BookieResources.createMetadataDriver( + conf, NullStatsLogger.INSTANCE); + metadataDriver.initialize(conf, NullStatsLogger.INSTANCE); - driver.initialize(conf, () -> {}, NullStatsLogger.INSTANCE); - try (StateManager manager = new BookieStateManager(conf, driver)) { + try (RegistrationManager rm = metadataDriver.createRegistrationManager(); + StateManager manager = new BookieStateManager(conf, rm)) { manager.registerBookie(true).get(); } Stat bkRegNode1 = zkc.exists(bkRegPath, false); @@ -199,7 +360,7 @@ public void testBookieRegistration() throws Exception { // simulating bookie restart, on restart bookie will create new // zkclient and doing the registration. 
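// (a helper thread below deletes the stale registration znode while re-registration runs, to exercise the recovery path)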
try (MetadataBookieDriver newDriver = new ZKMetadataBookieDriver()) { - newDriver.initialize(conf, () -> {}, NullStatsLogger.INSTANCE); + newDriver.initialize(conf, NullStatsLogger.INSTANCE); try (ZooKeeperClient newZk = createNewZKClient()) { // deleting the znode, so that the bookie registration should @@ -213,7 +374,9 @@ public void testBookieRegistration() throws Exception { LOG.error("Failed to delete the znode :" + bkRegPath, e); } }).start(); - try (StateManager newMgr = new BookieStateManager(conf, newDriver)) { + + try (RegistrationManager newRm = newDriver.createRegistrationManager(); + StateManager newMgr = new BookieStateManager(conf, newRm)) { newMgr.registerBookie(true).get(); } catch (IOException e) { Throwable t = e.getCause(); @@ -243,13 +406,17 @@ public void testBookieRegistrationWithFQDNHostNameAsBookieID() throws Exception .setUseHostNameAsBookieID(true) .setListeningInterface(null); - final String bookieId = InetAddress.getLocalHost().getCanonicalHostName() + ":" + conf.getBookiePort(); + final BookieId bookieId = + BookieId.parse(InetAddress.getLocalHost().getCanonicalHostName() + ":" + conf.getBookiePort()); - driver.initialize(conf, () -> {}, NullStatsLogger.INSTANCE); - try (StateManager manager = new BookieStateManager(conf, driver)) { + MetadataBookieDriver metadataDriver = BookieResources.createMetadataDriver( + conf, NullStatsLogger.INSTANCE); + + try (RegistrationManager rm = metadataDriver.createRegistrationManager(); + StateManager manager = new BookieStateManager(conf, rm)) { manager.registerBookie(true).get(); assertTrue("Bookie registration node doesn't exists!", - driver.getRegistrationManager().isBookieRegistered(bookieId)); + rm.isBookieRegistered(bookieId)); } } @@ -261,14 +428,16 @@ public void testBookieRegistrationWithShortHostNameAsBookieID() throws Exception .setUseShortHostName(true) .setListeningInterface(null); - final String bookieId = InetAddress.getLocalHost().getCanonicalHostName().split("\\.", 2)[0] - + ":" + conf.getBookiePort(); + final BookieId bookieId = BookieId.parse(InetAddress.getLocalHost().getCanonicalHostName().split("\\.", 2)[0] + + ":" + conf.getBookiePort()); + MetadataBookieDriver metadataDriver = BookieResources.createMetadataDriver( + conf, NullStatsLogger.INSTANCE); - driver.initialize(conf, () -> {}, NullStatsLogger.INSTANCE); - try (StateManager manager = new BookieStateManager(conf, driver)) { + try (RegistrationManager rm = metadataDriver.createRegistrationManager(); + StateManager manager = new BookieStateManager(conf, rm)) { manager.registerBookie(true).get(); assertTrue("Bookie registration node doesn't exists!", - driver.getRegistrationManager().isBookieRegistered(bookieId)); + rm.isBookieRegistered(bookieId)); } } @@ -283,15 +452,18 @@ public void testRegNodeExistsAfterSessionTimeOut() throws Exception { .setMetadataServiceUri(metadataServiceUri) .setListeningInterface(null); - String bookieId = InetAddress.getLocalHost().getHostAddress() + ":" - + conf.getBookiePort(); + BookieId bookieId = BookieId.parse(InetAddress.getLocalHost().getHostAddress() + ":" + + conf.getBookiePort()); String bkRegPath = ZKMetadataDriverBase.resolveZkLedgersRootPath(conf) + "/" + AVAILABLE_NODE + "/" + bookieId; - driver.initialize(conf, () -> {}, NullStatsLogger.INSTANCE); - try (StateManager manager = new BookieStateManager(conf, driver)) { + MetadataBookieDriver metadataDriver = BookieResources.createMetadataDriver( + conf, NullStatsLogger.INSTANCE); + + try (RegistrationManager rm = metadataDriver.createRegistrationManager(); + 
StateManager manager = new BookieStateManager(conf, rm)) {
manager.registerBookie(true).get();
assertTrue("Bookie registration node doesn't exists!",
- driver.getRegistrationManager().isBookieRegistered(bookieId));
+ rm.isBookieRegistered(bookieId));
}
Stat bkRegNode1 = zkc.exists(bkRegPath, false);
assertNotNull("Bookie registration has been failed",
@@ -300,8 +472,10 @@ public void testRegNodeExistsAfterSessionTimeOut() throws Exception {
// simulating bookie restart, on restart bookie will create new
// zkclient and doing the registration.
try (MetadataBookieDriver newDriver = new ZKMetadataBookieDriver()) {
- newDriver.initialize(conf, () -> {}, NullStatsLogger.INSTANCE);
- try (StateManager newMgr = new BookieStateManager(conf, newDriver)) {
+ newDriver.initialize(conf, NullStatsLogger.INSTANCE);
+
+ try (RegistrationManager newRm = newDriver.createRegistrationManager();
+ StateManager newMgr = new BookieStateManager(conf, newRm)) {
newMgr.registerBookie(true).get();
fail("Should throw NodeExistsException as the znode is not getting expired");
} catch (ExecutionException ee) {
@@ -332,13 +506,59 @@ public void testRegNodeExistsAfterSessionTimeOut() throws Exception {
}
}

+ @Test(timeout = 20000)
+ public void testBookieRegistrationBookieServiceInfo() throws Exception {
+ final ServerConfiguration conf = TestBKConfiguration.newServerConfiguration()
+ .setMetadataServiceUri(metadataServiceUri)
+ .setUseHostNameAsBookieID(true)
+ .setUseShortHostName(true)
+ .setListeningInterface(null);
+
+ final BookieId bookieId = BookieId.parse(InetAddress.getLocalHost().getCanonicalHostName().split("\\.", 2)[0]
+ + ":" + conf.getBookiePort());
+ String bkRegPath = ZKMetadataDriverBase.resolveZkLedgersRootPath(conf) + "/" + AVAILABLE_NODE + "/" + bookieId;
+
+ MetadataBookieDriver metadataDriver = BookieResources.createMetadataDriver(
+ conf, NullStatsLogger.INSTANCE);
+
+ Endpoint endpoint = new Endpoint("test", 1281, "localhost", "bookie-rpc",
+ Collections.emptyList(), Collections.emptyList());
+ BookieServiceInfo bsi = new BookieServiceInfo(Collections.emptyMap(), Arrays.asList(endpoint));
+ Supplier<BookieServiceInfo> supplier = () -> bsi;
+
+ DiskChecker diskChecker = new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold());
+ LedgerDirsManager ledgerDirsManager = new LedgerDirsManager(
+ conf, conf.getLedgerDirs(), diskChecker);
+ try (RegistrationManager rm = metadataDriver.createRegistrationManager();
+ StateManager manager = new BookieStateManager(conf,
+ NullStatsLogger.INSTANCE,
+ rm, ledgerDirsManager, supplier)) {
+ manager.registerBookie(true).get();
+ assertTrue("Bookie registration node doesn't exist!",
+ rm.isBookieRegistered(bookieId));
+ }
+ Stat bkRegNode = zkc.exists(bkRegPath, false);
+ assertNotNull("Bookie registration failed", bkRegNode);
+
+ byte[] bkRegNodeData = zkc.getData(bkRegPath, null, null);
+ assertFalse("Bookie service info not written", bkRegNodeData == null || bkRegNodeData.length == 0);
+
+ BookieServiceInfoFormat serializedBookieServiceInfo = BookieServiceInfoFormat.parseFrom(bkRegNodeData);
+ BookieServiceInfoFormat.Endpoint serializedEndpoint = serializedBookieServiceInfo.getEndpoints(0);
+ assertNotNull("Serialized Bookie endpoint not found", serializedEndpoint);
+
+ assertEquals(endpoint.getId(), serializedEndpoint.getId());
+ assertEquals(endpoint.getHost(), serializedEndpoint.getHost());
+ assertEquals(endpoint.getPort(), serializedEndpoint.getPort());
+ }
+
/**
* Verify the user cannot start the bookie if not in the permittedStartupUsers conf list; expects
BKException BKUnauthorizedAccessException * if cannot start. */ @Test public void testUserNotPermittedToStart() throws Exception { - File tmpDir = createTempDir("bookie", "test"); + File tmpDir = tmpDirs.createNew("bookie", "test"); ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); int port = PortManager.nextFreePort(); @@ -352,9 +572,13 @@ public void testUserNotPermittedToStart() throws Exception { boolean sawException = false; try { - bs1 = new BookieServer(conf); - Assert.fail("Bookkeeper should not have started since current user isn't in permittedStartupUsers"); - } catch (AccessControlException buae) { + bs1 = new BookieServer( + conf, new TestBookieImpl(conf), + NullStatsLogger.INSTANCE, UnpooledByteBufAllocator.DEFAULT, + new MockUncleanShutdownDetection()); + + fail("Bookkeeper should not have started since current user isn't in permittedStartupUsers"); + } catch (BookieException.BookieUnauthorizedAccessException buae) { sawException = true; } finally { if (bs1 != null && bs1.isRunning()) { @@ -370,7 +594,7 @@ public void testUserNotPermittedToStart() throws Exception { */ @Test public void testUserPermittedToStart() throws Exception { - File tmpDir = createTempDir("bookie", "test"); + File tmpDir = tmpDirs.createNew("bookie", "test"); ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); int port = PortManager.nextFreePort(); @@ -385,10 +609,13 @@ public void testUserPermittedToStart() throws Exception { String userString = "larry,,,curly ," + System.getProperty("user.name") + " ,moe"; conf.setPermittedStartupUsers(userString); try { - bs1 = new BookieServer(conf); + bs1 = new BookieServer( + conf, new TestBookieImpl(conf), + NullStatsLogger.INSTANCE, UnpooledByteBufAllocator.DEFAULT, + new MockUncleanShutdownDetection()); bs1.start(); - } catch (AccessControlException buae) { - Assert.fail("Bookkeeper should have started since current user is in permittedStartupUsers"); + } catch (BookieException.BookieUnauthorizedAccessException buae) { + fail("Bookkeeper should have started since current user is in permittedStartupUsers"); } finally { if (bs1 != null && bs1.isRunning()) { bs1.shutdown(); @@ -399,10 +626,13 @@ public void testUserPermittedToStart() throws Exception { userString = "larry ,curly, moe," + System.getProperty("user.name") + ","; conf.setPermittedStartupUsers(userString); try { - bs1 = new BookieServer(conf); + bs1 = new BookieServer( + conf, new TestBookieImpl(conf), + NullStatsLogger.INSTANCE, UnpooledByteBufAllocator.DEFAULT, + new MockUncleanShutdownDetection()); bs1.start(); - } catch (AccessControlException buae) { - Assert.fail("Bookkeeper should have started since current user is in permittedStartupUsers"); + } catch (BookieException.BookieUnauthorizedAccessException buae) { + fail("Bookkeeper should have started since current user is in permittedStartupUsers"); } finally { if (bs1 != null && bs1.isRunning()) { bs1.shutdown(); @@ -416,7 +646,7 @@ public void testUserPermittedToStart() throws Exception { */ @Test public void testUserPermittedToStartWithMissingProperty() throws Exception { - File tmpDir = createTempDir("bookie", "test"); + File tmpDir = tmpDirs.createNew("bookie", "test"); ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); LOG.info("{}", conf); @@ -428,10 +658,13 @@ public void testUserPermittedToStartWithMissingProperty() throws Exception { .setLedgerDirNames(new String[] { tmpDir.getPath() }); BookieServer bs1 = null; try { - bs1 = new BookieServer(conf); + bs1 = new 
BookieServer( + conf, new TestBookieImpl(conf), + NullStatsLogger.INSTANCE, UnpooledByteBufAllocator.DEFAULT, + new MockUncleanShutdownDetection()); bs1.start(); - } catch (AccessControlException buae) { - Assert.fail("Bookkeeper should have started since permittedStartupUser is not specified"); + } catch (BookieException.BookieUnauthorizedAccessException buae) { + fail("Bookkeeper should have started since permittedStartupUser is not specified"); } finally { if (bs1 != null && bs1.isRunning()) { bs1.shutdown(); @@ -445,7 +678,7 @@ public void testUserPermittedToStartWithMissingProperty() throws Exception { */ @Test public void testDuplicateBookieServerStartup() throws Exception { - File tmpDir = createTempDir("bookie", "test"); + File tmpDir = tmpDirs.createNew("bookie", "test"); ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); int port = PortManager.nextFreePort(); @@ -453,12 +686,18 @@ public void testDuplicateBookieServerStartup() throws Exception { .setJournalDirName(tmpDir.getPath()) .setLedgerDirNames(new String[] { tmpDir.getPath() }) .setMetadataServiceUri(metadataServiceUri); - BookieServer bs1 = new BookieServer(conf); + BookieServer bs1 = new BookieServer( + conf, new TestBookieImpl(conf), + NullStatsLogger.INSTANCE, UnpooledByteBufAllocator.DEFAULT, + new MockUncleanShutdownDetection()); bs1.start(); BookieServer bs2 = null; // starting bk server with same conf try { - bs2 = new BookieServer(conf); + bs2 = new BookieServer( + conf, new TestBookieImpl(conf), + NullStatsLogger.INSTANCE, UnpooledByteBufAllocator.DEFAULT, + new MockUncleanShutdownDetection()); bs2.start(); fail("Should throw BindException, as the bk server is already running!"); } catch (BindException e) { @@ -476,7 +715,7 @@ public void testDuplicateBookieServerStartup() throws Exception { @Test public void testBookieServiceExceptionHandler() throws Exception { - File tmpDir = createTempDir("bookie", "exception-handler"); + File tmpDir = tmpDirs.createNew("bookie", "exception-handler"); ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); int port = PortManager.nextFreePort(); conf.setBookiePort(port) @@ -485,7 +724,10 @@ public void testBookieServiceExceptionHandler() throws Exception { .setMetadataServiceUri(metadataServiceUri); BookieConfiguration bkConf = new BookieConfiguration(conf); - BookieService service = new BookieService(bkConf, NullStatsLogger.INSTANCE); + BookieService service = new BookieService( + bkConf, new TestBookieImpl(conf), + NullStatsLogger.INSTANCE, BookieResources.createAllocator(conf), + new MockUncleanShutdownDetection()); CompletableFuture startFuture = ComponentStarter.startComponent(service); // shutdown the bookie service @@ -495,13 +737,150 @@ public void testBookieServiceExceptionHandler() throws Exception { startFuture.get(); } + /** + * Mock InterleavedLedgerStorage class where addEntry is mocked to throw + * OutOfMemoryError. 
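+ *
+ * <p>A rough sketch of how the mock gets picked up (using the same configuration
+ * calls as the test below; the storage class is instantiated reflectively from its
+ * name, so it must be public with a no-arg constructor):
+ * <pre>{@code
+ * ServerConfiguration conf = TestBKConfiguration.newServerConfiguration();
+ * conf.setLedgerStorageClass(MockInterleavedLedgerStorage.class.getName());
+ * // journal replay during bookie start calls addEntry() and hits the injected Error
+ * }</pre>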
+ */
+ public static class MockInterleavedLedgerStorage extends InterleavedLedgerStorage {
+ AtomicInteger atomicInt = new AtomicInteger(0);
+
+ @Override
+ public long addEntry(ByteBuf entry) throws IOException {
+ if (atomicInt.incrementAndGet() == 10) {
+ throw new OutOfMemoryError("Some Injected Exception");
+ }
+ return super.addEntry(entry);
+ }
+ }
+
+ @Test
+ public void testBookieStartException() throws Exception {
+ File journalDir = tmpDirs.createNew("bookie", "journal");
+ BookieImpl.checkDirectoryStructure(BookieImpl.getCurrentDirectory(journalDir));
+
+ File ledgerDir = tmpDirs.createNew("bookie", "ledger");
+ BookieImpl.checkDirectoryStructure(BookieImpl.getCurrentDirectory(ledgerDir));
+
+ /*
+ * add a few entries to the journal file.
+ */
+ int numOfEntries = 100;
+ writeV5Journal(BookieImpl.getCurrentDirectory(journalDir), numOfEntries,
+ "testV5Journal".getBytes());
+
+ /*
+ * This Bookie is configured to use MockInterleavedLedgerStorage.
+ * MockInterleavedLedgerStorage throws an Error on an addEntry request.
+ * This is to simulate Bookie/BookieServer/BookieService 'start' failure
+ * because of 'Bookie.readJournal' failure.
+ */
+ ServerConfiguration conf = TestBKConfiguration.newServerConfiguration();
+ int port = PortManager.nextFreePort();
+ conf.setBookiePort(port).setJournalDirName(journalDir.getPath())
+ .setLedgerDirNames(new String[] { ledgerDir.getPath() }).setMetadataServiceUri(metadataServiceUri)
+ .setLedgerStorageClass(MockInterleavedLedgerStorage.class.getName());
+
+ BookieConfiguration bkConf = new BookieConfiguration(conf);
+ MetadataBookieDriver metadataDriver = BookieResources.createMetadataDriver(
+ conf, NullStatsLogger.INSTANCE);
+ try (RegistrationManager rm = metadataDriver.createRegistrationManager()) {
+ /*
+ * create cookie and write it to JournalDir/LedgerDir.
+ */
+ String instanceId = rm.getClusterInstanceId();
+ Cookie.Builder cookieBuilder = Cookie.generateCookie(conf).setInstanceId(instanceId);
+ Cookie cookie = cookieBuilder.build();
+ cookie.writeToDirectory(new File(journalDir, "current"));
+ cookie.writeToDirectory(new File(ledgerDir, "current"));
+ Versioned<byte[]> newCookie = new Versioned<>(
+ cookie.toString().getBytes(UTF_8), Version.NEW
+ );
+ rm.writeCookie(BookieImpl.getBookieId(conf), newCookie);
+ }
+
+ /*
+ * Create LifecycleComponent for BookieServer and start it.
+ */
+ LifecycleComponent server = Main.buildBookieServer(bkConf);
+ CompletableFuture<Void> startFuture = ComponentStarter.startComponent(server);
+
+ /*
+ * Since Bookie/BookieServer/BookieService is expected to fail, it would
+ * cause the bookie-server component's exceptionHandler to get triggered.
+ * This exceptionHandler makes sure all of the components get
+ * closed and then finally completes the Future.
+ */
+ startFuture.get();
+
+ /*
+ * Make sure that the component's exceptionHandler is called by checking that
+ * the error message of the exceptionHandler is logged. This log message is
+ * defined in the anonymous exceptionHandler class defined in the
+ * ComponentStarter.startComponent method.
+ */
+ loggerOutput.expect((List<LoggingEvent> logEvents) -> {
+ assertThat(logEvents,
+ hasItem(hasProperty("message", containsString("Triggered exceptionHandler of Component:"))));
+ });
+ }
+
+ /**
+ * Test that if the journal reads an entry with negative length, it shuts down
+ * the bookie normally. An admin should look to see what has
+ * happened in this case.
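+ *
+ * <p>Journal records are length-prefixed on disk, so the corruption boils down to a
+ * record whose length word is negative; a minimal sketch of the idea (the test itself
+ * relies on the writeV5Journal(..., true) overload, and fileChannel is a hypothetical
+ * handle on the journal file):
+ * <pre>{@code
+ * ByteBuffer lenBuf = ByteBuffer.allocate(4);
+ * lenBuf.putInt(-1); // negative length prefix: journal replay fails with an IOException
+ * lenBuf.flip();
+ * fileChannel.write(lenBuf);
+ * }</pre>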
+ */ + @Test + public void testNegativeLengthEntryBookieShutdown() throws Exception { + File journalDir = tmpDirs.createNew("bookie", "journal"); + BookieImpl.checkDirectoryStructure(BookieImpl.getCurrentDirectory(journalDir)); + + File ledgerDir = tmpDirs.createNew("bookie", "ledger"); + BookieImpl.checkDirectoryStructure(BookieImpl.getCurrentDirectory(ledgerDir)); + + writeV5Journal(BookieImpl.getCurrentDirectory(journalDir), 5, + "testV5Journal".getBytes(), true); + + ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); + conf.setJournalDirName(journalDir.getPath()) + .setLedgerDirNames(new String[] { ledgerDir.getPath() }) + .setMetadataServiceUri(null); + + Bookie b = null; + try { + b = new TestBookieImpl(conf); + b.start(); + assertFalse("Bookie should shutdown normally after catching IOException" + + " due to corrupt entry with negative length", b.isRunning()); + } finally { + if (b != null) { + b.shutdown(); + } + } + } + + @Test + public void testAutoRecoveryServiceExceptionHandler() throws Exception { + ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); + conf.setMetadataServiceUri(metadataServiceUri); + + BookieConfiguration bkConf = new BookieConfiguration(conf); + AutoRecoveryService service = new AutoRecoveryService(bkConf, NullStatsLogger.INSTANCE); + CompletableFuture startFuture = ComponentStarter.startComponent(service); + + // shutdown the AutoRecovery service + service.getAutoRecoveryServer().shutdown(); + + // the AutoRecovery lifecycle component should be shutdown. + startFuture.get(); + } + /** * Verify bookie server starts up on ephemeral ports. */ @Test public void testBookieServerStartupOnEphemeralPorts() throws Exception { - File tmpDir1 = createTempDir("bookie", "test1"); - File tmpDir2 = createTempDir("bookie", "test2"); + File tmpDir1 = tmpDirs.createNew("bookie", "test1"); + File tmpDir2 = tmpDirs.createNew("bookie", "test2"); ServerConfiguration conf1 = TestBKConfiguration.newServerConfiguration(); conf1.setBookiePort(0) @@ -510,7 +889,10 @@ public void testBookieServerStartupOnEphemeralPorts() throws Exception { new String[] { tmpDir1.getPath() }) .setMetadataServiceUri(null); assertEquals(0, conf1.getBookiePort()); - BookieServer bs1 = new BookieServer(conf1); + BookieServer bs1 = new BookieServer( + conf1, new TestBookieImpl(conf1), + NullStatsLogger.INSTANCE, UnpooledByteBufAllocator.DEFAULT, + new MockUncleanShutdownDetection()); bs1.start(); assertFalse(0 == conf1.getBookiePort()); @@ -521,7 +903,11 @@ public void testBookieServerStartupOnEphemeralPorts() throws Exception { .setLedgerDirNames( new String[] { tmpDir2.getPath() }) .setMetadataServiceUri(null); - BookieServer bs2 = new BookieServer(conf2); + BookieServer bs2 = new BookieServer( + conf2, new TestBookieImpl(conf2), + NullStatsLogger.INSTANCE, UnpooledByteBufAllocator.DEFAULT, + new MockUncleanShutdownDetection()); + bs2.start(); assertFalse(0 == conf2.getBookiePort()); @@ -534,9 +920,9 @@ public void testBookieServerStartupOnEphemeralPorts() throws Exception { */ @Test public void testStartBookieWithoutZKServer() throws Exception { - zkUtil.killServer(); + zkUtil.killCluster(); - File tmpDir = createTempDir("bookie", "test"); + File tmpDir = tmpDirs.createNew("bookie", "test"); final ServerConfiguration conf = TestBKConfiguration.newServerConfiguration() .setJournalDirName(tmpDir.getPath()) @@ -544,7 +930,7 @@ public void testStartBookieWithoutZKServer() throws Exception { 
conf.setMetadataServiceUri(zkUtil.getMetadataServiceUri()).setZkTimeout(5000); try { - new Bookie(conf); + new TestBookieImpl(conf); fail("Should throw ConnectionLossException as ZKServer is not running!"); } catch (BookieException.MetadataStoreException e) { // expected behaviour @@ -557,7 +943,7 @@ public void testStartBookieWithoutZKServer() throws Exception { */ @Test public void testStartBookieWithoutZKInitialized() throws Exception { - File tmpDir = createTempDir("bookie", "test"); + File tmpDir = tmpDirs.createNew("bookie", "test"); final String zkRoot = "/ledgers2"; final ServerConfiguration conf = TestBKConfiguration.newServerConfiguration() @@ -566,7 +952,7 @@ public void testStartBookieWithoutZKInitialized() throws Exception { .setMetadataServiceUri(zkUtil.getMetadataServiceUri(zkRoot)) .setZkTimeout(5000); try { - new Bookie(conf); + new TestBookieImpl(conf); fail("Should throw NoNodeException"); } catch (Exception e) { // shouldn't be able to start @@ -575,7 +961,7 @@ public void testStartBookieWithoutZKInitialized() throws Exception { adminConf.setMetadataServiceUri(zkUtil.getMetadataServiceUri(zkRoot)); BookKeeperAdmin.format(adminConf, false, false); - Bookie b = new Bookie(conf); + Bookie b = new TestBookieImpl(conf); b.shutdown(); } @@ -584,7 +970,7 @@ public void testStartBookieWithoutZKInitialized() throws Exception { */ @Test public void testWithDiskFullReadOnlyDisabledOrForceGCAllowDisabled() throws Exception { - File tmpDir = createTempDir("DiskCheck", "test"); + File tmpDir = tmpDirs.createNew("DiskCheck", "test"); long usableSpace = tmpDir.getUsableSpace(); long totalSpace = tmpDir.getTotalSpace(); final ServerConfiguration conf = TestBKConfiguration.newServerConfiguration() @@ -603,7 +989,7 @@ public void testWithDiskFullReadOnlyDisabledOrForceGCAllowDisabled() throws Exce conf.setMinUsableSizeForEntryLogCreation(Long.MAX_VALUE) .setReadOnlyModeEnabled(false); try { - new Bookie(conf); + new TestBookieImpl(conf); fail("NoWritableLedgerDirException expected"); } catch (NoWritableLedgerDirException e) { // expected @@ -612,7 +998,7 @@ public void testWithDiskFullReadOnlyDisabledOrForceGCAllowDisabled() throws Exce conf.setMinUsableSizeForEntryLogCreation(Long.MIN_VALUE) .setReadOnlyModeEnabled(false); try { - new Bookie(conf); + new TestBookieImpl(conf); fail("NoWritableLedgerDirException expected"); } catch (NoWritableLedgerDirException e) { // expected @@ -624,7 +1010,7 @@ public void testWithDiskFullReadOnlyDisabledOrForceGCAllowDisabled() throws Exce try { // bookie is okay to start up when readonly mode is enabled because entry log file creation // is deferred. 
- bookie = new Bookie(conf); + bookie = new TestBookieImpl(conf); } catch (NoWritableLedgerDirException e) { fail("NoWritableLedgerDirException unexpected"); } finally { @@ -639,7 +1025,7 @@ public void testWithDiskFullReadOnlyDisabledOrForceGCAllowDisabled() throws Exce */ @Test public void testWithDiskFullReadOnlyEnabledAndForceGCAllowAllowed() throws Exception { - File tmpDir = createTempDir("DiskCheck", "test"); + File tmpDir = tmpDirs.createNew("DiskCheck", "test"); long usableSpace = tmpDir.getUsableSpace(); long totalSpace = tmpDir.getTotalSpace(); final ServerConfiguration conf = TestBKConfiguration.newServerConfiguration() @@ -656,7 +1042,7 @@ public void testWithDiskFullReadOnlyEnabledAndForceGCAllowAllowed() throws Excep // while replaying the journal) conf.setReadOnlyModeEnabled(true) .setIsForceGCAllowWhenNoSpace(true); - final Bookie bk = new Bookie(conf); + final Bookie bk = new TestBookieImpl(conf); bk.start(); Thread.sleep((conf.getDiskCheckInterval() * 2) + 100); @@ -664,26 +1050,50 @@ public void testWithDiskFullReadOnlyEnabledAndForceGCAllowAllowed() throws Excep bk.shutdown(); } + @Test + public void testStartUpRegisteredWithUncleanShutdownDetection() throws Exception { + MockUncleanShutdownDetection uncleanShutdownDetection = new MockUncleanShutdownDetection(); + final ServerConfiguration conf = newServerConfiguration(); + BookieServer server = new MockBookieServer(conf, uncleanShutdownDetection); + server.start(); + assertTrue(uncleanShutdownDetection.getStartRegistered()); + server.shutdown(); + } + + @Test + public void testShutdownRegisteredWithUncleanShutdownDetection() throws Exception { + MockUncleanShutdownDetection uncleanShutdownDetection = new MockUncleanShutdownDetection(); + final ServerConfiguration conf = newServerConfiguration(); + BookieServer server = new MockBookieServer(conf, uncleanShutdownDetection); + server.start(); + server.shutdown(); + assertTrue(uncleanShutdownDetection.getShutdownRegistered()); + } + class MockBookieServer extends BookieServer { ServerConfiguration conf; - public MockBookieServer(ServerConfiguration conf) throws IOException, KeeperException, InterruptedException, - BookieException, UnavailableException, CompatibilityException, SecurityException { - super(conf); + public MockBookieServer(ServerConfiguration conf) throws Exception { + super(conf, + new MockBookieWithNoopShutdown(conf), + NullStatsLogger.INSTANCE, UnpooledByteBufAllocator.DEFAULT, + new MockUncleanShutdownDetection()); this.conf = conf; } - @Override - protected Bookie newBookie(ServerConfiguration conf) - throws IOException, KeeperException, InterruptedException, BookieException { - return new MockBookieWithNoopShutdown(conf, NullStatsLogger.INSTANCE); + public MockBookieServer(ServerConfiguration conf, + MockUncleanShutdownDetection uncleanShutdownDetection) throws Exception { + super(conf, + new MockBookieWithNoopShutdown(conf), + NullStatsLogger.INSTANCE, UnpooledByteBufAllocator.DEFAULT, + uncleanShutdownDetection); + this.conf = conf; } } - class MockBookieWithNoopShutdown extends Bookie { - public MockBookieWithNoopShutdown(ServerConfiguration conf, StatsLogger statsLogger) - throws IOException, KeeperException, InterruptedException, BookieException { - super(conf, statsLogger); + class MockBookieWithNoopShutdown extends TestBookieImpl { + public MockBookieWithNoopShutdown(ServerConfiguration conf) throws Exception { + super(conf); } // making Bookie Shutdown no-op. 
Ideally for this testcase we need to @@ -699,7 +1109,7 @@ synchronized int shutdown(int exitCode) { @Test public void testWithDiskFullAndAbilityToCreateNewIndexFile() throws Exception { - File tmpDir = createTempDir("DiskCheck", "test"); + File tmpDir = tmpDirs.createNew("DiskCheck", "test"); final ServerConfiguration conf = newServerConfiguration() .setJournalDirName(tmpDir.getPath()) @@ -728,12 +1138,14 @@ public void testWithDiskFullAndAbilityToCreateNewIndexFile() throws Exception { // are injecting no-op shutdown. server.shutdown(); - long usableSpace = tmpDir.getUsableSpace(); - long totalSpace = tmpDir.getTotalSpace(); conf.setDiskUsageThreshold(0.001f) .setDiskUsageWarnThreshold(0.0f).setReadOnlyModeEnabled(true).setIsForceGCAllowWhenNoSpace(true) .setMinUsableSizeForIndexFileCreation(Long.MAX_VALUE); - server = new BookieServer(conf); + server = new BookieServer( + conf, new TestBookieImpl(conf), + NullStatsLogger.INSTANCE, UnpooledByteBufAllocator.DEFAULT, + new MockUncleanShutdownDetection()); + // Now we are trying to start the Bookie, which tries to replay the // Journal. While replaying the Journal it tries to create the IndexFile // for the ledger (whose entries are not flushed). but since we set @@ -747,10 +1159,14 @@ public void testWithDiskFullAndAbilityToCreateNewIndexFile() throws Exception { // value. So if index dirs are full then it will consider the dirs which // have atleast MinUsableSizeForIndexFileCreation usable space for the // creation of new Index file. - conf.setMinUsableSizeForIndexFileCreation(5 * 1024); - server = new BookieServer(conf); + conf.setMinUsableSizeForIndexFileCreation(1 * 1024); + server = new BookieServer( + conf, new TestBookieImpl(conf), + NullStatsLogger.INSTANCE, UnpooledByteBufAllocator.DEFAULT, + new MockUncleanShutdownDetection()); + server.start(); - Thread.sleep((conf.getDiskCheckInterval() * 2) + 100); + Thread.sleep((conf.getDiskCheckInterval() * 2) + 1000); assertTrue("Bookie should be up and running", server.getBookie().isRunning()); assertTrue(server.getBookie().isReadOnly()); server.shutdown(); @@ -762,7 +1178,7 @@ public void testWithDiskFullAndAbilityToCreateNewIndexFile() throws Exception { */ @Test public void testWithDiskError() throws Exception { - File parent = createTempDir("DiskCheck", "test"); + File parent = tmpDirs.createNew("DiskCheck", "test"); File child = File.createTempFile("DiskCheck", "test", parent); final ServerConfiguration conf = TestBKConfiguration.newServerConfiguration() .setJournalDirName(child.getPath()) @@ -775,7 +1191,8 @@ public void testWithDiskError() throws Exception { LedgerDirsManager ldm = new LedgerDirsManager(conf, conf.getLedgerDirs(), new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold())); LedgerDirsMonitor ledgerMonitor = new LedgerDirsMonitor(conf, - new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold()), ldm); + new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold()), + Collections.singletonList(ldm)); ledgerMonitor.init(); fail("should throw exception"); } catch (Exception e) { @@ -789,8 +1206,8 @@ public void testWithDiskError() throws Exception { */ @Test public void testAllowDiskPartitionDuplicationDisabled() throws Exception { - File tmpDir1 = createTempDir("bookie", "test"); - File tmpDir2 = createTempDir("bookie", "test"); + File tmpDir1 = tmpDirs.createNew("bookie", "test"); + File tmpDir2 = tmpDirs.createNew("bookie", "test"); ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); 
int port = PortManager.nextFreePort(); @@ -804,7 +1221,10 @@ public void testAllowDiskPartitionDuplicationDisabled() throws Exception { .setAllowMultipleDirsUnderSameDiskPartition(false); BookieServer bs1 = null; try { - bs1 = new BookieServer(conf); + bs1 = new BookieServer( + conf, new TestBookieImpl(conf), + NullStatsLogger.INSTANCE, UnpooledByteBufAllocator.DEFAULT, + new MockUncleanShutdownDetection()); fail("Bookkeeper should not have started since AllowMultipleDirsUnderSameDiskPartition is not enabled"); } catch (DiskPartitionDuplicationException dpde) { // Expected @@ -814,8 +1234,8 @@ public void testAllowDiskPartitionDuplicationDisabled() throws Exception { } } - tmpDir1 = createTempDir("bookie", "test"); - tmpDir2 = createTempDir("bookie", "test"); + tmpDir1 = tmpDirs.createNew("bookie", "test"); + tmpDir2 = tmpDirs.createNew("bookie", "test"); port = PortManager.nextFreePort(); // multiple indexdirs in same diskpartition conf.setMetadataServiceUri(metadataServiceUri) @@ -827,7 +1247,10 @@ public void testAllowDiskPartitionDuplicationDisabled() throws Exception { .setAllowMultipleDirsUnderSameDiskPartition(false); bs1 = null; try { - bs1 = new BookieServer(conf); + bs1 = new BookieServer( + conf, new TestBookieImpl(conf), + NullStatsLogger.INSTANCE, UnpooledByteBufAllocator.DEFAULT, + new MockUncleanShutdownDetection()); fail("Bookkeeper should not have started since AllowMultipleDirsUnderSameDiskPartition is not enabled"); } catch (DiskPartitionDuplicationException dpde) { // Expected @@ -837,8 +1260,8 @@ public void testAllowDiskPartitionDuplicationDisabled() throws Exception { } } - tmpDir1 = createTempDir("bookie", "test"); - tmpDir2 = createTempDir("bookie", "test"); + tmpDir1 = tmpDirs.createNew("bookie", "test"); + tmpDir2 = tmpDirs.createNew("bookie", "test"); port = PortManager.nextFreePort(); // multiple journaldirs in same diskpartition conf.setMetadataServiceUri(metadataServiceUri) @@ -850,7 +1273,10 @@ public void testAllowDiskPartitionDuplicationDisabled() throws Exception { .setAllowMultipleDirsUnderSameDiskPartition(false); bs1 = null; try { - bs1 = new BookieServer(conf); + bs1 = new BookieServer( + conf, new TestBookieImpl(conf), + NullStatsLogger.INSTANCE, UnpooledByteBufAllocator.DEFAULT, + new MockUncleanShutdownDetection()); fail("Bookkeeper should not have started since AllowMultipleDirsUnderSameDiskPartition is not enabled"); } catch (DiskPartitionDuplicationException dpde) { // Expected @@ -867,12 +1293,12 @@ public void testAllowDiskPartitionDuplicationDisabled() throws Exception { */ @Test public void testAllowDiskPartitionDuplicationAllowed() throws Exception { - File tmpDir1 = createTempDir("bookie", "test"); - File tmpDir2 = createTempDir("bookie", "test"); - File tmpDir3 = createTempDir("bookie", "test"); - File tmpDir4 = createTempDir("bookie", "test"); - File tmpDir5 = createTempDir("bookie", "test"); - File tmpDir6 = createTempDir("bookie", "test"); + File tmpDir1 = tmpDirs.createNew("bookie", "test"); + File tmpDir2 = tmpDirs.createNew("bookie", "test"); + File tmpDir3 = tmpDirs.createNew("bookie", "test"); + File tmpDir4 = tmpDirs.createNew("bookie", "test"); + File tmpDir5 = tmpDirs.createNew("bookie", "test"); + File tmpDir6 = tmpDirs.createNew("bookie", "test"); ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); int port = 12555; @@ -885,7 +1311,11 @@ public void testAllowDiskPartitionDuplicationAllowed() throws Exception { conf.setAllowMultipleDirsUnderSameDiskPartition(true); BookieServer bs1 = null; try { - bs1 = 
new BookieServer(conf); + bs1 = new BookieServer( + conf, new TestBookieImpl(conf), + NullStatsLogger.INSTANCE, UnpooledByteBufAllocator.DEFAULT, + new MockUncleanShutdownDetection()); + } catch (DiskPartitionDuplicationException dpde) { fail("Bookkeeper should have started since AllowMultipleDirsUnderSameDiskPartition is enabled"); } finally { @@ -897,7 +1327,9 @@ public void testAllowDiskPartitionDuplicationAllowed() throws Exception { private ZooKeeperClient createNewZKClient() throws Exception { // create a zookeeper client - LOG.debug("Instantiate ZK Client"); + if (LOG.isDebugEnabled()) { + LOG.debug("Instantiate ZK Client"); + } return ZooKeeperClient.newBuilder() .connectString(zkUtil.getZooKeeperConnectString()) .build(); @@ -909,32 +1341,41 @@ private ZooKeeperClient createNewZKClient() throws Exception { @Test(timeout = 10000) public void testPersistBookieStatus() throws Exception { // enable persistent bookie status - File tmpDir = createTempDir("bookie", "test"); + File tmpDir = tmpDirs.createNew("bookie", "test"); final ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); conf.setJournalDirName(tmpDir.getPath()) .setLedgerDirNames(new String[] { tmpDir.getPath() }) .setReadOnlyModeEnabled(true) .setPersistBookieStatusEnabled(true) .setMetadataServiceUri(metadataServiceUri); - BookieServer bookieServer = new BookieServer(conf); + BookieServer bookieServer = new BookieServer( + conf, new TestBookieImpl(conf), + NullStatsLogger.INSTANCE, UnpooledByteBufAllocator.DEFAULT, + new MockUncleanShutdownDetection()); bookieServer.start(); Bookie bookie = bookieServer.getBookie(); assertFalse(bookie.isReadOnly()); // transition to readonly mode, bookie status should be persisted in ledger disks - bookie.getStateManager().doTransitionToReadOnlyMode(); + bookie.getStateManager().transitionToReadOnlyMode().get(); assertTrue(bookie.isReadOnly()); // restart bookie should start in read only mode bookieServer.shutdown(); - bookieServer = new BookieServer(conf); + bookieServer = new BookieServer( + conf, new TestBookieImpl(conf), + NullStatsLogger.INSTANCE, UnpooledByteBufAllocator.DEFAULT, + new MockUncleanShutdownDetection()); bookieServer.start(); bookie = bookieServer.getBookie(); assertTrue(bookie.isReadOnly()); // transition to writable mode - bookie.getStateManager().doTransitionToWritableMode(); + bookie.getStateManager().transitionToWritableMode().get(); // restart bookie should start in writable mode bookieServer.shutdown(); - bookieServer = new BookieServer(conf); + bookieServer = new BookieServer( + conf, new TestBookieImpl(conf), + NullStatsLogger.INSTANCE, UnpooledByteBufAllocator.DEFAULT, + new MockUncleanShutdownDetection()); bookieServer.start(); bookie = bookieServer.getBookie(); assertFalse(bookie.isReadOnly()); @@ -946,7 +1387,7 @@ public void testPersistBookieStatus() throws Exception { */ @Test(timeout = 10000) public void testReadOnlyBookieShouldIgnoreBookieStatus() throws Exception { - File tmpDir = createTempDir("bookie", "test"); + File tmpDir = tmpDirs.createNew("bookie", "test"); final ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); conf.setJournalDirName(tmpDir.getPath()) .setLedgerDirNames(new String[] { tmpDir.getPath() }) @@ -954,24 +1395,33 @@ public void testReadOnlyBookieShouldIgnoreBookieStatus() throws Exception { .setPersistBookieStatusEnabled(true) .setMetadataServiceUri(metadataServiceUri); // start new bookie - BookieServer bookieServer = new BookieServer(conf); + BookieServer bookieServer = new BookieServer( 
+ conf, new TestBookieImpl(conf), + NullStatsLogger.INSTANCE, UnpooledByteBufAllocator.DEFAULT, + new MockUncleanShutdownDetection()); bookieServer.start(); Bookie bookie = bookieServer.getBookie(); // persist bookie status - bookie.getStateManager().doTransitionToReadOnlyMode(); - bookie.getStateManager().doTransitionToWritableMode(); + bookie.getStateManager().transitionToReadOnlyMode().get(); + bookie.getStateManager().transitionToWritableMode().get(); assertFalse(bookie.isReadOnly()); bookieServer.shutdown(); // start read only bookie final ServerConfiguration readOnlyConf = TestBKConfiguration.newServerConfiguration(); readOnlyConf.loadConf(conf); readOnlyConf.setForceReadOnlyBookie(true); - bookieServer = new BookieServer(readOnlyConf); + + bookieServer = new BookieServer( + readOnlyConf, + TestBookieImpl.buildReadOnly(readOnlyConf), + NullStatsLogger.INSTANCE, UnpooledByteBufAllocator.DEFAULT, + new MockUncleanShutdownDetection()); + bookieServer.start(); bookie = bookieServer.getBookie(); assertTrue(bookie.isReadOnly()); // transition to writable should fail - bookie.getStateManager().doTransitionToWritableMode(); + bookie.getStateManager().transitionToWritableMode().get(); assertTrue(bookie.isReadOnly()); bookieServer.shutdown(); } @@ -985,7 +1435,7 @@ public void testRetrieveBookieStatusWhenStatusFileIsCorrupted() throws Exception File[] tmpLedgerDirs = new File[3]; String[] filePath = new String[tmpLedgerDirs.length]; for (int i = 0; i < tmpLedgerDirs.length; i++) { - tmpLedgerDirs[i] = createTempDir("bookie", "test" + i); + tmpLedgerDirs[i] = tmpDirs.createNew("bookie", "test" + i); filePath[i] = tmpLedgerDirs[i].getPath(); } final ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); @@ -995,20 +1445,26 @@ public void testRetrieveBookieStatusWhenStatusFileIsCorrupted() throws Exception .setPersistBookieStatusEnabled(true) .setMetadataServiceUri(metadataServiceUri); // start a new bookie - BookieServer bookieServer = new BookieServer(conf); + BookieServer bookieServer = new BookieServer( + conf, new TestBookieImpl(conf), + NullStatsLogger.INSTANCE, UnpooledByteBufAllocator.DEFAULT, + new MockUncleanShutdownDetection()); bookieServer.start(); // transition in to read only and persist the status on disk - Bookie bookie = bookieServer.getBookie(); + Bookie bookie = (BookieImpl) bookieServer.getBookie(); assertFalse(bookie.isReadOnly()); - bookie.getStateManager().doTransitionToReadOnlyMode(); + bookie.getStateManager().transitionToReadOnlyMode().get(); assertTrue(bookie.isReadOnly()); // corrupt status file - List ledgerDirs = bookie.getLedgerDirsManager().getAllLedgerDirs(); + List ledgerDirs = ((BookieImpl) bookie).getLedgerDirsManager().getAllLedgerDirs(); corruptFile(new File(ledgerDirs.get(0), BOOKIE_STATUS_FILENAME)); corruptFile(new File(ledgerDirs.get(1), BOOKIE_STATUS_FILENAME)); // restart the bookie should be in read only mode bookieServer.shutdown(); - bookieServer = new BookieServer(conf); + bookieServer = new BookieServer( + conf, new TestBookieImpl(conf), + NullStatsLogger.INSTANCE, UnpooledByteBufAllocator.DEFAULT, + new MockUncleanShutdownDetection()); bookieServer.start(); bookie = bookieServer.getBookie(); assertTrue(bookie.isReadOnly()); @@ -1024,7 +1480,7 @@ public void testReadLatestBookieStatus() throws Exception { File[] tmpLedgerDirs = new File[3]; String[] filePath = new String[tmpLedgerDirs.length]; for (int i = 0; i < tmpLedgerDirs.length; i++) { - tmpLedgerDirs[i] = createTempDir("bookie", "test" + i); + tmpLedgerDirs[i] = 
tmpDirs.createNew("bookie", "test" + i); filePath[i] = tmpLedgerDirs[i].getPath(); } final ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); @@ -1034,22 +1490,28 @@ public void testReadLatestBookieStatus() throws Exception { .setPersistBookieStatusEnabled(true) .setMetadataServiceUri(metadataServiceUri); // start a new bookie - BookieServer bookieServer = new BookieServer(conf); + BookieServer bookieServer = new BookieServer( + conf, new TestBookieImpl(conf), + NullStatsLogger.INSTANCE, UnpooledByteBufAllocator.DEFAULT, + new MockUncleanShutdownDetection()); bookieServer.start(); // transition in to read only and persist the status on disk - Bookie bookie = bookieServer.getBookie(); + Bookie bookie = (BookieImpl) bookieServer.getBookie(); assertFalse(bookie.isReadOnly()); - bookie.getStateManager().doTransitionToReadOnlyMode(); + bookie.getStateManager().transitionToReadOnlyMode().get(); assertTrue(bookie.isReadOnly()); // Manually update a status file, so it becomes the latest Thread.sleep(1); BookieStatus status = new BookieStatus(); List dirs = new ArrayList(); - dirs.add(bookie.getLedgerDirsManager().getAllLedgerDirs().get(0)); + dirs.add(((BookieImpl) bookie).getLedgerDirsManager().getAllLedgerDirs().get(0)); status.writeToDirectories(dirs); // restart the bookie should start in writable state bookieServer.shutdown(); - bookieServer = new BookieServer(conf); + bookieServer = new BookieServer( + conf, new TestBookieImpl(conf), + NullStatsLogger.INSTANCE, UnpooledByteBufAllocator.DEFAULT, + new MockUncleanShutdownDetection()); bookieServer.start(); bookie = bookieServer.getBookie(); assertFalse(bookie.isReadOnly()); @@ -1072,4 +1534,240 @@ private void corruptFile(File file) throws IOException { } } + @Test + public void testIOVertexHTTPServerEndpointForBookieWithPrometheusProvider() throws Exception { + File tmpDir = tmpDirs.createNew("bookie", "test"); + + final ServerConfiguration conf = TestBKConfiguration.newServerConfiguration() + .setJournalDirName(tmpDir.getPath()).setLedgerDirNames(new String[] { tmpDir.getPath() }) + .setBookiePort(PortManager.nextFreePort()).setMetadataServiceUri(metadataServiceUri) + .setListeningInterface(null); + + /* + * enable io.vertx http server + */ + int nextFreePort = PortManager.nextFreePort(); + conf.setStatsProviderClass(PrometheusMetricsProvider.class); + conf.setHttpServerEnabled(true); + conf.setProperty(HttpServerLoader.HTTP_SERVER_CLASS, "org.apache.bookkeeper.http.vertx.VertxHttpServer"); + conf.setHttpServerPort(nextFreePort); + + // 1. building the component stack: + LifecycleComponent server = Main.buildBookieServer(new BookieConfiguration(conf)); + // 2. 
start the server
+ CompletableFuture<Void> stackComponentFuture = ComponentStarter.startComponent(server);
+ while (server.lifecycleState() != Lifecycle.State.STARTED) {
+ Thread.sleep(100);
+ }
+
+ // Now, hit the rest endpoint for metrics
+ URL url = new URL("http://localhost:" + nextFreePort + HttpRouter.METRICS);
+ URLConnection urlc = url.openConnection();
+ BufferedReader in = new BufferedReader(new InputStreamReader(urlc.getInputStream()));
+ String inputLine;
+ StringBuilder metricsStringBuilder = new StringBuilder();
+ while ((inputLine = in.readLine()) != null) {
+ metricsStringBuilder.append(inputLine);
+ }
+ in.close();
+ String metrics = metricsStringBuilder.toString();
+ // do primitive checks if metrics string contains some stats
+ assertTrue("Metrics should contain basic counters", metrics.contains(BookKeeperServerStats.BOOKIE_ADD_ENTRY));
+
+ // Now, hit the rest endpoint for configs
+ url = new URL("http://localhost:" + nextFreePort + HttpRouter.SERVER_CONFIG);
+ @SuppressWarnings("unchecked")
+ Map<String, Object> configMap = om.readValue(url, Map.class);
+ if (configMap.isEmpty() || !configMap.containsKey("bookiePort")) {
+ fail("Failed to map configurations to valid JSON entries.");
+ }
+ stackComponentFuture.cancel(true);
+ }
+
+ @Test
+ public void testIOVertexHTTPServerEndpointForARWithPrometheusProvider() throws Exception {
+ final ServerConfiguration conf = TestBKConfiguration.newServerConfiguration()
+ .setMetadataServiceUri(metadataServiceUri).setListeningInterface(null);
+
+ /*
+ * enable io.vertx http server
+ */
+ int nextFreePort = PortManager.nextFreePort();
+ conf.setStatsProviderClass(PrometheusMetricsProvider.class);
+ conf.setHttpServerEnabled(true);
+ conf.setProperty(HttpServerLoader.HTTP_SERVER_CLASS, "org.apache.bookkeeper.http.vertx.VertxHttpServer");
+ conf.setHttpServerPort(nextFreePort);
+
+ // 1. building the component stack:
+ LifecycleComponent server = AutoRecoveryMain.buildAutoRecoveryServer(new BookieConfiguration(conf));
+ // 2. start the server
+ CompletableFuture<Void> stackComponentFuture = ComponentStarter.startComponent(server);
+ while (server.lifecycleState() != Lifecycle.State.STARTED) {
+ Thread.sleep(100);
+ }
+
+ // Now, hit the rest endpoint for metrics
+ URL url = new URL("http://localhost:" + nextFreePort + HttpRouter.METRICS);
+ URLConnection urlc = url.openConnection();
+ BufferedReader in = new BufferedReader(new InputStreamReader(urlc.getInputStream()));
+ String inputLine;
+ StringBuilder metricsStringBuilder = new StringBuilder();
+ while ((inputLine = in.readLine()) != null) {
+ metricsStringBuilder.append(inputLine);
+ }
+ in.close();
+ String metrics = metricsStringBuilder.toString();
+ // do primitive checks if metrics string contains some stats
+ assertTrue("Metrics should contain basic counters",
+ metrics.contains(ReplicationStats.NUM_FULL_OR_PARTIAL_LEDGERS_REPLICATED));
+ assertTrue("Metrics should contain basic counters from BookKeeper client",
+ metrics.contains(BookKeeperClientStats.CREATE_OP));
+
+ // Now, hit the rest endpoint for configs
+ url = new URL("http://localhost:" + nextFreePort + HttpRouter.SERVER_CONFIG);
+ @SuppressWarnings("unchecked")
+ Map<String, Object> configMap = om.readValue(url, Map.class);
+ if (configMap.isEmpty() || !configMap.containsKey("metadataServiceUri")) {
+ fail("Failed to map configurations to valid JSON entries.");
+ }
+ stackComponentFuture.cancel(true);
+ }
+
+ /**
+ * Test that verifies if a bookie can't come up without its cookie in metadata store.
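+ *
+ * <p>In short, the flow exercised below is (a sketch reusing the test's own calls):
+ * <pre>{@code
+ * Versioned<Cookie> cookie = Cookie.readFromRegistrationManager(rm, bookieId);
+ * cookie.getValue().deleteFromRegistrationManager(rm, conf, cookie.getVersion());
+ * Main.buildBookieServer(new BookieConfiguration(conf)); // expected to fail: no cookie
+ * }</pre>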
+ * @throws Exception
+ */
+ @Test
+ public void testBookieConnectAfterCookieDelete() throws BookieException.UpgradeException {
+ ServerConfiguration conf = TestBKConfiguration.newServerConfiguration();
+ conf.setMetadataServiceUri(zkUtil.getMetadataServiceUri());
+
+ try {
+ runFunctionWithRegistrationManager(conf, rm -> {
+ try {
+ bookieConnectAfterCookieDeleteWorker(conf, rm);
+ } catch (Exception e) {
+ fail("Test failed to run: " + e.getMessage());
+ }
+ return null;
+ });
+ } catch (MetadataException | ExecutionException e) {
+ throw new BookieException.UpgradeException(e);
+ }
+ }
+
+ private void bookieConnectAfterCookieDeleteWorker(ServerConfiguration conf, RegistrationManager rm)
+ throws Exception {
+
+ File tmpLedgerDir = tmpDirs.createNew("BootupTest", "test");
+ File tmpJournalDir = tmpDirs.createNew("BootupTest", "test");
+ Integer numOfJournalDirs = 2;
+
+ String[] journalDirs = new String[numOfJournalDirs];
+ for (int i = 0; i < numOfJournalDirs; i++) {
+ journalDirs[i] = tmpJournalDir.getAbsolutePath() + "/journal-" + i;
+ }
+
+ conf.setJournalDirsName(journalDirs);
+ conf.setLedgerDirNames(new String[] { tmpLedgerDir.getPath() });
+
+ LifecycleComponent server = Main.buildBookieServer(new BookieConfiguration(conf));
+ server.start();
+
+ final BookieId bookieAddress = BookieImpl.getBookieId(conf);
+
+ // Read cookie from registration manager
+ Versioned<Cookie> rmCookie = Cookie.readFromRegistrationManager(rm, bookieAddress);
+
+ // Shutdown bookie
+ server.stop();
+
+ // Remove cookie from registration manager
+ rmCookie.getValue().deleteFromRegistrationManager(rm, conf, rmCookie.getVersion());
+
+ try {
+ Main.buildBookieServer(new BookieConfiguration(conf));
+ fail("Bookie should not have been buildable. Cookie not present in metadata store.");
+ } catch (Exception e) {
+ LOG.info("As expected, Bookie fails to be built without a cookie in the metadata store.");
+ }
+ }
+
+ @Test
+ public void testInvalidServiceMetadataURI() throws Exception {
+ testInvalidServiceMetadataURICase("zk+null:///ledgers"); // no hostname
+ testInvalidServiceMetadataURICase("zk+null://ledgers");
+ testInvalidServiceMetadataURICase("zk+null:ledgers");
+ {
+ ServerConfiguration conf = TestBKConfiguration.newServerConfiguration();
+ conf.setMetadataServiceUri("//ledgers");
+ try {
+ new BookieServer(conf, new TestBookieImpl(conf),
+ NullStatsLogger.INSTANCE, UnpooledByteBufAllocator.DEFAULT,
+ new MockUncleanShutdownDetection());
+ fail("Bookie metadata initialization must fail with metadata service uri: //ledgers");
+ } catch (NullPointerException e) {
+ assertTrue(e.getMessage().contains("Invalid metadata service uri : //ledgers"));
+ }
+ }
+
+ {
+ ServerConfiguration conf = TestBKConfiguration.newServerConfiguration();
+ conf.setMetadataServiceUri("");
+ try {
+ new BookieServer(conf, new TestBookieImpl(conf),
+ NullStatsLogger.INSTANCE, UnpooledByteBufAllocator.DEFAULT,
+ new MockUncleanShutdownDetection());
+ fail("Bookie metadata initialization must fail with empty metadata service uri");
+ } catch (NullPointerException e) {
+ assertTrue(e.getMessage().contains("Invalid metadata service uri :"));
+ }
+ }
+ }
+
+ private void testInvalidServiceMetadataURICase(String uri) throws Exception {
+ ServerConfiguration conf = TestBKConfiguration.newServerConfiguration();
+ conf.setMetadataServiceUri(uri);
+ try {
+ new BookieServer(conf, new TestBookieImpl(conf),
+ NullStatsLogger.INSTANCE, UnpooledByteBufAllocator.DEFAULT,
+ new MockUncleanShutdownDetection());
+ fail("Bookie metadata initialization must fail
with an invalid metadata service uri: " + uri); + } catch (MetadataStoreException e) { + // ok + } + } + + @Test + public void testBookieIdSetting() throws Exception { + String customBookieId = "customId"; + // If BookieID is set, it takes precedence over network info. + final ServerConfiguration conf = newServerConfiguration().setBookieId(customBookieId); + BookieServer server = new MockBookieServer(conf); + server.start(); + assertEquals(customBookieId, server.getBookieId().toString()); + server.shutdown(); + } + + @Test + public void testBookieIdChange() throws Exception { + // By default, network info is set as Bookie Id and it is stored in the Cookie. + final ServerConfiguration conf = newServerConfiguration(); + LifecycleComponent server = Main.buildBookieServer(new BookieConfiguration(conf)); + server.start(); + server.stop(); + + // If BookieID is set, it takes precedence over network info. Because of that, the new Bookie start + // should fail with an InvalidCookieException, as now the custom BookieID takes precedence. + String customBookieId = "customId"; + conf.setBookieId(customBookieId); + try { + Main.buildBookieServer(new BookieConfiguration(conf)); + } catch (BookieException.InvalidCookieException e) { + // This is the expected case, as the customBookieId prevails over the default one. + } catch (Exception e) { + // Unexpected exception, failing. + fail(); + } + } } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieJournalBypassTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieJournalBypassTest.java new file mode 100644 index 00000000000..f1b8c2a3153 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieJournalBypassTest.java @@ -0,0 +1,108 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.bookie; + +import static org.junit.Assert.assertEquals; + +import lombok.Cleanup; +import lombok.extern.slf4j.Slf4j; +import org.apache.bookkeeper.client.BookKeeper; +import org.apache.bookkeeper.client.api.WriteHandle; +import org.apache.bookkeeper.conf.ClientConfiguration; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.test.BookKeeperClusterTestCase; +import org.junit.Test; + +/** + * Tests that we're skipping journal when it's configured to do so. 
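+ *
+ * <p>The bypass is driven by a single server flag, applied below to the first bookie
+ * only; a minimal sketch:
+ * <pre>{@code
+ * ServerConfiguration conf = new ServerConfiguration();
+ * conf.setJournalWriteData(false); // entry payloads are no longer written to the journal
+ * }</pre>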
+ */
+@Slf4j
+public class BookieJournalBypassTest extends BookKeeperClusterTestCase {
+
+ private int bookieIdx = 0;
+
+ public BookieJournalBypassTest() {
+ super(2);
+ }
+
+ @Override
+ protected ServerTester startBookie(ServerConfiguration conf) throws Exception {
+ if (bookieIdx++ == 0) {
+ // First bookie will have the journal disabled
+ conf.setJournalWriteData(false);
+ }
+ return super.startBookie(conf);
+ }
+
+ @Test
+ public void testJournalBypass() throws Exception {
+ ClientConfiguration conf = new ClientConfiguration(baseClientConf);
+
+ BookieImpl bookieImpl = (BookieImpl) serverByIndex(0).getBookie();
+ Journal journal0 = bookieImpl.journals.get(0);
+ LedgerStorage ls0 = serverByIndex(0).getBookie().getLedgerStorage();
+
+ bookieImpl = (BookieImpl) serverByIndex(1).getBookie();
+ Journal journal1 = bookieImpl.journals.get(0);
+ LedgerStorage ls1 = serverByIndex(1).getBookie().getLedgerStorage();
+
+ ls0.flush();
+ ls1.flush();
+
+ long bk0OffsetBefore = journal0.getLastLogMark().getCurMark().getLogFileOffset();
+ long bk1OffsetBefore = journal1.getLastLogMark().getCurMark().getLogFileOffset();
+
+ writeEntries(conf);
+ ls0.flush();
+ ls1.flush();
+
+ long bk0OffsetAfter = journal0.getLastLogMark().getCurMark().getLogFileOffset();
+ long bk1OffsetAfter = journal1.getLastLogMark().getCurMark().getLogFileOffset();
+
+ int flushDelta = 10 * 1024;
+ int dataSize = 10 * 1024 * 1024;
+
+ // Offset for journal-0 will be very close to the previous point, just a few KBs when flushing
+ assertEquals(bk0OffsetBefore, bk0OffsetAfter, flushDelta);
+
+ // Offset for journal-1 should have changed with the data size
+ assertEquals(bk1OffsetBefore + dataSize, bk1OffsetAfter, flushDelta);
+ }
+
+ private void writeEntries(ClientConfiguration conf)
+ throws Exception {
+ @Cleanup
+ BookKeeper bkc = new BookKeeper(conf);
+
+ @Cleanup
+ WriteHandle wh = bkc.newCreateLedgerOp()
+ .withEnsembleSize(2)
+ .withWriteQuorumSize(2)
+ .withAckQuorumSize(2)
+ .withPassword("".getBytes())
+ .execute()
+ .join();
+
+ for (int i = 0; i < 10; i++) {
+ wh.append(new byte[1024 * 1024]);
+ }
+ }
+}
diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieJournalForceTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieJournalForceTest.java
index 76daa7d56ad..dee9502bda9 100644
--- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieJournalForceTest.java
+++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieJournalForceTest.java
@@ -23,28 +23,30 @@
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.ArgumentMatchers.anyLong;
+import static org.mockito.ArgumentMatchers.nullable;
import static org.mockito.Mockito.atLeast;
import static org.mockito.Mockito.doAnswer;
+import static org.mockito.Mockito.doReturn;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.spy;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
-import static org.powermock.api.mockito.PowerMockito.whenNew;
import io.netty.buffer.ByteBuf;
import io.netty.buffer.Unpooled;
import java.io.File;
-import java.util.concurrent.BlockingQueue;
import java.util.concurrent.CountDownLatch;
-import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;
import lombok.extern.slf4j.Slf4j;
import org.apache.bookkeeper.bookie.Journal.ForceWriteRequest;
import
org.apache.bookkeeper.bookie.Journal.LastLogMark; +import org.apache.bookkeeper.bookie.stats.JournalStats; +import org.apache.bookkeeper.common.collections.BatchedArrayBlockingQueue; import org.apache.bookkeeper.conf.ServerConfiguration; import org.apache.bookkeeper.conf.TestBKConfiguration; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.WriteCallback; import org.apache.bookkeeper.stats.Counter; import org.apache.bookkeeper.test.TestStatsProvider; @@ -53,16 +55,13 @@ import org.junit.rules.TemporaryFolder; import org.junit.runner.RunWith; import org.mockito.invocation.InvocationOnMock; +import org.mockito.junit.MockitoJUnitRunner; import org.mockito.stubbing.Answer; -import org.powermock.core.classloader.annotations.PrepareForTest; -import org.powermock.modules.junit4.PowerMockRunner; -import org.powermock.reflect.Whitebox; /** * Test the bookie journal. */ -@RunWith(PowerMockRunner.class) -@PrepareForTest({JournalChannel.class, Journal.class}) +@RunWith(MockitoJUnitRunner.class) @Slf4j public class BookieJournalForceTest { @@ -74,24 +73,24 @@ public class BookieJournalForceTest { @Test public void testAckAfterSync() throws Exception { File journalDir = tempDir.newFolder(); - Bookie.checkDirectoryStructure(Bookie.getCurrentDirectory(journalDir)); + BookieImpl.checkDirectoryStructure(BookieImpl.getCurrentDirectory(journalDir)); ServerConfiguration conf = TestBKConfiguration.newServerConfiguration() .setJournalDirName(journalDir.getPath()) .setMetadataServiceUri(null) .setJournalAdaptiveGroupWrites(false); - JournalChannel jc = spy(new JournalChannel(journalDir, 1)); - whenNew(JournalChannel.class).withAnyArguments().thenReturn(jc); - LedgerDirsManager ledgerDirsManager = mock(LedgerDirsManager.class); Journal journal = new Journal(0, journalDir, conf, ledgerDirsManager); - // machinery to suspend ForceWriteThread CountDownLatch forceWriteThreadSuspendedLatch = new CountDownLatch(1); - LinkedBlockingQueue supportQueue = + BatchedArrayBlockingQueue supportQueue = enableForceWriteThreadSuspension(forceWriteThreadSuspendedLatch, journal); + journal = spy(journal); + JournalChannel jc = spy(new JournalChannel(journalDir, 1)); + doReturn(jc).when(journal).newLogFile(anyLong(), nullable(Long.class)); + journal.start(); LogMark lastLogMarkBeforeWrite = journal.getLastLogMark().markLog().getCurMark(); @@ -100,7 +99,7 @@ public void testAckAfterSync() throws Exception { long entryId = 0; journal.logAddEntry(ledgerId, entryId, DATA, false /* ackBeforeSync */, new WriteCallback() { @Override - public void writeComplete(int rc, long ledgerId, long entryId, BookieSocketAddress addr, Object ctx) { + public void writeComplete(int rc, long ledgerId, long entryId, BookieId addr, Object ctx) { latch.countDown(); } }, null); @@ -137,22 +136,24 @@ public void writeComplete(int rc, long ledgerId, long entryId, BookieSocketAddre @Test public void testAckBeforeSync() throws Exception { File journalDir = tempDir.newFolder(); - Bookie.checkDirectoryStructure(Bookie.getCurrentDirectory(journalDir)); + BookieImpl.checkDirectoryStructure(BookieImpl.getCurrentDirectory(journalDir)); ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); conf.setJournalDirName(journalDir.getPath()) .setMetadataServiceUri(null) .setJournalAdaptiveGroupWrites(false); - JournalChannel jc = spy(new JournalChannel(journalDir, 1)); - whenNew(JournalChannel.class).withAnyArguments().thenReturn(jc); - 
LedgerDirsManager ledgerDirsManager = mock(LedgerDirsManager.class); Journal journal = new Journal(0, journalDir, conf, ledgerDirsManager); - // machinery to suspend ForceWriteThread CountDownLatch forceWriteThreadSuspendedLatch = new CountDownLatch(1); - enableForceWriteThreadSuspension(forceWriteThreadSuspendedLatch, journal); + BatchedArrayBlockingQueue supportQueue = + enableForceWriteThreadSuspension(forceWriteThreadSuspendedLatch, journal); + + journal = spy(journal); + JournalChannel jc = spy(new JournalChannel(journalDir, 1)); + doReturn(jc).when(journal).newLogFile(anyLong(), nullable(Long.class)); + journal.start(); LogMark lastLogMarkBeforeWrite = journal.getLastLogMark().markLog().getCurMark(); @@ -161,7 +162,7 @@ public void testAckBeforeSync() throws Exception { long entryId = 0; journal.logAddEntry(ledgerId, entryId, DATA, true /* ackBeforeSync */, new WriteCallback() { @Override - public void writeComplete(int rc, long ledgerId, long entryId, BookieSocketAddress addr, Object ctx) { + public void writeComplete(int rc, long ledgerId, long entryId, BookieId addr, Object ctx) { latch.countDown(); } }, null); @@ -188,7 +189,7 @@ public void writeComplete(int rc, long ledgerId, long entryId, BookieSocketAddre @Test public void testAckBeforeSyncWithJournalBufferedEntriesThreshold() throws Exception { File journalDir = tempDir.newFolder(); - Bookie.checkDirectoryStructure(Bookie.getCurrentDirectory(journalDir)); + BookieImpl.checkDirectoryStructure(BookieImpl.getCurrentDirectory(journalDir)); final int journalBufferedEntriesThreshold = 10; // sending a burst of entries, more than journalBufferedEntriesThreshold @@ -200,20 +201,21 @@ public void testAckBeforeSyncWithJournalBufferedEntriesThreshold() throws Except .setMetadataServiceUri(null) .setJournalAdaptiveGroupWrites(false); - JournalChannel jc = spy(new JournalChannel(journalDir, 1)); - whenNew(JournalChannel.class).withAnyArguments().thenReturn(jc); - LedgerDirsManager ledgerDirsManager = mock(LedgerDirsManager.class); Journal journal = new Journal(0, journalDir, conf, ledgerDirsManager); - // machinery to suspend ForceWriteThread CountDownLatch forceWriteThreadSuspendedLatch = new CountDownLatch(1); enableForceWriteThreadSuspension(forceWriteThreadSuspendedLatch, journal); + journal = spy(journal); + JournalChannel jc = spy(new JournalChannel(journalDir, 1)); + doReturn(jc).when(journal).newLogFile(anyLong(), nullable(Long.class)); + + JournalStats journalStats = journal.getJournalStats(); TestStatsProvider testStatsProvider = new TestStatsProvider(); Counter flushMaxOutstandingBytesCounter = testStatsProvider.getStatsLogger("test") .getCounter("flushMaxOutstandingBytesCounter"); - Whitebox.setInternalState(journal, "flushMaxOutstandingBytesCounter", flushMaxOutstandingBytesCounter); + journalStats.setFlushMaxOutstandingBytesCounter(flushMaxOutstandingBytesCounter); journal.start(); @@ -223,7 +225,7 @@ public void testAckBeforeSyncWithJournalBufferedEntriesThreshold() throws Except for (long entryId = 0; entryId < numEntries; entryId++) { journal.logAddEntry(ledgerId, entryId, DATA, true /* ackBeforeSync */, new WriteCallback() { @Override - public void writeComplete(int rc, long ledgerId, long entryId, BookieSocketAddress addr, Object ctx) { + public void writeComplete(int rc, long ledgerId, long entryId, BookieId addr, Object ctx) { latch.countDown(); } }, null); @@ -253,17 +255,18 @@ public void writeComplete(int rc, long ledgerId, long entryId, BookieSocketAddre @Test public void testInterleavedRequests() throws 
Exception { File journalDir = tempDir.newFolder(); - Bookie.checkDirectoryStructure(Bookie.getCurrentDirectory(journalDir)); + BookieImpl.checkDirectoryStructure(BookieImpl.getCurrentDirectory(journalDir)); ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); conf.setJournalDirName(journalDir.getPath()) .setMetadataServiceUri(null); JournalChannel jc = spy(new JournalChannel(journalDir, 1)); - whenNew(JournalChannel.class).withAnyArguments().thenReturn(jc); LedgerDirsManager ledgerDirsManager = mock(LedgerDirsManager.class); - Journal journal = new Journal(0, journalDir, conf, ledgerDirsManager); + Journal journal = spy(new Journal(0, journalDir, conf, ledgerDirsManager)); + doReturn(jc).when(journal).newLogFile(anyLong(), nullable(Long.class)); + journal.start(); final int numEntries = 100; @@ -275,13 +278,13 @@ public void testInterleavedRequests() throws Exception { for (long entryId = 0; entryId < numEntries; entryId++) { journal.logAddEntry(ledgerIdAckBeforeSync, entryId, DATA, true, new WriteCallback() { @Override - public void writeComplete(int rc, long ledgerId, long entryId, BookieSocketAddress addr, Object ctx) { + public void writeComplete(int rc, long ledgerId, long entryId, BookieId addr, Object ctx) { latchAckBeforeSynch.countDown(); } }, null); journal.logAddEntry(ledgerIdAckAfterSync, entryId, DATA, false, new WriteCallback() { @Override - public void writeComplete(int rc, long ledgerId, long entryId, BookieSocketAddress addr, Object ctx) { + public void writeComplete(int rc, long ledgerId, long entryId, BookieId addr, Object ctx) { latchAckAfterSynch.countDown(); } }, null); @@ -299,43 +302,44 @@ public void writeComplete(int rc, long ledgerId, long entryId, BookieSocketAddre } @SuppressWarnings("unchecked") - private LinkedBlockingQueue enableForceWriteThreadSuspension( + private BatchedArrayBlockingQueue enableForceWriteThreadSuspension( CountDownLatch forceWriteThreadSuspendedLatch, Journal journal) throws InterruptedException { - LinkedBlockingQueue supportQueue = new LinkedBlockingQueue<>(); - BlockingQueue forceWriteRequests = mock(BlockingQueue.class); + BatchedArrayBlockingQueue supportQueue = new BatchedArrayBlockingQueue<>(10000); + BatchedArrayBlockingQueue forceWriteRequests = mock(BatchedArrayBlockingQueue.class); doAnswer((Answer) (InvocationOnMock iom) -> { supportQueue.put(iom.getArgument(0)); return null; }).when(forceWriteRequests).put(any(ForceWriteRequest.class)); - when(forceWriteRequests.take()).thenAnswer(i -> { - // suspend the force write thread + doAnswer((Answer) (InvocationOnMock iom) -> { forceWriteThreadSuspendedLatch.await(); - return supportQueue.take(); - }); - Whitebox.setInternalState(journal, "forceWriteRequests", forceWriteRequests); + ForceWriteRequest[] array = iom.getArgument(0); + return supportQueue.takeAll(array); + }).when(forceWriteRequests).takeAll(any()); + journal.setForceWriteRequests(forceWriteRequests); return supportQueue; } @Test public void testForceLedger() throws Exception { File journalDir = tempDir.newFolder(); - Bookie.checkDirectoryStructure(Bookie.getCurrentDirectory(journalDir)); + BookieImpl.checkDirectoryStructure(BookieImpl.getCurrentDirectory(journalDir)); ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); conf.setJournalDirName(journalDir.getPath()); conf.setJournalAdaptiveGroupWrites(false); - JournalChannel jc = spy(new JournalChannel(journalDir, 1)); - whenNew(JournalChannel.class).withAnyArguments().thenReturn(jc); - LedgerDirsManager ledgerDirsManager = 
mock(LedgerDirsManager.class); Journal journal = new Journal(0, journalDir, conf, ledgerDirsManager); - // machinery to suspend ForceWriteThread CountDownLatch forceWriteThreadSuspendedLatch = new CountDownLatch(1); - LinkedBlockingQueue supportQueue = + BatchedArrayBlockingQueue supportQueue = enableForceWriteThreadSuspension(forceWriteThreadSuspendedLatch, journal); + + JournalChannel jc = spy(new JournalChannel(journalDir, 1)); + journal = spy(journal); + doReturn(jc).when(journal).newLogFile(anyLong(), nullable(Long.class)); + journal.start(); LogMark lastLogMarkBeforeWrite = journal.getLastLogMark().markLog().getCurMark(); @@ -343,7 +347,7 @@ public void testForceLedger() throws Exception { long ledgerId = 1; journal.forceLedger(ledgerId, new WriteCallback() { @Override - public void writeComplete(int rc, long ledgerId, long entryId, BookieSocketAddress addr, Object ctx) { + public void writeComplete(int rc, long ledgerId, long entryId, BookieId addr, Object ctx) { latch.countDown(); } }, null); @@ -377,4 +381,26 @@ public void writeComplete(int rc, long ledgerId, long entryId, BookieSocketAddre journal.shutdown(); } + @Test + public void testFileChannelProvider() throws Exception { + File bookieFileDirectory = tempDir.newFile(); + ServerConfiguration config = TestBKConfiguration.newServerConfiguration(); + + DefaultFileChannel defaultFileChannel = spy(new DefaultFileChannel(bookieFileDirectory, config)); + + FileChannelProvider provider = spy(DefaultFileChannelProvider.class); + when(provider.open(bookieFileDirectory, config)).thenReturn(defaultFileChannel); + log.info("Journal Channel Provider: " + config.getJournalChannelProvider()); + // Open should return spied DefaultFileChannel here. + BookieFileChannel bookieFileChannel = provider.open(bookieFileDirectory, config); + bookieFileChannel.getFileChannel(); + verify(defaultFileChannel, times(1)).getFileChannel(); + bookieFileChannel.getFD(); + verify(defaultFileChannel, times(1)).getFD(); + bookieFileChannel.fileExists(bookieFileDirectory); + verify(defaultFileChannel, times(1)).fileExists(bookieFileDirectory); + provider.close(bookieFileChannel); + verify(defaultFileChannel, times(1)).close(); + } + } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieJournalMaxMemoryTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieJournalMaxMemoryTest.java new file mode 100644 index 00000000000..9a288b6b8bf --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieJournalMaxMemoryTest.java @@ -0,0 +1,93 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + */ +package org.apache.bookkeeper.bookie; + +import static org.mockito.ArgumentMatchers.anyLong; +import static org.mockito.ArgumentMatchers.nullable; +import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; + +import io.netty.buffer.ByteBuf; +import io.netty.buffer.Unpooled; +import java.io.File; +import java.util.concurrent.CountDownLatch; +import lombok.extern.slf4j.Slf4j; +import org.apache.bookkeeper.common.util.MemoryLimitController; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.conf.TestBKConfiguration; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.junit.runner.RunWith; +import org.mockito.junit.MockitoJUnitRunner; + +/** + * Test the bookie journal max memory controller. + */ +@RunWith(MockitoJUnitRunner.class) +@Slf4j +public class BookieJournalMaxMemoryTest { + + private static final ByteBuf DATA = Unpooled.wrappedBuffer(new byte[1024 * 1024]); + + @Rule + public TemporaryFolder tempDir = new TemporaryFolder(); + + @Test + public void testJournalMaxMemory() throws Exception { + File journalDir = tempDir.newFolder(); + BookieImpl.checkDirectoryStructure(BookieImpl.getCurrentDirectory(journalDir)); + + ServerConfiguration conf = TestBKConfiguration.newServerConfiguration() + .setJournalDirName(journalDir.getPath()) + .setJournalMaxMemorySizeMb(1); + + JournalChannel jc = spy(new JournalChannel(journalDir, 1)); + LedgerDirsManager ledgerDirsManager = mock(LedgerDirsManager.class); + Journal journal = spy(new Journal(0, journalDir, conf, ledgerDirsManager)); + doReturn(jc).when(journal).newLogFile(anyLong(), nullable(Long.class)); + MemoryLimitController mlc = spy(new MemoryLimitController(1)); + journal.setMemoryLimitController(mlc); + + journal.start(); + + CountDownLatch latch = new CountDownLatch(10); + + for (int i = 0; i < 10; i++) { + long ledgerId = 1; + long entryId = i; + + journal.logAddEntry(ledgerId, entryId, DATA, false, + (rc, ledgerId1, entryId1, addr, ctx) -> latch.countDown(), + null); + } + + latch.await(); + + verify(mlc, times(10)).reserveMemory(DATA.readableBytes()); + verify(mlc, times(10)).releaseMemory(DATA.readableBytes()); + + journal.shutdown(); + } +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieJournalNoSyncTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieJournalNoSyncTest.java index d345225f08d..6d444f02e22 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieJournalNoSyncTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieJournalNoSyncTest.java @@ -23,7 +23,6 @@ import static org.junit.Assert.assertEquals; import java.util.Enumeration; - import org.apache.bookkeeper.client.BookKeeper.DigestType; import org.apache.bookkeeper.client.LedgerEntry; import org.apache.bookkeeper.client.LedgerHandle; diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieJournalPageCacheFlushTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieJournalPageCacheFlushTest.java new file mode 100644 index 00000000000..3683e948e1a --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieJournalPageCacheFlushTest.java @@ -0,0 +1,297 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more 
contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.bookie; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyLong; +import static org.mockito.ArgumentMatchers.nullable; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; + +import io.netty.buffer.ByteBuf; +import io.netty.buffer.Unpooled; +import java.io.File; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; +import lombok.extern.slf4j.Slf4j; +import org.apache.bookkeeper.bookie.Journal.ForceWriteRequest; +import org.apache.bookkeeper.bookie.Journal.LastLogMark; +import org.apache.bookkeeper.common.collections.BatchedArrayBlockingQueue; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.conf.TestBKConfiguration; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.WriteCallback; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.junit.runner.RunWith; +import org.mockito.invocation.InvocationOnMock; +import org.mockito.junit.MockitoJUnitRunner; +import org.mockito.stubbing.Answer; + +/** + * Test the bookie journal PageCache flush interval. 
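+ * <p>The journal's force-write request queue is mocked so that the force-write thread can be suspended and resumed deterministically by the tests.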
+ */ +@RunWith(MockitoJUnitRunner.class) +@Slf4j +public class BookieJournalPageCacheFlushTest { + + private static final ByteBuf DATA = Unpooled.wrappedBuffer(new byte[]{}); + + @Rule + public TemporaryFolder tempDir = new TemporaryFolder(); + + @SuppressWarnings("unchecked") + private BatchedArrayBlockingQueue enableForceWriteThreadSuspension( + CountDownLatch forceWriteThreadSuspendedLatch, + Journal journal) throws InterruptedException { + BatchedArrayBlockingQueue supportQueue = new BatchedArrayBlockingQueue<>(10000); + BatchedArrayBlockingQueue forceWriteRequests = mock(BatchedArrayBlockingQueue.class); + doAnswer((Answer) (InvocationOnMock iom) -> { + supportQueue.put(iom.getArgument(0)); + return null; + }).when(forceWriteRequests).put(any(ForceWriteRequest.class)); + doAnswer((Answer) (InvocationOnMock iom) -> { + forceWriteThreadSuspendedLatch.await(); + ForceWriteRequest[] array = iom.getArgument(0); + return supportQueue.takeAll(array); + }).when(forceWriteRequests).takeAll(any()); + journal.setForceWriteRequests(forceWriteRequests); + return supportQueue; + } + + @Test + public void testAckAfterSyncPageCacheFlush() throws Exception { + File journalDir = tempDir.newFolder(); + BookieImpl.checkDirectoryStructure(BookieImpl.getCurrentDirectory(journalDir)); + + ServerConfiguration conf = TestBKConfiguration.newServerConfiguration() + .setJournalDirName(journalDir.getPath()) + .setMetadataServiceUri(null) + .setJournalAdaptiveGroupWrites(false) + .setJournalSyncData(true) + .setJournalPageCacheFlushIntervalMSec(5000); + + LedgerDirsManager ledgerDirsManager = mock(LedgerDirsManager.class); + Journal journal = new Journal(0, journalDir, conf, ledgerDirsManager); + CountDownLatch forceWriteThreadSuspendedLatch = new CountDownLatch(1); + BatchedArrayBlockingQueue supportQueue = + enableForceWriteThreadSuspension(forceWriteThreadSuspendedLatch, journal); + + journal = spy(journal); + JournalChannel jc = spy(new JournalChannel(journalDir, 1)); + doReturn(jc).when(journal).newLogFile(anyLong(), nullable(Long.class)); + + journal.start(); + + LogMark lastLogMarkBeforeWrite = journal.getLastLogMark().markLog().getCurMark(); + CountDownLatch latch = new CountDownLatch(1); + long ledgerId = 1; + long entryId = 0; + long startTime = System.currentTimeMillis(); + journal.logAddEntry(ledgerId, entryId, DATA, false /* ackBeforeSync */, new WriteCallback() { + @Override + public void writeComplete(int rc, long ledgerId, long entryId, BookieId addr, Object ctx) { + latch.countDown(); + } + }, null); + + while (supportQueue.isEmpty()) { + Thread.sleep(100); + } + + // inserting the forceWriteRequest into the forceWriteRequestQueue is not affected by journalPageCacheFlushInterval + assertTrue(System.currentTimeMillis() - startTime < 5000); + + assertEquals(1, latch.getCount()); + assertEquals(1, supportQueue.size()); + + // the JournalChannel constructor calls forceWrite(true), but the spy does not track it + // because the spy wraps the channel only after the constructor returns + verify(jc, times(0)).forceWrite(true); + + // should not call forceWrite + verify(jc, times(0)).forceWrite(false); + + // let ForceWriteThread work + forceWriteThreadSuspendedLatch.countDown(); + // callback should complete now + assertTrue(latch.await(20, TimeUnit.SECONDS)); + + verify(jc, times(1)).forceWrite(false); + assertEquals(0, supportQueue.size()); + + // verify that log marker advanced + LastLogMark lastLogMarkAfterForceWrite = journal.getLastLogMark(); + 
assertTrue(lastLogMarkAfterForceWrite.getCurMark().compare(lastLogMarkBeforeWrite) > 0); + + journal.shutdown(); + } + + @Test + public void testAckBeforeSyncPageCacheFlush() throws Exception { + File journalDir = tempDir.newFolder(); + BookieImpl.checkDirectoryStructure(BookieImpl.getCurrentDirectory(journalDir)); + + ServerConfiguration conf = TestBKConfiguration.newServerConfiguration() + .setJournalDirName(journalDir.getPath()) + .setMetadataServiceUri(null) + .setJournalAdaptiveGroupWrites(false) + .setJournalSyncData(true) + .setJournalPageCacheFlushIntervalMSec(5000); + + LedgerDirsManager ledgerDirsManager = mock(LedgerDirsManager.class); + Journal journal = new Journal(0, journalDir, conf, ledgerDirsManager); + + CountDownLatch forceWriteThreadSuspendedLatch = new CountDownLatch(1); + BatchedArrayBlockingQueue supportQueue = + enableForceWriteThreadSuspension(forceWriteThreadSuspendedLatch, journal); + + journal = spy(journal); + JournalChannel jc = spy(new JournalChannel(journalDir, 1)); + doReturn(jc).when(journal).newLogFile(anyLong(), nullable(Long.class)); + + journal.start(); + + LogMark lastLogMarkBeforeWrite = journal.getLastLogMark().markLog().getCurMark(); + CountDownLatch latch = new CountDownLatch(1); + long ledgerId = 1; + long entryId = 0; + long startTime = System.currentTimeMillis(); + journal.logAddEntry(ledgerId, entryId, DATA, true /* ackBeforeSync */, new WriteCallback() { + @Override + public void writeComplete(int rc, long ledgerId, long entryId, BookieId addr, Object ctx) { + latch.countDown(); + } + }, null); + + while (supportQueue.isEmpty()) { + Thread.sleep(100); + } + + // inserting the forceWriteRequest into the forceWriteRequestQueue is not affected by journalPageCacheFlushInterval + assertTrue(System.currentTimeMillis() - startTime < 5000); + assertEquals(1, supportQueue.size()); + + // the callback should complete now + assertTrue(latch.await(20, TimeUnit.SECONDS)); + + // the JournalChannel constructor calls forceWrite(true), but the spy does not track it + // because the spy wraps the channel only after the constructor returns + verify(jc, times(0)).forceWrite(true); + + // forceWrite(false) is never called + verify(jc, times(0)).forceWrite(false); + + // verify that log marker did not advance + LastLogMark lastLogMarkAfterForceWrite = journal.getLastLogMark(); + assertEquals(0, lastLogMarkAfterForceWrite.getCurMark().compare(lastLogMarkBeforeWrite)); + + // let the forceWriteThread exit + forceWriteThreadSuspendedLatch.countDown(); + + journal.shutdown(); + } + + @Test + public void testAckBeforeUnSyncPageCacheFlush() throws Exception { + File journalDir = tempDir.newFolder(); + BookieImpl.checkDirectoryStructure(BookieImpl.getCurrentDirectory(journalDir)); + + ServerConfiguration conf = TestBKConfiguration.newServerConfiguration() + .setJournalDirName(journalDir.getPath()) + .setMetadataServiceUri(null) + .setJournalAdaptiveGroupWrites(false) + .setJournalSyncData(false) + .setJournalPageCacheFlushIntervalMSec(5000); + + LedgerDirsManager ledgerDirsManager = mock(LedgerDirsManager.class); + Journal journal = new Journal(0, journalDir, conf, ledgerDirsManager); + + CountDownLatch forceWriteThreadSuspendedLatch = new CountDownLatch(1); + BatchedArrayBlockingQueue supportQueue = + enableForceWriteThreadSuspension(forceWriteThreadSuspendedLatch, journal); + + journal = spy(journal); + JournalChannel jc = spy(new JournalChannel(journalDir, 1)); + doReturn(jc).when(journal).newLogFile(anyLong(), nullable(Long.class)); + + journal.start(); + + CountDownLatch 
latch = new CountDownLatch(2); + long ledgerId = 1; + long entryId = 0; + LogMark lastLogMarkBeforeWrite = journal.getLastLogMark().markLog().getCurMark(); + journal.logAddEntry(ledgerId, entryId, DATA, true, new WriteCallback() { + @Override + public void writeComplete(int rc, long ledgerId, long entryId, BookieId addr, Object ctx) { + latch.countDown(); + } + }, null); + + // no forceWriteRequest should be generated yet, because journalPageCacheFlushIntervalMSec holds it back + assertEquals(0, supportQueue.size()); + + // wait for the journalPageCacheFlushIntervalMSec timeout to elapse + Thread.sleep(10000); + + // add an entry to the journal to wake up the journal main thread, which is blocked on queue.take() + journal.logAddEntry(ledgerId, entryId + 1, DATA, true, new WriteCallback() { + @Override + public void writeComplete(int rc, long ledgerId, long entryId, BookieId addr, Object ctx) { + latch.countDown(); + } + }, null); + + // wait for the forceWriteRequest to be generated + while (supportQueue.isEmpty()) { + Thread.sleep(100); + } + + // only one forceWriteRequest is inserted into the forceWriteRequestQueue + assertEquals(1, supportQueue.size()); + + // the callback should complete now + assertTrue(latch.await(20, TimeUnit.SECONDS)); + + // the JournalChannel constructor calls forceWrite(true), but the spy does not track it + // because the spy wraps the channel only after the constructor returns + verify(jc, times(0)).forceWrite(true); + + // forceWrite(false) is never called + verify(jc, times(0)).forceWrite(false); + + // verify that log marker did not advance + LastLogMark lastLogMarkAfterForceWrite = journal.getLastLogMark(); + assertEquals(0, lastLogMarkAfterForceWrite.getCurMark().compare(lastLogMarkBeforeWrite)); + + // let the forceWriteThread exit + forceWriteThreadSuspendedLatch.countDown(); + + journal.shutdown(); + } +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieJournalTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieJournalTest.java index 1a0342b46e7..21608a19b2b 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieJournalTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieJournalTest.java @@ -24,10 +24,14 @@ import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.mockStatic; import io.netty.buffer.ByteBuf; import io.netty.buffer.Unpooled; - +import io.netty.util.ReferenceCountUtil; import java.io.File; import java.io.IOException; import java.io.RandomAccessFile; @@ -37,7 +41,8 @@ import java.util.Arrays; import java.util.List; import java.util.Random; - +import lombok.Cleanup; +import org.apache.bookkeeper.bookie.Journal.LastLogMark; import org.apache.bookkeeper.client.ClientUtil; import org.apache.bookkeeper.client.LedgerHandle; import org.apache.bookkeeper.conf.ServerConfiguration; @@ -46,12 +51,16 @@ import org.apache.commons.io.FileUtils; import org.junit.After; import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.MockedStatic; +import org.mockito.junit.MockitoJUnitRunner; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * Test the bookie journal. 
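+ * <p>Covers journal format versions (pre-V2 through V5) and replay of truncated, junk-ended, or corrupted journals.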
*/ +@RunWith(MockitoJUnitRunner.class) public class BookieJournalTest { private static final Logger LOG = LoggerFactory.getLogger(BookieJournalTest.class); @@ -110,20 +119,20 @@ private void writePartialIndexFileForLedger(File indexDir, long ledgerId, /** * Generate fence entry. */ - private ByteBuf generateFenceEntry(long ledgerId) { + private static ByteBuf generateFenceEntry(long ledgerId) { ByteBuf bb = Unpooled.buffer(); bb.writeLong(ledgerId); - bb.writeLong(Bookie.METAENTRY_ID_FENCE_KEY); + bb.writeLong(BookieImpl.METAENTRY_ID_FENCE_KEY); return bb; } /** * Generate meta entry with given master key. */ - private ByteBuf generateMetaEntry(long ledgerId, byte[] masterKey) { + private static ByteBuf generateMetaEntry(long ledgerId, byte[] masterKey) { ByteBuf bb = Unpooled.buffer(); bb.writeLong(ledgerId); - bb.writeLong(Bookie.METAENTRY_ID_LEDGER_KEY); + bb.writeLong(BookieImpl.METAENTRY_ID_LEDGER_KEY); bb.writeInt(masterKey.length); bb.writeBytes(masterKey); return bb; @@ -165,13 +174,13 @@ private void writePreV2Journal(File journalDir, int numEntries) throws Exception fc.write(lenBuff); fc.write(packet.nioBuffer()); - packet.release(); + ReferenceCountUtil.release(packet); } } private static void moveToPosition(JournalChannel jc, long pos) throws IOException { jc.fc.position(pos); - jc.bc.position.set(pos); + jc.bc.position = pos; jc.bc.writeBufferStartPosition.set(pos); } @@ -209,7 +218,7 @@ private JournalChannel writeV2Journal(File journalDir, int numEntries) throws Ex bc.write(Unpooled.wrappedBuffer(lenBuff)); bc.write(packet); - packet.release(); + ReferenceCountUtil.release(packet); } bc.flushAndForceWrite(false); @@ -243,7 +252,7 @@ private JournalChannel writeV3Journal(File journalDir, int numEntries, byte[] ma bc.write(Unpooled.wrappedBuffer(lenBuff)); bc.write(packet); - packet.release(); + ReferenceCountUtil.release(packet); } bc.flushAndForceWrite(false); @@ -276,20 +285,72 @@ private JournalChannel writeV4Journal(File journalDir, int numEntries, byte[] ma lenBuff.flip(); bc.write(Unpooled.wrappedBuffer(lenBuff)); bc.write(packet); + ReferenceCountUtil.release(packet); + } + // write fence key + ByteBuf packet = generateFenceEntry(1); + ByteBuf lenBuf = Unpooled.buffer(); + lenBuf.writeInt(packet.readableBytes()); + bc.write(lenBuf); + bc.write(packet); + bc.flushAndForceWrite(false); + updateJournalVersion(jc, JournalChannel.V4); + return jc; + } + + private JournalChannel writeV4JournalWithInvalidRecord(File journalDir, + int numEntries, byte[] masterKey) throws Exception { + long logId = System.currentTimeMillis(); + JournalChannel jc = new JournalChannel(journalDir, logId); + + moveToPosition(jc, JournalChannel.VERSION_HEADER_SIZE); + + BufferedChannel bc = jc.getBufferedChannel(); + + byte[] data = new byte[1024]; + Arrays.fill(data, (byte) 'X'); + long lastConfirmed = LedgerHandle.INVALID_ENTRY_ID; + for (int i = 0; i <= numEntries; i++) { + ByteBuf packet; + if (i == 0) { + packet = generateMetaEntry(1, masterKey); + } else { + packet = ClientUtil.generatePacket(1, i, lastConfirmed, i * data.length, data); + } + lastConfirmed = i; + ByteBuffer lenBuff = ByteBuffer.allocate(4); + if (i == numEntries - 1) { + // simulate a flush that writes an invalid (negative) entry length to the journal + lenBuff.putInt(-1); + } else { + lenBuff.putInt(packet.readableBytes()); + } + lenBuff.flip(); + bc.write(Unpooled.wrappedBuffer(lenBuff)); + bc.write(packet); packet.release(); } + // write fence key ByteBuf packet = generateFenceEntry(1); ByteBuf lenBuf = Unpooled.buffer(); 
lenBuf.writeInt(packet.readableBytes()); + bc.write(lenBuf); bc.write(packet); bc.flushAndForceWrite(false); updateJournalVersion(jc, JournalChannel.V4); + return jc; } - private JournalChannel writeV5Journal(File journalDir, int numEntries, byte[] masterKey) throws Exception { + static JournalChannel writeV5Journal(File journalDir, int numEntries, + byte[] masterKey) throws Exception { + return writeV5Journal(journalDir, numEntries, masterKey, false); + } + + static JournalChannel writeV5Journal(File journalDir, int numEntries, + byte[] masterKey, boolean corruptLength) throws Exception { long logId = System.currentTimeMillis(); JournalChannel jc = new JournalChannel(journalDir, logId); @@ -311,10 +372,14 @@ private JournalChannel writeV5Journal(File journalDir, int numEntries, byte[] ma lastConfirmed = i; length += i; ByteBuf lenBuff = Unpooled.buffer(); - lenBuff.writeInt(packet.readableBytes()); + if (corruptLength) { + lenBuff.writeInt(-1); + } else { + lenBuff.writeInt(packet.readableBytes()); + } bc.write(lenBuff); bc.write(packet); - packet.release(); + ReferenceCountUtil.release(packet); Journal.writePaddingBytes(jc, paddingBuff, JournalChannel.SECTOR_SIZE); } // write fence key @@ -337,21 +402,20 @@ private JournalChannel writeV5Journal(File journalDir, int numEntries, byte[] ma @Test public void testPreV2Journal() throws Exception { File journalDir = createTempDir("bookie", "journal"); - Bookie.checkDirectoryStructure(Bookie.getCurrentDirectory(journalDir)); + BookieImpl.checkDirectoryStructure(BookieImpl.getCurrentDirectory(journalDir)); File ledgerDir = createTempDir("bookie", "ledger"); - Bookie.checkDirectoryStructure(Bookie.getCurrentDirectory(ledgerDir)); + BookieImpl.checkDirectoryStructure(BookieImpl.getCurrentDirectory(ledgerDir)); - writePreV2Journal(Bookie.getCurrentDirectory(journalDir), 100); - writeIndexFileForLedger(Bookie.getCurrentDirectory(ledgerDir), 1, "testPasswd".getBytes()); + writePreV2Journal(BookieImpl.getCurrentDirectory(journalDir), 100); + writeIndexFileForLedger(BookieImpl.getCurrentDirectory(ledgerDir), 1, "testPasswd".getBytes()); ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); conf.setJournalDirName(journalDir.getPath()) .setLedgerDirNames(new String[] { ledgerDir.getPath() }) .setMetadataServiceUri(null); - Bookie b = new Bookie(conf); - b.readJournal(); + Bookie b = createBookieAndReadJournal(conf); b.readEntry(1, 100); try { @@ -367,20 +431,19 @@ public void testPreV2Journal() throws Exception { @Test public void testV4Journal() throws Exception { File journalDir = createTempDir("bookie", "journal"); - Bookie.checkDirectoryStructure(Bookie.getCurrentDirectory(journalDir)); + BookieImpl.checkDirectoryStructure(BookieImpl.getCurrentDirectory(journalDir)); File ledgerDir = createTempDir("bookie", "ledger"); - Bookie.checkDirectoryStructure(Bookie.getCurrentDirectory(ledgerDir)); + BookieImpl.checkDirectoryStructure(BookieImpl.getCurrentDirectory(ledgerDir)); - writeV4Journal(Bookie.getCurrentDirectory(journalDir), 100, "testPasswd".getBytes()); + writeV4Journal(BookieImpl.getCurrentDirectory(journalDir), 100, "testPasswd".getBytes()); ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); conf.setJournalDirName(journalDir.getPath()) .setLedgerDirNames(new String[] { ledgerDir.getPath() }) .setMetadataServiceUri(null); - Bookie b = new Bookie(conf); - b.readJournal(); + BookieImpl b = createBookieAndReadJournal(conf); b.readEntry(1, 100); try { @@ -397,12 +460,12 @@ public void testV4Journal() 
throws Exception { @Test public void testV5Journal() throws Exception { File journalDir = createTempDir("bookie", "journal"); - Bookie.checkDirectoryStructure(Bookie.getCurrentDirectory(journalDir)); + BookieImpl.checkDirectoryStructure(BookieImpl.getCurrentDirectory(journalDir)); File ledgerDir = createTempDir("bookie", "ledger"); - Bookie.checkDirectoryStructure(Bookie.getCurrentDirectory(ledgerDir)); + BookieImpl.checkDirectoryStructure(BookieImpl.getCurrentDirectory(ledgerDir)); - writeV5Journal(Bookie.getCurrentDirectory(journalDir), 2 * JournalChannel.SECTOR_SIZE, + writeV5Journal(BookieImpl.getCurrentDirectory(journalDir), 2 * JournalChannel.SECTOR_SIZE, "testV5Journal".getBytes()); ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); @@ -410,8 +473,7 @@ public void testV5Journal() throws Exception { .setLedgerDirNames(new String[] { ledgerDir.getPath() }) .setMetadataServiceUri(null); - Bookie b = new Bookie(conf); - b.readJournal(); + BookieImpl b = createBookieAndReadJournal(conf); for (int i = 1; i <= 2 * JournalChannel.SECTOR_SIZE; i++) { b.readEntry(1, i); @@ -435,12 +497,12 @@ public void testV5Journal() throws Exception { @Test public void testAllJunkJournal() throws Exception { File journalDir = createTempDir("bookie", "journal"); - Bookie.checkDirectoryStructure(Bookie.getCurrentDirectory(journalDir)); + BookieImpl.checkDirectoryStructure(BookieImpl.getCurrentDirectory(journalDir)); File ledgerDir = createTempDir("bookie", "ledger"); - Bookie.checkDirectoryStructure(Bookie.getCurrentDirectory(ledgerDir)); + BookieImpl.checkDirectoryStructure(BookieImpl.getCurrentDirectory(ledgerDir)); - writeJunkJournal(Bookie.getCurrentDirectory(journalDir)); + writeJunkJournal(BookieImpl.getCurrentDirectory(journalDir)); ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); conf.setJournalDirName(journalDir.getPath()) @@ -449,7 +511,7 @@ public void testAllJunkJournal() throws Exception { Bookie b = null; try { - b = new Bookie(conf); + b = new TestBookieImpl(conf); fail("Shouldn't have been able to start without admin"); } catch (Throwable t) { // correct behaviour @@ -470,19 +532,19 @@ public void testAllJunkJournal() throws Exception { @Test public void testEmptyJournal() throws Exception { File journalDir = createTempDir("bookie", "journal"); - Bookie.checkDirectoryStructure(Bookie.getCurrentDirectory(journalDir)); + BookieImpl.checkDirectoryStructure(BookieImpl.getCurrentDirectory(journalDir)); File ledgerDir = createTempDir("bookie", "ledger"); - Bookie.checkDirectoryStructure(Bookie.getCurrentDirectory(ledgerDir)); + BookieImpl.checkDirectoryStructure(BookieImpl.getCurrentDirectory(ledgerDir)); - writePreV2Journal(Bookie.getCurrentDirectory(journalDir), 0); + writePreV2Journal(BookieImpl.getCurrentDirectory(journalDir), 0); ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); conf.setJournalDirName(journalDir.getPath()) .setLedgerDirNames(new String[] { ledgerDir.getPath() }) .setMetadataServiceUri(null); - Bookie b = new Bookie(conf); + Bookie b = new TestBookieImpl(conf); } /** @@ -492,19 +554,19 @@ public void testEmptyJournal() throws Exception { @Test public void testHeaderOnlyJournal() throws Exception { File journalDir = createTempDir("bookie", "journal"); - Bookie.checkDirectoryStructure(Bookie.getCurrentDirectory(journalDir)); + BookieImpl.checkDirectoryStructure(BookieImpl.getCurrentDirectory(journalDir)); File ledgerDir = createTempDir("bookie", "ledger"); - 
Bookie.checkDirectoryStructure(Bookie.getCurrentDirectory(ledgerDir)); + BookieImpl.checkDirectoryStructure(BookieImpl.getCurrentDirectory(ledgerDir)); - writeV2Journal(Bookie.getCurrentDirectory(journalDir), 0); + writeV2Journal(BookieImpl.getCurrentDirectory(journalDir), 0); ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); conf.setJournalDirName(journalDir.getPath()) .setLedgerDirNames(new String[] { ledgerDir.getPath() }) .setMetadataServiceUri(null); - Bookie b = new Bookie(conf); + Bookie b = new TestBookieImpl(conf); } /** @@ -514,14 +576,15 @@ public void testHeaderOnlyJournal() throws Exception { @Test public void testJunkEndedJournal() throws Exception { File journalDir = createTempDir("bookie", "journal"); - Bookie.checkDirectoryStructure(Bookie.getCurrentDirectory(journalDir)); + BookieImpl.checkDirectoryStructure(BookieImpl.getCurrentDirectory(journalDir)); File ledgerDir = createTempDir("bookie", "ledger"); - Bookie.checkDirectoryStructure(Bookie.getCurrentDirectory(ledgerDir)); + BookieImpl.checkDirectoryStructure(BookieImpl.getCurrentDirectory(ledgerDir)); - JournalChannel jc = writeV2Journal(Bookie.getCurrentDirectory(journalDir), 0); + JournalChannel jc = writeV2Journal(BookieImpl.getCurrentDirectory(journalDir), 0); jc.getBufferedChannel().write(Unpooled.wrappedBuffer("JunkJunkJunk".getBytes())); jc.getBufferedChannel().flushAndForceWrite(false); + jc.close(); writeIndexFileForLedger(ledgerDir, 1, "testPasswd".getBytes()); @@ -532,7 +595,7 @@ public void testJunkEndedJournal() throws Exception { Bookie b = null; try { - b = new Bookie(conf); + b = new TestBookieImpl(conf); } catch (Throwable t) { // correct behaviour } @@ -549,20 +612,20 @@ public void testJunkEndedJournal() throws Exception { @Test public void testTruncatedInLenJournal() throws Exception { File journalDir = createTempDir("bookie", "journal"); - Bookie.checkDirectoryStructure(Bookie.getCurrentDirectory(journalDir)); + BookieImpl.checkDirectoryStructure(BookieImpl.getCurrentDirectory(journalDir)); File ledgerDir = createTempDir("bookie", "ledger"); - Bookie.checkDirectoryStructure(Bookie.getCurrentDirectory(ledgerDir)); + BookieImpl.checkDirectoryStructure(BookieImpl.getCurrentDirectory(ledgerDir)); JournalChannel jc = writeV2Journal( - Bookie.getCurrentDirectory(journalDir), 100); + BookieImpl.getCurrentDirectory(journalDir), 100); ByteBuffer zeros = ByteBuffer.allocate(2048); jc.fc.position(jc.getBufferedChannel().position() - 0x429); jc.fc.write(zeros); jc.fc.force(false); - writeIndexFileForLedger(Bookie.getCurrentDirectory(ledgerDir), + writeIndexFileForLedger(BookieImpl.getCurrentDirectory(ledgerDir), 1, "testPasswd".getBytes()); ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); @@ -570,8 +633,7 @@ public void testTruncatedInLenJournal() throws Exception { .setLedgerDirNames(new String[] { ledgerDir.getPath() }) .setMetadataServiceUri(null); - Bookie b = new Bookie(conf); - b.readJournal(); + Bookie b = createBookieAndReadJournal(conf); b.readEntry(1, 99); @@ -593,20 +655,20 @@ public void testTruncatedInLenJournal() throws Exception { @Test public void testTruncatedInEntryJournal() throws Exception { File journalDir = createTempDir("bookie", "journal"); - Bookie.checkDirectoryStructure(Bookie.getCurrentDirectory(journalDir)); + BookieImpl.checkDirectoryStructure(BookieImpl.getCurrentDirectory(journalDir)); File ledgerDir = createTempDir("bookie", "ledger"); - Bookie.checkDirectoryStructure(Bookie.getCurrentDirectory(ledgerDir)); + 
BookieImpl.checkDirectoryStructure(BookieImpl.getCurrentDirectory(ledgerDir)); JournalChannel jc = writeV2Journal( - Bookie.getCurrentDirectory(journalDir), 100); + BookieImpl.getCurrentDirectory(journalDir), 100); ByteBuffer zeros = ByteBuffer.allocate(2048); jc.fc.position(jc.getBufferedChannel().position() - 0x300); jc.fc.write(zeros); jc.fc.force(false); - writeIndexFileForLedger(Bookie.getCurrentDirectory(ledgerDir), + writeIndexFileForLedger(BookieImpl.getCurrentDirectory(ledgerDir), 1, "testPasswd".getBytes()); ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); @@ -614,8 +676,8 @@ public void testTruncatedInEntryJournal() throws Exception { .setLedgerDirNames(new String[] { ledgerDir.getPath() }) .setMetadataServiceUri(null); - Bookie b = new Bookie(conf); - b.readJournal(); + Bookie b = createBookieAndReadJournal(conf); + b.readEntry(1, 99); // still able to read last entry, but it's junk @@ -640,6 +702,49 @@ public void testTruncatedInEntryJournal() throws Exception { } } + private BookieImpl createBookieAndReadJournal(ServerConfiguration conf) throws Exception { + BookieImpl b = new TestBookieImpl(conf); + for (Journal journal : b.journals) { + LastLogMark lastLogMark = journal.getLastLogMark().markLog(); + b.readJournal(); + assertTrue(journal.getLastLogMark().getCurMark().compare(lastLogMark.getCurMark()) > 0); + } + return b; + } + + /** + * Test journal replay with SortedLedgerStorage and a very small max + * arena size. + */ + @Test + public void testSortedLedgerStorageReplayWithSmallMaxArenaSize() throws Exception { + File journalDir = createTempDir("bookie", "journal"); + BookieImpl.checkDirectoryStructure(BookieImpl.getCurrentDirectory(journalDir)); + + File ledgerDir = createTempDir("bookie", "ledger"); + BookieImpl.checkDirectoryStructure(BookieImpl.getCurrentDirectory(ledgerDir)); + + JournalChannel jc = writeV2Journal( + BookieImpl.getCurrentDirectory(journalDir), 100); + + jc.fc.force(false); + + writeIndexFileForLedger(BookieImpl.getCurrentDirectory(ledgerDir), + 1, "testPasswd".getBytes()); + + ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); + conf.setLedgerStorageClass("org.apache.bookkeeper.bookie.SortedLedgerStorage"); + conf.setSkipListArenaMaxAllocSize(0); + conf.setJournalDirName(journalDir.getPath()) + .setLedgerDirNames(new String[] { ledgerDir.getPath() }); + + BookieImpl b = new TestBookieImpl(conf); + b.readJournal(); + b.ledgerStorage.flush(); + b.readEntry(1, 80); + b.readEntry(1, 99); + } + /** * Test partial index (truncate master key) with pre-v3 journals. 
*/ @@ -662,13 +767,13 @@ public void testPartialFileInfoPreV3Journal2() throws Exception { private void testPartialFileInfoPreV3Journal(boolean truncateMasterKey) throws Exception { File journalDir = createTempDir("bookie", "journal"); - Bookie.checkDirectoryStructure(Bookie.getCurrentDirectory(journalDir)); + BookieImpl.checkDirectoryStructure(BookieImpl.getCurrentDirectory(journalDir)); File ledgerDir = createTempDir("bookie", "ledger"); - Bookie.checkDirectoryStructure(Bookie.getCurrentDirectory(ledgerDir)); + BookieImpl.checkDirectoryStructure(BookieImpl.getCurrentDirectory(ledgerDir)); - writePreV2Journal(Bookie.getCurrentDirectory(journalDir), 100); - writePartialIndexFileForLedger(Bookie.getCurrentDirectory(ledgerDir), + writePreV2Journal(BookieImpl.getCurrentDirectory(journalDir), 100); + writePartialIndexFileForLedger(BookieImpl.getCurrentDirectory(ledgerDir), 1, "testPasswd".getBytes(), truncateMasterKey); ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); @@ -678,13 +783,13 @@ private void testPartialFileInfoPreV3Journal(boolean truncateMasterKey) if (truncateMasterKey) { try { - Bookie b = new Bookie(conf); + BookieImpl b = new TestBookieImpl(conf); b.readJournal(); fail("Should not reach here!"); } catch (IOException ie) { } } else { - Bookie b = new Bookie(conf); + BookieImpl b = new TestBookieImpl(conf); b.readJournal(); b.readEntry(1, 100); try { @@ -718,15 +823,15 @@ public void testPartialFileInfoPostV3Journal2() throws Exception { private void testPartialFileInfoPostV3Journal(boolean truncateMasterKey) throws Exception { File journalDir = createTempDir("bookie", "journal"); - Bookie.checkDirectoryStructure(Bookie.getCurrentDirectory(journalDir)); + BookieImpl.checkDirectoryStructure(BookieImpl.getCurrentDirectory(journalDir)); File ledgerDir = createTempDir("bookie", "ledger"); - Bookie.checkDirectoryStructure(Bookie.getCurrentDirectory(ledgerDir)); + BookieImpl.checkDirectoryStructure(BookieImpl.getCurrentDirectory(ledgerDir)); byte[] masterKey = "testPasswd".getBytes(); - writeV3Journal(Bookie.getCurrentDirectory(journalDir), 100, masterKey); - writePartialIndexFileForLedger(Bookie.getCurrentDirectory(ledgerDir), 1, masterKey, + writeV3Journal(BookieImpl.getCurrentDirectory(journalDir), 100, masterKey); + writePartialIndexFileForLedger(BookieImpl.getCurrentDirectory(ledgerDir), 1, masterKey, truncateMasterKey); ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); @@ -734,7 +839,7 @@ private void testPartialFileInfoPostV3Journal(boolean truncateMasterKey) .setLedgerDirNames(new String[] { ledgerDir.getPath() }) .setMetadataServiceUri(null); - Bookie b = new Bookie(conf); + BookieImpl b = new TestBookieImpl(conf); b.readJournal(); b.readEntry(1, 100); try { @@ -744,4 +849,125 @@ private void testPartialFileInfoPostV3Journal(boolean truncateMasterKey) // correct behaviour } } + + /** + * Test for fake IOException during read of Journal. 
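+ * <p>FileChannelProvider.newProvider is statically mocked to hand back an unusable channel, so scanning the journal is expected to fail.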
+ */ + @Test + public void testJournalScanIOException() throws Exception { + File journalDir = createTempDir("bookie", "journal"); + BookieImpl.checkDirectoryStructure(BookieImpl.getCurrentDirectory(journalDir)); + + File ledgerDir = createTempDir("bookie", "ledger"); + BookieImpl.checkDirectoryStructure(BookieImpl.getCurrentDirectory(ledgerDir)); + + writeV4Journal(BookieImpl.getCurrentDirectory(journalDir), 100, "testPasswd".getBytes()); + + ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); + conf.setJournalDirName(journalDir.getPath()) + .setLedgerDirNames(new String[] { ledgerDir.getPath() }) + .setMetadataServiceUri(null); + + Journal.JournalScanner journalScanner = new DummyJournalScan(); + BookieFileChannel bookieFileChannel = mock(BookieFileChannel.class); + FileChannelProvider fileChannelProvider = mock(FileChannelProvider.class); + + @Cleanup + MockedStatic fileChannelProviderMockedStatic = mockStatic(FileChannelProvider.class); + fileChannelProviderMockedStatic.when(() -> FileChannelProvider.newProvider(any())) + .thenReturn(fileChannelProvider); + doReturn(bookieFileChannel).when(fileChannelProvider).open(any(), any()); + + BookieImpl b = new TestBookieImpl(conf); + + for (Journal journal : b.journals) { + List journalIds = journal.listJournalIds(journal.getJournalDirectory(), null); + + assertEquals(journalIds.size(), 1); + + try { + journal.scanJournal(journalIds.get(0), Long.MAX_VALUE, journalScanner, false); + fail("Should not have been able to scan the journal"); + } catch (Exception e) { + // Expected + } + } + + b.shutdown(); + } + + /** + * Test for invalid record data during read of Journal. + */ + @Test + public void testJournalScanInvalidRecordWithSkipFlag() throws Exception { + File journalDir = createTempDir("bookie", "journal"); + BookieImpl.checkDirectoryStructure(BookieImpl.getCurrentDirectory(journalDir)); + + File ledgerDir = createTempDir("bookie", "ledger"); + BookieImpl.checkDirectoryStructure(BookieImpl.getCurrentDirectory(ledgerDir)); + + try { + writeV4JournalWithInvalidRecord(BookieImpl.getCurrentDirectory(journalDir), + 100, "testPasswd".getBytes()); + } catch (Exception e) { + fail(); + } + + + ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); + // Skipping invalid journal records is disabled by default, so enable it explicitly here + conf.setJournalDirName(journalDir.getPath()) + .setLedgerDirNames(new String[] { ledgerDir.getPath() }) + .setMetadataServiceUri(null) + .setSkipReplayJournalInvalidRecord(true); + + Journal.JournalScanner journalScanner = new DummyJournalScan(); + + BookieImpl b = new TestBookieImpl(conf); + + for (Journal journal : b.journals) { + List journalIds = Journal.listJournalIds(journal.getJournalDirectory(), null); + assertEquals(journalIds.size(), 1); + try { + journal.scanJournal(journalIds.get(0), 0, journalScanner, conf.isSkipReplayJournalInvalidRecord()); + } catch (Exception e) { + fail("Should pass the journal scanning because the skip flag is enabled."); + } + } + + b.shutdown(); + + // Leave the skip flag at its default (disabled) + conf = TestBKConfiguration.newServerConfiguration(); + conf.setJournalDirName(journalDir.getPath()) + .setLedgerDirNames(new String[] { ledgerDir.getPath() }) + .setMetadataServiceUri(null); + + journalScanner = new DummyJournalScan(); + + b = new TestBookieImpl(conf); + + for (Journal journal : b.journals) { + List journalIds = Journal.listJournalIds(journal.getJournalDirectory(), null); + assertEquals(journalIds.size(), 1); + try { + journal.scanJournal(journalIds.get(0), 
0, journalScanner, conf.isSkipReplayJournalInvalidRecord()); + fail("Should fail the journal scanning because the skip flag is disabled"); + } catch (Exception e) { + // expected. + } + } + + b.shutdown(); + } + + + static class DummyJournalScan implements Journal.JournalScanner { + + @Override + public void process(int journalVersion, long offset, ByteBuffer entry) throws IOException { + LOG.warn("Journal Version : " + journalVersion); + } + } } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieMultipleJournalsTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieMultipleJournalsTest.java index bc30246637b..664e31541bf 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieMultipleJournalsTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieMultipleJournalsTest.java @@ -21,17 +21,20 @@ package org.apache.bookkeeper.bookie; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; import java.io.File; +import java.lang.reflect.Field; import java.util.ArrayList; import java.util.Enumeration; import java.util.List; - import org.apache.bookkeeper.client.BookKeeper.DigestType; import org.apache.bookkeeper.client.LedgerEntry; import org.apache.bookkeeper.client.LedgerHandle; import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.proto.BookieServer; import org.apache.bookkeeper.test.BookKeeperClusterTestCase; +import org.awaitility.Awaitility; import org.junit.Test; /** @@ -57,6 +60,43 @@ protected ServerConfiguration newServerConfiguration(int port, File journalDir, return conf; } + @Test + @SuppressWarnings("unchecked") + public void testJournalExit() throws Exception { + + LedgerHandle ledgerHandle = bkc.createLedger(1, 1, DigestType.CRC32, new byte[0]); + for (int i = 0; i < 10; i++) { + ledgerHandle.addEntry(("entry-" + i).getBytes()); + } + + BookieServer bookieServer = serverByIndex(0); + BookieImpl bookie = (BookieImpl) bookieServer.getBookie(); + Field journalList = bookie.getClass().getDeclaredField("journals"); + journalList.setAccessible(true); + List journals = (List) journalList.get(bookie); + journals.get(0).interruptThread(); + Awaitility.await().untilAsserted(() -> assertFalse(bookie.isRunning())); + } + + @Test + @SuppressWarnings("unchecked") + public void testJournalExitAndShutdown() throws Exception { + + LedgerHandle ledgerHandle = bkc.createLedger(1, 1, DigestType.CRC32, new byte[0]); + for (int i = 0; i < 10; i++) { + ledgerHandle.addEntry(("entry-" + i).getBytes()); + } + + BookieServer bookieServer = serverByIndex(0); + BookieImpl bookie = (BookieImpl) bookieServer.getBookie(); + Field journalList = bookie.getClass().getDeclaredField("journals"); + journalList.setAccessible(true); + List journals = (List) journalList.get(bookie); + journals.get(0).interruptThread(); + bookie.shutdown(ExitCode.OK); + Awaitility.await().untilAsserted(() -> assertFalse(bookie.isRunning())); + } + @Test public void testMultipleWritesAndBookieRestart() throws Exception { // Creates few ledgers so that writes are spread across all journals diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieShellTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieShellTest.java index 9f12e15e4ca..fce3ce8bb30 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieShellTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieShellTest.java 
@@ -19,39 +19,40 @@ package org.apache.bookkeeper.bookie; -import static com.google.common.base.Charsets.UTF_8; +import static java.nio.charset.StandardCharsets.UTF_8; import static org.junit.Assert.assertEquals; import static org.junit.Assert.fail; import static org.mockito.ArgumentMatchers.any; -import static org.mockito.ArgumentMatchers.anyString; import static org.mockito.ArgumentMatchers.eq; import static org.mockito.ArgumentMatchers.same; import static org.mockito.Mockito.doReturn; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.mockStatic; import static org.mockito.Mockito.never; +import static org.mockito.Mockito.spy; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; -import static org.powermock.api.mockito.PowerMockito.spy; -import static org.powermock.api.mockito.PowerMockito.verifyNew; -import static org.powermock.api.mockito.PowerMockito.whenNew; import com.google.common.collect.Maps; import java.util.Set; import java.util.SortedMap; import java.util.function.Function; +import lombok.Cleanup; import org.apache.bookkeeper.bookie.BookieShell.MyCommand; import org.apache.bookkeeper.bookie.BookieShell.RecoverCmd; import org.apache.bookkeeper.client.BookKeeperAdmin; -import org.apache.bookkeeper.client.LedgerMetadata; +import org.apache.bookkeeper.client.api.LedgerMetadata; import org.apache.bookkeeper.conf.ClientConfiguration; import org.apache.bookkeeper.conf.ServerConfiguration; import org.apache.bookkeeper.discover.RegistrationManager; -import org.apache.bookkeeper.meta.MetadataBookieDriver; import org.apache.bookkeeper.meta.MetadataDrivers; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.tools.cli.commands.bookie.LastMarkCommand; +import org.apache.bookkeeper.tools.cli.commands.bookies.ClusterInfoCommand; import org.apache.bookkeeper.tools.cli.commands.bookies.ListBookiesCommand; import org.apache.bookkeeper.tools.cli.commands.client.SimpleTestCommand; +import org.apache.bookkeeper.tools.cli.commands.client.SimpleTestCommand.Flags; import org.apache.bookkeeper.tools.framework.CliFlags; import org.apache.bookkeeper.util.EntryFormatter; import org.apache.bookkeeper.util.LedgerIdFormatter; @@ -62,108 +63,86 @@ import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.MissingArgumentException; import org.apache.commons.cli.ParseException; +import org.junit.After; import org.junit.Before; import org.junit.Test; import org.junit.runner.RunWith; -import org.powermock.api.mockito.PowerMockito; -import org.powermock.core.classloader.annotations.PrepareForTest; -import org.powermock.modules.junit4.PowerMockRunner; +import org.mockito.MockedStatic; +import org.mockito.junit.MockitoJUnitRunner; /** * Unit test for {@link BookieShell}. 
 */
-@RunWith(PowerMockRunner.class)
-@PrepareForTest({ BookieShell.class, MetadataDrivers.class })
+@RunWith(MockitoJUnitRunner.class)
 public class BookieShellTest {
 
     private ClientConfiguration clientConf;
     private BookieShell shell;
     private BookKeeperAdmin admin;
     private RegistrationManager rm;
-    private MetadataBookieDriver driver;
     private Cookie cookie;
     private Version version;
 
-    // commands
-    private LastMarkCommand mockLastMarkCommand;
-    private SimpleTestCommand.Flags mockSimpleTestFlags;
-    private SimpleTestCommand mockSimpleTestCommand;
     private ListBookiesCommand.Flags mockListBookiesFlags;
     private ListBookiesCommand mockListBookiesCommand;
+    private MockedStatic<ListBookiesCommand> listBookiesCommandMockedStatic;
+    private MockedStatic<MetadataDrivers> metadataDriversMockedStatic;
+    private MockedStatic<BookKeeperAdmin> bookKeeperAdminMockedStatic;
+    private MockedStatic<ListBookiesCommand.Flags> listBookiesCommandflagsMockedStatic;
 
     @Before
     public void setup() throws Exception {
-        // setup the required mocks before constructing bookie shell.
-        this.mockLastMarkCommand = mock(LastMarkCommand.class);
-        whenNew(LastMarkCommand.class)
-            .withNoArguments()
-            .thenReturn(mockLastMarkCommand);
-
-        // setup the mocks for simple test command
-        this.mockSimpleTestFlags = spy(new SimpleTestCommand.Flags());
-        whenNew(SimpleTestCommand.Flags.class)
-            .withNoArguments()
-            .thenReturn(mockSimpleTestFlags);
-
-        this.mockSimpleTestCommand = spy(new SimpleTestCommand());
-        doReturn(true).when(mockSimpleTestCommand)
-            .apply(any(ServerConfiguration.class), any(SimpleTestCommand.Flags.class));
-        whenNew(SimpleTestCommand.class)
-            .withParameterTypes(SimpleTestCommand.Flags.class)
-            .withArguments(mockSimpleTestFlags)
-            .thenReturn(mockSimpleTestCommand);
+        this.shell = new BookieShell(LedgerIdFormatter.LONG_LEDGERID_FORMATTER, EntryFormatter.STRING_FORMATTER);
 
-        // setup the mocks for list bookies command
         this.mockListBookiesFlags = spy(new ListBookiesCommand.Flags());
-        whenNew(ListBookiesCommand.Flags.class)
-            .withNoArguments()
-            .thenReturn(mockListBookiesFlags);
-
+        this.listBookiesCommandflagsMockedStatic = mockStatic(ListBookiesCommand.Flags.class);
+        listBookiesCommandflagsMockedStatic.when(() -> ListBookiesCommand.Flags.newFlags())
+                .thenReturn(mockListBookiesFlags);
         this.mockListBookiesCommand = spy(new ListBookiesCommand());
         doReturn(true).when(mockListBookiesCommand)
-            .apply(any(ServerConfiguration.class), any(ListBookiesCommand.Flags.class));
-        whenNew(ListBookiesCommand.class)
-            .withParameterTypes(ListBookiesCommand.Flags.class)
-            .withArguments(mockListBookiesFlags)
-            .thenReturn(mockListBookiesCommand);
+                .apply(any(ServerConfiguration.class), any(ListBookiesCommand.Flags.class));
+        listBookiesCommandMockedStatic = mockStatic(ListBookiesCommand.class);
+        listBookiesCommandMockedStatic.when(() -> ListBookiesCommand.newListBookiesCommand(mockListBookiesFlags))
+                .thenReturn(mockListBookiesCommand);
 
         // construct the bookie shell.
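The rewritten setup above swaps PowerMock's whenNew(...) constructor interception for Mockito's MockedStatic stubbing of factory methods such as ListBookiesCommand.newListBookiesCommand(...). A minimal sketch of that idiom follows; the Widget factory is hypothetical, and static mocking assumes the mockito-inline artifact is on the classpath. try-with-resources plays the role of the explicit close() calls in teardown():

    import static org.mockito.Mockito.mock;
    import static org.mockito.Mockito.mockStatic;

    import org.mockito.MockedStatic;

    public class MockStaticSketch {

        // Hypothetical class built through a static factory, mirroring the
        // newListBookiesCommand()/newBookKeeperAdmin() factories stubbed above.
        static class Widget {
            static Widget newWidget() {
                return new Widget();
            }
        }

        public static void main(String[] args) {
            Widget stub = mock(Widget.class);
            // The static mock is active only until close(); try-with-resources
            // closes it automatically at the end of the block.
            try (MockedStatic<Widget> mocked = mockStatic(Widget.class)) {
                mocked.when(Widget::newWidget).thenReturn(stub);
                System.out.println(Widget.newWidget() == stub); // true
            }
            System.out.println(Widget.newWidget() == stub); // false: stubbing ended
        }
    }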
- this.shell = new BookieShell(LedgerIdFormatter.LONG_LEDGERID_FORMATTER, EntryFormatter.STRING_FORMATTER); - this.admin = PowerMockito.mock(BookKeeperAdmin.class); - whenNew(BookKeeperAdmin.class) - .withParameterTypes(ClientConfiguration.class) - .withArguments(any(ClientConfiguration.class)) - .thenReturn(admin); + this.admin = mock(BookKeeperAdmin.class); + + bookKeeperAdminMockedStatic = mockStatic(BookKeeperAdmin.class); + bookKeeperAdminMockedStatic.when(() -> BookKeeperAdmin.newBookKeeperAdmin(any(ClientConfiguration.class))) + .thenReturn(admin); this.clientConf = new ClientConfiguration(); this.clientConf.setMetadataServiceUri("zk://127.0.0.1/path/to/ledgers"); when(admin.getConf()).thenReturn(this.clientConf); - this.rm = PowerMockito.mock(RegistrationManager.class); + this.rm = mock(RegistrationManager.class); this.cookie = Cookie.newBuilder() - .setBookieHost("127.0.0.1:3181") + .setBookieId("127.0.0.1:3181") .setInstanceId("xyz") .setJournalDirs("/path/to/journal/dir") .setLedgerDirs("/path/to/journal/dir") .setLayoutVersion(Cookie.CURRENT_COOKIE_LAYOUT_VERSION) .build(); this.version = new LongVersion(1L); - when(rm.readCookie(anyString())) + when(rm.readCookie(any(BookieId.class))) .thenReturn(new Versioned<>(cookie.toString().getBytes(UTF_8), version)); - this.driver = mock(MetadataBookieDriver.class); - when(driver.getRegistrationManager()) - .thenReturn(rm); - - PowerMockito.mockStatic(MetadataDrivers.class); - PowerMockito.doAnswer(invocationOnMock -> { - Function function = invocationOnMock.getArgument(1); - function.apply(rm); - return null; - }).when( - MetadataDrivers.class, - "runFunctionWithRegistrationManager", - any(ServerConfiguration.class), - any(Function.class) - ); + metadataDriversMockedStatic = mockStatic(MetadataDrivers.class); + metadataDriversMockedStatic + .when(() -> MetadataDrivers.runFunctionWithRegistrationManager( + any(ServerConfiguration.class), any(Function.class))) + .thenAnswer(invocationOnMock -> { + Function function = invocationOnMock.getArgument(1); + function.apply(rm); + return null; + }); + } + + @After + public void teardown() throws Exception { + listBookiesCommandMockedStatic.close(); + listBookiesCommandflagsMockedStatic.close(); + metadataDriversMockedStatic.close(); + bookKeeperAdminMockedStatic.close(); } private static CommandLine parseCommandLine(MyCommand cmd, String... 
args) throws ParseException { @@ -181,15 +160,17 @@ public void testRecoverCmdMissingArgument() throws Exception { } catch (MissingArgumentException e) { // expected } - PowerMockito.verifyNew(BookKeeperAdmin.class, never()).withArguments(any(ClientConfiguration.class)); + bookKeeperAdminMockedStatic.verify(() -> BookKeeperAdmin.newBookKeeperAdmin(any(ClientConfiguration.class)), + never()); } @Test public void testRecoverCmdInvalidBookieAddress() throws Exception { RecoverCmd cmd = (RecoverCmd) shell.commands.get("recover"); - CommandLine cmdLine = parseCommandLine(cmd, "127.0.0.1"); + CommandLine cmdLine = parseCommandLine(cmd, "non.valid$$bookie.id"); assertEquals(-1, cmd.runCmd(cmdLine)); - PowerMockito.verifyNew(BookKeeperAdmin.class, never()).withArguments(any(ClientConfiguration.class)); + bookKeeperAdminMockedStatic.verify(() -> BookKeeperAdmin.newBookKeeperAdmin(any(ClientConfiguration.class)), + never()); } @SuppressWarnings("unchecked") @@ -202,9 +183,8 @@ public void testRecoverCmdQuery() throws Exception { RecoverCmd cmd = (RecoverCmd) shell.commands.get("recover"); CommandLine cmdLine = parseCommandLine(cmd, "-force", "-q", "127.0.0.1:3181"); assertEquals(0, cmd.runCmd(cmdLine)); - PowerMockito - .verifyNew(BookKeeperAdmin.class, times(1)) - .withArguments(any(ClientConfiguration.class)); + bookKeeperAdminMockedStatic.verify(() -> BookKeeperAdmin.newBookKeeperAdmin(any(ClientConfiguration.class)), + times(1)); verify(admin, times(1)).getLedgersContainBookies(any(Set.class)); verify(admin, times(1)).close(); } @@ -258,20 +238,18 @@ void testRecoverCmdRecoverLedger(long ledgerId, RecoverCmd cmd = (RecoverCmd) shell.commands.get("recover"); CommandLine cmdLine = parseCommandLine(cmd, args); assertEquals(0, cmd.runCmd(cmdLine)); - PowerMockito - .verifyNew(BookKeeperAdmin.class, times(1)) - .withArguments(any(ClientConfiguration.class)); + bookKeeperAdminMockedStatic.verify(() -> BookKeeperAdmin.newBookKeeperAdmin(any(ClientConfiguration.class)), + times(1)); verify(admin, times(1)) .recoverBookieData(eq(ledgerId), any(Set.class), eq(dryrun), eq(skipOpenLedgers)); verify(admin, times(1)).close(); if (removeCookies) { - PowerMockito.verifyStatic(MetadataDrivers.class); MetadataDrivers.runFunctionWithRegistrationManager(any(ServerConfiguration.class), any(Function.class)); - verify(rm, times(1)).readCookie(anyString()); - verify(rm, times(1)).removeCookie(anyString(), eq(version)); + verify(rm, times(1)).readCookie(any(BookieId.class)); + verify(rm, times(1)).removeCookie(any(BookieId.class), eq(version)); } else { - verify(rm, times(0)).readCookie(anyString()); - verify(rm, times(0)).removeCookie(anyString(), eq(version)); + verify(rm, times(0)).readCookie(any(BookieId.class)); + verify(rm, times(0)).removeCookie(any(BookieId.class), eq(version)); } } @@ -279,7 +257,7 @@ void testRecoverCmdRecoverLedger(long ledgerId, public void testRecoverCmdRecoverDefault() throws Exception { // default behavior testRecoverCmdRecover( - false, false, false, + false, false, false, false, "-force", "127.0.0.1:3181"); } @@ -287,7 +265,7 @@ public void testRecoverCmdRecoverDefault() throws Exception { public void testRecoverCmdRecoverDeleteCookie() throws Exception { // dryrun testRecoverCmdRecover( - false, false, true, + false, false, true, false, "-force", "-deleteCookie", "127.0.0.1:3181"); } @@ -295,7 +273,7 @@ public void testRecoverCmdRecoverDeleteCookie() throws Exception { public void testRecoverCmdRecoverSkipOpenLedgersDeleteCookie() throws Exception { // dryrun testRecoverCmdRecover( - 
false, true, true, + false, true, true, false, "-force", "-deleteCookie", "-skipOpenLedgers", "127.0.0.1:3181"); } @@ -303,7 +281,7 @@ public void testRecoverCmdRecoverSkipOpenLedgersDeleteCookie() throws Exception public void testRecoverCmdRecoverDryrun() throws Exception { // dryrun testRecoverCmdRecover( - true, false, false, + true, false, false, false, "-force", "-dryrun", "127.0.0.1:3181"); } @@ -311,45 +289,73 @@ public void testRecoverCmdRecoverDryrun() throws Exception { public void testRecoverCmdRecoverDryrunDeleteCookie() throws Exception { // dryrun & removeCookie : removeCookie should be false testRecoverCmdRecover( - true, false, false, + true, false, false, false, "-force", "-dryrun", "-deleteCookie", "127.0.0.1:3181"); } + @Test + public void testRecoverCmdRecoverSkipUnrecoverableLedgers() throws Exception { + // skipUnrecoverableLedgers + testRecoverCmdRecover( + false, false, false, true, + "-force", "-sku", "127.0.0.1:3181"); + } + @SuppressWarnings("unchecked") void testRecoverCmdRecover(boolean dryrun, boolean skipOpenLedgers, boolean removeCookies, + boolean skipUnrecoverableLedgers, String... args) throws Exception { RecoverCmd cmd = (RecoverCmd) shell.commands.get("recover"); CommandLine cmdLine = parseCommandLine(cmd, args); assertEquals(0, cmd.runCmd(cmdLine)); - PowerMockito - .verifyNew(BookKeeperAdmin.class, times(1)) - .withArguments(any(ClientConfiguration.class)); + bookKeeperAdminMockedStatic.verify(() -> BookKeeperAdmin.newBookKeeperAdmin(any(ClientConfiguration.class)), + times(1)); verify(admin, times(1)) - .recoverBookieData(any(Set.class), eq(dryrun), eq(skipOpenLedgers)); + .recoverBookieData(any(Set.class), eq(dryrun), eq(skipOpenLedgers), eq(skipUnrecoverableLedgers)); verify(admin, times(1)).close(); if (removeCookies) { - PowerMockito.verifyStatic(MetadataDrivers.class); MetadataDrivers.runFunctionWithRegistrationManager(any(ServerConfiguration.class), any(Function.class)); - verify(rm, times(1)).readCookie(anyString()); - verify(rm, times(1)).removeCookie(anyString(), eq(version)); + verify(rm, times(1)).readCookie(any(BookieId.class)); + verify(rm, times(1)).removeCookie(any(BookieId.class), eq(version)); } else { - verify(rm, times(0)).readCookie(anyString()); - verify(rm, times(0)).removeCookie(anyString(), eq(version)); + verify(rm, times(0)).readCookie(any(BookieId.class)); + verify(rm, times(0)).removeCookie(any(BookieId.class), eq(version)); } } @Test public void testLastMarkCmd() throws Exception { + LastMarkCommand mockLastMarkCommand = mock(LastMarkCommand.class); + + @Cleanup + MockedStatic lastMarkCommandMockedStatic = mockStatic(LastMarkCommand.class); + lastMarkCommandMockedStatic.when(() -> LastMarkCommand.newLastMarkCommand()).thenReturn(mockLastMarkCommand); + shell.run(new String[] { "lastmark"}); - verifyNew(LastMarkCommand.class, times(1)).withNoArguments(); + lastMarkCommandMockedStatic.verify(() -> LastMarkCommand.newLastMarkCommand(), times(1)); verify(mockLastMarkCommand, times(1)) .apply(same(shell.bkConf), any(CliFlags.class)); } @Test public void testSimpleTestCmd() throws Exception { + SimpleTestCommand.Flags mockSimpleTestFlags = spy(new SimpleTestCommand.Flags()); + + @Cleanup + MockedStatic flagsMockedStatic = mockStatic(Flags.class); + flagsMockedStatic.when(() -> Flags.newFlags()).thenReturn(mockSimpleTestFlags); + + SimpleTestCommand mockSimpleTestCommand = spy(new SimpleTestCommand()); + doReturn(true).when(mockSimpleTestCommand) + .apply(any(ServerConfiguration.class), any(SimpleTestCommand.Flags.class)); + + 
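Beyond stubbing, the recover-command tests above assert how often a static factory was invoked through MockedStatic.verify(verification, mode). A compact, self-contained illustration with a hypothetical Client.connect() factory, again assuming mockito-inline:

    import static org.mockito.Mockito.mockStatic;
    import static org.mockito.Mockito.never;
    import static org.mockito.Mockito.times;

    import org.mockito.MockedStatic;

    public class StaticVerifySketch {

        // Hypothetical factory standing in for BookKeeperAdmin.newBookKeeperAdmin(...).
        static class Client {
            static Client connect() {
                return new Client();
            }
        }

        public static void main(String[] args) {
            try (MockedStatic<Client> mocked = mockStatic(Client.class)) {
                // No construction attempted yet.
                mocked.verify(Client::connect, never());

                Client.connect(); // recorded (and stubbed to return null by default)

                // Exactly one construction, mirroring times(1) in the tests above.
                mocked.verify(Client::connect, times(1));
            }
        }
    }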
@Cleanup + MockedStatic simpleTestCommandMockedStatic = mockStatic(SimpleTestCommand.class); + simpleTestCommandMockedStatic.when(() -> SimpleTestCommand.newSimpleTestCommand(mockSimpleTestFlags)) + .thenReturn(mockSimpleTestCommand); + shell.run(new String[] { "simpletest", "-e", "10", @@ -357,8 +363,8 @@ public void testSimpleTestCmd() throws Exception { "-a", "3", "-n", "200" }); - verifyNew(SimpleTestCommand.class, times(1)) - .withArguments(same(mockSimpleTestFlags)); + simpleTestCommandMockedStatic.verify(() -> SimpleTestCommand.newSimpleTestCommand(mockSimpleTestFlags), + times(1)); verify(mockSimpleTestCommand, times(1)) .apply(same(shell.bkConf), same(mockSimpleTestFlags)); verify(mockSimpleTestFlags, times(1)).ensembleSize(eq(10)); @@ -372,7 +378,9 @@ public void testListBookiesCmdNoArgs() throws Exception { assertEquals(1, shell.run(new String[] { "listbookies" })); - verifyNew(ListBookiesCommand.class, times(0)).withNoArguments(); + + listBookiesCommandMockedStatic.verify(() -> ListBookiesCommand.newListBookiesCommand(mockListBookiesFlags) + , times(0)); } @Test @@ -380,7 +388,8 @@ public void testListBookiesCmdConflictArgs() throws Exception { assertEquals(1, shell.run(new String[] { "listbookies", "-rw", "-ro" })); - verifyNew(ListBookiesCommand.class, times(0)).withNoArguments(); + listBookiesCommandMockedStatic.verify(() -> ListBookiesCommand.newListBookiesCommand(mockListBookiesFlags), + times(0)); } @Test @@ -388,12 +397,14 @@ public void testListBookiesCmdReadOnly() throws Exception { assertEquals(0, shell.run(new String[] { "listbookies", "-ro" })); - verifyNew(ListBookiesCommand.class, times(1)) - .withArguments(same(mockListBookiesFlags)); + + listBookiesCommandMockedStatic.verify(() -> ListBookiesCommand.newListBookiesCommand(mockListBookiesFlags), + times(1)); verify(mockListBookiesCommand, times(1)) .apply(same(shell.bkConf), same(mockListBookiesFlags)); verify(mockListBookiesFlags, times(1)).readonly(true); verify(mockListBookiesFlags, times(1)).readwrite(false); + verify(mockListBookiesFlags, times(1)).all(false); } @Test @@ -401,11 +412,63 @@ public void testListBookiesCmdReadWrite() throws Exception { assertEquals(0, shell.run(new String[] { "listbookies", "-rw" })); - verifyNew(ListBookiesCommand.class, times(1)) - .withArguments(same(mockListBookiesFlags)); + listBookiesCommandMockedStatic.verify(() -> ListBookiesCommand.newListBookiesCommand(mockListBookiesFlags), + times(1)); verify(mockListBookiesCommand, times(1)) .apply(same(shell.bkConf), same(mockListBookiesFlags)); verify(mockListBookiesFlags, times(1)).readonly(false); verify(mockListBookiesFlags, times(1)).readwrite(true); + verify(mockListBookiesFlags, times(1)).all(false); } + + @Test + public void testListBookiesCmdAll() throws Exception { + assertEquals(0, shell.run(new String[] { + "listbookies", "-a" + })); + listBookiesCommandMockedStatic.verify(() -> ListBookiesCommand.newListBookiesCommand(mockListBookiesFlags), + times(1)); + verify(mockListBookiesCommand, times(1)) + .apply(same(shell.bkConf), same(mockListBookiesFlags)); + verify(mockListBookiesFlags, times(1)).readonly(false); + verify(mockListBookiesFlags, times(1)).readwrite(false); + verify(mockListBookiesFlags, times(1)).all(true); + } + + @Test + public void testForceAuditChecksWithNoArgs() throws Exception { + assertEquals(-1, shell.run(new String[] { + "forceauditchecks" + })); + } + + @Test + public void testForceAuditChecksWithSomeArgs() throws Exception { + assertEquals(0, shell.run(new String[] { + "forceauditchecks", "-calc" 
+ })); + } + + @Test + public void testForceAuditChecksWithAllArgs() throws Exception { + assertEquals(0, shell.run(new String[] { + "forceauditchecks", "-calc", "-rc", "-ppc" + })); + } + + @Test + public void testClusterInfoCmd() throws Exception { + ClusterInfoCommand mockClusterInfoCommand = spy(new ClusterInfoCommand()); + + @Cleanup + MockedStatic clusterInfoCommandMockedStatic = mockStatic(ClusterInfoCommand.class); + clusterInfoCommandMockedStatic.when(() -> ClusterInfoCommand.newClusterInfoCommand()) + .thenReturn(mockClusterInfoCommand); + + doReturn(true).when(mockClusterInfoCommand).apply(same(shell.bkConf), any(CliFlags.class)); + shell.run(new String[]{ "clusterinfo" }); + clusterInfoCommandMockedStatic.verify(() -> ClusterInfoCommand.newClusterInfoCommand(), + times(1)); + } + } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieShutdownTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieShutdownTest.java index 3fbb5c84035..7dd528f04be 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieShutdownTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieShutdownTest.java @@ -26,7 +26,6 @@ import java.util.Random; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; - import org.apache.bookkeeper.client.AsyncCallback.AddCallback; import org.apache.bookkeeper.client.BKException; import org.apache.bookkeeper.client.BookKeeper.DigestType; @@ -84,9 +83,7 @@ public void testBookieRestartContinuously() throws Exception { + " and now going to fail bookie."); // Shutdown one Bookie server and restarting new one to continue // writing - bsConfs.remove(0); - bs.get(0).shutdown(); - bs.remove(0); + killBookie(0); startNewBookie(); LOG.info("Shutdown one bookie server and started new bookie server..."); } catch (BKException e) { @@ -118,11 +115,11 @@ public void addComplete(int rc, LedgerHandle lh, long entryId, */ @Test public void testBookieShutdownFromBookieThread() throws Exception { - ServerConfiguration conf = bsConfs.get(0); + ServerConfiguration conf = confByIndex(0); killBookie(0); final CountDownLatch latch = new CountDownLatch(1); final CountDownLatch shutdownComplete = new CountDownLatch(1); - Bookie bookie = new Bookie(conf) { + Bookie bookie = new TestBookieImpl(conf) { @Override public void run() { try { diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieStickyReadsTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieStickyReadsTest.java new file mode 100644 index 00000000000..99bcc78845f --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieStickyReadsTest.java @@ -0,0 +1,211 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+package org.apache.bookkeeper.bookie;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import com.beust.jcommander.internal.Lists;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
+import lombok.Cleanup;
+import lombok.extern.slf4j.Slf4j;
+import org.apache.bookkeeper.client.BookKeeper;
+import org.apache.bookkeeper.client.api.LedgerEntries;
+import org.apache.bookkeeper.client.api.ReadHandle;
+import org.apache.bookkeeper.client.api.WriteHandle;
+import org.apache.bookkeeper.conf.ClientConfiguration;
+import org.apache.bookkeeper.test.BookKeeperClusterTestCase;
+import org.junit.Test;
+
+/**
+ * Tests for bookie sticky reads.
+ */
+@Slf4j
+public class BookieStickyReadsTest extends BookKeeperClusterTestCase {
+
+    private static final int NUM_BOOKIES = 3;
+
+    private static final String READ_ENTRY_SCHEDULING_DELAY_METRIC = "bookkeeper_server.READ_ENTRY_SCHEDULING_DELAY";
+
+    public BookieStickyReadsTest() {
+        super(NUM_BOOKIES);
+    }
+
+    @Test
+    public void testNormalReads() throws Exception {
+        ClientConfiguration conf = new ClientConfiguration(baseClientConf);
+
+        // Default should already be set to false
+        // conf.setStickyReadsEnabled(false);
+
+        writeAndReadEntries(conf, 3, 3, 3);
+
+        // All bookies should have received at least some read request
+        getBookieReadEntrySchedulingDelayStats().values()
+                .forEach(readRequests -> assertTrue(readRequests > 0));
+    }
+
+    @Test
+    public void testStickyFlagWithStriping() throws Exception {
+        ClientConfiguration conf = new ClientConfiguration(baseClientConf);
+        conf.setStickyReadsEnabled(true);
+
+        writeAndReadEntries(conf, 3, 2, 2);
+
+        // All bookies should have received at least some read request since we
+        // don't enable sticky reads when striping is enabled
+        getBookieReadEntrySchedulingDelayStats().values()
+                .forEach(readRequests -> assertTrue(readRequests > 0));
+    }
+
+    @Test
+    public void stickyReadsWithNoFailures() throws Exception {
+        ClientConfiguration conf = new ClientConfiguration(baseClientConf);
+        conf.setStickyReadsEnabled(true);
+
+        writeAndReadEntries(conf, 3, 3, 3);
+
+        // All read requests should have been made to a single bookie
+        Map<Integer, Long> stats = getBookieReadEntrySchedulingDelayStats();
+        boolean foundBookieWithRequests = false;
+        for (long readRequests : stats.values()) {
+            if (readRequests > 0) {
+                assertFalse("Another bookie already had received requests", foundBookieWithRequests);
+                foundBookieWithRequests = true;
+            }
+        }
+    }
+
+    @Test
+    public void stickyReadsWithFailures() throws Exception {
+        ClientConfiguration conf = new ClientConfiguration(baseClientConf);
+        conf.setStickyReadsEnabled(true);
+
+        @Cleanup
+        BookKeeper bkc = new BookKeeper(conf);
+
+        final int n = 10;
+        long ledgerId;
+
+        try (WriteHandle wh = bkc.newCreateLedgerOp()
+                .withEnsembleSize(3)
+                .withWriteQuorumSize(3)
+                .withAckQuorumSize(3)
+                .withPassword("".getBytes())
+                .execute()
+                .join()) {
+            ledgerId = wh.getId();
+
+            for (int i = 0; i < n; i++) {
+                wh.append(("entry-" + i).getBytes());
+            }
+        }
+
+        @Cleanup
+        ReadHandle rh = bkc.newOpenLedgerOp()
+                .withLedgerId(ledgerId)
+                .withPassword("".getBytes())
+                .execute()
+                .join();
+
+        // Read 1 entry and detect which bookie was being used
+        @Cleanup
+        LedgerEntries entry0 = rh.read(0, 0);
+        assertArrayEquals("entry-0".getBytes(), entry0.getEntry(0).getEntryBytes());
+
+        // All read requests should have been made to a single bookie
+        int bookieWithRequests = -1;
+        for (int i = 0; i < NUM_BOOKIES; i++) {
+            long requests = getStatsProvider(i).getOpStatsLogger(READ_ENTRY_SCHEDULING_DELAY_METRIC)
+                    .getSuccessCount();
+
+            log.info("Bookie {} --- requests: {}", i, requests);
+
+            if (requests > 0) {
+                assertTrue("Another bookie already had received requests", bookieWithRequests == -1);
+                bookieWithRequests = i;
+            }
+        }
+
+        // Suspend the sticky bookie. Reads should now go to a different sticky
+        // bookie
+        serverByIndex(bookieWithRequests).suspendProcessing();
+
+        for (int i = 0; i < n; i++) {
+            @Cleanup
+            LedgerEntries entries = rh.read(i, i);
+
+            assertArrayEquals(("entry-" + i).getBytes(), entries.getEntry(i).getEntryBytes());
+        }
+
+        // At this point, we should have 1 bookie with 1 request (the initial
+        // request), and a second bookie with 10 requests. The 3rd bookie should
+        // have no requests
+        List<Long> requestCounts = Lists.newArrayList(getBookieReadEntrySchedulingDelayStats().values());
+        Collections.sort(requestCounts);
+
+        assertEquals(0, requestCounts.get(0).longValue());
+        assertEquals(1, requestCounts.get(1).longValue());
+        assertEquals(10, requestCounts.get(2).longValue());
+    }
+
+    private Map<Integer, Long> getBookieReadEntrySchedulingDelayStats() throws Exception {
+        Map<Integer, Long> stats = new TreeMap<>();
+        for (int i = 0; i < NUM_BOOKIES; i++) {
+            stats.put(i, getStatsProvider(i).getOpStatsLogger(READ_ENTRY_SCHEDULING_DELAY_METRIC)
+                    .getSuccessCount());
+        }
+
+        return stats;
+    }
+
+    private void writeAndReadEntries(ClientConfiguration conf, int ensembleSize, int writeQuorum, int ackQuorum)
+            throws Exception {
+        @Cleanup
+        BookKeeper bkc = new BookKeeper(conf);
+
+        @Cleanup
+        WriteHandle wh = bkc.newCreateLedgerOp()
+                .withEnsembleSize(ensembleSize)
+                .withWriteQuorumSize(writeQuorum)
+                .withAckQuorumSize(ackQuorum)
+                .withPassword("".getBytes())
+                .execute()
+                .join();
+
+        final int n = 10;
+
+        for (int i = 0; i < n; i++) {
+            wh.append(("entry-" + i).getBytes());
+        }
+
+        for (int i = 0; i < n; i++) {
+            @Cleanup
+            LedgerEntries entries = wh.read(i, i);
+
+            assertArrayEquals(("entry-" + i).getBytes(), entries.getEntry(i).getEntryBytes());
+        }
+    }
+}
diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieStorageThresholdTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieStorageThresholdTest.java
index 6f7d1c9887f..b242a0d8663 100644
--- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieStorageThresholdTest.java
+++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieStorageThresholdTest.java
@@ -26,10 +26,10 @@ import static org.junit.Assert.fail;
 
 import java.io.File;
+import java.util.Collections;
 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicBoolean;
-
 import org.apache.bookkeeper.bookie.LedgerDirsManager.LedgerDirsListener;
 import org.apache.bookkeeper.bookie.LedgerDirsManager.NoWritableLedgerDirException;
 import org.apache.bookkeeper.client.BookKeeper.DigestType;
@@ -144,24 +144,23 @@ public void testStorageThresholdCompaction() throws Exception {
         stopAllBookies();
         ServerConfiguration conf = newServerConfiguration();
-        File ledgerDir1 = createTempDir("ledger", "test1");
-        File ledgerDir2 = createTempDir("ledger", "test2");
-        File journalDir
= createTempDir("journal", "test"); + File ledgerDir1 = tmpDirs.createNew("ledger", "test1"); + File ledgerDir2 = tmpDirs.createNew("ledger", "test2"); + File journalDir = tmpDirs.createNew("journal", "test"); String[] ledgerDirNames = new String[]{ ledgerDir1.getPath(), ledgerDir2.getPath() }; conf.setLedgerDirNames(ledgerDirNames); conf.setJournalDirName(journalDir.getPath()); - BookieServer server = startBookie(conf); - bs.add(server); - bsConfs.add(conf); - Bookie bookie = server.getBookie(); - // since we are going to set dependency injected ledgermonitor, so we need to shutdown - // the ledgermonitor which was created as part of the initialization of Bookie - bookie.ledgerMonitor.shutdown(); - LedgerDirsManager ledgerDirsManager = bookie.getLedgerDirsManager(); + BookieServer server = startAndAddBookie(conf).getServer(); + BookieImpl bookie = (BookieImpl) server.getBookie(); + // since we are going to set dependency injected dirsMonitor, so we need to shutdown + // the dirsMonitor which was created as part of the initialization of Bookie + bookie.dirsMonitor.shutdown(); + + LedgerDirsManager ledgerDirsManager = ((BookieImpl) bookie).getLedgerDirsManager(); // flag latches final CountDownLatch diskWritable = new CountDownLatch(1); @@ -183,13 +182,11 @@ public void diskFull(File disk) { // Dependency Injected class ThresholdTestDiskChecker thresholdTestDiskChecker = new ThresholdTestDiskChecker( baseConf.getDiskUsageThreshold(), baseConf.getDiskUsageWarnThreshold()); - LedgerDirsMonitor ledgerDirsMonitor = new LedgerDirsMonitor(baseConf, thresholdTestDiskChecker, - ledgerDirsManager); - // set the ledgermonitor and idxmonitor and initiate/start it - bookie.ledgerMonitor = ledgerDirsMonitor; - bookie.idxMonitor = ledgerDirsMonitor; - bookie.ledgerMonitor.init(); - bookie.ledgerMonitor.start(); + bookie.dirsMonitor = new LedgerDirsMonitor(baseConf, thresholdTestDiskChecker, + Collections.singletonList(ledgerDirsManager)); + // set the dirsMonitor and initiate/start it + bookie.dirsMonitor.init(); + bookie.dirsMonitor.start(); // create ledgers and add fragments LedgerHandle[] lhs = prepareData(3); diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieThreadTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieThreadTest.java index adc187a9e2f..1f237f7249a 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieThreadTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieThreadTest.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file * distributed with this work for additional information @@ -19,7 +19,6 @@ import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; - import org.junit.Assert; import org.junit.Test; diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieWriteToJournalTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieWriteToJournalTest.java index 2d197d39fa0..0907a625282 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieWriteToJournalTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieWriteToJournalTest.java @@ -26,58 +26,76 @@ import static org.junit.Assert.assertSame; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.anyBoolean; +import static org.mockito.ArgumentMatchers.anyInt; import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.mock; -import static org.powermock.api.mockito.PowerMockito.whenNew; +import static org.mockito.Mockito.mockStatic; import io.netty.buffer.ByteBuf; import io.netty.buffer.Unpooled; import java.io.File; +import java.io.IOException; import java.util.concurrent.CompletableFuture; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; +import lombok.Cleanup; import lombok.extern.slf4j.Slf4j; import org.apache.bookkeeper.client.api.BKException; import org.apache.bookkeeper.conf.ServerConfiguration; import org.apache.bookkeeper.conf.TestBKConfiguration; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.WriteCallback; +import org.apache.bookkeeper.util.DiskChecker; import org.apache.commons.lang3.mutable.MutableBoolean; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; import org.junit.runner.RunWith; +import org.mockito.MockedStatic; import org.mockito.invocation.InvocationOnMock; +import org.mockito.junit.MockitoJUnitRunner; import org.mockito.stubbing.Answer; -import org.powermock.core.classloader.annotations.PrepareForTest; -import org.powermock.modules.junit4.PowerMockRunner; /** * Test the bookie journal. */ -@RunWith(PowerMockRunner.class) -@PrepareForTest({Bookie.class}) +@RunWith(MockitoJUnitRunner.Silent.class) @Slf4j public class BookieWriteToJournalTest { @Rule public TemporaryFolder tempDir = new TemporaryFolder(); + class NoOpJournalReplayBookie extends TestBookieImpl { + + public NoOpJournalReplayBookie(ServerConfiguration conf) + throws Exception { + super(conf); + } + + @Override + void readJournal() throws IOException, BookieException { + // Should be no-op since journal objects are mocked + } + } + /** * test that Bookie calls correctly Journal.logAddEntry about "ackBeforeSync" parameter. 
*/ + @Test public void testJournalLogAddEntryCalledCorrectly() throws Exception { File journalDir = tempDir.newFolder(); - Bookie.checkDirectoryStructure(Bookie.getCurrentDirectory(journalDir)); + BookieImpl.checkDirectoryStructure(BookieImpl.getCurrentDirectory(journalDir)); File ledgerDir = tempDir.newFolder(); - Bookie.checkDirectoryStructure(Bookie.getCurrentDirectory(ledgerDir)); + BookieImpl.checkDirectoryStructure(BookieImpl.getCurrentDirectory(ledgerDir)); ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); conf.setJournalDirName(journalDir.getPath()) .setLedgerDirNames(new String[]{ledgerDir.getPath()}) .setMetadataServiceUri(null); - BookieSocketAddress bookieAddress = Bookie.getBookieAddress(conf); + + BookieId bookieAddress = BookieImpl.getBookieId(conf); CountDownLatch journalJoinLatch = new CountDownLatch(1); Journal journal = mock(Journal.class); MutableBoolean effectiveAckBeforeSync = new MutableBoolean(false); @@ -100,9 +118,12 @@ public void testJournalLogAddEntryCalledCorrectly() throws Exception { return null; }).when(journal).joinThread(); - whenNew(Journal.class).withAnyArguments().thenReturn(journal); + @Cleanup + MockedStatic journalMockedStatic = mockStatic(Journal.class); + journalMockedStatic.when(() -> Journal.newJournal(anyInt(), any(), any(), any(), any(), any(), any())) + .thenReturn(journal); - Bookie b = new Bookie(conf); + Bookie b = new NoOpJournalReplayBookie(conf); b.start(); long ledgerId = 1; @@ -114,7 +135,7 @@ public void testJournalLogAddEntryCalledCorrectly() throws Exception { final ByteBuf data = buildEntry(ledgerId, entryId, -1); final long expectedEntryId = entryId; b.addEntry(data, ackBeforeSync, (int rc, long ledgerId1, long entryId1, - BookieSocketAddress addr, Object ctx) -> { + BookieId addr, Object ctx) -> { assertSame(expectedCtx, ctx); assertEquals(ledgerId, ledgerId1); assertEquals(expectedEntryId, entryId1); @@ -136,14 +157,14 @@ public void testJournalLogAddEntryCalledCorrectly() throws Exception { public void testForceLedger() throws Exception { File journalDir = tempDir.newFolder(); - Bookie.checkDirectoryStructure(Bookie.getCurrentDirectory(journalDir)); + BookieImpl.checkDirectoryStructure(BookieImpl.getCurrentDirectory(journalDir)); File ledgerDir = tempDir.newFolder(); - Bookie.checkDirectoryStructure(Bookie.getCurrentDirectory(ledgerDir)); + BookieImpl.checkDirectoryStructure(BookieImpl.getCurrentDirectory(ledgerDir)); ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); conf.setJournalDirName(journalDir.getPath()) .setLedgerDirNames(new String[]{ledgerDir.getPath()}); - Bookie b = new Bookie(conf); + Bookie b = new TestBookieImpl(conf); b.start(); long ledgerId = 1; @@ -157,7 +178,7 @@ public void testForceLedger() throws Exception { final ByteBuf data = buildEntry(ledgerId, entryId, -1); final long expectedEntryId = entryId; b.forceLedger(ledgerId, (int rc, long ledgerId1, long entryId1, - BookieSocketAddress addr, Object ctx) -> { + BookieId addr, Object ctx) -> { if (rc != BKException.Code.OK) { latchForceLedger1.completeExceptionally(org.apache.bookkeeper.client.BKException.create(rc)); return; @@ -167,7 +188,7 @@ public void testForceLedger() throws Exception { result(latchForceLedger1); b.addEntry(data, true /* ackBeforesync */, (int rc, long ledgerId1, long entryId1, - BookieSocketAddress addr, Object ctx) -> { + BookieId addr, Object ctx) -> { if (rc != BKException.Code.OK) { latchAddEntry.completeExceptionally(org.apache.bookkeeper.client.BKException.create(rc)); return; @@ 
-178,7 +199,7 @@ public void testForceLedger() throws Exception { // issue a new "forceLedger" b.forceLedger(ledgerId, (int rc, long ledgerId1, long entryId1, - BookieSocketAddress addr, Object ctx) -> { + BookieId addr, Object ctx) -> { if (rc != BKException.Code.OK) { latchForceLedger2.completeExceptionally(org.apache.bookkeeper.client.BKException.create(rc)); return; @@ -190,6 +211,33 @@ public void testForceLedger() throws Exception { b.shutdown(); } + @Test + public void testSmallJournalQueueWithHighFlushFrequency() throws IOException, InterruptedException { + ServerConfiguration conf = new ServerConfiguration(); + conf.setJournalQueueSize(1); + conf.setJournalFlushWhenQueueEmpty(true); + conf.setJournalBufferedWritesThreshold(1); + + conf.setJournalDirName(tempDir.newFolder().getPath()); + conf.setLedgerDirNames(new String[]{tempDir.newFolder().getPath()}); + DiskChecker diskChecker = new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold()); + LedgerDirsManager ledgerDirsManager = new LedgerDirsManager(conf, conf.getLedgerDirs(), diskChecker); + Journal journal = new Journal(0, conf.getJournalDirs()[0], conf, ledgerDirsManager); + journal.start(); + + final int entries = 1000; + CountDownLatch entriesLatch = new CountDownLatch(entries); + for (int j = 1; j <= entries; j++) { + ByteBuf entry = buildEntry(1, j, -1); + journal.logAddEntry(entry, false, (int rc, long ledgerId, long entryId, BookieId addr, Object ctx) -> { + entriesLatch.countDown(); + }, null); + } + entriesLatch.await(); + + journal.shutdown(); + } + private static ByteBuf buildEntry(long ledgerId, long entryId, long lastAddConfirmed) { final ByteBuf data = Unpooled.buffer(); data.writeLong(ledgerId); diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BufferedChannelTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BufferedChannelTest.java index 86f3a8643f0..cd3e34d35e3 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BufferedChannelTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BufferedChannelTest.java @@ -23,11 +23,11 @@ import io.netty.buffer.ByteBuf; import io.netty.buffer.Unpooled; +import io.netty.buffer.UnpooledByteBufAllocator; import java.io.File; import java.io.RandomAccessFile; import java.nio.channels.FileChannel; import java.util.Random; - import org.junit.Assert; import org.junit.Test; @@ -76,8 +76,8 @@ public void testBufferedChannel(int byteBufLength, int numOfWrites, int unpersis newLogFile.deleteOnExit(); FileChannel fileChannel = new RandomAccessFile(newLogFile, "rw").getChannel(); - BufferedChannel logChannel = new BufferedChannel(fileChannel, INTERNAL_BUFFER_WRITE_CAPACITY, - INTERNAL_BUFFER_READ_CAPACITY, unpersistedBytesBound); + BufferedChannel logChannel = new BufferedChannel(UnpooledByteBufAllocator.DEFAULT, fileChannel, + INTERNAL_BUFFER_WRITE_CAPACITY, INTERNAL_BUFFER_READ_CAPACITY, unpersistedBytesBound); ByteBuf dataBuf = generateEntry(byteBufLength); dataBuf.markReaderIndex(); @@ -119,7 +119,9 @@ public void testBufferedChannel(int byteBufLength, int numOfWrites, int unpersis expectedNumOfUnpersistedBytes = (byteBufLength * numOfWrites) - unpersistedBytesBound; } - Assert.assertEquals("Unpersisted bytes", expectedNumOfUnpersistedBytes, logChannel.getUnpersistedBytes()); + if (unpersistedBytesBound > 0) { + Assert.assertEquals("Unpersisted bytes", expectedNumOfUnpersistedBytes, logChannel.getUnpersistedBytes()); + } logChannel.close(); fileChannel.close(); } diff --git 
a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/CheckpointOnNewLedgersTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/CheckpointOnNewLedgersTest.java index e0a8289ad46..be8ad075d5b 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/CheckpointOnNewLedgersTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/CheckpointOnNewLedgersTest.java @@ -27,6 +27,7 @@ import io.netty.buffer.ByteBuf; import io.netty.buffer.Unpooled; +import io.netty.util.ReferenceCountUtil; import java.io.File; import java.util.concurrent.CountDownLatch; import java.util.concurrent.ThreadLocalRandom; @@ -51,15 +52,15 @@ public class CheckpointOnNewLedgersTest { public final TemporaryFolder testDir = new TemporaryFolder(); private ServerConfiguration conf; - private Bookie bookie; + private BookieImpl bookie; private CountDownLatch getLedgerDescCalledLatch; private CountDownLatch getLedgerDescWaitLatch; @Before public void setup() throws Exception { File bkDir = testDir.newFolder("dbLedgerStorageCheckpointTest"); - File curDir = Bookie.getCurrentDirectory(bkDir); - Bookie.checkDirectoryStructure(curDir); + File curDir = BookieImpl.getCurrentDirectory(bkDir); + BookieImpl.checkDirectoryStructure(curDir); int gcWaitTime = 1000; conf = TestBKConfiguration.newServerConfiguration(); @@ -69,7 +70,7 @@ public void setup() throws Exception { conf.setLedgerDirNames(new String[] { bkDir.toString() }); conf.setEntryLogSizeLimit(10 * 1024); - bookie = spy(new Bookie(conf)); + bookie = spy(new TestBookieImpl(conf)); bookie.start(); getLedgerDescCalledLatch = new CountDownLatch(1); @@ -175,7 +176,7 @@ public void testCheckpoint() throws Exception { t1.join(); // construct a new bookie to simulate "bookie restart from crash" - Bookie newBookie = new Bookie(conf); + Bookie newBookie = new TestBookieImpl(conf); newBookie.start(); for (int i = 0; i < numEntries; i++) { @@ -183,14 +184,14 @@ public void testCheckpoint() throws Exception { assertNotNull(entry); assertEquals(l2, entry.readLong()); assertEquals((long) i, entry.readLong()); - entry.release(); + ReferenceCountUtil.release(entry); } ByteBuf entry = newBookie.readEntry(l1, 0L); assertNotNull(entry); assertEquals(l1, entry.readLong()); assertEquals(0L, entry.readLong()); - entry.release(); + ReferenceCountUtil.release(entry); newBookie.shutdown(); } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/ClusterInfoCommandTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/ClusterInfoCommandTest.java new file mode 100644 index 00000000000..2e1bb55ce1b --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/ClusterInfoCommandTest.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.bookkeeper.bookie; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; + +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.test.BookKeeperClusterTestCase; +import org.apache.bookkeeper.tools.cli.commands.bookies.ClusterInfoCommand; +import org.apache.bookkeeper.tools.framework.CliFlags; +import org.junit.Test; + +/** + * Integration test of {@link org.apache.bookkeeper.tools.cli.commands.bookies.ClusterInfoCommand}. + */ +public class ClusterInfoCommandTest extends BookKeeperClusterTestCase { + + public ClusterInfoCommandTest() { + super(1); + } + + @Test + public void testClusterInfo() throws Exception { + ClusterInfoCommand clusterInfoCommand = new ClusterInfoCommand(); + final ServerConfiguration conf = confByIndex(0); + + assertNull(clusterInfoCommand.info()); + + clusterInfoCommand.apply(conf, new CliFlags()); + + assertNotNull(clusterInfoCommand.info()); + ClusterInfoCommand.ClusterInfo info = clusterInfoCommand.info(); + assertEquals(1, info.getTotalBookiesCount()); + assertEquals(1, info.getWritableBookiesCount()); + assertEquals(0, info.getReadonlyBookiesCount()); + assertEquals(0, info.getUnavailableBookiesCount()); + assertFalse(info.isAuditorElected()); + assertEquals("", info.getAuditorId()); + assertFalse(info.isClusterUnderReplicated()); + assertTrue(info.isLedgerReplicationEnabled()); + } + +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/CompactionByBytesTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/CompactionByBytesTest.java deleted file mode 100644 index 29303d742d5..00000000000 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/CompactionByBytesTest.java +++ /dev/null @@ -1,30 +0,0 @@ -/* - * - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - * - */ -package org.apache.bookkeeper.bookie; - -/** - * Test compaction by bytes. 
- */ -public class CompactionByBytesTest extends CompactionTest { - public CompactionByBytesTest() { - super(true); - } -} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/CompactionByBytesWithMetadataCacheTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/CompactionByBytesWithMetadataCacheTest.java new file mode 100644 index 00000000000..8f22f625175 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/CompactionByBytesWithMetadataCacheTest.java @@ -0,0 +1,30 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.bookie; + +/** + * Test compaction by bytes. + */ +public class CompactionByBytesWithMetadataCacheTest extends CompactionTest { + public CompactionByBytesWithMetadataCacheTest() { + super(true, true); + } +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/CompactionByBytesWithoutMetadataCacheTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/CompactionByBytesWithoutMetadataCacheTest.java new file mode 100644 index 00000000000..7b1418dab94 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/CompactionByBytesWithoutMetadataCacheTest.java @@ -0,0 +1,30 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.bookie; + +/** + * Test compaction by bytes. 
+ */ +public class CompactionByBytesWithoutMetadataCacheTest extends CompactionTest { + public CompactionByBytesWithoutMetadataCacheTest() { + super(true, false); + } +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/CompactionByEntriesTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/CompactionByEntriesTest.java deleted file mode 100644 index df871a3d497..00000000000 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/CompactionByEntriesTest.java +++ /dev/null @@ -1,30 +0,0 @@ -/* - * - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - * - */ -package org.apache.bookkeeper.bookie; - -/** - * Test compactions by entries. - */ -public class CompactionByEntriesTest extends CompactionTest { - public CompactionByEntriesTest() { - super(false); - } -} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/CompactionByEntriesWithMetadataCacheTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/CompactionByEntriesWithMetadataCacheTest.java new file mode 100644 index 00000000000..86a1f3bd980 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/CompactionByEntriesWithMetadataCacheTest.java @@ -0,0 +1,30 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.bookie; + +/** + * Test compactions by entries. 
+ */ +public class CompactionByEntriesWithMetadataCacheTest extends CompactionTest { + public CompactionByEntriesWithMetadataCacheTest() { + super(false, true); + } +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/CompactionByEntriesWithoutMetadataCacheTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/CompactionByEntriesWithoutMetadataCacheTest.java new file mode 100644 index 00000000000..2a02eb26b05 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/CompactionByEntriesWithoutMetadataCacheTest.java @@ -0,0 +1,30 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.bookie; + +/** + * Test compactions by entries. + */ +public class CompactionByEntriesWithoutMetadataCacheTest extends CompactionTest { + public CompactionByEntriesWithoutMetadataCacheTest() { + super(false, false); + } +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/CompactionTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/CompactionTest.java index dbf09d98143..e8982bf0e46 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/CompactionTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/CompactionTest.java @@ -37,31 +37,34 @@ import com.google.common.util.concurrent.UncheckedExecutionException; import io.netty.buffer.ByteBuf; import io.netty.buffer.Unpooled; - +import io.netty.buffer.UnpooledByteBufAllocator; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.Enumeration; -import java.util.HashMap; import java.util.HashSet; import java.util.List; -import java.util.Map; import java.util.Set; +import java.util.concurrent.CompletableFuture; import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.Supplier; +import org.apache.bookkeeper.bookie.BookieException.EntryLogMetadataMapException; import org.apache.bookkeeper.bookie.LedgerDirsManager.NoWritableLedgerDirException; +import org.apache.bookkeeper.bookie.storage.CompactionEntryLog; +import org.apache.bookkeeper.bookie.storage.ldb.PersistentEntryLogMetadataMap; import org.apache.bookkeeper.client.BookKeeper; import org.apache.bookkeeper.client.BookKeeper.DigestType; import org.apache.bookkeeper.client.LedgerEntry; import org.apache.bookkeeper.client.LedgerHandle; -import org.apache.bookkeeper.client.LedgerMetadata; +import org.apache.bookkeeper.client.api.LedgerMetadata; import org.apache.bookkeeper.conf.ServerConfiguration; import org.apache.bookkeeper.conf.TestBKConfiguration; import 
org.apache.bookkeeper.meta.LedgerManager; import org.apache.bookkeeper.proto.BookieServer; -import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GenericCallback; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.LedgerMetadataListener; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.Processor; import org.apache.bookkeeper.proto.checksum.DigestManager; @@ -69,10 +72,9 @@ import org.apache.bookkeeper.test.BookKeeperClusterTestCase; import org.apache.bookkeeper.test.TestStatsProvider; import org.apache.bookkeeper.util.DiskChecker; -import org.apache.bookkeeper.util.HardLink; -import org.apache.bookkeeper.util.MathUtils; import org.apache.bookkeeper.util.TestUtils; import org.apache.bookkeeper.versioning.Version; +import org.apache.bookkeeper.versioning.Versioned; import org.apache.zookeeper.AsyncCallback; import org.junit.Before; import org.junit.Test; @@ -99,11 +101,13 @@ public abstract class CompactionTest extends BookKeeperClusterTestCase { private final long minorCompactionInterval; private final long majorCompactionInterval; private final String msg; + private final boolean useMetadataCache; - public CompactionTest(boolean isByBytes) { + public CompactionTest(boolean isByBytes, boolean useMetadataCache) { super(NUM_BOOKIES); this.isThrottleByBytes = isByBytes; + this.useMetadataCache = useMetadataCache; this.digestType = DigestType.CRC32; this.passwdBytes = "".getBytes(); numEntries = 100; @@ -137,14 +141,14 @@ public void setUp() throws Exception { baseConf.setLedgerStorageClass(InterleavedLedgerStorage.class.getName()); baseConf.setIsThrottleByBytes(this.isThrottleByBytes); baseConf.setIsForceGCAllowWhenNoSpace(false); - + baseConf.setGcEntryLogMetadataCacheEnabled(useMetadataCache); super.setUp(); } - private GarbageCollectorThread getGCThread() { - assertEquals(1, bs.size()); - BookieServer server = bs.get(0); - return ((InterleavedLedgerStorage) server.getBookie().ledgerStorage).gcThread; + private GarbageCollectorThread getGCThread() throws Exception { + assertEquals(1, bookieCount()); + BookieServer server = serverByIndex(0); + return ((InterleavedLedgerStorage) server.getBookie().getLedgerStorage()).gcThread; } LedgerHandle[] prepareData(int numEntryLogs, boolean changeNum) @@ -197,11 +201,12 @@ public void testDisableCompaction() throws Exception { LedgerHandle[] lhs = prepareData(3, false); // disable compaction - baseConf.setMinorCompactionThreshold(0.0f); - baseConf.setMajorCompactionThreshold(0.0f); - // restart bookies - restartBookies(baseConf); + restartBookies(c -> { + c.setMinorCompactionThreshold(0.0f); + c.setMajorCompactionThreshold(0.0f); + return c; + }); long lastMinorCompactionTime = getGCThread().lastMinorCompactionTime; long lastMajorCompactionTime = getGCThread().lastMajorCompactionTime; @@ -215,24 +220,63 @@ public void testDisableCompaction() throws Exception { assertFalse(getGCThread().enableMajorCompaction); assertFalse(getGCThread().enableMinorCompaction); getGCThread().triggerGC().get(); + if (useMetadataCache) { + assertTrue(getGCThread().getEntryLogMetaMap() instanceof PersistentEntryLogMetadataMap); + } // after garbage collection, compaction should not be executed assertEquals(lastMinorCompactionTime, getGCThread().lastMinorCompactionTime); assertEquals(lastMajorCompactionTime, getGCThread().lastMajorCompactionTime); // entry logs ([0,1].log) should not be compacted. 
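The testDisableCompaction hunk above is the first of several that assert getEntryLogMetaMap() returns a PersistentEntryLogMetadataMap once setGcEntryLogMetadataCacheEnabled(true) is applied. A minimal sketch of the selection this implies inside the garbage collector; the in-memory counterpart's class name, the getter names, and the constructor arguments are assumptions, only PersistentEntryLogMetadataMap and the configuration setters are taken from the diff:

    // Sketch only: how the GC thread could pick its entry-log metadata map.
    EntryLogMetadataMap createEntryLogMetadataMap(ServerConfiguration conf) throws Exception {
        if (conf.isGcEntryLogMetadataCacheEnabled()) {
            // RocksDB-backed map: keeps per-log metadata on disk instead of
            // holding it all on the heap while scanning entry logs.
            return new PersistentEntryLogMetadataMap(
                    conf.getGcEntryLogMetadataCachePath(), conf);
        }
        // default: plain in-memory map (assumed counterpart class)
        return new InMemoryEntryLogMetadataMap();
    }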
- for (File ledgerDirectory : tmpDirs) { + for (File ledgerDirectory : bookieLedgerDirs()) { assertTrue("Not Found entry log file ([0,1].log that should have been compacted in ledgerDirectory: " + ledgerDirectory, TestUtils.hasLogFiles(ledgerDirectory, false, 0, 1)); } } + @Test + public void testForceGarbageCollectionWhenDisableCompactionConfigurationSettings() throws Exception { + // prepare data + LedgerHandle[] lhs = prepareData(3, false); + + restartBookies(c -> { + c.setForceAllowCompaction(true); + c.setMajorCompactionThreshold(0.5f); + c.setMinorCompactionThreshold(0.2f); + c.setMajorCompactionInterval(0); + c.setMinorCompactionInterval(0); + return c; + }); + + assertFalse(getGCThread().enableMajorCompaction); + assertFalse(getGCThread().enableMinorCompaction); + assertTrue(getGCThread().isForceMajorCompactionAllow); + assertTrue(getGCThread().isForceMinorCompactionAllow); + + assertEquals(0.5f, getGCThread().majorCompactionThreshold, 0f); + assertEquals(0.2f, getGCThread().minorCompactionThreshold, 0f); + } + @Test public void testForceGarbageCollection() throws Exception { + testForceGarbageCollection(true); + testForceGarbageCollection(false); + } + + public void testForceGarbageCollection(boolean isForceCompactionAllowWhenDisableCompaction) throws Exception { ServerConfiguration conf = newServerConfiguration(); conf.setGcWaitTime(60000); - conf.setMinorCompactionInterval(120000); - conf.setMajorCompactionInterval(240000); + if (isForceCompactionAllowWhenDisableCompaction) { + conf.setMinorCompactionInterval(0); + conf.setMajorCompactionInterval(0); + conf.setForceAllowCompaction(true); + conf.setMajorCompactionThreshold(0.5f); + conf.setMinorCompactionThreshold(0.2f); + } else { + conf.setMinorCompactionInterval(120000); + conf.setMajorCompactionInterval(240000); + } LedgerDirsManager dirManager = new LedgerDirsManager(conf, conf.getLedgerDirs(), new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold())); CheckpointSource cp = new CheckpointSource() { @@ -249,10 +293,10 @@ public void checkpointComplete(Checkpoint checkPoint, boolean compact) } }; for (File journalDir : conf.getJournalDirs()) { - Bookie.checkDirectoryStructure(journalDir); + BookieImpl.checkDirectoryStructure(journalDir); } for (File dir : dirManager.getAllLedgerDirs()) { - Bookie.checkDirectoryStructure(dir); + BookieImpl.checkDirectoryStructure(dir); } runFunctionWithLedgerManagerFactory(conf, lmf -> { try (LedgerManager lm = lmf.newLedgerManager()) { @@ -262,12 +306,13 @@ public void checkpointComplete(Checkpoint checkPoint, boolean compact) lm, dirManager, dirManager, - null, - cp, - Checkpointer.NULL, - NullStatsLogger.INSTANCE); + NullStatsLogger.INSTANCE, + UnpooledByteBufAllocator.DEFAULT); + storage.setCheckpointSource(cp); + storage.setCheckpointer(Checkpointer.NULL); + storage.start(); - long startTime = MathUtils.now(); + long startTime = System.currentTimeMillis(); storage.gcThread.enableForceGC(); storage.gcThread.triggerGC().get(); //major storage.gcThread.triggerGC().get(); //minor @@ -284,6 +329,74 @@ public void checkpointComplete(Checkpoint checkPoint, boolean compact) }); } + @Test + public void testForceGarbageCollectionWhenDiskIsFull() throws Exception { + testForceGarbageCollectionWhenDiskIsFull(true); + testForceGarbageCollectionWhenDiskIsFull(false); + } + + public void testForceGarbageCollectionWhenDiskIsFull(boolean isForceCompactionAllowWhenDisableCompaction) + throws Exception { + + restartBookies(conf -> { + if (isForceCompactionAllowWhenDisableCompaction) { 
+ conf.setMinorCompactionInterval(0); + conf.setMajorCompactionInterval(0); + conf.setForceAllowCompaction(true); + conf.setMajorCompactionThreshold(0.5f); + conf.setMinorCompactionThreshold(0.2f); + } else { + conf.setMinorCompactionInterval(120000); + conf.setMajorCompactionInterval(240000); + } + return conf; + }); + + getGCThread().suspendMajorGC(); + getGCThread().suspendMinorGC(); + long majorCompactionCntBeforeGC = 0; + long minorCompactionCntBeforeGC = 0; + long majorCompactionCntAfterGC = 0; + long minorCompactionCntAfterGC = 0; + + // disable forceMajor and forceMinor + majorCompactionCntBeforeGC = getGCThread().getGarbageCollectionStatus().getMajorCompactionCounter(); + minorCompactionCntBeforeGC = getGCThread().getGarbageCollectionStatus().getMinorCompactionCounter(); + getGCThread().triggerGC(true, true, true).get(); + majorCompactionCntAfterGC = getGCThread().getGarbageCollectionStatus().getMajorCompactionCounter(); + minorCompactionCntAfterGC = getGCThread().getGarbageCollectionStatus().getMinorCompactionCounter(); + assertEquals(majorCompactionCntBeforeGC, majorCompactionCntAfterGC); + assertEquals(minorCompactionCntBeforeGC, minorCompactionCntAfterGC); + + // enable forceMajor and forceMinor + majorCompactionCntBeforeGC = getGCThread().getGarbageCollectionStatus().getMajorCompactionCounter(); + minorCompactionCntBeforeGC = getGCThread().getGarbageCollectionStatus().getMinorCompactionCounter(); + getGCThread().triggerGC(true, false, false).get(); + majorCompactionCntAfterGC = getGCThread().getGarbageCollectionStatus().getMajorCompactionCounter(); + minorCompactionCntAfterGC = getGCThread().getGarbageCollectionStatus().getMinorCompactionCounter(); + assertEquals(majorCompactionCntBeforeGC + 1, majorCompactionCntAfterGC); + assertEquals(minorCompactionCntBeforeGC, minorCompactionCntAfterGC); + + // enable forceMajor and disable forceMinor + majorCompactionCntBeforeGC = getGCThread().getGarbageCollectionStatus().getMajorCompactionCounter(); + minorCompactionCntBeforeGC = getGCThread().getGarbageCollectionStatus().getMinorCompactionCounter(); + getGCThread().triggerGC(true, false, true).get(); + majorCompactionCntAfterGC = getGCThread().getGarbageCollectionStatus().getMajorCompactionCounter(); + minorCompactionCntAfterGC = getGCThread().getGarbageCollectionStatus().getMinorCompactionCounter(); + assertEquals(majorCompactionCntBeforeGC + 1, majorCompactionCntAfterGC); + assertEquals(minorCompactionCntBeforeGC, minorCompactionCntAfterGC); + + // disable forceMajor and enable forceMinor + majorCompactionCntBeforeGC = getGCThread().getGarbageCollectionStatus().getMajorCompactionCounter(); + minorCompactionCntBeforeGC = getGCThread().getGarbageCollectionStatus().getMinorCompactionCounter(); + getGCThread().triggerGC(true, true, false).get(); + majorCompactionCntAfterGC = getGCThread().getGarbageCollectionStatus().getMajorCompactionCounter(); + minorCompactionCntAfterGC = getGCThread().getGarbageCollectionStatus().getMinorCompactionCounter(); + assertEquals(majorCompactionCntBeforeGC, majorCompactionCntAfterGC); + assertEquals(minorCompactionCntBeforeGC + 1, minorCompactionCntAfterGC); + + } + @Test public void testMinorCompaction() throws Exception { // prepare data @@ -293,14 +406,96 @@ public void testMinorCompaction() throws Exception { lh.close(); } - // disable major compaction - baseConf.setMajorCompactionThreshold(0.0f); - baseConf.setGcWaitTime(60000); - baseConf.setMinorCompactionInterval(120000); - baseConf.setMajorCompactionInterval(240000); + // restart bookies + 
restartBookies(c -> { + // disable major compaction + c.setMajorCompactionThreshold(0.0f); + c.setGcWaitTime(60000); + c.setMinorCompactionInterval(120000); + c.setMajorCompactionInterval(240000); + return c; + }); + + + getGCThread().enableForceGC(); + getGCThread().triggerGC().get(); + if (useMetadataCache) { + assertTrue(getGCThread().getEntryLogMetaMap() instanceof PersistentEntryLogMetadataMap); + } + assertTrue( + "ACTIVE_ENTRY_LOG_COUNT should have been updated", + getStatsProvider(0) + .getGauge("bookie.gc." + ACTIVE_ENTRY_LOG_COUNT) + .getSample().intValue() > 0); + assertTrue( + "ACTIVE_ENTRY_LOG_SPACE_BYTES should have been updated", + getStatsProvider(0) + .getGauge("bookie.gc." + ACTIVE_ENTRY_LOG_SPACE_BYTES) + .getSample().intValue() > 0); + + long lastMinorCompactionTime = getGCThread().lastMinorCompactionTime; + long lastMajorCompactionTime = getGCThread().lastMajorCompactionTime; + assertFalse(getGCThread().enableMajorCompaction); + assertTrue(getGCThread().enableMinorCompaction); + + // remove ledger2 and ledger3 + bkc.deleteLedger(lhs[1].getId()); + bkc.deleteLedger(lhs[2].getId()); + + LOG.info("Finished deleting the ledgers containing the most entries."); + getGCThread().enableForceGC(); + getGCThread().triggerGC().get(); + + // after garbage collection, major compaction should not be executed + assertEquals(lastMajorCompactionTime, getGCThread().lastMajorCompactionTime); + assertTrue(getGCThread().lastMinorCompactionTime > lastMinorCompactionTime); + + // entry logs ([0,1,2].log) should be compacted. + for (File ledgerDirectory : bookieLedgerDirs()) { + assertFalse("Found entry log file ([0,1,2].log that should have not been compacted in ledgerDirectory: " + + ledgerDirectory, TestUtils.hasLogFiles(ledgerDirectory, true, 0, 1, 2)); + } + // even though entry log files are removed, we can still access entries for ledger1 + // since those entries have been compacted to a new entry log + verifyLedger(lhs[0].getId(), 0, lhs[0].getLastAddConfirmed()); + + assertTrue( + "RECLAIMED_COMPACTION_SPACE_BYTES should have been updated", + getStatsProvider(0) + .getCounter("bookie.gc." + RECLAIMED_COMPACTION_SPACE_BYTES) + .get().intValue() > 0); + assertTrue( + "RECLAIMED_DELETION_SPACE_BYTES should have been updated", + getStatsProvider(0) + .getCounter("bookie.gc." + RECLAIMED_DELETION_SPACE_BYTES) + .get().intValue() > 0); + } + + @Test + public void testMinorCompactionWithMaxTimeMillisOk() throws Exception { + // prepare data + LedgerHandle[] lhs = prepareData(6, false); + + for (LedgerHandle lh : lhs) { + lh.close(); + } + + // disable major compaction // restart bookies - restartBookies(baseConf); + restartBookies(c -> { + c.setMajorCompactionThreshold(0.0f); + c.setGcWaitTime(60000); + c.setMinorCompactionInterval(120000); + c.setMajorCompactionInterval(240000); + + // Setup limit on compaction duration. + // The limit is long enough for the compaction to finish. + c.setMinorCompactionMaxTimeMillis(5000); + c.setMajorCompactionMaxTimeMillis(5000); + + return c; + }); getGCThread().enableForceGC(); getGCThread().triggerGC().get(); @@ -333,7 +528,7 @@ public void testMinorCompaction() throws Exception { assertTrue(getGCThread().lastMinorCompactionTime > lastMinorCompactionTime); // entry logs ([0,1,2].log) should be compacted. 
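testForceGarbageCollectionWhenDiskIsFull above drives the three-argument triggerGC overload through every flag combination. Judging purely from its counter assertions, the parameters read as (forceGC, suspendMajor, suspendMinor), and a forced major compaction also covers the work a minor pass would have done:

    // Usage sketch; flag meanings inferred from the assertions above.
    GarbageCollectorThread gc = getGCThread();
    gc.triggerGC(true, true, true).get();   // suspend both: neither compaction counter moves
    gc.triggerGC(true, false, false).get(); // both allowed: only the major counter moves
    gc.triggerGC(true, false, true).get();  // minor suspended: major still runs
    gc.triggerGC(true, true, false).get();  // major suspended: minor runs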
- for (File ledgerDirectory : tmpDirs) { + for (File ledgerDirectory : tmpDirs.getDirs()) { assertFalse("Found entry log file ([0,1,2].log that should have not been compacted in ledgerDirectory: " + ledgerDirectory, TestUtils.hasLogFiles(ledgerDirectory, true, 0, 1, 2)); } @@ -354,31 +549,238 @@ .get().intValue() > 0); } + @Test - public void testMinorCompactionWithNoWritableLedgerDirs() throws Exception { + public void testMinorCompactionWithMaxTimeMillisTooShort() throws Exception { // prepare data - LedgerHandle[] lhs = prepareData(3, false); + LedgerHandle[] lhs = prepareData(6, false); for (LedgerHandle lh : lhs) { lh.close(); } // disable major compaction - baseConf.setMajorCompactionThreshold(0.0f); - baseConf.setGcWaitTime(60000); - baseConf.setMinorCompactionInterval(120000); - baseConf.setMajorCompactionInterval(240000); + // restart bookies + restartBookies(c -> { + c.setMajorCompactionThreshold(0.0f); + c.setGcWaitTime(60000); + c.setMinorCompactionInterval(120000); + c.setMajorCompactionInterval(240000); + + // Setup limit on compaction duration. + // The limit is too short for the compaction to finish. + c.setMinorCompactionMaxTimeMillis(1); + c.setMajorCompactionMaxTimeMillis(1); + + return c; + }); + + getGCThread().enableForceGC(); + getGCThread().triggerGC().get(); + assertTrue( + "ACTIVE_ENTRY_LOG_COUNT should have been updated", + getStatsProvider(0) + .getGauge("bookie.gc." + ACTIVE_ENTRY_LOG_COUNT) + .getSample().intValue() > 0); + assertTrue( + "ACTIVE_ENTRY_LOG_SPACE_BYTES should have been updated", + getStatsProvider(0) + .getGauge("bookie.gc." + ACTIVE_ENTRY_LOG_SPACE_BYTES) + .getSample().intValue() > 0); + + long lastMinorCompactionTime = getGCThread().lastMinorCompactionTime; + long lastMajorCompactionTime = getGCThread().lastMajorCompactionTime; + assertFalse(getGCThread().enableMajorCompaction); + assertTrue(getGCThread().enableMinorCompaction); + + // remove ledger2 and ledger3 + bkc.deleteLedger(lhs[1].getId()); + bkc.deleteLedger(lhs[2].getId()); + + LOG.info("Finished deleting the ledgers containing the most entries."); + getGCThread().enableForceGC(); + getGCThread().triggerGC().get(); + + // after garbage collection, major compaction should not be executed + assertEquals(lastMajorCompactionTime, getGCThread().lastMajorCompactionTime); + assertTrue(getGCThread().lastMinorCompactionTime > lastMinorCompactionTime); + + // entry logs ([0,1,2].log) should not all be compacted because of the time limit. + for (File ledgerDirectory : tmpDirs.getDirs()) { + // Compaction of at least one of the files should not have finished + assertTrue("Not found entry log file ([0,1,2].log that should not have been compacted in ledgerDirectory: " + + ledgerDirectory, TestUtils.hasLogFiles(ledgerDirectory, true, 0, 1, 2)); + } + + verifyLedger(lhs[0].getId(), 0, lhs[0].getLastAddConfirmed()); + } + + @Test + public void testForceMinorCompaction() throws Exception { + // prepare data + LedgerHandle[] lhs = prepareData(3, false); + + for (LedgerHandle lh : lhs) { + lh.close(); + } // restart bookies - restartBookies(baseConf); + restartBookies(c -> { + c.setMajorCompactionThreshold(0.0f); + c.setGcWaitTime(60000); + c.setMinorCompactionInterval(-1); + c.setMajorCompactionInterval(-1); + c.setForceAllowCompaction(true); + return c; + }); + + getGCThread().enableForceGC(); + getGCThread().triggerGC().get(); + assertTrue( + "ACTIVE_ENTRY_LOG_COUNT should have been updated", + getStatsProvider(0) + .getGauge("bookie.gc." 
+ ACTIVE_ENTRY_LOG_COUNT) + .getSample().intValue() > 0); + assertTrue( + "ACTIVE_ENTRY_LOG_SPACE_BYTES should have been updated", + getStatsProvider(0) + .getGauge("bookie.gc." + ACTIVE_ENTRY_LOG_SPACE_BYTES) + .getSample().intValue() > 0); + + long lastMinorCompactionTime = getGCThread().lastMinorCompactionTime; + long lastMajorCompactionTime = getGCThread().lastMajorCompactionTime; + assertFalse(getGCThread().enableMajorCompaction); + assertFalse(getGCThread().enableMinorCompaction); + + // remove ledger2 and ledger3 + bkc.deleteLedger(lhs[1].getId()); + bkc.deleteLedger(lhs[2].getId()); + + LOG.info("Finished deleting the ledgers containing the most entries."); + getGCThread().enableForceGC(); + getGCThread().triggerGC().get(); + + // after garbage collection, major compaction should not be executed + assertEquals(lastMajorCompactionTime, getGCThread().lastMajorCompactionTime); + assertTrue(getGCThread().lastMinorCompactionTime > lastMinorCompactionTime); + + // entry logs ([0,1,2].log) should be compacted. + for (File ledgerDirectory : tmpDirs.getDirs()) { + assertFalse("Found entry log file ([0,1,2].log that should have not been compacted in ledgerDirectory: " + + ledgerDirectory, TestUtils.hasLogFiles(ledgerDirectory, true, 0, 1, 2)); + } + + // even though entry log files are removed, we can still access entries for ledger1 + // since those entries have been compacted to a new entry log + verifyLedger(lhs[0].getId(), 0, lhs[0].getLastAddConfirmed()); + + assertTrue( + "RECLAIMED_COMPACTION_SPACE_BYTES should have been updated", + getStatsProvider(0) + .getCounter("bookie.gc." + RECLAIMED_COMPACTION_SPACE_BYTES) + .get().intValue() > 0); + assertTrue( + "RECLAIMED_DELETION_SPACE_BYTES should have been updated", + getStatsProvider(0) + .getCounter("bookie.gc." + RECLAIMED_DELETION_SPACE_BYTES) + .get().intValue() > 0); + } + + @Test + public void testMinorCompactionWithEntryLogPerLedgerEnabled() throws Exception { + // restart bookies + restartBookies(c -> { + c.setMajorCompactionThreshold(0.0f); + c.setGcWaitTime(60000); + c.setMinorCompactionInterval(120000); + c.setMajorCompactionInterval(240000); + c.setForceAllowCompaction(true); + c.setEntryLogPerLedgerEnabled(true); + return c; + }); + + // prepare data + LedgerHandle[] lhs = prepareData(3, false); + + for (LedgerHandle lh : lhs) { + lh.close(); + } + + long lastMinorCompactionTime = getGCThread().lastMinorCompactionTime; + long lastMajorCompactionTime = getGCThread().lastMajorCompactionTime; + assertFalse(getGCThread().enableMajorCompaction); + assertTrue(getGCThread().enableMinorCompaction); + + // remove ledgers 1 and 2 + bkc.deleteLedger(lhs[1].getId()); + bkc.deleteLedger(lhs[2].getId()); + + // Need to wait until entry log 3 gets flushed before initiating GC to satisfy assertions. + while (!getGCThread().entryLogger.getFlushedLogIds().contains(3L)) { + TimeUnit.MILLISECONDS.sleep(100); + } + + LOG.info("Finished deleting the ledgers containing the most entries."); + getGCThread().triggerGC(true, false, false).get(); + + assertEquals(lastMajorCompactionTime, getGCThread().lastMajorCompactionTime); + assertTrue(getGCThread().lastMinorCompactionTime > lastMinorCompactionTime); + + // At this point, we have the following state of ledgers and entry logs: + // L0 (not deleted) -> E0 (un-flushed): Entry log should exist. + // L1 (deleted) -> E1 (un-flushed): Entry log should exist as un-flushed entry logs are not considered for GC. + // L2 (deleted) -> E2 (flushed): Entry log should have been garbage collected. 
+ // E3 (flushed): Entry log should have been garbage collected. + // E4 (un-flushed): Entry log should exist as un-flushed entry logs are not considered for GC. + assertTrue("Not found entry log files [0, 1, 4].log that should not have been compacted in: " + + tmpDirs.getDirs().get(0), TestUtils.hasAllLogFiles(tmpDirs.getDirs().get(0), 0, 1, 4)); + assertTrue("Found entry log files [2, 3].log that should have been compacted in ledgerDirectory: " + + tmpDirs.getDirs().get(0), TestUtils.hasNoneLogFiles(tmpDirs.getDirs().get(0), 2, 3)); + + // Now, let's mark E1 as flushed, as its ledger L1 has been deleted already. In this case, the GC algorithm + // should consider it for deletion. + ((DefaultEntryLogger) getGCThread().entryLogger).recentlyCreatedEntryLogsStatus.flushRotatedEntryLog(1L); + getGCThread().triggerGC(true, false, false).get(); + assertTrue("Found entry log file 1.log that should have been compacted in ledgerDirectory: " + + tmpDirs.getDirs().get(0), TestUtils.hasNoneLogFiles(tmpDirs.getDirs().get(0), 1)); + + // Once ledger L0 has been removed, deleting E0 is fine (but only after it has been flushed). + bkc.deleteLedger(lhs[0].getId()); + getGCThread().triggerGC(true, false, false).get(); + assertTrue("Missing entry log file 0.log that should not have been compacted in ledgerDirectory: " + + tmpDirs.getDirs().get(0), TestUtils.hasAllLogFiles(tmpDirs.getDirs().get(0), 0)); + ((DefaultEntryLogger) getGCThread().entryLogger).recentlyCreatedEntryLogsStatus.flushRotatedEntryLog(0L); + getGCThread().triggerGC(true, false, false).get(); + assertTrue("Found entry log file 0.log that should have been compacted in ledgerDirectory: " + + tmpDirs.getDirs().get(0), TestUtils.hasNoneLogFiles(tmpDirs.getDirs().get(0), 0)); + } + + @Test + public void testMinorCompactionWithNoWritableLedgerDirs() throws Exception { + // prepare data + LedgerHandle[] lhs = prepareData(3, false); + + for (LedgerHandle lh : lhs) { + lh.close(); + } + + // restart bookies + restartBookies(c -> { + // disable major compaction + c.setMajorCompactionThreshold(0.0f); + c.setGcWaitTime(60000); + c.setMinorCompactionInterval(120000); + c.setMajorCompactionInterval(240000); + return c; + }); long lastMinorCompactionTime = getGCThread().lastMinorCompactionTime; long lastMajorCompactionTime = getGCThread().lastMajorCompactionTime; assertFalse(getGCThread().enableMajorCompaction); assertTrue(getGCThread().enableMinorCompaction); - for (BookieServer bookieServer : bs) { - Bookie bookie = bookieServer.getBookie(); + for (int i = 0; i < bookieCount(); i++) { + BookieImpl bookie = ((BookieImpl) serverByIndex(i).getBookie()); LedgerDirsManager ledgerDirsManager = bookie.getLedgerDirsManager(); List<File> ledgerDirs = ledgerDirsManager.getAllLedgerDirs(); // if all the discs are full then Major and Minor compaction would be disabled since @@ -394,13 +796,15 @@ public void testMinorCompactionWithNoWritableLedgerDirs() throws Exception { LOG.info("Finished deleting the ledgers contains most entries."); getGCThread().triggerGC().get(); - + if (useMetadataCache) { + assertTrue(getGCThread().getEntryLogMetaMap() instanceof PersistentEntryLogMetadataMap); + } // after garbage collection, major compaction should not be executed assertEquals(lastMajorCompactionTime, getGCThread().lastMajorCompactionTime); assertEquals(lastMinorCompactionTime, getGCThread().lastMinorCompactionTime); // entry logs ([0,1,2].log) should still remain, because both major and Minor compaction are disabled. 
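The entry-log-per-ledger test above rests on one rule: an entry log only becomes a GC candidate once it has been flushed (rotated), even if every ledger it contains is already deleted. A simplified sketch of that guard, using only calls that appear elsewhere in this diff; the real GarbageCollectorThread logic is more involved:

    // Simplified sketch: delete only flushed logs whose ledgers are all gone.
    for (long logId : entryLogger.getFlushedLogIds()) {
        EntryLogMetadata meta = entryLogger.getEntryLogMetadata(logId);
        if (meta.isEmpty()) {
            // every ledger mapped into this log has been deleted
            removeEntryLog(logId);
        }
    }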
- for (File ledgerDirectory : tmpDirs) { + for (File ledgerDirectory : bookieLedgerDirs()) { assertTrue( "All the entry log files ([0,1,2].log are not available, which is not expected" + ledgerDirectory, TestUtils.hasLogFiles(ledgerDirectory, false, 0, 1, 2)); @@ -422,18 +826,16 @@ public void testMinorCompactionWithNoWritableLedgerDirsButIsForceGCAllowWhenNoSp // We need at least 2 ledger dirs because compaction will flush ledger cache, and will // trigger relocateIndexFileAndFlushHeader. If we only have one ledger dir, compaction will always fail // when there's no writeable ledger dir. - File ledgerDir1 = createTempDir("ledger", "test1"); - File ledgerDir2 = createTempDir("ledger", "test2"); - File journalDir = createTempDir("journal", "test"); + File ledgerDir1 = tmpDirs.createNew("ledger", "test1"); + File ledgerDir2 = tmpDirs.createNew("ledger", "test2"); + File journalDir = tmpDirs.createNew("journal", "test"); String[] ledgerDirNames = new String[]{ ledgerDir1.getPath(), ledgerDir2.getPath() }; conf.setLedgerDirNames(ledgerDirNames); conf.setJournalDirName(journalDir.getPath()); - BookieServer server = startBookie(conf); - bs.add(server); - bsConfs.add(conf); + BookieServer server = startAndAddBookie(conf).getServer(); // prepare data LedgerHandle[] lhs = prepareData(3, false); @@ -445,11 +847,13 @@ public void testMinorCompactionWithNoWritableLedgerDirsButIsForceGCAllowWhenNoSp long lastMajorCompactionTime = getGCThread().lastMajorCompactionTime; assertFalse(getGCThread().enableMajorCompaction); assertTrue(getGCThread().enableMinorCompaction); - - for (BookieServer bookieServer : bs) { - Bookie bookie = bookieServer.getBookie(); - bookie.ledgerStorage.flush(); - bookie.ledgerMonitor.shutdown(); + if (useMetadataCache) { + assertTrue(getGCThread().getEntryLogMetaMap() instanceof PersistentEntryLogMetadataMap); + } + for (int i = 0; i < bookieCount(); i++) { + BookieImpl bookie = ((BookieImpl) serverByIndex(i).getBookie()); + bookie.getLedgerStorage().flush(); + bookie.dirsMonitor.shutdown(); LedgerDirsManager ledgerDirsManager = bookie.getLedgerDirsManager(); List ledgerDirs = ledgerDirsManager.getAllLedgerDirs(); // Major and Minor compaction are not disabled even though discs are full. Check LedgerDirsListener of @@ -474,7 +878,7 @@ public void testMinorCompactionWithNoWritableLedgerDirsButIsForceGCAllowWhenNoSp // allocating newlog // we get getWritableLedgerDirsForNewLog() of ledgerDirsManager instead of getWritableLedgerDirs() // entry logs ([0,1,2].log) should be compacted. 
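These hunks also swap createTempDir(...) for a shared tmpDirs helper; only createNew(prefix, suffix) and getDirs() are visible in the diff, so the following self-contained sketch of such a utility is an assumption:

    import java.io.File;
    import java.nio.file.Files;
    import java.util.ArrayList;
    import java.util.List;

    // Hypothetical shape of the TmpDirs test utility.
    public class TmpDirs {
        private final List<File> dirs = new ArrayList<>();

        public File createNew(String prefix, String suffix) throws Exception {
            // track every directory created so the harness can clean them up later
            File dir = Files.createTempDirectory(prefix + suffix).toFile();
            dirs.add(dir);
            return dir;
        }

        public List<File> getDirs() {
            return dirs;
        }
    }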
- for (File ledgerDirectory : server.getBookie().getLedgerDirsManager().getAllLedgerDirs()) { + for (File ledgerDirectory : ((BookieImpl) server.getBookie()).getLedgerDirsManager().getAllLedgerDirs()) { assertFalse("Found entry log file ([0,1,2].log that should have not been compacted in ledgerDirectory: " + ledgerDirectory, TestUtils.hasLogFiles(ledgerDirectory.getParentFile(), true, 0, 1, 2)); } @@ -484,8 +888,8 @@ public void testMinorCompactionWithNoWritableLedgerDirsButIsForceGCAllowWhenNoSp verifyLedger(lhs[0].getId(), 0, lhs[0].getLastAddConfirmed()); // for the sake of validity of test lets make sure that there is no writableLedgerDir in the bookies - for (BookieServer bookieServer : bs) { - Bookie bookie = bookieServer.getBookie(); + for (int i = 0; i < bookieCount(); i++) { + BookieImpl bookie = (BookieImpl) serverByIndex(i).getBookie(); LedgerDirsManager ledgerDirsManager = bookie.getLedgerDirsManager(); try { List ledgerDirs = ledgerDirsManager.getWritableLedgerDirs(); @@ -507,14 +911,15 @@ public void testMajorCompaction() throws Exception { lh.close(); } - // disable minor compaction - baseConf.setMinorCompactionThreshold(0.0f); - baseConf.setGcWaitTime(60000); - baseConf.setMinorCompactionInterval(120000); - baseConf.setMajorCompactionInterval(240000); - // restart bookies - restartBookies(baseConf); + restartBookies(c -> { + // disable minor compaction + c.setMinorCompactionThreshold(0.0f); + c.setGcWaitTime(60000); + c.setMinorCompactionInterval(120000); + c.setMajorCompactionInterval(240000); + return c; + }); long lastMinorCompactionTime = getGCThread().lastMinorCompactionTime; long lastMajorCompactionTime = getGCThread().lastMajorCompactionTime; @@ -527,13 +932,15 @@ public void testMajorCompaction() throws Exception { LOG.info("Finished deleting the ledgers contains most entries."); getGCThread().enableForceGC(); getGCThread().triggerGC().get(); - + if (useMetadataCache) { + assertTrue(getGCThread().getEntryLogMetaMap() instanceof PersistentEntryLogMetadataMap); + } // after garbage collection, minor compaction should not be executed assertTrue(getGCThread().lastMinorCompactionTime > lastMinorCompactionTime); assertTrue(getGCThread().lastMajorCompactionTime > lastMajorCompactionTime); // entry logs ([0,1,2].log) should be compacted - for (File ledgerDirectory : tmpDirs) { + for (File ledgerDirectory : bookieLedgerDirs()) { assertFalse("Found entry log file ([0,1,2].log that should have not been compacted in ledgerDirectory: " + ledgerDirectory, TestUtils.hasLogFiles(ledgerDirectory, true, 0, 1, 2)); } @@ -543,6 +950,58 @@ public void testMajorCompaction() throws Exception { verifyLedger(lhs[1].getId(), 0, lhs[1].getLastAddConfirmed()); } + @Test + public void testForceMajorCompaction() throws Exception { + + // prepare data + LedgerHandle[] lhs = prepareData(3, true); + + for (LedgerHandle lh : lhs) { + lh.close(); + } + + // restart bookies + restartBookies(c-> { + // disable minor compaction + c.setMinorCompactionThreshold(0.0f); + c.setGcWaitTime(60000); + c.setMinorCompactionInterval(-1); + c.setMajorCompactionInterval(-1); + c.setForceAllowCompaction(true); + return c; + }); + + long lastMinorCompactionTime = getGCThread().lastMinorCompactionTime; + long lastMajorCompactionTime = getGCThread().lastMajorCompactionTime; + assertFalse(getGCThread().enableMajorCompaction); + assertFalse(getGCThread().enableMinorCompaction); + assertTrue(getGCThread().isForceMajorCompactionAllow); + assertFalse(getGCThread().isForceMinorCompactionAllow); + + // remove ledger1 and 
ledger3 + bkc.deleteLedger(lhs[0].getId()); + bkc.deleteLedger(lhs[2].getId()); + LOG.info("Finished deleting the ledgers containing the most entries."); + getGCThread().enableForceGC(); + getGCThread().triggerGC().get(); + if (useMetadataCache) { + assertTrue(getGCThread().getEntryLogMetaMap() instanceof PersistentEntryLogMetadataMap); + } + // after garbage collection, minor compaction should not be executed + assertTrue(getGCThread().lastMinorCompactionTime > lastMinorCompactionTime); + assertTrue(getGCThread().lastMajorCompactionTime > lastMajorCompactionTime); + + // entry logs ([0,1,2].log) should be compacted + for (File ledgerDirectory : tmpDirs.getDirs()) { + assertFalse("Found entry log file ([0,1,2].log that should have not been compacted in ledgerDirectory: " + + ledgerDirectory, TestUtils.hasLogFiles(ledgerDirectory, true, 0, 1, 2)); + } + + // even though entry log files are removed, we can still access entries for ledger2 + // since those entries have been compacted to a new entry log + verifyLedger(lhs[1].getId(), 0, lhs[1].getLastAddConfirmed()); + } + @Test public void testCompactionPersistence() throws Exception { /* @@ -562,14 +1021,15 @@ lh.close(); } - // disable minor compaction - baseConf.setMinorCompactionThreshold(0.0f); - baseConf.setGcWaitTime(60000); - baseConf.setMinorCompactionInterval(120000); - baseConf.setMajorCompactionInterval(240000); - // restart bookies - restartBookies(baseConf); + restartBookies(c -> { + // disable minor compaction + c.setMinorCompactionThreshold(0.0f); + c.setGcWaitTime(60000); + c.setMinorCompactionInterval(120000); + c.setMajorCompactionInterval(240000); + return c; + }); long lastMinorCompactionTime = getGCThread().lastMinorCompactionTime; long lastMajorCompactionTime = getGCThread().lastMajorCompactionTime; @@ -588,7 +1048,7 @@ assertTrue(getGCThread().lastMajorCompactionTime > lastMajorCompactionTime); // entry logs ([0,1,2].log) should be compacted - for (File ledgerDirectory : tmpDirs) { + for (File ledgerDirectory : bookieLedgerDirs()) { assertFalse("Found entry log file ([0,1,2].log that should have not been compacted in ledgerDirectory: " + ledgerDirectory, TestUtils.hasLogFiles(ledgerDirectory, true, 0, 1, 2)); } @@ -604,7 +1064,7 @@ * there is only one bookie in the cluster so we should be able to read * entries from this bookie. */ - ServerConfiguration bookieServerConfig = bs.get(0).getBookie().conf; + ServerConfiguration bookieServerConfig = ((BookieImpl) serverByIndex(0).getBookie()).conf; ServerConfiguration newBookieConf = new ServerConfiguration(bookieServerConfig); /* * by reusing bookieServerConfig and setting metadataServiceUri to null @@ -613,10 +1073,13 @@ * purpose. 
*/ newBookieConf.setMetadataServiceUri(null); - Bookie newbookie = new Bookie(newBookieConf); + String entryLogCachePath = tmpDirs.createNew("entry", "bk2").getAbsolutePath(); + newBookieConf.setGcEntryLogMetadataCachePath(entryLogCachePath); + Bookie newbookie = new TestBookieImpl(newBookieConf); DigestManager digestManager = DigestManager.instantiate(ledgerId, passwdBytes, - BookKeeper.DigestType.toProtoDigestType(digestType), baseClientConf.getUseV2WireProtocol()); + BookKeeper.DigestType.toProtoDigestType(digestType), UnpooledByteBufAllocator.DEFAULT, + baseClientConf.getUseV2WireProtocol()); for (long entryId = 0; entryId <= lastAddConfirmed; entryId++) { ByteBuf readEntryBufWithChecksum = newbookie.readEntry(ledgerId, entryId); @@ -633,8 +1096,8 @@ public void testCompactionWhenLedgerDirsAreFull() throws Exception { * for this test scenario we are assuming that there will be only one * bookie in the cluster */ - assertEquals("Numbers of Bookies in this cluster", 1, bsConfs.size()); - ServerConfiguration serverConfig = bsConfs.get(0); + assertEquals("Numbers of Bookies in this cluster", 1, bookieCount()); + ServerConfiguration serverConfig = confByIndex(0); File ledgerDir = serverConfig.getLedgerDirs()[0]; assertEquals("Number of Ledgerdirs for this bookie", 1, serverConfig.getLedgerDirs().length); assertTrue("indexdirs should be configured to null", null == serverConfig.getIndexDirs()); @@ -650,7 +1113,7 @@ public void testCompactionWhenLedgerDirsAreFull() throws Exception { lh.close(); } - bs.get(0).getBookie().getLedgerStorage().flush(); + serverByIndex(0).getBookie().getLedgerStorage().flush(); assertTrue( "entry log file ([0,1,2].log should be available in ledgerDirectory: " + serverConfig.getLedgerDirs()[0], @@ -659,18 +1122,6 @@ public void testCompactionWhenLedgerDirsAreFull() throws Exception { long usableSpace = ledgerDir.getUsableSpace(); long totalSpace = ledgerDir.getTotalSpace(); - baseConf.setForceReadOnlyBookie(true); - baseConf.setIsForceGCAllowWhenNoSpace(true); - // disable minor compaction - baseConf.setMinorCompactionThreshold(0.0f); - baseConf.setGcWaitTime(60000); - baseConf.setMinorCompactionInterval(120000); - baseConf.setMajorCompactionInterval(240000); - baseConf.setMinUsableSizeForEntryLogCreation(1); - baseConf.setMinUsableSizeForIndexFileCreation(1); - baseConf.setDiskUsageThreshold((1.0f - ((float) usableSpace / (float) totalSpace)) * 0.9f); - baseConf.setDiskUsageWarnThreshold(0.0f); - /* * because of the value set for diskUsageThreshold, when bookie is * restarted it wouldn't find any writableledgerdir. 
But we have set @@ -680,10 +1131,23 @@ public void testCompactionWhenLedgerDirsAreFull() throws Exception { */ // restart bookies - restartBookies(baseConf); + restartBookies(c -> { + c.setForceReadOnlyBookie(true); + c.setIsForceGCAllowWhenNoSpace(true); + // disable minor compaction + c.setMinorCompactionThreshold(0.0f); + c.setGcWaitTime(60000); + c.setMinorCompactionInterval(120000); + c.setMajorCompactionInterval(240000); + c.setMinUsableSizeForEntryLogCreation(1); + c.setMinUsableSizeForIndexFileCreation(1); + c.setDiskUsageThreshold((1.0f - ((float) usableSpace / (float) totalSpace)) * 0.9f); + c.setDiskUsageWarnThreshold(0.0f); + return c; + }); assertFalse("There shouldn't be any writable ledgerDir", - bs.get(0).getBookie().getLedgerDirsManager().hasWritableLedgerDirs()); + ((BookieImpl) serverByIndex(0).getBookie()).getLedgerDirsManager().hasWritableLedgerDirs()); long lastMinorCompactionTime = getGCThread().lastMinorCompactionTime; long lastMajorCompactionTime = getGCThread().lastMajorCompactionTime; @@ -696,7 +1160,9 @@ public void testCompactionWhenLedgerDirsAreFull() throws Exception { LOG.info("Finished deleting the ledgers contains most entries."); getGCThread().enableForceGC(); getGCThread().triggerGC().get(); - + if (useMetadataCache) { + assertTrue(getGCThread().getEntryLogMetaMap() instanceof PersistentEntryLogMetadataMap); + } // after garbage collection, minor compaction should not be executed assertTrue(getGCThread().lastMinorCompactionTime > lastMinorCompactionTime); assertTrue(getGCThread().lastMajorCompactionTime > lastMajorCompactionTime); @@ -707,7 +1173,7 @@ public void testCompactionWhenLedgerDirsAreFull() throws Exception { */ // entry logs ([0,1,2].log) should be compacted - for (File ledgerDirectory : tmpDirs) { + for (File ledgerDirectory : bookieLedgerDirs()) { assertFalse("Found entry log file ([0,1,2].log that should have not been compacted in ledgerDirectory: " + ledgerDirectory, TestUtils.hasLogFiles(ledgerDirectory, true, 0, 1, 2)); } @@ -746,7 +1212,7 @@ public void testMajorCompactionAboveThreshold() throws Exception { assertTrue(getGCThread().lastMajorCompactionTime > lastMajorCompactionTime); // entry logs ([0,1,2].log) should not be compacted - for (File ledgerDirectory : tmpDirs) { + for (File ledgerDirectory : bookieLedgerDirs()) { assertTrue("Not Found entry log file ([1,2].log that should have been compacted in ledgerDirectory: " + ledgerDirectory, TestUtils.hasLogFiles(ledgerDirectory, false, 0, 1, 2)); } @@ -781,10 +1247,12 @@ public void testCompactionSmallEntryLogs() throws Exception { getGCThread().enableForceGC(); getGCThread().triggerGC().get(); - + if (useMetadataCache) { + assertTrue(getGCThread().getEntryLogMetaMap() instanceof PersistentEntryLogMetadataMap); + } // entry logs (0.log) should not be compacted // entry logs ([1,2,3].log) should be compacted. 
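testCompactionSmallEntryLogs above exercises the usage-threshold rule, and the MaxTimeMillis tests earlier bound the same pass by time: per the two-argument doCompactEntryLogs call further down in this diff, the GC thread now takes a time budget alongside the usage threshold. A conceptual sketch of that loop; logsLeastUsageFirst() and compactEntryLog(meta) are hypothetical stand-ins for the real candidate ordering and compactor invocation:

    // Conceptual sketch of the threshold- and time-bounded compaction loop.
    void doCompactEntryLogs(double threshold, long maxTimeMillis) {
        long start = System.currentTimeMillis();
        for (EntryLogMetadata meta : logsLeastUsageFirst()) {
            if (meta.getUsage() >= threshold) {
                break; // remaining logs hold even more live data
            }
            compactEntryLog(meta);
            if (maxTimeMillis > 0 && System.currentTimeMillis() - start > maxTimeMillis) {
                break; // honor minor/majorCompactionMaxTimeMillis
            }
        }
    }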
- for (File ledgerDirectory : tmpDirs) { + for (File ledgerDirectory : bookieLedgerDirs()) { assertTrue("Not Found entry log file ([0].log that should have been compacted in ledgerDirectory: " + ledgerDirectory, TestUtils.hasLogFiles(ledgerDirectory, true, 0)); assertFalse("Found entry log file ([1,2,3].log that should have not been compacted in ledgerDirectory: " @@ -809,12 +1277,12 @@ public void testCompactionSafety() throws Exception { final Set<Long> ledgers = Collections.newSetFromMap(new ConcurrentHashMap<Long, Boolean>()); LedgerManager manager = getLedgerManager(ledgers); - File tmpDir = createTempDir("bkTest", ".dir"); - File curDir = Bookie.getCurrentDirectory(tmpDir); - Bookie.checkDirectoryStructure(curDir); + File tmpDir = tmpDirs.createNew("bkTest", ".dir"); + File curDir = BookieImpl.getCurrentDirectory(tmpDir); + BookieImpl.checkDirectoryStructure(curDir); conf.setLedgerDirNames(new String[] {tmpDir.toString()}); - conf.setEntryLogSizeLimit(EntryLogger.LOGFILE_HEADER_SIZE + 3 * (4 + ENTRY_SIZE)); + conf.setEntryLogSizeLimit(DefaultEntryLogger.LOGFILE_HEADER_SIZE + 3 * (4 + ENTRY_SIZE)); conf.setGcWaitTime(100); conf.setMinorCompactionThreshold(0.7f); conf.setMajorCompactionThreshold(0.0f); @@ -857,10 +1325,11 @@ public void checkpointComplete(CheckpointSource.Checkpoint checkpoint, boolean c manager, dirs, dirs, - null, - checkpointSource, - Checkpointer.NULL, - NullStatsLogger.INSTANCE); + NullStatsLogger.INSTANCE, + UnpooledByteBufAllocator.DEFAULT); + storage.setCheckpointSource(checkpointSource); + storage.setCheckpointer(Checkpointer.NULL); + ledgers.add(1L); ledgers.add(2L); ledgers.add(3L); @@ -882,10 +1351,12 @@ public void checkpointComplete(CheckpointSource.Checkpoint checkpoint, boolean c storage.initialize( conf, manager, - dirs, dirs, null, - checkpointSource, - Checkpointer.NULL, - NullStatsLogger.INSTANCE); + dirs, dirs, + NullStatsLogger.INSTANCE, + UnpooledByteBufAllocator.DEFAULT); + storage.setCheckpointSource(checkpointSource); + storage.setCheckpointer(Checkpointer.NULL); + storage.start(); for (int i = 0; i < 10; i++) { if (!log0.exists()) { @@ -907,33 +1378,93 @@ public void checkpointComplete(CheckpointSource.Checkpoint checkpoint, boolean c manager, dirs, dirs, - null, - checkpointSource, - Checkpointer.NULL, - NullStatsLogger.INSTANCE); + NullStatsLogger.INSTANCE, + UnpooledByteBufAllocator.DEFAULT); + storage.setCheckpointSource(checkpointSource); + storage.setCheckpointer(Checkpointer.NULL); + storage.getEntry(1, 1); // entry should exist } + @Test + public void testCancelledCompactionWhenShuttingDown() throws Exception { + // prepare data + LedgerHandle[] lhs = prepareData(3, false); + + // throttle compaction to a very low throughput + // restart bookies + restartBookies(c -> { + c.setIsThrottleByBytes(true); + c.setCompactionRateByBytes(ENTRY_SIZE / 1000); + c.setMinorCompactionThreshold(0.2f); + c.setMajorCompactionThreshold(0.5f); + return c; + }); + + // remove ledger2 and ledger3 + // so entry logs 1 and 2 would have ledger1 entries left + bkc.deleteLedger(lhs[1].getId()); + bkc.deleteLedger(lhs[2].getId()); + LOG.info("Finished deleting the ledgers containing the most entries."); + + getGCThread().triggerGC(true, false, false); + getGCThread().throttler.cancelledAcquire(); + waitUntilTrue(() -> { + try { + return getGCThread().compacting.get(); + } catch (Exception e) { + fail("Get GC thread failed"); + } + return null; + }, () -> "Not attempting to complete", 10000, 200); + + getGCThread().shutdown(); + // after garbage collection shutdown, compaction should be cancelled 
while acquiring permits + and the GC running flag should be false. + assertFalse(getGCThread().running); + + } + + private void waitUntilTrue(Supplier<Boolean> condition, + Supplier<String> msg, + long waitTime, + long pause) throws InterruptedException { + long startTime = System.currentTimeMillis(); + while (true) { + if (condition.get()) { + return; + } + if (System.currentTimeMillis() > startTime + waitTime) { + fail(msg.get()); + } + Thread.sleep(Math.min(waitTime, pause)); + } + } + private LedgerManager getLedgerManager(final Set<Long> ledgers) { LedgerManager manager = new LedgerManager() { @Override - public void createLedgerMetadata(long lid, LedgerMetadata metadata, - GenericCallback<Void> cb) { + public CompletableFuture<Versioned<LedgerMetadata>> createLedgerMetadata(long lid, + LedgerMetadata metadata) { unsupported(); + return null; } @Override - public void removeLedgerMetadata(long ledgerId, Version version, - GenericCallback<Void> vb) { + public CompletableFuture<Void> removeLedgerMetadata(long ledgerId, Version version) { unsupported(); + return null; } @Override - public void readLedgerMetadata(long ledgerId, GenericCallback<LedgerMetadata> readCb) { + public CompletableFuture<Versioned<LedgerMetadata>> readLedgerMetadata(long ledgerId) { unsupported(); + return null; } @Override - public void writeLedgerMetadata(long ledgerId, LedgerMetadata metadata, - GenericCallback<Void> cb) { + public CompletableFuture<Versioned<LedgerMetadata>> writeLedgerMetadata(long ledgerId, + LedgerMetadata metadata, + Version currentVersion) { unsupported(); + return null; } @Override public void asyncProcessLedgers(Processor<Long> processor, @@ -958,8 +1489,9 @@ void unsupported() { LOG.error("Unsupported operation called", new Exception()); throw new RuntimeException("Unsupported op"); } + @Override - public LedgerRangeIterator getLedgerRanges() { + public LedgerRangeIterator getLedgerRanges(long zkOpTimeoutMs) { final AtomicBoolean hasnext = new AtomicBoolean(true); return new LedgerManager.LedgerRangeIterator() { @Override @@ -985,9 +1517,9 @@ public LedgerManager.LedgerRange next() throws IOException { public void testWhenNoLogsToCompact() throws Exception { tearDown(); // I dont want the test infrastructure ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); - File tmpDir = createTempDir("bkTest", ".dir"); - File curDir = Bookie.getCurrentDirectory(tmpDir); - Bookie.checkDirectoryStructure(curDir); + File tmpDir = tmpDirs.createNew("bkTest", ".dir"); + File curDir = BookieImpl.getCurrentDirectory(tmpDir); + BookieImpl.checkDirectoryStructure(curDir); conf.setLedgerDirNames(new String[] { tmpDir.toString() }); LedgerDirsManager dirs = new LedgerDirsManager(conf, conf.getLedgerDirs(), @@ -1013,82 +1545,16 @@ public void checkpointComplete(Checkpoint checkpoint, manager, dirs, dirs, - null, - checkpointSource, - Checkpointer.NULL, - NullStatsLogger.INSTANCE); + NullStatsLogger.INSTANCE, + UnpooledByteBufAllocator.DEFAULT); + storage.setCheckpointSource(checkpointSource); + storage.setCheckpointer(Checkpointer.NULL); double threshold = 0.1; - // shouldn't throw exception - storage.gcThread.doCompactEntryLogs(threshold); - } - - /** - * Test extractMetaFromEntryLogs optimized method to avoid excess memory usage. - */ - public void testExtractMetaFromEntryLogs() throws Exception { - // Always run this test with Throttle enabled. 
- baseConf.setIsThrottleByBytes(true); - // restart bookies - restartBookies(baseConf); - ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); - File tmpDir = createTempDir("bkTest", ".dir"); - File curDir = Bookie.getCurrentDirectory(tmpDir); - Bookie.checkDirectoryStructure(curDir); - conf.setLedgerDirNames(new String[] { tmpDir.toString() }); + long limit = 0; - LedgerDirsManager dirs = new LedgerDirsManager(conf, conf.getLedgerDirs(), - new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold())); - final Set ledgers = Collections - .newSetFromMap(new ConcurrentHashMap()); - - LedgerManager manager = getLedgerManager(ledgers); - - CheckpointSource checkpointSource = new CheckpointSource() { - - @Override - public Checkpoint newCheckpoint() { - return null; - } - - @Override - public void checkpointComplete(Checkpoint checkpoint, - boolean compact) throws IOException { - } - }; - InterleavedLedgerStorage storage = new InterleavedLedgerStorage(); - storage.initialize(conf, manager, dirs, dirs, null, checkpointSource, - Checkpointer.NULL, NullStatsLogger.INSTANCE); - - for (long ledger = 0; ledger <= 10; ledger++) { - ledgers.add(ledger); - for (int entry = 1; entry <= 50; entry++) { - try { - storage.addEntry(genEntry(ledger, entry, ENTRY_SIZE)); - } catch (IOException e) { - //ignore exception on failure to add entry. - } - } - } - - storage.flush(); - storage.shutdown(); - - storage = new InterleavedLedgerStorage(); - storage.initialize(conf, manager, dirs, dirs, null, checkpointSource, - Checkpointer.NULL, NullStatsLogger.INSTANCE); - - long startingEntriesCount = storage.gcThread.entryLogger.getLeastUnflushedLogId() - - storage.gcThread.scannedLogId; - LOG.info("The old Log Entry count is: " + startingEntriesCount); - - Map entryLogMetaData = new HashMap<>(); - long finalEntriesCount = storage.gcThread.entryLogger.getLeastUnflushedLogId() - - storage.gcThread.scannedLogId; - LOG.info("The latest Log Entry count is: " + finalEntriesCount); - - assertTrue("The GC did not clean up entries...", startingEntriesCount != finalEntriesCount); - assertTrue("Entries Count is zero", finalEntriesCount == 0); + // shouldn't throw exception + storage.gcThread.doCompactEntryLogs(threshold, limit); } private ByteBuf genEntry(long ledger, long entry, int size) { @@ -1110,6 +1576,7 @@ public void testSuspendGarbageCollection() throws Exception { conf.setGcWaitTime(500); conf.setMinorCompactionInterval(1); conf.setMajorCompactionInterval(2); + conf.setMajorCompactionMaxTimeMillis(5000); runFunctionWithLedgerManagerFactory(conf, lmf -> { try (LedgerManager lm = lmf.newLedgerManager()) { testSuspendGarbageCollection(conf, lm); @@ -1138,10 +1605,10 @@ public void checkpointComplete(Checkpoint checkPoint, boolean compact) } }; for (File journalDir : conf.getJournalDirs()) { - Bookie.checkDirectoryStructure(journalDir); + BookieImpl.checkDirectoryStructure(journalDir); } for (File dir : dirManager.getAllLedgerDirs()) { - Bookie.checkDirectoryStructure(dir); + BookieImpl.checkDirectoryStructure(dir); } InterleavedLedgerStorage storage = new InterleavedLedgerStorage(); TestStatsProvider stats = new TestStatsProvider(); @@ -1150,16 +1617,18 @@ public void checkpointComplete(Checkpoint checkPoint, boolean compact) lm, dirManager, dirManager, - null, - cp, - Checkpointer.NULL, - stats.getStatsLogger("storage")); + stats.getStatsLogger("storage"), + UnpooledByteBufAllocator.DEFAULT); + storage.setCheckpointSource(cp); + storage.setCheckpointer(Checkpointer.NULL); + 
storage.start(); int majorCompactions = stats.getCounter("storage.gc." + MAJOR_COMPACTION_COUNT).get().intValue(); int minorCompactions = stats.getCounter("storage.gc." + MINOR_COMPACTION_COUNT).get().intValue(); - Thread.sleep(conf.getMajorCompactionInterval() * 1000 - + conf.getGcWaitTime()); + Thread.sleep(3 * (conf.getMajorCompactionInterval() * 1000 + + conf.getGcWaitTime() + + conf.getMajorCompactionMaxTimeMillis())); assertTrue( "Major compaction should have happened", stats.getCounter("storage.gc." + MAJOR_COMPACTION_COUNT).get() > majorCompactions); @@ -1168,7 +1637,7 @@ public void checkpointComplete(Checkpoint checkPoint, boolean compact) storage.gcThread.suspendMajorGC(); Thread.sleep(1000); - long startTime = MathUtils.now(); + long startTime = System.currentTimeMillis(); majorCompactions = stats.getCounter("storage.gc." + MAJOR_COMPACTION_COUNT).get().intValue(); Thread.sleep(conf.getMajorCompactionInterval() * 1000 + conf.getGcWaitTime()); @@ -1188,7 +1657,7 @@ public void checkpointComplete(Checkpoint checkPoint, boolean compact) storage.gcThread.suspendMinorGC(); Thread.sleep(1000); - startTime = MathUtils.now(); + startTime = System.currentTimeMillis(); minorCompactions = stats.getCounter("storage.gc." + MINOR_COMPACTION_COUNT).get().intValue(); Thread.sleep(conf.getMajorCompactionInterval() * 1000 + conf.getGcWaitTime()); @@ -1224,15 +1693,18 @@ public void testRecoverIndexWhenIndexIsPartiallyFlush() throws Exception { lh.close(); } - // disable compaction - baseConf.setMinorCompactionThreshold(0.0f); - baseConf.setMajorCompactionThreshold(0.0f); - baseConf.setGcWaitTime(600000); - // restart bookies - restartBookies(baseConf); + restartBookies(c -> { + // disable compaction + c.setMinorCompactionThreshold(0.0f); + c.setMajorCompactionThreshold(0.0f); + c.setGcWaitTime(600000); + return c; + }); + - Bookie bookie = bs.get(0).getBookie(); + + BookieImpl bookie = ((BookieImpl) serverByIndex(0).getBookie()); InterleavedLedgerStorage storage = (InterleavedLedgerStorage) bookie.ledgerStorage; // remove ledger2 and ledger3 @@ -1242,7 +1714,7 @@ public void testRecoverIndexWhenIndexIsPartiallyFlush() throws Exception { LOG.info("Finished deleting the ledgers contains most entries."); MockTransactionalEntryLogCompactor partialCompactionWorker = new MockTransactionalEntryLogCompactor( - ((InterleavedLedgerStorage) bookie.ledgerStorage).gcThread); + ((InterleavedLedgerStorage) bookie.getLedgerStorage()).gcThread); for (long logId = 0; logId < 3; logId++) { EntryLogMetadata meta = storage.entryLogger.getEntryLogMetadata(logId); @@ -1250,7 +1722,7 @@ public void testRecoverIndexWhenIndexIsPartiallyFlush() throws Exception { } // entry logs ([0,1,2].log) should not be compacted because of partial flush throw IOException - for (File ledgerDirectory : tmpDirs) { + for (File ledgerDirectory : bookieLedgerDirs()) { assertTrue("Entry log file ([0,1,2].log should not be compacted in ledgerDirectory: " + ledgerDirectory, TestUtils.hasLogFiles(ledgerDirectory, true, 0, 1, 2)); } @@ -1268,7 +1740,7 @@ public void testRecoverIndexWhenIndexIsPartiallyFlush() throws Exception { assertEquals(findCompactedEntryLogFiles().size(), 0); // compaction worker should recover partial flushed index and delete [0,1,2].log - for (File ledgerDirectory : tmpDirs) { + for (File ledgerDirectory : bookieLedgerDirs()) { assertFalse("Entry log file ([0,1,2].log should have been compacted in ledgerDirectory: " + ledgerDirectory, TestUtils.hasLogFiles(ledgerDirectory, true, 0, 1, 2)); } @@ -1287,13 +1759,14 @@ public 
void testCompactionFailureShouldNotResultInDuplicatedData() throws Except lh.close(); } - // disable compaction - baseConf.setMinorCompactionThreshold(0.0f); - baseConf.setMajorCompactionThreshold(0.0f); - baseConf.setUseTransactionalCompaction(true); - // restart bookies - restartBookies(baseConf); + restartBookies(c -> { + // disable compaction + c.setMinorCompactionThreshold(0.0f); + c.setMajorCompactionThreshold(0.0f); + c.setUseTransactionalCompaction(true); + return c; + }); // remove ledger2 and ledger3 bkc.deleteLedger(lhs[1].getId()); @@ -1302,7 +1775,7 @@ LOG.info("Finished deleting the ledgers contains most entries."); Thread.sleep(baseConf.getMajorCompactionInterval() * 1000 + baseConf.getGcWaitTime()); - Bookie bookie = bs.get(0).getBookie(); + BookieImpl bookie = (BookieImpl) serverByIndex(0).getBookie(); InterleavedLedgerStorage storage = (InterleavedLedgerStorage) bookie.ledgerStorage; List<File> ledgerDirs = bookie.getLedgerDirsManager().getAllLedgerDirs(); @@ -1318,7 +1791,7 @@ } // entry logs ([0-4].log) should not be compacted because of failure in flush compaction log - for (File ledgerDirectory : tmpDirs) { + for (File ledgerDirectory : bookieLedgerDirs()) { assertTrue("Entry log file ([0,1,2].log should not be compacted in ledgerDirectory: " + ledgerDirectory, TestUtils.hasLogFiles(ledgerDirectory, true, 0, 1, 2, 3, 4)); } @@ -1334,16 +1807,23 @@ public void testCompactionFailureShouldNotResultInDuplicatedData() throws Except assertEquals(usageBeforeCompaction.get(i), freeSpaceAfterCompactionFailed.get(i)); } - // now enable normal compaction - baseConf.setMajorCompactionThreshold(0.5f); // restart bookies - restartBookies(baseConf); + restartBookies(c -> { + // now enable normal compaction + c.setMajorCompactionThreshold(0.5f); + c.setMajorCompactionMaxTimeMillis(5000); + return c; + }); - Thread.sleep(baseConf.getMajorCompactionInterval() * 1000 - + baseConf.getGcWaitTime()); + getGCThread().enableForceGC(); + getGCThread().triggerGC().get(); + + Thread.sleep(confByIndex(0).getMajorCompactionInterval() * 1000 + + confByIndex(0).getGcWaitTime() + + confByIndex(0).getMajorCompactionMaxTimeMillis()); // compaction worker should compact [0-4].log - for (File ledgerDirectory : tmpDirs) { + for (File ledgerDirectory : bookieLedgerDirs()) { assertFalse("Entry log file ([0,1,2].log should have been compacted in ledgerDirectory: " + ledgerDirectory, TestUtils.hasLogFiles(ledgerDirectory, true, 0, 1, 2, 3, 4)); } @@ -1361,15 +1841,13 @@ private long getDirectorySpaceUsage(File dir) { return size; } - private Set<File> findCompactedEntryLogFiles() { + private Set<File> findCompactedEntryLogFiles() throws Exception { Set<File> compactedLogFiles = new HashSet<>(); - for (File ledgerDirectory : tmpDirs) { - File[] files = Bookie.getCurrentDirectory(ledgerDirectory).listFiles( + for (File ledgerDirectory : bookieLedgerDirs()) { + File[] files = BookieImpl.getCurrentDirectory(ledgerDirectory).listFiles( file -> file.getName().endsWith(COMPACTED_SUFFIX)); if (files != null) { - for (File file : files) { - compactedLogFiles.add(file); - } + Collections.addAll(compactedLogFiles, files); } } return compactedLogFiles; @@ -1378,93 +1856,92 @@ private Set<File> findCompactedEntryLogFiles() { private static class MockTransactionalEntryLogCompactor extends TransactionalEntryLogCompactor { public MockTransactionalEntryLogCompactor(GarbageCollectorThread 
gcThread) { - super(gcThread); + super(gcThread.conf, + gcThread.entryLogger, + gcThread.ledgerStorage, + (long entry) -> { + try { + gcThread.removeEntryLog(entry); + } catch (EntryLogMetadataMapException e) { + LOG.warn("Failed to remove entry-log metadata {}", entry, e); + } + }); } - synchronized void compactWithIndexFlushFailure(EntryLogMetadata metadata) { + synchronized void compactWithIndexFlushFailure(EntryLogMetadata metadata) throws IOException { LOG.info("Compacting entry log {}.", metadata.getEntryLogId()); - CompactionPhase scanEntryLog = new ScanEntryLogPhase(metadata); + CompactionEntryLog compactionLog = entryLogger.newCompactionLog(metadata.getEntryLogId()); + + CompactionPhase scanEntryLog = new ScanEntryLogPhase(metadata, compactionLog); if (!scanEntryLog.run()) { LOG.info("Compaction for {} end in ScanEntryLogPhase.", metadata.getEntryLogId()); return; } - File compactionLogFile = entryLogger.getCurCompactionLogFile(); - CompactionPhase flushCompactionLog = new FlushCompactionLogPhase(metadata.getEntryLogId()); + CompactionPhase flushCompactionLog = new FlushCompactionLogPhase(compactionLog); if (!flushCompactionLog.run()) { LOG.info("Compaction for {} end in FlushCompactionLogPhase.", metadata.getEntryLogId()); return; } - File compactedLogFile = getCompactedLogFile(compactionLogFile, metadata.getEntryLogId()); - CompactionPhase partialFlushIndexPhase = new PartialFlushIndexPhase(compactedLogFile); + CompactionPhase partialFlushIndexPhase = new PartialFlushIndexPhase(compactionLog); if (!partialFlushIndexPhase.run()) { LOG.info("Compaction for {} end in PartialFlushIndexPhase.", metadata.getEntryLogId()); return; } - gcThread.removeEntryLog(metadata.getEntryLogId()); + logRemovalListener.removeEntryLog(metadata.getEntryLogId()); LOG.info("Compacted entry log : {}.", metadata.getEntryLogId()); } - synchronized void compactWithLogFlushFailure(EntryLogMetadata metadata) { + synchronized void compactWithLogFlushFailure(EntryLogMetadata metadata) throws IOException { LOG.info("Compacting entry log {}", metadata.getEntryLogId()); - CompactionPhase scanEntryLog = new ScanEntryLogPhase(metadata); + CompactionEntryLog compactionLog = entryLogger.newCompactionLog(metadata.getEntryLogId()); + + CompactionPhase scanEntryLog = new ScanEntryLogPhase(metadata, compactionLog); if (!scanEntryLog.run()) { LOG.info("Compaction for {} end in ScanEntryLogPhase.", metadata.getEntryLogId()); return; } - File compactionLogFile = entryLogger.getCurCompactionLogFile(); - CompactionPhase logFlushFailurePhase = new LogFlushFailurePhase(metadata.getEntryLogId()); + CompactionPhase logFlushFailurePhase = new LogFlushFailurePhase(compactionLog); if (!logFlushFailurePhase.run()) { LOG.info("Compaction for {} end in FlushCompactionLogPhase.", metadata.getEntryLogId()); return; } - File compactedLogFile = getCompactedLogFile(compactionLogFile, metadata.getEntryLogId()); - CompactionPhase updateIndex = new UpdateIndexPhase(compactedLogFile); + CompactionPhase updateIndex = new UpdateIndexPhase(compactionLog); if (!updateIndex.run()) { LOG.info("Compaction for entry log {} end in UpdateIndexPhase.", metadata.getEntryLogId()); return; } - gcThread.removeEntryLog(metadata.getEntryLogId()); + logRemovalListener.removeEntryLog(metadata.getEntryLogId()); LOG.info("Compacted entry log : {}.", metadata.getEntryLogId()); } private class PartialFlushIndexPhase extends UpdateIndexPhase { - public PartialFlushIndexPhase(File compactedLogFile) { - super(compactedLogFile); + public 
PartialFlushIndexPhase(CompactionEntryLog compactionLog) { + super(compactionLog); } @Override void start() throws IOException { - if (compactedLogFile != null && compactedLogFile.exists()) { - File dir = compactedLogFile.getParentFile(); - String compactedFilename = compactedLogFile.getName(); - // create a hard link "x.log" for file "x.log.y.compacted" - this.newEntryLogFile = new File(dir, compactedFilename.substring(0, - compactedFilename.indexOf(".log") + 4)); - File hardlinkFile = new File(dir, newEntryLogFile.getName()); - if (!hardlinkFile.exists()) { - HardLink.createHardLink(compactedLogFile, hardlinkFile); - } - assertTrue(offsets.size() > 1); - // only flush index for one entry location - EntryLocation el = offsets.get(0); - ledgerStorage.updateEntriesLocations(offsets); - ledgerStorage.flushEntriesLocationsIndex(); - throw new IOException("Flush ledger index encounter exception"); - } + compactionLog.makeAvailable(); + assertTrue(offsets.size() > 1); + // only flush index for one entry location + EntryLocation el = offsets.get(0); + ledgerStorage.updateEntriesLocations(offsets); + ledgerStorage.flushEntriesLocationsIndex(); + throw new IOException("Flush ledger index encounter exception"); } } private class LogFlushFailurePhase extends FlushCompactionLogPhase { - LogFlushFailurePhase(long compactingLogId) { - super(compactingLogId); + LogFlushFailurePhase(CompactionEntryLog compactionEntryLog) { + super(compactionEntryLog); } @Override void start() throws IOException { // flush the current compaction log - entryLogger.flushCompactionLog(); + compactionLog.flush(); throw new IOException("Encounter IOException when trying to flush compaction log"); } } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/CookieIndexDirTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/CookieIndexDirTest.java new file mode 100644 index 00000000000..3caf076082e --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/CookieIndexDirTest.java @@ -0,0 +1,1004 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + */ + +package org.apache.bookkeeper.bookie; + +import static org.apache.bookkeeper.bookie.UpgradeTest.initV1JournalDirectory; +import static org.apache.bookkeeper.bookie.UpgradeTest.initV1LedgerDirectory; +import static org.apache.bookkeeper.bookie.UpgradeTest.initV2JournalDirectory; +import static org.apache.bookkeeper.bookie.UpgradeTest.initV2LedgerDirectory; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import com.google.common.collect.Sets; +import java.io.File; +import java.io.IOException; +import java.net.URI; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Random; +import java.util.Set; +import org.apache.bookkeeper.bookie.BookieException.InvalidCookieException; +import org.apache.bookkeeper.client.BookKeeperAdmin; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.conf.TestBKConfiguration; +import org.apache.bookkeeper.discover.RegistrationManager; +import org.apache.bookkeeper.meta.MetadataBookieDriver; +import org.apache.bookkeeper.meta.MetadataDrivers; +import org.apache.bookkeeper.stats.NullStatsLogger; +import org.apache.bookkeeper.test.BookKeeperClusterTestCase; +import org.apache.bookkeeper.util.BookKeeperConstants; +import org.apache.bookkeeper.util.PortManager; +import org.apache.bookkeeper.versioning.LongVersion; +import org.apache.bookkeeper.versioning.Version; +import org.apache.bookkeeper.versioning.Versioned; +import org.apache.commons.io.FileUtils; +import org.junit.Assert; +import org.junit.Test; + +/** + * Test cookies. + */ +public class CookieIndexDirTest extends BookKeeperClusterTestCase { + + final int bookiePort = PortManager.nextFreePort(); + + public CookieIndexDirTest() { + super(0); + } + + private String newDirectory() throws Exception { + return newDirectory(true); + } + + private String newDirectory(boolean createCurDir) throws Exception { + File d = tmpDirs.createNew("cookie", "tmpdir"); + if (createCurDir) { + new File(d, "current").mkdirs(); + } + return d.getPath(); + } + + MetadataBookieDriver metadataBookieDriver; + RegistrationManager rm; + + @Override + public void setUp() throws Exception { + super.setUp(); + baseConf.setMetadataServiceUri(zkUtil.getMetadataServiceUri()); + this.metadataBookieDriver = MetadataDrivers.getBookieDriver( + URI.create(baseConf.getMetadataServiceUri())); + this.metadataBookieDriver.initialize(baseConf, NullStatsLogger.INSTANCE); + this.rm = metadataBookieDriver.createRegistrationManager(); + } + + @Override + public void tearDown() throws Exception { + super.tearDown(); + if (rm != null) { + rm.close(); + } + if (metadataBookieDriver != null) { + metadataBookieDriver.close(); + } + } + + private static List currentDirectoryList(File[] dirs) { + return Arrays.asList(BookieImpl.getCurrentDirectories(dirs)); + } + + private void validateConfig(ServerConfiguration conf) throws Exception { + List dirs = new ArrayList<>(); + for (File f : conf.getJournalDirs()) { + File cur = BookieImpl.getCurrentDirectory(f); + dirs.add(cur); + BookieImpl.checkDirectoryStructure(cur); + } + for (File f : conf.getLedgerDirs()) { + File cur = BookieImpl.getCurrentDirectory(f); + dirs.add(cur); + BookieImpl.checkDirectoryStructure(cur); + } + if (conf.getIndexDirs() != null) { + for (File f : conf.getIndexDirs()) { + File cur = BookieImpl.getCurrentDirectory(f); + dirs.add(cur); + BookieImpl.checkDirectoryStructure(cur); + } + } + 
LegacyCookieValidation cookieValidation = new LegacyCookieValidation(conf, rm); + cookieValidation.checkCookies(dirs); + + } + + /** + * Test starting bookie with clean state. + */ + @Test + public void testCleanStart() throws Exception { + ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); + conf.setJournalDirName(newDirectory(true)) + .setLedgerDirNames(new String[] { newDirectory(true) }) + .setIndexDirName(new String[] { newDirectory(true) }) + .setBookiePort(bookiePort) + .setMetadataServiceUri(zkUtil.getMetadataServiceUri()); + + validateConfig(conf); + } + + /** + * Test that if a zookeeper cookie + * is different to a local cookie, the bookie + * will fail to start. + */ + @Test + public void testBadJournalCookie() throws Exception { + ServerConfiguration conf1 = TestBKConfiguration.newServerConfiguration() + .setJournalDirName(newDirectory()) + .setLedgerDirNames(new String[] { newDirectory() }) + .setIndexDirName(new String[] { newDirectory() }) + .setBookiePort(bookiePort); + Cookie.Builder cookieBuilder = Cookie.generateCookie(conf1); + Cookie c = cookieBuilder.build(); + c.writeToRegistrationManager(rm, conf1, Version.NEW); + + String journalDir = newDirectory(); + String ledgerDir = newDirectory(); + String indexDir = newDirectory(); + ServerConfiguration conf2 = TestBKConfiguration.newServerConfiguration(); + conf2.setJournalDirName(journalDir) + .setLedgerDirNames(new String[] { ledgerDir }) + .setIndexDirName(new String[] { indexDir }) + .setBookiePort(bookiePort) + .setMetadataServiceUri(zkUtil.getMetadataServiceUri()); + Cookie.Builder cookieBuilder2 = Cookie.generateCookie(conf2); + Cookie c2 = cookieBuilder2.build(); + c2.writeToDirectory(new File(journalDir, "current")); + c2.writeToDirectory(new File(ledgerDir, "current")); + c2.writeToDirectory(new File(indexDir, "current")); + + try { + validateConfig(conf2); + + fail("Shouldn't have been able to start"); + } catch (InvalidCookieException ice) { + // correct behaviour + } + } + + /** + * Test that if a directory is removed from + * the configuration, the bookie will fail to + * start. 
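+ * <p>Illustrative sketch only (not part of the change itself): the cookie records the configured directory lists, so regenerating it after a directory is dropped produces a different cookie and validation fails: + * <pre>{@code + * Cookie before = Cookie.generateCookie(conf).build(); + * conf.setLedgerDirNames(new String[] { ledgerDirs[0], ledgerDirs[1] }); + * Cookie after = Cookie.generateCookie(conf).build(); + * // before no longer matches after, so cookie validation throws + * // InvalidCookieException on the next startup + * }</pre>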
+ */ + @Test + public void testDirectoryMissing() throws Exception { + String[] ledgerDirs = new String[] { + newDirectory(), newDirectory(), newDirectory() }; + String[] indexDirs = new String[] { + newDirectory(), newDirectory(), newDirectory() }; + String journalDir = newDirectory(); + ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); + conf.setJournalDirName(journalDir) + .setLedgerDirNames(ledgerDirs) + .setIndexDirName(indexDirs) + .setBookiePort(bookiePort) + .setMetadataServiceUri(zkUtil.getMetadataServiceUri()); + + validateConfig(conf); + + conf.setLedgerDirNames(new String[] { ledgerDirs[0], ledgerDirs[1] }); + try { + validateConfig(conf); + fail("Shouldn't have been able to start"); + } catch (InvalidCookieException ice) { + // correct behaviour + } + + conf.setIndexDirName(new String[] { indexDirs[0], indexDirs[1] }).setLedgerDirNames(ledgerDirs); + try { + validateConfig(conf); + fail("Shouldn't have been able to start"); + } catch (InvalidCookieException ice) { + // correct behaviour + } + + conf.setJournalDirName(newDirectory()).setLedgerDirNames(ledgerDirs).setIndexDirName(indexDirs); + try { + validateConfig(conf); + fail("Shouldn't have been able to start"); + } catch (InvalidCookieException ice) { + // correct behaviour + } + + conf.setJournalDirName(journalDir); + validateConfig(conf); + } + + /** + * Test that if a cookie is missing from a journal directory + * the bookie will fail to start. + */ + @Test + public void testCookieMissingOnJournalDir() throws Exception { + String[] ledgerDirs = new String[] { + newDirectory(), newDirectory(), newDirectory() }; + String[] indexDirs = new String[] { + newDirectory(), newDirectory(), newDirectory() }; + String journalDir = newDirectory(); + ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); + conf.setJournalDirName(journalDir) + .setLedgerDirNames(ledgerDirs) + .setIndexDirName(indexDirs) + .setBookiePort(bookiePort) + .setMetadataServiceUri(zkUtil.getMetadataServiceUri()); + + validateConfig(conf); + + File cookieFile = + new File(BookieImpl.getCurrentDirectory(new File(journalDir)), BookKeeperConstants.VERSION_FILENAME); + assertTrue(cookieFile.delete()); + try { + validateConfig(conf); + fail("Shouldn't have been able to start"); + } catch (InvalidCookieException ice) { + // correct behaviour + } + } + + /** + * Test that if a cookie is missing from a ledger directory + * the bookie will fail to start. + */ + @Test + public void testCookieMissingOnLedgerDir() throws Exception { + String[] ledgerDirs = new String[] { + newDirectory(), newDirectory(), newDirectory() }; + String journalDir = newDirectory(); + ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); + conf.setJournalDirName(journalDir) + .setLedgerDirNames(ledgerDirs) + .setBookiePort(bookiePort) + .setMetadataServiceUri(zkUtil.getMetadataServiceUri()); + + validateConfig(conf); + + File cookieFile = + new File(BookieImpl.getCurrentDirectory(new File(ledgerDirs[0])), BookKeeperConstants.VERSION_FILENAME); + assertTrue(cookieFile.delete()); + try { + validateConfig(conf); + fail("Shouldn't have been able to start"); + } catch (InvalidCookieException ice) { + // correct behaviour + } + } + + /** + * Test that if a cookie is missing from an index directory + * the bookie will fail to start.
+ */ + @Test + public void testCookieMissingOnIndexDir() throws Exception { + String[] ledgerDirs = new String[] { + newDirectory(), newDirectory(), newDirectory() }; + String[] indexDirs = new String[] { + newDirectory(), newDirectory(), newDirectory() }; + String journalDir = newDirectory(); + ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); + conf.setJournalDirName(journalDir) + .setLedgerDirNames(ledgerDirs) + .setIndexDirName(indexDirs) + .setBookiePort(bookiePort) + .setMetadataServiceUri(zkUtil.getMetadataServiceUri()); + + validateConfig(conf); + + File cookieFile = + new File(BookieImpl.getCurrentDirectory(new File(indexDirs[0])), BookKeeperConstants.VERSION_FILENAME); + assertTrue(cookieFile.delete()); + try { + validateConfig(conf); + fail("Shouldn't have been able to start"); + } catch (InvalidCookieException ice) { + // correct behaviour + } + } + + /** + * Test that if a ledger directory is added to a + * preexisting bookie, the bookie will fail + * to start. + */ + @Test + public void testLedgerDirectoryAdded() throws Exception { + String ledgerDir0 = newDirectory(); + String indexDir0 = newDirectory(); + String journalDir = newDirectory(); + ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); + conf.setJournalDirName(journalDir) + .setLedgerDirNames(new String[] { ledgerDir0 }) + .setIndexDirName(new String[] { indexDir0 }) + .setBookiePort(bookiePort) + .setMetadataServiceUri(zkUtil.getMetadataServiceUri()); + + validateConfig(conf); + + conf.setLedgerDirNames(new String[] { ledgerDir0, newDirectory() }); + try { + validateConfig(conf); + fail("Shouldn't have been able to start"); + } catch (InvalidCookieException ice) { + // correct behaviour + } + + conf.setLedgerDirNames(new String[] { ledgerDir0 }); + validateConfig(conf); + } + + /** + * Test that if an index directory is added to a + * preexisting bookie, the bookie will fail + * to start. + */ + @Test + public void testIndexDirectoryAdded() throws Exception { + String ledgerDir0 = newDirectory(); + String indexDir0 = newDirectory(); + String journalDir = newDirectory(); + ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); + conf.setJournalDirName(journalDir) + .setLedgerDirNames(new String[] { ledgerDir0 }) + .setIndexDirName(new String[] { indexDir0 }) + .setBookiePort(bookiePort) + .setMetadataServiceUri(zkUtil.getMetadataServiceUri()); + + validateConfig(conf); + + conf.setIndexDirName(new String[] { indexDir0, newDirectory() }); + try { + validateConfig(conf); + fail("Shouldn't have been able to start"); + } catch (InvalidCookieException ice) { + // correct behaviour + } + + conf.setIndexDirName(new String[] { indexDir0 }); + validateConfig(conf); + } + + /** + * Test that if a ledger directory is added to an existing bookie, and + * allowStorageExpansion option is true, the bookie should come online.
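+ * <p>A minimal sketch of the operator-facing flow covered here (illustrative; {@code existingLedgerDir} and {@code newEmptyDir} are placeholders): + * <pre>{@code + * conf.setAllowStorageExpansion(true); + * // append a brand-new, empty directory to the existing list + * conf.setLedgerDirNames(new String[] { existingLedgerDir, newEmptyDir }); + * // validation accepts the superset and records the new directory in the cookie + * }</pre>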
+ */ + @Test + public void testLedgerStorageExpansionOption() throws Exception { + String ledgerDir0 = newDirectory(); + String indexDir0 = newDirectory(); + String journalDir = newDirectory(); + ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); + conf.setJournalDirName(journalDir) + .setLedgerDirNames(new String[] { ledgerDir0 }) + .setIndexDirName(new String[] { indexDir0 }) + .setBookiePort(bookiePort) + .setAllowStorageExpansion(true) + .setMetadataServiceUri(zkUtil.getMetadataServiceUri()); + + validateConfig(conf); + + // add a few additional ledger dirs + String[] lPaths = new String[] {ledgerDir0, newDirectory(), newDirectory()}; + Set<String> configuredLedgerDirs = Sets.newHashSet(lPaths); + conf.setLedgerDirNames(lPaths); + + // add an extra index dir + String[] iPaths = new String[] {indexDir0, newDirectory()}; + Set<String> configuredIndexDirs = Sets.newHashSet(iPaths); + conf.setIndexDirName(iPaths); + + try { + validateConfig(conf); + } catch (InvalidCookieException ice) { + fail("Should have been able to start the bookie"); + } + + List<File> l = currentDirectoryList(conf.getLedgerDirs()); + HashSet<String> bookieLedgerDirs = Sets.newHashSet(); + for (File f : l) { + // Using the parent path because the bookie creates a 'current' + // dir under the ledger dir the user provides + bookieLedgerDirs.add(f.getParent()); + } + assertTrue("Configured ledger dirs: " + configuredLedgerDirs + " doesn't match bookie's ledger dirs: " + + bookieLedgerDirs, + configuredLedgerDirs.equals(bookieLedgerDirs)); + + l = currentDirectoryList(conf.getIndexDirs()); + HashSet<String> bookieIndexDirs = Sets.newHashSet(); + for (File f : l) { + bookieIndexDirs.add(f.getParent()); + } + assertTrue("Configured Index dirs: " + configuredIndexDirs + " doesn't match bookie's index dirs: " + + bookieIndexDirs, + configuredIndexDirs.equals(bookieIndexDirs)); + + // Make sure that substituting an older ledger directory + // is not allowed. + String[] lPaths2 = new String[] { lPaths[0], lPaths[1], newDirectory() }; + conf.setLedgerDirNames(lPaths2); + try { + validateConfig(conf); + fail("Should not have been able to start the bookie"); + } catch (InvalidCookieException ice) { + // correct behavior + } + + // Finally make sure that not including the older ledger directories + // is not allowed. Remove one of the older ledger dirs + lPaths2 = new String[] { lPaths[0], lPaths[1] }; + conf.setLedgerDirNames(lPaths2); + try { + validateConfig(conf); + fail("Should not have been able to start the bookie"); + } catch (InvalidCookieException ice) { + // correct behavior + } + } + + /** + * Test that if an index directory is added to an existing bookie, and + * allowStorageExpansion option is true, the bookie should come online.
+ */ + @Test + public void testIndexStorageExpansionOption() throws Exception { + String ledgerDir0 = newDirectory(); + String indexDir0 = newDirectory(); + String journalDir = newDirectory(); + ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); + conf.setJournalDirName(journalDir) + .setLedgerDirNames(new String[] { ledgerDir0 }) + .setIndexDirName(new String[] { indexDir0 }) + .setBookiePort(bookiePort) + .setAllowStorageExpansion(true) + .setMetadataServiceUri(zkUtil.getMetadataServiceUri()); + + validateConfig(conf); + + // add an extra index dir + String[] iPaths = new String[] {indexDir0, newDirectory(), newDirectory()}; + Set<String> configuredIndexDirs = Sets.newHashSet(iPaths); + conf.setIndexDirName(iPaths); + + try { + validateConfig(conf); + } catch (InvalidCookieException ice) { + fail("Should have been able to start the bookie"); + } + + List<File> l = currentDirectoryList(conf.getIndexDirs()); + HashSet<String> bookieIndexDirs = Sets.newHashSet(); + for (File f : l) { + bookieIndexDirs.add(f.getParent()); + } + assertTrue("Configured Index dirs: " + configuredIndexDirs + " doesn't match bookie's index dirs: " + + bookieIndexDirs, + configuredIndexDirs.equals(bookieIndexDirs)); + + // Make sure that substituting an older index directory + // is not allowed. + String[] iPaths2 = new String[] { iPaths[0], iPaths[1], newDirectory() }; + conf.setIndexDirName(iPaths2); + try { + validateConfig(conf); + fail("Should not have been able to start the bookie"); + } catch (InvalidCookieException ice) { + // correct behavior + } + + // Finally make sure that not including the older index directories + // is not allowed. Remove one of the older index dirs + iPaths2 = new String[] { iPaths[0], iPaths[1] }; + conf.setIndexDirName(iPaths2); + try { + validateConfig(conf); + fail("Should not have been able to start the bookie"); + } catch (InvalidCookieException ice) { + // correct behavior + } + } + + /** + * Test that adding of a non-empty directory is not allowed + * even when allowStorageExpansion option is true.
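+ * <p>Sketch of the rejected case (illustrative; {@code newDir} is a placeholder): + * <pre>{@code + * File current = BookieImpl.getCurrentDirectory(new File(newDir)); + * new File(current, "leftover").createNewFile(); + * // a non-empty "current" dir cannot be adopted, even with + * // allowStorageExpansion enabled, so InvalidCookieException is thrown + * }</pre>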
+ */ + @Test + public void testNonEmptyDirAddWithStorageExpansionOption() throws Exception { + String ledgerDir0 = newDirectory(); + String indexDir0 = newDirectory(); + String journalDir = newDirectory(); + ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); + conf.setJournalDirName(journalDir) + .setLedgerDirNames(new String[] { ledgerDir0 }) + .setIndexDirName(new String[] { indexDir0 }) + .setBookiePort(bookiePort) + .setAllowStorageExpansion(true) + .setMetadataServiceUri(zkUtil.getMetadataServiceUri()); + + validateConfig(conf); + + // add an additional ledger dir + String[] lPaths = new String[] {ledgerDir0, newDirectory()}; + conf.setLedgerDirNames(lPaths); + + // create a file to make the dir non-empty + File currentDir = BookieImpl.getCurrentDirectory(new File(lPaths[1])); + new File(currentDir, "foo").createNewFile(); + assertTrue(currentDir.list().length == 1); + + try { + validateConfig(conf); + fail("Shouldn't have been able to start"); + } catch (InvalidCookieException ice) { + // correct behavior + } + + // Now test with a non-empty index dir + String[] iPaths = new String[] {indexDir0, newDirectory()}; + conf.setIndexDirName(iPaths); + + // create a dir to make it non-empty + currentDir = BookieImpl.getCurrentDirectory(new File(iPaths[1])); + new File(currentDir, "bar").mkdirs(); + assertTrue(currentDir.list().length == 1); + + try { + validateConfig(conf); + fail("Shouldn't have been able to start"); + } catch (InvalidCookieException ice) { + // correct behavior + } + } + + /** + * Test that if a ledger directory's contents + * are emptied, the bookie will fail to start. + */ + @Test + public void testLedgerDirectoryCleared() throws Exception { + String ledgerDir0 = newDirectory(); + String indexDir = newDirectory(); + String journalDir = newDirectory(); + ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); + conf.setJournalDirName(journalDir) + .setLedgerDirNames(new String[] { ledgerDir0 , newDirectory() }) + .setIndexDirName(new String[] { indexDir }) + .setBookiePort(bookiePort) + .setMetadataServiceUri(zkUtil.getMetadataServiceUri()); + + validateConfig(conf); + + FileUtils.deleteDirectory(new File(ledgerDir0)); + try { + validateConfig(conf); + fail("Shouldn't have been able to start"); + } catch (InvalidCookieException ice) { + // correct behaviour + } + } + + /** + * Test that if an index directory's contents + * are emptied, the bookie will fail to start. + */ + @Test + public void testIndexDirectoryCleared() throws Exception { + String ledgerDir = newDirectory(); + String indexDir0 = newDirectory(); + String journalDir = newDirectory(); + ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); + conf.setJournalDirName(journalDir) + .setLedgerDirNames(new String[] { ledgerDir }) + .setIndexDirName(new String[] { indexDir0 , newDirectory() }) + .setBookiePort(bookiePort) + .setMetadataServiceUri(zkUtil.getMetadataServiceUri()); + + validateConfig(conf); + + FileUtils.deleteDirectory(new File(indexDir0)); + try { + validateConfig(conf); + fail("Shouldn't have been able to start"); + } catch (InvalidCookieException ice) { + // correct behaviour + } + } + + /** + * Test that if a bookie's port is changed + * the bookie will fail to start.
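+ * <p>Illustrative sketch (assumes, as the test below does, that the port is part of the persisted cookie): + * <pre>{@code + * conf.setBookiePort(3182); // differs from the port stored in the cookie + * // => cookie mismatch => InvalidCookieException at validation time + * }</pre>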
+ */ + @Test + public void testBookiePortChanged() throws Exception { + ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); + conf.setJournalDirName(newDirectory()) + .setLedgerDirNames(new String[] { newDirectory() , newDirectory() }) + .setIndexDirName(new String[] { newDirectory() , newDirectory() }) + .setBookiePort(bookiePort) + .setMetadataServiceUri(zkUtil.getMetadataServiceUri()); + validateConfig(conf); + + conf.setBookiePort(3182); + try { + validateConfig(conf); + fail("Shouldn't have been able to start"); + } catch (InvalidCookieException ice) { + // correct behaviour + } + } + + /** + * Test that if a bookie tries to start + * with the address of a bookie which has already + * existed in the system, then the bookie will fail + * to start. + */ + @Test + public void testNewBookieStartingWithAnotherBookiesPort() throws Exception { + ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); + conf.setJournalDirName(newDirectory()) + .setLedgerDirNames(new String[] { newDirectory() , newDirectory() }) + .setIndexDirName(new String[] { newDirectory() , newDirectory() }) + .setBookiePort(bookiePort) + .setMetadataServiceUri(zkUtil.getMetadataServiceUri()); + validateConfig(conf); + + conf = TestBKConfiguration.newServerConfiguration(); + conf.setJournalDirName(newDirectory()) + .setLedgerDirNames(new String[] { newDirectory() , newDirectory() }) + .setIndexDirName(new String[] { newDirectory() , newDirectory() }) + .setBookiePort(bookiePort) + .setMetadataServiceUri(zkUtil.getMetadataServiceUri()); + try { + validateConfig(conf); + fail("Shouldn't have been able to start"); + } catch (InvalidCookieException ice) { + // correct behaviour + } + } + + /** + * Test Cookie verification with format. + */ + @Test + public void testVerifyCookieWithFormat() throws Exception { + ServerConfiguration adminConf = new ServerConfiguration(); + adminConf.setMetadataServiceUri(zkUtil.getMetadataServiceUri()); + + adminConf.setProperty("bookkeeper.format", true); + // Format the BK Metadata and generate INSTANCEID + BookKeeperAdmin.format(adminConf, false, true); + + ServerConfiguration bookieConf = TestBKConfiguration.newServerConfiguration(); + bookieConf.setJournalDirName(newDirectory(true)) + .setLedgerDirNames(new String[] { newDirectory(true) }) + .setIndexDirName(new String[] { newDirectory(true) }) + .setBookiePort(bookiePort) + .setMetadataServiceUri(zkUtil.getMetadataServiceUri()); + // Bookie should start successfully for fresh env. + validateConfig(bookieConf); + + // Format metadata one more time. + BookKeeperAdmin.format(adminConf, false, true); + try { + validateConfig(bookieConf); + fail("Bookie should not start with previous instance id."); + } catch (InvalidCookieException e) { + assertTrue( + "Bookie startup should fail because of invalid instance id", + e.getMessage().contains("instanceId")); + } + + // Now format the Bookie and restart. + BookieImpl.format(bookieConf, false, true); + // After bookie format bookie should be able to start again. + validateConfig(bookieConf); + } + + /** + * Test that if a bookie is started with directories with + * version 2 data, that it will fail to start (it needs upgrade). 
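+ * <p>Sketch of the check exercised here (illustrative only): + * <pre>{@code + * BookieImpl.checkDirectoryStructure(BookieImpl.getCurrentDirectory(journalDir)); + * // pre-V3 layouts raise IOException("... upgrade needed"), so the bookie + * // refuses to start until the upgrade tool has been run + * }</pre>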
+ */ + @Test + public void testV2data() throws Exception { + File journalDir = initV2JournalDirectory(tmpDirs.createNew("bookie", "journal")); + File ledgerDir = initV2LedgerDirectory(tmpDirs.createNew("bookie", "ledger")); + + ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); + conf.setJournalDirName(journalDir.getPath()) + .setLedgerDirNames(new String[] { ledgerDir.getPath() }) + .setIndexDirName(new String[] { ledgerDir.getPath() }) + .setBookiePort(bookiePort) + .setMetadataServiceUri(zkUtil.getMetadataServiceUri()); + try { + BookieImpl.checkDirectoryStructure(BookieImpl.getCurrentDirectory(journalDir)); + fail("Shouldn't have been able to start"); + } catch (IOException ioe) { + // correct behaviour + assertTrue("wrong exception", ioe.getMessage().contains("upgrade needed")); + } + try { + BookieImpl.checkDirectoryStructure(BookieImpl.getCurrentDirectory(ledgerDir)); + fail("Shouldn't have been able to start"); + } catch (IOException ioe) { + // correct behaviour + assertTrue("wrong exception", ioe.getMessage().contains("upgrade needed")); + } + } + + /** + * Test that if a bookie is started with directories with + * version 1 data, that it will fail to start (it needs upgrade). + */ + @Test + public void testV1data() throws Exception { + File journalDir = initV1JournalDirectory(tmpDirs.createNew("bookie", "journal")); + File ledgerDir = initV1LedgerDirectory(tmpDirs.createNew("bookie", "ledger")); + + ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); + conf.setJournalDirName(journalDir.getPath()) + .setLedgerDirNames(new String[]{ledgerDir.getPath()}) + .setIndexDirName(new String[]{ledgerDir.getPath()}) + .setBookiePort(bookiePort) + .setMetadataServiceUri(zkUtil.getMetadataServiceUri()); + try { + BookieImpl.checkDirectoryStructure(BookieImpl.getCurrentDirectory(journalDir)); + fail("Shouldn't have been able to start"); + } catch (IOException ioe) { + // correct behaviour + assertTrue("wrong exception", ioe.getMessage().contains("upgrade needed")); + } + + try { + BookieImpl.checkDirectoryStructure(BookieImpl.getCurrentDirectory(ledgerDir)); + fail("Shouldn't have been able to start"); + } catch (IOException ioe) { + // correct behaviour + assertTrue("wrong exception", ioe.getMessage().contains("upgrade needed")); + } + } + + /** + * Test restart bookie with useHostNameAsBookieID=true, which had cookie generated + * with ipaddress. + */ + @Test + public void testRestartWithHostNameAsBookieID() throws Exception { + String[] ledgerDirs = new String[] { newDirectory(), newDirectory(), newDirectory() }; + String[] indexDirs = new String[] { newDirectory(), newDirectory(), newDirectory() }; + String journalDir = newDirectory(); + ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); + conf.setJournalDirName(journalDir) + .setLedgerDirNames(ledgerDirs) + .setIndexDirName(indexDirs) + .setBookiePort(bookiePort) + .setMetadataServiceUri(zkUtil.getMetadataServiceUri()); + validateConfig(conf); + + conf.setUseHostNameAsBookieID(true); + try { + validateConfig(conf); + fail("Should not start a bookie with hostname if the bookie has been started with an ip"); + } catch (InvalidCookieException e) { + // expected + } + } + + /** + * Test restart bookie with new advertisedAddress, which had cookie generated with ip. 
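+ * <p>Illustrative sketch (assumption: the advertised address feeds into the bookie id recorded in the cookie): + * <pre>{@code + * conf.setAdvertisedAddress("unknown"); + * // the bookie id no longer matches the stored cookie, + * // so validation fails with InvalidCookieException + * }</pre>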
+ */ + @Test + public void testRestartWithAdvertisedAddressAsBookieID() throws Exception { + String[] ledgerDirs = new String[] { newDirectory(), newDirectory(), newDirectory() }; + String[] indexDirs = new String[] { newDirectory(), newDirectory(), newDirectory() }; + String journalDir = newDirectory(); + ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); + conf.setJournalDirName(journalDir) + .setLedgerDirNames(ledgerDirs) + .setIndexDirName(indexDirs) + .setBookiePort(bookiePort) + .setMetadataServiceUri(zkUtil.getMetadataServiceUri()); + conf.setUseHostNameAsBookieID(false); + validateConfig(conf); + + conf.setAdvertisedAddress("unknown"); + try { + validateConfig(conf); + fail("Should not start a bookie with a new advertised address if the bookie has been started with an ip"); + } catch (InvalidCookieException e) { + // expected + } + } + + /** + * Test restart bookie with useHostNameAsBookieID=false, which had cookie generated + * with hostname. + */ + @Test + public void testRestartWithIpAddressAsBookieID() throws Exception { + String[] ledgerDirs = new String[] { newDirectory(), newDirectory(), newDirectory() }; + String[] indexDirs = new String[] { newDirectory(), newDirectory(), newDirectory() }; + String journalDir = newDirectory(); + ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); + conf.setJournalDirName(journalDir) + .setLedgerDirNames(ledgerDirs) + .setIndexDirName(indexDirs) + .setBookiePort(bookiePort) + .setMetadataServiceUri(zkUtil.getMetadataServiceUri()); + conf.setUseHostNameAsBookieID(true); + validateConfig(conf); + + conf.setUseHostNameAsBookieID(false); + try { + validateConfig(conf); + fail("Should not start a bookie with an ip if the bookie has been started with a hostname"); + } catch (InvalidCookieException e) { + // expected + } + } + + /** + * Test an old-version bookie starting with the cookies generated by a new version + * (with useHostNameAsBookieID=true). + */ + @Test + public void testV2dataWithHostNameAsBookieID() throws Exception { + File journalDir = initV2JournalDirectory(tmpDirs.createNew("bookie", "journal")); + File ledgerDir = initV2LedgerDirectory(tmpDirs.createNew("bookie", "ledger")); + + ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); + conf.setJournalDirName(journalDir.getPath()) + .setLedgerDirNames(new String[] { ledgerDir.getPath() }) + .setIndexDirName(new String[] { ledgerDir.getPath() }) + .setBookiePort(bookiePort) + .setMetadataServiceUri(zkUtil.getMetadataServiceUri()); + try { + BookieImpl.checkDirectoryStructure(BookieImpl.getCurrentDirectory(ledgerDir)); + fail("Shouldn't have been able to start"); + } catch (IOException ioe) { + // correct behaviour + assertTrue("wrong exception", ioe.getMessage().contains("upgrade needed")); + } + + try { + BookieImpl.checkDirectoryStructure(BookieImpl.getCurrentDirectory(journalDir)); + fail("Shouldn't have been able to start"); + } catch (IOException ioe) { + // correct behaviour + assertTrue("wrong exception", ioe.getMessage().contains("upgrade needed")); + } + } + + /** + * Test write cookie multiple times.
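+ * <p>Sketch of the versioned write protocol verified below (illustrative): + * <pre>{@code + * Versioned<Cookie> vc = Cookie.readFromRegistrationManager(rm, conf); + * vc.getValue().writeToRegistrationManager(rm, conf, vc.getVersion()); + * // each successful compare-and-set style write bumps the LongVersion by one + * }</pre>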
+ */ + @Test + public void testWriteToZooKeeper() throws Exception { + String[] ledgerDirs = new String[] { newDirectory(), newDirectory(), newDirectory() }; + String[] indexDirs = new String[] { newDirectory(), newDirectory(), newDirectory() }; + String journalDir = newDirectory(); + ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); + conf.setJournalDirName(journalDir) + .setLedgerDirNames(ledgerDirs) + .setIndexDirName(indexDirs) + .setBookiePort(bookiePort) + .setMetadataServiceUri(zkUtil.getMetadataServiceUri()); + validateConfig(conf); + Versioned<Cookie> zkCookie = Cookie.readFromRegistrationManager(rm, conf); + Version version1 = zkCookie.getVersion(); + assertTrue("Invalid type, expected LongVersion type", + version1 instanceof LongVersion); + LongVersion zkVersion1 = (LongVersion) version1; + Cookie cookie = zkCookie.getValue(); + cookie.writeToRegistrationManager(rm, conf, version1); + + zkCookie = Cookie.readFromRegistrationManager(rm, conf); + Version version2 = zkCookie.getVersion(); + assertTrue("Invalid type, expected LongVersion type", version2 instanceof LongVersion); + LongVersion zkVersion2 = (LongVersion) version2; + assertEquals("Version mismatches!", + zkVersion1.getLongVersion() + 1, zkVersion2.getLongVersion()); + } + + /** + * Test delete cookie. + */ + @Test + public void testDeleteFromZooKeeper() throws Exception { + String[] ledgerDirs = new String[] { newDirectory(), newDirectory(), newDirectory() }; + String[] indexDirs = new String[] { newDirectory(), newDirectory(), newDirectory() }; + String journalDir = newDirectory(); + ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); + conf.setJournalDirName(journalDir) + .setLedgerDirNames(ledgerDirs) + .setIndexDirName(indexDirs) + .setBookiePort(bookiePort) + .setMetadataServiceUri(zkUtil.getMetadataServiceUri()); + validateConfig(conf); + Versioned<Cookie> zkCookie = Cookie.readFromRegistrationManager(rm, conf); + Cookie cookie = zkCookie.getValue(); + cookie.deleteFromRegistrationManager(rm, conf, zkCookie.getVersion()); + } + + /** + * Tests that custom Bookie Id is properly set in the Cookie (via {@link LegacyCookieValidation}). + */ + @Test + public void testBookieIdSetting() throws Exception { + final String customBookieId = "myCustomBookieId" + new Random().nextInt(); + ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); + conf.setJournalDirName(newDirectory()) + .setLedgerDirNames(new String[] { newDirectory() , newDirectory() }) + .setIndexDirName(new String[] { newDirectory() , newDirectory() }) + .setBookiePort(bookiePort) + .setBookieId(customBookieId) + .setMetadataServiceUri(zkUtil.getMetadataServiceUri()); + validateConfig(conf); + Versioned<Cookie> zkCookie = Cookie.readFromRegistrationManager(rm, conf); + Version version1 = zkCookie.getVersion(); + assertTrue("Invalid type, expected LongVersion type", version1 instanceof LongVersion); + Cookie cookie = zkCookie.getValue(); + cookie.writeToRegistrationManager(rm, conf, version1); + Assert.assertTrue(cookie.toString().contains(customBookieId)); + } + + /** + * Compatibility test: + * 1. First create a bookie without indexDirName. + * 2.
Configure indexDirName to start bookie + */ + @Test + public void testNewBookieStartingWithOldCookie() throws Exception { + String journalDir = newDirectory(); + String[] ledgerDirs = {newDirectory(), newDirectory()}; + ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); + conf.setJournalDirName(journalDir) + .setLedgerDirNames(ledgerDirs) + .setBookiePort(bookiePort) + .setMetadataServiceUri(zkUtil.getMetadataServiceUri()); + validateConfig(conf); + + conf = TestBKConfiguration.newServerConfiguration(); + conf.setJournalDirName(journalDir) + .setLedgerDirNames(ledgerDirs) + .setIndexDirName(ledgerDirs) + .setBookiePort(bookiePort) + .setMetadataServiceUri(zkUtil.getMetadataServiceUri()); + try { + validateConfig(conf); + } catch (InvalidCookieException ice) { + // error behaviour + fail("Validate failed, error info: " + ice.getMessage()); + } + } +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/CookieTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/CookieTest.java index e91500f5f00..9d068ad291a 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/CookieTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/CookieTest.java @@ -21,10 +21,10 @@ package org.apache.bookkeeper.bookie; -import static org.apache.bookkeeper.bookie.UpgradeTest.newV1JournalDirectory; -import static org.apache.bookkeeper.bookie.UpgradeTest.newV1LedgerDirectory; -import static org.apache.bookkeeper.bookie.UpgradeTest.newV2JournalDirectory; -import static org.apache.bookkeeper.bookie.UpgradeTest.newV2LedgerDirectory; +import static org.apache.bookkeeper.bookie.UpgradeTest.initV1JournalDirectory; +import static org.apache.bookkeeper.bookie.UpgradeTest.initV1LedgerDirectory; +import static org.apache.bookkeeper.bookie.UpgradeTest.initV2JournalDirectory; +import static org.apache.bookkeeper.bookie.UpgradeTest.initV2LedgerDirectory; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; @@ -33,8 +33,11 @@ import java.io.File; import java.io.IOException; import java.net.URI; +import java.util.ArrayList; +import java.util.Arrays; import java.util.HashSet; import java.util.List; +import java.util.Random; import java.util.Set; import org.apache.bookkeeper.bookie.BookieException.InvalidCookieException; import org.apache.bookkeeper.client.BookKeeperAdmin; @@ -45,35 +48,38 @@ import org.apache.bookkeeper.meta.MetadataDrivers; import org.apache.bookkeeper.stats.NullStatsLogger; import org.apache.bookkeeper.test.BookKeeperClusterTestCase; -import org.apache.bookkeeper.test.PortManager; import org.apache.bookkeeper.util.BookKeeperConstants; -import org.apache.bookkeeper.util.IOUtils; +import org.apache.bookkeeper.util.PortManager; import org.apache.bookkeeper.versioning.LongVersion; import org.apache.bookkeeper.versioning.Version; import org.apache.bookkeeper.versioning.Versioned; import org.apache.commons.io.FileUtils; +import org.junit.Assert; import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * Test cookies. 
*/ public class CookieTest extends BookKeeperClusterTestCase { + private static final Logger log = LoggerFactory.getLogger(CookieTest.class); + final int bookiePort = PortManager.nextFreePort(); public CookieTest() { super(0); } - private String newDirectory() throws IOException { + private String newDirectory() throws Exception { return newDirectory(true); } - private String newDirectory(boolean createCurDir) throws IOException { - File d = IOUtils.createTempDir("cookie", "tmpdir"); + private String newDirectory(boolean createCurDir) throws Exception { + File d = tmpDirs.createNew("cookie", "tmpdir"); if (createCurDir) { new File(d, "current").mkdirs(); } - tmpDirs.add(d); return d.getPath(); } @@ -86,33 +92,54 @@ public void setUp() throws Exception { baseConf.setMetadataServiceUri(zkUtil.getMetadataServiceUri()); this.metadataBookieDriver = MetadataDrivers.getBookieDriver( URI.create(baseConf.getMetadataServiceUri())); - this.metadataBookieDriver.initialize(baseConf, () -> {}, NullStatsLogger.INSTANCE); - this.rm = metadataBookieDriver.getRegistrationManager(); + this.metadataBookieDriver.initialize(baseConf, NullStatsLogger.INSTANCE); + this.rm = metadataBookieDriver.createRegistrationManager(); } @Override public void tearDown() throws Exception { super.tearDown(); + if (rm != null) { + rm.close(); + } if (metadataBookieDriver != null) { metadataBookieDriver.close(); } } + private static List currentDirectoryList(File[] dirs) { + return Arrays.asList(BookieImpl.getCurrentDirectories(dirs)); + } + + private void validateConfig(ServerConfiguration conf) throws Exception { + List dirs = new ArrayList<>(); + for (File f : conf.getJournalDirs()) { + File cur = BookieImpl.getCurrentDirectory(f); + dirs.add(cur); + BookieImpl.checkDirectoryStructure(cur); + } + for (File f : conf.getLedgerDirs()) { + File cur = BookieImpl.getCurrentDirectory(f); + dirs.add(cur); + BookieImpl.checkDirectoryStructure(cur); + } + LegacyCookieValidation cookieValidation = new LegacyCookieValidation(conf, rm); + cookieValidation.checkCookies(dirs); + + } + /** * Test starting bookie with clean state. 
*/ @Test public void testCleanStart() throws Exception { ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); - conf.setJournalDirName(newDirectory(false)) - .setLedgerDirNames(new String[] { newDirectory(false) }) + conf.setJournalDirName(newDirectory(true)) + .setLedgerDirNames(new String[] { newDirectory(true) }) .setBookiePort(bookiePort) .setMetadataServiceUri(zkUtil.getMetadataServiceUri()); - try { - Bookie b = new Bookie(conf); - } catch (Exception e) { - fail("Should not reach here."); - } + + validateConfig(conf); } /** @@ -143,7 +170,8 @@ public void testBadJournalCookie() throws Exception { c2.writeToDirectory(new File(ledgerDir, "current")); try { - Bookie b = new Bookie(conf2); + validateConfig(conf2); + fail("Shouldn't have been able to start"); } catch (BookieException.InvalidCookieException ice) { // correct behaviour @@ -166,13 +194,11 @@ public void testDirectoryMissing() throws Exception { .setBookiePort(bookiePort) .setMetadataServiceUri(zkUtil.getMetadataServiceUri()); - Bookie b = new Bookie(conf); // should work fine - b.start(); - b.shutdown(); + validateConfig(conf); conf.setLedgerDirNames(new String[] { ledgerDirs[0], ledgerDirs[1] }); try { - Bookie b2 = new Bookie(conf); + validateConfig(conf); fail("Shouldn't have been able to start"); } catch (BookieException.InvalidCookieException ice) { // correct behaviour @@ -180,16 +206,14 @@ public void testDirectoryMissing() throws Exception { conf.setJournalDirName(newDirectory()).setLedgerDirNames(ledgerDirs); try { - Bookie b2 = new Bookie(conf); + validateConfig(conf); fail("Shouldn't have been able to start"); } catch (BookieException.InvalidCookieException ice) { // correct behaviour } conf.setJournalDirName(journalDir); - b = new Bookie(conf); - b.start(); - b.shutdown(); + validateConfig(conf); } /** @@ -207,15 +231,13 @@ public void testCookieMissingOnJournalDir() throws Exception { .setBookiePort(bookiePort) .setMetadataServiceUri(zkUtil.getMetadataServiceUri()); - Bookie b = new Bookie(conf); // should work fine - b.start(); - b.shutdown(); + validateConfig(conf); File cookieFile = - new File(Bookie.getCurrentDirectory(new File(journalDir)), BookKeeperConstants.VERSION_FILENAME); + new File(BookieImpl.getCurrentDirectory(new File(journalDir)), BookKeeperConstants.VERSION_FILENAME); assertTrue(cookieFile.delete()); try { - new Bookie(conf); + validateConfig(conf); fail("Shouldn't have been able to start"); } catch (BookieException.InvalidCookieException ice) { // correct behaviour @@ -237,15 +259,13 @@ public void testCookieMissingOnLedgerDir() throws Exception { .setBookiePort(bookiePort) .setMetadataServiceUri(zkUtil.getMetadataServiceUri()); - Bookie b = new Bookie(conf); // should work fine - b.start(); - b.shutdown(); + validateConfig(conf); File cookieFile = - new File(Bookie.getCurrentDirectory(new File(ledgerDirs[0])), BookKeeperConstants.VERSION_FILENAME); + new File(BookieImpl.getCurrentDirectory(new File(ledgerDirs[0])), BookKeeperConstants.VERSION_FILENAME); assertTrue(cookieFile.delete()); try { - new Bookie(conf); + validateConfig(conf); fail("Shouldn't have been able to start"); } catch (BookieException.InvalidCookieException ice) { // correct behaviour @@ -267,22 +287,18 @@ public void testDirectoryAdded() throws Exception { .setBookiePort(bookiePort) .setMetadataServiceUri(zkUtil.getMetadataServiceUri()); - Bookie b = new Bookie(conf); // should work fine - b.start(); - b.shutdown(); + validateConfig(conf); conf.setLedgerDirNames(new String[] { ledgerDir0, newDirectory() 
}); try { - Bookie b2 = new Bookie(conf); + validateConfig(conf); fail("Shouldn't have been able to start"); } catch (BookieException.InvalidCookieException ice) { // correct behaviour } conf.setLedgerDirNames(new String[] { ledgerDir0 }); - b = new Bookie(conf); - b.start(); - b.shutdown(); + validateConfig(conf); } /** @@ -302,10 +318,7 @@ public void testStorageExpansionOption() throws Exception { .setAllowStorageExpansion(true) .setMetadataServiceUri(zkUtil.getMetadataServiceUri()); - Bookie b = new Bookie(conf); // should work fine - b.start(); - b.shutdown(); - b = null; + validateConfig(conf); // add a few additional ledger dirs String[] lPaths = new String[] {ledgerDir0, newDirectory(), newDirectory()}; @@ -318,12 +331,12 @@ public void testStorageExpansionOption() throws Exception { conf.setIndexDirName(iPaths); try { - b = new Bookie(conf); + validateConfig(conf); } catch (BookieException.InvalidCookieException ice) { fail("Should have been able to start the bookie"); } - List l = b.getLedgerDirsManager().getAllLedgerDirs(); + List l = currentDirectoryList(conf.getLedgerDirs()); HashSet bookieLedgerDirs = Sets.newHashSet(); for (File f : l) { // Using the parent path because the bookie creates a 'current' @@ -334,7 +347,7 @@ public void testStorageExpansionOption() throws Exception { + bookieLedgerDirs, configuredLedgerDirs.equals(bookieLedgerDirs)); - l = b.getIndexDirsManager().getAllLedgerDirs(); + l = currentDirectoryList(conf.getIndexDirs()); HashSet bookieIndexDirs = Sets.newHashSet(); for (File f : l) { bookieIndexDirs.add(f.getParent()); @@ -343,14 +356,12 @@ public void testStorageExpansionOption() throws Exception { + bookieIndexDirs, configuredIndexDirs.equals(bookieIndexDirs)); - b.shutdown(); - // Make sure that substituting an older ledger directory // is not allowed. 
String[] lPaths2 = new String[] { lPaths[0], lPaths[1], newDirectory() }; conf.setLedgerDirNames(lPaths2); try { - b = new Bookie(conf); + validateConfig(conf); fail("Should not have been able to start the bookie"); } catch (BookieException.InvalidCookieException ice) { // correct behavior @@ -361,7 +372,7 @@ public void testStorageExpansionOption() throws Exception { lPaths2 = new String[] { lPaths[0], lPaths[1] }; conf.setLedgerDirNames(lPaths2); try { - b = new Bookie(conf); + validateConfig(conf); fail("Should not have been able to start the bookie"); } catch (BookieException.InvalidCookieException ice) { // correct behavior @@ -385,22 +396,19 @@ public void testNonEmptyDirAddWithStorageExpansionOption() throws Exception { .setAllowStorageExpansion(true) .setMetadataServiceUri(zkUtil.getMetadataServiceUri()); - Bookie b = new Bookie(conf); // should work fine - b.start(); - b.shutdown(); - b = null; + validateConfig(conf); // add an additional ledger dir String[] lPaths = new String[] {ledgerDir0, newDirectory()}; conf.setLedgerDirNames(lPaths); // create a file to make the dir non-empty - File currentDir = Bookie.getCurrentDirectory(new File(lPaths[1])); + File currentDir = BookieImpl.getCurrentDirectory(new File(lPaths[1])); new File(currentDir, "foo").createNewFile(); assertTrue(currentDir.list().length == 1); try { - b = new Bookie(conf); + validateConfig(conf); fail("Shouldn't have been able to start"); } catch (BookieException.InvalidCookieException ice) { // correct behavior @@ -411,12 +419,12 @@ public void testNonEmptyDirAddWithStorageExpansionOption() throws Exception { conf.setIndexDirName(iPaths); // create a dir to make it non-empty - currentDir = Bookie.getCurrentDirectory(new File(iPaths[1])); + currentDir = BookieImpl.getCurrentDirectory(new File(iPaths[1])); new File(currentDir, "bar").mkdirs(); assertTrue(currentDir.list().length == 1); try { - b = new Bookie(conf); + validateConfig(conf); fail("Shouldn't have been able to start"); } catch (BookieException.InvalidCookieException ice) { // correct behavior @@ -437,13 +445,11 @@ public void testDirectoryCleared() throws Exception { .setBookiePort(bookiePort) .setMetadataServiceUri(zkUtil.getMetadataServiceUri()); - Bookie b = new Bookie(conf); // should work fine - b.start(); - b.shutdown(); + validateConfig(conf); FileUtils.deleteDirectory(new File(ledgerDir0)); try { - Bookie b2 = new Bookie(conf); + validateConfig(conf); fail("Shouldn't have been able to start"); } catch (BookieException.InvalidCookieException ice) { // correct behaviour @@ -461,13 +467,11 @@ public void testBookiePortChanged() throws Exception { .setLedgerDirNames(new String[] { newDirectory() , newDirectory() }) .setBookiePort(bookiePort) .setMetadataServiceUri(zkUtil.getMetadataServiceUri()); - Bookie b = new Bookie(conf); // should work fine - b.start(); - b.shutdown(); + validateConfig(conf); conf.setBookiePort(3182); try { - b = new Bookie(conf); + validateConfig(conf); fail("Shouldn't have been able to start"); } catch (BookieException.InvalidCookieException ice) { // correct behaviour @@ -487,9 +491,7 @@ public void testNewBookieStartingWithAnotherBookiesPort() throws Exception { .setLedgerDirNames(new String[] { newDirectory() , newDirectory() }) .setBookiePort(bookiePort) .setMetadataServiceUri(zkUtil.getMetadataServiceUri()); - Bookie b = new Bookie(conf); // should work fine - b.start(); - b.shutdown(); + validateConfig(conf); conf = TestBKConfiguration.newServerConfiguration(); conf.setJournalDirName(newDirectory()) @@ -497,7 +499,7 @@ 
public void testNewBookieStartingWithAnotherBookiesPort() throws Exception { .setBookiePort(bookiePort) .setMetadataServiceUri(zkUtil.getMetadataServiceUri()); try { - b = new Bookie(conf); + validateConfig(conf); fail("Shouldn't have been able to start"); } catch (BookieException.InvalidCookieException ice) { // correct behaviour @@ -517,17 +519,17 @@ public void testVerifyCookieWithFormat() throws Exception { BookKeeperAdmin.format(adminConf, false, true); ServerConfiguration bookieConf = TestBKConfiguration.newServerConfiguration(); - bookieConf.setJournalDirName(newDirectory(false)) - .setLedgerDirNames(new String[] { newDirectory(false) }) + bookieConf.setJournalDirName(newDirectory(true)) + .setLedgerDirNames(new String[] { newDirectory(true) }) .setBookiePort(bookiePort) .setMetadataServiceUri(zkUtil.getMetadataServiceUri()); // Bookie should start successfully for fresh env. - new Bookie(bookieConf); + validateConfig(bookieConf); // Format metadata one more time. BookKeeperAdmin.format(adminConf, false, true); try { - new Bookie(bookieConf); + validateConfig(bookieConf); fail("Bookie should not start with previous instance id."); } catch (BookieException.InvalidCookieException e) { assertTrue( @@ -536,9 +538,9 @@ public void testVerifyCookieWithFormat() throws Exception { } // Now format the Bookie and restart. - Bookie.format(bookieConf, false, true); + BookieImpl.format(bookieConf, false, true); // After bookie format bookie should be able to start again. - new Bookie(bookieConf); + validateConfig(bookieConf); } /** @@ -547,10 +549,8 @@ public void testVerifyCookieWithFormat() throws Exception { */ @Test public void testV2data() throws Exception { - File journalDir = newV2JournalDirectory(); - tmpDirs.add(journalDir); - File ledgerDir = newV2LedgerDirectory(); - tmpDirs.add(ledgerDir); + File journalDir = initV2JournalDirectory(tmpDirs.createNew("bookie", "journal")); + File ledgerDir = initV2LedgerDirectory(tmpDirs.createNew("bookie", "ledger")); ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); conf.setJournalDirName(journalDir.getPath()) @@ -558,11 +558,18 @@ public void testV2data() throws Exception { .setBookiePort(bookiePort) .setMetadataServiceUri(zkUtil.getMetadataServiceUri()); try { - Bookie b = new Bookie(conf); + BookieImpl.checkDirectoryStructure(BookieImpl.getCurrentDirectory(journalDir)); fail("Shouldn't have been able to start"); - } catch (BookieException.InvalidCookieException ice) { + } catch (IOException ioe) { // correct behaviour - assertTrue("wrong exception", ice.getCause().getMessage().contains("upgrade needed")); + assertTrue("wrong exception", ioe.getMessage().contains("upgrade needed")); + } + try { + BookieImpl.checkDirectoryStructure(BookieImpl.getCurrentDirectory(ledgerDir)); + fail("Shouldn't have been able to start"); + } catch (IOException ioe) { + // correct behaviour + assertTrue("wrong exception", ioe.getMessage().contains("upgrade needed")); } } @@ -572,10 +579,8 @@ public void testV2data() throws Exception { */ @Test public void testV1data() throws Exception { - File journalDir = newV1JournalDirectory(); - tmpDirs.add(journalDir); - File ledgerDir = newV1LedgerDirectory(); - tmpDirs.add(ledgerDir); + File journalDir = initV1JournalDirectory(tmpDirs.createNew("bookie", "journal")); + File ledgerDir = initV1LedgerDirectory(tmpDirs.createNew("bookie", "ledger")); ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); conf.setJournalDirName(journalDir.getPath()) @@ -583,11 +588,19 @@ public void 
testV1data() throws Exception { .setBookiePort(bookiePort) .setMetadataServiceUri(zkUtil.getMetadataServiceUri()); try { - Bookie b = new Bookie(conf); + BookieImpl.checkDirectoryStructure(BookieImpl.getCurrentDirectory(journalDir)); fail("Shouldn't have been able to start"); - } catch (BookieException.InvalidCookieException ice) { + } catch (IOException ioe) { // correct behaviour - assertTrue("wrong exception", ice.getCause().getMessage().contains("upgrade needed")); + assertTrue("wrong exception", ioe.getMessage().contains("upgrade needed")); + } + + try { + BookieImpl.checkDirectoryStructure(BookieImpl.getCurrentDirectory(ledgerDir)); + fail("Shouldn't have been able to start"); + } catch (IOException ioe) { + // correct behaviour + assertTrue("wrong exception", ioe.getMessage().contains("upgrade needed")); } } @@ -605,13 +618,11 @@ public void testRestartWithHostNameAsBookieID() throws Exception { .setLedgerDirNames(ledgerDirs) .setBookiePort(bookiePort) .setMetadataServiceUri(zkUtil.getMetadataServiceUri()); - Bookie b = new Bookie(conf); // should work fine - b.start(); - b.shutdown(); + validateConfig(conf); conf.setUseHostNameAsBookieID(true); try { - new Bookie(conf); + validateConfig(conf); fail("Should not start a bookie with hostname if the bookie has been started with an ip"); } catch (InvalidCookieException e) { // expected @@ -632,13 +643,11 @@ public void testRestartWithAdvertisedAddressAsBookieID() throws Exception { .setBookiePort(bookiePort) .setMetadataServiceUri(zkUtil.getMetadataServiceUri()); conf.setUseHostNameAsBookieID(false); - Bookie b = new Bookie(conf); // should work fine - b.start(); - b.shutdown(); + validateConfig(conf); conf.setAdvertisedAddress("unknown"); try { - new Bookie(conf); + validateConfig(conf); fail("Should not start a bookie with ip if the bookie has been started with an ip"); } catch (InvalidCookieException e) { // expected @@ -659,13 +668,11 @@ public void testRestartWithIpAddressAsBookieID() throws Exception { .setBookiePort(bookiePort) .setMetadataServiceUri(zkUtil.getMetadataServiceUri()); conf.setUseHostNameAsBookieID(true); - Bookie b = new Bookie(conf); // should work fine - b.start(); - b.shutdown(); + validateConfig(conf); conf.setUseHostNameAsBookieID(false); try { - new Bookie(conf); + validateConfig(conf); fail("Should not start a bookie with ip if the bookie has been started with an ip"); } catch (InvalidCookieException e) { // expected @@ -678,10 +685,8 @@ public void testRestartWithIpAddressAsBookieID() throws Exception { */ @Test public void testV2dataWithHostNameAsBookieID() throws Exception { - File journalDir = newV2JournalDirectory(); - tmpDirs.add(journalDir); - File ledgerDir = newV2LedgerDirectory(); - tmpDirs.add(ledgerDir); + File journalDir = initV2JournalDirectory(tmpDirs.createNew("bookie", "journal")); + File ledgerDir = initV2LedgerDirectory(tmpDirs.createNew("bookie", "ledger")); ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); conf.setJournalDirName(journalDir.getPath()) @@ -689,13 +694,19 @@ public void testV2dataWithHostNameAsBookieID() throws Exception { .setBookiePort(bookiePort) .setMetadataServiceUri(zkUtil.getMetadataServiceUri()); try { - conf.setUseHostNameAsBookieID(true); - new Bookie(conf); + BookieImpl.checkDirectoryStructure(BookieImpl.getCurrentDirectory(ledgerDir)); fail("Shouldn't have been able to start"); - } catch (BookieException.InvalidCookieException ice) { + } catch (IOException ioe) { // correct behaviour - assertTrue("wrong exception", - 
ice.getCause().getMessage().contains("upgrade needed")); + assertTrue("wrong exception", ioe.getMessage().contains("upgrade needed")); + } + + try { + BookieImpl.checkDirectoryStructure(BookieImpl.getCurrentDirectory(journalDir)); + fail("Shouldn't have been able to start"); + } catch (IOException ioe) { + // correct behaviour + assertTrue("wrong exception", ioe.getMessage().contains("upgrade needed")); } } @@ -711,9 +722,7 @@ public void testWriteToZooKeeper() throws Exception { .setLedgerDirNames(ledgerDirs) .setBookiePort(bookiePort) .setMetadataServiceUri(zkUtil.getMetadataServiceUri()); - Bookie b = new Bookie(conf); // should work fine - b.start(); - b.shutdown(); + validateConfig(conf); Versioned zkCookie = Cookie.readFromRegistrationManager(rm, conf); Version version1 = zkCookie.getVersion(); assertTrue("Invalid type expected ZkVersion type", @@ -743,11 +752,31 @@ public void testDeleteFromZooKeeper() throws Exception { .setLedgerDirNames(ledgerDirs) .setBookiePort(bookiePort) .setMetadataServiceUri(zkUtil.getMetadataServiceUri()); - Bookie b = new Bookie(conf); // should work fine - b.start(); - b.shutdown(); + validateConfig(conf); Versioned zkCookie = Cookie.readFromRegistrationManager(rm, conf); Cookie cookie = zkCookie.getValue(); cookie.deleteFromRegistrationManager(rm, conf, zkCookie.getVersion()); } + + /** + * Tests that custom Bookie Id is properly set in the Cookie (via {@link LegacyCookieValidation}). + */ + @Test + public void testBookieIdSetting() throws Exception { + final String customBookieId = "myCustomBookieId" + new Random().nextInt(); + ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); + conf.setJournalDirName(newDirectory()) + .setLedgerDirNames(new String[] { newDirectory() , newDirectory() }) + .setBookiePort(bookiePort) + .setBookieId(customBookieId) + .setMetadataServiceUri(zkUtil.getMetadataServiceUri()); + validateConfig(conf); + Versioned zkCookie = Cookie.readFromRegistrationManager(rm, conf); + Version version1 = zkCookie.getVersion(); + assertTrue("Invalid type expected ZkVersion type", + version1 instanceof LongVersion); + Cookie cookie = zkCookie.getValue(); + cookie.writeToRegistrationManager(rm, conf, version1); + Assert.assertTrue(cookie.toString().contains(customBookieId)); + } } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/CreateNewLogTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/CreateNewLogTest.java index f5d4edce967..4190feaeda9 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/CreateNewLogTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/CreateNewLogTest.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
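An aside on the CookieTest hunks above: they swap `new Bookie(conf)` for a `validateConfig(conf)` helper whose definition is outside this section of the diff. Judging from the testBookieIdSetting javadoc, the cookie checks now live in LegacyCookieValidation. A minimal sketch of what such a helper could look like, reusing the test's existing RegistrationManager `rm`; the body here is an assumption, not part of the patch:

    private void validateConfig(ServerConfiguration conf) throws Exception {
        // collect every journal and ledger directory of this configuration
        List<File> dirs = new ArrayList<>();
        dirs.addAll(Arrays.asList(conf.getJournalDirs()));
        dirs.addAll(Arrays.asList(conf.getLedgerDirs()));
        // run the same cookie verification the Bookie constructor used to run;
        // mismatches surface as BookieException.InvalidCookieException, which
        // is exactly what the tests above catch
        new LegacyCookieValidation(conf, rm).checkCookies(dirs);
    }

This keeps the failure mode of the old `new Bookie(conf)` pattern while avoiding the cost of starting and stopping a full bookie in every test.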
See the NOTICE file * distributed with this work for additional information @@ -21,7 +21,7 @@ import static org.junit.Assert.assertTrue; import com.google.common.util.concurrent.MoreExecutors; - +import io.netty.buffer.UnpooledByteBufAllocator; import java.io.File; import java.io.IOException; import java.util.HashMap; @@ -36,10 +36,7 @@ import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.locks.Lock; import java.util.stream.IntStream; - import org.apache.bookkeeper.bookie.EntryLogManagerForEntryLogPerLedger.BufferedLogChannelWithDirInfo; -import org.apache.bookkeeper.bookie.EntryLogger.BufferedLogChannel; -import org.apache.bookkeeper.bookie.LedgerDirsManager.NoWritableLedgerDirException; import org.apache.bookkeeper.conf.ServerConfiguration; import org.apache.bookkeeper.conf.TestBKConfiguration; import org.apache.bookkeeper.stats.Counter; @@ -120,7 +117,7 @@ public void testCreateNewLog() throws Exception { File newLogFile = new File(dir, logFileName); newLogFile.createNewFile(); - EntryLogger el = new EntryLogger(conf, ledgerDirsManager); + DefaultEntryLogger el = new DefaultEntryLogger(conf, ledgerDirsManager); // Calls createNewLog, and with the number of directories we // are using, if it picks one at random it will fail. EntryLogManagerForSingleEntryLog entryLogManager = (EntryLogManagerForSingleEntryLog) el.getEntryLogManager(); @@ -152,7 +149,7 @@ public void testCreateNewLogWithNoWritableLedgerDirs() throws Exception { ledgerDirsManager.addToFilledDirs(tdir); } - EntryLogger el = new EntryLogger(conf, ledgerDirsManager); + DefaultEntryLogger el = new DefaultEntryLogger(conf, ledgerDirsManager); // Calls createNewLog, and with the number of directories we // are using, if it picks one at random it will fail. EntryLogManagerForSingleEntryLog entryLogManager = (EntryLogManagerForSingleEntryLog) el.getEntryLogManager(); @@ -189,7 +186,7 @@ public void testEntryLogPerLedgerCreationWithPreAllocation() throws Exception { conf.setEntryLogPerLedgerEnabled(true); LedgerDirsManager ledgerDirsManager = new LedgerDirsManager(conf, conf.getLedgerDirs(), new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold())); - EntryLogger entryLogger = new EntryLogger(conf, ledgerDirsManager); + DefaultEntryLogger entryLogger = new DefaultEntryLogger(conf, ledgerDirsManager); EntryLoggerAllocator entryLoggerAllocator = entryLogger.entryLoggerAllocator; EntryLogManagerForEntryLogPerLedger entryLogManager = (EntryLogManagerForEntryLogPerLedger) entryLogger .getEntryLogManager(); @@ -242,7 +239,7 @@ public void testEntryLogPerLedgerCreationWithPreAllocation() throws Exception { expectedPreAllocatedLogID, entryLoggerAllocator.getPreallocatedLogId()); Assert.assertEquals("Number of current ", numOfLedgers, entryLogManager.getCopyOfCurrentLogs().size()); - List rotatedLogChannels = entryLogManager.getRotatedLogChannels(); + List rotatedLogChannels = entryLogManager.getRotatedLogChannels(); Assert.assertEquals("Number of LogChannels rotated", 1, rotatedLogChannels.size()); Assert.assertEquals("Rotated logchannel logid", rotatedLedger, rotatedLogChannels.iterator().next().getLogId()); entryLogger.flush(); @@ -297,7 +294,7 @@ public void testEntryLogCreationWithFilledDirs() throws Exception { conf.setEntryLogPerLedgerEnabled(true); LedgerDirsManager ledgerDirsManager = new LedgerDirsManager(conf, conf.getLedgerDirs(), new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold())); - EntryLogger entryLogger = new EntryLogger(conf, ledgerDirsManager); + 
DefaultEntryLogger entryLogger = new DefaultEntryLogger(conf, ledgerDirsManager); EntryLoggerAllocator entryLoggerAllocator = entryLogger.entryLoggerAllocator; EntryLogManagerForEntryLogPerLedger entryLogManager = (EntryLogManagerForEntryLogPerLedger) entryLogger.getEntryLogManager(); @@ -321,20 +318,20 @@ public void testEntryLogCreationWithFilledDirs() throws Exception { expectedPreAllocatedLogIDDuringInitialization + 1, entryLoggerAllocator.getPreallocatedLogId()); for (int i = 0; i < numDirs - 1; i++) { - ledgerDirsManager.addToFilledDirs(Bookie.getCurrentDirectory(new File(ledgerDirs[i]))); + ledgerDirsManager.addToFilledDirs(BookieImpl.getCurrentDirectory(new File(ledgerDirs[i]))); } /* * this is the only non-filled ledgerDir so it should be used for creating new entryLog */ - File nonFilledLedgerDir = Bookie.getCurrentDirectory(new File(ledgerDirs[numDirs - 1])); + File nonFilledLedgerDir = BookieImpl.getCurrentDirectory(new File(ledgerDirs[numDirs - 1])); entryLogManager.createNewLog(ledgerId); - BufferedLogChannel newLogChannel = entryLogManager.getCurrentLogForLedger(ledgerId); + DefaultEntryLogger.BufferedLogChannel newLogChannel = entryLogManager.getCurrentLogForLedger(ledgerId); Assert.assertEquals("Directory of newly created BufferedLogChannel file", nonFilledLedgerDir.getAbsolutePath(), newLogChannel.getLogFile().getParentFile().getAbsolutePath()); - ledgerDirsManager.addToFilledDirs(Bookie.getCurrentDirectory(new File(ledgerDirs[numDirs - 1]))); + ledgerDirsManager.addToFilledDirs(BookieImpl.getCurrentDirectory(new File(ledgerDirs[numDirs - 1]))); // new entrylog creation should succeed, though there is no writable ledgerDir entryLogManager.createNewLog(ledgerId); @@ -355,7 +352,7 @@ public void testLedgerDirsUniformityDuringCreation() throws Exception { conf.setEntryLogPerLedgerEnabled(true); LedgerDirsManager ledgerDirsManager = new LedgerDirsManager(conf, conf.getLedgerDirs(), new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold())); - EntryLogger entryLogger = new EntryLogger(conf, ledgerDirsManager); + DefaultEntryLogger entryLogger = new DefaultEntryLogger(conf, ledgerDirsManager); EntryLogManagerForEntryLogPerLedger entrylogManager = (EntryLogManagerForEntryLogPerLedger) entryLogger.getEntryLogManager(); @@ -420,7 +417,7 @@ public void testConcurrentCreateNewLog(boolean entryLogFilePreAllocationEnabled) LedgerDirsManager ledgerDirsManager = new LedgerDirsManager(conf, conf.getLedgerDirs(), new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold())); - EntryLogger el = new EntryLogger(conf, ledgerDirsManager); + DefaultEntryLogger el = new DefaultEntryLogger(conf, ledgerDirsManager); EntryLogManagerBase entryLogManager = (EntryLogManagerBase) el.getEntryLogManager(); // set same thread executor for entryLoggerAllocator's allocatorExecutor setSameThreadExecutorForEntryLoggerAllocator(el.getEntryLoggerAllocator()); @@ -465,7 +462,7 @@ public void testCreateNewLogWithGaps() throws Exception { LedgerDirsManager ledgerDirsManager = new LedgerDirsManager(conf, conf.getLedgerDirs(), new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold())); - EntryLogger el = new EntryLogger(conf, ledgerDirsManager); + DefaultEntryLogger el = new DefaultEntryLogger(conf, ledgerDirsManager); EntryLogManagerBase entryLogManagerBase = (EntryLogManagerBase) el.getEntryLogManager(); entryLogManagerBase.createNewLog(0L); @@ -504,7 +501,7 @@ public void testCreateNewLogAndCompactionLog() throws Exception { 
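An aside on the rename in the surrounding hunks: the EntryLogger to DefaultEntryLogger change is mechanical, but two constructor shapes occur in this file and are easy to conflate. Both signatures below are taken from the hunks themselves; the `null` third argument is presumably the EntryLogListener, and NullStatsLogger is used here only to keep the sketch self-contained:

    ServerConfiguration conf = TestBKConfiguration.newServerConfiguration();
    LedgerDirsManager dirsMgr = new LedgerDirsManager(conf, conf.getLedgerDirs(),
            new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold()));
    // short form, used by most tests in this file:
    DefaultEntryLogger el = new DefaultEntryLogger(conf, dirsMgr);
    // long form, used where a StatsLogger is needed; unlike the old 4-argument
    // EntryLogger constructor it also takes an explicit ByteBufAllocator:
    StatsLogger statsLogger = NullStatsLogger.INSTANCE;
    DefaultEntryLogger elWithStats = new DefaultEntryLogger(conf, dirsMgr,
            null /* EntryLogListener */, statsLogger, UnpooledByteBufAllocator.DEFAULT);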
conf.setEntryLogFilePreAllocationEnabled(true);
LedgerDirsManager ledgerDirsManager = new LedgerDirsManager(conf, conf.getLedgerDirs(),
new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold()));
- EntryLogger el = new EntryLogger(conf, ledgerDirsManager);
+ DefaultEntryLogger el = new DefaultEntryLogger(conf, ledgerDirsManager);
// set same thread executor for entryLoggerAllocator's allocatorExecutor
setSameThreadExecutorForEntryLoggerAllocator(el.getEntryLoggerAllocator());
AtomicBoolean receivedException = new AtomicBoolean(false);
@@ -514,7 +511,7 @@ public void testCreateNewLogAndCompactionLog() throws Exception {
if (i % 2 == 0) {
((EntryLogManagerBase) el.getEntryLogManager()).createNewLog((long) i);
} else {
- el.createNewCompactionLog();
+ el.newCompactionLog(i);
}
} catch (IOException e) {
LOG.error("Received exception while creating newLog", e);
@@ -528,6 +525,46 @@ public void testCreateNewLogAndCompactionLog() throws Exception {
el.getPreviousAllocatedEntryLogId());
}
+ @Test
+ public void testLastIdCompatibleBetweenDefaultAndDirectEntryLogger() throws Exception {
+ ServerConfiguration conf = TestBKConfiguration.newServerConfiguration();
+
+ // Creating a new configuration with a number of
+ // ledger directories.
+ conf.setLedgerDirNames(ledgerDirs);
+ conf.setEntryLogFilePreAllocationEnabled(false);
+ LedgerDirsManager ledgerDirsManager = new LedgerDirsManager(conf, conf.getLedgerDirs(),
+ new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold()));
+
+ DefaultEntryLogger el = new DefaultEntryLogger(conf, ledgerDirsManager);
+ EntryLogManagerBase entryLogManagerBase = (EntryLogManagerBase) el.getEntryLogManager();
+ for (int i = 0; i < 10; i++) {
+ entryLogManagerBase.createNewLog(i);
+ }
+
+ Assert.assertEquals(9, el.getPreviousAllocatedEntryLogId());
+
+ // Mock a stale lastId of 3 in half of the ledgerDirs.
+ for (int i = 0; i < ledgerDirsManager.getAllLedgerDirs().size() / 2; i++) {
+ File dir = ledgerDirsManager.getAllLedgerDirs().get(i);
+ LOG.info("Picked this directory: {}", dir);
+ el.getEntryLoggerAllocator().setLastLogId(dir, 3);
+ }
+
+ el = new DefaultEntryLogger(conf, ledgerDirsManager);
+ Assert.assertEquals(9, el.getPreviousAllocatedEntryLogId());
+
+ // Mock a stale lastId of 3 in all of the ledgerDirs.
+ for (int i = 0; i < ledgerDirsManager.getAllLedgerDirs().size(); i++) {
+ File dir = ledgerDirsManager.getAllLedgerDirs().get(i);
+ LOG.info("Picked this directory: {}", dir);
+ el.getEntryLoggerAllocator().setLastLogId(dir, 3);
+ }
+
+ el = new DefaultEntryLogger(conf, ledgerDirsManager);
+ Assert.assertEquals(9, el.getPreviousAllocatedEntryLogId());
+ }
+
/*
* In this testcase entrylogs for ledgers are tried to create concurrently.
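An aside on the testLastIdCompatibleBetweenDefaultAndDirectEntryLogger test above: it exercises the per-directory `lastId` marker file (the same file testMissingLogId deletes later in this diff; entry log files themselves are named `<hex-id>.log`). As far as these tests reveal, recovery has to take the maximum over every directory's `lastId` and over the log files actually on disk, which is why a stale value of 3 in some, or even all, directories still yields 9. A purely illustrative sketch of that recovery rule, not the actual implementation:

    static long recoverLastLogId(List<File> ledgerDirs) throws IOException {
        long max = -1;
        for (File dir : ledgerDirs) {
            File marker = new File(dir, "lastId"); // hex string of the last allocated id
            if (marker.exists()) {
                String hex = new String(Files.readAllBytes(marker.toPath()),
                        StandardCharsets.UTF_8).trim();
                max = Math.max(max, Long.parseLong(hex, 16));
            }
            // also consider the <hex-id>.log files themselves, so a stale
            // marker can never roll the allocator backwards
            File[] logs = dir.listFiles((d, name) -> name.endsWith(".log"));
            if (logs != null) {
                for (File log : logs) {
                    String name = log.getName();
                    max = Math.max(max, Long.parseLong(name.substring(0, name.length() - 4), 16));
                }
            }
        }
        return max;
    }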
*/ @@ -542,7 +579,7 @@ public void testConcurrentEntryLogCreations() throws Exception { conf.setEntryLogPerLedgerEnabled(true); LedgerDirsManager ledgerDirsManager = new LedgerDirsManager(conf, conf.getLedgerDirs(), new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold())); - EntryLogger entryLogger = new EntryLogger(conf, ledgerDirsManager); + DefaultEntryLogger entryLogger = new DefaultEntryLogger(conf, ledgerDirsManager); EntryLogManagerForEntryLogPerLedger entrylogManager = (EntryLogManagerForEntryLogPerLedger) entryLogger.getEntryLogManager(); @@ -560,6 +597,7 @@ public void testConcurrentEntryLogCreations() throws Exception { startLatch.await(); entrylogManager.createNewLog(ledgerId); createdEntryLogs.incrementAndGet(); + Thread.sleep(2000); } catch (InterruptedException | IOException e) { LOG.error("Got exception while trying to createNewLog for Ledger: " + ledgerId, e); } finally { @@ -570,7 +608,7 @@ public void testConcurrentEntryLogCreations() throws Exception { } startLatch.countDown(); - createdLatch.await(5, TimeUnit.SECONDS); + createdLatch.await(20, TimeUnit.SECONDS); Assert.assertEquals("Created EntryLogs", numOfLedgers * numOfThreadsForSameLedger, createdEntryLogs.get()); Assert.assertEquals("Active currentlogs size", numOfLedgers, entrylogManager.getCopyOfCurrentLogs().size()); Assert.assertEquals("Rotated entrylogs size", (numOfThreadsForSameLedger - 1) * numOfLedgers, @@ -606,7 +644,8 @@ public void testEntryLogManagerMetrics() throws Exception { conf.setEntryLogPerLedgerCounterLimitsMultFactor(entryLogPerLedgerCounterLimitsMultFactor); LedgerDirsManager ledgerDirsManager = new LedgerDirsManager(conf, conf.getLedgerDirs(), new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold())); - EntryLogger entryLogger = new EntryLogger(conf, ledgerDirsManager, null, statsLogger); + DefaultEntryLogger entryLogger = new DefaultEntryLogger(conf, ledgerDirsManager, null, statsLogger, + UnpooledByteBufAllocator.DEFAULT); EntryLogManagerForEntryLogPerLedger entrylogManager = (EntryLogManagerForEntryLogPerLedger) entryLogger .getEntryLogManager(); // set same thread executor for entryLoggerAllocator's allocatorExecutor @@ -731,7 +770,8 @@ public void testEntryLogManagerMetricsFromExpiryAspect() throws Exception { conf.setEntryLogPerLedgerCounterLimitsMultFactor(entryLogPerLedgerCounterLimitsMultFactor); LedgerDirsManager ledgerDirsManager = new LedgerDirsManager(conf, conf.getLedgerDirs(), new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold())); - EntryLogger entryLogger = new EntryLogger(conf, ledgerDirsManager, null, statsLogger); + DefaultEntryLogger entryLogger = new DefaultEntryLogger(conf, ledgerDirsManager, null, statsLogger, + UnpooledByteBufAllocator.DEFAULT); EntryLogManagerForEntryLogPerLedger entrylogManager = (EntryLogManagerForEntryLogPerLedger) entryLogger .getEntryLogManager(); // set same thread executor for entryLoggerAllocator's allocatorExecutor @@ -809,7 +849,7 @@ public List getWritableLedgerDirsForNewLog() throws NoWritableLedgerDirExc } }; - EntryLogger el = new EntryLogger(conf, ledgerDirsManager); + DefaultEntryLogger el = new DefaultEntryLogger(conf, ledgerDirsManager); EntryLogManagerForEntryLogPerLedger entryLogManager = (EntryLogManagerForEntryLogPerLedger) el .getEntryLogManager(); @@ -822,7 +862,7 @@ public List getWritableLedgerDirsForNewLog() throws NoWritableLedgerDirExc /* * In a new thread, create newlog for 'firstLedgerId' and then set * 'newLogCreated' to true. 
Since this is the first createNewLog call, - * it is going to be blocked untill latch is countdowned to 0. + * it is going to be blocked until latch is countdowned to 0. */ new Thread() { @Override diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/DefaultEntryLogTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/DefaultEntryLogTest.java new file mode 100644 index 00000000000..4b92df528fe --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/DefaultEntryLogTest.java @@ -0,0 +1,1927 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.bookie; + +import static org.apache.bookkeeper.bookie.storage.EntryLogTestUtils.assertEntryEquals; +import static org.apache.bookkeeper.bookie.storage.EntryLogTestUtils.makeEntry; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyLong; +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; + +import com.google.common.collect.Sets; +import io.netty.buffer.ByteBuf; +import io.netty.buffer.Unpooled; +import io.netty.buffer.UnpooledByteBufAllocator; +import io.netty.util.ReferenceCountUtil; +import java.io.File; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.RandomAccessFile; +import java.nio.channels.FileChannel; +import java.nio.charset.Charset; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.LinkedList; +import java.util.List; +import java.util.Random; +import java.util.Set; +import java.util.concurrent.BrokenBarrierException; +import java.util.concurrent.Callable; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.CyclicBarrier; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicLongArray; +import java.util.concurrent.locks.Lock; +import org.apache.bookkeeper.bookie.DefaultEntryLogger.BufferedLogChannel; +import org.apache.bookkeeper.bookie.LedgerDirsManager.NoWritableLedgerDirException; +import org.apache.bookkeeper.common.testing.annotations.FlakyTest; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.conf.TestBKConfiguration; +import 
org.apache.bookkeeper.test.TestStatsProvider; +import org.apache.bookkeeper.util.DiskChecker; +import org.apache.bookkeeper.util.IOUtils; +import org.apache.bookkeeper.util.collections.ConcurrentLongLongHashMap; +import org.apache.commons.io.FileUtils; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.FixMethodOrder; +import org.junit.Ignore; +import org.junit.Test; +import org.junit.runners.MethodSorters; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Tests for EntryLog. + */ +@FixMethodOrder(MethodSorters.NAME_ASCENDING) +public class DefaultEntryLogTest { + private static final Logger LOG = LoggerFactory.getLogger(DefaultEntryLogTest.class); + + final List tempDirs = new ArrayList(); + final Random rand = new Random(); + + File createTempDir(String prefix, String suffix) throws IOException { + File dir = IOUtils.createTempDir(prefix, suffix); + tempDirs.add(dir); + return dir; + } + + private File rootDir; + private File curDir; + private ServerConfiguration conf; + private LedgerDirsManager dirsMgr; + private DefaultEntryLogger entryLogger; + + @Before + public void setUp() throws Exception { + this.rootDir = createTempDir("bkTest", ".dir"); + this.curDir = BookieImpl.getCurrentDirectory(rootDir); + BookieImpl.checkDirectoryStructure(curDir); + this.conf = TestBKConfiguration.newServerConfiguration(); + this.dirsMgr = new LedgerDirsManager( + conf, + new File[] { rootDir }, + new DiskChecker( + conf.getDiskUsageThreshold(), + conf.getDiskUsageWarnThreshold())); + this.entryLogger = new DefaultEntryLogger(conf, dirsMgr); + } + + @After + public void tearDown() throws Exception { + if (null != this.entryLogger) { + entryLogger.close(); + } + + for (File dir : tempDirs) { + FileUtils.deleteDirectory(dir); + } + tempDirs.clear(); + } + + @Test + public void testDeferCreateNewLog() throws Exception { + entryLogger.close(); + + // mark `curDir` as filled + this.conf.setMinUsableSizeForEntryLogCreation(1); + this.dirsMgr = new LedgerDirsManager( + conf, + new File[] { rootDir }, + new DiskChecker( + conf.getDiskUsageThreshold(), + conf.getDiskUsageWarnThreshold())); + this.dirsMgr.addToFilledDirs(curDir); + + entryLogger = new DefaultEntryLogger(conf, dirsMgr); + EntryLogManagerForSingleEntryLog entryLogManager = + (EntryLogManagerForSingleEntryLog) entryLogger.getEntryLogManager(); + assertEquals(DefaultEntryLogger.UNINITIALIZED_LOG_ID, entryLogManager.getCurrentLogId()); + + // add the first entry will trigger file creation + entryLogger.addEntry(1L, generateEntry(1, 1).nioBuffer()); + assertEquals(0L, entryLogManager.getCurrentLogId()); + } + + @Test + public void testEntryLogIsSealedWithPerLedgerDisabled() throws Exception { + ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); + conf.setEntryLogPerLedgerEnabled(false); + conf.setEntryLogFilePreAllocationEnabled(true); + + TestStatsProvider statsProvider = new TestStatsProvider(); + TestStatsProvider.TestStatsLogger statsLogger = + statsProvider.getStatsLogger(BookKeeperServerStats.ENTRYLOGGER_SCOPE); + DefaultEntryLogger entryLogger = new DefaultEntryLogger(conf, dirsMgr, null, statsLogger, + UnpooledByteBufAllocator.DEFAULT); + EntryLogManagerBase entrylogManager = (EntryLogManagerBase) entryLogger.getEntryLogManager(); + entrylogManager.createNewLog(0); + BufferedReadChannel channel = entryLogger.getChannelForLogId(0); + assertFalse(channel.sealed); + entrylogManager.createNewLog(1); + channel = entryLogger.getChannelForLogId(0); + 
assertFalse(channel.sealed); + entrylogManager.createNewLog(2); + channel = entryLogger.getChannelForLogId(1); + assertTrue(channel.sealed); + } + + @Test + public void testEntryLogIsSealedWithPerLedgerEnabled() throws Exception { + ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); + //If entryLogPerLedgerEnabled is true, the buffer channel sealed flag always false. + conf.setEntryLogPerLedgerEnabled(true); + conf.setEntryLogFilePreAllocationEnabled(true); + + TestStatsProvider statsProvider = new TestStatsProvider(); + TestStatsProvider.TestStatsLogger statsLogger = + statsProvider.getStatsLogger(BookKeeperServerStats.ENTRYLOGGER_SCOPE); + DefaultEntryLogger entryLogger = new DefaultEntryLogger(conf, dirsMgr, null, statsLogger, + UnpooledByteBufAllocator.DEFAULT); + EntryLogManagerBase entrylogManager = (EntryLogManagerBase) entryLogger.getEntryLogManager(); + entrylogManager.createNewLog(0); + BufferedReadChannel channel = entryLogger.getChannelForLogId(0); + assertFalse(channel.sealed); + entrylogManager.createNewLog(1); + channel = entryLogger.getChannelForLogId(0); + assertFalse(channel.sealed); + entrylogManager.createNewLog(2); + channel = entryLogger.getChannelForLogId(1); + assertFalse(channel.sealed); + } + + @Test + public void testDeferCreateNewLogWithoutEnoughDiskSpaces() throws Exception { + entryLogger.close(); + + // mark `curDir` as filled + this.conf.setMinUsableSizeForEntryLogCreation(Long.MAX_VALUE); + this.dirsMgr = new LedgerDirsManager( + conf, + new File[] { rootDir }, + new DiskChecker( + conf.getDiskUsageThreshold(), + conf.getDiskUsageWarnThreshold())); + this.dirsMgr.addToFilledDirs(curDir); + + entryLogger = new DefaultEntryLogger(conf, dirsMgr); + EntryLogManagerForSingleEntryLog entryLogManager = + (EntryLogManagerForSingleEntryLog) entryLogger.getEntryLogManager(); + assertEquals(DefaultEntryLogger.UNINITIALIZED_LOG_ID, entryLogManager.getCurrentLogId()); + + // add the first entry will trigger file creation + try { + entryLogger.addEntry(1L, generateEntry(1, 1).nioBuffer()); + fail("Should fail to append entry if there is no enough reserved space left"); + } catch (NoWritableLedgerDirException e) { + assertEquals(DefaultEntryLogger.UNINITIALIZED_LOG_ID, entryLogManager.getCurrentLogId()); + } + } + + @Test + public void testCorruptEntryLog() throws Exception { + // create some entries + entryLogger.addEntry(1L, generateEntry(1, 1).nioBuffer()); + entryLogger.addEntry(3L, generateEntry(3, 1).nioBuffer()); + entryLogger.addEntry(2L, generateEntry(2, 1).nioBuffer()); + entryLogger.flush(); + entryLogger.close(); + // now lets truncate the file to corrupt the last entry, which simulates a partial write + File f = new File(curDir, "0.log"); + RandomAccessFile raf = new RandomAccessFile(f, "rw"); + raf.setLength(raf.length() - 10); + raf.close(); + // now see which ledgers are in the log + entryLogger = new DefaultEntryLogger(conf, dirsMgr); + + EntryLogMetadata meta = entryLogger.getEntryLogMetadata(0L); + String metaString = meta.toString(); + assertEquals(metaString, + "{totalSize = 60, remainingSize = 60, ledgersMap = ConcurrentLongLongHashMap{1 => 30, 3 => 30}}"); + LOG.info("Extracted Meta From Entry Log {}", meta); + assertTrue(meta.getLedgersMap().containsKey(1L)); + assertFalse(meta.getLedgersMap().containsKey(2L)); + assertTrue(meta.getLedgersMap().containsKey(3L)); + } + + private static ByteBuf generateEntry(long ledger, long entry) { + byte[] data = generateDataString(ledger, entry).getBytes(); + ByteBuf bb = Unpooled.buffer(8 
+ 8 + data.length);
+ bb.writeLong(ledger);
+ bb.writeLong(entry);
+ bb.writeBytes(data);
+ return bb;
+ }
+
+ private ByteBuf generateEntry(long ledger, long entry, int length) {
+ ByteBuf bb = Unpooled.buffer(length);
+ bb.writeLong(ledger);
+ bb.writeLong(entry);
+ byte[] randbyteArray = new byte[length - 8 - 8];
+ rand.nextBytes(randbyteArray);
+ bb.writeBytes(randbyteArray);
+ return bb;
+ }
+
+ private static String generateDataString(long ledger, long entry) {
+ return ("ledger-" + ledger + "-" + entry);
+ }
+
+ @Test
+ public void testMissingLogId() throws Exception {
+ // create some entries
+ int numLogs = 3;
+ int numEntries = 10;
+ long[][] positions = new long[2 * numLogs][];
+ for (int i = 0; i < numLogs; i++) {
+ positions[i] = new long[numEntries];
+
+ DefaultEntryLogger logger = new DefaultEntryLogger(conf, dirsMgr);
+ for (int j = 0; j < numEntries; j++) {
+ positions[i][j] = logger.addEntry((long) i, generateEntry(i, j).nioBuffer());
+ }
+ logger.flush();
+ logger.close();
+ }
+ // delete last log id
+ File lastLogId = new File(curDir, "lastId");
+ lastLogId.delete();
+
+ // write more entries
+ for (int i = numLogs; i < 2 * numLogs; i++) {
+ positions[i] = new long[numEntries];
+
+ DefaultEntryLogger logger = new DefaultEntryLogger(conf, dirsMgr);
+ for (int j = 0; j < numEntries; j++) {
+ positions[i][j] = logger.addEntry((long) i, generateEntry(i, j).nioBuffer());
+ }
+ logger.flush();
+ logger.close();
+ }
+
+ DefaultEntryLogger newLogger = new DefaultEntryLogger(conf, dirsMgr);
+ for (int i = 0; i < (2 * numLogs + 1); i++) {
+ File logFile = new File(curDir, Long.toHexString(i) + ".log");
+ assertTrue(logFile.exists());
+ }
+ for (int i = 0; i < 2 * numLogs; i++) {
+ for (int j = 0; j < numEntries; j++) {
+ String expectedValue = "ledger-" + i + "-" + j;
+ ByteBuf value = newLogger.readEntry(i, j, positions[i][j]);
+ long ledgerId = value.readLong();
+ long entryId = value.readLong();
+ byte[] data = new byte[value.readableBytes()];
+ value.readBytes(data);
+ value.release();
+ assertEquals(i, ledgerId);
+ assertEquals(j, entryId);
+ assertEquals(expectedValue, new String(data));
+ }
+ }
+ }
+
+ /**
+ * Test that EntryLogger should fail with FNFE if the entry log directories do not exist.
+ */
+ @Ignore // no longer valid as LedgerDirsManager creates the directory as needed
+ public void testEntryLoggerShouldThrowFNFEIfDirectoriesDoesNotExist()
+ throws Exception {
+ File tmpDir = createTempDir("bkTest", ".dir");
+ DefaultEntryLogger entryLogger = null;
+ try {
+ entryLogger = new DefaultEntryLogger(conf, new LedgerDirsManager(conf, new File[] { tmpDir },
+ new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold())));
+ fail("Expecting FileNotFoundException");
+ } catch (FileNotFoundException e) {
+ assertEquals("Entry log directory '" + tmpDir + "/current' does not exist", e
+ .getLocalizedMessage());
+ } finally {
+ if (entryLogger != null) {
+ entryLogger.close();
+ }
+ }
+ }
+
+ /**
+ * Test to verify DiskFull handling during addEntry.
+ */ + @Test + public void testAddEntryFailureOnDiskFull() throws Exception { + File ledgerDir1 = createTempDir("bkTest", ".dir"); + File ledgerDir2 = createTempDir("bkTest", ".dir"); + ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); + conf.setLedgerStorageClass(InterleavedLedgerStorage.class.getName()); + conf.setJournalDirName(ledgerDir1.toString()); + conf.setLedgerDirNames(new String[] { ledgerDir1.getAbsolutePath(), + ledgerDir2.getAbsolutePath() }); + BookieImpl bookie = new TestBookieImpl(conf); + DefaultEntryLogger entryLogger = new DefaultEntryLogger(conf, + bookie.getLedgerDirsManager()); + InterleavedLedgerStorage ledgerStorage = + ((InterleavedLedgerStorage) bookie.ledgerStorage.getUnderlyingLedgerStorage()); + ledgerStorage.entryLogger = entryLogger; + // Create ledgers + ledgerStorage.setMasterKey(1, "key".getBytes()); + ledgerStorage.setMasterKey(2, "key".getBytes()); + ledgerStorage.setMasterKey(3, "key".getBytes()); + // Add entries + ledgerStorage.addEntry(generateEntry(1, 1)); + ledgerStorage.addEntry(generateEntry(2, 1)); + // Add entry with disk full failure simulation + bookie.getLedgerDirsManager().addToFilledDirs(((EntryLogManagerBase) entryLogger.getEntryLogManager()) + .getCurrentLogForLedger(DefaultEntryLogger.UNASSIGNED_LEDGERID).getLogFile().getParentFile()); + ledgerStorage.addEntry(generateEntry(3, 1)); + // Verify written entries + Assert.assertTrue(0 == generateEntry(1, 1).compareTo(ledgerStorage.getEntry(1, 1))); + Assert.assertTrue(0 == generateEntry(2, 1).compareTo(ledgerStorage.getEntry(2, 1))); + Assert.assertTrue(0 == generateEntry(3, 1).compareTo(ledgerStorage.getEntry(3, 1))); + } + + /** + * Explicitly try to recover using the ledgers map index at the end of the entry log. + */ + @Test + public void testRecoverFromLedgersMap() throws Exception { + // create some entries + entryLogger.addEntry(1L, generateEntry(1, 1).nioBuffer()); + entryLogger.addEntry(3L, generateEntry(3, 1).nioBuffer()); + entryLogger.addEntry(2L, generateEntry(2, 1).nioBuffer()); + entryLogger.addEntry(1L, generateEntry(1, 2).nioBuffer()); + + EntryLogManagerBase entryLogManager = (EntryLogManagerBase) entryLogger.getEntryLogManager(); + entryLogManager.createNewLog(DefaultEntryLogger.UNASSIGNED_LEDGERID); + entryLogManager.flushRotatedLogs(); + + EntryLogMetadata meta = entryLogger.extractEntryLogMetadataFromIndex(0L); + LOG.info("Extracted Meta From Entry Log {}", meta); + assertEquals(60, meta.getLedgersMap().get(1L)); + assertEquals(30, meta.getLedgersMap().get(2L)); + assertEquals(30, meta.getLedgersMap().get(3L)); + assertFalse(meta.getLedgersMap().containsKey(4L)); + assertEquals(120, meta.getTotalSize()); + assertEquals(120, meta.getRemainingSize()); + } + + /** + * Explicitly try to recover using the ledgers map index at the end of the entry log. 
+ */ + @Test + public void testRecoverFromLedgersMapOnV0EntryLog() throws Exception { + // create some entries + entryLogger.addEntry(1L, generateEntry(1, 1).nioBuffer()); + entryLogger.addEntry(3L, generateEntry(3, 1).nioBuffer()); + entryLogger.addEntry(2L, generateEntry(2, 1).nioBuffer()); + entryLogger.addEntry(1L, generateEntry(1, 2).nioBuffer()); + ((EntryLogManagerBase) entryLogger.getEntryLogManager()).createNewLog(DefaultEntryLogger.UNASSIGNED_LEDGERID); + entryLogger.close(); + + // Rewrite the entry log header to be on V0 format + File f = new File(curDir, "0.log"); + RandomAccessFile raf = new RandomAccessFile(f, "rw"); + raf.seek(DefaultEntryLogger.HEADER_VERSION_POSITION); + // Write zeros to indicate V0 + no ledgers map info + raf.write(new byte[4 + 8]); + raf.close(); + + // now see which ledgers are in the log + entryLogger = new DefaultEntryLogger(conf, dirsMgr); + + try { + entryLogger.extractEntryLogMetadataFromIndex(0L); + fail("Should not be possible to recover from ledgers map index"); + } catch (IOException e) { + // Ok + } + + // Public method should succeed by falling back to scanning the file + EntryLogMetadata meta = entryLogger.getEntryLogMetadata(0L); + LOG.info("Extracted Meta From Entry Log {}", meta); + assertEquals(60, meta.getLedgersMap().get(1L)); + assertEquals(30, meta.getLedgersMap().get(2L)); + assertEquals(30, meta.getLedgersMap().get(3L)); + assertFalse(meta.getLedgersMap().containsKey(4L)); + assertEquals(120, meta.getTotalSize()); + assertEquals(120, meta.getRemainingSize()); + } + + /** + * Test pre-allocate for entry log in EntryLoggerAllocator. + * @throws Exception + */ + @Test + public void testPreAllocateLog() throws Exception { + entryLogger.close(); + + // enable pre-allocation case + conf.setEntryLogFilePreAllocationEnabled(true); + + entryLogger = new DefaultEntryLogger(conf, dirsMgr); + // create a logger whose initialization phase allocating a new entry log + ((EntryLogManagerBase) entryLogger.getEntryLogManager()).createNewLog(DefaultEntryLogger.UNASSIGNED_LEDGERID); + assertNotNull(entryLogger.getEntryLoggerAllocator().getPreallocationFuture()); + + entryLogger.addEntry(1L, generateEntry(1, 1).nioBuffer()); + // the Future is not null all the time + assertNotNull(entryLogger.getEntryLoggerAllocator().getPreallocationFuture()); + entryLogger.close(); + + // disable pre-allocation case + conf.setEntryLogFilePreAllocationEnabled(false); + // create a logger + entryLogger = new DefaultEntryLogger(conf, dirsMgr); + assertNull(entryLogger.getEntryLoggerAllocator().getPreallocationFuture()); + + entryLogger.addEntry(2L, generateEntry(1, 1).nioBuffer()); + + // the Future is null all the time + assertNull(entryLogger.getEntryLoggerAllocator().getPreallocationFuture()); + } + + /** + * Test the getEntryLogsSet() method. 
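An aside on testRecoverFromLedgersMapOnV0EntryLog above: it downgrades the header by zeroing 4 + 8 bytes at DefaultEntryLogger.HEADER_VERSION_POSITION. Assuming the conventional layout (a 4-byte fingerprint, a 4-byte version, an 8-byte ledgers-map offset, padded out to LOGFILE_HEADER_SIZE), an equivalent but more explicit rewrite of that step would be:

    try (RandomAccessFile raf = new RandomAccessFile(new File(curDir, "0.log"), "rw")) {
        raf.seek(DefaultEntryLogger.HEADER_VERSION_POSITION);
        raf.writeInt(0);   // header version -> V0, i.e. no ledgers-map index
        raf.writeLong(0L); // ledgers-map offset -> absent
    }

With the index pointer gone, extractEntryLogMetadataFromIndex has nothing to read and must throw, while the public getEntryLogMetadata falls back to scanning the entries, which is exactly what the test asserts.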
+ */
+ @Test
+ public void testGetEntryLogsSet() throws Exception {
+ // create some entries
+ EntryLogManagerBase entryLogManagerBase = ((EntryLogManagerBase) entryLogger.getEntryLogManager());
+ assertEquals(Sets.newHashSet(), entryLogger.getEntryLogsSet());
+
+ entryLogManagerBase.createNewLog(DefaultEntryLogger.UNASSIGNED_LEDGERID);
+ entryLogManagerBase.flushRotatedLogs();
+
+ Thread.sleep(2000);
+ assertEquals(Sets.newHashSet(0L, 1L), entryLogger.getEntryLogsSet());
+
+ entryLogManagerBase.createNewLog(DefaultEntryLogger.UNASSIGNED_LEDGERID);
+ entryLogManagerBase.flushRotatedLogs();
+
+ assertEquals(Sets.newHashSet(0L, 1L, 2L), entryLogger.getEntryLogsSet());
+ }
+
+ /**
+ * In this testcase, entryLogger flush and entryLogger addEntry (which would
+ * call createNewLog) are called concurrently. Since the entryLogger flush
+ * method flushes both the currentlog and the rotatedlogs, it is expected
+ * that both the currentLog and the rotatedLogs are flushed and force-written.
+ *
+ * @throws Exception
+ */
+ @Test
+ public void testFlushOrder() throws Exception {
+ entryLogger.close();
+
+ int logSizeLimit = 256 * 1024;
+ conf.setEntryLogPerLedgerEnabled(false);
+ conf.setEntryLogFilePreAllocationEnabled(false);
+ conf.setFlushIntervalInBytes(0);
+ conf.setEntryLogSizeLimit(logSizeLimit);
+
+ entryLogger = new DefaultEntryLogger(conf, dirsMgr);
+ EntryLogManagerBase entryLogManager = (EntryLogManagerBase) entryLogger.getEntryLogManager();
+ AtomicBoolean exceptionHappened = new AtomicBoolean(false);
+
+ CyclicBarrier barrier = new CyclicBarrier(2);
+ List<BufferedLogChannel> rotatedLogChannels;
+ BufferedLogChannel currentActiveChannel;
+
+ exceptionHappened.set(false);
+
+ /*
+ * the higher the number of rotated logs, the easier it is to reproduce
+ * the issue regarding flush order
+ */
+ addEntriesAndRotateLogs(entryLogger, 30);
+
+ rotatedLogChannels = new LinkedList<>(entryLogManager.getRotatedLogChannels());
+ currentActiveChannel = entryLogManager.getCurrentLogForLedger(DefaultEntryLogger.UNASSIGNED_LEDGERID);
+ long currentActiveChannelUnpersistedBytes = currentActiveChannel.getUnpersistedBytes();
+
+ Thread flushThread = new Thread(new Runnable() {
+ @Override
+ public void run() {
+ try {
+ barrier.await();
+ entryLogger.flush();
+ } catch (InterruptedException | BrokenBarrierException | IOException e) {
+ LOG.error("Exception happened for entryLogger.flush", e);
+ exceptionHappened.set(true);
+ }
+ }
+ });
+
+ Thread createdNewLogThread = new Thread(new Runnable() {
+ @Override
+ public void run() {
+ try {
+ barrier.await();
+ /*
+ * here we are adding an entry of size logSizeLimit with
+ * rollLog=true, so it would create a new entrylog.
+ */
+ entryLogger.addEntry(123, generateEntry(123, 456, logSizeLimit));
+ } catch (InterruptedException | BrokenBarrierException | IOException e) {
+ LOG.error("Exception happened for entryLogManager.createNewLog", e);
+ exceptionHappened.set(true);
+ }
+ }
+ });
+
+ /*
+ * entryLogger flush and entryLogger addEntry (which would
+ * call createNewLog) are invoked concurrently from different threads.
+ */ + flushThread.start(); + createdNewLogThread.start(); + flushThread.join(); + createdNewLogThread.join(); + + Assert.assertFalse("Exception happened in one of the operation", exceptionHappened.get()); + + if (conf.getFlushIntervalInBytes() > 0) { + /* + * if flush of the previous current channel is called then the + * unpersistedBytes should be less than what it was before, actually + * it would be close to zero (but when new log is created with + * addEntry call, ledgers map will be appended at the end of entry + * log) + */ + Assert.assertTrue( + "previous currentChannel unpersistedBytes should be less than " + + currentActiveChannelUnpersistedBytes + + ", but it is actually " + currentActiveChannel.getUnpersistedBytes(), + currentActiveChannel.getUnpersistedBytes() < currentActiveChannelUnpersistedBytes); + } + for (BufferedLogChannel rotatedLogChannel : rotatedLogChannels) { + Assert.assertEquals("previous rotated entrylog should be flushandforcewritten", 0, + rotatedLogChannel.getUnpersistedBytes()); + } + } + + void addEntriesAndRotateLogs(DefaultEntryLogger entryLogger, int numOfRotations) + throws IOException { + EntryLogManagerBase entryLogManager = (EntryLogManagerBase) entryLogger.getEntryLogManager(); + entryLogManager.setCurrentLogForLedgerAndAddToRotate(DefaultEntryLogger.UNASSIGNED_LEDGERID, null); + for (int i = 0; i < numOfRotations; i++) { + addEntries(entryLogger, 10); + entryLogManager.setCurrentLogForLedgerAndAddToRotate(DefaultEntryLogger.UNASSIGNED_LEDGERID, null); + } + addEntries(entryLogger, 10); + } + + void addEntries(DefaultEntryLogger entryLogger, int noOfEntries) throws IOException { + for (int j = 0; j < noOfEntries; j++) { + int ledgerId = Math.abs(rand.nextInt()); + int entryId = Math.abs(rand.nextInt()); + entryLogger.addEntry(ledgerId, generateEntry(ledgerId, entryId).nioBuffer()); + } + } + + static class LedgerStorageWriteTask implements Callable { + long ledgerId; + int entryId; + LedgerStorage ledgerStorage; + + LedgerStorageWriteTask(long ledgerId, int entryId, LedgerStorage ledgerStorage) { + this.ledgerId = ledgerId; + this.entryId = entryId; + this.ledgerStorage = ledgerStorage; + } + + @Override + public Boolean call() throws IOException, BookieException { + try { + ledgerStorage.addEntry(generateEntry(ledgerId, entryId)); + } catch (IOException e) { + LOG.error("Got Exception for AddEntry call. LedgerId: " + ledgerId + " entryId: " + entryId, e); + throw new IOException("Got Exception for AddEntry call. 
LedgerId: " + ledgerId + " entryId: " + entryId, + e); + } + return true; + } + } + + static class LedgerStorageFlushTask implements Callable { + LedgerStorage ledgerStorage; + + LedgerStorageFlushTask(LedgerStorage ledgerStorage) { + this.ledgerStorage = ledgerStorage; + } + + @Override + public Boolean call() throws IOException { + try { + ledgerStorage.flush(); + } catch (IOException e) { + LOG.error("Got Exception for flush call", e); + throw new IOException("Got Exception for Flush call", e); + } + return true; + } + } + + static class LedgerStorageReadTask implements Callable { + long ledgerId; + int entryId; + LedgerStorage ledgerStorage; + + LedgerStorageReadTask(long ledgerId, int entryId, LedgerStorage ledgerStorage) { + this.ledgerId = ledgerId; + this.entryId = entryId; + this.ledgerStorage = ledgerStorage; + } + + @Override + public Boolean call() throws IOException, BookieException { + try { + ByteBuf expectedByteBuf = generateEntry(ledgerId, entryId); + ByteBuf actualByteBuf = ledgerStorage.getEntry(ledgerId, entryId); + if (!expectedByteBuf.equals(actualByteBuf)) { + LOG.error("Expected Entry: {} Actual Entry: {}", expectedByteBuf.toString(Charset.defaultCharset()), + actualByteBuf.toString(Charset.defaultCharset())); + throw new IOException("Expected Entry: " + expectedByteBuf.toString(Charset.defaultCharset()) + + " Actual Entry: " + actualByteBuf.toString(Charset.defaultCharset())); + } + } catch (IOException e) { + LOG.error("Got Exception for GetEntry call. LedgerId: " + ledgerId + " entryId: " + entryId, e); + throw new IOException("Got Exception for GetEntry call. LedgerId: " + ledgerId + " entryId: " + entryId, + e); + } + return true; + } + } + + /** + * test concurrent write operations and then concurrent read operations + * using InterleavedLedgerStorage. + */ + @FlakyTest(value = "https://github.com/apache/bookkeeper/issues/1516") + public void testConcurrentWriteAndReadCallsOfInterleavedLedgerStorage() throws Exception { + testConcurrentWriteAndReadCalls(InterleavedLedgerStorage.class.getName(), false); + } + + /** + * test concurrent write operations and then concurrent read operations + * using InterleavedLedgerStorage with EntryLogPerLedger enabled. + */ + @FlakyTest(value = "https://github.com/apache/bookkeeper/issues/1516") + public void testConcurrentWriteAndReadCallsOfInterleavedLedgerStorageWithELPLEnabled() throws Exception { + testConcurrentWriteAndReadCalls(InterleavedLedgerStorage.class.getName(), true); + } + + /** + * test concurrent write operations and then concurrent read operations + * using SortedLedgerStorage. + */ + @FlakyTest(value = "https://github.com/apache/bookkeeper/issues/1516") + public void testConcurrentWriteAndReadCallsOfSortedLedgerStorage() throws Exception { + testConcurrentWriteAndReadCalls(SortedLedgerStorage.class.getName(), false); + } + + /** + * test concurrent write operations and then concurrent read operations + * using SortedLedgerStorage with EntryLogPerLedger enabled. 
+ */
+ @FlakyTest(value = "https://github.com/apache/bookkeeper/issues/1516")
+ public void testConcurrentWriteAndReadCallsOfSortedLedgerStorageWithELPLEnabled() throws Exception {
+ testConcurrentWriteAndReadCalls(SortedLedgerStorage.class.getName(), true);
+ }
+
+ public void testConcurrentWriteAndReadCalls(String ledgerStorageClass, boolean entryLogPerLedgerEnabled)
+ throws Exception {
+ File ledgerDir = createTempDir("bkTest", ".dir");
+ ServerConfiguration conf = TestBKConfiguration.newServerConfiguration();
+ conf.setJournalDirName(ledgerDir.toString());
+ conf.setLedgerDirNames(new String[] { ledgerDir.getAbsolutePath()});
+ conf.setLedgerStorageClass(ledgerStorageClass);
+ conf.setEntryLogPerLedgerEnabled(entryLogPerLedgerEnabled);
+ BookieImpl bookie = new TestBookieImpl(conf);
+ CompactableLedgerStorage ledgerStorage = (CompactableLedgerStorage) bookie.ledgerStorage;
+ Random rand = new Random(0);
+
+ if (ledgerStorageClass.equals(SortedLedgerStorage.class.getName())) {
+ Assert.assertEquals("LedgerStorage Class", SortedLedgerStorage.class, ledgerStorage.getClass());
+ if (entryLogPerLedgerEnabled) {
+ Assert.assertEquals("MemTable Class", EntryMemTableWithParallelFlusher.class,
+ ((SortedLedgerStorage) ledgerStorage).memTable.getClass());
+ } else {
+ Assert.assertEquals("MemTable Class", EntryMemTable.class,
+ ((SortedLedgerStorage) ledgerStorage).memTable.getClass());
+ }
+ }
+
+ int numOfLedgers = 70;
+ int numEntries = 1500;
+ // Create ledgers
+ for (int i = 0; i < numOfLedgers; i++) {
+ ledgerStorage.setMasterKey(i, "key".getBytes());
+ }
+
+ ExecutorService executor = Executors.newFixedThreadPool(10);
+ List<Callable<Boolean>> writeAndFlushTasks = new ArrayList<Callable<Boolean>>();
+ for (int j = 0; j < numEntries; j++) {
+ for (int i = 0; i < numOfLedgers; i++) {
+ writeAndFlushTasks.add(new LedgerStorageWriteTask(i, j, ledgerStorage));
+ }
+ }
+
+ /*
+ * add some flush tasks to the writetasks list.
+ */
+ for (int i = 0; i < (numOfLedgers * numEntries) / 500; i++) {
+ writeAndFlushTasks.add(rand.nextInt(writeAndFlushTasks.size()), new LedgerStorageFlushTask(ledgerStorage));
+ }
+
+ // invoke all those write/flush tasks all at once concurrently
+ executor.invokeAll(writeAndFlushTasks).forEach((future) -> {
+ try {
+ future.get();
+ } catch (InterruptedException ie) {
+ Thread.currentThread().interrupt();
+ LOG.error("Write/Flush task failed because of InterruptedException", ie);
+ Assert.fail("Write/Flush task interrupted");
+ } catch (Exception ex) {
+ LOG.error("Write/Flush task failed because of exception", ex);
+ Assert.fail("Write/Flush task failed " + ex.getMessage());
+ }
+ });
+
+ List<Callable<Boolean>> readAndFlushTasks = new ArrayList<Callable<Boolean>>();
+ for (int j = 0; j < numEntries; j++) {
+ for (int i = 0; i < numOfLedgers; i++) {
+ readAndFlushTasks.add(new LedgerStorageReadTask(i, j, ledgerStorage));
+ }
+ }
+
+ /*
+ * add some flush tasks to the readtasks list.
+ */ + for (int i = 0; i < (numOfLedgers * numEntries) / 500; i++) { + readAndFlushTasks.add(rand.nextInt(readAndFlushTasks.size()), new LedgerStorageFlushTask(ledgerStorage)); + } + + // invoke all those read/flush tasks all at once concurrently + executor.invokeAll(readAndFlushTasks).forEach((future) -> { + try { + future.get(); + } catch (InterruptedException ie) { + Thread.currentThread().interrupt(); + LOG.error("Read/Flush task failed because of InterruptedException", ie); + Assert.fail("Read/Flush task interrupted"); + } catch (Exception ex) { + LOG.error("Read/Flush task failed because of exception", ex); + Assert.fail("Read/Flush task failed " + ex.getMessage()); + } + }); + + executor.shutdownNow(); + } + + /** + * Test to verify the leastUnflushedLogId logic in EntryLogsStatus. + */ + @Test + public void testEntryLoggersRecentEntryLogsStatus() throws Exception { + ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); + conf.setLedgerDirNames(createAndGetLedgerDirs(2)); + LedgerDirsManager ledgerDirsManager = new LedgerDirsManager(conf, conf.getLedgerDirs(), + new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold())); + + DefaultEntryLogger entryLogger = new DefaultEntryLogger(conf, ledgerDirsManager); + DefaultEntryLogger.RecentEntryLogsStatus recentlyCreatedLogsStatus = entryLogger.recentlyCreatedEntryLogsStatus; + + recentlyCreatedLogsStatus.createdEntryLog(0L); + Assert.assertEquals("entryLogger's leastUnflushedLogId ", 0L, entryLogger.getLeastUnflushedLogId()); + recentlyCreatedLogsStatus.flushRotatedEntryLog(0L); + // since we marked entrylog - 0 as rotated, LeastUnflushedLogId would be previous rotatedlog+1 + Assert.assertEquals("entryLogger's leastUnflushedLogId ", 1L, entryLogger.getLeastUnflushedLogId()); + recentlyCreatedLogsStatus.createdEntryLog(1L); + Assert.assertEquals("entryLogger's leastUnflushedLogId ", 1L, entryLogger.getLeastUnflushedLogId()); + recentlyCreatedLogsStatus.createdEntryLog(2L); + recentlyCreatedLogsStatus.createdEntryLog(3L); + recentlyCreatedLogsStatus.createdEntryLog(4L); + Assert.assertEquals("entryLogger's leastUnflushedLogId ", 1L, entryLogger.getLeastUnflushedLogId()); + recentlyCreatedLogsStatus.flushRotatedEntryLog(1L); + Assert.assertEquals("entryLogger's leastUnflushedLogId ", 2L, entryLogger.getLeastUnflushedLogId()); + recentlyCreatedLogsStatus.flushRotatedEntryLog(3L); + // here though we rotated entrylog-3, entrylog-2 is not yet rotated so + // LeastUnflushedLogId should be still 2 + Assert.assertEquals("entryLogger's leastUnflushedLogId ", 2L, entryLogger.getLeastUnflushedLogId()); + recentlyCreatedLogsStatus.flushRotatedEntryLog(2L); + // entrylog-3 is already rotated, so leastUnflushedLogId should be 4 + Assert.assertEquals("entryLogger's leastUnflushedLogId ", 4L, entryLogger.getLeastUnflushedLogId()); + recentlyCreatedLogsStatus.flushRotatedEntryLog(4L); + Assert.assertEquals("entryLogger's leastUnflushedLogId ", 5L, entryLogger.getLeastUnflushedLogId()); + recentlyCreatedLogsStatus.createdEntryLog(5L); + recentlyCreatedLogsStatus.createdEntryLog(7L); + recentlyCreatedLogsStatus.createdEntryLog(9L); + Assert.assertEquals("entryLogger's leastUnflushedLogId ", 5L, entryLogger.getLeastUnflushedLogId()); + recentlyCreatedLogsStatus.flushRotatedEntryLog(5L); + // since we marked entrylog-5 as rotated, LeastUnflushedLogId would be previous rotatedlog+1 + Assert.assertEquals("entryLogger's leastUnflushedLogId ", 6L, entryLogger.getLeastUnflushedLogId()); + 
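An aside on the testEntryLoggersRecentEntryLogsStatus assertions around this point: they pin down the RecentEntryLogsStatus contract, namely that leastUnflushedLogId advances past an id only once every lower created id has been flushed, even when ids are created with gaps (5, 7, 9). A toy model of that bookkeeping, purely illustrative and not the real implementation:

    class RecentLogsModel {
        private final SortedMap<Long, Boolean> createdLogs = new TreeMap<>();
        private long leastUnflushedLogId = 0;

        void createdEntryLog(long logId) {
            createdLogs.put(logId, false); // created but not yet flushed
        }

        void flushRotatedEntryLog(long logId) {
            createdLogs.put(logId, true);
            // advance past every leading id that is already flushed
            while (!createdLogs.isEmpty() && createdLogs.get(createdLogs.firstKey())) {
                leastUnflushedLogId = createdLogs.firstKey() + 1;
                createdLogs.remove(createdLogs.firstKey());
            }
        }
    }

Replaying the create/flush sequence asserted in this test against the model reproduces every expected value (0, 1, 2, 4, 5, 6, 8).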
recentlyCreatedLogsStatus.flushRotatedEntryLog(7L); + Assert.assertEquals("entryLogger's leastUnflushedLogId ", 8L, entryLogger.getLeastUnflushedLogId()); + } + + String[] createAndGetLedgerDirs(int numOfLedgerDirs) throws IOException { + File ledgerDir; + File curDir; + String[] ledgerDirsPath = new String[numOfLedgerDirs]; + for (int i = 0; i < numOfLedgerDirs; i++) { + ledgerDir = createTempDir("bkTest", ".dir"); + curDir = BookieImpl.getCurrentDirectory(ledgerDir); + BookieImpl.checkDirectoryStructure(curDir); + ledgerDirsPath[i] = ledgerDir.getAbsolutePath(); + } + return ledgerDirsPath; + } + + /* + * test for validating if the EntryLog/BufferedChannel flushes/forcewrite if the bytes written to it are more than + * flushIntervalInBytes + */ + @Test + public void testFlushIntervalInBytes() throws Exception { + long flushIntervalInBytes = 5000; + ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); + conf.setEntryLogPerLedgerEnabled(true); + conf.setFlushIntervalInBytes(flushIntervalInBytes); + conf.setLedgerDirNames(createAndGetLedgerDirs(2)); + LedgerDirsManager ledgerDirsManager = new LedgerDirsManager(conf, conf.getLedgerDirs(), + new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold())); + DefaultEntryLogger entryLogger = new DefaultEntryLogger(conf, ledgerDirsManager); + EntryLogManagerBase entryLogManagerBase = ((EntryLogManagerBase) entryLogger.getEntryLogManager()); + + /* + * when entryLogger is created Header of length EntryLogger.LOGFILE_HEADER_SIZE is created + */ + long ledgerId = 0L; + int firstEntrySize = 1000; + long entry0Position = entryLogger.addEntry(0L, generateEntry(ledgerId, 0L, firstEntrySize)); + // entrylogger writes length of the entry (4 bytes) before writing entry + long expectedUnpersistedBytes = DefaultEntryLogger.LOGFILE_HEADER_SIZE + firstEntrySize + 4; + Assert.assertEquals("Unpersisted Bytes of entrylog", expectedUnpersistedBytes, + entryLogManagerBase.getCurrentLogForLedger(ledgerId).getUnpersistedBytes()); + + /* + * 'flushIntervalInBytes' number of bytes are flushed so BufferedChannel should be forcewritten + */ + int secondEntrySize = (int) (flushIntervalInBytes - expectedUnpersistedBytes); + long entry1Position = entryLogger.addEntry(0L, generateEntry(ledgerId, 1L, secondEntrySize)); + Assert.assertEquals("Unpersisted Bytes of entrylog", 0, + entryLogManagerBase.getCurrentLogForLedger(ledgerId).getUnpersistedBytes()); + + /* + * since entrylog/Bufferedchannel is persisted (forcewritten), we should be able to read the entrylog using + * newEntryLogger + */ + conf.setEntryLogPerLedgerEnabled(false); + DefaultEntryLogger newEntryLogger = new DefaultEntryLogger(conf, ledgerDirsManager); + EntryLogManager newEntryLogManager = newEntryLogger.getEntryLogManager(); + Assert.assertEquals("EntryLogManager class type", EntryLogManagerForSingleEntryLog.class, + newEntryLogManager.getClass()); + + ByteBuf buf = newEntryLogger.readEntry(ledgerId, 0L, entry0Position); + long readLedgerId = buf.readLong(); + long readEntryId = buf.readLong(); + Assert.assertEquals("LedgerId", ledgerId, readLedgerId); + Assert.assertEquals("EntryId", 0L, readEntryId); + + buf = newEntryLogger.readEntry(ledgerId, 1L, entry1Position); + readLedgerId = buf.readLong(); + readEntryId = buf.readLong(); + Assert.assertEquals("LedgerId", ledgerId, readLedgerId); + Assert.assertEquals("EntryId", 1L, readEntryId); + } + + @Test + public void testReadEntryWithoutLedgerID() throws Exception { + List locations = new ArrayList<>(); + // `+ 1` is not 
a typo: create one more log file than the max number of cached readers
+ for (int i = 0; i < 10; i++) {
+ ByteBuf e = makeEntry(1L, i, 100);
+ long loc = entryLogger.addEntry(1L, e.slice());
+ locations.add(loc);
+ }
+ entryLogger.flush();
+ for (Long loc : locations) {
+ int i = locations.indexOf(loc);
+ ByteBuf data = entryLogger.readEntry(loc);
+ assertEntryEquals(data, makeEntry(1L, i, 100));
+ long readLedgerId = data.readLong();
+ long readEntryId = data.readLong();
+ Assert.assertEquals("LedgerId", 1L, readLedgerId);
+ Assert.assertEquals("EntryId", i, readEntryId);
+ ReferenceCountUtil.release(data);
+ }
+ }
+
+
+ /*
+ * tests basic logic of EntryLogManager interface for
+ * EntryLogManagerForEntryLogPerLedger.
+ */
+ @Test
+ public void testEntryLogManagerInterfaceForEntryLogPerLedger() throws Exception {
+ ServerConfiguration conf = TestBKConfiguration.newServerConfiguration();
+ conf.setEntryLogFilePreAllocationEnabled(true);
+ conf.setEntryLogPerLedgerEnabled(true);
+ conf.setLedgerDirNames(createAndGetLedgerDirs(2));
+ LedgerDirsManager ledgerDirsManager = new LedgerDirsManager(conf, conf.getLedgerDirs(),
+ new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold()));
+
+ DefaultEntryLogger entryLogger = new DefaultEntryLogger(conf, ledgerDirsManager);
+ EntryLogManagerForEntryLogPerLedger entryLogManager = (EntryLogManagerForEntryLogPerLedger) entryLogger
+ .getEntryLogManager();
+
+ Assert.assertEquals("Number of current active EntryLogs ", 0, entryLogManager.getCopyOfCurrentLogs().size());
+ Assert.assertEquals("Number of Rotated Logs ", 0, entryLogManager.getRotatedLogChannels().size());
+
+ int numOfLedgers = 5;
+ int numOfThreadsPerLedger = 10;
+ validateLockAcquireAndRelease(numOfLedgers, numOfThreadsPerLedger, entryLogManager);
+
+ for (long i = 0; i < numOfLedgers; i++) {
+ entryLogManager.setCurrentLogForLedgerAndAddToRotate(i,
+ createDummyBufferedLogChannel(entryLogger, i, conf));
+ }
+
+ for (long i = 0; i < numOfLedgers; i++) {
+ Assert.assertEquals("LogChannel for ledger: " + i, entryLogManager.getCurrentLogIfPresent(i),
+ entryLogManager.getCurrentLogForLedger(i));
+ }
+
+ Assert.assertEquals("Number of current active EntryLogs ", numOfLedgers,
+ entryLogManager.getCopyOfCurrentLogs().size());
+ Assert.assertEquals("Number of Rotated Logs ", 0, entryLogManager.getRotatedLogChannels().size());
+
+ for (long i = 0; i < numOfLedgers; i++) {
+ entryLogManager.setCurrentLogForLedgerAndAddToRotate(i,
+ createDummyBufferedLogChannel(entryLogger, numOfLedgers + i, conf));
+ }
+
+ /*
+ * since new entryLogs are set for all the ledgers, previous entrylogs would be added to rotatedLogChannels
+ */
+ Assert.assertEquals("Number of current active EntryLogs ", numOfLedgers,
+ entryLogManager.getCopyOfCurrentLogs().size());
+ Assert.assertEquals("Number of Rotated Logs ", numOfLedgers,
+ entryLogManager.getRotatedLogChannels().size());
+
+ for (long i = 0; i < numOfLedgers; i++) {
+ entryLogManager.setCurrentLogForLedgerAndAddToRotate(i,
+ createDummyBufferedLogChannel(entryLogger, 2 * numOfLedgers + i, conf));
+ }
+
+ /*
+ * again since new entryLogs are set for all the ledgers, previous entrylogs would be added to
+ * rotatedLogChannels
+ */
+ Assert.assertEquals("Number of current active EntryLogs ", numOfLedgers,
+ entryLogManager.getCopyOfCurrentLogs().size());
+ Assert.assertEquals("Number of Rotated Logs ", 2 * numOfLedgers,
+ entryLogManager.getRotatedLogChannels().size());
+
+ for (BufferedLogChannel logChannel :
entryLogManager.getRotatedLogChannels()) { + entryLogManager.getRotatedLogChannels().remove(logChannel); + } + Assert.assertEquals("Number of Rotated Logs ", 0, entryLogManager.getRotatedLogChannels().size()); + + // entrylogid is sequential + for (long i = 0; i < numOfLedgers; i++) { + assertEquals("EntryLogid for Ledger " + i, 2 * numOfLedgers + i, + entryLogManager.getCurrentLogForLedger(i).getLogId()); + } + + for (long i = 2 * numOfLedgers; i < (3 * numOfLedgers); i++) { + assertTrue("EntryLog with logId: " + i + " should be present", + entryLogManager.getCurrentLogIfPresent(i) != null); + } + } + + private DefaultEntryLogger.BufferedLogChannel createDummyBufferedLogChannel(DefaultEntryLogger entryLogger, + long logid, + ServerConfiguration servConf) + throws IOException { + File tmpFile = File.createTempFile("entrylog", logid + ""); + tmpFile.deleteOnExit(); + FileChannel fc = new RandomAccessFile(tmpFile, "rw").getChannel(); + DefaultEntryLogger.BufferedLogChannel logChannel = + new BufferedLogChannel(UnpooledByteBufAllocator.DEFAULT, fc, 10, 10, + logid, tmpFile, servConf.getFlushIntervalInBytes()); + return logChannel; + } + + /* + * validates the concurrency aspect of entryLogManager's lock + * + * Executor of fixedThreadPool of size 'numOfLedgers * numOfThreadsPerLedger' is created and the same number + * of tasks are submitted to the Executor. In each task, lock of that ledger is acquired and then released. + */ + private void validateLockAcquireAndRelease(int numOfLedgers, int numOfThreadsPerLedger, + EntryLogManagerForEntryLogPerLedger entryLogManager) throws InterruptedException { + ExecutorService tpe = Executors.newFixedThreadPool(numOfLedgers * numOfThreadsPerLedger); + CountDownLatch latchToStart = new CountDownLatch(1); + CountDownLatch latchToWait = new CountDownLatch(1); + AtomicInteger numberOfThreadsAcquiredLock = new AtomicInteger(0); + AtomicBoolean irptExceptionHappened = new AtomicBoolean(false); + Random rand = new Random(); + + for (int i = 0; i < numOfLedgers * numOfThreadsPerLedger; i++) { + long ledgerId = i % numOfLedgers; + tpe.submit(() -> { + try { + latchToStart.await(); + Lock lock = entryLogManager.getLock(ledgerId); + lock.lock(); + numberOfThreadsAcquiredLock.incrementAndGet(); + latchToWait.await(); + lock.unlock(); + } catch (InterruptedException | IOException e) { + irptExceptionHappened.set(true); + } + }); + } + + assertEquals("Number Of Threads acquired Lock", 0, numberOfThreadsAcquiredLock.get()); + latchToStart.countDown(); + Thread.sleep(1000); + /* + * since there are only "numOfLedgers" ledgers, only < "numOfLedgers" + * threads should have been able to acquire lock, because multiple + * ledgers can end up getting same lock because their hashcode might + * fall in the same bucket. 
+         *
+         * After acquiring the lock, the threads wait on the 'latchToWait' latch.
+         */
+        int currentNumberOfThreadsAcquiredLock = numberOfThreadsAcquiredLock.get();
+        assertTrue("Number Of Threads acquired Lock " + currentNumberOfThreadsAcquiredLock,
+                (currentNumberOfThreadsAcquiredLock > 0) && (currentNumberOfThreadsAcquiredLock <= numOfLedgers));
+        latchToWait.countDown();
+        Thread.sleep(2000);
+        assertEquals("Number Of Threads acquired Lock", numOfLedgers * numOfThreadsPerLedger,
+                numberOfThreadsAcquiredLock.get());
+    }
+
+    /*
+     * Tests that EntryLogManagerForEntryLogPerLedger removes a ledger from its
+     * cache map if no entry is added to that ledger and its corresponding state
+     * is not accessed for more than evictionPeriod.
+     *
+     * @throws Exception
+     */
+    @Test
+    public void testEntryLogManagerExpiryRemoval() throws Exception {
+        int evictionPeriod = 1;
+
+        ServerConfiguration conf = TestBKConfiguration.newServerConfiguration();
+        conf.setEntryLogFilePreAllocationEnabled(false);
+        conf.setEntryLogPerLedgerEnabled(true);
+        conf.setLedgerDirNames(createAndGetLedgerDirs(2));
+        conf.setEntrylogMapAccessExpiryTimeInSeconds(evictionPeriod);
+        LedgerDirsManager ledgerDirsManager = new LedgerDirsManager(conf, conf.getLedgerDirs(),
+                new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold()));
+
+        DefaultEntryLogger entryLogger = new DefaultEntryLogger(conf, ledgerDirsManager);
+        EntryLogManagerForEntryLogPerLedger entryLogManager =
+                (EntryLogManagerForEntryLogPerLedger) entryLogger.getEntryLogManager();
+
+        long ledgerId = 0L;
+
+        BufferedLogChannel logChannel = createDummyBufferedLogChannel(entryLogger, 0, conf);
+        entryLogManager.setCurrentLogForLedgerAndAddToRotate(ledgerId, logChannel);
+
+        BufferedLogChannel currentLogForLedger = entryLogManager.getCurrentLogForLedger(ledgerId);
+        assertEquals("LogChannel for ledger " + ledgerId + " should match", logChannel, currentLogForLedger);
+
+        Thread.sleep(evictionPeriod * 1000 + 100);
+        entryLogManager.doEntryLogMapCleanup();
+
+        /*
+         * since the ledger was not accessed for more than evictionPeriod and the cache has been cleaned up,
+         * the mapping for that ledger should no longer be available
+         */
+        currentLogForLedger = entryLogManager.getCurrentLogForLedger(ledgerId);
+        assertEquals("LogChannel for ledger " + ledgerId + " should be null", null, currentLogForLedger);
+        Assert.assertEquals("Number of current active EntryLogs ", 0, entryLogManager.getCopyOfCurrentLogs().size());
+        Assert.assertEquals("Number of rotated EntryLogs ", 1, entryLogManager.getRotatedLogChannels().size());
+        Assert.assertTrue("CopyOfRotatedLogChannels should contain the created LogChannel",
+                entryLogManager.getRotatedLogChannels().contains(logChannel));
+
+        Assert.assertTrue("since the map entry must have been evicted, it should be null",
+                (entryLogManager.getCacheAsMap().get(ledgerId) == null)
+                        || (entryLogManager.getCacheAsMap().get(ledgerId).getEntryLogWithDirInfo() == null));
+    }
+
+    /*
+     * Tests that the maximum cache size (maximumNumberOfActiveEntryLogs) is
+     * honored by EntryLogManagerForEntryLogPerLedger's cache eviction policy.
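+     *
+     * Conceptually this is a size-bounded cache whose evicted channels move to the
+     * rotated-logs list. A minimal sketch of that idea (illustrative only, not
+     * necessarily how EntryLogManagerForEntryLogPerLedger is implemented):
+     *
+     *   Cache<Long, BufferedLogChannel> cache = CacheBuilder.newBuilder()
+     *       .maximumSize(maximumNumberOfActiveEntryLogs)
+     *       .removalListener(notification -> rotatedLogChannels.add(notification.getValue()))
+     *       .build();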
+ */ + @Test + public void testCacheMaximumSizeEvictionPolicy() throws Exception { + entryLogger.close(); + final int cacheMaximumSize = 20; + + ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); + conf.setEntryLogFilePreAllocationEnabled(true); + conf.setEntryLogPerLedgerEnabled(true); + conf.setLedgerDirNames(createAndGetLedgerDirs(1)); + conf.setMaximumNumberOfActiveEntryLogs(cacheMaximumSize); + LedgerDirsManager ledgerDirsManager = new LedgerDirsManager(conf, conf.getLedgerDirs(), + new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold())); + + entryLogger = new DefaultEntryLogger(conf, ledgerDirsManager); + EntryLogManagerForEntryLogPerLedger entryLogManager = + (EntryLogManagerForEntryLogPerLedger) entryLogger.getEntryLogManager(); + + for (int i = 0; i < cacheMaximumSize + 10; i++) { + entryLogManager.createNewLog(i); + int cacheSize = entryLogManager.getCacheAsMap().size(); + Assert.assertTrue("Cache maximum size is expected to be less than " + cacheMaximumSize + + " but current cacheSize is " + cacheSize, cacheSize <= cacheMaximumSize); + } + } + + @Test + public void testLongLedgerIdsWithEntryLogPerLedger() throws Exception { + ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); + conf.setEntryLogFilePreAllocationEnabled(true); + conf.setEntryLogPerLedgerEnabled(true); + conf.setLedgerDirNames(createAndGetLedgerDirs(1)); + conf.setLedgerStorageClass(InterleavedLedgerStorage.class.getName()); + + LedgerDirsManager ledgerDirsManager = new LedgerDirsManager(conf, conf.getLedgerDirs(), + new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold())); + + DefaultEntryLogger entryLogger = new DefaultEntryLogger(conf, ledgerDirsManager); + EntryLogManagerForEntryLogPerLedger entryLogManager = (EntryLogManagerForEntryLogPerLedger) entryLogger + .getEntryLogManager(); + + int numOfLedgers = 5; + int numOfEntries = 4; + long[][] pos = new long[numOfLedgers][numOfEntries]; + for (int i = 0; i < numOfLedgers; i++) { + long ledgerId = Long.MAX_VALUE - i; + entryLogManager.createNewLog(ledgerId); + for (int entryId = 0; entryId < numOfEntries; entryId++) { + pos[i][entryId] = entryLogger.addEntry(ledgerId, generateEntry(ledgerId, entryId).nioBuffer()); + } + } + /* + * do checkpoint to make sure entrylog files are persisted + */ + entryLogger.checkpoint(); + + for (int i = 0; i < numOfLedgers; i++) { + long ledgerId = Long.MAX_VALUE - i; + for (int entryId = 0; entryId < numOfEntries; entryId++) { + String expectedValue = generateDataString(ledgerId, entryId); + ByteBuf buf = entryLogger.readEntry(ledgerId, entryId, pos[i][entryId]); + long readLedgerId = buf.readLong(); + long readEntryId = buf.readLong(); + byte[] readData = new byte[buf.readableBytes()]; + buf.readBytes(readData); + assertEquals("LedgerId ", ledgerId, readLedgerId); + assertEquals("EntryId ", entryId, readEntryId); + assertEquals("Entry Data ", expectedValue, new String(readData)); + } + } + } + + /* + * when entrylog for ledger is removed from ledgerIdEntryLogMap, then + * ledgermap should be appended to that entrylog, before moving that + * entrylog to rotatedlogchannels. 
+     */
+    @Test
+    public void testAppendLedgersMapOnCacheRemoval() throws Exception {
+        final int cacheMaximumSize = 5;
+
+        ServerConfiguration conf = TestBKConfiguration.newServerConfiguration();
+        conf.setEntryLogFilePreAllocationEnabled(true);
+        conf.setEntryLogPerLedgerEnabled(true);
+        conf.setLedgerDirNames(createAndGetLedgerDirs(1));
+        conf.setMaximumNumberOfActiveEntryLogs(cacheMaximumSize);
+        LedgerDirsManager ledgerDirsManager = new LedgerDirsManager(conf, conf.getLedgerDirs(),
+                new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold()));
+
+        DefaultEntryLogger entryLogger = new DefaultEntryLogger(conf, ledgerDirsManager);
+        EntryLogManagerForEntryLogPerLedger entryLogManager = (EntryLogManagerForEntryLogPerLedger) entryLogger
+                .getEntryLogManager();
+
+        long ledgerId = 0L;
+        entryLogManager.createNewLog(ledgerId);
+        int entrySize = 200;
+        int numOfEntries = 4;
+        for (int i = 0; i < numOfEntries; i++) {
+            entryLogger.addEntry(ledgerId, generateEntry(ledgerId, i, entrySize));
+        }
+
+        BufferedLogChannel logChannelForledger = entryLogManager.getCurrentLogForLedger(ledgerId);
+        long logIdOfLedger = logChannelForledger.getLogId();
+        /*
+         * do checkpoint to make sure entrylog files are persisted
+         */
+        entryLogger.checkpoint();
+
+        try {
+            entryLogger.extractEntryLogMetadataFromIndex(logIdOfLedger);
+        } catch (IOException ie) {
+            // expected, because appendLedgersMap has not been called for this entrylog yet
+        }
+
+        /*
+         * create entrylogs for more ledgers, so that ledgerIdEntryLogMap
+         * reaches its limit and removes the oldest entrylog.
+         */
+        for (int i = 1; i <= cacheMaximumSize; i++) {
+            entryLogManager.createNewLog(i);
+        }
+        /*
+         * do checkpoint to make sure entrylog files are persisted
+         */
+        entryLogger.checkpoint();
+
+        EntryLogMetadata entryLogMetadata = entryLogger.extractEntryLogMetadataFromIndex(logIdOfLedger);
+        ConcurrentLongLongHashMap ledgersMap = entryLogMetadata.getLedgersMap();
+        Assert.assertEquals("There should be only one entry in entryLogMetadata", 1, ledgersMap.size());
+        Assert.assertTrue("Usage should be 1", Double.compare(1.0, entryLogMetadata.getUsage()) == 0);
+        Assert.assertEquals("Total size of entries", (entrySize + 4) * numOfEntries, ledgersMap.get(ledgerId));
+    }
+
+    /**
+     * Tests that EntryLogManagerForEntryLogPerLedger doesn't remove a ledger
+     * from its cache map if the ledger's corresponding state is accessed
+     * within the evictionPeriod.
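+     * <p>The expiry behavior is that of a time-based cache in which every access
+     * refreshes the entry. A minimal sketch of the idea (a Guava-style cache is
+     * assumed here purely for illustration):
+     * <pre>{@code
+     * Cache<Long, BufferedLogChannel> cache = CacheBuilder.newBuilder()
+     *     .expireAfterAccess(evictionPeriod, TimeUnit.SECONDS)
+     *     .build();
+     * cache.getIfPresent(ledgerId); // any access within the period defers eviction
+     * }</pre>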
+ * + * @throws Exception + */ + @Test + public void testExpiryRemovalByAccessingOnAnotherThread() throws Exception { + int evictionPeriod = 1; + ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); + conf.setEntryLogFilePreAllocationEnabled(false); + conf.setEntryLogPerLedgerEnabled(true); + conf.setLedgerDirNames(createAndGetLedgerDirs(2)); + conf.setEntrylogMapAccessExpiryTimeInSeconds(evictionPeriod); + LedgerDirsManager ledgerDirsManager = new LedgerDirsManager(conf, conf.getLedgerDirs(), + new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold())); + + DefaultEntryLogger entryLogger = new DefaultEntryLogger(conf, ledgerDirsManager); + EntryLogManagerForEntryLogPerLedger entryLogManager = + (EntryLogManagerForEntryLogPerLedger) entryLogger.getEntryLogManager(); + + long ledgerId = 0L; + + BufferedLogChannel newLogChannel = createDummyBufferedLogChannel(entryLogger, 1, conf); + entryLogManager.setCurrentLogForLedgerAndAddToRotate(ledgerId, newLogChannel); + + Thread t = new Thread() { + public void run() { + try { + Thread.sleep((evictionPeriod * 1000) / 2); + entryLogManager.getCurrentLogForLedger(ledgerId); + } catch (InterruptedException | IOException e) { + } + } + }; + + t.start(); + Thread.sleep(evictionPeriod * 1000 + 100); + entryLogManager.doEntryLogMapCleanup(); + + /* + * in this scenario, that ledger is accessed by other thread during + * eviction period time, so it should not be evicted. + */ + BufferedLogChannel currentLogForLedger = entryLogManager.getCurrentLogForLedger(ledgerId); + assertEquals("LogChannel for ledger " + ledgerId, newLogChannel, currentLogForLedger); + Assert.assertEquals("Number of current active EntryLogs ", 1, entryLogManager.getCopyOfCurrentLogs().size()); + Assert.assertEquals("Number of rotated EntryLogs ", 0, entryLogManager.getRotatedLogChannels().size()); + } + + /** + * test EntryLogManager.EntryLogManagerForEntryLogPerLedger removes the + * ledger from its cache map if entry is not added to that ledger or its + * corresponding state is not accessed for more than evictionPeriod. In this + * testcase we try to call unrelated methods or access state of other + * ledgers within the eviction period. 
+ * + * @throws Exception + */ + @Test + public void testExpiryRemovalByAccessingNonCacheRelatedMethods() throws Exception { + int evictionPeriod = 1; + + ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); + conf.setEntryLogFilePreAllocationEnabled(false); + conf.setEntryLogPerLedgerEnabled(true); + conf.setLedgerDirNames(createAndGetLedgerDirs(2)); + conf.setEntrylogMapAccessExpiryTimeInSeconds(evictionPeriod); + LedgerDirsManager ledgerDirsManager = new LedgerDirsManager(conf, conf.getLedgerDirs(), + new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold())); + + DefaultEntryLogger entryLogger = new DefaultEntryLogger(conf, ledgerDirsManager); + EntryLogManagerForEntryLogPerLedger entryLogManager = + (EntryLogManagerForEntryLogPerLedger) entryLogger.getEntryLogManager(); + + long ledgerId = 0L; + + BufferedLogChannel newLogChannel = createDummyBufferedLogChannel(entryLogger, 1, conf); + entryLogManager.setCurrentLogForLedgerAndAddToRotate(ledgerId, newLogChannel); + + AtomicBoolean exceptionOccurred = new AtomicBoolean(false); + Thread t = new Thread() { + public void run() { + try { + Thread.sleep(500); + /* + * any of the following operations should not access entry + * of 'ledgerId' in the cache + */ + entryLogManager.getCopyOfCurrentLogs(); + entryLogManager.getRotatedLogChannels(); + entryLogManager.getCurrentLogIfPresent(newLogChannel.getLogId()); + entryLogManager.getDirForNextEntryLog(ledgerDirsManager.getWritableLedgerDirs()); + long newLedgerId = 100; + BufferedLogChannel logChannelForNewLedger = + createDummyBufferedLogChannel(entryLogger, newLedgerId, conf); + entryLogManager.setCurrentLogForLedgerAndAddToRotate(newLedgerId, logChannelForNewLedger); + entryLogManager.getCurrentLogIfPresent(newLedgerId); + } catch (Exception e) { + LOG.error("Got Exception in thread", e); + exceptionOccurred.set(true); + } + } + }; + + t.start(); + Thread.sleep(evictionPeriod * 1000 + 100); + entryLogManager.doEntryLogMapCleanup(); + Assert.assertFalse("Exception occurred in thread, which is not expected", exceptionOccurred.get()); + + /* + * since for more than evictionPeriod, that ledger is not accessed and cache is cleaned up, mapping for that + * ledger should not be available anymore + */ + BufferedLogChannel currentLogForLedger = entryLogManager.getCurrentLogForLedger(ledgerId); + assertEquals("LogChannel for ledger " + ledgerId + " should be null", null, currentLogForLedger); + // expected number of current active entryLogs is 1 since we created entrylog for 'newLedgerId' + Assert.assertEquals("Number of current active EntryLogs ", 1, entryLogManager.getCopyOfCurrentLogs().size()); + Assert.assertEquals("Number of rotated EntryLogs ", 1, entryLogManager.getRotatedLogChannels().size()); + Assert.assertTrue("CopyOfRotatedLogChannels should contain the created LogChannel", + entryLogManager.getRotatedLogChannels().contains(newLogChannel)); + + Assert.assertTrue("since mapentry must have been evicted, it should be null", + (entryLogManager.getCacheAsMap().get(ledgerId) == null) + || (entryLogManager.getCacheAsMap().get(ledgerId).getEntryLogWithDirInfo() == null)); + } + + /* + * testing EntryLogger functionality (addEntry/createNewLog/flush) and EntryLogManager with entryLogPerLedger + * enabled + */ + @Test + public void testEntryLogManagerForEntryLogPerLedger() throws Exception { + ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); + conf.setEntryLogPerLedgerEnabled(true); + conf.setFlushIntervalInBytes(10000000); + 
conf.setLedgerDirNames(createAndGetLedgerDirs(2)); + LedgerDirsManager ledgerDirsManager = new LedgerDirsManager(conf, conf.getLedgerDirs(), + new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold())); + DefaultEntryLogger entryLogger = new DefaultEntryLogger(conf, ledgerDirsManager); + EntryLogManagerBase entryLogManager = (EntryLogManagerBase) entryLogger.getEntryLogManager(); + Assert.assertEquals("EntryLogManager class type", EntryLogManagerForEntryLogPerLedger.class, + entryLogManager.getClass()); + + int numOfActiveLedgers = 20; + int numEntries = 5; + + for (int j = 0; j < numEntries; j++) { + for (long i = 0; i < numOfActiveLedgers; i++) { + entryLogger.addEntry(i, generateEntry(i, j)); + } + } + + for (long i = 0; i < numOfActiveLedgers; i++) { + BufferedLogChannel logChannel = entryLogManager.getCurrentLogForLedger(i); + Assert.assertTrue("unpersistedBytes should be greater than LOGFILE_HEADER_SIZE", + logChannel.getUnpersistedBytes() > DefaultEntryLogger.LOGFILE_HEADER_SIZE); + } + + for (long i = 0; i < numOfActiveLedgers; i++) { + entryLogManager.createNewLog(i); + } + + /* + * since we created new entrylog for all the activeLedgers, entrylogs of all the ledgers + * should be rotated and hence the size of copyOfRotatedLogChannels should be numOfActiveLedgers + */ + List rotatedLogs = entryLogManager.getRotatedLogChannels(); + Assert.assertEquals("Number of rotated entrylogs", numOfActiveLedgers, rotatedLogs.size()); + + /* + * Since newlog is created for all slots, so they are moved to rotated logs and hence unpersistedBytes of all + * the slots should be just EntryLogger.LOGFILE_HEADER_SIZE + * + */ + for (long i = 0; i < numOfActiveLedgers; i++) { + BufferedLogChannel logChannel = entryLogManager.getCurrentLogForLedger(i); + Assert.assertEquals("unpersistedBytes should be LOGFILE_HEADER_SIZE", + DefaultEntryLogger.LOGFILE_HEADER_SIZE, logChannel.getUnpersistedBytes()); + } + + for (int j = numEntries; j < 2 * numEntries; j++) { + for (long i = 0; i < numOfActiveLedgers; i++) { + entryLogger.addEntry(i, generateEntry(i, j)); + } + } + + for (long i = 0; i < numOfActiveLedgers; i++) { + BufferedLogChannel logChannel = entryLogManager.getCurrentLogForLedger(i); + Assert.assertTrue("unpersistedBytes should be greater than LOGFILE_HEADER_SIZE", + logChannel.getUnpersistedBytes() > DefaultEntryLogger.LOGFILE_HEADER_SIZE); + } + + Assert.assertEquals("LeastUnflushedloggerID", 0, entryLogger.getLeastUnflushedLogId()); + + /* + * here flush is called so all the rotatedLogChannels should be file closed and there shouldn't be any + * rotatedlogchannel and also leastUnflushedLogId should be advanced to numOfActiveLedgers + */ + entryLogger.flush(); + Assert.assertEquals("Number of rotated entrylogs", 0, entryLogManager.getRotatedLogChannels().size()); + Assert.assertEquals("LeastUnflushedloggerID", numOfActiveLedgers, entryLogger.getLeastUnflushedLogId()); + + /* + * after flush (flushCurrentLogs) unpersistedBytes should be 0. 
+         */
+        for (long i = 0; i < numOfActiveLedgers; i++) {
+            BufferedLogChannel logChannel = entryLogManager.getCurrentLogForLedger(i);
+            Assert.assertEquals("unpersistedBytes should be 0", 0L, logChannel.getUnpersistedBytes());
+        }
+    }
+
+    @Test
+    public void testSingleEntryLogCreateNewLog() throws Exception {
+        Assert.assertTrue(entryLogger.getEntryLogManager() instanceof EntryLogManagerForSingleEntryLog);
+        EntryLogManagerForSingleEntryLog singleEntryLog =
+                (EntryLogManagerForSingleEntryLog) entryLogger.getEntryLogManager();
+        EntryLogManagerForSingleEntryLog mockSingleEntryLog = spy(singleEntryLog);
+        BufferedLogChannel activeLogChannel = mockSingleEntryLog.getCurrentLogForLedgerForAddEntry(1, 1024, true);
+        Assert.assertTrue(activeLogChannel != null);
+
+        verify(mockSingleEntryLog, times(1)).createNewLog(anyLong(), anyString());
+        // `reachEntryLogLimit` and `readEntryLogHardLimit` should not be called when a new log is created
+        verify(mockSingleEntryLog, times(0)).reachEntryLogLimit(any(), anyLong());
+        verify(mockSingleEntryLog, times(0)).readEntryLogHardLimit(any(), anyLong());
+    }
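+
+    /*
+     * The tests below assert on how DefaultEntryLogger packs an entry's location into
+     * a single long: the entry-log id in the high 32 bits and the byte offset within
+     * that log in the low 32 bits. A minimal sketch of that packing (the helper names
+     * are illustrative, not BookKeeper API; only the `>> 32` decode is asserted below):
+     *
+     *   long location = (logId << 32L) | offsetInLog;
+     *   long logId    = location >> 32L;
+     *   long offset   = location & 0xFFFFFFFFL;
+     */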
+ */ + Assert.assertEquals("EntryLogId for ledger: " + i, i, entryLogId); + } + } + + /* + * read the entries which are written + */ + for (int j = 0; j < numEntries; j++) { + for (int i = 0; i < numOfActiveLedgers; i++) { + String expectedValue = "ledger-" + i + "-" + j; + ByteBuf buf = entryLogger.readEntry(i, j, positions[i][j]); + long ledgerId = buf.readLong(); + long entryId = buf.readLong(); + byte[] data = new byte[buf.readableBytes()]; + buf.readBytes(data); + assertEquals("LedgerId ", i, ledgerId); + assertEquals("EntryId ", j, entryId); + assertEquals("Entry Data ", expectedValue, new String(data)); + } + } + + for (long i = 0; i < numOfActiveLedgers; i++) { + entryLogManagerBase.createNewLog(i); + } + + entryLogManagerBase.flushRotatedLogs(); + + // reading after flush of rotatedlogs + for (int j = 0; j < numEntries; j++) { + for (int i = 0; i < numOfActiveLedgers; i++) { + String expectedValue = "ledger-" + i + "-" + j; + ByteBuf buf = entryLogger.readEntry(i, j, positions[i][j]); + long ledgerId = buf.readLong(); + long entryId = buf.readLong(); + byte[] data = new byte[buf.readableBytes()]; + buf.readBytes(data); + assertEquals("LedgerId ", i, ledgerId); + assertEquals("EntryId ", j, entryId); + assertEquals("Entry Data ", expectedValue, new String(data)); + } + } + } + + class ReadTask implements Callable { + long ledgerId; + int entryId; + long position; + DefaultEntryLogger entryLogger; + + ReadTask(long ledgerId, int entryId, long position, DefaultEntryLogger entryLogger) { + this.ledgerId = ledgerId; + this.entryId = entryId; + this.position = position; + this.entryLogger = entryLogger; + } + + @Override + public Boolean call() throws IOException { + try { + ByteBuf expectedByteBuf = generateEntry(ledgerId, entryId); + ByteBuf actualByteBuf = entryLogger.readEntry(ledgerId, entryId, position); + if (!expectedByteBuf.equals(actualByteBuf)) { + LOG.error("Expected Entry: {} Actual Entry: {}", expectedByteBuf.toString(Charset.defaultCharset()), + actualByteBuf.toString(Charset.defaultCharset())); + throw new IOException("Expected Entry: " + expectedByteBuf.toString(Charset.defaultCharset()) + + " Actual Entry: " + actualByteBuf.toString(Charset.defaultCharset())); + } + } catch (IOException e) { + LOG.error("Got Exception for GetEntry call. LedgerId: " + ledgerId + " entryId: " + entryId, e); + throw new IOException("Got Exception for GetEntry call. 
LedgerId: " + ledgerId + " entryId: " + entryId, + e); + } + return true; + } + } + + /* + * test concurrent read operations of entries from flushed rotatedlogs with entryLogPerLedgerEnabled + */ + @Test + public void testConcurrentReadCallsAfterEntryLogsAreRotated() throws Exception { + ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); + conf.setEntryLogPerLedgerEnabled(true); + conf.setFlushIntervalInBytes(1000 * 25); + conf.setLedgerDirNames(createAndGetLedgerDirs(3)); + LedgerDirsManager ledgerDirsManager = new LedgerDirsManager(conf, conf.getLedgerDirs(), + new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold())); + + DefaultEntryLogger entryLogger = new DefaultEntryLogger(conf, ledgerDirsManager); + int numOfActiveLedgers = 15; + int numEntries = 2000; + final AtomicLongArray positions = new AtomicLongArray(numOfActiveLedgers * numEntries); + EntryLogManagerForEntryLogPerLedger entryLogManager = (EntryLogManagerForEntryLogPerLedger) + entryLogger.getEntryLogManager(); + + for (int i = 0; i < numOfActiveLedgers; i++) { + for (int j = 0; j < numEntries; j++) { + positions.set(i * numEntries + j, entryLogger.addEntry((long) i, generateEntry(i, j))); + long entryLogId = (positions.get(i * numEntries + j) >> 32L); + /** + * + * Though EntryLogFilePreAllocation is enabled, Since things are not done concurrently here, entryLogIds + * will be sequential. + */ + Assert.assertEquals("EntryLogId for ledger: " + i, i, entryLogId); + } + } + + for (long i = 0; i < numOfActiveLedgers; i++) { + entryLogManager.createNewLog(i); + } + entryLogManager.flushRotatedLogs(); + + // reading after flush of rotatedlogs + ArrayList readTasks = new ArrayList(); + for (int i = 0; i < numOfActiveLedgers; i++) { + for (int j = 0; j < numEntries; j++) { + readTasks.add(new ReadTask(i, j, positions.get(i * numEntries + j), entryLogger)); + } + } + + ExecutorService executor = Executors.newFixedThreadPool(40); + executor.invokeAll(readTasks).forEach((future) -> { + try { + future.get(); + } catch (InterruptedException ie) { + Thread.currentThread().interrupt(); + LOG.error("Read/Flush task failed because of InterruptedException", ie); + Assert.fail("Read/Flush task interrupted"); + } catch (Exception ex) { + LOG.error("Read/Flush task failed because of exception", ex); + Assert.fail("Read/Flush task failed " + ex.getMessage()); + } + }); + } + + /** + * testcase to validate when ledgerdirs become full and eventually all + * ledgerdirs become full. Later a ledgerdir becomes writable. 
+     */
+    @Test
+    public void testEntryLoggerAddEntryWhenLedgerDirsAreFull() throws Exception {
+        int numberOfLedgerDirs = 3;
+        List<File> ledgerDirs = new ArrayList<>();
+        String[] ledgerDirsPath = new String[numberOfLedgerDirs];
+        List<File> curDirs = new ArrayList<>();
+
+        File ledgerDir;
+        File curDir;
+        for (int i = 0; i < numberOfLedgerDirs; i++) {
+            ledgerDir = createTempDir("bkTest", ".dir").getAbsoluteFile();
+            curDir = BookieImpl.getCurrentDirectory(ledgerDir);
+            BookieImpl.checkDirectoryStructure(curDir);
+            ledgerDirs.add(ledgerDir);
+            ledgerDirsPath[i] = ledgerDir.getPath();
+            curDirs.add(curDir);
+        }
+
+        ServerConfiguration conf = TestBKConfiguration.newServerConfiguration();
+        // pre-allocation is disabled
+        conf.setEntryLogFilePreAllocationEnabled(false);
+        conf.setEntryLogPerLedgerEnabled(true);
+        conf.setLedgerDirNames(ledgerDirsPath);
+
+        LedgerDirsManager ledgerDirsManager = new LedgerDirsManager(conf, conf.getLedgerDirs(),
+                new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold()));
+
+        DefaultEntryLogger entryLogger = new DefaultEntryLogger(conf, ledgerDirsManager);
+        EntryLogManagerForEntryLogPerLedger entryLogManager = (EntryLogManagerForEntryLogPerLedger)
+                entryLogger.getEntryLogManager();
+        Assert.assertEquals("EntryLogManager class type", EntryLogManagerForEntryLogPerLedger.class,
+                entryLogManager.getClass());
+
+        entryLogger.addEntry(0L, generateEntry(0, 1));
+        entryLogger.addEntry(1L, generateEntry(1, 1));
+        entryLogger.addEntry(2L, generateEntry(2, 1));
+
+        File ledgerDirForLedger0 = entryLogManager.getCurrentLogForLedger(0L).getLogFile().getParentFile();
+        File ledgerDirForLedger1 = entryLogManager.getCurrentLogForLedger(1L).getLogFile().getParentFile();
+        File ledgerDirForLedger2 = entryLogManager.getCurrentLogForLedger(2L).getLogFile().getParentFile();
+
+        Set<File> ledgerDirsSet = new HashSet<>();
+        ledgerDirsSet.add(ledgerDirForLedger0);
+        ledgerDirsSet.add(ledgerDirForLedger1);
+        ledgerDirsSet.add(ledgerDirForLedger2);
+
+        /*
+         * since there are 3 ledgerdirs, the entrylogs for all 3 ledgers should be in different ledgerdirs
+         * (asserting on the set, not the list, so duplicate dirs would be caught).
+         */
+        Assert.assertEquals("Current active LedgerDirs size", 3, ledgerDirsSet.size());
+        Assert.assertEquals("Number of rotated logchannels", 0, entryLogManager.getRotatedLogChannels().size());
+
+        /*
+         * ledgerDirForLedger0 is added to filledDirs, so for ledger0 the new entrylog should not be created in
+         * ledgerDirForLedger0
+         */
+        ledgerDirsManager.addToFilledDirs(ledgerDirForLedger0);
+        addEntryAndValidateFolders(entryLogger, entryLogManager, 2, ledgerDirForLedger0, false, ledgerDirForLedger1,
+                ledgerDirForLedger2);
+        Assert.assertEquals("Number of rotated logchannels", 1, entryLogManager.getRotatedLogChannels().size());
+
+        /*
+         * ledgerDirForLedger1 is also added to filledDirs, so for all the ledgers new entryLogs should be in
+         * ledgerDirForLedger2
+         */
+        ledgerDirsManager.addToFilledDirs(ledgerDirForLedger1);
+        addEntryAndValidateFolders(entryLogger, entryLogManager, 3, ledgerDirForLedger2, true, ledgerDirForLedger2,
+                ledgerDirForLedger2);
+        Assert.assertTrue("Number of rotated logchannels", (2 <= entryLogManager.getRotatedLogChannels().size())
+                && (entryLogManager.getRotatedLogChannels().size() <= 3));
+        int numOfRotatedLogChannels = entryLogManager.getRotatedLogChannels().size();
+
+        /*
+         * since ledgerDirForLedger2 is added to filledDirs, all the dirs are full. If all the dirs are full, it
+         * will continue to use the current entrylogs for new entries instead of creating a new one.
So for all the ledgers + * ledgerdirs should be same as before - ledgerDirForLedger2 + */ + ledgerDirsManager.addToFilledDirs(ledgerDirForLedger2); + addEntryAndValidateFolders(entryLogger, entryLogManager, 4, ledgerDirForLedger2, true, ledgerDirForLedger2, + ledgerDirForLedger2); + Assert.assertEquals("Number of rotated logchannels", numOfRotatedLogChannels, + entryLogManager.getRotatedLogChannels().size()); + + /* + * ledgerDirForLedger1 is added back to writableDirs, so new entrylog for all the ledgers should be created in + * ledgerDirForLedger1 + */ + ledgerDirsManager.addToWritableDirs(ledgerDirForLedger1, true); + addEntryAndValidateFolders(entryLogger, entryLogManager, 4, ledgerDirForLedger1, true, ledgerDirForLedger1, + ledgerDirForLedger1); + Assert.assertEquals("Number of rotated logchannels", numOfRotatedLogChannels + 3, + entryLogManager.getRotatedLogChannels().size()); + } + + /* + * in this method we add an entry and validate the ledgerdir of the + * currentLogForLedger against the provided expected ledgerDirs. + */ + void addEntryAndValidateFolders(DefaultEntryLogger entryLogger, EntryLogManagerBase entryLogManager, int entryId, + File expectedDirForLedger0, boolean equalsForLedger0, File expectedDirForLedger1, + File expectedDirForLedger2) throws IOException { + entryLogger.addEntry(0L, generateEntry(0, entryId)); + entryLogger.addEntry(1L, generateEntry(1, entryId)); + entryLogger.addEntry(2L, generateEntry(2, entryId)); + + if (equalsForLedger0) { + Assert.assertEquals("LedgerDir for ledger 0 after adding entry " + entryId, expectedDirForLedger0, + entryLogManager.getCurrentLogForLedger(0L).getLogFile().getParentFile()); + } else { + Assert.assertNotEquals("LedgerDir for ledger 0 after adding entry " + entryId, expectedDirForLedger0, + entryLogManager.getCurrentLogForLedger(0L).getLogFile().getParentFile()); + } + Assert.assertEquals("LedgerDir for ledger 1 after adding entry " + entryId, expectedDirForLedger1, + entryLogManager.getCurrentLogForLedger(1L).getLogFile().getParentFile()); + Assert.assertEquals("LedgerDir for ledger 2 after adding entry " + entryId, expectedDirForLedger2, + entryLogManager.getCurrentLogForLedger(2L).getLogFile().getParentFile()); + } + + /* + * entries added using entrylogger with entryLogPerLedger enabled and the same entries are read using entrylogger + * with entryLogPerLedger disabled + */ + @Test + public void testSwappingEntryLogManagerFromEntryLogPerLedgerToSingle() throws Exception { + testSwappingEntryLogManager(true, false); + } + + /* + * entries added using entrylogger with entryLogPerLedger disabled and the same entries are read using entrylogger + * with entryLogPerLedger enabled + */ + @Test + public void testSwappingEntryLogManagerFromSingleToEntryLogPerLedger() throws Exception { + testSwappingEntryLogManager(false, true); + } + + public void testSwappingEntryLogManager(boolean initialEntryLogPerLedgerEnabled, + boolean laterEntryLogPerLedgerEnabled) throws Exception { + ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); + conf.setEntryLogPerLedgerEnabled(initialEntryLogPerLedgerEnabled); + conf.setLedgerDirNames(createAndGetLedgerDirs(2)); + // pre allocation enabled + conf.setEntryLogFilePreAllocationEnabled(true); + LedgerDirsManager ledgerDirsManager = new LedgerDirsManager(conf, conf.getLedgerDirs(), + new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold())); + + DefaultEntryLogger defaultEntryLogger = new DefaultEntryLogger(conf, ledgerDirsManager); + EntryLogManagerBase 
entryLogManager = (EntryLogManagerBase) defaultEntryLogger.getEntryLogManager(); + Assert.assertEquals( + "EntryLogManager class type", initialEntryLogPerLedgerEnabled + ? EntryLogManagerForEntryLogPerLedger.class : EntryLogManagerForSingleEntryLog.class, + entryLogManager.getClass()); + + int numOfActiveLedgers = 10; + int numEntries = 10; + long[][] positions = new long[numOfActiveLedgers][]; + for (int i = 0; i < numOfActiveLedgers; i++) { + positions[i] = new long[numEntries]; + } + + /* + * addentries to the ledgers + */ + for (int j = 0; j < numEntries; j++) { + for (int i = 0; i < numOfActiveLedgers; i++) { + positions[i][j] = defaultEntryLogger.addEntry((long) i, generateEntry(i, j)); + long entryLogId = (positions[i][j] >> 32L); + if (initialEntryLogPerLedgerEnabled) { + Assert.assertEquals("EntryLogId for ledger: " + i, i, entryLogId); + } else { + Assert.assertEquals("EntryLogId for ledger: " + i, 0, entryLogId); + } + } + } + + for (long i = 0; i < numOfActiveLedgers; i++) { + entryLogManager.createNewLog(i); + } + + /** + * since new entrylog is created for all the ledgers, the previous + * entrylogs must be rotated and with the following flushRotatedLogs + * call they should be forcewritten and file should be closed. + */ + entryLogManager.flushRotatedLogs(); + + /* + * new entrylogger and entryLogManager are created with + * 'laterEntryLogPerLedgerEnabled' conf + */ + conf.setEntryLogPerLedgerEnabled(laterEntryLogPerLedgerEnabled); + LedgerDirsManager newLedgerDirsManager = new LedgerDirsManager(conf, conf.getLedgerDirs(), + new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold())); + DefaultEntryLogger newEntryLogger = new DefaultEntryLogger(conf, newLedgerDirsManager); + EntryLogManager newEntryLogManager = newEntryLogger.getEntryLogManager(); + Assert.assertEquals("EntryLogManager class type", + laterEntryLogPerLedgerEnabled ? 
EntryLogManagerForEntryLogPerLedger.class + : EntryLogManagerForSingleEntryLog.class, + newEntryLogManager.getClass()); + + /* + * read the entries (which are written with previous entrylogger) with + * new entrylogger + */ + for (int j = 0; j < numEntries; j++) { + for (int i = 0; i < numOfActiveLedgers; i++) { + String expectedValue = "ledger-" + i + "-" + j; + ByteBuf buf = newEntryLogger.readEntry(i, j, positions[i][j]); + long ledgerId = buf.readLong(); + long entryId = buf.readLong(); + byte[] data = new byte[buf.readableBytes()]; + buf.readBytes(data); + assertEquals("LedgerId ", i, ledgerId); + assertEquals("EntryId ", j, entryId); + assertEquals("Entry Data ", expectedValue, new String(data)); + } + } + } + +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/EnableZkSecurityBasicTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/EnableZkSecurityBasicTest.java index 34ad145499f..67343a6e6f4 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/EnableZkSecurityBasicTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/EnableZkSecurityBasicTest.java @@ -24,7 +24,6 @@ import java.nio.file.Files; import java.util.List; import javax.security.auth.login.Configuration; - import org.apache.bookkeeper.client.BookKeeper; import org.apache.bookkeeper.client.LedgerHandle; import org.apache.bookkeeper.conf.ClientConfiguration; @@ -53,7 +52,7 @@ public EnableZkSecurityBasicTest() { @BeforeClass public static void setupJAAS() throws IOException { System.setProperty("zookeeper.authProvider.1", "org.apache.zookeeper.server.auth.SASLAuthenticationProvider"); - File tmpJaasDir = new File("target").getAbsoluteFile(); + File tmpJaasDir = Files.createTempDirectory("jassTmpDir").toFile(); File tmpJaasFile = new File(tmpJaasDir, "jaas.conf"); String jassFileContent = "Server {\n" + " org.apache.zookeeper.server.auth.DigestLoginModule required\n" @@ -118,7 +117,8 @@ private void checkACls(ZooKeeper zk, String path) throws KeeperException, Interr if (!fullPath.startsWith("/zookeeper") // skip zookeeper internal nodes && !fullPath.equals("/ledgers") // node created by test setup - && !fullPath.equals("/ledgers/" + BookKeeperConstants.AVAILABLE_NODE) // node created by test setup + && !fullPath.equals("/ledgers/" + BookKeeperConstants.AVAILABLE_NODE) + && !fullPath.equals("/ledgers/" + BookKeeperConstants.INSTANCEID) // node created by test setup ) { assertEquals(1, acls.size()); assertEquals(31, acls.get(0).getPerms()); diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/EntryLogTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/EntryLogTest.java deleted file mode 100644 index 9694fae3e98..00000000000 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/EntryLogTest.java +++ /dev/null @@ -1,1816 +0,0 @@ -/** - * - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - * - */ -package org.apache.bookkeeper.bookie; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; -import com.google.common.collect.Sets; - -import io.netty.buffer.ByteBuf; -import io.netty.buffer.Unpooled; -import java.io.File; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.RandomAccessFile; -import java.nio.channels.FileChannel; -import java.nio.charset.Charset; -import java.util.ArrayList; -import java.util.HashSet; -import java.util.LinkedList; -import java.util.List; -import java.util.Random; -import java.util.Set; -import java.util.concurrent.BrokenBarrierException; -import java.util.concurrent.Callable; -import java.util.concurrent.CountDownLatch; -import java.util.concurrent.CyclicBarrier; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.concurrent.atomic.AtomicLongArray; -import java.util.concurrent.locks.Lock; - -import org.apache.bookkeeper.bookie.EntryLogger.BufferedLogChannel; -import org.apache.bookkeeper.bookie.LedgerDirsManager.NoWritableLedgerDirException; -import org.apache.bookkeeper.common.testing.annotations.FlakyTest; -import org.apache.bookkeeper.conf.ServerConfiguration; -import org.apache.bookkeeper.conf.TestBKConfiguration; -import org.apache.bookkeeper.util.DiskChecker; -import org.apache.bookkeeper.util.IOUtils; -import org.apache.bookkeeper.util.collections.ConcurrentLongLongHashMap; -import org.apache.commons.io.FileUtils; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Tests for EntryLog. 
- */ -public class EntryLogTest { - private static final Logger LOG = LoggerFactory.getLogger(EntryLogTest.class); - - final List tempDirs = new ArrayList(); - final Random rand = new Random(); - - File createTempDir(String prefix, String suffix) throws IOException { - File dir = IOUtils.createTempDir(prefix, suffix); - tempDirs.add(dir); - return dir; - } - - private File rootDir; - private File curDir; - private ServerConfiguration conf; - private LedgerDirsManager dirsMgr; - private EntryLogger entryLogger; - - @Before - public void setUp() throws Exception { - this.rootDir = createTempDir("bkTest", ".dir"); - this.curDir = Bookie.getCurrentDirectory(rootDir); - Bookie.checkDirectoryStructure(curDir); - this.conf = TestBKConfiguration.newServerConfiguration(); - this.dirsMgr = new LedgerDirsManager( - conf, - new File[] { rootDir }, - new DiskChecker( - conf.getDiskUsageThreshold(), - conf.getDiskUsageWarnThreshold())); - this.entryLogger = new EntryLogger(conf, dirsMgr); - } - - @After - public void tearDown() throws Exception { - if (null != this.entryLogger) { - entryLogger.shutdown(); - } - - for (File dir : tempDirs) { - FileUtils.deleteDirectory(dir); - } - tempDirs.clear(); - } - - @Test - public void testDeferCreateNewLog() throws Exception { - entryLogger.shutdown(); - - // mark `curDir` as filled - this.conf.setMinUsableSizeForEntryLogCreation(1); - this.dirsMgr = new LedgerDirsManager( - conf, - new File[] { rootDir }, - new DiskChecker( - conf.getDiskUsageThreshold(), - conf.getDiskUsageWarnThreshold())); - this.dirsMgr.addToFilledDirs(curDir); - - entryLogger = new EntryLogger(conf, dirsMgr); - EntryLogManagerForSingleEntryLog entryLogManager = - (EntryLogManagerForSingleEntryLog) entryLogger.getEntryLogManager(); - assertEquals(EntryLogger.UNINITIALIZED_LOG_ID, entryLogManager.getCurrentLogId()); - - // add the first entry will trigger file creation - entryLogger.addEntry(1L, generateEntry(1, 1).nioBuffer()); - assertEquals(0L, entryLogManager.getCurrentLogId()); - } - - @Test - public void testDeferCreateNewLogWithoutEnoughDiskSpaces() throws Exception { - entryLogger.shutdown(); - - // mark `curDir` as filled - this.conf.setMinUsableSizeForEntryLogCreation(Long.MAX_VALUE); - this.dirsMgr = new LedgerDirsManager( - conf, - new File[] { rootDir }, - new DiskChecker( - conf.getDiskUsageThreshold(), - conf.getDiskUsageWarnThreshold())); - this.dirsMgr.addToFilledDirs(curDir); - - entryLogger = new EntryLogger(conf, dirsMgr); - EntryLogManagerForSingleEntryLog entryLogManager = - (EntryLogManagerForSingleEntryLog) entryLogger.getEntryLogManager(); - assertEquals(EntryLogger.UNINITIALIZED_LOG_ID, entryLogManager.getCurrentLogId()); - - // add the first entry will trigger file creation - try { - entryLogger.addEntry(1L, generateEntry(1, 1).nioBuffer()); - fail("Should fail to append entry if there is no enough reserved space left"); - } catch (NoWritableLedgerDirException e) { - assertEquals(EntryLogger.UNINITIALIZED_LOG_ID, entryLogManager.getCurrentLogId()); - } - } - - @Test - public void testCorruptEntryLog() throws Exception { - // create some entries - entryLogger.addEntry(1L, generateEntry(1, 1).nioBuffer()); - entryLogger.addEntry(3L, generateEntry(3, 1).nioBuffer()); - entryLogger.addEntry(2L, generateEntry(2, 1).nioBuffer()); - entryLogger.flush(); - entryLogger.shutdown(); - // now lets truncate the file to corrupt the last entry, which simulates a partial write - File f = new File(curDir, "0.log"); - RandomAccessFile raf = new RandomAccessFile(f, "rw"); - 
raf.setLength(raf.length() - 10); - raf.close(); - // now see which ledgers are in the log - entryLogger = new EntryLogger(conf, dirsMgr); - - EntryLogMetadata meta = entryLogger.getEntryLogMetadata(0L); - LOG.info("Extracted Meta From Entry Log {}", meta); - assertTrue(meta.getLedgersMap().containsKey(1L)); - assertFalse(meta.getLedgersMap().containsKey(2L)); - assertTrue(meta.getLedgersMap().containsKey(3L)); - } - - private static ByteBuf generateEntry(long ledger, long entry) { - byte[] data = generateDataString(ledger, entry).getBytes(); - ByteBuf bb = Unpooled.buffer(8 + 8 + data.length); - bb.writeLong(ledger); - bb.writeLong(entry); - bb.writeBytes(data); - return bb; - } - - private ByteBuf generateEntry(long ledger, long entry, int length) { - ByteBuf bb = Unpooled.buffer(length); - bb.writeLong(ledger); - bb.writeLong(entry); - byte[] randbyteArray = new byte[length - 8 - 8]; - rand.nextBytes(randbyteArray); - bb.writeBytes(randbyteArray); - return bb; - } - - private static String generateDataString(long ledger, long entry) { - return ("ledger-" + ledger + "-" + entry); - } - - @Test - public void testMissingLogId() throws Exception { - // create some entries - int numLogs = 3; - int numEntries = 10; - long[][] positions = new long[2 * numLogs][]; - for (int i = 0; i < numLogs; i++) { - positions[i] = new long[numEntries]; - - EntryLogger logger = new EntryLogger(conf, dirsMgr); - for (int j = 0; j < numEntries; j++) { - positions[i][j] = logger.addEntry((long) i, generateEntry(i, j).nioBuffer()); - } - logger.flush(); - logger.shutdown(); - } - // delete last log id - File lastLogId = new File(curDir, "lastId"); - lastLogId.delete(); - - // write another entries - for (int i = numLogs; i < 2 * numLogs; i++) { - positions[i] = new long[numEntries]; - - EntryLogger logger = new EntryLogger(conf, dirsMgr); - for (int j = 0; j < numEntries; j++) { - positions[i][j] = logger.addEntry((long) i, generateEntry(i, j).nioBuffer()); - } - logger.flush(); - logger.shutdown(); - } - - EntryLogger newLogger = new EntryLogger(conf, dirsMgr); - for (int i = 0; i < (2 * numLogs + 1); i++) { - File logFile = new File(curDir, Long.toHexString(i) + ".log"); - assertTrue(logFile.exists()); - } - for (int i = 0; i < 2 * numLogs; i++) { - for (int j = 0; j < numEntries; j++) { - String expectedValue = "ledger-" + i + "-" + j; - ByteBuf value = newLogger.readEntry(i, j, positions[i][j]); - long ledgerId = value.readLong(); - long entryId = value.readLong(); - byte[] data = new byte[value.readableBytes()]; - value.readBytes(data); - value.release(); - assertEquals(i, ledgerId); - assertEquals(j, entryId); - assertEquals(expectedValue, new String(data)); - } - } - } - - /** - * Test that EntryLogger Should fail with FNFE, if entry logger directories does not exist. - */ - @Test - public void testEntryLoggerShouldThrowFNFEIfDirectoriesDoesNotExist() - throws Exception { - File tmpDir = createTempDir("bkTest", ".dir"); - EntryLogger entryLogger = null; - try { - entryLogger = new EntryLogger(conf, new LedgerDirsManager(conf, new File[] { tmpDir }, - new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold()))); - fail("Expecting FileNotFoundException"); - } catch (FileNotFoundException e) { - assertEquals("Entry log directory '" + tmpDir + "/current' does not exist", e - .getLocalizedMessage()); - } finally { - if (entryLogger != null) { - entryLogger.shutdown(); - } - } - } - - /** - * Test to verify the DiskFull during addEntry. 
- */ - @Test - public void testAddEntryFailureOnDiskFull() throws Exception { - File ledgerDir1 = createTempDir("bkTest", ".dir"); - File ledgerDir2 = createTempDir("bkTest", ".dir"); - ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); - conf.setLedgerStorageClass(InterleavedLedgerStorage.class.getName()); - conf.setJournalDirName(ledgerDir1.toString()); - conf.setLedgerDirNames(new String[] { ledgerDir1.getAbsolutePath(), - ledgerDir2.getAbsolutePath() }); - Bookie bookie = new Bookie(conf); - EntryLogger entryLogger = new EntryLogger(conf, - bookie.getLedgerDirsManager()); - InterleavedLedgerStorage ledgerStorage = - ((InterleavedLedgerStorage) bookie.ledgerStorage.getUnderlyingLedgerStorage()); - ledgerStorage.entryLogger = entryLogger; - // Create ledgers - ledgerStorage.setMasterKey(1, "key".getBytes()); - ledgerStorage.setMasterKey(2, "key".getBytes()); - ledgerStorage.setMasterKey(3, "key".getBytes()); - // Add entries - ledgerStorage.addEntry(generateEntry(1, 1)); - ledgerStorage.addEntry(generateEntry(2, 1)); - // Add entry with disk full failure simulation - bookie.getLedgerDirsManager().addToFilledDirs(((EntryLogManagerBase) entryLogger.getEntryLogManager()) - .getCurrentLogForLedger(EntryLogger.UNASSIGNED_LEDGERID).getLogFile().getParentFile()); - ledgerStorage.addEntry(generateEntry(3, 1)); - // Verify written entries - Assert.assertTrue(0 == generateEntry(1, 1).compareTo(ledgerStorage.getEntry(1, 1))); - Assert.assertTrue(0 == generateEntry(2, 1).compareTo(ledgerStorage.getEntry(2, 1))); - Assert.assertTrue(0 == generateEntry(3, 1).compareTo(ledgerStorage.getEntry(3, 1))); - } - - /** - * Explicitly try to recover using the ledgers map index at the end of the entry log. - */ - @Test - public void testRecoverFromLedgersMap() throws Exception { - // create some entries - entryLogger.addEntry(1L, generateEntry(1, 1).nioBuffer()); - entryLogger.addEntry(3L, generateEntry(3, 1).nioBuffer()); - entryLogger.addEntry(2L, generateEntry(2, 1).nioBuffer()); - entryLogger.addEntry(1L, generateEntry(1, 2).nioBuffer()); - - EntryLogManagerBase entryLogManager = (EntryLogManagerBase) entryLogger.getEntryLogManager(); - entryLogManager.createNewLog(EntryLogger.UNASSIGNED_LEDGERID); - entryLogManager.flushRotatedLogs(); - - EntryLogMetadata meta = entryLogger.extractEntryLogMetadataFromIndex(0L); - LOG.info("Extracted Meta From Entry Log {}", meta); - assertEquals(60, meta.getLedgersMap().get(1L)); - assertEquals(30, meta.getLedgersMap().get(2L)); - assertEquals(30, meta.getLedgersMap().get(3L)); - assertFalse(meta.getLedgersMap().containsKey(4L)); - assertEquals(120, meta.getTotalSize()); - assertEquals(120, meta.getRemainingSize()); - } - - /** - * Explicitly try to recover using the ledgers map index at the end of the entry log. 
- */ - @Test - public void testRecoverFromLedgersMapOnV0EntryLog() throws Exception { - // create some entries - entryLogger.addEntry(1L, generateEntry(1, 1).nioBuffer()); - entryLogger.addEntry(3L, generateEntry(3, 1).nioBuffer()); - entryLogger.addEntry(2L, generateEntry(2, 1).nioBuffer()); - entryLogger.addEntry(1L, generateEntry(1, 2).nioBuffer()); - ((EntryLogManagerBase) entryLogger.getEntryLogManager()).createNewLog(EntryLogger.UNASSIGNED_LEDGERID); - entryLogger.shutdown(); - - // Rewrite the entry log header to be on V0 format - File f = new File(curDir, "0.log"); - RandomAccessFile raf = new RandomAccessFile(f, "rw"); - raf.seek(EntryLogger.HEADER_VERSION_POSITION); - // Write zeros to indicate V0 + no ledgers map info - raf.write(new byte[4 + 8]); - raf.close(); - - // now see which ledgers are in the log - entryLogger = new EntryLogger(conf, dirsMgr); - - try { - entryLogger.extractEntryLogMetadataFromIndex(0L); - fail("Should not be possible to recover from ledgers map index"); - } catch (IOException e) { - // Ok - } - - // Public method should succeed by falling back to scanning the file - EntryLogMetadata meta = entryLogger.getEntryLogMetadata(0L); - LOG.info("Extracted Meta From Entry Log {}", meta); - assertEquals(60, meta.getLedgersMap().get(1L)); - assertEquals(30, meta.getLedgersMap().get(2L)); - assertEquals(30, meta.getLedgersMap().get(3L)); - assertFalse(meta.getLedgersMap().containsKey(4L)); - assertEquals(120, meta.getTotalSize()); - assertEquals(120, meta.getRemainingSize()); - } - - /** - * Test pre-allocate for entry log in EntryLoggerAllocator. - * @throws Exception - */ - @Test - public void testPreAllocateLog() throws Exception { - entryLogger.shutdown(); - - // enable pre-allocation case - conf.setEntryLogFilePreAllocationEnabled(true); - - entryLogger = new EntryLogger(conf, dirsMgr); - // create a logger whose initialization phase allocating a new entry log - ((EntryLogManagerBase) entryLogger.getEntryLogManager()).createNewLog(EntryLogger.UNASSIGNED_LEDGERID); - assertNotNull(entryLogger.getEntryLoggerAllocator().getPreallocationFuture()); - - entryLogger.addEntry(1L, generateEntry(1, 1).nioBuffer()); - // the Future is not null all the time - assertNotNull(entryLogger.getEntryLoggerAllocator().getPreallocationFuture()); - entryLogger.shutdown(); - - // disable pre-allocation case - conf.setEntryLogFilePreAllocationEnabled(false); - // create a logger - entryLogger = new EntryLogger(conf, dirsMgr); - assertNull(entryLogger.getEntryLoggerAllocator().getPreallocationFuture()); - - entryLogger.addEntry(2L, generateEntry(1, 1).nioBuffer()); - - // the Future is null all the time - assertNull(entryLogger.getEntryLoggerAllocator().getPreallocationFuture()); - } - - /** - * Test the getEntryLogsSet() method. 
- */ - @Test - public void testGetEntryLogsSet() throws Exception { - // create some entries - EntryLogManagerBase entryLogManagerBase = ((EntryLogManagerBase) entryLogger.getEntryLogManager()); - assertEquals(Sets.newHashSet(), entryLogger.getEntryLogsSet()); - - entryLogManagerBase.createNewLog(EntryLogger.UNASSIGNED_LEDGERID); - entryLogManagerBase.flushRotatedLogs(); - - assertEquals(Sets.newHashSet(0L, 1L), entryLogger.getEntryLogsSet()); - - entryLogManagerBase.createNewLog(EntryLogger.UNASSIGNED_LEDGERID); - entryLogManagerBase.flushRotatedLogs(); - - assertEquals(Sets.newHashSet(0L, 1L, 2L), entryLogger.getEntryLogsSet()); - } - - /** - * In this testcase, entryLogger flush and entryLogger addEntry (which would - * call createNewLog) are called concurrently. Since entryLogger flush - * method flushes both currentlog and rotatedlogs, it is expected all the - * currentLog and rotatedLogs are supposed to be flush and forcewritten. - * - * @throws Exception - */ - @Test - public void testFlushOrder() throws Exception { - entryLogger.shutdown(); - - int logSizeLimit = 256 * 1024; - conf.setEntryLogPerLedgerEnabled(false); - conf.setEntryLogFilePreAllocationEnabled(false); - conf.setFlushIntervalInBytes(0); - conf.setEntryLogSizeLimit(logSizeLimit); - - entryLogger = new EntryLogger(conf, dirsMgr); - EntryLogManagerBase entryLogManager = (EntryLogManagerBase) entryLogger.getEntryLogManager(); - AtomicBoolean exceptionHappened = new AtomicBoolean(false); - - CyclicBarrier barrier = new CyclicBarrier(2); - List rotatedLogChannels; - BufferedLogChannel currentActiveChannel; - - exceptionHappened.set(false); - - /* - * higher the number of rotated logs, it would be easier to reproduce - * the issue regarding flush order - */ - addEntriesAndRotateLogs(entryLogger, 30); - - rotatedLogChannels = new LinkedList(entryLogManager.getRotatedLogChannels()); - currentActiveChannel = entryLogManager.getCurrentLogForLedger(EntryLogger.UNASSIGNED_LEDGERID); - long currentActiveChannelUnpersistedBytes = currentActiveChannel.getUnpersistedBytes(); - - Thread flushThread = new Thread(new Runnable() { - @Override - public void run() { - try { - barrier.await(); - entryLogger.flush(); - } catch (InterruptedException | BrokenBarrierException | IOException e) { - LOG.error("Exception happened for entryLogger.flush", e); - exceptionHappened.set(true); - } - } - }); - - Thread createdNewLogThread = new Thread(new Runnable() { - @Override - public void run() { - try { - barrier.await(); - /* - * here we are adding entry of size logSizeLimit with - * rolllog=true, so it would create a new entrylog. - */ - entryLogger.addEntry(123, generateEntry(123, 456, logSizeLimit), true); - } catch (InterruptedException | BrokenBarrierException | IOException e) { - LOG.error("Exception happened for entryLogManager.createNewLog", e); - exceptionHappened.set(true); - } - } - }); - - /* - * concurrently entryLogger flush and entryLogger addEntry (which would - * call createNewLog) would be called from different threads. 
- */
- flushThread.start();
- createdNewLogThread.start();
- flushThread.join();
- createdNewLogThread.join();
-
- Assert.assertFalse("Exception happened in one of the operations", exceptionHappened.get());
-
- /*
- * if the previous current channel was flushed, its unpersistedBytes should
- * be less than before; in fact it would be close to zero (though when a new
- * log is created by the addEntry call, the ledgers map is appended at the
- * end of the entry log)
- */
- Assert.assertTrue(
- "previous currentChannel unpersistedBytes should be less than " + currentActiveChannelUnpersistedBytes
- + ", but it is actually " + currentActiveChannel.getUnpersistedBytes(),
- currentActiveChannel.getUnpersistedBytes() < currentActiveChannelUnpersistedBytes);
- for (BufferedLogChannel rotatedLogChannel : rotatedLogChannels) {
- Assert.assertEquals("previous rotated entry log should be flushed and force-written", 0,
- rotatedLogChannel.getUnpersistedBytes());
- }
- }
-
- void addEntriesAndRotateLogs(EntryLogger entryLogger, int numOfRotations)
- throws IOException {
- EntryLogManagerBase entryLogManager = (EntryLogManagerBase) entryLogger.getEntryLogManager();
- entryLogManager.setCurrentLogForLedgerAndAddToRotate(EntryLogger.UNASSIGNED_LEDGERID, null);
- for (int i = 0; i < numOfRotations; i++) {
- addEntries(entryLogger, 10);
- entryLogManager.setCurrentLogForLedgerAndAddToRotate(EntryLogger.UNASSIGNED_LEDGERID, null);
- }
- addEntries(entryLogger, 10);
- }
-
- void addEntries(EntryLogger entryLogger, int noOfEntries) throws IOException {
- for (int j = 0; j < noOfEntries; j++) {
- int ledgerId = Math.abs(rand.nextInt());
- int entryId = Math.abs(rand.nextInt());
- entryLogger.addEntry(ledgerId, generateEntry(ledgerId, entryId).nioBuffer());
- }
- }
-
- static class LedgerStorageWriteTask implements Callable<Boolean> {
- long ledgerId;
- int entryId;
- LedgerStorage ledgerStorage;
-
- LedgerStorageWriteTask(long ledgerId, int entryId, LedgerStorage ledgerStorage) {
- this.ledgerId = ledgerId;
- this.entryId = entryId;
- this.ledgerStorage = ledgerStorage;
- }
-
- @Override
- public Boolean call() throws IOException, BookieException {
- try {
- ledgerStorage.addEntry(generateEntry(ledgerId, entryId));
- } catch (IOException e) {
- LOG.error("Got Exception for AddEntry call. LedgerId: " + ledgerId + " entryId: " + entryId, e);
- throw new IOException("Got Exception for AddEntry call. 
LedgerId: " + ledgerId + " entryId: " + entryId, - e); - } - return true; - } - } - - static class LedgerStorageFlushTask implements Callable { - LedgerStorage ledgerStorage; - - LedgerStorageFlushTask(LedgerStorage ledgerStorage) { - this.ledgerStorage = ledgerStorage; - } - - @Override - public Boolean call() throws IOException { - try { - ledgerStorage.flush(); - } catch (IOException e) { - LOG.error("Got Exception for flush call", e); - throw new IOException("Got Exception for Flush call", e); - } - return true; - } - } - - static class LedgerStorageReadTask implements Callable { - long ledgerId; - int entryId; - LedgerStorage ledgerStorage; - - LedgerStorageReadTask(long ledgerId, int entryId, LedgerStorage ledgerStorage) { - this.ledgerId = ledgerId; - this.entryId = entryId; - this.ledgerStorage = ledgerStorage; - } - - @Override - public Boolean call() throws IOException { - try { - ByteBuf expectedByteBuf = generateEntry(ledgerId, entryId); - ByteBuf actualByteBuf = ledgerStorage.getEntry(ledgerId, entryId); - if (!expectedByteBuf.equals(actualByteBuf)) { - LOG.error("Expected Entry: {} Actual Entry: {}", expectedByteBuf.toString(Charset.defaultCharset()), - actualByteBuf.toString(Charset.defaultCharset())); - throw new IOException("Expected Entry: " + expectedByteBuf.toString(Charset.defaultCharset()) - + " Actual Entry: " + actualByteBuf.toString(Charset.defaultCharset())); - } - } catch (IOException e) { - LOG.error("Got Exception for GetEntry call. LedgerId: " + ledgerId + " entryId: " + entryId, e); - throw new IOException("Got Exception for GetEntry call. LedgerId: " + ledgerId + " entryId: " + entryId, - e); - } - return true; - } - } - - /** - * test concurrent write operations and then concurrent read operations - * using InterleavedLedgerStorage. - */ - @FlakyTest(value = "https://github.com/apache/bookkeeper/issues/1516") - public void testConcurrentWriteAndReadCallsOfInterleavedLedgerStorage() throws Exception { - testConcurrentWriteAndReadCalls(InterleavedLedgerStorage.class.getName(), false); - } - - /** - * test concurrent write operations and then concurrent read operations - * using InterleavedLedgerStorage with EntryLogPerLedger enabled. - */ - @FlakyTest(value = "https://github.com/apache/bookkeeper/issues/1516") - public void testConcurrentWriteAndReadCallsOfInterleavedLedgerStorageWithELPLEnabled() throws Exception { - testConcurrentWriteAndReadCalls(InterleavedLedgerStorage.class.getName(), true); - } - - /** - * test concurrent write operations and then concurrent read operations - * using SortedLedgerStorage. - */ - @FlakyTest(value = "https://github.com/apache/bookkeeper/issues/1516") - public void testConcurrentWriteAndReadCallsOfSortedLedgerStorage() throws Exception { - testConcurrentWriteAndReadCalls(SortedLedgerStorage.class.getName(), false); - } - - /** - * test concurrent write operations and then concurrent read operations - * using SortedLedgerStorage with EntryLogPerLedger enabled. 
- */
- @FlakyTest(value = "https://github.com/apache/bookkeeper/issues/1516")
- public void testConcurrentWriteAndReadCallsOfSortedLedgerStorageWithELPLEnabled() throws Exception {
- testConcurrentWriteAndReadCalls(SortedLedgerStorage.class.getName(), true);
- }
-
- public void testConcurrentWriteAndReadCalls(String ledgerStorageClass, boolean entryLogPerLedgerEnabled)
- throws Exception {
- File ledgerDir = createTempDir("bkTest", ".dir");
- ServerConfiguration conf = TestBKConfiguration.newServerConfiguration();
- conf.setJournalDirName(ledgerDir.toString());
- conf.setLedgerDirNames(new String[] { ledgerDir.getAbsolutePath()});
- conf.setLedgerStorageClass(ledgerStorageClass);
- conf.setEntryLogPerLedgerEnabled(entryLogPerLedgerEnabled);
- Bookie bookie = new Bookie(conf);
- CompactableLedgerStorage ledgerStorage = (CompactableLedgerStorage) bookie.ledgerStorage;
- Random rand = new Random(0);
-
- if (ledgerStorageClass.equals(SortedLedgerStorage.class.getName())) {
- Assert.assertEquals("LedgerStorage Class", SortedLedgerStorage.class, ledgerStorage.getClass());
- if (entryLogPerLedgerEnabled) {
- Assert.assertEquals("MemTable Class", EntryMemTableWithParallelFlusher.class,
- ((SortedLedgerStorage) ledgerStorage).memTable.getClass());
- } else {
- Assert.assertEquals("MemTable Class", EntryMemTable.class,
- ((SortedLedgerStorage) ledgerStorage).memTable.getClass());
- }
- }
-
- int numOfLedgers = 70;
- int numEntries = 1500;
- // Create ledgers
- for (int i = 0; i < numOfLedgers; i++) {
- ledgerStorage.setMasterKey(i, "key".getBytes());
- }
-
- ExecutorService executor = Executors.newFixedThreadPool(10);
- List<Callable<Boolean>> writeAndFlushTasks = new ArrayList<Callable<Boolean>>();
- for (int j = 0; j < numEntries; j++) {
- for (int i = 0; i < numOfLedgers; i++) {
- writeAndFlushTasks.add(new LedgerStorageWriteTask(i, j, ledgerStorage));
- }
- }
-
- /*
- * interleave some flush tasks into the list of write tasks.
- */
- for (int i = 0; i < (numOfLedgers * numEntries) / 500; i++) {
- writeAndFlushTasks.add(rand.nextInt(writeAndFlushTasks.size()), new LedgerStorageFlushTask(ledgerStorage));
- }
-
- // invoke all those write/flush tasks at once, concurrently
- executor.invokeAll(writeAndFlushTasks).forEach((future) -> {
- try {
- future.get();
- } catch (InterruptedException ie) {
- Thread.currentThread().interrupt();
- LOG.error("Write/Flush task failed because of InterruptedException", ie);
- Assert.fail("Write/Flush task interrupted");
- } catch (Exception ex) {
- LOG.error("Write/Flush task failed because of exception", ex);
- Assert.fail("Write/Flush task failed " + ex.getMessage());
- }
- });
-
- List<Callable<Boolean>> readAndFlushTasks = new ArrayList<Callable<Boolean>>();
- for (int j = 0; j < numEntries; j++) {
- for (int i = 0; i < numOfLedgers; i++) {
- readAndFlushTasks.add(new LedgerStorageReadTask(i, j, ledgerStorage));
- }
- }
-
- /*
- * interleave some flush tasks into the list of read tasks.
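The splicing trick used for both task lists generalizes: flush tasks are inserted at random indices so invokeAll() dispatches them while writes or reads are still in flight. A sketch under assumed names (InterleavedRunner is not part of this test class):

import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

final class InterleavedRunner {
    static void runInterleaved(List<Callable<Boolean>> ioTasks, Callable<Boolean> flushTask,
            int numFlushes) throws InterruptedException {
        List<Callable<Boolean>> tasks = new ArrayList<>(ioTasks);
        Random rand = new Random(0); // fixed seed keeps the interleaving reproducible
        for (int i = 0; i < numFlushes; i++) {
            tasks.add(rand.nextInt(tasks.size()), flushTask); // splice a flush at a random index
        }
        ExecutorService executor = Executors.newFixedThreadPool(10);
        try {
            executor.invokeAll(tasks); // returns only after every task has completed
        } finally {
            executor.shutdownNow();
        }
    }
}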
- */ - for (int i = 0; i < (numOfLedgers * numEntries) / 500; i++) { - readAndFlushTasks.add(rand.nextInt(readAndFlushTasks.size()), new LedgerStorageFlushTask(ledgerStorage)); - } - - // invoke all those read/flush tasks all at once concurrently - executor.invokeAll(readAndFlushTasks).forEach((future) -> { - try { - future.get(); - } catch (InterruptedException ie) { - Thread.currentThread().interrupt(); - LOG.error("Read/Flush task failed because of InterruptedException", ie); - Assert.fail("Read/Flush task interrupted"); - } catch (Exception ex) { - LOG.error("Read/Flush task failed because of exception", ex); - Assert.fail("Read/Flush task failed " + ex.getMessage()); - } - }); - - executor.shutdownNow(); - } - - /** - * Test to verify the leastUnflushedLogId logic in EntryLogsStatus. - */ - @Test - public void testEntryLoggersRecentEntryLogsStatus() throws Exception { - ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); - conf.setLedgerDirNames(createAndGetLedgerDirs(2)); - LedgerDirsManager ledgerDirsManager = new LedgerDirsManager(conf, conf.getLedgerDirs(), - new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold())); - - EntryLogger entryLogger = new EntryLogger(conf, ledgerDirsManager); - EntryLogger.RecentEntryLogsStatus recentlyCreatedLogsStatus = entryLogger.recentlyCreatedEntryLogsStatus; - - recentlyCreatedLogsStatus.createdEntryLog(0L); - Assert.assertEquals("entryLogger's leastUnflushedLogId ", 0L, entryLogger.getLeastUnflushedLogId()); - recentlyCreatedLogsStatus.flushRotatedEntryLog(0L); - // since we marked entrylog - 0 as rotated, LeastUnflushedLogId would be previous rotatedlog+1 - Assert.assertEquals("entryLogger's leastUnflushedLogId ", 1L, entryLogger.getLeastUnflushedLogId()); - recentlyCreatedLogsStatus.createdEntryLog(1L); - Assert.assertEquals("entryLogger's leastUnflushedLogId ", 1L, entryLogger.getLeastUnflushedLogId()); - recentlyCreatedLogsStatus.createdEntryLog(2L); - recentlyCreatedLogsStatus.createdEntryLog(3L); - recentlyCreatedLogsStatus.createdEntryLog(4L); - Assert.assertEquals("entryLogger's leastUnflushedLogId ", 1L, entryLogger.getLeastUnflushedLogId()); - recentlyCreatedLogsStatus.flushRotatedEntryLog(1L); - Assert.assertEquals("entryLogger's leastUnflushedLogId ", 2L, entryLogger.getLeastUnflushedLogId()); - recentlyCreatedLogsStatus.flushRotatedEntryLog(3L); - // here though we rotated entrylog-3, entrylog-2 is not yet rotated so - // LeastUnflushedLogId should be still 2 - Assert.assertEquals("entryLogger's leastUnflushedLogId ", 2L, entryLogger.getLeastUnflushedLogId()); - recentlyCreatedLogsStatus.flushRotatedEntryLog(2L); - // entrylog-3 is already rotated, so leastUnflushedLogId should be 4 - Assert.assertEquals("entryLogger's leastUnflushedLogId ", 4L, entryLogger.getLeastUnflushedLogId()); - recentlyCreatedLogsStatus.flushRotatedEntryLog(4L); - Assert.assertEquals("entryLogger's leastUnflushedLogId ", 5L, entryLogger.getLeastUnflushedLogId()); - recentlyCreatedLogsStatus.createdEntryLog(5L); - recentlyCreatedLogsStatus.createdEntryLog(7L); - recentlyCreatedLogsStatus.createdEntryLog(9L); - Assert.assertEquals("entryLogger's leastUnflushedLogId ", 5L, entryLogger.getLeastUnflushedLogId()); - recentlyCreatedLogsStatus.flushRotatedEntryLog(5L); - // since we marked entrylog-5 as rotated, LeastUnflushedLogId would be previous rotatedlog+1 - Assert.assertEquals("entryLogger's leastUnflushedLogId ", 6L, entryLogger.getLeastUnflushedLogId()); - recentlyCreatedLogsStatus.flushRotatedEntryLog(7L); - 
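/*
 * A sketch (not the actual RecentEntryLogsStatus code) of the bookkeeping
 * these assertions exercise: created logs are tracked in id order with a
 * flushed flag, and the least-unflushed id advances by popping the flushed
 * prefix, so an out-of-order flush (3 before 2, or 7 while 9 is pending)
 * is held back until the gap below it closes.
 */
import java.util.Map;
import java.util.TreeMap;

final class RecentLogsStatusSketch {
    private final TreeMap<Long, Boolean> createdLogs = new TreeMap<>(); // logId -> flushed?
    private long leastUnflushedLogId = 0;

    synchronized void createdEntryLog(long logId) {
        createdLogs.put(logId, Boolean.FALSE);
    }

    synchronized void flushRotatedEntryLog(long logId) {
        createdLogs.replace(logId, Boolean.TRUE);
        // pop the contiguous flushed prefix and move the watermark past it
        Map.Entry<Long, Boolean> first;
        while ((first = createdLogs.firstEntry()) != null && first.getValue()) {
            createdLogs.remove(first.getKey());
            leastUnflushedLogId = first.getKey() + 1;
        }
    }

    synchronized long getLeastUnflushedLogId() {
        return leastUnflushedLogId;
    }
}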
Assert.assertEquals("entryLogger's leastUnflushedLogId ", 8L, entryLogger.getLeastUnflushedLogId()); - } - - String[] createAndGetLedgerDirs(int numOfLedgerDirs) throws IOException { - File ledgerDir; - File curDir; - String[] ledgerDirsPath = new String[numOfLedgerDirs]; - for (int i = 0; i < numOfLedgerDirs; i++) { - ledgerDir = createTempDir("bkTest", ".dir"); - curDir = Bookie.getCurrentDirectory(ledgerDir); - Bookie.checkDirectoryStructure(curDir); - ledgerDirsPath[i] = ledgerDir.getAbsolutePath(); - } - return ledgerDirsPath; - } - - /* - * test for validating if the EntryLog/BufferedChannel flushes/forcewrite if the bytes written to it are more than - * flushIntervalInBytes - */ - @Test - public void testFlushIntervalInBytes() throws Exception { - long flushIntervalInBytes = 5000; - ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); - conf.setEntryLogPerLedgerEnabled(true); - conf.setFlushIntervalInBytes(flushIntervalInBytes); - conf.setLedgerDirNames(createAndGetLedgerDirs(2)); - LedgerDirsManager ledgerDirsManager = new LedgerDirsManager(conf, conf.getLedgerDirs(), - new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold())); - EntryLogger entryLogger = new EntryLogger(conf, ledgerDirsManager); - EntryLogManagerBase entryLogManagerBase = ((EntryLogManagerBase) entryLogger.getEntryLogManager()); - - /* - * when entryLogger is created Header of length EntryLogger.LOGFILE_HEADER_SIZE is created - */ - long ledgerId = 0L; - int firstEntrySize = 1000; - long entry0Position = entryLogger.addEntry(0L, generateEntry(ledgerId, 0L, firstEntrySize)); - // entrylogger writes length of the entry (4 bytes) before writing entry - long expectedUnpersistedBytes = EntryLogger.LOGFILE_HEADER_SIZE + firstEntrySize + 4; - Assert.assertEquals("Unpersisted Bytes of entrylog", expectedUnpersistedBytes, - entryLogManagerBase.getCurrentLogForLedger(ledgerId).getUnpersistedBytes()); - - /* - * 'flushIntervalInBytes' number of bytes are flushed so BufferedChannel should be forcewritten - */ - int secondEntrySize = (int) (flushIntervalInBytes - expectedUnpersistedBytes); - long entry1Position = entryLogger.addEntry(0L, generateEntry(ledgerId, 1L, secondEntrySize)); - Assert.assertEquals("Unpersisted Bytes of entrylog", 0, - entryLogManagerBase.getCurrentLogForLedger(ledgerId).getUnpersistedBytes()); - - /* - * since entrylog/Bufferedchannel is persisted (forcewritten), we should be able to read the entrylog using - * newEntryLogger - */ - conf.setEntryLogPerLedgerEnabled(false); - EntryLogger newEntryLogger = new EntryLogger(conf, ledgerDirsManager); - EntryLogManager newEntryLogManager = newEntryLogger.getEntryLogManager(); - Assert.assertEquals("EntryLogManager class type", EntryLogManagerForSingleEntryLog.class, - newEntryLogManager.getClass()); - - ByteBuf buf = newEntryLogger.readEntry(ledgerId, 0L, entry0Position); - long readLedgerId = buf.readLong(); - long readEntryId = buf.readLong(); - Assert.assertEquals("LedgerId", ledgerId, readLedgerId); - Assert.assertEquals("EntryId", 0L, readEntryId); - - buf = newEntryLogger.readEntry(ledgerId, 1L, entry1Position); - readLedgerId = buf.readLong(); - readEntryId = buf.readLong(); - Assert.assertEquals("LedgerId", ledgerId, readLedgerId); - Assert.assertEquals("EntryId", 1L, readEntryId); - } - - /* - * tests basic logic of EntryLogManager interface for - * EntryLogManagerForEntryLogPerLedger. 
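The arithmetic in testFlushIntervalInBytes above is easier to follow as a worked example. It assumes EntryLogger.LOGFILE_HEADER_SIZE is 1024 bytes (its value in this codebase at the time of the change; treat it as an assumption) and uses the test's configuration of flushIntervalInBytes = 5000 and firstEntrySize = 1000:

long header = 1024;                              // LOGFILE_HEADER_SIZE, written when the log is created
long afterEntry0 = header + 4 + 1000;            // 4-byte length prefix + payload = 2028 unpersisted bytes
long entry1Size = 5000 - afterEntry0;            // 2972, sized to land exactly on the flush threshold
long afterEntry1 = afterEntry0 + 4 + entry1Size; // 5004 >= flushIntervalInBytes
// Crossing flushIntervalInBytes triggers a flush and force-write, so the
// channel's unpersisted byte count drops back to 0, which is why the second
// assertion expects 0 and why the entry log is then readable by a fresh EntryLogger.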
- */ - @Test - public void testEntryLogManagerInterfaceForEntryLogPerLedger() throws Exception { - ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); - conf.setEntryLogFilePreAllocationEnabled(true); - conf.setEntryLogPerLedgerEnabled(true); - conf.setLedgerDirNames(createAndGetLedgerDirs(2)); - LedgerDirsManager ledgerDirsManager = new LedgerDirsManager(conf, conf.getLedgerDirs(), - new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold())); - - EntryLogger entryLogger = new EntryLogger(conf, ledgerDirsManager); - EntryLogManagerForEntryLogPerLedger entryLogManager = (EntryLogManagerForEntryLogPerLedger) entryLogger - .getEntryLogManager(); - - Assert.assertEquals("Number of current active EntryLogs ", 0, entryLogManager.getCopyOfCurrentLogs().size()); - Assert.assertEquals("Number of Rotated Logs ", 0, entryLogManager.getRotatedLogChannels().size()); - - int numOfLedgers = 5; - int numOfThreadsPerLedger = 10; - validateLockAcquireAndRelease(numOfLedgers, numOfThreadsPerLedger, entryLogManager); - - for (long i = 0; i < numOfLedgers; i++) { - entryLogManager.setCurrentLogForLedgerAndAddToRotate(i, - createDummyBufferedLogChannel(entryLogger, i, conf)); - } - - for (long i = 0; i < numOfLedgers; i++) { - Assert.assertEquals("LogChannel for ledger: " + i, entryLogManager.getCurrentLogIfPresent(i), - entryLogManager.getCurrentLogForLedger(i)); - } - - Assert.assertEquals("Number of current active EntryLogs ", numOfLedgers, - entryLogManager.getCopyOfCurrentLogs().size()); - Assert.assertEquals("Number of Rotated Logs ", 0, entryLogManager.getRotatedLogChannels().size()); - - for (long i = 0; i < numOfLedgers; i++) { - entryLogManager.setCurrentLogForLedgerAndAddToRotate(i, - createDummyBufferedLogChannel(entryLogger, numOfLedgers + i, conf)); - } - - /* - * since new entryLogs are set for all the ledgers, previous entrylogs would be added to rotatedLogChannels - */ - Assert.assertEquals("Number of current active EntryLogs ", numOfLedgers, - entryLogManager.getCopyOfCurrentLogs().size()); - Assert.assertEquals("Number of Rotated Logs ", numOfLedgers, - entryLogManager.getRotatedLogChannels().size()); - - for (long i = 0; i < numOfLedgers; i++) { - entryLogManager.setCurrentLogForLedgerAndAddToRotate(i, - createDummyBufferedLogChannel(entryLogger, 2 * numOfLedgers + i, conf)); - } - - /* - * again since new entryLogs are set for all the ledgers, previous entrylogs would be added to - * rotatedLogChannels - */ - Assert.assertEquals("Number of current active EntryLogs ", numOfLedgers, - entryLogManager.getCopyOfCurrentLogs().size()); - Assert.assertEquals("Number of Rotated Logs ", 2 * numOfLedgers, - entryLogManager.getRotatedLogChannels().size()); - - for (BufferedLogChannel logChannel : entryLogManager.getRotatedLogChannels()) { - entryLogManager.getRotatedLogChannels().remove(logChannel); - } - Assert.assertEquals("Number of Rotated Logs ", 0, entryLogManager.getRotatedLogChannels().size()); - - // entrylogid is sequential - for (long i = 0; i < numOfLedgers; i++) { - assertEquals("EntryLogid for Ledger " + i, 2 * numOfLedgers + i, - entryLogManager.getCurrentLogForLedger(i).getLogId()); - } - - for (long i = 2 * numOfLedgers; i < (3 * numOfLedgers); i++) { - assertTrue("EntryLog with logId: " + i + " should be present", - entryLogManager.getCurrentLogIfPresent(i) != null); - } - } - - private EntryLogger.BufferedLogChannel createDummyBufferedLogChannel(EntryLogger entryLogger, long logid, - ServerConfiguration servConf) throws IOException { - File 
tmpFile = File.createTempFile("entrylog", logid + "");
- tmpFile.deleteOnExit();
- FileChannel fc = new RandomAccessFile(tmpFile, "rw").getChannel();
- EntryLogger.BufferedLogChannel logChannel = new BufferedLogChannel(fc, 10, 10, logid, tmpFile,
- servConf.getFlushIntervalInBytes());
- return logChannel;
- }
-
- /*
- * Validates the concurrency behavior of entryLogManager's per-ledger lock.
- *
- * A fixed thread pool of size 'numOfLedgers * numOfThreadsPerLedger' is created and the same number
- * of tasks is submitted to it. Each task acquires and then releases the lock of its ledger.
- */
- private void validateLockAcquireAndRelease(int numOfLedgers, int numOfThreadsPerLedger,
- EntryLogManagerForEntryLogPerLedger entryLogManager) throws InterruptedException {
- ExecutorService tpe = Executors.newFixedThreadPool(numOfLedgers * numOfThreadsPerLedger);
- CountDownLatch latchToStart = new CountDownLatch(1);
- CountDownLatch latchToWait = new CountDownLatch(1);
- AtomicInteger numberOfThreadsAcquiredLock = new AtomicInteger(0);
- AtomicBoolean irptExceptionHappened = new AtomicBoolean(false);
- Random rand = new Random();
-
- for (int i = 0; i < numOfLedgers * numOfThreadsPerLedger; i++) {
- long ledgerId = i % numOfLedgers;
- tpe.submit(() -> {
- try {
- latchToStart.await();
- Lock lock = entryLogManager.getLock(ledgerId);
- lock.lock();
- numberOfThreadsAcquiredLock.incrementAndGet();
- latchToWait.await();
- lock.unlock();
- } catch (InterruptedException | IOException e) {
- irptExceptionHappened.set(true);
- }
- });
- }
-
- assertEquals("Number Of Threads acquired Lock", 0, numberOfThreadsAcquiredLock.get());
- latchToStart.countDown();
- Thread.sleep(1000);
- /*
- * since there are only "numOfLedgers" ledgers, at most "numOfLedgers"
- * threads should have been able to acquire a lock; multiple ledgers can
- * even end up sharing the same lock when their hash codes fall into the
- * same bucket.
- *
- * After acquiring the lock, each thread waits on the 'latchToWait' latch.
- */
- int currentNumberOfThreadsAcquiredLock = numberOfThreadsAcquiredLock.get();
- assertTrue("Number Of Threads acquired Lock " + currentNumberOfThreadsAcquiredLock,
- (currentNumberOfThreadsAcquiredLock > 0) && (currentNumberOfThreadsAcquiredLock <= numOfLedgers));
- latchToWait.countDown();
- Thread.sleep(2000);
- assertEquals("Number Of Threads acquired Lock", numOfLedgers * numOfThreadsPerLedger,
- numberOfThreadsAcquiredLock.get());
- }
-
- /*
- * Tests that EntryLogManagerForEntryLogPerLedger removes a ledger from its
- * cache map if no entry is added to that ledger and its state is not
- * accessed for more than the eviction period.
- *
- * @throws Exception
- */
- @Test
- public void testEntryLogManagerExpiryRemoval() throws Exception {
- int evictionPeriod = 1;
-
- ServerConfiguration conf = TestBKConfiguration.newServerConfiguration();
- conf.setEntryLogFilePreAllocationEnabled(false);
- conf.setEntryLogPerLedgerEnabled(true);
- conf.setLedgerDirNames(createAndGetLedgerDirs(2));
- conf.setEntrylogMapAccessExpiryTimeInSeconds(evictionPeriod);
- LedgerDirsManager ledgerDirsManager = new LedgerDirsManager(conf, conf.getLedgerDirs(),
- new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold()));
-
- EntryLogger entryLogger = new EntryLogger(conf, ledgerDirsManager);
- EntryLogManagerForEntryLogPerLedger entryLogManager =
- (EntryLogManagerForEntryLogPerLedger) entryLogger.getEntryLogManager();
-
- long ledgerId = 0L;
-
- BufferedLogChannel logChannel = createDummyBufferedLogChannel(entryLogger, 0, conf);
- entryLogManager.setCurrentLogForLedgerAndAddToRotate(ledgerId, logChannel);
-
- BufferedLogChannel currentLogForLedger = entryLogManager.getCurrentLogForLedger(ledgerId);
- assertEquals("LogChannel for ledger " + ledgerId + " should match", logChannel, currentLogForLedger);
-
- Thread.sleep(evictionPeriod * 1000 + 100);
- entryLogManager.doEntryLogMapCleanup();
-
- /*
- * since the ledger was not accessed for more than the eviction period and
- * the cache has been cleaned up, the mapping for that ledger should no
- * longer be available
- */
- currentLogForLedger = entryLogManager.getCurrentLogForLedger(ledgerId);
- assertEquals("LogChannel for ledger " + ledgerId + " should be null", null, currentLogForLedger);
- Assert.assertEquals("Number of current active EntryLogs ", 0, entryLogManager.getCopyOfCurrentLogs().size());
- Assert.assertEquals("Number of rotated EntryLogs ", 1, entryLogManager.getRotatedLogChannels().size());
- Assert.assertTrue("CopyOfRotatedLogChannels should contain the created LogChannel",
- entryLogManager.getRotatedLogChannels().contains(logChannel));
-
- Assert.assertTrue("since the map entry must have been evicted, it should be null",
- (entryLogManager.getCacheAsMap().get(ledgerId) == null)
- || (entryLogManager.getCacheAsMap().get(ledgerId).getEntryLogWithDirInfo() == null));
- }
-
- /*
- * Tests that the maximum cache size (maximumNumberOfActiveEntryLogs) is
- * honored by EntryLogManagerForEntryLogPerLedger's cache eviction policy.
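The expiry test above and the size-cap test below together describe behaviour that can be expressed with a Guava cache. A sketch under that assumption; the real EntryLogManagerForEntryLogPerLedger may be implemented differently, and the Long values here stand in for log channels:

import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.RemovalListener;
import java.util.Queue;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.TimeUnit;

final class LedgerLogCacheSketch {
    final Queue<Long> rotatedLogIds = new ConcurrentLinkedQueue<>();
    final Cache<Long, Long> currentLogByLedger; // ledgerId -> logId, standing in for a log channel

    LedgerLogCacheSketch(int evictionPeriodSeconds, int maxActiveLogs) {
        // Evicted entries are rotated rather than dropped, so their data still
        // gets flushed; this mirrors the assertions about rotated logs above.
        RemovalListener<Long, Long> rotateOnEviction = n -> rotatedLogIds.add(n.getValue());
        currentLogByLedger = CacheBuilder.newBuilder()
                .expireAfterAccess(evictionPeriodSeconds, TimeUnit.SECONDS) // time-based expiry
                .maximumSize(maxActiveLogs)                                 // cap on active per-ledger logs
                .removalListener(rotateOnEviction)
                .build();
    }
}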
- */ - @Test - public void testCacheMaximumSizeEvictionPolicy() throws Exception { - final int cacheMaximumSize = 20; - - ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); - conf.setEntryLogFilePreAllocationEnabled(true); - conf.setEntryLogPerLedgerEnabled(true); - conf.setLedgerDirNames(createAndGetLedgerDirs(1)); - conf.setMaximumNumberOfActiveEntryLogs(cacheMaximumSize); - LedgerDirsManager ledgerDirsManager = new LedgerDirsManager(conf, conf.getLedgerDirs(), - new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold())); - - EntryLogger entryLogger = new EntryLogger(conf, ledgerDirsManager); - EntryLogManagerForEntryLogPerLedger entryLogManager = - (EntryLogManagerForEntryLogPerLedger) entryLogger.getEntryLogManager(); - - for (int i = 0; i < cacheMaximumSize + 10; i++) { - entryLogManager.createNewLog(i); - int cacheSize = entryLogManager.getCacheAsMap().size(); - Assert.assertTrue("Cache maximum size is expected to be less than " + cacheMaximumSize - + " but current cacheSize is " + cacheSize, cacheSize <= cacheMaximumSize); - } - } - - @Test - public void testLongLedgerIdsWithEntryLogPerLedger() throws Exception { - ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); - conf.setEntryLogFilePreAllocationEnabled(true); - conf.setEntryLogPerLedgerEnabled(true); - conf.setLedgerDirNames(createAndGetLedgerDirs(1)); - conf.setLedgerStorageClass(InterleavedLedgerStorage.class.getName()); - - LedgerDirsManager ledgerDirsManager = new LedgerDirsManager(conf, conf.getLedgerDirs(), - new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold())); - - EntryLogger entryLogger = new EntryLogger(conf, ledgerDirsManager); - EntryLogManagerForEntryLogPerLedger entryLogManager = (EntryLogManagerForEntryLogPerLedger) entryLogger - .getEntryLogManager(); - - int numOfLedgers = 5; - int numOfEntries = 4; - long[][] pos = new long[numOfLedgers][numOfEntries]; - for (int i = 0; i < numOfLedgers; i++) { - long ledgerId = Long.MAX_VALUE - i; - entryLogManager.createNewLog(ledgerId); - for (int entryId = 0; entryId < numOfEntries; entryId++) { - pos[i][entryId] = entryLogger.addEntry(ledgerId, generateEntry(ledgerId, entryId).nioBuffer()); - } - } - /* - * do checkpoint to make sure entrylog files are persisted - */ - entryLogger.checkpoint(); - - for (int i = 0; i < numOfLedgers; i++) { - long ledgerId = Long.MAX_VALUE - i; - for (int entryId = 0; entryId < numOfEntries; entryId++) { - String expectedValue = generateDataString(ledgerId, entryId); - ByteBuf buf = entryLogger.readEntry(ledgerId, entryId, pos[i][entryId]); - long readLedgerId = buf.readLong(); - long readEntryId = buf.readLong(); - byte[] readData = new byte[buf.readableBytes()]; - buf.readBytes(readData); - assertEquals("LedgerId ", ledgerId, readLedgerId); - assertEquals("EntryId ", entryId, readEntryId); - assertEquals("Entry Data ", expectedValue, new String(readData)); - } - } - } - - /* - * when entrylog for ledger is removed from ledgerIdEntryLogMap, then - * ledgermap should be appended to that entrylog, before moving that - * entrylog to rotatedlogchannels. 
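These tests repeatedly recover an entry log id from an addEntry() return value via "position >> 32". A sketch of that packing, assuming (as the shifts in these tests imply) the log id sits in the high 32 bits and the byte offset within the log file in the low 32 bits; EntryLocation is an illustrative name:

final class EntryLocation {
    static long location(long entryLogId, long offsetInLog) {
        return (entryLogId << 32L) | (offsetInLog & 0xFFFFFFFFL); // pack id + offset into one long
    }

    static long logId(long location) {
        return location >>> 32; // high 32 bits
    }

    static long offsetInLog(long location) {
        return location & 0xFFFFFFFFL; // low 32 bits
    }
}

This is also why sequentially created logs yield sequential values of "position >> 32", which the assertions check directly.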
- */ - @Test - public void testAppendLedgersMapOnCacheRemoval() throws Exception { - final int cacheMaximumSize = 5; - - ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); - conf.setEntryLogFilePreAllocationEnabled(true); - conf.setEntryLogPerLedgerEnabled(true); - conf.setLedgerDirNames(createAndGetLedgerDirs(1)); - conf.setMaximumNumberOfActiveEntryLogs(cacheMaximumSize); - LedgerDirsManager ledgerDirsManager = new LedgerDirsManager(conf, conf.getLedgerDirs(), - new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold())); - - EntryLogger entryLogger = new EntryLogger(conf, ledgerDirsManager); - EntryLogManagerForEntryLogPerLedger entryLogManager = (EntryLogManagerForEntryLogPerLedger) entryLogger - .getEntryLogManager(); - - long ledgerId = 0L; - entryLogManager.createNewLog(ledgerId); - int entrySize = 200; - int numOfEntries = 4; - for (int i = 0; i < numOfEntries; i++) { - entryLogger.addEntry(ledgerId, generateEntry(ledgerId, i, entrySize)); - } - - BufferedLogChannel logChannelForledger = entryLogManager.getCurrentLogForLedger(ledgerId); - long logIdOfLedger = logChannelForledger.getLogId(); - /* - * do checkpoint to make sure entrylog files are persisted - */ - entryLogger.checkpoint(); - - try { - entryLogger.extractEntryLogMetadataFromIndex(logIdOfLedger); - } catch (IOException ie) { - // expected because appendLedgersMap wouldn't have been called - } - - /* - * create entrylogs for more ledgers, so that ledgerIdEntryLogMap would - * reach its limit and remove the oldest entrylog. - */ - for (int i = 1; i <= cacheMaximumSize; i++) { - entryLogManager.createNewLog(i); - } - /* - * do checkpoint to make sure entrylog files are persisted - */ - entryLogger.checkpoint(); - - EntryLogMetadata entryLogMetadata = entryLogger.extractEntryLogMetadataFromIndex(logIdOfLedger); - ConcurrentLongLongHashMap ledgersMap = entryLogMetadata.getLedgersMap(); - Assert.assertEquals("There should be only one entry in entryLogMetadata", 1, ledgersMap.size()); - Assert.assertTrue("Usage should be 1", Double.compare(1.0, entryLogMetadata.getUsage()) == 0); - Assert.assertEquals("Total size of entries", (entrySize + 4) * numOfEntries, ledgersMap.get(ledgerId)); - } - - /** - * test EntryLogManager.EntryLogManagerForEntryLogPerLedger doesn't removes - * the ledger from its cache map if ledger's corresponding state is accessed - * within the evictionPeriod. 
- * - * @throws Exception - */ - @Test - public void testExpiryRemovalByAccessingOnAnotherThread() throws Exception { - int evictionPeriod = 1; - ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); - conf.setEntryLogFilePreAllocationEnabled(false); - conf.setEntryLogPerLedgerEnabled(true); - conf.setLedgerDirNames(createAndGetLedgerDirs(2)); - conf.setEntrylogMapAccessExpiryTimeInSeconds(evictionPeriod); - LedgerDirsManager ledgerDirsManager = new LedgerDirsManager(conf, conf.getLedgerDirs(), - new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold())); - - EntryLogger entryLogger = new EntryLogger(conf, ledgerDirsManager); - EntryLogManagerForEntryLogPerLedger entryLogManager = - (EntryLogManagerForEntryLogPerLedger) entryLogger.getEntryLogManager(); - - long ledgerId = 0L; - - BufferedLogChannel newLogChannel = createDummyBufferedLogChannel(entryLogger, 1, conf); - entryLogManager.setCurrentLogForLedgerAndAddToRotate(ledgerId, newLogChannel); - - Thread t = new Thread() { - public void run() { - try { - Thread.sleep((evictionPeriod * 1000) / 2); - entryLogManager.getCurrentLogForLedger(ledgerId); - } catch (InterruptedException | IOException e) { - } - } - }; - - t.start(); - Thread.sleep(evictionPeriod * 1000 + 100); - entryLogManager.doEntryLogMapCleanup(); - - /* - * in this scenario, that ledger is accessed by other thread during - * eviction period time, so it should not be evicted. - */ - BufferedLogChannel currentLogForLedger = entryLogManager.getCurrentLogForLedger(ledgerId); - assertEquals("LogChannel for ledger " + ledgerId, newLogChannel, currentLogForLedger); - Assert.assertEquals("Number of current active EntryLogs ", 1, entryLogManager.getCopyOfCurrentLogs().size()); - Assert.assertEquals("Number of rotated EntryLogs ", 0, entryLogManager.getRotatedLogChannels().size()); - } - - /** - * test EntryLogManager.EntryLogManagerForEntryLogPerLedger removes the - * ledger from its cache map if entry is not added to that ledger or its - * corresponding state is not accessed for more than evictionPeriod. In this - * testcase we try to call unrelated methods or access state of other - * ledgers within the eviction period. 
- * - * @throws Exception - */ - @Test - public void testExpiryRemovalByAccessingNonCacheRelatedMethods() throws Exception { - int evictionPeriod = 1; - - ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); - conf.setEntryLogFilePreAllocationEnabled(false); - conf.setEntryLogPerLedgerEnabled(true); - conf.setLedgerDirNames(createAndGetLedgerDirs(2)); - conf.setEntrylogMapAccessExpiryTimeInSeconds(evictionPeriod); - LedgerDirsManager ledgerDirsManager = new LedgerDirsManager(conf, conf.getLedgerDirs(), - new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold())); - - EntryLogger entryLogger = new EntryLogger(conf, ledgerDirsManager); - EntryLogManagerForEntryLogPerLedger entryLogManager = - (EntryLogManagerForEntryLogPerLedger) entryLogger.getEntryLogManager(); - - long ledgerId = 0L; - - BufferedLogChannel newLogChannel = createDummyBufferedLogChannel(entryLogger, 1, conf); - entryLogManager.setCurrentLogForLedgerAndAddToRotate(ledgerId, newLogChannel); - - AtomicBoolean exceptionOccured = new AtomicBoolean(false); - Thread t = new Thread() { - public void run() { - try { - Thread.sleep(500); - /* - * any of the following operations should not access entry - * of 'ledgerId' in the cache - */ - entryLogManager.getCopyOfCurrentLogs(); - entryLogManager.getRotatedLogChannels(); - entryLogManager.getCurrentLogIfPresent(newLogChannel.getLogId()); - entryLogManager.getDirForNextEntryLog(ledgerDirsManager.getWritableLedgerDirs()); - long newLedgerId = 100; - BufferedLogChannel logChannelForNewLedger = - createDummyBufferedLogChannel(entryLogger, newLedgerId, conf); - entryLogManager.setCurrentLogForLedgerAndAddToRotate(newLedgerId, logChannelForNewLedger); - entryLogManager.getCurrentLogIfPresent(newLedgerId); - } catch (Exception e) { - LOG.error("Got Exception in thread", e); - exceptionOccured.set(true); - } - } - }; - - t.start(); - Thread.sleep(evictionPeriod * 1000 + 100); - entryLogManager.doEntryLogMapCleanup(); - Assert.assertFalse("Exception occured in thread, which is not expected", exceptionOccured.get()); - - /* - * since for more than evictionPeriod, that ledger is not accessed and cache is cleaned up, mapping for that - * ledger should not be available anymore - */ - BufferedLogChannel currentLogForLedger = entryLogManager.getCurrentLogForLedger(ledgerId); - assertEquals("LogChannel for ledger " + ledgerId + " should be null", null, currentLogForLedger); - // expected number of current active entryLogs is 1 since we created entrylog for 'newLedgerId' - Assert.assertEquals("Number of current active EntryLogs ", 1, entryLogManager.getCopyOfCurrentLogs().size()); - Assert.assertEquals("Number of rotated EntryLogs ", 1, entryLogManager.getRotatedLogChannels().size()); - Assert.assertTrue("CopyOfRotatedLogChannels should contain the created LogChannel", - entryLogManager.getRotatedLogChannels().contains(newLogChannel)); - - Assert.assertTrue("since mapentry must have been evicted, it should be null", - (entryLogManager.getCacheAsMap().get(ledgerId) == null) - || (entryLogManager.getCacheAsMap().get(ledgerId).getEntryLogWithDirInfo() == null)); - } - - /* - * testing EntryLogger functionality (addEntry/createNewLog/flush) and EntryLogManager with entryLogPerLedger - * enabled - */ - @Test - public void testEntryLogManagerForEntryLogPerLedger() throws Exception { - ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); - conf.setEntryLogPerLedgerEnabled(true); - conf.setFlushIntervalInBytes(10000000); - 
conf.setLedgerDirNames(createAndGetLedgerDirs(2));
- LedgerDirsManager ledgerDirsManager = new LedgerDirsManager(conf, conf.getLedgerDirs(),
- new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold()));
- EntryLogger entryLogger = new EntryLogger(conf, ledgerDirsManager);
- EntryLogManagerBase entryLogManager = (EntryLogManagerBase) entryLogger.getEntryLogManager();
- Assert.assertEquals("EntryLogManager class type", EntryLogManagerForEntryLogPerLedger.class,
- entryLogManager.getClass());
-
- int numOfActiveLedgers = 20;
- int numEntries = 5;
-
- for (int j = 0; j < numEntries; j++) {
- for (long i = 0; i < numOfActiveLedgers; i++) {
- entryLogger.addEntry(i, generateEntry(i, j));
- }
- }
-
- for (long i = 0; i < numOfActiveLedgers; i++) {
- BufferedLogChannel logChannel = entryLogManager.getCurrentLogForLedger(i);
- Assert.assertTrue("unpersistedBytes should be greater than LOGFILE_HEADER_SIZE",
- logChannel.getUnpersistedBytes() > EntryLogger.LOGFILE_HEADER_SIZE);
- }
-
- for (long i = 0; i < numOfActiveLedgers; i++) {
- entryLogManager.createNewLog(i);
- }
-
- /*
- * since a new entry log was created for every active ledger, the entry logs
- * of all the ledgers should be rotated, so the size of
- * copyOfRotatedLogChannels should be numOfActiveLedgers
- */
- List<BufferedLogChannel> rotatedLogs = entryLogManager.getRotatedLogChannels();
- Assert.assertEquals("Number of rotated entrylogs", numOfActiveLedgers, rotatedLogs.size());
-
- /*
- * Since a new log was created for every slot, the old logs are moved to the
- * rotated logs, so the unpersistedBytes of every slot should be just
- * EntryLogger.LOGFILE_HEADER_SIZE
- */
- for (long i = 0; i < numOfActiveLedgers; i++) {
- BufferedLogChannel logChannel = entryLogManager.getCurrentLogForLedger(i);
- Assert.assertEquals("unpersistedBytes should be LOGFILE_HEADER_SIZE", EntryLogger.LOGFILE_HEADER_SIZE,
- logChannel.getUnpersistedBytes());
- }
-
- for (int j = numEntries; j < 2 * numEntries; j++) {
- for (long i = 0; i < numOfActiveLedgers; i++) {
- entryLogger.addEntry(i, generateEntry(i, j));
- }
- }
-
- for (long i = 0; i < numOfActiveLedgers; i++) {
- BufferedLogChannel logChannel = entryLogManager.getCurrentLogForLedger(i);
- Assert.assertTrue("unpersistedBytes should be greater than LOGFILE_HEADER_SIZE",
- logChannel.getUnpersistedBytes() > EntryLogger.LOGFILE_HEADER_SIZE);
- }
-
- Assert.assertEquals("LeastUnflushedloggerID", 0, entryLogger.getLeastUnflushedLogId());
-
- /*
- * flush is called here, so all the rotated log channels should be flushed
- * and closed, leaving no rotated log channels, and leastUnflushedLogId
- * should advance to numOfActiveLedgers
- */
- entryLogger.flush();
- Assert.assertEquals("Number of rotated entrylogs", 0, entryLogManager.getRotatedLogChannels().size());
- Assert.assertEquals("LeastUnflushedloggerID", numOfActiveLedgers, entryLogger.getLeastUnflushedLogId());
-
- /*
- * after flush (flushCurrentLogs) unpersistedBytes should be 0.
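The assertions above pin down an ordering contract for flush(). A reduced sketch of that contract (illustrative types, not the real EntryLogger): rotated logs are force-written and closed first, which is what lets the least-unflushed watermark advance, and the current logs are then flushed so their unpersisted byte counts return to zero.

import java.io.IOException;
import java.util.List;

final class FlushOrderSketch {
    interface LogChannel {
        void flushAndForceWrite() throws IOException; // write buffered data, then fsync
        void close() throws IOException;
    }

    static void flush(List<LogChannel> rotatedLogs, List<LogChannel> currentLogs) throws IOException {
        for (LogChannel rotated : rotatedLogs) {
            rotated.flushAndForceWrite();
            rotated.close(); // rotated logs take no further writes
        }
        rotatedLogs.clear(); // nothing left pending; the watermark can advance
        for (LogChannel current : currentLogs) {
            current.flushAndForceWrite(); // unpersisted bytes drop to 0
        }
    }
}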
- */ - for (long i = 0; i < numOfActiveLedgers; i++) { - BufferedLogChannel logChannel = entryLogManager.getCurrentLogForLedger(i); - Assert.assertEquals("unpersistedBytes should be 0", 0L, logChannel.getUnpersistedBytes()); - } - } - - /* - * with entryLogPerLedger enabled, create multiple entrylogs, add entries of ledgers and read them before and after - * flush - */ - @Test - public void testReadAddCallsOfMultipleEntryLogs() throws Exception { - ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); - conf.setEntryLogPerLedgerEnabled(true); - conf.setLedgerDirNames(createAndGetLedgerDirs(2)); - // pre allocation enabled - conf.setEntryLogFilePreAllocationEnabled(true); - LedgerDirsManager ledgerDirsManager = new LedgerDirsManager(conf, conf.getLedgerDirs(), - new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold())); - - EntryLogger entryLogger = new EntryLogger(conf, ledgerDirsManager); - EntryLogManagerBase entryLogManagerBase = ((EntryLogManagerBase) entryLogger.getEntryLogManager()); - - int numOfActiveLedgers = 10; - int numEntries = 10; - long[][] positions = new long[numOfActiveLedgers][]; - for (int i = 0; i < numOfActiveLedgers; i++) { - positions[i] = new long[numEntries]; - } - - /* - * addentries to the ledgers - */ - for (int j = 0; j < numEntries; j++) { - for (int i = 0; i < numOfActiveLedgers; i++) { - positions[i][j] = entryLogger.addEntry((long) i, generateEntry(i, j)); - long entryLogId = (positions[i][j] >> 32L); - /** - * - * Though EntryLogFilePreAllocation is enabled, Since things are not done concurrently here, - * entryLogIds will be sequential. - */ - Assert.assertEquals("EntryLogId for ledger: " + i, i, entryLogId); - } - } - - /* - * read the entries which are written - */ - for (int j = 0; j < numEntries; j++) { - for (int i = 0; i < numOfActiveLedgers; i++) { - String expectedValue = "ledger-" + i + "-" + j; - ByteBuf buf = entryLogger.readEntry(i, j, positions[i][j]); - long ledgerId = buf.readLong(); - long entryId = buf.readLong(); - byte[] data = new byte[buf.readableBytes()]; - buf.readBytes(data); - assertEquals("LedgerId ", i, ledgerId); - assertEquals("EntryId ", j, entryId); - assertEquals("Entry Data ", expectedValue, new String(data)); - } - } - - for (long i = 0; i < numOfActiveLedgers; i++) { - entryLogManagerBase.createNewLog(i); - } - - entryLogManagerBase.flushRotatedLogs(); - - // reading after flush of rotatedlogs - for (int j = 0; j < numEntries; j++) { - for (int i = 0; i < numOfActiveLedgers; i++) { - String expectedValue = "ledger-" + i + "-" + j; - ByteBuf buf = entryLogger.readEntry(i, j, positions[i][j]); - long ledgerId = buf.readLong(); - long entryId = buf.readLong(); - byte[] data = new byte[buf.readableBytes()]; - buf.readBytes(data); - assertEquals("LedgerId ", i, ledgerId); - assertEquals("EntryId ", j, entryId); - assertEquals("Entry Data ", expectedValue, new String(data)); - } - } - } - - class ReadTask implements Callable { - long ledgerId; - int entryId; - long position; - EntryLogger entryLogger; - - ReadTask(long ledgerId, int entryId, long position, EntryLogger entryLogger) { - this.ledgerId = ledgerId; - this.entryId = entryId; - this.position = position; - this.entryLogger = entryLogger; - } - - @Override - public Boolean call() throws IOException { - try { - ByteBuf expectedByteBuf = generateEntry(ledgerId, entryId); - ByteBuf actualByteBuf = entryLogger.readEntry(ledgerId, entryId, position); - if (!expectedByteBuf.equals(actualByteBuf)) { - LOG.error("Expected Entry: {} 
Actual Entry: {}", expectedByteBuf.toString(Charset.defaultCharset()), - actualByteBuf.toString(Charset.defaultCharset())); - throw new IOException("Expected Entry: " + expectedByteBuf.toString(Charset.defaultCharset()) - + " Actual Entry: " + actualByteBuf.toString(Charset.defaultCharset())); - } - } catch (IOException e) { - LOG.error("Got Exception for GetEntry call. LedgerId: " + ledgerId + " entryId: " + entryId, e); - throw new IOException("Got Exception for GetEntry call. LedgerId: " + ledgerId + " entryId: " + entryId, - e); - } - return true; - } - } - - /* - * test concurrent read operations of entries from flushed rotatedlogs with entryLogPerLedgerEnabled - */ - @Test - public void testConcurrentReadCallsAfterEntryLogsAreRotated() throws Exception { - ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); - conf.setEntryLogPerLedgerEnabled(true); - conf.setFlushIntervalInBytes(1000 * 25); - conf.setLedgerDirNames(createAndGetLedgerDirs(3)); - LedgerDirsManager ledgerDirsManager = new LedgerDirsManager(conf, conf.getLedgerDirs(), - new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold())); - - EntryLogger entryLogger = new EntryLogger(conf, ledgerDirsManager); - int numOfActiveLedgers = 15; - int numEntries = 2000; - final AtomicLongArray positions = new AtomicLongArray(numOfActiveLedgers * numEntries); - EntryLogManagerForEntryLogPerLedger entryLogManager = (EntryLogManagerForEntryLogPerLedger) - entryLogger.getEntryLogManager(); - - for (int i = 0; i < numOfActiveLedgers; i++) { - for (int j = 0; j < numEntries; j++) { - positions.set(i * numEntries + j, entryLogger.addEntry((long) i, generateEntry(i, j))); - long entryLogId = (positions.get(i * numEntries + j) >> 32L); - /** - * - * Though EntryLogFilePreAllocation is enabled, Since things are not done concurrently here, entryLogIds - * will be sequential. - */ - Assert.assertEquals("EntryLogId for ledger: " + i, i, entryLogId); - } - } - - for (long i = 0; i < numOfActiveLedgers; i++) { - entryLogManager.createNewLog(i); - } - entryLogManager.flushRotatedLogs(); - - // reading after flush of rotatedlogs - ArrayList readTasks = new ArrayList(); - for (int i = 0; i < numOfActiveLedgers; i++) { - for (int j = 0; j < numEntries; j++) { - readTasks.add(new ReadTask(i, j, positions.get(i * numEntries + j), entryLogger)); - } - } - - ExecutorService executor = Executors.newFixedThreadPool(40); - executor.invokeAll(readTasks).forEach((future) -> { - try { - future.get(); - } catch (InterruptedException ie) { - Thread.currentThread().interrupt(); - LOG.error("Read/Flush task failed because of InterruptedException", ie); - Assert.fail("Read/Flush task interrupted"); - } catch (Exception ex) { - LOG.error("Read/Flush task failed because of exception", ex); - Assert.fail("Read/Flush task failed " + ex.getMessage()); - } - }); - } - - /** - * testcase to validate when ledgerdirs become full and eventually all - * ledgerdirs become full. Later a ledgerdir becomes writable. 
- */ - @Test - public void testEntryLoggerAddEntryWhenLedgerDirsAreFull() throws Exception { - int numberOfLedgerDirs = 3; - List ledgerDirs = new ArrayList(); - String[] ledgerDirsPath = new String[numberOfLedgerDirs]; - List curDirs = new ArrayList(); - - File ledgerDir; - File curDir; - for (int i = 0; i < numberOfLedgerDirs; i++) { - ledgerDir = createTempDir("bkTest", ".dir").getAbsoluteFile(); - curDir = Bookie.getCurrentDirectory(ledgerDir); - Bookie.checkDirectoryStructure(curDir); - ledgerDirs.add(ledgerDir); - ledgerDirsPath[i] = ledgerDir.getPath(); - curDirs.add(curDir); - } - - ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); - // pre-allocation is disabled - conf.setEntryLogFilePreAllocationEnabled(false); - conf.setEntryLogPerLedgerEnabled(true); - conf.setLedgerDirNames(ledgerDirsPath); - - LedgerDirsManager ledgerDirsManager = new LedgerDirsManager(conf, conf.getLedgerDirs(), - new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold())); - - EntryLogger entryLogger = new EntryLogger(conf, ledgerDirsManager); - EntryLogManagerForEntryLogPerLedger entryLogManager = (EntryLogManagerForEntryLogPerLedger) - entryLogger.getEntryLogManager(); - Assert.assertEquals("EntryLogManager class type", EntryLogManagerForEntryLogPerLedger.class, - entryLogManager.getClass()); - - entryLogger.addEntry(0L, generateEntry(0, 1)); - entryLogger.addEntry(1L, generateEntry(1, 1)); - entryLogger.addEntry(2L, generateEntry(2, 1)); - - File ledgerDirForLedger0 = entryLogManager.getCurrentLogForLedger(0L).getLogFile().getParentFile(); - File ledgerDirForLedger1 = entryLogManager.getCurrentLogForLedger(1L).getLogFile().getParentFile(); - File ledgerDirForLedger2 = entryLogManager.getCurrentLogForLedger(2L).getLogFile().getParentFile(); - - Set ledgerDirsSet = new HashSet(); - ledgerDirsSet.add(ledgerDirForLedger0); - ledgerDirsSet.add(ledgerDirForLedger1); - ledgerDirsSet.add(ledgerDirForLedger2); - - /* - * since there are 3 ledgerdirs, entrylogs for all the 3 ledgers should be in different ledgerdirs. - */ - Assert.assertEquals("Current active LedgerDirs size", 3, ledgerDirs.size()); - Assert.assertEquals("Number of rotated logchannels", 0, entryLogManager.getRotatedLogChannels().size()); - - /* - * ledgerDirForLedger0 is added to filledDirs, for ledger0 new entrylog should not be created in - * ledgerDirForLedger0 - */ - ledgerDirsManager.addToFilledDirs(ledgerDirForLedger0); - addEntryAndValidateFolders(entryLogger, entryLogManager, 2, ledgerDirForLedger0, false, ledgerDirForLedger1, - ledgerDirForLedger2); - Assert.assertEquals("Number of rotated logchannels", 1, entryLogManager.getRotatedLogChannels().size()); - - /* - * ledgerDirForLedger1 is also added to filledDirs, so for all the ledgers new entryLogs should be in - * ledgerDirForLedger2 - */ - ledgerDirsManager.addToFilledDirs(ledgerDirForLedger1); - addEntryAndValidateFolders(entryLogger, entryLogManager, 3, ledgerDirForLedger2, true, ledgerDirForLedger2, - ledgerDirForLedger2); - Assert.assertTrue("Number of rotated logchannels", (2 <= entryLogManager.getRotatedLogChannels().size()) - && (entryLogManager.getRotatedLogChannels().size() <= 3)); - int numOfRotatedLogChannels = entryLogManager.getRotatedLogChannels().size(); - - /* - * since ledgerDirForLedger2 is added to filleddirs, all the dirs are full. If all the dirs are full then it - * will continue to use current entrylogs for new entries instead of creating new one. 
So for all the ledgers - * ledgerdirs should be same as before - ledgerDirForLedger2 - */ - ledgerDirsManager.addToFilledDirs(ledgerDirForLedger2); - addEntryAndValidateFolders(entryLogger, entryLogManager, 4, ledgerDirForLedger2, true, ledgerDirForLedger2, - ledgerDirForLedger2); - Assert.assertEquals("Number of rotated logchannels", numOfRotatedLogChannels, - entryLogManager.getRotatedLogChannels().size()); - - /* - * ledgerDirForLedger1 is added back to writableDirs, so new entrylog for all the ledgers should be created in - * ledgerDirForLedger1 - */ - ledgerDirsManager.addToWritableDirs(ledgerDirForLedger1, true); - addEntryAndValidateFolders(entryLogger, entryLogManager, 4, ledgerDirForLedger1, true, ledgerDirForLedger1, - ledgerDirForLedger1); - Assert.assertEquals("Number of rotated logchannels", numOfRotatedLogChannels + 3, - entryLogManager.getRotatedLogChannels().size()); - } - - /* - * in this method we add an entry and validate the ledgerdir of the - * currentLogForLedger against the provided expected ledgerDirs. - */ - void addEntryAndValidateFolders(EntryLogger entryLogger, EntryLogManagerBase entryLogManager, int entryId, - File expectedDirForLedger0, boolean equalsForLedger0, File expectedDirForLedger1, - File expectedDirForLedger2) throws IOException { - entryLogger.addEntry(0L, generateEntry(0, entryId)); - entryLogger.addEntry(1L, generateEntry(1, entryId)); - entryLogger.addEntry(2L, generateEntry(2, entryId)); - - if (equalsForLedger0) { - Assert.assertEquals("LedgerDir for ledger 0 after adding entry " + entryId, expectedDirForLedger0, - entryLogManager.getCurrentLogForLedger(0L).getLogFile().getParentFile()); - } else { - Assert.assertNotEquals("LedgerDir for ledger 0 after adding entry " + entryId, expectedDirForLedger0, - entryLogManager.getCurrentLogForLedger(0L).getLogFile().getParentFile()); - } - Assert.assertEquals("LedgerDir for ledger 1 after adding entry " + entryId, expectedDirForLedger1, - entryLogManager.getCurrentLogForLedger(1L).getLogFile().getParentFile()); - Assert.assertEquals("LedgerDir for ledger 2 after adding entry " + entryId, expectedDirForLedger2, - entryLogManager.getCurrentLogForLedger(2L).getLogFile().getParentFile()); - } - - /* - * entries added using entrylogger with entryLogPerLedger enabled and the same entries are read using entrylogger - * with entryLogPerLedger disabled - */ - @Test - public void testSwappingEntryLogManagerFromEntryLogPerLedgerToSingle() throws Exception { - testSwappingEntryLogManager(true, false); - } - - /* - * entries added using entrylogger with entryLogPerLedger disabled and the same entries are read using entrylogger - * with entryLogPerLedger enabled - */ - @Test - public void testSwappingEntryLogManagerFromSingleToEntryLogPerLedger() throws Exception { - testSwappingEntryLogManager(false, true); - } - - public void testSwappingEntryLogManager(boolean initialEntryLogPerLedgerEnabled, - boolean laterEntryLogPerLedgerEnabled) throws Exception { - ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); - conf.setEntryLogPerLedgerEnabled(initialEntryLogPerLedgerEnabled); - conf.setLedgerDirNames(createAndGetLedgerDirs(2)); - // pre allocation enabled - conf.setEntryLogFilePreAllocationEnabled(true); - LedgerDirsManager ledgerDirsManager = new LedgerDirsManager(conf, conf.getLedgerDirs(), - new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold())); - - EntryLogger entryLogger = new EntryLogger(conf, ledgerDirsManager); - EntryLogManagerBase entryLogManager = 
(EntryLogManagerBase) entryLogger.getEntryLogManager(); - Assert.assertEquals( - "EntryLogManager class type", initialEntryLogPerLedgerEnabled - ? EntryLogManagerForEntryLogPerLedger.class : EntryLogManagerForSingleEntryLog.class, - entryLogManager.getClass()); - - int numOfActiveLedgers = 10; - int numEntries = 10; - long[][] positions = new long[numOfActiveLedgers][]; - for (int i = 0; i < numOfActiveLedgers; i++) { - positions[i] = new long[numEntries]; - } - - /* - * addentries to the ledgers - */ - for (int j = 0; j < numEntries; j++) { - for (int i = 0; i < numOfActiveLedgers; i++) { - positions[i][j] = entryLogger.addEntry((long) i, generateEntry(i, j)); - long entryLogId = (positions[i][j] >> 32L); - if (initialEntryLogPerLedgerEnabled) { - Assert.assertEquals("EntryLogId for ledger: " + i, i, entryLogId); - } else { - Assert.assertEquals("EntryLogId for ledger: " + i, 0, entryLogId); - } - } - } - - for (long i = 0; i < numOfActiveLedgers; i++) { - entryLogManager.createNewLog(i); - } - - /** - * since new entrylog is created for all the ledgers, the previous - * entrylogs must be rotated and with the following flushRotatedLogs - * call they should be forcewritten and file should be closed. - */ - entryLogManager.flushRotatedLogs(); - - /* - * new entrylogger and entryLogManager are created with - * 'laterEntryLogPerLedgerEnabled' conf - */ - conf.setEntryLogPerLedgerEnabled(laterEntryLogPerLedgerEnabled); - LedgerDirsManager newLedgerDirsManager = new LedgerDirsManager(conf, conf.getLedgerDirs(), - new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold())); - EntryLogger newEntryLogger = new EntryLogger(conf, newLedgerDirsManager); - EntryLogManager newEntryLogManager = newEntryLogger.getEntryLogManager(); - Assert.assertEquals("EntryLogManager class type", - laterEntryLogPerLedgerEnabled ? EntryLogManagerForEntryLogPerLedger.class - : EntryLogManagerForSingleEntryLog.class, - newEntryLogManager.getClass()); - - /* - * read the entries (which are written with previous entrylogger) with - * new entrylogger - */ - for (int j = 0; j < numEntries; j++) { - for (int i = 0; i < numOfActiveLedgers; i++) { - String expectedValue = "ledger-" + i + "-" + j; - ByteBuf buf = newEntryLogger.readEntry(i, j, positions[i][j]); - long ledgerId = buf.readLong(); - long entryId = buf.readLong(); - byte[] data = new byte[buf.readableBytes()]; - buf.readBytes(data); - assertEquals("LedgerId ", i, ledgerId); - assertEquals("EntryId ", j, entryId); - assertEquals("Entry Data ", expectedValue, new String(data)); - } - } - } - -} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/EntryMemTableTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/EntryMemTableTest.java new file mode 100644 index 00000000000..f2dbf643831 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/EntryMemTableTest.java @@ -0,0 +1,472 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bookkeeper.bookie; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; + +import io.netty.buffer.ByteBuf; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.PrimitiveIterator.OfLong; +import java.util.Random; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.function.Consumer; +import lombok.extern.slf4j.Slf4j; +import org.apache.bookkeeper.bookie.Bookie.NoLedgerException; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.conf.TestBKConfiguration; +import org.apache.bookkeeper.stats.NullStatsLogger; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; + +/** + * Test the EntryMemTable class. + */ +@Slf4j +@RunWith(Parameterized.class) +public class EntryMemTableTest implements CacheCallback, SkipListFlusher, CheckpointSource { + + private Class entryMemTableClass; + private EntryMemTable memTable; + private final Random random = new Random(); + private TestCheckPoint curCheckpoint = new TestCheckPoint(0, 0); + + @Parameters + public static Collection memTableClass() { + return Arrays.asList(new Object[][] { { EntryMemTable.class }, { EntryMemTableWithParallelFlusher.class } }); + } + + public EntryMemTableTest(Class entryMemTableClass) { + this.entryMemTableClass = entryMemTableClass; + } + + @Override + public Checkpoint newCheckpoint() { + return curCheckpoint; + } + + @Override + public void checkpointComplete(Checkpoint checkpoint, boolean compact) + throws IOException { + } + + @Before + public void setUp() throws Exception { + if (entryMemTableClass.equals(EntryMemTableWithParallelFlusher.class)) { + ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); + this.memTable = new EntryMemTableWithParallelFlusher(conf, this, NullStatsLogger.INSTANCE); + } else { + this.memTable = new EntryMemTable(TestBKConfiguration.newServerConfiguration(), this, + NullStatsLogger.INSTANCE); + } + } + + @After + public void cleanup() throws Exception{ + this.memTable.close(); + } + + @Test + public void testLogMark() throws IOException { + LogMark mark = new LogMark(); + assertTrue(mark.compare(new LogMark()) == 0); + assertTrue(mark.compare(LogMark.MAX_VALUE) < 0); + mark.setLogMark(3, 11); + byte[] data = new byte[16]; + ByteBuffer buf = ByteBuffer.wrap(data); + mark.writeLogMark(buf); + buf.flip(); + LogMark mark1 = new LogMark(9, 13); + assertTrue(mark1.compare(mark) > 0); + mark1.readLogMark(buf); + assertTrue(mark1.compare(mark) == 0); + } + + /** + * Basic put/get. 
+ * @throws IOException + * */ + @Test + public void testBasicOps() throws IOException { + long ledgerId = 1; + long entryId = 1; + byte[] data = new byte[10]; + random.nextBytes(data); + ByteBuffer buf = ByteBuffer.wrap(data); + memTable.addEntry(ledgerId, entryId, buf, this); + buf.rewind(); + EntryKeyValue kv = memTable.getEntry(ledgerId, entryId); + assertTrue(kv.getLedgerId() == ledgerId); + assertTrue(kv.getEntryId() == entryId); + assertTrue(kv.getValueAsByteBuffer().nioBuffer().equals(buf)); + memTable.flush(this); + } + + @Override + public void onSizeLimitReached(Checkpoint cp) throws IOException { + // No-op + } + + public void process(long ledgerId, long entryId, ByteBuf entry) + throws IOException { + // No-op + } + + /** + * Test read/write across snapshot. + * @throws IOException + */ + @Test + public void testScanAcrossSnapshot() throws IOException { + byte[] data = new byte[10]; + List keyValues = new ArrayList(); + for (long entryId = 1; entryId < 100; entryId++) { + for (long ledgerId = 1; ledgerId < 3; ledgerId++) { + random.nextBytes(data); + memTable.addEntry(ledgerId, entryId, ByteBuffer.wrap(data), this); + keyValues.add(memTable.getEntry(ledgerId, entryId)); + if (random.nextInt(16) == 0) { + memTable.snapshot(); + } + } + } + + for (EntryKeyValue kv : keyValues) { + assertTrue(memTable.getEntry(kv.getLedgerId(), kv.getEntryId()).equals(kv)); + } + memTable.flush(this, Checkpoint.MAX); + } + + private class KVFLusher implements SkipListFlusher { + final Set keyValues; + + KVFLusher(final Set keyValues) { + this.keyValues = keyValues; + } + + @Override + public void process(long ledgerId, long entryId, ByteBuf entry) throws IOException { + assertTrue(ledgerId + ":" + entryId + " is duplicate in store!", + keyValues.add(new EntryKeyValue(ledgerId, entryId, entry.array()))); + } + } + + private class NoLedgerFLusher implements SkipListFlusher { + @Override + public void process(long ledgerId, long entryId, ByteBuf entry) throws IOException { + throw new NoLedgerException(ledgerId); + } + } + + /** + * Test flush w/ logMark parameter. + * @throws IOException + */ + @Test + public void testFlushLogMark() throws IOException { + Set flushedKVs = Collections.newSetFromMap(new ConcurrentHashMap()); + KVFLusher flusher = new KVFLusher(flushedKVs); + + curCheckpoint.setCheckPoint(2, 2); + + byte[] data = new byte[10]; + long ledgerId = 100; + for (long entryId = 1; entryId < 100; entryId++) { + random.nextBytes(data); + memTable.addEntry(ledgerId, entryId, ByteBuffer.wrap(data), this); + } + + assertNull(memTable.snapshot(new TestCheckPoint(1, 1))); + assertNotNull(memTable.snapshot(new TestCheckPoint(3, 3))); + + assertTrue(0 < memTable.flush(flusher)); + assertTrue(0 == memTable.flush(flusher)); + + curCheckpoint.setCheckPoint(4, 4); + + random.nextBytes(data); + memTable.addEntry(ledgerId, 101, ByteBuffer.wrap(data), this); + assertTrue(0 == memTable.flush(flusher)); + + assertTrue(0 == memTable.flush(flusher, new TestCheckPoint(3, 3))); + assertTrue(0 < memTable.flush(flusher, new TestCheckPoint(4, 5))); + } + + /** + * Test snapshot/flush interaction. 
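+ * <p>Roughly the protocol this test hammers (a {@code null} return from
+ * {@code snapshot()} is taken to mean there was nothing new to freeze):
+ * <pre>{@code
+ * if (memTable.snapshot() != null) {       // freeze the current entries
+ *     memTable.flush(flusher);             // drain the frozen snapshot
+ * }
+ * memTable.flush(flusher, Checkpoint.MAX); // finally flush everything
+ * }</pre>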
+ * @throws IOException + */ + @Test + public void testFlushSnapshot() throws IOException { + HashSet keyValues = new HashSet(); + Set flushedKVs = Collections.newSetFromMap(new ConcurrentHashMap()); + KVFLusher flusher = new KVFLusher(flushedKVs); + + byte[] data = new byte[10]; + for (long entryId = 1; entryId < 100; entryId++) { + for (long ledgerId = 1; ledgerId < 100; ledgerId++) { + random.nextBytes(data); + assertTrue(ledgerId + ":" + entryId + " is duplicate in mem-table!", + memTable.addEntry(ledgerId, entryId, ByteBuffer.wrap(data), this) != 0); + assertTrue(ledgerId + ":" + entryId + " is duplicate in hash-set!", + keyValues.add(memTable.getEntry(ledgerId, entryId))); + if (random.nextInt(16) == 0) { + if (null != memTable.snapshot()) { + if (random.nextInt(2) == 0) { + memTable.flush(flusher); + } + } + } + } + } + + memTable.flush(flusher, Checkpoint.MAX); + for (EntryKeyValue kv : keyValues) { + assertTrue("kv " + kv.toString() + " was not flushed!", flushedKVs.contains(kv)); + } + } + + /** + * Test NoLedger exception/flush interaction. + * @throws IOException + */ + @Test + public void testNoLedgerException() throws IOException { + NoLedgerFLusher flusher = new NoLedgerFLusher(); + + byte[] data = new byte[10]; + for (long entryId = 1; entryId < 100; entryId++) { + for (long ledgerId = 1; ledgerId < 100; ledgerId++) { + random.nextBytes(data); + if (random.nextInt(16) == 0) { + if (null != memTable.snapshot()) { + memTable.flush(flusher); + } + } + } + } + + memTable.flush(flusher, Checkpoint.MAX); + } + + private static class TestCheckPoint implements Checkpoint { + + LogMark mark; + + public TestCheckPoint(long fid, long fpos) { + mark = new LogMark(fid, fpos); + } + + private void setCheckPoint(long fid, long fpos) { + mark.setLogMark(fid, fpos); + } + + @Override + public int compareTo(Checkpoint o) { + if (Checkpoint.MAX == o) { + return -1; + } + return mark.compare(((TestCheckPoint) o).mark); + } + + } + + @Test + public void testGetListOfEntriesOfLedger() throws IOException { + Set flushedKVs = Collections.newSetFromMap(new ConcurrentHashMap()); + KVFLusher flusher = new KVFLusher(flushedKVs); + int numofEntries = 100; + int numOfLedgers = 5; + byte[] data = new byte[10]; + for (long entryId = 1; entryId <= numofEntries; entryId++) { + for (long ledgerId = 1; ledgerId <= numOfLedgers; ledgerId++) { + random.nextBytes(data); + assertTrue(ledgerId + ":" + entryId + " is duplicate in mem-table!", + memTable.addEntry(ledgerId, entryId, ByteBuffer.wrap(data), this) != 0); + } + } + for (long ledgerId = 1; ledgerId <= numOfLedgers; ledgerId++) { + OfLong entriesItr = memTable.getListOfEntriesOfLedger((random.nextInt((int) ledgerId) + 1)); + ArrayList listOfEntries = new ArrayList(); + Consumer addMethod = listOfEntries::add; + entriesItr.forEachRemaining(addMethod); + assertEquals("Number of Entries", numofEntries, listOfEntries.size()); + for (int i = 0; i < numofEntries; i++) { + assertEquals("listOfEntries should be sorted", Long.valueOf(i + 1), listOfEntries.get(i)); + } + } + assertTrue("Snapshot is expected to be empty since snapshot is not done", memTable.snapshot.isEmpty()); + assertTrue("Take snapshot and returned checkpoint should not be empty", memTable.snapshot() != null); + assertFalse("After taking snapshot, snapshot should not be empty ", memTable.snapshot.isEmpty()); + for (long ledgerId = 1; ledgerId <= numOfLedgers; ledgerId++) { + OfLong entriesItr = memTable.getListOfEntriesOfLedger((random.nextInt((int) ledgerId) + 1)); + ArrayList listOfEntries = new 
ArrayList(); + Consumer addMethod = listOfEntries::add; + entriesItr.forEachRemaining(addMethod); + assertEquals("Number of Entries should be the same even after taking snapshot", numofEntries, + listOfEntries.size()); + for (int i = 0; i < numofEntries; i++) { + assertEquals("listOfEntries should be sorted", Long.valueOf(i + 1), listOfEntries.get(i)); + } + } + + memTable.flush(flusher); + for (long ledgerId = 1; ledgerId <= numOfLedgers; ledgerId++) { + OfLong entriesItr = memTable.getListOfEntriesOfLedger((random.nextInt((int) ledgerId) + 1)); + assertFalse("After flushing there shouldn't be entries in memtable", entriesItr.hasNext()); + } + } + + @Test + public void testGetListOfEntriesOfLedgerFromBothKVMapAndSnapshot() throws IOException { + int numofEntries = 100; + int newNumOfEntries = 200; + int numOfLedgers = 5; + byte[] data = new byte[10]; + for (long entryId = 1; entryId <= numofEntries; entryId++) { + for (long ledgerId = 1; ledgerId <= numOfLedgers; ledgerId++) { + random.nextBytes(data); + assertTrue(ledgerId + ":" + entryId + " is duplicate in mem-table!", + memTable.addEntry(ledgerId, entryId, ByteBuffer.wrap(data), this) != 0); + } + } + + assertTrue("Snapshot is expected to be empty since snapshot is not done", memTable.snapshot.isEmpty()); + assertTrue("Take snapshot and returned checkpoint should not be empty", memTable.snapshot() != null); + assertFalse("After taking snapshot, snapshot should not be empty ", memTable.snapshot.isEmpty()); + + for (long entryId = numofEntries + 1; entryId <= newNumOfEntries; entryId++) { + for (long ledgerId = 1; ledgerId <= numOfLedgers; ledgerId++) { + random.nextBytes(data); + assertTrue(ledgerId + ":" + entryId + " is duplicate in mem-table!", + memTable.addEntry(ledgerId, entryId, ByteBuffer.wrap(data), this) != 0); + } + } + + for (long ledgerId = 1; ledgerId <= numOfLedgers; ledgerId++) { + OfLong entriesItr = memTable.getListOfEntriesOfLedger((random.nextInt((int) ledgerId) + 1)); + ArrayList listOfEntries = new ArrayList(); + Consumer addMethod = listOfEntries::add; + entriesItr.forEachRemaining(addMethod); + assertEquals("Number of Entries should be the same", newNumOfEntries, listOfEntries.size()); + for (int i = 0; i < newNumOfEntries; i++) { + assertEquals("listOfEntries should be sorted", Long.valueOf(i + 1), listOfEntries.get(i)); + } + } + } + + @Test + public void testGetListOfEntriesOfLedgerWhileAddingConcurrently() throws IOException, InterruptedException { + final int numofEntries = 100; + final int newNumOfEntries = 200; + final int concurrentAddOfEntries = 300; + long ledgerId = 5; + byte[] data = new byte[10]; + for (long entryId = 1; entryId <= numofEntries; entryId++) { + random.nextBytes(data); + assertTrue(ledgerId + ":" + entryId + " is duplicate in mem-table!", + memTable.addEntry(ledgerId, entryId, ByteBuffer.wrap(data), this) != 0); + } + + assertTrue("Snapshot is expected to be empty since snapshot is not done", memTable.snapshot.isEmpty()); + assertTrue("Take snapshot and returned checkpoint should not be empty", memTable.snapshot() != null); + assertFalse("After taking snapshot, snapshot should not be empty ", memTable.snapshot.isEmpty()); + + for (long entryId = numofEntries + 1; entryId <= newNumOfEntries; entryId++) { + random.nextBytes(data); + assertTrue(ledgerId + ":" + entryId + " is duplicate in mem-table!", + memTable.addEntry(ledgerId, entryId, ByteBuffer.wrap(data), this) != 0); + } + + AtomicBoolean successfullyAdded = new AtomicBoolean(true); + + Thread threadToAdd = new Thread(new 
Runnable() { + @Override + public void run() { + try { + for (long entryId = newNumOfEntries + 1; entryId <= concurrentAddOfEntries; entryId++) { + random.nextBytes(data); + boolean thisEntryAddedSuccessfully = (memTable.addEntry(ledgerId, entryId, + ByteBuffer.wrap(data), EntryMemTableTest.this) != 0); + successfullyAdded.set(successfullyAdded.get() && thisEntryAddedSuccessfully); + Thread.sleep(10); + } + } catch (IOException e) { + log.error("Got Unexpected exception while adding entries"); + successfullyAdded.set(false); + } catch (InterruptedException e) { + log.error("Got InterruptedException while waiting"); + successfullyAdded.set(false); + } + } + }); + threadToAdd.start(); + + Thread.sleep(200); + OfLong entriesItr = memTable.getListOfEntriesOfLedger(ledgerId); + ArrayList listOfEntries = new ArrayList(); + while (entriesItr.hasNext()) { + listOfEntries.add(entriesItr.next()); + Thread.sleep(5); + } + threadToAdd.join(5000); + assertTrue("Entries should be added successfully in the spawned thread", successfullyAdded.get()); + + for (int i = 0; i < newNumOfEntries; i++) { + assertEquals("listOfEntries should be sorted", Long.valueOf(i + 1), listOfEntries.get(i)); + } + } + + @Test + public void testAddSameEntries() throws IOException { + final long ledgerId = 1; + final long entryId = 1; + final int size = 10; + final byte[] bytes = new byte[size]; + final int initialPermits = memTable.skipListSemaphore.availablePermits(); + + for (int i = 0; i < 5; i++) { + memTable.addEntry(ledgerId, entryId, ByteBuffer.wrap(bytes), this); + assertEquals(memTable.kvmap.size(), 1); + assertEquals(memTable.skipListSemaphore.availablePermits(), initialPermits - size); + } + + memTable.snapshot(Checkpoint.MAX); + memTable.flush(this); + assertEquals(memTable.kvmap.size(), 0); + assertEquals(memTable.skipListSemaphore.availablePermits(), initialPermits); + } +} + diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/FileInfoBackingCacheTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/FileInfoBackingCacheTest.java new file mode 100644 index 00000000000..e7d27ea8a80 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/FileInfoBackingCacheTest.java @@ -0,0 +1,288 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + */ +package org.apache.bookkeeper.bookie; + +import com.google.common.cache.Cache; +import com.google.common.cache.CacheBuilder; +import com.google.common.cache.RemovalNotification; +import com.google.common.util.concurrent.ThreadFactoryBuilder; +import java.io.File; +import java.io.IOException; +import java.security.SecureRandom; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Random; +import java.util.Set; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.ThreadFactory; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.stream.Collectors; +import java.util.stream.IntStream; +import java.util.stream.LongStream; +import lombok.extern.slf4j.Slf4j; +import org.apache.bookkeeper.bookie.FileInfoBackingCache.CachedFileInfo; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +/** + * Tests for FileInfoBackingCache. + */ +@Slf4j +public class FileInfoBackingCacheTest { + final byte[] masterKey = new byte[0]; + final File baseDir; + final ThreadFactory threadFactory = new ThreadFactoryBuilder() + .setNameFormat("backing-cache-test-%d").setDaemon(true).build(); + ExecutorService executor; + + public FileInfoBackingCacheTest() throws Exception { + baseDir = File.createTempFile("foo", "bar"); + } + + @Before + public void setup() throws Exception { + Assert.assertTrue(baseDir.delete()); + Assert.assertTrue(baseDir.mkdirs()); + baseDir.deleteOnExit(); + + executor = Executors.newCachedThreadPool(threadFactory); + } + + @After + public void tearDown() throws Exception { + if (executor != null) { + executor.shutdown(); + } + } + + @Test(timeout = 30000) + public void basicTest() throws Exception { + FileInfoBackingCache cache = new FileInfoBackingCache( + (ledgerId, createIfNotFound) -> { + File f = new File(baseDir, String.valueOf(ledgerId)); + f.deleteOnExit(); + return f; + }, FileInfo.CURRENT_HEADER_VERSION); + CachedFileInfo fi = cache.loadFileInfo(1, masterKey); + Assert.assertEquals(fi.getRefCount(), 1); + CachedFileInfo fi2 = cache.loadFileInfo(2, masterKey); + Assert.assertEquals(fi2.getRefCount(), 1); + CachedFileInfo fi3 = cache.loadFileInfo(1, null); + Assert.assertEquals(fi, fi3); + Assert.assertEquals(fi3.getRefCount(), 2); + + // check that it expires correctly + fi.release(); + fi3.release(); + + Assert.assertEquals(fi.getRefCount(), FileInfoBackingCache.DEAD_REF); + CachedFileInfo fi4 = cache.loadFileInfo(1, null); + Assert.assertFalse(fi4 == fi); + Assert.assertEquals(fi.getRefCount(), FileInfoBackingCache.DEAD_REF); + Assert.assertEquals(fi4.getRefCount(), 1); + Assert.assertEquals(fi.getLf(), fi4.getLf()); + } + + @Test(expected = IOException.class, timeout = 30000) + public void testNoKey() throws Exception { + FileInfoBackingCache cache = new FileInfoBackingCache( + (ledgerId, createIfNotFound) -> { + Assert.assertFalse(createIfNotFound); + throw new Bookie.NoLedgerException(ledgerId); + }, FileInfo.CURRENT_HEADER_VERSION); + cache.loadFileInfo(1, null); + } + + /** + * Of course this can't prove they don't exist, but + * try to shake them out none the less. 
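+ * <p>The reference-counting contract being stressed: {@code loadFileInfo()}
+ * returns an already-retained handle, every caller must {@code release()}
+ * it, and a handle obtained from some external cache may die between lookup
+ * and use, so such callers retry with {@code tryRetain()}. A sketch, where
+ * {@code lookupFromCache} is a placeholder for any caching layer:
+ * <pre>{@code
+ * CachedFileInfo fi;
+ * do {
+ *     fi = lookupFromCache(ledgerId); // may return a dying handle
+ * } while (!fi.tryRetain());          // retry until retained alive
+ * try {
+ *     // ... use fi ...
+ * } finally {
+ *     fi.release(); // refcount hits DEAD_REF once all holders release
+ * }
+ * }</pre>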
+ */ + @Test(timeout = 30000) + public void testForDeadlocks() throws Exception { + int numRunners = 20; + int maxLedgerId = 10; + AtomicBoolean done = new AtomicBoolean(false); + + FileInfoBackingCache cache = new FileInfoBackingCache( + (ledgerId, createIfNotFound) -> { + File f = new File(baseDir, String.valueOf(ledgerId)); + f.deleteOnExit(); + return f; + }, FileInfo.CURRENT_HEADER_VERSION); + Iterable>> futures = + IntStream.range(0, numRunners).mapToObj( + (i) -> { + Callable> c = () -> { + Random r = new Random(); + List fileInfos = new ArrayList<>(); + Set allFileInfos = new HashSet<>(); + while (!done.get()) { + if (r.nextBoolean() && fileInfos.size() < 5) { // take a reference + CachedFileInfo fi = cache.loadFileInfo(r.nextInt(maxLedgerId), masterKey); + Assert.assertFalse(fi.isClosed()); + allFileInfos.add(fi); + fileInfos.add(fi); + } else { // release a reference + Collections.shuffle(fileInfos); + if (!fileInfos.isEmpty()) { + fileInfos.remove(0).release(); + } + } + } + for (CachedFileInfo fi : fileInfos) { + Assert.assertFalse(fi.isClosed()); + fi.release(); + } + return allFileInfos; + }; + return executor.submit(c); + }).collect(Collectors.toList()); + Thread.sleep(TimeUnit.SECONDS.toMillis(10)); + done.set(true); + + // ensure all threads are finished operating on cache, before checking any + for (Future> f : futures) { + f.get(); + } + + for (Future> f : futures) { + for (CachedFileInfo fi : f.get()) { + Assert.assertTrue(fi.isClosed()); + Assert.assertEquals(FileInfoBackingCache.DEAD_REF, fi.getRefCount()); + } + } + + // try to load all ledgers again. + // They should be loaded fresh (i.e. this load should be only reference) + for (int i = 0; i < maxLedgerId; i++) { + Assert.assertEquals(1, cache.loadFileInfo(i, masterKey).getRefCount()); + } + } + + @Test(timeout = 30000) + public void testRefCountRace() throws Exception { + AtomicBoolean done = new AtomicBoolean(false); + FileInfoBackingCache cache = new FileInfoBackingCache( + (ledgerId, createIfNotFound) -> { + File f = new File(baseDir, String.valueOf(ledgerId)); + f.deleteOnExit(); + return f; + }, FileInfo.CURRENT_HEADER_VERSION); + + Iterable>> futures = + IntStream.range(0, 2).mapToObj( + (i) -> { + Callable> c = () -> { + Set allFileInfos = new HashSet<>(); + while (!done.get()) { + CachedFileInfo fi = cache.loadFileInfo(1, masterKey); + Assert.assertFalse(fi.isClosed()); + allFileInfos.add(fi); + fi.release(); + } + return allFileInfos; + }; + return executor.submit(c); + }).collect(Collectors.toList()); + Thread.sleep(TimeUnit.SECONDS.toMillis(10)); + done.set(true); + + // ensure all threads are finished operating on cache, before checking any + for (Future> f : futures) { + f.get(); + } + + for (Future> f : futures) { + for (CachedFileInfo fi : f.get()) { + Assert.assertTrue(fi.isClosed()); + Assert.assertEquals(FileInfoBackingCache.DEAD_REF, fi.getRefCount()); + } + } + } + + private void guavaEvictionListener(RemovalNotification notification) { + notification.getValue().release(); + } + + @Test(timeout = 30000) + public void testRaceGuavaEvictAndReleaseBeforeRetain() throws Exception { + AtomicBoolean done = new AtomicBoolean(false); + Random random = new SecureRandom(); + FileInfoBackingCache cache = new FileInfoBackingCache( + (ledgerId, createIfNotFound) -> { + File f = new File(baseDir, String.valueOf(ledgerId)); + f.deleteOnExit(); + return f; + }, FileInfo.CURRENT_HEADER_VERSION); + + Cache guavaCache = CacheBuilder.newBuilder() + .maximumSize(1) + .removalListener(this::guavaEvictionListener) + 
.build(); + + Iterable>> futures = + LongStream.range(0L, 2L).mapToObj( + (i) -> { + Callable> c = () -> { + Set allFileInfos = new HashSet<>(); + while (!done.get()) { + CachedFileInfo fi = null; + + do { + fi = guavaCache.get( + i, () -> cache.loadFileInfo(i, masterKey)); + allFileInfos.add(fi); + Thread.sleep(random.nextInt(100)); + } while (!fi.tryRetain()); + + Assert.assertFalse(fi.isClosed()); + fi.release(); + } + return allFileInfos; + }; + return executor.submit(c); + }).collect(Collectors.toList()); + Thread.sleep(TimeUnit.SECONDS.toMillis(10)); + done.set(true); + + // ensure all threads are finished operating on cache, before checking any + for (Future> f : futures) { + f.get(); + } + guavaCache.invalidateAll(); + + for (Future> f : futures) { + for (CachedFileInfo fi : f.get()) { + Assert.assertTrue(fi.isClosed()); + Assert.assertEquals(FileInfoBackingCache.DEAD_REF, fi.getRefCount()); + } + } + + } +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/ForceAuditorChecksCmdTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/ForceAuditorChecksCmdTest.java new file mode 100644 index 00000000000..18785a58b48 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/ForceAuditorChecksCmdTest.java @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.bookkeeper.bookie; + +import static org.apache.bookkeeper.meta.MetadataDrivers.runFunctionWithLedgerManagerFactory; + +import com.google.common.util.concurrent.UncheckedExecutionException; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.meta.LedgerUnderreplicationManager; +import org.apache.bookkeeper.replication.ReplicationException; +import org.apache.bookkeeper.test.BookKeeperClusterTestCase; +import org.junit.Assert; +import org.junit.Test; + + +/** + * Integration test of {@link BookieShell.TriggerAuditCmd}. + */ +public class ForceAuditorChecksCmdTest extends BookKeeperClusterTestCase { + + public ForceAuditorChecksCmdTest() { + super(1); + baseConf.setAuditorPeriodicPlacementPolicyCheckInterval(10000); + baseConf.setAuditorPeriodicReplicasCheckInterval(10000); + } + + /** + * Verify that the auditor checks last execution time (stored in zookeeper) is reset to an older value + * when triggeraudit command is run with certain parameters. Rebooting the auditor after this would + * result in immediate run of audit checks. 
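+ * <p>For reference, the invocation driven below:
+ * <pre>{@code
+ * BookieShell shell = new BookieShell();
+ * shell.setConf(conf);
+ * int rc = shell.run(new String[] {"forceauditchecks", "-calc", "-ppc", "-rc"});
+ * // rc == 0 on success; each flag rewinds one check's last-execution time
+ * }</pre>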
+     */
+    @Test
+    public void verifyAuditCTimeReset() throws Exception {
+        String[] argv = new String[] { "forceauditchecks", "-calc", "-ppc", "-rc" };
+        long curTime = System.currentTimeMillis();
+
+        final ServerConfiguration conf = confByIndex(0);
+        BookieShell bkShell = new BookieShell();
+        bkShell.setConf(conf);
+
+        // Add dummy last execution time for audit checks
+        runFunctionWithLedgerManagerFactory(conf, mFactory -> {
+            try (LedgerUnderreplicationManager urM =
+                     mFactory.newLedgerUnderreplicationManager()) {
+                urM.setCheckAllLedgersCTime(curTime);
+                urM.setPlacementPolicyCheckCTime(curTime);
+                urM.setReplicasCheckCTime(curTime);
+            } catch (InterruptedException | ReplicationException e) {
+                throw new UncheckedExecutionException(e);
+            }
+            return null;
+        });
+
+        // Run the actual shell command
+        Assert.assertEquals("Failed to return exit code!", 0, bkShell.run(argv));
+
+        // Verify that the time has been reset to an older value (at least 20 days)
+        runFunctionWithLedgerManagerFactory(conf, mFactory -> {
+            try (LedgerUnderreplicationManager urm =
+                     mFactory.newLedgerUnderreplicationManager()) {
+                long checkAllLedgersCTime = urm.getCheckAllLedgersCTime();
+                if (checkAllLedgersCTime > (curTime - (20 * 24 * 60 * 60 * 1000))) {
+                    Assert.fail("The checkAllLedgersCTime should have been reset to at least 20 days old");
+                }
+                long placementPolicyCheckCTime = urm.getPlacementPolicyCheckCTime();
+                if (placementPolicyCheckCTime > (curTime - (20 * 24 * 60 * 60 * 1000))) {
+                    Assert.fail("The placementPolicyCheckCTime should have been reset to at least 20 days old");
+                }
+                long replicasCheckCTime = urm.getReplicasCheckCTime();
+                if (replicasCheckCTime > (curTime - (20 * 24 * 60 * 60 * 1000))) {
+                    Assert.fail("The replicasCheckCTime should have been reset to at least 20 days old");
+                }
+            } catch (InterruptedException | ReplicationException e) {
+                throw new UncheckedExecutionException(e);
+            }
+            return null;
+        });
+    }
+}
diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/GarbageCollectorThreadTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/GarbageCollectorThreadTest.java
new file mode 100644
index 00000000000..01d7e80f10c
--- /dev/null
+++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/GarbageCollectorThreadTest.java
@@ -0,0 +1,375 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ * + */ +package org.apache.bookkeeper.bookie; + +import static org.apache.bookkeeper.bookie.storage.EntryLogTestUtils.logIdFromLocation; +import static org.apache.bookkeeper.bookie.storage.EntryLogTestUtils.makeEntry; +import static org.apache.bookkeeper.bookie.storage.EntryLogTestUtils.newDirectEntryLogger; +import static org.apache.bookkeeper.bookie.storage.EntryLogTestUtils.newDirsManager; +import static org.apache.bookkeeper.bookie.storage.EntryLogTestUtils.newLegacyEntryLogger; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.containsInAnyOrder; +import static org.hamcrest.Matchers.empty; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThan; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.mockito.Mockito.any; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.when; +import static org.mockito.MockitoAnnotations.openMocks; + +import java.io.File; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.atomic.AtomicBoolean; +import org.apache.bookkeeper.bookie.storage.EntryLogger; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.conf.TestBKConfiguration; +import org.apache.bookkeeper.meta.LedgerManager; +import org.apache.bookkeeper.meta.MockLedgerManager; +import org.apache.bookkeeper.slogger.Slogger; +import org.apache.bookkeeper.stats.NullStatsLogger; +import org.apache.bookkeeper.stats.StatsLogger; +import org.apache.bookkeeper.test.TmpDirs; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.mockito.InjectMocks; +import org.mockito.Mock; +import org.mockito.Spy; + +/** + * Unit test for {@link GarbageCollectorThread}. 
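+ * <p>One invariant exercised below: {@code compactEntryLog()} must leave the
+ * {@code compacting} flag false even when the compactor throws. An
+ * illustrative guard satisfying that invariant (a sketch only, not
+ * necessarily the actual implementation):
+ * <pre>{@code
+ * if (compacting.compareAndSet(false, true)) {
+ *     try {
+ *         compactor.compact(meta);
+ *     } finally {
+ *         compacting.set(false); // always reset, even on exception
+ *     }
+ * }
+ * }</pre>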
+ */ +@SuppressWarnings("deprecation") +public class GarbageCollectorThreadTest { + private static final Slogger slog = Slogger.CONSOLE; + + private final TmpDirs tmpDirs = new TmpDirs(); + + @InjectMocks + @Spy + private GarbageCollectorThread mockGCThread; + + @Mock + private LedgerManager ledgerManager; + @Mock + private StatsLogger statsLogger; + @Mock + private ScheduledExecutorService gcExecutor; + + private ServerConfiguration conf = spy(new ServerConfiguration().setAllowLoopback(true)); + private CompactableLedgerStorage ledgerStorage = mock(CompactableLedgerStorage.class); + + @Before + public void setUp() throws Exception { + conf.setAllowLoopback(true); + openMocks(this); + } + + @After + public void cleanup() throws Exception { + tmpDirs.cleanup(); + } + + @Test + public void testCompactEntryLogWithException() throws Exception { + AbstractLogCompactor mockCompactor = mock(AbstractLogCompactor.class); + when(mockCompactor.compact(any(EntryLogMetadata.class))) + .thenThrow(new RuntimeException("Unexpected compaction error")); + mockGCThread.compactor = mockCompactor; + + // Although compaction of an entry log fails due to an unexpected error, + // the `compacting` flag should return to false + AtomicBoolean compacting = mockGCThread.compacting; + assertFalse(compacting.get()); + mockGCThread.compactEntryLog(new EntryLogMetadata(9999)); + assertFalse(compacting.get()); + } + + @Test + public void testCalculateUsageBucket() { + // Valid range for usage is [0.0 to 1.0] + final int numBuckets = 10; + int[] usageBuckets = new int[numBuckets]; + String[] bucketNames = new String[numBuckets]; + for (int i = 0; i < numBuckets; i++) { + usageBuckets[i] = 0; + bucketNames[i] = String.format("%d%%", (i + 1) * 10); + } + + int items = 10000; + + for (int item = 0; item <= items; item++) { + double usage = ((double) item / (double) items); + int index = mockGCThread.calculateUsageIndex(numBuckets, usage); + assertFalse("Boundary condition exceeded", index < 0 || index >= numBuckets); + slog.kv("usage", usage) + .kv("index", index) + .info("Mapped usage to index"); + usageBuckets[index]++; + } + + Slogger sl = slog.ctx(); + for (int i = 0; i < numBuckets; i++) { + sl = sl.kv(bucketNames[i], usageBuckets[i]); + } + sl.info("Compaction: entry log usage buckets"); + + int sum = 0; + for (int i = 0; i < numBuckets; i++) { + sum += usageBuckets[i]; + } + Assert.assertEquals("Incorrect number of items", items + 1, sum); + } + + @Test + public void testExtractMetaFromEntryLogsLegacy() throws Exception { + File ledgerDir = tmpDirs.createNew("testExtractMeta", "ledgers"); + testExtractMetaFromEntryLogs( + newLegacyEntryLogger(20000, ledgerDir), ledgerDir); + } + + @Test + public void testExtractMetaFromEntryLogsDirect() throws Exception { + File ledgerDir = tmpDirs.createNew("testExtractMeta", "ledgers"); + testExtractMetaFromEntryLogs( + newDirectEntryLogger(23000, // direct header is 4kb rather than 1kb + ledgerDir), ledgerDir); + } + + private void testExtractMetaFromEntryLogs(EntryLogger entryLogger, File ledgerDir) + throws Exception { + + MockLedgerStorage storage = new MockLedgerStorage(); + MockLedgerManager lm = new MockLedgerManager(); + + GarbageCollectorThread gcThread = new GarbageCollectorThread( + TestBKConfiguration.newServerConfiguration(), lm, + newDirsManager(ledgerDir), + storage, entryLogger, + NullStatsLogger.INSTANCE); + + // Add entries. 
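+ // (Location note: the values returned by addEntry encode the entry log
+ // id in the high 32 bits, i.e. roughly logId == (location >> 32), which
+ // is what the logIdFromLocation() helper extracts below.)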
+ // Ledger 1 is on first entry log + // Ledger 2 spans first, second and third entry log + // Ledger 3 is on the third entry log (which is still active when extract meta) + long loc1 = entryLogger.addEntry(1L, makeEntry(1L, 1L, 5000)); + long loc2 = entryLogger.addEntry(2L, makeEntry(2L, 1L, 5000)); + assertThat(logIdFromLocation(loc2), equalTo(logIdFromLocation(loc1))); + long loc3 = entryLogger.addEntry(2L, makeEntry(2L, 1L, 15000)); + assertThat(logIdFromLocation(loc3), greaterThan(logIdFromLocation(loc2))); + long loc4 = entryLogger.addEntry(2L, makeEntry(2L, 1L, 15000)); + assertThat(logIdFromLocation(loc4), greaterThan(logIdFromLocation(loc3))); + long loc5 = entryLogger.addEntry(3L, makeEntry(3L, 1L, 1000)); + assertThat(logIdFromLocation(loc5), equalTo(logIdFromLocation(loc4))); + + long logId1 = logIdFromLocation(loc2); + long logId2 = logIdFromLocation(loc3); + long logId3 = logIdFromLocation(loc5); + entryLogger.flush(); + + storage.setMasterKey(1L, new byte[0]); + storage.setMasterKey(2L, new byte[0]); + storage.setMasterKey(3L, new byte[0]); + + assertThat(entryLogger.getFlushedLogIds(), containsInAnyOrder(logId1, logId2)); + assertTrue(entryLogger.logExists(logId3)); + + // all ledgers exist, nothing should disappear + final EntryLogMetadataMap entryLogMetaMap = gcThread.getEntryLogMetaMap(); + gcThread.extractMetaFromEntryLogs(); + + assertThat(entryLogger.getFlushedLogIds(), containsInAnyOrder(logId1, logId2)); + assertTrue(entryLogMetaMap.containsKey(logId1)); + assertTrue(entryLogMetaMap.containsKey(logId2)); + assertTrue(entryLogger.logExists(logId3)); + + // log 2 is 100% ledger 2, so it should disappear if ledger 2 is deleted + entryLogMetaMap.clear(); + storage.deleteLedger(2L); + gcThread.extractMetaFromEntryLogs(); + + assertThat(entryLogger.getFlushedLogIds(), containsInAnyOrder(logId1)); + assertTrue(entryLogMetaMap.containsKey(logId1)); + assertTrue(entryLogger.logExists(logId3)); + + // delete all ledgers, all logs except the current should be deleted + entryLogMetaMap.clear(); + storage.deleteLedger(1L); + storage.deleteLedger(3L); + gcThread.extractMetaFromEntryLogs(); + + assertThat(entryLogger.getFlushedLogIds(), empty()); + assertTrue(entryLogMetaMap.isEmpty()); + assertTrue(entryLogger.logExists(logId3)); + + // add enough entries to roll log, log 3 can not be GC'd + long loc6 = entryLogger.addEntry(3L, makeEntry(3L, 1L, 25000)); + assertThat(logIdFromLocation(loc6), greaterThan(logIdFromLocation(loc5))); + entryLogger.flush(); + assertThat(entryLogger.getFlushedLogIds(), containsInAnyOrder(logId3)); + + entryLogMetaMap.clear(); + gcThread.extractMetaFromEntryLogs(); + + assertThat(entryLogger.getFlushedLogIds(), empty()); + assertTrue(entryLogMetaMap.isEmpty()); + assertFalse(entryLogger.logExists(logId3)); + } + + @Test + public void testCompactionWithFileSizeCheck() throws Exception { + File ledgerDir = tmpDirs.createNew("testFileSize", "ledgers"); + EntryLogger entryLogger = newLegacyEntryLogger(20000, ledgerDir); + + MockLedgerStorage storage = new MockLedgerStorage(); + MockLedgerManager lm = new MockLedgerManager(); + + GarbageCollectorThread gcThread = new GarbageCollectorThread( + TestBKConfiguration.newServerConfiguration().setUseTargetEntryLogSizeForGc(true), lm, + newDirsManager(ledgerDir), + storage, entryLogger, NullStatsLogger.INSTANCE); + + // Add entries. 
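+ // (Sizing note: entry sizes below are picked against the 20000-byte log
+ // size limit passed to newLegacyEntryLogger above; once a log fills up,
+ // the next addEntry rolls to a fresh log id, which the
+ // logIdFromLocation() assertions then observe.)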
+ // Ledger 1 is on first entry log + // Ledger 2 spans first, second and third entry log + // Ledger 3 is on the third entry log (which is still active when extract meta) + long loc1 = entryLogger.addEntry(1L, makeEntry(1L, 1L, 5000)); + long loc2 = entryLogger.addEntry(2L, makeEntry(2L, 1L, 5000)); + assertThat(logIdFromLocation(loc2), equalTo(logIdFromLocation(loc1))); + long loc3 = entryLogger.addEntry(2L, makeEntry(2L, 2L, 15000)); + assertThat(logIdFromLocation(loc3), greaterThan(logIdFromLocation(loc2))); + long loc4 = entryLogger.addEntry(2L, makeEntry(2L, 3L, 15000)); + assertThat(logIdFromLocation(loc4), greaterThan(logIdFromLocation(loc3))); + long loc5 = entryLogger.addEntry(3L, makeEntry(3L, 1L, 1000)); + assertThat(logIdFromLocation(loc5), equalTo(logIdFromLocation(loc4))); + long loc6 = entryLogger.addEntry(3L, makeEntry(3L, 2L, 5000)); + + long logId1 = logIdFromLocation(loc2); + long logId2 = logIdFromLocation(loc3); + long logId3 = logIdFromLocation(loc5); + long logId4 = logIdFromLocation(loc6); + entryLogger.flush(); + + storage.setMasterKey(1L, new byte[0]); + storage.setMasterKey(2L, new byte[0]); + storage.setMasterKey(3L, new byte[0]); + + assertThat(entryLogger.getFlushedLogIds(), containsInAnyOrder(logId1, logId2, logId3)); + assertTrue(entryLogger.logExists(logId1)); + assertTrue(entryLogger.logExists(logId2)); + assertTrue(entryLogger.logExists(logId3)); + assertTrue(entryLogger.logExists(logId4)); + + // all ledgers exist, nothing should disappear + final EntryLogMetadataMap entryLogMetaMap = gcThread.getEntryLogMetaMap(); + gcThread.extractMetaFromEntryLogs(); + + assertThat(entryLogger.getFlushedLogIds(), containsInAnyOrder(logId1, logId2, logId3)); + assertTrue(entryLogMetaMap.containsKey(logId1)); + assertTrue(entryLogMetaMap.containsKey(logId2)); + assertTrue(entryLogger.logExists(logId3)); + + storage.deleteLedger(1); + // only logId 1 will be compacted. + gcThread.runWithFlags(true, true, false); + + // logId1 and logId2 should be compacted + assertFalse(entryLogger.logExists(logId1)); + assertTrue(entryLogger.logExists(logId2)); + assertTrue(entryLogger.logExists(logId3)); + assertFalse(entryLogMetaMap.containsKey(logId1)); + assertTrue(entryLogMetaMap.containsKey(logId2)); + + assertEquals(1, storage.getUpdatedLocations().size()); + + EntryLocation location2 = storage.getUpdatedLocations().get(0); + assertEquals(2, location2.getLedger()); + assertEquals(1, location2.getEntry()); + assertEquals(logIdFromLocation(location2.getLocation()), logId4); + } + + @Test + public void testCompactionWithoutFileSizeCheck() throws Exception { + File ledgerDir = tmpDirs.createNew("testFileSize", "ledgers"); + EntryLogger entryLogger = newLegacyEntryLogger(20000, ledgerDir); + + MockLedgerStorage storage = new MockLedgerStorage(); + MockLedgerManager lm = new MockLedgerManager(); + + GarbageCollectorThread gcThread = new GarbageCollectorThread( + TestBKConfiguration.newServerConfiguration(), lm, + newDirsManager(ledgerDir), + storage, entryLogger, NullStatsLogger.INSTANCE); + + // Add entries. 
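+ // (Unlike testCompactionWithFileSizeCheck above, this test leaves
+ // useTargetEntryLogSizeForGc at its default, so the runWithFlags() call
+ // at the end is expected to compact nothing and update no locations.)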
+ // Ledger 1 is on first entry log + // Ledger 2 spans first, second and third entry log + // Ledger 3 is on the third entry log (which is still active when extract meta) + long loc1 = entryLogger.addEntry(1L, makeEntry(1L, 1L, 5000)); + long loc2 = entryLogger.addEntry(2L, makeEntry(2L, 1L, 5000)); + assertThat(logIdFromLocation(loc2), equalTo(logIdFromLocation(loc1))); + long loc3 = entryLogger.addEntry(2L, makeEntry(2L, 2L, 15000)); + assertThat(logIdFromLocation(loc3), greaterThan(logIdFromLocation(loc2))); + long loc4 = entryLogger.addEntry(2L, makeEntry(2L, 3L, 15000)); + assertThat(logIdFromLocation(loc4), greaterThan(logIdFromLocation(loc3))); + long loc5 = entryLogger.addEntry(3L, makeEntry(3L, 1L, 1000)); + assertThat(logIdFromLocation(loc5), equalTo(logIdFromLocation(loc4))); + + long logId1 = logIdFromLocation(loc2); + long logId2 = logIdFromLocation(loc3); + long logId3 = logIdFromLocation(loc5); + entryLogger.flush(); + + storage.setMasterKey(1L, new byte[0]); + storage.setMasterKey(2L, new byte[0]); + storage.setMasterKey(3L, new byte[0]); + + assertThat(entryLogger.getFlushedLogIds(), containsInAnyOrder(logId1, logId2)); + assertTrue(entryLogger.logExists(logId1)); + assertTrue(entryLogger.logExists(logId2)); + assertTrue(entryLogger.logExists(logId3)); + + // all ledgers exist, nothing should disappear + final EntryLogMetadataMap entryLogMetaMap = gcThread.getEntryLogMetaMap(); + gcThread.extractMetaFromEntryLogs(); + + assertThat(entryLogger.getFlushedLogIds(), containsInAnyOrder(logId1, logId2)); + assertTrue(entryLogMetaMap.containsKey(logId1)); + assertTrue(entryLogMetaMap.containsKey(logId2)); + assertTrue(entryLogger.logExists(logId3)); + + gcThread.runWithFlags(true, true, false); + + assertTrue(entryLogger.logExists(logId1)); + assertTrue(entryLogger.logExists(logId2)); + assertTrue(entryLogger.logExists(logId3)); + assertTrue(entryLogMetaMap.containsKey(logId1)); + assertTrue(entryLogMetaMap.containsKey(logId2)); + + assertEquals(0, storage.getUpdatedLocations().size()); + } + +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/GcOverreplicatedLedgerTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/GcOverreplicatedLedgerTest.java new file mode 100644 index 00000000000..6c17bd68c0a --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/GcOverreplicatedLedgerTest.java @@ -0,0 +1,224 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + */ + +package org.apache.bookkeeper.bookie; + +import com.google.common.collect.Lists; +import java.io.IOException; +import java.net.URI; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; +import java.util.SortedMap; +import java.util.concurrent.TimeUnit; +import lombok.Cleanup; +import org.apache.bookkeeper.bookie.GarbageCollector.GarbageCleaner; +import org.apache.bookkeeper.client.BookKeeper.DigestType; +import org.apache.bookkeeper.client.LedgerHandle; +import org.apache.bookkeeper.client.api.LedgerMetadata; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.meta.HierarchicalLedgerManagerFactory; +import org.apache.bookkeeper.meta.LedgerManagerFactory; +import org.apache.bookkeeper.meta.LedgerManagerTestCase; +import org.apache.bookkeeper.meta.LedgerUnderreplicationManager; +import org.apache.bookkeeper.meta.MetadataBookieDriver; +import org.apache.bookkeeper.meta.MetadataDrivers; +import org.apache.bookkeeper.meta.ZkLedgerUnderreplicationManager; +import org.apache.bookkeeper.meta.exceptions.MetadataException; +import org.apache.bookkeeper.meta.zk.ZKMetadataDriverBase; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.stats.NullStatsLogger; +import org.apache.bookkeeper.util.SnapshotMap; +import org.apache.commons.configuration.ConfigurationException; +import org.apache.zookeeper.ZooDefs; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; + +/** + * Test GC-overreplicated ledger. + */ +@RunWith(Parameterized.class) +public class GcOverreplicatedLedgerTest extends LedgerManagerTestCase { + + @Before + @Override + public void setUp() throws Exception { + super.setUp(); + ledgerManager = ledgerManagerFactory.newLedgerManager(); + activeLedgers = new SnapshotMap(); + } + + public GcOverreplicatedLedgerTest(Class lmFactoryCls) { + super(lmFactoryCls, 3); + } + + @Parameters + public static Collection configs() { + return Arrays.asList(new Object[][] { { HierarchicalLedgerManagerFactory.class } }); + } + + @Test + public void testGcOverreplicatedLedger() throws Exception { + LedgerHandle lh = bkc.createLedger(2, 2, DigestType.MAC, "".getBytes()); + activeLedgers.put(lh.getId(), true); + + LedgerMetadata newLedgerMetadata = ledgerManager.readLedgerMetadata(lh.getId()).get().getValue(); + + BookieId bookieNotInEnsemble = getBookieNotInEnsemble(newLedgerMetadata); + ServerConfiguration bkConf = getBkConf(bookieNotInEnsemble); + + @Cleanup + final MetadataBookieDriver metadataDriver = instantiateMetadataDriver(bkConf); + @Cleanup + final LedgerManagerFactory lmf = metadataDriver.getLedgerManagerFactory(); + @Cleanup + final LedgerUnderreplicationManager lum = lmf.newLedgerUnderreplicationManager(); + + Assert.assertFalse(lum.isLedgerBeingReplicated(lh.getId())); + + bkConf.setGcOverreplicatedLedgerWaitTime(10, TimeUnit.MILLISECONDS); + + lh.close(); + + final CompactableLedgerStorage mockLedgerStorage = new MockLedgerStorage(); + final GarbageCollector garbageCollector = new ScanAndCompareGarbageCollector(ledgerManager, mockLedgerStorage, + bkConf, NullStatsLogger.INSTANCE); + Thread.sleep(bkConf.getGcOverreplicatedLedgerWaitTimeMillis() + 1); + garbageCollector.gc(new GarbageCleaner() { + + @Override + public void clean(long ledgerId) { + try { + mockLedgerStorage.deleteLedger(ledgerId); + } catch (IOException e) { + e.printStackTrace(); + return; + } 
+ } + }); + + Assert.assertFalse(lum.isLedgerBeingReplicated(lh.getId())); + Assert.assertFalse(activeLedgers.containsKey(lh.getId())); + } + + private static MetadataBookieDriver instantiateMetadataDriver(ServerConfiguration conf) + throws BookieException { + try { + final String metadataServiceUriStr = conf.getMetadataServiceUri(); + final MetadataBookieDriver driver = MetadataDrivers.getBookieDriver(URI.create(metadataServiceUriStr)); + driver.initialize(conf, NullStatsLogger.INSTANCE); + return driver; + } catch (MetadataException me) { + throw new BookieException.MetadataStoreException("Failed to initialize metadata bookie driver", me); + } catch (ConfigurationException e) { + throw new BookieException.BookieIllegalOpException(e); + } + } + + @Test + public void testNoGcOfLedger() throws Exception { + LedgerHandle lh = bkc.createLedger(2, 2, DigestType.MAC, "".getBytes()); + activeLedgers.put(lh.getId(), true); + + LedgerMetadata newLedgerMetadata = ledgerManager.readLedgerMetadata(lh.getId()).get().getValue(); + BookieId address = null; + SortedMap> ensembleMap = newLedgerMetadata.getAllEnsembles(); + for (List ensemble : ensembleMap.values()) { + address = ensemble.get(0); + } + ServerConfiguration bkConf = getBkConf(address); + bkConf.setGcOverreplicatedLedgerWaitTime(10, TimeUnit.MILLISECONDS); + + lh.close(); + + final CompactableLedgerStorage mockLedgerStorage = new MockLedgerStorage(); + final GarbageCollector garbageCollector = new ScanAndCompareGarbageCollector(ledgerManager, mockLedgerStorage, + bkConf, NullStatsLogger.INSTANCE); + Thread.sleep(bkConf.getGcOverreplicatedLedgerWaitTimeMillis() + 1); + garbageCollector.gc(new GarbageCleaner() { + + @Override + public void clean(long ledgerId) { + try { + mockLedgerStorage.deleteLedger(ledgerId); + } catch (IOException e) { + e.printStackTrace(); + return; + } + } + }); + + Assert.assertTrue(activeLedgers.containsKey(lh.getId())); + } + + @Test + public void testNoGcIfLedgerBeingReplicated() throws Exception { + LedgerHandle lh = bkc.createLedger(2, 2, DigestType.MAC, "".getBytes()); + activeLedgers.put(lh.getId(), true); + + LedgerMetadata newLedgerMetadata = ledgerManager.readLedgerMetadata(lh.getId()).get().getValue(); + BookieId bookieNotInEnsemble = getBookieNotInEnsemble(newLedgerMetadata); + ServerConfiguration bkConf = getBkConf(bookieNotInEnsemble); + bkConf.setGcOverreplicatedLedgerWaitTime(10, TimeUnit.MILLISECONDS); + + lh.close(); + + ZkLedgerUnderreplicationManager.acquireUnderreplicatedLedgerLock( + zkc, + ZKMetadataDriverBase.resolveZkLedgersRootPath(baseConf), + lh.getId(), + ZooDefs.Ids.OPEN_ACL_UNSAFE); + + final CompactableLedgerStorage mockLedgerStorage = new MockLedgerStorage(); + final GarbageCollector garbageCollector = new ScanAndCompareGarbageCollector(ledgerManager, mockLedgerStorage, + bkConf, NullStatsLogger.INSTANCE); + Thread.sleep(bkConf.getGcOverreplicatedLedgerWaitTimeMillis() + 1); + garbageCollector.gc(new GarbageCleaner() { + + @Override + public void clean(long ledgerId) { + try { + mockLedgerStorage.deleteLedger(ledgerId); + } catch (IOException e) { + e.printStackTrace(); + return; + } + } + }); + + Assert.assertTrue(activeLedgers.containsKey(lh.getId())); + } + + private BookieId getBookieNotInEnsemble(LedgerMetadata ledgerMetadata) throws Exception { + List allAddresses = Lists.newArrayList(); + allAddresses.addAll(bookieAddresses()); + SortedMap> ensembles = ledgerMetadata.getAllEnsembles(); + for (List fragmentEnsembles : ensembles.values()) { + 
allAddresses.removeAll(fragmentEnsembles); + } + Assert.assertEquals(allAddresses.size(), 1); + return allAddresses.get(0); + } +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/IndexCorruptionTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/IndexCorruptionTest.java index b12dfe9ea25..492c37d60eb 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/IndexCorruptionTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/IndexCorruptionTest.java @@ -26,7 +26,6 @@ import static org.junit.Assert.assertTrue; import java.util.Enumeration; - import org.apache.bookkeeper.client.BookKeeper.DigestType; import org.apache.bookkeeper.client.LedgerEntry; import org.apache.bookkeeper.client.LedgerHandle; @@ -53,9 +52,11 @@ public IndexCorruptionTest() { @Test public void testNoSuchLedger() throws Exception { - LOG.debug("Testing NoSuchLedger"); + if (LOG.isDebugEnabled()) { + LOG.debug("Testing NoSuchLedger"); + } - SyncThread syncThread = bs.get(0).getBookie().syncThread; + SyncThread syncThread = ((BookieImpl) serverByIndex(0).getBookie()).syncThread; syncThread.suspendSync(); // Create a ledger LedgerHandle lh = bkc.createLedger(1, 1, digestType, "".getBytes()); @@ -94,9 +95,11 @@ public void testNoSuchLedger() throws Exception { @Test public void testEmptyIndexPage() throws Exception { - LOG.debug("Testing EmptyIndexPage"); + if (LOG.isDebugEnabled()) { + LOG.debug("Testing EmptyIndexPage"); + } - SyncThread syncThread = bs.get(0).getBookie().syncThread; + SyncThread syncThread = ((BookieImpl) serverByIndex(0).getBookie()).syncThread; assertNotNull("Not found SyncThread.", syncThread); syncThread.suspendSync(); diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/IndexPersistenceMgrTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/IndexPersistenceMgrTest.java index 70f2a0ecb07..0635d1dd213 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/IndexPersistenceMgrTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/IndexPersistenceMgrTest.java @@ -20,16 +20,19 @@ */ package org.apache.bookkeeper.bookie; -import static com.google.common.base.Charsets.UTF_8; +import static java.nio.charset.StandardCharsets.UTF_8; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.spy; import io.netty.buffer.ByteBuf; import io.netty.buffer.ByteBufUtil; - +import io.netty.buffer.UnpooledByteBufAllocator; import java.io.File; import java.io.IOException; import java.io.RandomAccessFile; @@ -37,7 +40,7 @@ import java.nio.channels.FileChannel; import java.security.GeneralSecurityException; import java.util.Arrays; - +import java.util.Collections; import org.apache.bookkeeper.bookie.FileInfoBackingCache.CachedFileInfo; import org.apache.bookkeeper.client.BookKeeper; import org.apache.bookkeeper.common.util.Watcher; @@ -57,7 +60,7 @@ public class IndexPersistenceMgrTest { ServerConfiguration conf; - File journalDir, ledgerDir; + File journalDir, ledgerDir1, ledgerDir2; LedgerDirsManager ledgerDirsManager; LedgerDirsMonitor ledgerMonitor; @@ -66,22 +69,27 @@ public void setUp() throws Exception { journalDir = File.createTempFile("IndexPersistenceMgr", "Journal"); 
journalDir.delete(); journalDir.mkdir(); - ledgerDir = File.createTempFile("IndexPersistenceMgr", "Ledger"); - ledgerDir.delete(); - ledgerDir.mkdir(); + ledgerDir1 = File.createTempFile("IndexPersistenceMgr", "Ledger1"); + ledgerDir1.delete(); + ledgerDir1.mkdir(); + ledgerDir2 = File.createTempFile("IndexPersistenceMgr", "Ledger2"); + ledgerDir2.delete(); + ledgerDir2.mkdir(); // Create current directories - Bookie.getCurrentDirectory(journalDir).mkdir(); - Bookie.getCurrentDirectory(ledgerDir).mkdir(); + BookieImpl.getCurrentDirectory(journalDir).mkdir(); + BookieImpl.getCurrentDirectory(ledgerDir1).mkdir(); + BookieImpl.getCurrentDirectory(ledgerDir2).mkdir(); conf = new ServerConfiguration(); conf.setMetadataServiceUri(null); conf.setJournalDirName(journalDir.getPath()); - conf.setLedgerDirNames(new String[] { ledgerDir.getPath() }); + conf.setLedgerDirNames(new String[] { ledgerDir1.getPath(), ledgerDir2.getPath() }); ledgerDirsManager = new LedgerDirsManager(conf, conf.getLedgerDirs(), new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold())); ledgerMonitor = new LedgerDirsMonitor(conf, - new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold()), ledgerDirsManager); + new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold()), + Collections.singletonList(ledgerDirsManager)); ledgerMonitor.init(); } @@ -89,7 +97,8 @@ public void setUp() throws Exception { public void tearDown() throws Exception { ledgerMonitor.shutdown(); FileUtils.deleteDirectory(journalDir); - FileUtils.deleteDirectory(ledgerDir); + FileUtils.deleteDirectory(ledgerDir1); + FileUtils.deleteDirectory(ledgerDir2); } private IndexPersistenceMgr createIndexPersistenceManager(int openFileLimit) throws Exception { @@ -155,7 +164,7 @@ public void testGetFileInfoReadBeforeWrite() throws Exception { indexPersistenceMgr.getFileInfo(lid, null); fail("Should fail get file info for reading if the file doesn't exist"); } catch (Bookie.NoLedgerException nle) { - // exepcted + // expected } assertEquals(0, indexPersistenceMgr.writeFileInfoCache.size()); assertEquals(0, indexPersistenceMgr.readFileInfoCache.size()); @@ -335,14 +344,114 @@ public void testFileInfosOfVariousHeaderVersions() throws Exception { } } + @Test + public void testIndexFileRelocation() throws Exception { + final long ledgerId = Integer.MAX_VALUE; + final String ledgerName = IndexPersistenceMgr.getLedgerName(ledgerId); + + IndexPersistenceMgr indexPersistenceMgr = createIndexPersistenceManager(1); + preCreateFileInfoForLedgerInDir1(ledgerId, FileInfo.V1); + + ledgerDirsManager.addToFilledDirs(BookieImpl.getCurrentDirectory(ledgerDir1)); + indexPersistenceMgr.flushLedgerHeader(ledgerId); + + File expectedIndexFile = new File(BookieImpl.getCurrentDirectory(ledgerDir2), ledgerName); + CachedFileInfo fileInfo = indexPersistenceMgr.getFileInfo(ledgerId, null); + assertTrue(fileInfo.isSameFile(expectedIndexFile)); + assertFalse(fileInfo.isDeleted()); + + indexPersistenceMgr.close(); + + // Test startup after clean shutdown. + // + // Index file should stay in original location. 
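+ // (Recap: ledgerDir1 was marked full via addToFilledDirs above, so
+ // flushLedgerHeader relocated the index file to writable ledgerDir2;
+ // a clean restart must keep using that relocated file.)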
+ IndexPersistenceMgr indexPersistenceMgr2 = createIndexPersistenceManager(1); + CachedFileInfo fileInfo2 = indexPersistenceMgr2.getFileInfo(ledgerId, null); + assertTrue(fileInfo2.isSameFile(expectedIndexFile)); + indexPersistenceMgr2.close(); + } + + @Test + public void testIndexFileRelocationCrashBeforeOriginalFileDeleted() throws Exception { + final long ledgerId = Integer.MAX_VALUE; + final String ledgerName = IndexPersistenceMgr.getLedgerName(ledgerId); + final String reason = "crash before original file deleted"; + + try { + IndexPersistenceMgr indexPersistenceMgr = createIndexPersistenceManager(1); + preCreateFileInfoForLedgerInDir1(ledgerId, FileInfo.V1); + + CachedFileInfo fileInfo = spy(indexPersistenceMgr.getFileInfo(ledgerId, null)); + doAnswer(invocation -> { + throw new RuntimeException(reason); + }).when(fileInfo).delete(); + indexPersistenceMgr.readFileInfoCache.put(ledgerId, fileInfo); + + ledgerDirsManager.addToFilledDirs(BookieImpl.getCurrentDirectory(ledgerDir1)); + indexPersistenceMgr.flushLedgerHeader(ledgerId); + fail("should fail due to " + reason); + } catch (RuntimeException ex) { + assertEquals(reason, ex.getMessage()); + } + + // Test startup after: + // 1. relocation file created. + // 2. crashed with possible corrupted relocation file. + // + // Index file should stay in original location in this case. + IndexPersistenceMgr indexPersistenceMgr2 = createIndexPersistenceManager(1); + File expectedIndexFile = new File(BookieImpl.getCurrentDirectory(ledgerDir1), ledgerName); + CachedFileInfo fileInfo2 = indexPersistenceMgr2.getFileInfo(ledgerId, null); + assertTrue(fileInfo2.isSameFile(expectedIndexFile)); + indexPersistenceMgr2.close(); + } + + @Test + public void testIndexFileRelocationCrashAfterOriginalFileDeleted() throws Exception { + final long ledgerId = Integer.MAX_VALUE; + final String ledgerName = IndexPersistenceMgr.getLedgerName(ledgerId); + final String reason = "crash after original file deleted"; + + try { + IndexPersistenceMgr indexPersistenceMgr = createIndexPersistenceManager(1); + preCreateFileInfoForLedgerInDir1(ledgerId, FileInfo.V1); + + CachedFileInfo fileInfo = spy(indexPersistenceMgr.getFileInfo(ledgerId, null)); + doAnswer(invocation -> { + invocation.callRealMethod(); + throw new RuntimeException(reason); + }).when(fileInfo).delete(); + indexPersistenceMgr.readFileInfoCache.put(ledgerId, fileInfo); + + ledgerDirsManager.addToFilledDirs(BookieImpl.getCurrentDirectory(ledgerDir1)); + indexPersistenceMgr.flushLedgerHeader(ledgerId); + fail("should fail due to " + reason); + } catch (RuntimeException ex) { + assertEquals(reason, ex.getMessage()); + } + + // Test startup after: + // 1. relocation file created, filled and synced. + // 2. original index file deleted. + // 3. crashed. + // + // Index file should stay in new location in this case. 
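+ // (Taken together with the previous test: if the crash happens before
+ // the original index file is deleted, recovery keeps the original; once
+ // the original is gone, recovery must adopt the relocated copy.)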
+ IndexPersistenceMgr indexPersistenceMgr2 = createIndexPersistenceManager(1); + File expectedIndexFile = new File(BookieImpl.getCurrentDirectory(ledgerDir2), ledgerName); + CachedFileInfo fileInfo2 = indexPersistenceMgr2.getFileInfo(ledgerId, null); + assertTrue(fileInfo2.isSameFile(expectedIndexFile)); + indexPersistenceMgr2.close(); + } + void validateFileInfo(IndexPersistenceMgr indexPersistenceMgr, long ledgerId, int headerVersion) throws IOException, GeneralSecurityException { BookKeeper.DigestType digestType = BookKeeper.DigestType.CRC32; boolean getUseV2WireProtocol = true; - preCreateFileInfoForLedger(ledgerId, headerVersion); + preCreateFileInfoForLedgerInDir1(ledgerId, headerVersion); DigestManager digestManager = DigestManager.instantiate(ledgerId, masterKey, - BookKeeper.DigestType.toProtoDigestType(digestType), getUseV2WireProtocol); + BookKeeper.DigestType.toProtoDigestType(digestType), UnpooledByteBufAllocator.DEFAULT, + getUseV2WireProtocol); CachedFileInfo fileInfo = indexPersistenceMgr.getFileInfo(ledgerId, masterKey); fileInfo.readHeader(); @@ -359,7 +468,7 @@ void validateFileInfo(IndexPersistenceMgr indexPersistenceMgr, long ledgerId, in assertEquals("explicitLac ByteBuf contents should match", 0, ByteBufUtil.compare(explicitLacByteBuf, indexPersistenceMgr.getExplicitLac(ledgerId))); /* - * release fileInfo untill it is marked dead and closed, so that + * release fileInfo until it is marked dead and closed, so that * contents of it are persisted. */ while (fileInfo.refCount.get() != FileInfoBackingCache.DEAD_REF) { @@ -385,8 +494,8 @@ void validateFileInfo(IndexPersistenceMgr indexPersistenceMgr, long ledgerId, in } } - void preCreateFileInfoForLedger(long ledgerId, int headerVersion) throws IOException { - File ledgerCurDir = Bookie.getCurrentDirectory(ledgerDir); + void preCreateFileInfoForLedgerInDir1(long ledgerId, int headerVersion) throws IOException { + File ledgerCurDir = BookieImpl.getCurrentDirectory(ledgerDir1); String ledgerName = IndexPersistenceMgr.getLedgerName(ledgerId); File indexFile = new File(ledgerCurDir, ledgerName); indexFile.getParentFile().mkdirs(); diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/InterleavedLedgerStorageTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/InterleavedLedgerStorageTest.java new file mode 100644 index 00000000000..ce514632033 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/InterleavedLedgerStorageTest.java @@ -0,0 +1,452 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + */ +package org.apache.bookkeeper.bookie; + +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.BOOKIE_SCOPE; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.STORAGE_SCRUB_PAGE_RETRIES; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import io.netty.buffer.ByteBuf; +import io.netty.buffer.Unpooled; +import io.netty.buffer.UnpooledByteBufAllocator; +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Optional; +import java.util.PrimitiveIterator.OfLong; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.Semaphore; +import java.util.concurrent.atomic.AtomicLong; +import java.util.function.Consumer; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.stream.IntStream; +import org.apache.bookkeeper.bookie.CheckpointSource.Checkpoint; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.conf.TestBKConfiguration; +import org.apache.bookkeeper.stats.NullStatsLogger; +import org.apache.bookkeeper.stats.StatsLogger; +import org.apache.bookkeeper.test.TestStatsProvider; +import org.apache.bookkeeper.util.DiskChecker; +import org.apache.bookkeeper.util.EntryFormatter; +import org.apache.bookkeeper.util.LedgerIdFormatter; +import org.apache.commons.lang.mutable.MutableInt; +import org.apache.commons.lang.mutable.MutableLong; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Test for InterleavedLedgerStorage. 
+ */ +@RunWith(Parameterized.class) +public class InterleavedLedgerStorageTest { + private static final Logger LOG = LoggerFactory.getLogger(InterleavedLedgerStorageTest.class); + + @Parameterized.Parameters + public static Iterable elplSetting() { + return Arrays.asList(true, false); + } + + public InterleavedLedgerStorageTest(boolean elplSetting) { + conf.setEntryLogSizeLimit(2048); + conf.setEntryLogPerLedgerEnabled(elplSetting); + } + + CheckpointSource checkpointSource = new CheckpointSource() { + @Override + public Checkpoint newCheckpoint() { + return Checkpoint.MAX; + } + + @Override + public void checkpointComplete(Checkpoint checkpoint, boolean compact) throws IOException { + } + }; + + Checkpointer checkpointer = new Checkpointer() { + @Override + public void startCheckpoint(Checkpoint checkpoint) { + // No-op + } + + @Override + public void start() { + // no-op + } + }; + + static class TestableDefaultEntryLogger extends DefaultEntryLogger { + public interface CheckEntryListener { + void accept(long ledgerId, + long entryId, + long entryLogId, + long pos); + } + volatile CheckEntryListener testPoint; + + public TestableDefaultEntryLogger( + ServerConfiguration conf, + LedgerDirsManager ledgerDirsManager, + EntryLogListener listener, + StatsLogger statsLogger) throws IOException { + super(conf, ledgerDirsManager, listener, statsLogger, UnpooledByteBufAllocator.DEFAULT); + } + + void setCheckEntryTestPoint(CheckEntryListener testPoint) throws InterruptedException { + this.testPoint = testPoint; + } + + @Override + void checkEntry(long ledgerId, long entryId, long location) throws EntryLookupException, IOException { + CheckEntryListener runBefore = testPoint; + if (runBefore != null) { + runBefore.accept(ledgerId, entryId, logIdForOffset(location), posForOffset(location)); + } + super.checkEntry(ledgerId, entryId, location); + } + } + + TestStatsProvider statsProvider = new TestStatsProvider(); + ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); + LedgerDirsManager ledgerDirsManager; + TestableDefaultEntryLogger entryLogger; + InterleavedLedgerStorage interleavedStorage = new InterleavedLedgerStorage(); + final long numWrites = 2000; + final long moreNumOfWrites = 3000; + final long entriesPerWrite = 2; + final long numOfLedgers = 5; + + @Before + public void setUp() throws Exception { + File tmpDir = File.createTempFile("bkTest", ".dir"); + tmpDir.delete(); + tmpDir.mkdir(); + File curDir = BookieImpl.getCurrentDirectory(tmpDir); + BookieImpl.checkDirectoryStructure(curDir); + + conf.setLedgerDirNames(new String[]{tmpDir.toString()}); + ledgerDirsManager = new LedgerDirsManager(conf, conf.getLedgerDirs(), + new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold())); + + entryLogger = new TestableDefaultEntryLogger( + conf, ledgerDirsManager, null, NullStatsLogger.INSTANCE); + interleavedStorage.initializeWithEntryLogger( + conf, null, ledgerDirsManager, ledgerDirsManager, + entryLogger, statsProvider.getStatsLogger(BOOKIE_SCOPE)); + interleavedStorage.setCheckpointer(checkpointer); + interleavedStorage.setCheckpointSource(checkpointSource); + + // Insert some ledger & entries in the interleaved storage + for (long entryId = 0; entryId < numWrites; entryId++) { + for (long ledgerId = 0; ledgerId < numOfLedgers; ledgerId++) { + if (entryId == 0) { + interleavedStorage.setMasterKey(ledgerId, ("ledger-" + ledgerId).getBytes()); + interleavedStorage.setFenced(ledgerId); + } + ByteBuf entry = Unpooled.buffer(128); + 
entry.writeLong(ledgerId); + entry.writeLong(entryId * entriesPerWrite); + entry.writeBytes(("entry-" + entryId).getBytes()); + + interleavedStorage.addEntry(entry); + } + } + } + + @Test + public void testIndexEntryIterator() throws Exception { + try (LedgerCache.PageEntriesIterable pages = interleavedStorage.getIndexEntries(0)) { + MutableLong curEntry = new MutableLong(0); + for (LedgerCache.PageEntries page : pages) { + try (LedgerEntryPage lep = page.getLEP()) { + lep.getEntries((entry, offset) -> { + Assert.assertEquals(curEntry.longValue(), entry); + Assert.assertNotEquals(0, offset); + curEntry.setValue(entriesPerWrite + entry); + return true; + }); + } + } + Assert.assertEquals(entriesPerWrite * numWrites, curEntry.longValue()); + } + } + + @Test + public void testGetListOfEntriesOfLedger() throws IOException { + for (long ledgerId = 0; ledgerId < numOfLedgers; ledgerId++) { + OfLong entriesOfLedger = interleavedStorage.getListOfEntriesOfLedger(ledgerId); + ArrayList<Long> arrayList = new ArrayList<>(); + Consumer<Long> addMethod = arrayList::add; + entriesOfLedger.forEachRemaining(addMethod); + assertEquals("Number of entries", numWrites, arrayList.size()); + assertTrue("Entries of Ledger", IntStream.range(0, arrayList.size()).allMatch(i -> { + return arrayList.get(i) == (i * entriesPerWrite); + })); + } + + long nonExistingLedger = 456789L; + OfLong entriesOfLedger = interleavedStorage.getListOfEntriesOfLedger(nonExistingLedger); + assertFalse("There shouldn't be any entry", entriesOfLedger.hasNext()); + } + + @Test + public void testGetListOfEntriesOfLedgerAfterFlush() throws IOException { + interleavedStorage.flush(); + + // Insert some more ledger & entries in the interleaved storage + for (long entryId = numWrites; entryId < moreNumOfWrites; entryId++) { + for (long ledgerId = 0; ledgerId < numOfLedgers; ledgerId++) { + ByteBuf entry = Unpooled.buffer(128); + entry.writeLong(ledgerId); + entry.writeLong(entryId * entriesPerWrite); + entry.writeBytes(("entry-" + entryId).getBytes()); + + interleavedStorage.addEntry(entry); + } + } + + for (long ledgerId = 0; ledgerId < numOfLedgers; ledgerId++) { + OfLong entriesOfLedger = interleavedStorage.getListOfEntriesOfLedger(ledgerId); + ArrayList<Long> arrayList = new ArrayList<>(); + Consumer<Long> addMethod = arrayList::add; + entriesOfLedger.forEachRemaining(addMethod); + assertEquals("Number of entries", moreNumOfWrites, arrayList.size()); + assertTrue("Entries of Ledger", IntStream.range(0, arrayList.size()).allMatch(i -> { + return arrayList.get(i) == (i * entriesPerWrite); + })); + } + } + + @Test + public void testConsistencyCheckConcurrentGC() throws Exception { + final long signalDone = -1; + final List<Exception> asyncErrors = new ArrayList<>(); + final LinkedBlockingQueue<Long> toCompact = new LinkedBlockingQueue<>(); + final Semaphore awaitingCompaction = new Semaphore(0); + + interleavedStorage.flush(); + final long lastLogId = entryLogger.getLeastUnflushedLogId(); + + final MutableInt counter = new MutableInt(0); + entryLogger.setCheckEntryTestPoint((ledgerId, entryId, entryLogId, pos) -> { + if (entryLogId < lastLogId) { + if (counter.intValue() % 100 == 0) { + try { + toCompact.put(entryLogId); + awaitingCompaction.acquire(); + } catch (InterruptedException e) { + asyncErrors.add(e); + } + } + counter.increment(); + } + }); + + Thread mutator = new Thread(() -> { + EntryLogCompactor compactor = new EntryLogCompactor( + conf, + entryLogger, + interleavedStorage, + entryLogger::removeEntryLog); + while (true) { + Long next = null; + try { + next =
toCompact.take(); + if (next == null || next == signalDone) { + break; + } + compactor.compact(entryLogger.getEntryLogMetadata(next)); + } catch (BufferedChannelBase.BufferedChannelClosedException e) { + // next was already removed, ignore + } catch (Exception e) { + asyncErrors.add(e); + break; + } finally { + if (next != null) { + awaitingCompaction.release(); + } + } + } + }); + mutator.start(); + + List<LedgerStorage.DetectedInconsistency> inconsistencies = interleavedStorage.localConsistencyCheck( + Optional.empty()); + for (LedgerStorage.DetectedInconsistency e: inconsistencies) { + LOG.error("Found: {}", e); + } + Assert.assertEquals(0, inconsistencies.size()); + + toCompact.offer(signalDone); + mutator.join(); + for (Exception e: asyncErrors) { + throw e; + } + + if (!conf.isEntryLogPerLedgerEnabled()) { + Assert.assertNotEquals( + 0, + statsProvider.getCounter(BOOKIE_SCOPE + "." + STORAGE_SCRUB_PAGE_RETRIES).get().longValue()); + } + } + + @Test + public void testConsistencyMissingEntry() throws Exception { + // set 1, 1 to nonsense + interleavedStorage.ledgerCache.putEntryOffset(1, 1, 0xFFFFFFFFFFFFFFFFL); + + List<LedgerStorage.DetectedInconsistency> errors = interleavedStorage.localConsistencyCheck(Optional.empty()); + Assert.assertEquals(1, errors.size()); + LedgerStorage.DetectedInconsistency inconsistency = errors.remove(0); + Assert.assertEquals(1, inconsistency.getEntryId()); + Assert.assertEquals(1, inconsistency.getLedgerId()); + } + + @Test + public void testWrongEntry() throws Exception { + // set 1, 1 to nonsense + interleavedStorage.ledgerCache.putEntryOffset( + 1, + 1, + interleavedStorage.ledgerCache.getEntryOffset(0, 0)); + + List<LedgerStorage.DetectedInconsistency> errors = interleavedStorage.localConsistencyCheck(Optional.empty()); + Assert.assertEquals(1, errors.size()); + LedgerStorage.DetectedInconsistency inconsistency = errors.remove(0); + Assert.assertEquals(1, inconsistency.getEntryId()); + Assert.assertEquals(1, inconsistency.getLedgerId()); + } + + @Test + public void testShellCommands() throws Exception { + interleavedStorage.flush(); + interleavedStorage.shutdown(); + final Pattern entryPattern = Pattern.compile( + "entry (?<entry>\\d+)\t:\t((?<na>N/A)|\\(log:(?<logid>\\d+), pos: (?<pos>\\d+)\\))"); + + class Metadata { + final Pattern keyPattern = Pattern.compile("master key +: ([0-9a-f])"); + final Pattern sizePattern = Pattern.compile("size +: (\\d+)"); + final Pattern entriesPattern = Pattern.compile("entries +: (\\d+)"); + final Pattern isFencedPattern = Pattern.compile("isFenced +: (\\w+)"); + + public String masterKey; + public long size = -1; + public long entries = -1; + public boolean foundFenced = false; + + void check(String s) { + Matcher keyMatcher = keyPattern.matcher(s); + if (keyMatcher.matches()) { + masterKey = keyMatcher.group(1); + return; + } + + Matcher sizeMatcher = sizePattern.matcher(s); + if (sizeMatcher.matches()) { + size = Long.parseLong(sizeMatcher.group(1)); + return; + } + + Matcher entriesMatcher = entriesPattern.matcher(s); + if (entriesMatcher.matches()) { + entries = Long.parseLong(entriesMatcher.group(1)); + return; + } + + Matcher isFencedMatcher = isFencedPattern.matcher(s); + if (isFencedMatcher.matches()) { + Assert.assertEquals("true", isFencedMatcher.group(1)); + foundFenced = true; + return; + } + } + + void validate(long foundEntries) { + Assert.assertTrue(entries >= numWrites * entriesPerWrite); + Assert.assertEquals(entries, foundEntries); + Assert.assertTrue(foundFenced); + Assert.assertNotEquals(-1, size); + } + } + final Metadata foundMetadata = new Metadata(); + + AtomicLong curEntry = new AtomicLong(0); + AtomicLong
someEntryLogger = new AtomicLong(-1); + BookieShell shell = new BookieShell( + LedgerIdFormatter.LONG_LEDGERID_FORMATTER, EntryFormatter.STRING_FORMATTER) { + @Override + void printInfoLine(String s) { + Matcher matcher = entryPattern.matcher(s); + System.out.println(s); + if (matcher.matches()) { + assertEquals(Long.toString(curEntry.get()), matcher.group("entry")); + + if (matcher.group("na") == null) { + String logId = matcher.group("logid"); + Assert.assertNotEquals(matcher.group("logid"), null); + Assert.assertNotEquals(matcher.group("pos"), null); + Assert.assertTrue((curEntry.get() % entriesPerWrite) == 0); + Assert.assertTrue(curEntry.get() <= numWrites * entriesPerWrite); + if (someEntryLogger.get() == -1) { + someEntryLogger.set(Long.parseLong(logId)); + } + } else { + Assert.assertEquals(matcher.group("logid"), null); + Assert.assertEquals(matcher.group("pos"), null); + Assert.assertTrue(((curEntry.get() % entriesPerWrite) != 0) + || ((curEntry.get() >= (entriesPerWrite * numWrites)))); + } + curEntry.incrementAndGet(); + } else { + foundMetadata.check(s); + } + } + }; + shell.setConf(conf); + int res = shell.run(new String[] { "ledger", "-m", "0" }); + Assert.assertEquals(0, res); + Assert.assertTrue(curEntry.get() >= numWrites * entriesPerWrite); + foundMetadata.validate(curEntry.get()); + + // Should pass consistency checker + res = shell.run(new String[] { "localconsistencycheck" }); + Assert.assertEquals(0, res); + + + // Remove a logger + DefaultEntryLogger entryLogger = new DefaultEntryLogger(conf); + entryLogger.removeEntryLog(someEntryLogger.get()); + + // Should fail consistency checker + res = shell.run(new String[] { "localconsistencycheck" }); + Assert.assertEquals(1, res); + } +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/LedgerCacheTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/LedgerCacheTest.java index 1a01299a6cd..d854e3b837b 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/LedgerCacheTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/LedgerCacheTest.java @@ -28,24 +28,24 @@ import static org.junit.Assert.fail; import io.netty.buffer.ByteBuf; +import io.netty.buffer.ByteBufAllocator; import io.netty.buffer.Unpooled; - import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.List; +import java.util.concurrent.CountDownLatch; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; - import org.apache.bookkeeper.bookie.Bookie.NoLedgerException; import org.apache.bookkeeper.bookie.CheckpointSource.Checkpoint; import org.apache.bookkeeper.bookie.FileInfoBackingCache.CachedFileInfo; import org.apache.bookkeeper.conf.ServerConfiguration; import org.apache.bookkeeper.conf.TestBKConfiguration; import org.apache.bookkeeper.meta.LedgerManager; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks; import org.apache.bookkeeper.stats.StatsLogger; import org.apache.bookkeeper.util.BookKeeperConstants; @@ -73,7 +73,7 @@ public class LedgerCacheTest { private final List tempDirs = new ArrayList(); - private Bookie bookie; + private BookieImpl bookie; @Before public void setUp() throws Exception { @@ -86,10 +86,10 @@ public void setUp() throws Exception { conf.setMetadataServiceUri(null); 
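The printInfoLine assertions in the shell test above rely on java.util.regex named capture groups: (?<name>...) in the pattern, matcher.group("name") to read the field back. A self-contained illustration of the same parsing technique:

    import java.util.regex.Matcher;
    import java.util.regex.Pattern;

    public class NamedGroupExample {
        public static void main(String[] args) {
            // Same shape as entryPattern above: one named group per field of the line.
            Pattern p = Pattern.compile(
                    "entry (?<entry>\\d+)\t:\t\\(log:(?<logid>\\d+), pos: (?<pos>\\d+)\\)");
            Matcher m = p.matcher("entry 42\t:\t(log:7, pos: 128)");
            if (m.matches()) {
                System.out.println(m.group("entry"));  // 42
                System.out.println(m.group("logid"));  // 7
                System.out.println(m.group("pos"));    // 128
            }
        }
    }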
conf.setJournalDirName(txnDir.getPath()); conf.setLedgerDirNames(new String[] { ledgerDir.getPath() }); - bookie = new Bookie(conf); + bookie = new TestBookieImpl(conf); activeLedgers = new SnapshotMap(); - ledgerCache = ((InterleavedLedgerStorage) bookie.ledgerStorage.getUnderlyingLedgerStorage()).ledgerCache; + ledgerCache = ((InterleavedLedgerStorage) bookie.getLedgerStorage().getUnderlyingLedgerStorage()).ledgerCache; } @After @@ -98,7 +98,7 @@ public void tearDown() throws Exception { flushThread.interrupt(); flushThread.join(); } - bookie.ledgerStorage.shutdown(); + bookie.getLedgerStorage().shutdown(); FileUtils.deleteDirectory(txnDir); FileUtils.deleteDirectory(ledgerDir); for (File dir : tempDirs) { @@ -116,8 +116,8 @@ private void newLedgerCache() throws IOException { if (ledgerCache != null) { ledgerCache.close(); } - ledgerCache = ((InterleavedLedgerStorage) bookie.ledgerStorage.getUnderlyingLedgerStorage()) - .ledgerCache = new LedgerCacheImpl(conf, activeLedgers, bookie.getIndexDirsManager()); + ledgerCache = ((InterleavedLedgerStorage) bookie.getLedgerStorage().getUnderlyingLedgerStorage()) + .ledgerCache = new LedgerCacheImpl(conf, activeLedgers, bookie.getIndexDirsManager()); flushThread = new Thread() { public void run() { while (true) { @@ -225,7 +225,7 @@ public void testPageEviction() throws Exception { // create ledger cache newLedgerCache(); try { - // create serveral ledgers + // create several ledgers for (int i = 1; i <= numLedgers; i++) { ledgerCache.setMasterKey((long) i, masterKey); ledgerCache.putEntryOffset(i, 0, i * 8); @@ -237,7 +237,7 @@ public void testPageEviction() throws Exception { // flush all ledgerCache.flushLedger(true); - // delete serveral ledgers + // delete several ledgers for (int i = 1; i <= numLedgers / 2; i++) { ledgerCache.deleteLedger(i); } @@ -274,14 +274,14 @@ public void testLedgerCacheFlushFailureOnDiskFull() throws Exception { ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); conf.setLedgerDirNames(new String[] { ledgerDir1.getAbsolutePath(), ledgerDir2.getAbsolutePath() }); - Bookie bookie = new Bookie(conf); + BookieImpl bookie = new TestBookieImpl(conf); InterleavedLedgerStorage ledgerStorage = - ((InterleavedLedgerStorage) bookie.ledgerStorage.getUnderlyingLedgerStorage()); + ((InterleavedLedgerStorage) bookie.getLedgerStorage().getUnderlyingLedgerStorage()); LedgerCacheImpl ledgerCache = (LedgerCacheImpl) ledgerStorage.ledgerCache; // Create ledger index file ledgerStorage.setMasterKey(1, "key".getBytes()); - CachedFileInfo fileInfo = ledgerCache.getIndexPersistenceManager().getFileInfo(Long.valueOf(1), null); + CachedFileInfo fileInfo = ledgerCache.getIndexPersistenceManager().getFileInfo(1L, null); // Add entries ledgerStorage.addEntry(generateEntry(1, 1)); @@ -314,10 +314,10 @@ public void testLedgerCacheFlushFailureOnDiskFull() throws Exception { public void testIndexPageEvictionWriteOrder() throws Exception { final int numLedgers = 10; File journalDir = createTempDir("bookie", "journal"); - Bookie.checkDirectoryStructure(Bookie.getCurrentDirectory(journalDir)); + BookieImpl.checkDirectoryStructure(BookieImpl.getCurrentDirectory(journalDir)); File ledgerDir = createTempDir("bookie", "ledger"); - Bookie.checkDirectoryStructure(Bookie.getCurrentDirectory(ledgerDir)); + BookieImpl.checkDirectoryStructure(BookieImpl.getCurrentDirectory(ledgerDir)); ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); conf.setMetadataServiceUri(null); @@ -327,11 +327,11 @@ public void 
testIndexPageEvictionWriteOrder() throws Exception { .setPageLimit(1) .setLedgerStorageClass(InterleavedLedgerStorage.class.getName()); - Bookie b = new Bookie(conf); + Bookie b = new TestBookieImpl(conf); b.start(); for (int i = 1; i <= numLedgers; i++) { ByteBuf packet = generateEntry(i, 1); - b.addEntry(packet, false, new Bookie.NopWriteCallback(), null, "passwd".getBytes()); + b.addEntry(packet, false, new BookieImpl.NopWriteCallback(), null, "passwd".getBytes()); } conf = TestBKConfiguration.newServerConfiguration(); @@ -339,7 +339,7 @@ public void testIndexPageEvictionWriteOrder() throws Exception { conf.setJournalDirName(journalDir.getPath()) .setLedgerDirNames(new String[] { ledgerDir.getPath() }); - b = new Bookie(conf); + b = new TestBookieImpl(conf); for (int i = 1; i <= numLedgers; i++) { try { b.readEntry(i, 1); @@ -351,8 +351,13 @@ public void testIndexPageEvictionWriteOrder() throws Exception { // this is fine, means the ledger was written to the index cache, but not // the entry log } catch (IOException ioe) { - LOG.info("Shouldn't have received IOException", ioe); - fail("Shouldn't throw IOException, should say that entry is not found"); + if (ioe.getCause() instanceof DefaultEntryLogger.EntryLookupException) { + // this is fine, means the ledger was not fully written to + // the entry log + } else { + LOG.info("Shouldn't have received IOException for entry {}", i, ioe); + fail("Shouldn't throw IOException, should say that entry is not found"); + } } } } @@ -378,25 +383,39 @@ public void testSyncThreadNPE() throws IOException { } } + /** - * Race where a flush would fail because a garbage collection occurred at - * the wrong time. - * {@link https://issues.apache.org/jira/browse/BOOKKEEPER-604} + * Test for race between putEntryOffset and flush. 
+ * {@link https://github.com/apache/bookkeeper/issues/1919} */ @Test - public void testFlushDeleteRace() throws Exception { + public void testPutEntryOffsetDeleteRace() throws Exception { newLedgerCache(); final AtomicInteger rc = new AtomicInteger(0); - final LinkedBlockingQueue ledgerQ = new LinkedBlockingQueue(1); + final LinkedBlockingQueue putQ = new LinkedBlockingQueue<>(100); + final LinkedBlockingQueue deleteQ = new LinkedBlockingQueue<>(100); final byte[] masterKey = "masterKey".getBytes(); + final long numLedgers = 1000; + final int numPutters = 10; + final int numDeleters = 10; + final AtomicBoolean running = new AtomicBoolean(true); Thread newLedgerThread = new Thread() { public void run() { try { - for (int i = 0; i < 1000 && rc.get() == 0; i++) { + for (long i = 0; i < numLedgers && rc.get() == 0; i++) { ledgerCache.setMasterKey(i, masterKey); - ledgerQ.put((long) i); + + ledgerCache.putEntryOffset(i, 1, 0); + deleteQ.put(i); + putQ.put(i); + } + for (int i = 0; i < numPutters; ++i) { + putQ.put(-1L); + } + for (int i = 0; i < numDeleters; ++i) { + deleteQ.put(-1L); } - } catch (Exception e) { + } catch (Throwable e) { rc.set(-1); LOG.error("Exception in new ledger thread", e); } @@ -404,51 +423,190 @@ public void run() { }; newLedgerThread.start(); - Thread flushThread = new Thread() { + Thread[] flushThreads = new Thread[numPutters]; + for (int i = 0; i < numPutters; ++i) { + Thread flushThread = new Thread() { public void run() { try { while (true) { - Long id = ledgerQ.peek(); - if (id == null) { - continue; + long id = putQ.take(); + if (id == -1L) { + break; } - LOG.info("Put entry for {}", id); + LOG.info("Putting {}", id); + try { + ledgerCache.putEntryOffset(id, 2, 0); + ledgerCache.deleteLedger(id); + } catch (NoLedgerException e) { + // No problem + } + } + } catch (Throwable e) { + rc.set(-1); + LOG.error("Exception in put thread", e); + } + } + }; + flushThread.start(); + flushThreads[i] = flushThread; + } + + Thread[] deleteThreads = new Thread[numDeleters]; + for (int i = 0; i < numDeleters; ++i) { + Thread deleteThread = new Thread() { + public void run() { + try { + while (true) { + long id = deleteQ.take(); + if (id == -1L) { + break; + } + LOG.info("Deleting {}", id); try { - ledgerCache.putEntryOffset((long) id, 1, 0); - } catch (Bookie.NoLedgerException nle) { - //ignore + ledgerCache.deleteLedger(id); + } catch (NoLedgerException e) { + // No problem } + } + } catch (Throwable e) { + rc.set(-1); + LOG.error("Exception in delete thread", e); + } + } + }; + deleteThread.start(); + deleteThreads[i] = deleteThread; + } + + newLedgerThread.join(); + + for (Thread deleteThread : deleteThreads) { + deleteThread.join(); + } + + running.set(false); + for (Thread flushThread : flushThreads) { + flushThread.join(); + } + + assertEquals("Should have been no errors", rc.get(), 0); + for (long i = 0L; i < numLedgers; ++i) { + boolean gotError = false; + try { + LOG.error("Checking {}", i); + ledgerCache.getEntryOffset(i, 0); + } catch (NoLedgerException e) { + gotError = true; + } + if (!gotError) { + LOG.error("Ledger {} is still around", i); + fail("Found ledger " + i + ", which should have been removed"); + } + } + } + + /** + * Test for race between delete and flush. 
+ * {@link https://issues.apache.org/jira/browse/BOOKKEEPER-604} + * {@link https://github.com/apache/bookkeeper/issues/1757} + */ + @Test + public void testFlushDeleteRace() throws Exception { + newLedgerCache(); + final AtomicInteger rc = new AtomicInteger(0); + final LinkedBlockingQueue ledgerQ = new LinkedBlockingQueue<>(100); + final byte[] masterKey = "masterKey".getBytes(); + final long numLedgers = 1000; + final int numFlushers = 10; + final int numDeleters = 10; + final AtomicBoolean running = new AtomicBoolean(true); + Thread newLedgerThread = new Thread() { + public void run() { + try { + for (long i = 0; i < numLedgers && rc.get() == 0; i++) { + ledgerCache.setMasterKey(i, masterKey); + + ledgerCache.putEntryOffset(i, 1, 0); + ledgerQ.put(i); + } + for (int i = 0; i < numDeleters; ++i) { + ledgerQ.put(-1L); + } + } catch (Throwable e) { + rc.set(-1); + LOG.error("Exception in new ledger thread", e); + } + } + }; + newLedgerThread.start(); + + Thread[] flushThreads = new Thread[numFlushers]; + for (int i = 0; i < numFlushers; ++i) { + Thread flushThread = new Thread() { + public void run() { + try { + while (running.get()) { ledgerCache.flushLedger(true); } - } catch (Exception e) { + } catch (Throwable e) { rc.set(-1); LOG.error("Exception in flush thread", e); } + LOG.error("Shutting down flush thread"); } }; - flushThread.start(); + flushThread.start(); + flushThreads[i] = flushThread; + } - Thread deleteThread = new Thread() { + Thread[] deleteThreads = new Thread[numDeleters]; + for (int i = 0; i < numDeleters; ++i) { + Thread deleteThread = new Thread() { public void run() { try { while (true) { long id = ledgerQ.take(); + if (id == -1L) { + break; + } LOG.info("Deleting {}", id); ledgerCache.deleteLedger(id); } - } catch (Exception e) { + } catch (Throwable e) { rc.set(-1); LOG.error("Exception in delete thread", e); } } }; - deleteThread.start(); + deleteThread.start(); + deleteThreads[i] = deleteThread; + } newLedgerThread.join(); - assertEquals("Should have been no errors", rc.get(), 0); - deleteThread.interrupt(); - flushThread.interrupt(); + for (Thread deleteThread : deleteThreads) { + deleteThread.join(); + } + + running.set(false); + for (Thread flushThread : flushThreads) { + flushThread.join(); + } + + assertEquals("Should have been no errors", rc.get(), 0); + for (long i = 0L; i < numLedgers; ++i) { + boolean gotError = false; + try { + LOG.error("Checking {}", i); + ledgerCache.getEntryOffset(i, 0); + } catch (NoLedgerException e) { + gotError = true; + } + if (!gotError) { + LOG.error("Ledger {} is still around", i); + fail("Found ledger " + i + ", which should have been removed"); + } + } } // Mock SortedLedgerStorage to simulate flush failure (Dependency Fault Injection) @@ -458,6 +616,8 @@ static class FlushTestSortedLedgerStorage extends SortedLedgerStorage { final AtomicLong injectFlushExceptionForLedger; final AtomicInteger numOfTimesFlushSnapshotCalled = new AtomicInteger(0); static final long FORALLLEDGERS = -1; + ServerConfiguration conf; + StatsLogger statsLogger; public FlushTestSortedLedgerStorage() { super(); @@ -488,19 +648,22 @@ public void initialize(ServerConfiguration conf, LedgerManager ledgerManager, LedgerDirsManager ledgerDirsManager, LedgerDirsManager indexDirsManager, - StateManager stateManager, - CheckpointSource checkpointSource, - Checkpointer checkpointer, - StatsLogger statsLogger) throws IOException { + StatsLogger statsLogger, + ByteBufAllocator allocator) throws IOException { super.initialize( conf, ledgerManager, 
ledgerDirsManager, indexDirsManager, - stateManager, - checkpointSource, - checkpointer, - statsLogger); + statsLogger, + allocator); + this.conf = conf; + this.statsLogger = statsLogger; + } + + @Override + public void setCheckpointSource(CheckpointSource checkpointSource) { + super.setCheckpointSource(checkpointSource); if (this.memTable instanceof EntryMemTableWithParallelFlusher) { this.memTable = new EntryMemTableWithParallelFlusher(conf, checkpointSource, statsLogger) { @Override @@ -558,8 +721,8 @@ public void onSizeLimitReached(final CheckpointSource.Checkpoint cp) throws IOEx @Test public void testEntryMemTableFlushFailure() throws Exception { File tmpDir = createTempDir("bkTest", ".dir"); - File curDir = Bookie.getCurrentDirectory(tmpDir); - Bookie.checkDirectoryStructure(curDir); + File curDir = BookieImpl.getCurrentDirectory(tmpDir); + BookieImpl.checkDirectoryStructure(curDir); int gcWaitTime = 1000; ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); @@ -567,14 +730,15 @@ public void testEntryMemTableFlushFailure() throws Exception { conf.setLedgerDirNames(new String[] { tmpDir.toString() }); conf.setLedgerStorageClass(FlushTestSortedLedgerStorage.class.getName()); - Bookie bookie = new Bookie(conf); - FlushTestSortedLedgerStorage flushTestSortedLedgerStorage = (FlushTestSortedLedgerStorage) bookie.ledgerStorage; + Bookie bookie = new TestBookieImpl(conf); + FlushTestSortedLedgerStorage flushTestSortedLedgerStorage = + (FlushTestSortedLedgerStorage) bookie.getLedgerStorage(); EntryMemTable memTable = flushTestSortedLedgerStorage.memTable; // this bookie.addEntry call is required. FileInfo for Ledger 1 would be created with this call. // without the fileinfo, 'flushTestSortedLedgerStorage.addEntry' calls will fail // because of BOOKKEEPER-965 change. - bookie.addEntry(generateEntry(1, 1), false, new Bookie.NopWriteCallback(), null, "passwd".getBytes()); + bookie.addEntry(generateEntry(1, 1), false, new BookieImpl.NopWriteCallback(), null, "passwd".getBytes()); flushTestSortedLedgerStorage.addEntry(generateEntry(1, 2)); assertFalse("Bookie is expected to be in ReadWrite mode", bookie.isReadOnly()); @@ -603,8 +767,8 @@ public void testEntryMemTableFlushFailure() throws Exception { public void testSortedLedgerFlushFailure() throws Exception { // most of the code is same to the testEntryMemTableFlushFailure File tmpDir = createTempDir("bkTest", ".dir"); - File curDir = Bookie.getCurrentDirectory(tmpDir); - Bookie.checkDirectoryStructure(curDir); + File curDir = BookieImpl.getCurrentDirectory(tmpDir); + BookieImpl.checkDirectoryStructure(curDir); int gcWaitTime = 1000; ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); @@ -613,12 +777,13 @@ public void testSortedLedgerFlushFailure() throws Exception { .setJournalDirName(tmpDir.toString()) .setLedgerStorageClass(FlushTestSortedLedgerStorage.class.getName()); - Bookie bookie = new Bookie(conf); + Bookie bookie = new TestBookieImpl(conf); bookie.start(); - FlushTestSortedLedgerStorage flushTestSortedLedgerStorage = (FlushTestSortedLedgerStorage) bookie.ledgerStorage; + FlushTestSortedLedgerStorage flushTestSortedLedgerStorage = (FlushTestSortedLedgerStorage) bookie. 
+ getLedgerStorage(); EntryMemTable memTable = flushTestSortedLedgerStorage.memTable; - bookie.addEntry(generateEntry(1, 1), false, new Bookie.NopWriteCallback(), null, "passwd".getBytes()); + bookie.addEntry(generateEntry(1, 1), false, new BookieImpl.NopWriteCallback(), null, "passwd".getBytes()); flushTestSortedLedgerStorage.addEntry(generateEntry(1, 2)); assertFalse("Bookie is expected to be in ReadWrite mode", bookie.isReadOnly()); assertTrue("EntryMemTable SnapShot is expected to be empty", memTable.snapshot.isEmpty()); @@ -633,13 +798,17 @@ public void testSortedLedgerFlushFailure() throws Exception { // after flush failure, the bookie is set to readOnly assertTrue("Bookie is expected to be in Read mode", bookie.isReadOnly()); // write fail + CountDownLatch latch = new CountDownLatch(1); bookie.addEntry(generateEntry(1, 3), false, new BookkeeperInternalCallbacks.WriteCallback(){ - public void writeComplete(int rc, long ledgerId, long entryId, BookieSocketAddress addr, Object ctx){ - LOG.info("fail write to bk"); - assertTrue(rc != OK); - }; + public void writeComplete(int rc, long ledgerId, long entryId, BookieId addr, Object ctx){ + LOG.info("Write to bk succeed due to the bookie readOnly mode check is in the request parse step. " + + "In the addEntry step, we won't check bookie's mode"); + assertEquals(OK, rc); + latch.countDown(); + } }, null, "passwd".getBytes()); + latch.await(); bookie.shutdown(); } @@ -663,8 +832,9 @@ public void testEntryMemTableParallelFlush() throws Exception { // enable entrylog per ledger conf.setEntryLogPerLedgerEnabled(true); - Bookie bookie = new Bookie(conf); - FlushTestSortedLedgerStorage flushTestSortedLedgerStorage = (FlushTestSortedLedgerStorage) bookie.ledgerStorage; + Bookie bookie = new TestBookieImpl(conf); + FlushTestSortedLedgerStorage flushTestSortedLedgerStorage = + (FlushTestSortedLedgerStorage) bookie.getLedgerStorage(); EntryMemTable memTable = flushTestSortedLedgerStorage.memTable; /* @@ -673,9 +843,9 @@ public void testEntryMemTableParallelFlush() throws Exception { * 'flushTestSortedLedgerStorage.addEntry' calls will fail because of * BOOKKEEPER-965 change. 
*/ - bookie.addEntry(generateEntry(1, 1), false, new Bookie.NopWriteCallback(), null, "passwd".getBytes()); - bookie.addEntry(generateEntry(2, 1), false, new Bookie.NopWriteCallback(), null, "passwd".getBytes()); - bookie.addEntry(generateEntry(3, 1), false, new Bookie.NopWriteCallback(), null, "passwd".getBytes()); + bookie.addEntry(generateEntry(1, 1), false, new BookieImpl.NopWriteCallback(), null, "passwd".getBytes()); + bookie.addEntry(generateEntry(2, 1), false, new BookieImpl.NopWriteCallback(), null, "passwd".getBytes()); + bookie.addEntry(generateEntry(3, 1), false, new BookieImpl.NopWriteCallback(), null, "passwd".getBytes()); flushTestSortedLedgerStorage.addEntry(generateEntry(1, 2)); flushTestSortedLedgerStorage.addEntry(generateEntry(2, 2)); @@ -703,8 +873,9 @@ public void testEntryMemTableParallelFlushWithFlushException() throws Exception // enable entrylog per ledger conf.setEntryLogPerLedgerEnabled(true); - Bookie bookie = new Bookie(conf); - FlushTestSortedLedgerStorage flushTestSortedLedgerStorage = (FlushTestSortedLedgerStorage) bookie.ledgerStorage; + Bookie bookie = new TestBookieImpl(conf); + FlushTestSortedLedgerStorage flushTestSortedLedgerStorage = + (FlushTestSortedLedgerStorage) bookie.getLedgerStorage(); EntryMemTable memTable = flushTestSortedLedgerStorage.memTable; /* @@ -713,9 +884,9 @@ public void testEntryMemTableParallelFlushWithFlushException() throws Exception * 'flushTestSortedLedgerStorage.addEntry' calls will fail because of * BOOKKEEPER-965 change. */ - bookie.addEntry(generateEntry(1, 1), false, new Bookie.NopWriteCallback(), null, "passwd".getBytes()); - bookie.addEntry(generateEntry(2, 1), false, new Bookie.NopWriteCallback(), null, "passwd".getBytes()); - bookie.addEntry(generateEntry(3, 1), false, new Bookie.NopWriteCallback(), null, "passwd".getBytes()); + bookie.addEntry(generateEntry(1, 1), false, new BookieImpl.NopWriteCallback(), null, "passwd".getBytes()); + bookie.addEntry(generateEntry(2, 1), false, new BookieImpl.NopWriteCallback(), null, "passwd".getBytes()); + bookie.addEntry(generateEntry(3, 1), false, new BookieImpl.NopWriteCallback(), null, "passwd".getBytes()); flushTestSortedLedgerStorage.addEntry(generateEntry(1, 4)); flushTestSortedLedgerStorage.addEntry(generateEntry(2, 4)); @@ -746,8 +917,8 @@ String[] createAndGetLedgerDirs(int numOfLedgerDirs) throws IOException { String[] ledgerDirsPath = new String[numOfLedgerDirs]; for (int i = 0; i < numOfLedgerDirs; i++) { ledgerDir = createTempDir("bkTest", ".dir"); - curDir = Bookie.getCurrentDirectory(ledgerDir); - Bookie.checkDirectoryStructure(curDir); + curDir = BookieImpl.getCurrentDirectory(ledgerDir); + BookieImpl.checkDirectoryStructure(curDir); ledgerDirsPath[i] = ledgerDir.getAbsolutePath(); } return ledgerDirsPath; diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/LedgerDirsManagerTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/LedgerDirsManagerTest.java new file mode 100644 index 00000000000..d6e01cec457 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/LedgerDirsManagerTest.java @@ -0,0 +1,662 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.bookie; + +import static org.hamcrest.Matchers.equalTo; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertThat; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.mockStatic; + +import java.io.File; +import java.io.IOException; +import java.time.Duration; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import org.apache.bookkeeper.bookie.LedgerDirsManager.LedgerDirsListener; +import org.apache.bookkeeper.bookie.LedgerDirsManager.NoWritableLedgerDirException; +import org.apache.bookkeeper.common.testing.executors.MockExecutorController; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.conf.TestBKConfiguration; +import org.apache.bookkeeper.stats.Gauge; +import org.apache.bookkeeper.test.TestStatsProvider; +import org.apache.bookkeeper.util.DiskChecker; +import org.apache.bookkeeper.util.IOUtils; +import org.apache.commons.io.FileUtils; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.MockedStatic; +import org.mockito.junit.MockitoJUnitRunner; + +/** + * Test LedgerDirsManager. 
+ */ +@RunWith(MockitoJUnitRunner.class) +public class LedgerDirsManagerTest { + + ServerConfiguration conf; + File curDir; + LedgerDirsManager dirsManager; + LedgerDirsMonitor ledgerMonitor; + MockDiskChecker mockDiskChecker; + private TestStatsProvider statsProvider; + private TestStatsProvider.TestStatsLogger statsLogger; + int diskCheckInterval = 1000; + float threshold = 0.5f; + float warnThreshold = 0.5f; + + final List<File> tempDirs = new ArrayList<>(); + + // Thread used by monitor + ScheduledExecutorService executor; + MockExecutorController executorController; + MockedStatic<Executors> executorsMockedStatic; + + File createTempDir(String prefix, String suffix) throws IOException { + File dir = IOUtils.createTempDir(prefix, suffix); + tempDirs.add(dir); + return dir; + } + + @Before + public void setUp() throws Exception { + executorsMockedStatic = mockStatic(Executors.class); + + File tmpDir = createTempDir("bkTest", ".dir"); + curDir = BookieImpl.getCurrentDirectory(tmpDir); + BookieImpl.checkDirectoryStructure(curDir); + + conf = TestBKConfiguration.newServerConfiguration(); + conf.setLedgerDirNames(new String[] { tmpDir.toString() }); + conf.setDiskLowWaterMarkUsageThreshold(conf.getDiskUsageThreshold()); + conf.setDiskCheckInterval(diskCheckInterval); + conf.setIsForceGCAllowWhenNoSpace(true); + conf.setMinUsableSizeForEntryLogCreation(Long.MIN_VALUE); + + executor = mock(ScheduledExecutorService.class); + executorController = new MockExecutorController() + .controlScheduleAtFixedRate(executor, 10); + executorsMockedStatic.when(()->Executors.newSingleThreadScheduledExecutor(any())).thenReturn(executor); + + mockDiskChecker = new MockDiskChecker(threshold, warnThreshold); + statsProvider = new TestStatsProvider(); + statsLogger = statsProvider.getStatsLogger("test"); + dirsManager = new LedgerDirsManager(conf, conf.getLedgerDirs(), + new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold()), statsLogger); + ledgerMonitor = new LedgerDirsMonitor(conf, + mockDiskChecker, Collections.singletonList(dirsManager)); + ledgerMonitor.init(); + } + + @After + public void tearDown() throws Exception { + executorsMockedStatic.close(); + ledgerMonitor.shutdown(); + for (File dir : tempDirs) { + FileUtils.deleteDirectory(dir); + } + tempDirs.clear(); + } + + @Test + public void testGetWritableDir() throws Exception { + try { + List<File> writeDirs = dirsManager.getWritableLedgerDirs(); + assertTrue("Must have a writable ledgerDir", writeDirs.size() > 0); + } catch (NoWritableLedgerDirException nwlde) { + fail("We should have a writable ledgerDir"); + } + } + + @Test + public void testPickWritableDirExclusive() throws Exception { + try { + dirsManager.pickRandomWritableDir(curDir); + fail("Should not reach here because there is no writable ledger dir."); + } catch (NoWritableLedgerDirException nwlde) { + // expected to fail with no writable ledger dir + assertTrue(true); + } + } + + @Test + public void testNoWritableDir() throws Exception { + try { + dirsManager.addToFilledDirs(curDir); + dirsManager.pickRandomWritableDir(); + fail("Should not reach here because there is no writable ledger dir."); + } catch (NoWritableLedgerDirException nwlde) { + // expected to fail with no writable ledger dir + assertEquals("Should get NoWritableLedgerDirException w/ 'All ledger directories are non writable'.", + "All ledger directories are non writable", nwlde.getMessage()); + } + } + + @Test + public void testGetWritableDirForLog() throws Exception { + List<File> writeDirs; + try {
dirsManager.addToFilledDirs(curDir); + dirsManager.getWritableLedgerDirs(); + fail("Should not reach here because there is no writable ledger dir."); + } catch (NoWritableLedgerDirException nwlde) { + // expected to fail with no writable ledger dir + // Now make sure we can get one for log + try { + writeDirs = dirsManager.getWritableLedgerDirsForNewLog(); + assertTrue("Must have a writable ledgerDir", writeDirs.size() > 0); + } catch (NoWritableLedgerDirException e) { + fail("We should have a writable ledgerDir"); + } + } + } + + @Test + public void testGetWritableDirForLogNoEnoughDiskSpace() throws Exception { + conf.setMinUsableSizeForEntryLogCreation(curDir.getUsableSpace() + 1024); + dirsManager = new LedgerDirsManager(conf, conf.getLedgerDirs(), + new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold()), statsLogger); + try { + dirsManager.addToFilledDirs(curDir); + dirsManager.getWritableLedgerDirs(); + fail("Should not reach here because there is no writable ledger dir."); + } catch (NoWritableLedgerDirException nwlde) { + // expected to fail with no writable ledger dir + // Now make sure we can get one for log + try { + dirsManager.getWritableLedgerDirsForNewLog(); + fail("Should not reach here because there is not enough disk space left"); + } catch (NoWritableLedgerDirException e) { + // expected. + } + } + } + + @Test + public void testLedgerDirsMonitorDuringTransition() throws Exception { + testLedgerDirsMonitorDuringTransition(true); + } + + @Test + public void testHighPriorityWritesDisallowedDuringTransition() throws Exception { + testLedgerDirsMonitorDuringTransition(false); + } + + private void testLedgerDirsMonitorDuringTransition(boolean highPriorityWritesAllowed) throws Exception { + if (!highPriorityWritesAllowed) { + ledgerMonitor.shutdown(); + conf.setMinUsableSizeForHighPriorityWrites(curDir.getUsableSpace() + 1024); + dirsManager = new LedgerDirsManager(conf, conf.getLedgerDirs(), + new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold()), statsLogger); + ledgerMonitor = new LedgerDirsMonitor(conf, mockDiskChecker, Collections.singletonList(dirsManager)); + ledgerMonitor.init(); + } + + MockLedgerDirsListener mockLedgerDirsListener = new MockLedgerDirsListener(); + dirsManager.addLedgerDirsListener(mockLedgerDirsListener); + ledgerMonitor.start(); + + assertFalse(mockLedgerDirsListener.readOnly); + assertTrue(mockLedgerDirsListener.highPriorityWritesAllowed); + + mockDiskChecker.setUsage(threshold + 0.05f); + executorController.advance(Duration.ofMillis(diskCheckInterval)); + + assertTrue(mockLedgerDirsListener.readOnly); + assertEquals(highPriorityWritesAllowed, mockLedgerDirsListener.highPriorityWritesAllowed); + + mockDiskChecker.setUsage(threshold - 0.05f); + executorController.advance(Duration.ofMillis(diskCheckInterval)); + + assertFalse(mockLedgerDirsListener.readOnly); + assertTrue(mockLedgerDirsListener.highPriorityWritesAllowed); + } + + @Test + public void testIsReadOnlyModeOnAnyDiskFullEnabled() throws Exception { + testAnyLedgerFullTransitToReadOnly(true); + testAnyLedgerFullTransitToReadOnly(false); + } + + public void testAnyLedgerFullTransitToReadOnly(boolean isReadOnlyModeOnAnyDiskFullEnabled) throws Exception { + ledgerMonitor.shutdown(); + + final float nospace = 0.90f; + final float lwm = 0.80f; + HashMap<File, Float> usageMap; + + File tmpDir1 = createTempDir("bkTest", ".dir"); + File curDir1 = BookieImpl.getCurrentDirectory(tmpDir1); + BookieImpl.checkDirectoryStructure(curDir1); + + File tmpDir2 =
createTempDir("bkTest", ".dir"); + File curDir2 = BookieImpl.getCurrentDirectory(tmpDir2); + BookieImpl.checkDirectoryStructure(curDir2); + + conf.setDiskUsageThreshold(nospace); + conf.setDiskLowWaterMarkUsageThreshold(lwm); + conf.setDiskUsageWarnThreshold(nospace); + conf.setReadOnlyModeOnAnyDiskFullEnabled(isReadOnlyModeOnAnyDiskFullEnabled); + conf.setLedgerDirNames(new String[] { tmpDir1.toString(), tmpDir2.toString() }); + + mockDiskChecker = new MockDiskChecker(nospace, warnThreshold); + dirsManager = new LedgerDirsManager(conf, conf.getLedgerDirs(), + new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold()), statsLogger); + ledgerMonitor = new LedgerDirsMonitor(conf, mockDiskChecker, Collections.singletonList(dirsManager)); + usageMap = new HashMap<>(); + usageMap.put(curDir1, 0.1f); + usageMap.put(curDir2, 0.1f); + mockDiskChecker.setUsageMap(usageMap); + ledgerMonitor.init(); + final MockLedgerDirsListener mockLedgerDirsListener = new MockLedgerDirsListener(); + dirsManager.addLedgerDirsListener(mockLedgerDirsListener); + ledgerMonitor.start(); + + Thread.sleep((diskCheckInterval * 2) + 100); + assertFalse(mockLedgerDirsListener.readOnly); + + if (isReadOnlyModeOnAnyDiskFullEnabled) { + setUsageAndThenVerify(curDir1, 0.1f, curDir2, nospace + 0.05f, mockDiskChecker, + mockLedgerDirsListener, true); + setUsageAndThenVerify(curDir1, nospace + 0.05f, curDir2, 0.1f, mockDiskChecker, + mockLedgerDirsListener, true); + setUsageAndThenVerify(curDir1, nospace + 0.05f, curDir2, nospace + 0.05f, mockDiskChecker, + mockLedgerDirsListener, true); + setUsageAndThenVerify(curDir1, nospace - 0.30f, curDir2, nospace + 0.05f, mockDiskChecker, + mockLedgerDirsListener, true); + setUsageAndThenVerify(curDir1, nospace - 0.20f, curDir2, nospace - 0.20f, mockDiskChecker, + mockLedgerDirsListener, false); + } else { + setUsageAndThenVerify(curDir1, 0.1f, curDir2, 0.1f, mockDiskChecker, + mockLedgerDirsListener, false); + setUsageAndThenVerify(curDir1, 0.1f, curDir2, nospace + 0.05f, mockDiskChecker, + mockLedgerDirsListener, false); + setUsageAndThenVerify(curDir1, nospace + 0.05f, curDir2, 0.1f, mockDiskChecker, + mockLedgerDirsListener, false); + setUsageAndThenVerify(curDir1, nospace + 0.05f, curDir2, nospace + 0.05f, mockDiskChecker, + mockLedgerDirsListener, true); + setUsageAndThenVerify(curDir1, nospace - 0.30f, curDir2, nospace + 0.05f, mockDiskChecker, + mockLedgerDirsListener, false); + setUsageAndThenVerify(curDir1, nospace - 0.20f, curDir2, nospace - 0.20f, mockDiskChecker, + mockLedgerDirsListener, false); + } + } + + @Test + public void testLedgerDirsMonitorHandlingLowWaterMark() throws Exception { + ledgerMonitor.shutdown(); + + final float warn = 0.90f; + final float nospace = 0.98f; + final float lwm = (warn + nospace) / 2; + final float lwm2warn = (warn + lwm) / 2; + final float lwm2nospace = (lwm + nospace) / 2; + final float nospaceExceeded = nospace + 0.005f; + + conf.setDiskUsageThreshold(nospace); + conf.setDiskLowWaterMarkUsageThreshold(lwm); + conf.setDiskUsageWarnThreshold(warn); + + mockDiskChecker = new MockDiskChecker(nospace, warnThreshold); + dirsManager = new LedgerDirsManager(conf, conf.getLedgerDirs(), + new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold())); + ledgerMonitor = new LedgerDirsMonitor(conf, mockDiskChecker, Collections.singletonList(dirsManager)); + ledgerMonitor.init(); + final MockLedgerDirsListener mockLedgerDirsListener = new MockLedgerDirsListener(); + 
dirsManager.addLedgerDirsListener(mockLedgerDirsListener); + ledgerMonitor.start(); + + executorController.advance(Duration.ofMillis(diskCheckInterval)); + assertFalse(mockLedgerDirsListener.readOnly); + + // go above LWM but below threshold + // should still be writable + mockDiskChecker.setUsage(lwm2nospace); + executorController.advance(Duration.ofMillis(diskCheckInterval)); + assertFalse(mockLedgerDirsListener.readOnly); + + // exceed the threshold, should go to readonly + mockDiskChecker.setUsage(nospaceExceeded); + executorController.advance(Duration.ofMillis(diskCheckInterval)); + assertTrue(mockLedgerDirsListener.readOnly); + + // drop below threshold but above LWM + // should stay read-only + mockDiskChecker.setUsage(lwm2nospace); + executorController.advance(Duration.ofMillis(diskCheckInterval)); + assertTrue(mockLedgerDirsListener.readOnly); + + // drop below LWM + // should become writable + mockDiskChecker.setUsage(lwm2warn); + executorController.advance(Duration.ofMillis(diskCheckInterval)); + assertFalse(mockLedgerDirsListener.readOnly); + + // go above LWM but below threshold + // should still be writable + mockDiskChecker.setUsage(lwm2nospace); + executorController.advance(Duration.ofMillis(diskCheckInterval)); + assertFalse(mockLedgerDirsListener.readOnly); + } + + @Test + public void testLedgerDirsMonitorHandlingWithMultipleLedgerDirectories() throws Exception { + ledgerMonitor.shutdown(); + + final float nospace = 0.90f; + final float lwm = 0.80f; + HashMap<File, Float> usageMap; + + File tmpDir1 = createTempDir("bkTest", ".dir"); + File curDir1 = BookieImpl.getCurrentDirectory(tmpDir1); + BookieImpl.checkDirectoryStructure(curDir1); + + File tmpDir2 = createTempDir("bkTest", ".dir"); + File curDir2 = BookieImpl.getCurrentDirectory(tmpDir2); + BookieImpl.checkDirectoryStructure(curDir2); + + conf.setDiskUsageThreshold(nospace); + conf.setDiskLowWaterMarkUsageThreshold(lwm); + conf.setDiskUsageWarnThreshold(nospace); + conf.setLedgerDirNames(new String[] { tmpDir1.toString(), tmpDir2.toString() }); + + mockDiskChecker = new MockDiskChecker(nospace, warnThreshold); + dirsManager = new LedgerDirsManager(conf, conf.getLedgerDirs(), + new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold()), + statsLogger); + ledgerMonitor = new LedgerDirsMonitor(conf, mockDiskChecker, Collections.singletonList(dirsManager)); + usageMap = new HashMap<>(); + usageMap.put(curDir1, 0.1f); + usageMap.put(curDir2, 0.1f); + mockDiskChecker.setUsageMap(usageMap); + ledgerMonitor.init(); + final MockLedgerDirsListener mockLedgerDirsListener = new MockLedgerDirsListener(); + dirsManager.addLedgerDirsListener(mockLedgerDirsListener); + ledgerMonitor.start(); + + Thread.sleep((diskCheckInterval * 2) + 100); + assertFalse(mockLedgerDirsListener.readOnly); + + // go above LWM but below threshold + // should still be writable + setUsageAndThenVerify(curDir1, lwm + 0.05f, curDir2, lwm + 0.05f, mockDiskChecker, mockLedgerDirsListener, + false); + + // one dir's usage above the storage threshold, the other dir's below it + // should still be writable + setUsageAndThenVerify(curDir1, nospace + 0.02f, curDir2, nospace - 0.05f, mockDiskChecker, + mockLedgerDirsListener, false); + + // both dirs above the storage threshold, should become readonly + setUsageAndThenVerify(curDir1, nospace + 0.05f, curDir2, nospace + 0.02f, mockDiskChecker, + mockLedgerDirsListener, true); + + // bring the disk usages to less than the threshold, + // but more than the LWM.
+ // should still be readonly + setUsageAndThenVerify(curDir1, nospace - 0.05f, curDir2, nospace - 0.05f, mockDiskChecker, + mockLedgerDirsListener, true); + + // bring one dir's disk usage below the lwm, + // the other dir's above the lwm, with the + // overall disk usage still above the lwm + // should still be readonly + setUsageAndThenVerify(curDir1, lwm - 0.03f, curDir2, lwm + 0.07f, mockDiskChecker, mockLedgerDirsListener, + true); + + // bring one dir's disk usage well below the lwm, + // the other dir's above the storage threshold, with the + // overall disk usage below the lwm + // should go to readwrite + setUsageAndThenVerify(curDir1, lwm - 0.17f, curDir2, nospace + 0.03f, mockDiskChecker, mockLedgerDirsListener, + false); + assertEquals("Only one LedgerDir should be writable", 1, dirsManager.getWritableLedgerDirs().size()); + + // bring both the dirs below lwm + // should still be readwrite + setUsageAndThenVerify(curDir1, lwm - 0.03f, curDir2, lwm - 0.02f, mockDiskChecker, mockLedgerDirsListener, + false); + assertEquals("Both the LedgerDirs should be writable", 2, dirsManager.getWritableLedgerDirs().size()); + + // bring both the dirs above lwm but < threshold + // should still be readwrite + setUsageAndThenVerify(curDir1, lwm + 0.02f, curDir2, lwm + 0.08f, mockDiskChecker, mockLedgerDirsListener, + false); + } + + @Test + public void testLedgerDirsMonitorStartReadOnly() throws Exception { + ledgerMonitor.shutdown(); + + final float nospace = 0.90f; + final float lwm = 0.80f; + + File tmpDir1 = createTempDir("bkTest", ".dir"); + File curDir1 = BookieImpl.getCurrentDirectory(tmpDir1); + BookieImpl.checkDirectoryStructure(curDir1); + + File tmpDir2 = createTempDir("bkTest", ".dir"); + File curDir2 = BookieImpl.getCurrentDirectory(tmpDir2); + BookieImpl.checkDirectoryStructure(curDir2); + + conf.setDiskUsageThreshold(nospace); + conf.setDiskLowWaterMarkUsageThreshold(lwm); + conf.setDiskUsageWarnThreshold(nospace); + conf.setLedgerDirNames(new String[] { tmpDir1.toString(), tmpDir2.toString() }); + + // Both disks are out of space at the start.
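The deterministic time control running through these monitor tests comes from the static mock wired up in setUp(): Executors.newSingleThreadScheduledExecutor is intercepted so the monitor schedules its periodic check on a stub executor the test controls. Roughly, with the mockito-inline MockedStatic API (standalone sketch):

    import static org.mockito.ArgumentMatchers.any;
    import static org.mockito.Mockito.mock;
    import static org.mockito.Mockito.mockStatic;

    import java.util.concurrent.Executors;
    import java.util.concurrent.ScheduledExecutorService;
    import org.mockito.MockedStatic;

    class StaticMockSketch {
        void run() {
            ScheduledExecutorService stub = mock(ScheduledExecutorService.class);
            try (MockedStatic<Executors> mocked = mockStatic(Executors.class)) {
                mocked.when(() -> Executors.newSingleThreadScheduledExecutor(any()))
                        .thenReturn(stub);
                // Code on this thread that now calls
                // Executors.newSingleThreadScheduledExecutor(...) receives the stub,
                // so the test decides when scheduled work actually runs.
            } // the static mock is released when the scope closes
        }
    }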
+        HashMap<File, Float> usageMap = new HashMap<>();
+        usageMap.put(curDir1, nospace + 0.05f);
+        usageMap.put(curDir2, nospace + 0.05f);
+
+        mockDiskChecker = new MockDiskChecker(nospace, warnThreshold);
+        mockDiskChecker.setUsageMap(usageMap);
+        dirsManager = new LedgerDirsManager(conf, conf.getLedgerDirs(),
+                new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold()),
+                statsLogger);
+
+        ledgerMonitor = new LedgerDirsMonitor(conf, mockDiskChecker, Collections.singletonList(dirsManager));
+        try {
+            ledgerMonitor.init();
+            fail("NoWritableLedgerDirException expected");
+        } catch (NoWritableLedgerDirException exception) {
+            // ok
+        }
+        final MockLedgerDirsListener mockLedgerDirsListener = new MockLedgerDirsListener();
+        dirsManager.addLedgerDirsListener(mockLedgerDirsListener);
+        ledgerMonitor.start();
+
+        Thread.sleep((diskCheckInterval * 2) + 100);
+        verifyUsage(curDir1, nospace + 0.05f, curDir2, nospace + 0.05f, mockLedgerDirsListener, true);
+    }
+
+    @Test
+    public void testValidateLwmThreshold() {
+        final ServerConfiguration configuration = TestBKConfiguration.newServerConfiguration();
+        // should fail because diskSpaceThreshold < diskSpaceLwmThreshold
+        configuration.setDiskUsageThreshold(0.65f);
+        configuration.setDiskLowWaterMarkUsageThreshold(0.90f);
+        try {
+            new LedgerDirsMonitor(configuration, mockDiskChecker, Collections.singletonList(dirsManager));
+            fail("diskSpaceThreshold < diskSpaceLwmThreshold, should fail.");
+        } catch (Exception e) {
+            assertTrue(e.getMessage().contains("diskSpaceThreshold >= diskSpaceLwmThreshold"));
+        }
+
+        // should fail because diskSpaceThreshold = 0 and diskUsageLwmThreshold = 1
+        configuration.setDiskUsageThreshold(0f);
+        configuration.setDiskLowWaterMarkUsageThreshold(1f);
+        try {
+            new LedgerDirsMonitor(configuration, mockDiskChecker, Collections.singletonList(dirsManager));
+            fail("diskSpaceThreshold = 0 and diskUsageLwmThreshold = 1, should fail.");
+        } catch (Exception e) {
+            assertTrue(e.getMessage().contains("Should be > 0 and < 1"));
+        }
+
+        // should succeed
+        configuration.setDiskUsageThreshold(0.95f);
+        configuration.setDiskLowWaterMarkUsageThreshold(0.90f);
+        new LedgerDirsMonitor(configuration, mockDiskChecker, Collections.singletonList(dirsManager));
+    }
+
+    private void setUsageAndThenVerify(File dir1, float dir1Usage, File dir2, float dir2Usage,
+            MockDiskChecker mockDiskChecker, MockLedgerDirsListener mockLedgerDirsListener, boolean verifyReadOnly)
+            throws InterruptedException {
+        HashMap<File, Float> usageMap = new HashMap<>();
+        usageMap.put(dir1, dir1Usage);
+        usageMap.put(dir2, dir2Usage);
+        mockDiskChecker.setUsageMap(usageMap);
+        verifyUsage(dir1, dir1Usage, dir2, dir2Usage, mockLedgerDirsListener, verifyReadOnly);
+    }
+
+    private void verifyUsage(File dir1, float dir1Usage, File dir2, float dir2Usage,
+            MockLedgerDirsListener mockLedgerDirsListener, boolean verifyReadOnly) {
+        executorController.advance(Duration.ofMillis(diskCheckInterval));
+
+        float sample1 = getGauge(dir1.getParent()).getSample().floatValue();
+        float sample2 = getGauge(dir2.getParent()).getSample().floatValue();
+
+        assertEquals(mockLedgerDirsListener.readOnly, verifyReadOnly);
+        assertThat(sample1, equalTo(dir1Usage * 100f));
+        assertThat(sample2, equalTo(dir2Usage * 100f));
+    }
+
+    private Gauge<? extends Number> getGauge(String path) {
+        String gaugeName = String.format("test.dir_%s_usage", path.replace('/', '_'));
+        return statsProvider.getGauge(gaugeName);
+    }
+
+    private class MockDiskChecker extends DiskChecker {
+
+        private volatile float used;
+        private volatile Map<File, Float> usageMap = null;
+
+        public MockDiskChecker(float threshold, float warnThreshold) {
+            super(threshold, warnThreshold);
+            used = 0f;
+        }
+
+        @Override
+        public float checkDir(File dir) throws DiskErrorException, DiskOutOfSpaceException, DiskWarnThresholdException {
+            float dirUsage = getDirUsage(dir);
+
+            if (dirUsage > getDiskUsageThreshold()) {
+                throw new DiskOutOfSpaceException("", dirUsage);
+            }
+            if (dirUsage > getDiskUsageWarnThreshold()) {
+                throw new DiskWarnThresholdException("", dirUsage);
+            }
+            return dirUsage;
+        }
+
+        @Override
+        public float getTotalDiskUsage(List<File> dirs) {
+            float accumulatedDiskUsage = 0f;
+            for (File dir : dirs) {
+                accumulatedDiskUsage += getDirUsage(dir);
+            }
+            return (accumulatedDiskUsage / dirs.size());
+        }
+
+        public float getDirUsage(File dir) {
+            float dirUsage;
+            if ((usageMap == null) || (!usageMap.containsKey(dir))) {
+                dirUsage = used;
+            } else {
+                dirUsage = usageMap.get(dir);
+            }
+            return dirUsage;
+        }
+
+        public void setUsage(float usage) {
+            this.used = usage;
+        }
+
+        public void setUsageMap(Map<File, Float> usageMap) {
+            this.usageMap = usageMap;
+        }
+    }
+
+    private class MockLedgerDirsListener implements LedgerDirsListener {
+
+        public volatile boolean highPriorityWritesAllowed;
+        public volatile boolean readOnly;
+
+        public MockLedgerDirsListener() {
+            reset();
+        }
+
+        @Override
+        public void diskWritable(File disk) {
+            if (conf.isReadOnlyModeOnAnyDiskFullEnabled()) {
+                return;
+            }
+            readOnly = false;
+            highPriorityWritesAllowed = true;
+        }
+
+        @Override
+        public void diskJustWritable(File disk) {
+            if (conf.isReadOnlyModeOnAnyDiskFullEnabled()) {
+                return;
+            }
+            readOnly = false;
+            highPriorityWritesAllowed = true;
+        }
+
+        @Override
+        public void allDisksFull(boolean highPriorityWritesAllowed) {
+            this.readOnly = true;
+            this.highPriorityWritesAllowed = highPriorityWritesAllowed;
+        }
+
+        @Override
+        public void anyDiskFull(boolean highPriorityWritesAllowed) {
+            if (conf.isReadOnlyModeOnAnyDiskFullEnabled()) {
+                this.readOnly = true;
+                this.highPriorityWritesAllowed = highPriorityWritesAllowed;
+            }
+        }
+
+        @Override
+        public void allDisksWritable() {
+            this.readOnly = false;
+            this.highPriorityWritesAllowed = true;
+        }
+
+        public void reset() {
+            readOnly = false;
+            highPriorityWritesAllowed = true;
+        }
+
+    }
+}
diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/LedgerStorageCheckpointTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/LedgerStorageCheckpointTest.java
index 921d3101888..681d415dbf2 100644
--- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/LedgerStorageCheckpointTest.java
+++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/LedgerStorageCheckpointTest.java
@@ -1,4 +1,4 @@
-/**
+/*
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.
See the NOTICE file @@ -22,7 +22,12 @@ import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.anyLong; +import static org.mockito.Mockito.CALLS_REAL_METHODS; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.mockStatic; +import static org.mockito.Mockito.when; +import io.netty.buffer.PooledByteBufAllocator; import java.io.File; import java.io.FileInputStream; import java.io.IOException; @@ -33,15 +38,12 @@ import java.util.List; import java.util.Random; import java.util.Set; -import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.stream.IntStream; import java.util.stream.LongStream; - import org.apache.bookkeeper.bookie.EntryLogManagerForEntryLogPerLedger.BufferedLogChannelWithDirInfo; -import org.apache.bookkeeper.bookie.EntryLogger.BufferedLogChannel; import org.apache.bookkeeper.bookie.Journal.LastLogMark; import org.apache.bookkeeper.client.BKException; import org.apache.bookkeeper.client.BookKeeper; @@ -53,9 +55,11 @@ import org.apache.bookkeeper.conf.ServerConfiguration; import org.apache.bookkeeper.conf.TestBKConfiguration; import org.apache.bookkeeper.proto.BookieServer; -import org.apache.bookkeeper.test.PortManager; +import org.apache.bookkeeper.stats.NullStatsLogger; +import org.apache.bookkeeper.stats.ThreadRegistry; import org.apache.bookkeeper.test.ZooKeeperUtil; import org.apache.bookkeeper.util.IOUtils; +import org.apache.bookkeeper.util.PortManager; import org.apache.commons.io.FileUtils; import org.junit.After; import org.junit.Assert; @@ -64,19 +68,15 @@ import org.junit.Test; import org.junit.rules.TestName; import org.junit.runner.RunWith; -import org.powermock.api.mockito.PowerMockito; -import org.powermock.core.classloader.annotations.PowerMockIgnore; -import org.powermock.core.classloader.annotations.PrepareForTest; -import org.powermock.modules.junit4.PowerMockRunner; +import org.mockito.MockedStatic; +import org.mockito.junit.MockitoJUnitRunner; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * LedgerStorageCheckpointTest. 
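+ * Uses Mockito's MockedStatic to feed a controlled ScheduledExecutorService
+ * into SyncThread, GarbageCollectorThread and SortedLedgerStorage, so the
+ * checkpoint intervals can be advanced deterministically by the test.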
*/ -@RunWith(PowerMockRunner.class) -@PrepareForTest(SyncThread.class) -@PowerMockIgnore("javax.*") +@RunWith(MockitoJUnitRunner.class) public class LedgerStorageCheckpointTest { private static final Logger LOG = LoggerFactory .getLogger(LedgerStorageCheckpointTest.class); @@ -92,11 +92,14 @@ public class LedgerStorageCheckpointTest { // ScheduledExecutorService used by SyncThread MockExecutorController executorController; + private MockedStatic syncThreadMockedStatic; + private MockedStatic garbageCollectorThreadMockedStatic; + private MockedStatic sortedLedgerStorageMockedStatic; @Before public void setUp() throws Exception { + ThreadRegistry.clear(); LOG.info("Setting up test {}", getClass()); - PowerMockito.mockStatic(Executors.class); try { // start zookeeper service @@ -106,17 +109,34 @@ public void setUp() throws Exception { throw e; } - ScheduledExecutorService scheduledExecutorService = PowerMockito.mock(ScheduledExecutorService.class); + sortedLedgerStorageMockedStatic = mockStatic(SortedLedgerStorage.class); + ScheduledExecutorService scheduledExecutorService = mock(ScheduledExecutorService.class); executorController = new MockExecutorController() .controlSubmit(scheduledExecutorService) + .controlExecute(scheduledExecutorService) .controlScheduleAtFixedRate(scheduledExecutorService, 10); - PowerMockito.when(scheduledExecutorService.awaitTermination(anyLong(), any(TimeUnit.class))).thenReturn(true); - PowerMockito.when(Executors.newSingleThreadScheduledExecutor(any())).thenReturn(scheduledExecutorService); + when(scheduledExecutorService.awaitTermination(anyLong(), any(TimeUnit.class))).thenReturn(true); + sortedLedgerStorageMockedStatic.when(() -> SortedLedgerStorage.newScheduledExecutorService()) + .thenReturn(scheduledExecutorService); + + syncThreadMockedStatic = mockStatic(SyncThread.class, CALLS_REAL_METHODS); + syncThreadMockedStatic.when(() -> SyncThread.newExecutor()) + .thenReturn(scheduledExecutorService); + + garbageCollectorThreadMockedStatic = mockStatic(GarbageCollectorThread.class); + garbageCollectorThreadMockedStatic.when(() -> GarbageCollectorThread.newExecutor()) + .thenReturn(scheduledExecutorService); } @After public void tearDown() throws Exception { + ThreadRegistry.clear(); LOG.info("TearDown"); + + sortedLedgerStorageMockedStatic.close(); + syncThreadMockedStatic.close(); + garbageCollectorThreadMockedStatic.close(); + Exception tearDownException = null; // stop zookeeper service try { @@ -143,7 +163,7 @@ public void tearDown() throws Exception { * @throws Exception */ protected void startZKCluster() throws Exception { - zkUtil.startServer(); + zkUtil.startCluster(); } /** @@ -152,7 +172,7 @@ protected void startZKCluster() throws Exception { * @throws Exception */ protected void stopZKCluster() throws Exception { - zkUtil.killServer(); + zkUtil.killCluster(); } protected void cleanupTempDirs() throws Exception { @@ -168,7 +188,7 @@ protected File createTempDir(String prefix, String suffix) throws IOException { } private LogMark readLastMarkFile(File lastMarkFile) throws IOException { - byte buff[] = new byte[16]; + byte[] buff = new byte[16]; ByteBuffer bb = ByteBuffer.wrap(buff); LogMark rolledLogMark = new LogMark(); FileInputStream fis = new FileInputStream(lastMarkFile); @@ -216,8 +236,11 @@ public void testPeriodicCheckpointForLedgerStorage(String ledgerStorageClassName .setLedgerStorageClass(ledgerStorageClassName); Assert.assertEquals("Number of JournalDirs", 1, conf.getJournalDirs().length); // we know there is only one ledgerDir - File 
ledgerDir = Bookie.getCurrentDirectories(conf.getLedgerDirs())[0]; - BookieServer server = new BookieServer(conf); + File ledgerDir = BookieImpl.getCurrentDirectories(conf.getLedgerDirs())[0]; + BookieServer server = new BookieServer( + conf, new TestBookieImpl(conf), + NullStatsLogger.INSTANCE, PooledByteBufAllocator.DEFAULT, + new MockUncleanShutdownDetection()); server.start(); ClientConfiguration clientConf = new ClientConfiguration(); clientConf.setMetadataServiceUri(zkUtil.getMetadataServiceUri()); @@ -237,11 +260,11 @@ public void testPeriodicCheckpointForLedgerStorage(String ledgerStorageClassName handle.close(); } - LastLogMark lastLogMarkAfterFirstSetOfAdds = server.getBookie().journals.get(0).getLastLogMark(); + LastLogMark lastLogMarkAfterFirstSetOfAdds = ((BookieImpl) server.getBookie()).journals.get(0).getLastLogMark(); LogMark curMarkAfterFirstSetOfAdds = lastLogMarkAfterFirstSetOfAdds.getCurMark(); File lastMarkFile = new File(ledgerDir, "lastMark"); - // lastMark file should be zero, because checkpoint hasn't happenend + // lastMark file should be zero, because checkpoint hasn't happened LogMark logMarkFileBeforeCheckpoint = readLastMarkFile(lastMarkFile); Assert.assertEquals("lastMarkFile before checkpoint should be zero", 0, logMarkFileBeforeCheckpoint.compare(new LogMark())); @@ -258,11 +281,11 @@ public void testPeriodicCheckpointForLedgerStorage(String ledgerStorageClassName Assert.assertTrue("lastMark file must be existing, because checkpoint should have happened", lastMarkFile.exists()); - LastLogMark lastLogMarkAfterCheckpoint = server.getBookie().journals.get(0).getLastLogMark(); + LastLogMark lastLogMarkAfterCheckpoint = ((BookieImpl) server.getBookie()).journals.get(0).getLastLogMark(); LogMark curMarkAfterCheckpoint = lastLogMarkAfterCheckpoint.getCurMark(); LogMark rolledLogMark = readLastMarkFile(lastMarkFile); - Assert.assertNotEquals("rolledLogMark should not be zero, since checkpoint has happenend", 0, + Assert.assertNotEquals("rolledLogMark should not be zero, since checkpoint has happened", 0, rolledLogMark.compare(new LogMark())); /* * Curmark should be equal before and after checkpoint, because we didnt @@ -291,7 +314,8 @@ public void testPeriodicCheckpointForLedgerStorage(String ledgerStorageClassName // wait for flushInterval for SyncThread to do next iteration of checkpoint executorController.advance(Duration.ofMillis(conf.getFlushInterval())); - LastLogMark lastLogMarkAfterSecondSetOfAdds = server.getBookie().journals.get(0).getLastLogMark(); + LastLogMark lastLogMarkAfterSecondSetOfAdds = ((BookieImpl) server.getBookie()). 
+ journals.get(0).getLastLogMark(); LogMark curMarkAfterSecondSetOfAdds = lastLogMarkAfterSecondSetOfAdds.getCurMark(); rolledLogMark = readLastMarkFile(lastMarkFile); @@ -344,13 +368,16 @@ public void testCheckpointofILSWhenEntryLogIsRotated(boolean entryLogPerLedgerEn Assert.assertEquals("Number of JournalDirs", 1, conf.getJournalDirs().length); // we know there is only one ledgerDir - File ledgerDir = Bookie.getCurrentDirectories(conf.getLedgerDirs())[0]; - BookieServer server = new BookieServer(conf); + File ledgerDir = BookieImpl.getCurrentDirectories(conf.getLedgerDirs())[0]; + BookieServer server = new BookieServer( + conf, new TestBookieImpl(conf), + NullStatsLogger.INSTANCE, PooledByteBufAllocator.DEFAULT, + new MockUncleanShutdownDetection()); server.start(); ClientConfiguration clientConf = new ClientConfiguration(); clientConf.setMetadataServiceUri(zkUtil.getMetadataServiceUri()); BookKeeper bkClient = new BookKeeper(clientConf); - InterleavedLedgerStorage ledgerStorage = (InterleavedLedgerStorage) server.getBookie().ledgerStorage; + InterleavedLedgerStorage ledgerStorage = (InterleavedLedgerStorage) server.getBookie().getLedgerStorage(); int numOfEntries = 5; byte[] dataBytes = "data".getBytes(); @@ -362,7 +389,8 @@ public void testCheckpointofILSWhenEntryLogIsRotated(boolean entryLogPerLedgerEn } handle.close(); // simulate rolling entrylog - ((EntryLogManagerBase) ledgerStorage.getEntryLogger().getEntryLogManager()).createNewLog(ledgerId); + ((EntryLogManagerBase) ledgerStorage.getEntryLogger().getEntryLogManager()) + .createNewLog(ledgerId); // sleep for a bit for checkpoint to do its task executorController.advance(Duration.ofMillis(500)); @@ -422,8 +450,11 @@ public void testCheckpointOfSLSWhenEntryLogIsRotated(boolean entryLogPerLedgerEn Assert.assertEquals("Number of JournalDirs", 1, conf.getJournalDirs().length); // we know there is only one ledgerDir - File ledgerDir = Bookie.getCurrentDirectories(conf.getLedgerDirs())[0]; - BookieServer server = new BookieServer(conf); + File ledgerDir = BookieImpl.getCurrentDirectories(conf.getLedgerDirs())[0]; + BookieServer server = new BookieServer( + conf, new TestBookieImpl(conf), + NullStatsLogger.INSTANCE, PooledByteBufAllocator.DEFAULT, + new MockUncleanShutdownDetection()); server.start(); ClientConfiguration clientConf = new ClientConfiguration(); clientConf.setMetadataServiceUri(zkUtil.getMetadataServiceUri()); @@ -485,14 +516,17 @@ public void testIfEntryLogPerLedgerEnabledCheckpointFlushesAllLogs() throws Exce Assert.assertEquals("Number of JournalDirs", 1, conf.getJournalDirs().length); // we know there is only one ledgerDir - File ledgerDir = Bookie.getCurrentDirectories(conf.getLedgerDirs())[0]; - BookieServer server = new BookieServer(conf); + File ledgerDir = BookieImpl.getCurrentDirectories(conf.getLedgerDirs())[0]; + BookieServer server = new BookieServer( + conf, new TestBookieImpl(conf), + NullStatsLogger.INSTANCE, PooledByteBufAllocator.DEFAULT, + new MockUncleanShutdownDetection()); server.start(); ClientConfiguration clientConf = new ClientConfiguration(); clientConf.setMetadataServiceUri(zkUtil.getMetadataServiceUri()); BookKeeper bkClient = new BookKeeper(clientConf); - InterleavedLedgerStorage ledgerStorage = (InterleavedLedgerStorage) server.getBookie().ledgerStorage; - EntryLogger entryLogger = ledgerStorage.entryLogger; + InterleavedLedgerStorage ledgerStorage = (InterleavedLedgerStorage) server.getBookie().getLedgerStorage(); + DefaultEntryLogger entryLogger = ledgerStorage.entryLogger; 
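+        // with entryLogPerLedger enabled, each ledger writes to its own active
+        // entry log, so the checkpoint below must flush every active log rather
+        // than just the single current one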
EntryLogManagerForEntryLogPerLedger entryLogManager = (EntryLogManagerForEntryLogPerLedger) entryLogger .getEntryLogManager(); @@ -527,10 +561,10 @@ public void testIfEntryLogPerLedgerEnabledCheckpointFlushesAllLogs() throws Exce executorController.advance(Duration.ofMillis(conf.getFlushInterval())); /* - * since checkpoint happenend, there shouldn't be any logChannelsToFlush + * since checkpoint happened, there shouldn't be any logChannelsToFlush * and bytesWrittenSinceLastFlush should be zero. */ - List copyOfRotatedLogChannels = entryLogManager.getRotatedLogChannels(); + List copyOfRotatedLogChannels = entryLogManager.getRotatedLogChannels(); Assert.assertTrue("There shouldn't be logChannelsToFlush", ((copyOfRotatedLogChannels == null) || (copyOfRotatedLogChannels.size() == 0))); @@ -589,8 +623,11 @@ public void testCheckPointForEntryLoggerWithMultipleActiveEntryLogs() throws Exc Assert.assertEquals("Number of JournalDirs", 1, conf.getJournalDirs().length); // we know there is only one ledgerDir - File ledgerDir = Bookie.getCurrentDirectories(conf.getLedgerDirs())[0]; - BookieServer server = new BookieServer(conf); + File ledgerDir = BookieImpl.getCurrentDirectories(conf.getLedgerDirs())[0]; + BookieServer server = new BookieServer( + conf, new TestBookieImpl(conf), + NullStatsLogger.INSTANCE, PooledByteBufAllocator.DEFAULT, + new MockUncleanShutdownDetection()); server.start(); ClientConfiguration clientConf = new ClientConfiguration(); clientConf.setMetadataServiceUri(zkUtil.getMetadataServiceUri()); @@ -639,7 +676,7 @@ public void testCheckPointForEntryLoggerWithMultipleActiveEntryLogs() throws Exc Assert.assertTrue("lastMark file must be existing, because checkpoint should have happened", lastMarkFile.exists()); LogMark rolledLogMark = readLastMarkFile(lastMarkFile); - Assert.assertNotEquals("rolledLogMark should not be zero, since checkpoint has happenend", 0, + Assert.assertNotEquals("rolledLogMark should not be zero, since checkpoint has happened", 0, rolledLogMark.compare(new LogMark())); bkClient.close(); @@ -653,7 +690,7 @@ public void testCheckPointForEntryLoggerWithMultipleActiveEntryLogs() throws Exc // Journal file File[] journalDirs = conf.getJournalDirs(); for (File journalDir : journalDirs) { - File journalDirectory = Bookie.getCurrentDirectory(journalDir); + File journalDirectory = BookieImpl.getCurrentDirectory(journalDir); List journalLogsId = Journal.listJournalIds(journalDirectory, null); for (long journalId : journalLogsId) { File journalFile = new File(journalDirectory, Long.toHexString(journalId) + ".txn"); @@ -667,7 +704,10 @@ public void testCheckPointForEntryLoggerWithMultipleActiveEntryLogs() throws Exc // now we are restarting BookieServer conf.setLedgerStorageClass(InterleavedLedgerStorage.class.getName()); - server = new BookieServer(conf); + server = new BookieServer( + conf, new TestBookieImpl(conf), + NullStatsLogger.INSTANCE, PooledByteBufAllocator.DEFAULT, + new MockUncleanShutdownDetection()); server.start(); BookKeeper newBKClient = new BookKeeper(clientConf); // since Bookie checkpointed successfully before shutdown/crash, diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/LedgerStorageTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/LedgerStorageTest.java index 697d7c0e88d..fd1851703b1 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/LedgerStorageTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/LedgerStorageTest.java @@ -1,4 +1,4 @@ -/** +/* * * 
Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -23,11 +23,15 @@ import static org.junit.Assert.assertEquals; import io.netty.buffer.ByteBuf; +import io.netty.buffer.UnpooledByteBufAllocator; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; +import java.util.ArrayList; +import java.util.PrimitiveIterator.OfLong; import java.util.concurrent.CountDownLatch; - +import java.util.function.Consumer; +import java.util.stream.IntStream; import org.apache.bookkeeper.client.BookKeeper; import org.apache.bookkeeper.client.LedgerHandle; import org.apache.bookkeeper.conf.ClientConfiguration; @@ -35,6 +39,7 @@ import org.apache.bookkeeper.proto.checksum.DigestManager; import org.apache.bookkeeper.test.BookKeeperClusterTestCase; import org.apache.bookkeeper.util.TestUtils; +import org.junit.Assert; import org.junit.Test; /** @@ -47,7 +52,7 @@ public LedgerStorageTest() { @Test public void testLedgerDeleteNotification() throws Exception { - LedgerStorage ledgerStorage = bs.get(0).getBookie().ledgerStorage; + LedgerStorage ledgerStorage = serverByIndex(0).getBookie().getLedgerStorage(); long deletedLedgerId = 5; ledgerStorage.setMasterKey(deletedLedgerId, new byte[0]); @@ -85,11 +90,12 @@ public void testExplicitLacWriteToJournalWithOlderVersions() throws Exception { public void testExplicitLacWriteToJournal(int journalFormatVersionToWrite, int fileInfoFormatVersionToWrite) throws Exception { - ServerConfiguration bookieServerConfig = bsConfs.get(0); - bookieServerConfig.setJournalFormatVersionToWrite(journalFormatVersionToWrite); - bookieServerConfig.setFileInfoFormatVersionToWrite(fileInfoFormatVersionToWrite); + restartBookies(c -> { + c.setJournalFormatVersionToWrite(journalFormatVersionToWrite); + c.setFileInfoFormatVersionToWrite(fileInfoFormatVersionToWrite); + return c; + }); - restartBookies(bookieServerConfig); ClientConfiguration confWithExplicitLAC = new ClientConfiguration(); confWithExplicitLAC.setMetadataServiceUri(zkUtil.getMetadataServiceUri()); @@ -126,7 +132,7 @@ public void testExplicitLacWriteToJournal(int journalFormatVersionToWrite, int f assertEquals("Read explicit LAC of rlh after wait for explicitlacflush", (numOfEntries - 1), readExplicitLastConfirmed); - ServerConfiguration newBookieConf = new ServerConfiguration(bsConfs.get(0)); + ServerConfiguration newBookieConf = new ServerConfiguration(confByIndex(0)); /* * by reusing bookieServerConfig and setting metadataServiceUri to null * we can create/start new Bookie instance using the same data @@ -134,7 +140,7 @@ public void testExplicitLacWriteToJournal(int journalFormatVersionToWrite, int f * purpose. 
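+         * (with metadataServiceUri nulled, the bookie starts without contacting
+         * any metadata service)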
*/ newBookieConf.setMetadataServiceUri(null); - Bookie newbookie = new Bookie(newBookieConf); + BookieImpl newbookie = new TestBookieImpl(newBookieConf); /* * since 'newbookie' uses the same data as original Bookie, it should be * able to read journal of the original bookie and hence explicitLac buf @@ -145,7 +151,8 @@ public void testExplicitLacWriteToJournal(int journalFormatVersionToWrite, int f if ((journalFormatVersionToWrite >= 6) && (fileInfoFormatVersionToWrite >= 1)) { DigestManager digestManager = DigestManager.instantiate(ledgerId, passwdBytes, - BookKeeper.DigestType.toProtoDigestType(digestType), confWithExplicitLAC.getUseV2WireProtocol()); + BookKeeper.DigestType.toProtoDigestType(digestType), UnpooledByteBufAllocator.DEFAULT, + confWithExplicitLAC.getUseV2WireProtocol()); long explicitLacPersistedInJournal = digestManager.verifyDigestAndReturnLac(explicitLacBuf); assertEquals("explicitLac persisted in journal", (numOfEntries - 1), explicitLacPersistedInJournal); } else { @@ -174,11 +181,11 @@ public void testExplicitLacWriteToFileInfoWithOlderVersions() throws Exception { public void testExplicitLacWriteToFileInfo(int journalFormatVersionToWrite, int fileInfoFormatVersionToWrite) throws Exception { - ServerConfiguration bookieServerConfig = bsConfs.get(0); - bookieServerConfig.setJournalFormatVersionToWrite(journalFormatVersionToWrite); - bookieServerConfig.setFileInfoFormatVersionToWrite(fileInfoFormatVersionToWrite); - - restartBookies(bookieServerConfig); + restartBookies(c -> { + c.setJournalFormatVersionToWrite(journalFormatVersionToWrite); + c.setFileInfoFormatVersionToWrite(fileInfoFormatVersionToWrite); + return c; + }); ClientConfiguration confWithExplicitLAC = new ClientConfiguration(); confWithExplicitLAC.setMetadataServiceUri(zkUtil.getMetadataServiceUri()); @@ -218,15 +225,17 @@ public void testExplicitLacWriteToFileInfo(int journalFormatVersionToWrite, int /* * flush ledgerStorage so that header of fileinfo is flushed. 
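+         * (the explicit LAC lives in the fileinfo header, so it can only be
+         * read back from disk after this flush)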
         */
-        bs.get(0).getBookie().ledgerStorage.flush();
+        serverByIndex(0).getBookie().getLedgerStorage().flush();
 
-        ReadOnlyFileInfo fileInfo = getFileInfo(ledgerId, Bookie.getCurrentDirectories(bsConfs.get(0).getLedgerDirs()));
+        ReadOnlyFileInfo fileInfo = getFileInfo(ledgerId,
+                BookieImpl.getCurrentDirectories(confByIndex(0).getLedgerDirs()));
         fileInfo.readHeader();
         ByteBuf explicitLacBufReadFromFileInfo = fileInfo.getExplicitLac();
 
         if ((journalFormatVersionToWrite >= 6) && (fileInfoFormatVersionToWrite >= 1)) {
             DigestManager digestManager = DigestManager.instantiate(ledgerId, passwdBytes,
-                    BookKeeper.DigestType.toProtoDigestType(digestType), confWithExplicitLAC.getUseV2WireProtocol());
+                    BookKeeper.DigestType.toProtoDigestType(digestType), UnpooledByteBufAllocator.DEFAULT,
+                    confWithExplicitLAC.getUseV2WireProtocol());
             long explicitLacReadFromFileInfo = digestManager.verifyDigestAndReturnLac(explicitLacBufReadFromFileInfo);
             assertEquals("explicitLac persisted in FileInfo", (numOfEntries - 1), explicitLacReadFromFileInfo);
         } else {
@@ -273,4 +282,44 @@ ReadOnlyFileInfo getFileInfo(long ledgerId, File[] indexDirectories) throws IOEx
         fi.readHeader();
         return fi;
     }
+
+    @Test
+    public void testGetListOfEntriesOfLedger() throws Exception {
+        ClientConfiguration conf = new ClientConfiguration();
+        conf.setMetadataServiceUri(zkUtil.getMetadataServiceUri());
+        int numOfBookies = bookieCount();
+        int numOfEntries = 5;
+        BookKeeper.DigestType digestType = BookKeeper.DigestType.CRC32;
+        BookKeeper bkc = new BookKeeper(conf);
+        LedgerHandle lh = bkc.createLedger(numOfBookies, numOfBookies, digestType, "testPasswd".getBytes());
+        long lId = lh.getId();
+        for (int i = 0; i < numOfEntries; i++) {
+            lh.addEntry("000".getBytes());
+        }
+
+        ServerConfiguration newBookieConf = new ServerConfiguration(confByIndex(0));
+        /*
+         * by reusing bookieServerConfig and setting metadataServiceUri to null
+         * we can create/start a new Bookie instance using the same data
+         * (journal/ledger/index) of the existing BookieServer for our testing
+         * purpose.
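+         * (readJournal() below replays those adds into the new bookie's ledger
+         * storage, which getListOfEntriesOfLedger(lId) then iterates)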
+         */
+        newBookieConf.setMetadataServiceUri(null);
+        BookieImpl newbookie = new TestBookieImpl(newBookieConf);
+        newbookie.readJournal();
+
+        OfLong listOfEntriesItr = newbookie.getListOfEntriesOfLedger(lId);
+        ArrayList<Long> arrayList = new ArrayList<>();
+        Consumer<Long> addMethod = arrayList::add;
+        listOfEntriesItr.forEachRemaining(addMethod);
+
+        assertEquals("Num Of Entries", numOfEntries, arrayList.size());
+        Assert.assertTrue("Iterator should be sorted",
+                IntStream.range(0, arrayList.size() - 1).allMatch(k -> arrayList.get(k) <= arrayList.get(k + 1)));
+        bkc.close();
+    }
+}
diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/LedgerStorageTestBase.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/LedgerStorageTestBase.java
index 3de15cd8098..f483ceec4d6 100644
--- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/LedgerStorageTestBase.java
+++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/LedgerStorageTestBase.java
@@ -60,8 +60,8 @@ public void setUp() throws Exception {
         ledgerDir = createTempDir("ledger");
 
         // create current directories
-        Bookie.getCurrentDirectory(journalDir).mkdir();
-        Bookie.getCurrentDirectory(ledgerDir).mkdir();
+        BookieImpl.getCurrentDirectory(journalDir).mkdir();
+        BookieImpl.getCurrentDirectory(ledgerDir).mkdir();
 
         // build the configuration
         conf.setMetadataServiceUri(null);
diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/MockLedgerStorage.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/MockLedgerStorage.java
new file mode 100644
index 00000000000..e1979dbdc4f
--- /dev/null
+++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/MockLedgerStorage.java
@@ -0,0 +1,358 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.bookkeeper.bookie;
+
+import com.google.common.util.concurrent.RateLimiter;
+import io.netty.buffer.ByteBuf;
+import io.netty.buffer.ByteBufAllocator;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.EnumSet;
+import java.util.List;
+import java.util.Optional;
+import java.util.PrimitiveIterator;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.atomic.AtomicBoolean;
+import org.apache.bookkeeper.bookie.CheckpointSource.Checkpoint;
+import org.apache.bookkeeper.common.util.Watcher;
+import org.apache.bookkeeper.conf.ServerConfiguration;
+import org.apache.bookkeeper.meta.LedgerManager;
+import org.apache.bookkeeper.stats.StatsLogger;
+
+/**
+ * A mock for running tests that require ledger storage.
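+ * Ledger state and entries are held in in-memory maps; read paths the tests
+ * don't exercise throw UnsupportedOperationException.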
+ */
+public class MockLedgerStorage implements CompactableLedgerStorage {
+
+    private static class LedgerInfo {
+        boolean limbo = false;
+        boolean fenced = false;
+        long lac = -1;
+        final byte[] masterKey;
+
+        LedgerInfo(byte[] masterKey) {
+            this.masterKey = Arrays.copyOf(masterKey, masterKey.length);
+        }
+
+        ConcurrentHashMap<Long, ByteBuf> entries = new ConcurrentHashMap<>();
+    }
+
+    private final ConcurrentHashMap<Long, LedgerInfo> ledgers = new ConcurrentHashMap<>();
+    private final EnumSet<StorageState> storageStateFlags = EnumSet.noneOf(StorageState.class);
+    private final List<EntryLocation> entryLocations = new ArrayList<>();
+
+    @Override
+    public void initialize(ServerConfiguration conf,
+                           LedgerManager ledgerManager,
+                           LedgerDirsManager ledgerDirsManager,
+                           LedgerDirsManager indexDirsManager,
+                           StatsLogger statsLogger,
+                           ByteBufAllocator allocator)
+            throws IOException {}
+
+    @Override
+    public void setStateManager(StateManager stateManager) {}
+    @Override
+    public void setCheckpointSource(CheckpointSource checkpointSource) {}
+    @Override
+    public void setCheckpointer(Checkpointer checkpointer) {}
+
+    @Override
+    public void start() {}
+    @Override
+    public void shutdown() throws InterruptedException {}
+
+    @Override
+    public boolean ledgerExists(long ledgerId) throws IOException {
+        return ledgers.containsKey(ledgerId);
+    }
+
+    @Override
+    public boolean entryExists(long ledgerId, long entryId) throws IOException {
+        LedgerInfo info = ledgers.get(ledgerId);
+        if (info == null) {
+            throw new Bookie.NoLedgerException(ledgerId);
+        }
+        return info != null && info.entries.containsKey(entryId);
+    }
+
+    @Override
+    public boolean setFenced(long ledgerId) throws IOException {
+        AtomicBoolean ret = new AtomicBoolean(false);
+        LedgerInfo previous = ledgers.computeIfPresent(ledgerId, (ledgerId1, current) -> {
+            if (!current.fenced) {
+                current.fenced = true;
+                ret.set(true);
+            } else {
+                ret.set(false);
+            }
+            return current;
+        });
+        if (previous == null) {
+            throw new Bookie.NoLedgerException(ledgerId);
+        }
+        return ret.get();
+    }
+
+    @Override
+    public boolean isFenced(long ledgerId) throws IOException {
+        LedgerInfo info = ledgers.get(ledgerId);
+        if (info == null) {
+            throw new Bookie.NoLedgerException(ledgerId);
+        }
+        return info != null && info.fenced;
+    }
+
+    @Override
+    public void setLimboState(long ledgerId) throws IOException {
+        LedgerInfo previous = ledgers.computeIfPresent(ledgerId, (ledgerId1, current) -> {
+            current.limbo = true;
+            return current;
+        });
+        if (previous == null) {
+            throw new Bookie.NoLedgerException(ledgerId);
+        }
+    }
+
+    @Override
+    public boolean hasLimboState(long ledgerId) throws IOException {
+        LedgerInfo info = ledgers.get(ledgerId);
+        if (info == null) {
+            throw new Bookie.NoLedgerException(ledgerId);
+        }
+        return info.limbo;
+    }
+    @Override
+    public void clearLimboState(long ledgerId) throws IOException {
+        LedgerInfo previous = ledgers.computeIfPresent(ledgerId, (ledgerId1, current) -> {
+            current.limbo = false;
+            return current;
+        });
+        if (previous == null) {
+            throw new Bookie.NoLedgerException(ledgerId);
+        }
+    }
+
+    @Override
+    public void setMasterKey(long ledgerId, byte[] masterKey) throws IOException {
+        LedgerInfo previous = ledgers.compute(ledgerId, (ledgerId1, current) -> {
+            if (current != null) {
+                return current;
+            }
+            return new LedgerInfo(masterKey);
+        });
+        if (previous != null && !Arrays.equals(masterKey, previous.masterKey)) {
+            throw new IOException(BookieException.create(BookieException.Code.IllegalOpException));
+        }
+    }
+    @Override
+    public byte[] readMasterKey(long ledgerId) throws IOException, BookieException {
+        LedgerInfo info = ledgers.get(ledgerId);
+        if (info == null) {
+            throw new Bookie.NoLedgerException(ledgerId);
+        }
+        return Arrays.copyOf(info.masterKey, info.masterKey.length);
+    }
+
+    public long extractLedgerId(ByteBuf entry) {
+        return entry.getLong(entry.readerIndex());
+    }
+
+    public long extractEntryId(ByteBuf entry) {
+        return entry.getLong(entry.readerIndex() + 8);
+    }
+
+    public long extractLac(ByteBuf entry) {
+        return entry.getLong(entry.readerIndex() + 16);
+    }
+
+    @Override
+    public long addEntry(ByteBuf entry) throws IOException, BookieException {
+        ByteBuf copy = entry.retain().duplicate();
+        long ledgerId = extractLedgerId(copy);
+        long entryId = extractEntryId(copy);
+        long lac = extractLac(copy);
+
+        LedgerInfo previous = ledgers.computeIfPresent(ledgerId, (ledgerId1, current) -> {
+            if (lac > current.lac) {
+                current.lac = lac;
+            }
+            current.entries.put(entryId, copy);
+            return current;
+        });
+        if (previous == null) {
+            throw new Bookie.NoLedgerException(ledgerId);
+        }
+        return entryId;
+    }
+
+    @Override
+    public ByteBuf getEntry(long ledgerId, long entryId) throws IOException {
+        throw new UnsupportedOperationException("Not supported in mock, implement if you need it");
+    }
+
+    @Override
+    public long getLastAddConfirmed(long ledgerId) throws IOException {
+        throw new UnsupportedOperationException("Not supported in mock, implement if you need it");
+    }
+
+    @Override
+    public boolean waitForLastAddConfirmedUpdate(
+            long ledgerId,
+            long previousLAC,
+            Watcher<LastAddConfirmedUpdateNotification> watcher) throws IOException {
+        throw new UnsupportedOperationException("Not supported in mock, implement if you need it");
+    }
+
+    @Override
+    public void cancelWaitForLastAddConfirmedUpdate(
+            long ledgerId,
+            Watcher<LastAddConfirmedUpdateNotification> watcher) throws IOException {
+        throw new UnsupportedOperationException("Not supported in mock, implement if you need it");
+    }
+
+    @Override
+    public void flush() throws IOException {
+        // this is a no-op, as we don't hit disk anyhow
+    }
+
+    @Override
+    public void checkpoint(Checkpoint checkpoint) throws IOException {
+        throw new UnsupportedOperationException("Not supported in mock, implement if you need it");
+    }
+
+    @Override
+    public void deleteLedger(long ledgerId) throws IOException {
+        ledgers.remove(ledgerId);
+    }
+
+    @Override
+    public void registerLedgerDeletionListener(LedgerDeletionListener listener) {
+        throw new UnsupportedOperationException("Not supported in mock, implement if you need it");
+    }
+
+    @Override
+    public void setExplicitLac(long ledgerId, ByteBuf lac) throws IOException {
+        throw new UnsupportedOperationException("Not supported in mock, implement if you need it");
+    }
+
+    @Override
+    public ByteBuf getExplicitLac(long ledgerId) throws IOException {
+        throw new UnsupportedOperationException("Not supported in mock, implement if you need it");
+    }
+
+    @Override
+    public LedgerStorage getUnderlyingLedgerStorage() {
+        return CompactableLedgerStorage.super.getUnderlyingLedgerStorage();
+    }
+
+    @Override
+    public void forceGC() {
+        CompactableLedgerStorage.super.forceGC();
+    }
+
+    @Override
+    public void forceGC(boolean forceMajor, boolean forceMinor) {
+        CompactableLedgerStorage.super.forceGC(forceMajor, forceMinor);
+    }
+
+    public void suspendMinorGC() {
+        CompactableLedgerStorage.super.suspendMinorGC();
+    }
+
+    public void suspendMajorGC() {
+        CompactableLedgerStorage.super.suspendMajorGC();
+    }
+
+    public void resumeMinorGC() {
+        CompactableLedgerStorage.super.resumeMinorGC();
+    }
+
+    public void resumeMajorGC() {
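+        // delegate to the interface default implementation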
+        CompactableLedgerStorage.super.resumeMajorGC();
+    }
+
+    public boolean isMajorGcSuspended() {
+        return CompactableLedgerStorage.super.isMajorGcSuspended();
+    }
+
+    public boolean isMinorGcSuspended() {
+        return CompactableLedgerStorage.super.isMinorGcSuspended();
+    }
+
+    @Override
+    public List<DetectedInconsistency> localConsistencyCheck(Optional<RateLimiter> rateLimiter) throws IOException {
+        return CompactableLedgerStorage.super.localConsistencyCheck(rateLimiter);
+    }
+
+    @Override
+    public boolean isInForceGC() {
+        return CompactableLedgerStorage.super.isInForceGC();
+    }
+
+    @Override
+    public List<GarbageCollectionStatus> getGarbageCollectionStatus() {
+        return CompactableLedgerStorage.super.getGarbageCollectionStatus();
+    }
+
+    @Override
+    public PrimitiveIterator.OfLong getListOfEntriesOfLedger(long ledgerId) throws IOException {
+        throw new UnsupportedOperationException("Not supported in mock, implement if you need it");
+    }
+
+    @Override
+    public Iterable<Long> getActiveLedgersInRange(long firstLedgerId, long lastLedgerId)
+            throws IOException {
+        throw new UnsupportedOperationException("Not supported in mock, implement if you need it");
+    }
+
+    public List<EntryLocation> getUpdatedLocations() {
+        return entryLocations;
+    }
+
+    @Override
+    public void updateEntriesLocations(Iterable<EntryLocation> locations) throws IOException {
+        synchronized (entryLocations) {
+            for (EntryLocation l : locations) {
+                entryLocations.add(l);
+            }
+        }
+    }
+
+    @Override
+    public EnumSet<StorageState> getStorageStateFlags() throws IOException {
+        return storageStateFlags;
+    }
+
+    @Override
+    public void setStorageStateFlag(StorageState flag) throws IOException {
+        storageStateFlags.add(flag);
+    }
+
+    @Override
+    public void clearStorageStateFlag(StorageState flag) throws IOException {
+        storageStateFlags.remove(flag);
+    }
+
+    @Override
+    public void flushEntriesLocationsIndex() throws IOException { }
+}
diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/MockUncleanShutdownDetection.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/MockUncleanShutdownDetection.java
new file mode 100644
index 00000000000..bf907e12091
--- /dev/null
+++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/MockUncleanShutdownDetection.java
@@ -0,0 +1,53 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+package org.apache.bookkeeper.bookie;
+
+/**
+ * Mock implementation of UncleanShutdownDetection.
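+ * Records whether start-up and clean shutdown were registered, so tests can
+ * assert that lastShutdownWasUnclean() is true only for a start-up without a
+ * matching clean shutdown.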
+ */ +public class MockUncleanShutdownDetection implements UncleanShutdownDetection { + + private boolean startRegistered; + private boolean shutdownRegistered; + + @Override + public void registerStartUp() { + startRegistered = true; + } + + @Override + public void registerCleanShutdown() { + shutdownRegistered = true; + } + + @Override + public boolean lastShutdownWasUnclean() { + return startRegistered && !shutdownRegistered; + } + + public boolean getStartRegistered() { + return startRegistered; + } + + public boolean getShutdownRegistered() { + return shutdownRegistered; + } +} \ No newline at end of file diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/SingleBookieInitializationTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/SingleBookieInitializationTest.java index 20856aacaba..11206b8cce9 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/SingleBookieInitializationTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/SingleBookieInitializationTest.java @@ -89,7 +89,7 @@ public void testInitBookieNoWritableDirsButHasEnoughSpaces() throws Exception { conf.setMinUsableSizeForEntryLogCreation(Long.MIN_VALUE); conf.setLedgerStorageClass(InterleavedLedgerStorage.class.getName()); - bookie = new Bookie(conf); + bookie = new TestBookieImpl(conf); bookie.start(); CompletableFuture writeFuture = new CompletableFuture<>(); @@ -111,7 +111,7 @@ public void testInitBookieNoWritableDirsAndNoEnoughSpaces() throws Exception { conf.setMinUsableSizeForEntryLogCreation(Long.MAX_VALUE); conf.setLedgerStorageClass(InterleavedLedgerStorage.class.getName()); - bookie = new Bookie(conf); + bookie = new TestBookieImpl(conf); bookie.start(); try { diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/SkipListArenaTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/SkipListArenaTest.java new file mode 100644 index 00000000000..f972c3ef19a --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/SkipListArenaTest.java @@ -0,0 +1,206 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package org.apache.bookkeeper.bookie;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import com.google.common.primitives.Ints;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Random;
+import java.util.Set;
+import java.util.TreeMap;
+import java.util.concurrent.ConcurrentLinkedQueue;
+import java.util.concurrent.CountDownLatch;
+import org.apache.bookkeeper.bookie.SkipListArena.MemorySlice;
+import org.apache.bookkeeper.conf.ServerConfiguration;
+import org.junit.Test;
+
+/**
+ * Test the SkipListArena class.
+ */
+public class SkipListArenaTest {
+
+    class CustomConfiguration extends ServerConfiguration {
+        @Override
+        public int getSkipListArenaChunkSize() {
+            return 4096;
+        }
+        @Override
+        public int getSkipListArenaMaxAllocSize() {
+            return 1024;
+        }
+        @Override
+        public boolean getJournalFlushWhenQueueEmpty() {
+            return true;
+        }
+
+    }
+
+    final CustomConfiguration cfg = new CustomConfiguration();
+
+    /**
+     * Test random allocations.
+     */
+    @Test
+    public void testRandomAllocation() {
+        Random rand = new Random();
+        SkipListArena arena = new SkipListArena(cfg);
+        int expectedOff = 0;
+        byte[] lastBuffer = null;
+
+        // 10K iterations by 0-512 alloc -> 2560kB expected
+        // should be reasonable for unit test and also cover wraparound
+        // behavior
+        for (int i = 0; i < 10000; i++) {
+            int size = rand.nextInt(512);
+            MemorySlice alloc = arena.allocateBytes(size);
+
+            if (alloc.getData() != lastBuffer) {
+                expectedOff = 0;
+                lastBuffer = alloc.getData();
+            }
+            assertTrue(expectedOff == alloc.getOffset());
+            assertTrue("Allocation " + alloc + " overruns buffer",
+                    alloc.getOffset() + size <= alloc.getData().length);
+            expectedOff += size;
+        }
+    }
+
+    @Test
+    public void testLargeAllocation() {
+        SkipListArena arena = new SkipListArena(cfg);
+        MemorySlice alloc = arena.allocateBytes(1024 + 1024);
+        assertNull("2KB allocation shouldn't be satisfied by LAB.", alloc);
+    }
+
+    private class ByteArray {
+        final byte[] bytes;
+        ByteArray(final byte[] bytes) {
+            this.bytes = bytes;
+        }
+
+        @Override
+        public int hashCode() {
+            return bytes.hashCode();
+        }
+
+        @Override
+        public boolean equals(Object object) {
+            if (object instanceof ByteArray) {
+                ByteArray other = (ByteArray) object;
+                return this.bytes.equals(other.bytes);
+            }
+            return false;
+        }
+    }
+
+    private static class AllocBuffer implements Comparable<AllocBuffer> {
+        private final MemorySlice alloc;
+        private final int size;
+        public AllocBuffer(MemorySlice alloc, int size) {
+            super();
+            this.alloc = alloc;
+            this.size = size;
+        }
+
+        @Override
+        public int compareTo(AllocBuffer e) {
+            assertTrue(alloc.getData() == e.alloc.getData());
+            return Ints.compare(alloc.getOffset(), e.alloc.getOffset());
+        }
+
+        @Override
+        public String toString() {
+            return alloc + ":" + size;
+        }
+    }
+
+    private Thread getAllocThread(final ConcurrentLinkedQueue<AllocBuffer> queue,
+                                  final CountDownLatch latch,
+                                  final SkipListArena arena) {
+        return new Thread(new Runnable() {
+            @Override
+            public void run() {
+                Random rand = new Random();
+                for (int j = 0; j < 1000; j++) {
+                    int size = rand.nextInt(512);
+                    MemorySlice alloc = arena.allocateBytes(size);
+                    queue.add(new AllocBuffer(alloc, size));
+                }
+                latch.countDown();
+            }
+        });
+    }
+
+    /**
+     * Test concurrent allocation, check the results don't overlap.
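+     * Ten threads allocate from the same arena; the allocations are then grouped
+     * by backing array and their offsets checked to tile each buffer exactly.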
+     */
+    @Test
+    public void testConcurrency() throws Exception {
+        final SkipListArena arena = new SkipListArena(cfg);
+        final CountDownLatch latch = new CountDownLatch(10);
+        final ConcurrentLinkedQueue<AllocBuffer> queue = new ConcurrentLinkedQueue<>();
+
+        Set<Thread> testThreads = new HashSet<>();
+        for (int i = 0; i < 10; i++) {
+            testThreads.add(getAllocThread(queue, latch, arena));
+        }
+
+        for (Thread thread : testThreads) {
+            thread.start();
+        }
+        latch.await();
+
+        // Partition the allocations by the actual byte[] they share,
+        // make sure offsets are unique and non-overlap for each buffer.
+        Map<ByteArray, Map<Integer, AllocBuffer>> mapsByArray = new HashMap<>();
+
+        final AllocBuffer[] buffers = queue.toArray(new AllocBuffer[0]);
+        for (AllocBuffer buf : buffers) {
+            if (buf.size != 0) {
+                ByteArray ptr = new ByteArray(buf.alloc.getData());
+                Map<Integer, AllocBuffer> treeMap = mapsByArray.get(ptr);
+                if (treeMap == null) {
+                    treeMap = new TreeMap<>();
+                    mapsByArray.put(ptr, treeMap);
+                }
+                AllocBuffer other = treeMap.put(buf.alloc.getOffset(), buf);
+                if (other != null) {
+                    fail("Buffer " + other + " overlapped with " + buf);
+                }
+            }
+        }
+
+        // Now check each byte array to make sure allocations don't overlap
+        for (Map<Integer, AllocBuffer> treeMap : mapsByArray.values()) {
+            int expectedOff = 0;
+            for (AllocBuffer buf : treeMap.values()) {
+                assertEquals(expectedOff, buf.alloc.getOffset());
+                expectedOff += buf.size;
+            }
+        }
+    }
+}
+
diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/SlowInterleavedLedgerStorage.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/SlowInterleavedLedgerStorage.java
index 645af9c498e..a4061b574f2 100644
--- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/SlowInterleavedLedgerStorage.java
+++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/SlowInterleavedLedgerStorage.java
@@ -22,8 +22,9 @@
 */
 
 import io.netty.buffer.ByteBuf;
+import io.netty.buffer.ByteBufAllocator;
+import io.netty.buffer.UnpooledByteBufAllocator;
 import java.io.IOException;
-
 import org.apache.bookkeeper.conf.ServerConfiguration;
 import org.apache.bookkeeper.meta.LedgerManager;
 import org.apache.bookkeeper.stats.StatsLogger;
@@ -41,27 +42,29 @@ public class SlowInterleavedLedgerStorage extends InterleavedLedgerStorage {
 
     /**
     * Strictly for testing.
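+    * Wraps the entry logger so configurable delays can be injected into the
+    * add, get and flush paths.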
*/ - public static class SlowEntryLogger extends EntryLogger { + public static class SlowDefaultEntryLogger extends DefaultEntryLogger { public volatile long getDelay = 0; public volatile long addDelay = 0; public volatile long flushDelay = 0; - public SlowEntryLogger(ServerConfiguration conf, LedgerDirsManager ledgerDirsManager, EntryLogListener listener, - StatsLogger statsLogger) throws IOException { - super(conf, ledgerDirsManager, listener, statsLogger); + public SlowDefaultEntryLogger(ServerConfiguration conf, + LedgerDirsManager ledgerDirsManager, + EntryLogListener listener, + StatsLogger statsLogger) throws IOException { + super(conf, ledgerDirsManager, listener, statsLogger, UnpooledByteBufAllocator.DEFAULT); } - public SlowEntryLogger setAddDelay(long delay) { + public SlowDefaultEntryLogger setAddDelay(long delay) { addDelay = delay; return this; } - public SlowEntryLogger setGetDelay(long delay) { + public SlowDefaultEntryLogger setGetDelay(long delay) { getDelay = delay; return this; } - public SlowEntryLogger setFlushDelay(long delay) { + public SlowDefaultEntryLogger setFlushDelay(long delay) { flushDelay = delay; return this; } @@ -73,9 +76,9 @@ public void flush() throws IOException { } @Override - public long addEntry(long ledger, ByteBuf entry, boolean rollLog) throws IOException { + public long addEntry(long ledger, ByteBuf entry) throws IOException { delayMs(addDelay); - return super.addEntry(ledger, entry, rollLog); + return super.addEntry(ledger, entry); } @Override @@ -107,34 +110,32 @@ public void initialize(ServerConfiguration conf, LedgerManager ledgerManager, LedgerDirsManager ledgerDirsManager, LedgerDirsManager indexDirsManager, - StateManager stateManager, - CheckpointSource checkpointSource, - Checkpointer checkpointer, - StatsLogger statsLogger) + StatsLogger statsLogger, + ByteBufAllocator allocator) throws IOException { super.initialize(conf, ledgerManager, ledgerDirsManager, indexDirsManager, - stateManager, checkpointSource, checkpointer, statsLogger); + statsLogger, allocator); // do not want to add these to config class, reading throw "raw" interface long getDelay = conf.getLong(PROP_SLOW_STORAGE_GET_DELAY, 0); long addDelay = conf.getLong(PROP_SLOW_STORAGE_ADD_DELAY, 0); long flushDelay = conf.getLong(PROP_SLOW_STORAGE_FLUSH_DELAY, 0); - entryLogger = new SlowEntryLogger(conf, ledgerDirsManager, this, statsLogger) + entryLogger = new SlowDefaultEntryLogger(conf, ledgerDirsManager, this, statsLogger) .setAddDelay(addDelay) .setGetDelay(getDelay) .setFlushDelay(flushDelay); } public void setAddDelay(long delay) { - ((SlowEntryLogger) entryLogger).setAddDelay(delay); + ((SlowDefaultEntryLogger) entryLogger).setAddDelay(delay); } public void setGetDelay(long delay) { - ((SlowEntryLogger) entryLogger).setGetDelay(delay); + ((SlowDefaultEntryLogger) entryLogger).setGetDelay(delay); } public void setFlushDelay(long delay) { - ((SlowEntryLogger) entryLogger).setFlushDelay(delay); + ((SlowDefaultEntryLogger) entryLogger).setFlushDelay(delay); } } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/SortedLedgerStorageCheckpointTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/SortedLedgerStorageCheckpointTest.java index 322cdd08990..42cdb943eee 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/SortedLedgerStorageCheckpointTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/SortedLedgerStorageCheckpointTest.java @@ -26,6 +26,7 @@ import io.netty.buffer.ByteBuf; import 
io.netty.buffer.Unpooled; +import io.netty.buffer.UnpooledByteBufAllocator; import java.io.IOException; import java.util.concurrent.CountDownLatch; import java.util.concurrent.LinkedBlockingQueue; @@ -99,7 +100,7 @@ public void checkpointComplete(Checkpoint checkpoint, boolean compact) public SortedLedgerStorageCheckpointTest() { super(); - conf.setEntryLogSizeLimit(1); + conf.setEntryLogSizeLimit(1024); conf.setEntryLogFilePreAllocationEnabled(false); this.checkpoints = new LinkedBlockingQueue<>(); } @@ -138,10 +139,10 @@ public void start() { mock(LedgerManager.class), ledgerDirsManager, ledgerDirsManager, - null, - checkpointSrc, - checkpointer, - NullStatsLogger.INSTANCE); + NullStatsLogger.INSTANCE, + UnpooledByteBufAllocator.DEFAULT); + this.storage.setCheckpointer(checkpointer); + this.storage.setCheckpointSource(checkpointSrc); } @After @@ -222,12 +223,11 @@ public void testCheckpointAfterEntryLogRotated() throws Exception { }); // simulate entry log is rotated (due to compaction) - EntryLogManagerForSingleEntryLog entryLogManager = (EntryLogManagerForSingleEntryLog) storage.getEntryLogger() - .getEntryLogManager(); - entryLogManager.createNewLog(EntryLogger.UNASSIGNED_LEDGERID); - long leastUnflushedLogId = storage.getEntryLogger().getLeastUnflushedLogId(); + DefaultEntryLogger elogger = storage.getEntryLogger(); + EntryLogManagerForSingleEntryLog entryLogManager = + (EntryLogManagerForSingleEntryLog) elogger.getEntryLogManager(); + entryLogManager.createNewLog(DefaultEntryLogger.UNASSIGNED_LEDGERID); long currentLogId = entryLogManager.getCurrentLogId(); - log.info("Least unflushed entry log : current = {}, leastUnflushed = {}", currentLogId, leastUnflushedLogId); readyLatch.countDown(); assertNull(checkpoints.poll()); @@ -244,8 +244,8 @@ public void testCheckpointAfterEntryLogRotated() throws Exception { assertEquals(0, storage.memTable.kvmap.size()); assertTrue( "current log " + currentLogId + " contains entries added from memtable should be forced to disk" - + " but least unflushed log is " + storage.getEntryLogger().getLeastUnflushedLogId(), - storage.getEntryLogger().getLeastUnflushedLogId() > currentLogId); + + " but flushed logs are " + elogger.getFlushedLogIds(), + elogger.getFlushedLogIds().contains(currentLogId)); } } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/SortedLedgerStorageTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/SortedLedgerStorageTest.java new file mode 100644 index 00000000000..db83f096d95 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/SortedLedgerStorageTest.java @@ -0,0 +1,194 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + */ +package org.apache.bookkeeper.bookie; + +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.BOOKIE_SCOPE; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import io.netty.buffer.ByteBuf; +import io.netty.buffer.Unpooled; +import io.netty.buffer.UnpooledByteBufAllocator; +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.PrimitiveIterator.OfLong; +import java.util.function.Consumer; +import java.util.stream.IntStream; +import org.apache.bookkeeper.bookie.CheckpointSource.Checkpoint; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.conf.TestBKConfiguration; +import org.apache.bookkeeper.test.TestStatsProvider; +import org.apache.bookkeeper.util.DiskChecker; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +/** + * Testing SortedLedgerStorage. + */ +@RunWith(Parameterized.class) +public class SortedLedgerStorageTest { + + TestStatsProvider statsProvider = new TestStatsProvider(); + ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); + LedgerDirsManager ledgerDirsManager; + SortedLedgerStorage sortedLedgerStorage = new SortedLedgerStorage(); + + final long numWrites = 2000; + final long moreNumOfWrites = 3000; + final long entriesPerWrite = 2; + final long numOfLedgers = 5; + + @Parameterized.Parameters + public static Iterable elplSetting() { + return Arrays.asList(true, false); + } + + public SortedLedgerStorageTest(boolean elplSetting) { + conf.setEntryLogSizeLimit(2048); + conf.setEntryLogPerLedgerEnabled(elplSetting); + } + + CheckpointSource checkpointSource = new CheckpointSource() { + @Override + public Checkpoint newCheckpoint() { + return Checkpoint.MAX; + } + + @Override + public void checkpointComplete(Checkpoint checkpoint, boolean compact) throws IOException { + } + }; + + Checkpointer checkpointer = new Checkpointer() { + @Override + public void startCheckpoint(Checkpoint checkpoint) { + // No-op + } + + @Override + public void start() { + // no-op + } + }; + + @Before + public void setUp() throws Exception { + File tmpDir = File.createTempFile("bkTest", ".dir"); + tmpDir.delete(); + tmpDir.mkdir(); + File curDir = BookieImpl.getCurrentDirectory(tmpDir); + BookieImpl.checkDirectoryStructure(curDir); + + conf.setLedgerDirNames(new String[] { tmpDir.toString() }); + ledgerDirsManager = new LedgerDirsManager(conf, conf.getLedgerDirs(), + new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold())); + sortedLedgerStorage.initialize(conf, null, ledgerDirsManager, ledgerDirsManager, + statsProvider.getStatsLogger(BOOKIE_SCOPE), UnpooledByteBufAllocator.DEFAULT); + sortedLedgerStorage.setCheckpointSource(checkpointSource); + sortedLedgerStorage.setCheckpointer(checkpointer); + } + + @Test + public void testGetListOfEntriesOfLedger() throws Exception { + long nonExistingLedgerId = 123456L; + OfLong entriesItr = sortedLedgerStorage.getListOfEntriesOfLedger(nonExistingLedgerId); + assertFalse("There shouldn't be any entries for this ledger", entriesItr.hasNext()); + // Insert some ledger & entries in the interleaved storage + for (long entryId = 0; entryId < numWrites; entryId++) { + for (long ledgerId = 0; ledgerId < numOfLedgers; ledgerId++) { + if (entryId == 0) { + sortedLedgerStorage.setMasterKey(ledgerId, ("ledger-" + ledgerId).getBytes()); + 
sortedLedgerStorage.setFenced(ledgerId); + } + ByteBuf entry = Unpooled.buffer(128); + entry.writeLong(ledgerId); + entry.writeLong(entryId * entriesPerWrite); + entry.writeBytes(("entry-" + entryId).getBytes()); + + sortedLedgerStorage.addEntry(entry); + } + } + + for (long ledgerId = 0; ledgerId < numOfLedgers; ledgerId++) { + OfLong entriesOfLedger = sortedLedgerStorage.getListOfEntriesOfLedger(ledgerId); + ArrayList<Long> arrayList = new ArrayList<Long>(); + Consumer<Long> addMethod = arrayList::add; + entriesOfLedger.forEachRemaining(addMethod); + assertEquals("Number of entries", numWrites, arrayList.size()); + assertTrue("Entries of Ledger", IntStream.range(0, arrayList.size()).allMatch(i -> { + return arrayList.get(i) == (i * entriesPerWrite); + })); + } + + nonExistingLedgerId = 456789L; + entriesItr = sortedLedgerStorage.getListOfEntriesOfLedger(nonExistingLedgerId); + assertFalse("There shouldn't be any entry", entriesItr.hasNext()); + } + + @Test + public void testGetListOfEntriesOfLedgerAfterFlush() throws IOException { + // Insert some ledger & entries in the interleaved storage + for (long entryId = 0; entryId < numWrites; entryId++) { + for (long ledgerId = 0; ledgerId < numOfLedgers; ledgerId++) { + if (entryId == 0) { + sortedLedgerStorage.setMasterKey(ledgerId, ("ledger-" + ledgerId).getBytes()); + sortedLedgerStorage.setFenced(ledgerId); + } + ByteBuf entry = Unpooled.buffer(128); + entry.writeLong(ledgerId); + entry.writeLong(entryId * entriesPerWrite); + entry.writeBytes(("entry-" + entryId).getBytes()); + + sortedLedgerStorage.addEntry(entry); + } + } + + sortedLedgerStorage.flush(); + + // Insert some more ledger & entries in the interleaved storage + for (long entryId = numWrites; entryId < moreNumOfWrites; entryId++) { + for (long ledgerId = 0; ledgerId < numOfLedgers; ledgerId++) { + ByteBuf entry = Unpooled.buffer(128); + entry.writeLong(ledgerId); + entry.writeLong(entryId * entriesPerWrite); + entry.writeBytes(("entry-" + entryId).getBytes()); + + sortedLedgerStorage.addEntry(entry); + } + } + + for (long ledgerId = 0; ledgerId < numOfLedgers; ledgerId++) { + OfLong entriesOfLedger = sortedLedgerStorage.getListOfEntriesOfLedger(ledgerId); + ArrayList<Long> arrayList = new ArrayList<Long>(); + Consumer<Long> addMethod = arrayList::add; + entriesOfLedger.forEachRemaining(addMethod); + assertEquals("Number of entries", moreNumOfWrites, arrayList.size()); + assertTrue("Entries of Ledger", IntStream.range(0, arrayList.size()).allMatch(i -> { + return arrayList.get(i) == (i * entriesPerWrite); + })); + } + } +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/StateManagerTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/StateManagerTest.java index 8da2ffaea93..2f3c71c1764 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/StateManagerTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/StateManagerTest.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file @@ -22,9 +22,11 @@ import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; + import java.io.File; import org.apache.bookkeeper.conf.ServerConfiguration; import org.apache.bookkeeper.conf.TestBKConfiguration; +import org.apache.bookkeeper.discover.RegistrationManager; import org.apache.bookkeeper.meta.MetadataBookieDriver; import org.apache.bookkeeper.meta.zk.ZKMetadataBookieDriver; import org.apache.bookkeeper.stats.NullStatsLogger; @@ -57,7 +59,7 @@ public StateManagerTest(){ public void setUp() throws Exception { super.setUp(); zkUtil.createBKEnsemble("/" + runtime.getMethodName()); - File tmpDir = createTempDir("stateManger", "test"); + File tmpDir = tmpDirs.createNew("stateManager", "test"); conf.setJournalDirName(tmpDir.getPath()) .setLedgerDirNames(new String[] { tmpDir.getPath() }) .setJournalDirName(tmpDir.toString()) @@ -77,35 +79,41 @@ public void tearDown() throws Exception { */ @Test public void testNormalBookieTransitions() throws Exception { - BookieStateManager stateManager = new BookieStateManager(conf, driver); - driver.initialize(conf, () -> { - stateManager.forceToUnregistered(); - // schedule a re-register operation - stateManager.registerBookie(false); - }, NullStatsLogger.INSTANCE); - - stateManager.initState(); - stateManager.registerBookie(true).get(); - - assertTrue(stateManager.isRunning()); - assertTrue(stateManager.isRegistered()); + driver.initialize(conf, NullStatsLogger.INSTANCE); + try (RegistrationManager rm = driver.createRegistrationManager(); + BookieStateManager stateManager = new BookieStateManager(conf, rm)) { + rm.addRegistrationListener(() -> { + stateManager.forceToUnregistered(); + // schedule a re-register operation + stateManager.registerBookie(false); + }); + stateManager.initState(); + stateManager.registerBookie(true).get(); - stateManager.transitionToReadOnlyMode().get(); - assertTrue(stateManager.isReadOnly()); + assertTrue(stateManager.isRunning()); + assertTrue(stateManager.isRegistered()); - stateManager.transitionToWritableMode().get(); - assertTrue(stateManager.isRunning()); - assertFalse(stateManager.isReadOnly()); + stateManager.transitionToReadOnlyMode().get(); + assertTrue(stateManager.isReadOnly()); - stateManager.close(); - assertFalse(stateManager.isRunning()); + stateManager.transitionToWritableMode().get(); + assertTrue(stateManager.isRunning()); + assertFalse(stateManager.isReadOnly()); + stateManager.close(); + assertFalse(stateManager.isRunning()); + } } @Test public void testReadOnlyDisableBookieTransitions() throws Exception { conf.setReadOnlyModeEnabled(false); // readOnly disabled bk stateManager - BookieStateManager stateManager = new BookieStateManager(conf, driver); + driver.initialize( + conf, + NullStatsLogger.INSTANCE); + + RegistrationManager rm = driver.createRegistrationManager(); + BookieStateManager stateManager = new BookieStateManager(conf, rm); // simulate sync shutdown logic in bookie stateManager.setShutdownHandler(new StateManager.ShutdownHandler() { @Override @@ -121,14 +129,11 @@ public void shutdown(int code) { } } }); - driver.initialize( - conf, - () -> { + rm.addRegistrationListener(() -> { stateManager.forceToUnregistered(); // schedule a re-register operation stateManager.registerBookie(false); - }, - NullStatsLogger.INSTANCE); + }); stateManager.initState(); stateManager.registerBookie(true).get(); @@ -145,14 +150,18 @@ @Test public void testReadOnlyBookieTransitions() throws Exception{ // readOnlybk, which use 
override stateManager impl - File tmpDir = createTempDir("stateManger", "test-readonly"); + File tmpDir = tmpDirs.createNew("stateManager", "test-readonly"); final ServerConfiguration readOnlyConf = TestBKConfiguration.newServerConfiguration(); readOnlyConf.setJournalDirName(tmpDir.getPath()) .setLedgerDirNames(new String[] { tmpDir.getPath() }) .setJournalDirName(tmpDir.toString()) .setMetadataServiceUri(zkUtil.getMetadataServiceUri()) .setForceReadOnlyBookie(true); - ReadOnlyBookie readOnlyBookie = new ReadOnlyBookie(readOnlyConf, NullStatsLogger.INSTANCE); + driver.initialize(readOnlyConf, NullStatsLogger.INSTANCE); + + ReadOnlyBookie readOnlyBookie = TestBookieImpl.buildReadOnly( + new TestBookieImpl.ResourceBuilder(readOnlyConf) + .withMetadataDriver(driver).build()); readOnlyBookie.start(); assertTrue(readOnlyBookie.isRunning()); assertTrue(readOnlyBookie.isReadOnly()); @@ -170,15 +179,18 @@ */ @Test public void testRegistration() throws Exception { - BookieStateManager stateManager = new BookieStateManager(conf, driver); driver.initialize( conf, - () -> { + NullStatsLogger.INSTANCE); + + RegistrationManager rm = driver.createRegistrationManager(); + BookieStateManager stateManager = new BookieStateManager(conf, rm); + rm.addRegistrationListener(() -> { stateManager.forceToUnregistered(); // schedule a re-register operation stateManager.registerBookie(false); - }, - NullStatsLogger.INSTANCE); + }); + // simulate sync shutdown logic in bookie stateManager.setShutdownHandler(new StateManager.ShutdownHandler() { @Override diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/SyncThreadTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/SyncThreadTest.java new file mode 100644 index 00000000000..6df1bacb80d --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/SyncThreadTest.java @@ -0,0 +1,421 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + */ +package org.apache.bookkeeper.bookie; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import io.netty.buffer.ByteBuf; +import io.netty.buffer.ByteBufAllocator; +import java.io.IOException; +import java.util.EnumSet; +import java.util.PrimitiveIterator.OfLong; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; +import org.apache.bookkeeper.bookie.CheckpointSource.Checkpoint; +import org.apache.bookkeeper.bookie.LedgerDirsManager.LedgerDirsListener; +import org.apache.bookkeeper.bookie.LedgerDirsManager.NoWritableLedgerDirException; +import org.apache.bookkeeper.common.util.Watcher; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.conf.TestBKConfiguration; +import org.apache.bookkeeper.meta.LedgerManager; +import org.apache.bookkeeper.stats.NullStatsLogger; +import org.apache.bookkeeper.stats.StatsLogger; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Test a synchronization thread. + */ +public class SyncThreadTest { + private static final Logger LOG = LoggerFactory.getLogger(SyncThreadTest.class); + + ExecutorService executor = null; + + @Before + public void setupExecutor() { + executor = Executors.newSingleThreadExecutor(); + } + + @After + public void teardownExecutor() { + if (executor != null) { + executor.shutdownNow(); + executor = null; + } + } + + /** + * Test that if a flush is taking a long time, + * the sync thread will not shut down until it + * has finished. 
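A minimal sketch of the ordering this test relies on, using JDK classes only: a single-thread executor stands in for the sync thread, and two latches stand in for the slow flush. Every name below is illustrative, none is BookKeeper API; it only shows why shutdown must block until the in-flight flush completes.

import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

public class ShutdownBlocksOnFlushSketch {
    public static void main(String[] args) throws Exception {
        ExecutorService syncThread = Executors.newSingleThreadExecutor();
        CountDownLatch flushStarted = new CountDownLatch(1);
        CountDownLatch flushAllowed = new CountDownLatch(1);

        // Stand-in for a long-running flush on the sync thread.
        syncThread.submit(() -> {
            flushStarted.countDown();
            flushAllowed.await();       // the flush is "stuck" until released
            return null;
        });

        flushStarted.await(10, TimeUnit.SECONDS);
        syncThread.shutdown();          // graceful shutdown waits for running tasks

        // Shutdown must not complete while the flush is still in flight.
        if (syncThread.awaitTermination(500, TimeUnit.MILLISECONDS)) {
            throw new AssertionError("shutdown finished during an in-flight flush");
        }

        flushAllowed.countDown();       // let the flush finish ...
        if (!syncThread.awaitTermination(10, TimeUnit.SECONDS)) {
            throw new AssertionError("shutdown never completed");
        }                               // ... and only now does shutdown complete
    }
}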
+ */ + @Test + public void testSyncThreadLongShutdown() throws Exception { + int flushInterval = 100; + ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); + conf.setFlushInterval(flushInterval); + CheckpointSource checkpointSource = new DummyCheckpointSource(); + LedgerDirsListener listener = new LedgerDirsListener() {}; + + final CountDownLatch checkpointCalledLatch = new CountDownLatch(1); + final CountDownLatch checkpointLatch = new CountDownLatch(1); + + final CountDownLatch flushCalledLatch = new CountDownLatch(1); + final CountDownLatch flushLatch = new CountDownLatch(1); + final AtomicBoolean failedSomewhere = new AtomicBoolean(false); + LedgerStorage storage = new DummyLedgerStorage() { + @Override + public void flush() throws IOException { + flushCalledLatch.countDown(); + try { + flushLatch.await(); + } catch (InterruptedException ie) { + Thread.currentThread().interrupt(); + LOG.error("Interrupted in flush thread", ie); + failedSomewhere.set(true); + } + } + + @Override + public void checkpoint(Checkpoint checkpoint) + throws IOException { + checkpointCalledLatch.countDown(); + try { + checkpointLatch.await(); + } catch (InterruptedException ie) { + Thread.currentThread().interrupt(); + LOG.error("Interrupted in checkpoint thread", ie); + failedSomewhere.set(true); + } + } + }; + + final SyncThread t = new SyncThread(conf, listener, storage, checkpointSource, NullStatsLogger.INSTANCE); + t.startCheckpoint(Checkpoint.MAX); + assertTrue("Checkpoint should have been called", + checkpointCalledLatch.await(10, TimeUnit.SECONDS)); + Future<Boolean> done = executor.submit(() -> { + try { + t.shutdown(); + } catch (InterruptedException ie) { + Thread.currentThread().interrupt(); + LOG.error("Interrupted shutting down sync thread", ie); + failedSomewhere.set(true); + return false; + } + return true; + }); + checkpointLatch.countDown(); + assertFalse("Shutdown shouldn't have finished", done.isDone()); + assertTrue("Flush should have been called", + flushCalledLatch.await(10, TimeUnit.SECONDS)); + + assertFalse("Shutdown shouldn't have finished", done.isDone()); + flushLatch.countDown(); + + assertTrue("Shutdown should have finished successfully", done.get(10, TimeUnit.SECONDS)); + assertFalse("Shouldn't have failed anywhere", failedSomewhere.get()); + } + + /** + * Test that sync thread suspension works. + * i.e., when we suspend the sync thread, nothing + * will be synced. 
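The suspend/resume contract can likewise be sketched with JDK classes alone: the ticker thread below stands in for the periodic checkpoint loop, and the flag stands in for suspendSync()/resumeSync(). All names are illustrative; a tiny benign race between the flag check and the increment is tolerated for brevity.

import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;

public class SuspendResumeSketch {
    public static void main(String[] args) throws Exception {
        AtomicInteger checkpoints = new AtomicInteger();
        AtomicBoolean suspended = new AtomicBoolean(false);

        // Stand-in for the sync thread's periodic checkpoint loop.
        Thread ticker = new Thread(() -> {
            while (!Thread.currentThread().isInterrupted()) {
                if (!suspended.get()) {
                    checkpoints.incrementAndGet();
                }
                try {
                    Thread.sleep(10);
                } catch (InterruptedException ie) {
                    return;
                }
            }
        });
        ticker.setDaemon(true);
        ticker.start();

        while (checkpoints.get() == 0) {
            Thread.sleep(10);           // wait for the first checkpoint
        }

        suspended.set(true);
        Thread.sleep(50);               // let any in-flight tick drain
        int frozen = checkpoints.get();
        Thread.sleep(100);
        if (checkpoints.get() != frozen) {
            throw new AssertionError("checkpointed while suspended");
        }

        suspended.set(false);
        while (checkpoints.get() == frozen) {
            Thread.sleep(10);           // counting resumes after "resumeSync()"
        }
        ticker.interrupt();
    }
}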
+ */ + @Test + public void testSyncThreadSuspension() throws Exception { + int flushInterval = 100; + ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); + conf.setFlushInterval(flushInterval); + CheckpointSource checkpointSource = new DummyCheckpointSource(); + LedgerDirsListener listener = new LedgerDirsListener() {}; + + final AtomicInteger checkpointCount = new AtomicInteger(0); + LedgerStorage storage = new DummyLedgerStorage() { + @Override + public void checkpoint(Checkpoint checkpoint) + throws IOException { + checkpointCount.incrementAndGet(); + } + }; + final SyncThread t = new SyncThread(conf, listener, storage, checkpointSource, NullStatsLogger.INSTANCE); + t.startCheckpoint(Checkpoint.MAX); + while (checkpointCount.get() == 0) { + Thread.sleep(flushInterval); + } + t.suspendSync(); + Thread.sleep(flushInterval); + int count = checkpointCount.get(); + for (int i = 0; i < 10; i++) { + t.startCheckpoint(Checkpoint.MAX); + assertEquals("Checkpoint count shouldn't change", count, checkpointCount.get()); + } + t.resumeSync(); + int i = 0; + while (checkpointCount.get() == count) { + Thread.sleep(flushInterval); + i++; + if (i > 100) { + fail("Checkpointing never resumed"); + } + } + t.shutdown(); + } + + /** + * Test that if the ledger storage throws a + * runtime exception, the bookie will be told + * to shut down. + */ + @Test + public void testSyncThreadShutdownOnError() throws Exception { + int flushInterval = 100; + ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); + conf.setFlushInterval(flushInterval); + CheckpointSource checkpointSource = new DummyCheckpointSource(); + final CountDownLatch fatalLatch = new CountDownLatch(1); + LedgerDirsListener listener = new LedgerDirsListener() { + @Override + public void fatalError() { + fatalLatch.countDown(); + } + }; + + LedgerStorage storage = new DummyLedgerStorage() { + @Override + public void checkpoint(Checkpoint checkpoint) + throws IOException { + throw new RuntimeException("Fatal error in sync thread"); + } + }; + final SyncThread t = new SyncThread(conf, listener, storage, checkpointSource, NullStatsLogger.INSTANCE); + t.startCheckpoint(Checkpoint.MAX); + assertTrue("Should have called fatal error", fatalLatch.await(10, TimeUnit.SECONDS)); + t.shutdown(); + } + + /** + * Test that if the ledger storage throws + * a disk full exception, the owner of the sync + * thread will be notified. 
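Both the fatal-error case above and the disk-full case below share one dispatch shape: the checkpoint loop catches the failure and notifies the registered listener instead of dying silently. A JDK-only sketch of that shape; DirsListener and DiskFullException are stand-ins, not the real LedgerDirsListener or NoWritableLedgerDirException.

import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

public class ListenerNotificationSketch {
    // Stand-ins; only the notification shape matters here.
    interface DirsListener {
        void allDisksFull();
        void fatalError();
    }
    static class DiskFullException extends Exception {}

    public static void main(String[] args) throws Exception {
        CountDownLatch diskFullLatch = new CountDownLatch(1);
        DirsListener listener = new DirsListener() {
            @Override public void allDisksFull() { diskFullLatch.countDown(); }
            @Override public void fatalError() { throw new AssertionError("unexpected"); }
        };

        ExecutorService syncThread = Executors.newSingleThreadExecutor();
        syncThread.submit(() -> {
            try {
                throw new DiskFullException();      // simulated checkpoint failure
            } catch (DiskFullException e) {
                listener.allDisksFull();            // expected failures get a specific callback
            } catch (RuntimeException e) {
                listener.fatalError();              // anything else is fatal
            }
        });

        if (!diskFullLatch.await(10, TimeUnit.SECONDS)) {
            throw new AssertionError("disk-full was never reported");
        }
        syncThread.shutdownNow();
    }
}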
+ */ + @Test + public void testSyncThreadDisksFull() throws Exception { + int flushInterval = 100; + ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); + conf.setFlushInterval(flushInterval); + CheckpointSource checkpointSource = new DummyCheckpointSource(); + final CountDownLatch diskFullLatch = new CountDownLatch(1); + LedgerDirsListener listener = new LedgerDirsListener() { + @Override + public void allDisksFull(boolean highPriorityWritesAllowed) { + diskFullLatch.countDown(); + } + }; + + LedgerStorage storage = new DummyLedgerStorage() { + @Override + public void checkpoint(Checkpoint checkpoint) + throws IOException { + throw new NoWritableLedgerDirException("Disk full error in sync thread"); + } + }; + final SyncThread t = new SyncThread(conf, listener, storage, checkpointSource, NullStatsLogger.INSTANCE); + t.startCheckpoint(Checkpoint.MAX); + assertTrue("Should have disk full error", diskFullLatch.await(10, TimeUnit.SECONDS)); + t.shutdown(); + } + + private static class DummyCheckpointSource implements CheckpointSource { + @Override + public Checkpoint newCheckpoint() { + return Checkpoint.MAX; + } + + @Override + public void checkpointComplete(Checkpoint checkpoint, boolean compact) + throws IOException { + } + } + + private static class DummyLedgerStorage implements LedgerStorage { + @Override + public void initialize( + ServerConfiguration conf, + LedgerManager ledgerManager, + LedgerDirsManager ledgerDirsManager, + LedgerDirsManager indexDirsManager, + StatsLogger statsLogger, + ByteBufAllocator allocator) + throws IOException { + } + @Override + public void setStateManager(StateManager stateManager) {} + @Override + public void setCheckpointSource(CheckpointSource checkpointSource) {} + @Override + public void setCheckpointer(Checkpointer checkpointer) {} + + @Override + public void deleteLedger(long ledgerId) throws IOException { + } + + @Override + public void start() { + } + + @Override + public void shutdown() throws InterruptedException { + } + + @Override + public boolean ledgerExists(long ledgerId) throws IOException { + return true; + } + + @Override + public boolean entryExists(long ledgerId, long entryId) throws IOException { + return false; + } + + @Override + public boolean setFenced(long ledgerId) throws IOException { + return true; + } + + @Override + public boolean isFenced(long ledgerId) throws IOException { + return false; + } + + @Override + public void setMasterKey(long ledgerId, byte[] masterKey) + throws IOException { + } + + @Override + public byte[] readMasterKey(long ledgerId) + throws IOException, BookieException { + return new byte[0]; + } + + @Override + public long addEntry(ByteBuf entry) throws IOException { + return 1L; + } + + @Override + public ByteBuf getEntry(long ledgerId, long entryId) + throws IOException { + return null; + } + + @Override + public long getLastAddConfirmed(long ledgerId) throws IOException { + return 0; + } + + @Override + public void flush() throws IOException { + } + + @Override + public void setExplicitLac(long ledgerId, ByteBuf lac) { + } + + @Override + public ByteBuf getExplicitLac(long ledgerId) { + return null; + } + + @Override + public boolean waitForLastAddConfirmedUpdate(long ledgerId, + long previousLAC, + Watcher watcher) + throws IOException { + return false; + } + + @Override + public void cancelWaitForLastAddConfirmedUpdate(long ledgerId, + Watcher watcher) + throws IOException { + } + + @Override + public void checkpoint(Checkpoint checkpoint) + throws IOException { + } + + 
@Override + public void registerLedgerDeletionListener(LedgerDeletionListener listener) { + } + + @Override + public OfLong getListOfEntriesOfLedger(long ledgerId) { + return null; + } + + @Override + public void setLimboState(long ledgerId) throws IOException { + throw new UnsupportedOperationException( + "Limbo state only supported for DbLedgerStorage"); + } + + @Override + public boolean hasLimboState(long ledgerId) throws IOException { + throw new UnsupportedOperationException( + "Limbo state only supported for DbLedgerStorage"); + } + + @Override + public void clearLimboState(long ledgerId) throws IOException { + throw new UnsupportedOperationException( + "Limbo state only supported for DbLedgerStorage"); + } + + @Override + public EnumSet getStorageStateFlags() throws IOException { + return EnumSet.noneOf(StorageState.class); + } + + @Override + public void setStorageStateFlag(StorageState flag) throws IOException { + } + + @Override + public void clearStorageStateFlag(StorageState flag) throws IOException { + } + + } + +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/TestBookieImpl.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/TestBookieImpl.java new file mode 100644 index 00000000000..cd0e967b61c --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/TestBookieImpl.java @@ -0,0 +1,213 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.bookie; + +import io.netty.buffer.UnpooledByteBufAllocator; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.discover.RegistrationManager; +import org.apache.bookkeeper.meta.LedgerManager; +import org.apache.bookkeeper.meta.LedgerManagerFactory; +import org.apache.bookkeeper.meta.MetadataBookieDriver; +import org.apache.bookkeeper.meta.NullMetadataBookieDriver; +import org.apache.bookkeeper.proto.SimpleBookieServiceInfoProvider; +import org.apache.bookkeeper.stats.NullStatsLogger; +import org.apache.bookkeeper.stats.StatsLogger; +import org.apache.bookkeeper.util.DiskChecker; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Test wrapper for BookieImpl that chooses defaults for dependencies. 
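A hedged usage sketch, under stated assumptions: the temp-dir handling is copied from the tests above, start() and shutdown() follow the standard Bookie lifecycle as driven elsewhere in this patch, and with no metadata service URI configured the ResourceBuilder below falls back to NullMetadataBookieDriver, so no ZooKeeper is needed.

import java.io.File;
import org.apache.bookkeeper.conf.ServerConfiguration;
import org.apache.bookkeeper.conf.TestBKConfiguration;

public class TestBookieImplUsageSketch {
    public static void main(String[] args) throws Exception {
        // Illustrative temp-dir setup, mirroring SortedLedgerStorageTest.setUp() above.
        File tmpDir = File.createTempFile("bkTest", ".dir");
        tmpDir.delete();
        tmpDir.mkdir();

        ServerConfiguration conf = TestBKConfiguration.newServerConfiguration();
        conf.setJournalDirName(tmpDir.getPath())
            .setLedgerDirNames(new String[] { tmpDir.getPath() });

        // No metadata service URI set: ResourceBuilder falls back to
        // NullMetadataBookieDriver, so the bookie comes up without ZooKeeper.
        TestBookieImpl bookie = new TestBookieImpl(conf);
        bookie.start();
        // ... drive reads/writes against the bookie ...
        bookie.shutdown();
    }
}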
+ */ +public class TestBookieImpl extends BookieImpl { + private static final Logger log = LoggerFactory.getLogger(TestBookieImpl.class); + + private final Resources resources; + + public TestBookieImpl(ServerConfiguration conf) throws Exception { + this(new ResourceBuilder(conf).build()); + } + + public TestBookieImpl(Resources resources, StatsLogger statsLogger) throws Exception { + super(resources.conf, + resources.registrationManager, + resources.storage, + resources.diskChecker, + resources.ledgerDirsManager, + resources.indexDirsManager, + statsLogger, + UnpooledByteBufAllocator.DEFAULT, + new SimpleBookieServiceInfoProvider(resources.conf)); + this.resources = resources; + } + + public TestBookieImpl(Resources resources) throws Exception { + super(resources.conf, + resources.registrationManager, + resources.storage, + resources.diskChecker, + resources.ledgerDirsManager, + resources.indexDirsManager, + NullStatsLogger.INSTANCE, + UnpooledByteBufAllocator.DEFAULT, + new SimpleBookieServiceInfoProvider(resources.conf)); + this.resources = resources; + } + + public static ReadOnlyBookie buildReadOnly(Resources resources) throws Exception { + return new ReadOnlyBookie(resources.conf, + resources.registrationManager, + resources.storage, + resources.diskChecker, + resources.ledgerDirsManager, + resources.indexDirsManager, + NullStatsLogger.INSTANCE, + UnpooledByteBufAllocator.DEFAULT, + new SimpleBookieServiceInfoProvider(resources.conf)); + } + + public static ReadOnlyBookie buildReadOnly(ServerConfiguration conf) throws Exception { + return buildReadOnly(new ResourceBuilder(conf).build()); + } + + @Override + int shutdown(int exitCode) { + int ret = super.shutdown(exitCode); + resources.cleanup(); + return ret; + } + + /** + * Manages bookie resources including their cleanup. + */ + public static class Resources { + private final ServerConfiguration conf; + private final MetadataBookieDriver metadataDriver; + private final RegistrationManager registrationManager; + private final LedgerManagerFactory ledgerManagerFactory; + private final LedgerManager ledgerManager; + private final LedgerStorage storage; + private final DiskChecker diskChecker; + private final LedgerDirsManager ledgerDirsManager; + private final LedgerDirsManager indexDirsManager; + + Resources(ServerConfiguration conf, + MetadataBookieDriver metadataDriver, + RegistrationManager registrationManager, + LedgerManagerFactory ledgerManagerFactory, + LedgerManager ledgerManager, + LedgerStorage storage, + DiskChecker diskChecker, + LedgerDirsManager ledgerDirsManager, + LedgerDirsManager indexDirsManager) { + this.conf = conf; + this.metadataDriver = metadataDriver; + this.registrationManager = registrationManager; + this.ledgerManagerFactory = ledgerManagerFactory; + this.ledgerManager = ledgerManager; + this.storage = storage; + this.diskChecker = diskChecker; + this.ledgerDirsManager = ledgerDirsManager; + this.indexDirsManager = indexDirsManager; + } + + void cleanup() { + try { + ledgerManager.close(); + } catch (Exception e) { + log.warn("Error shutting down ledger manager", e); + } + try { + ledgerManagerFactory.close(); + } catch (Exception e) { + log.warn("Error shutting down ledger manager factory", e); + } + registrationManager.close(); + try { + metadataDriver.close(); + } catch (Exception e) { + log.warn("Error shutting down metadata driver", e); + } + } + } + + /** + * Builder for resources. 
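Its main knob is swapping in dependencies the test already owns, as StateManagerTest above does; restated compactly (readOnlyConf and driver are assumed to come from the surrounding test):

// Reuse an already-initialized metadata driver instead of letting the builder
// create one (mirrors StateManagerTest.testReadOnlyBookieTransitions above).
TestBookieImpl.Resources resources =
        new TestBookieImpl.ResourceBuilder(readOnlyConf)
                .withMetadataDriver(driver)
                .build();
ReadOnlyBookie readOnlyBookie = TestBookieImpl.buildReadOnly(resources);
readOnlyBookie.start();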
+ */ + public static class ResourceBuilder { + private final ServerConfiguration conf; + private MetadataBookieDriver metadataBookieDriver; + private RegistrationManager registrationManager; + + public ResourceBuilder(ServerConfiguration conf) { + this.conf = conf; + } + + public ResourceBuilder withMetadataDriver(MetadataBookieDriver driver) { + this.metadataBookieDriver = driver; + return this; + } + + public ResourceBuilder withRegistrationManager(RegistrationManager registrationManager) { + this.registrationManager = registrationManager; + return this; + } + + public Resources build() throws Exception { + return build(NullStatsLogger.INSTANCE); + } + + public Resources build(StatsLogger statsLogger) throws Exception { + if (metadataBookieDriver == null) { + if (conf.getMetadataServiceUri() == null) { + metadataBookieDriver = new NullMetadataBookieDriver(); + } else { + metadataBookieDriver = BookieResources.createMetadataDriver(conf, statsLogger); + } + } + if (registrationManager == null) { + registrationManager = metadataBookieDriver.createRegistrationManager(); + } + LedgerManagerFactory ledgerManagerFactory = metadataBookieDriver.getLedgerManagerFactory(); + LedgerManager ledgerManager = ledgerManagerFactory.newLedgerManager(); + + DiskChecker diskChecker = BookieResources.createDiskChecker(conf); + LedgerDirsManager ledgerDirsManager = BookieResources.createLedgerDirsManager( + conf, diskChecker, statsLogger); + LedgerDirsManager indexDirsManager = BookieResources.createIndexDirsManager( + conf, diskChecker, statsLogger, ledgerDirsManager); + + LedgerStorage storage = BookieResources.createLedgerStorage( + conf, ledgerManager, ledgerDirsManager, indexDirsManager, + statsLogger, UnpooledByteBufAllocator.DEFAULT); + + return new Resources(conf, + metadataBookieDriver, + registrationManager, + ledgerManagerFactory, + ledgerManager, + storage, + diskChecker, + ledgerDirsManager, + indexDirsManager); + } + } +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/TestEntryMemTable.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/TestEntryMemTable.java deleted file mode 100644 index 68e3eeba969..00000000000 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/TestEntryMemTable.java +++ /dev/null @@ -1,301 +0,0 @@ -/* - * - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.bookkeeper.bookie; - -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertTrue; - -import io.netty.buffer.ByteBuf; - -import java.io.IOException; -import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.Collections; -import java.util.HashSet; -import java.util.List; -import java.util.Random; -import java.util.Set; -import java.util.concurrent.ConcurrentHashMap; - -import org.apache.bookkeeper.bookie.Bookie.NoLedgerException; -import org.apache.bookkeeper.conf.ServerConfiguration; -import org.apache.bookkeeper.conf.TestBKConfiguration; -import org.apache.bookkeeper.stats.NullStatsLogger; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.junit.runners.Parameterized.Parameters; - -/** - * Test the EntryMemTable class. - */ -@RunWith(Parameterized.class) -public class TestEntryMemTable implements CacheCallback, SkipListFlusher, CheckpointSource { - - private Class entryMemTableClass; - private EntryMemTable memTable; - private final Random random = new Random(); - private TestCheckPoint curCheckpoint = new TestCheckPoint(0, 0); - - @Parameters - public static Collection memTableClass() { - return Arrays.asList(new Object[][] { { EntryMemTable.class }, { EntryMemTableWithParallelFlusher.class } }); - } - - public TestEntryMemTable(Class entryMemTableClass) { - this.entryMemTableClass = entryMemTableClass; - } - - @Override - public Checkpoint newCheckpoint() { - return curCheckpoint; - } - - @Override - public void checkpointComplete(Checkpoint checkpoint, boolean compact) - throws IOException { - } - - @Before - public void setUp() throws Exception { - if (entryMemTableClass.equals(EntryMemTableWithParallelFlusher.class)) { - ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); - this.memTable = new EntryMemTableWithParallelFlusher(conf, this, NullStatsLogger.INSTANCE); - } else { - this.memTable = new EntryMemTable(TestBKConfiguration.newServerConfiguration(), this, - NullStatsLogger.INSTANCE); - } - } - - @After - public void cleanup() throws Exception{ - this.memTable.close(); - } - - @Test - public void testLogMark() throws IOException { - LogMark mark = new LogMark(); - assertTrue(mark.compare(new LogMark()) == 0); - assertTrue(mark.compare(LogMark.MAX_VALUE) < 0); - mark.setLogMark(3, 11); - byte[] data = new byte[16]; - ByteBuffer buf = ByteBuffer.wrap(data); - mark.writeLogMark(buf); - buf.flip(); - LogMark mark1 = new LogMark(9, 13); - assertTrue(mark1.compare(mark) > 0); - mark1.readLogMark(buf); - assertTrue(mark1.compare(mark) == 0); - } - - /** - * Basic put/get. 
- * @throws IOException - * */ - @Test - public void testBasicOps() throws IOException { - long ledgerId = 1; - long entryId = 1; - byte[] data = new byte[10]; - random.nextBytes(data); - ByteBuffer buf = ByteBuffer.wrap(data); - memTable.addEntry(ledgerId, entryId, buf, this); - buf.rewind(); - EntryKeyValue kv = memTable.getEntry(ledgerId, entryId); - assertTrue(kv.getLedgerId() == ledgerId); - assertTrue(kv.getEntryId() == entryId); - assertTrue(kv.getValueAsByteBuffer().nioBuffer().equals(buf)); - memTable.flush(this); - } - - @Override - public void onSizeLimitReached(Checkpoint cp) throws IOException { - // No-op - } - - public void process(long ledgerId, long entryId, ByteBuf entry) - throws IOException { - // No-op - } - - /** - * Test read/write across snapshot. - * @throws IOException - */ - @Test - public void testScanAcrossSnapshot() throws IOException { - byte[] data = new byte[10]; - List keyValues = new ArrayList(); - for (long entryId = 1; entryId < 100; entryId++) { - for (long ledgerId = 1; ledgerId < 3; ledgerId++) { - random.nextBytes(data); - memTable.addEntry(ledgerId, entryId, ByteBuffer.wrap(data), this); - keyValues.add(memTable.getEntry(ledgerId, entryId)); - if (random.nextInt(16) == 0) { - memTable.snapshot(); - } - } - } - - for (EntryKeyValue kv : keyValues) { - assertTrue(memTable.getEntry(kv.getLedgerId(), kv.getEntryId()).equals(kv)); - } - memTable.flush(this, Checkpoint.MAX); - } - - private class KVFLusher implements SkipListFlusher { - final Set keyValues; - - KVFLusher(final Set keyValues) { - this.keyValues = keyValues; - } - - @Override - public void process(long ledgerId, long entryId, ByteBuf entry) throws IOException { - assertTrue(ledgerId + ":" + entryId + " is duplicate in store!", - keyValues.add(new EntryKeyValue(ledgerId, entryId, entry.array()))); - } - } - - private class NoLedgerFLusher implements SkipListFlusher { - @Override - public void process(long ledgerId, long entryId, ByteBuf entry) throws IOException { - throw new NoLedgerException(ledgerId); - } - } - - /** - * Test flush w/ logMark parameter. - * @throws IOException - */ - @Test - public void testFlushLogMark() throws IOException { - Set flushedKVs = Collections.newSetFromMap(new ConcurrentHashMap()); - KVFLusher flusher = new KVFLusher(flushedKVs); - - curCheckpoint.setCheckPoint(2, 2); - - byte[] data = new byte[10]; - long ledgerId = 100; - for (long entryId = 1; entryId < 100; entryId++) { - random.nextBytes(data); - memTable.addEntry(ledgerId, entryId, ByteBuffer.wrap(data), this); - } - - assertNull(memTable.snapshot(new TestCheckPoint(1, 1))); - assertNotNull(memTable.snapshot(new TestCheckPoint(3, 3))); - - assertTrue(0 < memTable.flush(flusher)); - assertTrue(0 == memTable.flush(flusher)); - - curCheckpoint.setCheckPoint(4, 4); - - random.nextBytes(data); - memTable.addEntry(ledgerId, 101, ByteBuffer.wrap(data), this); - assertTrue(0 == memTable.flush(flusher)); - - assertTrue(0 == memTable.flush(flusher, new TestCheckPoint(3, 3))); - assertTrue(0 < memTable.flush(flusher, new TestCheckPoint(4, 5))); - } - - /** - * Test snapshot/flush interaction. 
- * @throws IOException - */ - @Test - public void testFlushSnapshot() throws IOException { - HashSet keyValues = new HashSet(); - Set flushedKVs = Collections.newSetFromMap(new ConcurrentHashMap()); - KVFLusher flusher = new KVFLusher(flushedKVs); - - byte[] data = new byte[10]; - for (long entryId = 1; entryId < 100; entryId++) { - for (long ledgerId = 1; ledgerId < 100; ledgerId++) { - random.nextBytes(data); - assertTrue(ledgerId + ":" + entryId + " is duplicate in mem-table!", - memTable.addEntry(ledgerId, entryId, ByteBuffer.wrap(data), this) != 0); - assertTrue(ledgerId + ":" + entryId + " is duplicate in hash-set!", - keyValues.add(memTable.getEntry(ledgerId, entryId))); - if (random.nextInt(16) == 0) { - if (null != memTable.snapshot()) { - if (random.nextInt(2) == 0) { - memTable.flush(flusher); - } - } - } - } - } - - memTable.flush(flusher, Checkpoint.MAX); - for (EntryKeyValue kv : keyValues) { - assertTrue("kv " + kv.toString() + " was not flushed!", flushedKVs.contains(kv)); - } - } - - /** - * Test NoLedger exception/flush interaction. - * @throws IOException - */ - @Test - public void testNoLedgerException() throws IOException { - NoLedgerFLusher flusher = new NoLedgerFLusher(); - - byte[] data = new byte[10]; - for (long entryId = 1; entryId < 100; entryId++) { - for (long ledgerId = 1; ledgerId < 100; ledgerId++) { - random.nextBytes(data); - if (random.nextInt(16) == 0) { - if (null != memTable.snapshot()) { - memTable.flush(flusher); - } - } - } - } - - memTable.flush(flusher, Checkpoint.MAX); - } - - private static class TestCheckPoint implements Checkpoint { - - LogMark mark; - - public TestCheckPoint(long fid, long fpos) { - mark = new LogMark(fid, fpos); - } - - private void setCheckPoint(long fid, long fpos) { - mark.setLogMark(fid, fpos); - } - - @Override - public int compareTo(Checkpoint o) { - if (Checkpoint.MAX == o) { - return -1; - } - return mark.compare(((TestCheckPoint) o).mark); - } - - } -} - diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/TestFileInfoBackingCache.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/TestFileInfoBackingCache.java deleted file mode 100644 index 77f5ebace77..00000000000 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/TestFileInfoBackingCache.java +++ /dev/null @@ -1,288 +0,0 @@ -/** - * - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- * - */ -package org.apache.bookkeeper.bookie; - -import com.google.common.cache.Cache; -import com.google.common.cache.CacheBuilder; -import com.google.common.cache.RemovalNotification; -import com.google.common.util.concurrent.ThreadFactoryBuilder; - -import java.io.File; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashSet; -import java.util.List; -import java.util.Random; -import java.util.Set; -import java.util.concurrent.Callable; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.Future; -import java.util.concurrent.ThreadFactory; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.stream.Collectors; -import java.util.stream.IntStream; -import java.util.stream.LongStream; -import lombok.extern.slf4j.Slf4j; -import org.apache.bookkeeper.bookie.FileInfoBackingCache.CachedFileInfo; - -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; - -/** - * Tests for FileInfoBackingCache. - */ -@Slf4j -public class TestFileInfoBackingCache { - final byte[] masterKey = new byte[0]; - final File baseDir; - final ThreadFactory threadFactory = new ThreadFactoryBuilder() - .setNameFormat("backing-cache-test-%d").setDaemon(true).build(); - ExecutorService executor; - - public TestFileInfoBackingCache() throws Exception { - baseDir = File.createTempFile("foo", "bar"); - } - - @Before - public void setup() throws Exception { - Assert.assertTrue(baseDir.delete()); - Assert.assertTrue(baseDir.mkdirs()); - baseDir.deleteOnExit(); - - executor = Executors.newCachedThreadPool(threadFactory); - } - - @After - public void tearDown() throws Exception { - if (executor != null) { - executor.shutdown(); - } - } - - @Test - public void basicTest() throws Exception { - FileInfoBackingCache cache = new FileInfoBackingCache( - (ledgerId, createIfNotFound) -> { - File f = new File(baseDir, String.valueOf(ledgerId)); - f.deleteOnExit(); - return f; - }, FileInfo.CURRENT_HEADER_VERSION); - CachedFileInfo fi = cache.loadFileInfo(1, masterKey); - Assert.assertEquals(fi.getRefCount(), 1); - CachedFileInfo fi2 = cache.loadFileInfo(2, masterKey); - Assert.assertEquals(fi2.getRefCount(), 1); - CachedFileInfo fi3 = cache.loadFileInfo(1, null); - Assert.assertEquals(fi, fi3); - Assert.assertEquals(fi3.getRefCount(), 2); - - // check that it expires correctly - fi.release(); - fi3.release(); - - Assert.assertEquals(fi.getRefCount(), FileInfoBackingCache.DEAD_REF); - CachedFileInfo fi4 = cache.loadFileInfo(1, null); - Assert.assertFalse(fi4 == fi); - Assert.assertEquals(fi.getRefCount(), FileInfoBackingCache.DEAD_REF); - Assert.assertEquals(fi4.getRefCount(), 1); - Assert.assertEquals(fi.getLf(), fi4.getLf()); - } - - @Test(expected = IOException.class) - public void testNoKey() throws Exception { - FileInfoBackingCache cache = new FileInfoBackingCache( - (ledgerId, createIfNotFound) -> { - Assert.assertFalse(createIfNotFound); - throw new Bookie.NoLedgerException(ledgerId); - }, FileInfo.CURRENT_HEADER_VERSION); - cache.loadFileInfo(1, null); - } - - /** - * Of course this can't prove they don't exist, but - * try to shake them out none the less. 
- */ - @Test - public void testForDeadlocks() throws Exception { - int numRunners = 20; - int maxLedgerId = 10; - AtomicBoolean done = new AtomicBoolean(false); - - FileInfoBackingCache cache = new FileInfoBackingCache( - (ledgerId, createIfNotFound) -> { - File f = new File(baseDir, String.valueOf(ledgerId)); - f.deleteOnExit(); - return f; - }, FileInfo.CURRENT_HEADER_VERSION); - Iterable>> futures = - IntStream.range(0, numRunners).mapToObj( - (i) -> { - Callable> c = () -> { - Random r = new Random(); - List fileInfos = new ArrayList<>(); - Set allFileInfos = new HashSet<>(); - while (!done.get()) { - if (r.nextBoolean() && fileInfos.size() < 5) { // take a reference - CachedFileInfo fi = cache.loadFileInfo(r.nextInt(maxLedgerId), masterKey); - Assert.assertFalse(fi.isClosed()); - allFileInfos.add(fi); - fileInfos.add(fi); - } else { // release a reference - Collections.shuffle(fileInfos); - if (!fileInfos.isEmpty()) { - fileInfos.remove(0).release(); - } - } - } - for (CachedFileInfo fi : fileInfos) { - Assert.assertFalse(fi.isClosed()); - fi.release(); - } - return allFileInfos; - }; - return executor.submit(c); - }).collect(Collectors.toList()); - Thread.sleep(TimeUnit.SECONDS.toMillis(10)); - done.set(true); - - // ensure all threads are finished operating on cache, before checking any - for (Future> f : futures) { - f.get(); - } - - for (Future> f : futures) { - for (CachedFileInfo fi : f.get()) { - Assert.assertTrue(fi.isClosed()); - Assert.assertEquals(FileInfoBackingCache.DEAD_REF, fi.getRefCount()); - } - } - - // try to load all ledgers again. - // They should be loaded fresh (i.e. this load should be only reference) - for (int i = 0; i < maxLedgerId; i++) { - Assert.assertEquals(1, cache.loadFileInfo(i, masterKey).getRefCount()); - } - } - - @Test - public void testRefCountRace() throws Exception { - AtomicBoolean done = new AtomicBoolean(false); - FileInfoBackingCache cache = new FileInfoBackingCache( - (ledgerId, createIfNotFound) -> { - File f = new File(baseDir, String.valueOf(ledgerId)); - f.deleteOnExit(); - return f; - }, FileInfo.CURRENT_HEADER_VERSION); - - Iterable>> futures = - IntStream.range(0, 2).mapToObj( - (i) -> { - Callable> c = () -> { - Set allFileInfos = new HashSet<>(); - while (!done.get()) { - CachedFileInfo fi = cache.loadFileInfo(1, masterKey); - Assert.assertFalse(fi.isClosed()); - allFileInfos.add(fi); - fi.release(); - } - return allFileInfos; - }; - return executor.submit(c); - }).collect(Collectors.toList()); - Thread.sleep(TimeUnit.SECONDS.toMillis(10)); - done.set(true); - - // ensure all threads are finished operating on cache, before checking any - for (Future> f : futures) { - f.get(); - } - - for (Future> f : futures) { - for (CachedFileInfo fi : f.get()) { - Assert.assertTrue(fi.isClosed()); - Assert.assertEquals(FileInfoBackingCache.DEAD_REF, fi.getRefCount()); - } - } - } - - private void guavaEvictionListener(RemovalNotification notification) { - notification.getValue().release(); - } - - @Test - public void testRaceGuavaEvictAndReleaseBeforeRetain() throws Exception { - AtomicBoolean done = new AtomicBoolean(false); - FileInfoBackingCache cache = new FileInfoBackingCache( - (ledgerId, createIfNotFound) -> { - File f = new File(baseDir, String.valueOf(ledgerId)); - f.deleteOnExit(); - return f; - }, FileInfo.CURRENT_HEADER_VERSION); - - Cache guavaCache = CacheBuilder.newBuilder() - .maximumSize(1) - .removalListener(this::guavaEvictionListener) - .build(); - - Iterable>> futures = - LongStream.range(0L, 2L).mapToObj( - (i) -> { - 
Callable> c = () -> { - Set allFileInfos = new HashSet<>(); - while (!done.get()) { - CachedFileInfo fi = null; - - do { - fi = guavaCache.get( - i, () -> cache.loadFileInfo(i, masterKey)); - allFileInfos.add(fi); - Thread.sleep(100); - } while (!fi.tryRetain()); - - Assert.assertFalse(fi.isClosed()); - fi.release(); - } - return allFileInfos; - }; - return executor.submit(c); - }).collect(Collectors.toList()); - Thread.sleep(TimeUnit.SECONDS.toMillis(10)); - done.set(true); - - // ensure all threads are finished operating on cache, before checking any - for (Future> f : futures) { - f.get(); - } - guavaCache.invalidateAll(); - - for (Future> f : futures) { - for (CachedFileInfo fi : f.get()) { - Assert.assertTrue(fi.isClosed()); - Assert.assertEquals(FileInfoBackingCache.DEAD_REF, fi.getRefCount()); - } - } - - } -} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/TestGcOverreplicatedLedger.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/TestGcOverreplicatedLedger.java deleted file mode 100644 index 9feb9df1025..00000000000 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/TestGcOverreplicatedLedger.java +++ /dev/null @@ -1,245 +0,0 @@ -/** - * - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- * - */ - -package org.apache.bookkeeper.bookie; - -import com.google.common.collect.Lists; - -import java.io.IOException; -import java.net.UnknownHostException; -import java.util.Arrays; -import java.util.Collection; -import java.util.List; -import java.util.SortedMap; -import java.util.concurrent.CountDownLatch; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicReference; -import org.apache.bookkeeper.bookie.GarbageCollector.GarbageCleaner; -import org.apache.bookkeeper.client.BKException; -import org.apache.bookkeeper.client.BookKeeper.DigestType; -import org.apache.bookkeeper.client.LedgerHandle; -import org.apache.bookkeeper.client.LedgerMetadata; -import org.apache.bookkeeper.conf.ServerConfiguration; -import org.apache.bookkeeper.meta.HierarchicalLedgerManagerFactory; -import org.apache.bookkeeper.meta.LedgerManagerFactory; -import org.apache.bookkeeper.meta.LedgerManagerTestCase; -import org.apache.bookkeeper.meta.ZkLedgerUnderreplicationManager; -import org.apache.bookkeeper.meta.zk.ZKMetadataDriverBase; -import org.apache.bookkeeper.net.BookieSocketAddress; -import org.apache.bookkeeper.proto.BookieServer; -import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GenericCallback; -import org.apache.bookkeeper.stats.NullStatsLogger; -import org.apache.bookkeeper.util.SnapshotMap; -import org.apache.zookeeper.ZooDefs; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.junit.runners.Parameterized.Parameters; - -/** - * Test GC-overreplicated ledger. - */ -@RunWith(Parameterized.class) -public class TestGcOverreplicatedLedger extends LedgerManagerTestCase { - - @Before - @Override - public void setUp() throws Exception { - super.setUp(); - ledgerManager = ledgerManagerFactory.newLedgerManager(); - activeLedgers = new SnapshotMap(); - } - - public TestGcOverreplicatedLedger(Class lmFactoryCls) { - super(lmFactoryCls, 3); - } - - @Parameters - public static Collection configs() { - return Arrays.asList(new Object[][] { { HierarchicalLedgerManagerFactory.class } }); - } - - @Test - public void testGcOverreplicatedLedger() throws Exception { - LedgerHandle lh = bkc.createLedger(2, 2, DigestType.MAC, "".getBytes()); - activeLedgers.put(lh.getId(), true); - - final AtomicReference newLedgerMetadata = new AtomicReference<>(null); - final CountDownLatch latch = new CountDownLatch(1); - ledgerManager.readLedgerMetadata(lh.getId(), new GenericCallback() { - - @Override - public void operationComplete(int rc, LedgerMetadata result) { - if (rc == BKException.Code.OK) { - newLedgerMetadata.set(result); - } - latch.countDown(); - } - }); - latch.await(); - if (newLedgerMetadata.get() == null) { - Assert.fail("No ledger metadata found"); - } - BookieSocketAddress bookieNotInEnsemble = getBookieNotInEnsemble(newLedgerMetadata.get()); - ServerConfiguration bkConf = getBkConf(bookieNotInEnsemble); - bkConf.setGcOverreplicatedLedgerWaitTime(10, TimeUnit.MILLISECONDS); - - lh.close(); - - final CompactableLedgerStorage mockLedgerStorage = new MockLedgerStorage(); - final GarbageCollector garbageCollector = new ScanAndCompareGarbageCollector(ledgerManager, mockLedgerStorage, - bkConf, NullStatsLogger.INSTANCE); - Thread.sleep(bkConf.getGcOverreplicatedLedgerWaitTimeMillis() + 1); - garbageCollector.gc(new GarbageCleaner() { - - @Override - public void clean(long ledgerId) { - try { - mockLedgerStorage.deleteLedger(ledgerId); - } catch (IOException e) { - 
e.printStackTrace(); - return; - } - } - }); - - Assert.assertFalse(activeLedgers.containsKey(lh.getId())); - } - - @Test - public void testNoGcOfLedger() throws Exception { - LedgerHandle lh = bkc.createLedger(2, 2, DigestType.MAC, "".getBytes()); - activeLedgers.put(lh.getId(), true); - - final AtomicReference newLedgerMetadata = new AtomicReference<>(null); - final CountDownLatch latch = new CountDownLatch(1); - ledgerManager.readLedgerMetadata(lh.getId(), new GenericCallback() { - - @Override - public void operationComplete(int rc, LedgerMetadata result) { - if (rc == BKException.Code.OK) { - newLedgerMetadata.set(result); - } - latch.countDown(); - } - }); - latch.await(); - if (newLedgerMetadata.get() == null) { - Assert.fail("No ledger metadata found"); - } - BookieSocketAddress address = null; - SortedMap> ensembleMap = newLedgerMetadata.get().getEnsembles(); - for (List ensemble : ensembleMap.values()) { - address = ensemble.get(0); - } - ServerConfiguration bkConf = getBkConf(address); - bkConf.setGcOverreplicatedLedgerWaitTime(10, TimeUnit.MILLISECONDS); - - lh.close(); - - final CompactableLedgerStorage mockLedgerStorage = new MockLedgerStorage(); - final GarbageCollector garbageCollector = new ScanAndCompareGarbageCollector(ledgerManager, mockLedgerStorage, - bkConf, NullStatsLogger.INSTANCE); - Thread.sleep(bkConf.getGcOverreplicatedLedgerWaitTimeMillis() + 1); - garbageCollector.gc(new GarbageCleaner() { - - @Override - public void clean(long ledgerId) { - try { - mockLedgerStorage.deleteLedger(ledgerId); - } catch (IOException e) { - e.printStackTrace(); - return; - } - } - }); - - Assert.assertTrue(activeLedgers.containsKey(lh.getId())); - } - - @Test - public void testNoGcIfLedgerBeingReplicated() throws Exception { - LedgerHandle lh = bkc.createLedger(2, 2, DigestType.MAC, "".getBytes()); - activeLedgers.put(lh.getId(), true); - - final AtomicReference newLedgerMetadata = new AtomicReference<>(null); - final CountDownLatch latch = new CountDownLatch(1); - ledgerManager.readLedgerMetadata(lh.getId(), new GenericCallback() { - - @Override - public void operationComplete(int rc, LedgerMetadata result) { - if (rc == BKException.Code.OK) { - newLedgerMetadata.set(result); - } - latch.countDown(); - } - }); - latch.await(); - if (newLedgerMetadata.get() == null) { - Assert.fail("No ledger metadata found"); - } - BookieSocketAddress bookieNotInEnsemble = getBookieNotInEnsemble(newLedgerMetadata.get()); - ServerConfiguration bkConf = getBkConf(bookieNotInEnsemble); - bkConf.setGcOverreplicatedLedgerWaitTime(10, TimeUnit.MILLISECONDS); - - lh.close(); - - ZkLedgerUnderreplicationManager.acquireUnderreplicatedLedgerLock( - zkc, - ZKMetadataDriverBase.resolveZkLedgersRootPath(baseConf), - lh.getId(), - ZooDefs.Ids.OPEN_ACL_UNSAFE); - - final CompactableLedgerStorage mockLedgerStorage = new MockLedgerStorage(); - final GarbageCollector garbageCollector = new ScanAndCompareGarbageCollector(ledgerManager, mockLedgerStorage, - bkConf, NullStatsLogger.INSTANCE); - Thread.sleep(bkConf.getGcOverreplicatedLedgerWaitTimeMillis() + 1); - garbageCollector.gc(new GarbageCleaner() { - - @Override - public void clean(long ledgerId) { - try { - mockLedgerStorage.deleteLedger(ledgerId); - } catch (IOException e) { - e.printStackTrace(); - return; - } - } - }); - - Assert.assertTrue(activeLedgers.containsKey(lh.getId())); - } - - private BookieSocketAddress getBookieNotInEnsemble(LedgerMetadata ledgerMetadata) throws UnknownHostException { - List allAddresses = Lists.newArrayList(); - for 
-        for (BookieServer bk : bs) {
-            allAddresses.add(bk.getLocalAddress());
-        }
-        SortedMap<Long, ? extends List<BookieSocketAddress>> ensembles = ledgerMetadata.getEnsembles();
-        for (List<BookieSocketAddress> fragmentEnsembles : ensembles.values()) {
-            allAddresses.removeAll(fragmentEnsembles);
-        }
-        Assert.assertEquals(allAddresses.size(), 1);
-        return allAddresses.get(0);
-    }
-}
diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/TestLedgerDirsManager.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/TestLedgerDirsManager.java
deleted file mode 100644
index b8555ca999e..00000000000
--- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/TestLedgerDirsManager.java
+++ /dev/null
@@ -1,505 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- *
- */
-package org.apache.bookkeeper.bookie;
-
-import static org.hamcrest.Matchers.equalTo;
-import static org.hamcrest.Matchers.greaterThan;
-import static org.hamcrest.Matchers.lessThan;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertThat;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-import static org.mockito.ArgumentMatchers.any;
-
-import java.io.File;
-import java.io.IOException;
-import java.time.Duration;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.concurrent.Executors;
-import java.util.concurrent.ScheduledExecutorService;
-import org.apache.bookkeeper.bookie.LedgerDirsManager.LedgerDirsListener;
-import org.apache.bookkeeper.bookie.LedgerDirsManager.NoWritableLedgerDirException;
-import org.apache.bookkeeper.common.testing.executors.MockExecutorController;
-import org.apache.bookkeeper.conf.ServerConfiguration;
-import org.apache.bookkeeper.conf.TestBKConfiguration;
-import org.apache.bookkeeper.stats.Gauge;
-import org.apache.bookkeeper.test.TestStatsProvider;
-import org.apache.bookkeeper.util.DiskChecker;
-import org.apache.bookkeeper.util.IOUtils;
-import org.apache.commons.io.FileUtils;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Test;
-import org.junit.runner.RunWith;
-import org.powermock.api.mockito.PowerMockito;
-import org.powermock.core.classloader.annotations.PrepareForTest;
-import org.powermock.modules.junit4.PowerMockRunner;
-
-/**
- * Test LedgerDirsManager.
- */
-@RunWith(PowerMockRunner.class)
-@PrepareForTest(LedgerDirsMonitor.class)
-public class TestLedgerDirsManager {
-
-    ServerConfiguration conf;
-    File curDir;
-    LedgerDirsManager dirsManager;
-    LedgerDirsMonitor ledgerMonitor;
-    MockDiskChecker mockDiskChecker;
-    private TestStatsProvider statsProvider;
-    private TestStatsProvider.TestStatsLogger statsLogger;
-    int diskCheckInterval = 1000;
-    float threshold = 0.5f;
-    float warnThreshold = 0.5f;
-
-    final List<File> tempDirs = new ArrayList<File>();
-
-    // Thread used by monitor
-    ScheduledExecutorService executor;
-    MockExecutorController executorController;
-
-    File createTempDir(String prefix, String suffix) throws IOException {
-        File dir = IOUtils.createTempDir(prefix, suffix);
-        tempDirs.add(dir);
-        return dir;
-    }
-
-    @Before
-    public void setUp() throws Exception {
-        PowerMockito.mockStatic(Executors.class);
-
-        File tmpDir = createTempDir("bkTest", ".dir");
-        curDir = Bookie.getCurrentDirectory(tmpDir);
-        Bookie.checkDirectoryStructure(curDir);
-
-        conf = TestBKConfiguration.newServerConfiguration();
-        conf.setLedgerDirNames(new String[] { tmpDir.toString() });
-        conf.setDiskLowWaterMarkUsageThreshold(conf.getDiskUsageThreshold());
-        conf.setDiskCheckInterval(diskCheckInterval);
-        conf.setIsForceGCAllowWhenNoSpace(true);
-        conf.setMinUsableSizeForEntryLogCreation(Long.MIN_VALUE);
-
-        executor = PowerMockito.mock(ScheduledExecutorService.class);
-        executorController = new MockExecutorController()
-            .controlScheduleAtFixedRate(executor, 10);
-        PowerMockito.when(Executors.newSingleThreadScheduledExecutor(any()))
-            .thenReturn(executor);
-
-        mockDiskChecker = new MockDiskChecker(threshold, warnThreshold);
-        statsProvider = new TestStatsProvider();
-        statsLogger = statsProvider.getStatsLogger("test");
-        dirsManager = new LedgerDirsManager(conf, conf.getLedgerDirs(),
-                new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold()), statsLogger);
-        ledgerMonitor = new LedgerDirsMonitor(conf,
-                mockDiskChecker, dirsManager);
-        ledgerMonitor.init();
-    }
-
-    @After
-    public void tearDown() throws Exception {
-        ledgerMonitor.shutdown();
-        for (File dir : tempDirs) {
-            FileUtils.deleteDirectory(dir);
-        }
-        tempDirs.clear();
-    }
-
-    @Test
-    public void testGetWritableDir() throws Exception {
-        try {
-            List<File> writeDirs = dirsManager.getWritableLedgerDirs();
-            assertTrue("Must have a writable ledgerDir", writeDirs.size() > 0);
-        } catch (NoWritableLedgerDirException nwlde) {
-            fail("We should have a writable ledgerDir");
-        }
-    }
-
-    @Test
-    public void testPickWritableDirExclusive() throws Exception {
-        try {
-            dirsManager.pickRandomWritableDir(curDir);
-            fail("Should not reach here because there is no writable ledger dir.");
-        } catch (NoWritableLedgerDirException nwlde) {
-            // expected to fail with no writable ledger dir
-            assertTrue(true);
-        }
-    }
-
-    @Test
-    public void testNoWritableDir() throws Exception {
-        try {
-            dirsManager.addToFilledDirs(curDir);
-            dirsManager.pickRandomWritableDir();
-            fail("Should not reach here because there is no writable ledger dir.");
-        } catch (NoWritableLedgerDirException nwlde) {
-            // expected to fail with no writable ledger dir
-            assertEquals("Should get NoWritableLedgerDirException w/ 'All ledger directories are non writable'.",
-                    "All ledger directories are non writable", nwlde.getMessage());
-        }
-    }
-
-    @Test
-    public void testGetWritableDirForLog() throws Exception {
-        List<File> writeDirs;
-        try {
-            dirsManager.addToFilledDirs(curDir);
-            dirsManager.getWritableLedgerDirs();
-            fail("Should not reach here because there is no writable ledger dir.");
due to there is no writable ledger dir."); - } catch (NoWritableLedgerDirException nwlde) { - // expected to fail with no writable ledger dir - // Now make sure we can get one for log - try { - writeDirs = dirsManager.getWritableLedgerDirsForNewLog(); - assertTrue("Must have a writable ledgerDir", writeDirs.size() > 0); - } catch (NoWritableLedgerDirException e) { - fail("We should have a writeble ledgerDir"); - } - } - } - - @Test - public void testGetWritableDirForLogNoEnoughDiskSpace() throws Exception { - conf.setMinUsableSizeForEntryLogCreation(curDir.getUsableSpace() + 1024); - dirsManager = new LedgerDirsManager(conf, conf.getLedgerDirs(), - new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold()), statsLogger); - try { - dirsManager.addToFilledDirs(curDir); - dirsManager.getWritableLedgerDirs(); - fail("Should not reach here due to there is no writable ledger dir."); - } catch (NoWritableLedgerDirException nwlde) { - // expected to fail with no writable ledger dir - // Now make sure we can get one for log - try { - dirsManager.getWritableLedgerDirsForNewLog(); - fail("Should not reach here due to there is no enough disk space left"); - } catch (NoWritableLedgerDirException e) { - // expected. - } - } - } - - @Test - public void testLedgerDirsMonitorDuringTransition() throws Exception { - testLedgerDirsMonitorDuringTransition(true); - } - - @Test - public void testHighPriorityWritesDisallowedDuringTransition() throws Exception { - testLedgerDirsMonitorDuringTransition(false); - } - - private void testLedgerDirsMonitorDuringTransition(boolean highPriorityWritesAllowed) throws Exception { - if (!highPriorityWritesAllowed) { - ledgerMonitor.shutdown(); - conf.setMinUsableSizeForHighPriorityWrites(curDir.getUsableSpace() + 1024); - dirsManager = new LedgerDirsManager(conf, conf.getLedgerDirs(), - new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold()), statsLogger); - ledgerMonitor = new LedgerDirsMonitor(conf, mockDiskChecker, dirsManager); - ledgerMonitor.init(); - } - - MockLedgerDirsListener mockLedgerDirsListener = new MockLedgerDirsListener(); - dirsManager.addLedgerDirsListener(mockLedgerDirsListener); - ledgerMonitor.start(); - - assertFalse(mockLedgerDirsListener.readOnly); - assertTrue(mockLedgerDirsListener.highPriorityWritesAllowed); - - mockDiskChecker.setUsage(threshold + 0.05f); - executorController.advance(Duration.ofMillis(diskCheckInterval)); - - assertTrue(mockLedgerDirsListener.readOnly); - assertEquals(highPriorityWritesAllowed, mockLedgerDirsListener.highPriorityWritesAllowed); - - mockDiskChecker.setUsage(threshold - 0.05f); - executorController.advance(Duration.ofMillis(diskCheckInterval)); - - assertFalse(mockLedgerDirsListener.readOnly); - assertTrue(mockLedgerDirsListener.highPriorityWritesAllowed); - } - - @Test - public void testLedgerDirsMonitorHandlingLowWaterMark() throws Exception { - - ledgerMonitor.shutdown(); - - final float warn = 0.90f; - final float nospace = 0.98f; - final float lwm = (warn + nospace) / 2; - final float lwm2warn = (warn + lwm) / 2; - final float lwm2nospace = (lwm + nospace) / 2; - final float nospaceExceeded = nospace + 0.005f; - - conf.setDiskUsageThreshold(nospace); - conf.setDiskLowWaterMarkUsageThreshold(lwm); - conf.setDiskUsageWarnThreshold(warn); - - mockDiskChecker = new MockDiskChecker(nospace, warnThreshold); - dirsManager = new LedgerDirsManager(conf, conf.getLedgerDirs(), - new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold())); - ledgerMonitor = 
-        ledgerMonitor.init();
-        final MockLedgerDirsListener mockLedgerDirsListener = new MockLedgerDirsListener();
-        dirsManager.addLedgerDirsListener(mockLedgerDirsListener);
-        ledgerMonitor.start();
-
-        executorController.advance(Duration.ofMillis(diskCheckInterval));
-        assertFalse(mockLedgerDirsListener.readOnly);
-
-        // go above LWM but below threshold
-        // should still be writable
-        mockDiskChecker.setUsage(lwm2nospace);
-        executorController.advance(Duration.ofMillis(diskCheckInterval));
-        assertFalse(mockLedgerDirsListener.readOnly);
-
-        // exceed the threshold, should go to readonly
-        mockDiskChecker.setUsage(nospaceExceeded);
-        executorController.advance(Duration.ofMillis(diskCheckInterval));
-        assertTrue(mockLedgerDirsListener.readOnly);
-
-        // drop below threshold but above LWM
-        // should stay read-only
-        mockDiskChecker.setUsage(lwm2nospace);
-        executorController.advance(Duration.ofMillis(diskCheckInterval));
-        assertTrue(mockLedgerDirsListener.readOnly);
-
-        // drop below LWM
-        // should become writable
-        mockDiskChecker.setUsage(lwm2warn);
-        executorController.advance(Duration.ofMillis(diskCheckInterval));
-        assertFalse(mockLedgerDirsListener.readOnly);
-
-        // go above LWM but below threshold
-        // should still be writable
-        mockDiskChecker.setUsage(lwm2nospace);
-        executorController.advance(Duration.ofMillis(diskCheckInterval));
-        assertFalse(mockLedgerDirsListener.readOnly);
-    }
-
-    @Test
-    public void testLedgerDirsMonitorHandlingWithMultipleLedgerDirectories() throws Exception {
-        ledgerMonitor.shutdown();
-
-        final float nospace = 0.90f;
-        final float lwm = 0.80f;
-        HashMap<File, Float> usageMap;
-
-        File tmpDir1 = createTempDir("bkTest", ".dir");
-        File curDir1 = Bookie.getCurrentDirectory(tmpDir1);
-        Bookie.checkDirectoryStructure(curDir1);
-
-        File tmpDir2 = createTempDir("bkTest", ".dir");
-        File curDir2 = Bookie.getCurrentDirectory(tmpDir2);
-        Bookie.checkDirectoryStructure(curDir2);
-
-        conf.setDiskUsageThreshold(nospace);
-        conf.setDiskLowWaterMarkUsageThreshold(lwm);
-        conf.setDiskUsageWarnThreshold(nospace);
-        conf.setLedgerDirNames(new String[] { tmpDir1.toString(), tmpDir2.toString() });
-
-        mockDiskChecker = new MockDiskChecker(nospace, warnThreshold);
-        dirsManager = new LedgerDirsManager(conf, conf.getLedgerDirs(),
-                new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold()),
-                statsLogger);
-        ledgerMonitor = new LedgerDirsMonitor(conf, mockDiskChecker, dirsManager);
-        usageMap = new HashMap<File, Float>();
-        usageMap.put(curDir1, 0.1f);
-        usageMap.put(curDir2, 0.1f);
-        mockDiskChecker.setUsageMap(usageMap);
-        ledgerMonitor.init();
-        final MockLedgerDirsListener mockLedgerDirsListener = new MockLedgerDirsListener();
-        dirsManager.addLedgerDirsListener(mockLedgerDirsListener);
-        ledgerMonitor.start();
-
-        Thread.sleep((diskCheckInterval * 2) + 100);
-        assertFalse(mockLedgerDirsListener.readOnly);
-
-        // go above LWM but below threshold
-        // should still be writable
-        setUsageAndThenVerify(curDir1, lwm + 0.05f, curDir2, lwm + 0.05f, mockDiskChecker, mockLedgerDirsListener,
-                false);
-
-        // one dir usage above the storage threshold, another dir below the storage threshold
-        // should still be writable
-        setUsageAndThenVerify(curDir1, nospace + 0.02f, curDir2, nospace - 0.05f, mockDiskChecker,
-                mockLedgerDirsListener, false);
-
-        // both dirs above the storage threshold, should become readonly
-        setUsageAndThenVerify(curDir1, nospace + 0.05f, curDir2, nospace + 0.02f, mockDiskChecker,
-                mockLedgerDirsListener, true);
-
-        // bring the disk usages to less than the threshold,
-        // but more than the LWM.
-        // should still be readonly
-        setUsageAndThenVerify(curDir1, nospace - 0.05f, curDir2, nospace - 0.05f, mockDiskChecker,
-                mockLedgerDirsListener, true);
-
-        // bring one dir diskusage to less than lwm,
-        // the other dir to be more than lwm, but the
-        // overall diskusage to be more than lwm
-        // should still be readonly
-        setUsageAndThenVerify(curDir1, lwm - 0.03f, curDir2, lwm + 0.07f, mockDiskChecker, mockLedgerDirsListener,
-                true);
-
-        // bring one dir diskusage to much less than lwm,
-        // the other dir to be more than the storage threshold, but the
-        // overall diskusage is less than lwm
-        // should go back to readwrite
-        setUsageAndThenVerify(curDir1, lwm - 0.17f, curDir2, nospace + 0.03f, mockDiskChecker, mockLedgerDirsListener,
-                false);
-        assertTrue("Only one LedgerDir should be writable", dirsManager.getWritableLedgerDirs().size() == 1);
-
-        // bring both the dirs below lwm
-        // should still be readwrite
-        setUsageAndThenVerify(curDir1, lwm - 0.03f, curDir2, lwm - 0.02f, mockDiskChecker, mockLedgerDirsListener,
-                false);
-        assertTrue("Both the LedgerDirs should be writable", dirsManager.getWritableLedgerDirs().size() == 2);
-
-        // bring both the dirs above lwm but < threshold
-        // should still be readwrite
-        setUsageAndThenVerify(curDir1, lwm + 0.02f, curDir2, lwm + 0.08f, mockDiskChecker, mockLedgerDirsListener,
-                false);
-    }
-
-    private void setUsageAndThenVerify(File dir1, float dir1Usage, File dir2, float dir2Usage,
-            MockDiskChecker mockDiskChecker, MockLedgerDirsListener mockLedgerDirsListener, boolean verifyReadOnly)
-            throws InterruptedException {
-        HashMap<File, Float> usageMap = new HashMap<File, Float>();
-        usageMap.put(dir1, dir1Usage);
-        usageMap.put(dir2, dir2Usage);
-        mockDiskChecker.setUsageMap(usageMap);
-        executorController.advance(Duration.ofMillis(diskCheckInterval));
-
-        float sample1 = getGauge(dir1.getParent()).getSample().floatValue();
-        float sample2 = getGauge(dir2.getParent()).getSample().floatValue();
-
-        if (verifyReadOnly) {
-            assertTrue(mockLedgerDirsListener.readOnly);
-
-            // LedgerDirsMonitor stops updating diskUsages when the bookie is in the readonly mode,
-            // so the stats will reflect an older value at the time when the bookie became readonly
-            assertThat(sample1, greaterThan(90f));
-            assertThat(sample1, lessThan(100f));
-            assertThat(sample2, greaterThan(90f));
-            assertThat(sample2, lessThan(100f));
-        } else {
-            assertFalse(mockLedgerDirsListener.readOnly);
-
-            assertThat(sample1, equalTo(dir1Usage * 100f));
-            assertThat(sample2, equalTo(dir2Usage * 100f));
-        }
-    }
-
-    private Gauge<? extends Number> getGauge(String path) {
-        String gaugeName = String.format("test.dir_%s_usage", path.replace('/', '_'));
-        return statsProvider.getGauge(gaugeName);
-    }
-
-    private class MockDiskChecker extends DiskChecker {
-
-        private volatile float used;
-        private volatile Map<File, Float> usageMap = null;
-
-        public MockDiskChecker(float threshold, float warnThreshold) {
-            super(threshold, warnThreshold);
-            used = 0f;
-        }
-
-        @Override
-        public float checkDir(File dir) throws DiskErrorException, DiskOutOfSpaceException, DiskWarnThresholdException {
-            float dirUsage = getDirUsage(dir);
-
-            if (dirUsage > getDiskUsageThreshold()) {
-                throw new DiskOutOfSpaceException("", dirUsage);
-            }
-            if (dirUsage > getDiskUsageWarnThreshold()) {
-                throw new DiskWarnThresholdException("", dirUsage);
-            }
-            return dirUsage;
-        }
-
-        @Override
-        public float getTotalDiskUsage(List<File> dirs) {
-            float accumulatedDiskUsage = 0f;
-            for (File dir : dirs) {
-                accumulatedDiskUsage += getDirUsage(dir);
-            }
-            return (accumulatedDiskUsage / dirs.size());
-        }
-
-        public float getDirUsage(File dir) {
-            float dirUsage;
-            if ((usageMap == null) || (!usageMap.containsKey(dir))) {
-                dirUsage = used;
-            } else {
-                dirUsage = usageMap.get(dir);
-            }
-            return dirUsage;
-        }
-
-        public void setUsage(float usage) {
-            this.used = usage;
-        }
-
-        public void setUsageMap(Map<File, Float> usageMap) {
-            this.usageMap = usageMap;
-        }
-    }
-
-    private class MockLedgerDirsListener implements LedgerDirsListener {
-
-        public volatile boolean highPriorityWritesAllowed;
-        public volatile boolean readOnly;
-
-        public MockLedgerDirsListener() {
-            reset();
-        }
-
-        @Override
-        public void diskWritable(File disk) {
-            readOnly = false;
-            highPriorityWritesAllowed = true;
-        }
-
-        @Override
-        public void diskJustWritable(File disk) {
-            readOnly = false;
-            highPriorityWritesAllowed = true;
-        }
-
-        @Override
-        public void allDisksFull(boolean highPriorityWritesAllowed) {
-            this.readOnly = true;
-            this.highPriorityWritesAllowed = highPriorityWritesAllowed;
-        }
-
-        public void reset() {
-            readOnly = false;
-            highPriorityWritesAllowed = true;
-        }
-
-    }
-}
diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/TestSkipListArena.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/TestSkipListArena.java
deleted file mode 100644
index 83e4794d0e7..00000000000
--- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/TestSkipListArena.java
+++ /dev/null
@@ -1,208 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.bookkeeper.bookie;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNull;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-
-import com.google.common.primitives.Ints;
-
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Map;
-import java.util.Random;
-import java.util.Set;
-import java.util.TreeMap;
-import java.util.concurrent.ConcurrentLinkedQueue;
-import java.util.concurrent.CountDownLatch;
-
-import org.apache.bookkeeper.bookie.SkipListArena.MemorySlice;
-import org.apache.bookkeeper.conf.ServerConfiguration;
-import org.junit.Test;
-
-/**
- * Test the SkipListArena class.
- */
-public class TestSkipListArena {
-
-    class CustomConfiguration extends ServerConfiguration {
-        @Override
-        public int getSkipListArenaChunkSize() {
-            return 4096;
-        }
-        @Override
-        public int getSkipListArenaMaxAllocSize() {
-            return 1024;
-        }
-        @Override
-        public boolean getJournalFlushWhenQueueEmpty() {
-            return true;
-        }
-
-    }
-
-    final CustomConfiguration cfg = new CustomConfiguration();
-
-    /**
-     * Test random allocations.
-     */
-    @Test
-    public void testRandomAllocation() {
-        Random rand = new Random();
-        SkipListArena arena = new SkipListArena(cfg);
-        int expectedOff = 0;
-        byte[] lastBuffer = null;
-
-        // 10K iterations by 0-512 alloc -> 2560kB expected
-        // should be reasonable for unit test and also cover wraparound
-        // behavior
-        for (int i = 0; i < 10000; i++) {
-            int size = rand.nextInt(512);
-            MemorySlice alloc = arena.allocateBytes(size);
-
-            if (alloc.getData() != lastBuffer) {
-                expectedOff = 0;
-                lastBuffer = alloc.getData();
-            }
-            assertTrue(expectedOff == alloc.getOffset());
-            assertTrue("Allocation " + alloc + " overruns buffer",
-                    alloc.getOffset() + size <= alloc.getData().length);
-            expectedOff += size;
-        }
-    }
-
-    @Test
-    public void testLargeAllocation() {
-        SkipListArena arena = new SkipListArena(cfg);
-        MemorySlice alloc = arena.allocateBytes(1024 + 1024);
-        assertNull("2KB allocation shouldn't be satisfied by LAB.", alloc);
-    }
-
-    private class ByteArray {
-        final byte[] bytes;
-        ByteArray(final byte[] bytes) {
-            this.bytes = bytes;
-        }
-
-        @Override
-        public int hashCode() {
-            return bytes.hashCode();
-        }
-
-        @Override
-        public boolean equals(Object object) {
-            if (object instanceof ByteArray) {
-                ByteArray other = (ByteArray) object;
-                return this.bytes.equals(other.bytes);
-            }
-            return false;
-        }
-    }
-
-    private static class AllocBuffer implements Comparable<AllocBuffer> {
-        private final MemorySlice alloc;
-        private final int size;
-        public AllocBuffer(MemorySlice alloc, int size) {
-            super();
-            this.alloc = alloc;
-            this.size = size;
-        }
-
-        @Override
-        public int compareTo(AllocBuffer e) {
-            assertTrue(alloc.getData() == e.alloc.getData());
-            return Ints.compare(alloc.getOffset(), e.alloc.getOffset());
-        }
-
-        @Override
-        public String toString() {
-            return alloc + ":" + size;
-        }
-    }
-
-    private Thread getAllocThread(final ConcurrentLinkedQueue<AllocBuffer> queue,
-                                  final CountDownLatch latch,
-                                  final SkipListArena arena) {
-        return new Thread(new Runnable() {
-            @Override
-            public void run() {
-                Random rand = new Random();
-                for (int j = 0; j < 1000; j++) {
-                    int size = rand.nextInt(512);
-                    MemorySlice alloc = arena.allocateBytes(size);
-                    queue.add(new AllocBuffer(alloc, size));
-                }
-                latch.countDown();
-            }
-        });
-    }
-
-    /**
-     * Test concurrent allocation, check the results don't overlap.
-     */
-    @Test
-    public void testConcurrency() throws Exception {
-        final SkipListArena arena = new SkipListArena(cfg);
-        final CountDownLatch latch = new CountDownLatch(10);
-        final ConcurrentLinkedQueue<AllocBuffer> queue = new ConcurrentLinkedQueue<AllocBuffer>();
-
-        Set<Thread> testThreads = new HashSet<Thread>();
-        for (int i = 0; i < 10; i++) {
-            testThreads.add(getAllocThread(queue, latch, arena));
-        }
-
-        for (Thread thread : testThreads) {
-            thread.start();
-        }
-        latch.await();
-
-        // Partition the allocations by the actual byte[] they share,
-        // make sure offsets are unique and non-overlapping for each buffer.
-        Map<ByteArray, Map<Integer, AllocBuffer>> mapsByArray = new HashMap<ByteArray, Map<Integer, AllocBuffer>>();
-        boolean overlapped = false;
-
-        final AllocBuffer[] buffers = queue.toArray(new AllocBuffer[0]);
-        for (AllocBuffer buf : buffers) {
-            if (buf.size != 0) {
-                ByteArray ptr = new ByteArray(buf.alloc.getData());
-                Map<Integer, AllocBuffer> treeMap = mapsByArray.get(ptr);
-                if (treeMap == null) {
-                    treeMap = new TreeMap<Integer, AllocBuffer>();
-                    mapsByArray.put(ptr, treeMap);
-                }
-                AllocBuffer other = treeMap.put(buf.alloc.getOffset(), buf);
-                if (other != null) {
-                    fail("Buffer " + other.toString() + " overlapped with " + buf.toString());
-                }
-            }
-        }
-
-        // Now check each byte array to make sure allocations don't overlap
-        for (Map<Integer, AllocBuffer> treeMap : mapsByArray.values()) {
-            int expectedOff = 0;
-            for (AllocBuffer buf : treeMap.values()) {
-                assertEquals(expectedOff, buf.alloc.getOffset());
-                expectedOff += buf.size;
-            }
-        }
-    }
-}
-
diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/TestSyncThread.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/TestSyncThread.java
deleted file mode 100644
index d9fa8cc67e4..00000000000
--- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/TestSyncThread.java
+++ /dev/null
@@ -1,365 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- *
- */
-package org.apache.bookkeeper.bookie;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-
-import io.netty.buffer.ByteBuf;
-import java.io.IOException;
-import java.util.concurrent.CountDownLatch;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.Future;
-import java.util.concurrent.TimeUnit;
-import java.util.concurrent.atomic.AtomicBoolean;
-import java.util.concurrent.atomic.AtomicInteger;
-import org.apache.bookkeeper.bookie.CheckpointSource.Checkpoint;
-import org.apache.bookkeeper.bookie.LedgerDirsManager.LedgerDirsListener;
-import org.apache.bookkeeper.bookie.LedgerDirsManager.NoWritableLedgerDirException;
-import org.apache.bookkeeper.common.util.Watcher;
-import org.apache.bookkeeper.conf.ServerConfiguration;
-import org.apache.bookkeeper.conf.TestBKConfiguration;
-import org.apache.bookkeeper.meta.LedgerManager;
-import org.apache.bookkeeper.stats.StatsLogger;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Test;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * Test a synchronization thread.
- */
-public class TestSyncThread {
-    private static final Logger LOG = LoggerFactory.getLogger(TestSyncThread.class);
-
-    ExecutorService executor = null;
-
-    @Before
-    public void setupExecutor() {
-        executor = Executors.newSingleThreadExecutor();
-    }
-
-    @After
-    public void teardownExecutor() {
-        if (executor != null) {
-            executor.shutdownNow();
-            executor = null;
-        }
-    }
-
-    /**
-     * Test that if a flush is taking a long time,
-     * the sync thread will not shutdown until it
-     * has finished.
-     */
-    @Test
-    public void testSyncThreadLongShutdown() throws Exception {
-        int flushInterval = 100;
-        ServerConfiguration conf = TestBKConfiguration.newServerConfiguration();
-        conf.setFlushInterval(flushInterval);
-        CheckpointSource checkpointSource = new DummyCheckpointSource();
-        LedgerDirsListener listener = new LedgerDirsListener() {};
-
-        final CountDownLatch checkpointCalledLatch = new CountDownLatch(1);
-        final CountDownLatch checkpointLatch = new CountDownLatch(1);
-
-        final CountDownLatch flushCalledLatch = new CountDownLatch(1);
-        final CountDownLatch flushLatch = new CountDownLatch(1);
-        final AtomicBoolean failedSomewhere = new AtomicBoolean(false);
-        LedgerStorage storage = new DummyLedgerStorage() {
-            @Override
-            public void flush() throws IOException {
-                flushCalledLatch.countDown();
-                try {
-                    flushLatch.await();
-                } catch (InterruptedException ie) {
-                    Thread.currentThread().interrupt();
-                    LOG.error("Interrupted in flush thread", ie);
-                    failedSomewhere.set(true);
-                }
-            }
-
-            @Override
-            public void checkpoint(Checkpoint checkpoint)
-                    throws IOException {
-                checkpointCalledLatch.countDown();
-                try {
-                    checkpointLatch.await();
-                } catch (InterruptedException ie) {
-                    Thread.currentThread().interrupt();
-                    LOG.error("Interrupted in checkpoint thread", ie);
-                    failedSomewhere.set(true);
-                }
-            }
-        };
-
-        final SyncThread t = new SyncThread(conf, listener, storage, checkpointSource);
-        t.startCheckpoint(Checkpoint.MAX);
-        assertTrue("Checkpoint should have been called",
-                checkpointCalledLatch.await(10, TimeUnit.SECONDS));
-        Future<Boolean> done = executor.submit(() -> {
-            try {
-                t.shutdown();
-            } catch (InterruptedException ie) {
-                Thread.currentThread().interrupt();
-                LOG.error("Interrupted shutting down sync thread", ie);
-                failedSomewhere.set(true);
-                return false;
-            }
-            return true;
-        });
-        checkpointLatch.countDown();
-        assertFalse("Shutdown shouldn't have finished", done.isDone());
-        assertTrue("Flush should have been called",
-                flushCalledLatch.await(10, TimeUnit.SECONDS));
-
-        assertFalse("Shutdown shouldn't have finished", done.isDone());
-        flushLatch.countDown();
-
-        assertTrue("Shutdown should have finished successfully", done.get(10, TimeUnit.SECONDS));
-        assertFalse("Shouldn't have failed anywhere", failedSomewhere.get());
-    }
-
-    /**
-     * Test that sync thread suspension works.
-     * i.e. when we suspend the syncthread, nothing
-     * will be synced.
- */ - @Test - public void testSyncThreadSuspension() throws Exception { - int flushInterval = 100; - ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); - conf.setFlushInterval(flushInterval); - CheckpointSource checkpointSource = new DummyCheckpointSource(); - LedgerDirsListener listener = new LedgerDirsListener() {}; - - final AtomicInteger checkpointCount = new AtomicInteger(0); - LedgerStorage storage = new DummyLedgerStorage() { - @Override - public void checkpoint(Checkpoint checkpoint) - throws IOException { - checkpointCount.incrementAndGet(); - } - }; - final SyncThread t = new SyncThread(conf, listener, storage, checkpointSource); - t.startCheckpoint(Checkpoint.MAX); - while (checkpointCount.get() == 0) { - Thread.sleep(flushInterval); - } - t.suspendSync(); - Thread.sleep(flushInterval); - int count = checkpointCount.get(); - for (int i = 0; i < 10; i++) { - t.startCheckpoint(Checkpoint.MAX); - assertEquals("Checkpoint count shouldn't change", count, checkpointCount.get()); - } - t.resumeSync(); - int i = 0; - while (checkpointCount.get() == count) { - Thread.sleep(flushInterval); - i++; - if (i > 100) { - fail("Checkpointing never resumed"); - } - } - t.shutdown(); - } - - /** - * Test that if the ledger storage throws a - * runtime exception, the bookie will be told - * to shutdown. - */ - @Test - public void testSyncThreadShutdownOnError() throws Exception { - int flushInterval = 100; - ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); - conf.setFlushInterval(flushInterval); - CheckpointSource checkpointSource = new DummyCheckpointSource(); - final CountDownLatch fatalLatch = new CountDownLatch(1); - LedgerDirsListener listener = new LedgerDirsListener() { - @Override - public void fatalError() { - fatalLatch.countDown(); - } - }; - - LedgerStorage storage = new DummyLedgerStorage() { - @Override - public void checkpoint(Checkpoint checkpoint) - throws IOException { - throw new RuntimeException("Fatal error in sync thread"); - } - }; - final SyncThread t = new SyncThread(conf, listener, storage, checkpointSource); - t.startCheckpoint(Checkpoint.MAX); - assertTrue("Should have called fatal error", fatalLatch.await(10, TimeUnit.SECONDS)); - t.shutdown(); - } - - /** - * Test that if the ledger storage throws - * a disk full exception, the owner of the sync - * thread will be notified. 
-     */
-    @Test
-    public void testSyncThreadDisksFull() throws Exception {
-        int flushInterval = 100;
-        ServerConfiguration conf = TestBKConfiguration.newServerConfiguration();
-        conf.setFlushInterval(flushInterval);
-        CheckpointSource checkpointSource = new DummyCheckpointSource();
-        final CountDownLatch diskFullLatch = new CountDownLatch(1);
-        LedgerDirsListener listener = new LedgerDirsListener() {
-            @Override
-            public void allDisksFull(boolean highPriorityWritesAllowed) {
-                diskFullLatch.countDown();
-            }
-        };
-
-        LedgerStorage storage = new DummyLedgerStorage() {
-            @Override
-            public void checkpoint(Checkpoint checkpoint)
-                    throws IOException {
-                throw new NoWritableLedgerDirException("Disk full error in sync thread");
-            }
-        };
-        final SyncThread t = new SyncThread(conf, listener, storage, checkpointSource);
-        t.startCheckpoint(Checkpoint.MAX);
-        assertTrue("Should have disk full error", diskFullLatch.await(10, TimeUnit.SECONDS));
-        t.shutdown();
-    }
-
-    private static class DummyCheckpointSource implements CheckpointSource {
-        @Override
-        public Checkpoint newCheckpoint() {
-            return Checkpoint.MAX;
-        }
-
-        @Override
-        public void checkpointComplete(Checkpoint checkpoint, boolean compact)
-                throws IOException {
-        }
-    }
-
-    private static class DummyLedgerStorage implements LedgerStorage {
-        @Override
-        public void initialize(
-                ServerConfiguration conf,
-                LedgerManager ledgerManager,
-                LedgerDirsManager ledgerDirsManager,
-                LedgerDirsManager indexDirsManager,
-                StateManager stateManager,
-                CheckpointSource checkpointSource,
-                Checkpointer checkpointer,
-                StatsLogger statsLogger)
-                throws IOException {
-        }
-
-        @Override
-        public void deleteLedger(long ledgerId) throws IOException {
-        }
-
-        @Override
-        public void start() {
-        }
-
-        @Override
-        public void shutdown() throws InterruptedException {
-        }
-
-        @Override
-        public boolean ledgerExists(long ledgerId) throws IOException {
-            return true;
-        }
-
-        @Override
-        public boolean setFenced(long ledgerId) throws IOException {
-            return true;
-        }
-
-        @Override
-        public boolean isFenced(long ledgerId) throws IOException {
-            return false;
-        }
-
-        @Override
-        public void setMasterKey(long ledgerId, byte[] masterKey)
-                throws IOException {
-        }
-
-        @Override
-        public byte[] readMasterKey(long ledgerId)
-                throws IOException, BookieException {
-            return new byte[0];
-        }
-
-        @Override
-        public long addEntry(ByteBuf entry) throws IOException {
-            return 1L;
-        }
-
-        @Override
-        public ByteBuf getEntry(long ledgerId, long entryId)
-                throws IOException {
-            return null;
-        }
-
-        @Override
-        public long getLastAddConfirmed(long ledgerId) throws IOException {
-            return 0;
-        }
-
-        @Override
-        public void flush() throws IOException {
-        }
-
-        @Override
-        public void setExplicitlac(long ledgerId, ByteBuf lac) {
-        }
-
-        @Override
-        public ByteBuf getExplicitLac(long ledgerId) {
-            return null;
-        }
-
-        @Override
-        public boolean waitForLastAddConfirmedUpdate(long ledgerId,
-                                                     long previousLAC,
-                                                     Watcher<LastAddConfirmedUpdateNotification> watcher)
-                throws IOException {
-            return false;
-        }
-
-        @Override
-        public void checkpoint(Checkpoint checkpoint)
-                throws IOException {
-        }
-
-        @Override
-        public void registerLedgerDeletionListener(LedgerDeletionListener listener) {
-        }
-    }
-
-}
diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/UncleanShutdownDetectionTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/UncleanShutdownDetectionTest.java
new file mode 100644
index 00000000000..3903ec75b45
--- /dev/null
+++
b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/UncleanShutdownDetectionTest.java @@ -0,0 +1,154 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.bookie; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.conf.TestBKConfiguration; +import org.apache.bookkeeper.util.DiskChecker; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; + +/** + * Test the unclean shutdown implementation. + */ +public class UncleanShutdownDetectionTest { + + @Rule + public TemporaryFolder tempDir = new TemporaryFolder(); + + @Test + public void testRegisterStartWithoutRegisterShutdownEqualsUncleanShutdown() throws IOException { + ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); + DiskChecker diskChecker = new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold()); + LedgerDirsManager ledgerDirsManager = new LedgerDirsManager( + conf, conf.getLedgerDirs(), diskChecker); + + UncleanShutdownDetection uncleanShutdownDetection = new UncleanShutdownDetectionImpl(ledgerDirsManager); + uncleanShutdownDetection.registerStartUp(); + + assertTrue(uncleanShutdownDetection.lastShutdownWasUnclean()); + } + + @Test + public void testRegisterStartWithRegisterShutdownEqualsCleanShutdown() throws IOException { + ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); + DiskChecker diskChecker = new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold()); + LedgerDirsManager ledgerDirsManager = new LedgerDirsManager( + conf, conf.getLedgerDirs(), diskChecker); + + UncleanShutdownDetection uncleanShutdownDetection = new UncleanShutdownDetectionImpl(ledgerDirsManager); + uncleanShutdownDetection.registerStartUp(); + uncleanShutdownDetection.registerCleanShutdown(); + + assertFalse(uncleanShutdownDetection.lastShutdownWasUnclean()); + } + + @Test + public void testRegisterStartWithoutRegisterShutdownEqualsUncleanShutdownMultipleDirs() throws IOException { + File ledgerDir1 = tempDir.newFolder("l1"); + File ledgerDir2 = tempDir.newFolder("l2"); + File ledgerDir3 = tempDir.newFolder("l3"); + ServerConfiguration conf = TestBKConfiguration.newServerConfiguration() + .setLedgerDirNames(new String[] {ledgerDir1.getAbsolutePath(), ledgerDir2.getAbsolutePath(), + ledgerDir3.getAbsolutePath()}); + DiskChecker diskChecker = new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold()); + LedgerDirsManager ledgerDirsManager = new LedgerDirsManager( + conf, 
conf.getLedgerDirs(), diskChecker);
+
+        UncleanShutdownDetection uncleanShutdownDetection = new UncleanShutdownDetectionImpl(ledgerDirsManager);
+        uncleanShutdownDetection.registerStartUp();
+
+        assertTrue(uncleanShutdownDetection.lastShutdownWasUnclean());
+    }
+
+    @Test
+    public void testRegisterStartWithRegisterShutdownEqualsCleanShutdownMultipleDirs() throws IOException {
+        File ledgerDir1 = tempDir.newFolder("l1");
+        File ledgerDir2 = tempDir.newFolder("l2");
+        File ledgerDir3 = tempDir.newFolder("l3");
+        ServerConfiguration conf = TestBKConfiguration.newServerConfiguration()
+                .setLedgerDirNames(new String[] {ledgerDir1.getAbsolutePath(), ledgerDir2.getAbsolutePath(),
+                        ledgerDir3.getAbsolutePath()});
+        DiskChecker diskChecker = new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold());
+        LedgerDirsManager ledgerDirsManager = new LedgerDirsManager(
+                conf, conf.getLedgerDirs(), diskChecker);
+
+        UncleanShutdownDetection uncleanShutdownDetection = new UncleanShutdownDetectionImpl(ledgerDirsManager);
+        uncleanShutdownDetection.registerStartUp();
+        uncleanShutdownDetection.registerCleanShutdown();
+
+        assertFalse(uncleanShutdownDetection.lastShutdownWasUnclean());
+    }
+
+    @Test
+    public void testRegisterStartWithPartialRegisterShutdownEqualsUncleanShutdownMultipleDirs() throws IOException {
+        File ledgerDir1 = tempDir.newFolder("l1");
+        File ledgerDir2 = tempDir.newFolder("l2");
+        File ledgerDir3 = tempDir.newFolder("l3");
+        ServerConfiguration conf = TestBKConfiguration.newServerConfiguration()
+                .setLedgerDirNames(new String[] {ledgerDir1.getAbsolutePath(), ledgerDir2.getAbsolutePath(),
+                        ledgerDir3.getAbsolutePath()});
+        DiskChecker diskChecker = new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold());
+        LedgerDirsManager ledgerDirsManager = new LedgerDirsManager(
+                conf, conf.getLedgerDirs(), diskChecker);
+
+        UncleanShutdownDetection uncleanShutdownDetection = new UncleanShutdownDetectionImpl(ledgerDirsManager);
+        uncleanShutdownDetection.registerStartUp();
+        uncleanShutdownDetection.registerCleanShutdown();
+        File dirtyFile = new File(ledgerDirsManager.getAllLedgerDirs().get(0),
+                UncleanShutdownDetectionImpl.DIRTY_FILENAME);
+        dirtyFile.createNewFile();
+
+        assertTrue(uncleanShutdownDetection.lastShutdownWasUnclean());
+    }
+
+    @Test(expected = IOException.class)
+    public void testRegisterStartFailsToCreateDirtyFilesAndThrowsIOException() throws IOException {
+        ServerConfiguration conf = TestBKConfiguration.newServerConfiguration();
+        DiskChecker diskChecker = new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold());
+        LedgerDirsManager ledgerDirsManager = new MockLedgerDirsManager(conf, conf.getLedgerDirs(), diskChecker);
+
+        UncleanShutdownDetection uncleanShutdownDetection = new UncleanShutdownDetectionImpl(ledgerDirsManager);
+        uncleanShutdownDetection.registerStartUp();
+    }
+
+    private class MockLedgerDirsManager extends LedgerDirsManager {
+        public MockLedgerDirsManager(ServerConfiguration conf, File[] dirs, DiskChecker diskChecker)
+                throws IOException {
+            super(conf, dirs, diskChecker);
+        }
+
+        @Override
+        public List<File> getAllLedgerDirs() {
+            List<File> dirs = new ArrayList<>();
+            dirs.add(new File("does_not_exist"));
+            return dirs;
+        }
+    }
+}
diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/UpdateCookieCmdTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/UpdateCookieCmdTest.java
index e0d37bbc57f..51671140d9f 100644
---
a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/UpdateCookieCmdTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/UpdateCookieCmdTest.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -32,7 +32,7 @@ import org.apache.bookkeeper.meta.MetadataBookieDriver; import org.apache.bookkeeper.meta.MetadataDrivers; import org.apache.bookkeeper.meta.zk.ZKMetadataDriverBase; -import org.apache.bookkeeper.proto.BookieServer; +import org.apache.bookkeeper.server.Main; import org.apache.bookkeeper.stats.NullStatsLogger; import org.apache.bookkeeper.test.BookKeeperClusterTestCase; import org.apache.bookkeeper.versioning.Version; @@ -51,9 +51,11 @@ public class UpdateCookieCmdTest extends BookKeeperClusterTestCase { MetadataBookieDriver driver; RegistrationManager rm; + ServerConfiguration conf; public UpdateCookieCmdTest() { - super(1); + super(0); + useUUIDasBookieId = false; } @Override @@ -63,13 +65,20 @@ public void setUp() throws Exception { baseConf.setMetadataServiceUri(zkUtil.getMetadataServiceUri()); driver = MetadataDrivers.getBookieDriver( URI.create(baseConf.getMetadataServiceUri())); - driver.initialize(baseConf, () -> {}, NullStatsLogger.INSTANCE); - rm = driver.getRegistrationManager(); + driver.initialize(baseConf, NullStatsLogger.INSTANCE); + rm = driver.createRegistrationManager(); + + conf = newServerConfiguration(); + LegacyCookieValidation validation = new LegacyCookieValidation(conf, rm); + validation.checkCookies(Main.storageDirectoriesFromConf(conf)); } @Override public void tearDown() throws Exception { super.tearDown(); + if (rm != null) { + rm.close(); + } if (driver != null) { driver.close(); } @@ -101,9 +110,9 @@ public void testUpdateCookieHostnameToIpAddress() throws Exception { updateCookie("-b", "ip", false); // start bookie to ensure everything works fine - ServerConfiguration conf = bsConfs.get(0); - BookieServer restartBookie = startBookie(conf); - restartBookie.shutdown(); + conf.setUseHostNameAsBookieID(false); + LegacyCookieValidation validation = new LegacyCookieValidation(conf, rm); + validation.checkCookies(Main.storageDirectoriesFromConf(conf)); } /** @@ -112,7 +121,7 @@ public void testUpdateCookieHostnameToIpAddress() throws Exception { @Test public void testUpdateCookieWithInvalidOption() throws Exception { String[] argv = new String[] { "updatecookie", "-b", "invalidBookieID" }; - final ServerConfiguration conf = bsConfs.get(0); + final ServerConfiguration conf = this.conf; updateCookie(argv, -1, conf); argv = new String[] { "updatecookie", "-b" }; @@ -142,13 +151,15 @@ public void testWhenBothIPaddressAndHostNameCookiesExists() throws Exception { updateCookie("-b", "hostname", true); // creates cookie with ipaddress - ServerConfiguration conf = bsConfs.get(0); + final ServerConfiguration conf = this.conf; conf.setUseHostNameAsBookieID(true); // sets to hostname Cookie cookie = Cookie.readFromRegistrationManager(rm, conf).getValue(); Cookie.Builder cookieBuilder = Cookie.newBuilder(cookie); conf.setUseHostNameAsBookieID(false); // sets to hostname - final String newBookieHost = Bookie.getBookieAddress(conf).toString(); - cookieBuilder.setBookieHost(newBookieHost); + + final String newBookieHost = BookieImpl.getBookieAddress(conf).toString(); + cookieBuilder.setBookieId(newBookieHost); + cookieBuilder.build().writeToRegistrationManager(rm, conf, Version.NEW); verifyCookieInZooKeeper(conf, 2); @@ -172,19 +183,15 @@ 
public void testWhenBothIPaddressAndHostNameCookiesExists() throws Exception { @Test public void testDuplicateUpdateCookieIpAddress() throws Exception { String[] argv = new String[] { "updatecookie", "-b", "ip" }; - final ServerConfiguration conf = bsConfs.get(0); + final ServerConfiguration conf = this.conf; conf.setUseHostNameAsBookieID(true); updateCookie(argv, -1, conf); } @Test public void testWhenNoCookieExists() throws Exception { - ServerConfiguration conf = bsConfs.get(0); - BookieServer bks = bs.get(0); - bks.shutdown(); - String zkCookiePath = ZKMetadataDriverBase.resolveZkLedgersRootPath(conf) - + "/" + COOKIE_NODE + "/" + Bookie.getBookieAddress(conf); + + "/" + COOKIE_NODE + "/" + BookieImpl.getBookieAddress(conf); Assert.assertNotNull("Cookie path doesn't still exists!", zkc.exists(zkCookiePath, false)); zkc.delete(zkCookiePath, -1); Assert.assertNull("Cookie path still exists!", zkc.exists(zkCookiePath, false)); @@ -210,10 +217,6 @@ private void updateCookie(String option, String optionVal, boolean useHostNameAs private void updateCookie(String option, String optionVal, boolean useHostNameAsBookieID, boolean useShortHostName) throws Exception { - ServerConfiguration conf = new ServerConfiguration(bsConfs.get(0)); - BookieServer bks = bs.get(0); - bks.shutdown(); - conf.setUseHostNameAsBookieID(!useHostNameAsBookieID); Cookie cookie = Cookie.readFromRegistrationManager(rm, conf).getValue(); final boolean previousBookieID = cookie.isBookieHostCreatedFromIp(); @@ -236,11 +239,11 @@ private void updateCookie(String option, String optionVal, boolean useHostNameAs verifyCookieInZooKeeper(newconf, 1); for (File journalDir : conf.getJournalDirs()) { - journalDir = Bookie.getCurrentDirectory(journalDir); + journalDir = BookieImpl.getCurrentDirectory(journalDir); Cookie jCookie = Cookie.readFromDirectory(journalDir); jCookie.verify(cookie); } - File[] ledgerDir = Bookie.getCurrentDirectories(conf.getLedgerDirs()); + File[] ledgerDir = BookieImpl.getCurrentDirectories(conf.getLedgerDirs()); for (File dir : ledgerDir) { Cookie lCookie = Cookie.readFromDirectory(dir); lCookie.verify(cookie); @@ -249,9 +252,6 @@ private void updateCookie(String option, String optionVal, boolean useHostNameAs private void updateCookie(String[] argv, int exitCode, ServerConfiguration conf) throws KeeperException, InterruptedException, IOException, UnknownHostException, Exception { - BookieServer bks = bs.get(0); - bks.shutdown(); - LOG.info("Perform updatecookie command"); BookieShell bkShell = new BookieShell(); bkShell.setConf(conf); diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/UpgradeTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/UpgradeTest.java index 0ddd9a6a61d..5db649e53ad 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/UpgradeTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/UpgradeTest.java @@ -21,29 +21,33 @@ package org.apache.bookkeeper.bookie; +import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import io.netty.buffer.ByteBuf; import io.netty.buffer.Unpooled; - import java.io.BufferedWriter; import java.io.File; import java.io.FileOutputStream; +import java.io.IOException; import java.io.OutputStreamWriter; import java.io.PrintStream; import java.io.RandomAccessFile; import java.nio.ByteBuffer; import java.nio.channels.FileChannel; import java.util.Arrays; - +import java.util.List; import 
org.apache.bookkeeper.client.ClientUtil; import org.apache.bookkeeper.client.LedgerHandle; import org.apache.bookkeeper.conf.ServerConfiguration; import org.apache.bookkeeper.conf.TestBKConfiguration; +import org.apache.bookkeeper.discover.RegistrationManager; +import org.apache.bookkeeper.meta.MetadataBookieDriver; +import org.apache.bookkeeper.stats.NullStatsLogger; import org.apache.bookkeeper.test.BookKeeperClusterTestCase; -import org.apache.bookkeeper.test.PortManager; import org.apache.bookkeeper.util.IOUtils; +import org.apache.bookkeeper.util.PortManager; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -60,6 +64,28 @@ public UpgradeTest() { super(0); } + static void writeLedgerDirWithIndexDir(File ledgerDir, + File indexDir, + byte[] masterKey) + throws Exception { + long ledgerId = 1; + + File fn = new File(indexDir, IndexPersistenceMgr.getLedgerName(ledgerId)); + fn.getParentFile().mkdirs(); + FileInfo fi = new FileInfo(fn, masterKey, FileInfo.CURRENT_HEADER_VERSION); + // force creation of index file + fi.write(new ByteBuffer[]{ ByteBuffer.allocate(0) }, 0); + fi.close(true); + + long logId = 0; + ByteBuffer logfileHeader = ByteBuffer.allocate(1024); + logfileHeader.put("BKLO".getBytes()); + FileChannel logfile = new RandomAccessFile( + new File(ledgerDir, Long.toHexString(logId) + ".log"), "rw").getChannel(); + logfile.write((ByteBuffer) logfileHeader.clear()); + logfile.close(); + } + static void writeLedgerDir(File dir, byte[] masterKey) throws Exception { @@ -110,18 +136,22 @@ static JournalChannel writeJournal(File journalDir, int numEntries, byte[] maste return jc; } - static File newV1JournalDirectory() throws Exception { - File d = IOUtils.createTempDir("bookie", "tmpdir"); + static File initV1JournalDirectory(File d) throws Exception { writeJournal(d, 100, "foobar".getBytes()).close(); return d; } - static File newV1LedgerDirectory() throws Exception { - File d = IOUtils.createTempDir("bookie", "tmpdir"); + static File initV1LedgerDirectory(File d) throws Exception { writeLedgerDir(d, "foobar".getBytes()); return d; } + static File initV1LedgerDirectoryWithIndexDir(File ledgerDir, + File indexDir) throws Exception { + writeLedgerDirWithIndexDir(ledgerDir, indexDir, "foobar".getBytes()); + return ledgerDir; + } + static void createVersion2File(File dir) throws Exception { File versionFile = new File(dir, "VERSION"); @@ -138,81 +168,144 @@ static void createVersion2File(File dir) throws Exception { } } - static File newV2JournalDirectory() throws Exception { - File d = newV1JournalDirectory(); - createVersion2File(d); + static File initV2JournalDirectory(File d) throws Exception { + createVersion2File(initV1JournalDirectory(d)); return d; } - static File newV2LedgerDirectory() throws Exception { - File d = newV1LedgerDirectory(); - createVersion2File(d); + static File initV2LedgerDirectory(File d) throws Exception { + createVersion2File(initV1LedgerDirectory(d)); return d; } - private static void testUpgradeProceedure(String zkServers, String journalDir, String ledgerDir) throws Exception { + static File initV2LedgerDirectoryWithIndexDir(File ledgerDir, File indexDir) throws Exception { + initV1LedgerDirectoryWithIndexDir(ledgerDir, indexDir); + createVersion2File(ledgerDir); + createVersion2File(indexDir); + return ledgerDir; + } + + private static void testUpgradeProcedure(String zkServers, String journalDir, String ledgerDir, String indexDir) + throws Exception { ServerConfiguration conf = 
TestBKConfiguration.newServerConfiguration(); conf.setMetadataServiceUri("zk://" + zkServers + "/ledgers"); conf.setJournalDirName(journalDir) - .setLedgerDirNames(new String[] { ledgerDir }) - .setBookiePort(bookiePort); + .setLedgerDirNames(new String[]{ledgerDir}) + .setIndexDirName(new String[]{indexDir}) + .setBookiePort(bookiePort); Bookie b = null; - try { - b = new Bookie(conf); + + try (MetadataBookieDriver metadataDriver = BookieResources.createMetadataDriver( + conf, NullStatsLogger.INSTANCE); + RegistrationManager rm = metadataDriver.createRegistrationManager()) { + TestBookieImpl.Resources resources = new TestBookieImpl.ResourceBuilder(conf) + .withMetadataDriver(metadataDriver).withRegistrationManager(rm).build(); + b = new TestBookieImpl(resources); fail("Shouldn't have been able to start"); - } catch (BookieException.InvalidCookieException e) { + } catch (IOException e) { // correct behaviour assertTrue("wrong exception", e.getMessage().contains("upgrade needed")); } FileSystemUpgrade.upgrade(conf); // should work fine - b = new Bookie(conf); - b.start(); - b.shutdown(); + try (MetadataBookieDriver metadataDriver = BookieResources.createMetadataDriver( + conf, NullStatsLogger.INSTANCE); + RegistrationManager rm = metadataDriver.createRegistrationManager()) { + TestBookieImpl.Resources resources = new TestBookieImpl.ResourceBuilder(conf) + .withMetadataDriver(metadataDriver).withRegistrationManager(rm).build(); + b = new TestBookieImpl(resources); + b.start(); + b.shutdown(); + } b = null; FileSystemUpgrade.rollback(conf); - try { - b = new Bookie(conf); + try (MetadataBookieDriver metadataDriver = BookieResources.createMetadataDriver( + conf, NullStatsLogger.INSTANCE); + RegistrationManager rm = metadataDriver.createRegistrationManager()) { + TestBookieImpl.Resources resources = new TestBookieImpl.ResourceBuilder(conf) + .withMetadataDriver(metadataDriver).withRegistrationManager(rm).build(); + b = new TestBookieImpl(resources); fail("Shouldn't have been able to start"); - } catch (BookieException.InvalidCookieException e) { + } catch (IOException e) { // correct behaviour assertTrue("wrong exception", e.getMessage().contains("upgrade needed")); } FileSystemUpgrade.upgrade(conf); FileSystemUpgrade.finalizeUpgrade(conf); - b = new Bookie(conf); - b.start(); - b.shutdown(); + try (MetadataBookieDriver metadataDriver = BookieResources.createMetadataDriver( + conf, NullStatsLogger.INSTANCE); + RegistrationManager rm = metadataDriver.createRegistrationManager()) { + TestBookieImpl.Resources resources = new TestBookieImpl.ResourceBuilder(conf) + .withMetadataDriver(metadataDriver).withRegistrationManager(rm).build(); + b = new TestBookieImpl(resources); + b.start(); + b.shutdown(); + } b = null; } @Test public void testUpgradeV1toCurrent() throws Exception { - File journalDir = newV1JournalDirectory(); - tmpDirs.add(journalDir); - File ledgerDir = newV1LedgerDirectory(); - tmpDirs.add(ledgerDir); - testUpgradeProceedure(zkUtil.getZooKeeperConnectString(), journalDir.getPath(), ledgerDir.getPath()); + File journalDir = initV1JournalDirectory(tmpDirs.createNew("bookie", "journal")); + File ledgerDir = initV1LedgerDirectory(tmpDirs.createNew("bookie", "ledger")); + testUpgradeProcedure(zkUtil.getZooKeeperConnectString(), journalDir.getPath(), + ledgerDir.getPath(), ledgerDir.getPath()); + } + + @Test + public void testUpgradeV1toCurrentWithIndexDir() throws Exception { + File journalDir = initV1JournalDirectory(tmpDirs.createNew("bookie", "journal")); + File indexDir = 
tmpDirs.createNew("bookie", "index"); + File ledgerDir = initV1LedgerDirectoryWithIndexDir( + tmpDirs.createNew("bookie", "ledger"), indexDir); + testUpgradeProcedure(zkUtil.getZooKeeperConnectString(), journalDir.getPath(), + ledgerDir.getPath(), indexDir.getPath()); } @Test public void testUpgradeV2toCurrent() throws Exception { - File journalDir = newV2JournalDirectory(); - tmpDirs.add(journalDir); - File ledgerDir = newV2LedgerDirectory(); - tmpDirs.add(ledgerDir); - testUpgradeProceedure(zkUtil.getZooKeeperConnectString(), journalDir.getPath(), ledgerDir.getPath()); + File journalDir = initV2JournalDirectory(tmpDirs.createNew("bookie", "journal")); + File ledgerDir = initV2LedgerDirectory(tmpDirs.createNew("bookie", "ledger")); + File indexDir = tmpDirs.createNew("bookie", "index"); + testUpgradeProcedure(zkUtil.getZooKeeperConnectString(), journalDir.getPath(), + ledgerDir.getPath(), indexDir.getPath()); + } + + @Test + public void testUpgradeV2toCurrentWithIndexDir() throws Exception { + File journalDir = initV2JournalDirectory(tmpDirs.createNew("bookie", "journal")); + File indexDir = tmpDirs.createNew("bookie", "index"); + File ledgerDir = initV2LedgerDirectoryWithIndexDir( + tmpDirs.createNew("bookie", "ledger"), indexDir); + testUpgradeProcedure(zkUtil.getZooKeeperConnectString(), journalDir.getPath(), + ledgerDir.getPath(), indexDir.getPath()); } @Test public void testUpgradeCurrent() throws Exception { - File journalDir = newV2JournalDirectory(); - tmpDirs.add(journalDir); - File ledgerDir = newV2LedgerDirectory(); - tmpDirs.add(ledgerDir); - testUpgradeProceedure(zkUtil.getZooKeeperConnectString(), journalDir.getPath(), ledgerDir.getPath()); + testUpgradeCurrent(false); + } + + @Test + public void testUpgradeCurrentWithIndexDir() throws Exception { + testUpgradeCurrent(true); + } + + public void testUpgradeCurrent(boolean hasIndexDir) throws Exception { + File journalDir = initV2JournalDirectory(tmpDirs.createNew("bookie", "journal")); + File ledgerDir = tmpDirs.createNew("bookie", "ledger"); + File indexDir = ledgerDir; + if (hasIndexDir) { + indexDir = tmpDirs.createNew("bookie", "index"); + initV2LedgerDirectoryWithIndexDir(ledgerDir, indexDir); + } else { + initV2LedgerDirectory(ledgerDir); + } + + testUpgradeProcedure(zkUtil.getZooKeeperConnectString(), journalDir.getPath(), + ledgerDir.getPath(), indexDir.getPath()); // Upgrade again ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); @@ -221,7 +314,13 @@ public void testUpgradeCurrent() throws Exception { .setBookiePort(bookiePort) .setMetadataServiceUri(zkUtil.getMetadataServiceUri()); FileSystemUpgrade.upgrade(conf); // should work fine with current directory - Bookie b = new Bookie(conf); + MetadataBookieDriver metadataDriver = BookieResources.createMetadataDriver( + conf, NullStatsLogger.INSTANCE); + RegistrationManager rm = metadataDriver.createRegistrationManager(); + + TestBookieImpl.Resources resources = new TestBookieImpl.ResourceBuilder(conf) + .withMetadataDriver(metadataDriver).withRegistrationManager(rm).build(); + Bookie b = new TestBookieImpl(resources); b.start(); b.shutdown(); } @@ -259,4 +358,43 @@ public void testCommandLine() throws Exception { System.setErr(origerr); } } + + @Test + public void testFSUGetAllDirectories() throws Exception { + ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); + final File journalDir = tmpDirs.createNew("bookie", "journal"); + final File ledgerDir1 = tmpDirs.createNew("bookie", "ledger"); + final File ledgerDir2 = 
tmpDirs.createNew("bookie", "ledger"); + + // test1 + conf.setJournalDirName(journalDir.getPath()) + .setLedgerDirNames(new String[]{ledgerDir1.getPath(), ledgerDir2.getPath()}) + .setIndexDirName(new String[]{ledgerDir1.getPath(), ledgerDir2.getPath()}); + List allDirectories = FileSystemUpgrade.getAllDirectories(conf); + assertEquals(3, allDirectories.size()); + + // test2 + conf.setJournalDirName(journalDir.getPath()) + .setLedgerDirNames(new String[]{ledgerDir1.getPath(), ledgerDir2.getPath()}) + .setIndexDirName(new String[]{ledgerDir2.getPath(), ledgerDir1.getPath()}); + allDirectories = FileSystemUpgrade.getAllDirectories(conf); + assertEquals(3, allDirectories.size()); + + final File indexDir1 = tmpDirs.createNew("bookie", "index"); + final File indexDir2 = tmpDirs.createNew("bookie", "index"); + + // test3 + conf.setJournalDirName(journalDir.getPath()) + .setLedgerDirNames(new String[]{ledgerDir1.getPath(), ledgerDir2.getPath()}) + .setIndexDirName(new String[]{indexDir1.getPath(), indexDir2.getPath()}); + allDirectories = FileSystemUpgrade.getAllDirectories(conf); + assertEquals(5, allDirectories.size()); + + // test4 + conf.setJournalDirName(journalDir.getPath()) + .setLedgerDirNames(new String[]{ledgerDir1.getPath(), ledgerDir2.getPath()}) + .setIndexDirName(new String[]{indexDir2.getPath(), indexDir1.getPath()}); + allDirectories = FileSystemUpgrade.getAllDirectories(conf); + assertEquals(5, allDirectories.size()); + } } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/datainteg/CookieValidationTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/datainteg/CookieValidationTest.java new file mode 100644 index 00000000000..158bde903fc --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/datainteg/CookieValidationTest.java @@ -0,0 +1,326 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.bookkeeper.bookie.datainteg; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThan; +import static org.hamcrest.Matchers.not; +import static org.hamcrest.Matchers.notNullValue; +import static org.mockito.Mockito.any; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; + +import com.google.common.collect.Lists; +import java.io.File; +import java.io.FileOutputStream; +import java.net.UnknownHostException; +import java.nio.charset.StandardCharsets; +import java.util.List; +import java.util.concurrent.CompletableFuture; +import org.apache.bookkeeper.bookie.BookieException; +import org.apache.bookkeeper.bookie.BookieImpl; +import org.apache.bookkeeper.bookie.Cookie; +import org.apache.bookkeeper.common.concurrent.FutureUtils; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.discover.MockRegistrationManager; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.test.TmpDirs; +import org.apache.bookkeeper.util.BookKeeperConstants; +import org.apache.bookkeeper.versioning.Version; +import org.apache.bookkeeper.versioning.Versioned; +import org.junit.After; +import org.junit.Assert; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Test the DataIntegrityCookieValidation implementation of CookieValidation. + */ +@SuppressWarnings("deprecation") +public class CookieValidationTest { + private static Logger log = LoggerFactory.getLogger(CookieValidationTest.class); + final TmpDirs tmpDirs = new TmpDirs(); + + @After + public void cleanup() throws Exception { + tmpDirs.cleanup(); + } + + private File initializedDir() throws Exception { + File dir = tmpDirs.createNew("cookie", "validation"); + BookieImpl.checkDirectoryStructure(BookieImpl.getCurrentDirectory(dir)); + return dir; + } + + private static ServerConfiguration serverConf(boolean stampMissingCookies) { + ServerConfiguration conf = new ServerConfiguration(); + conf.setDataIntegrityStampMissingCookiesEnabled(stampMissingCookies); + conf.setAdvertisedAddress("foobar"); + return conf; + } + + private Versioned<byte[]> genCookie(ServerConfiguration conf) throws UnknownHostException { + return new Versioned<>(Cookie.generateCookie(conf).build().toString() + .getBytes(StandardCharsets.UTF_8), Version.NEW); + } + + @Test + public void testNoZkCookieAndEmptyDirsStampsNewCookie() throws Exception { + List<File> dirs = Lists.newArrayList(initializedDir(), + initializedDir()); + + ServerConfiguration conf = serverConf(false); + BookieId bookieId = BookieImpl.getBookieId(conf); + MockRegistrationManager regManager = new MockRegistrationManager(); + DataIntegrityCookieValidation v = new DataIntegrityCookieValidation( + conf, regManager, new MockDataIntegrityCheck()); + v.checkCookies(dirs); + + byte[] cookieBytes = regManager.readCookie(bookieId).getValue(); + assertThat(cookieBytes, notNullValue()); + assertThat(cookieBytes.length, greaterThan(0)); + + Cookie regManagerCookie = Cookie.parseFromBytes(cookieBytes); + + for (File d : dirs) { + assertThat(Cookie.readFromDirectory(d), equalTo(regManagerCookie)); + } + } + + @Test(expected = BookieException.InvalidCookieException.class) + public void testZkCookieAndEmptyDirsRaisesErrorWithoutMissingCookieStamping() throws Exception { + List<File> dirs = Lists.newArrayList(initializedDir(), + initializedDir()); + + ServerConfiguration conf = serverConf(false); + BookieId bookieId = BookieImpl.getBookieId(conf); + MockRegistrationManager regManager = new MockRegistrationManager(); + regManager.writeCookie(bookieId, genCookie(conf)); + DataIntegrityCookieValidation v = new DataIntegrityCookieValidation( + conf, regManager, new MockDataIntegrityCheck()); + v.checkCookies(dirs); + } + + @Test + public void testZkCookieAndEmptyDirsStampsNewCookieWithMissingCookieStamping() throws Exception { + List<File> dirs = Lists.newArrayList(initializedDir(), + initializedDir()); + + ServerConfiguration conf = serverConf(true); + BookieId bookieId = BookieImpl.getBookieId(conf); + MockRegistrationManager regManager = new MockRegistrationManager(); + regManager.writeCookie(bookieId, genCookie(conf)); + DataIntegrityCookieValidation v = new DataIntegrityCookieValidation( + conf, regManager, new MockDataIntegrityCheck()); + v.checkCookies(dirs); + + byte[] cookieBytes = regManager.readCookie(bookieId).getValue(); + assertThat(cookieBytes, notNullValue()); + assertThat(cookieBytes.length, greaterThan(0)); + + Cookie regManagerCookie = Cookie.parseFromBytes(cookieBytes); + + for (File d : dirs) { + assertThat(Cookie.readFromDirectory(d), equalTo(regManagerCookie)); + } + } + + @Test(expected = BookieException.InvalidCookieException.class) + public void testMissingZKCookieRaisesError() throws Exception { + List<File> dirs = Lists.newArrayList(initializedDir(), + initializedDir()); + + ServerConfiguration conf = serverConf(true); + + MockRegistrationManager regManager = new MockRegistrationManager(); + DataIntegrityCookieValidation v1 = new DataIntegrityCookieValidation( + conf, regManager, new MockDataIntegrityCheck()); + v1.checkCookies(dirs); + + MockRegistrationManager blankRegManager = new MockRegistrationManager(); + DataIntegrityCookieValidation v2 = new DataIntegrityCookieValidation( + conf, blankRegManager, new MockDataIntegrityCheck()); + v2.checkCookies(dirs); + } + + @Test + public void testMatchingCookiesTakesNoAction() throws Exception { + List<File> dirs = Lists.newArrayList(initializedDir(), + initializedDir()); + + ServerConfiguration conf = serverConf(true); + + MockRegistrationManager regManager = new MockRegistrationManager(); + DataIntegrityCookieValidation v1 = new DataIntegrityCookieValidation( + conf, regManager, new MockDataIntegrityCheck()); + v1.checkCookies(dirs); // stamp original cookies + + DataIntegrityCookieValidation v2 = new DataIntegrityCookieValidation( + conf, regManager, new MockDataIntegrityCheck()); + v2.checkCookies(dirs); // should find cookies and return successfully + } + + @Test + public void testEmptyDirectoryTriggersIntegrityCheck() throws Exception { + List<File> dirs = Lists.newArrayList(initializedDir(), + initializedDir()); + ServerConfiguration conf = serverConf(true); + + MockRegistrationManager regManager = new MockRegistrationManager(); + MockDataIntegrityCheck dataIntegCheck = spy(new MockDataIntegrityCheck()); + DataIntegrityCookieValidation v1 = new DataIntegrityCookieValidation( + conf, regManager, dataIntegCheck); + v1.checkCookies(dirs); // stamp original cookies + verify(dataIntegCheck, times(0)).runPreBootCheck("INVALID_COOKIE"); + + dirs.add(initializedDir()); + v1.checkCookies(dirs); // new empty dir should trigger the integrity check + verify(dataIntegCheck, times(1)).runPreBootCheck("INVALID_COOKIE"); + + v1.checkCookies(dirs); // no new dirs, so no further integrity check + verify(dataIntegCheck, times(1)).runPreBootCheck("INVALID_COOKIE"); + } + + @Test + public void testErrorInIntegrityCheckPreventsStamping() throws Exception { + List<File> dirs = Lists.newArrayList(initializedDir(), + initializedDir()); + + ServerConfiguration conf = serverConf(true); + + MockRegistrationManager regManager = spy(new MockRegistrationManager()); + MockDataIntegrityCheck dataIntegCheck = spy(new MockDataIntegrityCheck() { + @Override + public CompletableFuture<Void> runPreBootCheck(String reason) { + return FutureUtils.exception(new BookieException.InvalidCookieException("blah")); + } + }); + + DataIntegrityCookieValidation v1 = new DataIntegrityCookieValidation( + conf, regManager, dataIntegCheck); + + v1.checkCookies(dirs); // stamp original cookies + verify(dataIntegCheck, times(0)).runPreBootCheck("INVALID_COOKIE"); + verify(regManager, times(1)).writeCookie(any(), any()); + + // add a directory to trigger data integrity check + dirs.add(initializedDir()); + try { + v1.checkCookies(dirs); // should fail: the integrity check errors out + Assert.fail("failure of data integrity should fail cookie check"); + } catch (BookieException.InvalidCookieException e) { + // expected + } + verify(dataIntegCheck, times(1)).runPreBootCheck("INVALID_COOKIE"); + verify(regManager, times(1)).writeCookie(any(), any()); + + // running the check again should run data integrity again, as stamping didn't happen + try { + v1.checkCookies(dirs); // should fail: the integrity check errors out + Assert.fail("failure of data integrity should fail cookie check"); + } catch (BookieException.InvalidCookieException e) { + // expected + } + verify(dataIntegCheck, times(2)).runPreBootCheck("INVALID_COOKIE"); + verify(regManager, times(1)).writeCookie(any(), any()); + } + + @Test + public void testChangingBookieIdRaisesError() throws Exception { + List<File> dirs = Lists.newArrayList(initializedDir(), + initializedDir()); + ServerConfiguration conf = serverConf(true); + MockRegistrationManager regManager = new MockRegistrationManager(); + DataIntegrityCookieValidation v1 = new DataIntegrityCookieValidation( + conf, regManager, new MockDataIntegrityCheck()); + v1.checkCookies(dirs); // stamp original cookies + + conf.setAdvertisedAddress("barfoo"); + DataIntegrityCookieValidation v2 = new DataIntegrityCookieValidation( + conf, regManager, new MockDataIntegrityCheck()); + try { + v2.checkCookies(dirs); // should fail as cookie not found in ZK, but exists in dirs + Assert.fail("Check shouldn't have succeeded with new bookieId"); + } catch (BookieException.InvalidCookieException ice) { + // expected + } + + conf.setAdvertisedAddress("foobar"); + DataIntegrityCookieValidation v3 = new DataIntegrityCookieValidation( + conf, regManager, new MockDataIntegrityCheck()); + v3.checkCookies(dirs); // should succeed as the cookie is same as before + } + + @Test + public void testMismatchLocalCookie() throws Exception { + List<File> dirs = Lists.newArrayList(initializedDir(), + initializedDir()); + + ServerConfiguration conf = serverConf(true); + + MockDataIntegrityCheck dataIntegCheck = spy(new MockDataIntegrityCheck()); + MockRegistrationManager regManager = spy(new MockRegistrationManager()); + DataIntegrityCookieValidation v1 = new DataIntegrityCookieValidation( + conf, regManager, dataIntegCheck); + v1.checkCookies(dirs); // stamp original cookies + + verify(dataIntegCheck, times(0)).runPreBootCheck("INVALID_COOKIE"); + verify(regManager, times(1)).writeCookie(any(), any()); + + Cookie current = Cookie.readFromDirectory(dirs.get(0)); + Cookie mismatch = Cookie.newBuilder(current).setBookieId("mismatch:3181").build(); + mismatch.writeToDirectory(dirs.get(0)); + assertThat(current, not(Cookie.readFromDirectory(dirs.get(0)))); + + v1.checkCookies(dirs);
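+ // the mismatched directory cookie should trigger the integrity check and be re-stamped to match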
+ verify(dataIntegCheck, times(1)).runPreBootCheck("INVALID_COOKIE"); + verify(regManager, times(2)).writeCookie(any(), any()); + + Cookie afterCheck = Cookie.readFromDirectory(dirs.get(0)); + assertThat(afterCheck, equalTo(current)); + } + + @Test(expected = BookieException.InvalidCookieException.class) + public void testCorruptLocalCookie() throws Exception { + List<File> dirs = Lists.newArrayList(initializedDir(), + initializedDir()); + + ServerConfiguration conf = serverConf(true); + + MockDataIntegrityCheck dataIntegCheck = spy(new MockDataIntegrityCheck()); + MockRegistrationManager regManager = spy(new MockRegistrationManager()); + DataIntegrityCookieValidation v1 = new DataIntegrityCookieValidation( + conf, regManager, dataIntegCheck); + v1.checkCookies(dirs); // stamp original cookies + + verify(dataIntegCheck, times(0)).runPreBootCheck("INVALID_COOKIE"); + verify(regManager, times(1)).writeCookie(any(), any()); + + File cookieFile = new File(dirs.get(0), BookKeeperConstants.VERSION_FILENAME); + try (FileOutputStream out = new FileOutputStream(cookieFile)) { + out.write(0xdeadbeef); + } + v1.checkCookies(dirs); // should throw + } +} + diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/datainteg/DataIntegrityCheckTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/datainteg/DataIntegrityCheckTest.java new file mode 100644 index 00000000000..6d4c7a122f0 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/datainteg/DataIntegrityCheckTest.java @@ -0,0 +1,1540 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
+ */ + +package org.apache.bookkeeper.bookie.datainteg; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.containsInAnyOrder; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.instanceOf; +import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.isIn; +import static org.hamcrest.Matchers.not; +import static org.mockito.Mockito.any; +import static org.mockito.Mockito.anyInt; +import static org.mockito.Mockito.anyLong; +import static org.mockito.Mockito.eq; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; + +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Lists; +import com.google.common.collect.Sets; +import io.netty.buffer.ByteBuf; +import io.reactivex.rxjava3.core.Single; +import io.reactivex.rxjava3.exceptions.CompositeException; +import io.reactivex.rxjava3.observers.TestObserver; +import io.reactivex.rxjava3.schedulers.Schedulers; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.Collectors; +import java.util.stream.LongStream; +import org.apache.bookkeeper.bookie.BookieException; +import org.apache.bookkeeper.bookie.BookieImpl; +import org.apache.bookkeeper.bookie.LedgerStorage.StorageState; +import org.apache.bookkeeper.bookie.MockLedgerStorage; +import org.apache.bookkeeper.client.BKException; +import org.apache.bookkeeper.client.BookKeeperAdmin; +import org.apache.bookkeeper.client.LedgerMetadataBuilder; +import org.apache.bookkeeper.client.api.DigestType; +import org.apache.bookkeeper.client.api.LedgerMetadata; +import org.apache.bookkeeper.common.concurrent.FutureUtils; +import org.apache.bookkeeper.common.util.MockTicker; +import org.apache.bookkeeper.common.util.OrderedExecutor; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.meta.MockLedgerManager; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.proto.MockBookieClient; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +/** + * Test of DataIntegrityCheckImpl. + */ +@SuppressWarnings("deprecation") +public class DataIntegrityCheckTest { + private static final byte[] PASSWD = new byte[0]; + + private final BookieId bookie1 = BookieId.parse("bookie1:3181"); + private final BookieId bookie2 = BookieId.parse("bookie2:3181"); + private final BookieId bookie3 = BookieId.parse("bookie3:3181"); + private final BookieId bookie4 = BookieId.parse("bookie4:3181"); + private final BookieId bookie5 = BookieId.parse("bookie5:3181"); + + private OrderedExecutor executor = null; + + @Before + public void setup() throws Exception { + executor = OrderedExecutor.newBuilder().numThreads(1).name("test").build(); + } + + @After + public void teardown() throws Exception { + if (executor != null) { + executor.shutdownNow(); + } + } + + private static ServerConfiguration serverConf() { + ServerConfiguration conf = new ServerConfiguration(); + conf.setAdvertisedAddress("foobar"); + return conf; + } + + private LedgerMetadataBuilder newMetadataWithEnsemble( + long ledgerId, + BookieId... 
bookies) { + return LedgerMetadataBuilder.create() + .withId(ledgerId) + .withPassword(new byte[0]) + .withDigestType(DigestType.CRC32C) + .withEnsembleSize(bookies.length) + .withWriteQuorumSize(bookies.length) + .withAckQuorumSize(bookies.length) + .newEnsembleEntry(0, Lists.newArrayList(bookies)); + } + + private LedgerMetadataBuilder newClosedMetadataWithEnsemble(long ledgerId, + long numEntries, + BookieId... bookies) { + return LedgerMetadataBuilder.create() + .withId(ledgerId) + .withPassword(new byte[0]) + .withDigestType(DigestType.CRC32C) + .withEnsembleSize(bookies.length) + .withWriteQuorumSize(bookies.length) + .withAckQuorumSize(bookies.length) + .newEnsembleEntry(0, Lists.newArrayList(bookies)) + .withLastEntryId(numEntries - 1) + .withLength(128 * numEntries) + .withClosedState(); + } + + @Test + public void testPrebootBookieIdInOpenSegmentMarkedInLimbo() throws Exception { + MockLedgerManager lm = new MockLedgerManager(); + + ServerConfiguration conf = serverConf(); + BookieId bookieId = BookieImpl.getBookieId(conf); + lm.createLedgerMetadata(0xbeefL, newMetadataWithEnsemble(0xbeefL, bookieId).build()).get(); + + MockLedgerStorage storage = new MockLedgerStorage(); + assertThat(storage.ledgerExists(0xbeefL), is(false)); + DataIntegrityCheckImpl impl = new DataIntegrityCheckImpl(BookieImpl.getBookieId(conf), lm, storage, + mock(EntryCopier.class), + mock(BookKeeperAdmin.class), + Schedulers.io()); + impl.runPreBootCheck("test").get(); + + assertThat(storage.hasLimboState(0xbeefL), is(true)); + assertThat(storage.isFenced(0xbeefL), is(true)); + } + + @Test + public void testPrebootFencedMarkedInLimbo() throws Exception { + MockLedgerManager lm = new MockLedgerManager(); + + ServerConfiguration conf = serverConf(); + BookieId bookieId = BookieImpl.getBookieId(conf); + lm.createLedgerMetadata(0xbeefL, + newMetadataWithEnsemble(0xbeefL, + BookieImpl.getBookieId(conf)).withInRecoveryState().build()).get(); + + MockLedgerStorage storage = new MockLedgerStorage(); + assertThat(storage.ledgerExists(0xbeefL), is(false)); + + DataIntegrityCheckImpl impl = new DataIntegrityCheckImpl(bookieId, lm, storage, + mock(EntryCopier.class), + mock(BookKeeperAdmin.class), + Schedulers.io()); + impl.runPreBootCheck("test").get(); + + assertThat(storage.hasLimboState(0xbeefL), is(true)); + assertThat(storage.isFenced(0xbeefL), is(true)); + } + + @Test + public void testPrebootClosedNotMarkedInLimbo() throws Exception { + MockLedgerManager lm = new MockLedgerManager(); + + ServerConfiguration conf = serverConf(); + BookieId bookieId = BookieImpl.getBookieId(conf); + lm.createLedgerMetadata(0xbeefL, + newMetadataWithEnsemble(0xbeefL, BookieImpl.getBookieId(conf)).withClosedState() + .withLength(100).withLastEntryId(1).build()).get(); + + MockLedgerStorage storage = new MockLedgerStorage(); + assertThat(storage.ledgerExists(0xbeefL), is(false)); + + DataIntegrityCheckImpl impl = new DataIntegrityCheckImpl(bookieId, lm, storage, + mock(EntryCopier.class), + mock(BookKeeperAdmin.class), + Schedulers.io()); + impl.runPreBootCheck("test").get(); + + assertThat(storage.hasLimboState(0xbeefL), is(false)); + assertThat(storage.isFenced(0xbeefL), is(false)); + } + + @Test + public void testPrebootFlushCalled() throws Exception { + MockLedgerManager lm = new MockLedgerManager(); + + ServerConfiguration conf = serverConf(); + BookieId bookieId = BookieImpl.getBookieId(conf); + lm.createLedgerMetadata(0xbeefL, newMetadataWithEnsemble( + 0xbeefL, BookieImpl.getBookieId(conf)).build()).get(); + + 
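+ // spy on the ledger storage so the flush() calls made by the preboot check can be verified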
MockLedgerStorage storage = spy(new MockLedgerStorage()); + assertThat(storage.ledgerExists(0xbeefL), is(false)); + + DataIntegrityCheckImpl impl = new DataIntegrityCheckImpl(bookieId, lm, storage, + mock(EntryCopier.class), + mock(BookKeeperAdmin.class), + Schedulers.io()); + verify(storage, times(0)).flush(); + impl.runPreBootCheck("test").get(); + verify(storage, times(1)).flush(); + + assertThat(storage.hasLimboState(0xbeefL), is(true)); + assertThat(storage.isFenced(0xbeefL), is(true)); + } + + @Test(expected = ExecutionException.class) + public void testFailureInPrebootMarkFailsAll() throws Exception { + MockLedgerManager lm = new MockLedgerManager(); + + ServerConfiguration conf = serverConf(); + BookieId bookieId = BookieImpl.getBookieId(conf); + lm.createLedgerMetadata(0xbeedL, newMetadataWithEnsemble(0xbeedL, bookieId).build()).get(); + lm.createLedgerMetadata(0xbeefL, newMetadataWithEnsemble(0xbeefL, bookieId).build()).get(); + lm.createLedgerMetadata(0xbee0L, newMetadataWithEnsemble(0xbee0L, bookieId).build()).get(); + + MockLedgerStorage storage = new MockLedgerStorage() { + @Override + public void setLimboState(long ledgerId) throws IOException { + if (ledgerId == 0xbeefL) { + throw new IOException("boom!"); + } else { + super.setLimboState(ledgerId); + } + } + }; + + DataIntegrityCheckImpl impl = new DataIntegrityCheckImpl(bookieId, lm, storage, + mock(EntryCopier.class), + mock(BookKeeperAdmin.class), + Schedulers.io()); + impl.runPreBootCheck("test").get(); + } + + @Test + public void testRecoverLimboOpensAndClears() throws Exception { + MockLedgerManager lm = new MockLedgerManager(); + ServerConfiguration conf = serverConf(); + BookieId bookieId = BookieImpl.getBookieId(conf); + MockLedgerStorage storage = spy(new MockLedgerStorage()); + DataIntegrityCheckImpl impl = new DataIntegrityCheckImpl(bookieId, lm, storage, + mock(EntryCopier.class), + mock(BookKeeperAdmin.class), + Schedulers.io()) { + @Override + Single<LedgerMetadata> recoverLedger(long ledgerId, String runId) { + return Single.just(newClosedMetadataWithEnsemble(ledgerId, -1, bookieId, bookie1).build()); + } + }; + + Map<Long, LedgerMetadata> ledgers = new HashMap<>(); + ledgers.put(0xf00L, newMetadataWithEnsemble(0xf00L, bookieId, bookie1).build()); + storage.setMasterKey(0xf00L, PASSWD); + storage.setLimboState(0xf00L); + ledgers.put(0xdeadL, newMetadataWithEnsemble(0xdeadL, bookieId, bookie1).build()); + storage.setMasterKey(0xdeadL, PASSWD); + storage.setLimboState(0xdeadL); + + Set<DataIntegrityCheckImpl.LedgerResult> results = impl.checkAndRecoverLedgers( + ledgers, "test").get(); + + assertThat(results.stream().filter(r -> r.isOK()).count(), equalTo(2L)); + verify(storage, times(1)).clearLimboState(0xf00L); + verify(storage, times(1)).clearLimboState(0xdeadL); + } + + @Test + public void testRecoverLimboErrorOnOpenOnlyAffectsThatOne() throws Exception { + MockLedgerManager lm = new MockLedgerManager(); + ServerConfiguration conf = serverConf(); + BookieId bookieId = BookieImpl.getBookieId(conf); + MockLedgerStorage storage = spy(new MockLedgerStorage()); + DataIntegrityCheckImpl impl = new DataIntegrityCheckImpl(bookieId, lm, storage, + mock(EntryCopier.class), + mock(BookKeeperAdmin.class), + Schedulers.io()) { + @Override + Single<LedgerMetadata> recoverLedger(long ledgerId, String runId) { + if (ledgerId == 0xf00L) { + return Single.error(new BKException.BKReadException()); + } else { + return Single.just(newClosedMetadataWithEnsemble(ledgerId, 0, bookieId, bookie1).build()); + } + } + }; + + Map<Long, LedgerMetadata> ledgers = new HashMap<>(); + ledgers.put(0xf00L, newMetadataWithEnsemble(0xf00L, bookieId, bookie1).build()); + storage.setMasterKey(0xf00L, PASSWD); + storage.setLimboState(0xf00L); + ledgers.put(0xdeadL, newMetadataWithEnsemble(0xdeadL, bookieId, bookie1).build()); + storage.setMasterKey(0xdeadL, PASSWD); + storage.setLimboState(0xdeadL); + + Set<DataIntegrityCheckImpl.LedgerResult> results = impl.checkAndRecoverLedgers(ledgers, "test").get(); + + assertThat(results.stream().filter(r -> r.isOK()).count(), equalTo(1L)); + assertThat(results.stream().filter(r -> r.isOK()).map(r -> r.getLedgerId()).findFirst().get(), + equalTo(0xdeadL)); + assertThat(results.stream().filter(r -> r.isError()).count(), equalTo(1L)); + assertThat(results.stream().filter(r -> r.isError()).map(r -> r.getLedgerId()).findFirst().get(), + equalTo(0xf00L)); + + verify(storage, times(0)).clearLimboState(0xf00L); + verify(storage, times(1)).clearLimboState(0xdeadL); + } + + @Test + public void testRecoverLimboNoSuchLedger() throws Exception { + MockLedgerManager lm = new MockLedgerManager(); + ServerConfiguration conf = serverConf(); + BookieId bookieId = BookieImpl.getBookieId(conf); + MockLedgerStorage storage = spy(new MockLedgerStorage()); + + DataIntegrityCheckImpl impl = new DataIntegrityCheckImpl(bookieId, lm, storage, + mock(EntryCopier.class), + mock(BookKeeperAdmin.class), + Schedulers.io()) { + @Override + Single<LedgerMetadata> recoverLedger(long ledgerId, String runId) { + if (ledgerId == 0xdeadL) { + return Single.error( + new BKException.BKNoSuchLedgerExistsOnMetadataServerException()); + } else { + return Single.just(newClosedMetadataWithEnsemble(ledgerId, -1, bookieId, bookie1).build()); + } + } + }; + + Map<Long, LedgerMetadata> ledgers = new HashMap<>(); + ledgers.put(0xf00L, newMetadataWithEnsemble(0xf00L, bookieId, bookie1).build()); + storage.setMasterKey(0xf00L, PASSWD); + storage.setLimboState(0xf00L); + ledgers.put(0xdeadL, newMetadataWithEnsemble(0xdeadL, bookieId, bookie1).build()); + storage.setMasterKey(0xdeadL, PASSWD); + storage.setLimboState(0xdeadL); + + Set<DataIntegrityCheckImpl.LedgerResult> results = impl.checkAndRecoverLedgers(ledgers, "test").get(); + + assertThat(results.stream().filter(r -> r.isOK()).count(), equalTo(1L)); + assertThat(results.stream().filter(r -> r.isOK()).map(r -> r.getLedgerId()).findFirst().get(), + equalTo(0xf00L)); + assertThat(results.stream().filter(r -> r.isMissing()).count(), equalTo(1L)); + assertThat(results.stream().filter(r -> r.isMissing()).map(r -> r.getLedgerId()).findFirst().get(), + equalTo(0xdeadL)); + + verify(storage, times(1)).clearLimboState(0xf00L); + verify(storage, times(0)).clearLimboState(0xdeadL); + } + + @Test + public void testRecoverLimboClearStateFailure() throws Exception { + MockLedgerManager lm = new MockLedgerManager(); + ServerConfiguration conf = serverConf(); + BookieId bookieId = BookieImpl.getBookieId(conf); + MockLedgerStorage storage = spy(new MockLedgerStorage() { + @Override + public void clearLimboState(long ledgerId) throws IOException { + if (ledgerId == 0xf00L) { + throw new IOException("foobar"); + } + } + }); + DataIntegrityCheckImpl impl = new DataIntegrityCheckImpl(bookieId, lm, storage, + mock(EntryCopier.class), + mock(BookKeeperAdmin.class), + Schedulers.io()) { + @Override + Single<LedgerMetadata> recoverLedger(long ledgerId, String runId) { + return Single.just(newClosedMetadataWithEnsemble(ledgerId, -1, bookieId, bookie1).build()); + } + }; + Map<Long, LedgerMetadata> ledgers = new HashMap<>(); + ledgers.put(0xf00L, newMetadataWithEnsemble(0xf00L, bookieId, bookie1).build()); + storage.setMasterKey(0xf00L, PASSWD); + storage.setLimboState(0xf00L); + ledgers.put(0xdeadL, newMetadataWithEnsemble(0xdeadL, bookieId, bookie1).build());
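+ // both ledgers are in limbo; clearing limbo state for 0xf00L will fail with an IOException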
storage.setMasterKey(0xdeadL, PASSWD); + storage.setLimboState(0xdeadL); + + Set<DataIntegrityCheckImpl.LedgerResult> results = impl.checkAndRecoverLedgers(ledgers, "test").get(); + + verify(storage, times(0)).flush(); + } + + // TODO: this disabled test exercised flush failure during limbo recovery via the old + // recoverLedgersInLimbo API; port it to checkAndRecoverLedgers or remove it. +// @Test +// public void testRecoverLimboFlushFailure() throws Exception { +// MockLedgerManager lm = new MockLedgerManager(); +// ServerConfiguration conf = serverConf(); +// BookieId bookieId = BookieImpl.getBookieId(conf); +// MockLedgerStorage storage = spy(new MockLedgerStorage() { +// @Override +// public void flush() throws IOException { +// throw new IOException("foobar"); +// } +// }); +// DataIntegrityCheckImpl impl = new DataIntegrityCheckImpl(bookieId, lm, storage, +// mock(EntryCopier.class), +// mock(BookKeeperAdmin.class), +// Schedulers.io()) { +// @Override +// CompletableFuture<Long> recoverLedgerIgnoreMissing(long ledgerId) { +// return CompletableFuture.completedFuture(ledgerId); +// } +// }; +// Set<Long> ledgers = Sets.newHashSet(0xf00L, 0xdeadL); +// +// try { +// impl.recoverLedgersInLimbo(ledgers).get(); +// Assert.fail("Shouldn't continue on an IOException"); +// } catch (ExecutionException ee) { +// assertThat(ee.getCause(), instanceOf(IOException.class)); +// } +// +// verify(storage, times(1)).clearLimboState(0xf00L); +// verify(storage, times(1)).clearLimboState(0xdeadL); +// } + + @Test + public void testRecoverLimboManyLedgers() throws Exception { + MockLedgerManager lm = new MockLedgerManager(); + ServerConfiguration conf = serverConf(); + BookieId bookieId = BookieImpl.getBookieId(conf); + List<Long> cleared = new ArrayList<>(); + MockLedgerStorage storage = spy(new MockLedgerStorage() { + @Override + public void clearLimboState(long ledgerId) { + // not using spy for this because it takes 10ms per ledger to verify + cleared.add(ledgerId); + } + }); + DataIntegrityCheckImpl impl = new DataIntegrityCheckImpl(bookieId, lm, storage, + mock(EntryCopier.class), + mock(BookKeeperAdmin.class), + Schedulers.io()) { + @Override + Single<LedgerMetadata> recoverLedger(long ledgerId, String runId) { + return Single.just(newClosedMetadataWithEnsemble(ledgerId, -1, bookieId, bookie1).build()); + } + }; + final long numLedgers = 10000; + long first = 1; + long last = first + numLedgers; + + Map<Long, LedgerMetadata> ledgers = new HashMap<>(); + for (long i = first; i < last; i++) { + LedgerMetadata metadata = newMetadataWithEnsemble(i, bookieId, bookie1).build(); + ledgers.put(i, metadata); + storage.setMasterKey(i, metadata.getPassword()); + storage.setLimboState(i); + } + assertThat(ledgers.size(), equalTo((int) numLedgers)); + + Set<DataIntegrityCheckImpl.LedgerResult> results = impl.checkAndRecoverLedgers(ledgers, "test").get(); + assertThat(results.size(), equalTo((int) numLedgers)); + assertThat(results.stream().filter(r -> r.isOK()).count(), equalTo(numLedgers)); + for (DataIntegrityCheckImpl.LedgerResult r : results) { + assertThat(r.isOK(), equalTo(true)); + ledgers.remove(r.getLedgerId()); + } + assertThat(ledgers.isEmpty(), equalTo(true)); + + Set<Long> clearedSet = Sets.newHashSet(cleared); + assertThat(clearedSet.size(), equalTo(cleared.size())); + for (long l : LongStream.range(first, last).toArray()) { + assertThat(l, isIn(clearedSet)); + } +
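+ // every ledger should have had its limbo state cleared exactly once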
verify(storage, times(10000)).clearLimboState(anyLong()); + } + + @Test + public void testRecoverLimboManyLedgersErrorOnFirst() throws Exception { + MockLedgerManager lm = new MockLedgerManager(); + ServerConfiguration conf = serverConf(); + BookieId bookieId = BookieImpl.getBookieId(conf); + List<Long> cleared = new ArrayList<>(); + MockLedgerStorage storage = spy(new MockLedgerStorage() { + @Override + public void clearLimboState(long ledgerId) { + // not using spy for this because it takes 10ms per ledger to verify + cleared.add(ledgerId); + } + }); + + final long numLedgers = 100; + long first = 1; + long last = first + numLedgers; + + DataIntegrityCheckImpl impl = new DataIntegrityCheckImpl(bookieId, lm, storage, + mock(EntryCopier.class), + mock(BookKeeperAdmin.class), + Schedulers.io()) { + @Override + Single<LedgerMetadata> recoverLedger(long ledgerId, String runId) { + if (ledgerId == first) { + return Single.error( + new BKException.BKBookieHandleNotAvailableException()); + } else { + return Single.just(newClosedMetadataWithEnsemble(ledgerId, -1, bookieId, bookie1).build()); + } + } + }; + Map<Long, LedgerMetadata> ledgers = new HashMap<>(); + for (long i = first; i < last; i++) { + LedgerMetadata metadata = newMetadataWithEnsemble(i, bookieId, bookie1).build(); + ledgers.put(i, metadata); + storage.setMasterKey(i, metadata.getPassword()); + storage.setLimboState(i); + } + assertThat(ledgers.size(), equalTo((int) numLedgers)); + + Set<DataIntegrityCheckImpl.LedgerResult> results = impl.checkAndRecoverLedgers(ledgers, "test").get(); + assertThat(results.size(), equalTo((int) numLedgers)); + assertThat(results.stream().filter(r -> r.isOK()).count(), equalTo(numLedgers - 1)); + assertThat(results.stream().filter(r -> r.isError()).count(), equalTo(1L)); + assertThat(results.stream().filter(r -> r.isError()).map(r -> r.getLedgerId()).findFirst().get(), + equalTo(first)); + Set<Long> clearedSet = Sets.newHashSet(cleared); + assertThat(clearedSet.size(), equalTo(cleared.size())); + for (long l : LongStream.range(first, last).toArray()) { + if (l == first) { + assertThat(l, not(isIn(clearedSet))); + } else { + assertThat(l, isIn(clearedSet)); + } + } + verify(storage, times((int) numLedgers - 1)).clearLimboState(anyLong()); + } + + @Test + public void testRecoverLimboNoLedgers() throws Exception { + MockLedgerManager lm = new MockLedgerManager(); + ServerConfiguration conf = serverConf(); + BookieId bookieId = BookieImpl.getBookieId(conf); + List<Long> cleared = new ArrayList<>(); + MockLedgerStorage storage = spy(new MockLedgerStorage()); + DataIntegrityCheckImpl impl = new DataIntegrityCheckImpl(bookieId, lm, storage, + mock(EntryCopier.class), + mock(BookKeeperAdmin.class), + Schedulers.io()) { + @Override + Single<LedgerMetadata> recoverLedger(long ledgerId, String runId) { + return Single.just(newClosedMetadataWithEnsemble(ledgerId, -1, bookieId, bookie1).build()); + } + }; + ImmutableMap<Long, LedgerMetadata> ledgers = ImmutableMap.of(); + Set<DataIntegrityCheckImpl.LedgerResult> resolved = + impl.checkAndRecoverLedgers(ledgers, "test").get(10, TimeUnit.SECONDS); + assertThat(resolved.isEmpty(), equalTo(true)); + verify(storage, times(0)).clearLimboState(anyLong()); + } + + + @Test + public void testRecoverSingleLedgerEntriesOnLedgerIDontHave() throws Exception { + MockBookieClient bookieClient = spy(new MockBookieClient(executor)); + MockLedgerManager lm = new MockLedgerManager(); + ServerConfiguration conf = serverConf(); + MockLedgerStorage storage = spy(new MockLedgerStorage()); + + EntryCopier copier = new EntryCopierImpl(bookie1, bookieClient, storage, new MockTicker()); + DataIntegrityCheckImpl impl = new DataIntegrityCheckImpl(bookie1, lm, storage, + copier, + mock(BookKeeperAdmin.class), + Schedulers.io()); + long id1 = 0xdeadL; + LedgerMetadata metadata1 = newClosedMetadataWithEnsemble(id1, 1000, bookie3, bookie2).build(); + bookieClient.getMockBookies().seedLedger(id1, metadata1); + + assertThat(storage.ledgerExists(id1), equalTo(false)); + + TestObserver<Void> observer = TestObserver.create(); + impl.checkAndRecoverLedgerEntries(id1, metadata1, "test").subscribe(observer); + observer.await().assertNoErrors(); + + assertThat(storage.ledgerExists(id1), equalTo(true)); // because we passed it in + for (long i = 0; i <= metadata1.getLastEntryId(); i++) { + assertThat(storage.entryExists(id1, i), equalTo(false)); + } + } + + @Test + public void testRecoverSingleLedgerNotClosedOneEnsemble() throws Exception { + MockBookieClient bookieClient = spy(new MockBookieClient(executor)); + MockLedgerManager lm = new MockLedgerManager(); + ServerConfiguration conf = serverConf(); + MockLedgerStorage storage = spy(new MockLedgerStorage()); + + EntryCopier copier = new EntryCopierImpl(bookie1, bookieClient, storage, new MockTicker()); + DataIntegrityCheckImpl impl = new DataIntegrityCheckImpl(bookie1, lm, storage, + copier, + mock(BookKeeperAdmin.class), + Schedulers.io()); + long id1 = 0xdeadL; + LedgerMetadata metadata1 = newMetadataWithEnsemble(id1, bookie1, bookie2).build(); + bookieClient.getMockBookies().seedLedgerForBookie(bookie2, id1, metadata1); + + assertThat(storage.ledgerExists(id1), equalTo(false)); + + TestObserver<Void> observer = TestObserver.create(); + impl.checkAndRecoverLedgerEntries(id1, metadata1, "test").subscribe(observer); + observer.await().assertNoErrors(); + + LedgerMetadata md1 = newMetadataWithEnsemble(id1, bookie1).build(); + assertThat(storage.ledgerExists(id1), equalTo(false)); + } + + @Test + public void testRecoverSingleLedgerNoClosedMultiEnsembleBookieInClosed() throws Exception { + MockBookieClient bookieClient = spy(new MockBookieClient(executor)); + MockLedgerManager lm = new MockLedgerManager(); + ServerConfiguration conf = serverConf(); + MockLedgerStorage storage = spy(new MockLedgerStorage()); + + EntryCopier copier = new EntryCopierImpl(bookie1, bookieClient, storage, new MockTicker()); + DataIntegrityCheckImpl impl = new DataIntegrityCheckImpl(bookie1, lm, storage, + copier, + mock(BookKeeperAdmin.class), + Schedulers.io()); + long id1 = 0xdeadL; + LedgerMetadata metadata1 = newMetadataWithEnsemble(id1, bookie1, bookie2) + .newEnsembleEntry(10L, Lists.newArrayList(bookie3, bookie2)).build(); + bookieClient.getMockBookies().seedLedgerForBookie(bookie2, id1, metadata1); + bookieClient.getMockBookies().seedLedgerForBookie(bookie3, id1, metadata1); + + assertThat(storage.ledgerExists(id1), equalTo(false)); + + TestObserver<Void> observer = TestObserver.create(); + impl.checkAndRecoverLedgerEntries(id1, metadata1, "test").subscribe(observer); + observer.await().assertNoErrors(); + + assertThat(storage.ledgerExists(id1), equalTo(true)); + for (long e = 0; e < 10; e++) { + assertThat(storage.entryExists(id1, e), equalTo(true)); + } + assertThat(storage.entryExists(id1, 10), equalTo(false)); + } + + @Test + public void testRecoverSingleLedgerNotClosedMultiEnsembleBookieInFinal() throws Exception { + MockBookieClient bookieClient = spy(new MockBookieClient(executor)); + MockLedgerManager lm = new MockLedgerManager(); + ServerConfiguration conf = serverConf(); + MockLedgerStorage storage = spy(new MockLedgerStorage()); + + EntryCopier copier = new EntryCopierImpl(bookie1, bookieClient, storage, new MockTicker());
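+ // the copier fetches entries this bookie is missing from the other ensemble members and writes them to local storage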
DataIntegrityCheckImpl impl = new DataIntegrityCheckImpl(bookie1, lm, storage, + copier, + mock(BookKeeperAdmin.class), + Schedulers.io()); + long id1 = 0xdeadL; + LedgerMetadata metadata1 = newMetadataWithEnsemble(id1, bookie3, bookie2) + .newEnsembleEntry(10L, Lists.newArrayList(bookie1, bookie2)).build(); + bookieClient.getMockBookies().seedLedgerForBookie(bookie2, id1, metadata1); + bookieClient.getMockBookies().seedLedgerForBookie(bookie3, id1, metadata1); + + assertThat(storage.ledgerExists(id1), equalTo(false)); + + TestObserver observer = TestObserver.create(); + impl.checkAndRecoverLedgerEntries(id1, metadata1, "test").subscribe(observer); + observer.await().assertNoErrors(); + + assertThat(storage.ledgerExists(id1), equalTo(true)); + } + + @Test + public void testRecoverSingleLedgerLargeEnsembleStriped() throws Exception { + + MockBookieClient bookieClient = spy(new MockBookieClient(executor)); + MockLedgerManager lm = new MockLedgerManager(); + ServerConfiguration conf = serverConf(); + MockLedgerStorage storage = spy(new MockLedgerStorage()); + + EntryCopier copier = new EntryCopierImpl(bookie4, bookieClient, storage, new MockTicker()); + DataIntegrityCheckImpl impl = new DataIntegrityCheckImpl(bookie4, lm, storage, + copier, + mock(BookKeeperAdmin.class), + Schedulers.io()); + long id1 = 0xdeadL; + LedgerMetadata metadata1 = LedgerMetadataBuilder.create() + .withId(id1) + .withPassword(new byte[0]) + .withDigestType(DigestType.CRC32C) + .withEnsembleSize(5) + .withWriteQuorumSize(2) + .withAckQuorumSize(2) + .newEnsembleEntry(0, Lists.newArrayList(bookie1, bookie2, bookie3, bookie4, bookie5)) + .withClosedState().withLastEntryId(10).withLength(1000) + .build(); + bookieClient.getMockBookies().seedLedgerForBookie(bookie1, id1, metadata1); + bookieClient.getMockBookies().seedLedgerForBookie(bookie2, id1, metadata1); + bookieClient.getMockBookies().seedLedgerForBookie(bookie3, id1, metadata1); + bookieClient.getMockBookies().seedLedgerForBookie(bookie5, id1, metadata1); + + assertThat(storage.ledgerExists(id1), equalTo(false)); + + TestObserver observer = TestObserver.create(); + impl.checkAndRecoverLedgerEntries(id1, metadata1, "test").subscribe(observer); + observer.await().assertNoErrors(); + + assertThat(storage.ledgerExists(id1), equalTo(true)); + assertThat(storage.entryExists(id1, 0), equalTo(false)); + assertThat(storage.entryExists(id1, 1), equalTo(false)); + assertThat(storage.entryExists(id1, 2), equalTo(true)); + assertThat(storage.entryExists(id1, 3), equalTo(true)); + assertThat(storage.entryExists(id1, 4), equalTo(false)); + assertThat(storage.entryExists(id1, 5), equalTo(false)); + assertThat(storage.entryExists(id1, 6), equalTo(false)); + assertThat(storage.entryExists(id1, 7), equalTo(true)); + assertThat(storage.entryExists(id1, 8), equalTo(true)); + assertThat(storage.entryExists(id1, 9), equalTo(false)); + assertThat(storage.entryExists(id1, 10), equalTo(false)); + } + + @Test + public void testRecoverSingleLedgerEntriesOnlyEntriesNeeded() throws Exception { + MockBookieClient bookieClient = spy(new MockBookieClient(executor)); + MockLedgerManager lm = new MockLedgerManager(); + ServerConfiguration conf = serverConf(); + MockLedgerStorage storage = spy(new MockLedgerStorage()); + + EntryCopier copier = new EntryCopierImpl(bookie1, bookieClient, storage, new MockTicker()); + DataIntegrityCheckImpl impl = new DataIntegrityCheckImpl(bookie1, lm, storage, + copier, + mock(BookKeeperAdmin.class), + Schedulers.io()); + long id1 = 0xdeadL; + LedgerMetadata metadata1 
= newClosedMetadataWithEnsemble(id1, 1000, bookie3, bookie2) + .newEnsembleEntry(10, Lists.newArrayList(bookie1, bookie2)) + .newEnsembleEntry(100, Lists.newArrayList(bookie3, bookie2)).build(); + bookieClient.getMockBookies().seedLedgerForBookie(bookie2, id1, metadata1); + bookieClient.getMockBookies().seedLedgerForBookie(bookie3, id1, metadata1); + + assertThat(storage.ledgerExists(id1), equalTo(false)); + + TestObserver observer = TestObserver.create(); + impl.checkAndRecoverLedgerEntries(id1, metadata1, "test").subscribe(observer); + observer.await().assertNoErrors(); + + assertThat(storage.ledgerExists(id1), equalTo(true)); + assertThat(storage.entryExists(id1, 9), equalTo(false)); + for (long e = 10; e < 100; e++) { + assertThat(storage.entryExists(id1, e), equalTo(true)); + } + assertThat(storage.entryExists(id1, 100), equalTo(false)); + } + + @Test + public void testRecoverSingleLedgerEntriesOnlyEntriesNeededEverySecond() throws Exception { + MockBookieClient bookieClient = spy(new MockBookieClient(executor)); + MockLedgerManager lm = new MockLedgerManager(); + ServerConfiguration conf = serverConf(); + MockLedgerStorage storage = spy(new MockLedgerStorage()); + + EntryCopier copier = new EntryCopierImpl(bookie1, bookieClient, storage, new MockTicker()); + DataIntegrityCheckImpl impl = new DataIntegrityCheckImpl(bookie1, lm, storage, + copier, + mock(BookKeeperAdmin.class), + Schedulers.io()); + long id1 = 0xdeadL; + LedgerMetadata metadata1 = newClosedMetadataWithEnsemble(id1, 1000, bookie1, bookie2).build(); + bookieClient.getMockBookies().seedLedgerForBookie(bookie2, id1, metadata1); + long added = 0; + storage.setMasterKey(id1, PASSWD); + for (long e = 0; e <= metadata1.getLastEntryId(); e++) { + if (e % 2 == 0) { + storage.addEntry(bookieClient.getMockBookies().generateEntry(id1, e, e - 1)); + added++; + } + } + assertThat(storage.ledgerExists(id1), equalTo(true)); + + TestObserver observer = TestObserver.create(); + impl.checkAndRecoverLedgerEntries(id1, metadata1, "test").subscribe(observer); + observer.await().assertNoErrors(); + + for (long e = 0; e <= metadata1.getLastEntryId(); e++) { + if (e % 2 == 0) { + verify(bookieClient, times(0)).readEntry(any(), eq(id1), eq(e), + any(), any(), anyInt()); + } + if (e % 2 == 1) { + verify(bookieClient, times(1)).readEntry(any(), eq(id1), eq(e), + any(), any(), anyInt()); + } + + assertThat(storage.entryExists(id1, e), equalTo(true)); + } + } + + @Test + public void testRecoverSingleLedgerErrorAtStart() throws Exception { + MockBookieClient bookieClient = spy(new MockBookieClient(executor)); + MockLedgerManager lm = new MockLedgerManager(); + ServerConfiguration conf = serverConf(); + MockLedgerStorage storage = spy(new MockLedgerStorage()); + + EntryCopier copier = new EntryCopierImpl(bookie1, bookieClient, storage, new MockTicker()); + DataIntegrityCheckImpl impl = new DataIntegrityCheckImpl(bookie1, lm, storage, + copier, + mock(BookKeeperAdmin.class), + Schedulers.io()); + long id1 = 0xdeadL; + LedgerMetadata metadata1 = newClosedMetadataWithEnsemble(id1, 1000, bookie1, bookie2).build(); + + // only seed for ledger1 & ledger3 + bookieClient.getMockBookies().seedLedgerForBookie(bookie2, id1, metadata1); + bookieClient.setPreReadHook((bookie, ledger, entry) -> { + if (entry == 0L) { + return FutureUtils.exception(new BKException.BKReadException()); + } else { + return CompletableFuture.completedFuture(null); + } + }); + + TestObserver observer = TestObserver.create(); + impl.checkAndRecoverLedgerEntries(id1, metadata1, 
"test").subscribe(observer); + observer.await().assertError((t) -> { + return t instanceof BKException.BKReadException; + }); + assertThat(storage.entryExists(id1, 0), equalTo(false)); + for (long e = 1; e <= metadata1.getLastEntryId(); e++) { + assertThat(storage.entryExists(id1, e), equalTo(true)); + } + } + + @Test + public void testRecoverSingleLedgerErrorEverySecond() throws Exception { + MockBookieClient bookieClient = spy(new MockBookieClient(executor)); + MockLedgerManager lm = new MockLedgerManager(); + ServerConfiguration conf = serverConf(); + MockLedgerStorage storage = spy(new MockLedgerStorage()); + + EntryCopier copier = new EntryCopierImpl(bookie1, bookieClient, storage, new MockTicker()); + DataIntegrityCheckImpl impl = new DataIntegrityCheckImpl(bookie1, lm, storage, + copier, + mock(BookKeeperAdmin.class), + Schedulers.io()); + long id1 = 0xdeadL; + LedgerMetadata metadata1 = newClosedMetadataWithEnsemble(id1, 1000, bookie1, bookie2).build(); + + // only seed for ledger1 & ledger3 + bookieClient.getMockBookies().seedLedgerForBookie(bookie2, id1, metadata1); + bookieClient.setPreReadHook((bookie, ledger, entry) -> { + if (entry % 2 == 0) { + return FutureUtils.exception(new BKException.BKReadException()); + } else { + return CompletableFuture.completedFuture(null); + } + }); + + TestObserver observer = TestObserver.create(); + impl.checkAndRecoverLedgerEntries(id1, metadata1, "test").subscribe(observer); + observer.await().assertError((t) -> { + if (t instanceof CompositeException) { + CompositeException e = (CompositeException) t; + for (Throwable t2 : e.getExceptions()) { + if (!(t2 instanceof BKException.BKReadException)) { + return false; + } + } + return e.getExceptions().size() == 500; + } else { + return false; + } + }); + for (long e = 0; e <= metadata1.getLastEntryId(); e++) { + if (e % 2 == 0) { + assertThat(storage.entryExists(id1, e), equalTo(false)); + } else { + assertThat(storage.entryExists(id1, e), equalTo(true)); + } + } + } + + @Test + public void testRecoverSingleLedgerErrorOneOnStore() throws Exception { + MockBookieClient bookieClient = spy(new MockBookieClient(executor)); + MockLedgerManager lm = new MockLedgerManager(); + ServerConfiguration conf = serverConf(); + MockLedgerStorage storage = spy(new MockLedgerStorage() { + @Override + public long addEntry(ByteBuf entry) throws IOException, BookieException { + long entryId = extractEntryId(entry); + if (entryId > 10 && entryId <= 100) { + throw new IOException("Don't feel like storing these"); + } + return super.addEntry(entry); + } + }); + + EntryCopier copier = new EntryCopierImpl(bookie1, bookieClient, storage, new MockTicker()); + DataIntegrityCheckImpl impl = new DataIntegrityCheckImpl(bookie1, lm, storage, + copier, + mock(BookKeeperAdmin.class), + Schedulers.io()); + long id1 = 0xdeadL; + LedgerMetadata metadata1 = newClosedMetadataWithEnsemble(id1, 1000, bookie1, bookie2).build(); + // only seed for ledger1 & ledger3 + bookieClient.getMockBookies().seedLedgerForBookie(bookie2, id1, metadata1); + + TestObserver observer = TestObserver.create(); + impl.checkAndRecoverLedgerEntries(id1, metadata1, "test").subscribe(observer); + observer.await().assertError((t) -> { + if (t instanceof CompositeException) { + CompositeException e = (CompositeException) t; + for (Throwable t2 : e.getExceptions()) { + boolean failStore = t2 instanceof IOException; + if (!failStore) { + return false; + } + } + return e.getExceptions().size() == 90; + } else { + return false; + } + }); + for (long e = 0; e <= 10; 
e++) { + assertThat(storage.entryExists(id1, e), equalTo(true)); + } + for (long e = 11; e <= 100; e++) { + assertThat(storage.entryExists(id1, e), equalTo(false)); + } + for (long e = 101; e <= metadata1.getLastEntryId(); e++) { + assertThat(storage.entryExists(id1, e), equalTo(true)); + } + } + + @Test + public void testRecoverMultiLedgers() throws Exception { + MockBookieClient bookieClient = spy(new MockBookieClient(executor)); + MockLedgerManager lm = new MockLedgerManager(); + ServerConfiguration conf = serverConf(); + MockLedgerStorage storage = spy(new MockLedgerStorage()); + + EntryCopier copier = new EntryCopierImpl(bookie1, bookieClient, storage, new MockTicker()); + DataIntegrityCheckImpl impl = new DataIntegrityCheckImpl(bookie1, lm, storage, + copier, + mock(BookKeeperAdmin.class), + Schedulers.io()); + long id1 = 0xdeadL; + long id2 = 0xbedeL; + long id3 = 0xbebeL; + LedgerMetadata metadata1 = newClosedMetadataWithEnsemble(id1, 1000, bookie1, bookie2).build(); + LedgerMetadata metadata2 = newClosedMetadataWithEnsemble(id2, 1000, bookie1, bookie3).build(); + LedgerMetadata metadata3 = newClosedMetadataWithEnsemble(id3, 1000, bookie1, bookie3).build(); + + bookieClient.getMockBookies().seedLedgerForBookie(bookie2, id1, metadata1); + bookieClient.getMockBookies().seedLedgerForBookie(bookie3, id2, metadata2); + bookieClient.getMockBookies().seedLedgerForBookie(bookie3, id3, metadata3); + + assertThat(storage.ledgerExists(id1), equalTo(false)); + assertThat(storage.ledgerExists(id2), equalTo(false)); + assertThat(storage.ledgerExists(id3), equalTo(false)); + Map ledgers = ImmutableMap.of( + id1, metadata1, id2, metadata2, id3, metadata3); + Set resolved = + impl.checkAndRecoverLedgers(ledgers, "test").get(10, TimeUnit.SECONDS); + assertThat(resolved.stream().filter(r -> r.isOK()).count(), equalTo(3L)); + assertThat(resolved.stream().filter(r -> r.isOK()).map(r -> r.getLedgerId()) + .collect(Collectors.toSet()), containsInAnyOrder(id1, id2, id3)); + for (long e = 0; e <= metadata1.getLastEntryId(); e++) { + assertThat(storage.entryExists(id1, e), equalTo(true)); + assertThat(storage.entryExists(id2, e), equalTo(true)); + assertThat(storage.entryExists(id3, e), equalTo(true)); + } + } + + @Test + public void testRecoverMultiLedgersOneUnavailable() throws Exception { + MockBookieClient bookieClient = spy(new MockBookieClient(executor)); + MockLedgerManager lm = new MockLedgerManager(); + ServerConfiguration conf = serverConf(); + MockLedgerStorage storage = spy(new MockLedgerStorage()); + + EntryCopier copier = new EntryCopierImpl(bookie1, bookieClient, storage, new MockTicker()); + DataIntegrityCheckImpl impl = new DataIntegrityCheckImpl(bookie1, lm, storage, + copier, + mock(BookKeeperAdmin.class), + Schedulers.io()); + long id1 = 0xdeadL; + long id2 = 0xbedeL; + long id3 = 0xbebeL; + LedgerMetadata metadata1 = newClosedMetadataWithEnsemble(id1, 1000, bookie1, bookie2).build(); + LedgerMetadata metadata2 = newClosedMetadataWithEnsemble(id2, 1000, bookie1, bookie3).build(); + LedgerMetadata metadata3 = newClosedMetadataWithEnsemble(id3, 1000, bookie1, bookie3).build(); + + // id2 will be unavailable because there's no entries + bookieClient.getMockBookies().seedLedgerForBookie(bookie2, id1, metadata1); + bookieClient.getMockBookies().seedLedgerForBookie(bookie3, id3, metadata3); + + assertThat(storage.ledgerExists(id1), equalTo(false)); + assertThat(storage.ledgerExists(id2), equalTo(false)); + assertThat(storage.ledgerExists(id3), equalTo(false)); + + Map ledgers = 
ImmutableMap.of( + id1, metadata1, id2, metadata2, id3, metadata3); + Set resolved = + impl.checkAndRecoverLedgers(ledgers, "test").get(10, TimeUnit.SECONDS); + assertThat(resolved.stream().filter(r -> r.isOK()).count(), equalTo(2L)); + assertThat(resolved.stream().filter(r -> r.isError()).count(), equalTo(1L)); + assertThat(resolved.stream().filter(r -> r.isOK()).map(r -> r.getLedgerId()) + .collect(Collectors.toSet()), containsInAnyOrder(id1, id3)); + for (long e = 0; e <= metadata1.getLastEntryId(); e++) { + assertThat(storage.entryExists(id1, e), equalTo(true)); + assertThat(storage.entryExists(id3, e), equalTo(true)); + } + } + + @Test + public void testRecoverMultiLedgersOneFailsToWriteLocally() throws Exception { + long id1 = 0xdeadL; + long id2 = 0xbedeL; + long id3 = 0xbebeL; + + MockBookieClient bookieClient = spy(new MockBookieClient(executor)); + MockLedgerManager lm = new MockLedgerManager(); + ServerConfiguration conf = serverConf(); + MockLedgerStorage storage = spy(new MockLedgerStorage() { + @Override + public long addEntry(ByteBuf entry) throws IOException, BookieException { + if (extractLedgerId(entry) == id1 + && extractEntryId(entry) == 3) { + throw new IOException("Don't feel like storing this"); + } + return super.addEntry(entry); + } + }); + + EntryCopier copier = new EntryCopierImpl(bookie1, bookieClient, storage, new MockTicker()); + DataIntegrityCheckImpl impl = new DataIntegrityCheckImpl(bookie1, lm, storage, + copier, + mock(BookKeeperAdmin.class), + Schedulers.io()); + LedgerMetadata metadata1 = newClosedMetadataWithEnsemble(id1, 1000, bookie1, bookie2).build(); + LedgerMetadata metadata2 = newClosedMetadataWithEnsemble(id2, 1000, bookie1, bookie3).build(); + LedgerMetadata metadata3 = newClosedMetadataWithEnsemble(id3, 1000, bookie1, bookie3).build(); + + bookieClient.getMockBookies().seedLedgerForBookie(bookie2, id1, metadata1); + bookieClient.getMockBookies().seedLedgerForBookie(bookie3, id2, metadata2); + bookieClient.getMockBookies().seedLedgerForBookie(bookie3, id3, metadata3); + + assertThat(storage.ledgerExists(id1), equalTo(false)); + assertThat(storage.ledgerExists(id2), equalTo(false)); + assertThat(storage.ledgerExists(id3), equalTo(false)); + + Map ledgers = ImmutableMap.of( + id1, metadata1, id2, metadata2, id3, metadata3); + + Set resolved = + impl.checkAndRecoverLedgers(ledgers, "test").get(10, TimeUnit.SECONDS); + assertThat(resolved.stream().filter(r -> r.isOK()).count(), equalTo(2L)); + assertThat(resolved.stream().filter(r -> r.isOK()) + .map(r -> r.getLedgerId()).collect(Collectors.toSet()), + containsInAnyOrder(id2, id3)); + assertThat(resolved.stream().filter(r -> r.isError()) + .map(r -> r.getLedgerId()).collect(Collectors.toSet()), + containsInAnyOrder(id1)); + + for (long e = 0; e <= metadata1.getLastEntryId(); e++) { + assertThat(storage.entryExists(id1, e), equalTo(e != 3)); + assertThat(storage.entryExists(id2, e), equalTo(true)); + assertThat(storage.entryExists(id3, e), equalTo(true)); + } + } + + @Test + public void testRecoverMultiLedgersAllUnavailable() throws Exception { + MockBookieClient bookieClient = spy(new MockBookieClient(executor)); + MockLedgerManager lm = new MockLedgerManager(); + ServerConfiguration conf = serverConf(); + MockLedgerStorage storage = spy(new MockLedgerStorage()); + + EntryCopier copier = new EntryCopierImpl(bookie1, bookieClient, storage, new MockTicker()); + DataIntegrityCheckImpl impl = new DataIntegrityCheckImpl(bookie1, lm, storage, + copier, + mock(BookKeeperAdmin.class), + Schedulers.io()); + 
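+        // note: no bookie is seeded with any entries in this test, so all three
+        // recoveries below should resolve as errors while the ledgers are still
+        // created, empty, in local storage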
long id1 = 0xdeadL; + long id2 = 0xbedeL; + long id3 = 0xbebeL; + LedgerMetadata metadata1 = newClosedMetadataWithEnsemble(id1, 1000, bookie1, bookie2).build(); + LedgerMetadata metadata2 = newClosedMetadataWithEnsemble(id2, 1000, bookie1, bookie3).build(); + LedgerMetadata metadata3 = newClosedMetadataWithEnsemble(id3, 1000, bookie1, bookie3).build(); + + assertThat(storage.ledgerExists(id1), equalTo(false)); + assertThat(storage.ledgerExists(id2), equalTo(false)); + assertThat(storage.ledgerExists(id3), equalTo(false)); + + Map ledgers = ImmutableMap.of( + id1, metadata1, id2, metadata2, id3, metadata3); + + Set resolved = + impl.checkAndRecoverLedgers(ledgers, "test").get(10, TimeUnit.SECONDS); + assertThat(resolved.stream().filter(r -> r.isOK()).count(), equalTo(0L)); + assertThat(resolved.stream().filter(r -> r.isError()).count(), equalTo(3L)); + assertThat(storage.ledgerExists(id1), equalTo(true)); + assertThat(storage.entryExists(id1, 0), equalTo(false)); + assertThat(storage.ledgerExists(id2), equalTo(true)); + assertThat(storage.entryExists(id2, 0), equalTo(false)); + assertThat(storage.ledgerExists(id3), equalTo(true)); + assertThat(storage.entryExists(id3, 0), equalTo(false)); + } + + @Test + public void testEnsemblesContainBookie() throws Exception { + LedgerMetadata md1 = newMetadataWithEnsemble(1, bookie1).build(); + assertThat(DataIntegrityCheckImpl.ensemblesContainBookie(md1, bookie1), equalTo(true)); + assertThat(DataIntegrityCheckImpl.ensemblesContainBookie(md1, bookie2), equalTo(false)); + assertThat(DataIntegrityCheckImpl.ensemblesContainBookie(md1, bookie3), equalTo(false)); + + LedgerMetadata md2 = newMetadataWithEnsemble(2, bookie1, bookie2) + .newEnsembleEntry(1, Lists.newArrayList(bookie2, bookie3)).build(); + assertThat(DataIntegrityCheckImpl.ensemblesContainBookie(md2, bookie1), equalTo(true)); + assertThat(DataIntegrityCheckImpl.ensemblesContainBookie(md2, bookie2), equalTo(true)); + assertThat(DataIntegrityCheckImpl.ensemblesContainBookie(md2, bookie3), equalTo(true)); + + LedgerMetadata md3 = newMetadataWithEnsemble(3, bookie1, bookie2) + .newEnsembleEntry(1, Lists.newArrayList(bookie2, bookie1)).build(); + assertThat(DataIntegrityCheckImpl.ensemblesContainBookie(md3, bookie1), equalTo(true)); + assertThat(DataIntegrityCheckImpl.ensemblesContainBookie(md3, bookie2), equalTo(true)); + assertThat(DataIntegrityCheckImpl.ensemblesContainBookie(md3, bookie3), equalTo(false)); + } + + @Test + public void testMetadataCacheLoad() throws Exception { + MockBookieClient bookieClient = spy(new MockBookieClient(executor)); + MockLedgerManager lm = new MockLedgerManager(); + ServerConfiguration conf = serverConf(); + MockLedgerStorage storage = spy(new MockLedgerStorage()); + + EntryCopier copier = new EntryCopierImpl(bookie1, bookieClient, storage, new MockTicker()); + DataIntegrityCheckImpl impl = new DataIntegrityCheckImpl(bookie1, lm, storage, + copier, + mock(BookKeeperAdmin.class), + Schedulers.io()); + long id1 = 0xdeadL; + long id2 = 0xbedeL; + long id3 = 0xbebeL; + LedgerMetadata metadata1 = newClosedMetadataWithEnsemble(id1, 1000, bookie1, bookie2).build(); + LedgerMetadata metadata2 = newClosedMetadataWithEnsemble(id2, 1000, bookie1, bookie3).build(); + LedgerMetadata metadata3 = newClosedMetadataWithEnsemble(id3, 1000, bookie1, bookie3).build(); + + lm.createLedgerMetadata(id1, metadata1).get(); + lm.createLedgerMetadata(id2, metadata2).get(); + lm.createLedgerMetadata(id3, metadata3).get(); + + Map ledgers = impl.getCachedOrReadMetadata("test").get(); + 
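+        // all three ledgers exist only in the ledger manager at this point, so
+        // a cold cache load should read and return every one of them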
assertThat(ledgers.keySet(), containsInAnyOrder(id1, id2, id3)); + } + + @Test + public void testFullCheckCacheLoadAndProcessIfEmpty() throws Exception { + MockBookieClient bookieClient = spy(new MockBookieClient(executor)); + MockLedgerManager lm = new MockLedgerManager(); + ServerConfiguration conf = serverConf(); + MockLedgerStorage storage = spy(new MockLedgerStorage()); + + EntryCopier copier = new EntryCopierImpl(bookie1, bookieClient, storage, new MockTicker()); + DataIntegrityCheckImpl impl = new DataIntegrityCheckImpl(bookie1, lm, storage, + copier, + mock(BookKeeperAdmin.class), + Schedulers.io()); + long id1 = 0xdeadL; + long id2 = 0xbedeL; + long id3 = 0xbebeL; + LedgerMetadata metadata1 = newClosedMetadataWithEnsemble(id1, 1000, bookie1, bookie2).build(); + LedgerMetadata metadata2 = newClosedMetadataWithEnsemble(id2, 1000, bookie1, bookie3).build(); + LedgerMetadata metadata3 = newClosedMetadataWithEnsemble(id3, 1000, bookie1, bookie3).build(); + + bookieClient.getMockBookies().seedLedgerForBookie(bookie2, id1, metadata1); + bookieClient.getMockBookies().seedLedgerForBookie(bookie3, id2, metadata2); + bookieClient.getMockBookies().seedLedgerForBookie(bookie3, id3, metadata3); + + lm.createLedgerMetadata(id1, metadata1).get(); + lm.createLedgerMetadata(id2, metadata2).get(); + lm.createLedgerMetadata(id3, metadata3).get(); + + assertThat(storage.ledgerExists(id1), equalTo(false)); + assertThat(storage.ledgerExists(id2), equalTo(false)); + assertThat(storage.ledgerExists(id3), equalTo(false)); + + impl.runFullCheck().get(); + + assertThat(storage.ledgerExists(id1), equalTo(true)); + assertThat(storage.ledgerExists(id2), equalTo(true)); + assertThat(storage.ledgerExists(id3), equalTo(true)); + } + + @Test + public void testFullCheckCacheLoadAndProcessSomeInLimbo() throws Exception { + MockBookieClient bookieClient = spy(new MockBookieClient(executor)); + MockLedgerManager lm = new MockLedgerManager(); + ServerConfiguration conf = serverConf(); + MockLedgerStorage storage = spy(new MockLedgerStorage()); + + EntryCopier copier = new EntryCopierImpl(bookie1, bookieClient, storage, new MockTicker()); + long id1 = 0xdeadL; + long id2 = 0xbedeL; + long id3 = 0xbebeL; + LedgerMetadata metadata1 = newClosedMetadataWithEnsemble(id1, 1000, bookie1, bookie2).build(); + LedgerMetadata metadata2 = newClosedMetadataWithEnsemble(id2, 1000, bookie1, bookie3).build(); + LedgerMetadata metadata3 = newMetadataWithEnsemble(id3, bookie1, bookie3).build(); + LedgerMetadata metadata3closed = newClosedMetadataWithEnsemble(id3, 1000, bookie1, bookie3).build(); + + DataIntegrityCheckImpl impl = new DataIntegrityCheckImpl(bookie1, lm, storage, + copier, + mock(BookKeeperAdmin.class), + Schedulers.io()) { + @Override + Single recoverLedger(long ledgerId, String runId) { + return Single.just(metadata3closed); + } + }; + + bookieClient.getMockBookies().seedLedgerForBookie(bookie2, id1, metadata1); + bookieClient.getMockBookies().seedLedgerForBookie(bookie3, id2, metadata2); + bookieClient.getMockBookies().seedLedgerForBookie(bookie3, id3, metadata3closed); + + lm.createLedgerMetadata(id1, metadata1).get(); + lm.createLedgerMetadata(id2, metadata2).get(); + lm.createLedgerMetadata(id3, metadata3).get(); + + assertThat(storage.ledgerExists(id1), equalTo(false)); + assertThat(storage.ledgerExists(id2), equalTo(false)); + assertThat(storage.ledgerExists(id3), equalTo(false)); + storage.setMasterKey(id3, PASSWD); + storage.setLimboState(id3); + assertThat(storage.hasLimboState(id3), equalTo(true)); + + 
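+        // flag the storage as needing an integrity check; a successful full
+        // check should clear the flag again, which is asserted below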
storage.setStorageStateFlag(StorageState.NEEDS_INTEGRITY_CHECK); + assertThat(StorageState.NEEDS_INTEGRITY_CHECK, + isIn(storage.getStorageStateFlags())); + + impl.runFullCheck().get(); + + assertThat(StorageState.NEEDS_INTEGRITY_CHECK, + not(isIn(storage.getStorageStateFlags()))); + + assertThat(storage.ledgerExists(id1), equalTo(true)); + assertThat(storage.ledgerExists(id2), equalTo(true)); + assertThat(storage.ledgerExists(id3), equalTo(true)); + assertThat(storage.hasLimboState(id3), equalTo(false)); + } + + @Test + public void testFullCheckInLimboRecoveryFailsFirstTime() throws Exception { + MockBookieClient bookieClient = spy(new MockBookieClient(executor)); + MockLedgerManager lm = new MockLedgerManager(); + ServerConfiguration conf = serverConf(); + MockLedgerStorage storage = spy(new MockLedgerStorage()); + + EntryCopier copier = new EntryCopierImpl(bookie1, bookieClient, storage, new MockTicker()); + long id1 = 0xdeadL; + long id2 = 0xbedeL; + long id3 = 0xbebeL; + LedgerMetadata metadata1 = newClosedMetadataWithEnsemble(id1, 1000, bookie1, bookie2).build(); + LedgerMetadata metadata2 = newClosedMetadataWithEnsemble(id2, 1000, bookie1, bookie3).build(); + LedgerMetadata metadata3 = newMetadataWithEnsemble(id3, bookie1, bookie3).build(); + LedgerMetadata metadata3closed = newClosedMetadataWithEnsemble(id3, 1000, bookie1, bookie3).build(); + + AtomicInteger callCount = new AtomicInteger(0); + DataIntegrityCheckImpl impl = new DataIntegrityCheckImpl(bookie1, lm, storage, + copier, + mock(BookKeeperAdmin.class), + Schedulers.io()) { + @Override + Single recoverLedger(long ledgerId, String runId) { + if (callCount.getAndIncrement() == 0) { + return Single.error(new BKException.BKReadException()); + } else { + return Single.just(metadata3closed); + } + } + }; + + bookieClient.getMockBookies().seedLedgerForBookie(bookie2, id1, metadata1); + bookieClient.getMockBookies().seedLedgerForBookie(bookie3, id2, metadata2); + bookieClient.getMockBookies().seedLedgerForBookie(bookie3, id3, metadata3closed); + + lm.createLedgerMetadata(id1, metadata1).get(); + lm.createLedgerMetadata(id2, metadata2).get(); + lm.createLedgerMetadata(id3, metadata3).get(); + + assertThat(storage.ledgerExists(id1), equalTo(false)); + assertThat(storage.ledgerExists(id2), equalTo(false)); + assertThat(storage.ledgerExists(id3), equalTo(false)); + storage.setMasterKey(id3, PASSWD); + storage.setLimboState(id3); + assertThat(storage.hasLimboState(id3), equalTo(true)); + + storage.setStorageStateFlag(StorageState.NEEDS_INTEGRITY_CHECK); + assertThat(StorageState.NEEDS_INTEGRITY_CHECK, + isIn(storage.getStorageStateFlags())); + + impl.runFullCheck().get(); + + assertThat(StorageState.NEEDS_INTEGRITY_CHECK, + isIn(storage.getStorageStateFlags())); + verify(storage, times(1)).flush(); + + assertThat(storage.ledgerExists(id1), equalTo(true)); + assertThat(storage.ledgerExists(id2), equalTo(true)); + assertThat(storage.ledgerExists(id3), equalTo(true)); + assertThat(storage.entryExists(id3, 0), equalTo(false)); + assertThat(storage.hasLimboState(id3), equalTo(true)); + + // run again, second time shouldn't error + impl.runFullCheck().get(); + + assertThat(StorageState.NEEDS_INTEGRITY_CHECK, + not(isIn(storage.getStorageStateFlags()))); + verify(storage, times(2)).flush(); + + assertThat(storage.ledgerExists(id3), equalTo(true)); + assertThat(storage.entryExists(id3, 0), equalTo(true)); + assertThat(storage.hasLimboState(id3), equalTo(false)); + } + + @Test + public void testFullCheckInEntryCopyFailsFirstTime() throws 
Exception { + MockBookieClient bookieClient = spy(new MockBookieClient(executor)); + MockLedgerManager lm = new MockLedgerManager(); + ServerConfiguration conf = serverConf(); + MockLedgerStorage storage = spy(new MockLedgerStorage()); + + EntryCopier copier = new EntryCopierImpl(bookie1, bookieClient, storage, new MockTicker()); + long id1 = 0xdeadL; + long id2 = 0xbedeL; + long id3 = 0xbebeL; + LedgerMetadata metadata1 = newClosedMetadataWithEnsemble(id1, 100, bookie1, bookie2).build(); + LedgerMetadata metadata2 = newClosedMetadataWithEnsemble(id2, 100, bookie1, bookie3).build(); + LedgerMetadata metadata3 = newMetadataWithEnsemble(id3, bookie1, bookie3).build(); + LedgerMetadata metadata3closed = newClosedMetadataWithEnsemble(id3, 100, bookie1, bookie3).build(); + + DataIntegrityCheckImpl impl = new DataIntegrityCheckImpl(bookie1, lm, storage, + copier, + mock(BookKeeperAdmin.class), + Schedulers.io()) { + @Override + Single recoverLedger(long ledgerId, String runId) { + return Single.just(metadata3closed); + } + }; + + bookieClient.getMockBookies().seedLedgerForBookie(bookie2, id1, metadata1); + bookieClient.getMockBookies().seedLedgerForBookie(bookie3, id2, metadata2); + + lm.createLedgerMetadata(id1, metadata1).get(); + lm.createLedgerMetadata(id2, metadata2).get(); + lm.createLedgerMetadata(id3, metadata3).get(); + + assertThat(storage.ledgerExists(id1), equalTo(false)); + assertThat(storage.ledgerExists(id2), equalTo(false)); + assertThat(storage.ledgerExists(id3), equalTo(false)); + storage.setMasterKey(id3, PASSWD); + storage.setLimboState(id3); + assertThat(storage.hasLimboState(id3), equalTo(true)); + + storage.setStorageStateFlag(StorageState.NEEDS_INTEGRITY_CHECK); + assertThat(StorageState.NEEDS_INTEGRITY_CHECK, + isIn(storage.getStorageStateFlags())); + + impl.runFullCheck().get(); + + assertThat(StorageState.NEEDS_INTEGRITY_CHECK, + isIn(storage.getStorageStateFlags())); + verify(storage, times(1)).flush(); + + assertThat(storage.ledgerExists(id1), equalTo(true)); + assertThat(storage.ledgerExists(id2), equalTo(true)); + assertThat(storage.ledgerExists(id3), equalTo(true)); + assertThat(storage.entryExists(id3, 0), equalTo(false)); + assertThat(storage.hasLimboState(id3), equalTo(false)); + + // make it possible to recover the ledger by seeding bookie3 + bookieClient.getMockBookies().seedLedgerForBookie(bookie3, id3, metadata3closed); + + // run again, second time shouldn't error + impl.runFullCheck().get(); + + assertThat(StorageState.NEEDS_INTEGRITY_CHECK, + not(isIn(storage.getStorageStateFlags()))); + verify(storage, times(2)).flush(); + + assertThat(storage.ledgerExists(id3), equalTo(true)); + assertThat(storage.entryExists(id3, 0), equalTo(true)); + assertThat(storage.hasLimboState(id3), equalTo(false)); + } + + + @Test + public void testFullCheckAllInLimboAndMissing() throws Exception { + MockBookieClient bookieClient = spy(new MockBookieClient(executor)); + MockLedgerManager lm = new MockLedgerManager(); + ServerConfiguration conf = serverConf(); + MockLedgerStorage storage = spy(new MockLedgerStorage()); + + EntryCopier copier = new EntryCopierImpl(bookie1, bookieClient, storage, new MockTicker()); + long id1 = 0xdeadL; + long id2 = 0xbedeL; + long id3 = 0xbebeL; + LedgerMetadata metadata1 = newMetadataWithEnsemble(id1, bookie1, bookie2).build(); + LedgerMetadata metadata2 = newMetadataWithEnsemble(id2, bookie1, bookie3).build(); + LedgerMetadata metadata3 = newMetadataWithEnsemble(id3, bookie1, bookie3).build(); + + DataIntegrityCheckImpl impl = new 
DataIntegrityCheckImpl(bookie1, lm, storage, + copier, + mock(BookKeeperAdmin.class), + Schedulers.io()) { + @Override + Single recoverLedger(long ledgerId, String runId) { + return Single.error( + new BKException.BKNoSuchLedgerExistsOnMetadataServerException()); + } + }; + + lm.createLedgerMetadata(id1, metadata1).get(); + lm.createLedgerMetadata(id2, metadata2).get(); + lm.createLedgerMetadata(id3, metadata3).get(); + + assertThat(storage.ledgerExists(id1), equalTo(false)); + assertThat(storage.ledgerExists(id2), equalTo(false)); + assertThat(storage.ledgerExists(id3), equalTo(false)); + storage.setMasterKey(id1, PASSWD); + storage.setLimboState(id1); + storage.setMasterKey(id2, PASSWD); + storage.setLimboState(id2); + storage.setMasterKey(id3, PASSWD); + storage.setLimboState(id3); + assertThat(storage.hasLimboState(id1), equalTo(true)); + assertThat(storage.hasLimboState(id2), equalTo(true)); + assertThat(storage.hasLimboState(id3), equalTo(true)); + + storage.setStorageStateFlag(StorageState.NEEDS_INTEGRITY_CHECK); + assertThat(StorageState.NEEDS_INTEGRITY_CHECK, + isIn(storage.getStorageStateFlags())); + + impl.runFullCheck().get(); + + verify(storage, times(1)).flush(); + + assertThat(StorageState.NEEDS_INTEGRITY_CHECK, + not(isIn(storage.getStorageStateFlags()))); + } + + @Test + public void testFullCheckFailFlushRetainsFlag() throws Exception { + MockBookieClient bookieClient = spy(new MockBookieClient(executor)); + MockLedgerManager lm = new MockLedgerManager(); + ServerConfiguration conf = serverConf(); + AtomicInteger count = new AtomicInteger(0); + MockLedgerStorage storage = spy(new MockLedgerStorage() { + @Override + public void flush() throws IOException { + if (count.getAndIncrement() == 0) { + throw new IOException("broken flush"); + } + } + }); + + EntryCopier copier = new EntryCopierImpl(bookie1, bookieClient, storage, new MockTicker()); + long id1 = 0xdeadL; + long id2 = 0xbedeL; + long id3 = 0xbebeL; + LedgerMetadata metadata1 = newClosedMetadataWithEnsemble(id1, 100, bookie1, bookie2).build(); + LedgerMetadata metadata2 = newClosedMetadataWithEnsemble(id2, 100, bookie1, bookie3).build(); + LedgerMetadata metadata3 = newClosedMetadataWithEnsemble(id3, 100, bookie1, bookie3).build(); + + DataIntegrityCheckImpl impl = new DataIntegrityCheckImpl(bookie1, lm, storage, + copier, + mock(BookKeeperAdmin.class), + Schedulers.io()); + bookieClient.getMockBookies().seedLedgerForBookie(bookie2, id1, metadata1); + bookieClient.getMockBookies().seedLedgerForBookie(bookie3, id2, metadata2); + bookieClient.getMockBookies().seedLedgerForBookie(bookie3, id3, metadata3); + + lm.createLedgerMetadata(id1, metadata1).get(); + lm.createLedgerMetadata(id2, metadata2).get(); + lm.createLedgerMetadata(id3, metadata3).get(); + + assertThat(storage.ledgerExists(id1), equalTo(false)); + assertThat(storage.ledgerExists(id2), equalTo(false)); + assertThat(storage.ledgerExists(id3), equalTo(false)); + + storage.setStorageStateFlag(StorageState.NEEDS_INTEGRITY_CHECK); + try { + impl.runFullCheck().get(); + Assert.fail("Should have failed on flush"); + } catch (ExecutionException ee) { + assertThat(ee.getCause(), instanceOf(IOException.class)); + } + assertThat(StorageState.NEEDS_INTEGRITY_CHECK, + isIn(storage.getStorageStateFlags())); + verify(storage, times(1)).flush(); + + assertThat(storage.ledgerExists(id1), equalTo(true)); + assertThat(storage.ledgerExists(id2), equalTo(true)); + assertThat(storage.ledgerExists(id3), equalTo(true)); + + // run again, second time shouldn't error + 
impl.runFullCheck().get(); + + assertThat(StorageState.NEEDS_INTEGRITY_CHECK, + not(isIn(storage.getStorageStateFlags()))); + verify(storage, times(2)).flush(); + } +} + diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/datainteg/DataIntegrityServiceTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/datainteg/DataIntegrityServiceTest.java new file mode 100644 index 00000000000..ec9deecfab0 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/datainteg/DataIntegrityServiceTest.java @@ -0,0 +1,167 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bookkeeper.bookie.datainteg; + +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.Semaphore; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.server.conf.BookieConfiguration; +import org.apache.bookkeeper.stats.NullStatsLogger; +import org.junit.Assert; +import org.junit.Test; + +/** + * Test for DataIntegrityService. 
+ */ +public class DataIntegrityServiceTest { + private static DataIntegrityService newLowIntervalService(DataIntegrityCheck check) { + return new DataIntegrityService( + new BookieConfiguration(new ServerConfiguration()), + NullStatsLogger.INSTANCE, check) { + @Override + public int interval() { + return 1; + } + @Override + public TimeUnit intervalUnit() { + return TimeUnit.MICROSECONDS; + } + }; + } + + @Test + public void testFullCheckRunsIfRequested() throws Exception { + CompletableFuture promise = new CompletableFuture<>(); + MockDataIntegrityCheck check = new MockDataIntegrityCheck() { + @Override + public boolean needsFullCheck() { + return true; + } + @Override + public CompletableFuture runFullCheck() { + promise.complete(null); + return super.runFullCheck(); + } + }; + DataIntegrityService service = newLowIntervalService(check); + try { + service.start(); + + promise.get(5, TimeUnit.SECONDS); + } finally { + service.stop(); + } + } + + @Test + public void testFullCheckDoesntRunIfNotRequested() throws Exception { + CompletableFuture promise = new CompletableFuture<>(); + MockDataIntegrityCheck check = new MockDataIntegrityCheck() { + @Override + public boolean needsFullCheck() { + return false; + } + @Override + public CompletableFuture runFullCheck() { + promise.complete(null); + return super.runFullCheck(); + } + }; + DataIntegrityService service = newLowIntervalService(check); + try { + service.start(); + + try { + // timeout set very low, so hard to tell if + // it's waiting to run or not running, but it + // should be the latter on any modern machine + promise.get(100, TimeUnit.MILLISECONDS); + Assert.fail("Shouldn't have run"); + } catch (TimeoutException te) { + // expected + } + } finally { + service.stop(); + } + } + + @Test + public void testFullCheckRunsMultipleTimes() throws Exception { + AtomicInteger count = new AtomicInteger(0); + CompletableFuture promise = new CompletableFuture<>(); + MockDataIntegrityCheck check = new MockDataIntegrityCheck() { + @Override + public boolean needsFullCheck() { + return true; + } + @Override + public CompletableFuture runFullCheck() { + if (count.incrementAndGet() == 10) { + promise.complete(null); + } + return super.runFullCheck(); + } + }; + DataIntegrityService service = newLowIntervalService(check); + try { + service.start(); + + promise.get(10, TimeUnit.SECONDS); + } finally { + service.stop(); + } + } + + @Test + public void testRunDontRunThenRunAgain() throws Exception { + AtomicBoolean needsFullCheck = new AtomicBoolean(true); + Semaphore semaphore = new Semaphore(1); + semaphore.acquire(); // increment the count, can only be released by a check + MockDataIntegrityCheck check = new MockDataIntegrityCheck() { + @Override + public boolean needsFullCheck() { + return needsFullCheck.getAndSet(false); + } + @Override + public CompletableFuture runFullCheck() { + semaphore.release(); + return super.runFullCheck(); + } + }; + DataIntegrityService service = newLowIntervalService(check); + try { + service.start(); + + Assert.assertTrue("Check should have run", + semaphore.tryAcquire(10, TimeUnit.SECONDS)); + Assert.assertFalse("Check shouldn't run again", + semaphore.tryAcquire(100, TimeUnit.MILLISECONDS)); + needsFullCheck.set(true); + Assert.assertTrue("Check should run again", + semaphore.tryAcquire(10, TimeUnit.SECONDS)); + } finally { + service.stop(); + } + } +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/datainteg/EntryCopierTest.java 
b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/datainteg/EntryCopierTest.java new file mode 100644 index 00000000000..8f692239b9b --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/datainteg/EntryCopierTest.java @@ -0,0 +1,631 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bookkeeper.bookie.datainteg; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.contains; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.instanceOf; +import static org.mockito.Mockito.any; +import static org.mockito.Mockito.anyInt; +import static org.mockito.Mockito.anyLong; +import static org.mockito.Mockito.eq; +import static org.mockito.Mockito.inOrder; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; + +import com.google.common.collect.Lists; +import com.google.common.collect.Sets; +import io.netty.buffer.ByteBuf; +import java.io.IOException; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; +import org.apache.bookkeeper.bookie.BookieException; +import org.apache.bookkeeper.bookie.MockLedgerStorage; +import org.apache.bookkeeper.client.BKException; +import org.apache.bookkeeper.client.LedgerMetadataBuilder; +import org.apache.bookkeeper.client.api.DigestType; +import org.apache.bookkeeper.client.api.LedgerMetadata; +import org.apache.bookkeeper.common.concurrent.FutureUtils; +import org.apache.bookkeeper.common.util.MockTicker; +import org.apache.bookkeeper.common.util.OrderedExecutor; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.proto.MockBookieClient; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.mockito.InOrder; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Tests for EntryCopierImpl. 
+ */ +@SuppressWarnings("deprecation") +public class EntryCopierTest { + private static final Logger log = LoggerFactory.getLogger(EntryCopierTest.class); + private static final BookieId bookie1 = BookieId.parse("bookie1:3181"); + private static final BookieId bookie2 = BookieId.parse("bookie2:3181"); + private static final BookieId bookie3 = BookieId.parse("bookie3:3181"); + private static final BookieId bookie4 = BookieId.parse("bookie4:3181"); + private static final BookieId bookie5 = BookieId.parse("bookie5:3181"); + private static final BookieId bookie6 = BookieId.parse("bookie6:3181"); + + private OrderedExecutor executor = null; + + @Before + public void setup() throws Exception { + executor = OrderedExecutor.newBuilder().numThreads(1).name("test").build(); + } + + @After + public void teardown() throws Exception { + if (executor != null) { + executor.shutdownNow(); + } + } + + @Test + public void testCopyFromAvailable() throws Exception { + MockBookieClient bookieClient = spy(new MockBookieClient(executor)); + MockLedgerStorage storage = spy(new MockLedgerStorage()); + long ledgerId = 0xbeeb; + LedgerMetadata metadata = LedgerMetadataBuilder.create() + .withId(ledgerId) + .withPassword(new byte[0]) + .withDigestType(DigestType.CRC32C) + .withEnsembleSize(2) + .withWriteQuorumSize(2) + .withAckQuorumSize(2) + .newEnsembleEntry(0, Lists.newArrayList(bookie1, bookie2)) + .withLastEntryId(10) + .withLength(1000) + .withClosedState() + .build(); + bookieClient.getMockBookies().seedLedger(ledgerId, metadata); + + EntryCopier copier = new EntryCopierImpl(bookie1, bookieClient, storage, new MockTicker()); + EntryCopier.Batch batch = copier.newBatch( + ledgerId, metadata); + + CompletableFuture f1 = batch.copyFromAvailable(0); + CompletableFuture f2 = batch.copyFromAvailable(2); + CompletableFuture f3 = batch.copyFromAvailable(4); + CompletableFuture f4 = batch.copyFromAvailable(10); + try { + batch.copyFromAvailable(100); + Assert.fail("Should have given IllegalArgumentException"); + } catch (IllegalArgumentException ie) { + // correct + } + + try { + batch.copyFromAvailable(-1); + Assert.fail("Should have given IllegalArgumentException"); + } catch (IllegalArgumentException ie) { + // correct + } + CompletableFuture.allOf(f1, f2, f3, f4).get(); + + verify(bookieClient, times(1)).readEntry(eq(bookie2), eq(ledgerId), eq(0L), + any(), any(), anyInt(), any()); + verify(bookieClient, times(1)).readEntry(eq(bookie2), eq(ledgerId), eq(2L), + any(), any(), anyInt(), any()); + verify(bookieClient, times(1)).readEntry(eq(bookie2), eq(ledgerId), eq(4L), + any(), any(), anyInt(), any()); + verify(bookieClient, times(1)).readEntry(eq(bookie2), eq(ledgerId), eq(10L), + any(), any(), anyInt(), any()); + verify(bookieClient, times(4)).readEntry(eq(bookie2), eq(ledgerId), anyLong(), + any(), any(), anyInt(), any()); + + verify(storage, times(4)).addEntry(any()); + assertThat(storage.entryExists(ledgerId, 0), equalTo(true)); + assertThat(storage.entryExists(ledgerId, 2), equalTo(true)); + assertThat(storage.entryExists(ledgerId, 4), equalTo(true)); + assertThat(storage.entryExists(ledgerId, 10), equalTo(true)); + } + + @Test + public void testNoCopiesAvailable() throws Exception { + MockBookieClient bookieClient = spy(new MockBookieClient(executor)); + MockLedgerStorage storage = spy(new MockLedgerStorage()); + long ledgerId = 0xbeeb; + LedgerMetadata metadata = LedgerMetadataBuilder.create() + .withId(ledgerId) + .withPassword(new byte[0]) + .withDigestType(DigestType.CRC32C) + .withEnsembleSize(1) + 
.withWriteQuorumSize(1)
+            .withAckQuorumSize(1)
+            .newEnsembleEntry(0, Lists.newArrayList(bookie1))
+            .withLastEntryId(10)
+            .withLength(1000)
+            .withClosedState()
+            .build();
+        bookieClient.getMockBookies().seedLedger(ledgerId, metadata);
+
+        EntryCopier copier = new EntryCopierImpl(bookie1, bookieClient, storage, new MockTicker());
+        EntryCopier.Batch batch = copier.newBatch(
+                ledgerId, metadata);
+        List<CompletableFuture<?>> futures = Lists.newArrayList();
+        for (long l = 0; l < 10; l++) {
+            futures.add(batch.copyFromAvailable(l));
+        }
+        try {
+            CompletableFuture.allOf(futures.toArray(new CompletableFuture[0])).get();
+            Assert.fail("Should have failed");
+        } catch (ExecutionException e) {
+            assertThat(e.getCause(), instanceOf(BKException.BKReadException.class));
+        }
+    }
+
+    @Test
+    public void testCopyOneEntryFails() throws Exception {
+        MockBookieClient bookieClient = spy(new MockBookieClient(executor));
+        MockLedgerStorage storage = spy(new MockLedgerStorage());
+        long ledgerId = 0xbeeb;
+        LedgerMetadata metadata = LedgerMetadataBuilder.create()
+            .withId(ledgerId)
+            .withPassword(new byte[0])
+            .withDigestType(DigestType.CRC32C)
+            .withEnsembleSize(2)
+            .withWriteQuorumSize(2)
+            .withAckQuorumSize(2)
+            .newEnsembleEntry(0, Lists.newArrayList(bookie1, bookie2))
+            .withLastEntryId(10)
+            .withLength(1000)
+            .withClosedState()
+            .build();
+        bookieClient.getMockBookies().seedLedger(ledgerId, metadata);
+
+        bookieClient.setPreReadHook((bookie, ledger, entry) -> {
+            if (entry == 2L) {
+                return FutureUtils.exception(new BKException.BKTimeoutException());
+            } else {
+                return CompletableFuture.completedFuture(null);
+            }
+        });
+        EntryCopier copier = new EntryCopierImpl(bookie1, bookieClient, storage, new MockTicker());
+        EntryCopier.Batch batch = copier.newBatch(ledgerId, metadata);
+
+        CompletableFuture f1 = batch.copyFromAvailable(0);
+        CompletableFuture f2 = batch.copyFromAvailable(2);
+        CompletableFuture f3 = batch.copyFromAvailable(4);
+        CompletableFuture f4 = batch.copyFromAvailable(10);
+
+        try {
+            CompletableFuture.allOf(f1, f2, f3, f4).get();
+            Assert.fail("Should have failed");
+        } catch (ExecutionException ee) {
+            assertThat(ee.getCause(), instanceOf(BKException.BKTimeoutException.class));
+        }
+
+        // other entries should still have been added
+        verify(storage, times(3)).addEntry(any());
+        assertThat(storage.entryExists(ledgerId, 0), equalTo(true));
+        assertThat(storage.entryExists(ledgerId, 4), equalTo(true));
+        assertThat(storage.entryExists(ledgerId, 10), equalTo(true));
+    }
+
+    @Test
+    public void testCopyAllEntriesFail() throws Exception {
+        MockBookieClient bookieClient = spy(new MockBookieClient(executor));
+        MockLedgerStorage storage = spy(new MockLedgerStorage());
+        long ledgerId = 0xbeeb;
+        LedgerMetadata metadata = LedgerMetadataBuilder.create()
+            .withId(ledgerId)
+            .withPassword(new byte[0])
+            .withDigestType(DigestType.CRC32C)
+            .withEnsembleSize(2)
+            .withWriteQuorumSize(2)
+            .withAckQuorumSize(2)
+            .newEnsembleEntry(0, Lists.newArrayList(bookie1, bookie2))
+            .withLastEntryId(10)
+            .withLength(1000)
+            .withClosedState()
+            .build();
+        bookieClient.getMockBookies().seedLedger(ledgerId, metadata);
+
+        bookieClient.setPreReadHook((bookie, ledger, entry) ->
+            FutureUtils.exception(new BKException.BKTimeoutException()));
+        EntryCopier copier = new EntryCopierImpl(bookie1, bookieClient, storage, new MockTicker());
+        EntryCopier.Batch batch = copier.newBatch(ledgerId, metadata);
+
+        CompletableFuture f1 = batch.copyFromAvailable(0);
+        CompletableFuture f2 = batch.copyFromAvailable(2);
CompletableFuture f3 = batch.copyFromAvailable(4); + CompletableFuture f4 = batch.copyFromAvailable(10); + + try { + CompletableFuture.allOf(f1, f2, f3, f4).get(); + Assert.fail("Should have failed"); + } catch (ExecutionException ee) { + assertThat(ee.getCause(), instanceOf(BKException.BKTimeoutException.class)); + } + + // Nothing should have been added + verify(storage, times(0)).addEntry(any()); + } + + @Test + public void testCopyOneEntryFailsOnStorage() throws Exception { + MockBookieClient bookieClient = spy(new MockBookieClient(executor)); + MockLedgerStorage storage = spy(new MockLedgerStorage() { + @Override + public long addEntry(ByteBuf buffer) throws IOException, BookieException { + long entryId = buffer.getLong(buffer.readerIndex() + 8); + if (entryId == 0L) { + throw new IOException("failing"); + } + return super.addEntry(buffer); + } + }); + long ledgerId = 0xbeeb; + LedgerMetadata metadata = LedgerMetadataBuilder.create() + .withId(ledgerId) + .withPassword(new byte[0]) + .withDigestType(DigestType.CRC32C) + .withEnsembleSize(2) + .withWriteQuorumSize(2) + .withAckQuorumSize(2) + .newEnsembleEntry(0, Lists.newArrayList(bookie1, bookie2)) + .withLastEntryId(10) + .withLength(1000) + .withClosedState() + .build(); + bookieClient.getMockBookies().seedLedger(ledgerId, metadata); + + EntryCopier copier = new EntryCopierImpl(bookie1, bookieClient, storage, new MockTicker()); + EntryCopier.Batch batch = copier.newBatch(ledgerId, metadata); + + CompletableFuture f1 = batch.copyFromAvailable(0); + CompletableFuture f2 = batch.copyFromAvailable(2); + CompletableFuture f3 = batch.copyFromAvailable(4); + CompletableFuture f4 = batch.copyFromAvailable(10); + + try { + CompletableFuture.allOf(f1, f2, f3, f4).get(); + Assert.fail("Should have failed"); + } catch (ExecutionException ee) { + assertThat(ee.getCause(), instanceOf(IOException.class)); + } + + // other entries should still have been added + verify(storage, times(4)).addEntry(any()); + assertThat(storage.entryExists(ledgerId, 0), equalTo(false)); + assertThat(storage.entryExists(ledgerId, 2), equalTo(true)); + assertThat(storage.entryExists(ledgerId, 4), equalTo(true)); + assertThat(storage.entryExists(ledgerId, 10), equalTo(true)); + } + + @Test + public void testCopyAllEntriesFailOnStorage() throws Exception { + MockBookieClient bookieClient = spy(new MockBookieClient(executor)); + MockLedgerStorage storage = spy(new MockLedgerStorage() { + @Override + public long addEntry(ByteBuf buffer) throws IOException, BookieException { + throw new IOException("failing"); + } + }); + long ledgerId = 0xbeeb; + LedgerMetadata metadata = LedgerMetadataBuilder.create() + .withId(ledgerId) + .withPassword(new byte[0]) + .withDigestType(DigestType.CRC32C) + .withEnsembleSize(2) + .withWriteQuorumSize(2) + .withAckQuorumSize(2) + .newEnsembleEntry(0, Lists.newArrayList(bookie1, bookie2)) + .withLastEntryId(10) + .withLength(1000) + .withClosedState() + .build(); + bookieClient.getMockBookies().seedLedger(ledgerId, metadata); + + EntryCopier copier = new EntryCopierImpl(bookie1, bookieClient, storage, new MockTicker()); + EntryCopier.Batch batch = copier.newBatch(ledgerId, metadata); + + CompletableFuture f1 = batch.copyFromAvailable(0); + CompletableFuture f2 = batch.copyFromAvailable(2); + CompletableFuture f3 = batch.copyFromAvailable(4); + CompletableFuture f4 = batch.copyFromAvailable(10); + + try { + CompletableFuture.allOf(f1, f2, f3, f4).get(); + Assert.fail("Should have failed"); + } catch (ExecutionException ee) { + 
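+            // the IOException thrown by the failing storage should surface as
+            // the cause of the batch's failed future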
assertThat(ee.getCause(), instanceOf(IOException.class));
+        }
+
+        // every add was attempted, but the failing storage stored none of them
+        verify(storage, times(4)).addEntry(any());
+        assertThat(storage.entryExists(ledgerId, 0), equalTo(false));
+        assertThat(storage.entryExists(ledgerId, 2), equalTo(false));
+        assertThat(storage.entryExists(ledgerId, 4), equalTo(false));
+        assertThat(storage.entryExists(ledgerId, 10), equalTo(false));
+    }
+
+    @Test
+    public void testReadOneEntry() throws Exception {
+        long ledgerId = 0xbeeb; // don't change, the shuffle for preferred bookies uses ledger id as seed
+        LedgerMetadata metadata = LedgerMetadataBuilder.create()
+            .withId(ledgerId)
+            .withPassword(new byte[0])
+            .withDigestType(DigestType.CRC32C)
+            .withEnsembleSize(3)
+            .withWriteQuorumSize(3)
+            .withAckQuorumSize(3)
+            .newEnsembleEntry(0, Lists.newArrayList(bookie1, bookie2, bookie3))
+            .withLastEntryId(10)
+            .withLength(1000)
+            .withClosedState()
+            .build();
+
+        MockBookieClient bookieClient = spy(new MockBookieClient(executor));
+        bookieClient.getMockBookies().seedLedger(ledgerId, metadata);
+
+        EntryCopier copier = new EntryCopierImpl(bookie2, bookieClient,
+                new MockLedgerStorage(), new MockTicker());
+        EntryCopierImpl.BatchImpl batch = (EntryCopierImpl.BatchImpl) copier.newBatch(ledgerId, metadata);
+        for (int i = 0; i <= 10; i++) {
+            batch.fetchEntry(i).get();
+            verify(bookieClient, times(i + 1)).readEntry(any(), anyLong(), anyLong(),
+                    any(), any(), anyInt());
+            verify(bookieClient, times(i + 1)).readEntry(eq(bookie3), anyLong(), anyLong(),
+                    any(), any(), anyInt());
+        }
+    }
+
+    @Test
+    public void testReadOneFirstReplicaFails() throws Exception {
+        long ledgerId = 0xbeeb; // don't change, the shuffle for preferred bookies uses ledger id as seed
+        LedgerMetadata metadata = LedgerMetadataBuilder.create()
+            .withId(ledgerId)
+            .withPassword(new byte[0])
+            .withDigestType(DigestType.CRC32C)
+            .withEnsembleSize(3)
+            .withWriteQuorumSize(3)
+            .withAckQuorumSize(3)
+            .newEnsembleEntry(0, Lists.newArrayList(bookie1, bookie2, bookie3))
+            .withLastEntryId(10)
+            .withLength(1000)
+            .withClosedState()
+            .build();
+
+        MockBookieClient bookieClient = spy(new MockBookieClient(executor));
+        bookieClient.getMockBookies().seedLedger(ledgerId, metadata);
+        bookieClient.errorBookies(bookie3);
+        MockTicker ticker = new MockTicker();
+        EntryCopierImpl copier = new EntryCopierImpl(bookie2, bookieClient,
+                new MockLedgerStorage(), ticker);
+        CompletableFuture<Void> errorProcessedPromise = new CompletableFuture<>();
+        EntryCopierImpl.BatchImpl batch = copier.new BatchImpl(bookie2, ledgerId,
+                metadata,
+                new EntryCopierImpl.SinBin(ticker)) {
+                @Override
+                void notifyBookieError(BookieId bookie) {
+                    super.notifyBookieError(bookie);
+                    errorProcessedPromise.complete(null);
+                }
+            };
+
+        batch.fetchEntry(0).get();
+
+        // will read twice, fail at bookie3, succeed at bookie1
+        verify(bookieClient, times(2)).readEntry(any(), anyLong(), anyLong(),
+                any(), any(), anyInt());
+        verify(bookieClient, times(1)).readEntry(eq(bookie3), anyLong(), anyLong(),
+                any(), any(), anyInt());
+        verify(bookieClient, times(1)).readEntry(eq(bookie1), anyLong(), anyLong(),
+                any(), any(), anyInt());
+        errorProcessedPromise.get(10, TimeUnit.SECONDS);
+        batch.fetchEntry(1).get();
+
+        // subsequent read should go straight for bookie1
+        verify(bookieClient, times(3)).readEntry(any(), anyLong(), anyLong(),
+                any(), any(), anyInt());
+        verify(bookieClient, times(2)).readEntry(eq(bookie1), anyLong(), anyLong(),
+                any(), any(), anyInt());
+    }
+
+    @Test
+    public void testReadOneAllReplicasFail() throws Exception {
+        long ledgerId = 0xbeeb; // don't change, the shuffle for preferred bookies uses ledger id as seed
+        LedgerMetadata metadata = LedgerMetadataBuilder.create()
+            .withId(ledgerId)
+            .withPassword(new byte[0])
+            .withDigestType(DigestType.CRC32C)
+            .withEnsembleSize(3)
+            .withWriteQuorumSize(3)
+            .withAckQuorumSize(3)
+            .newEnsembleEntry(0, Lists.newArrayList(bookie1, bookie2, bookie3))
+            .withLastEntryId(10)
+            .withLength(1000)
+            .withClosedState()
+            .build();
+
+        MockBookieClient bookieClient = spy(new MockBookieClient(executor));
+        bookieClient.getMockBookies().seedLedger(ledgerId, metadata);
+        // we expect to try to read from bookie3 first
+        bookieClient.setPreReadHook((bookie, ledgerId1, entryId) -> {
+            if (bookie.equals(bookie1)) {
+                return FutureUtils.exception(new BKException.BKReadException());
+            } else if (bookie.equals(bookie3)) {
+                return FutureUtils.exception(new BKException.BKBookieException());
+            } else {
+                return CompletableFuture.completedFuture(null);
+            }
+        });
+        EntryCopier copier = new EntryCopierImpl(bookie2, bookieClient,
+                new MockLedgerStorage(), new MockTicker());
+        EntryCopierImpl.BatchImpl batch = (EntryCopierImpl.BatchImpl) copier.newBatch(ledgerId, metadata);
+
+        try {
+            batch.fetchEntry(0).get();
+            Assert.fail("Shouldn't get this far");
+        } catch (ExecutionException ee) {
+            assertThat(ee.getCause(), instanceOf(BKException.BKBookieException.class));
+        }
+
+        InOrder inOrder = inOrder(bookieClient);
+        inOrder.verify(bookieClient, times(1)).readEntry(eq(bookie3), anyLong(), anyLong(),
+                any(), any(), anyInt());
+        inOrder.verify(bookieClient, times(1)).readEntry(eq(bookie1), anyLong(), anyLong(),
+                any(), any(), anyInt());
+    }
+
+    @Test
+    public void testReadOneWithErrorBookieReinstatedAfterSinBin() throws Exception {
+        long ledgerId = 0xbeeb; // don't change, the shuffle for preferred bookies uses ledger id as seed
+        LedgerMetadata metadata = LedgerMetadataBuilder.create()
+            .withId(ledgerId)
+            .withPassword(new byte[0])
+            .withDigestType(DigestType.CRC32C)
+            .withEnsembleSize(3)
+            .withWriteQuorumSize(3)
+            .withAckQuorumSize(3)
+            .newEnsembleEntry(0, Lists.newArrayList(bookie1, bookie2, bookie3))
+            .withLastEntryId(10)
+            .withLength(1000)
+            .withClosedState()
+            .build();
+
+        MockBookieClient bookieClient = spy(new MockBookieClient(executor));
+        bookieClient.getMockBookies().seedLedger(ledgerId, metadata);
+        bookieClient.errorBookies(bookie3);
+
+        CompletableFuture<Void> errorProcessedPromise = new CompletableFuture<>();
+
+        MockTicker ticker = new MockTicker();
+        EntryCopierImpl copier = new EntryCopierImpl(bookie2, bookieClient,
+                new MockLedgerStorage(), ticker);
+        EntryCopierImpl.SinBin sinBin = new EntryCopierImpl.SinBin(ticker);
+        EntryCopierImpl.BatchImpl batch = copier.new BatchImpl(bookie2, ledgerId, metadata, sinBin) {
+                @Override
+                void notifyBookieError(BookieId bookie) {
+                    super.notifyBookieError(bookie);
+                    errorProcessedPromise.complete(null);
+                }
+            };
+        batch.fetchEntry(0).get();
+        verify(bookieClient, times(1)).readEntry(eq(bookie3), anyLong(), anyLong(),
+                any(), any(), anyInt());
+        verify(bookieClient, times(1)).readEntry(eq(bookie1), anyLong(), anyLong(),
+                any(), any(), anyInt());
+        errorProcessedPromise.get(10, TimeUnit.SECONDS);
+
+        // bookie3 should be fine to use again, but we shouldn't use it until it comes out
+        // of the sinbin
+        bookieClient.removeErrors(bookie3);
+
+        // read batch again, error should carry over
+        EntryCopierImpl.BatchImpl batch2 = copier.new BatchImpl(bookie2, ledgerId, metadata, sinBin);
+        batch2.fetchEntry(0).get();
+        verify(bookieClient, times(1)).readEntry(eq(bookie3), anyLong(), anyLong(),
+                any(), any(), anyInt());
+        verify(bookieClient, times(2)).readEntry(eq(bookie1), anyLong(), anyLong(),
+                any(), any(), anyInt());
+        // advance time
+        ticker.advance(70, TimeUnit.SECONDS);
+
+        // sinbinned bookie should be restored, read should come from bookie3 again
+        EntryCopierImpl.BatchImpl batch3 = copier.new BatchImpl(bookie2, ledgerId, metadata, sinBin);
+        batch3.fetchEntry(0).get();
+        verify(bookieClient, times(2)).readEntry(eq(bookie3), anyLong(), anyLong(),
+                any(), any(), anyInt());
+        verify(bookieClient, times(2)).readEntry(eq(bookie1), anyLong(), anyLong(),
+                any(), any(), anyInt());
+    }
+
+    @Test
+    public void testReadEntryOnlyOnSelf() throws Exception {
+        long ledgerId = 0xbeeb;
+        LedgerMetadata metadata = LedgerMetadataBuilder.create()
+            .withId(ledgerId)
+            .withPassword(new byte[0])
+            .withDigestType(DigestType.CRC32C)
+            .withEnsembleSize(1)
+            .withWriteQuorumSize(1)
+            .withAckQuorumSize(1)
+            .newEnsembleEntry(0, Lists.newArrayList(bookie2))
+            .withLastEntryId(10)
+            .withLength(1000)
+            .withClosedState()
+            .build();
+
+        MockBookieClient bookieClient = spy(new MockBookieClient(executor));
+        bookieClient.getMockBookies().seedLedger(ledgerId, metadata);
+
+        CompletableFuture<Void> errorProcessedPromise = new CompletableFuture<>();
+
+        MockTicker ticker = new MockTicker();
+        EntryCopierImpl copier = new EntryCopierImpl(bookie2, bookieClient,
+                new MockLedgerStorage(), ticker);
+        EntryCopierImpl.BatchImpl batch = (EntryCopierImpl.BatchImpl) copier.newBatch(ledgerId, metadata);
+        try {
+            batch.fetchEntry(0).get();
+        } catch (ExecutionException ee) {
+            assertThat(ee.getCause(), instanceOf(BKException.BKReadException.class));
+        }
+        verify(bookieClient, times(0)).readEntry(any(), anyLong(), anyLong(),
+                any(), any(), anyInt());
+    }
+
+    @Test
+    public void testPreferredBookieIndices() throws Exception {
+        long ledgerId = 0xbeeb;
+        LedgerMetadata metadata1 = LedgerMetadataBuilder.create()
+            .withId(ledgerId)
+            .withPassword(new byte[0])
+            .withDigestType(DigestType.CRC32C)
+            .withEnsembleSize(5)
+            .withWriteQuorumSize(2)
+            .withAckQuorumSize(2)
+            .newEnsembleEntry(0, Lists.newArrayList(bookie1, bookie2, bookie3, bookie4, bookie5))
+            .newEnsembleEntry(3, Lists.newArrayList(bookie1, bookie6, bookie3, bookie4, bookie5))
+            .newEnsembleEntry(5, Lists.newArrayList(bookie1, bookie2, bookie3, bookie4, bookie5))
+            .withLastEntryId(10)
+            .withLength(1000)
+            .withClosedState()
+            .build();
+
+        Map<Long, List<Integer>> order =
+            EntryCopierImpl.preferredBookieIndices(bookie2, metadata1,
+                                                   Collections.emptySet(),
+                                                   ledgerId);
+        assertThat(order.get(0L), contains(4, 0, 3, 2));
+        assertThat(order.get(3L), contains(4, 1, 0, 3, 2));
+        assertThat(order.get(5L), contains(4, 0, 3, 2));
+
+        Map<Long, List<Integer>> orderWithErr =
+            EntryCopierImpl.preferredBookieIndices(bookie2, metadata1,
+                                                   Sets.newHashSet(bookie1, bookie3),
+                                                   ledgerId);
+        assertThat(orderWithErr.get(0L), contains(4, 3, 0, 2));
+        assertThat(orderWithErr.get(3L), contains(4, 1, 3, 0, 2));
+        assertThat(orderWithErr.get(5L), contains(4, 3, 0, 2));
+    }
+}
diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/datainteg/MetadataAsyncIteratorTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/datainteg/MetadataAsyncIteratorTest.java
new file mode 100644
index 00000000000..32f7c846f9b
--- /dev/null
+++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/datainteg/MetadataAsyncIteratorTest.java
@@ -0,0 +1,301 @@
+/*
+ *
Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bookkeeper.bookie.datainteg; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.instanceOf; + +import com.google.common.collect.Lists; +import io.reactivex.rxjava3.schedulers.Schedulers; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; +import org.apache.bookkeeper.client.BKException; +import org.apache.bookkeeper.client.LedgerMetadataBuilder; +import org.apache.bookkeeper.client.api.DigestType; +import org.apache.bookkeeper.client.api.LedgerMetadata; +import org.apache.bookkeeper.common.concurrent.FutureUtils; +import org.apache.bookkeeper.meta.LedgerManager; +import org.apache.bookkeeper.meta.MockLedgerManager; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.versioning.Versioned; +import org.junit.Assert; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Tests for MetadataAsyncIterator. 
+ */ +public class MetadataAsyncIteratorTest { + private static Logger log = LoggerFactory.getLogger(MetadataAsyncIteratorTest.class); + + private LedgerMetadata newRandomMetadata(long randBit) throws Exception { + return LedgerMetadataBuilder.create() + .withId(1) + .withPassword(new byte[0]) + .withDigestType(DigestType.CRC32C) + .withEnsembleSize(1) + .withWriteQuorumSize(1) + .withAckQuorumSize(1) + .newEnsembleEntry(0, Lists.newArrayList( + BookieId.parse("foobar-" + randBit + ":3181"))) + .build(); + } + + private ConcurrentHashMap addLedgers(LedgerManager lm, int count) + throws Exception { + ConcurrentHashMap added = new ConcurrentHashMap<>(); + for (long i = 0; i < count; i++) { + LedgerMetadata metadata = newRandomMetadata(i); + lm.createLedgerMetadata(i, metadata).get(); + added.put(i, metadata); + } + return added; + } + + private static CompletableFuture removeFromMap( + ConcurrentHashMap map, + long ledgerId, LedgerMetadata metadata) { + if (log.isDebugEnabled()) { + log.debug("removing ledger {}", ledgerId); + } + if (map.remove(ledgerId, metadata)) { + return CompletableFuture.completedFuture(null); + } else { + if (log.isDebugEnabled()) { + log.debug("ledger {} already removed", ledgerId); + } + return FutureUtils.exception(new Exception("ledger already removed")); + } + } + + @Test + public void testIteratorOverAll() throws Exception { + MockLedgerManager lm = new MockLedgerManager(); + ConcurrentHashMap added = addLedgers(lm, 10000); + MetadataAsyncIterator iterator = new MetadataAsyncIterator(Schedulers.io(), + lm, 100 /* inflight */, + 3 /* timeout */, TimeUnit.SECONDS); + iterator.forEach((ledgerId, metadata) -> removeFromMap(added, ledgerId, metadata)) + .get(10, TimeUnit.SECONDS); + assertThat(added.isEmpty(), equalTo(true)); + } + + @Test + public void testSingleLedger() throws Exception { + MockLedgerManager lm = new MockLedgerManager(); + LedgerMetadata single = newRandomMetadata(0xdeadbeef); + MetadataAsyncIterator iterator = new MetadataAsyncIterator(Schedulers.io(), + lm, 100 /* inflight */, + 3 /* timeout */, TimeUnit.SECONDS); + iterator.forEach((ledgerId, metadata) -> { + if (ledgerId == 0xdeadbeef && metadata.equals(single)) { + return CompletableFuture.completedFuture(null); + } else { + return FutureUtils.exception(new Exception("Unexpected metadata")); + } + }).get(10, TimeUnit.SECONDS); + } + + @Test + public void testEmptyRange() throws Exception { + MockLedgerManager lm = new MockLedgerManager(); + MetadataAsyncIterator iterator = new MetadataAsyncIterator(Schedulers.io(), + lm, 100 /* inflight */, + 3 /* timeout */, TimeUnit.SECONDS); + iterator.forEach((ledgerId, metadata) -> FutureUtils.exception(new Exception("Should be empty"))) + .get(10, TimeUnit.SECONDS); + } + + @Test + public void testOneLedgerErrorsOnRead() throws Exception { + MockLedgerManager lm = new MockLedgerManager() { + @Override + public CompletableFuture> readLedgerMetadata(long ledgerId) { + if (ledgerId == 403) { + return FutureUtils.exception(new BKException.ZKException()); + } else { + return super.readLedgerMetadata(ledgerId); + } + } + }; + ConcurrentHashMap added = addLedgers(lm, 10000); + MetadataAsyncIterator iterator = new MetadataAsyncIterator(Schedulers.io(), + lm, 100 /* inflight */, + 3 /* timeout */, TimeUnit.SECONDS); + try { + iterator.forEach((ledgerId, metadata) -> removeFromMap(added, ledgerId, metadata)) + .get(10, TimeUnit.SECONDS); + } catch (ExecutionException ee) { + assertThat(ee.getCause(), instanceOf(BKException.ZKException.class)); + } + } + + @Test + 
public void testOneLedgerErrorsOnProcessing() throws Exception { + MockLedgerManager lm = new MockLedgerManager(); + ConcurrentHashMap added = addLedgers(lm, 10000); + MetadataAsyncIterator iterator = new MetadataAsyncIterator(Schedulers.io(), + lm, 100 /* inflight */, + 3 /* timeout */, TimeUnit.SECONDS); + try { + iterator.forEach((ledgerId, metadata) -> { + if (ledgerId == 403) { + log.info("IKDEBUG erroring"); + return FutureUtils.exception(new Exception("foobar")); + } else { + return CompletableFuture.completedFuture(null); + } + }).get(10, TimeUnit.SECONDS); + Assert.fail("shouldn't succeed"); + } catch (ExecutionException ee) { + assertThat(ee.getCause().getMessage(), equalTo("foobar")); + } + } + + @Test + public void testAllLedgersErrorOnRead() throws Exception { + MockLedgerManager lm = new MockLedgerManager() { + @Override + public CompletableFuture> readLedgerMetadata(long ledgerId) { + CompletableFuture> promise = new CompletableFuture<>(); + promise.completeExceptionally(new BKException.ZKException()); + return promise; + } + }; + ConcurrentHashMap added = addLedgers(lm, 10000); + MetadataAsyncIterator iterator = new MetadataAsyncIterator(Schedulers.io(), + lm, 100 /* inflight */, + 3 /* timeout */, TimeUnit.SECONDS); + try { + iterator.forEach((ledgerId, metadata) -> CompletableFuture.completedFuture(null)) + .get(10, TimeUnit.SECONDS); + } catch (ExecutionException ee) { + assertThat(ee.getCause(), instanceOf(BKException.ZKException.class)); + } + } + + @Test + public void testAllLedgersErrorOnProcessing() throws Exception { + MockLedgerManager lm = new MockLedgerManager(); + ConcurrentHashMap added = addLedgers(lm, 10000); + MetadataAsyncIterator iterator = new MetadataAsyncIterator(Schedulers.io(), + lm, 100 /* inflight */, + 3 /* timeout */, TimeUnit.SECONDS); + try { + iterator.forEach((ledgerId, metadata) -> FutureUtils.exception(new Exception("foobar"))) + .get(10, TimeUnit.SECONDS); + Assert.fail("shouldn't succeed"); + } catch (ExecutionException ee) { + assertThat(ee.getCause().getMessage(), equalTo("foobar")); + } + } + + @Test + public void testOneLedgerDisappearsBetweenListAndRead() throws Exception { + MockLedgerManager lm = new MockLedgerManager() { + @Override + public CompletableFuture> readLedgerMetadata(long ledgerId) { + if (ledgerId == 501) { + CompletableFuture> promise = new CompletableFuture<>(); + promise.completeExceptionally(new BKException.BKNoSuchLedgerExistsOnMetadataServerException()); + return promise; + } else { + return super.readLedgerMetadata(ledgerId); + } + } + }; + ConcurrentHashMap added = addLedgers(lm, 10000); + MetadataAsyncIterator iterator = new MetadataAsyncIterator(Schedulers.io(), + lm, 100 /* inflight */, + 3 /* timeout */, TimeUnit.SECONDS); + iterator.forEach((ledgerId, metadata) -> removeFromMap(added, ledgerId, metadata)) + .get(10, TimeUnit.SECONDS); + assertThat(added.size(), equalTo(1)); + log.info("IKDEBUG {} {}", added, added.containsKey(5L)); + assertThat(added.containsKey(501L), equalTo(true)); + } + + @Test + public void testEverySecondLedgerDisappearsBetweenListAndRead() throws Exception { + MockLedgerManager lm = new MockLedgerManager() { + @Override + public CompletableFuture> readLedgerMetadata(long ledgerId) { + if (ledgerId % 2 == 0) { + return FutureUtils.exception( + new BKException.BKNoSuchLedgerExistsOnMetadataServerException()); + } else { + return super.readLedgerMetadata(ledgerId); + } + } + }; + int numLedgers = 10000; + ConcurrentHashMap added = addLedgers(lm, numLedgers); + MetadataAsyncIterator 
iterator = new MetadataAsyncIterator(Schedulers.io(), + lm, 100, + 3, TimeUnit.SECONDS); + iterator.forEach((ledgerId, metadata) -> removeFromMap(added, ledgerId, metadata)) + .get(10, TimeUnit.SECONDS); + assertThat(added.size(), equalTo(numLedgers / 2)); + assertThat(added.keySet().stream().allMatch(k -> k % 2 == 0), equalTo(true)); + assertThat(added.keySet().stream().noneMatch(k -> k % 2 == 1), equalTo(true)); + } + + @Test + public void testEveryLedgerDisappearsBetweenListAndRead() throws Exception { + MockLedgerManager lm = new MockLedgerManager() { + @Override + public CompletableFuture<Versioned<LedgerMetadata>> readLedgerMetadata(long ledgerId) { + return FutureUtils.exception( + new BKException.BKNoSuchLedgerExistsOnMetadataServerException()); + } + }; + int numLedgers = 10000; + ConcurrentHashMap<Long, LedgerMetadata> added = addLedgers(lm, numLedgers); + MetadataAsyncIterator iterator = new MetadataAsyncIterator(Schedulers.io(), + lm, 100, + 3, TimeUnit.SECONDS); + iterator.forEach((ledgerId, metadata) -> removeFromMap(added, ledgerId, metadata)) + .get(10, TimeUnit.SECONDS); + assertThat(added.size(), equalTo(numLedgers)); + } + + @Test + public void testMaxOutInFlight() throws Exception { + MockLedgerManager lm = new MockLedgerManager(); + int numLedgers = 1000; + ConcurrentHashMap<Long, LedgerMetadata> added = addLedgers(lm, numLedgers); + MetadataAsyncIterator iterator = new MetadataAsyncIterator(Schedulers.io(), + lm, 10, + 3, TimeUnit.SECONDS); + CompletableFuture<Void> blocker = new CompletableFuture<>(); + CompletableFuture<Void> iterFuture = iterator.forEach( + (ledgerId, metadata) -> + blocker.thenCompose(ignore -> removeFromMap(added, ledgerId, metadata))); + assertThat(iterFuture.isDone(), equalTo(false)); + blocker.complete(null); + iterFuture.get(10, TimeUnit.SECONDS); + assertThat(added.isEmpty(), equalTo(true)); + } + +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/datainteg/MockDataIntegrityCheck.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/datainteg/MockDataIntegrityCheck.java new file mode 100644 index 00000000000..cfbb1b17660 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/datainteg/MockDataIntegrityCheck.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bookkeeper.bookie.datainteg; + +import java.util.concurrent.CompletableFuture; + +/** + * Mock implementation of DataIntegrityCheck. 
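+ * All checks complete immediately and successfully, and a full check is never required.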
+ */ +public class MockDataIntegrityCheck implements DataIntegrityCheck { + @Override + public CompletableFuture<Void> runPreBootCheck(String reason) { + return CompletableFuture.completedFuture(null); + } + @Override + public boolean needsFullCheck() { + return false; + } + @Override + public CompletableFuture<Void> runFullCheck() { + return CompletableFuture.completedFuture(null); + } +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/datainteg/WriteSetsTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/datainteg/WriteSetsTest.java new file mode 100644 index 00000000000..49a121ec5b2 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/datainteg/WriteSetsTest.java @@ -0,0 +1,171 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bookkeeper.bookie.datainteg; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.contains; +import static org.hamcrest.Matchers.empty; +import static org.hamcrest.Matchers.equalTo; +import static org.junit.Assert.assertTrue; + +import com.google.common.collect.ImmutableList; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import org.apache.bookkeeper.client.DistributionSchedule; +import org.apache.bookkeeper.client.RoundRobinDistributionSchedule; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Tests for WriteSets. 
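+ * A WriteSets instance precomputes, for each entry id, the write set implied by a preferred ordering + * of the ensemble; the tests below cross-check it against RoundRobinDistributionSchedule.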
+ */ +public class WriteSetsTest { + private static final Logger log = LoggerFactory.getLogger(WriteSetsTest.class); + + @Test + public void testOrderPreserved() throws Exception { + WriteSets writeSets = new WriteSets(ImmutableList.of(0, 3, 2, 4, 1), + 5 /* ensemble */, 2 /* writeQ */); + assertThat(writeSets.getForEntry(0), contains(0, 1)); + assertThat(writeSets.getForEntry(1), contains(2, 1)); + assertThat(writeSets.getForEntry(2), contains(3, 2)); + assertThat(writeSets.getForEntry(3), contains(3, 4)); + assertThat(writeSets.getForEntry(4), contains(0, 4)); + } + + @Test + public void testOrderPreservedWithGapForCurrentBookie() throws Exception { + // my bookie id maps to 2, so it is missing from the preferred order + WriteSets writeSets = new WriteSets(ImmutableList.of(0, 3, 4, 1), + 5 /* ensemble */, 2 /* writeQ */); + assertThat(writeSets.getForEntry(0), contains(0, 1)); + assertThat(writeSets.getForEntry(1), contains(1)); + assertThat(writeSets.getForEntry(2), contains(3)); + assertThat(writeSets.getForEntry(3), contains(3, 4)); + assertThat(writeSets.getForEntry(4), contains(0, 4)); + } + + @Test + public void testEmptyWriteSet() throws Exception { + // As can happen if we are the only bookie for an entry + WriteSets writeSets = new WriteSets(ImmutableList.of(0, 3, 4, 1), + 5 /* ensemble */, 1 /* writeQ */); + assertThat(writeSets.getForEntry(0), contains(0)); + assertThat(writeSets.getForEntry(1), contains(1)); + assertThat(writeSets.getForEntry(2), empty()); + assertThat(writeSets.getForEntry(3), contains(3)); + assertThat(writeSets.getForEntry(4), contains(4)); + } + + @Test + public void testE2W2() throws Exception { + DistributionSchedule schedule = new RoundRobinDistributionSchedule( + 2 /* write */, 2 /* ack */, 2 /* ensemble */); + WriteSets writeSets = new WriteSets(ImmutableList.of(0, 1), + 2 /* ensemble */, 2 /* writeQ */); + for (int i = 0; i < 100; i++) { + ImmutableList<Integer> writeSet = writeSets.getForEntry(i); + DistributionSchedule.WriteSet distWriteSet = schedule.getWriteSet(i); + assertContentsMatch(writeSet, distWriteSet); + } + + WriteSets writeSets2 = new WriteSets(ImmutableList.of(1, 0), + 2 /* ensemble */, 2 /* writeQ */); + for (int i = 0; i < 100; i++) { + ImmutableList<Integer> writeSet = writeSets2.getForEntry(i); + DistributionSchedule.WriteSet distWriteSet = schedule.getWriteSet(i); + assertContentsMatch(writeSet, distWriteSet); + } + } + + @Test + public void testE10W2() throws Exception { + DistributionSchedule schedule = new RoundRobinDistributionSchedule( + 2 /* write */, 2 /* ack */, 10 /* ensemble */); + WriteSets writeSets = new WriteSets(ImmutableList.of(0, 8, 1, 9, 6, 3, 7, 4, 2, 5), + 10 /* ensemble */, + 2 /* writeQ */); + for (int i = 0; i < 100; i++) { + ImmutableList<Integer> writeSet = writeSets.getForEntry(i); + DistributionSchedule.WriteSet distWriteSet = schedule.getWriteSet(i); + assertContentsMatch(writeSet, distWriteSet); + } + + WriteSets writeSets2 = new WriteSets(ImmutableList.of(7, 5, 1, 6, 3, 0, 8, 9, 4, 2), + 10 /* ensemble */, + 2 /* writeQ */); + for (int i = 0; i < 100; i++) { + ImmutableList<Integer> writeSet = writeSets2.getForEntry(i); + DistributionSchedule.WriteSet distWriteSet = schedule.getWriteSet(i); + assertContentsMatch(writeSet, distWriteSet); + } + + WriteSets writeSets3 = new WriteSets(ImmutableList.of(0, 1, 2, 3, 4, 5, 6, 7, 8, 9), + 10 /* ensemble */, + 2 /* writeQ */); + for (int i = 0; i < 100; i++) { + ImmutableList<Integer> writeSet = writeSets3.getForEntry(i); + DistributionSchedule.WriteSet distWriteSet = schedule.getWriteSet(i); + 
assertContentsMatch(writeSet, distWriteSet); + } + } + + @Test + public void testManyVariants() throws Exception { + for (int w = 1; w <= 12; w++) { + for (int e = w; e <= 12; e++) { + DistributionSchedule schedule = new RoundRobinDistributionSchedule( + w /* write */, w /* ack */, e /* ensemble */); + + // Create shuffled set of indices + List<Integer> indices = new ArrayList<>(); + for (int i = 0; i < e; i++) { + indices.add(i); + } + Collections.shuffle(indices); + + WriteSets writeSets = new WriteSets(ImmutableList.copyOf(indices), + e, w); + for (int i = 0; i < 100; i++) { + ImmutableList<Integer> writeSet = writeSets.getForEntry(i); + DistributionSchedule.WriteSet distWriteSet = schedule.getWriteSet(i); + assertContentsMatch(writeSet, distWriteSet); + } + } + } + } + + @SuppressWarnings("deprecation") + private static void assertContentsMatch(ImmutableList<Integer> writeSet, + DistributionSchedule.WriteSet distWriteSet) + throws Exception { + log.info("writeSet {} distWriteSet size {}", writeSet, distWriteSet.size()); + assertThat(writeSet.size(), equalTo(distWriteSet.size())); + for (Integer i : writeSet) { + assertThat(distWriteSet.contains(i), equalTo(true)); + } + + for (int i = 0; i < distWriteSet.size(); i++) { + assertTrue(writeSet.contains(distWriteSet.get(i))); + } + } +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/EntryLogTestUtils.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/EntryLogTestUtils.java new file mode 100644 index 00000000000..f88e3883af5 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/EntryLogTestUtils.java @@ -0,0 +1,103 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.bookie.storage; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.equalTo; + +import com.google.common.util.concurrent.MoreExecutors; +import io.netty.buffer.ByteBuf; +import io.netty.buffer.ByteBufAllocator; +import io.netty.buffer.Unpooled; +import java.io.File; +import java.util.Arrays; +import org.apache.bookkeeper.bookie.DefaultEntryLogger; +import org.apache.bookkeeper.bookie.LedgerDirsManager; +import org.apache.bookkeeper.bookie.storage.directentrylogger.DirectEntryLogger; +import org.apache.bookkeeper.bookie.storage.directentrylogger.EntryLogIdsImpl; +import org.apache.bookkeeper.common.util.nativeio.NativeIOImpl; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.slogger.Slogger; +import org.apache.bookkeeper.stats.NullStatsLogger; +import org.apache.bookkeeper.util.DiskChecker; + +/** + * EntryLogTestUtils. 
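+ * Shared helpers for constructing legacy and direct entry loggers and for building + * and comparing patterned test entries.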
+ */ +public class EntryLogTestUtils { + private static final Slogger slog = Slogger.CONSOLE; + + public static LedgerDirsManager newDirsManager(File... ledgerDir) throws Exception { + return new LedgerDirsManager( + new ServerConfiguration(), ledgerDir, new DiskChecker(0.999f, 0.999f)); + } + + public static EntryLogger newLegacyEntryLogger(int logSizeLimit, File... ledgerDir) throws Exception { + ServerConfiguration conf = new ServerConfiguration(); + conf.setEntryLogSizeLimit(logSizeLimit); + return new DefaultEntryLogger(conf, newDirsManager(ledgerDir), null, + NullStatsLogger.INSTANCE, ByteBufAllocator.DEFAULT); + } + + public static DirectEntryLogger newDirectEntryLogger(int logSizeLimit, File ledgerDir) throws Exception { + File curDir = new File(ledgerDir, "current"); + curDir.mkdirs(); + + return new DirectEntryLogger( + curDir, new EntryLogIdsImpl(newDirsManager(ledgerDir), slog), + new NativeIOImpl(), + ByteBufAllocator.DEFAULT, + MoreExecutors.newDirectExecutorService(), + MoreExecutors.newDirectExecutorService(), + logSizeLimit, // max file size + 10 * 1024 * 1024, // max sane entry size + 1024 * 1024, // total write buffer size + 1024 * 1024, // total read buffer size + 64 * 1024, // read buffer size + 1, // numReadThreads + 300, // max fd cache time in seconds + slog, NullStatsLogger.INSTANCE); + } + + public static int logIdFromLocation(long location) { + return (int) (location >> 32); + } + + public static ByteBuf makeEntry(long ledgerId, long entryId, int size) { + return makeEntry(ledgerId, entryId, size, (byte) 0xdd); + } + + public static ByteBuf makeEntry(long ledgerId, long entryId, int size, byte pattern) { + ByteBuf buf = Unpooled.buffer(size); + buf.writeLong(ledgerId).writeLong(entryId); + byte[] data = new byte[buf.writableBytes()]; + Arrays.fill(data, pattern); + buf.writeBytes(data); + return buf; + } + + public static void assertEntryEquals(ByteBuf e1, ByteBuf e2) throws Exception { + assertThat(e1.readableBytes(), equalTo(e2.readableBytes())); + assertThat(e1, equalTo(e2)); + } + +} + diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/MockEntryLogIds.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/MockEntryLogIds.java new file mode 100644 index 00000000000..2f9e7bca2ec --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/MockEntryLogIds.java @@ -0,0 +1,34 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.bookie.storage; + +import java.util.concurrent.atomic.AtomicInteger; + +/** + * MockEntryLogIds. 
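+ * Allocates log ids from a simple in-memory counter, starting at 1.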
+ */ +public class MockEntryLogIds implements EntryLogIds { + private final AtomicInteger counter = new AtomicInteger(0); + @Override + public int nextId() { + return counter.incrementAndGet(); + } +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/directentrylogger/TestBuffer.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/directentrylogger/TestBuffer.java new file mode 100644 index 00000000000..d9c086cd5c7 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/directentrylogger/TestBuffer.java @@ -0,0 +1,219 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.bookie.storage.directentrylogger; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +// CHECKSTYLE.OFF: IllegalImport +import io.netty.buffer.ByteBuf; +import io.netty.buffer.ByteBufAllocator; +import io.netty.buffer.Unpooled; +import io.netty.util.internal.PlatformDependent; +import java.io.IOException; +import org.apache.bookkeeper.common.util.nativeio.NativeIOImpl; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +// CHECKSTYLE.ON: IllegalImport + +/** + * TestBuffer. 
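+ * Exercises the alignment arithmetic and the read, write and pad paths of the direct IO Buffer.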
+ */ +public class TestBuffer { + + @Test + public void testIsAligned() throws Exception { + assertFalse(Buffer.isAligned(1234)); + assertTrue(Buffer.isAligned(4096)); + assertTrue(Buffer.isAligned(40960)); + assertTrue(Buffer.isAligned(1 << 20)); + assertFalse(Buffer.isAligned(-1)); + assertFalse(Buffer.isAligned(Integer.MAX_VALUE)); + assertFalse(Buffer.isAligned(Integer.MIN_VALUE)); + } + + @Test + public void testNextAlignment() throws Exception { + assertEquals(0, Buffer.nextAlignment(0)); + assertEquals(4096, Buffer.nextAlignment(1)); + assertEquals(4096, Buffer.nextAlignment(4096)); + assertEquals(8192, Buffer.nextAlignment(4097)); + assertEquals(0x7FFFF000, Buffer.nextAlignment(0x7FFFF000)); + } + + @Test + public void testNegativePosition() throws Exception { + Assertions.assertThrows(IllegalArgumentException.class, () -> { + Buffer.nextAlignment(-1); + }); + } + + @Test + public void testMaxAlignment() throws Exception { + Assertions.assertThrows(IllegalArgumentException.class, () -> { + Buffer.nextAlignment(Integer.MAX_VALUE); + }); + } + + @Test + public void testCreateUnaligned() throws Exception { + Assertions.assertThrows(IllegalArgumentException.class, () -> { + new Buffer(new NativeIOImpl(), ByteBufAllocator.DEFAULT, 1234); + }); + } + + @Test + public void testWriteInt() throws Exception { + int bufferSize = 1 << 20; + Buffer b = new Buffer(new NativeIOImpl(), ByteBufAllocator.DEFAULT, bufferSize); + assertTrue(b.hasSpace(bufferSize)); + assertEquals(0, b.position()); + b.writeInt(0xdeadbeef); + + + assertEquals((byte) 0xde, PlatformDependent.getByte(b.pointer() + 0)); + assertEquals((byte) 0xad, PlatformDependent.getByte(b.pointer() + 1)); + assertEquals((byte) 0xbe, PlatformDependent.getByte(b.pointer() + 2)); + assertEquals((byte) 0xef, PlatformDependent.getByte(b.pointer() + 3)); + + assertFalse(b.hasSpace(bufferSize)); + assertEquals(Integer.BYTES, b.position()); + + for (int i = 0; i < 10000; i++) { + b.writeInt(i); + } + assertEquals(Integer.BYTES * 10001, b.position()); + assertTrue(b.hasSpace(bufferSize - (Integer.BYTES * 10001))); + assertFalse(b.hasSpace(bufferSize - (Integer.BYTES * 10000))); + + assertEquals(0xdeadbeef, b.readInt(0)); + for (int i = 0; i < 10000; i++) { + assertEquals(i, b.readInt((i + 1) * Integer.BYTES)); + } + b.reset(); + assertTrue(b.hasSpace(bufferSize)); + assertEquals(0, b.position()); + } + + @Test + public void testWriteBuffer() throws Exception { + ByteBuf bb = Unpooled.buffer(1021); + fillByteBuf(bb, 0xdeadbeef); + int bufferSize = 1 << 20; + Buffer b = new Buffer(new NativeIOImpl(), ByteBufAllocator.DEFAULT, bufferSize); + assertEquals(0, b.position()); + b.writeByteBuf(bb); + assertEquals(1021, b.position()); + assertEquals(0, bb.readableBytes()); + bb.clear(); + fillByteBuf(bb, 0xcafecafe); + b.writeByteBuf(bb); + assertEquals(0, bb.readableBytes()); + assertEquals(2042, b.position()); + + bb = Unpooled.buffer(2042); + int ret = b.readByteBuf(bb, 0, 2042); + assertEquals(2042, ret); + for (int i = 0; i < 1020 / Integer.BYTES; i++) { + assertEquals(0xdeadbeef, bb.readInt()); + } + assertEquals((byte) 0xde, bb.readByte()); + for (int i = 0; i < 1020 / Integer.BYTES; i++) { + assertEquals(0xcafecafe, bb.readInt()); + } + } + + @Test + public void testPartialRead() throws Exception { + ByteBuf bb = Unpooled.buffer(5000); + + Buffer b = new Buffer(new NativeIOImpl(), ByteBufAllocator.DEFAULT, 4096); + for (int i = 0; i < 4096 / Integer.BYTES; i++) { + b.writeInt(0xdeadbeef); + } + + int ret = b.readByteBuf(bb, 0, 5000); + 
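// the buffer only holds 4096 bytes, so the 5000 byte read is truncated + 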
assertEquals(4096, ret); + } + + @Test + public void testReadIntAtBoundary() throws Exception { + Buffer b = new Buffer(new NativeIOImpl(), ByteBufAllocator.DEFAULT, 4096); + + for (int i = 0; i < 4096 / Integer.BYTES; i++) { + b.writeInt(0xdeadbeef); + } + assertTrue(b.hasData(4092, Integer.BYTES)); + assertFalse(b.hasData(4093, Integer.BYTES)); + assertFalse(b.hasData(4096, Integer.BYTES)); + + Assertions.assertThrows(IOException.class, () -> b.readInt(4096 - 2)); + } + + @Test + public void testReadLongAtBoundary() throws Exception { + Buffer b = new Buffer(new NativeIOImpl(), ByteBufAllocator.DEFAULT, 4096); + + for (int i = 0; i < 4096 / Integer.BYTES; i++) { + b.writeInt(0xdeadbeef); + } + assertTrue(b.hasData(4088, Long.BYTES)); + assertFalse(b.hasData(4089, Long.BYTES)); + assertFalse(b.hasData(4096, Long.BYTES)); + + Assertions.assertThrows(IOException.class, () -> b.readInt(4096 - 2)); + } + + @Test + public void testPadToAlignment() throws Exception { + Buffer b = new Buffer(new NativeIOImpl(), ByteBufAllocator.DEFAULT, 1 << 23); + + for (int i = 0; i < 1025; i++) { + b.writeInt(0xdededede); + } + int writtenLength = b.padToAlignment(); + + assertEquals(8192, writtenLength); + assertEquals(0xdededede, b.readInt(1024 * Integer.BYTES)); + for (int i = 1025 * Integer.BYTES; i < writtenLength; i += Integer.BYTES) { + assertEquals(0xf0f0f0f0, b.readInt(i)); + } + assertEquals(0, b.readInt(writtenLength)); + } + + @Test + public void testFree() throws Exception { + Buffer b = new Buffer(new NativeIOImpl(), ByteBufAllocator.DEFAULT, 1 << 23); + b.free(); // success if process doesn't explode + b.free(); + } + + static void fillByteBuf(ByteBuf bb, int value) { + while (bb.writableBytes() >= Integer.BYTES) { + bb.writeInt(value); + } + for (int i = 0; i < Integer.BYTES && bb.writableBytes() > 0; i++) { + byte b = (byte) (value >> (Integer.BYTES - i - 1) * 8); + bb.writeByte(b); + } + } +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/directentrylogger/TestDirectEntryLogger.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/directentrylogger/TestDirectEntryLogger.java new file mode 100644 index 00000000000..56d3927dfe0 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/directentrylogger/TestDirectEntryLogger.java @@ -0,0 +1,521 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + */ +package org.apache.bookkeeper.bookie.storage.directentrylogger; + +import static org.apache.bookkeeper.bookie.storage.EntryLogTestUtils.assertEntryEquals; +import static org.apache.bookkeeper.bookie.storage.EntryLogTestUtils.logIdFromLocation; +import static org.apache.bookkeeper.bookie.storage.EntryLogTestUtils.makeEntry; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.containsInAnyOrder; +import static org.hamcrest.Matchers.empty; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThan; + +import com.google.common.util.concurrent.MoreExecutors; +import io.netty.buffer.ByteBuf; +import io.netty.buffer.ByteBufAllocator; +import io.netty.util.ReferenceCountUtil; +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.atomic.AtomicInteger; +import lombok.extern.slf4j.Slf4j; +import org.apache.bookkeeper.bookie.EntryLogMetadata; +import org.apache.bookkeeper.bookie.storage.EntryLogger; +import org.apache.bookkeeper.bookie.storage.MockEntryLogIds; +import org.apache.bookkeeper.common.util.nativeio.NativeIOImpl; +import org.apache.bookkeeper.slogger.Slogger; +import org.apache.bookkeeper.stats.NullStatsLogger; +import org.apache.bookkeeper.test.TmpDirs; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.condition.DisabledOnOs; +import org.junit.jupiter.api.condition.OS; + +/** + * TestDirectEntryLogger. + */ +@Slf4j +@DisabledOnOs(OS.WINDOWS) +public class TestDirectEntryLogger { + private final Slogger slog = Slogger.CONSOLE; + + private static final long ledgerId1 = 1234; + + private final TmpDirs tmpDirs = new TmpDirs(); + + @AfterEach + public void cleanup() throws Exception { + tmpDirs.cleanup(); + } + + @Test + public void testLogRolling() throws Exception { + File ledgerDir = tmpDirs.createNew("logRolling", "ledgers"); + File curDir = new File(ledgerDir, "current"); + curDir.mkdirs(); + + ByteBuf e1 = makeEntry(ledgerId1, 1L, 4000); + ByteBuf e2 = makeEntry(ledgerId1, 2L, 4000); + ByteBuf e3 = makeEntry(ledgerId1, 3L, 4000); + + try (EntryLogger elog = new DirectEntryLogger( + curDir, new MockEntryLogIds(), + new NativeIOImpl(), + ByteBufAllocator.DEFAULT, + MoreExecutors.newDirectExecutorService(), + MoreExecutors.newDirectExecutorService(), + 9000, // max file size (header + size of one entry) + 10 * 1024 * 1024, // max sane entry size + 1024 * 1024, // total write buffer size + 1024 * 1024, // total read buffer size + 64 * 1024, // read buffer size + 1, // numReadThreads + 300, // max fd cache time in seconds + slog, NullStatsLogger.INSTANCE)) { + long loc1 = elog.addEntry(ledgerId1, e1.slice()); + int logId1 = logIdFromLocation(loc1); + assertThat(logId1, equalTo(1)); + + long loc2 = elog.addEntry(ledgerId1, e2.slice()); + int logId2 = logIdFromLocation(loc2); + assertThat(logId2, equalTo(2)); + + long loc3 = elog.addEntry(ledgerId1, e3.slice()); + int logId3 = logIdFromLocation(loc3); + assertThat(logId3, equalTo(3)); + } + } + + @Test + public void testReadLog() throws Exception { + File ledgerDir = tmpDirs.createNew("logRolling", "ledgers"); + File curDir = new File(ledgerDir, "current"); + curDir.mkdirs(); + + ByteBuf e1 = makeEntry(ledgerId1, 1L, 100); + ByteBuf e2 = 
makeEntry(ledgerId1, 2L, 100); + ByteBuf e3 = makeEntry(ledgerId1, 3L, 100); + + try (EntryLogger elog = new DirectEntryLogger( + curDir, new MockEntryLogIds(), + new NativeIOImpl(), + ByteBufAllocator.DEFAULT, + MoreExecutors.newDirectExecutorService(), + MoreExecutors.newDirectExecutorService(), + 200000, // max file size + 10 * 1024 * 1024, // max sane entry size + 1024 * 1024, // total write buffer size + 1024 * 1024, // total read buffer size + 64 * 1024, // read buffer size + 1, // numReadThreads + 300, // max fd cache time in seconds + slog, NullStatsLogger.INSTANCE)) { + long loc1 = elog.addEntry(ledgerId1, e1.slice()); + long loc2 = elog.addEntry(ledgerId1, e2.slice()); + elog.flush(); + + ByteBuf e1read = elog.readEntry(ledgerId1, 1L, loc1); + ByteBuf e2read = elog.readEntry(ledgerId1, 2L, loc2); + assertEntryEquals(e1read, e1); + assertEntryEquals(e2read, e2); + ReferenceCountUtil.release(e1read); + ReferenceCountUtil.release(e2read); + + long loc3 = elog.addEntry(ledgerId1, e3.slice()); + elog.flush(); + + ByteBuf e3read = elog.readEntry(ledgerId1, 3L, loc3); + assertEntryEquals(e3read, e3); + ReferenceCountUtil.release(e3read); + } + } + + @Test + public void testLogReaderCleanup() throws Exception { + File ledgerDir = tmpDirs.createNew("logRolling", "ledgers"); + File curDir = new File(ledgerDir, "current"); + curDir.mkdirs(); + + final int entrySize = Buffer.ALIGNMENT; + final int maxFileSize = Header.EMPTY_HEADER.length + entrySize; + final int maxCachedReaders = 16; + + AtomicInteger outstandingReaders = new AtomicInteger(0); + EntryLogger elog = new DirectEntryLogger( + curDir, new MockEntryLogIds(), + new NativeIOImpl(), + ByteBufAllocator.DEFAULT, + MoreExecutors.newDirectExecutorService(), + MoreExecutors.newDirectExecutorService(), + maxFileSize, + 10 * 1024 * 1024, // max sane entry size + 1024 * 1024, // total write buffer size + maxCachedReaders * maxFileSize, // total read buffer size + maxFileSize, // read buffer size + 1, // numReadThreads + 300, // max fd cache time in seconds + slog, NullStatsLogger.INSTANCE) { + @Override + LogReader newDirectReader(int logId) throws IOException { + outstandingReaders.incrementAndGet(); + return new DirectReader(logId, logFilename(curDir, logId), ByteBufAllocator.DEFAULT, + new NativeIOImpl(), Buffer.ALIGNMENT, 10 * 1024 * 1024, + NullStatsLogger.INSTANCE.getOpStatsLogger("")) { + @Override + public void close() throws IOException { + super.close(); + outstandingReaders.decrementAndGet(); + } + }; + } + }; + try { + List<Long> locations = new ArrayList<>(); + // `+ 1` is not a typo: create one more log file than the max number of cached readers + for (int i = 0; i < maxCachedReaders + 1; i++) { + ByteBuf e = makeEntry(ledgerId1, i, entrySize); + long loc = elog.addEntry(ledgerId1, e.slice()); + locations.add(loc); + } + elog.flush(); + for (Long loc : locations) { + ReferenceCountUtil.release(elog.readEntry(loc)); + } + assertThat(outstandingReaders.get(), equalTo(maxCachedReaders)); + } finally { + elog.close(); + } + assertThat(outstandingReaders.get(), equalTo(0)); + } + + @Test + public void testReadMetadataAndScan() throws Exception { + File ledgerDir = tmpDirs.createNew("directCanReadAndScanMeta", "ledgers"); + File curDir = new File(ledgerDir, "current"); + curDir.mkdirs(); + + long ledgerId1 = 1L; + long ledgerId2 = 2L; + + ByteBuf e1 = makeEntry(ledgerId1, 1L, 1000); + ByteBuf e2 = makeEntry(ledgerId2, 2L, 2000); + ByteBuf e3 = makeEntry(ledgerId1, 3L, 3000); + + long loc1, loc2, loc3; + try 
(DirectEntryLogger elog = new DirectEntryLogger( + curDir, new MockEntryLogIds(), + new NativeIOImpl(), + ByteBufAllocator.DEFAULT, + MoreExecutors.newDirectExecutorService(), + MoreExecutors.newDirectExecutorService(), + 2 << 16, // max file size + 10 * 1024 * 1024, // max sane entry size + 1024 * 1024, // total write buffer size + 1024 * 1024, // total read buffer size + 64 * 1024, // read buffer size + 1, // numReadThreads + 300, // max fd cache time in seconds + slog, NullStatsLogger.INSTANCE)) { + loc1 = elog.addEntry(ledgerId1, e1); + loc2 = elog.addEntry(ledgerId2, e2); + loc3 = elog.addEntry(ledgerId1, e3); + } + + try (DirectEntryLogger elog = new DirectEntryLogger( + curDir, new MockEntryLogIds(), + new NativeIOImpl(), + ByteBufAllocator.DEFAULT, + MoreExecutors.newDirectExecutorService(), + MoreExecutors.newDirectExecutorService(), + 2 << 16, // max file size + 10 * 1024 * 1024, // max sane entry size + 1024 * 1024, // total write buffer size + 1024 * 1024, // total read buffer size + 64 * 1024, // read buffer size + 1, // numReadThreads + 300, // max fd cache time in seconds + slog, NullStatsLogger.INSTANCE)) { + int logId = logIdFromLocation(loc1); + assertThat(logId, equalTo(logIdFromLocation(loc2))); + assertThat(logId, equalTo(logIdFromLocation(loc3))); + + EntryLogMetadata metaRead = elog.readEntryLogIndex(logId); + assertThat(metaRead.getEntryLogId(), equalTo((long) logId)); + assertThat(metaRead.getTotalSize(), equalTo(1000L + 2000 + 3000 + (Integer.BYTES * 3))); + assertThat(metaRead.getRemainingSize(), equalTo(metaRead.getTotalSize())); + assertThat(metaRead.getLedgersMap().get(ledgerId1), equalTo(1000L + 3000L + (Integer.BYTES * 2))); + assertThat(metaRead.getLedgersMap().get(ledgerId2), equalTo(2000L + Integer.BYTES)); + + EntryLogMetadata metaScan = elog.scanEntryLogMetadata(logId, null); + assertThat(metaScan.getEntryLogId(), equalTo((long) logId)); + assertThat(metaScan.getTotalSize(), equalTo(1000L + 2000 + 3000 + (Integer.BYTES * 3))); + assertThat(metaScan.getRemainingSize(), equalTo(metaScan.getTotalSize())); + assertThat(metaScan.getLedgersMap().get(ledgerId1), equalTo(1000L + 3000L + (Integer.BYTES * 2))); + assertThat(metaScan.getLedgersMap().get(ledgerId2), equalTo(2000L + Integer.BYTES)); + } + } + + @Test + public void testMetadataFallback() throws Exception { + File ledgerDir = tmpDirs.createNew("directMetaFallback", "ledgers"); + File curDir = new File(ledgerDir, "current"); + curDir.mkdirs(); + + long ledgerId1 = 1L; + long ledgerId2 = 2L; + + ByteBuf e1 = makeEntry(ledgerId1, 1L, 1000); + ByteBuf e2 = makeEntry(ledgerId2, 2L, 2000); + ByteBuf e3 = makeEntry(ledgerId1, 3L, 3000); + + int maxFileSize = 1000 + 2000 + 3000 + (Integer.BYTES * 3) + 4096; + long loc1, loc2, loc3; + try (DirectEntryLogger writer = new DirectEntryLogger( + curDir, new MockEntryLogIds(), + new NativeIOImpl(), + ByteBufAllocator.DEFAULT, + MoreExecutors.newDirectExecutorService(), + MoreExecutors.newDirectExecutorService(), + 2 << 16, // max file size + 10 * 1024 * 1024, // max sane entry size + 1024 * 1024, // total write buffer size + 1024 * 1024, // total read buffer size + 64 * 1024, // read buffer size + 1, // numReadThreads + 300, // max fd cache time in seconds + slog, NullStatsLogger.INSTANCE)) { + loc1 = writer.addEntry(ledgerId1, e1); + loc2 = writer.addEntry(ledgerId2, e2); + loc3 = writer.addEntry(ledgerId1, e3); + writer.flush(); + + try (DirectEntryLogger reader = new DirectEntryLogger( + curDir, new MockEntryLogIds(), + new NativeIOImpl(), + 
ByteBufAllocator.DEFAULT, + MoreExecutors.newDirectExecutorService(), + MoreExecutors.newDirectExecutorService(), + 2 << 16, // max file size + 10 * 1024 * 1024, // max sane entry size + 1024 * 1024, // total write buffer size + 1024 * 1024, // total read buffer size + 64 * 1024, // read buffer size + 1, // numReadThreads + 300, // max fd cache time in seconds + slog, NullStatsLogger.INSTANCE)) { + int logId = logIdFromLocation(loc1); + try { + reader.readEntryLogIndex(logId); + Assertions.fail("Shouldn't be there"); + } catch (IOException ioe) { + // expected + } + + EntryLogMetadata metaRead = reader.getEntryLogMetadata(logId); // should fail read, fallback to scan + assertThat(metaRead.getEntryLogId(), equalTo((long) logId)); + assertThat(metaRead.getTotalSize(), equalTo(1000L + 2000 + 3000 + (Integer.BYTES * 3))); + assertThat(metaRead.getRemainingSize(), equalTo(metaRead.getTotalSize())); + assertThat(metaRead.getLedgersMap().get(ledgerId1), equalTo(1000L + 3000L + (Integer.BYTES * 2))); + assertThat(metaRead.getLedgersMap().get(ledgerId2), equalTo(2000L + Integer.BYTES)); + } + } + } + + @Test + public void testMetadataManyBatch() throws Exception { + File ledgerDir = tmpDirs.createNew("directMetaManyBatches", "ledgers"); + File curDir = new File(ledgerDir, "current"); + curDir.mkdirs(); + + long lastLoc = -1; + int ledgerCount = 11000; + try (DirectEntryLogger writer = new DirectEntryLogger( + curDir, new MockEntryLogIds(), + new NativeIOImpl(), + ByteBufAllocator.DEFAULT, + MoreExecutors.newDirectExecutorService(), + MoreExecutors.newDirectExecutorService(), + 2 << 24, // max file size + 10 * 1024 * 1024, // max sane entry size + 32 * 1024 * 1024, // total write buffer size + 32 * 1024 * 1024, // total read buffer size + 16 * 1024 * 1024, // read buffer size + 1, // numReadThreads + 300, // max fd cache time in seconds + slog, NullStatsLogger.INSTANCE)) { + for (int i = 0; i < ledgerCount; i++) { + long loc = writer.addEntry(i, makeEntry(i, 1L, 1000)); + if (lastLoc >= 0) { + assertThat(logIdFromLocation(loc), equalTo(logIdFromLocation(lastLoc))); + } + lastLoc = loc; + } + writer.flush(); + } + + try (DirectEntryLogger reader = new DirectEntryLogger( + curDir, new MockEntryLogIds(), + new NativeIOImpl(), + ByteBufAllocator.DEFAULT, + MoreExecutors.newDirectExecutorService(), + MoreExecutors.newDirectExecutorService(), + 2 << 20, // max file size + 10 * 1024 * 1024, // max sane entry size + 32 * 1024 * 1024, // total write buffer size + 32 * 1024 * 1024, // total read buffer size + 16 * 1024 * 1024, // read buffer size + 1, // numReadThreads + 300, // max fd cache time in seconds + slog, NullStatsLogger.INSTANCE)) { + int logId = logIdFromLocation(lastLoc); + EntryLogMetadata metaRead = reader.readEntryLogIndex(logId); + + assertThat(metaRead.getEntryLogId(), equalTo((long) logId)); + assertThat(metaRead.getTotalSize(), equalTo((1000L + Integer.BYTES) * ledgerCount)); + assertThat(metaRead.getRemainingSize(), equalTo(metaRead.getTotalSize())); + for (int i = 0; i < ledgerCount; i++) { + assertThat(metaRead.getLedgersMap().containsKey(i), equalTo(true)); + } + } + } + + @Test + public void testGetFlushedLogs() throws Exception { + File ledgerDir = tmpDirs.createNew("testFlushedLogs", "ledgers"); + File curDir = new File(ledgerDir, "current"); + curDir.mkdirs(); + + ExecutorService executor = Executors.newFixedThreadPool(5); + CompletableFuture<Void> blockClose = new CompletableFuture<>(); + NativeIOImpl nativeIO = new NativeIOImpl() { + @Override + public int close(int fd) { + try { + 
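// block the native close() until the test completes blockClose below; + // rolled logs should only appear in getFlushedLogIds() once their fd has been closed + 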
blockClose.join(); + return super.close(fd); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + }; + DirectEntryLogger entryLogger = new DirectEntryLogger( + curDir, new MockEntryLogIds(), + nativeIO, + ByteBufAllocator.DEFAULT, + executor, + executor, + 23000, // max file size + 10 * 1024 * 1024, // max sane entry size + 1024 * 1024, // total write buffer size + 1024 * 1024, // total read buffer size + 32 * 1024, // read buffer size + 1, // numReadThreads + 300, // max fd cache time in seconds + slog, NullStatsLogger.INSTANCE); + try { // not using try-with-resources because close needs to be unblocked in failure + // Add entries. + // Ledger 1 is on first entry log + // Ledger 2 spans first, second and third entry log + // Ledger 3 is on the third entry log (which is still active when the metadata is extracted) + long loc1 = entryLogger.addEntry(1L, makeEntry(1L, 1L, 5000)); + long loc2 = entryLogger.addEntry(2L, makeEntry(2L, 1L, 5000)); + assertThat(logIdFromLocation(loc2), equalTo(logIdFromLocation(loc1))); + long loc3 = entryLogger.addEntry(2L, makeEntry(2L, 1L, 15000)); + assertThat(logIdFromLocation(loc3), greaterThan(logIdFromLocation(loc2))); + long loc4 = entryLogger.addEntry(2L, makeEntry(2L, 1L, 15000)); + assertThat(logIdFromLocation(loc4), greaterThan(logIdFromLocation(loc3))); + long loc5 = entryLogger.addEntry(3L, makeEntry(3L, 1L, 1000)); + assertThat(logIdFromLocation(loc5), equalTo(logIdFromLocation(loc4))); + + long logId1 = logIdFromLocation(loc2); + long logId2 = logIdFromLocation(loc3); + long logId3 = logIdFromLocation(loc5); + + // all three should exist + assertThat(entryLogger.logExists(logId1), equalTo(true)); + assertThat(entryLogger.logExists(logId2), equalTo(true)); + assertThat(entryLogger.logExists(logId3), equalTo(true)); + + assertThat(entryLogger.getFlushedLogIds(), empty()); + + blockClose.complete(null); + entryLogger.flush(); + + assertThat(entryLogger.getFlushedLogIds(), containsInAnyOrder(logId1, logId2)); + + long loc6 = entryLogger.addEntry(3L, makeEntry(3L, 1L, 25000)); + assertThat(logIdFromLocation(loc6), greaterThan(logIdFromLocation(loc5))); + entryLogger.flush(); + + assertThat(entryLogger.getFlushedLogIds(), containsInAnyOrder(logId1, logId2, logId3)); + } finally { + blockClose.complete(null); + entryLogger.close(); + executor.shutdownNow(); + } + } + + @Test + public void testBufferSizeNotPageAligned() throws Exception { + File ledgerDir = tmpDirs.createNew("logRolling", "ledgers"); + File curDir = new File(ledgerDir, "current"); + curDir.mkdirs(); + + ByteBuf e1 = makeEntry(ledgerId1, 1L, 4000); + ByteBuf e2 = makeEntry(ledgerId1, 2L, 4000); + ByteBuf e3 = makeEntry(ledgerId1, 3L, 4000); + + try (EntryLogger elog = new DirectEntryLogger( + curDir, new MockEntryLogIds(), + new NativeIOImpl(), + ByteBufAllocator.DEFAULT, + MoreExecutors.newDirectExecutorService(), + MoreExecutors.newDirectExecutorService(), + 9000, // max file size (header + size of one entry) + 10 * 1024 * 1024, // max sane entry size + 128 * 1024 + 500, // total write buffer size + 128 * 1024 + 300, // total read buffer size + 64 * 1024, // read buffer size + 1, // numReadThreads + 300, // max fd cache time in seconds + slog, NullStatsLogger.INSTANCE)) { + long loc1 = elog.addEntry(ledgerId1, e1.slice()); + int logId1 = logIdFromLocation(loc1); + assertThat(logId1, equalTo(1)); + + long loc2 = elog.addEntry(ledgerId1, e2.slice()); + int logId2 = logIdFromLocation(loc2); + assertThat(logId2, equalTo(2)); + + long loc3 = elog.addEntry(ledgerId1, e3.slice()); + int 
logId3 = logIdFromLocation(loc3); + assertThat(logId3, equalTo(3)); + } + } +} + diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/directentrylogger/TestDirectEntryLoggerCompat.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/directentrylogger/TestDirectEntryLoggerCompat.java new file mode 100644 index 00000000000..92d332c075f --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/directentrylogger/TestDirectEntryLoggerCompat.java @@ -0,0 +1,654 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.bookie.storage.directentrylogger; + +import static org.apache.bookkeeper.bookie.storage.EntryLogTestUtils.assertEntryEquals; +import static org.apache.bookkeeper.bookie.storage.EntryLogTestUtils.logIdFromLocation; +import static org.apache.bookkeeper.bookie.storage.EntryLogTestUtils.makeEntry; +import static org.apache.bookkeeper.bookie.storage.EntryLogTestUtils.newLegacyEntryLogger; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.not; + +import com.google.common.util.concurrent.MoreExecutors; +import io.netty.buffer.ByteBuf; +import io.netty.buffer.ByteBufAllocator; +import io.netty.buffer.Unpooled; +import java.io.File; +import java.nio.charset.StandardCharsets; +import org.apache.bookkeeper.bookie.DefaultEntryLogger; +import org.apache.bookkeeper.bookie.EntryLogMetadata; +import org.apache.bookkeeper.bookie.LedgerDirsManager; +import org.apache.bookkeeper.bookie.storage.EntryLogger; +import org.apache.bookkeeper.bookie.storage.MockEntryLogIds; +import org.apache.bookkeeper.common.util.nativeio.NativeIOImpl; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.slogger.Slogger; +import org.apache.bookkeeper.stats.NullStatsLogger; +import org.apache.bookkeeper.test.TmpDirs; +import org.apache.bookkeeper.util.DiskChecker; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.condition.DisabledOnOs; +import org.junit.jupiter.api.condition.OS; + +/** + * TestDirectEntryLoggerCompat. 
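+ * Verifies that entry logs written by DirectEntryLogger and the legacy DefaultEntryLogger + * are mutually readable, including their metadata.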
+ */ +@DisabledOnOs(OS.WINDOWS) +public class TestDirectEntryLoggerCompat { + private final Slogger slog = Slogger.CONSOLE; + + private static final long ledgerId1 = 1234; + private static final long ledgerId2 = 4567; + private static final long ledgerId3 = 7890; + + private final TmpDirs tmpDirs = new TmpDirs(); + + @AfterEach + public void cleanup() throws Exception { + tmpDirs.cleanup(); + } + + @Test + public void testLegacyCanReadDirect() throws Exception { + File ledgerDir = tmpDirs.createNew("legacyCanRead", "ledgers"); + File curDir = new File(ledgerDir, "current"); + curDir.mkdirs(); + + ByteBuf e1 = makeEntry(ledgerId1, 1L, 1000); + ByteBuf e2 = makeEntry(ledgerId1, 2L, 1000); + ByteBuf e3 = makeEntry(ledgerId1, 3L, 1000); + + long loc1, loc2, loc3; + try (EntryLogger elog = new DirectEntryLogger( + curDir, new MockEntryLogIds(), + new NativeIOImpl(), + ByteBufAllocator.DEFAULT, + MoreExecutors.newDirectExecutorService(), + MoreExecutors.newDirectExecutorService(), + 10 * 1024 * 1024, // 10MiB, max file size + 10 * 1024 * 1024, // max sane entry size + 1024 * 1024, // total write buffer size + 1024 * 1024, // total read buffer size + 64 * 1024, // read buffer size + 1, // numReadThreads + 300, // max fd cache time in seconds + slog, NullStatsLogger.INSTANCE)) { + loc1 = elog.addEntry(ledgerId1, e1.slice()); + loc2 = elog.addEntry(ledgerId1, e2.slice()); + loc3 = elog.addEntry(ledgerId1, e3.slice()); + } + + try (EntryLogger legacy = newLegacyEntryLogger(2000000, ledgerDir)) { + assertEntryEquals(legacy.readEntry(ledgerId1, 1L, loc1), e1); + assertEntryEquals(legacy.readEntry(ledgerId1, 2L, loc2), e2); + assertEntryEquals(legacy.readEntry(ledgerId1, 3L, loc3), e3); + } + } + + @Test + public void testDirectCanReadLegacy() throws Exception { + File ledgerDir = tmpDirs.createNew("legacyCanRead", "ledgers"); + + ByteBuf e1 = makeEntry(ledgerId1, 1L, 1000); + ByteBuf e2 = makeEntry(ledgerId1, 2L, 1000); + ByteBuf e3 = makeEntry(ledgerId1, 3L, 1000); + + long loc1, loc2, loc3; + try (EntryLogger legacy = newLegacyEntryLogger(2000000, ledgerDir)) { + loc1 = legacy.addEntry(ledgerId1, e1.slice()); + loc2 = legacy.addEntry(ledgerId1, e2.slice()); + loc3 = legacy.addEntry(ledgerId1, e3.slice()); + legacy.flush(); + } + + try (EntryLogger elog = new DirectEntryLogger( + new File(ledgerDir, "current"), new MockEntryLogIds(), + new NativeIOImpl(), + ByteBufAllocator.DEFAULT, + MoreExecutors.newDirectExecutorService(), + MoreExecutors.newDirectExecutorService(), + 10 * 1024 * 1024, // 10MiB, max file size + 10 * 1024 * 1024, // max sane entry size + 1024 * 1024, // total write buffer size + 1024 * 1024, // total read buffer size + 64 * 1024, // read buffer size + 1, // numReadThreads + 300, // max fd cache time in seconds + slog, NullStatsLogger.INSTANCE)) { + assertEntryEquals(elog.readEntry(ledgerId1, 1L, loc1), e1); + assertEntryEquals(elog.readEntry(ledgerId1, 2L, loc2), e2); + assertEntryEquals(elog.readEntry(ledgerId1, 3L, loc3), e3); + } + } + + @Test + public void testLegacyCanReadDirectAfterMultipleRolls() throws Exception { + File ledgerDir = tmpDirs.createNew("legacyCanRead", "ledgers"); + File curDir = new File(ledgerDir, "current"); + curDir.mkdirs(); + + ByteBuf e1 = makeEntry(ledgerId1, 1L, 4000); + ByteBuf e2 = makeEntry(ledgerId1, 2L, 4000); + ByteBuf e3 = makeEntry(ledgerId1, 3L, 4000); + + long loc1, loc2, loc3; + try (EntryLogger elog = new DirectEntryLogger( + curDir, new MockEntryLogIds(), + new NativeIOImpl(), + ByteBufAllocator.DEFAULT, + 
MoreExecutors.newDirectExecutorService(), + MoreExecutors.newDirectExecutorService(), + 6000, // max file size + 10 * 1024 * 1024, // max sane entry size + 1024 * 1024, // total write buffer size + 1024 * 1024, // total read buffer size + 64 * 1024, // read buffer size + 1, // numReadThreads + 300, // max fd cache time in seconds + slog, NullStatsLogger.INSTANCE)) { + loc1 = elog.addEntry(ledgerId1, e1.slice()); + loc2 = elog.addEntry(ledgerId1, e2.slice()); + loc3 = elog.addEntry(ledgerId1, e3.slice()); + } + + try (EntryLogger legacy = newLegacyEntryLogger(2000000, ledgerDir)) { + assertEntryEquals(legacy.readEntry(ledgerId1, 1L, loc1), e1); + assertEntryEquals(legacy.readEntry(ledgerId1, 2L, loc2), e2); + assertEntryEquals(legacy.readEntry(ledgerId1, 3L, loc3), e3); + } + } + + @Test + public void testLegacyCanReadMetadataOfDirectWithIndexWritten() throws Exception { + File ledgerDir = tmpDirs.createNew("legacyCanReadMeta", "ledgers"); + File curDir = new File(ledgerDir, "current"); + curDir.mkdirs(); + + ByteBuf e1 = makeEntry(ledgerId1, 1L, 1000); + ByteBuf e2 = makeEntry(ledgerId2, 2L, 2000); + ByteBuf e3 = makeEntry(ledgerId1, 3L, 3000); + ByteBuf e4 = makeEntry(ledgerId1, 4L, 4000); + + int maxFileSize = 1000 + 2000 + 3000 + (Integer.BYTES * 3) + 4096; + long loc1, loc2, loc3, loc4; + try (EntryLogger elog = new DirectEntryLogger( + curDir, new MockEntryLogIds(), + new NativeIOImpl(), + ByteBufAllocator.DEFAULT, + MoreExecutors.newDirectExecutorService(), + MoreExecutors.newDirectExecutorService(), + maxFileSize, // max file size + 10 * 1024 * 1024, // max sane entry size + 1024 * 1024, // total write buffer size + 1024 * 1024, // total read buffer size + 64 * 1024, // read buffer size + 1, // numReadThreads + 300, // max fd cache time in seconds + slog, NullStatsLogger.INSTANCE)) { + loc1 = elog.addEntry(ledgerId1, e1); + loc2 = elog.addEntry(ledgerId2, e2); + loc3 = elog.addEntry(ledgerId1, e3); + loc4 = elog.addEntry(ledgerId1, e4); + } + + try (EntryLogger legacy = newLegacyEntryLogger( + maxFileSize, // size of first 3 entries + header + ledgerDir)) { + int logId = logIdFromLocation(loc1); + assertThat(logId, equalTo(logIdFromLocation(loc2))); + assertThat(logId, equalTo(logIdFromLocation(loc3))); + assertThat(logId, not(equalTo(logIdFromLocation(loc4)))); + + EntryLogMetadata meta = legacy.getEntryLogMetadata(logId); + + assertThat(meta.getEntryLogId(), equalTo((long) logId)); + assertThat(meta.getTotalSize(), equalTo(1000L + 2000 + 3000 + (Integer.BYTES * 3))); + assertThat(meta.getRemainingSize(), equalTo(meta.getTotalSize())); + assertThat(meta.getLedgersMap().get(ledgerId1), equalTo(1000L + 3000L + (Integer.BYTES * 2))); + assertThat(meta.getLedgersMap().get(ledgerId2), equalTo(2000L + Integer.BYTES)); + } + } + + @Test + public void testLegacyCanReadMetadataOfDirectWithNoIndexWritten() throws Exception { + File ledgerDir = tmpDirs.createNew("legacyCanReadMeta", "ledgers"); + File curDir = new File(ledgerDir, "current"); + curDir.mkdirs(); + + ByteBuf e1 = makeEntry(ledgerId1, 1L, 1000); + ByteBuf e2 = makeEntry(ledgerId2, 2L, 2000); + ByteBuf e3 = makeEntry(ledgerId1, 3L, 3000); + ByteBuf e4 = makeEntry(ledgerId1, 4L, 4000); + + int maxFileSize = 1000 + 2000 + 3000 + (Integer.BYTES * 3) + 4096; + long loc1, loc2, loc3; + try (EntryLogger elog = new DirectEntryLogger( + curDir, new MockEntryLogIds(), + new NativeIOImpl(), + ByteBufAllocator.DEFAULT, + MoreExecutors.newDirectExecutorService(), + MoreExecutors.newDirectExecutorService(), + maxFileSize * 10, // max file size 
+ 10 * 1024 * 1024, // max sane entry size + 1024 * 1024, // total write buffer size + 1024 * 1024, // total read buffer size + 64 * 1024, // read buffer size + 1, // numReadThreads + 300, // max fd cache time in seconds + slog, NullStatsLogger.INSTANCE)) { + loc1 = elog.addEntry(ledgerId1, e1); + loc2 = elog.addEntry(ledgerId2, e2); + loc3 = elog.addEntry(ledgerId1, e3); + } + + try (EntryLogger legacy = newLegacyEntryLogger( + maxFileSize, // size of first 3 entries + header + ledgerDir)) { + int logId = logIdFromLocation(loc1); + assertThat(logId, equalTo(logIdFromLocation(loc2))); + assertThat(logId, equalTo(logIdFromLocation(loc3))); + EntryLogMetadata meta = legacy.getEntryLogMetadata(logId); + + assertThat(meta.getEntryLogId(), equalTo((long) logId)); + assertThat(meta.getTotalSize(), equalTo(1000L + 2000 + 3000 + (Integer.BYTES * 3))); + assertThat(meta.getRemainingSize(), equalTo(meta.getTotalSize())); + assertThat(meta.getLedgersMap().get(ledgerId1), equalTo(1000L + 3000L + (Integer.BYTES * 2))); + assertThat(meta.getLedgersMap().get(ledgerId2), equalTo(2000L + Integer.BYTES)); + } + } + + @Test + public void testDirectCanReadMetadataAndScanFromLegacy() throws Exception { + File ledgerDir = tmpDirs.createNew("directCanReadLegacyMeta", "ledgers"); + File curDir = new File(ledgerDir, "current"); + curDir.mkdirs(); + + ByteBuf e1 = makeEntry(ledgerId1, 1L, 1000); + ByteBuf e2 = makeEntry(ledgerId2, 2L, 2000); + ByteBuf e3 = makeEntry(ledgerId1, 3L, 3000); + ByteBuf e4 = makeEntry(ledgerId1, 4L, 4000); + + int maxFileSize = 1000 + 2000 + 3000 + (Integer.BYTES * 3) + 4096; + long loc1, loc2, loc3, loc4; + try (EntryLogger legacy = newLegacyEntryLogger( + maxFileSize, // size of first 3 entries + header + ledgerDir)) { + loc1 = legacy.addEntry(ledgerId1, e1); + loc2 = legacy.addEntry(ledgerId2, e2); + loc3 = legacy.addEntry(ledgerId1, e3); + loc4 = legacy.addEntry(ledgerId1, e4); // should force a roll + } + + try (DirectEntryLogger elog = new DirectEntryLogger( + curDir, new MockEntryLogIds(), + new NativeIOImpl(), + ByteBufAllocator.DEFAULT, + MoreExecutors.newDirectExecutorService(), + MoreExecutors.newDirectExecutorService(), + maxFileSize * 10, // max file size + 10 * 1024 * 1024, // max sane entry size + 1024 * 1024, // total write buffer size + 1024 * 1024, // total read buffer size + 64 * 1024, // read buffer size + 1, // numReadThreads + 300, // max fd cache time in seconds + slog, NullStatsLogger.INSTANCE)) { + int logId = logIdFromLocation(loc1); + assertThat(logId, equalTo(logIdFromLocation(loc2))); + assertThat(logId, equalTo(logIdFromLocation(loc3))); + assertThat(logId, not(equalTo(logIdFromLocation(loc4)))); + + EntryLogMetadata metaRead = elog.readEntryLogIndex(logId); + assertThat(metaRead.getEntryLogId(), equalTo((long) logId)); + assertThat(metaRead.getTotalSize(), equalTo(1000L + 2000 + 3000 + (Integer.BYTES * 3))); + assertThat(metaRead.getRemainingSize(), equalTo(metaRead.getTotalSize())); + assertThat(metaRead.getLedgersMap().get(ledgerId1), equalTo(1000L + 3000L + (Integer.BYTES * 2))); + assertThat(metaRead.getLedgersMap().get(ledgerId2), equalTo(2000L + Integer.BYTES)); + + EntryLogMetadata metaScan = elog.scanEntryLogMetadata(logId, null); + assertThat(metaScan.getEntryLogId(), equalTo((long) logId)); + assertThat(metaScan.getTotalSize(), equalTo(1000L + 2000 + 3000 + (Integer.BYTES * 3))); + assertThat(metaScan.getRemainingSize(), equalTo(metaScan.getTotalSize())); + assertThat(metaScan.getLedgersMap().get(ledgerId1), equalTo(1000L + 3000L + (Integer.BYTES 
* 2))); + assertThat(metaScan.getLedgersMap().get(ledgerId2), equalTo(2000L + Integer.BYTES)); + } + } + + // step1: default is DirectEntryLogger, write entries, read entries + // step2: change DirectEntryLogger to DefaultEntryLogger, write entries, and read all entries both written + // by DirectEntryLogger and DefaultEntryLogger + // step3: change DefaultEntryLogger to DirectEntryLogger, write entries, and read all entries written by + // DirectEntryLogger, DefaultEntryLogger and DirectEntryLogger. + // DirectEntryLogger -> DefaultEntryLogger -> DirectEntryLogger. + @Test + public void testCompatFromDirectToDefaultToDirectLogger() throws Exception { + File ledgerDir = tmpDirs.createNew("entryCompatTest", "ledgers"); + File curDir = new File(ledgerDir, "current"); + curDir.mkdirs(); + MockEntryLogIds entryLogIds = new MockEntryLogIds(); + + ByteBuf e1 = buildEntry(ledgerId1, 1, 1024, "entry-1".getBytes(StandardCharsets.UTF_8)); + ByteBuf e2 = buildEntry(ledgerId1, 2, 1024, "entry-2".getBytes(StandardCharsets.UTF_8)); + ByteBuf e3 = buildEntry(ledgerId1, 3, 1024, "entry-3".getBytes(StandardCharsets.UTF_8)); + ByteBuf e4 = buildEntry(ledgerId1, 4, 1024, "entry-4".getBytes(StandardCharsets.UTF_8)); + ByteBuf e5 = buildEntry(ledgerId1, 5, 1024, "entry-5".getBytes(StandardCharsets.UTF_8)); + ByteBuf e6 = buildEntry(ledgerId1, 6, 1024, "entry-6".getBytes(StandardCharsets.UTF_8)); + ByteBuf e7 = buildEntry(ledgerId1, 7, 1024, "entry-7".getBytes(StandardCharsets.UTF_8)); + + long loc1, loc2, loc3, loc4, loc5, loc6, loc7; + + // write entries into DirectEntryLogger + try (EntryLogger elog = new DirectEntryLogger( + curDir, entryLogIds, + new NativeIOImpl(), + ByteBufAllocator.DEFAULT, + MoreExecutors.newDirectExecutorService(), + MoreExecutors.newDirectExecutorService(), + 9000, // max file size (header + size of one entry) + 10 * 1024 * 1024, // max sane entry size + 1024 * 1024, // total write buffer size + 1024 * 1024, // total read buffer size + 64 * 1024, // read buffer size + 1, // numReadThreads + 300, // max fd cache time in seconds + slog, NullStatsLogger.INSTANCE)) { + loc1 = elog.addEntry(ledgerId1, e1.slice()); + loc2 = elog.addEntry(ledgerId1, e2.slice()); + loc3 = elog.addEntry(ledgerId1, e3.slice()); + elog.flush(); + + ByteBuf entry1 = elog.readEntry(ledgerId1, 1, loc1); + ByteBuf entry2 = elog.readEntry(ledgerId1, 2, loc2); + ByteBuf entry3 = elog.readEntry(ledgerId1, 3, loc3); + + assertEntryEquals(entry1, e1); + assertEntryEquals(entry2, e2); + assertEntryEquals(entry3, e3); + + entry1.release(); + entry2.release(); + entry3.release(); + } + + // write e4 and e5 using DefaultEntryLogger, then read back all entries written so far + ServerConfiguration conf = new ServerConfiguration(); + LedgerDirsManager dirsMgr = new LedgerDirsManager( + conf, + new File[] { ledgerDir }, + new DiskChecker( + conf.getDiskUsageThreshold(), + conf.getDiskUsageWarnThreshold())); + EntryLogger entryLogger = new DefaultEntryLogger(conf, dirsMgr); + loc4 = entryLogger.addEntry(ledgerId1, e4.slice()); + loc5 = entryLogger.addEntry(ledgerId1, e5.slice()); + entryLogger.flush(); + + ByteBuf entry1 = entryLogger.readEntry(ledgerId1, 1, loc1); + ByteBuf entry2 = entryLogger.readEntry(ledgerId1, 2, loc2); + ByteBuf entry3 = entryLogger.readEntry(ledgerId1, 3, loc3); + ByteBuf entry4 = entryLogger.readEntry(ledgerId1, 4, loc4); + ByteBuf entry5 = entryLogger.readEntry(ledgerId1, 5, loc5); + + assertEntryEquals(entry1, e1); + assertEntryEquals(entry2, e2); + assertEntryEquals(entry3, e3); + assertEntryEquals(entry4, e4); + assertEntryEquals(entry5, e5); + + 
entry1.release(); + entry2.release(); + entry3.release(); + entry4.release(); + entry5.release(); + + // use DirectEntryLogger to read entries written by both DirectEntryLogger and DefaultEntryLogger + entryLogIds.nextId(); + try (EntryLogger elog = new DirectEntryLogger( + curDir, entryLogIds, + new NativeIOImpl(), + ByteBufAllocator.DEFAULT, + MoreExecutors.newDirectExecutorService(), + MoreExecutors.newDirectExecutorService(), + 9000, // max file size (header + size of one entry) + 10 * 1024 * 1024, // max sane entry size + 1024 * 1024, // total write buffer size + 1024 * 1024, // total read buffer size + 64 * 1024, // read buffer size + 1, // numReadThreads + 300, // max fd cache time in seconds + slog, NullStatsLogger.INSTANCE)) { + loc6 = elog.addEntry(ledgerId1, e6.slice()); + loc7 = elog.addEntry(ledgerId1, e7.slice()); + elog.flush(); + + entry1 = elog.readEntry(ledgerId1, 1, loc1); + entry2 = elog.readEntry(ledgerId1, 2, loc2); + entry3 = elog.readEntry(ledgerId1, 3, loc3); + entry4 = elog.readEntry(ledgerId1, 4, loc4); + entry5 = elog.readEntry(ledgerId1, 5, loc5); + ByteBuf entry6 = elog.readEntry(ledgerId1, 6, loc6); + ByteBuf entry7 = elog.readEntry(ledgerId1, 7, loc7); + + assertEntryEquals(entry1, e1); + assertEntryEquals(entry2, e2); + assertEntryEquals(entry3, e3); + assertEntryEquals(entry4, e4); + assertEntryEquals(entry5, e5); + assertEntryEquals(entry6, e6); + assertEntryEquals(entry7, e7); + + entry1.release(); + entry2.release(); + entry3.release(); + entry4.release(); + entry5.release(); + entry6.release(); + entry7.release(); + } + + ledgerDir.deleteOnExit(); + + } + + // step1: default is DefaultEntryLogger, write entries and read entries. + // step2: change DefaultEntryLogger to DirectEntryLogger, write entries, and read all entries both writer + // by DefaultEntryLogger and DirectEntryLogger + // step3: change DirectEntryLogger to DefaultEntryLogger, write entries, and read all entries both written + // by DirectEntryLogger and DefaultEntryLogger + // step4: change DefaultEntryLogger to DirectEntryLogger, write entries, and read all entries written by + // DirectEntryLogger, DefaultEntryLogger and DirectEntryLogger. + // DefaultEntryLogger -> DirectEntryLogger -> DefaultEntryLogger -> DirectEntryLogger. 
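+    //
+    // These cross-logger reads work because both loggers share the same on-disk
+    // framing (inferred here from the size assertions in the metadata tests above,
+    // which count Integer.BYTES of overhead per entry, and from buildEntry below):
+    //
+    //   [int length][long ledgerId][long entryId][payload ...]
+    //
+    // so, for example, a 1000-byte entry occupies 1000 + Integer.BYTES bytes of log
+    // space, and a location handed out by one logger stays readable by the other.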
+ @Test + public void testCompatFromDefaultToDirectToDefaultToDirectLogger() throws Exception { + File ledgerDir = tmpDirs.createNew("entryCompatTest", "ledgers"); + File curDir = new File(ledgerDir, "current"); + curDir.mkdirs(); + MockEntryLogIds entryLogIds = new MockEntryLogIds(); + + ByteBuf e1 = buildEntry(ledgerId1, 1, 1024, "entry-1".getBytes(StandardCharsets.UTF_8)); + ByteBuf e2 = buildEntry(ledgerId1, 2, 1024, "entry-2".getBytes(StandardCharsets.UTF_8)); + ByteBuf e3 = buildEntry(ledgerId1, 3, 1024, "entry-3".getBytes(StandardCharsets.UTF_8)); + ByteBuf e4 = buildEntry(ledgerId1, 4, 1024, "entry-4".getBytes(StandardCharsets.UTF_8)); + ByteBuf e5 = buildEntry(ledgerId1, 5, 1024, "entry-5".getBytes(StandardCharsets.UTF_8)); + ByteBuf e6 = buildEntry(ledgerId1, 6, 1024, "entry-6".getBytes(StandardCharsets.UTF_8)); + ByteBuf e7 = buildEntry(ledgerId1, 7, 1024, "entry-7".getBytes(StandardCharsets.UTF_8)); + ByteBuf e8 = buildEntry(ledgerId1, 8, 1024, "entry-8".getBytes(StandardCharsets.UTF_8)); + ByteBuf e9 = buildEntry(ledgerId1, 9, 1024, "entry-9".getBytes(StandardCharsets.UTF_8)); + + long loc1, loc2, loc3, loc4, loc5, loc6, loc7, loc8, loc9; + + // write e1 and e2 using DefaultEntryLogger + ServerConfiguration conf = new ServerConfiguration(); + LedgerDirsManager dirsMgr = new LedgerDirsManager( + conf, + new File[] { ledgerDir }, + new DiskChecker( + conf.getDiskUsageThreshold(), + conf.getDiskUsageWarnThreshold())); + EntryLogger entryLogger = new DefaultEntryLogger(conf, dirsMgr); + loc1 = entryLogger.addEntry(ledgerId1, e1.slice()); + loc2 = entryLogger.addEntry(ledgerId1, e2.slice()); + entryLogger.flush(); + + ByteBuf entry1 = entryLogger.readEntry(ledgerId1, 1, loc1); + ByteBuf entry2 = entryLogger.readEntry(ledgerId1, 2, loc2); + + assertEntryEquals(entry1, e1); + assertEntryEquals(entry2, e2); + + entry1.release(); + entry2.release(); + + // write e3, e4 and e5 using DirectEntryLogger and read all entries. 
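+        // advance the mock id generator first, so the DirectEntryLogger below does
+        // not open the same log file id the DefaultEntryLogger above was writing to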
+ entryLogIds.nextId(); + try (EntryLogger elog = new DirectEntryLogger( + curDir, entryLogIds, + new NativeIOImpl(), + ByteBufAllocator.DEFAULT, + MoreExecutors.newDirectExecutorService(), + MoreExecutors.newDirectExecutorService(), + 9000, // max file size (header + size of one entry) + 10 * 1024 * 1024, // max sane entry size + 1024 * 1024, // total write buffer size + 1024 * 1024, // total read buffer size + 64 * 1024, // read buffer size + 1, // numReadThreads + 300, // max fd cache time in seconds + slog, NullStatsLogger.INSTANCE)) { + loc3 = elog.addEntry(ledgerId1, e3.slice()); + loc4 = elog.addEntry(ledgerId1, e4.slice()); + loc5 = elog.addEntry(ledgerId1, e5.slice()); + elog.flush(); + + entry1 = elog.readEntry(ledgerId1, 1, loc1); + entry2 = elog.readEntry(ledgerId1, 2, loc2); + ByteBuf entry3 = elog.readEntry(ledgerId1, 3, loc3); + ByteBuf entry4 = elog.readEntry(ledgerId1, 4, loc4); + ByteBuf entry5 = elog.readEntry(ledgerId1, 5, loc5); + + assertEntryEquals(entry1, e1); + assertEntryEquals(entry2, e2); + assertEntryEquals(entry3, e3); + assertEntryEquals(entry4, e4); + assertEntryEquals(entry5, e5); + + entry1.release(); + entry2.release(); + entry3.release(); + entry4.release(); + entry5.release(); + } + + // write e6 and e7 using DefaultEntryLogger and read all entries + entryLogger = new DefaultEntryLogger(conf, dirsMgr); + loc6 = entryLogger.addEntry(ledgerId1, e6.slice()); + loc7 = entryLogger.addEntry(ledgerId1, e7.slice()); + entryLogger.flush(); + + entry1 = entryLogger.readEntry(ledgerId1, 1, loc1); + entry2 = entryLogger.readEntry(ledgerId1, 2, loc2); + ByteBuf entry3 = entryLogger.readEntry(ledgerId1, 3, loc3); + ByteBuf entry4 = entryLogger.readEntry(ledgerId1, 4, loc4); + ByteBuf entry5 = entryLogger.readEntry(ledgerId1, 5, loc5); + ByteBuf entry6 = entryLogger.readEntry(ledgerId1, 6, loc6); + ByteBuf entry7 = entryLogger.readEntry(ledgerId1, 7, loc7); + + assertEntryEquals(entry1, e1); + assertEntryEquals(entry2, e2); + assertEntryEquals(entry3, e3); + assertEntryEquals(entry4, e4); + assertEntryEquals(entry5, e5); + assertEntryEquals(entry6, e6); + assertEntryEquals(entry7, e7); + + entry1.release(); + entry2.release(); + entry3.release(); + entry4.release(); + entry5.release(); + entry6.release(); + entry7.release(); + + // use DirectEntryLogger to read entries written by both DirectEntryLogger and DefaultEntryLogger + entryLogIds.nextId(); + try (EntryLogger elog = new DirectEntryLogger( + curDir, entryLogIds, + new NativeIOImpl(), + ByteBufAllocator.DEFAULT, + MoreExecutors.newDirectExecutorService(), + MoreExecutors.newDirectExecutorService(), + 9000, // max file size (header + size of one entry) + 10 * 1024 * 1024, // max sane entry size + 1024 * 1024, // total write buffer size + 1024 * 1024, // total read buffer size + 64 * 1024, // read buffer size + 1, // numReadThreads + 300, // max fd cache time in seconds + slog, NullStatsLogger.INSTANCE)) { + loc8 = elog.addEntry(ledgerId1, e8.slice()); + loc9 = elog.addEntry(ledgerId1, e9.slice()); + elog.flush(); + + entry1 = elog.readEntry(ledgerId1, 1, loc1); + entry2 = elog.readEntry(ledgerId1, 2, loc2); + entry3 = elog.readEntry(ledgerId1, 3, loc3); + entry4 = elog.readEntry(ledgerId1, 4, loc4); + entry5 = elog.readEntry(ledgerId1, 5, loc5); + entry6 = elog.readEntry(ledgerId1, 6, loc6); + entry7 = elog.readEntry(ledgerId1, 7, loc7); + ByteBuf entry8 = elog.readEntry(ledgerId1, 8, loc8); + ByteBuf entry9 = elog.readEntry(ledgerId1, 9, loc9); + + assertEntryEquals(entry1, e1); + assertEntryEquals(entry2, e2); 
+ assertEntryEquals(entry3, e3); + assertEntryEquals(entry4, e4); + assertEntryEquals(entry5, e5); + assertEntryEquals(entry6, e6); + assertEntryEquals(entry7, e7); + assertEntryEquals(entry8, e8); + assertEntryEquals(entry9, e9); + + entry1.release(); + entry2.release(); + entry3.release(); + entry4.release(); + entry5.release(); + entry6.release(); + entry7.release(); + entry8.release(); + entry9.release(); + } + + ledgerDir.deleteOnExit(); + } + + private ByteBuf buildEntry(long ledgerId, long entryId, int size, byte[] bytes) { + ByteBuf entry = Unpooled.buffer(size); + entry.writeLong(ledgerId); // ledger id + entry.writeLong(entryId); // entry id + entry.writeBytes(bytes); + return entry; + } +} + diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/directentrylogger/TestDirectReader.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/directentrylogger/TestDirectReader.java new file mode 100644 index 00000000000..03bd276e127 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/directentrylogger/TestDirectReader.java @@ -0,0 +1,515 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.bookie.storage.directentrylogger; + +import static org.apache.bookkeeper.bookie.storage.directentrylogger.DirectEntryLogger.logFilename; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.equalTo; + +import com.google.common.util.concurrent.MoreExecutors; +import io.netty.buffer.ByteBuf; +import io.netty.buffer.ByteBufAllocator; +import io.netty.buffer.Unpooled; +import java.io.EOFException; +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import org.apache.bookkeeper.common.util.nativeio.NativeIOException; +import org.apache.bookkeeper.common.util.nativeio.NativeIOImpl; +import org.apache.bookkeeper.slogger.Slogger; +import org.apache.bookkeeper.stats.NullStatsLogger; +import org.apache.bookkeeper.stats.OpStatsLogger; +import org.apache.bookkeeper.test.TmpDirs; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.condition.DisabledOnOs; +import org.junit.jupiter.api.condition.OS; + + +/** + * TestDirectReader. 
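+ * Exercises DirectReader: aligned and unaligned reads of ints, longs and
+ * buffers, reads that cross buffer boundaries, reads past the end of the
+ * file, partial reads, and reads from files that are still being written.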
+ */ +@DisabledOnOs(OS.WINDOWS) +public class TestDirectReader { + + private final TmpDirs tmpDirs = new TmpDirs(); + private final ExecutorService writeExecutor = Executors.newSingleThreadExecutor(); + private final OpStatsLogger opLogger = NullStatsLogger.INSTANCE.getOpStatsLogger("null"); + + @AfterEach + public void cleanup() throws Exception { + tmpDirs.cleanup(); + writeExecutor.shutdownNow(); + } + + @Test + public void testReadInt() throws Exception { + File ledgerDir = tmpDirs.createNew("readInt", "logs"); + + writeFileWithPattern(ledgerDir, 1234, 0xdeadbeef, 0, 1 << 20); + + try (LogReader reader = new DirectReader(1234, logFilename(ledgerDir, 1234), + ByteBufAllocator.DEFAULT, + new NativeIOImpl(), Buffer.ALIGNMENT, + 1 << 20, opLogger)) { + assertThat(reader.readIntAt(0), equalTo(0xdeadbeef)); + assertThat(reader.readIntAt(2), equalTo(0xbeefdead)); + assertThat(reader.readIntAt(1024), equalTo(0xdeadbeef)); + assertThat(reader.readIntAt(1025), equalTo(0xadbeefde)); + } + } + + @Test + public void testReadIntAcrossBoundary() throws Exception { + File ledgerDir = tmpDirs.createNew("readInt", "logs"); + + writeFileWithPattern(ledgerDir, 1234, 0xdeadbeef, 0, 1 << 20); + + try (LogReader reader = new DirectReader(1234, logFilename(ledgerDir, 1234), + ByteBufAllocator.DEFAULT, + new NativeIOImpl(), Buffer.ALIGNMENT, + 1 << 20, opLogger)) { + assertThat(reader.readIntAt(Buffer.ALIGNMENT - 2), equalTo(0xbeefdead)); + } + } + + @Test + public void testReadLong() throws Exception { + File ledgerDir = tmpDirs.createNew("readLong", "logs"); + + writeFileWithPattern(ledgerDir, 1234, 0xbeefcafe, 0, 1 << 20); + + try (LogReader reader = new DirectReader(1234, logFilename(ledgerDir, 1234), + ByteBufAllocator.DEFAULT, + new NativeIOImpl(), Buffer.ALIGNMENT, + 1 << 20, opLogger)) { + assertThat(reader.readLongAt(0), equalTo(0xbeefcafebeefcafeL)); + assertThat(reader.readLongAt(2), equalTo(0xcafebeefcafebeefL)); + assertThat(reader.readLongAt(1024), equalTo(0xbeefcafebeefcafeL)); + assertThat(reader.readLongAt(1025), equalTo(0xefcafebeefcafebeL)); + } + } + + @Test + public void testReadLongAcrossBoundary() throws Exception { + File ledgerDir = tmpDirs.createNew("readLong", "logs"); + + writeFileWithPattern(ledgerDir, 1234, 0xbeefcafe, 0, 1 << 20); + + try (LogReader reader = new DirectReader(1234, logFilename(ledgerDir, 1234), + ByteBufAllocator.DEFAULT, + new NativeIOImpl(), Buffer.ALIGNMENT, + 1 << 20, opLogger)) { + assertThat(reader.readLongAt(0), equalTo(0xbeefcafebeefcafeL)); + assertThat(reader.readLongAt(2), equalTo(0xcafebeefcafebeefL)); + assertThat(reader.readLongAt(1024), equalTo(0xbeefcafebeefcafeL)); + assertThat(reader.readLongAt(1025), equalTo(0xefcafebeefcafebeL)); + } + } + + @Test + public void testReadBuffer() throws Exception { + File ledgerDir = tmpDirs.createNew("readBuffer", "logs"); + + writeFileWithPattern(ledgerDir, 1234, 0xbeefcafe, 1, 1 << 20); + + try (LogReader reader = new DirectReader(1234, logFilename(ledgerDir, 1234), + ByteBufAllocator.DEFAULT, + new NativeIOImpl(), Buffer.ALIGNMENT * 4, + 1 << 20, opLogger)) { + ByteBuf bb = reader.readBufferAt(0, Buffer.ALIGNMENT * 2); + try { + for (int j = 0; j < Buffer.ALIGNMENT / Integer.BYTES; j++) { + assertThat(bb.readInt(), equalTo(0xbeefcafe)); + } + for (int i = 0; i < Buffer.ALIGNMENT / Integer.BYTES; i++) { + assertThat(bb.readInt(), equalTo(0xbeefcaff)); + } + assertThat(bb.readableBytes(), equalTo(0)); + } finally { + bb.release(); + } + + bb = reader.readBufferAt(Buffer.ALIGNMENT * 8, Buffer.ALIGNMENT); + try { 
+                for (int j = 0; j < Buffer.ALIGNMENT / Integer.BYTES; j++) {
+                    assertThat(bb.readInt(), equalTo(0xbeefcb06));
+                }
+                assertThat(bb.readableBytes(), equalTo(0));
+            } finally {
+                bb.release();
+            }
+
+            bb = reader.readBufferAt(Buffer.ALIGNMENT * 10 + 123, 345);
+            try {
+                assertThat(bb.readByte(), equalTo((byte) 0x08));
+                for (int j = 0; j < 344 / Integer.BYTES; j++) {
+                    assertThat(bb.readInt(), equalTo(0xbeefcb08));
+                }
+                assertThat(bb.readableBytes(), equalTo(0));
+            } finally {
+                bb.release();
+            }
+
+        }
+    }
+
+    @Test
+    public void testReadBufferAcrossBoundary() throws Exception {
+        File ledgerDir = tmpDirs.createNew("readBuffer", "logs");
+
+        writeFileWithPattern(ledgerDir, 1234, 0xbeefcafe, 1, 1 << 20);
+        BufferPool buffers = new BufferPool(new NativeIOImpl(), ByteBufAllocator.DEFAULT, Buffer.ALIGNMENT * 4, 8);
+
+        try (LogReader reader = new DirectReader(1234, logFilename(ledgerDir, 1234),
+                                                 ByteBufAllocator.DEFAULT,
+                                                 new NativeIOImpl(), Buffer.ALIGNMENT * 4,
+                                                 1 << 20, opLogger)) {
+            ByteBuf bb = reader.readBufferAt((long) (Buffer.ALIGNMENT * 3.5), Buffer.ALIGNMENT);
+            try {
+                for (int j = 0; j < (Buffer.ALIGNMENT / Integer.BYTES) / 2; j++) {
+                    assertThat(bb.readInt(), equalTo(0xbeefcb01));
+                }
+                for (int i = 0; i < (Buffer.ALIGNMENT / Integer.BYTES) / 2; i++) {
+                    assertThat(bb.readInt(), equalTo(0xbeefcb02));
+                }
+                assertThat(bb.readableBytes(), equalTo(0));
+            } finally {
+                bb.release();
+            }
+        }
+    }
+
+    @Test
+    public void testReadBufferBiggerThanReaderBuffer() throws Exception {
+        File ledgerDir = tmpDirs.createNew("readBuffer", "logs");
+
+        writeFileWithPattern(ledgerDir, 1234, 0xbeefcafe, 1, 1 << 20);
+
+        // buffer size is ALIGNMENT, read will be ALIGNMENT*2
+        try (LogReader reader = new DirectReader(1234, logFilename(ledgerDir, 1234),
+                                                 ByteBufAllocator.DEFAULT,
+                                                 new NativeIOImpl(), Buffer.ALIGNMENT,
+                                                 1 << 20, opLogger)) {
+            ByteBuf bb = reader.readBufferAt(0, Buffer.ALIGNMENT * 2);
+            try {
+                for (int j = 0; j < Buffer.ALIGNMENT / Integer.BYTES; j++) {
+                    assertThat(bb.readInt(), equalTo(0xbeefcafe));
+                }
+                for (int i = 0; i < Buffer.ALIGNMENT / Integer.BYTES; i++) {
+                    assertThat(bb.readInt(), equalTo(0xbeefcaff));
+                }
+                assertThat(bb.readableBytes(), equalTo(0));
+            } finally {
+                bb.release();
+            }
+        }
+    }
+
+    @Test
+    public void testReadPastEndOfFile() throws Exception {
+        File ledgerDir = tmpDirs.createNew("readBuffer", "logs");
+
+        writeFileWithPattern(ledgerDir, 1234, 0xbeeeeeef, 1, 1 << 13);
+        Assertions.assertThrows(EOFException.class, () -> {
+            try (LogReader reader = new DirectReader(1234, logFilename(ledgerDir, 1234),
+                                                     ByteBufAllocator.DEFAULT,
+                                                     new NativeIOImpl(), Buffer.ALIGNMENT,
+                                                     1 << 20, opLogger)) {
+                reader.readBufferAt(1 << 13, Buffer.ALIGNMENT);
+            }
+        });
+    }
+
+    @Test
+    public void testReadPastEndOfFilePartial() throws Exception {
+        File ledgerDir = tmpDirs.createNew("readBuffer", "logs");
+
+        writeFileWithPattern(ledgerDir, 1234, 0xbeeeeeef, 1, 1 << 13);
+        Assertions.assertThrows(EOFException.class, () -> {
+            try (LogReader reader = new DirectReader(1234, logFilename(ledgerDir, 1234),
+                                                     ByteBufAllocator.DEFAULT,
+                                                     new NativeIOImpl(), Buffer.ALIGNMENT,
+                                                     1 << 20, opLogger)) {
+                reader.readBufferAt((1 << 13) - Buffer.ALIGNMENT / 2, Buffer.ALIGNMENT);
+            }
+        });
+    }
+
+    @Test
+    public void testReadEntries() throws Exception {
+        File ledgerDir = tmpDirs.createNew("readEntries", "logs");
+
+        int entrySize = Buffer.ALIGNMENT / 4 + 100;
+        Map<Integer, Integer> offset2Pattern = new HashMap<>();
+        try (BufferPool buffers = new BufferPool(new NativeIOImpl(), ByteBufAllocator.DEFAULT,
+                                                 Buffer.ALIGNMENT, 8);
+             LogWriter writer = new DirectWriter(1234, logFilename(ledgerDir, 1234),
+                                                 1 << 20, MoreExecutors.newDirectExecutorService(),
+                                                 buffers, new NativeIOImpl(), Slogger.CONSOLE)) {
+            for (int i = 0; i < 1000; i++) {
+                ByteBuf bb = Unpooled.buffer(entrySize);
+                int pattern = 0xbeef + i;
+                TestBuffer.fillByteBuf(bb, pattern);
+                int offset = writer.writeDelimited(bb);
+                offset2Pattern.put(offset, pattern);
+            }
+        }
+
+        try (LogReader reader = new DirectReader(1234, logFilename(ledgerDir, 1234),
+                                                 ByteBufAllocator.DEFAULT,
+                                                 new NativeIOImpl(), Buffer.ALIGNMENT,
+                                                 1 << 20, opLogger)) {
+            List<Map.Entry<Integer, Integer>> offset2PatternList =
+                    new ArrayList<>(offset2Pattern.entrySet());
+            Collections.shuffle(offset2PatternList);
+
+            for (Map.Entry<Integer, Integer> e : offset2PatternList) {
+                ByteBuf entry = reader.readEntryAt(e.getKey());
+                try {
+                    assertThat(entry.readableBytes(), equalTo(entrySize));
+                    while (entry.isReadable()) {
+                        assertThat(entry.readInt(), equalTo(e.getValue()));
+                    }
+                } finally {
+                    entry.release();
+                }
+            }
+        }
+    }
+
+    @Test
+    public void testReadFromFileBeingWrittenNoPreallocation() throws Exception {
+        File ledgerDir = tmpDirs.createNew("readWhileWriting", "logs");
+
+        int entrySize = Buffer.ALIGNMENT / 2 + 8;
+        NativeIOImpl nativeIO = new NativeIOImpl() {
+            @Override
+            public int fallocate(int fd, int mode, long offset, long len)
+                    throws NativeIOException {
+                return 0;
+            }
+        };
+        try (BufferPool buffers = new BufferPool(new NativeIOImpl(), ByteBufAllocator.DEFAULT, Buffer.ALIGNMENT, 8);
+             LogWriter writer = new DirectWriter(1234, logFilename(ledgerDir, 1234),
+                                                 1 << 20, MoreExecutors.newDirectExecutorService(),
+                                                 buffers, nativeIO, Slogger.CONSOLE); // no-op fallocate: no preallocation
+             LogReader reader = new DirectReader(1234, logFilename(ledgerDir, 1234),
+                                                 ByteBufAllocator.DEFAULT,
+                                                 new NativeIOImpl(), Buffer.ALIGNMENT,
+                                                 1 << 20, opLogger)) {
+            ByteBuf b2 = Unpooled.buffer(entrySize);
+            TestBuffer.fillByteBuf(b2, 0xfede);
+            int offset = writer.writeDelimited(b2);
+
+            try {
+                reader.readEntryAt(offset);
+                Assertions.fail("Should have failed");
+            } catch (IOException ioe) {
+                // expected
+            }
+            writer.flush();
+
+            ByteBuf bbread = reader.readEntryAt(offset);
+            try {
+                assertThat(bbread.readableBytes(), equalTo(entrySize));
+                while (bbread.isReadable()) {
+                    assertThat(bbread.readInt(), equalTo(0xfede));
+                }
+            } finally {
+                bbread.release();
+            }
+        }
+    }
+
+    @Test
+    public void testReadFromFileBeingWrittenReadInPreallocated() throws Exception {
+        File ledgerDir = tmpDirs.createNew("readWhileWriting", "logs");
+
+        int entrySize = Buffer.ALIGNMENT / 2 + 8;
+
+        try (BufferPool buffers = new BufferPool(new NativeIOImpl(), ByteBufAllocator.DEFAULT, Buffer.ALIGNMENT, 8);
+             LogWriter writer = new DirectWriter(1234, logFilename(ledgerDir, 1234),
+                                                 1 << 20, MoreExecutors.newDirectExecutorService(),
+                                                 buffers, new NativeIOImpl(), Slogger.CONSOLE);
+             LogReader reader = new DirectReader(1234, logFilename(ledgerDir, 1234),
+                                                 ByteBufAllocator.DEFAULT,
+                                                 new NativeIOImpl(), Buffer.ALIGNMENT,
+                                                 1 << 20, opLogger)) {
+            ByteBuf bb = Unpooled.buffer(entrySize);
+            TestBuffer.fillByteBuf(bb, 0xfeed);
+            int offset = writer.writeDelimited(bb);
+
+            try {
+                reader.readEntryAt(offset);
+                Assertions.fail("Should have failed");
+            } catch (IOException ioe) {
+                // expected
+            }
+            writer.flush();
+            ByteBuf bbread = reader.readEntryAt(offset);
+            try {
+                assertThat(bbread.readableBytes(), equalTo(entrySize));
+                while (bbread.isReadable()) {
+                    assertThat(bbread.readInt(), equalTo(0xfeed));
+                }
+            } finally {
+                bbread.release();
+            }
+        }
+    }
+
+    @Test
+    public void testPartialRead() throws Exception {
+        File ledgerDir = tmpDirs.createNew("partialRead", "logs");
+
+        int entrySize = Buffer.ALIGNMENT * 4;
+
+        NativeIOImpl nativeIO = new NativeIOImpl() {
+            @Override
+            public long pread(int fd, long buf, long size, long offset) throws NativeIOException {
+                long read = super.pread(fd, buf, size, offset);
+                return Math.min(read, Buffer.ALIGNMENT); // force short reads: at most one alignment block per call
+            }
+
+            @Override
+            public int fallocate(int fd, int mode, long offset, long len)
+                    throws NativeIOException {
+                return 0; // don't preallocate
+            }
+        };
+        try (BufferPool buffers = new BufferPool(new NativeIOImpl(),
+                                                 ByteBufAllocator.DEFAULT, Buffer.ALIGNMENT * 10, 8);
+             LogWriter writer = new DirectWriter(1234, logFilename(ledgerDir, 1234), 1 << 20,
+                                                 MoreExecutors.newDirectExecutorService(),
+                                                 buffers, new NativeIOImpl(), Slogger.CONSOLE)) {
+            ByteBuf b1 = Unpooled.buffer(entrySize);
+            TestBuffer.fillByteBuf(b1, 0xfeedfeed);
+            int offset1 = writer.writeDelimited(b1);
+
+            ByteBuf b2 = Unpooled.buffer(entrySize);
+            TestBuffer.fillByteBuf(b2, 0xfedefede);
+            int offset2 = writer.writeDelimited(b2);
+            writer.flush();
+
+            try (LogReader reader = new DirectReader(1234, logFilename(ledgerDir, 1234),
+                                                     ByteBufAllocator.DEFAULT,
+                                                     nativeIO, Buffer.ALIGNMENT * 3,
+                                                     1 << 20, opLogger)) {
+                ByteBuf bbread = reader.readEntryAt(offset1);
+                try {
+                    assertThat(bbread.readableBytes(), equalTo(entrySize));
+                    while (bbread.readableBytes() >= Integer.BYTES) {
+                        assertThat(bbread.readInt(), equalTo(0xfeedfeed));
+                    }
+                    assertThat(bbread.readableBytes(), equalTo(0));
+                } finally {
+                    bbread.release();
+                }
+
+                bbread = reader.readEntryAt(offset2);
+                try {
+                    assertThat(bbread.readableBytes(), equalTo(entrySize));
+                    while (bbread.readableBytes() >= Integer.BYTES) {
+                        assertThat(bbread.readInt(), equalTo(0xfedefede));
+                    }
+                    assertThat(bbread.readableBytes(), equalTo(0));
+                } finally {
+                    bbread.release();
+                }
+            }
+        }
+    }
+
+    @Test
+    public void testLargeEntry() throws Exception {
+        File ledgerDir = tmpDirs.createNew("largeEntries", "logs");
+
+        int entrySize = Buffer.ALIGNMENT * 4;
+
+        int offset1, offset2;
+        try (BufferPool buffers = new BufferPool(new NativeIOImpl(), ByteBufAllocator.DEFAULT, Buffer.ALIGNMENT * 8, 8);
+             LogWriter writer = new DirectWriter(1234, logFilename(ledgerDir, 1234), 1 << 20,
+                                                 MoreExecutors.newDirectExecutorService(), buffers, new NativeIOImpl(),
+                                                 Slogger.CONSOLE)) {
+            ByteBuf b1 = Unpooled.buffer(entrySize);
+            TestBuffer.fillByteBuf(b1, 0xfeedfeed);
+            offset1 = writer.writeDelimited(b1);
+
+            ByteBuf b2 = Unpooled.buffer(entrySize);
+            TestBuffer.fillByteBuf(b2, 0xfedefede);
+            offset2 = writer.writeDelimited(b2);
+            writer.flush();
+        }
+
+        try (LogReader reader = new DirectReader(1234, logFilename(ledgerDir, 1234),
+                                                 ByteBufAllocator.DEFAULT,
+                                                 new NativeIOImpl(), Buffer.ALIGNMENT,
+                                                 1 << 20, opLogger)) {
+            ByteBuf bbread = reader.readEntryAt(offset1);
+            try {
+                assertThat(bbread.readableBytes(), equalTo(entrySize));
+                while (bbread.readableBytes() >= Integer.BYTES) {
+                    assertThat(bbread.readInt(), equalTo(0xfeedfeed));
+                }
+                assertThat(bbread.readableBytes(), equalTo(0));
+            } finally {
+                bbread.release();
+            }
+
+            bbread = reader.readEntryAt(offset2);
+            try {
+                assertThat(bbread.readableBytes(), equalTo(entrySize));
+                while (bbread.readableBytes() >= Integer.BYTES) {
+                    assertThat(bbread.readInt(), equalTo(0xfedefede));
+                }
+                assertThat(bbread.readableBytes(), equalTo(0));
+            } finally {
+                bbread.release();
+            }
+        }
+    }
+
+    private static void writeFileWithPattern(File
directory, int logId, + int pattern, int blockIncrement, int fileSize) throws Exception { + try (BufferPool buffers = new BufferPool(new NativeIOImpl(), ByteBufAllocator.DEFAULT, Buffer.ALIGNMENT, 8); + LogWriter writer = new DirectWriter(logId, logFilename(directory, logId), + fileSize, MoreExecutors.newDirectExecutorService(), + buffers, new NativeIOImpl(), Slogger.CONSOLE)) { + + for (int written = 0; written < fileSize; written += Buffer.ALIGNMENT) { + ByteBuf bb = Unpooled.buffer(Buffer.ALIGNMENT); + TestBuffer.fillByteBuf(bb, pattern); + writer.writeAt(written, bb); + bb.release(); + pattern += blockIncrement; + } + writer.flush(); + } + } + +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/directentrylogger/TestDirectWriter.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/directentrylogger/TestDirectWriter.java new file mode 100644 index 00000000000..4f1f3033ea9 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/directentrylogger/TestDirectWriter.java @@ -0,0 +1,340 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.bookie.storage.directentrylogger; + +import static org.apache.bookkeeper.bookie.storage.directentrylogger.DirectEntryLogger.logFilename; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.equalTo; + +import com.google.common.util.concurrent.MoreExecutors; +import io.netty.buffer.ByteBuf; +import io.netty.buffer.ByteBufAllocator; +import io.netty.buffer.Unpooled; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import org.apache.bookkeeper.common.util.nativeio.NativeIO; +import org.apache.bookkeeper.common.util.nativeio.NativeIOException; +import org.apache.bookkeeper.common.util.nativeio.NativeIOImpl; +import org.apache.bookkeeper.slogger.Slogger; +import org.apache.bookkeeper.test.TmpDirs; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.condition.DisabledOnOs; +import org.junit.jupiter.api.condition.OS; + +/** + * TestDirectWriter. 
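+ * Exercises DirectWriter: alignment requirements for positional writes,
+ * padding of delimited writes, flush semantics (including flushes blocked
+ * behind in-flight writes), and error propagation when pwrite or fallocate
+ * fails.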
+ */ +@DisabledOnOs(OS.WINDOWS) +public class TestDirectWriter { + private static final Slogger slog = Slogger.CONSOLE; + + private final TmpDirs tmpDirs = new TmpDirs(); + private final ExecutorService writeExecutor = Executors.newSingleThreadExecutor(); + + @AfterEach + public void cleanup() throws Exception { + tmpDirs.cleanup(); + writeExecutor.shutdownNow(); + } + + @Test + public void testWriteAtAlignment() throws Exception { + File ledgerDir = tmpDirs.createNew("writeAlignment", "logs"); + Assertions.assertThrows(IllegalArgumentException.class, () -> { + try (BufferPool buffers = new BufferPool(new NativeIOImpl(), ByteBufAllocator.DEFAULT, Buffer.ALIGNMENT, 8); + LogWriter writer = new DirectWriter(5678, logFilename(ledgerDir, 5678), + 1 << 24, writeExecutor, + buffers, new NativeIOImpl(), Slogger.CONSOLE)) { + ByteBuf bb = Unpooled.buffer(Buffer.ALIGNMENT); + TestBuffer.fillByteBuf(bb, 0xdededede); + writer.writeAt(1234, bb); + writer.flush(); + } + }); + } + + @Test + public void testWriteAlignmentSize() throws Exception { + File ledgerDir = tmpDirs.createNew("writeAlignment", "logs"); + Assertions.assertThrows(IllegalArgumentException.class, () -> { + try (BufferPool buffers = new BufferPool(new NativeIOImpl(), ByteBufAllocator.DEFAULT, Buffer.ALIGNMENT, 8); + LogWriter writer = new DirectWriter(5678, logFilename(ledgerDir, 5678), 1 << 24, writeExecutor, + buffers, new NativeIOImpl(), Slogger.CONSOLE)) { + ByteBuf bb = Unpooled.buffer(123); + TestBuffer.fillByteBuf(bb, 0xdededede); + writer.writeAt(0, bb); + writer.flush(); + } + }); + } + + @Test + public void testWriteAlignedNotAtStart() throws Exception { + File ledgerDir = tmpDirs.createNew("writeAlignment", "logs"); + try (BufferPool buffers = new BufferPool(new NativeIOImpl(), ByteBufAllocator.DEFAULT, Buffer.ALIGNMENT, 8); + LogWriter writer = new DirectWriter(5678, logFilename(ledgerDir, 5678), 1 << 24, writeExecutor, + buffers, new NativeIOImpl(), Slogger.CONSOLE)) { + ByteBuf bb = Unpooled.buffer(Buffer.ALIGNMENT); + TestBuffer.fillByteBuf(bb, 0xdededede); + writer.writeAt(Buffer.ALIGNMENT * 2, bb); + writer.flush(); + } + } + + + @Test + public void testFlushingWillWaitForBuffer() throws Exception { + File ledgerDir = tmpDirs.createNew("writeFailFailsFlush", "logs"); + try (BufferPool buffers = new BufferPool(new NativeIOImpl(), ByteBufAllocator.DEFAULT, + Buffer.ALIGNMENT, 1); // only one buffer available, so we can't flush in bg + LogWriter writer = new DirectWriter(5678, logFilename(ledgerDir, 5678), 1 << 24, writeExecutor, + buffers, new NativeIOImpl(), Slogger.CONSOLE)) { + ByteBuf bb = Unpooled.buffer(Buffer.ALIGNMENT / 2); + TestBuffer.fillByteBuf(bb, 0xdededede); + writer.writeDelimited(bb); + writer.flush(); + } + } + + @Test + public void testWriteFailFailsFlush() throws Exception { + File ledgerDir = tmpDirs.createNew("writeFailFailsFlush", "logs"); + NativeIO io = new NativeIOImpl() { + boolean failed = false; + @Override + public int pwrite(int fd, long pointer, int count, long offset) throws NativeIOException { + synchronized (this) { + if (!failed) { + failed = true; + throw new NativeIOException("fail for test"); + } + } + return super.pwrite(fd, pointer, count, offset); + } + }; + Assertions.assertThrows(IOException.class, () -> { + try (BufferPool buffers = new BufferPool(new NativeIOImpl(), ByteBufAllocator.DEFAULT, Buffer.ALIGNMENT, 8); + LogWriter writer = new DirectWriter(5678, logFilename(ledgerDir, 5678), 1 << 24, writeExecutor, + buffers, io, Slogger.CONSOLE)) { + for (int i = 0; i < 10; i++) { 
+                    ByteBuf bb = Unpooled.buffer(Buffer.ALIGNMENT / 2);
+                    TestBuffer.fillByteBuf(bb, 0xdededede);
+                    writer.writeDelimited(bb);
+                }
+                writer.flush();
+            }
+        });
+    }
+
+    @Test
+    public void testWriteAtFailFailsFlush() throws Exception {
+        File ledgerDir = tmpDirs.createNew("writeAtFailFailsFlush", "logs");
+        NativeIO io = new NativeIOImpl() {
+            boolean failed = false;
+            @Override
+            public int pwrite(int fd, long pointer, int count, long offset) throws NativeIOException {
+                synchronized (this) {
+                    if (!failed) {
+                        failed = true;
+                        throw new NativeIOException("fail for test");
+                    }
+                }
+                return super.pwrite(fd, pointer, count, offset);
+            }
+        };
+        Assertions.assertThrows(IOException.class, () -> {
+            try (BufferPool buffers = new BufferPool(new NativeIOImpl(), ByteBufAllocator.DEFAULT, 1 << 14, 8);
+                 LogWriter writer = new DirectWriter(5678, logFilename(ledgerDir, 5678), 1 << 24, writeExecutor,
+                                                     buffers, io, Slogger.CONSOLE)) {
+                ByteBuf bb = Unpooled.buffer(Buffer.ALIGNMENT);
+                TestBuffer.fillByteBuf(bb, 0xdededede);
+                writer.writeAt(0, bb);
+                writer.flush();
+            }
+        });
+    }
+
+    @Test
+    public void testWriteWithPadding() throws Exception {
+        File ledgerDir = tmpDirs.createNew("paddingWrite", "logs");
+        try (BufferPool buffers = new BufferPool(new NativeIOImpl(), ByteBufAllocator.DEFAULT, 1 << 14, 8);
+             LogWriter writer = new DirectWriter(5678, logFilename(ledgerDir, 5678), 1 << 24, writeExecutor,
+                                                 buffers, new NativeIOImpl(), Slogger.CONSOLE)) {
+            ByteBuf bb = Unpooled.buffer(Buffer.ALIGNMENT);
+            TestBuffer.fillByteBuf(bb, 0xdededede);
+            bb.writerIndex(123);
+            writer.writeDelimited(bb);
+            writer.flush();
+        }
+
+        ByteBuf contents = readIntoByteBuf(ledgerDir, 5678);
+        assertThat(contents.readInt(), equalTo(123));
+        for (int i = 0; i < 123; i++) {
+            assertThat(contents.readByte(), equalTo((byte) 0xde));
+        }
+        for (int i = 0; i < Buffer.ALIGNMENT - (123 + Integer.BYTES); i++) {
+            assertThat(contents.readByte(), equalTo(Buffer.PADDING_BYTE));
+        }
+        while (contents.isReadable()) {
+            assertThat((int) contents.readByte(), equalTo(0));
+        }
+    }
+
+    @Test
+    public void testWriteBlocksFlush() throws Exception {
+        ExecutorService flushExecutor = Executors.newSingleThreadExecutor();
+        try {
+            File ledgerDir = tmpDirs.createNew("blockWrite", "logs");
+            try (BufferPool buffers = new BufferPool(new NativeIOImpl(), ByteBufAllocator.DEFAULT, 1 << 14, 8);
+                 LogWriter writer = new DirectWriter(1234, logFilename(ledgerDir, 1234),
+                                                     1 << 24, writeExecutor,
+                                                     buffers, new NativeIOImpl(), Slogger.CONSOLE)) {
+                CompletableFuture<Void> blocker = new CompletableFuture<>();
+                writeExecutor.submit(() -> {
+                    blocker.join();
+                    return null;
+                });
+                ByteBuf bb = Unpooled.buffer(4096);
+                TestBuffer.fillByteBuf(bb, 0xdeadbeef);
+                writer.writeAt(0, bb);
+                Future<?> f = flushExecutor.submit(() -> {
+                    writer.flush();
+                    return null;
+                });
+                Thread.sleep(100);
+                assertThat(f.isDone(), equalTo(false));
+                blocker.complete(null);
+                f.get();
+            }
+            ByteBuf contents = readIntoByteBuf(ledgerDir, 1234);
+            for (int i = 0; i < 4096 / Integer.BYTES; i++) {
+                assertThat(contents.readInt(), equalTo(0xdeadbeef));
+            }
+            if (contents.readableBytes() > 0) { // linux-only: fallocate will preallocate file
+                while (contents.isReadable()) {
+                    assertThat((int) contents.readByte(), equalTo(0));
+                }
+            }
+        } finally {
+            flushExecutor.shutdown();
+        }
+    }
+
+    @Test
+    public void testFailsToOpen() throws Exception {
+        File ledgerDir = tmpDirs.createNew("failOpen", "logs");
+        ledgerDir.delete();
+
+        try (BufferPool buffers = new BufferPool(new NativeIOImpl(),
ByteBufAllocator.DEFAULT, 1 << 14, 8)) { + Assertions.assertThrows(IOException.class, () -> { + new DirectWriter(1234, logFilename(ledgerDir, 1234), + 1 << 30, MoreExecutors.newDirectExecutorService(), + buffers, new NativeIOImpl(), Slogger.CONSOLE); + }); + } + } + + @Test + public void fallocateNotAvailable() throws Exception { + File ledgerDir = tmpDirs.createNew("fallocUnavailable", "logs"); + NativeIO nativeIO = new NativeIOImpl() { + @Override + public int fallocate(int fd, int mode, long offset, long len) + throws NativeIOException { + throw new NativeIOException("pretending I'm a mac"); + } + }; + try (BufferPool buffers = new BufferPool(new NativeIOImpl(), ByteBufAllocator.DEFAULT, 1 << 14, 8); + LogWriter writer = new DirectWriter(3456, logFilename(ledgerDir, 3456), + 1 << 24, writeExecutor, + buffers, nativeIO, Slogger.CONSOLE)) { + ByteBuf bb = Unpooled.buffer(Buffer.ALIGNMENT); + TestBuffer.fillByteBuf(bb, 0xdeadbeef); + + writer.writeAt(0, bb); + writer.flush(); + } + + // should be 0xdeadbeef until the end of the file + ByteBuf contents = readIntoByteBuf(ledgerDir, 3456); + assertThat(contents.readableBytes(), equalTo(Buffer.ALIGNMENT)); + while (contents.isReadable()) { + assertThat(contents.readInt(), equalTo(0xdeadbeef)); + } + } + + @Test + public void testWriteAtIntLimit() throws Exception { + File ledgerDir = tmpDirs.createNew("intLimit", "logs"); + + try (BufferPool buffers = new BufferPool(new NativeIOImpl(), ByteBufAllocator.DEFAULT, 1 << 14, 8); + LogWriter writer = new DirectWriter(3456, logFilename(ledgerDir, 3456), + (long) Integer.MAX_VALUE + (Buffer.ALIGNMENT * 100), + writeExecutor, + buffers, new NativeIOImpl(), Slogger.CONSOLE)) { + ByteBuf b1 = Unpooled.buffer(Buffer.ALIGNMENT - (Integer.BYTES * 2) - 1); + TestBuffer.fillByteBuf(b1, 0xdeadbeef); + + long finalSeekablePosition = Integer.MAX_VALUE & ~(Buffer.ALIGNMENT - 1); + writer.position(finalSeekablePosition); + long offset = writer.writeDelimited(b1); + assertThat(offset, equalTo(finalSeekablePosition + Integer.BYTES)); + assertThat(writer.position(), equalTo((long) Integer.MAX_VALUE - Integer.BYTES)); + + offset = writer.writeDelimited(b1); + assertThat(offset, equalTo((long) Integer.MAX_VALUE)); + + writer.flush(); + + try { + writer.writeDelimited(b1); + Assertions.fail("Shouldn't be possible, we've gone past MAX_INT"); + } catch (IOException ioe) { + // expected + } + } + + } + + static ByteBuf readIntoByteBuf(File directory, int logId) throws Exception { + byte[] bytes = new byte[1024]; + File file = new File(DirectEntryLogger.logFilename(directory, logId)); + slog.kv("filename", file.toString()).info("reading in"); + ByteBuf byteBuf = Unpooled.buffer((int) file.length()); + try (FileInputStream is = new FileInputStream(file)) { + int bytesRead = is.read(bytes); + while (bytesRead > 0) { + byteBuf.writeBytes(bytes, 0, bytesRead); + bytesRead = is.read(bytes); + } + } + + assertThat(byteBuf.readableBytes(), equalTo((int) file.length())); + return byteBuf; + } +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/directentrylogger/TestEntryLogIds.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/directentrylogger/TestEntryLogIds.java new file mode 100644 index 00000000000..de34f17499a --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/directentrylogger/TestEntryLogIds.java @@ -0,0 +1,290 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.bookie.storage.directentrylogger; + +import static org.apache.bookkeeper.bookie.storage.EntryLogTestUtils.logIdFromLocation; +import static org.apache.bookkeeper.bookie.storage.EntryLogTestUtils.makeEntry; +import static org.apache.bookkeeper.bookie.storage.EntryLogTestUtils.newDirsManager; +import static org.apache.bookkeeper.bookie.storage.EntryLogTestUtils.newLegacyEntryLogger; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThan; +import static org.junit.jupiter.api.Assertions.assertEquals; + +import com.google.common.collect.Lists; +import io.netty.buffer.ByteBuf; +import java.io.File; +import org.apache.bookkeeper.bookie.LedgerDirsManager; +import org.apache.bookkeeper.bookie.storage.EntryLogIds; +import org.apache.bookkeeper.bookie.storage.EntryLogger; +import org.apache.bookkeeper.slogger.Slogger; +import org.apache.bookkeeper.test.TmpDirs; +import org.apache.bookkeeper.util.LedgerDirUtil; +import org.apache.commons.lang3.tuple.Pair; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Test; + +/** + * TestEntryLogIds. 
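+ * Exercises entry log id allocation: new ids must not stomp on ids already
+ * used by the legacy entry logger, allocation works across multiple ledger
+ * directories, wraps around before Integer.MAX_VALUE, and ids held only by
+ * "compacting" files are reusable while "compacted" ones are not.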
+ */
+public class TestEntryLogIds {
+    private static final Slogger slog = Slogger.CONSOLE;
+
+    private final TmpDirs tmpDirs = new TmpDirs();
+
+    @AfterEach
+    public void cleanup() throws Exception {
+        tmpDirs.cleanup();
+    }
+
+    @Test
+    public void testNoStomping() throws Exception {
+        File ledgerDir = tmpDirs.createNew("entryLogIds", "ledgers");
+
+        int highestSoFar = -1;
+        try (EntryLogger legacy = newLegacyEntryLogger(1024, ledgerDir)) {
+            ByteBuf e1 = makeEntry(1L, 1L, 2048);
+            long loc1 = legacy.addEntry(1L, e1);
+            int logId1 = logIdFromLocation(loc1);
+
+            ByteBuf e2 = makeEntry(1L, 2L, 2048);
+            long loc2 = legacy.addEntry(1L, e2);
+            int logId2 = logIdFromLocation(loc2);
+            assertThat(logId2, greaterThan(logId1));
+            highestSoFar = logId2;
+        }
+
+        EntryLogIds ids = new EntryLogIdsImpl(newDirsManager(ledgerDir), slog);
+        int logId3 = ids.nextId();
+        assertThat(logId3, greaterThan(highestSoFar));
+        touchLog(ledgerDir, logId3);
+        highestSoFar = logId3;
+
+        int logId4 = ids.nextId();
+        assertThat(logId4, greaterThan(highestSoFar));
+        touchLog(ledgerDir, logId4);
+        highestSoFar = logId4;
+
+        try (EntryLogger legacy = newLegacyEntryLogger(1024, ledgerDir)) {
+            ByteBuf e1 = makeEntry(1L, 1L, 2048);
+            long loc5 = legacy.addEntry(1L, e1);
+            int logId5 = logIdFromLocation(loc5);
+            assertThat(logId5, greaterThan(highestSoFar));
+
+            ByteBuf e2 = makeEntry(1L, 2L, 2048);
+            long loc6 = legacy.addEntry(1L, e2);
+            int logId6 = logIdFromLocation(loc6);
+            assertThat(logId6, greaterThan(logId5));
+        }
+    }
+
+    @Test
+    public void testNoStompingDirectStartsFirst() throws Exception {
+        File ledgerDir = tmpDirs.createNew("entryLogIds", "ledgers");
+
+        int highestSoFar = -1;
+        EntryLogIds ids = new EntryLogIdsImpl(newDirsManager(ledgerDir), slog);
+        int logId1 = ids.nextId();
+        assertThat(logId1, greaterThan(highestSoFar));
+        touchLog(ledgerDir, logId1);
+        highestSoFar = logId1;
+
+        try (EntryLogger legacy = newLegacyEntryLogger(1024, ledgerDir)) {
+            ByteBuf e1 = makeEntry(1L, 1L, 2048);
+            long loc2 = legacy.addEntry(1L, e1);
+            int logId2 = logIdFromLocation(loc2);
+            assertThat(logId2, greaterThan(highestSoFar));
+            highestSoFar = logId2;
+
+            ByteBuf e2 = makeEntry(1L, 2L, 2048);
+            long loc3 = legacy.addEntry(1L, e2);
+            int logId3 = logIdFromLocation(loc3);
+            assertThat(logId3, greaterThan(logId2));
+            highestSoFar = logId3;
+        }
+
+        // reinitialize to pick up legacy
+        ids = new EntryLogIdsImpl(newDirsManager(ledgerDir), slog);
+        int logId4 = ids.nextId();
+        assertThat(logId4, greaterThan(highestSoFar));
+        touchLog(ledgerDir, logId4);
+        highestSoFar = logId4;
+    }
+
+    @Test
+    public void testIdGenerator() throws Exception {
+        File base = tmpDirs.createNew("entryLogIds", "ledgers");
+        File ledgerDir1 = new File(base, "l1");
+        File ledgerDir2 = new File(base, "l2");
+        File ledgerDir3 = new File(base, "l3");
+        File ledgerDir4 = new File(base, "l4");
+        ledgerDir1.mkdir();
+        ledgerDir2.mkdir();
+        ledgerDir3.mkdir();
+        ledgerDir4.mkdir();
+
+        // case 1: use the root ledgerDirsManager
+        LedgerDirsManager ledgerDirsManager = newDirsManager(ledgerDir1, ledgerDir2);
+        EntryLogIds ids1 = new EntryLogIdsImpl(ledgerDirsManager, slog);
+        for (int i = 0; i < 10; i++) {
+            int logId = ids1.nextId();
+            File log1 = new File(ledgerDir1 + "/current", logId + ".log");
+            log1.createNewFile();
+            assertEquals(logId, i);
+        }
+
+        EntryLogIds ids2 = new EntryLogIdsImpl(ledgerDirsManager, slog);
+        for (int i = 0; i < 10; i++) {
+            int logId = ids2.nextId();
+            assertEquals(logId, 10 + i);
+        }
+
+        // case 2: a new LedgerDirsManager per directory
+        LedgerDirsManager ledgerDirsManager3 = newDirsManager(ledgerDir3);
+        LedgerDirsManager ledgerDirsManager4 = newDirsManager(ledgerDir4);
+        EntryLogIds ids3 = new EntryLogIdsImpl(ledgerDirsManager3, slog);
+        for (int i = 0; i < 10; i++) {
+            int logId = ids3.nextId();
+            File log1 = new File(ledgerDir3 + "/current", logId + ".log");
+            log1.createNewFile();
+            assertEquals(logId, i);
+        }
+
+        EntryLogIds ids4 = new EntryLogIdsImpl(ledgerDirsManager4, slog);
+        for (int i = 0; i < 10; i++) {
+            int logId = ids4.nextId();
+            assertEquals(logId, i);
+        }
+    }
+
+    @Test
+    public void testMultiDirectory() throws Exception {
+        File base = tmpDirs.createNew("entryLogIds", "ledgers");
+        File ledgerDir1 = new File(base, "l1");
+        File ledgerDir2 = new File(base, "l2");
+        File ledgerDir3 = new File(base, "l3");
+
+        int highestSoFar = -1;
+        try (EntryLogger legacy = newLegacyEntryLogger(1024, ledgerDir1, ledgerDir2, ledgerDir3)) {
+            ByteBuf e1 = makeEntry(1L, 1L, 2048);
+            long loc1 = legacy.addEntry(1L, e1);
+            int logId1 = logIdFromLocation(loc1);
+            assertThat(logId1, greaterThan(highestSoFar));
+            highestSoFar = logId1;
+
+            ByteBuf e2 = makeEntry(1L, 2L, 2048);
+            long loc2 = legacy.addEntry(1L, e2);
+            int logId2 = logIdFromLocation(loc2);
+            assertThat(logId2, greaterThan(highestSoFar));
+            highestSoFar = logId2;
+
+            ByteBuf e3 = makeEntry(1L, 3L, 2048);
+            long loc3 = legacy.addEntry(1L, e3);
+            int logId3 = logIdFromLocation(loc3);
+            assertThat(logId3, greaterThan(highestSoFar));
+            highestSoFar = logId3;
+        }
+
+        EntryLogIds ids = new EntryLogIdsImpl(newDirsManager(ledgerDir1, ledgerDir2, ledgerDir3), slog);
+        int logId4 = ids.nextId();
+        assertThat(logId4, greaterThan(highestSoFar));
+        touchLog(ledgerDir2, logId4);
+        highestSoFar = logId4;
+
+        try (EntryLogger legacy = newLegacyEntryLogger(1024, ledgerDir1, ledgerDir2, ledgerDir3)) {
+            ByteBuf e1 = makeEntry(1L, 1L, 2048);
+            long loc5 = legacy.addEntry(1L, e1);
+            int logId5 = logIdFromLocation(loc5);
+            assertThat(logId5, greaterThan(highestSoFar));
+            highestSoFar = logId5;
+        }
+    }
+
+    @Test
+    public void testWrapAround() throws Exception {
+        File ledgerDir = tmpDirs.createNew("entryLogIds", "ledgers");
+        new EntryLogIdsImpl(newDirsManager(ledgerDir), slog);
+        touchLog(ledgerDir, Integer.MAX_VALUE - 1);
+
+        EntryLogIds ids = new EntryLogIdsImpl(newDirsManager(ledgerDir), slog);
+        int logId = ids.nextId();
+        assertThat(logId, equalTo(0));
+    }
+
+    @Test
+    public void testCompactingLogsNotConsidered() throws Exception {
+        // if there is a process restart, all "compacting" logs will be deleted
+        // so their IDs are safe to reuse. Even in the case of two processes acting
+        // on the directory concurrently, the transactional rename will prevent data
+        // loss.
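+        // Below, log 123 exists and id 129 is held only by a "compacting" file,
+        // so the next allocated id is 124: compacting files do not reserve ids.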
+ File ledgerDir = tmpDirs.createNew("entryLogIds", "ledgers"); + new EntryLogIdsImpl(newDirsManager(ledgerDir), slog); + touchLog(ledgerDir, 123); + touchCompacting(ledgerDir, 129); + + EntryLogIds ids = new EntryLogIdsImpl(newDirsManager(ledgerDir), slog); + int logId = ids.nextId(); + assertThat(logId, equalTo(124)); + } + + @Test + public void testCompactedLogsConsidered() throws Exception { + File ledgerDir = tmpDirs.createNew("entryLogIds", "ledgers"); + new EntryLogIdsImpl(newDirsManager(ledgerDir), slog); + touchLog(ledgerDir, 123); + touchCompacted(ledgerDir, 129, 123); + + EntryLogIds ids = new EntryLogIdsImpl(newDirsManager(ledgerDir), slog); + int logId = ids.nextId(); + assertThat(logId, equalTo(130)); + } + + + @Test + public void testGapSelection() throws Exception { + assertEquals(LedgerDirUtil.findLargestGap(Lists.newArrayList()), Pair.of(0, Integer.MAX_VALUE)); + assertEquals(LedgerDirUtil.findLargestGap(Lists.newArrayList(0)), + Pair.of(1, Integer.MAX_VALUE)); + assertEquals(LedgerDirUtil.findLargestGap(Lists.newArrayList(1, 2, 3, 4, 5, 6)), + Pair.of(7, Integer.MAX_VALUE)); + assertEquals(LedgerDirUtil.findLargestGap(Lists.newArrayList(Integer.MAX_VALUE)), + Pair.of(0, Integer.MAX_VALUE)); + assertEquals(LedgerDirUtil.findLargestGap(Lists.newArrayList(Integer.MAX_VALUE / 2)), + Pair.of(0, Integer.MAX_VALUE / 2)); + assertEquals(LedgerDirUtil.findLargestGap(Lists.newArrayList(Integer.MAX_VALUE / 2 - 1)), + Pair.of(Integer.MAX_VALUE / 2, Integer.MAX_VALUE)); + } + + private static void touchLog(File ledgerDir, int logId) throws Exception { + assertThat(DirectEntryLogger.logFile(new File(ledgerDir, "current"), logId).createNewFile(), + equalTo(true)); + } + + private static void touchCompacting(File ledgerDir, int logId) throws Exception { + assertThat(DirectCompactionEntryLog.compactingFile(new File(ledgerDir, "current"), logId).createNewFile(), + equalTo(true)); + } + + private static void touchCompacted(File ledgerDir, int newLogId, int compactedLogId) throws Exception { + assertThat(DirectCompactionEntryLog.compactedFile(new File(ledgerDir, "current"), newLogId, compactedLogId) + .createNewFile(), equalTo(true)); + } +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/directentrylogger/TestMetadata.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/directentrylogger/TestMetadata.java new file mode 100644 index 00000000000..e44a28cf8da --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/directentrylogger/TestMetadata.java @@ -0,0 +1,82 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + */ +package org.apache.bookkeeper.bookie.storage.directentrylogger; + +import static org.apache.bookkeeper.bookie.storage.directentrylogger.DirectEntryLogger.logFilename; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.equalTo; + +import io.netty.buffer.ByteBuf; +import io.netty.buffer.ByteBufAllocator; +import java.io.File; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import org.apache.bookkeeper.bookie.EntryLogMetadata; +import org.apache.bookkeeper.common.util.nativeio.NativeIOImpl; +import org.apache.bookkeeper.slogger.Slogger; +import org.apache.bookkeeper.stats.NullStatsLogger; +import org.apache.bookkeeper.stats.OpStatsLogger; +import org.apache.bookkeeper.test.TmpDirs; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.condition.DisabledOnOs; +import org.junit.jupiter.api.condition.OS; + +@DisabledOnOs(OS.WINDOWS) +public class TestMetadata { + private final OpStatsLogger opLogger = NullStatsLogger.INSTANCE.getOpStatsLogger("null"); + + private final TmpDirs tmpDirs = new TmpDirs(); + private final ExecutorService writeExecutor = Executors.newSingleThreadExecutor(); + + @AfterEach + public void cleanup() throws Exception { + tmpDirs.cleanup(); + writeExecutor.shutdownNow(); + } + + @Test + public void testReadMetaFromHeader() throws Exception { + File ledgerDir = tmpDirs.createNew("writeMetadataBeforeFsync", "logs"); + int logId = 5678; + try (BufferPool buffers = new BufferPool(new NativeIOImpl(), ByteBufAllocator.DEFAULT, Buffer.ALIGNMENT, 8); + LogWriter writer = new DirectWriter(logId, logFilename(ledgerDir, logId), + 1 << 24, writeExecutor, + buffers, new NativeIOImpl(), Slogger.CONSOLE)) { + long offset = 4096L; + writer.position(offset); + EntryLogMetadata entryLogMetadata = new EntryLogMetadata(logId); + entryLogMetadata.addLedgerSize(1, 10); + entryLogMetadata.addLedgerSize(2, 11); + LogMetadata.write(writer, entryLogMetadata, ByteBufAllocator.DEFAULT); + try (LogReader reader = new DirectReader(logId, logFilename(ledgerDir, logId), + ByteBufAllocator.DEFAULT, + new NativeIOImpl(), Buffer.ALIGNMENT, + 1 << 20, opLogger)) { + ByteBuf header = reader.readBufferAt(0, Header.LOGFILE_LEGACY_HEADER_SIZE); + assertThat(Header.HEADER_V1, equalTo(Header.extractVersion(header))); + assertThat(offset, equalTo(Header.extractLedgerMapOffset(header))); + assertThat(2, equalTo(Header.extractLedgerCount(header))); + } + } + } + +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/directentrylogger/TestTransactionalEntryLogCompactor.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/directentrylogger/TestTransactionalEntryLogCompactor.java new file mode 100644 index 00000000000..3f072846f61 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/directentrylogger/TestTransactionalEntryLogCompactor.java @@ -0,0 +1,612 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.bookie.storage.directentrylogger; + +import static org.apache.bookkeeper.bookie.TransactionalEntryLogCompactor.COMPACTED_SUFFIX; +import static org.apache.bookkeeper.bookie.TransactionalEntryLogCompactor.COMPACTING_SUFFIX; +import static org.apache.bookkeeper.bookie.storage.EntryLogTestUtils.assertEntryEquals; +import static org.apache.bookkeeper.bookie.storage.EntryLogTestUtils.logIdFromLocation; +import static org.apache.bookkeeper.bookie.storage.EntryLogTestUtils.makeEntry; +import static org.apache.bookkeeper.bookie.storage.EntryLogTestUtils.newDirectEntryLogger; +import static org.apache.bookkeeper.bookie.storage.EntryLogTestUtils.newDirsManager; +import static org.apache.bookkeeper.bookie.storage.EntryLogTestUtils.newLegacyEntryLogger; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.empty; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.hasSize; +import static org.hamcrest.Matchers.not; + +import com.google.common.util.concurrent.MoreExecutors; +import io.netty.buffer.ByteBuf; +import io.netty.buffer.ByteBufAllocator; +import java.io.File; +import java.io.IOException; +import java.util.Arrays; +import java.util.Set; +import java.util.concurrent.CompletableFuture; +import java.util.function.Function; +import java.util.stream.Collectors; +import org.apache.bookkeeper.bookie.EntryLocation; +import org.apache.bookkeeper.bookie.EntryLogMetadata; +import org.apache.bookkeeper.bookie.MockLedgerStorage; +import org.apache.bookkeeper.bookie.TransactionalEntryLogCompactor; +import org.apache.bookkeeper.bookie.storage.CompactionEntryLog; +import org.apache.bookkeeper.bookie.storage.EntryLogScanner; +import org.apache.bookkeeper.bookie.storage.EntryLogger; +import org.apache.bookkeeper.common.util.nativeio.NativeIOImpl; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.slogger.Slogger; +import org.apache.bookkeeper.stats.NullStatsLogger; +import org.apache.bookkeeper.test.TmpDirs; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.condition.DisabledOnOs; +import org.junit.jupiter.api.condition.OS; + +/** + * TestTransactionalEntryLogCompactor. 
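+ * Exercises transactional compaction: the happy path for small and large
+ * logs, and recovery when the scan, flush or final mark-compacted step
+ * fails part way through.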
+ */ +@DisabledOnOs(OS.WINDOWS) +public class TestTransactionalEntryLogCompactor { + private static final Slogger slog = Slogger.CONSOLE; + + private final TmpDirs tmpDirs = new TmpDirs(); + private static final long deadLedger = 1L; + private static final long liveLedger = 2L; + + @AfterEach + public void cleanup() throws Exception { + tmpDirs.cleanup(); + } + + @Test + public void testHappyCase() throws Exception { + File ledgerDir = tmpDirs.createNew("compactHappyCase", "ledgers"); + File curDir = new File(ledgerDir, "current"); + curDir.mkdirs(); + + long logId = writeLogData(ledgerDir); + MockLedgerStorage ledgerStorage = new MockLedgerStorage(); + try (EntryLogger entryLogger = newDirectEntryLogger(2 << 20, ledgerDir)) { + TransactionalEntryLogCompactor compactor = new TransactionalEntryLogCompactor( + new ServerConfiguration(), + entryLogger, + ledgerStorage, + (removedLogId) -> {}); + EntryLogMetadata meta = entryLogger.getEntryLogMetadata(logId); + assertThat(meta.containsLedger(deadLedger), equalTo(true)); + assertThat(meta.containsLedger(liveLedger), equalTo(true)); + assertThat(meta.getTotalSize(), equalTo(1000L + 1000 + (Integer.BYTES * 2))); + assertThat(meta.getRemainingSize(), equalTo(meta.getTotalSize())); + + meta.removeLedgerIf((ledgerId) -> ledgerId == deadLedger); + assertThat(compactor.compact(meta), equalTo(true)); + + assertThat(ledgerStorage.getUpdatedLocations(), hasSize(1)); + EntryLocation loc = ledgerStorage.getUpdatedLocations().get(0); + + long compactedLogId = logIdFromLocation(loc.getLocation()); + assertThat(compactedLogId, not(equalTo(logId))); + assertThat(loc.getLedger(), equalTo(liveLedger)); + assertThat(loc.getEntry(), equalTo(2L)); + + meta = entryLogger.getEntryLogMetadata(compactedLogId); + assertThat(meta.containsLedger(deadLedger), equalTo(false)); + assertThat(meta.containsLedger(liveLedger), equalTo(true)); + assertThat(meta.getTotalSize(), equalTo(1000L + Integer.BYTES)); + assertThat(meta.getRemainingSize(), equalTo(meta.getTotalSize())); + + ByteBuf bb = entryLogger.readEntry(loc.getLedger(), loc.getEntry(), loc.getLocation()); + assertEntryEquals(bb, makeEntry(liveLedger, 2L, 1000, (byte) 0xfa)); + assertThat(entryLogger.incompleteCompactionLogs(), empty()); + } + } + + @Test + public void testHappyCase1000() throws Exception { + File ledgerDir = tmpDirs.createNew("compactHappyCase1000", "ledgers"); + File curDir = new File(ledgerDir, "current"); + curDir.mkdirs(); + + long logId = writeLogData1000(ledgerDir); + MockLedgerStorage ledgerStorage = new MockLedgerStorage(); + try (EntryLogger entryLogger = newDirectEntryLogger(2 << 20, ledgerDir)) { + TransactionalEntryLogCompactor compactor = new TransactionalEntryLogCompactor( + new ServerConfiguration(), + entryLogger, + ledgerStorage, + (removedLogId) -> {}); + EntryLogMetadata meta = entryLogger.getEntryLogMetadata(logId); + assertThat(meta.containsLedger(deadLedger), equalTo(true)); + assertThat(meta.containsLedger(liveLedger), equalTo(true)); + assertThat(meta.getTotalSize(), equalTo((1000L + Integer.BYTES) * 1000 * 2)); + assertThat(meta.getRemainingSize(), equalTo(meta.getTotalSize())); + + meta.removeLedgerIf((ledgerId) -> ledgerId == deadLedger); + assertThat(compactor.compact(meta), equalTo(true)); + + assertThat(ledgerStorage.getUpdatedLocations(), hasSize(1000)); + long compactedLogId = -1; + for (int i = 0; i < 1000; i++) { + EntryLocation loc = ledgerStorage.getUpdatedLocations().get(i); + compactedLogId = logIdFromLocation(loc.getLocation()); + assertThat(compactedLogId, 
not(equalTo(logId))); + assertThat(loc.getLedger(), equalTo(liveLedger)); + assertThat(loc.getEntry(), equalTo(Long.valueOf(i))); + + ByteBuf bb = entryLogger.readEntry(loc.getLedger(), loc.getEntry(), loc.getLocation()); + assertEntryEquals(bb, makeEntry(liveLedger, i, 1000, (byte) (0xfa + i))); + } + + meta = entryLogger.getEntryLogMetadata(compactedLogId); + assertThat(meta.containsLedger(deadLedger), equalTo(false)); + assertThat(meta.containsLedger(liveLedger), equalTo(true)); + assertThat(meta.getTotalSize(), equalTo((1000L + Integer.BYTES) * 1000)); + assertThat(meta.getRemainingSize(), equalTo(meta.getTotalSize())); + + assertThat(entryLogger.incompleteCompactionLogs(), empty()); + } + } + + @Test + public void testScanFail() throws Exception { + File ledgerDir = tmpDirs.createNew("compactScanFail", "ledgers"); + File curDir = new File(ledgerDir, "current"); + curDir.mkdirs(); + + long logId = writeLogData(ledgerDir); + MockLedgerStorage ledgerStorage = new MockLedgerStorage(); + try (EntryLogger entryLogger = newDirectEntryLoggerFailAdd(ledgerDir)) { + TransactionalEntryLogCompactor compactor = new TransactionalEntryLogCompactor( + new ServerConfiguration(), + entryLogger, + ledgerStorage, + (removedLogId) -> {}); + EntryLogMetadata meta = entryLogger.getEntryLogMetadata(logId); + assertThat(meta.containsLedger(deadLedger), equalTo(true)); + assertThat(meta.containsLedger(liveLedger), equalTo(true)); + assertThat(meta.getTotalSize(), equalTo(1000L + 1000 + (Integer.BYTES * 2))); + assertThat(meta.getRemainingSize(), equalTo(meta.getTotalSize())); + + meta.removeLedgerIf((ledgerId) -> ledgerId == deadLedger); + assertThat(compactor.compact(meta), equalTo(false)); + + assertThat(ledgerStorage.getUpdatedLocations(), hasSize(0)); + assertThat(entryLogger.incompleteCompactionLogs(), empty()); + + assertThat(compactingFiles(curDir), empty()); + assertThat(compactedFiles(curDir), empty()); + } + } + + @Test + public void testScanFailNoAbortAndContinue() throws Exception { + File ledgerDir = tmpDirs.createNew("compactScanFail", "ledgers"); + File curDir = new File(ledgerDir, "current"); + curDir.mkdirs(); + + long logId = writeLogData(ledgerDir); + MockLedgerStorage ledgerStorage = new MockLedgerStorage(); + try (EntryLogger entryLogger = newDirectEntryLoggerFailAddNoAbort(ledgerDir)) { + TransactionalEntryLogCompactor compactor = new TransactionalEntryLogCompactor( + new ServerConfiguration(), + entryLogger, + ledgerStorage, + (removedLogId) -> {}); + EntryLogMetadata meta = entryLogger.getEntryLogMetadata(logId); + assertThat(meta.containsLedger(deadLedger), equalTo(true)); + assertThat(meta.containsLedger(liveLedger), equalTo(true)); + assertThat(meta.getTotalSize(), equalTo(1000L + 1000 + (Integer.BYTES * 2))); + assertThat(meta.getRemainingSize(), equalTo(meta.getTotalSize())); + + meta.removeLedgerIf((ledgerId) -> ledgerId == deadLedger); + assertThat(compactor.compact(meta), equalTo(false)); + + assertThat(ledgerStorage.getUpdatedLocations(), hasSize(0)); + assertThat(compactingFiles(curDir).size(), equalTo(1)); + assertThat(compactedFiles(curDir), empty()); + } + + try (EntryLogger entryLogger = newDirectEntryLogger(2 << 20, ledgerDir)) { + TransactionalEntryLogCompactor compactor = new TransactionalEntryLogCompactor( + new ServerConfiguration(), + entryLogger, + ledgerStorage, + (removedLogId) -> {}); + compactor.cleanUpAndRecover(); + assertThat(compactingFiles(curDir), empty()); + assertThat(compactedFiles(curDir), empty()); + + EntryLogMetadata meta = 
entryLogger.getEntryLogMetadata(logId); + meta.removeLedgerIf((ledgerId) -> ledgerId == deadLedger); + assertThat(compactor.compact(meta), equalTo(true)); + + assertThat(ledgerStorage.getUpdatedLocations(), hasSize(1)); + + EntryLocation loc = ledgerStorage.getUpdatedLocations().get(0); + + long compactedLogId = logIdFromLocation(loc.getLocation()); + assertThat(compactedLogId, not(equalTo(logId))); + assertThat(loc.getLedger(), equalTo(liveLedger)); + assertThat(loc.getEntry(), equalTo(2L)); + } + } + + @Test + public void testFlushFail() throws Exception { + File ledgerDir = tmpDirs.createNew("compactScanFail", "ledgers"); + File curDir = new File(ledgerDir, "current"); + curDir.mkdirs(); + + long logId = writeLogData(ledgerDir); + MockLedgerStorage ledgerStorage = new MockLedgerStorage(); + try (EntryLogger entryLogger = newDirectEntryLoggerFailFlush(ledgerDir)) { + TransactionalEntryLogCompactor compactor = new TransactionalEntryLogCompactor( + new ServerConfiguration(), + entryLogger, + ledgerStorage, + (removedLogId) -> {}); + EntryLogMetadata meta = entryLogger.getEntryLogMetadata(logId); + assertThat(meta.containsLedger(deadLedger), equalTo(true)); + assertThat(meta.containsLedger(liveLedger), equalTo(true)); + assertThat(meta.getTotalSize(), equalTo(1000L + 1000 + (Integer.BYTES * 2))); + assertThat(meta.getRemainingSize(), equalTo(meta.getTotalSize())); + + meta.removeLedgerIf((ledgerId) -> ledgerId == deadLedger); + assertThat(compactor.compact(meta), equalTo(false)); + + assertThat(ledgerStorage.getUpdatedLocations(), hasSize(0)); + assertThat(entryLogger.incompleteCompactionLogs(), empty()); + + assertThat(compactingFiles(curDir), empty()); + assertThat(compactedFiles(curDir), empty()); + } + } + + @Test + public void testMarkCompactFailNoAbort() throws Exception { + File ledgerDir = tmpDirs.createNew("compactScanFail", "ledgers"); + File curDir = new File(ledgerDir, "current"); + curDir.mkdirs(); + + long logId = writeLogData(ledgerDir); + MockLedgerStorage ledgerStorage = new MockLedgerStorage(); + try (EntryLogger entryLogger = newDirectEntryLoggerFailMarkCompactedNoAbort(ledgerDir)) { + TransactionalEntryLogCompactor compactor = new TransactionalEntryLogCompactor( + new ServerConfiguration(), + entryLogger, + ledgerStorage, + (removedLogId) -> {}); + EntryLogMetadata meta = entryLogger.getEntryLogMetadata(logId); + assertThat(meta.containsLedger(deadLedger), equalTo(true)); + assertThat(meta.containsLedger(liveLedger), equalTo(true)); + assertThat(meta.getTotalSize(), equalTo(1000L + 1000 + (Integer.BYTES * 2))); + assertThat(meta.getRemainingSize(), equalTo(meta.getTotalSize())); + + meta.removeLedgerIf((ledgerId) -> ledgerId == deadLedger); + assertThat(compactor.compact(meta), equalTo(false)); + + assertThat(ledgerStorage.getUpdatedLocations(), hasSize(0)); + assertThat(compactingFiles(curDir), empty()); + assertThat(compactedFiles(curDir), hasSize(1)); + } + + try (EntryLogger entryLogger = newDirectEntryLogger(2 << 20, ledgerDir)) { + assertThat(entryLogger.logExists(logId), equalTo(true)); + CompletableFuture removedId = new CompletableFuture<>(); + TransactionalEntryLogCompactor compactor = new TransactionalEntryLogCompactor( + new ServerConfiguration(), + entryLogger, + ledgerStorage, + (removedLogId) -> removedId.complete(removedLogId)); + compactor.cleanUpAndRecover(); + assertThat(compactingFiles(curDir), empty()); + assertThat(compactedFiles(curDir), empty()); + + assertThat(removedId.isDone(), equalTo(true)); + assertThat(removedId.get(), equalTo(logId)); + + 
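// recovery completed the partially-committed compaction, so the live ledger's single entry was relocated +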
assertThat(ledgerStorage.getUpdatedLocations(), hasSize(1)); + + EntryLocation loc = ledgerStorage.getUpdatedLocations().get(0); + + long compactedLogId = logIdFromLocation(loc.getLocation()); + assertThat(compactedLogId, not(equalTo(logId))); + assertThat(loc.getLedger(), equalTo(liveLedger)); + assertThat(loc.getEntry(), equalTo(2L)); + + EntryLogMetadata meta = entryLogger.getEntryLogMetadata(compactedLogId); + assertThat(meta.containsLedger(deadLedger), equalTo(false)); + assertThat(meta.containsLedger(liveLedger), equalTo(true)); + assertThat(meta.getTotalSize(), equalTo(1000L + Integer.BYTES)); + assertThat(meta.getRemainingSize(), equalTo(meta.getTotalSize())); + + ByteBuf bb = entryLogger.readEntry(loc.getLedger(), loc.getEntry(), loc.getLocation()); + assertEntryEquals(bb, makeEntry(liveLedger, 2L, 1000, (byte) 0xfa)); + assertThat(entryLogger.incompleteCompactionLogs(), empty()); + } + } + + @Test + public void testIndexFail() throws Exception { + File ledgerDir = tmpDirs.createNew("compactScanFail", "ledgers"); + File curDir = new File(ledgerDir, "current"); + curDir.mkdirs(); + + long logId = writeLogData(ledgerDir); + MockLedgerStorage ledgerStorageFailFlush = new MockLedgerStorage() { + @Override + public void flushEntriesLocationsIndex() throws IOException { + throw new IOException("fail on flush"); + } + }; + try (EntryLogger entryLogger = newDirectEntryLogger(2 << 20, ledgerDir)) { + TransactionalEntryLogCompactor compactor = new TransactionalEntryLogCompactor( + new ServerConfiguration(), + entryLogger, + ledgerStorageFailFlush, + (removedLogId) -> {}); + EntryLogMetadata meta = entryLogger.getEntryLogMetadata(logId); + assertThat(meta.containsLedger(deadLedger), equalTo(true)); + assertThat(meta.containsLedger(liveLedger), equalTo(true)); + assertThat(meta.getTotalSize(), equalTo(1000L + 1000 + (Integer.BYTES * 2))); + assertThat(meta.getRemainingSize(), equalTo(meta.getTotalSize())); + + meta.removeLedgerIf((ledgerId) -> ledgerId == deadLedger); + assertThat(compactor.compact(meta), equalTo(false)); + + assertThat(ledgerStorageFailFlush.getUpdatedLocations(), hasSize(1)); + assertThat(compactingFiles(curDir), empty()); + assertThat(compactedFiles(curDir), hasSize(1)); + } + + MockLedgerStorage ledgerStorage = new MockLedgerStorage(); + CompletableFuture removedId = new CompletableFuture<>(); + try (EntryLogger entryLogger = newDirectEntryLogger(2 << 20, ledgerDir)) { + TransactionalEntryLogCompactor compactor = new TransactionalEntryLogCompactor( + new ServerConfiguration(), + entryLogger, + ledgerStorage, + (removedLogId) -> removedId.complete(removedLogId)); + assertThat(entryLogger.logExists(logId), equalTo(true)); + compactor.cleanUpAndRecover(); + assertThat(compactingFiles(curDir), empty()); + assertThat(compactedFiles(curDir), empty()); + + assertThat(removedId.isDone(), equalTo(true)); + assertThat(removedId.get(), equalTo(logId)); + + assertThat(ledgerStorage.getUpdatedLocations(), hasSize(1)); + + EntryLocation loc = ledgerStorage.getUpdatedLocations().get(0); + + long compactedLogId = logIdFromLocation(loc.getLocation()); + assertThat(compactedLogId, not(equalTo(logId))); + assertThat(loc.getLedger(), equalTo(liveLedger)); + assertThat(loc.getEntry(), equalTo(2L)); + + EntryLogMetadata meta = entryLogger.getEntryLogMetadata(compactedLogId); + assertThat(meta.containsLedger(deadLedger), equalTo(false)); + assertThat(meta.containsLedger(liveLedger), equalTo(true)); + assertThat(meta.getTotalSize(), equalTo(1000L + Integer.BYTES)); + 
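// the surviving entry accounts for its 1000-byte payload plus the 4-byte entry-size header +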
assertThat(meta.getRemainingSize(), equalTo(meta.getTotalSize())); + + ByteBuf bb = entryLogger.readEntry(loc.getLedger(), loc.getEntry(), loc.getLocation()); + assertEntryEquals(bb, makeEntry(liveLedger, 2L, 1000, (byte) 0xfa)); + assertThat(entryLogger.incompleteCompactionLogs(), empty()); + } + } + + @Test + public void testMetadataWritten() throws Exception { + File ledgerDir = tmpDirs.createNew("compactHappyCase", "ledgers"); + File curDir = new File(ledgerDir, "current"); + curDir.mkdirs(); + + long logId = writeLogData1000(ledgerDir); + MockLedgerStorage ledgerStorage = new MockLedgerStorage(); + try (EntryLogger entryLogger = newDirectEntryLogger(2 << 20, ledgerDir)) { + TransactionalEntryLogCompactor compactor = new TransactionalEntryLogCompactor( + new ServerConfiguration(), + entryLogger, + ledgerStorage, + (removedLogId) -> {}); + EntryLogMetadata meta = entryLogger.getEntryLogMetadata(logId); + meta.removeLedgerIf((ledgerId) -> ledgerId == deadLedger); + assertThat(compactor.compact(meta), equalTo(true)); + + assertThat(ledgerStorage.getUpdatedLocations(), hasSize(1000)); + long compactedLogId = logIdFromLocation( + ledgerStorage.getUpdatedLocations().get(0).getLocation()); + + meta = ((DirectEntryLogger) entryLogger).readEntryLogIndex(compactedLogId); + assertThat(meta.containsLedger(deadLedger), equalTo(false)); + assertThat(meta.containsLedger(liveLedger), equalTo(true)); + assertThat(meta.getTotalSize(), equalTo((1000L + Integer.BYTES) * 1000)); + assertThat(meta.getRemainingSize(), equalTo(meta.getTotalSize())); + } + } + + Set<File> compactingFiles(File dir) throws Exception { + return Arrays.stream(dir.listFiles((f) -> f.getName().endsWith(COMPACTING_SUFFIX))) + .collect(Collectors.toSet()); + } + + Set<File> compactedFiles(File dir) throws Exception { + return Arrays.stream(dir.listFiles((f) -> f.getName().endsWith(COMPACTED_SUFFIX))) + .collect(Collectors.toSet()); + } + + long writeLogData(File ledgerDir) throws Exception { + try (EntryLogger entryLogger = newLegacyEntryLogger(2 << 20, ledgerDir)) { + long loc1 = entryLogger.addEntry(deadLedger, makeEntry(deadLedger, 1L, 1000, (byte) 0xde)); + long loc2 = entryLogger.addEntry(liveLedger, makeEntry(liveLedger, 2L, 1000, (byte) 0xfa)); + assertThat(logIdFromLocation(loc1), equalTo(logIdFromLocation(loc2))); + return logIdFromLocation(loc2); + } + } + + long writeLogData1000(File ledgerDir) throws Exception { + try (EntryLogger entryLogger = newDirectEntryLogger(2 << 20, ledgerDir)) { + long loc1, loc2 = -1; + for (int i = 0; i < 1000; i++) { + loc1 = entryLogger.addEntry(deadLedger, makeEntry(deadLedger, i, 1000, (byte) (0xde + i))); + if (loc2 != -1) { + assertThat(logIdFromLocation(loc1), equalTo(logIdFromLocation(loc2))); + } + loc2 = entryLogger.addEntry(liveLedger, makeEntry(liveLedger, i, 1000, (byte) (0xfa + i))); + assertThat(logIdFromLocation(loc1), equalTo(logIdFromLocation(loc2))); + } + return logIdFromLocation(loc2); + } + } + + private static DirectEntryLogger newDirectEntryLoggerFailAdd(File ledgerDir) throws Exception { + return newDirectEntryLoggerCompactionOverride( + ledgerDir, + (cel) -> new CompactionEntryLogProxy(cel) { + @Override + public long addEntry(long ledgerId, ByteBuf entry) throws IOException { + throw new IOException("Don't allow adds"); + } + }); + } + + private static DirectEntryLogger newDirectEntryLoggerFailAddNoAbort(File ledgerDir) throws Exception { + return newDirectEntryLoggerCompactionOverride( + ledgerDir, + (cel) -> new CompactionEntryLogProxy(cel) { + @Override + public long 
addEntry(long ledgerId, ByteBuf entry) throws IOException { + throw new IOException("Don't allow adds"); + } + + @Override + public void abort() {} + }); + } + + private static DirectEntryLogger newDirectEntryLoggerFailFlush(File ledgerDir) throws Exception { + return newDirectEntryLoggerCompactionOverride( + ledgerDir, + (cel) -> new CompactionEntryLogProxy(cel) { + @Override + public void flush() throws IOException { + throw new IOException("No flushing"); + } + }); + } + + private static DirectEntryLogger newDirectEntryLoggerFailMarkCompactedNoAbort(File ledgerDir) throws Exception { + return newDirectEntryLoggerCompactionOverride( + ledgerDir, + (cel) -> new CompactionEntryLogProxy(cel) { + @Override + public void markCompacted() throws IOException { + super.markCompacted(); + throw new IOException("No compact"); + } + + @Override + public void abort() {} + }); + } + + private static DirectEntryLogger newDirectEntryLoggerCompactionOverride( + File ledgerDir, + Function<CompactionEntryLog, CompactionEntryLog> override) throws Exception { + File curDir = new File(ledgerDir, "current"); + curDir.mkdirs(); + + return new DirectEntryLogger( + curDir, new EntryLogIdsImpl(newDirsManager(ledgerDir), slog), + new NativeIOImpl(), + ByteBufAllocator.DEFAULT, + MoreExecutors.newDirectExecutorService(), + MoreExecutors.newDirectExecutorService(), + 2 << 20, // max file size + 10 * 1024 * 1024, // max sane entry size + 1024 * 1024, // total write buffer size + 1024 * 1024, // total read buffer size + 4 * 1024, // read buffer size + 1, // numReadThreads + 300, // max fd cache time in seconds + slog, NullStatsLogger.INSTANCE) { + @Override + public CompactionEntryLog newCompactionLog(long logToCompact) throws IOException { + return override.apply(super.newCompactionLog(logToCompact)); + } + }; + } + + private static class CompactionEntryLogProxy implements CompactionEntryLog { + protected final CompactionEntryLog delegate; + + CompactionEntryLogProxy(CompactionEntryLog delegate) { + this.delegate = delegate; + } + + @Override + public long addEntry(long ledgerId, ByteBuf entry) throws IOException { + return delegate.addEntry(ledgerId, entry); + } + + @Override + public void scan(EntryLogScanner scanner) throws IOException { + delegate.scan(scanner); + } + + @Override + public void flush() throws IOException { + delegate.flush(); + } + + @Override + public void abort() { + delegate.abort(); + } + + @Override + public void markCompacted() throws IOException { + delegate.markCompacted(); + } + + @Override + public void makeAvailable() throws IOException { + delegate.makeAvailable(); + } + + @Override + public void finalizeAndCleanup() { + delegate.finalizeAndCleanup(); + } + + @Override + public long getDstLogId() { + return delegate.getDstLogId(); + } + + @Override + public long getSrcLogId() { + return delegate.getSrcLogId(); + } + } +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/ArraySortGroupTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/ArraySortGroupTest.java index 52fdf848071..94ca14fd6cc 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/ArraySortGroupTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/ArraySortGroupTest.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file @@ -51,37 +51,19 @@ public void simple() { 5, 6, 3, 1, // }; - ArrayGroupSort sorter = new ArrayGroupSort(2, 4); - sorter.sort(data); + ArrayGroupSort.sort(data); assertArrayEquals(expectedSorted, data); } - @Test(expected = IllegalArgumentException.class) - public void keySmallerThanTotalSize() { - new ArrayGroupSort(3, 2); - } - - @Test(expected = IllegalArgumentException.class) - public void negativeKeySize() { - new ArrayGroupSort(-1, 2); - } - - @Test(expected = IllegalArgumentException.class) - public void negativeTotalSize() { - new ArrayGroupSort(1, -1); - } - @Test(expected = IllegalArgumentException.class) public void arraySizeIsNotMultiple() { - ArrayGroupSort sorter = new ArrayGroupSort(1, 3); - sorter.sort(new long[] { 1, 2, 3, 4 }); + ArrayGroupSort.sort(new long[] { 1, 2, 3, 4, 5 }); } @Test(expected = IllegalArgumentException.class) public void arraySizeIsShorterThanRequired() { - ArrayGroupSort sorter = new ArrayGroupSort(1, 3); - sorter.sort(new long[] { 1, 2 }); + ArrayGroupSort.sort(new long[] { 1, 2 }); } @Test @@ -90,8 +72,7 @@ public void emptyArray() { long[] expectedSorted = new long[] {}; - ArrayGroupSort sorter = new ArrayGroupSort(2, 4); - sorter.sort(data); + ArrayGroupSort.sort(data); assertArrayEquals(expectedSorted, data); } @@ -101,8 +82,7 @@ public void singleItem() { long[] data = new long[] { 1, 2, 3, 4 }; long[] expectedSorted = new long[] { 1, 2, 3, 4 }; - ArrayGroupSort sorter = new ArrayGroupSort(2, 4); - sorter.sort(data); + ArrayGroupSort.sort(data); assertArrayEquals(expectedSorted, data); } @@ -112,8 +92,7 @@ public void twoItems() { long[] data = new long[] { 1, 2, 3, 4, 1, 1, 5, 5 }; long[] expectedSorted = new long[] { 1, 1, 5, 5, 1, 2, 3, 4 }; - ArrayGroupSort sorter = new ArrayGroupSort(2, 4); - sorter.sort(data); + ArrayGroupSort.sort(data); assertArrayEquals(expectedSorted, data); } @@ -123,8 +102,7 @@ public void threeItems() { long[] data = new long[] { 1, 2, 3, 4, 1, 1, 5, 5, 1, 0, 2, 1 }; long[] expectedSorted = new long[] { 1, 0, 2, 1, 1, 1, 5, 5, 1, 2, 3, 4 }; - ArrayGroupSort sorter = new ArrayGroupSort(2, 4); - sorter.sort(data); + ArrayGroupSort.sort(data); assertArrayEquals(expectedSorted, data); } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/ConversionRollbackTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/ConversionRollbackTest.java index bfd7a4d712f..d7034a339c1 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/ConversionRollbackTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/ConversionRollbackTest.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file @@ -22,17 +22,14 @@ import com.google.common.collect.Lists; import com.google.common.collect.Sets; - import io.netty.buffer.ByteBuf; import io.netty.buffer.Unpooled; - +import io.netty.buffer.UnpooledByteBufAllocator; import java.io.File; import java.io.IOException; import java.util.Set; - import lombok.extern.slf4j.Slf4j; - -import org.apache.bookkeeper.bookie.Bookie; +import org.apache.bookkeeper.bookie.BookieImpl; import org.apache.bookkeeper.bookie.BookieShell; import org.apache.bookkeeper.bookie.CheckpointSource; import org.apache.bookkeeper.bookie.CheckpointSource.Checkpoint; @@ -82,8 +79,8 @@ public void convertFromDbStorageToInterleaved() throws Exception { File tmpDir = File.createTempFile("bkTest", ".dir"); tmpDir.delete(); tmpDir.mkdir(); - File curDir = Bookie.getCurrentDirectory(tmpDir); - Bookie.checkDirectoryStructure(curDir); + File curDir = BookieImpl.getCurrentDirectory(tmpDir); + BookieImpl.checkDirectoryStructure(curDir); log.info("Using temp directory: {}", tmpDir); @@ -93,8 +90,10 @@ public void convertFromDbStorageToInterleaved() throws Exception { new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold())); DbLedgerStorage dbStorage = new DbLedgerStorage(); - dbStorage.initialize(conf, null, ledgerDirsManager, ledgerDirsManager, null, checkpointSource, checkpointer, - NullStatsLogger.INSTANCE); + dbStorage.initialize(conf, null, ledgerDirsManager, ledgerDirsManager, + NullStatsLogger.INSTANCE, UnpooledByteBufAllocator.DEFAULT); + dbStorage.setCheckpointer(checkpointer); + dbStorage.setCheckpointSource(checkpointSource); // Insert some ledger & entries in the dbStorage for (long ledgerId = 0; ledgerId < 5; ledgerId++) { @@ -124,7 +123,9 @@ public void convertFromDbStorageToInterleaved() throws Exception { // Verify that interleaved storage index has the same entries InterleavedLedgerStorage interleavedStorage = new InterleavedLedgerStorage(); interleavedStorage.initialize(conf, null, ledgerDirsManager, ledgerDirsManager, - null, checkpointSource, checkpointer, NullStatsLogger.INSTANCE); + NullStatsLogger.INSTANCE, UnpooledByteBufAllocator.DEFAULT); + interleavedStorage.setCheckpointSource(checkpointSource); + interleavedStorage.setCheckpointer(checkpointer); Set ledgers = Sets.newTreeSet(interleavedStorage.getActiveLedgersInRange(0, Long.MAX_VALUE)); Assert.assertEquals(Sets.newTreeSet(Lists.newArrayList(0L, 1L, 2L, 3L, 4L)), ledgers); diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/ConversionTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/ConversionTest.java index b2afe4c6d78..2e6095dc115 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/ConversionTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/ConversionTest.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file @@ -22,16 +22,14 @@ import com.google.common.collect.Lists; import com.google.common.collect.Sets; - import io.netty.buffer.ByteBuf; import io.netty.buffer.Unpooled; - +import io.netty.buffer.UnpooledByteBufAllocator; import java.io.File; import java.io.IOException; import java.util.Set; - -import org.apache.bookkeeper.bookie.Bookie; import org.apache.bookkeeper.bookie.Bookie.NoLedgerException; +import org.apache.bookkeeper.bookie.BookieImpl; import org.apache.bookkeeper.bookie.BookieShell; import org.apache.bookkeeper.bookie.CheckpointSource; import org.apache.bookkeeper.bookie.CheckpointSource.Checkpoint; @@ -79,8 +77,8 @@ public void test() throws Exception { File tmpDir = File.createTempFile("bkTest", ".dir"); tmpDir.delete(); tmpDir.mkdir(); - File curDir = Bookie.getCurrentDirectory(tmpDir); - Bookie.checkDirectoryStructure(curDir); + File curDir = BookieImpl.getCurrentDirectory(tmpDir); + BookieImpl.checkDirectoryStructure(curDir); System.out.println(tmpDir); @@ -91,7 +89,9 @@ public void test() throws Exception { InterleavedLedgerStorage interleavedStorage = new InterleavedLedgerStorage(); interleavedStorage.initialize(conf, null, ledgerDirsManager, ledgerDirsManager, - null, checkpointSource, checkpointer, NullStatsLogger.INSTANCE); + NullStatsLogger.INSTANCE, UnpooledByteBufAllocator.DEFAULT); + interleavedStorage.setCheckpointSource(checkpointSource); + interleavedStorage.setCheckpointer(checkpointer); // Insert some ledger & entries in the interleaved storage for (long ledgerId = 0; ledgerId < 5; ledgerId++) { @@ -121,11 +121,16 @@ public void test() throws Exception { // Verify that db index has the same entries DbLedgerStorage dbStorage = new DbLedgerStorage(); dbStorage.initialize(conf, null, ledgerDirsManager, ledgerDirsManager, - null, checkpointSource, checkpointer, NullStatsLogger.INSTANCE); + NullStatsLogger.INSTANCE, UnpooledByteBufAllocator.DEFAULT); + dbStorage.setCheckpointer(checkpointer); + dbStorage.setCheckpointSource(checkpointSource); interleavedStorage = new InterleavedLedgerStorage(); interleavedStorage.initialize(conf, null, ledgerDirsManager, - ledgerDirsManager, null, checkpointSource, checkpointer, NullStatsLogger.INSTANCE); + ledgerDirsManager, NullStatsLogger.INSTANCE, + UnpooledByteBufAllocator.DEFAULT); + interleavedStorage.setCheckpointSource(checkpointSource); + interleavedStorage.setCheckpointer(checkpointer); Set ledgers = Sets.newTreeSet(dbStorage.getActiveLedgersInRange(0, Long.MAX_VALUE)); Assert.assertEquals(Sets.newTreeSet(Lists.newArrayList(0L, 1L, 2L, 3L, 4L)), ledgers); diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/DbLedgerStorageBookieTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/DbLedgerStorageBookieTest.java index cfc74474174..4e31d8eeeb0 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/DbLedgerStorageBookieTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/DbLedgerStorageBookieTest.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file @@ -28,7 +28,6 @@ import org.apache.bookkeeper.conf.ClientConfiguration; import org.apache.bookkeeper.test.BookKeeperClusterTestCase; import org.junit.Test; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -43,6 +42,12 @@ public DbLedgerStorageBookieTest() { baseConf.setLedgerStorageClass(DbLedgerStorage.class.getName()); baseConf.setFlushInterval(60000); baseConf.setGcWaitTime(60000); + + // Leave it empty to pick up the default + baseConf.setProperty(DbLedgerStorage.WRITE_CACHE_MAX_SIZE_MB, ""); + + // Configure explicitly with an int value + baseConf.setProperty(DbLedgerStorage.READ_AHEAD_CACHE_MAX_SIZE_MB, 16); } @Test diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/DbLedgerStorageIndexDirTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/DbLedgerStorageIndexDirTest.java new file mode 100644 index 00000000000..4cefc726b14 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/DbLedgerStorageIndexDirTest.java @@ -0,0 +1,263 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.bookie.storage.ldb; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import com.google.common.collect.Lists; +import io.netty.buffer.ByteBuf; +import io.netty.buffer.Unpooled; +import java.io.File; +import java.io.FilenameFilter; +import java.io.IOException; +import java.util.Arrays; +import org.apache.bookkeeper.bookie.Bookie; +import org.apache.bookkeeper.bookie.BookieException; +import org.apache.bookkeeper.bookie.BookieImpl; +import org.apache.bookkeeper.bookie.TestBookieImpl; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.conf.TestBKConfiguration; +import org.apache.bookkeeper.proto.BookieProtocol; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +/** + * Unit test for {@link DbLedgerStorage}. 
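+ * Verifies that the RocksDB index files land under the configured indexDir rather than under the ledger directories.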
+ */ +public class DbLedgerStorageIndexDirTest { + + private DbLedgerStorage storage; + private File tmpLedgerDir; + private File tmpIndexDir; + private static final String LOCATION_INDEX_SUB_PATH = "locations"; + private static final String METADATA_INDEX_SUB_PATH = "ledgers"; + + @Before + public void setup() throws Exception { + tmpLedgerDir = File.createTempFile("ledgerDir", ".dir"); + tmpLedgerDir.delete(); + tmpLedgerDir.mkdir(); + File curLedgerDir = BookieImpl.getCurrentDirectory(tmpLedgerDir); + BookieImpl.checkDirectoryStructure(curLedgerDir); + + tmpIndexDir = File.createTempFile("indexDir", ".dir"); + tmpIndexDir.delete(); + tmpIndexDir.mkdir(); + File curIndexDir = BookieImpl.getCurrentDirectory(tmpIndexDir); + BookieImpl.checkDirectoryStructure(curIndexDir); + + int gcWaitTime = 1000; + ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); + conf.setGcWaitTime(gcWaitTime); + /** this test case covers specifying a separate indexDir for {@link SingleDirectoryDbLedgerStorage} */ + conf.setLedgerStorageClass(DbLedgerStorage.class.getName()); + conf.setProperty(DbLedgerStorage.WRITE_CACHE_MAX_SIZE_MB, 1); + conf.setProperty(DbLedgerStorage.MAX_THROTTLE_TIME_MILLIS, 1000); + conf.setLedgerDirNames(new String[]{tmpLedgerDir.toString()}); + conf.setIndexDirName(new String[]{tmpIndexDir.toString()}); + Bookie bookie = new TestBookieImpl(conf); + + storage = (DbLedgerStorage) bookie.getLedgerStorage(); + } + + @After + public void teardown() throws Exception { + storage.shutdown(); + tmpLedgerDir.delete(); + tmpIndexDir.delete(); + } + + public boolean hasIndexStructure(File tmpDir) { + File indexParentDir = BookieImpl.getCurrentDirectory(tmpDir); + String[] indexSubPaths = indexParentDir.list(new FilenameFilter() { + @Override + public boolean accept(File dir, String name) { + return LOCATION_INDEX_SUB_PATH.equals(name) || METADATA_INDEX_SUB_PATH.equals(name); + } + }); + + if (indexSubPaths.length == 0) { + return false; + } + long hasIndexPathCount = Arrays.stream(indexSubPaths).filter(isp -> { + String[] indexFiles = new File(indexParentDir, isp).list(new FilenameFilter() { + @Override + public boolean accept(File dir, String name) { + return "LOCK".equals(name) || "IDENTITY".equals(name) || "CURRENT".equals(name); + } + }); + return indexFiles.length == 3; + }).count(); + + return hasIndexPathCount == indexSubPaths.length; + } + + @Test + public void checkIndexNotExistsInLedgerDirStructure() { + // regression check: index files must not be created under the ledger directory + assertEquals(false, hasIndexStructure(tmpLedgerDir)); + } + + @Test + public void checkIndexDirectoryStructure() { + // with indexDirName configured, the index structure lives under the index directory + assertEquals(true, hasIndexStructure(tmpIndexDir)); + } + + @Test + public void simpleRegressionTest() throws Exception { + assertEquals(false, storage.ledgerExists(3)); + try { + storage.isFenced(3); + fail("should have failed"); + } catch (Bookie.NoLedgerException nle) { + // OK + } + assertEquals(false, storage.ledgerExists(3)); + try { + storage.setFenced(3); + fail("should have failed"); + } catch (Bookie.NoLedgerException nle) { + // OK + } + storage.setMasterKey(3, "key".getBytes()); + try { + storage.setMasterKey(3, "other-key".getBytes()); + fail("should have failed"); + } catch (IOException ioe) { + assertTrue(ioe.getCause() instanceof BookieException.BookieIllegalOpException); + } + // setting the same key is NOOP + storage.setMasterKey(3, "key".getBytes()); + 
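// once the master key is stored the ledger exists; the first fence attempt succeeds and later ones are no-ops +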
assertEquals(true, storage.ledgerExists(3)); + assertEquals(true, storage.setFenced(3)); + assertEquals(true, storage.isFenced(3)); + assertEquals(false, storage.setFenced(3)); + + storage.setMasterKey(4, "key".getBytes()); + assertEquals(false, storage.isFenced(4)); + assertEquals(true, storage.ledgerExists(4)); + + assertEquals("key", new String(storage.readMasterKey(4))); + + assertEquals(Lists.newArrayList(4L, 3L), Lists.newArrayList(storage.getActiveLedgersInRange(0, 100))); + assertEquals(Lists.newArrayList(4L, 3L), Lists.newArrayList(storage.getActiveLedgersInRange(3, 100))); + assertEquals(Lists.newArrayList(3L), Lists.newArrayList(storage.getActiveLedgersInRange(0, 4))); + + // Add / read entries + ByteBuf entry = Unpooled.buffer(1024); + entry.writeLong(4); // ledger id + entry.writeLong(1); // entry id + entry.writeLong(0); // lac + entry.writeBytes("entry-1".getBytes()); + + assertEquals(false, ((DbLedgerStorage) storage).isFlushRequired()); + + assertEquals(1, storage.addEntry(entry)); + + assertEquals(true, ((DbLedgerStorage) storage).isFlushRequired()); + + // Read from write cache + assertTrue(storage.entryExists(4, 1)); + ByteBuf res = storage.getEntry(4, 1); + assertEquals(entry, res); + + storage.flush(); + + assertEquals(false, ((DbLedgerStorage) storage).isFlushRequired()); + + // Read from db + assertTrue(storage.entryExists(4, 1)); + res = storage.getEntry(4, 1); + assertEquals(entry, res); + + try { + storage.getEntry(4, 2); + fail("Should have thrown exception"); + } catch (Bookie.NoEntryException e) { + // ok + } + + ByteBuf entry2 = Unpooled.buffer(1024); + entry2.writeLong(4); // ledger id + entry2.writeLong(2); // entry id + entry2.writeLong(1); // lac + entry2.writeBytes("entry-2".getBytes()); + + storage.addEntry(entry2); + + // Read last entry in ledger + res = storage.getEntry(4, BookieProtocol.LAST_ADD_CONFIRMED); + assertEquals(entry2, res); + + // Read last add confirmed in ledger + assertEquals(1L, storage.getLastAddConfirmed(4)); + + ByteBuf entry3 = Unpooled.buffer(1024); + entry3.writeLong(4); // ledger id + entry3.writeLong(3); // entry id + entry3.writeLong(2); // lac + entry3.writeBytes("entry-3".getBytes()); + storage.addEntry(entry3); + + ByteBuf entry4 = Unpooled.buffer(1024); + entry4.writeLong(4); // ledger id + entry4.writeLong(4); // entry id + entry4.writeLong(3); // lac + entry4.writeBytes("entry-4".getBytes()); + storage.addEntry(entry4); + + res = storage.getEntry(4, 4); + assertEquals(entry4, res); + + assertEquals(3, storage.getLastAddConfirmed(4)); + + // Delete + assertEquals(true, storage.ledgerExists(4)); + storage.deleteLedger(4); + assertEquals(false, storage.ledgerExists(4)); + + // remove entries for ledger 4 from cache + storage.flush(); + + try { + storage.getEntry(4, 4); + fail("Should have thrown exception since the ledger was deleted"); + } catch (Bookie.NoLedgerException e) { + // ok + } + } +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/DbLedgerStorageReadCacheTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/DbLedgerStorageReadCacheTest.java new file mode 100644 index 00000000000..81ef7f9495c --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/DbLedgerStorageReadCacheTest.java @@ -0,0 +1,368 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.bookie.storage.ldb; + +import static org.apache.bookkeeper.bookie.storage.ldb.DbLedgerStorage.READ_AHEAD_CACHE_BATCH_BYTES_SIZE; +import static org.apache.bookkeeper.bookie.storage.ldb.DbLedgerStorage.READ_AHEAD_CACHE_BATCH_SIZE; +import static org.apache.bookkeeper.bookie.storage.ldb.DbLedgerStorage.READ_AHEAD_CACHE_MAX_SIZE_MB; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import io.netty.buffer.ByteBuf; +import io.netty.buffer.Unpooled; +import java.io.File; +import java.util.List; +import org.apache.bookkeeper.bookie.BookieImpl; +import org.apache.bookkeeper.bookie.DefaultEntryLogger; +import org.apache.bookkeeper.bookie.TestBookieImpl; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.conf.TestBKConfiguration; +import org.apache.bookkeeper.test.TestStatsProvider; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Unit test for {@link DbLedgerStorage}. 
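+ * Exercises chargeReadAheadCache and compares byte-limited against count-limited read-ahead behaviour.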
+ */ +public class DbLedgerStorageReadCacheTest { + private static final Logger LOGGER = LoggerFactory.getLogger(DbLedgerStorageReadCacheTest.class); + + @Test + public void chargeReadAheadCacheRegressionTest() { + TestDB testDB = new TestDB(); + try { + long readAheadCacheMaxSizeMb = 16L; + int readAheadCacheBatchSize = 1024; + long readAheadCacheBatchBytesSize = -1; + setup(testDB, readAheadCacheMaxSizeMb, readAheadCacheBatchSize, readAheadCacheBatchBytesSize); + SingleDirectoryDbLedgerStorage sdb = testDB.getStorage().getLedgerStorageList().get(0); + /** + * case1: currentReadAheadCount < readAheadCacheBatchSize + * currentReadAheadBytes < maxReadAheadBytesSize + * result: true + */ + int currentReadAheadCount = 1; + long currentReadAheadBytes = 1; + assertTrue(sdb.chargeReadAheadCache(currentReadAheadCount, currentReadAheadBytes)); + + /** + * case2: currentReadAheadCount > readAheadCacheBatchSize + * currentReadAheadBytes < maxReadAheadBytesSize + * result: false + */ + currentReadAheadCount = readAheadCacheBatchSize + 1; + currentReadAheadBytes = 1; + assertFalse(sdb.chargeReadAheadCache(currentReadAheadCount, currentReadAheadBytes)); + + /** + * case3: currentReadAheadCount < readAheadCacheBatchSize + * currentReadAheadBytes > maxReadAheadBytesSize + * result: false + */ + currentReadAheadCount = 1; + currentReadAheadBytes = readAheadCacheMaxSizeMb / 2 * 1024 * 1024 + 1; + assertFalse(sdb.chargeReadAheadCache(currentReadAheadCount, currentReadAheadBytes)); + } catch (Throwable e) { + LOGGER.error("chargeReadAheadCacheRegressionTest run error", e); + } finally { + teardown(testDB.getStorage(), testDB.getTmpDir()); + } + } + + @Test + public void chargeReadAheadCacheUnitTest() { + TestDB testDB = new TestDB(); + try { + long readAheadCacheMaxSizeMb = 16L; + int readAheadCacheBatchSize = 1024; + long readAheadCacheBatchBytesSize = 2 * 1024 * 1024; + setup(testDB, readAheadCacheMaxSizeMb, readAheadCacheBatchSize, readAheadCacheBatchBytesSize); + SingleDirectoryDbLedgerStorage sdb = testDB.getStorage().getLedgerStorageList().get(0); + /** + * case1: currentReadAheadCount < readAheadCacheBatchSize + * currentReadAheadBytes < readAheadCacheBatchBytesSize + * currentReadAheadBytes < readCacheMaxSize + * result: true + */ + int currentReadAheadCount = 1; + long currentReadAheadBytes = 1; + assertTrue(sdb.chargeReadAheadCache(currentReadAheadCount, currentReadAheadBytes)); + + /** + * case2: currentReadAheadCount > readAheadCacheBatchSize + * currentReadAheadBytes < readAheadCacheBatchBytesSize + * currentReadAheadBytes < readCacheMaxSize + * result: false + */ + currentReadAheadCount = readAheadCacheBatchSize + 1; + currentReadAheadBytes = 1; + assertFalse(sdb.chargeReadAheadCache(currentReadAheadCount, currentReadAheadBytes)); + + /** + * case3: currentReadAheadCount < readAheadCacheBatchSize + * currentReadAheadBytes > readAheadCacheBatchBytesSize + * currentReadAheadBytes < readCacheMaxSize + * result: false + */ + currentReadAheadCount = 1; + currentReadAheadBytes = readAheadCacheBatchBytesSize + 1; + assertFalse(sdb.chargeReadAheadCache(currentReadAheadCount, currentReadAheadBytes)); + } catch (Throwable e) { + LOGGER.error("chargeReadAheadCacheUnitTest run error", e); + } finally { + teardown(testDB.getStorage(), testDB.getTmpDir()); + } + } + + @Test + public void compareDiffReadAheadPerfTest() { + /** + * case1 (read-ahead limited by batch bytes size): + * config: readAheadCacheMaxSizeMb = 2 * 8; + * readAheadCacheBatchSize = 1024; + * readAheadCacheBatchBytesSize = 2 * 1024 * 1024; + * 
case content: + * for each of ledgers 0, 1, 2 and 3, read 1024 entries of roughly 4 KB each + */ + CacheResult cacheBatchBytesSizeResult = readAheadCacheBatchBytesSize(); + + /** + * case2 (read-ahead limited by batch entry count): + * config: readAheadCacheMaxSizeMb = 2 * 8; + * readAheadCacheBatchSize = 1024; + * case content: + * for each of ledgers 0, 1, 2 and 3, read 1024 entries of roughly 4 KB each + */ + CacheResult cacheBatchSizeResult = readAheadCacheBatchSize(); + + /** + * result: case1 (byte-limited read-ahead) incurs fewer cache misses; + * it suits large entries, since capping read-ahead by bytes limits how much a + * read-ahead of large entries can pollute the read cache + */ + assertEquals(8, cacheBatchBytesSizeResult.getCacheMissCount()); + assertEquals(132, cacheBatchSizeResult.getCacheMissCount()); + assertTrue(cacheBatchBytesSizeResult.getCacheMissCount() < cacheBatchSizeResult.getCacheMissCount()); + assertEquals( + cacheBatchBytesSizeResult.getCacheMissCount() + cacheBatchBytesSizeResult.getCacheHitCount(), + cacheBatchSizeResult.getCacheMissCount() + cacheBatchSizeResult.getCacheHitCount()); + } + + public void setup(TestDB testDB, long readAheadCacheMaxSizeMb, + int readAheadCacheBatchSize, long readAheadCacheBatchBytesSize) throws Exception { + File tmpDir = File.createTempFile("bkTest", ".dir"); + tmpDir.delete(); + tmpDir.mkdir(); + File curDir = BookieImpl.getCurrentDirectory(tmpDir); + BookieImpl.checkDirectoryStructure(curDir); + + int gcWaitTime = 1000; + ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); + conf.setGcWaitTime(gcWaitTime); + conf.setLedgerStorageClass(DbLedgerStorage.class.getName()); + conf.setLedgerDirNames(new String[]{tmpDir.toString()}); + if (readAheadCacheMaxSizeMb > 0) { + conf.setProperty(READ_AHEAD_CACHE_MAX_SIZE_MB, readAheadCacheMaxSizeMb); + } + if (readAheadCacheBatchSize > 0) { + conf.setProperty(READ_AHEAD_CACHE_BATCH_SIZE, readAheadCacheBatchSize); + } + if (readAheadCacheBatchBytesSize > 0) { + conf.setProperty(READ_AHEAD_CACHE_BATCH_BYTES_SIZE, readAheadCacheBatchBytesSize); + } + TestStatsProvider.TestStatsLogger statsLogger = new TestStatsProvider().getStatsLogger("test"); + BookieImpl bookie = new TestBookieImpl(new TestBookieImpl.ResourceBuilder(conf).build(statsLogger), + statsLogger); + + DbLedgerStorage storage = (DbLedgerStorage) bookie.getLedgerStorage(); + + storage.getLedgerStorageList().forEach(singleDirectoryDbLedgerStorage -> { + assertTrue(singleDirectoryDbLedgerStorage.getEntryLogger() instanceof DefaultEntryLogger); + }); + testDB.setStorage(storage); + testDB.setTmpDir(tmpDir); + } + + public void teardown(DbLedgerStorage storage, File tmpDir) { + if (storage != null) { + try { + storage.shutdown(); + } catch (InterruptedException e) { + LOGGER.error("storage shutdown failed", e); + } + } + if (tmpDir != null) { + tmpDir.delete(); + } + } + + private void addEntries(DbLedgerStorage storage, long minLedgerId, long maxLedgerId, + long minEntryId, long maxEntryId) throws Exception { + // Add entries + for (long lid = minLedgerId; lid < maxLedgerId; lid++) { + long lac = 0; + for (long eid = minEntryId; eid < maxEntryId; eid++) { 
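+ // each entry: three 8-byte header fields (ledger id, entry id, lac) followed by a 4 KB payload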
+ ByteBuf entry = Unpooled.buffer(1024); + entry.writeLong(lid); // ledger id + entry.writeLong(eid); // entry id + entry.writeLong(lac); // lac + entry.writeBytes((get4KbMsg()).getBytes()); + assertEquals(eid, storage.addEntry(entry)); + lac++; + } + } + } + + private String get4KbMsg() { + StringBuffer buffer = new StringBuffer(); + for (int i = 0; i < 1024; i++) { + buffer.append("1234"); + } + assertEquals(4 * 1024, buffer.toString().length()); + return buffer.toString(); + } + + private CacheResult readAheadCacheBatchBytesSize() { + Long cacheMissCount; + TestDB testDB = new TestDB(); + try { + long readAheadCacheMaxSizeMb = 2 * 8L; + int readAheadCacheBatchSize = 1024; + long readAheadCacheBatchBytesSize = 2 * 1024 * 1024; + long minEntryId = 0; + long maxEntryId = 1024; + + setup(testDB, readAheadCacheMaxSizeMb, readAheadCacheBatchSize, readAheadCacheBatchBytesSize); + addEntries(testDB.getStorage(), 0, 4, minEntryId, maxEntryId); + + testDB.getStorage().flush(); + assertEquals(false, testDB.getStorage().isFlushRequired()); + // Read from db + for (long eid = minEntryId; eid < maxEntryId / 2; eid++) { + testDB.getStorage().getEntry(0, eid); + testDB.getStorage().getEntry(1, eid); + testDB.getStorage().getEntry(2, eid); + testDB.getStorage().getEntry(3, eid); + } + List<SingleDirectoryDbLedgerStorage> ledgerStorageList = testDB.getStorage().getLedgerStorageList(); + DbLedgerStorageStats ledgerStats = ledgerStorageList.get(0).getDbLedgerStorageStats(); + cacheMissCount = ledgerStats.getReadCacheMissCounter().get(); + Long cacheHitCount = ledgerStats.getReadCacheHitCounter().get(); + LOGGER.info("simple1.cacheMissCount={},cacheHitCount={}", cacheMissCount, cacheHitCount); + return new CacheResult(cacheMissCount, cacheHitCount); + } catch (Throwable e) { + LOGGER.error("test case run error", e); + return new CacheResult(0, 0); + } finally { + teardown(testDB.getStorage(), testDB.getTmpDir()); + } + } + + public CacheResult readAheadCacheBatchSize() { + Long cacheMissCount; + TestDB testDB = new TestDB(); + try { + long readAheadCacheMaxSizeMb = 2 * 8L; + int readAheadCacheBatchSize = 1024; + long readAheadCacheBatchBytesSize = -1; + long minEntryId = 0; + long maxEntryId = 1024; + + setup(testDB, readAheadCacheMaxSizeMb, readAheadCacheBatchSize, readAheadCacheBatchBytesSize); + addEntries(testDB.getStorage(), 0, 4, minEntryId, maxEntryId); + + testDB.getStorage().flush(); + assertEquals(false, testDB.getStorage().isFlushRequired()); + // Read from db + for (long eid = minEntryId; eid < maxEntryId / 2; eid++) { + testDB.getStorage().getEntry(0, eid); + testDB.getStorage().getEntry(1, eid); + testDB.getStorage().getEntry(2, eid); + testDB.getStorage().getEntry(3, eid); + } + List<SingleDirectoryDbLedgerStorage> ledgerStorageList = testDB.getStorage().getLedgerStorageList(); + DbLedgerStorageStats ledgerStats = ledgerStorageList.get(0).getDbLedgerStorageStats(); + cacheMissCount = ledgerStats.getReadCacheMissCounter().get(); + Long cacheHitCount = ledgerStats.getReadCacheHitCounter().get(); + LOGGER.info("simple2.cacheMissCount={},cacheHitCount={}", cacheMissCount, cacheHitCount); + return new CacheResult(cacheMissCount, cacheHitCount); + } catch (Throwable e) { + LOGGER.error("test case run error", e); + return new CacheResult(0, 0); + } finally { + teardown(testDB.getStorage(), testDB.getTmpDir()); + } + } + + private class TestDB { + private DbLedgerStorage storage; + private File tmpDir; + + public DbLedgerStorage getStorage() { + return storage; + } + + public void setStorage(DbLedgerStorage storage) { + this.storage = storage; + } + + public File 
getTmpDir() { + return tmpDir; + } + + public void setTmpDir(File tmpDir) { + this.tmpDir = tmpDir; + } + } + + private class CacheResult { + private long cacheMissCount; + private long cacheHitCount; + + private CacheResult(long cacheMissCount, long cacheHitCount) { + this.cacheMissCount = cacheMissCount; + this.cacheHitCount = cacheHitCount; + } + + public long getCacheMissCount() { + return cacheMissCount; + } + + public void setCacheMissCount(long cacheMissCount) { + this.cacheMissCount = cacheMissCount; + } + + public long getCacheHitCount() { + return cacheHitCount; + } + + public void setCacheHitCount(long cacheHitCount) { + this.cacheHitCount = cacheHitCount; + } + } +} \ No newline at end of file diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/DbLedgerStorageTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/DbLedgerStorageTest.java index fb0a97a08cf..65f11e5d6a3 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/DbLedgerStorageTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/DbLedgerStorageTest.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -21,55 +21,73 @@ package org.apache.bookkeeper.bookie.storage.ldb; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import com.google.common.collect.Lists; - import io.netty.buffer.ByteBuf; import io.netty.buffer.ByteBufUtil; import io.netty.buffer.Unpooled; - +import io.netty.util.ReferenceCountUtil; import java.io.File; +import java.io.FileInputStream; import java.io.IOException; +import java.nio.ByteBuffer; import java.util.List; - import org.apache.bookkeeper.bookie.Bookie; import org.apache.bookkeeper.bookie.Bookie.NoEntryException; import org.apache.bookkeeper.bookie.BookieException; +import org.apache.bookkeeper.bookie.BookieImpl; +import org.apache.bookkeeper.bookie.CheckpointSource; +import org.apache.bookkeeper.bookie.CheckpointSourceList; +import org.apache.bookkeeper.bookie.DefaultEntryLogger; import org.apache.bookkeeper.bookie.EntryLocation; -import org.apache.bookkeeper.bookie.EntryLogger; +import org.apache.bookkeeper.bookie.LedgerDirsManager; +import org.apache.bookkeeper.bookie.LedgerStorage; +import org.apache.bookkeeper.bookie.LogMark; +import org.apache.bookkeeper.bookie.TestBookieImpl; +import org.apache.bookkeeper.bookie.storage.EntryLogger; import org.apache.bookkeeper.conf.ServerConfiguration; import org.apache.bookkeeper.conf.TestBKConfiguration; import org.apache.bookkeeper.proto.BookieProtocol; import org.junit.After; import org.junit.Before; import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * Unit test for {@link DbLedgerStorage}. 
*/ public class DbLedgerStorageTest { - - private DbLedgerStorage storage; - private File tmpDir; + private static final Logger log = LoggerFactory.getLogger(DbLedgerStorageTest.class); + protected DbLedgerStorage storage; + protected File tmpDir; + protected LedgerDirsManager ledgerDirsManager; + protected ServerConfiguration conf; @Before public void setup() throws Exception { tmpDir = File.createTempFile("bkTest", ".dir"); tmpDir.delete(); tmpDir.mkdir(); - File curDir = Bookie.getCurrentDirectory(tmpDir); - Bookie.checkDirectoryStructure(curDir); + File curDir = BookieImpl.getCurrentDirectory(tmpDir); + BookieImpl.checkDirectoryStructure(curDir); int gcWaitTime = 1000; - ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); + conf = TestBKConfiguration.newServerConfiguration(); conf.setGcWaitTime(gcWaitTime); conf.setLedgerStorageClass(DbLedgerStorage.class.getName()); conf.setLedgerDirNames(new String[] { tmpDir.toString() }); - Bookie bookie = new Bookie(conf); + BookieImpl bookie = new TestBookieImpl(conf); + ledgerDirsManager = bookie.getLedgerDirsManager(); storage = (DbLedgerStorage) bookie.getLedgerStorage(); + + storage.getLedgerStorageList().forEach(singleDirectoryDbLedgerStorage -> { + assertTrue(singleDirectoryDbLedgerStorage.getEntryLogger() instanceof DefaultEntryLogger); + }); } @After @@ -132,6 +150,7 @@ public void simple() throws Exception { assertEquals(true, ((DbLedgerStorage) storage).isFlushRequired()); // Read from write cache + assertTrue(storage.entryExists(4, 1)); ByteBuf res = storage.getEntry(4, 1); assertEquals(entry, res); @@ -140,6 +159,7 @@ public void simple() throws Exception { assertEquals(false, ((DbLedgerStorage) storage).isFlushRequired()); // Read from db + assertTrue(storage.entryExists(4, 1)); res = storage.getEntry(4, 1); assertEquals(entry, res); @@ -189,22 +209,13 @@ public void simple() throws Exception { storage.deleteLedger(4); assertEquals(false, storage.ledgerExists(4)); - // Should not throw exception event if the ledger was deleted - storage.getEntry(4, 4); - assertEquals(3, storage.getLastAddConfirmed(4)); - - storage.addEntry(Unpooled.wrappedBuffer(entry2)); - res = storage.getEntry(4, BookieProtocol.LAST_ADD_CONFIRMED); - assertEquals(entry4, res); - assertEquals(3, storage.getLastAddConfirmed(4)); - - // Get last entry from storage + // remove entries for ledger 4 from cache storage.flush(); try { storage.getEntry(4, 4); fail("Should have thrown exception since the ledger was deleted"); - } catch (NoEntryException e) { + } catch (Bookie.NoLedgerException e) { // ok } } @@ -219,6 +230,7 @@ public void testBookieCompaction() throws Exception { entry3.writeBytes("entry-3".getBytes()); storage.addEntry(entry3); + // Simulate bookie compaction SingleDirectoryDbLedgerStorage singleDirStorage = ((DbLedgerStorage) storage).getLedgerStorageList().get(0); EntryLogger entryLogger = singleDirStorage.getEntryLogger(); @@ -227,8 +239,10 @@ public void testBookieCompaction() throws Exception { newEntry3.writeLong(4); // ledger id newEntry3.writeLong(3); // entry id newEntry3.writeBytes("new-entry-3".getBytes()); - long location = entryLogger.addEntry(4L, newEntry3, false); + long location = entryLogger.addEntry(4L, newEntry3); + newEntry3.resetReaderIndex(); + storage.flush(); List locations = Lists.newArrayList(new EntryLocation(4, 3, location)); singleDirStorage.updateEntriesLocations(locations); @@ -245,11 +259,13 @@ public void doubleDirectory() throws Exception { File secondDir = new File(tmpDir, "dir2"); 
ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); conf.setGcWaitTime(gcWaitTime); + conf.setProperty(DbLedgerStorage.WRITE_CACHE_MAX_SIZE_MB, 4); + conf.setProperty(DbLedgerStorage.READ_AHEAD_CACHE_MAX_SIZE_MB, 4); conf.setLedgerStorageClass(DbLedgerStorage.class.getName()); conf.setLedgerDirNames(new String[] { firstDir.getCanonicalPath(), secondDir.getCanonicalPath() }); // Should not fail - Bookie bookie = new Bookie(conf); + Bookie bookie = new TestBookieImpl(conf); assertEquals(2, ((DbLedgerStorage) bookie.getLedgerStorage()).getLedgerStorageList().size()); bookie.shutdown(); @@ -349,7 +365,7 @@ public void testEntriesOutOfOrderWithFlush() throws Exception { ByteBuf res = storage.getEntry(1, 2); assertEquals(entry2, res); - res.release(); + ReferenceCountUtil.release(res); storage.flush(); @@ -362,7 +378,7 @@ public void testEntriesOutOfOrderWithFlush() throws Exception { res = storage.getEntry(1, 2); assertEquals(entry2, res); - res.release(); + ReferenceCountUtil.release(res); ByteBuf entry1 = Unpooled.buffer(1024); entry1.writeLong(1); // ledger id @@ -373,21 +389,21 @@ public void testEntriesOutOfOrderWithFlush() throws Exception { res = storage.getEntry(1, 1); assertEquals(entry1, res); - res.release(); + ReferenceCountUtil.release(res); res = storage.getEntry(1, 2); assertEquals(entry2, res); - res.release(); + ReferenceCountUtil.release(res); storage.flush(); res = storage.getEntry(1, 1); assertEquals(entry1, res); - res.release(); + ReferenceCountUtil.release(res); res = storage.getEntry(1, 2); assertEquals(entry2, res); - res.release(); + ReferenceCountUtil.release(res); } @Test @@ -431,4 +447,379 @@ public void testAddEntriesAfterDelete() throws Exception { storage.flush(); } + + @Test + public void testLimboStateSucceedsWhenInLimboButHasEntry() throws Exception { + storage.setMasterKey(1, "foobar".getBytes()); + + ByteBuf entry0 = Unpooled.buffer(1024); + entry0.writeLong(1); // ledger id + entry0.writeLong(0); // entry id + entry0.writeBytes("entry-0".getBytes()); + + storage.addEntry(entry0); + storage.flush(); + storage.setLimboState(1); + + try { + storage.getEntry(1, 0); + } catch (BookieException.DataUnknownException e) { + fail("Should have been able to read entry"); + } + } + + @Test + public void testLimboStateThrowsInLimboWhenNoEntry() throws Exception { + storage.setMasterKey(1, "foobar".getBytes()); + + ByteBuf entry0 = Unpooled.buffer(1024); + entry0.writeLong(1); // ledger id + entry0.writeLong(1); // entry id + entry0.writeBytes("entry-0".getBytes()); + + storage.addEntry(entry0); + storage.flush(); + storage.setLimboState(1); + + try { + storage.getEntry(1, 1); + } catch (NoEntryException nee) { + fail("Shouldn't have seen NoEntryException"); + } catch (BookieException.DataUnknownException e) { + // expected + } + + storage.shutdown(); + Bookie restartedBookie = new TestBookieImpl(conf); + DbLedgerStorage restartedStorage = (DbLedgerStorage) restartedBookie.getLedgerStorage(); + try { + try { + restartedStorage.getEntry(1, 1); + } catch (NoEntryException nee) { + fail("Shouldn't have seen NoEntryException"); + } catch (BookieException.DataUnknownException e) { + // expected + } + } finally { + restartedStorage.shutdown(); + } + + storage = (DbLedgerStorage) new TestBookieImpl(conf).getLedgerStorage(); + } + + @Test + public void testLimboStateThrowsNoEntryExceptionWhenLimboCleared() throws Exception { + storage.setMasterKey(1, "foobar".getBytes()); + + ByteBuf entry0 = Unpooled.buffer(1024); + entry0.writeLong(1); // ledger id + 
entry0.writeLong(1); // entry id + entry0.writeBytes("entry-0".getBytes()); + + storage.addEntry(entry0); + storage.flush(); + storage.setLimboState(1); + + try { + storage.getEntry(1, 1); + } catch (NoEntryException nee) { + fail("Shouldn't have seen NoEntryException"); + } catch (BookieException.DataUnknownException e) { + // expected + } + + storage.clearLimboState(1); + try { + storage.getEntry(1, 1); + } catch (NoEntryException nee) { + // expected + } catch (BookieException.DataUnknownException e) { + fail("Should have seen NoEntryException"); + } + } + + @Test + public void testLimboStateSucceedsWhenFenced() throws Exception { + storage.setMasterKey(1, "foobar".getBytes()); + + ByteBuf entry0 = Unpooled.buffer(1024); + entry0.writeLong(1); // ledger id + entry0.writeLong(1); // entry id + entry0.writeBytes("entry-0".getBytes()); + + storage.addEntry(entry0); + storage.flush(); + storage.setFenced(1); + storage.setLimboState(1); + + try { + storage.isFenced(1); + } catch (IOException ioe) { + fail("Should have been able to get isFenced response"); + } + } + + @Test + public void testLimboStateThrowsInLimboWhenNotFenced() throws Exception { + storage.setMasterKey(1, "foobar".getBytes()); + + ByteBuf entry0 = Unpooled.buffer(1024); + entry0.writeLong(1); // ledger id + entry0.writeLong(1); // entry id + entry0.writeBytes("entry-0".getBytes()); + + storage.addEntry(entry0); + storage.flush(); + storage.setLimboState(1); + + try { + storage.isFenced(1); + fail("Shouldn't have been able to get isFenced response"); + } catch (BookieException.DataUnknownException e) { + // expected + } + } + + @Test + public void testHasEntry() throws Exception { + long ledgerId = 0xbeefee; + storage.setMasterKey(ledgerId, "foobar".getBytes()); + + ByteBuf entry0 = Unpooled.buffer(1024); + entry0.writeLong(ledgerId); // ledger id + entry0.writeLong(0); // entry id + entry0.writeBytes("entry-0".getBytes()); + + storage.addEntry(entry0); + + // should come from write cache + assertTrue(storage.entryExists(ledgerId, 0)); + assertFalse(storage.entryExists(ledgerId, 1)); + + storage.flush(); + + // should come from storage + assertTrue(storage.entryExists(ledgerId, 0)); + assertFalse(storage.entryExists(ledgerId, 1)); + + // pull entry into readcache + storage.getEntry(ledgerId, 0); + + // should come from read cache + assertTrue(storage.entryExists(ledgerId, 0)); + assertFalse(storage.entryExists(ledgerId, 1)); + } + + @Test + public void testStorageStateFlags() throws Exception { + assertTrue(storage.getStorageStateFlags().isEmpty()); + + storage.setStorageStateFlag(LedgerStorage.StorageState.NEEDS_INTEGRITY_CHECK); + assertTrue(storage.getStorageStateFlags() + .contains(LedgerStorage.StorageState.NEEDS_INTEGRITY_CHECK)); + + storage.shutdown(); + Bookie restartedBookie1 = new TestBookieImpl(conf); + DbLedgerStorage restartedStorage1 = (DbLedgerStorage) restartedBookie1.getLedgerStorage(); + try { + assertTrue(restartedStorage1.getStorageStateFlags() + .contains(LedgerStorage.StorageState.NEEDS_INTEGRITY_CHECK)); + restartedStorage1.clearStorageStateFlag(LedgerStorage.StorageState.NEEDS_INTEGRITY_CHECK); + + assertFalse(restartedStorage1.getStorageStateFlags() + .contains(LedgerStorage.StorageState.NEEDS_INTEGRITY_CHECK)); + + } finally { + restartedStorage1.shutdown(); + } + + Bookie restartedBookie2 = new TestBookieImpl(conf); + DbLedgerStorage restartedStorage2 = (DbLedgerStorage) restartedBookie2.getLedgerStorage(); + try { + assertFalse(restartedStorage2.getStorageStateFlags() + 
.contains(LedgerStorage.StorageState.NEEDS_INTEGRITY_CHECK)); + } finally { + restartedStorage2.shutdown(); + } + + storage = (DbLedgerStorage) new TestBookieImpl(conf).getLedgerStorage(); + } + + @Test + public void testMultiLedgerDirectoryCheckpoint() throws Exception { + int gcWaitTime = 1000; + File firstDir = new File(tmpDir, "dir1"); + File secondDir = new File(tmpDir, "dir2"); + ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); + conf.setGcWaitTime(gcWaitTime); + conf.setProperty(DbLedgerStorage.WRITE_CACHE_MAX_SIZE_MB, 4); + conf.setProperty(DbLedgerStorage.READ_AHEAD_CACHE_MAX_SIZE_MB, 4); + conf.setLedgerStorageClass(DbLedgerStorage.class.getName()); + conf.setLedgerDirNames(new String[] { firstDir.getCanonicalPath(), secondDir.getCanonicalPath() }); + + BookieImpl bookie = new TestBookieImpl(conf); + ByteBuf entry1 = Unpooled.buffer(1024); + entry1.writeLong(1); // ledger id + entry1.writeLong(2); // entry id + entry1.writeBytes("entry-1".getBytes()); + + bookie.getLedgerStorage().addEntry(entry1); + // write one entry to the first ledger directory and flush with logMark(1, 2); + // only the first ledger directory should have a lastMark + bookie.getJournals().get(0).getLastLogMark().getCurMark().setLogMark(1, 2); + ((DbLedgerStorage) bookie.getLedgerStorage()).getLedgerStorageList().get(0).flush(); + + File firstDirMark = new File(firstDir + "/current", "lastMark"); + File secondDirMark = new File(secondDir + "/current", "lastMark"); + + // a LedgerStorage flush won't trigger a lastMark update because two ledger directories are configured + try { + readLogMark(firstDirMark); + readLogMark(secondDirMark); + fail(); + } catch (Exception e) { + // + } + + // write the second entry to the second ledger directory and flush with logMark(4, 5); + // the first ledger directory's lastMark is (1, 2) and the second ledger directory's lastMark is (4, 5) + ByteBuf entry2 = Unpooled.buffer(1024); + entry2.writeLong(2); // ledger id + entry2.writeLong(1); // entry id + entry2.writeBytes("entry-2".getBytes()); + + bookie.getLedgerStorage().addEntry(entry2); + // advance the journal mark to (4, 5) and flush only the second ledger directory + bookie.getJournals().get(0).getLastLogMark().getCurMark().setLogMark(4, 5); + ((DbLedgerStorage) bookie.getLedgerStorage()).getLedgerStorageList().get(1).flush(); + + // the LedgerStorage flush still won't trigger a lastMark update because two ledger directories are configured + try { + readLogMark(firstDirMark); + readLogMark(secondDirMark); + fail(); + } catch (Exception e) { + // + } + + // The DbLedgerStorage flush also won't trigger a lastMark update because two ledger directories are configured. + bookie.getLedgerStorage().flush(); + try { + readLogMark(firstDirMark); + readLogMark(secondDirMark); + fail(); + } catch (Exception e) { + // + } + + // trigger a checkpoint to simulate the SyncThread performing a checkpoint.
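// A hedged note on the checkpoint step below: CheckpointSourceList aggregates the
// bookie's journals, and checkpointComplete() is the point where the current mark
// becomes durable in each ledger directory's "lastMark" file. That file is a fixed
// 16 bytes, the journal log file id followed by the offset, as two big-endian longs:
//     ByteBuffer bb = ByteBuffer.allocate(16); // big-endian by default
//     bb.putLong(mark.getLogFileId());
//     bb.putLong(mark.getLogFileOffset());
// which is exactly the layout the readLogMark() helper further down decodes.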
+ CheckpointSource checkpointSource = new CheckpointSourceList(bookie.getJournals()); + bookie.getJournals().get(0).getLastLogMark().getCurMark().setLogMark(7, 8); + CheckpointSource.Checkpoint checkpoint = checkpointSource.newCheckpoint(); + checkpointSource.checkpointComplete(checkpoint, false); + + try { + LogMark firstLogMark = readLogMark(firstDirMark); + LogMark secondLogMark = readLogMark(secondDirMark); + assertEquals(7, firstLogMark.getLogFileId()); + assertEquals(8, firstLogMark.getLogFileOffset()); + assertEquals(7, secondLogMark.getLogFileId()); + assertEquals(8, secondLogMark.getLogFileOffset()); + } catch (Exception e) { + fail(); + } + + // replay the journal lastMark to make sure we get back the right lastMark position + bookie.getJournals().get(0).getLastLogMark().readLog(); + LogMark logMark = bookie.getJournals().get(0).getLastLogMark().getCurMark(); + assertEquals(7, logMark.getLogFileId()); + assertEquals(8, logMark.getLogFileOffset()); + } + + private LogMark readLogMark(File file) throws IOException { + byte[] buff = new byte[16]; + ByteBuffer bb = ByteBuffer.wrap(buff); + LogMark mark = new LogMark(); + try (FileInputStream fis = new FileInputStream(file)) { + int bytesRead = fis.read(buff); + if (bytesRead != 16) { + throw new IOException("Couldn't read enough bytes from lastMark." + " Wanted " + 16 + ", got " + bytesRead); + } + } + bb.clear(); + mark.readLogMark(bb); + + return mark; + } + + @Test + public void testSingleLedgerDirectoryCheckpoint() throws Exception { + int gcWaitTime = 1000; + File ledgerDir = new File(tmpDir, "dir"); + ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); + conf.setGcWaitTime(gcWaitTime); + conf.setProperty(DbLedgerStorage.WRITE_CACHE_MAX_SIZE_MB, 4); + conf.setProperty(DbLedgerStorage.READ_AHEAD_CACHE_MAX_SIZE_MB, 4); + conf.setLedgerStorageClass(DbLedgerStorage.class.getName()); + conf.setLedgerDirNames(new String[] { ledgerDir.getCanonicalPath() }); + + BookieImpl bookie = new TestBookieImpl(conf); + ByteBuf entry1 = Unpooled.buffer(1024); + entry1.writeLong(1); // ledger id + entry1.writeLong(2); // entry id + entry1.writeBytes("entry-1".getBytes()); + bookie.getLedgerStorage().addEntry(entry1); + + bookie.getJournals().get(0).getLastLogMark().getCurMark().setLogMark(1, 2); + ((DbLedgerStorage) bookie.getLedgerStorage()).getLedgerStorageList().get(0).flush(); + + File ledgerDirMark = new File(ledgerDir + "/current", "lastMark"); + try { + LogMark logMark = readLogMark(ledgerDirMark); + assertEquals(1, logMark.getLogFileId()); + assertEquals(2, logMark.getLogFileOffset()); + } catch (Exception e) { + fail(); + } + + ByteBuf entry2 = Unpooled.buffer(1024); + entry2.writeLong(2); // ledger id + entry2.writeLong(1); // entry id + entry2.writeBytes("entry-2".getBytes()); + + bookie.getLedgerStorage().addEntry(entry2); + // advance the journal mark to (4, 5) before flushing the single ledger directory + bookie.getJournals().get(0).getLastLogMark().getCurMark().setLogMark(4, 5); + + bookie.getLedgerStorage().flush(); + try { + LogMark logMark = readLogMark(ledgerDirMark); + assertEquals(4, logMark.getLogFileId()); + assertEquals(5, logMark.getLogFileOffset()); + } catch (Exception e) { + fail(); + } + + CheckpointSource checkpointSource = new CheckpointSourceList(bookie.getJournals()); + bookie.getJournals().get(0).getLastLogMark().getCurMark().setLogMark(7, 8); + CheckpointSource.Checkpoint checkpoint = checkpointSource.newCheckpoint(); +
checkpointSource.checkpointComplete(checkpoint, false); + + try { + LogMark firstLogMark = readLogMark(ledgerDirMark); + assertEquals(7, firstLogMark.getLogFileId()); + assertEquals(8, firstLogMark.getLogFileOffset()); + } catch (Exception e) { + fail(); + } + + // test replay journal lastMark, to make sure we get the right LastMark position + bookie.getJournals().get(0).getLastLogMark().readLog(); + LogMark logMark = bookie.getJournals().get(0).getLastLogMark().getCurMark(); + assertEquals(7, logMark.getLogFileId()); + assertEquals(8, logMark.getLogFileOffset()); + } } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/DbLedgerStorageWithDirectEntryLoggerTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/DbLedgerStorageWithDirectEntryLoggerTest.java new file mode 100644 index 00000000000..76192cf2b40 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/DbLedgerStorageWithDirectEntryLoggerTest.java @@ -0,0 +1,61 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.bookie.storage.ldb; + +import static org.junit.Assert.assertTrue; + +import java.io.File; +import org.apache.bookkeeper.bookie.BookieImpl; +import org.apache.bookkeeper.bookie.TestBookieImpl; +import org.apache.bookkeeper.bookie.storage.directentrylogger.DirectEntryLogger; +import org.apache.bookkeeper.conf.TestBKConfiguration; +import org.junit.Before; + +/** + * Unit test for {@link DbLedgerStorage} with directIO entrylogger. 
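A hedged aside before the subclass that follows: the direct IO entry logger is selected purely through configuration, so this test can reuse every case from DbLedgerStorageTest and only swap the logger implementation in setup(). The switch it flips, using the property name visible in the code below:

    ServerConfiguration conf = TestBKConfiguration.newServerConfiguration();
    conf.setLedgerStorageClass(DbLedgerStorage.class.getName());
    conf.setProperty("dbStorage_directIOEntryLogger", true); // DirectEntryLogger instead of DefaultEntryLogger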
+ */ +public class DbLedgerStorageWithDirectEntryLoggerTest extends DbLedgerStorageTest { + + @Override + @Before + public void setup() throws Exception { + tmpDir = File.createTempFile("bkTest", ".dir"); + tmpDir.delete(); + tmpDir.mkdir(); + File curDir = BookieImpl.getCurrentDirectory(tmpDir); + BookieImpl.checkDirectoryStructure(curDir); + + int gcWaitTime = 1000; + conf = TestBKConfiguration.newServerConfiguration(); + conf.setGcWaitTime(gcWaitTime); + conf.setLedgerStorageClass(DbLedgerStorage.class.getName()); + conf.setLedgerDirNames(new String[] { tmpDir.toString() }); + conf.setProperty("dbStorage_directIOEntryLogger", true); + BookieImpl bookie = new TestBookieImpl(conf); + + ledgerDirsManager = bookie.getLedgerDirsManager(); + storage = (DbLedgerStorage) bookie.getLedgerStorage(); + + storage.getLedgerStorageList().forEach(singleDirectoryDbLedgerStorage -> { + assertTrue(singleDirectoryDbLedgerStorage.getEntryLogger() instanceof DirectEntryLogger); + }); + } +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/DbLedgerStorageWriteCacheTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/DbLedgerStorageWriteCacheTest.java index 7a45ee764a0..102f7f5addc 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/DbLedgerStorageWriteCacheTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/DbLedgerStorageWriteCacheTest.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -24,18 +24,16 @@ import static org.junit.Assert.fail; import io.netty.buffer.ByteBuf; +import io.netty.buffer.ByteBufAllocator; import io.netty.buffer.Unpooled; - import java.io.File; import java.io.IOException; -import java.util.concurrent.ScheduledExecutorService; - import org.apache.bookkeeper.bookie.Bookie; import org.apache.bookkeeper.bookie.BookieException.OperationRejectedException; -import org.apache.bookkeeper.bookie.CheckpointSource; -import org.apache.bookkeeper.bookie.Checkpointer; +import org.apache.bookkeeper.bookie.BookieImpl; import org.apache.bookkeeper.bookie.LedgerDirsManager; -import org.apache.bookkeeper.bookie.StateManager; +import org.apache.bookkeeper.bookie.TestBookieImpl; +import org.apache.bookkeeper.bookie.storage.EntryLogger; import org.apache.bookkeeper.conf.ServerConfiguration; import org.apache.bookkeeper.conf.TestBKConfiguration; import org.apache.bookkeeper.meta.LedgerManager; @@ -56,22 +54,25 @@ private static class MockedDbLedgerStorage extends DbLedgerStorage { @Override protected SingleDirectoryDbLedgerStorage newSingleDirectoryDbLedgerStorage(ServerConfiguration conf, - LedgerManager ledgerManager, LedgerDirsManager ledgerDirsManager, LedgerDirsManager indexDirsManager, - StateManager stateManager, CheckpointSource checkpointSource, Checkpointer checkpointer, - StatsLogger statsLogger, ScheduledExecutorService gcExecutor, long writeCacheSize, long readCacheSize) + LedgerManager ledgerManager, LedgerDirsManager ledgerDirsManager, LedgerDirsManager indexDirsManager, + EntryLogger entryLogger, StatsLogger statsLogger, + long writeCacheSize, long readCacheSize, int readAheadCacheBatchSize, long readAheadCacheBatchBytesSize) throws IOException { return new MockedSingleDirectoryDbLedgerStorage(conf, ledgerManager, ledgerDirsManager, indexDirsManager, - stateManager, checkpointSource, checkpointer, statsLogger, gcExecutor, writeCacheSize, - 
readCacheSize); + entryLogger, statsLogger, allocator, writeCacheSize, + readCacheSize, readAheadCacheBatchSize, readAheadCacheBatchBytesSize); } private static class MockedSingleDirectoryDbLedgerStorage extends SingleDirectoryDbLedgerStorage { public MockedSingleDirectoryDbLedgerStorage(ServerConfiguration conf, LedgerManager ledgerManager, - LedgerDirsManager ledgerDirsManager, LedgerDirsManager indexDirsManager, StateManager stateManager, - CheckpointSource checkpointSource, Checkpointer checkpointer, StatsLogger statsLogger, - ScheduledExecutorService gcExecutor, long writeCacheSize, long readCacheSize) throws IOException { - super(conf, ledgerManager, ledgerDirsManager, indexDirsManager, stateManager, checkpointSource, - checkpointer, statsLogger, gcExecutor, writeCacheSize, readCacheSize); + LedgerDirsManager ledgerDirsManager, LedgerDirsManager indexDirsManager, EntryLogger entryLogger, + StatsLogger statsLogger, + ByteBufAllocator allocator, long writeCacheSize, + long readCacheSize, int readAheadCacheBatchSize, long readAheadCacheBatchBytesSize) + throws IOException { + super(conf, ledgerManager, ledgerDirsManager, indexDirsManager, entryLogger, + statsLogger, allocator, writeCacheSize, readCacheSize, readAheadCacheBatchSize, + readAheadCacheBatchBytesSize); } @Override @@ -105,8 +106,8 @@ public void setup() throws Exception { tmpDir = File.createTempFile("bkTest", ".dir"); tmpDir.delete(); tmpDir.mkdir(); - File curDir = Bookie.getCurrentDirectory(tmpDir); - Bookie.checkDirectoryStructure(curDir); + File curDir = BookieImpl.getCurrentDirectory(tmpDir); + BookieImpl.checkDirectoryStructure(curDir); int gcWaitTime = 1000; ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); @@ -115,7 +116,7 @@ public void setup() throws Exception { conf.setProperty(DbLedgerStorage.WRITE_CACHE_MAX_SIZE_MB, 1); conf.setProperty(DbLedgerStorage.MAX_THROTTLE_TIME_MILLIS, 1000); conf.setLedgerDirNames(new String[] { tmpDir.toString() }); - Bookie bookie = new Bookie(conf); + Bookie bookie = new TestBookieImpl(conf); storage = (DbLedgerStorage) bookie.getLedgerStorage(); } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/DbReadLedgerIndexEntriesTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/DbReadLedgerIndexEntriesTest.java new file mode 100644 index 00000000000..585764a075d --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/DbReadLedgerIndexEntriesTest.java @@ -0,0 +1,151 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + */ +package org.apache.bookkeeper.bookie.storage.ldb; + +import com.google.common.collect.Lists; +import io.netty.buffer.ByteBuf; +import io.netty.buffer.Unpooled; +import io.netty.buffer.UnpooledByteBufAllocator; +import java.io.File; +import java.io.IOException; +import java.util.List; +import java.util.concurrent.ArrayBlockingQueue; +import java.util.concurrent.BlockingQueue; +import org.apache.bookkeeper.bookie.BookieImpl; +import org.apache.bookkeeper.bookie.CheckpointSource; +import org.apache.bookkeeper.bookie.CheckpointSource.Checkpoint; +import org.apache.bookkeeper.bookie.Checkpointer; +import org.apache.bookkeeper.bookie.LedgerDirsManager; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.conf.TestBKConfiguration; +import org.apache.bookkeeper.stats.NullStatsLogger; +import org.apache.bookkeeper.test.TmpDirs; +import org.apache.bookkeeper.util.DiskChecker; +import org.apache.commons.io.FileUtils; +import org.junit.Assert; +import org.junit.Test; + +/** + * Test for class {@link DbLedgerStorage#readLedgerIndexEntries}. + */ +public class DbReadLedgerIndexEntriesTest { + private static final int TEST_LEDGER_MIN_ID = 0; + private static final int TEST_LEDGER_MAX_ID = 5; + private static final int TEST_ENTRY_MIN_ID = 0; + private static final int TEST_ENTRY_MAX_ID = 10; + + CheckpointSource checkpointSource = new CheckpointSource() { + @Override + public Checkpoint newCheckpoint() { + return Checkpoint.MAX; + } + + @Override + public void checkpointComplete(Checkpoint checkpoint, boolean compact) throws IOException { + } + }; + + Checkpointer checkpointer = new Checkpointer() { + @Override + public void startCheckpoint(Checkpoint checkpoint) { + // No-op + } + + @Override + public void start() { + // no-op + } + }; + + protected final TmpDirs tmpDirs = new TmpDirs(); + + private String newDirectory() throws Exception { + File d = tmpDirs.createNew("bkTest", ".dir"); + d.delete(); + d.mkdir(); + File curDir = BookieImpl.getCurrentDirectory(d); + BookieImpl.checkDirectoryStructure(curDir); + return d.getPath(); + } + + @Test + public void testReadLedgerIndexEntries() throws Exception { + ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); + conf.setLedgerDirNames(new String[]{newDirectory(), newDirectory()}); + conf.setIndexDirName(new String[]{newDirectory(), newDirectory()}); + conf.setLedgerStorageClass(DbLedgerStorage.class.getName()); + DiskChecker diskChecker = new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold()); + LedgerDirsManager ledgerDirsManager = new LedgerDirsManager(conf, conf.getLedgerDirs(), diskChecker); + LedgerDirsManager indexDirsManager = new LedgerDirsManager(conf, conf.getIndexDirs(), diskChecker); + + DbLedgerStorage ledgerStorage = new DbLedgerStorage(); + ledgerStorage.initialize(conf, null, ledgerDirsManager, indexDirsManager, + NullStatsLogger.INSTANCE, UnpooledByteBufAllocator.DEFAULT); + ledgerStorage.setCheckpointer(checkpointer); + ledgerStorage.setCheckpointSource(checkpointSource); + + // Insert some ledger & entries in the storage + for (long ledgerId = TEST_LEDGER_MIN_ID; ledgerId <= TEST_LEDGER_MAX_ID; ledgerId++) { + ledgerStorage.setMasterKey(ledgerId, ("ledger-" + ledgerId).getBytes()); + ledgerStorage.setFenced(ledgerId); + + for (long entryId = TEST_ENTRY_MIN_ID; entryId <= TEST_ENTRY_MAX_ID; entryId++) { + ByteBuf entry = Unpooled.buffer(128); + entry.writeLong(ledgerId); + entry.writeLong(entryId); + entry.writeBytes(("entry-" + entryId).getBytes()); + 
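// A hedged note on the API under test here: DbLedgerStorage.readLedgerIndexEntries
// is a static, offline scan of the RocksDB locations index, run with the storage
// shut down. For each entry of the given ledger it hands the processor an
// (entryId, entryLogId, position) triple, roughly:
//     DbLedgerStorage.readLedgerIndexEntries(ledgerId, conf,
//             (entryId, logId, pos) -> System.out.println(entryId + " -> log " + logId + " @ " + pos));
// so the verification loop below only needs a queue to collect the entry ids.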
+ ledgerStorage.addEntry(entry); + } + } + + ledgerStorage.flush(); + ledgerStorage.shutdown(); + + // read ledger index entries + long ledgerId = TEST_LEDGER_MIN_ID; + try { + for (ledgerId = TEST_LEDGER_MIN_ID; ledgerId <= TEST_LEDGER_MAX_ID; ledgerId++) { + BlockingQueue<Long> entrys = new ArrayBlockingQueue<>(TEST_ENTRY_MAX_ID + 1); + DbLedgerStorage.readLedgerIndexEntries(ledgerId, conf, (eId, entryLogId, pos) -> { + System.out.println("entry " + eId + "\t:\t(log: " + entryLogId + ", pos: " + pos + ")"); + entrys.add(eId); + }); + for (long entryId = TEST_ENTRY_MIN_ID; entryId <= TEST_ENTRY_MAX_ID; entryId++) { + Assert.assertTrue(entrys.contains(entryId)); + } + } + } catch (Exception e) { + System.err.printf("ERROR: reading ledger index entries: %s%n", e.getMessage()); + Assert.fail("failed to read ledger (" + ledgerId + ") index entries"); + } + + List<String> toDeleted = Lists.newArrayList(conf.getLedgerDirNames()); + toDeleted.addAll(Lists.newArrayList(conf.getIndexDirNames())); + toDeleted.forEach(d -> { + try { + FileUtils.forceDelete(new File(d)); + } catch (IOException e) { + e.printStackTrace(); + } + }); + } +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/EntryLocationIndexTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/EntryLocationIndexTest.java index 6e83ffd58a4..765065f0b74 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/EntryLocationIndexTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/EntryLocationIndexTest.java @@ -21,11 +21,13 @@ package org.apache.bookkeeper.bookie.storage.ldb; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; import java.io.File; - +import java.io.IOException; import org.apache.bookkeeper.conf.ServerConfiguration; import org.apache.bookkeeper.stats.NullStatsLogger; +import org.apache.bookkeeper.test.TestStatsProvider; import org.junit.Test; /** @@ -77,6 +79,70 @@ public void deleteLedgerTest() throws Exception { idx.close(); } + @Test + public void deleteBatchLedgersTest() throws Exception { + File tmpDir = File.createTempFile("bkTest", ".dir"); + tmpDir.delete(); + tmpDir.mkdir(); + tmpDir.deleteOnExit(); + + EntryLocationIndex idx = new EntryLocationIndex(serverConfiguration, KeyValueStorageRocksDB.factory, + tmpDir.getAbsolutePath(), NullStatsLogger.INSTANCE); + + int numLedgers = 1000; + int numEntriesPerLedger = 100; + + int location = 0; + KeyValueStorage.Batch batch = idx.newBatch(); + for (int entryId = 0; entryId < numEntriesPerLedger; ++entryId) { + for (int ledgerId = 0; ledgerId < numLedgers; ++ledgerId) { + idx.addLocation(batch, ledgerId, entryId, location); + location++; + } + } + batch.flush(); + batch.close(); + + + int expectedLocation = 0; + for (int entryId = 0; entryId < numEntriesPerLedger; ++entryId) { + for (int ledgerId = 0; ledgerId < numLedgers; ++ledgerId) { + assertEquals(expectedLocation, idx.getLocation(ledgerId, entryId)); + expectedLocation++; + } + } + + for (int ledgerId = 0; ledgerId < numLedgers; ++ledgerId) { + if (ledgerId % 2 == 0) { + idx.delete(ledgerId); + } + } + + expectedLocation = 0; + for (int entryId = 0; entryId < numEntriesPerLedger; ++entryId) { + for (int ledgerId = 0; ledgerId < numLedgers; ++ledgerId) { + assertEquals(expectedLocation, idx.getLocation(ledgerId,
entryId)); + expectedLocation++; + } + } + + idx.removeOffsetFromDeletedLedgers(); + + expectedLocation = 0; + for (int entryId = 0; entryId < numEntriesPerLedger; ++entryId) { + for (int ledgerId = 0; ledgerId < numLedgers; ++ledgerId) { + if (ledgerId % 2 == 0) { + assertEquals(0, idx.getLocation(ledgerId, entryId)); + } else { + assertEquals(expectedLocation, idx.getLocation(ledgerId, entryId)); + } + expectedLocation++; + } + } + + idx.close(); + } + // this tests if a ledger is added after it has been deleted @Test public void addLedgerAfterDeleteTest() throws Exception { @@ -108,4 +174,62 @@ public void addLedgerAfterDeleteTest() throws Exception { idx.close(); } + + // test deleting a non-existent entry + @Test + public void testDeleteSpecialEntry() throws IOException { + File tmpDir = File.createTempFile("bkTest", ".dir"); + tmpDir.delete(); + tmpDir.mkdir(); + tmpDir.deleteOnExit(); + + EntryLocationIndex idx = new EntryLocationIndex(serverConfiguration, KeyValueStorageRocksDB.factory, + tmpDir.getAbsolutePath(), NullStatsLogger.INSTANCE); + + // Add some dummy indexes + idx.addLocation(40312, -1, 1); + idx.addLocation(40313, 10, 2); + idx.addLocation(40320, 0, 3); + + // Add more indexes in a different batch + idx.addLocation(40313, 11, 5); + idx.addLocation(40313, 12, 6); + idx.addLocation(40320, 1, 7); + + // delete a ledger with a non-existent entry + idx.delete(40312); + idx.removeOffsetFromDeletedLedgers(); + + // another delete operation shouldn't be affected + idx.delete(40313); + idx.removeOffsetFromDeletedLedgers(); + assertEquals(0, idx.getLocation(40312, 10)); + } + + @Test + public void testEntryIndexLookupLatencyStats() throws IOException { + File tmpDir = File.createTempFile("bkTest", ".dir"); + tmpDir.delete(); + tmpDir.mkdir(); + tmpDir.deleteOnExit(); + + TestStatsProvider statsProvider = new TestStatsProvider(); + EntryLocationIndex idx = new EntryLocationIndex(serverConfiguration, KeyValueStorageRocksDB.factory, + tmpDir.getAbsolutePath(), statsProvider.getStatsLogger("scope")); + + // Add some dummy indexes + idx.addLocation(40313, 11, 5); + + // successful lookup + assertEquals(5, idx.getLocation(40313, 11)); + TestStatsProvider.TestOpStatsLogger lookupEntryLocationOpStats = + statsProvider.getOpStatsLogger("scope.lookup-entry-location"); + assertEquals(1, lookupEntryLocationOpStats.getSuccessCount()); + assertTrue(lookupEntryLocationOpStats.getSuccessAverage() > 0); + + // failed lookup + assertEquals(0, idx.getLocation(12345, 1)); + assertEquals(1, lookupEntryLocationOpStats.getFailureCount()); + assertEquals(1, lookupEntryLocationOpStats.getSuccessCount()); + } } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/KeyValueStorageRocksDBTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/KeyValueStorageRocksDBTest.java new file mode 100644 index 00000000000..2ef3e010f8b --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/KeyValueStorageRocksDBTest.java @@ -0,0 +1,134 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
          + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.bookkeeper.bookie.storage.ldb; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; + +import java.io.File; +import java.net.URL; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.List; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.junit.Test; +import org.rocksdb.BlockBasedTableConfig; +import org.rocksdb.ChecksumType; +import org.rocksdb.ColumnFamilyDescriptor; +import org.rocksdb.ColumnFamilyOptions; +import org.rocksdb.CompressionType; +import org.rocksdb.DBOptions; +import org.rocksdb.Options; + +public class KeyValueStorageRocksDBTest { + + @Test + public void testRocksDBInitiateWithBookieConfiguration() throws Exception { + ServerConfiguration configuration = new ServerConfiguration(); + configuration.setEntryLocationRocksdbConf("entry_location_rocksdb.conf"); + File tmpDir = Files.createTempDirectory("bk-kv-rocksdbtest-conf").toFile(); + Files.createDirectory(Paths.get(tmpDir.toString(), "subDir")); + KeyValueStorageRocksDB rocksDB = new KeyValueStorageRocksDB(tmpDir.toString(), "subDir", + KeyValueStorageFactory.DbConfigType.EntryLocation, configuration); + assertNull(rocksDB.getColumnFamilyDescriptors()); + + Options options = (Options) rocksDB.getOptions(); + assertEquals(64 * 1024 * 1024, options.writeBufferSize()); + assertEquals(4, options.maxWriteBufferNumber()); + assertEquals(256 * 1024 * 1024, options.maxBytesForLevelBase()); + assertEquals(true, options.levelCompactionDynamicLevelBytes()); + rocksDB.close(); + } + + @Test + public void testRocksDBInitiateWithConfigurationFile() throws Exception { + ServerConfiguration configuration = new ServerConfiguration(); + URL url = getClass().getClassLoader().getResource("test_entry_location_rocksdb.conf"); + configuration.setEntryLocationRocksdbConf(url.getPath()); + File tmpDir = Files.createTempDirectory("bk-kv-rocksdbtest-file").toFile(); + Files.createDirectory(Paths.get(tmpDir.toString(), "subDir")); + KeyValueStorageRocksDB rocksDB = new KeyValueStorageRocksDB(tmpDir.toString(), "subDir", + KeyValueStorageFactory.DbConfigType.EntryLocation, configuration); + assertNotNull(rocksDB.getColumnFamilyDescriptors()); + + DBOptions dbOptions = (DBOptions) rocksDB.getOptions(); + assertTrue(dbOptions.createIfMissing()); + assertEquals(1, dbOptions.keepLogFileNum()); + assertEquals(1000, dbOptions.maxTotalWalSize()); + + List columnFamilyDescriptorList = rocksDB.getColumnFamilyDescriptors(); + ColumnFamilyOptions familyOptions = columnFamilyDescriptorList.get(0).getOptions(); + assertEquals(CompressionType.LZ4_COMPRESSION, familyOptions.compressionType()); + assertEquals(1024, familyOptions.writeBufferSize()); + assertEquals(1, familyOptions.maxWriteBufferNumber()); + assertEquals(true, familyOptions.levelCompactionDynamicLevelBytes()); + rocksDB.close(); + } + + @Test + public void testReadChecksumTypeFromBookieConfiguration() throws Exception { + ServerConfiguration configuration = new ServerConfiguration(); + configuration.setEntryLocationRocksdbConf("entry_location_rocksdb.conf"); + File tmpDir = 
Files.createTempDirectory("bk-kv-rocksdbtest-conf").toFile(); + Files.createDirectory(Paths.get(tmpDir.toString(), "subDir")); + KeyValueStorageRocksDB rocksDB = new KeyValueStorageRocksDB(tmpDir.toString(), "subDir", + KeyValueStorageFactory.DbConfigType.EntryLocation, configuration); + assertNull(rocksDB.getColumnFamilyDescriptors()); + + Options options = (Options) rocksDB.getOptions(); + assertEquals(ChecksumType.kxxHash, ((BlockBasedTableConfig) options.tableFormatConfig()).checksumType()); + } + + //@Test + public void testReadChecksumTypeFromConfigurationFile() throws Exception { + ServerConfiguration configuration = new ServerConfiguration(); + URL url = getClass().getClassLoader().getResource("test_entry_location_rocksdb.conf"); + configuration.setEntryLocationRocksdbConf(url.getPath()); + File tmpDir = Files.createTempDirectory("bk-kv-rocksdbtest-file").toFile(); + Files.createDirectory(Paths.get(tmpDir.toString(), "subDir")); + KeyValueStorageRocksDB rocksDB = new KeyValueStorageRocksDB(tmpDir.toString(), "subDir", + KeyValueStorageFactory.DbConfigType.EntryLocation, configuration); + assertNotNull(rocksDB.getColumnFamilyDescriptors()); + + List<ColumnFamilyDescriptor> columnFamilyDescriptorList = rocksDB.getColumnFamilyDescriptors(); + ColumnFamilyOptions familyOptions = columnFamilyDescriptorList.get(0).getOptions(); + // There is a bug in RocksDB that prevents loading BlockBasedTableConfig from an Options file. + // https://github.com/facebook/rocksdb/issues/5297 + // Once https://github.com/facebook/rocksdb/pull/10826 is merged, this test can be re-enabled. + assertEquals(ChecksumType.kxxHash, ((BlockBasedTableConfig) familyOptions.tableFormatConfig()).checksumType()); + } + + @Test + public void testLevelCompactionDynamicLevelBytesFromConfigurationFile() throws Exception { + ServerConfiguration configuration = new ServerConfiguration(); + URL url = getClass().getClassLoader().getResource("conf/entry_location_rocksdb.conf"); + configuration.setEntryLocationRocksdbConf(url.getPath()); + File tmpDir = Files.createTempDirectory("bk-kv-rocksdbtest-file").toFile(); + Files.createDirectory(Paths.get(tmpDir.toString(), "subDir")); + KeyValueStorageRocksDB rocksDB = new KeyValueStorageRocksDB(tmpDir.toString(), "subDir", + KeyValueStorageFactory.DbConfigType.EntryLocation, configuration); + assertNotNull(rocksDB.getColumnFamilyDescriptors()); + + List<ColumnFamilyDescriptor> columnFamilyDescriptorList = rocksDB.getColumnFamilyDescriptors(); + ColumnFamilyOptions familyOptions = columnFamilyDescriptorList.get(0).getOptions(); + assertEquals(true, familyOptions.levelCompactionDynamicLevelBytes()); + } +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/KeyValueStorageTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/KeyValueStorageTest.java index 65bcfb8163e..d52f19305e1 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/KeyValueStorageTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/KeyValueStorageTest.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements.
See the NOTICE file @@ -24,17 +24,18 @@ import static org.junit.Assert.assertTrue; import com.google.common.collect.Lists; - import java.io.File; +import java.nio.file.Files; +import java.nio.file.Paths; import java.util.Arrays; import java.util.Collection; import java.util.List; import java.util.Map.Entry; - import org.apache.bookkeeper.bookie.storage.ldb.KeyValueStorage.Batch; import org.apache.bookkeeper.bookie.storage.ldb.KeyValueStorage.CloseableIterator; import org.apache.bookkeeper.bookie.storage.ldb.KeyValueStorageFactory.DbConfigType; import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.commons.io.FileUtils; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; @@ -71,10 +72,10 @@ private static byte[] toArray(long n) { @Test public void simple() throws Exception { - File tmpDir = File.createTempFile("bookie", "test"); - tmpDir.delete(); + File tmpDir = Files.createTempDirectory("junitTemporaryFolder").toFile(); + Files.createDirectory(Paths.get(tmpDir.toString(), "subDir")); - KeyValueStorage db = storageFactory.newKeyValueStorage(tmpDir.getAbsolutePath(), DbConfigType.Small, + KeyValueStorage db = storageFactory.newKeyValueStorage(tmpDir.toString(), "subDir", DbConfigType.Default, configuration); assertEquals(null, db.getFloor(toArray(3))); @@ -167,6 +168,43 @@ public void simple() throws Exception { batch.close(); db.close(); - tmpDir.delete(); + FileUtils.deleteDirectory(tmpDir); + } + + @Test + public void testBatch() throws Exception { + + configuration.setOperationMaxNumbersInSingleRocksDBWriteBatch(5); + + File tmpDir = Files.createTempDirectory("junitTemporaryFolder").toFile(); + Files.createDirectory(Paths.get(tmpDir.toString(), "subDir")); + + KeyValueStorage db = storageFactory.newKeyValueStorage(tmpDir.toString(), "subDir", DbConfigType.Default, + configuration); + + assertEquals(null, db.getFloor(toArray(3))); + assertEquals(0, db.count()); + + Batch batch = db.newBatch(); + assertEquals(0, batch.batchCount()); + + batch.put(toArray(1), toArray(1)); + batch.put(toArray(2), toArray(2)); + assertEquals(2, batch.batchCount()); + + batch.put(toArray(3), toArray(3)); + batch.put(toArray(4), toArray(4)); + batch.put(toArray(5), toArray(5)); + assertEquals(0, batch.batchCount()); + batch.put(toArray(6), toArray(6)); + assertEquals(1, batch.batchCount()); + + batch.flush(); + assertEquals(1, batch.batchCount()); + batch.close(); + assertEquals(0, batch.batchCount()); + + db.close(); + FileUtils.deleteDirectory(tmpDir); } } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/LedgersIndexCheckOpTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/LedgersIndexCheckOpTest.java new file mode 100644 index 00000000000..41c80bf0319 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/LedgersIndexCheckOpTest.java @@ -0,0 +1,130 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.bookie.storage.ldb; + +import com.google.common.collect.Lists; +import io.netty.buffer.ByteBuf; +import io.netty.buffer.Unpooled; +import io.netty.buffer.UnpooledByteBufAllocator; +import java.io.File; +import java.io.IOException; +import java.util.List; +import org.apache.bookkeeper.bookie.BookieImpl; +import org.apache.bookkeeper.bookie.CheckpointSource; +import org.apache.bookkeeper.bookie.CheckpointSource.Checkpoint; +import org.apache.bookkeeper.bookie.Checkpointer; +import org.apache.bookkeeper.bookie.LedgerDirsManager; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.conf.TestBKConfiguration; +import org.apache.bookkeeper.stats.NullStatsLogger; +import org.apache.bookkeeper.test.TmpDirs; +import org.apache.bookkeeper.util.DiskChecker; +import org.apache.commons.io.FileUtils; +import org.junit.Assert; +import org.junit.Test; + +/** + * Test for class {@link LocationsIndexRebuildOp}. + */ +public class LedgersIndexCheckOpTest { + + CheckpointSource checkpointSource = new CheckpointSource() { + @Override + public Checkpoint newCheckpoint() { + return Checkpoint.MAX; + } + + @Override + public void checkpointComplete(Checkpoint checkpoint, boolean compact) throws IOException { + } + }; + + Checkpointer checkpointer = new Checkpointer() { + @Override + public void startCheckpoint(Checkpoint checkpoint) { + // No-op + } + + @Override + public void start() { + // no-op + } + }; + + protected final TmpDirs tmpDirs = new TmpDirs(); + private String newDirectory() throws Exception { + File d = tmpDirs.createNew("bkTest", ".dir"); + d.delete(); + d.mkdir(); + File curDir = BookieImpl.getCurrentDirectory(d); + BookieImpl.checkDirectoryStructure(curDir); + return d.getPath(); + } + + @Test + public void testMultiLedgerIndexDiffDirs() throws Exception { + ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); + conf.setLedgerDirNames(new String[] { newDirectory(), newDirectory() }); + conf.setIndexDirName(new String[] { newDirectory(), newDirectory() }); + conf.setLedgerStorageClass(DbLedgerStorage.class.getName()); + DiskChecker diskChecker = new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold()); + LedgerDirsManager ledgerDirsManager = new LedgerDirsManager(conf, conf.getLedgerDirs(), diskChecker); + LedgerDirsManager indexDirsManager = new LedgerDirsManager(conf, conf.getIndexDirs(), diskChecker); + + DbLedgerStorage ledgerStorage = new DbLedgerStorage(); + ledgerStorage.initialize(conf, null, ledgerDirsManager, indexDirsManager, + NullStatsLogger.INSTANCE, UnpooledByteBufAllocator.DEFAULT); + ledgerStorage.setCheckpointer(checkpointer); + ledgerStorage.setCheckpointSource(checkpointSource); + + // Insert some ledger & entries in the storage + for (long ledgerId = 0; ledgerId < 5; ledgerId++) { + ledgerStorage.setMasterKey(ledgerId, ("ledger-" + ledgerId).getBytes()); + ledgerStorage.setFenced(ledgerId); + + for (long entryId = 0; entryId < 100; entryId++) { + ByteBuf entry = Unpooled.buffer(128); + entry.writeLong(ledgerId); + entry.writeLong(entryId); + 
entry.writeBytes(("entry-" + entryId).getBytes()); + + ledgerStorage.addEntry(entry); + } + } + + ledgerStorage.flush(); + ledgerStorage.shutdown(); + + // ledgers index check + Assert.assertTrue(new LedgersIndexCheckOp(conf, true).initiate()); + + // clean data + List toDeleted = Lists.newArrayList(conf.getLedgerDirNames()); + toDeleted.addAll(Lists.newArrayList(conf.getIndexDirNames())); + toDeleted.forEach(d -> { + try { + FileUtils.forceDelete(new File(d)); + } catch (IOException e) { + e.printStackTrace(); + } + }); + } +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/LedgersIndexRebuildOpTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/LedgersIndexRebuildOpTest.java new file mode 100644 index 00000000000..0e99c28998e --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/LedgersIndexRebuildOpTest.java @@ -0,0 +1,130 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.bookie.storage.ldb; + +import com.google.common.collect.Lists; +import io.netty.buffer.ByteBuf; +import io.netty.buffer.Unpooled; +import io.netty.buffer.UnpooledByteBufAllocator; +import java.io.File; +import java.io.IOException; +import java.util.List; +import org.apache.bookkeeper.bookie.BookieImpl; +import org.apache.bookkeeper.bookie.CheckpointSource; +import org.apache.bookkeeper.bookie.CheckpointSource.Checkpoint; +import org.apache.bookkeeper.bookie.Checkpointer; +import org.apache.bookkeeper.bookie.LedgerDirsManager; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.conf.TestBKConfiguration; +import org.apache.bookkeeper.stats.NullStatsLogger; +import org.apache.bookkeeper.test.TmpDirs; +import org.apache.bookkeeper.util.DiskChecker; +import org.apache.commons.io.FileUtils; +import org.junit.Assert; +import org.junit.Test; + +/** + * Test for class {@link LocationsIndexRebuildOp}. 
+ */ +public class LedgersIndexRebuildOpTest { + + CheckpointSource checkpointSource = new CheckpointSource() { + @Override + public Checkpoint newCheckpoint() { + return Checkpoint.MAX; + } + + @Override + public void checkpointComplete(Checkpoint checkpoint, boolean compact) throws IOException { + } + }; + + Checkpointer checkpointer = new Checkpointer() { + @Override + public void startCheckpoint(Checkpoint checkpoint) { + // No-op + } + + @Override + public void start() { + // no-op + } + }; + + protected final TmpDirs tmpDirs = new TmpDirs(); + private String newDirectory() throws Exception { + File d = tmpDirs.createNew("bkTest", ".dir"); + d.delete(); + d.mkdir(); + File curDir = BookieImpl.getCurrentDirectory(d); + BookieImpl.checkDirectoryStructure(curDir); + return d.getPath(); + } + + @Test + public void testMultiLedgerIndexDiffDirs() throws Exception { + ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); + conf.setLedgerDirNames(new String[] { newDirectory(), newDirectory() }); + conf.setIndexDirName(new String[] { newDirectory(), newDirectory() }); + conf.setLedgerStorageClass(DbLedgerStorage.class.getName()); + DiskChecker diskChecker = new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold()); + LedgerDirsManager ledgerDirsManager = new LedgerDirsManager(conf, conf.getLedgerDirs(), diskChecker); + LedgerDirsManager indexDirsManager = new LedgerDirsManager(conf, conf.getIndexDirs(), diskChecker); + + DbLedgerStorage ledgerStorage = new DbLedgerStorage(); + ledgerStorage.initialize(conf, null, ledgerDirsManager, indexDirsManager, + NullStatsLogger.INSTANCE, UnpooledByteBufAllocator.DEFAULT); + ledgerStorage.setCheckpointer(checkpointer); + ledgerStorage.setCheckpointSource(checkpointSource); + + // Insert some ledger & entries in the storage + for (long ledgerId = 0; ledgerId < 5; ledgerId++) { + ledgerStorage.setMasterKey(ledgerId, ("ledger-" + ledgerId).getBytes()); + ledgerStorage.setFenced(ledgerId); + + for (long entryId = 0; entryId < 100; entryId++) { + ByteBuf entry = Unpooled.buffer(128); + entry.writeLong(ledgerId); + entry.writeLong(entryId); + entry.writeBytes(("entry-" + entryId).getBytes()); + + ledgerStorage.addEntry(entry); + } + } + + ledgerStorage.flush(); + ledgerStorage.shutdown(); + + // Rebuild index through the tool + Assert.assertTrue(new LedgersIndexRebuildOp(conf, true).initiate()); + + // clean test data + List toDeleted = Lists.newArrayList(conf.getLedgerDirNames()); + toDeleted.addAll(Lists.newArrayList(conf.getIndexDirNames())); + toDeleted.forEach(d -> { + try { + FileUtils.forceDelete(new File(d)); + } catch (IOException e) { + e.printStackTrace(); + } + }); + } +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/LedgersIndexRebuildTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/LedgersIndexRebuildTest.java new file mode 100644 index 00000000000..f955f713bd6 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/LedgersIndexRebuildTest.java @@ -0,0 +1,134 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.bookie.storage.ldb; + +import static org.junit.Assert.assertTrue; + +import io.netty.buffer.ByteBuf; +import io.netty.buffer.Unpooled; +import io.netty.buffer.UnpooledByteBufAllocator; +import java.io.File; +import java.io.IOException; +import java.util.UUID; +import org.apache.bookkeeper.bookie.BookieImpl; +import org.apache.bookkeeper.bookie.BookieShell; +import org.apache.bookkeeper.bookie.LedgerDirsManager; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.conf.TestBKConfiguration; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.stats.NullStatsLogger; +import org.apache.bookkeeper.util.DiskChecker; +import org.apache.commons.io.FileUtils; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.junit.MockitoJUnitRunner; + +/** + * Test for class {@link LedgersIndexRebuildOp}. + */ +@RunWith(MockitoJUnitRunner.class) +public class LedgersIndexRebuildTest { + + private final BookieId bookieAddress = BookieId.parse(UUID.randomUUID().toString()); + private ServerConfiguration conf; + private File tmpDir; + + @Before + public void setUp() throws IOException { + tmpDir = File.createTempFile("bkTest", ".dir"); + tmpDir.delete(); + tmpDir.mkdir(); + File curDir = BookieImpl.getCurrentDirectory(tmpDir); + BookieImpl.checkDirectoryStructure(curDir); + + System.out.println(tmpDir); + } + + @After + public void tearDown() throws IOException { + FileUtils.forceDelete(tmpDir); + } + + @Test + public void testRebuildIncludesAllLedgersAndSetToFenced() throws Exception { + byte[] masterKey = "12345".getBytes(); + long ledgerCount = 100; + + // no attempts to get ledger metadata fail + DbLedgerStorage ledgerStorage = setupLedgerStorage(); + + // Insert some ledger & entries in the storage + for (long ledgerId = 0; ledgerId < ledgerCount; ledgerId++) { + ledgerStorage.setMasterKey(ledgerId, masterKey); + + for (long entryId = 0; entryId < 2; entryId++) { + ByteBuf entry = Unpooled.buffer(128); + entry.writeLong(ledgerId); + entry.writeLong(entryId); + entry.writeBytes(("entry-" + entryId).getBytes()); + + ledgerStorage.addEntry(entry); + } + } + + ledgerStorage.flush(); + ledgerStorage.shutdown(); + + // Rebuild index through the tool + BookieShell shell = new BookieShell(); + shell.setConf(conf); + int res = shell.run(new String[] { "rebuild-db-ledgers-index", "-v" }); + + Assert.assertEquals(0, res); + + // Verify that the ledgers index has the ledgers and that they are fenced + ledgerStorage = new DbLedgerStorage(); + LedgerDirsManager ledgerDirsManager = new LedgerDirsManager(conf, conf.getLedgerDirs(), + new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold())); + ledgerStorage.initialize(conf, null, ledgerDirsManager, ledgerDirsManager, + NullStatsLogger.INSTANCE, UnpooledByteBufAllocator.DEFAULT); + + for (long ledgerId = 0; ledgerId < ledgerCount; ledgerId++) { + assertTrue(ledgerStorage.ledgerExists(ledgerId)); + assertTrue(ledgerStorage.isFenced(ledgerId)); + 
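// A hedged aside on the assertions above: the rebuild cannot recover per-ledger
// fencing state from the data files alone, so the rebuilt index marks every ledger
// as fenced, which is the conservative default this test pins down. The same op is
// reachable from the command line through the shell (illustrative invocation):
//     BookieShell shell = new BookieShell();
//     shell.setConf(conf);
//     int rc = shell.run(new String[] { "rebuild-db-ledgers-index", "-v" }); // 0 on success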
} + + ledgerStorage.shutdown(); + } + + private DbLedgerStorage setupLedgerStorage() throws Exception { + conf = TestBKConfiguration.newServerConfiguration(); + conf.setBookieId(bookieAddress.getId()); + conf.setLedgerDirNames(new String[] { tmpDir.toString() }); + conf.setLedgerStorageClass(DbLedgerStorage.class.getName()); + LedgerDirsManager ledgerDirsManager = new LedgerDirsManager(conf, conf.getLedgerDirs(), + new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold())); + + DbLedgerStorage ledgerStorage = new DbLedgerStorage(); + ledgerStorage.initialize(conf, null, ledgerDirsManager, ledgerDirsManager, + NullStatsLogger.INSTANCE, UnpooledByteBufAllocator.DEFAULT); + + return ledgerStorage; + } +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/LocationsIndexRebuildTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/LocationsIndexRebuildTest.java index 629a238f283..318f088af97 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/LocationsIndexRebuildTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/LocationsIndexRebuildTest.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -24,15 +24,14 @@ import com.google.common.collect.Lists; import com.google.common.collect.Sets; - import io.netty.buffer.ByteBuf; import io.netty.buffer.Unpooled; - +import io.netty.buffer.UnpooledByteBufAllocator; import java.io.File; import java.io.IOException; +import java.util.List; import java.util.Set; - -import org.apache.bookkeeper.bookie.Bookie; +import org.apache.bookkeeper.bookie.BookieImpl; import org.apache.bookkeeper.bookie.BookieShell; import org.apache.bookkeeper.bookie.CheckpointSource; import org.apache.bookkeeper.bookie.CheckpointSource.Checkpoint; @@ -41,6 +40,7 @@ import org.apache.bookkeeper.conf.ServerConfiguration; import org.apache.bookkeeper.conf.TestBKConfiguration; import org.apache.bookkeeper.stats.NullStatsLogger; +import org.apache.bookkeeper.test.TmpDirs; import org.apache.bookkeeper.util.DiskChecker; import org.apache.commons.io.FileUtils; import org.junit.Assert; @@ -74,13 +74,23 @@ public void start() { } }; + protected final TmpDirs tmpDirs = new TmpDirs(); + private String newDirectory() throws Exception { + File d = tmpDirs.createNew("bkTest", ".dir"); + d.delete(); + d.mkdir(); + File curDir = BookieImpl.getCurrentDirectory(d); + BookieImpl.checkDirectoryStructure(curDir); + return d.getPath(); + } + @Test public void test() throws Exception { File tmpDir = File.createTempFile("bkTest", ".dir"); tmpDir.delete(); tmpDir.mkdir(); - File curDir = Bookie.getCurrentDirectory(tmpDir); - Bookie.checkDirectoryStructure(curDir); + File curDir = BookieImpl.getCurrentDirectory(tmpDir); + BookieImpl.checkDirectoryStructure(curDir); System.out.println(tmpDir); @@ -91,8 +101,10 @@ public void test() throws Exception { new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold())); DbLedgerStorage ledgerStorage = new DbLedgerStorage(); - ledgerStorage.initialize(conf, null, ledgerDirsManager, ledgerDirsManager, null, checkpointSource, checkpointer, - NullStatsLogger.INSTANCE); + ledgerStorage.initialize(conf, null, ledgerDirsManager, ledgerDirsManager, + NullStatsLogger.INSTANCE, UnpooledByteBufAllocator.DEFAULT); + ledgerStorage.setCheckpointer(checkpointer); + 
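// A hedged note on the refactor visible in this hunk: LedgerStorage.initialize()
// no longer takes the StateManager/CheckpointSource/Checkpointer parameters; the
// checkpointing collaborators are now injected through setters after construction:
//     ledgerStorage.initialize(conf, null /* ledgerManager */, ledgerDirsManager,
//             indexDirsManager, NullStatsLogger.INSTANCE, UnpooledByteBufAllocator.DEFAULT);
//     ledgerStorage.setCheckpointSource(checkpointSource);
//     ledgerStorage.setCheckpointer(checkpointer);
// which is why every test in this file gains the two setter calls.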
ledgerStorage.setCheckpointSource(checkpointSource); // Insert some ledger & entries in the storage for (long ledgerId = 0; ledgerId < 5; ledgerId++) { @@ -121,8 +133,10 @@ public void test() throws Exception { // Verify that db index has the same entries ledgerStorage = new DbLedgerStorage(); - ledgerStorage.initialize(conf, null, ledgerDirsManager, ledgerDirsManager, null, checkpointSource, checkpointer, - NullStatsLogger.INSTANCE); + ledgerStorage.initialize(conf, null, ledgerDirsManager, ledgerDirsManager, + NullStatsLogger.INSTANCE, UnpooledByteBufAllocator.DEFAULT); + ledgerStorage.setCheckpointSource(checkpointSource); + ledgerStorage.setCheckpointer(checkpointer); Set ledgers = Sets.newTreeSet(ledgerStorage.getActiveLedgersInRange(0, Long.MAX_VALUE)); Assert.assertEquals(Sets.newTreeSet(Lists.newArrayList(0L, 1L, 2L, 3L, 4L)), ledgers); @@ -150,4 +164,83 @@ public void test() throws Exception { ledgerStorage.shutdown(); FileUtils.forceDelete(tmpDir); } + + @Test + public void testMultiLedgerIndexDiffDirs() throws Exception { + ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); + conf.setLedgerDirNames(new String[] { newDirectory(), newDirectory() }); + conf.setIndexDirName(new String[] { newDirectory(), newDirectory() }); + conf.setLedgerStorageClass(DbLedgerStorage.class.getName()); + DiskChecker diskChecker = new DiskChecker(conf.getDiskUsageThreshold(), conf.getDiskUsageWarnThreshold()); + LedgerDirsManager ledgerDirsManager = new LedgerDirsManager(conf, conf.getLedgerDirs(), diskChecker); + LedgerDirsManager indexDirsManager = new LedgerDirsManager(conf, conf.getIndexDirs(), diskChecker); + + DbLedgerStorage ledgerStorage = new DbLedgerStorage(); + ledgerStorage.initialize(conf, null, ledgerDirsManager, indexDirsManager, + NullStatsLogger.INSTANCE, UnpooledByteBufAllocator.DEFAULT); + ledgerStorage.setCheckpointer(checkpointer); + ledgerStorage.setCheckpointSource(checkpointSource); + + // Insert some ledger & entries in the storage + for (long ledgerId = 0; ledgerId < 5; ledgerId++) { + ledgerStorage.setMasterKey(ledgerId, ("ledger-" + ledgerId).getBytes()); + ledgerStorage.setFenced(ledgerId); + + for (long entryId = 0; entryId < 100; entryId++) { + ByteBuf entry = Unpooled.buffer(128); + entry.writeLong(ledgerId); + entry.writeLong(entryId); + entry.writeBytes(("entry-" + entryId).getBytes()); + + ledgerStorage.addEntry(entry); + } + } + + ledgerStorage.flush(); + ledgerStorage.shutdown(); + + // Rebuild index through the tool + new LocationsIndexRebuildOp(conf).initiate(); + + // Verify that db index has the same entries + ledgerStorage = new DbLedgerStorage(); + ledgerStorage.initialize(conf, null, ledgerDirsManager, indexDirsManager, + NullStatsLogger.INSTANCE, UnpooledByteBufAllocator.DEFAULT); + ledgerStorage.setCheckpointSource(checkpointSource); + ledgerStorage.setCheckpointer(checkpointer); + + Set ledgers = Sets.newTreeSet(ledgerStorage.getActiveLedgersInRange(0, Long.MAX_VALUE)); + Assert.assertEquals(Sets.newTreeSet(Lists.newArrayList(0L, 1L, 2L, 3L, 4L)), ledgers); + + for (long ledgerId = 0; ledgerId < 5; ledgerId++) { + Assert.assertEquals(true, ledgerStorage.isFenced(ledgerId)); + Assert.assertEquals("ledger-" + ledgerId, new String(ledgerStorage.readMasterKey(ledgerId))); + + ByteBuf lastEntry = ledgerStorage.getLastEntry(ledgerId); + assertEquals(ledgerId, lastEntry.readLong()); + long lastEntryId = lastEntry.readLong(); + assertEquals(99, lastEntryId); + + for (long entryId = 0; entryId < 100; entryId++) { + ByteBuf entry = 
Unpooled.buffer(1024); + entry.writeLong(ledgerId); + entry.writeLong(entryId); + entry.writeBytes(("entry-" + entryId).getBytes()); + + ByteBuf result = ledgerStorage.getEntry(ledgerId, entryId); + Assert.assertEquals(entry, result); + } + } + + ledgerStorage.shutdown(); + List<String> toDeleted = Lists.newArrayList(conf.getLedgerDirNames()); + toDeleted.addAll(Lists.newArrayList(conf.getIndexDirNames())); + toDeleted.forEach(d -> { + try { + FileUtils.forceDelete(new File(d)); + } catch (IOException e) { + e.printStackTrace(); + } + }); + } } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/PersistentEntryLogMetadataMapTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/PersistentEntryLogMetadataMapTest.java new file mode 100644 index 00000000000..1f5d52d6dc3 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/PersistentEntryLogMetadataMapTest.java @@ -0,0 +1,156 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.bookie.storage.ldb; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import com.google.common.collect.Lists; +import java.io.File; +import java.util.List; +import org.apache.bookkeeper.bookie.BookieException; +import org.apache.bookkeeper.bookie.EntryLogMetadata; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; + +/** + * Unit test for {@link PersistentEntryLogMetadataMap}. + */ +public class PersistentEntryLogMetadataMapTest { + + private final ServerConfiguration configuration; + + @Rule + public TemporaryFolder tempFolder = new TemporaryFolder(); + + public PersistentEntryLogMetadataMapTest() { + this.configuration = new ServerConfiguration(); + } + + /** + * Validates PersistentEntryLogMetadataMap functionalities.
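+ * Exercises put, containsKey, size, forEach, forKey and remove round-trips against the RocksDB-backed store.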
+ * + * @throws Exception + */ + @Test + public void simple() throws Exception { + File tmpDir = tempFolder.newFolder("metadata-cache"); + String path = tmpDir.getAbsolutePath(); + PersistentEntryLogMetadataMap entryMetadataMap = new PersistentEntryLogMetadataMap(path, configuration); + + List<EntryLogMetadata> metadatas = Lists.newArrayList(); + int totalMetadata = 1000; + // insert entry-log-metadata records + for (int i = 1; i <= totalMetadata; i++) { + EntryLogMetadata entryLogMeta = createEntryLogMetadata(i, i); + metadatas.add(entryLogMeta); + entryMetadataMap.put(i, entryLogMeta); + } + for (int i = 1; i <= totalMetadata; i++) { + assertTrue(entryMetadataMap.containsKey(i)); + } + + assertEquals(entryMetadataMap.size(), totalMetadata); + + entryMetadataMap.forEach((logId, metadata) -> { + assertEquals(metadatas.get(logId.intValue() - 1).getTotalSize(), metadata.getTotalSize()); + for (int i = 0; i < logId.intValue(); i++) { + assertTrue(metadata.containsLedger(i)); + } + }); + + metadatas.forEach(meta -> { + long logId = meta.getEntryLogId(); + try { + entryMetadataMap.forKey(logId, (entryLogId, persistedMeta) -> { + assertEquals(meta.getEntryLogId(), persistedMeta.getEntryLogId()); + assertEquals(meta.getTotalSize(), persistedMeta.getTotalSize()); + assertEquals(logId, (long) entryLogId); + }); + } catch (BookieException.EntryLogMetadataMapException e) { + throw new RuntimeException(e); + } + }); + + // remove entry-log entry + for (int i = 1; i <= totalMetadata; i++) { + entryMetadataMap.remove(i); + } + + // entries should no longer be present in the map + for (int i = 1; i <= totalMetadata; i++) { + assertFalse(entryMetadataMap.containsKey(i)); + } + + assertEquals(entryMetadataMap.size(), 0); + + entryMetadataMap.close(); + } + + /** + * Validates PersistentEntryLogMetadataMap persists metadata state in + * RocksDB.
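+ * The map is closed and reopened from the same path to check that previously stored entries survive a restart.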
+ * + * @throws Exception + */ + @Test + public void closeAndOpen() throws Exception { + File tmpDir = tempFolder.newFolder(); + String path = tmpDir.getAbsolutePath(); + PersistentEntryLogMetadataMap entryMetadataMap = new PersistentEntryLogMetadataMap(path, configuration); + + List<EntryLogMetadata> metadatas = Lists.newArrayList(); + int totalMetadata = 1000; + for (int i = 1; i <= totalMetadata; i++) { + EntryLogMetadata entryLogMeta = createEntryLogMetadata(i, i); + metadatas.add(entryLogMeta); + entryMetadataMap.put(i, entryLogMeta); + } + for (int i = 1; i <= totalMetadata; i++) { + assertTrue(entryMetadataMap.containsKey(i)); + } + + // close metadata-map + entryMetadataMap.close(); + // Open it again + entryMetadataMap = new PersistentEntryLogMetadataMap(path, configuration); + + entryMetadataMap.forEach((logId, metadata) -> { + assertEquals(metadatas.get(logId.intValue() - 1).getTotalSize(), logId.longValue()); + for (int i = 0; i < logId.intValue(); i++) { + assertTrue(metadata.containsLedger(i)); + } + }); + + entryMetadataMap.close(); + } + + private EntryLogMetadata createEntryLogMetadata(long logId, long totalLedgers) { + EntryLogMetadata metadata = new EntryLogMetadata(logId); + for (int i = 0; i < totalLedgers; i++) { + metadata.addLedgerSize(i, 1); + } + return metadata; + } +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/ReadCacheTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/ReadCacheTest.java index 42e509963d4..ad846c6212b 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/ReadCacheTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/ReadCacheTest.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file @@ -20,12 +20,15 @@ */ package org.apache.bookkeeper.bookie.storage.ldb; + import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; import io.netty.buffer.ByteBuf; import io.netty.buffer.Unpooled; - +import io.netty.buffer.UnpooledByteBufAllocator; import org.junit.Test; /** @@ -35,7 +38,7 @@ public class ReadCacheTest { @Test public void simple() { - ReadCache cache = new ReadCache(10 * 1024); + ReadCache cache = new ReadCache(UnpooledByteBufAllocator.DEFAULT, 10 * 1024); assertEquals(0, cache.count()); assertEquals(0, cache.size()); @@ -72,7 +75,7 @@ public void simple() { @Test public void emptyCache() { - ReadCache cache = new ReadCache(10 * 1024); + ReadCache cache = new ReadCache(UnpooledByteBufAllocator.DEFAULT, 10 * 1024); assertEquals(0, cache.count()); assertEquals(0, cache.size()); @@ -84,7 +87,7 @@ public void emptyCache() { @Test public void multipleSegments() { // Test with multiple smaller segments - ReadCache cache = new ReadCache(10 * 1024, 2 * 1024); + ReadCache cache = new ReadCache(UnpooledByteBufAllocator.DEFAULT, 10 * 1024, 2 * 1024); assertEquals(0, cache.count()); assertEquals(0, cache.size()); @@ -115,4 +118,24 @@ public void multipleSegments() { cache.close(); } + + @Test + public void testHasEntry() { + ReadCache cache = new ReadCache(UnpooledByteBufAllocator.DEFAULT, 10 * 1024, 2 * 1024); + + long ledgerId = 0xfefe; + for (int i = 0; i < 10; i++) { + ByteBuf entry = Unpooled.wrappedBuffer(new byte[1024]); + entry.setInt(0, i); + cache.put(ledgerId, i, entry); + } + + assertFalse(cache.hasEntry(0xdead, 0)); + assertFalse(cache.hasEntry(ledgerId, -1)); + for (int i = 0; i < 10; i++) { + assertTrue(cache.hasEntry(ledgerId, i)); + } + assertFalse(cache.hasEntry(ledgerId, 10)); + } + } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/WriteCacheTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/WriteCacheTest.java index f8b2bba4463..9ac84984b62 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/WriteCacheTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/WriteCacheTest.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file @@ -26,10 +26,12 @@ import static org.junit.Assert.assertTrue; import io.netty.buffer.ByteBuf; +import io.netty.buffer.ByteBufAllocator; import io.netty.buffer.ByteBufUtil; -import io.netty.buffer.PooledByteBufAllocator; import io.netty.buffer.Unpooled; - +import io.netty.buffer.UnpooledByteBufAllocator; +import io.netty.util.ReferenceCountUtil; +import java.io.IOException; import java.nio.charset.Charset; import java.util.concurrent.BrokenBarrierException; import java.util.concurrent.CountDownLatch; @@ -38,7 +40,6 @@ import java.util.concurrent.Executors; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; - import org.junit.Test; /** @@ -46,11 +47,13 @@ */ public class WriteCacheTest { + private static final ByteBufAllocator allocator = UnpooledByteBufAllocator.DEFAULT; + @Test public void simple() throws Exception { - WriteCache cache = new WriteCache(10 * 1024); + WriteCache cache = new WriteCache(allocator, 10 * 1024); - ByteBuf entry1 = PooledByteBufAllocator.DEFAULT.buffer(1024); + ByteBuf entry1 = allocator.buffer(1024); ByteBufUtil.writeUtf8(entry1, "entry-1"); entry1.writerIndex(entry1.capacity()); @@ -77,7 +80,7 @@ public void simple() throws Exception { assertEquals(0, cache.count()); assertEquals(0, cache.size()); - entry1.release(); + ReferenceCountUtil.release(entry1); cache.close(); } @@ -87,9 +90,9 @@ public void cacheFull() throws Exception { int entrySize = 1024; int entriesCount = cacheSize / entrySize; - WriteCache cache = new WriteCache(cacheSize); + WriteCache cache = new WriteCache(allocator, cacheSize); - ByteBuf entry = PooledByteBufAllocator.DEFAULT.buffer(entrySize); + ByteBuf entry = allocator.buffer(entrySize); entry.writerIndex(entry.capacity()); for (int i = 0; i < entriesCount; i++) { @@ -118,14 +121,14 @@ public void cacheFull() throws Exception { assertEquals(0, findCount.get()); - entry.release(); + ReferenceCountUtil.release(entry); cache.close(); } @Test public void testMultipleSegments() { // Create cache with max size 1Mb and each segment is 16Kb - WriteCache cache = new WriteCache(1024 * 1024, 16 * 1024); + WriteCache cache = new WriteCache(allocator, 1024 * 1024, 16 * 1024); ByteBuf entry = Unpooled.buffer(1024); entry.writerIndex(entry.capacity()); @@ -141,8 +144,8 @@ public void testMultipleSegments() { } @Test - public void testEmptyCache() { - WriteCache cache = new WriteCache(1024 * 1024, 16 * 1024); + public void testEmptyCache() throws IOException { + WriteCache cache = new WriteCache(allocator, 1024 * 1024, 16 * 1024); assertEquals(0, cache.count()); assertEquals(0, cache.size()); @@ -160,7 +163,7 @@ public void testEmptyCache() { @Test public void testMultipleWriters() throws Exception { // Create cache with max size 1Mb and each segment is 16Kb - WriteCache cache = new WriteCache(10 * 1024 * 1024, 16 * 1024); + WriteCache cache = new WriteCache(allocator, 10 * 1024 * 1024, 16 * 1024); ExecutorService executor = Executors.newCachedThreadPool(); @@ -219,8 +222,8 @@ public void testMultipleWriters() throws Exception { } @Test - public void testLedgerDeletion() { - WriteCache cache = new WriteCache(1024 * 1024, 16 * 1024); + public void testLedgerDeletion() throws IOException { + WriteCache cache = new WriteCache(allocator, 1024 * 1024, 16 * 1024); ByteBuf entry = Unpooled.buffer(1024); entry.writerIndex(entry.capacity()); @@ -265,7 +268,7 @@ public void testLedgerDeletion() { @Test public void testWriteReadsInMultipleSegments() { // Create cache with max size 4 KB and each 
segment is 128 bytes - WriteCache cache = new WriteCache(4 * 1024, 128); + WriteCache cache = new WriteCache(allocator, 4 * 1024, 128); for (int i = 0; i < 48; i++) { boolean inserted = cache.put(1, i, Unpooled.wrappedBuffer(("test-" + i).getBytes())); @@ -283,4 +286,41 @@ public void testWriteReadsInMultipleSegments() { cache.close(); } + @Test + public void testHasEntry() { + // Create cache with max size 4 KB and each segment is 128 bytes + WriteCache cache = new WriteCache(allocator, 4 * 1024, 128); + + long ledgerId = 0xdede; + for (int i = 0; i < 48; i++) { + boolean inserted = cache.put(ledgerId, i, Unpooled.wrappedBuffer(("test-" + i).getBytes())); + assertTrue(inserted); + } + + assertEquals(48, cache.count()); + + assertFalse(cache.hasEntry(0xfede, 1)); + assertFalse(cache.hasEntry(ledgerId, -1)); + for (int i = 0; i < 48; i++) { + assertTrue(cache.hasEntry(ledgerId, i)); + } + assertFalse(cache.hasEntry(ledgerId, 48)); + } + + @Test(expected = IOException.class) + public void testForEachIOException() throws Exception { + try (WriteCache cache = new WriteCache(allocator, 1024 * 1024, 16 * 1024)) { + + for (int i = 0; i < 48; i++) { + boolean inserted = cache.put(1, i, Unpooled.wrappedBuffer(("test-" + i).getBytes())); + assertTrue(inserted); + } + + assertEquals(48, cache.count()); + + cache.forEach(((ledgerId, entryId, entry) -> { + throw new IOException("test throw IOException."); + })); + } + } } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BKExceptionTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BKExceptionTest.java new file mode 100644 index 00000000000..0d74fb62809 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BKExceptionTest.java @@ -0,0 +1,66 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.client; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.util.concurrent.CompletionException; +import java.util.concurrent.ExecutionException; +import org.junit.jupiter.api.Test; + +/** + * Test for extracting codes from BKException. 
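+ * getExceptionCode should unwrap nested ExecutionException and CompletionException causes, and fall back to the supplied default code when no BKException is found.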
+ */ +public class BKExceptionTest { + + @Test + public void testBKExceptionCode() { + assertEquals(BKException.Code.WriteException, + BKException.getExceptionCode(new BKException.BKWriteException(), + BKException.Code.ReadException)); + } + + @Test + public void testNonBKExceptionCode() { + assertEquals(BKException.Code.ReadException, + BKException.getExceptionCode(new Exception(), + BKException.Code.ReadException)); + } + + @Test + public void testNestedBKExceptionCode() { + assertEquals(BKException.Code.WriteException, + BKException.getExceptionCode( + new ExecutionException("test", new BKException.BKWriteException()), + BKException.Code.ReadException)); + } + + @Test + public void testDoubleNestedBKExceptionCode() { + assertEquals(BKException.Code.WriteException, + BKException.getExceptionCode( + new ExecutionException("test", + new CompletionException("blah", + new BKException.BKWriteException())), + BKException.Code.ReadException)); + } +} + diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BookKeeperAdminTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BookKeeperAdminTest.java index 6e8cf324de8..e776aea707d 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BookKeeperAdminTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BookKeeperAdminTest.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -20,30 +20,61 @@ */ package org.apache.bookkeeper.client; -import static com.google.common.base.Charsets.UTF_8; +import static java.nio.charset.StandardCharsets.UTF_8; import static org.apache.bookkeeper.util.BookKeeperConstants.AVAILABLE_NODE; import static org.apache.bookkeeper.util.BookKeeperConstants.READONLY; +import static org.hamcrest.Matchers.is; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertThat; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import com.google.common.net.InetAddresses; import java.io.File; import java.util.ArrayList; +import java.util.Collection; +import java.util.HashSet; import java.util.Iterator; import java.util.List; +import java.util.Objects; import java.util.Random; -import org.apache.bookkeeper.bookie.Bookie; +import java.util.Set; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; +import org.apache.bookkeeper.bookie.BookieImpl; +import org.apache.bookkeeper.bookie.BookieResources; +import org.apache.bookkeeper.bookie.CookieValidation; +import org.apache.bookkeeper.bookie.LegacyCookieValidation; import org.apache.bookkeeper.client.BookKeeper.DigestType; +import org.apache.bookkeeper.client.api.LedgerMetadata; +import org.apache.bookkeeper.common.component.ComponentStarter; +import org.apache.bookkeeper.common.component.Lifecycle; +import org.apache.bookkeeper.common.component.LifecycleComponent; import org.apache.bookkeeper.conf.ClientConfiguration; import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.conf.TestBKConfiguration; +import org.apache.bookkeeper.discover.BookieServiceInfo; +import org.apache.bookkeeper.discover.RegistrationManager; +import 
org.apache.bookkeeper.meta.MetadataBookieDriver; import org.apache.bookkeeper.meta.UnderreplicatedLedger; import org.apache.bookkeeper.meta.ZkLedgerUnderreplicationManager; import org.apache.bookkeeper.meta.zk.ZKMetadataDriverBase; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.net.BookieSocketAddress; import org.apache.bookkeeper.proto.BookieServer; import org.apache.bookkeeper.replication.ReplicationException.UnavailableException; +import org.apache.bookkeeper.server.Main; +import org.apache.bookkeeper.server.conf.BookieConfiguration; +import org.apache.bookkeeper.stats.NullStatsLogger; import org.apache.bookkeeper.test.BookKeeperClusterTestCase; +import org.apache.bookkeeper.util.AvailabilityOfEntriesOfLedger; import org.apache.bookkeeper.util.BookKeeperConstants; +import org.apache.bookkeeper.util.PortManager; import org.apache.commons.io.FileUtils; import org.apache.zookeeper.CreateMode; import org.apache.zookeeper.ZooDefs.Ids; @@ -98,10 +129,10 @@ public void testTriggerAuditWithoutStoreSystemTimeAsLedgerUnderreplicatedMarkTim } public void testTriggerAudit(boolean storeSystemTimeAsLedgerUnderreplicatedMarkTime) throws Exception { - ServerConfiguration thisServerConf = new ServerConfiguration(baseConf); - thisServerConf - .setStoreSystemTimeAsLedgerUnderreplicatedMarkTime(storeSystemTimeAsLedgerUnderreplicatedMarkTime); - restartBookies(thisServerConf); + restartBookies(c -> { + c.setStoreSystemTimeAsLedgerUnderreplicatedMarkTime(storeSystemTimeAsLedgerUnderreplicatedMarkTime); + return c; + }); ClientConfiguration thisClientConf = new ClientConfiguration(baseClientConf); thisClientConf .setStoreSystemTimeAsLedgerUnderreplicatedMarkTime(storeSystemTimeAsLedgerUnderreplicatedMarkTime); @@ -126,7 +157,7 @@ public void testTriggerAudit(boolean storeSystemTimeAsLedgerUnderreplicatedMarkT ledgerHandle.addEntry(0, "data".getBytes()); ledgerHandle.close(); - BookieServer bookieToKill = bs.get(1); + BookieServer bookieToKill = serverByIndex(1); killBookie(1); /* * since lostBookieRecoveryDelay is set, when a bookie is died, it will @@ -139,8 +170,8 @@ public void testTriggerAudit(boolean storeSystemTimeAsLedgerUnderreplicatedMarkT assertTrue("There are supposed to be underreplicatedledgers", underreplicatedLedgerItr.hasNext()); UnderreplicatedLedger underreplicatedLedger = underreplicatedLedgerItr.next(); assertEquals("Underreplicated ledgerId", ledgerId, underreplicatedLedger.getLedgerId()); - assertTrue("Missingreplica of Underreplicated ledgerId should contain " + bookieToKill.getLocalAddress(), - underreplicatedLedger.getReplicaList().contains(bookieToKill.getLocalAddress().toString())); + assertTrue("Missingreplica of Underreplicated ledgerId should contain " + bookieToKill, + underreplicatedLedger.getReplicaList().contains(bookieToKill.getBookieId().getId())); if (storeSystemTimeAsLedgerUnderreplicatedMarkTime) { long ctimeOfURL = underreplicatedLedger.getCtime(); assertTrue("ctime of underreplicated ledger should be greater than test starttime", @@ -154,18 +185,19 @@ public void testTriggerAudit(boolean storeSystemTimeAsLedgerUnderreplicatedMarkT @Test public void testBookieInit() throws Exception { - int bookieindex = 0; - ServerConfiguration confOfExistingBookie = bsConfs.get(bookieindex); - Assert.assertFalse("initBookie shouldn't have succeeded, since bookie is still running with that configuration", - BookKeeperAdmin.initBookie(confOfExistingBookie)); - killBookie(bookieindex); - Assert.assertFalse("initBookie shouldn't have succeeded, since previous 
bookie is not formatted yet", - BookKeeperAdmin.initBookie(confOfExistingBookie)); - - File[] journalDirs = confOfExistingBookie.getJournalDirs(); - for (File journalDir : journalDirs) { - FileUtils.deleteDirectory(journalDir); + ServerConfiguration confOfExistingBookie = newServerConfiguration(); + BookieId bookieId = BookieImpl.getBookieId(confOfExistingBookie); + try (MetadataBookieDriver driver = BookieResources.createMetadataDriver( + confOfExistingBookie, NullStatsLogger.INSTANCE); + RegistrationManager rm = driver.createRegistrationManager()) { + CookieValidation cookieValidation = new LegacyCookieValidation(confOfExistingBookie, rm); + cookieValidation.checkCookies(Main.storageDirectoriesFromConf(confOfExistingBookie)); + rm.registerBookie(bookieId, false /* readOnly */, BookieServiceInfo.EMPTY); + Assert.assertFalse( + "initBookie shouldn't have succeeded, since bookie is still running with that configuration", + BookKeeperAdmin.initBookie(confOfExistingBookie)); } + Assert.assertFalse("initBookie shouldn't have succeeded, since previous bookie is not formatted yet completely", BookKeeperAdmin.initBookie(confOfExistingBookie)); @@ -184,11 +216,10 @@ public void testBookieInit() throws Exception { } Assert.assertFalse("initBookie shouldn't have succeeded, since cookie in ZK is not deleted yet", BookKeeperAdmin.initBookie(confOfExistingBookie)); - String bookieId = Bookie.getBookieAddress(confOfExistingBookie).toString(); String bookieCookiePath = ZKMetadataDriverBase.resolveZkLedgersRootPath(confOfExistingBookie) + "/" + BookKeeperConstants.COOKIE_NODE - + "/" + bookieId; + + "/" + bookieId.toString(); zkc.delete(bookieCookiePath, -1); Assert.assertTrue("initBookie shouldn't succeeded", @@ -399,4 +430,327 @@ void initiateNewClusterAndCreateLedgers(ServerConfiguration newConfig, List futureResult = bkAdmin + .asyncGetListOfEntriesOfLedger(addressByIndex(i), nonExistingLedgerId); + try { + futureResult.get(); + fail("asyncGetListOfEntriesOfLedger is supposed to be failed with NoSuchLedgerExistsException"); + } catch (ExecutionException ee) { + assertTrue(ee.getCause() instanceof BKException); + BKException e = (BKException) ee.getCause(); + assertEquals(e.getCode(), BKException.Code.NoSuchLedgerExistsException); + } + } + } + } + + public void testGetListOfEntriesOfLedger(boolean isLedgerClosed) throws Exception { + ClientConfiguration conf = new ClientConfiguration(); + conf.setMetadataServiceUri(zkUtil.getMetadataServiceUri()); + int numOfEntries = 6; + BookKeeper bkc = new BookKeeper(conf); + LedgerHandle lh = bkc.createLedger(numOfBookies, numOfBookies, digestType, "testPasswd".getBytes()); + long lId = lh.getId(); + for (int i = 0; i < numOfEntries; i++) { + lh.addEntry("000".getBytes()); + } + if (isLedgerClosed) { + lh.close(); + } + try (BookKeeperAdmin bkAdmin = new BookKeeperAdmin(zkUtil.getZooKeeperConnectString())) { + for (int i = 0; i < bookieCount(); i++) { + CompletableFuture futureResult = bkAdmin + .asyncGetListOfEntriesOfLedger(addressByIndex(i), lId); + AvailabilityOfEntriesOfLedger availabilityOfEntriesOfLedger = futureResult.get(); + assertEquals("Number of entries", numOfEntries, + availabilityOfEntriesOfLedger.getTotalNumOfAvailableEntries()); + for (int j = 0; j < numOfEntries; j++) { + assertTrue("Entry should be available: " + j, availabilityOfEntriesOfLedger.isEntryAvailable(j)); + } + assertFalse("Entry should not be available: " + numOfEntries, + availabilityOfEntriesOfLedger.isEntryAvailable(numOfEntries)); + } + } + bkc.close(); + } + + @Test + public 
void testGetEntriesFromEmptyLedger() throws Exception { + ClientConfiguration conf = new ClientConfiguration(); + conf.setMetadataServiceUri(zkUtil.getMetadataServiceUri()); + BookKeeper bkc = new BookKeeper(conf); + LedgerHandle lh = bkc.createLedger(numOfBookies, numOfBookies, digestType, "testPasswd".getBytes(UTF_8)); + lh.close(); + long ledgerId = lh.getId(); + + try (BookKeeperAdmin bkAdmin = new BookKeeperAdmin(zkUtil.getZooKeeperConnectString())) { + Iterator iter = bkAdmin.readEntries(ledgerId, 0, 0).iterator(); + assertFalse(iter.hasNext()); + } + + bkc.close(); + } + + @Test + public void testGetListOfEntriesOfLedgerWithJustOneBookieInWriteQuorum() throws Exception { + ClientConfiguration conf = new ClientConfiguration(); + conf.setMetadataServiceUri(zkUtil.getMetadataServiceUri()); + int numOfEntries = 6; + BookKeeper bkc = new BookKeeper(conf); + /* + * in this testsuite there are going to be 2 (numOfBookies) and if + * writeQuorum is 1 then it will stripe entries to those two bookies. + */ + LedgerHandle lh = bkc.createLedger(2, 1, digestType, "testPasswd".getBytes()); + long lId = lh.getId(); + for (int i = 0; i < numOfEntries; i++) { + lh.addEntry("000".getBytes()); + } + + try (BookKeeperAdmin bkAdmin = new BookKeeperAdmin(zkUtil.getZooKeeperConnectString())) { + for (int i = 0; i < bookieCount(); i++) { + CompletableFuture futureResult = bkAdmin + .asyncGetListOfEntriesOfLedger(addressByIndex(i), lId); + AvailabilityOfEntriesOfLedger availabilityOfEntriesOfLedger = futureResult.get(); + /* + * since num of bookies in the ensemble is 2 and + * writeQuorum/ackQuorum is 1, it will stripe to these two + * bookies and hence in each bookie there will be only + * numOfEntries/2 entries. + */ + assertEquals("Number of entries", numOfEntries / 2, + availabilityOfEntriesOfLedger.getTotalNumOfAvailableEntries()); + } + } + bkc.close(); + } + + @Test + public void testGetBookies() throws Exception { + String ledgersRootPath = "/ledgers"; + Assert.assertTrue("Cluster rootpath should have been created successfully " + ledgersRootPath, + (zkc.exists(ledgersRootPath, false) != null)); + String bookieCookiePath = ZKMetadataDriverBase.resolveZkLedgersRootPath(baseConf) + + "/" + BookKeeperConstants.COOKIE_NODE; + Assert.assertTrue("AvailableBookiesPath should have been created successfully " + bookieCookiePath, + (zkc.exists(bookieCookiePath, false) != null)); + + try (BookKeeperAdmin bkAdmin = new BookKeeperAdmin(zkUtil.getZooKeeperConnectString())) { + Collection availableBookies = bkAdmin.getAvailableBookies(); + Assert.assertEquals(availableBookies.size(), bookieCount()); + + for (int i = 0; i < bookieCount(); i++) { + availableBookies.contains(addressByIndex(i)); + } + + BookieServer killedBookie = serverByIndex(1); + killBookieAndWaitForZK(1); + + Collection remainingBookies = bkAdmin.getAvailableBookies(); + Assert.assertFalse(remainingBookies.contains(killedBookie)); + + Collection allBookies = bkAdmin.getAllBookies(); + for (int i = 0; i < bookieCount(); i++) { + remainingBookies.contains(addressByIndex(i)); + allBookies.contains(addressByIndex(i)); + } + + Assert.assertEquals(remainingBookies.size(), allBookies.size() - 1); + Assert.assertTrue(allBookies.contains(killedBookie.getBookieId())); + } + } + + @Test + public void testGetListOfEntriesOfLedgerWithEntriesNotStripedToABookie() throws Exception { + ClientConfiguration conf = new ClientConfiguration(); + conf.setMetadataServiceUri(zkUtil.getMetadataServiceUri()); + BookKeeper bkc = new BookKeeper(conf); + /* + * in this 
testsuite there are going to be 2 (numOfBookies) bookies and + * we are having ensemble of size 2. + */ + LedgerHandle lh = bkc.createLedger(2, 1, digestType, "testPasswd".getBytes()); + long lId = lh.getId(); + /* + * ledger is writeclosed without adding any entry. + */ + lh.close(); + CountDownLatch callbackCalled = new CountDownLatch(1); + AtomicBoolean exceptionInCallback = new AtomicBoolean(false); + AtomicInteger exceptionCode = new AtomicInteger(BKException.Code.OK); + BookKeeperAdmin bkAdmin = new BookKeeperAdmin(zkUtil.getZooKeeperConnectString()); + /* + * since no entry is added, callback is supposed to fail with + * NoSuchLedgerExistsException. + */ + bkAdmin.asyncGetListOfEntriesOfLedger(addressByIndex(0), lId) + .whenComplete((availabilityOfEntriesOfLedger, throwable) -> { + exceptionInCallback.set(throwable != null); + if (throwable != null) { + exceptionCode.set(BKException.getExceptionCode(throwable)); + } + callbackCalled.countDown(); + }); + callbackCalled.await(); + assertTrue("Exception occurred", exceptionInCallback.get()); + assertEquals("Exception code", BKException.Code.NoSuchLedgerExistsException, exceptionCode.get()); + bkAdmin.close(); + bkc.close(); + } + + @Test + public void testAreEntriesOfLedgerStoredInTheBookieForLastEmptySegment() throws Exception { + int lastEntryId = 10; + long ledgerId = 100L; + BookieId bookie0 = new BookieSocketAddress("bookie0:3181").toBookieId(); + BookieId bookie1 = new BookieSocketAddress("bookie1:3181").toBookieId(); + BookieId bookie2 = new BookieSocketAddress("bookie2:3181").toBookieId(); + BookieId bookie3 = new BookieSocketAddress("bookie3:3181").toBookieId(); + + List ensembleOfSegment1 = new ArrayList(); + ensembleOfSegment1.add(bookie0); + ensembleOfSegment1.add(bookie1); + ensembleOfSegment1.add(bookie2); + + List ensembleOfSegment2 = new ArrayList(); + ensembleOfSegment2.add(bookie3); + ensembleOfSegment2.add(bookie1); + ensembleOfSegment2.add(bookie2); + + LedgerMetadataBuilder builder = LedgerMetadataBuilder.create(); + builder.withId(ledgerId) + .withEnsembleSize(3) + .withWriteQuorumSize(3) + .withAckQuorumSize(2) + .withDigestType(digestType.toApiDigestType()) + .withPassword(PASSWORD.getBytes()) + .newEnsembleEntry(0, ensembleOfSegment1) + .newEnsembleEntry(lastEntryId + 1, ensembleOfSegment2) + .withLastEntryId(lastEntryId).withLength(65576).withClosedState(); + LedgerMetadata meta = builder.build(); + + assertFalse("expected areEntriesOfLedgerStoredInTheBookie to return False for bookie3", + BookKeeperAdmin.areEntriesOfLedgerStoredInTheBookie(ledgerId, bookie3, meta)); + assertTrue("expected areEntriesOfLedgerStoredInTheBookie to return true for bookie2", + BookKeeperAdmin.areEntriesOfLedgerStoredInTheBookie(ledgerId, bookie2, meta)); + } + + @Test + public void testBookkeeperAdminFormatResetsLedgerIds() throws Exception { + ClientConfiguration conf = new ClientConfiguration(); + conf.setMetadataServiceUri(zkUtil.getMetadataServiceUri()); + + /* + * in this testsuite there are going to be 2 (numOfBookies) ledgers + * written and when formatting the BookieAdmin i expect that the + * ledger ids restart from 0 + */ + int numOfLedgers = 2; + try (BookKeeper bkc = new BookKeeper(conf)) { + Set ledgerIds = new HashSet<>(); + for (int n = 0; n < numOfLedgers; n++) { + try (LedgerHandle lh = bkc.createLedger(numOfBookies, numOfBookies, digestType, "L".getBytes())) { + ledgerIds.add(lh.getId()); + lh.addEntry("000".getBytes()); + } + } + + try (BookKeeperAdmin bkAdmin = new 
BookKeeperAdmin(zkUtil.getZooKeeperConnectString())) { + bkAdmin.format(baseConf, false, true); + } + + /** + * ledgers created after format produce the same ids + */ + for (int n = 0; n < numOfLedgers; n++) { + try (LedgerHandle lh = bkc.createLedger(numOfBookies, numOfBookies, digestType, "L".getBytes())) { + lh.addEntry("000".getBytes()); + assertTrue(ledgerIds.contains(lh.getId())); + } + } + } + } + + private void testBookieServiceInfo(boolean readonly, boolean legacy) throws Exception { + File tmpDir = tmpDirs.createNew("bookie", "test"); + final ServerConfiguration conf = TestBKConfiguration.newServerConfiguration() + .setJournalDirName(tmpDir.getPath()) + .setLedgerDirNames(new String[]{tmpDir.getPath()}) + .setBookiePort(PortManager.nextFreePort()) + .setMetadataServiceUri(metadataServiceUri); + + LifecycleComponent server = Main.buildBookieServer(new BookieConfiguration(conf)); + // 2. start the server + CompletableFuture stackComponentFuture = ComponentStarter.startComponent(server); + while (server.lifecycleState() != Lifecycle.State.STARTED) { + Thread.sleep(100); + } + + ServerConfiguration bkConf = newServerConfiguration().setForceReadOnlyBookie(readonly); + BookieServer bkServer = startBookie(bkConf).getServer(); + + BookieId bookieId = bkServer.getBookieId(); + String host = bkServer.getLocalAddress().getHostName(); + int port = bkServer.getLocalAddress().getPort(); + + if (legacy) { + String regPath = ZKMetadataDriverBase.resolveZkLedgersRootPath(bkConf) + "/" + AVAILABLE_NODE; + regPath = readonly + ? regPath + READONLY + "/" + bookieId + : regPath + "/" + bookieId.toString(); + // deleting the metadata, so that the bookie registration should + // continue successfully with legacy BookieServiceInfo + zkc.setData(regPath, new byte[]{}, -1); + } + + try (BookKeeperAdmin bkAdmin = new BookKeeperAdmin(zkUtil.getZooKeeperConnectString())) { + BookieServiceInfo bookieServiceInfo = bkAdmin.getBookieServiceInfo(bookieId); + + assertThat(bookieServiceInfo.getEndpoints().size(), is(1)); + BookieServiceInfo.Endpoint endpoint = bookieServiceInfo.getEndpoints().stream() + .filter(e -> Objects.equals(e.getId(), bookieId.getId())) + .findFirst() + .get(); + assertNotNull("Endpoint " + bookieId + " not found.", endpoint); + + assertThat(endpoint.getHost(), is(host)); + assertThat(endpoint.getPort(), is(port)); + assertThat(endpoint.getProtocol(), is("bookie-rpc")); + } + + bkServer.shutdown(); + stackComponentFuture.cancel(true); + } + + @Test + public void testBookieServiceInfoWritable() throws Exception { + testBookieServiceInfo(false, false); + } + + @Test + public void testBookieServiceInfoReadonly() throws Exception { + testBookieServiceInfo(true, false); + } + + @Test + public void testLegacyBookieServiceInfo() throws Exception { + testBookieServiceInfo(false, true); + } } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BookKeeperClientTestsWithBookieErrors.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BookKeeperClientTestsWithBookieErrors.java index 8a09c62ac3d..373e4f523c8 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BookKeeperClientTestsWithBookieErrors.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BookKeeperClientTestsWithBookieErrors.java @@ -29,7 +29,7 @@ import java.util.HashMap; import java.util.List; import java.util.function.Consumer; - +import org.apache.bookkeeper.bookie.BookieException; import org.apache.bookkeeper.bookie.SortedLedgerStorage; import 
org.apache.bookkeeper.client.BookKeeper.DigestType; import org.apache.bookkeeper.conf.ClientConfiguration; @@ -107,7 +107,7 @@ public MockSortedLedgerStorage() { } @Override - public ByteBuf getEntry(long ledgerId, long entryId) throws IOException { + public ByteBuf getEntry(long ledgerId, long entryId) throws IOException, BookieException { Consumer faultInjection; synchronized (lock) { faultInjection = storageFaultInjectionsMap.get(this); diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BookKeeperClientZKSessionExpiry.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BookKeeperClientZKSessionExpiry.java index b1a8bb66dd7..c72834397e0 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BookKeeperClientZKSessionExpiry.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BookKeeperClientZKSessionExpiry.java @@ -51,7 +51,7 @@ public void run() { byte[] sessionPasswd = bkc.getZkHandle().getSessionPasswd(); try { - ZooKeeperWatcherBase watcher = new ZooKeeperWatcherBase(10000); + ZooKeeperWatcherBase watcher = new ZooKeeperWatcherBase(10000, false); ZooKeeper zk = new ZooKeeper(zkUtil.getZooKeeperConnectString(), 10000, watcher, sessionId, sessionPasswd); zk.close(); diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BookKeeperCloseTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BookKeeperCloseTest.java index 850fe5d7ef2..e9db5b9d736 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BookKeeperCloseTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BookKeeperCloseTest.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file @@ -25,18 +25,17 @@ import static org.junit.Assert.fail; import com.google.common.util.concurrent.SettableFuture; - import io.netty.buffer.ByteBuf; - import java.io.IOException; import java.util.Enumeration; import java.util.Set; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; - +import java.util.function.BiConsumer; import org.apache.bookkeeper.bookie.Bookie; import org.apache.bookkeeper.bookie.BookieException; +import org.apache.bookkeeper.bookie.TestBookieImpl; import org.apache.bookkeeper.client.AsyncCallback.AddCallback; import org.apache.bookkeeper.client.AsyncCallback.CloseCallback; import org.apache.bookkeeper.client.AsyncCallback.CreateCallback; @@ -44,7 +43,7 @@ import org.apache.bookkeeper.client.BKException.BKClientClosedException; import org.apache.bookkeeper.client.BookKeeper.DigestType; import org.apache.bookkeeper.conf.ServerConfiguration; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GenericCallback; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.WriteCallback; import org.apache.bookkeeper.test.BookKeeperClusterTestCase; @@ -65,6 +64,7 @@ public class BookKeeperCloseTest extends BookKeeperClusterTestCase { .getLogger(BookKeeperCloseTest.class); private DigestType digestType = DigestType.CRC32; private static final String PASSWORD = "testPasswd"; + private static final BiConsumer NOOP_BICONSUMER = (l, e) -> { }; public BookKeeperCloseTest() { super(3); @@ -73,11 +73,11 @@ public BookKeeperCloseTest() { private void restartBookieSlow() throws Exception{ ServerConfiguration conf = killBookie(0); - Bookie delayBookie = new Bookie(conf) { + Bookie delayBookie = new TestBookieImpl(conf) { @Override public void recoveryAddEntry(ByteBuf entry, WriteCallback cb, Object ctx, byte[] masterKey) - throws IOException, BookieException { + throws IOException, BookieException, InterruptedException { try { Thread.sleep(5000); } catch (InterruptedException ie) { @@ -91,7 +91,7 @@ public void recoveryAddEntry(ByteBuf entry, WriteCallback cb, @Override public void addEntry(ByteBuf entry, boolean ackBeforeSync, WriteCallback cb, Object ctx, byte[] masterKey) - throws IOException, BookieException { + throws IOException, BookieException, InterruptedException { try { Thread.sleep(5000); } catch (InterruptedException ie) { @@ -104,7 +104,7 @@ public void addEntry(ByteBuf entry, boolean ackBeforeSync, WriteCallback cb, @Override public ByteBuf readEntry(long ledgerId, long entryId) - throws IOException, NoLedgerException { + throws IOException, NoLedgerException, BookieException { try { Thread.sleep(5000); } catch (InterruptedException ie) { @@ -115,8 +115,7 @@ public ByteBuf readEntry(long ledgerId, long entryId) return super.readEntry(ledgerId, entryId); } }; - bsConfs.add(conf); - bs.add(startBookie(conf, delayBookie)); + startAndAddBookie(conf, delayBookie); } /** @@ -152,7 +151,7 @@ public void createComplete(int rc, LedgerHandle lh, Object ctx) { // wait for creating the ledger assertTrue("create ledger call should have completed", openLatch.await(20, TimeUnit.SECONDS)); - assertEquals("Succesfully created ledger through closed bkclient!", + assertEquals("Successfully created ledger through closed bkclient!", BKException.Code.ClientClosedException, returnCode.get()); } @@ -470,7 +469,7 @@ Set getResult(int time, TimeUnit unit) throws Exception { @Test public 
void testBookKeeperAdmin() throws Exception { BookKeeper bk = new BookKeeper(baseClientConf, zkc); - try (BookKeeperAdmin bkadmin = new BookKeeperAdmin(bk)) { + try (BookKeeperAdmin bkadmin = new BookKeeperAdmin(bk, baseClientConf)) { LOG.info("Create ledger and add entries to it"); LedgerHandle lh1 = createLedgerWithEntries(bk, 100); @@ -478,7 +477,7 @@ public void testBookKeeperAdmin() throws Exception { LedgerHandle lh3 = createLedgerWithEntries(bk, 100); lh3.close(); - BookieSocketAddress bookieToKill = getBookie(0); + BookieId bookieToKill = getBookie(0); killBookie(bookieToKill); startNewBookie(); @@ -519,7 +518,7 @@ public void testBookKeeperAdmin() throws Exception { try { bkadmin.replicateLedgerFragment(lh3, - checkercb.getResult(10, TimeUnit.SECONDS).iterator().next()); + checkercb.getResult(10, TimeUnit.SECONDS).iterator().next(), NOOP_BICONSUMER); fail("Shouldn't be able to replicate with a closed client"); } catch (BKException.BKClientClosedException cce) { // correct behaviour diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BookKeeperDiskSpaceWeightedLedgerPlacementTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BookKeeperDiskSpaceWeightedLedgerPlacementTest.java index fec5dd84b6d..91612ec5c79 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BookKeeperDiskSpaceWeightedLedgerPlacementTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BookKeeperDiskSpaceWeightedLedgerPlacementTest.java @@ -29,13 +29,13 @@ import java.util.Optional; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; - import org.apache.bookkeeper.bookie.Bookie; +import org.apache.bookkeeper.bookie.TestBookieImpl; import org.apache.bookkeeper.client.BookKeeper.DigestType; import org.apache.bookkeeper.common.testing.annotations.FlakyTest; import org.apache.bookkeeper.conf.ClientConfiguration; import org.apache.bookkeeper.conf.ServerConfiguration; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.proto.BookieServer; import org.apache.bookkeeper.test.BookKeeperClusterTestCase; import org.slf4j.Logger; @@ -57,7 +57,7 @@ class BookKeeperCheckInfoReader extends BookKeeper { super(conf); } - void blockUntilBookieWeightIs(BookieSocketAddress bookie, Optional target) throws InterruptedException { + void blockUntilBookieWeightIs(BookieId bookie, Optional target) throws InterruptedException { long startMsecs = System.currentTimeMillis(); Optional freeDiskSpace = Optional.empty(); while (System.currentTimeMillis() < (startMsecs + MS_WEIGHT_UPDATE_TIMEOUT)) { @@ -69,7 +69,7 @@ void blockUntilBookieWeightIs(BookieSocketAddress bookie, Optional target) } fail(String.format( "Server %s still has weight %s rather than %s", - bookie.toString(), freeDiskSpace.toString(), target.toString())); + bookie.toString(), freeDiskSpace, target.toString())); } } @@ -77,7 +77,7 @@ private BookieServer restartBookie( BookKeeperCheckInfoReader client, ServerConfiguration conf, final long initialFreeDiskSpace, final long finalFreeDiskSpace, final AtomicBoolean useFinal) throws Exception { final AtomicBoolean ready = useFinal == null ? 
new AtomicBoolean(false) : useFinal; - Bookie bookieWithCustomFreeDiskSpace = new Bookie(conf) { + Bookie bookieWithCustomFreeDiskSpace = new TestBookieImpl(conf) { long startTime = System.currentTimeMillis(); @Override public long getTotalFreeSpace() { @@ -92,10 +92,8 @@ public long getTotalFreeSpace() { } } }; - bsConfs.add(conf); - BookieServer server = startBookie(conf, bookieWithCustomFreeDiskSpace); - bs.add(server); - client.blockUntilBookieWeightIs(server.getLocalAddress(), Optional.of(initialFreeDiskSpace)); + BookieServer server = startAndAddBookie(conf, bookieWithCustomFreeDiskSpace).getServer(); + client.blockUntilBookieWeightIs(server.getBookieId(), Optional.of(initialFreeDiskSpace)); if (useFinal == null) { ready.set(true); } @@ -113,8 +111,8 @@ private BookieServer replaceBookieWithCustomFreeDiskSpaceBookie( BookKeeperCheckInfoReader client, BookieServer bookie, final long freeDiskSpace) throws Exception { - for (int i = 0; i < bs.size(); i++) { - if (bs.get(i).getLocalAddress().equals(bookie.getLocalAddress())) { + for (int i = 0; i < bookieCount(); i++) { + if (addressByIndex(i).equals(bookie.getBookieId())) { return replaceBookieWithCustomFreeDiskSpaceBookie(client, i, freeDiskSpace); } } @@ -125,7 +123,7 @@ private BookieServer replaceBookieWithCustomFreeDiskSpaceBookie( BookKeeperCheckInfoReader client, int bookieIdx, long initialFreeDiskSpace, long finalFreeDiskSpace, AtomicBoolean useFinal) throws Exception { - BookieSocketAddress addr = bs.get(bookieIdx).getLocalAddress(); + BookieId addr = addressByIndex(bookieIdx); LOG.info("Killing bookie {}", addr); ServerConfiguration conf = killBookieAndWaitForZK(bookieIdx); client.blockUntilBookieWeightIs(addr, Optional.empty()); @@ -155,14 +153,12 @@ public void testDiskSpaceWeightedBookieSelection() throws Exception { replaceBookieWithCustomFreeDiskSpaceBookie(client, 0, multiple * freeDiskSpace); } } - Map m = new HashMap(); - for (BookieServer b : bs) { - m.put(b.getLocalAddress(), 0); - } + Map m = new HashMap<>(); + bookieAddresses().forEach(a -> m.put(a, 0)); for (int i = 0; i < 2000; i++) { LedgerHandle lh = client.createLedger(3, 3, DigestType.CRC32, "testPasswd".getBytes()); - for (BookieSocketAddress b : lh.getLedgerMetadata().getEnsemble(0)) { + for (BookieId b : lh.getLedgerMetadata().getEnsembleAt(0)) { m.put(b, m.get(b) + 1); } } @@ -170,12 +166,12 @@ public void testDiskSpaceWeightedBookieSelection() throws Exception { // make sure that bookies with higher weight(the last 2 bookies) are chosen 3X as often as the median; // since the number of ledgers created is small (2000), we allow a range of 2X to 4X instead of the exact 3X for (int i = 0; i < numBookies - 2; i++) { - double ratio1 = (double) m.get(bs.get(numBookies - 2).getLocalAddress()) - / (double) m.get(bs.get(i).getLocalAddress()); + double ratio1 = (double) m.get(addressByIndex(numBookies - 2)) + / (double) m.get(addressByIndex(i)); assertTrue("Weigheted placement is not honored: " + Math.abs(ratio1 - multiple), Math.abs(ratio1 - multiple) < 1); - double ratio2 = (double) m.get(bs.get(numBookies - 1).getLocalAddress()) - / (double) m.get(bs.get(i).getLocalAddress()); + double ratio2 = (double) m.get(addressByIndex(numBookies - 1)) + / (double) m.get(addressByIndex(i)); assertTrue("Weigheted placement is not honored: " + Math.abs(ratio2 - multiple), Math.abs(ratio2 - multiple) < 1); } @@ -205,14 +201,12 @@ public void testDiskSpaceWeightedBookieSelectionWithChangingWeights() throws Exc replaceBookieWithCustomFreeDiskSpaceBookie(client, 0, multiple * 
freeDiskSpace); } } - Map m = new HashMap(); - for (BookieServer b : bs) { - m.put(b.getLocalAddress(), 0); - } + Map m = new HashMap<>(); + bookieAddresses().forEach(a -> m.put(a, 0)); for (int i = 0; i < 2000; i++) { LedgerHandle lh = client.createLedger(3, 3, DigestType.CRC32, "testPasswd".getBytes()); - for (BookieSocketAddress b : lh.getLedgerMetadata().getEnsemble(0)) { + for (BookieId b : lh.getLedgerMetadata().getEnsembleAt(0)) { m.put(b, m.get(b) + 1); } } @@ -220,34 +214,33 @@ public void testDiskSpaceWeightedBookieSelectionWithChangingWeights() throws Exc // make sure that bookies with higher weight(the last 2 bookies) are chosen 3X as often as the median; // since the number of ledgers created is small (2000), we allow a range of 2X to 4X instead of the exact 3X for (int i = 0; i < numBookies - 2; i++) { - double ratio1 = (double) m.get(bs.get(numBookies - 2).getLocalAddress()) - / (double) m.get(bs.get(i).getLocalAddress()); + double ratio1 = (double) m.get(addressByIndex(numBookies - 2)) + / (double) m.get(addressByIndex(i)); assertTrue("Weigheted placement is not honored: " + Math.abs(ratio1 - multiple), Math.abs(ratio1 - multiple) < 1); - double ratio2 = (double) m.get(bs.get(numBookies - 1).getLocalAddress()) - / (double) m.get(bs.get(i).getLocalAddress()); + double ratio2 = (double) m.get(addressByIndex(numBookies - 1)) + / (double) m.get(addressByIndex(i)); assertTrue("Weigheted placement is not honored: " + Math.abs(ratio2 - multiple), - Math.abs(ratio2 - multiple) < 1); + Math.abs(ratio2 - multiple) < 1); } // Restart the bookies in such a way that the first 2 bookies go from 1MB to 3MB free space and the last // 2 bookies go from 3MB to 1MB - BookieServer server1 = bs.get(0); - BookieServer server2 = bs.get(1); - BookieServer server3 = bs.get(numBookies - 2); - BookieServer server4 = bs.get(numBookies - 1); + BookieServer server1 = serverByIndex(0); + BookieServer server2 = serverByIndex(1); + BookieServer server3 = serverByIndex(numBookies - 2); + BookieServer server4 = serverByIndex(numBookies - 1); server1 = replaceBookieWithCustomFreeDiskSpaceBookie(client, server1, multiple * freeDiskSpace); server2 = replaceBookieWithCustomFreeDiskSpaceBookie(client, server2, multiple * freeDiskSpace); server3 = replaceBookieWithCustomFreeDiskSpaceBookie(client, server3, freeDiskSpace); server4 = replaceBookieWithCustomFreeDiskSpaceBookie(client, server4, freeDiskSpace); - for (BookieServer b : bs) { - m.put(b.getLocalAddress(), 0); - } + bookieAddresses().forEach(a -> m.put(a, 0)); + for (int i = 0; i < 2000; i++) { LedgerHandle lh = client.createLedger(3, 3, DigestType.CRC32, "testPasswd".getBytes()); - for (BookieSocketAddress b : lh.getLedgerMetadata().getEnsemble(0)) { + for (BookieId b : lh.getLedgerMetadata().getEnsembleAt(0)) { m.put(b, m.get(b) + 1); } } @@ -255,18 +248,18 @@ public void testDiskSpaceWeightedBookieSelectionWithChangingWeights() throws Exc // make sure that bookies with higher weight(the last 2 bookies) are chosen 3X as often as the median; // since the number of ledgers created is small (2000), we allow a range of 2X to 4X instead of the exact 3X for (int i = 0; i < numBookies; i++) { - if (server1.getLocalAddress().equals(bs.get(i).getLocalAddress()) - || server2.getLocalAddress().equals(bs.get(i).getLocalAddress())) { + if (server1.getLocalAddress().equals(addressByIndex(i)) + || server2.getLocalAddress().equals(addressByIndex(i))) { continue; } - double ratio1 = (double) m.get(server1.getLocalAddress()) - / (double) 
m.get(bs.get(i).getLocalAddress()); + double ratio1 = (double) m.get(server1) + / (double) m.get(addressByIndex(i)); assertTrue("Weigheted placement is not honored: " + Math.abs(ratio1 - multiple), Math.abs(ratio1 - multiple) < 1); - double ratio2 = (double) m.get(server2.getLocalAddress()) - / (double) m.get(bs.get(i).getLocalAddress()); + double ratio2 = (double) m.get(server2) + / (double) m.get(addressByIndex(i)); assertTrue("Weigheted placement is not honored: " + Math.abs(ratio2 - multiple), - Math.abs(ratio2 - multiple) < 1); + Math.abs(ratio2 - multiple) < 1); } client.close(); } @@ -295,59 +288,56 @@ public void testDiskSpaceWeightedBookieSelectionWithBookiesDying() throws Except replaceBookieWithCustomFreeDiskSpaceBookie(client, 0, multiple * freeDiskSpace); } } - Map m = new HashMap(); - for (BookieServer b : bs) { - m.put(b.getLocalAddress(), 0); - } + Map m = new HashMap<>(); + bookieAddresses().forEach(a -> m.put(a, 0)); for (int i = 0; i < 2000; i++) { LedgerHandle lh = client.createLedger(3, 3, DigestType.CRC32, "testPasswd".getBytes()); - for (BookieSocketAddress b : lh.getLedgerMetadata().getEnsemble(0)) { + for (BookieId b : lh.getLedgerMetadata().getEnsembleAt(0)) { m.put(b, m.get(b) + 1); } } // make sure that bookies with higher weight are chosen 3X as often as the median; // since the number of ledgers is small (2000), there may be variation - double ratio1 = (double) m.get(bs.get(numBookies - 2).getLocalAddress()) - / (double) m.get(bs.get(0).getLocalAddress()); + double ratio1 = (double) m.get(addressByIndex(numBookies - 2)) + / (double) m.get(addressByIndex(0)); assertTrue("Weigheted placement is not honored: " + Math.abs(ratio1 - multiple), Math.abs(ratio1 - multiple) < 1); - double ratio2 = (double) m.get(bs.get(numBookies - 1).getLocalAddress()) - / (double) m.get(bs.get(1).getLocalAddress()); + double ratio2 = (double) m.get(addressByIndex(numBookies - 1)) + / (double) m.get(addressByIndex(1)); assertTrue("Weigheted placement is not honored: " + Math.abs(ratio2 - multiple), - Math.abs(ratio2 - multiple) < 1); + Math.abs(ratio2 - multiple) < 1); // Bring down the 2 bookies that had higher weight; after this the allocation to all // the remaining bookies should be uniform - for (BookieServer b : bs) { - m.put(b.getLocalAddress(), 0); - } - BookieServer server1 = bs.get(numBookies - 2); - BookieServer server2 = bs.get(numBookies - 1); + bookieAddresses().forEach(a -> m.put(a, 0)); + + BookieServer server1 = serverByIndex(numBookies - 2); + BookieServer server2 = serverByIndex(numBookies - 1); killBookieAndWaitForZK(numBookies - 1); killBookieAndWaitForZK(numBookies - 2); for (int i = 0; i < 2000; i++) { LedgerHandle lh = client.createLedger(3, 3, DigestType.CRC32, "testPasswd".getBytes()); - for (BookieSocketAddress b : lh.getLedgerMetadata().getEnsemble(0)) { + for (BookieId b : lh.getLedgerMetadata().getEnsembleAt(0)) { m.put(b, m.get(b) + 1); } } // make sure that bookies with higher weight are chosen 3X as often as the median; for (int i = 0; i < numBookies - 3; i++) { - double delta = Math.abs((double) m.get(bs.get(i).getLocalAddress()) - - (double) m.get(bs.get(i + 1).getLocalAddress())); - delta = (delta * 100) / (double) m.get(bs.get(i + 1).getLocalAddress()); + double delta = Math.abs((double) m.get(addressByIndex(i)) + - (double) m.get(addressByIndex(i + 1))); + delta = (delta * 100) / (double) m.get(addressByIndex(i + 1)); // the deviation should be less than 30% assertTrue("Weigheted placement is not honored: " + delta, delta <= 30); } // since the 
following 2 bookies were down, they shouldn't ever be selected - assertTrue("Weigheted placement is not honored" + m.get(server1.getLocalAddress()), - m.get(server1.getLocalAddress()) == 0); - assertTrue("Weigheted placement is not honored" + m.get(server2.getLocalAddress()), - m.get(server2.getLocalAddress()) == 0); + assertTrue("Weigheted placement is not honored" + m.get(server1), + m.get(server1) == 0); + assertTrue("Weigheted placement is not honored" + m.get(server2), + m.get(server2) == 0); client.close(); } @@ -375,14 +365,13 @@ public void testDiskSpaceWeightedBookieSelectionWithBookiesBeingAdded() throws E // let the last two bookies be down initially ServerConfiguration conf1 = killBookieAndWaitForZK(numBookies - 1); ServerConfiguration conf2 = killBookieAndWaitForZK(numBookies - 2); - Map m = new HashMap(); - for (BookieServer b : bs) { - m.put(b.getLocalAddress(), 0); - } + Map m = new HashMap<>(); + + bookieAddresses().forEach(a -> m.put(a, 0)); for (int i = 0; i < 2000; i++) { LedgerHandle lh = client.createLedger(3, 3, DigestType.CRC32, "testPasswd".getBytes()); - for (BookieSocketAddress b : lh.getLedgerMetadata().getEnsemble(0)) { + for (BookieId b : lh.getLedgerMetadata().getEnsembleAt(0)) { m.put(b, m.get(b) + 1); } } @@ -390,9 +379,9 @@ public void testDiskSpaceWeightedBookieSelectionWithBookiesBeingAdded() throws E // make sure that bookies with higher weight are chosen 3X as often as the median; // since the number of ledgers is small (2000), there may be variation for (int i = 0; i < numBookies - 3; i++) { - double delta = Math.abs((double) m.get(bs.get(i).getLocalAddress()) - - (double) m.get(bs.get(i + 1).getLocalAddress())); - delta = (delta * 100) / (double) m.get(bs.get(i + 1).getLocalAddress()); + double delta = Math.abs((double) m.get(addressByIndex(i)) + - (double) m.get(addressByIndex(i + 1))); + delta = (delta * 100) / (double) m.get(addressByIndex(i + 1)); // the deviation should be less than 30% assertTrue("Weigheted placement is not honored: " + delta, delta <= 30); } @@ -401,12 +390,11 @@ public void testDiskSpaceWeightedBookieSelectionWithBookiesBeingAdded() throws E restartBookie(client, conf1, multiple * freeDiskSpace, multiple * freeDiskSpace, null); restartBookie(client, conf2, multiple * freeDiskSpace, multiple * freeDiskSpace, null); - for (BookieServer b : bs) { - m.put(b.getLocalAddress(), 0); - } + bookieAddresses().forEach(a -> m.put(a, 0)); + for (int i = 0; i < 2000; i++) { LedgerHandle lh = client.createLedger(3, 3, DigestType.CRC32, "testPasswd".getBytes()); - for (BookieSocketAddress b : lh.getLedgerMetadata().getEnsemble(0)) { + for (BookieId b : lh.getLedgerMetadata().getEnsembleAt(0)) { m.put(b, m.get(b) + 1); } } @@ -414,12 +402,12 @@ public void testDiskSpaceWeightedBookieSelectionWithBookiesBeingAdded() throws E // make sure that bookies with higher weight(the last 2 bookies) are chosen 3X as often as the median; // since the number of ledgers created is small (2000), we allow a range of 2X to 4X instead of the exact 3X for (int i = 0; i < numBookies - 2; i++) { - double ratio1 = (double) m.get(bs.get(numBookies - 2).getLocalAddress()) - / (double) m.get(bs.get(i).getLocalAddress()); + double ratio1 = (double) m.get(addressByIndex(numBookies - 2)) + / (double) m.get(addressByIndex(i)); assertTrue("Weigheted placement is not honored: " + Math.abs(ratio1 - multiple), Math.abs(ratio1 - multiple) < 1); - double ratio2 = (double) m.get(bs.get(numBookies - 1).getLocalAddress()) - / (double) m.get(bs.get(i).getLocalAddress()); + double 
ratio2 = (double) m.get(addressByIndex(numBookies - 1)) + / (double) m.get(addressByIndex(i)); assertTrue("Weigheted placement is not honored: " + Math.abs(ratio2 - multiple), Math.abs(ratio2 - multiple) < 1); } @@ -455,22 +443,21 @@ public void testDiskSpaceWeightedBookieSelectionWithPeriodicBookieInfoUpdate() t client, 0, freeDiskSpace, multiple * freeDiskSpace, useHigherValue); } } - Map m = new HashMap(); - for (BookieServer b : bs) { - m.put(b.getLocalAddress(), 0); - } + Map m = new HashMap<>(); + + bookieAddresses().forEach(a -> m.put(a, 0)); for (int i = 0; i < 2000; i++) { LedgerHandle lh = client.createLedger(3, 3, DigestType.CRC32, "testPasswd".getBytes()); - for (BookieSocketAddress b : lh.getLedgerMetadata().getEnsemble(0)) { + for (BookieId b : lh.getLedgerMetadata().getEnsembleAt(0)) { m.put(b, m.get(b) + 1); } } for (int i = 0; i < numBookies - 1; i++) { - double delta = Math.abs((double) m.get(bs.get(i).getLocalAddress()) - - (double) m.get(bs.get(i + 1).getLocalAddress())); - delta = (delta * 100) / (double) m.get(bs.get(i + 1).getLocalAddress()); + double delta = Math.abs((double) m.get(addressByIndex(i)) + - (double) m.get(addressByIndex(i + 1))); + delta = (delta * 100) / (double) m.get(addressByIndex(i + 1)); assertTrue("Weigheted placement is not honored: " + delta, delta <= 30); // the deviation should be <30% } @@ -480,18 +467,16 @@ public void testDiskSpaceWeightedBookieSelectionWithPeriodicBookieInfoUpdate() t Thread.sleep(updateIntervalSecs * 1000); for (int i = 0; i < numBookies; i++) { if (i < numBookies - 2) { - client.blockUntilBookieWeightIs(bs.get(i).getLocalAddress(), Optional.of(freeDiskSpace)); + client.blockUntilBookieWeightIs(addressByIndex(i), Optional.of(freeDiskSpace)); } else { - client.blockUntilBookieWeightIs(bs.get(i).getLocalAddress(), Optional.of(freeDiskSpace * multiple)); + client.blockUntilBookieWeightIs(addressByIndex(i), Optional.of(freeDiskSpace * multiple)); } } - for (BookieServer b : bs) { - m.put(b.getLocalAddress(), 0); - } + bookieAddresses().forEach(a -> m.put(a, 0)); for (int i = 0; i < 2000; i++) { LedgerHandle lh = client.createLedger(3, 3, DigestType.CRC32, "testPasswd".getBytes()); - for (BookieSocketAddress b : lh.getLedgerMetadata().getEnsemble(0)) { + for (BookieId b : lh.getLedgerMetadata().getEnsembleAt(0)) { m.put(b, m.get(b) + 1); } } @@ -499,12 +484,12 @@ public void testDiskSpaceWeightedBookieSelectionWithPeriodicBookieInfoUpdate() t // make sure that bookies with higher weight(the last 2 bookies) are chosen 3X as often as the median; // since the number of ledgers created is small (2000), we allow a range of 2X to 4X instead of the exact 3X for (int i = 0; i < numBookies - 2; i++) { - double ratio1 = (double) m.get(bs.get(numBookies - 2).getLocalAddress()) - / (double) m.get(bs.get(i).getLocalAddress()); + double ratio1 = (double) m.get(addressByIndex(numBookies - 2)) + / (double) m.get(addressByIndex(i)); assertTrue("Weigheted placement is not honored: " + Math.abs(ratio1 - multiple), Math.abs(ratio1 - multiple) < 1); - double ratio2 = (double) m.get(bs.get(numBookies - 1).getLocalAddress()) - / (double) m.get(bs.get(i).getLocalAddress()); + double ratio2 = (double) m.get(addressByIndex(lastBookieIndex())) + / (double) m.get(addressByIndex(i)); assertTrue("Weigheted placement is not honored: " + Math.abs(ratio2 - multiple), Math.abs(ratio2 - multiple) < 1); } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BookKeeperTest.java 
b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BookKeeperTest.java index 1a22e613e5f..14b71a163d2 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BookKeeperTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BookKeeperTest.java @@ -20,35 +20,60 @@ */ package org.apache.bookkeeper.client; +import static org.apache.bookkeeper.client.BookKeeperClientStats.WRITE_DELAYED_DUE_TO_NOT_ENOUGH_FAULT_DOMAINS; +import static org.apache.bookkeeper.client.BookKeeperClientStats.WRITE_TIMED_OUT_DUE_TO_NOT_ENOUGH_FAULT_DOMAINS; import static org.apache.bookkeeper.common.concurrent.FutureUtils.result; -import static org.junit.Assert.assertArrayEquals; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertSame; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.fail; import io.netty.util.IllegalReferenceCountException; - import java.io.IOException; +import java.net.UnknownHostException; import java.util.Collections; import java.util.Enumeration; +import java.util.List; +import java.util.Set; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; - +import java.util.concurrent.atomic.AtomicLong; import org.apache.bookkeeper.client.AsyncCallback.AddCallback; import org.apache.bookkeeper.client.AsyncCallback.ReadCallback; import org.apache.bookkeeper.client.BKException.BKBookieHandleNotAvailableException; -import org.apache.bookkeeper.client.BKException.BKIllegalOpException; import org.apache.bookkeeper.client.BookKeeper.DigestType; import org.apache.bookkeeper.client.api.WriteFlag; import org.apache.bookkeeper.client.api.WriteHandle; import org.apache.bookkeeper.conf.ClientConfiguration; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.stats.NullStatsLogger; +import org.apache.bookkeeper.stats.StatsLogger; import org.apache.bookkeeper.test.BookKeeperClusterTestCase; +import org.apache.bookkeeper.test.TestStatsProvider; +import org.apache.bookkeeper.util.StaticDNSResolver; +import org.apache.bookkeeper.zookeeper.BoundExponentialBackoffRetryPolicy; +import org.apache.bookkeeper.zookeeper.ZooKeeperClient; +import org.apache.bookkeeper.zookeeper.ZooKeeperWatcherBase; +import org.apache.zookeeper.AsyncCallback.StringCallback; +import org.apache.zookeeper.CreateMode; +import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.KeeperException.ConnectionLossException; +import org.apache.zookeeper.WatchedEvent; +import org.apache.zookeeper.Watcher; +import org.apache.zookeeper.Watcher.Event.EventType; +import org.apache.zookeeper.Watcher.Event.KeeperState; import org.apache.zookeeper.ZooKeeper; -import org.junit.Test; +import org.apache.zookeeper.ZooKeeper.States; +import org.apache.zookeeper.data.ACL; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.condition.EnabledForJreRange; +import org.junit.jupiter.api.condition.JRE; import org.slf4j.Logger; import 
org.slf4j.LoggerFactory; @@ -57,22 +82,23 @@ */ public class BookKeeperTest extends BookKeeperClusterTestCase { private static final Logger LOG = LoggerFactory.getLogger(BookKeeperTest.class); - + private static final long INVALID_LEDGERID = -1L; private final DigestType digestType; public BookKeeperTest() { - super(4); + super(3); this.digestType = DigestType.CRC32; } @Test + @EnabledForJreRange(max = JRE.JAVA_17) public void testConstructionZkDelay() throws Exception { ClientConfiguration conf = new ClientConfiguration(); conf.setMetadataServiceUri(zkUtil.getMetadataServiceUri()) .setZkTimeout(20000); CountDownLatch l = new CountDownLatch(1); - zkUtil.sleepServer(200, TimeUnit.MILLISECONDS, l); + zkUtil.sleepCluster(200, TimeUnit.MILLISECONDS, l); l.await(); BookKeeper bkc = new BookKeeper(conf); @@ -81,20 +107,21 @@ public void testConstructionZkDelay() throws Exception { } @Test + @EnabledForJreRange(max = JRE.JAVA_17) public void testConstructionNotConnectedExplicitZk() throws Exception { ClientConfiguration conf = new ClientConfiguration(); conf.setMetadataServiceUri(zkUtil.getMetadataServiceUri()) .setZkTimeout(20000); CountDownLatch l = new CountDownLatch(1); - zkUtil.sleepServer(200, TimeUnit.MILLISECONDS, l); + zkUtil.sleepCluster(200, TimeUnit.MILLISECONDS, l); l.await(); ZooKeeper zk = new ZooKeeper( zkUtil.getZooKeeperConnectString(), 50, event -> {}); - assertFalse("ZK shouldn't have connected yet", zk.getState().isConnected()); + assertFalse(zk.getState().isConnected(), "ZK shouldn't have connected yet"); try { BookKeeper bkc = new BookKeeper(conf, zk); fail("Shouldn't be able to construct with unconnected zk"); @@ -191,7 +218,7 @@ public void testAsyncReadWithError() throws Exception { final AtomicInteger result = new AtomicInteger(0); final CountDownLatch counter = new CountDownLatch(1); - // Try to write, we shoud get and error callback but not an exception + // Try to write, we should get and error callback but not an exception lh.asyncAddEntry("test".getBytes(), new AddCallback() { public void addComplete(int rc, LedgerHandle lh, long entryId, Object ctx) { result.set(rc); @@ -239,8 +266,8 @@ public void addComplete(int rc, LedgerHandle lh, long entryId, Object ctx) { } }; t.start(); - assertTrue("Close never completed", l.await(10, TimeUnit.SECONDS)); - assertTrue("Close was not successful", success.get()); + assertTrue(l.await(10, TimeUnit.SECONDS), "Close never completed"); + assertTrue(success.get(), "Close was not successful"); } } @@ -251,15 +278,15 @@ public void testIsClosed() throws Exception { BookKeeper bkc = new BookKeeper(conf); LedgerHandle lh = bkc.createLedger(digestType, "testPasswd".getBytes()); - Long lId = lh.getId(); + long lId = lh.getId(); lh.addEntry("000".getBytes()); boolean result = bkc.isClosed(lId); - assertTrue("Ledger shouldn't be flagged as closed!", !result); + assertFalse(result, "Ledger shouldn't be flagged as closed!"); lh.close(); result = bkc.isClosed(lId); - assertTrue("Ledger should be flagged as closed!", result); + assertTrue(result, "Ledger should be flagged as closed!"); bkc.close(); } @@ -323,9 +350,9 @@ public void testAutoCloseableBookKeeper() throws Exception { lh.addEntry("foobar".getBytes()); } } - assertTrue("Ledger should be closed!", bkc.isClosed(ledgerId)); + assertTrue(bkc.isClosed(ledgerId), "Ledger should be closed!"); } - assertTrue("BookKeeper should be closed!", bkc2.closed); + assertTrue(bkc2.closed, "BookKeeper should be closed!"); } @Test @@ -344,9 +371,8 @@ public void testReadAfterLastAddConfirmed() throws 
Exception { try (BookKeeper bkReader = new BookKeeper(clientConfiguration); LedgerHandle rlh = bkReader.openLedgerNoRecovery(ledgerId, digestType, "testPasswd".getBytes())) { - assertTrue( - "Expected LAC of rlh: " + (numOfEntries - 2) + " actual LAC of rlh: " + rlh.getLastAddConfirmed(), - (rlh.getLastAddConfirmed() == (numOfEntries - 2))); + assertTrue((rlh.getLastAddConfirmed() == (numOfEntries - 2)), "Expected LAC of rlh: " + + (numOfEntries - 2) + " actual LAC of rlh: " + rlh.getLastAddConfirmed()); assertFalse(writeLh.isClosed()); @@ -356,25 +382,23 @@ public void testReadAfterLastAddConfirmed() throws Exception { while (entries.hasMoreElements()) { LedgerEntry entry = entries.nextElement(); String entryString = new String(entry.getEntry()); - assertTrue("Expected entry String: " + ("foobar" + entryId) - + " actual entry String: " + entryString, - entryString.equals("foobar" + entryId)); + assertEquals(entryString, "foobar" + entryId, "Expected entry String: " + ("foobar" + entryId) + + " actual entry String: " + entryString); entryId++; } } try (BookKeeper bkReader = new BookKeeper(clientConfiguration); LedgerHandle rlh = bkReader.openLedgerNoRecovery(ledgerId, digestType, "testPasswd".getBytes())) { - assertTrue( - "Expected LAC of rlh: " + (numOfEntries - 2) + " actual LAC of rlh: " + rlh.getLastAddConfirmed(), - (rlh.getLastAddConfirmed() == (numOfEntries - 2))); + assertTrue((rlh.getLastAddConfirmed() == (numOfEntries - 2)), "Expected LAC of rlh: " + + (numOfEntries - 2) + " actual LAC of rlh: " + rlh.getLastAddConfirmed()); assertFalse(writeLh.isClosed()); // without readUnconfirmedEntries we are not able to read all of the entries try { rlh.readEntries(0, numOfEntries - 1); - fail("shoud not be able to read up to " + (numOfEntries - 1) + " with readEntries"); + fail("should not be able to read up to " + (numOfEntries - 1) + " with readEntries"); } catch (BKException.BKReadException expected) { } @@ -383,27 +407,24 @@ public void testReadAfterLastAddConfirmed() throws Exception { Collections.list(rlh.readEntries(0, rlh.getLastAddConfirmed())).size()); // assert local LAC does not change after reads - assertTrue( - "Expected LAC of rlh: " + (numOfEntries - 2) + " actual LAC of rlh: " + rlh.getLastAddConfirmed(), - (rlh.getLastAddConfirmed() == (numOfEntries - 2))); + assertTrue((rlh.getLastAddConfirmed() == (numOfEntries - 2)), "Expected LAC of rlh: " + + (numOfEntries - 2) + " actual LAC of rlh: " + rlh.getLastAddConfirmed()); // read all entries within the 0..LastAddConfirmed range with readUnconfirmedEntries assertEquals(rlh.getLastAddConfirmed() + 1, Collections.list(rlh.readUnconfirmedEntries(0, rlh.getLastAddConfirmed())).size()); // assert local LAC does not change after reads - assertTrue( - "Expected LAC of rlh: " + (numOfEntries - 2) + " actual LAC of rlh: " + rlh.getLastAddConfirmed(), - (rlh.getLastAddConfirmed() == (numOfEntries - 2))); + assertTrue((rlh.getLastAddConfirmed() == (numOfEntries - 2)), "Expected LAC of rlh: " + + (numOfEntries - 2) + " actual LAC of rlh: " + rlh.getLastAddConfirmed()); // read all entries within the LastAddConfirmed..numOfEntries - 1 range with readUnconfirmedEntries assertEquals(numOfEntries - rlh.getLastAddConfirmed(), Collections.list(rlh.readUnconfirmedEntries(rlh.getLastAddConfirmed(), numOfEntries - 1)).size()); // assert local LAC does not change after reads - assertTrue( - "Expected LAC of rlh: " + (numOfEntries - 2) + " actual LAC of rlh: " + rlh.getLastAddConfirmed(), - (rlh.getLastAddConfirmed() == (numOfEntries - 2))); + 
assertTrue((rlh.getLastAddConfirmed() == (numOfEntries - 2)), "Expected LAC of rlh: " + + (numOfEntries - 2) + " actual LAC of rlh: " + rlh.getLastAddConfirmed()); try { // read all entries within the LastAddConfirmed..numOfEntries range with readUnconfirmedEntries @@ -432,9 +453,8 @@ public void testReadAfterLastAddConfirmed() throws Exception { try (BookKeeper bkReader = new BookKeeper(clientConfiguration); LedgerHandle rlh = bkReader.openLedgerNoRecovery(ledgerId, digestType, "testPasswd".getBytes())) { - assertTrue( - "Expected LAC of rlh: " + (numOfEntries - 2) + " actual LAC of rlh: " + rlh.getLastAddConfirmed(), - (rlh.getLastAddConfirmed() == (numOfEntries - 2))); + assertTrue((rlh.getLastAddConfirmed() == (numOfEntries - 2)), "Expected LAC of rlh: " + + (numOfEntries - 2) + " actual LAC of rlh: " + rlh.getLastAddConfirmed()); assertFalse(writeLh.isClosed()); @@ -444,25 +464,23 @@ public void testReadAfterLastAddConfirmed() throws Exception { while (entries.hasMoreElements()) { LedgerEntry entry = entries.nextElement(); String entryString = new String(entry.getEntry()); - assertTrue("Expected entry String: " + ("foobar" + entryId) - + " actual entry String: " + entryString, - entryString.equals("foobar" + entryId)); + assertEquals(entryString, "foobar" + entryId, "Expected entry String: " + ("foobar" + entryId) + + " actual entry String: " + entryString); entryId++; } } try (BookKeeper bkReader = new BookKeeper(clientConfiguration); LedgerHandle rlh = bkReader.openLedgerNoRecovery(ledgerId, digestType, "testPasswd".getBytes())) { - assertTrue( - "Expected LAC of rlh: " + (numOfEntries - 2) + " actual LAC of rlh: " + rlh.getLastAddConfirmed(), - (rlh.getLastAddConfirmed() == (numOfEntries - 2))); + assertTrue((rlh.getLastAddConfirmed() == (numOfEntries - 2)), "Expected LAC of rlh: " + + (numOfEntries - 2) + " actual LAC of rlh: " + rlh.getLastAddConfirmed()); assertFalse(writeLh.isClosed()); // without readUnconfirmedEntries we are not able to read all of the entries try { rlh.readEntries(0, numOfEntries - 1); - fail("shoud not be able to read up to " + (numOfEntries - 1) + " with readEntries"); + fail("should not be able to read up to " + (numOfEntries - 1) + " with readEntries"); } catch (BKException.BKReadException expected) { } @@ -471,27 +489,24 @@ public void testReadAfterLastAddConfirmed() throws Exception { Collections.list(rlh.readEntries(0, rlh.getLastAddConfirmed())).size()); // assert local LAC does not change after reads - assertTrue( - "Expected LAC of rlh: " + (numOfEntries - 2) + " actual LAC of rlh: " + rlh.getLastAddConfirmed(), - (rlh.getLastAddConfirmed() == (numOfEntries - 2))); + assertTrue((rlh.getLastAddConfirmed() == (numOfEntries - 2)), "Expected LAC of rlh: " + + (numOfEntries - 2) + " actual LAC of rlh: " + rlh.getLastAddConfirmed()); // read all entries within the 0..LastAddConfirmed range with readUnconfirmedEntries assertEquals(rlh.getLastAddConfirmed() + 1, Collections.list(rlh.readUnconfirmedEntries(0, rlh.getLastAddConfirmed())).size()); // assert local LAC does not change after reads - assertTrue( - "Expected LAC of rlh: " + (numOfEntries - 2) + " actual LAC of rlh: " + rlh.getLastAddConfirmed(), - (rlh.getLastAddConfirmed() == (numOfEntries - 2))); + assertTrue((rlh.getLastAddConfirmed() == (numOfEntries - 2)), "Expected LAC of rlh: " + + (numOfEntries - 2) + " actual LAC of rlh: " + rlh.getLastAddConfirmed()); // read all entries within the LastAddConfirmed..numOfEntries - 1 range with readUnconfirmedEntries assertEquals(numOfEntries - 
rlh.getLastAddConfirmed(), Collections.list(rlh.readUnconfirmedEntries(rlh.getLastAddConfirmed(), numOfEntries - 1)).size()); // assert local LAC does not change after reads - assertTrue( - "Expected LAC of rlh: " + (numOfEntries - 2) + " actual LAC of rlh: " + rlh.getLastAddConfirmed(), - (rlh.getLastAddConfirmed() == (numOfEntries - 2))); + assertTrue((rlh.getLastAddConfirmed() == (numOfEntries - 2)), "Expected LAC of rlh: " + + (numOfEntries - 2) + " actual LAC of rlh: " + rlh.getLastAddConfirmed()); try { // read all entries within the LastAddConfirmed..numOfEntries range with readUnconfirmedEntries @@ -517,9 +532,8 @@ public void testReadAfterLastAddConfirmed() throws Exception { // open ledger with fencing, this will repair the ledger and make the last entry readable try (BookKeeper bkReader = new BookKeeper(clientConfiguration); LedgerHandle rlh = bkReader.openLedger(ledgerId, digestType, "testPasswd".getBytes())) { - assertTrue( - "Expected LAC of rlh: " + (numOfEntries - 1) + " actual LAC of rlh: " + rlh.getLastAddConfirmed(), - (rlh.getLastAddConfirmed() == (numOfEntries - 1))); + assertTrue((rlh.getLastAddConfirmed() == (numOfEntries - 1)), "Expected LAC of rlh: " + + (numOfEntries - 1) + " actual LAC of rlh: " + rlh.getLastAddConfirmed()); assertFalse(writeLh.isClosed()); @@ -529,19 +543,15 @@ public void testReadAfterLastAddConfirmed() throws Exception { while (entries.hasMoreElements()) { LedgerEntry entry = entries.nextElement(); String entryString = new String(entry.getEntry()); - assertTrue("Expected entry String: " + ("foobar" + entryId) - + " actual entry String: " + entryString, - entryString.equals("foobar" + entryId)); + assertEquals(entryString, "foobar" + entryId, "Expected entry String: " + ("foobar" + entryId) + + " actual entry String: " + entryString); entryId++; } } - try { - writeLh.close(); - fail("should not be able to close the first LedgerHandler as a recovery has been performed"); - } catch (BKException.BKMetadataVersionException expected) { - } - + // should still be able to close as long as recovery closed the ledger + // with the same last entryId and length as in the write handle. + writeLh.close(); } } @@ -590,6 +600,186 @@ public void testReadWriteWithV2WireProtocol() throws Exception { } } + @Test + public void testBatchReadFailBackToSingleRead1() throws Exception { + ClientConfiguration conf = new ClientConfiguration(); + conf.setMetadataServiceUri(zkUtil.getMetadataServiceUri()); + int numEntries = 100; + byte[] data = "foobar".getBytes(); + try (BookKeeper bkc = new BookKeeper(conf)) { + // basic read/write + { + long ledgerId; + try (LedgerHandle lh = bkc.createLedger(2, 2, 2, + digestType, "testPasswd".getBytes())) { + ledgerId = lh.getId(); + for (int i = 0; i < numEntries; i++) { + lh.addEntry(data); + } + } + try (LedgerHandle lh = bkc.openLedger(ledgerId, digestType, "testPasswd".getBytes())) { + assertEquals(numEntries - 1, lh.readLastConfirmed()); + //V3 protocol not support batch read. In theory, it will throw UnsupportedOperationException. 
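// A caller-side sketch (not part of the patch) of the guard these tests pin
// down: try the batch read, and degrade to per-entry reads when the protocol
// rejects it. firstEntry/lastEntry/maxCount/maxSize are assumed to be in scope.
Enumeration<LedgerEntry> entries;
try {
    entries = lh.batchReadEntries(firstEntry, maxCount, maxSize);
} catch (UnsupportedOperationException e) {
    // v3 wire protocol has no batch read op; fetch the same range one entry at a time
    entries = lh.readEntries(firstEntry, lastEntry);
}
while (entries.hasMoreElements()) {
    LedgerEntry entry = entries.nextElement();
    assertArrayEquals(data, entry.getEntry());
}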
+                    try {
+                        lh.batchReadEntries(0, numEntries, 5 * 1024 * 1024);
+                        fail("Should throw UnsupportedOperationException.");
+                    } catch (UnsupportedOperationException e) {
+                        assertEquals("Unsupported batch read entry operation for v3 protocol.", e.getMessage());
+                    }
+                }
+            }
+        }
+
+        try (BookKeeper bkc = new BookKeeper(conf)) {
+            // basic read/write
+            {
+                long ledgerId;
+                try (LedgerHandle lh = bkc.createLedger(3, 2, 2,
+                        digestType, "testPasswd".getBytes())) {
+                    ledgerId = lh.getId();
+                    for (int i = 0; i < numEntries; i++) {
+                        lh.addEntry(data);
+                    }
+                }
+                try (LedgerHandle lh = bkc.openLedger(ledgerId, digestType, "testPasswd".getBytes())) {
+                    assertEquals(numEntries - 1, lh.readLastConfirmed());
+                    // The ledger's ensemble size does not equal its write quorum size, so the client
+                    // falls back to single reads and can still read the data successfully.
+                    for (Enumeration<LedgerEntry> readEntries = lh.batchReadEntries(0, numEntries, 5 * 1024 * 1024);
+                            readEntries.hasMoreElements();) {
+                        LedgerEntry entry = readEntries.nextElement();
+                        assertArrayEquals(data, entry.getEntry());
+                    }
+                }
+            }
+        }
+    }
+
+    @Test
+    public void testBatchReadFailBackToSingleRead2() throws Exception {
+        ClientConfiguration conf = new ClientConfiguration();
+        conf.setMetadataServiceUri(zkUtil.getMetadataServiceUri());
+        int numEntries = 100;
+        byte[] data = "foobar".getBytes();
+        try (BookKeeper bkc = new BookKeeper(conf)) {
+            // basic read/write
+            {
+                long ledgerId;
+                try (LedgerHandle lh = bkc.createLedger(2, 2, 2,
+                        digestType, "testPasswd".getBytes())) {
+                    ledgerId = lh.getId();
+                    for (int i = 0; i < numEntries; i++) {
+                        lh.addEntry(data);
+                    }
+                }
+                try (LedgerHandle lh = bkc.openLedger(ledgerId, digestType, "testPasswd".getBytes())) {
+                    assertEquals(numEntries - 1, lh.readLastConfirmed());
+                    // The v3 protocol does not support batch reads, so this throws UnsupportedOperationException.
+                    try {
+                        lh.batchReadEntries(0, numEntries, 5 * 1024 * 1024);
+                        fail("Should throw UnsupportedOperationException.");
+                    } catch (UnsupportedOperationException e) {
+                        assertEquals("Unsupported batch read entry operation for v3 protocol.", e.getMessage());
+                    }
+                }
+            }
+        }
+
+        conf.setBatchReadEnabled(false);
+        try (BookKeeper bkc = new BookKeeper(conf)) {
+            // basic read/write
+            {
+                long ledgerId;
+                try (LedgerHandle lh = bkc.createLedger(2, 2, 2,
+                        digestType, "testPasswd".getBytes())) {
+                    ledgerId = lh.getId();
+                    for (int i = 0; i < numEntries; i++) {
+                        lh.addEntry(data);
+                    }
+                }
+                try (LedgerHandle lh = bkc.openLedger(ledgerId, digestType, "testPasswd".getBytes())) {
+                    assertEquals(numEntries - 1, lh.readLastConfirmed());
+                    // Batch read is disabled in the configuration, so the client falls back to
+                    // single reads and can still read the data successfully.
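// For reference (not part of the patch): the two client knobs this pair of
// tests toggles, written with the chained setters used elsewhere in this file.
ClientConfiguration batchConf = new ClientConfiguration()
        .setUseV2WireProtocol(true); // on v3, batchReadEntries() throws UnsupportedOperationException
batchConf.setBatchReadEnabled(true); // when false, the client silently falls back to single reads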
+                    for (Enumeration<LedgerEntry> readEntries = lh.batchReadEntries(0, numEntries, 5 * 1024 * 1024);
+                            readEntries.hasMoreElements();) {
+                        LedgerEntry entry = readEntries.nextElement();
+                        assertArrayEquals(data, entry.getEntry());
+                    }
+                }
+            }
+        }
+    }
+
+    @Test
+    public void testBatchReadWithV2Protocol() throws Exception {
+        ClientConfiguration conf = new ClientConfiguration().setUseV2WireProtocol(true);
+        conf.setMetadataServiceUri(zkUtil.getMetadataServiceUri());
+        int numEntries = 100;
+        byte[] data = "foobar".getBytes();
+        try (BookKeeper bkc = new BookKeeper(conf)) {
+            // basic read/write
+            {
+                long ledgerId;
+                try (LedgerHandle lh = bkc.createLedger(2, 2, 2, digestType, "testPasswd".getBytes())) {
+                    ledgerId = lh.getId();
+                    for (int i = 0; i < numEntries; i++) {
+                        lh.addEntry(data);
+                    }
+                }
+                try (LedgerHandle lh = bkc.openLedger(ledgerId, digestType, "testPasswd".getBytes())) {
+                    assertEquals(numEntries - 1, lh.readLastConfirmed());
+                    int entries = 0;
+                    for (Enumeration<LedgerEntry> readEntries = lh.batchReadEntries(0, numEntries, 5 * 1024 * 1024);
+                            readEntries.hasMoreElements();) {
+                        LedgerEntry entry = readEntries.nextElement();
+                        assertArrayEquals(data, entry.getEntry());
+                        entries++;
+                    }
+                    assertEquals(numEntries, entries);
+
+                    // The maxCount is 0, so the result is limited only by maxSize.
+                    entries = 0;
+                    for (Enumeration<LedgerEntry> readEntries = lh.batchReadEntries(0, 0, 5 * 1024 * 1024);
+                            readEntries.hasMoreElements();) {
+                        LedgerEntry entry = readEntries.nextElement();
+                        assertArrayEquals(data, entry.getEntry());
+                        entries++;
+                    }
+                    assertEquals(numEntries, entries);
+
+                    // one entry size = 8(ledgerId) + 8(entryId) + 8(lac) + 8(length) + 8(digest) + payload size
+                    long entrySize = 8 + 8 + 8 + 8 + 8 + data.length;
+                    // response header size.
+                    int headerSize = 24 + 8 + 4;
+                    // The maxCount is 0, so the result is limited only by maxSize.
+                    entries = 0;
+                    int expectEntriesNum = 5;
+                    for (Enumeration<LedgerEntry> readEntries = lh.batchReadEntries(0, 0,
+                            expectEntriesNum * entrySize + headerSize + (expectEntriesNum * 4));
+                            readEntries.hasMoreElements();) {
+                        LedgerEntry entry = readEntries.nextElement();
+                        assertArrayEquals(data, entry.getEntry());
+                        entries++;
+                    }
+                    assertEquals(expectEntriesNum, entries);
+
+                    // The maxCount is 20, so the result entries reach the maxSize limit first.
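// Worked numbers for the size budget above, using the 6-byte "foobar" payload:
//   entrySize  = 5 * 8 + 6           = 46 bytes
//   headerSize = 24 + 8 + 4          = 36 bytes
//   maxSize    = 5 * 46 + 36 + 5 * 4 = 286 bytes
// A batch read with that maxSize can carry at most 5 entries, so below even a
// maxCount of 20 is capped by size and the assertion still expects 5 entries.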
+ entries = 0; + for (Enumeration readEntries = lh.batchReadEntries(0, 20, + expectEntriesNum * entrySize + headerSize + (expectEntriesNum * 4)); + readEntries.hasMoreElements();) { + LedgerEntry entry = readEntries.nextElement(); + assertArrayEquals(data, entry.getEntry()); + entries++; + } + assertEquals(expectEntriesNum, entries); + } + } + } + } + + @SuppressWarnings("deprecation") @Test public void testReadEntryReleaseByteBufs() throws Exception { ClientConfiguration confWriter = new ClientConfiguration(); @@ -659,8 +849,8 @@ public void testReadEntryReleaseByteBufs() throws Exception { for (Enumeration readEntries = lh.readEntries(0, numEntries - 1); readEntries.hasMoreElements();) { LedgerEntry entry = readEntries.nextElement(); - assertTrue("Can't release entry " + entry.getEntryId() + ": ref = " + entry.data.refCnt(), - entry.data.release()); + assertTrue(entry.data.release(), + "Can't release entry " + entry.getEntryId() + ": ref = " + entry.data.refCnt()); try { assertFalse(entry.data.release()); fail("ByteBuf already released"); @@ -684,8 +874,8 @@ public void testReadEntryReleaseByteBufs() throws Exception { readEntries.hasMoreElements();) { LedgerEntry entry = readEntries.nextElement(); // ButeBufs not reference counter - assertTrue("Can't release entry " + entry.getEntryId() + ": ref = " + entry.data.refCnt(), - entry.data.release()); + assertTrue(entry.data.release(), + "Can't release entry " + entry.getEntryId() + ": ref = " + entry.data.refCnt()); try { assertFalse(entry.data.release()); fail("ByteBuf already released"); @@ -784,11 +974,11 @@ public void readComplete(int rc, LedgerHandle lh, bkc.close(); } - @Test(expected = BKIllegalOpException.class) + @Test public void testCannotUseWriteFlagsOnV2Protocol() throws Exception { ClientConfiguration conf = new ClientConfiguration(baseClientConf); conf.setUseV2WireProtocol(true); - try (BookKeeperTestClient bkc = new BookKeeperTestClient(conf);) { + try (BookKeeperTestClient bkc = new BookKeeperTestClient(conf)) { try (WriteHandle wh = result(bkc.newCreateLedgerOp() .withEnsembleSize(3) .withWriteQuorumSize(3) @@ -796,16 +986,17 @@ public void testCannotUseWriteFlagsOnV2Protocol() throws Exception { .withPassword("".getBytes()) .withWriteFlags(WriteFlag.DEFERRED_SYNC) .execute())) { - result(wh.appendAsync("test".getBytes())); + Assertions.assertThrows(BKException.BKIllegalOpException.class, + () -> result(wh.appendAsync("test".getBytes()))); } } } - @Test(expected = BKIllegalOpException.class) + @Test public void testCannotUseForceOnV2Protocol() throws Exception { ClientConfiguration conf = new ClientConfiguration(baseClientConf); conf.setUseV2WireProtocol(true); - try (BookKeeperTestClient bkc = new BookKeeperTestClient(conf);) { + try (BookKeeperTestClient bkc = new BookKeeperTestClient(conf)) { try (WriteHandle wh = result(bkc.newCreateLedgerOp() .withEnsembleSize(3) .withWriteQuorumSize(3) @@ -813,9 +1004,298 @@ public void testCannotUseForceOnV2Protocol() throws Exception { .withPassword("".getBytes()) .withWriteFlags(WriteFlag.NONE) .execute())) { - result(wh.appendAsync("".getBytes())); - result(wh.force()); + result(wh.appendAsync("".getBytes())); + Assertions.assertThrows(BKException.BKIllegalOpException.class, + () -> result(wh.force())); + } + } + } + + class MockZooKeeperClient extends ZooKeeperClient { + class MockZooKeeper extends ZooKeeper { + public MockZooKeeper(String connectString, int sessionTimeout, Watcher watcher, boolean canBeReadOnly) + throws IOException { + super(connectString, sessionTimeout, 
watcher, canBeReadOnly); + } + + @Override + public void create(final String path, byte[] data, List acl, CreateMode createMode, StringCallback cb, + Object ctx) { + StringCallback injectedCallback = new StringCallback() { + @Override + public void processResult(int rc, String path, Object ctx, String name) { + /** + * if ledgerIdToInjectFailure matches with the path of + * the node, then throw CONNECTIONLOSS error and then + * reset it to INVALID_LEDGERID. + */ + if (path.contains(ledgerIdToInjectFailure.toString())) { + ledgerIdToInjectFailure.set(INVALID_LEDGERID); + cb.processResult(KeeperException.Code.CONNECTIONLOSS.intValue(), path, ctx, name); + } else { + cb.processResult(rc, path, ctx, name); + } + } + }; + super.create(path, data, acl, createMode, injectedCallback, ctx); } } + + private final String connectString; + private final int sessionTimeoutMs; + private final ZooKeeperWatcherBase watcherManager; + private final AtomicLong ledgerIdToInjectFailure; + + MockZooKeeperClient(String connectString, int sessionTimeoutMs, ZooKeeperWatcherBase watcher, + AtomicLong ledgerIdToInjectFailure) throws IOException { + /* + * in OperationalRetryPolicy maxRetries is > 0. So in case of any + * RecoverableException scenario, it will retry. + */ + super(connectString, sessionTimeoutMs, watcher, + new BoundExponentialBackoffRetryPolicy(sessionTimeoutMs, sessionTimeoutMs, Integer.MAX_VALUE), + new BoundExponentialBackoffRetryPolicy(sessionTimeoutMs, sessionTimeoutMs, 3), + NullStatsLogger.INSTANCE, 1, 0, false); + this.connectString = connectString; + this.sessionTimeoutMs = sessionTimeoutMs; + this.watcherManager = watcher; + this.ledgerIdToInjectFailure = ledgerIdToInjectFailure; + } + + @Override + protected ZooKeeper createZooKeeper() throws IOException { + return new MockZooKeeper(this.connectString, this.sessionTimeoutMs, this.watcherManager, false); + } } + + @Test + public void testZKConnectionLossForLedgerCreation() throws Exception { + int zkSessionTimeOut = 10000; + AtomicLong ledgerIdToInjectFailure = new AtomicLong(INVALID_LEDGERID); + ZooKeeperWatcherBase zooKeeperWatcherBase = new ZooKeeperWatcherBase(zkSessionTimeOut, false, + NullStatsLogger.INSTANCE); + MockZooKeeperClient zkFaultInjectionWrapper = new MockZooKeeperClient(zkUtil.getZooKeeperConnectString(), + zkSessionTimeOut, zooKeeperWatcherBase, ledgerIdToInjectFailure); + zkFaultInjectionWrapper.waitForConnection(); + assertEquals(States.CONNECTED, zkFaultInjectionWrapper.getState(), + "zkFaultInjectionWrapper should be in connected state"); + BookKeeper bk = new BookKeeper(baseClientConf, zkFaultInjectionWrapper); + long oldZkInstanceSessionId = zkFaultInjectionWrapper.getSessionId(); + long ledgerId = 567L; + LedgerHandle lh = bk.createLedgerAdv(ledgerId, 1, 1, 1, DigestType.CRC32, "".getBytes(), null); + lh.close(); + + /* + * trigger Expired event so that MockZooKeeperClient would run + * 'clientCreator' and create new zk handle. In this case it would + * create MockZooKeeper. 
+ */ + zooKeeperWatcherBase.process(new WatchedEvent(EventType.None, KeeperState.Expired, "")); + zkFaultInjectionWrapper.waitForConnection(); + for (int i = 0; i < 10; i++) { + if (zkFaultInjectionWrapper.getState() == States.CONNECTED) { + break; + } + Thread.sleep(200); + } + assertEquals(States.CONNECTED, zkFaultInjectionWrapper.getState(), + "zkFaultInjectionWrapper should be in connected state"); + assertNotEquals(oldZkInstanceSessionId, zkFaultInjectionWrapper.getSessionId(), + "Session Id of old and new ZK instance should be different"); + ledgerId++; + ledgerIdToInjectFailure.set(ledgerId); + /** + * ledgerIdToInjectFailure is set to 'ledgerId', so zookeeper.create + * would return CONNECTIONLOSS error for the first time and when it is + * retried, as expected it would return NODEEXISTS error. + * + * AbstractZkLedgerManager.createLedgerMetadata should deal with this + * scenario appropriately. + */ + lh = bk.createLedgerAdv(ledgerId, 1, 1, 1, DigestType.CRC32, "".getBytes(), null); + lh.close(); + assertEquals(INVALID_LEDGERID, ledgerIdToInjectFailure.get(), + "injectZnodeCreationNoNodeFailure should have been reset it to INVALID_LEDGERID"); + lh = bk.openLedger(ledgerId, DigestType.CRC32, "".getBytes()); + lh.close(); + ledgerId++; + lh = bk.createLedgerAdv(ledgerId, 1, 1, 1, DigestType.CRC32, "".getBytes(), null); + lh.close(); + bk.close(); + } + + @Test + public void testLedgerDeletionIdempotency() throws Exception { + BookKeeper bk = new BookKeeper(baseClientConf); + long ledgerId = 789L; + LedgerHandle lh = bk.createLedgerAdv(ledgerId, 1, 1, 1, DigestType.CRC32, "".getBytes(), null); + lh.close(); + bk.deleteLedger(ledgerId); + bk.deleteLedger(ledgerId); + bk.close(); + } + + /** + * Mock of RackawareEnsemblePlacementPolicy. Overrides areAckedBookiesAdheringToPlacementPolicy to only return true + * when ackedBookies consists of writeQuorumSizeToUseForTesting bookies. + */ + public static class MockRackawareEnsemblePlacementPolicy extends RackawareEnsemblePlacementPolicy { + private int writeQuorumSizeToUseForTesting; + private CountDownLatch conditionFirstInvocationLatch; + + void setWriteQuorumSizeToUseForTesting(int writeQuorumSizeToUseForTesting) { + this.writeQuorumSizeToUseForTesting = writeQuorumSizeToUseForTesting; + } + + void setConditionFirstInvocationLatch(CountDownLatch conditionFirstInvocationLatch) { + this.conditionFirstInvocationLatch = conditionFirstInvocationLatch; + } + + @Override + public boolean areAckedBookiesAdheringToPlacementPolicy(Set ackedBookies, + int writeQuorumSize, + int ackQuorumSize) { + conditionFirstInvocationLatch.countDown(); + return ackedBookies.size() == writeQuorumSizeToUseForTesting; + } + } + + /** + * Test to verify that PendingAddOp waits for success condition from areAckedBookiesAdheringToPlacementPolicy + * before returning success to client. Also tests working of WRITE_DELAYED_DUE_TO_NOT_ENOUGH_FAULT_DOMAINS and + * WRITE_TIMED_OUT_DUE_TO_NOT_ENOUGH_FAULT_DOMAINS counters. 
+ */ + @Test + public void testEnforceMinNumFaultDomainsForWrite() throws Exception { + byte[] data = "foobar".getBytes(); + byte[] password = "testPasswd".getBytes(); + + startNewBookie(); + startNewBookie(); + + ClientConfiguration conf = new ClientConfiguration(); + conf.setMetadataServiceUri(zkUtil.getMetadataServiceUri()); + conf.setEnsemblePlacementPolicy(MockRackawareEnsemblePlacementPolicy.class); + + conf.setAddEntryTimeout(2); + conf.setAddEntryQuorumTimeout(4); + conf.setEnforceMinNumFaultDomainsForWrite(true); + + TestStatsProvider statsProvider = new TestStatsProvider(); + + // Abnormal values for testing to prevent timeouts + BookKeeperTestClient bk = new BookKeeperTestClient(conf, statsProvider); + StatsLogger statsLogger = bk.getStatsLogger(); + + int ensembleSize = 3; + int writeQuorumSize = 3; + int ackQuorumSize = 2; + + CountDownLatch countDownLatch = new CountDownLatch(1); + MockRackawareEnsemblePlacementPolicy currPlacementPolicy = + (MockRackawareEnsemblePlacementPolicy) bk.getPlacementPolicy(); + currPlacementPolicy.setConditionFirstInvocationLatch(countDownLatch); + currPlacementPolicy.setWriteQuorumSizeToUseForTesting(writeQuorumSize); + + BookieId bookieToSleep; + + try (LedgerHandle lh = bk.createLedger(ensembleSize, writeQuorumSize, ackQuorumSize, digestType, password)) { + CountDownLatch sleepLatchCase1 = new CountDownLatch(1); + CountDownLatch sleepLatchCase2 = new CountDownLatch(1); + + // Put all non ensemble bookies to sleep + LOG.info("Putting all non ensemble bookies to sleep."); + for (BookieId addr : bookieAddresses()) { + try { + if (!lh.getCurrentEnsemble().contains(addr)) { + sleepBookie(addr, sleepLatchCase2); + } + } catch (UnknownHostException ignored) {} + } + + Thread writeToLedger = new Thread(() -> { + try { + LOG.info("Initiating write for entry"); + long entryId = lh.addEntry(data); + LOG.info("Wrote entry with entryId = {}", entryId); + } catch (InterruptedException | BKException ignored) { + } + }); + + bookieToSleep = lh.getCurrentEnsemble().get(0); + + LOG.info("Putting picked bookie to sleep"); + sleepBookie(bookieToSleep, sleepLatchCase1); + + assertEquals(statsLogger + .getCounter(WRITE_DELAYED_DUE_TO_NOT_ENOUGH_FAULT_DOMAINS) + .get() + .longValue(), 0); + + // Trying to write entry + writeToLedger.start(); + + // Waiting and checking to make sure that write has not succeeded + countDownLatch.await(conf.getAddEntryTimeout(), TimeUnit.SECONDS); + assertEquals(-1, lh.lastAddConfirmed, "Write succeeded but should not have"); + + // Wake the bookie + sleepLatchCase1.countDown(); + + // Waiting and checking to make sure that write has succeeded + writeToLedger.join(conf.getAddEntryTimeout() * 1000); + assertEquals(0, lh.lastAddConfirmed, "Write did not succeed but should have"); + + assertEquals(statsLogger + .getCounter(WRITE_DELAYED_DUE_TO_NOT_ENOUGH_FAULT_DOMAINS) + .get() + .longValue(), 1); + + // AddEntry thread for second scenario + Thread writeToLedger2 = new Thread(() -> { + try { + LOG.info("Initiating write for entry"); + long entryId = lh.addEntry(data); + LOG.info("Wrote entry with entryId = {}", entryId); + } catch (InterruptedException | BKException ignored) { + } + }); + + bookieToSleep = lh.getCurrentEnsemble().get(1); + + LOG.info("Putting picked bookie to sleep"); + sleepBookie(bookieToSleep, sleepLatchCase2); + + // Trying to write entry + writeToLedger2.start(); + + // Waiting and checking to make sure that write has failed + writeToLedger2.join((conf.getAddEntryQuorumTimeout() + 2) * 1000); + assertEquals(0, 
lh.lastAddConfirmed, "Write succeeded but should not have"); + + sleepLatchCase2.countDown(); + + assertEquals(statsLogger.getCounter(WRITE_DELAYED_DUE_TO_NOT_ENOUGH_FAULT_DOMAINS).get().longValue(), + 2); + + assertEquals(statsLogger.getCounter(WRITE_TIMED_OUT_DUE_TO_NOT_ENOUGH_FAULT_DOMAINS).get().longValue(), + 1); + } + } + + @Test + public void testBookieAddressResolverPassedToDNSToSwitchMapping() throws Exception { + ClientConfiguration conf = new ClientConfiguration(); + conf.setMetadataServiceUri(zkUtil.getMetadataServiceUri()); + + StaticDNSResolver tested = new StaticDNSResolver(); + try (BookKeeper bkc = BookKeeper + .forConfig(conf) + .dnsResolver(tested) + .build()) { + bkc.createLedger(digestType, "testPasswd".getBytes()).close(); + assertSame(bkc.getBookieAddressResolver(), tested.getBookieAddressResolver()); + } + } + } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BookKeeperTestClient.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BookKeeperTestClient.java index 253f0d9091c..d9917c4ec96 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BookKeeperTestClient.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BookKeeperTestClient.java @@ -20,16 +20,16 @@ */ package org.apache.bookkeeper.client; +import io.netty.buffer.UnpooledByteBufAllocator; import java.io.IOException; import java.util.concurrent.CompletableFuture; import java.util.concurrent.Future; - import lombok.extern.slf4j.Slf4j; - import org.apache.bookkeeper.common.concurrent.FutureUtils; import org.apache.bookkeeper.conf.ClientConfiguration; import org.apache.bookkeeper.discover.RegistrationClient.RegistrationListener; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.meta.zk.ZKMetadataClientDriver; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.proto.BookieClient; import org.apache.bookkeeper.stats.NullStatsLogger; import org.apache.bookkeeper.test.TestStatsProvider; @@ -45,19 +45,25 @@ public class BookKeeperTestClient extends BookKeeper { public BookKeeperTestClient(ClientConfiguration conf, TestStatsProvider statsProvider) throws IOException, InterruptedException, BKException { - super(conf, null, null, + super(conf, null, null, new UnpooledByteBufAllocator(false), statsProvider == null ? 
NullStatsLogger.INSTANCE : statsProvider.getStatsLogger(""), null, null, null); this.statsProvider = statsProvider; } + public BookKeeperTestClient(ClientConfiguration conf, ZooKeeper zkc) + throws IOException, InterruptedException, BKException { + super(conf, zkc, null, new UnpooledByteBufAllocator(false), + NullStatsLogger.INSTANCE, null, null, null); + } + public BookKeeperTestClient(ClientConfiguration conf) throws InterruptedException, BKException, IOException { - this(conf, null); + this(conf, (TestStatsProvider) null); } public ZooKeeper getZkHandle() { - return super.getZkHandle(); + return ((ZKMetadataClientDriver) metadataDriver).getZk(); } public ClientConfiguration getConf() { @@ -68,12 +74,12 @@ public BookieClient getBookieClient() { return bookieClient; } - public Future waitForReadOnlyBookie(BookieSocketAddress b) + public Future waitForReadOnlyBookie(BookieId b) throws Exception { return waitForBookieInSet(b, false); } - public Future waitForWritableBookie(BookieSocketAddress b) + public Future waitForWritableBookie(BookieId b) throws Exception { return waitForBookieInSet(b, true); } @@ -83,7 +89,7 @@ public Future waitForWritableBookie(BookieSocketAddress b) * or the read only set of bookies. Also ensure that it doesn't exist * in the other set before completing. */ - private Future waitForBookieInSet(BookieSocketAddress b, + private Future waitForBookieInSet(BookieId b, boolean writable) throws Exception { log.info("Wait for {} to become {}", b, writable ? "writable" : "readonly"); diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BookieAddressResolverDisabledTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BookieAddressResolverDisabledTest.java new file mode 100644 index 00000000000..fdf608b9a14 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BookieAddressResolverDisabledTest.java @@ -0,0 +1,57 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.client; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.fail; + +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.proto.BookieAddressResolver; +import org.junit.jupiter.api.Test; + +/** + * Unit test of {@link BookieAddressResolverDisabled}. 
+ */ +public class BookieAddressResolverDisabledTest { + + @Test + public void testResolve() { + BookieAddressResolver resolver = new BookieAddressResolverDisabled(); + + BookieSocketAddress addr1 = resolver.resolve(BookieId.parse("127.0.0.1:3181")); + assertEquals("127.0.0.1", addr1.getHostName()); + assertEquals(3181, addr1.getPort()); + + BookieSocketAddress addr2 = resolver.resolve(BookieId.parse("localhost:3182")); + assertEquals("localhost", addr2.getHostName()); + assertEquals(3182, addr2.getPort()); + + try { + resolver.resolve(BookieId.parse("foobar")); + fail("Non-legacy style bookie id should fail to resolve address"); + } catch (Exception e) { + assertTrue(e instanceof BookieAddressResolver.BookieIdNotResolvedException); + } + } + +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BookieDecommissionTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BookieDecommissionTest.java index 906db8f5cc8..84ddd06712b 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BookieDecommissionTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BookieDecommissionTest.java @@ -18,21 +18,23 @@ */ package org.apache.bookkeeper.client; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.fail; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.junit.jupiter.api.Assertions.fail; import java.util.Iterator; - +import java.util.LinkedList; +import java.util.List; import lombok.extern.slf4j.Slf4j; -import org.apache.bookkeeper.bookie.Bookie; +import org.apache.bookkeeper.bookie.BookieImpl; import org.apache.bookkeeper.client.BKException.BKIllegalOpException; import org.apache.bookkeeper.client.BookKeeper.DigestType; -import org.apache.bookkeeper.common.testing.annotations.FlakyTest; import org.apache.bookkeeper.conf.ServerConfiguration; import org.apache.bookkeeper.meta.UnderreplicatedLedger; import org.apache.bookkeeper.meta.ZkLedgerUnderreplicationManager; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.test.BookKeeperClusterTestCase; -import org.junit.Test; +import org.junit.jupiter.api.Test; /** * Unit test of bookie decommission operations. 
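The decommission tests below all follow the same shape; here is that flow condensed into one sketch, assuming the BookKeeperClusterTestCase helpers and the admin objects (bkAdmin, urLedgerMgr) these tests set up:

// Kill a bookie, decommission it, audit, then assert nothing stayed under-replicated.
ServerConfiguration killedBookieConf = killBookie(0);           // cluster-test helper
BookieId killedId = BookieImpl.getBookieId(killedBookieConf);   // stable id of the dead bookie
bkAdmin.decommissionBookie(killedId);  // expected to re-replicate every ledger it hosted
bkAdmin.triggerAudit();
Thread.sleep(5000);                    // coarse wait for the audit, as in the tests
Iterator<UnderreplicatedLedger> ur = urLedgerMgr.listLedgersToRereplicate(null);
assertFalse(ur.hasNext(), "no ledger should remain under-replicated");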
@@ -41,24 +43,27 @@ public class BookieDecommissionTest extends BookKeeperClusterTestCase { private static final int NUM_OF_BOOKIES = 6; - private static DigestType digestType = DigestType.CRC32; + private static final DigestType digestType = DigestType.CRC32; private static final String PASSWORD = "testPasswd"; public BookieDecommissionTest() { super(NUM_OF_BOOKIES, 480); - baseConf.setOpenLedgerRereplicationGracePeriod(String.valueOf(30000)); + baseConf.setOpenLedgerRereplicationGracePeriod(String.valueOf(1000)); setAutoRecoveryEnabled(true); } - @FlakyTest("https://github.com/apache/bookkeeper/issues/502") + @Test public void testDecommissionBookie() throws Exception { ZkLedgerUnderreplicationManager urLedgerMgr = new ZkLedgerUnderreplicationManager(baseClientConf, zkc); BookKeeperAdmin bkAdmin = new BookKeeperAdmin(zkUtil.getZooKeeperConnectString()); + List ledgerIds = new LinkedList<>(); + int numOfLedgers = 2 * NUM_OF_BOOKIES; int numOfEntries = 2 * NUM_OF_BOOKIES; for (int i = 0; i < numOfLedgers; i++) { LedgerHandle lh = bkc.createLedger(3, 2, digestType, PASSWORD.getBytes()); + ledgerIds.add(lh.getId()); for (int j = 0; j < numOfEntries; j++) { lh.addEntry("entry".getBytes()); } @@ -69,6 +74,7 @@ public void testDecommissionBookie() throws Exception { */ for (int i = 0; i < numOfLedgers; i++) { LedgerHandle emptylh = bkc.createLedger(3, 2, digestType, PASSWORD.getBytes()); + ledgerIds.add(emptylh.getId()); emptylh.close(); } @@ -77,7 +83,7 @@ public void testDecommissionBookie() throws Exception { * if we try to call decommissionBookie for a bookie which is not * shutdown, then it should throw BKIllegalOpException */ - bkAdmin.decommissionBookie(bs.get(0).getLocalAddress()); + bkAdmin.decommissionBookie(addressByIndex(0)); fail("Expected BKIllegalOpException because that bookie is not shutdown yet"); } catch (BKIllegalOpException bkioexc) { // expected IllegalException @@ -88,9 +94,9 @@ public void testDecommissionBookie() throws Exception { * this decommisionBookie should make sure that there are no * underreplicated ledgers because of this bookie */ - bkAdmin.decommissionBookie(Bookie.getBookieAddress(killedBookieConf)); + bkAdmin.decommissionBookie(BookieImpl.getBookieId(killedBookieConf)); bkAdmin.triggerAudit(); - Thread.sleep(500); + Thread.sleep(5000); Iterator ledgersToRereplicate = urLedgerMgr.listLedgersToRereplicate(null); if (ledgersToRereplicate.hasNext()) { while (ledgersToRereplicate.hasNext()) { @@ -101,9 +107,9 @@ public void testDecommissionBookie() throws Exception { } killedBookieConf = killBookie(0); - bkAdmin.decommissionBookie(Bookie.getBookieAddress(killedBookieConf)); + bkAdmin.decommissionBookie(BookieImpl.getBookieId(killedBookieConf)); bkAdmin.triggerAudit(); - Thread.sleep(500); + Thread.sleep(5000); ledgersToRereplicate = urLedgerMgr.listLedgersToRereplicate(null); if (ledgersToRereplicate.hasNext()) { while (ledgersToRereplicate.hasNext()) { @@ -113,6 +119,10 @@ public void testDecommissionBookie() throws Exception { fail("There are not supposed to be any underreplicatedledgers"); } bkAdmin.close(); + + for (Long id: ledgerIds) { + verifyNoFragmentsOnBookie(id, BookieImpl.getBookieId(killedBookieConf)); + } } @Test @@ -132,11 +142,16 @@ public void testDecommissionForLedgersWithMultipleSegmentsAndNotWriteClosed() th lh4.addEntry(j, "data".getBytes()); } + // avoiding autorecovery fencing the ledger + servers.forEach(srv -> srv.stopAutoRecovery()); + startNewBookie(); - assertEquals("Number of Available Bookies", NUM_OF_BOOKIES + 1, 
bkAdmin.getAvailableBookies().size()); + assertEquals(NUM_OF_BOOKIES + 1, bkAdmin.getAvailableBookies().size(), "Number of Available Bookies"); - ServerConfiguration killedBookieConf = killBookie(0); + BookieId killedBookieId = getBookie(0); + log.warn("Killing bookie {}", killedBookieId); + killBookie(0); /* * since one of the bookie is killed, ensemble change happens when next @@ -154,16 +169,24 @@ public void testDecommissionForLedgersWithMultipleSegmentsAndNotWriteClosed() th lh1.close(); lh2.close(); + servers.forEach(srv -> { + try { + srv.startAutoRecovery(); + } catch (Exception e) { + throw new RuntimeException(e); + } + }); + /* * If the last fragment of the ledger is underreplicated and if the * ledger is not closed then it will remain underreplicated for - * openLedgerRereplicationGracePeriod (by default 30 secs). For more + * openLedgerRereplicationGracePeriod (by default 30 secs, 1 in the test). For more * info. Check BOOKKEEPER-237 and BOOKKEEPER-325. But later * ReplicationWorker will fence the ledger. */ - bkAdmin.decommissionBookie(Bookie.getBookieAddress(killedBookieConf)); + bkAdmin.decommissionBookie(killedBookieId); bkAdmin.triggerAudit(); - Thread.sleep(500); + Thread.sleep(5000); Iterator ledgersToRereplicate = urLedgerMgr.listLedgersToRereplicate(null); if (ledgersToRereplicate.hasNext()) { while (ledgersToRereplicate.hasNext()) { @@ -173,6 +196,73 @@ public void testDecommissionForLedgersWithMultipleSegmentsAndNotWriteClosed() th fail("There are not supposed to be any underreplicatedledgers"); } bkAdmin.close(); + + verifyNoFragmentsOnBookie(1L, killedBookieId); + verifyNoFragmentsOnBookie(2L, killedBookieId); + verifyNoFragmentsOnBookie(3L, killedBookieId); + verifyNoFragmentsOnBookie(4L, killedBookieId); + } + + @Test + public void testDecommissionForEmptyLedgers() throws Exception { + ZkLedgerUnderreplicationManager urLedgerMgr = new ZkLedgerUnderreplicationManager(baseClientConf, zkc); + BookKeeperAdmin bkAdmin = new BookKeeperAdmin(zkUtil.getZooKeeperConnectString()); + + LedgerHandle lh1 = bkc.createLedgerAdv(1L, numBookies, numBookies - 1, numBookies - 1, + digestType, PASSWORD.getBytes(), null); + LedgerHandle lh2 = bkc.createLedgerAdv(2L, numBookies, numBookies - 1, numBookies - 1, + digestType, PASSWORD.getBytes(), null); + LedgerHandle lh3 = bkc.createLedgerAdv(3L, numBookies, numBookies - 1, numBookies - 1, + digestType, PASSWORD.getBytes(), null); + LedgerHandle lh4 = bkc.createLedgerAdv(4L, numBookies, numBookies - 1, numBookies - 1, + digestType, PASSWORD.getBytes(), null); + + lh1.close(); + lh2.close(); + + startNewBookie(); + + assertEquals(NUM_OF_BOOKIES + 1, bkAdmin.getAvailableBookies().size(), "Number of Available Bookies"); + + BookieId killedBookieId = getBookie(0); + log.warn("Killing bookie {}", killedBookieId); + killBookie(0); + assertEquals(NUM_OF_BOOKIES, bkAdmin.getAvailableBookies().size(), "Number of Available Bookies"); + + bkAdmin.decommissionBookie(killedBookieId); + bkAdmin.triggerAudit(); + Thread.sleep(5000); + Iterator ledgersToRereplicate = urLedgerMgr.listLedgersToRereplicate(null); + if (ledgersToRereplicate.hasNext()) { + while (ledgersToRereplicate.hasNext()) { + long ledgerId = ledgersToRereplicate.next().getLedgerId(); + log.error("Ledger: {} is underreplicated which is not expected. 
{}", + ledgerId, ledgersToRereplicate.next().getReplicaList()); + } + fail("There are not supposed to be any underreplicatedledgers"); + } + bkAdmin.close(); + + verifyNoFragmentsOnBookie(1L, killedBookieId); + verifyNoFragmentsOnBookie(2L, killedBookieId); + verifyNoFragmentsOnBookie(3L, killedBookieId); + verifyNoFragmentsOnBookie(4L, killedBookieId); + + lh3.close(); + lh4.close(); + } + + private void verifyNoFragmentsOnBookie(long ledgerId, BookieId bookieId) throws BKException, InterruptedException { + LedgerHandle lh = bkc.openLedgerNoRecovery(ledgerId, digestType, PASSWORD.getBytes()); + log.error("Ledger {} metadata: {}", ledgerId, lh.getLedgerMetadata()); + + lh.getLedgerMetadata().getAllEnsembles().forEach((num, bookies) -> { + bookies.forEach(id -> { + assertNotEquals(bookieId, id); + }); + }); + + lh.close(); } } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BookieNetworkAddressChangeTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BookieNetworkAddressChangeTest.java new file mode 100644 index 00000000000..0a6a9869201 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BookieNetworkAddressChangeTest.java @@ -0,0 +1,171 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.client; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.fail; + +import lombok.extern.slf4j.Slf4j; +import org.apache.bookkeeper.client.BKException.BKBookieHandleNotAvailableException; +import org.apache.bookkeeper.client.api.BookKeeper; +import org.apache.bookkeeper.client.api.LedgerEntries; +import org.apache.bookkeeper.client.api.ReadHandle; +import org.apache.bookkeeper.client.api.WriteHandle; +import org.apache.bookkeeper.conf.ClientConfiguration; +import org.apache.bookkeeper.discover.ZKRegistrationClient; +import org.apache.bookkeeper.test.BookKeeperClusterTestCase; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; + +/** + * Tests of the main BookKeeper client and the BP-41 bookieAddressTracking feature. 
+ */
+@Slf4j
+public class BookieNetworkAddressChangeTest extends BookKeeperClusterTestCase {
+
+    public BookieNetworkAddressChangeTest() {
+        super(1);
+        this.useUUIDasBookieId = true;
+    }
+
+    @Test
+    public void testFollowBookieAddressChange() throws Exception {
+        ClientConfiguration conf = new ClientConfiguration();
+        conf.setMetadataServiceUri(zkUtil.getMetadataServiceUri());
+        try (BookKeeper bkc = BookKeeper.newBuilder(conf)
+                .build();) {
+            long lId;
+            try (WriteHandle h = bkc
+                    .newCreateLedgerOp()
+                    .withAckQuorumSize(1)
+                    .withEnsembleSize(1)
+                    .withWriteQuorumSize(1)
+                    .withPassword(new byte[0])
+                    .execute()
+                    .get();) {
+                lId = h.getId();
+                h.append("foo".getBytes("utf-8"));
+            }
+
+            // restart bookie, change port
+            // on metadata we have a bookieId, not the network address
+            restartBookies(c -> c);
+
+            try (ReadHandle h = bkc
+                    .newOpenLedgerOp()
+                    .withLedgerId(lId)
+                    .withRecovery(true)
+                    .withPassword(new byte[0])
+                    .execute()
+                    .get()) {
+                assertEquals(0, h.getLastAddConfirmed());
+                try (LedgerEntries entries = h.read(0, 0);) {
+                    assertEquals("foo", new String(entries.getEntry(0).getEntryBytes(), "utf-8"));
+                }
+            }
+        }
+    }
+
+    @Test
+    @Disabled("PLSR-1850 Seems like restart of the bookie always comes up on same port hence failing this test")
+    public void testFollowBookieAddressChangeTrackingDisabled() throws Exception {
+        ClientConfiguration conf = new ClientConfiguration();
+        conf.setMetadataServiceUri(zkUtil.getMetadataServiceUri());
+        conf.setEnableBookieAddressTracking(false);
+        try (BookKeeper bkc = BookKeeper.newBuilder(conf)
+                .build();) {
+            long lId;
+            try (WriteHandle h = bkc
+                    .newCreateLedgerOp()
+                    .withAckQuorumSize(1)
+                    .withEnsembleSize(1)
+                    .withWriteQuorumSize(1)
+                    .withPassword(new byte[0])
+                    .execute()
+                    .get();) {
+                lId = h.getId();
+                h.append("foo".getBytes("utf-8"));
+            }
+
+            // restart bookie, change port
+            // on metadata we have a bookieId, not the network address
+            restartBookie(getBookie(0));
+            try (ReadHandle h = bkc
+                    .newOpenLedgerOp()
+                    .withLedgerId(lId)
+                    .withRecovery(true)
+                    .withPassword(new byte[0])
+                    .execute()
+                    .get()) {
+                try (LedgerEntries entries = h.read(0, 0);) {
+                    fail("Should not be able to connect to the bookie with Bookie Address Tracking Disabled");
+                } catch (BKBookieHandleNotAvailableException expected) {
+                }
+            }
+        }
+    }
+
+    @Test
+    public void testFollowBookieAddressChangeZkSessionExpire() throws Exception {
+        ClientConfiguration conf = new ClientConfiguration();
+        conf.setMetadataServiceUri(zkUtil.getMetadataServiceUri());
+        try (BookKeeper bkc = BookKeeper.newBuilder(conf)
+                .build();) {
+            long lId;
+            try (WriteHandle h = bkc
+                    .newCreateLedgerOp()
+                    .withAckQuorumSize(1)
+                    .withEnsembleSize(1)
+                    .withWriteQuorumSize(1)
+                    .withPassword(new byte[0])
+                    .execute()
+                    .get();) {
+                lId = h.getId();
+                h.append("foo".getBytes("utf-8"));
+            }
+
+            log.error("expiring ZK session!");
+            // expire zk session
+            ZKRegistrationClient regClient = (ZKRegistrationClient) ((org.apache.bookkeeper.client.BookKeeper) bkc)
+                    .getMetadataClientDriver()
+                    .getRegistrationClient();
+
+            regClient.getZk().getTestable().injectSessionExpiration();
+
+            // restart bookie, change port
+            // on metadata we have a bookieId, not the network address
+            restartBookies(c -> c);
+
+            try (ReadHandle h = bkc
+                    .newOpenLedgerOp()
+                    .withLedgerId(lId)
+                    .withRecovery(true)
+                    .withPassword(new byte[0])
+                    .execute()
+                    .get()) {
+                assertEquals(0, h.getLastAddConfirmed());
+                try (LedgerEntries entries = h.read(0, 0);) {
+                    assertEquals("foo", new
String(entries.getEntry(0).getEntryBytes(), "utf-8")); + } + } + } + } +} \ No newline at end of file diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BookieRecoveryTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BookieRecoveryTest.java index b1ad88abfb1..8b0ca9b529a 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BookieRecoveryTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BookieRecoveryTest.java @@ -20,13 +20,11 @@ */ package org.apache.bookkeeper.client; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.fail; import io.netty.buffer.ByteBuf; - import java.io.IOException; import java.util.ArrayList; import java.util.Collections; @@ -39,19 +37,18 @@ import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicLong; - import org.apache.bookkeeper.client.AsyncCallback.RecoverCallback; import org.apache.bookkeeper.client.BookKeeper.DigestType; +import org.apache.bookkeeper.client.api.LedgerMetadata; import org.apache.bookkeeper.conf.ClientConfiguration; import org.apache.bookkeeper.conf.ServerConfiguration; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.proto.BookieProtocol; -import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GenericCallback; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.ReadEntryCallback; import org.apache.bookkeeper.test.BookKeeperClusterTestCase; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -107,7 +104,7 @@ public BookieRecoveryTest() { baseClientConf.setLedgerManagerFactoryClassName(ledgerManagerFactory); } - @Before + @BeforeEach @Override public void setUp() throws Exception { // Set up the configuration properties needed. @@ -122,7 +119,7 @@ public void setUp() throws Exception { bkAdmin = new BookKeeperAdmin(adminConf); } - @After + @AfterEach @Override public void tearDown() throws Exception { // Release any resources used by the BookieRecoveryTest instance. 
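The hunks above migrate BookieRecoveryTest from JUnit 4 to JUnit 5, and the remaining hunks in this file repeat the same two mechanical changes. A minimal sketch of both conventions side by side (the class and test body here are illustrative, not taken from the diff):

import static org.junit.jupiter.api.Assertions.assertTrue;

import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

class JUnit5MigrationSketch {

    @BeforeEach // JUnit 4 used @org.junit.Before
    void setUp() {
        // per-test setup, as in BookieRecoveryTest#setUp
    }

    @Test
    void messageMovesToLastArgument() {
        boolean fullyReplicated = true;
        // JUnit 4: assertTrue("Not fully replicated", fullyReplicated);
        assertTrue(fullyReplicated, "Not fully replicated"); // JUnit 5: message is the last argument
    }
}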
@@ -264,7 +261,7 @@ void metadataConflictWithRecovery(BookKeeper bkc) throws Exception { for (int i = 0; i < numEntries; i++) { lh.addEntry(data); } - BookieSocketAddress bookieToKill = lh.getLedgerMetadata().getEnsemble(numEntries - 1).get(1); + BookieId bookieToKill = lh.getLedgerMetadata().getEnsembleAt(numEntries - 1).get(1); killBookie(bookieToKill); startNewBookie(); for (int i = 0; i < numEntries; i++) { @@ -272,17 +269,17 @@ void metadataConflictWithRecovery(BookKeeper bkc) throws Exception { } bkAdmin.recoverBookieData(bookieToKill); // fail another bookie to cause ensemble change again - bookieToKill = lh.getLedgerMetadata().getEnsemble(2 * numEntries - 1).get(1); + bookieToKill = lh.getLedgerMetadata().getEnsembleAt(2 * numEntries - 1).get(1); ServerConfiguration confOfKilledBookie = killBookie(bookieToKill); startNewBookie(); for (int i = 0; i < numEntries; i++) { lh.addEntry(data); } // start the killed bookie again - bsConfs.add(confOfKilledBookie); - bs.add(startBookie(confOfKilledBookie)); + startAndAddBookie(confOfKilledBookie); + // all ensembles should be fully replicated since it is recovered - assertTrue("Not fully replicated", verifyFullyReplicated(lh, 3 * numEntries)); + assertTrue(verifyFullyReplicated(lh, 3 * numEntries), "Not fully replicated"); lh.close(); } @@ -307,9 +304,8 @@ public void testAsyncBookieRecoveryToSpecificBookie() throws Exception { // Shutdown the first bookie server LOG.info("Finished writing all ledger entries so shutdown one of the bookies."); - BookieSocketAddress bookieSrc = bs.get(0).getLocalAddress(); - bs.get(0).shutdown(); - bs.remove(0); + BookieId bookieSrc = addressByIndex(0); + killBookie(0); // Startup a new bookie server startNewBookie(); @@ -357,9 +353,9 @@ public void testAsyncBookieRecoveryToRandomBookies() throws Exception { // Shutdown the first bookie server LOG.info("Finished writing all ledger entries so shutdown one of the bookies."); - BookieSocketAddress bookieSrc = bs.get(0).getLocalAddress(); - bs.get(0).shutdown(); - bs.remove(0); + + BookieId bookieSrc = addressByIndex(0); + killBookie(0); // Startup three new bookie servers for (int i = 0; i < 3; i++) { @@ -410,15 +406,15 @@ public void testSyncBookieRecoveryToSpecificBookie() throws Exception { // Shutdown the first bookie server LOG.info("Finished writing all ledger entries so shutdown one of the bookies."); - BookieSocketAddress bookieSrc = bs.get(0).getLocalAddress(); - bs.get(0).shutdown(); - bs.remove(0); + + BookieId bookieSrc = addressByIndex(0); + killBookie(0); // Startup a new bookie server int newBookiePort = startNewBookie(); // Write some more entries for the ledgers so a new ensemble will be - // created for them. + //created for them. writeEntriestoLedgers(numMsgs, 10, lhs); // Call the sync recover bookie method. 
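These hunks also replace BookieSocketAddress with the opaque BookieId, and direct manipulation of the bs/bsConfs lists with the addressByIndex/killBookie/startAndAddBookie helpers. A small sketch of the id/address relationship, using the same conversion the new HandleFailuresTest performs below (host and port are illustrative values):

import org.apache.bookkeeper.net.BookieId;
import org.apache.bookkeeper.net.BookieSocketAddress;

class BookieIdSketch {
    public static void main(String[] args) {
        // A legacy-style id derived from a socket address; with useUUIDasBookieId enabled
        // (as in BookieNetworkAddressChangeTest) the id would instead be a UUID carrying
        // no network location, which is what makes address changes transparent.
        BookieId b1 = new BookieSocketAddress("bookie-host", 3181).toBookieId();
        System.out.println(b1); // prints "bookie-host:3181"
    }
}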
@@ -451,9 +447,9 @@ public void testSyncBookieRecoveryToRandomBookies() throws Exception { // Shutdown the first bookie server LOG.info("Finished writing all ledger entries so shutdown one of the bookies."); - BookieSocketAddress bookieSrc = bs.get(0).getLocalAddress(); - bs.get(0).shutdown(); - bs.remove(0); + + BookieId bookieSrc = addressByIndex(0); + killBookie(0); // Startup three new bookie servers for (int i = 0; i < 3; i++) { @@ -506,7 +502,7 @@ long await() throws InterruptedException { private boolean verifyFullyReplicated(LedgerHandle lh, long untilEntry) throws Exception { LedgerMetadata md = getLedgerMetadata(lh); - Map> ensembles = md.getEnsembles(); + Map> ensembles = md.getAllEnsembles(); HashMap ranges = new HashMap(); ArrayList keyList = Collections.list( @@ -517,7 +513,7 @@ private boolean verifyFullyReplicated(LedgerHandle lh, long untilEntry) throws E } ranges.put(keyList.get(keyList.size() - 1), untilEntry); - for (Map.Entry> e : ensembles.entrySet()) { + for (Map.Entry> e : ensembles.entrySet()) { int quorum = md.getAckQuorumSize(); long startEntryId = e.getKey(); long endEntryId = ranges.get(startEntryId); @@ -526,7 +522,7 @@ private boolean verifyFullyReplicated(LedgerHandle lh, long untilEntry) throws E ReplicationVerificationCallback cb = new ReplicationVerificationCallback(numRequests); for (long i = startEntryId; i < endEntryId; i++) { - for (BookieSocketAddress addr : e.getValue()) { + for (BookieId addr : e.getValue()) { bkc.getBookieClient().readEntry(addr, lh.getId(), i, cb, addr, BookieProtocol.FLAG_NONE); } @@ -558,39 +554,18 @@ public SyncLedgerMetaObject() { } private LedgerMetadata getLedgerMetadata(LedgerHandle lh) throws Exception { - final SyncLedgerMetaObject syncObj = new SyncLedgerMetaObject(); - bkc.getLedgerManager().readLedgerMetadata(lh.getId(), new GenericCallback() { - - @Override - public void operationComplete(int rc, LedgerMetadata result) { - synchronized (syncObj) { - syncObj.rc = rc; - syncObj.meta = result; - syncObj.value = true; - syncObj.notify(); - } - } - - }); - - synchronized (syncObj) { - while (!syncObj.value) { - syncObj.wait(); - } - } - assertEquals(BKException.Code.OK, syncObj.rc); - return syncObj.meta; + return bkc.getLedgerManager().readLedgerMetadata(lh.getId()).get().getValue(); } private boolean findDupesInEnsembles(List lhs) throws Exception { long numDupes = 0; for (LedgerHandle lh : lhs) { LedgerMetadata md = getLedgerMetadata(lh); - for (Map.Entry> e : md.getEnsembles().entrySet()) { - HashSet set = new HashSet(); + for (Map.Entry> e : md.getAllEnsembles().entrySet()) { + HashSet set = new HashSet(); long fragment = e.getKey(); - for (BookieSocketAddress addr : e.getValue()) { + for (BookieId addr : e.getValue()) { if (set.contains(addr)) { LOG.error("Dupe " + addr + " found in ensemble for fragment " + fragment + " of ledger " + lh.getId()); @@ -619,9 +594,9 @@ public void testBookieRecoveryOnClosedLedgers() throws Exception { closeLedgers(lhs); // Shutdown last bookie server in last ensemble - List lastEnsemble = lhs.get(0).getLedgerMetadata().getEnsembles() + List lastEnsemble = lhs.get(0).getLedgerMetadata().getAllEnsembles() .entrySet().iterator().next().getValue(); - BookieSocketAddress bookieToKill = lastEnsemble.get(lastEnsemble.size() - 1); + BookieId bookieToKill = lastEnsemble.get(lastEnsemble.size() - 1); killBookie(bookieToKill); // start a new bookie @@ -632,7 +607,7 @@ public void testBookieRecoveryOnClosedLedgers() throws Exception { bkAdmin.recoverBookieData(bookieToKill); for (LedgerHandle lh 
: lhs) { - assertTrue("Not fully replicated", verifyFullyReplicated(lh, numMsgs)); + assertTrue(verifyFullyReplicated(lh, numMsgs), "Not fully replicated"); lh.close(); } } @@ -648,9 +623,9 @@ public void testBookieRecoveryOnOpenedLedgers() throws Exception { writeEntriestoLedgers(numMsgs, 0, lhs); // Shutdown the first bookie server - List lastEnsemble = lhs.get(0).getLedgerMetadata().getEnsembles() + List lastEnsemble = lhs.get(0).getLedgerMetadata().getAllEnsembles() .entrySet().iterator().next().getValue(); - BookieSocketAddress bookieToKill = lastEnsemble.get(lastEnsemble.size() - 1); + BookieId bookieToKill = lastEnsemble.get(lastEnsemble.size() - 1); killBookie(bookieToKill); // start a new bookie @@ -662,7 +637,7 @@ public void testBookieRecoveryOnOpenedLedgers() throws Exception { bkAdmin.recoverBookieData(bookieToKill); for (LedgerHandle lh : lhs) { - assertTrue("Not fully replicated", verifyFullyReplicated(lh, numMsgs)); + assertTrue(verifyFullyReplicated(lh, numMsgs), "Not fully replicated"); } try { @@ -684,13 +659,13 @@ public void testBookieRecoveryOnInRecoveryLedger() throws Exception { writeEntriestoLedgers(numMsgs, 0, lhs); // Shutdown the first bookie server - List lastEnsemble = lhs.get(0).getLedgerMetadata().getEnsembles() + List lastEnsemble = lhs.get(0).getLedgerMetadata().getAllEnsembles() .entrySet().iterator().next().getValue(); // removed bookie - BookieSocketAddress bookieToKill = lastEnsemble.get(0); + BookieId bookieToKill = lastEnsemble.get(0); killBookie(bookieToKill); // temp failure - BookieSocketAddress bookieToKill2 = lastEnsemble.get(1); + BookieId bookieToKill2 = lastEnsemble.get(1); ServerConfiguration conf2 = killBookie(bookieToKill2); // start a new bookie @@ -713,22 +688,21 @@ public void testBookieRecoveryOnInRecoveryLedger() throws Exception { } // restart failed bookie - bs.add(startBookie(conf2)); - bsConfs.add(conf2); + startAndAddBookie(conf2); // recover them bkAdmin.recoverBookieData(bookieToKill); for (LedgerHandle lh : lhs) { - assertTrue("Not fully replicated", verifyFullyReplicated(lh, numMsgs)); + assertTrue(verifyFullyReplicated(lh, numMsgs), "Not fully replicated"); } // open ledgers to read metadata List newLhs = openLedgers(lhs); for (LedgerHandle newLh : newLhs) { // first ensemble should contains bookieToKill2 and not contain bookieToKill - Map.Entry> entry = - newLh.getLedgerMetadata().getEnsembles().entrySet().iterator().next(); + Map.Entry> entry = + newLh.getLedgerMetadata().getAllEnsembles().entrySet().iterator().next(); assertFalse(entry.getValue().contains(bookieToKill)); assertTrue(entry.getValue().contains(bookieToKill2)); } @@ -747,9 +721,9 @@ public void testAsyncBookieRecoveryToRandomBookiesNotEnoughBookies() throws Exce // Shutdown the first bookie server LOG.info("Finished writing all ledger entries so shutdown one of the bookies."); - BookieSocketAddress bookieSrc = bs.get(0).getLocalAddress(); - bs.get(0).shutdown(); - bs.remove(0); + + BookieId bookieSrc = addressByIndex(0); + killBookie(0); // Call the async recover bookie method. 
LOG.info("Now recover the data on the killed bookie (" + bookieSrc @@ -778,10 +752,9 @@ public void testSyncBookieRecoveryToRandomBookiesCheckForDupes() throws Exceptio // Shutdown the first bookie server LOG.info("Finished writing all ledger entries so shutdown one of the bookies."); - int removeIndex = r.nextInt(bs.size()); - BookieSocketAddress bookieSrc = bs.get(removeIndex).getLocalAddress(); - bs.get(removeIndex).shutdown(); - bs.remove(removeIndex); + int removeIndex = r.nextInt(bookieCount()); + BookieId bookieSrc = addressByIndex(removeIndex); + killBookie(removeIndex); // Startup new bookie server startNewBookie(); @@ -797,13 +770,13 @@ public void testSyncBookieRecoveryToRandomBookiesCheckForDupes() throws Exceptio sync.value = false; bkAdmin.recoverBookieData(bookieSrc); - assertFalse("Dupes exist in ensembles", findDupesInEnsembles(lhs)); + assertFalse(findDupesInEnsembles(lhs), "Dupes exist in ensembles"); // Write some more entries to ensure fencing hasn't broken stuff writeEntriestoLedgers(numMsgs, numMsgs * 2, lhs); for (LedgerHandle lh : lhs) { - assertTrue("Not fully replicated", verifyFullyReplicated(lh, numMsgs * 3)); + assertTrue(verifyFullyReplicated(lh, numMsgs * 3), "Not fully replicated"); lh.close(); } } @@ -821,14 +794,14 @@ public void recoverWithoutPasswordInConf() throws Exception { } lh.close(); - BookieSocketAddress bookieSrc = bs.get(0).getLocalAddress(); - bs.get(0).shutdown(); - bs.remove(0); + BookieId bookieSrc = addressByIndex(0); + killBookie(0); + startNewBookie(); // Check that entries are missing lh = bkc.openLedgerNoRecovery(ledgerId, digestCorrect, passwdCorrect); - assertFalse("Should be entries missing", verifyFullyReplicated(lh, 100)); + assertFalse(verifyFullyReplicated(lh, 100), "Should be entries missing"); lh.close(); // Try to recover with bad password in conf @@ -845,17 +818,16 @@ public void recoverWithoutPasswordInConf() throws Exception { bka.close(); lh = bkc.openLedgerNoRecovery(ledgerId, digestCorrect, passwdCorrect); - assertTrue("Should be back to fully replication", verifyFullyReplicated(lh, 100)); + assertTrue(verifyFullyReplicated(lh, 100), "Should be back to fully replication"); lh.close(); - bookieSrc = bs.get(0).getLocalAddress(); - bs.get(0).shutdown(); - bs.remove(0); + bookieSrc = addressByIndex(0); + killBookie(0); startNewBookie(); // Check that entries are missing lh = bkc.openLedgerNoRecovery(ledgerId, digestCorrect, passwdCorrect); - assertFalse("Should be entries missing", verifyFullyReplicated(lh, 100)); + assertFalse(verifyFullyReplicated(lh, 100), "Should be entries missing"); lh.close(); // Try to recover with no password in conf @@ -868,7 +840,7 @@ public void recoverWithoutPasswordInConf() throws Exception { bka.close(); lh = bkc.openLedgerNoRecovery(ledgerId, digestCorrect, passwdCorrect); - assertTrue("Should be back to fully replication", verifyFullyReplicated(lh, 100)); + assertTrue(verifyFullyReplicated(lh, 100), "Should be back to fully replication"); lh.close(); } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BookieRecoveryUseIOThreadTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BookieRecoveryUseIOThreadTest.java new file mode 100644 index 00000000000..3515c7699f9 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BookieRecoveryUseIOThreadTest.java @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.bookkeeper.client; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.io.IOException; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.atomic.AtomicInteger; +import org.apache.bookkeeper.conf.ClientConfiguration; +import org.apache.bookkeeper.test.BookKeeperClusterTestCase; +import org.junit.jupiter.api.Test; +/** + * Tests for Bookie recovery use IO threads. + */ +public class BookieRecoveryUseIOThreadTest extends BookKeeperClusterTestCase { + + public BookieRecoveryUseIOThreadTest() { + super(1); + baseConf.setNumAddWorkerThreads(0); + baseConf.setNumReadWorkerThreads(0); + baseConf.setNumHighPriorityWorkerThreads(0); + } + + @Test + public void testRecoveryClosedLedger() throws BKException, IOException, InterruptedException { + // test the v2 protocol when using IO thread to handle the request + ClientConfiguration conf = new ClientConfiguration(); + conf.setMetadataServiceUri(zkUtil.getMetadataServiceUri()); + conf.setUseV2WireProtocol(true); + AtomicInteger finalRc = new AtomicInteger(Integer.MAX_VALUE); + CountDownLatch latch = new CountDownLatch(1); + try (BookKeeper bkc = new BookKeeper(conf)) { + bkc.asyncCreateLedger(1, 1, BookKeeper.DigestType.CRC32, "".getBytes(), + new AsyncCallback.CreateCallback() { + @Override + public void createComplete(int rc, LedgerHandle lh, Object ctx) { + lh.asyncAddEntry("hello".getBytes(), new AsyncCallback.AddCallback() { + @Override + public void addComplete(int rc, LedgerHandle lh, long entryId, Object ctx) { + if (rc == BKException.Code.OK) { + bkc.asyncOpenLedger(lh.ledgerId, BookKeeper.DigestType.CRC32, "".getBytes(), + new AsyncCallback.OpenCallback() { + @Override + public void openComplete(int rc, LedgerHandle lh, Object ctx) { + finalRc.set(rc); + latch.countDown(); + } + }, null); + } + } + }, null); + } + }, null); + latch.await(); + } + assertEquals(finalRc.get(), org.apache.bookkeeper.client.api.BKException.Code.OK); + } +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BookieWriteLedgerTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BookieWriteLedgerTest.java index 82a87c3d934..ea0d1b56b49 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BookieWriteLedgerTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BookieWriteLedgerTest.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file @@ -26,6 +26,7 @@ import static org.apache.bookkeeper.client.BookKeeperClientStats.READ_OP_DM; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotEquals; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; @@ -35,10 +36,13 @@ import io.netty.buffer.PooledByteBufAllocator; import io.netty.buffer.Unpooled; import io.netty.buffer.UnpooledByteBufAllocator; - import java.io.IOException; +import java.net.URI; import java.nio.ByteBuffer; import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; import java.util.Enumeration; import java.util.HashMap; import java.util.List; @@ -50,9 +54,8 @@ import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; - -import org.apache.bookkeeper.bookie.Bookie; import org.apache.bookkeeper.bookie.BookieException; +import org.apache.bookkeeper.bookie.TestBookieImpl; import org.apache.bookkeeper.client.AsyncCallback.AddCallback; import org.apache.bookkeeper.client.BKException.BKLedgerClosedException; import org.apache.bookkeeper.client.BookKeeper.DigestType; @@ -60,27 +63,53 @@ import org.apache.bookkeeper.client.api.ReadHandle; import org.apache.bookkeeper.client.api.WriteAdvHandle; import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.meta.LedgerManagerFactory; +import org.apache.bookkeeper.meta.LedgerMetadataSerDe; +import org.apache.bookkeeper.meta.LedgerUnderreplicationManager; import org.apache.bookkeeper.meta.LongHierarchicalLedgerManagerFactory; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.meta.MetadataBookieDriver; +import org.apache.bookkeeper.meta.MetadataDrivers; +import org.apache.bookkeeper.meta.zk.ZKMetadataDriverBase; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.replication.ReplicationTestUtil; +import org.apache.bookkeeper.replication.ReplicationWorker; +import org.apache.bookkeeper.stats.NullStatsLogger; import org.apache.bookkeeper.test.BookKeeperClusterTestCase; +import org.apache.bookkeeper.test.TestStatsProvider; +import org.apache.bookkeeper.util.BookKeeperConstants; import org.apache.commons.lang3.tuple.Pair; -import org.apache.zookeeper.KeeperException; +import org.awaitility.Awaitility; import org.junit.Assert; import org.junit.Before; import org.junit.Test; -import org.powermock.reflect.Whitebox; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * Testing ledger write entry cases. 
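+ * <p>Now parameterized: each test runs four times, across the combinations of
+ * {@code useV2} (v2 vs v3 wire protocol) and {@code writeJournal} (journalWriteData
+ * enabled or disabled).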
*/ +@RunWith(Parameterized.class) public class BookieWriteLedgerTest extends BookKeeperClusterTestCase implements AddCallback { private static final Logger LOG = LoggerFactory .getLogger(BookieWriteLedgerTest.class); + @Parameterized.Parameters + public static Collection data() { + return Arrays.asList(new Object[][] { + { true, true }, { true, false }, { false, true }, { false, false } + }); + } + + @Parameterized.Parameter(0) + public boolean useV2; + + @Parameterized.Parameter(1) + public boolean writeJournal; + byte[] ledgerPassword = "aaa".getBytes(); LedgerHandle lh, lh2; Enumeration ls; @@ -106,6 +135,9 @@ public SyncObj() { @Override @Before public void setUp() throws Exception { + baseConf.setJournalWriteData(writeJournal); + baseClientConf.setUseV2WireProtocol(useV2); + super.setUp(); rng = new Random(0); // Initialize the Random // Number Generator @@ -150,8 +182,8 @@ public void testWithMultipleBookieFailuresInLastEnsemble() throws Exception { startNewBookie(); // Shutdown three bookies in the last ensemble and continue writing - List ensemble = lh.getLedgerMetadata() - .getEnsembles().entrySet().iterator().next().getValue(); + List ensemble = lh.getLedgerMetadata() + .getAllEnsembles().entrySet().iterator().next().getValue(); killBookie(ensemble.get(0)); killBookie(ensemble.get(1)); killBookie(ensemble.get(2)); @@ -195,8 +227,8 @@ public void testWriteAndReadStats() throws Exception { CountDownLatch sleepLatch1 = new CountDownLatch(1); CountDownLatch sleepLatch2 = new CountDownLatch(1); - List ensemble = lh.getLedgerMetadata() - .getEnsembles().entrySet().iterator().next().getValue(); + List ensemble = lh.getLedgerMetadata() + .getAllEnsembles().entrySet().iterator().next().getValue(); sleepBookie(ensemble.get(0), sleepLatch1); @@ -244,8 +276,7 @@ public void testWriteAndReadStats() throws Exception { // Replace the bookie with a fake bookie ServerConfiguration conf = killBookie(ensemble.get(0)); BookieWriteLedgerTest.CorruptReadBookie corruptBookie = new BookieWriteLedgerTest.CorruptReadBookie(conf); - bs.add(startBookie(conf, corruptBookie)); - bsConfs.add(conf); + startAndAddBookie(conf, corruptBookie); i = numEntriesToWrite; numEntriesToWrite = numEntriesToWrite + 50; @@ -292,7 +323,7 @@ public void testDelayedWriteEnsembleChange() throws Exception { CountDownLatch sleepLatch1 = new CountDownLatch(1); // get bookie at index-0 - BookieSocketAddress bookie1 = lh.getCurrentEnsemble().get(0); + BookieId bookie1 = lh.getCurrentEnsemble().get(0); sleepBookie(bookie1, sleepLatch1); int i = numEntriesToWrite; @@ -331,7 +362,7 @@ public void testDelayedWriteEnsembleChange() throws Exception { sleepLatch1.countDown(); // get the bookie at index-0 again, this must be different. - BookieSocketAddress bookie2 = lh.getCurrentEnsemble().get(0); + BookieId bookie2 = lh.getCurrentEnsemble().get(0); assertFalse( "Delayed write error must have forced ensemble change", @@ -389,7 +420,7 @@ public void testLedgerCreateAdv() throws Exception { startNewBookie(); // Shutdown one bookie in the last ensemble and continue writing - List ensemble = lh.getLedgerMetadata().getEnsembles().entrySet().iterator().next() + List ensemble = lh.getLedgerMetadata().getAllEnsembles().entrySet().iterator().next() .getValue(); killBookie(ensemble.get(0)); @@ -435,7 +466,7 @@ public void testLedgerCreateAdvAndWriteNonAdv() throws Exception { } /** - * Verify that LedgerHandleAdv cannnot handle addEntry without the entryId. + * Verify that LedgerHandleAdv cannot handle addEntry without the entryId. 
* * @throws Exception */ @@ -528,7 +559,7 @@ public void testLedgerCreateAdvWithLedgerId() throws Exception { startNewBookie(); // Shutdown one bookie in the last ensemble and continue writing - List ensemble = lh.getLedgerMetadata().getEnsembles().entrySet().iterator().next() + List ensemble = lh.getLedgerMetadata().getAllEnsembles().entrySet().iterator().next() .getValue(); killBookie(ensemble.get(0)); @@ -588,12 +619,44 @@ public void testLedgerCreateWithCustomMetadata() throws Exception { lh = bkc.openLedger(ledgerId, digestType, ledgerPassword); Map outputCustomMetadataMap = lh.getCustomMetadata(); assertTrue("Can't retrieve proper Custom Data", - LedgerMetadata.areByteArrayValMapsEqual(inputCustomMetadataMap, outputCustomMetadataMap)); + areByteArrayValMapsEqual(inputCustomMetadataMap, outputCustomMetadataMap)); lh.close(); bkc.deleteLedger(ledgerId); } } + /** + * Routine to compare two {@code Map}; Since the values in the map are {@code byte[]}, we can't use + * {@code Map.equals}. + * @param first + * The first map + * @param second + * The second map to compare with + * @return true if the 2 maps contain the exact set of {@code } pairs. + */ + public static boolean areByteArrayValMapsEqual(Map first, Map second) { + if (first == null && second == null) { + return true; + } + + // above check confirms that both are not null; + // if one is null the other isn't; so they must + // be different + if (first == null || second == null) { + return false; + } + + if (first.size() != second.size()) { + return false; + } + for (Map.Entry entry : first.entrySet()) { + if (!Arrays.equals(entry.getValue(), second.get(entry.getKey()))) { + return false; + } + } + return true; + } + /* * Verify the functionality of Advanced Ledger which accepts ledgerId as * input and returns LedgerHandleAdv. 
LedgerHandleAdv takes entryId for @@ -701,7 +764,7 @@ public void testLedgerCreateAdvWithLedgerIdInLoop() throws Exception { .withDigestType(org.apache.bookkeeper.client.api.DigestType.CRC32) .withPassword(ledgerPassword).makeAdv().withLedgerId(ledgerId) .execute() - .thenApply(writer -> { // Add entries to ledger when created + .thenCompose(writer -> { // Add entries to ledger when created LOG.info("Writing stream of {} entries to {}", numEntriesToWrite, ledgerId); List entries = rng.ints(numEntriesToWrite, 0, maxInt) @@ -720,8 +783,8 @@ public void testLedgerCreateAdvWithLedgerIdInLoop() throws Exception { ledgerId, entryId, entry.slice().readInt()); lastRequest = writer.writeAsync(entryId, entry); } - lastRequest.join(); - return Pair.of(writer, entries); + return lastRequest + .thenApply(___ -> Pair.of(writer, entries)); }); }) .parallel().map(CompletableFuture::join) // wait for all creations and adds in parallel @@ -784,7 +847,9 @@ public void testLedgerCreateAdvWithLedgerIdInLoop2() throws Exception { ledgerId %= 9999999999L; } - LOG.debug("Iteration: {} LedgerId: {}", lc, ledgerId); + if (LOG.isDebugEnabled()) { + LOG.debug("Iteration: {} LedgerId: {}", lc, ledgerId); + } lh = bkc.createLedgerAdv(ledgerId, 5, 3, 2, digestType, ledgerPassword, null); lhArray[lc] = lh; @@ -811,7 +876,9 @@ public void testLedgerCreateAdvWithLedgerIdInLoop2() throws Exception { for (int lc = 0; lc < ledgerCount; lc++) { // Read and verify long lid = lhArray[lc].getId(); - LOG.debug("readEntries for lc: {} ledgerId: {} ", lc, lhArray[lc].getId()); + if (LOG.isDebugEnabled()) { + LOG.debug("readEntries for lc: {} ledgerId: {} ", lc, lhArray[lc].getId()); + } readEntriesAndValidateDataArray(lhArray[lc], entryList.get(lc)); lhArray[lc].close(); bkc.deleteLedger(lid); @@ -846,8 +913,8 @@ public void testAsyncWritesWithMultipleFailuresInLastEnsemble() startNewBookie(); // Shutdown three bookies in the last ensemble and continue writing - List ensemble = lh.getLedgerMetadata() - .getEnsembles().entrySet().iterator().next().getValue(); + List ensemble = lh.getLedgerMetadata() + .getAllEnsembles().entrySet().iterator().next().getValue(); killBookie(ensemble.get(0)); killBookie(ensemble.get(1)); killBookie(ensemble.get(2)); @@ -870,7 +937,9 @@ public void testAsyncWritesWithMultipleFailuresInLastEnsemble() // wait for all entries to be acknowledged for the first ledger synchronized (syncObj1) { while (syncObj1.counter < 1) { - LOG.debug("Entries counter = " + syncObj1.counter); + if (LOG.isDebugEnabled()) { + LOG.debug("Entries counter = " + syncObj1.counter); + } syncObj1.wait(); } assertEquals(BKException.Code.OK, syncObj1.rc); @@ -878,7 +947,9 @@ public void testAsyncWritesWithMultipleFailuresInLastEnsemble() // wait for all entries to be acknowledged for the second ledger synchronized (syncObj2) { while (syncObj2.counter < 1) { - LOG.debug("Entries counter = " + syncObj2.counter); + if (LOG.isDebugEnabled()) { + LOG.debug("Entries counter = " + syncObj2.counter); + } syncObj2.wait(); } assertEquals(BKException.Code.OK, syncObj2.rc); @@ -919,7 +990,7 @@ public void testLedgerCreateAdvWithAsyncWritesWithBookieFailures() throws Except } // Start One more bookie and shutdown one from last ensemble before reading startNewBookie(); - List ensemble = lh.getLedgerMetadata().getEnsembles().entrySet().iterator().next() + List ensemble = lh.getLedgerMetadata().getAllEnsembles().entrySet().iterator().next() .getValue(); killBookie(ensemble.get(0)); @@ -973,7 +1044,7 @@ public void 
testLedgerHandleAdvOutOfOrderWriteAndFrocedEnsembleChange() throws E lh.asyncAddEntry(10, entry1.array(), 0, entry1.capacity(), this, syncObj1); // Make sure entry-10 goes to the bookies and gets response. - java.util.Queue myPendingAddOps = Whitebox.getInternalState(lh, "pendingAddOps"); + java.util.Queue myPendingAddOps = lh.getPendingAddOps(); PendingAddOp addOp = null; boolean pendingAddOpReceived = false; @@ -987,9 +1058,9 @@ public void testLedgerHandleAdvOutOfOrderWriteAndFrocedEnsembleChange() throws E } CountDownLatch sleepLatch1 = new CountDownLatch(1); - List ensemble; + List ensemble; - ensemble = lh.getLedgerMetadata().getEnsembles().entrySet().iterator().next().getValue(); + ensemble = lh.getLedgerMetadata().getAllEnsembles().entrySet().iterator().next().getValue(); // Put all 3 bookies to sleep and start 3 new ones sleepBookie(ensemble.get(0), sleepLatch1); @@ -1073,7 +1144,7 @@ public void testLedgerCreateAdvWithRandomAsyncWritesWithBookieFailuresBetweenWri if (j == numEntriesToWrite / 2) { // Start One more bookie and shutdown one from last ensemble at half-way startNewBookie(); - List ensemble = lh.getLedgerMetadata().getEnsembles().entrySet() + List ensemble = lh.getLedgerMetadata().getAllEnsembles().entrySet() .iterator().next().getValue(); killBookie(ensemble.get(0)); } @@ -1142,7 +1213,7 @@ public void testLedgerCreateAdvWithRandomAsyncWritesWithBookieFailures() throws } // Start One more bookie and shutdown one from last ensemble before reading startNewBookie(); - List ensemble = lh.getLedgerMetadata().getEnsembles().entrySet().iterator().next() + List ensemble = lh.getLedgerMetadata().getAllEnsembles().entrySet().iterator().next() .getValue(); killBookie(ensemble.get(0)); @@ -1395,6 +1466,78 @@ public void testLedgerCreateByteBufRefCnt() throws Exception { bkc.deleteLedger(lh.ledgerId); } + @Test + public void testReadLacNotSameWithMetadataLedgerReplication() throws Exception { + lh = bkc.createLedger(3, 3, 2, digestType, ledgerPassword); + for (int i = 0; i < 10; ++i) { + ByteBuffer entry = ByteBuffer.allocate(4); + entry.putInt(rng.nextInt(maxInt)); + entry.position(0); + lh.addEntry(entry.array()); + } + + List ensemble = lh.getLedgerMetadata().getAllEnsembles().entrySet().iterator().next().getValue(); + assertEquals(1, lh.getLedgerMetadata().getAllEnsembles().size()); + killBookie(ensemble.get(1)); + + try { + lh.ensembleChangeLoop(ensemble, Collections.singletonMap(1, ensemble.get(1))); + } catch (Exception e) { + fail(); + } + + LedgerHandle lh1 = bkc.openLedgerNoRecovery(lh.ledgerId, digestType, ledgerPassword); + assertEquals(2, lh1.getLedgerMetadata().getAllEnsembles().size()); + List firstEnsemble = lh1.getLedgerMetadata().getAllEnsembles().firstEntry().getValue(); + + long entryId = lh1.getLedgerMetadata().getAllEnsembles().lastEntry().getKey() - 1; + try { + lh1.readAsync(entryId, entryId).get(); + fail(); + } catch (Exception e) { + LOG.info("Failed to read entry: {} ", entryId, e); + } + + MetadataBookieDriver driver = MetadataDrivers.getBookieDriver( + URI.create(baseConf.getMetadataServiceUri())); + driver.initialize( + baseConf, + NullStatsLogger.INSTANCE); + // initialize urReplicationManager + LedgerManagerFactory mFactory = driver.getLedgerManagerFactory(); + LedgerUnderreplicationManager underReplicationManager = mFactory.newLedgerUnderreplicationManager(); + baseConf.setOpenLedgerRereplicationGracePeriod(String.valueOf(30)); + + + ReplicationWorker replicationWorker = new ReplicationWorker(baseConf); + replicationWorker.start(); + String 
basePath = ZKMetadataDriverBase.resolveZkLedgersRootPath(baseClientConf) + '/' + + BookKeeperConstants.UNDER_REPLICATION_NODE + + BookKeeperConstants.DEFAULT_ZK_LEDGERS_ROOT_PATH; + + try { + underReplicationManager.markLedgerUnderreplicated(lh1.getId(), ensemble.get(1).toString()); + + Awaitility.waitAtMost(30, TimeUnit.SECONDS).untilAsserted(() -> + assertFalse(ReplicationTestUtil.isLedgerInUnderReplication(zkc, lh1.getId(), basePath)) + ); + + assertNotEquals(firstEnsemble, lh1.getLedgerMetadata().getAllEnsembles().firstEntry().getValue()); + } finally { + replicationWorker.shutdown(); + } + } + + @Test + public void testLedgerMetadataTest() throws Exception { + baseClientConf.setLedgerMetadataFormatVersion(LedgerMetadataSerDe.METADATA_FORMAT_VERSION_2); + BookKeeperTestClient bkc = new BookKeeperTestClient(baseClientConf, new TestStatsProvider()); + // Create a ledger + lh = bkc.createLedger(3, 3, 2, digestType, ledgerPassword); + assertEquals(lh.getLedgerMetadata().getMetadataFormatVersion(), LedgerMetadataSerDe.METADATA_FORMAT_VERSION_2); + lh.close(); + } + private void readEntries(LedgerHandle lh, List entries) throws InterruptedException, BKException { ls = lh.readEntries(0, numEntriesToWrite - 1); int index = 0; @@ -1402,10 +1545,12 @@ private void readEntries(LedgerHandle lh, List entries) throws Interrupt ByteBuffer origbb = ByteBuffer.wrap(entries.get(index++)); Integer origEntry = origbb.getInt(); ByteBuffer result = ByteBuffer.wrap(ls.nextElement().getEntry()); - LOG.debug("Length of result: " + result.capacity()); - LOG.debug("Original entry: " + origEntry); Integer retrEntry = result.getInt(); - LOG.debug("Retrieved entry: " + retrEntry); + if (LOG.isDebugEnabled()) { + LOG.debug("Length of result: " + result.capacity()); + LOG.debug("Original entry: " + origEntry); + LOG.debug("Retrieved entry: " + retrEntry); + } assertTrue("Checking entry " + index + " for equality", origEntry .equals(retrEntry)); } @@ -1432,8 +1577,10 @@ private void readEntriesAndValidateDataArray(LedgerHandle lh, List entri while (ls.hasMoreElements()) { byte[] originalData = entries.get(index++); byte[] receivedData = ls.nextElement().getEntry(); - LOG.debug("Length of originalData: {}", originalData.length); - LOG.debug("Length of receivedData: {}", receivedData.length); + if (LOG.isDebugEnabled()) { + LOG.debug("Length of originalData: {}", originalData.length); + LOG.debug("Length of receivedData: {}", receivedData.length); + } assertEquals( String.format("LedgerID: %d EntryID: %d OriginalDataLength: %d ReceivedDataLength: %d", lh.getId(), (index - 1), originalData.length, receivedData.length), @@ -1454,18 +1601,18 @@ public void addComplete(int rc, LedgerHandle lh, long entryId, Object ctx) { } } - static class CorruptReadBookie extends Bookie { + static class CorruptReadBookie extends TestBookieImpl { static final Logger LOG = LoggerFactory.getLogger(CorruptReadBookie.class); ByteBuf localBuf; public CorruptReadBookie(ServerConfiguration conf) - throws IOException, KeeperException, InterruptedException, BookieException { + throws Exception { super(conf); } @Override - public ByteBuf readEntry(long ledgerId, long entryId) throws IOException, NoLedgerException { + public ByteBuf readEntry(long ledgerId, long entryId) throws IOException, NoLedgerException, BookieException { localBuf = super.readEntry(ledgerId, entryId); int capacity = 0; diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BookieWriteLedgersWithDifferentDigestsTest.java 
b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BookieWriteLedgersWithDifferentDigestsTest.java index 9ab461a9b5c..c93d65e40aa 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BookieWriteLedgersWithDifferentDigestsTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BookieWriteLedgersWithDifferentDigestsTest.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -31,7 +31,6 @@ import java.util.Collection; import java.util.Enumeration; import java.util.Random; - import org.apache.bookkeeper.client.BookKeeper.DigestType; import org.apache.bookkeeper.test.BookKeeperClusterTestCase; import org.junit.Before; @@ -182,10 +181,12 @@ private void readEntries(LedgerHandle lh, ArrayList entries) throws Inte ByteBuffer origbb = ByteBuffer.wrap(entries.get(index++)); Integer origEntry = origbb.getInt(); ByteBuffer result = ByteBuffer.wrap(ls.nextElement().getEntry()); - LOG.debug("Length of result: " + result.capacity()); - LOG.debug("Original entry: " + origEntry); Integer retrEntry = result.getInt(); - LOG.debug("Retrieved entry: " + retrEntry); + if (LOG.isDebugEnabled()) { + LOG.debug("Length of result: " + result.capacity()); + LOG.debug("Original entry: " + origEntry); + LOG.debug("Retrieved entry: " + retrEntry); + } assertTrue("Checking entry " + index + " for equality", origEntry .equals(retrEntry)); } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/ClientUtil.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/ClientUtil.java index bb3e5532f7d..3f8af53c133 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/ClientUtil.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/ClientUtil.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -17,19 +17,28 @@ */ package org.apache.bookkeeper.client; +import static java.nio.charset.StandardCharsets.UTF_8; + import io.netty.buffer.ByteBuf; import io.netty.buffer.Unpooled; - +import io.netty.buffer.UnpooledByteBufAllocator; import java.security.GeneralSecurityException; - +import java.util.function.Function; +import org.apache.bookkeeper.client.api.LedgerMetadata; +import org.apache.bookkeeper.meta.LedgerManager; import org.apache.bookkeeper.proto.DataFormats.LedgerMetadataFormat.DigestType; +import org.apache.bookkeeper.proto.MockBookieClient; import org.apache.bookkeeper.proto.checksum.DigestManager; -import org.apache.bookkeeper.util.ByteBufList; +import org.apache.bookkeeper.versioning.Versioned; /** * Client utilities. 
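 * <p>The {@code setupLedger} and {@code transformMetadata} helpers added below wrap the
 * asynchronous {@code LedgerManager} metadata calls, letting mock-based tests seed and
 * rewrite ledger metadata with a single blocking call.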
*/ public class ClientUtil { + public static final org.apache.bookkeeper.client.api.DigestType DIGEST_TYPE = + org.apache.bookkeeper.client.api.DigestType.CRC32C; + public static final byte[] PASSWD = "foobar".getBytes(UTF_8); + public static ByteBuf generatePacket(long ledgerId, long entryId, long lastAddConfirmed, long length, byte[] data) throws GeneralSecurityException { return generatePacket(ledgerId, entryId, lastAddConfirmed, length, data, 0, data.length); @@ -37,9 +46,10 @@ public static ByteBuf generatePacket(long ledgerId, long entryId, long lastAddCo public static ByteBuf generatePacket(long ledgerId, long entryId, long lastAddConfirmed, long length, byte[] data, int offset, int len) throws GeneralSecurityException { - DigestManager dm = DigestManager.instantiate(ledgerId, new byte[2], DigestType.CRC32); - return ByteBufList.coalesce(dm.computeDigestAndPackageForSending(entryId, lastAddConfirmed, length, - Unpooled.wrappedBuffer(data, offset, len))); + DigestManager dm = DigestManager.instantiate(ledgerId, new byte[2], DigestType.CRC32, + UnpooledByteBufAllocator.DEFAULT, true); + return MockBookieClient.copyDataWithSkipHeader(dm.computeDigestAndPackageForSending(entryId, lastAddConfirmed, + length, Unpooled.wrappedBuffer(data, offset, len), new byte[20], 0)); } /** @@ -49,4 +59,29 @@ public static boolean isLedgerOpen(LedgerHandle handle) { return !handle.getLedgerMetadata().isClosed(); } + public static Versioned setupLedger(ClientContext clientCtx, long ledgerId, + LedgerMetadataBuilder builder) throws Exception { + return setupLedger(clientCtx.getLedgerManager(), ledgerId, builder); + } + + public static Versioned setupLedger(LedgerManager ledgerManager, long ledgerId, + LedgerMetadataBuilder builder) throws Exception { + LedgerMetadata md = builder.withPassword(PASSWD).withDigestType(DIGEST_TYPE).withId(ledgerId).build(); + return ledgerManager.createLedgerMetadata(ledgerId, md).get(); + } + + public static Versioned transformMetadata(ClientContext clientCtx, long ledgerId, + Function transform) + throws Exception { + return transformMetadata(clientCtx.getLedgerManager(), ledgerId, transform); + } + + public static Versioned transformMetadata(LedgerManager ledgerManager, long ledgerId, + Function transform) + throws Exception { + Versioned current = ledgerManager.readLedgerMetadata(ledgerId).get(); + return ledgerManager.writeLedgerMetadata(ledgerId, transform.apply(current.getValue()), + current.getVersion()).get(); + } + } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/ConcurrentV2RecoveryTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/ConcurrentV2RecoveryTest.java index 9a94aafa12b..2a8a57735f8 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/ConcurrentV2RecoveryTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/ConcurrentV2RecoveryTest.java @@ -29,15 +29,12 @@ import java.util.concurrent.Executors; import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; - import org.apache.bookkeeper.client.BookKeeper.DigestType; import org.apache.bookkeeper.conf.ClientConfiguration; import org.apache.bookkeeper.meta.HierarchicalLedgerManagerFactory; import org.apache.bookkeeper.test.BookKeeperClusterTestCase; - import org.junit.Assert; import org.junit.Test; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/DeferredSyncTest.java 
b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/DeferredSyncTest.java index 95dec9c21a9..a49b5775945 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/DeferredSyncTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/DeferredSyncTest.java @@ -19,6 +19,7 @@ import static org.apache.bookkeeper.common.concurrent.FutureUtils.result; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; @@ -28,7 +29,7 @@ import org.apache.bookkeeper.client.api.WriteAdvHandle; import org.apache.bookkeeper.client.api.WriteFlag; import org.apache.bookkeeper.client.api.WriteHandle; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.BookieId; import org.junit.Test; /** @@ -127,7 +128,7 @@ public void testForceRequiresFullEnsemble() throws Exception { assertEquals(NUM_ENTRIES - 1, wh.getLastAddPushed()); assertEquals(-1, wh.getLastAddConfirmed()); - BookieSocketAddress bookieAddress = wh.getLedgerMetadata().getEnsembleAt(wh.getLastAddPushed()).get(0); + BookieId bookieAddress = wh.getLedgerMetadata().getEnsembleAt(wh.getLastAddPushed()).get(0); killBookie(bookieAddress); // write should succeed (we still have 2 bookies out of 3) @@ -145,7 +146,7 @@ public void testForceRequiresFullEnsemble() throws Exception { } @Test - public void testForceWillAdvanceLacOnlyUpToLastAcknoledgedWrite() throws Exception { + public void testForceWillAdvanceLacOnlyUpToLastAcknowledgedWrite() throws Exception { try (WriteHandle wh = result(newCreateLedgerOp() .withEnsembleSize(3) .withWriteQuorumSize(3) @@ -161,7 +162,7 @@ public void testForceWillAdvanceLacOnlyUpToLastAcknoledgedWrite() throws Excepti assertEquals(-1, wh.getLastAddConfirmed()); // one bookie will stop sending acks for forceLedger - BookieSocketAddress bookieAddress = wh.getLedgerMetadata().getEnsembleAt(wh.getLastAddPushed()).get(0); + BookieId bookieAddress = wh.getLedgerMetadata().getEnsembleAt(wh.getLastAddPushed()).get(0); suspendBookieForceLedgerAcks(bookieAddress); // start and complete a force, lastAddConfirmed cannot be "lastAddPushedAfterSuspendedWrite" @@ -213,7 +214,7 @@ public void testForbiddenEnsembleChange() throws Exception { // expected } LedgerHandle lh = (LedgerHandle) wh; - assertTrue(lh.getDelayedWriteFailedBookies().isEmpty()); + assertFalse(lh.hasDelayedWriteFailedBookies()); } } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/ExplicitLacTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/ExplicitLacTest.java index 5c5c24b31ef..42d1aebf6ac 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/ExplicitLacTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/ExplicitLacTest.java @@ -35,7 +35,6 @@ import org.apache.bookkeeper.conf.ClientConfiguration; import org.apache.bookkeeper.test.BookKeeperClusterTestCase; import org.apache.bookkeeper.util.TestUtils; -import org.junit.Assume; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; @@ -135,19 +134,10 @@ public void testReadHandleWithNoExplicitLAC() throws Exception { @Test public void testExplicitLACIsPersisted() throws Exception { - /* - * In DbLedgerStorage scenario, TransientLedgerInfo is not persisted - - * https://github.com/apache/bookkeeper/issues/1533. - * - * So for this testcase we are ignoring DbLedgerStorage. 
It can/should - * be enabled when Issue-1533 is fixed. - */ - Assume.assumeTrue(!baseConf.getLedgerStorageClass().equals(DbLedgerStorage.class.getName())); ClientConfiguration confWithNoExplicitLAC = new ClientConfiguration(); confWithNoExplicitLAC.setMetadataServiceUri(zkUtil.getMetadataServiceUri()); // enable explicitLacFlush by setting non-zero value for // explictLacInterval - long explictLacInterval = 100; confWithNoExplicitLAC.setExplictLacInterval(50); BookKeeper bkcWithExplicitLAC = new BookKeeper(confWithNoExplicitLAC); @@ -340,5 +330,25 @@ public void testReadHandleWithExplicitLACAndDeferredSync() throws Exception { bkcWithExplicitLAC.close(); } + @Test + public void fallbackV3() throws Exception { + ClientConfiguration v2Conf = new ClientConfiguration(); + v2Conf.setUseV2WireProtocol(true); + v2Conf.setMetadataServiceUri(zkUtil.getMetadataServiceUri()); + v2Conf.setExplictLacInterval(10); + + BookKeeper bookKeeper = new BookKeeper(v2Conf); + LedgerHandle write = (LedgerHandle) bookKeeper.createLedger(1, + 1, + 1, + DigestType.MAC, + "pass".getBytes()); + write.addEntry("test".getBytes()); + TestUtils.waitUntilExplicitLacUpdated(write, 0); + long lac = write.readExplicitLastConfirmed(); + assertEquals(0, lac); + write.close(); + bookKeeper.close(); + } } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/GenericEnsemblePlacementPolicyTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/GenericEnsemblePlacementPolicyTest.java index bf369cf08ba..9a30b5930d0 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/GenericEnsemblePlacementPolicyTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/GenericEnsemblePlacementPolicyTest.java @@ -24,19 +24,24 @@ import java.nio.charset.StandardCharsets; import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Set; - -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.test.BookKeeperClusterTestCase; import org.junit.Before; import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; /** * Testing a generic ensemble placement policy. 
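+ * <p>Now parameterized to run once with disk-weight-based placement disabled and once
+ * with it enabled.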
*/ +@RunWith(Parameterized.class) public class GenericEnsemblePlacementPolicyTest extends BookKeeperClusterTestCase { private BookKeeper.DigestType digestType = BookKeeper.DigestType.CRC32; @@ -46,9 +51,15 @@ public class GenericEnsemblePlacementPolicyTest extends BookKeeperClusterTestCas private static List> customMetadataOnNewEnsembleStack = new ArrayList<>(); private static List> customMetadataOnReplaceBookieStack = new ArrayList<>(); - public GenericEnsemblePlacementPolicyTest() { + @Parameters + public static Collection getDiskWeightBasedPlacementEnabled() { + return Arrays.asList(new Object[][] { { false }, { true } }); + } + + public GenericEnsemblePlacementPolicyTest(boolean diskWeightBasedPlacementEnabled) { super(0); baseClientConf.setEnsemblePlacementPolicy(CustomEnsemblePlacementPolicy.class); + baseClientConf.setDiskWeightBasedPlacementEnabled(diskWeightBasedPlacementEnabled); } /** @@ -57,9 +68,9 @@ public GenericEnsemblePlacementPolicyTest() { public static final class CustomEnsemblePlacementPolicy extends DefaultEnsemblePlacementPolicy { @Override - public BookieSocketAddress replaceBookie(int ensembleSize, int writeQuorumSize, - int ackQuorumSize, Map customMetadata, Set currentEnsemble, - BookieSocketAddress bookieToReplace, Set excludeBookies) + public PlacementResult replaceBookie(int ensembleSize, int writeQuorumSize, + int ackQuorumSize, Map customMetadata, List currentEnsemble, + BookieId bookieToReplace, Set excludeBookies) throws BKException.BKNotEnoughBookiesException { new Exception("replaceBookie " + ensembleSize + "," + customMetadata).printStackTrace(); assertNotNull(customMetadata); @@ -69,8 +80,8 @@ public BookieSocketAddress replaceBookie(int ensembleSize, int writeQuorumSize, } @Override - public ArrayList newEnsemble(int ensembleSize, int quorumSize, - int ackQuorumSize, Map customMetadata, Set excludeBookies) + public PlacementResult> newEnsemble(int ensembleSize, int quorumSize, + int ackQuorumSize, Map customMetadata, Set excludeBookies) throws BKException.BKNotEnoughBookiesException { assertNotNull(customMetadata); customMetadataOnNewEnsembleStack.add(customMetadata); @@ -102,7 +113,7 @@ public void testNewEnsemble() throws Exception { } @Test - public void testNewEnsembleWithNotEnoughtBookies() throws Exception { + public void testNewEnsembleWithNotEnoughBookies() throws Exception { numBookies = 0; try { startBKCluster(zkUtil.getMetadataServiceUri()); @@ -132,7 +143,7 @@ public void testReplaceBookie() throws Exception { try (LedgerHandle lh = bk.createLedger(2, 2, 2, digestType, PASSWORD.getBytes(), customMetadata)) { lh.addEntry(value); long lId = lh.getId(); - List ensembleAtFirstEntry = lh.getLedgerMetadata().getEnsemble(lId); + List ensembleAtFirstEntry = lh.getLedgerMetadata().getEnsembleAt(lId); assertEquals(2, ensembleAtFirstEntry.size()); killBookie(ensembleAtFirstEntry.get(0)); lh.addEntry(value); diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/HandleFailuresTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/HandleFailuresTest.java new file mode 100644 index 00000000000..d1182668e9c --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/HandleFailuresTest.java @@ -0,0 +1,501 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.client; + +import static org.apache.bookkeeper.util.TestUtils.assertEventuallyTrue; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyLong; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; + +import com.google.common.collect.Lists; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicReference; +import org.apache.bookkeeper.client.api.LedgerMetadata; +import org.apache.bookkeeper.client.api.WriteFlag; +import org.apache.bookkeeper.common.concurrent.FutureUtils; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.proto.MockBookieClient; +import org.apache.bookkeeper.versioning.Versioned; +import org.junit.Assert; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Ledger recovery tests using mocks rather than a real cluster. + */ +public class HandleFailuresTest { + private static final Logger log = LoggerFactory.getLogger(HandleFailuresTest.class); + + private static final BookieId b1 = new BookieSocketAddress("b1", 3181).toBookieId(); + private static final BookieId b2 = new BookieSocketAddress("b2", 3181).toBookieId(); + private static final BookieId b3 = new BookieSocketAddress("b3", 3181).toBookieId(); + private static final BookieId b4 = new BookieSocketAddress("b4", 3181).toBookieId(); + private static final BookieId b5 = new BookieSocketAddress("b5", 3181).toBookieId(); + + @Test(timeout = 30000) + public void testChangeTriggeredOneTimeForOneFailure() throws Exception { + MockClientContext clientCtx = MockClientContext.create(); + Versioned md = ClientUtil.setupLedger(clientCtx, 10L, + LedgerMetadataBuilder.create().newEnsembleEntry( + 0L, Lists.newArrayList(b1, b2, b3))); + + clientCtx.getMockRegistrationClient().addBookies(b4).get(); + clientCtx.getMockBookieClient().errorBookies(b1); + + LedgerHandle lh = new LedgerHandle(clientCtx, 10L, md, BookKeeper.DigestType.CRC32C, + ClientUtil.PASSWD, WriteFlag.NONE); + lh.appendAsync("entry1".getBytes()); + lh.appendAsync("entry2".getBytes()); + lh.appendAsync("entry3".getBytes()); + lh.appendAsync("entry4".getBytes()); + lh.appendAsync("entry5".getBytes()).get(); + + verify(clientCtx.getLedgerManager(), times(1)).writeLedgerMetadata(anyLong(), any(), any()); + Assert.assertEquals(lh.getLedgerMetadata().getAllEnsembles().size(), 1); + Assert.assertEquals(lh.getLedgerMetadata().getAllEnsembles().get(0L), Lists.newArrayList(b4, b2, b3)); + } + + @Test(timeout = 30000) + public void testSecondFailureOccursWhileFirstBeingHandled() throws Exception { + MockClientContext clientCtx = MockClientContext.create(); + Versioned md = 
ClientUtil.setupLedger(clientCtx, 10L, + LedgerMetadataBuilder.create() + .withEnsembleSize(3).withWriteQuorumSize(3).withAckQuorumSize(3) + .newEnsembleEntry(0L, Lists.newArrayList(b1, b2, b3))); + + clientCtx.getMockRegistrationClient().addBookies(b4, b5).get(); + CompletableFuture b2blocker = new CompletableFuture<>(); + clientCtx.getMockBookieClient().setPreWriteHook( + (bookie, ledgerId, entryId) -> { + if (bookie.equals(b1)) { + return FutureUtils.exception(new BKException.BKWriteException()); + } else if (bookie.equals(b2)) { + return b2blocker; + } else { + return FutureUtils.value(null); + } + }); + CompletableFuture metadataNotifier = new CompletableFuture<>(); + CompletableFuture metadataBlocker = new CompletableFuture<>(); + clientCtx.getMockLedgerManager().setPreWriteHook( + (ledgerId, metadata) -> { + metadataNotifier.complete(null); + return metadataBlocker; + }); + + LedgerHandle lh = new LedgerHandle(clientCtx, 10L, md, BookKeeper.DigestType.CRC32C, + ClientUtil.PASSWD, WriteFlag.NONE); + lh.appendAsync("entry1".getBytes()); + lh.appendAsync("entry2".getBytes()); + lh.appendAsync("entry3".getBytes()); + lh.appendAsync("entry4".getBytes()); + CompletableFuture future = lh.appendAsync("entry5".getBytes()); + + metadataNotifier.get(); // wait for first metadata write to occur + b2blocker.completeExceptionally(new BKException.BKWriteException()); // make b2 requests fail + metadataBlocker.complete(null); + + future.get(); + verify(clientCtx.getLedgerManager(), times(2)).writeLedgerMetadata(anyLong(), any(), any()); + Assert.assertEquals(lh.getLedgerMetadata().getAllEnsembles().size(), 1); + Assert.assertTrue(lh.getLedgerMetadata().getAllEnsembles().get(0L).contains(b3)); + Assert.assertTrue(lh.getLedgerMetadata().getAllEnsembles().get(0L).contains(b4)); + Assert.assertTrue(lh.getLedgerMetadata().getAllEnsembles().get(0L).contains(b5)); + } + + @Test(timeout = 30000) + public void testHandlingFailuresOneBookieFailsImmediately() throws Exception { + MockClientContext clientCtx = MockClientContext.create(); + Versioned md = ClientUtil.setupLedger(clientCtx, 10L, + LedgerMetadataBuilder.create() + .withEnsembleSize(3).withWriteQuorumSize(3).withAckQuorumSize(3) + .newEnsembleEntry(0L, Lists.newArrayList(b1, b2, b3))); + clientCtx.getMockRegistrationClient().addBookies(b4).get(); + clientCtx.getMockBookieClient().errorBookies(b1); + + LedgerHandle lh = new LedgerHandle(clientCtx, 10L, md, BookKeeper.DigestType.CRC32C, + ClientUtil.PASSWD, WriteFlag.NONE); + lh.append("entry1".getBytes()); + lh.close(); + + Assert.assertTrue(lh.getLedgerMetadata().isClosed()); + Assert.assertEquals(lh.getLedgerMetadata().getAllEnsembles().size(), 1); + Assert.assertEquals(lh.getLedgerMetadata().getAllEnsembles().get(0L), Lists.newArrayList(b4, b2, b3)); + } + + @Test(timeout = 30000) + public void testHandlingFailuresOneBookieFailsAfterOneEntry() throws Exception { + MockClientContext clientCtx = MockClientContext.create(); + Versioned md = ClientUtil.setupLedger(clientCtx, 10L, + LedgerMetadataBuilder.create() + .withEnsembleSize(3).withWriteQuorumSize(3).withAckQuorumSize(3) + .newEnsembleEntry(0L, Lists.newArrayList(b1, b2, b3))); + clientCtx.getMockRegistrationClient().addBookies(b4).get(); + + LedgerHandle lh = new LedgerHandle(clientCtx, 10L, md, BookKeeper.DigestType.CRC32C, + ClientUtil.PASSWD, WriteFlag.NONE); + lh.append("entry1".getBytes()); + clientCtx.getMockBookieClient().errorBookies(b1); + lh.append("entry2".getBytes()); + lh.close(); + + 
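+        // entry0 was written to the original ensemble (b1, b2, b3); the failure of b1 before
+        // entry1 forces a second ensemble fragment (b4, b2, b3) starting at entry 1, which is
+        // exactly what the closed metadata checked below should record.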
Assert.assertTrue(lh.getLedgerMetadata().isClosed());
+        Assert.assertEquals(lh.getLedgerMetadata().getAllEnsembles().size(), 2);
+        Assert.assertEquals(lh.getLedgerMetadata().getAllEnsembles().get(0L), Lists.newArrayList(b1, b2, b3));
+        Assert.assertEquals(lh.getLedgerMetadata().getAllEnsembles().get(1L), Lists.newArrayList(b4, b2, b3));
+        Assert.assertEquals(lh.getLedgerMetadata().getLastEntryId(), 1L);
+    }
+
+    @Test(timeout = 30000)
+    public void testHandlingFailuresMultipleBookieFailImmediatelyNotEnoughToReplace() throws Exception {
+        MockClientContext clientCtx = MockClientContext.create();
+        Versioned<LedgerMetadata> md = ClientUtil.setupLedger(clientCtx, 10L,
+                LedgerMetadataBuilder.create()
+                .withEnsembleSize(3).withWriteQuorumSize(3).withAckQuorumSize(3)
+                .newEnsembleEntry(0L, Lists.newArrayList(b1, b2, b3)));
+        clientCtx.getMockBookieClient().errorBookies(b1, b2);
+
+        LedgerHandle lh = new LedgerHandle(clientCtx, 10L, md, BookKeeper.DigestType.CRC32C,
+                                           ClientUtil.PASSWD, WriteFlag.NONE);
+        try {
+            lh.append("entry1".getBytes());
+            Assert.fail("Shouldn't have been able to add");
+        } catch (BKException.BKNotEnoughBookiesException bke) {
+            // correct behaviour
+            assertEventuallyTrue("Failure to add should trigger ledger closure",
+                                 () -> lh.getLedgerMetadata().isClosed());
+            Assert.assertEquals("Ledger should be empty",
+                                lh.getLedgerMetadata().getLastEntryId(), LedgerHandle.INVALID_ENTRY_ID);
+            Assert.assertEquals("Should be only one ensemble", lh.getLedgerMetadata().getAllEnsembles().size(), 1);
+            Assert.assertEquals("Ensemble shouldn't have changed", lh.getLedgerMetadata().getAllEnsembles().get(0L),
+                                Lists.newArrayList(b1, b2, b3));
+        }
+    }
+
+    @Test(timeout = 30000)
+    public void testHandlingFailuresMultipleBookieFailAfterOneEntryNotEnoughToReplace() throws Exception {
+        MockClientContext clientCtx = MockClientContext.create();
+        Versioned<LedgerMetadata> md = ClientUtil.setupLedger(clientCtx, 10L,
+                LedgerMetadataBuilder.create()
+                .withEnsembleSize(3).withWriteQuorumSize(3).withAckQuorumSize(3)
+                .newEnsembleEntry(0L, Lists.newArrayList(b1, b2, b3)));
+
+        LedgerHandle lh = new LedgerHandle(clientCtx, 10L, md, BookKeeper.DigestType.CRC32C,
+                                           ClientUtil.PASSWD, WriteFlag.NONE);
+        lh.append("entry1".getBytes());
+
+        clientCtx.getMockBookieClient().errorBookies(b1, b2);
+
+        try {
+            lh.append("entry2".getBytes());
+            Assert.fail("Shouldn't have been able to add");
+        } catch (BKException.BKNotEnoughBookiesException bke) {
+            // correct behaviour
+            assertEventuallyTrue("Failure to add should trigger ledger closure",
+                                 () -> lh.getLedgerMetadata().isClosed());
+            Assert.assertEquals("Ledger should contain only the first entry",
+                                lh.getLedgerMetadata().getLastEntryId(), 0L);
+            Assert.assertEquals("Should be only one ensemble", lh.getLedgerMetadata().getAllEnsembles().size(), 1);
+            Assert.assertEquals("Ensemble shouldn't have changed", lh.getLedgerMetadata().getAllEnsembles().get(0L),
+                                Lists.newArrayList(b1, b2, b3));
+        }
+    }
+
+    @Test(timeout = 30000)
+    public void testClientClosesWhileFailureHandlerInProgress() throws Exception {
+        MockClientContext clientCtx = MockClientContext.create();
+        Versioned<LedgerMetadata> md = ClientUtil.setupLedger(clientCtx, 10L,
+                LedgerMetadataBuilder.create()
+                .withEnsembleSize(3).withWriteQuorumSize(3).withAckQuorumSize(3)
+                .newEnsembleEntry(0L, Lists.newArrayList(b1, b2, b3)));
+        clientCtx.getMockRegistrationClient().addBookies(b4).get();
+        clientCtx.getMockBookieClient().errorBookies(b2);
+
+        CompletableFuture<Void> changeInProgress = new CompletableFuture<>();
+        CompletableFuture<Void> blockEnsembleChange = new 
CompletableFuture<>(); + clientCtx.getMockLedgerManager().setPreWriteHook((ledgerId, metadata) -> { + // block the write trying to replace b2 with b4 + if (metadata.getAllEnsembles().get(0L).get(1).equals(b4)) { + changeInProgress.complete(null); + return blockEnsembleChange; + } else { + return FutureUtils.value(null); + } + }); + + LedgerHandle lh = new LedgerHandle(clientCtx, 10L, md, BookKeeper.DigestType.CRC32C, + ClientUtil.PASSWD, WriteFlag.NONE); + CompletableFuture future = lh.appendAsync("entry1".getBytes()); + changeInProgress.get(); + + lh.close(); + + blockEnsembleChange.complete(null); // allow ensemble change to continue + try { + future.get(); + Assert.fail("Add shouldn't have succeeded"); + } catch (ExecutionException ee) { + Assert.assertEquals(ee.getCause().getClass(), BKException.BKLedgerClosedException.class); + } + Assert.assertTrue(lh.getLedgerMetadata().isClosed()); + Assert.assertEquals(lh.getLedgerMetadata().getAllEnsembles().size(), 1); + Assert.assertEquals(lh.getLedgerMetadata().getAllEnsembles().get(0L), Lists.newArrayList(b1, b2, b3)); + Assert.assertEquals(lh.getLedgerMetadata().getLastEntryId(), LedgerHandle.INVALID_ENTRY_ID); + } + + @Test(timeout = 30000) + public void testMetadataSetToClosedDuringFailureHandler() throws Exception { + MockClientContext clientCtx = MockClientContext.create(); + Versioned md = ClientUtil.setupLedger(clientCtx, 10L, + LedgerMetadataBuilder.create() + .withEnsembleSize(3).withWriteQuorumSize(3).withAckQuorumSize(3) + .newEnsembleEntry(0L, Lists.newArrayList(b1, b2, b3))); + clientCtx.getMockRegistrationClient().addBookies(b4).get(); + clientCtx.getMockBookieClient().errorBookies(b2); + + CompletableFuture changeInProgress = new CompletableFuture<>(); + CompletableFuture blockEnsembleChange = new CompletableFuture<>(); + clientCtx.getMockLedgerManager().setPreWriteHook((ledgerId, metadata) -> { + if (metadata.getAllEnsembles().get(0L).get(1).equals(b4)) { + // block the write trying to replace b2 with b4 + changeInProgress.complete(null); + return blockEnsembleChange; + } else { + return FutureUtils.value(null); + } + }); + + LedgerHandle lh = new LedgerHandle(clientCtx, 10L, md, BookKeeper.DigestType.CRC32C, + ClientUtil.PASSWD, WriteFlag.NONE); + CompletableFuture future = lh.appendAsync("entry1".getBytes()); + changeInProgress.get(); + + ClientUtil.transformMetadata(clientCtx, 10L, + (metadata) -> LedgerMetadataBuilder.from(metadata) + .withClosedState().withLastEntryId(1234L).withLength(10L).build()); + + blockEnsembleChange.complete(null); // allow ensemble change to continue + try { + future.get(); + Assert.fail("Add shouldn't have succeeded"); + } catch (ExecutionException ee) { + Assert.assertEquals(ee.getCause().getClass(), BKException.BKLedgerClosedException.class); + } + Assert.assertTrue(lh.getLedgerMetadata().isClosed()); + Assert.assertEquals(lh.getLedgerMetadata().getAllEnsembles().size(), 1); + Assert.assertEquals(lh.getLedgerMetadata().getAllEnsembles().get(0L), Lists.newArrayList(b1, b2, b3)); + Assert.assertEquals(lh.getLedgerMetadata().getLastEntryId(), 1234L); + } + + @Test(timeout = 30000) + public void testMetadataSetToInRecoveryDuringFailureHandler() throws Exception { + MockClientContext clientCtx = MockClientContext.create(); + Versioned md = ClientUtil.setupLedger(clientCtx, 10L, + LedgerMetadataBuilder.create() + .withEnsembleSize(3).withWriteQuorumSize(3).withAckQuorumSize(3) + .newEnsembleEntry(0L, Lists.newArrayList(b1, b2, b3))); + clientCtx.getMockRegistrationClient().addBookies(b4).get(); + 
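+        // register b4 as an available replacement before failing b2, so the ensemble
+        // change triggered by the failed write has a bookie to swap in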
clientCtx.getMockBookieClient().errorBookies(b2); + + CompletableFuture changeInProgress = new CompletableFuture<>(); + CompletableFuture blockEnsembleChange = new CompletableFuture<>(); + clientCtx.getMockLedgerManager().setPreWriteHook((ledgerId, metadata) -> { + if (metadata.getAllEnsembles().get(0L).get(1).equals(b4)) { + // block the write trying to replace b2 with b4 + changeInProgress.complete(null); + return blockEnsembleChange; + } else { + return FutureUtils.value(null); + } + }); + + LedgerHandle lh = new LedgerHandle(clientCtx, 10L, md, BookKeeper.DigestType.CRC32C, + ClientUtil.PASSWD, WriteFlag.NONE); + CompletableFuture future = lh.appendAsync("entry1".getBytes()); + changeInProgress.get(); + + ClientUtil.transformMetadata(clientCtx, 10L, + (metadata) -> LedgerMetadataBuilder.from(metadata).withInRecoveryState().build()); + + blockEnsembleChange.complete(null); // allow ensemble change to continue + try { + future.get(); + Assert.fail("Add shouldn't have succeeded"); + } catch (ExecutionException ee) { + Assert.assertEquals(ee.getCause().getClass(), BKException.BKLedgerFencedException.class); + } + Assert.assertFalse(lh.getLedgerMetadata().isClosed()); + Assert.assertEquals(lh.getLedgerMetadata().getAllEnsembles().size(), 1); + Assert.assertEquals(lh.getLedgerMetadata().getAllEnsembles().get(0L), Lists.newArrayList(b1, b2, b3)); + } + + @Test(timeout = 30000) + public void testOldEnsembleChangedDuringFailureHandler() throws Exception { + MockClientContext clientCtx = MockClientContext.create(); + Versioned md = ClientUtil.setupLedger(clientCtx, 10L, + LedgerMetadataBuilder.create() + .withEnsembleSize(3).withWriteQuorumSize(3).withAckQuorumSize(3) + .newEnsembleEntry(0L, Lists.newArrayList(b1, b2, b3))); + + LedgerHandle lh = new LedgerHandle(clientCtx, 10L, md, BookKeeper.DigestType.CRC32C, + ClientUtil.PASSWD, WriteFlag.NONE); + lh.append("entry1".getBytes()); + clientCtx.getMockRegistrationClient().addBookies(b4).get(); + clientCtx.getMockBookieClient().errorBookies(b3); + lh.append("entry2".getBytes()); + + Assert.assertEquals(lh.getLedgerMetadata().getAllEnsembles().size(), 2); + Assert.assertEquals(lh.getLedgerMetadata().getAllEnsembles().get(0L), Lists.newArrayList(b1, b2, b3)); + Assert.assertEquals(lh.getLedgerMetadata().getAllEnsembles().get(1L), Lists.newArrayList(b1, b2, b4)); + + + CompletableFuture changeInProgress = new CompletableFuture<>(); + CompletableFuture blockEnsembleChange = new CompletableFuture<>(); + clientCtx.getMockLedgerManager().setPreWriteHook((ledgerId, metadata) -> { + // block the write trying to replace b1 with b5 + if (metadata.getAllEnsembles().size() > 2 + && metadata.getAllEnsembles().get(2L).get(0).equals(b5)) { + changeInProgress.complete(null); + return blockEnsembleChange; + } else { + return FutureUtils.value(null); + } + }); + + clientCtx.getMockRegistrationClient().addBookies(b5).get(); + clientCtx.getMockBookieClient().errorBookies(b1); + + CompletableFuture future = lh.appendAsync("entry3".getBytes()); + changeInProgress.get(); + + ClientUtil.transformMetadata(clientCtx, 10L, + (metadata) -> LedgerMetadataBuilder.from(metadata).replaceEnsembleEntry( + 0L, Lists.newArrayList(b4, b2, b5)).build()); + + blockEnsembleChange.complete(null); // allow ensemble change to continue + future.get(); + + Assert.assertFalse(lh.getLedgerMetadata().isClosed()); + Assert.assertEquals(lh.getLedgerMetadata().getAllEnsembles().size(), 3); + Assert.assertEquals(lh.getLedgerMetadata().getAllEnsembles().get(0L), Lists.newArrayList(b4, b2, b5)); + 
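+        // fragment 0 shows the concurrent external rewrite to (b4, b2, b5); the failure
+        // handler still applied its own change (b1 -> b5) on top of it, as fragment 2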
Assert.assertEquals(lh.getLedgerMetadata().getAllEnsembles().get(1L), Lists.newArrayList(b1, b2, b4));
+        Assert.assertEquals(lh.getLedgerMetadata().getAllEnsembles().get(2L), Lists.newArrayList(b5, b2, b4));
+    }
+
+    @Test(timeout = 30000)
+    public void testNoAddsAreCompletedWhileFailureHandlingInProgress() throws Exception {
+        MockClientContext clientCtx = MockClientContext.create();
+        Versioned<LedgerMetadata> md = ClientUtil.setupLedger(clientCtx, 10L,
+                LedgerMetadataBuilder.create()
+                .withEnsembleSize(3).withWriteQuorumSize(3).withAckQuorumSize(2)
+                .newEnsembleEntry(0L, Lists.newArrayList(b1, b2, b3)));
+
+        clientCtx.getMockRegistrationClient().addBookies(b4).get();
+        clientCtx.getMockBookieClient().errorBookies(b3);
+
+        LedgerHandle lh = new LedgerHandle(clientCtx, 10L, md, BookKeeper.DigestType.CRC32C,
+                                           ClientUtil.PASSWD, WriteFlag.NONE);
+        lh.append("entry1".getBytes());
+
+        CompletableFuture<Void> changeInProgress = new CompletableFuture<>();
+        CompletableFuture<Void> blockEnsembleChange = new CompletableFuture<>();
+        clientCtx.getMockLedgerManager().setPreWriteHook((ledgerId, metadata) -> {
+            // block the write trying to replace b3 with b4
+            if (metadata.getAllEnsembles().get(1L).get(2).equals(b4)) {
+                changeInProgress.complete(null);
+                return blockEnsembleChange;
+            } else {
+                return FutureUtils.value(null);
+            }
+        });
+
+        CompletableFuture<Long> future = lh.appendAsync("entry2".getBytes());
+        changeInProgress.get();
+        try {
+            future.get(1, TimeUnit.SECONDS);
+            Assert.fail("Shouldn't complete");
+        } catch (TimeoutException te) {
+            // expected: the add must not complete while the ensemble change is blocked
+        }
+        blockEnsembleChange.complete(null);
+        future.get();
+
+        Assert.assertEquals(lh.getLedgerMetadata().getAllEnsembles().size(), 2);
+        Assert.assertEquals(lh.getLedgerMetadata().getAllEnsembles().get(0L), Lists.newArrayList(b1, b2, b3));
+        Assert.assertEquals(lh.getLedgerMetadata().getAllEnsembles().get(1L), Lists.newArrayList(b1, b2, b4));
+    }
+
+    @Test(timeout = 30000)
+    public void testHandleFailureBookieNotInWriteSet() throws Exception {
+        MockClientContext clientCtx = MockClientContext.create();
+        Versioned<LedgerMetadata> md = ClientUtil.setupLedger(clientCtx, 10L,
+                LedgerMetadataBuilder.create()
+                .withEnsembleSize(3).withWriteQuorumSize(2).withAckQuorumSize(1)
+                .newEnsembleEntry(0L, Lists.newArrayList(b1, b2, b3)));
+        clientCtx.getMockRegistrationClient().addBookies(b4).get();
+
+        CompletableFuture<Void> b1Delay = new CompletableFuture<>();
+        // Delay the first write to b1, then error it
+        clientCtx.getMockBookieClient().setPreWriteHook((bookie, ledgerId, entryId) -> {
+            if (bookie.equals(b1)) {
+                return b1Delay;
+            } else {
+                return FutureUtils.value(null);
+            }
+        });
+
+        CompletableFuture<Void> changeInProgress = new CompletableFuture<>();
+        CompletableFuture<Void> blockEnsembleChange = new CompletableFuture<>();
+        clientCtx.getMockLedgerManager().setPreWriteHook((ledgerId, metadata) -> {
+            changeInProgress.complete(null);
+            return blockEnsembleChange;
+        });
+
+        LedgerHandle lh = new LedgerHandle(clientCtx, 10L, md, BookKeeper.DigestType.CRC32C,
+                                           ClientUtil.PASSWD, WriteFlag.NONE);
+        log.info("b2 should be enough to complete first add");
+        lh.append("entry1".getBytes());
+
+        log.info("when b1 completes with failure, handleFailures should kick off");
+        b1Delay.completeExceptionally(new BKException.BKWriteException());
+
+        log.info("write second entry, should have enough bookies, but blocks completion on failure handling");
+        AtomicReference<CompletableFuture<Long>> e2 = new AtomicReference<>();
+
+        // Execute appendAsync on the same thread as the preWriteHook exception callback. 
So that the + // `delayedWriteFailedBookies` could update before appendAsync invoke. + ((MockBookieClient) clientCtx.getBookieClient()).getExecutor() + .chooseThread(lh.ledgerId) + .execute(() -> e2.set(lh.appendAsync("entry2".getBytes()))); + changeInProgress.get(); + assertEventuallyTrue("e2 should eventually complete", () -> lh.pendingAddOps.peek().completed); + Assert.assertFalse("e2 shouldn't be completed to client", e2.get().isDone()); + blockEnsembleChange.complete(null); // allow ensemble change to continue + + log.info("e2 should complete"); + e2.get().get(10, TimeUnit.SECONDS); + } + +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/LedgerClose2Test.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/LedgerClose2Test.java new file mode 100644 index 00000000000..40f69304828 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/LedgerClose2Test.java @@ -0,0 +1,307 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bookkeeper.client; + +import com.google.common.collect.Lists; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutionException; +import org.apache.bookkeeper.client.api.LedgerMetadata; +import org.apache.bookkeeper.client.api.WriteFlag; +import org.apache.bookkeeper.common.concurrent.FutureUtils; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.versioning.Versioned; +import org.junit.Assert; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Ledger recovery tests using mocks rather than a real cluster. 
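+ * These focus on the close path: adds racing close, double close, and metadata
+ * changing underneath an in-flight close.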
+ */
+public class LedgerClose2Test {
+    private static final Logger log = LoggerFactory.getLogger(LedgerClose2Test.class);
+
+    private static final BookieId b1 = new BookieSocketAddress("b1", 3181).toBookieId();
+    private static final BookieId b2 = new BookieSocketAddress("b2", 3181).toBookieId();
+    private static final BookieId b3 = new BookieSocketAddress("b3", 3181).toBookieId();
+    private static final BookieId b4 = new BookieSocketAddress("b4", 3181).toBookieId();
+    private static final BookieId b5 = new BookieSocketAddress("b5", 3181).toBookieId();
+
+    @Test
+    public void testTryAddAfterCloseHasBeenCalled() throws Exception {
+        MockClientContext clientCtx = MockClientContext.create();
+
+        for (int i = 0; i < 1000; i++) {
+            Versioned<LedgerMetadata> md = ClientUtil.setupLedger(clientCtx, i,
+                    LedgerMetadataBuilder.create().newEnsembleEntry(0L, Lists.newArrayList(b1, b2, b3)));
+            LedgerHandle lh = new LedgerHandle(clientCtx, i, md, BookKeeper.DigestType.CRC32C,
+                                               ClientUtil.PASSWD, WriteFlag.NONE);
+            CompletableFuture<Void> closeFuture = lh.closeAsync();
+            try {
+                long eid = lh.append("entry".getBytes());
+
+                // if it succeeds, it should be in the final ledger
+                closeFuture.get();
+                Assert.assertTrue(lh.getLedgerMetadata().isClosed());
+                Assert.assertEquals(lh.getLedgerMetadata().getLastEntryId(), eid);
+            } catch (BKException.BKLedgerClosedException bke) {
+                closeFuture.get();
+                Assert.assertTrue(lh.getLedgerMetadata().isClosed());
+                Assert.assertEquals(lh.getLedgerMetadata().getLastEntryId(), LedgerHandle.INVALID_ENTRY_ID);
+            }
+        }
+    }
+
+    @Test
+    public void testMetadataChangedDuringClose() throws Exception {
+        MockClientContext clientCtx = MockClientContext.create();
+        Versioned<LedgerMetadata> md = ClientUtil.setupLedger(clientCtx, 10L,
+                LedgerMetadataBuilder.create()
+                .withEnsembleSize(3).withWriteQuorumSize(3).withAckQuorumSize(2)
+                .newEnsembleEntry(0L, Lists.newArrayList(b1, b2, b3)));
+
+
+        LedgerHandle lh = new LedgerHandle(clientCtx, 10L, md, BookKeeper.DigestType.CRC32C,
+                                           ClientUtil.PASSWD, WriteFlag.NONE);
+        lh.append("entry1".getBytes());
+        clientCtx.getMockRegistrationClient().addBookies(b4).get();
+        clientCtx.getMockBookieClient().errorBookies(b3);
+        lh.append("entry2".getBytes());
+
+        CompletableFuture<Void> closeInProgress = new CompletableFuture<>();
+        CompletableFuture<Void> blockClose = new CompletableFuture<>();
+        clientCtx.getMockLedgerManager().setPreWriteHook((ledgerId, metadata) -> {
+            // block the metadata write that closes the ledger
+            if (metadata.isClosed()) {
+                closeInProgress.complete(null);
+                return blockClose;
+            } else {
+                return FutureUtils.value(null);
+            }
+        });
+        CompletableFuture<Void> closeFuture = lh.closeAsync();
+        closeInProgress.get();
+
+        ClientUtil.transformMetadata(clientCtx, 10L,
+                (metadata) -> LedgerMetadataBuilder.from(metadata).replaceEnsembleEntry(
+                        0L, Lists.newArrayList(b4, b2, b5)).build());
+
+        blockClose.complete(null);
+        closeFuture.get();
+
+        Assert.assertTrue(lh.getLedgerMetadata().isClosed());
+        Assert.assertEquals(lh.getLedgerMetadata().getAllEnsembles().size(), 2);
+        Assert.assertEquals(lh.getLedgerMetadata().getAllEnsembles().get(0L), Lists.newArrayList(b4, b2, b5));
+        Assert.assertEquals(lh.getLedgerMetadata().getAllEnsembles().get(1L), Lists.newArrayList(b1, b2, b4));
+        Assert.assertEquals(lh.getLedgerMetadata().getLastEntryId(), 1L);
+    }
+
+    @Test
+    public void testMetadataCloseWithCorrectLengthDuringClose() throws Exception {
+        MockClientContext clientCtx = MockClientContext.create();
+        Versioned<LedgerMetadata> md = ClientUtil.setupLedger(clientCtx, 10L,
+                LedgerMetadataBuilder.create()
.withEnsembleSize(3).withWriteQuorumSize(3).withAckQuorumSize(2) + .newEnsembleEntry(0L, Lists.newArrayList(b1, b2, b3))); + + + LedgerHandle lh = new LedgerHandle(clientCtx, 10L, md, BookKeeper.DigestType.CRC32C, + ClientUtil.PASSWD, WriteFlag.NONE); + long lac = lh.append("entry1".getBytes()); + long length = lh.getLength(); + + CompletableFuture closeInProgress = new CompletableFuture<>(); + CompletableFuture blockClose = new CompletableFuture<>(); + clientCtx.getMockLedgerManager().setPreWriteHook((ledgerId, metadata) -> { + // block the write trying to do the first close + if (!closeInProgress.isDone() && metadata.isClosed()) { + closeInProgress.complete(null); + return blockClose; + } else { + return FutureUtils.value(null); + } + }); + CompletableFuture closeFuture = lh.closeAsync(); + closeInProgress.get(); + + ClientUtil.transformMetadata(clientCtx, 10L, + (metadata) -> LedgerMetadataBuilder.from(metadata) + .withClosedState().withLastEntryId(lac).withLength(length).build()); + + blockClose.complete(null); + closeFuture.get(); + + Assert.assertTrue(lh.getLedgerMetadata().isClosed()); + Assert.assertEquals(lh.getLedgerMetadata().getAllEnsembles().size(), 1); + Assert.assertEquals(lh.getLedgerMetadata().getAllEnsembles().get(0L), Lists.newArrayList(b1, b2, b3)); + Assert.assertEquals(lh.getLedgerMetadata().getLastEntryId(), lac); + Assert.assertEquals(lh.getLedgerMetadata().getLength(), length); + } + + @Test + public void testMetadataCloseWithDifferentLengthDuringClose() throws Exception { + MockClientContext clientCtx = MockClientContext.create(); + Versioned md = ClientUtil.setupLedger(clientCtx, 10L, + LedgerMetadataBuilder.create() + .withEnsembleSize(3).withWriteQuorumSize(3).withAckQuorumSize(2) + .newEnsembleEntry(0L, Lists.newArrayList(b1, b2, b3))); + + + LedgerHandle lh = new LedgerHandle(clientCtx, 10L, md, BookKeeper.DigestType.CRC32C, + ClientUtil.PASSWD, WriteFlag.NONE); + long lac = lh.append("entry1".getBytes()); + long length = lh.getLength(); + + CompletableFuture closeInProgress = new CompletableFuture<>(); + CompletableFuture blockClose = new CompletableFuture<>(); + clientCtx.getMockLedgerManager().setPreWriteHook((ledgerId, metadata) -> { + // block the write trying to do the first close + if (!closeInProgress.isDone() && metadata.isClosed()) { + closeInProgress.complete(null); + return blockClose; + } else { + return FutureUtils.value(null); + } + }); + CompletableFuture closeFuture = lh.closeAsync(); + closeInProgress.get(); + + /* close with different length. can happen in cases where there's a write outstanding */ + ClientUtil.transformMetadata(clientCtx, 10L, + (metadata) -> LedgerMetadataBuilder.from(metadata) + .withClosedState().withLastEntryId(lac + 1).withLength(length + 100).build()); + + blockClose.complete(null); + try { + closeFuture.get(); + Assert.fail("Close should fail. 
Ledger has been closed in a state we don't know how to untangle"); + } catch (ExecutionException ee) { + Assert.assertEquals(ee.getCause().getClass(), BKException.BKMetadataVersionException.class); + } + } + + @Test + public void testMetadataCloseMarkedInRecoveryWhileClosing() throws Exception { + MockClientContext clientCtx = MockClientContext.create(); + Versioned md = ClientUtil.setupLedger(clientCtx, 10L, + LedgerMetadataBuilder.create() + .withEnsembleSize(3).withWriteQuorumSize(3).withAckQuorumSize(2) + .newEnsembleEntry(0L, Lists.newArrayList(b1, b2, b3))); + + LedgerHandle lh = new LedgerHandle(clientCtx, 10L, md, BookKeeper.DigestType.CRC32C, + ClientUtil.PASSWD, WriteFlag.NONE); + long lac = lh.append("entry1".getBytes()); + long length = lh.getLength(); + + CompletableFuture closeInProgress = new CompletableFuture<>(); + CompletableFuture blockClose = new CompletableFuture<>(); + clientCtx.getMockLedgerManager().setPreWriteHook((ledgerId, metadata) -> { + // block the write trying to do the first close + if (metadata.isClosed()) { + closeInProgress.complete(null); + return blockClose; + } else { + return FutureUtils.value(null); + } + }); + CompletableFuture closeFuture = lh.closeAsync(); + closeInProgress.get(); + + ClientUtil.transformMetadata(clientCtx, 10L, + (metadata) -> LedgerMetadataBuilder.from(metadata).withInRecoveryState().build()); + + blockClose.complete(null); + + closeFuture.get(); // should override in recovery, since this handle knows what it has written + Assert.assertTrue(lh.getLedgerMetadata().isClosed()); + Assert.assertEquals(lh.getLedgerMetadata().getAllEnsembles().size(), 1); + Assert.assertEquals(lh.getLedgerMetadata().getAllEnsembles().get(0L), Lists.newArrayList(b1, b2, b3)); + Assert.assertEquals(lh.getLedgerMetadata().getLastEntryId(), lac); + Assert.assertEquals(lh.getLedgerMetadata().getLength(), length); + } + + @Test + public void testCloseWhileAddInProgress() throws Exception { + MockClientContext clientCtx = MockClientContext.create(); + Versioned md = ClientUtil.setupLedger(clientCtx, 10L, + LedgerMetadataBuilder.create() + .withEnsembleSize(3).withWriteQuorumSize(3).withAckQuorumSize(2) + .newEnsembleEntry(0L, Lists.newArrayList(b1, b2, b3))); + // block all entry writes from completing + CompletableFuture writesHittingBookies = new CompletableFuture<>(); + clientCtx.getMockBookieClient().setPreWriteHook((bookie, ledgerId, entryId) -> { + writesHittingBookies.complete(null); + return new CompletableFuture(); + }); + LedgerHandle lh = new LedgerHandle(clientCtx, 10L, md, BookKeeper.DigestType.CRC32C, + ClientUtil.PASSWD, WriteFlag.NONE); + CompletableFuture future = lh.appendAsync("entry1".getBytes()); + writesHittingBookies.get(); + + lh.close(); + try { + future.get(); + Assert.fail("That write shouldn't have succeeded"); + } catch (ExecutionException ee) { + Assert.assertEquals(ee.getCause().getClass(), BKException.BKLedgerClosedException.class); + } + Assert.assertTrue(lh.getLedgerMetadata().isClosed()); + Assert.assertEquals(lh.getLedgerMetadata().getAllEnsembles().size(), 1); + Assert.assertEquals(lh.getLedgerMetadata().getAllEnsembles().get(0L), Lists.newArrayList(b1, b2, b3)); + Assert.assertEquals(lh.getLedgerMetadata().getLastEntryId(), LedgerHandle.INVALID_ENTRY_ID); + Assert.assertEquals(lh.getLedgerMetadata().getLength(), 0); + } + + @Test + public void testDoubleCloseOnHandle() throws Exception { + long ledgerId = 123L; + MockClientContext clientCtx = MockClientContext.create(); + + Versioned md = 
ClientUtil.setupLedger(clientCtx, ledgerId, + LedgerMetadataBuilder.create() + .withEnsembleSize(3).withWriteQuorumSize(3).withAckQuorumSize(3) + .newEnsembleEntry(0L, Lists.newArrayList(b1, b2, b3))); + + CompletableFuture metadataPromise = new CompletableFuture<>(); + CompletableFuture clientPromise = new CompletableFuture<>(); + + LedgerHandle writer = new LedgerHandle(clientCtx, ledgerId, md, + BookKeeper.DigestType.CRC32C, + ClientUtil.PASSWD, WriteFlag.NONE); + long eid1 = writer.append("entry1".getBytes()); + + log.info("block writes from completing on bookies and metadata"); + clientCtx.getMockBookieClient().setPostWriteHook((bookie, lid, eid) -> clientPromise); + clientCtx.getMockLedgerManager().setPreWriteHook((lid, metadata) -> metadataPromise); + + log.info("try to add another entry, it will block"); + writer.appendAsync("entry2".getBytes()); + + log.info("attempt one close, should block forever"); + CompletableFuture firstClose = writer.closeAsync(); + + log.info("attempt second close, should not finish before first one"); + CompletableFuture secondClose = writer.closeAsync(); + + Thread.sleep(500); // give it a chance to complete, the request jumps around threads + Assert.assertFalse(firstClose.isDone()); + Assert.assertFalse(secondClose.isDone()); + } +} + diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/LedgerCloseTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/LedgerCloseTest.java index e761f1ce194..496e61f8b44 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/LedgerCloseTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/LedgerCloseTest.java @@ -17,12 +17,11 @@ */ package org.apache.bookkeeper.client; -import static com.google.common.base.Charsets.UTF_8; +import static java.nio.charset.StandardCharsets.UTF_8; import static org.junit.Assert.assertEquals; import static org.junit.Assert.fail; import io.netty.buffer.ByteBuf; - import java.io.IOException; import java.util.ArrayList; import java.util.List; @@ -32,14 +31,14 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicInteger; - import org.apache.bookkeeper.bookie.Bookie; import org.apache.bookkeeper.bookie.BookieException; +import org.apache.bookkeeper.bookie.TestBookieImpl; import org.apache.bookkeeper.client.AsyncCallback.AddCallback; import org.apache.bookkeeper.client.BookKeeper.DigestType; import org.apache.bookkeeper.conf.ClientConfiguration; import org.apache.bookkeeper.conf.ServerConfiguration; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GenericCallback; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.WriteCallback; import org.apache.bookkeeper.test.BookKeeperClusterTestCase; @@ -145,7 +144,7 @@ private void verifyMetadataConsistency(int numEntries, LedgerHandle lh) final CountDownLatch recoverDoneLatch = new CountDownLatch(1); final CountDownLatch failedLatch = new CountDownLatch(1); // kill first bookie to replace with a unauthorize bookie - BookieSocketAddress bookie = lh.getCurrentEnsemble().get(0); + BookieId bookie = lh.getCurrentEnsemble().get(0); ServerConfiguration conf = killBookie(bookie); // replace a unauthorize bookie startUnauthorizedBookie(conf, addDoneLatch); @@ -196,7 +195,7 @@ public void addComplete(int rc, LedgerHandle lh, long entryId, Object ctx) { private void 
startUnauthorizedBookie(ServerConfiguration conf, final CountDownLatch latch) throws Exception { - Bookie sBookie = new Bookie(conf) { + Bookie sBookie = new TestBookieImpl(conf) { @Override public void addEntry(ByteBuf entry, boolean ackBeforeSync, WriteCallback cb, Object ctx, byte[] masterKey) throws IOException, BookieException { @@ -214,14 +213,13 @@ public void recoveryAddEntry(ByteBuf entry, WriteCallback cb, Object ctx, byte[] throw new IOException("Dead bookie for recovery adds."); } }; - bsConfs.add(conf); - bs.add(startBookie(conf, sBookie)); + startAndAddBookie(conf, sBookie); } // simulate slow adds, then become normal when recover, // so no ensemble change when recovering ledger on this bookie. private void startDeadBookie(ServerConfiguration conf, final CountDownLatch latch) throws Exception { - Bookie dBookie = new Bookie(conf) { + Bookie dBookie = new TestBookieImpl(conf) { @Override public void addEntry(ByteBuf entry, boolean ackBeforeSync, WriteCallback cb, Object ctx, byte[] masterKey) throws IOException, BookieException { @@ -234,8 +232,7 @@ public void addEntry(ByteBuf entry, boolean ackBeforeSync, WriteCallback cb, Obj throw new IOException("Dead bookie"); } }; - bsConfs.add(conf); - bs.add(startBookie(conf, dBookie)); + startAndAddBookie(conf, dBookie); } @Test diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/LedgerCmdTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/LedgerCmdTest.java index 4055917939d..6900dfbc13d 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/LedgerCmdTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/LedgerCmdTest.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file @@ -22,11 +22,11 @@ import static junit.framework.TestCase.assertEquals; -import java.io.IOException; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; import org.apache.bookkeeper.bookie.BookieAccessor; +import org.apache.bookkeeper.bookie.BookieImpl; import org.apache.bookkeeper.bookie.BookieShell; import org.apache.bookkeeper.bookie.storage.ldb.DbLedgerStorage; import org.apache.bookkeeper.client.AsyncCallback.AddCallback; @@ -66,16 +66,12 @@ public void testLedgerDbStorageCmd() throws Exception { LOG.info("Create ledger and add entries to it"); LedgerHandle lh1 = createLedgerWithEntries(bk, 10); - bs.forEach(bookieServer -> { - try { - BookieAccessor.forceFlush(bookieServer.getBookie()); - } catch (IOException e) { - LOG.error("Error forceFlush:", e); - } - }); + for (int i = 0; i < bookieCount(); i++) { + BookieAccessor.forceFlush((BookieImpl) serverByIndex(i).getBookie()); + } String[] argv = { "ledger", Long.toString(lh1.getId()) }; - final ServerConfiguration conf = bsConfs.get(0); + final ServerConfiguration conf = confByIndex(0); conf.setUseHostNameAsBookieID(true); BookieShell bkShell = diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/LedgerHandleAdapter.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/LedgerHandleAdapter.java index 7098f4ee475..086e9f330c5 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/LedgerHandleAdapter.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/LedgerHandleAdapter.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -20,8 +20,7 @@ package org.apache.bookkeeper.client; import io.netty.buffer.ByteBuf; - -import org.apache.bookkeeper.util.ByteBufList; +import org.apache.bookkeeper.proto.MockBookieClient; /** * Adapter for tests to get the public access from LedgerHandle for its default @@ -29,15 +28,9 @@ */ public class LedgerHandleAdapter { - /** - * Get the ledger handle. - */ - public static LedgerMetadata getLedgerMetadata(LedgerHandle lh) { - return lh.getLedgerMetadata(); - } - - public static ByteBufList toSend(LedgerHandle lh, long entryId, ByteBuf data) { - return lh.getDigestManager().computeDigestAndPackageForSending(entryId, lh.getLastAddConfirmed(), - lh.addToLength(data.readableBytes()), data); + public static ByteBuf toSend(LedgerHandle lh, long entryId, ByteBuf data) { + return MockBookieClient.copyData(lh.getDigestManager() + .computeDigestAndPackageForSending(entryId, lh.getLastAddConfirmed(), + lh.addToLength(data.readableBytes()), data, new byte[20], 0)); } } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/LedgerMetadataTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/LedgerMetadataTest.java index dc947dde722..4444dd5b767 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/LedgerMetadataTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/LedgerMetadataTest.java @@ -16,19 +16,21 @@ * specific language governing permissions and limitations * under the License. 
*/ - package org.apache.bookkeeper.client; -import static com.google.common.base.Charsets.UTF_8; +import static java.nio.charset.StandardCharsets.UTF_8; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; +import com.google.common.collect.Lists; +import java.util.Base64; import java.util.Collections; -import java.util.NoSuchElementException; +import java.util.List; import org.apache.bookkeeper.client.BookKeeper.DigestType; -import org.apache.bookkeeper.proto.DataFormats.LedgerMetadataFormat; +import org.apache.bookkeeper.client.api.LedgerMetadata; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.net.BookieSocketAddress; import org.junit.Test; /** @@ -36,20 +38,21 @@ */ public class LedgerMetadataTest { - private static final String passwdStr = "testPasswd"; - private static final byte[] passwd = passwdStr.getBytes(UTF_8); + private static final byte[] passwd = "testPasswd".getBytes(UTF_8); @Test public void testGetters() { - org.apache.bookkeeper.client.api.LedgerMetadata metadata = new LedgerMetadata( - 3, - 2, - 1, - DigestType.CRC32, - passwd, - Collections.emptyMap(), - false); - + List ensemble = Lists.newArrayList(new BookieSocketAddress("192.0.2.1", 1234).toBookieId(), + new BookieSocketAddress("192.0.2.2", 1234).toBookieId(), + new BookieSocketAddress("192.0.2.3", 1234).toBookieId()); + org.apache.bookkeeper.client.api.LedgerMetadata metadata = LedgerMetadataBuilder.create() + .withEnsembleSize(3).withWriteQuorumSize(2).withAckQuorumSize(1) + .withDigestType(DigestType.CRC32.toApiDigestType()).withPassword(passwd) + .newEnsembleEntry(0L, ensemble) + .withId(100L) + .build(); + + assertEquals(100L, metadata.getLedgerId()); assertEquals(3, metadata.getEnsembleSize()); assertEquals(2, metadata.getWriteQuorumSize()); assertEquals(1, metadata.getAckQuorumSize()); @@ -59,116 +62,26 @@ public void testGetters() { assertEquals(-1L, metadata.getLastEntryId()); assertEquals(0, metadata.getLength()); assertFalse(metadata.isClosed()); - assertTrue(metadata.getAllEnsembles().isEmpty()); - - try { - metadata.getEnsembleAt(99L); - fail("Should fail to retrieve ensemble if ensembles is empty"); - } catch (NoSuchElementException e) { - // expected - } - } - - @Test - public void testStoreSystemtimeAsLedgerCtimeEnabled() - throws Exception { - LedgerMetadata lm = new LedgerMetadata( - 3, - 3, - 2, - DigestType.CRC32, - passwd, - Collections.emptyMap(), - true); - LedgerMetadataFormat format = lm.buildProtoFormat(); - assertTrue(format.hasCtime()); - } - - @Test - public void testStoreSystemtimeAsLedgerCtimeDisabled() - throws Exception { - LedgerMetadata lm = new LedgerMetadata( - 3, - 3, - 2, - DigestType.CRC32, - passwd, - Collections.emptyMap(), - false); - LedgerMetadataFormat format = lm.buildProtoFormat(); - assertFalse(format.hasCtime()); - } - - @Test - public void testIsConflictWithStoreSystemtimeAsLedgerCtimeDisabled() { - LedgerMetadata lm1 = new LedgerMetadata( - 3, - 3, - 2, - DigestType.CRC32, - passwd, - Collections.emptyMap(), - false); - LedgerMetadata lm2 = new LedgerMetadata(lm1); - - lm1.setCtime(1L); - lm2.setCtime(2L); - assertFalse(lm1.isConflictWith(lm2)); - } - - @Test - public void testIsConflictWithStoreSystemtimeAsLedgerCtimeEnabled() { - LedgerMetadata lm1 = new LedgerMetadata( - 3, - 3, - 2, - DigestType.CRC32, - passwd, - Collections.emptyMap(), - true); - LedgerMetadata lm2 = new LedgerMetadata(lm1); - - lm1.setCtime(1L); - 
lm2.setCtime(2L); - assertTrue(lm1.isConflictWith(lm2)); - } - - @Test - public void testIsConflictWithDifferentStoreSystemtimeAsLedgerCtimeFlags() { - LedgerMetadata lm1 = new LedgerMetadata( - 3, - 3, - 2, - DigestType.CRC32, - passwd, - Collections.emptyMap(), - true); - LedgerMetadata lm2 = new LedgerMetadata( - 3, - 3, - 2, - DigestType.CRC32, - passwd, - Collections.emptyMap(), - false); - - assertTrue(lm1.isConflictWith(lm2)); + assertEquals(1, metadata.getAllEnsembles().size()); + assertEquals(ensemble, metadata.getAllEnsembles().get(0L)); + assertEquals(ensemble, metadata.getEnsembleAt(99L)); } @Test public void testToString() { - LedgerMetadata lm1 = new LedgerMetadata( - 3, - 3, - 2, - DigestType.CRC32, - passwd, - Collections.emptyMap(), - true); - - assertTrue("toString should contain 'password' field", lm1.toString().contains("password")); - assertTrue("toString should contain password value", lm1.toString().contains(passwdStr)); - assertFalse("toSafeString should not contain 'password' field", lm1.toSafeString().contains("password")); - assertFalse("toSafeString should not contain password value", lm1.toSafeString().contains(passwdStr)); + List ensemble = Lists.newArrayList(new BookieSocketAddress("192.0.2.1", 1234).toBookieId(), + new BookieSocketAddress("192.0.2.2", 1234).toBookieId(), + new BookieSocketAddress("192.0.2.3", 1234).toBookieId()); + + LedgerMetadata lm1 = LedgerMetadataBuilder.create() + .withDigestType(DigestType.CRC32.toApiDigestType()) + .withPassword(passwd) + .newEnsembleEntry(0L, ensemble) + .withId(100L) + .build(); + + assertTrue("toString should contain password value", + lm1.toString().contains(Base64.getEncoder().encodeToString(passwd))); + assertTrue("toSafeString should not contain password value", lm1.toSafeString().contains("OMITTED")); } } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/LedgerRecovery2Test.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/LedgerRecovery2Test.java new file mode 100644 index 00000000000..22112cbc608 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/LedgerRecovery2Test.java @@ -0,0 +1,581 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ *
+ */
+package org.apache.bookkeeper.client;
+
+import com.google.common.collect.Lists;
+import java.nio.charset.StandardCharsets;
+import java.util.List;
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.atomic.AtomicInteger;
+import org.apache.bookkeeper.client.api.DigestType;
+import org.apache.bookkeeper.client.api.LedgerMetadata;
+import org.apache.bookkeeper.client.api.WriteFlag;
+import org.apache.bookkeeper.common.concurrent.FutureUtils;
+import org.apache.bookkeeper.net.BookieId;
+import org.apache.bookkeeper.net.BookieSocketAddress;
+import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GenericCallbackFuture;
+import org.apache.bookkeeper.proto.MockBookies;
+import org.apache.bookkeeper.versioning.Versioned;
+import org.junit.Assert;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Ledger recovery tests using mocks rather than a real cluster.
+ */
+public class LedgerRecovery2Test {
+    private static final Logger log = LoggerFactory.getLogger(LedgerRecovery2Test.class);
+
+    private static final byte[] PASSWD = "foobar".getBytes();
+    private static final BookieId b1 = new BookieSocketAddress("b1", 3181).toBookieId();
+    private static final BookieId b2 = new BookieSocketAddress("b2", 3181).toBookieId();
+    private static final BookieId b3 = new BookieSocketAddress("b3", 3181).toBookieId();
+    private static final BookieId b4 = new BookieSocketAddress("b4", 3181).toBookieId();
+    private static final BookieId b5 = new BookieSocketAddress("b5", 3181).toBookieId();
+
+    private static Versioned<LedgerMetadata> setupLedger(ClientContext clientCtx, long ledgerId,
+                                                         List<BookieId> bookies) throws Exception {
+        LedgerMetadata md = LedgerMetadataBuilder.create()
+            .withId(ledgerId)
+            .withPassword(PASSWD).withDigestType(DigestType.CRC32C)
+            .withWriteQuorumSize(bookies.size())
+            .newEnsembleEntry(0, bookies).build();
+        return clientCtx.getLedgerManager().createLedgerMetadata(ledgerId, md).get();
+    }
+
+    private static Versioned<LedgerMetadata> setupLedger(ClientContext clientCtx, long ledgerId,
+                                                         List<BookieId> bookies,
+                                                         int ensembleSize,
+                                                         int writeQuorumSize,
+                                                         int ackQuorumSize) throws Exception {
+        LedgerMetadata md = LedgerMetadataBuilder.create()
+            .withId(ledgerId)
+            .withPassword(PASSWD).withDigestType(DigestType.CRC32C)
+            .withEnsembleSize(ensembleSize)
+            .withWriteQuorumSize(writeQuorumSize)
+            .withAckQuorumSize(ackQuorumSize)
+            .newEnsembleEntry(0, bookies).build();
+        return clientCtx.getLedgerManager().createLedgerMetadata(ledgerId, md).get();
+    }
+
+
+    @Test
+    public void testCantRecoverAllDown() throws Exception {
+        MockClientContext clientCtx = MockClientContext.create();
+
+        Versioned<LedgerMetadata> md = setupLedger(clientCtx, 1L, Lists.newArrayList(b1, b2, b3));
+
+        clientCtx.getMockBookieClient().errorBookies(b1, b2, b3);
+
+        ReadOnlyLedgerHandle lh = new ReadOnlyLedgerHandle(
+                clientCtx, 1L, md, BookKeeper.DigestType.CRC32C, PASSWD, false);
+        try {
+            GenericCallbackFuture<Void> promise = new GenericCallbackFuture<>();
+            lh.recover(promise, null, false);
+            promise.get();
+            Assert.fail("Recovery shouldn't have been able to complete");
+        } catch (ExecutionException ee) {
+            Assert.assertEquals(BKException.BKReadException.class, ee.getCause().getClass());
+        }
+    }
+
+    @Test
+    public void testCanReadLacButCantWrite() throws Exception {
+        MockClientContext clientCtx = MockClientContext.create();
+
+        Versioned<LedgerMetadata> md = 
setupLedger(clientCtx, 1, Lists.newArrayList(b1, b2, b3)); + + clientCtx.getMockBookieClient().getMockBookies().seedEntries(b1, 1L, 0L, -1L); + clientCtx.getMockBookieClient().setPreWriteHook( + (bookie, ledgerId, entryId) -> FutureUtils.exception(new BKException.BKWriteException())); + + ReadOnlyLedgerHandle lh = new ReadOnlyLedgerHandle( + clientCtx, 1L, md, BookKeeper.DigestType.CRC32C, PASSWD, false); + try { + GenericCallbackFuture promise = new GenericCallbackFuture<>(); + lh.recover(promise, null, false); + promise.get(); + Assert.fail("Recovery shouldn't have been able to complete"); + } catch (ExecutionException ee) { + Assert.assertEquals(BKException.BKNotEnoughBookiesException.class, ee.getCause().getClass()); + } + } + + @Test + public void testMetadataClosedDuringRecovery() throws Exception { + MockClientContext clientCtx = MockClientContext.create(); + + Versioned md = setupLedger(clientCtx, 1, Lists.newArrayList(b1, b2, b3)); + + CompletableFuture writingBack = new CompletableFuture<>(); + CompletableFuture blocker = new CompletableFuture<>(); + clientCtx.getMockBookieClient().getMockBookies().seedEntries(b1, 1L, 0L, -1L); + // will block recovery at the writeback phase + clientCtx.getMockBookieClient().setPreWriteHook( + (bookie, ledgerId, entryId) -> { + writingBack.complete(null); + return blocker; + }); + + ReadOnlyLedgerHandle lh = new ReadOnlyLedgerHandle( + clientCtx, 1L, md, BookKeeper.DigestType.CRC32C, PASSWD, false); + + GenericCallbackFuture recoveryPromise = new GenericCallbackFuture<>(); + lh.recover(recoveryPromise, null, false); + + writingBack.get(10, TimeUnit.SECONDS); + + ClientUtil.transformMetadata(clientCtx, 1L, + (metadata) -> LedgerMetadataBuilder.from(metadata) + .withClosedState().withLastEntryId(-1).withLength(0).build()); + + // allow recovery to continue + blocker.complete(null); + + recoveryPromise.get(); + + Assert.assertEquals(lh.getLastAddConfirmed(), -1); + Assert.assertEquals(lh.getLength(), 0); + } + + @Test + public void testNewEnsembleAddedDuringRecovery() throws Exception { + MockClientContext clientCtx = MockClientContext.create(); + clientCtx.getMockRegistrationClient().addBookies(b4).get(); + + Versioned md = setupLedger(clientCtx, 1, Lists.newArrayList(b1, b2, b3)); + + CompletableFuture writingBack = new CompletableFuture<>(); + CompletableFuture blocker = new CompletableFuture<>(); + CompletableFuture failing = new CompletableFuture<>(); + clientCtx.getMockBookieClient().getMockBookies().seedEntries(b1, 1L, 0L, -1L); + // will block recovery at the writeback phase + clientCtx.getMockBookieClient().setPreWriteHook( + (bookie, ledgerId, entryId) -> { + writingBack.complete(null); + if (bookie.equals(b3)) { + return failing; + } else { + return blocker; + } + }); + + ReadOnlyLedgerHandle lh = new ReadOnlyLedgerHandle( + clientCtx, 1L, md, BookKeeper.DigestType.CRC32C, PASSWD, false); + + GenericCallbackFuture recoveryPromise = new GenericCallbackFuture<>(); + lh.recover(recoveryPromise, null, false); + + writingBack.get(10, TimeUnit.SECONDS); + + ClientUtil.transformMetadata(clientCtx, 1L, + (metadata) -> LedgerMetadataBuilder.from(metadata).newEnsembleEntry(1L, Lists.newArrayList(b1, b2, b4)) + .build()); + + // allow recovery to continue + failing.completeExceptionally(new BKException.BKWriteException()); + blocker.complete(null); + + try { + recoveryPromise.get(); + Assert.fail("Should fail on the update"); + } catch (ExecutionException ee) { + Assert.assertEquals(BKException.BKUnexpectedConditionException.class, 
ee.getCause().getClass()); + } + } + + @Test + public void testRecoveryBookieFailedAtStart() throws Exception { + MockClientContext clientCtx = MockClientContext.create(); + clientCtx.getMockRegistrationClient().addBookies(b4).get(); + + Versioned md = setupLedger(clientCtx, 1, Lists.newArrayList(b1, b2, b3)); + + CompletableFuture writingBack = new CompletableFuture<>(); + CompletableFuture blocker = new CompletableFuture<>(); + CompletableFuture failing = new CompletableFuture<>(); + clientCtx.getMockBookieClient().getMockBookies().seedEntries(b1, 1L, 0L, -1L); + clientCtx.getMockBookieClient().errorBookies(b2); + + ReadOnlyLedgerHandle lh = new ReadOnlyLedgerHandle( + clientCtx, 1L, md, BookKeeper.DigestType.CRC32C, PASSWD, false); + + GenericCallbackFuture recoveryPromise = new GenericCallbackFuture<>(); + lh.recover(recoveryPromise, null, false); + recoveryPromise.get(); + + Assert.assertEquals(lh.getLedgerMetadata().getAllEnsembles().size(), 1); + Assert.assertEquals(lh.getLedgerMetadata().getAllEnsembles().get(0L), + Lists.newArrayList(b1, b4, b3)); + } + + @Test + public void testRecoveryOneBookieFailsDuring() throws Exception { + MockClientContext clientCtx = MockClientContext.create(); + clientCtx.getMockRegistrationClient().addBookies(b4).get(); + + Versioned md = setupLedger(clientCtx, 1, Lists.newArrayList(b1, b2, b3)); + clientCtx.getMockBookieClient().getMockBookies().seedEntries(b1, 1L, 0L, -1L); + clientCtx.getMockBookieClient().getMockBookies().seedEntries(b3, 1L, 1L, -1L); + clientCtx.getMockBookieClient().setPreWriteHook( + (bookie, ledgerId, entryId) -> { + if (bookie.equals(b2) && entryId == 1L) { + return FutureUtils.exception(new BKException.BKWriteException()); + } else { + return FutureUtils.value(null); + } + }); + + ReadOnlyLedgerHandle lh = new ReadOnlyLedgerHandle( + clientCtx, 1L, md, BookKeeper.DigestType.CRC32C, PASSWD, false); + + GenericCallbackFuture recoveryPromise = new GenericCallbackFuture<>(); + lh.recover(recoveryPromise, null, false); + recoveryPromise.get(); + + Assert.assertEquals(lh.getLedgerMetadata().getAllEnsembles().size(), 2); + Assert.assertEquals(lh.getLedgerMetadata().getAllEnsembles().get(0L), + Lists.newArrayList(b1, b2, b3)); + Assert.assertEquals(lh.getLedgerMetadata().getAllEnsembles().get(1L), + Lists.newArrayList(b1, b4, b3)); + Assert.assertEquals(lh.getLastAddConfirmed(), 1L); + } + + @Test + public void testRecoveryTwoBookiesFailOnSameEntry() throws Exception { + MockClientContext clientCtx = MockClientContext.create(); + clientCtx.getMockRegistrationClient().addBookies(b4, b5).get(); + + Versioned md = setupLedger(clientCtx, 1, Lists.newArrayList(b1, b2, b3)); + clientCtx.getMockBookieClient().getMockBookies().seedEntries(b1, 1L, 0L, -1L); + clientCtx.getMockBookieClient().setPreWriteHook( + (bookie, ledgerId, entryId) -> { + if (bookie.equals(b1) || bookie.equals(b2)) { + return FutureUtils.exception(new BKException.BKWriteException()); + } else { + return FutureUtils.value(null); + } + }); + + ReadOnlyLedgerHandle lh = new ReadOnlyLedgerHandle( + clientCtx, 1L, md, BookKeeper.DigestType.CRC32C, PASSWD, false); + + GenericCallbackFuture recoveryPromise = new GenericCallbackFuture<>(); + lh.recover(recoveryPromise, null, false); + recoveryPromise.get(); + + Assert.assertEquals(lh.getLedgerMetadata().getAllEnsembles().size(), 1); + Assert.assertTrue(lh.getLedgerMetadata().getAllEnsembles().get(0L).contains(b3)); + Assert.assertTrue(lh.getLedgerMetadata().getAllEnsembles().get(0L).contains(b4)); + 
Assert.assertTrue(lh.getLedgerMetadata().getAllEnsembles().get(0L).contains(b5)); + Assert.assertEquals(lh.getLastAddConfirmed(), 0L); + } + + /** + * This test verifies the fix for the data loss scenario found by the TLA+ specification, specifically + * the invariant violation that metadata and writer can diverge. The scenario is that the original writer + * can commit an entry e that will later be lost because a second writer can close the ledger at e-1. + * The cause is that fencing was originally only performed on LAC reads which is not enough to prevent + * the 1st writer from reaching Ack Quorum after the 2nd writer has closed the ledger. The fix has + * been to fence on recovery reads also. + */ + @Test + public void testFirstWriterCannotCommitWriteAfter2ndWriterCloses() throws Exception { + /* + This test uses CompletableFutures to control the sequence of actions performed by + two writers. There are different sets of futures: + - block*: These futures block the various reads, writes and metadata updates until the + test thread is ready for them to be executed. Thus ensuring the right sequence + of events occur. + - reachedStepN: These futures block in the test thread to ensure that we only unblock + an action when the prior one has been executed and we are already blocked + on the next actionin the sequence. + */ + + // Setup w1 + CompletableFuture reachedStep1 = new CompletableFuture<>(); + CompletableFuture reachedStep2 = new CompletableFuture<>(); + CompletableFuture reachedStep3 = new CompletableFuture<>(); + CompletableFuture reachedStep4 = new CompletableFuture<>(); + CompletableFuture reachedStep5 = new CompletableFuture<>(); + CompletableFuture reachedStep6 = new CompletableFuture<>(); + CompletableFuture reachedStep7 = new CompletableFuture<>(); + CompletableFuture reachedStep8 = new CompletableFuture<>(); + CompletableFuture reachedStep9 = new CompletableFuture<>(); + + MockBookies mockBookies = new MockBookies(); + MockClientContext clientCtx1 = MockClientContext.create(mockBookies); + Versioned md1 = setupLedger(clientCtx1, 1, Lists.newArrayList(b1, b2, b3)); + + CompletableFuture blockB1Write = new CompletableFuture<>(); + CompletableFuture blockB2Write = new CompletableFuture<>(); + CompletableFuture blockB3Write = new CompletableFuture<>(); + clientCtx1.getMockBookieClient().setPreWriteHook( + (bookie, ledgerId, entryId) -> { + // ignore seed entries e0 and e1 + if (entryId < 2) { + return FutureUtils.value(null); + } + + if (!reachedStep1.isDone()) { + reachedStep1.complete(null); + } + + if (bookie.equals(b1)) { + return blockB1Write; + } else if (bookie.equals(b2)) { + reachedStep9.complete(null); + return blockB2Write; + } else if (bookie.equals(b3)) { + reachedStep3.complete(null); + return blockB3Write; + } else { + return FutureUtils.value(null); + } + }); + + LedgerHandle w1 = new LedgerHandle(clientCtx1, 1, md1, + BookKeeper.DigestType.CRC32C, + ClientUtil.PASSWD, WriteFlag.NONE); + w1.addEntry("e0".getBytes(StandardCharsets.UTF_8)); + w1.addEntry("e1".getBytes(StandardCharsets.UTF_8)); + + // Setup w2 + MockClientContext clientCtx2 = MockClientContext.create(mockBookies); + Versioned md2 = setupLedger(clientCtx2, 1, Lists.newArrayList(b1, b2, b3)); + + CompletableFuture blockB1ReadLac = new CompletableFuture<>(); + CompletableFuture blockB2ReadLac = new CompletableFuture<>(); + CompletableFuture blockB3ReadLac = new CompletableFuture<>(); + + CompletableFuture blockB1ReadEntry0 = new CompletableFuture<>(); + CompletableFuture blockB2ReadEntry0 = new 
CompletableFuture<>(); + CompletableFuture blockB3ReadEntry0 = new CompletableFuture<>(); + + AtomicBoolean isB1LacRead = new AtomicBoolean(true); + AtomicBoolean isB2LacRead = new AtomicBoolean(true); + AtomicBoolean isB3LacRead = new AtomicBoolean(true); + + clientCtx2.getMockBookieClient().setPreReadHook( + (bookie, ledgerId, entryId) -> { + if (bookie.equals(b1)) { + if (isB1LacRead.get()) { + isB1LacRead.set(false); + reachedStep2.complete(null); + return blockB1ReadLac; + } else { + reachedStep6.complete(null); + return blockB1ReadEntry0; + } + } else if (bookie.equals(b2)) { + if (isB2LacRead.get()) { + try { + isB2LacRead.set(false); + reachedStep4.complete(null); + blockB2ReadLac.get(); // block this read - it does not succeed + } catch (Throwable t){} + return FutureUtils.exception(new BKException.BKWriteException()); + } else { + reachedStep7.complete(null); + return blockB2ReadEntry0; + } + } else if (bookie.equals(b3)) { + if (isB3LacRead.get()) { + isB3LacRead.set(false); + reachedStep5.complete(null); + return blockB3ReadLac; + } else { + return blockB3ReadEntry0; + } + } else { + return FutureUtils.value(null); + } + }); + + AtomicInteger w2MetaUpdates = new AtomicInteger(0); + CompletableFuture blockW2StartingRecovery = new CompletableFuture<>(); + CompletableFuture blockW2ClosingLedger = new CompletableFuture<>(); + clientCtx2.getMockLedgerManager().setPreWriteHook((ledgerId, metadata) -> { + if (w2MetaUpdates.get() == 0) { + w2MetaUpdates.incrementAndGet(); + return blockW2StartingRecovery; + } else { + reachedStep8.complete(null); + return blockW2ClosingLedger; + } + }); + + ReadOnlyLedgerHandle w2 = new ReadOnlyLedgerHandle( + clientCtx2, 1L, md2, BookKeeper.DigestType.CRC32C, PASSWD, false); + + // Start an async add entry, blocked for now. + CompletableFuture w1WriteFuture = new CompletableFuture<>(); + AtomicInteger writeResult = new AtomicInteger(0); + w1.asyncAddEntry("e2".getBytes(), (int rc, LedgerHandle lh1, long entryId, Object ctx) -> { + if (rc == BKException.Code.OK) { + writeResult.set(1); + } else { + writeResult.set(2); + } + SyncCallbackUtils.finish(rc, null, w1WriteFuture); + }, null); + + // Step 1. w2 starts recovery + stepBlock(reachedStep1); + GenericCallbackFuture recoveryPromise = new GenericCallbackFuture<>(); + w2.recover(recoveryPromise, null, false); + blockW2StartingRecovery.complete(null); + + // Step 2. w2 fencing read LAC reaches B1 + stepBlock(reachedStep2); + blockB1ReadLac.complete(null); + + // Step 3. w1 add e0 reaches B3 + stepBlock(reachedStep3); + blockB3Write.complete(null); + + // Step 4. w2 fencing LAC read does not reach B2 or it fails + stepBlock(reachedStep4); + blockB2ReadLac.complete(null); + + // Step 5. w2 fencing LAC read reaches B3 + stepBlock(reachedStep5); + blockB3ReadLac.complete(null); + + // Step 6. w2 sends read e0 to b1, gets NoSuchLedger + stepBlock(reachedStep6); + blockB1ReadEntry0.complete(null); + + // Step 7. w2 send read e0 to b2, gets NoSuchLedger + stepBlock(reachedStep7); + blockB2ReadEntry0.complete(null); + + // Step 8. w2 closes ledger because (Qw-Qa)+1 bookies confirmed they do not have it + // last entry id set to 0 + stepBlock(reachedStep8); + blockW2ClosingLedger.complete(null); + + // Step 9. w1 add e0 reaches b2 (which was fenced by a recovery read) + stepBlock(reachedStep9); + blockB2Write.complete(null); + + // Step 10. 
+ + + /* + * This test verifies that an IllegalStateException does not occur during recovery because of an attempt + * to create a new ensemble that has a lower first entry id than an existing ensemble. + * + * To reproduce the original issue, revert the fix and run this test. + * The fix was to apply max(LAC from current ensemble, (first entry of current ensemble - 1)) as the LAC + * of the recovery phase rather than accept a value of -1 that might be returned by the LAC reads. + */ + @Test + public void testRecoveryWhenSecondEnsembleReturnsLacMinusOne() throws Exception { + MockClientContext clientCtx = MockClientContext.create(); + clientCtx.getMockRegistrationClient().addBookies(b4).get(); + + // at least two non-empty ensembles are required, as otherwise the first ensemble would + // simply be replaced, avoiding the issue. + + // initial state: 2 ensembles due to a write failure of e1 to b2 + // ensemble 1 + Versioned md = setupLedger(clientCtx, 1, Lists.newArrayList(b1, b2), 2, 2, 2); + clientCtx.getMockBookieClient().getMockBookies().seedEntries(b1, 1L, 0L, -1L); + clientCtx.getMockBookieClient().getMockBookies().seedEntries(b2, 1L, 0L, -1L); + clientCtx.getMockBookieClient().getMockBookies().seedEntries(b1, 1L, 1L, -1L); + // write to b2 failed, causing ensemble change + + // ensemble 2 - the write of e1 to b2 failed, so a new ensemble with b3 was created + ClientUtil.transformMetadata(clientCtx, 1L, + (metadata) -> LedgerMetadataBuilder.from(metadata).newEnsembleEntry(1L, Lists.newArrayList(b1, b3)) + .build()); + clientCtx.getMockBookieClient().getMockBookies().seedEntries(b3, 1L, 1L, 0L); + + ReadOnlyLedgerHandle lh = new ReadOnlyLedgerHandle( + clientCtx, 1L, md, BookKeeper.DigestType.CRC32C, PASSWD, false); + + // however, any read or write to b3 fails, which will: + // 1. cause the LAC read to return -1 (b1 has -1) + // 2. cause an ensemble change during the recovery write-back phase + clientCtx.getMockBookieClient().setPreWriteHook( + (bookie, ledgerId, entryId) -> { + if (bookie.equals(b3)) { + return FutureUtils.exception(new BKException.BKWriteException()); + } else { + return FutureUtils.value(null); + } + }); + + clientCtx.getMockBookieClient().setPreReadHook( + (bookie, ledgerId, entryId) -> { + if (bookie.equals(b3)) { + return FutureUtils.exception(new BKException.BKTimeoutException()); + } else { + return FutureUtils.value(null); + } + }); + + // writer 2 starts recovery (the subject of this test) + // (either the writer failed or simply has not yet sent the pending writes to the new ensemble) + GenericCallbackFuture recoveryPromise = new GenericCallbackFuture<>(); + lh.recover(recoveryPromise, null, false); + recoveryPromise.get(); + + // The recovery process completes successfully, with the expected ensembles. + Assert.assertEquals(lh.getLedgerMetadata().getAllEnsembles().size(), 2); + Assert.assertTrue(lh.getLedgerMetadata().getAllEnsembles().get(0L).contains(b1)); + Assert.assertTrue(lh.getLedgerMetadata().getAllEnsembles().get(0L).contains(b2)); + Assert.assertTrue(lh.getLedgerMetadata().getAllEnsembles().get(1L).contains(b1)); + Assert.assertTrue(lh.getLedgerMetadata().getAllEnsembles().get(1L).contains(b4)); + + // the ledger is closed with entry id 1 + Assert.assertEquals(lh.getLastAddConfirmed(), 1L); + Assert.assertEquals(lh.getLedgerMetadata().getLastEntryId(), 1L); + } +} \ No newline at end of file
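The fix described in the javadoc above pins the starting LAC of the recovery phase to the boundary of the current ensemble. A minimal sketch of that max() rule; the helper name and class are hypothetical, mirroring only the formula stated in the comment:

public class RecoveryLacExample {
    /**
     * Never let the recovery phase start below the boundary of the current
     * (last) ensemble, even when the LAC reads return -1.
     */
    static long recoveryStartLac(long lacFromReads, long firstEntryIdOfCurrentEnsemble) {
        return Math.max(lacFromReads, firstEntryIdOfCurrentEnsemble - 1);
    }

    public static void main(String[] args) {
        // In the test above the second ensemble starts at entry 1 and the only
        // reachable bookie (b1) reports LAC -1, so recovery proceeds from
        // max(-1, 1 - 1) = 0 and cannot create an ensemble below entry 1.
        System.out.println(recoveryStartLac(-1L, 1L)); // 0
    }
}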
diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/LedgerRecoveryTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/LedgerRecoveryTest.java index e1260015344..4a9e65a6534 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/LedgerRecoveryTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/LedgerRecoveryTest.java @@ -26,23 +26,21 @@ import static org.junit.Assert.fail; import io.netty.buffer.ByteBuf; - import java.io.IOException; import java.util.Enumeration; +import java.util.concurrent.CompletableFuture; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; - import org.apache.bookkeeper.bookie.Bookie; import org.apache.bookkeeper.bookie.BookieException; +import org.apache.bookkeeper.bookie.TestBookieImpl; import org.apache.bookkeeper.client.AsyncCallback.AddCallback; import org.apache.bookkeeper.client.BookKeeper.DigestType; import org.apache.bookkeeper.conf.ClientConfiguration; import org.apache.bookkeeper.conf.ServerConfiguration; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.proto.BookieProtocol; -import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.WriteCallback; import org.apache.bookkeeper.test.BookKeeperClusterTestCase; import org.junit.Test;
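The hunks in this file track, among other cleanups, LedgerRecoveryOp's move from the GenericCallback style to a CompletableFuture returned by initiate() (see the parallel-recovery hunk further below). A minimal sketch of bridging the old callback shape to a future, with an assumed callback interface standing in for BookkeeperInternalCallbacks.GenericCallback:

import java.util.concurrent.CompletableFuture;
import java.util.function.Consumer;

public class CallbackBridgeExample {
    // Assumed stand-in for the old GenericCallback<T> shape.
    interface GenericCallback<T> {
        void operationComplete(int rc, T result);
    }

    static final int OK = 0;

    // Adapt a callback-accepting operation into a future-returning one.
    static <T> CompletableFuture<T> toFuture(Consumer<GenericCallback<T>> operation) {
        CompletableFuture<T> promise = new CompletableFuture<>();
        operation.accept((rc, result) -> {
            if (rc == OK) {
                promise.complete(result);
            } else {
                promise.completeExceptionally(new Exception("operation failed, rc=" + rc));
            }
        });
        return promise;
    }

    public static void main(String[] args) throws Exception {
        // A fake operation that immediately reports success with rc = OK.
        CompletableFuture<String> f = toFuture(cb -> cb.operationComplete(OK, "recovered"));
        System.out.println(f.get()); // prints "recovered"
    }
}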
@@ -85,10 +83,8 @@ private void testInternal(int numEntries) throws Exception { /* * Check if has recovered properly. */ - assertTrue("Has not recovered correctly: " + afterlh.getLastAddConfirmed(), - afterlh.getLastAddConfirmed() == numEntries - 1); - assertTrue("Has not set the length correctly: " + afterlh.getLength() + ", " + length, - afterlh.getLength() == length); + assertEquals("Has not recovered correctly", numEntries - 1, afterlh.getLastAddConfirmed()); + assertEquals("Has not set the length correctly", length, afterlh.getLength()); } @Test @@ -145,8 +141,7 @@ public void testLedgerRecoveryWithNotEnoughBookies() throws Exception { } // shutdown first bookie server - bs.get(0).shutdown(); - bs.remove(0); + killBookie(0); /* * Try to open ledger. @@ -188,18 +183,17 @@ private void ledgerRecoveryWithSlowBookie(int ensembleSize, int writeQuorumSize, // kill first bookie server to start a fake one to simulate a slow bookie // and failed to add entry on crash // until write succeed - BookieSocketAddress host = beforelh.getCurrentEnsemble().get(slowBookieIdx); + BookieId host = beforelh.getCurrentEnsemble().get(slowBookieIdx); ServerConfiguration conf = killBookie(host); - Bookie fakeBookie = new Bookie(conf) { + Bookie fakeBookie = new TestBookieImpl(conf) { @Override public void addEntry(ByteBuf entry, boolean ackBeforeSync, WriteCallback cb, Object ctx, byte[] masterKey) throws IOException, BookieException { // drop request to simulate a slow and failed bookie } }; - bsConfs.add(conf); - bs.add(startBookie(conf, fakeBookie)); + startAndAddBookie(conf, fakeBookie); // avoid not-enough-bookies case startNewBookie(); @@ -211,9 +205,8 @@ public void addEntry(ByteBuf entry, boolean ackBeforeSync, WriteCallback cb, Obj } conf = killBookie(host); - bsConfs.add(conf); // the bookie goes normally - bs.add(startBookie(conf)); + startAndAddBookie(conf); /* * Try to open ledger. 
@@ -250,7 +243,7 @@ public void testLedgerRecoveryWithRollingRestart() throws Exception { // Add a dead bookie to the cluster ServerConfiguration conf = newServerConfiguration(); - Bookie deadBookie1 = new Bookie(conf) { + Bookie deadBookie1 = new TestBookieImpl(conf) { @Override public void recoveryAddEntry(ByteBuf entry, WriteCallback cb, Object ctx, byte[] masterKey) throws IOException, BookieException { @@ -258,11 +251,10 @@ public void recoveryAddEntry(ByteBuf entry, WriteCallback cb, Object ctx, byte[] throw new IOException("Couldn't write for some reason"); } }; - bsConfs.add(conf); - bs.add(startBookie(conf, deadBookie1)); + startAndAddBookie(conf, deadBookie1); // kill first bookie server - BookieSocketAddress bookie1 = lhbefore.getCurrentEnsemble().get(0); + BookieId bookie1 = lhbefore.getCurrentEnsemble().get(0); ServerConfiguration conf1 = killBookie(bookie1); // Try to recover and fence the ledger after killing one bookie in the @@ -275,9 +267,8 @@ public void recoveryAddEntry(ByteBuf entry, WriteCallback cb, Object ctx, byte[] } // restart the first server, kill the second - bsConfs.add(conf1); - bs.add(startBookie(conf1)); - BookieSocketAddress bookie2 = lhbefore.getCurrentEnsemble().get(1); + startAndAddBookie(conf1); + BookieId bookie2 = lhbefore.getCurrentEnsemble().get(1); ServerConfiguration conf2 = killBookie(bookie2); // using async, because this could trigger an assertion @@ -300,8 +291,7 @@ public void openComplete(int rc, LedgerHandle lh, Object ctx) { assertTrue("Open call should have completed", openLatch.await(5, TimeUnit.SECONDS)); assertFalse("Open should not have succeeded", returnCode.get() == BKException.Code.OK); - bsConfs.add(conf2); - bs.add(startBookie(conf2)); + startAndAddBookie(conf2); LedgerHandle lhafter = bkc.openLedger(lhbefore.getId(), digestType, "".getBytes()); @@ -331,7 +321,7 @@ public void testBookieFailureDuringRecovery() throws Exception { // Add a dead bookie to the cluster ServerConfiguration conf = newServerConfiguration(); - Bookie deadBookie1 = new Bookie(conf) { + Bookie deadBookie1 = new TestBookieImpl(conf) { @Override public void recoveryAddEntry(ByteBuf entry, WriteCallback cb, Object ctx, byte[] masterKey) throws IOException, BookieException { @@ -339,11 +329,10 @@ public void recoveryAddEntry(ByteBuf entry, WriteCallback cb, Object ctx, byte[] throw new IOException("Couldn't write for some reason"); } }; - bsConfs.add(conf); - bs.add(startBookie(conf, deadBookie1)); + startAndAddBookie(conf, deadBookie1); // kill first bookie server - BookieSocketAddress bookie1 = lhbefore.getCurrentEnsemble().get(0); + BookieId bookie1 = lhbefore.getCurrentEnsemble().get(0); killBookie(bookie1); // Try to recover and fence the ledger after killing one bookie in the @@ -357,7 +346,6 @@ public void recoveryAddEntry(ByteBuf entry, WriteCallback cb, Object ctx, byte[] // start a new good server startNewBookie(); - LedgerHandle lhafter = bkc.openLedger(lhbefore.getId(), digestType, "".getBytes()); assertEquals("Fenced ledger should have correct lastAddConfirmed", @@ -395,9 +383,9 @@ public void addComplete(int rc, LedgerHandle lh, long entryId, Object ctx) { fail("Failed to add " + numEntries + " to ledger handle " + lh.getId()); } // kill first 2 bookies to replace bookies - BookieSocketAddress bookie1 = lh.getCurrentEnsemble().get(0); + BookieId bookie1 = lh.getCurrentEnsemble().get(0); ServerConfiguration conf1 = killBookie(bookie1); - BookieSocketAddress bookie2 = lh.getCurrentEnsemble().get(1); + BookieId bookie2 = 
lh.getCurrentEnsemble().get(1); ServerConfiguration conf2 = killBookie(bookie2); // replace these two bookies @@ -415,7 +403,7 @@ public void addComplete(int rc, LedgerHandle lh, long entryId, Object ctx) { } private void startDeadBookie(ServerConfiguration conf) throws Exception { - Bookie rBookie = new Bookie(conf) { + Bookie rBookie = new TestBookieImpl(conf) { @Override public void recoveryAddEntry(ByteBuf entry, WriteCallback cb, Object ctx, byte[] masterKey) throws IOException, BookieException { @@ -423,8 +411,7 @@ public void recoveryAddEntry(ByteBuf entry, WriteCallback cb, Object ctx, byte[] throw new IOException("Couldn't write entries for some reason"); } }; - bsConfs.add(conf); - bs.add(startBookie(conf, rBookie)); + startAndAddBookie(conf, rBookie); } @Test @@ -479,24 +466,13 @@ public void addComplete(int rc, LedgerHandle lh, long entryId, Object ctx) { LedgerHandle recoverLh = newBk.openLedgerNoRecovery(lh.getId(), digestType, "".getBytes()); assertEquals(BookieProtocol.INVALID_ENTRY_ID, recoverLh.getLastAddConfirmed()); - final CountDownLatch recoverLatch = new CountDownLatch(1); - final AtomicBoolean success = new AtomicBoolean(false); - MockClientContext parallelReadCtx = MockClientContext.copyOf(bkc.getClientCtx()) .setConf(ClientInternalConf.fromConfig(newConf.setEnableParallelRecoveryRead(true))); - LedgerRecoveryOp recoveryOp = new LedgerRecoveryOp( - recoverLh, parallelReadCtx, - new BookkeeperInternalCallbacks.GenericCallback() { - @Override - public void operationComplete(int rc, Void result) { - success.set(BKException.Code.OK == rc); - recoverLatch.countDown(); - } - }); - recoveryOp.initiate(); - recoverLatch.await(10, TimeUnit.SECONDS); - assertTrue(success.get()); + LedgerRecoveryOp recoveryOp = new LedgerRecoveryOp(recoverLh, parallelReadCtx); + CompletableFuture f = recoveryOp.initiate(); + f.get(10, TimeUnit.SECONDS); + assertEquals(numEntries, recoveryOp.readCount.get()); assertEquals(numEntries, recoveryOp.writeCount.get()); diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/ListLedgersTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/ListLedgersTest.java index eef3e6d340c..8bb23684bc7 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/ListLedgersTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/ListLedgersTest.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with this * work for additional information regarding copyright ownership. The ASF @@ -21,7 +21,6 @@ import static org.junit.Assert.fail; import java.util.Iterator; - import org.apache.bookkeeper.client.BookKeeper.DigestType; import org.apache.bookkeeper.conf.ClientConfiguration; import org.apache.bookkeeper.test.BookKeeperClusterTestCase; diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/MdcContextTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/MdcContextTest.java new file mode 100644 index 00000000000..2a7a15ca253 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/MdcContextTest.java @@ -0,0 +1,252 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +package org.apache.bookkeeper.client; + +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.hasItem; +import static org.junit.Assert.assertThat; +import static org.junit.Assert.fail; +import static org.mockito.AdditionalAnswers.answerVoid; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.spy; + +import java.io.File; +import java.util.Queue; +import java.util.UUID; +import java.util.concurrent.ConcurrentLinkedQueue; +import lombok.extern.slf4j.Slf4j; +import org.apache.bookkeeper.bookie.Bookie; +import org.apache.bookkeeper.bookie.BookieImpl; +import org.apache.bookkeeper.bookie.InterleavedLedgerStorage; +import org.apache.bookkeeper.bookie.LedgerDirsManager; +import org.apache.bookkeeper.conf.ClientConfiguration; +import org.apache.bookkeeper.test.BookKeeperClusterTestCase; +import org.apache.logging.log4j.ThreadContext; +import org.apache.logging.log4j.core.LogEvent; +import org.apache.logging.log4j.core.LoggerContext; +import org.apache.logging.log4j.core.appender.NullAppender; +import org.hamcrest.CoreMatchers; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + + +/** + * Test passing of MDC context. + */ +@SuppressWarnings("deprecation") +@Slf4j +public class MdcContextTest extends BookKeeperClusterTestCase { + public static final String MDC_REQUEST_ID = "request_id"; + + final byte[] entry = "Test Entry".getBytes(); + + BookKeeper bkc; + LedgerHandle lh; + + private NullAppender mockAppender; + private Queue capturedEvents; + + public MdcContextTest() { + super(3); + baseConf.setNumAddWorkerThreads(0); + baseConf.setNumReadWorkerThreads(0); + baseConf.setPreserveMdcForTaskExecution(true); + baseConf.setReadOnlyModeEnabled(true); + + // for read-only bookie + baseConf.setLedgerStorageClass(InterleavedLedgerStorage.class.getName()); + baseConf.setEntryLogFilePreAllocationEnabled(false); + baseConf.setMinUsableSizeForEntryLogCreation(Long.MAX_VALUE); + } + + + public static String mdcFormat(Object mdc, String message) { + return mdc == null + ? 
"[" + MDC_REQUEST_ID + ":] - " + message + : "[" + MDC_REQUEST_ID + ":" + mdc + + "] - " + message; + } + + public void assertLogWithMdc(String mdc, String msgSubstring) { + assertThat(capturedEvents, + hasItem(CoreMatchers.allOf( + containsString("[" + MDC_REQUEST_ID + ":" + mdc + "] - "), + containsString(msgSubstring) + ))); + } + + @Before + public void setUp() throws Exception { + super.setUp(); + ClientConfiguration conf = new ClientConfiguration(); + conf.setReadTimeout(360) + .setMetadataServiceUri(zkUtil.getMetadataServiceUri()) + .setPreserveMdcForTaskExecution(true); + + ThreadContext.clearMap(); + bkc = new BookKeeper(conf); + + ThreadContext.put(MDC_REQUEST_ID, "ledger_create"); + log.info("creating ledger"); + lh = bkc.createLedgerAdv(3, 3, 3, BookKeeper.DigestType.CRC32, new byte[] {}); + ThreadContext.clearMap(); + + LoggerContext lc = (LoggerContext) org.apache.logging.log4j.LogManager.getContext(false); + mockAppender = spy(NullAppender.createAppender(UUID.randomUUID().toString())); + mockAppender.start(); + lc.getConfiguration().addAppender(mockAppender); + lc.getRootLogger().addAppender(lc.getConfiguration().getAppender(mockAppender.getName())); + lc.getConfiguration().getRootLogger().setLevel(org.apache.logging.log4j.Level.INFO); + lc.updateLoggers(); + + capturedEvents = new ConcurrentLinkedQueue<>(); + + doAnswer(answerVoid((LogEvent event) -> capturedEvents.add( + mdcFormat(event.getContextData().getValue(MDC_REQUEST_ID), event.getMessage().getFormattedMessage()) + ))).when(mockAppender).append(any()); + } + + @After + public void tearDown() throws Exception { + lh.close(); + bkc.close(); + LoggerContext lc = (LoggerContext) org.apache.logging.log4j.LogManager.getContext(false); + lc.getRootLogger().removeAppender(lc.getConfiguration().getAppender(mockAppender.getName())); + lc.updateLoggers(); + capturedEvents = null; + ThreadContext.clearMap(); + super.tearDown(); + } + + @Test + public void testLedgerCreateFails() throws Exception { + ThreadContext.put(MDC_REQUEST_ID, "ledger_create_fail"); + try { + bkc.createLedgerAdv(99, 3, 2, BookKeeper.DigestType.CRC32, new byte[]{}); + Assert.fail("should not get here"); + } catch (BKException bke) { + // expected + } + assertLogWithMdc("ledger_create_fail", "Not enough bookies to create ledger"); + } + + @Test + public void testSimpleAdd() throws Exception { + ThreadContext.put(MDC_REQUEST_ID, "ledger_add_entry"); + lh.addEntry(0, entry); + + // client msg + assertLogWithMdc("ledger_add_entry", "Successfully connected to bookie"); + // bookie msg + assertLogWithMdc("ledger_add_entry", "Created new entry log file"); + } + + @Test + public void testAddWithEnsembleChange() throws Exception { + lh.addEntry(0, entry); + startNewBookie(); + killBookie(0); + + ThreadContext.put(MDC_REQUEST_ID, "ledger_add_entry"); + lh.addEntry(1, entry); + assertLogWithMdc("ledger_add_entry", "Could not connect to bookie"); + assertLogWithMdc("ledger_add_entry", "Failed to write entry"); + //commented out until we figure out a way to preserve MDC through a call out + //to another thread pool + //assertLogWithMdc("ledger_add_entry", "New Ensemble"); + } + + @Test + public void testAddFailsWithReadOnlyBookie() throws Exception { + for (int i = 0; i < 3; ++i) { + Bookie bookie = serverByIndex(i).getBookie(); + File[] ledgerDirs = confByIndex(i).getLedgerDirs(); + LedgerDirsManager ledgerDirsManager = ((BookieImpl) bookie).getLedgerDirsManager(); + ledgerDirsManager.addToFilledDirs(new File(ledgerDirs[0], "current")); + } + + 
ThreadContext.put(MDC_REQUEST_ID, "ledger_add_entry"); + try { + lh.addEntry(0, entry); + Assert.fail("should not get here"); + } catch (BKException bke) { + // expected, pass + } + + assertLogWithMdc("ledger_add_entry", "No writable ledger dirs below diskUsageThreshold"); + assertLogWithMdc("ledger_add_entry", "All ledger directories are non writable and no reserved space"); + assertLogWithMdc("ledger_add_entry", "Error writing entry:0 to ledger:0"); + assertLogWithMdc("ledger_add_entry", "Add for failed on bookie"); + assertLogWithMdc("ledger_add_entry", "Failed to find 1 bookies"); + assertLogWithMdc("ledger_add_entry", "Closing ledger 0 due to NotEnoughBookiesException"); + } + + @Test + public void testAddFailsDuplicateEntry() throws Exception { + lh.addEntry(0, entry); + + ThreadContext.put(MDC_REQUEST_ID, "ledger_add_duplicate_entry"); + try { + lh.addEntry(0, entry); + Assert.fail("should not get here"); + } catch (BKException bke) { + // expected, pass + } + + assertLogWithMdc("ledger_add_duplicate_entry", "Trying to re-add duplicate entryid:0"); + assertLogWithMdc("ledger_add_duplicate_entry", "Write of ledger entry to quorum failed"); + } + + @Test + public void testReadEntryBeyondLac() throws Exception { + ThreadContext.put(MDC_REQUEST_ID, "ledger_read_entry"); + + try { + lh.readEntries(100, 100); + fail("should not get here"); + } catch (BKException.BKReadException e) { + // pass + } + assertLogWithMdc("ledger_read_entry", "ReadEntries exception on ledgerId:0 firstEntry:100 lastEntry:100"); + } + + @Test + public void testReadFromDeletedLedger() throws Exception { + lh.addEntry(0, entry); + lh.close(); + bkc.deleteLedger(lh.ledgerId); + + ThreadContext.put(MDC_REQUEST_ID, "ledger_read_entry"); + + try { + lh.readEntries(100, 100); + fail("should not get here"); + } catch (BKException.BKReadException e) { + // pass + } + assertLogWithMdc("ledger_read_entry", "ReadEntries exception on ledgerId:0 firstEntry:100 lastEntry:100"); + } + +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/MetadataUpdateLoopTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/MetadataUpdateLoopTest.java index 5ed75ce9770..ed97b4af52a 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/MetadataUpdateLoopTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/MetadataUpdateLoopTest.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file * distributed with this work for additional information @@ -27,29 +27,27 @@ import com.google.common.collect.Lists; import com.google.common.util.concurrent.ThreadFactoryBuilder; - import java.util.List; - import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.atomic.AtomicReference; - import java.util.stream.Collectors; import java.util.stream.IntStream; - +import lombok.AllArgsConstructor; +import lombok.Data; +import org.apache.bookkeeper.client.api.DigestType; +import org.apache.bookkeeper.client.api.LedgerMetadata; import org.apache.bookkeeper.meta.LedgerManager; import org.apache.bookkeeper.meta.MockLedgerManager; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.net.BookieSocketAddress; -import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GenericCallback; -import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GenericCallbackFuture; import org.apache.bookkeeper.versioning.Version; +import org.apache.bookkeeper.versioning.Versioned; import org.apache.commons.lang3.tuple.Triple; - import org.junit.Assert; import org.junit.Test; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -65,28 +63,29 @@ public class MetadataUpdateLoopTest { @Test public void testBasicUpdate() throws Exception { try (LedgerManager lm = new MockLedgerManager()) { - LedgerMetadata initMeta = LedgerMetadataBuilder.create().withEnsembleSize(5) - .newEnsembleEntry(0L, Lists.newArrayList( - new BookieSocketAddress("0.0.0.0:3181"), - new BookieSocketAddress("0.0.0.1:3181"), - new BookieSocketAddress("0.0.0.2:3181"), - new BookieSocketAddress("0.0.0.3:3181"), - new BookieSocketAddress("0.0.0.4:3181"))).build(); - GenericCallbackFuture promise = new GenericCallbackFuture<>(); long ledgerId = 1234L; - lm.createLedgerMetadata(ledgerId, initMeta, promise); - LedgerMetadata writtenMetadata = promise.get(); + LedgerMetadata initMeta = LedgerMetadataBuilder.create() + .withId(ledgerId) + .withEnsembleSize(5) + .withDigestType(DigestType.CRC32C).withPassword(new byte[0]) + .newEnsembleEntry(0L, Lists.newArrayList(BookieId.parse("0.0.0.0:3181"), + BookieId.parse("0.0.0.1:3181"), + BookieId.parse("0.0.0.2:3181"), + BookieId.parse("0.0.0.3:3181"), + BookieId.parse("0.0.0.4:3181"))).build(); + + Versioned writtenMetadata = lm.createLedgerMetadata(ledgerId, initMeta).get(); - AtomicReference reference = new AtomicReference<>(writtenMetadata); + AtomicReference> reference = new AtomicReference<>(writtenMetadata); - BookieSocketAddress newAddress = new BookieSocketAddress("0.0.0.5:3181"); + BookieId newAddress = BookieId.parse("0.0.0.5:3181"); MetadataUpdateLoop loop = new MetadataUpdateLoop( lm, ledgerId, reference::get, (currentMetadata) -> true, (currentMetadata) -> { - List ensemble = Lists.newArrayList(currentMetadata.getEnsemble(0L)); + List ensemble = Lists.newArrayList(currentMetadata.getEnsembleAt(0L)); ensemble.set(0, newAddress); return LedgerMetadataBuilder.from(currentMetadata).replaceEnsembleEntry(0L, ensemble).build(); }, @@ -94,12 +93,12 @@ public void testBasicUpdate() throws Exception { loop.run().get(); Assert.assertNotEquals(reference.get(), writtenMetadata); - Assert.assertEquals(reference.get().getEnsemble(0L).get(0), newAddress); + Assert.assertEquals(reference.get().getValue().getEnsembleAt(0L).get(0), newAddress); } } /** - * Test that when 2 update loops conflict when making diffent 
updates to the metadata, + * Test that when 2 update loops conflict when making different updates to the metadata, * both will eventually succeed, and both updates will be reflected in the final metadata. */ @Test @@ -108,38 +107,38 @@ public void testConflictOnWrite() throws Exception { lm.blockWrites(); long ledgerId = 1234L; - BookieSocketAddress b0 = new BookieSocketAddress("0.0.0.0:3181"); - BookieSocketAddress b1 = new BookieSocketAddress("0.0.0.1:3181"); - BookieSocketAddress b2 = new BookieSocketAddress("0.0.0.2:3181"); - BookieSocketAddress b3 = new BookieSocketAddress("0.0.0.3:3181"); - - LedgerMetadata initMeta = LedgerMetadataBuilder.create().withEnsembleSize(2) - .newEnsembleEntry(0L, Lists.newArrayList(b0, b1)).build(); - GenericCallbackFuture promise = new GenericCallbackFuture<>(); - lm.createLedgerMetadata(ledgerId, initMeta, promise); - LedgerMetadata writtenMetadata = promise.get(); - - AtomicReference reference1 = new AtomicReference<>(writtenMetadata); - CompletableFuture loop1 = new MetadataUpdateLoop( + BookieId b0 = BookieId.parse("0.0.0.0:3181"); + BookieId b1 = BookieId.parse("0.0.0.1:3181"); + BookieId b2 = BookieId.parse("0.0.0.2:3181"); + BookieId b3 = BookieId.parse("0.0.0.3:3181"); + + LedgerMetadata initMeta = LedgerMetadataBuilder.create().withEnsembleSize(2).withId(ledgerId) + .withDigestType(DigestType.CRC32C).withPassword(new byte[0]) + .withWriteQuorumSize(2).newEnsembleEntry(0L, Lists.newArrayList(b0, b1)).build(); + Versioned writtenMetadata = + lm.createLedgerMetadata(ledgerId, initMeta).get(); + + AtomicReference> reference1 = new AtomicReference<>(writtenMetadata); + CompletableFuture> loop1 = new MetadataUpdateLoop( lm, ledgerId, reference1::get, - (currentMetadata) -> currentMetadata.getEnsemble(0L).contains(b0), + (currentMetadata) -> currentMetadata.getEnsembleAt(0L).contains(b0), (currentMetadata) -> { - List ensemble = Lists.newArrayList(currentMetadata.getEnsemble(0L)); + List ensemble = Lists.newArrayList(currentMetadata.getEnsembleAt(0L)); ensemble.set(0, b2); return LedgerMetadataBuilder.from(currentMetadata).replaceEnsembleEntry(0L, ensemble).build(); }, reference1::compareAndSet).run(); - AtomicReference reference2 = new AtomicReference<>(writtenMetadata); - CompletableFuture loop2 = new MetadataUpdateLoop( + AtomicReference> reference2 = new AtomicReference<>(writtenMetadata); + CompletableFuture> loop2 = new MetadataUpdateLoop( lm, ledgerId, reference2::get, - (currentMetadata) -> currentMetadata.getEnsemble(0L).contains(b1), + (currentMetadata) -> currentMetadata.getEnsembleAt(0L).contains(b1), (currentMetadata) -> { - List ensemble = Lists.newArrayList(currentMetadata.getEnsemble(0L)); + List ensemble = Lists.newArrayList(currentMetadata.getEnsembleAt(0L)); ensemble.set(1, b3); return LedgerMetadataBuilder.from(currentMetadata).replaceEnsembleEntry(0L, ensemble).build(); }, @@ -147,19 +146,19 @@ public void testConflictOnWrite() throws Exception { lm.releaseWrites(); - LedgerMetadata l1meta = loop1.get(); - LedgerMetadata l2meta = loop2.get(); + Versioned l1meta = loop1.get(); + Versioned l2meta = loop2.get(); Assert.assertEquals(l1meta, reference1.get()); Assert.assertEquals(l2meta, reference2.get()); Assert.assertEquals(l1meta.getVersion().compare(l2meta.getVersion()), Version.Occurred.BEFORE); - Assert.assertEquals(l1meta.getEnsemble(0L).get(0), b2); - Assert.assertEquals(l1meta.getEnsemble(0L).get(1), b1); + Assert.assertEquals(l1meta.getValue().getEnsembleAt(0L).get(0), b2); + 
Assert.assertEquals(l1meta.getValue().getEnsembleAt(0L).get(1), b1); - Assert.assertEquals(l2meta.getEnsemble(0L).get(0), b2); - Assert.assertEquals(l2meta.getEnsemble(0L).get(1), b3); + Assert.assertEquals(l2meta.getValue().getEnsembleAt(0L).get(0), b2); + Assert.assertEquals(l2meta.getValue().getEnsembleAt(0L).get(1), b3); verify(lm, times(3)).writeLedgerMetadata(anyLong(), any(), any()); } @@ -176,36 +175,34 @@ public void testConflictOnWriteBothWritingSame() throws Exception { lm.blockWrites(); long ledgerId = 1234L; - BookieSocketAddress b0 = new BookieSocketAddress("0.0.0.0:3181"); - BookieSocketAddress b1 = new BookieSocketAddress("0.0.0.1:3181"); - BookieSocketAddress b2 = new BookieSocketAddress("0.0.0.2:3181"); + BookieId b0 = BookieId.parse("0.0.0.0:3181"); + BookieId b1 = BookieId.parse("0.0.0.1:3181"); + BookieId b2 = BookieId.parse("0.0.0.2:3181"); - LedgerMetadata initMeta = LedgerMetadataBuilder.create().withEnsembleSize(2) - .newEnsembleEntry(0L, Lists.newArrayList(b0, b1)).build(); - GenericCallbackFuture promise = new GenericCallbackFuture<>(); - lm.createLedgerMetadata(ledgerId, initMeta, promise); - LedgerMetadata writtenMetadata = promise.get(); + LedgerMetadata initMeta = LedgerMetadataBuilder.create().withEnsembleSize(2).withId(ledgerId) + .withDigestType(DigestType.CRC32C).withPassword(new byte[0]) + .withWriteQuorumSize(2).newEnsembleEntry(0L, Lists.newArrayList(b0, b1)).build(); + Versioned writtenMetadata = lm.createLedgerMetadata(ledgerId, initMeta).get(); + AtomicReference> reference = new AtomicReference<>(writtenMetadata); - AtomicReference reference = new AtomicReference<>(writtenMetadata); - - CompletableFuture loop1 = new MetadataUpdateLoop( + CompletableFuture> loop1 = new MetadataUpdateLoop( lm, ledgerId, reference::get, - (currentMetadata) -> currentMetadata.getEnsemble(0L).contains(b0), + (currentMetadata) -> currentMetadata.getEnsembleAt(0L).contains(b0), (currentMetadata) -> { - List ensemble = Lists.newArrayList(currentMetadata.getEnsemble(0L)); + List ensemble = Lists.newArrayList(currentMetadata.getEnsembleAt(0L)); ensemble.set(0, b2); return LedgerMetadataBuilder.from(currentMetadata).replaceEnsembleEntry(0L, ensemble).build(); }, reference::compareAndSet).run(); - CompletableFuture loop2 = new MetadataUpdateLoop( + CompletableFuture> loop2 = new MetadataUpdateLoop( lm, ledgerId, reference::get, - (currentMetadata) -> currentMetadata.getEnsemble(0L).contains(b0), + (currentMetadata) -> currentMetadata.getEnsembleAt(0L).contains(b0), (currentMetadata) -> { - List ensemble = Lists.newArrayList(currentMetadata.getEnsemble(0L)); + List ensemble = Lists.newArrayList(currentMetadata.getEnsembleAt(0L)); ensemble.set(0, b2); return LedgerMetadataBuilder.from(currentMetadata).replaceEnsembleEntry(0L, ensemble).build(); }, @@ -216,8 +213,8 @@ public void testConflictOnWriteBothWritingSame() throws Exception { Assert.assertEquals(loop1.get(), loop2.get()); Assert.assertEquals(loop1.get(), reference.get()); - Assert.assertEquals(reference.get().getEnsemble(0L).get(0), b2); - Assert.assertEquals(reference.get().getEnsemble(0L).get(1), b1); + Assert.assertEquals(reference.get().getValue().getEnsembleAt(0L).get(0), b2); + Assert.assertEquals(reference.get().getValue().getEnsembleAt(0L).get(1), b1); verify(lm, times(2)).writeLedgerMetadata(anyLong(), any(), any()); } @@ -231,39 +228,37 @@ public void testConflictOnWriteBothWritingSame() throws Exception { public void testConflictOnLocalUpdate() throws Exception { try (DeferCallbacksMockLedgerManager lm = 
spy(new DeferCallbacksMockLedgerManager(1))) { long ledgerId = 1234L; - BookieSocketAddress b0 = new BookieSocketAddress("0.0.0.0:3181"); - BookieSocketAddress b1 = new BookieSocketAddress("0.0.0.1:3181"); - BookieSocketAddress b2 = new BookieSocketAddress("0.0.0.2:3181"); - BookieSocketAddress b3 = new BookieSocketAddress("0.0.0.3:3181"); - - LedgerMetadata initMeta = LedgerMetadataBuilder.create().withEnsembleSize(2) - .newEnsembleEntry(0L, Lists.newArrayList(b0, b1)).build(); - GenericCallbackFuture promise = new GenericCallbackFuture<>(); - lm.createLedgerMetadata(ledgerId, initMeta, promise); - LedgerMetadata writtenMetadata = promise.get(); - - AtomicReference reference = new AtomicReference<>(writtenMetadata); - - CompletableFuture loop1 = new MetadataUpdateLoop( + BookieId b0 = BookieId.parse("0.0.0.0:3181"); + BookieId b1 = BookieId.parse("0.0.0.1:3181"); + BookieId b2 = BookieId.parse("0.0.0.2:3181"); + BookieId b3 = BookieId.parse("0.0.0.3:3181"); + + LedgerMetadata initMeta = LedgerMetadataBuilder.create().withEnsembleSize(2).withId(ledgerId) + .withDigestType(DigestType.CRC32C).withPassword(new byte[0]) + .withWriteQuorumSize(2).newEnsembleEntry(0L, Lists.newArrayList(b0, b1)).build(); + Versioned writtenMetadata = lm.createLedgerMetadata(ledgerId, initMeta).get(); + AtomicReference> reference = new AtomicReference<>(writtenMetadata); + + CompletableFuture> loop1 = new MetadataUpdateLoop( lm, ledgerId, reference::get, - (currentMetadata) -> currentMetadata.getEnsemble(0L).contains(b0), + (currentMetadata) -> currentMetadata.getEnsembleAt(0L).contains(b0), (currentMetadata) -> { - List ensemble = Lists.newArrayList(currentMetadata.getEnsemble(0L)); + List ensemble = Lists.newArrayList(currentMetadata.getEnsembleAt(0L)); ensemble.set(0, b2); return LedgerMetadataBuilder.from(currentMetadata).replaceEnsembleEntry(0L, ensemble).build(); }, reference::compareAndSet).run(); lm.waitForWriteCount(1); - CompletableFuture loop2 = new MetadataUpdateLoop( + CompletableFuture> loop2 = new MetadataUpdateLoop( lm, ledgerId, reference::get, - (currentMetadata) -> currentMetadata.getEnsemble(0L).contains(b1), + (currentMetadata) -> currentMetadata.getEnsembleAt(0L).contains(b1), (currentMetadata) -> { - List ensemble = Lists.newArrayList(currentMetadata.getEnsemble(0L)); + List ensemble = Lists.newArrayList(currentMetadata.getEnsembleAt(0L)); ensemble.set(1, b3); return LedgerMetadataBuilder.from(currentMetadata).replaceEnsembleEntry(0L, ensemble).build(); }, @@ -274,16 +269,16 @@ public void testConflictOnLocalUpdate() throws Exception { Assert.assertEquals(loop1.get(), reference.get()); - Assert.assertEquals(reference.get().getEnsemble(0L).get(0), b2); - Assert.assertEquals(reference.get().getEnsemble(0L).get(1), b3); + Assert.assertEquals(reference.get().getValue().getEnsembleAt(0L).get(0), b2); + Assert.assertEquals(reference.get().getValue().getEnsembleAt(0L).get(1), b3); verify(lm, times(3)).writeLedgerMetadata(anyLong(), any(), any()); } } - private static BookieSocketAddress address(String s) { + private static BookieId address(String s) { try { - return new BookieSocketAddress(s); + return BookieId.parse(s); } catch (Exception e) { throw new RuntimeException(e); } @@ -300,30 +295,29 @@ public void testHammer() throws Exception { long ledgerId = 1234L; int ensembleSize = 100; - List initialEnsemble = IntStream.range(0, ensembleSize) + List initialEnsemble = IntStream.range(0, ensembleSize) .mapToObj((i) -> address(String.format("0.0.0.%d:3181", i))) .collect(Collectors.toList()); - 
LedgerMetadata initMeta = LedgerMetadataBuilder.create().withEnsembleSize(ensembleSize) + LedgerMetadata initMeta = LedgerMetadataBuilder.create().withEnsembleSize(ensembleSize).withId(ledgerId) + .withDigestType(DigestType.CRC32C).withPassword(new byte[0]) .newEnsembleEntry(0L, initialEnsemble).build(); - GenericCallbackFuture promise = new GenericCallbackFuture<>(); - lm.createLedgerMetadata(ledgerId, initMeta, promise); - LedgerMetadata writtenMetadata = promise.get(); + Versioned writtenMetadata = lm.createLedgerMetadata(ledgerId, initMeta).get(); - AtomicReference reference = new AtomicReference<>(writtenMetadata); + AtomicReference> reference = new AtomicReference<>(writtenMetadata); - List replacementBookies = IntStream.range(0, ensembleSize) + List replacementBookies = IntStream.range(0, ensembleSize) .mapToObj((i) -> address(String.format("0.0.%d.1:3181", i))) .collect(Collectors.toList()); - List> loops = IntStream.range(0, ensembleSize) + List>> loops = IntStream.range(0, ensembleSize) .mapToObj((i) -> new MetadataUpdateLoop( lm, ledgerId, reference::get, - (currentMetadata) -> currentMetadata.getEnsemble(0L).contains(initialEnsemble.get(i)), + (currentMetadata) -> currentMetadata.getEnsembleAt(0L).contains(initialEnsemble.get(i)), (currentMetadata) -> { - List ensemble = Lists.newArrayList(currentMetadata.getEnsemble(0L)); + List ensemble = Lists.newArrayList(currentMetadata.getEnsembleAt(0L)); ensemble.set(i, replacementBookies.get(i)); return LedgerMetadataBuilder.from(currentMetadata).replaceEnsembleEntry(0L, ensemble).build(); }, @@ -332,7 +326,7 @@ public void testHammer() throws Exception { loops.forEach((l) -> l.join()); - Assert.assertEquals(reference.get().getEnsemble(0L), replacementBookies); + Assert.assertEquals(reference.get().getValue().getEnsembleAt(0L), replacementBookies); } } @@ -346,24 +340,27 @@ public void testNewestValueCannotBeUsedAfterReadBack() throws Exception { lm.blockWrites(); long ledgerId = 1234L; - BookieSocketAddress b0 = new BookieSocketAddress("0.0.0.0:3181"); - BookieSocketAddress b1 = new BookieSocketAddress("0.0.0.1:3181"); + BookieId b0 = new BookieSocketAddress("0.0.0.0:3181").toBookieId(); + BookieId b1 = new BookieSocketAddress("0.0.0.1:3181").toBookieId(); - LedgerMetadata initMeta = LedgerMetadataBuilder.create().withEnsembleSize(1) + LedgerMetadata initMeta = LedgerMetadataBuilder.create().withEnsembleSize(1).withId(ledgerId) + .withDigestType(DigestType.CRC32C).withPassword(new byte[0]) + .withWriteQuorumSize(1).withAckQuorumSize(1) .newEnsembleEntry(0L, Lists.newArrayList(b0)).build(); - GenericCallbackFuture promise = new GenericCallbackFuture<>(); - lm.createLedgerMetadata(ledgerId, initMeta, promise); - LedgerMetadata writtenMetadata = promise.get(); + Versioned writtenMetadata = lm.createLedgerMetadata(ledgerId, initMeta).get(); - AtomicReference reference = new AtomicReference<>(writtenMetadata); - CompletableFuture loop1 = new MetadataUpdateLoop( + AtomicReference> reference = new AtomicReference<>(writtenMetadata); + CompletableFuture> loop1 = new MetadataUpdateLoop( lm, ledgerId, reference::get, (currentMetadata) -> !currentMetadata.isClosed(), - (currentMetadata) -> LedgerMetadataBuilder.from(currentMetadata).closingAtEntry(10L).build(), + (currentMetadata) -> { + return LedgerMetadataBuilder.from(currentMetadata) + .withClosedState().withLastEntryId(10L).withLength(100L).build(); + }, reference::compareAndSet).run(); - CompletableFuture loop2 = new MetadataUpdateLoop( + CompletableFuture> loop2 = new MetadataUpdateLoop( 
lm, ledgerId, reference::get, @@ -371,18 +368,18 @@ public void testNewestValueCannotBeUsedAfterReadBack() throws Exception { if (currentMetadata.isClosed()) { throw new BKException.BKLedgerClosedException(); } else { - return currentMetadata.getEnsemble(0L).contains(b0); + return currentMetadata.getEnsembleAt(0L).contains(b0); } }, (currentMetadata) -> { - List ensemble = Lists.newArrayList(currentMetadata.getEnsemble(0L)); + List ensemble = Lists.newArrayList(currentMetadata.getEnsembleAt(0L)); ensemble.set(0, b1); return LedgerMetadataBuilder.from(currentMetadata).replaceEnsembleEntry(0L, ensemble).build(); }, reference::compareAndSet).run(); lm.releaseWrites(); - LedgerMetadata l1meta = loop1.get(); + Versioned l1meta = loop1.get(); try { loop2.get(); Assert.fail("Update loop should have failed"); @@ -390,8 +387,8 @@ public void testNewestValueCannotBeUsedAfterReadBack() throws Exception { Assert.assertEquals(ee.getCause().getClass(), BKException.BKLedgerClosedException.class); } Assert.assertEquals(l1meta, reference.get()); - Assert.assertEquals(l1meta.getEnsemble(0L).get(0), b0); - Assert.assertTrue(l1meta.isClosed()); + Assert.assertEquals(l1meta.getValue().getEnsembleAt(0L).get(0), b0); + Assert.assertTrue(l1meta.getValue().isClosed()); verify(lm, times(2)).writeLedgerMetadata(anyLong(), any(), any()); } @@ -416,14 +413,22 @@ public void close() { static class DeferCallbacksMockLedgerManager extends MockLedgerManager { int writeCount = 0; final int numToDefer; - List, Integer, LedgerMetadata>> deferred = Lists.newArrayList(); + List>, Versioned, Throwable>> deferred = + Lists.newArrayList(); DeferCallbacksMockLedgerManager(int numToDefer) { this.numToDefer = numToDefer; } synchronized void runDeferred() { - deferred.forEach((d) -> d.getLeft().operationComplete(d.getMiddle(), d.getRight())); + deferred.forEach((d) -> { + Throwable t = d.getRight(); + if (t != null) { + d.getLeft().completeExceptionally(t); + } else { + d.getLeft().complete(d.getMiddle()); + } + }); } synchronized void waitForWriteCount(int count) throws Exception { @@ -433,27 +438,43 @@ synchronized void waitForWriteCount(int count) throws Exception { } @Override - public synchronized void writeLedgerMetadata(long ledgerId, LedgerMetadata metadata, - GenericCallback cb) { - super.writeLedgerMetadata(ledgerId, metadata, - (rc, written) -> { - synchronized (DeferCallbacksMockLedgerManager.this) { - if (writeCount++ < numToDefer) { - LOG.info("Added aaaaato deferals"); - deferred.add(Triple.of(cb, rc, written)); - } else { - LOG.info("Completing {}", numToDefer); - cb.operationComplete(rc, written); - } - DeferCallbacksMockLedgerManager.this.notifyAll(); - } - }); - }; + public synchronized CompletableFuture> writeLedgerMetadata( + long ledgerId, LedgerMetadata metadata, + Version currentVersion) { + CompletableFuture> promise = new CompletableFuture<>(); + super.writeLedgerMetadata(ledgerId, metadata, currentVersion) + .whenComplete((written, exception) -> { + synchronized (DeferCallbacksMockLedgerManager.this) { + if (writeCount++ < numToDefer) { + LOG.info("Added to deferrals"); + deferred.add(Triple.of(promise, written, exception)); + } else { + LOG.info("Completing {}", numToDefer); + if (exception != null) { + promise.completeExceptionally(exception); + } else { + promise.complete(written); + } + } + DeferCallbacksMockLedgerManager.this.notifyAll(); + } + }); + return promise; + } + } + + @Data + @AllArgsConstructor + static class DeferredUpdate { + final CompletableFuture> promise; + final long ledgerId; + final 
LedgerMetadata metadata; + final Version currentVersion; } static class BlockableMockLedgerManager extends MockLedgerManager { boolean blocking = false; - List>> reqs = Lists.newArrayList(); + List reqs = Lists.newArrayList(); synchronized void blockWrites() { blocking = true; @@ -461,17 +482,29 @@ synchronized void blockWrites() { synchronized void releaseWrites() { blocking = false; - reqs.forEach((r) -> super.writeLedgerMetadata(r.getLeft(), r.getMiddle(), r.getRight())); + reqs.forEach((r) -> { + super.writeLedgerMetadata(r.getLedgerId(), r.getMetadata(), + r.getCurrentVersion()) + .whenComplete((written, exception) -> { + if (exception != null) { + r.getPromise().completeExceptionally(exception); + } else { + r.getPromise().complete(written); + } + }); + }); } @Override - public synchronized void writeLedgerMetadata(long ledgerId, LedgerMetadata metadata, - GenericCallback cb) { + public synchronized CompletableFuture> writeLedgerMetadata( + long ledgerId, LedgerMetadata metadata, Version currentVersion) { if (blocking) { - reqs.add(Triple.of(ledgerId, metadata, cb)); + CompletableFuture> promise = new CompletableFuture<>(); + reqs.add(new DeferredUpdate(promise, ledgerId, metadata, currentVersion)); + return promise; } else { - super.writeLedgerMetadata(ledgerId, metadata, cb); + return super.writeLedgerMetadata(ledgerId, metadata, currentVersion); } - }; + } } } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/MockBookKeeper.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/MockBookKeeper.java index 3b63cfd3708..da1525ab8d1 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/MockBookKeeper.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/MockBookKeeper.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file * distributed with this work for additional information @@ -19,25 +19,23 @@ package org.apache.bookkeeper.client; import io.netty.util.concurrent.DefaultThreadFactory; - import java.util.Arrays; import java.util.Collections; import java.util.Map; import java.util.Set; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; - import org.apache.bookkeeper.client.AsyncCallback.CreateCallback; import org.apache.bookkeeper.client.AsyncCallback.DeleteCallback; import org.apache.bookkeeper.client.AsyncCallback.OpenCallback; +import org.apache.bookkeeper.client.api.BKException.Code; import org.apache.bookkeeper.client.api.OpenBuilder; import org.apache.bookkeeper.client.api.ReadHandle; import org.apache.bookkeeper.client.impl.OpenBuilderBase; +import org.apache.bookkeeper.common.util.OrderedExecutor; import org.apache.bookkeeper.conf.ClientConfiguration; import org.apache.zookeeper.ZooKeeper; import org.slf4j.Logger; @@ -50,14 +48,12 @@ */ public class MockBookKeeper extends BookKeeper { - final ExecutorService executor = Executors.newFixedThreadPool(1, new DefaultThreadFactory("mock-bookkeeper")); + final OrderedExecutor orderedExecutor = OrderedExecutor.newBuilder() + .numThreads(1) + .threadFactory(new DefaultThreadFactory("mock-bookkeeper")) + .build(); final ZooKeeper zkc; - @Override - public ZooKeeper getZkHandle() { - return zkc; - } - @Override public ClientConfiguration getConf() { return super.getConf(); @@ -75,12 +71,17 @@ public MockBookKeeper(ZooKeeper zkc) throws Exception { } @Override - public LedgerHandle createLedger(DigestType digestType, byte passwd[]) throws BKException { + public OrderedExecutor getMainWorkerPool() { + return orderedExecutor; + } + + @Override + public LedgerHandle createLedger(DigestType digestType, byte[] passwd) throws BKException { return createLedger(3, 2, digestType, passwd); } @Override - public LedgerHandle createLedger(int ensSize, int qSize, DigestType digestType, byte passwd[]) throws BKException { + public LedgerHandle createLedger(int ensSize, int qSize, DigestType digestType, byte[] passwd) throws BKException { return createLedger(ensSize, qSize, qSize, digestType, passwd); } @@ -92,7 +93,7 @@ public void asyncCreateLedger(int ensSize, int writeQuorumSize, int ackQuorumSiz return; } - executor.execute(new Runnable() { + orderedExecutor.chooseThread().execute(new Runnable() { public void run() { if (getProgrammedFailStatus()) { if (failReturnCode != BkTimeoutOperation) { @@ -111,9 +112,9 @@ public void run() { log.info("Creating ledger {}", id); MockLedgerHandle lh = new MockLedgerHandle(MockBookKeeper.this, id, digestType, passwd); ledgers.put(id, lh); - cb.createComplete(0, lh, ctx); + lh.executeOrdered(() -> cb.createComplete(0, lh, ctx)); } catch (Throwable t) { - t.printStackTrace(); + log.error("Error", t); } } }); @@ -162,13 +163,13 @@ public void asyncOpenLedger(long lId, DigestType digestType, byte[] passwd, Open MockLedgerHandle lh = ledgers.get(lId); if (lh == null) { - cb.openComplete(BKException.Code.NoSuchLedgerExistsException, null, ctx); + cb.openComplete(BKException.Code.NoSuchLedgerExistsOnMetadataServerException, null, ctx); } else if (lh.digest != digestType) { cb.openComplete(BKException.Code.DigestMatchException, null, ctx); } else if 
(!Arrays.equals(lh.passwd, passwd)) { cb.openComplete(BKException.Code.UnauthorizedAccessException, null, ctx); } else { - cb.openComplete(0, lh, ctx); + lh.executeOrdered(() -> cb.openComplete(0, lh, ctx)); } } @@ -189,7 +190,7 @@ public void asyncDeleteLedger(long lId, DeleteCallback cb, Object ctx) { ledgers.remove(lId); cb.deleteComplete(0, ctx); } else { - cb.deleteComplete(BKException.Code.NoSuchLedgerExistsException, ctx); + cb.deleteComplete(BKException.Code.NoSuchLedgerExistsOnMetadataServerException, ctx); } } @@ -202,7 +203,7 @@ public void deleteLedger(long lId) throws InterruptedException, BKException { } if (!ledgers.containsKey(lId)) { - throw BKException.create(BKException.Code.NoSuchLedgerExistsException); + throw BKException.create(BKException.Code.NoSuchLedgerExistsOnMetadataServerException); } ledgers.remove(lId); @@ -221,8 +222,9 @@ public OpenBuilder newOpenLedgerOp() { public CompletableFuture execute() { CompletableFuture promise = new CompletableFuture(); - if (!validate()) { - promise.completeExceptionally(new BKException.BKNoSuchLedgerExistsException()); + final int validateRc = validate(); + if (Code.OK != validateRc) { + promise.completeExceptionally(BKException.create(validateRc)); return promise; } else if (getProgrammedFailStatus()) { if (failReturnCode != BkTimeoutOperation) { @@ -236,7 +238,7 @@ public CompletableFuture execute() { MockLedgerHandle lh = ledgers.get(ledgerId); if (lh == null) { - promise.completeExceptionally(new BKException.BKNoSuchLedgerExistsException()); + promise.completeExceptionally(new BKException.BKNoSuchLedgerExistsOnMetadataServerException()); } else if (lh.digest != DigestType.fromApiDigestType(digestType)) { promise.completeExceptionally(new BKException.BKDigestMatchException()); } else if (!Arrays.equals(lh.passwd, password)) { @@ -261,7 +263,7 @@ public void shutdown() { } ledgers.clear(); - executor.shutdownNow(); + orderedExecutor.shutdownNow(); } public boolean isStopped() { @@ -274,7 +276,9 @@ public Set getLedgers() { void checkProgrammedFail() throws BKException { int steps = stepsToFail.getAndDecrement(); - log.debug("Steps to fail: {}", steps); + if (log.isDebugEnabled()) { + log.debug("Steps to fail: {}", steps); + } if (steps <= 0) { if (failReturnCode != BKException.Code.OK) { int rc = failReturnCode; @@ -287,7 +291,9 @@ void checkProgrammedFail() throws BKException { boolean getProgrammedFailStatus() { int steps = stepsToFail.getAndDecrement(); - log.debug("Steps to fail: {}", steps); + if (log.isDebugEnabled()) { + log.debug("Steps to fail: {}", steps); + } return steps == 0; } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/MockBookKeeperTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/MockBookKeeperTest.java index 85a654c7979..a7405934715 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/MockBookKeeperTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/MockBookKeeperTest.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with this * work for additional information regarding copyright ownership. 
The ASF @@ -21,7 +21,6 @@ import static org.junit.Assert.assertTrue; import java.util.Enumeration; - import org.apache.bookkeeper.client.BookKeeper.DigestType; import org.junit.Test; diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/MockBookKeeperTestCase.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/MockBookKeeperTestCase.java index 783e435d6c2..d5a4f05c24a 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/MockBookKeeperTestCase.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/MockBookKeeperTestCase.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -27,12 +27,15 @@ import static org.mockito.ArgumentMatchers.anyMap; import static org.mockito.ArgumentMatchers.anySet; import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.doReturn; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; import io.netty.buffer.ByteBuf; +import io.netty.buffer.ByteBufAllocator; import io.netty.buffer.Unpooled; - +import io.netty.buffer.UnpooledByteBufAllocator; +import io.netty.util.ReferenceCounted; import java.security.GeneralSecurityException; import java.util.ArrayList; import java.util.Collections; @@ -41,30 +44,37 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.concurrent.CompletableFuture; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; import java.util.concurrent.ConcurrentSkipListSet; import java.util.concurrent.CopyOnWriteArrayList; import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.locks.ReentrantReadWriteLock; - import org.apache.bookkeeper.client.BKException.BKDigestMatchException; import org.apache.bookkeeper.client.BKException.Code; import org.apache.bookkeeper.client.api.CreateBuilder; import org.apache.bookkeeper.client.api.DeleteBuilder; +import org.apache.bookkeeper.client.api.LedgerMetadata; import org.apache.bookkeeper.client.api.OpenBuilder; import org.apache.bookkeeper.common.util.OrderedExecutor; import org.apache.bookkeeper.common.util.OrderedScheduler; import org.apache.bookkeeper.conf.ClientConfiguration; import org.apache.bookkeeper.meta.LedgerIdGenerator; import org.apache.bookkeeper.meta.LedgerManager; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.proto.BookieAddressResolver; import org.apache.bookkeeper.proto.BookieClient; import org.apache.bookkeeper.proto.BookieProtocol; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks; +import org.apache.bookkeeper.proto.MockBookieClient; import org.apache.bookkeeper.proto.checksum.DigestManager; import org.apache.bookkeeper.stats.NullStatsLogger; import org.apache.bookkeeper.util.ByteBufList; +import org.apache.bookkeeper.versioning.LongVersion; +import org.apache.bookkeeper.versioning.Version; +import org.apache.bookkeeper.versioning.Versioned; import org.junit.After; import org.junit.Before; import org.mockito.invocation.InvocationOnMock; @@ -86,31 +96,33 @@ public abstract class MockBookKeeperTestCase { protected BookieClient bookieClient; protected LedgerManager ledgerManager; protected LedgerIdGenerator ledgerIdGenerator; + protected EnsemblePlacementPolicy placementPolicy; private BookieWatcher bookieWatcher; protected ConcurrentMap 
mockLedgerMetadataRegistry; protected AtomicLong mockNextLedgerId; protected ConcurrentSkipListSet fencedLedgers; - protected ConcurrentMap>> mockLedgerData; + protected ConcurrentMap>> mockLedgerData; - private Map> deferredBookieForceLedgerResponses; - private Set suspendedBookiesForForceLedgerAcks; + private Map> deferredBookieForceLedgerResponses; + private Set suspendedBookiesForForceLedgerAcks; - List failedBookies; - Set availableBookies; + List failedBookies; + Set availableBookies; private int lastIndexForBK; + protected int maxNumberOfAvailableBookies = Integer.MAX_VALUE; - private Map> getMockLedgerContents(long ledgerId) { + private Map> getMockLedgerContents(long ledgerId) { return mockLedgerData.computeIfAbsent(ledgerId, (id) -> new ConcurrentHashMap<>()); } - private Map getMockLedgerContentsInBookie(long ledgerId, BookieSocketAddress bookieSocketAddress) { + private Map getMockLedgerContentsInBookie(long ledgerId, BookieId bookieSocketAddress) { return getMockLedgerContents(ledgerId).computeIfAbsent(bookieSocketAddress, addr -> new ConcurrentHashMap<>()); } private MockEntry getMockLedgerEntry(long ledgerId, - BookieSocketAddress bookieSocketAddress, long entryId) throws BKException{ + BookieId bookieSocketAddress, long entryId) throws BKException{ if (failedBookies.contains(bookieSocketAddress)) { throw BKException.create(NoBookieAvailableException); } @@ -131,6 +143,7 @@ public MockEntry(byte[] payload, long lastAddConfirmed) { @Before public void setup() throws Exception { + maxNumberOfAvailableBookies = Integer.MAX_VALUE; deferredBookieForceLedgerResponses = new ConcurrentHashMap<>(); suspendedBookiesForForceLedgerAcks = Collections.synchronizedSet(new HashSet<>()); mockLedgerMetadataRegistry = new ConcurrentHashMap<>(); @@ -140,12 +153,16 @@ public void setup() throws Exception { scheduler = OrderedScheduler.newSchedulerBuilder().numThreads(4).name("bk-test").build(); executor = OrderedExecutor.newBuilder().build(); bookieWatcher = mock(BookieWatcher.class); + placementPolicy = new DefaultEnsemblePlacementPolicy(); bookieClient = mock(BookieClient.class); ledgerManager = mock(LedgerManager.class); ledgerIdGenerator = mock(LedgerIdGenerator.class); + BookieAddressResolver bookieAddressResolver = BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER; + when(bookieWatcher.getBookieAddressResolver()).thenReturn(bookieAddressResolver); bk = mock(BookKeeper.class); + doReturn(new ClientConfiguration()).when(bk).getConf(); failedBookies = new ArrayList<>(); availableBookies = new HashSet<>(); @@ -153,6 +170,7 @@ public void setup() throws Exception { when(bk.getCloseLock()).thenReturn(new ReentrantReadWriteLock()); when(bk.isClosed()).thenReturn(false); when(bk.getBookieWatcher()).thenReturn(bookieWatcher); + when(bk.getBookieAddressResolver()).thenReturn(bookieAddressResolver); when(bk.getMainWorkerPool()).thenReturn(executor); when(bk.getBookieClient()).thenReturn(bookieClient); when(bk.getScheduler()).thenReturn(scheduler); @@ -178,7 +196,7 @@ public BookieWatcher getBookieWatcher() { @Override public EnsemblePlacementPolicy getPlacementPolicy() { - return null; + return placementPolicy; } @Override @@ -205,6 +223,11 @@ public BookKeeperClientStats getClientStats() { public boolean isClientClosed() { return bk.isClosed(); } + + @Override + public ByteBufAllocator getByteBufAllocator() { + return UnpooledByteBufAllocator.DEFAULT; + } }; when(bk.getClientCtx()).thenReturn(clientCtx); when(bk.getLedgerManager()).thenReturn(ledgerManager); @@ -221,6 +244,7 @@ public boolean 
isClientClosed() { setupBookieWatcherForNewEnsemble(); setupBookieWatcherForEnsembleChange(); setupBookieClientReadEntry(); + setupBookieClientReadLac(); setupBookieClientAddEntry(); setupBookieClientForceLedger(); } @@ -236,7 +260,8 @@ private DigestManager getDigestType(long ledgerId) throws GeneralSecurityExcepti metadata.getPassword(), org.apache.bookkeeper.client.BookKeeper.DigestType.toProtoDigestType( org.apache.bookkeeper.client.BookKeeper.DigestType.fromApiDigestType( - metadata.getDigestType()))); + metadata.getDigestType())), + UnpooledByteBufAllocator.DEFAULT, false); } @After @@ -261,42 +286,53 @@ protected void closeBookkeeper() { when(bk.isClosed()).thenReturn(true); } - protected void killBookie(BookieSocketAddress killedBookieSocketAddress) { + protected void killBookie(BookieId killedBookieSocketAddress) { failedBookies.add(killedBookieSocketAddress); availableBookies.remove(killedBookieSocketAddress); } - protected void startKilledBookie(BookieSocketAddress killedBookieSocketAddress) { + protected void startKilledBookie(BookieId killedBookieSocketAddress) { checkState(failedBookies.contains(killedBookieSocketAddress)); checkState(!availableBookies.contains(killedBookieSocketAddress)); failedBookies.remove(killedBookieSocketAddress); availableBookies.add(killedBookieSocketAddress); } - protected void suspendBookieForceLedgerAcks(BookieSocketAddress address) { + protected void suspendBookieForceLedgerAcks(BookieId address) { suspendedBookiesForForceLedgerAcks.add(address); } - protected void resumeBookieWriteAcks(BookieSocketAddress address) { - suspendedBookiesForForceLedgerAcks.remove(address); - List pendingResponses = deferredBookieForceLedgerResponses.remove(address); + protected void resumeBookieWriteAcks(BookieId address) { + List pendingResponses; + + // why use the BookieId instance as the object monitor? 
there is a data race problem otherwise + // see https://github.com/apache/bookkeeper/issues/4200 + synchronized (address) { + suspendedBookiesForForceLedgerAcks.remove(address); + pendingResponses = deferredBookieForceLedgerResponses.remove(address); + } + if (pendingResponses != null) { pendingResponses.forEach(Runnable::run); } } - protected BookieSocketAddress startNewBookie() { - BookieSocketAddress address = generateBookieSocketAddress(lastIndexForBK++); + protected BookieId startNewBookie() { + BookieId address = generateBookieSocketAddress(lastIndexForBK++); availableBookies.add(address); return address; } - protected BookieSocketAddress generateBookieSocketAddress(int index) { - return new BookieSocketAddress("localhost", 1111 + index); + protected BookieId generateBookieSocketAddress(int index) { + return new BookieSocketAddress("localhost", 1111 + index).toBookieId(); } - protected ArrayList generateNewEnsemble(int ensembleSize) { - ArrayList ensemble = new ArrayList<>(ensembleSize); + protected ArrayList generateNewEnsemble(int ensembleSize) throws BKException.BKNotEnoughBookiesException { + LOG.info("generateNewEnsemble {}", ensembleSize); + if (ensembleSize > maxNumberOfAvailableBookies) { + throw new BKException.BKNotEnoughBookiesException(); + } + ArrayList ensemble = new ArrayList<>(ensembleSize); for (int i = 0; i < ensembleSize; i++) { ensemble.add(generateBookieSocketAddress(i)); } @@ -307,10 +343,10 @@ protected ArrayList generateNewEnsemble(int ensembleSize) { private void setupBookieWatcherForNewEnsemble() throws BKException.BKNotEnoughBookiesException { when(bookieWatcher.newEnsemble(anyInt(), anyInt(), anyInt(), any())) - .thenAnswer((Answer>) new Answer>() { + .thenAnswer((Answer>) new Answer>() { @Override @SuppressWarnings("unchecked") - public ArrayList answer(InvocationOnMock invocation) throws Throwable { + public ArrayList answer(InvocationOnMock invocation) throws Throwable { Object[] args = invocation.getArguments(); int ensembleSize = (Integer) args[0]; return generateNewEnsemble(ensembleSize); @@ -320,15 +356,15 @@ public ArrayList answer(InvocationOnMock invocation) throws private void setupBookieWatcherForEnsembleChange() throws BKException.BKNotEnoughBookiesException { when(bookieWatcher.replaceBookie(anyInt(), anyInt(), anyInt(), anyMap(), anyList(), anyInt(), anySet())) - .thenAnswer((Answer) new Answer() { + .thenAnswer((Answer) new Answer() { @Override @SuppressWarnings("unchecked") - public BookieSocketAddress answer(InvocationOnMock invocation) throws Throwable { + public BookieId answer(InvocationOnMock invocation) throws Throwable { Object[] args = invocation.getArguments(); - List existingBookies = (List) args[4]; - Set excludeBookies = (Set) args[6]; + List existingBookies = (List) args[4]; + Set excludeBookies = (Set) args[6]; excludeBookies.addAll(existingBookies); - Set remainBookies = new HashSet(availableBookies); + Set remainBookies = new HashSet(availableBookies); remainBookies.removeAll(excludeBookies); if (remainBookies.iterator().hasNext()) { return remainBookies.iterator().next(); @@ -338,7 +374,7 @@ public BookieSocketAddress answer(InvocationOnMock invocation) throws Throwable }); } - protected void registerMockEntryForRead(long ledgerId, long entryId, BookieSocketAddress bookieSocketAddress, + protected void registerMockEntryForRead(long ledgerId, long entryId, BookieId bookieSocketAddress, byte[] entryData, long lastAddConfirmed) { getMockLedgerContentsInBookie(ledgerId, bookieSocketAddress).put(entryId, new MockEntry(entryData,
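The synchronized (address) block above pairs with an identical block in setupBookieClientForceLedger below: both take the BookieId instance as the monitor, so an ack can never be queued after resumeBookieWriteAcks has already drained the deferred queue, which is the data race tracked in issue #4200. A condensed sketch of the pattern with simplified types (String stands in for BookieId, and the field and method names are illustrative):

    import java.util.Collections;
    import java.util.HashSet;
    import java.util.List;
    import java.util.Map;
    import java.util.Set;
    import java.util.concurrent.ConcurrentHashMap;
    import java.util.concurrent.CopyOnWriteArrayList;

    class SuspendResumeSketch {
        private final Set<String> suspended = Collections.synchronizedSet(new HashSet<>());
        private final Map<String, List<Runnable>> deferred = new ConcurrentHashMap<>();

        // Bookie side: defer the ack or run it, atomically with respect to resume().
        void ackOrDefer(String bookie, Runnable ack) {
            List<Runnable> queue = null;
            synchronized (bookie) { // the real code locks the BookieId instance
                if (suspended.contains(bookie)) {
                    queue = deferred.computeIfAbsent(bookie, k -> new CopyOnWriteArrayList<>());
                    queue.add(ack);
                }
            }
            if (queue == null) {
                ack.run();
            }
        }

        // Test side: un-suspend, then replay anything deferred in the meantime.
        void resume(String bookie) {
            List<Runnable> pending;
            synchronized (bookie) { // same monitor as ackOrDefer()
                suspended.remove(bookie);
                pending = deferred.remove(bookie);
            }
            if (pending != null) {
                pending.forEach(Runnable::run);
            }
        }
    }

Without the shared monitor, ackOrDefer could observe the bookie as suspended, lose the CPU, and enqueue its ack just after resume() removed the queue, leaving that ack deferred forever.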
lastAddConfirmed)); @@ -362,17 +398,17 @@ private void setupReadLedgerMetadata() { doAnswer(invocation -> { Object[] args = invocation.getArguments(); Long ledgerId = (Long) args[0]; + CompletableFuture> promise = new CompletableFuture<>(); executor.executeOrdered(ledgerId, () -> { - BookkeeperInternalCallbacks.GenericCallback cb = (BookkeeperInternalCallbacks.GenericCallback) args[1]; LedgerMetadata ledgerMetadata = mockLedgerMetadataRegistry.get(ledgerId); if (ledgerMetadata == null) { - cb.operationComplete(BKException.Code.NoSuchLedgerExistsException, null); + promise.completeExceptionally(new BKException.BKNoSuchLedgerExistsOnMetadataServerException()); } else { - cb.operationComplete(BKException.Code.OK, new LedgerMetadata(ledgerMetadata)); + promise.complete(new Versioned<>(ledgerMetadata, new LongVersion(1))); } }); - return null; - }).when(ledgerManager).readLedgerMetadata(anyLong(), any()); + return promise; + }).when(ledgerManager).readLedgerMetadata(anyLong()); } @SuppressWarnings("unchecked") @@ -380,16 +416,16 @@ private void setupRemoveLedgerMetadata() { doAnswer(invocation -> { Object[] args = invocation.getArguments(); Long ledgerId = (Long) args[0]; + CompletableFuture promise = new CompletableFuture<>(); executor.executeOrdered(ledgerId, () -> { - BookkeeperInternalCallbacks.GenericCallback cb = (BookkeeperInternalCallbacks.GenericCallback) args[2]; - if (mockLedgerMetadataRegistry.remove(ledgerId) != null) { - cb.operationComplete(BKException.Code.OK, null); - } else { - cb.operationComplete(BKException.Code.NoSuchLedgerExistsException, null); - } - }); - return null; - }).when(ledgerManager).removeLedgerMetadata(anyLong(), any(), any()); + if (mockLedgerMetadataRegistry.remove(ledgerId) != null) { + promise.complete(null); + } else { + promise.completeExceptionally(new BKException.BKNoSuchLedgerExistsOnMetadataServerException()); + } + }); + return promise; + }).when(ledgerManager).removeLedgerMetadata(anyLong(), any()); } private void setupRegisterLedgerMetadataListener() { @@ -416,37 +452,41 @@ private void setupLedgerIdGenerator() { private void setupCreateLedgerMetadata() { doAnswer(invocation -> { Object[] args = invocation.getArguments(); - BookkeeperInternalCallbacks.GenericCallback cb = (BookkeeperInternalCallbacks.GenericCallback) args[2]; Long ledgerId = (Long) args[0]; + + CompletableFuture> promise = new CompletableFuture<>(); executor.executeOrdered(ledgerId, () -> { - LedgerMetadata ledgerMetadata = (LedgerMetadata) args[1]; - mockLedgerMetadataRegistry.put(ledgerId, new LedgerMetadata(ledgerMetadata)); - cb.operationComplete(BKException.Code.OK, null); + + LedgerMetadata ledgerMetadata = (LedgerMetadata) args[1]; + mockLedgerMetadataRegistry.put(ledgerId, ledgerMetadata); + promise.complete(new Versioned<>(ledgerMetadata, new LongVersion(1))); }); - return null; - }).when(ledgerManager).createLedgerMetadata(anyLong(), any(), any()); + return promise; + }).when(ledgerManager).createLedgerMetadata(anyLong(), any()); } @SuppressWarnings("unchecked") private void setupWriteLedgerMetadata() { doAnswer(invocation -> { - Object[] args = invocation.getArguments(); - Long ledgerId = (Long) args[0]; - LedgerMetadata metadata = (LedgerMetadata) args[1]; - BookkeeperInternalCallbacks.GenericCallback cb = (BookkeeperInternalCallbacks.GenericCallback) args[2]; - executor.executeOrdered(ledgerId, () -> { - mockLedgerMetadataRegistry.put(ledgerId, new LedgerMetadata(metadata)); - cb.operationComplete(BKException.Code.OK, null); - }); - return null; - 
}).when(ledgerManager).writeLedgerMetadata(anyLong(), any(), any()); + Object[] args = invocation.getArguments(); + Long ledgerId = (Long) args[0]; + LedgerMetadata metadata = (LedgerMetadata) args[1]; + Version currentVersion = (Version) args[2]; + CompletableFuture> promise = new CompletableFuture<>(); + executor.executeOrdered(ledgerId, () -> { + LedgerMetadata newMetadata = LedgerMetadataBuilder.from(metadata).build(); + mockLedgerMetadataRegistry.put(ledgerId, newMetadata); + promise.complete(new Versioned<>(newMetadata, new LongVersion(1234))); + }); + return promise; + }).when(ledgerManager).writeLedgerMetadata(anyLong(), any(), any()); } @SuppressWarnings("unchecked") protected void setupBookieClientReadEntry() { - final Stubber stub = doAnswer(invokation -> { - Object[] args = invokation.getArguments(); - BookieSocketAddress bookieSocketAddress = (BookieSocketAddress) args[0]; + final Stubber stub = doAnswer(invocation -> { + Object[] args = invocation.getArguments(); + BookieId bookieSocketAddress = (BookieId) args[0]; long ledgerId = (Long) args[1]; long entryId = (Long) args[2]; BookkeeperInternalCallbacks.ReadEntryCallback callback = @@ -475,10 +515,10 @@ protected void setupBookieClientReadEntry() { if (mockEntry != null) { LOG.info("readEntry - found mock entry {}@{} at {}", entryId, ledgerId, bookieSocketAddress); - ByteBufList entry = macManager.computeDigestAndPackageForSending(entryId, + ReferenceCounted entry = macManager.computeDigestAndPackageForSending(entryId, mockEntry.lastAddConfirmed, mockEntry.payload.length, - Unpooled.wrappedBuffer(mockEntry.payload)); - callback.readEntryComplete(BKException.Code.OK, ledgerId, entryId, ByteBufList.coalesce(entry), + Unpooled.wrappedBuffer(mockEntry.payload), new byte[20], 0); + callback.readEntryComplete(BKException.Code.OK, ledgerId, entryId, MockBookieClient.copyData(entry), args[4]); entry.release(); } else { @@ -502,6 +542,33 @@ protected void setupBookieClientReadEntry() { any(), anyInt(), any(), anyBoolean()); } + @SuppressWarnings("unchecked") + protected void setupBookieClientReadLac() { + final Stubber stub = doAnswer(invocation -> { + Object[] args = invocation.getArguments(); + BookieId bookieSocketAddress = (BookieId) args[0]; + long ledgerId = (Long) args[1]; + final BookkeeperInternalCallbacks.ReadLacCallback callback = + (BookkeeperInternalCallbacks.ReadLacCallback) args[2]; + Object ctx = args[3]; + long entryId = BookieProtocol.LAST_ADD_CONFIRMED; + // simply use "readEntry" with LAST_ADD_CONFIRMED to get current LAC + // there is nothing that writes ExplicitLAC within MockBookKeeperTestCase + bookieClient.readEntry(bookieSocketAddress, ledgerId, entryId, + new BookkeeperInternalCallbacks.ReadEntryCallback() { + @Override + public void readEntryComplete(int rc, long ledgerId, long entryId, ByteBuf buffer, Object ctx) { + callback.readLacComplete(rc, ledgerId, null, buffer, ctx); + } + }, ctx, BookieProtocol.FLAG_NONE); + return null; + }); + + stub.when(bookieClient).readLac(any(BookieId.class), anyLong(), + any(BookkeeperInternalCallbacks.ReadLacCallback.class), + any()); + } + private byte[] extractEntryPayload(long ledgerId, long entryId, ByteBufList toSend) throws BKException.BKDigestMatchException { ByteBuf toSendCopy = Unpooled.copiedBuffer(toSend.toArray()); @@ -522,10 +589,10 @@ private byte[] extractEntryPayload(long ledgerId, long entryId, ByteBufList toSe @SuppressWarnings("unchecked") protected void setupBookieClientAddEntry() { - final Stubber stub = doAnswer(invokation -> { - Object[] args = 
invokation.getArguments(); + final Stubber stub = doAnswer(invocation -> { + Object[] args = invocation.getArguments(); BookkeeperInternalCallbacks.WriteCallback callback = (BookkeeperInternalCallbacks.WriteCallback) args[5]; - BookieSocketAddress bookieSocketAddress = (BookieSocketAddress) args[0]; + BookieId bookieSocketAddress = (BookieId) args[0]; long ledgerId = (Long) args[1]; long entryId = (Long) args[3]; ByteBufList toSend = (ByteBufList) args[4]; @@ -570,7 +637,7 @@ protected void setupBookieClientAddEntry() { return null; }); - stub.when(bookieClient).addEntry(any(BookieSocketAddress.class), + stub.when(bookieClient).addEntry(any(BookieId.class), anyLong(), any(byte[].class), anyLong(), any(ByteBufList.class), any(BookkeeperInternalCallbacks.WriteCallback.class), @@ -579,9 +646,9 @@ protected void setupBookieClientAddEntry() { @SuppressWarnings("unchecked") protected void setupBookieClientForceLedger() { - final Stubber stub = doAnswer(invokation -> { - Object[] args = invokation.getArguments(); - BookieSocketAddress bookieSocketAddress = (BookieSocketAddress) args[0]; + final Stubber stub = doAnswer(invocation -> { + Object[] args = invocation.getArguments(); + BookieId bookieSocketAddress = (BookieId) args[0]; long ledgerId = (Long) args[1]; BookkeeperInternalCallbacks.ForceLedgerCallback callback = (BookkeeperInternalCallbacks.ForceLedgerCallback) args[2]; @@ -596,17 +663,23 @@ protected void setupBookieClientForceLedger() { callback.forceLedgerComplete(BKException.Code.OK, ledgerId, bookieSocketAddress, ctx); }); }; - if (suspendedBookiesForForceLedgerAcks.contains(bookieSocketAddress)) { - List queue = deferredBookieForceLedgerResponses.computeIfAbsent(bookieSocketAddress, - (k) -> new CopyOnWriteArrayList<>()); - queue.add(activity); - } else { + List queue = null; + + synchronized (bookieSocketAddress) { + if (suspendedBookiesForForceLedgerAcks.contains(bookieSocketAddress)) { + queue = deferredBookieForceLedgerResponses.computeIfAbsent(bookieSocketAddress, + (k) -> new CopyOnWriteArrayList<>()); + queue.add(activity); + } + } + + if (queue == null) { activity.run(); } return null; }); - stub.when(bookieClient).forceLedger(any(BookieSocketAddress.class), + stub.when(bookieClient).forceLedger(any(BookieId.class), anyLong(), any(BookkeeperInternalCallbacks.ForceLedgerCallback.class), any()); diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/MockClientContext.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/MockClientContext.java index 040402dc102..93078a05129 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/MockClientContext.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/MockClientContext.java @@ -20,15 +20,29 @@ */ package org.apache.bookkeeper.client; -import java.util.function.BooleanSupplier; +import static com.google.common.base.Preconditions.checkState; +import io.netty.buffer.ByteBufAllocator; +import io.netty.buffer.UnpooledByteBufAllocator; +import java.util.function.BooleanSupplier; import org.apache.bookkeeper.common.util.OrderedExecutor; import org.apache.bookkeeper.common.util.OrderedScheduler; +import org.apache.bookkeeper.conf.ClientConfiguration; +import org.apache.bookkeeper.discover.MockRegistrationClient; import org.apache.bookkeeper.meta.LedgerManager; +import org.apache.bookkeeper.meta.MockLedgerManager; import org.apache.bookkeeper.proto.BookieClient; - -class MockClientContext implements ClientContext { - private ClientInternalConf conf; +import 
org.apache.bookkeeper.proto.MockBookieClient; +import org.apache.bookkeeper.proto.MockBookies; +import org.apache.bookkeeper.stats.NullStatsLogger; +import org.mockito.Mockito; + +/** + * Mock client context to allow testing client functionality with no external dependencies. + * The client context can be created with defaults, copied from another context or constructed from scratch. + */ +public class MockClientContext implements ClientContext { + private ClientInternalConf internalConf; private LedgerManager ledgerManager; private BookieWatcher bookieWatcher; private EnsemblePlacementPolicy placementPolicy; @@ -37,6 +51,38 @@ class MockClientContext implements ClientContext { private OrderedScheduler scheduler; private BookKeeperClientStats clientStats; private BooleanSupplier isClientClosed; + private MockRegistrationClient regClient; + private ByteBufAllocator allocator; + + static MockClientContext create(MockBookies mockBookies) throws Exception { + ClientConfiguration conf = new ClientConfiguration(); + OrderedScheduler scheduler = OrderedScheduler.newSchedulerBuilder().name("mock-executor").numThreads(1).build(); + MockRegistrationClient regClient = new MockRegistrationClient(); + EnsemblePlacementPolicy placementPolicy = new DefaultEnsemblePlacementPolicy(); + BookieWatcherImpl bookieWatcherImpl = new BookieWatcherImpl(conf, placementPolicy, + regClient, + new DefaultBookieAddressResolver(regClient), + NullStatsLogger.INSTANCE); + bookieWatcherImpl.initialBlockingBookieRead(); + + return new MockClientContext() + .setConf(ClientInternalConf.fromConfig(conf)) + .setLedgerManager(new MockLedgerManager()) + .setBookieWatcher(bookieWatcherImpl) + .setPlacementPolicy(placementPolicy) + .setRegistrationClient(regClient) + .setBookieClient(new MockBookieClient(scheduler, mockBookies)) + .setByteBufAllocator(UnpooledByteBufAllocator.DEFAULT) + .setMainWorkerPool(scheduler) + .setScheduler(scheduler) + .setClientStats(BookKeeperClientStats.newInstance(NullStatsLogger.INSTANCE)) + .setIsClientClosed(() -> false); + } + + static MockClientContext create() throws Exception { + MockBookies mockBookies = new MockBookies(); + return create(mockBookies); + } static MockClientContext copyOf(ClientContext other) { return new MockClientContext() @@ -48,57 +94,91 @@ static MockClientContext copyOf(ClientContext other) { .setMainWorkerPool(other.getMainWorkerPool()) .setScheduler(other.getScheduler()) .setClientStats(other.getClientStats()) + .setByteBufAllocator(other.getByteBufAllocator()) .setIsClientClosed(other::isClientClosed); } - MockClientContext setConf(ClientInternalConf conf) { - this.conf = conf; + public MockRegistrationClient getMockRegistrationClient() { + checkState(regClient != null); + return regClient; + } + + public MockLedgerManager getMockLedgerManager() { + checkState(ledgerManager instanceof MockLedgerManager); + return (MockLedgerManager) ledgerManager; + } + + public MockBookieClient getMockBookieClient() { + checkState(bookieClient instanceof MockBookieClient); + return (MockBookieClient) bookieClient; + } + + public MockClientContext setConf(ClientInternalConf internalConf) { + this.internalConf = maybeSpy(internalConf); return this; } - MockClientContext setLedgerManager(LedgerManager ledgerManager) { - this.ledgerManager = ledgerManager; + public MockClientContext setLedgerManager(LedgerManager ledgerManager) { + this.ledgerManager = maybeSpy(ledgerManager); return this; } - MockClientContext setBookieWatcher(BookieWatcher bookieWatcher) { - this.bookieWatcher = 
bookieWatcher; + public MockClientContext setBookieWatcher(BookieWatcher bookieWatcher) { + this.bookieWatcher = maybeSpy(bookieWatcher); return this; } - MockClientContext setPlacementPolicy(EnsemblePlacementPolicy placementPolicy) { - this.placementPolicy = placementPolicy; + public MockClientContext setPlacementPolicy(EnsemblePlacementPolicy placementPolicy) { + this.placementPolicy = maybeSpy(placementPolicy); return this; } - MockClientContext setBookieClient(BookieClient bookieClient) { - this.bookieClient = bookieClient; + public MockClientContext setBookieClient(BookieClient bookieClient) { + this.bookieClient = maybeSpy(bookieClient); return this; } - MockClientContext setMainWorkerPool(OrderedExecutor mainWorkerPool) { - this.mainWorkerPool = mainWorkerPool; + public MockClientContext setMainWorkerPool(OrderedExecutor mainWorkerPool) { + this.mainWorkerPool = maybeSpy(mainWorkerPool); return this; } - MockClientContext setScheduler(OrderedScheduler scheduler) { - this.scheduler = scheduler; + public MockClientContext setScheduler(OrderedScheduler scheduler) { + this.scheduler = maybeSpy(scheduler); return this; } - MockClientContext setClientStats(BookKeeperClientStats clientStats) { + public MockClientContext setClientStats(BookKeeperClientStats clientStats) { this.clientStats = clientStats; return this; } - MockClientContext setIsClientClosed(BooleanSupplier isClientClosed) { + public MockClientContext setIsClientClosed(BooleanSupplier isClientClosed) { this.isClientClosed = isClientClosed; return this; } + public MockClientContext setRegistrationClient(MockRegistrationClient regClient) { + this.regClient = maybeSpy(regClient); + return this; + } + + public MockClientContext setByteBufAllocator(ByteBufAllocator allocator) { + this.allocator = allocator; + return this; + } + + private static T maybeSpy(T orig) { + if (Mockito.mockingDetails(orig).isSpy()) { + return orig; + } else { + return Mockito.spy(orig); + } + } + @Override public ClientInternalConf getConf() { - return this.conf; + return this.internalConf; } @Override @@ -141,4 +221,8 @@ public boolean isClientClosed() { return isClientClosed.getAsBoolean(); } + @Override + public ByteBufAllocator getByteBufAllocator() { + return allocator; + } } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/MockLedgerEntry.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/MockLedgerEntry.java index 10c25d5161f..ca1a20cca7a 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/MockLedgerEntry.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/MockLedgerEntry.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -21,7 +21,6 @@ import io.netty.buffer.ByteBuf; import io.netty.buffer.Unpooled; import java.io.InputStream; - import org.apache.bookkeeper.client.impl.LedgerEntryImpl; /** diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/MockLedgerHandle.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/MockLedgerHandle.java index 47350208dc8..660bf0f80c7 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/MockLedgerHandle.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/MockLedgerHandle.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
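The maybeSpy helper in MockClientContext above wraps each collaborator in a Mockito spy unless it already is one, so tests get working default behaviour while every interaction stays stubbable and verifiable. A hypothetical usage sketch, assuming a test class in the same package (create() and the typed getMock* accessors shown above are package-visible):

    package org.apache.bookkeeper.client;

    import org.apache.bookkeeper.proto.MockBookieClient;
    import org.mockito.Mockito;

    // Hypothetical test living alongside MockClientContext.
    public class MockClientContextUsageSketch {
        public void example() throws Exception {
            MockClientContext ctx = MockClientContext.create();
            // Collaborators were routed through maybeSpy(), so they are spies:
            assert Mockito.mockingDetails(ctx.getBookieClient()).isSpy();
            // Typed accessors fail fast (checkState) if a different
            // implementation was substituted:
            MockBookieClient bookieClient = ctx.getMockBookieClient();
        }
    }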
See the NOTICE file * distributed with this work for additional information @@ -19,28 +19,31 @@ package org.apache.bookkeeper.client; import com.google.common.collect.Lists; - import io.netty.buffer.ByteBuf; import io.netty.buffer.Unpooled; - import java.security.GeneralSecurityException; import java.util.ArrayDeque; import java.util.ArrayList; import java.util.Arrays; import java.util.Enumeration; +import java.util.List; import java.util.Queue; import java.util.concurrent.CompletableFuture; import java.util.concurrent.RejectedExecutionException; - import org.apache.bookkeeper.client.AsyncCallback.AddCallback; import org.apache.bookkeeper.client.AsyncCallback.CloseCallback; import org.apache.bookkeeper.client.AsyncCallback.ReadCallback; import org.apache.bookkeeper.client.BookKeeper.DigestType; import org.apache.bookkeeper.client.api.LastConfirmedAndEntry; import org.apache.bookkeeper.client.api.LedgerEntries; +import org.apache.bookkeeper.client.api.LedgerMetadata; import org.apache.bookkeeper.client.api.ReadHandle; import org.apache.bookkeeper.client.api.WriteFlag; import org.apache.bookkeeper.client.impl.LedgerEntryImpl; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.versioning.LongVersion; +import org.apache.bookkeeper.versioning.Versioned; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -60,8 +63,8 @@ public class MockLedgerHandle extends LedgerHandle { MockLedgerHandle(MockBookKeeper bk, long id, DigestType digest, byte[] passwd) throws GeneralSecurityException { super(bk.getClientCtx(), id, - new LedgerMetadata(3, 3, 2, DigestType.MAC, "".getBytes()), DigestType.MAC, "".getBytes(), - WriteFlag.NONE); + new Versioned<>(createMetadata(digest, passwd), new LongVersion(0L)), + digest, passwd, WriteFlag.NONE); this.bk = bk; this.id = id; this.digest = digest; @@ -77,9 +80,17 @@ public void asyncClose(CloseCallback cb, Object ctx) { return; } + LedgerMetadata metadata = getLedgerMetadata(); + metadata = LedgerMetadataBuilder.from(metadata) + .withClosedState() + .withLastEntryId(lastEntry) + .withLength(length) + .build(); + setLedgerMetadata(getVersionedLedgerMetadata(), new Versioned<>(metadata, new LongVersion(1L))); + fenced = true; try { - bk.executor.execute(() -> cb.closeComplete(0, this, ctx)); + executeOrdered(() -> cb.closeComplete(0, this, ctx)); } catch (RejectedExecutionException e) { cb.closeComplete(0, this, ctx); } @@ -93,25 +104,32 @@ public void asyncReadEntries(final long firstEntry, final long lastEntry, final return; } - bk.executor.execute(new Runnable() { + executeOrdered(new Runnable() { public void run() { if (bk.getProgrammedFailStatus()) { cb.readComplete(bk.failReturnCode, MockLedgerHandle.this, null, ctx); return; } else if (bk.isStopped()) { - log.debug("Bookkeeper is closed!"); + if (log.isDebugEnabled()) { + log.debug("Bookkeeper is closed!"); + } cb.readComplete(-1, MockLedgerHandle.this, null, ctx); return; } - log.debug("readEntries: first={} last={} total={}", firstEntry, lastEntry, entries.size()); + if (log.isDebugEnabled()) { + log.debug("readEntries: first={} last={} total={}", + firstEntry, lastEntry, entries.size()); + } final Queue seq = new ArrayDeque(); long entryId = firstEntry; while (entryId <= lastEntry && entryId < entries.size()) { seq.add(new LedgerEntry(entries.get((int) entryId++).duplicate())); } - log.debug("Entries read: {}", seq); + if (log.isDebugEnabled()) { + log.debug("Entries read: {}", seq); + } try { Thread.sleep(1); @@ -173,7 
+191,7 @@ public void asyncAddEntry(final ByteBuf data, final AddCallback cb, final Object } data.retain(); - bk.executor.execute(new Runnable() { + executeOrdered(new Runnable() { public void run() { if (bk.getProgrammedFailStatus()) { fenced = true; @@ -263,6 +281,17 @@ public CompletableFuture readLastAddConfirmedAndEntryAsyn return readHandle.readLastAddConfirmedAndEntryAsync(entryId, timeOutInMillis, parallel); } + private static LedgerMetadata createMetadata(DigestType digest, byte[] passwd) { + List ensemble = Lists.newArrayList(new BookieSocketAddress("192.0.2.1", 1234).toBookieId(), + new BookieSocketAddress("192.0.2.2", 1234).toBookieId(), + new BookieSocketAddress("192.0.2.3", 1234).toBookieId()); + return LedgerMetadataBuilder.create() + .withId(124L).withDigestType(digest.toApiDigestType()) + .withPassword(passwd) + .newEnsembleEntry(0L, ensemble) + .build(); + } + private static final Logger log = LoggerFactory.getLogger(MockLedgerHandle.class); } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/MockReadHandle.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/MockReadHandle.java index e3e3ffedad9..fac6192a537 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/MockReadHandle.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/MockReadHandle.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -56,7 +56,7 @@ public CompletableFuture readAsync(long firstEntry, long lastEntr return promise; } - bk.executor.execute(() -> { + bk.orderedExecutor.chooseThread().execute(() -> { if (bk.getProgrammedFailStatus()) { promise.completeExceptionally(BKException.create(bk.failReturnCode)); return; @@ -65,13 +65,17 @@ public CompletableFuture readAsync(long firstEntry, long lastEntr return; } - log.debug("readEntries: first={} last={} total={}", firstEntry, lastEntry, entries.size()); + if (log.isDebugEnabled()) { + log.debug("readEntries: first={} last={} total={}", firstEntry, lastEntry, entries.size()); + } List seq = new ArrayList<>(); long entryId = firstEntry; while (entryId <= lastEntry && entryId < entries.size()) { seq.add(entries.get((int) entryId++).duplicate()); } - log.debug("Entries read: {}", seq); + if (log.isDebugEnabled()) { + log.debug("Entries read: {}", seq); + } promise.complete(LedgerEntriesImpl.create(seq)); }); return promise; diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/ParallelLedgerRecoveryTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/ParallelLedgerRecoveryTest.java index 8b5f5a60321..78dd64f0b49 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/ParallelLedgerRecoveryTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/ParallelLedgerRecoveryTest.java @@ -20,15 +20,16 @@ */ package org.apache.bookkeeper.client; -import static com.google.common.base.Charsets.UTF_8; +import static java.nio.charset.StandardCharsets.UTF_8; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import io.netty.buffer.ByteBuf; import io.netty.buffer.Unpooled; - +import io.netty.util.ReferenceCounted; import java.io.IOException; import java.util.Enumeration; +import java.util.concurrent.CompletableFuture; import java.util.concurrent.CountDownLatch; import java.util.concurrent.ExecutorService; import 
java.util.concurrent.Executors; @@ -37,11 +38,11 @@ import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; - -import org.apache.bookkeeper.bookie.Bookie; import org.apache.bookkeeper.bookie.BookieException; import org.apache.bookkeeper.bookie.InterleavedLedgerStorage; +import org.apache.bookkeeper.bookie.TestBookieImpl; import org.apache.bookkeeper.client.BookKeeper.DigestType; +import org.apache.bookkeeper.client.api.LedgerMetadata; import org.apache.bookkeeper.client.api.WriteFlag; import org.apache.bookkeeper.conf.ClientConfiguration; import org.apache.bookkeeper.conf.ServerConfiguration; @@ -53,7 +54,7 @@ import org.apache.bookkeeper.meta.exceptions.MetadataException; import org.apache.bookkeeper.meta.zk.ZKMetadataBookieDriver; import org.apache.bookkeeper.meta.zk.ZKMetadataClientDriver; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.proto.BookieProtocol; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GenericCallback; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.LedgerMetadataListener; @@ -61,10 +62,10 @@ import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.WriteCallback; import org.apache.bookkeeper.proto.checksum.DigestManager; import org.apache.bookkeeper.test.BookKeeperClusterTestCase; -import org.apache.bookkeeper.util.ByteBufList; import org.apache.bookkeeper.versioning.Version; +import org.apache.bookkeeper.versioning.Versioned; +import org.apache.commons.lang3.mutable.MutableInt; import org.apache.zookeeper.AsyncCallback.VoidCallback; -import org.apache.zookeeper.KeeperException; import org.junit.After; import org.junit.Test; import org.slf4j.Logger; @@ -93,30 +94,32 @@ void setLatch(CountDownLatch waitLatch) { } @Override - public void createLedgerMetadata(long ledgerId, LedgerMetadata metadata, GenericCallback cb) { - lm.createLedgerMetadata(ledgerId, metadata, cb); + public CompletableFuture> createLedgerMetadata( + long ledgerId, LedgerMetadata metadata) { + return lm.createLedgerMetadata(ledgerId, metadata); } @Override - public void removeLedgerMetadata(long ledgerId, Version version, GenericCallback cb) { - lm.removeLedgerMetadata(ledgerId, version, cb); + public CompletableFuture removeLedgerMetadata(long ledgerId, Version version) { + return lm.removeLedgerMetadata(ledgerId, version); } @Override - public void readLedgerMetadata(long ledgerId, GenericCallback readCb) { - lm.readLedgerMetadata(ledgerId, readCb); + public CompletableFuture> readLedgerMetadata(long ledgerId) { + return lm.readLedgerMetadata(ledgerId); } @Override - public LedgerRangeIterator getLedgerRanges() { - return lm.getLedgerRanges(); + public LedgerRangeIterator getLedgerRanges(long zkOpTimeoutMs) { + return lm.getLedgerRanges(zkOpTimeoutMs); } @Override - public void writeLedgerMetadata(final long ledgerId, final LedgerMetadata metadata, - final GenericCallback cb) { + public CompletableFuture> writeLedgerMetadata(long ledgerId, LedgerMetadata metadata, + Version currentVersion) { final CountDownLatch cdl = waitLatch; if (null != cdl) { + CompletableFuture> promise = new CompletableFuture<>(); executorService.submit(new Runnable() { @Override public void run() { @@ -126,11 +129,19 @@ public void run() { Thread.currentThread().interrupt(); LOG.error("Interrupted on waiting latch : ", e); } - lm.writeLedgerMetadata(ledgerId, metadata, cb); + lm.writeLedgerMetadata(ledgerId, metadata, 
currentVersion) + .whenComplete((metadata, exception) -> { + if (exception != null) { + promise.completeExceptionally(exception); + } else { + promise.complete(metadata); + } + }); } }); + return promise; } else { - lm.writeLedgerMetadata(ledgerId, metadata, cb); + return lm.writeLedgerMetadata(ledgerId, metadata, currentVersion); } } @@ -327,7 +338,7 @@ public void addComplete(int rc, LedgerHandle lh, long entryId, Object ctx) { LOG.info("Added {} entries to ledger {}.", numEntries, lh.getId()); - long ledgerLenth = lh.getLength(); + long ledgerLength = lh.getLength(); LedgerHandle recoverLh = newBk.openLedgerNoRecovery(lh.getId(), digestType, "".getBytes()); assertEquals(BookieProtocol.INVALID_ENTRY_ID, recoverLh.getLastAddPushed()); @@ -342,7 +353,7 @@ public void addComplete(int rc, LedgerHandle lh, long entryId, Object ctx) { final CountDownLatch recoverLatch = new CountDownLatch(1); final AtomicBoolean success = new AtomicBoolean(false); - recoverLh.recover(new GenericCallback() { + ((ReadOnlyLedgerHandle) recoverLh).recover(new GenericCallback() { @Override public void operationComplete(int rc, Void result) { LOG.info("Recovering ledger {} completed : {}.", lh.getId(), rc); @@ -364,19 +375,11 @@ public void operationComplete(int rc, Void result) { LedgerHandle newRecoverLh = newBk.openLedgerNoRecovery(lh.getId(), digestType, "".getBytes()); assertEquals(BookieProtocol.INVALID_ENTRY_ID, newRecoverLh.getLastAddPushed()); assertEquals(BookieProtocol.INVALID_ENTRY_ID, newRecoverLh.getLastAddConfirmed()); + // mark the ledger as in recovery to update version. - newRecoverLh.getLedgerMetadata().markLedgerInRecovery(); - final CountDownLatch updateLatch = new CountDownLatch(1); - final AtomicInteger updateResult = new AtomicInteger(0x12345); - newRecoverLh.writeLedgerConfig(new GenericCallback() { - @Override - public void operationComplete(int rc, LedgerMetadata result) { - updateResult.set(rc); - updateLatch.countDown(); - } - }); - updateLatch.await(); - assertEquals(BKException.Code.OK, updateResult.get()); + ClientUtil.transformMetadata(newBk.getClientCtx(), newRecoverLh.getId(), + (metadata) -> LedgerMetadataBuilder.from(metadata).withInRecoveryState().build()); + newRecoverLh.close(); LOG.info("Updated ledger manager {}.", newRecoverLh.getLedgerMetadata()); } @@ -392,7 +395,7 @@ public void operationComplete(int rc, LedgerMetadata result) { assertTrue(success.get()); assertEquals(numEntries - 1, recoverLh.getLastAddPushed()); assertEquals(numEntries - 1, recoverLh.getLastAddConfirmed()); - assertEquals(ledgerLenth, recoverLh.getLength()); + assertEquals(ledgerLength, recoverLh.getLength()); assertTrue(recoverLh.getLedgerMetadata().isClosed()); Enumeration enumeration = recoverLh.readEntries(0, numEntries - 1); @@ -422,9 +425,10 @@ public void testRecoveryOnEntryGap() throws Exception { long entryId = 14; long lac = 8; byte[] data = "recovery-on-entry-gap-gap".getBytes(UTF_8); - ByteBufList toSend = + ReferenceCounted toSend = lh.macManager.computeDigestAndPackageForSending( - entryId, lac, lh.getLength() + 100, Unpooled.wrappedBuffer(data, 0, data.length)); + entryId, lac, lh.getLength() + 100, Unpooled.wrappedBuffer(data, 0, data.length), + new byte[20], 0); final CountDownLatch addLatch = new CountDownLatch(1); final AtomicBoolean addSuccess = new AtomicBoolean(false); LOG.info("Add entry {} with lac = {}", entryId, lac); @@ -434,7 +438,7 @@ public void testRecoveryOnEntryGap() throws Exception { new WriteCallback() { @Override public void writeComplete(int rc, long ledgerId, long 
entryId, - BookieSocketAddress addr, Object ctx) { + BookieId addr, Object ctx) { addSuccess.set(BKException.Code.OK == rc); addLatch.countDown(); } @@ -459,7 +463,7 @@ public void writeComplete(int rc, long ledgerId, long entryId, final AtomicBoolean isMetadataClosed = new AtomicBoolean(false); final AtomicInteger numSuccessCalls = new AtomicInteger(0); final AtomicInteger numFailureCalls = new AtomicInteger(0); - recoverLh.recover(new GenericCallback() { + ((ReadOnlyLedgerHandle) recoverLh).recover(new GenericCallback() { @Override public void operationComplete(int rc, Void result) { if (BKException.Code.OK == rc) { @@ -480,7 +484,7 @@ public void operationComplete(int rc, Void result) { assertEquals("recovery callback should be triggered only once", 0, numFailureCalls.get()); } - static class DelayResponseBookie extends Bookie { + static class DelayResponseBookie extends TestBookieImpl { static final class WriteCallbackEntry { @@ -488,12 +492,12 @@ static final class WriteCallbackEntry { private final int rc; private final long ledgerId; private final long entryId; - private final BookieSocketAddress addr; + private final BookieId addr; private final Object ctx; WriteCallbackEntry(WriteCallback cb, int rc, long ledgerId, long entryId, - BookieSocketAddress addr, Object ctx) { + BookieId addr, Object ctx) { this.cb = cb; this.rc = rc; this.ledgerId = ledgerId; @@ -515,17 +519,17 @@ public void callback() { new LinkedBlockingQueue(); public DelayResponseBookie(ServerConfiguration conf) - throws IOException, KeeperException, InterruptedException, BookieException { + throws Exception { super(conf); } @Override - public void addEntry(ByteBuf entry, boolean ackBeforeSync, final WriteCallback cb, - Object ctx, byte[] masterKey) throws IOException, BookieException { + public void addEntry(ByteBuf entry, boolean ackBeforeSync, final WriteCallback cb, Object ctx, byte[] masterKey) + throws IOException, BookieException, InterruptedException { super.addEntry(entry, ackBeforeSync, new WriteCallback() { @Override public void writeComplete(int rc, long ledgerId, long entryId, - BookieSocketAddress addr, Object ctx) { + BookieId addr, Object ctx) { if (delayAddResponse.get()) { delayQueue.add(new WriteCallbackEntry(cb, rc, ledgerId, entryId, addr, ctx)); } else { @@ -536,7 +540,7 @@ public void writeComplete(int rc, long ledgerId, long entryId, } @Override - public ByteBuf readEntry(long ledgerId, long entryId) throws IOException, NoLedgerException { + public ByteBuf readEntry(long ledgerId, long entryId) throws IOException, NoLedgerException, BookieException { LOG.info("ReadEntry {} - {}", ledgerId, entryId); if (delayReadResponse.get() && delayReadOnEntry.get() == entryId) { CountDownLatch latch = delayReadLatch; @@ -588,12 +592,11 @@ public void testRecoveryWhenClosingLedgerHandle() throws Exception { LOG.info("Create ledger {}", lh0.getId()); // 0) place the bookie with a fake bookie - BookieSocketAddress address = lh0.getCurrentEnsemble().get(0); + BookieId address = lh0.getCurrentEnsemble().get(0); ServerConfiguration conf = killBookie(address); conf.setLedgerStorageClass(InterleavedLedgerStorage.class.getName()); DelayResponseBookie fakeBookie = new DelayResponseBookie(conf); - bs.add(startBookie(conf, fakeBookie)); - bsConfs.add(conf); + startAndAddBookie(conf, fakeBookie); // 1) bk0 write two entries lh0.addEntry("entry-0".getBytes(UTF_8)); @@ -639,7 +642,7 @@ public void addComplete(int rc, LedgerHandle lh, long entryId, Object ctx) { tlm1.setLatch(metadataLatch); final CountDownLatch 
recoverLatch = new CountDownLatch(1); final AtomicBoolean recoverSuccess = new AtomicBoolean(false); - lh1.recover(new GenericCallback() { + ((ReadOnlyLedgerHandle) lh1).recover(new GenericCallback() { @Override public void operationComplete(int rc, Void result) { LOG.info("Recovering ledger {} completed : {}", lh1.getId(), rc); @@ -669,7 +672,12 @@ public void operationComplete(int rc, Void result) { final AtomicInteger rcHolder = new AtomicInteger(-1234); final CountDownLatch doneLatch = new CountDownLatch(1); - new ReadLastConfirmedOp(readLh, bkc.getBookieClient(), readLh.getCurrentEnsemble(), + new ReadLastConfirmedOp(bkc.getBookieClient(), + readLh.distributionSchedule, + readLh.macManager, + readLh.ledgerId, + readLh.getLedgerMetadata().getAllEnsembles().lastEntry().getValue(), + readLh.ledgerKey, new ReadLastConfirmedOp.LastConfirmedDataCallback() { @Override public void readLastConfirmedDataComplete(int rc, DigestManager.RecoveryData data) { @@ -687,4 +695,98 @@ public void readLastConfirmedDataComplete(int rc, DigestManager.RecoveryData dat readBk.close(); } + /** + * Validate that a ledger can recover with (Qw - Qa) + 1 responses. + * @throws Exception + */ + @Test + public void testRecoveryWithUnavailableBookie() throws Exception { + + byte[] passwd = "".getBytes(UTF_8); + ClientConfiguration newConf = new ClientConfiguration(); + newConf.addConfiguration(baseClientConf); + final BookKeeper readBk = new BookKeeper(newConf); + final BookKeeper newBk0 = new BookKeeper(newConf); + + /** + * Test Group-1 : Expected Response for recovery: Qr = (Qw - Qa)+1 = (3 + * -2) + 1 = 2 + */ + int ensembleSize = 3; + int writeQuorumSize = 3; + int ackQuorumSize = 2; + LedgerHandle lh0 = newBk0.createLedger(ensembleSize, writeQuorumSize, ackQuorumSize, DigestType.DUMMY, passwd); + LedgerHandle readLh = readBk.openLedgerNoRecovery(lh0.getId(), DigestType.DUMMY, passwd); + // Test 1: bookie response: OK, NO_SUCH_LEDGER_EXISTS, NOT_AVAILABLE + // Expected: Recovery successful Q(response) = 2 + int responseCode = readLACFromQuorum(readLh, BKException.Code.BookieHandleNotAvailableException, + BKException.Code.OK, BKException.Code.NoSuchLedgerExistsException); + assertEquals(responseCode, BKException.Code.OK); + // Test 2: bookie response: OK, NOT_AVAILABLE, NOT_AVAILABLE + // Expected: Recovery fail Q(response) = 1 + responseCode = readLACFromQuorum(readLh, BKException.Code.BookieHandleNotAvailableException, + BKException.Code.OK, BKException.Code.BookieHandleNotAvailableException); + assertEquals(responseCode, BKException.Code.BookieHandleNotAvailableException); + + /** + * Test Group-2 : Expected Response for recovery: Qr = (Qw - Qa)+1 = (2 + * -2) + 1 = 1 + */ + ensembleSize = 2; + writeQuorumSize = 2; + ackQuorumSize = 2; + lh0 = newBk0.createLedger(ensembleSize, writeQuorumSize, ackQuorumSize, DigestType.DUMMY, passwd); + readLh = readBk.openLedgerNoRecovery(lh0.getId(), DigestType.DUMMY, passwd); + // Test 1: bookie response: OK, NOT_AVAILABLE + // Expected: Recovery successful Q(response) = 1 + responseCode = readLACFromQuorum(readLh, BKException.Code.BookieHandleNotAvailableException, + BKException.Code.OK); + assertEquals(responseCode, BKException.Code.OK); + + // Test 2: bookie response: OK, NO_SUCH_LEDGER_EXISTS + // Expected: Recovery successful Q(response) = 2 + responseCode = readLACFromQuorum(readLh, BKException.Code.NoSuchLedgerExistsException, BKException.Code.OK); + assertEquals(responseCode, BKException.Code.OK); + + // Test 3: bookie response: NOT_AVAILABLE, NOT_AVAILABLE + // Expected:
Recovery fail Q(response) = 0 + responseCode = readLACFromQuorum(readLh, BKException.Code.BookieHandleNotAvailableException, + BKException.Code.BookieHandleNotAvailableException); + assertEquals(responseCode, BKException.Code.BookieHandleNotAvailableException); + + newBk0.close(); + readBk.close(); + } + + private int readLACFromQuorum(LedgerHandle ledger, int... bookieLACResponse) throws Exception { + MutableInt responseCode = new MutableInt(100); + CountDownLatch responseLatch = new CountDownLatch(1); + ReadLastConfirmedOp readLCOp = new ReadLastConfirmedOp( + bkc.getBookieClient(), + ledger.getDistributionSchedule(), + ledger.getDigestManager(), + ledger.getId(), + ledger.getLedgerMetadata().getAllEnsembles().lastEntry().getValue(), + ledger.getLedgerKey(), + new ReadLastConfirmedOp.LastConfirmedDataCallback() { + @Override + public void readLastConfirmedDataComplete(int rc, DigestManager.RecoveryData data) { + System.out.println("response = " + rc); + responseCode.setValue(rc); + responseLatch.countDown(); + } + }); + byte[] lac = new byte[Long.SIZE * 3]; + ByteBuf data = Unpooled.wrappedBuffer(lac, 0, lac.length); + int writerIndex = data.writerIndex(); + data.resetWriterIndex(); + data.writeLong(ledger.getId()); + data.writeLong(0L); + data.writerIndex(writerIndex); + for (int i = 0; i < bookieLACResponse.length; i++) { + readLCOp.readEntryComplete(bookieLACResponse[i], 0, 0, data, i); + } + responseLatch.await(); + return responseCode.intValue(); + } } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/PendingAddOpTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/PendingAddOpTest.java index 51d296c9005..5fb318c51fe 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/PendingAddOpTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/PendingAddOpTest.java @@ -82,7 +82,7 @@ public void testExecuteAfterCancelled() { assertSame(lh, op.lh); assertEquals(Code.NotEnoughBookiesException, rcHolder.get()); - op.run(); + op.initiate(); // after the op is run, the object is recycled. 
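The recycling noted in the comment above is why the assertNull(op.lh) that follows holds: a completed PendingAddOp clears its fields and returns itself to an object pool rather than being dropped for garbage collection. A minimal sketch of that pool-and-clear pattern using Netty's Recycler; PooledOp and its single field are hypothetical, and the real PendingAddOp internals differ in detail:

    import io.netty.util.Recycler;

    final class PooledOp {
        private static final Recycler<PooledOp> RECYCLER = new Recycler<PooledOp>() {
            @Override
            protected PooledOp newObject(Handle<PooledOp> handle) {
                return new PooledOp(handle);
            }
        };

        private final Recycler.Handle<PooledOp> handle;
        Object lh; // stands in for the ledger-handle field the test checks

        private PooledOp(Recycler.Handle<PooledOp> handle) {
            this.handle = handle;
        }

        static PooledOp create(Object lh) {
            PooledOp op = RECYCLER.get(); // reuses a pooled instance when available
            op.lh = lh;
            return op;
        }

        void initiate() {
            // ... perform the operation, then recycle:
            lh = null;            // clear references before returning to the pool
            handle.recycle(this);
        }
    }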
assertNull(op.lh); } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/ReadLastConfirmedAndEntryOpTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/ReadLastConfirmedAndEntryOpTest.java index 3d9c3943b3f..760f2490182 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/ReadLastConfirmedAndEntryOpTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/ReadLastConfirmedAndEntryOpTest.java @@ -32,6 +32,8 @@ import io.netty.buffer.ByteBuf; import io.netty.buffer.Unpooled; +import io.netty.buffer.UnpooledByteBufAllocator; +import io.netty.util.ReferenceCounted; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; @@ -47,11 +49,13 @@ import org.apache.bookkeeper.client.BookKeeper.DigestType; import org.apache.bookkeeper.client.ReadLastConfirmedAndEntryOp.LastConfirmedAndEntryCallback; import org.apache.bookkeeper.client.api.LastConfirmedAndEntry; +import org.apache.bookkeeper.client.api.LedgerMetadata; import org.apache.bookkeeper.client.impl.LastConfirmedAndEntryImpl; import org.apache.bookkeeper.client.impl.LedgerEntryImpl; import org.apache.bookkeeper.common.concurrent.FutureUtils; import org.apache.bookkeeper.common.util.OrderedScheduler; import org.apache.bookkeeper.conf.ClientConfiguration; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.net.BookieSocketAddress; import org.apache.bookkeeper.proto.BookieClient; import org.apache.bookkeeper.proto.BookieProtocol; @@ -81,7 +85,7 @@ public class ReadLastConfirmedAndEntryOpTest { private ScheduledExecutorService scheduler; private OrderedScheduler orderedScheduler; private ClientInternalConf internalConf; - private EnsemblePlacementPolicy mockPlacementPolicy; + private EnsemblePlacementPolicy placementPolicy; private LedgerMetadata ledgerMetadata; private DistributionSchedule distributionSchedule; private DigestManager digestManager; @@ -99,13 +103,15 @@ public void setup() throws Exception { internalConf = ClientInternalConf.fromConfig(conf); // metadata - this.ledgerMetadata = - new LedgerMetadata(3, 3, 2, DigestType.CRC32, new byte[0]); - ArrayList ensemble = new ArrayList<>(3); + ArrayList ensemble = new ArrayList<>(3); for (int i = 0; i < 3; i++) { - ensemble.add(new BookieSocketAddress("127.0.0.1", 3181 + i)); + ensemble.add(new BookieSocketAddress("127.0.0.1", 3181 + i).toBookieId()); } - this.ledgerMetadata.addEnsemble(0L, ensemble); + this.ledgerMetadata = LedgerMetadataBuilder.create() + .withId(124L).withEnsembleSize(3).withWriteQuorumSize(2).withAckQuorumSize(2) + .withPassword(new byte[0]) + .withDigestType(DigestType.CRC32.toApiDigestType()) + .newEnsembleEntry(0L, ensemble).build(); this.distributionSchedule = new RoundRobinDistributionSchedule(3, 2, 3); // schedulers this.scheduler = Executors.newSingleThreadScheduledExecutor(); @@ -115,10 +121,10 @@ public void setup() throws Exception { .build(); this.mockBookieClient = mock(BookieClient.class); - this.mockPlacementPolicy = mock(EnsemblePlacementPolicy.class); + this.placementPolicy = new DefaultEnsemblePlacementPolicy(); this.mockClientCtx = mock(ClientContext.class); when(mockClientCtx.getBookieClient()).thenReturn(mockBookieClient); when(mockClientCtx.getPlacementPolicy()).thenReturn(placementPolicy); when(mockClientCtx.getConf()).thenReturn(internalConf);
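In testSpeculativeResponses further below, the readEntryWaitForLACUpdate stub never answers a read directly; it records each bookie's callback in a map so the test can complete them by hand, in a chosen order, to reproduce the double-release. A stripped-down sketch of that capture idiom, with hypothetical Client and ReadCb types standing in for BookieClient and ReadEntryCallback:

    import static org.mockito.ArgumentMatchers.any;
    import static org.mockito.Mockito.doAnswer;
    import static org.mockito.Mockito.mock;

    import java.util.Map;
    import java.util.concurrent.ConcurrentHashMap;

    class CallbackCaptureSketch {
        interface ReadCb { void done(int rc); }
        interface Client { void read(String bookie, ReadCb cb); }

        void example() {
            Client client = mock(Client.class);
            Map<String, ReadCb> captured = new ConcurrentHashMap<>();
            doAnswer(inv -> {
                // Record the callback instead of completing it, so the test
                // decides when and in what order responses arrive.
                captured.put(inv.getArgument(0), inv.getArgument(1));
                return null;
            }).when(client).read(any(), any());

            client.read("bookie-1", rc -> { });
            client.read("bookie-2", rc -> { });
            captured.get("bookie-2").done(0); // deliver responses out of order
            captured.get("bookie-1").done(0);
        }
    }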
when(mockClientCtx.getScheduler()).thenReturn(orderedScheduler); when(mockClientCtx.getMainWorkerPool()).thenReturn(orderedScheduler); @@ -129,7 +136,7 @@ public void setup() throws Exception { when(mockLh.getCurrentEnsemble()).thenReturn(ensemble); when(mockLh.getLedgerMetadata()).thenReturn(ledgerMetadata); when(mockLh.getDistributionSchedule()).thenReturn(distributionSchedule); - digestManager = new DummyDigestManager(LEDGERID, false); + digestManager = new DummyDigestManager(LEDGERID, false, UnpooledByteBufAllocator.DEFAULT); when(mockLh.getDigestManager()).thenReturn(digestManager); } @@ -142,7 +149,7 @@ public void teardown() { @Data static class ReadLastConfirmedAndEntryHolder { - private final BookieSocketAddress address; + private final BookieId address; private final ReadEntryCallback callback; private final ReadLastConfirmedAndEntryContext context; @@ -161,15 +168,20 @@ public void testSpeculativeResponses() throws Exception { final long lac = 1L; ByteBuf data = Unpooled.copiedBuffer("test-speculative-responses", UTF_8); - ByteBufList dataWithDigest = digestManager.computeDigestAndPackageForSending( - entryId, lac, data.readableBytes(), data); - byte[] bytesWithDigest = new byte[dataWithDigest.readableBytes()]; - assertEquals(bytesWithDigest.length, dataWithDigest.getBytes(bytesWithDigest)); + ReferenceCounted refCnt = digestManager.computeDigestAndPackageForSending( + entryId, lac, data.readableBytes(), data, new byte[20], 0); + + byte[] bytesWithDigest = null; + if (refCnt instanceof ByteBufList) { + ByteBufList dataWithDigest = (ByteBufList) refCnt; + bytesWithDigest = new byte[dataWithDigest.readableBytes()]; + assertEquals(bytesWithDigest.length, dataWithDigest.getBytes(bytesWithDigest)); + } - final Map callbacks = + final Map callbacks = Collections.synchronizedMap(new HashMap<>()); doAnswer(invocationOnMock -> { - BookieSocketAddress address = invocationOnMock.getArgument(0); + BookieId address = invocationOnMock.getArgument(0); ReadEntryCallback callback = invocationOnMock.getArgument(6); ReadLastConfirmedAndEntryContext context = invocationOnMock.getArgument(7); @@ -179,8 +191,7 @@ public void testSpeculativeResponses() throws Exception { callbacks.put(address, holder); return null; - }).when(mockBookieClient).readEntryWaitForLACUpdate( - any(BookieSocketAddress.class), + }).when(mockBookieClient).readEntryWaitForLACUpdate(any(BookieId.class), anyLong(), anyLong(), anyLong(), @@ -217,9 +228,9 @@ public void testSpeculativeResponses() throws Exception { // 2) complete second bookie with valid entry response. 
this will trigger double-release bug described in // {@link https://github.com/apache/bookkeeper/issues/1476} - Iterator> iter = callbacks.entrySet().iterator(); + Iterator> iter = callbacks.entrySet().iterator(); assertTrue(iter.hasNext()); - Entry firstBookieEntry = iter.next(); + Entry firstBookieEntry = iter.next(); ReadLastConfirmedAndEntryHolder firstBookieHolder = firstBookieEntry.getValue(); ReadLastConfirmedAndEntryContext firstContext = firstBookieHolder.context; firstContext.setLastAddConfirmed(entryId); @@ -231,7 +242,7 @@ public void testSpeculativeResponses() throws Exception { LedgerEntryImpl entry = LedgerEntryImpl.create(LEDGERID, Long.MAX_VALUE); assertTrue(iter.hasNext()); - Entry secondBookieEntry = iter.next(); + Entry secondBookieEntry = iter.next(); ReadLastConfirmedAndEntryHolder secondBookieHolder = secondBookieEntry.getValue(); ReadLastConfirmedAndEntryContext secondContext = secondBookieHolder.context; secondContext.setLastAddConfirmed(entryId); diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/ReadLastConfirmedOpTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/ReadLastConfirmedOpTest.java new file mode 100644 index 00000000000..954bdccca83 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/ReadLastConfirmedOpTest.java @@ -0,0 +1,110 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.client; + +import com.google.common.collect.Lists; +import io.netty.buffer.UnpooledByteBufAllocator; +import java.util.List; +import java.util.concurrent.CompletableFuture; +import org.apache.bookkeeper.common.util.OrderedExecutor; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.proto.DataFormats.LedgerMetadataFormat.DigestType; +import org.apache.bookkeeper.proto.MockBookieClient; +import org.apache.bookkeeper.proto.checksum.DigestManager; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Tests about ReadLastConfirmedOp. 
+ */ +public class ReadLastConfirmedOpTest { + private static final Logger log = LoggerFactory.getLogger(ReadLastConfirmedOpTest.class); + private final BookieId bookie1 = new BookieSocketAddress("bookie1", 3181).toBookieId(); + private final BookieId bookie2 = new BookieSocketAddress("bookie2", 3181).toBookieId(); + + OrderedExecutor executor = null; + + @Before + public void setup() throws Exception { + executor = OrderedExecutor.newBuilder() + .name("BookKeeperClientWorker") + .numThreads(1) + .build(); + } + + @After + public void teardown() throws Exception { + if (executor != null) { + executor.shutdown(); + } + } + + /** + * Test for a specific bug that was introduced with dcdd1e88. + */ + @Test + public void testBookieFailsAfterLedgerMissingOnFirst() throws Exception { + long ledgerId = 0xf00b; + List<BookieId> ensemble = Lists.newArrayList(bookie1, bookie2); + byte[] ledgerKey = new byte[0]; + + MockBookieClient bookieClient = new MockBookieClient(executor); + DistributionSchedule schedule = new RoundRobinDistributionSchedule(2, 2, 2); + DigestManager digestManager = DigestManager.instantiate(ledgerId, ledgerKey, + DigestType.CRC32C, + UnpooledByteBufAllocator.DEFAULT, + true /* useV2 */); + + CompletableFuture<Void> blocker = new CompletableFuture<>(); + bookieClient.setPreReadHook((bookie, lId, entryId) -> { + if (bookie.equals(bookie1)) { + return CompletableFuture.completedFuture(null); + } else { + return blocker; + } + }); + CompletableFuture<DigestManager.RecoveryData> promise = new CompletableFuture<>(); + ReadLastConfirmedOp op = new ReadLastConfirmedOp( + bookieClient, schedule, + digestManager, ledgerId, ensemble, + ledgerKey, + (rc, data) -> { + if (rc != BKException.Code.OK) { + promise.completeExceptionally( + BKException.create(rc)); + } else { + promise.complete(data); + } + }); + op.initiateWithFencing(); + + while (op.getNumResponsesPending() > 1) { + Thread.sleep(100); + } + blocker.completeExceptionally( + new BKException.BKBookieHandleNotAvailableException()); + promise.get(); + } +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/RoundRobinDistributionScheduleTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/RoundRobinDistributionScheduleTest.java index b78f1adf090..a9f44989363 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/RoundRobinDistributionScheduleTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/RoundRobinDistributionScheduleTest.java @@ -27,10 +27,9 @@ import static org.junit.Assert.assertTrue; import com.google.common.collect.Sets; - +import java.util.BitSet; import java.util.HashSet; import java.util.Set; - import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -82,11 +81,7 @@ public void testCoverageSets() { boolean[] buildAvailable(int ensemble, Set<Integer> responses) { boolean[] available = new boolean[ensemble]; for (int i = 0; i < ensemble; i++) { - if (responses.contains(i)) { - available[i] = false; - } else { - available[i] = true; - } + available[i] = !responses.contains(i); } return available; } @@ -161,4 +156,49 @@ public void testMoveAndShift() { w.moveAndShift(4, 4); assertEquals(w, writeSetFromValues(1, 2, 3, 4, 5)); } + + @Test + public void testGetEntriesStripedToTheBookie() { + + RoundRobinDistributionSchedule schedule; + BitSet entriesStriped; + + int ensSize = 3; + int writeQuorum = 3; + int ackQuorum = 3; + int startEntryId = 3; + int lastEntryId = 5; + schedule = new RoundRobinDistributionSchedule(writeQuorum, ackQuorum, ensSize); + + for (int
bookieIndex = 0; bookieIndex < ensSize; bookieIndex++) { + entriesStriped = schedule.getEntriesStripedToTheBookie(bookieIndex, startEntryId, lastEntryId); + assertEquals("Cardinality", 3, entriesStriped.cardinality()); + for (int i = 0; i < entriesStriped.length(); i++) { + assertEquals("EntryAvailability", schedule.hasEntry((startEntryId + i), bookieIndex), + entriesStriped.get(i)); + } + } + + ensSize = 5; + writeQuorum = 3; + ackQuorum = 2; + startEntryId = 100; + lastEntryId = 122; + schedule = new RoundRobinDistributionSchedule(writeQuorum, ackQuorum, ensSize); + for (int bookieIndex = 0; bookieIndex < ensSize; bookieIndex++) { + entriesStriped = schedule.getEntriesStripedToTheBookie(bookieIndex, startEntryId, lastEntryId); + for (int i = 0; i < entriesStriped.length(); i++) { + assertEquals("EntryAvailability", schedule.hasEntry((startEntryId + i), bookieIndex), + entriesStriped.get(i)); + } + } + + schedule = new RoundRobinDistributionSchedule(2, 2, 3); + entriesStriped = schedule.getEntriesStripedToTheBookie(2, 0, 0); + assertEquals("Cardinality", 0, entriesStriped.cardinality()); + entriesStriped = schedule.getEntriesStripedToTheBookie(2, 3, 3); + assertEquals("Cardinality", 0, entriesStriped.cardinality()); + entriesStriped = schedule.getEntriesStripedToTheBookie(2, 4, 4); + assertEquals("Cardinality", 1, entriesStriped.cardinality()); + } } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/SlowBookieTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/SlowBookieTest.java index a70ec32de7c..d77e7a1d7d3 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/SlowBookieTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/SlowBookieTest.java @@ -28,15 +28,16 @@ import java.util.List; import java.util.Set; import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; - import org.apache.bookkeeper.conf.ClientConfiguration; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.proto.BookieClientImpl; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GenericCallback; import org.apache.bookkeeper.test.BookKeeperClusterTestCase; +import org.awaitility.Awaitility; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -73,7 +74,7 @@ public void testSlowBookie() throws Exception { final CountDownLatch b0latch = new CountDownLatch(1); final CountDownLatch b1latch = new CountDownLatch(1); final CountDownLatch addEntrylatch = new CountDownLatch(1); - List<BookieSocketAddress> curEns = lh.getCurrentEnsemble(); + List<BookieId> curEns = lh.getCurrentEnsemble(); try { sleepBookie(curEns.get(0), b0latch); for (int i = 0; i < 10; i++) { @@ -90,8 +91,8 @@ public void addComplete(int rc, LedgerHandle lh, long entryId, Object ctx) { }; lh.asyncAddEntry(entry, cb, null); - Thread.sleep(3000); // sleep 3 seconds to allow time to complete - assertEquals("Successfully added entry!", 0xdeadbeef, i.get()); + Awaitility.await().untilAsserted(() -> + assertEquals("Successfully added entry!", 0xdeadbeef, i.get())); b0latch.countDown(); b1latch.countDown(); addEntrylatch.await(4000, TimeUnit.MILLISECONDS); @@ -149,7 +150,9 @@ public void run() { final AtomicInteger numFragments = new AtomicInteger(-1); lc.checkLedger(lh2, new GenericCallback<Set<LedgerFragment>>() { public void
operationComplete(int rc, Set<LedgerFragment> badFragments) { - LOG.debug("Checked ledgers returned {} {}", rc, badFragments); + if (LOG.isDebugEnabled()) { + LOG.debug("Checked ledgers returned {} {}", rc, badFragments); + } if (rc == BKException.Code.OK) { numFragments.set(badFragments.size()); } @@ -175,8 +178,9 @@ public void testSlowBookieAndBackpressureOn() throws Exception { final boolean expectWriteError = false; final boolean expectFailedTest = false; - LedgerHandle lh = doBackpressureTest(entry, conf, expectWriteError, expectFailedTest, 2000); - assertTrue(lh.readLastConfirmed() < 5); + try (LedgerHandle lh = doBackPressureTest(entry, conf, expectWriteError, expectFailedTest, 2000)) { + assertTrue(lh.readLastConfirmed() < 5); + } } @Test @@ -194,8 +198,9 @@ public void testSlowBookieAndFastFailOn() throws Exception { final boolean expectWriteError = true; final boolean expectFailedTest = false; - LedgerHandle lh = doBackpressureTest(entry, conf, expectWriteError, expectFailedTest, 1000); - assertTrue(lh.readLastConfirmed() < 5); + try (LedgerHandle lh = doBackPressureTest(entry, conf, expectWriteError, expectFailedTest, 1000)) { + assertTrue(lh.readLastConfirmed() < 5); + } } @Test @@ -213,14 +218,14 @@ public void testSlowBookieAndNoBackpressure() throws Exception { final boolean expectWriteError = false; final boolean expectFailedTest = false; - LedgerHandle lh = doBackpressureTest(entry, conf, expectWriteError, expectFailedTest, 4000); - - assertTrue(lh.readLastConfirmed() > 90); + try (LedgerHandle lh = doBackPressureTest(entry, conf, expectWriteError, expectFailedTest, 4000)) { + assertTrue(lh.readLastConfirmed() > 90); + } } - private LedgerHandle doBackpressureTest(byte[] entry, ClientConfiguration conf, - boolean expectWriteError, boolean expectFailedTest, - long sleepInMillis) throws Exception { + private LedgerHandle doBackPressureTest(byte[] entry, ClientConfiguration conf, - boolean expectWriteError, boolean expectFailedTest, - long sleepInMillis) throws Exception { + boolean expectWriteError, boolean expectFailedTest, + long sleepInMillis) throws Exception { BookKeeper bkc = new BookKeeper(conf); byte[] pwd = new byte[] {}; @@ -279,29 +284,60 @@ private LedgerHandle doBackpressureTest(byte[] entry, ClientConfiguration conf, LedgerHandle lh2 = bkc.openLedger(lh.getId(), BookKeeper.DigestType.CRC32, pwd); LedgerChecker lc = new LedgerChecker(bkc); - final CountDownLatch checklatch = new CountDownLatch(1); + final CountDownLatch checkLatch = new CountDownLatch(1); final AtomicInteger numFragments = new AtomicInteger(-1); lc.checkLedger(lh2, (rc, fragments) -> { - LOG.debug("Checked ledgers returned {} {}", rc, fragments); + if (LOG.isDebugEnabled()) { + LOG.debug("Checked ledgers returned {} {}", rc, fragments); + } if (rc == BKException.Code.OK) { numFragments.set(fragments.size()); LOG.error("Checked ledgers returned {} {}", rc, fragments); } - checklatch.countDown(); + checkLatch.countDown(); }); - checklatch.await(); + checkLatch.await(); assertEquals("There should be no missing fragments", 0, numFragments.get()); return lh2; } - private void setTargetChannelState(BookKeeper bkc, BookieSocketAddress address, - long key, boolean state) throws Exception { + private void setTargetChannelState(BookKeeper bkc, BookieId address, + long key, boolean writable) throws Exception { ((BookieClientImpl) bkc.getBookieClient()).lookupClient(address).obtain((rc, pcbc) -> { - pcbc.setWritable(state); + pcbc.setWritable(writable); }, key); } + @Test + public void testWriteSetWriteableCheck() throws Exception { + final ClientConfiguration conf = new ClientConfiguration(); +
conf.setMetadataServiceUri(zkUtil.getMetadataServiceUri()); + BookKeeper bkc = new BookKeeper(conf); + + byte[] pwd = new byte[]{}; + try (LedgerHandle lh = bkc.createLedger(4, 2, 2, BookKeeper.DigestType.CRC32, pwd)) { + lh.addEntry(entry); // [b0, b1] + long entryId = lh.addEntry(entry); // [b1, b2] + + long nextEntryId = entryId + 1; + RoundRobinDistributionSchedule schedule = new RoundRobinDistributionSchedule(2, 2, 4); + DistributionSchedule.WriteSet writeSet = schedule.getWriteSet(nextEntryId); + + // b2 or b3 is no longer writable + int slowBookieIndex = writeSet.get(ThreadLocalRandom.current().nextInt(writeSet.size())); + List<BookieId> curEns = lh.getCurrentEnsemble(); + + // Trigger connection to the bookie service first + bkc.getBookieInfo().get(curEns.get(slowBookieIndex)); + // then mock the channel as not writable + setTargetChannelState(bkc, curEns.get(slowBookieIndex), 0, false); + + boolean isWriteable = lh.waitForWritable(writeSet, 0, 1000); + assertFalse("We should check that b2 and b3 are both not writable", isWriteable); + } + } + @Test public void testManyBookieFailureWithSlowBookies() throws Exception { ClientConfiguration conf = new ClientConfiguration(); @@ -311,7 +347,7 @@ public void testManyBookieFailureWithSlowBookies() throws Exception { BookKeeper bkc = new BookKeeper(conf); byte[] pwd = new byte[] {}; - final LedgerHandle lh = bkc.createLedger(4, 3, 1, BookKeeper.DigestType.CRC32, pwd); + final LedgerHandle lh = bkc.createLedger(4, 3, 2, BookKeeper.DigestType.CRC32, pwd); final AtomicBoolean finished = new AtomicBoolean(false); final AtomicBoolean failTest = new AtomicBoolean(false); Thread t = new Thread() { @@ -353,7 +389,9 @@ public void run() { final AtomicInteger numFragments = new AtomicInteger(-1); lc.checkLedger(lh2, new GenericCallback<Set<LedgerFragment>>() { public void operationComplete(int rc, Set<LedgerFragment> fragments) { - LOG.debug("Checked ledgers returned {} {}", rc, fragments); + if (LOG.isDebugEnabled()) { + LOG.debug("Checked ledgers returned {} {}", rc, fragments); + } if (rc == BKException.Code.OK) { numFragments.set(fragments.size()); } @@ -363,4 +401,38 @@ public void operationComplete(int rc, Set<LedgerFragment> fragments) { checklatch.await(); assertEquals("There should be no missing fragments", 0, numFragments.get()); } + + @Test + public void testWaitForWritable() throws Exception { + final ClientConfiguration conf = new ClientConfiguration(); + conf.setMetadataServiceUri(zkUtil.getMetadataServiceUri()); + BookKeeper bkc = new BookKeeper(conf); + + byte[] pwd = new byte[]{}; + try (LedgerHandle lh = bkc.createLedger(1, 1, 1, BookKeeper.DigestType.CRC32, pwd)) { + long entryId = lh.addEntry(this.entry); + + RoundRobinDistributionSchedule schedule = new RoundRobinDistributionSchedule(1, 1, 1); + DistributionSchedule.WriteSet writeSet = schedule.getWriteSet(entryId); + + int slowBookieIndex = writeSet.get(ThreadLocalRandom.current().nextInt(writeSet.size())); + List<BookieId> curEns = lh.getCurrentEnsemble(); + + // disable channel writable + setTargetChannelState(bkc, curEns.get(slowBookieIndex), 0, false); + + AtomicBoolean isWriteable = new AtomicBoolean(false); + final long timeout = 10000; + + // waitForWritable async + new Thread(() -> isWriteable.set(lh.waitForWritable(writeSet, 0, timeout))).start(); + + Awaitility.await().pollDelay(5, TimeUnit.SECONDS).untilAsserted(() -> assertFalse(isWriteable.get())); + + // enable channel writable + setTargetChannelState(bkc, curEns.get(slowBookieIndex), 0, true); + Awaitility.await().untilAsserted(() -> assertTrue(isWriteable.get())); + } + } + } diff --git
a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestAddEntryQuorumTimeout.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestAddEntryQuorumTimeout.java index efbc43d5070..319134d95e4 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestAddEntryQuorumTimeout.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestAddEntryQuorumTimeout.java @@ -25,10 +25,9 @@ import java.util.List; import java.util.concurrent.CountDownLatch; - import org.apache.bookkeeper.client.AsyncCallback.AddCallback; import org.apache.bookkeeper.client.BookKeeper.DigestType; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.test.BookKeeperClusterTestCase; import org.junit.Before; import org.junit.Test; @@ -81,7 +80,7 @@ public void addComplete(int rc, LedgerHandle lh, long entryId, Object ctx) { public void testBasicTimeout() throws Exception { BookKeeperTestClient bkc = new BookKeeperTestClient(baseClientConf); LedgerHandle lh = bkc.createLedger(3, 3, 3, digestType, testPasswd); - List<BookieSocketAddress> curEns = lh.getCurrentEnsemble(); + List<BookieId> curEns = lh.getCurrentEnsemble(); byte[] data = "foobar".getBytes(); lh.addEntry(data); sleepBookie(curEns.get(0), 5).await(); @@ -105,7 +104,7 @@ private void waitForSyncObj(SyncObj syncObj) throws Exception { public void testTimeoutWithPendingOps() throws Exception { BookKeeperTestClient bkc = new BookKeeperTestClient(baseClientConf); LedgerHandle lh = bkc.createLedger(3, 3, 3, digestType, testPasswd); - List<BookieSocketAddress> curEns = lh.getCurrentEnsemble(); + List<BookieId> curEns = lh.getCurrentEnsemble(); byte[] data = "foobar".getBytes(); SyncObj syncObj1 = new SyncObj(); @@ -130,7 +129,7 @@ public void testTimeoutWithPendingOps() throws Exception { public void testLedgerClosedAfterTimeout() throws Exception { BookKeeperTestClient bkc = new BookKeeperTestClient(baseClientConf); LedgerHandle lh = bkc.createLedger(3, 3, 3, digestType, testPasswd); - List<BookieSocketAddress> curEns = lh.getCurrentEnsemble(); + List<BookieId> curEns = lh.getCurrentEnsemble(); byte[] data = "foobar".getBytes(); CountDownLatch b0latch = sleepBookie(curEns.get(0), 5); try { diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestBatchedRead.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestBatchedRead.java new file mode 100644 index 00000000000..1bb95ed0478 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestBatchedRead.java @@ -0,0 +1,292 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
+ * + */ +package org.apache.bookkeeper.client; + +import static org.apache.bookkeeper.common.concurrent.FutureUtils.result; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.util.Iterator; +import java.util.List; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CountDownLatch; +import org.apache.bookkeeper.client.BKException.Code; +import org.apache.bookkeeper.client.BookKeeper.DigestType; +import org.apache.bookkeeper.client.api.LedgerEntry; +import org.apache.bookkeeper.conf.ClientConfiguration; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.test.BookKeeperClusterTestCase; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Unit tests for batch reading. + */ +public class TestBatchedRead extends BookKeeperClusterTestCase { + + private static final Logger LOG = LoggerFactory.getLogger(TestBatchedRead.class); + + final DigestType digestType; + final byte[] passwd = "sequence-read".getBytes(); + + public TestBatchedRead() { + super(6); + baseClientConf.setUseV2WireProtocol(true); + this.digestType = DigestType.CRC32; + } + + long getLedgerToRead(int ensemble, int writeQuorum, int ackQuorum, int numEntries) + throws Exception { + LedgerHandle lh = bkc.createLedger(ensemble, writeQuorum, ackQuorum, digestType, passwd); + for (int i = 0; i < numEntries; i++) { + lh.addEntry(("" + i).getBytes()); + } + lh.close(); + return lh.getId(); + } + + BatchedReadOp createReadOp(LedgerHandle lh, long startEntry, int count) { + return new BatchedReadOp(lh, bkc.getClientCtx(), startEntry, count, 1024 * count, false); + } + + BatchedReadOp createRecoveryReadOp(LedgerHandle lh, long startEntry, int count) { + return new BatchedReadOp(lh, bkc.getClientCtx(), startEntry, count, 1024 * count, true); + } + + @Test + public void testNormalRead() throws Exception { + int numEntries = 10; + long id = getLedgerToRead(5, 5, 2, numEntries); + LedgerHandle lh = bkc.openLedger(id, digestType, passwd); + + // read single entry + for (int i = 0; i < numEntries; i++) { + BatchedReadOp readOp = createReadOp(lh, i, 1); + readOp.submit(); + Iterator<LedgerEntry> entries = readOp.future().get().iterator(); + assertTrue(entries.hasNext()); + LedgerEntry entry = entries.next(); + assertNotNull(entry); + assertEquals(i, Integer.parseInt(new String(entry.getEntryBytes()))); + entry.close(); + assertFalse(entries.hasNext()); + } + + // read multiple entries + BatchedReadOp readOp = createReadOp(lh, 0, numEntries); + readOp.submit(); + Iterator<LedgerEntry> iterator = readOp.future().get().iterator(); + + int numReads = 0; + while (iterator.hasNext()) { + LedgerEntry entry = iterator.next(); + assertNotNull(entry); + assertEquals(numReads, Integer.parseInt(new String(entry.getEntryBytes()))); + entry.close(); + ++numReads; + } + assertEquals(numEntries, numReads); + lh.close(); + } + + @Test + public void testReadWhenEnsembleNotEqualWQ() throws Exception { + int numEntries = 10; + long id = getLedgerToRead(5, 2, 2, numEntries); + LedgerHandle lh = bkc.openLedger(id, digestType, passwd); + + // read single entry + for (int i = 0; i < numEntries; i++) { + BatchedReadOp readOp = createReadOp(lh, i, 1); + readOp.submit(); + Iterator<LedgerEntry> entries = readOp.future().get().iterator(); + assertTrue(entries.hasNext()); + LedgerEntry entry = entries.next(); + assertNotNull(entry); + assertEquals(i,
Integer.parseInt(new String(entry.getEntryBytes()))); + entry.close(); + assertFalse(entries.hasNext()); + } + + // read multiple entries; because the ensemble size is not equal to the write quorum, the returned entries + // will be fewer than the max count. + for (int i = 0; i < numEntries; i++) { + BatchedReadOp readOp = createReadOp(lh, i, numEntries); + readOp.submit(); + Iterator<LedgerEntry> entries = readOp.future().get().iterator(); + assertTrue(entries.hasNext()); + LedgerEntry entry = entries.next(); + assertNotNull(entry); + assertEquals(i, Integer.parseInt(new String(entry.getEntryBytes()))); + entry.close(); + assertFalse(entries.hasNext()); + } + lh.close(); + } + + private static void expectFail(CompletableFuture<?> future, int expectedRc) { + try { + result(future); + fail("Expect to fail"); + } catch (Exception e) { + assertTrue(e instanceof BKException); + BKException bke = (BKException) e; + assertEquals(expectedRc, bke.getCode()); + } + } + + @Test + public void testReadMissingEntries() throws Exception { + int numEntries = 10; + + long id = getLedgerToRead(5, 5, 2, numEntries); + LedgerHandle lh = bkc.openLedger(id, digestType, passwd); + + // read single entry + BatchedReadOp readOp = createReadOp(lh, 10, 1); + readOp.submit(); + expectFail(readOp.future(), Code.NoSuchEntryException); + + // read multiple entries + readOp = createReadOp(lh, 8, 3); + readOp.submit(); + + int index = 8; + int numReads = 0; + Iterator<LedgerEntry> iterator = readOp.future().get().iterator(); + while (iterator.hasNext()) { + LedgerEntry entry = iterator.next(); + assertNotNull(entry); + assertEquals(index, Integer.parseInt(new String(entry.getEntryBytes()))); + entry.close(); + ++index; + ++numReads; + } + assertEquals(2, numReads); + lh.close(); + } + + @Test + public void testFailRecoveryReadMissingEntryImmediately() throws Exception { + int numEntries = 1; + + long id = getLedgerToRead(5, 5, 3, numEntries); + + ClientConfiguration newConf = new ClientConfiguration() + .setReadEntryTimeout(30000); + newConf.setMetadataServiceUri(zkUtil.getMetadataServiceUri()); + BookKeeper newBk = new BookKeeper(newConf); + + LedgerHandle lh = bkc.openLedger(id, digestType, passwd); + + List<BookieId> ensemble = lh.getLedgerMetadata().getEnsembleAt(10); + CountDownLatch latch1 = new CountDownLatch(1); + CountDownLatch latch2 = new CountDownLatch(1); + // sleep two bookies + sleepBookie(ensemble.get(0), latch1); + sleepBookie(ensemble.get(1), latch2); + + BatchedReadOp readOp = createRecoveryReadOp(lh, 10, 1); + readOp.submit(); + // should fail immediately if the missing entries don't cover the ack quorum + expectFail(readOp.future(), Code.NoSuchEntryException); + latch1.countDown(); + latch2.countDown(); + + lh.close(); + newBk.close(); + } + + @Test + public void testReadWithFailedBookies() throws Exception { + int numEntries = 10; + + long id = getLedgerToRead(5, 3, 3, numEntries); + + ClientConfiguration newConf = new ClientConfiguration() + .setReadEntryTimeout(30000); + newConf.setMetadataServiceUri(zkUtil.getMetadataServiceUri()); + BookKeeper newBk = new BookKeeper(newConf); + + LedgerHandle lh = bkc.openLedger(id, digestType, passwd); + + List<BookieId> ensemble = lh.getLedgerMetadata().getEnsembleAt(5); + // kill two bookies + killBookie(ensemble.get(0)); + killBookie(ensemble.get(1)); + + // read multiple entries; because the ensemble size is not equal to the write quorum, the returned entries + // will be fewer than the max count.
+ int numReads = 0; + for (int i = 0; i < numEntries;) { + BatchedReadOp readOp = createReadOp(lh, i, numEntries); + readOp.submit(); + Iterator<LedgerEntry> entries = readOp.future().get().iterator(); + if (!entries.hasNext()) { + i++; + continue; + } + while (entries.hasNext()) { + LedgerEntry entry = entries.next(); + assertNotNull(entry); + assertEquals(i, Integer.parseInt(new String(entry.getEntryBytes()))); + entry.close(); + i++; + numReads++; + } + } + assertEquals(10, numReads); + lh.close(); + newBk.close(); + } + + @Test + public void testReadFailureWithFailedBookies() throws Exception { + int numEntries = 10; + + long id = getLedgerToRead(5, 3, 3, numEntries); + + ClientConfiguration newConf = new ClientConfiguration() + .setReadEntryTimeout(30000); + newConf.setMetadataServiceUri(zkUtil.getMetadataServiceUri()); + BookKeeper newBk = new BookKeeper(newConf); + + LedgerHandle lh = bkc.openLedger(id, digestType, passwd); + + List<BookieId> ensemble = lh.getLedgerMetadata().getEnsembleAt(5); + // kill three bookies + killBookie(ensemble.get(0)); + killBookie(ensemble.get(1)); + killBookie(ensemble.get(2)); + + // read multiple entries + BatchedReadOp readOp = createReadOp(lh, 0, numEntries); + readOp.submit(); + expectFail(readOp.future(), Code.BookieHandleNotAvailableException); + + lh.close(); + newBk.close(); + } +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestBookieHealthCheck.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestBookieHealthCheck.java index ad35450b4ad..0fee2aa1677 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestBookieHealthCheck.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestBookieHealthCheck.java @@ -21,9 +21,8 @@ package org.apache.bookkeeper.client; import java.util.concurrent.TimeUnit; - import org.apache.bookkeeper.client.AsyncCallback.AddCallback; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.test.BookKeeperClusterTestCase; import org.junit.Assert; import org.junit.Test; @@ -55,7 +54,7 @@ public void testBkQuarantine() throws Exception { lh.addEntry(msg); } - BookieSocketAddress bookieToQuarantine = lh.getLedgerMetadata().getEnsemble(numEntries).get(0); + BookieId bookieToQuarantine = lh.getLedgerMetadata().getEnsembleAt(numEntries).get(0); sleepBookie(bookieToQuarantine, baseClientConf.getAddEntryTimeout() * 2).await(); byte[] tempMsg = "temp-msg".getBytes(); @@ -79,12 +78,12 @@ public void addComplete(int rc, LedgerHandle lh, long entryId, Object ctx) { // the bookie to be left out of the ensemble should always be the quarantined bookie LedgerHandle lh1 = bkc.createLedger(2, 2, 2, BookKeeper.DigestType.CRC32, new byte[] {}); LedgerHandle lh2 = bkc.createLedger(3, 3, 3, BookKeeper.DigestType.CRC32, new byte[] {}); - Assert.assertFalse(lh1.getLedgerMetadata().getEnsemble(0).contains(bookieToQuarantine)); - Assert.assertFalse(lh2.getLedgerMetadata().getEnsemble(0).contains(bookieToQuarantine)); + Assert.assertFalse(lh1.getLedgerMetadata().getEnsembleAt(0).contains(bookieToQuarantine)); + Assert.assertFalse(lh2.getLedgerMetadata().getEnsembleAt(0).contains(bookieToQuarantine)); // the quarantined bookie can still be in the ensemble if we do not have enough healthy bookies LedgerHandle lh3 = bkc.createLedger(4, 4, 4, BookKeeper.DigestType.CRC32, new byte[] {}); - Assert.assertTrue(lh3.getLedgerMetadata().getEnsemble(0).contains(bookieToQuarantine)); +
Assert.assertTrue(lh3.getLedgerMetadata().getEnsembleAt(0).contains(bookieToQuarantine)); // make sure faulty bookie is out of quarantine Thread.sleep(baseClientConf.getBookieQuarantineTimeSeconds() * 1000); @@ -97,7 +96,7 @@ public void addComplete(int rc, LedgerHandle lh, long entryId, Object ctx) { public void testNoQuarantineOnBkRestart() throws Exception { final LedgerHandle lh = bkc.createLedger(2, 2, 2, BookKeeper.DigestType.CRC32, new byte[] {}); final int numEntries = 20; - BookieSocketAddress bookieToRestart = lh.getLedgerMetadata().getEnsemble(0).get(0); + BookieId bookieToRestart = lh.getLedgerMetadata().getEnsembleAt(0).get(0); // we add entries on a separate thread so that we can restart a bookie on the current thread Thread addEntryThread = new Thread() { @@ -132,8 +131,8 @@ public void testNoQuarantineOnExpectedBkErrors() throws Exception { byte[] msg = ("msg-" + i).getBytes(); lh.addEntry(msg); } - BookieSocketAddress bookie1 = lh.getLedgerMetadata().getEnsemble(0).get(0); - BookieSocketAddress bookie2 = lh.getLedgerMetadata().getEnsemble(0).get(1); + BookieId bookie1 = lh.getLedgerMetadata().getEnsembleAt(0).get(0); + BookieId bookie2 = lh.getLedgerMetadata().getEnsembleAt(0).get(1); try { // we read an entry that is not added lh.readEntries(10, 10); diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestBookieWatcher.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestBookieWatcher.java index 69fe345f2d0..6c2f004826c 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestBookieWatcher.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestBookieWatcher.java @@ -23,12 +23,13 @@ import static org.junit.Assert.fail; import java.io.IOException; +import java.util.Collections; +import java.util.Set; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; - import lombok.Cleanup; - import org.apache.bookkeeper.conf.ClientConfiguration; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.test.BookKeeperClusterTestCase; import org.apache.bookkeeper.zookeeper.ZooKeeperClient; import org.apache.zookeeper.KeeperException; @@ -37,6 +38,7 @@ import org.apache.zookeeper.Watcher.Event.EventType; import org.apache.zookeeper.Watcher.Event.KeeperState; import org.apache.zookeeper.ZooKeeper; +import org.junit.Assert; import org.junit.Test; /** @@ -68,6 +70,58 @@ public void process(WatchedEvent event) { newZk.close(); } + /** + * Test to validate behavior of the isBookieUnavailable method. + * Because the method relies on getBookies and getReadOnlyBookies, + * these methods are essentially tested here as well. 
+ * + * @throws Exception + */ + @Test + public void testBookieWatcherIsBookieUnavailable() throws Exception { + BookieWatcher bookieWatcher = bkc.getBookieWatcher(); + + Set<BookieId> writableBookies1 = bookieWatcher.getBookies(); + Set<BookieId> readonlyBookies1 = bookieWatcher.getReadOnlyBookies(); + + Assert.assertEquals("There should be writable bookies initially.", 2, writableBookies1.size()); + Assert.assertEquals("There should be no read only bookies initially.", + Collections.emptySet(), readonlyBookies1); + + BookieId bookieId0 = getBookie(0); + BookieId bookieId1 = getBookie(1); + + boolean isUnavailable1 = bookieWatcher.isBookieUnavailable(bookieId0); + Assert.assertFalse("The bookie should not be unavailable.", isUnavailable1); + + // Next, set to read only, which is still available + setBookieToReadOnly(bookieId0); + + Set<BookieId> writableBookies2 = bookieWatcher.getBookies(); + Set<BookieId> readonlyBookies2 = bookieWatcher.getReadOnlyBookies(); + + Assert.assertEquals("There should be one writable bookie.", + Collections.singleton(bookieId1), writableBookies2); + Assert.assertEquals("There should be one read only bookie.", + Collections.singleton(bookieId0), readonlyBookies2); + + boolean isUnavailable2 = bookieWatcher.isBookieUnavailable(bookieId0); + Assert.assertFalse("The bookie should not be unavailable.", isUnavailable2); + + // Next, kill it, which should make it unavailable + killBookieAndWaitForZK(0); + + Set<BookieId> writableBookies3 = bookieWatcher.getBookies(); + Set<BookieId> readonlyBookies3 = bookieWatcher.getReadOnlyBookies(); + + Assert.assertEquals("There should be one writable bookie.", + Collections.singleton(bookieId1), writableBookies3); + Assert.assertEquals("There should be no read only bookies.", Collections.emptySet(), readonlyBookies3); + + boolean isUnavailable3 = bookieWatcher.isBookieUnavailable(bookieId0); + Assert.assertTrue("The bookie should be unavailable.", isUnavailable3); + } + @Test public void testBookieWatcherSurviveWhenSessionExpired() throws Exception { final int timeout = 2000; @@ -104,7 +158,7 @@ private void runBookieWatcherWhenSessionExpired(ZooKeeper zk, int timeout, boole ClientConfiguration conf = new ClientConfiguration(); conf.setMetadataServiceUri(metadataServiceUri); - try (BookKeeper bkc = new BookKeeper(conf, zk)) { + try (BookKeeperTestClient bkc = new BookKeeperTestClient(conf, zk)) { LedgerHandle lh; try { diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestDelayEnsembleChange.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestDelayEnsembleChange.java index e8da86e0f72..581d46905fe 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestDelayEnsembleChange.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestDelayEnsembleChange.java @@ -30,17 +30,17 @@ import static org.junit.Assert.assertTrue; import io.netty.buffer.ByteBuf; - import java.util.ArrayList; import java.util.List; import java.util.concurrent.CountDownLatch; import java.util.concurrent.atomic.AtomicLong; - import org.apache.bookkeeper.client.BookKeeper.DigestType; +import org.apache.bookkeeper.client.api.LedgerMetadata; import org.apache.bookkeeper.conf.ServerConfiguration; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.ReadEntryCallback; import org.apache.bookkeeper.test.BookKeeperClusterTestCase; +import org.apache.bookkeeper.util.TestUtils; import org.junit.Before; import org.junit.Test; import
org.slf4j.Logger; @@ -103,9 +103,9 @@ private void verifyEntries(LedgerHandle lh, long startEntry, long untilEntry, LedgerMetadata md = lh.getLedgerMetadata(); for (long eid = startEntry; eid < untilEntry; eid++) { - List<BookieSocketAddress> addresses = md.getEnsemble(eid); + List<BookieId> addresses = md.getEnsembleAt(eid); VerificationCallback callback = new VerificationCallback(addresses.size()); - for (BookieSocketAddress addr : addresses) { + for (BookieId addr : addresses) { bkc.getBookieClient().readEntry(addr, lh.getId(), eid, callback, addr, 0, null); } @@ -121,9 +121,9 @@ private void verifyEntriesRange(LedgerHandle lh, long startEntry, long untilEntr LedgerMetadata md = lh.getLedgerMetadata(); for (long eid = startEntry; eid < untilEntry; eid++) { - List<BookieSocketAddress> addresses = md.getEnsemble(eid); + List<BookieId> addresses = md.getEnsembleAt(eid); VerificationCallback callback = new VerificationCallback(addresses.size()); - for (BookieSocketAddress addr : addresses) { + for (BookieId addr : addresses) { bkc.getBookieClient().readEntry(addr, lh.getId(), eid, callback, addr, 0, null); } @@ -155,12 +155,10 @@ public void testNotChangeEnsembleIfNotBrokenAckQuorum() throws Exception { // ensure there is no ensemble changed assertEquals("There should be no ensemble change if delaying ensemble change is enabled.", - 1, lh.getLedgerMetadata().getEnsembles().size()); + 1, lh.getLedgerMetadata().getAllEnsembles().size()); - bsConfs.add(conf0); - bs.add(startBookie(conf0)); - bsConfs.add(conf1); - bs.add(startBookie(conf1)); + startAndAddBookie(conf0); + startAndAddBookie(conf1); for (int i = 2 * numEntries; i < 3 * numEntries; i++) { lh.addEntry(data); @@ -168,7 +166,7 @@ public void testNotChangeEnsembleIfNotBrokenAckQuorum() throws Exception { // ensure there is no ensemble changed assertEquals("There should be no ensemble change if delaying ensemble change is enabled.", - 1, lh.getLedgerMetadata().getEnsembles().size()); + 1, lh.getLedgerMetadata().getAllEnsembles().size()); // check entries verifyEntries(lh, 0, numEntries, 5, 0); @@ -192,11 +190,16 @@ public void testChangeEnsembleIfBrokenAckQuorum() throws Exception { lh.addEntry(data); } - for (BookieSocketAddress addr : lh.getLedgerMetadata().getEnsembles().get(0L)) { + for (BookieId addr : lh.getLedgerMetadata().getAllEnsembles().get(0L)) { + StringBuilder nameBuilder = new StringBuilder(CLIENT_SCOPE); + nameBuilder.append('.'). + append("bookie_"). + append(TestUtils.buildStatsCounterPathFromBookieID(addr)). + append('.'). + append(LEDGER_ENSEMBLE_BOOKIE_DISTRIBUTION); assertTrue( LEDGER_ENSEMBLE_BOOKIE_DISTRIBUTION + " should be > 0 for " + addr, - bkc.getTestStatsProvider().getCounter( - CLIENT_SCOPE + "."
+ LEDGER_ENSEMBLE_BOOKIE_DISTRIBUTION + "-" + addr) + bkc.getTestStatsProvider().getCounter(nameBuilder.toString()) .get() > 0); } assertTrue( @@ -221,7 +224,7 @@ public void testChangeEnsembleIfBrokenAckQuorum() throws Exception { // ensure there is no ensemble changed assertEquals("There should be no ensemble change if delaying ensemble change is enabled.", - 1, lh.getLedgerMetadata().getEnsembles().size()); + 1, lh.getLedgerMetadata().getAllEnsembles().size()); assertTrue( "Stats should not have captured an ensemble change", bkc.getTestStatsProvider().getOpStatsLogger( @@ -238,7 +241,7 @@ public void testChangeEnsembleIfBrokenAckQuorum() throws Exception { // ensure there is no ensemble changed assertEquals("There should be no ensemble change if delaying ensemble change is enabled.", - 1, lh.getLedgerMetadata().getEnsembles().size()); + 1, lh.getLedgerMetadata().getAllEnsembles().size()); logger.info("Kill bookie 2 and write another {} entries.", numEntries); @@ -250,27 +253,24 @@ public void testChangeEnsembleIfBrokenAckQuorum() throws Exception { // ensemble change should kick in assertEquals("There should be ensemble change if ack quorum couldn't be formed.", - 2, lh.getLedgerMetadata().getEnsembles().size()); + 2, lh.getLedgerMetadata().getAllEnsembles().size()); assertTrue( "Stats should have captured an ensemble change", bkc.getTestStatsProvider().getOpStatsLogger( CLIENT_SCOPE + "." + WATCHER_SCOPE + "." + REPLACE_BOOKIE_TIME) .getSuccessCount() > 0); - List<BookieSocketAddress> firstFragment = lh.getLedgerMetadata().getEnsemble(0); - List<BookieSocketAddress> secondFragment = lh.getLedgerMetadata().getEnsemble(3 * numEntries); + List<BookieId> firstFragment = lh.getLedgerMetadata().getEnsembleAt(0); + List<BookieId> secondFragment = lh.getLedgerMetadata().getEnsembleAt(3 * numEntries); assertFalse(firstFragment.get(0).equals(secondFragment.get(0))); assertFalse(firstFragment.get(1).equals(secondFragment.get(1))); assertFalse(firstFragment.get(2).equals(secondFragment.get(2))); assertEquals(firstFragment.get(3), secondFragment.get(3)); assertEquals(firstFragment.get(4), secondFragment.get(4)); - bsConfs.add(conf0); - bs.add(startBookie(conf0)); - bsConfs.add(conf1); - bs.add(startBookie(conf1)); - bsConfs.add(conf2); - bs.add(startBookie(conf2)); + startAndAddBookie(conf0); + startAndAddBookie(conf1); + startAndAddBookie(conf2); for (int i = 4 * numEntries; i < 5 * numEntries; i++) { lh.addEntry(data); @@ -278,7 +278,7 @@ public void testChangeEnsembleIfBrokenAckQuorum() throws Exception { // ensure there is no ensemble changed assertEquals("There should be no ensemble change if delaying ensemble change is enabled.", - 2, lh.getLedgerMetadata().getEnsembles().size()); + 2, lh.getLedgerMetadata().getAllEnsembles().size()); // check entries verifyEntries(lh, 0, numEntries, 5, 0); @@ -316,14 +316,11 @@ public void testEnsembleChangeWithNotEnoughBookies() throws Exception { // ensure there is ensemble changed assertEquals("There should be ensemble change if ack quorum is broken.", - 2, lh.getLedgerMetadata().getEnsembles().size()); + 2, lh.getLedgerMetadata().getAllEnsembles().size()); - bsConfs.add(conf0); - bs.add(startBookie(conf0)); - bsConfs.add(conf1); - bs.add(startBookie(conf1)); - bsConfs.add(conf2); - bs.add(startBookie(conf2)); + startAndAddBookie(conf0); + startAndAddBookie(conf1); + startAndAddBookie(conf2); for (int i = 2 * numEntries; i < 3 * numEntries; i++) { lh.addEntry(data); @@ -331,7 +328,7 @@ public void testEnsembleChangeWithNotEnoughBookies() throws Exception { // ensure there is no ensemble changed assertEquals("There
should be no ensemble change after adding failed bookies back.", - 2, lh.getLedgerMetadata().getEnsembles().size()); + 2, lh.getLedgerMetadata().getAllEnsembles().size()); // check entries verifyEntries(lh, 0, numEntries, 5, 0); @@ -372,11 +369,10 @@ public void testEnsembleChangeWithMoreBookieFailures() throws Exception { // ensure there is no ensemble changed assertEquals("There should be ensemble change if breaking ack quorum.", - 2, lh.getLedgerMetadata().getEnsembles().size()); + 2, lh.getLedgerMetadata().getAllEnsembles().size()); for (ServerConfiguration conf : confs) { - bsConfs.add(conf); - bs.add(startBookie(conf)); + startAndAddBookie(conf); } for (int i = 2 * numEntries; i < 3 * numEntries; i++) { @@ -386,7 +382,7 @@ public void testEnsembleChangeWithMoreBookieFailures() throws Exception { // ensure there is no ensemble changed assertEquals("There should not be ensemble changed if delaying ensemble change is enabled.", - 2, lh.getLedgerMetadata().getEnsembles().size()); + 2, lh.getLedgerMetadata().getAllEnsembles().size()); // check entries verifyEntries(lh, 0, numEntries, 5, 0); @@ -414,7 +410,7 @@ public void testChangeEnsembleIfBookieReadOnly() throws Exception { // ensure there is no ensemble changed assertEquals("The ensemble should change when a bookie is readonly even if we delay ensemble change.", - 2, lh.getLedgerMetadata().getEnsembles().size()); + 2, lh.getLedgerMetadata().getAllEnsembles().size()); } @@ -429,8 +425,8 @@ public void testChangeEnsembleSecondBookieReadOnly() throws Exception { lh.addEntry(data); } - BookieSocketAddress failedBookie = lh.getCurrentEnsemble().get(0); - BookieSocketAddress readOnlyBookie = lh.getCurrentEnsemble().get(1); + BookieId failedBookie = lh.getCurrentEnsemble().get(0); + BookieId readOnlyBookie = lh.getCurrentEnsemble().get(1); ServerConfiguration conf0 = killBookie(failedBookie); for (int i = 0; i < numEntries; i++) { @@ -438,7 +434,7 @@ public void testChangeEnsembleSecondBookieReadOnly() throws Exception { } assertEquals("There should be ensemble change if delaying ensemble change is enabled.", - 1, lh.getLedgerMetadata().getEnsembles().size()); + 1, lh.getLedgerMetadata().getAllEnsembles().size()); // kill two bookies, but we still have 3 bookies for the ack quorum. setBookieToReadOnly(readOnlyBookie); @@ -449,7 +445,7 @@ public void testChangeEnsembleSecondBookieReadOnly() throws Exception { // ensure there is no ensemble changed assertEquals("The ensemble should change when a bookie is readonly even if we delay ensemble change.", - 2, lh.getLedgerMetadata().getEnsembles().size()); + 2, lh.getLedgerMetadata().getAllEnsembles().size()); assertEquals(3, lh.getCurrentEnsemble().size()); assertFalse(lh.getCurrentEnsemble().contains(failedBookie)); assertFalse(lh.getCurrentEnsemble().contains(readOnlyBookie)); diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestDisableEnsembleChange.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestDisableEnsembleChange.java index f114cbf70f3..286e8152e24 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestDisableEnsembleChange.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestDisableEnsembleChange.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file @@ -20,27 +20,26 @@ */ package org.apache.bookkeeper.client; -import static com.google.common.base.Charsets.UTF_8; +import static java.nio.charset.StandardCharsets.UTF_8; import static org.apache.bookkeeper.util.BookKeeperConstants.FEATURE_DISABLE_ENSEMBLE_CHANGE; +import static org.apache.bookkeeper.util.TestUtils.assertEventuallyTrue; import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; import com.google.common.util.concurrent.RateLimiter; - import java.util.ArrayList; import java.util.List; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; - import org.apache.bookkeeper.conf.ClientConfiguration; import org.apache.bookkeeper.conf.ServerConfiguration; import org.apache.bookkeeper.feature.SettableFeature; import org.apache.bookkeeper.feature.SettableFeatureProvider; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.test.BookKeeperClusterTestCase; import org.junit.Test; import org.slf4j.Logger; @@ -87,9 +86,9 @@ void disableEnsembleChangeTest(boolean startNewBookie) throws Exception { final AtomicBoolean failTest = new AtomicBoolean(false); final byte[] entry = "test-disable-ensemble-change".getBytes(UTF_8); - assertEquals(1, lh.getLedgerMetadata().getEnsembles().size()); - ArrayList<BookieSocketAddress> ensembleBeforeFailure = - new ArrayList<>(lh.getLedgerMetadata().getEnsembles().entrySet().iterator().next().getValue()); + assertEquals(1, lh.getLedgerMetadata().getAllEnsembles().size()); + ArrayList<BookieId> ensembleBeforeFailure = + new ArrayList<>(lh.getLedgerMetadata().getAllEnsembles().entrySet().iterator().next().getValue()); final RateLimiter rateLimiter = RateLimiter.create(10); @@ -119,11 +118,11 @@ public void run() { // check the ensemble after failure assertEquals("No new ensemble should be added when disable ensemble change.", - 1, lh.getLedgerMetadata().getEnsembles().size()); - ArrayList<BookieSocketAddress> ensembleAfterFailure = - new ArrayList<>(lh.getLedgerMetadata().getEnsembles().entrySet().iterator().next().getValue()); - assertArrayEquals(ensembleBeforeFailure.toArray(new BookieSocketAddress[ensembleBeforeFailure.size()]), - ensembleAfterFailure.toArray(new BookieSocketAddress[ensembleAfterFailure.size()])); + 1, lh.getLedgerMetadata().getAllEnsembles().size()); + ArrayList<BookieId> ensembleAfterFailure = + new ArrayList<>(lh.getLedgerMetadata().getAllEnsembles().entrySet().iterator().next().getValue()); + assertArrayEquals(ensembleBeforeFailure.toArray(new BookieId[ensembleBeforeFailure.size()]), + ensembleAfterFailure.toArray(new BookieId[ensembleAfterFailure.size()])); // enable ensemble change disableEnsembleChangeFeature.set(false); @@ -160,12 +159,13 @@ public void run() { assertFalse("Ledger should be closed when enable ensemble change again.", lh.getLedgerMetadata().isClosed()); assertEquals("New ensemble should be added when enable ensemble change again.", - 2, lh.getLedgerMetadata().getEnsembles().size()); + 2, lh.getLedgerMetadata().getAllEnsembles().size()); } else { assertTrue("Should fail adding entries when enable ensemble change again.", failTest.get()); - assertTrue("Ledger should be closed when enable ensemble change again.", - lh.getLedgerMetadata().isClosed()); + // The ledger close occurs in the background, so assert that it happens eventually +
assertEventuallyTrue("Ledger should be closed when enable ensemble change again.", + () -> lh.getLedgerMetadata().isClosed()); } } @@ -207,8 +207,7 @@ public void addComplete(int rc, LedgerHandle lh, long entryId, Object ctx) { addLatch.await(1000, TimeUnit.MILLISECONDS)); assertEquals(res.get(), 0xdeadbeef); // start the original bookie - bsConfs.add(killedConf); - bs.add(startBookie(killedConf)); + startAndAddBookie(killedConf); assertTrue("Add entry operation should complete at this point.", addLatch.await(10000, TimeUnit.MILLISECONDS)); assertEquals(res.get(), BKException.Code.OK); @@ -239,7 +238,7 @@ public void testRetrySlowBookie() throws Exception { lh.addEntry(entry); } - List<BookieSocketAddress> curEns = lh.getCurrentEnsemble(); + List<BookieId> curEns = lh.getCurrentEnsemble(); final CountDownLatch wakeupLatch = new CountDownLatch(1); final CountDownLatch suspendLatch = new CountDownLatch(1); diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestFencing.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestFencing.java index d0787d5157b..34751525aa6 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestFencing.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestFencing.java @@ -26,10 +26,9 @@ import java.util.concurrent.CountDownLatch; import java.util.concurrent.CyclicBarrier; - import org.apache.bookkeeper.client.BookKeeper.DigestType; import org.apache.bookkeeper.conf.ClientConfiguration; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.test.BookKeeperClusterTestCase; import org.junit.Test; import org.slf4j.Logger; @@ -182,7 +181,7 @@ public void run() { CyclicBarrier barrier = new CyclicBarrier(numRecovery + 1); - LedgerOpenThread threads[] = new LedgerOpenThread[numRecovery]; + LedgerOpenThread[] threads = new LedgerOpenThread[numRecovery]; for (int i = 0; i < numRecovery; i++) { threads[i] = new LedgerOpenThread(i, digestType, writelh.getId(), barrier); threads[i].start(); } @@ -266,7 +265,7 @@ public void testFencingInteractionWithBookieRecovery() throws Exception { writelh.addEntry(tmp.getBytes()); } - BookieSocketAddress bookieToKill = writelh.getLedgerMetadata().getEnsemble(numEntries).get(0); + BookieId bookieToKill = writelh.getLedgerMetadata().getEnsembleAt(numEntries).get(0); killBookie(bookieToKill); // write entries to change ensemble @@ -318,7 +317,7 @@ public void testFencingInteractionWithBookieRecovery2() throws Exception { LedgerHandle readlh = bkc.openLedger(writelh.getId(), digestType, "testPasswd".getBytes()); // should be fenced by now - BookieSocketAddress bookieToKill = writelh.getLedgerMetadata().getEnsemble(numEntries).get(0); + BookieId bookieToKill = writelh.getLedgerMetadata().getEnsembleAt(numEntries).get(0); killBookie(bookieToKill); admin.recoverBookieData(bookieToKill); @@ -351,7 +350,7 @@ public void testFencingWithHungBookie() throws Exception { } CountDownLatch sleepLatch = new CountDownLatch(1); - sleepBookie(writelh.getLedgerMetadata().getEnsembles().get(0L).get(1), sleepLatch); + sleepBookie(writelh.getLedgerMetadata().getAllEnsembles().get(0L).get(1), sleepLatch); LedgerHandle readlh = bkc.openLedger(writelh.getId(), digestType, "testPasswd".getBytes()); diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestGetBookieInfoTimeout.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestGetBookieInfoTimeout.java index 6291413614e..c402451588b 100644 ---
a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestGetBookieInfoTimeout.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestGetBookieInfoTimeout.java @@ -21,22 +21,23 @@ * */ +import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; +import io.netty.buffer.UnpooledByteBufAllocator; import io.netty.channel.EventLoopGroup; import io.netty.channel.nio.NioEventLoopGroup; import io.netty.util.concurrent.DefaultThreadFactory; - +import java.util.Map; import java.util.concurrent.CountDownLatch; import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; - import org.apache.bookkeeper.client.BKException.Code; import org.apache.bookkeeper.client.BookKeeper.DigestType; import org.apache.bookkeeper.client.BookieInfoReader.BookieInfo; import org.apache.bookkeeper.common.util.OrderedExecutor; import org.apache.bookkeeper.conf.ClientConfiguration; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.proto.BookieClient; import org.apache.bookkeeper.proto.BookieClientImpl; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GetBookieInfoCallback; @@ -61,8 +62,8 @@ public class TestGetBookieInfoTimeout extends BookKeeperClusterTestCase { private ScheduledExecutorService scheduler; public TestGetBookieInfoTimeout() { - super(10); - this.digestType = DigestType.CRC32; + super(5); + this.digestType = DigestType.CRC32C; } @Before @@ -99,16 +100,16 @@ public void testGetBookieInfoTimeout() throws Exception { // set timeout for getBookieInfo to be 2 secs and cause one of the bookies to go to sleep for 3X that time ClientConfiguration cConf = new ClientConfiguration(); cConf.setGetBookieInfoTimeout(2); + cConf.setReadEntryTimeout(100000); // by default we are using readEntryTimeout for timeouts - final BookieSocketAddress bookieToSleep = writelh.getLedgerMetadata().getEnsemble(0).get(0); + final BookieId bookieToSleep = writelh.getLedgerMetadata().getEnsembleAt(0).get(0); int sleeptime = cConf.getBookieInfoTimeout() * 3; CountDownLatch latch = sleepBookie(bookieToSleep, sleeptime); latch.await(); // try to get bookie info from the sleeping bookie. 
It should fail with timeout error - BookieSocketAddress addr = new BookieSocketAddress(bookieToSleep.getSocketAddress().getHostString(), - bookieToSleep.getPort()); - BookieClient bc = new BookieClientImpl(cConf, eventLoopGroup, executor, scheduler, NullStatsLogger.INSTANCE); + BookieClient bc = new BookieClientImpl(cConf, eventLoopGroup, UnpooledByteBufAllocator.DEFAULT, executor, + scheduler, NullStatsLogger.INSTANCE, bkc.getBookieAddressResolver()); long flags = BookkeeperProtocol.GetBookieInfoRequest.Flags.FREE_DISK_SPACE_VALUE | BookkeeperProtocol.GetBookieInfoRequest.Flags.TOTAL_DISK_CAPACITY_VALUE; @@ -126,7 +127,7 @@ class CallbackObj { } } CallbackObj obj = new CallbackObj(flags); - bc.getBookieInfo(addr, flags, new GetBookieInfoCallback() { + bc.getBookieInfo(bookieToSleep, flags, new GetBookieInfoCallback() { @Override public void getBookieInfoComplete(int rc, BookieInfo bInfo, Object ctx) { CallbackObj obj = (CallbackObj) ctx; @@ -145,7 +146,18 @@ public void getBookieInfoComplete(int rc, BookieInfo bInfo, Object ctx) { }, obj); obj.latch.await(); - LOG.debug("Return code: " + obj.rc); + if (LOG.isDebugEnabled()) { + LOG.debug("Return code: " + obj.rc); + } assertTrue("GetBookieInfo failed with unexpected error code: " + obj.rc, obj.rc == Code.TimeoutException); } + + @Test + public void testGetBookieInfoWithAllStoppedBookies() throws Exception { + Map<BookieId, BookieInfo> bookieInfo = bkc.getBookieInfo(); + assertEquals(5, bookieInfo.size()); + stopAllBookies(false); + bookieInfo = bkc.getBookieInfo(); + assertEquals(0, bookieInfo.size()); + } } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestLedgerChecker.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestLedgerChecker.java index 16a09585370..222cd0aac4c 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestLedgerChecker.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestLedgerChecker.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements.
See the NOTICE file @@ -23,12 +23,17 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; import java.util.List; import java.util.Set; import java.util.concurrent.CountDownLatch; import java.util.concurrent.atomic.AtomicInteger; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GenericCallback; import org.apache.bookkeeper.test.BookKeeperClusterTestCase; import org.junit.Test; @@ -78,7 +83,7 @@ public void testChecker() throws Exception { for (int i = 0; i < 10; i++) { lh.addEntry(TEST_LEDGER_ENTRY_DATA); } - BookieSocketAddress replicaToKill = lh.getLedgerMetadata().getEnsembles() .get(0L).get(0); + BookieId replicaToKill = lh.getLedgerMetadata().getAllEnsembles() .get(0L).get(0); LOG.info("Killing {}", replicaToKill); killBookie(replicaToKill); @@ -97,8 +102,8 @@ public void testChecker() throws Exception { assertTrue("Fragment should be missing from first replica", result.iterator().next().getAddresses().contains(replicaToKill)); - BookieSocketAddress replicaToKill2 = lh.getLedgerMetadata() - .getEnsembles().get(0L).get(1); + BookieId replicaToKill2 = lh.getLedgerMetadata() + .getAllEnsembles().get(0L).get(1); LOG.info("Killing {}", replicaToKill2); killBookie(replicaToKill2); @@ -137,9 +142,9 @@ public void testShouldNotGetTheFragmentIfThereIsNoMissedEntry() // Entry should have been added to the first 2 bookies. // Kill the 3rd BK from ensemble. - List<BookieSocketAddress> firstEnsemble = lh.getLedgerMetadata() - .getEnsembles().get(0L); - BookieSocketAddress lastBookieFromEnsemble = firstEnsemble.get(2); + List<BookieId> firstEnsemble = lh.getLedgerMetadata() + .getAllEnsembles().get(0L); + BookieId lastBookieFromEnsemble = firstEnsemble.get(2); LOG.info("Killing " + lastBookieFromEnsemble + " from ensemble=" + firstEnsemble); killBookie(lastBookieFromEnsemble); @@ -147,13 +152,13 @@ public void testShouldNotGetTheFragmentIfThereIsNoMissedEntry() startNewBookie(); LOG.info("Ensembles after first entry :" - + lh.getLedgerMetadata().getEnsembles()); + + lh.getLedgerMetadata().getAllEnsembles()); // Adding one more entry. Here the ensemble should be reformed.
lh.addEntry(TEST_LEDGER_ENTRY_DATA); LOG.info("Ensembles after second entry :" - + lh.getLedgerMetadata().getEnsembles()); + + lh.getLedgerMetadata().getAllEnsembles()); Set result = getUnderReplicatedFragments(lh); @@ -163,7 +168,7 @@ public void testShouldNotGetTheFragmentIfThereIsNoMissedEntry() LOG.info("unreplicated fragment: {}", r); } - assertEquals("Should not have any missing fragment", 0, result.size()); + assertEquals("Empty fragment should be considered missing", 1, result.size()); } /** @@ -180,13 +185,13 @@ public void testShouldGetTwoFrgamentsIfTwoBookiesFailedInSameEnsemble() startNewBookie(); lh.addEntry(TEST_LEDGER_ENTRY_DATA); - List firstEnsemble = lh.getLedgerMetadata() - .getEnsembles().get(0L); + List firstEnsemble = lh.getLedgerMetadata() + .getAllEnsembles().get(0L); - BookieSocketAddress firstBookieFromEnsemble = firstEnsemble.get(0); + BookieId firstBookieFromEnsemble = firstEnsemble.get(0); killBookie(firstEnsemble, firstBookieFromEnsemble); - BookieSocketAddress secondBookieFromEnsemble = firstEnsemble.get(1); + BookieId secondBookieFromEnsemble = firstEnsemble.get(1); killBookie(firstEnsemble, secondBookieFromEnsemble); lh.addEntry(TEST_LEDGER_ENTRY_DATA); Set result = getUnderReplicatedFragments(lh); @@ -197,7 +202,7 @@ public void testShouldGetTwoFrgamentsIfTwoBookiesFailedInSameEnsemble() LOG.info("unreplicated fragment: {}", r); } - assertEquals("There should be 1 fragments", 1, result.size()); + assertEquals("Empty fragment should be considered missing", 2, result.size()); assertEquals("There should be 2 failed bookies in the fragment", 2, result.iterator().next().getBookiesIndexes().size()); } @@ -213,9 +218,9 @@ public void testShouldNotGetAnyFragmentIfNoLedgerPresent() LedgerHandle lh = bkc.createLedger(3, 2, BookKeeper.DigestType.CRC32, TEST_LEDGER_PASSWORD); - List firstEnsemble = lh.getLedgerMetadata() - .getEnsembles().get(0L); - BookieSocketAddress firstBookieFromEnsemble = firstEnsemble.get(0); + List firstEnsemble = lh.getLedgerMetadata() + .getAllEnsembles().get(0L); + BookieId firstBookieFromEnsemble = firstEnsemble.get(0); killBookie(firstBookieFromEnsemble); startNewBookie(); lh.addEntry(TEST_LEDGER_ENTRY_DATA); @@ -258,9 +263,9 @@ public void testShouldGetFailedEnsembleNumberOfFgmntsIfEnsembleBookiesFailedOnNe } // Kill all three bookies - List firstEnsemble = lh.getLedgerMetadata() - .getEnsembles().get(0L); - for (BookieSocketAddress bkAddr : firstEnsemble) { + List firstEnsemble = lh.getLedgerMetadata() + .getAllEnsembles().get(0L); + for (BookieId bkAddr : firstEnsemble) { killBookie(firstEnsemble, bkAddr); } @@ -302,14 +307,14 @@ public void testShouldNotGetAnyFragmentWithEmptyLedger() throws Exception { public void testShouldGet2FragmentsWithEmptyLedgerButBookiesDead() throws Exception { LedgerHandle lh = bkc.createLedger(3, 2, BookKeeper.DigestType.CRC32, TEST_LEDGER_PASSWORD); - for (BookieSocketAddress b : lh.getLedgerMetadata().getEnsembles().get(0L)) { + for (BookieId b : lh.getLedgerMetadata().getAllEnsembles().get(0L)) { killBookie(b); } Set result = getUnderReplicatedFragments(lh); assertNotNull("Result shouldn't be null", result); assertEquals("There should be 1 fragments.", 1, result.size()); - assertEquals("There should be 2 failed bookies in the fragment", - 2, result.iterator().next().getBookiesIndexes().size()); + assertEquals("There should be 3 failed bookies in the fragment", + 3, result.iterator().next().getBookiesIndexes().size()); } /** @@ -321,9 +326,9 @@ public void 
testShouldGetOneFragmentWithSingleEntryOpenedLedger() throws Excepti LedgerHandle lh = bkc.createLedger(3, 3, BookKeeper.DigestType.CRC32, TEST_LEDGER_PASSWORD); lh.addEntry(TEST_LEDGER_ENTRY_DATA); - List firstEnsemble = lh.getLedgerMetadata() - .getEnsembles().get(0L); - BookieSocketAddress lastBookieFromEnsemble = firstEnsemble.get(0); + List firstEnsemble = lh.getLedgerMetadata() + .getAllEnsembles().get(0L); + BookieId lastBookieFromEnsemble = firstEnsemble.get(0); LOG.info("Killing " + lastBookieFromEnsemble + " from ensemble=" + firstEnsemble); killBookie(lastBookieFromEnsemble); @@ -355,10 +360,10 @@ public void testSingleEntryAfterEnsembleChange() throws Exception { for (int i = 0; i < 10; i++) { lh.addEntry(TEST_LEDGER_ENTRY_DATA); } - List firstEnsemble = lh.getLedgerMetadata() - .getEnsembles().get(0L); + List firstEnsemble = lh.getLedgerMetadata() + .getAllEnsembles().get(0L); DistributionSchedule.WriteSet writeSet = lh.getDistributionSchedule().getWriteSet(lh.getLastAddPushed()); - BookieSocketAddress lastBookieFromEnsemble = firstEnsemble.get(writeSet.get(0)); + BookieId lastBookieFromEnsemble = firstEnsemble.get(writeSet.get(0)); LOG.info("Killing " + lastBookieFromEnsemble + " from ensemble=" + firstEnsemble); killBookie(lastBookieFromEnsemble); @@ -400,11 +405,11 @@ public void testSingleEntryAfterEnsembleChange() throws Exception { public void testClosedEmptyLedger() throws Exception { LedgerHandle lh = bkc.createLedger(3, 3, BookKeeper.DigestType.CRC32, TEST_LEDGER_PASSWORD); - List firstEnsemble = lh.getLedgerMetadata() - .getEnsembles().get(0L); + List firstEnsemble = lh.getLedgerMetadata() + .getAllEnsembles().get(0L); lh.close(); - BookieSocketAddress lastBookieFromEnsemble = firstEnsemble.get(0); + BookieId lastBookieFromEnsemble = firstEnsemble.get(0); LOG.info("Killing " + lastBookieFromEnsemble + " from ensemble=" + firstEnsemble); killBookie(lastBookieFromEnsemble); @@ -415,8 +420,8 @@ public void testClosedEmptyLedger() throws Exception { Set result = getUnderReplicatedFragments(lh1); assertNotNull("Result shouldn't be null", result); - assertEquals("There should be 0 fragment. But returned fragments are " - + result, 0, result.size()); + assertEquals("Empty fragment should be considered missing" + + result, 1, result.size()); } /** @@ -427,13 +432,13 @@ public void testClosedEmptyLedger() throws Exception { public void testClosedSingleEntryLedger() throws Exception { LedgerHandle lh = bkc.createLedger(3, 2, BookKeeper.DigestType.CRC32, TEST_LEDGER_PASSWORD); - List firstEnsemble = lh.getLedgerMetadata() - .getEnsembles().get(0L); + List firstEnsemble = lh.getLedgerMetadata() + .getAllEnsembles().get(0L); lh.addEntry(TEST_LEDGER_ENTRY_DATA); lh.close(); // kill bookie 2 - BookieSocketAddress lastBookieFromEnsemble = firstEnsemble.get(2); + BookieId lastBookieFromEnsemble = firstEnsemble.get(2); LOG.info("Killing " + lastBookieFromEnsemble + " from ensemble=" + firstEnsemble); killBookie(lastBookieFromEnsemble); @@ -444,8 +449,8 @@ public void testClosedSingleEntryLedger() throws Exception { Set result = getUnderReplicatedFragments(lh1); assertNotNull("Result shouldn't be null", result); - assertEquals("There should be 0 fragment. 
But returned fragments are " - + result, 0, result.size()); + assertEquals("Empty fragment should be considered missing" + + result, 1, result.size()); lh1.close(); // kill bookie 1 @@ -463,8 +468,8 @@ public void testClosedSingleEntryLedger() throws Exception { assertNotNull("Result shouldn't be null", result); assertEquals("There should be 1 fragment. But returned fragments are " + result, 1, result.size()); - assertEquals("There should be 1 failed bookies in the fragment", - 1, result.iterator().next().getBookiesIndexes().size()); + assertEquals("There should be 2 failed bookies in the fragment", + 2, result.iterator().next().getBookiesIndexes().size()); lh1.close(); // kill bookie 0 @@ -482,21 +487,44 @@ public void testClosedSingleEntryLedger() throws Exception { assertNotNull("Result shouldn't be null", result); assertEquals("There should be 1 fragment. But returned fragments are " + result, 1, result.size()); - assertEquals("There should be 2 failed bookies in the fragment", - 2, result.iterator().next().getBookiesIndexes().size()); + assertEquals("There should be 3 failed bookies in the fragment", + 3, result.iterator().next().getBookiesIndexes().size()); lh1.close(); } + @Test + public void testVerifyLedgerFragmentSkipsUnavailableBookie() throws Exception { + // Initialize LedgerChecker with mocked watcher to validate interactions + BookieWatcher bookieWatcher = mock(BookieWatcher.class); + when(bookieWatcher.isBookieUnavailable(any())).thenReturn(true); + LedgerChecker mockedChecker = new LedgerChecker(bkc.getBookieClient(), bookieWatcher); + + LedgerHandle ledgerHandle = bkc.createLedger(BookKeeper.DigestType.CRC32, TEST_LEDGER_PASSWORD); + + // Add entries to ensure the right code path is validated + ledgerHandle.addEntry(TEST_LEDGER_ENTRY_DATA); + ledgerHandle.addEntry(TEST_LEDGER_ENTRY_DATA); + ledgerHandle.addEntry(TEST_LEDGER_ENTRY_DATA); + + CheckerCallback cb = new CheckerCallback(); + mockedChecker.checkLedger(ledgerHandle, cb); + Set result = cb.waitAndGetResult(); + + // Note that the bookieWatcher mock is set to make the ledger underreplicated + assertEquals("The one ledger should be considered underreplicated.", 1, result.size()); + verify(bookieWatcher, times(3)).isBookieUnavailable(any()); + } + private Set getUnderReplicatedFragments(LedgerHandle lh) throws InterruptedException { - LedgerChecker checker = new LedgerChecker(bkc); + LedgerChecker checker = new LedgerChecker(bkc, 1); CheckerCallback cb = new CheckerCallback(); checker.checkLedger(lh, cb); Set result = cb.waitAndGetResult(); return result; } - private void killBookie(List firstEnsemble, BookieSocketAddress ensemble) + private void killBookie(List firstEnsemble, BookieId ensemble) throws Exception { LOG.info("Killing " + ensemble + " from ensemble=" + firstEnsemble); killBookie(ensemble); diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestLedgerFragmentReplication.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestLedgerFragmentReplication.java index 9782ca7e13f..cea98d9492f 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestLedgerFragmentReplication.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestLedgerFragmentReplication.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file * distributed with this work for additional information @@ -23,20 +23,27 @@ import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; +import com.google.common.collect.Lists; import com.google.common.collect.Sets; - +import java.util.ArrayList; +import java.util.EnumSet; import java.util.Enumeration; +import java.util.HashSet; import java.util.List; import java.util.Map.Entry; import java.util.Set; import java.util.SortedMap; import java.util.concurrent.CountDownLatch; - +import java.util.function.BiConsumer; import org.apache.bookkeeper.client.BookKeeper.DigestType; +import org.apache.bookkeeper.client.api.LedgerMetadata; import org.apache.bookkeeper.client.api.WriteFlag; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.net.BookieSocketAddress; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GenericCallback; import org.apache.bookkeeper.test.BookKeeperClusterTestCase; +import org.apache.bookkeeper.versioning.LongVersion; +import org.apache.bookkeeper.versioning.Versioned; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -49,6 +56,7 @@ public class TestLedgerFragmentReplication extends BookKeeperClusterTestCase { private static final byte[] TEST_PSSWD = "testpasswd".getBytes(); private static final DigestType TEST_DIGEST_TYPE = BookKeeper.DigestType.CRC32; + private static final BiConsumer NOOP_BICONSUMER = (l, e) -> { }; private static final Logger LOG = LoggerFactory .getLogger(TestLedgerFragmentReplication.class); @@ -87,13 +95,13 @@ public void testReplicateLFShouldCopyFailedBookieFragmentsToTargetBookie() for (int i = 0; i < 10; i++) { lh.addEntry(data); } - BookieSocketAddress replicaToKill = lh.getLedgerMetadata().getEnsembles() + BookieId replicaToKill = lh.getLedgerMetadata().getAllEnsembles() .get(0L).get(0); - LOG.info("Killing Bookie", replicaToKill); + LOG.info("Killing Bookie : {}", replicaToKill); killBookie(replicaToKill); - BookieSocketAddress newBkAddr = startNewBookieAndReturnAddress(); + BookieId newBkAddr = startNewBookieAndReturnBookieId(); LOG.info("New Bookie addr : {}", newBkAddr); for (int i = 0; i < 10; i++) { @@ -107,15 +115,15 @@ public void testReplicateLFShouldCopyFailedBookieFragmentsToTargetBookie() // 0-9 entries should be copy to new bookie for (LedgerFragment lf : result) { - admin.replicateLedgerFragment(lh, lf); + admin.replicateLedgerFragment(lh, lf, NOOP_BICONSUMER); } // Killing all bookies except newly replicated bookie - SortedMap> allBookiesBeforeReplication = lh - .getLedgerMetadata().getEnsembles(); - for (Entry> entry : allBookiesBeforeReplication.entrySet()) { - List bookies = entry.getValue(); - for (BookieSocketAddress bookie : bookies) { + SortedMap> allBookiesBeforeReplication = lh + .getLedgerMetadata().getAllEnsembles(); + for (Entry> entry : allBookiesBeforeReplication.entrySet()) { + List bookies = entry.getValue(); + for (BookieId bookie : bookies) { if (newBkAddr.equals(bookie)) { continue; } @@ -141,11 +149,11 @@ public void testReplicateLFFailsOnlyOnLastUnClosedFragments() for (int i = 0; i < 10; i++) { lh.addEntry(data); } - BookieSocketAddress replicaToKill = lh.getLedgerMetadata().getEnsembles() + BookieId replicaToKill = lh.getLedgerMetadata().getAllEnsembles() .get(0L).get(0); startNewBookie(); - LOG.info("Killing Bookie", replicaToKill); + LOG.info("Killing Bookie : {}", replicaToKill); killBookie(replicaToKill); // Lets reform ensemble @@ -153,13 +161,13 @@ public void 
testReplicateLFFailsOnlyOnLastUnClosedFragments() lh.addEntry(data); } - BookieSocketAddress replicaToKill2 = lh.getLedgerMetadata() - .getEnsembles().get(0L).get(1); + BookieId replicaToKill2 = lh.getLedgerMetadata() + .getAllEnsembles().get(0L).get(1); - BookieSocketAddress newBkAddr = startNewBookieAndReturnAddress(); + BookieId newBkAddr = startNewBookieAndReturnBookieId(); LOG.info("New Bookie addr : {}", newBkAddr); - LOG.info("Killing Bookie", replicaToKill2); + LOG.info("Killing Bookie : {}", replicaToKill2); killBookie(replicaToKill2); Set result = getFragmentsToReplicate(lh); @@ -170,11 +178,11 @@ public void testReplicateLFFailsOnlyOnLastUnClosedFragments() int unclosedCount = 0; for (LedgerFragment lf : result) { if (lf.isClosed()) { - admin.replicateLedgerFragment(lh, lf); + admin.replicateLedgerFragment(lh, lf, NOOP_BICONSUMER); } else { unclosedCount++; try { - admin.replicateLedgerFragment(lh, lf); + admin.replicateLedgerFragment(lh, lf, NOOP_BICONSUMER); fail("Shouldn't be able to rereplicate unclosed ledger"); } catch (BKException bke) { // correct behaviour @@ -200,7 +208,7 @@ public void testReplicateLFShouldReturnFalseIfTheReplicationFails() } // Kill the first Bookie - BookieSocketAddress replicaToKill = lh.getLedgerMetadata().getEnsembles() + BookieId replicaToKill = lh.getLedgerMetadata().getAllEnsembles() .get(0L).get(0); killBookie(replicaToKill); LOG.info("Killed Bookie =" + replicaToKill); @@ -210,7 +218,7 @@ public void testReplicateLFShouldReturnFalseIfTheReplicationFails() lh.addEntry(data); } // Kill the second Bookie - replicaToKill = lh.getLedgerMetadata().getEnsembles().get(0L).get(0); + replicaToKill = lh.getLedgerMetadata().getAllEnsembles().get(0L).get(0); killBookie(replicaToKill); LOG.info("Killed Bookie =" + replicaToKill); @@ -218,7 +226,7 @@ public void testReplicateLFShouldReturnFalseIfTheReplicationFails() BookKeeperAdmin admin = new BookKeeperAdmin(baseClientConf); for (LedgerFragment lf : fragments) { try { - admin.replicateLedgerFragment(lh, lf); + admin.replicateLedgerFragment(lh, lf, NOOP_BICONSUMER); } catch (BKException.BKLedgerRecoveryException e) { // expected } @@ -232,19 +240,19 @@ public void testReplicateLFShouldReturnFalseIfTheReplicationFails() @Test public void testSplitIntoSubFragmentsWithDifferentFragmentBoundaries() throws Exception { - LedgerMetadata metadata = new LedgerMetadata(3, 3, 3, TEST_DIGEST_TYPE, - TEST_PSSWD) { - @Override - List getEnsemble(long entryId) { - return null; - } - - @Override - public boolean isClosed() { - return true; - } - }; - LedgerHandle lh = new LedgerHandle(bkc.getClientCtx(), 0, metadata, TEST_DIGEST_TYPE, + List ensemble = Lists.newArrayList(new BookieSocketAddress("192.0.2.1", 1234).toBookieId(), + new BookieSocketAddress("192.0.2.2", 1234).toBookieId(), + new BookieSocketAddress("192.0.2.3", 1234).toBookieId()); + LedgerMetadata metadata = LedgerMetadataBuilder.create() + .withId(124L).withEnsembleSize(3).withWriteQuorumSize(3).withAckQuorumSize(3) + .withPassword(TEST_PSSWD).withDigestType(TEST_DIGEST_TYPE.toApiDigestType()) + .withClosedState().withLastEntryId(-1).withLength(0) + .newEnsembleEntry(0L, ensemble) + .build(); + + LedgerHandle lh = new LedgerHandle(bkc.getClientCtx(), 0, + new Versioned<>(metadata, new LongVersion(0L)), + TEST_DIGEST_TYPE, TEST_PSSWD, WriteFlag.NONE); testSplitIntoSubFragments(10, 21, -1, 1, lh); testSplitIntoSubFragments(10, 21, 20, 1, lh); @@ -255,6 +263,8 @@ public boolean isClosed() { testSplitIntoSubFragments(22, 103, 11, 8, lh); 
testSplitIntoSubFragments(49, 51, 1, 3, lh); testSplitIntoSubFragments(11, 101, 3, 31, lh); + testSplitIntoSubFragments(0, -1, 1, 1, lh); + testSplitIntoSubFragments(0, -1, 10, 1, lh); } /** @@ -264,17 +274,7 @@ void testSplitIntoSubFragments(final long oriFragmentFirstEntry, final long oriFragmentLastEntry, long entriesPerSubFragment, long expectedSubFragments, LedgerHandle lh) { LedgerFragment fr = new LedgerFragment(lh, oriFragmentFirstEntry, - oriFragmentLastEntry, Sets.newHashSet(0)) { - @Override - public long getLastStoredEntryId() { - return oriFragmentLastEntry; - } - - @Override - public long getFirstStoredEntryId() { - return oriFragmentFirstEntry; - } - }; + oriFragmentLastEntry, Sets.newHashSet(0)); Set subFragments = LedgerFragmentReplicator .splitIntoSubFragments(lh, fr, entriesPerSubFragment); assertEquals(expectedSubFragments, subFragments.size()); @@ -338,4 +338,66 @@ private void verifyRecoveredLedgers(LedgerHandle lh, long startEntryId, } } + @Test + public void testSplitLedgerFragmentState() throws Exception { + int lastEntryId = 10; + int rereplicationEntryBatchSize = 10; + + List ensemble = new ArrayList(); + ensemble.add(BookieId.parse("bookie0:3181")); + ensemble.add(BookieId.parse("bookie1:3181")); + ensemble.add(BookieId.parse("bookie2:3181")); + ensemble.add(BookieId.parse("bookie3:3181")); + ensemble.add(BookieId.parse("bookie4:3181")); + ensemble.add(BookieId.parse("bookie5:3181")); + ensemble.add(BookieId.parse("bookie6:3181")); + + LedgerMetadataBuilder builder = LedgerMetadataBuilder.create(); + builder.withId(124L).withEnsembleSize(7).withWriteQuorumSize(3).withAckQuorumSize(2) + .withDigestType(TEST_DIGEST_TYPE.toApiDigestType()).withPassword(TEST_PSSWD) + .newEnsembleEntry(0, ensemble).withLastEntryId(lastEntryId).withLength(512).withClosedState(); + LedgerMetadata met = builder.build(); + + LedgerHandle lh = new LedgerHandle(bkc.getClientCtx(), 100L, new Versioned<>(met, new LongVersion(0L)), + TEST_DIGEST_TYPE, TEST_PSSWD, EnumSet.noneOf(WriteFlag.class)); + + /* + * create LedgerFragment from the ledger ensemble for the bookies with + * indexes 1 and 5. + */ + Set bookieIndexes = new HashSet<>(); + bookieIndexes.add(1); + bookieIndexes.add(5); + LedgerFragment lfrag = new LedgerFragment(lh, 0, 10, bookieIndexes); + + /* + * Since this ledger contains 11 entries (lastEntryId is 10), when it is + * split into subFragments of size 10 it will be split into 2. In the + * first subfragment, firstEntryID (and firstStoredEntryId) will be 0. + * lastKnownEntryID will be 9 but lastStoredEntryId will be 8. Because + * entry 9 will not be stored in both of the nodes and entry 8 is the + * last entry that is stored in either one of the node. + * + * In the second sub-fragment firstEntryID, firstStoredEntryId, + * lastKnownEntryID and lastStoredEntryId should be 10. 
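The block comment above compresses a fair bit of striping arithmetic. As a cross-check, here is a small standalone sketch, assuming BookKeeper's round-robin distribution schedule in which entry e is written to ensemble indexes (e + j) % ensembleSize for j in [0, writeQuorum); it reproduces lastStoredEntryId = 8 for the first sub-fragment. All names here are illustrative, not from the patch.

import java.util.Set;

public class FragmentStoreSketch {
    // True if entry entryId lands on any of the given ensemble indexes
    // under round-robin striping.
    static boolean storedOnAny(long entryId, int ensembleSize, int writeQuorum, Set<Integer> indexes) {
        for (int j = 0; j < writeQuorum; j++) {
            if (indexes.contains((int) ((entryId + j) % ensembleSize))) {
                return true;
            }
        }
        return false;
    }

    public static void main(String[] args) {
        Set<Integer> fragmentIndexes = Set.of(1, 5);   // bookies 1 and 5 of 7
        long lastStored = -1;
        for (long e = 0; e <= 9; e++) {                // first sub-fragment: entries 0..9
            if (storedOnAny(e, 7, 3, fragmentIndexes)) {
                lastStored = e;
            }
        }
        // Entry 9 maps to indexes {2, 3, 4}, missing both bookies, so this prints 8.
        System.out.println("last stored entry in [0,9]: " + lastStored);
    }
}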
+ */ + Set partitionedFragments = LedgerFragmentReplicator.splitIntoSubFragments(lh, lfrag, + rereplicationEntryBatchSize); + assertEquals("Number of sub-fragments", 2, partitionedFragments.size()); + for (LedgerFragment partitionedFragment : partitionedFragments) { + if (partitionedFragment.getFirstEntryId() == 0) { + validateEntryIds(partitionedFragment, 0, 0, 9, 8); + } else { + validateEntryIds(partitionedFragment, 10, 10, 10, 10); + } + } + } + + private void validateEntryIds(LedgerFragment partitionedFragment, long expectedFirstEntryId, + long expectedFirstStoredEntryId, long expectedLastKnownEntryID, long expectedLastStoredEntryId) { + assertEquals("FirstEntryId", expectedFirstEntryId, partitionedFragment.getFirstEntryId()); + assertEquals("FirstStoredEntryId", expectedFirstStoredEntryId, partitionedFragment.getFirstStoredEntryId()); + assertEquals("LastKnownEntryID", expectedLastKnownEntryID, partitionedFragment.getLastKnownEntryId()); + assertEquals("LastStoredEntryId", expectedLastStoredEntryId, partitionedFragment.getLastStoredEntryId()); + } } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestLedgerFragmentReplicationWithMock.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestLedgerFragmentReplicationWithMock.java new file mode 100644 index 00000000000..c618aa4a15b --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestLedgerFragmentReplicationWithMock.java @@ -0,0 +1,111 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + */ +package org.apache.bookkeeper.client; + +import static org.junit.Assert.assertEquals; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyBoolean; +import static org.mockito.ArgumentMatchers.anyInt; +import static org.mockito.ArgumentMatchers.anyLong; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.when; + +import io.netty.buffer.ByteBuf; +import io.netty.buffer.ByteBufAllocator; +import io.netty.buffer.Unpooled; +import java.lang.reflect.Field; +import java.util.Enumeration; +import java.util.HashSet; +import java.util.LinkedList; +import java.util.List; +import java.util.Set; +import java.util.concurrent.CountDownLatch; +import org.apache.bookkeeper.client.impl.LedgerEntryImpl; +import org.apache.bookkeeper.conf.ClientConfiguration; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.proto.BookieClientImpl; +import org.apache.bookkeeper.proto.BookieProtoEncoding; +import org.apache.bookkeeper.proto.BookieProtocol; +import org.apache.bookkeeper.proto.checksum.DigestManager; +import org.apache.bookkeeper.proto.checksum.DummyDigestManager; +import org.apache.commons.collections4.IteratorUtils; +import org.apache.zookeeper.AsyncCallback; +import org.junit.Test; +import org.mockito.Mockito; + +public class TestLedgerFragmentReplicationWithMock { + + @Test + public void testRecoverLedgerFragmentEntrySendRightRequestWithFlag() throws Exception { + CountDownLatch latch = new CountDownLatch(1); + BookieClientImpl bookieClient = Mockito.mock(BookieClientImpl.class); + doAnswer(invocationOnMock -> { + ByteBuf toSend = invocationOnMock.getArgument(4); + BookieProtoEncoding.RequestEnDeCoderPreV3 deCoderPreV3 = + new BookieProtoEncoding.RequestEnDeCoderPreV3(null); + toSend.readerIndex(4); + BookieProtocol.ParsedAddRequest request = (BookieProtocol.ParsedAddRequest) deCoderPreV3.decode(toSend); + + Field flagField = request.getClass().getSuperclass().getDeclaredField("flags"); + flagField.setAccessible(true); + short flag = flagField.getShort(request); + assertEquals(flag, BookieProtocol.FLAG_RECOVERY_ADD); + latch.countDown(); + return null; + }).when(bookieClient) + .addEntry(any(), anyLong(), any(), anyLong(), any(), any(), any(), anyInt(), anyBoolean(), any()); + + BookKeeper bkc = Mockito.mock(BookKeeper.class); + when(bkc.getBookieClient()).thenReturn(bookieClient); + + LedgerHandle lh = Mockito.mock(LedgerHandle.class); + DummyDigestManager ds = new DummyDigestManager(1L, true, ByteBufAllocator.DEFAULT); + when(lh.getDigestManager()).thenReturn(ds); + when(lh.getLedgerKey()).thenReturn(DigestManager.generateMasterKey("".getBytes())); + + ByteBuf data = Unpooled.wrappedBuffer(new byte[1024]); + LedgerEntry entry = new LedgerEntry(LedgerEntryImpl.create(1L, 1L, data.readableBytes(), data)); + List list = new LinkedList<>(); + list.add(entry); + Enumeration entries = IteratorUtils.asEnumeration(list.iterator()); + doAnswer(invocation -> { + org.apache.bookkeeper.client.AsyncCallback.ReadCallback rc = + invocation.getArgument(2, org.apache.bookkeeper.client.AsyncCallback.ReadCallback.class); + rc.readComplete(0, lh, entries, null); + return null; + }).when(lh).asyncReadEntries(anyLong(), anyLong(), any(), any()); + + ClientConfiguration conf = new ClientConfiguration(); + LedgerFragmentReplicator lfr = new LedgerFragmentReplicator(bkc, conf); + + Set bookies = new HashSet<>(); + bookies.add(BookieId.parse("127.0.0.1:3181")); + + AsyncCallback.VoidCallback vc = new AsyncCallback.VoidCallback() 
{ + @Override + public void processResult(int rc, String path, Object ctx) { + } + }; + + lfr.recoverLedgerFragmentEntry(1L, lh, vc, bookies, (lid, le) -> {}); + + latch.await(); + } +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestMaxEnsembleChangeNum.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestMaxEnsembleChangeNum.java index d3e810c13ff..3015bef64a0 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestMaxEnsembleChangeNum.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestMaxEnsembleChangeNum.java @@ -56,7 +56,7 @@ public void testChangeEnsembleMaxNumWithWriter() throws Exception { writer.append(ByteBuffer.wrap(data)); } assertEquals("There should be zero ensemble change", - 1, getLedgerMetadata(lId).getEnsembles().size()); + 1, getLedgerMetadata(lId).getAllEnsembles().size()); simulateEnsembleChangeWithWriter(changeNum, numEntries, writer); diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestMaxSizeWorkersQueue.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestMaxSizeWorkersQueue.java index cc70e024a19..3487566a87e 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestMaxSizeWorkersQueue.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestMaxSizeWorkersQueue.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -22,7 +22,6 @@ import static org.junit.Assert.fail; import com.google.common.collect.Lists; - import java.util.Enumeration; import java.util.List; import java.util.concurrent.Callable; @@ -33,7 +32,6 @@ import java.util.concurrent.Future; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; - import org.apache.bookkeeper.client.AsyncCallback.AddCallback; import org.apache.bookkeeper.client.AsyncCallback.ReadCallback; import org.apache.bookkeeper.client.BookKeeper.DigestType; diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestParallelRead.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestParallelRead.java index 68fd29c942d..423e02b4aad 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestParallelRead.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestParallelRead.java @@ -24,18 +24,27 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; +import static org.mockito.ArgumentMatchers.anyLong; +import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.mock; +import io.netty.buffer.ByteBuf; +import io.netty.buffer.Unpooled; import java.util.Iterator; import java.util.List; +import java.util.TreeMap; import java.util.concurrent.CompletableFuture; import java.util.concurrent.CountDownLatch; import org.apache.bookkeeper.client.BKException.Code; import org.apache.bookkeeper.client.BookKeeper.DigestType; import org.apache.bookkeeper.client.api.LedgerEntry; +import org.apache.bookkeeper.client.api.LedgerMetadata; import org.apache.bookkeeper.conf.ClientConfiguration; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.BookieId; 
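For readers skimming the new mock-based test above (TestLedgerFragmentReplicationWithMock), the core technique is stubbing a collaborator with Mockito's doAnswer() so the stub can decode and assert on the outgoing request, then releasing a latch so the test blocks until the interaction actually happens. A stripped-down sketch of that pattern with plain Mockito and no BookKeeper types (all names are illustrative):

import static org.mockito.ArgumentMatchers.anyString;
import static org.mockito.Mockito.doAnswer;
import static org.mockito.Mockito.mock;

import java.util.concurrent.CountDownLatch;
import java.util.function.Consumer;

public class DoAnswerLatchSketch {
    @SuppressWarnings("unchecked")
    public static void main(String[] args) throws Exception {
        CountDownLatch latch = new CountDownLatch(1);
        Consumer<String> sink = mock(Consumer.class);
        doAnswer(invocation -> {
            // Inspect the argument the code under test actually sent.
            String sent = invocation.getArgument(0);
            if (!sent.contains("RECOVERY")) {
                throw new AssertionError("expected a recovery-flagged request, got " + sent);
            }
            latch.countDown();           // signal that the interaction happened
            return null;
        }).when(sink).accept(anyString());

        sink.accept("ADD|RECOVERY");     // stands in for the code under test
        latch.await();                   // block until the stub observed the call
    }
}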
+import org.apache.bookkeeper.stats.OpStatsLogger; import org.apache.bookkeeper.test.BookKeeperClusterTestCase; import org.junit.Test; import org.slf4j.Logger; @@ -156,7 +165,7 @@ public void testFailParallelRecoveryReadMissingEntryImmediately() throws Excepti LedgerHandle lh = bkc.openLedger(id, digestType, passwd); - List ensemble = lh.getLedgerMetadata().getEnsemble(10); + List ensemble = lh.getLedgerMetadata().getEnsembleAt(10); CountDownLatch latch1 = new CountDownLatch(1); CountDownLatch latch2 = new CountDownLatch(1); // sleep two bookie @@ -187,7 +196,7 @@ public void testParallelReadWithFailedBookies() throws Exception { LedgerHandle lh = bkc.openLedger(id, digestType, passwd); - List ensemble = lh.getLedgerMetadata().getEnsemble(5); + List ensemble = lh.getLedgerMetadata().getEnsembleAt(5); // kill two bookies killBookie(ensemble.get(0)); killBookie(ensemble.get(1)); @@ -223,7 +232,7 @@ public void testParallelReadFailureWithFailedBookies() throws Exception { LedgerHandle lh = bkc.openLedger(id, digestType, passwd); - List ensemble = lh.getLedgerMetadata().getEnsemble(5); + List ensemble = lh.getLedgerMetadata().getEnsembleAt(5); // kill two bookies killBookie(ensemble.get(0)); killBookie(ensemble.get(1)); @@ -238,4 +247,73 @@ public void testParallelReadFailureWithFailedBookies() throws Exception { newBk.close(); } + @Test + public void testLedgerEntryRequestComplete() throws Exception { + LedgerHandle lh = mock(LedgerHandle.class); + LedgerMetadata ledgerMetadata = mock(LedgerMetadata.class); + ClientContext clientContext = mock(ClientContext.class); + ClientInternalConf clientInternalConf = mock(ClientInternalConf.class); + doReturn(clientInternalConf).when(clientContext).getConf(); + BookKeeperClientStats bookKeeperClientStats = mock(BookKeeperClientStats.class); + doReturn(bookKeeperClientStats).when(clientContext).getClientStats(); + OpStatsLogger opStatsLogger = mock(OpStatsLogger.class); + doReturn(opStatsLogger).when(bookKeeperClientStats).getReadOpLogger(); + doReturn(ledgerMetadata).when(lh).getLedgerMetadata(); + doReturn(2).when(ledgerMetadata).getWriteQuorumSize(); + doReturn(1).when(ledgerMetadata).getAckQuorumSize(); + doReturn(new TreeMap<>()).when(ledgerMetadata).getAllEnsembles(); + DistributionSchedule.WriteSet writeSet = mock(DistributionSchedule.WriteSet.class); + doReturn(writeSet).when(lh).getWriteSetForReadOperation(anyLong()); + PendingReadOp pendingReadOp = new PendingReadOp(lh, clientContext, 1, 2, false); + pendingReadOp.parallelRead(true); + pendingReadOp.initiate(); + PendingReadOp.SingleLedgerEntryRequest first = pendingReadOp.seq.get(0); + PendingReadOp.SingleLedgerEntryRequest second = pendingReadOp.seq.get(1); + + pendingReadOp.submitCallback(-105); + + // pendingReadOp.submitCallback(-105) will close all ledgerEntryImpl + assertEquals(-1, first.entryImpl.getEntryId()); + assertEquals(-1, first.entryImpl.getLedgerId()); + assertEquals(-1, first.entryImpl.getLength()); + assertNull(first.entryImpl.getEntryBuffer()); + assertTrue(first.complete.get()); + + assertEquals(-1, second.entryImpl.getEntryId()); + assertEquals(-1, second.entryImpl.getLedgerId()); + assertEquals(-1, second.entryImpl.getLength()); + assertNull(second.entryImpl.getEntryBuffer()); + assertTrue(second.complete.get()); + + // Mock ledgerEntryImpl reuse + ByteBuf byteBuf = Unpooled.buffer(10); + pendingReadOp.readEntryComplete(BKException.Code.OK, 1, 1, Unpooled.buffer(10), + new ReadOpBase.ReadContext(1, BookieId.parse("test"), first)); + + // byteBuf has been release + 
assertEquals(byteBuf.refCnt(), 1); + // entryBuffer is not replaced + assertNull(first.entryImpl.getEntryBuffer()); + // ledgerEntryRequest has been complete + assertTrue(first.complete.get()); + + pendingReadOp = new PendingReadOp(lh, clientContext, 1, 2, false); + pendingReadOp.parallelRead(true); + pendingReadOp.initiate(); + + // read entry failed twice, will not close twice + pendingReadOp.readEntryComplete(BKException.Code.TooManyRequestsException, 1, 1, Unpooled.buffer(10), + new ReadOpBase.ReadContext(1, BookieId.parse("test"), first)); + + pendingReadOp.readEntryComplete(BKException.Code.TooManyRequestsException, 1, 1, Unpooled.buffer(10), + new ReadOpBase.ReadContext(1, BookieId.parse("test"), first)); + + // will not complete twice when completed + byteBuf = Unpooled.buffer(10); + pendingReadOp.readEntryComplete(Code.OK, 1, 1, Unpooled.buffer(10), + new ReadOpBase.ReadContext(1, BookieId.parse("test"), first)); + assertEquals(1, byteBuf.refCnt()); + + } + } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestPendingReadLacOp.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestPendingReadLacOp.java index c9ca5083889..a37462dee7d 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestPendingReadLacOp.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestPendingReadLacOp.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -23,8 +23,10 @@ import static org.junit.Assert.assertEquals; import io.netty.buffer.Unpooled; +import io.netty.util.ReferenceCounted; import java.util.concurrent.CompletableFuture; import java.util.concurrent.TimeUnit; +import org.apache.bookkeeper.proto.MockBookieClient; import org.apache.bookkeeper.test.BookKeeperClusterTestCase; import org.apache.bookkeeper.util.ByteBufList; import org.junit.Test; @@ -36,8 +38,8 @@ */ public class TestPendingReadLacOp extends BookKeeperClusterTestCase { private static final Logger LOG = LoggerFactory.getLogger(TestPendingReadLacOp.class); - byte pwd[] = "asdf".getBytes(); - byte data[] = "foo".getBytes(); + byte[] pwd = "asdf".getBytes(); + byte[] data = "foo".getBytes(); public TestPendingReadLacOp() { super(3); @@ -57,17 +59,20 @@ public void testPendingReadLacOpMissingExplicitLAC() throws Exception { public void initiate() { for (int i = 0; i < lh.getCurrentEnsemble().size(); i++) { final int index = i; - ByteBufList buffer = lh.getDigestManager().computeDigestAndPackageForSending( + ReferenceCounted toSend = lh.getDigestManager().computeDigestAndPackageForSending( 2, 1, data.length, - Unpooled.wrappedBuffer(data)); + Unpooled.wrappedBuffer(data), + new byte[20], + 0); + bkc.scheduler.schedule(() -> { readLacComplete( 0, lh.getId(), null, - Unpooled.copiedBuffer(buffer.toArray()), + MockBookieClient.copyData(toSend), index); }, 0, TimeUnit.SECONDS); diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestPiggybackLAC.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestPiggybackLAC.java index d8be69c7f30..26a9e0611fe 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestPiggybackLAC.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestPiggybackLAC.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file @@ -25,7 +25,6 @@ import java.util.Arrays; import java.util.Collection; import java.util.Enumeration; - import org.apache.bookkeeper.bookie.InterleavedLedgerStorage; import org.apache.bookkeeper.bookie.LedgerStorage; import org.apache.bookkeeper.bookie.SortedLedgerStorage; diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestRackawareEnsemblePlacementPolicy.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestRackawareEnsemblePlacementPolicy.java index dd95b7a388d..d1e2cd5323b 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestRackawareEnsemblePlacementPolicy.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestRackawareEnsemblePlacementPolicy.java @@ -17,18 +17,23 @@ */ package org.apache.bookkeeper.client; +import static org.apache.bookkeeper.client.RackawareEnsemblePlacementPolicyImpl.RACKNAME_DISTANCE_FROM_LEAVES; import static org.apache.bookkeeper.client.RackawareEnsemblePlacementPolicyImpl.REPP_DNS_RESOLVER_CLASS; import static org.apache.bookkeeper.client.RackawareEnsemblePlacementPolicyImpl.shuffleWithMask; import static org.apache.bookkeeper.client.RoundRobinDistributionSchedule.writeSetFromValues; import static org.apache.bookkeeper.feature.SettableFeatureProvider.DISABLE_ALL; +import static org.hamcrest.Matchers.contains; +import static org.hamcrest.Matchers.is; +import static org.junit.Assert.assertThat; import com.google.common.util.concurrent.ThreadFactoryBuilder; - import io.netty.util.HashedWheelTimer; - import java.net.InetAddress; +import java.net.UnknownHostException; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -36,21 +41,36 @@ import java.util.Optional; import java.util.Set; import java.util.concurrent.TimeUnit; - +import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.Consumer; import junit.framework.TestCase; - import org.apache.bookkeeper.client.BKException.BKNotEnoughBookiesException; import org.apache.bookkeeper.client.BookieInfoReader.BookieInfo; -import org.apache.bookkeeper.client.TopologyAwareEnsemblePlacementPolicy.BookieNode; +import org.apache.bookkeeper.client.EnsemblePlacementPolicy.PlacementPolicyAdherence; +import org.apache.bookkeeper.client.ITopologyAwareEnsemblePlacementPolicy.Ensemble; import org.apache.bookkeeper.client.TopologyAwareEnsemblePlacementPolicy.EnsembleForReplacementWithNoConstraints; import org.apache.bookkeeper.client.TopologyAwareEnsemblePlacementPolicy.TruePredicate; +import org.apache.bookkeeper.common.util.ReflectionUtils; import org.apache.bookkeeper.conf.ClientConfiguration; +import org.apache.bookkeeper.net.AbstractDNSToSwitchMapping; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.net.BookieNode; import org.apache.bookkeeper.net.BookieSocketAddress; import org.apache.bookkeeper.net.DNSToSwitchMapping; import org.apache.bookkeeper.net.NetworkTopology; import org.apache.bookkeeper.net.Node; +import org.apache.bookkeeper.net.ScriptBasedMapping; +import org.apache.bookkeeper.proto.BookieAddressResolver; +import org.apache.bookkeeper.stats.Gauge; import org.apache.bookkeeper.stats.NullStatsLogger; +import org.apache.bookkeeper.test.TestStatsProvider; +import org.apache.bookkeeper.test.TestStatsProvider.TestStatsLogger; import org.apache.bookkeeper.util.StaticDNSResolver; +import org.apache.commons.collections4.CollectionUtils; 
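The hunks that follow repeat one mechanical change many times: EnsemblePlacementPolicy#initialize(...) gains a sixth argument, a BookieAddressResolver, and the tests pass BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER, which simply re-parses "host:port"-shaped ids. A minimal sketch of that call shape (the setup details are our assumptions, not from the patch):

import java.util.Optional;
import java.util.concurrent.TimeUnit;

import io.netty.util.HashedWheelTimer;
import org.apache.bookkeeper.client.RackawareEnsemblePlacementPolicy;
import org.apache.bookkeeper.conf.ClientConfiguration;
import org.apache.bookkeeper.feature.SettableFeatureProvider;
import org.apache.bookkeeper.net.BookieSocketAddress;
import org.apache.bookkeeper.stats.NullStatsLogger;

public class PlacementInitSketch {
    public static void main(String[] args) {
        ClientConfiguration conf = new ClientConfiguration();
        HashedWheelTimer timer = new HashedWheelTimer(100, TimeUnit.MILLISECONDS);
        RackawareEnsemblePlacementPolicy repp = new RackawareEnsemblePlacementPolicy();

        // The sixth argument is the new BookieAddressResolver; the legacy
        // resolver treats each BookieId as a literal "host:port".
        repp.initialize(conf, Optional.empty(), timer,
                SettableFeatureProvider.DISABLE_ALL, NullStatsLogger.INSTANCE,
                BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER);

        repp.uninitalize();   // sic: this is the method's actual spelling
        timer.stop();
    }
}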
+import org.apache.commons.lang3.tuple.Pair; +import org.hamcrest.Description; +import org.hamcrest.Matcher; +import org.hamcrest.TypeSafeMatcher; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -63,11 +83,13 @@ public class TestRackawareEnsemblePlacementPolicy extends TestCase { static final Logger LOG = LoggerFactory.getLogger(TestRackawareEnsemblePlacementPolicy.class); RackawareEnsemblePlacementPolicy repp; - final List ensemble = new ArrayList(); + final List ensemble = new ArrayList(); DistributionSchedule.WriteSet writeSet = DistributionSchedule.NULL_WRITE_SET; ClientConfiguration conf = new ClientConfiguration(); - BookieSocketAddress addr1, addr2, addr3, addr4; + BookieSocketAddress addr1; + BookieSocketAddress addr2, addr3, addr4; io.netty.util.HashedWheelTimer timer; + final int minNumRacksPerWriteQuorumConfValue = 2; @Override protected void setUp() throws Exception { @@ -79,6 +101,7 @@ protected void setUp() throws Exception { StaticDNSResolver.addNodeToRack("localhost", NetworkTopology.DEFAULT_REGION_AND_RACK); LOG.info("Set up static DNS Resolver."); conf.setProperty(REPP_DNS_RESOLVER_CLASS, StaticDNSResolver.class.getName()); + conf.setMinNumRacksPerWriteQuorum(minNumRacksPerWriteQuorumConfValue); addr1 = new BookieSocketAddress("127.0.0.2", 3181); addr2 = new BookieSocketAddress("127.0.0.3", 3181); addr3 = new BookieSocketAddress("127.0.0.4", 3181); @@ -88,10 +111,10 @@ protected void setUp() throws Exception { StaticDNSResolver.addNodeToRack(addr2.getHostName(), NetworkTopology.DEFAULT_REGION_AND_RACK); StaticDNSResolver.addNodeToRack(addr3.getHostName(), NetworkTopology.DEFAULT_REGION_AND_RACK); StaticDNSResolver.addNodeToRack(addr4.getHostName(), NetworkTopology.DEFAULT_REGION + "/rack2"); - ensemble.add(addr1); - ensemble.add(addr2); - ensemble.add(addr3); - ensemble.add(addr4); + ensemble.add(addr1.toBookieId()); + ensemble.add(addr2.toBookieId()); + ensemble.add(addr3.toBookieId()); + ensemble.add(addr4.toBookieId()); writeSet = writeSetFromValues(0, 1, 2, 3); timer = new HashedWheelTimer( @@ -100,7 +123,8 @@ protected void setUp() throws Exception { conf.getTimeoutTimerNumTicks()); repp = new RackawareEnsemblePlacementPolicy(); - repp.initialize(conf, Optional.empty(), timer, DISABLE_ALL, NullStatsLogger.INSTANCE); + repp.initialize(conf, Optional.empty(), timer, + DISABLE_ALL, NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); repp.withDefaultRack(NetworkTopology.DEFAULT_REGION_AND_RACK); } @@ -114,16 +138,16 @@ static BookiesHealthInfo getBookiesHealthInfo() { return getBookiesHealthInfo(new HashMap<>(), new HashMap<>()); } - static BookiesHealthInfo getBookiesHealthInfo(Map bookieFailureHistory, - Map bookiePendingRequests) { + static BookiesHealthInfo getBookiesHealthInfo(Map bookieFailureHistory, + Map bookiePendingRequests) { return new BookiesHealthInfo() { @Override - public long getBookieFailureHistory(BookieSocketAddress bookieSocketAddress) { + public long getBookieFailureHistory(BookieId bookieSocketAddress) { return bookieFailureHistory.getOrDefault(bookieSocketAddress, -1L); } @Override - public long getBookiePendingRequests(BookieSocketAddress bookieSocketAddress) { + public long getBookiePendingRequests(BookieId bookieSocketAddress) { return bookiePendingRequests.getOrDefault(bookieSocketAddress, 0L); } }; @@ -136,23 +160,35 @@ static void updateMyRack(String rack) throws Exception { StaticDNSResolver.addNodeToRack("localhost", rack); } + @Test + public void testInitialize() throws Exception { 
+ String dnsResolverName = conf.getString(REPP_DNS_RESOLVER_CLASS, ScriptBasedMapping.class.getName()); + DNSToSwitchMapping dnsResolver = ReflectionUtils.newInstance(dnsResolverName, DNSToSwitchMapping.class); + AbstractDNSToSwitchMapping tmp = (AbstractDNSToSwitchMapping) dnsResolver; + assertNull(tmp.getBookieAddressResolver()); + + dnsResolver.setBookieAddressResolver(repp.bookieAddressResolver); + assertNotNull(tmp.getBookieAddressResolver()); + } + @Test public void testNodeDown() throws Exception { repp.uninitalize(); updateMyRack(NetworkTopology.DEFAULT_REGION_AND_RACK); repp = new RackawareEnsemblePlacementPolicy(); - repp.initialize(conf, Optional.empty(), timer, DISABLE_ALL, NullStatsLogger.INSTANCE); + repp.initialize(conf, Optional.empty(), timer, + DISABLE_ALL, NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); repp.withDefaultRack(NetworkTopology.DEFAULT_REGION_AND_RACK); - Set addrs = new HashSet(); - addrs.add(addr1); - addrs.add(addr2); - addrs.add(addr3); - addrs.add(addr4); - repp.onClusterChanged(addrs, new HashSet()); - addrs.remove(addr1); - repp.onClusterChanged(addrs, new HashSet()); + Set addrs = new HashSet(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); + addrs.remove(addr1.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); DistributionSchedule.WriteSet origWriteSet = writeSet.copy(); DistributionSchedule.WriteSet reorderSet = repp.reorderReadSequence( @@ -170,19 +206,20 @@ public void testNodeReadOnly() throws Exception { updateMyRack("/r1/rack1"); repp = new RackawareEnsemblePlacementPolicy(); - repp.initialize(conf, Optional.empty(), timer, DISABLE_ALL, NullStatsLogger.INSTANCE); + repp.initialize(conf, Optional.empty(), timer, + DISABLE_ALL, NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); repp.withDefaultRack(NetworkTopology.DEFAULT_REGION_AND_RACK); // Update cluster - Set addrs = new HashSet(); - addrs.add(addr1); - addrs.add(addr2); - addrs.add(addr3); - addrs.add(addr4); - repp.onClusterChanged(addrs, new HashSet()); - addrs.remove(addr1); - Set ro = new HashSet(); - ro.add(addr1); + Set addrs = new HashSet(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); + addrs.remove(addr1.toBookieId()); + Set ro = new HashSet(); + ro.add(addr1.toBookieId()); repp.onClusterChanged(addrs, ro); DistributionSchedule.WriteSet origWriteSet = writeSet.copy(); @@ -198,19 +235,20 @@ public void testNodeSlow() throws Exception { updateMyRack("/r1/rack1"); repp = new RackawareEnsemblePlacementPolicy(); - repp.initialize(conf, Optional.empty(), timer, DISABLE_ALL, NullStatsLogger.INSTANCE); + repp.initialize(conf, Optional.empty(), timer, + DISABLE_ALL, NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); repp.withDefaultRack(NetworkTopology.DEFAULT_REGION_AND_RACK); // Update cluster - Set addrs = new HashSet(); - addrs.add(addr1); - addrs.add(addr2); - addrs.add(addr3); - addrs.add(addr4); - repp.onClusterChanged(addrs, new HashSet()); - repp.registerSlowBookie(addr1, 0L); - Map bookiePendingMap = new HashMap<>(); - bookiePendingMap.put(addr1, 1L); + Set addrs = new HashSet(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + repp.onClusterChanged(addrs, 
new HashSet()); + repp.registerSlowBookie(addr1.toBookieId(), 0L); + Map bookiePendingMap = new HashMap<>(); + bookiePendingMap.put(addr1.toBookieId(), 1L); repp.onClusterChanged(addrs, new HashSet<>()); DistributionSchedule.WriteSet origWriteSet = writeSet.copy(); @@ -228,21 +266,22 @@ public void testTwoNodesSlow() throws Exception { updateMyRack("/r1/rack1"); repp = new RackawareEnsemblePlacementPolicy(); - repp.initialize(conf, Optional.empty(), timer, DISABLE_ALL, NullStatsLogger.INSTANCE); + repp.initialize(conf, Optional.empty(), timer, + DISABLE_ALL, NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); repp.withDefaultRack(NetworkTopology.DEFAULT_REGION_AND_RACK); // Update cluster - Set addrs = new HashSet(); - addrs.add(addr1); - addrs.add(addr2); - addrs.add(addr3); - addrs.add(addr4); - repp.onClusterChanged(addrs, new HashSet()); - repp.registerSlowBookie(addr1, 0L); - repp.registerSlowBookie(addr2, 0L); - Map bookiePendingMap = new HashMap<>(); - bookiePendingMap.put(addr1, 1L); - bookiePendingMap.put(addr2, 2L); + Set addrs = new HashSet(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); + repp.registerSlowBookie(addr1.toBookieId(), 0L); + repp.registerSlowBookie(addr2.toBookieId(), 0L); + Map bookiePendingMap = new HashMap<>(); + bookiePendingMap.put(addr1.toBookieId(), 1L); + bookiePendingMap.put(addr2.toBookieId(), 2L); repp.onClusterChanged(addrs, new HashSet<>()); DistributionSchedule.WriteSet origWriteSet = writeSet.copy(); @@ -260,19 +299,20 @@ public void testTwoNodesDown() throws Exception { updateMyRack("/r1/rack1"); repp = new RackawareEnsemblePlacementPolicy(); - repp.initialize(conf, Optional.empty(), timer, DISABLE_ALL, NullStatsLogger.INSTANCE); + repp.initialize(conf, Optional.empty(), timer, + DISABLE_ALL, NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); repp.withDefaultRack(NetworkTopology.DEFAULT_REGION_AND_RACK); // Update cluster - Set addrs = new HashSet(); - addrs.add(addr1); - addrs.add(addr2); - addrs.add(addr3); - addrs.add(addr4); - repp.onClusterChanged(addrs, new HashSet()); - addrs.remove(addr1); - addrs.remove(addr2); - repp.onClusterChanged(addrs, new HashSet()); + Set addrs = new HashSet(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); + addrs.remove(addr1.toBookieId()); + addrs.remove(addr2.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); DistributionSchedule.WriteSet origWriteSet = writeSet.copy(); DistributionSchedule.WriteSet reorderSet = repp.reorderReadSequence( @@ -289,20 +329,21 @@ public void testNodeDownAndReadOnly() throws Exception { updateMyRack("/r1/rack1"); repp = new RackawareEnsemblePlacementPolicy(); - repp.initialize(conf, Optional.empty(), timer, DISABLE_ALL, NullStatsLogger.INSTANCE); + repp.initialize(conf, Optional.empty(), timer, + DISABLE_ALL, NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); repp.withDefaultRack(NetworkTopology.DEFAULT_REGION_AND_RACK); // Update cluster - Set addrs = new HashSet(); - addrs.add(addr1); - addrs.add(addr2); - addrs.add(addr3); - addrs.add(addr4); - repp.onClusterChanged(addrs, new HashSet()); - addrs.remove(addr1); - addrs.remove(addr2); - Set roAddrs = new HashSet(); - roAddrs.add(addr2); + Set addrs = new HashSet(); + 
addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); + addrs.remove(addr1.toBookieId()); + addrs.remove(addr2.toBookieId()); + Set roAddrs = new HashSet(); + roAddrs.add(addr2.toBookieId()); repp.onClusterChanged(addrs, roAddrs); DistributionSchedule.WriteSet origWriteSet = writeSet.copy(); DistributionSchedule.WriteSet reorderSet = repp.reorderReadSequence( @@ -318,20 +359,21 @@ public void testNodeDownAndNodeSlow() throws Exception { updateMyRack("/r1/rack1"); repp = new RackawareEnsemblePlacementPolicy(); - repp.initialize(conf, Optional.empty(), timer, DISABLE_ALL, NullStatsLogger.INSTANCE); + repp.initialize(conf, Optional.empty(), timer, + DISABLE_ALL, NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); repp.withDefaultRack(NetworkTopology.DEFAULT_REGION_AND_RACK); // Update cluster - Set addrs = new HashSet(); - addrs.add(addr1); - addrs.add(addr2); - addrs.add(addr3); - addrs.add(addr4); - repp.onClusterChanged(addrs, new HashSet()); - repp.registerSlowBookie(addr1, 0L); - Map bookiePendingMap = new HashMap<>(); - bookiePendingMap.put(addr1, 1L); - addrs.remove(addr2); + Set addrs = new HashSet(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); + repp.registerSlowBookie(addr1.toBookieId(), 0L); + Map bookiePendingMap = new HashMap<>(); + bookiePendingMap.put(addr1.toBookieId(), 1L); + addrs.remove(addr2.toBookieId()); repp.onClusterChanged(addrs, new HashSet<>()); DistributionSchedule.WriteSet origWriteSet = writeSet.copy(); @@ -349,24 +391,25 @@ public void testNodeDownAndReadOnlyAndNodeSlow() throws Exception { updateMyRack("/r1/rack1"); repp = new RackawareEnsemblePlacementPolicy(); - repp.initialize(conf, Optional.empty(), timer, DISABLE_ALL, NullStatsLogger.INSTANCE); + repp.initialize(conf, Optional.empty(), timer, + DISABLE_ALL, NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); repp.withDefaultRack(NetworkTopology.DEFAULT_REGION_AND_RACK); // Update cluster - Set addrs = new HashSet(); - addrs.add(addr1); - addrs.add(addr2); - addrs.add(addr3); - addrs.add(addr4); - repp.onClusterChanged(addrs, new HashSet()); - addrs.remove(addr1); - addrs.remove(addr2); - Set ro = new HashSet(); - ro.add(addr2); - repp.registerSlowBookie(addr3, 0L); - Map bookiePendingMap = new HashMap<>(); - bookiePendingMap.put(addr3, 1L); - addrs.remove(addr2); + Set addrs = new HashSet(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); + addrs.remove(addr1.toBookieId()); + addrs.remove(addr2.toBookieId()); + Set ro = new HashSet(); + ro.add(addr2.toBookieId()); + repp.registerSlowBookie(addr3.toBookieId(), 0L); + Map bookiePendingMap = new HashMap<>(); + bookiePendingMap.put(addr3.toBookieId(), 1L); + addrs.remove(addr2.toBookieId()); repp.onClusterChanged(addrs, ro); DistributionSchedule.WriteSet origWriteSet = writeSet.copy(); @@ -391,21 +434,22 @@ public void testPendingRequestsReorder() throws Exception { repp = new RackawareEnsemblePlacementPolicy(); ClientConfiguration conf = (ClientConfiguration) this.conf.clone(); conf.setReorderThresholdPendingRequests(10); - repp.initialize(conf, Optional.empty(), timer, DISABLE_ALL, NullStatsLogger.INSTANCE); + repp.initialize(conf, 
Optional.empty(), timer, + DISABLE_ALL, NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); repp.withDefaultRack(NetworkTopology.DEFAULT_REGION_AND_RACK); // Update cluster - Set addrs = new HashSet(); - addrs.add(addr1); - addrs.add(addr2); - addrs.add(addr3); - addrs.add(addr4); - repp.onClusterChanged(addrs, new HashSet()); - Map bookiePendingMap = new HashMap<>(); - bookiePendingMap.put(addr1, 20L); - bookiePendingMap.put(addr2, 7L); - bookiePendingMap.put(addr3, 1L); // best bookie -> this one first - bookiePendingMap.put(addr4, 5L); + Set addrs = new HashSet(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); + Map bookiePendingMap = new HashMap<>(); + bookiePendingMap.put(addr1.toBookieId(), 20L); + bookiePendingMap.put(addr2.toBookieId(), 7L); + bookiePendingMap.put(addr3.toBookieId(), 1L); // best bookie -> this one first + bookiePendingMap.put(addr4.toBookieId(), 5L); DistributionSchedule.WriteSet origWriteSet = writeSet.copy(); DistributionSchedule.WriteSet reorderSet = repp.reorderReadSequence( @@ -429,38 +473,39 @@ public void testPendingRequestsReorderLargeEnsemble() throws Exception { repp = new RackawareEnsemblePlacementPolicy(); ClientConfiguration conf = (ClientConfiguration) this.conf.clone(); conf.setReorderThresholdPendingRequests(10); - repp.initialize(conf, Optional.empty(), timer, DISABLE_ALL, NullStatsLogger.INSTANCE); + repp.initialize(conf, Optional.empty(), timer, + DISABLE_ALL, NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); repp.withDefaultRack(NetworkTopology.DEFAULT_REGION_AND_RACK); // Update cluster BookieSocketAddress addr5 = new BookieSocketAddress("127.0.0.6", 3181); BookieSocketAddress addr6 = new BookieSocketAddress("127.0.0.7", 3181); BookieSocketAddress addr7 = new BookieSocketAddress("127.0.0.8", 3181); - Set addrs = new HashSet(); - addrs.add(addr1); - addrs.add(addr2); - addrs.add(addr3); - addrs.add(addr4); - addrs.add(addr5); - addrs.add(addr6); - addrs.add(addr7); - repp.onClusterChanged(addrs, new HashSet()); - Map bookiePendingMap = new HashMap<>(); - bookiePendingMap.put(addr1, 1L); // not in write set - bookiePendingMap.put(addr2, 20L); - bookiePendingMap.put(addr3, 0L); // not in write set - bookiePendingMap.put(addr4, 12L); - bookiePendingMap.put(addr5, 9L); // not in write set - bookiePendingMap.put(addr6, 2L); // best bookie -> this one first - bookiePendingMap.put(addr7, 10L); - List ensemble = new ArrayList(); - ensemble.add(addr1); - ensemble.add(addr2); - ensemble.add(addr3); - ensemble.add(addr4); - ensemble.add(addr5); - ensemble.add(addr6); - ensemble.add(addr7); + Set addrs = new HashSet(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + addrs.add(addr5.toBookieId()); + addrs.add(addr6.toBookieId()); + addrs.add(addr7.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); + Map bookiePendingMap = new HashMap<>(); + bookiePendingMap.put(addr1.toBookieId(), 1L); // not in write set + bookiePendingMap.put(addr2.toBookieId(), 20L); + bookiePendingMap.put(addr3.toBookieId(), 0L); // not in write set + bookiePendingMap.put(addr4.toBookieId(), 12L); + bookiePendingMap.put(addr5.toBookieId(), 9L); // not in write set + bookiePendingMap.put(addr6.toBookieId(), 2L); // best bookie -> this one first + bookiePendingMap.put(addr7.toBookieId(), 10L); + List ensemble = new 
ArrayList(); + ensemble.add(addr1.toBookieId()); + ensemble.add(addr2.toBookieId()); + ensemble.add(addr3.toBookieId()); + ensemble.add(addr4.toBookieId()); + ensemble.add(addr5.toBookieId()); + ensemble.add(addr6.toBookieId()); + ensemble.add(addr7.toBookieId()); DistributionSchedule.WriteSet writeSet = writeSetFromValues(1, 3, 5, 6); DistributionSchedule.WriteSet origWriteSet = writeSet.copy(); @@ -485,21 +530,22 @@ public void testPendingRequestsNoReorder1() throws Exception { repp = new RackawareEnsemblePlacementPolicy(); ClientConfiguration conf = (ClientConfiguration) this.conf.clone(); conf.setReorderThresholdPendingRequests(10); - repp.initialize(conf, Optional.empty(), timer, DISABLE_ALL, NullStatsLogger.INSTANCE); + repp.initialize(conf, Optional.empty(), timer, + DISABLE_ALL, NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); repp.withDefaultRack(NetworkTopology.DEFAULT_REGION_AND_RACK); // Update cluster - Set addrs = new HashSet(); - addrs.add(addr1); - addrs.add(addr2); - addrs.add(addr3); - addrs.add(addr4); - repp.onClusterChanged(addrs, new HashSet()); - Map bookiePendingMap = new HashMap<>(); - bookiePendingMap.put(addr1, 10L); // -> this one first - bookiePendingMap.put(addr2, 7L); - bookiePendingMap.put(addr3, 1L); // best bookie, but below threshold - bookiePendingMap.put(addr4, 5L); + Set addrs = new HashSet(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); + Map bookiePendingMap = new HashMap<>(); + bookiePendingMap.put(addr1.toBookieId(), 10L); // -> this one first + bookiePendingMap.put(addr2.toBookieId(), 7L); + bookiePendingMap.put(addr3.toBookieId(), 1L); // best bookie, but below threshold + bookiePendingMap.put(addr4.toBookieId(), 5L); DistributionSchedule.WriteSet origWriteSet = writeSet.copy(); DistributionSchedule.WriteSet reorderSet = repp.reorderReadSequence( @@ -521,21 +567,22 @@ public void testPendingRequestsNoReorder2() throws Exception { repp = new RackawareEnsemblePlacementPolicy(); ClientConfiguration conf = (ClientConfiguration) this.conf.clone(); conf.setReorderThresholdPendingRequests(10); - repp.initialize(conf, Optional.empty(), timer, DISABLE_ALL, NullStatsLogger.INSTANCE); + repp.initialize(conf, Optional.empty(), timer, + DISABLE_ALL, NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); repp.withDefaultRack(NetworkTopology.DEFAULT_REGION_AND_RACK); // Update cluster - Set addrs = new HashSet(); - addrs.add(addr1); - addrs.add(addr2); - addrs.add(addr3); - addrs.add(addr4); - repp.onClusterChanged(addrs, new HashSet()); - Map bookiePendingMap = new HashMap<>(); - bookiePendingMap.put(addr1, 1L); // -> this one first - bookiePendingMap.put(addr2, 7L); - bookiePendingMap.put(addr3, 1L); - bookiePendingMap.put(addr4, 5L); + Set addrs = new HashSet(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); + Map bookiePendingMap = new HashMap<>(); + bookiePendingMap.put(addr1.toBookieId(), 1L); // -> this one first + bookiePendingMap.put(addr2.toBookieId(), 7L); + bookiePendingMap.put(addr3.toBookieId(), 1L); + bookiePendingMap.put(addr4.toBookieId(), 5L); DistributionSchedule.WriteSet origWriteSet = writeSet.copy(); DistributionSchedule.WriteSet reorderSet = repp.reorderReadSequence( @@ -544,6 +591,54 @@ public void testPendingRequestsNoReorder2() 
throws Exception { assertEquals("writeSet should be in original order", origWriteSet, reorderSet); } + @Test + public void testIsEnsembleAdheringToPlacementPolicy() throws Exception { + BookieSocketAddress addr1 = new BookieSocketAddress("127.0.0.2", 3181); + BookieSocketAddress addr2 = new BookieSocketAddress("127.0.0.3", 3181); + BookieSocketAddress addr3 = new BookieSocketAddress("127.0.0.4", 3181); + BookieSocketAddress addr4 = new BookieSocketAddress("127.0.0.5", 3181); + BookieSocketAddress addr5 = new BookieSocketAddress("127.0.0.6", 3181); + BookieSocketAddress addr6 = new BookieSocketAddress("127.0.0.7", 3181); + // update dns mapping + StaticDNSResolver.addNodeToRack(addr1.getHostName(), NetworkTopology.DEFAULT_REGION_AND_RACK); + StaticDNSResolver.addNodeToRack(addr2.getHostName(), "/default-region/r2"); + StaticDNSResolver.addNodeToRack(addr3.getHostName(), "/default-region/r2"); + StaticDNSResolver.addNodeToRack(addr4.getHostName(), "/default-region/r3"); + StaticDNSResolver.addNodeToRack(addr5.getHostName(), "/default-region/r3"); + StaticDNSResolver.addNodeToRack(addr6.getHostName(), "/default-region/r3"); + // Update cluster + Set addrs = new HashSet(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); + + List emptyEnsemble = new ArrayList<>(); + assertEquals("PlacementPolicyAdherence", PlacementPolicyAdherence.FAIL, + repp.isEnsembleAdheringToPlacementPolicy(emptyEnsemble, 3, 3)); + + List ensemble = new ArrayList<>(); + ensemble.add(addr1.toBookieId()); + ensemble.add(addr2.toBookieId()); + assertEquals("PlacementPolicyAdherence", PlacementPolicyAdherence.MEETS_STRICT, + repp.isEnsembleAdheringToPlacementPolicy(ensemble, 3, 3)); + + ensemble = new ArrayList<>(); + ensemble.add(addr1.toBookieId()); + ensemble.add(addr2.toBookieId()); + ensemble.add(addr3.toBookieId()); + assertEquals("PlacementPolicyAdherence", PlacementPolicyAdherence.MEETS_STRICT, + repp.isEnsembleAdheringToPlacementPolicy(ensemble, 3, 3)); + + ensemble = new ArrayList<>(); + ensemble.add(addr4.toBookieId()); + ensemble.add(addr5.toBookieId()); + ensemble.add(addr6.toBookieId()); + assertEquals("PlacementPolicyAdherence", PlacementPolicyAdherence.FAIL, + repp.isEnsembleAdheringToPlacementPolicy(ensemble, 3, 3)); + } + @Test public void testReplaceBookieWithEnoughBookiesInSameRack() throws Exception { BookieSocketAddress addr1 = new BookieSocketAddress("127.0.0.2", 3181); @@ -556,15 +651,19 @@ public void testReplaceBookieWithEnoughBookiesInSameRack() throws Exception { StaticDNSResolver.addNodeToRack(addr3.getHostName(), "/default-region/r2"); StaticDNSResolver.addNodeToRack(addr4.getHostName(), "/default-region/r3"); // Update cluster - Set addrs = new HashSet(); - addrs.add(addr1); - addrs.add(addr2); - addrs.add(addr3); - addrs.add(addr4); - repp.onClusterChanged(addrs, new HashSet()); + Set addrs = new HashSet(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); // replace node under r2 - BookieSocketAddress replacedBookie = repp.replaceBookie(1, 1, 1, null, new HashSet<>(), addr2, new HashSet<>()); - assertEquals(addr3, replacedBookie); + EnsemblePlacementPolicy.PlacementResult replaceBookieResponse = + repp.replaceBookie(1, 1, 1, null, new ArrayList<>(), addr2.toBookieId(), new HashSet<>()); + BookieId replacedBookie = 
replaceBookieResponse.getResult(); + PlacementPolicyAdherence isEnsembleAdheringToPlacementPolicy = replaceBookieResponse.getAdheringToPolicy(); + assertEquals(addr3.toBookieId(), replacedBookie); + assertEquals(PlacementPolicyAdherence.MEETS_STRICT, isEnsembleAdheringToPlacementPolicy); } @Test @@ -579,19 +678,22 @@ public void testReplaceBookieWithEnoughBookiesInDifferentRack() throws Exception StaticDNSResolver.addNodeToRack(addr3.getHostName(), "/default-region/r3"); StaticDNSResolver.addNodeToRack(addr4.getHostName(), "/default-region/r4"); // Update cluster - Set addrs = new HashSet(); - addrs.add(addr1); - addrs.add(addr2); - addrs.add(addr3); - addrs.add(addr4); - repp.onClusterChanged(addrs, new HashSet()); + Set addrs = new HashSet(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); // replace node under r2 - Set excludedAddrs = new HashSet(); - excludedAddrs.add(addr1); - BookieSocketAddress replacedBookie = repp.replaceBookie(1, 1, 1, null, new HashSet<>(), addr2, excludedAddrs); - - assertFalse(addr1.equals(replacedBookie)); - assertTrue(addr3.equals(replacedBookie) || addr4.equals(replacedBookie)); + Set excludedAddrs = new HashSet(); + excludedAddrs.add(addr1.toBookieId()); + EnsemblePlacementPolicy.PlacementResult replaceBookieResponse = + repp.replaceBookie(1, 1, 1, null, new ArrayList<>(), addr2.toBookieId(), excludedAddrs); + BookieId replacedBookie = replaceBookieResponse.getResult(); + PlacementPolicyAdherence isEnsembleAdheringToPlacementPolicy = replaceBookieResponse.getAdheringToPolicy(); + assertFalse(addr1.toBookieId().equals(replacedBookie)); + assertTrue(addr3.toBookieId().equals(replacedBookie) || addr4.toBookieId().equals(replacedBookie)); + assertEquals(PlacementPolicyAdherence.MEETS_STRICT, isEnsembleAdheringToPlacementPolicy); } @Test @@ -606,19 +708,19 @@ public void testReplaceBookieWithNotEnoughBookies() throws Exception { StaticDNSResolver.addNodeToRack(addr3.getHostName(), "/default-region/r3"); StaticDNSResolver.addNodeToRack(addr4.getHostName(), "/default-region/r4"); // Update cluster - Set addrs = new HashSet(); - addrs.add(addr1); - addrs.add(addr2); - addrs.add(addr3); - addrs.add(addr4); - repp.onClusterChanged(addrs, new HashSet()); + Set addrs = new HashSet(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); // replace node under r2 - Set excludedAddrs = new HashSet(); - excludedAddrs.add(addr1); - excludedAddrs.add(addr3); - excludedAddrs.add(addr4); + Set excludedAddrs = new HashSet(); + excludedAddrs.add(addr1.toBookieId()); + excludedAddrs.add(addr3.toBookieId()); + excludedAddrs.add(addr4.toBookieId()); try { - repp.replaceBookie(1, 1, 1, null, new HashSet(), addr2, excludedAddrs); + repp.replaceBookie(1, 1, 1, null, new ArrayList(), addr2.toBookieId(), excludedAddrs); fail("Should throw BKNotEnoughBookiesException when there is not enough bookies"); } catch (BKNotEnoughBookiesException bnebe) { // should throw not enou @@ -627,7 +729,7 @@ public void testReplaceBookieWithNotEnoughBookies() throws Exception { @Test public void testReplaceBookieWithEnoughBookiesInSameRackAsEnsemble() throws Exception { - BookieSocketAddress addr1 = new BookieSocketAddress("127.0.0.1", 3181); + BookieSocketAddress addr1 = new BookieSocketAddress("127.0.0.5", 3181); BookieSocketAddress addr2 = new 
BookieSocketAddress("127.0.0.2", 3181); BookieSocketAddress addr3 = new BookieSocketAddress("127.0.0.3", 3181); BookieSocketAddress addr4 = new BookieSocketAddress("127.0.0.4", 3181); @@ -637,22 +739,25 @@ public void testReplaceBookieWithEnoughBookiesInSameRackAsEnsemble() throws Exce StaticDNSResolver.addNodeToRack(addr3.getHostName(), "/r2"); StaticDNSResolver.addNodeToRack(addr4.getHostName(), "/r3"); // Update cluster - Set addrs = new HashSet(); - addrs.add(addr1); - addrs.add(addr2); - addrs.add(addr3); - addrs.add(addr4); - repp.onClusterChanged(addrs, new HashSet()); + Set addrs = new HashSet(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); // replace node under r2 - Set ensembleBookies = new HashSet(); - ensembleBookies.add(addr2); - ensembleBookies.add(addr4); - BookieSocketAddress replacedBookie = repp.replaceBookie( + List ensembleBookies = new ArrayList(); + ensembleBookies.add(addr2.toBookieId()); + ensembleBookies.add(addr4.toBookieId()); + EnsemblePlacementPolicy.PlacementResult replaceBookieResponse = repp.replaceBookie( 1, 1, 1 , null, ensembleBookies, - addr4, + addr4.toBookieId(), new HashSet<>()); - assertEquals(addr1, replacedBookie); + BookieId replacedBookie = replaceBookieResponse.getResult(); + PlacementPolicyAdherence isEnsembleAdheringToPlacementPolicy = replaceBookieResponse.getAdheringToPolicy(); + assertEquals(addr1.toBookieId(), replacedBookie); + assertEquals(PlacementPolicyAdherence.MEETS_STRICT, isEnsembleAdheringToPlacementPolicy); } @Test @@ -662,22 +767,81 @@ public void testNewEnsembleWithSingleRack() throws Exception { BookieSocketAddress addr3 = new BookieSocketAddress("127.0.0.8", 3181); BookieSocketAddress addr4 = new BookieSocketAddress("127.0.0.9", 3181); // Update cluster - Set addrs = new HashSet(); - addrs.add(addr1); - addrs.add(addr2); - addrs.add(addr3); - addrs.add(addr4); - repp.onClusterChanged(addrs, new HashSet()); + Set addrs = new HashSet(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); try { - List ensemble = repp.newEnsemble(3, 2, 2, null, new HashSet<>()); - assertEquals(0, getNumCoveredWriteQuorums(ensemble, 2, conf.getMinNumRacksPerWriteQuorum())); - List ensemble2 = repp.newEnsemble(4, 2, 2, null, new HashSet<>()); - assertEquals(0, getNumCoveredWriteQuorums(ensemble2, 2, conf.getMinNumRacksPerWriteQuorum())); + EnsemblePlacementPolicy.PlacementResult> ensembleResponse; + ensembleResponse = repp.newEnsemble(3, 2, 2, null, new HashSet<>()); + List ensemble = ensembleResponse.getResult(); + PlacementPolicyAdherence isEnsembleAdheringToPlacementPolicy = ensembleResponse.getAdheringToPolicy(); + assertEquals(0, getNumCoveredWriteQuorums(ensemble, 2, conf.getMinNumRacksPerWriteQuorum(), + repp.bookieAddressResolver)); + assertEquals(PlacementPolicyAdherence.FAIL, isEnsembleAdheringToPlacementPolicy); + EnsemblePlacementPolicy.PlacementResult> ensembleResponse2; + ensembleResponse2 = repp.newEnsemble(4, 2, 2, null, new HashSet<>()); + List ensemble2 = ensembleResponse2.getResult(); + PlacementPolicyAdherence isEnsembleAdheringToPlacementPolicy2 = ensembleResponse2.getAdheringToPolicy(); + assertEquals(0, getNumCoveredWriteQuorums(ensemble2, 2, conf.getMinNumRacksPerWriteQuorum(), + repp.bookieAddressResolver)); + assertEquals(PlacementPolicyAdherence.FAIL, 
isEnsembleAdheringToPlacementPolicy); } catch (BKNotEnoughBookiesException bnebe) { fail("Should not get not enough bookies exception even there is only one rack."); } } + @Test(timeout = 30_000) + public void testNewEnsembleWithExcludeBookies() throws Exception { + repp.uninitalize(); + updateMyRack(NetworkTopology.DEFAULT_REGION_AND_RACK); + + repp = new RackawareEnsemblePlacementPolicy(); + conf.setDiskWeightBasedPlacementEnabled(true); + repp.initialize(conf, Optional.empty(), timer, + DISABLE_ALL, NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); + repp.withDefaultRack(NetworkTopology.DEFAULT_REGION_AND_RACK); + + BookieSocketAddress addr1 = new BookieSocketAddress("127.0.0.2", 3181); + BookieSocketAddress addr2 = new BookieSocketAddress("127.0.0.3", 3181); + BookieSocketAddress addr3 = new BookieSocketAddress("127.0.0.4", 3181); + + BookieNode addr1Node = new BookieNode(addr1.toBookieId(), repp.resolveNetworkLocation(addr1.toBookieId())); + BookieNode addr2Node = new BookieNode(addr2.toBookieId(), repp.resolveNetworkLocation(addr2.toBookieId())); + BookieNode addr3Node = new BookieNode(addr3.toBookieId(), repp.resolveNetworkLocation(addr3.toBookieId())); + // update dns mapping + StaticDNSResolver.addNodeToRack(addr1.getHostName(), "/default-region/r1"); + StaticDNSResolver.addNodeToRack(addr2.getHostName(), "/default-region/r2"); + StaticDNSResolver.addNodeToRack(addr3.getHostName(), "/default-region/r1"); + // Update cluster + Set addrs = new HashSet(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); + + Set excludeBookies = new HashSet<>(); + excludeBookies.add(addr2Node); + excludeBookies.add(addr3Node); + + TopologyAwareEnsemblePlacementPolicy.RRTopologyAwareCoverageEnsemble ensemble = + new TopologyAwareEnsemblePlacementPolicy.RRTopologyAwareCoverageEnsemble( + 2, 2, 2, + RACKNAME_DISTANCE_FROM_LEAVES, + null, null, 1); + ensemble.addNode(new BookieNode(addr1.toBookieId(), repp.resolveNetworkLocation(addr1.toBookieId()))); + try { + repp.selectRandomInternal(null, 1, excludeBookies, null, ensemble); + fail("Should fail with not enough bookies exception"); + } catch (BKNotEnoughBookiesException ex) { + // + } + + conf.setDiskWeightBasedPlacementEnabled(false); + } + @Test public void testSingleRackWithEnforceMinNumRacks() throws Exception { repp.uninitalize(); @@ -693,26 +857,25 @@ public void testSingleRackWithEnforceMinNumRacks() throws Exception { clientConf.setEnforceMinNumRacksPerWriteQuorum(true); repp = new RackawareEnsemblePlacementPolicy(); repp.initialize(clientConf, Optional. 
empty(), timer, DISABLE_ALL, - NullStatsLogger.INSTANCE); + NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); repp.withDefaultRack(NetworkTopology.DEFAULT_REGION_AND_RACK); - Set addrs = new HashSet(); - addrs.add(addr1); - addrs.add(addr2); - addrs.add(addr3); - addrs.add(addr4); - repp.onClusterChanged(addrs, new HashSet()); - - List ensemble; + Set addrs = new HashSet(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); + List ensemble; try { - ensemble = repp.newEnsemble(3, 2, 2, null, new HashSet<>()); + ensemble = repp.newEnsemble(3, 2, 2, null, new HashSet<>()).getResult(); fail("Should get not enough bookies exception since there is only one rack."); } catch (BKNotEnoughBookiesException bnebe) { } try { - ensemble = repp.newEnsemble(3, 2, 2, new HashSet<>(), EnsembleForReplacementWithNoConstraints.INSTANCE, - TruePredicate.INSTANCE); + ensemble = repp.newEnsemble(3, 2, 2, new HashSet<>(), + EnsembleForReplacementWithNoConstraints.INSTANCE, TruePredicate.INSTANCE).getResult(); fail("Should get not enough bookies exception since there is only one rack."); } catch (BKNotEnoughBookiesException bnebe) { } @@ -720,64 +883,96 @@ public void testSingleRackWithEnforceMinNumRacks() throws Exception { @Test public void testNewEnsembleWithEnforceMinNumRacks() throws Exception { + String defaultRackForThisTest = NetworkTopology.DEFAULT_REGION_AND_RACK; repp.uninitalize(); + updateMyRack(defaultRackForThisTest); int minNumRacksPerWriteQuorum = 4; ClientConfiguration clientConf = new ClientConfiguration(conf); clientConf.setMinNumRacksPerWriteQuorum(minNumRacksPerWriteQuorum); // set enforceMinNumRacksPerWriteQuorum clientConf.setEnforceMinNumRacksPerWriteQuorum(true); + TestStatsProvider statsProvider = new TestStatsProvider(); + TestStatsLogger statsLogger = statsProvider.getStatsLogger(""); repp = new RackawareEnsemblePlacementPolicy(); - repp.initialize(clientConf, Optional. empty(), timer, DISABLE_ALL, - NullStatsLogger.INSTANCE); - repp.withDefaultRack(NetworkTopology.DEFAULT_REGION_AND_RACK); + repp.initialize(clientConf, Optional. empty(), timer, + DISABLE_ALL, statsLogger, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); + repp.withDefaultRack(defaultRackForThisTest); + Gauge numBookiesInDefaultRackGauge = statsLogger + .getGauge(BookKeeperClientStats.NUM_WRITABLE_BOOKIES_IN_DEFAULT_RACK); int numOfRacks = 3; int numOfBookiesPerRack = 5; - BookieSocketAddress[] bookieSocketAddresses = new BookieSocketAddress[numOfRacks * numOfBookiesPerRack]; + BookieId[] bookieSocketAddresses = new BookieId[numOfRacks * numOfBookiesPerRack]; for (int i = 0; i < numOfRacks; i++) { for (int j = 0; j < numOfBookiesPerRack; j++) { int index = i * numOfBookiesPerRack + j; - bookieSocketAddresses[index] = new BookieSocketAddress("128.0.0." + index, 3181); - StaticDNSResolver.addNodeToRack(bookieSocketAddresses[index].getHostName(), "/default-region/r" + i); + bookieSocketAddresses[index] = new BookieSocketAddress("128.0.0." + index, 3181).toBookieId(); + StaticDNSResolver.addNodeToRack("128.0.0." 
+ index, "/default-region/r" + i); } } - repp.onClusterChanged(new HashSet(Arrays.asList(bookieSocketAddresses)), - new HashSet()); + int numOfBookiesInDefaultRack = 5; + BookieId[] bookieSocketAddressesInDefaultRack = new BookieId[numOfBookiesInDefaultRack]; + for (int i = 0; i < numOfBookiesInDefaultRack; i++) { + bookieSocketAddressesInDefaultRack[i] = new BookieSocketAddress("128.0.0." + (100 + i), 3181).toBookieId(); + StaticDNSResolver.addNodeToRack("128.0.0." + (100 + i), + defaultRackForThisTest); + } + + List nonDefaultRackBookiesList = Arrays.asList(bookieSocketAddresses); + List defaultRackBookiesList = Arrays.asList(bookieSocketAddressesInDefaultRack); + Set writableBookies = new HashSet(nonDefaultRackBookiesList); + writableBookies.addAll(defaultRackBookiesList); + repp.onClusterChanged(writableBookies, new HashSet()); + assertEquals("NUM_WRITABLE_BOOKIES_IN_DEFAULT_RACK guage value", numOfBookiesInDefaultRack, + numBookiesInDefaultRackGauge.getSample()); try { + // this newEnsemble call will exclude default rack bookies repp.newEnsemble(8, 4, 4, null, new HashSet<>()); - fail("Should get not enough bookies exception since there are only 3 racks"); + fail("Should get not enough bookies exception since there are only 3 non-default racks"); } catch (BKNotEnoughBookiesException bnebe) { } try { - repp.newEnsemble(8, 4, 4, new HashSet<>(), + repp.newEnsemble(8, 4, 4, new HashSet<>(defaultRackBookiesList), EnsembleForReplacementWithNoConstraints.INSTANCE, TruePredicate.INSTANCE); - fail("Should get not enough bookies exception since there are only 3 racks"); + fail("Should get not enough bookies exception since there are only 3 non-default racks" + + " and defaultrack bookies are excluded"); } catch (BKNotEnoughBookiesException bnebe) { } /* * Though minNumRacksPerWriteQuorum is set to 4, since writeQuorum is 3 - * and there are enough bookies in 3 racks, this newEnsemble calls should - * succeed. + * and there are enough bookies in 3 racks, this newEnsemble calls + * should succeed. 
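+ * (A worked example using only this test's own numbers: with numOfRacks = 3
+ * and numOfBookiesPerRack = 5, ensembleSize = 15 and writeQuorumSize = 3,
+ * so a rack-aware placement can alternate across racks r0, r1, r2 and every
+ * sliding window of 3 consecutive bookies spans 3 distinct racks, which is
+ * why MEETS_STRICT is asserted below.)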
*/ - List ensemble; + EnsemblePlacementPolicy.PlacementResult> ensembleResponse; + List ensemble; + PlacementPolicyAdherence isEnsembleAdheringToPlacementPolicy; int ensembleSize = numOfRacks * numOfBookiesPerRack; int writeQuorumSize = numOfRacks; int ackQuorumSize = numOfRacks; - ensemble = repp.newEnsemble(ensembleSize, writeQuorumSize, ackQuorumSize, null, new HashSet<>()); + ensembleResponse = repp.newEnsemble(ensembleSize, writeQuorumSize, ackQuorumSize, null, new HashSet<>()); + ensemble = ensembleResponse.getResult(); + isEnsembleAdheringToPlacementPolicy = ensembleResponse.getAdheringToPolicy(); assertEquals("Number of writeQuorum sets covered", ensembleSize, - getNumCoveredWriteQuorums(ensemble, writeQuorumSize, clientConf.getMinNumRacksPerWriteQuorum())); - - ensemble = repp.newEnsemble(ensembleSize, writeQuorumSize, ackQuorumSize, new HashSet<>(), - EnsembleForReplacementWithNoConstraints.INSTANCE, TruePredicate.INSTANCE); + getNumCoveredWriteQuorums(ensemble, writeQuorumSize, clientConf.getMinNumRacksPerWriteQuorum(), + repp.bookieAddressResolver)); + assertEquals(PlacementPolicyAdherence.MEETS_STRICT, isEnsembleAdheringToPlacementPolicy); + + ensembleResponse = repp.newEnsemble(ensembleSize, writeQuorumSize, ackQuorumSize, + new HashSet<>(defaultRackBookiesList), EnsembleForReplacementWithNoConstraints.INSTANCE, + TruePredicate.INSTANCE); + ensemble = ensembleResponse.getResult(); + isEnsembleAdheringToPlacementPolicy = ensembleResponse.getAdheringToPolicy(); assertEquals("Number of writeQuorum sets covered", ensembleSize, - getNumCoveredWriteQuorums(ensemble, writeQuorumSize, clientConf.getMinNumRacksPerWriteQuorum())); + getNumCoveredWriteQuorums(ensemble, writeQuorumSize, clientConf.getMinNumRacksPerWriteQuorum(), + repp.bookieAddressResolver)); + assertEquals(PlacementPolicyAdherence.MEETS_STRICT, isEnsembleAdheringToPlacementPolicy); } @Test @@ -791,7 +986,7 @@ public void testNewEnsembleWithSufficientRacksAndEnforceMinNumRacks() throws Exc clientConf.setEnforceMinNumRacksPerWriteQuorum(true); repp = new RackawareEnsemblePlacementPolicy(); repp.initialize(clientConf, Optional. empty(), timer, DISABLE_ALL, - NullStatsLogger.INSTANCE); + NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); repp.withDefaultRack(NetworkTopology.DEFAULT_REGION_AND_RACK); int writeQuorumSize = 3; @@ -800,19 +995,19 @@ public void testNewEnsembleWithSufficientRacksAndEnforceMinNumRacks() throws Exc int numOfRacks = 2 * effectiveMinNumRacksPerWriteQuorum - 1; int numOfBookiesPerRack = 20; - BookieSocketAddress[] bookieSocketAddresses = new BookieSocketAddress[numOfRacks * numOfBookiesPerRack]; + BookieId[] bookieSocketAddresses = new BookieId[numOfRacks * numOfBookiesPerRack]; for (int i = 0; i < numOfRacks; i++) { for (int j = 0; j < numOfBookiesPerRack; j++) { int index = i * numOfBookiesPerRack + j; - bookieSocketAddresses[index] = new BookieSocketAddress("128.0.0." + index, 3181); - StaticDNSResolver.addNodeToRack(bookieSocketAddresses[index].getHostName(), "/default-region/r" + i); + bookieSocketAddresses[index] = new BookieSocketAddress("128.0.0." + index, 3181).toBookieId(); + StaticDNSResolver.addNodeToRack("128.0.0." 
+ index, "/default-region/r" + i); } } - Set addrs = new HashSet(); - repp.onClusterChanged(new HashSet(Arrays.asList(bookieSocketAddresses)), - new HashSet()); + Set addrs = new HashSet(); + repp.onClusterChanged(new HashSet(Arrays.asList(bookieSocketAddresses)), + new HashSet()); /* * in this scenario we have enough number of racks (2 * @@ -821,65 +1016,100 @@ public void testNewEnsembleWithSufficientRacksAndEnforceMinNumRacks() throws Exc * ensembleSizes (as long as there are enough number of bookies in each * rack). */ - List ensemble; + EnsemblePlacementPolicy.PlacementResult> ensembleResponse; + List ensemble; + PlacementPolicyAdherence isEnsembleAdheringToPlacementPolicy; for (int ensembleSize = effectiveMinNumRacksPerWriteQuorum; ensembleSize < 40; ensembleSize++) { - ensemble = repp.newEnsemble(ensembleSize, writeQuorumSize, ackQuorumSize, null, new HashSet<>()); + ensembleResponse = repp.newEnsemble(ensembleSize, writeQuorumSize, ackQuorumSize, null, new HashSet<>()); + ensemble = ensembleResponse.getResult(); + isEnsembleAdheringToPlacementPolicy = ensembleResponse.getAdheringToPolicy(); assertEquals("Number of writeQuorum sets covered", ensembleSize, - getNumCoveredWriteQuorums(ensemble, writeQuorumSize, clientConf.getMinNumRacksPerWriteQuorum())); + getNumCoveredWriteQuorums(ensemble, writeQuorumSize, clientConf.getMinNumRacksPerWriteQuorum(), + repp.bookieAddressResolver)); + assertEquals(PlacementPolicyAdherence.MEETS_STRICT, isEnsembleAdheringToPlacementPolicy); - ensemble = repp.newEnsemble(ensembleSize, writeQuorumSize, ackQuorumSize, new HashSet<>(), + ensembleResponse = repp.newEnsemble(ensembleSize, writeQuorumSize, ackQuorumSize, new HashSet<>(), EnsembleForReplacementWithNoConstraints.INSTANCE, TruePredicate.INSTANCE); + ensemble = ensembleResponse.getResult(); + isEnsembleAdheringToPlacementPolicy = ensembleResponse.getAdheringToPolicy(); assertEquals("Number of writeQuorum sets covered", ensembleSize, - getNumCoveredWriteQuorums(ensemble, writeQuorumSize, clientConf.getMinNumRacksPerWriteQuorum())); + getNumCoveredWriteQuorums(ensemble, writeQuorumSize, clientConf.getMinNumRacksPerWriteQuorum(), + repp.bookieAddressResolver)); + assertEquals(PlacementPolicyAdherence.MEETS_STRICT, isEnsembleAdheringToPlacementPolicy); } } @Test public void testReplaceBookieWithEnforceMinNumRacks() throws Exception { + String defaultRackForThisTest = NetworkTopology.DEFAULT_REGION_AND_RACK; repp.uninitalize(); + updateMyRack(defaultRackForThisTest); int minNumRacksPerWriteQuorum = 4; ClientConfiguration clientConf = new ClientConfiguration(conf); clientConf.setMinNumRacksPerWriteQuorum(minNumRacksPerWriteQuorum); // set enforceMinNumRacksPerWriteQuorum clientConf.setEnforceMinNumRacksPerWriteQuorum(true); + TestStatsProvider statsProvider = new TestStatsProvider(); + TestStatsLogger statsLogger = statsProvider.getStatsLogger(""); repp = new RackawareEnsemblePlacementPolicy(); repp.initialize(clientConf, Optional. 
empty(), timer, DISABLE_ALL, - NullStatsLogger.INSTANCE); - repp.withDefaultRack(NetworkTopology.DEFAULT_REGION_AND_RACK); + statsLogger, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); + repp.withDefaultRack(defaultRackForThisTest); + Gauge numBookiesInDefaultRackGauge = statsLogger + .getGauge(BookKeeperClientStats.NUM_WRITABLE_BOOKIES_IN_DEFAULT_RACK); int numOfRacks = 3; int numOfBookiesPerRack = 5; - Set bookieSocketAddresses = new HashSet(); - Map bookieRackMap = new HashMap(); - BookieSocketAddress bookieAddress; + Set bookieSocketAddresses = new HashSet(); + Map bookieRackMap = new HashMap(); + BookieId bookieAddress; String rack; for (int i = 0; i < numOfRacks; i++) { for (int j = 0; j < numOfBookiesPerRack; j++) { int index = i * numOfBookiesPerRack + j; - bookieAddress = new BookieSocketAddress("128.0.0." + index, 3181); + bookieAddress = new BookieSocketAddress("128.0.0." + index, 3181).toBookieId(); rack = "/default-region/r" + i; - StaticDNSResolver.addNodeToRack(bookieAddress.getHostName(), rack); + StaticDNSResolver.addNodeToRack("128.0.0." + index, rack); bookieSocketAddresses.add(bookieAddress); bookieRackMap.put(bookieAddress, rack); } } + /* + * bookies in this default rack should not be returned for replacebookie + * response. + */ + int numOfBookiesInDefaultRack = 5; + BookieId[] bookieSocketAddressesInDefaultRack = new BookieId[numOfBookiesInDefaultRack]; + for (int i = 0; i < numOfBookiesInDefaultRack; i++) { + bookieSocketAddressesInDefaultRack[i] = new BookieSocketAddress("127.0.0." + (i + 100), 3181).toBookieId(); + StaticDNSResolver.addNodeToRack("127.0.0." + (i + 100), + defaultRackForThisTest); + } - repp.onClusterChanged(bookieSocketAddresses, new HashSet()); + Set nonDefaultRackBookiesList = bookieSocketAddresses; + List defaultRackBookiesList = Arrays.asList(bookieSocketAddressesInDefaultRack); + Set writableBookies = new HashSet(nonDefaultRackBookiesList); + writableBookies.addAll(defaultRackBookiesList); + repp.onClusterChanged(writableBookies, new HashSet()); + assertEquals("NUM_WRITABLE_BOOKIES_IN_DEFAULT_RACK guage value", numOfBookiesInDefaultRack, + numBookiesInDefaultRackGauge.getSample()); /* * Though minNumRacksPerWriteQuorum is set to 4, since writeQuorum is 3 * and there are enough bookies in 3 racks, this newEnsemble call should * succeed. 
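+ * (A minimal sketch of how the PlacementResult returned below is meant to be
+ * unpacked; the getter names are the ones this patch itself uses, and the
+ * surrounding variable names are illustrative only:
+ *
+ *     EnsemblePlacementPolicy.PlacementResult<List<BookieId>> response =
+ *         repp.newEnsemble(ensembleSize, writeQuorumSize, ackQuorumSize,
+ *                 null, new HashSet<>());
+ *     List<BookieId> newEnsemble = response.getResult();
+ *     PlacementPolicyAdherence adherence = response.getAdheringToPolicy();
+ * )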
*/ - List ensemble; + EnsemblePlacementPolicy.PlacementResult> ensembleResponse; + List ensemble; int ensembleSize = numOfRacks * numOfBookiesPerRack; int writeQuorumSize = numOfRacks; int ackQuorumSize = numOfRacks; - ensemble = repp.newEnsemble(ensembleSize, writeQuorumSize, ackQuorumSize, null, new HashSet<>()); + ensembleResponse = repp.newEnsemble(ensembleSize, writeQuorumSize, ackQuorumSize, null, new HashSet<>()); + ensemble = ensembleResponse.getResult(); - BookieSocketAddress bookieInEnsembleToBeReplaced = ensemble.get(7); + BookieId bookieInEnsembleToBeReplaced = ensemble.get(7); // get rack of some other bookie String rackOfOtherBookieInEnsemble = bookieRackMap.get(ensemble.get(8)); BookieSocketAddress newBookieAddress1 = new BookieSocketAddress("128.0.0.100", 3181); @@ -888,13 +1118,16 @@ public void testReplaceBookieWithEnforceMinNumRacks() throws Exception { * ensemble */ StaticDNSResolver.addNodeToRack(newBookieAddress1.getHostName(), rackOfOtherBookieInEnsemble); - bookieSocketAddresses.add(newBookieAddress1); - bookieRackMap.put(newBookieAddress1, rackOfOtherBookieInEnsemble); + bookieSocketAddresses.add(newBookieAddress1.toBookieId()); + writableBookies.add(newBookieAddress1.toBookieId()); + bookieRackMap.put(newBookieAddress1.toBookieId(), rackOfOtherBookieInEnsemble); - repp.onClusterChanged(bookieSocketAddresses, new HashSet()); + repp.onClusterChanged(writableBookies, new HashSet()); + assertEquals("NUM_WRITABLE_BOOKIES_IN_DEFAULT_RACK guage value", numOfBookiesInDefaultRack, + numBookiesInDefaultRackGauge.getSample()); try { repp.replaceBookie(ensembleSize, writeQuorumSize, ackQuorumSize, null, - new HashSet(ensemble), bookieInEnsembleToBeReplaced, new HashSet<>()); + ensemble, bookieInEnsembleToBeReplaced, new HashSet<>()); fail("Should get not enough bookies exception since there are no more bookies in rack" + "of 'bookieInEnsembleToReplace'" + "and new bookie added belongs to the rack of some other bookie in the ensemble"); @@ -908,24 +1141,35 @@ public void testReplaceBookieWithEnforceMinNumRacks() throws Exception { * add the newBookie to a new rack. */ StaticDNSResolver.addNodeToRack(newBookieAddress2.getHostName(), newRack); - bookieSocketAddresses.add(newBookieAddress2); - bookieRackMap.put(newBookieAddress2, newRack); + bookieSocketAddresses.add(newBookieAddress2.toBookieId()); + writableBookies.add(newBookieAddress2.toBookieId()); + bookieRackMap.put(newBookieAddress2.toBookieId(), newRack); - repp.onClusterChanged(bookieSocketAddresses, new HashSet()); + repp.onClusterChanged(writableBookies, new HashSet()); + assertEquals("NUM_WRITABLE_BOOKIES_IN_DEFAULT_RACK guage value", numOfBookiesInDefaultRack, + numBookiesInDefaultRackGauge.getSample()); /* * this replaceBookie should succeed, because a new bookie is added to a * new rack. 
*/ - BookieSocketAddress replacedBookieAddress = repp.replaceBookie(ensembleSize, writeQuorumSize, ackQuorumSize, - null, new HashSet(ensemble), bookieInEnsembleToBeReplaced, new HashSet<>()); - assertEquals("It should be newBookieAddress2", newBookieAddress2, replacedBookieAddress); - - Set bookiesToExclude = new HashSet<>(); - bookiesToExclude.add(newBookieAddress2); - repp.onClusterChanged(bookieSocketAddresses, new HashSet()); + EnsemblePlacementPolicy.PlacementResult replaceBookieResponse; + BookieId replacedBookieAddress; + PlacementPolicyAdherence isEnsembleAdheringToPlacementPolicy; + replaceBookieResponse = repp.replaceBookie(ensembleSize, writeQuorumSize, ackQuorumSize, null, ensemble, + bookieInEnsembleToBeReplaced, new HashSet<>()); + replacedBookieAddress = replaceBookieResponse.getResult(); + isEnsembleAdheringToPlacementPolicy = replaceBookieResponse.getAdheringToPolicy(); + assertEquals("It should be newBookieAddress2", newBookieAddress2.toBookieId(), replacedBookieAddress); + assertEquals(PlacementPolicyAdherence.MEETS_STRICT, isEnsembleAdheringToPlacementPolicy); + + Set bookiesToExclude = new HashSet<>(); + bookiesToExclude.add(newBookieAddress2.toBookieId()); + repp.onClusterChanged(writableBookies, new HashSet()); + assertEquals("NUM_WRITABLE_BOOKIES_IN_DEFAULT_RACK guage value", numOfBookiesInDefaultRack, + numBookiesInDefaultRackGauge.getSample()); try { - repp.replaceBookie(ensembleSize, writeQuorumSize, ackQuorumSize, null, - new HashSet(ensemble), bookieInEnsembleToBeReplaced, bookiesToExclude); + repp.replaceBookie(ensembleSize, writeQuorumSize, ackQuorumSize, null, ensemble, + bookieInEnsembleToBeReplaced, bookiesToExclude); fail("Should get not enough bookies exception since the only available bookie to replace" + "is added to excludedBookies list"); } catch (BKNotEnoughBookiesException bnebe) { @@ -939,18 +1183,24 @@ public void testReplaceBookieWithEnforceMinNumRacks() throws Exception { * add the newBookie to rack of the bookie to be replaced. */ StaticDNSResolver.addNodeToRack(newBookieAddress3.getHostName(), rackOfBookieToBeReplaced); - bookieSocketAddresses.add(newBookieAddress3); - bookieRackMap.put(newBookieAddress3, rackOfBookieToBeReplaced); + bookieSocketAddresses.add(newBookieAddress3.toBookieId()); + writableBookies.add(newBookieAddress3.toBookieId()); + bookieRackMap.put(newBookieAddress3.toBookieId(), rackOfBookieToBeReplaced); - repp.onClusterChanged(bookieSocketAddresses, new HashSet()); + repp.onClusterChanged(writableBookies, new HashSet()); + assertEquals("NUM_WRITABLE_BOOKIES_IN_DEFAULT_RACK guage value", numOfBookiesInDefaultRack, + numBookiesInDefaultRackGauge.getSample()); /* * here we have added new bookie to the rack of the bookie to be * replaced, so we should be able to replacebookie though * newBookieAddress2 is added to excluded bookies list. 
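+ * (newBookieAddress3 was just mapped to rackOfBookieToBeReplaced itself, so
+ * rack coverage is unchanged by the swap and the exclusion of
+ * newBookieAddress2 no longer matters.)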
*/ - replacedBookieAddress = repp.replaceBookie(ensembleSize, writeQuorumSize, ackQuorumSize, null, - new HashSet(ensemble), bookieInEnsembleToBeReplaced, bookiesToExclude); - assertEquals("It should be newBookieAddress3", newBookieAddress3, replacedBookieAddress); + replaceBookieResponse = repp.replaceBookie(ensembleSize, writeQuorumSize, ackQuorumSize, null, + ensemble, bookieInEnsembleToBeReplaced, bookiesToExclude); + replacedBookieAddress = replaceBookieResponse.getResult(); + isEnsembleAdheringToPlacementPolicy = replaceBookieResponse.getAdheringToPolicy(); + assertEquals("It should be newBookieAddress3", newBookieAddress3.toBookieId(), replacedBookieAddress); + assertEquals(PlacementPolicyAdherence.MEETS_STRICT, isEnsembleAdheringToPlacementPolicy); } @Test @@ -964,30 +1214,30 @@ public void testSelectBookieFromNetworkLoc() throws Exception { clientConf.setEnforceMinNumRacksPerWriteQuorum(true); repp = new RackawareEnsemblePlacementPolicy(); repp.initialize(clientConf, Optional. empty(), timer, DISABLE_ALL, - NullStatsLogger.INSTANCE); + NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); repp.withDefaultRack(NetworkTopology.DEFAULT_REGION_AND_RACK); int numOfRacks = 3; int numOfBookiesPerRack = 5; String[] rackLocationNames = new String[numOfRacks]; - List bookieSocketAddresses = new ArrayList(); - Map bookieRackMap = new HashMap(); - BookieSocketAddress bookieAddress; + List bookieSocketAddresses = new ArrayList(); + Map bookieRackMap = new HashMap(); + BookieId bookieAddress; for (int i = 0; i < numOfRacks; i++) { rackLocationNames[i] = "/default-region/r" + i; for (int j = 0; j < numOfBookiesPerRack; j++) { int index = i * numOfBookiesPerRack + j; - bookieAddress = new BookieSocketAddress("128.0.0." + index, 3181); - StaticDNSResolver.addNodeToRack(bookieAddress.getHostName(), rackLocationNames[i]); + bookieAddress = new BookieSocketAddress("128.0.0." + index, 3181).toBookieId(); + StaticDNSResolver.addNodeToRack("128.0.0." + index, rackLocationNames[i]); bookieSocketAddresses.add(bookieAddress); bookieRackMap.put(bookieAddress, rackLocationNames[i]); } } String nonExistingRackLocation = "/default-region/r25"; - repp.onClusterChanged(new HashSet(bookieSocketAddresses), - new HashSet()); + repp.onClusterChanged(new HashSet(bookieSocketAddresses), + new HashSet()); String rack = bookieRackMap.get(bookieSocketAddresses.get(0)); BookieNode bookieNode = repp.selectFromNetworkLocation(rack, new HashSet(), TruePredicate.INSTANCE, @@ -1008,7 +1258,7 @@ public void testSelectBookieFromNetworkLoc() throws Exception { repp.selectFromNetworkLocation(nonExistingRackLocation, new HashSet(), TruePredicate.INSTANCE, EnsembleForReplacementWithNoConstraints.INSTANCE, true); - Set excludeBookiesOfRackR0 = new HashSet(); + Set excludeBookiesOfRackR0 = new HashSet(); for (int i = 0; i < numOfBookiesPerRack; i++) { excludeBookiesOfRackR0.add(bookieSocketAddresses.get(i)); } @@ -1042,31 +1292,31 @@ public void testSelectBookieFromExcludingRacks() throws Exception { clientConf.setEnforceMinNumRacksPerWriteQuorum(true); repp = new RackawareEnsemblePlacementPolicy(); repp.initialize(clientConf, Optional. 
empty(), timer, DISABLE_ALL, - NullStatsLogger.INSTANCE); + NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); repp.withDefaultRack(NetworkTopology.DEFAULT_REGION_AND_RACK); int numOfRacks = 3; int numOfBookiesPerRack = 5; String[] rackLocationNames = new String[numOfRacks]; - List bookieSocketAddresses = new ArrayList(); - Map bookieRackMap = new HashMap(); - BookieSocketAddress bookieAddress; + List bookieSocketAddresses = new ArrayList(); + Map bookieRackMap = new HashMap(); + BookieId bookieAddress; for (int i = 0; i < numOfRacks; i++) { rackLocationNames[i] = "/default-region/r" + i; for (int j = 0; j < numOfBookiesPerRack; j++) { int index = i * numOfBookiesPerRack + j; - bookieAddress = new BookieSocketAddress("128.0.0." + index, 3181); - StaticDNSResolver.addNodeToRack(bookieAddress.getHostName(), rackLocationNames[i]); + bookieAddress = new BookieSocketAddress("128.0.0." + index, 3181).toBookieId(); + StaticDNSResolver.addNodeToRack("128.0.0." + index, rackLocationNames[i]); bookieSocketAddresses.add(bookieAddress); bookieRackMap.put(bookieAddress, rackLocationNames[i]); } } - repp.onClusterChanged(new HashSet(bookieSocketAddresses), - new HashSet()); + repp.onClusterChanged(new HashSet(bookieSocketAddresses), + new HashSet()); - Set excludeBookiesOfRackR0 = new HashSet(); + Set excludeBookiesOfRackR0 = new HashSet(); for (int i = 0; i < numOfBookiesPerRack; i++) { excludeBookiesOfRackR0.add(bookieSocketAddresses.get(i)); } @@ -1110,32 +1360,32 @@ public void testSelectBookieFromNetworkLocAndExcludingRacks() throws Exception { clientConf.setEnforceMinNumRacksPerWriteQuorum(true); repp = new RackawareEnsemblePlacementPolicy(); repp.initialize(clientConf, Optional. empty(), timer, DISABLE_ALL, - NullStatsLogger.INSTANCE); + NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); repp.withDefaultRack(NetworkTopology.DEFAULT_REGION_AND_RACK); int numOfRacks = 3; int numOfBookiesPerRack = 5; String[] rackLocationNames = new String[numOfRacks]; - List bookieSocketAddresses = new ArrayList(); - Map bookieRackMap = new HashMap(); - BookieSocketAddress bookieAddress; + List bookieSocketAddresses = new ArrayList(); + Map bookieRackMap = new HashMap(); + BookieId bookieAddress; for (int i = 0; i < numOfRacks; i++) { rackLocationNames[i] = "/default-region/r" + i; for (int j = 0; j < numOfBookiesPerRack; j++) { int index = i * numOfBookiesPerRack + j; - bookieAddress = new BookieSocketAddress("128.0.0." + index, 3181); - StaticDNSResolver.addNodeToRack(bookieAddress.getHostName(), rackLocationNames[i]); + bookieAddress = new BookieSocketAddress("128.0.0." + index, 3181).toBookieId(); + StaticDNSResolver.addNodeToRack("128.0.0." 
+ index, rackLocationNames[i]); bookieSocketAddresses.add(bookieAddress); bookieRackMap.put(bookieAddress, rackLocationNames[i]); } } String nonExistingRackLocation = "/default-region/r25"; - repp.onClusterChanged(new HashSet(bookieSocketAddresses), - new HashSet()); + repp.onClusterChanged(new HashSet(bookieSocketAddresses), + new HashSet()); - Set excludeBookiesOfRackR0 = new HashSet(); + Set excludeBookiesOfRackR0 = new HashSet(); for (int i = 0; i < numOfBookiesPerRack; i++) { excludeBookiesOfRackR0.add(bookieSocketAddresses.get(i)); } @@ -1176,6 +1426,104 @@ public void testSelectBookieFromNetworkLocAndExcludingRacks() throws Exception { || rackLocationNames[2].equals(bookieNode.getNetworkLocation())); } + @Test + public void testSelectBookieByExcludingRacksAndBookies() throws Exception { + repp.uninitalize(); + + int minNumRacksPerWriteQuorum = 4; + ClientConfiguration clientConf = new ClientConfiguration(conf); + clientConf.setMinNumRacksPerWriteQuorum(minNumRacksPerWriteQuorum); + // set enforceMinNumRacksPerWriteQuorum + clientConf.setEnforceMinNumRacksPerWriteQuorum(true); + /* + * Durability is enforced + * + * When durability is being enforced; we must not violate the predicate + * even when selecting a random bookie; as durability guarantee is not + * best effort; correctness is implied by it + */ + repp = new RackawareEnsemblePlacementPolicy(true); + repp.initialize(clientConf, Optional. empty(), timer, DISABLE_ALL, + NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); + repp.withDefaultRack(NetworkTopology.DEFAULT_REGION_AND_RACK); + + int numOfRacks = 3; + int numOfBookiesPerRack = 5; + String[] rackLocationNames = new String[numOfRacks]; + List bookieSocketAddresses = new ArrayList(); + Map bookieRackMap = new HashMap(); + BookieId bookieAddress; + + for (int i = 0; i < numOfRacks; i++) { + rackLocationNames[i] = "/default-region/r" + i; + for (int j = 0; j < numOfBookiesPerRack; j++) { + int index = i * numOfBookiesPerRack + j; + bookieAddress = new BookieSocketAddress("128.0.0." + index, 3181).toBookieId(); + StaticDNSResolver.addNodeToRack("128.0.0." + index, rackLocationNames[i]); + bookieSocketAddresses.add(bookieAddress); + bookieRackMap.put(bookieAddress, rackLocationNames[i]); + } + } + + repp.onClusterChanged(new HashSet(bookieSocketAddresses), + new HashSet()); + + Set excludeBookiesOfRackR0 = new HashSet(); + for (int i = 0; i < numOfBookiesPerRack; i++) { + excludeBookiesOfRackR0.add(bookieSocketAddresses.get(i)); + } + + Set excludeBookieNodesOfRackR0 = repp.convertBookiesToNodes(excludeBookiesOfRackR0); + + Set excludeRackR1 = new HashSet(); + excludeRackR1.add(rackLocationNames[1]); + + BookieNode nodeSelected; + nodeSelected = repp.selectFromNetworkLocation(excludeRackR1, excludeBookieNodesOfRackR0, TruePredicate.INSTANCE, + EnsembleForReplacementWithNoConstraints.INSTANCE, false); + assertEquals("BookieNode should be from Rack2", rackLocationNames[2], nodeSelected.getNetworkLocation()); + + try { + /* + * durability is enforced, so false predicate will reject all + * bookies. + */ + repp.selectFromNetworkLocation(excludeRackR1, excludeBookieNodesOfRackR0, (candidate, chosenBookies) -> { + return false; + }, EnsembleForReplacementWithNoConstraints.INSTANCE, false); + fail("Should get not enough bookies exception since we are using false predicate"); + } catch (BKNotEnoughBookiesException bnebe) { + // this is expected + } + + try { + /* + * Using ensemble which rejects all the nodes. 
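+ * (The anonymous Ensemble below returns false from both addNode() and
+ * validate(), so no candidate can ever be accepted and the selection is
+ * expected to end in BKNotEnoughBookiesException.)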
+ */ + repp.selectFromNetworkLocation(excludeRackR1, excludeBookieNodesOfRackR0, TruePredicate.INSTANCE, + new Ensemble() { + + @Override + public boolean addNode(BookieNode node) { + return false; + } + + @Override + public List toList() { + return null; + } + + @Override + public boolean validate() { + return false; + } + }, false); + fail("Should get not enough bookies exception since ensemble rejects all the nodes"); + } catch (BKNotEnoughBookiesException bnebe) { + // this is expected + } + } + @Test public void testNewEnsembleWithMultipleRacks() throws Exception { BookieSocketAddress addr1 = new BookieSocketAddress("127.0.0.1", 3181); @@ -1188,43 +1536,308 @@ public void testNewEnsembleWithMultipleRacks() throws Exception { StaticDNSResolver.addNodeToRack(addr3.getHostName(), "/default-region/r2"); StaticDNSResolver.addNodeToRack(addr4.getHostName(), "/default-region/r2"); // Update cluster - Set addrs = new HashSet(); - addrs.add(addr1); - addrs.add(addr2); - addrs.add(addr3); - addrs.add(addr4); - repp.onClusterChanged(addrs, new HashSet()); + Set addrs = new HashSet(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); try { int ensembleSize = 3; int writeQuorumSize = 2; int acqQuorumSize = 2; - List ensemble = repp.newEnsemble(ensembleSize, writeQuorumSize, acqQuorumSize, - null, new HashSet<>()); - int numCovered = getNumCoveredWriteQuorums(ensemble, 2, conf.getMinNumRacksPerWriteQuorum()); + EnsemblePlacementPolicy.PlacementResult> ensembleResponse = + repp.newEnsemble(ensembleSize, writeQuorumSize, + acqQuorumSize, null, new HashSet<>()); + List ensemble = ensembleResponse.getResult(); + PlacementPolicyAdherence isEnsembleAdheringToPlacementPolicy = ensembleResponse.getAdheringToPolicy(); + int numCovered = getNumCoveredWriteQuorums(ensemble, writeQuorumSize, + conf.getMinNumRacksPerWriteQuorum(), repp.bookieAddressResolver); assertTrue(numCovered >= 1 && numCovered < 3); + assertEquals(PlacementPolicyAdherence.FAIL, isEnsembleAdheringToPlacementPolicy); ensembleSize = 4; - List ensemble2 = repp.newEnsemble(ensembleSize, writeQuorumSize, acqQuorumSize, - null, new HashSet<>()); - numCovered = getNumCoveredWriteQuorums(ensemble2, 2, conf.getMinNumRacksPerWriteQuorum()); + EnsemblePlacementPolicy.PlacementResult> ensembleResponse2 = + repp.newEnsemble(ensembleSize, writeQuorumSize, + acqQuorumSize, null, new HashSet<>()); + List ensemble2 = ensembleResponse2.getResult(); + PlacementPolicyAdherence isEnsembleAdheringToPlacementPolicy2 = ensembleResponse2.getAdheringToPolicy(); + numCovered = getNumCoveredWriteQuorums(ensemble2, writeQuorumSize, + conf.getMinNumRacksPerWriteQuorum(), repp.bookieAddressResolver); assertTrue(numCovered >= 1 && numCovered < 3); + assertEquals(PlacementPolicyAdherence.FAIL, isEnsembleAdheringToPlacementPolicy2); } catch (BKNotEnoughBookiesException bnebe) { fail("Should not get not enough bookies exception even there is only one rack."); } } + //see: https://github.com/apache/bookkeeper/issues/3722 + @Test + public void testNewEnsembleWithMultipleRacksWithCommonRack() throws Exception { + ClientConfiguration clientConf = new ClientConfiguration(conf); + clientConf.setEnforceMinNumRacksPerWriteQuorum(true); + clientConf.setMinNumRacksPerWriteQuorum(3); + repp.uninitalize(); + repp = new RackawareEnsemblePlacementPolicy(); + repp.initialize(clientConf, Optional.empty(), timer, + DISABLE_ALL, NullStatsLogger.INSTANCE, 
BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER);
+ repp.withDefaultRack(NetworkTopology.DEFAULT_REGION_AND_RACK);
+
+ BookieSocketAddress addr1 = new BookieSocketAddress("127.0.0.1", 3181);
+ BookieSocketAddress addr2 = new BookieSocketAddress("127.0.0.2", 3181);
+ BookieSocketAddress addr3 = new BookieSocketAddress("127.0.0.3", 3181);
+ BookieSocketAddress addr4 = new BookieSocketAddress("127.0.0.4", 3181);
+ BookieSocketAddress addr5 = new BookieSocketAddress("127.0.0.5", 3181);
+ BookieSocketAddress addr6 = new BookieSocketAddress("127.0.0.6", 3181);
+ BookieSocketAddress addr7 = new BookieSocketAddress("127.0.0.7", 3181);
+ BookieSocketAddress addr8 = new BookieSocketAddress("127.0.0.8", 3181);
+ BookieSocketAddress addr9 = new BookieSocketAddress("127.0.0.9", 3181);
+ BookieSocketAddress addr10 = new BookieSocketAddress("127.0.0.10", 3181);
+ // update dns mapping
+ StaticDNSResolver.addNodeToRack(addr1.getHostName(), "/default-region/r1");
+ StaticDNSResolver.addNodeToRack(addr2.getHostName(), "/default-region/r1");
+ StaticDNSResolver.addNodeToRack(addr3.getHostName(), "/default-region/r1");
+ StaticDNSResolver.addNodeToRack(addr4.getHostName(), "/default-region/r1");
+ StaticDNSResolver.addNodeToRack(addr5.getHostName(), "/default-region/r1");
+ StaticDNSResolver.addNodeToRack(addr6.getHostName(), "/default-region/r1");
+ StaticDNSResolver.addNodeToRack(addr7.getHostName(), "/default-region/r1");
+ StaticDNSResolver.addNodeToRack(addr8.getHostName(), "/default-region/r1");
+ StaticDNSResolver.addNodeToRack(addr9.getHostName(), "/default-region/r2");
+ StaticDNSResolver.addNodeToRack(addr10.getHostName(), "/default-region/r3");
+ // Update cluster
+ Set<BookieId> addrs = new HashSet<BookieId>();
+ addrs.add(addr1.toBookieId());
+ addrs.add(addr2.toBookieId());
+ addrs.add(addr3.toBookieId());
+ addrs.add(addr4.toBookieId());
+ addrs.add(addr5.toBookieId());
+ addrs.add(addr6.toBookieId());
+ addrs.add(addr7.toBookieId());
+ addrs.add(addr8.toBookieId());
+ addrs.add(addr9.toBookieId());
+ addrs.add(addr10.toBookieId());
+ repp.onClusterChanged(addrs, new HashSet<BookieId>());
+
+ try {
+ int ensembleSize = 10;
+ int writeQuorumSize = 10;
+ int ackQuorumSize = 2;
+
+ for (int i = 0; i < 50; ++i) {
+ Set<BookieId> excludeBookies = new HashSet<>();
+ EnsemblePlacementPolicy.PlacementResult<List<BookieId>> ensembleResponse =
+ repp.newEnsemble(ensembleSize, writeQuorumSize,
+ ackQuorumSize, null, excludeBookies);
+ }
+ } catch (Exception e) {
+ fail("New ensemble selection should have succeeded");
+ }
+ }
+
+ @Test
+ public void testNewEnsembleWithMultipleRacksWithCommonRackFailed() throws Exception {
+ ClientConfiguration clientConf = new ClientConfiguration(conf);
+ clientConf.setEnforceMinNumRacksPerWriteQuorum(true);
+ clientConf.setMinNumRacksPerWriteQuorum(3);
+ repp.uninitalize();
+ repp = new RackawareEnsemblePlacementPolicy();
+ repp.initialize(clientConf, Optional.empty(), timer,
+ DISABLE_ALL, NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER);
+ repp.withDefaultRack(NetworkTopology.DEFAULT_REGION_AND_RACK);
+
+ BookieSocketAddress addr1 = new BookieSocketAddress("127.0.0.1", 3181);
+ BookieSocketAddress addr2 = new BookieSocketAddress("127.0.0.2", 3181);
+ BookieSocketAddress addr3 = new BookieSocketAddress("127.0.0.3", 3181);
+ BookieSocketAddress addr4 = new BookieSocketAddress("127.0.0.4", 3181);
+ BookieSocketAddress addr5 = new BookieSocketAddress("127.0.0.5", 3181);
+ BookieSocketAddress addr6 = new BookieSocketAddress("127.0.0.6", 3181);
+ BookieSocketAddress addr7 = new BookieSocketAddress("127.0.0.7", 3181);
+ BookieSocketAddress addr8 = new BookieSocketAddress("127.0.0.8", 3181);
+ BookieSocketAddress addr9 = new BookieSocketAddress("127.0.0.9", 3181);
+ BookieSocketAddress addr10 = new BookieSocketAddress("127.0.0.10", 3181);
+ // update dns mapping
+ StaticDNSResolver.addNodeToRack(addr1.getHostName(), "/default-region/r1");
+ StaticDNSResolver.addNodeToRack(addr2.getHostName(), "/default-region/r1");
+ StaticDNSResolver.addNodeToRack(addr3.getHostName(), "/default-region/r1");
+ StaticDNSResolver.addNodeToRack(addr4.getHostName(), "/default-region/r1");
+ StaticDNSResolver.addNodeToRack(addr5.getHostName(), "/default-region/r1");
+ StaticDNSResolver.addNodeToRack(addr6.getHostName(), "/default-region/r1");
+ StaticDNSResolver.addNodeToRack(addr7.getHostName(), "/default-region/r1");
+ StaticDNSResolver.addNodeToRack(addr8.getHostName(), "/default-region/r1");
+ StaticDNSResolver.addNodeToRack(addr9.getHostName(), "/default-region/r1");
+ StaticDNSResolver.addNodeToRack(addr10.getHostName(), "/default-region/r2");
+ // Update cluster
+ Set<BookieId> addrs = new HashSet<BookieId>();
+ addrs.add(addr1.toBookieId());
+ addrs.add(addr2.toBookieId());
+ addrs.add(addr3.toBookieId());
+ addrs.add(addr4.toBookieId());
+ addrs.add(addr5.toBookieId());
+ addrs.add(addr6.toBookieId());
+ addrs.add(addr7.toBookieId());
+ addrs.add(addr8.toBookieId());
+ addrs.add(addr9.toBookieId());
+ addrs.add(addr10.toBookieId());
+ repp.onClusterChanged(addrs, new HashSet<BookieId>());
+
+ try {
+ int ensembleSize = 10;
+ int writeQuorumSize = 10;
+ int ackQuorumSize = 2;
+
+ Set<BookieId> excludeBookies = new HashSet<>();
+ EnsemblePlacementPolicy.PlacementResult<List<BookieId>> ensembleResponse =
+ repp.newEnsemble(ensembleSize, writeQuorumSize,
+ ackQuorumSize, null, excludeBookies);
+ fail("New ensemble selection should have failed with BKNotEnoughBookiesException");
+ } catch (Exception e) {
+ assertTrue(e instanceof BKNotEnoughBookiesException);
+ }
+ }
+
+ @Test
+ public void testNewEnsembleWithPickDifferentRack() throws Exception {
+ BookieSocketAddress addr1 = new BookieSocketAddress("127.0.0.1", 3181);
+ BookieSocketAddress addr2 = new BookieSocketAddress("127.0.0.2", 3181);
+ BookieSocketAddress addr3 = new BookieSocketAddress("127.0.0.3", 3181);
+ BookieSocketAddress addr4 = new BookieSocketAddress("127.0.0.4", 3181);
+ // update dns mapping
+ StaticDNSResolver.addNodeToRack(addr1.getHostName(), "/default-region/r1");
+ StaticDNSResolver.addNodeToRack(addr2.getHostName(), "/default-region/r1");
+ StaticDNSResolver.addNodeToRack(addr3.getHostName(), "/default-region/r2");
+ StaticDNSResolver.addNodeToRack(addr4.getHostName(), "/default-region/r3");
+ // Update cluster
+ Set<BookieId> addrs = new HashSet<BookieId>();
+ addrs.add(addr1.toBookieId());
+ addrs.add(addr2.toBookieId());
+ addrs.add(addr3.toBookieId());
+ addrs.add(addr4.toBookieId());
+ repp.onClusterChanged(addrs, new HashSet<BookieId>());
+
+ int ensembleSize = 3;
+ int writeQuorumSize = 3;
+ int ackQuorumSize = 2;
+
+ Set<BookieId> excludeBookies = new HashSet<>();
+
+ for (int i = 0; i < 50; ++i) {
+ EnsemblePlacementPolicy.PlacementResult<List<BookieId>> ensembleResponse =
+ repp.newEnsemble(ensembleSize, writeQuorumSize,
+ ackQuorumSize, null, excludeBookies);
+ List<BookieId> ensemble = ensembleResponse.getResult();
+ if (ensemble.contains(addr1.toBookieId()) && ensemble.contains(addr2.toBookieId())) {
+ fail("addr1 and addr2 are in the same rack.");
+ }
+ }
+
+ //addr4 shutdown.
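+ // With addr4, the only /default-region/r3 bookie, removed, only rack r1
+ // {addr1, addr2} and rack r2 {addr3} remain, so an ensemble of 3 has to
+ // take both r1 bookies; the loop below asserts exactly that.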
+ addrs.remove(addr4.toBookieId());
+ repp.onClusterChanged(addrs, new HashSet<BookieId>());
+ for (int i = 0; i < 50; ++i) {
+ EnsemblePlacementPolicy.PlacementResult<List<BookieId>> ensembleResponse =
+ repp.newEnsemble(ensembleSize, writeQuorumSize,
+ ackQuorumSize, null, excludeBookies);
+ List<BookieId> ensemble = ensembleResponse.getResult();
+ assertTrue(ensemble.contains(addr1.toBookieId()) && ensemble.contains(addr2.toBookieId()));
+ }
+ }
+
+ @Test
+ public void testNewEnsemblePickLocalRackBookiesByHostname() throws Exception {
+ testNewEnsemblePickLocalRackBookiesInternal(true);
+ }
+
+ @Test
+ public void testNewEnsemblePickLocalRackBookiesByIP() throws Exception {
+ testNewEnsemblePickLocalRackBookiesInternal(false);
+ }
+
+ public void testNewEnsemblePickLocalRackBookiesInternal(boolean useHostnameResolveLocalNodePlacementPolicy)
+ throws Exception {
+ BookieSocketAddress addr1 = new BookieSocketAddress("127.0.0.1", 3181);
+ BookieSocketAddress addr2 = new BookieSocketAddress("127.0.0.2", 3181);
+ BookieSocketAddress addr3 = new BookieSocketAddress("127.0.0.3", 3181);
+ BookieSocketAddress addr4 = new BookieSocketAddress("127.0.0.4", 3181);
+ BookieSocketAddress addr5 = new BookieSocketAddress("127.0.0.5", 3181);
+ BookieSocketAddress addr6 = new BookieSocketAddress("127.0.0.6", 3181);
+ BookieSocketAddress addr7 = new BookieSocketAddress("127.0.0.7", 3181);
+
+ // update dns mapping
+ StaticDNSResolver.addNodeToRack(addr1.getHostName(), "/default-region/r1");
+ StaticDNSResolver.addNodeToRack(addr2.getHostName(), "/default-region/r2");
+ StaticDNSResolver.addNodeToRack(addr3.getHostName(), "/default-region/r2");
+ StaticDNSResolver.addNodeToRack(addr4.getHostName(), "/default-region/r2");
+ StaticDNSResolver.addNodeToRack(addr5.getHostName(), "/default-region/r3");
+ StaticDNSResolver.addNodeToRack(addr6.getHostName(), "/default-region/r4");
+ StaticDNSResolver.addNodeToRack(addr7.getHostName(), "/default-region/r5");
+
+ String hostname = useHostnameResolveLocalNodePlacementPolicy
+ ? InetAddress.getLocalHost().getCanonicalHostName() : InetAddress.getLocalHost().getHostAddress();
+ StaticDNSResolver.addNodeToRack(hostname, "/default-region/r1");
+ if (useHostnameResolveLocalNodePlacementPolicy) {
+ conf.setUseHostnameResolveLocalNodePlacementPolicy(useHostnameResolveLocalNodePlacementPolicy);
+ }
+
+ repp.initialize(conf, Optional.empty(), timer,
+ DISABLE_ALL, NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER);
+ repp.withDefaultRack(NetworkTopology.DEFAULT_REGION_AND_RACK);
+ // Update cluster
+ Set<BookieId> addrs = new HashSet<BookieId>();
+ addrs.add(addr1.toBookieId());
+ addrs.add(addr2.toBookieId());
+ addrs.add(addr3.toBookieId());
+ addrs.add(addr4.toBookieId());
+ addrs.add(addr5.toBookieId());
+ addrs.add(addr6.toBookieId());
+ addrs.add(addr7.toBookieId());
+ repp.onClusterChanged(addrs, new HashSet<BookieId>());
+
+ int ensembleSize = 3;
+ int writeQuorumSize = 3;
+ int ackQuorumSize = 2;
+
+ Set<BookieId> excludeBookies = new HashSet<>();
+
+ for (int i = 0; i < 50000; ++i) {
+ EnsemblePlacementPolicy.PlacementResult<List<BookieId>> ensembleResponse =
+ repp.newEnsemble(ensembleSize, writeQuorumSize,
+ ackQuorumSize, null, excludeBookies);
+ List<BookieId> ensemble = ensembleResponse.getResult();
+ if (!ensemble.contains(addr1.toBookieId())) {
+ fail("Failed to select a bookie located on the same rack as the bookie client");
+ }
+ if (ensemble.contains(addr2.toBookieId()) && ensemble.contains(addr3.toBookieId())) {
+ fail("addr2 and addr3 are in the same rack.");
+ }
+ }
+
+ //addr5 shutdown.
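+ // The local client host was mapped to /default-region/r1 above, and addr1
+ // is the only r1 bookie, so local-rack placement should keep selecting
+ // addr1 even after addr5 (rack r3) is removed below.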
+ addrs.remove(addr5.toBookieId()); + repp.onClusterChanged(addrs, new HashSet<BookieId>()); + for (int i = 0; i < 50000; ++i) { + EnsemblePlacementPolicy.PlacementResult<List<BookieId>> ensembleResponse = + repp.newEnsemble(ensembleSize, writeQuorumSize, + ackQuorumSize, null, excludeBookies); + List<BookieId> ensemble = ensembleResponse.getResult(); + if (!ensemble.contains(addr1.toBookieId())) { + fail("Failed to select bookie located on the same rack as the bookie client"); + } + } + + } + @Test public void testMinNumRacksPerWriteQuorumOfRacks() throws Exception { int numOfRacksToCreate = 6; int numOfNodesInEachRack = 5; // Update cluster - Set<BookieSocketAddress> addrs = new HashSet<BookieSocketAddress>(); - BookieSocketAddress addr; + Set<BookieId> addrs = new HashSet<BookieId>(); + BookieId addr; for (int i = 0; i < numOfRacksToCreate; i++) { for (int j = 0; j < numOfNodesInEachRack; j++) { - addr = new BookieSocketAddress("128.0.0." + ((i * numOfNodesInEachRack) + j), 3181); + addr = new BookieSocketAddress("128.0.0." + ((i * numOfNodesInEachRack) + j), 3181).toBookieId(); // update dns mapping - StaticDNSResolver.addNodeToRack(addr.getHostName(), "/default-region/r" + i); + StaticDNSResolver.addNodeToRack("128.0.0." + ((i * numOfNodesInEachRack) + j), "/default-region/r" + i); addrs.add(addr); } } @@ -1267,19 +1880,24 @@ public void testMinNumRacksPerWriteQuorumOfRacks() throws Exception { } } - void validateNumOfWriteQuorumsCoveredInEnsembleCreation(Set<BookieSocketAddress> addrs, + void validateNumOfWriteQuorumsCoveredInEnsembleCreation(Set<BookieId> addrs, int minNumRacksPerWriteQuorum, int ensembleSize, int writeQuorumSize) throws Exception { ClientConfiguration newConf = new ClientConfiguration(conf); newConf.setMinNumRacksPerWriteQuorum(minNumRacksPerWriteQuorum); repp = new RackawareEnsemblePlacementPolicy(); - repp.initialize(newConf, Optional.<DNSToSwitchMapping> empty(), timer, DISABLE_ALL, NullStatsLogger.INSTANCE); + repp.initialize(newConf, Optional.<DNSToSwitchMapping> 
empty(), timer, + DISABLE_ALL, NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); repp.withDefaultRack(NetworkTopology.DEFAULT_REGION_AND_RACK); - repp.onClusterChanged(addrs, new HashSet()); - - List ensemble = repp.newEnsemble(ensembleSize, writeQuorumSize, writeQuorumSize, null, - new HashSet<>()); - int numCovered = getNumCoveredWriteQuorums(ensemble, writeQuorumSize, minNumRacksPerWriteQuorum); + repp.onClusterChanged(addrs, new HashSet()); + EnsemblePlacementPolicy.PlacementResult> ensembleResponse = + repp.newEnsemble(ensembleSize, writeQuorumSize, + writeQuorumSize, null, new HashSet<>()); + List ensemble = ensembleResponse.getResult(); + PlacementPolicyAdherence isEnsembleAdheringToPlacementPolicy = ensembleResponse.getAdheringToPolicy(); + int numCovered = getNumCoveredWriteQuorums(ensemble, writeQuorumSize, + minNumRacksPerWriteQuorum, repp.bookieAddressResolver); assertEquals("minimum number of racks covered for writequorum ensemble: " + ensemble, ensembleSize, numCovered); + assertEquals(PlacementPolicyAdherence.MEETS_STRICT, isEnsembleAdheringToPlacementPolicy); } @Test @@ -1303,28 +1921,39 @@ public void testNewEnsembleWithEnoughRacks() throws Exception { StaticDNSResolver.addNodeToRack(addr8.getHostName(), "/default-region/r4"); int availableNumOfRacks = 4; // Update cluster - Set addrs = new HashSet(); - addrs.add(addr1); - addrs.add(addr2); - addrs.add(addr3); - addrs.add(addr4); - addrs.add(addr5); - addrs.add(addr6); - addrs.add(addr7); - addrs.add(addr8); - repp.onClusterChanged(addrs, new HashSet()); + Set addrs = new HashSet(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + addrs.add(addr5.toBookieId()); + addrs.add(addr6.toBookieId()); + addrs.add(addr7.toBookieId()); + addrs.add(addr8.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); try { int ensembleSize = 3; int writeQuorumSize = 3; int ackQuorumSize = 2; - List ensemble1 = repp.newEnsemble(ensembleSize, writeQuorumSize, ackQuorumSize, - null, new HashSet<>()); - assertEquals(ensembleSize, getNumCoveredWriteQuorums(ensemble1, 2, conf.getMinNumRacksPerWriteQuorum())); + EnsemblePlacementPolicy.PlacementResult> ensembleResponse = + repp.newEnsemble(ensembleSize, writeQuorumSize, + ackQuorumSize, null, new HashSet<>()); + List ensemble1 = ensembleResponse.getResult(); + PlacementPolicyAdherence isEnsembleAdheringToPlacementPolicy1 = ensembleResponse.getAdheringToPolicy(); + assertEquals(ensembleSize, + getNumCoveredWriteQuorums(ensemble1, writeQuorumSize, conf.getMinNumRacksPerWriteQuorum(), + repp.bookieAddressResolver)); + assertEquals(PlacementPolicyAdherence.MEETS_STRICT, isEnsembleAdheringToPlacementPolicy1); ensembleSize = 4; writeQuorumSize = 4; - List ensemble2 = repp.newEnsemble(ensembleSize, writeQuorumSize, 2, null, - new HashSet<>()); - assertEquals(ensembleSize, getNumCoveredWriteQuorums(ensemble2, 2, conf.getMinNumRacksPerWriteQuorum())); + EnsemblePlacementPolicy.PlacementResult> ensembleResponse2 = + repp.newEnsemble(ensembleSize, writeQuorumSize, 2, null, new HashSet<>()); + List ensemble2 = ensembleResponse2.getResult(); + PlacementPolicyAdherence isEnsembleAdheringToPlacementPolicy2 = ensembleResponse2.getAdheringToPolicy(); + assertEquals(ensembleSize, + getNumCoveredWriteQuorums(ensemble2, writeQuorumSize, conf.getMinNumRacksPerWriteQuorum(), + repp.bookieAddressResolver)); + assertEquals(PlacementPolicyAdherence.MEETS_STRICT, isEnsembleAdheringToPlacementPolicy2); } catch 
(BKNotEnoughBookiesException bnebe) { fail("Should not get not enough bookies exception even there is only one rack."); } @@ -1345,14 +1974,14 @@ public void testRemoveBookieFromCluster() { StaticDNSResolver.addNodeToRack(addr3.getHostName(), "/default-region/r2"); StaticDNSResolver.addNodeToRack(addr4.getHostName(), "/default-region/r3"); // Update cluster - Set addrs = new HashSet(); - addrs.add(addr1); - addrs.add(addr2); - addrs.add(addr3); - addrs.add(addr4); - repp.onClusterChanged(addrs, new HashSet()); - addrs.remove(addr1); - repp.onClusterChanged(addrs, new HashSet()); + Set addrs = new HashSet(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); + addrs.remove(addr1.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); } @Test @@ -1371,38 +2000,47 @@ public void testWeightedPlacementAndReplaceBookieWithEnoughBookiesInSameRack() t StaticDNSResolver.addNodeToRack(addr4.getSocketAddress().getAddress().getHostAddress(), NetworkTopology.DEFAULT_REGION + "/r2"); // Update cluster - Set addrs = new HashSet(); - addrs.add(addr1); - addrs.add(addr2); - addrs.add(addr3); - addrs.add(addr4); + Set addrs = new HashSet(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); int multiple = 10; conf.setDiskWeightBasedPlacementEnabled(true); conf.setBookieMaxWeightMultipleForWeightBasedPlacement(-1); // no max cap on weight - repp.initialize(conf, Optional.empty(), timer, DISABLE_ALL, NullStatsLogger.INSTANCE); + repp.initialize(conf, Optional.empty(), timer, + DISABLE_ALL, NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); repp.withDefaultRack(NetworkTopology.DEFAULT_REGION_AND_RACK); - repp.onClusterChanged(addrs, new HashSet()); - Map bookieInfoMap = new HashMap(); - bookieInfoMap.put(addr1, new BookieInfo(100L, 100L)); - bookieInfoMap.put(addr2, new BookieInfo(100L, 100L)); - bookieInfoMap.put(addr3, new BookieInfo(100L, 100L)); - bookieInfoMap.put(addr4, new BookieInfo(multiple * 100L, multiple * 100L)); + repp.onClusterChanged(addrs, new HashSet()); + Map bookieInfoMap = new HashMap(); + bookieInfoMap.put(addr1.toBookieId(), new BookieInfo(100L, 100L)); + bookieInfoMap.put(addr2.toBookieId(), new BookieInfo(100L, 100L)); + bookieInfoMap.put(addr3.toBookieId(), new BookieInfo(100L, 100L)); + bookieInfoMap.put(addr4.toBookieId(), new BookieInfo(multiple * 100L, multiple * 100L)); repp.updateBookieInfo(bookieInfoMap); - Map selectionCounts = new HashMap(); - selectionCounts.put(addr3, 0L); - selectionCounts.put(addr4, 0L); + Map selectionCounts = new HashMap(); + selectionCounts.put(addr3.toBookieId(), 0L); + selectionCounts.put(addr4.toBookieId(), 0L); int numTries = 50000; - BookieSocketAddress replacedBookie; + EnsemblePlacementPolicy.PlacementResult replaceBookieResponse; + PlacementPolicyAdherence isEnsembleAdheringToPlacementPolicy; + BookieId replacedBookie; for (int i = 0; i < numTries; i++) { // replace node under r2 - replacedBookie = repp.replaceBookie(1, 1, 1, null, new HashSet<>(), addr2, new HashSet<>()); - assertTrue("replaced : " + replacedBookie, addr3.equals(replacedBookie) || addr4.equals(replacedBookie)); + replaceBookieResponse = repp.replaceBookie(1, 1, 1, null, new ArrayList<>(), + addr2.toBookieId(), new HashSet<>()); + replacedBookie = replaceBookieResponse.getResult(); + isEnsembleAdheringToPlacementPolicy = 
replaceBookieResponse.getAdheringToPolicy(); + assertTrue("replaced : " + replacedBookie, addr3.toBookieId().equals(replacedBookie) + || addr4.toBookieId().equals(replacedBookie)); + assertEquals(PlacementPolicyAdherence.MEETS_STRICT, isEnsembleAdheringToPlacementPolicy); selectionCounts.put(replacedBookie, selectionCounts.get(replacedBookie) + 1); } - double observedMultiple = ((double) selectionCounts.get(addr4) / (double) selectionCounts.get(addr3)); + double observedMultiple = ((double) selectionCounts.get(addr4.toBookieId()) + / (double) selectionCounts.get(addr3.toBookieId())); assertTrue("Weights not being honored " + observedMultiple, Math.abs(observedMultiple - multiple) < 1); } @@ -1426,59 +2064,71 @@ public void testWeightedPlacementAndReplaceBookieWithoutEnoughBookiesInSameRack( StaticDNSResolver.addNodeToRack(addr4.getSocketAddress().getAddress().getHostAddress(), NetworkTopology.DEFAULT_REGION + "/r4"); // Update cluster - Set addrs = new HashSet(); - addrs.add(addr0); - addrs.add(addr1); - addrs.add(addr2); - addrs.add(addr3); - addrs.add(addr4); + Set addrs = new HashSet(); + addrs.add(addr0.toBookieId()); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); int multiple = 10, maxMultiple = 4; conf.setDiskWeightBasedPlacementEnabled(true); conf.setBookieMaxWeightMultipleForWeightBasedPlacement(maxMultiple); - repp.initialize(conf, Optional.empty(), timer, DISABLE_ALL, NullStatsLogger.INSTANCE); + repp.initialize(conf, Optional.empty(), timer, + DISABLE_ALL, NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); repp.withDefaultRack(NetworkTopology.DEFAULT_REGION_AND_RACK); - repp.onClusterChanged(addrs, new HashSet()); - Map bookieInfoMap = new HashMap(); - bookieInfoMap.put(addr0, new BookieInfo(50L, 50L)); - bookieInfoMap.put(addr1, new BookieInfo(100L, 100L)); - bookieInfoMap.put(addr2, new BookieInfo(100L, 100L)); - bookieInfoMap.put(addr3, new BookieInfo(200L, 200L)); - bookieInfoMap.put(addr4, new BookieInfo(multiple * 50L, multiple * 50L)); + repp.onClusterChanged(addrs, new HashSet()); + Map bookieInfoMap = new HashMap(); + bookieInfoMap.put(addr0.toBookieId(), new BookieInfo(50L, 50L)); + bookieInfoMap.put(addr1.toBookieId(), new BookieInfo(100L, 100L)); + bookieInfoMap.put(addr2.toBookieId(), new BookieInfo(100L, 100L)); + bookieInfoMap.put(addr3.toBookieId(), new BookieInfo(200L, 200L)); + bookieInfoMap.put(addr4.toBookieId(), new BookieInfo(multiple * 50L, multiple * 50L)); repp.updateBookieInfo(bookieInfoMap); - Map selectionCounts = new HashMap(); - selectionCounts.put(addr0, 0L); - selectionCounts.put(addr1, 0L); - selectionCounts.put(addr2, 0L); - selectionCounts.put(addr3, 0L); - selectionCounts.put(addr4, 0L); + Map selectionCounts = new HashMap(); + selectionCounts.put(addr0.toBookieId(), 0L); + selectionCounts.put(addr1.toBookieId(), 0L); + selectionCounts.put(addr2.toBookieId(), 0L); + selectionCounts.put(addr3.toBookieId(), 0L); + selectionCounts.put(addr4.toBookieId(), 0L); int numTries = 50000; - BookieSocketAddress replacedBookie; + EnsemblePlacementPolicy.PlacementResult replaceBookieResponse; + BookieId replacedBookie; + PlacementPolicyAdherence isEnsembleAdheringToPlacementPolicy; for (int i = 0; i < numTries; i++) { // addr2 is on /r2 and this is the only one on this rack. So the replacement // will come from other racks. 
However, the weight should be honored in such // selections as well - replacedBookie = repp.replaceBookie(1, 1, 1, null, new HashSet<>(), addr2, new HashSet<>()); - assertTrue(addr0.equals(replacedBookie) || addr1.equals(replacedBookie) || addr3.equals(replacedBookie) - || addr4.equals(replacedBookie)); + replaceBookieResponse = repp.replaceBookie(1, 1, 1, null, new ArrayList<>(), + addr2.toBookieId(), new HashSet<>()); + replacedBookie = replaceBookieResponse.getResult(); + isEnsembleAdheringToPlacementPolicy = replaceBookieResponse.getAdheringToPolicy(); + assertTrue(addr0.toBookieId().equals(replacedBookie) + || addr1.toBookieId().equals(replacedBookie) + || addr3.toBookieId().equals(replacedBookie) + || addr4.toBookieId().equals(replacedBookie)); + assertEquals(PlacementPolicyAdherence.MEETS_STRICT, isEnsembleAdheringToPlacementPolicy); selectionCounts.put(replacedBookie, selectionCounts.get(replacedBookie) + 1); } /* - * since addr2 has to be replaced, the remaining bookies weight are - 50, 100, 200, 500 (10*50) - * So the median calculated by WeightedRandomSelection is (100 + 200) / 2 = 150 + * Although addr2 has to be replaced, an excluded bookie's weight is still included in the candidate list. + * All the bookie weights are 50, 100, 100, 200, 500 (10*50), + * so the median calculated by WeightedRandomSelection is 100 */ - double medianWeight = 150; - double medianSelectionCounts = (double) (medianWeight / bookieInfoMap.get(addr1).getWeight()) - * selectionCounts.get(addr1); - double observedMultiple1 = ((double) selectionCounts.get(addr4) / (double) medianSelectionCounts); - double observedMultiple2 = ((double) selectionCounts.get(addr4) / (double) selectionCounts.get(addr3)); + double medianWeight = 100; + double medianSelectionCounts = (double) (medianWeight / bookieInfoMap.get(addr1.toBookieId()).getWeight()) + * selectionCounts.get(addr1.toBookieId()); + double observedMultiple1 = ((double) selectionCounts.get(addr4.toBookieId()) + / (double) medianSelectionCounts); + double observedMultiple2 = ((double) selectionCounts.get(addr4.toBookieId()) + / (double) selectionCounts.get(addr3.toBookieId())); LOG.info("oM1 " + observedMultiple1 + " oM2 " + observedMultiple2); assertTrue("Weights not being honored expected " + maxMultiple + " observed " + observedMultiple1, Math.abs(observedMultiple1 - maxMultiple) < 1); // expected multiple for addr3 - double expected = (medianWeight * maxMultiple) / bookieInfoMap.get(addr3).getWeight(); + double expected = (medianWeight * maxMultiple) / bookieInfoMap.get(addr3.toBookieId()).getWeight(); assertTrue("Weights not being honored expected " + expected + " observed " + observedMultiple2, Math.abs(observedMultiple2 - expected) < 1); } @@ -1516,45 +2166,47 @@ public void testWeightedPlacementAndNewEnsembleWithEnoughBookiesInSameRack() thr NetworkTopology.DEFAULT_REGION + "/r3"); // Update cluster - Set<BookieSocketAddress> addrs = new HashSet<BookieSocketAddress>(); - addrs.add(addr1); - addrs.add(addr2); - addrs.add(addr3); - addrs.add(addr4); - addrs.add(addr5); - addrs.add(addr6); - addrs.add(addr7); - addrs.add(addr8); - addrs.add(addr9); + Set<BookieId> addrs = new HashSet<BookieId>(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + addrs.add(addr5.toBookieId()); + addrs.add(addr6.toBookieId()); + addrs.add(addr7.toBookieId()); + addrs.add(addr8.toBookieId()); + addrs.add(addr9.toBookieId()); int maxMultiple = 4; conf.setDiskWeightBasedPlacementEnabled(true); 
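// With disk-weight-based placement, a bookie's selection probability is roughly proportional to its weight, and the max weight multiple below caps any single bookie's weight at maxMultiple times the median candidate weight (the assertions further down rely on this cap). 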
conf.setBookieMaxWeightMultipleForWeightBasedPlacement(maxMultiple); - repp.initialize(conf, Optional.empty(), timer, DISABLE_ALL, NullStatsLogger.INSTANCE); + repp.initialize(conf, Optional.empty(), timer, + DISABLE_ALL, NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); repp.withDefaultRack(NetworkTopology.DEFAULT_REGION_AND_RACK); - repp.onClusterChanged(addrs, new HashSet()); - Map bookieInfoMap = new HashMap(); - bookieInfoMap.put(addr1, new BookieInfo(100L, 100L)); - bookieInfoMap.put(addr2, new BookieInfo(100L, 100L)); - bookieInfoMap.put(addr3, new BookieInfo(100L, 100L)); - bookieInfoMap.put(addr4, new BookieInfo(100L, 100L)); - bookieInfoMap.put(addr5, new BookieInfo(1000L, 1000L)); - bookieInfoMap.put(addr6, new BookieInfo(100L, 100L)); - bookieInfoMap.put(addr7, new BookieInfo(100L, 100L)); - bookieInfoMap.put(addr8, new BookieInfo(100L, 100L)); - bookieInfoMap.put(addr9, new BookieInfo(1000L, 1000L)); + repp.onClusterChanged(addrs, new HashSet()); + Map bookieInfoMap = new HashMap(); + bookieInfoMap.put(addr1.toBookieId(), new BookieInfo(100L, 100L)); + bookieInfoMap.put(addr2.toBookieId(), new BookieInfo(100L, 100L)); + bookieInfoMap.put(addr3.toBookieId(), new BookieInfo(100L, 100L)); + bookieInfoMap.put(addr4.toBookieId(), new BookieInfo(100L, 100L)); + bookieInfoMap.put(addr5.toBookieId(), new BookieInfo(1000L, 1000L)); + bookieInfoMap.put(addr6.toBookieId(), new BookieInfo(100L, 100L)); + bookieInfoMap.put(addr7.toBookieId(), new BookieInfo(100L, 100L)); + bookieInfoMap.put(addr8.toBookieId(), new BookieInfo(100L, 100L)); + bookieInfoMap.put(addr9.toBookieId(), new BookieInfo(1000L, 1000L)); repp.updateBookieInfo(bookieInfoMap); - Map selectionCounts = new HashMap(); - for (BookieSocketAddress b : addrs) { + Map selectionCounts = new HashMap(); + for (BookieId b : addrs) { selectionCounts.put(b, 0L); } int numTries = 10000; - Set excludeList = new HashSet(); - List ensemble; + Set excludeList = new HashSet(); + EnsemblePlacementPolicy.PlacementResult> ensembleResponse; + List ensemble; int ensembleSize = 3; int writeQuorumSize = 2; int acqQuorumSize = 2; @@ -1562,20 +2214,25 @@ public void testWeightedPlacementAndNewEnsembleWithEnoughBookiesInSameRack() thr // addr2 is on /r2 and this is the only one on this rack. So the replacement // will come from other racks. 
However, the weight should be honored in such // selections as well - ensemble = repp.newEnsemble(ensembleSize, writeQuorumSize, acqQuorumSize, null, excludeList); + ensembleResponse = repp.newEnsemble(ensembleSize, writeQuorumSize, acqQuorumSize, null, excludeList); + ensemble = ensembleResponse.getResult(); assertTrue( "Rackaware selection not happening " - + getNumCoveredWriteQuorums(ensemble, writeQuorumSize, conf.getMinNumRacksPerWriteQuorum()), - getNumCoveredWriteQuorums(ensemble, writeQuorumSize, conf.getMinNumRacksPerWriteQuorum()) >= 2); - for (BookieSocketAddress b : ensemble) { + + getNumCoveredWriteQuorums(ensemble, writeQuorumSize, + conf.getMinNumRacksPerWriteQuorum(), repp.bookieAddressResolver), + getNumCoveredWriteQuorums(ensemble, writeQuorumSize, + conf.getMinNumRacksPerWriteQuorum(), repp.bookieAddressResolver) >= 2); + for (BookieId b : ensemble) { selectionCounts.put(b, selectionCounts.get(b) + 1); } } // the median weight used is 100 since addr2 and addr6 have the same weight, we use their // selection counts as the same as median - double observedMultiple1 = ((double) selectionCounts.get(addr5) / (double) selectionCounts.get(addr2)); - double observedMultiple2 = ((double) selectionCounts.get(addr9) / (double) selectionCounts.get(addr6)); + double observedMultiple1 = ((double) selectionCounts.get(addr5.toBookieId()) + / (double) selectionCounts.get(addr2.toBookieId())); + double observedMultiple2 = ((double) selectionCounts.get(addr9.toBookieId()) + / (double) selectionCounts.get(addr6.toBookieId())); assertTrue("Weights not being honored expected 2 observed " + observedMultiple1, Math.abs(observedMultiple1 - maxMultiple) < 0.5); assertTrue("Weights not being honored expected 4 observed " + observedMultiple2, @@ -1602,58 +2259,61 @@ public void testWeightedPlacementAndNewEnsembleWithoutEnoughBookies() throws Exc StaticDNSResolver.addNodeToRack(addr5.getSocketAddress().getAddress().getHostAddress(), NetworkTopology.DEFAULT_REGION + "/r3"); // Update cluster - Set addrs = new HashSet(); - addrs.add(addr1); - addrs.add(addr2); - addrs.add(addr3); - addrs.add(addr4); - addrs.add(addr5); + Set addrs = new HashSet(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + addrs.add(addr5.toBookieId()); int maxMultiple = 4; conf.setDiskWeightBasedPlacementEnabled(true); conf.setBookieMaxWeightMultipleForWeightBasedPlacement(maxMultiple); - repp.initialize(conf, Optional.empty(), timer, DISABLE_ALL, NullStatsLogger.INSTANCE); + repp.initialize(conf, Optional.empty(), timer, + DISABLE_ALL, NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); repp.withDefaultRack(NetworkTopology.DEFAULT_REGION_AND_RACK); - repp.onClusterChanged(addrs, new HashSet()); - Map bookieInfoMap = new HashMap(); - bookieInfoMap.put(addr1, new BookieInfo(100L, 100L)); - bookieInfoMap.put(addr2, new BookieInfo(100L, 100L)); - bookieInfoMap.put(addr3, new BookieInfo(1000L, 1000L)); - bookieInfoMap.put(addr4, new BookieInfo(100L, 100L)); - bookieInfoMap.put(addr5, new BookieInfo(1000L, 1000L)); + repp.onClusterChanged(addrs, new HashSet()); + Map bookieInfoMap = new HashMap(); + bookieInfoMap.put(addr1.toBookieId(), new BookieInfo(100L, 100L)); + bookieInfoMap.put(addr2.toBookieId(), new BookieInfo(100L, 100L)); + bookieInfoMap.put(addr3.toBookieId(), new BookieInfo(1000L, 1000L)); + bookieInfoMap.put(addr4.toBookieId(), new BookieInfo(100L, 100L)); + bookieInfoMap.put(addr5.toBookieId(), new 
BookieInfo(1000L, 1000L)); repp.updateBookieInfo(bookieInfoMap); - - List ensemble = new ArrayList(); - Set excludeList = new HashSet(); + EnsemblePlacementPolicy.PlacementResult> ensembleResponse; + List ensemble; + Set excludeList = new HashSet(); try { - excludeList.add(addr1); - excludeList.add(addr2); - excludeList.add(addr3); - excludeList.add(addr4); - ensemble = repp.newEnsemble(3, 2, 2, null, excludeList); + excludeList.add(addr1.toBookieId()); + excludeList.add(addr2.toBookieId()); + excludeList.add(addr3.toBookieId()); + excludeList.add(addr4.toBookieId()); + ensembleResponse = repp.newEnsemble(3, 2, 2, null, excludeList); + ensemble = ensembleResponse.getResult(); fail("Should throw BKNotEnoughBookiesException when there is not enough bookies" + ensemble); } catch (BKNotEnoughBookiesException e) { // this is expected } try { - ensemble = repp.newEnsemble(1, 1, 1, null, excludeList); + ensembleResponse = repp.newEnsemble(1, 1, 1, null, excludeList); + ensemble = ensembleResponse.getResult(); } catch (BKNotEnoughBookiesException e) { fail("Should not throw BKNotEnoughBookiesException when there are enough bookies for the ensemble"); } } - static int getNumCoveredWriteQuorums(List ensemble, int writeQuorumSize, - int minNumRacksPerWriteQuorumConfValue) throws Exception { + static int getNumCoveredWriteQuorums(List ensemble, int writeQuorumSize, + int minNumRacksPerWriteQuorumConfValue, BookieAddressResolver bookieAddressResolver) throws Exception { int ensembleSize = ensemble.size(); int numCoveredWriteQuorums = 0; for (int i = 0; i < ensembleSize; i++) { Set racks = new HashSet(); for (int j = 0; j < writeQuorumSize; j++) { int bookieIdx = (i + j) % ensembleSize; - BookieSocketAddress addr = ensemble.get(bookieIdx); - racks.add(StaticDNSResolver.getRack(addr.getHostName())); + BookieId addr = ensemble.get(bookieIdx); + racks.add(StaticDNSResolver.getRack(bookieAddressResolver.resolve(addr).getHostName())); } int numOfRacksToCoverTo = Math.max(Math.min(writeQuorumSize, minNumRacksPerWriteQuorumConfValue), 2); numCoveredWriteQuorums += (racks.size() >= numOfRacksToCoverTo ? 
1 : 0); @@ -1667,34 +2327,97 @@ public void testNodeWithFailures() throws Exception { updateMyRack(NetworkTopology.DEFAULT_REGION_AND_RACK); repp = new RackawareEnsemblePlacementPolicy(); - repp.initialize(conf, Optional.empty(), timer, DISABLE_ALL, NullStatsLogger.INSTANCE); + repp.initialize(conf, Optional.empty(), timer, + DISABLE_ALL, NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); repp.withDefaultRack(NetworkTopology.DEFAULT_REGION_AND_RACK); - Set addrs = new HashSet(); - addrs.add(addr1); - addrs.add(addr2); - addrs.add(addr3); - addrs.add(addr4); - repp.onClusterChanged(addrs, new HashSet()); + Set addrs = new HashSet(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); - HashMap bookieFailures = new HashMap(); + HashMap bookieFailures = new HashMap(); - bookieFailures.put(addr1, 20L); - bookieFailures.put(addr2, 22L); + bookieFailures.put(addr1.toBookieId(), 20L); + bookieFailures.put(addr2.toBookieId(), 22L); // remove failure bookies: addr1 and addr2 - addrs = new HashSet(); - addrs.add(addr3); - addrs.add(addr4); - repp.onClusterChanged(addrs, new HashSet()); + addrs = new HashSet(); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); - DistributionSchedule.WriteSet reoderSet = repp.reorderReadSequence( + DistributionSchedule.WriteSet reorderSet = repp.reorderReadSequence( ensemble, getBookiesHealthInfo(bookieFailures, new HashMap<>()), writeSet); - LOG.info("reorder set : {}", reoderSet); - assertEquals(ensemble.get(reoderSet.get(2)), addr1); - assertEquals(ensemble.get(reoderSet.get(3)), addr2); - assertEquals(ensemble.get(reoderSet.get(0)), addr3); - assertEquals(ensemble.get(reoderSet.get(1)), addr4); + LOG.info("reorder set : {}", reorderSet); + assertEquals(ensemble.get(reorderSet.get(2)), addr1.toBookieId()); + assertEquals(ensemble.get(reorderSet.get(3)), addr2.toBookieId()); + assertEquals(ensemble.get(reorderSet.get(0)), addr3.toBookieId()); + assertEquals(ensemble.get(reorderSet.get(1)), addr4.toBookieId()); + StaticDNSResolver.reset(); + } + + @Test + public void testReplaceNotAvailableBookieWithDefaultRack() throws Exception { + repp.uninitalize(); + repp.withDefaultRack(NetworkTopology.DEFAULT_RACK); + AtomicInteger counter = new AtomicInteger(); + BookieAddressResolver mockResolver = new BookieAddressResolver() { + @Override + public BookieSocketAddress resolve(BookieId bookieId) throws BookieIdNotResolvedException { + if (bookieId.equals(addr1.toBookieId()) && counter.getAndIncrement() >= 1) { + throw new BookieIdNotResolvedException(bookieId, + new RuntimeException(addr1.toBookieId() + " shutdown")); + } + try { + return new BookieSocketAddress(bookieId.toString()); + } catch (UnknownHostException err) { + throw new BookieIdNotResolvedException(bookieId, err); + } + } + }; + + repp.initialize(conf, Optional.empty(), timer, DISABLE_ALL, NullStatsLogger.INSTANCE, + mockResolver); + BookieSocketAddress addr1 = new BookieSocketAddress("127.0.0.2", 3181); + BookieSocketAddress addr2 = new BookieSocketAddress("127.0.0.3", 3181); + BookieSocketAddress addr3 = new BookieSocketAddress("127.0.0.4", 3181); + BookieSocketAddress addr4 = new BookieSocketAddress("127.0.0.5", 3181); + // update dns mapping + StaticDNSResolver.addNodeToRack(addr1.getHostName(), NetworkTopology.DEFAULT_RACK); + StaticDNSResolver.addNodeToRack(addr2.getHostName(), "/r1"); + 
StaticDNSResolver.addNodeToRack(addr3.getHostName(), "/r1"); + StaticDNSResolver.addNodeToRack(addr4.getHostName(), NetworkTopology.DEFAULT_RACK); + + // Update cluster + Set addrs = new HashSet(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); + + // replace node under r1 + EnsemblePlacementPolicy.PlacementResult replaceBookieResponse = + repp.replaceBookie(1, 1, 1, null, new ArrayList<>(), addr1.toBookieId(), new HashSet<>()); + BookieId replacedBookie = replaceBookieResponse.getResult(); + assertEquals(addr4.toBookieId(), replacedBookie); + + //clear history bookies and make addr1 shutdown. + repp = new RackawareEnsemblePlacementPolicy(); + repp.initialize(conf, Optional.empty(), timer, DISABLE_ALL, NullStatsLogger.INSTANCE, + mockResolver); + + addrs.remove(addr1.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); + + // replace node under r1 again + replaceBookieResponse = + repp.replaceBookie(1, 1, 1, null, new ArrayList<>(), addr1.toBookieId(), new HashSet<>()); + replacedBookie = replaceBookieResponse.getResult(); + assertEquals(addr4.toBookieId(), replacedBookie); } @Test @@ -1706,30 +2429,38 @@ public void testPlacementOnStabilizeNetworkTopology() throws Exception { ClientConfiguration confLocal = new ClientConfiguration(); confLocal.addConfiguration(conf); confLocal.setNetworkTopologyStabilizePeriodSeconds(99999); - repp.initialize(confLocal, Optional.empty(), timer, DISABLE_ALL, NullStatsLogger.INSTANCE); + repp.initialize(confLocal, Optional.empty(), timer, + DISABLE_ALL, NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); repp.withDefaultRack(NetworkTopology.DEFAULT_REGION_AND_RACK); - Set addrs = new HashSet(); - addrs.add(addr1); - addrs.add(addr2); - addrs.add(addr3); - addrs.add(addr4); - repp.onClusterChanged(addrs, new HashSet()); + Set addrs = new HashSet(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); // addr4 left - addrs.remove(addr4); - Set deadBookies = repp.onClusterChanged(addrs, new HashSet()); + addrs.remove(addr4.toBookieId()); + Set deadBookies = repp.onClusterChanged(addrs, new HashSet()); assertTrue(deadBookies.isEmpty()); // we will never use addr4 even it is in the stabilized network topology for (int i = 0; i < 5; i++) { - List ensemble = - repp.newEnsemble(3, 3, 3, null, new HashSet()); - assertFalse(ensemble.contains(addr4)); + EnsemblePlacementPolicy.PlacementResult> ensembleResponse = + repp.newEnsemble(3, 2, 2, null, new HashSet()); + List ensemble = ensembleResponse.getResult(); + PlacementPolicyAdherence isEnsembleAdheringToPlacementPolicy = ensembleResponse.getAdheringToPolicy(); + assertFalse(ensemble.contains(addr4.toBookieId())); + assertEquals(PlacementPolicyAdherence.FAIL, isEnsembleAdheringToPlacementPolicy); } // we could still use addr4 for urgent allocation if it is just bookie flapping - List ensemble = repp.newEnsemble(4, 4, 4, null, new HashSet()); - assertTrue(ensemble.contains(addr4)); + EnsemblePlacementPolicy.PlacementResult> ensembleResponse = + repp.newEnsemble(4, 2, 2, null, new HashSet()); + List ensemble = ensembleResponse.getResult(); + PlacementPolicyAdherence isEnsembleAdheringToPlacementPolicy = ensembleResponse.getAdheringToPolicy(); + assertEquals(PlacementPolicyAdherence.FAIL, isEnsembleAdheringToPlacementPolicy); + 
assertTrue(ensemble.contains(addr4.toBookieId())); } @Test @@ -1836,4 +2567,678 @@ public void testShuffleWithMask() { assertTrue(shuffleOccurred); } + @Test + public void testUpdateTopologyWithRackChange() throws Exception { + String defaultRackForThisTest = NetworkTopology.DEFAULT_REGION_AND_RACK; + repp.uninitalize(); + updateMyRack(defaultRackForThisTest); + + // Update cluster + BookieSocketAddress newAddr1 = new BookieSocketAddress("127.0.0.100", 3181); + BookieSocketAddress newAddr2 = new BookieSocketAddress("127.0.0.101", 3181); + BookieSocketAddress newAddr3 = new BookieSocketAddress("127.0.0.102", 3181); + BookieSocketAddress newAddr4 = new BookieSocketAddress("127.0.0.103", 3181); + + // update dns mapping + StaticDNSResolver.addNodeToRack(newAddr1.getHostName(), defaultRackForThisTest); + StaticDNSResolver.addNodeToRack(newAddr2.getHostName(), defaultRackForThisTest); + StaticDNSResolver.addNodeToRack(newAddr3.getHostName(), defaultRackForThisTest); + StaticDNSResolver.addNodeToRack(newAddr4.getHostName(), defaultRackForThisTest); + + TestStatsProvider statsProvider = new TestStatsProvider(); + TestStatsLogger statsLogger = statsProvider.getStatsLogger(""); + + repp = new RackawareEnsemblePlacementPolicy(); + repp.initialize(conf, Optional.<DNSToSwitchMapping> empty(), timer, + DISABLE_ALL, statsLogger, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); + repp.withDefaultRack(defaultRackForThisTest); + + Gauge<? extends Number> numBookiesInDefaultRackGauge = statsLogger + .getGauge(BookKeeperClientStats.NUM_WRITABLE_BOOKIES_IN_DEFAULT_RACK); + + Set<BookieId> writeableBookies = new HashSet<>(); + Set<BookieId> readOnlyBookies = new HashSet<>(); + writeableBookies.add(newAddr1.toBookieId()); + writeableBookies.add(newAddr2.toBookieId()); + writeableBookies.add(newAddr3.toBookieId()); + writeableBookies.add(newAddr4.toBookieId()); + repp.onClusterChanged(writeableBookies, readOnlyBookies); + // all four writable bookies are in the default rack + assertEquals("NUM_WRITABLE_BOOKIES_IN_DEFAULT_RACK gauge value", 4, numBookiesInDefaultRackGauge.getSample()); + + // newAddr3 rack is changed and it is not in the default rack anymore + StaticDNSResolver + .changeRack(Collections.singletonList(newAddr3), Collections.singletonList("/default-region/r4")); + assertEquals("NUM_WRITABLE_BOOKIES_IN_DEFAULT_RACK gauge value", 3, numBookiesInDefaultRackGauge.getSample()); + + StaticDNSResolver + .changeRack(Collections.singletonList(newAddr1), Collections.singletonList(defaultRackForThisTest)); + assertEquals("NUM_WRITABLE_BOOKIES_IN_DEFAULT_RACK gauge value", 3, numBookiesInDefaultRackGauge.getSample()); + } + + @Test + public void testNumBookiesInDefaultRackGauge() throws Exception { + String defaultRackForThisTest = NetworkTopology.DEFAULT_REGION_AND_RACK; + repp.uninitalize(); + updateMyRack(defaultRackForThisTest); + + // Update cluster + BookieSocketAddress newAddr1 = new BookieSocketAddress("127.0.0.100", 3181); + BookieSocketAddress newAddr2 = new BookieSocketAddress("127.0.0.101", 3181); + BookieSocketAddress newAddr3 = new BookieSocketAddress("127.0.0.102", 3181); + BookieSocketAddress newAddr4 = new BookieSocketAddress("127.0.0.103", 3181); + + // update dns mapping + StaticDNSResolver.addNodeToRack(newAddr1.getHostName(), defaultRackForThisTest); + StaticDNSResolver.addNodeToRack(newAddr2.getHostName(), "/default-region/r2"); + StaticDNSResolver.addNodeToRack(newAddr3.getHostName(), "/default-region/r3"); + StaticDNSResolver.addNodeToRack(newAddr4.getHostName(), defaultRackForThisTest); + + TestStatsProvider statsProvider = new TestStatsProvider(); + 
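// The gauge counts only writable bookies that resolve to the default rack; read-only bookies are not counted. + 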
TestStatsLogger statsLogger = statsProvider.getStatsLogger(""); + + repp = new RackawareEnsemblePlacementPolicy(); + repp.initialize(conf, Optional. empty(), timer, + DISABLE_ALL, statsLogger, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); + repp.withDefaultRack(defaultRackForThisTest); + + Gauge numBookiesInDefaultRackGauge = statsLogger + .getGauge(BookKeeperClientStats.NUM_WRITABLE_BOOKIES_IN_DEFAULT_RACK); + + Set writeableBookies = new HashSet(); + writeableBookies.add(newAddr1.toBookieId()); + writeableBookies.add(newAddr2.toBookieId()); + Set readOnlyBookies = new HashSet(); + readOnlyBookies.add(newAddr3.toBookieId()); + readOnlyBookies.add(newAddr4.toBookieId()); + repp.onClusterChanged(writeableBookies, readOnlyBookies); + // only writable bookie - newAddr1 in default rack + assertEquals("NUM_WRITABLE_BOOKIES_IN_DEFAULT_RACK guage value", 1, numBookiesInDefaultRackGauge.getSample()); + + readOnlyBookies.remove(newAddr4.toBookieId()); + writeableBookies.add(newAddr4.toBookieId()); + repp.onClusterChanged(writeableBookies, readOnlyBookies); + // newAddr4 is also added to writable bookie so 2 writable bookies - + // newAddr1 and newAddr4 + assertEquals("NUM_WRITABLE_BOOKIES_IN_DEFAULT_RACK guage value", 2, numBookiesInDefaultRackGauge.getSample()); + + // newAddr4 rack is changed and it is not in default anymore + StaticDNSResolver + .changeRack(Collections.singletonList(newAddr4), Collections.singletonList("/default-region/r4")); + assertEquals("NUM_WRITABLE_BOOKIES_IN_DEFAULT_RACK guage value", 1, numBookiesInDefaultRackGauge.getSample()); + + writeableBookies.clear(); + // writeableBookies is empty so 0 writable bookies in default rack + repp.onClusterChanged(writeableBookies, readOnlyBookies); + assertEquals("NUM_WRITABLE_BOOKIES_IN_DEFAULT_RACK guage value", 0, numBookiesInDefaultRackGauge.getSample()); + + StaticDNSResolver + .changeRack(Collections.singletonList(newAddr1), Collections.singletonList("/default-region/r2")); + readOnlyBookies.clear(); + writeableBookies.add(newAddr1.toBookieId()); + writeableBookies.add(newAddr2.toBookieId()); + writeableBookies.add(newAddr3.toBookieId()); + writeableBookies.add(newAddr4.toBookieId()); + repp.onClusterChanged(writeableBookies, readOnlyBookies); + // newAddr1 rack is changed and it is not in default anymore. So no + // bookies in default rack anymore + assertEquals("NUM_WRITABLE_BOOKIES_IN_DEFAULT_RACK guage value", 0, numBookiesInDefaultRackGauge.getSample()); + } + + @Test + public void testNewEnsembleExcludesDefaultRackBookiesEnforceMinNumRacks() throws Exception { + String defaultRackForThisTest = NetworkTopology.DEFAULT_REGION_AND_RACK; + repp.uninitalize(); + updateMyRack(defaultRackForThisTest); + int minNumRacksPerWriteQuorum = 4; + ClientConfiguration clientConf = new ClientConfiguration(conf); + clientConf.setMinNumRacksPerWriteQuorum(minNumRacksPerWriteQuorum); + // set enforceMinNumRacksPerWriteQuorum + clientConf.setEnforceMinNumRacksPerWriteQuorum(true); + + TestStatsProvider statsProvider = new TestStatsProvider(); + TestStatsLogger statsLogger = statsProvider.getStatsLogger(""); + + repp = new RackawareEnsemblePlacementPolicy(); + repp.initialize(clientConf, Optional. 
empty(), timer, + DISABLE_ALL, statsLogger, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); + repp.withDefaultRack(defaultRackForThisTest); + Gauge<? extends Number> numBookiesInDefaultRackGauge = statsLogger + .getGauge(BookKeeperClientStats.NUM_WRITABLE_BOOKIES_IN_DEFAULT_RACK); + + int writeQuorumSize = 3; + int ackQuorumSize = 3; + int effectiveMinNumRacksPerWriteQuorum = Math.min(minNumRacksPerWriteQuorum, writeQuorumSize); + + int numOfRacks = 2 * effectiveMinNumRacksPerWriteQuorum - 1; + int numOfBookiesPerRack = 20; + BookieId[] bookieSocketAddresses = new BookieId[numOfRacks * numOfBookiesPerRack]; + + for (int i = 0; i < numOfRacks; i++) { + for (int j = 0; j < numOfBookiesPerRack; j++) { + int index = i * numOfBookiesPerRack + j; + bookieSocketAddresses[index] = new BookieSocketAddress("128.0.0." + index, 3181).toBookieId(); + StaticDNSResolver.addNodeToRack("128.0.0." + index, "/default-region/r" + i); + } + } + + int numOfBookiesInDefaultRack = 10; + BookieId[] bookieSocketAddressesInDefaultRack = new BookieId[numOfBookiesInDefaultRack]; + for (int i = 0; i < numOfBookiesInDefaultRack; i++) { + bookieSocketAddressesInDefaultRack[i] = new BookieSocketAddress("127.0.0." + (i + 100), 3181).toBookieId(); + StaticDNSResolver.addNodeToRack("127.0.0." + (i + 100), defaultRackForThisTest); + } + + Set<BookieId> writableBookies = new HashSet<BookieId>( + Arrays.asList(bookieSocketAddresses)); + writableBookies.addAll(Arrays.asList(bookieSocketAddressesInDefaultRack)); + repp.onClusterChanged(writableBookies, new HashSet<BookieId>()); + assertEquals("NUM_WRITABLE_BOOKIES_IN_DEFAULT_RACK gauge value", numOfBookiesInDefaultRack, + numBookiesInDefaultRackGauge.getSample()); + + /* + * In this scenario we have enough racks (2 * + * effectiveMinNumRacksPerWriteQuorum - 1) and plenty of bookies in + * each rack, so we should be able to create an ensemble for any + * ensembleSize (as long as there are enough bookies in each + * rack). + * + * Since minNumRacksPerWriteQuorum is enforced, the policy shouldn't select nodes + * from the default rack. 
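+ * For example, with minNumRacksPerWriteQuorum = 4 and writeQuorumSize = 3 the effective + * minimum is min(4, 3) = 3 racks, so 2 * 3 - 1 = 5 racks with 20 bookies each are created here. 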
+ */ + EnsemblePlacementPolicy.PlacementResult> ensembleResponse; + List ensemble; + PlacementPolicyAdherence isEnsembleAdheringToPlacementPolicy; + for (int ensembleSize = effectiveMinNumRacksPerWriteQuorum; ensembleSize < 40; ensembleSize++) { + ensembleResponse = repp.newEnsemble(ensembleSize, writeQuorumSize, ackQuorumSize, null, new HashSet<>()); + ensemble = ensembleResponse.getResult(); + isEnsembleAdheringToPlacementPolicy = ensembleResponse.getAdheringToPolicy(); + assertEquals("Number of writeQuorum sets covered", ensembleSize, + getNumCoveredWriteQuorums(ensemble, writeQuorumSize, clientConf.getMinNumRacksPerWriteQuorum(), + repp.bookieAddressResolver)); + assertEquals(PlacementPolicyAdherence.MEETS_STRICT, isEnsembleAdheringToPlacementPolicy); + + ensembleResponse = repp.newEnsemble(ensembleSize, writeQuorumSize, ackQuorumSize, null, new HashSet<>()); + ensemble = ensembleResponse.getResult(); + isEnsembleAdheringToPlacementPolicy = ensembleResponse.getAdheringToPolicy(); + assertEquals("Number of writeQuorum sets covered", ensembleSize, + getNumCoveredWriteQuorums(ensemble, writeQuorumSize, clientConf.getMinNumRacksPerWriteQuorum(), + repp.bookieAddressResolver)); + assertEquals(PlacementPolicyAdherence.MEETS_STRICT, isEnsembleAdheringToPlacementPolicy); + Collection bookiesOfDefaultRackInEnsemble = CollectionUtils + .intersection(Arrays.asList(bookieSocketAddressesInDefaultRack), ensemble); + assertTrue("Ensemble is not supposed to contain bookies from default rack, but ensemble contains - " + + bookiesOfDefaultRackInEnsemble, bookiesOfDefaultRackInEnsemble.isEmpty()); + } + } + + private void testAreAckedBookiesAdheringToPlacementPolicyHelper(int minNumRacksPerWriteQuorumConfValue, + int ensembleSize, + int writeQuorumSize, + int ackQuorumSize, + int numOfBookiesInDefaultRack, + int numOfRacks, + int numOfBookiesPerRack) throws Exception { + String defaultRackForThisTest = NetworkTopology.DEFAULT_REGION_AND_RACK; + repp.uninitalize(); + updateMyRack(defaultRackForThisTest); + + ClientConfiguration conf = new ClientConfiguration(this.conf); + conf.setMinNumRacksPerWriteQuorum(minNumRacksPerWriteQuorumConfValue); + + TestStatsProvider statsProvider = new TestStatsProvider(); + TestStatsLogger statsLogger = statsProvider.getStatsLogger(""); + + repp = new RackawareEnsemblePlacementPolicy(); + repp.initialize(conf, Optional.empty(), timer, + DISABLE_ALL, statsLogger, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); + repp.withDefaultRack(defaultRackForThisTest); + + List bookieSocketAddressesDefaultRack = new ArrayList<>(); + List bookieSocketAddressesNonDefaultRack = new ArrayList<>(); + Set writableBookies; + Set bookiesForEntry = new HashSet<>(); + + for (int i = 0; i < numOfRacks; i++) { + for (int j = 0; j < numOfBookiesPerRack; j++) { + int index = i * numOfBookiesPerRack + j; + bookieSocketAddressesNonDefaultRack.add(new BookieSocketAddress("128.0.0." + index, 3181).toBookieId()); + StaticDNSResolver.addNodeToRack("128.0.0." + index, "/default-region/r" + i); + } + } + + for (int i = 0; i < numOfBookiesInDefaultRack; i++) { + bookieSocketAddressesDefaultRack.add(new BookieSocketAddress("127.0.0." + (i + 100), 3181).toBookieId()); + StaticDNSResolver.addNodeToRack("127.0.0." + (i + 100), defaultRackForThisTest); + } + + writableBookies = new HashSet<>(bookieSocketAddressesNonDefaultRack); + writableBookies.addAll(bookieSocketAddressesDefaultRack); + repp.onClusterChanged(writableBookies, new HashSet<>()); + + // Case 1 : Bookies in the ensemble from the same rack. 
+ // Manually crafting the ensemble here to create the error case when the check should return false + + List ensemble = new ArrayList<>(bookieSocketAddressesDefaultRack); + for (int entryId = 0; entryId < 10; entryId++) { + DistributionSchedule ds = new RoundRobinDistributionSchedule(writeQuorumSize, ackQuorumSize, ensembleSize); + DistributionSchedule.WriteSet ws = ds.getWriteSet(entryId); + + for (int i = 0; i < ws.size(); i++) { + bookiesForEntry.add(ensemble.get(ws.get(i))); + } + + assertFalse(repp.areAckedBookiesAdheringToPlacementPolicy(bookiesForEntry, writeQuorumSize, ackQuorumSize)); + } + + // Case 2 : Bookies in the ensemble from the different racks + + EnsemblePlacementPolicy.PlacementResult> + ensembleResponse = repp.newEnsemble(ensembleSize, + writeQuorumSize, + ackQuorumSize, + null, + new HashSet<>()); + ensemble = ensembleResponse.getResult(); + for (int entryId = 0; entryId < 10; entryId++) { + DistributionSchedule ds = new RoundRobinDistributionSchedule(writeQuorumSize, ackQuorumSize, ensembleSize); + DistributionSchedule.WriteSet ws = ds.getWriteSet(entryId); + + for (int i = 0; i < ws.size(); i++) { + bookiesForEntry.add(ensemble.get(ws.get(i))); + } + + assertTrue(repp.areAckedBookiesAdheringToPlacementPolicy(bookiesForEntry, writeQuorumSize, ackQuorumSize)); + } + } + + /** + * This tests areAckedBookiesAdheringToPlacementPolicy function in RackawareEnsemblePlacementPolicy. + */ + @Test + public void testAreAckedBookiesAdheringToPlacementPolicy() throws Exception { + testAreAckedBookiesAdheringToPlacementPolicyHelper(2, 7, 3, 2, 7, 3, 3); + testAreAckedBookiesAdheringToPlacementPolicyHelper(4, 6, 3, 2, 6, 3, 3); + testAreAckedBookiesAdheringToPlacementPolicyHelper(5, 7, 5, 3, 7, 5, 2); + } + + @SuppressWarnings("unchecked") + @Test + public void testReplaceToAdherePlacementPolicy() throws Exception { + final BookieSocketAddress addr1 = new BookieSocketAddress("127.0.0.1", 3181); + final BookieSocketAddress addr2 = new BookieSocketAddress("127.0.0.2", 3181); + final BookieSocketAddress addr3 = new BookieSocketAddress("127.0.0.3", 3181); + final BookieSocketAddress addr4 = new BookieSocketAddress("127.0.0.4", 3181); + final BookieSocketAddress addr5 = new BookieSocketAddress("127.0.0.5", 3181); + final BookieSocketAddress addr6 = new BookieSocketAddress("127.0.0.6", 3181); + final BookieSocketAddress addr7 = new BookieSocketAddress("127.0.0.7", 3181); + final BookieSocketAddress addr8 = new BookieSocketAddress("127.0.0.8", 3181); + final BookieSocketAddress addr9 = new BookieSocketAddress("127.0.0.9", 3181); + + final String rackName1 = NetworkTopology.DEFAULT_REGION + "/r1"; + final String rackName2 = NetworkTopology.DEFAULT_REGION + "/r2"; + final String rackName3 = NetworkTopology.DEFAULT_REGION + "/r3"; + + // update dns mapping + StaticDNSResolver.addNodeToRack(addr1.getSocketAddress().getAddress().getHostAddress(), rackName1); + StaticDNSResolver.addNodeToRack(addr2.getSocketAddress().getAddress().getHostAddress(), rackName1); + StaticDNSResolver.addNodeToRack(addr3.getSocketAddress().getAddress().getHostAddress(), rackName1); + StaticDNSResolver.addNodeToRack(addr4.getSocketAddress().getAddress().getHostAddress(), rackName2); + StaticDNSResolver.addNodeToRack(addr5.getSocketAddress().getAddress().getHostAddress(), rackName2); + StaticDNSResolver.addNodeToRack(addr6.getSocketAddress().getAddress().getHostAddress(), rackName2); + StaticDNSResolver.addNodeToRack(addr7.getSocketAddress().getAddress().getHostAddress(), rackName3); + 
StaticDNSResolver.addNodeToRack(addr8.getSocketAddress().getAddress().getHostAddress(), rackName3); + StaticDNSResolver.addNodeToRack(addr9.getSocketAddress().getAddress().getHostAddress(), rackName3); + + // Update cluster + final Set<BookieId> addrs = new HashSet<>(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + addrs.add(addr5.toBookieId()); + addrs.add(addr6.toBookieId()); + addrs.add(addr7.toBookieId()); + addrs.add(addr8.toBookieId()); + addrs.add(addr9.toBookieId()); + + final ClientConfiguration newConf = new ClientConfiguration(conf); + newConf.setDiskWeightBasedPlacementEnabled(false); + newConf.setMinNumRacksPerWriteQuorum(2); + newConf.setEnforceMinNumRacksPerWriteQuorum(true); + + repp.initialize(newConf, Optional.empty(), timer, + DISABLE_ALL, NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); + repp.withDefaultRack(NetworkTopology.DEFAULT_REGION_AND_RACK); + + repp.onClusterChanged(addrs, new HashSet<>()); + final Map<BookieId, BookieInfo> bookieInfoMap = new HashMap<>(); + bookieInfoMap.put(addr1.toBookieId(), new BookieInfo(100L, 100L)); + bookieInfoMap.put(addr2.toBookieId(), new BookieInfo(100L, 100L)); + bookieInfoMap.put(addr3.toBookieId(), new BookieInfo(100L, 100L)); + bookieInfoMap.put(addr4.toBookieId(), new BookieInfo(100L, 100L)); + bookieInfoMap.put(addr5.toBookieId(), new BookieInfo(100L, 100L)); + bookieInfoMap.put(addr6.toBookieId(), new BookieInfo(100L, 100L)); + bookieInfoMap.put(addr7.toBookieId(), new BookieInfo(100L, 100L)); + bookieInfoMap.put(addr8.toBookieId(), new BookieInfo(100L, 100L)); + bookieInfoMap.put(addr9.toBookieId(), new BookieInfo(100L, 100L)); + + repp.updateBookieInfo(bookieInfoMap); + + final Set<BookieId> excludeList = new HashSet<>(); + final int ensembleSize = 7; + final int writeQuorumSize = 2; + final int ackQuorumSize = 2; + + final BookieRackMatcher rack1 = new BookieRackMatcher(rackName1); + final BookieRackMatcher rack2 = new BookieRackMatcher(rackName2); + final BookieRackMatcher rack3 = new BookieRackMatcher(rackName3); + final BookieRackMatcher rack12 = new BookieRackMatcher(rackName1, rackName2); + final BookieRackMatcher rack13 = new BookieRackMatcher(rackName1, rackName3); + final BookieRackMatcher rack23 = new BookieRackMatcher(rackName2, rackName3); + final BookieRackMatcher rack123 = new BookieRackMatcher(rackName1, rackName2, rackName3); + final Consumer<Pair<List<BookieId>, Matcher<Iterable<? extends BookieId>>>> test = (pair) -> { + // RackawareEnsemblePlacementPolicyImpl#isEnsembleAdheringToPlacementPolicy + // is not in the scope of this test case, so that method is used in the assertions for convenience. 
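+ // Each input ensemble below violates the policy; replaceToAdherePlacementPolicy is expected to swap + // just enough bookies that every adjacent pair (a write quorum of 2) spans two racks. 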
+ assertEquals(PlacementPolicyAdherence.FAIL, + repp.isEnsembleAdheringToPlacementPolicy(pair.getLeft(), writeQuorumSize, ackQuorumSize)); + final EnsemblePlacementPolicy.PlacementResult> result = + repp.replaceToAdherePlacementPolicy(ensembleSize, writeQuorumSize, ackQuorumSize, + excludeList, pair.getLeft()); + if (LOG.isDebugEnabled()) { + LOG.debug("input: {}, result: {}", pair.getLeft(), result.getResult()); + } + assertEquals(PlacementPolicyAdherence.MEETS_STRICT, result.getAdheringToPolicy()); + assertThat(result.getResult(), pair.getRight()); + }; + + for (int i = 0; i < 1000; i++) { + test.accept(Pair.of(Arrays.asList(addr1.toBookieId(), addr4.toBookieId(), addr7.toBookieId(), + addr2.toBookieId(), addr5.toBookieId(), addr8.toBookieId(), addr9.toBookieId()), + // first, same, same, same, same, same, condition[0] + contains(is(addr1.toBookieId()), is(addr4.toBookieId()), is(addr7.toBookieId()), + is(addr2.toBookieId()), is(addr5.toBookieId()), is(addr8.toBookieId()), + is(addr6.toBookieId())))); + + test.accept(Pair.of(Arrays.asList(addr6.toBookieId(), addr4.toBookieId(), addr7.toBookieId(), + addr2.toBookieId(), addr5.toBookieId(), addr8.toBookieId(), addr3.toBookieId()), + // first, condition[0], same, same, same, same, same + contains(is(addr6.toBookieId()), is(addr1.toBookieId()), is(addr7.toBookieId()), + is(addr2.toBookieId()), is(addr5.toBookieId()), is(addr8.toBookieId()), + is(addr3.toBookieId())))); + + test.accept(Pair.of(Arrays.asList(addr1.toBookieId(), addr2.toBookieId(), addr3.toBookieId(), + addr4.toBookieId(), addr5.toBookieId(), addr6.toBookieId(), addr7.toBookieId()), + // first, candidate[0], same, same, candidate[0], same, same + contains(is(addr1.toBookieId()), is(rack3), is(addr3.toBookieId()), + is(addr4.toBookieId()), is(rack13), is(addr6.toBookieId()), is(addr7.toBookieId())))); + + test.accept(Pair.of(Arrays.asList(addr1.toBookieId(), addr2.toBookieId(), addr4.toBookieId(), + addr5.toBookieId(), addr7.toBookieId(), addr8.toBookieId(), addr9.toBookieId()), + contains(is(addr1.toBookieId()), is(rack23), is(rack123), is(rack123), + is(rack123), is(rack123), is(rack23)))); + } + StaticDNSResolver.reset(); + } + + @SuppressWarnings("unchecked") + @Test + public void testReplaceToAdherePlacementPolicyWithOutOfOrder() throws Exception { + final BookieSocketAddress addr1 = new BookieSocketAddress("127.0.0.1", 3181); + final BookieSocketAddress addr2 = new BookieSocketAddress("127.0.0.2", 3181); + final BookieSocketAddress addr3 = new BookieSocketAddress("127.0.0.3", 3181); + final BookieSocketAddress addr4 = new BookieSocketAddress("127.0.0.4", 3181); + final BookieSocketAddress addr5 = new BookieSocketAddress("127.0.0.5", 3181); + final BookieSocketAddress addr6 = new BookieSocketAddress("127.0.0.6", 3181); + + final String rackName1 = NetworkTopology.DEFAULT_REGION + "/r1"; + final String rackName2 = NetworkTopology.DEFAULT_REGION + "/r2"; + + // update dns mapping + StaticDNSResolver.addNodeToRack(addr1.getSocketAddress().getAddress().getHostAddress(), rackName1); + StaticDNSResolver.addNodeToRack(addr2.getSocketAddress().getAddress().getHostAddress(), rackName1); + StaticDNSResolver.addNodeToRack(addr3.getSocketAddress().getAddress().getHostAddress(), rackName1); + StaticDNSResolver.addNodeToRack(addr4.getSocketAddress().getAddress().getHostAddress(), rackName2); + StaticDNSResolver.addNodeToRack(addr5.getSocketAddress().getAddress().getHostAddress(), rackName2); + StaticDNSResolver.addNodeToRack(addr6.getSocketAddress().getAddress().getHostAddress(), 
rackName2); + + // Update cluster + final Set<BookieId> addrs = new HashSet<>(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + addrs.add(addr5.toBookieId()); + addrs.add(addr6.toBookieId()); + + final ClientConfiguration newConf = new ClientConfiguration(conf); + newConf.setDiskWeightBasedPlacementEnabled(false); + newConf.setMinNumRacksPerWriteQuorum(2); + newConf.setEnforceMinNumRacksPerWriteQuorum(true); + + repp.initialize(newConf, Optional.empty(), timer, + DISABLE_ALL, NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); + repp.withDefaultRack(NetworkTopology.DEFAULT_REGION_AND_RACK); + + repp.onClusterChanged(addrs, new HashSet<>()); + final Map<BookieId, BookieInfo> bookieInfoMap = new HashMap<>(); + bookieInfoMap.put(addr1.toBookieId(), new BookieInfo(100L, 100L)); + bookieInfoMap.put(addr2.toBookieId(), new BookieInfo(100L, 100L)); + bookieInfoMap.put(addr3.toBookieId(), new BookieInfo(100L, 100L)); + bookieInfoMap.put(addr4.toBookieId(), new BookieInfo(100L, 100L)); + bookieInfoMap.put(addr5.toBookieId(), new BookieInfo(100L, 100L)); + bookieInfoMap.put(addr6.toBookieId(), new BookieInfo(100L, 100L)); + + repp.updateBookieInfo(bookieInfoMap); + + final Set<BookieId> excludeList = new HashSet<>(); + final int ensembleSize = 6; + final int writeQuorumSize = 2; + final int ackQuorumSize = 2; + + final Consumer<Pair<List<BookieId>, Matcher<Iterable<? extends BookieId>>>> test = (pair) -> { + // RackawareEnsemblePlacementPolicyImpl#isEnsembleAdheringToPlacementPolicy + // is not in the scope of this test case, so that method is used in the assertions for convenience. + assertEquals(PlacementPolicyAdherence.FAIL, + repp.isEnsembleAdheringToPlacementPolicy(pair.getLeft(), writeQuorumSize, ackQuorumSize)); + final EnsemblePlacementPolicy.PlacementResult<List<BookieId>> result = + repp.replaceToAdherePlacementPolicy(ensembleSize, writeQuorumSize, ackQuorumSize, + excludeList, pair.getLeft()); + if (LOG.isDebugEnabled()) { + LOG.debug("input: {}, result: {}", pair.getLeft(), result.getResult()); + } + assertEquals(PlacementPolicyAdherence.MEETS_STRICT, result.getAdheringToPolicy()); + }; + + for (int i = 0; i < 1000; i++) { + //All bookies are already in the ensemble, but the bookie order does not adhere to the placement policy. + test.accept(Pair.of(Arrays.asList(addr1.toBookieId(), addr2.toBookieId(), addr3.toBookieId(), + addr4.toBookieId(), addr5.toBookieId(), addr6.toBookieId()), + //The result is not deterministic; the minimum number of replacements is 2: + //1,2,3,4,5,6 => 1,5,3,4,2,6, + //but the final result may also be 1,6,3,4,2,5. + //Replacing from index 0, the first bookie (1) is on /rack1, so only a /rack2 bookie can be picked + //for the second slot; the candidates 4,5,6 are chosen at random. If we pick 6 for the second, + // the final result is 1,6,3,4,2,5. 
If we pick 5 for the second, the final result is 1,5,3,4,2,6 + null)); + } + StaticDNSResolver.reset(); + } + + @SuppressWarnings("unchecked") + @Test + public void testReplaceToAdherePlacementPolicyWithNoMoreRackBookie() throws Exception { + final BookieSocketAddress addr1 = new BookieSocketAddress("127.0.0.1", 3181); + final BookieSocketAddress addr2 = new BookieSocketAddress("127.0.0.2", 3181); + final BookieSocketAddress addr3 = new BookieSocketAddress("127.0.0.3", 3181); + final BookieSocketAddress addr4 = new BookieSocketAddress("127.0.0.4", 3181); + final BookieSocketAddress addr5 = new BookieSocketAddress("127.0.0.5", 3181); + final BookieSocketAddress addr6 = new BookieSocketAddress("127.0.0.6", 3181); + + final String rackName1 = NetworkTopology.DEFAULT_REGION + "/r1"; + final String rackName2 = NetworkTopology.DEFAULT_REGION + "/r2"; + + // update dns mapping + StaticDNSResolver.addNodeToRack(addr1.getSocketAddress().getAddress().getHostAddress(), rackName1); + StaticDNSResolver.addNodeToRack(addr2.getSocketAddress().getAddress().getHostAddress(), rackName1); + StaticDNSResolver.addNodeToRack(addr3.getSocketAddress().getAddress().getHostAddress(), rackName1); + StaticDNSResolver.addNodeToRack(addr4.getSocketAddress().getAddress().getHostAddress(), rackName2); + StaticDNSResolver.addNodeToRack(addr5.getSocketAddress().getAddress().getHostAddress(), rackName2); + StaticDNSResolver.addNodeToRack(addr6.getSocketAddress().getAddress().getHostAddress(), rackName2); + + // Update cluster + final Set<BookieId> addrs = new HashSet<>(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + addrs.add(addr5.toBookieId()); + addrs.add(addr6.toBookieId()); + + final ClientConfiguration newConf = new ClientConfiguration(conf); + newConf.setDiskWeightBasedPlacementEnabled(false); + newConf.setMinNumRacksPerWriteQuorum(2); + newConf.setEnforceMinNumRacksPerWriteQuorum(true); + + repp.initialize(newConf, Optional.empty(), timer, + DISABLE_ALL, NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); + repp.withDefaultRack(NetworkTopology.DEFAULT_REGION_AND_RACK); + + repp.onClusterChanged(addrs, new HashSet<>()); + final Map<BookieId, BookieInfo> bookieInfoMap = new HashMap<>(); + bookieInfoMap.put(addr1.toBookieId(), new BookieInfo(100L, 100L)); + bookieInfoMap.put(addr2.toBookieId(), new BookieInfo(100L, 100L)); + bookieInfoMap.put(addr3.toBookieId(), new BookieInfo(100L, 100L)); + bookieInfoMap.put(addr4.toBookieId(), new BookieInfo(100L, 100L)); + bookieInfoMap.put(addr5.toBookieId(), new BookieInfo(100L, 100L)); + bookieInfoMap.put(addr6.toBookieId(), new BookieInfo(100L, 100L)); + + repp.updateBookieInfo(bookieInfoMap); + + final Set<BookieId> excludeList = new HashSet<>(); + final int ensembleSize = 3; + final int writeQuorumSize = 2; + final int ackQuorumSize = 2; + + final Consumer<Pair<List<BookieId>, Matcher<Iterable<? extends BookieId>>>> test = (pair) -> { + // RackawareEnsemblePlacementPolicyImpl#isEnsembleAdheringToPlacementPolicy + // is not in the scope of this test case, so that method is used in the assertions for convenience. 
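+ // With ensembleSize = 3 and writeQuorumSize = 2, each of the three cyclic bookie pairs must span + // two racks, which is impossible with only two racks (a 3-cycle cannot alternate between two + // values), so the replacement is expected to fail with an empty result. 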
+ assertEquals(PlacementPolicyAdherence.FAIL, + repp.isEnsembleAdheringToPlacementPolicy(pair.getLeft(), writeQuorumSize, ackQuorumSize)); + final EnsemblePlacementPolicy.PlacementResult<List<BookieId>> result = + repp.replaceToAdherePlacementPolicy(ensembleSize, writeQuorumSize, ackQuorumSize, + excludeList, pair.getLeft()); + if (LOG.isDebugEnabled()) { + LOG.debug("input: {}, result: {}", pair.getLeft(), result.getResult()); + } + assertEquals(PlacementPolicyAdherence.FAIL, result.getAdheringToPolicy()); + assertEquals(0, result.getResult().size()); + }; + + for (int i = 0; i < 1000; i++) { + test.accept(Pair.of(Arrays.asList(addr1.toBookieId(), addr2.toBookieId(), addr4.toBookieId()), + null)); + } + StaticDNSResolver.reset(); + } + + @SuppressWarnings("unchecked") + @Test + public void testReplaceToAdherePlacementPolicyWithUnknowBookie() throws Exception { + final BookieSocketAddress addr1 = new BookieSocketAddress("127.0.0.1", 3181); + final BookieSocketAddress addr2 = new BookieSocketAddress("127.0.0.2", 3181); + final BookieSocketAddress addr3 = new BookieSocketAddress("127.0.0.3", 3181); + final BookieSocketAddress addr4 = new BookieSocketAddress("127.0.0.4", 3181); + final BookieSocketAddress addr5 = new BookieSocketAddress("127.0.0.5", 3181); + final BookieSocketAddress addr6 = new BookieSocketAddress("127.0.0.6", 3181); + + final String rackName1 = NetworkTopology.DEFAULT_REGION + "/r1"; + final String rackName2 = NetworkTopology.DEFAULT_REGION + "/r2"; + + // update dns mapping + StaticDNSResolver.addNodeToRack(addr1.getSocketAddress().getAddress().getHostAddress(), rackName1); + StaticDNSResolver.addNodeToRack(addr2.getSocketAddress().getAddress().getHostAddress(), rackName1); + StaticDNSResolver.addNodeToRack(addr3.getSocketAddress().getAddress().getHostAddress(), rackName1); + StaticDNSResolver.addNodeToRack(addr4.getSocketAddress().getAddress().getHostAddress(), rackName2); + StaticDNSResolver.addNodeToRack(addr5.getSocketAddress().getAddress().getHostAddress(), rackName2); + StaticDNSResolver.addNodeToRack(addr6.getSocketAddress().getAddress().getHostAddress(), rackName2); + + // Update cluster + final Set<BookieId> addrs = new HashSet<>(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + addrs.add(addr5.toBookieId()); + addrs.add(addr6.toBookieId()); + + final ClientConfiguration newConf = new ClientConfiguration(conf); + newConf.setDiskWeightBasedPlacementEnabled(false); + newConf.setMinNumRacksPerWriteQuorum(2); + newConf.setEnforceMinNumRacksPerWriteQuorum(true); + + repp.initialize(newConf, Optional.empty(), timer, + DISABLE_ALL, NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); + repp.withDefaultRack(NetworkTopology.DEFAULT_REGION_AND_RACK); + + repp.onClusterChanged(addrs, new HashSet<>()); + final Map<BookieId, BookieInfo> bookieInfoMap = new HashMap<>(); + bookieInfoMap.put(addr1.toBookieId(), new BookieInfo(100L, 100L)); + bookieInfoMap.put(addr2.toBookieId(), new BookieInfo(100L, 100L)); + bookieInfoMap.put(addr3.toBookieId(), new BookieInfo(100L, 100L)); + bookieInfoMap.put(addr4.toBookieId(), new BookieInfo(100L, 100L)); + bookieInfoMap.put(addr5.toBookieId(), new BookieInfo(100L, 100L)); + bookieInfoMap.put(addr6.toBookieId(), new BookieInfo(100L, 100L)); + + repp.updateBookieInfo(bookieInfoMap); + + final Set<BookieId> excludeList = new HashSet<>(); + final int ensembleSize = 6; + final int writeQuorumSize = 2; + final int ackQuorumSize = 2; +
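+ // The ensembles below include two BookieIds (127.0.0.10/11) that are unknown to the topology, so + // the policy has to replace them; which /rack1 bookie (addr1 or addr2) ends up in a given slot is + // random, hence the rack-based matcher below instead of asserting exact bookie identities.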
+ final BookieRackMatcher rack1 = new BookieRackMatcher(rackName1); + + final Consumer<Pair<List<BookieId>, Matcher<Iterable<? extends BookieId>>>> test = (pair) -> { + // RackawareEnsemblePlacementPolicyImpl#isEnsembleAdheringToPlacementPolicy + // is not in the scope of this test case, so we use that method in the assertions for convenience. + assertEquals(PlacementPolicyAdherence.FAIL, + repp.isEnsembleAdheringToPlacementPolicy(pair.getLeft(), writeQuorumSize, ackQuorumSize)); + final EnsemblePlacementPolicy.PlacementResult<List<BookieId>> result = + repp.replaceToAdherePlacementPolicy(ensembleSize, writeQuorumSize, ackQuorumSize, + excludeList, pair.getLeft()); + if (LOG.isDebugEnabled()) { + LOG.debug("input: {}, result: {}", pair.getLeft(), result.getResult()); + } + assertEquals(PlacementPolicyAdherence.MEETS_STRICT, result.getAdheringToPolicy()); + assertThat(result.getResult(), pair.getRight()); + }; + + for (int i = 0; i < 1000; i++) { + test.accept(Pair.of(Arrays.asList(BookieId.parse("127.0.0.10:3181"), BookieId.parse("127.0.0.11:3181"), + addr3.toBookieId(), + addr4.toBookieId(), addr5.toBookieId(), addr6.toBookieId()), + contains(is(rack1), is(addr5.toBookieId()), is(addr3.toBookieId()), + is(addr4.toBookieId()), is(rack1), is(addr6.toBookieId())))); + } + StaticDNSResolver.reset(); + } + + private static class BookieRackMatcher extends TypeSafeMatcher<BookieId> { + final List<String> expectedRacks; + + public BookieRackMatcher(String... expectedRacks) { + this.expectedRacks = Arrays.asList(expectedRacks); + } + + @Override + protected boolean matchesSafely(BookieId bookieId) { + return expectedRacks.contains(StaticDNSResolver.getRack(bookieId.toString().split(":")[0])); + } + + @Override + public void describeTo(Description description) { + description.appendText("expected racks " + expectedRacks); + } + } } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestRackawareEnsemblePlacementPolicyUsingScript.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestRackawareEnsemblePlacementPolicyUsingScript.java index d9f253507e5..c61cdb6138b 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestRackawareEnsemblePlacementPolicyUsingScript.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestRackawareEnsemblePlacementPolicyUsingScript.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements.
See the NOTICE file @@ -28,16 +28,16 @@ import static org.junit.Assert.fail; import com.google.common.util.concurrent.ThreadFactoryBuilder; - import io.netty.util.HashedWheelTimer; +import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Optional; import java.util.Set; import java.util.concurrent.TimeUnit; - import org.apache.bookkeeper.client.BKException.BKNotEnoughBookiesException; import org.apache.bookkeeper.conf.ClientConfiguration; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.net.BookieSocketAddress; import org.apache.bookkeeper.net.CommonConfigurationKeys; import org.apache.bookkeeper.net.DNSToSwitchMapping; @@ -83,7 +83,8 @@ public void setUp() throws Exception { conf.getTimeoutTimerNumTicks()); repp = new RackawareEnsemblePlacementPolicy(); - repp.initialize(conf, Optional.empty(), timer, DISABLE_ALL, NullStatsLogger.INSTANCE); + repp.initialize(conf, Optional.empty(), timer, + DISABLE_ALL, NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); } @After @@ -104,15 +105,16 @@ public void testReplaceBookieWithEnoughBookiesInSameRack() throws Exception { BookieSocketAddress addr4 = new BookieSocketAddress("127.0.0.4", 3181); // /4 rack // Update cluster - Set addrs = new HashSet(); - addrs.add(addr1); - addrs.add(addr2); - addrs.add(addr3); - addrs.add(addr4); - repp.onClusterChanged(addrs, new HashSet()); + Set addrs = new HashSet(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); // replace node under r2 - BookieSocketAddress replacedBookie = repp.replaceBookie(1, 1, 1, null, new HashSet<>(), addr2, new HashSet<>()); - assertEquals(addr3, replacedBookie); + BookieId replacedBookie = repp.replaceBookie(1, 1, 1, null, new ArrayList<>(), + addr2.toBookieId(), new HashSet<>()).getResult(); + assertEquals(addr3.toBookieId(), replacedBookie); } @Test @@ -124,19 +126,21 @@ public void testReplaceBookieWithEnoughBookiesInDifferentRack() throws Exception BookieSocketAddress addr4 = new BookieSocketAddress("127.0.0.4", 3181); // /4 rack // Update cluster - Set addrs = new HashSet(); - addrs.add(addr1); - addrs.add(addr2); - addrs.add(addr3); - addrs.add(addr4); - repp.onClusterChanged(addrs, new HashSet()); + Set addrs = new HashSet(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); // replace node under r2 - Set excludedAddrs = new HashSet(); - excludedAddrs.add(addr1); - BookieSocketAddress replacedBookie = repp.replaceBookie(1, 1, 1, null, new HashSet<>(), addr2, excludedAddrs); - - assertFalse(addr1.equals(replacedBookie)); - assertTrue(addr3.equals(replacedBookie) || addr4.equals(replacedBookie)); + Set excludedAddrs = new HashSet(); + excludedAddrs.add(addr1.toBookieId()); + BookieId replacedBookie = repp.replaceBookie(1, 1, 1, null, new ArrayList<>(), + addr2.toBookieId(), excludedAddrs).getResult(); + + assertFalse(addr1.toBookieId().equals(replacedBookie)); + assertTrue(addr3.toBookieId().equals(replacedBookie) + || addr4.toBookieId().equals(replacedBookie)); } @Test @@ -148,19 +152,19 @@ public void testReplaceBookieWithNotEnoughBookies() throws Exception { BookieSocketAddress addr4 = new BookieSocketAddress("127.0.0.4", 3181); // /4 rack // Update cluster - Set addrs = new HashSet(); - addrs.add(addr1); - addrs.add(addr2); - 
addrs.add(addr3); - addrs.add(addr4); - repp.onClusterChanged(addrs, new HashSet()); + Set addrs = new HashSet(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); // replace node under r2 - Set excludedAddrs = new HashSet(); - excludedAddrs.add(addr1); - excludedAddrs.add(addr3); - excludedAddrs.add(addr4); + Set excludedAddrs = new HashSet(); + excludedAddrs.add(addr1.toBookieId()); + excludedAddrs.add(addr3.toBookieId()); + excludedAddrs.add(addr4.toBookieId()); try { - repp.replaceBookie(1, 1, 1, null, new HashSet(), addr2, excludedAddrs); + repp.replaceBookie(1, 1, 1, null, new ArrayList(), addr2.toBookieId(), excludedAddrs); fail("Should throw BKNotEnoughBookiesException when there is not enough bookies"); } catch (BKNotEnoughBookiesException bnebe) { // should throw not BKNotEnoughBookiesException @@ -183,25 +187,26 @@ public void testReplaceBookieWithScriptMappingError() throws Exception { BookieSocketAddress addr2 = new BookieSocketAddress("127.0.0.2", 3181); // /2 rack // Update cluster, add node that maps to non-default rack - Set addrs = new HashSet(); - addrs.add(addr1); + Set addrs = new HashSet(); + addrs.add(addr1.toBookieId()); - repp.onClusterChanged(addrs, new HashSet()); + repp.onClusterChanged(addrs, new HashSet()); - addrs = new HashSet(); - addrs.add(addr0); - addrs.add(addr1); - addrs.add(addr2); - repp.onClusterChanged(addrs, new HashSet()); + addrs = new HashSet(); + addrs.add(addr0.toBookieId()); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); // replace node under r2 - Set excludedAddrs = new HashSet(); - excludedAddrs.add(addr1); - BookieSocketAddress replacedBookie = repp.replaceBookie(1, 1, 1, null, new HashSet<>(), addr2, excludedAddrs); - - assertFalse(addr1.equals(replacedBookie)); - assertFalse(addr2.equals(replacedBookie)); - assertTrue(addr0.equals(replacedBookie)); + Set excludedAddrs = new HashSet(); + excludedAddrs.add(addr1.toBookieId()); + BookieId replacedBookie = repp.replaceBookie(1, 1, 1, null, new ArrayList<>(), + addr2.toBookieId(), excludedAddrs).getResult(); + + assertFalse(addr1.toBookieId().equals(replacedBookie)); + assertFalse(addr2.toBookieId().equals(replacedBookie)); + assertTrue(addr0.toBookieId().equals(replacedBookie)); } /* @@ -221,25 +226,26 @@ public void testReplaceBookieWithScriptMappingError2() throws Exception { BookieSocketAddress addr2 = new BookieSocketAddress("127.0.0.2", 3181); // /2 rack // Update cluster, add node that maps to default rack first - Set addrs = new HashSet(); - addrs.add(addr0); + Set addrs = new HashSet(); + addrs.add(addr0.toBookieId()); - repp.onClusterChanged(addrs, new HashSet()); + repp.onClusterChanged(addrs, new HashSet()); - addrs = new HashSet(); - addrs.add(addr0); - addrs.add(addr1); - addrs.add(addr2); - repp.onClusterChanged(addrs, new HashSet()); + addrs = new HashSet(); + addrs.add(addr0.toBookieId()); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); // replace node under r2 - Set excludedAddrs = new HashSet(); - excludedAddrs.add(addr1); - BookieSocketAddress replacedBookie = repp.replaceBookie(1, 1, 1, null, new HashSet<>(), addr2, excludedAddrs); - - assertFalse(addr1.equals(replacedBookie)); - assertFalse(addr2.equals(replacedBookie)); - assertTrue(addr0.equals(replacedBookie)); + Set excludedAddrs = new HashSet(); + 
excludedAddrs.add(addr1.toBookieId()); + BookieId replacedBookie = repp.replaceBookie(1, 1, 1, null, new ArrayList<>(), + addr2.toBookieId(), excludedAddrs).getResult(); + + assertFalse(addr1.toBookieId().equals(replacedBookie)); + assertFalse(addr2.toBookieId().equals(replacedBookie)); + assertTrue(addr0.toBookieId().equals(replacedBookie)); } @Test @@ -250,16 +256,18 @@ public void testNewEnsembleWithSingleRack() throws Exception { BookieSocketAddress addr3 = new BookieSocketAddress("127.0.2.1", 3181); // /1 rack BookieSocketAddress addr4 = new BookieSocketAddress("127.0.3.1", 3181); // /1 rack // Update cluster - Set addrs = new HashSet(); - addrs.add(addr1); - addrs.add(addr2); - addrs.add(addr3); - addrs.add(addr4); - repp.onClusterChanged(addrs, new HashSet()); + Set addrs = new HashSet(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); try { - List ensemble = repp.newEnsemble(3, 2, 2, null, new HashSet<>()); + List ensemble = repp.newEnsemble(3, 2, 2, null, + new HashSet<>()).getResult(); assertEquals(0, getNumCoveredWriteQuorums(ensemble, 2)); - List ensemble2 = repp.newEnsemble(4, 2, 2, null, new HashSet<>()); + List ensemble2 = repp.newEnsemble(4, 2, 2, null, + new HashSet<>()).getResult(); assertEquals(0, getNumCoveredWriteQuorums(ensemble2, 2)); } catch (BKNotEnoughBookiesException bnebe) { fail("Should not get not enough bookies exception even there is only one rack."); @@ -274,17 +282,19 @@ public void testNewEnsembleWithMultipleRacks() throws Exception { BookieSocketAddress addr3 = new BookieSocketAddress("127.0.1.2", 3181); // /2 rack BookieSocketAddress addr4 = new BookieSocketAddress("127.0.2.2", 3181); // /2 rack // Update cluster - Set addrs = new HashSet(); - addrs.add(addr1); - addrs.add(addr2); - addrs.add(addr3); - addrs.add(addr4); - repp.onClusterChanged(addrs, new HashSet()); + Set addrs = new HashSet(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); try { - List ensemble = repp.newEnsemble(3, 2, 2, null, new HashSet<>()); + List ensemble = repp.newEnsemble(3, 2, 2, null, + new HashSet<>()).getResult(); int numCovered = getNumCoveredWriteQuorums(ensemble, 2); assertTrue(numCovered == 2); - List ensemble2 = repp.newEnsemble(4, 2, 2, null, new HashSet<>()); + List ensemble2 = repp.newEnsemble(4, 2, 2, null, + new HashSet<>()).getResult(); numCovered = getNumCoveredWriteQuorums(ensemble2, 2); assertTrue(numCovered == 2); } catch (BKNotEnoughBookiesException bnebe) { @@ -304,20 +314,22 @@ public void testNewEnsembleWithEnoughRacks() throws Exception { BookieSocketAddress addr7 = new BookieSocketAddress("127.0.1.3", 3181); // /3 rack BookieSocketAddress addr8 = new BookieSocketAddress("127.0.1.4", 3181); // /4 rack // Update cluster - Set addrs = new HashSet(); - addrs.add(addr1); - addrs.add(addr2); - addrs.add(addr3); - addrs.add(addr4); - addrs.add(addr5); - addrs.add(addr6); - addrs.add(addr7); - addrs.add(addr8); - repp.onClusterChanged(addrs, new HashSet()); + Set addrs = new HashSet(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + addrs.add(addr5.toBookieId()); + addrs.add(addr6.toBookieId()); + addrs.add(addr7.toBookieId()); + addrs.add(addr8.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); try { - List 
ensemble1 = repp.newEnsemble(3, 2, 2, null, new HashSet<>()); + List ensemble1 = repp.newEnsemble(3, 2, 2, null, + new HashSet<>()).getResult(); assertEquals(3, getNumCoveredWriteQuorums(ensemble1, 2)); - List ensemble2 = repp.newEnsemble(4, 2, 2, null, new HashSet<>()); + List ensemble2 = repp.newEnsemble(4, 2, 2, null, + new HashSet<>()).getResult(); assertEquals(4, getNumCoveredWriteQuorums(ensemble2, 2)); } catch (BKNotEnoughBookiesException bnebe) { fail("Should not get not enough bookies exception."); @@ -336,17 +348,121 @@ public void testRemoveBookieFromCluster() { BookieSocketAddress addr3 = new BookieSocketAddress("127.0.1.2", 3181); // /2 rack BookieSocketAddress addr4 = new BookieSocketAddress("127.0.0.4", 3181); // /4 rack // Update cluster - Set addrs = new HashSet(); - addrs.add(addr1); - addrs.add(addr2); - addrs.add(addr3); - addrs.add(addr4); - repp.onClusterChanged(addrs, new HashSet()); - addrs.remove(addr1); - repp.onClusterChanged(addrs, new HashSet()); + Set addrs = new HashSet(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); + addrs.remove(addr1.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); + } + + @Test + public void testNetworkTopologyScriptFileNameIsEmpty() throws Exception { + ignoreTestIfItIsWindowsOS(); + repp.uninitalize(); + + ClientConfiguration newConf = new ClientConfiguration(); + newConf.setProperty(REPP_DNS_RESOLVER_CLASS, ScriptBasedMapping.class.getName()); + newConf.setProperty(CommonConfigurationKeys.NET_TOPOLOGY_SCRIPT_FILE_NAME_KEY, ""); + newConf.setEnforceMinNumRacksPerWriteQuorum(false); + timer = new HashedWheelTimer(new ThreadFactoryBuilder().setNameFormat("TestTimer-%d").build(), + newConf.getTimeoutTimerTickDurationMs(), TimeUnit.MILLISECONDS, newConf.getTimeoutTimerNumTicks()); + + repp = new RackawareEnsemblePlacementPolicy(); + try { + repp.initialize(newConf, Optional. empty(), timer, DISABLE_ALL, + NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); + } catch (RuntimeException re) { + fail("EnforceMinNumRacksPerWriteQuorum is not set, so repp.initialize should succeed even if" + + " networkTopologyScriptFileName is empty"); + } + repp.uninitalize(); + + newConf.setEnforceMinNumRacksPerWriteQuorum(true); + repp = new RackawareEnsemblePlacementPolicy(); + try { + repp.initialize(newConf, Optional. empty(), timer, DISABLE_ALL, + NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); + fail("EnforceMinNumRacksPerWriteQuorum is set, so repp.initialize should fail if" + + " networkTopologyScriptFileName is empty"); + } catch (RuntimeException re) { + } + repp.uninitalize(); + + newConf.setProperty(CommonConfigurationKeys.NET_TOPOLOGY_SCRIPT_FILE_NAME_KEY, + "src/test/resources/networkmappingscript.sh"); + try { + repp.initialize(newConf, Optional. 
empty(), timer, DISABLE_ALL, + NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); + } catch (RuntimeException re) { + fail("EnforceMinNumRacksPerWriteQuorum is set and networkTopologyScriptFileName is not empty," + + " so it should succeed"); + } + repp.uninitalize(); + } + + @Test + public void testIfValidateConfFails() throws Exception { + ignoreTestIfItIsWindowsOS(); + repp.uninitalize(); + + ClientConfiguration newConf = new ClientConfiguration(); + newConf.setProperty(REPP_DNS_RESOLVER_CLASS, ScriptBasedMapping.class.getName()); + /* + * this script, exits with error value if no argument is passed to it. + * So mapping.validateConf will fail. + */ + newConf.setProperty(CommonConfigurationKeys.NET_TOPOLOGY_SCRIPT_FILE_NAME_KEY, + "src/test/resources/networkmappingscriptwithargs.sh"); + timer = new HashedWheelTimer(new ThreadFactoryBuilder().setNameFormat("TestTimer-%d").build(), + newConf.getTimeoutTimerTickDurationMs(), TimeUnit.MILLISECONDS, newConf.getTimeoutTimerNumTicks()); + + repp = new RackawareEnsemblePlacementPolicy(); + repp.initialize(newConf, Optional. empty(), timer, + DISABLE_ALL, NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); + + repp.uninitalize(); + repp = new RackawareEnsemblePlacementPolicy(); + try { + repp.initialize(newConf, Optional. empty(), timer, DISABLE_ALL, + NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); + } catch (RuntimeException re) { + fail("EnforceMinNumRacksPerWriteQuorum is not set, so repp.initialize should succeed" + + " even if mapping.validateConf fails"); + } + + newConf.setEnforceMinNumRacksPerWriteQuorum(true); + repp.uninitalize(); + repp = new RackawareEnsemblePlacementPolicy(); + try { + repp.initialize(newConf, Optional. empty(), timer, DISABLE_ALL, + NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); + fail("EnforceMinNumRacksPerWriteQuorum is set, so repp.initialize should fail" + + " if mapping.validateConf fails"); + } catch (RuntimeException re) { + + } + + /* + * this script returns successfully even if no argument is passed to it. + * So mapping.validateConf will succeed. + */ + newConf.setProperty(CommonConfigurationKeys.NET_TOPOLOGY_SCRIPT_FILE_NAME_KEY, + "src/test/resources/networkmappingscript.sh"); + repp.uninitalize(); + repp = new RackawareEnsemblePlacementPolicy(); + try { + repp.initialize(newConf, Optional. empty(), timer, DISABLE_ALL, + NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); + } catch (RuntimeException re) { + fail("EnforceMinNumRacksPerWriteQuorum is set, and mapping.validateConf succeeds." 
+ + " So repp.initialize should succeed"); + } } - private int getNumCoveredWriteQuorums(List ensemble, int writeQuorumSize) + private int getNumCoveredWriteQuorums(List ensemble, int writeQuorumSize) throws Exception { int ensembleSize = ensemble.size(); int numCoveredWriteQuorums = 0; @@ -354,8 +470,9 @@ private int getNumCoveredWriteQuorums(List ensemble, int wr Set racks = new HashSet(); for (int j = 0; j < writeQuorumSize; j++) { int bookieIdx = (i + j) % ensembleSize; - BookieSocketAddress addr = ensemble.get(bookieIdx); - String hostAddress = addr.getSocketAddress().getAddress().getHostAddress(); + BookieId addr = ensemble.get(bookieIdx); + String hostAddress = repp.bookieAddressResolver.resolve(addr) + .getSocketAddress().getAddress().getHostAddress(); String rack = "/" + hostAddress.charAt(hostAddress.length() - 1); racks.add(rack); } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestRackawarePolicyNotificationUpdates.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestRackawarePolicyNotificationUpdates.java index 7dc1d39d1d4..03ecc5b61d0 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestRackawarePolicyNotificationUpdates.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestRackawarePolicyNotificationUpdates.java @@ -22,9 +22,7 @@ import com.google.common.collect.Sets; import com.google.common.util.concurrent.ThreadFactoryBuilder; - import io.netty.util.HashedWheelTimer; - import java.net.InetAddress; import java.util.ArrayList; import java.util.Collections; @@ -33,10 +31,9 @@ import java.util.Optional; import java.util.Set; import java.util.concurrent.TimeUnit; - import junit.framework.TestCase; - import org.apache.bookkeeper.conf.ClientConfiguration; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.net.BookieSocketAddress; import org.apache.bookkeeper.net.DNSToSwitchMapping; import org.apache.bookkeeper.net.NetworkTopology; @@ -73,7 +70,8 @@ protected void setUp() throws Exception { conf.getTimeoutTimerTickDurationMs(), TimeUnit.MILLISECONDS, conf.getTimeoutTimerNumTicks()); repp = new RackawareEnsemblePlacementPolicy(); - repp.initialize(conf, Optional. empty(), timer, DISABLE_ALL, NullStatsLogger.INSTANCE); + repp.initialize(conf, Optional. 
empty(), timer, + DISABLE_ALL, NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); repp.withDefaultRack(NetworkTopology.DEFAULT_REGION_AND_RACK); } @@ -98,18 +96,19 @@ public void testNotifyRackChange() throws Exception { int numOfAvailableRacks = 2; // Update cluster - Set addrs = Sets.newHashSet(addr1, addr2, addr3, addr4); + Set addrs = Sets.newHashSet(addr1.toBookieId(), + addr2.toBookieId(), addr3.toBookieId(), addr4.toBookieId()); repp.onClusterChanged(addrs, new HashSet<>()); int ensembleSize = 3; int writeQuorumSize = 2; int acqQuorumSize = 2; - List ensemble = repp.newEnsemble(ensembleSize, writeQuorumSize, acqQuorumSize, - Collections.emptyMap(), Collections.emptySet()); + List ensemble = repp.newEnsemble(ensembleSize, writeQuorumSize, + acqQuorumSize, Collections.emptyMap(), Collections.emptySet()).getResult(); int numCovered = TestRackawareEnsemblePlacementPolicy.getNumCoveredWriteQuorums(ensemble, writeQuorumSize, - conf.getMinNumRacksPerWriteQuorum()); + conf.getMinNumRacksPerWriteQuorum(), repp.bookieAddressResolver); assertTrue(numCovered >= 1 && numCovered < 3); - assertTrue(ensemble.contains(addr1)); + assertTrue(ensemble.contains(addr1.toBookieId())); List bookieAddressList = new ArrayList<>(); List rackList = new ArrayList<>(); @@ -119,10 +118,10 @@ public void testNotifyRackChange() throws Exception { numOfAvailableRacks = numOfAvailableRacks + 1; acqQuorumSize = 1; ensemble = repp.newEnsemble(ensembleSize, writeQuorumSize, acqQuorumSize, Collections.emptyMap(), - Collections.emptySet()); + Collections.emptySet()).getResult(); assertEquals(3, TestRackawareEnsemblePlacementPolicy.getNumCoveredWriteQuorums(ensemble, writeQuorumSize, - conf.getMinNumRacksPerWriteQuorum())); - assertTrue(ensemble.contains(addr1)); - assertTrue(ensemble.contains(addr2)); + conf.getMinNumRacksPerWriteQuorum(), repp.bookieAddressResolver)); + assertTrue(ensemble.contains(addr1.toBookieId())); + assertTrue(ensemble.contains(addr2.toBookieId())); } } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestReadEntryListener.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestReadEntryListener.java index 7f28cc3e078..16a2616ca4d 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestReadEntryListener.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestReadEntryListener.java @@ -29,7 +29,7 @@ import java.util.Map; import java.util.concurrent.CountDownLatch; import org.apache.bookkeeper.client.BookKeeper.DigestType; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.ReadEntryListener; import org.apache.bookkeeper.test.BookKeeperClusterTestCase; import org.junit.Test; @@ -230,8 +230,8 @@ private void readWithFailedBookiesTest(boolean parallelRead) throws Exception { LedgerHandle lh = bkc.openLedger(id, digestType, passwd); - List ensemble = - lh.getLedgerMetadata().getEnsemble(5); + List ensemble = + lh.getLedgerMetadata().getEnsembleAt(5); // kill two bookies killBookie(ensemble.get(0)); killBookie(ensemble.get(1)); @@ -269,8 +269,8 @@ private void readFailureWithFailedBookiesTest(boolean parallelRead) throws Excep LedgerHandle lh = bkc.openLedger(id, digestType, passwd); - List ensemble = - lh.getLedgerMetadata().getEnsemble(5); + List ensemble = + lh.getLedgerMetadata().getEnsembleAt(5); // kill bookies killBookie(ensemble.get(0)); killBookie(ensemble.get(1)); diff --git 
a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestReadLastConfirmedAndEntry.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestReadLastConfirmedAndEntry.java index 7c2cf5b785f..843fa358724 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestReadLastConfirmedAndEntry.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestReadLastConfirmedAndEntry.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -20,12 +20,11 @@ */ package org.apache.bookkeeper.client; -import static com.google.common.base.Charsets.UTF_8; +import static java.nio.charset.StandardCharsets.UTF_8; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNull; import io.netty.buffer.ByteBuf; - import java.io.IOException; import java.util.Arrays; import java.util.Collection; @@ -34,17 +33,16 @@ import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicReference; - import org.apache.bookkeeper.bookie.Bookie; import org.apache.bookkeeper.bookie.BookieException; import org.apache.bookkeeper.bookie.InterleavedLedgerStorage; import org.apache.bookkeeper.bookie.LedgerStorage; import org.apache.bookkeeper.bookie.SortedLedgerStorage; +import org.apache.bookkeeper.bookie.TestBookieImpl; import org.apache.bookkeeper.bookie.storage.ldb.DbLedgerStorage; import org.apache.bookkeeper.conf.ClientConfiguration; import org.apache.bookkeeper.conf.ServerConfiguration; import org.apache.bookkeeper.test.BookKeeperClusterTestCase; -import org.apache.zookeeper.KeeperException; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; @@ -78,13 +76,13 @@ public static Collection configs() { }); } - static class FakeBookie extends Bookie { + static class FakeBookie extends TestBookieImpl { final long expectedEntryToFail; final boolean stallOrRespondNull; public FakeBookie(ServerConfiguration conf, long expectedEntryToFail, boolean stallOrRespondNull) - throws InterruptedException, BookieException, KeeperException, IOException { + throws Exception { super(conf); this.expectedEntryToFail = expectedEntryToFail; this.stallOrRespondNull = stallOrRespondNull; @@ -92,7 +90,7 @@ public FakeBookie(ServerConfiguration conf, long expectedEntryToFail, boolean st @Override public ByteBuf readEntry(long ledgerId, long entryId) - throws IOException, NoLedgerException { + throws IOException, NoLedgerException, BookieException { if (entryId == expectedEntryToFail) { if (stallOrRespondNull) { try { @@ -125,8 +123,7 @@ public void testAdvancedLacWithEmptyResponse() throws Exception { for (int i = 0; i < numBookies; i++) { ServerConfiguration conf = newServerConfiguration(); Bookie b = new FakeBookie(conf, expectedEntryIdToFail, i != 0); - bs.add(startBookie(conf, b)); - bsConfs.add(conf); + startAndAddBookie(conf, b); } // create bookkeeper @@ -176,21 +173,21 @@ public void readLastConfirmedAndEntryComplete(int rc, long lastConfirmed, Ledger assertEquals(BKException.Code.OK, rcHolder.get()); } - static class SlowReadLacBookie extends Bookie { + static class SlowReadLacBookie extends TestBookieImpl { private final long lacToSlowRead; private final CountDownLatch readLatch; public SlowReadLacBookie(ServerConfiguration conf, long lacToSlowRead, CountDownLatch readLatch) - throws IOException, KeeperException, InterruptedException, BookieException 
{ + throws Exception { super(conf); this.lacToSlowRead = lacToSlowRead; this.readLatch = readLatch; } @Override - public long readLastAddConfirmed(long ledgerId) throws IOException { + public long readLastAddConfirmed(long ledgerId) throws IOException, BookieException { long lac = super.readLastAddConfirmed(ledgerId); logger.info("Last Add Confirmed for ledger {} is {}", ledgerId, lac); if (lacToSlowRead == lac) { @@ -242,9 +239,7 @@ public void testRaceOnLastAddConfirmed() throws Exception { ServerConfiguration bsConf = killBookie(0); // start it with a slow bookie Bookie b = new SlowReadLacBookie(bsConf, lacToSlowRead, readLatch); - bs.add(startBookie(bsConf, b)); - bsConfs.add(bsConf); - + startAndAddBookie(bsConf, b); // create bookkeeper BookKeeper newBk = new BookKeeper(newConf); // create ledger @@ -291,4 +286,4 @@ public void testRaceOnLastAddConfirmed() throws Exception { newBk.close(); } -} +} \ No newline at end of file diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestReadLastConfirmedLongPoll.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestReadLastConfirmedLongPoll.java index d05f864b538..7633e953fc5 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestReadLastConfirmedLongPoll.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestReadLastConfirmedLongPoll.java @@ -25,7 +25,6 @@ import java.util.concurrent.CountDownLatch; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; - import org.apache.bookkeeper.bookie.InterleavedLedgerStorage; import org.apache.bookkeeper.bookie.LedgerStorage; import org.apache.bookkeeper.bookie.SortedLedgerStorage; @@ -37,12 +36,14 @@ import org.junit.runner.RunWith; import org.junit.runners.Parameterized; import org.junit.runners.Parameterized.Parameters; - +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * Test read last confirmed long by polling. 
*/ @RunWith(Parameterized.class) public class TestReadLastConfirmedLongPoll extends BookKeeperClusterTestCase { + private static final Logger log = LoggerFactory.getLogger(TestReadLastConfirmedLongPoll.class); final DigestType digestType; public TestReadLastConfirmedLongPoll(Class storageClass) { @@ -153,7 +154,7 @@ public void testReadLACLongPollWhenSomeBookiesDown() throws Exception { ServerConfiguration[] confs = new ServerConfiguration[numEntries - 1]; for (int j = 0; j < numEntries - 1; j++) { int idx = (i + 1 + j) % numEntries; - confs[j] = killBookie(lh.getCurrentEnsemble().get(idx)); + confs[j] = killBookie(LedgerMetadataUtils.getLastEnsembleValue(lh.getLedgerMetadata()).get(idx)); } final AtomicBoolean entryAsExpected = new AtomicBoolean(false); @@ -186,8 +187,7 @@ public void readLastConfirmedComplete(int rc, long lastConfirmed, Object ctx) { // start the bookies for (ServerConfiguration conf : confs) { - bs.add(startBookie(conf)); - bsConfs.add(conf); + startAndAddBookie(conf); } } } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestReadLastEntry.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestReadLastEntry.java index 35c0f3b69af..0748d30350f 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestReadLastEntry.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestReadLastEntry.java @@ -87,7 +87,7 @@ public void testTryReadLastEntryOnEmptyLedger() throws Exception { @Test public void testTryReadLastEntryAsync() throws Exception { final LedgerHandle lh = bkc.createLedger(1, 1, 1, digestType, "".getBytes()); - byte data[] = new byte[1024]; + byte[] data = new byte[1024]; Arrays.fill(data, (byte) 'x'); for (int j = 0; j < 100; j++) { data[1023] = Integer.valueOf(j).byteValue(); @@ -105,7 +105,7 @@ public void testTryReadLastEntryAsync() throws Exception { public void readComplete(int rc, LedgerHandle lh, Enumeration seq, Object ctx) { rcStore.set(rc); LedgerEntry entry = seq.nextElement(); - lastByteStore.set(Integer.valueOf(entry.getEntry()[1023])); + lastByteStore.set(entry.getEntry()[1023]); latch1.countDown(); } }, null); @@ -122,7 +122,7 @@ public void readComplete(int rc, LedgerHandle lh, Enumeration seq, @Test public void testTryReadLastEntrySync() throws Exception { final LedgerHandle lh = bkc.createLedger(1, 1, 1, digestType, "".getBytes()); - byte data[] = new byte[1024]; + byte[] data = new byte[1024]; Arrays.fill(data, (byte) 'x'); for (int j = 0; j < 100; j++) { data[1023] = Integer.valueOf(j).byteValue(); diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestReadTimeout.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestReadTimeout.java index d244218faf9..160741b97d2 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestReadTimeout.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestReadTimeout.java @@ -24,10 +24,9 @@ import java.util.Set; import java.util.concurrent.CountDownLatch; import java.util.concurrent.atomic.AtomicBoolean; - import org.apache.bookkeeper.client.AsyncCallback.AddCallback; import org.apache.bookkeeper.client.BookKeeper.DigestType; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.test.BookKeeperClusterTestCase; import org.junit.Assert; import org.junit.Test; @@ -61,10 +60,10 @@ public void testReadTimeout() throws Exception { writelh.addEntry(tmp.getBytes()); } - Set beforeSet = new 
HashSet(); - beforeSet.addAll(writelh.getLedgerMetadata().getEnsemble(numEntries)); + Set beforeSet = new HashSet(); + beforeSet.addAll(writelh.getLedgerMetadata().getEnsembleAt(numEntries)); - final BookieSocketAddress bookieToSleep = writelh.getLedgerMetadata().getEnsemble(numEntries).get(0); + final BookieId bookieToSleep = writelh.getLedgerMetadata().getEnsembleAt(numEntries).get(0); int sleeptime = baseClientConf.getReadTimeout() * 3; CountDownLatch latch = sleepBookie(bookieToSleep, sleeptime); latch.await(); @@ -79,8 +78,8 @@ public void addComplete(int rc, LedgerHandle lh, Thread.sleep((baseClientConf.getReadTimeout() * 3) * 1000); Assert.assertTrue("Write request did not finish", completed.get()); - Set afterSet = new HashSet(); - afterSet.addAll(writelh.getLedgerMetadata().getEnsemble(numEntries + 1)); + Set afterSet = new HashSet(); + afterSet.addAll(writelh.getLedgerMetadata().getEnsembleAt(numEntries + 1)); beforeSet.removeAll(afterSet); Assert.assertTrue("Bookie set should not match", beforeSet.size() != 0); diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestRegionAwareEnsemblePlacementPolicy.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestRegionAwareEnsemblePlacementPolicy.java index e541230e35b..8f2562763d3 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestRegionAwareEnsemblePlacementPolicy.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestRegionAwareEnsemblePlacementPolicy.java @@ -27,9 +27,19 @@ import static org.apache.bookkeeper.client.RegionAwareEnsemblePlacementPolicy.REPP_REGIONS_TO_WRITE; import static org.apache.bookkeeper.client.RoundRobinDistributionSchedule.writeSetFromValues; import static org.apache.bookkeeper.feature.SettableFeatureProvider.DISABLE_ALL; - +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.fail; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; + +import com.google.common.collect.Sets; import com.google.common.util.concurrent.ThreadFactoryBuilder; import io.netty.util.HashedWheelTimer; +import java.lang.reflect.Field; +import java.lang.reflect.Modifier; import java.net.InetAddress; import java.util.ArrayList; import java.util.HashMap; @@ -39,39 +49,45 @@ import java.util.Optional; import java.util.Set; import java.util.concurrent.TimeUnit; -import junit.framework.TestCase; import org.apache.bookkeeper.client.BKException.BKNotEnoughBookiesException; import org.apache.bookkeeper.conf.ClientConfiguration; import org.apache.bookkeeper.feature.FeatureProvider; import org.apache.bookkeeper.feature.SettableFeature; import org.apache.bookkeeper.feature.SettableFeatureProvider; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.net.BookieSocketAddress; import org.apache.bookkeeper.net.DNSToSwitchMapping; import org.apache.bookkeeper.net.NetworkTopology; import org.apache.bookkeeper.stats.NullStatsLogger; import org.apache.bookkeeper.util.BookKeeperConstants; import org.apache.bookkeeper.util.StaticDNSResolver; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.condition.EnabledForJreRange; +import org.junit.jupiter.api.condition.JRE; import 
org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * Test a region-aware ensemble placement policy. */ -public class TestRegionAwareEnsemblePlacementPolicy extends TestCase { +public class TestRegionAwareEnsemblePlacementPolicy { static final Logger LOG = LoggerFactory.getLogger(TestRegionAwareEnsemblePlacementPolicy.class); RegionAwareEnsemblePlacementPolicy repp; final ClientConfiguration conf = new ClientConfiguration(); - final ArrayList ensemble = new ArrayList(); + final ArrayList ensemble = new ArrayList(); DistributionSchedule.WriteSet writeSet = DistributionSchedule.NULL_WRITE_SET; - BookieSocketAddress addr1, addr2, addr3, addr4; + BookieSocketAddress addr1; + BookieSocketAddress addr2, addr3, addr4; HashedWheelTimer timer; static void updateMyRack(String rack) throws Exception { StaticDNSResolver.addNodeToRack(InetAddress.getLocalHost().getHostAddress(), rack); StaticDNSResolver.addNodeToRack(InetAddress.getLocalHost().getHostName(), rack); + StaticDNSResolver.addNodeToRack(InetAddress.getLocalHost().getCanonicalHostName(), rack); BookieSocketAddress bookieAddress = new BookieSocketAddress( InetAddress.getLocalHost().getHostAddress(), 0); StaticDNSResolver.addNodeToRack(bookieAddress.getSocketAddress().getHostName(), rack); @@ -80,9 +96,8 @@ static void updateMyRack(String rack) throws Exception { StaticDNSResolver.addNodeToRack("localhost", rack); } - @Override + @BeforeEach protected void setUp() throws Exception { - super.setUp(); StaticDNSResolver.reset(); updateMyRack(NetworkTopology.DEFAULT_REGION_AND_RACK); LOG.info("Set up static DNS Resolver."); @@ -97,10 +112,10 @@ protected void setUp() throws Exception { StaticDNSResolver.addNodeToRack(addr2.getHostName(), NetworkTopology.DEFAULT_REGION_AND_RACK); StaticDNSResolver.addNodeToRack(addr3.getHostName(), NetworkTopology.DEFAULT_REGION_AND_RACK); StaticDNSResolver.addNodeToRack(addr4.getHostName(), "/r1/rack2"); - ensemble.add(addr1); - ensemble.add(addr2); - ensemble.add(addr3); - ensemble.add(addr4); + ensemble.add(addr1.toBookieId()); + ensemble.add(addr2.toBookieId()); + ensemble.add(addr3.toBookieId()); + ensemble.add(addr4.toBookieId()); writeSet = writeSetFromValues(0, 1, 2, 3); @@ -110,29 +125,29 @@ protected void setUp() throws Exception { conf.getTimeoutTimerNumTicks()); repp = new RegionAwareEnsemblePlacementPolicy(); - repp.initialize(conf, Optional.empty(), timer, DISABLE_ALL, NullStatsLogger.INSTANCE); + repp.initialize(conf, Optional.empty(), timer, DISABLE_ALL, + NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); } - @Override + @AfterEach protected void tearDown() throws Exception { repp.uninitalize(); - super.tearDown(); } static BookiesHealthInfo getBookiesHealthInfo() { return getBookiesHealthInfo(new HashMap<>(), new HashMap<>()); } - static BookiesHealthInfo getBookiesHealthInfo(Map bookieFailureHistory, - Map bookiePendingRequests) { + static BookiesHealthInfo getBookiesHealthInfo(Map bookieFailureHistory, + Map bookiePendingRequests) { return new BookiesHealthInfo() { @Override - public long getBookieFailureHistory(BookieSocketAddress bookieSocketAddress) { + public long getBookieFailureHistory(BookieId bookieSocketAddress) { return bookieFailureHistory.getOrDefault(bookieSocketAddress, -1L); } @Override - public long getBookiePendingRequests(BookieSocketAddress bookieSocketAddress) { + public long getBookiePendingRequests(BookieId bookieSocketAddress) { return bookiePendingRequests.getOrDefault(bookieSocketAddress, 0L); } }; @@ -145,7 +160,8 @@ public void 
testNotReorderReadIfInDefaultRack() throws Exception { updateMyRack(NetworkTopology.DEFAULT_REGION_AND_RACK); repp = new RegionAwareEnsemblePlacementPolicy(); - repp.initialize(conf, Optional.empty(), timer, DISABLE_ALL, NullStatsLogger.INSTANCE); + repp.initialize(conf, Optional.empty(), timer, DISABLE_ALL, + NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); DistributionSchedule.WriteSet origWriteSet = writeSet.copy(); DistributionSchedule.WriteSet reorderSet = repp.reorderReadSequence( @@ -159,17 +175,18 @@ public void testNodeInSameRegion() throws Exception { updateMyRack("/r1/rack3"); repp = new RegionAwareEnsemblePlacementPolicy(); - repp.initialize(conf, Optional.empty(), timer, DISABLE_ALL, NullStatsLogger.INSTANCE); + repp.initialize(conf, Optional.empty(), timer, DISABLE_ALL, + NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); // make sure we've detected the right region assertEquals("r1", repp.myRegion); - Set addrs = new HashSet(); - addrs.add(addr1); - addrs.add(addr2); - addrs.add(addr3); - addrs.add(addr4); - repp.onClusterChanged(addrs, new HashSet()); + Set addrs = new HashSet(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); DistributionSchedule.WriteSet reorderSet = repp.reorderReadSequence( ensemble, getBookiesHealthInfo(), writeSet.copy()); @@ -178,7 +195,7 @@ public void testNodeInSameRegion() throws Exception { LOG.info("reorder set : {}", reorderSet); LOG.info("expected set : {}", expectedSet); LOG.info("reorder equals {}", reorderSet.equals(writeSet)); - assertFalse(reorderSet.equals(writeSet)); + assertNotEquals(reorderSet, writeSet); assertEquals(expectedSet, reorderSet); } @@ -188,7 +205,8 @@ public void testNodeNotInSameRegions() throws Exception { updateMyRack("/r2/rack1"); repp = new RegionAwareEnsemblePlacementPolicy(); - repp.initialize(conf, Optional.empty(), timer, DISABLE_ALL, NullStatsLogger.INSTANCE); + repp.initialize(conf, Optional.empty(), timer, DISABLE_ALL, + NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); DistributionSchedule.WriteSet origWriteSet = writeSet.copy(); DistributionSchedule.WriteSet reorderSet = repp.reorderReadSequence( @@ -203,24 +221,25 @@ public void testNodeDown() throws Exception { updateMyRack("/r1/rack1"); repp = new RegionAwareEnsemblePlacementPolicy(); - repp.initialize(conf, Optional.empty(), timer, DISABLE_ALL, NullStatsLogger.INSTANCE); + repp.initialize(conf, Optional.empty(), timer, DISABLE_ALL, + NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); // Update cluster - Set addrs = new HashSet(); - addrs.add(addr1); - addrs.add(addr2); - addrs.add(addr3); - addrs.add(addr4); - repp.onClusterChanged(addrs, new HashSet()); - addrs.remove(addr1); - repp.onClusterChanged(addrs, new HashSet()); + Set addrs = new HashSet(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); + addrs.remove(addr1.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); DistributionSchedule.WriteSet origWriteSet = writeSet.copy(); DistributionSchedule.WriteSet reorderSet = repp.reorderReadSequence( ensemble, getBookiesHealthInfo(), writeSet); DistributionSchedule.WriteSet expectedSet = writeSetFromValues(3, 1, 2, 0); LOG.info("reorder set : {}", reorderSet); - 
assertFalse(reorderSet.equals(origWriteSet)); + assertNotEquals(reorderSet, origWriteSet); assertEquals(expectedSet, reorderSet); } @@ -230,18 +249,19 @@ public void testNodeReadOnly() throws Exception { updateMyRack("/r1/rack1"); repp = new RegionAwareEnsemblePlacementPolicy(); - repp.initialize(conf, Optional.empty(), timer, DISABLE_ALL, NullStatsLogger.INSTANCE); + repp.initialize(conf, Optional.empty(), timer, DISABLE_ALL, + NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); // Update cluster - Set addrs = new HashSet(); - addrs.add(addr1); - addrs.add(addr2); - addrs.add(addr3); - addrs.add(addr4); - repp.onClusterChanged(addrs, new HashSet()); - addrs.remove(addr1); - Set ro = new HashSet(); - ro.add(addr1); + Set addrs = new HashSet(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); + addrs.remove(addr1.toBookieId()); + Set ro = new HashSet(); + ro.add(addr1.toBookieId()); repp.onClusterChanged(addrs, ro); DistributionSchedule.WriteSet origWriteSet = writeSet.copy(); @@ -249,7 +269,7 @@ public void testNodeReadOnly() throws Exception { ensemble, getBookiesHealthInfo(), writeSet); DistributionSchedule.WriteSet expectedSet = writeSetFromValues(3, 1, 2, 0); LOG.info("reorder set : {}", reorderSet); - assertFalse(reorderSet.equals(origWriteSet)); + assertNotEquals(reorderSet, origWriteSet); assertEquals(expectedSet, reorderSet); } @@ -259,18 +279,19 @@ public void testNodeSlow() throws Exception { updateMyRack("/r1/rack1"); repp = new RegionAwareEnsemblePlacementPolicy(); - repp.initialize(conf, Optional.empty(), timer, DISABLE_ALL, NullStatsLogger.INSTANCE); + repp.initialize(conf, Optional.empty(), timer, DISABLE_ALL, + NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); // Update cluster - Set addrs = new HashSet(); - addrs.add(addr1); - addrs.add(addr2); - addrs.add(addr3); - addrs.add(addr4); - repp.onClusterChanged(addrs, new HashSet()); - repp.registerSlowBookie(addr1, 0L); - Map bookiePendingMap = new HashMap<>(); - bookiePendingMap.put(addr1, 1L); + Set addrs = new HashSet(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); + repp.registerSlowBookie(addr1.toBookieId(), 0L); + Map bookiePendingMap = new HashMap<>(); + bookiePendingMap.put(addr1.toBookieId(), 1L); repp.onClusterChanged(addrs, new HashSet<>()); DistributionSchedule.WriteSet origWriteSet = writeSet.copy(); @@ -278,7 +299,7 @@ public void testNodeSlow() throws Exception { ensemble, getBookiesHealthInfo(new HashMap<>(), bookiePendingMap), writeSet); DistributionSchedule.WriteSet expectedSet = writeSetFromValues(3, 1, 2, 0); LOG.info("reorder set : {}", reorderSet); - assertFalse(reorderSet.equals(origWriteSet)); + assertNotEquals(reorderSet, origWriteSet); assertEquals(expectedSet, reorderSet); } @@ -288,20 +309,21 @@ public void testTwoNodesSlow() throws Exception { updateMyRack("/r1/rack1"); repp = new RegionAwareEnsemblePlacementPolicy(); - repp.initialize(conf, Optional.empty(), timer, DISABLE_ALL, NullStatsLogger.INSTANCE); + repp.initialize(conf, Optional.empty(), timer, DISABLE_ALL, + NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); // Update cluster - Set addrs = new HashSet(); - addrs.add(addr1); - addrs.add(addr2); - addrs.add(addr3); - addrs.add(addr4); - repp.onClusterChanged(addrs, 
new HashSet()); - repp.registerSlowBookie(addr1, 0L); - repp.registerSlowBookie(addr2, 0L); - Map bookiePendingMap = new HashMap<>(); - bookiePendingMap.put(addr1, 1L); - bookiePendingMap.put(addr2, 2L); + Set addrs = new HashSet(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); + repp.registerSlowBookie(addr1.toBookieId(), 0L); + repp.registerSlowBookie(addr2.toBookieId(), 0L); + Map bookiePendingMap = new HashMap<>(); + bookiePendingMap.put(addr1.toBookieId(), 1L); + bookiePendingMap.put(addr2.toBookieId(), 2L); repp.onClusterChanged(addrs, new HashSet<>()); DistributionSchedule.WriteSet origWriteSet = writeSet.copy(); @@ -309,7 +331,7 @@ public void testTwoNodesSlow() throws Exception { ensemble, getBookiesHealthInfo(new HashMap<>(), bookiePendingMap), writeSet); DistributionSchedule.WriteSet expectedSet = writeSetFromValues(3, 2, 0, 1); LOG.info("reorder set : {}", reorderSet); - assertFalse(reorderSet.equals(origWriteSet)); + assertNotEquals(reorderSet, origWriteSet); assertEquals(expectedSet, reorderSet); } @@ -319,25 +341,26 @@ public void testTwoNodesDown() throws Exception { updateMyRack("/r1/rack1"); repp = new RegionAwareEnsemblePlacementPolicy(); - repp.initialize(conf, Optional.empty(), timer, DISABLE_ALL, NullStatsLogger.INSTANCE); + repp.initialize(conf, Optional.empty(), timer, DISABLE_ALL, + NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); // Update cluster - Set addrs = new HashSet(); - addrs.add(addr1); - addrs.add(addr2); - addrs.add(addr3); - addrs.add(addr4); - repp.onClusterChanged(addrs, new HashSet()); - addrs.remove(addr1); - addrs.remove(addr2); - repp.onClusterChanged(addrs, new HashSet()); + Set addrs = new HashSet(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); + addrs.remove(addr1.toBookieId()); + addrs.remove(addr2.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); DistributionSchedule.WriteSet origWriteSet = writeSet.copy(); DistributionSchedule.WriteSet reorderSet = repp.reorderReadSequence( ensemble, getBookiesHealthInfo(), writeSet); DistributionSchedule.WriteSet expectedSet = writeSetFromValues(3, 2, 0, 1); LOG.info("reorder set : {}", reorderSet); - assertFalse(reorderSet.equals(origWriteSet)); + assertNotEquals(reorderSet, origWriteSet); assertEquals(expectedSet, reorderSet); } @@ -347,27 +370,28 @@ public void testNodeDownAndNodeSlow() throws Exception { updateMyRack("/r1/rack1"); repp = new RegionAwareEnsemblePlacementPolicy(); - repp.initialize(conf, Optional.empty(), timer, DISABLE_ALL, NullStatsLogger.INSTANCE); + repp.initialize(conf, Optional.empty(), timer, DISABLE_ALL, + NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); // Update cluster - Set addrs = new HashSet(); - addrs.add(addr1); - addrs.add(addr2); - addrs.add(addr3); - addrs.add(addr4); - repp.onClusterChanged(addrs, new HashSet()); - repp.registerSlowBookie(addr1, 0L); - Map bookiePendingMap = new HashMap<>(); - bookiePendingMap.put(addr1, 1L); - addrs.remove(addr2); - repp.onClusterChanged(addrs, new HashSet()); + Set addrs = new HashSet(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); + repp.registerSlowBookie(addr1.toBookieId(), 
0L); + Map bookiePendingMap = new HashMap<>(); + bookiePendingMap.put(addr1.toBookieId(), 1L); + addrs.remove(addr2.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); DistributionSchedule.WriteSet origWriteSet = writeSet.copy(); DistributionSchedule.WriteSet reorderSet = repp.reorderReadSequence( ensemble, getBookiesHealthInfo(new HashMap<>(), bookiePendingMap), writeSet); DistributionSchedule.WriteSet expectedSet = writeSetFromValues(3, 2, 0, 1); LOG.info("reorder set : {}", reorderSet); - assertFalse(reorderSet.equals(origWriteSet)); + assertNotEquals(reorderSet, origWriteSet); assertEquals(expectedSet, reorderSet); } @@ -377,22 +401,23 @@ public void testNodeDownAndReadOnlyAndNodeSlow() throws Exception { updateMyRack("/r1/rack1"); repp = new RegionAwareEnsemblePlacementPolicy(); - repp.initialize(conf, Optional.empty(), timer, DISABLE_ALL, NullStatsLogger.INSTANCE); + repp.initialize(conf, Optional.empty(), timer, DISABLE_ALL, + NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); // Update cluster - Set addrs = new HashSet(); - addrs.add(addr1); - addrs.add(addr2); - addrs.add(addr3); - addrs.add(addr4); - repp.onClusterChanged(addrs, new HashSet()); - addrs.remove(addr1); - addrs.remove(addr2); - Set ro = new HashSet<>(); - ro.add(addr2); - repp.registerSlowBookie(addr3, 0L); - Map bookiePendingMap = new HashMap<>(); - bookiePendingMap.put(addr3, 1L); + Set addrs = new HashSet(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); + addrs.remove(addr1.toBookieId()); + addrs.remove(addr2.toBookieId()); + Set ro = new HashSet<>(); + ro.add(addr2.toBookieId()); + repp.registerSlowBookie(addr3.toBookieId(), 0L); + Map bookiePendingMap = new HashMap<>(); + bookiePendingMap.put(addr3.toBookieId(), 1L); repp.onClusterChanged(addrs, ro); DistributionSchedule.WriteSet origWriteSet = writeSet.copy(); @@ -400,7 +425,7 @@ public void testNodeDownAndReadOnlyAndNodeSlow() throws Exception { ensemble, getBookiesHealthInfo(new HashMap<>(), bookiePendingMap), writeSet); DistributionSchedule.WriteSet expectedSet = writeSetFromValues(3, 1, 2, 0); LOG.info("reorder set : {}", reorderSet); - assertFalse(reorderSet.equals(origWriteSet)); + assertNotEquals(reorderSet, origWriteSet); assertEquals(expectedSet, reorderSet); } @@ -416,16 +441,16 @@ public void testReplaceBookieWithEnoughBookiesInSameRegion() throws Exception { StaticDNSResolver.addNodeToRack(addr3.getHostName(), "/region1/r2"); StaticDNSResolver.addNodeToRack(addr4.getHostName(), "/default-region/r3"); // Update cluster - Set addrs = new HashSet(); - addrs.add(addr1); - addrs.add(addr2); - addrs.add(addr3); - addrs.add(addr4); - repp.onClusterChanged(addrs, new HashSet()); + Set addrs = new HashSet(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); // replace node under r2 - BookieSocketAddress replacedBookie = repp.replaceBookie(1, 1, 1, null, new HashSet(), - addr2, new HashSet()); - assertEquals(addr3, replacedBookie); + BookieId replacedBookie = repp.replaceBookie(1, 1, 1, null, + new ArrayList(), addr2.toBookieId(), new HashSet()).getResult(); + assertEquals(addr3.toBookieId(), replacedBookie); } @Test @@ -440,20 +465,21 @@ public void testReplaceBookieWithEnoughBookiesInDifferentRegion() throws Excepti StaticDNSResolver.addNodeToRack(addr3.getHostName(), 
"/region2/r3"); StaticDNSResolver.addNodeToRack(addr4.getHostName(), "/region3/r4"); // Update cluster - Set addrs = new HashSet(); - addrs.add(addr1); - addrs.add(addr2); - addrs.add(addr3); - addrs.add(addr4); - repp.onClusterChanged(addrs, new HashSet()); + Set addrs = new HashSet(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); // replace node under r2 - Set excludedAddrs = new HashSet(); - excludedAddrs.add(addr1); - BookieSocketAddress replacedBookie = repp.replaceBookie(1, 1, 1, null, - new HashSet(), addr2, excludedAddrs); - - assertFalse(addr1.equals(replacedBookie)); - assertTrue(addr3.equals(replacedBookie) || addr4.equals(replacedBookie)); + Set excludedAddrs = new HashSet(); + excludedAddrs.add(addr1.toBookieId()); + BookieId replacedBookie = repp.replaceBookie(1, 1, 1, null, + new ArrayList(), addr2.toBookieId(), excludedAddrs).getResult(); + + assertNotEquals(addr1.toBookieId(), replacedBookie); + assertTrue(addr3.toBookieId().equals(replacedBookie) + || addr4.toBookieId().equals(replacedBookie)); } @Test @@ -468,14 +494,15 @@ public void testNewEnsembleBookieWithNotEnoughBookies() throws Exception { StaticDNSResolver.addNodeToRack(addr3.getHostName(), "/region3/r3"); StaticDNSResolver.addNodeToRack(addr4.getHostName(), "/region4/r4"); // Update cluster - Set addrs = new HashSet(); - addrs.add(addr1); - addrs.add(addr2); - addrs.add(addr3); - addrs.add(addr4); - repp.onClusterChanged(addrs, new HashSet()); + Set addrs = new HashSet(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); try { - List list = repp.newEnsemble(5, 5, 3, null, new HashSet()); + List list = repp.newEnsemble(5, 5, 3, null, + new HashSet()).getResult(); LOG.info("Ensemble : {}", list); fail("Should throw BKNotEnoughBookiesException when there is not enough bookies"); } catch (BKNotEnoughBookiesException bnebe) { @@ -483,6 +510,52 @@ public void testNewEnsembleBookieWithNotEnoughBookies() throws Exception { } } + @Test + @EnabledForJreRange(max = JRE.JAVA_11) + public void testNewEnsembleBookieWithOneEmptyRegion() throws Exception { + BookieSocketAddress addr1 = new BookieSocketAddress("127.0.0.2", 3181); + BookieSocketAddress addr2 = new BookieSocketAddress("127.0.0.3", 3181); + BookieSocketAddress addr3 = new BookieSocketAddress("127.0.0.4", 3181); + BookieSocketAddress addr4 = new BookieSocketAddress("127.0.0.5", 3181); + // update dns mapping + StaticDNSResolver.addNodeToRack(addr1.getHostName(), NetworkTopology.DEFAULT_REGION_AND_RACK); + StaticDNSResolver.addNodeToRack(addr2.getHostName(), "/region2/r2"); + StaticDNSResolver.addNodeToRack(addr3.getHostName(), "/region3/r3"); + StaticDNSResolver.addNodeToRack(addr4.getHostName(), "/region4/r4"); + // Update cluster + Set addrs = new HashSet(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + + Field logField = repp.getClass().getDeclaredField("LOG"); + Logger mockLogger = mock(Logger.class); + + Field modifiers = Field.class.getDeclaredField("modifiers"); + modifiers.setAccessible(true); + modifiers.setInt(logField, logField.getModifiers() & ~Modifier.FINAL); + logField.setAccessible(true); + logField.set(null, mockLogger); + + repp.onClusterChanged(addrs, new HashSet()); + repp.newEnsemble(3, 3, 3, null, + 
new HashSet()).getResult(); + verify(mockLogger, times(0)).warn("Could not allocate {} bookies in region {}, try allocating {} bookies", + 1, "UnknownRegion", 0); + addrs = new HashSet(); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); + + repp.newEnsemble(3, 3, 3, null, + new HashSet()).getResult(); + + verify(mockLogger, times(0)).warn("Could not allocate {} bookies in region {}, try allocating {} bookies", + 1, "UnknownRegion", 0); + } + @Test public void testReplaceBookieWithNotEnoughBookies() throws Exception { BookieSocketAddress addr1 = new BookieSocketAddress("127.0.0.2", 3181); @@ -495,19 +568,19 @@ public void testReplaceBookieWithNotEnoughBookies() throws Exception { StaticDNSResolver.addNodeToRack(addr3.getHostName(), "/region3/r3"); StaticDNSResolver.addNodeToRack(addr4.getHostName(), "/region4/r4"); // Update cluster - Set addrs = new HashSet(); - addrs.add(addr1); - addrs.add(addr2); - addrs.add(addr3); - addrs.add(addr4); - repp.onClusterChanged(addrs, new HashSet()); + Set addrs = new HashSet(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); // replace node under r2 - Set excludedAddrs = new HashSet(); - excludedAddrs.add(addr1); - excludedAddrs.add(addr3); - excludedAddrs.add(addr4); + Set excludedAddrs = new HashSet(); + excludedAddrs.add(addr1.toBookieId()); + excludedAddrs.add(addr3.toBookieId()); + excludedAddrs.add(addr4.toBookieId()); try { - repp.replaceBookie(1, 1, 1, null, new HashSet(), addr2, excludedAddrs); + repp.replaceBookie(1, 1, 1, null, new ArrayList(), addr2.toBookieId(), excludedAddrs); fail("Should throw BKNotEnoughBookiesException when there is not enough bookies"); } catch (BKNotEnoughBookiesException bnebe) { // should throw not enou @@ -518,7 +591,8 @@ public void testReplaceBookieWithNotEnoughBookies() throws Exception { public void testNewEnsembleWithSingleRegion() throws Exception { repp.uninitalize(); repp = new RegionAwareEnsemblePlacementPolicy(); - repp.initialize(conf, Optional.empty(), timer, DISABLE_ALL, NullStatsLogger.INSTANCE); + repp.initialize(conf, Optional.empty(), timer, DISABLE_ALL, + NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); BookieSocketAddress addr1 = new BookieSocketAddress("127.0.0.2", 3181); BookieSocketAddress addr2 = new BookieSocketAddress("127.0.0.3", 3181); BookieSocketAddress addr3 = new BookieSocketAddress("127.0.0.4", 3181); @@ -529,18 +603,18 @@ public void testNewEnsembleWithSingleRegion() throws Exception { StaticDNSResolver.addNodeToRack(addr3.getHostName(), "/region1/r2"); StaticDNSResolver.addNodeToRack(addr4.getHostName(), "/region1/r2"); // Update cluster - Set addrs = new HashSet(); - addrs.add(addr1); - addrs.add(addr2); - addrs.add(addr3); - addrs.add(addr4); - repp.onClusterChanged(addrs, new HashSet()); + Set addrs = new HashSet(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); try { - List ensemble = repp.newEnsemble(3, 2, 2, null, - new HashSet()); + List ensemble = repp.newEnsemble(3, 2, 2, null, + new HashSet()).getResult(); assertEquals(0, getNumCoveredRegionsInWriteQuorum(ensemble, 2)); - List ensemble2 = repp.newEnsemble(4, 2, 2, null, - new HashSet()); + List ensemble2 = repp.newEnsemble(4, 2, 2, null, + new 
HashSet()).getResult(); assertEquals(0, getNumCoveredRegionsInWriteQuorum(ensemble2, 2)); } catch (BKNotEnoughBookiesException bnebe) { fail("Should not get not enough bookies exception even there is only one rack."); @@ -551,7 +625,8 @@ public void testNewEnsembleWithSingleRegion() throws Exception { public void testNewEnsembleWithMultipleRegions() throws Exception { repp.uninitalize(); repp = new RegionAwareEnsemblePlacementPolicy(); - repp.initialize(conf, Optional.empty(), timer, DISABLE_ALL, NullStatsLogger.INSTANCE); + repp.initialize(conf, Optional.empty(), timer, DISABLE_ALL, + NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); BookieSocketAddress addr1 = new BookieSocketAddress("127.0.0.2", 3181); BookieSocketAddress addr2 = new BookieSocketAddress("127.0.0.3", 3181); BookieSocketAddress addr3 = new BookieSocketAddress("127.0.0.4", 3181); @@ -562,15 +637,15 @@ public void testNewEnsembleWithMultipleRegions() throws Exception { StaticDNSResolver.addNodeToRack(addr3.getHostName(), "/region1/r2"); StaticDNSResolver.addNodeToRack(addr4.getHostName(), "/region1/r2"); // Update cluster - Set addrs = new HashSet(); - addrs.add(addr1); - addrs.add(addr2); - addrs.add(addr3); - addrs.add(addr4); - repp.onClusterChanged(addrs, new HashSet()); + Set addrs = new HashSet(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); try { - List ensemble = repp.newEnsemble(3, 2, 2, null, - new HashSet()); + List ensemble = repp.newEnsemble(3, 2, 2, null, + new HashSet()).getResult(); int numCovered = getNumCoveredRegionsInWriteQuorum(ensemble, 2); assertTrue(numCovered >= 1); assertTrue(numCovered < 3); @@ -578,8 +653,8 @@ public void testNewEnsembleWithMultipleRegions() throws Exception { fail("Should not get not enough bookies exception even there is only one rack."); } try { - List ensemble2 = repp.newEnsemble(4, 2, 2, null, - new HashSet()); + List ensemble2 = repp.newEnsemble(4, 2, 2, null, + new HashSet()).getResult(); int numCovered = getNumCoveredRegionsInWriteQuorum(ensemble2, 2); assertTrue(numCovered >= 1 && numCovered < 3); } catch (BKNotEnoughBookiesException bnebe) { @@ -587,6 +662,64 @@ public void testNewEnsembleWithMultipleRegions() throws Exception { } } + @Test + public void testNewEnsembleWithPickDifferentRack() throws Exception { + ClientConfiguration clientConf = new ClientConfiguration(conf); + clientConf.setMinNumRacksPerWriteQuorum(2); + clientConf.setEnforceMinNumFaultDomainsForWrite(false); + repp.uninitalize(); + repp = new RegionAwareEnsemblePlacementPolicy(); + repp.initialize(clientConf, Optional.empty(), timer, DISABLE_ALL, + NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); + + BookieSocketAddress addr1 = new BookieSocketAddress("127.0.0.1", 3181); + BookieSocketAddress addr2 = new BookieSocketAddress("127.0.0.2", 3181); + BookieSocketAddress addr3 = new BookieSocketAddress("127.0.0.3", 3181); + BookieSocketAddress addr4 = new BookieSocketAddress("127.0.0.4", 3181); + BookieSocketAddress addr5 = new BookieSocketAddress("127.0.0.5", 3181); + // update dns mapping + StaticDNSResolver.addNodeToRack(addr1.getHostName(), "/region-1/r1"); + StaticDNSResolver.addNodeToRack(addr2.getHostName(), "/region-1/r1"); + StaticDNSResolver.addNodeToRack(addr3.getHostName(), "/region-1/r2"); + StaticDNSResolver.addNodeToRack(addr4.getHostName(), "/region-1/r3"); + StaticDNSResolver.addNodeToRack(addr5.getHostName(), 
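// Topology for testNewEnsembleWithPickDifferentRack: addr1 and addr2 share
// /region-1/r1, so with minNumRacksPerWriteQuorum=2 an ensemble of 4 picked
// from 5 bookies should never hold both; after addr4 is removed only 4
// candidates remain and the second loop expects both rack-mates to be chosen.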
"/region-2/r1"); + // Update cluster + Set addrs = new HashSet(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + addrs.add(addr5.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); + + int ensembleSize = 4; + int writeQuorumSize = 4; + int ackQuorumSize = 2; + + Set excludeBookies = new HashSet<>(); + + for (int i = 0; i < 50; ++i) { + EnsemblePlacementPolicy.PlacementResult> ensembleResponse = + repp.newEnsemble(ensembleSize, writeQuorumSize, + ackQuorumSize, null, excludeBookies); + List ensemble = ensembleResponse.getResult(); + if (ensemble.contains(addr1.toBookieId()) && ensemble.contains(addr2.toBookieId())) { + fail("addr1 and addr2 is same rack."); + } + } + + //addr4 shutdown. + addrs.remove(addr4.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); + for (int i = 0; i < 50; ++i) { + EnsemblePlacementPolicy.PlacementResult> ensembleResponse = + repp.newEnsemble(ensembleSize, writeQuorumSize, + ackQuorumSize, null, excludeBookies); + List ensemble = ensembleResponse.getResult(); + assertTrue(ensemble.contains(addr1.toBookieId()) && ensemble.contains(addr2.toBookieId())); + } + } + @Test public void testNewEnsembleWithEnoughRegions() throws Exception { BookieSocketAddress addr1 = new BookieSocketAddress("127.0.0.2", 3181); @@ -607,33 +740,95 @@ public void testNewEnsembleWithEnoughRegions() throws Exception { StaticDNSResolver.addNodeToRack(addr7.getHostName(), "/region2/r13"); StaticDNSResolver.addNodeToRack(addr8.getHostName(), "/region3/r14"); // Update cluster - Set addrs = new HashSet(); - addrs.add(addr1); - addrs.add(addr2); - addrs.add(addr3); - addrs.add(addr4); - addrs.add(addr5); - addrs.add(addr6); - addrs.add(addr7); - addrs.add(addr8); - repp.onClusterChanged(addrs, new HashSet()); + Set addrs = new HashSet(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + addrs.add(addr5.toBookieId()); + addrs.add(addr6.toBookieId()); + addrs.add(addr7.toBookieId()); + addrs.add(addr8.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); try { - List ensemble1 = repp.newEnsemble(3, 2, 2, null, - new HashSet()); + List ensemble1 = repp.newEnsemble(3, 2, 2, null, + new HashSet()).getResult(); assertEquals(3, getNumCoveredRegionsInWriteQuorum(ensemble1, 2)); - List ensemble2 = repp.newEnsemble(4, 2, 2, null, - new HashSet()); + List ensemble2 = repp.newEnsemble(4, 2, 2, null, + new HashSet()).getResult(); assertEquals(4, getNumCoveredRegionsInWriteQuorum(ensemble2, 2)); } catch (BKNotEnoughBookiesException bnebe) { fail("Should not get not enough bookies exception even there is only one rack."); } } + @Test + public void testNewEnsembleWithMultipleRacksWithCommonRack() throws Exception { + ClientConfiguration clientConf = new ClientConfiguration(conf); + clientConf.setEnforceMinNumRacksPerWriteQuorum(true); + clientConf.setMinNumRacksPerWriteQuorum(3); + repp.uninitalize(); + repp = new RegionAwareEnsemblePlacementPolicy(); + repp.initialize(clientConf, Optional.empty(), timer, + DISABLE_ALL, NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); + + BookieSocketAddress addr1 = new BookieSocketAddress("127.0.0.1", 3181); + BookieSocketAddress addr2 = new BookieSocketAddress("127.0.0.2", 3181); + BookieSocketAddress addr3 = new BookieSocketAddress("127.0.0.3", 3181); + BookieSocketAddress addr4 = new BookieSocketAddress("127.0.0.4", 3181); + BookieSocketAddress addr5 = new 
BookieSocketAddress("127.0.0.5", 3181); + BookieSocketAddress addr6 = new BookieSocketAddress("127.0.0.6", 3181); + BookieSocketAddress addr7 = new BookieSocketAddress("127.0.0.7", 3181); + BookieSocketAddress addr8 = new BookieSocketAddress("127.0.0.8", 3181); + BookieSocketAddress addr9 = new BookieSocketAddress("127.0.0.9", 3181); + BookieSocketAddress addr10 = new BookieSocketAddress("127.0.0.10", 3181); + // update dns mapping + StaticDNSResolver.addNodeToRack(addr1.getHostName(), "/region1/r1"); + StaticDNSResolver.addNodeToRack(addr2.getHostName(), "/region1/r1"); + StaticDNSResolver.addNodeToRack(addr3.getHostName(), "/region1/r1"); + StaticDNSResolver.addNodeToRack(addr4.getHostName(), "/region1/r1"); + StaticDNSResolver.addNodeToRack(addr5.getHostName(), "/region1/r1"); + StaticDNSResolver.addNodeToRack(addr6.getHostName(), "/region1/r1"); + StaticDNSResolver.addNodeToRack(addr7.getHostName(), "/region1/r2"); + StaticDNSResolver.addNodeToRack(addr8.getHostName(), "/region1/r3"); + StaticDNSResolver.addNodeToRack(addr9.getHostName(), "/region2/r1"); + StaticDNSResolver.addNodeToRack(addr10.getHostName(), "/region3/r1"); + // Update cluster + Set addrs = new HashSet(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + addrs.add(addr5.toBookieId()); + addrs.add(addr6.toBookieId()); + addrs.add(addr7.toBookieId()); + addrs.add(addr8.toBookieId()); + addrs.add(addr9.toBookieId()); + addrs.add(addr10.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); + + try { + int ensembleSize = 10; + int writeQuorumSize = 10; + int ackQuorumSize = 2; + + for (int i = 0; i < 50; ++i) { + Set excludeBookies = new HashSet<>(); + EnsemblePlacementPolicy.PlacementResult> ensembleResponse = + repp.newEnsemble(ensembleSize, writeQuorumSize, + ackQuorumSize, null, excludeBookies); + } + } catch (Exception e) { + fail("RegionAwareEnsemblePlacementPolicy should newEnsemble succeed."); + } + } + @Test public void testNewEnsembleWithThreeRegions() throws Exception { repp.uninitalize(); repp = new RegionAwareEnsemblePlacementPolicy(); - repp.initialize(conf, Optional.empty(), timer, DISABLE_ALL, NullStatsLogger.INSTANCE); + repp.initialize(conf, Optional.empty(), timer, DISABLE_ALL, + NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); BookieSocketAddress addr1 = new BookieSocketAddress("127.0.0.2", 3181); BookieSocketAddress addr2 = new BookieSocketAddress("127.0.0.3", 3181); BookieSocketAddress addr3 = new BookieSocketAddress("127.0.0.4", 3181); @@ -656,38 +851,38 @@ public void testNewEnsembleWithThreeRegions() throws Exception { StaticDNSResolver.addNodeToRack(addr9.getHostName(), "/region2/r23"); StaticDNSResolver.addNodeToRack(addr10.getHostName(), "/region1/r24"); // Update cluster - Set addrs = new HashSet(); - addrs.add(addr1); - addrs.add(addr2); - addrs.add(addr3); - addrs.add(addr4); - addrs.add(addr5); - addrs.add(addr6); - addrs.add(addr7); - addrs.add(addr8); - addrs.add(addr9); - addrs.add(addr10); - repp.onClusterChanged(addrs, new HashSet()); + Set addrs = new HashSet(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + addrs.add(addr5.toBookieId()); + addrs.add(addr6.toBookieId()); + addrs.add(addr7.toBookieId()); + addrs.add(addr8.toBookieId()); + addrs.add(addr9.toBookieId()); + addrs.add(addr10.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); try { - List ensemble = 
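// testNewEnsembleWithThreeRegions: the assertions below expect every ensemble
// of size 6 through 9 to include addr4 and addr8 and to span exactly three
// regions; presumably those two bookies are indispensable for covering their
// regions in this topology.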
repp.newEnsemble(6, 6, 4, null, - new HashSet()); - assert(ensemble.contains(addr4)); - assert(ensemble.contains(addr8)); + List ensemble = repp.newEnsemble(6, 6, 4, null, + new HashSet()).getResult(); + assert(ensemble.contains(addr4.toBookieId())); + assert(ensemble.contains(addr8.toBookieId())); assert(ensemble.size() == 6); assertEquals(3, getNumRegionsInEnsemble(ensemble)); - ensemble = repp.newEnsemble(7, 7, 4, null, new HashSet()); - assert(ensemble.contains(addr4)); - assert(ensemble.contains(addr8)); + ensemble = repp.newEnsemble(7, 7, 4, null, new HashSet()).getResult(); + assert(ensemble.contains(addr4.toBookieId())); + assert(ensemble.contains(addr8.toBookieId())); assert(ensemble.size() == 7); assertEquals(3, getNumRegionsInEnsemble(ensemble)); - ensemble = repp.newEnsemble(8, 8, 5, null, new HashSet()); - assert(ensemble.contains(addr4)); - assert(ensemble.contains(addr8)); + ensemble = repp.newEnsemble(8, 8, 5, null, new HashSet()).getResult(); + assert(ensemble.contains(addr4.toBookieId())); + assert(ensemble.contains(addr8.toBookieId())); assert(ensemble.size() == 8); assertEquals(3, getNumRegionsInEnsemble(ensemble)); - ensemble = repp.newEnsemble(9, 9, 5, null, new HashSet()); - assert(ensemble.contains(addr4)); - assert(ensemble.contains(addr8)); + ensemble = repp.newEnsemble(9, 9, 5, null, new HashSet()).getResult(); + assert(ensemble.contains(addr4.toBookieId())); + assert(ensemble.contains(addr8.toBookieId())); assert(ensemble.size() == 9); assertEquals(3, getNumRegionsInEnsemble(ensemble)); } catch (BKNotEnoughBookiesException bnebe) { @@ -701,7 +896,8 @@ public void testNewEnsembleWithThreeRegionsWithDisable() throws Exception { repp.uninitalize(); repp = new RegionAwareEnsemblePlacementPolicy(); conf.setProperty(REPP_DISALLOW_BOOKIE_PLACEMENT_IN_REGION_FEATURE_NAME, "disallowBookies"); - repp.initialize(conf, Optional.empty(), timer, featureProvider, NullStatsLogger.INSTANCE); + repp.initialize(conf, Optional.empty(), timer, featureProvider, + NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); BookieSocketAddress addr1 = new BookieSocketAddress("127.0.0.2", 3181); BookieSocketAddress addr2 = new BookieSocketAddress("127.0.0.3", 3181); BookieSocketAddress addr3 = new BookieSocketAddress("127.0.0.4", 3181); @@ -724,51 +920,50 @@ public void testNewEnsembleWithThreeRegionsWithDisable() throws Exception { StaticDNSResolver.addNodeToRack(addr9.getHostName(), "/region2/r23"); StaticDNSResolver.addNodeToRack(addr10.getHostName(), "/region1/r24"); // Update cluster - Set addrs = new HashSet(); - addrs.add(addr1); - addrs.add(addr2); - addrs.add(addr3); - addrs.add(addr4); - addrs.add(addr5); - addrs.add(addr6); - addrs.add(addr7); - addrs.add(addr8); - addrs.add(addr9); - addrs.add(addr10); - repp.onClusterChanged(addrs, new HashSet()); + Set addrs = new HashSet(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + addrs.add(addr5.toBookieId()); + addrs.add(addr6.toBookieId()); + addrs.add(addr7.toBookieId()); + addrs.add(addr8.toBookieId()); + addrs.add(addr9.toBookieId()); + addrs.add(addr10.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); try { ((SettableFeature) featureProvider.scope("region1").getFeature("disallowBookies")).set(true); - List ensemble = repp.newEnsemble(6, 6, 4, null, - new HashSet()); + List ensemble = repp.newEnsemble(6, 6, 4, null, + new HashSet()).getResult(); assertEquals(2, getNumRegionsInEnsemble(ensemble)); - 
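// "disallowBookies" is a per-region SettableFeature: with region1 disallowed,
// the six-bookie ensemble must be drawn from the remaining two regions;
// disallowing region2 as well leaves too few bookies and newEnsemble is
// expected to fail with BKNotEnoughBookiesException until the flag is cleared.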
assert(ensemble.contains(addr1)); - assert(ensemble.contains(addr3)); - assert(ensemble.contains(addr4)); - assert(ensemble.contains(addr7)); - assert(ensemble.contains(addr8)); - assert(ensemble.contains(addr9)); + assert(ensemble.contains(addr1.toBookieId())); + assert(ensemble.contains(addr3.toBookieId())); + assert(ensemble.contains(addr4.toBookieId())); + assert(ensemble.contains(addr7.toBookieId())); + assert(ensemble.contains(addr8.toBookieId())); + assert(ensemble.contains(addr9.toBookieId())); assert(ensemble.size() == 6); } catch (BKNotEnoughBookiesException bnebe) { fail("Should not get not enough bookies exception even there is only one rack."); } try { ((SettableFeature) featureProvider.scope("region2").getFeature("disallowBookies")).set(true); - List ensemble = repp.newEnsemble(6, 6, 4, null, - new HashSet()); + repp.newEnsemble(6, 6, 4, null, new HashSet()); fail("Should get not enough bookies exception even there is only one region with insufficient bookies."); } catch (BKNotEnoughBookiesException bnebe) { // Expected } try { ((SettableFeature) featureProvider.scope("region2").getFeature("disallowBookies")).set(false); - List ensemble = repp.newEnsemble(6, 6, 4, null, - new HashSet()); - assert(ensemble.contains(addr1)); - assert(ensemble.contains(addr3)); - assert(ensemble.contains(addr4)); - assert(ensemble.contains(addr7)); - assert(ensemble.contains(addr8)); - assert(ensemble.contains(addr9)); + List ensemble = repp.newEnsemble(6, 6, 4, null, + new HashSet()).getResult(); + assert(ensemble.contains(addr1.toBookieId())); + assert(ensemble.contains(addr3.toBookieId())); + assert(ensemble.contains(addr4.toBookieId())); + assert(ensemble.contains(addr7.toBookieId())); + assert(ensemble.contains(addr8.toBookieId())); + assert(ensemble.contains(addr9.toBookieId())); assert(ensemble.size() == 6); assertEquals(2, getNumRegionsInEnsemble(ensemble)); } catch (BKNotEnoughBookiesException bnebe) { @@ -783,7 +978,8 @@ public void testNewEnsembleWithFiveRegions() throws Exception { repp = new RegionAwareEnsemblePlacementPolicy(); conf.setProperty(REPP_REGIONS_TO_WRITE, "region1;region2;region3;region4;region5"); conf.setProperty(REPP_MINIMUM_REGIONS_FOR_DURABILITY, 5); - repp.initialize(conf, Optional.empty(), timer, DISABLE_ALL, NullStatsLogger.INSTANCE); + repp.initialize(conf, Optional.empty(), timer, DISABLE_ALL, + NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); BookieSocketAddress addr1 = new BookieSocketAddress("127.1.0.2", 3181); BookieSocketAddress addr2 = new BookieSocketAddress("127.1.0.3", 3181); BookieSocketAddress addr3 = new BookieSocketAddress("127.1.0.4", 3181); @@ -816,27 +1012,27 @@ public void testNewEnsembleWithFiveRegions() throws Exception { StaticDNSResolver.addNodeToRack(addr14.getHostName(), "/region5/r34"); StaticDNSResolver.addNodeToRack(addr15.getHostName(), "/region5/r35"); // Update cluster - Set addrs = new HashSet(); - addrs.add(addr1); - addrs.add(addr2); - addrs.add(addr3); - addrs.add(addr4); - addrs.add(addr5); - addrs.add(addr6); - addrs.add(addr7); - addrs.add(addr8); - addrs.add(addr9); - addrs.add(addr10); - addrs.add(addr11); - addrs.add(addr12); - addrs.add(addr13); - addrs.add(addr14); - addrs.add(addr15); - repp.onClusterChanged(addrs, new HashSet()); + Set addrs = new HashSet(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + addrs.add(addr5.toBookieId()); + addrs.add(addr6.toBookieId()); + addrs.add(addr7.toBookieId()); + 
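// testNewEnsembleWithFiveRegions: REPP_REGIONS_TO_WRITE names five regions and
// REPP_MINIMUM_REGIONS_FOR_DURABILITY is 5, so a 10/10/10 ensemble must cover
// all five regions (two bookies each); excluding addr10 then forces the two
// remaining bookies of its region, addr11 and addr12, into the ensemble.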
addrs.add(addr8.toBookieId()); + addrs.add(addr9.toBookieId()); + addrs.add(addr10.toBookieId()); + addrs.add(addr11.toBookieId()); + addrs.add(addr12.toBookieId()); + addrs.add(addr13.toBookieId()); + addrs.add(addr14.toBookieId()); + addrs.add(addr15.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); try { - List ensemble = repp.newEnsemble(10, 10, 10, null, - new HashSet()); + List ensemble = repp.newEnsemble(10, 10, 10, null, + new HashSet()).getResult(); assert(ensemble.size() == 10); assertEquals(5, getNumRegionsInEnsemble(ensemble)); } catch (BKNotEnoughBookiesException bnebe) { @@ -845,10 +1041,11 @@ public void testNewEnsembleWithFiveRegions() throws Exception { } try { - Set excludedAddrs = new HashSet(); - excludedAddrs.add(addr10); - List ensemble = repp.newEnsemble(10, 10, 10, null, excludedAddrs); - assert(ensemble.contains(addr11) && ensemble.contains(addr12)); + Set excludedAddrs = new HashSet(); + excludedAddrs.add(addr10.toBookieId()); + List ensemble = repp.newEnsemble(10, 10, 10, null, + excludedAddrs).getResult(); + assert(ensemble.contains(addr11.toBookieId()) && ensemble.contains(addr12.toBookieId())); assert(ensemble.size() == 10); assertEquals(5, getNumRegionsInEnsemble(ensemble)); } catch (BKNotEnoughBookiesException bnebe) { @@ -890,7 +1087,8 @@ public void testEnsembleWithThreeRegionsReplaceInternal(int minDurability, boole } conf.setProperty(REPP_DISALLOW_BOOKIE_PLACEMENT_IN_REGION_FEATURE_NAME, "disallowBookies"); - repp.initialize(conf, Optional.empty(), timer, featureProvider, NullStatsLogger.INSTANCE); + repp.initialize(conf, Optional.empty(), timer, featureProvider, + NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); BookieSocketAddress addr1 = new BookieSocketAddress("127.1.0.2", 3181); BookieSocketAddress addr2 = new BookieSocketAddress("127.1.0.3", 3181); BookieSocketAddress addr3 = new BookieSocketAddress("127.1.0.4", 3181); @@ -912,17 +1110,17 @@ public void testEnsembleWithThreeRegionsReplaceInternal(int minDurability, boole StaticDNSResolver.addNodeToRack(addr9.getHostName(), "/region3/r23"); // Update cluster - Set addrs = new HashSet(); - addrs.add(addr1); - addrs.add(addr2); - addrs.add(addr3); - addrs.add(addr4); - addrs.add(addr5); - addrs.add(addr6); - addrs.add(addr7); - addrs.add(addr8); - addrs.add(addr9); - repp.onClusterChanged(addrs, new HashSet()); + Set addrs = new HashSet(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + addrs.add(addr5.toBookieId()); + addrs.add(addr6.toBookieId()); + addrs.add(addr7.toBookieId()); + addrs.add(addr8.toBookieId()); + addrs.add(addr9.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); SettableFeature disableDurabilityFeature = (SettableFeature) featureProvider.getFeature( @@ -937,9 +1135,9 @@ public void testEnsembleWithThreeRegionsReplaceInternal(int minDurability, boole ackQuorum = 5; } - List ensemble; + List ensemble; try { - ensemble = repp.newEnsemble(6, 6, ackQuorum, null, new HashSet()); + ensemble = repp.newEnsemble(6, 6, ackQuorum, null, new HashSet()).getResult(); assert(ensemble.size() == 6); assertEquals(3, getNumRegionsInEnsemble(ensemble)); } catch (BKNotEnoughBookiesException bnebe) { @@ -950,19 +1148,18 @@ public void testEnsembleWithThreeRegionsReplaceInternal(int minDurability, boole if (disableOneRegion) { ((SettableFeature) featureProvider.scope("region2").getFeature("disallowBookies")).set(true); - Set region2Bookies = new HashSet(); - 
region2Bookies.add(addr4); - region2Bookies.add(addr5); - region2Bookies.add(addr6); - Set region1And3Bookies = new HashSet(addrs); + Set region2Bookies = new HashSet(); + region2Bookies.add(addr4.toBookieId()); + region2Bookies.add(addr5.toBookieId()); + region2Bookies.add(addr6.toBookieId()); + Set region1And3Bookies = new HashSet(addrs); region1And3Bookies.removeAll(region2Bookies); - Set excludedAddrs = new HashSet(); - for (BookieSocketAddress addr: region2Bookies) { + Set excludedAddrs = new HashSet(); + for (BookieId addr: region2Bookies) { if (ensemble.contains(addr)) { - BookieSocketAddress replacedBookie = repp.replaceBookie( - 6, 6, ackQuorum, null, - new HashSet<>(ensemble), addr, excludedAddrs); + BookieId replacedBookie = repp.replaceBookie(6, 6, ackQuorum, null, + ensemble, addr, excludedAddrs).getResult(); ensemble.remove(addr); ensemble.add(replacedBookie); } @@ -970,26 +1167,25 @@ public void testEnsembleWithThreeRegionsReplaceInternal(int minDurability, boole assertEquals(2, getNumRegionsInEnsemble(ensemble)); assertTrue(ensemble.containsAll(region1And3Bookies)); } else { - BookieSocketAddress bookieToReplace; - BookieSocketAddress replacedBookieExpected; - if (ensemble.contains(addr4)) { - bookieToReplace = addr4; - if (ensemble.contains(addr5)) { - replacedBookieExpected = addr6; + BookieId bookieToReplace; + BookieId replacedBookieExpected; + if (ensemble.contains(addr4.toBookieId())) { + bookieToReplace = addr4.toBookieId(); + if (ensemble.contains(addr5.toBookieId())) { + replacedBookieExpected = addr6.toBookieId(); } else { - replacedBookieExpected = addr5; + replacedBookieExpected = addr5.toBookieId(); } } else { - replacedBookieExpected = addr4; - bookieToReplace = addr5; + replacedBookieExpected = addr4.toBookieId(); + bookieToReplace = addr5.toBookieId(); } - Set excludedAddrs = new HashSet(); + Set excludedAddrs = new HashSet(); try { - BookieSocketAddress replacedBookie = repp.replaceBookie( - 6, 6, ackQuorum, null, - new HashSet<>(ensemble), bookieToReplace, excludedAddrs); - assert (replacedBookie.equals(replacedBookieExpected)); + BookieId replacedBookie = repp.replaceBookie(6, 6, ackQuorum, null, + ensemble, bookieToReplace, excludedAddrs).getResult(); + assertEquals(replacedBookieExpected, replacedBookie); assertEquals(3, getNumRegionsInEnsemble(ensemble)); } catch (BKNotEnoughBookiesException bnebe) { fail("Should not get not enough bookies exception even there is only one rack."); @@ -997,9 +1193,7 @@ public void testEnsembleWithThreeRegionsReplaceInternal(int minDurability, boole excludedAddrs.add(replacedBookieExpected); try { - BookieSocketAddress replacedBookie = repp.replaceBookie( - 6, 6, ackQuorum, null, - new HashSet<>(ensemble), bookieToReplace, excludedAddrs); + repp.replaceBookie(6, 6, ackQuorum, null, ensemble, bookieToReplace, excludedAddrs); if (minDurability > 1 && !disableDurabilityFeature.isAvailable()) { fail("Should throw BKNotEnoughBookiesException when there is not enough bookies"); } @@ -1033,7 +1227,8 @@ public void testEnsembleDurabilityDisabledInternal(int minDurability, boolean di conf.setProperty(REPP_ENABLE_DURABILITY_ENFORCEMENT_IN_REPLACE, true); } - repp.initialize(conf, Optional.empty(), timer, featureProvider, NullStatsLogger.INSTANCE); + repp.initialize(conf, Optional.empty(), timer, featureProvider, + NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); BookieSocketAddress addr1 = new BookieSocketAddress("127.1.0.2", 3181); BookieSocketAddress addr2 = new BookieSocketAddress("127.1.0.3", 3181); 
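// testEnsembleDurabilityDisabledInternal: all nine bookies here live in
// region1, so the REPP_MINIMUM_REGIONS_FOR_DURABILITY constraint can only be
// met when durability enforcement is off (or the disable-durability feature is
// set); a sketch of the calls exercised below, using the same fixtures:
//   List<BookieId> ensemble = repp.newEnsemble(6, 6, 4, null, new HashSet<>()).getResult();
//   repp.replaceBookie(6, 6, 4, null, ensemble, ensemble.get(2), excludedAddrs);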
BookieSocketAddress addr3 = new BookieSocketAddress("127.1.0.4", 3181); @@ -1055,17 +1250,17 @@ public void testEnsembleDurabilityDisabledInternal(int minDurability, boolean di StaticDNSResolver.addNodeToRack(addr9.getHostName(), "/region1/r23"); // Update cluster - Set addrs = new HashSet(); - addrs.add(addr1); - addrs.add(addr2); - addrs.add(addr3); - addrs.add(addr4); - addrs.add(addr5); - addrs.add(addr6); - addrs.add(addr7); - addrs.add(addr8); - addrs.add(addr9); - repp.onClusterChanged(addrs, new HashSet()); + Set addrs = new HashSet(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + addrs.add(addr5.toBookieId()); + addrs.add(addr6.toBookieId()); + addrs.add(addr7.toBookieId()); + addrs.add(addr8.toBookieId()); + addrs.add(addr9.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); if (disableDurability) { ((SettableFeature) featureProvider.getFeature( @@ -1073,9 +1268,9 @@ public void testEnsembleDurabilityDisabledInternal(int minDurability, boolean di .set(true); } - List ensemble; + List ensemble; try { - ensemble = repp.newEnsemble(6, 6, 4, null, new HashSet()); + ensemble = repp.newEnsemble(6, 6, 4, null, new HashSet()).getResult(); assert(ensemble.size() == 6); } catch (BKNotEnoughBookiesException bnebe) { LOG.error("BKNotEnoughBookiesException", bnebe); @@ -1083,12 +1278,10 @@ public void testEnsembleDurabilityDisabledInternal(int minDurability, boolean di throw bnebe; } - Set excludedAddrs = new HashSet(); + Set excludedAddrs = new HashSet(); try { - repp.replaceBookie( - 6, 6, 4, null, - new HashSet<>(ensemble), addr4, excludedAddrs); + repp.replaceBookie(6, 6, 4, null, ensemble, ensemble.get(2), excludedAddrs); } catch (BKNotEnoughBookiesException bnebe) { fail("Should not get not enough bookies exception even there is only one rack."); } @@ -1101,7 +1294,8 @@ public void testNewEnsembleFailWithFiveRegions() throws Exception { conf.setProperty(REPP_REGIONS_TO_WRITE, "region1;region2;region3;region4;region5"); conf.setProperty(REPP_MINIMUM_REGIONS_FOR_DURABILITY, 5); conf.setProperty(REPP_ENABLE_VALIDATION, false); - repp.initialize(conf, Optional.empty(), timer, DISABLE_ALL, NullStatsLogger.INSTANCE); + repp.initialize(conf, Optional.empty(), timer, DISABLE_ALL, + NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); BookieSocketAddress addr1 = new BookieSocketAddress("127.0.0.2", 3181); BookieSocketAddress addr2 = new BookieSocketAddress("127.0.0.3", 3181); BookieSocketAddress addr3 = new BookieSocketAddress("127.0.0.4", 3181); @@ -1124,25 +1318,24 @@ public void testNewEnsembleFailWithFiveRegions() throws Exception { StaticDNSResolver.addNodeToRack(addr9.getHostName(), "/region5/r23"); StaticDNSResolver.addNodeToRack(addr10.getHostName(), "/region5/r24"); // Update cluster - Set addrs = new HashSet(); - addrs.add(addr1); - addrs.add(addr2); - addrs.add(addr3); - addrs.add(addr4); - addrs.add(addr5); - addrs.add(addr6); - addrs.add(addr7); - addrs.add(addr8); - addrs.add(addr9); - addrs.add(addr10); - repp.onClusterChanged(addrs, new HashSet()); - - Set excludedAddrs = new HashSet(); - excludedAddrs.add(addr10); - excludedAddrs.add(addr9); + Set addrs = new HashSet(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + addrs.add(addr5.toBookieId()); + addrs.add(addr6.toBookieId()); + addrs.add(addr7.toBookieId()); + addrs.add(addr8.toBookieId()); + addrs.add(addr9.toBookieId()); + 
addrs.add(addr10.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); + + Set excludedAddrs = new HashSet(); + excludedAddrs.add(addr10.toBookieId()); + excludedAddrs.add(addr9.toBookieId()); try { - List list = repp.newEnsemble(5, 5, 5, null, excludedAddrs); - LOG.info("Ensemble : {}", list); + LOG.info("Ensemble : {}", repp.newEnsemble(5, 5, 5, null, excludedAddrs).getResult()); fail("Should throw BKNotEnoughBookiesException when there is not enough bookies"); } catch (BKNotEnoughBookiesException bnebe) { // should throw not enou @@ -1154,7 +1347,8 @@ private void prepareNetworkTopologyForReorderTests(String myRegion) throws Excep updateMyRack("/" + myRegion); repp = new RegionAwareEnsemblePlacementPolicy(); - repp.initialize(conf, Optional.empty(), timer, DISABLE_ALL, NullStatsLogger.INSTANCE); + repp.initialize(conf, Optional.empty(), timer, DISABLE_ALL, + NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); BookieSocketAddress addr1 = new BookieSocketAddress("127.0.0.2", 3181); BookieSocketAddress addr2 = new BookieSocketAddress("127.0.0.3", 3181); @@ -1176,17 +1370,17 @@ private void prepareNetworkTopologyForReorderTests(String myRegion) throws Excep StaticDNSResolver.addNodeToRack(addr8.getHostName(), "/region3/r2"); StaticDNSResolver.addNodeToRack(addr9.getHostName(), "/region3/r3"); // Update cluster - Set addrs = new HashSet(); - addrs.add(addr1); - addrs.add(addr2); - addrs.add(addr3); - addrs.add(addr4); - addrs.add(addr5); - addrs.add(addr6); - addrs.add(addr7); - addrs.add(addr8); - addrs.add(addr9); - repp.onClusterChanged(addrs, new HashSet()); + Set addrs = new HashSet(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + addrs.add(addr5.toBookieId()); + addrs.add(addr6.toBookieId()); + addrs.add(addr7.toBookieId()); + addrs.add(addr8.toBookieId()); + addrs.add(addr9.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); } @Test @@ -1201,8 +1395,8 @@ public void testBasicReorderReadLACSequenceWithLocalRegion() throws Exception { private void basicReorderReadSequenceWithLocalRegionTest(String myRegion, boolean isReadLAC) throws Exception { prepareNetworkTopologyForReorderTests(myRegion); - - List ensemble = repp.newEnsemble(9, 9, 5, null, new HashSet()); + List ensemble = repp.newEnsemble(9, 9, 5, null, + new HashSet()).getResult(); assertEquals(9, getNumCoveredRegionsInWriteQuorum(ensemble, 9)); DistributionSchedule ds = new RoundRobinDistributionSchedule(9, 9, 9); @@ -1229,18 +1423,22 @@ private void basicReorderReadSequenceWithLocalRegionTest(String myRegion, boolea // first few nodes less than REMOTE_NODE_IN_REORDER_SEQUENCE should be local region int k = 0; for (; k < RegionAwareEnsemblePlacementPolicy.REMOTE_NODE_IN_REORDER_SEQUENCE; k++) { - BookieSocketAddress address = ensemble.get(readSet.get(k)); - assertEquals(myRegion, StaticDNSResolver.getRegion(address.getHostName())); + BookieId address = ensemble.get(readSet.get(k)); + assertEquals(myRegion, StaticDNSResolver.getRegion(repp.bookieAddressResolver + .resolve(address).getHostName())); } - BookieSocketAddress remoteAddress = ensemble.get(readSet.get(k)); - assertFalse(myRegion.equals(StaticDNSResolver.getRegion(remoteAddress.getHostName()))); + BookieId remoteAddress = ensemble.get(readSet.get(k)); + assertNotEquals(myRegion, StaticDNSResolver.getRegion(repp.bookieAddressResolver + .resolve(remoteAddress).getHostName())); k++; - BookieSocketAddress localAddress = ensemble.get(readSet.get(k)); - 
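// Expected read order for a client in myRegion: the first
// REMOTE_NODE_IN_REORDER_SEQUENCE entries are local-region bookies, then a
// single remote bookie is interleaved, then the remaining local bookie, and
// the rest of the sequence is remote.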
assertEquals(myRegion, StaticDNSResolver.getRegion(localAddress.getHostName())); + BookieId localAddress = ensemble.get(readSet.get(k)); + assertEquals(myRegion, StaticDNSResolver.getRegion(repp.bookieAddressResolver + .resolve(localAddress).getHostName())); k++; for (; k < ensembleSize; k++) { - BookieSocketAddress address = ensemble.get(readSet.get(k)); - assertFalse(myRegion.equals(StaticDNSResolver.getRegion(address.getHostName()))); + BookieId address = ensemble.get(readSet.get(k)); + assertNotEquals(myRegion, StaticDNSResolver.getRegion(repp.bookieAddressResolver + .resolve(address).getHostName())); } } } @@ -1258,7 +1456,8 @@ public void testBasicReorderReadLACSequenceWithRemoteRegion() throws Exception { private void basicReorderReadSequenceWithRemoteRegionTest(String myRegion, boolean isReadLAC) throws Exception { prepareNetworkTopologyForReorderTests(myRegion); - List ensemble = repp.newEnsemble(9, 9, 5, null, new HashSet()); + List ensemble = repp.newEnsemble(9, 9, 5, null, + new HashSet()).getResult(); assertEquals(9, getNumCoveredRegionsInWriteQuorum(ensemble, 9)); DistributionSchedule ds = new RoundRobinDistributionSchedule(9, 9, 9); @@ -1296,10 +1495,11 @@ public void testReorderReadLACSequenceWithUnavailableOrReadOnlyBookies() throws reorderReadSequenceWithUnavailableOrReadOnlyBookiesTest(true); } - static Set getBookiesForRegion(List ensemble, String region) { - Set regionBookies = new HashSet(); - for (BookieSocketAddress address : ensemble) { - String r = StaticDNSResolver.getRegion(address.getHostName()); + private Set getBookiesForRegion(List ensemble, String region) { + Set regionBookies = new HashSet(); + for (BookieId address : ensemble) { + String r = StaticDNSResolver.getRegion(repp.bookieAddressResolver + .resolve(address).getHostName()); if (r.equals(region)) { regionBookies.add(address); } @@ -1307,13 +1507,14 @@ static Set getBookiesForRegion(List en return regionBookies; } - static void appendBookieIndexByRegion(List ensemble, + void appendBookieIndexByRegion(List ensemble, DistributionSchedule.WriteSet writeSet, String region, List finalSet) { for (int i = 0; i < writeSet.size(); i++) { int bi = writeSet.get(i); - String r = StaticDNSResolver.getRegion(ensemble.get(bi).getHostName()); + String r = StaticDNSResolver.getRegion(repp.bookieAddressResolver + .resolve(ensemble.get(bi)).getHostName()); if (r.equals(region)) { finalSet.add(bi); } @@ -1328,15 +1529,16 @@ private void reorderReadSequenceWithUnavailableOrReadOnlyBookiesTest(boolean isR prepareNetworkTopologyForReorderTests(myRegion); - List ensemble = repp.newEnsemble(9, 9, 5, null, new HashSet()); + List ensemble = repp.newEnsemble(9, 9, 5, null, + new HashSet()).getResult(); assertEquals(9, getNumCoveredRegionsInWriteQuorum(ensemble, 9)); DistributionSchedule ds = new RoundRobinDistributionSchedule(9, 9, 9); LOG.info("My region is {}, ensemble : {}", repp.myRegion, ensemble); - Set readOnlyBookies = getBookiesForRegion(ensemble, readOnlyRegion); - Set writeBookies = getBookiesForRegion(ensemble, writeRegion); + Set readOnlyBookies = getBookiesForRegion(ensemble, readOnlyRegion); + Set writeBookies = getBookiesForRegion(ensemble, writeRegion); repp.onClusterChanged(writeBookies, readOnlyBookies); @@ -1372,15 +1574,15 @@ ensemble, getBookiesHealthInfo(), } } - private int getNumRegionsInEnsemble(List ensemble) { + private int getNumRegionsInEnsemble(List ensemble) { Set regions = new HashSet(); - for (BookieSocketAddress addr: ensemble) { - regions.add(StaticDNSResolver.getRegion(addr.getHostName())); 
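// BookieId is deliberately opaque, so wherever a hostname is needed these
// helpers resolve it first: repp.bookieAddressResolver.resolve(bookieId)
// yields the BookieSocketAddress whose getHostName() feeds StaticDNSResolver.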
+ for (BookieId addr: ensemble) { + regions.add(StaticDNSResolver.getRegion(repp.bookieAddressResolver.resolve(addr).getHostName())); } return regions.size(); } - private int getNumCoveredRegionsInWriteQuorum(List ensemble, int writeQuorumSize) + private int getNumCoveredRegionsInWriteQuorum(List ensemble, int writeQuorumSize) throws Exception { int ensembleSize = ensemble.size(); int numCoveredWriteQuorums = 0; @@ -1388,21 +1590,70 @@ private int getNumCoveredRegionsInWriteQuorum(List ensemble Set regions = new HashSet(); for (int j = 0; j < writeQuorumSize; j++) { int bookieIdx = (i + j) % ensembleSize; - BookieSocketAddress addr = ensemble.get(bookieIdx); - regions.add(StaticDNSResolver.getRegion(addr.getHostName())); + BookieId addr = ensemble.get(bookieIdx); + regions.add(StaticDNSResolver.getRegion(repp.bookieAddressResolver.resolve(addr).getHostName())); } numCoveredWriteQuorums += (regions.size() > 1 ? 1 : 0); } return numCoveredWriteQuorums; } + @Test + public void testRecoveryOnNodeFailure() throws Exception { + repp.uninitalize(); + repp = new RegionAwareEnsemblePlacementPolicy(); + repp.initialize(conf, Optional.empty(), timer, DISABLE_ALL, + NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); + BookieSocketAddress addr1 = new BookieSocketAddress("127.0.0.2", 3181); + BookieSocketAddress addr2 = new BookieSocketAddress("127.0.0.3", 3181); + BookieSocketAddress addr3 = new BookieSocketAddress("127.0.0.4", 3181); + BookieSocketAddress addr4 = new BookieSocketAddress("127.0.0.5", 3181); + BookieSocketAddress addr5 = new BookieSocketAddress("127.0.0.6", 3181); + BookieSocketAddress addr6 = new BookieSocketAddress("127.0.0.7", 3181); + + // Update dns mapping + StaticDNSResolver.addNodeToRack(addr1.getHostName(), "/region1/r1"); + StaticDNSResolver.addNodeToRack(addr2.getHostName(), "/region1/r1"); + StaticDNSResolver.addNodeToRack(addr3.getHostName(), "/region2/r2"); + StaticDNSResolver.addNodeToRack(addr4.getHostName(), "/region2/r2"); + StaticDNSResolver.addNodeToRack(addr5.getHostName(), "/region3/r3"); + StaticDNSResolver.addNodeToRack(addr6.getHostName(), "/region3/r3"); + + // Update cluster + Set addrs = new HashSet<>(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + addrs.add(addr5.toBookieId()); + addrs.add(addr6.toBookieId()); + + repp.onClusterChanged(addrs, new HashSet<>()); + + Set bookiesLeftSet = new HashSet<>(); + bookiesLeftSet.add(addr1.toBookieId()); + repp.handleBookiesThatLeft(bookiesLeftSet); + + List currentEnsemble = new ArrayList<>(); + currentEnsemble.add(addr1.toBookieId()); + currentEnsemble.add(addr3.toBookieId()); + currentEnsemble.add(addr6.toBookieId()); + + EnsemblePlacementPolicy.PlacementResult placementResult = repp.replaceBookie(3, + 3, 2, null, + currentEnsemble, addr1.toBookieId(), new HashSet<>()); + + assertEquals(placementResult.getResult(), addr2.toBookieId()); + } + @Test public void testNodeWithFailures() throws Exception { repp.uninitalize(); updateMyRack("/r2/rack1"); repp = new RegionAwareEnsemblePlacementPolicy(); - repp.initialize(conf, Optional.empty(), timer, DISABLE_ALL, NullStatsLogger.INSTANCE); + repp.initialize(conf, Optional.empty(), timer, DISABLE_ALL, + NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); BookieSocketAddress addr5 = new BookieSocketAddress("127.0.0.6", 3181); BookieSocketAddress addr6 = new BookieSocketAddress("127.0.0.7", 3181); @@ -1415,43 +1666,390 @@ public void 
testNodeWithFailures() throws Exception { StaticDNSResolver.addNodeToRack(addr6.getHostName(), "/r2/rack3"); StaticDNSResolver.addNodeToRack(addr7.getHostName(), "/r2/rack4"); StaticDNSResolver.addNodeToRack(addr8.getHostName(), "/r1/rack4"); - ensemble.add(addr5); - ensemble.add(addr6); - ensemble.add(addr7); - ensemble.add(addr8); + ensemble.add(addr5.toBookieId()); + ensemble.add(addr6.toBookieId()); + ensemble.add(addr7.toBookieId()); + ensemble.add(addr8.toBookieId()); DistributionSchedule.WriteSet writeSet2 = writeSetFromValues(0, 1, 2, 3, 4, 5, 6, 7); - Set addrs = new HashSet(); - addrs.add(addr1); - addrs.add(addr2); - addrs.add(addr3); - addrs.add(addr4); - addrs.add(addr5); - addrs.add(addr6); - addrs.add(addr7); - addrs.add(addr8); - repp.onClusterChanged(addrs, new HashSet()); + Set addrs = new HashSet(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + addrs.add(addr5.toBookieId()); + addrs.add(addr6.toBookieId()); + addrs.add(addr7.toBookieId()); + addrs.add(addr8.toBookieId()); + repp.onClusterChanged(addrs, new HashSet()); - HashMap bookieFailures = new HashMap(); + HashMap bookieFailures = new HashMap(); - bookieFailures.put(addr1, 20L); - bookieFailures.put(addr2, 22L); - bookieFailures.put(addr3, 24L); - bookieFailures.put(addr4, 25L); + bookieFailures.put(addr1.toBookieId(), 20L); + bookieFailures.put(addr2.toBookieId(), 22L); + bookieFailures.put(addr3.toBookieId(), 24L); + bookieFailures.put(addr4.toBookieId(), 25L); LOG.info("write set : {}", writeSet2); - DistributionSchedule.WriteSet reoderSet = repp.reorderReadSequence( + DistributionSchedule.WriteSet reorderSet = repp.reorderReadSequence( ensemble, getBookiesHealthInfo(bookieFailures, new HashMap<>()), writeSet2); - LOG.info("reorder set : {}", reoderSet); - assertEquals(ensemble.get(reoderSet.get(0)), addr6); - assertEquals(ensemble.get(reoderSet.get(1)), addr7); - assertEquals(ensemble.get(reoderSet.get(2)), addr5); - assertEquals(ensemble.get(reoderSet.get(3)), addr2); - assertEquals(ensemble.get(reoderSet.get(4)), addr3); - assertEquals(ensemble.get(reoderSet.get(5)), addr8); - assertEquals(ensemble.get(reoderSet.get(6)), addr1); - assertEquals(ensemble.get(reoderSet.get(7)), addr4); + LOG.info("reorder set : {}", reorderSet); + assertEquals(ensemble.get(reorderSet.get(0)), addr6.toBookieId()); + assertEquals(ensemble.get(reorderSet.get(1)), addr7.toBookieId()); + assertEquals(ensemble.get(reorderSet.get(2)), addr5.toBookieId()); + assertEquals(ensemble.get(reorderSet.get(3)), addr2.toBookieId()); + assertEquals(ensemble.get(reorderSet.get(4)), addr3.toBookieId()); + assertEquals(ensemble.get(reorderSet.get(5)), addr8.toBookieId()); + assertEquals(ensemble.get(reorderSet.get(6)), addr1.toBookieId()); + assertEquals(ensemble.get(reorderSet.get(7)), addr4.toBookieId()); + } + + @Test + public void testNewEnsembleSetWithFiveRegions() throws Exception { + repp.uninitalize(); + repp = new RegionAwareEnsemblePlacementPolicy(); + repp.initialize(conf, Optional.empty(), timer, DISABLE_ALL, + NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); + BookieSocketAddress addr1 = new BookieSocketAddress("127.0.0.2", 3181); + BookieSocketAddress addr2 = new BookieSocketAddress("127.0.0.3", 3181); + BookieSocketAddress addr3 = new BookieSocketAddress("127.0.0.4", 3181); + BookieSocketAddress addr4 = new BookieSocketAddress("127.0.0.5", 3181); + BookieSocketAddress addr5 = new BookieSocketAddress("127.0.0.6", 3181); + + // 
Update dns mapping + StaticDNSResolver.addNodeToRack(addr1.getHostName(), "/region1/r1"); + StaticDNSResolver.addNodeToRack(addr2.getHostName(), "/region2/r2"); + StaticDNSResolver.addNodeToRack(addr3.getHostName(), "/region3/r3"); + StaticDNSResolver.addNodeToRack(addr4.getHostName(), "/region4/r4"); + StaticDNSResolver.addNodeToRack(addr5.getHostName(), "/region5/r5"); + + // Update cluster + Set addrs = new HashSet<>(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + addrs.add(addr5.toBookieId()); + + repp.onClusterChanged(addrs, new HashSet<>()); + try { + List ensemble1 = repp.newEnsemble(3, 3, 2, + null, new HashSet<>()).getResult(); + assertEquals(ensemble1.size(), 3); + List ensemble2 = repp.newEnsemble(3, 3, 2, + null, new HashSet<>()).getResult(); + ensemble1.retainAll(ensemble2); + assert(!ensemble1.isEmpty()); + + List ensemble3 = repp.newEnsemble(3, 3, 2, + null, new HashSet<>()).getResult(); + ensemble2.removeAll(ensemble3); + assert(!ensemble2.isEmpty()); + } catch (BKNotEnoughBookiesException bnebe) { + fail("Should not get not enough bookies exception even there is only one rack."); + } + } + + @Test + public void testRegionsWithDiskWeight() throws Exception { + repp.uninitalize(); + repp = new RegionAwareEnsemblePlacementPolicy(); + conf.setProperty(REPP_ENABLE_VALIDATION, false); + conf.setDiskWeightBasedPlacementEnabled(true); + repp.initialize(conf, Optional.empty(), timer, DISABLE_ALL, + NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); + BookieSocketAddress addr1 = new BookieSocketAddress("127.0.0.2", 3181); + BookieSocketAddress addr2 = new BookieSocketAddress("127.0.0.3", 3181); + BookieSocketAddress addr3 = new BookieSocketAddress("127.0.0.4", 3181); + BookieSocketAddress addr4 = new BookieSocketAddress("127.0.0.5", 3181); + BookieSocketAddress addr5 = new BookieSocketAddress("127.0.0.6", 3181); + + // update dns mapping + StaticDNSResolver.addNodeToRack(addr1.getHostName(), "/region1/r1"); + StaticDNSResolver.addNodeToRack(addr2.getHostName(), "/region2/r3"); + StaticDNSResolver.addNodeToRack(addr3.getHostName(), "/region3/r11"); + StaticDNSResolver.addNodeToRack(addr4.getHostName(), "/region4/r13"); + StaticDNSResolver.addNodeToRack(addr5.getHostName(), "/region5/r23"); + // Update cluster + Set addrs = new HashSet(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + addrs.add(addr5.toBookieId()); + + repp.onClusterChanged(addrs, new HashSet()); + + List ensemble = repp.newEnsemble(3, 3, 2, null, + new HashSet<>()).getResult(); + + assertEquals(3, ensemble.size()); + } + + @Test + public void testNotifyRackChangeWithOldRegion() throws Exception { + BookieSocketAddress addr1 = new BookieSocketAddress("127.0.1.1", 3181); + BookieSocketAddress addr2 = new BookieSocketAddress("127.0.1.2", 3181); + BookieSocketAddress addr3 = new BookieSocketAddress("127.0.1.3", 3181); + BookieSocketAddress addr4 = new BookieSocketAddress("127.0.1.4", 3181); + + // update dns mapping + StaticDNSResolver.addNodeToRack(addr1.getHostName(), "/region1/rack-1"); + StaticDNSResolver.addNodeToRack(addr2.getHostName(), "/region1/rack-1"); + StaticDNSResolver.addNodeToRack(addr3.getHostName(), "/region2/rack-1"); + StaticDNSResolver.addNodeToRack(addr4.getHostName(), "/region2/rack-1"); + + // Update cluster + Set addrs = Sets.newHashSet(addr1.toBookieId(), + addr2.toBookieId(), addr3.toBookieId(), 
addr4.toBookieId()); + repp.onClusterChanged(addrs, new HashSet<>()); + + assertEquals(4, repp.knownBookies.size()); + assertEquals("/region1/rack-1", repp.knownBookies.get(addr1.toBookieId()).getNetworkLocation()); + assertEquals("/region1/rack-1", repp.knownBookies.get(addr2.toBookieId()).getNetworkLocation()); + assertEquals("/region2/rack-1", repp.knownBookies.get(addr3.toBookieId()).getNetworkLocation()); + assertEquals("/region2/rack-1", repp.knownBookies.get(addr4.toBookieId()).getNetworkLocation()); + + assertEquals(2, repp.perRegionPlacement.size()); + TopologyAwareEnsemblePlacementPolicy region1Placement = repp.perRegionPlacement.get("region1"); + assertEquals(2, region1Placement.knownBookies.keySet().size()); + assertEquals("/region1/rack-1", region1Placement.knownBookies.get(addr1.toBookieId()).getNetworkLocation()); + assertEquals("/region1/rack-1", region1Placement.knownBookies.get(addr2.toBookieId()).getNetworkLocation()); + + TopologyAwareEnsemblePlacementPolicy region2Placement = repp.perRegionPlacement.get("region2"); + assertEquals(2, region2Placement.knownBookies.keySet().size()); + assertEquals("/region2/rack-1", region2Placement.knownBookies.get(addr3.toBookieId()).getNetworkLocation()); + assertEquals("/region2/rack-1", region2Placement.knownBookies.get(addr4.toBookieId()).getNetworkLocation()); + + assertEquals("region1", repp.address2Region.get(addr1.toBookieId())); + assertEquals("region1", repp.address2Region.get(addr2.toBookieId())); + assertEquals("region2", repp.address2Region.get(addr3.toBookieId())); + assertEquals("region2", repp.address2Region.get(addr4.toBookieId())); + + // Update the rack. + // change addr2 rack info. /region1/rack-1 -> /region1/rack-2. + // change addr4 rack info. /region2/rack-1 -> /region1/rack-2 + List bookieAddressList = new ArrayList<>(); + List rackList = new ArrayList<>(); + bookieAddressList.add(addr2); + rackList.add("/region1/rack-2"); + bookieAddressList.add(addr4); + rackList.add("/region1/rack-2"); + StaticDNSResolver.changeRack(bookieAddressList, rackList); + + assertEquals(4, repp.knownBookies.size()); + assertEquals("/region1/rack-1", repp.knownBookies.get(addr1.toBookieId()).getNetworkLocation()); + assertEquals("/region1/rack-2", repp.knownBookies.get(addr2.toBookieId()).getNetworkLocation()); + assertEquals("/region2/rack-1", repp.knownBookies.get(addr3.toBookieId()).getNetworkLocation()); + assertEquals("/region1/rack-2", repp.knownBookies.get(addr4.toBookieId()).getNetworkLocation()); + + assertEquals(2, repp.perRegionPlacement.size()); + region1Placement = repp.perRegionPlacement.get("region1"); + assertEquals(3, region1Placement.knownBookies.keySet().size()); + assertEquals("/region1/rack-1", region1Placement.knownBookies.get(addr1.toBookieId()).getNetworkLocation()); + assertEquals("/region1/rack-2", region1Placement.knownBookies.get(addr2.toBookieId()).getNetworkLocation()); + assertEquals("/region1/rack-2", region1Placement.knownBookies.get(addr4.toBookieId()).getNetworkLocation()); + + region2Placement = repp.perRegionPlacement.get("region2"); + assertEquals(1, region2Placement.knownBookies.keySet().size()); + assertEquals("/region2/rack-1", region2Placement.knownBookies.get(addr3.toBookieId()).getNetworkLocation()); + + assertEquals("region1", repp.address2Region.get(addr1.toBookieId())); + assertEquals("region1", repp.address2Region.get(addr2.toBookieId())); + assertEquals("region2", repp.address2Region.get(addr3.toBookieId())); + assertEquals("region1", repp.address2Region.get(addr4.toBookieId())); + } + + 
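// testNotifyRackChangeWithOldRegion above moves addr2 and addr4 onto
// /region1/rack-2 and verifies the policy rebalances its bookkeeping:
// the per-region sub-policies in perRegionPlacement shrink and grow
// accordingly and address2Region is rewritten. The companion test below does
// the same with a previously unseen region ("region3"), so a third sub-policy
// has to be created on the fly.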
@Test + public void testNotifyRackChangeWithNewRegion() throws Exception { + BookieSocketAddress addr1 = new BookieSocketAddress("127.0.1.1", 3181); + BookieSocketAddress addr2 = new BookieSocketAddress("127.0.1.2", 3181); + BookieSocketAddress addr3 = new BookieSocketAddress("127.0.1.3", 3181); + BookieSocketAddress addr4 = new BookieSocketAddress("127.0.1.4", 3181); + + // update dns mapping + StaticDNSResolver.addNodeToRack(addr1.getHostName(), "/region1/rack-1"); + StaticDNSResolver.addNodeToRack(addr2.getHostName(), "/region1/rack-1"); + StaticDNSResolver.addNodeToRack(addr3.getHostName(), "/region2/rack-1"); + StaticDNSResolver.addNodeToRack(addr4.getHostName(), "/region2/rack-1"); + + // Update cluster + Set addrs = Sets.newHashSet(addr1.toBookieId(), + addr2.toBookieId(), addr3.toBookieId(), addr4.toBookieId()); + repp.onClusterChanged(addrs, new HashSet<>()); + + assertEquals(4, repp.knownBookies.size()); + assertEquals("/region1/rack-1", repp.knownBookies.get(addr1.toBookieId()).getNetworkLocation()); + assertEquals("/region1/rack-1", repp.knownBookies.get(addr2.toBookieId()).getNetworkLocation()); + assertEquals("/region2/rack-1", repp.knownBookies.get(addr3.toBookieId()).getNetworkLocation()); + assertEquals("/region2/rack-1", repp.knownBookies.get(addr4.toBookieId()).getNetworkLocation()); + + assertEquals(2, repp.perRegionPlacement.size()); + TopologyAwareEnsemblePlacementPolicy region1Placement = repp.perRegionPlacement.get("region1"); + assertEquals(2, region1Placement.knownBookies.keySet().size()); + assertEquals("/region1/rack-1", region1Placement.knownBookies.get(addr1.toBookieId()).getNetworkLocation()); + assertEquals("/region1/rack-1", region1Placement.knownBookies.get(addr2.toBookieId()).getNetworkLocation()); + + TopologyAwareEnsemblePlacementPolicy region2Placement = repp.perRegionPlacement.get("region2"); + assertEquals(2, region2Placement.knownBookies.keySet().size()); + assertEquals("/region2/rack-1", region2Placement.knownBookies.get(addr3.toBookieId()).getNetworkLocation()); + assertEquals("/region2/rack-1", region2Placement.knownBookies.get(addr4.toBookieId()).getNetworkLocation()); + + assertEquals("region1", repp.address2Region.get(addr1.toBookieId())); + assertEquals("region1", repp.address2Region.get(addr2.toBookieId())); + assertEquals("region2", repp.address2Region.get(addr3.toBookieId())); + assertEquals("region2", repp.address2Region.get(addr4.toBookieId())); + + // Update the rack. + // change addr2 rack info. /region1/rack-1 -> /region3/rack-1. + // change addr4 rack info. 
/region2/rack-1 -> /region3/rack-1 + List bookieAddressList = new ArrayList<>(); + List rackList = new ArrayList<>(); + bookieAddressList.add(addr2); + rackList.add("/region3/rack-1"); + bookieAddressList.add(addr4); + rackList.add("/region3/rack-1"); + StaticDNSResolver.changeRack(bookieAddressList, rackList); + + assertEquals(4, repp.knownBookies.size()); + assertEquals("/region1/rack-1", repp.knownBookies.get(addr1.toBookieId()).getNetworkLocation()); + assertEquals("/region3/rack-1", repp.knownBookies.get(addr2.toBookieId()).getNetworkLocation()); + assertEquals("/region2/rack-1", repp.knownBookies.get(addr3.toBookieId()).getNetworkLocation()); + assertEquals("/region3/rack-1", repp.knownBookies.get(addr4.toBookieId()).getNetworkLocation()); + + assertEquals(3, repp.perRegionPlacement.size()); + region1Placement = repp.perRegionPlacement.get("region1"); + assertEquals(1, region1Placement.knownBookies.keySet().size()); + assertEquals("/region1/rack-1", region1Placement.knownBookies.get(addr1.toBookieId()).getNetworkLocation()); + + region2Placement = repp.perRegionPlacement.get("region2"); + assertEquals(1, region2Placement.knownBookies.keySet().size()); + assertEquals("/region2/rack-1", region2Placement.knownBookies.get(addr3.toBookieId()).getNetworkLocation()); + + TopologyAwareEnsemblePlacementPolicy region3Placement = repp.perRegionPlacement.get("region3"); + assertEquals(2, region3Placement.knownBookies.keySet().size()); + assertEquals("/region3/rack-1", region3Placement.knownBookies.get(addr2.toBookieId()).getNetworkLocation()); + assertEquals("/region3/rack-1", region3Placement.knownBookies.get(addr4.toBookieId()).getNetworkLocation()); + + assertEquals("region1", repp.address2Region.get(addr1.toBookieId())); + assertEquals("region3", repp.address2Region.get(addr2.toBookieId())); + assertEquals("region2", repp.address2Region.get(addr3.toBookieId())); + assertEquals("region3", repp.address2Region.get(addr4.toBookieId())); } + + @Test + public void testNewEnsemblePickLocalRegionBookies() + throws Exception { + repp.uninitalize(); + BookieSocketAddress addr1 = new BookieSocketAddress("127.0.0.10", 3181); + BookieSocketAddress addr2 = new BookieSocketAddress("127.0.0.2", 3181); + BookieSocketAddress addr3 = new BookieSocketAddress("127.0.0.3", 3181); + BookieSocketAddress addr4 = new BookieSocketAddress("127.0.0.4", 3181); + BookieSocketAddress addr5 = new BookieSocketAddress("127.0.0.5", 3181); + BookieSocketAddress addr6 = new BookieSocketAddress("127.0.0.6", 3181); + BookieSocketAddress addr7 = new BookieSocketAddress("127.0.0.7", 3181); + BookieSocketAddress addr8 = new BookieSocketAddress("127.0.0.8", 3181); + BookieSocketAddress addr9 = new BookieSocketAddress("127.0.0.9", 3181); + + // update dns mapping + StaticDNSResolver.addNodeToRack(addr1.getHostName(), "/region1/r1"); + StaticDNSResolver.addNodeToRack(addr2.getHostName(), "/region2/r2"); + StaticDNSResolver.addNodeToRack(addr3.getHostName(), "/region2/r2"); + StaticDNSResolver.addNodeToRack(addr4.getHostName(), "/region2/r2"); + StaticDNSResolver.addNodeToRack(addr5.getHostName(), "/region3/r3"); + StaticDNSResolver.addNodeToRack(addr6.getHostName(), "/region4/r4"); + StaticDNSResolver.addNodeToRack(addr7.getHostName(), "/region5/r5"); + StaticDNSResolver.addNodeToRack(addr8.getHostName(), "/region1/r2"); + StaticDNSResolver.addNodeToRack(addr9.getHostName(), "/region1/r2"); + + + updateMyRack("/region1/r2"); + repp = new RegionAwareEnsemblePlacementPolicy(); + repp.initialize(conf, Optional.empty(), timer, + 
DISABLE_ALL, NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER);
+        repp.withDefaultRack(NetworkTopology.DEFAULT_REGION_AND_RACK);
+        // Update cluster
+        Set<BookieId> addrs = new HashSet<BookieId>();
+        addrs.add(addr1.toBookieId());
+        addrs.add(addr2.toBookieId());
+        addrs.add(addr3.toBookieId());
+        addrs.add(addr4.toBookieId());
+        addrs.add(addr5.toBookieId());
+        addrs.add(addr6.toBookieId());
+        addrs.add(addr7.toBookieId());
+        addrs.add(addr8.toBookieId());
+        addrs.add(addr9.toBookieId());
+        repp.onClusterChanged(addrs, new HashSet<BookieId>());
+
+        int ensembleSize = 3;
+        int writeQuorumSize = 3;
+        int ackQuorumSize = 2;
+
+        Set<BookieId> excludeBookies = new HashSet<>();
+
+        int bookie1Count = 0;
+        int bookie8Count = 0;
+        int bookie9Count = 0;
+        for (int i = 0; i < 100; ++i) {
+            EnsemblePlacementPolicy.PlacementResult<List<BookieId>> ensembleResponse =
+                    repp.newEnsemble(ensembleSize, writeQuorumSize,
+                            ackQuorumSize, null, excludeBookies);
+            List<BookieId> ensemble = ensembleResponse.getResult();
+            if (ensemble.contains(addr1.toBookieId())) {
+                bookie1Count++;
+            }
+            if (ensemble.contains(addr8.toBookieId())) {
+                bookie8Count++;
+            }
+            if (ensemble.contains(addr9.toBookieId())) {
+                bookie9Count++;
+            }
+
+            if (!ensemble.contains(addr8.toBookieId()) && !ensemble.contains(addr9.toBookieId())) {
+                fail("Failed to select a bookie located in the same region and rack as the bookie client");
+            }
+            if (ensemble.contains(addr2.toBookieId()) && ensemble.contains(addr3.toBookieId())) {
+                fail("addr2 and addr3 are in the same rack.");
+            }
+        }
+        LOG.info("Bookie1 Count: {}, Bookie8 Count: {}, Bookie9 Count: {}", bookie1Count, bookie8Count, bookie9Count);
+
+        // shut down all the bookies located in the same region and rack as the local node
+        // to verify that a new ensemble then has to contain addr1
+        addrs.remove(addr8.toBookieId());
+        addrs.remove(addr9.toBookieId());
+        repp.onClusterChanged(addrs, new HashSet<BookieId>());
+        bookie1Count = 0;
+        bookie8Count = 0;
+        bookie9Count = 0;
+        for (int i = 0; i < 100; ++i) {
+            try {
+                EnsemblePlacementPolicy.PlacementResult<List<BookieId>> ensembleResponse =
+                        repp.newEnsemble(ensembleSize, writeQuorumSize,
+                                ackQuorumSize, null, excludeBookies);
+                List<BookieId> ensemble = ensembleResponse.getResult();
+                if (ensemble.contains(addr1.toBookieId())) {
+                    bookie1Count++;
+                }
+                if (ensemble.contains(addr8.toBookieId())) {
+                    bookie8Count++;
+                }
+                if (ensemble.contains(addr9.toBookieId())) {
+                    bookie9Count++;
+                }
+                if (!ensemble.contains(addr1.toBookieId())) {
+                    fail("Failed to select a bookie located in the same region as the bookie client");
+                }
+                if (ensemble.contains(addr8.toBookieId()) || ensemble.contains(addr9.toBookieId())) {
+                    fail("Selected the shutdown bookies");
+                }
+            } catch (BKNotEnoughBookiesException e) {
+                fail("Failed to select the ensemble.");
+            }
+        }
+        LOG.info("Bookie1 Count: {}, Bookie8 Count: {}, Bookie9 Count: {}", bookie1Count, bookie8Count, bookie9Count);
+
+    }
 }
diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestSequenceRead.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestSequenceRead.java
index 283cf759161..1570599f628 100644
--- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestSequenceRead.java
+++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestSequenceRead.java
@@ -22,16 +22,8 @@
 import static org.junit.Assert.assertEquals;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-import java.util.SortedMap;
-import java.util.TreeMap;
-import java.util.concurrent.CountDownLatch;
-
+import com.google.common.collect.Lists;
 import
org.apache.bookkeeper.client.BookKeeper.DigestType; -import org.apache.bookkeeper.net.BookieSocketAddress; -import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks; import org.apache.bookkeeper.test.BookKeeperClusterTestCase; import org.junit.Test; import org.slf4j.Logger; @@ -44,53 +36,29 @@ public class TestSequenceRead extends BookKeeperClusterTestCase { private static final Logger logger = LoggerFactory.getLogger(TestSequenceRead.class); - final DigestType digestType; - final byte[] passwd = "sequence-read".getBytes(); - public TestSequenceRead() { super(5); - this.digestType = DigestType.CRC32; } - private LedgerHandle createLedgerWithDuplicatedBookies() throws Exception { - final LedgerHandle lh = bkc.createLedger(3, 3, 3, digestType, passwd); + private long createLedgerWithDuplicatedBookies() throws Exception { + long ledgerId = 12345L; // introduce duplicated bookies in an ensemble. - SortedMap> ensembles = lh.getLedgerMetadata().getEnsembles(); - TreeMap> newEnsembles = new TreeMap<>(); - for (Map.Entry> entry : ensembles.entrySet()) { - List newList = new ArrayList(entry.getValue().size()); - BookieSocketAddress firstBookie = entry.getValue().get(0); - for (BookieSocketAddress ignored : entry.getValue()) { - newList.add(firstBookie); - } - newEnsembles.put(entry.getKey(), newList); - } - lh.getLedgerMetadata().setEnsembles(newEnsembles); - // update the ledger metadata with duplicated bookies - final CountDownLatch latch = new CountDownLatch(1); - bkc.getLedgerManager().writeLedgerMetadata(lh.getId(), lh.getLedgerMetadata(), - new BookkeeperInternalCallbacks.GenericCallback() { - @Override - public void operationComplete(int rc, LedgerMetadata result) { - if (BKException.Code.OK == rc) { - latch.countDown(); - } else { - logger.error("Error on writing ledger metadata for ledger {} : ", lh.getId(), - BKException.getMessage(rc)); - } - } - }); - latch.await(); - logger.info("Update ledger metadata with duplicated bookies for ledger {}.", lh.getId()); - return lh; + LedgerMetadataBuilder builder = LedgerMetadataBuilder.create() + .withId(ledgerId).withEnsembleSize(3).withWriteQuorumSize(3).withAckQuorumSize(3) + .newEnsembleEntry(0L, Lists.newArrayList(getBookie(0), getBookie(0), getBookie(0))); + ClientUtil.setupLedger(bkc.getLedgerManager(), ledgerId, builder); + + logger.info("Update ledger metadata with duplicated bookies for ledger {}.", ledgerId); + return ledgerId; } @Test public void testSequenceReadOnDuplicatedBookies() throws Exception { - final LedgerHandle lh = createLedgerWithDuplicatedBookies(); + final long ledgerId = createLedgerWithDuplicatedBookies(); // should be able to open the ledger even it has duplicated bookies - final LedgerHandle readLh = bkc.openLedger(lh.getId(), digestType, passwd); + final LedgerHandle readLh = bkc.openLedger( + ledgerId, DigestType.fromApiDigestType(ClientUtil.DIGEST_TYPE), ClientUtil.PASSWD); assertEquals(LedgerHandle.INVALID_ENTRY_ID, readLh.getLastAddConfirmed()); } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestSpeculativeBatchRead.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestSpeculativeBatchRead.java new file mode 100644 index 00000000000..21b65c5d8f7 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestSpeculativeBatchRead.java @@ -0,0 +1,399 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+package org.apache.bookkeeper.client;
+
+import static org.apache.bookkeeper.client.BookKeeperClientStats.CLIENT_SCOPE;
+import static org.apache.bookkeeper.client.BookKeeperClientStats.SPECULATIVE_READ_COUNT;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
+import java.util.BitSet;
+import java.util.Enumeration;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.TimeUnit;
+import org.apache.bookkeeper.bookie.LocalBookieEnsemblePlacementPolicy;
+import org.apache.bookkeeper.client.AsyncCallback.ReadCallback;
+import org.apache.bookkeeper.client.BookKeeper.DigestType;
+import org.apache.bookkeeper.conf.ClientConfiguration;
+import org.apache.bookkeeper.net.BookieId;
+import org.apache.bookkeeper.test.BookKeeperClusterTestCase;
+import org.apache.bookkeeper.test.TestStatsProvider;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * This unit test tests speculative reads for the batched read API (asyncBatchReadEntries).
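+ *
+ * <p>The calls under test have the shape
+ * {@code lh.asyncBatchReadEntries(startEntry, maxCount, maxSize, cb, ctx)}, for example
+ * {@code lh.asyncBatchReadEntries(0, 1, 1024, cb, null)} to fetch entry 0 with a 1024-byte
+ * size cap. (This reading of the parameters is editorial, inferred from the call sites in
+ * this test rather than quoted from the API documentation.)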
+ *
+ */
+public class TestSpeculativeBatchRead extends BookKeeperClusterTestCase {
+    private static final Logger LOG = LoggerFactory.getLogger(TestSpeculativeBatchRead.class);
+
+    private final DigestType digestType;
+    byte[] passwd = "specPW".getBytes();
+
+    public TestSpeculativeBatchRead() {
+        super(10);
+        this.digestType = DigestType.CRC32;
+    }
+
+    long getLedgerToRead(int ensemble, int quorum) throws Exception {
+        byte[] data = "Data for test".getBytes();
+        LedgerHandle l = bkc.createLedger(ensemble, quorum, digestType, passwd);
+        for (int i = 0; i < 10; i++) {
+            l.addEntry(data);
+        }
+        l.close();
+
+        return l.getId();
+    }
+
+    @SuppressWarnings("deprecation")
+    BookKeeperTestClient createClient(int specTimeout) throws Exception {
+        ClientConfiguration conf = new ClientConfiguration()
+            .setSpeculativeReadTimeout(specTimeout)
+            .setReadTimeout(30000)
+            .setUseV2WireProtocol(true)
+            .setReorderReadSequenceEnabled(true)
+            .setMetadataServiceUri(zkUtil.getMetadataServiceUri());
+        return new BookKeeperTestClient(conf, new TestStatsProvider());
+    }
+
+    class LatchCallback implements ReadCallback {
+        CountDownLatch l = new CountDownLatch(1);
+        boolean success = false;
+        long startMillis = System.currentTimeMillis();
+        long endMillis = Long.MAX_VALUE;
+
+        public void readComplete(int rc,
+                                 LedgerHandle lh,
+                                 Enumeration<LedgerEntry> seq,
+                                 Object ctx) {
+            endMillis = System.currentTimeMillis();
+            if (LOG.isDebugEnabled()) {
+                LOG.debug("Got response {} {}", rc, getDuration());
+            }
+            success = rc == BKException.Code.OK;
+            l.countDown();
+        }
+
+        long getDuration() {
+            return endMillis - startMillis;
+        }
+
+        void expectSuccess(int milliseconds) throws Exception {
+            assertTrue(l.await(milliseconds, TimeUnit.MILLISECONDS));
+            assertTrue(success);
+        }
+
+        void expectFail(int milliseconds) throws Exception {
+            assertTrue(l.await(milliseconds, TimeUnit.MILLISECONDS));
+            assertFalse(success);
+        }
+
+        void expectTimeout(int milliseconds) throws Exception {
+            assertFalse(l.await(milliseconds, TimeUnit.MILLISECONDS));
+        }
+    }
+
+    /**
+     * Test basic speculative functionality.
+     * - Create 2 clients with read timeout disabled, one with spec
+     *   read enabled, the other not.
+     * - create ledger
+     * - sleep second bookie in ensemble
+     * - read first entry, both should find on first bookie.
+     * - read second entry, spec client should find it on bookie three,
+     *   non spec client should hang.
+     */
+    @Test
+    public void testSpeculativeRead() throws Exception {
+        long id = getLedgerToRead(3, 2);
+        BookKeeperTestClient bknospec = createClient(0); // disabled
+        BookKeeperTestClient bkspec = createClient(2000);
+
+        LedgerHandle lnospec = bknospec.openLedger(id, digestType, passwd);
+        LedgerHandle lspec = bkspec.openLedger(id, digestType, passwd);
+
+        // sleep second bookie
+        CountDownLatch sleepLatch = new CountDownLatch(1);
+        BookieId second = lnospec.getLedgerMetadata().getAllEnsembles().get(0L).get(1);
+        sleepBookie(second, sleepLatch);
+
+        try {
+            // read first entry, both go to first bookie, should be fine
+            LatchCallback nospeccb = new LatchCallback();
+            LatchCallback speccb = new LatchCallback();
+            lnospec.asyncBatchReadEntries(0, 1, 1024, nospeccb, null);
+            lspec.asyncBatchReadEntries(0, 1, 1024, speccb, null);
+            nospeccb.expectSuccess(2000);
+            speccb.expectSuccess(2000);
+
+            // read second entry, both look for the second bookie, the spec read client
+            // tries the third bookie, the nonspec client hangs as the read timeout is very long.
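+            // (What is being exercised here: once the speculative timeout fires without a
+            // response, the client issues the same read to the next bookie in the read
+            // sequence while the first request stays outstanding; the first successful
+            // response completes the callback, the laggard is recorded in the policy's
+            // slowBookies cache, and the SPECULATIVE_READ_COUNT stat ticks up, as the
+            // assertions below verify.)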
+ nospeccb = new LatchCallback(); + speccb = new LatchCallback(); + lnospec.asyncReadEntries(1, 1, nospeccb, null); + lspec.asyncReadEntries(1, 1, speccb, null); + speccb.expectSuccess(4000); + nospeccb.expectTimeout(4000); + // Check that the second bookie is registered as slow at entryId 1 + RackawareEnsemblePlacementPolicy rep = (RackawareEnsemblePlacementPolicy) bkspec.getPlacementPolicy(); + assertTrue(rep.slowBookies.asMap().size() == 1); + + assertTrue( + "Stats should not reflect speculative reads if disabled", + bknospec.getTestStatsProvider() + .getCounter(CLIENT_SCOPE + "." + SPECULATIVE_READ_COUNT).get() == 0); + assertTrue( + "Stats should reflect speculative reads", + bkspec.getTestStatsProvider() + .getCounter(CLIENT_SCOPE + "." + SPECULATIVE_READ_COUNT).get() > 0); + } finally { + sleepLatch.countDown(); + lspec.close(); + lnospec.close(); + bkspec.close(); + bknospec.close(); + } + } + + /** + * Test that if more than one replica is down, we can still read, as long as the quorum + * size is larger than the number of down replicas. + */ + @Test + public void testSpeculativeReadMultipleReplicasDown() throws Exception { + long id = getLedgerToRead(5, 5); + int timeout = 5000; + BookKeeper bkspec = createClient(timeout); + + LedgerHandle l = bkspec.openLedger(id, digestType, passwd); + + // sleep bookie 1, 2 & 4 + CountDownLatch sleepLatch = new CountDownLatch(1); + sleepBookie(l.getLedgerMetadata().getAllEnsembles().get(0L).get(1), sleepLatch); + sleepBookie(l.getLedgerMetadata().getAllEnsembles().get(0L).get(2), sleepLatch); + sleepBookie(l.getLedgerMetadata().getAllEnsembles().get(0L).get(4), sleepLatch); + + try { + // read first entry, should complete faster than timeout + // as bookie 0 has the entry + LatchCallback latch0 = new LatchCallback(); + l.asyncBatchReadEntries(0, 1, 1024, latch0, null); + latch0.expectSuccess(timeout / 2); + + // second should have to hit two timeouts (bookie 1 & 2) + // bookie 3 has the entry + LatchCallback latch1 = new LatchCallback(); + l.asyncBatchReadEntries(1, 1, 1024, latch1, null); + latch1.expectTimeout(timeout); + latch1.expectSuccess(timeout * 2); + LOG.info("Timeout {} latch1 duration {}", timeout, latch1.getDuration()); + assertTrue("should have taken longer than two timeouts, but less than 3", + latch1.getDuration() >= timeout * 2 + && latch1.getDuration() < timeout * 3); + + // bookies 1 & 2 should be registered as slow bookies because of speculative reads + Set expectedSlowBookies = new HashSet<>(); + expectedSlowBookies.add(l.getLedgerMetadata().getAllEnsembles().get(0L).get(1)); + expectedSlowBookies.add(l.getLedgerMetadata().getAllEnsembles().get(0L).get(2)); + assertEquals(((RackawareEnsemblePlacementPolicy) bkspec.getPlacementPolicy()).slowBookies.asMap().keySet(), + expectedSlowBookies); + + // third should not hit timeouts since bookies 1 & 2 are registered as slow + // bookie 3 has the entry + LatchCallback latch2 = new LatchCallback(); + l.asyncBatchReadEntries(2, 1, 1024, latch2, null); + latch2.expectSuccess(timeout); + + // fourth should have no timeout + // bookie 3 has the entry + LatchCallback latch3 = new LatchCallback(); + l.asyncBatchReadEntries(3, 1, 1024, latch3, null); + latch3.expectSuccess(timeout / 2); + + // fifth should hit one timeout, (bookie 4) + // bookie 0 has the entry + LatchCallback latch4 = new LatchCallback(); + l.asyncBatchReadEntries(4, 1, 1024, latch4, null); + latch4.expectTimeout(timeout / 2); + latch4.expectSuccess(timeout); + LOG.info("Timeout {} latch4 duration {}", timeout, 
latch4.getDuration()); + assertTrue("should have taken longer than one timeout, but less than 2", + latch4.getDuration() >= timeout + && latch4.getDuration() < timeout * 2); + } finally { + sleepLatch.countDown(); + l.close(); + bkspec.close(); + } + } + + /** + * Test that if after a speculative read is kicked off, the original read completes + * nothing bad happens. + */ + @Test + public void testSpeculativeReadFirstReadCompleteIsOk() throws Exception { + long id = getLedgerToRead(2, 2); + int timeout = 1000; + BookKeeper bkspec = createClient(timeout); + + LedgerHandle l = bkspec.openLedger(id, digestType, passwd); + + // sleep bookies + CountDownLatch sleepLatch0 = new CountDownLatch(1); + CountDownLatch sleepLatch1 = new CountDownLatch(1); + sleepBookie(l.getLedgerMetadata().getAllEnsembles().get(0L).get(0), sleepLatch0); + sleepBookie(l.getLedgerMetadata().getAllEnsembles().get(0L).get(1), sleepLatch1); + + try { + // read goes to first bookie, spec read timeout occurs, + // goes to second + LatchCallback latch0 = new LatchCallback(); + l.asyncBatchReadEntries(0, 1, 1024, latch0, null); + latch0.expectTimeout(timeout); + + // wake up first bookie + sleepLatch0.countDown(); + latch0.expectSuccess(timeout / 2); + + sleepLatch1.countDown(); + + // check we can read next entry without issue + LatchCallback latch1 = new LatchCallback(); + l.asyncBatchReadEntries(1, 1, 1024, latch1, null); + latch1.expectSuccess(timeout / 2); + } finally { + sleepLatch0.countDown(); + sleepLatch1.countDown(); + l.close(); + bkspec.close(); + } + } + + /** + * Unit test to check if the scheduled speculative task gets cancelled + * on successful read. + */ + @Test + public void testSpeculativeReadScheduledTaskCancel() throws Exception { + long id = getLedgerToRead(3, 2); + int timeout = 1000; + BookKeeper bkspec = createClient(timeout); + LedgerHandle l = bkspec.openLedger(id, digestType, passwd); + BatchedReadOp op = null; + try { + op = new BatchedReadOp(l, bkspec.getClientCtx(), 0, 5, 5120, false); + op.initiate(); + op.future().get(); + } finally { + assertNull("Speculative Read tasks must be null", op.getSpeculativeTask()); + } + } + + /** + * Unit test for the speculative read scheduling method. 
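+     * maybeSendSpeculativeRead(heardFrom) is driven by a BitSet of ensemble positions that
+     * have already responded: each call picks the next bookie of the entry's write set that
+     * has not been tried yet, and it returns null once a bookie we already sent to has
+     * responded, so no further speculation is scheduled. The assertions below walk through
+     * exactly these cases.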
+     */
+    @Test
+    public void testSpeculativeReadScheduling() throws Exception {
+        long id = getLedgerToRead(3, 2);
+        int timeout = 1000;
+        BookKeeper bkspec = createClient(timeout);
+
+        LedgerHandle l = bkspec.openLedger(id, digestType, passwd);
+
+        List<BookieId> ensemble = l.getLedgerMetadata().getAllEnsembles().get(0L);
+        BitSet allHosts = new BitSet(ensemble.size());
+        for (int i = 0; i < ensemble.size(); i++) {
+            allHosts.set(i, true);
+        }
+        BitSet noHost = new BitSet(ensemble.size());
+        BitSet secondHostOnly = new BitSet(ensemble.size());
+        secondHostOnly.set(1, true);
+        BatchedReadOp.LedgerEntryRequest req0 = null, req2 = null, req4 = null;
+        try {
+            BatchedReadOp op = new BatchedReadOp(l, bkspec.getClientCtx(), 0, 5, 5120, false);
+            // if we've already heard from all hosts,
+            // we only send the initial read
+            req0 = op.new SequenceReadRequest(ensemble, l.getId(), 0, 1, 1024);
+            assertTrue("Should have sent to first",
+                    req0.maybeSendSpeculativeRead(allHosts).equals(ensemble.get(0)));
+            assertNull("Should not have sent another",
+                    req0.maybeSendSpeculativeRead(allHosts));
+
+            // if we have heard from some hosts, but not one we have sent to
+            // send again
+            req2 = op.new SequenceReadRequest(ensemble, l.getId(), 2, 1, 1024);
+            assertTrue("Should have sent to third",
+                    req2.maybeSendSpeculativeRead(noHost).equals(ensemble.get(2)));
+            assertTrue("Should have sent to first",
+                    req2.maybeSendSpeculativeRead(secondHostOnly).equals(ensemble.get(0)));
+
+            // if we have heard from some hosts, which includes one we sent to
+            // do not read again
+            req4 = op.new SequenceReadRequest(ensemble, l.getId(), 4, 1, 1024);
+            assertTrue("Should have sent to second",
+                    req4.maybeSendSpeculativeRead(noHost).equals(ensemble.get(1)));
+            assertNull("Should not have sent another",
+                    req4.maybeSendSpeculativeRead(secondHostOnly));
+        } finally {
+            for (BatchedReadOp.LedgerEntryRequest req
+                     : new BatchedReadOp.LedgerEntryRequest[] { req0, req2, req4 }) {
+                if (req != null) {
+                    int i = 0;
+                    while (!req.isComplete()) {
+                        if (i++ > 10) {
+                            break; // wait for up to 10 seconds
+                        }
+                        Thread.sleep(1000);
+                    }
+                    assertTrue("Request should be done", req.isComplete());
+                }
+            }
+
+            l.close();
+            bkspec.close();
+        }
+    }
+
+    @Test
+    public void testSequenceReadLocalEnsemble() throws Exception {
+        ClientConfiguration conf = new ClientConfiguration()
+            .setSpeculativeReadTimeout(1000)
+            .setEnsemblePlacementPolicy(LocalBookieEnsemblePlacementPolicy.class)
+            .setReorderReadSequenceEnabled(true)
+            .setMetadataServiceUri(zkUtil.getMetadataServiceUri());
+        // close the client even if the assertion fails
+        try (BookKeeper bkspec = new BookKeeperTestClient(conf, new TestStatsProvider())) {
+            LedgerHandle l = bkspec.createLedger(1, 1, digestType, passwd);
+            List<BookieId> ensemble = l.getLedgerMetadata().getAllEnsembles().get(0L);
+            BatchedReadOp op = new BatchedReadOp(l, bkspec.getClientCtx(), 0, 5, 5120, false);
+            BatchedReadOp.LedgerEntryRequest req0 = op.new SequenceReadRequest(ensemble, l.getId(), 0, 1, 1024);
+            assertNotNull(req0.writeSet);
+        }
+    }
+}
diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestSpeculativeRead.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestSpeculativeRead.java
index 5d251a954a9..9db20a6f86a 100644
--- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestSpeculativeRead.java
+++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestSpeculativeRead.java
@@ -24,6 +24,7 @@ import static org.apache.bookkeeper.client.BookKeeperClientStats.SPECULATIVE_READ_COUNT;
 import static org.junit.Assert.assertEquals;
 import
static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; @@ -34,11 +35,11 @@ import java.util.Set; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; - +import org.apache.bookkeeper.bookie.LocalBookieEnsemblePlacementPolicy; import org.apache.bookkeeper.client.AsyncCallback.ReadCallback; import org.apache.bookkeeper.client.BookKeeper.DigestType; import org.apache.bookkeeper.conf.ClientConfiguration; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.test.BookKeeperClusterTestCase; import org.apache.bookkeeper.test.TestStatsProvider; import org.junit.Test; @@ -77,7 +78,6 @@ BookKeeperTestClient createClient(int specTimeout) throws Exception { .setSpeculativeReadTimeout(specTimeout) .setReadTimeout(30000) .setReorderReadSequenceEnabled(true) - .setEnsemblePlacementPolicySlowBookies(true) .setMetadataServiceUri(zkUtil.getMetadataServiceUri()); return new BookKeeperTestClient(conf, new TestStatsProvider()); } @@ -93,7 +93,9 @@ public void readComplete(int rc, Enumeration seq, Object ctx) { endMillis = System.currentTimeMillis(); - LOG.debug("Got response {} {}", rc, getDuration()); + if (LOG.isDebugEnabled()) { + LOG.debug("Got response {} {}", rc, getDuration()); + } success = rc == BKException.Code.OK; l.countDown(); } @@ -138,7 +140,7 @@ public void testSpeculativeRead() throws Exception { // sleep second bookie CountDownLatch sleepLatch = new CountDownLatch(1); - BookieSocketAddress second = lnospec.getLedgerMetadata().getEnsembles().get(0L).get(1); + BookieId second = lnospec.getLedgerMetadata().getAllEnsembles().get(0L).get(1); sleepBookie(second, sleepLatch); try { @@ -194,9 +196,9 @@ public void testSpeculativeReadMultipleReplicasDown() throws Exception { // sleep bookie 1, 2 & 4 CountDownLatch sleepLatch = new CountDownLatch(1); - sleepBookie(l.getLedgerMetadata().getEnsembles().get(0L).get(1), sleepLatch); - sleepBookie(l.getLedgerMetadata().getEnsembles().get(0L).get(2), sleepLatch); - sleepBookie(l.getLedgerMetadata().getEnsembles().get(0L).get(4), sleepLatch); + sleepBookie(l.getLedgerMetadata().getAllEnsembles().get(0L).get(1), sleepLatch); + sleepBookie(l.getLedgerMetadata().getAllEnsembles().get(0L).get(2), sleepLatch); + sleepBookie(l.getLedgerMetadata().getAllEnsembles().get(0L).get(4), sleepLatch); try { // read first entry, should complete faster than timeout @@ -217,9 +219,9 @@ public void testSpeculativeReadMultipleReplicasDown() throws Exception { && latch1.getDuration() < timeout * 3); // bookies 1 & 2 should be registered as slow bookies because of speculative reads - Set expectedSlowBookies = new HashSet<>(); - expectedSlowBookies.add(l.getLedgerMetadata().getEnsembles().get(0L).get(1)); - expectedSlowBookies.add(l.getLedgerMetadata().getEnsembles().get(0L).get(2)); + Set expectedSlowBookies = new HashSet<>(); + expectedSlowBookies.add(l.getLedgerMetadata().getAllEnsembles().get(0L).get(1)); + expectedSlowBookies.add(l.getLedgerMetadata().getAllEnsembles().get(0L).get(2)); assertEquals(((RackawareEnsemblePlacementPolicy) bkspec.getPlacementPolicy()).slowBookies.asMap().keySet(), expectedSlowBookies); @@ -268,8 +270,8 @@ public void testSpeculativeReadFirstReadCompleteIsOk() throws Exception { // sleep bookies CountDownLatch sleepLatch0 = new CountDownLatch(1); CountDownLatch sleepLatch1 = new CountDownLatch(1); - 
sleepBookie(l.getLedgerMetadata().getEnsembles().get(0L).get(0), sleepLatch0); - sleepBookie(l.getLedgerMetadata().getEnsembles().get(0L).get(1), sleepLatch1); + sleepBookie(l.getLedgerMetadata().getAllEnsembles().get(0L).get(0), sleepLatch0); + sleepBookie(l.getLedgerMetadata().getAllEnsembles().get(0L).get(1), sleepLatch1); try { // read goes to first bookie, spec read timeout occurs, @@ -297,6 +299,26 @@ public void testSpeculativeReadFirstReadCompleteIsOk() throws Exception { } } + /** + * Unit test to check if the scheduled speculative task gets cancelled + * on successful read. + */ + @Test + public void testSpeculativeReadScheduledTaskCancel() throws Exception { + long id = getLedgerToRead(3, 2); + int timeout = 1000; + BookKeeper bkspec = createClient(timeout); + LedgerHandle l = bkspec.openLedger(id, digestType, passwd); + PendingReadOp op = null; + try { + op = new PendingReadOp(l, bkspec.getClientCtx(), 0, 5, false); + op.initiate(); + op.future().get(); + } finally { + assertNull("Speculative Read tasks must be null", op.getSpeculativeTask()); + } + } + /** * Unit test for the speculative read scheduling method. */ @@ -308,7 +330,7 @@ public void testSpeculativeReadScheduling() throws Exception { LedgerHandle l = bkspec.openLedger(id, digestType, passwd); - List ensemble = l.getLedgerMetadata().getEnsembles().get(0L); + List ensemble = l.getLedgerMetadata().getAllEnsembles().get(0L); BitSet allHosts = new BitSet(ensemble.size()); for (int i = 0; i < ensemble.size(); i++) { allHosts.set(i, true); @@ -353,7 +375,7 @@ public void testSpeculativeReadScheduling() throws Exception { } Thread.sleep(1000); } - assertTrue("Request should be done", req0.isComplete()); + assertTrue("Request should be done", req.isComplete()); } } @@ -361,4 +383,20 @@ public void testSpeculativeReadScheduling() throws Exception { bkspec.close(); } } + + @Test + public void testSequenceReadLocalEnsemble() throws Exception { + ClientConfiguration conf = new ClientConfiguration() + .setSpeculativeReadTimeout(1000) + .setEnsemblePlacementPolicy(LocalBookieEnsemblePlacementPolicy.class) + .setReorderReadSequenceEnabled(true) + .setMetadataServiceUri(zkUtil.getMetadataServiceUri()); + try (BookKeeper bkc = new BookKeeperTestClient(conf, new TestStatsProvider())) { + LedgerHandle l = bkc.createLedger(1, 1, digestType, passwd); + List ensemble = l.getLedgerMetadata().getAllEnsembles().get(0L); + PendingReadOp op = new PendingReadOp(l, bkc.getClientCtx(), 0, 5, false); + PendingReadOp.LedgerEntryRequest req0 = op.new SequenceReadRequest(ensemble, l.getId(), 0); + assertNotNull(req0.writeSet); + } + } } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestTryReadLastConfirmed.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestTryReadLastConfirmed.java index a4a63fa6c8a..18d070ebada 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestTryReadLastConfirmed.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestTryReadLastConfirmed.java @@ -25,7 +25,6 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; - import org.apache.bookkeeper.client.BookKeeper.DigestType; import org.apache.bookkeeper.conf.ServerConfiguration; import org.apache.bookkeeper.test.BookKeeperClusterTestCase; @@ -144,8 +143,7 @@ public void readLastConfirmedComplete(int rc, long lastConfirmed, Object ctx) { // start the bookies for (ServerConfiguration conf : confs) { - 
bs.add(startBookie(conf)); - bsConfs.add(conf); + startAndAddBookie(conf); } } lh.close(); diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestWatchEnsembleChange.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestWatchEnsembleChange.java index 456839a6320..18c783327c3 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestWatchEnsembleChange.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestWatchEnsembleChange.java @@ -24,6 +24,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; +import com.google.common.collect.Lists; import com.google.common.util.concurrent.UncheckedExecutionException; import java.nio.ByteBuffer; import java.util.Arrays; @@ -33,17 +34,19 @@ import java.util.concurrent.TimeUnit; import lombok.Cleanup; import org.apache.bookkeeper.client.BookKeeper.DigestType; +import org.apache.bookkeeper.client.api.LedgerMetadata; import org.apache.bookkeeper.meta.HierarchicalLedgerManagerFactory; import org.apache.bookkeeper.meta.LedgerIdGenerator; import org.apache.bookkeeper.meta.LedgerManager; import org.apache.bookkeeper.meta.LedgerManagerFactory; import org.apache.bookkeeper.meta.LongHierarchicalLedgerManagerFactory; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.net.BookieSocketAddress; -import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GenericCallback; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.LedgerMetadataListener; import org.apache.bookkeeper.test.BookKeeperClusterTestCase; import org.apache.bookkeeper.versioning.Version; +import org.apache.bookkeeper.versioning.Versioned; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; @@ -92,9 +95,9 @@ public void testWatchEnsembleChange() throws Exception { LedgerHandle readLh = bkc.openLedgerNoRecovery(lh.getId(), digestType, "".getBytes()); long lastLAC = readLh.getLastAddConfirmed(); assertEquals(numEntries - 2, lastLAC); - List ensemble = + List ensemble = lh.getCurrentEnsemble(); - for (BookieSocketAddress addr : ensemble) { + for (BookieId addr : ensemble) { killBookie(addr); } // write another batch of entries, which will trigger ensemble change @@ -130,22 +133,27 @@ private void testWatchMetadataRemoval(LedgerManagerFactory factory) throws Excep final CountDownLatch createLatch = new CountDownLatch(1); final CountDownLatch removeLatch = new CountDownLatch(1); + List ensemble = Lists.newArrayList(new BookieSocketAddress("192.0.2.1", 1234).toBookieId(), + new BookieSocketAddress("192.0.2.2", 1234).toBookieId(), + new BookieSocketAddress("192.0.2.3", 1234).toBookieId(), + new BookieSocketAddress("192.0.2.4", 1234).toBookieId()); idGenerator.generateLedgerId(new GenericCallback() { - @Override - public void operationComplete(int rc, final Long lid) { - manager.createLedgerMetadata(lid, new LedgerMetadata(4, 2, 2, digestType, "fpj was here".getBytes()), - new BookkeeperInternalCallbacks.GenericCallback(){ - - @Override - public void operationComplete(int rc, LedgerMetadata result) { - bbLedgerId.putLong(lid); - bbLedgerId.flip(); - createLatch.countDown(); - } - }); - - } - }); + @Override + public void operationComplete(int rc, final Long lid) { + LedgerMetadata metadata = LedgerMetadataBuilder.create() + .withId(lid) + .withDigestType(digestType.toApiDigestType()).withPassword(new byte[0]) + 
.withEnsembleSize(4).withWriteQuorumSize(2) + .withAckQuorumSize(2) + .newEnsembleEntry(0L, ensemble).build(); + manager.createLedgerMetadata(lid, metadata) + .whenComplete((result, exception) -> { + bbLedgerId.putLong(lid); + bbLedgerId.flip(); + createLatch.countDown(); + }); + } + }); assertTrue(createLatch.await(2000, TimeUnit.MILLISECONDS)); final long createdLid = bbLedgerId.getLong(); @@ -154,21 +162,14 @@ public void operationComplete(int rc, LedgerMetadata result) { new LedgerMetadataListener() { @Override - public void onChanged(long ledgerId, LedgerMetadata metadata) { + public void onChanged(long ledgerId, Versioned metadata) { assertEquals(ledgerId, createdLid); assertEquals(metadata, null); removeLatch.countDown(); } }); - manager.removeLedgerMetadata(createdLid, Version.ANY, - new BookkeeperInternalCallbacks.GenericCallback() { - - @Override - public void operationComplete(int rc, Void result) { - assertEquals(rc, BKException.Code.OK); - } - }); - assertTrue(removeLatch.await(2000, TimeUnit.MILLISECONDS)); + manager.removeLedgerMetadata(createdLid, Version.ANY).get(2, TimeUnit.SECONDS); + assertTrue(removeLatch.await(2, TimeUnit.SECONDS)); } } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestWeightedRandomSelection.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestWeightedRandomSelection.java index fe8f74bfb1f..1fe2d656e11 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestWeightedRandomSelection.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestWeightedRandomSelection.java @@ -21,45 +21,70 @@ import static org.junit.Assert.assertTrue; import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; import java.util.Collections; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; - +import java.util.Set; import org.apache.bookkeeper.client.WeightedRandomSelection.WeightedObject; import org.apache.commons.configuration.CompositeConfiguration; import org.apache.commons.configuration.Configuration; import org.junit.After; +import org.junit.Assume; import org.junit.Before; import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * Test weighted random selection methods. 
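+ * Since the suite is parameterized, every case runs against both WeightedRandomSelectionImpl
+ * and DynamicWeightedRandomSelectionImpl; testSelectionFromSelectedNodesWithEqualWeights is
+ * skipped (via Assume) for everything but the dynamic implementation.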
*/ +@RunWith(Parameterized.class) public class TestWeightedRandomSelection { static final Logger LOG = LoggerFactory.getLogger(TestWeightedRandomSelection.class); static class TestObj implements WeightedObject { long val; + TestObj(long value) { this.val = value; } + @Override public long getWeight() { return val; } } + Class weightedRandomSelectionClass; WeightedRandomSelection wRS; Configuration conf = new CompositeConfiguration(); int multiplier = 3; + @Parameters + public static Collection weightedRandomSelectionClass() { + return Arrays.asList( + new Object[][] { { WeightedRandomSelectionImpl.class }, { DynamicWeightedRandomSelectionImpl.class } }); + } + + public TestWeightedRandomSelection(Class weightedRandomSelectionClass) { + this.weightedRandomSelectionClass = weightedRandomSelectionClass; + } + @Before public void setUp() throws Exception { - wRS = new WeightedRandomSelection(); + if (weightedRandomSelectionClass.equals(WeightedRandomSelectionImpl.class)) { + wRS = new WeightedRandomSelectionImpl(); + } else { + wRS = new DynamicWeightedRandomSelectionImpl(); + } } @After @@ -90,7 +115,7 @@ public void testSelectionWithEqualWeights() throws Exception { double actualPct = ((double) e.getValue() / (double) totalTries) * 100; double delta = (Math.abs(expectedPct - actualPct) / expectedPct) * 100; System.out.println("Key:" + e.getKey() + " Value:" + e.getValue() + " Expected: " + expectedPct - + " Actual: " + actualPct); + + " Actual: " + actualPct + " delta: " + delta); // should be within 5% of expected assertTrue("Not doing uniform selection when weights are equal", delta < 5); } @@ -145,7 +170,8 @@ void verifyResult(Map map, Map randomSe double expected; if (map.get(e.getKey()).getWeight() == 0) { - // if the value is 0 for any key, we make it equal to the first non zero value + // if the value is 0 for any key, we make it equal to the first + // non zero value expected = (double) minWeight / (double) totalWeight; } else { expected = (double) map.get(e.getKey()).getWeight() / (double) totalWeight; @@ -153,14 +179,16 @@ void verifyResult(Map map, Map randomSe if (multiplier > 0 && expected > multiplier * medianExpectedWeight) { expected = multiplier * medianExpectedWeight; } - // We can't compare these weights because they are derived from different - // values. But if we express them as a multiple of the min in each, then + // We can't compare these weights because they are derived from + // different + // values. 
But if we express them as a multiple of the min in each, + // then // they should be comparable double expectedMultiple = expected / medianExpectedWeight; double observedMultiple = observed / medianObservedWeight; double delta = (Math.abs(expectedMultiple - observedMultiple) / expectedMultiple) * 100; - System.out.println("Key:" + e.getKey() + " Value:" + e.getValue() - + " Expected " + expectedMultiple + " actual " + observedMultiple + " delta " + delta + "%"); + System.out.println("Key:" + e.getKey() + " Value:" + e.getValue() + " Expected " + expectedMultiple + + " actual " + observedMultiple + " delta " + delta + "%"); // the observed should be within 5% of expected assertTrue("Not doing uniform selection when weights are equal", delta < 5); @@ -178,7 +206,7 @@ public void testSelectionWithSomeZeroWeights() throws Exception { for (Integer i = 0; i < numKeys; i++) { if (i < numKeys / 3) { val = 0L; - } else if (i < 2 * (numKeys / 3)){ + } else if (i < 2 * (numKeys / 3)) { val = minWeight; } else { val = 2 * minWeight; @@ -189,7 +217,7 @@ public void testSelectionWithSomeZeroWeights() throws Exception { } wRS.updateMap(map); - int totalTries = 10000000; + int totalTries = 1000000; for (int i = 0; i < totalTries; i++) { String key = wRS.getNextRandom(); randomSelection.put(key, randomSelection.get(key) + 1); @@ -208,7 +236,7 @@ public void testSelectionWithUnequalWeights() throws Exception { for (Integer i = 0; i < numKeys; i++) { if (i < numKeys / 3) { val = minWeight; - } else if (i < 2 * (numKeys / 3)){ + } else if (i < 2 * (numKeys / 3)) { val = 2 * minWeight; } else { val = 10 * minWeight; @@ -219,7 +247,7 @@ public void testSelectionWithUnequalWeights() throws Exception { } wRS.updateMap(map); - int totalTries = 10000000; + int totalTries = 1000000; for (int i = 0; i < totalTries; i++) { String key = wRS.getNextRandom(); randomSelection.put(key, randomSelection.get(key) + 1); @@ -247,7 +275,7 @@ public void testSelectionWithHotNode() throws Exception { } wRS.updateMap(map); - int totalTries = 10000000; + int totalTries = 1000000; for (int i = 0; i < totalTries; i++) { String key = wRS.getNextRandom(); randomSelection.put(key, randomSelection.get(key) + 1); @@ -275,11 +303,39 @@ public void testSelectionWithHotNodeWithLimit() throws Exception { } wRS.updateMap(map); - int totalTries = 10000000; + int totalTries = 1000000; for (int i = 0; i < totalTries; i++) { String key = wRS.getNextRandom(); randomSelection.put(key, randomSelection.get(key) + 1); } verifyResult(map, randomSelection, multiplier, minWeight, medianWeight, total, totalTries); } + + @Test + public void testSelectionFromSelectedNodesWithEqualWeights() throws Exception { + /* + * this testcase is for only DynamicWeightedRandomSelectionImpl + */ + Assume.assumeTrue(weightedRandomSelectionClass.equals(DynamicWeightedRandomSelectionImpl.class)); + Map map = new HashMap(); + + Long val = 100L; + int numKeys = 50, totalTries = 1000; + Map randomSelection = new HashMap(); + for (Integer i = 0; i < numKeys; i++) { + map.put(i.toString(), new TestObj(val)); + randomSelection.put(i.toString(), 0); + } + + Set selectFrom = new HashSet(); + for (int i = 0; i < numKeys / 2; i++) { + selectFrom.add(Integer.toString(i)); + } + + wRS.updateMap(map); + for (int i = 0; i < totalTries; i++) { + String selectedKey = wRS.getNextRandom(selectFrom); + assertTrue("NextRandom key should be from selected list", selectFrom.contains(selectedKey)); + } + } } diff --git 
a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestZoneawareEnsemblePlacementPolicy.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestZoneawareEnsemblePlacementPolicy.java new file mode 100644 index 00000000000..e98a19e4636 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/TestZoneawareEnsemblePlacementPolicy.java @@ -0,0 +1,1368 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bookkeeper.client; + +import static org.apache.bookkeeper.client.RackawareEnsemblePlacementPolicyImpl.REPP_DNS_RESOLVER_CLASS; +import static org.apache.bookkeeper.client.RoundRobinDistributionSchedule.writeSetFromValues; +import static org.apache.bookkeeper.feature.SettableFeatureProvider.DISABLE_ALL; +import static org.junit.Assert.assertNotEquals; + +import com.google.common.util.concurrent.ThreadFactoryBuilder; +import io.netty.util.HashedWheelTimer; +import java.net.InetAddress; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.TimeUnit; +import junit.framework.TestCase; +import org.apache.bookkeeper.client.BookieInfoReader.BookieInfo; +import org.apache.bookkeeper.client.EnsemblePlacementPolicy.PlacementPolicyAdherence; +import org.apache.bookkeeper.client.EnsemblePlacementPolicy.PlacementResult; +import org.apache.bookkeeper.client.ZoneawareEnsemblePlacementPolicyImpl.ZoneAwareNodeLocation; +import org.apache.bookkeeper.conf.ClientConfiguration; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.DNSToSwitchMapping; +import org.apache.bookkeeper.net.NetworkTopology; +import org.apache.bookkeeper.stats.NullStatsLogger; +import org.apache.bookkeeper.util.StaticDNSResolver; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Test the zoneaware ensemble placement policy. 
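+ *
+ * <p>Network locations in these tests use the zoneaware two-level form
+ * "/zone/upgradeDomain" (e.g. "/zone1/ud1"), and
+ * NetworkTopology.DEFAULT_ZONE_AND_UPGRADEDOMAIN serves as the default fault domain for
+ * bookies that are not given an explicit mapping.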
+ */ +public class TestZoneawareEnsemblePlacementPolicy extends TestCase { + + static final Logger LOG = LoggerFactory.getLogger(TestZoneawareEnsemblePlacementPolicy.class); + + ZoneawareEnsemblePlacementPolicy zepp; + final List ensemble = new ArrayList(); + DistributionSchedule.WriteSet writeSet = DistributionSchedule.NULL_WRITE_SET; + ClientConfiguration conf = new ClientConfiguration(); + BookieSocketAddress addr1; + BookieSocketAddress addr2, addr3, addr4; + io.netty.util.HashedWheelTimer timer; + + @Override + protected void setUp() throws Exception { + super.setUp(); + StaticDNSResolver.reset(); + StaticDNSResolver.addNodeToRack(InetAddress.getLocalHost().getHostAddress(), + NetworkTopology.DEFAULT_ZONE_AND_UPGRADEDOMAIN); + StaticDNSResolver.addNodeToRack("127.0.0.1", NetworkTopology.DEFAULT_ZONE_AND_UPGRADEDOMAIN); + StaticDNSResolver.addNodeToRack("localhost", NetworkTopology.DEFAULT_ZONE_AND_UPGRADEDOMAIN); + LOG.info("Set up static DNS Resolver."); + conf.setProperty(REPP_DNS_RESOLVER_CLASS, StaticDNSResolver.class.getName()); + addr1 = new BookieSocketAddress("127.0.0.2", 3181); + addr2 = new BookieSocketAddress("127.0.0.3", 3181); + addr3 = new BookieSocketAddress("127.0.0.4", 3181); + addr4 = new BookieSocketAddress("127.0.0.5", 3181); + // update dns mapping + StaticDNSResolver.addNodeToRack(addr1.getHostName(), NetworkTopology.DEFAULT_ZONE + "/ud1"); + StaticDNSResolver.addNodeToRack(addr2.getHostName(), NetworkTopology.DEFAULT_ZONE_AND_UPGRADEDOMAIN); + StaticDNSResolver.addNodeToRack(addr3.getHostName(), NetworkTopology.DEFAULT_ZONE_AND_UPGRADEDOMAIN); + StaticDNSResolver.addNodeToRack(addr4.getHostName(), NetworkTopology.DEFAULT_ZONE + "/ud2"); + ensemble.add(addr1.toBookieId()); + ensemble.add(addr2.toBookieId()); + ensemble.add(addr3.toBookieId()); + ensemble.add(addr4.toBookieId()); + writeSet = writeSetFromValues(0, 1, 2, 3); + + timer = new HashedWheelTimer(new ThreadFactoryBuilder().setNameFormat("TestTimer-%d").build(), + conf.getTimeoutTimerTickDurationMs(), TimeUnit.MILLISECONDS, conf.getTimeoutTimerNumTicks()); + + zepp = new ZoneawareEnsemblePlacementPolicy(); + zepp.initialize(conf, Optional. 
empty(), timer, DISABLE_ALL, + NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); + zepp.withDefaultFaultDomain(NetworkTopology.DEFAULT_ZONE_AND_UPGRADEDOMAIN); + } + + @Override + protected void tearDown() throws Exception { + zepp.uninitalize(); + super.tearDown(); + } + + static BookiesHealthInfo getBookiesHealthInfo() { + return getBookiesHealthInfo(new HashMap<>(), new HashMap<>()); + } + + static BookiesHealthInfo getBookiesHealthInfo(Map bookieFailureHistory, + Map bookiePendingRequests) { + return new BookiesHealthInfo() { + @Override + public long getBookieFailureHistory(BookieId bookieSocketAddress) { + return bookieFailureHistory.getOrDefault(bookieSocketAddress, -1L); + } + + @Override + public long getBookiePendingRequests(BookieId bookieSocketAddress) { + return bookiePendingRequests.getOrDefault(bookieSocketAddress, 0L); + } + }; + } + + static void updateMyUpgradeDomain(String zoneAndUD) throws Exception { + StaticDNSResolver.addNodeToRack(InetAddress.getLocalHost().getHostAddress(), zoneAndUD); + StaticDNSResolver.addNodeToRack(InetAddress.getLocalHost().getHostName(), zoneAndUD); + StaticDNSResolver.addNodeToRack("127.0.0.1", zoneAndUD); + StaticDNSResolver.addNodeToRack("localhost", zoneAndUD); + } + + @Test + public void testNotEnoughRWBookies() throws Exception { + zepp.uninitalize(); + updateMyUpgradeDomain(NetworkTopology.DEFAULT_ZONE_AND_UPGRADEDOMAIN); + + // Update cluster + BookieSocketAddress addr5 = new BookieSocketAddress("127.0.0.6", 3181); + BookieSocketAddress addr6 = new BookieSocketAddress("127.0.0.7", 3181); + + // update dns mapping + StaticDNSResolver.addNodeToRack(addr1.getHostName(), "/zone1/ud1"); + StaticDNSResolver.addNodeToRack(addr2.getHostName(), "/zone2/ud1"); + StaticDNSResolver.addNodeToRack(addr3.getHostName(), "/zone3/ud1"); + StaticDNSResolver.addNodeToRack(addr4.getHostName(), "/zone4/ud1"); + StaticDNSResolver.addNodeToRack(addr5.getHostName(), "/zone5/ud1"); + StaticDNSResolver.addNodeToRack(addr6.getHostName(), "/zone6/ud1"); + + ClientConfiguration newConf = (ClientConfiguration) this.conf.clone(); + newConf.setDesiredNumZonesPerWriteQuorum(1); + newConf.setMinNumZonesPerWriteQuorum(1); + zepp = new ZoneawareEnsemblePlacementPolicy(); + zepp.initialize(newConf, Optional. 
empty(), timer, DISABLE_ALL, + NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); + zepp.withDefaultFaultDomain(NetworkTopology.DEFAULT_ZONE_AND_UPGRADEDOMAIN); + + Set rwAddrs = new HashSet(); + Set roAddrs = new HashSet(); + rwAddrs.add(addr1.toBookieId()); + rwAddrs.add(addr2.toBookieId()); + rwAddrs.add(addr3.toBookieId()); + + zepp.onClusterChanged(rwAddrs, roAddrs); + try { + // only 3 rw bookies are available + zepp.newEnsemble(6, 3, 2, null, new HashSet<>()); + fail("newEnsemble is expected to fail because enough writable nodes are not available"); + } catch (BKException.BKNotEnoughBookiesException bke) { + // expected to get BKNotEnoughBookiesException + } + + roAddrs.add(addr4.toBookieId()); + roAddrs.add(addr5.toBookieId()); + roAddrs.add(addr6.toBookieId()); + zepp.onClusterChanged(rwAddrs, roAddrs); + try { + // only 3 rw bookies are available + zepp.newEnsemble(6, 3, 2, null, new HashSet<>()); + fail("newEnsemble is expected to fail because enough writable nodes are not available"); + } catch (BKException.BKNotEnoughBookiesException bke) { + // expected to get BKNotEnoughBookiesException + } + + rwAddrs.clear(); + roAddrs.add(addr1.toBookieId()); + roAddrs.add(addr2.toBookieId()); + roAddrs.add(addr3.toBookieId()); + zepp.onClusterChanged(rwAddrs, roAddrs); + try { + // no rw bookie is available + zepp.newEnsemble(6, 3, 2, null, new HashSet<>()); + fail("newEnsemble is expected to fail because enough writable nodes are not available"); + } catch (BKException.BKNotEnoughBookiesException bke) { + // expected to get BKNotEnoughBookiesException + } + } + + @Test + public void testEnoughRWBookies() throws Exception { + zepp.uninitalize(); + updateMyUpgradeDomain(NetworkTopology.DEFAULT_ZONE_AND_UPGRADEDOMAIN); + + // Update cluster + BookieSocketAddress addr5 = new BookieSocketAddress("127.0.0.6", 3181); + BookieSocketAddress addr6 = new BookieSocketAddress("127.0.0.7", 3181); + + // update dns mapping + StaticDNSResolver.addNodeToRack(addr1.getHostName(), "/zone1/ud1"); + StaticDNSResolver.addNodeToRack(addr2.getHostName(), "/zone2/ud1"); + StaticDNSResolver.addNodeToRack(addr3.getHostName(), "/zone3/ud1"); + StaticDNSResolver.addNodeToRack(addr4.getHostName(), "/zone4/ud1"); + StaticDNSResolver.addNodeToRack(addr5.getHostName(), "/zone5/ud1"); + StaticDNSResolver.addNodeToRack(addr6.getHostName(), "/zone6/ud1"); + + ClientConfiguration newConf = (ClientConfiguration) this.conf.clone(); + newConf.setDesiredNumZonesPerWriteQuorum(4); + newConf.setMinNumZonesPerWriteQuorum(2); + zepp = new ZoneawareEnsemblePlacementPolicy(); + zepp.initialize(newConf, Optional. empty(), timer, DISABLE_ALL, + NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); + zepp.withDefaultFaultDomain(NetworkTopology.DEFAULT_ZONE_AND_UPGRADEDOMAIN); + + Set rwAddrs = new HashSet(); + Set roAddrs = new HashSet(); + rwAddrs.add(addr1.toBookieId()); + rwAddrs.add(addr2.toBookieId()); + rwAddrs.add(addr3.toBookieId()); + rwAddrs.add(addr4.toBookieId()); + rwAddrs.add(addr5.toBookieId()); + rwAddrs.add(addr6.toBookieId()); + + zepp.onClusterChanged(rwAddrs, roAddrs); + /* + * there are enough bookies so newEnsemble should succeed. 
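+         * With six writable bookies spread across six distinct zones and
+         * desiredNumZonesPerWriteQuorum=4 (minimum 2), every write quorum of the (6, 3, 2)
+         * ensemble below can span distinct zones, which is why MEETS_STRICT is expected.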
+         */
+        PlacementResult<List<BookieId>> newEnsemblePlacementResult = zepp.newEnsemble(6, 3, 2, null,
+                new HashSet<>());
+        Set<BookieId> newEnsembleSet = new HashSet<BookieId>(
+                newEnsemblePlacementResult.getResult());
+        assertTrue("New ensemble should contain all 6 rw bookies", newEnsembleSet.containsAll(rwAddrs));
+        assertEquals("PlacementPolicyAdherence", PlacementPolicyAdherence.MEETS_STRICT,
+                newEnsemblePlacementResult.getAdheringToPolicy());
+
+        /*
+         * there are enough bookies so newEnsemble should succeed.
+         */
+        newEnsemblePlacementResult = zepp.newEnsemble(3, 3, 2, null, new HashSet<>());
+        newEnsembleSet = new HashSet<BookieId>(newEnsemblePlacementResult.getResult());
+        assertTrue("New ensemble should contain 3 rw bookies",
+                (newEnsembleSet.size() == 3) && (rwAddrs.containsAll(newEnsembleSet)));
+        assertEquals("PlacementPolicyAdherence", PlacementPolicyAdherence.MEETS_STRICT,
+                newEnsemblePlacementResult.getAdheringToPolicy());
+    }
+
+    @Test
+    public void testWithDefaultBookies() throws Exception {
+        zepp.uninitalize();
+        updateMyUpgradeDomain(NetworkTopology.DEFAULT_ZONE_AND_UPGRADEDOMAIN);
+
+        // update dns mapping
+        StaticDNSResolver.addNodeToRack(addr1.getHostName(), "/zone1/ud1");
+        StaticDNSResolver.addNodeToRack(addr2.getHostName(), "/zone2/ud1");
+        StaticDNSResolver.addNodeToRack(addr3.getHostName(), "/zone3/ud1");
+        StaticDNSResolver.addNodeToRack(addr4.getHostName(), "/zone4/ud1");
+
+        // Update cluster
+        BookieSocketAddress addr5 = new BookieSocketAddress("127.0.0.6", 3181);
+        BookieSocketAddress addr6 = new BookieSocketAddress("127.0.0.7", 3181);
+        BookieSocketAddress addr7 = new BookieSocketAddress("127.0.0.8", 3181);
+        Set<BookieId> bookiesInDefaultFaultDomain = new HashSet<BookieId>();
+        bookiesInDefaultFaultDomain.add(addr5.toBookieId());
+        bookiesInDefaultFaultDomain.add(addr6.toBookieId());
+        bookiesInDefaultFaultDomain.add(addr7.toBookieId());
+
+        ClientConfiguration newConf = (ClientConfiguration) this.conf.clone();
+        newConf.setDesiredNumZonesPerWriteQuorum(4);
+        zepp = new ZoneawareEnsemblePlacementPolicy();
+        zepp.initialize(newConf, Optional.<DNSToSwitchMapping>empty(), timer, DISABLE_ALL,
+                NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER);
+        zepp.withDefaultFaultDomain(NetworkTopology.DEFAULT_ZONE_AND_UPGRADEDOMAIN);
+
+        Set<BookieId> rwAddrs = new HashSet<BookieId>();
+        Set<BookieId> roAddrs = new HashSet<BookieId>();
+        rwAddrs.add(addr1.toBookieId());
+        rwAddrs.add(addr2.toBookieId());
+        rwAddrs.add(addr3.toBookieId());
+        rwAddrs.add(addr4.toBookieId());
+        rwAddrs.add(addr5.toBookieId());
+        rwAddrs.add(addr6.toBookieId());
+        rwAddrs.add(addr7.toBookieId());
+
+        zepp.onClusterChanged(rwAddrs, roAddrs);
+        for (int i = 0; i < 3; i++) {
+            /*
+             * make sure bookies from DEFAULT_ZONE_AND_UPGRADEDOMAIN are not
+             * part of the new ensemble created.
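+             * (addr5 through addr7 have no explicit zone mapping here, so they resolve to
+             * the default fault domain; with strict zoneaware placement in force they are
+             * expected to be left out.)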
+ */ + PlacementResult> newEnsemblePlacementResult = zepp.newEnsemble(4, 4, 2, null, + new HashSet<>()); + Set newEnsembleSet = new HashSet( + newEnsemblePlacementResult.getResult()); + assertTrue("Bookie from default faultDomain shouldn't be part of ensemble", + Collections.disjoint(newEnsembleSet, bookiesInDefaultFaultDomain)); + + newEnsemblePlacementResult = zepp.newEnsemble(3, 3, 2, null, new HashSet<>()); + newEnsembleSet = new HashSet(newEnsemblePlacementResult.getResult()); + assertTrue("Bookie from default faultDomain shouldn't be part of ensemble", + Collections.disjoint(newEnsembleSet, bookiesInDefaultFaultDomain)); + assertEquals("PlacementPolicyAdherence", PlacementPolicyAdherence.MEETS_STRICT, + newEnsemblePlacementResult.getAdheringToPolicy()); + } + } + + @Test + public void testMinZonesPerWriteQuorum() throws Exception { + zepp.uninitalize(); + updateMyUpgradeDomain(NetworkTopology.DEFAULT_ZONE_AND_UPGRADEDOMAIN); + + // Update cluster + BookieSocketAddress addr5 = new BookieSocketAddress("127.0.0.6", 3181); + BookieSocketAddress addr6 = new BookieSocketAddress("127.0.0.7", 3181); + BookieSocketAddress addr7 = new BookieSocketAddress("127.0.0.8", 3181); + BookieSocketAddress addr8 = new BookieSocketAddress("127.0.0.9", 3181); + BookieSocketAddress addr9 = new BookieSocketAddress("127.0.0.10", 3181); + BookieSocketAddress addr10 = new BookieSocketAddress("127.0.0.11", 3181); + + // update dns mapping + StaticDNSResolver.addNodeToRack(addr1.getHostName(), "/zone1/ud1"); + StaticDNSResolver.addNodeToRack(addr2.getHostName(), "/zone2/ud1"); + StaticDNSResolver.addNodeToRack(addr3.getHostName(), "/zone3/ud1"); + StaticDNSResolver.addNodeToRack(addr4.getHostName(), "/zone1/ud2"); + StaticDNSResolver.addNodeToRack(addr5.getHostName(), "/zone2/ud2"); + StaticDNSResolver.addNodeToRack(addr6.getHostName(), "/zone3/ud2"); + StaticDNSResolver.addNodeToRack(addr7.getHostName(), "/zone1/ud3"); + StaticDNSResolver.addNodeToRack(addr8.getHostName(), "/zone2/ud3"); + StaticDNSResolver.addNodeToRack(addr9.getHostName(), NetworkTopology.DEFAULT_ZONE_AND_UPGRADEDOMAIN); + StaticDNSResolver.addNodeToRack(addr10.getHostName(), NetworkTopology.DEFAULT_ZONE_AND_UPGRADEDOMAIN); + + ClientConfiguration newConf = (ClientConfiguration) this.conf.clone(); + newConf.setDesiredNumZonesPerWriteQuorum(4); + newConf.setMinNumZonesPerWriteQuorum(3); + zepp = new ZoneawareEnsemblePlacementPolicy(); + zepp.initialize(newConf, Optional. 
empty(), timer, DISABLE_ALL, + NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); + zepp.withDefaultFaultDomain(NetworkTopology.DEFAULT_ZONE_AND_UPGRADEDOMAIN); + + Set rwAddrs = new HashSet(); + Set roAddrs = new HashSet(); + Set bookiesInDefaultFaultDomain = new HashSet(); + rwAddrs.add(addr1.toBookieId()); + rwAddrs.add(addr2.toBookieId()); + rwAddrs.add(addr3.toBookieId()); + rwAddrs.add(addr4.toBookieId()); + rwAddrs.add(addr5.toBookieId()); + rwAddrs.add(addr6.toBookieId()); + rwAddrs.add(addr9.toBookieId()); + rwAddrs.add(addr10.toBookieId()); + roAddrs.add(addr7.toBookieId()); + roAddrs.add(addr8.toBookieId()); + bookiesInDefaultFaultDomain.add(addr9.toBookieId()); + bookiesInDefaultFaultDomain.add(addr10.toBookieId()); + + zepp.onClusterChanged(rwAddrs, roAddrs); + PlacementResult> newEnsemblePlacementResult; + + newEnsemblePlacementResult = zepp.newEnsemble(4, 4, 2, null, new HashSet<>()); + Set newEnsembleSet = new HashSet( + newEnsemblePlacementResult.getResult()); + assertTrue("New ensemble should contain all 6 rw bookies in non-default fault domains", + rwAddrs.containsAll(newEnsembleSet) && (newEnsembleSet.size() == 4)); + assertTrue("Bookie from default faultDomain shouldn't be part of ensemble", + Collections.disjoint(newEnsembleSet, bookiesInDefaultFaultDomain)); + assertEquals("PlacementPolicyAdherence", PlacementPolicyAdherence.MEETS_SOFT, + newEnsemblePlacementResult.getAdheringToPolicy()); + + try { + /* + * If ensembleSize is not multiple of writeQuorumSize, then it is + * expected to fail with IllegalArgumentException. + */ + zepp.newEnsemble(4, 3, 2, null, new HashSet<>()); + fail("newEnsemble is expected to fail with IllegalArgumentException"); + } catch (IllegalArgumentException illExc) { + // expected IllegalArgumentException + } + zepp.uninitalize(); + newConf = (ClientConfiguration) this.conf.clone(); + newConf.setDesiredNumZonesPerWriteQuorum(4); + newConf.setMinNumZonesPerWriteQuorum(3); + newConf.setEnforceStrictZoneawarePlacement(false); + zepp = new ZoneawareEnsemblePlacementPolicy(); + zepp.initialize(newConf, Optional. empty(), timer, DISABLE_ALL, + NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); + zepp.withDefaultFaultDomain(NetworkTopology.DEFAULT_ZONE_AND_UPGRADEDOMAIN); + zepp.onClusterChanged(rwAddrs, roAddrs); + + /* + * If enforceStrictZoneawarePlacement is not enabled, then there are no + * limitations on eligible values of ensembleSize and writeQuorumSize. 
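+ * Placement then degrades to a best-effort selection, which is why the
+ * adherence of the result below is expected to be
+ * PlacementPolicyAdherence.FAIL.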
+ */ + newEnsemblePlacementResult = zepp.newEnsemble(4, 3, 2, null, new HashSet<>()); + newEnsembleSet = new HashSet(newEnsemblePlacementResult.getResult()); + assertTrue("New ensemble should contain 4 different bookies", newEnsembleSet.size() == 4); + assertEquals("PlacementPolicyAdherence", PlacementPolicyAdherence.FAIL, + newEnsemblePlacementResult.getAdheringToPolicy()); + } + + @Test + public void testMinUDsNotAvailable() throws Exception { + zepp.uninitalize(); + updateMyUpgradeDomain(NetworkTopology.DEFAULT_ZONE_AND_UPGRADEDOMAIN); + + // Update cluster + BookieSocketAddress addr5 = new BookieSocketAddress("127.0.0.6", 3181); + BookieSocketAddress addr6 = new BookieSocketAddress("127.0.0.7", 3181); + BookieSocketAddress addr7 = new BookieSocketAddress("127.0.0.8", 3181); + BookieSocketAddress addr8 = new BookieSocketAddress("127.0.0.9", 3181); + BookieSocketAddress addr9 = new BookieSocketAddress("127.0.0.10", 3181); + BookieSocketAddress addr10 = new BookieSocketAddress("127.0.0.11", 3181); + + // update dns mapping + StaticDNSResolver.addNodeToRack(addr1.getHostName(), "/zone1/ud1"); + StaticDNSResolver.addNodeToRack(addr2.getHostName(), "/zone2/ud1"); + StaticDNSResolver.addNodeToRack(addr3.getHostName(), "/zone3/ud1"); + StaticDNSResolver.addNodeToRack(addr4.getHostName(), "/zone1/ud1"); + StaticDNSResolver.addNodeToRack(addr5.getHostName(), "/zone2/ud1"); + StaticDNSResolver.addNodeToRack(addr6.getHostName(), "/zone3/ud1"); + StaticDNSResolver.addNodeToRack(addr7.getHostName(), "/zone1/ud3"); + StaticDNSResolver.addNodeToRack(addr8.getHostName(), "/zone2/ud3"); + StaticDNSResolver.addNodeToRack(addr9.getHostName(), NetworkTopology.DEFAULT_ZONE_AND_UPGRADEDOMAIN); + StaticDNSResolver.addNodeToRack(addr10.getHostName(), NetworkTopology.DEFAULT_ZONE_AND_UPGRADEDOMAIN); + + ClientConfiguration newConf = (ClientConfiguration) this.conf.clone(); + newConf.setDesiredNumZonesPerWriteQuorum(4); + newConf.setMinNumZonesPerWriteQuorum(2); + zepp = new ZoneawareEnsemblePlacementPolicy(); + zepp.initialize(newConf, Optional. empty(), timer, DISABLE_ALL, + NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); + zepp.withDefaultFaultDomain(NetworkTopology.DEFAULT_ZONE_AND_UPGRADEDOMAIN); + + Set rwAddrs = new HashSet(); + Set roAddrs = new HashSet(); + Set bookiesInDefaultFaultDomain = new HashSet(); + rwAddrs.add(addr1.toBookieId()); + rwAddrs.add(addr2.toBookieId()); + rwAddrs.add(addr3.toBookieId()); + rwAddrs.add(addr4.toBookieId()); + rwAddrs.add(addr5.toBookieId()); + rwAddrs.add(addr6.toBookieId()); + rwAddrs.add(addr9.toBookieId()); + rwAddrs.add(addr10.toBookieId()); + + roAddrs.add(addr7.toBookieId()); + roAddrs.add(addr8.toBookieId()); + + bookiesInDefaultFaultDomain.add(addr9.toBookieId()); + bookiesInDefaultFaultDomain.add(addr10.toBookieId()); + + zepp.onClusterChanged(rwAddrs, roAddrs); + PlacementResult> newEnsemblePlacementResult; + try { + /* + * since rw bookies are not spread across UDs in zones, newEnsemble + * of writeQuorum 6 is expected to fail. + */ + zepp.newEnsemble(6, 6, 2, null, new HashSet<>()); + fail("newEnsemble is expected to fail because writeQuorum cannot be created with insufficient UDs"); + } catch (BKException.BKNotEnoughBookiesException bkne) { + // expected NotEnoughBookiesException + } + + int ensSize = 6; + int writeQuorum = 3; + /* + * though bookies are not spread across UDs in zones, newEnsemble would + * succeed because writeQuorum is just 3. 
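+ * For example, with the ensemble laid out in zone order
+ * z1,z2,z3,z1,z2,z3, each rolling writeset (e_i, e_i+1, e_i+2) spans
+ * three distinct zones, which the loop further below verifies.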
+ */ + newEnsemblePlacementResult = zepp.newEnsemble(ensSize, writeQuorum, 2, null, new HashSet<>()); + assertEquals("PlacementPolicyAdherence", PlacementPolicyAdherence.MEETS_STRICT, + newEnsemblePlacementResult.getAdheringToPolicy()); + List newEnsemble = newEnsemblePlacementResult.getResult(); + Set newEnsembleSet = new HashSet(newEnsemble); + assertTrue("New ensemble should contain all 6 rw bookies in non-default fault domains", + rwAddrs.containsAll(newEnsembleSet) && (newEnsembleSet.size() == 6)); + assertTrue("Bookie from default faultDomain shouldn't be part of ensemble", + Collections.disjoint(newEnsembleSet, bookiesInDefaultFaultDomain)); + + Set zonesOfBookiesInAWriteQuorum = new HashSet(); + for (int i = 0; i < 6; i++) { + zonesOfBookiesInAWriteQuorum.clear(); + for (int j = 0; j < writeQuorum; j++) { + zonesOfBookiesInAWriteQuorum + .add(zepp.getZoneAwareNodeLocation(newEnsemble.get((i + j) % ensSize)).getZone()); + } + assertEquals("Since bookies are not spread across multiple UDs in a zone, write quorum should" + + " contain bookies from all 3 zones", 3, zonesOfBookiesInAWriteQuorum.size()); + } + } + + @Test + public void testUniqueUds() throws Exception { + zepp.uninitalize(); + updateMyUpgradeDomain(NetworkTopology.DEFAULT_ZONE_AND_UPGRADEDOMAIN); + + // Update cluster + BookieSocketAddress addr5 = new BookieSocketAddress("127.0.0.6", 3181); + BookieSocketAddress addr6 = new BookieSocketAddress("127.0.0.7", 3181); + BookieSocketAddress addr7 = new BookieSocketAddress("127.0.0.8", 3181); + BookieSocketAddress addr8 = new BookieSocketAddress("127.0.0.9", 3181); + BookieSocketAddress addr9 = new BookieSocketAddress("127.0.0.10", 3181); + BookieSocketAddress addr10 = new BookieSocketAddress("127.0.0.11", 3181); + BookieSocketAddress addr11 = new BookieSocketAddress("127.0.0.12", 3181); + BookieSocketAddress addr12 = new BookieSocketAddress("127.0.0.13", 3181); + + // update dns mapping + StaticDNSResolver.addNodeToRack(addr1.getHostName(), "/zone1/ud1"); + StaticDNSResolver.addNodeToRack(addr2.getHostName(), "/zone1/ud1"); + StaticDNSResolver.addNodeToRack(addr3.getHostName(), "/zone1/ud2"); + StaticDNSResolver.addNodeToRack(addr4.getHostName(), "/zone1/ud2"); + StaticDNSResolver.addNodeToRack(addr5.getHostName(), "/zone1/ud3"); + StaticDNSResolver.addNodeToRack(addr6.getHostName(), "/zone1/ud3"); + StaticDNSResolver.addNodeToRack(addr7.getHostName(), "/zone2/ud1"); + StaticDNSResolver.addNodeToRack(addr8.getHostName(), "/zone2/ud1"); + StaticDNSResolver.addNodeToRack(addr9.getHostName(), "/zone2/ud2"); + StaticDNSResolver.addNodeToRack(addr10.getHostName(), "/zone2/ud2"); + StaticDNSResolver.addNodeToRack(addr11.getHostName(), "/zone2/ud3"); + StaticDNSResolver.addNodeToRack(addr12.getHostName(), "/zone2/ud3"); + + ClientConfiguration newConf = (ClientConfiguration) this.conf.clone(); + newConf.setDesiredNumZonesPerWriteQuorum(4); + newConf.setMinNumZonesPerWriteQuorum(2); + zepp = new ZoneawareEnsemblePlacementPolicy(); + zepp.initialize(newConf, Optional. 
empty(), timer, DISABLE_ALL, + NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); + zepp.withDefaultFaultDomain(NetworkTopology.DEFAULT_ZONE_AND_UPGRADEDOMAIN); + + Set rwAddrs = new HashSet(); + Set roAddrs = new HashSet(); + rwAddrs.add(addr1.toBookieId()); + rwAddrs.add(addr2.toBookieId()); + rwAddrs.add(addr3.toBookieId()); + rwAddrs.add(addr4.toBookieId()); + rwAddrs.add(addr5.toBookieId()); + rwAddrs.add(addr6.toBookieId()); + rwAddrs.add(addr7.toBookieId()); + rwAddrs.add(addr8.toBookieId()); + rwAddrs.add(addr9.toBookieId()); + rwAddrs.add(addr10.toBookieId()); + rwAddrs.add(addr11.toBookieId()); + rwAddrs.add(addr12.toBookieId()); + + zepp.onClusterChanged(rwAddrs, roAddrs); + /* + * Since there are enough bookies in different UDs in 2 zones + * (MinNumZonesPerWriteQuorum), new ensemble should succeed. + */ + PlacementResult> newEnsemblePlacementResult = zepp.newEnsemble(6, 6, 2, null, + new HashSet<>()); + List newEnsembleList = newEnsemblePlacementResult.getResult(); + Set newEnsembleSet = new HashSet(newEnsembleList); + assertTrue("New ensemble should contain 6 rw bookies in non-default fault domains", + rwAddrs.containsAll(newEnsembleSet) && (newEnsembleSet.size() == 6)); + assertEquals("PlacementPolicyAdherence", PlacementPolicyAdherence.MEETS_SOFT, + newEnsemblePlacementResult.getAdheringToPolicy()); + Set bookiesNetworkLocations = new HashSet(); + + for (BookieId bookieAddr : newEnsembleSet) { + bookiesNetworkLocations.add(zepp.resolveNetworkLocation(bookieAddr)); + } + /* + * Since there are enough bookies in different UDs, bookies from same + * zone should be from different UDs. + */ + assertTrue("Bookies should be from different UpgradeDomains if they belong to same zone", + (bookiesNetworkLocations.size() == 6)); + List bookiesNodeLocationList = new ArrayList(); + for (BookieId bookieAddr : newEnsembleList) { + bookiesNodeLocationList.add(zepp.getZoneAwareNodeLocation(bookieAddr)); + } + for (int i = 0; i < 5; i++) { + /* + * in newEnsemble order, bookies should be from alternating zones. 
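+ * Only zone1 and zone2 are present, so the expected layout is a strict
+ * z1,z2,z1,z2,... interleaving across the 6-bookie ensemble.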
+ */ + assertNotEquals("Alternate bookies should be from different zones", + bookiesNodeLocationList.get(i).getZone(), bookiesNodeLocationList.get(i + 1).getZone()); + } + } + + @Test + public void testNewBookieUniformDistributionWithMinZoneAndMinUDs() throws Exception { + zepp.uninitalize(); + updateMyUpgradeDomain(NetworkTopology.DEFAULT_ZONE_AND_UPGRADEDOMAIN); + + // Update cluster + BookieSocketAddress addr5 = new BookieSocketAddress("127.0.0.6", 3181); + BookieSocketAddress addr6 = new BookieSocketAddress("127.0.0.7", 3181); + BookieSocketAddress addr7 = new BookieSocketAddress("127.0.0.8", 3181); + BookieSocketAddress addr8 = new BookieSocketAddress("127.0.0.9", 3181); + BookieSocketAddress addr9 = new BookieSocketAddress("127.0.0.10", 3181); + BookieSocketAddress addr10 = new BookieSocketAddress("127.0.0.11", 3181); + BookieSocketAddress addr11 = new BookieSocketAddress("127.0.0.12", 3181); + BookieSocketAddress addr12 = new BookieSocketAddress("127.0.0.13", 3181); + BookieSocketAddress addr13 = new BookieSocketAddress("127.0.0.14", 3181); + BookieSocketAddress addr14 = new BookieSocketAddress("127.0.0.15", 3181); + + // update dns mapping + StaticDNSResolver.addNodeToRack(addr1.getHostName(), "/zone1/ud1"); + StaticDNSResolver.addNodeToRack(addr2.getHostName(), "/zone1/ud1"); + StaticDNSResolver.addNodeToRack(addr3.getHostName(), "/zone1/ud2"); + StaticDNSResolver.addNodeToRack(addr4.getHostName(), "/zone1/ud2"); + StaticDNSResolver.addNodeToRack(addr5.getHostName(), "/zone2/ud1"); + StaticDNSResolver.addNodeToRack(addr6.getHostName(), "/zone2/ud1"); + StaticDNSResolver.addNodeToRack(addr7.getHostName(), "/zone2/ud2"); + StaticDNSResolver.addNodeToRack(addr8.getHostName(), "/zone2/ud2"); + StaticDNSResolver.addNodeToRack(addr9.getHostName(), "/zone3/ud1"); + StaticDNSResolver.addNodeToRack(addr10.getHostName(), "/zone3/ud1"); + StaticDNSResolver.addNodeToRack(addr11.getHostName(), "/zone3/ud2"); + StaticDNSResolver.addNodeToRack(addr12.getHostName(), "/zone3/ud2"); + StaticDNSResolver.addNodeToRack(addr13.getHostName(), NetworkTopology.DEFAULT_ZONE_AND_UPGRADEDOMAIN); + StaticDNSResolver.addNodeToRack(addr14.getHostName(), NetworkTopology.DEFAULT_ZONE_AND_UPGRADEDOMAIN); + + Set rwAddrs = new HashSet(); + Set roAddrs = new HashSet(); + rwAddrs.add(addr1.toBookieId()); + rwAddrs.add(addr2.toBookieId()); + rwAddrs.add(addr3.toBookieId()); + rwAddrs.add(addr4.toBookieId()); + rwAddrs.add(addr5.toBookieId()); + rwAddrs.add(addr6.toBookieId()); + rwAddrs.add(addr7.toBookieId()); + rwAddrs.add(addr8.toBookieId()); + rwAddrs.add(addr9.toBookieId()); + rwAddrs.add(addr10.toBookieId()); + rwAddrs.add(addr11.toBookieId()); + rwAddrs.add(addr12.toBookieId()); + rwAddrs.add(addr13.toBookieId()); + rwAddrs.add(addr14.toBookieId()); + + int minNumZonesPerWriteQuorum = 3; + ClientConfiguration newConf = (ClientConfiguration) this.conf.clone(); + newConf.setDesiredNumZonesPerWriteQuorum(5); + newConf.setMinNumZonesPerWriteQuorum(minNumZonesPerWriteQuorum); + zepp = new ZoneawareEnsemblePlacementPolicy(); + zepp.initialize(newConf, Optional. 
empty(), timer, DISABLE_ALL, + NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); + zepp.withDefaultFaultDomain(NetworkTopology.DEFAULT_ZONE_AND_UPGRADEDOMAIN); + + zepp.onClusterChanged(rwAddrs, roAddrs); + Set excludedBookies = new HashSet(); + + PlacementResult> newEnsemblePlacementResult = zepp.newEnsemble(6, 6, 4, null, + excludedBookies); + List newEnsembleList = newEnsemblePlacementResult.getResult(); + assertEquals("PlacementPolicyAdherence", PlacementPolicyAdherence.MEETS_SOFT, + newEnsemblePlacementResult.getAdheringToPolicy()); + Set newEnsembleSet = new HashSet(newEnsembleList); + Set bookiesNetworkLocationsSet = new HashSet(); + List bookiesNodeLocationList = new ArrayList(); + for (BookieId bookieAddr : newEnsembleSet) { + bookiesNetworkLocationsSet.add(zepp.resolveNetworkLocation(bookieAddr)); + } + for (BookieId bookieAddr : newEnsembleList) { + bookiesNodeLocationList.add(zepp.getZoneAwareNodeLocation(bookieAddr)); + } + /* + * since there are enough bookies from minNumZonesPerWriteQuorum (3), + * bookies should be from 3 different zones and 2 different UDs. + */ + assertTrue("Bookies should be from different UpgradeDomains if they belong to same zone", + (bookiesNetworkLocationsSet.size() == 6)); + Set zonesOfFirstNodes = new HashSet(); + for (int i = 0; i < minNumZonesPerWriteQuorum; i++) { + zonesOfFirstNodes.add(bookiesNodeLocationList.get(i).getZone()); + } + assertEquals("Num of zones", minNumZonesPerWriteQuorum, zonesOfFirstNodes.size()); + for (int i = 0; i < minNumZonesPerWriteQuorum; i++) { + assertEquals("Zone", bookiesNodeLocationList.get(i).getZone(), + bookiesNodeLocationList.get(i + minNumZonesPerWriteQuorum).getZone()); + assertNotEquals("UpgradeDomain", bookiesNodeLocationList.get(i).getUpgradeDomain(), + bookiesNodeLocationList.get(i + minNumZonesPerWriteQuorum).getUpgradeDomain()); + } + } + + @Test + public void testReplaceBookie() throws Exception { + zepp.uninitalize(); + updateMyUpgradeDomain(NetworkTopology.DEFAULT_ZONE_AND_UPGRADEDOMAIN); + + // Update cluster + BookieSocketAddress addr5 = new BookieSocketAddress("127.0.0.6", 3181); + BookieSocketAddress addr6 = new BookieSocketAddress("127.0.0.7", 3181); + BookieSocketAddress addr7 = new BookieSocketAddress("127.0.0.8", 3181); + BookieSocketAddress addr8 = new BookieSocketAddress("127.0.0.9", 3181); + BookieSocketAddress addr9 = new BookieSocketAddress("127.0.0.10", 3181); + BookieSocketAddress addr10 = new BookieSocketAddress("127.0.0.11", 3181); + BookieSocketAddress addr11 = new BookieSocketAddress("127.0.0.12", 3181); + BookieSocketAddress addr12 = new BookieSocketAddress("127.0.0.13", 3181); + BookieSocketAddress addr13 = new BookieSocketAddress("127.0.0.14", 3181); + BookieSocketAddress addr14 = new BookieSocketAddress("127.0.0.15", 3181); + + // update dns mapping + StaticDNSResolver.addNodeToRack(addr1.getHostName(), "/zone1/ud1"); + StaticDNSResolver.addNodeToRack(addr2.getHostName(), "/zone1/ud1"); + StaticDNSResolver.addNodeToRack(addr3.getHostName(), "/zone1/ud2"); + StaticDNSResolver.addNodeToRack(addr4.getHostName(), "/zone1/ud2"); + StaticDNSResolver.addNodeToRack(addr5.getHostName(), "/zone2/ud1"); + StaticDNSResolver.addNodeToRack(addr6.getHostName(), "/zone2/ud1"); + StaticDNSResolver.addNodeToRack(addr7.getHostName(), "/zone2/ud2"); + StaticDNSResolver.addNodeToRack(addr8.getHostName(), "/zone2/ud2"); + StaticDNSResolver.addNodeToRack(addr9.getHostName(), "/zone3/ud1"); + StaticDNSResolver.addNodeToRack(addr10.getHostName(), "/zone3/ud1"); + 
StaticDNSResolver.addNodeToRack(addr11.getHostName(), "/zone3/ud2"); + StaticDNSResolver.addNodeToRack(addr12.getHostName(), "/zone3/ud2"); + StaticDNSResolver.addNodeToRack(addr13.getHostName(), NetworkTopology.DEFAULT_ZONE_AND_UPGRADEDOMAIN); + StaticDNSResolver.addNodeToRack(addr14.getHostName(), NetworkTopology.DEFAULT_ZONE_AND_UPGRADEDOMAIN); + + ClientConfiguration newConf = (ClientConfiguration) this.conf.clone(); + newConf.setDesiredNumZonesPerWriteQuorum(3); + newConf.setMinNumZonesPerWriteQuorum(3); + zepp = new ZoneawareEnsemblePlacementPolicy(); + zepp.initialize(newConf, Optional. empty(), timer, DISABLE_ALL, + NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); + zepp.withDefaultFaultDomain(NetworkTopology.DEFAULT_ZONE_AND_UPGRADEDOMAIN); + + Set rwAddrs = new HashSet(); + Set roAddrs = new HashSet(); + rwAddrs.add(addr1.toBookieId()); + rwAddrs.add(addr2.toBookieId()); + rwAddrs.add(addr3.toBookieId()); + rwAddrs.add(addr4.toBookieId()); + rwAddrs.add(addr5.toBookieId()); + rwAddrs.add(addr6.toBookieId()); + rwAddrs.add(addr7.toBookieId()); + rwAddrs.add(addr8.toBookieId()); + rwAddrs.add(addr9.toBookieId()); + rwAddrs.add(addr10.toBookieId()); + rwAddrs.add(addr11.toBookieId()); + rwAddrs.add(addr12.toBookieId()); + rwAddrs.add(addr13.toBookieId()); + rwAddrs.add(addr14.toBookieId()); + + zepp.onClusterChanged(rwAddrs, roAddrs); + List ensemble = new ArrayList(); + Set excludedBookies = new HashSet(); + ensemble.add(addr1.toBookieId()); + ensemble.add(addr5.toBookieId()); + ensemble.add(addr9.toBookieId()); + ensemble.add(addr3.toBookieId()); + ensemble.add(addr7.toBookieId()); + ensemble.add(addr11.toBookieId()); + /* + * since addr5 (/zone2/ud1) is already part of ensemble of size 6, write + * quorum of size 6, to replace bookie addr7 (/zone2/ud2), new bookie + * should be from /zone2/ud2. + */ + PlacementResult replacePlacementResult = zepp.replaceBookie(6, 6, 2, null, ensemble, + addr7.toBookieId(), + excludedBookies); + BookieId replacedBookie = replacePlacementResult.getResult(); + assertEquals("replaced bookie", addr8.toBookieId(), replacedBookie); + assertEquals("PlacementPolicyAdherence", PlacementPolicyAdherence.MEETS_STRICT, + replacePlacementResult.getAdheringToPolicy()); + + excludedBookies.add(addr8.toBookieId()); + /* + * here addr8 is excluded, and writeQuorumSize is 3. So to replace + * bookie addr7, addr6 (belonging to same zone) is the candidate. + */ + replacePlacementResult = zepp.replaceBookie(6, 3, 2, null, ensemble, addr7.toBookieId(), + excludedBookies); + replacedBookie = replacePlacementResult.getResult(); + assertEquals("replaced bookie", addr6.toBookieId(), replacedBookie); + + excludedBookies.add(addr6.toBookieId()); + try { + /* + * here addr6 is also excluded, so replaceBookie should fail. 
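+ * addr5 is already in the ensemble and addr8 was excluded earlier, so
+ * zone2 has no remaining replacement candidate at all.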
+ */ + replacedBookie = zepp.replaceBookie(6, 3, 2, null, ensemble, addr7.toBookieId(), excludedBookies) + .getResult(); + fail("Expected BKNotEnoughBookiesException for replaceBookie with added excludedBookies"); + } catch (BKException.BKNotEnoughBookiesException bkne) { + // expected NotEnoughBookiesException + } + } + + @Test + public void testReplaceBookieMinUDs() throws Exception { + zepp.uninitalize(); + updateMyUpgradeDomain(NetworkTopology.DEFAULT_ZONE_AND_UPGRADEDOMAIN); + + // Update cluster + BookieSocketAddress addr5 = new BookieSocketAddress("127.0.0.6", 3181); + BookieSocketAddress addr6 = new BookieSocketAddress("127.0.0.7", 3181); + BookieSocketAddress addr7 = new BookieSocketAddress("127.0.0.8", 3181); + BookieSocketAddress addr8 = new BookieSocketAddress("127.0.0.9", 3181); + BookieSocketAddress addr9 = new BookieSocketAddress("127.0.0.10", 3181); + BookieSocketAddress addr10 = new BookieSocketAddress("127.0.0.11", 3181); + BookieSocketAddress addr11 = new BookieSocketAddress("127.0.0.12", 3181); + + // update dns mapping + StaticDNSResolver.addNodeToRack(addr1.getHostName(), "/zone1/ud1"); + StaticDNSResolver.addNodeToRack(addr2.getHostName(), "/zone2/ud1"); + StaticDNSResolver.addNodeToRack(addr3.getHostName(), "/zone3/ud1"); + StaticDNSResolver.addNodeToRack(addr4.getHostName(), "/zone3/ud2"); + StaticDNSResolver.addNodeToRack(addr5.getHostName(), "/zone3/ud2"); + StaticDNSResolver.addNodeToRack(addr6.getHostName(), "/zone3/ud2"); + StaticDNSResolver.addNodeToRack(addr7.getHostName(), "/zone3/ud2"); + StaticDNSResolver.addNodeToRack(addr8.getHostName(), "/zone3/ud2"); + StaticDNSResolver.addNodeToRack(addr9.getHostName(), "/zone3/ud2"); + StaticDNSResolver.addNodeToRack(addr10.getHostName(), NetworkTopology.DEFAULT_ZONE_AND_UPGRADEDOMAIN); + StaticDNSResolver.addNodeToRack(addr11.getHostName(), NetworkTopology.DEFAULT_ZONE_AND_UPGRADEDOMAIN); + + ClientConfiguration newConf = (ClientConfiguration) this.conf.clone(); + newConf.setDesiredNumZonesPerWriteQuorum(4); + newConf.setMinNumZonesPerWriteQuorum(3); + zepp = new ZoneawareEnsemblePlacementPolicy(); + zepp.initialize(newConf, Optional. empty(), timer, DISABLE_ALL, + NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); + zepp.withDefaultFaultDomain(NetworkTopology.DEFAULT_ZONE_AND_UPGRADEDOMAIN); + + Set rwAddrs = new HashSet(); + Set roAddrs = new HashSet(); + rwAddrs.add(addr1.toBookieId()); + rwAddrs.add(addr2.toBookieId()); + rwAddrs.add(addr3.toBookieId()); + rwAddrs.add(addr4.toBookieId()); + rwAddrs.add(addr5.toBookieId()); + rwAddrs.add(addr6.toBookieId()); + rwAddrs.add(addr7.toBookieId()); + rwAddrs.add(addr8.toBookieId()); + rwAddrs.add(addr9.toBookieId()); + rwAddrs.add(addr10.toBookieId()); + rwAddrs.add(addr11.toBookieId()); + + zepp.onClusterChanged(rwAddrs, roAddrs); + List ensemble = new ArrayList(); + Set excludedBookies = new HashSet(); + ensemble.add(addr1.toBookieId()); + ensemble.add(addr2.toBookieId()); + ensemble.add(addr3.toBookieId()); + ensemble.add(addr4.toBookieId()); + ensemble.add(addr5.toBookieId()); + ensemble.add(addr6.toBookieId()); + /* + * though all the remaining non-default bookies are in /zone3/ud2, for + * replacing addr4 replaceBookie should be able to find some other + * bookie in /zone3/ud2. 
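+ * Zone diversity cannot be improved here, since every candidate outside
+ * the ensemble lives in /zone3/ud2, so the result below is expected to
+ * be MEETS_SOFT rather than MEETS_STRICT.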
+ */ + PlacementResult replaceResponse = zepp.replaceBookie(6, 6, 2, null, ensemble, addr4.toBookieId(), + excludedBookies); + BookieId replacedBookie = replaceResponse.getResult(); + assertEquals("replaced bookie", "/zone3/ud2", zepp.resolveNetworkLocation(replacedBookie)); + assertEquals("PlacementPolicyAdherence", PlacementPolicyAdherence.MEETS_SOFT, + replaceResponse.getAdheringToPolicy()); + } + + @Test + public void testAreAckedBookiesAdheringToPlacementPolicy() throws Exception { + zepp.uninitalize(); + updateMyUpgradeDomain(NetworkTopology.DEFAULT_ZONE_AND_UPGRADEDOMAIN); + + // Update cluster + BookieSocketAddress addr5 = new BookieSocketAddress("127.0.0.6", 3181); + BookieSocketAddress addr6 = new BookieSocketAddress("127.0.0.7", 3181); + BookieSocketAddress addr7 = new BookieSocketAddress("127.0.0.8", 3181); + BookieSocketAddress addr8 = new BookieSocketAddress("127.0.0.9", 3181); + BookieSocketAddress addr9 = new BookieSocketAddress("127.0.0.10", 3181); + + // update dns mapping + StaticDNSResolver.addNodeToRack(addr1.getHostName(), "/zone1/ud1"); + StaticDNSResolver.addNodeToRack(addr2.getHostName(), "/zone2/ud1"); + StaticDNSResolver.addNodeToRack(addr3.getHostName(), "/zone3/ud1"); + StaticDNSResolver.addNodeToRack(addr4.getHostName(), "/zone1/ud2"); + StaticDNSResolver.addNodeToRack(addr5.getHostName(), "/zone2/ud2"); + StaticDNSResolver.addNodeToRack(addr6.getHostName(), "/zone3/ud2"); + StaticDNSResolver.addNodeToRack(addr7.getHostName(), "/zone1/ud3"); + StaticDNSResolver.addNodeToRack(addr8.getHostName(), "/zone2/ud3"); + StaticDNSResolver.addNodeToRack(addr9.getHostName(), "/zone3/ud3"); + + ClientConfiguration newConf = (ClientConfiguration) this.conf.clone(); + newConf.setDesiredNumZonesPerWriteQuorum(4); + newConf.setMinNumZonesPerWriteQuorum(2); + zepp = new ZoneawareEnsemblePlacementPolicy(); + zepp.initialize(newConf, Optional. empty(), timer, DISABLE_ALL, + NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); + zepp.withDefaultFaultDomain(NetworkTopology.DEFAULT_ZONE_AND_UPGRADEDOMAIN); + + Set rwAddrs = new HashSet(); + Set roAddrs = new HashSet(); + rwAddrs.add(addr1.toBookieId()); + rwAddrs.add(addr2.toBookieId()); + rwAddrs.add(addr3.toBookieId()); + rwAddrs.add(addr4.toBookieId()); + rwAddrs.add(addr5.toBookieId()); + rwAddrs.add(addr6.toBookieId()); + rwAddrs.add(addr7.toBookieId()); + rwAddrs.add(addr8.toBookieId()); + rwAddrs.add(addr9.toBookieId()); + + zepp.onClusterChanged(rwAddrs, roAddrs); + Set ackedBookies = new HashSet(); + ackedBookies.add(addr1.toBookieId()); + ackedBookies.add(addr4.toBookieId()); + assertFalse("since both the bookies are in the same zone, it should return false", + zepp.areAckedBookiesAdheringToPlacementPolicy(ackedBookies, 10, 2)); + ackedBookies.clear(); + ackedBookies.add(addr1.toBookieId()); + ackedBookies.add(addr2.toBookieId()); + assertFalse("since ackQuorumSize is 3, it should return false", + zepp.areAckedBookiesAdheringToPlacementPolicy(ackedBookies, 10, 3)); + assertTrue("since ackQuorumSize is 2 and bookies are from minNumZonesPerWriteQuorum it should return true", + zepp.areAckedBookiesAdheringToPlacementPolicy(ackedBookies, 10, 2)); + + zepp.uninitalize(); + newConf = (ClientConfiguration) this.conf.clone(); + newConf.setDesiredNumZonesPerWriteQuorum(4); + newConf.setMinNumZonesPerWriteQuorum(4); + zepp = new ZoneawareEnsemblePlacementPolicy(); + zepp.initialize(newConf, Optional. 
empty(), timer, DISABLE_ALL, + NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); + zepp.withDefaultFaultDomain(NetworkTopology.DEFAULT_ZONE_AND_UPGRADEDOMAIN); + + zepp.onClusterChanged(rwAddrs, roAddrs); + ackedBookies.clear(); + ackedBookies.add(addr1.toBookieId()); + ackedBookies.add(addr2.toBookieId()); + ackedBookies.add(addr3.toBookieId()); + assertFalse("since minNumZonesPerWriteQuorum is set to 4, it should return false", + zepp.areAckedBookiesAdheringToPlacementPolicy(ackedBookies, 4, 3)); + assertTrue("since writeQuorumSize is set to 3, it should return true", + zepp.areAckedBookiesAdheringToPlacementPolicy(ackedBookies, 3, 3)); + ackedBookies.clear(); + ackedBookies.add(addr1.toBookieId()); + ackedBookies.add(addr2.toBookieId()); + ackedBookies.add(addr4.toBookieId()); + assertFalse("since bookies are in just 2 zones but not in 3 zones, it should return false", + zepp.areAckedBookiesAdheringToPlacementPolicy(ackedBookies, 3, 3)); + } + + @Test + public void testWeightedPlacement() throws Exception { + zepp.uninitalize(); + updateMyUpgradeDomain(NetworkTopology.DEFAULT_ZONE_AND_UPGRADEDOMAIN); + + // Update cluster + BookieSocketAddress addr5 = new BookieSocketAddress("127.0.0.6", 3181); + + // update dns mapping + StaticDNSResolver.addNodeToRack(addr1.getHostName(), "/zone1/ud1"); + StaticDNSResolver.addNodeToRack(addr2.getHostName(), "/zone1/ud2"); + StaticDNSResolver.addNodeToRack(addr3.getHostName(), "/zone2/ud1"); + StaticDNSResolver.addNodeToRack(addr4.getHostName(), "/zone2/ud2"); + StaticDNSResolver.addNodeToRack(addr5.getHostName(), NetworkTopology.DEFAULT_ZONE_AND_UPGRADEDOMAIN); + // Update cluster + Set addrs = new HashSet(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + addrs.add(addr5.toBookieId()); + + int multiple = 10; + + ClientConfiguration newConf = new ClientConfiguration(conf); + newConf.addConfiguration(conf); + newConf.setDiskWeightBasedPlacementEnabled(true); + /* + * since BookieMaxWeightMultipleForWeightBasedPlacement is set to -1, + * there is no max cap on weight. + */ + newConf.setBookieMaxWeightMultipleForWeightBasedPlacement(-1); + newConf.setMinNumZonesPerWriteQuorum(0); + zepp.initialize(newConf, Optional. 
empty(), timer, DISABLE_ALL, + NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); + zepp.withDefaultFaultDomain(NetworkTopology.DEFAULT_ZONE_AND_UPGRADEDOMAIN); + + zepp.onClusterChanged(addrs, new HashSet()); + Map bookieInfoMap = new HashMap(); + bookieInfoMap.put(addr1.toBookieId(), new BookieInfo(100L, 100L)); + bookieInfoMap.put(addr2.toBookieId(), new BookieInfo(100L, 100L)); + bookieInfoMap.put(addr3.toBookieId(), new BookieInfo(100L, 100L)); + bookieInfoMap.put(addr4.toBookieId(), new BookieInfo(multiple * 100L, multiple * 100L)); + bookieInfoMap.put(addr5.toBookieId(), new BookieInfo(100L, 100L)); + zepp.updateBookieInfo(bookieInfoMap); + + Map selectionCounts = new HashMap(); + int numTries = 50000; + EnsemblePlacementPolicy.PlacementResult> newEnsembleResponse; + List newEnsemble; + for (BookieId addr : addrs) { + selectionCounts.put(addr, (long) 0); + } + for (int i = 0; i < numTries; i++) { + // new ensemble response + newEnsembleResponse = zepp.newEnsemble(1, 1, 1, null, new HashSet()); + newEnsemble = newEnsembleResponse.getResult(); + selectionCounts.put(newEnsemble.get(0), selectionCounts.get(newEnsemble.get(0)) + 1); + } + double observedMultiple = ((double) selectionCounts.get(addr4.toBookieId()) + / (double) selectionCounts.get(addr3.toBookieId())); + /* + * since there is no cap on maxWeight, observedMultiple should be + * roughly equal to multiple + */ + assertTrue("Weights not being honored " + observedMultiple, Math.abs(observedMultiple - multiple) < 1); + + selectionCounts.clear(); + selectionCounts.put(addr3.toBookieId(), (long) 0); + selectionCounts.put(addr4.toBookieId(), (long) 0); + newEnsemble = new ArrayList(); + newEnsemble.add(addr2.toBookieId()); + Set excludedBookies = new HashSet(); + excludedBookies.add(addr1.toBookieId()); + EnsemblePlacementPolicy.PlacementResult replacedBookieResponse; + BookieId replacedBookie; + for (int i = 0; i < numTries; i++) { + // replace bookie response + replacedBookieResponse = zepp.replaceBookie(1, 1, 1, null, newEnsemble, addr2.toBookieId(), + excludedBookies); + replacedBookie = replacedBookieResponse.getResult(); + /* + * only addr3 and addr4 are eligible for replacedBookie. + */ + assertTrue("replaced : " + replacedBookie, addr3.toBookieId().equals(replacedBookie) + || addr4.toBookieId().equals(replacedBookie)); + selectionCounts.put(replacedBookie, selectionCounts.get(replacedBookie) + 1); + } + observedMultiple = ((double) selectionCounts.get(addr4.toBookieId()) + / (double) selectionCounts.get(addr3.toBookieId())); + /* + * since there is no cap on maxWeight, observedMultiple should be + * roughly equal to multiple + */ + assertTrue("Weights not being honored " + observedMultiple, Math.abs(observedMultiple - multiple) < 1); + } + + @Test + public void testPlacementOnStabilizeNetworkTopology() throws Exception { + zepp.uninitalize(); + updateMyUpgradeDomain(NetworkTopology.DEFAULT_ZONE_AND_UPGRADEDOMAIN); + + // update dns mapping + StaticDNSResolver.addNodeToRack(addr1.getHostName(), "/zone1/ud1"); + StaticDNSResolver.addNodeToRack(addr2.getHostName(), "/zone2/ud1"); + StaticDNSResolver.addNodeToRack(addr3.getHostName(), "/zone3/ud1"); + StaticDNSResolver.addNodeToRack(addr4.getHostName(), "/zone4/ud1"); + + zepp = new ZoneawareEnsemblePlacementPolicy(); + ClientConfiguration confLocal = new ClientConfiguration(); + confLocal.addConfiguration(conf); + confLocal.setNetworkTopologyStabilizePeriodSeconds(99999); + zepp.initialize(confLocal, Optional. 
empty(), timer, DISABLE_ALL, + NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); + zepp.withDefaultFaultDomain(NetworkTopology.DEFAULT_ZONE_AND_UPGRADEDOMAIN); + + Set addrs = new HashSet(); + addrs.add(addr1.toBookieId()); + addrs.add(addr2.toBookieId()); + addrs.add(addr3.toBookieId()); + addrs.add(addr4.toBookieId()); + zepp.onClusterChanged(addrs, new HashSet()); + // addr4 left + addrs.remove(addr4.toBookieId()); + Set deadBookies = zepp.onClusterChanged(addrs, new HashSet()); + assertTrue(deadBookies.isEmpty()); + + // we will never use addr4 even it is in the stabilized network topology + for (int i = 0; i < 5; i++) { + EnsemblePlacementPolicy.PlacementResult> ensembleResponse = zepp.newEnsemble(3, 3, + 2, null, new HashSet()); + List ensemble = ensembleResponse.getResult(); + assertFalse(ensemble.contains(addr4.toBookieId())); + assertEquals("PlacementPolicyAdherence", PlacementPolicyAdherence.MEETS_STRICT, + ensembleResponse.getAdheringToPolicy()); + } + + // we could still use addr4 for urgent allocation if it is just bookie + // flapping + EnsemblePlacementPolicy.PlacementResult> ensembleResponse = zepp.newEnsemble(4, 4, 2, + null, new HashSet()); + List ensemble = ensembleResponse.getResult(); + assertTrue(ensemble.contains(addr4.toBookieId())); + assertEquals("PlacementPolicyAdherence", PlacementPolicyAdherence.MEETS_STRICT, + ensembleResponse.getAdheringToPolicy()); + } + + @Test + public void testCreateNewEnsembleRandomly() throws Exception { + zepp.uninitalize(); + updateMyUpgradeDomain(NetworkTopology.DEFAULT_ZONE_AND_UPGRADEDOMAIN); + + // Update cluster + BookieSocketAddress addr5 = new BookieSocketAddress("127.0.0.6", 3181); + + // update dns mapping + StaticDNSResolver.addNodeToRack(addr1.getHostName(), "/zone1/ud1"); + StaticDNSResolver.addNodeToRack(addr2.getHostName(), "/zone1/ud1"); + StaticDNSResolver.addNodeToRack(addr3.getHostName(), "/zone1/ud1"); + StaticDNSResolver.addNodeToRack(addr4.getHostName(), NetworkTopology.DEFAULT_ZONE_AND_UPGRADEDOMAIN); + StaticDNSResolver.addNodeToRack(addr5.getHostName(), "/zone1/ud1"); + + zepp = new ZoneawareEnsemblePlacementPolicy(); + ClientConfiguration confLocal = new ClientConfiguration(); + confLocal.addConfiguration(conf); + confLocal.setEnforceStrictZoneawarePlacement(false); + confLocal.setMinNumZonesPerWriteQuorum(3); + confLocal.setDesiredNumZonesPerWriteQuorum(4); + zepp.initialize(confLocal, Optional. empty(), timer, DISABLE_ALL, + NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); + zepp.withDefaultFaultDomain(NetworkTopology.DEFAULT_ZONE_AND_UPGRADEDOMAIN); + + Set rwAddrs = new HashSet(); + Set roAddrs = new HashSet(); + Set excludeBookies = new HashSet(); + rwAddrs.add(addr1.toBookieId()); + rwAddrs.add(addr2.toBookieId()); + rwAddrs.add(addr3.toBookieId()); + rwAddrs.add(addr4.toBookieId()); + rwAddrs.add(addr5.toBookieId()); + excludeBookies.add(addr5.toBookieId()); + zepp.onClusterChanged(rwAddrs, roAddrs); + /* + * if enforceStrictZoneawarePlacement is not enabled, then there is no + * restrictions on ensSize and writeQSize and also bookie belonging to + * DEFAULT_ZONE_AND_UPGRADEDOMAIN can be a candidate. 
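+ * With addr5 excluded, the only four eligible bookies are addr1-addr4,
+ * one of which (addr4) sits in the default fault domain; strict mode
+ * would have rejected this selection.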
+ */ + PlacementResult> newEnsemblePlacementResult = zepp.newEnsemble(4, 3, 2, null, + excludeBookies); + Set newEnsembleSet = new HashSet( + newEnsemblePlacementResult.getResult()); + assertEquals("New ensemble should contain 4 rw bookies", 4, newEnsembleSet.size()); + assertFalse("excludeBookie should not be included in the ensemble", + newEnsembleSet.contains(addr5.toBookieId())); + assertEquals("PlacementPolicyAdherence", PlacementPolicyAdherence.FAIL, + newEnsemblePlacementResult.getAdheringToPolicy()); + + rwAddrs.remove(addr4.toBookieId()); + roAddrs.add(addr4.toBookieId()); + zepp.onClusterChanged(rwAddrs, roAddrs); + try { + /* + * since there is no bookie available, newEnsemble should fail. + */ + zepp.newEnsemble(4, 3, 2, null, excludeBookies); + fail("Creation of new ensemble randomly should fail because of not sufficient bookies"); + } catch (BKException.BKNotEnoughBookiesException bkne) { + // expected NotEnoughBookiesException + } + } + + @Test + public void testReplaceBookieRandomly() throws Exception { + zepp.uninitalize(); + updateMyUpgradeDomain(NetworkTopology.DEFAULT_ZONE_AND_UPGRADEDOMAIN); + + // Update cluster + BookieSocketAddress addr5 = new BookieSocketAddress("127.0.0.6", 3181); + BookieSocketAddress addr6 = new BookieSocketAddress("127.0.0.7", 3181); + BookieSocketAddress addr7 = new BookieSocketAddress("127.0.0.8", 3181); + + // update dns mapping + StaticDNSResolver.addNodeToRack(addr1.getHostName(), "/zone1/ud1"); + StaticDNSResolver.addNodeToRack(addr2.getHostName(), "/zone1/ud1"); + StaticDNSResolver.addNodeToRack(addr3.getHostName(), "/zone1/ud1"); + StaticDNSResolver.addNodeToRack(addr4.getHostName(), "/zone1/ud1"); + StaticDNSResolver.addNodeToRack(addr5.getHostName(), "/zone1/ud1"); + StaticDNSResolver.addNodeToRack(addr6.getHostName(), "/zone1/ud1"); + StaticDNSResolver.addNodeToRack(addr7.getHostName(), NetworkTopology.DEFAULT_ZONE_AND_UPGRADEDOMAIN); + + zepp = new ZoneawareEnsemblePlacementPolicy(); + ClientConfiguration confLocal = new ClientConfiguration(); + confLocal.addConfiguration(conf); + confLocal.setEnforceStrictZoneawarePlacement(false); + confLocal.setMinNumZonesPerWriteQuorum(3); + confLocal.setDesiredNumZonesPerWriteQuorum(4); + zepp.initialize(confLocal, Optional. empty(), timer, DISABLE_ALL, + NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); + zepp.withDefaultFaultDomain(NetworkTopology.DEFAULT_ZONE_AND_UPGRADEDOMAIN); + + Set rwAddrs = new HashSet(); + Set roAddrs = new HashSet(); + Set excludeBookies = new HashSet(); + rwAddrs.add(addr1.toBookieId()); + rwAddrs.add(addr2.toBookieId()); + rwAddrs.add(addr3.toBookieId()); + rwAddrs.add(addr4.toBookieId()); + rwAddrs.add(addr5.toBookieId()); + rwAddrs.add(addr7.toBookieId()); + + roAddrs.add(addr6.toBookieId()); + excludeBookies.add(addr5.toBookieId()); + zepp.onClusterChanged(rwAddrs, roAddrs); + List ensembleList = new ArrayList(); + ensembleList.add(addr1.toBookieId()); + ensembleList.add(addr2.toBookieId()); + ensembleList.add(addr3.toBookieId()); + ensembleList.add(addr4.toBookieId()); + + PlacementResult replaceResponse = zepp.replaceBookie(4, 3, 2, null, ensembleList, addr3.toBookieId(), + excludeBookies); + BookieId replaceBookie = replaceResponse.getResult(); + /* + * if enforceStrictZoneawarePlacement is not enabled, then there is no + * restrictions on ensSize and writeQSize and also bookie belonging to + * DEFAULT_ZONE_AND_UPGRADEDOMAIN can be a candidate. 
+ */ + assertEquals("ReplaceBookie candidate", addr7.toBookieId(), replaceBookie); + assertEquals("PlacementPolicyAdherence", PlacementPolicyAdherence.FAIL, + replaceResponse.getAdheringToPolicy()); + + rwAddrs.remove(addr7.toBookieId()); + excludeBookies.add(addr7.toBookieId()); + zepp.onClusterChanged(rwAddrs, roAddrs); + try { + /* + * since there is no bookie available, replaceBookie should fail. + */ + zepp.replaceBookie(4, 3, 2, null, ensembleList, addr3.toBookieId(), excludeBookies); + fail("ReplaceBookie should fail because of unavailable bookies"); + } catch (BKException.BKNotEnoughBookiesException bkne) { + // expected NotEnoughBookiesException + } + } + + @Test + public void testIsEnsembleAdheringToPlacementPolicy() throws Exception { + zepp.uninitalize(); + updateMyUpgradeDomain(NetworkTopology.DEFAULT_ZONE_AND_UPGRADEDOMAIN); + + // Update cluster + BookieSocketAddress addr5 = new BookieSocketAddress("127.0.0.6", 3181); + BookieSocketAddress addr6 = new BookieSocketAddress("127.0.0.7", 3181); + BookieSocketAddress addr7 = new BookieSocketAddress("127.0.0.8", 3181); + BookieSocketAddress addr8 = new BookieSocketAddress("127.0.0.9", 3181); + BookieSocketAddress addr9 = new BookieSocketAddress("127.0.0.10", 3181); + BookieSocketAddress addr10 = new BookieSocketAddress("127.0.0.11", 3181); + + // update dns mapping + StaticDNSResolver.addNodeToRack(addr1.getHostName(), "/zone1/ud1"); + StaticDNSResolver.addNodeToRack(addr2.getHostName(), "/zone1/ud2"); + StaticDNSResolver.addNodeToRack(addr3.getHostName(), "/zone1/ud2"); + StaticDNSResolver.addNodeToRack(addr4.getHostName(), "/zone2/ud1"); + StaticDNSResolver.addNodeToRack(addr5.getHostName(), "/zone2/ud2"); + StaticDNSResolver.addNodeToRack(addr6.getHostName(), "/zone2/ud2"); + StaticDNSResolver.addNodeToRack(addr7.getHostName(), "/zone3/ud1"); + StaticDNSResolver.addNodeToRack(addr8.getHostName(), "/zone3/ud2"); + StaticDNSResolver.addNodeToRack(addr9.getHostName(), "/zone3/ud2"); + StaticDNSResolver.addNodeToRack(addr10.getHostName(), NetworkTopology.DEFAULT_ZONE_AND_UPGRADEDOMAIN); + + zepp = new ZoneawareEnsemblePlacementPolicy(); + ClientConfiguration confLocal = new ClientConfiguration(); + confLocal.addConfiguration(conf); + confLocal.setEnforceStrictZoneawarePlacement(true); + confLocal.setMinNumZonesPerWriteQuorum(2); + confLocal.setDesiredNumZonesPerWriteQuorum(3); + zepp.initialize(confLocal, Optional. 
empty(), timer, DISABLE_ALL, + NullStatsLogger.INSTANCE, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); + zepp.withDefaultFaultDomain(NetworkTopology.DEFAULT_ZONE_AND_UPGRADEDOMAIN); + + List emptyEnsmeble = new ArrayList<>(); + assertEquals("PlacementPolicyAdherence", PlacementPolicyAdherence.FAIL, + zepp.isEnsembleAdheringToPlacementPolicy(emptyEnsmeble, 3, 2)); + + List ensemble = new ArrayList(); + ensemble.add(addr1.toBookieId()); + ensemble.add(addr2.toBookieId()); + ensemble.add(addr3.toBookieId()); + // all bookies in same rack + assertEquals("PlacementPolicyAdherence", PlacementPolicyAdherence.FAIL, + zepp.isEnsembleAdheringToPlacementPolicy(ensemble, 3, 2)); + + ensemble.clear(); + ensemble.add(addr1.toBookieId()); + ensemble.add(addr2.toBookieId()); + ensemble.add(addr4.toBookieId()); + // bookies spread across minZones + assertEquals("PlacementPolicyAdherence", PlacementPolicyAdherence.MEETS_SOFT, + zepp.isEnsembleAdheringToPlacementPolicy(ensemble, 3, 2)); + + ensemble.clear(); + ensemble.add(addr1.toBookieId()); + ensemble.add(addr4.toBookieId()); + ensemble.add(addr7.toBookieId()); + // bookies spread across desirednumofzones + assertEquals("PlacementPolicyAdherence", PlacementPolicyAdherence.MEETS_STRICT, + zepp.isEnsembleAdheringToPlacementPolicy(ensemble, 3, 2)); + + ensemble.clear(); + ensemble.add(addr1.toBookieId()); + ensemble.add(addr4.toBookieId()); + // writeQuorum should be greater than minZones + assertEquals("PlacementPolicyAdherence", PlacementPolicyAdherence.FAIL, + zepp.isEnsembleAdheringToPlacementPolicy(ensemble, 2, 2)); + + ensemble.clear(); + ensemble.add(addr2.toBookieId()); + ensemble.add(addr3.toBookieId()); + ensemble.add(addr4.toBookieId()); + // bookies from zone1 (addr2 and addr3) are in same UD + assertEquals("PlacementPolicyAdherence", PlacementPolicyAdherence.FAIL, + zepp.isEnsembleAdheringToPlacementPolicy(ensemble, 3, 2)); + + ensemble.clear(); + ensemble.add(addr1.toBookieId()); + ensemble.add(addr4.toBookieId()); + ensemble.add(addr7.toBookieId()); + ensemble.add(addr10.toBookieId()); + // bookie from default faultdomain will cause PlacementPolicyAdherence + // to fail + assertEquals("PlacementPolicyAdherence", PlacementPolicyAdherence.FAIL, + zepp.isEnsembleAdheringToPlacementPolicy(ensemble, 4, 2)); + + ensemble.clear(); + ensemble.add(addr1.toBookieId()); + ensemble.add(addr4.toBookieId()); + ensemble.add(addr7.toBookieId()); + ensemble.add(addr8.toBookieId()); + ensemble.add(addr9.toBookieId()); + // bookies are spread across desired zones and bookie from same zone are + // spread across 2 UDs + assertEquals("PlacementPolicyAdherence", PlacementPolicyAdherence.MEETS_STRICT, + zepp.isEnsembleAdheringToPlacementPolicy(ensemble, 5, 2)); + + ensemble.clear(); + ensemble.add(addr1.toBookieId()); + ensemble.add(addr4.toBookieId()); + ensemble.add(addr7.toBookieId()); + ensemble.add(addr2.toBookieId()); + ensemble.add(addr8.toBookieId()); + ensemble.add(addr9.toBookieId()); + /* + * writeset of addr2, addr8 and addr9 fails, because addr8 and addr9 + * belong to z3u2 + */ + assertEquals("PlacementPolicyAdherence", PlacementPolicyAdherence.FAIL, + zepp.isEnsembleAdheringToPlacementPolicy(ensemble, 3, 2)); + + ensemble.clear(); + ensemble.add(addr1.toBookieId()); + ensemble.add(addr4.toBookieId()); + ensemble.add(addr9.toBookieId()); + ensemble.add(addr2.toBookieId()); + ensemble.add(addr8.toBookieId()); + ensemble.add(addr7.toBookieId()); + /* + * writeset of addr9, addr2 and addr8 fails, because addr8 and addr9 + * belong to z3u2 + */ + 
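+ // Note: only the writeset rotation differs from the previous case; any
+ // writeset containing both z3u2 bookies (addr8 and addr9) still fails.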
assertEquals("PlacementPolicyAdherence", PlacementPolicyAdherence.FAIL, + zepp.isEnsembleAdheringToPlacementPolicy(ensemble, 3, 2)); + + ensemble.clear(); + ensemble.add(addr1.toBookieId()); + ensemble.add(addr4.toBookieId()); + ensemble.add(addr9.toBookieId()); + ensemble.add(addr2.toBookieId()); + ensemble.add(addr7.toBookieId()); + ensemble.add(addr8.toBookieId()); + /* + * writeset of addr2, addr7 and addr8 just meets soft. + */ + assertEquals("PlacementPolicyAdherence", PlacementPolicyAdherence.MEETS_SOFT, + zepp.isEnsembleAdheringToPlacementPolicy(ensemble, 3, 2)); + } +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/UpdateLedgerCmdTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/UpdateLedgerCmdTest.java index 6eca0de22f1..e9c2fb69a52 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/UpdateLedgerCmdTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/UpdateLedgerCmdTest.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -29,12 +29,12 @@ import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; - -import org.apache.bookkeeper.bookie.Bookie; +import org.apache.bookkeeper.bookie.BookieImpl; import org.apache.bookkeeper.bookie.BookieShell; import org.apache.bookkeeper.client.AsyncCallback.AddCallback; import org.apache.bookkeeper.client.BookKeeper.DigestType; import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.net.BookieSocketAddress; import org.apache.bookkeeper.test.BookKeeperClusterTestCase; import org.apache.zookeeper.KeeperException; @@ -53,6 +53,7 @@ public class UpdateLedgerCmdTest extends BookKeeperClusterTestCase { public UpdateLedgerCmdTest() { super(3); + useUUIDasBookieId = false; baseConf.setGcWaitTime(100000); } @@ -71,18 +72,43 @@ public void testUpdateLedgersToHostname() throws Exception { } String[] argv = new String[] { "updateledgers", "-b", "hostname", "-v", "true", "-p", "2" }; - final ServerConfiguration conf = bsConfs.get(0); + final ServerConfiguration conf = confByIndex(0); conf.setUseHostNameAsBookieID(true); - BookieSocketAddress toBookieId = Bookie.getBookieAddress(conf); - BookieSocketAddress toBookieAddr = new BookieSocketAddress(toBookieId.getHostName() + ":" - + conf.getBookiePort()); - + BookieSocketAddress toBookieId = BookieImpl.getBookieAddress(conf); + BookieId toBookieAddr = new BookieSocketAddress(toBookieId.getHostName() + ":" + + conf.getBookiePort()).toBookieId(); updateLedgerCmd(argv, 0, conf); int updatedLedgersCount = getUpdatedLedgersCount(bk, ledgers, toBookieAddr); assertEquals("Failed to update the ledger metadata to use bookie host name", 40, updatedLedgersCount); } + /** + * replace bookie address in ledger. 
+ */ + @Test + public void testUpdateBookieInLedger() throws Exception { + BookKeeper bk = new BookKeeper(baseClientConf, zkc); + LOG.info("Create ledger and add entries to it"); + List ledgers = new ArrayList(); + LedgerHandle lh1 = createLedgerWithEntries(bk, 0); + ledgers.add(lh1); + for (int i = 1; i < 40; i++) { + ledgers.add(createLedgerWithEntries(bk, 0)); + } + BookieId srcBookie = getBookie(0); + BookieId destBookie = new BookieSocketAddress("1.1.1.1", 2181).toBookieId(); + String[] argv = new String[] { "updateBookieInLedger", "-sb", srcBookie.toString(), "-db", + destBookie.toString(), "-v", "true", "-p", "2" }; + final ServerConfiguration conf = confByIndex(0); + killBookie(0); + updateLedgerCmd(argv, 0, conf); + int updatedLedgersCount = getUpdatedLedgersCount(bk, ledgers, srcBookie); + assertEquals("Failed to update the ledger metadata with new bookie-address", 0, updatedLedgersCount); + updatedLedgersCount = getUpdatedLedgersCount(bk, ledgers, destBookie); + assertEquals("Failed to update the ledger metadata with new bookie-address", 40, updatedLedgersCount); + } + private void updateLedgerCmd(String[] argv, int exitCode, ServerConfiguration conf) throws KeeperException, InterruptedException, IOException, UnknownHostException, Exception { LOG.info("Perform updateledgers command"); @@ -92,17 +118,14 @@ private void updateLedgerCmd(String[] argv, int exitCode, ServerConfiguration co assertEquals("Failed to return exit code!", exitCode, bkShell.run(argv)); } - private int getUpdatedLedgersCount(BookKeeper bk, List ledgers, BookieSocketAddress toBookieAddr) + private int getUpdatedLedgersCount(BookKeeper bk, List ledgers, BookieId toBookieAddr) throws InterruptedException, BKException { - List ensemble; + List ensemble; int updatedLedgersCount = 0; for (LedgerHandle lh : ledgers) { - // ledger#close() would hit BadVersion exception as rename - // increments cversion. But LedgerMetadata#isConflictWith() - // gracefully handles this conflicts. lh.close(); LedgerHandle openLedger = bk.openLedger(lh.getId(), digestType, PASSWORD.getBytes()); - ensemble = openLedger.getLedgerMetadata().getEnsemble(0); + ensemble = openLedger.getLedgerMetadata().getEnsembleAt(0); if (ensemble.contains(toBookieAddr)) { updatedLedgersCount++; } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/UpdateLedgerOpTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/UpdateLedgerOpTest.java index 91628e7e8d7..d024ad21d99 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/UpdateLedgerOpTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/UpdateLedgerOpTest.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file @@ -30,16 +30,17 @@ import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; - -import org.apache.bookkeeper.bookie.Bookie; +import org.apache.bookkeeper.bookie.BookieImpl; import org.apache.bookkeeper.bookie.BookieShell.UpdateLedgerNotifier; import org.apache.bookkeeper.client.AsyncCallback.AddCallback; import org.apache.bookkeeper.client.BookKeeper.DigestType; +import org.apache.bookkeeper.client.api.LedgerMetadata; +import org.apache.bookkeeper.common.util.MathUtils; import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.net.BookieSocketAddress; import org.apache.bookkeeper.proto.BookieServer; import org.apache.bookkeeper.test.BookKeeperClusterTestCase; -import org.apache.bookkeeper.util.MathUtils; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -49,12 +50,13 @@ */ public class UpdateLedgerOpTest extends BookKeeperClusterTestCase { private static final Logger LOG = LoggerFactory.getLogger(UpdateLedgerOpTest.class); - private DigestType digestType = DigestType.CRC32; + private final DigestType digestType = DigestType.CRC32; private static final String PASSWORD = "testPasswd"; private static final int printprogress = 5; public UpdateLedgerOpTest() { super(3); + useUUIDasBookieId = false; baseConf.setGcWaitTime(100000); } @@ -88,7 +90,7 @@ public void testManyLedgersWithShortHostname() throws Exception { public void testManyLedgers(boolean useShortHostName) throws Exception { try (BookKeeper bk = new BookKeeper(baseClientConf, zkc); - BookKeeperAdmin bkadmin = new BookKeeperAdmin(bk)) { + BookKeeperAdmin bkadmin = new BookKeeperAdmin(bk, baseClientConf)) { LOG.info("Create ledger and add entries to it"); List ledgers = new ArrayList(); @@ -98,28 +100,26 @@ public void testManyLedgers(boolean useShortHostName) throws Exception { ledgers.add(createLedgerWithEntries(bk, 0)); } - List ensemble = lh1.getLedgerMetadata().getEnsemble(0); + List ensemble = lh1.getLedgerMetadata().getEnsembleAt(0); - BookieSocketAddress curBookieAddr = ensemble.get(0); + BookieSocketAddress curBookieAddr = bk.getBookieAddressResolver().resolve(ensemble.get(0)); baseConf.setUseHostNameAsBookieID(true); baseConf.setUseShortHostName(useShortHostName); - BookieSocketAddress curBookieId = Bookie.getBookieAddress(baseConf); - BookieSocketAddress toBookieAddr = new BookieSocketAddress(curBookieId.getHostName() + ":" - + curBookieAddr.getPort()); + BookieSocketAddress curBookieId = BookieImpl.getBookieAddress(baseConf); + BookieId toBookieAddr = new BookieSocketAddress(curBookieId.getHostName() + ":" + + curBookieAddr.getPort()).toBookieId(); UpdateLedgerOp updateLedgerOp = new UpdateLedgerOp(bk, bkadmin); - updateLedgerOp.updateBookieIdInLedgers(curBookieAddr, toBookieAddr, 5, Integer.MIN_VALUE, progressable); + updateLedgerOp.updateBookieIdInLedgers(curBookieAddr.toBookieId(), toBookieAddr, + 5, 25, Integer.MIN_VALUE, progressable); for (LedgerHandle lh : ledgers) { - // ledger#close() would hit BadVersion exception as rename - // increments cversion. But LedgerMetadata#isConflictWith() - // gracefully handles this conflicts. 
lh.close(); LedgerHandle openLedger = bk.openLedger(lh.getId(), digestType, PASSWORD.getBytes()); - ensemble = openLedger.getLedgerMetadata().getEnsemble(0); + ensemble = openLedger.getLedgerMetadata().getEnsembleAt(0); assertTrue("Failed to update the ledger metadata to use bookie host name", ensemble.contains(toBookieAddr)); assertFalse("Failed to update the ledger metadata to use bookie host name", - ensemble.contains(curBookieAddr)); + ensemble.contains(curBookieAddr.toBookieId())); } } } @@ -130,7 +130,7 @@ public void testManyLedgers(boolean useShortHostName) throws Exception { @Test public void testLimitLessThanTotalLedgers() throws Exception { try (BookKeeper bk = new BookKeeper(baseClientConf, zkc); - BookKeeperAdmin bkadmin = new BookKeeperAdmin(bk)) { + BookKeeperAdmin bkadmin = new BookKeeperAdmin(bk, baseClientConf)) { LOG.info("Create ledger and add entries to it"); List ledgers = new ArrayList(); @@ -140,30 +140,31 @@ public void testLimitLessThanTotalLedgers() throws Exception { ledgers.add(createLedgerWithEntries(bk, 0)); } - List ensemble = lh1.getLedgerMetadata().getEnsemble(0); + List ensemble = lh1.getLedgerMetadata().getEnsembleAt(0); - BookieSocketAddress curBookieAddr = ensemble.get(0); + BookieId curBookieAddr = ensemble.get(0); baseConf.setUseHostNameAsBookieID(true); - BookieSocketAddress toBookieId = Bookie.getBookieAddress(baseConf); - BookieSocketAddress toBookieAddr = new BookieSocketAddress(toBookieId.getHostName() + ":" - + curBookieAddr.getPort()); + + BookieSocketAddress toBookieId = BookieImpl.getBookieAddress(baseConf); + BookieId toBookieAddr = new BookieSocketAddress(toBookieId.getHostName() + ":" + + bk.getBookieAddressResolver().resolve(curBookieAddr).getPort()).toBookieId(); UpdateLedgerOp updateLedgerOp = new UpdateLedgerOp(bk, bkadmin); - updateLedgerOp.updateBookieIdInLedgers(curBookieAddr, toBookieAddr, 7, 4, progressable); + updateLedgerOp.updateBookieIdInLedgers(curBookieAddr, toBookieAddr, 7, 35, 4, progressable); int updatedLedgersCount = getUpdatedLedgersCount(bk, ledgers, toBookieAddr); assertEquals("Failed to update the ledger metadata to use bookie host name", 4, updatedLedgersCount); // next execution - updateLedgerOp.updateBookieIdInLedgers(curBookieAddr, toBookieAddr, 2, 10, progressable); + updateLedgerOp.updateBookieIdInLedgers(curBookieAddr, toBookieAddr, 2, 10, 10, progressable); updatedLedgersCount = getUpdatedLedgersCount(bk, ledgers, toBookieAddr); assertEquals("Failed to update the ledger metadata to use bookie host name", 10, updatedLedgersCount); // no ledgers - updateLedgerOp.updateBookieIdInLedgers(curBookieAddr, toBookieAddr, 3, 20, progressable); + updateLedgerOp.updateBookieIdInLedgers(curBookieAddr, toBookieAddr, 3, 15, 20, progressable); updatedLedgersCount = getUpdatedLedgersCount(bk, ledgers, toBookieAddr); assertEquals("Failed to update the ledger metadata to use bookie host name", 10, updatedLedgersCount); // no ledgers - updateLedgerOp.updateBookieIdInLedgers(curBookieAddr, toBookieAddr, 3, Integer.MIN_VALUE, progressable); + updateLedgerOp.updateBookieIdInLedgers(curBookieAddr, toBookieAddr, 3, 15, Integer.MIN_VALUE, progressable); updatedLedgersCount = getUpdatedLedgersCount(bk, ledgers, toBookieAddr); assertEquals("Failed to update the ledger metadata to use bookie host name", 10, updatedLedgersCount); } @@ -190,37 +191,34 @@ public void testChangeEnsembleAfterRenamingToShortHostname() throws Exception { public void testChangeEnsembleAfterRenaming(boolean useShortHostName) throws Exception { try (BookKeeper 
bk = new BookKeeper(baseClientConf, zkc); - BookKeeperAdmin bkadmin = new BookKeeperAdmin(bk)) { + BookKeeperAdmin bkadmin = new BookKeeperAdmin(bk, baseClientConf)) { LOG.info("Create ledger and add entries to it"); LedgerHandle lh = createLedgerWithEntries(bk, 100); - BookieServer bookieServer = bs.get(0); - List ensemble = lh.getLedgerMetadata().getEnsemble(0); + BookieServer bookieServer = serverByIndex(0); + List ensemble = lh.getLedgerMetadata().getEnsembleAt(0); BookieSocketAddress curBookieAddr = null; - for (BookieSocketAddress bookieSocketAddress : ensemble) { - if (bookieServer.getLocalAddress().equals(bookieSocketAddress)) { - curBookieAddr = bookieSocketAddress; + for (BookieId bookieSocketAddress : ensemble) { + BookieSocketAddress resolved = bk.getBookieAddressResolver().resolve(bookieSocketAddress); + if (bookieServer.getLocalAddress().equals(resolved)) { + curBookieAddr = resolved; } } assertNotNull("Couldn't find the bookie in ledger metadata!", curBookieAddr); baseConf.setUseHostNameAsBookieID(true); baseConf.setUseShortHostName(useShortHostName); - BookieSocketAddress toBookieId = Bookie.getBookieAddress(baseConf); - BookieSocketAddress toBookieAddr = new BookieSocketAddress(toBookieId.getHostName() + ":" - + curBookieAddr.getPort()); + BookieSocketAddress toBookieId = BookieImpl.getBookieAddress(baseConf); + BookieId toBookieAddr = new BookieSocketAddress(toBookieId.getHostName() + ":" + + curBookieAddr.getPort()).toBookieId(); UpdateLedgerOp updateLedgerOp = new UpdateLedgerOp(bk, bkadmin); - updateLedgerOp.updateBookieIdInLedgers(curBookieAddr, toBookieAddr, 5, 100, progressable); + updateLedgerOp.updateBookieIdInLedgers(curBookieAddr.toBookieId(), toBookieAddr, 5, 25, 100, progressable); bookieServer.shutdown(); ServerConfiguration serverConf1 = newServerConfiguration(); - bsConfs.add(serverConf1); - bs.add(startBookie(serverConf1)); + startAndAddBookie(serverConf1); - // ledger#asyncAddEntry() would hit BadVersion exception as rename incr - // cversion. But LedgerMetadata#isConflictWith() gracefully handles - // this conflicts. 
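The pattern recurring in these hunks: an ensemble is now a list of opaque BookieId values, and a concrete host:port is obtained only on demand through the client's BookieAddressResolver. A short sketch of the id-to-address round trip these tests perform when renaming a bookie (newHostname is a placeholder):

import org.apache.bookkeeper.client.BookKeeper;
import org.apache.bookkeeper.net.BookieId;
import org.apache.bookkeeper.net.BookieSocketAddress;

class BookieIdResolutionSketch {
    static BookieId renamed(BookKeeper bk, BookieId original, String newHostname) {
        // Resolve the opaque id to the network address it currently maps to...
        BookieSocketAddress addr = bk.getBookieAddressResolver().resolve(original);
        // ...then mint the replacement id, keeping the original port.
        return new BookieSocketAddress(newHostname, addr.getPort()).toBookieId();
    }
}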
final CountDownLatch latch = new CountDownLatch(1); final AtomicInteger rc = new AtomicInteger(BKException.Code.OK); lh.asyncAddEntry("foobar".getBytes(), new AddCallback() { @@ -239,8 +237,8 @@ public void addComplete(int rccb, LedgerHandle lh, long entryId, Object ctx) { lh.close(); LedgerHandle openLedger = bk.openLedger(lh.getId(), digestType, PASSWORD.getBytes()); final LedgerMetadata ledgerMetadata = openLedger.getLedgerMetadata(); - assertEquals("Failed to reform ensemble!", 2, ledgerMetadata.getEnsembles().size()); - ensemble = ledgerMetadata.getEnsemble(0); + assertEquals("Failed to reform ensemble!", 2, ledgerMetadata.getAllEnsembles().size()); + ensemble = ledgerMetadata.getEnsembleAt(0); assertTrue("Failed to update the ledger metadata to use bookie host name", ensemble.contains(toBookieAddr)); } @@ -253,7 +251,7 @@ public void addComplete(int rccb, LedgerHandle lh, long entryId, Object ctx) { @Test public void testRenameWhenAddEntryInProgress() throws Exception { try (final BookKeeper bk = new BookKeeper(baseClientConf, zkc); - BookKeeperAdmin bkadmin = new BookKeeperAdmin(bk)) { + BookKeeperAdmin bkadmin = new BookKeeperAdmin(bk, baseClientConf)) { LOG.info("Create ledger and add entries to it"); final int numOfEntries = 5000; @@ -279,11 +277,11 @@ public void addComplete(int rccb, LedgerHandle lh, long entryId, Object ctx) { } }; th.start(); - List ensemble = lh.getLedgerMetadata().getEnsemble(0); - BookieSocketAddress curBookieAddr = ensemble.get(0); - BookieSocketAddress toBookieAddr = new BookieSocketAddress("localhost:" + curBookieAddr.getPort()); + List ensemble = lh.getLedgerMetadata().getEnsembleAt(0); + BookieSocketAddress curBookieAddr = bk.getBookieAddressResolver().resolve(ensemble.get(0)); + BookieId toBookieAddr = BookieId.parse("localhost:" + curBookieAddr.getPort()); UpdateLedgerOp updateLedgerOp = new UpdateLedgerOp(bk, bkadmin); - updateLedgerOp.updateBookieIdInLedgers(curBookieAddr, toBookieAddr, 5, 100, progressable); + updateLedgerOp.updateBookieIdInLedgers(curBookieAddr.toBookieId(), toBookieAddr, 5, 25, 100, progressable); if (!latch.await(120, TimeUnit.SECONDS)) { throw new Exception("Entries took too long to add"); @@ -293,23 +291,20 @@ public void addComplete(int rccb, LedgerHandle lh, long entryId, Object ctx) { } lh.close(); LedgerHandle openLedger = bk.openLedger(lh.getId(), digestType, PASSWORD.getBytes()); - ensemble = openLedger.getLedgerMetadata().getEnsemble(0); + ensemble = openLedger.getLedgerMetadata().getEnsembleAt(0); assertTrue("Failed to update the ledger metadata to use bookie host name", ensemble.contains(toBookieAddr)); } } - private int getUpdatedLedgersCount(BookKeeper bk, List ledgers, BookieSocketAddress toBookieAddr) + private int getUpdatedLedgersCount(BookKeeper bk, List ledgers, BookieId toBookieAddr) throws InterruptedException, BKException { - List ensemble; + List ensemble; int updatedLedgersCount = 0; for (LedgerHandle lh : ledgers) { - // ledger#close() would hit BadVersion exception as rename - // increments cversion. But LedgerMetadata#isConflictWith() - // gracefully handles this conflicts. 
lh.close(); LedgerHandle openLedger = bk.openLedger(lh.getId(), digestType, PASSWORD.getBytes()); - ensemble = openLedger.getLedgerMetadata().getEnsemble(0); + ensemble = openLedger.getLedgerMetadata().getEnsembleAt(0); if (ensemble.contains(toBookieAddr)) { updatedLedgersCount++; } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/api/BookKeeperApiTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/api/BookKeeperApiTest.java index 5429c0dba25..5cd71247a9d 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/api/BookKeeperApiTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/api/BookKeeperApiTest.java @@ -20,7 +20,7 @@ */ package org.apache.bookkeeper.client.api; -import static com.google.common.base.Charsets.UTF_8; +import static java.nio.charset.StandardCharsets.UTF_8; import static org.apache.bookkeeper.common.concurrent.FutureUtils.result; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.hasItem; @@ -41,7 +41,7 @@ import org.apache.bookkeeper.client.BKException.BKDigestMatchException; import org.apache.bookkeeper.client.BKException.BKDuplicateEntryIdException; import org.apache.bookkeeper.client.BKException.BKLedgerFencedException; -import org.apache.bookkeeper.client.BKException.BKNoSuchLedgerExistsException; +import org.apache.bookkeeper.client.BKException.BKNoSuchLedgerExistsOnMetadataServerException; import org.apache.bookkeeper.client.BKException.BKUnauthorizedAccessException; import org.apache.bookkeeper.client.MockBookKeeperTestCase; import org.apache.bookkeeper.conf.ClientConfiguration; @@ -339,7 +339,7 @@ public void testOpenLedgerWithRecovery() throws Exception { } } - @Test(expected = BKNoSuchLedgerExistsException.class) + @Test(expected = BKNoSuchLedgerExistsOnMetadataServerException.class) public void testDeleteLedger() throws Exception { long lId; @@ -358,7 +358,7 @@ public void testDeleteLedger() throws Exception { .execute()); } - @Test(expected = BKNoSuchLedgerExistsException.class) + @Test(expected = BKNoSuchLedgerExistsOnMetadataServerException.class) public void testCannotDeleteLedgerTwice() throws Exception { long lId; diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/api/BookKeeperBuildersOpenLedgerTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/api/BookKeeperBuildersOpenLedgerTest.java new file mode 100644 index 00000000000..00e14fe2c12 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/api/BookKeeperBuildersOpenLedgerTest.java @@ -0,0 +1,167 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + */ +package org.apache.bookkeeper.client.api; + +import static org.apache.bookkeeper.common.concurrent.FutureUtils.result; +import static org.junit.Assert.fail; +import static org.mockito.Mockito.any; +import static org.mockito.Mockito.anyInt; +import static org.mockito.Mockito.anyLong; +import static org.mockito.Mockito.doAnswer; + +import java.util.Arrays; +import java.util.Collection; +import java.util.HashMap; +import java.util.Map; +import org.apache.bookkeeper.client.BKException; +import org.apache.bookkeeper.client.BookKeeper; +import org.apache.bookkeeper.client.LedgerMetadataBuilder; +import org.apache.bookkeeper.client.MockBookKeeperTestCase; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.proto.BookieProtocol; +import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +/** + * Tests for BookKeeper open ledger operations. + */ +@RunWith(Parameterized.class) +public class BookKeeperBuildersOpenLedgerTest extends MockBookKeeperTestCase { + + private static final int ensembleSize = 3; + private static final int writeQuorumSize = 2; + private static final int ackQuorumSize = 1; + private static final long ledgerId = 12342L; + private static final Map customMetadata = new HashMap<>(); + private static final byte[] password = new byte[3]; + private static final byte[] entryData = new byte[32]; + + private boolean withRecovery; + + public BookKeeperBuildersOpenLedgerTest(boolean withRecovery) { + this.withRecovery = withRecovery; + } + + @Parameterized.Parameters(name = "withRecovery:({0})") + public static Collection data() { + return Arrays.asList(new Object[][]{ + {true}, + {false} + }); + } + + @Test + public void testOpenLedger() throws Exception { + LedgerMetadata ledgerMetadata = generateLedgerMetadata(ensembleSize, + writeQuorumSize, ackQuorumSize, password, customMetadata); + registerMockLedgerMetadata(ledgerId, ledgerMetadata); + + ledgerMetadata.getAllEnsembles().values().forEach(bookieAddressList -> { + bookieAddressList.forEach(bookieAddress -> { + registerMockEntryForRead(ledgerId, BookieProtocol.LAST_ADD_CONFIRMED, bookieAddress, entryData, -1); + registerMockEntryForRead(ledgerId, 0, bookieAddress, entryData, -1); + }); + }); + + result(newOpenLedgerOp() + .withPassword(ledgerMetadata.getPassword()) + .withDigestType(DigestType.CRC32) + .withLedgerId(ledgerId) + .withRecovery(withRecovery) + .execute()); + } + + @Test + public void testOpenLedgerWithTimeoutEx() throws Exception { + mockReadEntryTimeout(); + LedgerMetadata ledgerMetadata = generateLedgerMetadata(ensembleSize, + writeQuorumSize, ackQuorumSize, password, customMetadata); + registerMockLedgerMetadata(ledgerId, ledgerMetadata); + ledgerMetadata.getAllEnsembles().values().forEach(bookieAddressList -> { + bookieAddressList.forEach(bookieAddress -> { + registerMockEntryForRead(ledgerId, BookieProtocol.LAST_ADD_CONFIRMED, bookieAddress, entryData, -1); + registerMockEntryForRead(ledgerId, 0, bookieAddress, entryData, -1); + }); + }); + try { + result(newOpenLedgerOp() + .withPassword(ledgerMetadata.getPassword()) + .withDigestType(DigestType.CRC32) + .withLedgerId(ledgerId) + .withRecovery(withRecovery) + .execute()); + fail("Expect timeout error"); + } catch (BKException.BKTimeoutException timeoutException) { + // Expect timeout error. + } + // Reset bk client. 
+ resetBKClient(); + } + + protected LedgerMetadata generateLedgerMetadata(int ensembleSize, + int writeQuorumSize, int ackQuorumSize, byte[] password, + Map customMetadata) throws BKException.BKNotEnoughBookiesException { + return LedgerMetadataBuilder.create() + .withId(12L) + .withEnsembleSize(ensembleSize) + .withWriteQuorumSize(writeQuorumSize) + .withAckQuorumSize(ackQuorumSize) + .withPassword(password) + .withDigestType(BookKeeper.DigestType.CRC32.toApiDigestType()) + .withCustomMetadata(customMetadata) + .withCreationTime(System.currentTimeMillis()) + .newEnsembleEntry(0, generateNewEnsemble(ensembleSize)).build(); + } + + private void mockReadEntryTimeout() { + // Mock read entry. + doAnswer(invocation -> { + long ledgerId = (long) invocation.getArguments()[1]; + long entryId = (long) invocation.getArguments()[2]; + + BookkeeperInternalCallbacks.ReadEntryCallback callback = + (BookkeeperInternalCallbacks.ReadEntryCallback) invocation.getArguments()[3]; + Object ctx = invocation.getArguments()[4]; + callback.readEntryComplete(BKException.Code.TimeoutException, ledgerId, entryId, null, ctx); + return null; + }).when(bookieClient).readEntry(any(BookieId.class), + anyLong(), anyLong(), any(BookkeeperInternalCallbacks.ReadEntryCallback.class), + any(), anyInt(), any()); + // Mock read lac. + doAnswer(invocation -> { + long ledgerId = (long) invocation.getArguments()[1]; + BookkeeperInternalCallbacks.ReadLacCallback callback = + (BookkeeperInternalCallbacks.ReadLacCallback) invocation.getArguments()[2]; + Object ctx = invocation.getArguments()[3]; + callback.readLacComplete(BKException.Code.TimeoutException, ledgerId, null, null, ctx); + return null; + }).when(bookieClient).readLac(any(BookieId.class), + anyLong(), any(BookkeeperInternalCallbacks.ReadLacCallback.class), + any()); + } + + private void resetBKClient() throws Exception { + tearDown(); + setup(); + } +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/api/BookKeeperBuildersTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/api/BookKeeperBuildersTest.java index 65ea441a86a..cc1d29c1522 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/api/BookKeeperBuildersTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/api/BookKeeperBuildersTest.java @@ -25,18 +25,19 @@ import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; import static org.junit.Assert.fail; + import java.util.EnumSet; import java.util.HashMap; import java.util.Map; +import org.apache.bookkeeper.client.BKException; import org.apache.bookkeeper.client.BKException.BKClientClosedException; import org.apache.bookkeeper.client.BKException.BKIncorrectParameterException; -import org.apache.bookkeeper.client.BKException.BKNoSuchLedgerExistsException; +import org.apache.bookkeeper.client.BKException.BKNoSuchLedgerExistsOnMetadataServerException; import org.apache.bookkeeper.client.BookKeeper; import org.apache.bookkeeper.client.LedgerHandle; -import org.apache.bookkeeper.client.LedgerMetadata; +import org.apache.bookkeeper.client.LedgerMetadataBuilder; import org.apache.bookkeeper.client.MockBookKeeperTestCase; import org.apache.bookkeeper.conf.ClientConfiguration; -import org.apache.bookkeeper.proto.BookieProtocol; import org.junit.Test; /** @@ -168,7 +169,7 @@ public void testFailDigestTypeNullAndAutodetectionFalse() throws Exception { .withDigestType(null) .withPassword(password) .execute()); - fail("shoud not be able to create a ledger 
with such specs"); + fail("should not be able to create a ledger with such specs"); } @Test(expected = BKClientClosedException.class) @@ -177,7 +178,7 @@ public void testFailDigestTypeNullAndBookkKeeperClosed() throws Exception { result(newCreateLedgerOp() .withPassword(password) .execute()); - fail("shoud not be able to create a ledger, client is closed"); + fail("should not be able to create a ledger, client is closed"); } @Test @@ -304,15 +305,15 @@ public void testFailCreateAdvLedgerBadFixedLedgerIdNegative() throws Exception { .makeAdv() .withLedgerId(-2) .execute()); - fail("shoud not be able to create a ledger with such specs"); + fail("should not be able to create a ledger with such specs"); } - @Test(expected = BKNoSuchLedgerExistsException.class) + @Test(expected = BKNoSuchLedgerExistsOnMetadataServerException.class) public void testOpenLedgerNoId() throws Exception { result(newOpenLedgerOp().execute()); } - @Test(expected = BKNoSuchLedgerExistsException.class) + @Test(expected = BKNoSuchLedgerExistsOnMetadataServerException.class) public void testOpenLedgerBadId() throws Exception { result(newOpenLedgerOp() .withPassword(password) @@ -329,47 +330,6 @@ public void testOpenLedgerClientClosed() throws Exception { .execute()); } - @Test - public void testOpenLedgerNoRecovery() throws Exception { - LedgerMetadata ledgerMetadata = generateLedgerMetadata(ensembleSize, - writeQuorumSize, ackQuorumSize, password, customMetadata); - registerMockLedgerMetadata(ledgerId, ledgerMetadata); - - ledgerMetadata.getEnsembles().values().forEach(bookieAddressList -> { - bookieAddressList.forEach(bookieAddress -> { - registerMockEntryForRead(ledgerId, BookieProtocol.LAST_ADD_CONFIRMED, bookieAddress, entryData, -1); - registerMockEntryForRead(ledgerId, 0, bookieAddress, entryData, -1); - }); - }); - - result(newOpenLedgerOp() - .withPassword(ledgerMetadata.getPassword()) - .withDigestType(DigestType.CRC32) - .withLedgerId(ledgerId) - .withRecovery(false) - .execute()); - } - - @Test - public void testOpenLedgerRecovery() throws Exception { - LedgerMetadata ledgerMetadata = generateLedgerMetadata(ensembleSize, - writeQuorumSize, ackQuorumSize, password, customMetadata); - registerMockLedgerMetadata(ledgerId, ledgerMetadata); - - ledgerMetadata.getEnsembles().values().forEach(bookieAddressList -> { - bookieAddressList.forEach(bookieAddress -> { - registerMockEntryForRead(ledgerId, BookieProtocol.LAST_ADD_CONFIRMED, bookieAddress, entryData, -1); - registerMockEntryForRead(ledgerId, 0, bookieAddress, entryData, -1); - }); - }); - result(newOpenLedgerOp() - .withPassword(ledgerMetadata.getPassword()) - .withDigestType(DigestType.CRC32) - .withLedgerId(ledgerId) - .withRecovery(true) - .execute()); - } - @Test(expected = BKIncorrectParameterException.class) public void testDeleteLedgerNoLedgerId() throws Exception { result(newDeleteLedgerOp() @@ -404,17 +364,64 @@ public void testDeleteLedgerBookKeeperClosed() throws Exception { protected LedgerMetadata generateLedgerMetadata(int ensembleSize, int writeQuorumSize, int ackQuorumSize, byte[] password, - Map customMetadata) { - LedgerMetadata ledgerMetadata = new LedgerMetadata( - ensembleSize, - writeQuorumSize, - ackQuorumSize, - BookKeeper.DigestType.CRC32, - password, - customMetadata, - true); - ledgerMetadata.addEnsemble(0, generateNewEnsemble(ensembleSize)); - return ledgerMetadata; + Map customMetadata) throws BKException.BKNotEnoughBookiesException { + return LedgerMetadataBuilder.create() + .withId(12L) + .withEnsembleSize(ensembleSize) + 
.withWriteQuorumSize(writeQuorumSize) + .withAckQuorumSize(ackQuorumSize) + .withPassword(password) + .withDigestType(BookKeeper.DigestType.CRC32.toApiDigestType()) + .withCustomMetadata(customMetadata) + .withCreationTime(System.currentTimeMillis()) + .newEnsembleEntry(0, generateNewEnsemble(ensembleSize)).build(); + } + + @Test + public void testCreateLedgerWithOpportunisticStriping() throws Exception { + + maxNumberOfAvailableBookies = 4; + int bigEnsembleSize = 15; + int expectedWriteQuorumSize = 4; + + ClientConfiguration config = new ClientConfiguration(); + config.setOpportunisticStriping(true); + setBookKeeperConfig(config); + + setNewGeneratedLedgerId(ledgerId); + WriteHandle writer = newCreateLedgerOp() + .withAckQuorumSize(expectedWriteQuorumSize) + .withEnsembleSize(bigEnsembleSize) + .withWriteQuorumSize(expectedWriteQuorumSize) + .withCustomMetadata(customMetadata) + .withPassword(password) + .execute() + .get(); + assertEquals(ledgerId, writer.getId()); + LedgerMetadata metadata = getLedgerMetadata(ledgerId); + assertEquals(expectedWriteQuorumSize, metadata.getEnsembleSize()); + assertEquals(expectedWriteQuorumSize, metadata.getAckQuorumSize()); + assertEquals(expectedWriteQuorumSize, metadata.getWriteQuorumSize()); + assertArrayEquals(password, metadata.getPassword()); + + } + + @Test(expected = BKException.BKNotEnoughBookiesException.class) + public void testNotEnoughBookies() throws Exception { + + maxNumberOfAvailableBookies = 1; + ClientConfiguration config = new ClientConfiguration(); + config.setOpportunisticStriping(false); + setBookKeeperConfig(config); + + setNewGeneratedLedgerId(ledgerId); + result(newCreateLedgerOp() + .withAckQuorumSize(ackQuorumSize) + .withEnsembleSize(ensembleSize) + .withWriteQuorumSize(writeQuorumSize) + .withCustomMetadata(customMetadata) + .withPassword(password) + .execute()); } } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/api/ExplicitLACWithWriteHandleAPITest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/api/ExplicitLACWithWriteHandleAPITest.java new file mode 100644 index 00000000000..9917149ce64 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/api/ExplicitLACWithWriteHandleAPITest.java @@ -0,0 +1,83 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.client.api; + +import java.util.concurrent.TimeUnit; +import org.apache.bookkeeper.conf.ClientConfiguration; +import org.apache.bookkeeper.test.BookKeeperClusterTestCase; +import org.apache.bookkeeper.util.TestUtils; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Tests about ExplicitLAC and {@link Handle} API. 
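The testCreateLedgerWithOpportunisticStriping hunk above documents a client-side fallback: with opportunistic striping enabled, a create request asking for more bookies than are available shrinks the ensemble instead of failing with BKNotEnoughBookiesException. A minimal sketch, assuming a cluster where only four bookies are up:

import org.apache.bookkeeper.client.api.BookKeeper;
import org.apache.bookkeeper.client.api.WriteHandle;
import org.apache.bookkeeper.conf.ClientConfiguration;

class OpportunisticStripingSketch {
    static void demo(ClientConfiguration conf) throws Exception {
        conf.setOpportunisticStriping(true); // shrink the ensemble rather than fail
        try (BookKeeper bkc = BookKeeper.newBuilder(conf).build();
             WriteHandle wh = bkc.newCreateLedgerOp()
                     .withEnsembleSize(15) // more bookies than the cluster has
                     .withWriteQuorumSize(4)
                     .withAckQuorumSize(4)
                     .withPassword(new byte[0])
                     .execute()
                     .get()) {
            // Per the assertions above, the resulting metadata reports
            // ensembleSize == writeQuorumSize == ackQuorumSize == 4.
            System.out.println("created ledger " + wh.getId());
        }
    }
}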
+ */ +public class ExplicitLACWithWriteHandleAPITest extends BookKeeperClusterTestCase { + + private static final Logger LOG = LoggerFactory.getLogger(ExplicitLACWithWriteHandleAPITest.class); + + public ExplicitLACWithWriteHandleAPITest() { + super(1); + } + + @Test + public void testUseExplicitLAC() throws Exception { + ClientConfiguration conf = new ClientConfiguration(baseClientConf); + conf.setExplictLacInterval(1000); + try (BookKeeper bkc = BookKeeper + .newBuilder(conf) + .build();) { + try (WriteHandle writer = bkc.newCreateLedgerOp() + .withAckQuorumSize(1) + .withEnsembleSize(1) + .withPassword(new byte[0]) + .withWriteQuorumSize(1) + .execute() + .get();) { + writer.append("foo".getBytes("utf-8")); + writer.append("foo".getBytes("utf-8")); + writer.append("foo".getBytes("utf-8")); + long expectedLastAddConfirmed = writer.append("foo".getBytes("utf-8")); + + // since BK 4.12.0 the reader automatically uses ExplicitLAC + try (ReadHandle r = bkc.newOpenLedgerOp() + .withRecovery(false) + .withPassword(new byte[0]) + .withLedgerId(writer.getId()) + .execute() + .get()) { + TestUtils.assertEventuallyTrue("ExplicitLAC did not work", () -> { + try { + long value = r.readLastAddConfirmed(); + LOG.info("current value " + value + " vs " + expectedLastAddConfirmed); + return value == expectedLastAddConfirmed; + } catch (Exception ex) { + throw new RuntimeException(ex); + } + }, 30, TimeUnit.SECONDS); + } + + } + + } + } +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/api/LedgerMetadataTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/api/LedgerMetadataTest.java new file mode 100644 index 00000000000..7d7f6c19afc --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/api/LedgerMetadataTest.java @@ -0,0 +1,193 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.bookkeeper.client.api; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.fail; + +import java.util.Iterator; +import org.apache.bookkeeper.common.concurrent.FutureUtils; +import org.apache.bookkeeper.conf.ClientConfiguration; +import org.apache.bookkeeper.test.BookKeeperClusterTestCase; +import org.junit.Test; + +/** + * Bookkeeper Client API ledger metadata and ledgers listing test.
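The ExplicitLACWithWriteHandleAPITest above boils down to this usage: configure a periodic explicit-LAC broadcast on the writer, then tail the ledger with a non-recovery reader, which observes the last add confirmed without fencing the writer. A condensed sketch (setExplictLacInterval is the API's own spelling):

import java.nio.charset.StandardCharsets;
import org.apache.bookkeeper.client.api.BookKeeper;
import org.apache.bookkeeper.client.api.ReadHandle;
import org.apache.bookkeeper.client.api.WriteHandle;
import org.apache.bookkeeper.conf.ClientConfiguration;

class ExplicitLacSketch {
    static void demo(ClientConfiguration base) throws Exception {
        ClientConfiguration conf = new ClientConfiguration(base);
        conf.setExplictLacInterval(1000); // broadcast LAC every second
        try (BookKeeper bkc = BookKeeper.newBuilder(conf).build();
             WriteHandle writer = bkc.newCreateLedgerOp()
                     .withEnsembleSize(1).withWriteQuorumSize(1).withAckQuorumSize(1)
                     .withPassword(new byte[0]).execute().get()) {
            long expected = writer.append("foo".getBytes(StandardCharsets.UTF_8));
            try (ReadHandle reader = bkc.newOpenLedgerOp()
                     .withRecovery(false)
                     .withPassword(new byte[0])
                     .withLedgerId(writer.getId())
                     .execute().get()) {
                // Eventually converges on `expected` once the explicit LAC arrives.
                System.out.println("wrote " + expected
                        + ", reader sees LAC " + reader.readLastAddConfirmed());
            }
        }
    }
}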
+ */ +public class LedgerMetadataTest extends BookKeeperClusterTestCase { + + public LedgerMetadataTest() { + super(3); + } + + @Test + public void testGetLedgerMetadata() + throws Exception { + + ClientConfiguration conf = new ClientConfiguration(); + conf.setMetadataServiceUri(zkUtil.getMetadataServiceUri()); + try (BookKeeper bkc = BookKeeper.newBuilder(conf).build();) { + long ledgerId; + try (WriteHandle l = bkc + .newCreateLedgerOp() + .withDigestType(DigestType.CRC32) + .withPassword("testPasswd".getBytes()) + .execute() + .get();) { + ledgerId = l.getId(); + } + + LedgerMetadata metadata = FutureUtils.result(bkc.getLedgerMetadata(ledgerId)); + assertEquals(ledgerId, metadata.getLedgerId()); + assertEquals(3, metadata.getEnsembleSize()); + assertEquals(2, metadata.getAckQuorumSize()); + assertEquals(2, metadata.getWriteQuorumSize()); + assertArrayEquals("testPasswd".getBytes(), metadata.getPassword()); + } + + } + + @Test + public void testListLedgers() + throws Exception { + int numOfLedgers = 10; + + ClientConfiguration conf = new ClientConfiguration(); + conf.setMetadataServiceUri(zkUtil.getMetadataServiceUri()); + + try (BookKeeper bkc = BookKeeper.newBuilder(conf).build();) { + long[] ledgerIds = new long[numOfLedgers]; + for (int i = 0; i < numOfLedgers; i++) { + + try (WriteHandle l = bkc + .newCreateLedgerOp() + .withDigestType(DigestType.CRC32) + .withPassword("testPasswd".getBytes()) + .execute() + .get();) { + ledgerIds[i] = l.getId(); + } + } + + try (ListLedgersResult result = FutureUtils.result(bkc.newListLedgersOp().execute());) { + int count = 0; + + for (long ledgerId : result.toIterable()) { + assertEquals(ledgerIds[count++], ledgerId); + } + + assertEquals("Unexpected ledgers count", numOfLedgers, count); + try { + result.iterator(); + fail("Should thrown error"); + } catch (IllegalStateException e) { + // ok + } + try { + result.toIterable(); + fail("Should thrown error"); + } catch (IllegalStateException e) { + // ok + } + } + + try (ListLedgersResult result = FutureUtils.result(bkc.newListLedgersOp().execute());) { + int count = 0; + + for (LedgersIterator iterator = result.iterator(); iterator.hasNext();) { + long ledgerId = iterator.next(); + assertEquals(ledgerIds[count++], ledgerId); + + } + assertEquals("Unexpected ledgers count", numOfLedgers, count); + try { + result.iterator(); + fail("Should thrown error"); + } catch (IllegalStateException e) { + // ok + } + try { + result.toIterable(); + fail("Should thrown error"); + } catch (IllegalStateException e) { + // ok + } + } + } + + // check closed + { + ListLedgersResult result = FutureUtils.result(bkc.newListLedgersOp().execute()); + result.close(); + try { + result.toIterable(); + fail("Should thrown error"); + } catch (IllegalStateException e) { + // ok + } + + try { + result.iterator(); + fail("Should thrown error"); + } catch (IllegalStateException e) { + // ok + } + } + + { // iterator + ListLedgersResult result = FutureUtils.result(bkc.newListLedgersOp().execute()); + LedgersIterator it = result.iterator(); + result.close(); + try { + it.hasNext(); + fail("Should thrown error"); + } catch (IllegalStateException e) { + // ok + } + + try { + it.next(); + fail("Should thrown error"); + } catch (IllegalStateException e) { + // ok + } + } + + { // iterable + ListLedgersResult result = FutureUtils.result(bkc.newListLedgersOp().execute()); + Iterator it = result.toIterable().iterator(); + result.close(); + try { + it.hasNext(); + fail("Should thrown error"); + } catch (IllegalStateException e) { + // ok + 
} + + try { + it.next(); + fail("Should thrown error"); + } catch (IllegalStateException e) { + // ok + } + } + } + +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/api/WriteAdvHandleTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/api/WriteAdvHandleTest.java index 585a7b21ee1..6c469c44e41 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/api/WriteAdvHandleTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/api/WriteAdvHandleTest.java @@ -19,7 +19,7 @@ package org.apache.bookkeeper.client.api; -import static com.google.common.base.Charsets.UTF_8; +import static java.nio.charset.StandardCharsets.UTF_8; import static org.junit.Assert.assertArrayEquals; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.anyInt; @@ -29,7 +29,7 @@ import static org.mockito.Mockito.mock; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; -import static org.powermock.api.mockito.PowerMockito.when; +import static org.mockito.Mockito.when; import io.netty.buffer.ByteBuf; import io.netty.buffer.ByteBufUtil; diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/api/WriteFlagTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/api/WriteFlagTest.java index 746535a061e..902bb50a7de 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/api/WriteFlagTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/api/WriteFlagTest.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/api/WriteHandleTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/api/WriteHandleTest.java index 113f5857693..576d5537715 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/api/WriteHandleTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/api/WriteHandleTest.java @@ -19,7 +19,7 @@ package org.apache.bookkeeper.client.api; -import static com.google.common.base.Charsets.UTF_8; +import static java.nio.charset.StandardCharsets.UTF_8; import static org.junit.Assert.assertArrayEquals; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.anyInt; diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/impl/LedgerEntriesImplTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/impl/LedgerEntriesImplTest.java index 901e41ef049..ff7117c8e2e 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/impl/LedgerEntriesImplTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/impl/LedgerEntriesImplTest.java @@ -19,7 +19,7 @@ package org.apache.bookkeeper.client.impl; -import static com.google.common.base.Charsets.UTF_8; +import static java.nio.charset.StandardCharsets.UTF_8; import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; import static org.junit.Assert.fail; diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/impl/LedgerEntryImplTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/impl/LedgerEntryImplTest.java index ee716803df0..2b7cc0f6737 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/impl/LedgerEntryImplTest.java +++ 
b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/impl/LedgerEntryImplTest.java @@ -19,7 +19,7 @@ package org.apache.bookkeeper.client.impl; -import static com.google.common.base.Charsets.UTF_8; +import static java.nio.charset.StandardCharsets.UTF_8; import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/conf/AbstractConfigurationTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/conf/AbstractConfigurationTest.java index 8039b0caad3..a6333a47d32 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/conf/AbstractConfigurationTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/conf/AbstractConfigurationTest.java @@ -22,10 +22,12 @@ import static org.mockito.Mockito.CALLS_REAL_METHODS; import static org.mockito.Mockito.mock; +import org.apache.bookkeeper.common.allocator.LeakDetectionPolicy; import org.apache.bookkeeper.meta.AbstractZkLedgerManagerFactory; import org.apache.bookkeeper.meta.HierarchicalLedgerManagerFactory; import org.apache.bookkeeper.meta.LedgerManagerFactory; import org.apache.bookkeeper.meta.LongHierarchicalLedgerManagerFactory; +import org.apache.commons.configuration.ConfigurationException; import org.junit.Before; import org.junit.Test; @@ -76,7 +78,7 @@ public void testSetMetadataServiceUri() throws Exception { } @SuppressWarnings({ "unchecked" }) - @Test(expected = UnsupportedOperationException.class) + @Test(expected = ConfigurationException.class) public void testUnsupportedLedgerManagerFactory() throws Exception { LedgerManagerFactory mockFactory = mock(LedgerManagerFactory.class, CALLS_REAL_METHODS); conf.setLedgerManagerFactoryClass(mockFactory.getClass()); @@ -129,4 +131,52 @@ public void testUnknownZkLedgerManagerFactory() throws Exception { conf.getMetadataServiceUri(); } + @Test + public void testAllocatorLeakDetectionPolicy() { + String nettyOldLevelKey = "io.netty.leakDetectionLevel"; + String nettyLevelKey = "io.netty.leakDetection.level"; + + String nettyOldLevelStr = System.getProperty(nettyOldLevelKey); + String nettyLevelStr = System.getProperty(nettyLevelKey); + + //Remove netty property for test. 
+ System.getProperties().remove(nettyOldLevelKey); + System.getProperties().remove(nettyLevelKey); + + assertEquals(LeakDetectionPolicy.Disabled, conf.getAllocatorLeakDetectionPolicy()); + + System.getProperties().put(nettyOldLevelKey, "zazaza"); + assertEquals(LeakDetectionPolicy.Disabled, conf.getAllocatorLeakDetectionPolicy()); + + conf.setProperty(AbstractConfiguration.ALLOCATOR_LEAK_DETECTION_POLICY, "zazaza"); + assertEquals(LeakDetectionPolicy.Disabled, conf.getAllocatorLeakDetectionPolicy()); + + System.getProperties().put(nettyOldLevelKey, "simple"); + assertEquals(LeakDetectionPolicy.Simple, conf.getAllocatorLeakDetectionPolicy()); + + System.getProperties().put(nettyLevelKey, "disabled"); + assertEquals(LeakDetectionPolicy.Disabled, conf.getAllocatorLeakDetectionPolicy()); + + System.getProperties().put(nettyLevelKey, "advanCed"); + assertEquals(LeakDetectionPolicy.Advanced, conf.getAllocatorLeakDetectionPolicy()); + + conf.setProperty(AbstractConfiguration.ALLOCATOR_LEAK_DETECTION_POLICY, "simPle"); + assertEquals(LeakDetectionPolicy.Advanced, conf.getAllocatorLeakDetectionPolicy()); + + conf.setProperty(AbstractConfiguration.ALLOCATOR_LEAK_DETECTION_POLICY, "advanCed"); + assertEquals(LeakDetectionPolicy.Advanced, conf.getAllocatorLeakDetectionPolicy()); + + conf.setProperty(AbstractConfiguration.ALLOCATOR_LEAK_DETECTION_POLICY, "paranoiD"); + assertEquals(LeakDetectionPolicy.Paranoid, conf.getAllocatorLeakDetectionPolicy()); + + System.getProperties().remove(nettyOldLevelKey); + System.getProperties().remove(nettyLevelKey); + //Revert the netty properties. + if (nettyOldLevelStr != null) { + System.getProperties().put(nettyOldLevelKey, nettyOldLevelStr); + } + if (nettyLevelStr != null) { + System.getProperties().put(nettyLevelKey, nettyLevelStr); + } + } } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/conf/NoSystemPropertiesConfigurationTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/conf/NoSystemPropertiesConfigurationTest.java index 6f03697be2c..5fe86aa209d 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/conf/NoSystemPropertiesConfigurationTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/conf/NoSystemPropertiesConfigurationTest.java @@ -19,6 +19,7 @@ import static org.junit.Assert.assertEquals; +import java.util.NoSuchElementException; import org.junit.Test; /** @@ -32,11 +33,14 @@ public class NoSystemPropertiesConfigurationTest { // this property is read when AbstractConfiguration class is loaded. // this test will work as expected only using a new JVM (or classloader) for the test System.setProperty(ClientConfiguration.THROTTLE, "10"); + System.setProperty(ClientConfiguration.CLIENT_TCP_USER_TIMEOUT_MILLIS, "20000"); } - @Test + @Test(expected = NoSuchElementException.class) public void testUseSystemProperty() { ClientConfiguration clientConfiguration = new ClientConfiguration(); assertEquals(5000, clientConfiguration.getThrottleValue()); + // This should throw NoSuchElementException if the property has not been set. 
+ clientConfiguration.getTcpUserTimeoutMillis(); } } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/conf/SystemPropertiesConfigurationTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/conf/SystemPropertiesConfigurationTest.java index db02515a734..8ce89523354 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/conf/SystemPropertiesConfigurationTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/conf/SystemPropertiesConfigurationTest.java @@ -33,11 +33,13 @@ public class SystemPropertiesConfigurationTest { // this test will work as expected only using a new JVM (or classloader) for the test System.setProperty(AbstractConfiguration.READ_SYSTEM_PROPERTIES_PROPERTY, "true"); System.setProperty(ClientConfiguration.THROTTLE, "10"); + System.setProperty(ClientConfiguration.CLIENT_TCP_USER_TIMEOUT_MILLIS, "20000"); } @Test public void testUseSystemProperty() { ClientConfiguration clientConfiguration = new ClientConfiguration(); assertEquals(10, clientConfiguration.getThrottleValue()); + assertEquals(20000, clientConfiguration.getTcpUserTimeoutMillis()); } } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/conf/TestBKConfiguration.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/conf/TestBKConfiguration.java index aba12a748ce..4b1d64fc94b 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/conf/TestBKConfiguration.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/conf/TestBKConfiguration.java @@ -25,7 +25,9 @@ import java.net.SocketException; import java.util.Collections; import java.util.Enumeration; - +import org.apache.bookkeeper.bookie.storage.ldb.DbLedgerStorage; +import org.apache.bookkeeper.common.allocator.PoolingPolicy; +import org.apache.bookkeeper.util.PortManager; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -45,14 +47,22 @@ public class TestBKConfiguration { */ public static ServerConfiguration newServerConfiguration() { ServerConfiguration confReturn = new ServerConfiguration(); + confReturn.setTLSEnabledProtocols("TLSv1.2,TLSv1.1"); confReturn.setJournalFlushWhenQueueEmpty(true); // enable journal format version confReturn.setJournalFormatVersionToWrite(5); - confReturn.setAllowEphemeralPorts(true); - confReturn.setBookiePort(0); + confReturn.setAllowEphemeralPorts(false); + confReturn.setBookiePort(PortManager.nextFreePort()); confReturn.setGcWaitTime(1000); confReturn.setDiskUsageThreshold(0.999f); confReturn.setDiskUsageWarnThreshold(0.99f); + confReturn.setAllocatorPoolingPolicy(PoolingPolicy.UnpooledHeap); + confReturn.setProperty(DbLedgerStorage.WRITE_CACHE_MAX_SIZE_MB, 4); + confReturn.setProperty(DbLedgerStorage.READ_AHEAD_CACHE_MAX_SIZE_MB, 4); + /** + * If a test case hits a ZooKeeper error, retry zero times so it fails fast. + */ + confReturn.setZkRetryBackoffMaxRetries(0); setLoopbackInterfaceAndAllowLoopback(confReturn); return confReturn; } @@ -78,4 +88,12 @@ public static ServerConfiguration setLoopbackInterfaceAndAllowLoopback(ServerCon serverConf.setAllowLoopback(true); return serverConf; } + + public static ClientConfiguration newClientConfiguration() { + ClientConfiguration clientConfiguration = new ClientConfiguration(); + clientConfiguration.setTLSEnabledProtocols("TLSv1.2,TLSv1.1"); + // If a test case hits a ZooKeeper error, retry zero times so it fails fast. + clientConfiguration.setZkRetryBackoffMaxRetries(0); + return clientConfiguration; + } } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/conf/TestServerConfiguration.java
b/bookkeeper-server/src/test/java/org/apache/bookkeeper/conf/TestServerConfiguration.java index 424202d7fe5..04ac87818f7 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/conf/TestServerConfiguration.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/conf/TestServerConfiguration.java @@ -22,10 +22,13 @@ package org.apache.bookkeeper.conf; import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; import org.apache.commons.configuration.ConfigurationException; +import org.junit.Assert; import org.junit.Before; import org.junit.Test; @@ -111,4 +114,130 @@ public void testValidityOfJournalAndFileInfoVersions() throws ConfigurationExcep conf.setFileInfoFormatVersionToWrite(1); conf.validate(); } + + @Test + public void testEntryLogSizeLimit() throws ConfigurationException { + ServerConfiguration conf = new ServerConfiguration(); + try { + conf.setEntryLogSizeLimit(-1); + fail("should fail setEntryLogSizeLimit since `logSizeLimit` is too small"); + } catch (IllegalArgumentException iae) { + // expected + } + try { + conf.setProperty("logSizeLimit", "-1"); + conf.validate(); + fail("Invalid configuration since `logSizeLimit` is too small"); + } catch (ConfigurationException ce) { + // expected + } + + try { + conf.setEntryLogSizeLimit(2 * 1024 * 1024 * 1024L - 1); + fail("Should fail setEntryLogSizeLimit size `logSizeLimit` is too large"); + } catch (IllegalArgumentException iae) { + // expected + } + try { + conf.validate(); + fail("Invalid configuration since `logSizeLimit` is too large"); + } catch (ConfigurationException ce) { + // expected + } + + conf.setEntryLogSizeLimit(512 * 1024 * 1024); + conf.validate(); + assertEquals(512 * 1024 * 1024, conf.getEntryLogSizeLimit()); + + conf.setEntryLogSizeLimit(1073741824); + conf.validate(); + assertEquals(1073741824, conf.getEntryLogSizeLimit()); + } + + @Test + public void testCompactionSettings() throws ConfigurationException { + ServerConfiguration conf = new ServerConfiguration(); + long major, minor; + + // Default Values + major = conf.getMajorCompactionMaxTimeMillis(); + minor = conf.getMinorCompactionMaxTimeMillis(); + Assert.assertEquals(-1, major); + Assert.assertEquals(-1, minor); + + // Set values major then minor + conf.setMajorCompactionMaxTimeMillis(500).setMinorCompactionMaxTimeMillis(250); + major = conf.getMajorCompactionMaxTimeMillis(); + minor = conf.getMinorCompactionMaxTimeMillis(); + Assert.assertEquals(500, major); + Assert.assertEquals(250, minor); + + // Set values minor then major + conf.setMinorCompactionMaxTimeMillis(150).setMajorCompactionMaxTimeMillis(1500); + major = conf.getMajorCompactionMaxTimeMillis(); + minor = conf.getMinorCompactionMaxTimeMillis(); + Assert.assertEquals(1500, major); + Assert.assertEquals(150, minor); + + // Default Values + major = conf.getMajorCompactionInterval(); + minor = conf.getMinorCompactionInterval(); + Assert.assertEquals(3600, minor); + Assert.assertEquals(86400, major); + + // Set values major then minor + conf.setMajorCompactionInterval(43200).setMinorCompactionInterval(1800); + major = conf.getMajorCompactionInterval(); + minor = conf.getMinorCompactionInterval(); + Assert.assertEquals(1800, minor); + Assert.assertEquals(43200, major); + + // Set values minor then major + conf.setMinorCompactionInterval(900).setMajorCompactionInterval(21700); + major = conf.getMajorCompactionInterval(); + 
minor = conf.getMinorCompactionInterval(); + Assert.assertEquals(900, minor); + Assert.assertEquals(21700, major); + + conf.setMinorCompactionInterval(500); + try { + conf.validate(); + fail(); + } catch (ConfigurationException ignore) { + } + + conf.setMinorCompactionInterval(600); + conf.validate(); + + conf.setMajorCompactionInterval(550); + try { + conf.validate(); + fail(); + } catch (ConfigurationException ignore) { + } + + conf.setMajorCompactionInterval(600); + conf.validate(); + + // Default Values + double majorThreshold, minorThreshold; + majorThreshold = conf.getMajorCompactionThreshold(); + minorThreshold = conf.getMinorCompactionThreshold(); + Assert.assertEquals(0.8, majorThreshold, 0.00001); + Assert.assertEquals(0.2, minorThreshold, 0.00001); + + // Set values major then minor + conf.setMajorCompactionThreshold(0.7).setMinorCompactionThreshold(0.1); + majorThreshold = conf.getMajorCompactionThreshold(); + minorThreshold = conf.getMinorCompactionThreshold(); + Assert.assertEquals(0.7, majorThreshold, 0.00001); + Assert.assertEquals(0.1, minorThreshold, 0.00001); + + // Set values minor then major + conf.setMinorCompactionThreshold(0.3).setMajorCompactionThreshold(0.6); + majorThreshold = conf.getMajorCompactionThreshold(); + minorThreshold = conf.getMinorCompactionThreshold(); + Assert.assertEquals(0.6, majorThreshold, 0.00001); + Assert.assertEquals(0.3, minorThreshold, 0.00001); + } } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/discover/AbstractTestZkRegistrationClient.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/discover/AbstractTestZkRegistrationClient.java new file mode 100644 index 00000000000..127dd4b28d8 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/discover/AbstractTestZkRegistrationClient.java @@ -0,0 +1,601 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
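The testCompactionSettings hunk above pins down the validation rules for the compaction knobs: the minor interval must be at least 600 seconds (500 fails validate(), 600 passes), the major interval must not be smaller than the minor one, and the max-time settings default to -1 (presumably meaning no time bound). A compact sketch of a configuration that passes validate() under those rules:

import org.apache.bookkeeper.conf.ServerConfiguration;
import org.apache.commons.configuration.ConfigurationException;

class CompactionConfigSketch {
    static ServerConfiguration demo() throws ConfigurationException {
        ServerConfiguration conf = new ServerConfiguration();
        conf.setMinorCompactionInterval(3600)    // seconds; validate() rejects values below 600
            .setMajorCompactionInterval(86400);  // must not be smaller than the minor interval
        conf.setMinorCompactionThreshold(0.2)
            .setMajorCompactionThreshold(0.8);
        conf.setMinorCompactionMaxTimeMillis(250)  // default is -1
            .setMajorCompactionMaxTimeMillis(500);
        conf.validate();
        return conf;
    }
}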
+ */ +package org.apache.bookkeeper.discover; + +import static org.apache.bookkeeper.common.concurrent.FutureUtils.collect; +import static org.apache.bookkeeper.common.concurrent.FutureUtils.result; +import static org.apache.bookkeeper.common.testing.MoreAsserts.assertSetEquals; +import static org.apache.bookkeeper.discover.ZKRegistrationClient.ZK_CONNECT_BACKOFF_MS; +import static org.apache.bookkeeper.util.BookKeeperConstants.AVAILABLE_NODE; +import static org.apache.bookkeeper.util.BookKeeperConstants.COOKIE_NODE; +import static org.apache.bookkeeper.util.BookKeeperConstants.READONLY; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertSame; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import com.google.common.collect.Lists; +import java.time.Duration; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.ScheduledExecutorService; +import lombok.extern.slf4j.Slf4j; +import org.apache.bookkeeper.client.BKException.ZKException; +import org.apache.bookkeeper.common.testing.executors.MockExecutorController; +import org.apache.bookkeeper.discover.RegistrationClient.RegistrationListener; +import org.apache.bookkeeper.discover.ZKRegistrationClient.WatchTask; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.versioning.LongVersion; +import org.apache.bookkeeper.versioning.Versioned; +import org.apache.bookkeeper.zookeeper.MockZooKeeperTestCase; +import org.apache.zookeeper.AsyncCallback.Children2Callback; +import org.apache.zookeeper.KeeperException.Code; +import org.apache.zookeeper.Watcher; +import org.apache.zookeeper.Watcher.Event.EventType; +import org.apache.zookeeper.Watcher.Event.KeeperState; +import org.apache.zookeeper.data.Stat; +import org.junit.After; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TestName; +import org.junit.runner.RunWith; +import org.mockito.junit.MockitoJUnitRunner; + +/** + * Unit test of {@link RegistrationClient}. 
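The new AbstractTestZkRegistrationClient drives ZKRegistrationClient against a mocked ZooKeeper handle and a controlled executor. Stripped to its client-facing essentials, the pattern under test looks like the sketch below (the "/ledgers" root path is a placeholder):

import java.util.Set;
import java.util.concurrent.ScheduledExecutorService;
import org.apache.bookkeeper.discover.RegistrationClient.RegistrationListener;
import org.apache.bookkeeper.discover.ZKRegistrationClient;
import org.apache.bookkeeper.net.BookieId;
import org.apache.bookkeeper.versioning.Versioned;
import org.apache.zookeeper.ZooKeeper;

class RegistrationWatchSketch {
    static void watch(ZooKeeper zk, ScheduledExecutorService executor) throws Exception {
        ZKRegistrationClient client = new ZKRegistrationClient(
                zk, "/ledgers", executor, true /* track bookie address changes */);
        RegistrationListener listener = (Versioned<Set<BookieId>> bookies) ->
                System.out.println("writable bookies @" + bookies.getVersion()
                        + ": " + bookies.getValue());
        client.watchWritableBookies(listener).get(); // completes after the first fetch
    }
}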
+ */
+@RunWith(MockitoJUnitRunner.Silent.class)
+@Slf4j
+public abstract class AbstractTestZkRegistrationClient extends MockZooKeeperTestCase {
+
+    @Rule
+    public final TestName runtime = new TestName();
+
+    private String ledgersPath;
+    private String regPath;
+    private String regAllPath;
+    private String regReadonlyPath;
+    private ZKRegistrationClient zkRegistrationClient;
+    private ScheduledExecutorService mockExecutor;
+    private MockExecutorController controller;
+
+    private final boolean bookieAddressChangeTracking;
+
+    public AbstractTestZkRegistrationClient(boolean bookieAddressChangeTracking) {
+        this.bookieAddressChangeTracking = bookieAddressChangeTracking;
+    }
+
+    @Override
+    @Before
+    public void setup() throws Exception {
+        super.setup();
+
+        this.ledgersPath = "/" + runtime.getMethodName();
+        this.regPath = ledgersPath + "/" + AVAILABLE_NODE;
+        this.regAllPath = ledgersPath + "/" + COOKIE_NODE;
+        this.regReadonlyPath = regPath + "/" + READONLY;
+        this.mockExecutor = mock(ScheduledExecutorService.class);
+        this.controller = new MockExecutorController()
+            .controlExecute(mockExecutor)
+            .controlSubmit(mockExecutor)
+            .controlSchedule(mockExecutor)
+            .controlScheduleAtFixedRate(mockExecutor, 10);
+        this.zkRegistrationClient = new ZKRegistrationClient(
+            mockZk,
+            ledgersPath,
+            mockExecutor,
+            bookieAddressChangeTracking
+        );
+        assertEquals(bookieAddressChangeTracking, zkRegistrationClient.isBookieAddressTracking());
+    }
+
+    @After
+    public void teardown() throws Exception {
+        super.teardown();
+
+        if (null != zkRegistrationClient) {
+            zkRegistrationClient.close();
+        }
+    }
+
+    private static Set<BookieId> prepareNBookies(int num) {
+        Set<BookieId> bookies = new HashSet<>();
+        for (int i = 0; i < num; i++) {
+            bookies.add(new BookieSocketAddress("127.0.0.1", 3181 + i).toBookieId());
+        }
+        return bookies;
+    }
+
+    private void prepareReadBookieServiceInfo(BookieId address, boolean readonly) throws Exception {
+        if (readonly) {
+            mockZkGetData(regPath + "/" + address.toString(),
+                zkRegistrationClient.isBookieAddressTracking(),
+                Code.NONODE.intValue(),
+                new byte[] {},
+                new Stat());
+            mockZkGetData(regReadonlyPath + "/" + address,
+                zkRegistrationClient.isBookieAddressTracking(),
+                Code.OK.intValue(),
+                new byte[] {},
+                new Stat());
+        } else {
+            mockZkGetData(regPath + "/" + address.toString(),
+                zkRegistrationClient.isBookieAddressTracking(),
+                Code.OK.intValue(),
+                new byte[] {},
+                new Stat());
+            mockZkGetData(regReadonlyPath + "/" + address,
+                zkRegistrationClient.isBookieAddressTracking(),
+                Code.NONODE.intValue(),
+                new byte[] {},
+                new Stat());
+        }
+    }
+
+    @Test
+    public void testGetWritableBookies() throws Exception {
+        Set<BookieId> addresses = prepareNBookies(10);
+        List<String> children = Lists.newArrayList();
+        for (BookieId address : addresses) {
+            children.add(address.toString());
+            prepareReadBookieServiceInfo(address, false);
+        }
+
+        Stat stat = mock(Stat.class);
+        when(stat.getCversion()).thenReturn(1234);
+        mockGetChildren(
+            regPath, false,
+            Code.OK.intValue(), children, stat);
+
+        Versioned<Set<BookieId>> result =
+            result(zkRegistrationClient.getWritableBookies());
+
+        assertEquals(new LongVersion(1234), result.getVersion());
+        assertSetEquals(
+            addresses, result.getValue());
+    }
+
+    @Test
+    public void testGetAllBookies() throws Exception {
+        Set<BookieId> addresses = prepareNBookies(10);
+        List<String> children = Lists.newArrayList();
+
+        int i = 0;
+        for (BookieId address : addresses) {
+            children.add(address.toString());
+            boolean readonly = i++ % 2 == 0;
+            prepareReadBookieServiceInfo(address, readonly);
+        }
+        Stat stat = mock(Stat.class);
+        when(stat.getCversion()).thenReturn(1234);
+        mockGetChildren(
+            regAllPath, false,
+            Code.OK.intValue(), children, stat);
+
+        Versioned<Set<BookieId>> result =
+            result(zkRegistrationClient.getAllBookies());
+
+        assertEquals(new LongVersion(1234), result.getVersion());
+        assertSetEquals(
+            addresses, result.getValue());
+    }
+
+    @Test
+    public void testGetReadOnlyBookies() throws Exception {
+        Set<BookieId> addresses = prepareNBookies(10);
+        List<String> children = Lists.newArrayList();
+        for (BookieId address : addresses) {
+            children.add(address.toString());
+            prepareReadBookieServiceInfo(address, false);
+        }
+        Stat stat = mock(Stat.class);
+        when(stat.getCversion()).thenReturn(1234);
+        mockGetChildren(
+            regReadonlyPath, false,
+            Code.OK.intValue(), children, stat);
+
+        Versioned<Set<BookieId>> result =
+            result(zkRegistrationClient.getReadOnlyBookies());
+
+        assertEquals(new LongVersion(1234), result.getVersion());
+        assertSetEquals(
+            addresses, result.getValue());
+    }
+
+    @Test
+    public void testGetWritableBookiesFailure() throws Exception {
+        mockGetChildren(
+            regPath, false,
+            Code.NONODE.intValue(), null, null);
+
+        try {
+            result(zkRegistrationClient.getWritableBookies());
+            fail("Should fail to get writable bookies");
+        } catch (ZKException zke) {
+            // expected to throw zookeeper exception
+        }
+    }
+
+    @Test
+    public void testGetAllBookiesFailure() throws Exception {
+        mockGetChildren(
+            regAllPath, false,
+            Code.NONODE.intValue(), null, null);
+
+        try {
+            result(zkRegistrationClient.getAllBookies());
+            fail("Should fail to get all bookies");
+        } catch (ZKException zke) {
+            // expected to throw zookeeper exception
+        }
+    }
+
+    @Test
+    public void testGetReadOnlyBookiesFailure() throws Exception {
+        mockGetChildren(
+            regReadonlyPath, false,
+            Code.NONODE.intValue(), null, null);
+
+        try {
+            result(zkRegistrationClient.getReadOnlyBookies());
+            fail("Should fail to get read only bookies");
+        } catch (ZKException zke) {
+            // expected to throw zookeeper exception
+        }
+    }
+
+    @Test
+    public void testWatchWritableBookiesSuccess() throws Exception {
+        testWatchBookiesSuccess(true);
+    }
+
+    @Test
+    public void testWatchReadonlyBookiesSuccess() throws Exception {
+        testWatchBookiesSuccess(false);
+    }
+
+    @SuppressWarnings("unchecked")
+    private void testWatchBookiesSuccess(boolean isWritable)
+            throws Exception {
+        //
+        // 1. test watch bookies with a listener
+        //
+
+        LinkedBlockingQueue<Versioned<Set<BookieId>>> updates =
+            spy(new LinkedBlockingQueue<>());
+        RegistrationListener listener = bookies -> {
+            try {
+                updates.put(bookies);
+            } catch (InterruptedException e) {
+                log.warn("Interrupted on enqueue bookie updates", e);
+            }
+        };
+
+        Set<BookieId> addresses = prepareNBookies(10);
+        List<String> children = Lists.newArrayList();
+        for (BookieId address : addresses) {
+            children.add(address.toString());
+            prepareReadBookieServiceInfo(address, !isWritable);
+        }
+        Stat stat = mock(Stat.class);
+        when(stat.getCversion()).thenReturn(1234);
+
+        mockGetChildren(
+            isWritable ? regPath : regReadonlyPath,
+            true,
+            Code.OK.intValue(), children, stat);
+
+        if (isWritable) {
+            result(zkRegistrationClient.watchWritableBookies(listener));
+        } else {
+            result(zkRegistrationClient.watchReadOnlyBookies(listener));
+        }
+
+        Versioned<Set<BookieId>> update = updates.take();
+        verify(updates, times(1)).put(any(Versioned.class));
+        assertEquals(new LongVersion(1234), update.getVersion());
+        assertSetEquals(
+            addresses, update.getValue());
+
+        verify(mockZk, times(1))
+            .getChildren(anyString(), any(Watcher.class), any(Children2Callback.class), any());
+
+        //
+        // 2. test watch bookies with a second listener. the second listener is served the
+        //    cached bookies without another `getChildren` call
+        //
+
+        // register another listener
+        LinkedBlockingQueue<Versioned<Set<BookieId>>> secondUpdates =
+            spy(new LinkedBlockingQueue<>());
+        RegistrationListener secondListener = bookies -> {
+            try {
+                secondUpdates.put(bookies);
+            } catch (InterruptedException e) {
+                log.warn("Interrupted on enqueue bookie updates", e);
+            }
+        };
+        if (isWritable) {
+            result(zkRegistrationClient.watchWritableBookies(secondListener));
+        } else {
+            result(zkRegistrationClient.watchReadOnlyBookies(secondListener));
+        }
+        Versioned<Set<BookieId>> secondListenerUpdate = secondUpdates.take();
+        // the first listener is not notified with any new update
+        verify(updates, times(1)).put(any(Versioned.class));
+        // the second listener receives the same update the first listener received before
+        verify(secondUpdates, times(1)).put(any(Versioned.class));
+        assertSame(update.getVersion(), secondListenerUpdate.getVersion());
+        assertSame(update.getValue(), secondListenerUpdate.getValue());
+
+        // the second listener is served the cached value without another getChildren call
+        verify(mockZk, times(1))
+            .getChildren(anyString(), any(Watcher.class), any(Children2Callback.class), any());
+
+        //
+        // 3. simulate session expiry. it triggers the watcher to refetch bookies,
+        //    but since there are no updates on bookies, the registered listeners are not notified.
+        //
+
+        notifyWatchedEvent(
+            EventType.None,
+            KeeperState.Expired,
+            isWritable ? regPath : regReadonlyPath);
+
+        // if the session expires, the watcher task gets into backoff state
+        controller.advance(Duration.ofMillis(ZK_CONNECT_BACKOFF_MS));
+
+        // the same bookie set is returned and the getChildren calls increase to 2,
+        // but since there are no updates, no notification is sent.
+        verify(mockZk, times(2))
+            .getChildren(anyString(), any(Watcher.class), any(Children2Callback.class), any());
+        assertNull(updates.poll());
+        // neither listener nor secondListener receives the old update again
+        verify(updates, times(1)).put(any(Versioned.class));
+        verify(secondUpdates, times(1)).put(any(Versioned.class));
+
+        //
+        // 4. notify with new bookies. both listeners are notified with the new bookies.
+        //
+
+        Set<BookieId> newAddresses = prepareNBookies(20);
+        List<String> newChildren = Lists.newArrayList();
+        for (BookieId address : newAddresses) {
+            newChildren.add(address.toString());
+            prepareReadBookieServiceInfo(address, !isWritable);
+        }
+        Stat newStat = mock(Stat.class);
+        when(newStat.getCversion()).thenReturn(1235);
+
+        mockGetChildren(
+            isWritable ? regPath : regReadonlyPath,
+            true,
+            Code.OK.intValue(), newChildren, newStat);
+
+        // trigger watcher
+        notifyWatchedEvent(
+            EventType.NodeChildrenChanged,
+            KeeperState.SyncConnected,
+            isWritable ? regPath : regReadonlyPath);
+
+        update = updates.take();
+        assertEquals(new LongVersion(1235), update.getVersion());
+        assertSetEquals(
+            newAddresses, update.getValue());
+        secondListenerUpdate = secondUpdates.take();
+        assertSame(update.getVersion(), secondListenerUpdate.getVersion());
+        assertSame(update.getValue(), secondListenerUpdate.getValue());
+
+        verify(mockZk, times(3))
+            .getChildren(anyString(), any(Watcher.class), any(Children2Callback.class), any());
+        verify(updates, times(2)).put(any(Versioned.class));
+        verify(secondUpdates, times(2)).put(any(Versioned.class));
+
+        //
+        // 5. unwatch the second listener and notify with new bookies again. only the first
+        //    listener is notified with the new bookies.
+        //
+
+        newAddresses = prepareNBookies(25);
+        newChildren.clear();
+        newChildren = Lists.newArrayList();
+        for (BookieId address : newAddresses) {
+            newChildren.add(address.toString());
+            prepareReadBookieServiceInfo(address, !isWritable);
+        }
+        newStat = mock(Stat.class);
+        when(newStat.getCversion()).thenReturn(1236);
+
+        mockGetChildren(
+            isWritable ? regPath : regReadonlyPath,
+            true,
+            Code.OK.intValue(), newChildren, newStat);
+
+        if (isWritable) {
+            assertEquals(2, zkRegistrationClient.getWatchWritableBookiesTask().getNumListeners());
+            zkRegistrationClient.unwatchWritableBookies(secondListener);
+            assertEquals(1, zkRegistrationClient.getWatchWritableBookiesTask().getNumListeners());
+        } else {
+            assertEquals(2, zkRegistrationClient.getWatchReadOnlyBookiesTask().getNumListeners());
+            zkRegistrationClient.unwatchReadOnlyBookies(secondListener);
+            assertEquals(1, zkRegistrationClient.getWatchReadOnlyBookiesTask().getNumListeners());
+        }
+
+        // trigger watcher
+        notifyWatchedEvent(
+            EventType.NodeChildrenChanged,
+            KeeperState.SyncConnected,
+            isWritable ? regPath : regReadonlyPath);
+
+        update = updates.take();
+        assertEquals(new LongVersion(1236), update.getVersion());
+        assertSetEquals(
+            newAddresses, update.getValue());
+        secondListenerUpdate = secondUpdates.poll();
+        assertNull(secondListenerUpdate);
+
+        verify(mockZk, times(4))
+            .getChildren(anyString(), any(Watcher.class), any(Children2Callback.class), any());
+        verify(updates, times(3)).put(any(Versioned.class));
+        verify(secondUpdates, times(2)).put(any(Versioned.class));
+
+        //
+        // 6. unwatch the first listener. the watch task is closed and the zk watcher is removed.
+        //
+
+        WatchTask expectedWatcher;
+        if (isWritable) {
+            expectedWatcher = zkRegistrationClient.getWatchWritableBookiesTask();
+            assertFalse(expectedWatcher.isClosed());
+            zkRegistrationClient.unwatchWritableBookies(listener);
+            assertNull(zkRegistrationClient.getWatchWritableBookiesTask());
+        } else {
+            expectedWatcher = zkRegistrationClient.getWatchReadOnlyBookiesTask();
+            assertFalse(expectedWatcher.isClosed());
+            zkRegistrationClient.unwatchReadOnlyBookies(listener);
+            assertNull(zkRegistrationClient.getWatchReadOnlyBookiesTask());
+        }
+        // the watch task is closed now that its last listener has been removed
+        assertTrue(expectedWatcher.isClosed());
+    }
+
+    @Test
+    public void testWatchWritableBookiesTwice() throws Exception {
+        testWatchBookiesTwice(true);
+    }
+
+    @Test
+    public void testWatchReadonlyBookiesTwice() throws Exception {
+        testWatchBookiesTwice(false);
+    }
+
+    private void testWatchBookiesTwice(boolean isWritable)
+            throws Exception {
+        int zkCallbackDelayMs = 100;
+
+        Set<BookieId> addresses = prepareNBookies(10);
+        List<String> children = Lists.newArrayList();
+        for (BookieId address : addresses) {
+            children.add(address.toString());
+            prepareReadBookieServiceInfo(address, !isWritable);
+        }
+        Stat stat = mock(Stat.class);
+        when(stat.getCversion()).thenReturn(1234);
+
+        mockGetChildren(
+            isWritable ?
regPath : regReadonlyPath, + true, + Code.OK.intValue(), children, stat, zkCallbackDelayMs); + + CompletableFuture>> firstResult = new CompletableFuture<>(); + RegistrationListener firstListener = bookies -> firstResult.complete(bookies); + + CompletableFuture>> secondResult = new CompletableFuture<>(); + RegistrationListener secondListener = bookies -> secondResult.complete(bookies); + + List> watchFutures = Lists.newArrayListWithExpectedSize(2); + if (isWritable) { + watchFutures.add(zkRegistrationClient.watchWritableBookies(firstListener)); + watchFutures.add(zkRegistrationClient.watchWritableBookies(secondListener)); + } else { + watchFutures.add(zkRegistrationClient.watchReadOnlyBookies(firstListener)); + watchFutures.add(zkRegistrationClient.watchReadOnlyBookies(secondListener)); + } + + // trigger zkCallbackExecutor to execute getChildren callback + zkCallbackController.advance(Duration.ofMillis(zkCallbackDelayMs)); + + result(collect(watchFutures)); + assertEquals(firstResult.get().getVersion(), secondResult.get().getVersion()); + assertSetEquals(firstResult.get().getValue(), secondResult.get().getValue()); + } + + @Test + public void testWatchWritableBookiesFailure() throws Exception { + testWatchBookiesFailure(true); + } + + @Test + public void testWatchReadonlyBookiesFailure() throws Exception { + testWatchBookiesFailure(false); + } + + private void testWatchBookiesFailure(boolean isWritable) + throws Exception { + int zkCallbackDelayMs = 100; + + mockGetChildren( + isWritable ? regPath : regReadonlyPath, + true, + Code.NONODE.intValue(), null, null, zkCallbackDelayMs); + + CompletableFuture>> listenerResult = new CompletableFuture<>(); + RegistrationListener listener = bookies -> listenerResult.complete(bookies); + + CompletableFuture watchFuture; + + WatchTask watchTask; + if (isWritable) { + watchFuture = zkRegistrationClient.watchWritableBookies(listener); + watchTask = zkRegistrationClient.getWatchWritableBookiesTask(); + } else { + watchFuture = zkRegistrationClient.watchReadOnlyBookies(listener); + watchTask = zkRegistrationClient.getWatchReadOnlyBookiesTask(); + } + assertNotNull(watchTask); + assertEquals(1, watchTask.getNumListeners()); + + // trigger zkCallbackExecutor to execute getChildren callback + zkCallbackController.advance(Duration.ofMillis(zkCallbackDelayMs)); + + try { + result(watchFuture); + fail("Should fail to watch writable bookies if reg path doesn't exist"); + } catch (ZKException zke) { + // expected + } + assertEquals(0, watchTask.getNumListeners()); + assertTrue(watchTask.isClosed()); + if (isWritable) { + assertNull(zkRegistrationClient.getWatchWritableBookiesTask()); + } else { + assertNull(zkRegistrationClient.getWatchReadOnlyBookiesTask()); + } + } + +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/discover/BookieServiceInfoTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/discover/BookieServiceInfoTest.java new file mode 100644 index 00000000000..4173d630002 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/discover/BookieServiceInfoTest.java @@ -0,0 +1,93 @@ +/* + * Copyright 2020 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
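(The new `BookieServiceInfoTest` whose license header begins above round-trips a `BookieServiceInfo` through the ZK serializers. A compact sketch of that round trip, using only names that appear in the patch; it assumes same-package access to the (de)serialization helpers, as the test itself has.)

```java
package org.apache.bookkeeper.discover;

import java.util.Collections;
import org.apache.bookkeeper.net.BookieId;

public class ServiceInfoRoundTrip {
    public static void main(String[] args) throws Exception {
        // build a service info with a single RPC endpoint, as in the test
        BookieServiceInfo info = new BookieServiceInfo();
        BookieServiceInfo.Endpoint rpc = new BookieServiceInfo.Endpoint(
            "1", 3181, "localhost", "bookie-rpc",
            Collections.emptyList(), Collections.emptyList());
        info.setEndpoints(Collections.singletonList(rpc));

        // serialize on the registration write path, deserialize on the read path
        byte[] bytes = ZKRegistrationManager.serializeBookieServiceInfo(info);
        BookieServiceInfo back = ZKRegistrationClient.deserializeBookieServiceInfo(
            BookieId.parse("127.0.0.1:3181"), bytes);
        System.out.println(back.getEndpoints().get(0).getProtocol()); // bookie-rpc
    }
}
```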
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bookkeeper.discover; + +import static org.apache.bookkeeper.discover.ZKRegistrationClient.deserializeBookieServiceInfo; +import static org.apache.bookkeeper.discover.ZKRegistrationManager.serializeBookieServiceInfo; +import static org.hamcrest.Matchers.is; +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertThat; + +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.Objects; +import org.apache.bookkeeper.discover.BookieServiceInfo.Endpoint; +import org.apache.bookkeeper.net.BookieId; +import org.junit.Test; + +/** + * Unit test of the {@link BookieServiceInfo} serialization/deserialization methods. + */ +public class BookieServiceInfoTest { + + @Test + public void testSerializeDeserializeBookieServiceInfo() throws Exception { + String bookieId = "127.0.0.1:3181"; + { + BookieServiceInfo expected = new BookieServiceInfo(); + Endpoint endpointRPC = new Endpoint("1", 1281, "localhost", "bookie-rpc", + Collections.emptyList(), Collections.emptyList()); + Endpoint endpointHTTP = new Endpoint("2", 1281, "localhost", "bookie-http", + Collections.emptyList(), Collections.emptyList()); + expected.setEndpoints(Arrays.asList(endpointRPC, endpointHTTP)); + + Map properties = new HashMap<>(); + properties.put("test", "value"); + expected.setProperties(properties); + + byte[] serialized = serializeBookieServiceInfo(expected); + BookieServiceInfo deserialized = deserializeBookieServiceInfo(BookieId.parse(bookieId), serialized); + + assertBookieServiceInfoEquals(expected, deserialized); + } + } + + @Test + public void testDeserializeBookieServiceInfo() throws Exception { + BookieId bookieId = BookieId.parse("127.0.0.1:3181"); + { + BookieServiceInfo expected = BookieServiceInfoUtils.buildLegacyBookieServiceInfo(bookieId.toString()); + BookieServiceInfo deserialized = deserializeBookieServiceInfo(bookieId, null); + + assertBookieServiceInfoEquals(expected, deserialized); + } + { + BookieServiceInfo expected = BookieServiceInfoUtils.buildLegacyBookieServiceInfo(bookieId.toString()); + BookieServiceInfo deserialized = deserializeBookieServiceInfo(bookieId, new byte[]{}); + + assertBookieServiceInfoEquals(expected, deserialized); + } + } + + private void assertBookieServiceInfoEquals(BookieServiceInfo expected, BookieServiceInfo provided) { + for (Endpoint ep : expected.getEndpoints()) { + Endpoint e = provided.getEndpoints().stream() + .filter(ee -> Objects.equals(ee.getId(), ep.getId())) + .findFirst() + .get(); + assertThat(e.getHost(), is(ep.getHost())); + assertThat(e.getPort(), is(ep.getPort())); + assertThat(e.getProtocol(), is(ep.getProtocol())); + assertArrayEquals(e.getAuth().toArray(), ep.getAuth().toArray()); + assertArrayEquals(e.getExtensions().toArray(), ep.getExtensions().toArray()); + } + assertEquals(expected.getProperties(), provided.getProperties()); + } + +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/discover/MockRegistrationClient.java 
b/bookkeeper-server/src/test/java/org/apache/bookkeeper/discover/MockRegistrationClient.java
index 40178b98f88..74455f9a897 100644
--- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/discover/MockRegistrationClient.java
+++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/discover/MockRegistrationClient.java
@@ -18,14 +18,14 @@ package org.apache.bookkeeper.discover;
 
+import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashSet;
 import java.util.Set;
-
 import java.util.concurrent.CompletableFuture;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
-import org.apache.bookkeeper.net.BookieSocketAddress;
+import org.apache.bookkeeper.net.BookieId;
 import org.apache.bookkeeper.versioning.LongVersion;
 import org.apache.bookkeeper.versioning.Versioned;
 
@@ -37,8 +37,9 @@ public class MockRegistrationClient implements RegistrationClient {
     final ExecutorService executor;
     private long currentVersion = 0;
-    private Set<BookieSocketAddress> bookies = new HashSet<>();
-    private Set<BookieSocketAddress> readOnlyBookies = new HashSet<>();
+    private Set<BookieId> bookies = new HashSet<>();
+    private Set<BookieId> allBookies = new HashSet<>();
+    private Set<BookieId> readOnlyBookies = new HashSet<>();
     private Set<RegistrationListener> bookieWatchers = new HashSet<>();
     private Set<RegistrationListener> readOnlyBookieWatchers = new HashSet<>();
 
@@ -51,56 +52,48 @@ public void close() {
         executor.shutdownNow();
     }
 
-    private static Versioned<Set<BookieSocketAddress>> versioned(Set<BookieSocketAddress> bookies, long version) {
+    private static Versioned<Set<BookieId>> versioned(Set<BookieId> bookies, long version) {
         return new Versioned<>(Collections.unmodifiableSet(bookies), new LongVersion(version));
     }
 
-    CompletableFuture<Void> addBookies(BookieSocketAddress... bookies) {
+    public CompletableFuture<Void> addBookies(BookieId... bookies) {
         CompletableFuture<Void> promise = new CompletableFuture<>();
         executor.submit(() -> {
             currentVersion++;
-            for (BookieSocketAddress b : bookies) {
-                this.bookies.add(b);
-            }
+            Collections.addAll(this.bookies, bookies);
             bookieWatchers.forEach(w -> w.onBookiesChanged(versioned(this.bookies, currentVersion)));
             promise.complete(null);
         });
         return promise;
     }
 
-    CompletableFuture<Void> removeBookies(BookieSocketAddress... bookies) {
+    public CompletableFuture<Void> removeBookies(BookieId... bookies) {
         CompletableFuture<Void> promise = new CompletableFuture<>();
         executor.submit(() -> {
             currentVersion++;
-            for (BookieSocketAddress b : bookies) {
-                this.bookies.add(b);
-            }
+            this.bookies.removeAll(Arrays.asList(bookies));
             bookieWatchers.forEach(w -> w.onBookiesChanged(versioned(this.bookies, currentVersion)));
             promise.complete(null);
         });
         return promise;
     }
 
-    CompletableFuture<Void> addReadOnlyBookies(BookieSocketAddress... bookies) {
+    public CompletableFuture<Void> addReadOnlyBookies(BookieId... bookies) {
         CompletableFuture<Void> promise = new CompletableFuture<>();
         executor.submit(() -> {
             currentVersion++;
-            for (BookieSocketAddress b : bookies) {
-                this.readOnlyBookies.add(b);
-            }
+            this.readOnlyBookies.addAll(Arrays.asList(bookies));
             readOnlyBookieWatchers.forEach(w -> w.onBookiesChanged(versioned(readOnlyBookies, currentVersion)));
             promise.complete(null);
         });
         return promise;
     }
 
-    CompletableFuture<Void> removeReadOnlyBookies(BookieSocketAddress... bookies) {
+    public CompletableFuture<Void> removeReadOnlyBookies(BookieId... bookies) {
         CompletableFuture<Void> promise = new CompletableFuture<>();
         executor.submit(() -> {
             currentVersion++;
-            for (BookieSocketAddress b : bookies) {
-                this.readOnlyBookies.add(b);
-            }
+            this.readOnlyBookies.removeAll(Arrays.asList(bookies));
             readOnlyBookieWatchers.forEach(w -> w.onBookiesChanged(versioned(readOnlyBookies, currentVersion)));
             promise.complete(null);
         });
@@ -108,15 +101,22 @@ CompletableFuture<Void> removeReadOnlyBookies(BookieSocketAddress... bookies) {
     }
 
     @Override
-    public CompletableFuture<Versioned<Set<BookieSocketAddress>>> getWritableBookies() {
-        CompletableFuture<Versioned<Set<BookieSocketAddress>>> promise = new CompletableFuture<>();
+    public CompletableFuture<Versioned<Set<BookieId>>> getWritableBookies() {
+        CompletableFuture<Versioned<Set<BookieId>>> promise = new CompletableFuture<>();
         executor.submit(() -> promise.complete(versioned(bookies, currentVersion)));
         return promise;
     }
 
     @Override
-    public CompletableFuture<Versioned<Set<BookieSocketAddress>>> getReadOnlyBookies() {
-        CompletableFuture<Versioned<Set<BookieSocketAddress>>> promise = new CompletableFuture<>();
+    public CompletableFuture<Versioned<Set<BookieId>>> getAllBookies() {
+        CompletableFuture<Versioned<Set<BookieId>>> promise = new CompletableFuture<>();
+        executor.submit(() -> promise.complete(versioned(allBookies, currentVersion)));
+        return promise;
+    }
+
+    @Override
+    public CompletableFuture<Versioned<Set<BookieId>>> getReadOnlyBookies() {
+        CompletableFuture<Versioned<Set<BookieId>>> promise = new CompletableFuture<>();
         executor.submit(() -> promise.complete(versioned(readOnlyBookies, currentVersion)));
         return promise;
     }
diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/discover/MockRegistrationManager.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/discover/MockRegistrationManager.java
new file mode 100644
index 00000000000..44631018b29
--- /dev/null
+++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/discover/MockRegistrationManager.java
@@ -0,0 +1,140 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.bookkeeper.discover;
+
+import java.util.concurrent.ConcurrentHashMap;
+import org.apache.bookkeeper.bookie.BookieException;
+import org.apache.bookkeeper.net.BookieId;
+import org.apache.bookkeeper.versioning.LongVersion;
+import org.apache.bookkeeper.versioning.Version;
+import org.apache.bookkeeper.versioning.Versioned;
+
+/**
+ * Mock implementation of RegistrationManager.
+ */
+public class MockRegistrationManager implements RegistrationManager {
+    private final ConcurrentHashMap<BookieId, Versioned<byte[]>> cookies = new ConcurrentHashMap<>();
+
+    @Override
+    public void close() {}
+
+    @Override
+    public String getClusterInstanceId() throws BookieException {
+        return "mock-cluster";
+    }
+
+    @Override
+    public void registerBookie(BookieId bookieId, boolean readOnly,
+                               BookieServiceInfo serviceInfo) throws BookieException {
+        throw new UnsupportedOperationException("Not implemented in mock.
Implement if you need it"); + } + + @Override + public void unregisterBookie(BookieId bookieId, boolean readOnly) throws BookieException { + throw new UnsupportedOperationException("Not implemented in mock. Implement if you need it"); + } + + @Override + public boolean isBookieRegistered(BookieId bookieId) throws BookieException { + throw new UnsupportedOperationException("Not implemented in mock. Implement if you need it"); + } + + @Override + public void writeCookie(BookieId bookieId, Versioned cookieData) throws BookieException { + try { + cookies.compute(bookieId, (bookieId1, current) -> { + if (cookieData.getVersion() == Version.NEW) { + if (current == null) { + return new Versioned(cookieData.getValue(), new LongVersion(1)); + } else { + throw new RuntimeException(new BookieException.CookieExistException(bookieId.getId())); + } + } else { + if (current != null + && cookieData.getVersion().equals(current.getVersion())) { + LongVersion oldVersion = (LongVersion) current.getVersion(); + LongVersion newVersion = new LongVersion(oldVersion.getLongVersion() + 1); + return new Versioned(cookieData.getValue(), newVersion); + } else { + throw new RuntimeException(new BookieException.CookieExistException(bookieId.getId())); + } + } + }); + } catch (RuntimeException e) { + if (e.getCause() instanceof BookieException) { + throw (BookieException) e.getCause(); + } + } + } + + @Override + public Versioned readCookie(BookieId bookieId) throws BookieException { + Versioned cookie = cookies.get(bookieId); + if (cookie == null) { + throw new BookieException.CookieNotFoundException(bookieId.toString()); + } + return cookie; + } + + @Override + public void removeCookie(BookieId bookieId, Version version) throws BookieException { + try { + cookies.compute(bookieId, (bookieId1, current) -> { + if (current == null) { + throw new RuntimeException(new BookieException.CookieNotFoundException(bookieId.toString())); + } else if (current.getVersion().equals(version)) { + return null; + } else { + throw new RuntimeException(new BookieException.MetadataStoreException("Bad version")); + } + }); + } catch (RuntimeException e) { + if (e.getCause() instanceof BookieException) { + throw (BookieException) e.getCause(); + } + } + + } + + @Override + public boolean prepareFormat() throws Exception { + throw new UnsupportedOperationException("Not implemented in mock. Implement if you need it"); + } + + @Override + public boolean initNewCluster() throws Exception { + throw new UnsupportedOperationException("Not implemented in mock. Implement if you need it"); + } + + @Override + public boolean format() throws Exception { + throw new UnsupportedOperationException("Not implemented in mock. Implement if you need it"); + } + + @Override + public boolean nukeExistingCluster() throws Exception { + throw new UnsupportedOperationException("Not implemented in mock. Implement if you need it"); + } + + @Override + public void addRegistrationListener(RegistrationListener listener) { + throw new UnsupportedOperationException("Not implemented in mock. 
Implement if you need it"); + } +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/discover/TestZkRegistrationClient.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/discover/TestZkRegistrationClient.java deleted file mode 100644 index 1f5e0b68963..00000000000 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/discover/TestZkRegistrationClient.java +++ /dev/null @@ -1,518 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.bookkeeper.discover; - -import static org.apache.bookkeeper.common.concurrent.FutureUtils.collect; -import static org.apache.bookkeeper.common.concurrent.FutureUtils.result; -import static org.apache.bookkeeper.common.testing.MoreAsserts.assertSetEquals; -import static org.apache.bookkeeper.discover.ZKRegistrationClient.ZK_CONNECT_BACKOFF_MS; -import static org.apache.bookkeeper.util.BookKeeperConstants.AVAILABLE_NODE; -import static org.apache.bookkeeper.util.BookKeeperConstants.READONLY; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertSame; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; -import static org.mockito.ArgumentMatchers.any; -import static org.mockito.ArgumentMatchers.anyString; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.spy; -import static org.mockito.Mockito.times; -import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.when; - -import com.google.common.collect.Lists; - -import java.time.Duration; -import java.util.HashSet; -import java.util.List; -import java.util.Set; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.LinkedBlockingQueue; -import java.util.concurrent.ScheduledExecutorService; - -import lombok.extern.slf4j.Slf4j; - -import org.apache.bookkeeper.client.BKException.ZKException; -import org.apache.bookkeeper.common.testing.executors.MockExecutorController; -import org.apache.bookkeeper.discover.RegistrationClient.RegistrationListener; -import org.apache.bookkeeper.discover.ZKRegistrationClient.WatchTask; -import org.apache.bookkeeper.net.BookieSocketAddress; -import org.apache.bookkeeper.util.ZkUtils; -import org.apache.bookkeeper.versioning.LongVersion; -import org.apache.bookkeeper.versioning.Versioned; -import org.apache.bookkeeper.zookeeper.MockZooKeeperTestCase; -import org.apache.zookeeper.AsyncCallback.Children2Callback; -import org.apache.zookeeper.KeeperException.Code; -import org.apache.zookeeper.Watcher; -import org.apache.zookeeper.Watcher.Event.EventType; -import org.apache.zookeeper.Watcher.Event.KeeperState; -import 
org.apache.zookeeper.data.Stat; -import org.junit.After; -import org.junit.Before; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TestName; -import org.junit.runner.RunWith; -import org.powermock.core.classloader.annotations.PrepareForTest; -import org.powermock.modules.junit4.PowerMockRunner; - -/** - * Unit test of {@link RegistrationClient}. - */ -@RunWith(PowerMockRunner.class) -@PrepareForTest({ ZKRegistrationClient.class, ZkUtils.class }) -@Slf4j -public class TestZkRegistrationClient extends MockZooKeeperTestCase { - - @Rule - public final TestName runtime = new TestName(); - - private String ledgersPath; - private String regPath; - private String regReadonlyPath; - private ZKRegistrationClient zkRegistrationClient; - private ScheduledExecutorService mockExecutor; - private MockExecutorController controller; - - @Override - @Before - public void setup() throws Exception { - super.setup(); - - this.ledgersPath = "/" + runtime.getMethodName(); - this.regPath = ledgersPath + "/" + AVAILABLE_NODE; - this.regReadonlyPath = regPath + "/" + READONLY; - this.mockExecutor = mock(ScheduledExecutorService.class); - this.controller = new MockExecutorController() - .controlExecute(mockExecutor) - .controlSubmit(mockExecutor) - .controlSchedule(mockExecutor) - .controlScheduleAtFixedRate(mockExecutor, 10); - this.zkRegistrationClient = new ZKRegistrationClient( - mockZk, - ledgersPath, - mockExecutor - ); - } - - @After - public void teardown() { - if (null != zkRegistrationClient) { - zkRegistrationClient.close(); - } - } - - private static Set prepareNBookies(int num) { - Set bookies = new HashSet<>(); - for (int i = 0; i < num; i++) { - bookies.add(new BookieSocketAddress("127.0.0.1", 3181 + i)); - } - return bookies; - } - - @Test - public void testGetWritableBookies() throws Exception { - Set addresses = prepareNBookies(10); - List children = Lists.newArrayList(); - for (BookieSocketAddress address : addresses) { - children.add(address.toString()); - } - Stat stat = mock(Stat.class); - when(stat.getCversion()).thenReturn(1234); - mockGetChildren( - regPath, false, - Code.OK.intValue(), children, stat); - - Versioned> result = - result(zkRegistrationClient.getWritableBookies()); - - assertEquals(new LongVersion(1234), result.getVersion()); - assertSetEquals( - addresses, result.getValue()); - } - - @Test - public void testGetReadOnlyBookies() throws Exception { - Set addresses = prepareNBookies(10); - List children = Lists.newArrayList(); - for (BookieSocketAddress address : addresses) { - children.add(address.toString()); - } - Stat stat = mock(Stat.class); - when(stat.getCversion()).thenReturn(1234); - mockGetChildren( - regReadonlyPath, false, - Code.OK.intValue(), children, stat); - - Versioned> result = - result(zkRegistrationClient.getReadOnlyBookies()); - - assertEquals(new LongVersion(1234), result.getVersion()); - assertSetEquals( - addresses, result.getValue()); - } - - @Test - public void testGetWritableBookiesFailure() throws Exception { - mockGetChildren( - regPath, false, - Code.NONODE.intValue(), null, null); - - try { - result(zkRegistrationClient.getWritableBookies()); - fail("Should fail to get writable bookies"); - } catch (ZKException zke) { - // expected to throw zookeeper exception - } - } - - @Test - public void testGetReadOnlyBookiesFailure() throws Exception { - mockGetChildren( - regReadonlyPath, false, - Code.NONODE.intValue(), null, null); - - try { - result(zkRegistrationClient.getReadOnlyBookies()); - fail("Should fail to get writable 
bookies"); - } catch (ZKException zke) { - // expected to throw zookeeper exception - } - } - - @Test - public void testWatchWritableBookiesSuccess() throws Exception { - testWatchBookiesSuccess(true); - } - - @Test - public void testWatchReadonlyBookiesSuccess() throws Exception { - testWatchBookiesSuccess(false); - } - - @SuppressWarnings("unchecked") - private void testWatchBookiesSuccess(boolean isWritable) - throws Exception { - // - // 1. test watch bookies with a listener - // - - LinkedBlockingQueue>> updates = - spy(new LinkedBlockingQueue<>()); - RegistrationListener listener = bookies -> { - try { - updates.put(bookies); - } catch (InterruptedException e) { - log.warn("Interrupted on enqueue bookie updates", e); - } - }; - - Set addresses = prepareNBookies(10); - List children = Lists.newArrayList(); - for (BookieSocketAddress address : addresses) { - children.add(address.toString()); - } - Stat stat = mock(Stat.class); - when(stat.getCversion()).thenReturn(1234); - - mockGetChildren( - isWritable ? regPath : regReadonlyPath, - true, - Code.OK.intValue(), children, stat); - - if (isWritable) { - result(zkRegistrationClient.watchWritableBookies(listener)); - } else { - result(zkRegistrationClient.watchReadOnlyBookies(listener)); - } - - Versioned> update = updates.take(); - verify(updates, times(1)).put(any(Versioned.class)); - assertEquals(new LongVersion(1234), update.getVersion()); - assertSetEquals( - addresses, update.getValue()); - - verify(mockZk, times(1)) - .getChildren(anyString(), any(Watcher.class), any(Children2Callback.class), any()); - - // - // 2. test watch bookies with a second listener. the second listener returns cached bookies - // without calling `getChildren` again - // - - // register another listener - LinkedBlockingQueue>> secondUpdates = - spy(new LinkedBlockingQueue<>()); - RegistrationListener secondListener = bookies -> { - try { - secondUpdates.put(bookies); - } catch (InterruptedException e) { - log.warn("Interrupted on enqueue bookie updates", e); - } - }; - if (isWritable) { - result(zkRegistrationClient.watchWritableBookies(secondListener)); - } else { - result(zkRegistrationClient.watchReadOnlyBookies(secondListener)); - } - Versioned> secondListenerUpdate = secondUpdates.take(); - // first listener will not be notified with any update - verify(updates, times(1)).put(any(Versioned.class)); - // second listener will receive same update as the first listener received before - verify(secondUpdates, times(1)).put(any(Versioned.class)); - assertSame(update.getVersion(), secondListenerUpdate.getVersion()); - assertSame(update.getValue(), secondListenerUpdate.getValue()); - - // the second listener will return the cached value without issuing another getChildren call - verify(mockZk, times(1)) - .getChildren(anyString(), any(Watcher.class), any(Children2Callback.class), any()); - - // - // 3. simulate session expire, it will trigger watcher to refetch bookies again. - // but since there is no updates on bookies, the registered listeners will not be notified. - // - - notifyWatchedEvent( - EventType.None, - KeeperState.Expired, - isWritable ? regPath : regReadonlyPath); - - // if session expires, the watcher task will get into backoff state - controller.advance(Duration.ofMillis(ZK_CONNECT_BACKOFF_MS)); - - // the same updates returns, the getChildren calls increase to 2 - // but since there is no updates, so no notification is sent. 
- verify(mockZk, times(2)) - .getChildren(anyString(), any(Watcher.class), any(Children2Callback.class), any()); - assertNull(updates.poll()); - // both listener and secondListener will not receive any old update - verify(updates, times(1)).put(any(Versioned.class)); - verify(secondUpdates, times(1)).put(any(Versioned.class)); - - // - // 4. notify with new bookies. both listeners will be notified with new bookies. - // - - Set newAddresses = prepareNBookies(20); - List newChildren = Lists.newArrayList(); - for (BookieSocketAddress address : newAddresses) { - newChildren.add(address.toString()); - } - Stat newStat = mock(Stat.class); - when(newStat.getCversion()).thenReturn(1235); - - mockGetChildren( - isWritable ? regPath : regReadonlyPath, - true, - Code.OK.intValue(), newChildren, newStat); - - // trigger watcher - notifyWatchedEvent( - EventType.NodeChildrenChanged, - KeeperState.SyncConnected, - isWritable ? regPath : regReadonlyPath); - - update = updates.take(); - assertEquals(new LongVersion(1235), update.getVersion()); - assertSetEquals( - newAddresses, update.getValue()); - secondListenerUpdate = secondUpdates.take(); - assertSame(update.getVersion(), secondListenerUpdate.getVersion()); - assertSame(update.getValue(), secondListenerUpdate.getValue()); - - verify(mockZk, times(3)) - .getChildren(anyString(), any(Watcher.class), any(Children2Callback.class), any()); - verify(updates, times(2)).put(any(Versioned.class)); - verify(secondUpdates, times(2)).put(any(Versioned.class)); - - // - // 5. unwatch the second listener and notify with new bookies again. only first listener will - // be notified with new bookies. - // - - newAddresses = prepareNBookies(25); - newChildren.clear(); - newChildren = Lists.newArrayList(); - for (BookieSocketAddress address : newAddresses) { - newChildren.add(address.toString()); - } - newStat = mock(Stat.class); - when(newStat.getCversion()).thenReturn(1236); - - mockGetChildren( - isWritable ? regPath : regReadonlyPath, - true, - Code.OK.intValue(), newChildren, newStat); - - if (isWritable) { - assertEquals(2, zkRegistrationClient.getWatchWritableBookiesTask().getNumListeners()); - zkRegistrationClient.unwatchWritableBookies(secondListener); - assertEquals(1, zkRegistrationClient.getWatchWritableBookiesTask().getNumListeners()); - } else { - assertEquals(2, zkRegistrationClient.getWatchReadOnlyBookiesTask().getNumListeners()); - zkRegistrationClient.unwatchReadOnlyBookies(secondListener); - assertEquals(1, zkRegistrationClient.getWatchReadOnlyBookiesTask().getNumListeners()); - } - - // trigger watcher - notifyWatchedEvent( - EventType.NodeChildrenChanged, - KeeperState.SyncConnected, - isWritable ? regPath : regReadonlyPath); - - update = updates.take(); - assertEquals(new LongVersion(1236), update.getVersion()); - assertSetEquals( - newAddresses, update.getValue()); - secondListenerUpdate = secondUpdates.poll(); - assertNull(secondListenerUpdate); - - verify(mockZk, times(4)) - .getChildren(anyString(), any(Watcher.class), any(Children2Callback.class), any()); - verify(updates, times(3)).put(any(Versioned.class)); - verify(secondUpdates, times(2)).put(any(Versioned.class)); - - // - // 6. unwatch the first listener. the watch task will be closed and zk watcher will be removed. 
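(Steps 5 and 6 of this scenario assert the listener bookkeeping of the watch task: `getNumListeners()` drops as listeners unregister, and the task closes once the last one is gone. A minimal stand-in sketch of that behavior; `WatchTaskSketch` is an illustrative name and elides the actual ZooKeeper watcher handling.)

```java
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.bookkeeper.discover.RegistrationClient.RegistrationListener;

// The task stays alive while at least one listener is attached
// and closes itself when the last one is removed.
class WatchTaskSketch {
    private final Set<RegistrationListener> listeners = ConcurrentHashMap.newKeySet();
    private volatile boolean closed = false;

    void addListener(RegistrationListener listener) {
        listeners.add(listener);
    }

    void removeListener(RegistrationListener listener) {
        if (listeners.remove(listener) && listeners.isEmpty()) {
            close(); // also where the real task would drop its ZK watcher
        }
    }

    void close() {
        closed = true;
    }

    int getNumListeners() {
        return listeners.size();
    }

    boolean isClosed() {
        return closed;
    }
}
```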
- // - // - - WatchTask expectedWatcher; - if (isWritable) { - expectedWatcher = zkRegistrationClient.getWatchWritableBookiesTask(); - assertFalse(expectedWatcher.isClosed()); - zkRegistrationClient.unwatchWritableBookies(listener); - assertNull(zkRegistrationClient.getWatchWritableBookiesTask()); - } else { - expectedWatcher = zkRegistrationClient.getWatchReadOnlyBookiesTask(); - assertFalse(expectedWatcher.isClosed()); - zkRegistrationClient.unwatchReadOnlyBookies(listener); - assertNull(zkRegistrationClient.getWatchReadOnlyBookiesTask()); - } - // the watch task will not be closed since there is still a listener - assertTrue(expectedWatcher.isClosed()); - } - - @Test - public void testWatchWritableBookiesTwice() throws Exception { - testWatchBookiesTwice(true); - } - - @Test - public void testWatchReadonlyBookiesTwice() throws Exception { - testWatchBookiesTwice(false); - } - - private void testWatchBookiesTwice(boolean isWritable) - throws Exception { - int zkCallbackDelayMs = 100; - - Set addresses = prepareNBookies(10); - List children = Lists.newArrayList(); - for (BookieSocketAddress address : addresses) { - children.add(address.toString()); - } - Stat stat = mock(Stat.class); - when(stat.getCversion()).thenReturn(1234); - - mockGetChildren( - isWritable ? regPath : regReadonlyPath, - true, - Code.OK.intValue(), children, stat, zkCallbackDelayMs); - - CompletableFuture>> firstResult = new CompletableFuture<>(); - RegistrationListener firstListener = bookies -> firstResult.complete(bookies); - - CompletableFuture>> secondResult = new CompletableFuture<>(); - RegistrationListener secondListener = bookies -> secondResult.complete(bookies); - - List> watchFutures = Lists.newArrayListWithExpectedSize(2); - if (isWritable) { - watchFutures.add(zkRegistrationClient.watchWritableBookies(firstListener)); - watchFutures.add(zkRegistrationClient.watchWritableBookies(secondListener)); - } else { - watchFutures.add(zkRegistrationClient.watchReadOnlyBookies(firstListener)); - watchFutures.add(zkRegistrationClient.watchReadOnlyBookies(secondListener)); - } - - // trigger zkCallbackExecutor to execute getChildren callback - zkCallbackController.advance(Duration.ofMillis(zkCallbackDelayMs)); - - result(collect(watchFutures)); - assertEquals(firstResult.get().getVersion(), secondResult.get().getVersion()); - assertSetEquals(firstResult.get().getValue(), secondResult.get().getValue()); - } - - @Test - public void testWatchWritableBookiesFailure() throws Exception { - testWatchBookiesFailure(true); - } - - @Test - public void testWatchReadonlyBookiesFailure() throws Exception { - testWatchBookiesFailure(false); - } - - private void testWatchBookiesFailure(boolean isWritable) - throws Exception { - int zkCallbackDelayMs = 100; - - mockGetChildren( - isWritable ? 
regPath : regReadonlyPath, - true, - Code.NONODE.intValue(), null, null, zkCallbackDelayMs); - - CompletableFuture>> listenerResult = new CompletableFuture<>(); - RegistrationListener listener = bookies -> listenerResult.complete(bookies); - - CompletableFuture watchFuture; - - WatchTask watchTask; - if (isWritable) { - watchFuture = zkRegistrationClient.watchWritableBookies(listener); - watchTask = zkRegistrationClient.getWatchWritableBookiesTask(); - } else { - watchFuture = zkRegistrationClient.watchReadOnlyBookies(listener); - watchTask = zkRegistrationClient.getWatchReadOnlyBookiesTask(); - } - assertNotNull(watchTask); - assertEquals(1, watchTask.getNumListeners()); - - // trigger zkCallbackExecutor to execute getChildren callback - zkCallbackController.advance(Duration.ofMillis(zkCallbackDelayMs)); - - try { - result(watchFuture); - fail("Should fail to watch writable bookies if reg path doesn't exist"); - } catch (ZKException zke) { - // expected - } - assertEquals(0, watchTask.getNumListeners()); - assertTrue(watchTask.isClosed()); - if (isWritable) { - assertNull(zkRegistrationClient.getWatchWritableBookiesTask()); - } else { - assertNull(zkRegistrationClient.getWatchReadOnlyBookiesTask()); - } - } - -} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/discover/TestZkRegistrationClientWithBookieAddressTracking.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/discover/TestZkRegistrationClientWithBookieAddressTracking.java new file mode 100644 index 00000000000..f9b6de342c2 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/discover/TestZkRegistrationClientWithBookieAddressTracking.java @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.bookkeeper.discover; + +/** + * Unit test of {@link RegistrationClient} with Bookie Address Tracking feature. + */ +public class TestZkRegistrationClientWithBookieAddressTracking extends AbstractTestZkRegistrationClient { + + public TestZkRegistrationClientWithBookieAddressTracking() { + super(true); + } + +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/discover/TestZkRegistrationClientWithoutBookieAddressTracking.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/discover/TestZkRegistrationClientWithoutBookieAddressTracking.java new file mode 100644 index 00000000000..39ed8eacc81 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/discover/TestZkRegistrationClientWithoutBookieAddressTracking.java @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +package org.apache.bookkeeper.discover; + +/** + * Unit test of {@link RegistrationClient} without Bookie Address Tracking feature. + */ +public class TestZkRegistrationClientWithoutBookieAddressTracking extends AbstractTestZkRegistrationClient { + + public TestZkRegistrationClientWithoutBookieAddressTracking() { + super(false); + } + +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/discover/TestZkRegistrationManager.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/discover/TestZkRegistrationManager.java index ff1b7ea555b..4e1b06ba220 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/discover/TestZkRegistrationManager.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/discover/TestZkRegistrationManager.java @@ -18,8 +18,50 @@ */ package org.apache.bookkeeper.discover; +import static org.junit.Assert.assertNotNull; + +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.conf.TestBKConfiguration; +import org.apache.bookkeeper.test.ZooKeeperCluster; +import org.apache.bookkeeper.test.ZooKeeperUtil; +import org.apache.zookeeper.ZooKeeper; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + /** * Unit test of {@link RegistrationManager}. 
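(The `testPrepareFormat` test that follows points the registration manager at a local ZooKeeper through a metadata service URI. An annotated sketch of how that URI decomposes, as read from the test; the breakdown is an editorial gloss, not text from the patch.)

```java
final class MetadataUriNotes {
    // The URI the test passes to ServerConfiguration#setMetadataServiceUri.
    //   scheme "zk+hierarchical" -> ZooKeeper metadata driver, hierarchical ledger layout
    //   authority "localhost:2181" -> the local ZooKeeperCluster started in setup()
    //   path "/test/ledgers" -> ledgers root that prepareFormat() must create,
    //                           verified afterwards with zkc.exists("/test/ledgers", false)
    static final String METADATA_SERVICE_URI = "zk+hierarchical://localhost:2181/test/ledgers";
}
```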
*/ public class TestZkRegistrationManager { + + private ZooKeeperCluster localZkServer; + private ZooKeeper zkc; + + @Before + public void setup() throws Exception { + localZkServer = new ZooKeeperUtil(); + localZkServer.startCluster(); + } + + @After + public void teardown() throws Exception { + localZkServer.stopCluster(); + } + + @Test + public void testPrepareFormat () throws Exception { + try { + ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); + conf.setMetadataServiceUri("zk+hierarchical://localhost:2181/test/ledgers"); + zkc = localZkServer.getZooKeeperClient(); + ZKRegistrationManager zkRegistrationManager = new ZKRegistrationManager(conf, zkc); + zkRegistrationManager.prepareFormat(); + assertNotNull(zkc.exists("/test/ledgers", false)); + } finally { + if (zkc != null) { + zkc.close(); + } + } + } + } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/AbstractZkLedgerManagerTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/AbstractZkLedgerManagerTest.java index b2cf68d25c3..8e53f2088d1 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/AbstractZkLedgerManagerTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/AbstractZkLedgerManagerTest.java @@ -34,13 +34,15 @@ import static org.mockito.Mockito.CALLS_REAL_METHODS; import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.mockStatic; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; import static org.mockito.Mockito.withSettings; +import com.google.common.collect.Lists; import java.time.Duration; -import java.util.Collections; +import java.util.List; import java.util.Optional; import java.util.Set; import java.util.concurrent.Executors; @@ -49,16 +51,19 @@ import java.util.concurrent.TimeUnit; import org.apache.bookkeeper.client.BKException; import org.apache.bookkeeper.client.BKException.Code; -import org.apache.bookkeeper.client.BookKeeper.DigestType; -import org.apache.bookkeeper.client.LedgerMetadata; +import org.apache.bookkeeper.client.LedgerMetadataBuilder; +import org.apache.bookkeeper.client.api.DigestType; +import org.apache.bookkeeper.client.api.LedgerMetadata; import org.apache.bookkeeper.common.testing.executors.MockExecutorController; import org.apache.bookkeeper.conf.ClientConfiguration; import org.apache.bookkeeper.meta.zk.ZKMetadataDriverBase; -import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GenericCallbackFuture; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.net.BookieSocketAddress; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.LedgerMetadataListener; import org.apache.bookkeeper.util.ZkUtils; import org.apache.bookkeeper.versioning.LongVersion; import org.apache.bookkeeper.versioning.Version; +import org.apache.bookkeeper.versioning.Versioned; import org.apache.bookkeeper.zookeeper.MockZooKeeperTestCase; import org.apache.zookeeper.AsyncCallback.DataCallback; import org.apache.zookeeper.AsyncCallback.StatCallback; @@ -73,15 +78,13 @@ import org.junit.Before; import org.junit.Test; import org.junit.runner.RunWith; -import org.powermock.api.mockito.PowerMockito; -import org.powermock.core.classloader.annotations.PrepareForTest; -import org.powermock.modules.junit4.PowerMockRunner; +import org.mockito.MockedStatic; +import org.mockito.junit.MockitoJUnitRunner; /** * Unit test of {@link AbstractZkLedgerManager}. 
*/ -@RunWith(PowerMockRunner.class) -@PrepareForTest({ AbstractZkLedgerManager.class, ZkUtils.class }) +@RunWith(MockitoJUnitRunner.Silent.class) public class AbstractZkLedgerManagerTest extends MockZooKeeperTestCase { private ClientConfiguration conf; @@ -89,21 +92,23 @@ public class AbstractZkLedgerManagerTest extends MockZooKeeperTestCase { private ScheduledExecutorService scheduler; private MockExecutorController schedulerController; private LedgerMetadata metadata; + private LedgerMetadataSerDe serDe; + private MockedStatic executorsMockedStatic; @Before public void setup() throws Exception { - PowerMockito.mockStatic(Executors.class); + executorsMockedStatic = mockStatic(Executors.class, CALLS_REAL_METHODS); super.setup(); - this.scheduler = PowerMockito.mock(ScheduledExecutorService.class); + this.scheduler = mock(ScheduledExecutorService.class); this.schedulerController = new MockExecutorController() .controlSubmit(scheduler) .controlSchedule(scheduler) .controlExecute(scheduler) .controlScheduleAtFixedRate(scheduler, 10); - PowerMockito.when(Executors.newSingleThreadScheduledExecutor(any())) - .thenReturn(scheduler); + + executorsMockedStatic.when(() -> Executors.newSingleThreadScheduledExecutor(any())).thenReturn(scheduler); this.conf = new ClientConfiguration(); this.ledgerManager = mock( @@ -111,12 +116,19 @@ public void setup() throws Exception { withSettings() .useConstructor(conf, mockZk) .defaultAnswer(CALLS_REAL_METHODS)); - this.metadata = new LedgerMetadata( - 5, 3, 3, - DigestType.CRC32, - new byte[0], - Collections.emptyMap(), - false); + List ensemble = Lists.newArrayList(new BookieSocketAddress("192.0.2.1", 3181).toBookieId(), + new BookieSocketAddress("192.0.2.2", 3181).toBookieId(), + new BookieSocketAddress("192.0.2.3", 3181).toBookieId(), + new BookieSocketAddress("192.0.2.4", 3181).toBookieId(), + new BookieSocketAddress("192.0.2.5", 3181).toBookieId()); + this.metadata = LedgerMetadataBuilder.create() + .withId(123L) + .withDigestType(DigestType.CRC32C).withPassword(new byte[0]) + .withEnsembleSize(5) + .withWriteQuorumSize(3) + .withAckQuorumSize(3) + .newEnsembleEntry(0L, ensemble) + .withCreationTime(12345L).build(); doAnswer(invocationOnMock -> { long ledgerId = invocationOnMock.getArgument(0); @@ -132,10 +144,16 @@ public void setup() throws Exception { assertSame(mockZk, ledgerManager.zk); assertSame(conf, ledgerManager.conf); assertSame(scheduler, ledgerManager.scheduler); + + this.serDe = new LedgerMetadataSerDe(); } @After public void teardown() throws Exception { + super.teardown(); + + executorsMockedStatic.close(); + if (null != ledgerManager) { ledgerManager.close(); @@ -154,13 +172,9 @@ public void testCreateLedgerMetadataSuccess() throws Exception { KeeperException.Code.OK.intValue(), ledgerStr ); - assertEquals(Version.NEW, metadata.getVersion()); - - GenericCallbackFuture callbackFuture = new GenericCallbackFuture<>(); - ledgerManager.createLedgerMetadata(ledgerId, metadata, callbackFuture); - callbackFuture.get(); + Versioned result = ledgerManager.createLedgerMetadata(ledgerId, metadata).get(); - assertEquals(new LongVersion(0), metadata.getVersion()); + assertEquals(new LongVersion(0), result.getVersion()); } @Test @@ -170,22 +184,26 @@ public void testCreateLedgerMetadataNodeExists() throws Exception { mockZkUtilsAsyncCreateFullPathOptimistic( ledgerStr, CreateMode.PERSISTENT, KeeperException.Code.NODEEXISTS.intValue(), null); - - assertEquals(Version.NEW, metadata.getVersion()); - - GenericCallbackFuture callbackFuture = new 
GenericCallbackFuture<>(); - ledgerManager.createLedgerMetadata(ledgerId, metadata, callbackFuture); + Stat stat = mock(Stat.class); + when(stat.getVersion()).thenReturn(1234); + when(stat.getCtime()).thenReturn(metadata.getCtime()); + /* + * this is needed because in AbstractZkLedgerManager.readLedgerMetadata + * if MetadataFormatVersion is >2, then for createLedgerMetadata if we + * get NODEEXISTS exception then it will try to read to make sure ledger + * creation is robust to ZK connection loss. Please check Issue #1967. + */ + mockZkGetData( + ledgerStr, false, + KeeperException.Code.OK.intValue(), serDe.serialize(metadata), stat); try { - result(callbackFuture); + result(ledgerManager.createLedgerMetadata(ledgerId, metadata)); fail("Should fail to create ledger metadata if the ledger already exists"); } catch (Exception e) { assertTrue(e instanceof BKException); BKException bke = (BKException) e; assertEquals(Code.LedgerExistException, bke.getCode()); } - - // creation failed, so metadata should not be modified - assertEquals(Version.NEW, metadata.getVersion()); } @Test @@ -196,21 +214,15 @@ public void testCreateLedgerMetadataException() throws Exception { ledgerStr, CreateMode.PERSISTENT, KeeperException.Code.CONNECTIONLOSS.intValue(), null); - assertEquals(Version.NEW, metadata.getVersion()); - - GenericCallbackFuture callbackFuture = new GenericCallbackFuture<>(); - ledgerManager.createLedgerMetadata(ledgerId, metadata, callbackFuture); try { - result(callbackFuture); + result(ledgerManager.createLedgerMetadata(ledgerId, metadata)); fail("Should fail to create ledger metadata when encountering zookeeper exception"); } catch (Exception e) { assertTrue(e instanceof BKException); BKException bke = (BKException) e; assertEquals(Code.ZKException, bke.getCode()); + assertTrue(bke.getCause() instanceof KeeperException); } - - // creation failed, so metadata should not be modified - assertEquals(Version.NEW, metadata.getVersion()); } @Test @@ -223,9 +235,7 @@ public void testRemoveLedgerMetadataSuccess() throws Exception { ledgerStr, (int) version.getLongVersion(), KeeperException.Code.OK.intValue()); - GenericCallbackFuture callbackFuture = new GenericCallbackFuture<>(); - ledgerManager.removeLedgerMetadata(ledgerId, version, callbackFuture); - result(callbackFuture); + ledgerManager.removeLedgerMetadata(ledgerId, version).get(); verify(mockZk, times(1)) .delete(eq(ledgerStr), eq(1234), any(VoidCallback.class), eq(null)); @@ -240,9 +250,7 @@ public void testRemoveLedgerMetadataVersionAny() throws Exception { ledgerStr, -1, KeeperException.Code.OK.intValue()); - GenericCallbackFuture callbackFuture = new GenericCallbackFuture<>(); - ledgerManager.removeLedgerMetadata(ledgerId, Version.ANY, callbackFuture); - result(callbackFuture); + ledgerManager.removeLedgerMetadata(ledgerId, Version.ANY).get(); verify(mockZk, times(1)) .delete(eq(ledgerStr), eq(-1), any(VoidCallback.class), eq(null)); @@ -262,10 +270,8 @@ public void testRemoveLedgerMetadataUnknownVersionType() throws Exception { private void testRemoveLedgerMetadataInvalidVersion(Version version) throws Exception { long ledgerId = System.currentTimeMillis(); - GenericCallbackFuture callbackFuture = new GenericCallbackFuture<>(); - ledgerManager.removeLedgerMetadata(ledgerId, version, callbackFuture); try { - result(callbackFuture); + result(ledgerManager.removeLedgerMetadata(ledgerId, version)); fail("Should fail to remove metadata if version is " + Version.NEW); } catch (BKException bke) { assertEquals(Code.MetadataVersionException, 
bke.getCode()); @@ -282,13 +288,10 @@ public void testRemoveLedgerMetadataNoNode() throws Exception { ledgerStr, (int) version.getLongVersion(), KeeperException.Code.NONODE.intValue()); - GenericCallbackFuture callbackFuture = new GenericCallbackFuture<>(); - ledgerManager.removeLedgerMetadata(ledgerId, version, callbackFuture); try { - result(callbackFuture); - fail("Should fail to remove metadata if no such ledger exists"); + result(ledgerManager.removeLedgerMetadata(ledgerId, version)); } catch (BKException bke) { - assertEquals(Code.NoSuchLedgerExistsException, bke.getCode()); + fail("Should succeed"); } verify(mockZk, times(1)) @@ -305,11 +308,9 @@ public void testRemoveLedgerMetadataException() throws Exception { ledgerStr, (int) version.getLongVersion(), KeeperException.Code.CONNECTIONLOSS.intValue()); - GenericCallbackFuture callbackFuture = new GenericCallbackFuture<>(); - ledgerManager.removeLedgerMetadata(ledgerId, version, callbackFuture); try { - result(callbackFuture); - fail("Should fail to remove metadata if no such ledger exists"); + result(ledgerManager.removeLedgerMetadata(ledgerId, version)); + fail("Should fail to remove metadata upon ZKException"); } catch (BKException bke) { assertEquals(Code.ZKException, bke.getCode()); } @@ -339,12 +340,8 @@ private void testRemoveLedgerMetadataHierarchicalLedgerManager(AbstractZkLedgerM ledgerStr, (int) version.getLongVersion(), KeeperException.Code.OK.intValue()); - GenericCallbackFuture callbackFuture = new GenericCallbackFuture<>(); - lm.removeLedgerMetadata(ledgerId, version, callbackFuture); - result(callbackFuture); + lm.removeLedgerMetadata(ledgerId, version).get(); - PowerMockito.verifyStatic( - ZkUtils.class, times(1)); ZkUtils.asyncDeleteFullPathOptimistic( eq(mockZk), eq(ledgerStr), eq(1234), any(VoidCallback.class), eq(ledgerStr)); @@ -355,18 +352,16 @@ public void testReadLedgerMetadataSuccess() throws Exception { long ledgerId = System.currentTimeMillis(); String ledgerStr = String.valueOf(ledgerId); - metadata.setVersion(new LongVersion(1234L)); Stat stat = mock(Stat.class); when(stat.getVersion()).thenReturn(1234); when(stat.getCtime()).thenReturn(metadata.getCtime()); mockZkGetData( ledgerStr, false, - KeeperException.Code.OK.intValue(), metadata.serialize(), stat); + KeeperException.Code.OK.intValue(), serDe.serialize(metadata), stat); - GenericCallbackFuture callbackFuture = new GenericCallbackFuture<>(); - ledgerManager.readLedgerMetadata(ledgerId, callbackFuture); - LedgerMetadata readMetadata = result(callbackFuture); - assertEquals(metadata, readMetadata); + Versioned readMetadata = result(ledgerManager.readLedgerMetadata(ledgerId)); + assertEquals(metadata, readMetadata.getValue()); + assertEquals(new LongVersion(1234), readMetadata.getVersion()); verify(mockZk, times(1)) .getData(eq(ledgerStr), eq(null), any(DataCallback.class), any()); @@ -381,13 +376,11 @@ public void testReadLedgerMetadataNoNode() throws Exception { ledgerStr, false, KeeperException.Code.NONODE.intValue(), null, null); - GenericCallbackFuture callbackFuture = new GenericCallbackFuture<>(); - ledgerManager.readLedgerMetadata(ledgerId, callbackFuture); try { - result(callbackFuture); + result(ledgerManager.readLedgerMetadata(ledgerId)); fail("Should fail on reading ledger metadata if a ledger doesn't exist"); } catch (BKException bke) { - assertEquals(Code.NoSuchLedgerExistsException, bke.getCode()); + assertEquals(Code.NoSuchLedgerExistsOnMetadataServerException, bke.getCode()); } verify(mockZk, times(1)) @@ -403,10 +396,8 @@ public void 
testReadLedgerMetadataException() throws Exception { ledgerStr, false, KeeperException.Code.CONNECTIONLOSS.intValue(), null, null); - GenericCallbackFuture callbackFuture = new GenericCallbackFuture<>(); - ledgerManager.readLedgerMetadata(ledgerId, callbackFuture); try { - result(callbackFuture); + result(ledgerManager.readLedgerMetadata(ledgerId)); fail("Should fail on reading ledger metadata if a ledger doesn't exist"); } catch (BKException bke) { assertEquals(Code.ZKException, bke.getCode()); @@ -423,12 +414,10 @@ public void testReadLedgerMetadataStatMissing() throws Exception { mockZkGetData( ledgerStr, false, - KeeperException.Code.OK.intValue(), metadata.serialize(), null); + KeeperException.Code.OK.intValue(), serDe.serialize(metadata), null); - GenericCallbackFuture callbackFuture = new GenericCallbackFuture<>(); - ledgerManager.readLedgerMetadata(ledgerId, callbackFuture); try { - result(callbackFuture); + result(ledgerManager.readLedgerMetadata(ledgerId)); fail("Should fail on reading ledger metadata if a ledger doesn't exist"); } catch (BKException bke) { assertEquals(Code.ZKException, bke.getCode()); @@ -443,7 +432,6 @@ public void testReadLedgerMetadataDataCorrupted() throws Exception { long ledgerId = System.currentTimeMillis(); String ledgerStr = String.valueOf(ledgerId); - metadata.setVersion(new LongVersion(1234L)); Stat stat = mock(Stat.class); when(stat.getVersion()).thenReturn(1234); when(stat.getCtime()).thenReturn(metadata.getCtime()); @@ -451,10 +439,8 @@ public void testReadLedgerMetadataDataCorrupted() throws Exception { ledgerStr, false, KeeperException.Code.OK.intValue(), new byte[0], stat); - GenericCallbackFuture callbackFuture = new GenericCallbackFuture<>(); - ledgerManager.readLedgerMetadata(ledgerId, callbackFuture); try { - result(callbackFuture); + result(ledgerManager.readLedgerMetadata(ledgerId)); fail("Should fail on reading ledger metadata if a ledger doesn't exist"); } catch (BKException bke) { assertEquals(Code.ZKException, bke.getCode()); @@ -469,21 +455,16 @@ public void testWriteLedgerMetadataSuccess() throws Exception { long ledgerId = System.currentTimeMillis(); String ledgerStr = String.valueOf(ledgerId); - metadata.setVersion(new LongVersion(1234L)); Stat stat = mock(Stat.class); when(stat.getVersion()).thenReturn(1235); when(stat.getCtime()).thenReturn(metadata.getCtime()); mockZkSetData( - ledgerStr, metadata.serialize(), 1234, + ledgerStr, serDe.serialize(metadata), 1234, KeeperException.Code.OK.intValue(), stat); - assertEquals(new LongVersion(1234L), metadata.getVersion()); + Version v = ledgerManager.writeLedgerMetadata(ledgerId, metadata, new LongVersion(1234L)).get().getVersion(); - GenericCallbackFuture callbackFuture = new GenericCallbackFuture<>(); - ledgerManager.writeLedgerMetadata(ledgerId, metadata, callbackFuture); - result(callbackFuture); - - assertEquals(new LongVersion(1235L), metadata.getVersion()); + assertEquals(new LongVersion(1235L), v); verify(mockZk, times(1)) .setData(eq(ledgerStr), any(byte[].class), eq(1234), any(StatCallback.class), any()); @@ -494,25 +475,17 @@ public void testWriteLedgerMetadataBadVersion() throws Exception { long ledgerId = System.currentTimeMillis(); String ledgerStr = String.valueOf(ledgerId); - metadata.setVersion(new LongVersion(1234L)); mockZkSetData( - ledgerStr, metadata.serialize(), 1234, + ledgerStr, serDe.serialize(metadata), 1234, KeeperException.Code.BADVERSION.intValue(), null); - assertEquals(new LongVersion(1234L), metadata.getVersion()); - - GenericCallbackFuture 
callbackFuture = new GenericCallbackFuture<>(); - ledgerManager.writeLedgerMetadata(ledgerId, metadata, callbackFuture); try { - result(callbackFuture); + result(ledgerManager.writeLedgerMetadata(ledgerId, metadata, new LongVersion(1234L))); fail("Should fail on writing ledger metadata if encountering bad version"); } catch (BKException bke) { assertEquals(Code.MetadataVersionException, bke.getCode()); } - // version remain unchanged - assertEquals(new LongVersion(1234L), metadata.getVersion()); - verify(mockZk, times(1)) .setData(eq(ledgerStr), any(byte[].class), eq(1234), any(StatCallback.class), any()); } @@ -522,24 +495,17 @@ public void testWriteLedgerMetadataException() throws Exception { long ledgerId = System.currentTimeMillis(); String ledgerStr = String.valueOf(ledgerId); - metadata.setVersion(new LongVersion(1234L)); mockZkSetData( - ledgerStr, metadata.serialize(), 1234, + ledgerStr, serDe.serialize(metadata), 1234, KeeperException.Code.CONNECTIONLOSS.intValue(), null); - assertEquals(new LongVersion(1234L), metadata.getVersion()); - - GenericCallbackFuture callbackFuture = new GenericCallbackFuture<>(); - ledgerManager.writeLedgerMetadata(ledgerId, metadata, callbackFuture); try { - result(callbackFuture); + result(ledgerManager.writeLedgerMetadata(ledgerId, metadata, new LongVersion(1234L))); fail("Should fail on writing ledger metadata if encountering zookeeper exceptions"); } catch (BKException bke) { assertEquals(Code.ZKException, bke.getCode()); } - // version remain unchanged - assertEquals(new LongVersion(1234L), metadata.getVersion()); verify(mockZk, times(1)) .setData(eq(ledgerStr), any(byte[].class), eq(1234), any(StatCallback.class), any()); @@ -560,12 +526,8 @@ public void testWriteLedgerMetadataInvalidVersion() throws Exception { private void testWriteLedgerMetadataInvalidVersion(Version invalidVersion) throws Exception { long ledgerId = System.currentTimeMillis(); - metadata.setVersion(invalidVersion); - - GenericCallbackFuture callbackFuture = new GenericCallbackFuture<>(); - ledgerManager.writeLedgerMetadata(ledgerId, metadata, callbackFuture); try { - result(callbackFuture); + result(ledgerManager.writeLedgerMetadata(ledgerId, metadata, invalidVersion)); fail("Should fail on writing ledger metadata if an invalid version is provided."); } catch (BKException bke) { assertEquals(Code.MetadataVersionException, bke.getCode()); @@ -581,15 +543,14 @@ public void testLedgerMetadataListener() throws Exception { String ledgerStr = String.valueOf(ledgerId); LinkedBlockingQueue changes = new LinkedBlockingQueue<>(); - LedgerMetadataListener listener = (ledgerId1, metadata) -> changes.add(metadata); + LedgerMetadataListener listener = (ledgerId1, metadata) -> changes.add(metadata.getValue()); - metadata.setVersion(new LongVersion(1234L)); Stat stat = mock(Stat.class); when(stat.getVersion()).thenReturn(1234); when(stat.getCtime()).thenReturn(metadata.getCtime()); mockZkGetData( ledgerStr, true, - KeeperException.Code.OK.intValue(), metadata.serialize(), stat); + KeeperException.Code.OK.intValue(), serDe.serialize(metadata), stat); ledgerManager.registerLedgerMetadataListener(ledgerId, listener); @@ -606,11 +567,10 @@ public void testLedgerMetadataListener() throws Exception { Watcher registeredWatcher1 = watcherSet1.stream().findFirst().get(); // mock get data to return an updated metadata - metadata.setVersion(new LongVersion(1235L)); when(stat.getVersion()).thenReturn(1235); mockZkGetData( ledgerStr, true, - KeeperException.Code.OK.intValue(), metadata.serialize(), 
stat); + KeeperException.Code.OK.intValue(), serDe.serialize(metadata), stat); // notify the watcher event notifyWatchedEvent( @@ -644,15 +604,15 @@ public void testLedgerMetadataListenerOnLedgerDeleted() throws Exception { String ledgerStr = String.valueOf(ledgerId); LinkedBlockingQueue> changes = new LinkedBlockingQueue<>(); - LedgerMetadataListener listener = (ledgerId1, metadata) -> changes.add(Optional.ofNullable(metadata)); + LedgerMetadataListener listener = + (ledgerId1, metadata) -> changes.add(Optional.ofNullable(metadata != null ? metadata.getValue() : null)); - metadata.setVersion(new LongVersion(1234L)); Stat stat = mock(Stat.class); when(stat.getVersion()).thenReturn(1234); when(stat.getCtime()).thenReturn(metadata.getCtime()); mockZkGetData( ledgerStr, true, - KeeperException.Code.OK.intValue(), metadata.serialize(), stat); + KeeperException.Code.OK.intValue(), serDe.serialize(metadata), stat); ledgerManager.registerLedgerMetadataListener(ledgerId, listener); assertTrue(ledgerManager.listeners.containsKey(ledgerId)); @@ -695,15 +655,16 @@ public void testLedgerMetadataListenerOnLedgerDeletedEvent() throws Exception { String ledgerStr = String.valueOf(ledgerId); LinkedBlockingQueue> changes = new LinkedBlockingQueue<>(); - LedgerMetadataListener listener = (ledgerId1, metadata) -> changes.add(Optional.ofNullable(metadata)); + LedgerMetadataListener listener = + (ledgerId1, metadata) -> changes.add( + Optional.ofNullable(metadata != null ? metadata.getValue() : null)); - metadata.setVersion(new LongVersion(1234L)); Stat stat = mock(Stat.class); when(stat.getVersion()).thenReturn(1234); when(stat.getCtime()).thenReturn(metadata.getCtime()); mockZkGetData( ledgerStr, true, - KeeperException.Code.OK.intValue(), metadata.serialize(), stat); + KeeperException.Code.OK.intValue(), serDe.serialize(metadata), stat); ledgerManager.registerLedgerMetadataListener(ledgerId, listener); assertTrue(ledgerManager.listeners.containsKey(ledgerId)); @@ -737,9 +698,8 @@ public void testLedgerMetadataListenerOnRetries() throws Exception { String ledgerStr = String.valueOf(ledgerId); LinkedBlockingQueue changes = new LinkedBlockingQueue<>(); - LedgerMetadataListener listener = (ledgerId1, metadata) -> changes.add(metadata); + LedgerMetadataListener listener = (ledgerId1, metadata) -> changes.add(metadata.getValue()); - metadata.setVersion(new LongVersion(1234L)); Stat stat = mock(Stat.class); when(stat.getVersion()).thenReturn(1234); when(stat.getCtime()).thenReturn(metadata.getCtime()); @@ -766,7 +726,7 @@ public void testLedgerMetadataListenerOnRetries() throws Exception { // mock get data to return a valid response mockZkGetData( ledgerStr, true, - KeeperException.Code.OK.intValue(), metadata.serialize(), stat); + KeeperException.Code.OK.intValue(), serDe.serialize(metadata), stat); schedulerController.advance(Duration.ofMillis(ZK_CONNECT_BACKOFF_MS)); @@ -786,15 +746,14 @@ public void testLedgerMetadataListenerOnSessionExpired() throws Exception { String ledgerStr = String.valueOf(ledgerId); LinkedBlockingQueue changes = new LinkedBlockingQueue<>(); - LedgerMetadataListener listener = (ledgerId1, metadata) -> changes.add(metadata); + LedgerMetadataListener listener = (ledgerId1, metadata) -> changes.add(metadata.getValue()); - metadata.setVersion(new LongVersion(1234L)); Stat stat = mock(Stat.class); when(stat.getVersion()).thenReturn(1234); when(stat.getCtime()).thenReturn(metadata.getCtime()); mockZkGetData( ledgerStr, true, - KeeperException.Code.OK.intValue(), metadata.serialize(), stat); + 
KeeperException.Code.OK.intValue(), serDe.serialize(metadata), stat); ledgerManager.registerLedgerMetadataListener(ledgerId, listener); @@ -835,15 +794,14 @@ public void testUnregisterLedgerMetadataListener() throws Exception { String ledgerStr = String.valueOf(ledgerId); LinkedBlockingQueue changes = new LinkedBlockingQueue<>(); - LedgerMetadataListener listener = (ledgerId1, metadata) -> changes.add(metadata); + LedgerMetadataListener listener = (ledgerId1, metadata) -> changes.add(metadata.getValue()); - metadata.setVersion(new LongVersion(1234L)); Stat stat = mock(Stat.class); when(stat.getVersion()).thenReturn(1234); when(stat.getCtime()).thenReturn(metadata.getCtime()); mockZkGetData( ledgerStr, true, - KeeperException.Code.OK.intValue(), metadata.serialize(), stat); + KeeperException.Code.OK.intValue(), serDe.serialize(metadata), stat); ledgerManager.registerLedgerMetadataListener(ledgerId, listener); assertTrue(ledgerManager.listeners.containsKey(ledgerId)); @@ -861,15 +819,21 @@ public void testUnregisterLedgerMetadataListener() throws Exception { Watcher registeredWatcher1 = watcherSet1.stream().findFirst().get(); // mock get data to return an updated metadata - metadata.setVersion(new LongVersion(1235L)); when(stat.getVersion()).thenReturn(1235); mockZkGetData( ledgerStr, true, - KeeperException.Code.OK.intValue(), metadata.serialize(), stat); + KeeperException.Code.OK.intValue(), serDe.serialize(metadata), stat); + + mockZkRemoveWatcher(); // unregister the listener ledgerManager.unregisterLedgerMetadataListener(ledgerId, listener); assertFalse(ledgerManager.listeners.containsKey(ledgerId)); + assertFalse(watchers.containsKey(ledgerStr)); + verify(mockZk, times(1)).removeWatches(eq(ledgerManager.getLedgerPath(ledgerId)), + any(Watcher.class), any(Watcher.WatcherType.class), any(Boolean.class), + any(VoidCallback.class), any()); + // notify the watcher event notifyWatchedEvent( diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/CleanupLedgerManagerTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/CleanupLedgerManagerTest.java new file mode 100644 index 00000000000..fabbdc7f059 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/CleanupLedgerManagerTest.java @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.bookkeeper.meta; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyLong; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import java.util.concurrent.CompletableFuture; +import org.apache.bookkeeper.client.api.LedgerMetadata; +import org.apache.bookkeeper.versioning.Version; +import org.apache.bookkeeper.versioning.Versioned; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +/** + * Unit test of {@link CleanupLedgerManager}. + */ +public class CleanupLedgerManagerTest { + + protected LedgerManager ledgerManager = null; + protected CleanupLedgerManager cleanupLedgerManager = null; + + @Before + public void setup() throws Exception { + ledgerManager = mock(LedgerManager.class); + CompletableFuture<Versioned<LedgerMetadata>> future = new CompletableFuture<>(); + future.completeExceptionally(new Exception("LedgerNotExistException")); + when(ledgerManager.createLedgerMetadata(anyLong(), any())).thenReturn(future); + when(ledgerManager.readLedgerMetadata(anyLong())).thenReturn(future); + when(ledgerManager.writeLedgerMetadata(anyLong(), any(), any())).thenReturn( + future); + CompletableFuture<Void> removeFuture = new CompletableFuture<>(); + removeFuture.completeExceptionally(new Exception("LedgerNotExistException")); + when(ledgerManager.removeLedgerMetadata(anyLong(), any())).thenReturn(removeFuture); + cleanupLedgerManager = new CleanupLedgerManager(ledgerManager); + } + + @Test + public void testCreateLedgerMetadataException() throws Exception { + cleanupLedgerManager.createLedgerMetadata(anyLong(), any(LedgerMetadata.class)); + Assert.assertEquals(0, cleanupLedgerManager.getCurrentFuturePromiseSize()); + } + + @Test + public void testReadLedgerMetadataException() throws Exception { + cleanupLedgerManager.readLedgerMetadata(anyLong()); + Assert.assertEquals(0, cleanupLedgerManager.getCurrentFuturePromiseSize()); + } + + @Test + public void testWriteLedgerMetadataException() throws Exception { + cleanupLedgerManager.writeLedgerMetadata(anyLong(), any(LedgerMetadata.class), any(Version.class)); + Assert.assertEquals(0, cleanupLedgerManager.getCurrentFuturePromiseSize()); + } + + @Test + public void testRemoveLedgerMetadataException() throws Exception { + cleanupLedgerManager.removeLedgerMetadata(anyLong(), any(Version.class)); + Assert.assertEquals(0, cleanupLedgerManager.getCurrentFuturePromiseSize()); + } +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/GcLedgersTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/GcLedgersTest.java index 1459500c2f7..fec74a8202f 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/GcLedgersTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/GcLedgersTest.java @@ -21,8 +21,6 @@ package org.apache.bookkeeper.meta; -import static org.apache.bookkeeper.bookie.BookKeeperServerStats.ACTIVE_LEDGER_COUNT; -import static org.apache.bookkeeper.bookie.BookKeeperServerStats.DELETED_LEDGER_COUNT; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; @@ -30,48 +28,57 @@ import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; +import com.google.common.collect.Lists; import io.netty.buffer.ByteBuf; +import io.netty.buffer.ByteBufAllocator; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; +import java.util.EnumSet; import java.util.HashSet;
import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.NavigableMap; +import java.util.PrimitiveIterator.OfLong; import java.util.Queue; import java.util.Random; import java.util.Set; import java.util.SortedSet; import java.util.TreeSet; +import java.util.concurrent.CompletableFuture; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; import org.apache.bookkeeper.bookie.BookieException; +import org.apache.bookkeeper.bookie.BookieImpl; import org.apache.bookkeeper.bookie.CheckpointSource; import org.apache.bookkeeper.bookie.CheckpointSource.Checkpoint; import org.apache.bookkeeper.bookie.Checkpointer; import org.apache.bookkeeper.bookie.CompactableLedgerStorage; import org.apache.bookkeeper.bookie.EntryLocation; -import org.apache.bookkeeper.bookie.EntryLogger; import org.apache.bookkeeper.bookie.GarbageCollector; import org.apache.bookkeeper.bookie.LastAddConfirmedUpdateNotification; import org.apache.bookkeeper.bookie.LedgerDirsManager; import org.apache.bookkeeper.bookie.ScanAndCompareGarbageCollector; import org.apache.bookkeeper.bookie.StateManager; import org.apache.bookkeeper.client.BKException; -import org.apache.bookkeeper.client.BookKeeper.DigestType; -import org.apache.bookkeeper.client.LedgerMetadata; +import org.apache.bookkeeper.client.LedgerMetadataBuilder; +import org.apache.bookkeeper.client.api.DigestType; +import org.apache.bookkeeper.client.api.LedgerMetadata; import org.apache.bookkeeper.common.util.Watcher; import org.apache.bookkeeper.conf.ServerConfiguration; import org.apache.bookkeeper.meta.LedgerManager.LedgerRange; import org.apache.bookkeeper.meta.LedgerManager.LedgerRangeIterator; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.net.BookieSocketAddress; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GenericCallback; import org.apache.bookkeeper.stats.NullStatsLogger; import org.apache.bookkeeper.stats.StatsLogger; import org.apache.bookkeeper.test.TestStatsProvider; import org.apache.bookkeeper.versioning.Version; +import org.apache.bookkeeper.versioning.Versioned; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -86,11 +93,19 @@ public GcLedgersTest(Class lmFactoryCls) { super(lmFactoryCls); } + private void createLedgers(int numLedgers, final Set createdLedgers) throws IOException { + BookieId selfBookie = BookieImpl.getBookieId(baseConf); + createLedgers(numLedgers, createdLedgers, selfBookie); + } + /** * Create ledgers. 
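*
* <p>For reference, a sketch of the future-based creation flow the rewritten
* helper below drives; {@code lm} and {@code ensemble} are stand-ins for the
* test's ledger manager and bookie list, and the snippet is illustrative
* rather than part of this patch:
* <pre>{@code
* LedgerMetadata md = LedgerMetadataBuilder.create()
*         .withId(ledgerId)
*         .withDigestType(DigestType.CRC32C)
*         .withPassword(new byte[0])
*         .withEnsembleSize(1).withWriteQuorumSize(1).withAckQuorumSize(1)
*         .newEnsembleEntry(0L, ensemble)
*         .build();
* lm.createLedgerMetadata(ledgerId, md)
*         .whenComplete((written, ex) -> {
*             // on success, `written` carries the stored metadata and its version
*         });
* }</pre>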
*/ - private void createLedgers(int numLedgers, final Set createdLedgers) throws IOException { + private void createLedgers(int numLedgers, final Set createdLedgers, BookieId selfBookie) + throws IOException { final AtomicInteger expected = new AtomicInteger(numLedgers); + List ensemble = Lists.newArrayList(selfBookie); + for (int i = 0; i < numLedgers; i++) { getLedgerIdGenerator().generateLedgerId(new GenericCallback() { @Override @@ -105,20 +120,23 @@ public void operationComplete(int rc, final Long ledgerId) { return; } - getLedgerManager().createLedgerMetadata(ledgerId, - new LedgerMetadata(1, 1, 1, DigestType.MAC, "".getBytes()), - new GenericCallback() { - @Override - public void operationComplete(int rc, LedgerMetadata writtenMetadata) { - if (rc == BKException.Code.OK) { - activeLedgers.put(ledgerId, true); - createdLedgers.add(ledgerId); - } - synchronized (expected) { - int num = expected.decrementAndGet(); - if (num == 0) { - expected.notify(); - } + LedgerMetadata md = LedgerMetadataBuilder.create() + .withId(ledgerId) + .withDigestType(DigestType.CRC32C) + .withPassword(new byte[0]) + .withEnsembleSize(1).withWriteQuorumSize(1).withAckQuorumSize(1) + .newEnsembleEntry(0L, ensemble).build(); + + getLedgerManager().createLedgerMetadata(ledgerId, md) + .whenComplete((result, exception) -> { + if (exception == null) { + activeLedgers.put(ledgerId, true); + createdLedgers.add(ledgerId); + } + synchronized (expected) { + int num = expected.decrementAndGet(); + if (num == 0) { + expected.notify(); } } }); @@ -137,18 +155,7 @@ public void operationComplete(int rc, LedgerMetadata writtenMetadata) { } private void removeLedger(long ledgerId) throws Exception { - final AtomicInteger rc = new AtomicInteger(0); - final CountDownLatch latch = new CountDownLatch(1); - getLedgerManager().removeLedgerMetadata(ledgerId, Version.ANY, - new GenericCallback() { - @Override - public void operationComplete(int rc2, Void result) { - rc.set(rc2); - latch.countDown(); - } - }); - assertTrue(latch.await(10, TimeUnit.SECONDS)); - assertEquals("Remove should have succeeded for ledgerId: " + ledgerId, 0, rc.get()); + getLedgerManager().removeLedgerMetadata(ledgerId, Version.ANY).get(10, TimeUnit.SECONDS); } @Test @@ -169,18 +176,7 @@ public void testGarbageCollectLedgers() throws Exception { // random remove several ledgers for (int i = 0; i < numRemovedLedgers; i++) { long ledgerId = tmpList.get(i); - synchronized (removedLedgers) { - getLedgerManager().removeLedgerMetadata(ledgerId, Version.ANY, - new GenericCallback() { - @Override - public void operationComplete(int rc, Void result) { - synchronized (removedLedgers) { - removedLedgers.notify(); - } - } - }); - removedLedgers.wait(); - } + getLedgerManager().removeLedgerMetadata(ledgerId, Version.ANY).get(); removedLedgers.add(ledgerId); createdLedgers.remove(ledgerId); } @@ -189,7 +185,7 @@ public void operationComplete(int rc, Void result) { final CountDownLatch endLatch = new CountDownLatch(2); final CompactableLedgerStorage mockLedgerStorage = new MockLedgerStorage(); TestStatsProvider stats = new TestStatsProvider(); - final GarbageCollector garbageCollector = new ScanAndCompareGarbageCollector(getLedgerManager(), + final ScanAndCompareGarbageCollector garbageCollector = new ScanAndCompareGarbageCollector(getLedgerManager(), mockLedgerStorage, baseConf, stats.getStatsLogger("gc")); Thread gcThread = new Thread() { @Override @@ -251,12 +247,9 @@ public void run() { for (Long ledger : createdLedgers) { assertTrue(activeLedgers.containsKey(ledger)); 
} - assertTrue( - "Wrong DELETED_LEDGER_COUNT", - stats.getCounter("gc." + DELETED_LEDGER_COUNT).get() == removedLedgers.size()); assertTrue( "Wrong ACTIVE_LEDGER_COUNT", - stats.getGauge("gc." + ACTIVE_LEDGER_COUNT).getSample().intValue() == createdLedgers.size()); + garbageCollector.getNumActiveLedgers() == createdLedgers.size()); } @Test @@ -269,7 +262,7 @@ public void testGcLedgersOutsideRange() throws Exception { MockLedgerStorage mockLedgerStorage = new MockLedgerStorage(); TestStatsProvider stats = new TestStatsProvider(); - final GarbageCollector garbageCollector = new ScanAndCompareGarbageCollector(getLedgerManager(), + final ScanAndCompareGarbageCollector garbageCollector = new ScanAndCompareGarbageCollector(getLedgerManager(), mockLedgerStorage, baseConf, stats.getStatsLogger("gc")); GarbageCollector.GarbageCleaner cleaner = new GarbageCollector.GarbageCleaner() { @Override @@ -287,37 +280,26 @@ public void clean(long ledgerId) { garbageCollector.gc(cleaner); assertNull("Should have cleaned nothing", cleaned.poll()); - assertTrue( - "Wrong DELETED_LEDGER_COUNT", - stats.getCounter("gc." + DELETED_LEDGER_COUNT).get() == 0); assertTrue( "Wrong ACTIVE_LEDGER_COUNT", - stats.getGauge( - "gc." + ACTIVE_LEDGER_COUNT).getSample().intValue() == numLedgers); + garbageCollector.getNumActiveLedgers() == numLedgers); long last = createdLedgers.last(); removeLedger(last); garbageCollector.gc(cleaner); assertNotNull("Should have cleaned something", cleaned.peek()); assertEquals("Should have cleaned last ledger" + last, (long) last, (long) cleaned.poll()); - assertTrue( - "Wrong DELETED_LEDGER_COUNT", - stats.getCounter("gc." + DELETED_LEDGER_COUNT).get() == 1); long first = createdLedgers.first(); removeLedger(first); garbageCollector.gc(cleaner); assertNotNull("Should have cleaned something", cleaned.peek()); assertEquals("Should have cleaned first ledger" + first, (long) first, (long) cleaned.poll()); - assertTrue( - "Wrong DELETED_LEDGER_COUNT", - stats.getCounter("gc." + DELETED_LEDGER_COUNT).get() == 2); garbageCollector.gc(cleaner); assertTrue( "Wrong ACTIVE_LEDGER_COUNT", - stats.getGauge( - "gc." + ACTIVE_LEDGER_COUNT).getSample().intValue() == (numLedgers - 2)); + garbageCollector.getNumActiveLedgers() == (numLedgers - 2)); } @@ -342,7 +324,7 @@ public void clean(long ledgerId) { }; SortedSet scannedLedgers = new TreeSet(); - LedgerRangeIterator iterator = getLedgerManager().getLedgerRanges(); + LedgerRangeIterator iterator = getLedgerManager().getLedgerRanges(0); while (iterator.hasNext()) { LedgerRange ledgerRange = iterator.next(); scannedLedgers.addAll(ledgerRange.getLedgers()); @@ -432,7 +414,7 @@ public void clean(long ledgerId) { * * ScanAndCompareGarbageCollector/GC should clean data of ledger only if both the LedgerManager.getLedgerRanges says * that ledger is not existing and also ledgerManager.readLedgerMetadata fails with error - * NoSuchLedgerExistsException. + * NoSuchLedgerExistsOnMetadataServerException. 
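*
* <p>A sketch of that double check, assuming the test's {@code result()}
* helper and a {@code GarbageCollector.GarbageCleaner} named {@code cleaner}
* (illustrative only):
* <pre>{@code
* try {
*     result(ledgerManager.readLedgerMetadata(ledgerId));
*     // the ledger is still in the metadata store: keep its data
* } catch (BKException bke) {
*     if (bke.getCode() == BKException.Code.NoSuchLedgerExistsOnMetadataServerException) {
*         cleaner.clean(ledgerId); // both checks agree: safe to delete
*     }
*     // any other error (e.g. ZKException): skip and retry on the next GC run
* }
* }</pre>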
* */ @Test @@ -448,7 +430,7 @@ public void testGcLedgersIfLedgerManagerIteratorFails() throws Exception { LedgerManager mockLedgerManager = new CleanupLedgerManager(getLedgerManager()) { @Override - public LedgerRangeIterator getLedgerRanges() { + public LedgerRangeIterator getLedgerRanges(long zkOpTimeout) { return new LedgerRangeIterator() { @Override public LedgerRange next() throws IOException { @@ -490,7 +472,7 @@ public void clean(long ledgerId) { * * ScanAndCompareGarbageCollector/GC should clean data of ledger only if both the LedgerManager.getLedgerRanges says * that ledger is not existing and also ledgerManager.readLedgerMetadata fails with error - * NoSuchLedgerExistsException. + * NoSuchLedgerExistsOnMetadataServerException. * */ @Test @@ -503,10 +485,12 @@ public void testGcLedgersIfReadLedgerMetadataSaysNoSuchLedger() throws Exception createLedgers(numLedgers, createdLedgers); + CompletableFuture<Versioned<LedgerMetadata>> errorFuture = new CompletableFuture<>(); + errorFuture.completeExceptionally(new BKException.BKNoSuchLedgerExistsException()); LedgerManager mockLedgerManager = new CleanupLedgerManager(getLedgerManager()) { @Override - public void readLedgerMetadata(long ledgerId, GenericCallback<LedgerMetadata> readCb) { - readCb.operationComplete(BKException.Code.NoSuchLedgerExistsException, null); + public CompletableFuture<Versioned<LedgerMetadata>> readLedgerMetadata(long ledgerId) { + return errorFuture; } }; @@ -532,7 +516,8 @@ public void clean(long ledgerId) { * * ScanAndCompareGarbageCollector/GC should clean data of ledger only if both the LedgerManager.getLedgerRanges says * that ledger is not existing and also ledgerManager.readLedgerMetadata fails with error - * NoSuchLedgerExistsException, but is shouldn't delete if the readLedgerMetadata fails with any other error. + * NoSuchLedgerExistsOnMetadataServerException, but it shouldn't delete if the readLedgerMetadata fails with any + * other error.
*/ @Test public void testGcLedgersIfReadLedgerMetadataFailsForDeletedLedgers() throws Exception { @@ -545,10 +530,12 @@ public void testGcLedgersIfReadLedgerMetadataFailsForDeletedLedgers() throws Exc createLedgers(numLedgers, createdLedgers); + CompletableFuture> errorFuture = new CompletableFuture<>(); + errorFuture.completeExceptionally(new BKException.ZKException()); LedgerManager mockLedgerManager = new CleanupLedgerManager(getLedgerManager()) { @Override - public void readLedgerMetadata(long ledgerId, GenericCallback readCb) { - readCb.operationComplete(BKException.Code.ZKException, null); + public CompletableFuture> readLedgerMetadata(long ledgerId) { + return errorFuture; } }; @@ -574,7 +561,7 @@ public void clean(long ledgerId) { public void validateLedgerRangeIterator(SortedSet createdLedgers) throws IOException { SortedSet scannedLedgers = new TreeSet(); - LedgerRangeIterator iterator = getLedgerManager().getLedgerRanges(); + LedgerRangeIterator iterator = getLedgerManager().getLedgerRanges(0); while (iterator.hasNext()) { LedgerRange ledgerRange = iterator.next(); scannedLedgers.addAll(ledgerRange.getLedgers()); @@ -591,12 +578,17 @@ public void initialize( LedgerManager ledgerManager, LedgerDirsManager ledgerDirsManager, LedgerDirsManager indexDirsManager, - StateManager stateManager, - CheckpointSource checkpointSource, - Checkpointer checkpointer, - StatsLogger statsLogger) throws IOException { + StatsLogger statsLogger, + ByteBufAllocator allocator) throws IOException { } + @Override + public void setStateManager(StateManager stateManager) {} + @Override + public void setCheckpointSource(CheckpointSource checkpointSource) {} + @Override + public void setCheckpointer(Checkpointer checkpointer) {} + @Override public void start() { } @@ -611,7 +603,7 @@ public long getLastAddConfirmed(long ledgerId) throws IOException { } @Override - public void setExplicitlac(long ledgerId, ByteBuf lac) throws IOException { + public void setExplicitLac(long ledgerId, ByteBuf lac) throws IOException { } @Override @@ -624,6 +616,11 @@ public boolean ledgerExists(long ledgerId) throws IOException { return false; } + @Override + public boolean entryExists(long ledgerId, long entryId) throws IOException { + return false; + } + @Override public boolean setFenced(long ledgerId) throws IOException { return false; @@ -675,11 +672,6 @@ public Iterable getActiveLedgersInRange(long firstLedgerId, long lastLedge return subBkActiveLedgers.keySet(); } - @Override - public EntryLogger getEntryLogger() { - return null; - } - @Override public void updateEntriesLocations(Iterable locations) throws IOException { } @@ -699,5 +691,98 @@ public boolean waitForLastAddConfirmedUpdate(long ledgerId, throws IOException { return false; } + + @Override + public void cancelWaitForLastAddConfirmedUpdate(long ledgerId, + Watcher watcher) + throws IOException { + } + + @Override + public OfLong getListOfEntriesOfLedger(long ledgerId) throws IOException { + return null; + } + + @Override + public void setLimboState(long ledgerId) throws IOException { + throw new UnsupportedOperationException( + "Limbo state only supported for DbLedgerStorage"); + } + + @Override + public boolean hasLimboState(long ledgerId) throws IOException { + throw new UnsupportedOperationException( + "Limbo state only supported for DbLedgerStorage"); + } + + @Override + public void clearLimboState(long ledgerId) throws IOException { + throw new UnsupportedOperationException( + "Limbo state only supported for DbLedgerStorage"); + } + + @Override + public 
EnumSet<StorageState> getStorageStateFlags() throws IOException { + return EnumSet.noneOf(StorageState.class); + } + + @Override + public void setStorageStateFlag(StorageState flag) throws IOException { + } + + @Override + public void clearStorageStateFlag(StorageState flag) throws IOException { + } + } + + /** + * Verifies that gc should clean up overreplicated ledgers which are not + * owned by the bookie anymore. + * + * @throws Exception + */ + @Test + public void testGcLedgersForOverreplicated() throws Exception { + baseConf.setVerifyMetadataOnGc(true); + final SortedSet<Long> createdLedgers = Collections.synchronizedSortedSet(new TreeSet<Long>()); + final SortedSet<Long> cleaned = Collections.synchronizedSortedSet(new TreeSet<Long>()); + + // Create a few ledgers + final int numLedgers = 5; + + BookieId bookieAddress = new BookieSocketAddress("192.0.0.1", 1234).toBookieId(); + createLedgers(numLedgers, createdLedgers, bookieAddress); + + LedgerManager mockLedgerManager = new CleanupLedgerManager(getLedgerManager()) { + @Override + public LedgerRangeIterator getLedgerRanges(long zkOpTimeout) { + return new LedgerRangeIterator() { + @Override + public LedgerRange next() throws IOException { + return null; + } + + @Override + public boolean hasNext() throws IOException { + return false; + } + }; + } + }; + + final GarbageCollector garbageCollector = new ScanAndCompareGarbageCollector(mockLedgerManager, + new MockLedgerStorage(), baseConf, NullStatsLogger.INSTANCE); + GarbageCollector.GarbageCleaner cleaner = new GarbageCollector.GarbageCleaner() { + @Override + public void clean(long ledgerId) { + LOG.info("Cleaned {}", ledgerId); + cleaned.add(ledgerId); + } + }; + + validateLedgerRangeIterator(createdLedgers); + + garbageCollector.gc(cleaner); + assertEquals("Should have cleaned all ledgers", cleaned.size(), numLedgers); } } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/LedgerManagerIteratorTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/LedgerManagerIteratorTest.java index 1804b2a99f3..53e4c4fd5dc 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/LedgerManagerIteratorTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/LedgerManagerIteratorTest.java @@ -25,37 +25,37 @@ import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; +import com.google.common.collect.Lists; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.List; -import java.util.Optional; -import java.util.Queue; import java.util.Random; import java.util.Set; import java.util.TreeSet; import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ConcurrentLinkedQueue; +import java.util.concurrent.ConcurrentSkipListSet; import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; - import org.apache.bookkeeper.client.BKException; import org.apache.bookkeeper.client.BookKeeper; -import org.apache.bookkeeper.client.LedgerMetadata; +import org.apache.bookkeeper.client.LedgerMetadataBuilder; +import org.apache.bookkeeper.client.api.LedgerMetadata; +import org.apache.bookkeeper.common.util.MathUtils; import org.apache.bookkeeper.meta.LedgerManager.LedgerRangeIterator; import
org.apache.bookkeeper.meta.zk.ZKMetadataDriverBase; -import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GenericCallback; -import org.apache.bookkeeper.util.MathUtils; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.net.BookieSocketAddress; import org.apache.bookkeeper.util.ZkUtils; import org.apache.bookkeeper.versioning.Version; -import org.apache.mina.util.ConcurrentHashSet; import org.apache.zookeeper.CreateMode; import org.apache.zookeeper.ZooDefs; -import org.junit.After; import org.junit.Assert; import org.junit.Assume; import org.junit.Test; @@ -68,119 +68,36 @@ public LedgerManagerIteratorTest(Class lmFactory super(lmFactoryCls); } - final Queue exceptions = new ConcurrentLinkedQueue<>(); - - Runnable safeWrapper(Runnable r) { - return () -> { - try { - r.run(); - } catch (Throwable e) { - exceptions.add(e); - } - }; - } - - @After - public void throwAsyncErrors() throws Throwable { - while (exceptions.peek() != null) { - throw exceptions.remove(); - } - } - - class RCCheckCB implements GenericCallback { - private final String opType; - private final CountDownLatch latch; - private final Optional rcExpected; - private final long ledgerId; - - public RCCheckCB(String opType, CountDownLatch latch, Optional rcExpected, long ledgerId) { - this.opType = opType; - this.latch = latch; - this.rcExpected = rcExpected; - this.ledgerId = ledgerId; - } - - @Override - public void operationComplete(int rc, LedgerMetadata writtenMetadata) { - safeWrapper(() -> { - try { - rcExpected.map((Integer expected) -> { - assertEquals( - "Incorrect rc on ledger: " + ledgerId + ", op type: " + opType, - expected.longValue(), rc); - return null; - }); - } finally { - latch.countDown(); - } - }).run(); - } - } - - class VoidRCCheckCB implements GenericCallback { - private final String opType; - private final CountDownLatch latch; - private final Optional rcExpected; - private final long ledgerId; - - public VoidRCCheckCB(String opType, CountDownLatch latch, Optional rcExpected, long ledgerId) { - this.opType = opType; - this.latch = latch; - this.rcExpected = rcExpected; - this.ledgerId = ledgerId; - } - - @Override - public void operationComplete(int rc, Void result) { - safeWrapper(() -> { - try { - rcExpected.map((Integer expected) -> { - assertEquals( - "Incorrect rc on ledger: " + ledgerId + ", op type: " + opType, - expected.longValue(), rc); - return null; - }); - } finally { - latch.countDown(); - } - }).run(); - } - } - /** - * Remove ledger using lm syncronously. + * Remove ledger using lm synchronously. * * @param lm * @param ledgerId - * @param rcExpected return value expected, -1 to ignore * @throws InterruptedException */ - void removeLedger(LedgerManager lm, Long ledgerId, Optional rcExpected) throws Throwable { - CountDownLatch latch = new CountDownLatch(1); - lm.removeLedgerMetadata( - ledgerId, Version.ANY, new VoidRCCheckCB("removeLedger", latch, rcExpected, ledgerId)); - latch.await(); - throwAsyncErrors(); - + void removeLedger(LedgerManager lm, Long ledgerId) throws Exception { + lm.removeLedgerMetadata(ledgerId, Version.ANY).get(); } /** - * Create ledger using lm syncronously. + * Create ledger using lm synchronously. 
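*
* <p>Both the remove and create helpers simply block on the CompletableFuture
* returned by the refactored API, e.g. (sketch, with {@code lm} standing in
* for the LedgerManager under test):
* <pre>{@code
* lm.createLedgerMetadata(ledgerId, meta).get();
* lm.removeLedgerMetadata(ledgerId, Version.ANY).get(10, TimeUnit.SECONDS);
* }</pre>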
* * @param lm * @param ledgerId - * @param rcExpected return value expected, -1 to ignore * @throws InterruptedException */ - void createLedger(LedgerManager lm, Long ledgerId, Optional rcExpected) throws Throwable { - LedgerMetadata meta = new LedgerMetadata( - 3, 3, 2, - BookKeeper.DigestType.CRC32, "passwd".getBytes()); - CountDownLatch latch = new CountDownLatch(1); - lm.createLedgerMetadata( - ledgerId, meta, new RCCheckCB("createLedger", latch, rcExpected, ledgerId)); - latch.await(); - throwAsyncErrors(); + void createLedger(LedgerManager lm, Long ledgerId) throws Exception { + List ensemble = Lists.newArrayList(new BookieSocketAddress("192.0.2.1", 1234).toBookieId(), + new BookieSocketAddress("192.0.2.2", 1234).toBookieId(), + new BookieSocketAddress("192.0.2.3", 1234).toBookieId()); + LedgerMetadata meta = LedgerMetadataBuilder.create() + .withId(ledgerId) + .withEnsembleSize(3).withWriteQuorumSize(3).withAckQuorumSize(2) + .withPassword("passwd".getBytes()) + .withDigestType(BookKeeper.DigestType.CRC32.toApiDigestType()) + .newEnsembleEntry(0L, ensemble) + .build(); + lm.createLedgerMetadata(ledgerId, meta).get(); } static Set ledgerRangeToSet(LedgerRangeIterator lri) throws IOException { @@ -217,7 +134,7 @@ static Set getLedgerIdsByUsingAsyncProcessLedgers(LedgerManager lm) throws @Test public void testIterateNoLedgers() throws Exception { LedgerManager lm = getLedgerManager(); - LedgerRangeIterator lri = lm.getLedgerRanges(); + LedgerRangeIterator lri = lm.getLedgerRanges(0); assertNotNull(lri); if (lri.hasNext()) { lri.next(); @@ -231,9 +148,9 @@ public void testSingleLedger() throws Throwable { LedgerManager lm = getLedgerManager(); long id = 2020202; - createLedger(lm, id, Optional.of(BKException.Code.OK)); + createLedger(lm, id); - LedgerRangeIterator lri = lm.getLedgerRanges(); + LedgerRangeIterator lri = lm.getLedgerRanges(0); assertNotNull(lri); Set lids = ledgerRangeToSet(lri); assertEquals(lids.size(), 1); @@ -249,10 +166,10 @@ public void testTwoLedgers() throws Throwable { Set ids = new TreeSet<>(Arrays.asList(101010101L, 2020340302L)); for (Long id: ids) { - createLedger(lm, id, Optional.of(BKException.Code.OK)); + createLedger(lm, id); } - LedgerRangeIterator lri = lm.getLedgerRanges(); + LedgerRangeIterator lri = lm.getLedgerRanges(0); assertNotNull(lri); Set returnedIds = ledgerRangeToSet(lri); assertEquals(ids, returnedIds); @@ -267,11 +184,11 @@ public void testSeveralContiguousLedgers() throws Throwable { Set ids = new TreeSet<>(); for (long i = 0; i < 2000; ++i) { - createLedger(lm, i, Optional.of(BKException.Code.OK)); + createLedger(lm, i); ids.add(i); } - LedgerRangeIterator lri = lm.getLedgerRanges(); + LedgerRangeIterator lri = lm.getLedgerRanges(0); assertNotNull(lri); Set returnedIds = ledgerRangeToSet(lri); assertEquals(ids, returnedIds); @@ -311,18 +228,18 @@ public void testRemovalOfNodeJustTraversed() throws Throwable { ids.addAll(toRemove); ids.addAll(mustHave); for (Long id: ids) { - createLedger(lm, id, Optional.of(BKException.Code.OK)); + createLedger(lm, id); } Set found = new TreeSet<>(); - LedgerRangeIterator lri = lm.getLedgerRanges(); + LedgerRangeIterator lri = lm.getLedgerRanges(0); while (lri.hasNext()) { LedgerManager.LedgerRange lr = lri.next(); found.addAll(lr.getLedgers()); if (lr.getLedgers().contains(first)) { for (long id: toRemove) { - removeLedger(lm, id, Optional.of(BKException.Code.OK)); + removeLedger(lm, id); } toRemove.clear(); } @@ -348,10 +265,10 @@ public void validateEmptyL4PathSkipped() throws Throwable { 
6334994393848474732L, 7349370101927398483L)); for (Long id: ids) { - createLedger(lm, id, Optional.of(BKException.Code.OK)); + createLedger(lm, id); } - String paths[] = { + String[] paths = { "/ledgers/633/4994/3938/4948", // Empty L4 path, must be skipped }; @@ -362,7 +279,7 @@ public void validateEmptyL4PathSkipped() throws Throwable { path, "data".getBytes(), ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); } - LedgerRangeIterator lri = lm.getLedgerRanges(); + LedgerRangeIterator lri = lm.getLedgerRanges(0); assertNotNull(lri); Set returnedIds = ledgerRangeToSet(lri); assertEquals(ids, returnedIds); @@ -370,7 +287,7 @@ public void validateEmptyL4PathSkipped() throws Throwable { Set ledgersReadAsync = getLedgerIdsByUsingAsyncProcessLedgers(lm); assertEquals("Comparing LedgersIds read asynchronously", ids, ledgersReadAsync); - lri = lm.getLedgerRanges(); + lri = lm.getLedgerRanges(0); int emptyRanges = 0; while (lri.hasNext()) { if (lri.next().getLedgers().isEmpty()) { @@ -395,10 +312,10 @@ public void testWithSeveralIncompletePaths() throws Throwable { 6334994393848474732L, 7349370101927398483L)); for (Long id: ids) { - createLedger(lm, id, Optional.of(BKException.Code.OK)); + createLedger(lm, id); } - String paths[] = { + String[] paths = { "/ledgers/000/0000/0000", // top level, W-4292762 "/ledgers/234/5678/9999", // shares two path segments with the first one, comes after "/ledgers/339/0000/0000", // shares one path segment with the second one, comes first @@ -412,7 +329,7 @@ public void testWithSeveralIncompletePaths() throws Throwable { path, "data".getBytes(), ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); } - LedgerRangeIterator lri = lm.getLedgerRanges(); + LedgerRangeIterator lri = lm.getLedgerRanges(0); assertNotNull(lri); Set returnedIds = ledgerRangeToSet(lri); assertEquals(ids, returnedIds); @@ -438,57 +355,46 @@ public void checkConcurrentModifications() throws Throwable { if (!longRange) { lid %= 1000000; } - createLedger(lm, lid, Optional.of(BKException.Code.OK)); + createLedger(lm, lid); mustExist.add(lid); } final long start = MathUtils.nowInNano(); final CountDownLatch latch = new CountDownLatch(1); - ArrayList threads = new ArrayList<>(); - final ConcurrentHashSet createdLedgers = new ConcurrentHashSet<>(); + ArrayList> futures = new ArrayList<>(); + ExecutorService executor = Executors.newCachedThreadPool(); + final ConcurrentSkipListSet createdLedgers = new ConcurrentSkipListSet<>(); for (int i = 0; i < numWriters; ++i) { - Thread thread = new Thread(safeWrapper(() -> { - LedgerManager writerLM = getIndependentLedgerManager(); - Random writerRNG = new Random(rng.nextLong()); - try { + Future f = executor.submit(() -> { + LedgerManager writerLM = getIndependentLedgerManager(); + Random writerRNG = new Random(rng.nextLong()); + latch.await(); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - fail("Checker interrupted"); - } - while (MathUtils.elapsedNanos(start) < runtime) { - long candidate = 0; - do { - candidate = Math.abs(writerRNG.nextLong()); - if (!longRange) { - candidate %= 1000000; - } - } while (mustExist.contains(candidate) || !createdLedgers.add(candidate)); - try { - createLedger(writerLM, candidate, Optional.empty()); - removeLedger(writerLM, candidate, Optional.empty()); - } catch (Throwable e) { - fail("Got exception thrashing store: " + e.toString()); + + while (MathUtils.elapsedNanos(start) < runtime) { + long candidate = 0; + do { + candidate = Math.abs(writerRNG.nextLong()); + if (!longRange) { + candidate %= 
1000000; + } + } while (mustExist.contains(candidate) || !createdLedgers.add(candidate)); + + createLedger(writerLM, candidate); + removeLedger(writerLM, candidate); } - } - })); - thread.start(); - threads.add(thread); + return null; + }); + futures.add(f); } for (int i = 0; i < numCheckers; ++i) { - Thread thread = new Thread(safeWrapper(() -> { - LedgerManager checkerLM = getIndependentLedgerManager(); - try { + Future f = executor.submit(() -> { + LedgerManager checkerLM = getIndependentLedgerManager(); latch.await(); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - fail("Checker interrupted"); - e.printStackTrace(); - } - while (MathUtils.elapsedNanos(start) < runtime) { - try { - LedgerRangeIterator lri = checkerLM.getLedgerRanges(); + + while (MathUtils.elapsedNanos(start) < runtime) { + LedgerRangeIterator lri = checkerLM.getLedgerRanges(0); Set returnedIds = ledgerRangeToSet(lri); for (long id: mustExist) { assertTrue(returnedIds.contains(id)); @@ -498,20 +404,17 @@ public void checkConcurrentModifications() throws Throwable { for (long id: mustExist) { assertTrue(ledgersReadAsync.contains(id)); } - } catch (IOException | InterruptedException e) { - e.printStackTrace(); - fail("Got exception scanning ledgers: " + e.toString()); } - } - })); - thread.start(); - threads.add(thread); + return null; + }); + futures.add(f); } latch.countDown(); - for (Thread thread: threads) { - thread.join(); + for (Future f : futures) { + f.get(); } + executor.shutdownNow(); } @SuppressWarnings("deprecation") @@ -559,7 +462,7 @@ public void testLedgerManagerFormat() throws Throwable { ids.add(1234567891234L); } for (Long id : ids) { - createLedger(lm, id, Optional.of(BKException.Code.OK)); + createLedger(lm, id); } // create some invalid nodes under zkLedgersRootPath @@ -602,11 +505,11 @@ public void testLedgerManagerFormat() throws Throwable { public void hierarchicalLedgerManagerAsyncProcessLedgersTest() throws Throwable { Assume.assumeTrue(baseConf.getLedgerManagerFactoryClass().equals(HierarchicalLedgerManagerFactory.class)); LedgerManager lm = getLedgerManager(); - LedgerRangeIterator lri = lm.getLedgerRanges(); + LedgerRangeIterator lri = lm.getLedgerRanges(0); Set ledgerIds = new TreeSet<>(Arrays.asList(1234L, 123456789123456789L)); for (Long ledgerId : ledgerIds) { - createLedger(lm, ledgerId, Optional.of(BKException.Code.OK)); + createLedger(lm, ledgerId); } Set ledgersReadThroughIterator = ledgerRangeToSet(lri); assertEquals("Comparing LedgersIds read through Iterator", ledgerIds, ledgersReadThroughIterator); diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/LedgerManagerTestCase.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/LedgerManagerTestCase.java index 3aa2b8ac562..c81c3f7586d 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/LedgerManagerTestCase.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/LedgerManagerTestCase.java @@ -22,20 +22,22 @@ package org.apache.bookkeeper.meta; import io.netty.buffer.ByteBuf; +import io.netty.buffer.ByteBufAllocator; import java.io.IOException; import java.net.URI; import java.util.Arrays; import java.util.Collection; +import java.util.EnumSet; import java.util.Map; import java.util.NavigableMap; import java.util.Optional; +import java.util.PrimitiveIterator.OfLong; import org.apache.bookkeeper.bookie.BookieException; import org.apache.bookkeeper.bookie.CheckpointSource; import org.apache.bookkeeper.bookie.CheckpointSource.Checkpoint; import 
org.apache.bookkeeper.bookie.Checkpointer; import org.apache.bookkeeper.bookie.CompactableLedgerStorage; import org.apache.bookkeeper.bookie.EntryLocation; -import org.apache.bookkeeper.bookie.EntryLogger; import org.apache.bookkeeper.bookie.LastAddConfirmedUpdateNotification; import org.apache.bookkeeper.bookie.LedgerDirsManager; import org.apache.bookkeeper.bookie.StateManager; @@ -53,7 +55,7 @@ import org.junit.runners.Parameterized.Parameters; /** - * Test case to run over serveral ledger managers. + * Test case to run over several ledger managers. */ @RunWith(Parameterized.class) public abstract class LedgerManagerTestCase extends BookKeeperClusterTestCase { @@ -170,12 +172,17 @@ public void initialize( LedgerManager ledgerManager, LedgerDirsManager ledgerDirsManager, LedgerDirsManager indexDirsManager, - StateManager stateManager, - CheckpointSource checkpointSource, - Checkpointer checkpointer, - StatsLogger statsLogger) throws IOException { + StatsLogger statsLogger, + ByteBufAllocator allocator) throws IOException { } + @Override + public void setStateManager(StateManager stateManager) {} + @Override + public void setCheckpointSource(CheckpointSource checkpointSource) {} + @Override + public void setCheckpointer(Checkpointer checkpointer) {} + @Override public void start() { } @@ -189,6 +196,11 @@ public boolean ledgerExists(long ledgerId) throws IOException { return false; } + @Override + public boolean entryExists(long ledgerId, long entryId) throws IOException { + return false; + } + @Override public boolean setFenced(long ledgerId) throws IOException { return false; @@ -249,11 +261,6 @@ public Iterable getActiveLedgersInRange(long firstLedgerId, long lastLedge return subBkActiveLedgers.keySet(); } - @Override - public EntryLogger getEntryLogger() { - return null; - } - @Override public void updateEntriesLocations(Iterable locations) throws IOException { } @@ -271,12 +278,54 @@ public boolean waitForLastAddConfirmedUpdate(long ledgerId, } @Override - public void setExplicitlac(long ledgerId, ByteBuf lac) throws IOException { + public void cancelWaitForLastAddConfirmedUpdate(long ledgerId, + Watcher watcher) + throws IOException { + } + + @Override + public void setExplicitLac(long ledgerId, ByteBuf lac) throws IOException { } @Override public ByteBuf getExplicitLac(long ledgerId) { return null; } + + @Override + public OfLong getListOfEntriesOfLedger(long ledgerId) { + return null; + } + + @Override + public void setLimboState(long ledgerId) throws IOException { + throw new UnsupportedOperationException( + "Limbo state only supported for DbLedgerStorage"); + } + + @Override + public boolean hasLimboState(long ledgerId) throws IOException { + throw new UnsupportedOperationException( + "Limbo state only supported for DbLedgerStorage"); + } + + @Override + public void clearLimboState(long ledgerId) throws IOException { + throw new UnsupportedOperationException( + "Limbo state only supported for DbLedgerStorage"); + } + + @Override + public EnumSet getStorageStateFlags() throws IOException { + return EnumSet.noneOf(StorageState.class); + } + + @Override + public void setStorageStateFlag(StorageState flag) throws IOException { + } + + @Override + public void clearStorageStateFlag(StorageState flag) throws IOException { + } } } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/LedgerMetadataCreationTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/LedgerMetadataCreationTest.java index d4572dec36d..2bba38a9459 100644 --- 
a/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/LedgerMetadataCreationTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/LedgerMetadataCreationTest.java @@ -31,7 +31,6 @@ import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; - import org.apache.bookkeeper.client.BookKeeper; import org.apache.bookkeeper.client.BookKeeper.DigestType; import org.apache.bookkeeper.meta.zk.ZKMetadataDriverBase; diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/MetadataDriversTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/MetadataDriversTest.java index 2d69cd2bec1..90f956a8c86 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/MetadataDriversTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/MetadataDriversTest.java @@ -36,7 +36,6 @@ import org.apache.bookkeeper.conf.ServerConfiguration; import org.apache.bookkeeper.discover.RegistrationClient; import org.apache.bookkeeper.discover.RegistrationManager; -import org.apache.bookkeeper.discover.RegistrationManager.RegistrationListener; import org.apache.bookkeeper.meta.MetadataDrivers.MetadataBookieDriverInfo; import org.apache.bookkeeper.meta.MetadataDrivers.MetadataClientDriverInfo; import org.apache.bookkeeper.meta.exceptions.MetadataException; @@ -79,6 +78,10 @@ public LayoutManager getLayoutManager() { @Override public void close() { } + + @Override + public void setSessionStateListener(SessionStateListener sessionStateListener) { + } } static class ClientDriver1 extends TestClientDriver { @@ -88,6 +91,7 @@ public String getScheme() { return "driver1"; } + } static class ClientDriver2 extends TestClientDriver { @@ -102,13 +106,12 @@ public String getScheme() { abstract static class TestBookieDriver implements MetadataBookieDriver { @Override public MetadataBookieDriver initialize(ServerConfiguration conf, - RegistrationListener listener, StatsLogger statsLogger) throws MetadataException { return this; } @Override - public RegistrationManager getRegistrationManager() { + public RegistrationManager createRegistrationManager() { return mock(RegistrationManager.class); } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/MockLedgerManager.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/MockLedgerManager.java index 4586e09842e..3704e39e33f 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/MockLedgerManager.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/MockLedgerManager.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file * distributed with this work for additional information @@ -19,29 +19,28 @@ */ package org.apache.bookkeeper.meta; -import com.google.common.base.Optional; - +import com.google.common.collect.Lists; import java.util.ArrayList; -import java.util.HashMap; +import java.util.Comparator; +import java.util.HashSet; import java.util.List; import java.util.Map; - +import java.util.Optional; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CompletionException; +import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; -import java.util.function.Consumer; - import org.apache.bookkeeper.client.BKException; -import org.apache.bookkeeper.client.LedgerMetadata; - -import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GenericCallback; +import org.apache.bookkeeper.client.api.LedgerMetadata; +import org.apache.bookkeeper.common.concurrent.FutureUtils; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.LedgerMetadataListener; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.Processor; - import org.apache.bookkeeper.versioning.LongVersion; import org.apache.bookkeeper.versioning.Version; +import org.apache.bookkeeper.versioning.Versioned; import org.apache.commons.lang3.tuple.Pair; import org.apache.zookeeper.AsyncCallback; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -51,15 +50,21 @@ public class MockLedgerManager implements LedgerManager { static final Logger LOG = LoggerFactory.getLogger(MockLedgerManager.class); - boolean stallingWrites = false; - final List> stalledWrites = new ArrayList<>(); + /** + * Hook for injecting errors or delays. + */ + public interface Hook { + CompletableFuture runHook(long ledgerId, LedgerMetadata metadata); + } final Map> metadataMap; final ExecutorService executor; final boolean ownsExecutor; + final LedgerMetadataSerDe serDe; + private Hook preWriteHook = (ledgerId, metadata) -> FutureUtils.value(null); public MockLedgerManager() { - this(new HashMap<>(), + this(new ConcurrentHashMap<>(), Executors.newSingleThreadExecutor((r) -> new Thread(r, "MockLedgerManager")), true); } @@ -68,38 +73,24 @@ private MockLedgerManager(Map> metadataMap, this.metadataMap = metadataMap; this.executor = executor; this.ownsExecutor = ownsExecutor; + this.serDe = new LedgerMetadataSerDe(); } public MockLedgerManager newClient() { return new MockLedgerManager(metadataMap, executor, false); } - private LedgerMetadata readMetadata(long ledgerId) throws Exception { + private Versioned readMetadata(long ledgerId) throws Exception { Pair pair = metadataMap.get(ledgerId); if (pair == null) { return null; } else { - return LedgerMetadata.parseConfig(pair.getRight(), pair.getLeft(), Optional.absent()); + return new Versioned<>(serDe.parseConfig(pair.getRight(), ledgerId, Optional.empty()), pair.getLeft()); } } - public void stallWrites() throws Exception { - synchronized (this) { - stallingWrites = true; - } - } - - public void releaseStalledWrites(int rc) { - List> toRelease; - synchronized (this) { - stallingWrites = false; - toRelease = new ArrayList<>(stalledWrites); - stalledWrites.clear(); - } - - executor.execute(() -> { - toRelease.forEach(w -> w.accept(rc)); - }); + public void setPreWriteHook(Hook hook) { + this.preWriteHook = hook; } public void executeCallback(Runnable r) { @@ -107,82 +98,84 @@ public void executeCallback(Runnable r) { } @Override - public void createLedgerMetadata(long ledgerId, LedgerMetadata 
metadata, GenericCallback cb) { + public CompletableFuture> createLedgerMetadata(long ledgerId, LedgerMetadata metadata) { + CompletableFuture> promise = new CompletableFuture<>(); executor.submit(() -> { if (metadataMap.containsKey(ledgerId)) { - executeCallback(() -> cb.operationComplete(BKException.Code.LedgerExistException, null)); + executeCallback(() -> promise.completeExceptionally(new BKException.BKLedgerExistException())); } else { - metadataMap.put(ledgerId, Pair.of(new LongVersion(0L), metadata.serialize())); try { - LedgerMetadata readBack = readMetadata(ledgerId); - executeCallback(() -> cb.operationComplete(BKException.Code.OK, readBack)); + metadataMap.put(ledgerId, Pair.of(new LongVersion(0L), serDe.serialize(metadata))); + Versioned readBack = readMetadata(ledgerId); + executeCallback(() -> promise.complete(readBack)); } catch (Exception e) { LOG.error("Error reading back written metadata", e); - executeCallback(() -> cb.operationComplete(BKException.Code.MetaStoreException, null)); + executeCallback(() -> promise.completeExceptionally(new BKException.MetaStoreException())); } } }); + return promise; } @Override - public void removeLedgerMetadata(long ledgerId, Version version, GenericCallback cb) {} + public CompletableFuture removeLedgerMetadata(long ledgerId, Version version) { + return CompletableFuture.completedFuture(null); + } @Override - public void readLedgerMetadata(long ledgerId, GenericCallback cb) { + public CompletableFuture> readLedgerMetadata(long ledgerId) { + CompletableFuture> promise = new CompletableFuture<>(); executor.submit(() -> { try { - LedgerMetadata metadata = readMetadata(ledgerId); + Versioned metadata = readMetadata(ledgerId); if (metadata == null) { - executeCallback( - () -> cb.operationComplete(BKException.Code.NoSuchLedgerExistsException, null)); + executeCallback(() -> promise.completeExceptionally( + new BKException.BKNoSuchLedgerExistsOnMetadataServerException())); } else { - executeCallback(() -> cb.operationComplete(BKException.Code.OK, metadata)); + executeCallback(() -> promise.complete(metadata)); } } catch (Exception e) { LOG.error("Error reading metadata", e); - executeCallback(() -> cb.operationComplete(BKException.Code.MetaStoreException, null)); + executeCallback(() -> promise.completeExceptionally(new BKException.MetaStoreException())); } }); + return promise; } @Override - public void writeLedgerMetadata(long ledgerId, LedgerMetadata metadata, GenericCallback cb) { - Runnable write = () -> { - try { - LedgerMetadata oldMetadata = readMetadata(ledgerId); - if (oldMetadata == null) { - executeCallback(() -> cb.operationComplete(BKException.Code.NoSuchLedgerExistsException, null)); - } else if (!oldMetadata.getVersion().equals(metadata.getVersion())) { - executeCallback(() -> cb.operationComplete(BKException.Code.MetadataVersionException, null)); - } else { - LongVersion oldVersion = (LongVersion) oldMetadata.getVersion(); - metadataMap.put(ledgerId, Pair.of(new LongVersion(oldVersion.getLongVersion() + 1), - metadata.serialize())); - LedgerMetadata readBack = readMetadata(ledgerId); - executeCallback(() -> cb.operationComplete(BKException.Code.OK, readBack)); - } - } catch (Exception e) { - LOG.error("Error writing metadata", e); - executeCallback(() -> cb.operationComplete(BKException.Code.MetaStoreException, null)); - } - }; - - synchronized (this) { - if (stallingWrites) { - LOG.info("[L{}, stallId={}] Stalling write of metadata", ledgerId, System.identityHashCode(write)); - stalledWrites.add((rc) -> { - LOG.info("[L{}, 
stallid={}] Unstalled write", ledgerId, System.identityHashCode(write)); - - if (rc == BKException.Code.OK) { - write.run(); + public CompletableFuture> writeLedgerMetadata(long ledgerId, LedgerMetadata metadata, + Version currentVersion) { + CompletableFuture> promise = new CompletableFuture<>(); + preWriteHook.runHook(ledgerId, metadata) + .thenComposeAsync((ignore) -> { + try { + Versioned oldMetadata = readMetadata(ledgerId); + if (oldMetadata == null) { + return FutureUtils.exception( + new BKException.BKNoSuchLedgerExistsOnMetadataServerException()); + } else if (!oldMetadata.getVersion().equals(currentVersion)) { + return FutureUtils.exception(new BKException.BKMetadataVersionException()); } else { - executeCallback(() -> cb.operationComplete(rc, null)); + LongVersion oldVersion = (LongVersion) oldMetadata.getVersion(); + metadataMap.put(ledgerId, Pair.of(new LongVersion(oldVersion.getLongVersion() + 1), + serDe.serialize(metadata))); + Versioned readBack = readMetadata(ledgerId); + return FutureUtils.value(readBack); } - }); - } else { - executor.execute(write); - } - } + } catch (Exception e) { + LOG.error("Error writing metadata", e); + return FutureUtils.exception(e); + } + }, executor) + .whenComplete((res, ex) -> { + if (ex != null) { + Throwable cause = (ex instanceof CompletionException) ? ex.getCause() : ex; + executeCallback(() -> promise.completeExceptionally(cause)); + } else { + executeCallback(() -> promise.complete(res)); + } + }); + return promise; } @Override @@ -197,8 +190,26 @@ public void asyncProcessLedgers(Processor processor, AsyncCallback.VoidCal } @Override - public LedgerRangeIterator getLedgerRanges() { - return null; + public LedgerRangeIterator getLedgerRanges(long zkOpTimeoutMs) { + List ledgerIds = new ArrayList<>(metadataMap.keySet()); + ledgerIds.sort(Comparator.naturalOrder()); + List> partitions = Lists.partition(ledgerIds, 100); + return new LedgerRangeIterator() { + int i = 0; + @Override + public boolean hasNext() { + if (i >= partitions.size()) { + return false; + } else { + return true; + } + } + + @Override + public LedgerRange next() { + return new LedgerRange(new HashSet<>(partitions.get(i++))); + } + }; } @Override diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/TestLedgerManager.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/TestLedgerManager.java index 499a6e9521b..a91485a1d32 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/TestLedgerManager.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/TestLedgerManager.java @@ -183,7 +183,7 @@ public void testBadZkContents() throws Exception { String root0 = "/badzk0"; zkc.create(root0, new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); - conf.setMetadataServiceUri(newMetadataServiceUri(root0)); + conf.setMetadataServiceUri(newMetadataServiceUri(root0, HierarchicalLedgerManagerFactory.NAME)); LedgerLayout layout = new LedgerLayout("DoesNotExist", 0xdeadbeef); diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/TestLedgerMetadataSerDe.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/TestLedgerMetadataSerDe.java new file mode 100644 index 00000000000..be2a0f34a8c --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/TestLedgerMetadataSerDe.java @@ -0,0 +1,181 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
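Two changes in MockLedgerManager above are worth calling out. First, the old stallWrites/releaseStalledWrites machinery is replaced by the Hook seam: a test stalls or fails metadata writes by returning a future it controls. A hedged usage sketch, assuming the hook's future is a CompletableFuture<Void> to match the FutureUtils.value(null) default (mockLedgerManager is an illustrative variable name):

    CompletableFuture<Void> gate = new CompletableFuture<>();
    mockLedgerManager.setPreWriteHook((ledgerId, metadata) -> gate);
    // ... a writeLedgerMetadata(...) call now parks on the gate ...
    gate.complete(null); // release the stalled write
    // or fail it instead:
    // gate.completeExceptionally(new BKException.MetaStoreException());

Second, getLedgerRanges now serves real data: the ledger ids are sorted and handed out in fixed pages of 100. The paging shape in isolation:

    List<Long> sorted = new ArrayList<>(metadataMap.keySet());
    sorted.sort(Comparator.naturalOrder());
    List<List<Long>> pages = Lists.partition(sorted, 100); // one LedgerRange per page
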
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bookkeeper.meta; + +import static java.nio.charset.StandardCharsets.UTF_8; + +import com.google.common.collect.Lists; +import java.io.IOException; +import java.util.Base64; +import java.util.Optional; +import java.util.Random; +import org.apache.bookkeeper.client.LedgerMetadataBuilder; +import org.apache.bookkeeper.client.api.DigestType; +import org.apache.bookkeeper.client.api.LedgerMetadata; +import org.apache.bookkeeper.net.BookieSocketAddress; +import org.junit.Assert; +import org.junit.Test; + +/** + * Test Ledger Metadata serialization and deserialization. + */ +public class TestLedgerMetadataSerDe { + // as used in 4.0.x & 4.1.x + private static final String version1 = + "Qm9va2llTWV0YWRhdGFGb3JtYXRWZXJzaW9uCTEKMgozCjAKMAkxOTIuMC4yLjE6MTIzNAkxOTIu" + + "MC4yLjI6MTIzNAkxOTIuMC4yLjM6MTIzNAotMTAyCUNMT1NFRA=="; + + // as used in 4.2.x & 4.3.x (text protobuf based metadata, password and digest introduced) + private static final String version2 = + "Qm9va2llTWV0YWRhdGFGb3JtYXRWZXJzaW9uCTIKcXVvcnVtU2l6ZTogMgplbnNlbWJsZVNpemU6I" + + "DMKbGVuZ3RoOiAwCmxhc3RFbnRyeUlkOiAtMQpzdGF0ZTogSU5fUkVDT1ZFUlkKc2VnbWVudCB7" + + "CiAgZW5zZW1ibGVNZW1iZXI6ICIxOTIuMC4yLjE6MTIzNCIKICBlbnNlbWJsZU1lbWJlcjogIjE" + + "5Mi4wLjIuMjoxMjM0IgogIGVuc2VtYmxlTWVtYmVyOiAiMTkyLjAuMi4zOjEyMzQiCiAgZmlyc3" + + "RFbnRyeUlkOiAwCn0KZGlnZXN0VHlwZTogQ1JDMzIKcGFzc3dvcmQ6ICJwYXNzd2QiCmFja1F1b" + + "3J1bVNpemU6IDIK"; + + // version 2 + ctime, as used in 4.4.x to 4.8.x (ctime is optional from 4.6.x onwards) + private static final String version2ctime = + "Qm9va2llTWV0YWRhdGFGb3JtYXRWZXJzaW9uCTIKcXVvcnVtU2l6ZTogMgplbnNlbWJsZVNpemU6I" + + "DMKbGVuZ3RoOiAwCmxhc3RFbnRyeUlkOiAtMQpzdGF0ZTogSU5fUkVDT1ZFUlkKc2VnbWVudCB7" + + "CiAgZW5zZW1ibGVNZW1iZXI6ICIxOTIuMC4yLjE6MTIzNCIKICBlbnNlbWJsZU1lbWJlcjogIjE" + + "5Mi4wLjIuMjoxMjM0IgogIGVuc2VtYmxlTWVtYmVyOiAiMTkyLjAuMi4zOjEyMzQiCiAgZmlyc3" + + "RFbnRyeUlkOiAwCn0KZGlnZXN0VHlwZTogQ1JDMzIKcGFzc3dvcmQ6ICJwYXNzd2QiCmFja1F1b" + + "3J1bVNpemU6IDIKY3RpbWU6IDE1NDQwMDIzODMwNzUK"; + + // version 3, since 4.9.x, protobuf binary format + private static final String version3 = + "Qm9va2llTWV0YWRhdGFGb3JtYXRWZXJzaW9uCTMKYAgCEAMYACD///////////8BKAEyMgoOMTkyL" + + "jAuMi4xOjMxODEKDjE5Mi4wLjIuMjozMTgxCg4xOTIuMC4yLjM6MzE4MRAAOANCBmZvb2JhckgB" + + "UP///////////wFgAA=="; + + private static void testDecodeEncode(String encoded) throws Exception { + LedgerMetadataSerDe serDe = new LedgerMetadataSerDe(); + LedgerMetadata md = serDe.parseConfig(Base64.getDecoder().decode(encoded), 59L, Optional.empty()); + String reserialized = Base64.getEncoder().encodeToString(serDe.serialize(md)); + + Assert.assertEquals(encoded, reserialized); + } + + @Test + public void testVersion1SerDe() throws Exception { + testDecodeEncode(version1); + } + + @Test + public void testVersion2SerDe() throws Exception { + 
testDecodeEncode(version2); + } + + @Test + public void testVersion2CtimeSerDe() throws Exception { + testDecodeEncode(version2ctime); + } + + @Test + public void testVersion3SerDe() throws Exception { + testDecodeEncode(version3); + } + + @Test(expected = IOException.class) + public void testJunkSerDe() throws Exception { + LedgerMetadataSerDe serDe = new LedgerMetadataSerDe(); + String junk = ""; + serDe.parseConfig(junk.getBytes(UTF_8), 59L, Optional.empty()); + } + + @Test(expected = IOException.class) + public void testJunk2SerDe() throws Exception { + byte[] randomBytes = new byte[1000]; + new Random().nextBytes(randomBytes); + LedgerMetadataSerDe serDe = new LedgerMetadataSerDe(); + serDe.parseConfig(randomBytes, 59L, Optional.empty()); + } + + @Test(expected = IOException.class) + public void testJunkVersionSerDe() throws Exception { + byte[] junkVersion = "BookieMetadataFormatVersion\tfoobar\nblahblah".getBytes(UTF_8); + LedgerMetadataSerDe serDe = new LedgerMetadataSerDe(); + serDe.parseConfig(junkVersion, 59L, Optional.empty()); + } + + @Test(expected = IOException.class) + public void testVeryLongVersionSerDe() throws Exception { + byte[] veryLongVersion = "BookieMetadataFormatVersion\t123456789123456789\nblahblah".getBytes(UTF_8); + LedgerMetadataSerDe serDe = new LedgerMetadataSerDe(); + serDe.parseConfig(veryLongVersion, 59L, Optional.empty()); + } + + @Test + public void testPeggedToV3SerDe() throws Exception { + LedgerMetadataSerDe serDe = new LedgerMetadataSerDe(); + LedgerMetadata metadata = LedgerMetadataBuilder.create().withId(13L) + .withEnsembleSize(3).withWriteQuorumSize(2).withAckQuorumSize(1) + .withPassword("foobar".getBytes(UTF_8)).withDigestType(DigestType.CRC32C) + .newEnsembleEntry(0L, Lists.newArrayList(new BookieSocketAddress("192.0.2.1", 3181).toBookieId(), + new BookieSocketAddress("192.0.2.2", 3181).toBookieId(), + new BookieSocketAddress("192.0.2.3", 3181).toBookieId())) + .build(); + byte[] encoded = serDe.serialize(metadata); + + LedgerMetadata decoded = serDe.parseConfig(encoded, 59L, Optional.empty()); + Assert.assertEquals(LedgerMetadataSerDe.METADATA_FORMAT_VERSION_3, decoded.getMetadataFormatVersion()); + } + + @Test + public void testStoreSystemtimeAsLedgerCtimeEnabledWithNewerVersion() + throws Exception { + LedgerMetadata lm = LedgerMetadataBuilder.create().withId(13L) + .withEnsembleSize(3).withWriteQuorumSize(2).withAckQuorumSize(1) + .withPassword("foobar".getBytes(UTF_8)).withDigestType(DigestType.CRC32C) + .newEnsembleEntry(0L, Lists.newArrayList(new BookieSocketAddress("192.0.2.1", 1234).toBookieId(), + new BookieSocketAddress("192.0.2.2", 1234).toBookieId(), + new BookieSocketAddress("192.0.2.3", 1234).toBookieId())) + .withCreationTime(123456L) + .storingCreationTime(true) + .build(); + LedgerMetadataSerDe serDe = new LedgerMetadataSerDe(); + byte[] serialized = serDe.serialize(lm); + LedgerMetadata deserialized = serDe.parseConfig(serialized, 59L, Optional.of(654321L)); + Assert.assertEquals(deserialized.getCtime(), 123456L); + + // give it another round + LedgerMetadata deserialized2 = serDe.parseConfig(serDe.serialize(deserialized), 59L, Optional.of(98765L)); + Assert.assertEquals(deserialized2.getCtime(), 123456L); + } + + @Test + public void testStoreSystemtimeAsLedgerCtimeDisabledWithNewerVersion() + throws Exception { + LedgerMetadata lm = LedgerMetadataBuilder.create().withId(13L) + .withEnsembleSize(3).withWriteQuorumSize(2).withAckQuorumSize(1) + .withPassword("foobar".getBytes(UTF_8)).withDigestType(DigestType.CRC32C) + 
.newEnsembleEntry(0L, Lists.newArrayList(new BookieSocketAddress("192.0.2.1", 1234).toBookieId(), + new BookieSocketAddress("192.0.2.2", 1234).toBookieId(), + new BookieSocketAddress("192.0.2.3", 1234).toBookieId())) + .build(); + LedgerMetadataSerDe serDe = new LedgerMetadataSerDe(); + byte[] serialized = serDe.serialize(lm); + LedgerMetadata deserialized = serDe.parseConfig(serialized, 59L, Optional.of(654321L)); + Assert.assertEquals(654321L, deserialized.getCtime()); + + // give it another round + LedgerMetadata deserialized2 = serDe.parseConfig(serDe.serialize(deserialized), 59L, Optional.of(98765L)); + Assert.assertEquals(98765L, deserialized2.getCtime()); + } +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/TestLongZkLedgerIdGenerator.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/TestLongZkLedgerIdGenerator.java index bc6ac03db41..b409cc6b2fa 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/TestLongZkLedgerIdGenerator.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/TestLongZkLedgerIdGenerator.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -23,9 +23,7 @@ import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; - import junit.framework.TestCase; - import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GenericCallback; import org.apache.bookkeeper.test.ZooKeeperUtil; import org.apache.bookkeeper.util.ZkUtils; @@ -58,7 +56,7 @@ public void setUp() throws Exception { super.setUp(); zkutil = new ZooKeeperUtil(); - zkutil.startServer(); + zkutil.startCluster(); zk = zkutil.getZooKeeperClient(); ZkLedgerIdGenerator shortLedgerIdGenerator = new ZkLedgerIdGenerator(zk, @@ -73,7 +71,7 @@ public void tearDown() throws Exception { LOG.info("Tearing down test"); ledgerIdGenerator.close(); zk.close(); - zkutil.killServer(); + zkutil.killCluster(); super.tearDown(); } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/TestZkLedgerIdGenerator.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/TestZkLedgerIdGenerator.java index 3779af40bfe..44c739334aa 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/TestZkLedgerIdGenerator.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/TestZkLedgerIdGenerator.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
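The fixtures above pin the full history of the metadata wire format: version 1 is the custom text format from 4.0.x/4.1.x, version 2 the text-protobuf form from 4.2.x/4.3.x that introduced password and digest type, version 2 plus ctime the 4.4.x-4.8.x variant, and version 3 the binary protobuf used since 4.9.x; testDecodeEncode asserts that parse-then-serialize reproduces each stored blob byte for byte. The two ctime tests additionally pin down a resolution rule, restated here as a hedged sketch (inferred from the assertions, not copied from the implementation; the -1 sentinel is an assumption):

    // If the serialized metadata carried its own ctime (storingCreationTime),
    // that value survives every re-serialization; otherwise each parse adopts
    // the caller-supplied fallback, such as the znode's creation time.
    static long resolveCtime(boolean ctimeStored, long storedCtime, Optional<Long> fallback) {
        return ctimeStored ? storedCtime : fallback.orElse(-1L);
    }
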
See the NOTICE file * distributed with this work for additional information @@ -23,9 +23,7 @@ import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; - import junit.framework.TestCase; - import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GenericCallback; import org.apache.bookkeeper.test.ZooKeeperUtil; import org.apache.zookeeper.KeeperException.Code; @@ -55,7 +53,7 @@ public void setUp() throws Exception { super.setUp(); zkutil = new ZooKeeperUtil(); - zkutil.startServer(); + zkutil.startCluster(); zk = zkutil.getZooKeeperClient(); ledgerIdGenerator = new ZkLedgerIdGenerator(zk, @@ -68,7 +66,7 @@ public void tearDown() throws Exception { LOG.info("Tearing down test"); ledgerIdGenerator.close(); zk.close(); - zkutil.killServer(); + zkutil.killCluster(); super.tearDown(); } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/ZkLedgerLayoutTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/ZkLedgerLayoutTest.java index c1b27793245..cfe5a686ebd 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/ZkLedgerLayoutTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/ZkLedgerLayoutTest.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/ZkLedgerUnderreplicationManagerTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/ZkLedgerUnderreplicationManagerTest.java new file mode 100644 index 00000000000..11f5ed8c54f --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/ZkLedgerUnderreplicationManagerTest.java @@ -0,0 +1,244 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.bookkeeper.meta; + +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import com.google.common.collect.Lists; +import com.google.common.collect.Sets; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Set; +import java.util.concurrent.CompletableFuture; +import lombok.Cleanup; +import org.apache.bookkeeper.common.concurrent.FutureUtils; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.proto.DataFormats.UnderreplicatedLedgerFormat; +import org.apache.bookkeeper.replication.ReplicationException; +import org.apache.bookkeeper.suites.BookKeeperClusterTestSuite; +import org.apache.bookkeeper.util.ZkUtils; +import org.apache.bookkeeper.zookeeper.BoundExponentialBackoffRetryPolicy; +import org.apache.bookkeeper.zookeeper.ZooKeeperClient; +import org.apache.commons.lang3.StringUtils; +import org.apache.zookeeper.CreateMode; +import org.apache.zookeeper.KeeperException; +import org.apache.zookeeper.KeeperException.NoNodeException; +import org.apache.zookeeper.ZooKeeper; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +/** + * Unit test {@link ZkLedgerUnderreplicationManager}. + */ +public class ZkLedgerUnderreplicationManagerTest extends BookKeeperClusterTestSuite { + + @BeforeClass + public static void setUpCluster() throws Exception { + BookKeeperClusterTestSuite.setUpCluster(0); + } + + @AfterClass + public static void tearDownCluster() throws Exception { + BookKeeperClusterTestSuite.tearDownCluster(); + } + + private static String getZooKeeperConnectString() throws Exception { + String[] serviceHosts = metadataStore.getServiceUri().getServiceHosts(); + return StringUtils.join(serviceHosts, ','); + } + + private static ZooKeeper createZooKeeper() throws Exception { + return ZooKeeperClient.newBuilder() + .connectString(getZooKeeperConnectString()) + .connectRetryPolicy( + new BoundExponentialBackoffRetryPolicy(1, 10, 100)) + .operationRetryPolicy( + new BoundExponentialBackoffRetryPolicy(1, 10, 100)) + .sessionTimeoutMs(60000) + .build(); + } + + private ZooKeeper zk; + private ZkLedgerUnderreplicationManager urMgr; + + @Before + public void setUp() throws Exception { + this.zk = createZooKeeper(); + ServerConfiguration conf = new ServerConfiguration(baseServerConf); + conf.setStoreSystemTimeAsLedgerUnderreplicatedMarkTime(true); + this.urMgr = new ZkLedgerUnderreplicationManager(conf, zk); + } + + @After + public void tearDown() throws Exception { + if (null != urMgr) { + this.urMgr.close(); + } + if (null != zk) { + this.zk.close(); + } + } + + /** + * Test basic operation on {@link ZkLedgerUnderreplicationManager#markLedgerUnderreplicatedAsync(long, Collection)}. 
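The tests that follow exercise the async marking path end to end. The core contract they establish is that repeated marks for the same ledger union the missing-bookie lists rather than overwriting them; a small model of that merge, with illustrative names:

    // mark(L, [bookie-1]) followed by mark(L, [bookie-2, bookie-3])
    // leaves all three replicas recorded under the ledger's znode.
    static Set<String> merge(Set<String> existing, Collection<String> newlyMissing) {
        Set<String> merged = new HashSet<>(existing);
        merged.addAll(newlyMissing);
        return merged;
    }
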
+ */ + @Test + public void testMarkLedgerUnderreplicatedBasic() throws Exception { + long ledgerId = 0xabcdef; + Collection missingBookies = Lists.newArrayList("bookie-1"); + + long prevCtime = -1L; + + // mark when it hasn't been marked before + FutureUtils.result(urMgr.markLedgerUnderreplicatedAsync(ledgerId, missingBookies)); + UnderreplicatedLedger urLedgerFormat = urMgr.getLedgerUnreplicationInfo(ledgerId); + assertEquals(missingBookies, urLedgerFormat.getReplicaList()); + assertTrue(urLedgerFormat.getCtime() > prevCtime); + prevCtime = urLedgerFormat.getCtime(); + + // mark again when it has already been marked and the same missing bookies are already recorded + FutureUtils.result(urMgr.markLedgerUnderreplicatedAsync(ledgerId, missingBookies)); + urLedgerFormat = urMgr.getLedgerUnreplicationInfo(ledgerId); + assertEquals(missingBookies, urLedgerFormat.getReplicaList()); + assertTrue(urLedgerFormat.getCtime() >= prevCtime); + prevCtime = urLedgerFormat.getCtime(); + + // mark with new bookies when it has been marked + Collection newMissingBookies = Lists.newArrayList("bookie-2", "bookie-3"); + FutureUtils.result(urMgr.markLedgerUnderreplicatedAsync(ledgerId, newMissingBookies)); + urLedgerFormat = urMgr.getLedgerUnreplicationInfo(ledgerId); + assertEquals( + Lists.newArrayList("bookie-1", "bookie-2", "bookie-3"), + urLedgerFormat.getReplicaList() + ); + assertTrue(urLedgerFormat.getCtime() >= prevCtime); + } + + /** + * Test {@link ZkLedgerUnderreplicationManager#markLedgerUnderreplicatedAsync(long, Collection)} handles corrupted + * data. + */ + @Test + public void testMarkLedgerWithCorruptedDataExists() throws Exception { + long ledgerId = 0xabcdee; + String ledgerPath = urMgr.getUrLedgerZnode(ledgerId); + ZkUtils.createFullPathOptimistic( + zk, ledgerPath, "junk data".getBytes(UTF_8), ZkUtils.getACLs(baseServerConf), CreateMode.PERSISTENT); + Collection missingBookies = Lists.newArrayList("bookie-1"); + try { + FutureUtils.result(urMgr.markLedgerUnderreplicatedAsync(ledgerId, missingBookies)); + fail("Should fail to mark ledger underreplicated if there is already corrupted data on zookeeper"); + } catch (ReplicationException re) { + assertTrue(re.getMessage().contains("Invalid underreplicated ledger data for ledger " + ledgerPath)); + } + byte[] data = zk.getData(ledgerPath, null, null); + assertEquals("junk data", new String(data, UTF_8)); + } + + @Test + public void testMarkLedgerUnderreplicatedConcurrently() throws Exception { + final int numLedgers = 20; + List> futures = Lists.newArrayListWithExpectedSize(numLedgers); + long ledgerId = 0xabcc00; + Set expectedBookies = Sets.newHashSet(); + for (int i = 0; i < numLedgers; i++) { + futures.add( + urMgr.markLedgerUnderreplicatedAsync( + ledgerId, Lists.newArrayList("bookie-" + i))); + expectedBookies.add("bookie-" + i); + } + FutureUtils.result(FutureUtils.collect(futures)); + + UnderreplicatedLedger urLedgerFormat = urMgr.getLedgerUnreplicationInfo(ledgerId); + Set actualBookies = Sets.newHashSet(); + actualBookies.addAll(urLedgerFormat.getReplicaList()); + + assertEquals(expectedBookies, actualBookies); + } + + @Test + public void testMarkLedgerUnderreplicatedConcurrentlyWithDifferentClients() throws Exception { + final int numLedgers = 20; + List zks = new ArrayList<>(numLedgers); + List urMgrs = new ArrayList<>(numLedgers); + + for (int i = 0; i < numLedgers; i++) { + zks.add(createZooKeeper()); + urMgrs.add(new ZkLedgerUnderreplicationManager(baseServerConf, zks.get(i))); + } + + List> futures = 
Lists.newArrayListWithExpectedSize(numLedgers); + long ledgerId = 0xabcd00; + Set expectedBookies = Sets.newHashSet(); + for (int i = 0; i < numLedgers; i++) { + futures.add( + urMgrs.get(i).markLedgerUnderreplicatedAsync( + ledgerId, Lists.newArrayList("bookie-" + i))); + expectedBookies.add("bookie-" + i); + } + + FutureUtils.result(FutureUtils.collect(futures)); + + UnderreplicatedLedger urLedgerFormat = urMgr.getLedgerUnreplicationInfo(ledgerId); + Set actualBookies = Sets.newHashSet(); + actualBookies.addAll(urLedgerFormat.getReplicaList()); + + assertEquals(expectedBookies, actualBookies); + + for (LedgerUnderreplicationManager urMgr : urMgrs) { + urMgr.close(); + } + for (ZooKeeper zk : zks) { + zk.close(); + } + } + + + @Test + public void testMarkLedgerUnderreplicatedWhenSessionExpired() throws Exception { + final long ledgerId = 0xabbd00; + @Cleanup + ZooKeeper zk = new ZooKeeper(getZooKeeperConnectString(), 60000, null); + @Cleanup + LedgerUnderreplicationManager urMgr = new ZkLedgerUnderreplicationManager(baseServerConf, zk); + // open another zookeeper client to expire current session + @Cleanup + ZooKeeper newZk = new ZooKeeper( + getZooKeeperConnectString(), 60000, null, zk.getSessionId(), zk.getSessionPasswd()); + try { + FutureUtils.result(urMgr.markLedgerUnderreplicatedAsync( + ledgerId, Lists.newArrayList("bookie-1"))); + fail("Should fail when a zookeeper exception is encountered"); + } catch (KeeperException ke) { + // expected + } + try { + UnderreplicatedLedger urLedgerFormat = + ZkLedgerUnderreplicationManagerTest.this.urMgr.getLedgerUnreplicationInfo(ledgerId); + fail("The ledger shouldn't have been marked as underreplicated"); + } catch (ReplicationException.UnavailableException ue) { + // expected + } + } + + +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/zk/ZKMetadataBookieDriverTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/zk/ZKMetadataBookieDriverTest.java index ca3279de678..de2e9f9fdc7 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/zk/ZKMetadataBookieDriverTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/zk/ZKMetadataBookieDriverTest.java @@ -18,33 +18,30 @@ */ package org.apache.bookkeeper.meta.zk; -import static org.junit.Assert.assertNull; import static org.junit.Assert.assertSame; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.same; +import static org.mockito.Mockito.doReturn; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.spy; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import org.apache.bookkeeper.conf.ServerConfiguration; import org.apache.bookkeeper.discover.RegistrationManager; -import org.apache.bookkeeper.discover.RegistrationManager.RegistrationListener; import org.apache.bookkeeper.discover.ZKRegistrationManager; import org.apache.bookkeeper.stats.NullStatsLogger; -import org.apache.bookkeeper.zookeeper.ZooKeeperClient; import org.apache.zookeeper.ZooKeeper; +import org.junit.After; import org.junit.Before; import org.junit.Test; import org.junit.runner.RunWith; -import org.powermock.api.mockito.PowerMockito; -import org.powermock.core.classloader.annotations.PrepareForTest; -import org.powermock.modules.junit4.PowerMockRunner; +import org.mockito.junit.MockitoJUnitRunner; /** * Unit test {@link ZKMetadataBookieDriver}. 
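testMarkLedgerUnderreplicatedWhenSessionExpired, just above, forces expiry with a classic recipe from ZooKeeper's own test suite: attach a second client to the victim's session id and password, then close it, and the server closes the session out from under the first handle, which then observes an expired session. In outline (the five-argument ZooKeeper constructor is the real API; the helper itself is illustrative):

    static void expireSession(String connectString, ZooKeeper victim) throws Exception {
        ZooKeeper killer = new ZooKeeper(connectString, 60000, null,
                victim.getSessionId(), victim.getSessionPasswd());
        killer.close(); // the victim handle now observes SESSION_EXPIRED
    }
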
*/ -@RunWith(PowerMockRunner.class) -@PrepareForTest({ ZKMetadataDriverBase.class, ZooKeeperClient.class, ZKMetadataBookieDriver.class }) +@RunWith(MockitoJUnitRunner.class) public class ZKMetadataBookieDriverTest extends ZKMetadataDriverTestBase { private ZKMetadataBookieDriver driver; @@ -55,44 +52,30 @@ public void setup() throws Exception { this.conf = new ServerConfiguration(); super.setup(conf); - driver = new ZKMetadataBookieDriver(); + driver = spy(new ZKMetadataBookieDriver()); + } + + @After + public void teardown() { + super.teardown(); + driver.close(); } @Test public void testGetRegManager() throws Exception { - RegistrationListener listener = mock(RegistrationListener.class); - driver.initialize(conf, listener, NullStatsLogger.INSTANCE); + driver.initialize(conf, NullStatsLogger.INSTANCE); assertSame(conf, driver.serverConf); - assertSame(listener, driver.listener); - assertNull(driver.regManager); - ZKRegistrationManager mockRegManager = PowerMockito.mock(ZKRegistrationManager.class); + ZKRegistrationManager mockRegManager = mock(ZKRegistrationManager.class); + doReturn(mockRegManager).when(driver).newZKRegistrationManager(any(ServerConfiguration.class), + any(ZooKeeper.class)); - PowerMockito.whenNew(ZKRegistrationManager.class) - .withParameterTypes( - ServerConfiguration.class, - ZooKeeper.class, - RegistrationListener.class) - .withArguments( - any(ServerConfiguration.class), - any(ZooKeeper.class), - any(RegistrationListener.class)) - .thenReturn(mockRegManager); + try (RegistrationManager manager = driver.createRegistrationManager()) { + assertSame(mockRegManager, manager); - RegistrationManager manager = driver.getRegistrationManager(); - assertSame(mockRegManager, manager); - assertSame(mockRegManager, driver.regManager); - - PowerMockito.verifyNew(ZKRegistrationManager.class, times(1)) - .withArguments( - same(conf), - same(mockZkc), - same(listener)); - - driver.close(); - verify(mockRegManager, times(1)).close(); - assertNull(driver.regManager); + verify(driver, times(1)).newZKRegistrationManager(same(conf), same(mockZkc)); + } } } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/zk/ZKMetadataClientDriverTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/zk/ZKMetadataClientDriverTest.java index 626a055815c..8b9ed9905d6 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/zk/ZKMetadataClientDriverTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/zk/ZKMetadataClientDriverTest.java @@ -21,9 +21,12 @@ import static org.junit.Assert.assertNull; import static org.junit.Assert.assertSame; import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyBoolean; import static org.mockito.ArgumentMatchers.anyString; import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.doReturn; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.spy; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; @@ -33,20 +36,16 @@ import org.apache.bookkeeper.discover.RegistrationClient; import org.apache.bookkeeper.discover.ZKRegistrationClient; import org.apache.bookkeeper.stats.NullStatsLogger; -import org.apache.bookkeeper.zookeeper.ZooKeeperClient; import org.apache.zookeeper.ZooKeeper; import org.junit.Before; import org.junit.Test; import org.junit.runner.RunWith; -import org.powermock.api.mockito.PowerMockito; -import org.powermock.core.classloader.annotations.PrepareForTest; -import 
org.powermock.modules.junit4.PowerMockRunner; +import org.mockito.junit.MockitoJUnitRunner; /** * Unit test {@link ZKMetadataClientDriver}. */ -@RunWith(PowerMockRunner.class) -@PrepareForTest({ ZKMetadataDriverBase.class, ZooKeeperClient.class, ZKMetadataClientDriver.class }) +@RunWith(MockitoJUnitRunner.class) public class ZKMetadataClientDriverTest extends ZKMetadataDriverTestBase { private ZKMetadataClientDriver driver; @@ -57,7 +56,7 @@ public void setup() throws Exception { this.conf = new ClientConfiguration(); super.setup(conf); - driver = new ZKMetadataClientDriver(); + driver = spy(new ZKMetadataClientDriver()); } @Test @@ -69,19 +68,17 @@ public void testGetRegClient() throws Exception { assertSame(mockExecutor, driver.scheduler); assertNull(driver.regClient); - ZKRegistrationClient mockRegClient = PowerMockito.mock(ZKRegistrationClient.class); + ZKRegistrationClient mockRegClient = mock(ZKRegistrationClient.class); - PowerMockito.whenNew(ZKRegistrationClient.class) - .withParameterTypes(ZooKeeper.class, String.class, ScheduledExecutorService.class) - .withArguments(any(ZooKeeper.class), anyString(), any(ScheduledExecutorService.class)) - .thenReturn(mockRegClient); + doReturn(mockRegClient).when(driver).newZKRegistrationClient(any(ZooKeeper.class), + anyString(), any(ScheduledExecutorService.class), anyBoolean()); RegistrationClient client = driver.getRegistrationClient(); assertSame(mockRegClient, client); assertSame(mockRegClient, driver.regClient); - PowerMockito.verifyNew(ZKRegistrationClient.class, times(1)) - .withArguments(eq(mockZkc), eq(ledgersRootPath), eq(mockExecutor)); + verify(driver, times(1)).newZKRegistrationClient(eq(mockZkc), + eq(ledgersRootPath), eq(mockExecutor), anyBoolean()); driver.close(); verify(mockRegClient, times(1)).close(); diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/zk/ZKMetadataDriverBaseStaticTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/zk/ZKMetadataDriverBaseStaticTest.java index 0ed3f7d31b6..66b096172bc 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/zk/ZKMetadataDriverBaseStaticTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/zk/ZKMetadataDriverBaseStaticTest.java @@ -64,11 +64,20 @@ public void testResolveLedgerManagerFactoryUnknownScheme() { } @Test - public void testResolveLedgerManagerFactoryDefaultValue() { + public void testResolveLedgerManagerFactoryUnspecifiedLayout() { assertEquals( - HierarchicalLedgerManagerFactory.class, + null, ZKMetadataDriverBase.resolveLedgerManagerFactory( - URI.create("zk://127.0.0.1/ledgers")) + URI.create("zk://127.0.0.1/ledgers")) + ); + } + + @Test + public void testResolveLedgerManagerFactoryNullLayout() { + assertEquals( + null, + ZKMetadataDriverBase.resolveLedgerManagerFactory( + URI.create("zk+null://127.0.0.1/ledgers")) ); } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/zk/ZKMetadataDriverBaseTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/zk/ZKMetadataDriverBaseTest.java index 2386a7fe062..3a387d15960 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/zk/ZKMetadataDriverBaseTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/zk/ZKMetadataDriverBaseTest.java @@ -30,30 +30,29 @@ import static org.mockito.ArgumentMatchers.same; import static org.mockito.Mockito.CALLS_REAL_METHODS; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.mockStatic; import static 
org.mockito.Mockito.times; import static org.mockito.Mockito.verify; -import static org.powermock.api.mockito.PowerMockito.mockStatic; -import static org.powermock.api.mockito.PowerMockito.verifyStatic; import java.util.Optional; +import lombok.Cleanup; import org.apache.bookkeeper.conf.ClientConfiguration; import org.apache.bookkeeper.meta.AbstractZkLedgerManagerFactory; import org.apache.bookkeeper.meta.LedgerManagerFactory; import org.apache.bookkeeper.stats.NullStatsLogger; import org.apache.bookkeeper.zookeeper.RetryPolicy; import org.apache.bookkeeper.zookeeper.ZooKeeperClient; +import org.junit.After; import org.junit.Before; import org.junit.Test; import org.junit.runner.RunWith; -import org.powermock.api.mockito.PowerMockito; -import org.powermock.core.classloader.annotations.PrepareForTest; -import org.powermock.modules.junit4.PowerMockRunner; +import org.mockito.MockedStatic; +import org.mockito.junit.MockitoJUnitRunner; /** * Unit test of {@link ZKMetadataDriverBase}. */ -@RunWith(PowerMockRunner.class) -@PrepareForTest({ ZKMetadataDriverBase.class, ZooKeeperClient.class, AbstractZkLedgerManagerFactory.class }) +@RunWith(MockitoJUnitRunner.class) public class ZKMetadataDriverBaseTest extends ZKMetadataDriverTestBase { private ZKMetadataDriverBase driver; @@ -66,6 +65,11 @@ public void setup() throws Exception { retryPolicy = mock(RetryPolicy.class); } + @After + public void teardown() { + super.teardown(); + } + @Test public void testInitialize() throws Exception { driver.initialize( @@ -78,7 +82,7 @@ public void testInitialize() throws Exception { String readonlyPath = "/path/to/ledgers/" + AVAILABLE_NODE + "/" + READONLY; assertSame(mockZkc, driver.zk); - verifyStatic(ZooKeeperClient.class, times(1)); + ZooKeeperClient.newBuilder(); verify(mockZkBuilder, times(1)).build(); verify(mockZkc, times(1)) @@ -88,7 +92,7 @@ public void testInitialize() throws Exception { driver.close(); - verify(mockZkc, times(1)).close(); + verify(mockZkc, times(1)).close(5000); assertNull(driver.zk); } @@ -106,7 +110,7 @@ public void testInitializeExternalZooKeeper() throws Exception { String readonlyPath = "/path/to/ledgers/" + AVAILABLE_NODE; assertSame(anotherZk, driver.zk); - verifyStatic(ZooKeeperClient.class, times(0)); + ZooKeeperClient.newBuilder(); verify(mockZkBuilder, times(0)).build(); verify(mockZkc, times(0)) @@ -125,18 +129,17 @@ public void testGetLedgerManagerFactory() throws Exception { driver.initialize( conf, NullStatsLogger.INSTANCE, retryPolicy, Optional.empty()); - mockStatic(AbstractZkLedgerManagerFactory.class); + @Cleanup + MockedStatic abstractZkLedgerManagerFactoryMockedStatic = + mockStatic(AbstractZkLedgerManagerFactory.class); LedgerManagerFactory factory = mock(LedgerManagerFactory.class); - PowerMockito.when( - AbstractZkLedgerManagerFactory.class, - "newLedgerManagerFactory", - same(conf), - same(driver.layoutManager)) - .thenReturn(factory); + abstractZkLedgerManagerFactoryMockedStatic.when(() -> + AbstractZkLedgerManagerFactory.newLedgerManagerFactory(same(conf), same(driver.layoutManager))) + .thenReturn(factory); assertSame(factory, driver.getLedgerManagerFactory()); assertSame(factory, driver.lmFactory); - verifyStatic(AbstractZkLedgerManagerFactory.class, times(1)); + AbstractZkLedgerManagerFactory.newLedgerManagerFactory( same(conf), same(driver.layoutManager)); diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/zk/ZKMetadataDriverTestBase.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/zk/ZKMetadataDriverTestBase.java 
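The PowerMock removals in these driver tests all follow the same two substitutions. Constructor interception via whenNew becomes a Mockito spy over a package-visible factory method, and static mocking becomes a scoped MockedStatic that must be closed (via try-with-resources or Lombok's @Cleanup), or later tests on the same thread cannot register their own static mock. A condensed sketch of both patterns as the migrated tests apply them, with the surrounding mock setup elided:

    // Pattern 1: spy a factory method instead of PowerMock's whenNew().
    ZKMetadataBookieDriver driver = spy(new ZKMetadataBookieDriver());
    doReturn(mockRegManager).when(driver)
            .newZKRegistrationManager(any(ServerConfiguration.class), any(ZooKeeper.class));
    try (RegistrationManager rm = driver.createRegistrationManager()) {
        // rm is the mock; the spy intercepted the factory call
    }

    // Pattern 2: scope static mocks explicitly.
    try (MockedStatic<ZooKeeperClient> zkc = mockStatic(ZooKeeperClient.class)) {
        zkc.when(ZooKeeperClient::newBuilder).thenReturn(mockZkBuilder);
        // ... exercise code that calls ZooKeeperClient.newBuilder() ...
    }
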
index c0f33835128..e5129fb716e 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/zk/ZKMetadataDriverTestBase.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/zk/ZKMetadataDriverTestBase.java @@ -25,13 +25,13 @@ import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; -import static org.powermock.api.mockito.PowerMockito.mockStatic; import org.apache.bookkeeper.conf.AbstractConfiguration; import org.apache.bookkeeper.stats.StatsLogger; import org.apache.bookkeeper.zookeeper.RetryPolicy; import org.apache.bookkeeper.zookeeper.ZooKeeperClient; -import org.powermock.api.mockito.PowerMockito; +import org.mockito.MockedStatic; +import org.mockito.Mockito; /** * Unit test of {@link ZKMetadataDriverBase}. @@ -43,6 +43,7 @@ public abstract class ZKMetadataDriverTestBase { protected String metadataServiceUri; protected ZooKeeperClient.Builder mockZkBuilder; protected ZooKeeperClient mockZkc; + protected MockedStatic zooKeeperClientMockedStatic; public void setup(AbstractConfiguration conf) throws Exception { ledgersRootPath = "/path/to/ledgers"; @@ -64,9 +65,12 @@ public void setup(AbstractConfiguration conf) throws Exception { when(mockZkBuilder.build()).thenReturn(mockZkc); - mockStatic(ZooKeeperClient.class); - PowerMockito.when(ZooKeeperClient.class, "newBuilder") - .thenReturn(mockZkBuilder); + zooKeeperClientMockedStatic = Mockito.mockStatic(ZooKeeperClient.class); + zooKeeperClientMockedStatic.when(() -> ZooKeeperClient.newBuilder()).thenReturn(mockZkBuilder); + } + + public void teardown() { + zooKeeperClientMockedStatic.close(); } } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/metastore/MetastoreScannableTableAsyncToSyncConverter.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/metastore/MetastoreScannableTableAsyncToSyncConverter.java index f85279c4eef..72f20a5391b 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/metastore/MetastoreScannableTableAsyncToSyncConverter.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/metastore/MetastoreScannableTableAsyncToSyncConverter.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -18,7 +18,6 @@ package org.apache.bookkeeper.metastore; import java.util.Set; - import org.apache.bookkeeper.metastore.MetastoreScannableTable.Order; /** diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/metastore/MetastoreTableAsyncToSyncConverter.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/metastore/MetastoreTableAsyncToSyncConverter.java index b1775e8e7f0..aee1b0f83f9 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/metastore/MetastoreTableAsyncToSyncConverter.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/metastore/MetastoreTableAsyncToSyncConverter.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file * distributed with this work for additional information @@ -20,7 +20,6 @@ import java.util.Set; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; - import org.apache.bookkeeper.metastore.MSException.Code; import org.apache.bookkeeper.versioning.Version; import org.apache.bookkeeper.versioning.Versioned; diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/metastore/TestMetaStore.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/metastore/TestMetaStore.java index f61d14b8983..bd89733cec2 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/metastore/TestMetaStore.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/metastore/TestMetaStore.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -31,14 +31,12 @@ import com.google.common.collect.MapDifference; import com.google.common.collect.Maps; import com.google.common.collect.Sets; - import java.util.Collections; import java.util.HashSet; import java.util.Iterator; import java.util.Map; import java.util.Set; import java.util.TreeMap; - import org.apache.bookkeeper.metastore.InMemoryMetastoreTable.MetadataVersion; import org.apache.bookkeeper.metastore.MSException.Code; import org.apache.bookkeeper.metastore.MetastoreScannableTable.Order; diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/net/BookieIdTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/net/BookieIdTest.java new file mode 100644 index 00000000000..d32378b5001 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/net/BookieIdTest.java @@ -0,0 +1,91 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.net; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotEquals; + +import java.util.UUID; +import org.junit.Test; + +/** + * Unit tests for BookieId class. 
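The assertions that follow sketch BookieId's accepted grammar: host:port pairs, dotted IPv4 addresses with ports, UUIDs, dashes, and underscores all parse, while whitespace, '$', and the reserved word "readonly" are rejected. A hypothetical predicate capturing only what these tests pin down; the real validation lives inside BookieId and may be stricter:

    static boolean looksValid(String id) {
        return id.matches("[a-zA-Z0-9._:-]+") && !"readonly".equals(id);
    }
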
+ */ +public class BookieIdTest { + + @Test + public void testToString() { + assertEquals("test", BookieId.parse("test").toString()); + } + + @Test + public void testParse() { + assertEquals("test", BookieId.parse("test").getId()); + } + + @Test + public void testEquals() { + assertEquals(BookieId.parse("test"), BookieId.parse("test")); + assertNotEquals(BookieId.parse("test"), BookieId.parse("test2")); + } + + @Test + public void testHashcode() { + assertEquals(BookieId.parse("test").hashCode(), BookieId.parse("test").hashCode()); + } + + @Test(expected = IllegalArgumentException.class) + public void testValidate1() { + BookieId.parse("non valid"); + } + + @Test(expected = IllegalArgumentException.class) + public void testValidate2() { + BookieId.parse("non$valid"); + } + + @Test(expected = IllegalArgumentException.class) + public void testValidateReservedWord() { + // 'readonly' is a reserved word for the ZK based implementation + BookieId.parse("readonly"); + } + + @Test + public void testValidateHostnamePort() { + BookieId.parse("this.is.an.hostname:1234"); + } + + @Test + public void testValidateIPv4Port() { + BookieId.parse("1.2.3.4:1234"); + } + + @Test + public void testValidateUUID() { + BookieId.parse(UUID.randomUUID().toString()); + } + + @Test + public void testWithDashAndUnderscore() { + BookieId.parse("testRegisterUnregister_ReadonlyBookie-readonly:3181"); + } + +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/net/NetworkTopologyImplTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/net/NetworkTopologyImplTest.java new file mode 100644 index 00000000000..3da28642761 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/net/NetworkTopologyImplTest.java @@ -0,0 +1,169 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bookkeeper.net; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.util.Set; +import org.junit.Test; + +/** + * Tests for {@link NetworkTopologyImpl}. + */ +public class NetworkTopologyImplTest { + + @Test + public void getLeavesShouldReturnEmptySetForNonExistingScope() { + NetworkTopologyImpl networkTopology = new NetworkTopologyImpl(); + final Set leaves = networkTopology.getLeaves("/non-existing-scope"); + assertTrue(leaves.isEmpty()); + } + + @Test + public void getLeavesShouldReturnNodesInScope() { + // GIVEN + // Topology with two racks and 1 bookie in each rack. 
+ NetworkTopologyImpl networkTopology = new NetworkTopologyImpl(); + + String rack0Scope = "/rack-0"; + BookieId bookieIdScopeRack0 = BookieId.parse("bookieIdScopeRack0"); + BookieNode bookieRack0ScopeNode = new BookieNode(bookieIdScopeRack0, rack0Scope); + + String rack1Scope = "/rack-1"; + BookieId bookieIdScopeRack1 = BookieId.parse("bookieIdScopeRack1"); + BookieNode bookieRack1ScopeNode = new BookieNode(bookieIdScopeRack1, rack1Scope); + + networkTopology.add(bookieRack0ScopeNode); + networkTopology.add(bookieRack1ScopeNode); + + // WHEN + Set leavesScopeRack0 = networkTopology.getLeaves(rack0Scope); + Set leavesScopeRack1 = networkTopology.getLeaves(rack1Scope); + + // THEN + assertTrue(leavesScopeRack0.size() == 1); + assertTrue(leavesScopeRack0.contains(bookieRack0ScopeNode)); + + assertTrue(leavesScopeRack1.size() == 1); + assertTrue(leavesScopeRack1.contains(bookieRack1ScopeNode)); + } + + @Test + public void testRestartBKWithNewRackDepth() { + NetworkTopologyImpl networkTopology = new NetworkTopologyImpl(); + String dp1Rack = "/rack-1"; + String dp2Rack = "/dp/rack-1"; + BookieId bkId1 = BookieId.parse("bookieIdScopeRack0"); + BookieId bkId2 = BookieId.parse("bookieIdScopeRack1"); + + // Register 2 BKs with depth 1 rack. + BookieNode dp1BkNode1 = new BookieNode(bkId1, dp1Rack); + BookieNode dp1BkNode2 = new BookieNode(bkId2, dp1Rack); + networkTopology.add(dp1BkNode1); + networkTopology.add(dp1BkNode2); + + // Update one BK with depth 2 rack. + // Assert it cannot be added due to the different depth. + networkTopology.remove(dp1BkNode1); + BookieNode dp2BkNode1 = new BookieNode(bkId1, dp2Rack); + try { + networkTopology.add(dp2BkNode1); + fail("Expected adding the node to fail because of the different rack depth"); + } catch (NetworkTopologyImpl.InvalidTopologyException ex) { + // Expected ex. + } + Set leaves = networkTopology.getLeaves(dp1Rack); + assertEquals(leaves.size(), 1); + assertTrue(leaves.contains(dp1BkNode2)); + + // Update all BKs with depth 2 rack. + // Verify the update succeeds. + networkTopology.remove(dp1BkNode2); + BookieNode dp2BkNode2 = new BookieNode(bkId2, dp2Rack); + networkTopology.add(dp2BkNode1); + networkTopology.add(dp2BkNode2); + leaves = networkTopology.getLeaves(dp2Rack); + assertEquals(leaves.size(), 2); + assertTrue(leaves.contains(dp2BkNode1)); + assertTrue(leaves.contains(dp2BkNode2)); + } + + @Test + public void getLeavesShouldReturnLeavesThatAreNotInExcludedScope() { + // GIVEN + // Topology with three racks and 1 bookie in each rack. + NetworkTopologyImpl networkTopology = new NetworkTopologyImpl(); + + String rack0Scope = "/rack-0"; + BookieId bookieIdScopeRack0 = BookieId.parse("bookieIdScopeRack0"); + BookieNode bookieRack0ScopeNode = new BookieNode(bookieIdScopeRack0, rack0Scope); + + String rack1Scope = "/rack-1"; + BookieId bookieIdScopeRack1 = BookieId.parse("bookieIdScopeRack1"); + BookieNode bookieRack1ScopeNode = new BookieNode(bookieIdScopeRack1, rack1Scope); + + String rack2Scope = "/rack-2"; + BookieId bookieIdScopeRack2 = BookieId.parse("bookieIdScopeRack2"); + BookieNode bookieRack2ScopeNode = new BookieNode(bookieIdScopeRack2, rack2Scope); + + networkTopology.add(bookieRack0ScopeNode); + networkTopology.add(bookieRack1ScopeNode); + networkTopology.add(bookieRack2ScopeNode); + + // Excluded scopes begin with the '~' character. + String scopeExcludingRack1 = "~/rack-1"; + + // WHEN + // ask for leaves outside the rack-1 scope. 
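    // (A hedged note on the semantics being exercised here: a scope beginning
    // with '~' is assumed to behave as a complement query, i.e.
    // getLeaves("~/rack-1") == all leaves minus getLeaves("/rack-1"),
    // which is exactly what the assertions below check.)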
+ Set leavesExcludingRack1Scope = networkTopology.getLeaves(scopeExcludingRack1); + + // THEN + assertTrue(leavesExcludingRack1Scope.size() == 2); + assertTrue(leavesExcludingRack1Scope.contains(bookieRack0ScopeNode)); + assertTrue(leavesExcludingRack1Scope.contains(bookieRack2ScopeNode)); + } + + @Test + public void testInvalidRackName() { + NetworkTopologyImpl networkTopology = new NetworkTopologyImpl(); + String rack0Scope = ""; + BookieId bookieIdScopeRack0 = BookieId.parse("bookieIdScopeRack0"); + BookieNode bookieRack0ScopeNode = new BookieNode(bookieIdScopeRack0, rack0Scope); + + String rack1Scope = "/"; + BookieId bookieIdScopeRack1 = BookieId.parse("bookieIdScopeRack1"); + BookieNode bookieRack1ScopeNode = new BookieNode(bookieIdScopeRack1, rack1Scope); + + try { + networkTopology.add(bookieRack0ScopeNode); + fail(); + } catch (IllegalArgumentException e) { + assertEquals("bookieIdScopeRack0, which is located at , is not a descendent of /", e.getMessage()); + } + + try { + networkTopology.add(bookieRack1ScopeNode); + fail(); + } catch (IllegalArgumentException e) { + assertEquals("bookieIdScopeRack1, which is located at , is not a descendent of /", e.getMessage()); + } + + } +} \ No newline at end of file diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/net/ResolvedBookieSocketAddressTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/net/ResolvedBookieSocketAddressTest.java new file mode 100644 index 00000000000..62d59b5999e --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/net/ResolvedBookieSocketAddressTest.java @@ -0,0 +1,50 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.net; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import java.net.InetSocketAddress; +import org.junit.Test; + +/** + * Tests for BookieSocketAddress getSocketAddress cache logic. 
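The two tests that follow encode the resolution-caching contract hinted at by the class name: for a literal IP the resolved InetSocketAddress may be cached and reused, while for a hostname every call must re-resolve so that DNS changes (for example, a bookie moving hosts) are picked up. A sketch of that rule under those assumptions (the field names are illustrative, not the actual implementation):

    InetSocketAddress socketAddress() {
        return isLiteralIp
                ? cachedAddress                          // safe to cache
                : new InetSocketAddress(hostname, port); // fresh DNS lookup
    }
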
*/ + +public class ResolvedBookieSocketAddressTest { + + @Test + public void testHostnameBookieId() throws Exception { + BookieSocketAddress hostnameAddress = new BookieSocketAddress("localhost", 3181); + InetSocketAddress inetSocketAddress1 = hostnameAddress.getSocketAddress(); + InetSocketAddress inetSocketAddress2 = hostnameAddress.getSocketAddress(); + assertFalse("InetSocketAddress should be recreated", inetSocketAddress1 == inetSocketAddress2); + } + + @Test + public void testIPAddressBookieId() throws Exception { + BookieSocketAddress ipAddress = new BookieSocketAddress("127.0.0.1", 3181); + InetSocketAddress inetSocketAddress1 = ipAddress.getSocketAddress(); + InetSocketAddress inetSocketAddress2 = ipAddress.getSocketAddress(); + assertTrue("InetSocketAddress should be cached", inetSocketAddress1 == inetSocketAddress2); + } +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/BatchedReadEntryProcessorTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/BatchedReadEntryProcessorTest.java new file mode 100644 index 00000000000..3f897558384 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/BatchedReadEntryProcessorTest.java @@ -0,0 +1,224 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.bookkeeper.proto; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyLong; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import io.netty.buffer.ByteBuf; +import io.netty.buffer.ByteBufAllocator; +import io.netty.channel.Channel; +import io.netty.channel.ChannelHandlerContext; +import io.netty.channel.ChannelPromise; +import io.netty.channel.DefaultChannelPromise; +import io.netty.channel.EventLoop; +import java.io.IOException; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.atomic.AtomicReference; +import org.apache.bookkeeper.bookie.Bookie; +import org.apache.bookkeeper.bookie.BookieException; +import org.apache.bookkeeper.common.concurrent.FutureUtils; +import org.apache.bookkeeper.proto.BookieProtocol.Response; +import org.apache.bookkeeper.stats.NullStatsLogger; +import org.junit.Before; +import org.junit.Test; + + + +/** + * Unit test {@link BatchedReadEntryProcessor}.
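+ * <p>Covers fenced (synchronous and asynchronous) and non-fenced batched reads; the requests built + * below ask for at most maxCount = 5 entries and maxSize = 1024 bytes per response.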
+ */ +public class BatchedReadEntryProcessorTest { + + private Channel channel; + private BookieRequestHandler requestHandler; + private BookieRequestProcessor requestProcessor; + private Bookie bookie; + + @Before + public void setup() throws IOException, BookieException { + channel = mock(Channel.class); + when(channel.isOpen()).thenReturn(true); + + requestHandler = mock(BookieRequestHandler.class); + ChannelHandlerContext ctx = mock(ChannelHandlerContext.class); + when(ctx.channel()).thenReturn(channel); + when(requestHandler.ctx()).thenReturn(ctx); + + bookie = mock(Bookie.class); + requestProcessor = mock(BookieRequestProcessor.class); + when(requestProcessor.getBookie()).thenReturn(bookie); + when(requestProcessor.getWaitTimeoutOnBackpressureMillis()).thenReturn(-1L); + when(requestProcessor.getRequestStats()).thenReturn(new RequestStats(NullStatsLogger.INSTANCE)); + when(channel.voidPromise()).thenReturn(mock(ChannelPromise.class)); + when(channel.writeAndFlush(any())).thenReturn(mock(ChannelPromise.class)); + EventLoop eventLoop = mock(EventLoop.class); + when(eventLoop.inEventLoop()).thenReturn(true); + when(channel.eventLoop()).thenReturn(eventLoop); + ByteBuf buffer0 = ByteBufAllocator.DEFAULT.buffer(4); + ByteBuf buffer1 = ByteBufAllocator.DEFAULT.buffer(4); + ByteBuf buffer2 = ByteBufAllocator.DEFAULT.buffer(4); + ByteBuf buffer3 = ByteBufAllocator.DEFAULT.buffer(4); + ByteBuf buffer4 = ByteBufAllocator.DEFAULT.buffer(4); + + when(bookie.readEntry(anyLong(), anyLong())).thenReturn(buffer0).thenReturn(buffer1).thenReturn(buffer2) + .thenReturn(buffer3).thenReturn(buffer4); + } + + @Test + public void testSuccessfulAsynchronousFenceRequest() throws Exception { + testAsynchronousRequest(true, BookieProtocol.EOK); + } + + @Test + public void testFailedAsynchronousFenceRequest() throws Exception { + testAsynchronousRequest(false, BookieProtocol.EIO); + } + + private void testAsynchronousRequest(boolean result, int errorCode) throws Exception { + CompletableFuture fenceResult = FutureUtils.createFuture(); + when(bookie.fenceLedger(anyLong(), any())).thenReturn(fenceResult); + + ChannelPromise promise = new DefaultChannelPromise(channel); + AtomicReference writtenObject = new AtomicReference<>(); + CountDownLatch latch = new CountDownLatch(1); + doAnswer(invocationOnMock -> { + writtenObject.set(invocationOnMock.getArgument(0)); + promise.setSuccess(); + latch.countDown(); + return promise; + }).when(channel).writeAndFlush(any(Response.class)); + + long requestId = 0; + int maxCount = 5; + long maxSize = 1024; + ExecutorService service = Executors.newCachedThreadPool(); + long ledgerId = System.currentTimeMillis(); + BookieProtocol.BatchedReadRequest request = BookieProtocol.BatchedReadRequest.create( + BookieProtocol.CURRENT_PROTOCOL_VERSION, ledgerId, 1, BookieProtocol.FLAG_DO_FENCING, new byte[] {}, + requestId, maxCount, maxSize); + ReadEntryProcessor processor = BatchedReadEntryProcessor.create( + request, requestHandler, requestProcessor, service, true, 1024 * 1024 * 5); + processor.run(); + + fenceResult.complete(result); + latch.await(); + verify(channel, times(1)).writeAndFlush(any(Response.class)); + + assertTrue(writtenObject.get() instanceof Response); + Response response = (Response) writtenObject.get(); + assertEquals(1, response.getEntryId()); + assertEquals(ledgerId, response.getLedgerId()); + assertEquals(BookieProtocol.BATCH_READ_ENTRY, response.getOpCode()); + assertEquals(errorCode, response.getErrorCode()); + service.shutdown(); + } + + @Test + public void 
testSuccessfulSynchronousFenceRequest() throws Exception { + testSynchronousRequest(true, BookieProtocol.EOK); + } + + @Test + public void testFailedSynchronousFenceRequest() throws Exception { + testSynchronousRequest(false, BookieProtocol.EIO); + } + + private void testSynchronousRequest(boolean result, int errorCode) throws Exception { + CompletableFuture fenceResult = FutureUtils.createFuture(); + when(bookie.fenceLedger(anyLong(), any())).thenReturn(fenceResult); + ChannelPromise promise = new DefaultChannelPromise(channel); + AtomicReference writtenObject = new AtomicReference<>(); + CountDownLatch latch = new CountDownLatch(1); + doAnswer(invocationOnMock -> { + writtenObject.set(invocationOnMock.getArgument(0)); + promise.setSuccess(); + latch.countDown(); + return promise; + }).when(channel).writeAndFlush(any(Response.class)); + + long requestId = 0; + int maxCount = 5; + long maxSize = 1024; + ExecutorService service = Executors.newCachedThreadPool(); + long ledgerId = System.currentTimeMillis(); + BookieProtocol.BatchedReadRequest request = BookieProtocol.BatchedReadRequest.create( + BookieProtocol.CURRENT_PROTOCOL_VERSION, ledgerId, 1, BookieProtocol.FLAG_DO_FENCING, new byte[] {}, + requestId, maxCount, maxSize); + ReadEntryProcessor processor = BatchedReadEntryProcessor.create( + request, requestHandler, requestProcessor, service, true, 1024 * 1024 * 5); + fenceResult.complete(result); + processor.run(); + + latch.await(); + verify(channel, times(1)).writeAndFlush(any(Response.class)); + + assertTrue(writtenObject.get() instanceof Response); + Response response = (Response) writtenObject.get(); + assertEquals(1, response.getEntryId()); + assertEquals(ledgerId, response.getLedgerId()); + assertEquals(BookieProtocol.BATCH_READ_ENTRY, response.getOpCode()); + assertEquals(errorCode, response.getErrorCode()); + } + + @Test + public void testNonFenceRequest() throws Exception { + ChannelPromise promise = new DefaultChannelPromise(channel); + AtomicReference writtenObject = new AtomicReference<>(); + CountDownLatch latch = new CountDownLatch(1); + doAnswer(invocationOnMock -> { + writtenObject.set(invocationOnMock.getArgument(0)); + promise.setSuccess(); + latch.countDown(); + return promise; + }).when(channel).writeAndFlush(any(Response.class)); + + long requestId = 0; + int maxCount = 5; + long maxSize = 1024; + ExecutorService service = Executors.newCachedThreadPool(); + long ledgerId = System.currentTimeMillis(); + BookieProtocol.BatchedReadRequest request = BookieProtocol.BatchedReadRequest.create( + BookieProtocol.CURRENT_PROTOCOL_VERSION, ledgerId, 1, BookieProtocol.FLAG_DO_FENCING, new byte[] {}, + requestId, maxCount, maxSize); + ReadEntryProcessor processor = BatchedReadEntryProcessor.create( + request, requestHandler, requestProcessor, service, true, 1024 * 1024 * 5); + processor.run(); + + latch.await(); + verify(channel, times(1)).writeAndFlush(any(Response.class)); + + assertTrue(writtenObject.get() instanceof Response); + Response response = (Response) writtenObject.get(); + assertEquals(1, response.getEntryId()); + assertEquals(ledgerId, response.getLedgerId()); + assertEquals(BookieProtocol.BATCH_READ_ENTRY, response.getOpCode()); + assertEquals(BookieProtocol.EOK, response.getErrorCode()); + } +} \ No newline at end of file diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/BookieBackpressureForV2Test.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/BookieBackpressureForV2Test.java new file mode 100644 index 
00000000000..8721a2c7819 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/BookieBackpressureForV2Test.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.bookkeeper.proto; + +import org.apache.bookkeeper.client.BookKeeperTestClient; +import org.apache.bookkeeper.test.TestStatsProvider; +import org.junit.Before; + +/** + * Tests for backpressure handling on the server side with the V2 protocol. + */ +public class BookieBackpressureForV2Test extends BookieBackpressureTest { + + @Before + @Override + public void setUp() throws Exception { + super.setUp(); + baseClientConf.setUseV2WireProtocol(true); + bkc = new BookKeeperTestClient(baseClientConf, new TestStatsProvider()); + + // read-worker throttling would block the read responses; disable it so the backpressure mechanism is exercised instead + confByIndex(0).setReadWorkerThreadsThrottlingEnabled(false); + } +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/BookieBackpressureTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/BookieBackpressureTest.java index 5454a702dfe..1fe7c3ddbf0 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/BookieBackpressureTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/BookieBackpressureTest.java @@ -25,6 +25,7 @@ import static org.mockito.Mockito.spy; import static org.mockito.Mockito.when; +import io.netty.buffer.UnpooledByteBufAllocator; import java.lang.reflect.Field; import java.nio.channels.FileChannel; import java.util.Enumeration; @@ -32,12 +33,13 @@ import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; - import org.apache.bookkeeper.bookie.Bookie; +import org.apache.bookkeeper.bookie.BookieImpl; import org.apache.bookkeeper.bookie.Journal; import org.apache.bookkeeper.bookie.SlowBufferedChannel; import org.apache.bookkeeper.bookie.SlowInterleavedLedgerStorage; import org.apache.bookkeeper.bookie.SlowSortedLedgerStorage; +import org.apache.bookkeeper.bookie.TestBookieImpl; import org.apache.bookkeeper.client.AsyncCallback.AddCallback; import org.apache.bookkeeper.client.AsyncCallback.ReadCallback; import org.apache.bookkeeper.client.AsyncCallback.ReadLastConfirmedCallback; @@ -45,13 +47,12 @@ import org.apache.bookkeeper.client.BookKeeper.DigestType; import org.apache.bookkeeper.client.LedgerEntry; import org.apache.bookkeeper.client.LedgerHandle; - +import org.apache.bookkeeper.conf.ServerConfiguration; import org.apache.bookkeeper.test.BookKeeperClusterTestCase; import org.hamcrest.Matchers; import org.junit.Assert; import org.junit.Before; import org.junit.Test; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -59,7
+60,6 @@ /** * Tests for backpressure handling on the server side. */ -// PowerMock usage is problematic here due to https://github.com/powermock/powermock/issues/822 public class BookieBackpressureTest extends BookKeeperClusterTestCase implements AddCallback, ReadCallback, ReadLastConfirmedCallback { @@ -121,16 +121,18 @@ void setLedgerEntries(Enumeration ls) { } } - private void mockJournal(Bookie bookie, long getDelay, long addDelay, long flushDelay) throws Exception { + private Bookie bookieWithMockedJournal(ServerConfiguration conf, + long getDelay, long addDelay, long flushDelay) throws Exception { + Bookie bookie = new TestBookieImpl(conf); if (getDelay <= 0 && addDelay <= 0 && flushDelay <= 0) { - return; + return bookie; } List journals = getJournals(bookie); for (int i = 0; i < journals.size(); i++) { Journal mock = spy(journals.get(i)); when(mock.getBufferedChannelBuilder()).thenReturn((FileChannel fc, int capacity) -> { - SlowBufferedChannel sbc = new SlowBufferedChannel(fc, capacity); + SlowBufferedChannel sbc = new SlowBufferedChannel(UnpooledByteBufAllocator.DEFAULT, fc, capacity); sbc.setAddDelay(addDelay); sbc.setGetDelay(getDelay); sbc.setFlushDelay(flushDelay); @@ -139,11 +141,12 @@ private void mockJournal(Bookie bookie, long getDelay, long addDelay, long flush journals.set(i, mock); } + return bookie; } @SuppressWarnings("unchecked") private List getJournals(Bookie bookie) throws NoSuchFieldException, IllegalAccessException { - Field f = bookie.getClass().getDeclaredField("journals"); + Field f = BookieImpl.class.getDeclaredField("journals"); f.setAccessible(true); return (List) f.get(bookie); @@ -152,7 +155,7 @@ private List getJournals(Bookie bookie) throws NoSuchFieldException, Il @Test public void testWriteNoBackpressureSlowJournal() throws Exception { //disable backpressure for writes - bsConfs.get(0).setMaxAddsInProgressLimit(0); + confByIndex(0).setMaxAddsInProgressLimit(0); addDelay = 1; doWritesNoBackpressure(0); @@ -161,9 +164,9 @@ public void testWriteNoBackpressureSlowJournal() throws Exception { @Test public void testWriteNoBackpressureSlowJournalFlush() throws Exception { //disable backpressure for writes - bsConfs.get(0).setMaxAddsInProgressLimit(0); + confByIndex(0).setMaxAddsInProgressLimit(0); // to increase frequency of flushes - bsConfs.get(0).setJournalAdaptiveGroupWrites(false); + confByIndex(0).setJournalAdaptiveGroupWrites(false); flushDelay = 1; doWritesNoBackpressure(0); @@ -172,7 +175,7 @@ public void testWriteNoBackpressureSlowJournalFlush() throws Exception { @Test public void testWriteWithBackpressureSlowJournal() throws Exception { //enable backpressure with MAX_PENDING writes in progress - bsConfs.get(0).setMaxAddsInProgressLimit(MAX_PENDING); + confByIndex(0).setMaxAddsInProgressLimit(MAX_PENDING); flushDelay = 1; doWritesWithBackpressure(0); @@ -182,9 +185,9 @@ public void testWriteWithBackpressureSlowJournal() throws Exception { @Test public void testWriteWithBackpressureSlowJournalFlush() throws Exception { //enable backpressure with MAX_PENDING writes in progress - bsConfs.get(0).setMaxAddsInProgressLimit(MAX_PENDING); + confByIndex(0).setMaxAddsInProgressLimit(MAX_PENDING); // to increase frequency of flushes - bsConfs.get(0).setJournalAdaptiveGroupWrites(false); + confByIndex(0).setJournalAdaptiveGroupWrites(false); flushDelay = 1; doWritesWithBackpressure(0); @@ -193,11 +196,11 @@ public void testWriteWithBackpressureSlowJournalFlush() throws Exception { @Test public void testWriteNoBackpressureSlowInterleavedStorage() throws 
Exception { //disable backpressure for writes - bsConfs.get(0).setMaxAddsInProgressLimit(0); - bsConfs.get(0).setLedgerStorageClass(SlowInterleavedLedgerStorage.class.getName()); - bsConfs.get(0).setWriteBufferBytes(data.length); + confByIndex(0).setMaxAddsInProgressLimit(0); + confByIndex(0).setLedgerStorageClass(SlowInterleavedLedgerStorage.class.getName()); + confByIndex(0).setWriteBufferBytes(data.length); - bsConfs.get(0).setProperty(SlowInterleavedLedgerStorage.PROP_SLOW_STORAGE_ADD_DELAY, "1"); + confByIndex(0).setProperty(SlowInterleavedLedgerStorage.PROP_SLOW_STORAGE_ADD_DELAY, "1"); doWritesNoBackpressure(0); } @@ -205,11 +208,11 @@ public void testWriteNoBackpressureSlowInterleavedStorage() throws Exception { @Test public void testWriteWithBackpressureSlowInterleavedStorage() throws Exception { //enable backpressure with MAX_PENDING writes in progress - bsConfs.get(0).setMaxAddsInProgressLimit(MAX_PENDING); - bsConfs.get(0).setLedgerStorageClass(SlowInterleavedLedgerStorage.class.getName()); - bsConfs.get(0).setWriteBufferBytes(data.length); + confByIndex(0).setMaxAddsInProgressLimit(MAX_PENDING); + confByIndex(0).setLedgerStorageClass(SlowInterleavedLedgerStorage.class.getName()); + confByIndex(0).setWriteBufferBytes(data.length); - bsConfs.get(0).setProperty(SlowInterleavedLedgerStorage.PROP_SLOW_STORAGE_ADD_DELAY, "1"); + confByIndex(0).setProperty(SlowInterleavedLedgerStorage.PROP_SLOW_STORAGE_ADD_DELAY, "1"); doWritesWithBackpressure(0); } @@ -217,11 +220,11 @@ public void testWriteWithBackpressureSlowInterleavedStorage() throws Exception { @Test public void testWriteNoBackpressureSlowInterleavedStorageFlush() throws Exception { //disable backpressure for writes - bsConfs.get(0).setMaxAddsInProgressLimit(0); - bsConfs.get(0).setLedgerStorageClass(SlowInterleavedLedgerStorage.class.getName()); - bsConfs.get(0).setWriteBufferBytes(data.length); + confByIndex(0).setMaxAddsInProgressLimit(0); + confByIndex(0).setLedgerStorageClass(SlowInterleavedLedgerStorage.class.getName()); + confByIndex(0).setWriteBufferBytes(data.length); - bsConfs.get(0).setProperty(SlowInterleavedLedgerStorage.PROP_SLOW_STORAGE_FLUSH_DELAY, "10"); + confByIndex(0).setProperty(SlowInterleavedLedgerStorage.PROP_SLOW_STORAGE_FLUSH_DELAY, "10"); doWritesNoBackpressure(0); } @@ -229,11 +232,11 @@ public void testWriteNoBackpressureSlowInterleavedStorageFlush() throws Exceptio @Test public void testWriteWithBackpressureSlowInterleavedStorageFlush() throws Exception { //enable backpressure with MAX_PENDING writes in progress - bsConfs.get(0).setMaxAddsInProgressLimit(MAX_PENDING); - bsConfs.get(0).setLedgerStorageClass(SlowInterleavedLedgerStorage.class.getName()); - bsConfs.get(0).setWriteBufferBytes(data.length); + confByIndex(0).setMaxAddsInProgressLimit(MAX_PENDING); + confByIndex(0).setLedgerStorageClass(SlowInterleavedLedgerStorage.class.getName()); + confByIndex(0).setWriteBufferBytes(data.length); - bsConfs.get(0).setProperty(SlowInterleavedLedgerStorage.PROP_SLOW_STORAGE_FLUSH_DELAY, "10"); + confByIndex(0).setProperty(SlowInterleavedLedgerStorage.PROP_SLOW_STORAGE_FLUSH_DELAY, "10"); doWritesWithBackpressure(0); } @@ -241,16 +244,16 @@ public void testWriteWithBackpressureSlowInterleavedStorageFlush() throws Except @Test public void testWriteNoBackpressureSortedStorage() throws Exception { //disable backpressure for writes - bsConfs.get(0).setMaxAddsInProgressLimit(0); - bsConfs.get(0).setLedgerStorageClass(SlowSortedLedgerStorage.class.getName()); - bsConfs.get(0).setWriteBufferBytes(data.length); + 
confByIndex(0).setMaxAddsInProgressLimit(0); + confByIndex(0).setLedgerStorageClass(SlowSortedLedgerStorage.class.getName()); + confByIndex(0).setWriteBufferBytes(data.length); // one for memtable being flushed, one for the part accepting the data assertTrue("for the test, memtable should not keep more entries than allowed", ENTRIES_IN_MEMTABLE * 2 <= MAX_PENDING); - bsConfs.get(0).setSkipListSizeLimit(data.length * ENTRIES_IN_MEMTABLE - 1); - bsConfs.get(0).setProperty(SlowInterleavedLedgerStorage.PROP_SLOW_STORAGE_ADD_DELAY, "1"); - bsConfs.get(0).setProperty(SlowInterleavedLedgerStorage.PROP_SLOW_STORAGE_FLUSH_DELAY, "10"); + confByIndex(0).setSkipListSizeLimit(data.length * ENTRIES_IN_MEMTABLE - 1); + confByIndex(0).setProperty(SlowInterleavedLedgerStorage.PROP_SLOW_STORAGE_ADD_DELAY, "1"); + confByIndex(0).setProperty(SlowInterleavedLedgerStorage.PROP_SLOW_STORAGE_FLUSH_DELAY, "10"); doWritesNoBackpressure(0); } @@ -258,16 +261,16 @@ public void testWriteNoBackpressureSortedStorage() throws Exception { @Test public void testWriteWithBackpressureSortedStorage() throws Exception { //enable backpressure with MAX_PENDING writes in progress - bsConfs.get(0).setMaxAddsInProgressLimit(MAX_PENDING); - bsConfs.get(0).setLedgerStorageClass(SlowSortedLedgerStorage.class.getName()); - bsConfs.get(0).setWriteBufferBytes(data.length); + confByIndex(0).setMaxAddsInProgressLimit(MAX_PENDING); + confByIndex(0).setLedgerStorageClass(SlowSortedLedgerStorage.class.getName()); + confByIndex(0).setWriteBufferBytes(data.length); // one for memtable being flushed, one for the part accepting the data assertTrue("for the test, memtable should not keep more entries than allowed", ENTRIES_IN_MEMTABLE * 2 <= MAX_PENDING); - bsConfs.get(0).setSkipListSizeLimit(data.length * ENTRIES_IN_MEMTABLE - 1); - bsConfs.get(0).setProperty(SlowInterleavedLedgerStorage.PROP_SLOW_STORAGE_ADD_DELAY, "1"); - bsConfs.get(0).setProperty(SlowInterleavedLedgerStorage.PROP_SLOW_STORAGE_FLUSH_DELAY, "10"); + confByIndex(0).setSkipListSizeLimit(data.length * ENTRIES_IN_MEMTABLE - 1); + confByIndex(0).setProperty(SlowInterleavedLedgerStorage.PROP_SLOW_STORAGE_ADD_DELAY, "1"); + confByIndex(0).setProperty(SlowInterleavedLedgerStorage.PROP_SLOW_STORAGE_FLUSH_DELAY, "10"); doWritesWithBackpressure(0); } @@ -275,11 +278,11 @@ public void testWriteWithBackpressureSortedStorage() throws Exception { @Test public void testReadsNoBackpressure() throws Exception { //disable backpressure for reads - bsConfs.get(0).setMaxReadsInProgressLimit(0); - bsConfs.get(0).setLedgerStorageClass(SlowInterleavedLedgerStorage.class.getName()); - bsConfs.get(0).setWriteBufferBytes(data.length); + confByIndex(0).setMaxReadsInProgressLimit(0); + confByIndex(0).setLedgerStorageClass(SlowInterleavedLedgerStorage.class.getName()); + confByIndex(0).setWriteBufferBytes(data.length); - bsConfs.get(0).setProperty(SlowInterleavedLedgerStorage.PROP_SLOW_STORAGE_GET_DELAY, "1"); + confByIndex(0).setProperty(SlowInterleavedLedgerStorage.PROP_SLOW_STORAGE_GET_DELAY, "1"); final BookieRequestProcessor brp = generateDataAndDoReads(0); @@ -290,11 +293,11 @@ public void testReadsNoBackpressure() throws Exception { @Test public void testReadsWithBackpressure() throws Exception { //enable backpressure for reads - bsConfs.get(0).setMaxReadsInProgressLimit(MAX_PENDING); - bsConfs.get(0).setLedgerStorageClass(SlowInterleavedLedgerStorage.class.getName()); - bsConfs.get(0).setWriteBufferBytes(data.length); + confByIndex(0).setMaxReadsInProgressLimit(MAX_PENDING); + 
confByIndex(0).setLedgerStorageClass(SlowInterleavedLedgerStorage.class.getName()); + confByIndex(0).setWriteBufferBytes(data.length); - bsConfs.get(0).setProperty(SlowInterleavedLedgerStorage.PROP_SLOW_STORAGE_GET_DELAY, "1"); + confByIndex(0).setProperty(SlowInterleavedLedgerStorage.PROP_SLOW_STORAGE_GET_DELAY, "1"); final BookieRequestProcessor brp = generateDataAndDoReads(0); @@ -303,12 +306,12 @@ public void testReadsWithBackpressure() throws Exception { } private BookieRequestProcessor generateDataAndDoReads(final int bkId) throws Exception { - BookieServer bks = bs.get(bkId); - bks.shutdown(); - bks = new BookieServer(bsConfs.get(bkId)); - mockJournal(bks.bookie, getDelay, addDelay, flushDelay); - bks.start(); - bs.set(bkId, bks); + Assert.assertThat("should be only one bookie", + bookieCount(), Matchers.equalTo(1)); + ServerConfiguration conf = killBookie(0); + BookieServer bks = startAndAddBookie(conf, + bookieWithMockedJournal(conf, getDelay, addDelay, flushDelay)) + .getServer(); LOG.info("creating ledgers"); // Create ledgers @@ -344,12 +347,12 @@ private BookieRequestProcessor generateDataAndDoReads(final int bkId) throws Exc // here we expect that backpressure is disabled and number of writes in progress // will exceed the limit private void doWritesNoBackpressure(final int bkId) throws Exception { - BookieServer bks = bs.get(bkId); - bks.shutdown(); - bks = new BookieServer(bsConfs.get(bkId)); - mockJournal(bks.bookie, getDelay, addDelay, flushDelay); - bks.start(); - bs.set(bkId, bks); + Assert.assertThat("should be only one bookie", + bookieCount(), Matchers.equalTo(1)); + ServerConfiguration conf = killBookie(0); + BookieServer bks = startAndAddBookie(conf, + bookieWithMockedJournal(conf, getDelay, addDelay, flushDelay)) + .getServer(); LOG.info("Creating ledgers"); LedgerHandle[] lhs = new LedgerHandle[NUM_OF_LEDGERS]; @@ -389,12 +392,12 @@ private void doWritesNoBackpressure(final int bkId) throws Exception { // here we expect that backpressure is enabled and number of writes in progress // will never exceed the limit private void doWritesWithBackpressure(final int bkId) throws Exception { - BookieServer bks = bs.get(bkId); - bks.shutdown(); - bks = new BookieServer(bsConfs.get(bkId)); - mockJournal(bks.bookie, getDelay, addDelay, flushDelay); - bks.start(); - bs.set(bkId, bks); + Assert.assertThat("should be only one bookie", + bookieCount(), Matchers.equalTo(1)); + ServerConfiguration conf = killBookie(0); + BookieServer bks = startAndAddBookie(conf, + bookieWithMockedJournal(conf, getDelay, addDelay, flushDelay)) + .getServer(); LOG.info("Creating ledgers"); LedgerHandle[] lhs = new LedgerHandle[NUM_OF_LEDGERS]; diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/BookieProtoEncodingTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/BookieProtoEncodingTest.java index a7f0c9251ad..4f719ddfc08 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/BookieProtoEncodingTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/BookieProtoEncodingTest.java @@ -19,6 +19,7 @@ package org.apache.bookkeeper.proto; import static java.nio.charset.StandardCharsets.UTF_8; +import static org.apache.bookkeeper.proto.BookieProtocol.FLAG_NONE; import static org.junit.Assert.assertEquals; import static org.junit.Assert.fail; import static org.mockito.ArgumentMatchers.any; @@ -44,6 +45,7 @@ import org.apache.bookkeeper.proto.BookkeeperProtocol.OperationType; import 
org.apache.bookkeeper.proto.BookkeeperProtocol.ProtocolVersion; import org.apache.bookkeeper.proto.BookkeeperProtocol.StatusCode; +import org.apache.bookkeeper.util.ByteBufList; import org.junit.Before; import org.junit.Test; @@ -91,7 +93,7 @@ public void testV3ResponseDecoderNoFallback() throws Exception { ResponseEnDeCoderPreV3 v2Encoder = new ResponseEnDeCoderPreV3(registry); ResponseEnDecoderV3 v3Encoder = new ResponseEnDecoderV3(registry); - ResponseDecoder v3Decoder = new ResponseDecoder(registry, false); + ResponseDecoder v3Decoder = new ResponseDecoder(registry, false, false); try { v3Decoder.channelRead(ctx, v2Encoder.encode(v2Resp, UnpooledByteBufAllocator.DEFAULT) @@ -102,9 +104,9 @@ public void testV3ResponseDecoderNoFallback() throws Exception { } assertEquals(0, outList.size()); - v3Decoder.channelRead( - ctx, - v3Encoder.encode(v3Resp, UnpooledByteBufAllocator.DEFAULT)); + ByteBuf serWithFrameSize = (ByteBuf) v3Encoder.encode(v3Resp, UnpooledByteBufAllocator.DEFAULT); + ByteBuf ser = serWithFrameSize.slice(4, serWithFrameSize.readableBytes() - 4); + v3Decoder.channelRead(ctx, ser); assertEquals(1, outList.size()); } @@ -131,4 +133,36 @@ public void testV2RequestDecoderThrowExceptionOnUnknownRequests() throws Excepti v2ReqEncoder.decode((ByteBuf) v3ReqEncoder.encode(v3Req, UnpooledByteBufAllocator.DEFAULT)); } + @Test + public void testV2BatchReadRequest() throws Exception { + RequestEnDeCoderPreV3 v2ReqEncoder = new RequestEnDeCoderPreV3(registry); + BookieProtocol.BatchedReadRequest req = BookieProtocol.BatchedReadRequest.create( + BookieProtocol.CURRENT_PROTOCOL_VERSION, 1L, 1L, FLAG_NONE, null, 1L, 10, 1024L); + ByteBuf buf = (ByteBuf) v2ReqEncoder.encode(req, UnpooledByteBufAllocator.DEFAULT); + buf.readInt(); // Skip the frame size. + BookieProtocol.BatchedReadRequest reqDecoded = (BookieProtocol.BatchedReadRequest) v2ReqEncoder.decode(buf); + assertEquals(req.ledgerId, reqDecoded.ledgerId); + assertEquals(req.entryId, reqDecoded.entryId); + assertEquals(req.maxSize, reqDecoded.maxSize); + assertEquals(req.maxCount, reqDecoded.maxCount); + reqDecoded.recycle(); + } + + @Test + public void testV2BatchReadResponse() throws Exception { + ResponseEnDeCoderPreV3 v2ReqEncoder = new ResponseEnDeCoderPreV3(registry); + ByteBuf first = UnpooledByteBufAllocator.DEFAULT.buffer(4).writeInt(10); + ByteBuf second = UnpooledByteBufAllocator.DEFAULT.buffer(8).writeLong(10L); + ByteBufList data = ByteBufList.get(first, second); + BookieProtocol.BatchedReadResponse res = new BookieProtocol.BatchedReadResponse( + BookieProtocol.CURRENT_PROTOCOL_VERSION, 1, 1L, 1L, 1L, data); + ByteBuf buf = (ByteBuf) v2ReqEncoder.encode(res, UnpooledByteBufAllocator.DEFAULT); + buf.readInt(); // Skip the frame size. 
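+ // The pre-V3 encoder prepends a 4-byte frame length that the decoder itself does not consume, + // hence the readInt() above before handing the buffer to decode().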
+ BookieProtocol.BatchedReadResponse resDecoded = (BookieProtocol.BatchedReadResponse) v2ReqEncoder.decode(buf); + assertEquals(res.ledgerId, resDecoded.ledgerId); + assertEquals(res.entryId, resDecoded.entryId); + assertEquals(res.getData().size(), resDecoded.getData().size()); + assertEquals(res.getData().readableBytes(), resDecoded.getData().readableBytes()); + } + } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/ForceLedgerProcessorV3Test.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/ForceLedgerProcessorV3Test.java index 37d4647343d..3bc9cbee427 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/ForceLedgerProcessorV3Test.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/ForceLedgerProcessorV3Test.java @@ -30,11 +30,13 @@ import static org.mockito.Mockito.when; import io.netty.channel.Channel; +import io.netty.channel.ChannelHandlerContext; import io.netty.channel.ChannelPromise; import io.netty.channel.DefaultChannelPromise; import java.util.concurrent.CountDownLatch; import java.util.concurrent.atomic.AtomicReference; import org.apache.bookkeeper.bookie.Bookie; +import org.apache.bookkeeper.bookie.BookieImpl; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.WriteCallback; import org.apache.bookkeeper.proto.BookkeeperProtocol.BKPacketHeader; import org.apache.bookkeeper.proto.BookkeeperProtocol.ForceLedgerRequest; @@ -54,6 +56,8 @@ public class ForceLedgerProcessorV3Test { private Request request; private ForceLedgerProcessorV3 processor; + + private BookieRequestHandler requestHandler; private Channel channel; private BookieRequestProcessor requestProcessor; private Bookie bookie; @@ -70,18 +74,25 @@ public void setup() { .setLedgerId(System.currentTimeMillis()) .build()) .build(); + + channel = mock(Channel.class); + when(channel.isOpen()).thenReturn(true); + when(channel.isActive()).thenReturn(true); + + requestHandler = mock(BookieRequestHandler.class); + ChannelHandlerContext ctx = mock(ChannelHandlerContext.class); + when(ctx.channel()).thenReturn(channel); + when(requestHandler.ctx()).thenReturn(ctx); + bookie = mock(Bookie.class); requestProcessor = mock(BookieRequestProcessor.class); when(requestProcessor.getBookie()).thenReturn(bookie); when(requestProcessor.getWaitTimeoutOnBackpressureMillis()).thenReturn(-1L); - when(requestProcessor.getForceLedgerStats()) - .thenReturn(NullStatsLogger.INSTANCE.getOpStatsLogger("force_ledger")); - when(requestProcessor.getForceLedgerRequestStats()) - .thenReturn(NullStatsLogger.INSTANCE.getOpStatsLogger("force_ledger_request")); + when(requestProcessor.getRequestStats()).thenReturn(new RequestStats(NullStatsLogger.INSTANCE)); processor = new ForceLedgerProcessorV3( request, - channel, + requestHandler, requestProcessor); } @@ -95,14 +106,14 @@ public void testForceLedger() throws Exception { wc.writeComplete( 0, request.getForceLedgerRequest().getLedgerId(), - Bookie.METAENTRY_ID_FORCE_LEDGER, + BookieImpl.METAENTRY_ID_FORCE_LEDGER, null, null); return null; }).when(bookie).forceLedger( eq(request.getForceLedgerRequest().getLedgerId()), any(WriteCallback.class), - same(channel)); + same(requestHandler)); ChannelPromise promise = new DefaultChannelPromise(channel); AtomicReference writtenObject = new AtomicReference<>(); @@ -117,7 +128,7 @@ public void testForceLedger() throws Exception { verify(bookie, times(1)) .forceLedger(eq(request.getForceLedgerRequest().getLedgerId()), - any(WriteCallback.class), same(channel)); + 
any(WriteCallback.class), same(requestHandler)); verify(channel, times(1)).writeAndFlush(any(Response.class)); latch.await(); diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/GetBookieInfoProcessorV3Test.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/GetBookieInfoProcessorV3Test.java new file mode 100644 index 00000000000..5e986515e92 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/GetBookieInfoProcessorV3Test.java @@ -0,0 +1,111 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.bookkeeper.proto; + +import static org.mockito.ArgumentMatchers.anyLong; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import io.netty.channel.Channel; +import io.netty.channel.ChannelHandlerContext; +import java.io.IOException; +import java.util.concurrent.TimeUnit; +import org.apache.bookkeeper.bookie.Bookie; +import org.apache.bookkeeper.stats.OpStatsLogger; +import org.junit.Before; +import org.junit.Test; + +/** + * Unit test {@link GetBookieInfoProcessorV3}. 
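+ * <p>Verifies that a GET_BOOKIE_INFO request registers a successful event on the stats logger, and a + * failed event when Bookie#getTotalFreeSpace throws an IOException.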
+ */ +public class GetBookieInfoProcessorV3Test { + + private BookieRequestHandler requestHandler; + private Channel channel; + private BookieRequestProcessor requestProcessor; + private Bookie bookie; + private RequestStats requestStats; + private OpStatsLogger getBookieInfoStats; + private OpStatsLogger channelWriteStats; + private OpStatsLogger getBookieInfoRequestStats; + + @Before + public void setup() { + getBookieInfoStats = mock(OpStatsLogger.class); + channelWriteStats = mock(OpStatsLogger.class); + getBookieInfoRequestStats = mock(OpStatsLogger.class); + requestStats = mock(RequestStats.class); + requestProcessor = mock(BookieRequestProcessor.class); + bookie = mock(Bookie.class); + when(requestProcessor.getBookie()).thenReturn(bookie); + + requestHandler = mock(BookieRequestHandler.class); + + channel = mock(Channel.class); + when(channel.isOpen()).thenReturn(true); + when(channel.isActive()).thenReturn(true); + + ChannelHandlerContext ctx = mock(ChannelHandlerContext.class); + when(ctx.channel()).thenReturn(channel); + when(requestHandler.ctx()).thenReturn(ctx); + + when(requestProcessor.getRequestStats()).thenReturn(requestStats); + when(requestProcessor.getRequestStats().getGetBookieInfoStats()) + .thenReturn(getBookieInfoStats); + when(requestProcessor.getRequestStats().getChannelWriteStats()) + .thenReturn(channelWriteStats); + when(requestProcessor.getRequestStats().getGetBookieInfoRequestStats()) + .thenReturn(getBookieInfoRequestStats); + } + + @Test + public void testGetBookieInfoProcessorStats() throws IOException { + final BookkeeperProtocol.BKPacketHeader.Builder headerBuilder = + BookkeeperProtocol.BKPacketHeader.newBuilder() + .setVersion(BookkeeperProtocol.ProtocolVersion.VERSION_THREE) + .setOperation(BookkeeperProtocol.OperationType.GET_BOOKIE_INFO) + .setTxnId(0); + + final BookkeeperProtocol.GetBookieInfoRequest.Builder getBookieInfoBuilder = + BookkeeperProtocol.GetBookieInfoRequest.newBuilder() + .setRequested(BookkeeperProtocol.GetBookieInfoRequest.Flags.FREE_DISK_SPACE_VALUE); + + final BookkeeperProtocol.Request getBookieInfoRequest = BookkeeperProtocol.Request.newBuilder() + .setHeader(headerBuilder) + .setGetBookieInfoRequest(getBookieInfoBuilder) + .build(); + + GetBookieInfoProcessorV3 getBookieInfo = new GetBookieInfoProcessorV3( + getBookieInfoRequest, requestHandler, requestProcessor); + getBookieInfo.run(); + + // get BookieInfo succeeded. + verify(getBookieInfoStats, times(1)) + .registerSuccessfulEvent(anyLong(), eq(TimeUnit.NANOSECONDS)); + + // get BookieInfo failed. + when(requestProcessor.getBookie().getTotalFreeSpace()).thenThrow(new IOException("test for failed.")); + getBookieInfo.run(); + verify(getBookieInfoStats, times(1)) + .registerFailedEvent(anyLong(), eq(TimeUnit.NANOSECONDS)); + } +} \ No newline at end of file diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/LongPollReadEntryProcessorV3Test.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/LongPollReadEntryProcessorV3Test.java new file mode 100644 index 00000000000..33a4fdc8295 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/LongPollReadEntryProcessorV3Test.java @@ -0,0 +1,122 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.bookkeeper.proto; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyLong; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import com.google.protobuf.ByteString; +import io.netty.buffer.Unpooled; +import io.netty.channel.Channel; +import io.netty.channel.ChannelHandlerContext; +import io.netty.util.HashedWheelTimer; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; +import org.apache.bookkeeper.bookie.Bookie; +import org.apache.bookkeeper.proto.BookkeeperProtocol.BKPacketHeader; +import org.apache.bookkeeper.proto.BookkeeperProtocol.OperationType; +import org.apache.bookkeeper.proto.BookkeeperProtocol.ProtocolVersion; +import org.apache.bookkeeper.proto.BookkeeperProtocol.ReadRequest; +import org.apache.bookkeeper.proto.BookkeeperProtocol.Request; +import org.apache.bookkeeper.stats.NullStatsLogger; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + + + + +/** + * Unit test {@link LongPollReadEntryProcessorV3}. 
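+ * <p>Checks that the last-add-confirmed watch registered by a long-poll read is cancelled once the + * poll times out (the request below sets timeOut = 1 and waits for the cancellation callback).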
+ */ +public class LongPollReadEntryProcessorV3Test { + ExecutorService executor; + HashedWheelTimer timer; + + @Before + public void setup() { + executor = Executors.newSingleThreadExecutor(); + timer = new HashedWheelTimer(); + } + + @After + public void teardown() { + timer.stop(); + executor.shutdownNow(); + } + + @Test + public void testWatchIsCancelledOnTimeout() throws Exception { + Request request = Request.newBuilder() + .setHeader(BKPacketHeader.newBuilder() + .setTxnId(System.currentTimeMillis()) + .setVersion(ProtocolVersion.VERSION_THREE) + .setOperation(OperationType.READ_ENTRY) + .build()) + .setReadRequest(ReadRequest.newBuilder() + .setLedgerId(10) + .setEntryId(1) + .setMasterKey(ByteString.copyFrom(new byte[0])) + .setPreviousLAC(0) + .setTimeOut(1) + .build()) + .build(); + + Channel channel = mock(Channel.class); + when(channel.isOpen()).thenReturn(true); + + BookieRequestHandler requestHandler = mock(BookieRequestHandler.class); + ChannelHandlerContext ctx = mock(ChannelHandlerContext.class); + when(ctx.channel()).thenReturn(channel); + when(requestHandler.ctx()).thenReturn(ctx); + + Bookie bookie = mock(Bookie.class); + + BookieRequestProcessor requestProcessor = mock(BookieRequestProcessor.class); + when(requestProcessor.getBookie()).thenReturn(bookie); + when(requestProcessor.getRequestStats()).thenReturn(new RequestStats(NullStatsLogger.INSTANCE)); + + when(bookie.waitForLastAddConfirmedUpdate(anyLong(), anyLong(), any())) + .thenReturn(true); + when(bookie.readEntry(anyLong(), anyLong())).thenReturn(Unpooled.buffer()); + when(bookie.readLastAddConfirmed(anyLong())).thenReturn(Long.valueOf(1)); + + CompletableFuture cancelFuture = new CompletableFuture<>(); + + doAnswer(invocationOnMock -> { + cancelFuture.complete(null); + return null; + }).when(bookie).cancelWaitForLastAddConfirmedUpdate(anyLong(), any()); + + LongPollReadEntryProcessorV3 processor = new LongPollReadEntryProcessorV3( + request, + requestHandler, + requestProcessor, + executor, executor, timer); + + processor.run(); + + cancelFuture.get(10, TimeUnit.SECONDS); + } +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/MockBookieClient.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/MockBookieClient.java index 79cc5ba3f5f..37317317475 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/MockBookieClient.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/MockBookieClient.java @@ -20,202 +20,258 @@ */ package org.apache.bookkeeper.proto; -import static org.apache.bookkeeper.util.SafeRunnable.safeRun; +import static org.apache.bookkeeper.proto.BookieProtocol.FLAG_RECOVERY_ADD; import io.netty.buffer.ByteBuf; import io.netty.buffer.Unpooled; - -import java.util.ArrayList; +import io.netty.util.ReferenceCounted; +import java.util.Arrays; import java.util.Collections; import java.util.EnumSet; -import java.util.HashMap; import java.util.List; -import java.util.Map; import java.util.Set; -import java.util.TreeMap; +import java.util.concurrent.CompletableFuture; import java.util.concurrent.ConcurrentHashMap; -import java.util.function.Consumer; - +import lombok.Getter; import org.apache.bookkeeper.client.BKException; import org.apache.bookkeeper.client.api.WriteFlag; +import org.apache.bookkeeper.common.concurrent.FutureUtils; import org.apache.bookkeeper.common.util.OrderedExecutor; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.BookieId; import 
org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.ForceLedgerCallback; +import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.FutureGetListOfEntriesOfLedger; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GetBookieInfoCallback; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.ReadEntryCallback; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.ReadLacCallback; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.WriteCallback; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.WriteLacCallback; +import org.apache.bookkeeper.util.AvailabilityOfEntriesOfLedger; import org.apache.bookkeeper.util.ByteBufList; -import org.apache.bookkeeper.util.SafeRunnable; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; + /** * Mock implementation of BookieClient. */ public class MockBookieClient implements BookieClient { static final Logger LOG = LoggerFactory.getLogger(MockBookieClient.class); + @Getter final OrderedExecutor executor; - final ConcurrentHashMap> data = new ConcurrentHashMap<>(); - final Set errorBookies = - Collections.newSetFromMap(new ConcurrentHashMap()); + final MockBookies mockBookies; + final Set errorBookies = + Collections.newSetFromMap(new ConcurrentHashMap<>()); + + /** + * Runs before or after an operation. Can stall the operation or error it. + */ + public interface Hook { + CompletableFuture runHook(BookieId bookie, long ledgerId, long entryId); + } - final Map stalledBookies = new HashMap<>(); - final Map>> stalledRequests = new HashMap<>(); + /** + * Runs before or after an operation. Can stall the operation or error it. + */ + public interface BatchHook { + CompletableFuture runHook(BookieId bookie, long ledgerId, long startEntryId, int maxCount, long maxSize); + } + + private Hook preReadHook = (bookie, ledgerId, entryId) -> FutureUtils.value(null); + private Hook postReadHook = (bookie, ledgerId, entryId) -> FutureUtils.value(null); + private Hook preWriteHook = (bookie, ledgerId, entryId) -> FutureUtils.value(null); + private Hook postWriteHook = (bookie, ledgerId, entryId) -> FutureUtils.value(null); + private BatchHook preBatchReadHook = (bookie, ledgerId, startEntryId, maxCount, maxSize) -> FutureUtils.value(null); + private BatchHook postBatchReadHook = (bookie, ledgerId, startEntryId, maxCount, maxSize) -> FutureUtils.value( + null); public MockBookieClient(OrderedExecutor executor) { this.executor = executor; + this.mockBookies = new MockBookies(); } - public void stallBookie(BookieSocketAddress bookie) { - synchronized (this) { - stalledBookies.put(bookie, true); - } + public MockBookieClient(OrderedExecutor executor, + MockBookies mockBookies) { + this.executor = executor; + this.mockBookies = mockBookies; } - public void releaseStalledBookie(BookieSocketAddress bookie, int rc) { - synchronized (this) { - stalledBookies.remove(bookie); - stalledRequests.remove(bookie).forEach((r) -> r.accept(rc)); - } + public void setPreReadHook(Hook hook) { + this.preReadHook = hook; } - public void errorBookies(BookieSocketAddress... bookies) { - for (BookieSocketAddress b : bookies) { - errorBookies.add(b); - } + public void setPostReadHook(Hook hook) { + this.postReadHook = hook; + } + + public void setPreWriteHook(Hook hook) { + this.preWriteHook = hook; + } + + public void setPostWriteHook(Hook hook) { + this.postWriteHook = hook; } - public void removeErrors(BookieSocketAddress... bookies) { - for (BookieSocketAddress b : bookies) { + public void errorBookies(BookieId... 
bookies) { + errorBookies.addAll(Arrays.asList(bookies)); + } + + public void removeErrors(BookieId... bookies) { + for (BookieId b : bookies) { errorBookies.remove(b); } } + public boolean isErrored(BookieId bookieId) { + return errorBookies.contains(bookieId); + } + + public MockBookies getMockBookies() { + return mockBookies; + } + @Override - public List getFaultyBookies() { + public List getFaultyBookies() { return Collections.emptyList(); } @Override - public boolean isWritable(BookieSocketAddress address, long ledgerId) { + public boolean isWritable(BookieId address, long ledgerId) { return true; } @Override - public long getNumPendingRequests(BookieSocketAddress address, long ledgerId) { + public long getNumPendingRequests(BookieId address, long ledgerId) { return 0; } @Override - public void forceLedger(BookieSocketAddress addr, long ledgerId, + public void forceLedger(BookieId addr, long ledgerId, ForceLedgerCallback cb, Object ctx) { executor.executeOrdered(ledgerId, - safeRun(() -> { - cb.forceLedgerComplete(BKException.Code.IllegalOpException, - ledgerId, addr, ctx); - })); + () -> cb.forceLedgerComplete(BKException.Code.IllegalOpException, ledgerId, addr, ctx)); } @Override - public void writeLac(BookieSocketAddress addr, long ledgerId, byte[] masterKey, + public void writeLac(BookieId addr, long ledgerId, byte[] masterKey, long lac, ByteBufList toSend, WriteLacCallback cb, Object ctx) { executor.executeOrdered(ledgerId, - safeRun(() -> { - cb.writeLacComplete(BKException.Code.IllegalOpException, - ledgerId, addr, ctx); - })); + () -> cb.writeLacComplete(BKException.Code.IllegalOpException, ledgerId, addr, ctx)); } @Override - public void addEntry(BookieSocketAddress addr, long ledgerId, byte[] masterKey, - long entryId, ByteBufList toSend, WriteCallback cb, Object ctx, + public void addEntry(BookieId addr, long ledgerId, byte[] masterKey, + long entryId, ReferenceCounted toSend, WriteCallback cb, Object ctx, int options, boolean allowFastFail, EnumSet writeFlags) { - SafeRunnable write = safeRun(() -> { - LOG.info("[{};L{}] write entry {}", addr, ledgerId, entryId); - if (errorBookies.contains(addr)) { - LOG.warn("[{};L{}] erroring write {}", addr, ledgerId, entryId); - cb.writeComplete(BKException.Code.WriteException, ledgerId, entryId, addr, ctx); - return; - } - LedgerData ledger = getBookieData(addr).computeIfAbsent(ledgerId, LedgerData::new); - ledger.addEntry(entryId, copyData(toSend)); - cb.writeComplete(BKException.Code.OK, ledgerId, entryId, addr, ctx); - toSend.release(); - }); - toSend.retain(); - synchronized (this) { - if (stalledBookies.getOrDefault(addr, false)) { - LOG.info("[{};{};{}] Stalling write {}", addr, ledgerId, System.identityHashCode(write), entryId); - stalledRequests.computeIfAbsent(addr, (key) -> new ArrayList<>()) - .add((rc) -> { - LOG.info("[{};{};{}] Unstalled write {}", - addr, ledgerId, System.identityHashCode(write), entryId); - if (rc == BKException.Code.OK) { - executor.executeOrdered(ledgerId, write); - } else { - executor.executeOrdered( - ledgerId, safeRun(() -> { - cb.writeComplete(rc, ledgerId, entryId, addr, ctx); - toSend.release(); - })); + preWriteHook.runHook(addr, ledgerId, entryId) + .thenComposeAsync( + (ignore) -> { + LOG.info("[{};L{}] write entry {}", addr, ledgerId, entryId); + if (isErrored(addr)) { + LOG.warn("[{};L{}] erroring write {}", addr, ledgerId, entryId); + return FutureUtils.exception(new BKException.BKWriteException()); } - }); - } else { - executor.executeOrdered(ledgerId, write); - } - } + + try { + if 
((options & FLAG_RECOVERY_ADD) == FLAG_RECOVERY_ADD) { + mockBookies.recoveryAddEntry(addr, ledgerId, entryId, copyData(toSend)); + } else { + mockBookies.addEntry(addr, ledgerId, entryId, copyData(toSend)); + } + } catch (BKException bke) { + return FutureUtils.exception(bke); + } finally { + toSend.release(); + } + + return FutureUtils.value(null); + }, executor.chooseThread(ledgerId)) + .thenCompose((res) -> postWriteHook.runHook(addr, ledgerId, entryId)) + .whenCompleteAsync((res, ex) -> { + if (ex != null) { + cb.writeComplete(BKException.getExceptionCode(ex, BKException.Code.WriteException), + ledgerId, entryId, addr, ctx); + } else { + cb.writeComplete(BKException.Code.OK, ledgerId, entryId, addr, ctx); + } + }, executor.chooseThread(ledgerId)); } @Override - public void readLac(BookieSocketAddress addr, long ledgerId, ReadLacCallback cb, Object ctx) { + public void readLac(BookieId addr, long ledgerId, ReadLacCallback cb, Object ctx) { executor.executeOrdered(ledgerId, - safeRun(() -> { - cb.readLacComplete(BKException.Code.IllegalOpException, - ledgerId, null, null, ctx); - })); + () -> cb.readLacComplete(BKException.Code.IllegalOpException, ledgerId, null, null, ctx)); } @Override - public void readEntry(BookieSocketAddress addr, long ledgerId, long entryId, + public void readEntry(BookieId addr, long ledgerId, long entryId, ReadEntryCallback cb, Object ctx, int flags, byte[] masterKey, boolean allowFastFail) { - executor.executeOrdered(ledgerId, - safeRun(() -> { - LOG.info("[{};L{}] read entry {}", addr, ledgerId, entryId); - if (errorBookies.contains(addr)) { - LOG.warn("[{};L{}] erroring read {}", addr, ledgerId, entryId); - cb.readEntryComplete(BKException.Code.ReadException, ledgerId, entryId, null, ctx); - return; - } - - LedgerData ledger = getBookieData(addr).get(ledgerId); - if (ledger == null) { - LOG.warn("[{};L{}] ledger not found", addr, ledgerId); - cb.readEntryComplete(BKException.Code.NoSuchLedgerExistsException, - ledgerId, entryId, null, ctx); - return; - } - - ByteBuf entry = ledger.getEntry(entryId); - if (entry == null) { - LOG.warn("[{};L{}] entry({}) not found", addr, ledgerId, entryId); - cb.readEntryComplete(BKException.Code.NoSuchEntryException, - ledgerId, entryId, null, ctx); - return; - } - + preReadHook.runHook(addr, ledgerId, entryId) + .thenComposeAsync((res) -> { + LOG.info("[{};L{}] read entry {}", addr, ledgerId, entryId); + if (isErrored(addr)) { + LOG.warn("[{};L{}] erroring read {}", addr, ledgerId, entryId); + return FutureUtils.exception(new BKException.BKReadException()); + } + + try { + ByteBuf entry = mockBookies.readEntry(addr, flags, ledgerId, entryId); + return FutureUtils.value(entry); + } catch (BKException bke) { + return FutureUtils.exception(bke); + } + }, executor.chooseThread(ledgerId)) + .thenCompose((buf) -> postReadHook.runHook(addr, ledgerId, entryId).thenApply((res) -> buf)) + .whenCompleteAsync((res, ex) -> { + if (ex != null) { + cb.readEntryComplete(BKException.getExceptionCode(ex, BKException.Code.ReadException), + ledgerId, entryId, null, ctx); + } else { cb.readEntryComplete(BKException.Code.OK, - ledgerId, entryId, entry.slice(), ctx); - })); + ledgerId, entryId, res.slice(), ctx); + } + }, executor.chooseThread(ledgerId)); } @Override - public void readEntryWaitForLACUpdate(BookieSocketAddress addr, + public void batchReadEntries(BookieId addr, long ledgerId, long startEntryId, int maxCount, long maxSize, + BookkeeperInternalCallbacks.BatchedReadEntryCallback cb, Object ctx, int flags, byte[] masterKey, + boolean 
allowFastFail) { + preBatchReadHook.runHook(addr, ledgerId, startEntryId, maxCount, maxSize) + .thenComposeAsync((res) -> { + LOG.info("[{};L{}] batch read entries startEntryId:{} maxCount:{} maxSize:{}", + addr, ledgerId, startEntryId, maxCount, maxSize); + if (isErrored(addr)) { + LOG.warn("[{};L{}] erroring batch read entries startEntryId:{} maxCount:{} maxSize:{}", + addr, ledgerId, startEntryId, maxCount, maxSize); + return FutureUtils.exception(new BKException.BKReadException()); + } + + try { + ByteBufList data = mockBookies.batchReadEntries(addr, flags, ledgerId, startEntryId, + maxCount, maxSize); + return FutureUtils.value(data); + } catch (BKException bke) { + return FutureUtils.exception(bke); + } + }, executor.chooseThread(ledgerId)) + .thenCompose((buf) -> postBatchReadHook.runHook(addr, ledgerId, startEntryId, maxCount, maxSize) + .thenApply((res) -> buf)) + .whenCompleteAsync((res, ex) -> { + if (ex != null) { + cb.readEntriesComplete(BKException.getExceptionCode(ex, BKException.Code.ReadException), + ledgerId, startEntryId, null, ctx); + } else { + cb.readEntriesComplete(BKException.Code.OK, + ledgerId, startEntryId, res, ctx); + } + }, executor.chooseThread(ledgerId)); + } + + @Override + public void readEntryWaitForLACUpdate(BookieId addr, long ledgerId, long entryId, long previousLAC, @@ -224,20 +280,25 @@ public void readEntryWaitForLACUpdate(BookieSocketAddress addr, ReadEntryCallback cb, Object ctx) { executor.executeOrdered(ledgerId, - safeRun(() -> { - cb.readEntryComplete(BKException.Code.IllegalOpException, - ledgerId, entryId, null, ctx); - })); + () -> cb.readEntryComplete(BKException.Code.IllegalOpException, ledgerId, entryId, null, ctx)); } @Override - public void getBookieInfo(BookieSocketAddress addr, long requested, + public void getBookieInfo(BookieId addr, long requested, GetBookieInfoCallback cb, Object ctx) { executor.executeOrdered(addr, - safeRun(() -> { - cb.getBookieInfoComplete(BKException.Code.IllegalOpException, - null, ctx); - })); + () -> cb.getBookieInfoComplete(BKException.Code.IllegalOpException, null, ctx)); + } + + @Override + public CompletableFuture getListOfEntriesOfLedger(BookieId address, + long ledgerId) { + FutureGetListOfEntriesOfLedger futureResult = new FutureGetListOfEntriesOfLedger(ledgerId); + executor.executeOrdered(address, () -> + futureResult.completeExceptionally( + BKException.create(BKException.Code.IllegalOpException).fillInStackTrace()) + ); + return futureResult; } @Override @@ -249,40 +310,29 @@ public boolean isClosed() { public void close() { } - private ConcurrentHashMap getBookieData(BookieSocketAddress addr) { - return data.computeIfAbsent(addr, (key) -> new ConcurrentHashMap<>()); - } - - private static ByteBuf copyData(ByteBufList list) { - ByteBuf buf = Unpooled.buffer(list.readableBytes()); - for (int i = 0; i < list.size(); i++) { - buf.writeBytes(list.getBuffer(i).slice()); + public static ByteBuf copyData(ReferenceCounted rc) { + ByteBuf res; + if (rc instanceof ByteBuf) { + res = Unpooled.copiedBuffer((ByteBuf) rc); + } else { + res = ByteBufList.coalesce((ByteBufList) rc); } - return buf; + + return res; } - private static class LedgerData { - final long ledgerId; - private TreeMap entries = new TreeMap<>(); - LedgerData(long ledgerId) { - this.ledgerId = ledgerId; + public static ByteBuf copyDataWithSkipHeader(ReferenceCounted rc) { + ByteBuf res; + if (rc instanceof ByteBuf) { + res = Unpooled.copiedBuffer((ByteBuf) rc); + } else { + res = ByteBufList.coalesce((ByteBufList) rc); } - void 
addEntry(long entryId, ByteBuf entry) { - entries.put(entryId, entry); - } + // Skip headers + res.skipBytes(28); + rc.release(); - ByteBuf getEntry(long entryId) { - if (entryId == BookieProtocol.LAST_ADD_CONFIRMED) { - Map.Entry<Long, ByteBuf> lastEntry = entries.lastEntry(); - if (lastEntry != null) { - return lastEntry.getValue(); - } else { - return null; - } - } else { - return entries.get(entryId); - } - } + return res; } } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/MockBookies.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/MockBookies.java new file mode 100644 index 00000000000..ac338b9757d --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/MockBookies.java @@ -0,0 +1,165 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.proto; + +import io.netty.buffer.ByteBuf; +import io.netty.buffer.Unpooled; +import io.netty.buffer.UnpooledByteBufAllocator; +import java.util.List; +import java.util.concurrent.ConcurrentHashMap; +import java.util.function.BiPredicate; +import org.apache.bookkeeper.client.BKException; +import org.apache.bookkeeper.client.DistributionSchedule; +import org.apache.bookkeeper.client.RoundRobinDistributionSchedule; +import org.apache.bookkeeper.client.api.LedgerMetadata; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.proto.checksum.DigestManager; +import org.apache.bookkeeper.util.ByteBufList; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Mocks an ensemble of bookies and can be shared between more than one MockBookieClient + * so that it can be used to check two writers accessing the same ledger. + */ +public class MockBookies { + static final Logger LOG = LoggerFactory.getLogger(MockBookies.class); + final ConcurrentHashMap<BookieId, ConcurrentHashMap<Long, MockLedgerData>> data = new ConcurrentHashMap<>(); + + public void seedLedgerForBookie(BookieId bookieId, long ledgerId, + LedgerMetadata metadata) throws Exception { + seedLedgerBase(ledgerId, metadata, (bookie, entry) -> bookie.equals(bookieId)); + } + + public void seedLedger(long ledgerId, LedgerMetadata metadata) throws Exception { + seedLedgerBase(ledgerId, metadata, (bookie, entry) -> true); + } + + public void seedLedgerBase(long ledgerId, LedgerMetadata metadata, + BiPredicate<BookieId, Long> shouldSeed) throws Exception { + DistributionSchedule schedule = new RoundRobinDistributionSchedule(metadata.getWriteQuorumSize(), + metadata.getAckQuorumSize(), + metadata.getEnsembleSize()); + long lastEntry = metadata.isClosed() + ?
metadata.getLastEntryId() : metadata.getAllEnsembles().lastEntry().getKey() - 1; + long lac = -1; + for (long e = 0; e <= lastEntry; e++) { + List<BookieId> ensemble = metadata.getEnsembleAt(e); + DistributionSchedule.WriteSet ws = schedule.getWriteSet(e); + for (int i = 0; i < ws.size(); i++) { + BookieId bookieId = ensemble.get(ws.get(i)); + if (shouldSeed.test(bookieId, e)) { + seedEntries(bookieId, ledgerId, e, lac); + } + } + lac = e; + } + } + + public void seedEntries(BookieId bookieId, long ledgerId, long entryId, long lac) throws Exception { + ByteBuf entry = generateEntry(ledgerId, entryId, lac); + MockLedgerData ledger = getBookieData(bookieId).computeIfAbsent(ledgerId, MockLedgerData::new); + ledger.addEntry(entryId, entry); + } + + public ByteBuf generateEntry(long ledgerId, long entryId, long lac) throws Exception { + DigestManager digestManager = DigestManager.instantiate(ledgerId, new byte[0], + DataFormats.LedgerMetadataFormat.DigestType.CRC32C, + UnpooledByteBufAllocator.DEFAULT, false); + return ByteBufList.coalesce((ByteBufList) digestManager.computeDigestAndPackageForSending( + entryId, lac, 0, Unpooled.buffer(10), new byte[20], 0)); + + } + + public void addEntry(BookieId bookieId, long ledgerId, long entryId, ByteBuf entry) throws BKException { + MockLedgerData ledger = getBookieData(bookieId).computeIfAbsent(ledgerId, MockLedgerData::new); + if (ledger.isFenced()) { + throw new BKException.BKLedgerFencedException(); + } + ledger.addEntry(entryId, entry); + } + + public void recoveryAddEntry(BookieId bookieId, long ledgerId, long entryId, ByteBuf entry) throws BKException { + MockLedgerData ledger = getBookieData(bookieId).computeIfAbsent(ledgerId, MockLedgerData::new); + ledger.addEntry(entryId, entry); + } + + public ByteBuf readEntry(BookieId bookieId, int flags, long ledgerId, long entryId) throws BKException { + MockLedgerData ledger = getBookieData(bookieId).get(ledgerId); + + if (ledger == null) { + LOG.warn("[{};L{}] ledger not found", bookieId, ledgerId); + throw new BKException.BKNoSuchLedgerExistsException(); + } + + if ((flags & BookieProtocol.FLAG_DO_FENCING) == BookieProtocol.FLAG_DO_FENCING) { + ledger.fence(); + } + + ByteBuf entry = ledger.getEntry(entryId); + if (entry == null) { + LOG.warn("[{};L{}] entry({}) not found", bookieId, ledgerId, entryId); + throw new BKException.BKNoSuchEntryException(); + } + + return entry; + } + + public ByteBufList batchReadEntries(BookieId bookieId, int flags, long ledgerId, long startEntryId, + int maxCount, long maxSize) throws BKException { + MockLedgerData ledger = getBookieData(bookieId).get(ledgerId); + + if (ledger == null) { + LOG.warn("[{};L{}] ledger not found", bookieId, ledgerId); + throw new BKException.BKNoSuchLedgerExistsException(); + } + + if ((flags & BookieProtocol.FLAG_DO_FENCING) == BookieProtocol.FLAG_DO_FENCING) { + ledger.fence(); + } + //Refer: BatchedReadEntryProcessor.readData + ByteBufList data = null; + if (maxCount <= 0) { + maxCount = Integer.MAX_VALUE; + } + long frameSize = 24 + 8 + 4; + for (long i = startEntryId; i < startEntryId + maxCount; i++) { + ByteBuf entry = ledger.getEntry(i); + frameSize += entry.readableBytes() + 4; + if (data == null) { + data = ByteBufList.get(entry); + } else { + if (frameSize > maxSize) { + entry.release(); + break; + } + data.add(entry); + } + } + return data; + } + + public ConcurrentHashMap<Long, MockLedgerData> getBookieData(BookieId bookieId) { + return data.computeIfAbsent(bookieId, (key) -> new ConcurrentHashMap<>()); + } + + +} diff --git
a/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/MockLedgerData.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/MockLedgerData.java new file mode 100644 index 00000000000..99f27d12284 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/MockLedgerData.java @@ -0,0 +1,63 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.proto; + +import io.netty.buffer.ByteBuf; +import java.util.Map; +import java.util.TreeMap; + +/** + Mock ledger data. + */ +public class MockLedgerData { + final long ledgerId; + boolean isFenced; + private TreeMap<Long, ByteBuf> entries = new TreeMap<>(); + + MockLedgerData(long ledgerId) { + this.ledgerId = ledgerId; + } + + boolean isFenced() { + return isFenced; + } + + void fence() { + isFenced = true; + } + + void addEntry(long entryId, ByteBuf entry) { + entries.put(entryId, entry); + } + + ByteBuf getEntry(long entryId) { + if (entryId == BookieProtocol.LAST_ADD_CONFIRMED) { + Map.Entry<Long, ByteBuf> lastEntry = entries.lastEntry(); + if (lastEntry != null) { + return lastEntry.getValue(); + } else { + return null; + } + } else { + return entries.get(entryId); + } + } +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/NetworkLessBookieTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/NetworkLessBookieTest.java index 0c3e7b5dcd3..ec848578c4a 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/NetworkLessBookieTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/NetworkLessBookieTest.java @@ -25,7 +25,6 @@ import io.netty.channel.Channel; import io.netty.channel.local.LocalChannel; - import org.apache.bookkeeper.client.BookKeeper; import org.apache.bookkeeper.client.BookKeeper.DigestType; import org.apache.bookkeeper.client.LedgerHandle; @@ -34,7 +33,7 @@ import org.junit.Test; /** - * Tests of the main BookKeeper client using networkless comunication. + * Tests of the main BookKeeper client using networkless communication.
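The two mock classes above are easiest to understand together: MockBookies keys per-bookie state by BookieId, and MockLedgerData supplies the per-ledger entry map plus the fenced bit that readEntry() flips when FLAG_DO_FENCING is set. A minimal sketch of how a test could drive them directly (illustrative only, not part of this patch; the BookieId literal and class name are made up):

    import io.netty.buffer.ByteBuf;
    import org.apache.bookkeeper.net.BookieId;
    import org.apache.bookkeeper.proto.BookieProtocol;
    import org.apache.bookkeeper.proto.MockBookies;

    public class MockBookiesSketch {
        public static void main(String[] args) throws Exception {
            MockBookies mockBookies = new MockBookies();
            BookieId bookie = BookieId.parse("mock-bookie-0:3181"); // hypothetical id

            // Seed two entries for ledger 1; the lac trails by one entry,
            // matching what seedLedgerBase() does.
            mockBookies.seedEntries(bookie, 1L, 0L, -1L);
            mockBookies.seedEntries(bookie, 1L, 1L, 0L);

            // A plain read leaves the ledger writable.
            ByteBuf e0 = mockBookies.readEntry(bookie, BookieProtocol.FLAG_NONE, 1L, 0L);
            System.out.println("entry 0: " + e0.readableBytes() + " readable bytes");

            // A fencing read flips the fenced bit, so a later addEntry()
            // on this bookie would throw BKLedgerFencedException.
            mockBookies.readEntry(bookie, BookieProtocol.FLAG_DO_FENCING, 1L, 1L);
        }
    }

Note the asymmetry in the class itself: addEntry() rejects fenced ledgers, while recoveryAddEntry() deliberately skips that check, mirroring how recovery writes bypass fencing on a real bookie.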
*/ public class NetworkLessBookieTest extends BookKeeperClusterTestCase { @@ -56,8 +55,8 @@ public void testUseLocalBookie() throws Exception { } } - for (BookieServer bk : bs) { - for (Channel channel : bk.nettyServer.allChannels) { + for (int i = 0; i < bookieCount(); i++) { + for (Channel channel : serverByIndex(i).nettyServer.allChannels) { if (!(channel instanceof LocalChannel)) { fail(); } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/ReadEntryProcessorTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/ReadEntryProcessorTest.java new file mode 100644 index 00000000000..251f900c096 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/ReadEntryProcessorTest.java @@ -0,0 +1,198 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.bookkeeper.proto; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyLong; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import io.netty.channel.Channel; +import io.netty.channel.ChannelHandlerContext; +import io.netty.channel.ChannelPromise; +import io.netty.channel.DefaultChannelPromise; +import io.netty.channel.EventLoop; +import java.io.IOException; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.atomic.AtomicReference; +import org.apache.bookkeeper.bookie.Bookie; +import org.apache.bookkeeper.bookie.BookieException; +import org.apache.bookkeeper.common.concurrent.FutureUtils; +import org.apache.bookkeeper.proto.BookieProtocol.ReadRequest; +import org.apache.bookkeeper.proto.BookieProtocol.Response; +import org.apache.bookkeeper.stats.NullStatsLogger; +import org.junit.Before; +import org.junit.Test; + +/** + * Unit test {@link ReadEntryProcessor}. 
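All of the fence tests in this class capture what the processor writes back to the mocked Netty channel. Condensed, the capture idiom looks like the fragment below (a sketch using the Mockito and Netty APIs this test already imports, and the mocked channel from setup(); variable names are illustrative):

    ChannelPromise promise = new DefaultChannelPromise(channel);
    AtomicReference<Object> written = new AtomicReference<>();
    CountDownLatch flushed = new CountDownLatch(1);
    doAnswer(invocation -> {
        written.set(invocation.getArgument(0)); // the Response being flushed
        promise.setSuccess();
        flushed.countDown();
        return promise;
    }).when(channel).writeAndFlush(any(Response.class));

The EOK/EIO pairs in the test names then come from one mapping: fenceLedger() completing with true yields BookieProtocol.EOK, completing with false yields BookieProtocol.EIO.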
+ */ +public class ReadEntryProcessorTest { + + private Channel channel; + private BookieRequestHandler requestHandler; + private BookieRequestProcessor requestProcessor; + private Bookie bookie; + + @Before + public void setup() throws IOException, BookieException { + channel = mock(Channel.class); + when(channel.isOpen()).thenReturn(true); + + requestHandler = mock(BookieRequestHandler.class); + ChannelHandlerContext ctx = mock(ChannelHandlerContext.class); + when(ctx.channel()).thenReturn(channel); + when(requestHandler.ctx()).thenReturn(ctx); + + bookie = mock(Bookie.class); + requestProcessor = mock(BookieRequestProcessor.class); + when(requestProcessor.getBookie()).thenReturn(bookie); + when(requestProcessor.getWaitTimeoutOnBackpressureMillis()).thenReturn(-1L); + when(requestProcessor.getRequestStats()).thenReturn(new RequestStats(NullStatsLogger.INSTANCE)); + when(channel.voidPromise()).thenReturn(mock(ChannelPromise.class)); + when(channel.writeAndFlush(any())).thenReturn(mock(ChannelPromise.class)); + + EventLoop eventLoop = mock(EventLoop.class); + when(eventLoop.inEventLoop()).thenReturn(true); + when(channel.eventLoop()).thenReturn(eventLoop); + } + + @Test + public void testSuccessfulAsynchronousFenceRequest() throws Exception { + testAsynchronousRequest(true, BookieProtocol.EOK); + } + + @Test + public void testFailedAsynchronousFenceRequest() throws Exception { + testAsynchronousRequest(false, BookieProtocol.EIO); + } + + private void testAsynchronousRequest(boolean result, int errorCode) throws Exception { + CompletableFuture<Boolean> fenceResult = FutureUtils.createFuture(); + when(bookie.fenceLedger(anyLong(), any())).thenReturn(fenceResult); + + ChannelPromise promise = new DefaultChannelPromise(channel); + AtomicReference<Object> writtenObject = new AtomicReference<>(); + CountDownLatch latch = new CountDownLatch(1); + doAnswer(invocationOnMock -> { + writtenObject.set(invocationOnMock.getArgument(0)); + promise.setSuccess(); + latch.countDown(); + return promise; + }).when(channel).writeAndFlush(any(Response.class)); + + ExecutorService service = Executors.newCachedThreadPool(); + long ledgerId = System.currentTimeMillis(); + ReadRequest request = ReadRequest.create(BookieProtocol.CURRENT_PROTOCOL_VERSION, ledgerId, + 1, BookieProtocol.FLAG_DO_FENCING, new byte[]{}); + ReadEntryProcessor processor = ReadEntryProcessor.create( + request, requestHandler, requestProcessor, service, true); + processor.run(); + + fenceResult.complete(result); + latch.await(); + verify(channel, times(1)).writeAndFlush(any(Response.class)); + + assertTrue(writtenObject.get() instanceof Response); + Response response = (Response) writtenObject.get(); + assertEquals(1, response.getEntryId()); + assertEquals(ledgerId, response.getLedgerId()); + assertEquals(BookieProtocol.READENTRY, response.getOpCode()); + assertEquals(errorCode, response.getErrorCode()); + service.shutdown(); + } + + @Test + public void testSuccessfulSynchronousFenceRequest() throws Exception { + testSynchronousRequest(true, BookieProtocol.EOK); + } + + @Test + public void testFailedSynchronousFenceRequest() throws Exception { + testSynchronousRequest(false, BookieProtocol.EIO); + } + + private void testSynchronousRequest(boolean result, int errorCode) throws Exception { + CompletableFuture<Boolean> fenceResult = FutureUtils.createFuture(); + when(bookie.fenceLedger(anyLong(), any())).thenReturn(fenceResult); + ChannelPromise promise = new DefaultChannelPromise(channel); + AtomicReference<Object> writtenObject = new AtomicReference<>(); + CountDownLatch latch
= new CountDownLatch(1); + doAnswer(invocationOnMock -> { + writtenObject.set(invocationOnMock.getArgument(0)); + promise.setSuccess(); + latch.countDown(); + return promise; + }).when(channel).writeAndFlush(any(Response.class)); + + long ledgerId = System.currentTimeMillis(); + ReadRequest request = ReadRequest.create(BookieProtocol.CURRENT_PROTOCOL_VERSION, ledgerId, + 1, BookieProtocol.FLAG_DO_FENCING, new byte[]{}); + ReadEntryProcessor processor = ReadEntryProcessor.create(request, requestHandler, requestProcessor, null, true); + fenceResult.complete(result); + processor.run(); + + latch.await(); + verify(channel, times(1)).writeAndFlush(any(Response.class)); + + assertTrue(writtenObject.get() instanceof Response); + Response response = (Response) writtenObject.get(); + assertEquals(1, response.getEntryId()); + assertEquals(ledgerId, response.getLedgerId()); + assertEquals(BookieProtocol.READENTRY, response.getOpCode()); + assertEquals(errorCode, response.getErrorCode()); + } + + @Test + public void testNonFenceRequest() throws Exception { + ChannelPromise promise = new DefaultChannelPromise(channel); + AtomicReference writtenObject = new AtomicReference<>(); + CountDownLatch latch = new CountDownLatch(1); + doAnswer(invocationOnMock -> { + writtenObject.set(invocationOnMock.getArgument(0)); + promise.setSuccess(); + latch.countDown(); + return promise; + }).when(channel).writeAndFlush(any(Response.class)); + + long ledgerId = System.currentTimeMillis(); + ReadRequest request = ReadRequest.create(BookieProtocol.CURRENT_PROTOCOL_VERSION, ledgerId, + 1, (short) 0, new byte[]{}); + ReadEntryProcessor processor = ReadEntryProcessor.create(request, requestHandler, requestProcessor, null, true); + processor.run(); + + latch.await(); + verify(channel, times(1)).writeAndFlush(any(Response.class)); + + assertTrue(writtenObject.get() instanceof Response); + Response response = (Response) writtenObject.get(); + assertEquals(1, response.getEntryId()); + assertEquals(ledgerId, response.getLedgerId()); + assertEquals(BookieProtocol.READENTRY, response.getOpCode()); + assertEquals(BookieProtocol.EOK, response.getErrorCode()); + } +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/TestBKStats.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/TestBKStats.java index fa13a13453e..c6b82f02f3b 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/TestBKStats.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/TestBKStats.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -31,7 +31,7 @@ public class TestBKStats { /** - * Tests that updatLatency should not fail with + * Tests that updateLatency should not fail with * ArrayIndexOutOfBoundException when latency time coming as negative. */ @Test diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/TestBackwardCompatCMS42.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/TestBackwardCompatCMS42.java index a07acef811a..1edd74c1fc6 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/TestBackwardCompatCMS42.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/TestBackwardCompatCMS42.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file @@ -26,21 +26,19 @@ import com.google.protobuf.ByteString; import com.google.protobuf.ExtensionRegistry; - import io.netty.channel.Channel; import io.netty.channel.ChannelFuture; import io.netty.channel.EventLoopGroup; import io.netty.channel.nio.NioEventLoopGroup; - import java.util.concurrent.ArrayBlockingQueue; import java.util.concurrent.CountDownLatch; - import org.apache.bookkeeper.auth.AuthProviderFactoryFactory; import org.apache.bookkeeper.auth.ClientAuthProvider; import org.apache.bookkeeper.auth.TestAuth; import org.apache.bookkeeper.common.util.OrderedExecutor; import org.apache.bookkeeper.conf.ClientConfiguration; import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.net.BookieSocketAddress; import org.apache.bookkeeper.proto.BookieProtocol.AuthRequest; import org.apache.bookkeeper.proto.BookieProtocol.AuthResponse; @@ -87,7 +85,7 @@ public void testAuthSingleMessage() throws Exception { builder.setPayload(ByteString.copyFrom(PAYLOAD_MESSAGE)); final AuthMessage authMessage = builder.build(); - CompatClient42 client = newCompatClient(bookie1.getLocalAddress()); + CompatClient42 client = newCompatClient(bookie1.getBookieId()); Request request = new AuthRequest(BookieProtocol.CURRENT_PROTOCOL_VERSION, authMessage); client.sendRequest(request); @@ -108,7 +106,7 @@ public void testAuthMultiMessage() throws Exception { .setAuthPluginName(TestAuth.TEST_AUTH_PROVIDER_PLUGIN_NAME); builder.setPayload(ByteString.copyFrom(PAYLOAD_MESSAGE)); final AuthMessage authMessage = builder.build(); - CompatClient42 client = newCompatClient(bookie1.getLocalAddress()); + CompatClient42 client = newCompatClient(bookie1.getBookieId()); Request request = new AuthRequest(BookieProtocol.CURRENT_PROTOCOL_VERSION, authMessage); for (int i = 0; i < 3; i++) { @@ -141,7 +139,7 @@ public void testAuthFail() throws Exception { .setAuthPluginName(TestAuth.TEST_AUTH_PROVIDER_PLUGIN_NAME); builder.setPayload(ByteString.copyFrom(PAYLOAD_MESSAGE)); final AuthMessage authMessage = builder.build(); - CompatClient42 client = newCompatClient(bookie1.getLocalAddress()); + CompatClient42 client = newCompatClient(bookie1.getBookieId()); Request request = new AuthRequest(BookieProtocol.CURRENT_PROTOCOL_VERSION, authMessage); for (int i = 0; i < 3; i++) { @@ -163,8 +161,9 @@ public void testAuthFail() throws Exception { } - client.sendRequest(new ReadRequest(BookieProtocol.CURRENT_PROTOCOL_VERSION, - 1L, 1L, (short) 0, null)); + ReadRequest read = ReadRequest.create(BookieProtocol.CURRENT_PROTOCOL_VERSION, + 1L, 1L, (short) 0, null); + client.sendRequest(read); Response response = client.takeResponse(); assertEquals("Should have failed", response.getErrorCode(), BookieProtocol.EUA); @@ -172,13 +171,10 @@ public void testAuthFail() throws Exception { // copy from TestAuth BookieServer startAndStoreBookie(ServerConfiguration conf) throws Exception { - bsConfs.add(conf); - BookieServer s = startBookie(conf); - bs.add(s); - return s; + return startAndAddBookie(conf).getServer(); } - CompatClient42 newCompatClient(BookieSocketAddress addr) throws Exception { + CompatClient42 newCompatClient(BookieId addr) throws Exception { ClientConfiguration conf = new ClientConfiguration(); conf.setUseV2WireProtocol(true); return new CompatClient42(conf, executor, eventLoopGroup, addr, authProvider, extRegistry); @@ -193,18 +189,18 @@ class CompatClient42 extends PerChannelBookieClient { CompatClient42(ClientConfiguration conf, OrderedExecutor 
executor, EventLoopGroup eventLoopGroup, - BookieSocketAddress addr, + BookieId addr, ClientAuthProvider.Factory authProviderFactory, ExtensionRegistry extRegistry) throws Exception { - super( - conf, + super(conf, executor, eventLoopGroup, addr, NullStatsLogger.INSTANCE, authProviderFactory, extRegistry, - null); + null, + BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); state = ConnectionState.CONNECTING; ChannelFuture future = connect(); diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/TestBookieRequestProcessor.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/TestBookieRequestProcessor.java index 4ebe01cfccb..46304023433 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/TestBookieRequestProcessor.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/TestBookieRequestProcessor.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -27,8 +27,14 @@ import static org.junit.Assert.assertSame; import static org.junit.Assert.assertTrue; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; import com.google.protobuf.ByteString; +import io.netty.buffer.UnpooledByteBufAllocator; +import io.netty.channel.Channel; +import io.netty.channel.ChannelHandlerContext; +import io.netty.channel.group.ChannelGroup; +import io.netty.channel.group.DefaultChannelGroup; import org.apache.bookkeeper.bookie.Bookie; import org.apache.bookkeeper.conf.ServerConfiguration; import org.apache.bookkeeper.proto.BookkeeperProtocol.AddRequest; @@ -49,12 +55,15 @@ public class TestBookieRequestProcessor { final BookieRequestProcessor requestProcessor = mock(BookieRequestProcessor.class); + private final ChannelGroup channelGroup = new DefaultChannelGroup(null); + @Test public void testConstructLongPollThreads() throws Exception { // long poll threads == read threads ServerConfiguration conf = new ServerConfiguration(); try (BookieRequestProcessor processor = new BookieRequestProcessor( - conf, mock(Bookie.class), NullStatsLogger.INSTANCE, null)) { + conf, mock(Bookie.class), NullStatsLogger.INSTANCE, null, UnpooledByteBufAllocator.DEFAULT, + channelGroup)) { assertSame(processor.getReadThreadPool(), processor.getLongPollThreadPool()); } @@ -62,7 +71,8 @@ conf, mock(Bookie.class), NullStatsLogger.INSTANCE, null)) { conf = new ServerConfiguration(); conf.setNumReadWorkerThreads(0); try (BookieRequestProcessor processor = new BookieRequestProcessor( - conf, mock(Bookie.class), NullStatsLogger.INSTANCE, null)) { + conf, mock(Bookie.class), NullStatsLogger.INSTANCE, null, UnpooledByteBufAllocator.DEFAULT, + channelGroup)) { assertNull(processor.getReadThreadPool()); assertNotNull(processor.getLongPollThreadPool()); } @@ -72,7 +82,8 @@ conf, mock(Bookie.class), NullStatsLogger.INSTANCE, null)) { conf.setNumReadWorkerThreads(2); conf.setNumLongPollWorkerThreads(2); try (BookieRequestProcessor processor = new BookieRequestProcessor( - conf, mock(Bookie.class), NullStatsLogger.INSTANCE, null)) { + conf, mock(Bookie.class), NullStatsLogger.INSTANCE, null, UnpooledByteBufAllocator.DEFAULT, + channelGroup)) { assertNotNull(processor.getReadThreadPool()); assertNotNull(processor.getLongPollThreadPool()); assertNotSame(processor.getReadThreadPool(), processor.getLongPollThreadPool()); @@ -136,7 +147,14 @@ public void testToString() { .setBody(ByteString.copyFrom("entrydata".getBytes())).build(); Request request = 
Request.newBuilder().setHeader(header).setAddRequest(addRequest).build(); - WriteEntryProcessorV3 writeEntryProcessorV3 = new WriteEntryProcessorV3(request, null, requestProcessor); + Channel channel = mock(Channel.class); + ChannelHandlerContext ctx = mock(ChannelHandlerContext.class); + when(ctx.channel()).thenReturn(channel); + BookieRequestHandler requestHandler = mock(BookieRequestHandler.class); + when(requestHandler.ctx()).thenReturn(ctx); + + WriteEntryProcessorV3 writeEntryProcessorV3 = new WriteEntryProcessorV3(request, requestHandler, + requestProcessor); String toString = writeEntryProcessorV3.toString(); assertFalse("writeEntryProcessorV3's toString should have filtered out body", toString.contains("body")); assertFalse("writeEntryProcessorV3's toString should have filtered out masterKey", @@ -154,7 +172,7 @@ public void testToString() { .setBody(ByteString.copyFrom("entrydata".getBytes())).setFlag(Flag.RECOVERY_ADD).setWriteFlags(0) .build(); request = Request.newBuilder().setHeader(header).setAddRequest(addRequest).build(); - writeEntryProcessorV3 = new WriteEntryProcessorV3(request, null, requestProcessor); + writeEntryProcessorV3 = new WriteEntryProcessorV3(request, requestHandler, requestProcessor); toString = writeEntryProcessorV3.toString(); assertFalse("writeEntryProcessorV3's toString should have filtered out body", toString.contains("body")); assertFalse("writeEntryProcessorV3's toString should have filtered out masterKey", diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/TestPerChannelBookieClient.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/TestPerChannelBookieClient.java index 7dc62779e1c..b52e8b95db7 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/TestPerChannelBookieClient.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/TestPerChannelBookieClient.java @@ -20,35 +20,41 @@ */ package org.apache.bookkeeper.proto; +import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; import com.google.protobuf.ExtensionRegistry; - import io.netty.buffer.ByteBuf; import io.netty.channel.Channel; import io.netty.channel.EventLoopGroup; +import io.netty.channel.epoll.Epoll; +import io.netty.channel.epoll.EpollChannelOption; +import io.netty.channel.epoll.EpollEventLoopGroup; import io.netty.channel.nio.NioEventLoopGroup; - import java.io.IOException; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; - import org.apache.bookkeeper.auth.AuthProviderFactoryFactory; import org.apache.bookkeeper.auth.ClientAuthProvider; import org.apache.bookkeeper.bookie.Bookie; +import org.apache.bookkeeper.bookie.BookieException; +import org.apache.bookkeeper.bookie.TestBookieImpl; import org.apache.bookkeeper.client.BKException; import org.apache.bookkeeper.common.util.OrderedExecutor; import org.apache.bookkeeper.conf.ClientConfiguration; import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.net.BookieSocketAddress; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GenericCallback; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.ReadEntryCallback; import org.apache.bookkeeper.proto.PerChannelBookieClient.ConnectionState; +import org.apache.bookkeeper.stats.StatsLogger; import org.apache.bookkeeper.test.BookKeeperClusterTestCase; -import 
org.apache.bookkeeper.util.SafeRunnable; +import org.junit.Assume; import org.junit.Test; +import org.mockito.Mockito; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -81,10 +87,10 @@ public void testConnectCloseRace() throws Exception { EventLoopGroup eventLoopGroup = new NioEventLoopGroup(); OrderedExecutor executor = getOrderedSafeExecutor(); - BookieSocketAddress addr = getBookie(0); + BookieId addr = getBookie(0); for (int i = 0; i < 1000; i++) { PerChannelBookieClient client = new PerChannelBookieClient(executor, eventLoopGroup, addr, - authProvider, extRegistry); + authProvider, extRegistry, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); client.connectIfNeededAndDoOp(new GenericCallback() { @Override public void operationComplete(int rc, PerChannelBookieClient client) { @@ -124,10 +130,11 @@ public void operationComplete(int rc, PerChannelBookieClient pcbc) { EventLoopGroup eventLoopGroup = new NioEventLoopGroup(); OrderedExecutor executor = getOrderedSafeExecutor(); - BookieSocketAddress addr = getBookie(0); + BookieId addr = getBookie(0); for (int i = 0; i < 100; i++) { PerChannelBookieClient client = new PerChannelBookieClient(executor, eventLoopGroup, addr, - authProvider, extRegistry); + authProvider, extRegistry, + BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); for (int j = i; j < 10; j++) { client.connectIfNeededAndDoOp(nullop); } @@ -155,10 +162,11 @@ public void operationComplete(int rc, PerChannelBookieClient client) { final int iterations = 100000; EventLoopGroup eventLoopGroup = new NioEventLoopGroup(); OrderedExecutor executor = getOrderedSafeExecutor(); - BookieSocketAddress addr = getBookie(0); + BookieId addr = getBookie(0); final PerChannelBookieClient client = new PerChannelBookieClient(executor, eventLoopGroup, - addr, authProvider, extRegistry); + addr, authProvider, extRegistry, + BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); final AtomicBoolean shouldFail = new AtomicBoolean(false); final AtomicBoolean running = new AtomicBoolean(true); final CountDownLatch disconnectRunning = new CountDownLatch(1); @@ -233,10 +241,10 @@ public void run() { public void testRequestCompletesAfterDisconnectRace() throws Exception { ServerConfiguration conf = killBookie(0); - Bookie delayBookie = new Bookie(conf) { + Bookie delayBookie = new TestBookieImpl(conf) { @Override public ByteBuf readEntry(long ledgerId, long entryId) - throws IOException, NoLedgerException { + throws IOException, NoLedgerException, BookieException { try { Thread.sleep(3000); } catch (InterruptedException ie) { @@ -246,15 +254,15 @@ public ByteBuf readEntry(long ledgerId, long entryId) return super.readEntry(ledgerId, entryId); } }; - bsConfs.add(conf); - bs.add(startBookie(conf, delayBookie)); + startAndAddBookie(conf, delayBookie); EventLoopGroup eventLoopGroup = new NioEventLoopGroup(); final OrderedExecutor executor = getOrderedSafeExecutor(); - BookieSocketAddress addr = getBookie(0); + BookieId addr = getBookie(0); final PerChannelBookieClient client = new PerChannelBookieClient(executor, eventLoopGroup, - addr, authProvider, extRegistry); + addr, authProvider, extRegistry, + BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); final CountDownLatch completion = new CountDownLatch(1); final ReadEntryCallback cb = new ReadEntryCallback() { @Override @@ -268,12 +276,7 @@ public void readEntryComplete(int rc, long ledgerId, long entryId, @Override public void operationComplete(final int rc, PerChannelBookieClient pcbc) { if (rc != BKException.Code.OK) { - executor.executeOrdered(1, new 
SafeRunnable() { - @Override - public void safeRun() { - cb.readEntryComplete(rc, 1, 1, null, null); - } - }); + executor.executeOrdered(1, () -> cb.readEntryComplete(rc, 1, 1, null, null)); return; } @@ -291,4 +294,42 @@ public void safeRun() { eventLoopGroup.shutdownGracefully(); executor.shutdown(); } + + /** + * Test that TCP user timeout is correctly set in EpollEventLoopGroup. + */ + @Test + public void testEpollChannelTcpUserTimeout() throws Exception { + // Epoll is needed for this test to work. + Assume.assumeTrue(Epoll.isAvailable()); + + EventLoopGroup eventLoopGroup = new EpollEventLoopGroup(); + OrderedExecutor executor = getOrderedSafeExecutor(); + ClientConfiguration conf = new ClientConfiguration(); + int tcpUserTimeout = 1236; // this value may be rounded on some Linux implementations + BookieId addr = getBookie(0); + + // Pass to the PerChannelBookieClient object the client configuration with TCP user timeout. + PerChannelBookieClient channel = new PerChannelBookieClient(conf, executor, eventLoopGroup, + addr, Mockito.mock(StatsLogger.class), authProvider, extRegistry, + Mockito.mock(PerChannelBookieClientPool.class), BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); + + // Verify that the configured value has not been set in the channel if does not exist in config. + assertEquals(channel.connect().channel().config() + .getOption(EpollChannelOption.TCP_USER_TIMEOUT).intValue(), 0); + channel.close(); + + // Create a new channel with new TCP user timeout set. + conf.setTcpUserTimeoutMillis(tcpUserTimeout); + channel = new PerChannelBookieClient(conf, executor, eventLoopGroup, + addr, Mockito.mock(StatsLogger.class), authProvider, extRegistry, + Mockito.mock(PerChannelBookieClientPool.class), BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); + + // Verify that the configured value has been set. 
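// Background for the assertion below (sketch-level commentary, not part of
// the original patch): TCP_USER_TIMEOUT is the Linux-only socket option that
// bounds how long transmitted data may stay unacknowledged before the kernel
// drops the connection. Netty reports 0 for the option when it was never
// configured, which is what the first connect() above checked. Setting the
// same option directly on a Netty epoll bootstrap would look roughly like:
//
//     Bootstrap b = new Bootstrap();
//     b.group(eventLoopGroup)
//      .channel(EpollSocketChannel.class)
//      .option(EpollChannelOption.TCP_USER_TIMEOUT, tcpUserTimeout);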
+ assertEquals(channel.connect().channel().config() + .getOption(EpollChannelOption.TCP_USER_TIMEOUT).intValue(), tcpUserTimeout); + channel.close(); + eventLoopGroup.shutdownGracefully(); + executor.shutdown(); + } } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/WriteEntryProcessorTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/WriteEntryProcessorTest.java index 5901c2f5823..a02cde4ab99 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/WriteEntryProcessorTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/WriteEntryProcessorTest.java @@ -22,6 +22,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyInt; import static org.mockito.ArgumentMatchers.eq; import static org.mockito.ArgumentMatchers.same; import static org.mockito.Mockito.doAnswer; @@ -33,10 +34,13 @@ import io.netty.buffer.ByteBuf; import io.netty.buffer.Unpooled; import io.netty.channel.Channel; +import io.netty.channel.ChannelHandlerContext; import io.netty.channel.ChannelPromise; +import io.netty.channel.DefaultChannelPromise; import java.util.concurrent.CountDownLatch; import java.util.concurrent.atomic.AtomicReference; import org.apache.bookkeeper.bookie.Bookie; +import org.apache.bookkeeper.bookie.BookieException; import org.apache.bookkeeper.proto.BookieProtocol.ParsedAddRequest; import org.apache.bookkeeper.proto.BookieProtocol.Response; import org.apache.bookkeeper.stats.NullStatsLogger; @@ -51,6 +55,8 @@ public class WriteEntryProcessorTest { private ParsedAddRequest request; private WriteEntryProcessor processor; private Channel channel; + private ChannelHandlerContext ctx; + private BookieRequestHandler requestHandler; private BookieRequestProcessor requestProcessor; private Bookie bookie; @@ -64,16 +70,22 @@ public void setup() { new byte[0], Unpooled.wrappedBuffer("test-entry-data".getBytes(UTF_8))); channel = mock(Channel.class); + when(channel.isOpen()).thenReturn(true); + + requestHandler = mock(BookieRequestHandler.class); + ctx = mock(ChannelHandlerContext.class); + when(ctx.channel()).thenReturn(channel); + when(requestHandler.ctx()).thenReturn(ctx); + bookie = mock(Bookie.class); requestProcessor = mock(BookieRequestProcessor.class); when(requestProcessor.getBookie()).thenReturn(bookie); - when(requestProcessor.getAddEntryStats()) - .thenReturn(NullStatsLogger.INSTANCE.getOpStatsLogger("add_entry")); - when(requestProcessor.getAddRequestStats()) - .thenReturn(NullStatsLogger.INSTANCE.getOpStatsLogger("add_requests")); + when(requestProcessor.getRequestStats()).thenReturn(new RequestStats(NullStatsLogger.INSTANCE)); + when(channel.isActive()).thenReturn(true); + when(channel.isWritable()).thenReturn(true); processor = WriteEntryProcessor.create( request, - channel, + requestHandler, requestProcessor); } @@ -91,14 +103,16 @@ private void reinitRequest(short flags) { Unpooled.wrappedBuffer("test-entry-data".getBytes(UTF_8))); processor = WriteEntryProcessor.create( request, - channel, + requestHandler, requestProcessor); } @Test public void testNoneHighPriorityWritesOnReadOnlyBookie() throws Exception { when(bookie.isReadOnly()).thenReturn(true); - when(channel.voidPromise()).thenReturn(mock(ChannelPromise.class)); + ChannelPromise mockPromise = mock(ChannelPromise.class); + when(channel.newPromise()).thenReturn(mockPromise); + 
when(mockPromise.addListener(any())).thenReturn(mockPromise); AtomicReference writtenObject = new AtomicReference<>(); CountDownLatch latch = new CountDownLatch(1); @@ -106,11 +120,11 @@ public void testNoneHighPriorityWritesOnReadOnlyBookie() throws Exception { writtenObject.set(invocationOnMock.getArgument(0)); latch.countDown(); return null; - }).when(channel).writeAndFlush(any(), any(ChannelPromise.class)); + }).when(channel).writeAndFlush(any(), any()); processor.run(); - verify(channel, times(1)).writeAndFlush(any(), any(ChannelPromise.class)); + verify(channel, times(1)).writeAndFlush(any(), any()); latch.await(); @@ -128,7 +142,9 @@ public void testHighPriorityWritesOnReadOnlyBookieWhenHighPriorityWritesDisallow when(bookie.isReadOnly()).thenReturn(true); when(bookie.isAvailableForHighPriorityWrites()).thenReturn(false); - when(channel.voidPromise()).thenReturn(mock(ChannelPromise.class)); + ChannelPromise mockPromise = mock(ChannelPromise.class); + when(channel.newPromise()).thenReturn(mockPromise); + when(mockPromise.addListener(any())).thenReturn(mockPromise); AtomicReference writtenObject = new AtomicReference<>(); CountDownLatch latch = new CountDownLatch(1); @@ -136,11 +152,11 @@ public void testHighPriorityWritesOnReadOnlyBookieWhenHighPriorityWritesDisallow writtenObject.set(invocationOnMock.getArgument(0)); latch.countDown(); return null; - }).when(channel).writeAndFlush(any(), any(ChannelPromise.class)); + }).when(channel).writeAndFlush(any(), any()); processor.run(); - verify(channel, times(1)).writeAndFlush(any(), any(ChannelPromise.class)); + verify(channel, times(1)).writeAndFlush(any(), any()); latch.await(); @@ -158,67 +174,95 @@ public void testHighPriorityWritesOnReadOnlyBookieWhenHighPriorityWritesAllowed( when(bookie.isReadOnly()).thenReturn(true); when(bookie.isAvailableForHighPriorityWrites()).thenReturn(true); - when(channel.voidPromise()).thenReturn(mock(ChannelPromise.class)); + ChannelPromise mockPromise = mock(ChannelPromise.class); + when(channel.newPromise()).thenReturn(mockPromise); + when(mockPromise.addListener(any())).thenReturn(mockPromise); doAnswer(invocationOnMock -> { processor.writeComplete(0, request.ledgerId, request.entryId, null, null); return null; - }).when(bookie).addEntry(any(ByteBuf.class), eq(false), same(processor), same(channel), eq(new byte[0])); + }).when(bookie).addEntry(any(ByteBuf.class), eq(false), same(processor), same(requestHandler), eq(new byte[0])); - AtomicReference writtenObject = new AtomicReference<>(); + AtomicReference writtenObject = new AtomicReference<>(); CountDownLatch latch = new CountDownLatch(1); doAnswer(invocationOnMock -> { writtenObject.set(invocationOnMock.getArgument(0)); latch.countDown(); return null; - }).when(channel).writeAndFlush(any(), any(ChannelPromise.class)); + }).when(requestHandler).prepareSendResponseV2(anyInt(), any()); processor.run(); verify(bookie, times(1)) - .addEntry(any(ByteBuf.class), eq(false), same(processor), same(channel), eq(new byte[0])); - verify(channel, times(1)).writeAndFlush(any(), any(ChannelPromise.class)); + .addEntry(any(ByteBuf.class), eq(false), same(processor), same(requestHandler), eq(new byte[0])); + verify(requestHandler, times(1)).prepareSendResponseV2(anyInt(), any()); +// verify(channel, times(1)).writeAndFlush(any(), any()); latch.await(); - assertTrue(writtenObject.get() instanceof Response); - Response response = (Response) writtenObject.get(); - assertEquals(BookieProtocol.EOK, response.getErrorCode()); - - response.release(); - response.recycle(); + 
assertTrue(writtenObject.get() instanceof Integer); + assertEquals(BookieProtocol.EOK, (int) writtenObject.get()); } @Test public void testNormalWritesOnWritableBookie() throws Exception { when(bookie.isReadOnly()).thenReturn(false); - when(channel.voidPromise()).thenReturn(mock(ChannelPromise.class)); + ChannelPromise mockPromise = mock(ChannelPromise.class); + when(channel.newPromise()).thenReturn(mockPromise); + when(mockPromise.addListener(any())).thenReturn(mockPromise); doAnswer(invocationOnMock -> { processor.writeComplete(0, request.ledgerId, request.entryId, null, null); return null; - }).when(bookie).addEntry(any(ByteBuf.class), eq(false), same(processor), same(channel), eq(new byte[0])); + }).when(bookie).addEntry(any(ByteBuf.class), eq(false), same(processor), same(requestHandler), eq(new byte[0])); - AtomicReference writtenObject = new AtomicReference<>(); + AtomicReference writtenObject = new AtomicReference<>(); CountDownLatch latch = new CountDownLatch(1); doAnswer(invocationOnMock -> { writtenObject.set(invocationOnMock.getArgument(0)); latch.countDown(); return null; - }).when(channel).writeAndFlush(any(), any(ChannelPromise.class)); + }).when(requestHandler).prepareSendResponseV2(anyInt(), any()); processor.run(); verify(bookie, times(1)) - .addEntry(any(ByteBuf.class), eq(false), same(processor), same(channel), eq(new byte[0])); - verify(channel, times(1)).writeAndFlush(any(), any(ChannelPromise.class)); + .addEntry(any(ByteBuf.class), eq(false), same(processor), same(requestHandler), eq(new byte[0])); + verify(requestHandler, times(1)).prepareSendResponseV2(anyInt(), any()); latch.await(); + assertEquals(BookieProtocol.EOK, (int) writtenObject.get()); + } + + @Test + public void testWritesCacheFlushTimeout() throws Exception { + when(bookie.isReadOnly()).thenReturn(false); + ChannelPromise mockPromise = mock(ChannelPromise.class); + when(channel.newPromise()).thenReturn(mockPromise); + when(mockPromise.addListener(any())).thenReturn(mockPromise); + when(channel.writeAndFlush(any())).thenReturn(mock(ChannelPromise.class)); + doAnswer(invocationOnMock -> { + throw new BookieException.OperationRejectedException(); + }).when(bookie).addEntry( + any(ByteBuf.class), eq(false), same(processor), same(requestHandler), eq(new byte[0])); + + ChannelPromise promise = new DefaultChannelPromise(channel); + AtomicReference writtenObject = new AtomicReference<>(); + CountDownLatch latch = new CountDownLatch(1); + doAnswer(invocationOnMock -> { + writtenObject.set(invocationOnMock.getArgument(0)); + latch.countDown(); + return promise; + }).when(channel).writeAndFlush(any(), any()); + + processor.run(); + + verify(bookie, times(1)) + .addEntry(any(ByteBuf.class), eq(false), same(processor), same(requestHandler), eq(new byte[0])); + verify(channel, times(1)).writeAndFlush(any(Response.class), any()); + + latch.await(); assertTrue(writtenObject.get() instanceof Response); Response response = (Response) writtenObject.get(); - assertEquals(BookieProtocol.EOK, response.getErrorCode()); - - response.release(); - response.recycle(); + assertEquals(BookieProtocol.ETOOMANYREQUESTS, response.getErrorCode()); } - } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/WriteEntryProcessorV3Test.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/WriteEntryProcessorV3Test.java index df7b1532b63..94024aa5c0d 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/WriteEntryProcessorV3Test.java +++ 
b/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/WriteEntryProcessorV3Test.java @@ -32,11 +32,13 @@ import com.google.protobuf.ByteString; import io.netty.buffer.ByteBuf; import io.netty.channel.Channel; +import io.netty.channel.ChannelHandlerContext; import io.netty.channel.ChannelPromise; import io.netty.channel.DefaultChannelPromise; import java.util.concurrent.CountDownLatch; import java.util.concurrent.atomic.AtomicReference; import org.apache.bookkeeper.bookie.Bookie; +import org.apache.bookkeeper.bookie.BookieException; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.WriteCallback; import org.apache.bookkeeper.proto.BookkeeperProtocol.AddRequest; import org.apache.bookkeeper.proto.BookkeeperProtocol.BKPacketHeader; @@ -56,7 +58,9 @@ public class WriteEntryProcessorV3Test { private Request request; private WriteEntryProcessorV3 processor; + private Channel channel; + private BookieRequestHandler requestHandler; private BookieRequestProcessor requestProcessor; private Bookie bookie; @@ -76,19 +80,22 @@ public void setup() { .build()) .build(); channel = mock(Channel.class); + when(channel.isOpen()).thenReturn(true); + + requestHandler = mock(BookieRequestHandler.class); + ChannelHandlerContext ctx = mock(ChannelHandlerContext.class); + when(ctx.channel()).thenReturn(channel); + when(requestHandler.ctx()).thenReturn(ctx); + bookie = mock(Bookie.class); requestProcessor = mock(BookieRequestProcessor.class); when(requestProcessor.getBookie()).thenReturn(bookie); when(requestProcessor.getWaitTimeoutOnBackpressureMillis()).thenReturn(-1L); - when(requestProcessor.getAddEntryStats()) - .thenReturn(NullStatsLogger.INSTANCE.getOpStatsLogger("add_entry")); - when(requestProcessor.getAddRequestStats()) - .thenReturn(NullStatsLogger.INSTANCE.getOpStatsLogger("add_requests")); - when(requestProcessor.getChannelWriteStats()) - .thenReturn(NullStatsLogger.INSTANCE.getOpStatsLogger("CHANNEL_WRITE")); + when(requestProcessor.getRequestStats()).thenReturn(new RequestStats(NullStatsLogger.INSTANCE)); + when(channel.isActive()).thenReturn(true); processor = new WriteEntryProcessorV3( request, - channel, + requestHandler, requestProcessor); } @@ -101,7 +108,7 @@ private void reinitRequest(int priority) { processor = new WriteEntryProcessorV3( request, - channel, + requestHandler, requestProcessor); } @@ -246,7 +253,38 @@ public void testNormalWritesOnWritableBookie() throws Exception { } @Test - public void testWritesWithClientNotAcceptingReponses() throws Exception { + public void testWritesCacheFlushTimeout() throws Exception { + when(bookie.isReadOnly()).thenReturn(false); + when(channel.voidPromise()).thenReturn(mock(ChannelPromise.class)); + when(channel.writeAndFlush(any())).thenReturn(mock(ChannelPromise.class)); + doAnswer(invocationOnMock -> { + throw new BookieException.OperationRejectedException(); + }).when(bookie).addEntry( + any(ByteBuf.class), eq(false), any(WriteCallback.class), same(channel), eq(new byte[0])); + + ChannelPromise promise = new DefaultChannelPromise(channel); + AtomicReference writtenObject = new AtomicReference<>(); + CountDownLatch latch = new CountDownLatch(1); + doAnswer(invocationOnMock -> { + writtenObject.set(invocationOnMock.getArgument(0)); + latch.countDown(); + return promise; + }).when(channel).writeAndFlush(any()); + + processor.run(); + + verify(bookie, times(1)) + .addEntry(any(ByteBuf.class), eq(false), any(WriteCallback.class), same(channel), eq(new byte[0])); + verify(channel, times(1)).writeAndFlush(any(Response.class)); + + 
latch.await(); + assertTrue(writtenObject.get() instanceof Response); + Response response = (Response) writtenObject.get(); + assertEquals(StatusCode.ETOOMANYREQUESTS, response.getStatus()); + } + + @Test + public void testWritesWithClientNotAcceptingResponses() throws Exception { when(requestProcessor.getWaitTimeoutOnBackpressureMillis()).thenReturn(5L); doAnswer(invocationOnMock -> { diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/checksum/CompositeByteBufUnwrapBugReproduceTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/checksum/CompositeByteBufUnwrapBugReproduceTest.java new file mode 100644 index 00000000000..45219c43161 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/checksum/CompositeByteBufUnwrapBugReproduceTest.java @@ -0,0 +1,280 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bookkeeper.proto.checksum; + +import static org.junit.jupiter.api.Assertions.assertArrayEquals; + +import com.scurrilous.circe.checksum.IntHash; +import com.scurrilous.circe.checksum.Java8IntHash; +import com.scurrilous.circe.checksum.Java9IntHash; +import com.scurrilous.circe.checksum.JniIntHash; +import com.scurrilous.circe.crc.Sse42Crc32C; +import io.netty.buffer.ByteBuf; +import io.netty.buffer.ByteBufAllocator; +import io.netty.buffer.ByteBufUtil; +import io.netty.buffer.CompositeByteBuf; +import io.netty.buffer.Unpooled; +import io.netty.util.ReferenceCounted; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; +import org.apache.bookkeeper.proto.BookieProtoEncoding; +import org.apache.bookkeeper.proto.BookieProtocol; +import org.apache.bookkeeper.util.ByteBufList; +import org.apache.bookkeeper.util.ByteBufVisitor; +import org.apache.commons.lang3.RandomUtils; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +/** + * This test class was added to reproduce a bug in the checksum calculation when + * the payload is a CompositeByteBuf and this buffer has a reader index state other than 0. + * The reader index state gets lost in the unwrapping process. + * + * There were at least 2 different bugs. One that occurred when the + * payload was >= BookieProtoEncoding.SMALL_ENTRY_SIZE_THRESHOLD and the other when + * it was < BookieProtoEncoding.SMALL_ENTRY_SIZE_THRESHOLD. + * This test covers both useV2Protocol=true and useV2Protocol=false since the bug was triggered differently. + * + * The bug has been fixed and this test is here to make sure it doesn't happen again. 
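The core hazard this test guards against can be reproduced with nothing but Netty. A CompositeByteBuf may carry a non-zero readerIndex, and any code that walks its components by absolute offset silently re-reads bytes the caller already consumed. A standalone sketch, independent of any BookKeeper code:

    import io.netty.buffer.ByteBufAllocator;
    import io.netty.buffer.CompositeByteBuf;
    import io.netty.buffer.Unpooled;

    public class ReaderIndexPitfallSketch {
        public static void main(String[] args) {
            CompositeByteBuf composite = ByteBufAllocator.DEFAULT.compositeBuffer();
            composite.addComponent(true, Unpooled.wrappedBuffer(new byte[]{1, 2, 3})); // prefix
            composite.addComponent(true, Unpooled.wrappedBuffer(new byte[]{4, 5, 6})); // payload
            composite.readerIndex(3); // the caller has already consumed the prefix

            // Walking from absolute index 0 ignores readerIndex and sees the
            // prefix again -- the class of unwrapping bug described above:
            composite.forEachByte(0, 3, b -> { System.out.print(b + " "); return true; }); // 1 2 3

            // Honouring readerIndex/readableBytes only touches the payload,
            // which is exactly what ByteBufVisitor.visitBuffers(...) is handed
            // in the assertions below:
            composite.forEachByte(composite.readerIndex(), composite.readableBytes(),
                    b -> { System.out.print(b + " "); return true; }); // 4 5 6
            composite.release();
        }
    }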
+ */ +@RunWith(Parameterized.class) +public class CompositeByteBufUnwrapBugReproduceTest { + final byte[] testPayLoad; + final int defaultBufferPrefixLength; + private final boolean useV2Protocol; + + // set to 0 to 3 to run a single scenario for debugging purposes + private static final int RUN_SINGLE_SCENARIO_FOR_DEBUGGING = -1; + + @Parameterized.Parameters + public static Collection<Object[]> testScenarios() { + List<Object[]> scenarios = Arrays.asList(new Object[][] { + {BookieProtoEncoding.SMALL_ENTRY_SIZE_THRESHOLD - 1, true}, + {BookieProtoEncoding.SMALL_ENTRY_SIZE_THRESHOLD - 1, false}, + {BookieProtoEncoding.SMALL_ENTRY_SIZE_THRESHOLD, true}, + {BookieProtoEncoding.SMALL_ENTRY_SIZE_THRESHOLD, false} + }); + if (RUN_SINGLE_SCENARIO_FOR_DEBUGGING >= 0) { + // pick a single scenario for debugging + scenarios = scenarios.subList(RUN_SINGLE_SCENARIO_FOR_DEBUGGING, RUN_SINGLE_SCENARIO_FOR_DEBUGGING + 1); + } + return scenarios; + } + + public CompositeByteBufUnwrapBugReproduceTest(int payloadSize, boolean useV2Protocol) { + this.testPayLoad = createTestPayLoad(payloadSize); + this.defaultBufferPrefixLength = payloadSize / 7; + this.useV2Protocol = useV2Protocol; + } + + private static byte[] createTestPayLoad(int payloadSize) { + byte[] payload = new byte[payloadSize]; + for (int i = 0; i < payloadSize; i++) { + payload[i] = (byte) i; + } + return payload; + } + + /** + * A DigestManager that uses the given IntHash implementation for testing. + */ + static class TestIntHashDigestManager extends DigestManager { + private final IntHash intHash; + + public TestIntHashDigestManager(IntHash intHash, long ledgerId, boolean useV2Protocol, + ByteBufAllocator allocator) { + super(ledgerId, useV2Protocol, allocator); + this.intHash = intHash; + } + + @Override + int getMacCodeLength() { + return 4; + } + + @Override + boolean isInt32Digest() { + return true; + } + + @Override + void populateValueAndReset(int digest, ByteBuf buf) { + buf.writeInt(digest); + } + + @Override + int internalUpdate(int digest, ByteBuf data, int offset, int len) { + return intHash.resume(digest, data, offset, len); + } + + @Override + int internalUpdate(int digest, byte[] buffer, int offset, int len) { + return intHash.resume(digest, buffer, offset, len); + } + + @Override + boolean acceptsMemoryAddressBuffer() { + return intHash.acceptsMemoryAddressBuffer(); + } + } + + @Test + public void shouldCalculateChecksumForCompositeBuffer() { + ByteBuf testPayload = Unpooled.wrappedBuffer(testPayLoad); + byte[] referenceOutput = computeDigestAndPackageForSending(new Java8IntHash(), testPayload.retainedDuplicate()); + assertDigestAndPackageMatchesReference(new Java8IntHash(), testPayload, referenceOutput); + assertDigestAndPackageMatchesReference(new Java9IntHash(), testPayload, referenceOutput); + if (Sse42Crc32C.isSupported()) { + assertDigestAndPackageMatchesReference(new JniIntHash(), testPayload, referenceOutput); + } + testPayload.release(); + } + + private void assertDigestAndPackageMatchesReference(IntHash intHash, ByteBuf payload, byte[] referenceOutput) { + assertDigestAndPackageScenario(intHash, payload.retainedDuplicate(), referenceOutput, testPayLoad, + "plain payload, no wrapping"); + + ByteBuf payload2 = wrapWithPrefixAndCompositeByteBufWithReaderIndexState(payload.retainedDuplicate(), + defaultBufferPrefixLength); + assertDigestAndPackageScenario(intHash, payload2, referenceOutput, testPayLoad, + "payload with prefix wrapped in CompositeByteBuf with readerIndex state"); + + ByteBuf payload3 = wrapWithPrefixAndMultipleCompositeByteBufWithReaderIndexStateAndMultipleLayersOfDuplicate( + payload.retainedDuplicate(), defaultBufferPrefixLength); + assertDigestAndPackageScenario(intHash, payload3, referenceOutput, testPayLoad, + "payload with prefix wrapped in 2 layers of CompositeByteBuf with readerIndex state in the outer " + + "composite. In addition, the outer composite is duplicated twice."); + + ByteBuf payload4 = wrapInCompositeByteBufAndSlice(payload.retainedDuplicate(), defaultBufferPrefixLength); + assertDigestAndPackageScenario(intHash, payload4, referenceOutput, testPayLoad, + "payload with prefix wrapped in CompositeByteBuf and sliced"); + } + + private void assertDigestAndPackageScenario(IntHash intHash, ByteBuf payload, byte[] referenceOutput, + byte[] testPayLoadArray, + String scenario) { + // this validates that the readable bytes in the payload match the TEST_PAYLOAD content + assertArrayEquals(testPayLoadArray, ByteBufUtil.getBytes(payload.duplicate()), + "input is invalid for scenario '" + scenario + "'"); + + ByteBuf visitedCopy = Unpooled.buffer(payload.readableBytes()); + ByteBufVisitor.visitBuffers(payload, payload.readerIndex(), payload.readableBytes(), + new ByteBufVisitor.ByteBufVisitorCallback<Void>() { + @Override + public void visitBuffer(Void context, ByteBuf visitBuffer, int visitIndex, int visitLength) { + visitedCopy.writeBytes(visitBuffer, visitIndex, visitLength); + } + + @Override + public void visitArray(Void context, byte[] visitArray, int visitIndex, int visitLength) { + visitedCopy.writeBytes(visitArray, visitIndex, visitLength); + } + }, null); + + assertArrayEquals(ByteBufUtil.getBytes(visitedCopy), testPayLoadArray, + "visited copy is invalid for scenario '" + scenario + "'. Bug in ByteBufVisitor?"); + + // compute the digest and package + byte[] output = computeDigestAndPackageForSending(intHash, payload.duplicate()); + if (referenceOutput == null) { + referenceOutput = + computeDigestAndPackageForSending(new Java8IntHash(), Unpooled.wrappedBuffer(testPayLoadArray)); + } + // this validates that the output matches the reference output + assertArrayEquals(referenceOutput, output, "output is invalid for scenario '" + scenario + "'"); + } + + private byte[] computeDigestAndPackageForSending(IntHash intHash, ByteBuf data) { + DigestManager digestManager = new TestIntHashDigestManager(intHash, 1, useV2Protocol, ByteBufAllocator.DEFAULT); + ReferenceCounted packagedBuffer = + digestManager.computeDigestAndPackageForSending(1, 0, data.readableBytes(), data, + MacDigestManager.EMPTY_LEDGER_KEY, BookieProtocol.FLAG_NONE); + return packagedBufferToBytes(packagedBuffer); + } + + ByteBuf wrapWithPrefixAndCompositeByteBufWithReaderIndexState(ByteBuf payload, int bufferPrefixLength) { + // create a new buffer with a prefix and the actual payload + ByteBuf prefixedPayload = ByteBufAllocator.DEFAULT.buffer(bufferPrefixLength + payload.readableBytes()); + prefixedPayload.writeBytes(RandomUtils.nextBytes(bufferPrefixLength)); + prefixedPayload.writeBytes(payload); + + // wrap the buffer in a composite buffer + CompositeByteBuf outerComposite = ByteBufAllocator.DEFAULT.compositeBuffer(); + outerComposite.addComponent(true, prefixedPayload); + + // set reader index state. this is the state that gets lost in the unwrapping process + outerComposite.readerIndex(bufferPrefixLength); + + return outerComposite; + } + + ByteBuf wrapWithPrefixAndMultipleCompositeByteBufWithReaderIndexStateAndMultipleLayersOfDuplicate(ByteBuf payload, + int bufferPrefixLength) { + // create a new buffer with a prefix and the actual payload + ByteBuf prefixedPayload = ByteBufAllocator.DEFAULT.buffer(bufferPrefixLength + payload.readableBytes()); + prefixedPayload.writeBytes(RandomUtils.nextBytes(bufferPrefixLength)); + prefixedPayload.writeBytes(payload); + + CompositeByteBuf innerComposite = ByteBufAllocator.DEFAULT.compositeBuffer(); + innerComposite.addComponent(true, prefixedPayload); + innerComposite.addComponent(true, Unpooled.EMPTY_BUFFER); + + // wrap the buffer in a composite buffer + CompositeByteBuf outerComposite = ByteBufAllocator.DEFAULT.compositeBuffer(); + outerComposite.addComponent(true, innerComposite); + outerComposite.addComponent(true, Unpooled.EMPTY_BUFFER); + + // set reader index state. this is the state that gets lost in the unwrapping process + outerComposite.readerIndex(bufferPrefixLength); + + return outerComposite.duplicate().duplicate(); + } + + ByteBuf wrapInCompositeByteBufAndSlice(ByteBuf payload, int bufferPrefixLength) { + // create a composite buffer + CompositeByteBuf compositeWithPrefix = ByteBufAllocator.DEFAULT.compositeBuffer(); + compositeWithPrefix.addComponent(true, Unpooled.wrappedBuffer(RandomUtils.nextBytes(bufferPrefixLength))); + compositeWithPrefix.addComponent(true, payload); + + // return a slice of the composite buffer so that it returns the payload + return compositeWithPrefix.slice(bufferPrefixLength, payload.readableBytes()); + } + + private static byte[] packagedBufferToBytes(ReferenceCounted packagedBuffer) { + byte[] output; + if (packagedBuffer instanceof ByteBufList) { + ByteBufList bufList = (ByteBufList) packagedBuffer; + output = new byte[bufList.readableBytes()]; + bufList.getBytes(output); + for (int i = 0; i < bufList.size(); i++) { + bufList.getBuffer(i).release(); + } + } else if (packagedBuffer instanceof ByteBuf) { + output = ByteBufUtil.getBytes((ByteBuf) packagedBuffer); + packagedBuffer.release(); + } else { + throw new RuntimeException("Unexpected type: " + packagedBuffer.getClass()); + } + return output; + } +} \ No newline at end of file
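A minimal sketch of the Netty behavior these scenarios exercise (illustrative only, assuming stock Netty 4.x APIs; not part of the patch): a duplicate or composite buffer carries its own readerIndex, so any "unwrapping" that walks the underlying components directly starts at component offset 0 and silently re-reads the prefix bytes the readerIndex was meant to skip.

    CompositeByteBuf composite = ByteBufAllocator.DEFAULT.compositeBuffer();
    composite.addComponent(true, Unpooled.wrappedBuffer(new byte[] {1, 2, 3, 4}));
    composite.readerIndex(2);            // skip a 2-byte prefix
    ByteBuf dup = composite.duplicate(); // shares components, keeps its own indexes
    // dup.readableBytes() == 2, yet composite.component(0) still exposes all 4 bytes;
    // a digest computed over the raw component would wrongly include the skipped prefix.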
diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/AuditorBookieCheckTaskTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/AuditorBookieCheckTaskTest.java new file mode 100644 index 00000000000..dea54d4fa00 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/AuditorBookieCheckTaskTest.java @@ -0,0 +1,153 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bookkeeper.replication; + +import static org.apache.bookkeeper.replication.ReplicationStats.AUDITOR_SCOPE; +import static org.junit.Assert.assertTrue; +import static org.mockito.ArgumentMatchers.anyCollection; +import static org.mockito.ArgumentMatchers.anyLong; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.doThrow; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import com.beust.jcommander.internal.Lists; +import com.beust.jcommander.internal.Sets; +import com.google.common.collect.Maps; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.atomic.AtomicBoolean; +import org.apache.bookkeeper.client.BKException; +import org.apache.bookkeeper.client.BookKeeperAdmin; +import org.apache.bookkeeper.client.api.LedgerMetadata; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.meta.LedgerManager; +import org.apache.bookkeeper.meta.LedgerUnderreplicationManager; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.stats.OpStatsLogger; +import org.apache.bookkeeper.test.TestStatsProvider; +import org.apache.bookkeeper.versioning.LongVersion; +import org.apache.bookkeeper.versioning.Versioned; +import org.junit.Before; +import org.junit.Test; + +/** + * Unit test {@link AuditorBookieCheckTask}. + */ +public class AuditorBookieCheckTaskTest { + + private AuditorStats auditorStats; + private BookKeeperAdmin admin; + private LedgerManager ledgerManager; + private LedgerUnderreplicationManager underreplicationManager; + private BookieLedgerIndexer ledgerIndexer; + private AuditorBookieCheckTask bookieCheckTask; + private final AtomicBoolean shutdownCompleted = new AtomicBoolean(false); + private final AuditorTask.ShutdownTaskHandler shutdownTaskHandler = () -> shutdownCompleted.set(true); + private long startLedgerId = 0; + + @Before + public void setup() { + ServerConfiguration conf = mock(ServerConfiguration.class); + TestStatsProvider statsProvider = new TestStatsProvider(); + TestStatsProvider.TestStatsLogger statsLogger = statsProvider.getStatsLogger(AUDITOR_SCOPE); + final AuditorStats auditorStats = new AuditorStats(statsLogger); + this.auditorStats = spy(auditorStats); + admin = mock(BookKeeperAdmin.class); + ledgerManager = mock(LedgerManager.class); + underreplicationManager = mock(LedgerUnderreplicationManager.class); + ledgerIndexer = mock(BookieLedgerIndexer.class); + AuditorBookieCheckTask bookieCheckTask1 = new AuditorBookieCheckTask( + conf, this.auditorStats, admin, ledgerManager, underreplicationManager, + shutdownTaskHandler, ledgerIndexer, null, null); + bookieCheckTask = spy(bookieCheckTask1); + } + + @Test + public void testShutdownAuditBookiesException() + throws BKException, ReplicationException.BKAuditException, InterruptedException { + doThrow(new ReplicationException.BKAuditException("test failed")) + .when(bookieCheckTask) + .auditBookies(); + bookieCheckTask.startAudit(true); + + assertTrue("shutdownTaskHandler should be executed.", shutdownCompleted.get()); + } + + @Test + public void testAuditBookies() + throws ReplicationException.UnavailableException, ReplicationException.BKAuditException,
BKException { + final String bookieId1 = "127.0.0.1:1000"; + final String bookieId2 = "127.0.0.1:1001"; + final long bookie1LedgersCount = 10; + final long bookie2LedgersCount = 20; + + final Map<String, Set<Long>> bookiesAndLedgers = Maps.newHashMap(); + bookiesAndLedgers.put(bookieId1, getLedgers(bookie1LedgersCount)); + bookiesAndLedgers.put(bookieId2, getLedgers(bookie2LedgersCount)); + when(ledgerIndexer.getBookieToLedgerIndex()).thenReturn(bookiesAndLedgers); + when(underreplicationManager.isLedgerReplicationEnabled()).thenReturn(true); + + CompletableFuture<Versioned<LedgerMetadata>> metaPromise = new CompletableFuture<>(); + final LongVersion version = mock(LongVersion.class); + final LedgerMetadata metadata = mock(LedgerMetadata.class); + metaPromise.complete(new Versioned<>(metadata, version)); + when(ledgerManager.readLedgerMetadata(anyLong())).thenReturn(metaPromise); + + CompletableFuture<Void> markPromise = new CompletableFuture<>(); + markPromise.complete(null); + when(underreplicationManager.markLedgerUnderreplicatedAsync(anyLong(), anyCollection())) + .thenReturn(markPromise); + + OpStatsLogger numUnderReplicatedLedgerStats = mock(OpStatsLogger.class); + when(auditorStats.getNumUnderReplicatedLedger()).thenReturn(numUnderReplicatedLedgerStats); + + final List<BookieId> availableBookies = Lists.newArrayList(); + final List<BookieId> readOnlyBookies = Lists.newArrayList(); + // test bookie1 lost + availableBookies.add(BookieId.parse(bookieId2)); + when(admin.getAvailableBookies()).thenReturn(availableBookies); + when(admin.getReadOnlyBookies()).thenReturn(readOnlyBookies); + bookieCheckTask.startAudit(true); + verify(numUnderReplicatedLedgerStats, times(1)) + .registerSuccessfulValue(eq(bookie1LedgersCount)); + + // test bookie2 lost + numUnderReplicatedLedgerStats = mock(OpStatsLogger.class); + when(auditorStats.getNumUnderReplicatedLedger()).thenReturn(numUnderReplicatedLedgerStats); + availableBookies.clear(); + availableBookies.add(BookieId.parse(bookieId1)); + bookieCheckTask.startAudit(true); + verify(numUnderReplicatedLedgerStats, times(1)) + .registerSuccessfulValue(eq(bookie2LedgersCount)); + + } + + private Set<Long> getLedgers(long count) { + final Set<Long> ledgers = Sets.newHashSet(); + for (int i = 0; i < count; i++) { + ledgers.add(i + startLedgerId++); + } + return ledgers; + } +}
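The audit this test drives is essentially a set difference; a hedged sketch with invented local names (not the actual AuditorBookieCheckTask implementation):

    // ledger index as stubbed above: bookie address -> ledgers stored on it
    Map<String, Set<Long>> ledgersByBookie = ledgerIndexer.getBookieToLedgerIndex();
    Set<String> lostBookies = new HashSet<>(ledgersByBookie.keySet());
    admin.getAvailableBookies().forEach(b -> lostBookies.remove(b.toString()));
    admin.getReadOnlyBookies().forEach(b -> lostBookies.remove(b.toString()));
    // every ledger indexed under a lost bookie is marked under-replicated, which is
    // why the test expects bookie1LedgersCount / bookie2LedgersCount to be reported.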
diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/AuditorBookieTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/AuditorBookieTest.java index 25878655a11..1bf56983624 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/AuditorBookieTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/AuditorBookieTest.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -30,12 +30,11 @@ import java.util.HashMap; import java.util.LinkedList; import java.util.List; - import org.apache.bookkeeper.conf.ServerConfiguration; import org.apache.bookkeeper.meta.zk.ZKMetadataDriverBase; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.proto.BookieServer; import org.apache.bookkeeper.test.BookKeeperClusterTestCase; -import org.apache.bookkeeper.zookeeper.ZooKeeperClient; import org.apache.zookeeper.ZooKeeper; import org.junit.Test; import org.slf4j.Logger; @@ -88,14 +87,14 @@ public void testEnsureOnlySingleAuditor() throws Exception { BookieServer auditor = verifyAuditor(); // shutdown bookie which is not an auditor - int indexOf = bs.indexOf(auditor); + int indexOf = indexOfServer(auditor); int bkIndexDownBookie; - if (indexOf < bs.size() - 1) { + if (indexOf < lastBookieIndex()) { bkIndexDownBookie = indexOf + 1; } else { bkIndexDownBookie = indexOf - 1; } - shutdownBookie(bs.get(bkIndexDownBookie)); + shutdownBookie(serverByIndex(bkIndexDownBookie)); startNewBookie(); startNewBookie(); @@ -116,14 +115,11 @@ public void testSuccessiveAuditorCrashes() throws Exception { shutdownBookie(auditor); BookieServer newAuditor1 = waitForNewAuditor(auditor); - bs.remove(auditor); - shutdownBookie(newAuditor1); BookieServer newAuditor2 = waitForNewAuditor(newAuditor1); assertNotSame( "Auditor re-election is not happened for auditor failure!", auditor, newAuditor2); - bs.remove(newAuditor1); } /** @@ -161,14 +157,12 @@ public void testShutdown() throws Exception { assertNotSame( "Auditor re-election is not happened for auditor failure!", auditor, newAuditor); - int indexOfDownBookie = bs.indexOf(auditor); - bs.remove(indexOfDownBookie); - bsConfs.remove(indexOfDownBookie); + List<String> children = zkc.getChildren(electionPath, false); for (String child : children) { byte[] data = zkc.getData(electionPath + '/' + child, false, null); String bookieIP = new String(data); - String addr = auditor.getLocalAddress().toString(); + String addr = auditor.getBookieId().toString(); assertFalse("AuditorElection cleanup fails", bookieIP .contains(addr)); } @@ -182,20 +176,18 @@ public void testRestartAuditorBookieAfterCrashing() throws Exception { BookieServer auditor = verifyAuditor(); - shutdownBookie(auditor); - String addr = auditor.getLocalAddress().toString(); + String addr = auditor.getBookieId().toString(); // restarting Bookie with same configurations.
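+ // (shutdownBookie(...) now returns the killed bookie's ServerConfiguration, + // so the identical configuration can be reused for the restart below)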
- int indexOfDownBookie = bs.indexOf(auditor); - ServerConfiguration serverConfiguration = bsConfs - .get(indexOfDownBookie); - bs.remove(indexOfDownBookie); - bsConfs.remove(indexOfDownBookie); + ServerConfiguration serverConfiguration = shutdownBookie(auditor); + auditorElectors.remove(addr); startBookie(serverConfiguration); // starting corresponding auditor elector - LOG.debug("Performing Auditor Election:" + addr); + if (LOG.isDebugEnabled()) { + LOG.debug("Performing Auditor Election:" + addr); + } startAuditorElector(addr); // waiting for new auditor to come @@ -204,35 +196,31 @@ public void testRestartAuditorBookieAfterCrashing() throws Exception { "Auditor re-election is not happened for auditor failure!", auditor, newAuditor); assertFalse("No re-election after old auditor rejoins", auditor - .getLocalAddress().getPort() == newAuditor.getLocalAddress() - .getPort()); + .getBookieId().equals(newAuditor.getBookieId())); } private void startAuditorElector(String addr) throws Exception { - ZooKeeper zk = ZooKeeperClient.newBuilder() - .connectString(zkUtil.getZooKeeperConnectString()) - .sessionTimeoutMs(10000) - .build(); - zkClients.add(zk); - AuditorElector auditorElector = new AuditorElector(addr, - baseConf, zk); + baseConf); auditorElectors.put(addr, auditorElector); auditorElector.start(); - LOG.debug("Starting Auditor Elector"); + if (LOG.isDebugEnabled()) { + LOG.debug("Starting Auditor Elector"); + } } private void startAuditorElectors() throws Exception { - for (BookieServer bserver : bs) { - String addr = bserver.getLocalAddress().toString(); - startAuditorElector(addr); + for (BookieId addr : bookieAddresses()) { + startAuditorElector(addr.toString()); } } private void stopAuditorElectors() throws Exception { for (AuditorElector auditorElector : auditorElectors.values()) { auditorElector.shutdown(); - LOG.debug("Stopping Auditor Elector!"); + if (LOG.isDebugEnabled()) { + LOG.debug("Stopping Auditor Elector!"); + } } } @@ -240,7 +228,9 @@ private BookieServer verifyAuditor() throws Exception { List<BookieServer> auditors = getAuditorBookie(); assertEquals("Multiple Bookies acting as Auditor!", 1, auditors .size()); - LOG.debug("Bookie running as Auditor:" + auditors.get(0)); + if (LOG.isDebugEnabled()) { + LOG.debug("Bookie running as Auditor:" + auditors.get(0)); + } return auditors.get(0); } @@ -248,22 +238,28 @@ private List<BookieServer> getAuditorBookie() throws Exception { List<BookieServer> auditors = new LinkedList<BookieServer>(); byte[] data = zkc.getData(electionPath, false, null); assertNotNull("Auditor election failed", data); - for (BookieServer bks : bs) { - if (new String(data).contains(bks.getLocalAddress().getPort() + "")) { + for (int i = 0; i < bookieCount(); i++) { + BookieServer bks = serverByIndex(i); + if (new String(data).contains(bks.getBookieId() + "")) { auditors.add(bks); } } return auditors; } - private void shutdownBookie(BookieServer bkServer) throws Exception { - String addr = bkServer.getLocalAddress().toString(); - LOG.debug("Shutting down bookie:" + addr); + private ServerConfiguration shutdownBookie(BookieServer bkServer) throws Exception { + int index = indexOfServer(bkServer); + String addr = addressByIndex(index).toString(); + if (LOG.isDebugEnabled()) { + LOG.debug("Shutting down bookie:" + addr); + } // shutdown bookie which is an auditor - bkServer.shutdown(); + ServerConfiguration conf = killBookie(index); + // stopping corresponding auditor elector auditorElectors.get(addr).shutdown(); + return conf; } private BookieServer waitForNewAuditor(BookieServer auditor) @@ -271,12 +267,15 @@
private BookieServer waitForNewAuditor(BookieServer auditor) BookieServer newAuditor = null; int retryCount = 8; while (retryCount > 0) { - List auditors = getAuditorBookie(); - if (auditors.size() > 0) { - newAuditor = auditors.get(0); - if (auditor != newAuditor) { - break; + try { + List auditors = getAuditorBookie(); + if (auditors.size() > 0) { + newAuditor = auditors.get(0); + if (auditor != newAuditor) { + break; + } } + } catch (Exception ignore) { } Thread.sleep(500); retryCount--; diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/AuditorCheckAllLedgersTaskTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/AuditorCheckAllLedgersTaskTest.java new file mode 100644 index 00000000000..adac608e1c0 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/AuditorCheckAllLedgersTaskTest.java @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bookkeeper.replication; + +import static org.apache.bookkeeper.replication.ReplicationStats.AUDITOR_SCOPE; +import static org.junit.Assert.assertEquals; + +import java.util.LinkedList; +import java.util.List; +import org.apache.bookkeeper.client.BookKeeper; +import org.apache.bookkeeper.client.BookKeeperAdmin; +import org.apache.bookkeeper.client.LedgerHandle; +import org.apache.bookkeeper.conf.ClientConfiguration; +import org.apache.bookkeeper.meta.LedgerManager; +import org.apache.bookkeeper.meta.LedgerManagerFactory; +import org.apache.bookkeeper.meta.LedgerUnderreplicationManager; +import org.apache.bookkeeper.stats.NullStatsLogger; +import org.apache.bookkeeper.test.BookKeeperClusterTestCase; +import org.apache.bookkeeper.test.TestStatsProvider; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Unit test {@link AuditorCheckAllLedgersTask}. 
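+ * <p>The task makes a single pass over all ledger metadata; the test below asserts + * the stats it publishes (CHECK_ALL_LEDGERS_TIME and NUM_LEDGERS_CHECKED).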
+ */ +public class AuditorCheckAllLedgersTaskTest extends BookKeeperClusterTestCase { + private static final Logger LOG = LoggerFactory + .getLogger(AuditorCheckAllLedgersTaskTest.class); + + private static final int maxNumberOfConcurrentOpenLedgerOperations = 500; + private static final int acquireConcurrentOpenLedgerOperationsTimeoutMSec = 120000; + + private BookKeeperAdmin admin; + private LedgerManager ledgerManager; + private LedgerUnderreplicationManager ledgerUnderreplicationManager; + + public AuditorCheckAllLedgersTaskTest() { + super(3); + baseConf.setPageLimit(1); + baseConf.setAutoRecoveryDaemonEnabled(false); + } + + @Override + public void setUp() throws Exception { + super.setUp(); + final BookKeeper bookKeeper = new BookKeeper(baseClientConf); + admin = new BookKeeperAdmin(bookKeeper, NullStatsLogger.INSTANCE, new ClientConfiguration(baseClientConf)); + LedgerManagerFactory ledgerManagerFactory = bookKeeper.getLedgerManagerFactory(); + ledgerManager = ledgerManagerFactory.newLedgerManager(); + ledgerUnderreplicationManager = ledgerManagerFactory.newLedgerUnderreplicationManager(); + baseConf.setAuditorMaxNumberOfConcurrentOpenLedgerOperations(maxNumberOfConcurrentOpenLedgerOperations); + baseConf.setAuditorAcquireConcurrentOpenLedgerOperationsTimeoutMSec( + acquireConcurrentOpenLedgerOperationsTimeoutMSec); + } + + @Test + public void testCheckAllLedgers() throws Exception { + // 1. create ledgers + final int numLedgers = 10; + List<Long> ids = new LinkedList<Long>(); + for (int i = 0; i < numLedgers; i++) { + LedgerHandle lh = bkc.createLedger(3, 3, BookKeeper.DigestType.CRC32, "passwd".getBytes()); + ids.add(lh.getId()); + for (int j = 0; j < 2; j++) { + lh.addEntry("testdata".getBytes()); + } + lh.close(); + } + + // 2. init CheckAllLedgersTask + final TestStatsProvider statsProvider = new TestStatsProvider(); + final TestStatsProvider.TestStatsLogger statsLogger = statsProvider.getStatsLogger(AUDITOR_SCOPE); + final AuditorStats auditorStats = new AuditorStats(statsLogger); + + AuditorCheckAllLedgersTask auditorCheckAllLedgersTask = new AuditorCheckAllLedgersTask( + baseConf, auditorStats, admin, ledgerManager, + ledgerUnderreplicationManager, null, (flag, throwable) -> flag.set(false)); + + // 3. checkAllLedgers + auditorCheckAllLedgersTask.runTask(); + + // 4. verify + assertEquals("CHECK_ALL_LEDGERS_TIME", 1, ((TestStatsProvider.TestOpStatsLogger) statsLogger + .getOpStatsLogger(ReplicationStats.CHECK_ALL_LEDGERS_TIME)).getSuccessCount()); + assertEquals("NUM_LEDGERS_CHECKED", numLedgers, + (long) statsLogger.getCounter(ReplicationStats.NUM_LEDGERS_CHECKED).get()); + } +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/AuditorLedgerCheckerTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/AuditorLedgerCheckerTest.java index 65c69226f02..2e3e09012fb --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/AuditorLedgerCheckerTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/AuditorLedgerCheckerTest.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements.
See the NOTICE file @@ -45,28 +45,31 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicReference; +import java.util.stream.Collectors; import lombok.Cleanup; -import org.apache.bookkeeper.bookie.Bookie; +import org.apache.bookkeeper.bookie.BookieImpl; import org.apache.bookkeeper.client.AsyncCallback.AddCallback; import org.apache.bookkeeper.client.BKException; import org.apache.bookkeeper.client.BookKeeper.DigestType; import org.apache.bookkeeper.client.LedgerHandle; -import org.apache.bookkeeper.client.LedgerMetadata; +import org.apache.bookkeeper.client.LedgerMetadataBuilder; +import org.apache.bookkeeper.client.api.LedgerMetadata; import org.apache.bookkeeper.common.util.OrderedScheduler; import org.apache.bookkeeper.conf.ServerConfiguration; import org.apache.bookkeeper.meta.LedgerManager; import org.apache.bookkeeper.meta.MetadataClientDriver; import org.apache.bookkeeper.meta.MetadataDrivers; +import org.apache.bookkeeper.meta.UnderreplicatedLedger; import org.apache.bookkeeper.meta.ZkLedgerUnderreplicationManager; import org.apache.bookkeeper.meta.zk.ZKMetadataDriverBase; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.net.BookieSocketAddress; import org.apache.bookkeeper.proto.BookieServer; -import org.apache.bookkeeper.proto.DataFormats.UnderreplicatedLedgerFormat; import org.apache.bookkeeper.replication.ReplicationException.CompatibilityException; import org.apache.bookkeeper.replication.ReplicationException.UnavailableException; import org.apache.bookkeeper.stats.NullStatsLogger; import org.apache.bookkeeper.test.BookKeeperClusterTestCase; -import org.apache.commons.lang.mutable.MutableInt; import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.WatchedEvent; import org.apache.zookeeper.Watcher; @@ -129,6 +132,7 @@ public void setUp() throws Exception { + "/underreplication/auditorelection"; urLedgerMgr = new ZkLedgerUnderreplicationManager(baseClientConf, zkc); + urLedgerMgr.setCheckAllLedgersCTime(System.currentTimeMillis()); startAuditorElectors(); rng = new Random(System.currentTimeMillis()); // Initialize the Random urLedgerList = new HashSet(); @@ -144,20 +148,23 @@ public void tearDown() throws Exception { } private void startAuditorElectors() throws Exception { - for (BookieServer bserver : bs) { - String addr = bserver.getLocalAddress().toString(); - AuditorElector auditorElector = new AuditorElector(addr, - baseConf, zkc); + for (String addr : bookieAddresses().stream().map(Object::toString) + .collect(Collectors.toList())) { + AuditorElector auditorElector = new AuditorElector(addr, baseConf); auditorElectors.put(addr, auditorElector); auditorElector.start(); - LOG.debug("Starting Auditor Elector"); + if (LOG.isDebugEnabled()) { + LOG.debug("Starting Auditor Elector"); + } } } private void stopAuditorElectors() throws Exception { for (AuditorElector auditorElector : auditorElectors.values()) { auditorElector.shutdown(); - LOG.debug("Stopping Auditor Elector!"); + if (LOG.isDebugEnabled()) { + LOG.debug("Stopping Auditor Elector!"); + } } } @@ -168,18 +175,22 @@ private void stopAuditorElectors() throws Exception { public void testSimpleLedger() throws Exception { LedgerHandle lh1 = createAndAddEntriesToLedger(); Long ledgerId = lh1.getId(); - LOG.debug("Created ledger : " + ledgerId); + if (LOG.isDebugEnabled()) { + LOG.debug("Created ledger : " + ledgerId); + } ledgerList.add(ledgerId); 
lh1.close(); final CountDownLatch underReplicaLatch = registerUrLedgerWatcher(ledgerList .size()); - int bkShutdownIndex = bs.size() - 1; + int bkShutdownIndex = lastBookieIndex(); String shutdownBookie = shutdownBookie(bkShutdownIndex); // grace period for publishing the bk-ledger - LOG.debug("Waiting for ledgers to be marked as under replicated"); + if (LOG.isDebugEnabled()) { + LOG.debug("Waiting for ledgers to be marked as under replicated"); + } waitForAuditToComplete(); underReplicaLatch.await(5, TimeUnit.SECONDS); Map urLedgerData = getUrLedgerData(urLedgerList); @@ -208,14 +219,16 @@ public void testRestartBookie() throws Exception { LedgerHandle lh1 = createAndAddEntriesToLedger(); LedgerHandle lh2 = createAndAddEntriesToLedger(); - LOG.debug("Created following ledgers : {}, {}", lh1, lh2); + if (LOG.isDebugEnabled()) { + LOG.debug("Created following ledgers : {}, {}", lh1, lh2); + } - int bkShutdownIndex = bs.size() - 1; - ServerConfiguration bookieConf1 = bsConfs.get(bkShutdownIndex); + int bkShutdownIndex = lastBookieIndex(); + ServerConfiguration bookieConf1 = confByIndex(bkShutdownIndex); String shutdownBookie = shutdownBookie(bkShutdownIndex); // restart the failed bookie - bs.add(startBookie(bookieConf1)); + startAndAddBookie(bookieConf1); waitForLedgerMissingReplicas(lh1.getId(), 10, shutdownBookie); waitForLedgerMissingReplicas(lh2.getId(), 10, shutdownBookie); @@ -230,16 +243,18 @@ public void testMultipleBookieFailures() throws Exception { LedgerHandle lh1 = createAndAddEntriesToLedger(); // failing first bookie - shutdownBookie(bs.size() - 1); + shutdownBookie(lastBookieIndex()); // simulate re-replication doLedgerRereplication(lh1.getId()); // failing another bookie - String shutdownBookie = shutdownBookie(bs.size() - 1); + String shutdownBookie = shutdownBookie(lastBookieIndex()); // grace period for publishing the bk-ledger - LOG.debug("Waiting for ledgers to be marked as under replicated"); + if (LOG.isDebugEnabled()) { + LOG.debug("Waiting for ledgers to be marked as under replicated"); + } assertTrue("Ledger should be missing second replica", waitForLedgerMissingReplicas(lh1.getId(), 10, shutdownBookie)); } @@ -248,7 +263,9 @@ public void testMultipleBookieFailures() throws Exception { public void testToggleLedgerReplication() throws Exception { LedgerHandle lh1 = createAndAddEntriesToLedger(); ledgerList.add(lh1.getId()); - LOG.debug("Created following ledgers : " + ledgerList); + if (LOG.isDebugEnabled()) { + LOG.debug("Created following ledgers : " + ledgerList); + } // failing another bookie CountDownLatch urReplicaLatch = registerUrLedgerWatcher(ledgerList @@ -257,8 +274,8 @@ public void testToggleLedgerReplication() throws Exception { // disabling ledger replication urLedgerMgr.disableLedgerReplication(); ArrayList shutdownBookieList = new ArrayList(); - shutdownBookieList.add(shutdownBookie(bs.size() - 1)); - shutdownBookieList.add(shutdownBookie(bs.size() - 1)); + shutdownBookieList.add(shutdownBookie(lastBookieIndex())); + shutdownBookieList.add(shutdownBookie(lastBookieIndex())); assertFalse("Ledger replication is not disabled!", urReplicaLatch .await(1, TimeUnit.SECONDS)); @@ -297,20 +314,26 @@ public void testDuplicateEnDisableAutoRecovery() throws Exception { public void testReadOnlyBookieExclusionFromURLedgersCheck() throws Exception { LedgerHandle lh = createAndAddEntriesToLedger(); ledgerList.add(lh.getId()); - LOG.debug("Created following ledgers : " + ledgerList); + if (LOG.isDebugEnabled()) { + LOG.debug("Created following ledgers : " + 
ledgerList); + } int count = ledgerList.size(); final CountDownLatch underReplicaLatch = registerUrLedgerWatcher(count); final int bkIndex = 2; - ServerConfiguration bookieConf = bsConfs.get(bkIndex); - BookieServer bk = bs.get(bkIndex); + ServerConfiguration bookieConf = confByIndex(bkIndex); + BookieServer bk = serverByIndex(bkIndex); bookieConf.setReadOnlyModeEnabled(true); - bk.getBookie().getStateManager().doTransitionToReadOnlyMode(); - bkc.waitForReadOnlyBookie(Bookie.getBookieAddress(bsConfs.get(bkIndex))).get(30, TimeUnit.SECONDS); + + ((BookieImpl) bk.getBookie()).getStateManager().doTransitionToReadOnlyMode(); + bkc.waitForReadOnlyBookie(BookieImpl.getBookieId(confByIndex(bkIndex))) + .get(30, TimeUnit.SECONDS); // grace period for publishing the bk-ledger - LOG.debug("Waiting for Auditor to finish ledger check."); + if (LOG.isDebugEnabled()) { + LOG.debug("Waiting for Auditor to finish ledger check."); + } waitForAuditToComplete(); assertFalse("latch should not have completed", underReplicaLatch.await(5, TimeUnit.SECONDS)); } @@ -323,28 +346,38 @@ public void testReadOnlyBookieShutdown() throws Exception { LedgerHandle lh = createAndAddEntriesToLedger(); long ledgerId = lh.getId(); ledgerList.add(ledgerId); - LOG.debug("Created following ledgers : " + ledgerList); + if (LOG.isDebugEnabled()) { + LOG.debug("Created following ledgers : " + ledgerList); + } int count = ledgerList.size(); final CountDownLatch underReplicaLatch = registerUrLedgerWatcher(count); - int bkIndex = bs.size() - 1; - LOG.debug("Moving bookie {} {} to read only...", bkIndex, bs.get(bkIndex)); - ServerConfiguration bookieConf = bsConfs.get(bkIndex); - BookieServer bk = bs.get(bkIndex); + int bkIndex = lastBookieIndex(); + if (LOG.isDebugEnabled()) { + LOG.debug("Moving bookie {} {} to read only...", bkIndex, serverByIndex(bkIndex)); + } + ServerConfiguration bookieConf = confByIndex(bkIndex); + BookieServer bk = serverByIndex(bkIndex); bookieConf.setReadOnlyModeEnabled(true); - bk.getBookie().getStateManager().doTransitionToReadOnlyMode(); - bkc.waitForReadOnlyBookie(Bookie.getBookieAddress(bsConfs.get(bkIndex))).get(30, TimeUnit.SECONDS); + + ((BookieImpl) bk.getBookie()).getStateManager().doTransitionToReadOnlyMode(); + bkc.waitForReadOnlyBookie(BookieImpl.getBookieId(confByIndex(bkIndex))) + .get(30, TimeUnit.SECONDS); // grace period for publishing the bk-ledger - LOG.debug("Waiting for Auditor to finish ledger check."); + if (LOG.isDebugEnabled()) { + LOG.debug("Waiting for Auditor to finish ledger check."); + } waitForAuditToComplete(); assertFalse("latch should not have completed", underReplicaLatch.await(1, TimeUnit.SECONDS)); String shutdownBookie = shutdownBookie(bkIndex); // grace period for publishing the bk-ledger - LOG.debug("Waiting for ledgers to be marked as under replicated"); + if (LOG.isDebugEnabled()) { + LOG.debug("Waiting for ledgers to be marked as under replicated"); + } waitForAuditToComplete(); underReplicaLatch.await(5, TimeUnit.SECONDS); Map urLedgerData = getUrLedgerData(urLedgerList); @@ -364,7 +397,9 @@ public void testReadOnlyBookieShutdown() throws Exception { public void testInnerDelayedAuditOfLostBookies() throws Exception { LedgerHandle lh1 = createAndAddEntriesToLedger(); Long ledgerId = lh1.getId(); - LOG.debug("Created ledger : " + ledgerId); + if (LOG.isDebugEnabled()) { + LOG.debug("Created ledger : " + ledgerId); + } ledgerList.add(ledgerId); lh1.close(); @@ -375,9 +410,20 @@ public void testInnerDelayedAuditOfLostBookies() throws Exception { 
urLedgerMgr.setLostBookieRecoveryDelay(5); // shutdown a non auditor bookie; choosing non-auditor to avoid another election - String shutdownBookie = shutDownNonAuditorBookie(); + AtomicReference shutdownBookieRef = new AtomicReference<>(); + CountDownLatch shutdownLatch = new CountDownLatch(1); + new Thread(() -> { + try { + String shutdownBookie = shutDownNonAuditorBookie(); + shutdownBookieRef.set(shutdownBookie); + shutdownLatch.countDown(); + } catch (Exception ignore) { + } + }).start(); - LOG.debug("Waiting for ledgers to be marked as under replicated"); + if (LOG.isDebugEnabled()) { + LOG.debug("Waiting for ledgers to be marked as under replicated"); + } assertFalse("audit of lost bookie isn't delayed", underReplicaLatch.await(4, TimeUnit.SECONDS)); assertEquals("under replicated ledgers identified when it was not expected", 0, urLedgerList.size()); @@ -389,9 +435,10 @@ public void testInnerDelayedAuditOfLostBookies() throws Exception { urLedgerList.contains(ledgerId)); Map urLedgerData = getUrLedgerData(urLedgerList); String data = urLedgerData.get(ledgerId); - assertTrue("Bookie " + shutdownBookie + shutdownLatch.await(); + assertTrue("Bookie " + shutdownBookieRef.get() + "is not listed in the ledger as missing replica :" + data, - data.contains(shutdownBookie)); + data.contains(shutdownBookieRef.get())); } /** @@ -437,7 +484,9 @@ public void testRescheduleOfDelayedAuditOfLostBookiesToStartImmediately() throws LedgerHandle lh1 = createAndAddEntriesToLedger(); Long ledgerId = lh1.getId(); - LOG.debug("Created ledger : " + ledgerId); + if (LOG.isDebugEnabled()) { + LOG.debug("Created ledger : " + ledgerId); + } ledgerList.add(ledgerId); lh1.close(); @@ -448,9 +497,20 @@ public void testRescheduleOfDelayedAuditOfLostBookiesToStartImmediately() throws urLedgerMgr.setLostBookieRecoveryDelay(50); // shutdown a non auditor bookie; choosing non-auditor to avoid another election - String shutdownBookie = shutDownNonAuditorBookie(); + AtomicReference shutdownBookieRef = new AtomicReference<>(); + CountDownLatch shutdownLatch = new CountDownLatch(1); + new Thread(() -> { + try { + String shutdownBookie = shutDownNonAuditorBookie(); + shutdownBookieRef.set(shutdownBookie); + shutdownLatch.countDown(); + } catch (Exception ignore) { + } + }).start(); - LOG.debug("Waiting for ledgers to be marked as under replicated"); + if (LOG.isDebugEnabled()) { + LOG.debug("Waiting for ledgers to be marked as under replicated"); + } assertFalse("audit of lost bookie isn't delayed", underReplicaLatch.await(4, TimeUnit.SECONDS)); assertEquals("under replicated ledgers identified when it was not expected", 0, urLedgerList.size()); @@ -465,9 +525,10 @@ public void testRescheduleOfDelayedAuditOfLostBookiesToStartImmediately() throws urLedgerList.contains(ledgerId)); Map urLedgerData = getUrLedgerData(urLedgerList); String data = urLedgerData.get(ledgerId); - assertTrue("Bookie " + shutdownBookie + shutdownLatch.await(); + assertTrue("Bookie " + shutdownBookieRef.get() + "is not listed in the ledger as missing replica :" + data, - data.contains(shutdownBookie)); + data.contains(shutdownBookieRef.get())); } @Test @@ -477,7 +538,9 @@ public void testRescheduleOfDelayedAuditOfLostBookiesToStartLater() throws Excep LedgerHandle lh1 = createAndAddEntriesToLedger(); Long ledgerId = lh1.getId(); - LOG.debug("Created ledger : " + ledgerId); + if (LOG.isDebugEnabled()) { + LOG.debug("Created ledger : " + ledgerId); + } ledgerList.add(ledgerId); lh1.close(); @@ -488,9 +551,20 @@ public void 
testRescheduleOfDelayedAuditOfLostBookiesToStartLater() throws Excep urLedgerMgr.setLostBookieRecoveryDelay(3); // shutdown a non auditor bookie; choosing non-auditor to avoid another election - String shutdownBookie = shutDownNonAuditorBookie(); + AtomicReference shutdownBookieRef = new AtomicReference<>(); + CountDownLatch shutdownLatch = new CountDownLatch(1); + new Thread(() -> { + try { + String shutdownBookie = shutDownNonAuditorBookie(); + shutdownBookieRef.set(shutdownBookie); + shutdownLatch.countDown(); + } catch (Exception ignore) { + } + }).start(); - LOG.debug("Waiting for ledgers to be marked as under replicated"); + if (LOG.isDebugEnabled()) { + LOG.debug("Waiting for ledgers to be marked as under replicated"); + } assertFalse("audit of lost bookie isn't delayed", underReplicaLatch.await(2, TimeUnit.SECONDS)); assertEquals("under replicated ledgers identified when it was not expected", 0, urLedgerList.size()); @@ -499,7 +573,9 @@ public void testRescheduleOfDelayedAuditOfLostBookiesToStartLater() throws Excep urLedgerMgr.setLostBookieRecoveryDelay(4); // since we changed the BookieRecoveryDelay period to 4, the audittask shouldn't have been executed - LOG.debug("Waiting for ledgers to be marked as under replicated"); + if (LOG.isDebugEnabled()) { + LOG.debug("Waiting for ledgers to be marked as under replicated"); + } assertFalse("audit of lost bookie isn't delayed", underReplicaLatch.await(2, TimeUnit.SECONDS)); assertEquals("under replicated ledgers identified when it was not expected", 0, urLedgerList.size()); @@ -510,9 +586,10 @@ public void testRescheduleOfDelayedAuditOfLostBookiesToStartLater() throws Excep urLedgerList.contains(ledgerId)); Map urLedgerData = getUrLedgerData(urLedgerList); String data = urLedgerData.get(ledgerId); - assertTrue("Bookie " + shutdownBookie + shutdownLatch.await(); + assertTrue("Bookie " + shutdownBookieRef.get() + "is not listed in the ledger as missing replica :" + data, - data.contains(shutdownBookie)); + data.contains(shutdownBookieRef.get())); } @Test @@ -546,29 +623,23 @@ public void testTriggerAuditorWithNoPendingAuditTask() throws Exception { int numofledgers = 5; Random rand = new Random(); for (int i = 0; i < numofledgers; i++) { - LedgerMetadata metadata = new LedgerMetadata(3, 2, 2, DigestType.CRC32, "passwd".getBytes()); - ArrayList ensemble = new ArrayList(); - ensemble.add(new BookieSocketAddress("99.99.99.99:9999")); - ensemble.add(new BookieSocketAddress("11.11.11.11:1111")); - ensemble.add(new BookieSocketAddress("88.88.88.88:8888")); - metadata.addEnsemble(0, ensemble); - - MutableInt ledgerCreateRC = new MutableInt(-1); - CountDownLatch latch = new CountDownLatch(1); + ArrayList ensemble = new ArrayList(); + ensemble.add(new BookieSocketAddress("99.99.99.99:9999").toBookieId()); + ensemble.add(new BookieSocketAddress("11.11.11.11:1111").toBookieId()); + ensemble.add(new BookieSocketAddress("88.88.88.88:8888").toBookieId()); + long ledgerId = (Math.abs(rand.nextLong())) % 100000000; + LedgerMetadata metadata = LedgerMetadataBuilder.create() + .withId(ledgerId) + .withEnsembleSize(3).withWriteQuorumSize(2).withAckQuorumSize(2) + .withPassword("passwd".getBytes()) + .withDigestType(DigestType.CRC32.toApiDigestType()) + .newEnsembleEntry(0L, ensemble).build(); + try (LedgerManager lm = driver.getLedgerManagerFactory().newLedgerManager()) { - lm.createLedgerMetadata(ledgerId, metadata, - (rc, result) -> { - ledgerCreateRC.setValue(rc); - latch.countDown(); - }); + lm.createLedgerMetadata(ledgerId, metadata).get(2000, 
TimeUnit.MILLISECONDS); } - - Assert.assertTrue("Ledger creation should complete within 2 secs", - latch.await(2000, TimeUnit.MILLISECONDS)); - Assert.assertEquals("LedgerCreate should succeed and return OK rc value", BKException.Code.OK, - ledgerCreateRC.getValue()); ledgerList.add(ledgerId); } @@ -593,7 +664,9 @@ public void testTriggerAuditorWithPendingAuditTask() throws Exception { Auditor auditorBookiesAuditor = getAuditorBookiesAuditor(); LedgerHandle lh1 = createAndAddEntriesToLedger(); Long ledgerId = lh1.getId(); - LOG.debug("Created ledger : " + ledgerId); + if (LOG.isDebugEnabled()) { + LOG.debug("Created ledger : " + ledgerId); + } ledgerList.add(ledgerId); lh1.close(); @@ -605,9 +678,16 @@ public void testTriggerAuditorWithPendingAuditTask() throws Exception { urLedgerMgr.setLostBookieRecoveryDelay(lostBookieRecoveryDelay); // shutdown a non auditor bookie; choosing non-auditor to avoid another election - String shutdownBookie = shutDownNonAuditorBookie(); + new Thread(() -> { + try { + shutDownNonAuditorBookie(); + } catch (Exception ignore) { + } + }).start(); - LOG.debug("Waiting for ledgers to be marked as under replicated"); + if (LOG.isDebugEnabled()) { + LOG.debug("Waiting for ledgers to be marked as under replicated"); + } assertFalse("audit of lost bookie isn't delayed", underReplicaLatch.await(2, TimeUnit.SECONDS)); assertEquals("under replicated ledgers identified when it was not expected", 0, urLedgerList.size()); @@ -640,7 +720,9 @@ public void testTriggerAuditorBySettingDelayToZeroWithPendingAuditTask() throws Auditor auditorBookiesAuditor = getAuditorBookiesAuditor(); LedgerHandle lh1 = createAndAddEntriesToLedger(); Long ledgerId = lh1.getId(); - LOG.debug("Created ledger : " + ledgerId); + if (LOG.isDebugEnabled()) { + LOG.debug("Created ledger : " + ledgerId); + } ledgerList.add(ledgerId); lh1.close(); @@ -652,9 +734,16 @@ public void testTriggerAuditorBySettingDelayToZeroWithPendingAuditTask() throws urLedgerMgr.setLostBookieRecoveryDelay(lostBookieRecoveryDelay); // shutdown a non auditor bookie; choosing non-auditor to avoid another election - String shutdownBookie = shutDownNonAuditorBookie(); + new Thread(() -> { + try { + shutDownNonAuditorBookie(); + } catch (Exception ignore) { + } + }).start(); - LOG.debug("Waiting for ledgers to be marked as under replicated"); + if (LOG.isDebugEnabled()) { + LOG.debug("Waiting for ledgers to be marked as under replicated"); + } assertFalse("audit of lost bookie isn't delayed", underReplicaLatch.await(2, TimeUnit.SECONDS)); assertEquals("under replicated ledgers identified when it was not expected", 0, urLedgerList.size()); @@ -691,7 +780,9 @@ public void testDelayedAuditWithMultipleBookieFailures() throws Exception { // create a ledger with a bunch of entries LedgerHandle lh1 = createAndAddEntriesToLedger(); Long ledgerId = lh1.getId(); - LOG.debug("Created ledger : " + ledgerId); + if (LOG.isDebugEnabled()) { + LOG.debug("Created ledger : " + ledgerId); + } ledgerList.add(ledgerId); lh1.close(); @@ -700,8 +791,17 @@ public void testDelayedAuditWithMultipleBookieFailures() throws Exception { // wait for 10 seconds before starting the recovery work when a bookie fails urLedgerMgr.setLostBookieRecoveryDelay(10); - // shutdown a non auditor bookie to avoid an election - String shutdownBookie1 = shutDownNonAuditorBookie(); + // shutdown a non auditor bookie; choosing non-auditor to avoid another election + AtomicReference shutdownBookieRef1 = new AtomicReference<>(); + CountDownLatch shutdownLatch1 = new 
CountDownLatch(1); + new Thread(() -> { + try { + String shutdownBookie1 = shutDownNonAuditorBookie(); + shutdownBookieRef1.set(shutdownBookie1); + shutdownLatch1.countDown(); + } catch (Exception ignore) { + } + }).start(); // wait for 3 seconds and there shouldn't be any under replicated ledgers // because we have delayed the start of audit by 10 seconds @@ -713,7 +813,16 @@ public void testDelayedAuditWithMultipleBookieFailures() throws Exception { // the history about having delayed recovery remains. Hence we make sure // we bring down a non auditor bookie. This should cause the audit to take // place immediately and not wait for the remaining 7 seconds to elapse - String shutdownBookie2 = shutDownNonAuditorBookie(); + AtomicReference shutdownBookieRef2 = new AtomicReference<>(); + CountDownLatch shutdownLatch2 = new CountDownLatch(1); + new Thread(() -> { + try { + String shutdownBookie2 = shutDownNonAuditorBookie(); + shutdownBookieRef2.set(shutdownBookie2); + shutdownLatch2.countDown(); + } catch (Exception ignore) { + } + }).start(); // 2 second grace period for the ledgers to get reported as under replicated Thread.sleep(2000); @@ -726,9 +835,11 @@ public void testDelayedAuditWithMultipleBookieFailures() throws Exception { urLedgerList.contains(ledgerId)); Map urLedgerData = getUrLedgerData(urLedgerList); String data = urLedgerData.get(ledgerId); - assertTrue("Bookie " + shutdownBookie1 + shutdownBookie2 + shutdownLatch1.await(); + shutdownLatch2.await(); + assertTrue("Bookie " + shutdownBookieRef1.get() + shutdownBookieRef2.get() + " are not listed in the ledger as missing replicas :" + data, - data.contains(shutdownBookie1) && data.contains(shutdownBookie2)); + data.contains(shutdownBookieRef1.get()) && data.contains(shutdownBookieRef2.get())); } /** @@ -744,7 +855,9 @@ public void testDelayedAuditWithRollingUpgrade() throws Exception { // create a ledger with a bunch of entries LedgerHandle lh1 = createAndAddEntriesToLedger(); Long ledgerId = lh1.getId(); - LOG.debug("Created ledger : " + ledgerId); + if (LOG.isDebugEnabled()) { + LOG.debug("Created ledger : " + ledgerId); + } ledgerList.add(ledgerId); lh1.close(); @@ -755,8 +868,18 @@ public void testDelayedAuditWithRollingUpgrade() throws Exception { // shutdown a non auditor bookie to avoid an election int idx1 = getShutDownNonAuditorBookieIdx(""); - ServerConfiguration conf1 = bsConfs.get(idx1); - String shutdownBookie1 = shutdownBookie(idx1); + ServerConfiguration conf1 = confByIndex(idx1); + + AtomicReference shutdownBookieRef1 = new AtomicReference<>(); + CountDownLatch shutdownLatch1 = new CountDownLatch(1); + new Thread(() -> { + try { + String shutdownBookie1 = shutdownBookie(idx1); + shutdownBookieRef1.set(shutdownBookie1); + shutdownLatch1.countDown(); + } catch (Exception ignore) { + } + }).start(); // wait for 2 seconds and there shouldn't be any under replicated ledgers // because we have delayed the start of audit by 5 seconds @@ -765,12 +888,21 @@ public void testDelayedAuditWithRollingUpgrade() throws Exception { urLedgerList.size()); // restart the bookie we shut down above - bs.add(startBookie(conf1)); + startAndAddBookie(conf1); // Now to simulate the rolling upgrade, bring down a bookie different from // the one we brought down/up above. 
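+ // As with the other shutdowns in this patch, the call runs on a background thread + // and its latch is awaited only after the timing-sensitive assertions, so a slow + // bookie shutdown cannot skew the delayed-audit measurements.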
- String shutdownBookie2 = shutDownNonAuditorBookie(shutdownBookie1); - + // shutdown a non auditor bookie; choosing non-auditor to avoid another election + AtomicReference shutdownBookieRef2 = new AtomicReference<>(); + CountDownLatch shutdownLatch2 = new CountDownLatch(1); + new Thread(() -> { + try { + String shutdownBookie2 = shutDownNonAuditorBookie(); + shutdownBookieRef2.set(shutdownBookie2); + shutdownLatch2.countDown(); + } catch (Exception ignore) { + } + }).start(); // since the first bookie that was brought down/up has come up, there is only // one bookie down at this time. Hence the lost bookie check shouldn't start // immediately; it will start 5 seconds after the second bookie went down @@ -787,11 +919,13 @@ public void testDelayedAuditWithRollingUpgrade() throws Exception { urLedgerList.contains(ledgerId)); Map urLedgerData = getUrLedgerData(urLedgerList); String data = urLedgerData.get(ledgerId); - assertTrue("Bookie " + shutdownBookie1 + "wrongly listed as missing the ledger: " + data, - !data.contains(shutdownBookie1)); - assertTrue("Bookie " + shutdownBookie2 + shutdownLatch1.await(); + shutdownLatch2.await(); + assertTrue("Bookie " + shutdownBookieRef1.get() + "wrongly listed as missing the ledger: " + data, + !data.contains(shutdownBookieRef1.get())); + assertTrue("Bookie " + shutdownBookieRef2.get() + " is not listed in the ledger as missing replicas :" + data, - data.contains(shutdownBookie2)); + data.contains(shutdownBookieRef2.get())); LOG.info("*****************Test Complete"); } @@ -816,7 +950,7 @@ private boolean waitForLedgerMissingReplicas(Long ledgerId, long secondsToWait, throws Exception { for (int i = 0; i < secondsToWait; i++) { try { - UnderreplicatedLedgerFormat data = urLedgerMgr.getLedgerUnreplicationInfo(ledgerId); + UnderreplicatedLedger data = urLedgerMgr.getLedgerUnreplicationInfo(ledgerId); boolean all = true; for (String r : replicas) { all = all && data.getReplicaList().contains(r); @@ -855,9 +989,11 @@ private void doLedgerRereplication(Long... 
ledgerIds) } private String shutdownBookie(int bkShutdownIndex) throws Exception { - BookieServer bkServer = bs.get(bkShutdownIndex); - String bookieAddr = bkServer.getLocalAddress().toString(); - LOG.debug("Shutting down bookie:" + bookieAddr); + BookieServer bkServer = serverByIndex(bkShutdownIndex); + String bookieAddr = bkServer.getBookieId().toString(); + if (LOG.isDebugEnabled()) { + LOG.debug("Shutting down bookie:" + bookieAddr); + } killBookie(bkShutdownIndex); auditorElectors.get(bookieAddr).shutdown(); auditorElectors.remove(bookieAddr); @@ -925,7 +1061,9 @@ public void process(WatchedEvent event) { urLedgerList.add(ledgerId); } } - LOG.debug("Count down and waiting for next notification"); + if (LOG.isDebugEnabled()) { + LOG.debug("Count down and waiting for next notification"); + } // count down and waiting for next notification underReplicaLatch.countDown(); } @@ -935,9 +1073,10 @@ private BookieServer getAuditorBookie() throws Exception { List auditors = new LinkedList(); byte[] data = zkc.getData(electionPath, false, null); assertNotNull("Auditor election failed", data); - for (BookieServer bks : bs) { - if (new String(data).contains(bks.getLocalAddress().getPort() + "")) { - auditors.add(bks); + for (int i = 0; i < bookieCount(); i++) { + BookieId bookieId = addressByIndex(i); + if (new String(data).contains(bookieId + "")) { + auditors.add(serverByIndex(i)); } } assertEquals("Multiple Bookies acting as Auditor!", 1, auditors @@ -947,15 +1086,15 @@ private BookieServer getAuditorBookie() throws Exception { private Auditor getAuditorBookiesAuditor() throws Exception { BookieServer auditorBookieServer = getAuditorBookie(); - String bookieAddr = auditorBookieServer.getLocalAddress().toString(); + String bookieAddr = auditorBookieServer.getBookieId().toString(); return auditorElectors.get(bookieAddr).auditor; } - private String shutDownNonAuditorBookie() throws Exception { + private String shutDownNonAuditorBookie() throws Exception { // shutdown bookie which is not an auditor - int indexOf = bs.indexOf(getAuditorBookie()); + int indexOf = indexOfServer(getAuditorBookie()); int bkIndexDownBookie; - if (indexOf < bs.size() - 1) { + if (indexOf < lastBookieIndex()) { bkIndexDownBookie = indexOf + 1; } else { bkIndexDownBookie = indexOf - 1; @@ -965,10 +1104,10 @@ private String shutDownNonAuditorBookie() throws Exception { private int getShutDownNonAuditorBookieIdx(String exclude) throws Exception { // shutdown bookie which is not an auditor - int indexOf = bs.indexOf(getAuditorBookie()); + int indexOf = indexOfServer(getAuditorBookie()); int bkIndexDownBookie = 0; - for (int i = 0; i < bs.size(); i++) { - if (i == indexOf || bs.get(i).getLocalAddress().toString().equals(exclude)) { + for (int i = 0; i <= lastBookieIndex(); i++) { + if (i == indexOf || addressByIndex(i).toString().equals(exclude)) { continue; } bkIndexDownBookie = i; diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/AuditorPeriodicBookieCheckTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/AuditorPeriodicBookieCheckTest.java index 3ec8ae17ef0..5d25ce76e94 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/AuditorPeriodicBookieCheckTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/AuditorPeriodicBookieCheckTest.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file @@ -23,24 +23,17 @@ import static org.apache.bookkeeper.meta.MetadataDrivers.runFunctionWithLedgerManagerFactory; import static org.junit.Assert.assertEquals; +import com.google.common.collect.Lists; import com.google.common.util.concurrent.UncheckedExecutionException; -import java.util.ArrayList; -import java.util.List; import lombok.Cleanup; -import org.apache.bookkeeper.client.BookKeeper.DigestType; -import org.apache.bookkeeper.client.LedgerHandle; -import org.apache.bookkeeper.client.LedgerHandleAdapter; -import org.apache.bookkeeper.client.LedgerMetadata; +import org.apache.bookkeeper.client.ClientUtil; +import org.apache.bookkeeper.client.LedgerMetadataBuilder; import org.apache.bookkeeper.conf.ServerConfiguration; import org.apache.bookkeeper.conf.TestBKConfiguration; import org.apache.bookkeeper.meta.LedgerManager; import org.apache.bookkeeper.meta.LedgerUnderreplicationManager; -import org.apache.bookkeeper.meta.zk.ZKMetadataDriverBase; import org.apache.bookkeeper.net.BookieSocketAddress; -import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GenericCallbackFuture; import org.apache.bookkeeper.test.BookKeeperClusterTestCase; -import org.apache.bookkeeper.zookeeper.ZooKeeperClient; -import org.apache.zookeeper.ZooKeeper; import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -56,7 +49,6 @@ public class AuditorPeriodicBookieCheckTest extends BookKeeperClusterTestCase { .getLogger(AuditorPeriodicBookieCheckTest.class); private AuditorElector auditorElector = null; - private ZooKeeper auditorZookeeper = null; private static final int CHECK_INTERVAL = 1; // run every second @@ -73,15 +65,10 @@ public void setUp() throws Exception { ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); conf.setAuditorPeriodicBookieCheckInterval(CHECK_INTERVAL); conf.setMetadataServiceUri(metadataServiceUri); - String addr = bs.get(0).getLocalAddress().toString(); - - auditorZookeeper = ZooKeeperClient.newBuilder() - .connectString(ZKMetadataDriverBase.resolveZkServers(conf)) - .sessionTimeoutMs(10000) - .build(); + conf.setProperty("clientConnectTimeoutMillis", 500); + String addr = addressByIndex(0).toString(); - auditorElector = new AuditorElector(addr, conf, - auditorZookeeper); + auditorElector = new AuditorElector(addr, conf); auditorElector.start(); } @@ -89,7 +76,6 @@ public void setUp() throws Exception { @Override public void tearDown() throws Exception { auditorElector.shutdown(); - auditorZookeeper.close(); super.tearDown(); } @@ -99,23 +85,19 @@ public void tearDown() throws Exception { */ @Test public void testPeriodicBookieCheckInterval() throws Exception { - bsConfs.get(0).setMetadataServiceUri(zkUtil.getMetadataServiceUri()); - runFunctionWithLedgerManagerFactory(bsConfs.get(0), mFactory -> { + confByIndex(0).setMetadataServiceUri(zkUtil.getMetadataServiceUri()); + runFunctionWithLedgerManagerFactory(confByIndex(0), mFactory -> { try (LedgerManager ledgerManager = mFactory.newLedgerManager()) { @Cleanup final LedgerUnderreplicationManager underReplicationManager = mFactory.newLedgerUnderreplicationManager(); - - LedgerHandle lh = bkc.createLedger(3, 3, DigestType.CRC32, "passwd".getBytes()); - LedgerMetadata md = LedgerHandleAdapter.getLedgerMetadata(lh); - List ensemble = new ArrayList<>(md.getEnsembles().get(0L)); - ensemble.set(0, new BookieSocketAddress("1.1.1.1", 1000)); - md.updateEnsemble(0L, ensemble); - - GenericCallbackFuture cb = - new GenericCallbackFuture(); - ledgerManager.writeLedgerMetadata(lh.getId(), 
md, cb); - cb.get(); - + long ledgerId = 12345L; + ClientUtil.setupLedger(bkc.getLedgerManager(), ledgerId, + LedgerMetadataBuilder.create().withEnsembleSize(3) + .withWriteQuorumSize(3).withAckQuorumSize(3) + .newEnsembleEntry(0L, Lists.newArrayList( + new BookieSocketAddress("192.0.2.1", 1000).toBookieId(), + getBookie(0), + getBookie(1)))); long underReplicatedLedger = -1; for (int i = 0; i < 10; i++) { underReplicatedLedger = underReplicationManager.pollLedgerToRereplicate(); @@ -124,7 +106,7 @@ public void testPeriodicBookieCheckInterval() throws Exception { } Thread.sleep(CHECK_INTERVAL * 1000); } - assertEquals("Ledger should be under replicated", lh.getId(), underReplicatedLedger); + assertEquals("Ledger should be under replicated", ledgerId, underReplicatedLedger); } catch (Exception e) { throw new UncheckedExecutionException(e.getMessage(), e); } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/AuditorPeriodicCheckTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/AuditorPeriodicCheckTest.java index 4e3bda8ebae..53e139b19e8 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/AuditorPeriodicCheckTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/AuditorPeriodicCheckTest.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -20,10 +20,12 @@ */ package org.apache.bookkeeper.replication; +import static org.apache.bookkeeper.replication.ReplicationStats.AUDITOR_SCOPE; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotEquals; import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; import io.netty.buffer.ByteBuf; import java.io.File; @@ -31,6 +33,7 @@ import java.io.FilenameFilter; import java.io.IOException; import java.net.URI; +import java.net.UnknownHostException; import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.HashMap; @@ -38,28 +41,39 @@ import java.util.List; import java.util.Map; import java.util.concurrent.CountDownLatch; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicReference; import org.apache.bookkeeper.bookie.Bookie; import org.apache.bookkeeper.bookie.BookieAccessor; import org.apache.bookkeeper.bookie.BookieException; +import org.apache.bookkeeper.bookie.BookieImpl; import org.apache.bookkeeper.bookie.IndexPersistenceMgr; +import org.apache.bookkeeper.bookie.TestBookieImpl; import org.apache.bookkeeper.client.AsyncCallback.AddCallback; import org.apache.bookkeeper.client.BKException; +import org.apache.bookkeeper.client.BookKeeper; import org.apache.bookkeeper.client.BookKeeper.DigestType; +import org.apache.bookkeeper.client.BookKeeperAdmin; import org.apache.bookkeeper.client.LedgerHandle; -import org.apache.bookkeeper.client.LedgerHandleAdapter; import org.apache.bookkeeper.conf.ServerConfiguration; import org.apache.bookkeeper.meta.LedgerManagerFactory; import org.apache.bookkeeper.meta.LedgerUnderreplicationManager; import org.apache.bookkeeper.meta.MetadataBookieDriver; import org.apache.bookkeeper.meta.MetadataDrivers; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.BookieId; import 
org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.WriteCallback; +import org.apache.bookkeeper.replication.ReplicationException.UnavailableException; +import org.apache.bookkeeper.stats.Counter; import org.apache.bookkeeper.stats.NullStatsLogger; +import org.apache.bookkeeper.stats.StatsLogger; import org.apache.bookkeeper.test.BookKeeperClusterTestCase; -import org.apache.bookkeeper.zookeeper.ZooKeeperClient; -import org.apache.zookeeper.ZooKeeper; +import org.apache.bookkeeper.test.TestStatsProvider; +import org.apache.bookkeeper.test.TestStatsProvider.TestOpStatsLogger; +import org.apache.bookkeeper.test.TestStatsProvider.TestStatsLogger; +import org.awaitility.Awaitility; import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -76,7 +90,6 @@ public class AuditorPeriodicCheckTest extends BookKeeperClusterTestCase { private MetadataBookieDriver driver; private HashMap auditorElectors = new HashMap(); - private List zkClients = new LinkedList(); private static final int CHECK_INTERVAL = 1; // run every second @@ -91,29 +104,23 @@ public void setUp() throws Exception { super.setUp(); for (int i = 0; i < numBookies; i++) { - ServerConfiguration conf = new ServerConfiguration(bsConfs.get(i)); + ServerConfiguration conf = new ServerConfiguration(confByIndex(i)); conf.setAuditorPeriodicCheckInterval(CHECK_INTERVAL); - String addr = bs.get(i).getLocalAddress().toString(); - - ZooKeeper zk = ZooKeeperClient.newBuilder() - .connectString(zkUtil.getZooKeeperConnectString()) - .sessionTimeoutMs(10000) - .build(); - zkClients.add(zk); + String addr = addressByIndex(i).toString(); - AuditorElector auditorElector = new AuditorElector(addr, - conf, zk); + AuditorElector auditorElector = new AuditorElector(addr, conf); auditorElectors.put(addr, auditorElector); auditorElector.start(); - LOG.debug("Starting Auditor Elector"); + if (LOG.isDebugEnabled()) { + LOG.debug("Starting Auditor Elector"); + } } driver = MetadataDrivers.getBookieDriver( - URI.create(bsConfs.get(0).getMetadataServiceUri())); + URI.create(confByIndex(0).getMetadataServiceUri())); driver.initialize( - bsConfs.get(0), - () -> {}, + confByIndex(0), NullStatsLogger.INSTANCE); } @@ -127,10 +134,6 @@ public void tearDown() throws Exception { for (AuditorElector e : auditorElectors.values()) { e.shutdown(); } - for (ZooKeeper zk : zkClients) { - zk.close(); - } - zkClients.clear(); super.tearDown(); } @@ -151,11 +154,11 @@ public void testEntryLogCorruption() throws Exception { } lh.close(); - BookieAccessor.forceFlush(bs.get(0).getBookie()); + BookieAccessor.forceFlush((BookieImpl) serverByIndex(0).getBookie()); - File ledgerDir = bsConfs.get(0).getLedgerDirs()[0]; - ledgerDir = Bookie.getCurrentDirectory(ledgerDir); + File ledgerDir = confByIndex(0).getLedgerDirs()[0]; + ledgerDir = BookieImpl.getCurrentDirectory(ledgerDir); // corrupt of entryLogs File[] entryLogs = ledgerDir.listFiles(new FilenameFilter() { public boolean accept(File dir, String name) { @@ -207,10 +210,10 @@ public void testIndexCorruption() throws Exception { } lh.close(); - BookieAccessor.forceFlush(bs.get(0).getBookie()); + BookieAccessor.forceFlush((BookieImpl) serverByIndex(0).getBookie()); - File ledgerDir = bsConfs.get(0).getLedgerDirs()[0]; - ledgerDir = Bookie.getCurrentDirectory(ledgerDir); + File ledgerDir = confByIndex(0).getLedgerDirs()[0]; + ledgerDir = BookieImpl.getCurrentDirectory(ledgerDir); // corrupt of entryLogs File index = new File(ledgerDir, IndexPersistenceMgr.getLedgerName(ledgerToCorrupt)); @@ -221,7 +224,7 @@ public 
void testIndexCorruption() throws Exception { out.close(); long underReplicatedLedger = -1; - for (int i = 0; i < 10; i++) { + for (int i = 0; i < 15; i++) { underReplicatedLedger = underReplicationManager.pollLedgerToRereplicate(); if (underReplicatedLedger != -1) { break; @@ -273,7 +276,7 @@ public void addComplete(int rc2, LedgerHandle lh, long entryId, Object ctx) { final AtomicInteger numReads = new AtomicInteger(0); ServerConfiguration conf = killBookie(0); - Bookie deadBookie = new Bookie(conf) { + Bookie deadBookie = new TestBookieImpl(conf) { @Override public ByteBuf readEntry(long ledgerId, long entryId) throws IOException, NoLedgerException { @@ -282,8 +285,7 @@ public ByteBuf readEntry(long ledgerId, long entryId) throw new IOException("Fake I/O exception"); } }; - bsConfs.add(conf); - bs.add(startBookie(conf, deadBookie)); + startAndAddBookie(conf, deadBookie); Thread.sleep(CHECK_INTERVAL * 2000); assertEquals("Nothing should have tried to read", 0, numReads.get()); @@ -338,8 +340,8 @@ public void testPeriodicCheckWhenLedgerDeleted() throws Exception { } try (final Auditor auditor = new Auditor( - Bookie.getBookieAddress(bsConfs.get(0)).toString(), - bsConfs.get(0), zkc, NullStatsLogger.INSTANCE)) { + BookieImpl.getBookieId(confByIndex(0)).toString(), + confByIndex(0), NullStatsLogger.INSTANCE)) { final AtomicBoolean exceptionCaught = new AtomicBoolean(false); final CountDownLatch latch = new CountDownLatch(1); Thread t = new Thread() { @@ -347,7 +349,7 @@ public void run() { try { latch.countDown(); for (int i = 0; i < numLedgers; i++) { - auditor.checkAllLedgers(); + ((AuditorCheckAllLedgersTask) auditor.auditorCheckAllLedgersTask).checkAllLedgers(); } } catch (Exception e) { LOG.error("Caught exception while checking all ledgers", e); @@ -365,26 +367,627 @@ public void run() { } } - private BookieSocketAddress replaceBookieWithWriteFailingBookie(LedgerHandle lh) throws Exception { + @Test + public void testGetLedgerFromZookeeperThrottled() throws Exception { + final int numberLedgers = 30; + + // write ledgers into bookkeeper cluster + try { + for (AuditorElector e : auditorElectors.values()) { + e.shutdown(); + } + + for (int i = 0; i < numberLedgers; ++i) { + LedgerHandle lh = bkc.createLedger(3, 3, DigestType.CRC32, "passwd".getBytes()); + for (int j = 0; j < 5; j++) { + lh.addEntry("testdata".getBytes()); + } + lh.close(); + } + } catch (InterruptedException | BKException e) { + LOG.error("Failed to shutdown auditor elector or write data to ledgers ", e); + fail(); + } + + // create auditor and call `checkAllLedgers` + ServerConfiguration configuration = confByIndex(0); + configuration.setAuditorMaxNumberOfConcurrentOpenLedgerOperations(10); + + TestStatsProvider statsProvider = new TestStatsProvider(); + TestStatsLogger statsLogger = statsProvider.getStatsLogger(AUDITOR_SCOPE); + Counter numLedgersChecked = statsLogger + .getCounter(ReplicationStats.NUM_LEDGERS_CHECKED); + Auditor auditor = new Auditor(BookieImpl.getBookieId(configuration).toString(), + configuration, statsLogger); + + try { + ((AuditorCheckAllLedgersTask) auditor.auditorCheckAllLedgersTask).checkAllLedgers(); + assertEquals("NUM_LEDGERS_CHECKED", numberLedgers, (long) numLedgersChecked.get()); + } catch (Exception e) { + LOG.error("Caught exception while checking all ledgers ", e); + fail(); + } + } + + @Test + public void testInitialDelayOfCheckAllLedgers() throws Exception { + for (AuditorElector e : auditorElectors.values()) { + e.shutdown(); + } + + final int numLedgers = 10; + List ids = new 
LinkedList(); + for (int i = 0; i < numLedgers; i++) { + LedgerHandle lh = bkc.createLedger(3, 3, DigestType.CRC32, "passwd".getBytes()); + ids.add(lh.getId()); + for (int j = 0; j < 2; j++) { + lh.addEntry("testdata".getBytes()); + } + lh.close(); + } + + LedgerManagerFactory mFactory = driver.getLedgerManagerFactory(); + LedgerUnderreplicationManager urm = mFactory.newLedgerUnderreplicationManager(); + + ServerConfiguration servConf = new ServerConfiguration(confByIndex(0)); + validateInitialDelayOfCheckAllLedgers(urm, -1, 1000, servConf, bkc); + validateInitialDelayOfCheckAllLedgers(urm, 999, 1000, servConf, bkc); + validateInitialDelayOfCheckAllLedgers(urm, 1001, 1000, servConf, bkc); + } + + void validateInitialDelayOfCheckAllLedgers(LedgerUnderreplicationManager urm, long timeSinceLastExecutedInSecs, + long auditorPeriodicCheckInterval, ServerConfiguration servConf, BookKeeper bkc) + throws UnavailableException, UnknownHostException, InterruptedException { + TestStatsProvider statsProvider = new TestStatsProvider(); + TestStatsLogger statsLogger = statsProvider.getStatsLogger(AUDITOR_SCOPE); + TestOpStatsLogger checkAllLedgersStatsLogger = (TestOpStatsLogger) statsLogger + .getOpStatsLogger(ReplicationStats.CHECK_ALL_LEDGERS_TIME); + servConf.setAuditorPeriodicCheckInterval(auditorPeriodicCheckInterval); + servConf.setAuditorPeriodicPlacementPolicyCheckInterval(0); + servConf.setAuditorPeriodicBookieCheckInterval(0); + + final TestAuditor auditor = new TestAuditor(BookieImpl.getBookieId(servConf).toString(), servConf, bkc, false, + statsLogger, null); + CountDownLatch latch = auditor.getLatch(); + assertEquals("CHECK_ALL_LEDGERS_TIME SuccessCount", 0, checkAllLedgersStatsLogger.getSuccessCount()); + long curTimeBeforeStart = System.currentTimeMillis(); + long checkAllLedgersCTime = -1; + long initialDelayInMsecs = -1; + long nextExpectedCheckAllLedgersExecutionTime = -1; + long bufferTimeInMsecs = 12000L; + if (timeSinceLastExecutedInSecs == -1) { + /* + * if we are setting checkAllLedgersCTime to -1, it means that + * checkAllLedgers hasn't run before. So initialDelay for + * checkAllLedgers should be 0. + */ + checkAllLedgersCTime = -1; + initialDelayInMsecs = 0; + } else { + checkAllLedgersCTime = curTimeBeforeStart - timeSinceLastExecutedInSecs * 1000L; + initialDelayInMsecs = timeSinceLastExecutedInSecs > auditorPeriodicCheckInterval ? 0 + : (auditorPeriodicCheckInterval - timeSinceLastExecutedInSecs) * 1000L; + } + /* + * next checkAllLedgers should happen at least after + * nextExpectedCheckAllLedgersExecutionTime. + */ + nextExpectedCheckAllLedgersExecutionTime = curTimeBeforeStart + initialDelayInMsecs; + + urm.setCheckAllLedgersCTime(checkAllLedgersCTime); + auditor.start(); + /* + * since auditorPeriodicCheckInterval takes higher values (in the order of + * 100s of seconds), it's ok for bufferTimeInMsecs to be ~10 secs.
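For reference, the initial-delay rule that the three validateInitialDelayOf*() helpers assert can be reduced to a few lines. The following is an illustrative standalone sketch of that arithmetic, not BookKeeper code:

    // Illustrative sketch of the initial-delay rule validated above: a check
    // that has never run (ctime == -1) fires immediately; otherwise it waits
    // out whatever remains of the configured interval.
    static long initialDelayMs(long lastRunCTimeMs, long nowMs, long intervalSecs) {
        if (lastRunCTimeMs == -1) {
            return 0; // never ran before: no delay
        }
        long elapsedSecs = (nowMs - lastRunCTimeMs) / 1000;
        return elapsedSecs > intervalSecs ? 0 : (intervalSecs - elapsedSecs) * 1000;
    }

With a 1000-second interval, the three cases exercised above (timeSinceLastExecutedInSecs of -1, 999 and 1001) yield initial delays of 0 ms, 1000 ms and 0 ms respectively, which is what the latch timeouts below check.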
+ */ + assertTrue("checkAllLedgers should have executed with initialDelay " + initialDelayInMsecs, + latch.await(initialDelayInMsecs + bufferTimeInMsecs, TimeUnit.MILLISECONDS)); + for (int i = 0; i < 10; i++) { + Thread.sleep(100); + if (checkAllLedgersStatsLogger.getSuccessCount() >= 1) { + break; + } + } + assertEquals("CHECK_ALL_LEDGERS_TIME SuccessCount", 1, checkAllLedgersStatsLogger.getSuccessCount()); + long currentCheckAllLedgersCTime = urm.getCheckAllLedgersCTime(); + assertTrue( + "currentCheckAllLedgersCTime: " + currentCheckAllLedgersCTime + + " should be greater than nextExpectedCheckAllLedgersExecutionTime: " + + nextExpectedCheckAllLedgersExecutionTime, + currentCheckAllLedgersCTime > nextExpectedCheckAllLedgersExecutionTime); + assertTrue( + "currentCheckAllLedgersCTime: " + currentCheckAllLedgersCTime + + " should be lesser than nextExpectedCheckAllLedgersExecutionTime+bufferTimeInMsecs: " + + (nextExpectedCheckAllLedgersExecutionTime + bufferTimeInMsecs), + currentCheckAllLedgersCTime < (nextExpectedCheckAllLedgersExecutionTime + bufferTimeInMsecs)); + auditor.close(); + } + + @Test + public void testInitialDelayOfPlacementPolicyCheck() throws Exception { + for (AuditorElector e : auditorElectors.values()) { + e.shutdown(); + } + + final int numLedgers = 10; + List ids = new LinkedList(); + for (int i = 0; i < numLedgers; i++) { + LedgerHandle lh = bkc.createLedger(3, 3, DigestType.CRC32, "passwd".getBytes()); + ids.add(lh.getId()); + for (int j = 0; j < 2; j++) { + lh.addEntry("testdata".getBytes()); + } + lh.close(); + } + + LedgerManagerFactory mFactory = driver.getLedgerManagerFactory(); + LedgerUnderreplicationManager urm = mFactory.newLedgerUnderreplicationManager(); + + ServerConfiguration servConf = new ServerConfiguration(confByIndex(0)); + validateInitialDelayOfPlacementPolicyCheck(urm, -1, 1000, servConf, bkc); + validateInitialDelayOfPlacementPolicyCheck(urm, 999, 1000, servConf, bkc); + validateInitialDelayOfPlacementPolicyCheck(urm, 1001, 1000, servConf, bkc); + } + + void validateInitialDelayOfPlacementPolicyCheck(LedgerUnderreplicationManager urm, long timeSinceLastExecutedInSecs, + long auditorPeriodicPlacementPolicyCheckInterval, ServerConfiguration servConf, BookKeeper bkc) + throws UnavailableException, UnknownHostException, InterruptedException { + TestStatsProvider statsProvider = new TestStatsProvider(); + TestStatsLogger statsLogger = statsProvider.getStatsLogger(AUDITOR_SCOPE); + TestOpStatsLogger placementPolicyCheckStatsLogger = (TestOpStatsLogger) statsLogger + .getOpStatsLogger(ReplicationStats.PLACEMENT_POLICY_CHECK_TIME); + servConf.setAuditorPeriodicPlacementPolicyCheckInterval(auditorPeriodicPlacementPolicyCheckInterval); + servConf.setAuditorPeriodicCheckInterval(0); + servConf.setAuditorPeriodicBookieCheckInterval(0); + + final TestAuditor auditor = new TestAuditor(BookieImpl.getBookieId(servConf).toString(), servConf, bkc, false, + statsLogger, null); + CountDownLatch latch = auditor.getLatch(); + assertEquals("PLACEMENT_POLICY_CHECK_TIME SuccessCount", 0, placementPolicyCheckStatsLogger.getSuccessCount()); + long curTimeBeforeStart = System.currentTimeMillis(); + long placementPolicyCheckCTime = -1; + long initialDelayInMsecs = -1; + long nextExpectedPlacementPolicyCheckExecutionTime = -1; + long bufferTimeInMsecs = 20000L; + if (timeSinceLastExecutedInSecs == -1) { + /* + * if we are setting placementPolicyCheckCTime to -1, it means that + * placementPolicyCheck hasn't run before. 
So initialDelay for + * placementPolicyCheck should be 0. + */ + placementPolicyCheckCTime = -1; + initialDelayInMsecs = 0; + } else { + placementPolicyCheckCTime = curTimeBeforeStart - timeSinceLastExecutedInSecs * 1000L; + initialDelayInMsecs = timeSinceLastExecutedInSecs > auditorPeriodicPlacementPolicyCheckInterval ? 0 + : (auditorPeriodicPlacementPolicyCheckInterval - timeSinceLastExecutedInSecs) * 1000L; + } + /* + * next placementPolicyCheck should happen at least after + * nextExpectedPlacementPolicyCheckExecutionTime. + */ + nextExpectedPlacementPolicyCheckExecutionTime = curTimeBeforeStart + initialDelayInMsecs; + + urm.setPlacementPolicyCheckCTime(placementPolicyCheckCTime); + auditor.start(); + /* + * since auditorPeriodicPlacementPolicyCheckInterval takes higher values (in the + * order of 100s of seconds), it's ok for bufferTimeInMsecs to be ~20 secs. + */ + assertTrue("placementPolicyCheck should have executed with initialDelay " + initialDelayInMsecs, + latch.await(initialDelayInMsecs + bufferTimeInMsecs, TimeUnit.MILLISECONDS)); + for (int i = 0; i < 20; i++) { + Thread.sleep(100); + if (placementPolicyCheckStatsLogger.getSuccessCount() >= 1) { + break; + } + } + assertEquals("PLACEMENT_POLICY_CHECK_TIME SuccessCount", 1, placementPolicyCheckStatsLogger.getSuccessCount()); + long currentPlacementPolicyCheckCTime = urm.getPlacementPolicyCheckCTime(); + assertTrue( + "currentPlacementPolicyCheckCTime: " + currentPlacementPolicyCheckCTime + + " should be greater than nextExpectedPlacementPolicyCheckExecutionTime: " + + nextExpectedPlacementPolicyCheckExecutionTime, + currentPlacementPolicyCheckCTime > nextExpectedPlacementPolicyCheckExecutionTime); + assertTrue( + "currentPlacementPolicyCheckCTime: " + currentPlacementPolicyCheckCTime + + " should be less than nextExpectedPlacementPolicyCheckExecutionTime+bufferTimeInMsecs: " + + (nextExpectedPlacementPolicyCheckExecutionTime + bufferTimeInMsecs), + currentPlacementPolicyCheckCTime < (nextExpectedPlacementPolicyCheckExecutionTime + bufferTimeInMsecs)); + auditor.close(); + } + + @Test + public void testInitialDelayOfReplicasCheck() throws Exception { + for (AuditorElector e : auditorElectors.values()) { + e.shutdown(); + } + + LedgerHandle lh = bkc.createLedger(3, 2, DigestType.CRC32, "passwd".getBytes()); + for (int j = 0; j < 5; j++) { + lh.addEntry("testdata".getBytes()); + } + lh.close(); + + long ledgerId = 100000L; + lh = bkc.createLedgerAdv(ledgerId, 3, 2, 2, DigestType.CRC32, "passwd".getBytes(), null); + lh.close(); + + ledgerId = 100001234L; + lh = bkc.createLedgerAdv(ledgerId, 3, 3, 2, DigestType.CRC32, "passwd".getBytes(), null); + for (int j = 0; j < 4; j++) { + lh.addEntry(j, "testdata".getBytes()); + } + lh.close(); + + ledgerId = 991234L; + lh = bkc.createLedgerAdv(ledgerId, 3, 2, 2, DigestType.CRC32, "passwd".getBytes(), null); + lh.addEntry(0, "testdata".getBytes()); + lh.close(); + + LedgerManagerFactory mFactory = driver.getLedgerManagerFactory(); + LedgerUnderreplicationManager urm = mFactory.newLedgerUnderreplicationManager(); + + ServerConfiguration servConf = new ServerConfiguration(confByIndex(0)); + validateInitialDelayOfReplicasCheck(urm, -1, 1000, servConf, bkc); + validateInitialDelayOfReplicasCheck(urm, 999, 1000, servConf, bkc); + validateInitialDelayOfReplicasCheck(urm, 1001, 1000, servConf, bkc); + } + + void validateInitialDelayOfReplicasCheck(LedgerUnderreplicationManager urm, long timeSinceLastExecutedInSecs, + long auditorPeriodicReplicasCheckInterval, ServerConfiguration servConf, BookKeeper
bkc) + throws UnavailableException, UnknownHostException, InterruptedException { + TestStatsProvider statsProvider = new TestStatsProvider(); + TestStatsLogger statsLogger = statsProvider.getStatsLogger(AUDITOR_SCOPE); + TestOpStatsLogger replicasCheckStatsLogger = (TestOpStatsLogger) statsLogger + .getOpStatsLogger(ReplicationStats.REPLICAS_CHECK_TIME); + servConf.setAuditorPeriodicReplicasCheckInterval(auditorPeriodicReplicasCheckInterval); + servConf.setAuditorPeriodicCheckInterval(0); + servConf.setAuditorPeriodicBookieCheckInterval(0); + final TestAuditor auditor = new TestAuditor(BookieImpl.getBookieId(servConf).toString(), servConf, bkc, false, + statsLogger, null); + CountDownLatch latch = auditor.getLatch(); + assertEquals("REPLICAS_CHECK_TIME SuccessCount", 0, replicasCheckStatsLogger.getSuccessCount()); + long curTimeBeforeStart = System.currentTimeMillis(); + long replicasCheckCTime = -1; + long initialDelayInMsecs = -1; + long nextExpectedReplicasCheckExecutionTime = -1; + long bufferTimeInMsecs = 20000L; + if (timeSinceLastExecutedInSecs == -1) { + /* + * if we are setting replicasCheckCTime to -1, it means that + * replicasCheck hasn't run before. So initialDelay for + * replicasCheck should be 0. + */ + replicasCheckCTime = -1; + initialDelayInMsecs = 0; + } else { + replicasCheckCTime = curTimeBeforeStart - timeSinceLastExecutedInSecs * 1000L; + initialDelayInMsecs = timeSinceLastExecutedInSecs > auditorPeriodicReplicasCheckInterval ? 0 + : (auditorPeriodicReplicasCheckInterval - timeSinceLastExecutedInSecs) * 1000L; + } + /* + * next replicasCheck should happen at least after + * nextExpectedReplicasCheckExecutionTime. + */ + nextExpectedReplicasCheckExecutionTime = curTimeBeforeStart + initialDelayInMsecs; + + urm.setReplicasCheckCTime(replicasCheckCTime); + auditor.start(); + /* + * since auditorPeriodicReplicasCheckInterval takes higher values (in the + * order of 100s of seconds), it's ok for bufferTimeInMsecs to be ~20 secs.
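The sleep-and-poll loops these helpers use after the latch await could equally be written with Awaitility, which this patch already imports for other tests in the file. A hypothetical equivalent for the replicas-check helper:

    // Hypothetical Awaitility form of the manual Thread.sleep(100) poll loop.
    Awaitility.await()
            .atMost(2, TimeUnit.SECONDS)
            .pollInterval(100, TimeUnit.MILLISECONDS)
            .until(() -> replicasCheckStatsLogger.getSuccessCount() >= 1);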
+ */ + assertTrue("replicasCheck should have executed with initialDelay " + initialDelayInMsecs, + latch.await(initialDelayInMsecs + bufferTimeInMsecs, TimeUnit.MILLISECONDS)); + for (int i = 0; i < 20; i++) { + Thread.sleep(100); + if (replicasCheckStatsLogger.getSuccessCount() >= 1) { + break; + } + } + assertEquals("REPLICAS_CHECK_TIME SuccessCount", 1, replicasCheckStatsLogger.getSuccessCount()); + long currentReplicasCheckCTime = urm.getReplicasCheckCTime(); + assertTrue( + "currentReplicasCheckCTime: " + currentReplicasCheckCTime + + " should be greater than nextExpectedReplicasCheckExecutionTime: " + + nextExpectedReplicasCheckExecutionTime, + currentReplicasCheckCTime > nextExpectedReplicasCheckExecutionTime); + assertTrue( + "currentReplicasCheckCTime: " + currentReplicasCheckCTime + + " should be lesser than nextExpectedReplicasCheckExecutionTime+bufferTimeInMsecs: " + + (nextExpectedReplicasCheckExecutionTime + bufferTimeInMsecs), + currentReplicasCheckCTime < (nextExpectedReplicasCheckExecutionTime + bufferTimeInMsecs)); + auditor.close(); + } + + @Test + public void testDelayBookieAuditOfCheckAllLedgers() throws Exception { + for (AuditorElector e : auditorElectors.values()) { + e.shutdown(); + } + + final int numLedgers = 10; + List ids = new LinkedList(); + for (int i = 0; i < numLedgers; i++) { + LedgerHandle lh = bkc.createLedger(3, 3, DigestType.CRC32, "passwd".getBytes()); + ids.add(lh.getId()); + for (int j = 0; j < 2; j++) { + lh.addEntry("testdata".getBytes()); + } + lh.close(); + } + + LedgerManagerFactory mFactory = driver.getLedgerManagerFactory(); + LedgerUnderreplicationManager urm = mFactory.newLedgerUnderreplicationManager(); + + ServerConfiguration servConf = new ServerConfiguration(confByIndex(0)); + + TestStatsProvider statsProvider = new TestStatsProvider(); + TestStatsLogger statsLogger = statsProvider.getStatsLogger(AUDITOR_SCOPE); + Counter numBookieAuditsDelayed = + statsLogger.getCounter(ReplicationStats.NUM_BOOKIE_AUDITS_DELAYED); + TestOpStatsLogger underReplicatedLedgerTotalSizeStatsLogger = (TestOpStatsLogger) statsLogger + .getOpStatsLogger(ReplicationStats.UNDER_REPLICATED_LEDGERS_TOTAL_SIZE); + Counter numSkippingCheckTaskTimes = + statsLogger.getCounter(ReplicationStats.NUM_SKIPPING_CHECK_TASK_TIMES); + + servConf.setAuditorPeriodicCheckInterval(1); + servConf.setAuditorPeriodicPlacementPolicyCheckInterval(0); + servConf.setAuditorPeriodicBookieCheckInterval(Long.MAX_VALUE); + + urm.setLostBookieRecoveryDelay(Integer.MAX_VALUE); + + AtomicBoolean canRun = new AtomicBoolean(false); + + final TestAuditor auditor = new TestAuditor(BookieImpl.getBookieId(servConf).toString(), servConf, bkc, + false, statsLogger, canRun); + final CountDownLatch latch = auditor.getLatch(); + + auditor.start(); + + killBookie(addressByIndex(0)); + + Awaitility.await().untilAsserted(() -> assertEquals(1, (long) numBookieAuditsDelayed.get())); + final Future auditTask = auditor.auditTask; + assertTrue(auditTask != null && !auditTask.isDone()); + assertEquals("NUM_SKIPPING_CHECK_TASK_TIMES", 0, (long) numSkippingCheckTaskTimes.get()); + + canRun.set(true); + + assertTrue(latch.await(10, TimeUnit.SECONDS)); + assertTrue(auditor.auditTask.equals(auditTask) + && auditor.auditTask != null && !auditor.auditTask.isDone()); + // wrong num is numLedgers, right num is 0 + assertEquals("UNDER_REPLICATED_LEDGERS_TOTAL_SIZE", + 0, + underReplicatedLedgerTotalSizeStatsLogger.getSuccessCount()); + assertTrue("NUM_SKIPPING_CHECK_TASK_TIMES", numSkippingCheckTaskTimes.get() > 0); + + 
auditor.close(); + } + + @Test + public void testDelayBookieAuditOfPlacementPolicy() throws Exception { + for (AuditorElector e : auditorElectors.values()) { + e.shutdown(); + } + + final int numLedgers = 10; + List ids = new LinkedList(); + for (int i = 0; i < numLedgers; i++) { + LedgerHandle lh = bkc.createLedger(3, 3, DigestType.CRC32, "passwd".getBytes()); + ids.add(lh.getId()); + for (int j = 0; j < 2; j++) { + lh.addEntry("testdata".getBytes()); + } + lh.close(); + } + + LedgerManagerFactory mFactory = driver.getLedgerManagerFactory(); + LedgerUnderreplicationManager urm = mFactory.newLedgerUnderreplicationManager(); + + ServerConfiguration servConf = new ServerConfiguration(confByIndex(0)); + + TestStatsProvider statsProvider = new TestStatsProvider(); + TestStatsLogger statsLogger = statsProvider.getStatsLogger(AUDITOR_SCOPE); + Counter numBookieAuditsDelayed = + statsLogger.getCounter(ReplicationStats.NUM_BOOKIE_AUDITS_DELAYED); + TestOpStatsLogger placementPolicyCheckTime = (TestOpStatsLogger) statsLogger + .getOpStatsLogger(ReplicationStats.PLACEMENT_POLICY_CHECK_TIME); + Counter numSkippingCheckTaskTimes = + statsLogger.getCounter(ReplicationStats.NUM_SKIPPING_CHECK_TASK_TIMES); + + servConf.setAuditorPeriodicCheckInterval(0); + servConf.setAuditorPeriodicPlacementPolicyCheckInterval(1); + servConf.setAuditorPeriodicBookieCheckInterval(Long.MAX_VALUE); + + urm.setLostBookieRecoveryDelay(Integer.MAX_VALUE); + + AtomicBoolean canRun = new AtomicBoolean(false); + + final TestAuditor auditor = new TestAuditor(BookieImpl.getBookieId(servConf).toString(), servConf, bkc, + false, statsLogger, canRun); + final CountDownLatch latch = auditor.getLatch(); + + auditor.start(); + + killBookie(addressByIndex(0)); + + Awaitility.await().untilAsserted(() -> assertEquals(1, (long) numBookieAuditsDelayed.get())); + final Future auditTask = auditor.auditTask; + assertTrue(auditTask != null && !auditTask.isDone()); + assertEquals("PLACEMENT_POLICY_CHECK_TIME", 0, placementPolicyCheckTime.getSuccessCount()); + assertEquals("NUM_SKIPPING_CHECK_TASK_TIMES", 0, (long) numSkippingCheckTaskTimes.get()); + + canRun.set(true); + + assertTrue(latch.await(10, TimeUnit.SECONDS)); + assertTrue(auditor.auditTask.equals(auditTask) + && auditor.auditTask != null && !auditor.auditTask.isDone()); + // wrong successCount is > 0, right successCount is = 0 + assertEquals("PLACEMENT_POLICY_CHECK_TIME", 0, placementPolicyCheckTime.getSuccessCount()); + assertTrue("NUM_SKIPPING_CHECK_TASK_TIMES", numSkippingCheckTaskTimes.get() > 0); + + auditor.close(); + } + + @Test + public void testDelayBookieAuditOfReplicasCheck() throws Exception { + for (AuditorElector e : auditorElectors.values()) { + e.shutdown(); + } + + final int numLedgers = 10; + List ids = new LinkedList(); + for (int i = 0; i < numLedgers; i++) { + LedgerHandle lh = bkc.createLedger(3, 3, DigestType.CRC32, "passwd".getBytes()); + ids.add(lh.getId()); + for (int j = 0; j < 2; j++) { + lh.addEntry("testdata".getBytes()); + } + lh.close(); + } + + LedgerManagerFactory mFactory = driver.getLedgerManagerFactory(); + LedgerUnderreplicationManager urm = mFactory.newLedgerUnderreplicationManager(); + + ServerConfiguration servConf = new ServerConfiguration(confByIndex(0)); + + TestStatsProvider statsProvider = new TestStatsProvider(); + TestStatsLogger statsLogger = statsProvider.getStatsLogger(AUDITOR_SCOPE); + Counter numBookieAuditsDelayed = + statsLogger.getCounter(ReplicationStats.NUM_BOOKIE_AUDITS_DELAYED); + TestOpStatsLogger replicasCheckTime = 
(TestOpStatsLogger) statsLogger + .getOpStatsLogger(ReplicationStats.REPLICAS_CHECK_TIME); + Counter numSkippingCheckTaskTimes = + statsLogger.getCounter(ReplicationStats.NUM_SKIPPING_CHECK_TASK_TIMES); + + servConf.setAuditorPeriodicCheckInterval(0); + servConf.setAuditorPeriodicPlacementPolicyCheckInterval(0); + servConf.setAuditorPeriodicBookieCheckInterval(Long.MAX_VALUE); + servConf.setAuditorPeriodicReplicasCheckInterval(1); + + urm.setLostBookieRecoveryDelay(Integer.MAX_VALUE); + + AtomicBoolean canRun = new AtomicBoolean(false); + + final TestAuditor auditor = new TestAuditor(BookieImpl.getBookieId(servConf).toString(), servConf, bkc, + false, statsLogger, canRun); + final CountDownLatch latch = auditor.getLatch(); + + auditor.start(); + + killBookie(addressByIndex(0)); + + Awaitility.await().untilAsserted(() -> assertEquals(1, (long) numBookieAuditsDelayed.get())); + final Future auditTask = auditor.auditTask; + assertTrue(auditTask != null && !auditTask.isDone()); + assertEquals("REPLICAS_CHECK_TIME", 0, replicasCheckTime.getSuccessCount()); + assertEquals("NUM_SKIPPING_CHECK_TASK_TIMES", 0, (long) numSkippingCheckTaskTimes.get()); + + canRun.set(true); + + assertTrue(latch.await(10, TimeUnit.SECONDS)); + assertTrue(auditor.auditTask.equals(auditTask) + && auditor.auditTask != null && !auditor.auditTask.isDone()); + // wrong successCount is > 0, right successCount is = 0 + assertEquals("REPLICAS_CHECK_TIME", 0, replicasCheckTime.getSuccessCount()); + assertTrue("NUM_SKIPPING_CHECK_TASK_TIMES", numSkippingCheckTaskTimes.get() > 0); + + auditor.close(); + } + + static class TestAuditor extends Auditor { + + final AtomicReference latchRef = new AtomicReference(new CountDownLatch(1)); + + public TestAuditor(String bookieIdentifier, ServerConfiguration conf, BookKeeper bkc, boolean ownBkc, + StatsLogger statsLogger, AtomicBoolean exceptedRun) throws UnavailableException { + super(bookieIdentifier, conf, bkc, ownBkc, statsLogger); + renewAuditorTestWrapperTask(exceptedRun); + } + + public TestAuditor(String bookieIdentifier, ServerConfiguration conf, BookKeeper bkc, boolean ownBkc, + BookKeeperAdmin bkadmin, boolean ownadmin, StatsLogger statsLogger, + AtomicBoolean exceptedRun) throws UnavailableException { + super(bookieIdentifier, conf, bkc, ownBkc, bkadmin, ownadmin, statsLogger); + renewAuditorTestWrapperTask(exceptedRun); + } + + public TestAuditor(final String bookieIdentifier, ServerConfiguration conf, StatsLogger statsLogger, + AtomicBoolean exceptedRun) + throws UnavailableException { + super(bookieIdentifier, conf, statsLogger); + renewAuditorTestWrapperTask(exceptedRun); + } + + private void renewAuditorTestWrapperTask(AtomicBoolean exceptedRun) { + super.auditorCheckAllLedgersTask = + new AuditorTestWrapperTask(super.auditorCheckAllLedgersTask, latchRef, exceptedRun); + super.auditorPlacementPolicyCheckTask = + new AuditorTestWrapperTask(super.auditorPlacementPolicyCheckTask, latchRef, exceptedRun); + super.auditorReplicasCheckTask = + new AuditorTestWrapperTask(super.auditorReplicasCheckTask, latchRef, exceptedRun); + } + + CountDownLatch getLatch() { + return latchRef.get(); + } + + void setLatch(CountDownLatch latch) { + latchRef.set(latch); + } + + private static class AuditorTestWrapperTask extends AuditorTask { + private final AuditorTask innerTask; + private final AtomicReference latchRef; + private final AtomicBoolean exceptedRun; + + AuditorTestWrapperTask(AuditorTask innerTask, + AtomicReference latchRef, + AtomicBoolean exceptedRun) { + super(null, null, null, 
null, null, + null, null); + this.innerTask = innerTask; + this.latchRef = latchRef; + this.exceptedRun = exceptedRun; + } + + @Override + protected void runTask() { + if (exceptedRun == null || exceptedRun.get()) { + innerTask.runTask(); + latchRef.get().countDown(); + } + } + + @Override + public void shutdown() { + innerTask.shutdown(); + } + } + } + + private BookieId replaceBookieWithWriteFailingBookie(LedgerHandle lh) throws Exception { int bookieIdx = -1; - Long entryId = LedgerHandleAdapter.getLedgerMetadata(lh).getEnsembles().firstKey(); - List curEnsemble = LedgerHandleAdapter - .getLedgerMetadata(lh).getEnsembles().get(entryId); + Long entryId = lh.getLedgerMetadata().getAllEnsembles().firstKey(); + List curEnsemble = lh.getLedgerMetadata().getAllEnsembles().get(entryId); // Identify a bookie in the current ledger ensemble to be replaced - BookieSocketAddress replacedBookie = null; + BookieId replacedBookie = null; for (int i = 0; i < numBookies; i++) { - if (curEnsemble.contains(bs.get(i).getLocalAddress())) { + if (curEnsemble.contains(addressByIndex(i))) { bookieIdx = i; - replacedBookie = bs.get(i).getLocalAddress(); + replacedBookie = addressByIndex(i); break; } } assertNotEquals("Couldn't find ensemble bookie in bookie list", -1, bookieIdx); - LOG.info("Killing bookie " + bs.get(bookieIdx).getLocalAddress()); + LOG.info("Killing bookie " + addressByIndex(bookieIdx)); ServerConfiguration conf = killBookie(bookieIdx); - Bookie writeFailingBookie = new Bookie(conf) { + Bookie writeFailingBookie = new TestBookieImpl(conf) { @Override public void addEntry(ByteBuf entry, boolean ackBeforeSync, WriteCallback cb, Object ctx, byte[] masterKey) @@ -404,8 +1007,7 @@ public void addEntry(ByteBuf entry, boolean ackBeforeSync, WriteCallback cb, } } }; - bsConfs.add(conf); - bs.add(startBookie(conf, writeFailingBookie)); + startAndAddBookie(conf, writeFailingBookie); return replacedBookie; } @@ -422,7 +1024,7 @@ public void testFailedWriteRecovery() throws Exception { // kill one of the bookies and replace it with one that rejects write; // This way we get into the under replication state - BookieSocketAddress replacedBookie = replaceBookieWithWriteFailingBookie(lh); + BookieId replacedBookie = replaceBookieWithWriteFailingBookie(lh); // Write a few entries; this should cause under replication byte[] data = "foobar".getBytes(); @@ -449,8 +1051,7 @@ public void testFailedWriteRecovery() throws Exception { // now start the replication workers List l = new ArrayList(); for (int i = 0; i < numBookies; i++) { - ReplicationWorker rw = new ReplicationWorker( - zkc, bsConfs.get(i), NullStatsLogger.INSTANCE); + ReplicationWorker rw = new ReplicationWorker(confByIndex(i), NullStatsLogger.INSTANCE); rw.start(); l.add(rw); } @@ -465,9 +1066,9 @@ public void testFailedWriteRecovery() throws Exception { // check that ensemble has changed and the bookie that rejected writes has // been replaced in the ensemble LedgerHandle newLh = bkc.openLedger(lh.getId(), DigestType.CRC32, "passwd".getBytes()); - for (Map.Entry> e : LedgerHandleAdapter.getLedgerMetadata(newLh). 
- getEnsembles().entrySet()) { - List ensemble = e.getValue(); + for (Map.Entry> e : + newLh.getLedgerMetadata().getAllEnsembles().entrySet()) { + List ensemble = e.getValue(); assertFalse("Ensemble hasn't been updated", ensemble.contains(replacedBookie)); } newLh.close(); diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/AuditorPlacementPolicyCheckTaskTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/AuditorPlacementPolicyCheckTaskTest.java new file mode 100644 index 00000000000..57645a6213b --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/AuditorPlacementPolicyCheckTaskTest.java @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bookkeeper.replication; + +import static org.apache.bookkeeper.replication.ReplicationStats.AUDITOR_SCOPE; +import static org.junit.Assert.assertEquals; + +import java.util.LinkedList; +import java.util.List; +import org.apache.bookkeeper.client.BKException; +import org.apache.bookkeeper.client.BookKeeper; +import org.apache.bookkeeper.client.BookKeeperAdmin; +import org.apache.bookkeeper.client.LedgerHandle; +import org.apache.bookkeeper.conf.ClientConfiguration; +import org.apache.bookkeeper.meta.LedgerManager; +import org.apache.bookkeeper.meta.LedgerManagerFactory; +import org.apache.bookkeeper.meta.LedgerUnderreplicationManager; +import org.apache.bookkeeper.stats.NullStatsLogger; +import org.apache.bookkeeper.test.BookKeeperClusterTestCase; +import org.apache.bookkeeper.test.TestStatsProvider; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Unit test {@link AuditorPlacementPolicyCheckTask}. 
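Stepping back to the TestAuditor helper defined above: it replaces each of the Auditor's three periodic tasks with AuditorTestWrapperTask, a decorator that delegates to the real task only when a flag permits and counts down a latch per completed run. Stripped to its essentials (an illustrative sketch; the real AuditorTask constructor takes several collaborators):

    import java.util.concurrent.CountDownLatch;
    import java.util.concurrent.atomic.AtomicBoolean;

    // Essence of the gating decorator used by TestAuditor.
    final class GatedTask implements Runnable {
        private final Runnable inner;          // the real auditor task
        private final AtomicBoolean canRun;    // null means "always run"
        private final CountDownLatch ranOnce;  // lets the test observe a run

        GatedTask(Runnable inner, AtomicBoolean canRun, CountDownLatch ranOnce) {
            this.inner = inner;
            this.canRun = canRun;
            this.ranOnce = ranOnce;
        }

        @Override
        public void run() {
            if (canRun == null || canRun.get()) {
                inner.run();
                ranOnce.countDown();
            }
        }
    }

This gating is what lets the testDelayBookieAuditOf*() cases hold a scheduled audit in place (flag false) and then release it on demand (flag true).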
+ */ +public class AuditorPlacementPolicyCheckTaskTest extends BookKeeperClusterTestCase { + private static final Logger LOG = LoggerFactory + .getLogger(AuditorPlacementPolicyCheckTaskTest.class); + + private BookKeeperAdmin admin; + private LedgerManager ledgerManager; + private LedgerUnderreplicationManager ledgerUnderreplicationManager; + + public AuditorPlacementPolicyCheckTaskTest() { + super(3); + baseConf.setPageLimit(1); + baseConf.setAutoRecoveryDaemonEnabled(false); + } + + @Override + public void setUp() throws Exception { + super.setUp(); + final BookKeeper bookKeeper = new BookKeeper(baseClientConf); + admin = new BookKeeperAdmin(bookKeeper, NullStatsLogger.INSTANCE, new ClientConfiguration(baseClientConf)); + LedgerManagerFactory ledgerManagerFactory = bookKeeper.getLedgerManagerFactory(); + ledgerManager = ledgerManagerFactory.newLedgerManager(); + ledgerUnderreplicationManager = ledgerManagerFactory.newLedgerUnderreplicationManager(); + } + + @Test + public void testPlacementPolicyCheck() throws BKException, InterruptedException { + + // 1. create ledgers + final int numLedgers = 10; + List ids = new LinkedList(); + for (int i = 0; i < numLedgers; i++) { + LedgerHandle lh = bkc.createLedger(3, 3, BookKeeper.DigestType.CRC32, "passwd".getBytes()); + ids.add(lh.getId()); + for (int j = 0; j < 2; j++) { + lh.addEntry("testdata".getBytes()); + } + lh.close(); + } + + // 2. init auditorPlacementPolicyCheckTask + final TestStatsProvider statsProvider = new TestStatsProvider(); + final TestStatsProvider.TestStatsLogger statsLogger = statsProvider.getStatsLogger(AUDITOR_SCOPE); + final AuditorStats auditorStats = new AuditorStats(statsLogger); + + AuditorPlacementPolicyCheckTask auditorPlacementPolicyCheckTask = new AuditorPlacementPolicyCheckTask( + baseConf, auditorStats, admin, ledgerManager, + ledgerUnderreplicationManager, null, (flag, throwable) -> flag.set(false)); + + // 3. placementPolicyCheck + auditorPlacementPolicyCheckTask.runTask(); + + // 4. verify + assertEquals("PLACEMENT_POLICY_CHECK_TIME", 1, ((TestStatsProvider.TestOpStatsLogger) + statsLogger.getOpStatsLogger(ReplicationStats.PLACEMENT_POLICY_CHECK_TIME)).getSuccessCount()); + assertEquals("numOfClosedLedgersAuditedInPlacementPolicyCheck", + numLedgers, + auditorPlacementPolicyCheckTask.getNumOfClosedLedgersAuditedInPlacementPolicyCheck().get()); + assertEquals("numOfLedgersFoundNotAdheringInPlacementPolicyCheck", + numLedgers, + auditorPlacementPolicyCheckTask.getNumOfLedgersFoundNotAdheringInPlacementPolicyCheck().get()); + } + +} \ No newline at end of file diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/AuditorPlacementPolicyCheckTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/AuditorPlacementPolicyCheckTest.java new file mode 100644 index 00000000000..62af2c5d488 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/AuditorPlacementPolicyCheckTest.java @@ -0,0 +1,853 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.replication; + +import static org.apache.bookkeeper.client.RackawareEnsemblePlacementPolicyImpl.REPP_DNS_RESOLVER_CLASS; +import static org.apache.bookkeeper.replication.ReplicationStats.AUDITOR_SCOPE; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.net.URI; +import java.net.UnknownHostException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; +import org.apache.bookkeeper.bookie.BookieImpl; +import org.apache.bookkeeper.client.LedgerMetadataBuilder; +import org.apache.bookkeeper.client.RackawareEnsemblePlacementPolicy; +import org.apache.bookkeeper.client.ZoneawareEnsemblePlacementPolicy; +import org.apache.bookkeeper.client.api.DigestType; +import org.apache.bookkeeper.client.api.LedgerMetadata; +import org.apache.bookkeeper.conf.ClientConfiguration; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.discover.BookieServiceInfo; +import org.apache.bookkeeper.discover.RegistrationManager; +import org.apache.bookkeeper.meta.LedgerManager; +import org.apache.bookkeeper.meta.LedgerManagerFactory; +import org.apache.bookkeeper.meta.LedgerUnderreplicationManager; +import org.apache.bookkeeper.meta.MetadataBookieDriver; +import org.apache.bookkeeper.meta.MetadataDrivers; +import org.apache.bookkeeper.meta.exceptions.MetadataException; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.replication.AuditorPeriodicCheckTest.TestAuditor; +import org.apache.bookkeeper.replication.ReplicationException.CompatibilityException; +import org.apache.bookkeeper.replication.ReplicationException.UnavailableException; +import org.apache.bookkeeper.stats.Gauge; +import org.apache.bookkeeper.stats.NullStatsLogger; +import org.apache.bookkeeper.test.BookKeeperClusterTestCase; +import org.apache.bookkeeper.test.TestStatsProvider; +import org.apache.bookkeeper.test.TestStatsProvider.TestOpStatsLogger; +import org.apache.bookkeeper.test.TestStatsProvider.TestStatsLogger; +import org.apache.bookkeeper.util.StaticDNSResolver; +import org.apache.commons.lang3.mutable.MutableObject; +import org.apache.zookeeper.KeeperException; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +/** + * Tests the logic of Auditor's PlacementPolicyCheck. 
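As a rule of thumb for the scenarios in this class: an ensemble adheres to the rack-aware policy when every rotated write quorum of writeQuorumSize bookies spans at least min(writeQuorumSize, minNumRacksPerWriteQuorum) distinct racks. A toy checker capturing that rule (illustrative only; not the RackawareEnsemblePlacementPolicy implementation):

    import java.util.HashSet;
    import java.util.List;
    import java.util.Set;

    // Toy rack-adherence check: every sliding write quorum over the
    // (round-robin rotated) ensemble must cover enough distinct racks.
    static boolean adheres(List<String> rackByEnsembleIndex,
                           int writeQuorumSize, int minNumRacksPerWriteQuorum) {
        int ensembleSize = rackByEnsembleIndex.size();
        int required = Math.min(writeQuorumSize, minNumRacksPerWriteQuorum);
        for (int start = 0; start < ensembleSize; start++) {
            Set<String> racks = new HashSet<>();
            for (int k = 0; k < writeQuorumSize; k++) {
                racks.add(rackByEnsembleIndex.get((start + k) % ensembleSize));
            }
            if (racks.size() < required) {
                return false; // some write quorum is too concentrated
            }
        }
        return true;
    }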
+ */ +public class AuditorPlacementPolicyCheckTest extends BookKeeperClusterTestCase { + private MetadataBookieDriver driver; + + public AuditorPlacementPolicyCheckTest() { + super(1); + baseConf.setPageLimit(1); // to make it easy to push ledger out of cache + } + + @Before + @Override + public void setUp() throws Exception { + super.setUp(); + StaticDNSResolver.reset(); + driver = MetadataDrivers.getBookieDriver(URI.create(confByIndex(0).getMetadataServiceUri())); + driver.initialize(confByIndex(0), NullStatsLogger.INSTANCE); + } + + @After + @Override + public void tearDown() throws Exception { + if (null != driver) { + driver.close(); + } + super.tearDown(); + } + + @Test + public void testPlacementPolicyCheckWithBookiesFromDifferentRacks() throws Exception { + int numOfBookies = 5; + List bookieAddresses = new ArrayList<>(); + BookieSocketAddress bookieAddress; + RegistrationManager regManager = driver.createRegistrationManager(); + // all the numOfBookies (5) are going to be in different racks + for (int i = 0; i < numOfBookies; i++) { + bookieAddress = new BookieSocketAddress("98.98.98." + i, 2181); + StaticDNSResolver.addNodeToRack(bookieAddress.getHostName(), "/rack" + (i)); + bookieAddresses.add(bookieAddress.toBookieId()); + regManager.registerBookie(bookieAddress.toBookieId(), false, BookieServiceInfo.EMPTY); + } + + LedgerManagerFactory mFactory = driver.getLedgerManagerFactory(); + LedgerManager lm = mFactory.newLedgerManager(); + int ensembleSize = 5; + int writeQuorumSize = 4; + int ackQuorumSize = 2; + int minNumRacksPerWriteQuorumConfValue = 4; + Collections.shuffle(bookieAddresses); + + // closed ledger + LedgerMetadata initMeta = LedgerMetadataBuilder.create() + .withId(1L) + .withEnsembleSize(ensembleSize) + .withWriteQuorumSize(writeQuorumSize) + .withAckQuorumSize(ackQuorumSize) + .newEnsembleEntry(0L, bookieAddresses) + .withClosedState() + .withLastEntryId(100) + .withLength(10000) + .withDigestType(DigestType.DUMMY) + .withPassword(new byte[0]) + .build(); + lm.createLedgerMetadata(1L, initMeta).get(); + + Collections.shuffle(bookieAddresses); + ensembleSize = 4; + // closed ledger with multiple segments + initMeta = LedgerMetadataBuilder.create() + .withId(2L) + .withEnsembleSize(ensembleSize) + .withWriteQuorumSize(writeQuorumSize) + .withAckQuorumSize(ackQuorumSize) + .newEnsembleEntry(0L, bookieAddresses.subList(0, 4)) + .newEnsembleEntry(20L, bookieAddresses.subList(1, 5)) + .newEnsembleEntry(60L, bookieAddresses.subList(0, 4)) + .withClosedState() + .withLastEntryId(100) + .withLength(10000) + .withDigestType(DigestType.DUMMY) + .withPassword(new byte[0]) + .build(); + lm.createLedgerMetadata(2L, initMeta).get(); + + Collections.shuffle(bookieAddresses); + // non-closed ledger + initMeta = LedgerMetadataBuilder.create() + .withId(3L) + .withEnsembleSize(ensembleSize) + .withWriteQuorumSize(writeQuorumSize) + .withAckQuorumSize(ackQuorumSize) + .newEnsembleEntry(0L, bookieAddresses.subList(0, 4)) + .withDigestType(DigestType.DUMMY) + .withPassword(new byte[0]) + .build(); + lm.createLedgerMetadata(3L, initMeta).get(); + + Collections.shuffle(bookieAddresses); + // non-closed ledger with multiple segments + initMeta = LedgerMetadataBuilder.create() + .withId(4L) + .withEnsembleSize(ensembleSize) + .withWriteQuorumSize(writeQuorumSize) + .withAckQuorumSize(ackQuorumSize) + .newEnsembleEntry(0L, bookieAddresses.subList(0, 4)) + .newEnsembleEntry(20L, bookieAddresses.subList(1, 5)) + .newEnsembleEntry(60L, bookieAddresses.subList(0, 4)) + 
.withDigestType(DigestType.DUMMY) + .withPassword(new byte[0]) + .build(); + lm.createLedgerMetadata(4L, initMeta).get(); + + ServerConfiguration servConf = new ServerConfiguration(confByIndex(0)); + servConf.setMinNumRacksPerWriteQuorum(minNumRacksPerWriteQuorumConfValue); + setServerConfigPropertiesForRackPlacement(servConf); + MutableObject auditorRef = new MutableObject(); + try { + TestStatsLogger statsLogger = startAuditorAndWaitForPlacementPolicyCheck(servConf, auditorRef); + Gauge ledgersNotAdheringToPlacementPolicyGuage = statsLogger + .getGauge(ReplicationStats.NUM_LEDGERS_NOT_ADHERING_TO_PLACEMENT_POLICY); + Gauge ledgersSoftlyAdheringToPlacementPolicyGuage = statsLogger + .getGauge(ReplicationStats.NUM_LEDGERS_SOFTLY_ADHERING_TO_PLACEMENT_POLICY); + /* + * since all of the bookies are in different racks, there shouldn't be any ledger not adhering + * to placement policy. + */ + assertEquals("NUM_LEDGERS_NOT_ADHERING_TO_PLACEMENT_POLICY guage value", 0, + ledgersNotAdheringToPlacementPolicyGuage.getSample()); + /* + * since all of the bookies are in different racks, there shouldn't be any ledger softly adhering + * to placement policy. + */ + assertEquals("NUM_LEDGERS_SOFTLY_ADHERING_TO_PLACEMENT_POLICY guage value", 0, + ledgersSoftlyAdheringToPlacementPolicyGuage.getSample()); + } finally { + Auditor auditor = auditorRef.getValue(); + if (auditor != null) { + auditor.close(); + } + regManager.close(); + } + } + + @Test + public void testPlacementPolicyCheckWithLedgersNotAdheringToPlacementPolicy() throws Exception { + int numOfBookies = 5; + int numOfLedgersNotAdheringToPlacementPolicy = 0; + List bookieAddresses = new ArrayList<>(); + RegistrationManager regManager = driver.createRegistrationManager(); + for (int i = 0; i < numOfBookies; i++) { + BookieId bookieAddress = new BookieSocketAddress("98.98.98." + i, 2181).toBookieId(); + bookieAddresses.add(bookieAddress); + regManager.registerBookie(bookieAddress, false, BookieServiceInfo.EMPTY); + } + + // only three racks + StaticDNSResolver.addNodeToRack("98.98.98.0", "/rack1"); + StaticDNSResolver.addNodeToRack("98.98.98.1", "/rack2"); + StaticDNSResolver.addNodeToRack("98.98.98.2", "/rack3"); + StaticDNSResolver.addNodeToRack("98.98.98.3", "/rack1"); + StaticDNSResolver.addNodeToRack("98.98.98.4", "/rack2"); + + LedgerManagerFactory mFactory = driver.getLedgerManagerFactory(); + LedgerManager lm = mFactory.newLedgerManager(); + int ensembleSize = 5; + int writeQuorumSize = 3; + int ackQuorumSize = 2; + int minNumRacksPerWriteQuorumConfValue = 3; + + /* + * this closed ledger doesn't adhere to placement policy because there are only + * 3 racks, and the ensembleSize is 5. 
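Plugging this test's layout into the toy adheres() sketch above shows why the closed ledger is flagged (hypothetical usage mirroring the rack mapping registered above; assumes java.util.Arrays is imported):

    // 98.98.98.{0..4} map to rack1, rack2, rack3, rack1, rack2.
    List<String> racks = Arrays.asList("/rack1", "/rack2", "/rack3", "/rack1", "/rack2");
    // The rotated write quorum at ensemble positions {3, 4, 0} covers only
    // /rack1 and /rack2, below minNumRacksPerWriteQuorum = 3.
    assert !adheres(racks, 3, 3);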
+ */ + LedgerMetadata initMeta = LedgerMetadataBuilder.create() + .withId(1L) + .withEnsembleSize(ensembleSize) + .withWriteQuorumSize(writeQuorumSize) + .withAckQuorumSize(ackQuorumSize) + .newEnsembleEntry(0L, bookieAddresses) + .withClosedState() + .withLastEntryId(100) + .withLength(10000) + .withDigestType(DigestType.DUMMY) + .withPassword(new byte[0]) + .build(); + lm.createLedgerMetadata(1L, initMeta).get(); + numOfLedgersNotAdheringToPlacementPolicy++; + + /* + * this is non-closed ledger, so it shouldn't count as ledger not + * adhering to placement policy + */ + initMeta = LedgerMetadataBuilder.create() + .withId(2L) + .withEnsembleSize(ensembleSize) + .withWriteQuorumSize(writeQuorumSize) + .withAckQuorumSize(ackQuorumSize) + .newEnsembleEntry(0L, bookieAddresses) + .withDigestType(DigestType.DUMMY) + .withPassword(new byte[0]) + .build(); + lm.createLedgerMetadata(2L, initMeta).get(); + + ServerConfiguration servConf = new ServerConfiguration(confByIndex(0)); + servConf.setMinNumRacksPerWriteQuorum(minNumRacksPerWriteQuorumConfValue); + setServerConfigPropertiesForRackPlacement(servConf); + MutableObject auditorRef = new MutableObject(); + try { + TestStatsLogger statsLogger = startAuditorAndWaitForPlacementPolicyCheck(servConf, auditorRef); + Gauge ledgersNotAdheringToPlacementPolicyGuage = statsLogger + .getGauge(ReplicationStats.NUM_LEDGERS_NOT_ADHERING_TO_PLACEMENT_POLICY); + assertEquals("NUM_LEDGERS_NOT_ADHERING_TO_PLACEMENT_POLICY guage value", + numOfLedgersNotAdheringToPlacementPolicy, ledgersNotAdheringToPlacementPolicyGuage.getSample()); + Gauge ledgersSoftlyAdheringToPlacementPolicyGuage = statsLogger + .getGauge(ReplicationStats.NUM_LEDGERS_SOFTLY_ADHERING_TO_PLACEMENT_POLICY); + assertEquals("NUM_LEDGERS_SOFTLY_ADHERING_TO_PLACEMENT_POLICY guage value", + 0, ledgersSoftlyAdheringToPlacementPolicyGuage.getSample()); + } finally { + Auditor auditor = auditorRef.getValue(); + if (auditor != null) { + auditor.close(); + } + regManager.close(); + } + } + + @Test + public void testPlacementPolicyCheckWithLedgersNotAdheringToPlacementPolicyAndNotMarkToUnderreplication() + throws Exception { + int numOfBookies = 5; + int numOfLedgersNotAdheringToPlacementPolicy = 0; + List bookieAddresses = new ArrayList<>(); + RegistrationManager regManager = driver.createRegistrationManager(); + for (int i = 0; i < numOfBookies; i++) { + BookieId bookieAddress = new BookieSocketAddress("98.98.98." + i, 2181).toBookieId(); + bookieAddresses.add(bookieAddress); + regManager.registerBookie(bookieAddress, false, BookieServiceInfo.EMPTY); + } + + // only three racks + StaticDNSResolver.addNodeToRack("98.98.98.0", "/rack1"); + StaticDNSResolver.addNodeToRack("98.98.98.1", "/rack2"); + StaticDNSResolver.addNodeToRack("98.98.98.2", "/rack3"); + StaticDNSResolver.addNodeToRack("98.98.98.3", "/rack1"); + StaticDNSResolver.addNodeToRack("98.98.98.4", "/rack2"); + + LedgerManagerFactory mFactory = driver.getLedgerManagerFactory(); + LedgerManager lm = mFactory.newLedgerManager(); + int ensembleSize = 5; + int writeQuorumSize = 3; + int ackQuorumSize = 2; + int minNumRacksPerWriteQuorumConfValue = 3; + + /* + * this closed ledger doesn't adhere to placement policy because there are only + * 3 racks, and the ensembleSize is 5. 
+ */ + LedgerMetadata initMeta = LedgerMetadataBuilder.create() + .withId(1L) + .withEnsembleSize(ensembleSize) + .withWriteQuorumSize(writeQuorumSize) + .withAckQuorumSize(ackQuorumSize) + .newEnsembleEntry(0L, bookieAddresses) + .withClosedState() + .withLastEntryId(100) + .withLength(10000) + .withDigestType(DigestType.DUMMY) + .withPassword(new byte[0]) + .build(); + lm.createLedgerMetadata(1L, initMeta).get(); + numOfLedgersNotAdheringToPlacementPolicy++; + + ServerConfiguration servConf = new ServerConfiguration(confByIndex(0)); + servConf.setMinNumRacksPerWriteQuorum(minNumRacksPerWriteQuorumConfValue); + setServerConfigPropertiesForRackPlacement(servConf); + MutableObject auditorRef = new MutableObject(); + try { + TestStatsLogger statsLogger = startAuditorAndWaitForPlacementPolicyCheck(servConf, auditorRef); + Gauge ledgersNotAdheringToPlacementPolicyGuage = statsLogger + .getGauge(ReplicationStats.NUM_LEDGERS_NOT_ADHERING_TO_PLACEMENT_POLICY); + assertEquals("NUM_LEDGERS_NOT_ADHERING_TO_PLACEMENT_POLICY guage value", + numOfLedgersNotAdheringToPlacementPolicy, ledgersNotAdheringToPlacementPolicyGuage.getSample()); + Gauge ledgersSoftlyAdheringToPlacementPolicyGuage = statsLogger + .getGauge(ReplicationStats.NUM_LEDGERS_SOFTLY_ADHERING_TO_PLACEMENT_POLICY); + assertEquals("NUM_LEDGERS_SOFTLY_ADHERING_TO_PLACEMENT_POLICY guage value", + 0, ledgersSoftlyAdheringToPlacementPolicyGuage.getSample()); + } finally { + Auditor auditor = auditorRef.getValue(); + if (auditor != null) { + auditor.close(); + } + } + LedgerUnderreplicationManager underreplicationManager = mFactory.newLedgerUnderreplicationManager(); + long unnderReplicateLedgerId = underreplicationManager.pollLedgerToRereplicate(); + assertEquals(unnderReplicateLedgerId, -1); + } + + @Test + public void testPlacementPolicyCheckWithLedgersNotAdheringToPlacementPolicyAndMarkToUnderreplication() + throws Exception { + int numOfBookies = 5; + int numOfLedgersNotAdheringToPlacementPolicy = 0; + List bookieAddresses = new ArrayList<>(); + RegistrationManager regManager = driver.createRegistrationManager(); + for (int i = 0; i < numOfBookies; i++) { + BookieId bookieAddress = new BookieSocketAddress("98.98.98." + i, 2181).toBookieId(); + bookieAddresses.add(bookieAddress); + regManager.registerBookie(bookieAddress, false, BookieServiceInfo.EMPTY); + } + + // only three racks + StaticDNSResolver.addNodeToRack("98.98.98.0", "/rack1"); + StaticDNSResolver.addNodeToRack("98.98.98.1", "/rack2"); + StaticDNSResolver.addNodeToRack("98.98.98.2", "/rack3"); + StaticDNSResolver.addNodeToRack("98.98.98.3", "/rack1"); + StaticDNSResolver.addNodeToRack("98.98.98.4", "/rack2"); + + LedgerManagerFactory mFactory = driver.getLedgerManagerFactory(); + LedgerManager lm = mFactory.newLedgerManager(); + int ensembleSize = 5; + int writeQuorumSize = 3; + int ackQuorumSize = 2; + int minNumRacksPerWriteQuorumConfValue = 3; + + /* + * this closed ledger doesn't adhere to placement policy because there are only + * 3 racks, and the ensembleSize is 5. 
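This variant differs from the preceding NotMarkToUnderreplication test only in enabling setRepairedPlacementPolicyNotAdheringBookieEnable(true) a few lines below, which makes placementPolicyCheck also mark the offending closed ledger as underreplicated. Paraphrasing the two tests' final assertions (illustrative):

    long polled = underreplicationManager.pollLedgerToRereplicate();
    // repair flag off (previous test): polled == -1, nothing queued
    // repair flag on (this test):      polled == 1L, ledger 1 queued for repair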
+ */ + LedgerMetadata initMeta = LedgerMetadataBuilder.create() + .withId(1L) + .withEnsembleSize(ensembleSize) + .withWriteQuorumSize(writeQuorumSize) + .withAckQuorumSize(ackQuorumSize) + .newEnsembleEntry(0L, bookieAddresses) + .withClosedState() + .withLastEntryId(100) + .withLength(10000) + .withDigestType(DigestType.DUMMY) + .withPassword(new byte[0]) + .build(); + lm.createLedgerMetadata(1L, initMeta).get(); + numOfLedgersNotAdheringToPlacementPolicy++; + + ServerConfiguration servConf = new ServerConfiguration(confByIndex(0)); + servConf.setMinNumRacksPerWriteQuorum(minNumRacksPerWriteQuorumConfValue); + servConf.setRepairedPlacementPolicyNotAdheringBookieEnable(true); + setServerConfigPropertiesForRackPlacement(servConf); + MutableObject auditorRef = new MutableObject(); + try { + TestStatsLogger statsLogger = startAuditorAndWaitForPlacementPolicyCheck(servConf, auditorRef); + Gauge ledgersNotAdheringToPlacementPolicyGuage = statsLogger + .getGauge(ReplicationStats.NUM_LEDGERS_NOT_ADHERING_TO_PLACEMENT_POLICY); + assertEquals("NUM_LEDGERS_NOT_ADHERING_TO_PLACEMENT_POLICY guage value", + numOfLedgersNotAdheringToPlacementPolicy, ledgersNotAdheringToPlacementPolicyGuage.getSample()); + Gauge ledgersSoftlyAdheringToPlacementPolicyGuage = statsLogger + .getGauge(ReplicationStats.NUM_LEDGERS_SOFTLY_ADHERING_TO_PLACEMENT_POLICY); + assertEquals("NUM_LEDGERS_SOFTLY_ADHERING_TO_PLACEMENT_POLICY guage value", + 0, ledgersSoftlyAdheringToPlacementPolicyGuage.getSample()); + } finally { + Auditor auditor = auditorRef.getValue(); + if (auditor != null) { + auditor.close(); + } + regManager.close(); + } + LedgerUnderreplicationManager underreplicationManager = mFactory.newLedgerUnderreplicationManager(); + long unnderReplicateLedgerId = underreplicationManager.pollLedgerToRereplicate(); + assertEquals(unnderReplicateLedgerId, 1L); + } + + @Test + public void testPlacementPolicyCheckForURLedgersElapsedRecoveryGracePeriod() throws Exception { + testPlacementPolicyCheckWithURLedgers(true); + } + + @Test + public void testPlacementPolicyCheckForURLedgersNotElapsedRecoveryGracePeriod() throws Exception { + testPlacementPolicyCheckWithURLedgers(false); + } + + public void testPlacementPolicyCheckWithURLedgers(boolean timeElapsed) throws Exception { + int numOfBookies = 4; + /* + * in timeElapsed=true scenario, set some low value, otherwise set some + * highValue. + */ + int underreplicatedLedgerRecoveryGracePeriod = timeElapsed ? 1 : 1000; + int numOfURLedgersElapsedRecoveryGracePeriod = 0; + List bookieAddresses = new ArrayList(); + RegistrationManager regManager = driver.createRegistrationManager(); + for (int i = 0; i < numOfBookies; i++) { + BookieId bookieAddress = new BookieSocketAddress("98.98.98." 
+ i, 2181).toBookieId(); + bookieAddresses.add(bookieAddress); + regManager.registerBookie(bookieAddress, false, BookieServiceInfo.EMPTY); + } + + LedgerManagerFactory mFactory = driver.getLedgerManagerFactory(); + LedgerManager lm = mFactory.newLedgerManager(); + LedgerUnderreplicationManager underreplicationManager = mFactory.newLedgerUnderreplicationManager(); + int ensembleSize = 4; + int writeQuorumSize = 3; + int ackQuorumSize = 2; + + long ledgerId1 = 1L; + LedgerMetadata initMeta = LedgerMetadataBuilder.create() + .withId(ledgerId1) + .withEnsembleSize(ensembleSize) + .withWriteQuorumSize(writeQuorumSize) + .withAckQuorumSize(ackQuorumSize) + .newEnsembleEntry(0L, bookieAddresses) + .withClosedState() + .withLastEntryId(100) + .withLength(10000) + .withDigestType(DigestType.DUMMY) + .withPassword(new byte[0]) + .build(); + lm.createLedgerMetadata(ledgerId1, initMeta).get(); + underreplicationManager.markLedgerUnderreplicated(ledgerId1, bookieAddresses.get(0).toString()); + if (timeElapsed) { + numOfURLedgersElapsedRecoveryGracePeriod++; + } + + /* + * this is non-closed ledger, it should also be reported as + * URLedgersElapsedRecoveryGracePeriod + */ + ensembleSize = 3; + long ledgerId2 = 21234561L; + initMeta = LedgerMetadataBuilder.create() + .withId(ledgerId2) + .withEnsembleSize(ensembleSize) + .withWriteQuorumSize(writeQuorumSize) + .withAckQuorumSize(ackQuorumSize) + .newEnsembleEntry(0L, + Arrays.asList(bookieAddresses.get(0), bookieAddresses.get(1), bookieAddresses.get(2))) + .newEnsembleEntry(100L, + Arrays.asList(bookieAddresses.get(3), bookieAddresses.get(1), bookieAddresses.get(2))) + .withDigestType(DigestType.DUMMY) + .withPassword(new byte[0]) + .build(); + lm.createLedgerMetadata(ledgerId2, initMeta).get(); + underreplicationManager.markLedgerUnderreplicated(ledgerId2, bookieAddresses.get(0).toString()); + if (timeElapsed) { + numOfURLedgersElapsedRecoveryGracePeriod++; + } + + /* + * this ledger is not marked underreplicated. + */ + long ledgerId3 = 31234561L; + initMeta = LedgerMetadataBuilder.create() + .withId(ledgerId3) + .withEnsembleSize(ensembleSize) + .withWriteQuorumSize(writeQuorumSize) + .withAckQuorumSize(ackQuorumSize) + .newEnsembleEntry(0L, + Arrays.asList(bookieAddresses.get(1), bookieAddresses.get(2), bookieAddresses.get(3))) + .withClosedState() + .withLastEntryId(100) + .withLength(10000) + .withDigestType(DigestType.DUMMY) + .withPassword(new byte[0]) + .build(); + lm.createLedgerMetadata(ledgerId3, initMeta).get(); + + if (timeElapsed) { + /* + * in timeelapsed scenario, by waiting for + * underreplicatedLedgerRecoveryGracePeriod, recovery time must be + * elapsed. + */ + Thread.sleep((underreplicatedLedgerRecoveryGracePeriod + 1) * 1000); + } else { + /* + * in timeElapsed=false scenario, since + * underreplicatedLedgerRecoveryGracePeriod is set to some high + * value, there is no value in waiting. So just wait for some time + * and make sure urledgers are not reported as recoverytime elapsed + * urledgers. 
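+             *
+             * Illustrative sketch of the grace-period condition exercised here;
+             * markedTimeMillis stands in for whenever the ledger was marked
+             * underreplicated, and the exact field name is an assumption:
+             *
+             *   boolean elapsed = (System.currentTimeMillis() - markedTimeMillis)
+             *           >= underreplicatedLedgerRecoveryGracePeriod * 1000L;
+             *
+             * only ledgers with elapsed == true should count towards the
+             * NUM_UNDERREPLICATED_LEDGERS_ELAPSED_RECOVERY_GRACE_PERIOD gauge.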
+             */
+            Thread.sleep(5000);
+        }
+
+        ServerConfiguration servConf = new ServerConfiguration(confByIndex(0));
+        servConf.setUnderreplicatedLedgerRecoveryGracePeriod(underreplicatedLedgerRecoveryGracePeriod);
+        setServerConfigPropertiesForRackPlacement(servConf);
+        MutableObject<Auditor> auditorRef = new MutableObject<Auditor>();
+        try {
+            TestStatsLogger statsLogger = startAuditorAndWaitForPlacementPolicyCheck(servConf, auditorRef);
+            Gauge<? extends Number> underreplicatedLedgersElapsedRecoveryGracePeriodGauge = statsLogger
+                    .getGauge(ReplicationStats.NUM_UNDERREPLICATED_LEDGERS_ELAPSED_RECOVERY_GRACE_PERIOD);
+            assertEquals("NUM_UNDERREPLICATED_LEDGERS_ELAPSED_RECOVERY_GRACE_PERIOD gauge value",
+                    numOfURLedgersElapsedRecoveryGracePeriod,
+                    underreplicatedLedgersElapsedRecoveryGracePeriodGauge.getSample());
+        } finally {
+            Auditor auditor = auditorRef.getValue();
+            if (auditor != null) {
+                auditor.close();
+            }
+            regManager.close();
+        }
+    }
+
+    @Test
+    public void testPlacementPolicyCheckWithLedgersNotAdheringToPolicyWithMultipleSegments() throws Exception {
+        int numOfBookies = 7;
+        int numOfLedgersNotAdheringToPlacementPolicy = 0;
+        List<BookieId> bookieAddresses = new ArrayList<>();
+        RegistrationManager regManager = driver.createRegistrationManager();
+        for (int i = 0; i < numOfBookies; i++) {
+            BookieId bookieAddress = new BookieSocketAddress("98.98.98." + i, 2181).toBookieId();
+            bookieAddresses.add(bookieAddress);
+            regManager.registerBookie(bookieAddress, false, BookieServiceInfo.EMPTY);
+        }
+
+        // only four racks
+        StaticDNSResolver.addNodeToRack("98.98.98.0", "/rack1");
+        StaticDNSResolver.addNodeToRack("98.98.98.1", "/rack2");
+        StaticDNSResolver.addNodeToRack("98.98.98.2", "/rack3");
+        StaticDNSResolver.addNodeToRack("98.98.98.3", "/rack4");
+        StaticDNSResolver.addNodeToRack("98.98.98.4", "/rack1");
+        StaticDNSResolver.addNodeToRack("98.98.98.5", "/rack2");
+        StaticDNSResolver.addNodeToRack("98.98.98.6", "/rack3");
+
+        LedgerManagerFactory mFactory = driver.getLedgerManagerFactory();
+        LedgerManager lm = mFactory.newLedgerManager();
+        int ensembleSize = 5;
+        int writeQuorumSize = 5;
+        int ackQuorumSize = 2;
+        int minNumRacksPerWriteQuorumConfValue = 4;
+
+        /*
+         * for this closed ledger, each writeQuorumSize (5) write quorum spans
+         * at least minNumRacksPerWriteQuorumConfValue (4) racks. So it won't be
+         * counted as a ledger not adhering to placement policy.
+         */
+        LedgerMetadata initMeta = LedgerMetadataBuilder.create()
+                .withId(1L)
+                .withEnsembleSize(ensembleSize)
+                .withWriteQuorumSize(writeQuorumSize)
+                .withAckQuorumSize(ackQuorumSize)
+                .newEnsembleEntry(0L, bookieAddresses.subList(0, 5))
+                .newEnsembleEntry(20L, bookieAddresses.subList(1, 6))
+                .withClosedState()
+                .withLastEntryId(100)
+                .withLength(10000)
+                .withDigestType(DigestType.DUMMY)
+                .withPassword(new byte[0])
+                .build();
+        lm.createLedgerMetadata(1L, initMeta).get();
+
+        /*
+         * for the second segment, bookies are from /rack1, /rack2 and /rack3,
+         * which is < minNumRacksPerWriteQuorumConfValue (4). So it is not
+         * adhering to placement policy.
+         *
+         * likewise, the third segment's bookies are from /rack1, /rack2 and
+         * /rack3, which is < minNumRacksPerWriteQuorumConfValue (4). So it is
+         * not adhering to placement policy either.
+         *
+         * though multiple segments are not adhering to placement policy, the
+         * ledger should be counted as a single one.
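+         *
+         * Illustrative accounting, using hypothetical helpers segmentsOf and
+         * segmentAdheresToPlacementPolicy (not real BookKeeper APIs); the point
+         * is the single increment per ledger that the gauge assertion relies on:
+         *
+         *   boolean adheres = true;
+         *   for (Segment segment : segmentsOf(ledgerMetadata)) {
+         *       adheres &= segmentAdheresToPlacementPolicy(segment);
+         *   }
+         *   if (!adheres) {
+         *       numOfLedgersNotAdheringToPlacementPolicy++; // once per ledger
+         *   }
+         *
+         * so the expected gauge value below stays 1 even though two segments
+         * violate the policy.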
+ */ + initMeta = LedgerMetadataBuilder.create() + .withId(2L) + .withEnsembleSize(ensembleSize) + .withWriteQuorumSize(writeQuorumSize) + .withAckQuorumSize(ackQuorumSize) + .newEnsembleEntry(0L, bookieAddresses.subList(0, 5)) + .newEnsembleEntry(20L, + Arrays.asList(bookieAddresses.get(0), bookieAddresses.get(1), bookieAddresses.get(2), + bookieAddresses.get(4), bookieAddresses.get(5))) + .newEnsembleEntry(40L, + Arrays.asList(bookieAddresses.get(0), bookieAddresses.get(1), bookieAddresses.get(2), + bookieAddresses.get(4), bookieAddresses.get(6))) + .withClosedState() + .withLastEntryId(100) + .withLength(10000) + .withDigestType(DigestType.DUMMY) + .withPassword(new byte[0]) + .build(); + lm.createLedgerMetadata(2L, initMeta).get(); + numOfLedgersNotAdheringToPlacementPolicy++; + + ServerConfiguration servConf = new ServerConfiguration(confByIndex(0)); + servConf.setMinNumRacksPerWriteQuorum(minNumRacksPerWriteQuorumConfValue); + setServerConfigPropertiesForRackPlacement(servConf); + MutableObject auditorRef = new MutableObject(); + try { + TestStatsLogger statsLogger = startAuditorAndWaitForPlacementPolicyCheck(servConf, auditorRef); + Gauge ledgersNotAdheringToPlacementPolicyGuage = statsLogger + .getGauge(ReplicationStats.NUM_LEDGERS_NOT_ADHERING_TO_PLACEMENT_POLICY); + assertEquals("NUM_LEDGERS_NOT_ADHERING_TO_PLACEMENT_POLICY gauge value", + numOfLedgersNotAdheringToPlacementPolicy, ledgersNotAdheringToPlacementPolicyGuage.getSample()); + Gauge ledgersSoftlyAdheringToPlacementPolicyGuage = statsLogger + .getGauge(ReplicationStats.NUM_LEDGERS_SOFTLY_ADHERING_TO_PLACEMENT_POLICY); + assertEquals("NUM_LEDGERS_SOFTLY_ADHERING_TO_PLACEMENT_POLICY gauge value", + 0, ledgersSoftlyAdheringToPlacementPolicyGuage.getSample()); + } finally { + Auditor auditor = auditorRef.getValue(); + if (auditor != null) { + auditor.close(); + } + regManager.close(); + } + } + + @Test + public void testZoneawarePlacementPolicyCheck() throws Exception { + int numOfBookies = 6; + int numOfLedgersNotAdheringToPlacementPolicy = 0; + int numOfLedgersSoftlyAdheringToPlacementPolicy = 0; + List bookieAddresses = new ArrayList(); + RegistrationManager regManager = driver.createRegistrationManager(); + /* + * 6 bookies - 3 zones and 2 uds + */ + for (int i = 0; i < numOfBookies; i++) { + BookieSocketAddress bookieAddress = new BookieSocketAddress("98.98.98." 
+ i, 2181); + bookieAddresses.add(bookieAddress.toBookieId()); + regManager.registerBookie(bookieAddress.toBookieId(), false, BookieServiceInfo.EMPTY); + String zone = "/zone" + (i % 3); + String upgradeDomain = "/ud" + (i % 2); + String networkLocation = zone + upgradeDomain; + StaticDNSResolver.addNodeToRack(bookieAddress.getHostName(), networkLocation); + } + + LedgerManagerFactory mFactory = driver.getLedgerManagerFactory(); + LedgerManager lm = mFactory.newLedgerManager(); + + ServerConfiguration servConf = new ServerConfiguration(confByIndex(0)); + servConf.setDesiredNumZonesPerWriteQuorum(3); + servConf.setMinNumZonesPerWriteQuorum(2); + setServerConfigPropertiesForZonePlacement(servConf); + + /* + * this closed ledger adheres to ZoneAwarePlacementPolicy, since + * ensemble is spread across 3 zones and 2 UDs + */ + LedgerMetadata initMeta = LedgerMetadataBuilder.create() + .withId(1L) + .withEnsembleSize(6) + .withWriteQuorumSize(6) + .withAckQuorumSize(2) + .newEnsembleEntry(0L, bookieAddresses) + .withClosedState() + .withLastEntryId(100) + .withLength(10000) + .withDigestType(DigestType.DUMMY) + .withPassword(new byte[0]) + .build(); + lm.createLedgerMetadata(1L, initMeta).get(); + + /* + * this is non-closed ledger, so though ensemble is not adhering to + * placement policy (since ensemble is not multiple of writeQuorum), + * this shouldn't be reported + */ + initMeta = LedgerMetadataBuilder.create() + .withId(2L) + .withEnsembleSize(6) + .withWriteQuorumSize(5) + .withAckQuorumSize(2) + .newEnsembleEntry(0L, bookieAddresses) + .withDigestType(DigestType.DUMMY) + .withPassword(new byte[0]) + .build(); + lm.createLedgerMetadata(2L, initMeta).get(); + + /* + * this is closed ledger, since ensemble is not multiple of writeQuorum, + * this ledger is not adhering to placement policy. + */ + initMeta = LedgerMetadataBuilder.create() + .withId(3L) + .withEnsembleSize(6) + .withWriteQuorumSize(5) + .withAckQuorumSize(2) + .newEnsembleEntry(0L, bookieAddresses) + .withClosedState() + .withLastEntryId(100) + .withLength(10000) + .withDigestType(DigestType.DUMMY) + .withPassword(new byte[0]) + .build(); + lm.createLedgerMetadata(3L, initMeta).get(); + numOfLedgersNotAdheringToPlacementPolicy++; + + /* + * this closed ledger adheres softly to ZoneAwarePlacementPolicy, since + * ensemble/writeQuorum of size 4 has spread across just + * minNumZonesPerWriteQuorum (2). 
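+         *
+         * A simplified reading of the zone-aware classification used to pick
+         * the expected gauge values in this test, where zones is the number of
+         * distinct zones covered by a write quorum:
+         *
+         *   zones >= desiredNumZonesPerWriteQuorum (3)  ->  adhering
+         *   zones >= minNumZonesPerWriteQuorum (2)      ->  softly adhering
+         *   otherwise                                   ->  not adhering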
+         */
+        List<BookieId> newEnsemble = new ArrayList<>();
+        newEnsemble.add(bookieAddresses.get(0));
+        newEnsemble.add(bookieAddresses.get(1));
+        newEnsemble.add(bookieAddresses.get(3));
+        newEnsemble.add(bookieAddresses.get(4));
+        initMeta = LedgerMetadataBuilder.create()
+                .withId(4L)
+                .withEnsembleSize(4)
+                .withWriteQuorumSize(4)
+                .withAckQuorumSize(2)
+                .newEnsembleEntry(0L, newEnsemble)
+                .withClosedState()
+                .withLastEntryId(100)
+                .withLength(10000)
+                .withDigestType(DigestType.DUMMY)
+                .withPassword(new byte[0])
+                .build();
+        lm.createLedgerMetadata(4L, initMeta).get();
+        numOfLedgersSoftlyAdheringToPlacementPolicy++;
+
+        MutableObject<Auditor> auditorRef = new MutableObject<Auditor>();
+        try {
+            TestStatsLogger statsLogger = startAuditorAndWaitForPlacementPolicyCheck(servConf, auditorRef);
+            Gauge<? extends Number> ledgersNotAdheringToPlacementPolicyGauge = statsLogger
+                    .getGauge(ReplicationStats.NUM_LEDGERS_NOT_ADHERING_TO_PLACEMENT_POLICY);
+            assertEquals("NUM_LEDGERS_NOT_ADHERING_TO_PLACEMENT_POLICY gauge value",
+                    numOfLedgersNotAdheringToPlacementPolicy, ledgersNotAdheringToPlacementPolicyGauge.getSample());
+            Gauge<? extends Number> ledgersSoftlyAdheringToPlacementPolicyGauge = statsLogger
+                    .getGauge(ReplicationStats.NUM_LEDGERS_SOFTLY_ADHERING_TO_PLACEMENT_POLICY);
+            assertEquals("NUM_LEDGERS_SOFTLY_ADHERING_TO_PLACEMENT_POLICY gauge value",
+                    numOfLedgersSoftlyAdheringToPlacementPolicy,
+                    ledgersSoftlyAdheringToPlacementPolicyGauge.getSample());
+        } finally {
+            Auditor auditor = auditorRef.getValue();
+            if (auditor != null) {
+                auditor.close();
+            }
+            regManager.close();
+        }
+    }
+
+    private void setServerConfigPropertiesForRackPlacement(ServerConfiguration servConf) {
+        setServerConfigProperties(servConf, RackawareEnsemblePlacementPolicy.class.getName());
+    }
+
+    private void setServerConfigPropertiesForZonePlacement(ServerConfiguration servConf) {
+        setServerConfigProperties(servConf, ZoneawareEnsemblePlacementPolicy.class.getName());
+    }
+
+    private void setServerConfigProperties(ServerConfiguration servConf, String ensemblePlacementPolicyClass) {
+        servConf.setProperty(REPP_DNS_RESOLVER_CLASS, StaticDNSResolver.class.getName());
+        servConf.setProperty(ClientConfiguration.ENSEMBLE_PLACEMENT_POLICY, ensemblePlacementPolicyClass);
+        servConf.setAuditorPeriodicCheckInterval(0);
+        servConf.setAuditorPeriodicBookieCheckInterval(0);
+        servConf.setAuditorPeriodicReplicasCheckInterval(0);
+        servConf.setAuditorPeriodicPlacementPolicyCheckInterval(1000);
+    }
+
+    private TestStatsLogger startAuditorAndWaitForPlacementPolicyCheck(ServerConfiguration servConf,
+            MutableObject<Auditor> auditorRef) throws MetadataException, CompatibilityException, KeeperException,
+            InterruptedException, UnavailableException, UnknownHostException {
+        LedgerManagerFactory mFactory = driver.getLedgerManagerFactory();
+        LedgerUnderreplicationManager urm = mFactory.newLedgerUnderreplicationManager();
+        TestStatsProvider statsProvider = new TestStatsProvider();
+        TestStatsLogger statsLogger = statsProvider.getStatsLogger(AUDITOR_SCOPE);
+        TestOpStatsLogger placementPolicyCheckStatsLogger = (TestOpStatsLogger) statsLogger
+                .getOpStatsLogger(ReplicationStats.PLACEMENT_POLICY_CHECK_TIME);
+
+        final TestAuditor auditor = new TestAuditor(BookieImpl.getBookieId(servConf).toString(), servConf,
+                statsLogger, null);
+        auditorRef.setValue(auditor);
+        CountDownLatch latch = auditor.getLatch();
+        assertEquals("PLACEMENT_POLICY_CHECK_TIME SuccessCount", 0, placementPolicyCheckStatsLogger.getSuccessCount());
+        urm.setPlacementPolicyCheckCTime(-1);
+        auditor.start();
+        /*
+         * since placementPolicyCheckCTime is set to -1, placementPolicyCheck should be
+         * scheduled to run with no initial delay
+         */
+        assertTrue("placementPolicyCheck should have executed", latch.await(20, TimeUnit.SECONDS));
+        for (int i = 0; i < 20; i++) {
+            Thread.sleep(100);
+            if (placementPolicyCheckStatsLogger.getSuccessCount() >= 1) {
+                break;
+            }
+        }
+        assertEquals("PLACEMENT_POLICY_CHECK_TIME SuccessCount", 1, placementPolicyCheckStatsLogger.getSuccessCount());
+        return statsLogger;
+    }
+}
diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/AuditorReplicasCheckTaskTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/AuditorReplicasCheckTaskTest.java
new file mode 100644
index 00000000000..8e0547a733c
--- /dev/null
+++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/AuditorReplicasCheckTaskTest.java
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bookkeeper.replication;
+
+import static org.apache.bookkeeper.replication.ReplicationStats.AUDITOR_SCOPE;
+import static org.junit.Assert.assertEquals;
+
+import java.util.LinkedList;
+import java.util.List;
+import org.apache.bookkeeper.client.BKException;
+import org.apache.bookkeeper.client.BookKeeper;
+import org.apache.bookkeeper.client.BookKeeperAdmin;
+import org.apache.bookkeeper.client.LedgerHandle;
+import org.apache.bookkeeper.conf.ClientConfiguration;
+import org.apache.bookkeeper.meta.LedgerManager;
+import org.apache.bookkeeper.meta.LedgerManagerFactory;
+import org.apache.bookkeeper.meta.LedgerUnderreplicationManager;
+import org.apache.bookkeeper.stats.NullStatsLogger;
+import org.apache.bookkeeper.test.BookKeeperClusterTestCase;
+import org.apache.bookkeeper.test.TestStatsProvider;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Unit test {@link AuditorReplicasCheckTask}.
+ */ +public class AuditorReplicasCheckTaskTest extends BookKeeperClusterTestCase { + private static final Logger LOG = LoggerFactory + .getLogger(AuditorReplicasCheckTaskTest.class); + + private BookKeeperAdmin admin; + private LedgerManager ledgerManager; + private LedgerUnderreplicationManager ledgerUnderreplicationManager; + + public AuditorReplicasCheckTaskTest() { + super(3); + baseConf.setPageLimit(1); + baseConf.setAutoRecoveryDaemonEnabled(false); + } + + @Override + public void setUp() throws Exception { + super.setUp(); + final BookKeeper bookKeeper = new BookKeeper(baseClientConf); + admin = new BookKeeperAdmin(bookKeeper, NullStatsLogger.INSTANCE, new ClientConfiguration(baseClientConf)); + LedgerManagerFactory ledgerManagerFactory = bookKeeper.getLedgerManagerFactory(); + ledgerManager = ledgerManagerFactory.newLedgerManager(); + ledgerUnderreplicationManager = ledgerManagerFactory.newLedgerUnderreplicationManager(); + } + + @Test + public void testReplicasCheck() throws BKException, InterruptedException { + + // 1. create ledgers + final int numLedgers = 10; + List ids = new LinkedList(); + for (int i = 0; i < numLedgers; i++) { + LedgerHandle lh = bkc.createLedger(3, 3, BookKeeper.DigestType.CRC32, "passwd".getBytes()); + ids.add(lh.getId()); + for (int j = 0; j < 2; j++) { + lh.addEntry("testdata".getBytes()); + } + lh.close(); + } + + // 2. init auditorReplicasCheckTask + final TestStatsProvider statsProvider = new TestStatsProvider(); + final TestStatsProvider.TestStatsLogger statsLogger = statsProvider.getStatsLogger(AUDITOR_SCOPE); + final AuditorStats auditorStats = new AuditorStats(statsLogger); + AuditorReplicasCheckTask auditorReplicasCheckTask = new AuditorReplicasCheckTask( + baseConf, auditorStats, admin, ledgerManager, + ledgerUnderreplicationManager, null, (flag, throwable) -> flag.set(false)); + + // 3. replicasCheck + auditorReplicasCheckTask.runTask(); + + // 4. verify + assertEquals("REPLICAS_CHECK_TIME", 1, ((TestStatsProvider.TestOpStatsLogger) + statsLogger.getOpStatsLogger(ReplicationStats.REPLICAS_CHECK_TIME)).getSuccessCount()); + } +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/AuditorReplicasCheckTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/AuditorReplicasCheckTest.java new file mode 100644 index 00000000000..c64a14eca28 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/AuditorReplicasCheckTest.java @@ -0,0 +1,931 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + */ +package org.apache.bookkeeper.replication; + +import static org.apache.bookkeeper.replication.ReplicationStats.AUDITOR_SCOPE; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.net.URI; +import java.net.UnknownHostException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; +import org.apache.bookkeeper.bookie.BookieException; +import org.apache.bookkeeper.bookie.BookieImpl; +import org.apache.bookkeeper.client.BKException; +import org.apache.bookkeeper.client.BookKeeper; +import org.apache.bookkeeper.client.BookKeeperAdmin; +import org.apache.bookkeeper.client.LedgerMetadataBuilder; +import org.apache.bookkeeper.client.api.DigestType; +import org.apache.bookkeeper.client.api.LedgerMetadata; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.discover.BookieServiceInfo; +import org.apache.bookkeeper.discover.RegistrationManager; +import org.apache.bookkeeper.meta.LedgerManager; +import org.apache.bookkeeper.meta.LedgerManagerFactory; +import org.apache.bookkeeper.meta.LedgerUnderreplicationManager; +import org.apache.bookkeeper.meta.MetadataBookieDriver; +import org.apache.bookkeeper.meta.MetadataDrivers; +import org.apache.bookkeeper.meta.exceptions.MetadataException; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.replication.AuditorPeriodicCheckTest.TestAuditor; +import org.apache.bookkeeper.replication.ReplicationException.CompatibilityException; +import org.apache.bookkeeper.replication.ReplicationException.UnavailableException; +import org.apache.bookkeeper.stats.Gauge; +import org.apache.bookkeeper.stats.NullStatsLogger; +import org.apache.bookkeeper.stats.StatsLogger; +import org.apache.bookkeeper.test.BookKeeperClusterTestCase; +import org.apache.bookkeeper.test.TestStatsProvider; +import org.apache.bookkeeper.test.TestStatsProvider.TestOpStatsLogger; +import org.apache.bookkeeper.test.TestStatsProvider.TestStatsLogger; +import org.apache.bookkeeper.util.AvailabilityOfEntriesOfLedger; +import org.apache.bookkeeper.util.StaticDNSResolver; +import org.apache.commons.collections4.map.MultiKeyMap; +import org.apache.commons.lang3.mutable.MutableObject; +import org.apache.zookeeper.KeeperException; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +/** + * Tests the logic of Auditor's ReplicasCheck. 
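+ *
+ * The scenarios below stub BookKeeperAdmin#asyncGetListOfEntriesOfLedger per
+ * (bookie, ledger) pair and then assert the three replicas-check gauges:
+ * NUM_LEDGERS_HAVING_NO_REPLICA_OF_AN_ENTRY,
+ * NUM_LEDGERS_HAVING_LESS_THAN_AQ_REPLICAS_OF_AN_ENTRY and
+ * NUM_LEDGERS_HAVING_LESS_THAN_WQ_REPLICAS_OF_AN_ENTRY.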
+ */
+public class AuditorReplicasCheckTest extends BookKeeperClusterTestCase {
+    private MetadataBookieDriver driver;
+    private RegistrationManager regManager;
+
+    public AuditorReplicasCheckTest() {
+        super(1);
+        baseConf.setPageLimit(1); // to make it easy to push ledger out of cache
+    }
+
+    @Before
+    @Override
+    public void setUp() throws Exception {
+        super.setUp();
+        StaticDNSResolver.reset();
+        driver = MetadataDrivers.getBookieDriver(URI.create(confByIndex(0).getMetadataServiceUri()));
+        driver.initialize(confByIndex(0), NullStatsLogger.INSTANCE);
+        regManager = driver.createRegistrationManager();
+    }
+
+    @After
+    @Override
+    public void tearDown() throws Exception {
+        if (null != regManager) {
+            regManager.close();
+        }
+        if (null != driver) {
+            driver.close();
+        }
+        super.tearDown();
+    }
+
+    private class TestBookKeeperAdmin extends BookKeeperAdmin {
+
+        private final MultiKeyMap<String, AvailabilityOfEntriesOfLedger> returnAvailabilityOfEntriesOfLedger;
+        private final MultiKeyMap<String, Integer> errorReturnValueForGetAvailabilityOfEntriesOfLedger;
+
+        public TestBookKeeperAdmin(BookKeeper bkc, StatsLogger statsLogger,
+                MultiKeyMap<String, AvailabilityOfEntriesOfLedger> returnAvailabilityOfEntriesOfLedger,
+                MultiKeyMap<String, Integer> errorReturnValueForGetAvailabilityOfEntriesOfLedger) {
+            super(bkc, statsLogger, baseClientConf);
+            this.returnAvailabilityOfEntriesOfLedger = returnAvailabilityOfEntriesOfLedger;
+            this.errorReturnValueForGetAvailabilityOfEntriesOfLedger =
+                    errorReturnValueForGetAvailabilityOfEntriesOfLedger;
+        }
+
+        @Override
+        public CompletableFuture<AvailabilityOfEntriesOfLedger> asyncGetListOfEntriesOfLedger(
+                BookieId address, long ledgerId) {
+            CompletableFuture<AvailabilityOfEntriesOfLedger> futureResult =
+                    new CompletableFuture<AvailabilityOfEntriesOfLedger>();
+            Integer errorReturnValue = errorReturnValueForGetAvailabilityOfEntriesOfLedger.get(address.toString(),
+                    Long.toString(ledgerId));
+            if (errorReturnValue != null) {
+                futureResult.completeExceptionally(BKException.create(errorReturnValue).fillInStackTrace());
+            } else {
+                AvailabilityOfEntriesOfLedger availabilityOfEntriesOfLedger = returnAvailabilityOfEntriesOfLedger
+                        .get(address.toString(), Long.toString(ledgerId));
+                futureResult.complete(availabilityOfEntriesOfLedger);
+            }
+            return futureResult;
+        }
+    }
+
+    private TestStatsLogger startAuditorAndWaitForReplicasCheck(ServerConfiguration servConf,
+            MutableObject<Auditor> auditorRef,
+            MultiKeyMap<String, AvailabilityOfEntriesOfLedger> expectedReturnAvailabilityOfEntriesOfLedger,
+            MultiKeyMap<String, Integer> errorReturnValueForGetAvailabilityOfEntriesOfLedger)
+            throws MetadataException, CompatibilityException, KeeperException, InterruptedException,
+            UnavailableException, UnknownHostException {
+        LedgerManagerFactory mFactory = driver.getLedgerManagerFactory();
+        LedgerUnderreplicationManager urm = mFactory.newLedgerUnderreplicationManager();
+        TestStatsProvider statsProvider = new TestStatsProvider();
+        TestStatsLogger statsLogger = statsProvider.getStatsLogger(AUDITOR_SCOPE);
+        TestOpStatsLogger replicasCheckStatsLogger = (TestOpStatsLogger) statsLogger
+                .getOpStatsLogger(ReplicationStats.REPLICAS_CHECK_TIME);
+
+        final TestAuditor auditor = new TestAuditor(BookieImpl.getBookieId(servConf).toString(), servConf, bkc, true,
+                new TestBookKeeperAdmin(bkc, statsLogger, expectedReturnAvailabilityOfEntriesOfLedger,
+                        errorReturnValueForGetAvailabilityOfEntriesOfLedger),
+                true, statsLogger, null);
+        auditorRef.setValue(auditor);
+        CountDownLatch latch = auditor.getLatch();
+        assertEquals("REPLICAS_CHECK_TIME SuccessCount", 0, replicasCheckStatsLogger.getSuccessCount());
+        urm.setReplicasCheckCTime(-1);
+        auditor.start();
+        /*
+         * since replicasCheckCTime is set to -1, replicasCheck should be
+         * scheduled to run with no initial delay
+         */
+        assertTrue("replicasCheck should have executed", latch.await(20, TimeUnit.SECONDS));
+        for (int i = 0; i < 200; i++) {
+            Thread.sleep(100);
+            if (replicasCheckStatsLogger.getSuccessCount() >= 1) {
+                break;
+            }
+        }
+        assertEquals("REPLICAS_CHECK_TIME SuccessCount", 1, replicasCheckStatsLogger.getSuccessCount());
+        return statsLogger;
+    }
+
+    private void setServerConfigProperties(ServerConfiguration servConf) {
+        servConf.setAuditorPeriodicCheckInterval(0);
+        servConf.setAuditorPeriodicBookieCheckInterval(0);
+        servConf.setAuditorPeriodicPlacementPolicyCheckInterval(0);
+        servConf.setAuditorPeriodicReplicasCheckInterval(1000);
+    }
+
+    List<BookieId> addAndRegisterBookies(int numOfBookies)
+            throws BookieException {
+        BookieId bookieAddress;
+        List<BookieId> bookieAddresses = new ArrayList<BookieId>();
+        for (int i = 0; i < numOfBookies; i++) {
+            bookieAddress = new BookieSocketAddress("98.98.98." + i, 2181).toBookieId();
+            bookieAddresses.add(bookieAddress);
+            regManager.registerBookie(bookieAddress, false, BookieServiceInfo.EMPTY);
+        }
+        return bookieAddresses;
+    }
+
+    private void createClosedLedgerMetadata(LedgerManager lm, long ledgerId, int ensembleSize, int writeQuorumSize,
+            int ackQuorumSize, Map<Long, List<BookieId>> segmentEnsembles, long lastEntryId, int length,
+            DigestType digestType, byte[] password) throws InterruptedException, ExecutionException {
+        LedgerMetadataBuilder ledgerMetadataBuilder = LedgerMetadataBuilder.create();
+        ledgerMetadataBuilder.withId(ledgerId).withEnsembleSize(ensembleSize).withWriteQuorumSize(writeQuorumSize)
+                .withAckQuorumSize(ackQuorumSize).withClosedState().withLastEntryId(lastEntryId).withLength(length)
+                .withDigestType(digestType).withPassword(password);
+        for (Map.Entry<Long, List<BookieId>> mapEntry : segmentEnsembles.entrySet()) {
+            ledgerMetadataBuilder.newEnsembleEntry(mapEntry.getKey(), mapEntry.getValue());
+        }
+        LedgerMetadata initMeta = ledgerMetadataBuilder.build();
+        lm.createLedgerMetadata(ledgerId, initMeta).get();
+    }
+
+    private void createNonClosedLedgerMetadata(LedgerManager lm, long ledgerId, int ensembleSize, int writeQuorumSize,
+            int ackQuorumSize, Map<Long, List<BookieId>> segmentEnsembles, DigestType digestType,
+            byte[] password) throws InterruptedException, ExecutionException {
+        LedgerMetadataBuilder ledgerMetadataBuilder = LedgerMetadataBuilder.create();
+        ledgerMetadataBuilder.withId(ledgerId).withEnsembleSize(ensembleSize).withWriteQuorumSize(writeQuorumSize)
+                .withAckQuorumSize(ackQuorumSize).withDigestType(digestType).withPassword(password);
+        for (Map.Entry<Long, List<BookieId>> mapEntry : segmentEnsembles.entrySet()) {
+            ledgerMetadataBuilder.newEnsembleEntry(mapEntry.getKey(), mapEntry.getValue());
+        }
+        LedgerMetadata initMeta = ledgerMetadataBuilder.build();
+        lm.createLedgerMetadata(ledgerId, initMeta).get();
+    }
+
+    private void runTestScenario(MultiKeyMap<String, AvailabilityOfEntriesOfLedger> returnAvailabilityOfEntriesOfLedger,
+            MultiKeyMap<String, Integer> errorReturnValueForGetAvailabilityOfEntriesOfLedger,
+            int expectedNumLedgersFoundHavingNoReplicaOfAnEntry,
+            int expectedNumLedgersHavingLessThanAQReplicasOfAnEntry,
+            int expectedNumLedgersHavingLessThanWQReplicasOfAnEntry) throws Exception {
+        ServerConfiguration servConf = new ServerConfiguration(confByIndex(0));
+        setServerConfigProperties(servConf);
+        MutableObject<Auditor> auditorRef = new MutableObject<Auditor>();
+        try {
+            TestStatsLogger statsLogger = startAuditorAndWaitForReplicasCheck(servConf, auditorRef,
+                    returnAvailabilityOfEntriesOfLedger, errorReturnValueForGetAvailabilityOfEntriesOfLedger);
+            checkReplicasCheckStats(statsLogger, expectedNumLedgersFoundHavingNoReplicaOfAnEntry,
+                    expectedNumLedgersHavingLessThanAQReplicasOfAnEntry,
+                    expectedNumLedgersHavingLessThanWQReplicasOfAnEntry);
+        } finally {
+            Auditor auditor = auditorRef.getValue();
+            if (auditor != null) {
+                auditor.close();
+            }
+        }
+    }
+
+    private void checkReplicasCheckStats(TestStatsLogger statsLogger,
+            int expectedNumLedgersFoundHavingNoReplicaOfAnEntry,
+            int expectedNumLedgersHavingLessThanAQReplicasOfAnEntry,
+            int expectedNumLedgersHavingLessThanWQReplicasOfAnEntry) {
+        Gauge<? extends Number> numLedgersFoundHavingNoReplicaOfAnEntryGauge = statsLogger
+                .getGauge(ReplicationStats.NUM_LEDGERS_HAVING_NO_REPLICA_OF_AN_ENTRY);
+        Gauge<? extends Number> numLedgersHavingLessThanAQReplicasOfAnEntryGauge = statsLogger
+                .getGauge(ReplicationStats.NUM_LEDGERS_HAVING_LESS_THAN_AQ_REPLICAS_OF_AN_ENTRY);
+        Gauge<? extends Number> numLedgersHavingLessThanWQReplicasOfAnEntryGauge = statsLogger
+                .getGauge(ReplicationStats.NUM_LEDGERS_HAVING_LESS_THAN_WQ_REPLICAS_OF_AN_ENTRY);
+
+        assertEquals("NUM_LEDGERS_HAVING_NO_REPLICA_OF_AN_ENTRY gauge value",
+                expectedNumLedgersFoundHavingNoReplicaOfAnEntry,
+                numLedgersFoundHavingNoReplicaOfAnEntryGauge.getSample());
+        assertEquals("NUM_LEDGERS_HAVING_LESS_THAN_AQ_REPLICAS_OF_AN_ENTRY gauge value",
+                expectedNumLedgersHavingLessThanAQReplicasOfAnEntry,
+                numLedgersHavingLessThanAQReplicasOfAnEntryGauge.getSample());
+        assertEquals("NUM_LEDGERS_HAVING_LESS_THAN_WQ_REPLICAS_OF_AN_ENTRY gauge value",
+                expectedNumLedgersHavingLessThanWQReplicasOfAnEntry,
+                numLedgersHavingLessThanWQReplicasOfAnEntryGauge.getSample());
+    }
+
+    /*
+     * For all the ledgers and for all the bookies,
+     * asyncGetListOfEntriesOfLedger would return
+     * BookieHandleNotAvailableException, so these ledgers wouldn't be counted
+     * against expectedNumLedgersFoundHavingNoReplicaOfAnEntry /
+     * LessThanAQReplicasOfAnEntry / LessThanWQReplicasOfAnEntry.
+     */
+    @Test
+    public void testReplicasCheckForBookieHandleNotAvailable() throws Exception {
+        int numOfBookies = 5;
+        MultiKeyMap<String, AvailabilityOfEntriesOfLedger> returnAvailabilityOfEntriesOfLedger =
+                new MultiKeyMap<>();
+        MultiKeyMap<String, Integer> errorReturnValueForGetAvailabilityOfEntriesOfLedger =
+                new MultiKeyMap<>();
+        List<BookieId> bookieAddresses = addAndRegisterBookies(numOfBookies);
+
+        LedgerManagerFactory mFactory = driver.getLedgerManagerFactory();
+        LedgerManager lm = mFactory.newLedgerManager();
+        int ensembleSize = 5;
+        int writeQuorumSize = 4;
+        int ackQuorumSize = 2;
+        long lastEntryId = 100;
+        int length = 10000;
+        DigestType digestType = DigestType.DUMMY;
+        byte[] password = new byte[0];
+        Collections.shuffle(bookieAddresses);
+
+        /*
+         * closed ledger
+         *
+         * for this ledger, for all the bookies we are setting
+         * errorReturnValueForGetAvailabilityOfEntriesOfLedger to
+         * BookieHandleNotAvailableException so asyncGetListOfEntriesOfLedger will
+         * return BookieHandleNotAvailableException.
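+         *
+         * The stubbing keys off commons-collections' MultiKeyMap with a
+         * (bookie, ledgerId) key pair; a minimal usage sketch (names here are
+         * illustrative only):
+         *
+         *   MultiKeyMap<String, Integer> errors = new MultiKeyMap<>();
+         *   errors.put(bookie.toString(), Long.toString(ledgerId),
+         *           BKException.Code.BookieHandleNotAvailableException);
+         *   Integer code = errors.get(bookie.toString(), Long.toString(ledgerId));
+         *
+         * TestBookKeeperAdmin above completes the returned future exceptionally
+         * whenever such an error code is present for the requested pair.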
+         */
+        Map<Long, List<BookieId>> segmentEnsembles = new LinkedHashMap<Long, List<BookieId>>();
+        segmentEnsembles.put(0L, bookieAddresses);
+        long ledgerId = 1L;
+        createClosedLedgerMetadata(lm, ledgerId, ensembleSize, writeQuorumSize, ackQuorumSize, segmentEnsembles,
+                lastEntryId, length, digestType, password);
+        for (BookieId bookieSocketAddress : bookieAddresses) {
+            errorReturnValueForGetAvailabilityOfEntriesOfLedger.put(bookieSocketAddress.toString(),
+                    Long.toString(ledgerId), BKException.Code.BookieHandleNotAvailableException);
+        }
+
+        ensembleSize = 4;
+        /*
+         * closed ledger with multiple segments
+         *
+         * for this ledger, for all the bookies we are setting
+         * errorReturnValueForGetAvailabilityOfEntriesOfLedger to
+         * BookieHandleNotAvailableException so asyncGetListOfEntriesOfLedger will
+         * return BookieHandleNotAvailableException.
+         */
+        segmentEnsembles.clear();
+        segmentEnsembles.put(0L, bookieAddresses.subList(0, 4));
+        segmentEnsembles.put(20L, bookieAddresses.subList(1, 5));
+        segmentEnsembles.put(60L, bookieAddresses.subList(0, 4));
+        ledgerId = 2L;
+        createClosedLedgerMetadata(lm, ledgerId, ensembleSize, writeQuorumSize, ackQuorumSize, segmentEnsembles,
+                lastEntryId, length, digestType, password);
+        for (BookieId bookieSocketAddress : bookieAddresses) {
+            errorReturnValueForGetAvailabilityOfEntriesOfLedger.put(bookieSocketAddress.toString(),
+                    Long.toString(ledgerId), BKException.Code.BookieHandleNotAvailableException);
+        }
+
+        /*
+         * non-closed ledger
+         */
+        segmentEnsembles.clear();
+        segmentEnsembles.put(0L, bookieAddresses.subList(0, 4));
+        ledgerId = 3L;
+        createNonClosedLedgerMetadata(lm, ledgerId, ensembleSize, writeQuorumSize, ackQuorumSize, segmentEnsembles,
+                digestType, password);
+        for (BookieId bookieSocketAddress : bookieAddresses) {
+            errorReturnValueForGetAvailabilityOfEntriesOfLedger.put(bookieSocketAddress.toString(),
+                    Long.toString(ledgerId), BKException.Code.BookieHandleNotAvailableException);
+        }
+
+        /*
+         * non-closed ledger with multiple segments
+         */
+        segmentEnsembles.clear();
+        segmentEnsembles.put(0L, bookieAddresses.subList(0, 4));
+        segmentEnsembles.put(20L, bookieAddresses.subList(1, 5));
+        segmentEnsembles.put(60L, bookieAddresses.subList(0, 4));
+        ledgerId = 4L;
+        createNonClosedLedgerMetadata(lm, ledgerId, ensembleSize, writeQuorumSize, ackQuorumSize, segmentEnsembles,
+                digestType, password);
+        for (BookieId bookieSocketAddress : bookieAddresses) {
+            errorReturnValueForGetAvailabilityOfEntriesOfLedger.put(bookieSocketAddress.toString(),
+                    Long.toString(ledgerId), BKException.Code.BookieHandleNotAvailableException);
+        }
+
+        runTestScenario(returnAvailabilityOfEntriesOfLedger, errorReturnValueForGetAvailabilityOfEntriesOfLedger, 0, 0,
+                0);
+    }
+
+    /*
+     * In this test scenario all the ledgers have a missing entry. So all closed
+     * ledgers should be counted towards
+     * numLedgersFoundHavingNoReplicaOfAnEntry.
+     */
+    @Test
+    public void testReplicasCheckForLedgersFoundHavingNoReplica() throws Exception {
+        int numOfBookies = 5;
+        MultiKeyMap<String, AvailabilityOfEntriesOfLedger> returnAvailabilityOfEntriesOfLedger =
+                new MultiKeyMap<>();
+        MultiKeyMap<String, Integer> errorReturnValueForGetAvailabilityOfEntriesOfLedger =
+                new MultiKeyMap<>();
+        List<BookieId> bookieAddresses = addAndRegisterBookies(numOfBookies);
+
+        LedgerManagerFactory mFactory = driver.getLedgerManagerFactory();
+        LedgerManager lm = mFactory.newLedgerManager();
+        int ensembleSize = 5;
+        int writeQuorumSize = 4;
+        int ackQuorumSize = 2;
+        long lastEntryId = 100;
+        int length = 10000;
+        DigestType digestType = DigestType.DUMMY;
+        byte[] password = new byte[0];
+        Collections.shuffle(bookieAddresses);
+
+        int numLedgersFoundHavingNoReplicaOfAnEntry = 0;
+
+        /*
+         * closed ledger
+         *
+         * for this ledger we are setting returnAvailabilityOfEntriesOfLedger to
+         * the empty one for all of the bookies, so this ledger would be counted
+         * in ledgersFoundHavingNoReplicaOfAnEntry.
+         */
+        Map<Long, List<BookieId>> segmentEnsembles = new LinkedHashMap<Long, List<BookieId>>();
+        segmentEnsembles.put(0L, bookieAddresses);
+        long ledgerId = 1L;
+        createClosedLedgerMetadata(lm, ledgerId, ensembleSize, writeQuorumSize, ackQuorumSize, segmentEnsembles,
+                lastEntryId, length, digestType, password);
+        for (BookieId bookieSocketAddress : bookieAddresses) {
+            returnAvailabilityOfEntriesOfLedger.put(bookieSocketAddress.toString(), Long.toString(ledgerId),
+                    AvailabilityOfEntriesOfLedger.EMPTY_AVAILABILITYOFENTRIESOFLEDGER);
+        }
+        numLedgersFoundHavingNoReplicaOfAnEntry++;
+
+        ensembleSize = 4;
+        /*
+         * closed ledger with multiple segments
+         *
+         * for this ledger we are setting
+         * errorReturnValueForGetAvailabilityOfEntriesOfLedger to
+         * NoSuchLedgerExistsException. This is equivalent to
+         * EMPTY_AVAILABILITYOFENTRIESOFLEDGER.
So this ledger would be counted + * in ledgersFoundHavingNoReplicaOfAnEntry + */ + segmentEnsembles.clear(); + segmentEnsembles.put(0L, bookieAddresses.subList(0, 4)); + segmentEnsembles.put(20L, bookieAddresses.subList(1, 5)); + segmentEnsembles.put(60L, bookieAddresses.subList(0, 4)); + ledgerId = 2L; + createClosedLedgerMetadata(lm, ledgerId, ensembleSize, writeQuorumSize, ackQuorumSize, segmentEnsembles, + lastEntryId, length, digestType, password); + for (BookieId bookieSocketAddress : bookieAddresses) { + errorReturnValueForGetAvailabilityOfEntriesOfLedger.put(bookieSocketAddress.toString(), + Long.toString(ledgerId), BKException.Code.NoSuchLedgerExistsException); + } + numLedgersFoundHavingNoReplicaOfAnEntry++; + + /* + * non-closed ledger + * + * since this is non-closed ledger, it should not be counted in + * ledgersFoundHavingNoReplicaOfAnEntry + */ + segmentEnsembles.clear(); + segmentEnsembles.put(0L, bookieAddresses.subList(0, 4)); + ledgerId = 3L; + createNonClosedLedgerMetadata(lm, ledgerId, ensembleSize, writeQuorumSize, ackQuorumSize, segmentEnsembles, + digestType, password); + for (BookieId bookieSocketAddress : bookieAddresses) { + returnAvailabilityOfEntriesOfLedger.put(bookieSocketAddress.toString(), Long.toString(ledgerId), + AvailabilityOfEntriesOfLedger.EMPTY_AVAILABILITYOFENTRIESOFLEDGER); + } + + ensembleSize = 3; + writeQuorumSize = 3; + ackQuorumSize = 2; + lastEntryId = 1; + length = 1000; + /* + * closed ledger + * + * for this ledger we are setting returnAvailabilityOfEntriesOfLedger to + * just {0l} for all of the bookies and entry 1l is missing for all of + * the bookies, so this ledger would be counted in + * ledgersFoundHavingNoReplicaOfAnEntry + */ + segmentEnsembles.clear(); + segmentEnsembles.put(0L, bookieAddresses.subList(0, 3)); + ledgerId = 4L; + createClosedLedgerMetadata(lm, ledgerId, ensembleSize, writeQuorumSize, ackQuorumSize, segmentEnsembles, + lastEntryId, length, digestType, password); + for (BookieId bookieSocketAddress : bookieAddresses) { + returnAvailabilityOfEntriesOfLedger.put(bookieSocketAddress.toString(), Long.toString(ledgerId), + new AvailabilityOfEntriesOfLedger(new long[] { 0L })); + } + numLedgersFoundHavingNoReplicaOfAnEntry++; + + /* + * For this closed ledger, entry 1 is missing. So it should be counted + * towards numLedgersFoundHavingNoReplicaOfAnEntry. 
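+         *
+         * Check: taking the union of the per-bookie availability lists set up
+         * below ({0}, {0, 3}, {0} and {2, 3}), entries 0, 2 and 3 each have at
+         * least one replica, while entry 1 of 0..3 appears in none of them, so
+         * exactly this ledger trips the no-replica counter.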
+         */
+        ensembleSize = 4;
+        writeQuorumSize = 3;
+        ackQuorumSize = 2;
+        lastEntryId = 3;
+        length = 10000;
+        segmentEnsembles.put(0L, bookieAddresses.subList(0, 4));
+        ledgerId = 5L;
+        createClosedLedgerMetadata(lm, ledgerId, ensembleSize, writeQuorumSize, ackQuorumSize, segmentEnsembles,
+                lastEntryId, length, digestType, password);
+        returnAvailabilityOfEntriesOfLedger.put(bookieAddresses.get(0).toString(), Long.toString(ledgerId),
+                new AvailabilityOfEntriesOfLedger(new long[] { 0 }));
+        returnAvailabilityOfEntriesOfLedger.put(bookieAddresses.get(1).toString(), Long.toString(ledgerId),
+                new AvailabilityOfEntriesOfLedger(new long[] { 0, 3 }));
+        returnAvailabilityOfEntriesOfLedger.put(bookieAddresses.get(2).toString(), Long.toString(ledgerId),
+                new AvailabilityOfEntriesOfLedger(new long[] { 0 }));
+        returnAvailabilityOfEntriesOfLedger.put(bookieAddresses.get(3).toString(), Long.toString(ledgerId),
+                new AvailabilityOfEntriesOfLedger(new long[] { 2, 3 }));
+        numLedgersFoundHavingNoReplicaOfAnEntry++;
+
+        runTestScenario(returnAvailabilityOfEntriesOfLedger, errorReturnValueForGetAvailabilityOfEntriesOfLedger,
+                numLedgersFoundHavingNoReplicaOfAnEntry, 0, 0);
+    }
+
+    /*
+     * In this test scenario all the ledgers have an entry with less than AQ
+     * number of copies. So all closed ledgers should be counted towards
+     * numLedgersFoundHavingLessThanAQReplicasOfAnEntry.
+     */
+    @Test
+    public void testReplicasCheckForLedgersFoundHavingLessThanAQReplicasOfAnEntry() throws Exception {
+        int numOfBookies = 5;
+        MultiKeyMap<String, AvailabilityOfEntriesOfLedger> returnAvailabilityOfEntriesOfLedger =
+                new MultiKeyMap<>();
+        MultiKeyMap<String, Integer> errorReturnValueForGetAvailabilityOfEntriesOfLedger =
+                new MultiKeyMap<>();
+        List<BookieId> bookieAddresses = addAndRegisterBookies(numOfBookies);
+
+        LedgerManagerFactory mFactory = driver.getLedgerManagerFactory();
+        LedgerManager lm = mFactory.newLedgerManager();
+        DigestType digestType = DigestType.DUMMY;
+        byte[] password = new byte[0];
+        Collections.shuffle(bookieAddresses);
+
+        int numLedgersFoundHavingLessThanAQReplicasOfAnEntry = 0;
+
+        /*
+         * closed ledger
+         *
+         * for this ledger there is only one copy of entry 2, so this ledger
+         * would be counted towards
+         * ledgersFoundHavingLessThanAQReplicasOfAnEntry.
+         */
+        Map<Long, List<BookieId>> segmentEnsembles = new LinkedHashMap<Long, List<BookieId>>();
+        int ensembleSize = 4;
+        int writeQuorumSize = 3;
+        int ackQuorumSize = 2;
+        long lastEntryId = 3;
+        int length = 10000;
+        segmentEnsembles.put(0L, bookieAddresses.subList(0, 4));
+        long ledgerId = 1L;
+        createClosedLedgerMetadata(lm, ledgerId, ensembleSize, writeQuorumSize, ackQuorumSize, segmentEnsembles,
+                lastEntryId, length, digestType, password);
+        returnAvailabilityOfEntriesOfLedger.put(bookieAddresses.get(0).toString(), Long.toString(ledgerId),
+                new AvailabilityOfEntriesOfLedger(new long[] { 0 }));
+        returnAvailabilityOfEntriesOfLedger.put(bookieAddresses.get(1).toString(), Long.toString(ledgerId),
+                new AvailabilityOfEntriesOfLedger(new long[] { 0, 1, 3 }));
+        returnAvailabilityOfEntriesOfLedger.put(bookieAddresses.get(2).toString(), Long.toString(ledgerId),
+                new AvailabilityOfEntriesOfLedger(new long[] { 0, 1 }));
+        returnAvailabilityOfEntriesOfLedger.put(bookieAddresses.get(3).toString(), Long.toString(ledgerId),
+                new AvailabilityOfEntriesOfLedger(new long[] { 1, 2, 3 }));
+        numLedgersFoundHavingLessThanAQReplicasOfAnEntry++;
+
+        /*
+         * closed ledger with multiple segments.
+         *
+         * for this ledger there is only one copy of entry 2, so this ledger
+         * would be counted towards
+         * ledgersFoundHavingLessThanAQReplicasOfAnEntry.
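+         *
+         * Check: ackQuorumSize is 2, so an entry with a single reported copy is
+         * below AQ. Of the availability lists set up below, only
+         * bookieAddresses.get(1)'s {0, 1, 2, 3} contains entry 2, and the whole
+         * ledger is expected to be counted exactly once.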
+ * + */ + segmentEnsembles.clear(); + segmentEnsembles.put(0L, bookieAddresses.subList(0, 4)); + segmentEnsembles.put(2L, bookieAddresses.subList(1, 5)); + ledgerId = 2L; + createClosedLedgerMetadata(lm, ledgerId, ensembleSize, writeQuorumSize, ackQuorumSize, segmentEnsembles, + lastEntryId, length, digestType, password); + returnAvailabilityOfEntriesOfLedger.put(bookieAddresses.get(0).toString(), Long.toString(ledgerId), + new AvailabilityOfEntriesOfLedger(new long[] {})); + returnAvailabilityOfEntriesOfLedger.put(bookieAddresses.get(1).toString(), Long.toString(ledgerId), + new AvailabilityOfEntriesOfLedger(new long[] { 0, 1, 2, 3 })); + returnAvailabilityOfEntriesOfLedger.put(bookieAddresses.get(2).toString(), Long.toString(ledgerId), + new AvailabilityOfEntriesOfLedger(new long[] { 0, 1, 3 })); + returnAvailabilityOfEntriesOfLedger.put(bookieAddresses.get(3).toString(), Long.toString(ledgerId), + new AvailabilityOfEntriesOfLedger(new long[] { 1 })); + returnAvailabilityOfEntriesOfLedger.put(bookieAddresses.get(4).toString(), Long.toString(ledgerId), + new AvailabilityOfEntriesOfLedger(new long[] { 3 })); + numLedgersFoundHavingLessThanAQReplicasOfAnEntry++; + + /* + * closed ledger with multiple segments + * + * for this ledger entry 2 is overrreplicated, but it has only one copy + * in the set of bookies it is supposed to be. So it should be counted + * towards ledgersFoundHavingLessThanAQReplicasOfAnEntry. + */ + segmentEnsembles.clear(); + segmentEnsembles.put(0L, bookieAddresses.subList(0, 4)); + segmentEnsembles.put(2L, bookieAddresses.subList(1, 5)); + ledgerId = 3L; + createClosedLedgerMetadata(lm, ledgerId, ensembleSize, writeQuorumSize, ackQuorumSize, segmentEnsembles, + lastEntryId, length, digestType, password); + returnAvailabilityOfEntriesOfLedger.put(bookieAddresses.get(0).toString(), Long.toString(ledgerId), + new AvailabilityOfEntriesOfLedger(new long[] { 2 })); + returnAvailabilityOfEntriesOfLedger.put(bookieAddresses.get(1).toString(), Long.toString(ledgerId), + new AvailabilityOfEntriesOfLedger(new long[] { 0, 1, 2, 3 })); + returnAvailabilityOfEntriesOfLedger.put(bookieAddresses.get(2).toString(), Long.toString(ledgerId), + new AvailabilityOfEntriesOfLedger(new long[] { 0, 1, 3 })); + returnAvailabilityOfEntriesOfLedger.put(bookieAddresses.get(3).toString(), Long.toString(ledgerId), + new AvailabilityOfEntriesOfLedger(new long[] { 1 })); + returnAvailabilityOfEntriesOfLedger.put(bookieAddresses.get(4).toString(), Long.toString(ledgerId), + new AvailabilityOfEntriesOfLedger(new long[] { 3 })); + numLedgersFoundHavingLessThanAQReplicasOfAnEntry++; + + /* + * non-closed ledger + * + * since this is non-closed ledger, it should not be counted towards + * ledgersFoundHavingLessThanAQReplicasOfAnEntry + */ + segmentEnsembles.clear(); + segmentEnsembles.put(0L, bookieAddresses.subList(0, 4)); + segmentEnsembles.put(2L, bookieAddresses.subList(1, 5)); + ledgerId = 4L; + createNonClosedLedgerMetadata(lm, ledgerId, ensembleSize, writeQuorumSize, ackQuorumSize, segmentEnsembles, + digestType, password); + returnAvailabilityOfEntriesOfLedger.put(bookieAddresses.get(0).toString(), Long.toString(ledgerId), + new AvailabilityOfEntriesOfLedger(new long[] {})); + returnAvailabilityOfEntriesOfLedger.put(bookieAddresses.get(1).toString(), Long.toString(ledgerId), + new AvailabilityOfEntriesOfLedger(new long[] { 0, 1, 2, 3 })); + returnAvailabilityOfEntriesOfLedger.put(bookieAddresses.get(2).toString(), Long.toString(ledgerId), + new AvailabilityOfEntriesOfLedger(new long[] { 0, 1, 3 
}));
+        returnAvailabilityOfEntriesOfLedger.put(bookieAddresses.get(3).toString(), Long.toString(ledgerId),
+                new AvailabilityOfEntriesOfLedger(new long[] { 1 }));
+        returnAvailabilityOfEntriesOfLedger.put(bookieAddresses.get(4).toString(), Long.toString(ledgerId),
+                new AvailabilityOfEntriesOfLedger(new long[] { 3 }));
+
+        /*
+         * this is a closed ledger.
+         *
+         * For the third bookie, asyncGetListOfEntriesOfLedger will fail with
+         * BookieHandleNotAvailableException, so this should not be counted
+         * against missing copies of an entry. Other than that, for both entries
+         * 0 and 1, two copies are missing. Hence this should be counted towards
+         * numLedgersFoundHavingLessThanAQReplicasOfAnEntry.
+         */
+        ensembleSize = 3;
+        writeQuorumSize = 3;
+        ackQuorumSize = 2;
+        lastEntryId = 1;
+        length = 1000;
+        segmentEnsembles.clear();
+        segmentEnsembles.put(0L, bookieAddresses.subList(0, 3));
+        ledgerId = 5L;
+        createClosedLedgerMetadata(lm, ledgerId, ensembleSize, writeQuorumSize, ackQuorumSize, segmentEnsembles,
+                lastEntryId, length, digestType, password);
+        returnAvailabilityOfEntriesOfLedger.put(bookieAddresses.get(0).toString(), Long.toString(ledgerId),
+                AvailabilityOfEntriesOfLedger.EMPTY_AVAILABILITYOFENTRIESOFLEDGER);
+        returnAvailabilityOfEntriesOfLedger.put(bookieAddresses.get(1).toString(), Long.toString(ledgerId),
+                AvailabilityOfEntriesOfLedger.EMPTY_AVAILABILITYOFENTRIESOFLEDGER);
+        errorReturnValueForGetAvailabilityOfEntriesOfLedger.put(bookieAddresses.get(2).toString(),
+                Long.toString(ledgerId), BKException.Code.BookieHandleNotAvailableException);
+        numLedgersFoundHavingLessThanAQReplicasOfAnEntry++;
+
+        runTestScenario(returnAvailabilityOfEntriesOfLedger, errorReturnValueForGetAvailabilityOfEntriesOfLedger, 0,
+                numLedgersFoundHavingLessThanAQReplicasOfAnEntry, 0);
+    }
+
+    /*
+     * In this test scenario all the ledgers have an entry with fewer than WQ
+     * copies but at least AQ copies. So all closed ledgers should be counted
+     * towards numLedgersFoundHavingLessThanWQReplicasOfAnEntry.
+     */
+    @Test
+    public void testReplicasCheckForLedgersFoundHavingLessThanWQReplicasOfAnEntry() throws Exception {
+        int numOfBookies = 5;
+        MultiKeyMap<String, AvailabilityOfEntriesOfLedger> returnAvailabilityOfEntriesOfLedger =
+                new MultiKeyMap<>();
+        MultiKeyMap<String, Integer> errorReturnValueForGetAvailabilityOfEntriesOfLedger =
+                new MultiKeyMap<>();
+        List<BookieId> bookieAddresses = addAndRegisterBookies(numOfBookies);
+
+        LedgerManagerFactory mFactory = driver.getLedgerManagerFactory();
+        LedgerManager lm = mFactory.newLedgerManager();
+        DigestType digestType = DigestType.DUMMY;
+        byte[] password = new byte[0];
+        Collections.shuffle(bookieAddresses);
+
+        int numLedgersFoundHavingLessThanWQReplicasOfAnEntry = 0;
+
+        /*
+         * closed ledger
+         *
+         * for this ledger a copy of entry 3 is missing, so this ledger would be
+         * counted towards ledgersFoundHavingLessThanWQReplicasOfAnEntry.
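+         *
+         * Check: writeQuorumSize is 3, so each entry should have three copies.
+         * In the lists set up below, entry 3 is reported only by two bookies
+         * ({0, 1, 3} and {1, 2, 3}), which is at least AQ (2) but below WQ (3),
+         * exactly the less-than-WQ bucket.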
+         */
+        Map<Long, List<BookieId>> segmentEnsembles = new LinkedHashMap<Long, List<BookieId>>();
+        int ensembleSize = 4;
+        int writeQuorumSize = 3;
+        int ackQuorumSize = 2;
+        long lastEntryId = 3;
+        int length = 10000;
+        segmentEnsembles.put(0L, bookieAddresses.subList(0, 4));
+        long ledgerId = 1L;
+        createClosedLedgerMetadata(lm, ledgerId, ensembleSize, writeQuorumSize, ackQuorumSize, segmentEnsembles,
+                lastEntryId, length, digestType, password);
+        returnAvailabilityOfEntriesOfLedger.put(bookieAddresses.get(0).toString(), Long.toString(ledgerId),
+                new AvailabilityOfEntriesOfLedger(new long[] { 0, 2 }));
+        returnAvailabilityOfEntriesOfLedger.put(bookieAddresses.get(1).toString(), Long.toString(ledgerId),
+                new AvailabilityOfEntriesOfLedger(new long[] { 0, 1, 3 }));
+        returnAvailabilityOfEntriesOfLedger.put(bookieAddresses.get(2).toString(), Long.toString(ledgerId),
+                new AvailabilityOfEntriesOfLedger(new long[] { 0, 1 }));
+        returnAvailabilityOfEntriesOfLedger.put(bookieAddresses.get(3).toString(), Long.toString(ledgerId),
+                new AvailabilityOfEntriesOfLedger(new long[] { 1, 2, 3 }));
+        numLedgersFoundHavingLessThanWQReplicasOfAnEntry++;
+
+        /*
+         * closed ledger with multiple segments
+         *
+         * for this ledger a copy of entry 0 and a copy of entry 2 are missing,
+         * so this ledger would be counted towards
+         * ledgersFoundHavingLessThanWQReplicasOfAnEntry.
+         */
+        segmentEnsembles.clear();
+        segmentEnsembles.put(0L, bookieAddresses.subList(0, 4));
+        segmentEnsembles.put(2L, bookieAddresses.subList(1, 5));
+        ledgerId = 2L;
+        createClosedLedgerMetadata(lm, ledgerId, ensembleSize, writeQuorumSize, ackQuorumSize, segmentEnsembles,
+                lastEntryId, length, digestType, password);
+        returnAvailabilityOfEntriesOfLedger.put(bookieAddresses.get(0).toString(), Long.toString(ledgerId),
+                new AvailabilityOfEntriesOfLedger(new long[] {}));
+        returnAvailabilityOfEntriesOfLedger.put(bookieAddresses.get(1).toString(), Long.toString(ledgerId),
+                new AvailabilityOfEntriesOfLedger(new long[] { 0, 1, 2, 3 }));
+        returnAvailabilityOfEntriesOfLedger.put(bookieAddresses.get(2).toString(), Long.toString(ledgerId),
+                new AvailabilityOfEntriesOfLedger(new long[] { 0, 1, 3 }));
+        returnAvailabilityOfEntriesOfLedger.put(bookieAddresses.get(3).toString(), Long.toString(ledgerId),
+                new AvailabilityOfEntriesOfLedger(new long[] { 1 }));
+        returnAvailabilityOfEntriesOfLedger.put(bookieAddresses.get(4).toString(), Long.toString(ledgerId),
+                new AvailabilityOfEntriesOfLedger(new long[] { 2, 3 }));
+        numLedgersFoundHavingLessThanWQReplicasOfAnEntry++;
+
+        /*
+         * non-closed ledger with multiple segments
+         *
+         * since this is a non-closed ledger, it should not be counted towards
+         * ledgersFoundHavingLessThanWQReplicasOfAnEntry
+         */
+        segmentEnsembles.clear();
+        segmentEnsembles.put(0L, bookieAddresses.subList(0, 4));
+        segmentEnsembles.put(2L, bookieAddresses.subList(1, 5));
+        ledgerId = 3L;
+        createNonClosedLedgerMetadata(lm, ledgerId, ensembleSize, writeQuorumSize, ackQuorumSize, segmentEnsembles,
+                digestType, password);
+        errorReturnValueForGetAvailabilityOfEntriesOfLedger.put(bookieAddresses.get(0).toString(),
+                Long.toString(ledgerId), BKException.Code.NoSuchLedgerExistsException);
+        returnAvailabilityOfEntriesOfLedger.put(bookieAddresses.get(1).toString(), Long.toString(ledgerId),
+                new AvailabilityOfEntriesOfLedger(new long[] { 0, 1, 2, 3 }));
+        returnAvailabilityOfEntriesOfLedger.put(bookieAddresses.get(2).toString(), Long.toString(ledgerId),
+                new AvailabilityOfEntriesOfLedger(new long[] { 0, 1, 3 }));
+        returnAvailabilityOfEntriesOfLedger.put(bookieAddresses.get(3).toString(), Long.toString(ledgerId),
+                new AvailabilityOfEntriesOfLedger(new long[] { 1 }));
+        returnAvailabilityOfEntriesOfLedger.put(bookieAddresses.get(4).toString(), Long.toString(ledgerId),
+                new AvailabilityOfEntriesOfLedger(new long[] { 2, 3 }));
+
+        /*
+         * closed ledger.
+         *
+         * for this ledger entry 0 is over-replicated, but a copy is missing
+         * from the set of bookies it is supposed to be on. So it should be
+         * counted towards ledgersFoundHavingLessThanWQReplicasOfAnEntry.
+         */
+        ensembleSize = 4;
+        writeQuorumSize = 3;
+        ackQuorumSize = 2;
+        lastEntryId = 1;
+        length = 1000;
+        segmentEnsembles.clear();
+        segmentEnsembles.put(0L, bookieAddresses.subList(0, 4));
+        ledgerId = 4L;
+        createClosedLedgerMetadata(lm, ledgerId, ensembleSize, writeQuorumSize, ackQuorumSize, segmentEnsembles,
+                lastEntryId, length, digestType, password);
+        returnAvailabilityOfEntriesOfLedger.put(bookieAddresses.get(0).toString(), Long.toString(ledgerId),
+                new AvailabilityOfEntriesOfLedger(new long[] { 0, 1 }));
+        returnAvailabilityOfEntriesOfLedger.put(bookieAddresses.get(1).toString(), Long.toString(ledgerId),
+                new AvailabilityOfEntriesOfLedger(new long[] { 0, 1, 2, 3 }));
+        returnAvailabilityOfEntriesOfLedger.put(bookieAddresses.get(2).toString(), Long.toString(ledgerId),
+                new AvailabilityOfEntriesOfLedger(new long[] { 1, 3 }));
+        returnAvailabilityOfEntriesOfLedger.put(bookieAddresses.get(3).toString(), Long.toString(ledgerId),
+                new AvailabilityOfEntriesOfLedger(new long[] { 0 }));
+        numLedgersFoundHavingLessThanWQReplicasOfAnEntry++;
+
+        /*
+         * this is a closed ledger.
+         *
+         * For the third bookie, asyncGetListOfEntriesOfLedger will fail with
+         * BookieHandleNotAvailableException, so this should not be counted
+         * against missing copies of an entry. Other than that, for both entries
+         * 0 and 1, a copy is missing. Hence this should be counted towards
+         * numLedgersFoundHavingLessThanWQReplicasOfAnEntry.
+         */
+        ensembleSize = 3;
+        writeQuorumSize = 3;
+        ackQuorumSize = 2;
+        lastEntryId = 1;
+        length = 1000;
+        segmentEnsembles.clear();
+        segmentEnsembles.put(0L, bookieAddresses.subList(0, 3));
+        ledgerId = 5L;
+        createClosedLedgerMetadata(lm, ledgerId, ensembleSize, writeQuorumSize, ackQuorumSize, segmentEnsembles,
+                lastEntryId, length, digestType, password);
+        returnAvailabilityOfEntriesOfLedger.put(bookieAddresses.get(0).toString(), Long.toString(ledgerId),
+                AvailabilityOfEntriesOfLedger.EMPTY_AVAILABILITYOFENTRIESOFLEDGER);
+        returnAvailabilityOfEntriesOfLedger.put(bookieAddresses.get(1).toString(), Long.toString(ledgerId),
+                new AvailabilityOfEntriesOfLedger(new long[] { 0, 1 }));
+        errorReturnValueForGetAvailabilityOfEntriesOfLedger.put(bookieAddresses.get(2).toString(),
+                Long.toString(ledgerId), BKException.Code.BookieHandleNotAvailableException);
+        numLedgersFoundHavingLessThanWQReplicasOfAnEntry++;
+
+        runTestScenario(returnAvailabilityOfEntriesOfLedger, errorReturnValueForGetAvailabilityOfEntriesOfLedger, 0, 0,
+                numLedgersFoundHavingLessThanWQReplicasOfAnEntry);
+    }
+
+    /*
+     * In this test scenario all the ledgers have empty segments.
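+     *
+     * An empty segment contributes no entries: its start entry coincides with
+     * the next segment's start, or lies beyond lastEntryId. The replicas check
+     * is expected to skip such segments rather than flag their bookies (this
+     * phrasing is our reading of the scenarios below).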
+ */ + @Test + public void testReplicasCheckForLedgersWithEmptySegments() throws Exception { + int numOfBookies = 5; + MultiKeyMap<String, AvailabilityOfEntriesOfLedger> returnAvailabilityOfEntriesOfLedger = + new MultiKeyMap<String, AvailabilityOfEntriesOfLedger>(); + MultiKeyMap<String, Integer> errorReturnValueForGetAvailabilityOfEntriesOfLedger = + new MultiKeyMap<String, Integer>(); + List<BookieId> bookieAddresses = addAndRegisterBookies(numOfBookies); + + LedgerManagerFactory mFactory = driver.getLedgerManagerFactory(); + LedgerManager lm = mFactory.newLedgerManager(); + DigestType digestType = DigestType.DUMMY; + byte[] password = new byte[0]; + Collections.shuffle(bookieAddresses); + + int numLedgersFoundHavingNoReplicaOfAnEntry = 0; + int numLedgersFoundHavingLessThanAQReplicasOfAnEntry = 0; + int numLedgersFoundHavingLessThanWQReplicasOfAnEntry = 0; + + /* + * closed ledger. + * + * This closed ledger has no entries. So it should not be counted towards + * numLedgersFoundHavingNoReplicaOfAnEntry/LessThanAQReplicasOfAnEntry + * /WQReplicasOfAnEntry. + */ + Map<Long, List<BookieId>> segmentEnsembles = new LinkedHashMap<Long, List<BookieId>>(); + int ensembleSize = 4; + int writeQuorumSize = 3; + int ackQuorumSize = 2; + long lastEntryId = -1L; + int length = 0; + segmentEnsembles.put(0L, bookieAddresses.subList(0, 4)); + long ledgerId = 1L; + createClosedLedgerMetadata(lm, ledgerId, ensembleSize, writeQuorumSize, ackQuorumSize, segmentEnsembles, + lastEntryId, length, digestType, password); + + /* + * closed ledger with multiple segments. + * + * This ledger has an empty last segment, but all the entries have + * writeQuorumSize number of copies, so it should not be counted towards + * numLedgersFoundHavingNoReplicaOfAnEntry/LessThanAQReplicasOfAnEntry/ + * WQReplicasOfAnEntry. + */ + lastEntryId = 2; + segmentEnsembles.clear(); + segmentEnsembles.put(0L, bookieAddresses.subList(0, 4)); + segmentEnsembles.put((lastEntryId + 1), bookieAddresses.subList(1, 5)); + ledgerId = 2L; + createClosedLedgerMetadata(lm, ledgerId, ensembleSize, writeQuorumSize, ackQuorumSize, segmentEnsembles, + lastEntryId, length, digestType, password); + returnAvailabilityOfEntriesOfLedger.put(bookieAddresses.get(0).toString(), Long.toString(ledgerId), + new AvailabilityOfEntriesOfLedger(new long[] { 0, 2 })); + returnAvailabilityOfEntriesOfLedger.put(bookieAddresses.get(1).toString(), Long.toString(ledgerId), + new AvailabilityOfEntriesOfLedger(new long[] { 0, 1 })); + returnAvailabilityOfEntriesOfLedger.put(bookieAddresses.get(2).toString(), Long.toString(ledgerId), + new AvailabilityOfEntriesOfLedger(new long[] { 0, 1, 2 })); + returnAvailabilityOfEntriesOfLedger.put(bookieAddresses.get(3).toString(), Long.toString(ledgerId), + new AvailabilityOfEntriesOfLedger(new long[] { 1, 2 })); + + /* + * Closed ledger with multiple segments. + * + * Segment0, Segment1, Segment3, Segment5 and Segment6 are empty. + * Entries from entryid 3 are missing. So it should be counted towards + * numLedgersFoundHavingNoReplicaOfAnEntry. 
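The empty-segment scenarios hinge on one definition: a segment is empty when the next segment starts at the same first-entry id (it was replaced before any entry landed in it) or, for a closed ledger, when it starts past lastEntryId. A small sketch under those assumptions; the pair-list representation and names are illustrative, not BookKeeper's metadata model:

import java.util.ArrayList;
import java.util.List;
import java.util.Map;

// Illustrative only: segments modelled as (firstEntryId, ensemble) pairs.
final class EmptySegmentSketch {

    static List<Integer> emptySegmentIndexes(
            List<Map.Entry<Long, List<String>>> segments, long lastEntryId) {
        List<Integer> empty = new ArrayList<>();
        for (int i = 0; i < segments.size(); i++) {
            long first = segments.get(i).getKey();
            boolean isLast = (i == segments.size() - 1);
            long nextFirst = isLast ? Long.MAX_VALUE : segments.get(i + 1).getKey();
            if (first == nextFirst            // replaced before any entry was written
                    || first > lastEntryId) { // starts beyond the last entry
                empty.add(i);
            }
        }
        return empty;
    }
}

For the ledger described above, whose segments start at entry ids 0, 0, 0, 4, 4, 6, 6 with lastEntryId 5, this yields indexes 0, 1, 3, 5 and 6 — exactly the segments the comment calls empty.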
+ */ + lastEntryId = 5; + segmentEnsembles.clear(); + segmentEnsembles.put(0L, bookieAddresses.subList(1, 5)); + segmentEnsembles.put(0L, bookieAddresses.subList(0, 4)); + segmentEnsembles.put(0L, bookieAddresses.subList(0, 4)); + segmentEnsembles.put(4L, bookieAddresses.subList(1, 5)); + segmentEnsembles.put(4L, bookieAddresses.subList(0, 4)); + segmentEnsembles.put((lastEntryId + 1), bookieAddresses.subList(1, 5)); + segmentEnsembles.put((lastEntryId + 1), bookieAddresses.subList(0, 4)); + ledgerId = 3L; + createClosedLedgerMetadata(lm, ledgerId, ensembleSize, writeQuorumSize, ackQuorumSize, segmentEnsembles, + lastEntryId, length, digestType, password); + returnAvailabilityOfEntriesOfLedger.put(bookieAddresses.get(0).toString(), Long.toString(ledgerId), + new AvailabilityOfEntriesOfLedger(new long[] { 0, 2 })); + returnAvailabilityOfEntriesOfLedger.put(bookieAddresses.get(1).toString(), Long.toString(ledgerId), + new AvailabilityOfEntriesOfLedger(new long[] { 0, 1 })); + returnAvailabilityOfEntriesOfLedger.put(bookieAddresses.get(2).toString(), Long.toString(ledgerId), + new AvailabilityOfEntriesOfLedger(new long[] { 0, 1, 2 })); + returnAvailabilityOfEntriesOfLedger.put(bookieAddresses.get(3).toString(), Long.toString(ledgerId), + new AvailabilityOfEntriesOfLedger(new long[] { 1, 2 })); + numLedgersFoundHavingNoReplicaOfAnEntry++; + + /* + * non-closed ledger with multiple segments + * + * since this is a non-closed ledger, it should not be counted towards + * ledgersFoundHavingLessThanWQReplicasOfAnEntry + */ + lastEntryId = 2; + segmentEnsembles.clear(); + segmentEnsembles.put(0L, bookieAddresses.subList(0, 4)); + segmentEnsembles.put(0L, bookieAddresses.subList(1, 5)); + segmentEnsembles.put((lastEntryId + 1), bookieAddresses.subList(1, 5)); + ledgerId = 4L; + createNonClosedLedgerMetadata(lm, ledgerId, ensembleSize, writeQuorumSize, ackQuorumSize, segmentEnsembles, + digestType, password); + returnAvailabilityOfEntriesOfLedger.put(bookieAddresses.get(0).toString(), Long.toString(ledgerId), + new AvailabilityOfEntriesOfLedger(new long[] { 0, 2 })); + returnAvailabilityOfEntriesOfLedger.put(bookieAddresses.get(1).toString(), Long.toString(ledgerId), + new AvailabilityOfEntriesOfLedger(new long[] { 0, 1 })); + returnAvailabilityOfEntriesOfLedger.put(bookieAddresses.get(2).toString(), Long.toString(ledgerId), + new AvailabilityOfEntriesOfLedger(new long[] { 0, 1, 2 })); + returnAvailabilityOfEntriesOfLedger.put(bookieAddresses.get(3).toString(), Long.toString(ledgerId), + new AvailabilityOfEntriesOfLedger(new long[] { 1, 2 })); + + runTestScenario(returnAvailabilityOfEntriesOfLedger, errorReturnValueForGetAvailabilityOfEntriesOfLedger, + numLedgersFoundHavingNoReplicaOfAnEntry, numLedgersFoundHavingLessThanAQReplicasOfAnEntry, + numLedgersFoundHavingLessThanWQReplicasOfAnEntry); + } +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/AuditorRollingRestartTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/AuditorRollingRestartTest.java index 632a31b43df..6f734a38429 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/AuditorRollingRestartTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/AuditorRollingRestartTest.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file @@ -24,12 +24,14 @@ import static org.junit.Assert.assertEquals; import com.google.common.util.concurrent.UncheckedExecutionException; +import lombok.Cleanup; import org.apache.bookkeeper.client.BookKeeper.DigestType; import org.apache.bookkeeper.client.LedgerHandle; import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.meta.LedgerAuditorManager; import org.apache.bookkeeper.meta.LedgerManagerFactory; import org.apache.bookkeeper.meta.LedgerUnderreplicationManager; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.test.BookKeeperClusterTestCase; import org.apache.bookkeeper.test.TestCallbacks; import org.junit.Test; @@ -51,7 +53,7 @@ public AuditorRollingRestartTest() { @Test public void testAuditingDuringRollingRestart() throws Exception { runFunctionWithLedgerManagerFactory( - bsConfs.get(0), + confByIndex(0), mFactory -> { try { testAuditingDuringRollingRestart(mFactory); @@ -77,7 +79,9 @@ private void testAuditingDuringRollingRestart(LedgerManagerFactory mFactory) thr underReplicationManager.pollLedgerToRereplicate(), -1); underReplicationManager.disableLedgerReplication(); - BookieSocketAddress auditor = AuditorElector.getCurrentAuditor(baseConf, zkc); + @Cleanup + LedgerAuditorManager lam = mFactory.newLedgerAuditorManager(); + BookieId auditor = lam.getCurrentAuditor(); ServerConfiguration conf = killBookie(auditor); Thread.sleep(2000); startBookie(conf); diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/AuthAutoRecoveryTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/AuthAutoRecoveryTest.java index 8e672fdb934..33557b99d21 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/AuthAutoRecoveryTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/AuthAutoRecoveryTest.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -89,7 +89,7 @@ public AuthAutoRecoveryTest() { */ @Test public void testAuthClientRole() throws Exception { - ServerConfiguration config = bsConfs.get(0); + ServerConfiguration config = confByIndex(0); assertEquals(AuditorClientAuthInterceptorFactory.class.getName(), config.getClientAuthProviderFactoryClass()); AutoRecoveryMain main = new AutoRecoveryMain(config); try { diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/AutoRecoveryMainTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/AutoRecoveryMainTest.java index 2970d3c5665..a0e795034e7 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/AutoRecoveryMainTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/AutoRecoveryMainTest.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file @@ -20,11 +20,20 @@ */ package org.apache.bookkeeper.replication; +import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; +import java.io.IOException; +import java.util.concurrent.TimeUnit; +import org.apache.bookkeeper.bookie.BookieImpl; +import org.apache.bookkeeper.meta.zk.ZKMetadataClientDriver; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.test.BookKeeperClusterTestCase; - +import org.apache.bookkeeper.util.TestUtils; +import org.apache.zookeeper.ZooKeeper; +import org.awaitility.Awaitility; import org.junit.Test; /** @@ -41,7 +50,7 @@ public AutoRecoveryMainTest() { */ @Test public void testStartup() throws Exception { - AutoRecoveryMain main = new AutoRecoveryMain(bsConfs.get(0)); + AutoRecoveryMain main = new AutoRecoveryMain(confByIndex(0)); try { main.start(); Thread.sleep(500); @@ -59,7 +68,7 @@ public void testShutdown() throws Exception { - AutoRecoveryMain main = new AutoRecoveryMain(bsConfs.get(0)); + AutoRecoveryMain main = new AutoRecoveryMain(confByIndex(0)); main.start(); Thread.sleep(500); assertTrue("AuditorElector should be running", @@ -75,36 +84,129 @@ } /** - * Test that, if an autorecovery looses its ZK connection/session - * it will shutdown. + * Test that, if an autorecovery loses its ZK connection/session it will + * shut down. */ @Test public void testAutoRecoverySessionLoss() throws Exception { - AutoRecoveryMain main1 = new AutoRecoveryMain(bsConfs.get(0)); - AutoRecoveryMain main2 = new AutoRecoveryMain(bsConfs.get(1)); - main1.start(); - main2.start(); - Thread.sleep(500); - assertTrue("AuditorElectors should be running", - main1.auditorElector.isRunning() && main2.auditorElector.isRunning()); - assertTrue("Replication workers should be running", - main1.replicationWorker.isRunning() && main2.replicationWorker.isRunning()); - - zkUtil.expireSession(main1.zk); - zkUtil.expireSession(main2.zk); - - for (int i = 0; i < 10; i++) { // give it 10 seconds to shutdown - if (!main1.auditorElector.isRunning() - && !main2.auditorElector.isRunning() - && !main1.replicationWorker.isRunning() - && !main2.replicationWorker.isRunning()) { + /* + * initialize three AutoRecovery instances. 
+ */ + AutoRecoveryMain main1 = new AutoRecoveryMain(confByIndex(0)); + AutoRecoveryMain main2 = new AutoRecoveryMain(confByIndex(1)); + AutoRecoveryMain main3 = new AutoRecoveryMain(confByIndex(2)); + + /* + * start main1, make sure all the components are started and main1 is + * the current Auditor + */ + ZKMetadataClientDriver zkMetadataClientDriver1 = startAutoRecoveryMain(main1); + ZooKeeper zk1 = zkMetadataClientDriver1.getZk(); + + // Wait until auditor gets elected + for (int i = 0; i < 10; i++) { + try { + if (main1.auditorElector.getCurrentAuditor() != null) { + break; + } else { + Thread.sleep(1000); + } + } catch (IOException e) { + Thread.sleep(1000); + } + } + BookieId currentAuditor = main1.auditorElector.getCurrentAuditor(); + assertNotNull(currentAuditor); + Auditor auditor1 = main1.auditorElector.getAuditor(); + assertEquals("Current Auditor should be AR1", currentAuditor, BookieImpl.getBookieId(confByIndex(0))); + Awaitility.waitAtMost(30, TimeUnit.SECONDS).untilAsserted(() -> { + assertNotNull(auditor1); + assertTrue("Auditor of AR1 should be running", auditor1.isRunning()); + }); + + + /* + * start main2 and main3 + */ + ZKMetadataClientDriver zkMetadataClientDriver2 = startAutoRecoveryMain(main2); + ZooKeeper zk2 = zkMetadataClientDriver2.getZk(); + ZKMetadataClientDriver zkMetadataClientDriver3 = startAutoRecoveryMain(main3); + ZooKeeper zk3 = zkMetadataClientDriver3.getZk(); + + /* + * make sure AR1 is still the current Auditor and AR2's and AR3's + * auditors are not running. + */ + assertEquals("Current Auditor should still be AR1", currentAuditor, BookieImpl.getBookieId(confByIndex(0))); + Awaitility.await().untilAsserted(() -> { + assertTrue("AR2's Auditor should not be running", (main2.auditorElector.getAuditor() == null + || !main2.auditorElector.getAuditor().isRunning())); + assertTrue("AR3's Auditor should not be running", (main3.auditorElector.getAuditor() == null + || !main3.auditorElector.getAuditor().isRunning())); + }); + + + /* + * expire zk2 and zk1 sessions. + */ + zkUtil.expireSession(zk2); + zkUtil.expireSession(zk1); + + /* + * wait for some time until all the components of AR1 and AR2 are + * shut down. + */ + for (int i = 0; i < 10; i++) { + if (!main1.auditorElector.isRunning() && !main1.replicationWorker.isRunning() + && !main1.isAutoRecoveryRunning() && !main2.auditorElector.isRunning() + && !main2.replicationWorker.isRunning() && !main2.isAutoRecoveryRunning()) { break; } Thread.sleep(1000); } - assertFalse("Elector1 should have shutdown", main1.auditorElector.isRunning()); - assertFalse("Elector2 should have shutdown", main2.auditorElector.isRunning()); - assertFalse("RW1 should have shutdown", main1.replicationWorker.isRunning()); - assertFalse("RW2 should have shutdown", main2.replicationWorker.isRunning()); + + /* + * AR3 should be the current auditor. + */ + currentAuditor = main3.auditorElector.getCurrentAuditor(); + assertEquals("Current Auditor should be AR3", currentAuditor, BookieImpl.getBookieId(confByIndex(2))); + Awaitility.await().untilAsserted(() -> { + assertNotNull(main3.auditorElector.getAuditor()); + assertTrue("Auditor of AR3 should be running", main3.auditorElector.getAuditor().isRunning()); + }); + + Awaitility.await().untilAsserted(() -> { + /* + * since AR3 is current auditor, AR1's auditor should not be running + * anymore. + */ + assertFalse("AR1's auditor should not be running", auditor1.isRunning()); + + /* + * components of AR1 and AR2 should not be running since zk1 and zk2 + * sessions are expired. 
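The election assertions above rely on Awaitility polling rather than fixed sleeps, since which instance wins the auditor election, and when, is nondeterministic. A minimal sketch of the idiom; the helper name is made up, while getAuditor() and isRunning() are the same accessors the test itself uses:

import static org.junit.Assert.assertTrue;

import java.util.concurrent.TimeUnit;
import org.awaitility.Awaitility;

// Illustrative helper (assumed to live alongside the tests so that
// AuditorElector is visible): poll until the elector reports a live
// auditor, or fail after 30 seconds.
final class AwaitAuditorSketch {

    static void awaitAuditorRunning(AuditorElector elector) {
        Awaitility.waitAtMost(30, TimeUnit.SECONDS).untilAsserted(() ->
                assertTrue("auditor should be running",
                        elector.getAuditor() != null && elector.getAuditor().isRunning()));
    }
}

Expiring a ZooKeeper session forces a re-election, so polling like this keeps the test robust against the variable failover delay.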
+ */ + assertFalse("Elector1 should have shutdown", main1.auditorElector.isRunning()); + assertFalse("RW1 should have shutdown", main1.replicationWorker.isRunning()); + assertFalse("AR1 should have shutdown", main1.isAutoRecoveryRunning()); + assertFalse("Elector2 should have shutdown", main2.auditorElector.isRunning()); + assertFalse("RW2 should have shutdown", main2.replicationWorker.isRunning()); + assertFalse("AR2 should have shutdown", main2.isAutoRecoveryRunning()); + }); + + } + + /* + * start autoRecoveryMain and make sure all its components are running and + * the myVote node exists + */ + ZKMetadataClientDriver startAutoRecoveryMain(AutoRecoveryMain autoRecoveryMain) throws Exception { + autoRecoveryMain.start(); + ZKMetadataClientDriver metadataClientDriver = (ZKMetadataClientDriver) autoRecoveryMain.bkc + .getMetadataClientDriver(); + TestUtils.assertEventuallyTrue("autoRecoveryMain components should be running", + () -> autoRecoveryMain.auditorElector.isRunning() + && autoRecoveryMain.replicationWorker.isRunning() && autoRecoveryMain.isAutoRecoveryRunning()); + return metadataClientDriver; } } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/BookieAutoRecoveryTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/BookieAutoRecoveryTest.java index 4929b0ecc27..ccb262ed268 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/BookieAutoRecoveryTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/BookieAutoRecoveryTest.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -36,7 +36,6 @@ import org.apache.bookkeeper.client.BookKeeper.DigestType; import org.apache.bookkeeper.client.BookKeeperTestClient; import org.apache.bookkeeper.client.LedgerHandle; -import org.apache.bookkeeper.client.LedgerHandleAdapter; import org.apache.bookkeeper.common.util.OrderedScheduler; import org.apache.bookkeeper.conf.ServerConfiguration; import org.apache.bookkeeper.meta.LedgerManager; @@ -45,7 +44,7 @@ import org.apache.bookkeeper.meta.MetadataClientDriver; import org.apache.bookkeeper.meta.MetadataDrivers; import org.apache.bookkeeper.meta.ZkLedgerUnderreplicationManager; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.proto.BookieServer; import org.apache.bookkeeper.replication.ReplicationException.CompatibilityException; import org.apache.bookkeeper.replication.ReplicationException.UnavailableException; @@ -150,8 +149,7 @@ public void testOpenLedgers() throws Exception { List listOfLedgerHandle = createLedgersAndAddEntries(1, 5); LedgerHandle lh = listOfLedgerHandle.get(0); int ledgerReplicaIndex = 0; - BookieSocketAddress replicaToKillAddr = LedgerHandleAdapter - .getLedgerMetadata(lh).getEnsembles().get(0L).get(0); + BookieId replicaToKillAddr = lh.getLedgerMetadata().getAllEnsembles().get(0L).get(0); final String urLedgerZNode = getUrLedgerZNode(lh); ledgerReplicaIndex = getReplicaIndexInLedger(lh, replicaToKillAddr); @@ -174,11 +172,13 @@ public void testOpenLedgers() throws Exception { // starting the replication service, so that he will be able to act as // target bookie startNewBookie(); - int newBookieIndex = bs.size() - 1; - BookieServer newBookieServer = bs.get(newBookieIndex); + int newBookieIndex = lastBookieIndex(); + BookieServer 
newBookieServer = serverByIndex(newBookieIndex); - LOG.debug("Waiting to finish the replication of failed bookie : " - + replicaToKillAddr); + if (LOG.isDebugEnabled()) { + LOG.debug("Waiting to finish the replication of failed bookie : " + + replicaToKillAddr); + } latch.await(); // grace period to update the urledger metadata in zookeeper @@ -199,8 +199,7 @@ public void testClosedLedgers() throws Exception { closeLedgers(listOfLedgerHandle); LedgerHandle lhandle = listOfLedgerHandle.get(0); int ledgerReplicaIndex = 0; - BookieSocketAddress replicaToKillAddr = LedgerHandleAdapter - .getLedgerMetadata(lhandle).getEnsembles().get(0L).get(0); + BookieId replicaToKillAddr = lhandle.getLedgerMetadata().getAllEnsembles().get(0L).get(0); CountDownLatch latch = new CountDownLatch(listOfLedgerHandle.size()); for (LedgerHandle lh : listOfLedgerHandle) { @@ -229,11 +228,13 @@ public void testClosedLedgers() throws Exception { // starting the replication service, so that he will be able to act as // target bookie startNewBookie(); - int newBookieIndex = bs.size() - 1; - BookieServer newBookieServer = bs.get(newBookieIndex); + int newBookieIndex = lastBookieIndex(); + BookieServer newBookieServer = serverByIndex(newBookieIndex); - LOG.debug("Waiting to finish the replication of failed bookie : " - + replicaToKillAddr); + if (LOG.isDebugEnabled()) { + LOG.debug("Waiting to finish the replication of failed bookie : " + + replicaToKillAddr); + } // waiting to finish replication latch.await(); @@ -261,8 +262,7 @@ public void testStopWhileReplicationInProgress() throws Exception { numberOfLedgers, 5); closeLedgers(listOfLedgerHandle); LedgerHandle handle = listOfLedgerHandle.get(0); - BookieSocketAddress replicaToKillAddr = LedgerHandleAdapter - .getLedgerMetadata(handle).getEnsembles().get(0L).get(0); + BookieId replicaToKillAddr = handle.getLedgerMetadata().getAllEnsembles().get(0L).get(0); LOG.info("Killing Bookie:" + replicaToKillAddr); // Each ledger, there will be two events : create urLedger and after @@ -297,11 +297,13 @@ public void testStopWhileReplicationInProgress() throws Exception { // starting the replication service, so that he will be able to act as // target bookie startNewBookie(); - int newBookieIndex = bs.size() - 1; - BookieServer newBookieServer = bs.get(newBookieIndex); + int newBookieIndex = lastBookieIndex(); + BookieServer newBookieServer = serverByIndex(newBookieIndex); - LOG.debug("Waiting to finish the replication of failed bookie : " - + replicaToKillAddr); + if (LOG.isDebugEnabled()) { + LOG.debug("Waiting to finish the replication of failed bookie : " + + replicaToKillAddr); + } while (true) { if (latch.getCount() < numberOfLedgers || latch.getCount() <= 0) { stopReplicationService(); @@ -340,13 +342,13 @@ public void testNoSuchLedgerExists() throws Exception { assertNull("UrLedger already exists!", watchUrLedgerNode(getUrLedgerZNode(lh), latch)); } - BookieSocketAddress replicaToKillAddr = LedgerHandleAdapter - .getLedgerMetadata(listOfLedgerHandle.get(0)).getEnsembles() - .get(0L).get(0); + BookieId replicaToKillAddr = listOfLedgerHandle.get(0) + .getLedgerMetadata().getAllEnsembles() + .get(0L).get(0); killBookie(replicaToKillAddr); - replicaToKillAddr = LedgerHandleAdapter - .getLedgerMetadata(listOfLedgerHandle.get(0)).getEnsembles() - .get(0L).get(0); + replicaToKillAddr = listOfLedgerHandle.get(0) + .getLedgerMetadata().getAllEnsembles() + .get(0L).get(0); killBookie(replicaToKillAddr); // waiting to publish urLedger znode by Auditor latch.await(); @@ -383,11 +385,11 
@@ public void testEmptyLedgerLosesQuorumEventually() throws Exception { String urZNode = getUrLedgerZNode(lh); watchUrLedgerNode(urZNode, latch); - BookieSocketAddress replicaToKill = LedgerHandleAdapter - .getLedgerMetadata(lh).getEnsembles().get(0L).get(2); + BookieId replicaToKill = lh.getLedgerMetadata().getAllEnsembles().get(0L).get(2); LOG.info("Killing last bookie, {}, in ensemble {}", replicaToKill, - LedgerHandleAdapter.getLedgerMetadata(lh).getEnsembles().get(0L)); + lh.getLedgerMetadata().getAllEnsembles().get(0L)); killBookie(replicaToKill); + startNewBookie(); getAuditor(10, TimeUnit.SECONDS).submitAuditTask().get(); // ensure auditor runs @@ -395,13 +397,12 @@ public void testEmptyLedgerLosesQuorumEventually() throws Exception { latch = new CountDownLatch(1); Stat s = watchUrLedgerNode(urZNode, latch); // should be marked as replicated if (s != null) { - assertTrue("Should be marked as replicated", latch.await(10, TimeUnit.SECONDS)); + assertTrue("Should be marked as replicated", latch.await(15, TimeUnit.SECONDS)); } - replicaToKill = LedgerHandleAdapter - .getLedgerMetadata(lh).getEnsembles().get(0L).get(1); + replicaToKill = lh.getLedgerMetadata().getAllEnsembles().get(0L).get(1); LOG.info("Killing second bookie, {}, in ensemble {}", replicaToKill, - LedgerHandleAdapter.getLedgerMetadata(lh).getEnsembles().get(0L)); + lh.getLedgerMetadata().getAllEnsembles().get(0L)); killBookie(replicaToKill); getAuditor(10, TimeUnit.SECONDS).submitAuditTask().get(); // ensure auditor runs @@ -409,8 +410,12 @@ public void testEmptyLedgerLosesQuorumEventually() throws Exception { assertTrue("Should be marked as underreplicated", latch.await(5, TimeUnit.SECONDS)); latch = new CountDownLatch(1); s = watchUrLedgerNode(urZNode, latch); // should be marked as replicated + + startNewBookie(); + getAuditor(10, TimeUnit.SECONDS).submitAuditTask().get(); // ensure auditor runs + if (s != null) { - assertTrue("Should be marked as replicated", latch.await(5, TimeUnit.SECONDS)); + assertTrue("Should be marked as replicated", latch.await(20, TimeUnit.SECONDS)); } // should be able to open ledger without issue @@ -433,22 +438,18 @@ public void testLedgerMetadataContainsIpAddressAsBookieID() serverConf2.setUseHostNameAsBookieID(true); ServerConfiguration serverConf3 = newServerConfiguration(); serverConf3.setUseHostNameAsBookieID(true); - bsConfs.add(serverConf1); - bsConfs.add(serverConf2); - bsConfs.add(serverConf3); - bs.add(startBookie(serverConf1)); - bs.add(startBookie(serverConf2)); - bs.add(startBookie(serverConf3)); + startAndAddBookie(serverConf1); + startAndAddBookie(serverConf2); + startAndAddBookie(serverConf3); List listOfLedgerHandle = createLedgersAndAddEntries(1, 5); LedgerHandle lh = listOfLedgerHandle.get(0); int ledgerReplicaIndex = 0; - final SortedMap> ensembles = LedgerHandleAdapter - .getLedgerMetadata(lh).getEnsembles(); - final List bkAddresses = ensembles.get(0L); - BookieSocketAddress replicaToKillAddr = bkAddresses.get(0); - for (BookieSocketAddress bookieSocketAddress : bkAddresses) { - if (!isCreatedFromIp(bookieSocketAddress)){ + final SortedMap> ensembles = lh.getLedgerMetadata().getAllEnsembles(); + final List bkAddresses = ensembles.get(0L); + BookieId replicaToKillAddr = bkAddresses.get(0); + for (BookieId bookieSocketAddress : bkAddresses) { + if (!isCreatedFromIp(bookieSocketAddress)) { replicaToKillAddr = bookieSocketAddress; LOG.info("Kill bookie which has registered using hostname"); break; @@ -477,14 +478,15 @@ public void 
testLedgerMetadataContainsIpAddressAsBookieID() // target bookie ServerConfiguration serverConf = newServerConfiguration(); serverConf.setUseHostNameAsBookieID(false); - bsConfs.add(serverConf); - bs.add(startBookie(serverConf)); + startAndAddBookie(serverConf); - int newBookieIndex = bs.size() - 1; - BookieServer newBookieServer = bs.get(newBookieIndex); + int newBookieIndex = lastBookieIndex(); + BookieServer newBookieServer = serverByIndex(newBookieIndex); - LOG.debug("Waiting to finish the replication of failed bookie : " - + replicaToKillAddr); + if (LOG.isDebugEnabled()) { + LOG.debug("Waiting to finish the replication of failed bookie : " + + replicaToKillAddr); + } latch.await(); // grace period to update the urledger metadata in zookeeper @@ -512,21 +514,17 @@ public void testLedgerMetadataContainsHostNameAsBookieID() serverConf2.setUseHostNameAsBookieID(true); ServerConfiguration serverConf3 = newServerConfiguration(); serverConf3.setUseHostNameAsBookieID(true); - bsConfs.add(serverConf1); - bsConfs.add(serverConf2); - bsConfs.add(serverConf3); - bs.add(startBookie(serverConf1)); - bs.add(startBookie(serverConf2)); - bs.add(startBookie(serverConf3)); + startAndAddBookie(serverConf1); + startAndAddBookie(serverConf2); + startAndAddBookie(serverConf3); List listOfLedgerHandle = createLedgersAndAddEntries(1, 5); LedgerHandle lh = listOfLedgerHandle.get(0); int ledgerReplicaIndex = 0; - final SortedMap> ensembles = LedgerHandleAdapter - .getLedgerMetadata(lh).getEnsembles(); - final List bkAddresses = ensembles.get(0L); - BookieSocketAddress replicaToKillAddr = bkAddresses.get(0); - for (BookieSocketAddress bookieSocketAddress : bkAddresses) { + final SortedMap> ensembles = lh.getLedgerMetadata().getAllEnsembles(); + final List bkAddresses = ensembles.get(0L); + BookieId replicaToKillAddr = bkAddresses.get(0); + for (BookieId bookieSocketAddress : bkAddresses) { if (isCreatedFromIp(bookieSocketAddress)) { replicaToKillAddr = bookieSocketAddress; LOG.info("Kill bookie which has registered using ipaddress"); @@ -558,14 +556,15 @@ public void testLedgerMetadataContainsHostNameAsBookieID() // target bookie ServerConfiguration serverConf = newServerConfiguration(); serverConf.setUseHostNameAsBookieID(true); - bsConfs.add(serverConf); - bs.add(startBookie(serverConf)); + startAndAddBookie(serverConf); - int newBookieIndex = bs.size() - 1; - BookieServer newBookieServer = bs.get(newBookieIndex); + int newBookieIndex = lastBookieIndex(); + BookieServer newBookieServer = serverByIndex(newBookieIndex); - LOG.debug("Waiting to finish the replication of failed bookie : " - + replicaToKillAddr); + if (LOG.isDebugEnabled()) { + LOG.debug("Waiting to finish the replication of failed bookie : " + + replicaToKillAddr); + } latch.await(); // grace period to update the urledger metadata in zookeeper @@ -576,11 +575,10 @@ public void testLedgerMetadataContainsHostNameAsBookieID() } - private int getReplicaIndexInLedger(LedgerHandle lh, BookieSocketAddress replicaToKill) { - SortedMap> ensembles = LedgerHandleAdapter - .getLedgerMetadata(lh).getEnsembles(); + private int getReplicaIndexInLedger(LedgerHandle lh, BookieId replicaToKill) { + SortedMap> ensembles = lh.getLedgerMetadata().getAllEnsembles(); int ledgerReplicaIndex = -1; - for (BookieSocketAddress addr : ensembles.get(0L)) { + for (BookieId addr : ensembles.get(0L)) { ++ledgerReplicaIndex; if (addr.equals(replicaToKill)) { break; @@ -595,11 +593,10 @@ private void verifyLedgerEnsembleMetadataAfterReplication( LedgerHandle openLedger = bkc 
.openLedger(lh.getId(), digestType, PASSWD); - BookieSocketAddress inetSocketAddress = LedgerHandleAdapter - .getLedgerMetadata(openLedger).getEnsembles().get(0L) + BookieId inetSocketAddress = openLedger.getLedgerMetadata().getAllEnsembles().get(0L) .get(ledgerReplicaIndex); assertEquals("Rereplication has been failed and ledgerReplicaIndex :" - + ledgerReplicaIndex, newBookieServer.getLocalAddress(), + + ledgerReplicaIndex, newBookieServer.getBookieId(), inetSocketAddress); openLedger.close(); } @@ -637,12 +634,12 @@ private Stat watchUrLedgerNode(final String znode, @Override public void process(WatchedEvent event) { if (event.getType() == EventType.NodeDeleted) { - LOG.info("Recieved Ledger rereplication completion event :" + LOG.info("Received Ledger rereplication completion event :" + event.getType()); latch.countDown(); } if (event.getType() == EventType.NodeCreated) { - LOG.info("Recieved urLedger publishing event :" + LOG.info("Received urLedger publishing event :" + event.getType()); latch.countDown(); } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/BookieLedgerIndexTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/BookieLedgerIndexTest.java index 4244ff3c884..f4483917980 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/BookieLedgerIndexTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/BookieLedgerIndexTest.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -21,7 +21,6 @@ import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; -import java.io.File; import java.io.IOException; import java.nio.ByteBuffer; import java.util.ArrayList; @@ -41,7 +40,6 @@ import org.apache.bookkeeper.replication.ReplicationException.BKAuditException; import org.apache.bookkeeper.test.BookKeeperClusterTestCase; import org.apache.bookkeeper.util.ZkUtils; -import org.apache.commons.io.FileUtils; import org.apache.zookeeper.KeeperException; import org.junit.After; import org.junit.Before; @@ -181,7 +179,7 @@ public void testEnsembleReformation() throws Exception { LedgerHandle lh2 = createAndAddEntriesToLedger(); startNewBookie(); - shutdownBookie(bs.size() - 2); + shutdownBookie(lastBookieIndex() - 1); // add few more entries after ensemble reformation for (int i = 0; i < 10; i++) { @@ -221,10 +219,8 @@ public void testEnsembleReformation() throws Exception { } } - private void shutdownBookie(int bkShutdownIndex) throws IOException { - bs.remove(bkShutdownIndex).shutdown(); - File f = tmpDirs.remove(bkShutdownIndex); - FileUtils.deleteDirectory(f); + private void shutdownBookie(int bkShutdownIndex) throws Exception { + killBookie(bkShutdownIndex); } private LedgerHandle createAndAddEntriesToLedger() throws BKException, diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/ReplicationTestUtil.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/ReplicationTestUtil.java index ac05c8481e1..e36955a0c29 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/ReplicationTestUtil.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/ReplicationTestUtil.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file @@ -32,7 +32,7 @@ public class ReplicationTestUtil { /** * Checks whether ledger is in under-replication. */ - static boolean isLedgerInUnderReplication(ZooKeeper zkc, long id, + public static boolean isLedgerInUnderReplication(ZooKeeper zkc, long id, String basePath) throws KeeperException, InterruptedException { List children; try { diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/TestAutoRecoveryAlongWithBookieServers.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/TestAutoRecoveryAlongWithBookieServers.java index ca767842fd7..9506ba02772 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/TestAutoRecoveryAlongWithBookieServers.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/TestAutoRecoveryAlongWithBookieServers.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -29,9 +29,8 @@ import org.apache.bookkeeper.client.BookKeeper; import org.apache.bookkeeper.client.LedgerEntry; import org.apache.bookkeeper.client.LedgerHandle; -import org.apache.bookkeeper.client.LedgerHandleAdapter; import org.apache.bookkeeper.meta.zk.ZKMetadataDriverBase; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.test.BookKeeperClusterTestCase; import org.apache.bookkeeper.util.BookKeeperConstants; import org.junit.Before; @@ -74,12 +73,11 @@ public void testAutoRecoveryAlongWithBookieServers() throws Exception { lh.addEntry(testData); } lh.close(); - BookieSocketAddress replicaToKill = LedgerHandleAdapter - .getLedgerMetadata(lh).getEnsembles().get(0L).get(0); + BookieId replicaToKill = lh.getLedgerMetadata().getAllEnsembles().get(0L).get(0); killBookie(replicaToKill); - BookieSocketAddress newBkAddr = startNewBookieAndReturnAddress(); + BookieId newBkAddr = startNewBookieAndReturnBookieId(); while (ReplicationTestUtil.isLedgerInUnderReplication(zkc, lh.getId(), basePath)) { @@ -87,10 +85,10 @@ public void testAutoRecoveryAlongWithBookieServers() throws Exception { } // Killing all bookies except newly replicated bookie - for (Entry> entry : - lh.getLedgerMetadata().getEnsembles().entrySet()) { - List bookies = entry.getValue(); - for (BookieSocketAddress bookie : bookies) { + for (Entry> entry : + lh.getLedgerMetadata().getAllEnsembles().entrySet()) { + List bookies = entry.getValue(); + for (BookieId bookie : bookies) { if (bookie.equals(newBkAddr)) { continue; } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/TestLedgerUnderreplicationManager.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/TestLedgerUnderreplicationManager.java index db0e308eb41..f466f41fa56 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/TestLedgerUnderreplicationManager.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/TestLedgerUnderreplicationManager.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file @@ -43,6 +43,7 @@ import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; +import lombok.Cleanup; import org.apache.bookkeeper.conf.ServerConfiguration; import org.apache.bookkeeper.conf.TestBKConfiguration; import org.apache.bookkeeper.meta.AbstractZkLedgerManagerFactory; @@ -92,7 +93,7 @@ public class TestLedgerUnderreplicationManager { @Before public void setupZooKeeper() throws Exception { zkUtil = new ZooKeeperUtil(); - zkUtil.startServer(); + zkUtil.startCluster(); conf = TestBKConfiguration.newServerConfiguration(); conf.setMetadataServiceUri(zkUtil.getMetadataServiceUri()); @@ -133,7 +134,7 @@ public void setupZooKeeper() throws Exception { @After public void teardownZooKeeper() throws Exception { if (zkUtil != null) { - zkUtil.killServer(); + zkUtil.killCluster(); zkUtil = null; } if (executor != null) { @@ -561,7 +562,9 @@ public void testDisableLedegerReplication() throws Exception { try { replicaMgr.markLedgerUnderreplicated(ledgerA, missingReplica); } catch (UnavailableException e) { - LOG.debug("Unexpected exception while marking urLedger", e); + if (LOG.isDebugEnabled()) { + LOG.debug("Unexpected exception while marking urLedger", e); + } fail("Unexpected exception while marking urLedger" + e.getMessage()); } @@ -594,13 +597,17 @@ public void testEnableLedgerReplication() throws Exception { try { replicaMgr.markLedgerUnderreplicated(ledgerA, missingReplica); } catch (UnavailableException e) { - LOG.debug("Unexpected exception while marking urLedger", e); + if (LOG.isDebugEnabled()) { + LOG.debug("Unexpected exception while marking urLedger", e); + } fail("Unexpected exception while marking urLedger" + e.getMessage()); } // disabling replication replicaMgr.disableLedgerReplication(); - LOG.debug("Disabled Ledeger Replication"); + if (LOG.isDebugEnabled()) { + LOG.debug("Disabled Ledger Replication"); + } String znodeA = getUrLedgerZnode(ledgerA); final CountDownLatch znodeLatch = new CountDownLatch(2); @@ -611,8 +618,10 @@ public void process(WatchedEvent event) { if (event.getType() == EventType.NodeCreated) { znodeLatch.countDown(); - LOG.debug("Recieved node creation event for the zNodePath:" - + event.getPath()); + if (LOG.isDebugEnabled()) { + LOG.debug("Received node creation event for the zNodePath:" + + event.getPath()); + } } }}); @@ -627,7 +636,9 @@ public void run() { isLedgerReplicationDisabled = false; znodeLatch.countDown(); } catch (UnavailableException e) { - LOG.debug("Unexpected exception while marking urLedger", e); + if (LOG.isDebugEnabled()) { + LOG.debug("Unexpected exception while marking urLedger", e); + } isLedgerReplicationDisabled = false; } } @@ -643,7 +654,9 @@ public void run() { replicaMgr.enableLedgerReplication(); znodeLatch.await(5, TimeUnit.SECONDS); - LOG.debug("Enabled Ledeger Replication"); + if (LOG.isDebugEnabled()) { + LOG.debug("Enabled Ledger Replication"); + } assertTrue("Ledger replication is not disabled!", !isLedgerReplicationDisabled); assertEquals("Failed to disable ledger replication!", 0, znodeLatch @@ -764,6 +777,51 @@ public void run() { assertEquals("All hierarchies should be cleaned up", 0, children.size()); } + @Test + public void testCheckAllLedgersCTime() throws Exception { + @Cleanup + LedgerUnderreplicationManager underReplicaMgr1 = lmf1.newLedgerUnderreplicationManager(); + @Cleanup + LedgerUnderreplicationManager underReplicaMgr2 = 
lmf2.newLedgerUnderreplicationManager(); + assertEquals(-1, underReplicaMgr1.getCheckAllLedgersCTime()); + long curTime = System.currentTimeMillis(); + underReplicaMgr2.setCheckAllLedgersCTime(curTime); + assertEquals(curTime, underReplicaMgr1.getCheckAllLedgersCTime()); + curTime = System.currentTimeMillis(); + underReplicaMgr2.setCheckAllLedgersCTime(curTime); + assertEquals(curTime, underReplicaMgr1.getCheckAllLedgersCTime()); + } + + @Test + public void testPlacementPolicyCheckCTime() throws Exception { + @Cleanup + LedgerUnderreplicationManager underReplicaMgr1 = lmf1.newLedgerUnderreplicationManager(); + @Cleanup + LedgerUnderreplicationManager underReplicaMgr2 = lmf2.newLedgerUnderreplicationManager(); + assertEquals(-1, underReplicaMgr1.getPlacementPolicyCheckCTime()); + long curTime = System.currentTimeMillis(); + underReplicaMgr2.setPlacementPolicyCheckCTime(curTime); + assertEquals(curTime, underReplicaMgr1.getPlacementPolicyCheckCTime()); + curTime = System.currentTimeMillis(); + underReplicaMgr2.setPlacementPolicyCheckCTime(curTime); + assertEquals(curTime, underReplicaMgr1.getPlacementPolicyCheckCTime()); + } + + @Test + public void testReplicasCheckCTime() throws Exception { + @Cleanup + LedgerUnderreplicationManager underReplicaMgr1 = lmf1.newLedgerUnderreplicationManager(); + @Cleanup + LedgerUnderreplicationManager underReplicaMgr2 = lmf2.newLedgerUnderreplicationManager(); + assertEquals(-1, underReplicaMgr1.getReplicasCheckCTime()); + long curTime = System.currentTimeMillis(); + underReplicaMgr2.setReplicasCheckCTime(curTime); + assertEquals(curTime, underReplicaMgr1.getReplicasCheckCTime()); + curTime = System.currentTimeMillis(); + underReplicaMgr2.setReplicasCheckCTime(curTime); + assertEquals(curTime, underReplicaMgr1.getReplicasCheckCTime()); + } + private void verifyMarkLedgerUnderreplicated(Collection missingReplica) throws KeeperException, InterruptedException, ReplicationException { Long ledgerA = 0xfeadeefdacL; diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/TestReplicationWorker.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/TestReplicationWorker.java index e4b1232e775..507f143d5ca 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/TestReplicationWorker.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/TestReplicationWorker.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file * distributed with this work for additional information @@ -19,40 +19,90 @@ */ package org.apache.bookkeeper.replication; +import static org.apache.bookkeeper.replication.ReplicationStats.AUDITOR_SCOPE; +import static org.apache.bookkeeper.replication.ReplicationStats.NUM_ENTRIES_UNABLE_TO_READ_FOR_REPLICATION; +import static org.apache.bookkeeper.replication.ReplicationStats.REPLICATION_SCOPE; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; +import io.netty.util.HashedWheelTimer; +import java.io.IOException; +import java.lang.reflect.Field; +import java.lang.reflect.Method; import java.net.URI; +import java.net.UnknownHostException; import java.util.Enumeration; import java.util.List; import java.util.Map.Entry; +import java.util.Objects; import java.util.Optional; +import java.util.TimerTask; +import java.util.concurrent.CopyOnWriteArrayList; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; - +import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.BiConsumer; +import java.util.stream.Collectors; import lombok.Cleanup; - +import org.apache.bookkeeper.bookie.BookieImpl; import org.apache.bookkeeper.client.BKException; import org.apache.bookkeeper.client.BookKeeper; +import org.apache.bookkeeper.client.BookKeeperTestClient; import org.apache.bookkeeper.client.ClientUtil; +import org.apache.bookkeeper.client.EnsemblePlacementPolicy; import org.apache.bookkeeper.client.LedgerEntry; import org.apache.bookkeeper.client.LedgerHandle; -import org.apache.bookkeeper.client.LedgerHandleAdapter; +import org.apache.bookkeeper.client.RackawareEnsemblePlacementPolicy; +import org.apache.bookkeeper.client.ZoneawareEnsemblePlacementPolicy; +import org.apache.bookkeeper.client.api.LedgerMetadata; import org.apache.bookkeeper.common.util.OrderedScheduler; +import org.apache.bookkeeper.conf.ClientConfiguration; import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.feature.FeatureProvider; +import org.apache.bookkeeper.meta.AbstractZkLedgerManager; +import org.apache.bookkeeper.meta.LedgerManager; import org.apache.bookkeeper.meta.LedgerManagerFactory; import org.apache.bookkeeper.meta.LedgerUnderreplicationManager; import org.apache.bookkeeper.meta.MetadataBookieDriver; import org.apache.bookkeeper.meta.MetadataClientDriver; import org.apache.bookkeeper.meta.MetadataDrivers; +import org.apache.bookkeeper.meta.ZkLedgerUnderreplicationManager; +import org.apache.bookkeeper.meta.exceptions.MetadataException; import org.apache.bookkeeper.meta.zk.ZKMetadataDriverBase; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.net.DNSToSwitchMapping; +import org.apache.bookkeeper.proto.BookieAddressResolver; +import org.apache.bookkeeper.replication.ReplicationException.CompatibilityException; +import org.apache.bookkeeper.stats.Counter; +import org.apache.bookkeeper.stats.Gauge; import org.apache.bookkeeper.stats.NullStatsLogger; +import org.apache.bookkeeper.stats.StatsLogger; import org.apache.bookkeeper.test.BookKeeperClusterTestCase; +import org.apache.bookkeeper.test.TestStatsProvider; +import 
org.apache.bookkeeper.test.TestStatsProvider.TestStatsLogger; import org.apache.bookkeeper.util.BookKeeperConstants; +import org.apache.bookkeeper.util.StaticDNSResolver; +import org.apache.bookkeeper.zookeeper.BoundExponentialBackoffRetryPolicy; import org.apache.bookkeeper.zookeeper.ZooKeeperClient; +import org.apache.bookkeeper.zookeeper.ZooKeeperWatcherBase; +import org.apache.commons.lang3.mutable.MutableObject; +import org.apache.commons.lang3.reflect.FieldUtils; +import org.apache.zookeeper.AsyncCallback.StatCallback; +import org.apache.zookeeper.KeeperException; +import org.apache.zookeeper.WatchedEvent; +import org.apache.zookeeper.Watcher; +import org.apache.zookeeper.Watcher.Event.EventType; +import org.apache.zookeeper.Watcher.Event.KeeperState; import org.apache.zookeeper.ZooKeeper; +import org.apache.zookeeper.ZooKeeper.States; +import org.apache.zookeeper.data.Stat; +import org.awaitility.Awaitility; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -71,28 +121,33 @@ public class TestReplicationWorker extends BookKeeperClusterTestCase { private MetadataBookieDriver driver; private LedgerManagerFactory mFactory; private LedgerUnderreplicationManager underReplicationManager; + private LedgerManager ledgerManager; private static byte[] data = "TestReplicationWorker".getBytes(); private OrderedScheduler scheduler; + private String zkLedgersRootPath; public TestReplicationWorker() { this("org.apache.bookkeeper.meta.HierarchicalLedgerManagerFactory"); } TestReplicationWorker(String ledgerManagerFactory) { - super(3); + super(3, 300); LOG.info("Running test case using ledger manager : " + ledgerManagerFactory); // set ledger manager name baseConf.setLedgerManagerFactoryClassName(ledgerManagerFactory); baseClientConf.setLedgerManagerFactoryClassName(ledgerManagerFactory); baseConf.setRereplicationEntryBatchSize(3); + baseConf.setZkTimeout(7000); + baseConf.setZkRetryBackoffMaxMs(500); + baseConf.setZkRetryBackoffStartMs(10); } @Override public void setUp() throws Exception { super.setUp(); - String zkLedgersRootPath = ZKMetadataDriverBase.resolveZkLedgersRootPath(baseClientConf); + zkLedgersRootPath = ZKMetadataDriverBase.resolveZkLedgersRootPath(baseClientConf); basePath = zkLedgersRootPath + '/' + BookKeeperConstants.UNDER_REPLICATION_NODE + BookKeeperConstants.DEFAULT_ZK_LEDGERS_ROOT_PATH; @@ -109,10 +164,10 @@ public void setUp() throws Exception { URI.create(baseConf.getMetadataServiceUri())); this.driver.initialize( baseConf, - () -> {}, NullStatsLogger.INSTANCE); // initialize urReplicationManager mFactory = driver.getLedgerManagerFactory(); + ledgerManager = mFactory.newLedgerManager(); underReplicationManager = mFactory.newLedgerUnderreplicationManager(); } @@ -140,20 +195,19 @@ public void testRWShouldReplicateFragmentsToTargetBookie() throws Exception { for (int i = 0; i < 10; i++) { lh.addEntry(data); } - BookieSocketAddress replicaToKill = LedgerHandleAdapter - .getLedgerMetadata(lh).getEnsembles().get(0L).get(0); + BookieId replicaToKill = lh.getLedgerMetadata().getAllEnsembles().get(0L).get(0); - LOG.info("Killing Bookie", replicaToKill); + LOG.info("Killing Bookie : {}", replicaToKill); killBookie(replicaToKill); - BookieSocketAddress newBkAddr = startNewBookieAndReturnAddress(); + BookieId newBkAddr = startNewBookieAndReturnBookieId(); LOG.info("New Bookie addr : {}", newBkAddr); for (int i = 0; i < 10; i++) { lh.addEntry(data); } - ReplicationWorker rw = new ReplicationWorker(zkc, baseConf); + ReplicationWorker rw = new 
ReplicationWorker(baseConf); rw.start(); try { @@ -189,16 +243,15 @@ public void testRWShouldRetryUntilThereAreEnoughBksAvailableForReplication() lh.addEntry(data); } lh.close(); - BookieSocketAddress replicaToKill = LedgerHandleAdapter - .getLedgerMetadata(lh).getEnsembles().get(0L).get(0); - LOG.info("Killing Bookie", replicaToKill); + BookieId replicaToKill = lh.getLedgerMetadata().getAllEnsembles().get(0L).get(0); + LOG.info("Killing Bookie : {}", replicaToKill); ServerConfiguration killedBookieConfig = killBookie(replicaToKill); - BookieSocketAddress newBkAddr = startNewBookieAndReturnAddress(); + BookieId newBkAddr = startNewBookieAndReturnBookieId(); LOG.info("New Bookie addr :" + newBkAddr); killAllBookies(lh, newBkAddr); - ReplicationWorker rw = new ReplicationWorker(zkc, baseConf); + ReplicationWorker rw = new ReplicationWorker(baseConf); rw.start(); try { @@ -212,8 +265,7 @@ public void testRWShouldRetryUntilThereAreEnoughBksAvailableForReplication() Thread.sleep(100); } // restart killed bookie - bs.add(startBookie(killedBookieConfig)); - bsConfs.add(killedBookieConfig); + startAndAddBookie(killedBookieConfig); while (ReplicationTestUtil.isLedgerInUnderReplication(zkc, lh .getId(), basePath)) { Thread.sleep(100); @@ -239,25 +291,20 @@ public void test2RWsShouldCompeteForReplicationOf2FragmentsAndCompleteReplicatio lh.addEntry(data); } lh.close(); - BookieSocketAddress replicaToKill = LedgerHandleAdapter - .getLedgerMetadata(lh).getEnsembles().get(0L).get(0); - LOG.info("Killing Bookie", replicaToKill); + BookieId replicaToKill = lh.getLedgerMetadata().getAllEnsembles().get(0L).get(0); + LOG.info("Killing Bookie : {}", replicaToKill); ServerConfiguration killedBookieConfig = killBookie(replicaToKill); killAllBookies(lh, null); // Starte RW1 - BookieSocketAddress newBkAddr1 = startNewBookieAndReturnAddress(); + BookieId newBkAddr1 = startNewBookieAndReturnBookieId(); LOG.info("New Bookie addr : {}", newBkAddr1); - ReplicationWorker rw1 = new ReplicationWorker(zkc, baseConf); + ReplicationWorker rw1 = new ReplicationWorker(baseConf); // Starte RW2 - BookieSocketAddress newBkAddr2 = startNewBookieAndReturnAddress(); + BookieId newBkAddr2 = startNewBookieAndReturnBookieId(); LOG.info("New Bookie addr : {}", newBkAddr2); - ZooKeeper zkc1 = ZooKeeperClient.newBuilder() - .connectString(zkUtil.getZooKeeperConnectString()) - .sessionTimeoutMs(10000) - .build(); - ReplicationWorker rw2 = new ReplicationWorker(zkc1, baseConf); + ReplicationWorker rw2 = new ReplicationWorker(baseConf); rw1.start(); rw2.start(); @@ -272,8 +319,7 @@ public void test2RWsShouldCompeteForReplicationOf2FragmentsAndCompleteReplicatio Thread.sleep(100); } // restart killed bookie - bs.add(startBookie(killedBookieConfig)); - bsConfs.add(killedBookieConfig); + startAndAddBookie(killedBookieConfig); while (ReplicationTestUtil.isLedgerInUnderReplication(zkc, lh .getId(), basePath)) { Thread.sleep(100); @@ -283,7 +329,6 @@ public void test2RWsShouldCompeteForReplicationOf2FragmentsAndCompleteReplicatio } finally { rw1.shutdown(); rw2.shutdown(); - zkc1.close(); } } @@ -301,14 +346,13 @@ public void testRWShouldCleanTheLedgerFromUnderReplicationIfLedgerAlreadyDeleted lh.addEntry(data); } lh.close(); - BookieSocketAddress replicaToKill = LedgerHandleAdapter - .getLedgerMetadata(lh).getEnsembles().get(0L).get(0); - LOG.info("Killing Bookie", replicaToKill); + BookieId replicaToKill = lh.getLedgerMetadata().getAllEnsembles().get(0L).get(0); + LOG.info("Killing Bookie : {}", replicaToKill); killBookie(replicaToKill); - 
BookieSocketAddress newBkAddr = startNewBookieAndReturnAddress(); + BookieId newBkAddr = startNewBookieAndReturnBookieId(); LOG.info("New Bookie addr : {}", newBkAddr); - ReplicationWorker rw = new ReplicationWorker(zkc, baseConf); + ReplicationWorker rw = new ReplicationWorker(baseConf); rw.start(); try { @@ -336,10 +380,9 @@ public void testMultipleLedgerReplicationWithReplicationWorker() for (int i = 0; i < 10; i++) { lh1.addEntry(data); } - BookieSocketAddress replicaToKillFromFirstLedger = LedgerHandleAdapter - .getLedgerMetadata(lh1).getEnsembles().get(0L).get(0); + BookieId replicaToKillFromFirstLedger = lh1.getLedgerMetadata().getAllEnsembles().get(0L).get(0); - LOG.info("Killing Bookie", replicaToKillFromFirstLedger); + LOG.info("Killing Bookie : {}", replicaToKillFromFirstLedger); // Ledger2 LedgerHandle lh2 = bkc.createLedger(3, 3, BookKeeper.DigestType.CRC32, @@ -348,10 +391,9 @@ for (int i = 0; i < 10; i++) { lh2.addEntry(data); } - BookieSocketAddress replicaToKillFromSecondLedger = LedgerHandleAdapter - .getLedgerMetadata(lh2).getEnsembles().get(0L).get(0); + BookieId replicaToKillFromSecondLedger = lh2.getLedgerMetadata().getAllEnsembles().get(0L).get(0); - LOG.info("Killing Bookie", replicaToKillFromSecondLedger); + LOG.info("Killing Bookie : {}", replicaToKillFromSecondLedger); // Kill ledger1 killBookie(replicaToKillFromFirstLedger); @@ -360,10 +402,10 @@ killBookie(replicaToKillFromFirstLedger); lh2.close(); - BookieSocketAddress newBkAddr = startNewBookieAndReturnAddress(); + BookieId newBkAddr = startNewBookieAndReturnBookieId(); LOG.info("New Bookie addr : {}", newBkAddr); - ReplicationWorker rw = new ReplicationWorker(zkc, baseConf); + ReplicationWorker rw = new ReplicationWorker(baseConf); rw.start(); try { @@ -395,6 +437,69 @@ } + @Test + public void testMultipleLedgerReplicationWithReplicationWorkerBatchRead() throws Exception { + LedgerHandle lh1 = bkc.createLedger(3, 3, BookKeeper.DigestType.CRC32, TESTPASSWD); + for (int i = 0; i < 200; ++i) { + lh1.addEntry(data); + } + BookieId replicaToKillFromFirstLedger = lh1.getLedgerMetadata().getAllEnsembles().get(0L).get(0); + + LedgerHandle lh2 = bkc.createLedger(3, 3, BookKeeper.DigestType.CRC32, TESTPASSWD); + for (int i = 0; i < 200; ++i) { + lh2.addEntry(data); + } + + BookieId replicaToKillFromSecondLedger = lh2.getLedgerMetadata().getAllEnsembles().get(0L).get(0); + + LOG.info("Killing Bookie : {}", replicaToKillFromFirstLedger); + killBookie(replicaToKillFromFirstLedger); + lh1.close(); + + LOG.info("Killing Bookie : {}", replicaToKillFromSecondLedger); + killBookie(replicaToKillFromSecondLedger); + lh2.close(); + + BookieId newBkAddr = startNewBookieAndReturnBookieId(); + LOG.info("New Bookie addr : {}", newBkAddr); + + if (!replicaToKillFromFirstLedger.equals(replicaToKillFromSecondLedger)) { + BookieId newBkAddr2 = startNewBookieAndReturnBookieId(); + LOG.info("New Bookie addr : {}", newBkAddr2); + } + + ClientConfiguration clientConfiguration = new ClientConfiguration(baseClientConf); + clientConfiguration.setUseV2WireProtocol(true); + clientConfiguration.setRecoveryBatchReadEnabled(true); + clientConfiguration.setBatchReadEnabled(true); + clientConfiguration.setRereplicationEntryBatchSize(100); + clientConfiguration.setReplicationRateByBytes(3 * 1024); + ReplicationWorker rw = new 
ServerConfiguration(clientConfiguration)); + rw.start(); + try { + // Mark ledger1 and ledger2 as underreplicated + underReplicationManager.markLedgerUnderreplicated(lh1.getId(), replicaToKillFromFirstLedger.toString()); + underReplicationManager.markLedgerUnderreplicated(lh2.getId(), replicaToKillFromSecondLedger.toString()); + + while (ReplicationTestUtil.isLedgerInUnderReplication(zkc, lh1.getId(), basePath)) { + Thread.sleep(100); + } + + while (ReplicationTestUtil.isLedgerInUnderReplication(zkc, lh2.getId(), basePath)) { + Thread.sleep(100); + } + + killAllBookies(lh1, newBkAddr); + + // Should be able to read the entries from 0-199 + verifyRecoveredLedgers(lh1, 0, 199); + verifyRecoveredLedgers(lh2, 0, 199); + } finally { + rw.shutdown(); + } + } + /** * Tests that ReplicationWorker should fence the ledger and release ledger * lock after timeout. Then replication should happen normally. @@ -408,18 +513,17 @@ public void testRWShouldReplicateTheLedgersAfterTimeoutIfLastFragmentIsUR() for (int i = 0; i < 10; i++) { lh.addEntry(data); } - BookieSocketAddress replicaToKill = LedgerHandleAdapter - .getLedgerMetadata(lh).getEnsembles().get(0L).get(0); + BookieId replicaToKill = lh.getLedgerMetadata().getAllEnsembles().get(0L).get(0); - LOG.info("Killing Bookie", replicaToKill); + LOG.info("Killing Bookie : {}", replicaToKill); killBookie(replicaToKill); - BookieSocketAddress newBkAddr = startNewBookieAndReturnAddress(); + BookieId newBkAddr = startNewBookieAndReturnBookieId(); LOG.info("New Bookie addr : {}", newBkAddr); // set to 3s instead of default 30s baseConf.setOpenLedgerRereplicationGracePeriod("3000"); - ReplicationWorker rw = new ReplicationWorker(zkc, baseConf); + ReplicationWorker rw = new ReplicationWorker(baseConf); @Cleanup MetadataClientDriver clientDriver = MetadataDrivers.getClientDriver( URI.create(baseClientConf.getMetadataServiceUri())); @@ -452,45 +556,38 @@ } - /** - * Tests that ReplicationWorker will not make more than - * ReplicationWorker.MAXNUMBER_REPLICATION_FAILURES_ALLOWED_BEFORE_DEFERRING - * number of replication failure attempts and if it fails more these many - * number of times then it will defer lock release by - * lockReleaseOfFailedLedgerGracePeriod. 
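The javadoc being removed above described behaviour worth keeping in mind when reading the reworked test: after MAXNUMBER_REPLICATION_FAILURES_ALLOWED_BEFORE_DEFERRING failed attempts on a ledger, the worker defers releasing that ledger's underreplication lock by lockReleaseOfFailedLedgerGracePeriod, so a persistently failing ledger is not re-acquired in a tight loop. A conceptual sketch of that back-off, with illustrative names and a releaseLock callback standing in for the real lock handling:

import java.util.Timer;
import java.util.TimerTask;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicInteger;

// Conceptual sketch only, not ReplicationWorker's actual implementation.
final class DeferredLockReleaseSketch {
    private final ConcurrentHashMap<Long, AtomicInteger> failedAttempts = new ConcurrentHashMap<>();
    private final Timer timer = new Timer("defer-lock-release", true);

    void onReplicationFailure(long ledgerId, int maxFailuresBeforeDeferring,
                              long gracePeriodMs, Runnable releaseLock) {
        int failures = failedAttempts
                .computeIfAbsent(ledgerId, id -> new AtomicInteger())
                .incrementAndGet();
        if (failures <= maxFailuresBeforeDeferring) {
            releaseLock.run(); // still within budget: release immediately, retry soon
        } else {
            timer.schedule(new TimerTask() { // over budget: defer the release
                @Override
                public void run() {
                    releaseLock.run();
                }
            }, gracePeriodMs);
        }
    }
}

The test below sets the grace period to 64 ms precisely so these deferred retries keep cycling quickly until one of the killed bookies is restarted.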
- * - * @throws Exception - */ @Test public void testBookiesNotAvailableScenarioForReplicationWorker() throws Exception { int ensembleSize = 3; LedgerHandle lh = bkc.createLedger(ensembleSize, ensembleSize, BookKeeper.DigestType.CRC32, TESTPASSWD); - for (int i = 0; i < 10; i++) { + int numOfEntries = 7; + for (int i = 0; i < numOfEntries; i++) { lh.addEntry(data); } lh.close(); - BookieSocketAddress[] bookiesKilled = new BookieSocketAddress[ensembleSize]; + BookieId[] bookiesKilled = new BookieId[ensembleSize]; ServerConfiguration[] killedBookiesConfig = new ServerConfiguration[ensembleSize]; // kill all bookies for (int i = 0; i < ensembleSize; i++) { - bookiesKilled[i] = LedgerHandleAdapter.getLedgerMetadata(lh).getEnsembles().get(0L).get(i); + bookiesKilled[i] = lh.getLedgerMetadata().getAllEnsembles().get(0L).get(i); killedBookiesConfig[i] = getBkConf(bookiesKilled[i]); - LOG.info("Killing Bookie", bookiesKilled[i]); + LOG.info("Killing Bookie : {}", bookiesKilled[i]); killBookie(bookiesKilled[i]); } // start new bookiesToKill number of bookies for (int i = 0; i < ensembleSize; i++) { - BookieSocketAddress newBkAddr = startNewBookieAndReturnAddress(); + BookieId newBkAddr = startNewBookieAndReturnBookieId(); } // create couple of replicationworkers - baseConf.setLockReleaseOfFailedLedgerGracePeriod("500"); - ReplicationWorker rw1 = new ReplicationWorker(zkc, baseConf); - ReplicationWorker rw2 = new ReplicationWorker(zkc, baseConf); + ServerConfiguration newRWConf = new ServerConfiguration(baseConf); + newRWConf.setLockReleaseOfFailedLedgerGracePeriod("64"); + ReplicationWorker rw1 = new ReplicationWorker(newRWConf); + ReplicationWorker rw2 = new ReplicationWorker(newRWConf); @Cleanup MetadataClientDriver clientDriver = MetadataDrivers @@ -501,6 +598,7 @@ public void testBookiesNotAvailableScenarioForReplicationWorker() throws Excepti LedgerUnderreplicationManager underReplicationManager = mFactory.newLedgerUnderreplicationManager(); try { + //mark ledger underreplicated for (int i = 0; i < bookiesKilled.length; i++) { underReplicationManager.markLedgerUnderreplicated(lh.getId(), bookiesKilled[i].toString()); } @@ -516,10 +614,10 @@ public void testBookiesNotAvailableScenarioForReplicationWorker() throws Excepti @Override public void run() { try { - Thread.sleep(4000); + Thread.sleep(3000); isBookieRestarted.set(true); /* - * after sleeping for 4000 msecs, restart one of the + * after sleeping for 3000 msecs, restart one of the * bookie, so that replication can succeed. */ startBookie(killedBookiesConfig[0]); @@ -529,6 +627,8 @@ public void run() { } })).start(); + int rw1PrevFailedAttemptsCount = 0; + int rw2PrevFailedAttemptsCount = 0; while (!isBookieRestarted.get()) { /* * since all the bookies containing the ledger entries are down @@ -536,26 +636,21 @@ public void run() { */ assertTrue("Ledger: " + lh.getId() + " should be underreplicated", ReplicationTestUtil.isLedgerInUnderReplication(zkc, lh.getId(), basePath)); - /* - * check for both the replicationworkders number of failed - * attempts should be less than ReplicationWorker. - * MAXNUMBER_REPLICATION_FAILURES_ALLOWED_BEFORE_DEFERRING - */ - int failedAttempts = rw1.replicationFailedLedgers.get(lh.getId()).get(); + + // the number of failed attempts should have increased. 
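+                    // (more precisely: it must never decrease while every
+                    // bookie holding the ledger's entries stays down)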
+ int rw1CurFailedAttemptsCount = rw1.replicationFailedLedgers.get(lh.getId()).get(); assertTrue( - "The number of failed attempts should be less than " - + "ReplicationWorker.MAXNUMBER_REPLICATION_FAILURES_ALLOWED_BEFORE_DEFERRING, " - + "but it is " - + failedAttempts, - failedAttempts <= ReplicationWorker.MAXNUMBER_REPLICATION_FAILURES_ALLOWED_BEFORE_DEFERRING); + "The current number of failed attempts: " + rw1CurFailedAttemptsCount + + " should be greater than or equal to previous value: " + rw1PrevFailedAttemptsCount, + rw1CurFailedAttemptsCount >= rw1PrevFailedAttemptsCount); + rw1PrevFailedAttemptsCount = rw1CurFailedAttemptsCount; - failedAttempts = rw2.replicationFailedLedgers.get(lh.getId()).get(); + int rw2CurFailedAttemptsCount = rw2.replicationFailedLedgers.get(lh.getId()).get(); assertTrue( - "The number of failed attempts should be less than " - + "ReplicationWorker.MAXNUMBER_REPLICATION_FAILURES_ALLOWED_BEFORE_DEFERRING, " - + "but it is " - + failedAttempts, - failedAttempts <= ReplicationWorker.MAXNUMBER_REPLICATION_FAILURES_ALLOWED_BEFORE_DEFERRING); + "The current number of failed attempts: " + rw2CurFailedAttemptsCount + + " should be greater than or equal to previous value: " + rw2PrevFailedAttemptsCount, + rw2CurFailedAttemptsCount >= rw2PrevFailedAttemptsCount); + rw2PrevFailedAttemptsCount = rw2CurFailedAttemptsCount; Thread.sleep(50); } @@ -565,7 +660,7 @@ public void run() { * should succeed in replicating this under replicated ledger and it * shouldn't be under replicated anymore. */ - int timeToWaitForReplicationToComplete = 2000; + int timeToWaitForReplicationToComplete = 20000; int timeWaited = 0; while (ReplicationTestUtil.isLedgerInUnderReplication(zkc, lh.getId(), basePath)) { Thread.sleep(100); @@ -574,6 +669,167 @@ public void run() { fail("Ledger should be replicated by now"); } } + + rw1PrevFailedAttemptsCount = rw1.replicationFailedLedgers.get(lh.getId()).get(); + rw2PrevFailedAttemptsCount = rw2.replicationFailedLedgers.get(lh.getId()).get(); + Thread.sleep(2000); + // now that the ledger is replicated, the failed-attempts counter + // shouldn't increase even after sleeping for some time. + assertEquals("rw1 failed attempts", rw1PrevFailedAttemptsCount, + rw1.replicationFailedLedgers.get(lh.getId()).get()); + assertEquals("rw2 failed attempts", rw2PrevFailedAttemptsCount, + rw2.replicationFailedLedgers.get(lh.getId()).get()); + + /* + * Since these entries are eventually available, and replication has + * eventually succeeded, in at least one of the RWs + * unableToReadEntriesForReplication should be 0.
+ */ + int rw1UnableToReadEntriesForReplication = rw1.unableToReadEntriesForReplication.get(lh.getId()).size(); + int rw2UnableToReadEntriesForReplication = rw2.unableToReadEntriesForReplication.get(lh.getId()).size(); + assertTrue( + "unableToReadEntriesForReplication in RW1: " + rw1UnableToReadEntriesForReplication + + " in RW2: " + + rw2UnableToReadEntriesForReplication, + (rw1UnableToReadEntriesForReplication == 0) + || (rw2UnableToReadEntriesForReplication == 0)); + } finally { + rw1.shutdown(); + rw2.shutdown(); + underReplicationManager.close(); + } + } + + class InjectedReplicationWorker extends ReplicationWorker { + CopyOnWriteArrayList delayReplicationPeriods; + + public InjectedReplicationWorker(ServerConfiguration conf, StatsLogger statsLogger, + CopyOnWriteArrayList delayReplicationPeriods) + throws CompatibilityException, ReplicationException.UnavailableException, + InterruptedException, IOException { + super(conf, statsLogger); + this.delayReplicationPeriods = delayReplicationPeriods; + } + + @Override + void scheduleTaskWithDelay(TimerTask timerTask, long delayPeriod) { + delayReplicationPeriods.add(delayPeriod); + super.scheduleTaskWithDelay(timerTask, delayPeriod); + } + } + + @Test + public void testDeferLedgerLockReleaseForReplicationWorker() throws Exception { + int ensembleSize = 3; + LedgerHandle lh = bkc.createLedger(ensembleSize, ensembleSize, BookKeeper.DigestType.CRC32, TESTPASSWD); + int numOfEntries = 7; + for (int i = 0; i < numOfEntries; i++) { + lh.addEntry(data); + } + lh.close(); + + BookieId[] bookiesKilled = new BookieId[ensembleSize]; + ServerConfiguration[] killedBookiesConfig = new ServerConfiguration[ensembleSize]; + + // kill all bookies + for (int i = 0; i < ensembleSize; i++) { + bookiesKilled[i] = lh.getLedgerMetadata().getAllEnsembles().get(0L).get(i); + killedBookiesConfig[i] = getBkConf(bookiesKilled[i]); + LOG.info("Killing Bookie : {}", bookiesKilled[i]); + killBookie(bookiesKilled[i]); + } + + // start new bookiesToKill number of bookies + for (int i = 0; i < ensembleSize; i++) { + startNewBookieAndReturnBookieId(); + } + + // create couple of replicationworkers + long lockReleaseOfFailedLedgerGracePeriod = 64L; + long baseBackoffForLockReleaseOfFailedLedger = lockReleaseOfFailedLedgerGracePeriod + / (int) Math.pow(2, ReplicationWorker.NUM_OF_EXPONENTIAL_BACKOFF_RETRIALS); + ServerConfiguration newRWConf = new ServerConfiguration(baseConf); + newRWConf.setLockReleaseOfFailedLedgerGracePeriod(Long.toString(lockReleaseOfFailedLedgerGracePeriod)); + newRWConf.setRereplicationEntryBatchSize(1000); + CopyOnWriteArrayList rw1DelayReplicationPeriods = new CopyOnWriteArrayList(); + CopyOnWriteArrayList rw2DelayReplicationPeriods = new CopyOnWriteArrayList(); + TestStatsProvider statsProvider = new TestStatsProvider(); + TestStatsLogger statsLogger1 = statsProvider.getStatsLogger("rw1"); + TestStatsLogger statsLogger2 = statsProvider.getStatsLogger("rw2"); + ReplicationWorker rw1 = new InjectedReplicationWorker(newRWConf, statsLogger1, rw1DelayReplicationPeriods); + ReplicationWorker rw2 = new InjectedReplicationWorker(newRWConf, statsLogger2, rw2DelayReplicationPeriods); + + Counter numEntriesUnableToReadForReplication1 = statsLogger1 + .getCounter(NUM_ENTRIES_UNABLE_TO_READ_FOR_REPLICATION); + Counter numEntriesUnableToReadForReplication2 = statsLogger2 + .getCounter(NUM_ENTRIES_UNABLE_TO_READ_FOR_REPLICATION); + @Cleanup + MetadataClientDriver clientDriver = MetadataDrivers + .getClientDriver(URI.create(baseClientConf.getMetadataServiceUri())); + 
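+        // No Auditor runs in this test, so a metadata client driver is
+        // initialized by hand to obtain the ledger manager factory and the
+        // under-replication manager used to mark the ledger under-replicated.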
clientDriver.initialize(baseClientConf, scheduler, NullStatsLogger.INSTANCE, Optional.empty()); + + LedgerManagerFactory mFactory = clientDriver.getLedgerManagerFactory(); + + LedgerUnderreplicationManager underReplicationManager = mFactory.newLedgerUnderreplicationManager(); + try { + // mark ledger underreplicated + for (int i = 0; i < bookiesKilled.length; i++) { + underReplicationManager.markLedgerUnderreplicated(lh.getId(), bookiesKilled[i].toString()); + } + while (!ReplicationTestUtil.isLedgerInUnderReplication(zkc, lh.getId(), basePath)) { + Thread.sleep(100); + } + rw1.start(); + rw2.start(); + + // wait for RWs to complete 'numOfAttemptsToWaitFor' failed attempts + int numOfAttemptsToWaitFor = 10; + while ((rw1.replicationFailedLedgers.get(lh.getId()).get() < numOfAttemptsToWaitFor) + || rw2.replicationFailedLedgers.get(lh.getId()).get() < numOfAttemptsToWaitFor) { + Thread.sleep(500); + } + + /* + * since all the bookies containing the ledger entries are down + * replication wouldn't have succeeded. + */ + assertTrue("Ledger: " + lh.getId() + " should be underreplicated", + ReplicationTestUtil.isLedgerInUnderReplication(zkc, lh.getId(), basePath)); + + /* + * since RW failed 'numOfAttemptsToWaitFor' number of times, we + * should have atleast (numOfAttemptsToWaitFor - 1) + * delayReplicationPeriods and their value should be + * (lockReleaseOfFailedLedgerGracePeriod/16) , 2 * previous value,.. + * with max : lockReleaseOfFailedLedgerGracePeriod + */ + for (int i = 0; i < ((numOfAttemptsToWaitFor - 1)); i++) { + long expectedDelayValue = Math.min(lockReleaseOfFailedLedgerGracePeriod, + baseBackoffForLockReleaseOfFailedLedger * (1 << i)); + assertEquals("RW1 delayperiod", (Long) expectedDelayValue, rw1DelayReplicationPeriods.get(i)); + assertEquals("RW2 delayperiod", (Long) expectedDelayValue, rw2DelayReplicationPeriods.get(i)); + } + + /* + * RW wont try to replicate until and unless RW succeed in reading + * those failed entries before proceeding with replication of under + * replicated fragment, so the numEntriesUnableToReadForReplication + * should be just 'numOfEntries', though RW failed to replicate + * multiple times. + */ + assertEquals("numEntriesUnableToReadForReplication for RW1", Long.valueOf((long) numOfEntries), + numEntriesUnableToReadForReplication1.get()); + assertEquals("numEntriesUnableToReadForReplication for RW2", Long.valueOf((long) numOfEntries), + numEntriesUnableToReadForReplication2.get()); + + /* + * Since these entries are unavailable, + * unableToReadEntriesForReplication should be of size numOfEntries. 
+ */ + assertEquals("RW1 unabletoreadentries", numOfEntries, + rw1.unableToReadEntriesForReplication.get(lh.getId()).size()); + assertEquals("RW2 unabletoreadentries", numOfEntries, + rw2.unableToReadEntriesForReplication.get(lh.getId()).size()); } finally { rw1.shutdown(); rw2.shutdown(); @@ -595,13 +851,12 @@ public void testRWShouldReplicateTheLedgersAfterTimeoutIfLastFragmentIsNotUR() for (int i = 0; i < 10; i++) { lh.addEntry(data); } - BookieSocketAddress replicaToKill = LedgerHandleAdapter - .getLedgerMetadata(lh).getEnsembles().get(0L).get(0); + BookieId replicaToKill = lh.getLedgerMetadata().getAllEnsembles().get(0L).get(0); - LOG.info("Killing Bookie", replicaToKill); + LOG.info("Killing Bookie : {}", replicaToKill); killBookie(replicaToKill); - BookieSocketAddress newBkAddr = startNewBookieAndReturnAddress(); + BookieId newBkAddr = startNewBookieAndReturnBookieId(); LOG.info("New Bookie addr : {}", newBkAddr); // Reform ensemble...Making sure that last fragment is not in @@ -610,7 +865,7 @@ public void testRWShouldReplicateTheLedgersAfterTimeoutIfLastFragmentIsNotUR() lh.addEntry(data); } - ReplicationWorker rw = new ReplicationWorker(zkc, baseConf); + ReplicationWorker rw = new ReplicationWorker(baseConf); baseClientConf.setMetadataServiceUri(zkUtil.getMetadataServiceUri()); @@ -660,7 +915,7 @@ public void testRWZKConnectionLost() throws Exception { .sessionTimeoutMs(10000) .build()) { - ReplicationWorker rw = new ReplicationWorker(zk, baseConf); + ReplicationWorker rw = new ReplicationWorker(baseConf); rw.start(); for (int i = 0; i < 10; i++) { if (rw.isRunning()) { @@ -671,27 +926,72 @@ public void testRWZKConnectionLost() throws Exception { assertTrue("Replication worker should be running", rw.isRunning()); stopZKCluster(); - // Wait for disconnection to be picked up + // ZK is down for shorter period than reconnect timeout + Thread.sleep(1000); + startZKCluster(); + + assertTrue("Replication worker should not shutdown", rw.isRunning()); + } + } + + /** + * Test that the replication worker shuts down on non-recoverable ZK connection loss. 
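+     * The ZooKeeper handle backing the under-replication manager is closed
+     * directly, so the session cannot be re-established and both the auditor
+     * and the replication worker are expected to stop.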
+ */ + @Test + public void testRWZKConnectionLostOnNonRecoverableZkError() throws Exception { + for (int j = 0; j < 3; j++) { + LedgerHandle lh = bkc.createLedger(1, 1, 1, + BookKeeper.DigestType.CRC32, TESTPASSWD, + null); + final long createdLedgerId = lh.getId(); for (int i = 0; i < 10; i++) { - if (!zk.getState().isConnected()) { - break; - } - Thread.sleep(1000); + lh.addEntry(data); } - assertFalse(zk.getState().isConnected()); - startZKCluster(); + lh.close(); + } + + killBookie(2); + killBookie(1); + startNewBookie(); + startNewBookie(); + + servers.get(0).getConfiguration().setRwRereplicateBackoffMs(100); + servers.get(0).startAutoRecovery(); - assertTrue("Replication worker should still be running", rw.isRunning()); + Auditor auditor = getAuditor(10, TimeUnit.SECONDS); + ReplicationWorker rw = servers.get(0).getReplicationWorker(); + + ZkLedgerUnderreplicationManager ledgerUnderreplicationManager = + (ZkLedgerUnderreplicationManager) FieldUtils.readField(auditor, + "ledgerUnderreplicationManager", true); + + ZooKeeper zkc = (ZooKeeper) FieldUtils.readField(ledgerUnderreplicationManager, "zkc", true); + auditor.submitAuditTask().get(); + + assertTrue(zkc.getState().isConnected()); + zkc.close(); + assertFalse(zkc.getState().isConnected()); + + auditor.submitAuditTask(); + rw.run(); + + for (int i = 0; i < 10; i++) { + if (!rw.isRunning() && !auditor.isRunning()) { + break; + } + Thread.sleep(1000); } + assertFalse("Replication worker should NOT be running", rw.isRunning()); + assertFalse("Auditor should NOT be running", auditor.isRunning()); } - private void killAllBookies(LedgerHandle lh, BookieSocketAddress excludeBK) + private void killAllBookies(LedgerHandle lh, BookieId excludeBK) throws Exception { // Killing all bookies except newly replicated bookie - for (Entry> entry : - lh.getLedgerMetadata().getEnsembles().entrySet()) { - List bookies = entry.getValue(); - for (BookieSocketAddress bookie : bookies) { + for (Entry> entry : + lh.getLedgerMetadata().getAllEnsembles().entrySet()) { + List bookies = entry.getValue(); + for (BookieId bookie : bookies) { if (bookie.equals(excludeBK)) { continue; } @@ -713,4 +1013,480 @@ private void verifyRecoveredLedgers(LedgerHandle lh, long startEntryId, } } + class MockZooKeeperClient extends ZooKeeperClient { + private final String connectString; + private final int sessionTimeoutMs; + private final ZooKeeperWatcherBase watcherManager; + private volatile String pathOfSetDataToFail; + private volatile String pathOfDeleteToFail; + private AtomicInteger numOfTimesSetDataFailed = new AtomicInteger(); + private AtomicInteger numOfTimesDeleteFailed = new AtomicInteger(); + + MockZooKeeperClient(String connectString, int sessionTimeoutMs, ZooKeeperWatcherBase watcher) + throws IOException { + /* + * in OperationalRetryPolicy maxRetries is set to 0. So it wont + * retry incase of any error/exception. 
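+             * (the second BoundExponentialBackoffRetryPolicy passed to super()
+             * below is the operation retry policy; it is built with maxRetries = 0)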
+ */ + super(connectString, sessionTimeoutMs, watcher, + new BoundExponentialBackoffRetryPolicy(sessionTimeoutMs, sessionTimeoutMs, Integer.MAX_VALUE), + new BoundExponentialBackoffRetryPolicy(sessionTimeoutMs, sessionTimeoutMs, 0), + NullStatsLogger.INSTANCE, 1, 0, false); + this.connectString = connectString; + this.sessionTimeoutMs = sessionTimeoutMs; + this.watcherManager = watcher; + } + + @Override + protected ZooKeeper createZooKeeper() throws IOException { + return new MockZooKeeper(this.connectString, this.sessionTimeoutMs, this.watcherManager, false); + } + + private void setPathOfSetDataToFail(String pathOfSetDataToFail) { + this.pathOfSetDataToFail = pathOfSetDataToFail; + } + + private void setPathOfDeleteToFail(String pathOfDeleteToFail) { + this.pathOfDeleteToFail = pathOfDeleteToFail; + } + + private int getNumOfTimesSetDataFailed() { + return numOfTimesSetDataFailed.get(); + } + + private int getNumOfTimesDeleteFailed() { + return numOfTimesDeleteFailed.get(); + } + + class MockZooKeeper extends ZooKeeper { + public MockZooKeeper(String connectString, int sessionTimeout, Watcher watcher, boolean canBeReadOnly) + throws IOException { + super(connectString, sessionTimeout, watcher, canBeReadOnly); + } + + @Override + public void setData(final String path, final byte[] data, final int version, final StatCallback cb, + final Object context) { + if ((pathOfSetDataToFail != null) && (pathOfSetDataToFail.equals(path))) { + /* + * if pathOfSetDataToFail matches with the path of the node, + * then callback with CONNECTIONLOSS error. + */ + LOG.error("setData of MockZooKeeper, is failing with CONNECTIONLOSS for path: {}", path); + numOfTimesSetDataFailed.incrementAndGet(); + cb.processResult(KeeperException.Code.CONNECTIONLOSS.intValue(), path, context, null); + } else { + super.setData(path, data, version, cb, context); + } + } + + @Override + public void delete(final String path, final int version) throws KeeperException, InterruptedException { + if ((pathOfDeleteToFail != null) && (pathOfDeleteToFail.equals(path))) { + /* + * if pathOfDeleteToFail matches with the path of the node, + * then throw CONNECTIONLOSS exception. + */ + LOG.error("delete of MockZooKeeper, is failing with CONNECTIONLOSS for path: {}", path); + numOfTimesDeleteFailed.incrementAndGet(); + throw new KeeperException.ConnectionLossException(); + } else { + super.delete(path, version); + } + } + } + } + + @Test + public void testRWShutDownInTheCaseOfZKOperationFailures() throws Exception { + /* + * create MockZooKeeperClient instance and wait for it to be connected. + */ + int zkSessionTimeOut = 10000; + ZooKeeperWatcherBase zooKeeperWatcherBase = new ZooKeeperWatcherBase(zkSessionTimeOut, false, + NullStatsLogger.INSTANCE); + MockZooKeeperClient zkFaultInjectionWrapper = new MockZooKeeperClient(zkUtil.getZooKeeperConnectString(), + zkSessionTimeOut, zooKeeperWatcherBase); + zkFaultInjectionWrapper.waitForConnection(); + assertEquals("zkFaultInjectionWrapper should be in connected state", States.CONNECTED, + zkFaultInjectionWrapper.getState()); + long oldZkInstanceSessionId = zkFaultInjectionWrapper.getSessionId(); + + /* + * create ledger and add entries. 
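+         * (ensemble/write/ack quorum of 2, created with createLedgerAdv)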
+ */ + BookKeeper bkWithMockZK = new BookKeeper(baseClientConf, zkFaultInjectionWrapper); + long ledgerId = 567L; + LedgerHandle lh = bkWithMockZK.createLedgerAdv(ledgerId, 2, 2, 2, + BookKeeper.DigestType.CRC32, TESTPASSWD, + null); + for (int i = 0; i < 10; i++) { + lh.addEntry(i, data); + } + lh.close(); + + /* + * trigger Expired event so that MockZooKeeperClient would run + * 'clientCreator' and create new zk handle. In this case it would + * create MockZooKeeper instance. + */ + zooKeeperWatcherBase.process(new WatchedEvent(EventType.None, KeeperState.Expired, "")); + zkFaultInjectionWrapper.waitForConnection(); + for (int i = 0; i < 10; i++) { + if (zkFaultInjectionWrapper.getState() == States.CONNECTED) { + break; + } + Thread.sleep(200); + } + assertEquals("zkFaultInjectionWrapper should be in connected state", States.CONNECTED, + zkFaultInjectionWrapper.getState()); + assertNotEquals("Session Id of old and new ZK instance should be different", oldZkInstanceSessionId, + zkFaultInjectionWrapper.getSessionId()); + + /* + * Kill a Bookie, so that ledger becomes underreplicated. Since totally + * 3 bookies are available and the ensemblesize of the current ledger is + * 2, we should be able to replicate to the other bookie. + */ + BookieId replicaToKill = lh.getLedgerMetadata().getAllEnsembles().get(0L).get(0); + LOG.info("Killing Bookie id {}", replicaToKill); + killBookie(replicaToKill); + + /* + * Start RW. + */ + ReplicationWorker rw = new ReplicationWorker(baseConf, bkWithMockZK, false, NullStatsLogger.INSTANCE); + rw.start(); + try { + for (int i = 0; i < 40; i++) { + if (rw.isRunning()) { + break; + } + LOG.info("Waiting for the RW to start..."); + Thread.sleep(500); + } + assertTrue("RW should be running", rw.isRunning()); + + /* + * Since Auditor is not running, ledger needs to be marked + * underreplicated explicitly. But before marking ledger + * underreplicated, set paths for which MockZooKeeper's setData and + * Delete operation to fail. + * + * ZK.setData will be called by 'updateEnsembleInfo' operation after + * completion of copying to a new bookie. ZK.delete will be called by + * RW.logBKExceptionAndReleaseLedger and finally block in + * 'rereplicate(long ledgerIdToReplicate)' + */ + AbstractZkLedgerManager absZKLedgerManager = (AbstractZkLedgerManager) ledgerManager; + String ledgerPath = absZKLedgerManager.getLedgerPath(ledgerId); + String urLockPath = ZkLedgerUnderreplicationManager + .getUrLedgerLockZnode(ZkLedgerUnderreplicationManager.getUrLockPath(zkLedgersRootPath), ledgerId); + zkFaultInjectionWrapper.setPathOfSetDataToFail(ledgerPath); + zkFaultInjectionWrapper.setPathOfDeleteToFail(urLockPath); + underReplicationManager.markLedgerUnderreplicated(lh.getId(), replicaToKill.toString()); + + /* + * Since there is only one RW, it will try to replicate underreplicated + * ledger. After completion of copying it to a new bookie, it will try + * to update ensembleinfo. Which would fail with our MockZK. After that + * it would try to delete lock znode as part of + * RW.logBKExceptionAndReleaseLedger, which will also fail because of + * our MockZK. In the finally block in 'rereplicate(long + * ledgerIdToReplicate)' it would try one more time to delete the ledger + * and once again it will fail because of our MockZK. So RW gives up and + * shutdowns itself. 
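+             * That sequence is why the assertions below expect exactly one
+             * failed setData and two failed deletes.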
+ */ + for (int i = 0; i < 40; i++) { + if (!rw.isRunning()) { + break; + } + LOG.info("Waiting for the RW to shutdown..."); + Thread.sleep(500); + } + + /* + * as described earlier, numOfTimes setDataFailed should be 1 and + * numOfTimes deleteFailed should be 2 + */ + assertEquals("NumOfTimesSetDataFailed", 1, + zkFaultInjectionWrapper.getNumOfTimesSetDataFailed()); + assertEquals("NumOfTimesDeleteFailed", 2, + zkFaultInjectionWrapper.getNumOfTimesDeleteFailed()); + assertFalse("RW should be shutdown", rw.isRunning()); + } finally { + rw.shutdown(); + zkFaultInjectionWrapper.close(); + bkWithMockZK.close(); + } + } + + @Test + public void testReplicateEmptyOpenStateLedger() throws Exception { + LedgerHandle lh = bkc.createLedger(3, 3, 2, BookKeeper.DigestType.CRC32, TESTPASSWD); + assertFalse(lh.getLedgerMetadata().isClosed()); + + List firstEnsemble = lh.getLedgerMetadata().getAllEnsembles().firstEntry().getValue(); + List ensemble = lh.getLedgerMetadata().getAllEnsembles().entrySet().iterator().next().getValue(); + killBookie(ensemble.get(1)); + + startNewBookie(); + baseConf.setOpenLedgerRereplicationGracePeriod(String.valueOf(30)); + ReplicationWorker replicationWorker = new ReplicationWorker(baseConf); + replicationWorker.start(); + + try { + underReplicationManager.markLedgerUnderreplicated(lh.getId(), ensemble.get(1).toString()); + Awaitility.waitAtMost(60, TimeUnit.SECONDS).untilAsserted(() -> + assertFalse(ReplicationTestUtil.isLedgerInUnderReplication(zkc, lh.getId(), basePath)) + ); + + LedgerHandle lh1 = bkc.openLedgerNoRecovery(lh.getId(), BookKeeper.DigestType.CRC32, TESTPASSWD); + assertTrue(lh1.getLedgerMetadata().isClosed()); + } finally { + replicationWorker.shutdown(); + } + } + + @Test + public void testRepairedNotAdheringPlacementPolicyLedgerFragmentsOnRack() throws Exception { + testRepairedNotAdheringPlacementPolicyLedgerFragments(RackawareEnsemblePlacementPolicy.class, null); + } + + @Test + public void testReplicationStats() throws Exception { + BiConsumer checkReplicationStats = (first, rw) -> { + try { + final Method rereplicate = rw.getClass().getDeclaredMethod("rereplicate"); + rereplicate.setAccessible(true); + final Object result = rereplicate.invoke(rw); + final Field statsLoggerField = rw.getClass().getDeclaredField("statsLogger"); + statsLoggerField.setAccessible(true); + final TestStatsLogger statsLogger = (TestStatsLogger) statsLoggerField.get(rw); + + final Counter numDeferLedgerLockReleaseOfFailedLedgerCounter = + statsLogger.getCounter(ReplicationStats.NUM_DEFER_LEDGER_LOCK_RELEASE_OF_FAILED_LEDGER); + final Counter numLedgersReplicatedCounter = + statsLogger.getCounter(ReplicationStats.NUM_FULL_OR_PARTIAL_LEDGERS_REPLICATED); + final Counter numNotAdheringPlacementLedgersCounter = statsLogger + .getCounter(ReplicationStats.NUM_NOT_ADHERING_PLACEMENT_LEDGERS_REPLICATED); + + assertEquals("NUM_DEFER_LEDGER_LOCK_RELEASE_OF_FAILED_LEDGER", + 1, numDeferLedgerLockReleaseOfFailedLedgerCounter.get().longValue()); + + if (first) { + assertFalse((boolean) result); + assertEquals("NUM_FULL_OR_PARTIAL_LEDGERS_REPLICATED", + 0, numLedgersReplicatedCounter.get().longValue()); + assertEquals("NUM_NOT_ADHERING_PLACEMENT_LEDGERS_REPLICATED", + 0, numNotAdheringPlacementLedgersCounter.get().longValue()); + + } else { + assertTrue((boolean) result); + assertEquals("NUM_FULL_OR_PARTIAL_LEDGERS_REPLICATED", + 1, numLedgersReplicatedCounter.get().longValue()); + assertEquals("NUM_NOT_ADHERING_PLACEMENT_LEDGERS_REPLICATED", + 1, 
numNotAdheringPlacementLedgersCounter.get().longValue()); + } + } catch (Exception e) { + throw new RuntimeException(e); + } + }; + testRepairedNotAdheringPlacementPolicyLedgerFragments( + RackawareEnsemblePlacementPolicy.class, checkReplicationStats); + } + + private void testRepairedNotAdheringPlacementPolicyLedgerFragments( + Class placementPolicyClass, + BiConsumer checkReplicationStats) throws Exception { + List firstThreeBookies = servers.stream().map(ele -> { + try { + return ele.getServer().getBookieId(); + } catch (UnknownHostException e) { + return null; + } + }).filter(Objects::nonNull).collect(Collectors.toList()); + + baseClientConf.setProperty("reppDnsResolverClass", StaticDNSResolver.class.getName()); + baseClientConf.setProperty("enforceStrictZoneawarePlacement", false); + bkc.close(); + bkc = new BookKeeperTestClient(baseClientConf) { + @Override + protected EnsemblePlacementPolicy initializeEnsemblePlacementPolicy(ClientConfiguration conf, + DNSToSwitchMapping dnsResolver, HashedWheelTimer timer, FeatureProvider featureProvider, + StatsLogger statsLogger, BookieAddressResolver bookieAddressResolver) throws IOException { + EnsemblePlacementPolicy ensemblePlacementPolicy = null; + if (ZoneawareEnsemblePlacementPolicy.class == placementPolicyClass) { + ensemblePlacementPolicy = buildZoneAwareEnsemblePlacementPolicy(firstThreeBookies); + } else if (RackawareEnsemblePlacementPolicy.class == placementPolicyClass) { + ensemblePlacementPolicy = buildRackAwareEnsemblePlacementPolicy(firstThreeBookies); + } + ensemblePlacementPolicy.initialize(conf, Optional.ofNullable(dnsResolver), timer, + featureProvider, statsLogger, bookieAddressResolver); + return ensemblePlacementPolicy; + } + }; + + //This ledger not adhering placement policy, the combine(0,1,2) rack is 1. 
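+        // (under the custom resolver installed above, bookies 0, 1 and 2 all
+        // resolve to /rack1, so the 3-bookie ensemble spans a single rack)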
+ LedgerHandle lh = bkc.createLedger(3, 3, 3, BookKeeper.DigestType.CRC32, TESTPASSWD); + + int entrySize = 10; + for (int i = 0; i < entrySize; i++) { + lh.addEntry(data); + } + lh.close(); + + int minNumRacksPerWriteQuorumConfValue = 2; + + ServerConfiguration servConf = new ServerConfiguration(confByIndex(0)); + servConf.setMinNumRacksPerWriteQuorum(minNumRacksPerWriteQuorumConfValue); + servConf.setProperty("reppDnsResolverClass", StaticDNSResolver.class.getName()); + servConf.setAuditorPeriodicPlacementPolicyCheckInterval(1000); + servConf.setRepairedPlacementPolicyNotAdheringBookieEnable(true); + + MutableObject auditorRef = new MutableObject(); + try { + TestStatsLogger statsLogger = startAuditorAndWaitForPlacementPolicyCheck(servConf, auditorRef); + Gauge ledgersNotAdheringToPlacementPolicyGuage = statsLogger + .getGauge(ReplicationStats.NUM_LEDGERS_NOT_ADHERING_TO_PLACEMENT_POLICY); + assertEquals("NUM_LEDGERS_NOT_ADHERING_TO_PLACEMENT_POLICY guage value", + 1, ledgersNotAdheringToPlacementPolicyGuage.getSample()); + Gauge ledgersSoftlyAdheringToPlacementPolicyGuage = statsLogger + .getGauge(ReplicationStats.NUM_LEDGERS_SOFTLY_ADHERING_TO_PLACEMENT_POLICY); + assertEquals("NUM_LEDGERS_SOFTLY_ADHERING_TO_PLACEMENT_POLICY guage value", + 0, ledgersSoftlyAdheringToPlacementPolicyGuage.getSample()); + } finally { + Auditor auditor = auditorRef.getValue(); + if (auditor != null) { + auditor.close(); + } + } + + Stat stat = bkc.getZkHandle() + .exists("/ledgers/underreplication/ledgers/0000/0000/0000/0000/urL0000000000", false); + assertNotNull(stat); + + baseConf.setRepairedPlacementPolicyNotAdheringBookieEnable(true); + BookKeeper bookKeeper = new BookKeeperTestClient(baseClientConf) { + @Override + protected EnsemblePlacementPolicy initializeEnsemblePlacementPolicy(ClientConfiguration conf, + DNSToSwitchMapping dnsResolver, HashedWheelTimer timer, FeatureProvider featureProvider, + StatsLogger statsLogger, BookieAddressResolver bookieAddressResolver) throws IOException { + EnsemblePlacementPolicy ensemblePlacementPolicy = null; + if (ZoneawareEnsemblePlacementPolicy.class == placementPolicyClass) { + ensemblePlacementPolicy = buildZoneAwareEnsemblePlacementPolicy(firstThreeBookies); + } else if (RackawareEnsemblePlacementPolicy.class == placementPolicyClass) { + ensemblePlacementPolicy = buildRackAwareEnsemblePlacementPolicy(firstThreeBookies); + } + ensemblePlacementPolicy.initialize(conf, Optional.ofNullable(dnsResolver), timer, + featureProvider, statsLogger, bookieAddressResolver); + return ensemblePlacementPolicy; + } + }; + TestStatsProvider statsProvider = new TestStatsProvider(); + TestStatsLogger statsLogger = statsProvider.getStatsLogger(REPLICATION_SCOPE); + ReplicationWorker rw = new ReplicationWorker(baseConf, bookKeeper, false, statsLogger); + + if (checkReplicationStats != null) { + checkReplicationStats.accept(true, rw); + } else { + rw.start(); + } + + //start new bookie, the rack is /rack2 + BookieId newBookieId = startNewBookieAndReturnBookieId(); + + if (checkReplicationStats != null) { + checkReplicationStats.accept(false, rw); + } + + Awaitility.await().untilAsserted(() -> { + LedgerMetadata metadata = bkc.getLedgerManager().readLedgerMetadata(lh.getId()).get().getValue(); + List newBookies = metadata.getAllEnsembles().get(0L); + assertTrue(newBookies.contains(newBookieId)); + }); + + Awaitility.await().untilAsserted(() -> { + Stat stat1 = bkc.getZkHandle() + .exists("/ledgers/underreplication/ledgers/0000/0000/0000/0000/urL0000000000", false); + 
assertNull(stat1); + }); + + for (BookieId rack1Book : firstThreeBookies) { + killBookie(rack1Book); + } + + verifyRecoveredLedgers(lh, 0, entrySize - 1); + + if (checkReplicationStats == null) { + rw.shutdown(); + } + baseConf.setRepairedPlacementPolicyNotAdheringBookieEnable(false); + bookKeeper.close(); + } + + private EnsemblePlacementPolicy buildRackAwareEnsemblePlacementPolicy(List bookieIds) { + return new RackawareEnsemblePlacementPolicy() { + @Override + public String resolveNetworkLocation(BookieId addr) { + if (bookieIds.contains(addr)) { + return "/rack1"; + } + //The other bookie is /rack2 + return "/rack2"; + } + }; + } + + private EnsemblePlacementPolicy buildZoneAwareEnsemblePlacementPolicy(List firstThreeBookies) { + return new ZoneawareEnsemblePlacementPolicy() { + @Override + protected String resolveNetworkLocation(BookieId addr) { + //The first three bookie 1 is /zone1/ud1 + //The first three bookie 2,3 is /zone1/ud2 + if (firstThreeBookies.get(0).equals(addr)) { + return "/zone1/ud1"; + } else if (firstThreeBookies.contains(addr)) { + return "/zone1/ud2"; + } + //The other bookie is /zone2/ud1 + return "/zone2/ud1"; + } + }; + } + + private TestStatsLogger startAuditorAndWaitForPlacementPolicyCheck(ServerConfiguration servConf, + MutableObject auditorRef) throws MetadataException, CompatibilityException, KeeperException, + InterruptedException, ReplicationException.UnavailableException, UnknownHostException { + LedgerManagerFactory mFactory = driver.getLedgerManagerFactory(); + LedgerUnderreplicationManager urm = mFactory.newLedgerUnderreplicationManager(); + TestStatsProvider statsProvider = new TestStatsProvider(); + TestStatsLogger statsLogger = statsProvider.getStatsLogger(AUDITOR_SCOPE); + TestStatsProvider.TestOpStatsLogger placementPolicyCheckStatsLogger = + (TestStatsProvider.TestOpStatsLogger) statsLogger + .getOpStatsLogger(ReplicationStats.PLACEMENT_POLICY_CHECK_TIME); + + final AuditorPeriodicCheckTest.TestAuditor auditor = new AuditorPeriodicCheckTest.TestAuditor( + BookieImpl.getBookieId(servConf).toString(), servConf, bkc, false, statsLogger, null); + auditorRef.setValue(auditor); + CountDownLatch latch = auditor.getLatch(); + assertEquals("PLACEMENT_POLICY_CHECK_TIME SuccessCount", 0, + placementPolicyCheckStatsLogger.getSuccessCount()); + urm.setPlacementPolicyCheckCTime(-1); + auditor.start(); + /* + * since placementPolicyCheckCTime is set to -1, placementPolicyCheck should be + * scheduled to run with no initialdelay + */ + assertTrue("placementPolicyCheck should have executed", latch.await(20, TimeUnit.SECONDS)); + for (int i = 0; i < 20; i++) { + Thread.sleep(100); + if (placementPolicyCheckStatsLogger.getSuccessCount() >= 1) { + break; + } + } + assertEquals("PLACEMENT_POLICY_CHECK_TIME SuccessCount", 1, + placementPolicyCheckStatsLogger.getSuccessCount()); + return statsLogger; + } } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/sasl/GSSAPIBookKeeperTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/sasl/GSSAPIBookKeeperTest.java index db39ae734c5..55485486f68 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/sasl/GSSAPIBookKeeperTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/sasl/GSSAPIBookKeeperTest.java @@ -33,8 +33,7 @@ import java.util.Properties; import java.util.concurrent.atomic.AtomicLong; import javax.security.auth.login.Configuration; - -import org.apache.bookkeeper.bookie.Bookie; +import org.apache.bookkeeper.bookie.BookieImpl; import 
org.apache.bookkeeper.client.BKException; import org.apache.bookkeeper.client.BKException.BKUnauthorizedAccessException; import org.apache.bookkeeper.client.BookKeeper; @@ -43,14 +42,13 @@ import org.apache.bookkeeper.client.LedgerHandle; import org.apache.bookkeeper.conf.ClientConfiguration; import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.conf.TestBKConfiguration; import org.apache.bookkeeper.proto.BookieServer; import org.apache.bookkeeper.test.BookKeeperClusterTestCase; -import org.apache.hadoop.minikdc.MiniKdc; import org.apache.zookeeper.KeeperException; -import org.junit.After; import org.junit.AfterClass; -import org.junit.Before; -import org.junit.Rule; +import org.junit.BeforeClass; +import org.junit.ClassRule; import org.junit.Test; import org.junit.rules.TemporaryFolder; import org.slf4j.Logger; @@ -66,28 +64,31 @@ public class GSSAPIBookKeeperTest extends BookKeeperClusterTestCase { private static final byte[] PASSWD = "testPasswd".getBytes(); private static final byte[] ENTRY = "TestEntry".getBytes(); - private MiniKdc kdc; - private Properties conf; + private static MiniKdc kdc; + private static Properties conf; + + private static final String non_default_sasl_service_name = "non_default_servicename"; - @Rule - public TemporaryFolder kdcDir = new TemporaryFolder(); + @ClassRule + public static TemporaryFolder kdcDir = new TemporaryFolder(); - @Rule - public TemporaryFolder kerberosWorkDir = new TemporaryFolder(); + @ClassRule + public static TemporaryFolder kerberosWorkDir = new TemporaryFolder(); - @Before - public void startMiniKdc() throws Exception { + @BeforeClass + public static void startMiniKdc() throws Exception { conf = MiniKdc.createConf(); kdc = new MiniKdc(conf, kdcDir.getRoot()); kdc.start(); - ServerConfiguration bookieConf = newServerConfiguration(); + // this is just to calculate "localhostName" the same way the bookie does + ServerConfiguration bookieConf = TestBKConfiguration.newServerConfiguration(); bookieConf.setUseHostNameAsBookieID(true); - String localhostName = Bookie.getBookieAddress(bookieConf).getHostName(); + String localhostName = BookieImpl.getBookieAddress(bookieConf).getHostName(); - String principalServerNoRealm = "bookkeeper/" + localhostName; - String principalServer = "bookkeeper/" + localhostName + "@" + kdc.getRealm(); + String principalServerNoRealm = non_default_sasl_service_name + "/" + localhostName; + String principalServer = non_default_sasl_service_name + "/" + localhostName + "@" + kdc.getRealm(); LOG.info("principalServer: " + principalServer); String principalClientNoRealm = "bookkeeperclient/" + localhostName; String principalClient = principalClientNoRealm + "@" + kdc.getRealm(); @@ -127,16 +128,17 @@ public void startMiniKdc() throws Exception { File krb5file = new File(kerberosWorkDir.getRoot(), "krb5.conf"); try (FileWriter writer = new FileWriter(krb5file)) { - writer.write("[libdefaults]\n" + String conf = "[libdefaults]\n" + " default_realm = " + kdc.getRealm() + "\n" + + " udp_preference_limit = 1\n" // force use TCP + "\n" + "\n" + "[realms]\n" + " " + kdc.getRealm() + " = {\n" + " kdc = " + kdc.getHost() + ":" + kdc.getPort() + "\n" - + " }" - ); - + + " }"; + writer.write(conf); + LOG.info("krb5.conf:\n" + conf); } System.setProperty("java.security.auth.login.config", jaasFile.getAbsolutePath()); @@ -145,8 +147,8 @@ public void startMiniKdc() throws Exception { } - @After - public void stopMiniKdc() { + @AfterClass + public static void stopMiniKdc() { 
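+        // clear the JVM-global JAAS and krb5 settings so that later tests
+        // are not affected by this suite's Kerberos configuration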
System.clearProperty("java.security.auth.login.config"); System.clearProperty("java.security.krb5.conf"); if (kdc != null) { @@ -177,15 +179,15 @@ private void connectAndWriteToBookie(ClientConfiguration conf, AtomicLong ledger private int entryCount(long ledgerId, ClientConfiguration clientConf) throws Exception { LOG.info("Counting entries in {}", ledgerId); - for (ServerConfiguration conf : bsConfs) { - conf.setUseHostNameAsBookieID(true); - conf.setBookieAuthProviderFactoryClass( - SASLBookieAuthProviderFactory.class.getName()); - } clientConf.setClientAuthProviderFactoryClass( SASLClientProviderFactory.class.getName()); - restartBookies(); + restartBookies(c -> { + c.setUseHostNameAsBookieID(true); + c.setBookieAuthProviderFactoryClass( + SASLBookieAuthProviderFactory.class.getName()); + return c; + }); try (BookKeeper bkc = new BookKeeper(clientConf, zkc); LedgerHandle lh = bkc.openLedger(ledgerId, DigestType.CRC32, @@ -251,10 +253,8 @@ public void testNotAllowedClientId() throws Exception { } BookieServer startAndStoreBookie(ServerConfiguration conf) throws Exception { - bsConfs.add(conf); - BookieServer s = startBookie(conf); - bs.add(s); - return s; + System.setProperty(SaslConstants.SASL_SERVICE_NAME, non_default_sasl_service_name); + return startAndAddBookie(conf).getServer(); } @AfterClass diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/sasl/MD5DigestBookKeeperTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/sasl/MD5DigestBookKeeperTest.java index 93719061adf..a12b532a489 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/sasl/MD5DigestBookKeeperTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/sasl/MD5DigestBookKeeperTest.java @@ -30,7 +30,6 @@ import java.util.Enumeration; import java.util.concurrent.atomic.AtomicLong; import javax.security.auth.login.Configuration; - import org.apache.bookkeeper.client.BookKeeper; import org.apache.bookkeeper.client.BookKeeper.DigestType; import org.apache.bookkeeper.client.LedgerEntry; @@ -82,15 +81,15 @@ private void connectAndWriteToBookie(ClientConfiguration conf, AtomicLong ledger private int entryCount(long ledgerId, ServerConfiguration bookieConf, ClientConfiguration clientConf) throws Exception { LOG.info("Counting entries in {}", ledgerId); - for (ServerConfiguration conf : bsConfs) { - bookieConf.setBookieAuthProviderFactoryClass( - SASLBookieAuthProviderFactory.class.getName()); - bookieConf.setProperty(JAAS_CLIENT_ALLOWED_IDS, ".*hd.*"); - } clientConf.setClientAuthProviderFactoryClass( SASLClientProviderFactory.class.getName()); - restartBookies(); + restartBookies(c -> { + c.setBookieAuthProviderFactoryClass( + SASLBookieAuthProviderFactory.class.getName()); + c.setProperty(JAAS_CLIENT_ALLOWED_IDS, ".*hd.*"); + return c; + }); try (BookKeeper bkc = new BookKeeper(clientConf, zkc); LedgerHandle lh = bkc.openLedger(ledgerId, DigestType.CRC32, @@ -134,10 +133,7 @@ public void testSingleMessageAuth() throws Exception { } BookieServer startAndStoreBookie(ServerConfiguration conf) throws Exception { - bsConfs.add(conf); - BookieServer s = startBookie(conf); - bs.add(s); - return s; + return startAndAddBookie(conf).getServer(); } @AfterClass diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/sasl/MiniKdc.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/sasl/MiniKdc.java new file mode 100644 index 00000000000..42b0fd85c1e --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/sasl/MiniKdc.java @@ -0,0 +1,349 @@ +/* + * 
Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.bookkeeper.sasl; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.HashSet; +import java.util.Locale; +import java.util.Map; +import java.util.Properties; +import java.util.Set; +import org.apache.kerby.kerberos.kerb.KrbException; +import org.apache.kerby.kerberos.kerb.server.KdcConfigKey; +import org.apache.kerby.kerberos.kerb.server.SimpleKdcServer; +import org.apache.kerby.util.IOUtil; +import org.apache.kerby.util.NetworkUtil; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Mini KDC based on Apache Directory Server that can be embedded in testcases + * or used from command line as a standalone KDC. + * + *

+ * <p>From within testcases:
+ *
+ * <p>MiniKdc sets one System property when started and un-sets it when stopped:
+ * <ul>
+ *   <li>sun.security.krb5.debug: set to the debug value provided in the
+ *   configuration</li>
+ * </ul>
+ * Because of this, multiple MiniKdc instances cannot be started in parallel.
+ * For example, running testcases in parallel that start a KDC each. To
+ * accomplish this a single MiniKdc should be used for all testcases running
+ * in parallel.
+ *
+ * <p>MiniKdc default configuration values are:
+ * <ul>
+ *   <li>org.name=EXAMPLE (used to create the REALM)</li>
+ *   <li>org.domain=COM (used to create the REALM)</li>
+ *   <li>kdc.bind.address=localhost</li>
+ *   <li>kdc.port=0 (ephemeral port)</li>
+ *   <li>instance=DefaultKrbServer</li>
+ *   <li>max.ticket.lifetime=86400000 (1 day)</li>
+ *   <li>max.renewable.lifetime=604800000 (7 days)</li>
+ *   <li>transport=TCP</li>
+ *   <li>debug=false</li>
+ * </ul>
          + * The generated krb5.conf forces TCP connections. + * This code is originally from HDFS, see the file name MiniKdc there + * in case of bug fixing, history, etc. + * https://github.com/apache/hadoop/blob/trunk/hadoop-common-project/hadoop-minikdc/src/main/java/org/apache/hadoop/minikdc/MiniKdc.java + */ +public class MiniKdc { + + public static final String JAVA_SECURITY_KRB5_CONF = + "java.security.krb5.conf"; + public static final String SUN_SECURITY_KRB5_DEBUG = + "sun.security.krb5.debug"; + + + private static final Logger LOG = LoggerFactory.getLogger(MiniKdc.class); + + public static final String ORG_NAME = "org.name"; + public static final String ORG_DOMAIN = "org.domain"; + public static final String KDC_BIND_ADDRESS = "kdc.bind.address"; + public static final String KDC_PORT = "kdc.port"; + public static final String INSTANCE = "instance"; + public static final String MAX_TICKET_LIFETIME = "max.ticket.lifetime"; + public static final String MAX_RENEWABLE_LIFETIME = "max.renewable.lifetime"; + public static final String TRANSPORT = "transport"; + public static final String DEBUG = "debug"; + + private static final Set PROPERTIES = new HashSet(); + private static final Properties DEFAULT_CONFIG = new Properties(); + + static { + PROPERTIES.add(ORG_NAME); + PROPERTIES.add(ORG_DOMAIN); + PROPERTIES.add(KDC_BIND_ADDRESS); + PROPERTIES.add(KDC_BIND_ADDRESS); + PROPERTIES.add(KDC_PORT); + PROPERTIES.add(INSTANCE); + PROPERTIES.add(TRANSPORT); + PROPERTIES.add(MAX_TICKET_LIFETIME); + PROPERTIES.add(MAX_RENEWABLE_LIFETIME); + + DEFAULT_CONFIG.setProperty(KDC_BIND_ADDRESS, "localhost"); + DEFAULT_CONFIG.setProperty(KDC_PORT, "0"); + DEFAULT_CONFIG.setProperty(INSTANCE, "DefaultKrbServer"); + DEFAULT_CONFIG.setProperty(ORG_NAME, "EXAMPLE"); + DEFAULT_CONFIG.setProperty(ORG_DOMAIN, "COM"); + DEFAULT_CONFIG.setProperty(TRANSPORT, "TCP"); + DEFAULT_CONFIG.setProperty(MAX_TICKET_LIFETIME, "86400000"); + DEFAULT_CONFIG.setProperty(MAX_RENEWABLE_LIFETIME, "604800000"); + DEFAULT_CONFIG.setProperty(DEBUG, "false"); + } + + /** + * Convenience method that returns MiniKdc default configuration. + * + *

          The returned configuration is a copy, it can be customized before using + * it to create a MiniKdc. + * @return a MiniKdc default configuration. + */ + public static Properties createConf() { + return (Properties) DEFAULT_CONFIG.clone(); + } + + private Properties conf; + private SimpleKdcServer simpleKdc; + private int port; + private String realm; + private File workDir; + private File krb5conf; + private String transport; + private boolean krb5Debug; + + public void setTransport(String transport) { + this.transport = transport; + } + + /** + * Creates a MiniKdc. + * + * @param conf MiniKdc configuration. + * @param workDir working directory, it should be the build directory. Under + * this directory an ApacheDS working directory will be created, this + * directory will be deleted when the MiniKdc stops. + * @throws Exception thrown if the MiniKdc could not be created. + */ + public MiniKdc(Properties conf, File workDir) throws Exception { + if (!conf.keySet().containsAll(PROPERTIES)) { + Set missingProperties = new HashSet(PROPERTIES); + missingProperties.removeAll(conf.keySet()); + throw new IllegalArgumentException("Missing configuration properties: " + + missingProperties); + } + this.workDir = new File(workDir, Long.toString(System.currentTimeMillis())); + if (!this.workDir.exists() + && !this.workDir.mkdirs()) { + throw new RuntimeException("Cannot create directory " + this.workDir); + } + LOG.info("Configuration:"); + LOG.info("---------------------------------------------------------------"); + for (Map.Entry entry : conf.entrySet()) { + LOG.info(" {}: {}", entry.getKey(), entry.getValue()); + } + LOG.info("---------------------------------------------------------------"); + this.conf = conf; + port = Integer.parseInt(conf.getProperty(KDC_PORT)); + String orgName = conf.getProperty(ORG_NAME); + String orgDomain = conf.getProperty(ORG_DOMAIN); + realm = orgName.toUpperCase(Locale.ENGLISH) + "." + + orgDomain.toUpperCase(Locale.ENGLISH); + } + + /** + * Returns the port of the MiniKdc. + * + * @return the port of the MiniKdc. + */ + public int getPort() { + return port; + } + + /** + * Returns the host of the MiniKdc. + * + * @return the host of the MiniKdc. + */ + public String getHost() { + return conf.getProperty(KDC_BIND_ADDRESS); + } + + /** + * Returns the realm of the MiniKdc. + * + * @return the realm of the MiniKdc. + */ + public String getRealm() { + return realm; + } + + public File getKrb5conf() { + krb5conf = new File(System.getProperty(JAVA_SECURITY_KRB5_CONF)); + return krb5conf; + } + + /** + * Starts the MiniKdc. + * + * @throws Exception thrown if the MiniKdc could not be started. 
+ */ + public synchronized void start() throws Exception { + if (simpleKdc != null) { + throw new RuntimeException("Already started"); + } + simpleKdc = new SimpleKdcServer(); + prepareKdcServer(); + simpleKdc.init(); + resetDefaultRealm(); + simpleKdc.start(); + LOG.info("MiniKdc started."); + } + + private void resetDefaultRealm() throws IOException { + InputStream templateResource = new FileInputStream( + getKrb5conf().getAbsolutePath()); + String content = IOUtil.readInput(templateResource); + content = content.replaceAll("default_realm = .*\n", + "default_realm = " + getRealm() + "\n"); + IOUtil.writeFile(content, getKrb5conf()); + } + + private void prepareKdcServer() throws Exception { + // transport + simpleKdc.setWorkDir(workDir); + simpleKdc.setKdcHost(getHost()); + simpleKdc.setKdcRealm(realm); + if (transport == null) { + transport = conf.getProperty(TRANSPORT); + } + if (port == 0) { + port = NetworkUtil.getServerPort(); + } + if (transport != null) { + if (transport.trim().equals("TCP")) { + simpleKdc.setKdcTcpPort(port); + simpleKdc.setAllowUdp(false); + } else if (transport.trim().equals("UDP")) { + simpleKdc.setKdcUdpPort(port); + simpleKdc.setAllowTcp(false); + } else { + throw new IllegalArgumentException("Invalid transport: " + transport); + } + } else { + throw new IllegalArgumentException("Need to set transport!"); + } + simpleKdc.getKdcConfig().setString(KdcConfigKey.KDC_SERVICE_NAME, + conf.getProperty(INSTANCE)); + if (conf.getProperty(DEBUG) != null) { + krb5Debug = getAndSet(SUN_SECURITY_KRB5_DEBUG, conf.getProperty(DEBUG)); + } + } + + /** + * Stops the MiniKdc. + */ + public synchronized void stop() { + if (simpleKdc != null) { + try { + simpleKdc.stop(); + } catch (KrbException e) { + e.printStackTrace(); + } finally { + if (conf.getProperty(DEBUG) != null) { + System.setProperty(SUN_SECURITY_KRB5_DEBUG, + Boolean.toString(krb5Debug)); + } + } + } + delete(workDir); + try { + // Will be fixed in next Kerby version. + Thread.sleep(1000); + } catch (InterruptedException e) { + e.printStackTrace(); + } + LOG.info("MiniKdc stopped."); + } + + private void delete(File f) { + if (f.isFile()) { + if (!f.delete()) { + LOG.warn("WARNING: cannot delete file " + f.getAbsolutePath()); + } + } else { + for (File c: f.listFiles()) { + delete(c); + } + if (!f.delete()) { + LOG.warn("WARNING: cannot delete directory " + f.getAbsolutePath()); + } + } + } + + /** + * Creates a principal in the KDC with the specified user and password. + * + * @param principal principal name, do not include the domain. + * @param password password. + * @throws Exception thrown if the principal could not be created. + */ + public synchronized void createPrincipal(String principal, String password) + throws Exception { + simpleKdc.createPrincipal(principal, password); + } + + /** + * Creates multiple principals in the KDC and adds them to a keytab file. + * + * @param keytabFile keytab file to add the created principals. + * @param principals principals to add to the KDC, do not include the domain. + * @throws Exception thrown if the principals or the keytab file could not be + * created. + */ + public synchronized void createPrincipal(File keytabFile, + String ... 
principals) + throws Exception { + simpleKdc.createPrincipals(principals); + if (keytabFile.exists() && !keytabFile.delete()) { + LOG.error("Failed to delete keytab file: " + keytabFile); + } + for (String principal : principals) { + simpleKdc.getKadmin().exportKeytab(keytabFile, principal); + } + } + + /** + * Set the System property; return the old value for caching. + * + * @param sysprop property + * @param debug true or false + * @return the previous value + */ + private boolean getAndSet(String sysprop, String debug) { + boolean old = Boolean.getBoolean(sysprop); + System.setProperty(sysprop, debug); + return old; + } +} \ No newline at end of file diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/server/TestBookieBoot.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/server/TestBookieBoot.java new file mode 100644 index 00000000000..62567bd5502 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/server/TestBookieBoot.java @@ -0,0 +1,132 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.server; + +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.equalTo; + +import java.io.File; +import java.net.Socket; +import java.util.Iterator; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.TimeUnit; +import org.apache.bookkeeper.bookie.BookieImpl; +import org.apache.bookkeeper.bookie.ExitCode; +import org.apache.bookkeeper.client.BookKeeperTestClient; +import org.apache.bookkeeper.client.api.DigestType; +import org.apache.bookkeeper.client.api.LedgerEntries; +import org.apache.bookkeeper.client.api.ReadHandle; +import org.apache.bookkeeper.client.api.WriteHandle; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.test.BookKeeperClusterTestCase; +import org.apache.bookkeeper.util.PortManager; +import org.apache.commons.configuration.PropertiesConfiguration; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Tests that a bookie can boot via the main method + * and serve read and write requests. 
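+ * The bookie is booted by invoking {@code Main.doMain(new String[] {"-c", confFile})}
+ * on a helper thread, and shut down at the end of the test by interrupting
+ * that thread.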
+ */ +public class TestBookieBoot extends BookKeeperClusterTestCase { + private static final Logger log = LoggerFactory.getLogger(TestBookieBoot.class); + public TestBookieBoot() throws Exception { + super(0); + } + + @Test + public void testBootFromConfig() throws Exception { + ServerConfiguration conf = new ServerConfiguration(); + conf.setMetadataServiceUri(this.metadataServiceUri); + conf.setAllowLoopback(true); + conf.setBookiePort(PortManager.nextFreePort()); + conf.setLedgerStorageClass("org.apache.bookkeeper.bookie.storage.ldb.DbLedgerStorage"); + + File storageDir = tmpDirs.createNew("bookie", "storage"); + conf.setLedgerDirNames(new String[] { storageDir.toString() }); + conf.setJournalDirName(storageDir.toString()); + + PropertiesConfiguration propsConf = new PropertiesConfiguration(); + for (Iterator iter = conf.getKeys(); iter.hasNext(); ) { + String key = iter.next(); + propsConf.setProperty(key, conf.getProperty(key)); + } + + File confFile = File.createTempFile("test", "conf"); + propsConf.save(confFile); + + log.info("Conf: {}", confFile); + + CompletableFuture promise = new CompletableFuture<>(); + Thread t = new Thread(() -> { + try { + int ret = Main.doMain(new String[] {"-c", confFile.toString()}); + promise.complete(ret); + } catch (Exception e) { + promise.completeExceptionally(e); + } + }, "bookie-main"); + t.start(); + + BookieSocketAddress addr = BookieImpl.getBookieAddress(conf); + BookKeeperTestClient bkc = new BookKeeperTestClient(baseClientConf); + bkc.waitForWritableBookie(addr.toBookieId()).get(); + + boolean connected = false; + for (int i = 0; i < 100 && t.isAlive(); i++) { + try (Socket s = new Socket(addr.getSocketAddress().getAddress(), addr.getPort())) { + connected = true; + break; + } catch (Exception e) { + // expected, will retry + } + Thread.sleep(100); + } + assertThat(connected, equalTo(true)); + + long ledgerId; + try (WriteHandle wh = bkc.newCreateLedgerOp().withEnsembleSize(1) + .withWriteQuorumSize(1).withAckQuorumSize(1) + .withDigestType(DigestType.CRC32C) + .withPassword(new byte[0]) + .execute().get()) { + ledgerId = wh.getId(); + wh.append("foobar".getBytes(UTF_8)); + } + + try (ReadHandle rh = bkc.newOpenLedgerOp().withLedgerId(ledgerId) + .withDigestType(DigestType.CRC32C) + .withPassword(new byte[0]) + .withRecovery(true) + .execute().get()) { + assertThat(rh.getLastAddConfirmed(), equalTo(0L)); + try (LedgerEntries entries = rh.read(0, 0)) { + assertThat(new String(entries.getEntry(0).getEntryBytes(), UTF_8), equalTo("foobar")); + } + } + + t.interrupt(); + assertThat(promise.get(10, TimeUnit.SECONDS), equalTo(ExitCode.OK)); + } +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/server/TestEmbeddedServer.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/server/TestEmbeddedServer.java new file mode 100644 index 00000000000..60b43032ab7 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/server/TestEmbeddedServer.java @@ -0,0 +1,321 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +package org.apache.bookkeeper.server; + +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.LD_INDEX_SCOPE; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.LD_LEDGER_SCOPE; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertSame; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.CALLS_REAL_METHODS; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.mockStatic; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import java.io.IOException; +import lombok.Cleanup; +import org.apache.bookkeeper.bookie.BookieImpl; +import org.apache.bookkeeper.bookie.BookieResources; +import org.apache.bookkeeper.bookie.LedgerDirsManager; +import org.apache.bookkeeper.bookie.LegacyCookieValidation; +import org.apache.bookkeeper.bookie.UncleanShutdownDetectionImpl; +import org.apache.bookkeeper.common.allocator.ByteBufAllocatorWithOomHandler; +import org.apache.bookkeeper.common.component.LifecycleComponentStack; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.discover.RegistrationManager; +import org.apache.bookkeeper.meta.LedgerManagerFactory; +import org.apache.bookkeeper.meta.NullMetadataBookieDriver; +import org.apache.bookkeeper.meta.NullMetadataBookieDriver.NullLedgerManagerFactory; +import org.apache.bookkeeper.meta.NullMetadataBookieDriver.NullRegistrationManager; +import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.proto.BookieServer; +import org.apache.bookkeeper.server.component.ServerLifecycleComponent; +import org.apache.bookkeeper.server.conf.BookieConfiguration; +import org.apache.bookkeeper.stats.NullStatsProvider; +import org.apache.bookkeeper.stats.StatsLogger; +import org.apache.bookkeeper.stats.StatsProvider; +import org.apache.bookkeeper.util.DiskChecker; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.MockedStatic; +import org.mockito.junit.MockitoJUnitRunner; + +/** + * Unit test of {@link EmbeddedServer}. 
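+ * <p>The static factories in BookieResources, LegacyCookieValidation, BookieImpl
+ * and BookieServer are mocked with Mockito's {@code mockStatic} so the lifecycle
+ * component stack can be assembled and started without real disks or a metadata
+ * store.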
+ */
+@RunWith(MockitoJUnitRunner.class)
+public class TestEmbeddedServer {
+
+    static class TestComponent extends ServerLifecycleComponent {
+
+        public TestComponent(BookieConfiguration conf, StatsLogger statsLogger) {
+            super("test-component", conf, statsLogger);
+        }
+
+        @Override
+        protected void doStart() {
+        }
+
+        @Override
+        protected void doStop() {
+        }
+
+        @Override
+        protected void doClose() throws IOException {
+        }
+
+    }
+
+    @Test
+    public void testBuildBookieServer() throws Exception {
+        @Cleanup
+        MockedStatic<BookieResources> bookieResourcesMockedStatic = mockStatic(BookieResources.class,
+                CALLS_REAL_METHODS);
+        bookieResourcesMockedStatic.when(() ->
+                BookieResources.createMetadataDriver(any(), any())).thenReturn(new NullMetadataBookieDriver());
+        bookieResourcesMockedStatic.when(() ->
+                BookieResources.createAllocator(any())).thenReturn(mock(ByteBufAllocatorWithOomHandler.class));
+
+        ServerConfiguration serverConf = new ServerConfiguration()
+                .setAllowLoopback(true)
+                .setAutoRecoveryDaemonEnabled(false)
+                .setHttpServerEnabled(false)
+                .setExtraServerComponents(new String[] { TestComponent.class.getName() });
+        BookieConfiguration conf = new BookieConfiguration(serverConf);
+
+        @Cleanup
+        MockedStatic<LegacyCookieValidation> legacyCookieValidationMockedStatic =
+                mockStatic(LegacyCookieValidation.class);
+        legacyCookieValidationMockedStatic.when(() -> LegacyCookieValidation.newLegacyCookieValidation(any(), any()))
+                .thenReturn(mock(LegacyCookieValidation.class));
+
+        @Cleanup
+        MockedStatic<BookieImpl> bookieMockedStatic = mockStatic(BookieImpl.class, CALLS_REAL_METHODS);
+        bookieMockedStatic.when(() -> BookieImpl.newBookieImpl(any(), any(), any(), any(), any(), any(), any(),
+                any(), any())).thenReturn(mock(BookieImpl.class));
+
+        BookieServer mockServer = mock(BookieServer.class);
+
+        BookieSocketAddress bookieAddress = new BookieSocketAddress("127.0.0.1", 1281);
+        when(mockServer.getLocalAddress()).thenReturn(bookieAddress);
+
+        @Cleanup
+        MockedStatic<BookieServer> bookieServerMockedStatic = mockStatic(BookieServer.class);
+        bookieServerMockedStatic.when(() -> BookieServer.newBookieServer(any(), any(), any(), any(), any()))
+                .thenReturn(mockServer);
+
+        EmbeddedServer server = EmbeddedServer.builder(conf).build();
+        LifecycleComponentStack stack = server.getLifecycleComponentStack();
+        assertEquals(7, stack.getNumComponents());
+        assertTrue(stack.getComponent(6) instanceof TestComponent);
+
+        stack.start();
+        verify(mockServer, times(1)).start();
+
+        stack.stop();
+
+        stack.close();
+        verify(mockServer, times(1)).shutdown();
+    }
+
+    @Test
+    public void testBuildBookieServerCustomComponents() throws Exception {
+
+        ServerConfiguration serverConf = new ServerConfiguration()
+                .setAllowLoopback(true)
+                .setAutoRecoveryDaemonEnabled(false)
+                .setHttpServerEnabled(false)
+                .setExtraServerComponents(new String[]{TestComponent.class.getName()});
+        BookieConfiguration conf = new BookieConfiguration(serverConf);
+
+        StatsProvider statsProvider = new NullStatsProvider();
+        StatsLogger rootStatsLogger = statsProvider.getStatsLogger("");
+        RegistrationManager registrationManager = new NullRegistrationManager();
+        LedgerManagerFactory ledgerManagerFactory = new NullLedgerManagerFactory();
+
+        DiskChecker diskChecker = BookieResources.createDiskChecker(serverConf);
+
+        LedgerDirsManager ledgerDirsManager = BookieResources.createLedgerDirsManager(
+                conf.getServerConf(), diskChecker, rootStatsLogger.scope(LD_LEDGER_SCOPE));
+
+        LedgerDirsManager indexDirsManager = BookieResources.createIndexDirsManager(
+                conf.getServerConf(), diskChecker,
+                rootStatsLogger.scope(LD_INDEX_SCOPE), ledgerDirsManager);
+
+        UncleanShutdownDetectionImpl uncleanShutdownDetection = new UncleanShutdownDetectionImpl(ledgerDirsManager);
+
+        ByteBufAllocatorWithOomHandler byteBufFromResources = mock(ByteBufAllocatorWithOomHandler.class);
+        ByteBufAllocatorWithOomHandler byteBuf = mock(ByteBufAllocatorWithOomHandler.class);
+
+        @Cleanup
+        MockedStatic<BookieResources> bookieResourcesMockedStatic = mockStatic(BookieResources.class);
+        bookieResourcesMockedStatic.when(() ->
+                BookieResources.createMetadataDriver(any(), any())).thenReturn(new NullMetadataBookieDriver());
+        bookieResourcesMockedStatic.when(() ->
+                BookieResources.createAllocator(any())).thenReturn(byteBufFromResources);
+
+        @Cleanup
+        MockedStatic<LegacyCookieValidation> legacyCookieValidationMockedStatic =
+                mockStatic(LegacyCookieValidation.class);
+        legacyCookieValidationMockedStatic.when(() -> LegacyCookieValidation.newLegacyCookieValidation(any(), any()))
+                .thenReturn(mock(LegacyCookieValidation.class));
+
+        @Cleanup
+        MockedStatic<BookieImpl> bookieMockedStatic = mockStatic(BookieImpl.class, CALLS_REAL_METHODS);
+        bookieMockedStatic.when(() -> BookieImpl.newBookieImpl(any(), any(), any(), any(), any(), any(), any(), any(),
+                any())).thenReturn(mock(BookieImpl.class));
+
+        BookieServer mockServer = mock(BookieServer.class);
+
+        @Cleanup
+        MockedStatic<BookieServer> bookieServerMockedStatic = mockStatic(BookieServer.class);
+        bookieServerMockedStatic.when(() -> BookieServer.newBookieServer(any(), any(), any(), any(), any()))
+                .thenReturn(mockServer);
+
+        BookieSocketAddress bookieAddress = new BookieSocketAddress("127.0.0.1", 1281);
+        when(mockServer.getLocalAddress()).thenReturn(bookieAddress);
+
+        EmbeddedServer server = EmbeddedServer.builder(conf)
+                .statsProvider(statsProvider)
+                .registrationManager(registrationManager)
+                .ledgerManagerFactory(ledgerManagerFactory)
+                .diskChecker(diskChecker)
+                .ledgerDirsManager(ledgerDirsManager)
+                .indexDirsManager(indexDirsManager)
+                .allocator(byteBuf)
+                .uncleanShutdownDetection(uncleanShutdownDetection)
+                .build();
+
+        assertSame(statsProvider, server.getStatsProvider());
+        assertSame(registrationManager, server.getRegistrationManager());
+        assertSame(ledgerManagerFactory, server.getLedgerManagerFactory());
+        assertSame(diskChecker, server.getDiskChecker());
+        assertSame(ledgerDirsManager, server.getLedgerDirsManager());
+        assertSame(indexDirsManager, server.getIndexDirsManager());
+
+        LifecycleComponentStack stack = server.getLifecycleComponentStack();
+        assertEquals(3, stack.getNumComponents());
+        assertTrue(stack.getComponent(2) instanceof TestComponent);
+
+        stack.start();
+        verify(mockServer, times(1)).start();
+
+        stack.stop();
+
+        stack.close();
+        verify(mockServer, times(1)).shutdown();
+    }
+
+    @Test
+    public void testIgnoreExtraServerComponentsStartupFailures() throws Exception {
+        @Cleanup
+        MockedStatic<BookieResources> bookieResourcesMockedStatic = mockStatic(BookieResources.class,
+                CALLS_REAL_METHODS);
+        bookieResourcesMockedStatic.when(() ->
+                BookieResources.createMetadataDriver(any(), any())).thenReturn(new NullMetadataBookieDriver());
+
+        ServerConfiguration serverConf = new ServerConfiguration()
+                .setAllowLoopback(true)
+                .setAutoRecoveryDaemonEnabled(false)
+                .setHttpServerEnabled(false)
+                .setExtraServerComponents(new String[] { "bad-server-component"})
+                .setIgnoreExtraServerComponentsStartupFailures(true);
+        BookieConfiguration conf = new BookieConfiguration(serverConf);
+
+        @Cleanup
+        MockedStatic<LegacyCookieValidation> legacyCookieValidationMockedStatic =
+                mockStatic(LegacyCookieValidation.class);
+        legacyCookieValidationMockedStatic.when(() -> LegacyCookieValidation.newLegacyCookieValidation(any(), any()))
+                .thenReturn(mock(LegacyCookieValidation.class));
+
+        @Cleanup
+        MockedStatic<BookieImpl> bookieMockedStatic = mockStatic(BookieImpl.class, CALLS_REAL_METHODS);
+        bookieMockedStatic.when(() -> BookieImpl.newBookieImpl(any(), any(), any(), any(), any(), any(), any(), any(),
+                any())).thenReturn(mock(BookieImpl.class));
+
+        BookieServer mockServer = mock(BookieServer.class);
+
+        @Cleanup
+        MockedStatic<BookieServer> bookieServerMockedStatic = mockStatic(BookieServer.class);
+        bookieServerMockedStatic.when(() -> BookieServer.newBookieServer(any(), any(), any(), any(), any()))
+                .thenReturn(mockServer);
+
+        BookieSocketAddress bookieAddress = new BookieSocketAddress("127.0.0.1", 1281);
+        when(mockServer.getLocalAddress()).thenReturn(bookieAddress);
+
+        LifecycleComponentStack stack = EmbeddedServer.builder(conf).build().getLifecycleComponentStack();
+        assertEquals(6, stack.getNumComponents());
+
+        stack.start();
+        verify(mockServer, times(1)).start();
+
+        stack.stop();
+
+        stack.close();
+        verify(mockServer, times(1)).shutdown();
+    }
+
+    @Test
+    public void testExtraServerComponentsStartupFailures() throws Exception {
+        @Cleanup
+        MockedStatic<BookieResources> bookieResourcesMockedStatic = mockStatic(BookieResources.class,
+                CALLS_REAL_METHODS);
+        bookieResourcesMockedStatic.when(() ->
+                BookieResources.createMetadataDriver(any(), any())).thenReturn(new NullMetadataBookieDriver());
+
+        ServerConfiguration serverConf = new ServerConfiguration()
+                .setAllowLoopback(true)
+                .setAutoRecoveryDaemonEnabled(false)
+                .setHttpServerEnabled(false)
+                .setExtraServerComponents(new String[] { "bad-server-component"})
+                .setIgnoreExtraServerComponentsStartupFailures(false);
+        BookieConfiguration conf = new BookieConfiguration(serverConf);
+
+        @Cleanup
+        MockedStatic<LegacyCookieValidation> legacyCookieValidationMockedStatic =
+                mockStatic(LegacyCookieValidation.class);
+        legacyCookieValidationMockedStatic.when(() -> LegacyCookieValidation.newLegacyCookieValidation(any(), any()))
+                .thenReturn(mock(LegacyCookieValidation.class));
+
+        @Cleanup
+        MockedStatic<BookieImpl> bookieMockedStatic = mockStatic(BookieImpl.class, CALLS_REAL_METHODS);
+        bookieMockedStatic.when(() -> BookieImpl.newBookieImpl(any(), any(), any(), any(), any(), any(), any(), any(),
+                any())).thenReturn(mock(BookieImpl.class));
+
+        BookieServer mockServer = mock(BookieServer.class);
+
+        @Cleanup
+        MockedStatic<BookieServer> bookieServerMockedStatic = mockStatic(BookieServer.class);
+        bookieServerMockedStatic.when(() -> BookieServer.newBookieServer(any(), any(), any(), any(), any()))
+                .thenReturn(mockServer);
+
+        try {
+            EmbeddedServer.builder(conf).build().getLifecycleComponentStack();
+            fail("Should fail to start bookie server if `ignoreExtraServerComponentsStartupFailures` is set to false");
+        } catch (RuntimeException re) {
+            assertTrue(re.getCause() instanceof ClassNotFoundException);
+        }
+    }
+}
diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/server/TestMain.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/server/TestMain.java
deleted file mode 100644
index d5253fdc422..00000000000
--- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/server/TestMain.java
+++ /dev/null
@@ -1,147 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.bookkeeper.server; - -import static org.apache.bookkeeper.server.Main.buildBookieServer; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; -import static org.mockito.ArgumentMatchers.any; -import static org.mockito.Mockito.times; -import static org.mockito.Mockito.verify; -import static org.powermock.api.mockito.PowerMockito.whenNew; - -import java.io.IOException; - -import org.apache.bookkeeper.common.component.LifecycleComponentStack; -import org.apache.bookkeeper.conf.ServerConfiguration; -import org.apache.bookkeeper.proto.BookieServer; -import org.apache.bookkeeper.server.component.ServerLifecycleComponent; -import org.apache.bookkeeper.server.conf.BookieConfiguration; -import org.apache.bookkeeper.server.service.BookieService; -import org.apache.bookkeeper.stats.StatsLogger; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.powermock.api.mockito.PowerMockito; -import org.powermock.core.classloader.annotations.PrepareForTest; -import org.powermock.modules.junit4.PowerMockRunner; - -/** - * Unit test of {@link Main}. - */ -@RunWith(PowerMockRunner.class) -@PrepareForTest(BookieService.class) -public class TestMain { - - static class TestComponent extends ServerLifecycleComponent { - - public TestComponent(BookieConfiguration conf, StatsLogger statsLogger) { - super("test-component", conf, statsLogger); - } - - @Override - protected void doStart() { - } - - @Override - protected void doStop() { - } - - @Override - protected void doClose() throws IOException { - } - - } - - @Test - public void testBuildBookieServer() throws Exception { - ServerConfiguration serverConf = new ServerConfiguration() - .setAutoRecoveryDaemonEnabled(false) - .setHttpServerEnabled(false) - .setExtraServerComponents(new String[] { TestComponent.class.getName() }); - BookieConfiguration conf = new BookieConfiguration(serverConf); - - BookieServer mockServer = PowerMockito.mock(BookieServer.class); - whenNew(BookieServer.class) - .withArguments(any(ServerConfiguration.class), any(StatsLogger.class)) - .thenReturn(mockServer); - - LifecycleComponentStack stack = buildBookieServer(conf); - assertEquals(3, stack.getNumComponents()); - assertTrue(stack.getComponent(2) instanceof TestComponent); - - stack.start(); - verify(mockServer, times(1)).start(); - - stack.stop(); - - stack.close(); - verify(mockServer, times(1)).shutdown(); - } - - @Test - public void testIgnoreExtraServerComponentsStartupFailures() throws Exception { - ServerConfiguration serverConf = new ServerConfiguration() - .setAutoRecoveryDaemonEnabled(false) - .setHttpServerEnabled(false) - .setExtraServerComponents(new String[] { "bad-server-component"}) - .setIgnoreExtraServerComponentsStartupFailures(true); - BookieConfiguration conf = new BookieConfiguration(serverConf); - - BookieServer mockServer = PowerMockito.mock(BookieServer.class); - 
whenNew(BookieServer.class) - .withArguments(any(ServerConfiguration.class), any(StatsLogger.class)) - .thenReturn(mockServer); - - LifecycleComponentStack stack = buildBookieServer(conf); - assertEquals(2, stack.getNumComponents()); - - stack.start(); - verify(mockServer, times(1)).start(); - - stack.stop(); - - stack.close(); - verify(mockServer, times(1)).shutdown(); - } - - @Test - public void testExtraServerComponentsStartupFailures() throws Exception { - ServerConfiguration serverConf = new ServerConfiguration() - .setAutoRecoveryDaemonEnabled(false) - .setHttpServerEnabled(false) - .setExtraServerComponents(new String[] { "bad-server-component"}) - .setIgnoreExtraServerComponentsStartupFailures(false); - BookieConfiguration conf = new BookieConfiguration(serverConf); - - BookieServer mockServer = PowerMockito.mock(BookieServer.class); - whenNew(BookieServer.class) - .withArguments(any(ServerConfiguration.class), any(StatsLogger.class)) - .thenReturn(mockServer); - - try { - buildBookieServer(conf); - fail("Should fail to start bookie server if `ignoreExtraServerComponentsStartupFailures` is set to false"); - } catch (RuntimeException re) { - assertTrue(re.getCause() instanceof ClassNotFoundException); - } - } - -} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/server/component/TestServerLifecycleComponent.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/server/component/TestServerLifecycleComponent.java index 4337d001be7..8a1153f509a 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/server/component/TestServerLifecycleComponent.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/server/component/TestServerLifecycleComponent.java @@ -26,7 +26,6 @@ import java.io.IOException; import java.util.List; - import org.apache.bookkeeper.conf.ServerConfiguration; import org.apache.bookkeeper.server.conf.BookieConfiguration; import org.apache.bookkeeper.stats.NullStatsLogger; diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/server/http/TestHttpService.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/server/http/TestHttpService.java index bc57424915a..993880a764c 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/server/http/TestHttpService.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/server/http/TestHttpService.java @@ -19,23 +19,31 @@ package org.apache.bookkeeper.server.http; import static org.apache.bookkeeper.meta.MetadataDrivers.runFunctionWithLedgerManagerFactory; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertTrue; - +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.when; + +import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.util.concurrent.UncheckedExecutionException; import java.io.File; -import java.util.ArrayList; +import java.lang.reflect.Field; import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.concurrent.Future; import lombok.Cleanup; +import org.apache.bookkeeper.bookie.BookieResources; +import 
org.apache.bookkeeper.bookie.LedgerStorage; import org.apache.bookkeeper.client.BookKeeper; +import org.apache.bookkeeper.client.ClientUtil; import org.apache.bookkeeper.client.LedgerHandle; -import org.apache.bookkeeper.client.LedgerHandleAdapter; -import org.apache.bookkeeper.client.LedgerMetadata; +import org.apache.bookkeeper.client.LedgerMetadataBuilder; import org.apache.bookkeeper.common.util.JsonUtil; import org.apache.bookkeeper.conf.ServerConfiguration; import org.apache.bookkeeper.conf.TestBKConfiguration; @@ -46,14 +54,21 @@ import org.apache.bookkeeper.meta.LedgerManager; import org.apache.bookkeeper.meta.LedgerManagerFactory; import org.apache.bookkeeper.meta.LedgerUnderreplicationManager; +import org.apache.bookkeeper.meta.MetadataBookieDriver; import org.apache.bookkeeper.net.BookieSocketAddress; -import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GenericCallbackFuture; +import org.apache.bookkeeper.proto.BookieServer; import org.apache.bookkeeper.replication.AuditorElector; +import org.apache.bookkeeper.server.http.service.BookieInfoService; +import org.apache.bookkeeper.server.http.service.BookieSanityService; +import org.apache.bookkeeper.server.http.service.BookieSanityService.BookieSanity; +import org.apache.bookkeeper.server.http.service.BookieStateReadOnlyService.ReadOnlyState; +import org.apache.bookkeeper.server.http.service.BookieStateService.BookieState; +import org.apache.bookkeeper.server.http.service.ClusterInfoService; +import org.apache.bookkeeper.stats.NullStatsLogger; import org.apache.bookkeeper.test.BookKeeperClusterTestCase; -import org.apache.bookkeeper.zookeeper.ZooKeeperClient; -import org.apache.zookeeper.ZooKeeper; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -70,7 +85,7 @@ public class TestHttpService extends BookKeeperClusterTestCase { public TestHttpService() { super(numberOfBookies); try { - File tmpDir = createTempDir("bookie_http", "test"); + File tmpDir = tmpDirs.createNew("bookie_http", "test"); baseConf.setJournalDirName(tmpDir.getPath()) .setLedgerDirNames( new String[]{tmpDir.getPath()}); @@ -80,16 +95,29 @@ public TestHttpService() { } @Override - @Before + @BeforeEach public void setUp() throws Exception { super.setUp(); baseConf.setMetadataServiceUri(zkUtil.getMetadataServiceUri()); + baseClientConf.setStoreSystemtimeAsLedgerCreationTime(true); + + MetadataBookieDriver metadataDriver = BookieResources.createMetadataDriver( + baseConf, NullStatsLogger.INSTANCE); + this.bkHttpServiceProvider = new BKHttpServiceProvider.Builder() - .setBookieServer(bs.get(numberOfBookies - 1)) + .setBookieServer(serverByIndex(numberOfBookies - 1)) .setServerConfiguration(baseConf) + .setLedgerManagerFactory(metadataDriver.getLedgerManagerFactory()) .build(); } + @Override + @AfterEach + public void tearDown() throws Exception { + this.bkHttpServiceProvider.close(); + super.tearDown(); + } + @Test public void testHeartbeatService() throws Exception { // test heartbeat service @@ -175,8 +203,8 @@ public void testListBookiesService() throws Exception { HashMap respBody = JsonUtil.fromJson(response1.getBody(), HashMap.class); assertEquals(numberOfBookies, respBody.size()); for (int i = 0; i < numberOfBookies; i++) { - assertEquals(true, respBody.containsKey(getBookie(i).toString())); - assertEquals(null, respBody.get(getBookie(i).toString())); + 
assertTrue(respBody.containsKey(getBookie(i).toString())); + assertNull(respBody.get(getBookie(i).toString())); } //2, parameter: type=rw&print_hostnames=true, should print rw bookies with hostname @@ -191,7 +219,7 @@ public void testListBookiesService() throws Exception { HashMap respBody2 = JsonUtil.fromJson(response2.getBody(), HashMap.class); assertEquals(numberOfBookies, respBody2.size()); for (int i = 0; i < numberOfBookies; i++) { - assertEquals(true, respBody2.containsKey(getBookie(i).toString())); + assertTrue(respBody2.containsKey(getBookie(i).toString())); assertNotNull(respBody2.get(getBookie(i).toString())); } @@ -210,7 +238,7 @@ public void testListBookiesService() throws Exception { @SuppressWarnings("unchecked") HashMap respBody3 = JsonUtil.fromJson(response3.getBody(), HashMap.class); assertEquals(1, respBody3.size()); - assertEquals(true, respBody3.containsKey(getBookie(1).toString())); + assertTrue(respBody3.containsKey(getBookie(1).toString())); // get other 5 rw bookies. HashMap params4 = Maps.newHashMap(); @@ -222,7 +250,7 @@ public void testListBookiesService() throws Exception { @SuppressWarnings("unchecked") HashMap respBody4 = JsonUtil.fromJson(response4.getBody(), HashMap.class); assertEquals(5, respBody4.size()); - assertEquals(true, respBody4.containsKey(getBookie(2).toString())); + assertTrue(respBody4.containsKey(getBookie(2).toString())); } /** @@ -250,8 +278,8 @@ public void testListLedgerService() throws Exception { LinkedHashMap respBody = JsonUtil.fromJson(response1.getBody(), LinkedHashMap.class); assertEquals(numLedgers, respBody.size()); for (int i = 0; i < numLedgers; i++) { - assertEquals(true, respBody.containsKey(Long.valueOf(lh[i].getId()).toString())); - assertEquals(null, respBody.get(Long.valueOf(lh[i].getId()).toString())); + assertTrue(respBody.containsKey(Long.valueOf(lh[i].getId()).toString())); + assertNull(respBody.get(Long.valueOf(lh[i].getId()).toString())); } //2, parameter: print_metadata=true, should print ledger ids, with metadata @@ -265,7 +293,7 @@ public void testListLedgerService() throws Exception { LinkedHashMap respBody2 = JsonUtil.fromJson(response2.getBody(), LinkedHashMap.class); assertEquals(numLedgers, respBody2.size()); for (int i = 0; i < numLedgers; i++) { - assertEquals(true, respBody2.containsKey(Long.valueOf(lh[i].getId()).toString())); + assertTrue(respBody2.containsKey(Long.valueOf(lh[i].getId()).toString())); assertNotNull(respBody2.get(Long.valueOf(lh[i].getId()).toString())); } @@ -283,7 +311,7 @@ public void testListLedgerService() throws Exception { LinkedHashMap respBody3 = JsonUtil.fromJson(response3.getBody(), LinkedHashMap.class); assertEquals(31, respBody3.size()); for (int i = 400; i < 430; i++) { - assertEquals(true, respBody3.containsKey(Long.valueOf(lh[i].getId()).toString())); + assertTrue(respBody3.containsKey(Long.valueOf(lh[i].getId()).toString())); assertNotNull(respBody3.get(Long.valueOf(lh[i].getId()).toString())); } } @@ -329,7 +357,7 @@ public void testDeleteLedgerService() throws Exception { //3, delete first ledger, should return OK, and should only get 3 ledgers after delete. 
HashMap params = Maps.newHashMap(); - Long ledgerId = Long.valueOf(lh[0].getId()); + Long ledgerId = lh[0].getId(); params.put("ledger_id", ledgerId.toString()); HttpServiceRequest request3 = new HttpServiceRequest(null, HttpServer.Method.DELETE, params); HttpServiceResponse response3 = deleteLedgerService.handle(request3); @@ -378,17 +406,21 @@ public void testGetLedgerMetaService() throws Exception { //2, parameters for GET first ledger, should return OK, and contains metadata HashMap params = Maps.newHashMap(); - Long ledgerId = Long.valueOf(lh[0].getId()); + Long ledgerId = lh[0].getId(); params.put("ledger_id", ledgerId.toString()); HttpServiceRequest request2 = new HttpServiceRequest(null, HttpServer.Method.GET, params); HttpServiceResponse response2 = getLedgerMetaService.handle(request2); assertEquals(HttpServer.StatusCode.OK.getValue(), response2.getStatusCode()); @SuppressWarnings("unchecked") - HashMap respBody = JsonUtil.fromJson(response2.getBody(), HashMap.class); + HashMap respBody = JsonUtil.fromJson(response2.getBody(), HashMap.class); assertEquals(1, respBody.size()); + @SuppressWarnings("unchecked") + HashMap expected = JsonUtil.fromJson(JsonUtil.toJson(lh[0].getLedgerMetadata()), HashMap.class); + @SuppressWarnings("unchecked") + HashMap actual = (HashMap) respBody.get(ledgerId.toString()); + // verify LedgerMetadata content is equal - assertTrue(respBody.get(ledgerId.toString()).toString() - .equals(new String(lh[0].getLedgerMetadata().serialize()))); + assertTrue(Maps.difference(expected, actual).areEqual()); } @Test @@ -423,7 +455,7 @@ public void testReadLedgerEntryService() throws Exception { //2, parameters for GET first ledger, should return OK // no start/end entry id, so return all the 100 entries. HashMap params = Maps.newHashMap(); - Long ledgerId = Long.valueOf(lh[0].getId()); + Long ledgerId = lh[0].getId(); params.put("ledger_id", ledgerId.toString()); HttpServiceRequest request2 = new HttpServiceRequest(null, HttpServer.Method.GET, params); HttpServiceResponse response2 = readLedgerEntryService.handle(request2); @@ -446,7 +478,7 @@ public void testReadLedgerEntryService() throws Exception { HashMap respBody3 = JsonUtil.fromJson(response3.getBody(), HashMap.class); assertEquals(77, respBody3.size()); // Verify the entry content that we got. 
- assertTrue(respBody3.get("17").equals(content)); + assertEquals(respBody3.get("17"), content); } @Test @@ -467,7 +499,7 @@ public void testListBookieInfoService() throws Exception { LinkedHashMap respBody = JsonUtil.fromJson(response2.getBody(), LinkedHashMap.class); assertEquals(numberOfBookies + 1, respBody.size()); for (int i = 0; i < numberOfBookies; i++) { - assertEquals(true, respBody.containsKey(getBookie(i).toString())); + assertTrue(respBody.containsKey(getBookie(i).toString())); } } @@ -584,25 +616,18 @@ public void testRecoveryBookieService() throws Exception { assertEquals(HttpServer.StatusCode.OK.getValue(), response5.getStatusCode()); } - ZooKeeper auditorZookeeper; AuditorElector auditorElector; - private void startAuditorElector() throws Exception { - auditorZookeeper = ZooKeeperClient.newBuilder() - .connectString(zkUtil.getZooKeeperConnectString()) - .sessionTimeoutMs(10000) - .build(); - String addr = bs.get(0).getLocalAddress().toString(); + private Future startAuditorElector() throws Exception { + String addr = addressByIndex(0).toString(); ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); conf.setAuditorPeriodicBookieCheckInterval(1); conf.setMetadataServiceUri("zk://" + zkUtil.getZooKeeperConnectString() + "/ledgers"); - auditorElector = new AuditorElector(addr, conf, - auditorZookeeper); - auditorElector.start(); + auditorElector = new AuditorElector(addr, conf); + return auditorElector.start(); } private void stopAuditorElector() throws Exception { auditorElector.shutdown(); - auditorZookeeper.close(); } @Test @@ -628,7 +653,8 @@ public void testTriggerAuditService() throws Exception { @Test public void testWhoIsAuditorService() throws Exception { - startAuditorElector(); + // start the auditor elector and wait until auditor finishes election. + startAuditorElector().get(); HttpEndpointService whoIsAuditorService = bkHttpServiceProvider .provideHttpEndpointService(HttpServer.ApiType.WHO_IS_AUDITOR); @@ -671,25 +697,26 @@ private void testListUnderReplicatedLedgerService(LedgerManagerFactory mFactory) @Cleanup final LedgerUnderreplicationManager underReplicationManager = mFactory.newLedgerUnderreplicationManager(); - LedgerHandle lh = bkc.createLedger(3, 3, BookKeeper.DigestType.CRC32, "passwd".getBytes()); - LedgerMetadata md = LedgerHandleAdapter.getLedgerMetadata(lh); - List ensemble = new ArrayList<>(md.getEnsembles().get(0L)); - ensemble.set(0, new BookieSocketAddress("1.1.1.1", 1000)); - md.updateEnsemble(0L, ensemble); - - GenericCallbackFuture cb = - new GenericCallbackFuture(); - ledgerManager.writeLedgerMetadata(lh.getId(), md, cb); - cb.get(); + // 192.0.2.0/24 is reserved TEST-NET range + LedgerMetadataBuilder metadata = LedgerMetadataBuilder.create() + .withEnsembleSize(3).withWriteQuorumSize(3).withAckQuorumSize(3) + .newEnsembleEntry(0L, Lists.newArrayList(new BookieSocketAddress("192.0.2.1", 1000).toBookieId(), + getBookie(0), + getBookie(1))); + ClientUtil.setupLedger(ledgerManager, 1L, metadata); + // wait for up to two minutes to complete. 
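+        // pollLedgerToRereplicate returns -1 while nothing has been marked under-replicated yet;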
+ // if the metadata was created just before checkAllLedgers ran, then we need to wait for the timeout long underReplicatedLedger = -1; - for (int i = 0; i < 10; i++) { + for (int i = 0; i < 120; i++) { underReplicatedLedger = underReplicationManager.pollLedgerToRereplicate(); if (underReplicatedLedger != -1) { + LOG.info("Underreplicated ledgers found, breaking out of loop"); break; } Thread.sleep(1000); } + assertTrue(underReplicatedLedger != -1); HttpServiceRequest request2 = new HttpServiceRequest(null, HttpServer.Method.GET, null); HttpServiceResponse response2 = listUnderReplicatedLedgerService.handle(request2); @@ -747,4 +774,447 @@ public void testDecommissionService() throws Exception { stopAuditorElector(); } + @Test + public void testTriggerGCService() throws Exception { + baseConf.setMetadataServiceUri(zkUtil.getMetadataServiceUri()); + BookKeeper.DigestType digestType = BookKeeper.DigestType.CRC32; + int numLedgers = 4; + int numMsgs = 100; + LedgerHandle[] lh = new LedgerHandle[numLedgers]; + // create ledgers + for (int i = 0; i < numLedgers; i++) { + lh[i] = bkc.createLedger(digestType, "".getBytes()); + } + String content = "Apache BookKeeper is cool!"; + // add entries + for (int i = 0; i < numMsgs; i++) { + for (int j = 0; j < numLedgers; j++) { + lh[j].addEntry(content.getBytes()); + } + } + // close ledgers + for (int i = 0; i < numLedgers; i++) { + lh[i].close(); + } + HttpEndpointService triggerGCService = bkHttpServiceProvider + .provideHttpEndpointService(HttpServer.ApiType.GC); + + //1, GET, should return OK + HttpServiceRequest request1 = new HttpServiceRequest(null, HttpServer.Method.GET, null); + HttpServiceResponse response1 = triggerGCService.handle(request1); + assertEquals(HttpServer.StatusCode.OK.getValue(), response1.getStatusCode()); + assertTrue(response1.getBody().contains("\"is_in_force_gc\" : \"false\"")); + + //2, PUT, should return OK + HttpServiceRequest request2 = new HttpServiceRequest(null, HttpServer.Method.PUT, null); + HttpServiceResponse response2 = triggerGCService.handle(request2); + assertEquals(HttpServer.StatusCode.OK.getValue(), response2.getStatusCode()); + } + + @Test + public void testGCDetailsService() throws Exception { + baseConf.setMetadataServiceUri(zkUtil.getMetadataServiceUri()); + BookKeeper.DigestType digestType = BookKeeper.DigestType.CRC32; + int numLedgers = 4; + int numMsgs = 100; + LedgerHandle[] lh = new LedgerHandle[numLedgers]; + // create ledgers + for (int i = 0; i < numLedgers; i++) { + lh[i] = bkc.createLedger(digestType, "".getBytes()); + } + String content = "This is test for GC details service!"; + // add entries + for (int i = 0; i < numMsgs; i++) { + for (int j = 0; j < numLedgers; j++) { + lh[j].addEntry(content.getBytes()); + } + } + // close ledgers + for (int i = 0; i < numLedgers; i++) { + lh[i].close(); + } + HttpEndpointService gcDetailsService = bkHttpServiceProvider + .provideHttpEndpointService(HttpServer.ApiType.GC_DETAILS); + + // force trigger a GC + HttpEndpointService triggerGCService = bkHttpServiceProvider + .provideHttpEndpointService(HttpServer.ApiType.GC); + HttpServiceRequest request0 = new HttpServiceRequest(null, HttpServer.Method.PUT, null); + HttpServiceResponse response0 = triggerGCService.handle(request0); + assertEquals(HttpServer.StatusCode.OK.getValue(), response0.getStatusCode()); + + //1, GET, should return OK + HttpServiceRequest request1 = new HttpServiceRequest(null, HttpServer.Method.GET, null); + HttpServiceResponse response1 = gcDetailsService.handle(request1); + 
assertEquals(HttpServer.StatusCode.OK.getValue(), response1.getStatusCode()); + LOG.info("Get response: {}", response1.getBody()); + + //2, PUT, should return NOT_FOUND + HttpServiceRequest request3 = new HttpServiceRequest(null, HttpServer.Method.PUT, null); + HttpServiceResponse response3 = gcDetailsService.handle(request3); + assertEquals(HttpServer.StatusCode.NOT_FOUND.getValue(), response3.getStatusCode()); + } + + @Test + public void testGetBookieState() throws Exception { + HttpEndpointService bookieStateServer = bkHttpServiceProvider + .provideHttpEndpointService(HttpServer.ApiType.BOOKIE_STATE); + + HttpServiceRequest request1 = new HttpServiceRequest(null, HttpServer.Method.GET, null); + HttpServiceResponse response1 = bookieStateServer.handle(request1); + assertEquals(HttpServer.StatusCode.OK.getValue(), response1.getStatusCode()); + + BookieState bs = JsonUtil.fromJson(response1.getBody(), BookieState.class); + assertTrue(bs.isRunning()); + assertFalse(bs.isReadOnly()); + assertTrue(bs.isAvailableForHighPriorityWrites()); + assertFalse(bs.isShuttingDown()); + } + + @Test + public void testGetBookieSanity() throws Exception { + HttpEndpointService bookieStateServer = bkHttpServiceProvider + .provideHttpEndpointService(HttpServer.ApiType.BOOKIE_SANITY); + + HttpServiceRequest request1 = new HttpServiceRequest(null, HttpServer.Method.GET, null); + ServerConfiguration conf = servers.get(0).getConfiguration(); + BookieSanityService service = new BookieSanityService(conf); + HttpServiceResponse response1 = service.handle(request1); + assertEquals(HttpServer.StatusCode.OK.getValue(), response1.getStatusCode()); + // run multiple iteration to validate any server side throttling doesn't + // fail sequential requests. + for (int i = 0; i < 3; i++) { + BookieSanity bs = JsonUtil.fromJson(response1.getBody(), BookieSanity.class); + assertTrue(bs.isPassed()); + assertFalse(bs.isReadOnly()); + } + HttpServiceResponse response2 = bookieStateServer.handle(request1); + assertEquals(HttpServer.StatusCode.OK.getValue(), response2.getStatusCode()); + } + + @Test + public void testGetBookieIsReady() throws Exception { + HttpEndpointService bookieStateServer = bkHttpServiceProvider + .provideHttpEndpointService(HttpServer.ApiType.BOOKIE_IS_READY); + + HttpServiceRequest request1 = new HttpServiceRequest(null, HttpServer.Method.GET, null); + HttpServiceResponse response1 = bookieStateServer.handle(request1); + assertEquals(HttpServer.StatusCode.OK.getValue(), response1.getStatusCode()); + assertEquals("OK", response1.getBody()); + + // Try using POST instead of GET + HttpServiceRequest request2 = new HttpServiceRequest(null, HttpServer.Method.POST, null); + HttpServiceResponse response2 = bookieStateServer.handle(request2); + assertEquals(HttpServer.StatusCode.NOT_FOUND.getValue(), response2.getStatusCode()); + + // Simulate bookies shutting down + for (int i = 0; i < bookieCount(); i++) { + serverByIndex(i).getBookie().getStateManager().forceToShuttingDown(); + } + HttpServiceRequest request3 = new HttpServiceRequest(null, HttpServer.Method.GET, null); + HttpServiceResponse response3 = bookieStateServer.handle(request3); + assertEquals(HttpServer.StatusCode.SERVICE_UNAVAILABLE.getValue(), response3.getStatusCode()); + } + + @Test + public void testGetBookieInfo() throws Exception { + HttpEndpointService bookieStateServer = bkHttpServiceProvider + .provideHttpEndpointService(HttpServer.ApiType.BOOKIE_INFO); + + HttpServiceRequest request1 = new HttpServiceRequest(null, HttpServer.Method.GET, null); + 
HttpServiceResponse response1 = bookieStateServer.handle(request1); + assertEquals(HttpServer.StatusCode.OK.getValue(), response1.getStatusCode()); + LOG.info("Get response: {}", response1.getBody()); + + BookieInfoService.BookieInfo bs = JsonUtil.fromJson(response1.getBody(), BookieInfoService.BookieInfo.class); + assertTrue(bs.getFreeSpace() > 0); + assertTrue(bs.getTotalSpace() > 0); + + // Try using POST instead of GET + HttpServiceRequest request2 = new HttpServiceRequest(null, HttpServer.Method.POST, null); + HttpServiceResponse response2 = bookieStateServer.handle(request2); + assertEquals(HttpServer.StatusCode.NOT_FOUND.getValue(), response2.getStatusCode()); + } + + @Test + public void testGetClusterInfo() throws Exception { + HttpEndpointService clusterInfoServer = bkHttpServiceProvider + .provideHttpEndpointService(HttpServer.ApiType.CLUSTER_INFO); + + HttpServiceRequest request1 = new HttpServiceRequest(null, HttpServer.Method.GET, null); + HttpServiceResponse response1 = clusterInfoServer.handle(request1); + assertEquals(HttpServer.StatusCode.OK.getValue(), response1.getStatusCode()); + LOG.info("Get response: {}", response1.getBody()); + + ClusterInfoService.ClusterInfo info = JsonUtil.fromJson(response1.getBody(), + ClusterInfoService.ClusterInfo.class); + assertFalse(info.isAuditorElected()); + assertTrue(info.getAuditorId().isEmpty()); + assertFalse(info.isClusterUnderReplicated()); + assertTrue(info.isLedgerReplicationEnabled()); + assertTrue(info.getTotalBookiesCount() > 0); + assertTrue(info.getWritableBookiesCount() > 0); + assertEquals(0, info.getReadonlyBookiesCount()); + assertEquals(0, info.getUnavailableBookiesCount()); + assertEquals(info.getTotalBookiesCount(), info.getWritableBookiesCount()); + + // Try using POST instead of GET + HttpServiceRequest request2 = new HttpServiceRequest(null, HttpServer.Method.POST, null); + HttpServiceResponse response2 = clusterInfoServer.handle(request2); + assertEquals(HttpServer.StatusCode.NOT_FOUND.getValue(), response2.getStatusCode()); + } + + @Test + public void testBookieReadOnlyState() throws Exception { + HttpEndpointService bookieStateServer = bkHttpServiceProvider + .provideHttpEndpointService(HttpServer.ApiType.BOOKIE_STATE); + HttpEndpointService bookieReadOnlyService = bkHttpServiceProvider + .provideHttpEndpointService(HttpServer.ApiType.BOOKIE_STATE_READONLY); + + // responses from both endpoints should indicate the bookie is not read only + HttpServiceRequest request = new HttpServiceRequest(null, HttpServer.Method.GET, null); + HttpServiceResponse response = bookieStateServer.handle(request); + assertEquals(HttpServer.StatusCode.OK.getValue(), response.getStatusCode()); + + BookieState bs = JsonUtil.fromJson(response.getBody(), BookieState.class); + assertTrue(bs.isRunning()); + assertFalse(bs.isReadOnly()); + assertTrue(bs.isAvailableForHighPriorityWrites()); + assertFalse(bs.isShuttingDown()); + + request = new HttpServiceRequest(null, HttpServer.Method.GET, null); + response = bookieReadOnlyService.handle(request); + ReadOnlyState readOnlyState = JsonUtil.fromJson(response.getBody(), ReadOnlyState.class); + assertFalse(readOnlyState.isReadOnly()); + + // update the state to read only + request = new HttpServiceRequest(JsonUtil.toJson(new ReadOnlyState(true)), HttpServer.Method.PUT, null); + response = bookieReadOnlyService.handle(request); + readOnlyState = JsonUtil.fromJson(response.getBody(), ReadOnlyState.class); + assertTrue(readOnlyState.isReadOnly()); + + // responses from both endpoints should 
indicate the bookie is read only
+        request = new HttpServiceRequest(null, HttpServer.Method.GET, null);
+        response = bookieStateServer.handle(request);
+        assertEquals(HttpServer.StatusCode.OK.getValue(), response.getStatusCode());
+
+        bs = JsonUtil.fromJson(response.getBody(), BookieState.class);
+        assertTrue(bs.isRunning());
+        assertTrue(bs.isReadOnly());
+        assertTrue(bs.isAvailableForHighPriorityWrites());
+        assertFalse(bs.isShuttingDown());
+
+        request = new HttpServiceRequest(null, HttpServer.Method.GET, null);
+        response = bookieReadOnlyService.handle(request);
+        readOnlyState = JsonUtil.fromJson(response.getBody(), ReadOnlyState.class);
+        assertTrue(readOnlyState.isReadOnly());
+
+        // should be able to update the state to writable again
+        request = new HttpServiceRequest(JsonUtil.toJson(new ReadOnlyState(false)), HttpServer.Method.PUT, null);
+        response = bookieReadOnlyService.handle(request);
+        readOnlyState = JsonUtil.fromJson(response.getBody(), ReadOnlyState.class);
+        assertFalse(readOnlyState.isReadOnly());
+
+        // responses from both endpoints should indicate the bookie is writable
+        request = new HttpServiceRequest(null, HttpServer.Method.GET, null);
+        response = bookieStateServer.handle(request);
+        assertEquals(HttpServer.StatusCode.OK.getValue(), response.getStatusCode());
+
+        bs = JsonUtil.fromJson(response.getBody(), BookieState.class);
+        assertTrue(bs.isRunning());
+        assertFalse(bs.isReadOnly());
+        assertTrue(bs.isAvailableForHighPriorityWrites());
+        assertFalse(bs.isShuttingDown());
+
+        request = new HttpServiceRequest(null, HttpServer.Method.GET, null);
+        response = bookieReadOnlyService.handle(request);
+        readOnlyState = JsonUtil.fromJson(response.getBody(), ReadOnlyState.class);
+        assertFalse(readOnlyState.isReadOnly());
+
+        // a bookie restarted with forceReadOnlyBookie=true must reject the switch back to writable
+        MetadataBookieDriver metadataDriver = BookieResources.createMetadataDriver(
+                baseConf, NullStatsLogger.INSTANCE);
+        restartBookies(c -> {
+            c.setForceReadOnlyBookie(true);
+            c.setReadOnlyModeEnabled(true);
+            return c;
+        });
+        // the old bkHttpServiceProvider wraps a bookie instance that has just been shut down,
+        // so build a new provider around the bookie brought up by the restart
+        BKHttpServiceProvider bkHttpServiceProvider2 = new BKHttpServiceProvider.Builder()
+                .setBookieServer(serverByIndex(numberOfBookies - 1))
+                .setServerConfiguration(baseConf)
+                .setLedgerManagerFactory(metadataDriver.getLedgerManagerFactory())
+                .build();
+        HttpEndpointService bookieReadOnlyService2 = bkHttpServiceProvider2
+                .provideHttpEndpointService(HttpServer.ApiType.BOOKIE_STATE_READONLY);
+
+        request = new HttpServiceRequest(JsonUtil.toJson(new ReadOnlyState(false)), HttpServer.Method.PUT, null);
+        response = bookieReadOnlyService2.handle(request);
+        assertEquals(400, response.getStatusCode());
+
+        // disable read-only mode entirely; switching to read-only must now be rejected as well
+        restartBookies(c -> {
+            c.setForceReadOnlyBookie(false);
+            c.setReadOnlyModeEnabled(false);
+            return c;
+        });
+        bkHttpServiceProvider2 = new BKHttpServiceProvider.Builder()
+                .setBookieServer(serverByIndex(numberOfBookies - 1))
+                .setServerConfiguration(baseConf)
+                .setLedgerManagerFactory(metadataDriver.getLedgerManagerFactory())
+                .build();
+        bookieReadOnlyService2 = bkHttpServiceProvider2
+                .provideHttpEndpointService(HttpServer.ApiType.BOOKIE_STATE_READONLY);
+
+        request = new HttpServiceRequest(JsonUtil.toJson(new ReadOnlyState(true)), HttpServer.Method.PUT, null);
+        response = bookieReadOnlyService2.handle(request);
+        assertEquals(400, response.getStatusCode());
+    }
+
+    @Test
+    public void testSuspendCompaction() throws Exception {
+        HttpEndpointService suspendCompactionService = bkHttpServiceProvider
+                .provideHttpEndpointService(HttpServer.ApiType.SUSPEND_GC_COMPACTION);
+
+        HttpEndpointService resumeCompactionService = bkHttpServiceProvider
+                .provideHttpEndpointService(HttpServer.ApiType.RESUME_GC_COMPACTION);
+
+        //1, PUT with a null body, should return BAD_REQUEST
+        HttpServiceRequest request1 = new HttpServiceRequest(null, HttpServer.Method.PUT, null);
+        HttpServiceResponse response1 = suspendCompactionService.handle(request1);
+        assertEquals(HttpServer.StatusCode.BAD_REQUEST.getValue(), response1.getStatusCode());
+
+        //2, PUT with an empty JSON body, should also return BAD_REQUEST,
+        //   because the body must contain "suspendMajor" or "suspendMinor"
+        String putBody2 = "{}";
+        HttpServiceRequest request2 = new HttpServiceRequest(putBody2, HttpServer.Method.PUT, null);
+        HttpServiceResponse response2 = suspendCompactionService.handle(request2);
+        assertEquals(HttpServer.StatusCode.BAD_REQUEST.getValue(), response2.getStatusCode());
+
+        //3, GET before suspending, should succeed
+        HttpServiceRequest request3 = new HttpServiceRequest(null, HttpServer.Method.GET, null);
+        HttpServiceResponse response3 = suspendCompactionService.handle(request3);
+        assertEquals(HttpServer.StatusCode.OK.getValue(), response3.getStatusCode());
+
+        Map<String, String> responseMap = JsonUtil.fromJson(
+                response3.getBody(),
+                Map.class
+        );
+        assertEquals(responseMap.get("isMajorGcSuspended"), "false");
+        assertEquals(responseMap.get("isMinorGcSuspended"), "false");
+
+        //4, PUT with a suspend body, should succeed
+        String putBody4 = "{\"suspendMajor\": true, \"suspendMinor\": true}";
+        HttpServiceRequest request4 = new HttpServiceRequest(putBody4, HttpServer.Method.PUT, null);
+        HttpServiceResponse response4 = suspendCompactionService.handle(request4);
+        assertEquals(HttpServer.StatusCode.OK.getValue(), response4.getStatusCode());
+
+        //5, GET after suspending, should succeed
+        HttpServiceRequest request5 = new HttpServiceRequest(null, HttpServer.Method.GET, null);
+        HttpServiceResponse response5 = suspendCompactionService.handle(request5);
+        assertEquals(HttpServer.StatusCode.OK.getValue(), response5.getStatusCode());
+
+        Map<String, String> responseMap5 = JsonUtil.fromJson(
+                response5.getBody(),
+                Map.class
+        );
+        assertEquals(responseMap5.get("isMajorGcSuspended"), "true");
+        assertEquals(responseMap5.get("isMinorGcSuspended"), "true");
+
+        //6, PUT with a resume body, should succeed
+        String putBody6 = "{\"resumeMajor\": true, \"resumeMinor\": true}";
+        HttpServiceRequest request6 = new HttpServiceRequest(putBody6, HttpServer.Method.PUT, null);
+        HttpServiceResponse response6 = resumeCompactionService.handle(request6);
+        assertEquals(HttpServer.StatusCode.OK.getValue(), response6.getStatusCode());
+
+        //7, GET after resuming, should succeed
+        HttpServiceRequest request7 = new HttpServiceRequest(null, HttpServer.Method.GET, null);
+        HttpServiceResponse response7 = suspendCompactionService.handle(request7);
+        assertEquals(HttpServer.StatusCode.OK.getValue(), response7.getStatusCode());
+
+        Map<String, String> responseMap7 = JsonUtil.fromJson(
+                response7.getBody(),
+                Map.class
+        );
+        assertEquals(responseMap7.get("isMajorGcSuspended"), "false");
+        assertEquals(responseMap7.get("isMinorGcSuspended"), "false");
+    }
+
+    @Test
+    public void testTriggerEntryLocationCompactService() throws Exception {
+        BookieServer bookieServer = serverByIndex(numberOfBookies - 1);
+        LedgerStorage spyLedgerStorage = spy(bookieServer.getBookie().getLedgerStorage());
+        List<String> dbLocationPath = Lists.newArrayList("/data1/bookkeeper/ledgers/current/locations",
+                "/data2/bookkeeper/ledgers/current/locations");
+        when(spyLedgerStorage.getEntryLocationDBPath())
+                .thenReturn(dbLocationPath);
+
+        HashMap<String, Boolean> statusMap = Maps.newHashMap();
+        statusMap.put("/data1/bookkeeper/ledgers/current/locations", false);
+        statusMap.put("/data2/bookkeeper/ledgers/current/locations", true);
+        when(spyLedgerStorage.isEntryLocationCompacting(dbLocationPath))
+                .thenReturn(statusMap);
+
+        Field ledgerStorageField = bookieServer.getBookie().getClass().getDeclaredField("ledgerStorage");
+        ledgerStorageField.setAccessible(true);
+        ledgerStorageField.set(bookieServer.getBookie(), spyLedgerStorage);
+
+        HttpEndpointService triggerEntryLocationCompactService = bkHttpServiceProvider
+                .provideHttpEndpointService(HttpServer.ApiType.TRIGGER_ENTRY_LOCATION_COMPACT);
+
+        // 1. PUT
+        // 1.1 Trigger compaction of all entry location RocksDB instances, should return OK
+        HttpServiceRequest request1 = new HttpServiceRequest("{\"entryLocationRocksDBCompact\":true}",
+                HttpServer.Method.PUT, null);
+        HttpServiceResponse response1 = triggerEntryLocationCompactService.handle(request1);
+        assertEquals(HttpServer.StatusCode.OK.getValue(), response1.getStatusCode());
+        LOG.info("Get response: {}", response1.getBody());
+
+        // 1.2 Trigger compaction for one specific entry location RocksDB, should return OK
+        String body2 = "{\"entryLocationRocksDBCompact\":true,\"entryLocations\"" +
+                ":\"/data1/bookkeeper/ledgers/current/locations\"}";
+        HttpServiceRequest request2 = new HttpServiceRequest(body2, HttpServer.Method.PUT, null);
+        HttpServiceResponse response2 = triggerEntryLocationCompactService.handle(request2);
+        assertEquals(HttpServer.StatusCode.OK.getValue(), response2.getStatusCode());
+        LOG.info("Get response: {}", response2.getBody());
+        assertTrue(response2.getBody().contains("Triggered entry Location RocksDB"));
+
+        // 1.3 Trigger compaction for an invalid entry location, should return BAD_REQUEST
+        String body3 = "{\"entryLocationRocksDBCompact\":true,\"entryLocations\"" +
+                ":\"/invalid1/locations,/data2/bookkeeper/ledgers/current/locations\"}";
+        HttpServiceRequest request3 = new HttpServiceRequest(body3, HttpServer.Method.PUT, null);
+        HttpServiceResponse response3 = triggerEntryLocationCompactService.handle(request3);
+        assertEquals(HttpServer.StatusCode.BAD_REQUEST.getValue(), response3.getStatusCode());
+        LOG.info("Get response: {}", response3.getBody());
+        assertTrue(response3.getBody().contains("is invalid"));
+
+        // 1.4 Some RocksDB instances are already compacting, should still return OK
+        String body4 = "{\"entryLocationRocksDBCompact\":true,\"entryLocations\"" +
+                ":\"/data1/bookkeeper/ledgers/current/locations,/data2/bookkeeper/ledgers/current/locations\"}";
+        HttpServiceRequest request4 = new HttpServiceRequest(body4, HttpServer.Method.PUT, null);
+        HttpServiceResponse response4 = triggerEntryLocationCompactService.handle(request4);
+        assertEquals(HttpServer.StatusCode.OK.getValue(), response4.getStatusCode());
+        LOG.info("Get response: {}", response4.getBody());
+
+        // 1.5 PUT with an empty body, should return BAD_REQUEST
+        HttpServiceRequest request5 = new HttpServiceRequest(null, HttpServer.Method.PUT, null);
+        HttpServiceResponse response5 = triggerEntryLocationCompactService.handle(request5);
+        assertEquals(HttpServer.StatusCode.BAD_REQUEST.getValue(), response5.getStatusCode());
+        LOG.info("Get response: {}", response5.getBody());
+
+        // 2. GET, should return OK with the per-path compaction status
+        HttpServiceRequest request6 = new HttpServiceRequest(null, HttpServer.Method.GET, null);
+        HttpServiceResponse response6 = triggerEntryLocationCompactService.handle(request6);
+        assertEquals(HttpServer.StatusCode.OK.getValue(), response6.getStatusCode());
+        assertTrue(response6.getBody().contains("\"/data2/bookkeeper/ledgers/current/locations\" : true"));
+        assertTrue(response6.getBody().contains("\"/data1/bookkeeper/ledgers/current/locations\" : false"));
+
+        // 3. POST is not allowed, should return METHOD_NOT_ALLOWED
+        HttpServiceRequest request7 = new HttpServiceRequest(null, HttpServer.Method.POST, null);
+        HttpServiceResponse response7 = triggerEntryLocationCompactService.handle(request7);
+        assertEquals(HttpServer.StatusCode.METHOD_NOT_ALLOWED.getValue(), response7.getStatusCode());
+    }
 }
diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/server/http/service/AutoRecoveryStatusServiceTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/server/http/service/AutoRecoveryStatusServiceTest.java
new file mode 100644
index 00000000000..69e851f0911
--- /dev/null
+++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/server/http/service/AutoRecoveryStatusServiceTest.java
@@ -0,0 +1,106 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.bookkeeper.server.http.service;
+
+import static org.junit.Assert.assertEquals;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.google.common.collect.ImmutableMap;
+import java.util.Map;
+import org.apache.bookkeeper.http.HttpServer;
+import org.apache.bookkeeper.http.service.HttpServiceRequest;
+import org.apache.bookkeeper.http.service.HttpServiceResponse;
+import org.apache.bookkeeper.test.BookKeeperClusterTestCase;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Unit tests for {@link AutoRecoveryStatusService}.
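+ * <p>GET reports whether auto-recovery is enabled; PUT with an "enabled"
+ * parameter toggles it, and requests without that parameter are rejected.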
+ */ +public class AutoRecoveryStatusServiceTest extends BookKeeperClusterTestCase { + private final ObjectMapper mapper = new ObjectMapper(); + private AutoRecoveryStatusService autoRecoveryStatusService; + public AutoRecoveryStatusServiceTest() { + super(1); + } + + @Override + @Before + public void setUp() throws Exception { + super.setUp(); + autoRecoveryStatusService = new AutoRecoveryStatusService(baseConf); + } + + @Test + public void testGetStatus() throws Exception { + HttpServiceRequest request = new HttpServiceRequest(null, HttpServer.Method.GET, null); + HttpServiceResponse response = autoRecoveryStatusService.handle(request); + assertEquals(HttpServer.StatusCode.OK.getValue(), response.getStatusCode()); + JsonNode json = mapper.readTree(response.getBody()); + assertEquals(Boolean.TRUE, json.get("enabled").asBoolean()); + } + + @Test + public void testEnableStatus() throws Exception { + Map params = ImmutableMap.of("enabled", "true"); + HttpServiceRequest request = new HttpServiceRequest(null, HttpServer.Method.PUT, params); + HttpServiceResponse response = autoRecoveryStatusService.handle(request); + assertEquals(HttpServer.StatusCode.OK.getValue(), response.getStatusCode()); + JsonNode json = mapper.readTree(response.getBody()); + assertEquals(Boolean.TRUE, json.get("enabled").asBoolean()); + + request = new HttpServiceRequest(null, HttpServer.Method.GET, params); + response = autoRecoveryStatusService.handle(request); + assertEquals(HttpServer.StatusCode.OK.getValue(), response.getStatusCode()); + json = mapper.readTree(response.getBody()); + assertEquals(Boolean.TRUE, json.get("enabled").asBoolean()); + } + + @Test + public void testDisableStatus() throws Exception { + Map params = ImmutableMap.of("enabled", "false"); + HttpServiceRequest request = new HttpServiceRequest(null, HttpServer.Method.PUT, params); + HttpServiceResponse response = autoRecoveryStatusService.handle(request); + assertEquals(HttpServer.StatusCode.OK.getValue(), response.getStatusCode()); + JsonNode json = mapper.readTree(response.getBody()); + assertEquals(Boolean.FALSE, json.get("enabled").asBoolean()); + + request = new HttpServiceRequest(null, HttpServer.Method.GET, params); + response = autoRecoveryStatusService.handle(request); + assertEquals(HttpServer.StatusCode.OK.getValue(), response.getStatusCode()); + json = mapper.readTree(response.getBody()); + assertEquals(Boolean.FALSE, json.get("enabled").asBoolean()); + } + + @Test + public void testInvalidParams() throws Exception { + Map params = ImmutableMap.of("enable", "false"); + HttpServiceRequest request = new HttpServiceRequest(null, HttpServer.Method.PUT, params); + HttpServiceResponse response = autoRecoveryStatusService.handle(request); + assertEquals(HttpServer.StatusCode.BAD_REQUEST.getValue(), response.getStatusCode()); + } + + @Test + public void testInvalidMethod() throws Exception { + HttpServiceRequest request = new HttpServiceRequest(null, HttpServer.Method.POST, null); + HttpServiceResponse response = autoRecoveryStatusService.handle(request); + assertEquals(HttpServer.StatusCode.NOT_FOUND.getValue(), response.getStatusCode()); + } +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/server/http/service/ListLedgerServiceTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/server/http/service/ListLedgerServiceTest.java new file mode 100644 index 00000000000..e1882eb0d91 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/server/http/service/ListLedgerServiceTest.java @@ -0,0 
+1,173 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.server.http.service; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.collect.ImmutableMap; +import java.util.Base64; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import org.apache.bookkeeper.bookie.BookieResources; +import org.apache.bookkeeper.client.BookKeeper; +import org.apache.bookkeeper.client.LedgerHandle; +import org.apache.bookkeeper.client.api.LedgerMetadata; +import org.apache.bookkeeper.http.HttpServer; +import org.apache.bookkeeper.http.service.HttpServiceRequest; +import org.apache.bookkeeper.http.service.HttpServiceResponse; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.stats.StatsProvider; +import org.apache.bookkeeper.test.BookKeeperClusterTestCase; +import org.apache.bookkeeper.test.TestStatsProvider; +import org.apache.commons.lang3.RandomUtils; +import org.junit.Before; +import org.junit.Test; + +/** + * Unit tests for {@link ListLedgerService}. 
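+ *
+ * <p>A minimal sketch of the query these tests drive, assuming only the
+ * {@code print_metadata} and {@code decode_meta} parameters used below:
+ * <pre>{@code
+ * HttpServiceResponse rsp = listLedgerService.handle(new HttpServiceRequest(
+ *         null, HttpServer.Method.GET,
+ *         ImmutableMap.of("print_metadata", "true", "decode_meta", "true")));
+ * // body: a JSON object keyed by ledger id, each value the decoded metadata
+ * }</pre>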
+ */ +public class ListLedgerServiceTest extends BookKeeperClusterTestCase { + private final ObjectMapper mapper = new ObjectMapper(); + private ListLedgerService listLedgerService; + + public ListLedgerServiceTest() { + super(1); + } + + @Override + @Before + public void setUp() throws Exception { + super.setUp(); + StatsProvider provider = new TestStatsProvider(); + listLedgerService = new ListLedgerService(confByIndex(0), + BookieResources.createMetadataDriver(confByIndex(0), + provider.getStatsLogger("")).getLedgerManagerFactory()); + } + + @Test + public void testEmptyList() throws Exception { + HttpServiceResponse response = listLedgerService.handle(new HttpServiceRequest()); + assertEquals(HttpServer.StatusCode.OK.getValue(), response.getStatusCode()); + JsonNode json = mapper.readTree(response.getBody()); + assertEquals(0, json.size()); + } + + @Test + public void testListLedgers() throws Exception { + int ledgerNum = RandomUtils.nextInt(1, 10); + Map<Long, LedgerMetadata> ledgers = new HashMap<>(); + for (int i = 0; i < ledgerNum; i++) { + LedgerHandle ledger = bkc.createLedger(1, 1, 1, BookKeeper.DigestType.CRC32, new byte[0]); + ledgers.put(ledger.getId(), ledger.getLedgerMetadata()); + ledger.close(); + } + + HttpServiceResponse response = listLedgerService.handle(new HttpServiceRequest()); + assertEquals(HttpServer.StatusCode.OK.getValue(), response.getStatusCode()); + JsonNode json = mapper.readTree(response.getBody()); + assertEquals(ledgerNum, json.size()); + + json.fieldNames().forEachRemaining(field -> { + assertTrue(ledgers.containsKey(Long.parseLong(field))); + assertTrue(json.get(field).isNull()); + }); + } + + @Test + public void testListLedgersWithMetadata() throws Exception { + int ledgerNum = RandomUtils.nextInt(1, 10); + Map<Long, LedgerMetadata> ledgers = new HashMap<>(); + for (int i = 0; i < ledgerNum; i++) { + LedgerHandle ledger = bkc.createLedger(1, 1, 1, BookKeeper.DigestType.CRC32, new byte[0]); + ledger.close(); + ledgers.put(ledger.getId(), ledger.getLedgerMetadata()); + } + + HttpServiceResponse response = listLedgerService.handle(new HttpServiceRequest(null, HttpServer.Method.GET, + ImmutableMap.of("print_metadata", "true"))); + assertEquals(HttpServer.StatusCode.OK.getValue(), response.getStatusCode()); + JsonNode json = mapper.readTree(response.getBody()); + assertEquals(ledgerNum, json.size()); + + json.fieldNames().forEachRemaining(field -> { + LedgerMetadata meta = ledgers.get(Long.parseLong(field)); + assertNotNull(meta); + assertFalse(json.get(field).isNull()); + }); + } + + @Test + public void testListLedgersWithMetadataDecoded() throws Exception { + int ledgerNum = RandomUtils.nextInt(1, 10); + Map<Long, LedgerMetadata> ledgers = new HashMap<>(); + for (int i = 0; i < ledgerNum; i++) { + LedgerHandle ledger = bkc.createLedger(1, 1, 1, BookKeeper.DigestType.CRC32, new byte[0], + ImmutableMap.of("test_key", "test_value".getBytes())); + ledger.close(); + ledgers.put(ledger.getId(), ledger.getLedgerMetadata()); + } + + HttpServiceResponse response = listLedgerService.handle(new HttpServiceRequest(null, HttpServer.Method.GET, + ImmutableMap.of("print_metadata", "true", "decode_meta", "true"))); + assertEquals(HttpServer.StatusCode.OK.getValue(), response.getStatusCode()); + JsonNode json = mapper.readTree(response.getBody()); + assertEquals(ledgerNum, json.size()); + + json.fieldNames().forEachRemaining(field -> { + LedgerMetadata meta = ledgers.get(Long.parseLong(field)); + assertNotNull(meta); + JsonNode node = json.get(field); + assertEquals(meta.getMetadataFormatVersion(),
node.get("metadataFormatVersion").asInt()); + assertEquals(meta.getEnsembleSize(), node.get("ensembleSize").asInt()); + assertEquals(meta.getWriteQuorumSize(), node.get("writeQuorumSize").asInt()); + assertEquals(meta.getAckQuorumSize(), node.get("ackQuorumSize").asInt()); + assertEquals(meta.getCToken(), node.get("ctoken").asLong()); +// assertEquals(meta.getCtime(), node.get("ctime").asLong()); + assertEquals(meta.getState().name(), node.get("state").asText()); + assertEquals(meta.isClosed(), node.get("closed").asBoolean()); + assertEquals(meta.getLength(), node.get("length").asLong()); + assertEquals(meta.getLastEntryId(), node.get("lastEntryId").asLong()); + assertEquals(meta.getDigestType().name(), node.get("digestType").asText()); + assertEquals(new String(meta.getPassword()), node.get("password").asText()); + + for (Map.Entry<String, byte[]> entry : meta.getCustomMetadata().entrySet()) { + JsonNode data = node.get("customMetadata").get(entry.getKey()); + assertArrayEquals(entry.getValue(), Base64.getDecoder().decode(data.asText())); + } + + for (Map.Entry<Long, ? extends List<BookieId>> entry : meta.getAllEnsembles().entrySet()) { + JsonNode members = node.get("allEnsembles") + .get(String.valueOf(entry.getKey())); + assertEquals(1, entry.getValue().size()); + assertEquals(entry.getValue().size(), members.size()); + JsonNode member = members.get(0); + assertEquals(entry.getValue().get(0).getId(), member.get("id").asText()); + } + }); + } +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/server/http/service/MetricsServiceTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/server/http/service/MetricsServiceTest.java index 3f5e63d60b4..c46dfe29976 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/server/http/service/MetricsServiceTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/server/http/service/MetricsServiceTest.java @@ -19,6 +19,7 @@ package org.apache.bookkeeper.server.http.service; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.CALLS_REAL_METHODS; import static org.mockito.Mockito.doAnswer; @@ -55,6 +56,7 @@ public void testForbiddenMethods() throws Exception { HttpServiceRequest request = new HttpServiceRequest().setMethod(Method.PUT); HttpServiceResponse response = service.handle(request); assertEquals(StatusCode.FORBIDDEN.getValue(), response.getStatusCode()); + assertNull(response.getContentType()); assertEquals( "PUT is forbidden. Should be GET method", response.getBody()); @@ -66,6 +68,7 @@ public void testNullStatsProvider() throws Exception { HttpServiceRequest request = new HttpServiceRequest().setMethod(Method.GET); HttpServiceResponse response = service.handle(request); assertEquals(StatusCode.INTERNAL_ERROR.getValue(), response.getStatusCode()); + assertNull(response.getContentType()); assertEquals( "Stats provider is not enabled.
Please enable it by set statsProviderClass" + " on bookie configuration", @@ -86,6 +89,7 @@ public void testWriteMetrics() throws Exception { HttpServiceResponse response = service.handle(request); assertEquals(StatusCode.OK.getValue(), response.getStatusCode()); + assertEquals(MetricsService.PROMETHEUS_CONTENT_TYPE_004, response.getContentType()); assertEquals(content, response.getBody()); } @@ -98,6 +102,7 @@ public void testWriteMetricsException() throws Exception { HttpServiceResponse response = service.handle(request); assertEquals(StatusCode.INTERNAL_ERROR.getValue(), response.getStatusCode()); + assertNull(response.getContentType()); assertEquals("Exceptions are thrown when exporting metrics : write-metrics-exception", response.getBody()); } @@ -111,6 +116,7 @@ public void testWriteMetricsUnimplemented() throws Exception { HttpServiceResponse response = service.handle(request); assertEquals(StatusCode.INTERNAL_ERROR.getValue(), response.getStatusCode()); + assertNull(response.getContentType()); assertEquals("Currently stats provider doesn't support exporting metrics in http service", response.getBody()); } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/server/http/service/TriggerGCServiceTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/server/http/service/TriggerGCServiceTest.java new file mode 100644 index 00000000000..dca0466e563 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/server/http/service/TriggerGCServiceTest.java @@ -0,0 +1,147 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.bookkeeper.server.http.service; + +import static org.junit.Assert.assertEquals; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.RETURNS_DEEP_STUBS; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import lombok.extern.slf4j.Slf4j; +import org.apache.bookkeeper.bookie.LedgerStorage; +import org.apache.bookkeeper.bookie.storage.ldb.DbLedgerStorage; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.http.HttpServer; +import org.apache.bookkeeper.http.service.HttpServiceRequest; +import org.apache.bookkeeper.http.service.HttpServiceResponse; +import org.apache.bookkeeper.proto.BookieServer; +import org.junit.Before; +import org.junit.Test; + + +/** + * Unit test for {@link TriggerGCService}. 
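+ *
+ * <p>A sketch of the HTTP contract exercised below (JSON keys as used in this
+ * test; the defaults are inferred from the {@code forceGC} calls verified here):
+ * <pre>{@code
+ * // PUT with a JSON body; a flag that is absent defaults to true
+ * request.setBody("{\"forceMajor\":false}");   // expect forceGC(false, true)
+ * // GET returns the current state, e.g. {"is_in_force_gc" : "false"}
+ * }</pre>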
+ */ +@Slf4j +public class TriggerGCServiceTest { + private TriggerGCService service; + private BookieServer mockBookieServer; + private LedgerStorage mockLedgerStorage; + + @Before + public void setup() { + this.mockBookieServer = mock(BookieServer.class, RETURNS_DEEP_STUBS); + this.mockLedgerStorage = mock(DbLedgerStorage.class); + when(mockBookieServer.getBookie().getLedgerStorage()).thenReturn(mockLedgerStorage); + when(mockLedgerStorage.isInForceGC()).thenReturn(false); + when(mockLedgerStorage.isMajorGcSuspended()).thenReturn(false); + when(mockLedgerStorage.isMinorGcSuspended()).thenReturn(false); + this.service = new TriggerGCService(new ServerConfiguration(), mockBookieServer); + } + + @Test + public void testHandleRequest() throws Exception { + + // test empty put body + HttpServiceRequest request = new HttpServiceRequest(); + request.setMethod(HttpServer.Method.PUT); + HttpServiceResponse resp = service.handle(request); + assertEquals(HttpServer.StatusCode.OK.getValue(), resp.getStatusCode()); + assertEquals("\"Triggered GC on BookieServer: " + mockBookieServer.getBookieId() + "\"", + resp.getBody()); + + // test invalid put json body + request = new HttpServiceRequest(); + request.setMethod(HttpServer.Method.PUT); + request.setBody("test"); + resp = service.handle(request); + assertEquals(HttpServer.StatusCode.BAD_REQUEST.getValue(), resp.getStatusCode()); + assertEquals("Failed to handle the request, exception: Failed to deserialize Object from Json string", + resp.getBody()); + + // test forceMajor and forceMinor not set + request = new HttpServiceRequest(); + request.setMethod(HttpServer.Method.PUT); + request.setBody("{\"test\":1}"); + resp = service.handle(request); + verify(mockLedgerStorage, times(1)).forceGC(eq(true), eq(true)); + assertEquals(HttpServer.StatusCode.OK.getValue(), resp.getStatusCode()); + assertEquals("\"Triggered GC on BookieServer: " + mockBookieServer.getBookieId() + "\"", + resp.getBody()); + + // test forceMajor set, but forceMinor not set + request = new HttpServiceRequest(); + request.setMethod(HttpServer.Method.PUT); + request.setBody("{\"test\":1,\"forceMajor\":true}"); + resp = service.handle(request); + verify(mockLedgerStorage, times(2)).forceGC(eq(true), eq(true)); + assertEquals(HttpServer.StatusCode.OK.getValue(), resp.getStatusCode()); + assertEquals("\"Triggered GC on BookieServer: " + mockBookieServer.getBookieId() + "\"", + resp.getBody()); + + // test forceMajor set as a string value, forceMinor not set + request = new HttpServiceRequest(); + request.setMethod(HttpServer.Method.PUT); + request.setBody("{\"test\":1,\"forceMajor\":\"true\"}"); + resp = service.handle(request); + verify(mockLedgerStorage, times(3)).forceGC(eq(true), eq(true)); + assertEquals(HttpServer.StatusCode.OK.getValue(), resp.getStatusCode()); + assertEquals("\"Triggered GC on BookieServer: " + mockBookieServer.getBookieId() + "\"", + resp.getBody()); + + // test forceMajor set to false, and forceMinor not set + request = new HttpServiceRequest(); + request.setMethod(HttpServer.Method.PUT); + request.setBody("{\"test\":1,\"forceMajor\":false}"); + resp = service.handle(request); + verify(mockLedgerStorage, times(1)).forceGC(eq(false), eq(true)); + assertEquals(HttpServer.StatusCode.OK.getValue(), resp.getStatusCode()); + assertEquals("\"Triggered GC on BookieServer: " + mockBookieServer.getBookieId() + "\"", + resp.getBody()); + + // test forceMajor not set and forceMinor set + request = new HttpServiceRequest(); + request.setMethod(HttpServer.Method.PUT); +
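// forceMajor is absent here, so it defaults to true and forceGC(true, true) is expected (see the verify below) +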
request.setBody("{\"test\":1,\"forceMinor\":true}"); + resp = service.handle(request); + verify(mockLedgerStorage, times(4)).forceGC(eq(true), eq(true)); + assertEquals(HttpServer.StatusCode.OK.getValue(), resp.getStatusCode()); + assertEquals("\"Triggered GC on BookieServer: " + mockBookieServer.getBookieId() + "\"", + resp.getBody()); + + // test get gc + request = new HttpServiceRequest(); + request.setMethod(HttpServer.Method.GET); + resp = service.handle(request); + assertEquals(HttpServer.StatusCode.OK.getValue(), resp.getStatusCode()); + assertEquals("{\n \"is_in_force_gc\" : \"false\"\n}", resp.getBody()); + + // test invalid method type + request = new HttpServiceRequest(); + request.setMethod(HttpServer.Method.POST); + resp = service.handle(request); + assertEquals(HttpServer.StatusCode.METHOD_NOT_ALLOWED.getValue(), resp.getStatusCode()); + assertEquals("Not allowed method. Should be PUT to trigger GC, Or GET to get Force GC state.", + resp.getBody()); + } + +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/suites/BookKeeperClusterTestSuite.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/suites/BookKeeperClusterTestSuite.java new file mode 100644 index 00000000000..98d360d0166 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/suites/BookKeeperClusterTestSuite.java @@ -0,0 +1,151 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.bookkeeper.suites; + +import static org.junit.Assert.assertTrue; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import io.netty.buffer.PooledByteBufAllocator; +import lombok.extern.slf4j.Slf4j; +import org.apache.bookkeeper.bookie.MockUncleanShutdownDetection; +import org.apache.bookkeeper.bookie.TestBookieImpl; +import org.apache.bookkeeper.client.BookKeeperAdmin; +import org.apache.bookkeeper.common.net.ServiceURI; +import org.apache.bookkeeper.conf.ClientConfiguration; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.conf.TestBKConfiguration; +import org.apache.bookkeeper.proto.BookieServer; +import org.apache.bookkeeper.stats.NullStatsLogger; +import org.apache.bookkeeper.test.TestStatsProvider; +import org.apache.bookkeeper.util.IOUtils; +import org.apache.bookkeeper.util.PortManager; +import org.apache.commons.io.FileUtils; +import org.junit.AfterClass; +import org.junit.BeforeClass; + +/** + * A class that runs a BookKeeper cluster for testing. + * + *

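+ * <p>A typical subclass looks roughly like this (an illustrative sketch, not
+ * a test that ships with this patch):
+ * <pre>{@code
+ * public class MyLedgerTest extends BookKeeperClusterTestSuite {
+ *     @Test
+ *     public void testAgainstSharedCluster() throws Exception {
+ *         // build a client from baseClientConf and exercise the shared bookies
+ *     }
+ * }
+ * }</pre>
+ *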
<p>The cluster is set up once before the class and torn down after it; it will not be restarted between + * test methods. It is more suitable for running tests that don't require restarting bookies. + */ +@Slf4j +public abstract class BookKeeperClusterTestSuite { + + protected static MetadataStore metadataStore; + protected static ClientConfiguration baseClientConf; + protected static ServerConfiguration baseServerConf; + protected static final int NUM_BOOKIES = 3; + protected static final List<BookieServer> BOOKIES = new ArrayList<>(NUM_BOOKIES); + protected static final List<File> TMP_DIRS = new ArrayList<>(NUM_BOOKIES); + + protected static File createTempDir(String prefix, String suffix) throws IOException { + File dir = IOUtils.createTempDir(prefix, suffix); + TMP_DIRS.add(dir); + return dir; + } + + protected static ServerConfiguration newServerConfiguration() throws Exception { + File f = createTempDir("bookie", "test"); + int port = PortManager.nextFreePort(); + return newServerConfiguration(port, f, new File[] { f }); + } + + protected static ServerConfiguration newServerConfiguration(int port, File journalDir, File[] ledgerDirs) { + ServerConfiguration conf = new ServerConfiguration(baseServerConf); + conf.setBookiePort(port); + conf.setJournalDirName(journalDir.getPath()); + String[] ledgerDirNames = new String[ledgerDirs.length]; + for (int i = 0; i < ledgerDirs.length; i++) { + ledgerDirNames[i] = ledgerDirs[i].getPath(); + } + conf.setLedgerDirNames(ledgerDirNames); + conf.setEnableTaskExecutionStats(true); + return conf; + } + + @BeforeClass + public static void setUpCluster() throws Exception { + setUpCluster(NUM_BOOKIES); + } + + protected static void setUpCluster(int numBookies) throws Exception { + // set up the metadata store + metadataStore = new ZKMetadataStore(); + metadataStore.start(); + ServiceURI uri = metadataStore.getServiceUri(); + log.info("Setting up cluster at service uri : {}", uri.getUri()); + + baseClientConf = new ClientConfiguration() + .setMetadataServiceUri(uri.getUri().toString()); + baseServerConf = TestBKConfiguration.newServerConfiguration() + .setMetadataServiceUri(uri.getUri().toString()); + + // format the cluster + assertTrue(BookKeeperAdmin.format(baseServerConf, false, true)); + + // start bookies + startNumBookies(numBookies); + } + + private static void startNumBookies(int numBookies) throws Exception { + for (int i = 0; i < numBookies; i++) { + ServerConfiguration conf = newServerConfiguration(); + log.info("Starting new bookie on port : {}", conf.getBookiePort()); + BookieServer server = startBookie(conf); + synchronized (BOOKIES) { + BOOKIES.add(server); + } + } + } + + private static BookieServer startBookie(ServerConfiguration conf) throws Exception { + conf.setAutoRecoveryDaemonEnabled(true); + BookieServer server = new BookieServer(conf, new TestBookieImpl(conf), + NullStatsLogger.INSTANCE, PooledByteBufAllocator.DEFAULT, + new MockUncleanShutdownDetection()); + server.start(); + return server; + } + + @AfterClass + public static void tearDownCluster() throws Exception { + // stop bookies + stopBookies(); + + // stop metadata store + metadataStore.close(); + log.info("Stopped the metadata store."); + + // clean up temp dirs + for (File f : TMP_DIRS) { + FileUtils.deleteDirectory(f); + } + log.info("Cleaned up all the temp directories."); + } + + private static void stopBookies() { + synchronized (BOOKIES) { + BOOKIES.forEach(BookieServer::shutdown); + log.info("Stopped all the bookies."); + } + } + +} diff --git
a/bookkeeper-server/src/test/java/org/apache/bookkeeper/suites/MetadataStore.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/suites/MetadataStore.java new file mode 100644 index 00000000000..853dee5a22e --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/suites/MetadataStore.java @@ -0,0 +1,41 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.bookkeeper.suites; + +import org.apache.bookkeeper.common.net.ServiceURI; + +/** + * A metadata store interface to start up and teardown a metadata service. + */ +public interface MetadataStore extends AutoCloseable { + + /** + * Start the metadata store. + */ + void start() throws Exception; + + /** + * Close the metadata store. + */ + void close() throws Exception; + + /** + * Return the service uri for bookies and clients to use. + * + * @return the service uri. + */ + ServiceURI getServiceUri(); + +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/suites/ZKMetadataStore.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/suites/ZKMetadataStore.java new file mode 100644 index 00000000000..5e34f2dbe97 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/suites/ZKMetadataStore.java @@ -0,0 +1,45 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.bookkeeper.suites; + +import org.apache.bookkeeper.common.net.ServiceURI; +import org.apache.bookkeeper.test.ZooKeeperUtil; + +/** + * Start the zookeeper based metadata store. 
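+ *
+ * <p>Lifecycle sketch, mirroring how {@link BookKeeperClusterTestSuite} drives it:
+ * <pre>{@code
+ * MetadataStore store = new ZKMetadataStore();
+ * store.start();
+ * ServiceURI uri = store.getServiceUri(); // hand this to client/server configs
+ * // ... run tests ...
+ * store.close();
+ * }</pre>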
+ */ +class ZKMetadataStore implements MetadataStore { + + private final ZooKeeperUtil zkUtil; + + ZKMetadataStore() { + this.zkUtil = new ZooKeeperUtil(); + } + + @Override + public void start() throws Exception { + this.zkUtil.startCluster(); + } + + @Override + public void close() throws Exception { + this.zkUtil.killCluster(); + } + + @Override + public ServiceURI getServiceUri() { + return ServiceURI.create(zkUtil.getMetadataServiceUri()); + } +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/AsyncLedgerOpsTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/AsyncLedgerOpsTest.java index 1f8717b6ff0..b5b151e622a 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/AsyncLedgerOpsTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/AsyncLedgerOpsTest.java @@ -29,7 +29,6 @@ import java.util.Enumeration; import java.util.Random; import java.util.Set; - import org.apache.bookkeeper.client.AsyncCallback.AddCallback; import org.apache.bookkeeper.client.AsyncCallback.CloseCallback; import org.apache.bookkeeper.client.AsyncCallback.CreateCallback; @@ -134,7 +133,9 @@ public void testAsyncCreateClose() throws IOException, BKException { // wait for all entries to be acknowledged synchronized (sync) { while (sync.counter < numEntriesToWrite) { - LOG.debug("Entries counter = " + sync.counter); + if (LOG.isDebugEnabled()) { + LOG.debug("Entries counter = " + sync.counter); + } sync.wait(); } } @@ -155,7 +156,9 @@ public void testAsyncCreateClose() throws IOException, BKException { } lh = ctx.getLh(); - LOG.debug("Number of entries written: " + lh.getLastAddConfirmed()); + if (LOG.isDebugEnabled()) { + LOG.debug("Number of entries written: " + lh.getLastAddConfirmed()); + } assertTrue("Verifying number of entries written", lh.getLastAddConfirmed() == (numEntriesToWrite - 1)); // read entries @@ -167,7 +170,9 @@ public void testAsyncCreateClose() throws IOException, BKException { } } - LOG.debug("*** READ COMPLETE ***"); + if (LOG.isDebugEnabled()) { + LOG.debug("*** READ COMPLETE ***"); + } // at this point, Enumeration ls is filled with the returned // values @@ -177,13 +182,14 @@ public void testAsyncCreateClose() throws IOException, BKException { Integer origEntry = origbb.getInt(); byte[] entry = ls.nextElement().getEntry(); ByteBuffer result = ByteBuffer.wrap(entry); - LOG.debug("Length of result: " + result.capacity()); - LOG.debug("Original entry: " + origEntry); - Integer retrEntry = result.getInt(); - LOG.debug("Retrieved entry: " + retrEntry); + if (LOG.isDebugEnabled()) { + LOG.debug("Length of result: " + result.capacity()); + LOG.debug("Original entry: " + origEntry); + LOG.debug("Retrieved entry: " + retrEntry); + } assertTrue("Checking entry " + i + " for equality", origEntry.equals(retrEntry)); - assertTrue("Checking entry " + i + " for size", entry.length == entriesSize.get(i).intValue()); + assertTrue("Checking entry " + i + " for size", entry.length == entriesSize.get(i)); i++; } assertTrue("Checking number of read entries", i == numEntriesToWrite); diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/BookKeeperClusterTestCase.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/BookKeeperClusterTestCase.java index e42b8187fb8..b73a3ee7b44 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/BookKeeperClusterTestCase.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/BookKeeperClusterTestCase.java @@ -21,45 +21,72 @@ package 
org.apache.bookkeeper.test; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.BOOKIE_SCOPE; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.LD_INDEX_SCOPE; +import static org.apache.bookkeeper.bookie.BookKeeperServerStats.LD_LEDGER_SCOPE; import static org.apache.bookkeeper.util.BookKeeperConstants.AVAILABLE_NODE; -import static org.junit.Assert.assertTrue; +import static org.junit.Assert.assertFalse; import com.google.common.base.Stopwatch; import java.io.File; import java.io.IOException; import java.net.UnknownHostException; -import java.util.HashMap; +import java.util.ArrayList; +import java.util.Arrays; import java.util.LinkedList; import java.util.List; -import java.util.Map; -import java.util.Map.Entry; +import java.util.Optional; +import java.util.OptionalInt; import java.util.concurrent.CountDownLatch; import java.util.concurrent.Future; import java.util.concurrent.SynchronousQueue; import java.util.concurrent.TimeUnit; - +import java.util.function.Function; +import java.util.stream.Collectors; import org.apache.bookkeeper.bookie.Bookie; import org.apache.bookkeeper.bookie.BookieException; +import org.apache.bookkeeper.bookie.BookieImpl; +import org.apache.bookkeeper.bookie.BookieResources; +import org.apache.bookkeeper.bookie.LedgerDirsManager; +import org.apache.bookkeeper.bookie.LedgerStorage; +import org.apache.bookkeeper.bookie.LegacyCookieValidation; +import org.apache.bookkeeper.bookie.MockUncleanShutdownDetection; +import org.apache.bookkeeper.bookie.ReadOnlyBookie; +import org.apache.bookkeeper.bookie.UncleanShutdownDetection; +import org.apache.bookkeeper.bookie.UncleanShutdownDetectionImpl; import org.apache.bookkeeper.client.BookKeeperTestClient; +import org.apache.bookkeeper.common.allocator.ByteBufAllocatorWithOomHandler; +import org.apache.bookkeeper.common.allocator.PoolingPolicy; import org.apache.bookkeeper.conf.AbstractConfiguration; import org.apache.bookkeeper.conf.ClientConfiguration; import org.apache.bookkeeper.conf.ServerConfiguration; import org.apache.bookkeeper.conf.TestBKConfiguration; +import org.apache.bookkeeper.discover.BookieServiceInfo; +import org.apache.bookkeeper.discover.RegistrationManager; +import org.apache.bookkeeper.meta.LedgerManager; +import org.apache.bookkeeper.meta.LedgerManagerFactory; +import org.apache.bookkeeper.meta.MetadataBookieDriver; import org.apache.bookkeeper.meta.zk.ZKMetadataDriverBase; import org.apache.bookkeeper.metastore.InMemoryMetaStore; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.net.BookieSocketAddress; import org.apache.bookkeeper.proto.BookieServer; import org.apache.bookkeeper.replication.Auditor; import org.apache.bookkeeper.replication.AutoRecoveryMain; -import org.apache.bookkeeper.replication.ReplicationException.CompatibilityException; -import org.apache.bookkeeper.replication.ReplicationException.UnavailableException; -import org.apache.bookkeeper.util.IOUtils; -import org.apache.commons.io.FileUtils; +import org.apache.bookkeeper.replication.ReplicationWorker; +import org.apache.bookkeeper.server.Main; +import org.apache.bookkeeper.stats.StatsLogger; +import org.apache.bookkeeper.stats.ThreadRegistry; +import org.apache.bookkeeper.util.DiskChecker; +import org.apache.bookkeeper.util.PortManager; import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.ZooKeeper; import org.junit.After; import org.junit.Before; import org.junit.Rule; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import 
org.junit.jupiter.api.TestInfo; import org.junit.rules.TestName; import org.junit.rules.Timeout; import org.slf4j.Logger; @@ -78,18 +105,20 @@ public abstract class BookKeeperClusterTestCase { @Rule public final Timeout globalTimeout; + protected String testName; + // Metadata service related variables - protected final ZooKeeperUtil zkUtil = new ZooKeeperUtil(); + protected final ZooKeeperCluster zkUtil; protected ZooKeeper zkc; protected String metadataServiceUri; // BookKeeper related variables - protected final List<File> tmpDirs = new LinkedList<File>(); - protected final List<BookieServer> bs = new LinkedList<BookieServer>(); - protected final List<ServerConfiguration> bsConfs = new LinkedList<ServerConfiguration>(); - private final Map<BookieSocketAddress, TestStatsProvider> bsLoggers = new HashMap<>(); + protected final TmpDirs tmpDirs = new TmpDirs(); + protected final List<ServerTester> servers = new LinkedList<>(); + protected int numBookies; protected BookKeeperTestClient bkc; + protected boolean useUUIDasBookieId = true; /* * Loopback interface is set as the listening interface and allowloopback is @@ -97,9 +126,8 @@ public abstract class BookKeeperClusterTestCase { * bind to loopback address. */ protected final ServerConfiguration baseConf = TestBKConfiguration.newServerConfiguration(); - protected final ClientConfiguration baseClientConf = new ClientConfiguration(); - - private final Map<BookieServer, AutoRecoveryMain> autoRecoveryProcesses = new HashMap<>(); + protected final ClientConfiguration baseClientConf = TestBKConfiguration.newClientConfiguration(); + private final ByteBufAllocatorWithOomHandler allocator = BookieResources.createAllocator(baseConf); private boolean isAutoRecoveryEnabled; @@ -108,7 +136,7 @@ protected void captureThrowable(Runnable c) { try { c.run(); } catch (Throwable e) { - LOG.error("Captured error: {}", e); + LOG.error("Captured error: ", e); asyncExceptions.add(e); } } @@ -118,15 +146,39 @@ public BookKeeperClusterTestCase(int numBookies) { } public BookKeeperClusterTestCase(int numBookies, int testTimeoutSecs) { + this(numBookies, 1, testTimeoutSecs); + } + + public BookKeeperClusterTestCase(int numBookies, int numOfZKNodes, int testTimeoutSecs) { this.numBookies = numBookies; this.globalTimeout = Timeout.seconds(testTimeoutSecs); + if (numOfZKNodes == 1) { + zkUtil = new ZooKeeperUtil(); + } else { + try { + zkUtil = new ZooKeeperClusterUtil(numOfZKNodes); + } catch (IOException | KeeperException | InterruptedException e) { + throw new RuntimeException(e); + } + } } @Before + @BeforeEach public void setUp() throws Exception { + setUp("/ledgers"); + } + + @Before + public void setTestNameJunit4() { + testName = runtime.getMethodName(); + } + + @BeforeEach + void setTestNameJunit5(TestInfo testInfo) { + testName = testInfo.getDisplayName(); + } + protected void setUp(String ledgersRootPath) throws Exception { LOG.info("Setting up test {}", getClass()); InMemoryMetaStore.reset(); @@ -141,7 +193,7 @@ protected void setUp(String ledgersRootPath) throws Exception { this.metadataServiceUri = getMetadataServiceUri(ledgersRootPath); startBKCluster(metadataServiceUri); LOG.info("Setup testcase {} @ metadata service {} in {} ms.", - runtime.getMethodName(), metadataServiceUri, sw.elapsed(TimeUnit.MILLISECONDS)); + testName, metadataServiceUri, sw.elapsed(TimeUnit.MILLISECONDS)); } catch (Exception e) { LOG.error("Error setting up", e); throw e; @@ -153,13 +205,14 @@ protected String getMetadataServiceUri(String ledgersRootPath) { } @After + @AfterEach public void tearDown() throws Exception { boolean failed = false; for (Throwable e : asyncExceptions) { - LOG.error("Got async
exception: ", e); failed = true; } - assertTrue("Async failure", !failed); + assertFalse("Async failure", failed); Stopwatch sw = Stopwatch.createStarted(); LOG.info("TearDown"); Exception tearDownException = null; @@ -179,21 +232,20 @@ public void tearDown() throws Exception { } // cleanup temp dirs try { - cleanupTempDirs(); + tmpDirs.cleanup(); } catch (Exception e) { LOG.error("Got Exception while trying to cleanupTempDirs", e); tearDownException = e; } - LOG.info("Tearing down test {} in {} ms.", runtime.getMethodName(), sw.elapsed(TimeUnit.MILLISECONDS)); + LOG.info("Tearing down test {} in {} ms.", testName, sw.elapsed(TimeUnit.MILLISECONDS)); if (tearDownException != null) { throw tearDownException; } } - protected File createTempDir(String prefix, String suffix) throws IOException { - File dir = IOUtils.createTempDir(prefix, suffix); - tmpDirs.add(dir); - return dir; + @After + public void clearMetricsThreadRegistry() throws Exception { + ThreadRegistry.clear(); } /** @@ -202,7 +254,7 @@ protected File createTempDir(String prefix, String suffix) throws IOException { * @throws Exception */ protected void startZKCluster() throws Exception { - zkUtil.startServer(); + zkUtil.startCluster(); zkc = zkUtil.getZooKeeperClient(); } @@ -212,7 +264,7 @@ protected void startZKCluster() throws Exception { * @throws Exception */ protected void stopZKCluster() throws Exception { - zkUtil.killServer(); + zkUtil.killCluster(); } /** @@ -224,6 +276,7 @@ protected void stopZKCluster() throws Exception { protected void startBKCluster(String metadataServiceUri) throws Exception { baseConf.setMetadataServiceUri(metadataServiceUri); baseClientConf.setMetadataServiceUri(metadataServiceUri); + baseClientConf.setAllocatorPoolingPolicy(PoolingPolicy.UnpooledHeap); if (numBookies > 0) { bkc = new BookKeeperTestClient(baseClientConf, new TestStatsProvider()); } @@ -245,27 +298,15 @@ protected void stopBKCluster() throws Exception { bkc.close(); } - for (BookieServer server : bs) { - server.shutdown(); - AutoRecoveryMain autoRecovery = autoRecoveryProcesses.get(server); - if (autoRecovery != null && isAutoRecoveryEnabled()) { - autoRecovery.shutdown(); - LOG.debug("Shutdown auto recovery for bookieserver:" - + server.getLocalAddress()); - } - } - bs.clear(); - bsLoggers.clear(); - } - - protected void cleanupTempDirs() throws Exception { - for (File f : tmpDirs) { - FileUtils.deleteDirectory(f); + stopReplicationService(); + for (ServerTester t : servers) { + t.shutdown(); } + servers.clear(); } protected ServerConfiguration newServerConfiguration() throws Exception { - File f = createTempDir("bookie", "test"); + File f = tmpDirs.createNew("bookie", "test"); int port; if (baseConf.isEnableLocalTransport() || !baseConf.getAllowEphemeralPorts()) { @@ -277,7 +318,7 @@ protected ServerConfiguration newServerConfiguration() throws Exception { } protected ClientConfiguration newClientConfiguration() { - return new ClientConfiguration(baseConf); + return new ClientConfiguration(baseClientConf); } protected ServerConfiguration newServerConfiguration(int port, File journalDir, File[] ledgerDirs) { @@ -290,57 +331,113 @@ protected ServerConfiguration newServerConfiguration(int port, File journalDir, } conf.setLedgerDirNames(ledgerDirNames); conf.setEnableTaskExecutionStats(true); + conf.setAllocatorPoolingPolicy(PoolingPolicy.UnpooledHeap); return conf; } protected void stopAllBookies() throws Exception { - for (BookieServer server : bs) { - server.shutdown(); + stopAllBookies(true); + } + + protected void 
stopAllBookies(boolean shutdownClient) throws Exception { + stopReplicationService(); + for (ServerTester t : servers) { + t.shutdown(); } - bsConfs.clear(); - bs.clear(); - if (bkc != null) { + servers.clear(); + if (shutdownClient && bkc != null) { bkc.close(); bkc = null; } } - protected void startAllBookies() throws Exception { - for (ServerConfiguration conf : bsConfs) { - bs.add(startBookie(conf)); - } - } - protected String newMetadataServiceUri(String ledgersRootPath) { return zkUtil.getMetadataServiceUri(ledgersRootPath); } + protected String newMetadataServiceUri(String ledgersRootPath, String type) { + return zkUtil.getMetadataServiceUri(ledgersRootPath, type); + } + /** * Get bookie address for bookie at index. */ - public BookieSocketAddress getBookie(int index) throws Exception { - if (bs.size() <= index || index < 0) { - throw new IllegalArgumentException("Invalid index, there are only " + bs.size() - + " bookies. Asked for " + index); + public BookieId getBookie(int index) throws Exception { + return servers.get(index).getServer().getBookieId(); + } + + protected List<BookieId> bookieAddresses() throws Exception { + List<BookieId> bookieIds = new ArrayList<>(); + for (ServerTester a : servers) { + bookieIds.add(a.getServer().getBookieId()); } - return bs.get(index).getLocalAddress(); + return bookieIds; } - /** - * Get bookie configuration for bookie. - */ - public ServerConfiguration getBkConf(BookieSocketAddress addr) throws Exception { - int bkIndex = 0; - for (BookieServer server : bs) { - if (server.getLocalAddress().equals(addr)) { - break; + protected List<File> bookieLedgerDirs() throws Exception { + return servers.stream() + .flatMap(t -> Arrays.stream(t.getConfiguration().getLedgerDirs())) + .collect(Collectors.toList()); + } + + protected List<File> bookieJournalDirs() throws Exception { + return servers.stream() + .flatMap(t -> Arrays.stream(t.getConfiguration().getJournalDirs())) + .collect(Collectors.toList()); + } + + protected BookieId addressByIndex(int index) throws Exception { + return servers.get(index).getServer().getBookieId(); + } + + protected BookieServer serverByIndex(int index) throws Exception { + return servers.get(index).getServer(); + } + + protected ServerConfiguration confByIndex(int index) throws Exception { + return servers.get(index).getConfiguration(); + } + + private Optional<ServerTester> byAddress(BookieId addr) throws UnknownHostException { + for (ServerTester s : servers) { + if (s.getServer().getBookieId().equals(addr)) { + return Optional.of(s); } - ++bkIndex; } - if (bkIndex < bs.size()) { - return bsConfs.get(bkIndex); + return Optional.empty(); + } + + protected int indexOfServer(BookieServer b) throws Exception { + for (int i = 0; i < servers.size(); i++) { + if (servers.get(i).getServer().equals(b)) { + return i; + } } - return null; + return -1; + } + + protected int lastBookieIndex() { + return servers.size() - 1; + } + + protected int bookieCount() { + return servers.size(); + } + + private OptionalInt indexByAddress(BookieId addr) throws UnknownHostException { + for (int i = 0; i < servers.size(); i++) { + if (addr.equals(servers.get(i).getServer().getBookieId())) { + return OptionalInt.of(i); + } + } + return OptionalInt.empty(); + } + + /** + * Get bookie configuration for bookie.
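+ * e.g. {@code ServerConfiguration conf = getBkConf(getBookie(0));}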
+ */ + public ServerConfiguration getBkConf(BookieId addr) throws Exception { + return byAddress(addr).get().getConfiguration(); } /** @@ -352,22 +449,17 @@ public ServerConfiguration getBkConf(BookieSocketAddress addr) throws Exception * @return the configuration of killed bookie * @throws InterruptedException */ - public ServerConfiguration killBookie(BookieSocketAddress addr) throws Exception { - BookieServer toRemove = null; - int toRemoveIndex = 0; - for (BookieServer server : bs) { - if (server.getLocalAddress().equals(addr)) { - server.shutdown(); - toRemove = server; - break; + public ServerConfiguration killBookie(BookieId addr) throws Exception { + Optional<ServerTester> tester = byAddress(addr); + if (tester.isPresent()) { + if (tester.get().autoRecovery != null + && tester.get().autoRecovery.getAuditor() != null + && tester.get().autoRecovery.getAuditor().isRunning()) { + LOG.warn("Killing bookie {} who is the current Auditor", addr); } - ++toRemoveIndex; - } - if (toRemove != null) { - stopAutoRecoveryService(toRemove); - bs.remove(toRemove); - bsLoggers.remove(addr); - return bsConfs.remove(toRemoveIndex); + servers.remove(tester.get()); + tester.get().shutdown(); + return tester.get().getConfiguration(); } return null; } @@ -379,12 +471,10 @@ public ServerConfiguration killBookie(BookieSocketAddress addr) throws Exception * Socket Address * @throws InterruptedException */ - public void setBookieToReadOnly(BookieSocketAddress addr) throws InterruptedException, UnknownHostException { - for (BookieServer server : bs) { - if (server.getLocalAddress().equals(addr)) { - server.getBookie().getStateManager().doTransitionToReadOnlyMode(); - break; - } + public void setBookieToReadOnly(BookieId addr) throws Exception { + Optional<ServerTester> tester = byAddress(addr); + if (tester.isPresent()) { + tester.get().getServer().getBookie().getStateManager().transitionToReadOnlyMode().get(); } } @@ -399,15 +489,9 @@ public void setBookieToReadOnly(BookieSocketAddress addr) throws InterruptedExce * @throws IOException */ public ServerConfiguration killBookie(int index) throws Exception { - if (index >= bs.size()) { - throw new IOException("Bookie does not exist"); - } - BookieServer server = bs.get(index); - server.shutdown(); - stopAutoRecoveryService(server); - bs.remove(server); - bsLoggers.remove(server.getLocalAddress()); - return bsConfs.remove(index); + ServerTester tester = servers.remove(index); + tester.shutdown(); + return tester.getConfiguration(); } /** @@ -418,13 +502,10 @@ public ServerConfiguration killBookie(int index) throws Exception { * @return configuration of killed bookie */ public ServerConfiguration killBookieAndWaitForZK(int index) throws Exception { - if (index >= bs.size()) { - throw new IOException("Bookie does not exist"); - } - BookieServer server = bs.get(index); + ServerTester tester = servers.get(index); // IKTODO: this method is awful ServerConfiguration ret = killBookie(index); while (zkc.exists(ZKMetadataDriverBase.resolveZkLedgersRootPath(baseConf) + "/" + AVAILABLE_NODE + "/" - + server.getLocalAddress().toString(), false) != null) { + + tester.getServer().getBookieId().toString(), false) != null) { Thread.sleep(500); } return ret; } @@ -441,31 +522,31 @@ public ServerConfiguration killBookieAndWaitForZK(int index) throws Exception { * @throws InterruptedException * @throws IOException */ - public CountDownLatch sleepBookie(BookieSocketAddress addr, final int seconds) + public CountDownLatch sleepBookie(BookieId addr, final int seconds) throws Exception { - for (final BookieServer
bookie : bs) { - if (bookie.getLocalAddress().equals(addr)) { - final CountDownLatch l = new CountDownLatch(1); - Thread sleeper = new Thread() { - @Override - public void run() { - try { - bookie.suspendProcessing(); - LOG.info("bookie {} is asleep", bookie.getLocalAddress()); - l.countDown(); - Thread.sleep(seconds * 1000); - bookie.resumeProcessing(); - LOG.info("bookie {} is awake", bookie.getLocalAddress()); - } catch (Exception e) { - LOG.error("Error suspending bookie", e); - } + Optional<ServerTester> tester = byAddress(addr); + if (tester.isPresent()) { + CountDownLatch latch = new CountDownLatch(1); + Thread sleeper = new Thread() { + @Override + public void run() { + try { + tester.get().getServer().suspendProcessing(); + LOG.info("bookie {} is asleep", tester.get().getAddress()); + latch.countDown(); + Thread.sleep(seconds * 1000); + tester.get().getServer().resumeProcessing(); + LOG.info("bookie {} is awake", tester.get().getAddress()); + } catch (Exception e) { + LOG.error("Error suspending bookie", e); } - }; - sleeper.start(); - return l; - } + } + }; + sleeper.start(); + return latch; + } else { + throw new IOException("Bookie not found"); } - throw new IOException("Bookie not found"); } /** @@ -478,19 +559,20 @@ public void run() { * @throws InterruptedException * @throws IOException */ - public void sleepBookie(BookieSocketAddress addr, final CountDownLatch l) + public void sleepBookie(BookieId addr, final CountDownLatch l) throws InterruptedException, IOException { final CountDownLatch suspendLatch = new CountDownLatch(1); sleepBookie(addr, l, suspendLatch); suspendLatch.await(); } - public void sleepBookie(BookieSocketAddress addr, final CountDownLatch l, final CountDownLatch suspendLatch) + public void sleepBookie(BookieId addr, final CountDownLatch l, final CountDownLatch suspendLatch) throws InterruptedException, IOException { - for (final BookieServer bookie : bs) { - if (bookie.getLocalAddress().equals(addr)) { - LOG.info("Sleep bookie {}.", addr); - Thread sleeper = new Thread() { + Optional<ServerTester> tester = byAddress(addr); + if (tester.isPresent()) { + BookieServer bookie = tester.get().getServer(); + LOG.info("Sleep bookie {}.", addr); + Thread sleeper = new Thread() { @Override public void run() { try { @@ -505,11 +587,10 @@ public void run() { } } }; - sleeper.start(); - return; - } + sleeper.start(); + } else { + throw new IOException("Bookie not found"); } - throw new IOException("Bookie not found"); } /** @@ -523,7 +604,7 @@ public void run() { */ public void restartBookies() throws Exception { - restartBookies(null); + restartBookies(c -> c); } /** @@ -536,57 +617,32 @@ public void restartBookies() * @throws KeeperException * @throws BookieException */ - public void restartBookie(BookieSocketAddress addr) throws Exception { - BookieServer toRemove = null; - int toRemoveIndex = 0; - for (BookieServer server : bs) { - if (server.getLocalAddress().equals(addr)) { - toRemove = server; - break; - } - ++toRemoveIndex; - } - if (toRemove != null) { - ServerConfiguration newConfig = bsConfs.get(toRemoveIndex); - killBookie(toRemoveIndex); + public void restartBookie(BookieId addr) throws Exception { + OptionalInt toRemove = indexByAddress(addr); + if (toRemove.isPresent()) { + ServerConfiguration newConfig = killBookie(toRemove.getAsInt()); Thread.sleep(1000); - bs.add(startBookie(newConfig)); - bsConfs.add(newConfig); - return; + startAndAddBookie(newConfig); + } else { + throw new IOException("Bookie not found"); } - throw new IOException("Bookie not found"); } - /** - * Restart
bookie servers using new configuration settings. Also restart the - * respective auto recovery process, if isAutoRecoveryEnabled is true. - * - * @param newConf - * New Configuration Settings - * @throws InterruptedException - * @throws IOException - * @throws KeeperException - * @throws BookieException - */ - public void restartBookies(ServerConfiguration newConf) + public void restartBookies(Function<ServerConfiguration, ServerConfiguration> reconfFunction) throws Exception { // shut down bookie server - for (BookieServer server : bs) { + List<ServerConfiguration> confs = new ArrayList<>(); + stopReplicationService(); + for (ServerTester server : servers) { server.shutdown(); - stopAutoRecoveryService(server); + confs.add(server.getConfiguration()); } - bs.clear(); - bsLoggers.clear(); + servers.clear(); Thread.sleep(1000); // restart them to ensure we can't - for (ServerConfiguration conf : bsConfs) { + for (ServerConfiguration conf : confs) { // ensure the bookie port is loaded correctly - int port = conf.getBookiePort(); - if (null != newConf) { - conf.loadConf(newConf); - } - conf.setBookiePort(port); - bs.add(startBookie(conf)); + startAndAddBookie(reconfFunction.apply(conf)); } } @@ -605,14 +661,28 @@ public int startNewBookie() public BookieSocketAddress startNewBookieAndReturnAddress() throws Exception { ServerConfiguration conf = newServerConfiguration(); - bsConfs.add(conf); LOG.info("Starting new bookie on port: {}", conf.getBookiePort()); - BookieServer server = startBookie(conf); - bs.add(server); + return startAndAddBookie(conf).getServer().getLocalAddress(); + } - return server.getLocalAddress(); + public BookieId startNewBookieAndReturnBookieId() + throws Exception { + ServerConfiguration conf = newServerConfiguration(); + LOG.info("Starting new bookie on port: {}", conf.getBookiePort()); + return startAndAddBookie(conf).getServer().getBookieId(); } + protected ServerTester startAndAddBookie(ServerConfiguration conf) throws Exception { + ServerTester server = startBookie(conf); + servers.add(server); + return server; + } + + protected ServerTester startAndAddBookie(ServerConfiguration conf, Bookie b) throws Exception { + ServerTester server = startBookie(conf, b); + servers.add(server); + return server; + } /** * Helper method to startup a bookie server using a configuration object. * Also, starts the auto recovery process if isAutoRecoveryEnabled is true. @@ -621,72 +691,54 @@ public BookieSocketAddress startNewBookieAndReturnAddress() * Server Configuration Object * */ - protected BookieServer startBookie(ServerConfiguration conf) + protected ServerTester startBookie(ServerConfiguration conf) throws Exception { - TestStatsProvider provider = new TestStatsProvider(); - BookieServer server = new BookieServer(conf, provider.getStatsLogger("")); - BookieSocketAddress address = Bookie.getBookieAddress(conf); - bsLoggers.put(address, provider); + ServerTester tester = new ServerTester(conf); if (bkc == null) { bkc = new BookKeeperTestClient(baseClientConf, new TestStatsProvider()); } + BookieId address = tester.getServer().getBookieId(); Future waitForBookie = conf.isForceReadOnlyBookie() ?
bkc.waitForReadOnlyBookie(address) : bkc.waitForWritableBookie(address); - server.start(); + tester.getServer().start(); waitForBookie.get(30, TimeUnit.SECONDS); LOG.info("New bookie '{}' has been created.", address); - try { - startAutoRecovery(server, conf); - } catch (CompatibilityException ce) { - LOG.error("Exception while starting AutoRecovery!", ce); - } catch (UnavailableException ue) { - LOG.error("Exception while starting AutoRecovery!", ue); + if (isAutoRecoveryEnabled()) { + tester.startAutoRecovery(); } - return server; + return tester; } /** * Start a bookie with the given bookie instance. Also, starts the auto * recovery for this bookie, if isAutoRecoveryEnabled is true. */ - protected BookieServer startBookie(ServerConfiguration conf, final Bookie b) + protected ServerTester startBookie(ServerConfiguration conf, final Bookie b) throws Exception { - TestStatsProvider provider = new TestStatsProvider(); - BookieServer server = new BookieServer(conf, provider.getStatsLogger("")) { - @Override - protected Bookie newBookie(ServerConfiguration conf) { - return b; - } - }; - - BookieSocketAddress address = Bookie.getBookieAddress(conf); + ServerTester tester = new ServerTester(conf, b); if (bkc == null) { bkc = new BookKeeperTestClient(baseClientConf, new TestStatsProvider()); } + BookieId address = tester.getServer().getBookieId(); Future waitForBookie = conf.isForceReadOnlyBookie() ? bkc.waitForReadOnlyBookie(address) : bkc.waitForWritableBookie(address); - server.start(); - bsLoggers.put(server.getLocalAddress(), provider); + tester.getServer().start(); waitForBookie.get(30, TimeUnit.SECONDS); LOG.info("New bookie '{}' has been created.", address); - try { - startAutoRecovery(server, conf); - } catch (CompatibilityException ce) { - LOG.error("Exception while starting AutoRecovery!", ce); - } catch (UnavailableException ue) { - LOG.error("Exception while starting AutoRecovery!", ue); + if (isAutoRecoveryEnabled()) { + tester.startAutoRecovery(); } - return server; + return tester; } public void setMetastoreImplClass(AbstractConfiguration conf) { @@ -717,36 +769,14 @@ public boolean isAutoRecoveryEnabled() { return isAutoRecoveryEnabled; } - private void startAutoRecovery(BookieServer bserver, - ServerConfiguration conf) throws Exception { - if (isAutoRecoveryEnabled()) { - AutoRecoveryMain autoRecoveryProcess = new AutoRecoveryMain(conf); - autoRecoveryProcess.start(); - autoRecoveryProcesses.put(bserver, autoRecoveryProcess); - LOG.debug("Starting Auditor Recovery for the bookie:" - + bserver.getLocalAddress()); - } - } - - private void stopAutoRecoveryService(BookieServer toRemove) throws Exception { - AutoRecoveryMain autoRecoveryMain = autoRecoveryProcesses - .remove(toRemove); - if (null != autoRecoveryMain && isAutoRecoveryEnabled()) { - autoRecoveryMain.shutdown(); - LOG.debug("Shutdown auto recovery for bookieserver:" - + toRemove.getLocalAddress()); - } - } - /** * Will starts the auto recovery process for the bookie servers. One auto * recovery process per each bookie server, if isAutoRecoveryEnabled is * enabled. */ public void startReplicationService() throws Exception { - int index = -1; - for (BookieServer bserver : bs) { - startAutoRecovery(bserver, bsConfs.get(++index)); + for (ServerTester t : servers) { + t.startAutoRecovery(); } } @@ -755,23 +785,30 @@ public void startReplicationService() throws Exception { * isAutoRecoveryEnabled is true. 
*/ public void stopReplicationService() throws Exception{ - if (!isAutoRecoveryEnabled()){ - return; - } - for (Entry autoRecoveryProcess : autoRecoveryProcesses - .entrySet()) { - autoRecoveryProcess.getValue().shutdown(); - LOG.debug("Shutdown Auditor Recovery for the bookie:" - + autoRecoveryProcess.getKey().getLocalAddress()); + for (ServerTester t : servers) { + t.stopAutoRecovery(); } } public Auditor getAuditor(int timeout, TimeUnit unit) throws Exception { final long timeoutAt = System.nanoTime() + TimeUnit.NANOSECONDS.convert(timeout, unit); while (System.nanoTime() < timeoutAt) { - for (AutoRecoveryMain p : autoRecoveryProcesses.values()) { - Auditor a = p.getAuditor(); - if (a != null) { + for (ServerTester t : servers) { + Auditor a = t.getAuditor(); + ReplicationWorker replicationWorker = t.getReplicationWorker(); + + // found a candidate Auditor + ReplicationWorker + if (a != null && a.isRunning() + && replicationWorker != null && replicationWorker.isRunning()) { + int deathWatchInterval = t.getConfiguration().getDeathWatchInterval(); + Thread.sleep(deathWatchInterval + 1000); + } + + // double check, because in the meantime AutoRecoveryDeathWatcher may have killed the + // AutoRecovery daemon + if (a != null && a.isRunning() + && replicationWorker != null && replicationWorker.isRunning()) { + LOG.info("Found Auditor Bookie {}", t.server.getBookieId()); return a; } } @@ -786,27 +823,177 @@ public Auditor getAuditor(int timeout, TimeUnit unit) throws Exception { * created using hostname. Represent as '/IPaddress' if the * InetSocketAddress was created using an IPaddress * - * @param addr - * inetaddress + * @param bookieId id * @return true if the address was created using an IP address, false if the * address was created using a hostname */ - public static boolean isCreatedFromIp(BookieSocketAddress addr) { + public boolean isCreatedFromIp(BookieId bookieId) { + BookieSocketAddress addr = bkc.getBookieAddressResolver().resolve(bookieId); return addr.getSocketAddress().toString().startsWith("/"); } public void resetBookieOpLoggers() { - for (TestStatsProvider provider : bsLoggers.values()) { - provider.clear(); - } + servers.forEach(t -> t.getStatsProvider().clear()); } - public TestStatsProvider getStatsProvider(BookieSocketAddress addr) { - return bsLoggers.get(addr); + public TestStatsProvider getStatsProvider(BookieId addr) throws UnknownHostException { + return byAddress(addr).get().getStatsProvider(); } public TestStatsProvider getStatsProvider(int index) throws Exception { - return getStatsProvider(bs.get(index).getLocalAddress()); + return servers.get(index).getStatsProvider(); } + /** + * Class to encapsulate all the test objects. 
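+     *
+     * <p>Typical use inside this test case (a sketch; error handling elided):
+     * <pre>{@code
+     * ServerTester tester = startBookie(newServerConfiguration());
+     * servers.add(tester);               // this pair is what startAndAddBookie does
+     * BookieId id = tester.getServer().getBookieId();
+     * tester.shutdown();                 // also closes the metadata handles it opened
+     * }</pre>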
+ */ + public class ServerTester { + private final ServerConfiguration conf; + private final TestStatsProvider provider; + private final Bookie bookie; + private final BookieServer server; + private final BookieSocketAddress address; + private final MetadataBookieDriver metadataDriver; + private final RegistrationManager registrationManager; + private final LedgerManagerFactory lmFactory; + private final LedgerManager ledgerManager; + private final LedgerStorage storage; + + private AutoRecoveryMain autoRecovery; + + public ServerTester(ServerConfiguration conf) throws Exception { + this.conf = conf; + provider = new TestStatsProvider(); + + StatsLogger rootStatsLogger = provider.getStatsLogger(""); + StatsLogger bookieStats = rootStatsLogger.scope(BOOKIE_SCOPE); + + metadataDriver = BookieResources.createMetadataDriver(conf, bookieStats); + registrationManager = metadataDriver.createRegistrationManager(); + lmFactory = metadataDriver.getLedgerManagerFactory(); + ledgerManager = lmFactory.newLedgerManager(); + + LegacyCookieValidation cookieValidation = new LegacyCookieValidation( + conf, registrationManager); + cookieValidation.checkCookies(Main.storageDirectoriesFromConf(conf)); + + DiskChecker diskChecker = BookieResources.createDiskChecker(conf); + LedgerDirsManager ledgerDirsManager = BookieResources.createLedgerDirsManager( + conf, diskChecker, bookieStats.scope(LD_LEDGER_SCOPE)); + LedgerDirsManager indexDirsManager = BookieResources.createIndexDirsManager( + conf, diskChecker, bookieStats.scope(LD_INDEX_SCOPE), ledgerDirsManager); + + UncleanShutdownDetection uncleanShutdownDetection = new UncleanShutdownDetectionImpl(ledgerDirsManager); + + storage = BookieResources.createLedgerStorage( + conf, ledgerManager, ledgerDirsManager, indexDirsManager, + bookieStats, allocator); + + if (conf.isForceReadOnlyBookie()) { + bookie = new ReadOnlyBookie(conf, registrationManager, storage, + diskChecker, ledgerDirsManager, indexDirsManager, + bookieStats, allocator, BookieServiceInfo.NO_INFO); + } else { + bookie = new BookieImpl(conf, registrationManager, storage, + diskChecker, ledgerDirsManager, indexDirsManager, + bookieStats, allocator, BookieServiceInfo.NO_INFO); + } + server = new BookieServer(conf, bookie, rootStatsLogger, allocator, + uncleanShutdownDetection); + address = BookieImpl.getBookieAddress(conf); + + autoRecovery = null; + } + + public ServerTester(ServerConfiguration conf, Bookie b) throws Exception { + this.conf = conf; + provider = new TestStatsProvider(); + + metadataDriver = null; + registrationManager = null; + ledgerManager = null; + lmFactory = null; + storage = null; + + bookie = b; + server = new BookieServer(conf, b, provider.getStatsLogger(""), + allocator, new MockUncleanShutdownDetection()); + address = BookieImpl.getBookieAddress(conf); + + autoRecovery = null; + } + + public void startAutoRecovery() throws Exception { + if (LOG.isDebugEnabled()) { + LOG.debug("Starting Auditor Recovery for the bookie: {}", address); + } + autoRecovery = new AutoRecoveryMain(conf); + autoRecovery.start(); + } + + public void stopAutoRecovery() { + if (autoRecovery != null) { + if (LOG.isDebugEnabled()) { + LOG.debug("Shutdown Auditor Recovery for the bookie: {}", address); + } + autoRecovery.shutdown(); + } + } + + public Auditor getAuditor() { + if (autoRecovery != null) { + return autoRecovery.getAuditor(); + } else { + return null; + } + } + + public ReplicationWorker getReplicationWorker() { + if (autoRecovery != null) { + return autoRecovery.getReplicationWorker(); + } 
else { + return null; + } + } + + public ServerConfiguration getConfiguration() { + return conf; + } + + public BookieServer getServer() { + return server; + } + + public TestStatsProvider getStatsProvider() { + return provider; + } + + public BookieSocketAddress getAddress() { + return address; + } + + public void shutdown() throws Exception { + server.shutdown(); + + if (ledgerManager != null) { + ledgerManager.close(); + } + if (lmFactory != null) { + lmFactory.close(); + } + if (registrationManager != null) { + registrationManager.close(); + } + if (metadataDriver != null) { + metadataDriver.close(); + } + + if (autoRecovery != null) { + if (LOG.isDebugEnabled()) { + LOG.debug("Shutdown auto recovery for bookie server: {}", address); + } + autoRecovery.shutdown(); + } + } + } } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/BookieClientTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/BookieClientTest.java index c6cc72bcd8a..d8ab2f5989a 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/BookieClientTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/BookieClientTest.java @@ -21,42 +21,66 @@ package org.apache.bookkeeper.test; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; import io.netty.buffer.ByteBuf; +import io.netty.buffer.ByteBufAllocator; +import io.netty.buffer.ByteBufUtil; +import io.netty.buffer.PooledByteBufAllocator; import io.netty.buffer.Unpooled; +import io.netty.buffer.UnpooledByteBufAllocator; +import io.netty.channel.Channel; import io.netty.channel.EventLoopGroup; import io.netty.channel.nio.NioEventLoopGroup; +import io.netty.util.ReferenceCounted; import io.netty.util.concurrent.DefaultThreadFactory; - import java.io.File; import java.io.IOException; import java.nio.ByteBuffer; +import java.time.Duration; import java.util.Arrays; import java.util.concurrent.CountDownLatch; import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; - +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicReference; +import lombok.extern.slf4j.Slf4j; +import org.apache.bookkeeper.bookie.MockUncleanShutdownDetection; +import org.apache.bookkeeper.bookie.TestBookieImpl; import org.apache.bookkeeper.client.BKException; import org.apache.bookkeeper.client.BKException.Code; +import org.apache.bookkeeper.client.BookKeeper; +import org.apache.bookkeeper.client.BookKeeperClientStats; import org.apache.bookkeeper.client.BookieInfoReader.BookieInfo; import org.apache.bookkeeper.client.api.WriteFlag; import org.apache.bookkeeper.common.util.OrderedExecutor; import org.apache.bookkeeper.conf.ClientConfiguration; import org.apache.bookkeeper.conf.ServerConfiguration; import org.apache.bookkeeper.conf.TestBKConfiguration; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.net.BookieSocketAddress; import org.apache.bookkeeper.proto.BookieClient; import org.apache.bookkeeper.proto.BookieClientImpl; +import org.apache.bookkeeper.proto.BookieProtoEncoding; import org.apache.bookkeeper.proto.BookieProtocol; import org.apache.bookkeeper.proto.BookieServer; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GetBookieInfoCallback; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.ReadEntryCallback; import 
org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.WriteCallback; import org.apache.bookkeeper.proto.BookkeeperProtocol; +import org.apache.bookkeeper.proto.DataFormats; +import org.apache.bookkeeper.proto.PerChannelBookieClient; +import org.apache.bookkeeper.proto.PerChannelBookieClientPool; +import org.apache.bookkeeper.proto.checksum.DigestManager; import org.apache.bookkeeper.stats.NullStatsLogger; +import org.apache.bookkeeper.test.TestStatsProvider.TestOpStatsLogger; +import org.apache.bookkeeper.test.TestStatsProvider.TestStatsLogger; import org.apache.bookkeeper.util.ByteBufList; import org.apache.bookkeeper.util.IOUtils; +import org.awaitility.Awaitility; +import org.awaitility.reflect.WhiteboxImpl; import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -64,6 +88,7 @@ /** * Test the bookie client. */ +@Slf4j public class BookieClientTest { BookieServer bs; File tmpDir; @@ -80,11 +105,16 @@ public void setUp() throws Exception { // know via ZooKeeper which Bookies are available, okay, so pass in null // for the zkServers input parameter when constructing the BookieServer. ServerConfiguration conf = TestBKConfiguration.newServerConfiguration(); + conf.setGcWaitTime(1000 * 100); conf.setBookiePort(port) .setJournalDirName(tmpDir.getPath()) .setLedgerDirNames(new String[] { tmpDir.getPath() }) .setMetadataServiceUri(null); - bs = new BookieServer(conf); + + bs = new BookieServer( + conf, new TestBookieImpl(conf), + NullStatsLogger.INSTANCE, UnpooledByteBufAllocator.DEFAULT, + new MockUncleanShutdownDetection()); bs.start(); eventLoopGroup = new NioEventLoopGroup(); executor = OrderedExecutor.newBuilder() @@ -105,7 +135,7 @@ public void tearDown() throws Exception { } private static void recursiveDelete(File dir) { - File children[] = dir.listFiles(); + File[] children = dir.listFiles(); if (children != null) { for (File child : children) { recursiveDelete(child); @@ -136,7 +166,7 @@ public void readEntryComplete(int rc, long ledgerId, long entryId, ByteBuf bb, O }; WriteCallback wrcb = new WriteCallback() { - public void writeComplete(int rc, long ledgerId, long entryId, BookieSocketAddress addr, Object ctx) { + public void writeComplete(int rc, long ledgerId, long entryId, BookieId addr, Object ctx) { if (ctx != null) { synchronized (ctx) { if (ctx instanceof ResultStruct) { @@ -154,11 +184,12 @@ public void testWriteGaps() throws Exception { final Object notifyObject = new Object(); byte[] passwd = new byte[20]; Arrays.fill(passwd, (byte) 'a'); - BookieSocketAddress addr = bs.getLocalAddress(); + BookieId addr = bs.getBookieId(); ResultStruct arc = new ResultStruct(); - BookieClient bc = new BookieClientImpl(new ClientConfiguration(), eventLoopGroup, executor, - scheduler, NullStatsLogger.INSTANCE); + BookieClient bc = new BookieClientImpl(new ClientConfiguration(), eventLoopGroup, + UnpooledByteBufAllocator.DEFAULT, executor, scheduler, NullStatsLogger.INSTANCE, + BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); ByteBufList bb = createByteBuffer(1, 1, 1); bc.addEntry(addr, 1, passwd, 1, bb, wrcb, arc, BookieProtocol.FLAG_NONE, false, WriteFlag.NONE); synchronized (arc) { @@ -257,9 +288,10 @@ private ByteBufList createByteBuffer(int i, long lid, long eid) { @Test public void testNoLedger() throws Exception { ResultStruct arc = new ResultStruct(); - BookieSocketAddress addr = bs.getLocalAddress(); - BookieClient bc = new BookieClientImpl(new ClientConfiguration(), eventLoopGroup, executor, - scheduler, NullStatsLogger.INSTANCE); + BookieId addr = 
bs.getBookieId(); + BookieClient bc = new BookieClientImpl(new ClientConfiguration(), eventLoopGroup, + UnpooledByteBufAllocator.DEFAULT, executor, scheduler, NullStatsLogger.INSTANCE, + BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); synchronized (arc) { bc.readEntry(addr, 2, 13, recb, arc, BookieProtocol.FLAG_NONE); arc.wait(1000); @@ -268,10 +300,24 @@ public void testNoLedger() throws Exception { } @Test - public void testGetBookieInfo() throws IOException, InterruptedException { + public void testGetBookieInfoWithLimitStatsLogging() throws IOException, InterruptedException { + testGetBookieInfo(true); + } + + @Test + public void testGetBookieInfoWithoutLimitStatsLogging() throws IOException, InterruptedException { + testGetBookieInfo(false); + } + + public void testGetBookieInfo(boolean limitStatsLogging) throws IOException, InterruptedException { + BookieId bookieId = bs.getBookieId(); BookieSocketAddress addr = bs.getLocalAddress(); - BookieClient bc = new BookieClientImpl(new ClientConfiguration(), new NioEventLoopGroup(), executor, - scheduler, NullStatsLogger.INSTANCE); + ClientConfiguration clientConf = new ClientConfiguration(); + clientConf.setLimitStatsLogging(limitStatsLogging); + TestStatsProvider statsProvider = new TestStatsProvider(); + TestStatsLogger statsLogger = statsProvider.getStatsLogger(""); + BookieClient bc = new BookieClientImpl(clientConf, new NioEventLoopGroup(), UnpooledByteBufAllocator.DEFAULT, + executor, scheduler, statsLogger, BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); long flags = BookkeeperProtocol.GetBookieInfoRequest.Flags.FREE_DISK_SPACE_VALUE | BookkeeperProtocol.GetBookieInfoRequest.Flags.TOTAL_DISK_CAPACITY_VALUE; @@ -280,6 +326,7 @@ class CallbackObj { long requested; long freeDiskSpace, totalDiskCapacity; CountDownLatch latch = new CountDownLatch(1); + CallbackObj(long requested) { this.requested = requested; this.rc = 0; @@ -288,7 +335,7 @@ class CallbackObj { } } CallbackObj obj = new CallbackObj(flags); - bc.getBookieInfo(addr, flags, new GetBookieInfoCallback() { + bc.getBookieInfo(bookieId, flags, new GetBookieInfoCallback() { @Override public void getBookieInfoComplete(int rc, BookieInfo bInfo, Object ctx) { CallbackObj obj = (CallbackObj) ctx; @@ -298,7 +345,7 @@ public void getBookieInfoComplete(int rc, BookieInfo bInfo, Object ctx) { obj.freeDiskSpace = bInfo.getFreeDiskSpace(); } if ((obj.requested - & BookkeeperProtocol.GetBookieInfoRequest.Flags.TOTAL_DISK_CAPACITY_VALUE) != 0) { + & BookkeeperProtocol.GetBookieInfoRequest.Flags.TOTAL_DISK_CAPACITY_VALUE) != 0) { obj.totalDiskCapacity = bInfo.getTotalDiskSpace(); } } @@ -312,5 +359,561 @@ public void getBookieInfoComplete(int rc, BookieInfo bInfo, Object ctx) { assertTrue("GetBookieInfo failed with error " + obj.rc, obj.rc == Code.OK); assertTrue("GetBookieInfo failed with error " + obj.rc, obj.freeDiskSpace <= obj.totalDiskCapacity); assertTrue("GetBookieInfo failed with error " + obj.rc, obj.totalDiskCapacity > 0); + + TestOpStatsLogger perChannelBookieClientScopeOfThisAddr = (TestOpStatsLogger) statsLogger + .scope(BookKeeperClientStats.CHANNEL_SCOPE) + .scopeLabel(BookKeeperClientStats.BOOKIE_LABEL, addr.toBookieId().toString()) + .getOpStatsLogger(BookKeeperClientStats.GET_BOOKIE_INFO_OP); + int expectedBookieInfoSuccessCount = (limitStatsLogging) ? 
0 : 1; + assertEquals("BookieInfoSuccessCount", expectedBookieInfoSuccessCount, + perChannelBookieClientScopeOfThisAddr.getSuccessCount()); + } + + @Test + public void testBatchRead() throws Exception { + ClientConfiguration conf = new ClientConfiguration(); + conf.setUseV2WireProtocol(true); + BookieClient bc = new BookieClientImpl(conf, eventLoopGroup, + UnpooledByteBufAllocator.DEFAULT, executor, scheduler, NullStatsLogger.INSTANCE, + BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); + + BookieId addr = bs.getBookieId(); + byte[] passwd = new byte[20]; + Arrays.fill(passwd, (byte) 'a'); + DigestManager digestManager = DigestManager.instantiate(1, passwd, + DataFormats.LedgerMetadataFormat.DigestType.CRC32C, ByteBufAllocator.DEFAULT, true); + byte[] masterKey = DigestManager.generateMasterKey(passwd); + + final int entries = 10; + int length = 0; + for (int i = 0; i < entries; i++) { + ByteBuf bb = Unpooled.buffer(4); + bb.writeInt(i); + length += 4; + ReferenceCounted content = digestManager.computeDigestAndPackageForSending(i, i - 1, length, bb, + masterKey, BookieProtocol.FLAG_NONE); + ResultStruct arc = new ResultStruct(); + bc.addEntry(addr, 1, passwd, i, content, wrcb, arc, BookieProtocol.FLAG_NONE, false, WriteFlag.NONE); + Awaitility.await().untilAsserted(() -> { + assertEquals(0, arc.rc); + }); + } + AtomicReference result = new AtomicReference<>(); + AtomicInteger resCode = new AtomicInteger(); + + bc.batchReadEntries(addr, 1, 0, 5, 5 * 1024 * 1024, (rc, ledgerId, startEntryId, bufList, ctx) -> { + resCode.set(rc); + result.set(bufList); + }, null, BookieProtocol.FLAG_NONE); + + Awaitility.await().untilAsserted(() -> { + ByteBufList byteBufList = result.get(); + assertNotNull(byteBufList); + }); + assertEquals(Code.OK, resCode.get()); + ByteBufList byteBufList = result.get(); + assertEquals(5, byteBufList.size()); + for (int i = 0; i < byteBufList.size(); i++) { + ByteBuf buffer = byteBufList.getBuffer(i); + //ledgerId + assertEquals(1, buffer.readLong()); + //entryId + assertEquals(i, buffer.readLong()); + //lac + assertEquals(i - 1, buffer.readLong()); + //length + assertEquals((i + 1) * 4, buffer.readLong()); + //digest + int i1 = buffer.readInt(); + //data + ByteBuf byteBuf = buffer.readBytes(buffer.readableBytes()); + assertEquals(i, byteBuf.readInt()); + } + }
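Each element of the `ByteBufList` returned by `batchReadEntries` is a self-describing frame, which the assertions above walk field by field. As a hedged illustration of that layout (the helper class below is hypothetical, inferred from the assertions rather than taken from the patch):

```java
import io.netty.buffer.ByteBuf;

// Hypothetical helper: unpacks one frame of a batched-read response using
// the layout the test asserts: four longs, one int digest, then the payload.
final class BatchedEntryFrame {
    final long ledgerId;
    final long entryId;
    final long lac;      // last add confirmed at write time
    final long length;   // accumulated ledger length
    final int digest;    // checksum word, skipped by the test
    final ByteBuf payload;

    BatchedEntryFrame(ByteBuf buffer) {
        ledgerId = buffer.readLong();
        entryId = buffer.readLong();
        lac = buffer.readLong();
        length = buffer.readLong();
        digest = buffer.readInt();
        payload = buffer.readBytes(buffer.readableBytes());
    }
}
```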
+ + @Test + public void testBatchedReadWithLostFourthEntry() throws Exception { + ClientConfiguration conf = new ClientConfiguration(); + conf.setUseV2WireProtocol(true); + BookieClient bc = new BookieClientImpl(conf, eventLoopGroup, + UnpooledByteBufAllocator.DEFAULT, executor, scheduler, NullStatsLogger.INSTANCE, + BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); + + BookieId addr = bs.getBookieId(); + byte[] passwd = new byte[20]; + Arrays.fill(passwd, (byte) 'a'); + DigestManager digestManager = DigestManager.instantiate(1, passwd, + DataFormats.LedgerMetadataFormat.DigestType.CRC32C, ByteBufAllocator.DEFAULT, true); + byte[] masterKey = DigestManager.generateMasterKey(passwd); + + final int entries = 10; + int length = 0; + for (int i = 0; i < entries; i++) { + // The bookie server lost entry 3 + if (i == 3) { + continue; + } + ByteBuf bb = Unpooled.buffer(4); + bb.writeInt(i); + length += 4; + ReferenceCounted content = digestManager.computeDigestAndPackageForSending(i, i - 1, length, bb, + masterKey, BookieProtocol.FLAG_NONE); + ResultStruct arc = new ResultStruct(); + bc.addEntry(addr, 1, passwd, i, content, wrcb, arc, BookieProtocol.FLAG_NONE, false, WriteFlag.NONE); + Awaitility.await().untilAsserted(() -> { assertEquals(0, arc.rc); + }); + } + AtomicReference result = new AtomicReference<>(); + AtomicInteger resCode = new AtomicInteger(); + + bc.batchReadEntries(addr, 1, 0, 5, 5 * 1024 * 1024, (rc, ledgerId, startEntryId, bufList, ctx) -> { + resCode.set(rc); + result.set(bufList); + }, null, BookieProtocol.FLAG_NONE); + + Awaitility.await().untilAsserted(() -> { + ByteBufList byteBufList = result.get(); + assertNotNull(byteBufList); + }); + assertEquals(Code.OK, resCode.get()); + ByteBufList byteBufList = result.get(); + assertEquals(3, byteBufList.size()); + for (int i = 0; i < byteBufList.size(); i++) { + ByteBuf buffer = byteBufList.getBuffer(i); + //ledgerId + assertEquals(1, buffer.readLong()); + //entryId + assertEquals(i, buffer.readLong()); + //lac + assertEquals(i - 1, buffer.readLong()); + //length + assertEquals((i + 1) * 4, buffer.readLong()); + //digest + int i1 = buffer.readInt(); + //data + ByteBuf byteBuf = buffer.readBytes(buffer.readableBytes()); + assertEquals(i, byteBuf.readInt()); + } + } + + @Test + public void testBatchedReadWithLostFirstEntry() throws Exception { + ClientConfiguration conf = new ClientConfiguration(); + conf.setUseV2WireProtocol(true); + BookieClient bc = new BookieClientImpl(conf, eventLoopGroup, + UnpooledByteBufAllocator.DEFAULT, executor, scheduler, NullStatsLogger.INSTANCE, + BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); + + BookieId addr = bs.getBookieId(); + byte[] passwd = new byte[20]; + Arrays.fill(passwd, (byte) 'a'); + DigestManager digestManager = DigestManager.instantiate(1, passwd, + DataFormats.LedgerMetadataFormat.DigestType.CRC32C, ByteBufAllocator.DEFAULT, true); + byte[] masterKey = DigestManager.generateMasterKey(passwd); + + final int entries = 10; + int length = 0; + for (int i = 0; i < entries; i++) { + // The bookie server lost entry 0 + if (i == 0) { + continue; + } + ByteBuf bb = Unpooled.buffer(4); + bb.writeInt(i); + length += 4; + ReferenceCounted content = digestManager.computeDigestAndPackageForSending(i, i - 1, length, bb, + masterKey, BookieProtocol.FLAG_NONE); + ResultStruct arc = new ResultStruct(); + bc.addEntry(addr, 1, passwd, i, content, wrcb, arc, BookieProtocol.FLAG_NONE, false, WriteFlag.NONE); + Awaitility.await().untilAsserted(() -> { + assertEquals(0, arc.rc); + }); + } + AtomicReference result = new AtomicReference<>(); + AtomicInteger resCode = new AtomicInteger(); + + bc.batchReadEntries(addr, 1, 0, 5, 5 * 1024 * 1024, (rc, ledgerId, startEntryId, bufList, ctx) -> { + resCode.set(rc); + result.set(bufList); + }, null, BookieProtocol.FLAG_NONE); + + Awaitility.await().untilAsserted(() -> { + ByteBufList byteBufList = result.get(); + assertNotNull(byteBufList); + }); + assertEquals(Code.NoSuchEntryException, resCode.get()); + ByteBufList byteBufList = result.get(); + assertEquals(0, byteBufList.size()); + }
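The write callbacks in these tests complete on I/O threads, so results are published through atomics and picked up by polling with Awaitility rather than by sleeping. A minimal, self-contained sketch of the pattern used throughout this file (names are illustrative):

```java
import static org.junit.Assert.assertNotNull;

import java.time.Duration;
import java.util.concurrent.atomic.AtomicReference;
import org.awaitility.Awaitility;

class AwaitCallbackSketch {
    // Poll until an asynchronous callback has published a value,
    // failing the test if nothing arrives within the timeout.
    static <T> T awaitResult(AtomicReference<T> result) {
        Awaitility.await().atMost(Duration.ofSeconds(10))
                .untilAsserted(() -> assertNotNull(result.get()));
        return result.get();
    }
}
```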
+ + // This test is for the `isSmallEntry` improvement. + @Test + public void testBatchedReadWithBigPayload() throws Exception { + ClientConfiguration conf = new ClientConfiguration(); + conf.setUseV2WireProtocol(true); + BookieClient bc = new BookieClientImpl(conf, eventLoopGroup, + UnpooledByteBufAllocator.DEFAULT, executor, scheduler, NullStatsLogger.INSTANCE, + BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); + + BookieId addr = bs.getBookieId(); + byte[] passwd = new byte[20]; + Arrays.fill(passwd, (byte) 'a'); + DigestManager digestManager = DigestManager.instantiate(1, passwd, + DataFormats.LedgerMetadataFormat.DigestType.CRC32C, ByteBufAllocator.DEFAULT, true); + byte[] masterKey = DigestManager.generateMasterKey(passwd); + byte[] kbData = new byte[1024]; + for (int i = 0; i < 1024; i++) { + kbData[i] = (byte) i; + } + final int entries = 20; + int length = 0; + for (int i = 0; i < entries; i++) { + ByteBuf bb = Unpooled.buffer(1024); + bb.writeBytes(kbData); + length += 1024; + ReferenceCounted content = digestManager.computeDigestAndPackageForSending(i, i - 1, length, bb, + masterKey, BookieProtocol.FLAG_NONE); + ResultStruct arc = new ResultStruct(); + bc.addEntry(addr, 1, passwd, i, content, wrcb, arc, BookieProtocol.FLAG_NONE, false, WriteFlag.NONE); + Awaitility.await().untilAsserted(() -> { + assertEquals(0, arc.rc); + }); + } + + AtomicReference result = new AtomicReference<>(); + AtomicInteger resCode = new AtomicInteger(); + + bc.batchReadEntries(addr, 1, 0, 20, 5 * 1024 * 1024, (rc, ledgerId, startEntryId, bufList, ctx) -> { + result.set(bufList); + resCode.set(rc); + }, null, BookieProtocol.FLAG_NONE); + Awaitility.await().untilAsserted(() -> { + ByteBufList byteBufList = result.get(); + assertNotNull(byteBufList); + }); + ByteBufList byteBufList = result.get(); + assertEquals(0, resCode.get()); + assertEquals(20, byteBufList.size()); + for (int i = 0; i < byteBufList.size(); i++) { + ByteBuf buffer = byteBufList.getBuffer(i); + //ledgerId + assertEquals(1, buffer.readLong()); + //entryId + assertEquals(i, buffer.readLong()); + //lac + assertEquals(i - 1, buffer.readLong()); + //length + assertEquals((i + 1) * 1024, buffer.readLong()); + //digest + int i1 = buffer.readInt(); + //data + ByteBuf byteBuf = buffer.readBytes(buffer.readableBytes()); + assertEquals(1024, byteBuf.readableBytes()); + byte[] bytes = ByteBufUtil.getBytes(byteBuf); + assertTrue(Arrays.equals(kbData, bytes)); + } + } + + @Test + public void testBatchedReadWithMaxSizeLimitCase1() throws Exception { + ClientConfiguration conf = new ClientConfiguration(); + conf.setUseV2WireProtocol(true); + BookieClient bc = new BookieClientImpl(conf, eventLoopGroup, + UnpooledByteBufAllocator.DEFAULT, executor, scheduler, NullStatsLogger.INSTANCE, + BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); + + BookieId addr = bs.getBookieId(); + byte[] passwd = new byte[20]; + Arrays.fill(passwd, (byte) 'a'); + DigestManager digestManager = DigestManager.instantiate(1, passwd, + DataFormats.LedgerMetadataFormat.DigestType.CRC32C, ByteBufAllocator.DEFAULT, true); + byte[] masterKey = DigestManager.generateMasterKey(passwd); + byte[] kbData = new byte[1024]; + for (int i = 0; i < 1024; i++) { + kbData[i] = (byte) i; + } + final int entries = 20; + int length = 0; + for (int i = 0; i < entries; i++) { + ByteBuf bb = Unpooled.buffer(1024); + bb.writeBytes(kbData); + length += 1024; + ReferenceCounted content = digestManager.computeDigestAndPackageForSending(i, i - 1, length, bb, + masterKey, BookieProtocol.FLAG_NONE); + ResultStruct arc = new ResultStruct(); + bc.addEntry(addr, 1,
passwd, i, content, wrcb, arc, BookieProtocol.FLAG_NONE, false, WriteFlag.NONE); + Awaitility.await().untilAsserted(() -> { + assertEquals(0, arc.rc); + }); + } + + AtomicReference result = new AtomicReference<>(); + AtomicInteger resCode = new AtomicInteger(); + + // one entry size = 8(ledgerId) + 8(entryId) + 8(lac) + 8(length) + 4(digest) + payload size + int entrySize = 8 + 8 + 8 + 8 + 4 + 1024; + bc.batchReadEntries(addr, 1, 0, 20, 5 * entrySize, (rc, ledgerId, startEntryId, bufList, ctx) -> { + result.set(bufList); + resCode.set(rc); + }, null, BookieProtocol.FLAG_NONE); + Awaitility.await().untilAsserted(() -> { + ByteBufList byteBufList = result.get(); + assertNotNull(byteBufList); + }); + ByteBufList byteBufList = result.get(); + assertEquals(0, resCode.get()); + assertEquals(4, byteBufList.size()); + for (int i = 0; i < byteBufList.size(); i++) { + ByteBuf buffer = byteBufList.getBuffer(i); + //ledgerId + assertEquals(1, buffer.readLong()); + //entryId + assertEquals(i, buffer.readLong()); + //lac + assertEquals(i - 1, buffer.readLong()); + //length + assertEquals((i + 1) * 1024, buffer.readLong()); + //digest + int i1 = buffer.readInt(); + //data + ByteBuf byteBuf = buffer.readBytes(buffer.readableBytes()); + assertEquals(1024, byteBuf.readableBytes()); + byte[] bytes = ByteBufUtil.getBytes(byteBuf); + assertTrue(Arrays.equals(kbData, bytes)); + } + }
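Case 1 above and case 2 below hand different budgets to `batchReadEntries` and assert different entry counts (4 versus 5). Both counts are consistent with a simple accounting model in which the response header and a 4-byte length prefix per returned entry also consume the budget; that model is inferred from the tests, not a documented protocol constant:

```java
class BatchReadSizeMath {
    public static void main(String[] args) {
        int entrySize = 8 + 8 + 8 + 8 + 4 + 1024; // ledgerId, entryId, lac, length, digest, payload
        int headerSize = 24 + 8 + 4;              // response header, as case 2 estimates it
        int perEntryPrefix = 4;                   // assumed 4-byte length prefix per returned entry

        // Case 1: a budget of exactly five frames leaves no room for the
        // header and prefixes, so only four entries come back.
        System.out.println(entriesThatFit(5 * entrySize, entrySize, headerSize, perEntryPrefix)); // 4

        // Case 2: adding the header and five prefixes admits all five frames.
        System.out.println(entriesThatFit(5 * entrySize + headerSize + 5 * perEntryPrefix,
                entrySize, headerSize, perEntryPrefix)); // 5
    }

    static int entriesThatFit(long budget, int entrySize, int headerSize, int perEntryPrefix) {
        long used = headerSize;
        int count = 0;
        while (used + entrySize + perEntryPrefix <= budget) {
            used += entrySize + perEntryPrefix;
            count++;
        }
        return count;
    }
}
```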
+ + // Unlike case 1, this case also takes the response header size into account. + @Test + public void testBatchedReadWithMaxSizeLimitCase2() throws Exception { + ClientConfiguration conf = new ClientConfiguration(); + conf.setUseV2WireProtocol(true); + BookieClient bc = new BookieClientImpl(conf, eventLoopGroup, + UnpooledByteBufAllocator.DEFAULT, executor, scheduler, NullStatsLogger.INSTANCE, + BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); + + BookieId addr = bs.getBookieId(); + byte[] passwd = new byte[20]; + Arrays.fill(passwd, (byte) 'a'); + DigestManager digestManager = DigestManager.instantiate(1, passwd, + DataFormats.LedgerMetadataFormat.DigestType.CRC32C, ByteBufAllocator.DEFAULT, true); + byte[] masterKey = DigestManager.generateMasterKey(passwd); + byte[] kbData = new byte[1024]; + for (int i = 0; i < 1024; i++) { + kbData[i] = (byte) i; + } + final int entries = 20; + int length = 0; + for (int i = 0; i < entries; i++) { + ByteBuf bb = Unpooled.buffer(1024); + bb.writeBytes(kbData); + length += 1024; + ReferenceCounted content = digestManager.computeDigestAndPackageForSending(i, i - 1, length, bb, + masterKey, BookieProtocol.FLAG_NONE); + ResultStruct arc = new ResultStruct(); + bc.addEntry(addr, 1, passwd, i, content, wrcb, arc, BookieProtocol.FLAG_NONE, false, WriteFlag.NONE); + Awaitility.await().untilAsserted(() -> { + assertEquals(0, arc.rc); + }); + } + + AtomicReference result = new AtomicReference<>(); + AtomicInteger resCode = new AtomicInteger(); + + // one entry size = 8(ledgerId) + 8(entryId) + 8(lac) + 8(length) + 4(digest) + payload size + int entrySize = 8 + 8 + 8 + 8 + 4 + 1024; + // response header size. + int headerSize = 24 + 8 + 4; + bc.batchReadEntries(addr, 1, 0, 20, 5 * entrySize + headerSize + (5 * 4), + (rc, ledgerId, startEntryId, bufList, ctx) -> { + result.set(bufList); + resCode.set(rc); + }, null, BookieProtocol.FLAG_NONE); + Awaitility.await().untilAsserted(() -> { + ByteBufList byteBufList = result.get(); + assertNotNull(byteBufList); + }); + ByteBufList byteBufList = result.get(); + assertEquals(0, resCode.get()); + assertEquals(5, byteBufList.size()); + for (int i = 0; i < byteBufList.size(); i++) { + ByteBuf buffer = byteBufList.getBuffer(i); + //ledgerId + assertEquals(1, buffer.readLong()); + //entryId + assertEquals(i, buffer.readLong()); + //lac + assertEquals(i - 1, buffer.readLong()); + //length + assertEquals((i + 1) * 1024, buffer.readLong()); + //digest + int i1 = buffer.readInt(); + //data + ByteBuf byteBuf = buffer.readBytes(buffer.readableBytes()); + assertEquals(1024, byteBuf.readableBytes()); + byte[] bytes = ByteBufUtil.getBytes(byteBuf); + assertTrue(Arrays.equals(kbData, bytes)); + } + } + + /** + * The call stacks of "BookieClientImpl.addEntry" are explained here: + * 1.`BookieClientImpl.addEntry`. + * a.Retain the `ByteBuf` before getting the `PerChannelBookieClient`. We call this `ByteBuf` `toSend` in the + * following sections. `toSend.refCnt` is `2` now. + * 2.Get the `PerChannelBookieClient`. + * 3.`ChannelReadyForAddEntryCallback.operationComplete` + * a.`PerChannelBookieClient.addEntry` + * a-1.Build a new ByteBuf for the request command. We call this new `ByteBuf` `request` in the following + * sections. + * a-2.`channel.writeAndFlush(request)`, or release the ByteBuf when the `channel` is switching. + * Note that the callback is called immediately if the channel is switching. + * b.Release the `ByteBuf` since it was retained at step 1. `toSend.refCnt` should be `1` now. + */ + public void testDataRefCntWhenReconnect(boolean useV2WireProtocol, boolean smallPayload, + boolean withDelayReconnect, boolean withDelayAddEntry, + int tryTimes) throws Exception { + final long ledgerId = 1; + final BookieId addr = bs.getBookieId(); + // Build passwd. + byte[] passwd = new byte[20]; + Arrays.fill(passwd, (byte) 'a'); + // Build digest manager. + DigestManager digestManager = DigestManager.instantiate(1, passwd, + BookKeeper.DigestType.toProtoDigestType(BookKeeper.DigestType.DUMMY), + PooledByteBufAllocator.DEFAULT, useV2WireProtocol); + // Build client. + ClientConfiguration clientConf = new ClientConfiguration(); + clientConf.setUseV2WireProtocol(useV2WireProtocol); + BookieClientImpl client = new BookieClientImpl(clientConf, eventLoopGroup, + UnpooledByteBufAllocator.DEFAULT, executor, scheduler, NullStatsLogger.INSTANCE, + BookieSocketAddress.LEGACY_BOOKIEID_RESOLVER); + + // Inject a reconnect event. + // 1. Get the channel that will be used. + // 2. Call add entry. + // 3. Another thread closes the channel that is in use. + for (int i = 0; i < tryTimes; i++) { + long entryId = i + 1; + long lac = i; + // Build payload. + int payloadLen; + ByteBuf payload; + if (smallPayload) { + payloadLen = 1; + payload = PooledByteBufAllocator.DEFAULT.buffer(1); + payload.writeByte(1); + } else { + payloadLen = BookieProtoEncoding.SMALL_ENTRY_SIZE_THRESHOLD; + payload = PooledByteBufAllocator.DEFAULT.buffer(); + byte[] bs = new byte[payloadLen]; + payload.writeBytes(bs); + } + + // Digest. + ReferenceCounted bb = digestManager.computeDigestAndPackageForSending(entryId, lac, + payloadLen * entryId, payload, passwd, BookieProtocol.FLAG_NONE); + log.info("Before send.
bb.refCnt: {}", bb.refCnt()); + + // Step: get the channel that will be used. + PerChannelBookieClientPool perChannelBookieClientPool = client.lookupClient(addr); + AtomicReference perChannelBookieClient = new AtomicReference<>(); + perChannelBookieClientPool.obtain((rc, result) -> perChannelBookieClient.set(result), ledgerId); + Awaitility.await().untilAsserted(() -> { + assertNotNull(perChannelBookieClient.get()); + }); + + // Step: Inject a reconnect event. + final int delayMillis = i; + new Thread(() -> { + if (withDelayReconnect) { + sleep(delayMillis); + } + Channel channel = WhiteboxImpl.getInternalState(perChannelBookieClient.get(), "channel"); + if (channel != null) { + channel.close(); + } + }).start(); + if (withDelayAddEntry) { + sleep(delayMillis); + } + + // Step: add entry. + AtomicBoolean callbackExecuted = new AtomicBoolean(); + WriteCallback callback = (rc, lId, eId, socketAddr, ctx) -> { + log.info("Writing is finished. rc: {}, withDelayReconnect: {}, withDelayAddEntry: {}, ledgerId: {}," + " entryId: {}, socketAddr: {}, ctx: {}", + rc, withDelayReconnect, withDelayAddEntry, lId, eId, socketAddr, ctx); + callbackExecuted.set(true); + }; + client.addEntry(addr, ledgerId, passwd, entryId, bb, callback, i, BookieProtocol.FLAG_NONE, false, + WriteFlag.NONE); + // Wait until adding the entry has finished. + Awaitility.await().untilAsserted(() -> assertTrue(callbackExecuted.get())); + // The steps are explained in the method description. + // Since step "3-a-2" always runs before step "3-b", "callbackExecuted" will be set + // before step "3-b". Add a sleep to wait until step "3-b" has finished. + Thread.sleep(100); + // Check the ref count. + Awaitility.await().atMost(Duration.ofSeconds(60)).untilAsserted(() -> { + assertEquals(1, bb.refCnt()); + // V2 will release the original data if it is a small entry. + if (!useV2WireProtocol && !smallPayload) { + assertEquals(1, payload.refCnt()); + } + }); + bb.release(); + // V2 will release the original data if it is a small entry. + if (!useV2WireProtocol && !smallPayload) { + payload.release(); + } + } + // cleanup. + client.close(); + } + + private void sleep(int milliSeconds) { + try { + if (milliSeconds > 0) { + Thread.sleep(milliSeconds); + } + } catch (InterruptedException e) { + log.warn("Error occurs", e); + } + } + + /** + * Relates to https://github.com/apache/bookkeeper/pull/4289. + */ + @Test + public void testDataRefCntWhenReconnectV2() throws Exception { + // Large payload. + // Running this test may not reproduce the issue; you can reproduce it this way: + // 1. Add two break points. + // a. At the line "Channel c = channel" in the method PerChannelBookieClient.addEntry. + // b. At the line "channel = null" in the method "PerChannelBookieClient.channelInactive". + // 2. Make break point b run earlier than break point a during debugging. + testDataRefCntWhenReconnect(true, false, false, false, 10); + testDataRefCntWhenReconnect(true, false, true, false, 10); + testDataRefCntWhenReconnect(true, false, false, true, 10); + + // Small payload. + // There is no issue even without https://github.com/apache/bookkeeper/pull/4289; this just adds a test + // for the scenario. + testDataRefCntWhenReconnect(true, true, false, false, 10); + testDataRefCntWhenReconnect(true, true, true, false, 10); + testDataRefCntWhenReconnect(true, true, false, true, 10); + }
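The invariant the reconnect test pins down is the usual Netty ownership contract: a caller that retains a `ReferenceCounted` buffer before handing it to an asynchronous send path must end up with exactly its own reference left, whether the write succeeded or the channel went away. A standalone illustration with plain Netty buffers (not BookKeeper code):

```java
import io.netty.buffer.ByteBuf;
import io.netty.buffer.Unpooled;

class RetainReleaseSketch {
    public static void main(String[] args) {
        ByteBuf toSend = Unpooled.buffer(4).writeInt(42); // refCnt == 1
        toSend.retain();                                  // caller keeps its own reference: refCnt == 2

        // The send path (writeAndFlush, or an error path that drops the message
        // because the channel is switching) owes exactly one release.
        toSend.release();                                 // stands in for the transport's release

        toSend.release();                                 // caller's own release
        System.out.println(toSend.refCnt());              // 0: no leak, no double free
    }
}
```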
+ + /** + * Please see the comment on the "Large payload" scenario in {@link #testDataRefCntWhenReconnectV2()} if you + * cannot reproduce the issue when running this test. + * Relates to https://github.com/apache/bookkeeper/pull/4289. + */ + @Test + public void testDataRefCntWhenReconnectV3() throws Exception { + testDataRefCntWhenReconnect(false, true, false, false, 10); + testDataRefCntWhenReconnect(false, true, true, false, 10); + testDataRefCntWhenReconnect(false, true, false, true, 10); } } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/BookieFailureTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/BookieFailureTest.java index 8a18127a297..eea1be20907 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/BookieFailureTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/BookieFailureTest.java @@ -99,35 +99,36 @@ public BookieFailureTest() { * @throws IOException */ @Test - public void testAsyncBK1() throws IOException { + public void testAsyncBK1() throws Exception { LOG.info("#### BK1 ####"); - auxTestReadWriteAsyncSingleClient(bs.get(0)); + auxTestReadWriteAsyncSingleClient(serverByIndex(0)); } @Test - public void testAsyncBK2() throws IOException { + public void testAsyncBK2() throws Exception { LOG.info("#### BK2 ####"); - auxTestReadWriteAsyncSingleClient(bs.get(1)); + auxTestReadWriteAsyncSingleClient(serverByIndex(1)); } @Test - public void testAsyncBK3() throws IOException { + public void testAsyncBK3() throws Exception { LOG.info("#### BK3 ####"); - auxTestReadWriteAsyncSingleClient(bs.get(2)); + auxTestReadWriteAsyncSingleClient(serverByIndex(2)); } @Test - public void testAsyncBK4() throws IOException { + public void testAsyncBK4() throws Exception { LOG.info("#### BK4 ####"); - auxTestReadWriteAsyncSingleClient(bs.get(3)); + auxTestReadWriteAsyncSingleClient(serverByIndex(3)); } @Test public void testBookieRecovery() throws Exception { - //Shutdown all but 1 bookie - bs.get(0).shutdown(); - bs.get(1).shutdown(); - bs.get(2).shutdown(); + // Shutdown all but 1 bookie (should be in its own test case with 1 bookie) + assertEquals(4, bookieCount()); + killBookie(0); + killBookie(0); + killBookie(0); byte[] passwd = "blah".getBytes(); LedgerHandle lh = bkc.createLedger(1, 1, digestType, passwd); @@ -138,10 +139,8 @@ public void testBookieRecovery() throws Exception { lh.addEntry(data); } - bs.get(3).shutdown(); - BookieServer server = new BookieServer(bsConfs.get(3)); - server.start(); - bs.set(3, server); + assertEquals(1, bookieCount()); + restartBookies(); assertEquals(numEntries - 1 , lh.getLastAddConfirmed()); Enumeration entries = lh.readEntries(0, lh.getLastAddConfirmed()); @@ -180,13 +179,17 @@ void auxTestReadWriteAsyncSingleClient(BookieServer bs) throws IOException { // wait for all entries to be acknowledged synchronized (sync) { while (sync.counter < numEntriesToWrite) { - LOG.debug("Entries counter = " + sync.counter); + if (LOG.isDebugEnabled()) { + LOG.debug("Entries counter = " + sync.counter); + } sync.wait(10000); assertFalse("Failure occurred during write", sync.failureOccurred); } } - LOG.debug("*** WRITE COMPLETE ***"); + if (LOG.isDebugEnabled()) { + LOG.debug("*** WRITE COMPLETE ***"); + } // close ledger lh.close(); @@ -196,7 +199,9 @@ void auxTestReadWriteAsyncSingleClient(BookieServer bs) throws IOException { bkc.close(); bkc = new BookKeeperTestClient(baseClientConf); lh = bkc.openLedger(ledgerId, digestType, ledgerPassword); - LOG.debug("Number of entries written: " + (lh.getLastAddConfirmed() + 1)); + if (LOG.isDebugEnabled()) { + LOG.debug("Number of entries written: " + (lh.getLastAddConfirmed() + 1)); + } assertTrue("Verifying
number of entries written", lh.getLastAddConfirmed() == (numEntriesToWrite - 1)); // read entries @@ -210,7 +215,9 @@ void auxTestReadWriteAsyncSingleClient(BookieServer bs) throws IOException { assertTrue("Haven't received entries", sync.value); } - LOG.debug("*** READ COMPLETE ***"); + if (LOG.isDebugEnabled()) { + LOG.debug("*** READ COMPLETE ***"); + } // at this point, Enumeration ls is filled with the returned // values @@ -222,9 +229,11 @@ void auxTestReadWriteAsyncSingleClient(BookieServer bs) throws IOException { ByteBuffer result = ByteBuffer.wrap(entry); Integer retrEntry = result.getInt(); - LOG.debug("Retrieved entry: " + i); + if (LOG.isDebugEnabled()) { + LOG.debug("Retrieved entry: " + i); + } assertTrue("Checking entry " + i + " for equality", origEntry.equals(retrEntry)); - assertTrue("Checking entry " + i + " for size", entry.length == entriesSize.get(i).intValue()); + assertTrue("Checking entry " + i + " for size", entry.length == entriesSize.get(i)); i++; } @@ -301,24 +310,6 @@ public void testLedgerNoRecoveryOpenAfterBKCrashed() throws Exception { LedgerHandle afterlh = bkc.openLedgerNoRecovery(beforelh.getId(), digestType, "".getBytes()); assertEquals(numEntries - 2, afterlh.getLastAddConfirmed()); - - startNewBookie(); - LedgerHandle beforelh2 = bkc.createLedger(numBookies, 1, digestType, "".getBytes()); - - for (int i = 0; i < numEntries; i++) { - beforelh2.addEntry(tmp.getBytes()); - } - - // shutdown first bookie server - killBookie(0); - - // try to open ledger no recovery - try { - bkc.openLedgerNoRecovery(beforelh2.getId(), digestType, "".getBytes()); - fail("Should have thrown exception"); - } catch (BKException.BKReadException e) { - // correct behaviour - } } @Test @@ -336,27 +327,22 @@ public void testLedgerOpenAfterBKCrashed() throws Exception { killBookie(0); startNewBookie(); - // try to open ledger no recovery + // try to open ledger with recovery LedgerHandle afterlh = bkc.openLedger(beforelh.getId(), digestType, "".getBytes()); - assertEquals(beforelh.getLastAddPushed(), afterlh.getLastAddConfirmed()); - LedgerHandle beforelh2 = bkc.createLedger(numBookies, 1, digestType, "".getBytes()); - + // try to open ledger no recovery + // bookies: 4, ensSize: 3, ackQuorumSize: 2 + LedgerHandle beforelhWithNoRecovery = bkc.createLedger(numBookies - 1 , 2, digestType, "".getBytes()); for (int i = 0; i < numEntries; i++) { - beforelh2.addEntry(tmp.getBytes()); + beforelhWithNoRecovery.addEntry(tmp.getBytes()); } // shutdown first bookie server killBookie(0); - // try to open ledger no recovery - try { - bkc.openLedger(beforelh2.getId(), digestType, "".getBytes()); - fail("Should have thrown exception"); - } catch (BKException.BKLedgerRecoveryException e) { - // correct behaviour - } + // try to open ledger no recovery, should be able to open ledger + bkc.openLedger(beforelhWithNoRecovery.getId(), digestType, "".getBytes()); } /** diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/BookieJournalRollingTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/BookieJournalRollingTest.java index 83d7ba174f0..4b0243f672b 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/BookieJournalRollingTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/BookieJournalRollingTest.java @@ -28,14 +28,12 @@ import java.util.Enumeration; import java.util.concurrent.CountDownLatch; import java.util.concurrent.atomic.AtomicInteger; - import org.apache.bookkeeper.client.AsyncCallback.AddCallback; import 
diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/BookieJournalRollingTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/BookieJournalRollingTest.java index 83d7ba174f0..4b0243f672b 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/BookieJournalRollingTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/BookieJournalRollingTest.java @@ -28,14 +28,12 @@ import java.util.Enumeration; import java.util.concurrent.CountDownLatch; import java.util.concurrent.atomic.AtomicInteger; - import org.apache.bookkeeper.client.AsyncCallback.AddCallback; import org.apache.bookkeeper.client.BKException; import org.apache.bookkeeper.client.BookKeeper.DigestType; import org.apache.bookkeeper.client.LedgerEntry; import org.apache.bookkeeper.client.LedgerHandle; -import org.apache.bookkeeper.conf.ServerConfiguration; -import org.apache.bookkeeper.conf.TestBKConfiguration; +import org.awaitility.Awaitility; import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -61,7 +59,7 @@ public BookieJournalRollingTest() { public void setUp() throws Exception { // Set up the configuration properties needed. baseConf.setMaxJournalSizeMB(1); - baseConf.setMaxBackupJournals(1); + baseConf.setMaxBackupJournals(2); super.setUp(); } @@ -74,7 +72,7 @@ public void tearDown() throws Exception { /** * Common method to create ledgers and write entries to them. */ - private LedgerHandle[] writeLedgerEntries(int numLedgers, int msgSize, int numMsgs) throws Exception { + protected LedgerHandle[] writeLedgerEntries(int numLedgers, int msgSize, int numMsgs) throws Exception { // Create the ledgers LedgerHandle[] lhs = new LedgerHandle[numLedgers]; long[] ledgerIds = new long[numLedgers]; @@ -87,7 +85,7 @@ private LedgerHandle[] writeLedgerEntries(int numLedgers, int msgSize, int numMs return lhs; } - private void writeLedgerEntries(LedgerHandle[] lhs, int msgSize, int numMsgs) throws Exception { + protected void writeLedgerEntries(LedgerHandle[] lhs, int msgSize, int numMsgs) throws Exception { // Create a dummy message string to write as ledger entries StringBuilder msgSB = new StringBuilder(); for (int i = 0; i < msgSize; i++) { @@ -118,7 +116,7 @@ public void addComplete(int rc2, LedgerHandle lh, long entryId, Object ctx) { } } - private void validLedgerEntries(long[] ledgerIds, int msgSize, int numMsgs) throws Exception { + protected void validLedgerEntries(long[] ledgerIds, int msgSize, int numMsgs) throws Exception { // Open the ledgers LedgerHandle[] lhs = new LedgerHandle[ledgerIds.length]; for (int i = 0; i < lhs.length; i++) { @@ -186,21 +184,21 @@ public void testJournalRolling() throws Exception { lhs[i].close(); } - // Sleep for a while to ensure data are flushed - Thread.sleep(2000); - - // verify that we only keep at most journal files - for (File journalDir : tmpDirs) { - File[] journals = journalDir.listFiles(); - int numJournals = 0; - for (File f : journals) { - if (!f.getName().endsWith(".txn")) { - continue; + Awaitility.await().untilAsserted(() -> { + // verify that we only keep at most the configured number of journal files + for (File journalDir : bookieJournalDirs()) { + File[] journals = journalDir.listFiles(); + int numJournals = 0; + for (File f : journals) { + if (!f.getName().endsWith(".txn")) { + continue; + } + ++numJournals; + } + assertTrue(numJournals <= 2); } - ++numJournals; } - assertTrue(numJournals <= 2); - } + ); // restart bookies // ensure that after restart we can read the entries since the journals rolled @@ -221,11 +219,12 @@ public void testJournalRollingWithoutSyncup() throws Exception { } // set flush interval to a large value - ServerConfiguration newConf = TestBKConfiguration.newServerConfiguration(); - newConf.setFlushInterval(999999999); - newConf.setAllowEphemeralPorts(false); // restart bookies - restartBookies(newConf); + restartBookies(c -> { + c.setFlushInterval(999999999); + c.setAllowEphemeralPorts(false); + return c; + }); // Write enough ledger entries so that we roll over journals LedgerHandle[] lhs = writeLedgerEntries(4, 1024, 1024); @@ -238,7 +237,7 @@ // ledger
indexes are not flushed // and after bookies restarted, journals will be replayed // ensure that we can still read the entries - restartBookies(newConf); + restartBookies(); validLedgerEntries(ledgerIds, 1024, 1024); } @@ -260,11 +259,12 @@ public void testReplayDeletedLedgerJournalEntries() throws Exception { Thread.sleep(3 * baseConf.getFlushInterval()); // restart bookies with flush interval set to a large value - ServerConfiguration newConf = TestBKConfiguration.newServerConfiguration(); - newConf.setFlushInterval(999999999); - newConf.setAllowEphemeralPorts(false); // restart bookies - restartBookies(newConf); + restartBookies(c -> { + c.setFlushInterval(999999999); + c.setAllowEphemeralPorts(false); + return c; + }); // Write entries again so that they exist in the journal writeLedgerEntries(lhs, 1024, 10); @@ -274,10 +274,10 @@ bkc.deleteLedger(lh.getId()); } // wait for gc - Thread.sleep(2 * newConf.getGcWaitTime()); + Thread.sleep(2 * confByIndex(0).getGcWaitTime()); // restart bookies again to trigger replaying journal - restartBookies(newConf); + restartBookies(); } } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/BookieJournalRollingWithReuseJournalTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/BookieJournalRollingWithReuseJournalTest.java new file mode 100644 index 00000000000..7a60e9f23cd --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/BookieJournalRollingWithReuseJournalTest.java @@ -0,0 +1,84 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.test; + +import static org.junit.Assert.assertTrue; + +import java.io.File; +import lombok.extern.slf4j.Slf4j; +import org.apache.bookkeeper.client.LedgerHandle; +import org.awaitility.Awaitility; +import org.junit.Before; +import org.junit.Test; +
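The journal-rolling tests now restart bookies through a configuration-mutating callback instead of one freshly built `ServerConfiguration`, so each bookie keeps its own per-instance settings (ports, directories) across the restart. A small sketch of why the callback shape helps; the helper below is illustrative, not the test-framework method itself:

```java
import java.util.function.Function;
import org.apache.bookkeeper.conf.ServerConfiguration;

class RestartMutatorSketch {
    // Apply the same mutation to each bookie's existing configuration,
    // preserving whatever per-bookie state the configuration already carries.
    static ServerConfiguration mutate(ServerConfiguration conf,
                                      Function<ServerConfiguration, ServerConfiguration> mutator) {
        return mutator.apply(conf);
    }

    public static void main(String[] args) {
        ServerConfiguration conf = new ServerConfiguration();
        conf = mutate(conf, c -> {
            c.setFlushInterval(999999999);    // delay flushes, as the rolling tests do
            c.setAllowEphemeralPorts(false);  // keep the same port across the restart
            return c;
        });
        System.out.println(conf.getFlushInterval());
    }
}
```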
+/** + * This class tests that the bookie rolls journals when journal file reuse is enabled. + */ +@Slf4j +public class BookieJournalRollingWithReuseJournalTest extends BookieJournalRollingTest { + + public BookieJournalRollingWithReuseJournalTest() { + super(); + } + + @Before + @Override + public void setUp() throws Exception { + baseConf.setJournalReuseFiles(true); + super.setUp(); + } + + @Test + public void testJournalRolling() throws Exception { + if (log.isDebugEnabled()) { + log.debug("Testing Journal Rolling"); + } + + // Write enough ledger entries so that we roll over journals + LedgerHandle[] lhs = writeLedgerEntries(10, 1024, 1024); + long[] ledgerIds = new long[lhs.length]; + for (int i = 0; i < lhs.length; i++) { + ledgerIds[i] = lhs[i].getId(); + lhs[i].close(); + } + + Awaitility.await().untilAsserted(() -> { + // verify that we only keep at most the configured number of journal files + for (File journalDir : bookieJournalDirs()) { + File[] journals = journalDir.listFiles(); + int numJournals = 0; + for (File f : journals) { + if (!f.getName().endsWith(".txn")) { + continue; + } + ++numJournals; + } + assertTrue(numJournals <= 2); + } + }); + + // restart bookies + // ensure that after restart we can read the entries since the journals rolled + restartBookies(); + validLedgerEntries(ledgerIds, 1024, 1024); + } + +} \ No newline at end of file diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/BookieReadWriteTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/BookieReadWriteTest.java index afcb4b475c3..3ed8a2a655e 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/BookieReadWriteTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/BookieReadWriteTest.java @@ -36,7 +36,6 @@ import java.util.Random; import java.util.concurrent.CountDownLatch; import java.util.concurrent.atomic.AtomicInteger; - import org.apache.bookkeeper.client.AsyncCallback.AddCallback; import org.apache.bookkeeper.client.AsyncCallback.CloseCallback; import org.apache.bookkeeper.client.AsyncCallback.ReadCallback; @@ -141,7 +140,7 @@ public void testOpenException() throws IOException, InterruptedException { @Test public void testStreamingClients() throws IOException, BKException, InterruptedException { lh = bkc.createLedger(digestType, ledgerPassword); - // write a string so that we cna + // write a string so that we can // create a buffer of a single byte // and check for corner cases String toWrite = "we need to check for this string to match " + "and for the record mahadev is the best"; @@ -207,13 +206,17 @@ private void testReadWriteAsyncSingleClient(int numEntries) throws IOException { // wait for all entries to be acknowledged synchronized (sync) { while (sync.counter < numEntriesToWrite) { - LOG.debug("Entries counter = " + sync.counter); + if (LOG.isDebugEnabled()) { + LOG.debug("Entries counter = " + sync.counter); + } sync.wait(); } assertEquals("Error adding", BKException.Code.OK, sync.getReturnCode()); } - LOG.debug("*** WRITE COMPLETE ***"); + if (LOG.isDebugEnabled()) { + LOG.debug("*** WRITE COMPLETE ***"); + } // close ledger lh.close(); @@ -221,7 +224,9 @@ private void testReadWriteAsyncSingleClient(int numEntries) throws IOException { // open ledger lh = bkc.openLedger(ledgerId, digestType, ledgerPassword); - LOG.debug("Number of entries written: " + (lh.getLastAddConfirmed() + 1)); + if (LOG.isDebugEnabled()) { + LOG.debug("Number of entries written: " + (lh.getLastAddConfirmed() + 1)); + } assertTrue("Verifying number of entries written", lh.getLastAddConfirmed() == (numEntriesToWrite - 1)); // read entries @@ -234,7 +239,9 @@
private void testReadWriteAsyncSingleClient(int numEntries) throws IOException { assertEquals("Error reading", BKException.Code.OK, sync.getReturnCode()); } - LOG.debug("*** READ COMPLETE ***"); + if (LOG.isDebugEnabled()) { + LOG.debug("*** READ COMPLETE ***"); + } // at this point, Enumeration ls is filled with the returned // values @@ -245,13 +252,14 @@ private void testReadWriteAsyncSingleClient(int numEntries) throws IOException { Integer origEntry = origbb.getInt(); byte[] entry = ls.nextElement().getEntry(); ByteBuffer result = ByteBuffer.wrap(entry); - LOG.debug("Length of result: " + result.capacity()); - LOG.debug("Original entry: " + origEntry); - Integer retrEntry = result.getInt(); - LOG.debug("Retrieved entry: " + retrEntry); + if (LOG.isDebugEnabled()) { + LOG.debug("Length of result: " + result.capacity()); + LOG.debug("Original entry: " + origEntry); + LOG.debug("Retrieved entry: " + retrEntry); + } assertTrue("Checking entry " + i + " for equality", origEntry.equals(retrEntry)); - assertTrue("Checking entry " + i + " for size", entry.length == entriesSize.get(i).intValue()); + assertTrue("Checking entry " + i + " for size", entry.length == entriesSize.get(i)); i++; } assertTrue("Checking number of read entries", i == numEntriesToWrite); @@ -286,7 +294,7 @@ public void testReadWriteRangeAsyncSingleClient() throws IOException { // bkc.initMessageDigest("SHA1"); ledgerId = lh.getId(); LOG.info("Ledger ID: " + lh.getId()); - byte bytes[] = {'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i'}; + byte[] bytes = {'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i'}; lh.asyncAddEntry(bytes, 0, bytes.length, this, sync); lh.asyncAddEntry(bytes, 0, 4, this, sync); // abcd @@ -297,7 +305,9 @@ public void testReadWriteRangeAsyncSingleClient() throws IOException { // wait for all entries to be acknowledged synchronized (sync) { while (sync.counter < numEntries) { - LOG.debug("Entries counter = " + sync.counter); + if (LOG.isDebugEnabled()) { + LOG.debug("Entries counter = " + sync.counter); + } sync.wait(); } assertEquals("Error adding", BKException.Code.OK, sync.getReturnCode()); @@ -335,8 +345,9 @@ public void testReadWriteRangeAsyncSingleClient() throws IOException { // expected } - - LOG.debug("*** WRITE COMPLETE ***"); + if (LOG.isDebugEnabled()) { + LOG.debug("*** WRITE COMPLETE ***"); + } // close ledger lh.close(); @@ -344,7 +355,9 @@ public void testReadWriteRangeAsyncSingleClient() throws IOException { // open ledger lh = bkc.openLedger(ledgerId, digestType, ledgerPassword); - LOG.debug("Number of entries written: " + (lh.getLastAddConfirmed() + 1)); + if (LOG.isDebugEnabled()) { + LOG.debug("Number of entries written: " + (lh.getLastAddConfirmed() + 1)); + } assertTrue("Verifying number of entries written", lh.getLastAddConfirmed() == (numEntries - 1)); @@ -358,7 +371,9 @@ public void testReadWriteRangeAsyncSingleClient() throws IOException { assertEquals("Error reading", BKException.Code.OK, sync.getReturnCode()); } - LOG.debug("*** READ COMPLETE ***"); + if (LOG.isDebugEnabled()) { + LOG.debug("*** READ COMPLETE ***"); + } // at this point, Enumeration ls is filled with the returned // values @@ -429,7 +444,9 @@ public void testSyncReadAsyncWriteStringsSingleClient() throws IOException { SyncObj sync = new SyncObj(); LOG.info("TEST READ WRITE STRINGS MIXED SINGLE CLIENT"); String charset = "utf-8"; - LOG.debug("Default charset: " + Charset.defaultCharset()); + if (LOG.isDebugEnabled()) { + LOG.debug("Default charset: " + Charset.defaultCharset()); + } try { // Create a ledger lh = 
bkc.createLedger(digestType, ledgerPassword); @@ -446,13 +463,17 @@ public void testSyncReadAsyncWriteStringsSingleClient() throws IOException { // wait for all entries to be acknowledged synchronized (sync) { while (sync.counter < numEntriesToWrite) { - LOG.debug("Entries counter = " + sync.counter); + if (LOG.isDebugEnabled()) { + LOG.debug("Entries counter = " + sync.counter); + } sync.wait(); } assertEquals("Error adding", BKException.Code.OK, sync.getReturnCode()); } - LOG.debug("*** ASYNC WRITE COMPLETE ***"); + if (LOG.isDebugEnabled()) { + LOG.debug("*** ASYNC WRITE COMPLETE ***"); + } // close ledger lh.close(); @@ -460,13 +481,17 @@ public void testSyncReadAsyncWriteStringsSingleClient() throws IOException { // open ledger lh = bkc.openLedger(ledgerId, digestType, ledgerPassword); - LOG.debug("Number of entries written: " + (lh.getLastAddConfirmed() + 1)); + if (LOG.isDebugEnabled()) { + LOG.debug("Number of entries written: " + (lh.getLastAddConfirmed() + 1)); + } assertTrue("Verifying number of entries written", lh.getLastAddConfirmed() == (numEntriesToWrite - 1)); // read entries Enumeration ls = lh.readEntries(0, numEntriesToWrite - 1); - LOG.debug("*** SYNC READ COMPLETE ***"); + if (LOG.isDebugEnabled()) { + LOG.debug("*** SYNC READ COMPLETE ***"); + } // at this point, Enumeration ls is filled with the returned // values @@ -475,14 +500,18 @@ public void testSyncReadAsyncWriteStringsSingleClient() throws IOException { byte[] origEntryBytes = entries.get(i++); byte[] retrEntryBytes = ls.nextElement().getEntry(); - LOG.debug("Original byte entry size: " + origEntryBytes.length); - LOG.debug("Saved byte entry size: " + retrEntryBytes.length); + if (LOG.isDebugEnabled()) { + LOG.debug("Original byte entry size: " + origEntryBytes.length); + LOG.debug("Saved byte entry size: " + retrEntryBytes.length); + } String origEntry = new String(origEntryBytes, charset); String retrEntry = new String(retrEntryBytes, charset); - LOG.debug("Original entry: " + origEntry); - LOG.debug("Retrieved entry: " + retrEntry); + if (LOG.isDebugEnabled()) { + LOG.debug("Original entry: " + origEntry); + LOG.debug("Retrieved entry: " + retrEntry); + } assertTrue("Checking entry " + i + " for equality", origEntry.equals(retrEntry)); } @@ -517,7 +546,9 @@ public void testReadWriteSyncSingleClient() throws IOException { } lh.close(); lh = bkc.openLedger(ledgerId, digestType, ledgerPassword); - LOG.debug("Number of entries written: " + lh.getLastAddConfirmed()); + if (LOG.isDebugEnabled()) { + LOG.debug("Number of entries written: " + lh.getLastAddConfirmed()); + } assertTrue("Verifying number of entries written", lh.getLastAddConfirmed() == (numEntriesToWrite - 1)); Enumeration ls = lh.readEntries(0, numEntriesToWrite - 1); @@ -526,11 +557,12 @@ public void testReadWriteSyncSingleClient() throws IOException { ByteBuffer origbb = ByteBuffer.wrap(entries.get(i++)); Integer origEntry = origbb.getInt(); ByteBuffer result = ByteBuffer.wrap(ls.nextElement().getEntry()); - LOG.debug("Length of result: " + result.capacity()); - LOG.debug("Original entry: " + origEntry); - Integer retrEntry = result.getInt(); - LOG.debug("Retrieved entry: " + retrEntry); + if (LOG.isDebugEnabled()) { + LOG.debug("Length of result: " + result.capacity()); + LOG.debug("Original entry: " + origEntry); + LOG.debug("Retrieved entry: " + retrEntry); + } assertTrue("Checking entry " + i + " for equality", origEntry.equals(retrEntry)); } lh.close(); @@ -579,14 +611,18 @@ public void addComplete(int rccb, LedgerHandle lh, long entryId, 
Object ctx) { lh.close(); lh = bkc.openLedger(ledgerId, digestType, ledgerPassword); - LOG.debug("Number of entries written: " + lh.getLastAddConfirmed()); + if (LOG.isDebugEnabled()) { + LOG.debug("Number of entries written: " + lh.getLastAddConfirmed()); + } assertTrue("Verifying number of entries written", lh.getLastAddConfirmed() == numEntriesToWrite); Enumeration ls = lh.readEntries(0, numEntriesToWrite - 1); int i = 0; while (ls.hasMoreElements()) { ByteBuffer result = ByteBuffer.wrap(ls.nextElement().getEntry()); - LOG.debug("Length of result: " + result.capacity()); + if (LOG.isDebugEnabled()) { + LOG.debug("Length of result: " + result.capacity()); + } assertTrue("Checking if entry " + i + " has zero bytes", result.capacity() == 0); } @@ -641,7 +677,9 @@ public void addComplete(int rc2, LedgerHandle lh, long entryId, Object ctx) { lh = bkc.openLedger(ledgerId, digestType, ledgerPassword); lh2 = bkc.openLedger(ledgerId2, digestType, ledgerPassword); - LOG.debug("Number of entries written: " + lh.getLastAddConfirmed() + ", " + lh2.getLastAddConfirmed()); + if (LOG.isDebugEnabled()) { + LOG.debug("Number of entries written: " + lh.getLastAddConfirmed() + ", " + lh2.getLastAddConfirmed()); + } assertTrue("Verifying number of entries written lh (" + lh.getLastAddConfirmed() + ")", lh .getLastAddConfirmed() == (numEntriesToWrite - 1)); assertTrue("Verifying number of entries written lh2 (" + lh2.getLastAddConfirmed() + ")", lh2 @@ -651,7 +689,9 @@ public void addComplete(int rc2, LedgerHandle lh, long entryId, Object ctx) { int i = 0; while (ls.hasMoreElements()) { ByteBuffer result = ByteBuffer.wrap(ls.nextElement().getEntry()); - LOG.debug("Length of result: " + result.capacity()); + if (LOG.isDebugEnabled()) { + LOG.debug("Length of result: " + result.capacity()); + } assertTrue("Checking if entry " + i + " has zero bytes", result.capacity() == 0); } @@ -660,7 +700,9 @@ public void addComplete(int rc2, LedgerHandle lh, long entryId, Object ctx) { i = 0; while (ls.hasMoreElements()) { ByteBuffer result = ByteBuffer.wrap(ls.nextElement().getEntry()); - LOG.debug("Length of result: " + result.capacity()); + if (LOG.isDebugEnabled()) { + LOG.debug("Length of result: " + result.capacity()); + } assertTrue("Checking if entry " + i + " has zero bytes", result.capacity() == 0); } @@ -697,7 +739,9 @@ public void testReadWriteAsyncLength() throws IOException { // wait for all entries to be acknowledged synchronized (sync) { while (sync.counter < numEntriesToWrite) { - LOG.debug("Entries counter = " + sync.counter); + if (LOG.isDebugEnabled()) { + LOG.debug("Entries counter = " + sync.counter); + } sync.wait(); } assertEquals("Error adding", BKException.Code.OK, sync.getReturnCode()); @@ -705,7 +749,9 @@ public void testReadWriteAsyncLength() throws IOException { long length = numEntriesToWrite * 4; assertTrue("Ledger length before closing: " + lh.getLength(), lh.getLength() == length); - LOG.debug("*** WRITE COMPLETE ***"); + if (LOG.isDebugEnabled()) { + LOG.debug("*** WRITE COMPLETE ***"); + } // close ledger lh.close(); @@ -803,7 +849,9 @@ public void testReadFromOpenLedger() throws Exception { assertEquals("Last confirmed add: ", lac, (numEntriesToWrite * 2) - 1); - LOG.debug("*** WRITE COMPLETE ***"); + if (LOG.isDebugEnabled()) { + LOG.debug("*** WRITE COMPLETE ***"); + } // close ledger lh.close(); /* @@ -821,7 +869,9 @@ public void testReadFromOpenLedger() throws Exception { // Wait for for last confirmed synchronized (sync) { while (sync.lastConfirmed == -1) { - LOG.debug("Counter = " 
+ sync.lastConfirmed); + if (LOG.isDebugEnabled()) { + LOG.debug("Counter = " + sync.lastConfirmed); + } sync.wait(); } assertEquals("Error reading", BKException.Code.OK, sync.getReturnCode()); @@ -829,7 +879,9 @@ public void testReadFromOpenLedger() throws Exception { assertEquals("Last confirmed add", sync.lastConfirmed, (numEntriesToWrite - 2)); - LOG.debug("*** WRITE COMPLETE ***"); + if (LOG.isDebugEnabled()) { + LOG.debug("*** WRITE COMPLETE ***"); + } // close ledger lh.close(); } catch (BKException e) { @@ -884,7 +936,9 @@ public void testReadFromOpenLedgerOpenOnce() throws Exception { long last = lh.readLastConfirmed(); assertTrue("Last confirmed add: " + last, last == (numEntriesToWrite - 2)); - LOG.debug("*** WRITE COMPLETE ***"); + if (LOG.isDebugEnabled()) { + LOG.debug("*** WRITE COMPLETE ***"); + } // close ledger lh.close(); // close read only ledger should not change metadata @@ -912,7 +966,9 @@ public void testReadFromOpenLedgerZeroAndOne() throws Exception { /* * We haven't written anything, so it should be empty. */ - LOG.debug("Checking that it is empty"); + if (LOG.isDebugEnabled()) { + LOG.debug("Checking that it is empty"); + } long readLastConfirmed = lhOpen.readLastConfirmed(); assertTrue("Last confirmed has the wrong value", readLastConfirmed == LedgerHandle.INVALID_ENTRY_ID); @@ -920,7 +976,9 @@ public void testReadFromOpenLedgerZeroAndOne() throws Exception { /* * Writing one entry. */ - LOG.debug("Going to write one entry"); + if (LOG.isDebugEnabled()) { + LOG.debug("Going to write one entry"); + } ByteBuffer entry = ByteBuffer.allocate(4); entry.putInt(rng.nextInt(maxInt)); entry.position(0); @@ -933,7 +991,9 @@ public void testReadFromOpenLedgerZeroAndOne() throws Exception { * The hint should still indicate that there is no confirmed * add. 
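* (the last add confirmed only advances once a later add is acknowledged, so a single write still reports INVALID_ENTRY_ID)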
*/ - LOG.debug("Checking that it is still empty even after writing one entry"); + if (LOG.isDebugEnabled()) { + LOG.debug("Checking that it is still empty even after writing one entry"); + } readLastConfirmed = lhOpen.readLastConfirmed(); assertTrue(readLastConfirmed == LedgerHandle.INVALID_ENTRY_ID); @@ -1107,7 +1167,9 @@ public void testLastConfirmedAdd() throws Exception { long last = lh.readLastConfirmed(); assertTrue("Last confirmed add: " + last, last == (numEntriesToWrite - 2)); - LOG.debug("*** WRITE COMPLETE ***"); + if (LOG.isDebugEnabled()) { + LOG.debug("*** WRITE COMPLETE ***"); + } // close ledger lh.close(); /* @@ -1125,7 +1187,9 @@ public void testLastConfirmedAdd() throws Exception { // Wait for for last confirmed synchronized (sync) { while (sync.lastConfirmed == LedgerHandle.INVALID_ENTRY_ID) { - LOG.debug("Counter = " + sync.lastConfirmed); + if (LOG.isDebugEnabled()) { + LOG.debug("Counter = " + sync.lastConfirmed); + } sync.wait(); } assertEquals("Error reading", BKException.Code.OK, sync.getReturnCode()); @@ -1133,7 +1197,9 @@ public void testLastConfirmedAdd() throws Exception { assertTrue("Last confirmed add: " + sync.lastConfirmed, sync.lastConfirmed == (numEntriesToWrite - 2)); - LOG.debug("*** WRITE COMPLETE ***"); + if (LOG.isDebugEnabled()) { + LOG.debug("*** WRITE COMPLETE ***"); + } // close ledger lh.close(); } catch (BKException e) { diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/BookieZKExpireTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/BookieZKExpireTest.java index 21ac7e5c4b3..fbe6c921eb8 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/BookieZKExpireTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/BookieZKExpireTest.java @@ -21,39 +21,125 @@ package org.apache.bookkeeper.test; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.fail; +import io.netty.buffer.UnpooledByteBufAllocator; import java.io.File; import java.util.HashSet; - +import lombok.extern.slf4j.Slf4j; +import org.apache.bookkeeper.bookie.MockUncleanShutdownDetection; +import org.apache.bookkeeper.bookie.TestBookieImpl; +import org.apache.bookkeeper.common.testing.annotations.FlakyTest; import org.apache.bookkeeper.conf.ServerConfiguration; import org.apache.bookkeeper.proto.BookieServer; -import org.junit.Test; +import org.apache.bookkeeper.stats.NullStatsLogger; +import org.apache.bookkeeper.util.PortManager; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.condition.EnabledForJreRange; +import org.junit.jupiter.api.condition.JRE; /** * Test bookie expiration. */ +@Slf4j public class BookieZKExpireTest extends BookKeeperClusterTestCase { public BookieZKExpireTest() { super(0); + } + + /* + Should recover from request timeout. 
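+ Suspending the ZK send thread for longer than zookeeper.request.timeout, but well within the session timeout, should only fail the outstanding request; the session stays alive and the bookie keeps running.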
+ */ + @Test + @SuppressWarnings("deprecation") + @EnabledForJreRange(max = JRE.JAVA_17) + public void testBookieServerZKRequestTimeoutBehaviour() throws Exception { // 6000 is minimum due to default tick time - baseConf.setZkTimeout(6000); - baseClientConf.setZkTimeout(6000); + System.setProperty("zookeeper.request.timeout", "6000"); + baseConf.setZkTimeout(24000); + baseClientConf.setZkTimeout(24000); + BookieServer server = null; + try { + File f = tmpDirs.createNew("bookieserver", "test"); + + HashSet threadset = new HashSet(); + int threadCount = Thread.activeCount(); + Thread[] threads = new Thread[threadCount * 2]; + threadCount = Thread.enumerate(threads); + for (int i = 0; i < threadCount; i++) { + if (threads[i].getName().contains("SendThread")) { + threadset.add(threads[i]); + } + } + + ServerConfiguration conf = newServerConfiguration(PortManager.nextFreePort(), f, new File[] { f }); + server = new BookieServer( + conf, new TestBookieImpl(conf), + NullStatsLogger.INSTANCE, UnpooledByteBufAllocator.DEFAULT, + new MockUncleanShutdownDetection()); + server.start(); + + int secondsToWait = 5; + while (!server.isRunning()) { + Thread.sleep(1000); + if (secondsToWait-- <= 0) { + fail("Bookie never started"); + } + } + Thread sendthread = null; + threadCount = Thread.activeCount(); + threads = new Thread[threadCount * 2]; + threadCount = Thread.enumerate(threads); + for (int i = 0; i < threadCount; i++) { + if (threads[i].getName().contains("SendThread") + && !threadset.contains(threads[i])) { + sendthread = threads[i]; + break; + } + } + assertNotNull(sendthread, "Send thread not found"); + + log.info("Suspending threads"); + sendthread.suspend(); + Thread.sleep(12000); + log.info("Resuming threads"); + sendthread.resume(); + + // allow watcher thread to run + Thread.sleep(3000); + assertTrue(server.isBookieRunning(), "Bookie should not shutdown on zk timeout"); + assertTrue(server.isRunning(), "Bookie Server should not shutdown on zk timeout"); + } finally { + System.clearProperty("zookeeper.request.timeout"); + server.shutdown(); + } } + /* + Bookie cannot recover from ZK Client's SessionExpired error. + In this case the ZK client must be recreated; reconnecting does not work. + Attempts to reconnect by BookieStateManager's RegistrationManager listener + will fail (even if retried many times).
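+ The bookie is therefore expected to shut itself down once the session expires.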
+ */ + @FlakyTest(value = "https://github.com/apache/bookkeeper/issues/4142") @SuppressWarnings("deprecation") - @Test - public void testBookieServerZKExpireBehaviour() throws Exception { + @EnabledForJreRange(max = JRE.JAVA_17) + public void testBookieServerZKSessionExpireBehaviour() throws Exception { + // 6000 is minimum due to default tick time + System.setProperty("zookeeper.request.timeout", "0"); + baseConf.setZkTimeout(6000); + baseClientConf.setZkTimeout(6000); BookieServer server = null; try { - File f = createTempDir("bookieserver", "test"); + File f = tmpDirs.createNew("bookieserver", "test"); HashSet threadset = new HashSet(); int threadCount = Thread.activeCount(); - Thread threads[] = new Thread[threadCount * 2]; + Thread[] threads = new Thread[threadCount * 2]; threadCount = Thread.enumerate(threads); for (int i = 0; i < threadCount; i++) { if (threads[i].getName().indexOf("SendThread") != -1) { @@ -62,7 +148,10 @@ public void testBookieServerZKExpireBehaviour() throws Exception { } ServerConfiguration conf = newServerConfiguration(PortManager.nextFreePort(), f, new File[] { f }); - server = new BookieServer(conf); + server = new BookieServer( + conf, new TestBookieImpl(conf), + NullStatsLogger.INSTANCE, UnpooledByteBufAllocator.DEFAULT, + new MockUncleanShutdownDetection()); server.start(); int secondsToWait = 5; @@ -83,18 +172,22 @@ public void testBookieServerZKExpireBehaviour() throws Exception { break; } } - assertNotNull("Send thread not found", sendthread); + assertNotNull(sendthread, "Send thread not found"); + log.info("Suspending threads"); sendthread.suspend(); - Thread.sleep(2 * conf.getZkTimeout()); + Thread.sleep(2L * conf.getZkTimeout()); + log.info("Resuming threads"); sendthread.resume(); // allow watcher thread to run Thread.sleep(3000); - assertTrue("Bookie should not shutdown on losing zk session", server.isBookieRunning()); - assertTrue("Bookie Server should not shutdown on losing zk session", server.isRunning()); + assertFalse(server.isBookieRunning(), "Bookie should shutdown on losing zk session"); + assertFalse(server.isRunning(), "Bookie Server should shutdown on losing zk session"); } finally { + System.clearProperty("zookeeper.request.timeout"); server.shutdown(); } } + } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/ConcurrentLedgerTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/ConcurrentLedgerTest.java index d611e9daf95..11b7fe80d04 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/ConcurrentLedgerTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/ConcurrentLedgerTest.java @@ -25,7 +25,6 @@ import io.netty.buffer.ByteBuf; import io.netty.buffer.Unpooled; - import java.io.File; import java.io.IOException; import java.nio.ByteBuffer; @@ -33,12 +32,12 @@ import java.util.List; import java.util.concurrent.Semaphore; import java.util.concurrent.atomic.AtomicInteger; - import org.apache.bookkeeper.bookie.Bookie; import org.apache.bookkeeper.bookie.BookieException; +import org.apache.bookkeeper.bookie.TestBookieImpl; import org.apache.bookkeeper.conf.ServerConfiguration; import org.apache.bookkeeper.conf.TestBKConfiguration; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.WriteCallback; import org.junit.After; import org.junit.Before; @@ -87,7 +86,7 @@ public void setUp() throws Exception { conf.setMetadataServiceUri(null); 
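// a null metadata service URI lets the bookie run without an external metadata store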
conf.setJournalDirName(txnDir.getPath()); conf.setLedgerDirNames(new String[] { ledgerDir.getPath() }); - bookie = new Bookie(conf); + bookie = new TestBookieImpl(conf); bookie.start(); } @@ -109,7 +108,7 @@ public void tearDown() { recursiveDelete(ledgerDir); } - byte zeros[] = new byte[16]; + byte[] zeros = new byte[16]; int iterations = 51; { @@ -168,7 +167,7 @@ private long doWrites(int ledgers, int size, int totalwrites) WriteCallback cb = new WriteCallback() { @Override public void writeComplete(int rc, long ledgerId, long entryId, - BookieSocketAddress addr, Object ctx) { + BookieId addr, Object ctx) { AtomicInteger counter = (AtomicInteger) ctx; counter.getAndIncrement(); throttle.release(); diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/ConditionalSetTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/ConditionalSetTest.java index b6d1153d16e..db8dcd9223c 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/ConditionalSetTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/ConditionalSetTest.java @@ -20,10 +20,7 @@ */ package org.apache.bookkeeper.test; -import static org.junit.Assert.fail; - import java.io.IOException; - import org.apache.bookkeeper.client.BKException; import org.apache.bookkeeper.client.BookKeeper; import org.apache.bookkeeper.client.BookKeeper.DigestType; @@ -76,9 +73,13 @@ public void testConditionalSet() throws IOException, InterruptedException, LedgerHandle lhWrite = bkc.createLedger(digestType, new byte[] { 'a', 'b' }); long ledgerId = lhWrite.getId(); - LOG.debug("Ledger ID: " + lhWrite.getId()); + if (LOG.isDebugEnabled()) { + LOG.debug("Ledger ID: {}", ledgerId); + } for (int i = 0; i < 10; i++) { - LOG.debug("Adding entry: " + i); + if (LOG.isDebugEnabled()) { + LOG.debug("Adding entry: " + i); + } lhWrite.addEntry(entry); } @@ -86,21 +87,19 @@ public void testConditionalSet() throws IOException, InterruptedException, * Open a ledger for reading, which triggers recovery, since the ledger * is still open. */ - LOG.debug("Instantiating new bookkeeper client."); + if (LOG.isDebugEnabled()) { + LOG.debug("Instantiating new bookkeeper client."); + } LedgerHandle lhRead = bkcReader.openLedger(lhWrite.getId(), digestType, new byte[] { 'a', 'b' }); - LOG.debug("Opened the ledger already"); + if (LOG.isDebugEnabled()) { + LOG.debug("Opened the ledger already"); + } /* - * Writer tries to close the ledger, and if should fail. + * Writer tries to close the ledger, and it should succeed as recovery closed + * the ledger already, but with the correct LAC and length */ - try { - lhWrite.close(); - fail("Should have received an exception when trying to close the ledger."); - } catch (BKException e) { - /* - * Correctly failed to close the ledger - */ - } + lhWrite.close(); } } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/ConfigurationTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/ConfigurationTest.java index 3252c32a1b4..5eb3c82c098 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/ConfigurationTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/ConfigurationTest.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file @@ -25,7 +25,6 @@ import org.apache.bookkeeper.conf.AbstractConfiguration; import org.apache.bookkeeper.conf.ClientConfiguration; import org.apache.bookkeeper.conf.ServerConfiguration; - import org.junit.Test; /** diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/ForceReadOnlyBookieTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/ForceReadOnlyBookieTest.java index 28ce82afe66..ed3e5a65c93 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/ForceReadOnlyBookieTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/ForceReadOnlyBookieTest.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -25,15 +25,14 @@ import java.io.File; import java.util.Enumeration; - import org.apache.bookkeeper.bookie.Bookie; +import org.apache.bookkeeper.bookie.BookieImpl; import org.apache.bookkeeper.bookie.InterleavedLedgerStorage; import org.apache.bookkeeper.bookie.LedgerDirsManager; import org.apache.bookkeeper.client.BookKeeper.DigestType; import org.apache.bookkeeper.client.LedgerEntry; import org.apache.bookkeeper.client.LedgerHandle; import org.junit.Test; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -61,20 +60,20 @@ public void testBookieForceStartAsReadOnly() throws Exception { ledger.addEntry("data".getBytes()); } ledger.close(); - LOG.info("successed prepare"); + LOG.info("prepare succeeded"); // start bookie 1 as readonly - bsConfs.get(1).setReadOnlyModeEnabled(true); - bsConfs.get(1).setForceReadOnlyBookie(true); + confByIndex(1).setReadOnlyModeEnabled(true); + confByIndex(1).setForceReadOnlyBookie(true); restartBookies(); - Bookie bookie = bs.get(1).getBookie(); + Bookie bookie = serverByIndex(1).getBookie(); assertTrue("Bookie should be running and in readonly mode", bookie.isRunning() && bookie.isReadOnly()); - LOG.info("successed force start ReadOnlyBookie"); + LOG.info("force start ReadOnlyBookie succeeded"); // Check new bookie with readonly mode enabled. - File[] ledgerDirs = bsConfs.get(1).getLedgerDirs(); + File[] ledgerDirs = confByIndex(1).getLedgerDirs(); assertEquals("Only one ledger dir should be present", 1, ledgerDirs.length); // kill the writable bookie @@ -86,13 +85,13 @@ public void testBookieForceStartAsReadOnly() throws Exception { assertEquals("Entry should contain correct data", "data", new String(entry.getEntry())); } - LOG.info("successed read entry from ReadOnlyBookie"); + LOG.info("read entry from ReadOnlyBookie succeeded"); // test will not transfer to Writable mode. 
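// (forceReadOnlyBookie keeps the bookie in readonly mode even when a writable dir comes back)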
- LedgerDirsManager ledgerDirsManager = bookie.getLedgerDirsManager(); + LedgerDirsManager ledgerDirsManager = ((BookieImpl) bookie).getLedgerDirsManager(); ledgerDirsManager.addToWritableDirs(new File(ledgerDirs[0], "current"), true); assertTrue("Bookie should be running and in readonly mode", bookie.isRunning() && bookie.isReadOnly()); - LOG.info("successed: bookie still readonly"); + LOG.info("bookie still readonly"); } } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/LedgerCreateDeleteTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/LedgerCreateDeleteTest.java index 5f7dd580a42..da342c863df 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/LedgerCreateDeleteTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/LedgerCreateDeleteTest.java @@ -24,11 +24,9 @@ import static org.junit.Assert.fail; import java.util.ArrayList; - import org.apache.bookkeeper.client.BKException; import org.apache.bookkeeper.client.BookKeeper.DigestType; import org.apache.bookkeeper.client.LedgerHandle; - import org.junit.Before; import org.junit.Test; diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/LedgerDeleteTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/LedgerDeleteTest.java index 90c48bd3ebb..a638fa73830 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/LedgerDeleteTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/LedgerDeleteTest.java @@ -23,9 +23,9 @@ import static org.junit.Assert.assertFalse; import java.io.File; +import java.util.List; import java.util.concurrent.CountDownLatch; import java.util.concurrent.atomic.AtomicInteger; - import org.apache.bookkeeper.bookie.InterleavedLedgerStorage; import org.apache.bookkeeper.client.AsyncCallback.AddCallback; import org.apache.bookkeeper.client.BKException; @@ -122,15 +122,16 @@ public void testLedgerDelete() throws Exception { // restart bookies to force rolling entry log files restartBookies(); + List ledgerDirectories = bookieLedgerDirs(); // Delete all of these ledgers from the BookKeeper client for (LedgerHandle lh : lhs) { bkc.deleteLedger(lh.getId()); } LOG.info("Finished deleting all ledgers so waiting for the GC thread to clean up the entryLogs"); - Thread.sleep(2000); + Thread.sleep(5000); // Verify that the first entry log (0.log) has been deleted from all of the Bookie Servers. - for (File ledgerDirectory : tmpDirs) { + for (File ledgerDirectory : ledgerDirectories) { assertFalse("Found the entry log file (0.log) that should have been deleted in ledgerDirectory: " + ledgerDirectory, TestUtils.hasLogFiles(ledgerDirectory, true, 0)); } @@ -161,7 +162,7 @@ public void testLedgerDeleteWithExistingEntryLogs() throws Exception { bkc.deleteLedger(lh.getId()); } LOG.info("Finished deleting all ledgers so waiting for the GC thread to clean up the entryLogs"); - Thread.sleep(2 * baseConf.getGcWaitTime()); + Thread.sleep(5000); /* * Verify that the first two entry logs ([0,1].log) have been deleted @@ -169,7 +170,7 @@ public void testLedgerDeleteWithExistingEntryLogs() throws Exception { * test, a new entry log is created. We know then that the first two * entry logs should be deleted. 
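* (the garbage collector never deletes the entry log it is still writing to, which is why the bookies are restarted first)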
*/ - for (File ledgerDirectory : tmpDirs) { + for (File ledgerDirectory : bookieLedgerDirs()) { assertFalse("Found the entry log file ([0,1].log) that should have been deleted in ledgerDirectory: " + ledgerDirectory, TestUtils.hasLogFiles(ledgerDirectory, true, 0, 1)); } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/LocalBookiesRegistryTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/LocalBookiesRegistryTest.java index f1c1c0a3859..36ff08b3853 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/LocalBookiesRegistryTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/LocalBookiesRegistryTest.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -24,7 +24,6 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; -import org.apache.bookkeeper.proto.BookieServer; import org.apache.bookkeeper.proto.LocalBookiesRegistry; import org.junit.Test; @@ -34,16 +33,14 @@ public class LocalBookiesRegistryTest extends BookKeeperClusterTestCase { public LocalBookiesRegistryTest() { - super(3); + super(1); baseConf.setDisableServerSocketBind(true); baseConf.setEnableLocalTransport(true); } @Test public void testAccessibleLocalBookiesRegistry() throws Exception { - assertEquals(3, bs.size()); - for (BookieServer bk : bs) { - assertTrue(LocalBookiesRegistry.isLocalBookie(bk.getLocalAddress())); - } + assertEquals(1, bookieCount()); + bookieAddresses().forEach(a -> assertTrue(LocalBookiesRegistry.isLocalBookie(a))); } } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/MultipleThreadReadTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/MultipleThreadReadTest.java index 1d3929ca02e..dd451dc87a5 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/MultipleThreadReadTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/MultipleThreadReadTest.java @@ -20,7 +20,7 @@ */ package org.apache.bookkeeper.test; -import static com.google.common.base.Charsets.UTF_8; +import static java.nio.charset.StandardCharsets.UTF_8; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; @@ -31,7 +31,6 @@ import java.util.List; import java.util.NoSuchElementException; import java.util.concurrent.atomic.AtomicBoolean; - import org.apache.bookkeeper.client.AsyncCallback; import org.apache.bookkeeper.client.BKException; import org.apache.bookkeeper.client.BookKeeper; diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/OpStatTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/OpStatTest.java index a950709a009..1a7399c152a 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/OpStatTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/OpStatTest.java @@ -22,7 +22,6 @@ package org.apache.bookkeeper.test; import static org.apache.bookkeeper.bookie.BookKeeperServerStats.BOOKIE_SCOPE; -import static org.apache.bookkeeper.bookie.BookKeeperServerStats.JOURNAL_CB_QUEUE_SIZE; import static org.apache.bookkeeper.bookie.BookKeeperServerStats.JOURNAL_FORCE_WRITE_QUEUE_SIZE; import static org.apache.bookkeeper.bookie.BookKeeperServerStats.JOURNAL_QUEUE_SIZE; import static org.apache.bookkeeper.bookie.BookKeeperServerStats.JOURNAL_SCOPE; @@ -32,7 +31,7 @@ import java.util.function.BiConsumer; import org.apache.bookkeeper.client.BookKeeper; import 
org.apache.bookkeeper.client.LedgerHandle; -import org.apache.bookkeeper.util.MathUtils; +import org.apache.bookkeeper.common.util.MathUtils; import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -70,7 +69,7 @@ private void validateOpStat(TestStatsProvider stats, String path, BiConsumer f) { + private void validateOpStat(TestStatsProvider stats, String[] paths, BiConsumer f) { for (String path : paths) { validateOpStat(stats, path, f); } @@ -83,7 +82,7 @@ private void validateNonMonotonicCounterGauge(TestStatsProvider stats, String pa f.accept(counter.get(), counter.getMax()); } - private void validateNonMonotonicCounterGauges(TestStatsProvider stats, String paths[], BiConsumer f) { + private void validateNonMonotonicCounterGauges(TestStatsProvider stats, String[] paths, BiConsumer f) { for (String path : paths) { validateNonMonotonicCounterGauge(stats, path, f); } @@ -114,9 +113,8 @@ public void testTopLevelBookieWriteCounters() throws Exception { assertTrue(average <= elapsed); }); validateNonMonotonicCounterGauges(stats, new String[]{ - BOOKIE_SCOPE + "." + JOURNAL_SCOPE + "." + JOURNAL_CB_QUEUE_SIZE, - BOOKIE_SCOPE + "." + JOURNAL_SCOPE + "." + JOURNAL_FORCE_WRITE_QUEUE_SIZE, - BOOKIE_SCOPE + "." + JOURNAL_SCOPE + "." + JOURNAL_QUEUE_SIZE + BOOKIE_SCOPE + "." + JOURNAL_SCOPE + ".journalIndex_0." + JOURNAL_FORCE_WRITE_QUEUE_SIZE, + BOOKIE_SCOPE + "." + JOURNAL_SCOPE + ".journalIndex_0." + JOURNAL_QUEUE_SIZE }, (value, max) -> { assertTrue(max > 0); }); diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/PortManager.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/PortManager.java deleted file mode 100644 index 0a4b8ab0ef8..00000000000 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/PortManager.java +++ /dev/null @@ -1,53 +0,0 @@ -/* - * - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - * - */ -package org.apache.bookkeeper.test; - -import java.net.ServerSocket; -/** - * Port manager allows a base port to be specified on the commandline. - * Tests will then use ports, counting up from this base port. - * This allows multiple instances of the bookkeeper tests to run at once. 
- */ -public class PortManager { - private static int nextPort = getBasePort(); - - public static synchronized int nextFreePort() { - int exceptionCount = 0; - while (true) { - int port = nextPort++; - try (ServerSocket ss = new ServerSocket(port)) { - ss.close(); - //Give it some time to truly close the connection - Thread.sleep(100); - return port; - } catch (Exception e) { - exceptionCount++; - if (exceptionCount > 5) { - throw new RuntimeException(e); - } - } - } - } - - private static int getBasePort() { - return Integer.valueOf(System.getProperty("test.basePort", "15000")); - } -} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/ReadOnlyBookieTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/ReadOnlyBookieTest.java index 35b2fd36d47..d3aa1ed7ad2 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/ReadOnlyBookieTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/ReadOnlyBookieTest.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -24,10 +24,12 @@ import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; + import java.io.File; import java.util.Enumeration; import java.util.concurrent.TimeUnit; import org.apache.bookkeeper.bookie.Bookie; +import org.apache.bookkeeper.bookie.BookieImpl; import org.apache.bookkeeper.bookie.InterleavedLedgerStorage; import org.apache.bookkeeper.bookie.LedgerDirsManager; import org.apache.bookkeeper.client.BKException; @@ -35,6 +37,8 @@ import org.apache.bookkeeper.client.LedgerEntry; import org.apache.bookkeeper.client.LedgerHandle; import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.util.PortManager; +import org.awaitility.Awaitility; import org.junit.Test; /** @@ -61,11 +65,11 @@ public void testBookieShouldServeAsReadOnly() throws Exception { "".getBytes()); // Check new bookie with readonly mode enabled. - File[] ledgerDirs = bsConfs.get(1).getLedgerDirs(); + File[] ledgerDirs = confByIndex(1).getLedgerDirs(); assertEquals("Only one ledger dir should be present", 1, ledgerDirs.length); - Bookie bookie = bs.get(1).getBookie(); - LedgerDirsManager ledgerDirsManager = bookie.getLedgerDirsManager(); + Bookie bookie = serverByIndex(1).getBookie(); + LedgerDirsManager ledgerDirsManager = ((BookieImpl) bookie).getLedgerDirsManager(); for (int i = 0; i < 10; i++) { ledger.addEntry("data".getBytes()); @@ -99,15 +103,16 @@ public void testBookieShouldServeAsReadOnly() throws Exception { public void testBookieShouldTurnWritableFromReadOnly() throws Exception { killBookie(0); baseConf.setReadOnlyModeEnabled(true); + baseConf.setDiskCheckInterval(Integer.MAX_VALUE); startNewBookie(); LedgerHandle ledger = bkc.createLedger(2, 2, DigestType.MAC, "".getBytes()); // Check new bookie with readonly mode enabled. 
- File[] ledgerDirs = bsConfs.get(1).getLedgerDirs(); + File[] ledgerDirs = confByIndex(1).getLedgerDirs(); assertEquals("Only one ledger dir should be present", 1, ledgerDirs.length); - Bookie bookie = bs.get(1).getBookie(); + BookieImpl bookie = (BookieImpl) serverByIndex(1).getBookie(); LedgerDirsManager ledgerDirsManager = bookie.getLedgerDirsManager(); for (int i = 0; i < 10; i++) { @@ -126,12 +131,14 @@ public void testBookieShouldTurnWritableFromReadOnly() throws Exception { // Expected } - bkc.waitForReadOnlyBookie(Bookie.getBookieAddress(bsConfs.get(1))) - .get(30, TimeUnit.SECONDS); - + // waitForReadOnlyBookie adds another listener thread to observe the node status of bookie, + // which may be out of sync with the triggering of node changes in EnsemblePlacementPolicy. + // This sequence leads to a flaky test. So change from watching zk to Awaitility.await(). + Awaitility.await().untilAsserted(() -> { + assertTrue("Bookie should be running and converted to readonly mode", + bookie.isRunning() && bookie.isReadOnly()); + }); LOG.info("bookie is running {}, readonly {}.", bookie.isRunning(), bookie.isReadOnly()); - assertTrue("Bookie should be running and converted to readonly mode", - bookie.isRunning() && bookie.isReadOnly()); // should fail to create ledger try { @@ -144,12 +151,14 @@ public void testBookieShouldTurnWritableFromReadOnly() throws Exception { // Now add the current ledger dir back to writable dirs list ledgerDirsManager.addToWritableDirs(testDir, true); - bkc.waitForWritableBookie(Bookie.getBookieAddress(bsConfs.get(1))) - .get(30, TimeUnit.SECONDS); - + // waitForWritableBookie adds another listener thread to observe the node status of bookie, + // which may be out of sync with the triggering of node changes in EnsemblePlacementPolicy. + // This sequence leads to a flaky test. So change from watching zk to Awaitility.await().
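+ // untilAsserted polls the assertion until it passes or Awaitility's timeout elapses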
+ Awaitility.await().untilAsserted(() -> { + assertTrue("Bookie should be running and converted back to writable mode", bookie.isRunning() + && !bookie.isReadOnly()); + }); LOG.info("bookie is running {}, readonly {}.", bookie.isRunning(), bookie.isReadOnly()); - assertTrue("Bookie should be running and converted back to writable mode", bookie.isRunning() - && !bookie.isReadOnly()); LedgerHandle newLedger = bkc.createLedger(2, 2, DigestType.MAC, "".getBytes()); for (int i = 0; i < 10; i++) { @@ -171,10 +180,10 @@ public void testBookieShutdownIfReadOnlyModeNotEnabled() throws Exception { baseConf.setReadOnlyModeEnabled(false); startNewBookie(); - File[] ledgerDirs = bsConfs.get(1).getLedgerDirs(); + File[] ledgerDirs = confByIndex(1).getLedgerDirs(); assertEquals("Only one ledger dir should be present", 1, ledgerDirs.length); - Bookie bookie = bs.get(1).getBookie(); + BookieImpl bookie = (BookieImpl) serverByIndex(1).getBookie(); LedgerHandle ledger = bkc.createLedger(2, 2, DigestType.MAC, "".getBytes()); LedgerDirsManager ledgerDirsManager = bookie.getLedgerDirsManager(); @@ -209,10 +218,10 @@ public void testBookieContinueWritingIfMultipleLedgersPresent() throws Exception { startNewBookieWithMultipleLedgerDirs(2); - File[] ledgerDirs = bsConfs.get(1).getLedgerDirs(); + File[] ledgerDirs = confByIndex(1).getLedgerDirs(); assertEquals("Only one ledger dir should be present", 2, ledgerDirs.length); - Bookie bookie = bs.get(1).getBookie(); + BookieImpl bookie = (BookieImpl) serverByIndex(1).getBookie(); LedgerHandle ledger = bkc.createLedger(2, 2, DigestType.MAC, "".getBytes()); LedgerDirsManager ledgerDirsManager = bookie.getLedgerDirsManager(); @@ -229,26 +238,25 @@ public void testBookieContinueWritingIfMultipleLedgersPresent() assertEquals("writable dirs should have one dir", 1, ledgerDirsManager .getWritableLedgerDirs().size()); assertTrue("Bookie should shutdown if readOnlyMode not enabled", - bookie.isAlive()); + bookie.isRunning()); } private void startNewBookieWithMultipleLedgerDirs(int numOfLedgerDirs) throws Exception { - ServerConfiguration conf = bsConfs.get(1); + ServerConfiguration conf = confByIndex(1); killBookie(1); File[] ledgerDirs = new File[numOfLedgerDirs]; for (int i = 0; i < numOfLedgerDirs; i++) { - File dir = createTempDir("bookie", "test"); - tmpDirs.add(dir); + File dir = tmpDirs.createNew("bookie", "test"); ledgerDirs[i] = dir; } ServerConfiguration newConf = newServerConfiguration( - conf.getBookiePort() + 1, + PortManager.nextFreePort(), ledgerDirs[0], ledgerDirs); - bsConfs.add(newConf); - bs.add(startBookie(newConf)); + newConf.setDiskCheckInterval(Integer.MAX_VALUE); + startAndAddBookie(newConf); } /** @@ -259,9 +267,10 @@ public void testLedgerCreationShouldFailWithReadonlyBookie() throws Exception { killBookie(1); baseConf.setReadOnlyModeEnabled(true); startNewBookie(); - bs.get(1).getBookie().getStateManager().doTransitionToReadOnlyMode(); + + serverByIndex(1).getBookie().getStateManager().transitionToReadOnlyMode().get(); try { - bkc.waitForReadOnlyBookie(Bookie.getBookieAddress(bsConfs.get(1))) + bkc.waitForReadOnlyBookie(BookieImpl.getBookieId(confByIndex(1))) .get(30, TimeUnit.SECONDS); bkc.createLedger(2, 2, DigestType.CRC32, "".getBytes()); @@ -280,14 +289,14 @@ public void testReadFromReadOnlyBookieShouldBeSuccess() throws Exception { ledger.addEntry("data".getBytes()); } ledger.close(); - bsConfs.get(1).setReadOnlyModeEnabled(true); - bsConfs.get(1).setDiskCheckInterval(500); + confByIndex(1).setReadOnlyModeEnabled(true); + 
confByIndex(1).setDiskCheckInterval(500); restartBookies(); // Check new bookie with readonly mode enabled. - File[] ledgerDirs = bsConfs.get(1).getLedgerDirs(); + File[] ledgerDirs = confByIndex(1).getLedgerDirs(); assertEquals("Only one ledger dir should be present", 1, ledgerDirs.length); - Bookie bookie = bs.get(1).getBookie(); + BookieImpl bookie = (BookieImpl) serverByIndex(1).getBookie(); LedgerDirsManager ledgerDirsManager = bookie.getLedgerDirsManager(); // Now add the current ledger dir to filled dirs list @@ -306,4 +315,4 @@ public void testReadFromReadOnlyBookieShouldBeSuccess() throws Exception { assertEquals("Entry should contain correct data", "data", new String(entry.getEntry())); } } -} \ No newline at end of file +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/TestCallbacks.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/TestCallbacks.java index 237f0502766..cdfdcd05b0e 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/TestCallbacks.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/TestCallbacks.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -21,7 +21,6 @@ package org.apache.bookkeeper.test; import com.google.common.util.concurrent.AbstractFuture; - import org.apache.bookkeeper.client.AsyncCallback.AddCallback; import org.apache.bookkeeper.client.BKException; import org.apache.bookkeeper.client.LedgerHandle; diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/TmpDirs.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/TmpDirs.java new file mode 100644 index 00000000000..7b8d23e2bd4 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/TmpDirs.java @@ -0,0 +1,50 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.test; + +import java.io.File; +import java.util.LinkedList; +import java.util.List; +import org.apache.bookkeeper.util.IOUtils; +import org.apache.commons.io.FileUtils; + +/** + * Utility class for managing tmp directories in tests. 
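+ * Directories created via createNew() are tracked so that cleanup() can delete them all at teardown.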
+ */ +public class TmpDirs { + private final List tmpDirs = new LinkedList<>(); // retained to delete files + + public File createNew(String prefix, String suffix) throws Exception { + File dir = IOUtils.createTempDir(prefix, suffix); + tmpDirs.add(dir); + return dir; + } + + public void cleanup() throws Exception { + for (File f : tmpDirs) { + FileUtils.deleteDirectory(f); + } + } + + public List getDirs() { + return tmpDirs; + } +} \ No newline at end of file diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/ZooKeeperCluster.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/ZooKeeperCluster.java new file mode 100644 index 00000000000..b0e828bd5ca --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/ZooKeeperCluster.java @@ -0,0 +1,84 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.test; + +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.apache.bookkeeper.util.BookKeeperConstants.AVAILABLE_NODE; +import static org.apache.bookkeeper.util.BookKeeperConstants.INSTANCEID; +import static org.apache.bookkeeper.util.BookKeeperConstants.READONLY; + +import java.io.IOException; +import java.util.UUID; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; +import org.apache.bookkeeper.zookeeper.ZooKeeperWatcherBase; +import org.apache.zookeeper.CreateMode; +import org.apache.zookeeper.KeeperException; +import org.apache.zookeeper.Transaction; +import org.apache.zookeeper.ZooDefs.Ids; +import org.apache.zookeeper.ZooKeeper; + +/** + * Interface for ZooKeeperCluster. 
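+ * Abstracts the test ZooKeeper deployment so the same tests can run against a single server or a quorum.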
+ */ +public interface ZooKeeperCluster { + ZooKeeper getZooKeeperClient(); + + String getZooKeeperConnectString(); + + String getMetadataServiceUri(); + + String getMetadataServiceUri(String zkLedgersRootPath); + + String getMetadataServiceUri(String zkLedgersRootPath, String type); + + void startCluster() throws Exception; + + void stopCluster() throws Exception; + + void restartCluster() throws Exception; + + void killCluster() throws Exception; + + void sleepCluster(int time, TimeUnit timeUnit, CountDownLatch l) + throws InterruptedException, IOException; + + default void expireSession(ZooKeeper zk) throws Exception { + long id = zk.getSessionId(); + byte[] password = zk.getSessionPasswd(); + ZooKeeperWatcherBase w = new ZooKeeperWatcherBase(10000, false); + ZooKeeper zk2 = new ZooKeeper(getZooKeeperConnectString(), zk.getSessionTimeout(), w, id, password); + w.waitForConnection(); + zk2.close(); + } + + default void createBKEnsemble(String ledgersPath) throws KeeperException, InterruptedException { + Transaction txn = getZooKeeperClient().transaction(); + txn.create(ledgersPath, new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); + txn.create(ledgersPath + "/" + AVAILABLE_NODE, new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); + txn.create(ledgersPath + "/" + AVAILABLE_NODE + "/" + READONLY, new byte[0], Ids.OPEN_ACL_UNSAFE, + CreateMode.PERSISTENT); + String instanceId = UUID.randomUUID().toString(); + txn.create(ledgersPath + "/" + INSTANCEID, instanceId.getBytes(UTF_8), + Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); + txn.commit(); + } +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/ZooKeeperClusterUtil.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/ZooKeeperClusterUtil.java new file mode 100644 index 00000000000..6dbf182110f --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/ZooKeeperClusterUtil.java @@ -0,0 +1,150 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.test; + +import java.io.IOException; +import java.nio.file.Files; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; +import lombok.extern.slf4j.Slf4j; +import org.apache.bookkeeper.meta.LongHierarchicalLedgerManagerFactory; +import org.apache.bookkeeper.zookeeper.ZooKeeperClient; +import org.apache.zookeeper.KeeperException; +import org.apache.zookeeper.ZooKeeper; +import org.apache.zookeeper.test.QuorumUtil; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Provides multi node zookeeper cluster. 
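+ * Backed by ZooKeeper's QuorumUtil test harness.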
+ */ +@Slf4j +public class ZooKeeperClusterUtil implements ZooKeeperCluster { + + static { + enableZookeeperTestEnvVariables(); + } + + static final Logger LOG = LoggerFactory.getLogger(ZooKeeperClusterUtil.class); + private final int numOfZKNodes; + public QuorumUtil quorumUtil; + String connectString; + protected ZooKeeper zkc; // zookeeper client + + public static void enableZookeeperTestEnvVariables() { + /* + * org.apache.zookeeper.test.ClientBase uses FourLetterWordMain, from + * 3.5.3 four letter words are disabled by default due to security + * reasons + */ + System.setProperty("zookeeper.4lw.commands.whitelist", "*"); + System.setProperty("zookeeper.admin.enableServer", "false"); + try { + System.setProperty("build.test.dir", Files.createTempDirectory("zktests").toFile().getCanonicalPath()); + } catch (IOException e) { + log.error("Failed to create temp dir, so setting build.test.dir system property to /tmp"); + System.setProperty("build.test.dir", "/tmp"); + } + } + + public ZooKeeperClusterUtil(int numOfZKNodes) throws IOException, KeeperException, InterruptedException { + if ((numOfZKNodes < 3) || (numOfZKNodes % 2 == 0)) { + throw new IllegalArgumentException("numOfZKNodes should be at least 3 and should not be an even number"); + } + this.numOfZKNodes = numOfZKNodes; + } + + @Override + public String getZooKeeperConnectString() { + return connectString; + } + + @Override + public String getMetadataServiceUri() { + return getMetadataServiceUri("/ledgers"); + } + + @Override + public String getMetadataServiceUri(String zkLedgersRootPath) { + return getMetadataServiceUri(zkLedgersRootPath, LongHierarchicalLedgerManagerFactory.NAME); + } + + @Override + public String getMetadataServiceUri(String zkLedgersRootPath, String type) { + /* + * URI doesn't accept ','; for more info, check + * AbstractConfiguration.getMetadataServiceUri() + */ + return "zk+" + type + "://" + connectString.replace(",", ";") + zkLedgersRootPath; + } + + @Override + public ZooKeeper getZooKeeperClient() { + return zkc; + } + + @Override + public void startCluster() throws Exception { + // QuorumUtil will start 2*n+1 nodes.
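+ // for an odd numOfZKNodes, passing numOfZKNodes / 2 yields exactly numOfZKNodes peers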
+ quorumUtil = new QuorumUtil(numOfZKNodes / 2); + quorumUtil.startAll(); + connectString = quorumUtil.getConnString(); + // create a zookeeper client + if (LOG.isDebugEnabled()) { + LOG.debug("Instantiate ZK Client"); + } + zkc = ZooKeeperClient.newBuilder().connectString(getZooKeeperConnectString()).sessionTimeoutMs(10000).build(); + + // create default bk ensemble + createBKEnsemble("/ledgers"); + } + + @Override + public void stopCluster() throws Exception { + if (zkc != null) { + zkc.close(); + } + quorumUtil.shutdownAll(); + } + + @Override + public void restartCluster() throws Exception { + quorumUtil.startAll(); + } + + @Override + public void killCluster() throws Exception { + quorumUtil.tearDown(); + } + + @Override + public void sleepCluster(int time, TimeUnit timeUnit, CountDownLatch l) throws InterruptedException, IOException { + throw new UnsupportedOperationException("sleepServer operation is not supported for ZooKeeperClusterUtil"); + } + + public void stopPeer(int id) throws Exception { + quorumUtil.shutdown(id); + } + + public void enableLocalSession(boolean localSessionEnabled) { + quorumUtil.enableLocalSession(localSessionEnabled); + } +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/ZooKeeperUtil.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/ZooKeeperUtil.java index 09f50495742..dcaa0506afe 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/ZooKeeperUtil.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/ZooKeeperUtil.java @@ -21,8 +21,6 @@ package org.apache.bookkeeper.test; -import static org.apache.bookkeeper.util.BookKeeperConstants.AVAILABLE_NODE; -import static org.apache.bookkeeper.util.BookKeeperConstants.READONLY; import static org.junit.Assert.assertTrue; import java.io.File; @@ -31,15 +29,9 @@ import java.net.InetSocketAddress; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; - import org.apache.bookkeeper.util.IOUtils; import org.apache.bookkeeper.zookeeper.ZooKeeperClient; -import org.apache.bookkeeper.zookeeper.ZooKeeperWatcherBase; import org.apache.commons.io.FileUtils; -import org.apache.zookeeper.CreateMode; -import org.apache.zookeeper.KeeperException; -import org.apache.zookeeper.Transaction; -import org.apache.zookeeper.ZooDefs.Ids; import org.apache.zookeeper.ZooKeeper; import org.apache.zookeeper.server.NIOServerCnxnFactory; import org.apache.zookeeper.server.ZooKeeperServer; @@ -50,7 +42,7 @@ /** * Test the zookeeper utilities. 
*/ -public class ZooKeeperUtil { +public class ZooKeeperUtil implements ZooKeeperCluster { static { // org.apache.zookeeper.test.ClientBase uses FourLetterWordMain, from 3.5.3 four letter words @@ -75,50 +67,49 @@ public ZooKeeperUtil() { connectString = loopbackIPAddr + ":" + zooKeeperPort; } + @Override public ZooKeeper getZooKeeperClient() { return zkc; } + @Override public String getZooKeeperConnectString() { return connectString; } + @Override public String getMetadataServiceUri() { return getMetadataServiceUri("/ledgers"); } + @Override public String getMetadataServiceUri(String zkLedgersRootPath) { return "zk://" + connectString + zkLedgersRootPath; } + @Override public String getMetadataServiceUri(String zkLedgersRootPath, String type) { return "zk+" + type + "://" + connectString + zkLedgersRootPath; } - public void startServer() throws Exception { + @Override + public void startCluster() throws Exception { // create a ZooKeeper server(dataDir, dataLogDir, port) - LOG.debug("Running ZK server"); + if (LOG.isDebugEnabled()) { + LOG.debug("Running ZK server"); + } ClientBase.setupTestEnv(); zkTmpDir = IOUtils.createTempDir("zookeeper", "test"); // start the server and client. - restartServer(); + restartCluster(); // create default bk ensemble createBKEnsemble("/ledgers"); } - public void createBKEnsemble(String ledgersPath) throws KeeperException, InterruptedException { - Transaction txn = zkc.transaction(); - txn.create(ledgersPath, new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); - txn.create(ledgersPath + "/" + AVAILABLE_NODE, - new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); - txn.create(ledgersPath + "/" + AVAILABLE_NODE + "/" + READONLY, - new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); - txn.commit(); - } - - public void restartServer() throws Exception { + @Override + public void restartCluster() throws Exception { zks = new ZooKeeperServer(zkTmpDir, zkTmpDir, ZooKeeperServer.DEFAULT_TICK_TIME); serverFactory = new NIOServerCnxnFactory(); @@ -133,17 +124,22 @@ public void restartServer() throws Exception { boolean b = ClientBase.waitForServerUp(getZooKeeperConnectString(), ClientBase.CONNECTION_TIMEOUT); - LOG.debug("Server up: " + b); + if (LOG.isDebugEnabled()) { + LOG.debug("Server up: " + b); + } // create a zookeeper client - LOG.debug("Instantiate ZK Client"); + if (LOG.isDebugEnabled()) { + LOG.debug("Instantiate ZK Client"); + } zkc = ZooKeeperClient.newBuilder() .connectString(getZooKeeperConnectString()) .sessionTimeoutMs(10000) .build(); } - public void sleepServer(final int time, + @Override + public void sleepCluster(final int time, final TimeUnit timeUnit, final CountDownLatch l) throws InterruptedException, IOException { @@ -171,17 +167,8 @@ public void run() { throw new IOException("ZooKeeper thread not found"); } - public void expireSession(ZooKeeper zk) throws Exception { - long id = zk.getSessionId(); - byte[] password = zk.getSessionPasswd(); - ZooKeeperWatcherBase w = new ZooKeeperWatcherBase(10000); - ZooKeeper zk2 = new ZooKeeper(getZooKeeperConnectString(), - zk.getSessionTimeout(), w, id, password); - w.waitForConnection(); - zk2.close(); - } - - public void stopServer() throws Exception { + @Override + public void stopCluster() throws Exception { if (zkc != null) { zkc.close(); } @@ -198,8 +185,9 @@ public void stopServer() throws Exception { } } - public void killServer() throws Exception { - stopServer(); + @Override + public void killCluster() throws Exception { + stopCluster(); FileUtils.deleteDirectory(zkTmpDir); } 
} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/tls/TestBookieAuthZFactory.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/tls/TestBookieAuthZFactory.java new file mode 100644 index 00000000000..2cd0a80b324 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/tls/TestBookieAuthZFactory.java @@ -0,0 +1,99 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bookkeeper.tls; + +import static org.junit.Assert.fail; + +import java.io.IOException; +import org.apache.bookkeeper.auth.BookieAuthProvider; +import org.apache.bookkeeper.common.util.ReflectionUtils; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +/** + * Light weight Unit Tests for BookieAuthZFactory. + */ +public class TestBookieAuthZFactory { + private static final Logger LOG = LoggerFactory.getLogger(TestBookieAuthZFactory.class); + + public TestBookieAuthZFactory() { + } + + /** + * Initialize a BookieAuthZFactory without configuring authorizedRoles in ServerConfiguration. + * This should fail as in order to use this authorization provider, we need to have authorizedRoles set. + */ + @Test + public void testBookieAuthZInitNoRoles() { + ServerConfiguration conf = new ServerConfiguration(); + String factoryClassName = BookieAuthZFactory.class.getName(); + BookieAuthProvider.Factory factory = ReflectionUtils.newInstance(factoryClassName, + BookieAuthProvider.Factory.class); + + try { + factory.init(conf); + fail("Not supposed to initialize BookieAuthZFactory without authorized roles set"); + } catch (IOException | RuntimeException e) { + LOG.info("BookieAuthZFactory did not initialize as there are no authorized roles set."); + } + } + + /** + * Initialize a BookieAuthZFactory as an authProvider and configure an empty string in authorizedRoles. + * This should fail as in order to use this as an authorization provider, we need to have valid authorizedRoles set. + */ + @Test + public void testBookieAuthZInitEmptyRole() { + ServerConfiguration conf = new ServerConfiguration(); + conf.setAuthorizedRoles(""); + String factoryClassName = BookieAuthZFactory.class.getName(); + BookieAuthProvider.Factory factory = ReflectionUtils.newInstance(factoryClassName, + BookieAuthProvider.Factory.class); + + try { + factory.init(conf); + fail("Not supposed to initialize BookieAuthZFactory without authorized roles set"); + } catch (IOException | RuntimeException e) { + LOG.info("BookieAuthZFactory did not initialize as there are no authorized roles set."); + } + } + + /** + * Initialize a BookieAuthZFactory with a valid string for the configured role. + * However, pass a null (or faulty) connection for it to authorize, it should fail. 
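+ * With no connection there are no peer certificates to derive roles from, so authorization should fail.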
+ */ + @Test + public void testBookieAuthZNewProviderNullAddress() { + ServerConfiguration conf = new ServerConfiguration(); + conf.setAuthorizedRoles("testRole"); + String factoryClassName = BookieAuthZFactory.class.getName(); + BookieAuthProvider.Factory factory = ReflectionUtils.newInstance(factoryClassName, + BookieAuthProvider.Factory.class); + + try { + factory.init(conf); + BookieAuthProvider authProvider = factory.newProvider(null, null); + authProvider.onProtocolUpgrade(); + fail("BookieAuthZFactory should fail with a null connection"); + } catch (IOException | RuntimeException e) { + } + } +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/tls/TestTLS.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/tls/TestTLS.java index 8dafe8bc08b..1ab90d32521 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/tls/TestTLS.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/tls/TestTLS.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -23,9 +23,11 @@ import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; +import static org.junit.Assume.assumeTrue; +import java.io.File; import java.io.IOException; -import java.net.InetSocketAddress; +import java.nio.file.Paths; import java.security.cert.Certificate; import java.security.cert.X509Certificate; import java.util.Arrays; @@ -35,7 +37,8 @@ import java.util.List; import java.util.Set; import java.util.concurrent.CountDownLatch; - +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; import org.apache.bookkeeper.auth.AuthCallbacks; import org.apache.bookkeeper.auth.AuthToken; import org.apache.bookkeeper.auth.BookieAuthProvider; @@ -48,10 +51,10 @@ import org.apache.bookkeeper.client.BookKeeperTestClient; import org.apache.bookkeeper.client.LedgerEntry; import org.apache.bookkeeper.client.LedgerHandle; -import org.apache.bookkeeper.client.LedgerMetadata; +import org.apache.bookkeeper.client.api.LedgerMetadata; import org.apache.bookkeeper.conf.ClientConfiguration; import org.apache.bookkeeper.conf.ServerConfiguration; -import org.apache.bookkeeper.net.BookieSocketAddress; +import org.apache.bookkeeper.net.BookieId; import org.apache.bookkeeper.proto.BookieConnectionPeer; import org.apache.bookkeeper.proto.BookieServer; import org.apache.bookkeeper.proto.ClientConnectionPeer; @@ -59,8 +62,11 @@ import org.apache.bookkeeper.test.BookKeeperClusterTestCase; import org.apache.bookkeeper.test.TestStatsProvider; import org.apache.bookkeeper.tls.TLSContextFactory.KeyStoreType; +import org.apache.bookkeeper.util.IOUtils; +import org.apache.bookkeeper.util.TestUtils; +import org.apache.commons.io.FileUtils; import org.junit.After; -import org.junit.Assume; +import org.junit.Assert; import org.junit.Before; import org.junit.Test; import org.junit.runner.RunWith; @@ -87,28 +93,33 @@ public class TestTLS extends BookKeeperClusterTestCase { private KeyStoreType clientTrustStoreFormat; private KeyStoreType serverKeyStoreFormat; private KeyStoreType serverTrustStoreFormat; + private final boolean useV2Protocol; @Parameters public static Collection data() { return Arrays.asList(new Object[][] { - { "JKS", "JKS" }, - { "PEM", "PEM" }, - { "PKCS12", "PKCS12" }, - { "JKS", "PEM" }, - { "PEM", "PKCS12" }, - { "PKCS12", "JKS" } + { "JKS", "JKS", false }, 
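+ // Note: of the combinations below, only PEM/PEM is additionally exercised with
+ // the v2 wire protocol; every other keystore/truststore pairing runs with the
+ // default protocol only.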
+ { "PEM", "PEM", false }, + { "PEM", "PEM", true }, + { "PKCS12", "PKCS12", false }, + { "JKS", "PEM", false }, + { "PEM", "PKCS12", false }, + { "PKCS12", "JKS", false } }); } - public TestTLS(String keyStoreFormat, String trustStoreFormat) { + public TestTLS(String keyStoreFormat, + String trustStoreFormat, + boolean useV2Protocol) { super(3); this.clientKeyStoreFormat = KeyStoreType.valueOf(keyStoreFormat); this.clientTrustStoreFormat = KeyStoreType.valueOf(trustStoreFormat); this.serverKeyStoreFormat = KeyStoreType.valueOf(keyStoreFormat); this.serverTrustStoreFormat = KeyStoreType.valueOf(trustStoreFormat); + this.useV2Protocol = useV2Protocol; } private String getResourcePath(String resource) throws Exception { - return this.getClass().getClassLoader().getResource(resource).toURI().getPath(); + return Paths.get(this.getClass().getClassLoader().getResource(resource).toURI()).toString(); } @Before @@ -117,6 +128,8 @@ public void setUp() throws Exception { /* client configuration */ baseClientConf.setTLSProviderFactoryClass(TLSContextFactory.class.getName()); baseClientConf.setTLSClientAuthentication(true); + baseClientConf.setUseV2WireProtocol(useV2Protocol); + baseClientConf.setLimitStatsLogging(false); switch (clientKeyStoreFormat) { case PEM: @@ -222,6 +235,15 @@ public void tearDown() throws Exception { super.tearDown(); } + /** + * Verify the BouncyCastleProvider Name is expected. + */ + @Test + public void testGetBouncyCastleProviderName() throws Exception { + String bcName = TLSContextFactory.getProvider().getName(); + Assert.assertEquals(bcName, TLSContextFactory.BC_FIPS); + } + /** * Verify that a server will not start if tls is enabled but no cert is specified. */ @@ -230,7 +252,7 @@ public void testStartTLSServerNoKeyStore() throws Exception { ServerConfiguration bookieConf = newServerConfiguration().setTLSKeyStore(null); try { - bs.add(startBookie(bookieConf)); + startAndAddBookie(bookieConf); fail("Shouldn't have been able to start"); } catch (SecurityException se) { assertTrue(true); @@ -243,19 +265,18 @@ public void testStartTLSServerNoKeyStore() throws Exception { @Test public void testKeyMismatchFailure() throws Exception { // Valid test case only for PEM format keys - Assume.assumeTrue(serverKeyStoreFormat == KeyStoreType.PEM); + assumeTrue(serverKeyStoreFormat == KeyStoreType.PEM); ClientConfiguration clientConf = new ClientConfiguration(baseClientConf); // restart a bookie with bad cert int restartBookieIdx = 0; - ServerConfiguration bookieConf = bsConfs.get(restartBookieIdx) + ServerConfiguration bookieConf = confByIndex(restartBookieIdx) .setTLSCertificatePath(getResourcePath("client-cert.pem")); killBookie(restartBookieIdx); LOG.info("Sleeping for 1s before restarting bookie with bad cert"); Thread.sleep(1000); - bs.add(startBookie(bookieConf)); - bsConfs.add(bookieConf); + startAndAddBookie(bookieConf); // Create ledger and write entries BookKeeper client = new BookKeeper(clientConf); @@ -280,7 +301,7 @@ public void testKeyMismatchFailure() throws Exception { public void testStartTLSServerBadPassword() throws Exception { ServerConfiguration bookieConf = newServerConfiguration().setTLSKeyStorePasswordPath("badpassword"); try { - bs.add(startBookie(bookieConf)); + startAndAddBookie(bookieConf); fail("Shouldn't have been able to start"); } catch (SecurityException se) { assertTrue(true); @@ -292,25 +313,25 @@ private LedgerMetadata testClient(BookKeeper client, int clusterSize) throws Exc int numEntries = 100; long lid; byte[] testEntry = "testEntry".getBytes(); - 
try (LedgerHandle lh = client.createLedger(clusterSize, clusterSize, DigestType.CRC32, passwd);) { + try (LedgerHandle lh = client.createLedger(clusterSize, clusterSize, DigestType.CRC32, passwd)) { for (int i = 0; i <= numEntries; i++) { lh.addEntry(testEntry); } lid = lh.getId(); } - try (LedgerHandle lh = client.openLedger(lid, DigestType.CRC32, passwd);) { + try (LedgerHandle lh = client.openLedger(lid, DigestType.CRC32, passwd)) { Enumeration entries = lh.readEntries(0, numEntries); while (entries.hasMoreElements()) { LedgerEntry e = entries.nextElement(); assertTrue("Entry contents incorrect", Arrays.equals(e.getEntry(), testEntry)); } - BookKeeperAdmin admin = new BookKeeperAdmin(client); + BookKeeperAdmin admin = new BookKeeperAdmin(client, baseClientConf); return admin.getLedgerMetadata(lh); } } private LedgerMetadata testClient(ClientConfiguration conf, int clusterSize) throws Exception { - try (BookKeeper client = new BookKeeper(conf);) { + try (BookKeeper client = new BookKeeper(conf)) { return testClient(client, clusterSize); } } @@ -329,17 +350,118 @@ public void testConnectToTLSClusterTLSClient() throws Exception { */ @Test public void testConnectToLocalTLSClusterTLSClient() throws Exception { - ServerConfiguration serverConf = new ServerConfiguration(); - for (ServerConfiguration conf : bsConfs) { - conf.setDisableServerSocketBind(true); - conf.setEnableLocalTransport(true); - } - restartBookies(serverConf); + restartBookies(c -> { + c.setDisableServerSocketBind(true); + c.setEnableLocalTransport(true); + return c; + }); ClientConfiguration clientConf = new ClientConfiguration(baseClientConf); testClient(clientConf, numBookies); } + /** + * Verify Bookie refreshes certs at configured duration. + */ + @Test + public void testRefreshDurationForBookieCerts() throws Exception { + assumeTrue(serverKeyStoreFormat == KeyStoreType.PEM); + String originalTlsKeyFilePath = confByIndex(0).getTLSKeyStore(); + String invalidServerKey = getResourcePath("client-key.pem"); + File originalTlsCertFile = new File(originalTlsKeyFilePath); + File newTlsKeyFile = IOUtils.createTempFileAndDeleteOnExit(originalTlsKeyFilePath, "refresh"); + // clean up temp file even if test fails + newTlsKeyFile.deleteOnExit(); + File invalidServerKeyFile = new File(invalidServerKey); + // copy invalid cert to new temp file + FileUtils.copyFile(invalidServerKeyFile, newTlsKeyFile); + long refreshDurationInSec = 1; + restartBookies(c -> { + c.setTLSCertFilesRefreshDurationSeconds(1); + c.setTLSKeyStore(newTlsKeyFile.getAbsolutePath()); + return c; + }); + + ClientConfiguration clientConf = new ClientConfiguration(baseClientConf); + try { + testClient(clientConf, numBookies); + Assert.fail("Should have fail due to invalid cert"); + } catch (Exception e) { + // Ok. + } + + // Sleep so, cert file can be refreshed + Thread.sleep(refreshDurationInSec * 1000 + 1000); + + // copy valid key-file at given new location + FileUtils.copyFile(originalTlsCertFile, newTlsKeyFile); + newTlsKeyFile.setLastModified(System.currentTimeMillis() + 1000); + // client should be successfully able to add entries over tls + testClient(clientConf, numBookies); + newTlsKeyFile.delete(); + } + + /** + * Verify Bookkeeper-client refreshes certs at configured duration. 
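+ * The expected flow, paraphrasing the test body below (the temp-file name is the
+ * test's own, not an API contract):
+ * <pre>
+ *   clientConf.setTLSCertFilesRefreshDurationSeconds(1);          // re-read interval
+ *   clientConf.setTLSCertificatePath(tempCert.getAbsolutePath());
+ *   // start with an invalid cert -&gt; addEntry fails; overwrite the file with the
+ *   // valid cert and wait past the interval -&gt; addEntry succeeds, same client
+ * </pre>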
+ */ + @Test + public void testRefreshDurationForBookkeeperClientCerts() throws Exception { + assumeTrue(serverKeyStoreFormat == KeyStoreType.PEM); + + ClientConfiguration clientConf = new ClientConfiguration(baseClientConf); + String originalTlsCertFilePath = baseClientConf.getTLSCertificatePath(); + String invalidClientCert = getResourcePath("server-cert.pem"); + File originalTlsCertFile = new File(originalTlsCertFilePath); + File newTlsCertFile = IOUtils.createTempFileAndDeleteOnExit(originalTlsCertFilePath, "refresh"); + // clean up temp file even if test fails + newTlsCertFile.deleteOnExit(); + File invalidClientCertFile = new File(invalidClientCert); + // copy invalid cert to new temp file + FileUtils.copyFile(invalidClientCertFile, newTlsCertFile); + long refreshDurationInSec = 2; + clientConf.setTLSCertFilesRefreshDurationSeconds(1); + clientConf.setTLSCertificatePath(newTlsCertFile.getAbsolutePath()); + + // create a bookkeeper-client + try (BookKeeper client = new BookKeeper(clientConf)) { + byte[] testEntry = "testEntry".getBytes(); + byte[] passwd = "testPassword".getBytes(); + int totalAddEntries = 1; + CountDownLatch latch = new CountDownLatch(totalAddEntries); + AtomicInteger result = new AtomicInteger(-1); + LedgerHandle lh = client.createLedger(1, 1, DigestType.CRC32, passwd); + + for (int i = 0; i <= totalAddEntries; i++) { + lh.asyncAddEntry(testEntry, (rc, lgh, entryId, ctx) -> { + result.set(rc); + latch.countDown(); + }, null); + } + latch.await(1, TimeUnit.SECONDS); + Assert.assertNotEquals(result.get(), BKException.Code.OK); + + // Sleep so, cert file can be refreshed + Thread.sleep(refreshDurationInSec * 1000 + 1000); + + // copy valid key-file at given new location + FileUtils.copyFile(originalTlsCertFile, newTlsCertFile); + newTlsCertFile.setLastModified(System.currentTimeMillis() + 1000); + // client should be successfully able to add entries over tls + CountDownLatch latchWithValidCert = new CountDownLatch(totalAddEntries); + AtomicInteger validCertResult = new AtomicInteger(-1); + lh = client.createLedger(1, 1, DigestType.CRC32, passwd); + for (int i = 0; i <= totalAddEntries; i++) { + lh.asyncAddEntry(testEntry, (rc, lgh, entryId, ctx) -> { + validCertResult.set(rc); + latchWithValidCert.countDown(); + }, null); + } + latchWithValidCert.await(1, TimeUnit.SECONDS); + Assert.assertEquals(validCertResult.get(), BKException.Code.OK); + newTlsCertFile.delete(); + } + } + /** * Multiple clients, some with TLS, and some without TLS. 
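+ * A non-TLS client can be derived from the same base configuration in the same
+ * way as elsewhere in this class (a sketch, not the literal test body):
+ * <pre>
+ *   ClientConfiguration tlsConf   = new ClientConfiguration(baseClientConf);
+ *   ClientConfiguration plainConf = new ClientConfiguration(baseClientConf)
+ *           .setTLSProviderFactoryClass(null);   // disables TLS for this client
+ * </pre>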
*/ @@ -358,9 +480,10 @@ public void testConnectToTLSClusterMixedClient() throws Exception { */ @Test public void testConnectToTLSClusterTLSClientWithTLSNoAuthentication() throws Exception { - ServerConfiguration serverConf = new ServerConfiguration(baseConf); - serverConf.setTLSClientAuthentication(false); - restartBookies(serverConf); + restartBookies(c -> { + c.setTLSClientAuthentication(false); + return c; + }); ClientConfiguration conf = new ClientConfiguration(baseClientConf); testClient(conf, numBookies); @@ -398,11 +521,10 @@ public void testConnectToTLSClusterNonTLSClient() throws Exception { */ @Test public void testClientWantsTLSNoServersHaveIt() throws Exception { - ServerConfiguration serverConf = new ServerConfiguration(); - for (ServerConfiguration conf : bsConfs) { - conf.setTLSProviderFactoryClass(null); - } - restartBookies(serverConf); + restartBookies(c -> { + c.setTLSProviderFactoryClass(null); + return c; + }); ClientConfiguration clientConf = new ClientConfiguration(baseClientConf); try { @@ -420,11 +542,10 @@ public void testClientWantsTLSNoServersHaveIt() throws Exception { @Test public void testTLSClientButOnlyFewTLSServers() throws Exception { // disable TLS on initial set of bookies - ServerConfiguration serverConf = new ServerConfiguration(); - for (ServerConfiguration conf : bsConfs) { - conf.setTLSProviderFactoryClass(null); - } - restartBookies(serverConf); + restartBookies(c -> { + c.setTLSProviderFactoryClass(null); + return c; + }); // add two bookies which support TLS baseConf.setTLSProviderFactoryClass(TLSContextFactory.class.getName()); @@ -435,12 +556,14 @@ public void testTLSClientButOnlyFewTLSServers() throws Exception { ClientConfiguration clientConf = new ClientConfiguration(baseClientConf); LedgerMetadata metadata = testClient(clientConf, 2); - assertTrue(metadata.getEnsembles().size() > 0); - Collection> ensembles = metadata.getEnsembles().values(); - for (List bookies : ensembles) { - for (BookieSocketAddress bookieAddress : bookies) { - int port = bookieAddress.getPort(); - assertTrue(tlsBookiePorts.contains(port)); + assertFalse(metadata.getAllEnsembles().isEmpty()); + Collection> ensembles = metadata.getAllEnsembles().values(); + try (BookKeeper client = new BookKeeper(clientConf)) { + for (List bookies : ensembles) { + for (BookieId bookieAddress : bookies) { + int port = client.getBookieAddressResolver().resolve(bookieAddress).getPort(); + assertTrue(tlsBookiePorts.contains(port)); + } } } } @@ -470,9 +593,11 @@ public void testClientAuthPlugin() throws Exception { */ @Test public void testBookieAuthPluginRequireClientTLSAuthentication() throws Exception { - ServerConfiguration serverConf = new ServerConfiguration(baseConf); - serverConf.setBookieAuthProviderFactoryClass(AllowOnlyClientsWithX509Certificates.class.getName()); - restartBookies(serverConf); + restartBookies(c -> { + c.setBookieAuthProviderFactoryClass( + AllowOnlyClientsWithX509Certificates.class.getName()); + return c; + }); secureBookieSideChannel = false; secureBookieSideChannelPrincipals = null; @@ -492,11 +617,13 @@ public void testBookieAuthPluginRequireClientTLSAuthentication() throws Exceptio */ @Test public void testBookieAuthPluginRequireClientTLSAuthenticationLocal() throws Exception { - ServerConfiguration serverConf = new ServerConfiguration(baseConf); - serverConf.setBookieAuthProviderFactoryClass(AllowOnlyClientsWithX509Certificates.class.getName()); - serverConf.setDisableServerSocketBind(true); - serverConf.setEnableLocalTransport(true); - 
restartBookies(serverConf); + restartBookies(c -> { + c.setBookieAuthProviderFactoryClass( + AllowOnlyClientsWithX509Certificates.class.getName()); + c.setDisableServerSocketBind(true); + c.setEnableLocalTransport(true); + return c; + }); secureBookieSideChannel = false; secureBookieSideChannelPrincipals = null; @@ -511,15 +638,40 @@ public void testBookieAuthPluginRequireClientTLSAuthenticationLocal() throws Exc assertTrue(cert instanceof X509Certificate); } + /** + * Verify that given role in client certificate is checked when BookieAuthZFactory is set. + * Positive test case where all given roles are present. + * If authorization fails unexpectedly, we catch the UnauthorizedAccessException and fail. + * Otherwise we exit the test and mark it as success + */ + @Test + public void testRoleBasedAuthZInCertificate() throws Exception { + restartBookies(serverConf -> { + serverConf.setBookieAuthProviderFactoryClass(BookieAuthZFactory.class.getCanonicalName()); + serverConf.setAuthorizedRoles("testRole,testRole1"); + return serverConf; + }); + + ClientConfiguration clientConf = new ClientConfiguration(baseClientConf); + + try { + testClient(clientConf, numBookies); + } catch (BKException.BKUnauthorizedAccessException bke) { + fail("Could not verify given role."); + } + } + /** * Verify that a bookie-side Auth plugin can access server certificates. */ @Test public void testBookieAuthPluginDenyAccesstoClientWithoutTLSAuthentication() throws Exception { - ServerConfiguration serverConf = new ServerConfiguration(baseConf); - serverConf.setTLSClientAuthentication(false); - serverConf.setBookieAuthProviderFactoryClass(AllowOnlyClientsWithX509Certificates.class.getName()); - restartBookies(serverConf); + restartBookies(c -> { + c.setTLSClientAuthentication(false); + c.setBookieAuthProviderFactoryClass( + AllowOnlyClientsWithX509Certificates.class.getName()); + return c; + }); secureBookieSideChannel = false; secureBookieSideChannelPrincipals = null; @@ -530,6 +682,10 @@ public void testBookieAuthPluginDenyAccesstoClientWithoutTLSAuthentication() thr testClient(clientConf, numBookies); fail("Shouldn't be able to connect"); } catch (BKException.BKUnauthorizedAccessException authFailed) { + } catch (BKException.BKNotEnoughBookiesException notEnoughBookiesException) { + if (!useV2Protocol) { + fail("Unexpected exception occurred."); + } } assertTrue(secureBookieSideChannel); @@ -542,12 +698,14 @@ public void testBookieAuthPluginDenyAccesstoClientWithoutTLSAuthentication() thr */ @Test public void testBookieAuthPluginDenyAccessToClientWithoutTLSAuthenticationLocal() throws Exception { - ServerConfiguration serverConf = new ServerConfiguration(baseConf); - serverConf.setTLSClientAuthentication(false); - serverConf.setBookieAuthProviderFactoryClass(AllowOnlyClientsWithX509Certificates.class.getName()); - serverConf.setDisableServerSocketBind(true); - serverConf.setEnableLocalTransport(true); - restartBookies(serverConf); + restartBookies(c -> { + c.setTLSClientAuthentication(false); + c.setBookieAuthProviderFactoryClass( + AllowOnlyClientsWithX509Certificates.class.getName()); + c.setDisableServerSocketBind(true); + c.setEnableLocalTransport(true); + return c; + }); secureBookieSideChannel = false; secureBookieSideChannelPrincipals = null; @@ -558,6 +716,10 @@ public void testBookieAuthPluginDenyAccessToClientWithoutTLSAuthenticationLocal( testClient(clientConf, numBookies); fail("Shouldn't be able to connect"); } catch (BKException.BKUnauthorizedAccessException authFailed) { + } catch 
(BKException.BKNotEnoughBookiesException notEnoughBookiesException) { + if (!useV2Protocol) { + fail("Unexpected exception occurred."); + } } assertTrue(secureBookieSideChannel); @@ -570,9 +732,11 @@ public void testBookieAuthPluginDenyAccessToClientWithoutTLSAuthenticationLocal( */ @Test public void testBookieAuthPluginDenyAccessToClientWithoutTLS() throws Exception { - ServerConfiguration serverConf = new ServerConfiguration(baseConf); - serverConf.setBookieAuthProviderFactoryClass(AllowOnlyClientsWithX509Certificates.class.getName()); - restartBookies(serverConf); + restartBookies(c -> { + c.setBookieAuthProviderFactoryClass( + AllowOnlyClientsWithX509Certificates.class.getName()); + return c; + }); secureBookieSideChannel = false; secureBookieSideChannelPrincipals = null; @@ -583,6 +747,10 @@ public void testBookieAuthPluginDenyAccessToClientWithoutTLS() throws Exception testClient(clientConf, numBookies); fail("Shouldn't be able to connect"); } catch (BKException.BKUnauthorizedAccessException authFailed) { + } catch (BKException.BKNotEnoughBookiesException notEnoughBookiesException) { + if (!useV2Protocol) { + fail("Unexpected exception occurred."); + } } assertFalse(secureBookieSideChannel); @@ -682,7 +850,7 @@ public void testMixedCluster() throws Exception { ServerConfiguration bookieConf = newServerConfiguration(); bookieConf.setTLSProviderFactoryClass(TLSContextFactory.class.getName()); - bs.add(startBookie(bookieConf)); + startAndAddBookie(bookieConf); testClient(clientConf, origNumBookies + 1); } @@ -709,16 +877,16 @@ public void testHungServer() throws Exception { */ @Test public void testTLSChannelCounters() throws Exception { - ClientConfiguration tlsClientconf = new ClientConfiguration(baseClientConf) + ClientConfiguration tlsClientConf = new ClientConfiguration(baseClientConf) .setNumChannelsPerBookie(1); - ClientConfiguration nonTlsClientconf = new ClientConfiguration(baseClientConf) + ClientConfiguration nonTlsClientConf = new ClientConfiguration(baseClientConf) .setNumChannelsPerBookie(1) .setTLSProviderFactoryClass(null); TestStatsProvider tlsStatsProvider = new TestStatsProvider(); TestStatsProvider nonTlsStatsProvider = new TestStatsProvider(); - BookKeeperTestClient tlsClient = new BookKeeperTestClient(tlsClientconf, tlsStatsProvider); - BookKeeperTestClient nonTlsClient = new BookKeeperTestClient(nonTlsClientconf, nonTlsStatsProvider); + BookKeeperTestClient tlsClient = new BookKeeperTestClient(tlsClientConf, tlsStatsProvider); + BookKeeperTestClient nonTlsClient = new BookKeeperTestClient(nonTlsClientConf, nonTlsStatsProvider); // IO load from clients testClient(tlsClient, numBookies); @@ -726,51 +894,46 @@ public void testTLSChannelCounters() throws Exception { // verify stats for (int i = 0; i < numBookies; i++) { - BookieServer bookie = bs.get(i); - InetSocketAddress addr = bookie.getLocalAddress().getSocketAddress(); + BookieServer bookie = serverByIndex(i); StringBuilder nameBuilder = new StringBuilder(BookKeeperClientStats.CHANNEL_SCOPE) .append(".") - .append(addr.getAddress().getHostAddress() - .replace('.', '_') - .replace('-', '_')) - .append("_") - .append(addr.getPort()) + .append("bookie_") + .append(TestUtils.buildStatsCounterPathFromBookieID(bookie.getBookieId())) .append("."); - // check stats on TLS enabled client assertEquals("Mismatch TLS channel count", 1, - tlsClient.getTestStatsProvider().getCounter(nameBuilder.toString() + tlsClient.getTestStatsProvider().getCounter(nameBuilder + 
BookKeeperClientStats.ACTIVE_TLS_CHANNEL_COUNTER).get().longValue()); assertEquals("TLS handshake failure unexpected", 0, - tlsClient.getTestStatsProvider().getCounter(nameBuilder.toString() + tlsClient.getTestStatsProvider().getCounter(nameBuilder + BookKeeperClientStats.FAILED_TLS_HANDSHAKE_COUNTER).get().longValue()); assertEquals("Mismatch non-TLS channel count", 0, - tlsClient.getTestStatsProvider().getCounter(nameBuilder.toString() + tlsClient.getTestStatsProvider().getCounter(nameBuilder + BookKeeperClientStats.ACTIVE_NON_TLS_CHANNEL_COUNTER).get().longValue()); assertEquals("Connection failures unexpected", 0, - tlsClient.getTestStatsProvider().getCounter(nameBuilder.toString() + tlsClient.getTestStatsProvider().getCounter(nameBuilder + BookKeeperClientStats.FAILED_CONNECTION_COUNTER).get().longValue()); // check stats on non-TLS enabled client assertEquals("Mismatch TLS channel count", 0, - nonTlsClient.getTestStatsProvider().getCounter(nameBuilder.toString() + nonTlsClient.getTestStatsProvider().getCounter(nameBuilder + BookKeeperClientStats.ACTIVE_TLS_CHANNEL_COUNTER).get().longValue()); assertEquals("TLS handshake failure unexpected", 0, - nonTlsClient.getTestStatsProvider().getCounter(nameBuilder.toString() + nonTlsClient.getTestStatsProvider().getCounter(nameBuilder + BookKeeperClientStats.FAILED_TLS_HANDSHAKE_COUNTER).get().longValue()); assertEquals("Mismatch non-TLS channel count", 1, - nonTlsClient.getTestStatsProvider().getCounter(nameBuilder.toString() + nonTlsClient.getTestStatsProvider().getCounter(nameBuilder + BookKeeperClientStats.ACTIVE_NON_TLS_CHANNEL_COUNTER).get().longValue()); assertEquals("Connection failures unexpected", 0, - nonTlsClient.getTestStatsProvider().getCounter(nameBuilder.toString() + nonTlsClient.getTestStatsProvider().getCounter(nameBuilder + BookKeeperClientStats.FAILED_CONNECTION_COUNTER).get().longValue()); bookie.shutdown(); assertEquals("Mismatch TLS channel count", 0, - tlsClient.getTestStatsProvider().getCounter(nameBuilder.toString() + tlsClient.getTestStatsProvider().getCounter(nameBuilder + BookKeeperClientStats.ACTIVE_TLS_CHANNEL_COUNTER).get().longValue()); assertEquals("Mismatch non-TLS channel count", 0, - nonTlsClient.getTestStatsProvider().getCounter(nameBuilder.toString() + nonTlsClient.getTestStatsProvider().getCounter(nameBuilder + BookKeeperClientStats.ACTIVE_NON_TLS_CHANNEL_COUNTER).get().longValue()); } @@ -786,7 +949,7 @@ public void testHandshakeFailure() throws Exception { // restart a bookie with wrong trust store int restartBookieIdx = 0; - ServerConfiguration badBookieConf = bsConfs.get(restartBookieIdx); + ServerConfiguration badBookieConf = confByIndex(restartBookieIdx); switch (serverTrustStoreFormat) { case PEM: @@ -808,10 +971,7 @@ public void testHandshakeFailure() throws Exception { killBookie(restartBookieIdx); LOG.info("Sleeping for 1s before restarting bookie with bad cert"); Thread.sleep(1000); - BookieServer bookie = startBookie(badBookieConf); - bs.add(bookie); - bsConfs.add(badBookieConf); - + BookieServer bookie = startAndAddBookie(badBookieConf).getServer(); // Create ledger and write entries TestStatsProvider testStatsProvider = new TestStatsProvider(); BookKeeperTestClient client = new BookKeeperTestClient(clientConf, testStatsProvider); @@ -830,18 +990,33 @@ public void testHandshakeFailure() throws Exception { } // check failed handshake counter - InetSocketAddress addr = bookie.getLocalAddress().getSocketAddress(); StringBuilder nameBuilder = new StringBuilder(BookKeeperClientStats.CHANNEL_SCOPE) 
.append(".") - .append(addr.getAddress().getHostAddress() - .replace('.', '_') - .replace('-', '_')) - .append("_") - .append(addr.getPort()) + .append("bookie_") + .append(TestUtils.buildStatsCounterPathFromBookieID(bookie.getBookieId())) .append("."); - assertEquals("TLS handshake failure expected", 1, - client.getTestStatsProvider().getCounter(nameBuilder.toString() + client.getTestStatsProvider().getCounter(nameBuilder + BookKeeperClientStats.FAILED_TLS_HANDSHAKE_COUNTER).get().longValue()); } + + /** + * Verify that a client fails to connect to bookie if hostname verification + * fails. + */ + @Test + public void testClientAuthPluginWithHostnameVerificationEnabled() throws Exception { + secureClientSideChannel = false; + secureClientSideChannelPrincipals = null; + ClientConfiguration clientConf = new ClientConfiguration(baseClientConf); + + clientConf.setClientAuthProviderFactoryClass(AllowOnlyBookiesWithX509Certificates.class.getName()); + clientConf.setHostnameVerificationEnabled(true); + try { + testClient(clientConf, numBookies); + fail("should have failed with unauthorized exception"); + } catch (BKException.BKNotEnoughBookiesException e) { + // Ok. + } + } } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/AvailabilityOfEntriesOfLedgerTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/AvailabilityOfEntriesOfLedgerTest.java new file mode 100644 index 00000000000..66d1d943af1 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/AvailabilityOfEntriesOfLedgerTest.java @@ -0,0 +1,301 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.util; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.BitSet; +import java.util.HashSet; +import java.util.List; +import java.util.PrimitiveIterator; +import java.util.Set; +import org.junit.Test; + +/** + * Testsuite for AvailabilityOfEntriesOfLedger. 
+ */ +public class AvailabilityOfEntriesOfLedgerTest { + @Test + public void testWithItrConstructor() { + long[][] arrays = { + { 0, 1, 2 }, + { 1, 2}, + { 1, 2, 3, 5, 6, 7, 8 }, + { 0, 1, 5 }, + { 3 }, + { 1, 2, 4, 5, 7, 8 }, + {}, + {0}, + { 1, 2, 3, 5, 6, 11, 12, 13, 14, 15, 16, 17, 100, 1000, 1001, 10000, 20000, 20001 } + }; + for (int i = 0; i < arrays.length; i++) { + long[] tempArray = arrays[i]; + PrimitiveIterator.OfLong primitiveIterator = Arrays.stream(tempArray).iterator(); + AvailabilityOfEntriesOfLedger availabilityOfEntriesOfLedger = new AvailabilityOfEntriesOfLedger( + primitiveIterator); + assertEquals("Expected total number of entries", tempArray.length, + availabilityOfEntriesOfLedger.getTotalNumOfAvailableEntries()); + for (int j = 0; j < tempArray.length; j++) { + assertTrue(tempArray[j] + " is supposed to be available", + availabilityOfEntriesOfLedger.isEntryAvailable(tempArray[j])); + } + } + } + + @Test + public void testWithItrConstructorWithDuplicates() { + long[][] arrays = { + { 1, 2, 2, 3 }, + { 1, 2, 3, 5, 5, 6, 7, 8, 8 }, + { 1, 1, 5, 5 }, + { 3, 3 }, + { 1, 1, 2, 4, 5, 8, 9, 9, 9, 9, 9 }, + {}, + { 1, 2, 3, 5, 6, 11, 12, 13, 14, 15, 16, 17, 17, 100, 1000, 1000, 1001, 10000, 10000, 20000, 20001 } + }; + for (int i = 0; i < arrays.length; i++) { + long[] tempArray = arrays[i]; + Set tempSet = new HashSet(); + for (int k = 0; k < tempArray.length; k++) { + tempSet.add(tempArray[k]); + } + PrimitiveIterator.OfLong primitiveIterator = Arrays.stream(tempArray).iterator(); + AvailabilityOfEntriesOfLedger availabilityOfEntriesOfLedger = new AvailabilityOfEntriesOfLedger( + primitiveIterator); + assertEquals("Expected total number of entries", tempSet.size(), + availabilityOfEntriesOfLedger.getTotalNumOfAvailableEntries()); + for (int j = 0; j < tempArray.length; j++) { + assertTrue(tempArray[j] + " is supposed to be available", + availabilityOfEntriesOfLedger.isEntryAvailable(tempArray[j])); + } + } + } + + @Test + public void testSerializeDeserialize() { + long[][] arrays = { + { 0, 1, 2 }, + { 1, 2 }, + { 1, 2, 3, 5, 6, 7, 8 }, + { 0, 1, 5 }, + { 3 }, + { 1, 2, 4, 5, 7, 8 }, + { }, + { 0 }, + { 1, 2, 3, 5, 6, 11, 12, 13, 14, 15, 16, 17, 100, 1000, 1001, 10000, 20000, 20001 } + }; + for (int i = 0; i < arrays.length; i++) { + long[] tempArray = arrays[i]; + PrimitiveIterator.OfLong primitiveIterator = Arrays.stream(tempArray).iterator(); + AvailabilityOfEntriesOfLedger availabilityOfEntriesOfLedger = new AvailabilityOfEntriesOfLedger( + primitiveIterator); + byte[] serializedState = availabilityOfEntriesOfLedger.serializeStateOfEntriesOfLedger(); + AvailabilityOfEntriesOfLedger availabilityOfEntriesOfLedgerUsingSer = new AvailabilityOfEntriesOfLedger( + serializedState); + assertEquals("Expected total number of entries", tempArray.length, + availabilityOfEntriesOfLedgerUsingSer.getTotalNumOfAvailableEntries()); + for (int j = 0; j < tempArray.length; j++) { + assertTrue(tempArray[j] + " is supposed to be available", + availabilityOfEntriesOfLedgerUsingSer.isEntryAvailable(tempArray[j])); + } + } + } + + @Test + public void testSerializeDeserializeWithItrConstructorWithDuplicates() { + long[][] arrays = { + { 1, 2, 2, 3 }, + { 1, 2, 3, 5, 5, 6, 7, 8, 8 }, + { 1, 1, 5, 5 }, + { 3, 3 }, + { 1, 1, 2, 4, 5, 8, 9, 9, 9, 9, 9 }, + {}, + { 1, 2, 3, 5, 6, 11, 12, 13, 14, 15, 16, 17, 17, 100, 1000, 1000, 1001, 10000, 10000, 20000, 20001 } + }; + for (int i = 0; i < arrays.length; i++) { + long[] tempArray = arrays[i]; + Set tempSet = new HashSet(); + for (int k = 0; k < 
tempArray.length; k++) { + tempSet.add(tempArray[k]); + } + PrimitiveIterator.OfLong primitiveIterator = Arrays.stream(tempArray).iterator(); + AvailabilityOfEntriesOfLedger availabilityOfEntriesOfLedger = new AvailabilityOfEntriesOfLedger( + primitiveIterator); + byte[] serializedState = availabilityOfEntriesOfLedger.serializeStateOfEntriesOfLedger(); + AvailabilityOfEntriesOfLedger availabilityOfEntriesOfLedgerUsingSer = new AvailabilityOfEntriesOfLedger( + serializedState); + assertEquals("Expected total number of entries", tempSet.size(), + availabilityOfEntriesOfLedgerUsingSer.getTotalNumOfAvailableEntries()); + for (int j = 0; j < tempArray.length; j++) { + assertTrue(tempArray[j] + " is supposed to be available", + availabilityOfEntriesOfLedgerUsingSer.isEntryAvailable(tempArray[j])); + } + } + } + + @Test + public void testNonExistingEntries() { + long[][] arrays = { + { 0, 1, 2 }, + { 1, 2, 3, 5, 6, 7, 8 }, + { 1, 5 }, + { 3 }, + { 1, 2, 4, 5, 7, 8 }, + {}, + { 1, 2, 3, 5, 6, 11, 12, 13, 14, 15, 16, 17, 100, 1000, 1001, 10000, 20000, 20001 } + }; + /** + * corresponding non-existing entries for 'arrays' + */ + long[][] nonExistingEntries = { + { 3 }, + { 0, 4, 9, 100, 101 }, + { 0, 2, 3, 6, 9 }, + { 0, 1, 2, 4, 5, 6 }, + { 0, 3, 9, 10, 11, 100, 1000 }, + { 0, 1, 2, 3, 4, 5 }, + { 4, 18, 1002, 19999, 20003 } + }; + for (int i = 0; i < arrays.length; i++) { + long[] tempArray = arrays[i]; + long[] nonExistingElementsTempArray = nonExistingEntries[i]; + PrimitiveIterator.OfLong primitiveIterator = Arrays.stream(tempArray).iterator(); + AvailabilityOfEntriesOfLedger availabilityOfEntriesOfLedger = new AvailabilityOfEntriesOfLedger( + primitiveIterator); + + for (int j = 0; j < nonExistingElementsTempArray.length; j++) { + assertFalse(nonExistingElementsTempArray[j] + " is not supposed to be available", + availabilityOfEntriesOfLedger.isEntryAvailable(nonExistingElementsTempArray[j])); + } + } + } + + @Test + public void testGetUnavailableEntries() { + /* + * AvailabilityOfEntriesOfLedger is going to be created with this + * entries. It is equivalent to considering that Bookie has these + * entries. + */ + long[][] availableEntries = { + { 1, 2}, + { 0, 1, 2 }, + { 1, 2, 3, 5, 6, 7, 8 }, + { 1, 5 }, + { 3 }, + { 1, 2, 4, 5, 7, 8 }, + {}, + { 1, 2, 3, 5, 6, 11, 12, 13, 14, 15, 16, 17, 100, 1000, 1001, 10000, 20000, 20001 } + }; + + /* + * getUnavailableEntries method is going to be called with these entries + * as expected to contain. + */ + long[][] expectedToContainEntries = { + { 1, 2}, + { 0, 1, 2, 3, 5 }, + { 1, 2, 5, 7, 8 }, + { 2, 7 }, + { 3 }, + { 1, 5, 7, 8, 9, 10 }, + { 0, 1, 2, 3, 4, 5 }, + { 4, 18, 1002, 19999, 20003 } + }; + + /* + * Considering what AvailabilityOfEntriesOfLedger contains + * (availableEntries), what it is expected to contain + * (expectedToContainEntries), following are the entries which are + * supposed to be reported as unavailable (unavailableEntries). 
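+ * For example (second row of each array): the bookie holds {0, 1, 2} but is
+ * expected to contain {0, 1, 2, 3, 5}, so {3, 5} must be reported unavailable.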
+ */ + long[][] unavailableEntries = { + { }, + { 3, 5 }, + { }, + { 2, 7 }, + { }, + { 9, 10 }, + { 0, 1, 2, 3, 4, 5 }, + { 4, 18, 1002, 19999, 20003 } + }; + + for (int i = 0; i < availableEntries.length; i++) { + long[] availableEntriesTempArray = availableEntries[i]; + long[] expectedToContainEntriesTempArray = expectedToContainEntries[i]; + long[] unavailableEntriesTempArray = unavailableEntries[i]; + List unavailableEntriesTempList = new ArrayList(); + for (int j = 0; j < unavailableEntriesTempArray.length; j++) { + unavailableEntriesTempList.add(unavailableEntriesTempArray[j]); + } + + PrimitiveIterator.OfLong primitiveIterator = Arrays.stream(availableEntriesTempArray).iterator(); + AvailabilityOfEntriesOfLedger availabilityOfEntriesOfLedger = new AvailabilityOfEntriesOfLedger( + primitiveIterator); + + long startEntryId; + long lastEntryId; + if (expectedToContainEntriesTempArray[0] == 0) { + startEntryId = expectedToContainEntriesTempArray[0]; + lastEntryId = expectedToContainEntriesTempArray[expectedToContainEntriesTempArray.length - 1]; + } else { + startEntryId = expectedToContainEntriesTempArray[0] - 1; + lastEntryId = expectedToContainEntriesTempArray[expectedToContainEntriesTempArray.length - 1] + 1; + } + BitSet expectedToContainEntriesBitSet = new BitSet((int) (lastEntryId - startEntryId + 1)); + for (int ind = 0; ind < expectedToContainEntriesTempArray.length; ind++) { + int entryId = (int) expectedToContainEntriesTempArray[ind]; + expectedToContainEntriesBitSet.set(entryId - (int) startEntryId); + } + + List actualUnavailableEntries = availabilityOfEntriesOfLedger.getUnavailableEntries(startEntryId, + lastEntryId, expectedToContainEntriesBitSet); + assertEquals("Unavailable Entries", unavailableEntriesTempList, actualUnavailableEntries); + } + } + + @Test + public void testEmptyAvailabilityOfEntriesOfLedger() { + AvailabilityOfEntriesOfLedger emptyOne = AvailabilityOfEntriesOfLedger.EMPTY_AVAILABILITYOFENTRIESOFLEDGER; + assertEquals("expected totalNumOfAvailableEntries", 0, emptyOne.getTotalNumOfAvailableEntries()); + assertFalse("empty one is not supposed to contain any entry", emptyOne.isEntryAvailable(100L)); + long startEntryId = 100; + long lastEntryId = 105; + BitSet bitSetOfAvailability = new BitSet((int) (lastEntryId - startEntryId + 1)); + for (int i = 0; i < bitSetOfAvailability.length(); i++) { + if ((i % 2) == 0) { + bitSetOfAvailability.set(i); + } + } + List unavailableEntries = emptyOne.getUnavailableEntries(startEntryId, lastEntryId, bitSetOfAvailability); + assertEquals("Num of unavailable entries", bitSetOfAvailability.cardinality(), unavailableEntries.size()); + for (int i = 0; i < bitSetOfAvailability.length(); i++) { + long entryId = startEntryId + i; + if (bitSetOfAvailability.get(i)) { + assertTrue("Unavailable entry", unavailableEntries.contains(entryId)); + } + } + } +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/ByteBufListTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/ByteBufListTest.java index 19c841be4c9..822d4a7c48e 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/ByteBufListTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/ByteBufListTest.java @@ -20,6 +20,8 @@ import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; import io.netty.buffer.ByteBuf; import io.netty.buffer.ByteBufAllocator; @@ -32,13 +34,13 @@ import 
io.netty.channel.ChannelPipeline; import io.netty.channel.ChannelProgressivePromise; import io.netty.channel.ChannelPromise; +import io.netty.channel.DefaultChannelPromise; +import io.netty.channel.VoidChannelPromise; import io.netty.util.Attribute; import io.netty.util.AttributeKey; import io.netty.util.ReferenceCountUtil; import io.netty.util.concurrent.EventExecutor; - import java.net.SocketAddress; - import org.junit.Test; /** @@ -197,7 +199,8 @@ public void testEncoder() throws Exception { b2.writerIndex(b2.capacity()); ByteBufList buf = ByteBufList.get(b1, b2); - ChannelHandlerContext ctx = new MockChannelHandlerContext(); + Channel channel = mock(Channel.class); + MockChannelHandlerContext ctx = new MockChannelHandlerContext(channel); ByteBufList.ENCODER.write(ctx, buf, null); @@ -207,6 +210,15 @@ public void testEncoder() throws Exception { } class MockChannelHandlerContext implements ChannelHandlerContext { + private final Channel channel; + private final EventExecutor eventExecutor; + + public MockChannelHandlerContext(Channel channel) { + this.channel = channel; + eventExecutor = mock(EventExecutor.class); + when(eventExecutor.inEventLoop()).thenReturn(true); + } + @Override public ChannelFuture bind(SocketAddress localAddress) { return null; @@ -269,31 +281,37 @@ public ChannelFuture deregister(ChannelPromise promise) { @Override public ChannelFuture write(Object msg) { - ReferenceCountUtil.safeRelease(msg); + ReferenceCountUtil.release(msg); return null; } @Override public ChannelFuture write(Object msg, ChannelPromise promise) { - ReferenceCountUtil.safeRelease(msg); + ReferenceCountUtil.release(msg); + if (promise != null && !promise.isVoid()) { + promise.setSuccess(); + } return null; } @Override public ChannelFuture writeAndFlush(Object msg, ChannelPromise promise) { - ReferenceCountUtil.safeRelease(msg); + ReferenceCountUtil.release(msg); + if (promise != null && !promise.isVoid()) { + promise.setSuccess(); + } return null; } @Override public ChannelFuture writeAndFlush(Object msg) { - ReferenceCountUtil.safeRelease(msg); + ReferenceCountUtil.release(msg); return null; } @Override public ChannelPromise newPromise() { - return null; + return new DefaultChannelPromise(channel, eventExecutor); } @Override @@ -313,17 +331,17 @@ public ChannelFuture newFailedFuture(Throwable cause) { @Override public ChannelPromise voidPromise() { - return null; + return new VoidChannelPromise(channel, false); } @Override public Channel channel() { - return null; + return channel; } @Override public EventExecutor executor() { - return null; + return eventExecutor; } @Override diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/IteratorUtilityTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/IteratorUtilityTest.java new file mode 100644 index 00000000000..8fb41cce4d7 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/IteratorUtilityTest.java @@ -0,0 +1,140 @@ +/** + * Copyright The Apache Software Foundation + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bookkeeper.util; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; +import java.util.Iterator; +import java.util.PrimitiveIterator; +import java.util.function.Consumer; +import java.util.stream.IntStream; +import org.junit.Assert; +import org.junit.Test; + +/** + * Testsuite for IteratorUtility methods. + */ +public class IteratorUtilityTest { + + @Test + public void testWithPrimitiveItrMerge() { + long[][] arrays = { + { 0, 1, 2 }, + { 0, 1 }, + { 1, 2 }, + { 1, 2, 3, 5, 6, 7, 8 }, + { 1, 2, 3, 5, 6, 7, 8 }, + { 0, 1, 5 }, + { 3 }, + { 1, 2, 4, 5, 7, 8 }, + {}, + {}, + { 0 }, + { 1, 2, 3, 5, 6, 11, 12, 13, 14, 15, 16, 17, 100, 1000, 1001, 10000, 20000, 20001 }, + { 201, 202, 203, 205, 206, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 20100, 201000, + 201001, 2010000, 2020000, 2020001 } + }; + for (int i = 0; i < arrays.length; i++) { + for (int j = i + 1; j < arrays.length; j++) { + long[] tempArray1 = arrays[i]; + long[] tempArray2 = arrays[j]; + HashSet unionSet = new HashSet(); + for (int k = 0; k < tempArray1.length; k++) { + unionSet.add(tempArray1[k]); + } + for (int k = 0; k < tempArray2.length; k++) { + unionSet.add(tempArray2[k]); + } + + PrimitiveIterator.OfLong primitiveIterator1 = Arrays.stream(tempArray1).iterator(); + PrimitiveIterator.OfLong primitiveIterator2 = Arrays.stream(tempArray2).iterator(); + + PrimitiveIterator.OfLong mergedItr = IteratorUtility.mergePrimitiveLongIterator(primitiveIterator1, + primitiveIterator2); + ArrayList mergedArrayList = new ArrayList(); + Consumer addMethod = mergedArrayList::add; + mergedItr.forEachRemaining(addMethod); + int mergedListSize = mergedArrayList.size(); + Assert.assertEquals("Size of the mergedArrayList", unionSet.size(), mergedArrayList.size()); + Assert.assertTrue("mergedArrayList should contain all elements in unionSet", + mergedArrayList.containsAll(unionSet)); + Assert.assertTrue("Merged Iterator should be sorted", IntStream.range(0, mergedListSize - 1) + .allMatch(k -> mergedArrayList.get(k) <= mergedArrayList.get(k + 1))); + Assert.assertTrue("All elements of tempArray1 should be in mergedArrayList", + IntStream.range(0, tempArray1.length).allMatch(k -> mergedArrayList.contains(tempArray1[k]))); + Assert.assertTrue("All elements of tempArray2 should be in mergedArrayList", + IntStream.range(0, tempArray2.length).allMatch(k -> mergedArrayList.contains(tempArray2[k]))); + } + } + } + + @Test + public void testWithItrMerge() { + long[][] arrays = { + { 0, 1, 2 }, + { 0, 1 }, + { 1, 2 }, + { 1, 2, 3, 5, 6, 7, 8 }, + { 1, 2, 3, 5, 6, 7, 8 }, + { 0, 1, 5 }, + { 3 }, + { 1, 2, 4, 5, 7, 8 }, + {}, + {}, + { 0 }, + { 1, 2, 3, 5, 6, 11, 12, 13, 14, 15, 16, 17, 100, 1000, 1001, 10000, 20000, 20001 }, + { 201, 202, 203, 205, 206, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 20100, 201000, + 201001, 2010000, 2020000, 2020001 } + }; + for (int i = 0; i < arrays.length; i++) { + for (int j = i + 1; j < arrays.length; j++) { + long[] tempArray1 = arrays[i]; + ArrayList tempArrayList1 = new ArrayList(); + IntStream.range(0, tempArray1.length).forEach((k) -> 
tempArrayList1.add(tempArray1[k])); + long[] tempArray2 = arrays[j]; + ArrayList tempArrayList2 = new ArrayList(); + IntStream.range(0, tempArray2.length).forEach((k) -> tempArrayList2.add(tempArray2[k])); + HashSet unionSet = new HashSet(); + unionSet.addAll(tempArrayList1); + unionSet.addAll(tempArrayList2); + + Iterator itr1 = tempArrayList1.iterator(); + Iterator itr2 = tempArrayList2.iterator(); + + Iterator mergedItr = IteratorUtility.mergeIteratorsForPrimitiveLongIterator(itr1, itr2, + Long::compare, (l) -> l); + ArrayList mergedArrayList = new ArrayList(); + Consumer addMethod = mergedArrayList::add; + mergedItr.forEachRemaining(addMethod); + int mergedListSize = mergedArrayList.size(); + Assert.assertEquals("Size of the mergedArrayList", unionSet.size(), mergedArrayList.size()); + Assert.assertTrue("mergedArrayList should contain all elements in unionSet", + mergedArrayList.containsAll(unionSet)); + Assert.assertTrue("Merged Iterator should be sorted", IntStream.range(0, mergedListSize - 1) + .allMatch(k -> mergedArrayList.get(k) <= mergedArrayList.get(k + 1))); + Assert.assertTrue("All elements of tempArray1 should be in mergedArrayList", + IntStream.range(0, tempArray1.length).allMatch(k -> mergedArrayList.contains(tempArray1[k]))); + Assert.assertTrue("All elements of tempArray2 should be in mergedArrayList", + IntStream.range(0, tempArray2.length).allMatch(k -> mergedArrayList.contains(tempArray2[k]))); + } + } + } +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/LoggerOutput.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/LoggerOutput.java index eeb161e9ffd..b507a16f46b 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/LoggerOutput.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/LoggerOutput.java @@ -21,21 +21,24 @@ package org.apache.bookkeeper.util; import static org.mockito.Mockito.atLeastOnce; -import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.spy; import static org.mockito.Mockito.verify; import java.util.ArrayList; import java.util.List; +import java.util.UUID; import java.util.function.Consumer; import java.util.stream.Collectors; -import org.apache.log4j.Appender; -import org.apache.log4j.LogManager; -import org.apache.log4j.Logger; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.core.LogEvent; +import org.apache.logging.log4j.core.LoggerContext; +import org.apache.logging.log4j.core.appender.NullAppender; import org.junit.rules.TestRule; import org.junit.runner.Description; import org.junit.runners.model.Statement; import org.mockito.ArgumentCaptor; import org.slf4j.Marker; +import org.slf4j.event.KeyValuePair; import org.slf4j.event.Level; import org.slf4j.event.LoggingEvent; @@ -44,13 +47,13 @@ */ public class LoggerOutput implements TestRule { - private Appender logAppender; - private ArgumentCaptor logEventCaptor; - private List>> logEventExpectations = new ArrayList<>(); + private NullAppender logAppender; + private ArgumentCaptor logEventCaptor; + private final List>> logEventExpectations = new ArrayList<>(); public void expect(Consumer> expectation) { if (logEventCaptor == null) { - logEventCaptor = ArgumentCaptor.forClass(org.apache.log4j.spi.LoggingEvent.class); + logEventCaptor = ArgumentCaptor.forClass(LogEvent.class); } logEventExpectations.add(expectation); } @@ -61,13 +64,16 @@ public Statement apply(final Statement base, Description description) { @Override public void evaluate() throws Throwable { - logAppender 
= mock(Appender.class); - Logger rootLogger = LogManager.getRootLogger(); - rootLogger.addAppender(logAppender); + LoggerContext lc = (LoggerContext) LogManager.getContext(false); + logAppender = spy(NullAppender.createAppender(UUID.randomUUID().toString())); + logAppender.start(); + lc.getConfiguration().addAppender(logAppender); + lc.getRootLogger().addAppender(lc.getConfiguration().getAppender(logAppender.getName())); + lc.updateLoggers(); try { base.evaluate(); if (!logEventExpectations.isEmpty()) { - verify(logAppender, atLeastOnce()).doAppend(logEventCaptor.capture()); + verify(logAppender, atLeastOnce()).append(logEventCaptor.capture()); List logEvents = logEventCaptor.getAllValues().stream() .map(LoggerOutput::toSlf4j) .collect(Collectors.toList()); @@ -76,7 +82,8 @@ public void evaluate() throws Throwable { } } } finally { - rootLogger.removeAppender(logAppender); + lc.getRootLogger().removeAppender(lc.getConfiguration().getAppender(logAppender.getName())); + lc.updateLoggers(); logEventExpectations.clear(); logEventCaptor = null; } @@ -84,7 +91,7 @@ public void evaluate() throws Throwable { }; } - private static LoggingEvent toSlf4j(org.apache.log4j.spi.LoggingEvent log4jEvent) { + private static LoggingEvent toSlf4j(LogEvent log4jEvent) { return new LoggingEvent() { @Override public Level getLevel() { @@ -101,11 +108,6 @@ public Level getLevel() { } } - @Override - public Marker getMarker() { - return null; - } - @Override public String getLoggerName() { return log4jEvent.getLoggerName(); @@ -113,7 +115,12 @@ public String getLoggerName() { @Override public String getMessage() { - return log4jEvent.getRenderedMessage(); + return log4jEvent.getMessage().getFormattedMessage(); + } + + @Override + public List getArguments() { + return null; } @Override @@ -126,9 +133,19 @@ public Object[] getArgumentArray() { return new Object[0]; } + @Override + public List getMarkers() { + return null; + } + + @Override + public List getKeyValuePairs() { + return null; + } + @Override public long getTimeStamp() { - return log4jEvent.getTimeStamp(); + return log4jEvent.getTimeMillis(); } @Override diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/NettyChannelUtilTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/NettyChannelUtilTest.java new file mode 100644 index 00000000000..ea47d83ec75 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/NettyChannelUtilTest.java @@ -0,0 +1,73 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + */ +package org.apache.bookkeeper.util; + +import static org.mockito.ArgumentMatchers.same; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import io.netty.buffer.ByteBuf; +import io.netty.buffer.Unpooled; +import io.netty.channel.Channel; +import io.netty.channel.ChannelFutureListener; +import io.netty.channel.ChannelOutboundInvoker; +import io.netty.channel.ChannelPromise; +import io.netty.channel.VoidChannelPromise; +import java.nio.charset.StandardCharsets; +import org.junit.Test; + +public class NettyChannelUtilTest { + + @Test + public void testWriteAndFlushWithVoidPromise() { + final ChannelOutboundInvoker ctx = mock(ChannelOutboundInvoker.class); + final VoidChannelPromise voidChannelPromise = new VoidChannelPromise(mock(Channel.class), true); + when(ctx.voidPromise()).thenReturn(voidChannelPromise); + final byte[] data = "test".getBytes(StandardCharsets.UTF_8); + final ByteBuf byteBuf = Unpooled.wrappedBuffer(data, 0, data.length); + try { + NettyChannelUtil.writeAndFlushWithVoidPromise(ctx, byteBuf); + verify(ctx).writeAndFlush(same(byteBuf), same(voidChannelPromise)); + verify(ctx).voidPromise(); + } finally { + byteBuf.release(); + } + } + + @Test + public void testWriteAndFlushWithClosePromise() { + final ChannelOutboundInvoker ctx = mock(ChannelOutboundInvoker.class); + final ChannelPromise promise = mock(ChannelPromise.class); + + final byte[] data = "test".getBytes(StandardCharsets.UTF_8); + final ByteBuf byteBuf = Unpooled.wrappedBuffer(data, 0, data.length); + when(ctx.writeAndFlush(same(byteBuf))).thenReturn(promise); + try { + NettyChannelUtil.writeAndFlushWithClosePromise(ctx, byteBuf); + verify(ctx).writeAndFlush(same(byteBuf)); + verify(promise).addListener(same(ChannelFutureListener.CLOSE)); + } finally { + byteBuf.release(); + } + } + +} \ No newline at end of file diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/StaticDNSResolver.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/StaticDNSResolver.java index d5cb06710b7..07da660a878 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/StaticDNSResolver.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/StaticDNSResolver.java @@ -21,10 +21,11 @@ import java.util.List; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; - +import org.apache.bookkeeper.client.ITopologyAwareEnsemblePlacementPolicy; import org.apache.bookkeeper.client.RackChangeNotifier; -import org.apache.bookkeeper.client.RackawareEnsemblePlacementPolicyImpl; import org.apache.bookkeeper.net.AbstractDNSToSwitchMapping; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.net.BookieNode; import org.apache.bookkeeper.net.BookieSocketAddress; import org.apache.bookkeeper.net.DNSToSwitchMapping; import org.apache.bookkeeper.net.NetworkTopology; @@ -71,6 +72,10 @@ public static void reset() { @Override public List resolve(List names) { + if (getBookieAddressResolver() == null) { + // test that this instance has been properly initialized + throw new IllegalStateException("bookieAddressResolver was not set"); + } List racks = new ArrayList(); for (String n : names) { String rack = name2Racks.get(n); @@ -87,19 +92,20 @@ public void reloadCachedMappings() { // nop } - private static RackawareEnsemblePlacementPolicyImpl rackawarePolicy = null; + private static ITopologyAwareEnsemblePlacementPolicy rackawarePolicy = null; @Override - public 
void registerRackChangeListener(RackawareEnsemblePlacementPolicyImpl rackawareEnsemblePolicy) { + public void registerRackChangeListener(ITopologyAwareEnsemblePlacementPolicy rackawareEnsemblePolicy) { rackawarePolicy = rackawareEnsemblePolicy; } public static void changeRack(List bookieAddressList, List rack) { + List bookieIds = new ArrayList<>(); for (int i = 0; i < bookieAddressList.size(); i++) { BookieSocketAddress bkAddress = bookieAddressList.get(i); name2Racks.put(bkAddress.getHostName(), rack.get(i)); + bookieIds.add(bkAddress.toBookieId()); } - rackawarePolicy.onBookieRackChange(bookieAddressList); + rackawarePolicy.onBookieRackChange(bookieIds); } - } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/SubTreeCacheTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/SubTreeCacheTest.java index 249f5ae9469..aea9b962aae 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/SubTreeCacheTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/SubTreeCacheTest.java @@ -28,7 +28,6 @@ import java.util.Map; import java.util.SortedSet; import java.util.TreeSet; - import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.KeeperException.NoNodeException; import org.apache.zookeeper.WatchedEvent; diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/TestDiskChecker.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/TestDiskChecker.java index 58a8236d31a..c7d52878a1b 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/TestDiskChecker.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/TestDiskChecker.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/TestHardLink.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/TestHardLink.java new file mode 100644 index 00000000000..75f6cf502d8 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/TestHardLink.java @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
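The new NettyChannelUtilTest above exercises a `NettyChannelUtil` helper whose source is not part of this section. Judging purely from what the mocks verify, the helper presumably reduces to the following reconstruction (hypothetical code, inferred from the test, not copied from the patch):

```java
import io.netty.buffer.ByteBuf;
import io.netty.channel.ChannelFutureListener;
import io.netty.channel.ChannelOutboundInvoker;

// Inferred shape of the utility under test.
public final class NettyChannelUtilSketch {

    private NettyChannelUtilSketch() {}

    // The shared void promise skips per-write ChannelFuture allocation and
    // listener bookkeeping, which matters on hot write paths.
    public static void writeAndFlushWithVoidPromise(ChannelOutboundInvoker ctx, ByteBuf msg) {
        ctx.writeAndFlush(msg, ctx.voidPromise());
    }

    // CLOSE tears the channel down once the write completes, successfully or not.
    public static void writeAndFlushWithClosePromise(ChannelOutboundInvoker ctx, ByteBuf msg) {
        ctx.writeAndFlush(msg).addListener(ChannelFutureListener.CLOSE);
    }
}
```

That reading matches the two verifications exactly: `writeAndFlush(same(byteBuf), same(voidChannelPromise))` plus `voidPromise()` in the first test, and `addListener(same(ChannelFutureListener.CLOSE))` on the returned promise in the second.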
+ */ +package org.apache.bookkeeper.util; + +import java.io.File; +import java.io.IOException; +import java.util.UUID; +import org.apache.commons.io.FileUtils; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + + +public class TestHardLink { + + private File tempDir; + + @Before + public void setup() throws IOException { + // create a dedicated temp dir to hold the hard-link test files + tempDir = IOUtils.createTempDir("TestHardLink", "test-hardlink"); + } + + @After + public void tearDown() throws IOException { + FileUtils.deleteDirectory(tempDir); + } + + private void verifyHardLink(File origin, File linkedOrigin) throws IOException { + Assert.assertTrue(origin.exists()); + Assert.assertFalse(linkedOrigin.exists()); + + HardLink.createHardLink(origin, linkedOrigin); + + Assert.assertTrue(origin.exists()); + Assert.assertTrue(linkedOrigin.exists()); + + // deleting the origin file should succeed; the hard-linked copy must remain. + origin.delete(); + Assert.assertFalse(origin.exists()); + Assert.assertTrue(linkedOrigin.exists()); + } + + @Test + public void testHardLink() throws IOException { + String uuidSuffix = UUID.randomUUID().toString(); + + // prepare file + File origin = new File(tempDir, "originFile." + uuidSuffix); + File linkedOrigin = new File(tempDir, "linkedOrigin." + uuidSuffix); + origin.createNewFile(); + + // disable jdk api link first + HardLink.enableJdkLinkApi(false); + verifyHardLink(origin, linkedOrigin); + + // prepare file + File jdkorigin = new File(tempDir, "jdkoriginFile." + uuidSuffix); + File jdklinkedOrigin = new File(tempDir, "jdklinkedOrigin." + uuidSuffix); + jdkorigin.createNewFile(); + + // enable jdk api link + HardLink.enableJdkLinkApi(true); + verifyHardLink(jdkorigin, jdklinkedOrigin); + } +} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/TestUtils.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/TestUtils.java index 26b24482067..38755c884ad 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/TestUtils.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/TestUtils.java @@ -22,14 +22,19 @@ package org.apache.bookkeeper.util; import java.io.File; +import java.util.Arrays; +import java.util.Collection; import java.util.HashSet; import java.util.Set; - import java.util.concurrent.TimeUnit; +import java.util.function.BooleanSupplier; import lombok.extern.slf4j.Slf4j; -import org.apache.bookkeeper.bookie.Bookie; +import org.apache.bookkeeper.bookie.BookieImpl; import org.apache.bookkeeper.client.LedgerHandle; import org.apache.bookkeeper.client.api.ReadHandle; +import org.apache.bookkeeper.net.BookieId; +import org.apache.commons.io.FileUtils; +import org.junit.Assert; /** * Test utilities. @@ -39,18 +44,23 @@ public final class TestUtils { private TestUtils() {} + public static String buildStatsCounterPathFromBookieID(BookieId bookieId) { + return bookieId.toString().replace('.', '_').replace('-', '_').replace(":", "_"); + } + + public static boolean hasAllLogFiles(File ledgerDirectory, Integer... logsId) { + Set logs = findEntryLogFileIds(ledgerDirectory); + return logs.containsAll(Arrays.asList(logsId)); + } + + public static boolean hasNoneLogFiles(File ledgerDirectory, Integer... logsId) { + Set logs = findEntryLogFileIds(ledgerDirectory); + return Arrays.stream(logsId).noneMatch(logs::contains); + } + public static boolean hasLogFiles(File ledgerDirectory, boolean partial, Integer... logsId) { - boolean result = partial ?
false : true; - Set logs = new HashSet(); - for (File file : Bookie.getCurrentDirectory(ledgerDirectory).listFiles()) { - if (file.isFile()) { - String name = file.getName(); - if (!name.endsWith(".log")) { - continue; - } - logs.add(Integer.parseInt(name.split("\\.")[0], 16)); - } - } + boolean result = !partial; + Set logs = findEntryLogFileIds(ledgerDirectory); for (Integer logId : logsId) { boolean exist = logs.contains(logId); if ((partial && exist) @@ -61,6 +71,20 @@ public static boolean hasLogFiles(File ledgerDirectory, boolean partial, Integer return result; } + private static Set findEntryLogFileIds(File ledgerDirectory) { + Set logs = new HashSet<>(); + for (File file : BookieImpl.getCurrentDirectory(ledgerDirectory).listFiles()) { + if (file.isFile()) { + String name = file.getName(); + if (!name.endsWith(".log")) { + continue; + } + logs.add(Integer.parseInt(name.split("\\.")[0], 16)); + } + } + return logs; + } + public static void waitUntilLacUpdated(ReadHandle rh, long newLac) throws Exception { long lac = rh.getLastAddConfirmed(); while (lac < newLac) { @@ -77,4 +101,26 @@ public static long waitUntilExplicitLacUpdated(LedgerHandle rh, long newLac) thr return lac; } + public static void assertEventuallyTrue(String description, BooleanSupplier predicate) throws Exception { + assertEventuallyTrue(description, predicate, 10, TimeUnit.SECONDS); + } + + public static void assertEventuallyTrue(String description, BooleanSupplier predicate, + long duration, TimeUnit unit) throws Exception { + long iterations = unit.toMillis(duration) / 100; + for (int i = 0; i < iterations && !predicate.getAsBoolean(); i++) { + Thread.sleep(100); + } + Assert.assertTrue(description, predicate.getAsBoolean()); + } + + public static int countNumOfFiles(File[] folderNames, String... extensions) { + int count = 0; + for (int i = 0; i < folderNames.length; i++) { + Collection filesCollection = FileUtils.listFiles(folderNames[i], extensions, true); + count += filesCollection.size(); + } + return count; + } + } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/TestZeroBuffer.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/TestZeroBuffer.java index cf224162ac2..962f9209ef0 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/TestZeroBuffer.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/TestZeroBuffer.java @@ -22,7 +22,6 @@ import java.nio.ByteBuffer; import java.util.Random; - import org.junit.Assert; import org.junit.Test; diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/TestZkUtils.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/TestZkUtils.java index 527a38460eb..e52cf467ddc 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/TestZkUtils.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/TestZkUtils.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
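On the TestHardLink file introduced above: the test pins down the defining property of a hard link, namely that removing one directory entry leaves the data reachable through the other, and runs the same check twice with `HardLink.enableJdkLinkApi` toggled off and on. For readers unfamiliar with the JDK-side facility involved, a quick standalone illustration using `java.nio.file.Files.createLink` (the demo itself is not from the patch):

```java
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;

public class HardLinkDemo {
    public static void main(String[] args) throws IOException {
        Path dir = Files.createTempDirectory("hardlink-demo");
        Path origin = Files.createFile(dir.resolve("origin"));
        Files.write(origin, "payload".getBytes());

        // createLink(link, existing): both names now refer to the same inode
        Path link = Files.createLink(dir.resolve("link"), origin);

        // deleting one name does not free the data while another name remains
        Files.delete(origin);
        System.out.println(new String(Files.readAllBytes(link))); // payload
    }
}
```

Both paths must live on the same filesystem, which is why the test keeps the origin and the link inside one temp directory.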
See the NOTICE file @@ -21,9 +21,7 @@ package org.apache.bookkeeper.util; import java.io.IOException; - import junit.framework.TestCase; - import org.apache.bookkeeper.test.ZooKeeperUtil; import org.apache.zookeeper.CreateMode; import org.apache.zookeeper.KeeperException; @@ -49,13 +47,13 @@ public class TestZkUtils extends TestCase { @Override public void setUp() throws Exception { logger.info("Setting up test {}.", getName()); - zkUtil.startServer(); + zkUtil.startCluster(); } @After @Override public void tearDown() throws Exception { - zkUtil.killServer(); + zkUtil.killCluster(); logger.info("Tore down test {}.", getName()); } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/collections/ConcurrentLongHashMapTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/collections/ConcurrentLongHashMapTest.java index cec38cdf548..b1f1b5437d4 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/collections/ConcurrentLongHashMapTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/collections/ConcurrentLongHashMapTest.java @@ -20,14 +20,15 @@ */ package org.apache.bookkeeper.util.collections; +import static java.nio.charset.StandardCharsets.UTF_8; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import com.google.common.collect.Lists; - import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; @@ -40,8 +41,8 @@ import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicReference; import java.util.function.LongFunction; - import org.junit.Test; /** @@ -52,30 +53,239 @@ public class ConcurrentLongHashMapTest { @Test public void testConstructor() { try { - new ConcurrentLongHashMap(0); + ConcurrentLongHashMap.newBuilder() + .expectedItems(0) + .build(); fail("should have thrown exception"); } catch (IllegalArgumentException e) { // ok } try { - new ConcurrentLongHashMap(16, 0); + ConcurrentLongHashMap.newBuilder() + .expectedItems(16) + .concurrencyLevel(0) + .build(); fail("should have thrown exception"); } catch (IllegalArgumentException e) { // ok } try { - new ConcurrentLongHashMap(4, 8); + ConcurrentLongHashMap.newBuilder() + .expectedItems(4) + .concurrencyLevel(8) + .build(); fail("should have thrown exception"); } catch (IllegalArgumentException e) { // ok } } + @Test + public void testReduceUnnecessaryExpansions() { + ConcurrentLongHashMap map = ConcurrentLongHashMap.newBuilder() + .expectedItems(2) + .concurrencyLevel(1) + .build(); + assertNull(map.put(1, "v1")); + assertNull(map.put(2, "v2")); + assertNull(map.put(3, "v3")); + assertNull(map.put(4, "v4")); + + assertTrue(map.remove(1, "v1")); + assertTrue(map.remove(2, "v2")); + assertTrue(map.remove(3, "v3")); + assertTrue(map.remove(4, "v4")); + + assertEquals(0, map.getUsedBucketCount()); + } + + @Test + public void testClear() { + ConcurrentLongHashMap map = ConcurrentLongHashMap.newBuilder() + .expectedItems(2) + .concurrencyLevel(1) + .autoShrink(true) + .mapIdleFactor(0.25f) + .build(); + assertTrue(map.capacity() == 4); + + assertNull(map.put(1, "v1")); + assertNull(map.put(2, "v2")); + assertNull(map.put(3, "v3")); + + assertTrue(map.capacity() == 8); + map.clear(); + assertTrue(map.capacity() == 4); + } + + 
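A theme that dominates the rest of this section: every deprecated `new ConcurrentLongHashMap(...)` style constructor call is replaced by the `newBuilder()` fluent API, which names each parameter and exposes the new auto-shrink knobs. A representative usage sketch; the generic type parameters are re-added here because the hunks render without angle brackets, and the factor values are the ones these tests use:

```java
import org.apache.bookkeeper.util.collections.ConcurrentLongHashMap;

public class BuilderMigrationDemo {
    public static void main(String[] args) {
        // previously: new ConcurrentLongHashMap<>(16, 1)
        ConcurrentLongHashMap<String> map = ConcurrentLongHashMap.<String>newBuilder()
                .expectedItems(16)     // sizing hint; actual capacity is derived from it
                .concurrencyLevel(1)   // number of independently locked sections
                .autoShrink(true)      // allow capacity to drop again after bulk removals
                .mapIdleFactor(0.25f)  // how empty the map must get before shrinking
                .build();
        System.out.println(map.put(1L, "one")); // null: no previous mapping
        System.out.println(map.get(1L));        // one
    }
}
```

As `testConstructor` shows, the builder keeps the old validation: a zero `expectedItems` or `concurrencyLevel`, or a concurrency level larger than the expected item count, still fails with `IllegalArgumentException`.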
private void addSpecifyIncrement(ConcurrentLongHashMap mkc, int start, int end) { + for (int i = start; i <= end; i++) { + assertNull(mkc.put(i, ("comment:" + i).getBytes(UTF_8))); + } + } + + private void removeSpecifyIncrement(ConcurrentLongHashMap mkc, int start, int end) { + for (int i = end; i >= start; i--) { + assertNotNull(mkc.remove(i)); + } + } + + @Test + public void testAutoShrinkWithByte() { + final int defaultExpectedItems = 256; + final int defaultConcurrencyLevel = 16; + final float defaultExpandFactor = 2; + final float defaultShrinkFactor = 2; + + ConcurrentLongHashMap mkc = ConcurrentLongHashMap.newBuilder().autoShrink(true).build(); + assertTrue(mkc.capacity() == defaultExpectedItems * 2); + + addSpecifyIncrement(mkc, 1, defaultExpectedItems * 2); + // expand hashmap + assertTrue(mkc.capacity() == defaultExpectedItems * 2 + + defaultConcurrencyLevel * defaultExpandFactor * 15); + + removeSpecifyIncrement(mkc, 220, defaultExpectedItems * 2); + // shrink hashmap + assertTrue(mkc.capacity() == defaultExpectedItems * 2 + + defaultConcurrencyLevel * defaultExpandFactor * 15 - defaultConcurrencyLevel * defaultShrinkFactor); + } + + @Test + public void testExpandAndShrink() { + ConcurrentLongHashMap map = ConcurrentLongHashMap.newBuilder() + .expectedItems(2) + .concurrencyLevel(1) + .autoShrink(true) + .mapIdleFactor(0.25f) + .build(); + assertTrue(map.capacity() == 4); + + assertNull(map.put(1, "v1")); + assertNull(map.put(2, "v2")); + assertNull(map.put(3, "v3")); + + // expand hashmap + assertTrue(map.capacity() == 8); + + assertTrue(map.remove(1, "v1")); + // not shrink + assertTrue(map.capacity() == 8); + assertTrue(map.remove(2, "v2")); + // shrink hashmap + assertTrue(map.capacity() == 4); + + // expand hashmap + assertNull(map.put(4, "v4")); + assertNull(map.put(5, "v5")); + assertTrue(map.capacity() == 8); + + //verify that the map does not keep shrinking at every remove() operation + assertNull(map.put(6, "v6")); + assertTrue(map.remove(6, "v6")); + assertTrue(map.capacity() == 8); + } + + @Test + public void testConcurrentExpandAndShrinkAndGet() throws Throwable { + ConcurrentLongHashMap map = ConcurrentLongHashMap.newBuilder() + .expectedItems(2) + .concurrencyLevel(1) + .autoShrink(true) + .mapIdleFactor(0.25f) + .build(); + assertEquals(map.capacity(), 4); + + ExecutorService executor = Executors.newCachedThreadPool(); + final int readThreads = 16; + final int writeThreads = 1; + final int n = 1_000; + CyclicBarrier barrier = new CyclicBarrier(writeThreads + readThreads); + Future future = null; + AtomicReference ex = new AtomicReference<>(); + + for (int i = 0; i < readThreads; i++) { + executor.submit(() -> { + try { + barrier.await(); + } catch (Exception e) { + throw new RuntimeException(e); + } + try { + map.get(1); + } catch (Exception e) { + ex.set(e); + } + }); + } + + assertNull(map.put(1, "v1")); + future = executor.submit(() -> { + try { + barrier.await(); + } catch (Exception e) { + throw new RuntimeException(e); + } + + for (int i = 0; i < n; i++) { + // expand hashmap + assertNull(map.put(2, "v2")); + assertNull(map.put(3, "v3")); + assertEquals(map.capacity(), 8); + + // shrink hashmap + assertTrue(map.remove(2, "v2")); + assertTrue(map.remove(3, "v3")); + assertEquals(map.capacity(), 4); + } + }); + + future.get(); + assertTrue(ex.get() == null); + // shut down pool + executor.shutdown(); + } + + @Test + public void testExpandShrinkAndClear() { + ConcurrentLongHashMap map = ConcurrentLongHashMap.newBuilder() + .expectedItems(2) + 
.concurrencyLevel(1) + .autoShrink(true) + .mapIdleFactor(0.25f) + .build(); + final long initCapacity = map.capacity(); + assertTrue(map.capacity() == 4); + assertNull(map.put(1, "v1")); + assertNull(map.put(2, "v2")); + assertNull(map.put(3, "v3")); + + // expand hashmap + assertTrue(map.capacity() == 8); + + assertTrue(map.remove(1, "v1")); + // not shrink + assertTrue(map.capacity() == 8); + assertTrue(map.remove(2, "v2")); + // shrink hashmap + assertTrue(map.capacity() == 4); + + assertTrue(map.remove(3, "v3")); + // Will not shrink the hashmap again because shrink capacity is less than initCapacity + // current capacity is equal than the initial capacity + assertTrue(map.capacity() == initCapacity); + map.clear(); + // after clear, because current capacity is equal than the initial capacity, so not shrinkToInitCapacity + assertTrue(map.capacity() == initCapacity); + } + @Test public void simpleInsertions() { - ConcurrentLongHashMap map = new ConcurrentLongHashMap<>(16); + ConcurrentLongHashMap map = ConcurrentLongHashMap.newBuilder() + .expectedItems(16) + .build(); assertTrue(map.isEmpty()); assertNull(map.put(1, "one")); @@ -103,7 +313,8 @@ public void simpleInsertions() { @Test public void testRemove() { - ConcurrentLongHashMap map = new ConcurrentLongHashMap<>(); + ConcurrentLongHashMap map = + ConcurrentLongHashMap.newBuilder().build(); assertTrue(map.isEmpty()); assertNull(map.put(1, "one")); @@ -119,7 +330,10 @@ public void testRemove() { @Test public void testRemoveIf() { - ConcurrentLongHashMap map = new ConcurrentLongHashMap<>(16, 1); + ConcurrentLongHashMap map = ConcurrentLongHashMap.newBuilder() + .expectedItems(16) + .concurrencyLevel(1) + .build(); map.put(1L, "one"); map.put(2L, "two"); @@ -136,7 +350,10 @@ public void testRemoveIf() { @Test public void testNegativeUsedBucketCount() { - ConcurrentLongHashMap map = new ConcurrentLongHashMap<>(16, 1); + ConcurrentLongHashMap map = ConcurrentLongHashMap.newBuilder() + .expectedItems(16) + .concurrencyLevel(1) + .build(); map.put(0, "zero"); assertEquals(1, map.getUsedBucketCount()); @@ -151,7 +368,10 @@ public void testNegativeUsedBucketCount() { @Test public void testRehashing() { int n = 16; - ConcurrentLongHashMap map = new ConcurrentLongHashMap<>(n / 2, 1); + ConcurrentLongHashMap map = ConcurrentLongHashMap.newBuilder() + .expectedItems(n / 2) + .concurrencyLevel(1) + .build(); assertEquals(map.capacity(), n); assertEquals(map.size(), 0); @@ -166,7 +386,10 @@ public void testRehashing() { @Test public void testRehashingWithDeletes() { int n = 16; - ConcurrentLongHashMap map = new ConcurrentLongHashMap<>(n / 2, 1); + ConcurrentLongHashMap map = ConcurrentLongHashMap.newBuilder() + .expectedItems(n / 2) + .concurrencyLevel(1) + .build(); assertEquals(map.capacity(), n); assertEquals(map.size(), 0); @@ -188,7 +411,8 @@ public void testRehashingWithDeletes() { @Test public void concurrentInsertions() throws Throwable { - ConcurrentLongHashMap map = new ConcurrentLongHashMap<>(); + ConcurrentLongHashMap map = + ConcurrentLongHashMap.newBuilder().build(); ExecutorService executor = Executors.newCachedThreadPool(); final int nThreads = 16; @@ -223,7 +447,8 @@ public void concurrentInsertions() throws Throwable { @Test public void concurrentInsertionsAndReads() throws Throwable { - ConcurrentLongHashMap map = new ConcurrentLongHashMap<>(); + ConcurrentLongHashMap map = + ConcurrentLongHashMap.newBuilder().build(); ExecutorService executor = Executors.newCachedThreadPool(); final int nThreads = 16; @@ -258,7 +483,8 @@ 
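The expand/shrink tests above all pin the same capacity trajectory: with `expectedItems(2)` the map starts at capacity 4, doubles to 8 on the third insert, holds at 8 while two entries remain, and halves back to 4 only once occupancy falls to a single entry, never dropping below the initial capacity. A simplified model of that policy; the fill and idle factor constants here are illustrative assumptions, not the library's actual defaults:

```java
// Toy model of the resize rules the capacity assertions above imply.
public final class ResizePolicyModel {

    static int nextCapacity(int capacity, int size, float fillFactor,
                            float idleFactor, int initialCapacity) {
        if (size >= capacity * fillFactor) {
            return capacity * 2;                            // grow under load
        }
        if (size < capacity * idleFactor && capacity > initialCapacity) {
            return Math.max(capacity / 2, initialCapacity); // shrink when mostly idle
        }
        return capacity;
    }

    public static void main(String[] args) {
        System.out.println(nextCapacity(4, 3, 0.66f, 0.25f, 4)); // 8: third insert expands
        System.out.println(nextCapacity(8, 2, 0.66f, 0.25f, 4)); // 8: two entries hold steady
        System.out.println(nextCapacity(8, 1, 0.66f, 0.25f, 4)); // 4: one entry shrinks
    }
}
```

The `testConcurrentExpandAndShrinkAndGet` cases then hammer exactly this boundary from reader threads to prove `get()` stays safe while capacity oscillates between 4 and 8.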
public void concurrentInsertionsAndReads() throws Throwable { @Test public void stressConcurrentInsertionsAndReads() throws Throwable { - ConcurrentLongHashMap map = new ConcurrentLongHashMap<>(4, 1); + ConcurrentLongHashMap map = + ConcurrentLongHashMap.newBuilder().expectedItems(4).concurrencyLevel(1).build(); ExecutorService executor = Executors.newCachedThreadPool(); final int writeThreads = 16; @@ -325,7 +551,8 @@ public void stressConcurrentInsertionsAndReads() throws Throwable { @Test public void testIteration() { - ConcurrentLongHashMap map = new ConcurrentLongHashMap<>(); + ConcurrentLongHashMap map = + ConcurrentLongHashMap.newBuilder().build(); assertEquals(map.keys(), Collections.emptyList()); assertEquals(map.values(), Collections.emptyList()); @@ -369,7 +596,10 @@ public void testIteration() { @Test public void testHashConflictWithDeletion() { final int buckets = 16; - ConcurrentLongHashMap map = new ConcurrentLongHashMap<>(buckets, 1); + ConcurrentLongHashMap map = ConcurrentLongHashMap.newBuilder() + .expectedItems(buckets) + .concurrencyLevel(1) + .build(); // Pick 2 keys that fall into the same bucket long key1 = 1; @@ -402,7 +632,8 @@ public void testHashConflictWithDeletion() { @Test public void testPutIfAbsent() { - ConcurrentLongHashMap map = new ConcurrentLongHashMap<>(); + ConcurrentLongHashMap map = + ConcurrentLongHashMap.newBuilder().build(); assertEquals(map.putIfAbsent(1, "one"), null); assertEquals(map.get(1), "one"); @@ -412,7 +643,10 @@ public void testPutIfAbsent() { @Test public void testComputeIfAbsent() { - ConcurrentLongHashMap map = new ConcurrentLongHashMap<>(16, 1); + ConcurrentLongHashMap map = ConcurrentLongHashMap.newBuilder() + .expectedItems(16) + .concurrencyLevel(1) + .build(); AtomicInteger counter = new AtomicInteger(); LongFunction provider = new LongFunction() { public Integer apply(long key) { @@ -439,7 +673,10 @@ public Integer apply(long key) { public void benchConcurrentLongHashMap() throws Exception { // public static void main(String args[]) { - ConcurrentLongHashMap map = new ConcurrentLongHashMap<>(N, 1); + ConcurrentLongHashMap map = ConcurrentLongHashMap.newBuilder() + .expectedItems(N) + .concurrencyLevel(1) + .build(); for (long i = 0; i < Iterations; i++) { for (int j = 0; j < N; j++) { diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/collections/ConcurrentLongHashSetTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/collections/ConcurrentLongHashSetTest.java index 34cf470e2b7..aa308d94471 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/collections/ConcurrentLongHashSetTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/collections/ConcurrentLongHashSetTest.java @@ -20,47 +20,49 @@ */ package org.apache.bookkeeper.util.collections; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.fail; import com.google.common.collect.Lists; import com.google.common.collect.Sets; - import java.util.ArrayList; import java.util.Collections; import java.util.List; -import java.util.Random; +import java.util.concurrent.CyclicBarrier; import 
java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; - -import org.junit.Test; +import java.util.concurrent.ThreadLocalRandom; +import java.util.concurrent.atomic.AtomicReference; +import org.junit.jupiter.api.Test; /** * Test the ConcurrentLongHashSet class. */ +@SuppressWarnings("deprecation") public class ConcurrentLongHashSetTest { @Test public void testConstructor() { try { - new ConcurrentLongHashSet(0); + ConcurrentLongHashSet.newBuilder().concurrencyLevel(0).build(); fail("should have thrown exception"); } catch (IllegalArgumentException e) { // ok } try { - new ConcurrentLongHashSet(16, 0); + ConcurrentLongHashSet.newBuilder().expectedItems(16).concurrencyLevel(0).build(); fail("should have thrown exception"); } catch (IllegalArgumentException e) { // ok } try { - new ConcurrentLongHashSet(4, 8); + ConcurrentLongHashSet.newBuilder().expectedItems(4).concurrencyLevel(8).build(); fail("should have thrown exception"); } catch (IllegalArgumentException e) { // ok @@ -69,7 +71,9 @@ public void testConstructor() { @Test public void simpleInsertions() { - ConcurrentLongHashSet set = new ConcurrentLongHashSet(16); + ConcurrentLongHashSet set = ConcurrentLongHashSet.newBuilder() + .expectedItems(16) + .build(); assertTrue(set.isEmpty()); assertTrue(set.add(1)); @@ -95,9 +99,28 @@ public void simpleInsertions() { assertEquals(set.size(), 3); } + @Test + public void testReduceUnnecessaryExpansions() { + ConcurrentLongHashSet set = ConcurrentLongHashSet.newBuilder() + .expectedItems(2) + .concurrencyLevel(1) + .build(); + assertTrue(set.add(1)); + assertTrue(set.add(2)); + assertTrue(set.add(3)); + assertTrue(set.add(4)); + + assertTrue(set.remove(1)); + assertTrue(set.remove(2)); + assertTrue(set.remove(3)); + assertTrue(set.remove(4)); + + assertEquals(0, set.getUsedBucketCount()); + } + @Test public void testRemove() { - ConcurrentLongHashSet set = new ConcurrentLongHashSet(); + ConcurrentLongHashSet set = ConcurrentLongHashSet.newBuilder().build(); assertTrue(set.isEmpty()); assertTrue(set.add(1)); @@ -112,7 +135,10 @@ public void testRemove() { @Test public void testRehashing() { int n = 16; - ConcurrentLongHashSet set = new ConcurrentLongHashSet(n / 2, 1); + ConcurrentLongHashSet set = ConcurrentLongHashSet.newBuilder() + .expectedItems(n / 2) + .concurrencyLevel(1) + .build(); assertEquals(set.capacity(), n); assertEquals(set.size(), 0); @@ -127,7 +153,10 @@ public void testRehashing() { @Test public void testRehashingWithDeletes() { int n = 16; - ConcurrentLongHashSet set = new ConcurrentLongHashSet(n / 2, 1); + ConcurrentLongHashSet set = ConcurrentLongHashSet.newBuilder() + .expectedItems(n / 2) + .concurrencyLevel(1) + .build(); assertEquals(set.capacity(), n); assertEquals(set.size(), 0); @@ -149,7 +178,7 @@ public void testRehashingWithDeletes() { @Test public void concurrentInsertions() throws Throwable { - ConcurrentLongHashSet set = new ConcurrentLongHashSet(); + ConcurrentLongHashSet set = ConcurrentLongHashSet.newBuilder().build(); ExecutorService executor = Executors.newCachedThreadPool(); final int nThreads = 16; @@ -160,10 +189,8 @@ public void concurrentInsertions() throws Throwable { final int threadIdx = i; futures.add(executor.submit(() -> { - Random random = new Random(); - for (int j = 0; j < n; j++) { - long key = Math.abs(random.nextLong()); + long key = ThreadLocalRandom.current().nextLong(Long.MAX_VALUE); // Ensure keys are unique key -= key % (threadIdx + 1); @@ -183,7 +210,7 @@ public void 
concurrentInsertions() throws Throwable { @Test public void concurrentInsertionsAndReads() throws Throwable { - ConcurrentLongHashSet map = new ConcurrentLongHashSet(); + ConcurrentLongHashSet map = ConcurrentLongHashSet.newBuilder().build(); ExecutorService executor = Executors.newCachedThreadPool(); final int nThreads = 16; @@ -194,10 +221,8 @@ public void concurrentInsertionsAndReads() throws Throwable { final int threadIdx = i; futures.add(executor.submit(() -> { - Random random = new Random(); - for (int j = 0; j < n; j++) { - long key = Math.abs(random.nextLong()); + long key = ThreadLocalRandom.current().nextLong(Long.MAX_VALUE); // Ensure keys are unique key -= key % (threadIdx + 1); @@ -215,9 +240,159 @@ public void concurrentInsertionsAndReads() throws Throwable { executor.shutdown(); } + @Test + public void testClear() { + ConcurrentLongHashSet map = ConcurrentLongHashSet.newBuilder() + .expectedItems(2) + .concurrencyLevel(1) + .autoShrink(true) + .mapIdleFactor(0.25f) + .build(); + assertEquals(4, map.capacity()); + + assertTrue(map.add(1)); + assertTrue(map.add(2)); + assertTrue(map.add(3)); + + assertEquals(8, map.capacity()); + map.clear(); + assertEquals(4, map.capacity()); + } + + @Test + public void testExpandAndShrink() { + ConcurrentLongHashSet map = ConcurrentLongHashSet.newBuilder() + .expectedItems(2) + .concurrencyLevel(1) + .autoShrink(true) + .mapIdleFactor(0.25f) + .build(); + assertEquals(4, map.capacity()); + + assertTrue(map.add(1)); + assertTrue(map.add(2)); + assertTrue(map.add(3)); + + // expand hashmap + assertEquals(8, map.capacity()); + + assertTrue(map.remove(1)); + // not shrink + assertEquals(8, map.capacity()); + assertTrue(map.remove(2)); + // shrink hashmap + assertEquals(4, map.capacity()); + + // expand hashmap + assertTrue(map.add(4)); + assertTrue(map.add(5)); + assertEquals(8, map.capacity()); + + //verify that the map does not keep shrinking at every remove() operation + assertTrue(map.add(6)); + assertTrue(map.remove(6)); + assertEquals(8, map.capacity()); + } + + @Test + public void testConcurrentExpandAndShrinkAndGet() throws Throwable { + ConcurrentLongHashSet set = ConcurrentLongHashSet.newBuilder() + .expectedItems(2) + .concurrencyLevel(1) + .autoShrink(true) + .mapIdleFactor(0.25f) + .build(); + assertEquals(set.capacity(), 4); + + ExecutorService executor = Executors.newCachedThreadPool(); + final int readThreads = 16; + final int writeThreads = 1; + final int n = 1_000; + CyclicBarrier barrier = new CyclicBarrier(writeThreads + readThreads); + Future future = null; + AtomicReference ex = new AtomicReference<>(); + + for (int i = 0; i < readThreads; i++) { + executor.submit(() -> { + try { + barrier.await(); + } catch (Exception e) { + throw new RuntimeException(e); + } + while (true) { + try { + set.contains(1); + } catch (Exception e) { + ex.set(e); + } + } + }); + } + + assertTrue(set.add(1)); + future = executor.submit(() -> { + try { + barrier.await(); + } catch (Exception e) { + throw new RuntimeException(e); + } + + for (int i = 0; i < n; i++) { + // expand hashmap + assertTrue(set.add(2)); + assertTrue(set.add(3)); + assertEquals(set.capacity(), 8); + + // shrink hashmap + assertTrue(set.remove(2)); + assertTrue(set.remove(3)); + assertEquals(set.capacity(), 4); + } + }); + + future.get(); + assertNull(ex.get()); + // shut down pool + executor.shutdown(); + } + + @Test + public void testExpandShrinkAndClear() { + ConcurrentLongHashSet map = ConcurrentLongHashSet.newBuilder() + .expectedItems(2) + .concurrencyLevel(1) + 
.autoShrink(true) + .mapIdleFactor(0.25f) + .build(); + final long initCapacity = map.capacity(); + assertEquals(4, map.capacity()); + + assertTrue(map.add(1)); + assertTrue(map.add(2)); + assertTrue(map.add(3)); + + // expand hashmap + assertEquals(8, map.capacity()); + + assertTrue(map.remove(1)); + // not shrink + assertEquals(8, map.capacity()); + assertTrue(map.remove(2)); + // shrink hashmap + assertEquals(4, map.capacity()); + + assertTrue(map.remove(3)); + // Will not shrink the hashmap again because shrink capacity is less than initCapacity + // current capacity is equal than the initial capacity + assertEquals(map.capacity(), initCapacity); + map.clear(); + // after clear, because current capacity is equal than the initial capacity, so not shrinkToInitCapacity + assertEquals(map.capacity(), initCapacity); + } + @Test public void testIteration() { - ConcurrentLongHashSet set = new ConcurrentLongHashSet(); + ConcurrentLongHashSet set = ConcurrentLongHashSet.newBuilder().build(); assertEquals(set.items(), Collections.emptySet()); @@ -244,7 +419,10 @@ public void testIteration() { @Test public void testHashConflictWithDeletion() { final int buckets = 16; - ConcurrentLongHashSet set = new ConcurrentLongHashSet(buckets, 1); + ConcurrentLongHashSet set = ConcurrentLongHashSet.newBuilder() + .expectedItems(buckets) + .concurrencyLevel(1) + .build(); // Pick 2 keys that fall into the same bucket long key1 = 1; @@ -275,4 +453,24 @@ public void testHashConflictWithDeletion() { assertTrue(set.isEmpty()); } + @Test + public void testSizeInBytes() { + ConcurrentLongHashSet set = new ConcurrentLongHashSet(4, 2); + assertEquals(64, set.sizeInBytes()); + set.add(1); + assertEquals(64, set.sizeInBytes()); + set.add(2); + assertEquals(64, set.sizeInBytes()); + set.add(3); + assertEquals(64, set.sizeInBytes()); + set.add(4); + assertEquals(96, set.sizeInBytes()); + set.add(5); + assertEquals(96, set.sizeInBytes()); + set.add(6); + assertEquals(128, set.sizeInBytes()); + set.add(7); + assertEquals(128, set.sizeInBytes()); + } + } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/collections/ConcurrentLongLongHashMapTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/collections/ConcurrentLongLongHashMapTest.java index abd96ecf523..e7a9e993ad6 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/collections/ConcurrentLongLongHashMapTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/collections/ConcurrentLongLongHashMapTest.java @@ -20,50 +20,60 @@ */ package org.apache.bookkeeper.util.collections; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.fail; import com.google.common.collect.Lists; import com.google.common.collect.Maps; - import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.Map; -import java.util.Random; +import java.util.concurrent.CyclicBarrier; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; +import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.atomic.AtomicLong; 
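A smaller cleanup worth flagging in the set tests above: `Math.abs(random.nextLong())` becomes `ThreadLocalRandom.current().nextLong(Long.MAX_VALUE)`. The old form has a latent bug, because `Math.abs(Long.MIN_VALUE)` overflows and remains negative, and it also allocated a fresh `Random` inside every submitted task. A two-line demonstration:

```java
import java.util.concurrent.ThreadLocalRandom;

public class RandomKeyDemo {
    public static void main(String[] args) {
        // abs() cannot represent -Long.MIN_VALUE, so the sign survives
        System.out.println(Math.abs(Long.MIN_VALUE)); // -9223372036854775808

        // bounded nextLong() is uniform in [0, Long.MAX_VALUE) and never negative
        long key = ThreadLocalRandom.current().nextLong(Long.MAX_VALUE);
        System.out.println(key >= 0); // true
    }
}
```

A negative key would matter here because, as the `testInvalidKeys` cases later in this section show, these primitive collections reject negative keys outright.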
- +import java.util.concurrent.atomic.AtomicReference; import org.apache.bookkeeper.util.collections.ConcurrentLongLongHashMap.LongLongFunction; -import org.junit.Test; +import org.junit.jupiter.api.Test; /** * Test the ConcurrentLongLongHashMap class. */ +@SuppressWarnings("deprecation") public class ConcurrentLongLongHashMapTest { @Test public void testConstructor() { try { - new ConcurrentLongLongHashMap(0); + ConcurrentLongLongHashMap.newBuilder() + .expectedItems(0) + .build(); fail("should have thrown exception"); } catch (IllegalArgumentException e) { // ok } try { - new ConcurrentLongLongHashMap(16, 0); + ConcurrentLongLongHashMap.newBuilder() + .expectedItems(16) + .concurrencyLevel(0) + .build(); fail("should have thrown exception"); } catch (IllegalArgumentException e) { // ok } try { - new ConcurrentLongLongHashMap(4, 8); + ConcurrentLongLongHashMap.newBuilder() + .expectedItems(4) + .concurrencyLevel(8) + .build(); fail("should have thrown exception"); } catch (IllegalArgumentException e) { // ok @@ -72,7 +82,9 @@ public void testConstructor() { @Test public void simpleInsertions() { - ConcurrentLongLongHashMap map = new ConcurrentLongLongHashMap(16); + ConcurrentLongLongHashMap map = ConcurrentLongLongHashMap.newBuilder() + .expectedItems(16) + .build(); assertTrue(map.isEmpty()); assertEquals(map.put(1, 11), -1); @@ -98,9 +110,155 @@ public void simpleInsertions() { assertEquals(map.size(), 3); } + @Test + public void testClear() { + ConcurrentLongLongHashMap map = ConcurrentLongLongHashMap.newBuilder() + .expectedItems(2) + .concurrencyLevel(1) + .autoShrink(true) + .mapIdleFactor(0.25f) + .build(); + assertEquals(4, map.capacity()); + + assertEquals(-1, map.put(1, 1)); + assertEquals(-1, map.put(2, 2)); + assertEquals(-1, map.put(3, 3)); + + assertEquals(8, map.capacity()); + map.clear(); + assertEquals(4, map.capacity()); + } + + @Test + public void testExpandAndShrink() { + ConcurrentLongLongHashMap map = ConcurrentLongLongHashMap.newBuilder() + .expectedItems(2) + .concurrencyLevel(1) + .autoShrink(true) + .mapIdleFactor(0.25f) + .build(); + assertEquals(-1, map.put(1, 1)); + assertEquals(-1, map.put(2, 2)); + assertEquals(-1, map.put(3, 3)); + + // expand hashmap + assertEquals(8, map.capacity()); + + assertTrue(map.remove(1, 1)); + // not shrink + assertEquals(8, map.capacity()); + assertTrue(map.remove(2, 2)); + // shrink hashmap + assertEquals(4, map.capacity()); + + // expand hashmap + assertEquals(-1, map.put(4, 4)); + assertEquals(-1, map.put(5, 5)); + assertEquals(8, map.capacity()); + + //verify that the map does not keep shrinking at every remove() operation + assertEquals(-1, map.put(6, 6)); + assertTrue(map.remove(6, 6)); + assertEquals(8, map.capacity()); + } + + @Test + public void testConcurrentExpandAndShrinkAndGet() throws Throwable { + ConcurrentLongLongHashMap map = ConcurrentLongLongHashMap.newBuilder() + .expectedItems(2) + .concurrencyLevel(1) + .autoShrink(true) + .mapIdleFactor(0.25f) + .build(); + assertEquals(map.capacity(), 4); + + ExecutorService executor = Executors.newCachedThreadPool(); + final int readThreads = 16; + final int writeThreads = 1; + final int n = 1_000; + CyclicBarrier barrier = new CyclicBarrier(writeThreads + readThreads); + Future future = null; + AtomicReference ex = new AtomicReference<>(); + + for (int i = 0; i < readThreads; i++) { + executor.submit(() -> { + try { + barrier.await(); + } catch (Exception e) { + throw new RuntimeException(e); + } + while (true) { + try { + map.get(1); + } catch (Exception e) { + 
ex.set(e); + } + } + }); + } + map.put(1, 11); + future = executor.submit(() -> { + try { + barrier.await(); + } catch (Exception e) { + throw new RuntimeException(e); + } + + for (int i = 0; i < n; i++) { + // expand hashmap + map.put(2, 22); + map.put(3, 33); + assertEquals(map.capacity(), 8); + + // shrink hashmap + map.remove(2, 22); + map.remove(3, 33); + assertEquals(map.capacity(), 4); + } + }); + + future.get(); + assertNull(ex.get()); + // shut down pool + executor.shutdown(); + } + + @Test + public void testExpandShrinkAndClear() { + ConcurrentLongLongHashMap map = ConcurrentLongLongHashMap.newBuilder() + .expectedItems(2) + .concurrencyLevel(1) + .autoShrink(true) + .mapIdleFactor(0.25f) + .build(); + final long initCapacity = map.capacity(); + assertEquals(4, map.capacity()); + assertEquals(-1, map.put(1, 1)); + assertEquals(-1, map.put(2, 2)); + assertEquals(-1, map.put(3, 3)); + + // expand hashmap + assertEquals(8, map.capacity()); + + assertTrue(map.remove(1, 1)); + // not shrink + assertEquals(8, map.capacity()); + assertTrue(map.remove(2, 2)); + // shrink hashmap + assertEquals(4, map.capacity()); + + assertTrue(map.remove(3, 3)); + // Will not shrink the hashmap again because shrink capacity is less than initCapacity + // current capacity is equal than the initial capacity + assertEquals(map.capacity(), initCapacity); + map.clear(); + // after clear, because current capacity is equal than the initial capacity, so not shrinkToInitCapacity + assertEquals(map.capacity(), initCapacity); + } + @Test public void testRemove() { - ConcurrentLongLongHashMap map = new ConcurrentLongLongHashMap(); + ConcurrentLongLongHashMap map = ConcurrentLongLongHashMap.newBuilder().build(); assertTrue(map.isEmpty()); assertEquals(map.put(1, 11), -1); @@ -116,7 +274,10 @@ public void testRemove() { @Test public void testNegativeUsedBucketCount() { - ConcurrentLongLongHashMap map = new ConcurrentLongLongHashMap(16, 1); + ConcurrentLongLongHashMap map = ConcurrentLongLongHashMap.newBuilder() + .expectedItems(16) + .concurrencyLevel(1) + .build(); map.put(0, 0); assertEquals(1, map.getUsedBucketCount()); @@ -131,7 +292,10 @@ public void testNegativeUsedBucketCount() { @Test public void testRehashing() { int n = 16; - ConcurrentLongLongHashMap map = new ConcurrentLongLongHashMap(n / 2, 1); + ConcurrentLongLongHashMap map = ConcurrentLongLongHashMap.newBuilder() + .expectedItems(n / 2) + .concurrencyLevel(1) + .build(); assertEquals(map.capacity(), n); assertEquals(map.size(), 0); @@ -146,7 +310,10 @@ public void testRehashing() { @Test public void testRehashingWithDeletes() { int n = 16; - ConcurrentLongLongHashMap map = new ConcurrentLongLongHashMap(n / 2, 1); + ConcurrentLongLongHashMap map = ConcurrentLongLongHashMap.newBuilder() + .expectedItems(n / 2) + .concurrencyLevel(1) + .build(); assertEquals(map.capacity(), n); assertEquals(map.size(), 0); @@ -168,7 +335,7 @@ public void testRehashingWithDeletes() { @Test public void concurrentInsertions() throws Throwable { - ConcurrentLongLongHashMap map = new ConcurrentLongLongHashMap(); + ConcurrentLongLongHashMap map = ConcurrentLongLongHashMap.newBuilder().build(); ExecutorService executor = Executors.newCachedThreadPool(); final int nThreads = 16; @@ -180,10 +347,8 @@ public void concurrentInsertions() throws Throwable { final int threadIdx = i; futures.add(executor.submit(() -> { - Random random = new Random(); - for (int j = 0; j < n; j++) { - long key = Math.abs(random.nextLong()); + long key = 
ThreadLocalRandom.current().nextLong(Long.MAX_VALUE); // Ensure keys are uniques key -= key % (threadIdx + 1); @@ -203,7 +368,7 @@ public void concurrentInsertions() throws Throwable { @Test public void concurrentInsertionsAndReads() throws Throwable { - ConcurrentLongLongHashMap map = new ConcurrentLongLongHashMap(); + ConcurrentLongLongHashMap map = ConcurrentLongLongHashMap.newBuilder().build(); ExecutorService executor = Executors.newCachedThreadPool(); final int nThreads = 16; @@ -215,10 +380,8 @@ public void concurrentInsertionsAndReads() throws Throwable { final int threadIdx = i; futures.add(executor.submit(() -> { - Random random = new Random(); - for (int j = 0; j < n; j++) { - long key = Math.abs(random.nextLong()); + long key = ThreadLocalRandom.current().nextLong(Long.MAX_VALUE); // Ensure keys are uniques key -= key % (threadIdx + 1); @@ -238,7 +401,7 @@ public void concurrentInsertionsAndReads() throws Throwable { @Test public void testIteration() { - ConcurrentLongLongHashMap map = new ConcurrentLongLongHashMap(); + ConcurrentLongLongHashMap map = ConcurrentLongLongHashMap.newBuilder().build(); assertEquals(map.keys(), Collections.emptyList()); assertEquals(map.values(), Collections.emptyList()); @@ -282,7 +445,10 @@ public void testIteration() { @Test public void testHashConflictWithDeletion() { final int buckets = 16; - ConcurrentLongLongHashMap map = new ConcurrentLongLongHashMap(buckets, 1); + ConcurrentLongLongHashMap map = ConcurrentLongLongHashMap.newBuilder() + .expectedItems(buckets) + .concurrencyLevel(1) + .build(); // Pick 2 keys that fall into the same bucket long key1 = 1; @@ -320,7 +486,7 @@ public void testHashConflictWithDeletion() { @Test public void testPutIfAbsent() { - ConcurrentLongLongHashMap map = new ConcurrentLongLongHashMap(); + ConcurrentLongLongHashMap map = ConcurrentLongLongHashMap.newBuilder().build(); assertEquals(map.putIfAbsent(1, 11), -1); assertEquals(map.get(1), 11); @@ -330,7 +496,10 @@ public void testPutIfAbsent() { @Test public void testComputeIfAbsent() { - ConcurrentLongLongHashMap map = new ConcurrentLongLongHashMap(16, 1); + ConcurrentLongLongHashMap map = ConcurrentLongLongHashMap.newBuilder() + .expectedItems(16) + .concurrencyLevel(1) + .build(); AtomicLong counter = new AtomicLong(); LongLongFunction provider = new LongLongFunction() { public long apply(long key) { @@ -353,13 +522,16 @@ public long apply(long key) { @Test public void testAddAndGet() { - ConcurrentLongLongHashMap map = new ConcurrentLongLongHashMap(16, 1); + ConcurrentLongLongHashMap map = ConcurrentLongLongHashMap.newBuilder() + .expectedItems(16) + .concurrencyLevel(1) + .build(); assertEquals(map.addAndGet(0, 0), 0); - assertEquals(map.containsKey(0), true); + assertTrue(map.containsKey(0)); assertEquals(map.get(0), 0); - assertEquals(map.containsKey(5), false); + assertFalse(map.containsKey(5)); assertEquals(map.addAndGet(0, 5), 5); assertEquals(map.get(0), 5); @@ -380,9 +552,31 @@ public void testAddAndGet() { assertEquals(map.get(0), 4); } + @Test + public void testReduceUnnecessaryExpansions() { + ConcurrentLongLongHashMap map = ConcurrentLongLongHashMap.newBuilder() + .expectedItems(2) + .concurrencyLevel(1) + .build(); + map.put(1L, 1L); + map.put(2L, 2L); + map.put(3L, 3L); + map.put(4L, 4L); + + map.remove(1L); + map.remove(2L); + map.remove(3L); + map.remove(4L); + assertEquals(0, map.getUsedBucketCount()); + } + + @Test public void testRemoveIf() { - ConcurrentLongLongHashMap map = new ConcurrentLongLongHashMap(16, 1); + ConcurrentLongLongHashMap 
map = ConcurrentLongLongHashMap.newBuilder() + .expectedItems(16) + .concurrencyLevel(1) + .build(); map.put(1L, 1L); map.put(2L, 2L); @@ -399,7 +593,10 @@ public void testRemoveIf() { @Test public void testRemoveIfValue() { - ConcurrentLongLongHashMap map = new ConcurrentLongLongHashMap(16, 1); + ConcurrentLongLongHashMap map = ConcurrentLongLongHashMap.newBuilder() + .expectedItems(16) + .concurrencyLevel(1) + .build(); map.put(1L, 1L); map.put(2L, 2L); @@ -415,8 +612,11 @@ public void testRemoveIfValue() { } @Test - public void testIvalidKeys() { - ConcurrentLongLongHashMap map = new ConcurrentLongLongHashMap(16, 1); + public void testInvalidKeys() { + ConcurrentLongLongHashMap map = ConcurrentLongLongHashMap.newBuilder() + .expectedItems(16) + .concurrencyLevel(1) + .build(); try { map.put(-5, 4); @@ -461,7 +661,10 @@ public long apply(long key) { @Test public void testAsMap() { - ConcurrentLongLongHashMap lmap = new ConcurrentLongLongHashMap(16, 1); + ConcurrentLongLongHashMap lmap = ConcurrentLongLongHashMap.newBuilder() + .expectedItems(16) + .concurrencyLevel(1) + .build(); lmap.put(1, 11); lmap.put(2, 22); lmap.put(3, 33); @@ -473,4 +676,24 @@ public void testAsMap() { assertEquals(map, lmap.asMap()); } + + @Test + public void testSizeInBytes() { + ConcurrentLongLongHashMap lmap = new ConcurrentLongLongHashMap(4, 2); + assertEquals(128, lmap.sizeInBytes()); + lmap.put(1, 1); + assertEquals(128, lmap.sizeInBytes()); + lmap.put(2, 2); + assertEquals(128, lmap.sizeInBytes()); + lmap.put(3, 3); + assertEquals(128, lmap.sizeInBytes()); + lmap.put(4, 4); + assertEquals(192, lmap.sizeInBytes()); + lmap.put(5, 5); + assertEquals(192, lmap.sizeInBytes()); + lmap.put(6, 6); + assertEquals(256, lmap.sizeInBytes()); + lmap.put(7, 7); + assertEquals(256, lmap.sizeInBytes()); + } } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/collections/ConcurrentLongLongPairHashMapTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/collections/ConcurrentLongLongPairHashMapTest.java index f8bfd52106a..30672837f5a 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/collections/ConcurrentLongLongPairHashMapTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/collections/ConcurrentLongLongPairHashMapTest.java @@ -27,42 +27,51 @@ import com.google.common.collect.Lists; import com.google.common.collect.Maps; - import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.Map; import java.util.Random; +import java.util.concurrent.CyclicBarrier; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; - +import java.util.concurrent.atomic.AtomicReference; import org.apache.bookkeeper.util.collections.ConcurrentLongLongPairHashMap.LongPair; import org.junit.Test; /** * Test the concurrent long-long pair hashmap class. 
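The rename from `testIvalidKeys` to `testInvalidKeys` above also draws attention to why the rejection exists at all: a primitive open-addressed map has no boxed `null`, so negative values are presumably reserved as internal markers for empty or deleted slots, and `-1` doubles as the "no previous value" return code these assertions rely on. A short sketch of the resulting contract (the sentinel scheme itself is inferred, not shown in the patch):

```java
import org.apache.bookkeeper.util.collections.ConcurrentLongLongHashMap;

public class SentinelContractDemo {
    public static void main(String[] args) {
        ConcurrentLongLongHashMap map = ConcurrentLongLongHashMap.newBuilder().build();

        System.out.println(map.put(7, 70)); // -1: key was absent
        System.out.println(map.put(7, 71)); // 70: previous value is returned

        try {
            map.put(-5, 4); // negative keys and values are rejected up front
        } catch (IllegalArgumentException expected) {
            System.out.println("negative key rejected");
        }
    }
}
```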
*/ +@SuppressWarnings("deprecation") public class ConcurrentLongLongPairHashMapTest { @Test public void testConstructor() { try { - new ConcurrentLongLongPairHashMap(0); + ConcurrentLongLongPairHashMap.newBuilder() + .expectedItems(0) + .build(); fail("should have thrown exception"); } catch (IllegalArgumentException e) { // ok } try { - new ConcurrentLongLongPairHashMap(16, 0); + ConcurrentLongLongPairHashMap.newBuilder() + .expectedItems(16) + .concurrencyLevel(0) + .build(); fail("should have thrown exception"); } catch (IllegalArgumentException e) { // ok } try { - new ConcurrentLongLongPairHashMap(4, 8); + ConcurrentLongLongPairHashMap.newBuilder() + .expectedItems(4) + .concurrencyLevel(8) + .build(); fail("should have thrown exception"); } catch (IllegalArgumentException e) { // ok @@ -71,8 +80,9 @@ public void testConstructor() { @Test public void simpleInsertions() { - ConcurrentLongLongPairHashMap map = new ConcurrentLongLongPairHashMap(16); - + ConcurrentLongLongPairHashMap map = ConcurrentLongLongPairHashMap.newBuilder() + .expectedItems(16) + .build(); assertTrue(map.isEmpty()); assertTrue(map.put(1, 1, 11, 11)); assertFalse(map.isEmpty()); @@ -99,7 +109,9 @@ public void simpleInsertions() { @Test public void testRemove() { - ConcurrentLongLongPairHashMap map = new ConcurrentLongLongPairHashMap(); + ConcurrentLongLongPairHashMap map = ConcurrentLongLongPairHashMap + .newBuilder() + .build(); assertTrue(map.isEmpty()); assertTrue(map.put(1, 1, 11, 11)); @@ -113,9 +125,158 @@ public void testRemove() { assertTrue(map.isEmpty()); } + @Test + public void testClear() { + ConcurrentLongLongPairHashMap map = ConcurrentLongLongPairHashMap.newBuilder() + .expectedItems(2) + .concurrencyLevel(1) + .autoShrink(true) + .mapIdleFactor(0.25f) + .build(); + assertTrue(map.capacity() == 4); + + assertTrue(map.put(1, 1, 11, 11)); + assertTrue(map.put(2, 2, 22, 22)); + assertTrue(map.put(3, 3, 33, 33)); + + assertTrue(map.capacity() == 8); + map.clear(); + assertTrue(map.capacity() == 4); + } + + @Test + public void testExpandAndShrink() { + ConcurrentLongLongPairHashMap map = ConcurrentLongLongPairHashMap.newBuilder() + .expectedItems(2) + .concurrencyLevel(1) + .autoShrink(true) + .mapIdleFactor(0.25f) + .build(); + assertTrue(map.put(1, 1, 11, 11)); + assertTrue(map.put(2, 2, 22, 22)); + assertTrue(map.put(3, 3, 33, 33)); + + // expand hashmap + assertTrue(map.capacity() == 8); + + assertTrue(map.remove(1, 1, 11, 11)); + // not shrink + assertTrue(map.capacity() == 8); + assertTrue(map.remove(2, 2, 22, 22)); + // shrink hashmap + assertTrue(map.capacity() == 4); + + // expand hashmap + assertTrue(map.put(4, 4, 44, 44)); + assertTrue(map.put(5, 5, 55, 55)); + assertTrue(map.capacity() == 8); + + //verify that the map does not keep shrinking at every remove() operation + assertTrue(map.put(6, 6, 66, 66)); + assertTrue(map.remove(6, 6, 66, 66)); + assertTrue(map.capacity() == 8); + } + + @Test + public void testConcurrentExpandAndShrinkAndGet() throws Throwable { + ConcurrentLongLongPairHashMap map = ConcurrentLongLongPairHashMap.newBuilder() + .expectedItems(2) + .concurrencyLevel(1) + .autoShrink(true) + .mapIdleFactor(0.25f) + .build(); + assertEquals(map.capacity(), 4); + + ExecutorService executor = Executors.newCachedThreadPool(); + final int readThreads = 16; + final int writeThreads = 1; + final int n = 1_000; + CyclicBarrier barrier = new CyclicBarrier(writeThreads + readThreads); + Future future = null; + AtomicReference ex = new AtomicReference<>(); + + for (int i = 0; i < 
readThreads; i++) { + executor.submit(() -> { + try { + barrier.await(); + } catch (Exception e) { + throw new RuntimeException(e); + } + while (true) { + try { + map.get(1, 1); + } catch (Exception e) { + ex.set(e); + } + } + }); + } + + assertTrue(map.put(1, 1, 11, 11)); + future = executor.submit(() -> { + try { + barrier.await(); + } catch (Exception e) { + throw new RuntimeException(e); + } + + for (int i = 0; i < n; i++) { + // expand hashmap + assertTrue(map.put(2, 2, 22, 22)); + assertTrue(map.put(3, 3, 33, 33)); + assertEquals(map.capacity(), 8); + + // shrink hashmap + assertTrue(map.remove(2, 2, 22, 22)); + assertTrue(map.remove(3, 3, 33, 33)); + assertEquals(map.capacity(), 4); + } + }); + + future.get(); + assertTrue(ex.get() == null); + // shut down pool + executor.shutdown(); + } + + @Test + public void testExpandShrinkAndClear() { + ConcurrentLongLongPairHashMap map = ConcurrentLongLongPairHashMap.newBuilder() + .expectedItems(2) + .concurrencyLevel(1) + .autoShrink(true) + .mapIdleFactor(0.25f) + .build(); + final long initCapacity = map.capacity(); + assertTrue(map.put(1, 1, 11, 11)); + assertTrue(map.put(2, 2, 22, 22)); + assertTrue(map.put(3, 3, 33, 33)); + + // expand hashmap + assertTrue(map.capacity() == 8); + + assertTrue(map.remove(1, 1, 11, 11)); + // not shrink + assertTrue(map.capacity() == 8); + assertTrue(map.remove(2, 2, 22, 22)); + // shrink hashmap + assertTrue(map.capacity() == 4); + + assertTrue(map.remove(3, 3, 33, 33)); + // Will not shrink the hashmap again because shrink capacity is less than initCapacity + // current capacity is equal than the initial capacity + assertTrue(map.capacity() == initCapacity); + map.clear(); + // after clear, because current capacity is equal than the initial capacity, so not shrinkToInitCapacity + assertTrue(map.capacity() == initCapacity); + } + @Test public void testNegativeUsedBucketCount() { - ConcurrentLongLongPairHashMap map = new ConcurrentLongLongPairHashMap(16, 1); + ConcurrentLongLongPairHashMap map = ConcurrentLongLongPairHashMap.newBuilder() + .expectedItems(16) + .concurrencyLevel(1) + .build(); map.put(0, 0, 0, 0); assertEquals(1, map.getUsedBucketCount()); @@ -127,10 +288,31 @@ public void testNegativeUsedBucketCount() { assertEquals(0, map.getUsedBucketCount()); } + @Test + public void testReduceUnnecessaryExpansions() { + ConcurrentLongLongPairHashMap map = ConcurrentLongLongPairHashMap.newBuilder() + .expectedItems(2) + .concurrencyLevel(1) + .mapFillFactor(0.9f) + .build(); + map.put(1, 1, 1, 1); + map.put(2, 2, 2, 2); + map.put(3, 3, 3, 3); + + map.remove(1, 1); + map.remove(2, 2); + map.remove(3, 3); + + assertEquals(0, map.getUsedBucketCount()); + } + @Test public void testRehashing() { int n = 16; - ConcurrentLongLongPairHashMap map = new ConcurrentLongLongPairHashMap(n / 2, 1); + ConcurrentLongLongPairHashMap map = ConcurrentLongLongPairHashMap.newBuilder() + .expectedItems(n / 2) + .concurrencyLevel(1) + .build(); assertEquals(map.capacity(), n); assertEquals(map.size(), 0); @@ -145,7 +327,10 @@ public void testRehashing() { @Test public void testRehashingWithDeletes() { int n = 16; - ConcurrentLongLongPairHashMap map = new ConcurrentLongLongPairHashMap(n / 2, 1); + ConcurrentLongLongPairHashMap map = ConcurrentLongLongPairHashMap.newBuilder() + .expectedItems(n / 2) + .concurrencyLevel(1) + .build(); assertEquals(map.capacity(), n); assertEquals(map.size(), 0); @@ -167,7 +352,8 @@ public void testRehashingWithDeletes() { @Test public void concurrentInsertions() throws Throwable { - 
ConcurrentLongLongPairHashMap map = new ConcurrentLongLongPairHashMap(); + ConcurrentLongLongPairHashMap map = ConcurrentLongLongPairHashMap.newBuilder() + .build(); ExecutorService executor = Executors.newCachedThreadPool(); final int nThreads = 16; @@ -206,7 +392,8 @@ public void concurrentInsertions() throws Throwable { @Test public void concurrentInsertionsAndReads() throws Throwable { - ConcurrentLongLongPairHashMap map = new ConcurrentLongLongPairHashMap(); + ConcurrentLongLongPairHashMap map = ConcurrentLongLongPairHashMap.newBuilder() + .build(); ExecutorService executor = Executors.newCachedThreadPool(); final int nThreads = 16; @@ -245,7 +432,8 @@ public void concurrentInsertionsAndReads() throws Throwable { @Test public void testIteration() { - ConcurrentLongLongPairHashMap map = new ConcurrentLongLongPairHashMap(); + ConcurrentLongLongPairHashMap map = ConcurrentLongLongPairHashMap.newBuilder() + .build(); assertEquals(map.keys(), Collections.emptyList()); assertEquals(map.values(), Collections.emptyList()); @@ -288,7 +476,9 @@ public void testIteration() { @Test public void testPutIfAbsent() { - ConcurrentLongLongPairHashMap map = new ConcurrentLongLongPairHashMap(); + ConcurrentLongLongPairHashMap map = ConcurrentLongLongPairHashMap.newBuilder() + .build(); + assertTrue(map.putIfAbsent(1, 1, 11, 11)); assertEquals(map.get(1, 1), new LongPair(11, 11)); @@ -297,8 +487,12 @@ public void testPutIfAbsent() { } @Test - public void testIvalidKeys() { - ConcurrentLongLongPairHashMap map = new ConcurrentLongLongPairHashMap(16, 1); + public void testInvalidKeys() { + ConcurrentLongLongPairHashMap map = ConcurrentLongLongPairHashMap.newBuilder() + .expectedItems(16) + .concurrencyLevel(1) + .build(); + try { map.put(-5, 3, 4, 4); @@ -331,7 +525,10 @@ public void testIvalidKeys() { @Test public void testAsMap() { - ConcurrentLongLongPairHashMap lmap = new ConcurrentLongLongPairHashMap(16, 1); + ConcurrentLongLongPairHashMap lmap = ConcurrentLongLongPairHashMap.newBuilder() + .expectedItems(16) + .concurrencyLevel(1) + .build(); lmap.put(1, 1, 11, 11); lmap.put(2, 2, 22, 22); lmap.put(3, 3, 33, 33); @@ -343,4 +540,24 @@ public void testAsMap() { assertEquals(map, lmap.asMap()); } + + @Test + public void testSizeInBytes() { + ConcurrentLongLongPairHashMap lmap = new ConcurrentLongLongPairHashMap(4, 2); + assertEquals(256, lmap.sizeInBytes()); + lmap.put(1, 1, 1, 1); + assertEquals(256, lmap.sizeInBytes()); + lmap.put(2, 2, 2, 2); + assertEquals(256, lmap.sizeInBytes()); + lmap.put(3, 3, 3, 3); + assertEquals(256, lmap.sizeInBytes()); + lmap.put(4, 4, 4, 4); + assertEquals(256, lmap.sizeInBytes()); + lmap.put(5, 5, 5, 5); + assertEquals(384, lmap.sizeInBytes()); + lmap.put(6, 6, 6, 6); + assertEquals(512, lmap.sizeInBytes()); + lmap.put(7, 7, 7, 7); + assertEquals(512, lmap.sizeInBytes()); + } } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/collections/ConcurrentOpenHashMapTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/collections/ConcurrentOpenHashMapTest.java index 20ad0bdffad..a7835e63897 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/collections/ConcurrentOpenHashMapTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/collections/ConcurrentOpenHashMapTest.java @@ -27,21 +27,21 @@ import static org.junit.Assert.fail; import com.google.common.collect.Lists; - import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Random; 
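The new `testSizeInBytes` assertions, both here and in the long/long map earlier, become obvious once the storage layout is spelled out: each open-addressed slot stores the raw longs of one entry, so a pair-map slot costs four longs (32 bytes) against two longs (16 bytes) for the plain long/long map, and the two sections of a `(4, 2)` map resize independently. The slot counts below are inferred from the asserted byte totals:

```java
public class SizeInBytesMath {
    public static void main(String[] args) {
        int pairSlotBytes = 4 * Long.BYTES; // key1, key2, value1, value2
        int longSlotBytes = 2 * Long.BYTES; // key, value

        // new ConcurrentLongLongPairHashMap(4, 2): two sections of 4 slots
        System.out.println(2 * 4 * pairSlotBytes);   // 256
        System.out.println((4 + 8) * pairSlotBytes); // 384: one section doubled
        System.out.println((8 + 8) * pairSlotBytes); // 512: both sections doubled

        // same accounting for new ConcurrentLongLongHashMap(4, 2)
        System.out.println(2 * 4 * longSlotBytes);   // 128
        System.out.println((4 + 8) * longSlotBytes); // 192
        System.out.println((8 + 8) * longSlotBytes); // 256
    }
}
```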
import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.CyclicBarrier; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicReference; import java.util.function.BiPredicate; import java.util.function.Function; - import org.junit.Test; /** @@ -52,30 +52,57 @@ public class ConcurrentOpenHashMapTest { @Test public void testConstructor() { try { - new ConcurrentOpenHashMap(0); + ConcurrentOpenHashMap.newBuilder().expectedItems(0).build(); fail("should have thrown exception"); } catch (IllegalArgumentException e) { // ok } try { - new ConcurrentOpenHashMap(16, 0); + ConcurrentOpenHashMap.newBuilder() + .expectedItems(16) + .concurrencyLevel(0) + .build(); fail("should have thrown exception"); } catch (IllegalArgumentException e) { // ok } try { - new ConcurrentOpenHashMap(4, 8); + ConcurrentOpenHashMap.newBuilder() + .expectedItems(4) + .concurrencyLevel(8) + .build(); fail("should have thrown exception"); } catch (IllegalArgumentException e) { // ok } } + @Test + public void testReduceUnnecessaryExpansions() { + ConcurrentOpenHashMap map = ConcurrentOpenHashMap.newBuilder() + .expectedItems(2) + .concurrencyLevel(1) + .build(); + assertNull(map.put("1", "1")); + assertNull(map.put("2", "2")); + assertNull(map.put("3", "3")); + assertNull(map.put("4", "4")); + + assertEquals(map.remove("1"), "1"); + assertEquals(map.remove("2"), "2"); + assertEquals(map.remove("3"), "3"); + assertEquals(map.remove("4"), "4"); + + assertEquals(0, map.getUsedBucketCount()); + } + @Test public void simpleInsertions() { - ConcurrentOpenHashMap map = new ConcurrentOpenHashMap<>(16); + ConcurrentOpenHashMap map = ConcurrentOpenHashMap.newBuilder() + .expectedItems(16) + .build(); assertTrue(map.isEmpty()); assertNull(map.put("1", "one")); @@ -101,9 +128,159 @@ public void simpleInsertions() { assertEquals(map.size(), 3); } + @Test + public void testClear() { + ConcurrentOpenHashMap map = ConcurrentOpenHashMap.newBuilder() + .expectedItems(2) + .concurrencyLevel(1) + .autoShrink(true) + .mapIdleFactor(0.25f) + .build(); + assertTrue(map.capacity() == 4); + + assertNull(map.put("k1", "v1")); + assertNull(map.put("k2", "v2")); + assertNull(map.put("k3", "v3")); + + assertTrue(map.capacity() == 8); + map.clear(); + assertTrue(map.capacity() == 4); + } + + @Test + public void testExpandAndShrink() { + ConcurrentOpenHashMap map = ConcurrentOpenHashMap.newBuilder() + .expectedItems(2) + .concurrencyLevel(1) + .autoShrink(true) + .mapIdleFactor(0.25f) + .build(); + assertTrue(map.capacity() == 4); + + assertNull(map.put("k1", "v1")); + assertNull(map.put("k2", "v2")); + assertNull(map.put("k3", "v3")); + + // expand hashmap + assertTrue(map.capacity() == 8); + + assertTrue(map.remove("k1", "v1")); + // not shrink + assertTrue(map.capacity() == 8); + assertTrue(map.remove("k2", "v2")); + // shrink hashmap + assertTrue(map.capacity() == 4); + + // expand hashmap + assertNull(map.put("k4", "v4")); + assertNull(map.put("k5", "v5")); + assertTrue(map.capacity() == 8); + + //verify that the map does not keep shrinking at every remove() operation + assertNull(map.put("k6", "v6")); + assertTrue(map.remove("k6", "v6")); + assertTrue(map.capacity() == 8); + } + + @Test + public void testConcurrentExpandAndShrinkAndGet() throws Throwable { + ConcurrentOpenHashMap map = ConcurrentOpenHashMap.newBuilder() + .expectedItems(2) 
+ .concurrencyLevel(1) + .autoShrink(true) + .mapIdleFactor(0.25f) + .build(); + assertEquals(map.capacity(), 4); + + ExecutorService executor = Executors.newCachedThreadPool(); + final int readThreads = 16; + final int writeThreads = 1; + final int n = 1_000; + CyclicBarrier barrier = new CyclicBarrier(writeThreads + readThreads); + Future future = null; + AtomicReference ex = new AtomicReference<>(); + + for (int i = 0; i < readThreads; i++) { + executor.submit(() -> { + try { + barrier.await(); + } catch (Exception e) { + throw new RuntimeException(e); + } + while (true) { + try { + map.get("k3"); + } catch (Exception e) { + ex.set(e); + } + } + }); + } + + assertNull(map.put("k1", "v1")); + future = executor.submit(() -> { + try { + barrier.await(); + } catch (Exception e) { + throw new RuntimeException(e); + } + + for (int i = 0; i < n; i++) { + // expand hashmap + assertNull(map.put("k2", "v2")); + assertNull(map.put("k3", "v3")); + assertEquals(map.capacity(), 8); + + // shrink hashmap + assertTrue(map.remove("k2", "v2")); + assertTrue(map.remove("k3", "v3")); + assertEquals(map.capacity(), 4); + } + }); + + future.get(); + assertTrue(ex.get() == null); + // shut down pool + executor.shutdown(); + } + + @Test + public void testExpandShrinkAndClear() { + ConcurrentOpenHashMap map = ConcurrentOpenHashMap.newBuilder() + .expectedItems(2) + .concurrencyLevel(1) + .autoShrink(true) + .mapIdleFactor(0.25f) + .build(); + final long initCapacity = map.capacity(); + assertTrue(map.capacity() == 4); + assertNull(map.put("k1", "v1")); + assertNull(map.put("k2", "v2")); + assertNull(map.put("k3", "v3")); + + // expand hashmap + assertTrue(map.capacity() == 8); + + assertTrue(map.remove("k1", "v1")); + // not shrink + assertTrue(map.capacity() == 8); + assertTrue(map.remove("k2", "v2")); + // shrink hashmap + assertTrue(map.capacity() == 4); + + assertTrue(map.remove("k3", "v3")); + // Will not shrink the hashmap again because the shrunk capacity would be less than initCapacity; + // the current capacity is equal to the initial capacity + assertTrue(map.capacity() == initCapacity); + map.clear(); + // after clear, the current capacity is equal to the initial capacity, so shrinkToInitCapacity is not triggered + assertTrue(map.capacity() == initCapacity); + } + @Test public void testRemove() { - ConcurrentOpenHashMap map = new ConcurrentOpenHashMap<>(); + ConcurrentOpenHashMap map = + ConcurrentOpenHashMap.newBuilder().build(); assertTrue(map.isEmpty()); assertNull(map.put("1", "one")); @@ -120,7 +297,10 @@ public void testRemove() { @Test public void testRehashing() { int n = 16; - ConcurrentOpenHashMap map = new ConcurrentOpenHashMap<>(n / 2, 1); + ConcurrentOpenHashMap map = ConcurrentOpenHashMap.newBuilder() + .expectedItems(n / 2) + .concurrencyLevel(1) + .build(); assertEquals(map.capacity(), n); assertEquals(map.size(), 0); @@ -135,7 +315,10 @@ public void testRehashing() { @Test public void testRehashingWithDeletes() { int n = 16; - ConcurrentOpenHashMap map = new ConcurrentOpenHashMap<>(n / 2, 1); + ConcurrentOpenHashMap map = ConcurrentOpenHashMap.newBuilder() + .expectedItems(n / 2) + .concurrencyLevel(1) + .build(); assertEquals(map.capacity(), n); assertEquals(map.size(), 0); @@ -157,7 +340,8 @@ public void testRehashingWithDeletes() { @Test public void concurrentInsertions() throws Throwable { - ConcurrentOpenHashMap map = new ConcurrentOpenHashMap<>(); + ConcurrentOpenHashMap map = + ConcurrentOpenHashMap.newBuilder().build(); ExecutorService executor = Executors.newCachedThreadPool(); final int
nThreads = 16; @@ -192,7 +376,8 @@ public void concurrentInsertions() throws Throwable { @Test public void concurrentInsertionsAndReads() throws Throwable { - ConcurrentOpenHashMap map = new ConcurrentOpenHashMap<>(); + ConcurrentOpenHashMap map = + ConcurrentOpenHashMap.newBuilder().build(); ExecutorService executor = Executors.newCachedThreadPool(); final int nThreads = 16; @@ -227,7 +412,8 @@ public void concurrentInsertionsAndReads() throws Throwable { @Test public void testIteration() { - ConcurrentOpenHashMap map = new ConcurrentOpenHashMap<>(); + ConcurrentOpenHashMap map = + ConcurrentOpenHashMap.newBuilder().build(); assertEquals(map.keys(), Collections.emptyList()); assertEquals(map.values(), Collections.emptyList()); @@ -271,7 +457,10 @@ public void testIteration() { @Test public void testHashConflictWithDeletion() { final int buckets = 16; - ConcurrentOpenHashMap map = new ConcurrentOpenHashMap<>(buckets, 1); + ConcurrentOpenHashMap map = ConcurrentOpenHashMap.newBuilder() + .expectedItems(buckets) + .concurrencyLevel(1) + .build(); // Pick 2 keys that fall into the same bucket long key1 = 1; @@ -304,7 +493,8 @@ public void testHashConflictWithDeletion() { @Test public void testPutIfAbsent() { - ConcurrentOpenHashMap map = new ConcurrentOpenHashMap<>(); + ConcurrentOpenHashMap map = + ConcurrentOpenHashMap.newBuilder().build(); assertEquals(map.putIfAbsent(1L, "one"), null); assertEquals(map.get(1L), "one"); @@ -314,7 +504,10 @@ public void testPutIfAbsent() { @Test public void testComputeIfAbsent() { - ConcurrentOpenHashMap map = new ConcurrentOpenHashMap<>(16, 1); + ConcurrentOpenHashMap map = ConcurrentOpenHashMap.newBuilder() + .expectedItems(16) + .concurrencyLevel(1) + .build(); AtomicInteger counter = new AtomicInteger(); Function provider = key -> counter.getAndIncrement(); @@ -333,7 +526,8 @@ public void testComputeIfAbsent() { @Test public void testRemoval() { - ConcurrentOpenHashMap map = new ConcurrentOpenHashMap<>(); + ConcurrentOpenHashMap map = + ConcurrentOpenHashMap.newBuilder().build(); map.put(0, "0"); map.put(1, "1"); map.put(3, "3"); @@ -381,7 +575,8 @@ public boolean equals(Object obj) { } } - ConcurrentOpenHashMap map = new ConcurrentOpenHashMap<>(); + ConcurrentOpenHashMap map = + ConcurrentOpenHashMap.newBuilder().build(); T t1 = new T(1); T t1B = new T(1); @@ -407,7 +602,10 @@ public boolean equals(Object obj) { public void benchConcurrentOpenHashMap() throws Exception { // public static void main(String args[]) { - ConcurrentOpenHashMap map = new ConcurrentOpenHashMap<>(N, 1); + ConcurrentOpenHashMap map = ConcurrentOpenHashMap.newBuilder() + .expectedItems(N) + .concurrencyLevel(1) + .build(); for (long i = 0; i < Iterations; i++) { for (int j = 0; j < N; j++) { diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/collections/ConcurrentOpenHashSetTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/collections/ConcurrentOpenHashSetTest.java index d4f74f913b1..8840eacb09d 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/collections/ConcurrentOpenHashSetTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/collections/ConcurrentOpenHashSetTest.java @@ -26,15 +26,15 @@ import static org.junit.Assert.fail; import com.google.common.collect.Lists; - import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.Random; +import java.util.concurrent.CyclicBarrier; import java.util.concurrent.ExecutorService; import 
java.util.concurrent.Executors; import java.util.concurrent.Future; - +import java.util.concurrent.atomic.AtomicReference; import org.junit.Test; /** @@ -45,21 +45,29 @@ public class ConcurrentOpenHashSetTest { @Test public void testConstructor() { try { - new ConcurrentOpenHashSet(0); + ConcurrentOpenHashSet.newBuilder() + .expectedItems(0) + .build(); fail("should have thrown exception"); } catch (IllegalArgumentException e) { // ok } try { - new ConcurrentOpenHashSet(16, 0); + ConcurrentOpenHashSet.newBuilder() + .expectedItems(14) + .concurrencyLevel(0) + .build(); fail("should have thrown exception"); } catch (IllegalArgumentException e) { // ok } try { - new ConcurrentOpenHashSet(4, 8); + ConcurrentOpenHashSet.newBuilder() + .expectedItems(4) + .concurrencyLevel(8) + .build(); fail("should have thrown exception"); } catch (IllegalArgumentException e) { // ok @@ -68,7 +76,9 @@ public void testConstructor() { @Test public void simpleInsertions() { - ConcurrentOpenHashSet set = new ConcurrentOpenHashSet<>(16); + ConcurrentOpenHashSet set = ConcurrentOpenHashSet.newBuilder() + .expectedItems(16) + .build(); assertTrue(set.isEmpty()); assertTrue(set.add("1")); @@ -94,9 +104,180 @@ public void simpleInsertions() { assertEquals(set.size(), 3); } + @Test + public void testClear() { + ConcurrentOpenHashSet map = ConcurrentOpenHashSet.newBuilder() + .expectedItems(2) + .concurrencyLevel(1) + .autoShrink(true) + .mapIdleFactor(0.25f) + .build(); + assertTrue(map.capacity() == 4); + + assertTrue(map.add("k1")); + assertTrue(map.add("k2")); + assertTrue(map.add("k3")); + + assertTrue(map.capacity() == 8); + map.clear(); + assertTrue(map.capacity() == 4); + } + + @Test + public void testExpandAndShrink() { + ConcurrentOpenHashSet map = ConcurrentOpenHashSet.newBuilder() + .expectedItems(2) + .concurrencyLevel(1) + .autoShrink(true) + .mapIdleFactor(0.25f) + .build(); + assertTrue(map.capacity() == 4); + + assertTrue(map.add("k1")); + assertTrue(map.add("k2")); + assertTrue(map.add("k3")); + + // expand hashmap + assertTrue(map.capacity() == 8); + + assertTrue(map.remove("k1")); + // not shrink + assertTrue(map.capacity() == 8); + assertTrue(map.remove("k2")); + // shrink hashmap + assertTrue(map.capacity() == 4); + + // expand hashmap + assertTrue(map.add("k4")); + assertTrue(map.add("k5")); + assertTrue(map.capacity() == 8); + + //verify that the map does not keep shrinking at every remove() operation + assertTrue(map.add("k6")); + assertTrue(map.remove("k6")); + assertTrue(map.capacity() == 8); + } + + @Test + public void testExpandShrinkAndClear() { + ConcurrentOpenHashSet map = ConcurrentOpenHashSet.newBuilder() + .expectedItems(2) + .concurrencyLevel(1) + .autoShrink(true) + .mapIdleFactor(0.25f) + .build(); + final long initCapacity = map.capacity(); + assertTrue(map.capacity() == 4); + + assertTrue(map.add("k1")); + assertTrue(map.add("k2")); + assertTrue(map.add("k3")); + + // expand hashmap + assertTrue(map.capacity() == 8); + + assertTrue(map.remove("k1")); + // not shrink + assertTrue(map.capacity() == 8); + assertTrue(map.remove("k2")); + // shrink hashmap + assertTrue(map.capacity() == 4); + + assertTrue(map.remove("k3")); + // Will not shrink the hashmap again because the shrunk capacity would be less than initCapacity; + // the current capacity is equal to the initial capacity + assertTrue(map.capacity() == initCapacity); + map.clear(); + // after clear, the current capacity is equal to the initial capacity, so shrinkToInitCapacity is not triggered + assertTrue(map.capacity() == initCapacity); + } + +
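For context on the capacity assertions in the expand/shrink tests above and in the concurrent variant that follows: the arithmetic can be read off the builder settings alone. Below is a minimal standalone sketch, assuming only what the assertions themselves establish (expectedItems(2) with concurrencyLevel(1) starts at 4 buckets, a third insertion crosses the fill factor and doubles the table, and usage below mapIdleFactor halves it); the class name AutoShrinkSketch and its main method are illustrative, not part of this patch, and the raw type mirrors the tests' own usage.

import org.apache.bookkeeper.util.collections.ConcurrentOpenHashSet;

public class AutoShrinkSketch {
    public static void main(String[] args) {
        // Raw type, mirroring the usage in the tests above.
        ConcurrentOpenHashSet set = ConcurrentOpenHashSet.newBuilder()
                .expectedItems(2)      // sized for 2 items -> 4 buckets to start
                .concurrencyLevel(1)   // a single section, so capacity() is the bucket count
                .autoShrink(true)      // let the table halve when mostly idle
                .mapIdleFactor(0.25f)  // shrink once usage drops below 25% of capacity
                .build();

        set.add("k1");
        set.add("k2");
        set.add("k3");                 // third item crosses the fill factor -> capacity doubles
        System.out.println(set.capacity()); // 8

        set.remove("k1");              // usage 2/8 is not below 0.25 -> capacity stays 8
        set.remove("k2");              // usage 1/8 < 0.25 -> capacity halves back to 4
        System.out.println(set.capacity()); // 4
    }
}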
@Test + public void testConcurrentExpandAndShrinkAndGet() throws Throwable { + ConcurrentOpenHashSet set = ConcurrentOpenHashSet.newBuilder() + .expectedItems(2) + .concurrencyLevel(1) + .autoShrink(true) + .mapIdleFactor(0.25f) + .build(); + assertEquals(set.capacity(), 4); + + ExecutorService executor = Executors.newCachedThreadPool(); + final int readThreads = 16; + final int writeThreads = 1; + final int n = 1_000; + CyclicBarrier barrier = new CyclicBarrier(writeThreads + readThreads); + Future future = null; + AtomicReference ex = new AtomicReference<>(); + + for (int i = 0; i < readThreads; i++) { + executor.submit(() -> { + try { + barrier.await(); + } catch (Exception e) { + throw new RuntimeException(e); + } + while (true) { + try { + set.contains("k2"); + } catch (Exception e) { + ex.set(e); + } + } + }); + } + + assertTrue(set.add("k1")); + future = executor.submit(() -> { + try { + barrier.await(); + } catch (Exception e) { + throw new RuntimeException(e); + } + + for (int i = 0; i < n; i++) { + // expand hashmap + assertTrue(set.add("k2")); + assertTrue(set.add("k3")); + assertEquals(set.capacity(), 8); + + // shrink hashmap + assertTrue(set.remove("k2")); + assertTrue(set.remove("k3")); + assertEquals(set.capacity(), 4); + } + }); + + future.get(); + assertTrue(ex.get() == null); + // shut down pool + executor.shutdown(); + } + + @Test + public void testReduceUnnecessaryExpansions(){ + ConcurrentOpenHashSet set = + ConcurrentOpenHashSet.newBuilder() + .expectedItems(2) + .concurrencyLevel(1) + .build(); + + assertTrue(set.add("1")); + assertTrue(set.add("2")); + assertTrue(set.add("3")); + assertTrue(set.add("4")); + + assertTrue(set.remove("1")); + assertTrue(set.remove("2")); + assertTrue(set.remove("3")); + assertTrue(set.remove("4")); + assertEquals(0, set.getUsedBucketCount()); + } + @Test public void testRemove() { - ConcurrentOpenHashSet set = new ConcurrentOpenHashSet<>(); + ConcurrentOpenHashSet set = + ConcurrentOpenHashSet.newBuilder().build(); assertTrue(set.isEmpty()); assertTrue(set.add("1")); @@ -111,7 +292,10 @@ public void testRemove() { @Test public void testRehashing() { int n = 16; - ConcurrentOpenHashSet set = new ConcurrentOpenHashSet<>(n / 2, 1); + ConcurrentOpenHashSet set = ConcurrentOpenHashSet.newBuilder() + .expectedItems(n / 2) + .concurrencyLevel(1) + .build(); assertEquals(set.capacity(), n); assertEquals(set.size(), 0); @@ -126,7 +310,10 @@ public void testRehashing() { @Test public void testRehashingWithDeletes() { int n = 16; - ConcurrentOpenHashSet set = new ConcurrentOpenHashSet<>(n / 2, 1); + ConcurrentOpenHashSet set = ConcurrentOpenHashSet.newBuilder() + .expectedItems(n / 2) + .concurrencyLevel(1) + .build(); assertEquals(set.capacity(), n); assertEquals(set.size(), 0); @@ -148,7 +335,8 @@ public void testRehashingWithDeletes() { @Test public void concurrentInsertions() throws Throwable { - ConcurrentOpenHashSet set = new ConcurrentOpenHashSet<>(); + ConcurrentOpenHashSet set = + ConcurrentOpenHashSet.newBuilder().build(); ExecutorService executor = Executors.newCachedThreadPool(); final int nThreads = 16; @@ -182,7 +370,8 @@ public void concurrentInsertions() throws Throwable { @Test public void concurrentInsertionsAndReads() throws Throwable { - ConcurrentOpenHashSet map = new ConcurrentOpenHashSet<>(); + ConcurrentOpenHashSet map = + ConcurrentOpenHashSet.newBuilder().build(); ExecutorService executor = Executors.newCachedThreadPool(); final int nThreads = 16; @@ -216,7 +405,8 @@ public void concurrentInsertionsAndReads() throws 
Throwable { @Test public void testIteration() { - ConcurrentOpenHashSet set = new ConcurrentOpenHashSet<>(); + ConcurrentOpenHashSet set = + ConcurrentOpenHashSet.newBuilder().build(); assertEquals(set.values(), Collections.emptyList()); @@ -243,7 +433,10 @@ public void testIteration() { @Test public void testHashConflictWithDeletion() { final int buckets = 16; - ConcurrentOpenHashSet set = new ConcurrentOpenHashSet<>(buckets, 1); + ConcurrentOpenHashSet set = ConcurrentOpenHashSet.newBuilder() + .expectedItems(buckets) + .concurrencyLevel(1) + .build(); // Pick 2 keys that fall into the same bucket long key1 = 1; @@ -298,7 +491,8 @@ public boolean equals(Object obj) { } } - ConcurrentOpenHashSet set = new ConcurrentOpenHashSet<>(); + ConcurrentOpenHashSet set = + ConcurrentOpenHashSet.newBuilder().build(); T t1 = new T(1); T t1B = new T(1); diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/collections/GrowableArrayBlockingQueueTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/collections/GrowableArrayBlockingQueueTest.java deleted file mode 100644 index 81364922425..00000000000 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/collections/GrowableArrayBlockingQueueTest.java +++ /dev/null @@ -1,209 +0,0 @@ -/* - * - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - * - */ -package org.apache.bookkeeper.util.collections; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; - -import com.google.common.collect.Lists; - -import java.util.ArrayList; -import java.util.List; -import java.util.NoSuchElementException; -import java.util.concurrent.BlockingQueue; -import java.util.concurrent.CountDownLatch; -import java.util.concurrent.TimeUnit; - -import org.junit.Test; - -/** - * Test the growable array blocking queue. 
- */ -public class GrowableArrayBlockingQueueTest { - - @Test - public void simple() throws Exception { - BlockingQueue queue = new GrowableArrayBlockingQueue<>(4); - - assertEquals(null, queue.poll()); - - assertEquals(Integer.MAX_VALUE, queue.remainingCapacity()); - assertEquals("[]", queue.toString()); - - try { - queue.element(); - fail("Should have thrown exception"); - } catch (NoSuchElementException e) { - // Expected - } - - try { - queue.iterator(); - fail("Should have thrown exception"); - } catch (UnsupportedOperationException e) { - // Expected - } - - // Test index rollover - for (int i = 0; i < 100; i++) { - queue.add(i); - - assertEquals(i, queue.take().intValue()); - } - - queue.offer(1); - assertEquals("[1]", queue.toString()); - queue.offer(2); - assertEquals("[1, 2]", queue.toString()); - queue.offer(3); - assertEquals("[1, 2, 3]", queue.toString()); - queue.offer(4); - assertEquals("[1, 2, 3, 4]", queue.toString()); - - assertEquals(4, queue.size()); - - List list = new ArrayList<>(); - queue.drainTo(list, 3); - - assertEquals(1, queue.size()); - assertEquals(Lists.newArrayList(1, 2, 3), list); - assertEquals("[4]", queue.toString()); - assertEquals(4, queue.peek().intValue()); - - assertEquals(4, queue.element().intValue()); - assertEquals(4, queue.remove().intValue()); - try { - queue.remove(); - fail("Should have thrown exception"); - } catch (NoSuchElementException e) { - // Expected - } - } - - @Test - public void blockingTake() throws Exception { - BlockingQueue queue = new GrowableArrayBlockingQueue<>(); - - CountDownLatch latch = new CountDownLatch(1); - - new Thread(() -> { - try { - int expected = 0; - - for (int i = 0; i < 100; i++) { - int n = queue.take(); - - assertEquals(expected++, n); - } - - latch.countDown(); - } catch (Exception e) { - e.printStackTrace(); - } - }).start(); - - int n = 0; - for (int i = 0; i < 10; i++) { - for (int j = 0; j < 10; j++) { - queue.put(n); - ++n; - } - - // Wait until all the entries are consumed - while (!queue.isEmpty()) { - Thread.sleep(1); - } - } - - latch.await(); - } - - @Test - public void growArray() throws Exception { - BlockingQueue queue = new GrowableArrayBlockingQueue<>(4); - - assertEquals(null, queue.poll()); - - assertTrue(queue.offer(1)); - assertTrue(queue.offer(2)); - assertTrue(queue.offer(3)); - assertTrue(queue.offer(4)); - assertTrue(queue.offer(5)); - - assertEquals(5, queue.size()); - - queue.clear(); - assertEquals(0, queue.size()); - - assertTrue(queue.offer(1, 1, TimeUnit.SECONDS)); - assertTrue(queue.offer(2, 1, TimeUnit.SECONDS)); - assertTrue(queue.offer(3, 1, TimeUnit.SECONDS)); - assertEquals(3, queue.size()); - - List list = new ArrayList<>(); - queue.drainTo(list); - assertEquals(0, queue.size()); - - assertEquals(Lists.newArrayList(1, 2, 3), list); - } - - @Test - public void pollTimeout() throws Exception { - BlockingQueue queue = new GrowableArrayBlockingQueue<>(4); - - assertEquals(null, queue.poll(1, TimeUnit.MILLISECONDS)); - - queue.put(1); - assertEquals(1, queue.poll(1, TimeUnit.MILLISECONDS).intValue()); - - // 0 timeout should not block - assertEquals(null, queue.poll(0, TimeUnit.HOURS)); - - queue.put(2); - queue.put(3); - assertEquals(2, queue.poll(1, TimeUnit.HOURS).intValue()); - assertEquals(3, queue.poll(1, TimeUnit.HOURS).intValue()); - } - - @Test - public void pollTimeout2() throws Exception { - BlockingQueue queue = new GrowableArrayBlockingQueue<>(); - - CountDownLatch latch = new CountDownLatch(1); - - new Thread(() -> { - try { - queue.poll(1, TimeUnit.HOURS); - 
- latch.countDown(); - } catch (Exception e) { - e.printStackTrace(); - } - }).start(); - - // Make sure background thread is waiting on poll - Thread.sleep(100); - queue.put(1); - - latch.await(); - } -} diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/collections/SynchronizedHashMultiMapTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/collections/SynchronizedHashMultiMapTest.java index f9ab747ee60..c58093848b6 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/collections/SynchronizedHashMultiMapTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/util/collections/SynchronizedHashMultiMapTest.java @@ -21,7 +21,6 @@ package org.apache.bookkeeper.util.collections; import java.util.Optional; - import org.junit.Assert; import org.junit.Test; diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/verifier/BookkeeperVerifierTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/verifier/BookkeeperVerifierTest.java index d282bfdc0fc..6681c018b81 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/verifier/BookkeeperVerifierTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/verifier/BookkeeperVerifierTest.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/zookeeper/MockZooKeeperTestCase.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/zookeeper/MockZooKeeperTestCase.java index 4190bf5c427..73f6762f963 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/zookeeper/MockZooKeeperTestCase.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/zookeeper/MockZooKeeperTestCase.java @@ -23,6 +23,7 @@ import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.mockStatic; import com.google.common.collect.Maps; import java.util.HashSet; @@ -40,13 +41,14 @@ import org.apache.zookeeper.AsyncCallback.StringCallback; import org.apache.zookeeper.AsyncCallback.VoidCallback; import org.apache.zookeeper.CreateMode; +import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.WatchedEvent; import org.apache.zookeeper.Watcher; import org.apache.zookeeper.Watcher.Event.EventType; import org.apache.zookeeper.Watcher.Event.KeeperState; import org.apache.zookeeper.ZooKeeper; import org.apache.zookeeper.data.Stat; -import org.powermock.api.mockito.PowerMockito; +import org.mockito.MockedStatic; /** * A test base that provides mocked zookeeper. 
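The MockZooKeeperTestCase hunks below swap PowerMockito's static mocking for Mockito's own MockedStatic (mockito-inline), whose stubbing stays active only until the handle is closed. Here is a minimal sketch of the idiom, assuming ZkUtils sits at its usual org.apache.bookkeeper.util location; the class ZkUtilsStaticMockSketch and its method are illustrative only. The try-with-resources form shown is the common per-test variant, whereas the test base below keeps the MockedStatic in a field and closes it in teardown() because the stub has to span several test methods.

import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.anyList;
import static org.mockito.ArgumentMatchers.anyString;
import static org.mockito.Mockito.mockStatic;

import org.apache.bookkeeper.util.ZkUtils;
import org.mockito.MockedStatic;

class ZkUtilsStaticMockSketch {
    void runWithStubbedZkUtils(Runnable codeUnderTest) {
        try (MockedStatic<ZkUtils> mocked = mockStatic(ZkUtils.class)) {
            // Intercept the static call and complete it immediately as a no-op.
            mocked.when(() -> ZkUtils.asyncCreateFullPathOptimistic(
                            any(), anyString(), any(byte[].class), anyList(),
                            any(), any(), any()))
                    .thenAnswer(invocation -> null);
            codeUnderTest.run();   // ZkUtils static calls made in here hit the stub
        }                          // closing the handle deregisters the static stub
    }
}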
@@ -57,11 +59,12 @@ public abstract class MockZooKeeperTestCase { protected ZooKeeper mockZk; protected ScheduledExecutorService zkCallbackExecutor; protected MockExecutorController zkCallbackController; + private MockedStatic zkUtilsMockedStatic; protected void setup() throws Exception { this.mockZk = mock(ZooKeeper.class); - PowerMockito.mockStatic(ZkUtils.class); + this.zkUtilsMockedStatic = mockStatic(ZkUtils.class); this.zkCallbackExecutor = mock(ScheduledExecutorService.class); this.zkCallbackController = new MockExecutorController() @@ -71,6 +74,10 @@ protected void setup() throws Exception { .controlScheduleAtFixedRate(zkCallbackExecutor, 10); } + protected void teardown() throws Exception { + zkUtilsMockedStatic.close(); + } + private void addWatcher(String path, Watcher watcher) { if (null == watcher) { return; @@ -83,32 +90,42 @@ private void addWatcher(String path, Watcher watcher) { watcherSet.add(watcher); } + private void removeWatcher(String path, Watcher watcher) { + if (watcher == null) { + return; + } + Set watcherSet = watchers.get(path); + if (null == watcherSet) { + return; + } + watcherSet.remove(watcher); + if (watcherSet.isEmpty()) { + watchers.remove(path); + } + } + protected void mockZkUtilsAsyncCreateFullPathOptimistic( String expectedLedgerPath, CreateMode expectedCreateMode, int retCode, String retCreatedZnodeName ) throws Exception { - - PowerMockito.doAnswer(invocationOnMock -> { + zkUtilsMockedStatic.when(() -> ZkUtils.asyncCreateFullPathOptimistic( + eq(mockZk), + eq(expectedLedgerPath), + any(byte[].class), + anyList(), + eq(expectedCreateMode), + any(StringCallback.class), + any())).thenAnswer(invocationOnMock -> { String path = invocationOnMock.getArgument(1); StringCallback callback = invocationOnMock.getArgument(5); Object ctx = invocationOnMock.getArgument(6); callback.processResult( - retCode, path, ctx, retCreatedZnodeName); + retCode, path, ctx, retCreatedZnodeName); return null; - }).when( - ZkUtils.class, - "asyncCreateFullPathOptimistic", - eq(mockZk), - eq(expectedLedgerPath), - any(byte[].class), - anyList(), - eq(expectedCreateMode), - any(StringCallback.class), - any()); - + }); } protected void mockZkDelete( @@ -140,23 +157,19 @@ protected void mockZkUtilsAsyncDeleteFullPathOptimistic( int expectedZnodeVersion, int retCode ) throws Exception { - - PowerMockito.doAnswer(invocationOnMock -> { + zkUtilsMockedStatic.when(() -> ZkUtils.asyncDeleteFullPathOptimistic( + eq(mockZk), + eq(expectedLedgerPath), + eq(expectedZnodeVersion), + any(VoidCallback.class), + eq(expectedLedgerPath))).thenAnswer(invocationOnMock -> { String path = invocationOnMock.getArgument(1); VoidCallback callback = invocationOnMock.getArgument(3); callback.processResult( retCode, path, null); return null; - }).when( - ZkUtils.class, - "asyncDeleteFullPathOptimistic", - eq(mockZk), - eq(expectedLedgerPath), - eq(expectedZnodeVersion), - any(VoidCallback.class), - eq(expectedLedgerPath)); - + }); } protected void mockZkGetData( @@ -187,7 +200,24 @@ protected void mockZkGetData( expectedWatcher ? 
any(Watcher.class) : eq(null), any(DataCallback.class), any()); + } + + protected void mockZkRemoveWatcher() throws Exception { + doAnswer(invocationOnMock -> { + String path = invocationOnMock.getArgument(0); + Watcher watcher = invocationOnMock.getArgument(1); + VoidCallback callback = invocationOnMock.getArgument(4); + removeWatcher(path, watcher); + callback.processResult(KeeperException.Code.OK.intValue(), path, null); + return null; + }).when(mockZk).removeWatches( + any(String.class), + any(Watcher.class), + any(Watcher.WatcherType.class), + any(Boolean.class), + any(VoidCallback.class), + any()); } protected void mockZkSetData( diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/zookeeper/TestRetryPolicy.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/zookeeper/TestRetryPolicy.java index 2ed3eee2528..030a7154d99 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/zookeeper/TestRetryPolicy.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/zookeeper/TestRetryPolicy.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -20,6 +20,8 @@ */ package org.apache.bookkeeper.zookeeper; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; import org.junit.Test; @@ -55,4 +57,32 @@ public void testBoundExponentialBackoffRetryPolicy() throws Exception { assertTimeRange(policy.nextRetryWaitTime(128, 2000), 1000L, 2000L); assertTimeRange(policy.nextRetryWaitTime(129, 2000), 1000L, 2000L); } + + @Test + public void testExponentialBackoffWithDeadlineRetryPolicy() throws Exception { + RetryPolicy policy = new ExponentialBackOffWithDeadlinePolicy(100, 55 * 1000, 20); + + // Retries are allowed as long as we don't exceed the limits of retry count and deadline + assertTrue(policy.allowRetry(1, 5 * 1000)); + assertTrue(policy.allowRetry(4, 20 * 1000)); + assertTrue(policy.allowRetry(10, 50 * 1000)); + + assertFalse(policy.allowRetry(0, 60 * 1000)); + assertFalse(policy.allowRetry(22, 20 * 1000)); + assertFalse(policy.allowRetry(22, 60 * 1000)); + + // Verify that the wait times are in the range and with the expected jitter, until deadline is exceeded + assertTimeRange(policy.nextRetryWaitTime(0, 0), 0, 0); + assertTimeRange(policy.nextRetryWaitTime(1, 0), 100, 110); + assertTimeRange(policy.nextRetryWaitTime(1, 53 * 1000), 100, 110); + assertTimeRange(policy.nextRetryWaitTime(2, 0), 200, 220); + assertTimeRange(policy.nextRetryWaitTime(3, 0), 300, 330); + assertTimeRange(policy.nextRetryWaitTime(3, 53 * 1000), 300, 330); + assertTimeRange(policy.nextRetryWaitTime(4, 0), 500, 550); + assertTimeRange(policy.nextRetryWaitTime(5, 0), 500, 550); + + // Verify that the final attempt is triggered at deadline. + assertEquals(2000, policy.nextRetryWaitTime(10, 53 * 1000)); + assertEquals(4000, policy.nextRetryWaitTime(15, 51 * 1000)); + } } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/zookeeper/TestZKClientBoundExpBackoffRP.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/zookeeper/TestZKClientBoundExpBackoffRP.java new file mode 100644 index 00000000000..ddf046df440 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/zookeeper/TestZKClientBoundExpBackoffRP.java @@ -0,0 +1,51 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements.
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.zookeeper; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collection; +import org.apache.bookkeeper.test.ZooKeeperCluster; +import org.apache.bookkeeper.test.ZooKeeperClusterUtil; +import org.apache.bookkeeper.test.ZooKeeperUtil; +import org.apache.zookeeper.KeeperException; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +/** + * Test zk client resiliency with BoundExponentialBackoffRetryPolicy. + */ +@RunWith(Parameterized.class) +public class TestZKClientBoundExpBackoffRP extends TestZooKeeperClient { + + public TestZKClientBoundExpBackoffRP(Class zooKeeperUtilClass, + Class retryPolicyClass) + throws IOException, KeeperException, InterruptedException { + super(zooKeeperUtilClass, retryPolicyClass); + } + + @Parameterized.Parameters + public static Collection zooKeeperUtilClass() { + return Arrays.asList(new Object[][] { { ZooKeeperUtil.class, BoundExponentialBackoffRetryPolicy.class }, + { ZooKeeperClusterUtil.class, BoundExponentialBackoffRetryPolicy.class } }); + } + +} \ No newline at end of file diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/zookeeper/TestZKClientExpBackoffWithDeadlineRP.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/zookeeper/TestZKClientExpBackoffWithDeadlineRP.java new file mode 100644 index 00000000000..73bd3909e3c --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/zookeeper/TestZKClientExpBackoffWithDeadlineRP.java @@ -0,0 +1,51 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + */ +package org.apache.bookkeeper.zookeeper; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collection; +import org.apache.bookkeeper.test.ZooKeeperCluster; +import org.apache.bookkeeper.test.ZooKeeperClusterUtil; +import org.apache.bookkeeper.test.ZooKeeperUtil; +import org.apache.zookeeper.KeeperException; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +/** + * Test zk client resiliency with ExponentialBackOffWithDeadlinePolicy. + */ +@RunWith(Parameterized.class) +public class TestZKClientExpBackoffWithDeadlineRP extends TestZooKeeperClient { + + public TestZKClientExpBackoffWithDeadlineRP(Class zooKeeperUtilClass, + Class retryPolicyClass) + throws IOException, KeeperException, InterruptedException { + super(zooKeeperUtilClass, retryPolicyClass); + } + + @Parameterized.Parameters + public static Collection zooKeeperUtilClass() { + return Arrays.asList(new Object[][] { { ZooKeeperUtil.class, ExponentialBackOffWithDeadlinePolicy.class }, + { ZooKeeperClusterUtil.class, ExponentialBackOffWithDeadlinePolicy.class } }); + } + +} \ No newline at end of file diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/zookeeper/TestZooKeeperClient.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/zookeeper/TestZooKeeperClient.java index bb9554ac2e4..d06892b27d7 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/zookeeper/TestZooKeeperClient.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/zookeeper/TestZooKeeperClient.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -28,10 +28,10 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicReference; - import junit.framework.TestCase; - import org.apache.bookkeeper.stats.NullStatsLogger; +import org.apache.bookkeeper.test.ZooKeeperCluster; +import org.apache.bookkeeper.test.ZooKeeperClusterUtil; import org.apache.bookkeeper.test.ZooKeeperUtil; import org.apache.zookeeper.AsyncCallback; import org.apache.zookeeper.AsyncCallback.ACLCallback; @@ -54,30 +54,55 @@ import org.junit.Assert; import org.junit.Before; import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * Test the wrapper of {@link org.apache.zookeeper.ZooKeeper} client. 
*/ -public class TestZooKeeperClient extends TestCase { +@RunWith(Parameterized.class) +public abstract class TestZooKeeperClient extends TestCase { + + static { + ZooKeeperClusterUtil.enableZookeeperTestEnvVariables(); + } private static final Logger logger = LoggerFactory.getLogger(TestZooKeeperClient.class); // ZooKeeper related variables - protected ZooKeeperUtil zkUtil = new ZooKeeperUtil(); + protected ZooKeeperCluster zkUtil; + private RetryPolicy retryPolicy; + + public TestZooKeeperClient(Class zooKeeperUtilClass, + Class retryPolicyClass) + throws IOException, KeeperException, InterruptedException { + if (zooKeeperUtilClass.equals(ZooKeeperUtil.class)) { + zkUtil = new ZooKeeperUtil(); + } else { + zkUtil = new ZooKeeperClusterUtil(3); + } + if (retryPolicyClass.equals(BoundExponentialBackoffRetryPolicy.class)) { + retryPolicy = new BoundExponentialBackoffRetryPolicy(2000, + 2000, Integer.MAX_VALUE); + } else { + retryPolicy = new ExponentialBackOffWithDeadlinePolicy(100, + 20 * 1000, Integer.MAX_VALUE); + } + } @Before @Override public void setUp() throws Exception { logger.info("Setting up test {}.", getName()); - zkUtil.startServer(); + zkUtil.startCluster(); } @After @Override public void tearDown() throws Exception { - zkUtil.killServer(); + zkUtil.killCluster(); logger.info("Teared down test {}.", getName()); } @@ -121,7 +146,7 @@ class ShutdownZkServerClient extends ZooKeeperClient { public void process(WatchedEvent event) { if (event.getType() == EventType.None && event.getState() == KeeperState.Expired) { try { - zkUtil.stopServer(); + zkUtil.stopCluster(); } catch (Exception e) { logger.error("Failed to stop zookeeper server : ", e); } @@ -132,7 +157,7 @@ public void process(WatchedEvent event) { } @Test - public void testReconnectAfterExipred() throws Exception { + public void testReconnectAfterExpired() throws Exception { final CountDownLatch expireLatch = new CountDownLatch(1); Watcher testWatcher = new Watcher() { @@ -146,13 +171,14 @@ public void process(WatchedEvent event) { }; final int timeout = 2000; ZooKeeperWatcherBase watcherManager = - new ZooKeeperWatcherBase(timeout).addChildWatcher(testWatcher); + new ZooKeeperWatcherBase(timeout, false).addChildWatcher(testWatcher); List watchers = new ArrayList(1); watchers.add(testWatcher); ZooKeeperClient client = new ShutdownZkServerClient( zkUtil.getZooKeeperConnectString(), timeout, watcherManager, - new BoundExponentialBackoffRetryPolicy(timeout, timeout, 0) - ); + ((retryPolicy instanceof BoundExponentialBackoffRetryPolicy) + ? 
new BoundExponentialBackoffRetryPolicy(timeout, timeout, 0) : + new ExponentialBackOffWithDeadlinePolicy(100, 20 * 1000, 0))); client.waitForConnection(); Assert.assertTrue("Client failed to connect an alive ZooKeeper.", client.getState().isConnected()); @@ -175,7 +201,7 @@ public void process(WatchedEvent event) { // expected } - zkUtil.restartServer(); + zkUtil.restartCluster(); // wait for a reconnect cycle Thread.sleep(2 * timeout); @@ -194,9 +220,7 @@ public void process(WatchedEvent event) { public void testRetrySyncOperations() throws Exception { final int timeout = 2000; ZooKeeperClient client = ZooKeeperClient.createConnectedZooKeeperClient( - zkUtil.getZooKeeperConnectString(), timeout, new HashSet(), - new BoundExponentialBackoffRetryPolicy(timeout, timeout, Integer.MAX_VALUE) - ); + zkUtil.getZooKeeperConnectString(), timeout, new HashSet(), retryPolicy); Assert.assertTrue("Client failed to connect an alive ZooKeeper.", client.getState().isConnected()); @@ -243,8 +267,7 @@ public void testRetrySyncOperations() throws Exception { public void testSyncAfterSessionExpiry() throws Exception { final int timeout = 2000; ZooKeeperClient client = ZooKeeperClient.createConnectedZooKeeperClient(zkUtil.getZooKeeperConnectString(), - timeout, new HashSet(), - new BoundExponentialBackoffRetryPolicy(timeout, timeout, Integer.MAX_VALUE)); + timeout, new HashSet(), retryPolicy); Assert.assertTrue("Client failed to connect an alive ZooKeeper.", client.getState().isConnected()); String path = "/testSyncAfterSessionExpiry"; @@ -287,8 +310,7 @@ public void processResult(int rc, String path, Object ctx) { public void testACLSetAndGet() throws Exception { final int timeout = 2000; ZooKeeperClient client = ZooKeeperClient.createConnectedZooKeeperClient(zkUtil.getZooKeeperConnectString(), - timeout, new HashSet(), - new BoundExponentialBackoffRetryPolicy(timeout, timeout, Integer.MAX_VALUE)); + timeout, new HashSet(), retryPolicy); Assert.assertTrue("Client failed to connect an alive ZooKeeper.", client.getState().isConnected()); String path = "/testACLSetAndGet"; @@ -362,8 +384,7 @@ public void processResult(int rc, String path, Object ctx, List acl, Stat s public void testACLSetAndGetAfterSessionExpiry() throws Exception { final int timeout = 2000; ZooKeeperClient client = ZooKeeperClient.createConnectedZooKeeperClient(zkUtil.getZooKeeperConnectString(), - timeout, new HashSet(), - new BoundExponentialBackoffRetryPolicy(timeout, timeout, Integer.MAX_VALUE)); + timeout, new HashSet(), retryPolicy); Assert.assertTrue("Client failed to connect an alive ZooKeeper.", client.getState().isConnected()); String path = "/testACLSetAndGetAfterSessionExpiry"; @@ -452,8 +473,7 @@ public void processResult(int rc, String path, Object ctx, List acl, Stat s public void testZnodeExists() throws Exception { final int timeout = 2000; ZooKeeperClient client = ZooKeeperClient.createConnectedZooKeeperClient(zkUtil.getZooKeeperConnectString(), - timeout, new HashSet(), - new BoundExponentialBackoffRetryPolicy(timeout, timeout, Integer.MAX_VALUE)); + timeout, new HashSet(), retryPolicy); Assert.assertTrue("Client failed to connect an alive ZooKeeper.", client.getState().isConnected()); String path = "/testZnodeExists"; @@ -509,8 +529,7 @@ public void processResult(int rc, String path, Object ctx, Stat stat) { public void testGetSetData() throws Exception { final int timeout = 2000; ZooKeeperClient client = ZooKeeperClient.createConnectedZooKeeperClient(zkUtil.getZooKeeperConnectString(), - timeout, new HashSet(), - new 
BoundExponentialBackoffRetryPolicy(timeout, timeout, Integer.MAX_VALUE)); + timeout, new HashSet(), retryPolicy); Assert.assertTrue("Client failed to connect an alive ZooKeeper.", client.getState().isConnected()); String path = "/testGetSetData"; @@ -592,8 +611,7 @@ public void processResult(int rc, String path, Object ctx, byte[] data, Stat sta public void testGetChildren() throws Exception { final int timeout = 2000; ZooKeeperClient client = ZooKeeperClient.createConnectedZooKeeperClient(zkUtil.getZooKeeperConnectString(), - timeout, new HashSet(), - new BoundExponentialBackoffRetryPolicy(timeout, timeout, Integer.MAX_VALUE)); + timeout, new HashSet(), retryPolicy); Assert.assertTrue("Client failed to connect an alive ZooKeeper.", client.getState().isConnected()); // create a root node @@ -738,9 +756,7 @@ public void processResult(int rc, String path, Object ctx, List children public void testRetryOnCreatingEphemeralZnode() throws Exception { final int timeout = 2000; ZooKeeperClient client = ZooKeeperClient.createConnectedZooKeeperClient( - zkUtil.getZooKeeperConnectString(), timeout, new HashSet(), - new BoundExponentialBackoffRetryPolicy(timeout, timeout, Integer.MAX_VALUE) - ); + zkUtil.getZooKeeperConnectString(), timeout, new HashSet(), retryPolicy); Assert.assertTrue("Client failed to connect an alive ZooKeeper.", client.getState().isConnected()); @@ -761,9 +777,7 @@ public void testRetryOnCreatingEphemeralZnode() throws Exception { public void testRetryAsyncOperations() throws Exception { final int timeout = 2000; ZooKeeperClient client = ZooKeeperClient.createConnectedZooKeeperClient( - zkUtil.getZooKeeperConnectString(), timeout, new HashSet(), - new BoundExponentialBackoffRetryPolicy(timeout, timeout, Integer.MAX_VALUE) - ); + zkUtil.getZooKeeperConnectString(), timeout, new HashSet(), retryPolicy); Assert.assertTrue("Client failed to connect an alive ZooKeeper.", client.getState().isConnected()); @@ -881,4 +895,32 @@ public void processResult(int rc, String path, Object ctx) { logger.info("Delete children from znode " + path); } + @Test + public void testAllowReadOnlyMode() throws Exception { + if (zkUtil instanceof ZooKeeperClusterUtil) { + System.setProperty("readonlymode.enabled", "true"); + ((ZooKeeperClusterUtil) zkUtil).enableLocalSession(true); + zkUtil.restartCluster(); + Thread.sleep(2000); + ((ZooKeeperClusterUtil) zkUtil).stopPeer(2); + ((ZooKeeperClusterUtil) zkUtil).stopPeer(3); + } + + try (ZooKeeperClient client = ZooKeeperClient.newBuilder() + .connectString(zkUtil.getZooKeeperConnectString()) + .sessionTimeoutMs(30000) + .watchers(new HashSet()) + .operationRetryPolicy(retryPolicy) + .allowReadOnlyMode(true) + .build()) { + Assert.assertTrue("Client failed to connect a ZooKeeper in read-only mode.", + client.getState().isConnected()); + } finally { + if (zkUtil instanceof ZooKeeperClusterUtil) { + System.setProperty("readonlymode.enabled", "false"); + ((ZooKeeperClusterUtil) zkUtil).enableLocalSession(false); + } + } + } + } diff --git a/bookkeeper-server/src/test/resources/client-cert.pem b/bookkeeper-server/src/test/resources/client-cert.pem index d819f198ba4..c505fabcdaf 100644 --- a/bookkeeper-server/src/test/resources/client-cert.pem +++ b/bookkeeper-server/src/test/resources/client-cert.pem @@ -1,32 +1,35 @@ -----BEGIN CERTIFICATE----- -MIIFmTCCA4GgAwIBAgIJAKJZcAdMXw8CMA0GCSqGSIb3DQEBCwUAMGIxCzAJBgNV +MIIGAzCCA+ugAwIBAgIUDrW7ZpaXgcsTTWyax2tu8WqZXW0wDQYJKoZIhvcNAQEL +BQAwgY8xCzAJBgNVBAYTAlVTMQswCQYDVQQIDAJDQTEWMBQGA1UEBwwNU2FuIEZy 
+YW5jaXNjbzEOMAwGA1UECgwFRHVtbXkxKzApBgNVBAsMIjA6dGVzdFJvbGUsdGVz +dFJvbGUxOzE6dGVzdENsdXN0ZXIxHjAcBgNVBAMMFWFwYWNoZS5ib29ra2VlcGVy +Lm9yZzAgFw0yMDA2MDMyMjIwMTlaGA8zMDE5MTAwNTIyMjAxOVowgY8xCzAJBgNV BAYTAlVTMQswCQYDVQQIDAJDQTEWMBQGA1UEBwwNU2FuIEZyYW5jaXNjbzEOMAwG -A1UECgwFRHVtbXkxHjAcBgNVBAMMFWFwYWNoZS5ib29ra2VlcGVyLm9yZzAgFw0x -ODAxMjQxODM2MjRaGA8zMDE3MDUyNzE4MzYyNFowYjELMAkGA1UEBhMCVVMxCzAJ -BgNVBAgMAkNBMRYwFAYDVQQHDA1TYW4gRnJhbmNpc2NvMQ4wDAYDVQQKDAVEdW1t -eTEeMBwGA1UEAwwVYXBhY2hlLmJvb2trZWVwZXIub3JnMIICIjANBgkqhkiG9w0B -AQEFAAOCAg8AMIICCgKCAgEAwdphglnvMvIrEjHgBekeTtg9KmUtmK9yviI0xKdP -8xMC6r5lvQIXQwcVB2LA7zBl3mI3s2DT9cqZ+E6q9Vz3AF9yV+F2zvPcQOt3P9hO -CvEj9aXqMuzvSXQYmG+w7lLhm4M7IX7B4smGvXm905WACUeLr2XM1qr2SlpMZIBc -O35NyaOeB9srQS2NXMB8mIsWDzoftbW0eNji7uz6OuyNvQWcS7rdcXBSoBRyRU2y -qpQcfQT9aUDttDsAtkvEvuHi8LicKocaF1ufHVFvygdX7nGCNOnC83ZW5HvttwAJ -C8Spe67etFp98bumwsXYaPO0su7M1Ym1DbphJsGO0LNwdEWstjT4uIsxyXZtblfM -DBFpxA4s/7hIZtmrjM3CyZuoAuJWsU8UkZOVMWtxfBbY4iCo6K/ScaFSFvMykJ+p -MST0U83JlDrqH+XjmIxcGrs0FVgD8vtaPcPJFWhm9TUBTFo1CU3Zw74W49ltgRa/ -oOLIzn27XHCcTY0LDRLXhvtGiPOSw+2U192w0MQVby6Q/QxAGTl5UXIeHsp91fkO -H3pRHYNdjF5CVFn5vsNEcdjQQa1bi4Cr1O64yV6kHBtvsp3bKmkfBPT+mO1OzV+N -vjfNswk606NCd6rI8UePErDI7YIJJvHphd8EHyz/Kr64ZjLodgKCyETEDWsvVaex -8eMCAwEAAaNQME4wHQYDVR0OBBYEFCuc137GLVBGagBjH2STNeJ2gZcZMB8GA1Ud -IwQYMBaAFCuc137GLVBGagBjH2STNeJ2gZcZMAwGA1UdEwQFMAMBAf8wDQYJKoZI -hvcNAQELBQADggIBAJ/PW17Gk5qbsKxnxdpNctqCOlYOLcZ+/45LvTKFQDgi8kYi -/4WkhvADWHYYTXRibgHYWMxU+/JCbgOgwPP30646mq0cvQGYN7aQTjYY8TRMbbLh -PGGHu30EwL/siXVsHBS0ZonXpRvYr+Bn5V0JXdw1JNjGs4C3GxyIoK1zVqXRmS/b -u052AtKN8A+iLw4F/8b5rmpRg1BDguc1kqaVh0c64aVZ49QM1IVpvWx1fIpxZPj2 -9YlHUYmmt7mf6313VplRL98jqH+U959ueJjQ9RmEMwgUeu3iIN75/kytkpGrMLdC -WwKlDWBid66TJaPgJ8mBUC9LNlndSe5bzpueN12MFtaLgaWopiB8WU5pEdMV2eze -auvrUJkobl5n6vAcip1+SfrnY4EvXwdrO2VpIVkEhRWLfxsuthKwobi7/gR2jIa/ -DT/o5eqvdlxHwdIZ0VVPDdUwngQT26VkGozyrBFlhuSkv7pI/8mjFPjYF1vMcQbi -J6C3C/K9gQp8XXOFm8r3Szqd14yzqigx2SWz4VWY7L7vX2iVs5gNb1/Fnjt2lQ20 -XMCDNoN8tO6kIqv03L4MRue5ISbJx5hcFML1/aLaidpOhWWgEBsH5hhOVdzzAOGF -vWBXMgfSLxq/cD2ZZARq5ZHhsX1x5BeZfw4zS6dJspxF6Io4CETOOxi6AA3L +A1UECgwFRHVtbXkxKzApBgNVBAsMIjA6dGVzdFJvbGUsdGVzdFJvbGUxOzE6dGVz +dENsdXN0ZXIxHjAcBgNVBAMMFWFwYWNoZS5ib29ra2VlcGVyLm9yZzCCAiIwDQYJ +KoZIhvcNAQEBBQADggIPADCCAgoCggIBAMsokjsgNrcsbEZboCondRNKXJvtTafv +zzHFZDjVRyMXqmRB+ypc6TMqK/G4steE0bQWGV8NOlLiPl2qtDEFrBwuaOJ7b2W4 +b8RVKF1lpMtxSvHUb+SWNrG1IWFaRdGW2LFapDUjkPjGWCCIb9lhnatvXz2+ZJ7y +6xYCJqr87agrdI2Ck1dQBwKBSxNTmT0V3cWCZQPFQR4faE5WDMHTPe+rd8rjcm8d +r3KXQtSV5LHg+zw/vBv0XHDJ5571AaVgto/Zxlu2jx3GHRxxcVJrHyxnsc2AjUcB +QovxlXDXyAGrbLWdxOMMKHoUeav/MFsCH2UXEbJnvCn1dLbRUQtFTae0laaPUlKr +4GBWd99xFyUkbO/gedCe6iKuZ6Sljb0JaXaYXg5WhbTUjNxKu87UcYj54U6CWMkt +k2ZdIgG3Z7ILFEX0VOwjrmV1lQZ8FU9kqx9VeK9UlZr8GXDE59ncBFZ/6TkTKnCQ +Rkqbmu5eDuA3hxO/qVDO/94yjjSjVp/aBGM7jeFj7DkHAyo/OeAyyChJD7my+6Mr +/V6BvnY6lFvHV0MINOym9Jq8HGAvYvpcRi03EuEdjpNSRA4gkO7KsBSU19m/CVuB +c5s+fyw/lGN8BiH9P0P7JlRzJGyR3jHtM3+2FF3VfxKg1atC2X5Dovu6hbtQJKxJ +BUxXPxAhP6YFAgMBAAGjUzBRMB0GA1UdDgQWBBTgqCCwkOED4VtjZ3cL6y8L8NQH +ETAfBgNVHSMEGDAWgBTgqCCwkOED4VtjZ3cL6y8L8NQHETAPBgNVHRMBAf8EBTAD +AQH/MA0GCSqGSIb3DQEBCwUAA4ICAQCgQaUW60VzyHvZmi8mQfTeEiwHFfIHFXU8 +aW8QkPyN5TRYOJy6/L5gfcJN6C1EwB78nzrO0R3bB4dQLnjehkzmyD7aqQ4tEyyW +Kc63lgawu8aV+s9awd0BT0zYUT8t2FaQLTweITUjJkltyh0So9hI8sx6P1DZr+bK +oYRdcMp9AsyKwkTA2c96poOAELHC5x7nXXolpm0mDX4AyrJFWC8w7UMjgXHk/1Om +776Kw8yvOAvDpNM/a1QMyVL0Spjq/Z5XhD+MnKRw4+bS1LNAdCIvTX4Q9Q+CYbm+ +4sPHdzJjGp3T8cJlNG/iPNNWWJPvFxClWOsFIGliElw9bWWo+Yy0lXOsYUezndbf 
+424+rYU+SyXMZVN3bPHJtrDigill2AxdtG5qD9SSOwkgLgyV4uBo5z5lobdXIb3T +Y9qbgCP4+HbCZuwLkqs1wL0ktWSR8+iu0eDgeHyzUI3+YvLuWMUcvkkfBFQunIxW +5a/2pvaShONoSgcPXTC+/h5vYAPYfnTeV17cYD4inYm2ra20QBVHeRW3Pvgk3Yci +My4NlR5reWbD98xiYsJJUYCutlS7q/YGMdOCaiQp7kv1vJEKswSGfuZCaYO7rMDn +PZruUZgsfLV10rxtBvcRdaIHAc4vktOj+QoMt6VPmP8MqcjijbhyamYpWFzywOFk +q8T6QxUiKw== -----END CERTIFICATE----- diff --git a/bookkeeper-server/src/test/resources/client-key.jks b/bookkeeper-server/src/test/resources/client-key.jks index 5bd6e53ccbf..868a9787243 100644 Binary files a/bookkeeper-server/src/test/resources/client-key.jks and b/bookkeeper-server/src/test/resources/client-key.jks differ diff --git a/bookkeeper-server/src/test/resources/client-key.p12 b/bookkeeper-server/src/test/resources/client-key.p12 index 36a950934ea..656f373cd94 100644 Binary files a/bookkeeper-server/src/test/resources/client-key.p12 and b/bookkeeper-server/src/test/resources/client-key.p12 differ diff --git a/bookkeeper-server/src/test/resources/client-key.pem b/bookkeeper-server/src/test/resources/client-key.pem index d92a0bd1b86..8396691cb78 100644 --- a/bookkeeper-server/src/test/resources/client-key.pem +++ b/bookkeeper-server/src/test/resources/client-key.pem @@ -1,52 +1,52 @@ -----BEGIN PRIVATE KEY----- -MIIJQgIBADANBgkqhkiG9w0BAQEFAASCCSwwggkoAgEAAoICAQDB2mGCWe8y8isS -MeAF6R5O2D0qZS2Yr3K+IjTEp0/zEwLqvmW9AhdDBxUHYsDvMGXeYjezYNP1ypn4 -Tqr1XPcAX3JX4XbO89xA63c/2E4K8SP1peoy7O9JdBiYb7DuUuGbgzshfsHiyYa9 -eb3TlYAJR4uvZczWqvZKWkxkgFw7fk3Jo54H2ytBLY1cwHyYixYPOh+1tbR42OLu -7Po67I29BZxLut1xcFKgFHJFTbKqlBx9BP1pQO20OwC2S8S+4eLwuJwqhxoXW58d -UW/KB1fucYI06cLzdlbke+23AAkLxKl7rt60Wn3xu6bCxdho87Sy7szVibUNumEm -wY7Qs3B0Ray2NPi4izHJdm1uV8wMEWnEDiz/uEhm2auMzcLJm6gC4laxTxSRk5Ux -a3F8FtjiIKjor9JxoVIW8zKQn6kxJPRTzcmUOuof5eOYjFwauzQVWAPy+1o9w8kV -aGb1NQFMWjUJTdnDvhbj2W2BFr+g4sjOfbtccJxNjQsNEteG+0aI85LD7ZTX3bDQ -xBVvLpD9DEAZOXlRch4eyn3V+Q4felEdg12MXkJUWfm+w0Rx2NBBrVuLgKvU7rjJ -XqQcG2+yndsqaR8E9P6Y7U7NX42+N82zCTrTo0J3qsjxR48SsMjtggkm8emF3wQf -LP8qvrhmMuh2AoLIRMQNay9Vp7Hx4wIDAQABAoICAQCqaVuGx6CrXI/YctfI2mG2 -VgmPF1q5+qIX2uIgbiSuPmw2CCJPwWLJnZQy5fFNU3J5yEXG/rvWOsCXtDA9eff4 -7+8Iqj9TNrTMrTIrge85VzqRW8VB919zZweoGaekGmAR4Y89pryyrQ4xyq/BLI9d -mPOGwSsNG0Vfn3nAb8ak1idzts3ZgiXIKk821k+xmbNOt33gs1dvVNpJxzFCU2lW -XXREboT0kBVSfCboHaGOqp1Qme5bdKSB58x8dKcEVna1vtQp3pJlLjn1//0R0NrP -1iDsewLSG5nPSdJzKSjKm5uSCuvkCBjnRFsYpevUd0jGc37FyUTMSKfW9hiiBtw3 -EQnA8vMvLdeJvBTPrFerfbrkbXhxWKATTO+nN6JThQgUzIQdxiGIpOPxKli4t9xm -qvFfokOfacymTzyDu/R9upZokRiblO0xtJ89odVSS8SwgskoiXgm3InOiATxWTaM -gTG/8rPvG6OTSYgBqLRQ5z2XgOG7oEBU9rlPfyivlGH7N5LrCHXF2j+4+fMxgfbe -vO+FqI0SQvtW8mGxuJ3aaKuJORZDQrTg3Z/AccZTDfFAvmRbAny8IZR/H00GIvFR -Arwvt0fh/8IGekZVzA5DRW89DWaXUQI5lhX4UviThQK9fw+P4ZCmHmVoLpd0jxYY -utyQpRbz1rYyiPd/TPRdQQKCAQEA9PL179+hQp6vPCfI6lT/Rus5JQtyu08J8CgJ -x81m8rySLgsAvFBLLLqm19QViLkgryizhllnf6gUExFEG317rqlR3/1gso/QgWdj -L7hexHhJPLUYPkHH0CBPceAKWJfFpSkqXD7tskumtj4X3Bdp0GRKHitWpZoF/FiX -9A2djrSHS725eWCU3KgDtKVf8Ygz3Kx7XGw4+RmE9EBtLItxvlXCPsYOfWz6O06O -4iFaAebHA23IwOw3wk36XU86aelSQZMiXHJcfgzTJyGZOY0UgKuamuU6WchAmzK1 -Ao0tCKfqjFOdQIWGDP/g/ceHq0aMSOwIAFn6GVo4+OYgdbGXGQKCAQEAyplJaVtF -q1VEsQt9kq/WwGfjjGHPLJEVMHRnNcORg+FarfbKY5jt8nv00eMGm65BxmzXGmvl -pxyQjZw6nZ9JQfKe7fPRE0Dz3aELSS5+8MiljpndMpRBdj4P13phcKYA9VxYyHVX -sSt0ZGI2hDuk2LRmPDjkUMtn1FE9wH8WrBH2vENMs16f9lzpUEtpaz7HyFWyscJX -1koJIWdR93joQMyDb0B34lNoXQ2OrfiS5/AvkeSwkj98nm9kJfsm1bKUq1pYesTT -Owux602R6fGX8VB5hyYIPA0WqEkxHvwS65bcPWexLpIyDd0uixNK9WUOAWyfMyD2 -fdcrocl7ay6cWwKCAQB2cY1uwkot9qFxiyNh/Fu8JT3qpdCCtkNt905TaQUg1wIw 
-dW2ToZfYNyE6N/l5tVsSl7HHgy/C0Ll0RuMSD+lgmctXbiP19Ai0qhOSHarlgeyY -CFGCuTgvcZA41kbqc+lEZdVv6ZXyoxYoBXpwGHo4JGaalAY/6Wx/iy9e+b54JN9P -RpyLDqKs2CmCjn0IQ/4f9N9p34LlIOvjV8vywDLuAHX++LJFAA834lLBEbN+O+N7 -yvhKIW8M67vmpsruL75wqv7wiPQkl3r67woyg/+oAFKwF6vRgj2LTkesxitChj+q -PzxI2MfrPUfEL1lw/poTIN71nIyM+c2WvWBwyMDxAoIBAG4PC5RSYuyKa8CJ73OK -Vm07gp+2WqdpQUuLUK4iSaCNAYfTs2qbn1fFAuAqJmLYLR8v7UKLLryzhcuH/Ue3 -SkKrHK9Dbma5OEFDxS/CNG91cIqhB0r8wvsLB+wUrW5Wn9qqigiLxlGWu6n0uIzp -IcofZhJ9DXrepM7wO02hPJ3JPHJVVQtz8g4RtyVJckEyX7Fy7JooazMcEQ22ZQ68 -/d6FuzjqmrW2fdFfFg1oJdYd4pms1Eb+eiJPfOYtI5Gfa6gSclJvLhi7Z7Hd99BQ -0Cvlfb9vZ7XHnnFZIXglk9mroIUzGUulW8+wQiKHHodkmFEpwuoxk/YUt70yCPvW -3FUCggEAMLOHunak/3VClEI8eO6eP1JuH+cnlH2Bpj+/34h6QM8A48J/aI63d/2b -7bas9ZB9iHfvo22Csu3tjOP4vFyCrumx8ilCgiCTY01DOHqR8UHA+OiX2yiQvEn6 -8he/ku6hizT9w1w0tf9NTxOE4Jy9M5UQ1Ol+20Iwjw99rrHyutM6HpIT9UQaLw4y -a7tr4CF9+X/1Zjz2/He4fZ9h5akt8pHRjQi4oWx8Adi195s+Cdsrxm1NBTMFM7VU -ywOTiEpvn6gS3PguMFLwhRObp36VBwymrC7ACcJsszqr2OypzQYbi+XyMj9WfQ80 -lZ/dii5O+DlR4aDU90na6bTa5v8ASw== +MIIJQwIBADANBgkqhkiG9w0BAQEFAASCCS0wggkpAgEAAoICAQDLKJI7IDa3LGxG +W6AqJ3UTSlyb7U2n788xxWQ41UcjF6pkQfsqXOkzKivxuLLXhNG0FhlfDTpS4j5d +qrQxBawcLmjie29luG/EVShdZaTLcUrx1G/kljaxtSFhWkXRltixWqQ1I5D4xlgg +iG/ZYZ2rb189vmSe8usWAiaq/O2oK3SNgpNXUAcCgUsTU5k9Fd3FgmUDxUEeH2hO +VgzB0z3vq3fK43JvHa9yl0LUleSx4Ps8P7wb9Fxwyeee9QGlYLaP2cZbto8dxh0c +cXFSax8sZ7HNgI1HAUKL8ZVw18gBq2y1ncTjDCh6FHmr/zBbAh9lFxGyZ7wp9XS2 +0VELRU2ntJWmj1JSq+BgVnffcRclJGzv4HnQnuoirmekpY29CWl2mF4OVoW01Izc +SrvO1HGI+eFOgljJLZNmXSIBt2eyCxRF9FTsI65ldZUGfBVPZKsfVXivVJWa/Blw +xOfZ3ARWf+k5EypwkEZKm5ruXg7gN4cTv6lQzv/eMo40o1af2gRjO43hY+w5BwMq +PzngMsgoSQ+5svujK/1egb52OpRbx1dDCDTspvSavBxgL2L6XEYtNxLhHY6TUkQO +IJDuyrAUlNfZvwlbgXObPn8sP5RjfAYh/T9D+yZUcyRskd4x7TN/thRd1X8SoNWr +Qtl+Q6L7uoW7UCSsSQVMVz8QIT+mBQIDAQABAoICAFP/hV6u9hCMbIQ2tCVZxR1h +vKK33kjWbWudus+I123aBhiH82pTmhQOlrbN8BwODYqRLJJRbNECPFkMEI7IUp4g +Tjt+X9PGC2g48YSUVyvKvvr4I/92YEzfoFqZMY7z+MpzuLtD5lgF4kApSV2u77sH +RHDJ/N5/f9XMs+I0y6qhtQIhf/w02YEepkTqZsyL8vML8+o4L7FduQnSqFmnls82 +rUQVAKaSto0Bn584DqPBav9BNuyz/1ifEoZ1tOWE9FbL6yjWdZwQ4s501S8HnY8F +nM1kHoXCLpqgG3LZtReDomIBRnu99iprttb3ny2x1fs9K5MBMxqCI2zHZOPtFqrI +3ZSdvEW5pKpNGOWzIQddLF/mETma/r/4Kk7QU71zJCs27Ul2dGF/xkXV9lGz6cqW +mZHflz9SWXzX6pYtE/OeYfewxywF3yNliXPyiwHFVo5+GXQK2hGSy4oxstsxOP/s +q51UvZCF///9vu9b7/AFI2T1JX0LLtXY/JgZT9NnYluI5r3dxLT+kFM6j9ZEwvuz +wdNntZjt6CYUHH9yUfxtH5SUknJfMmSP8jXzcoprPkW5FZr9SDOkplxnuGh4TXiD +GesDkQfFkv5gJQkHBJbf733VKJN1vUdqoi5hBmeZuxYH7pqe34BYZGUbOXRTkE4a +cauMCt66svUp73g8nRctAoIBAQDr1Os0I161+dy8wgZ+DTl7piPj3bLfyscoKUXY +OmoI7IioTbgUbnRIbf6xEG9aNom8idldtICgwo2Q+mtHWRWDhKsvAH0ak8FpXpxC +UbfUhR0iE/99n/B/A4YNPRmUUhW9hXRriqyLFKAqbuuqpHXnZIkAC3e3mxgQ7AEr +limiEkPoA5ETE2E/u6vMTvxeqJgs3ti5EjcS3WF652jeRiNgOHWHc7kmyRxXEdGy +tTKua0z7k+zf0l3jIfVc4ldoITuToR+mjbAqEn3TSGPx1uiFh34soHnDNL1LAMTF +hAktzFRvJyYKAjZ6otwoJDUHzAwlAPVL7NPXCke/WzSBRuG7AoIBAQDciFW+sgYX +V08Lxy+EVcUrCEIWFrN87f6UjdVynUqHUUTdGQ2oAL+f7yjDPyBPG3JrmtBnpH34 +YyZPjAYJBV/myWAmt2OfXZyk1shJD14oFKodebwSRS6RBz60Li1GjSRCXGsA85nG +ZpTgMM8UqArAHNqJjbW2pHzlTo+oY3Gpr94Wr6I7CWViky4eFmNWHntFc+rCQZ5T +caJYxeskrwzxbqf50x0+4VLIGdC6GiKV3HQ3HlyZiP6LmT04v4xCLoz3LSo5UoM7 +FhBq7TtRSV7+QdGeshNxhP1QWhBQfzm0aLYUhJwMM8WPp4TqmJHdw7QuJ75gSjkd +1ZG2VWC/2Ds/AoIBAQDoMKkmm1BroMdZPTsBLmoTOL+LYUL1HgQ5oXE39ENj5+ag +tmxwCVQJ3+psUL8htiKkc6CEpuwh24tgW332MFqDM8MLMtL5sNIzuUfl7+krn6fQ +XpolCKzTkReRvz+JDcyD3XqDS3SYNsV/Idh8GRJQsWZFmgTTzCrEtmhsjpsNDCi5 +rZFPk7wi8Na0AGxvklTJkNsWVD3PIBIS3PYeKjY4TT5CD4kTC90QSOYKqmZs3g5c +gAcvU0LHFy8ptVvTlSzALFoqNMRml/A0bsigWjRzC7UJoTJhJHLe/rG9ukMDSXM0 
+QPotzoHu+pwCFav0skyevjPE/jaQOXnsiJLIWY5zAoIBAQCv8xZ2TxMNDFvEyebf +bo8hBjWz6ejKhBfZ7k+eYmBUFkMbJCIBKJe6wtWC8ZqVgxCHSb1884CN7I1kahNm +nyMkvwY33ZleTRHtcm/Z2qkE0XfojHfBG/FLRLyChVUaNQH48ENvPuwxnyaouEh/ +8pKhNGQn2yhf0FzVlxiHANBu0iIfd4G4GIcLXuIsnzAiRa+UeieTVUc8zCz7Ju21 +FvT8q4zZhdaPlLa3b+FYmgF+D6WpVFANL3/nYRu3axw8sWdGzoIhufN58OCobx4k +fKWNtnXIZ8ubhr+UnEbn8pnXlrOvKx5VnfjB5KXnhlLa3ImlSZBawt1PMFZRY00N +wzeBAoIBABNi386SZwdmKP49bSjj53QctnKT/ki7+gN4QF/BkNARVNOXyzlwfnB8 +okrBfafv/51HRzr3YmqOYmhEI2B/856Gqk6jk0LvRHEmmcCEV44liuncJfABNf84 +05KCZ3+walwpa8ody0PtrFUNQXP4rOPFXSNP+GlnrtVJQDZ9qoIBpuVOE8nROWoQ +AdwCk55GlfebDQu9UafzVMQaOed55X+47RH2UczHrmt0J8ss3DfR55/EdgxVpTaU +46WewhF9gDIXecndmS77SXqHLLUYkhI8AwAa8XrgN78rDmwX2wnIBjjJ5OCxnbI1 +qgl5tWPWHD/K94CtNPJI6/8OdnkY3AY= -----END PRIVATE KEY----- diff --git a/bookkeeper-server/src/test/resources/conf/default_rocksdb.conf b/bookkeeper-server/src/test/resources/conf/default_rocksdb.conf new file mode 100644 index 00000000000..0f3a08779ed --- /dev/null +++ b/bookkeeper-server/src/test/resources/conf/default_rocksdb.conf @@ -0,0 +1,29 @@ +#/** +# * Licensed to the Apache Software Foundation (ASF) under one +# * or more contributor license agreements. See the NOTICE file +# * distributed with this work for additional information +# * regarding copyright ownership. The ASF licenses this file +# * to you under the Apache License, Version 2.0 (the +# * "License"); you may not use this file except in compliance +# * with the License. You may obtain a copy of the License at +# * +# * http://www.apache.org/licenses/LICENSE-2.0 +# * +# * Unless required by applicable law or agreed to in writing, software +# * distributed under the License is distributed on an "AS IS" BASIS, +# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# * See the License for the specific language governing permissions and +# * limitations under the License. +# */ + +[DBOptions] + # set by jni: options.setCreateIfMissing + create_if_missing=true + # set by jni: options.setInfoLogLevel + info_log_level=INFO_LEVEL + # set by jni: options.setKeepLogFileNum + keep_log_file_num=30 + +[CFOptions "default"] + # set by jni: options.setLogFileTimeToRoll + log_file_time_to_roll=86400 \ No newline at end of file diff --git a/bookkeeper-server/src/test/resources/conf/entry_location_rocksdb.conf b/bookkeeper-server/src/test/resources/conf/entry_location_rocksdb.conf new file mode 100644 index 00000000000..6f6c1b4d052 --- /dev/null +++ b/bookkeeper-server/src/test/resources/conf/entry_location_rocksdb.conf @@ -0,0 +1,69 @@ +#/** +# * Licensed to the Apache Software Foundation (ASF) under one +# * or more contributor license agreements. See the NOTICE file +# * distributed with this work for additional information +# * regarding copyright ownership. The ASF licenses this file +# * to you under the Apache License, Version 2.0 (the +# * "License"); you may not use this file except in compliance +# * with the License. You may obtain a copy of the License at +# * +# * http://www.apache.org/licenses/LICENSE-2.0 +# * +# * Unless required by applicable law or agreed to in writing, software +# * distributed under the License is distributed on an "AS IS" BASIS, +# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# * See the License for the specific language governing permissions and +# * limitations under the License. 
+# */
+
+[DBOptions]
+  # set by jni: options.setCreateIfMissing
+  create_if_missing=true
+  # set by jni: options.setInfoLogLevel
+  info_log_level=INFO_LEVEL
+  # set by jni: options.setKeepLogFileNum
+  keep_log_file_num=30
+  # set by jni: options.setLogFileTimeToRoll
+  log_file_time_to_roll=86400
+  # set by jni: options.setMaxBackgroundJobs or options.setIncreaseParallelism
+  max_background_jobs=2
+  # set by jni: options.setMaxSubcompactions
+  max_subcompactions=1
+  # set by jni: options.setMaxTotalWalSize
+  max_total_wal_size=536870912
+  # set by jni: options.setMaxOpenFiles
+  max_open_files=-1
+  # set by jni: options.setDeleteObsoleteFilesPeriodMicros
+  delete_obsolete_files_period_micros=3600000000
+
+[CFOptions "default"]
+  # set by jni: options.setCompressionType
+  compression=kLZ4Compression
+  # set by jni: options.setWriteBufferSize
+  write_buffer_size=67108864
+  # set by jni: options.setMaxWriteBufferNumber
+  max_write_buffer_number=4
+  # set by jni: options.setNumLevels
+  num_levels=7
+  # set by jni: options.setLevelZeroFileNumCompactionTrigger
+  level0_file_num_compaction_trigger=4
+  # set by jni: options.setMaxBytesForLevelBase
+  max_bytes_for_level_base=268435456
+  # set by jni: options.setTargetFileSizeBase
+  target_file_size_base=67108864
+  # set by jni: options.setLevelCompactionDynamicLevelBytes
+  level_compaction_dynamic_level_bytes=true
+
+[TableOptions/BlockBasedTable "default"]
+  # set by jni: tableOptions.setBlockSize
+  block_size=65536
+  # set by jni: tableOptions.setBlockCache
+  block_cache=206150041
+  # set by jni: tableOptions.setFormatVersion
+  format_version=2
+  # set by jni: tableOptions.setChecksumType
+  checksum=kxxHash
+  # set by jni: tableOptions.setFilterPolicy, bloomfilter:[bits_per_key]:[use_block_based_builder]
+  filter_policy=rocksdb.BloomFilter:10:false
+  # set by jni: tableOptions.setCacheIndexAndFilterBlocks
+  cache_index_and_filter_blocks=true
\ No newline at end of file
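The `set by jni` comments in these conf files name the org.rocksdb setters that the bookie otherwise invokes programmatically. For orientation, a minimal sketch of the equivalent Java configuration for the entry-location store above (a representative subset) — assuming a recent rocksdbjni where `setBlockCache(Cache)` and `setFilterPolicy(Filter)` are available; the class name is illustrative, not part of this patch:

import org.rocksdb.BlockBasedTableConfig;
import org.rocksdb.BloomFilter;
import org.rocksdb.ChecksumType;
import org.rocksdb.CompressionType;
import org.rocksdb.LRUCache;
import org.rocksdb.Options;

public class EntryLocationRocksdbOptions {
    // Mirrors entry_location_rocksdb.conf above; each call matches one conf key.
    public static Options build() {
        BlockBasedTableConfig tableOptions = new BlockBasedTableConfig()
                .setBlockSize(64 * 1024)                     // block_size=65536
                .setBlockCache(new LRUCache(206150041))      // block_cache=206150041
                .setFormatVersion(2)                         // format_version=2
                .setChecksumType(ChecksumType.kxxHash)       // checksum=kxxHash
                .setFilterPolicy(new BloomFilter(10, false)) // rocksdb.BloomFilter:10:false
                .setCacheIndexAndFilterBlocks(true);         // cache_index_and_filter_blocks=true
        return new Options()
                .setCreateIfMissing(true)                    // create_if_missing=true
                .setKeepLogFileNum(30)                       // keep_log_file_num=30
                .setMaxTotalWalSize(512 * 1024 * 1024)       // max_total_wal_size=536870912
                .setCompressionType(CompressionType.LZ4_COMPRESSION)
                .setWriteBufferSize(64 * 1024 * 1024)        // write_buffer_size=67108864
                .setMaxWriteBufferNumber(4)                  // max_write_buffer_number=4
                .setLevelCompactionDynamicLevelBytes(true)
                .setTableFormatConfig(tableOptions);
    }
}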
diff --git a/bookkeeper-server/src/test/resources/conf/ledger_metadata_rocksdb.conf b/bookkeeper-server/src/test/resources/conf/ledger_metadata_rocksdb.conf
new file mode 100644
index 00000000000..0f3a08779ed
--- /dev/null
+++ b/bookkeeper-server/src/test/resources/conf/ledger_metadata_rocksdb.conf
@@ -0,0 +1,29 @@
+#/**
+# * Licensed to the Apache Software Foundation (ASF) under one
+# * or more contributor license agreements. See the NOTICE file
+# * distributed with this work for additional information
+# * regarding copyright ownership. The ASF licenses this file
+# * to you under the Apache License, Version 2.0 (the
+# * "License"); you may not use this file except in compliance
+# * with the License. You may obtain a copy of the License at
+# *
+# *     http://www.apache.org/licenses/LICENSE-2.0
+# *
+# * Unless required by applicable law or agreed to in writing, software
+# * distributed under the License is distributed on an "AS IS" BASIS,
+# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# * See the License for the specific language governing permissions and
+# * limitations under the License.
+# */
+
+[DBOptions]
+  # set by jni: options.setCreateIfMissing
+  create_if_missing=true
+  # set by jni: options.setInfoLogLevel
+  info_log_level=INFO_LEVEL
+  # set by jni: options.setKeepLogFileNum
+  keep_log_file_num=30
+
+[CFOptions "default"]
+  # set by jni: options.setLogFileTimeToRoll
+  log_file_time_to_roll=86400
\ No newline at end of file
diff --git a/bookkeeper-server/src/test/resources/generateKeysAndCerts.sh b/bookkeeper-server/src/test/resources/generateKeysAndCerts.sh
index 3d8fdb699e4..0ce152202c7 100755
--- a/bookkeeper-server/src/test/resources/generateKeysAndCerts.sh
+++ b/bookkeeper-server/src/test/resources/generateKeysAndCerts.sh
@@ -61,7 +61,7 @@ openssl req \
   -days 365000 \
   -nodes \
   -x509 \
-  -subj "/C=US/ST=CA/L=San Francisco/O=Dummy/CN=apache.bookkeeper.org" \
+  -subj "/C=US/ST=CA/L=San Francisco/O=Dummy/OU=0:testRole,testRole1;1:testCluster/CN=apache.bookkeeper.org" \
   -out client-cert.pem \
   -keyout client-key.pem
diff --git a/bookkeeper-server/src/test/resources/log4j.properties b/bookkeeper-server/src/test/resources/log4j.properties
deleted file mode 100644
index 10ae6bfcbba..00000000000
--- a/bookkeeper-server/src/test/resources/log4j.properties
+++ /dev/null
@@ -1,42 +0,0 @@
-#
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# -# - -# -# Bookkeeper Logging Configuration -# - -# Format is " (, )+ - -# DEFAULT: console appender only, level INFO -bookkeeper.root.logger=INFO,CONSOLE -log4j.rootLogger=${bookkeeper.root.logger} - -# -# Log INFO level and above messages to the console -# -log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender -log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout -log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} - %-5p - [%t:%C{1}@%L] - %m%n - -#disable zookeeper logging -log4j.logger.org.apache.zookeeper=OFF -log4j.logger.org.apache.bookkeeper.bookie=INFO -log4j.logger.org.apache.bookkeeper.meta=INFO diff --git a/bookkeeper-server/src/test/resources/server-cert.pem b/bookkeeper-server/src/test/resources/server-cert.pem index b19de519b98..730925e01e7 100644 --- a/bookkeeper-server/src/test/resources/server-cert.pem +++ b/bookkeeper-server/src/test/resources/server-cert.pem @@ -1,32 +1,33 @@ -----BEGIN CERTIFICATE----- -MIIFmTCCA4GgAwIBAgIJAJLN/+fjRP2hMA0GCSqGSIb3DQEBCwUAMGIxCzAJBgNV -BAYTAlVTMQswCQYDVQQIDAJDQTEWMBQGA1UEBwwNU2FuIEZyYW5jaXNjbzEOMAwG -A1UECgwFRHVtbXkxHjAcBgNVBAMMFWFwYWNoZS5ib29ra2VlcGVyLm9yZzAgFw0x -ODAxMjQxODM2MjNaGA8zMDE3MDUyNzE4MzYyM1owYjELMAkGA1UEBhMCVVMxCzAJ -BgNVBAgMAkNBMRYwFAYDVQQHDA1TYW4gRnJhbmNpc2NvMQ4wDAYDVQQKDAVEdW1t -eTEeMBwGA1UEAwwVYXBhY2hlLmJvb2trZWVwZXIub3JnMIICIjANBgkqhkiG9w0B -AQEFAAOCAg8AMIICCgKCAgEAvn6D0jbhA9OLpfuklC3ytUOuBXlFlgWlFEXTyqH8 -P54tH70+EJjmkV6d9kzBZohAL1cWgGwwQUR2MMIYLvKjRGnULkaCJor5WBP5D0wi -jjTCsVNJcdC4gbEUjCl5HSmP4hPRVEHzFfWS61bJ2KDxAL7GCnAz+10MEr04KX62 -fOLr/eCHG8icfwnQ1O2fJs8cGUcxTeE5k/DV/103gH/49K3cFHHPNzyVWad8dgQ5 -Y0IZ1HTh/e/3IvsaOPp2EuKof9WtLYjjWnCMmL9MsmJiBmufetzZVqiIb8WNxa+e -OR46Cm5lp7DwIXrUurYHTgtugmB9Pm1RL+T2YVHrPSbZ4yKYV7V6RRR/j+y4aI/7 -YXnZKI1zjDQT1GiyZDJGUBSILiMO9G5vbEL4btJlyQ4oBtlKozOLqi36GmlzOpSS -yMVu7xyx6/va94YbI2VlTvCF/2sSUsf2ZNg4oL44+NfZDGO7vaDVFavdEsdkxxse -wKa9uYpkfN7i4MZKi7CCR4lZo78xzt5vZ/irCqVngl3gwwBokfKLGskUi7Xwv/Sd -gu2W88dAOzIfU878+MKeqVN3Uz91RhUgCrFhbhE/8E87WEzM0MZ68UxxlANI6tiA -sBcYUUvhq154RPc5iY2Mzp48hju4NEKLvSbRDhJLpJGAVuupqzzM6iC1Uw23KwpI -KOUCAwEAAaNQME4wHQYDVR0OBBYEFBjV9p9fG1YMVK7PrGrJ3wJLx370MB8GA1Ud -IwQYMBaAFBjV9p9fG1YMVK7PrGrJ3wJLx370MAwGA1UdEwQFMAMBAf8wDQYJKoZI -hvcNAQELBQADggIBABc8029Iw5b+qS+B110qRRfj0fBHz8x4jjIQdzHah3qqM3FM -VzpySogESmKT3A+UmNUqzop/l+2V4G4g66UV7ps0rtJkgQU17Q70QF0TCQ8YranA -5Vy/qZe/AXwrjjeVlmaFRxKpKWDDdlMX84+CjXdhAyMKSSezbtMKDvfERB2oGcjD -Pz6d0XekRhqX4CiJLA2zkSZ55ABuJwyK24KeOtirsKxZXgQ5pG78zblWm3w2OdJv -NxictCLqemDmEsMBuk2mlIBogm2tF7U9QBrIa7z2le7SjxLTO/+hWFOcDr41Bwcd -cWoi1OkdiM3kg2COItCxtcHI8VmU17WL98SQMQsmby9RbVPXr56/x5Xi12CR/5b3 -LKCy6xmX4GEkWgc4VkvJp07eoArRxfQPEeYzZmwXBRcDO4uGRp9680QacwUvCOYe -9Kkpe+iVdVMY2vAyil0ZmJ1oR3wW3hsiwu0RTEU1bKwrePqCR6t4EC6ihygYn0nG -Y6PkdKMq/hoWjLo0p0iZuYMbbMNYcZWNAwV90eltw57ITyv8EI+D8hJ/b18wzPIA -91QyqZ8qd7eo+o/YTGY0YzZHSk147pDk4nqK0d0kszr6SxBwx7i0KSsAB4Kvtrj8 -BLTOlCqwMKKmEuwlNNfonINZyjzRY5cTMeuxj8U7LjTamMOQH4kgnQ0FoEX6 +MIIFpzCCA4+gAwIBAgIUfC+RpY3InsoGfZClL1JQqAueYWEwDQYJKoZIhvcNAQEL +BQAwYjELMAkGA1UEBhMCVVMxCzAJBgNVBAgMAkNBMRYwFAYDVQQHDA1TYW4gRnJh +bmNpc2NvMQ4wDAYDVQQKDAVEdW1teTEeMBwGA1UEAwwVYXBhY2hlLmJvb2trZWVw +ZXIub3JnMCAXDTIwMDYwMzIyMjAxOVoYDzMwMTkxMDA1MjIyMDE5WjBiMQswCQYD +VQQGEwJVUzELMAkGA1UECAwCQ0ExFjAUBgNVBAcMDVNhbiBGcmFuY2lzY28xDjAM +BgNVBAoMBUR1bW15MR4wHAYDVQQDDBVhcGFjaGUuYm9va2tlZXBlci5vcmcwggIi +MA0GCSqGSIb3DQEBAQUAA4ICDwAwggIKAoICAQCkHmy+taLHMepkJh+pX75IYNdp +oedKs2LEWOtLQu47JsdXH3A26Juc0aeAHK4dq+7oSGSi6MiDsi/ls4Uzz2szoti6 +iZLPx6PdUDWoCmNHktQ12heCpN/my/PytB3gneZK/CtUa+mkGpmF8MsMttGAJ/bq 
+qJqsgJofIoQjYw5cD+vGvUeL9GCKt2qC6ZnonGLqcKF67kuBQdkSePAPsLYk9LQZ +gfZH1xAqJl2oOnFKmKOSsECXyJg5m82Fj3T4RgzgmPjBsLpa9H08dYFjhqh7OasS +mNUAzBNYgkFa8fpm4ip1JqHuLIodTLe5UjFwCoAoGeYUwww5pzdE/9Chr30azr/d +7+NDy263/IM2Pe+lnJejad1QOkDHrmWS+VSAwQQSNQnPQwksCSib/dTJockfd3/h +EUBU7mY3LCo75RYbjNR4VS+NN6rMggwYeWoa20C+0mvYWvRP5x4QgEtcH6TxIQuo +B+TjUp9PiNi3M2VafUTu9OXWSorH99+nRiM7w04W2ITRU9HSBix8fSezX1vF6qov +yDHVudX8d+6QRrfdpEWza0Bd3pj41tXFP1zIcHQVl/D/kt1KVkERKQbpXUZytElO +HjDn9netmDi06dgaUV9qUEQKOwh+PzQB0fcsD/Ydi90dIjM53SfDj/okUrwgYcej +sxfEi1kex5yAFdvvvwIDAQABo1MwUTAdBgNVHQ4EFgQUiaXEwotFXGBiWwazyL5j +GjNbknYwHwYDVR0jBBgwFoAUiaXEwotFXGBiWwazyL5jGjNbknYwDwYDVR0TAQH/ +BAUwAwEB/zANBgkqhkiG9w0BAQsFAAOCAgEAXGHM35Oz20KdmKR0b/J7Ig+WyUwu +jRrVw1G9FzQatI9TwVcpNkLjJENB5OxmfqIWcdBwI+gw7inS+9zCQVvjPBMUxbIl +MgglpIVHEZaT3O45/cTmAdBDRIx4Kl22YJJhqPA3H+y2eFG4YnU6PBYQJn2aac/h +qOfINgz0LaAtQ+6EFt9+tTbyx9qTWjEpCRlod8RpHJHiShuZTV0OrpSYgU0IW5J/ +J7gak9V5R3si6ApQIlWKKW7UnG/MzGnNrc7saCklL4G+eTYS2w1Qzda8pkMbB3xT +9MrWO+zk7sPlJ1VAV4jJcYX8mcLzugEugPSPRzywYfkWZnfIbtM2QeDEWOGSCtpu +VXQiWNORdV2tdMAulI7XbcBTt1Zbcew4yDtDLEyb/OisU1I4kVF+qIFleSLVEnjs +Syh57Vw1LfGaFRFBKaZXOfswMw0J1+iztszjo7Mncfc4q/vdIwDtitdqE6yPuC6D +gSe2PkriEH8Ukpyi1jLDf3kh6SBJ/atO80KaPwKRxi4QxXKY7TnnqaZftcPj6/9l +0gs73oXYcj6sF0gVq+hwUHyvaZWQcyZ/7pTYpKcUq0RBCmUgCAA1zooN1pmr+OzK +yYvQjW76Xk+gqGSQxe4p4EcefRSGVeEmjgM9VHZ+oyFh/5V8qUBA3LtN6eBuzqkr +elBfEEV+3m6pAmI= -----END CERTIFICATE----- diff --git a/bookkeeper-server/src/test/resources/server-key.jks b/bookkeeper-server/src/test/resources/server-key.jks index 3949b9d6e5f..14920807bd7 100644 Binary files a/bookkeeper-server/src/test/resources/server-key.jks and b/bookkeeper-server/src/test/resources/server-key.jks differ diff --git a/bookkeeper-server/src/test/resources/server-key.p12 b/bookkeeper-server/src/test/resources/server-key.p12 index ab9c8aa3bb8..9217cf4ad9f 100644 Binary files a/bookkeeper-server/src/test/resources/server-key.p12 and b/bookkeeper-server/src/test/resources/server-key.p12 differ diff --git a/bookkeeper-server/src/test/resources/server-key.pem b/bookkeeper-server/src/test/resources/server-key.pem index 1e2f1ec9c1e..9c609541802 100644 --- a/bookkeeper-server/src/test/resources/server-key.pem +++ b/bookkeeper-server/src/test/resources/server-key.pem @@ -1,52 +1,52 @@ -----BEGIN PRIVATE KEY----- -MIIJQgIBADANBgkqhkiG9w0BAQEFAASCCSwwggkoAgEAAoICAQC+foPSNuED04ul -+6SULfK1Q64FeUWWBaUURdPKofw/ni0fvT4QmOaRXp32TMFmiEAvVxaAbDBBRHYw -whgu8qNEadQuRoImivlYE/kPTCKONMKxU0lx0LiBsRSMKXkdKY/iE9FUQfMV9ZLr -VsnYoPEAvsYKcDP7XQwSvTgpfrZ84uv94IcbyJx/CdDU7Z8mzxwZRzFN4TmT8NX/ -XTeAf/j0rdwUcc83PJVZp3x2BDljQhnUdOH97/ci+xo4+nYS4qh/1a0tiONacIyY -v0yyYmIGa5963NlWqIhvxY3Fr545HjoKbmWnsPAhetS6tgdOC26CYH0+bVEv5PZh -Ues9JtnjIphXtXpFFH+P7Lhoj/thedkojXOMNBPUaLJkMkZQFIguIw70bm9sQvhu -0mXJDigG2UqjM4uqLfoaaXM6lJLIxW7vHLHr+9r3hhsjZWVO8IX/axJSx/Zk2Dig -vjj419kMY7u9oNUVq90Sx2THGx7Apr25imR83uLgxkqLsIJHiVmjvzHO3m9n+KsK -pWeCXeDDAGiR8osayRSLtfC/9J2C7Zbzx0A7Mh9Tzvz4wp6pU3dTP3VGFSAKsWFu -ET/wTztYTMzQxnrxTHGUA0jq2ICwFxhRS+GrXnhE9zmJjYzOnjyGO7g0Qou9JtEO -EkukkYBW66mrPMzqILVTDbcrCkgo5QIDAQABAoICAQCvgea33iIQoW4vfhrS/0Z3 -pSSHHIV1RDwk4nTQY9ABWR2f+X5eUlFULAWDcJJbgjsIoscziPoomAgAwkL/tkOg -e5SnEgVFt5MliDlW08GenZOnRuIK/8+OhfU1cdyJdsp+891QMPbjC3/SXgLYGOgS -1LGn2lq6Q68k8Lr22C0QAQ6GuMAiZAFztjp2g3u3iOgNjh8p7tFasXCot1y0grN/ -01NKbtUIwkOj94DfRuMMxVEBArNYgCeFTi6JwpDYs4WlSdwlcNJvd/TBorbqP2Sr -H6suyp1fjyUtPalyMmynmWbGR5JXHtkPL5khcSZnzHaDnpyl0JgVdXFeltgSXmIt -oCF882Jc0u3qvQ+mVeW6VRQtzMxfU6L9diP7GI7x3TvJhds1WZIT7kdC5LVjkBJs 
-JVYnhixEnwbQqSAk5VgqIVbDKmURAXDVp8PRpRLwKuQfgl6w1/KNzfmge1VXR3cO -E/sX6Y00JMIJ9749JlPrcaD3fMLEwHH9NFdc7XIpMDpSmBkIa3+zoaQQ0rtMBSr3 -OF1SnHT5Xb4ykoPi5XPMLTqkkZTd622R9kBBpwkJh7Y5sGFK9FI9CmvMmC/ziHSI -Orb+gRu39ECQB+u6It/sYvM7i/cFkCmdGcRuTaCDmYiooE8Tjp8YMR5GTyLIV874 -GKjqGSHrvaz58yHrCXEkXQKCAQEA6TvW/C4pevHjdmTrnTOp3OIDYhDvgyJq278J -ID0sq8ZPv6V1dyjEqreXpb/R2//aPH2EdzfOpBhbmSUKj5h5gOQ3CH9vkJP27oXJ -tx06ezSg8+WpDH0I+xYHMp2yj8GF+KrgC8qCSqxOglePVZ268ou6lyqgNYa4Ujv7 -XI8O/6ha+k6fYrEooMjINC8m0gKCzlrIvUsu9o/uhJAbxOeXX8U/+DVb397WQ6cB -THAxo7uIzoarD7+VhWbXY+9OIANWE/RQ5rhIvOmBf+hOYgMKlX0F67uM/go0ZEvy -05rccFsqprGEibs+Ic0w5cRwV+BqYcLrzoRkmKwkKwa9igxiTwKCAQEA0RatmiAY -Gc1zBsfyq8f3ZqKZ2OrOCrFcsN4rMnyNBCh64yTdyNT/alCZUzy8/aKSDbvqb+Cc -BqbCVlDq+jVkiDvs7k497noXQEJ2KfGm1g8hOX1U7AjOse4qOPhujTCYu9hSDtYB -EPTw2OqEGOMCCuv4xzdlgK0vbSOztrF/dWp385B6hTXBvUqTlECtUhFa4QryCI9w -NrsACQaiUt0Xu2SW1EOUv0dzTlmzyAf1s4IKHpm8t0Rkr1OqYRCqW2Ey+mKE6z6y -NeAudaRCoxe40Z/7QrJ9eBhu+oympXazLj+BPEXHU2wkaCjXWMcolItxMpaIaD3X -nD1Mt0QqRzq4iwKCAQA+iuNdgGtzIoYia3GbGA2Gw7ywgWYYvhP1lUa3NHBUJ7ue -4pmbOH10YgLyWXvHCNbWvbnV1ks9SaLWcE5irzp1y7zONI4QMP1YfNvYlKfn/fbj -MESiqqzL195aPltxnS11vyyRPN6vc4EiBqTTCpblD38bpjyL3fJzas4+xcX53IV4 -9bhb2LHSW8UD6Vj5m97DwyhtSknvqC0HszUfGhNHhTdgMb7PS4wdXB1HCBbnlxRa -fVZFxNQtj6RWkgdbIknk0/EVzXkD34HwcLUEJ1ihOYNq8UIfpVDjTFJzV+Wg43GO -fa/S1zkUC1f/ZSvTBMTCLmjZWjs3jYGtYANXj3aVAoIBAHzg1JKm9H4ErNyx8wgS -CHsuRkC+DI1qXPft2VLv/LEtFCgxzpyySlJPDSQftKivvheh0mU7ezSlyJARCCak -WQTc9adm56pVFSn2B+kJQSG8K5XQezX2FK1El8cq6aw+CBq5GlluC3j7MhX8CyVp -/8BSK2WgemkeBqNinWVSIdQY4MeB1QtWjf3mWrpC3sGTR/n8tY3TTawCiATcB3sC -PbhYXZUtP9v2arGy9aNUzbSGyFB6dbHnkVL931bVw0mMhgvxZ32xFnMDD/yHPJ13 -/5SDvmeZf0KJJU9TTfypJl9K4n8DFgeHIT9slSGa4WvG1LboHVRVCz9vhTA38CBW -u/0CggEALh3hL56l5hpyXDwNQHR8XpZkX6rMuJmqD9f/18tM+deGuqj1/FRUYJrt -/AeaN1UHjugitYDHGu2hLEORdp6mK19ENXcEajKP+f3TUliuBdogqTXCCNu1RS5p -ru/G47QV0IxlKMxFRChL7pYs2/7PY0Qo37MvtEd6/FS7g95NZasE5ko8swkDbE7C -+F6wNBDFbS3RhIBfIPlr35Tv4sVEl7gAM7JdUqzIEDsmW38RQzIo8F8t1acwfdZ6 -PLVj2hh2JoF9jjMcy9o1VtnBWtoRkSD48+UmvrJuYrjd98XW4VRcq0VVsuOPBE+q -EJRVpONvEXHus+f1sCUmQYWYuhmJHA== +MIIJQQIBADANBgkqhkiG9w0BAQEFAASCCSswggknAgEAAoICAQCkHmy+taLHMepk +Jh+pX75IYNdpoedKs2LEWOtLQu47JsdXH3A26Juc0aeAHK4dq+7oSGSi6MiDsi/l +s4Uzz2szoti6iZLPx6PdUDWoCmNHktQ12heCpN/my/PytB3gneZK/CtUa+mkGpmF +8MsMttGAJ/bqqJqsgJofIoQjYw5cD+vGvUeL9GCKt2qC6ZnonGLqcKF67kuBQdkS +ePAPsLYk9LQZgfZH1xAqJl2oOnFKmKOSsECXyJg5m82Fj3T4RgzgmPjBsLpa9H08 +dYFjhqh7OasSmNUAzBNYgkFa8fpm4ip1JqHuLIodTLe5UjFwCoAoGeYUwww5pzdE +/9Chr30azr/d7+NDy263/IM2Pe+lnJejad1QOkDHrmWS+VSAwQQSNQnPQwksCSib +/dTJockfd3/hEUBU7mY3LCo75RYbjNR4VS+NN6rMggwYeWoa20C+0mvYWvRP5x4Q +gEtcH6TxIQuoB+TjUp9PiNi3M2VafUTu9OXWSorH99+nRiM7w04W2ITRU9HSBix8 +fSezX1vF6qovyDHVudX8d+6QRrfdpEWza0Bd3pj41tXFP1zIcHQVl/D/kt1KVkER +KQbpXUZytElOHjDn9netmDi06dgaUV9qUEQKOwh+PzQB0fcsD/Ydi90dIjM53SfD +j/okUrwgYcejsxfEi1kex5yAFdvvvwIDAQABAoICACJQtj4d7l4mArNUAVTBzyCF +FyVgE+NbpLAXQ4NmCDfYAOAnk6f3dOoSMCqqVIGhvH9W+6vQbnSS4k7t/VD68phG +WXiPYWIOhSW2KrHrEh0aB0MHSdkoLiSsymIZ5RFdEpTlKw4ozh5g7W7DDUGoTB+2 +u3sPK+Abt54A8o4PnSYEsjDIzNfnjfQTK7MZsvOfF1Obnzf78U+ifAfPv6oYMc2C +WNHoiZ01y4x0nyqYpa2KQtFeBvN1ntaUzCE8AKIUy4Z+layyDhUMf88PZrFvq3Sw +dyOZTxguk3DzM0UXyVqPuX7rR/pr16kOrG+UTv/1QhygZao7qie33eeQcYgNEGQf +wxpXTEDa1zqhmfSW+j4c3l9JN/DPfHCIRa7d58lTvouXI1i8JhwFDADKv9ekSbZd +Z/UrGEPq5fASmGenGV0th0x58LFCwdzLLqc1g1gFg6w6tTpqsoAbfRC+YuNpjWTc +gdUXnFH3KwcHrOWImylNlPlL25P1FlvXeFgato08tGvM6hOoFNIauAl1j8rGfP7E +KZ5miezlJeoV6pjcOl7xqtcMVzcXfl0Vz6hfT16PqoDRWZ4+WKzEoPJ+Yw4XQ3JH +e6LrJqzrgkIDoFUvEGtdACPtc19bRdVig7xncW4/wTNwGsQDbAJSHMCV2xkFdVVt 
+AF2kCZ9L73nclq+xiU4hAoIBAQDVds9Sqm+UGZVFu2aT85epLfMNH0y6X0mZJOGk +rhlbCFqa+Vs/ZseZidEifAIv48sc3HIFi38+Wh+HwnuFyueLyN9kCubUJChWdPKs +Ll3c4n7F9qFhOIuLYwJbQ0qSpdQDUpKnVApo3AgeYeaWYtDeEHhd3kr0RtzsOgIJ +HFUNaN/fX1ODQ7vrjv/YgW/bNMImMuGYRJVHONlyVE1swCbHl74xuuvzLFabDNbG +N5kiV1yD/sQczlacNQijuScZEjRJ5gly6LFyXq853xyYcvmq/rZwZNZugN/blZBc +ZrpkprABtQQdyHRAKI2iRN07WaHcqj8D6GYhrES+uiKrUJIRAoIBAQDE0myYk+as +F//IIAr7EVmvjPUtBinFZFwf3C5D4UkMf6mFfxv5pHG3TYdOo/eMeUdxSzARTYqe +xOCAZWST9Ud90FG4uGI/OXSa3vkJUrnSAQgWvj2SWT3ZC+zrZ4AL+2NmWIYFiglK +XiY9+8otY/KLhw8OpyX+P5EJ4zEqcdtY2LT++kAkbbAu8EhI70RQdqbIsx1jBBxH +JO6j/q5oPuiJb7b0PkuznNKUUunlj9yUlGQdX1yVyDN21ZdunQ4g1DU+d8YL0dxj +8UjWW1+Su8kZGEFJWFcdWD9Y2xYs6bGiDeM/339Cs9xiaJ7RHKEI9KCW+o7XluE2 +yiFXxKyDBJTPAoIBAFQWMSUHLlzQlxSY9ZdiZWOnTg7lPXxM/NjuxlPwoNxTNh3G +DEO1YPsCZveGkfX7bY8451F32e/d4H3CBpZ1jzBpOFZYVQVMGPe1qcJZoW/ZaMbi +mD+L3qxRnCQHJeEOoa2NYWe4m0EjK6+MMSEcW9qd8kxHvI7tOyly0Ep2dqqJnWaI +ToVusR/hfLb0hfN7nN8fO+6XVZZwtndq7fTg3GXTSICeeUOZO6RoMlJypEf60d7N +BMO0JDn0Sir0TIp7uU1C6IXzU7MRyUSqtsD2yZKqM4OitTViXsxI5kz/ynzLjJmf +jLeOtvxu/RvmtJ8kd7ZM+fW4HkM1cP8Qo4eIfYECggEAY+VlRt+ycWBQ61MQH7QD +sC0791ksdEpm06jeNoeumtBkyyPjoAZNzTplVY21RC/+CcuenvmbRNAqHiDYNpyE +Z6AHCllTTEGuJDjNb2T0eVkasOhnudLfqDz8R9KlU++I2NZPV5pi8sLsaANW70jO +PvESvF4r02qSA9GzD5bwPJzo6I09Zv/hL0G3foUqSCigqDb40Fuyuo1NLiFChhh/ +z9nKoxbHN2d+HdgjbOdijmrukVXoWQFe1Y69KGBAXns2dvk1pKQkVvmM/Xcs//2d +qMX9xOs4c+VpaYDxNWR7SwP/dljKb6F4Vt5A1WPRfAklRNvxCg1DS5q156Uj5e+6 +UwKCAQBhyJQCsK5CEmBidaN8FvBGF9sZPCxGZZQIiPB7WlcJup6GiLb2btZ4zQpC +mg3mI+pgcr8CLzRpEqrw7x4n3j39QsAycjib4UDxi5elTxbeRlOrg2qSfI05lrIl +XpZSzMCHfjTfSiG6eeKkA2h93sk8k3IKs60cpFkmu9+R5oVH2umI4bCtjPZc3Dqo +rqb9PPsgoXqObhjjlTI5Dk3kW2sgp2R09bHa/HYURJ5iDDRC58F+BoJKomNjoj9s +Aeomssn4qyjlF+LaklPb2vGAnHecozuHM4yu1tc41xA39ybzLVh2QnUt/mpESCQj +4Z63r2v2DLBpCwDzI6JMkUBY8zJ/ -----END PRIVATE KEY----- diff --git a/bookkeeper-server/src/test/resources/test_entry_location_rocksdb.conf b/bookkeeper-server/src/test/resources/test_entry_location_rocksdb.conf new file mode 100644 index 00000000000..9d1c3e08c47 --- /dev/null +++ b/bookkeeper-server/src/test/resources/test_entry_location_rocksdb.conf @@ -0,0 +1,49 @@ +#/** +# * Licensed to the Apache Software Foundation (ASF) under one +# * or more contributor license agreements. See the NOTICE file +# * distributed with this work for additional information +# * regarding copyright ownership. The ASF licenses this file +# * to you under the Apache License, Version 2.0 (the +# * "License"); you may not use this file except in compliance +# * with the License. You may obtain a copy of the License at +# * +# * http://www.apache.org/licenses/LICENSE-2.0 +# * +# * Unless required by applicable law or agreed to in writing, software +# * distributed under the License is distributed on an "AS IS" BASIS, +# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# * See the License for the specific language governing permissions and +# * limitations under the License. 
+# */
+
+[DBOptions]
+  # set by jni: options.setCreateIfMissing
+  create_if_missing=true
+  # set by jni: options.setKeepLogFileNum
+  keep_log_file_num=1
+  # set by jni: options.setMaxTotalWalSize
+  max_total_wal_size=1000
+
+[CFOptions "default"]
+  # set by jni: options.setCompressionType
+  compression=kLZ4Compression
+  # set by jni: options.setWriteBufferSize
+  write_buffer_size=1024
+  # set by jni: options.setMaxWriteBufferNumber
+  max_write_buffer_number=1
+  # set by jni: options.setLevelCompactionDynamicLevelBytes
+  level_compaction_dynamic_level_bytes=true
+
+[TableOptions/BlockBasedTable "default"]
+  # set by jni: tableOptions.setBlockSize
+  block_size=65536
+  # set by jni: tableOptions.setBlockCache
+  block_cache=206150041
+  # set by jni: tableOptions.setFormatVersion
+  format_version=2
+  # set by jni: tableOptions.setChecksumType
+  checksum=kxxHash
+  # set by jni: tableOptions.setFilterPolicy, bloomfilter:[bits_per_key]:[use_block_based_builder]
+  filter_policy=rocksdb.BloomFilter:10:false
+  # set by jni: tableOptions.setCacheIndexAndFilterBlocks
+  cache_index_and_filter_blocks=true
\ No newline at end of file
diff --git a/bookkeeper-slogger/api/pom.xml b/bookkeeper-slogger/api/pom.xml
new file mode 100644
index 00000000000..59dd4459d9f
--- /dev/null
+++ b/bookkeeper-slogger/api/pom.xml
@@ -0,0 +1,27 @@
+<?xml version="1.0"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
+    <artifactId>bookkeeper-slogger-parent</artifactId>
+    <groupId>org.apache.bookkeeper</groupId>
+    <version>4.18.0-SNAPSHOT</version>
+    <relativePath>..</relativePath>
+  </parent>
+  <groupId>org.apache.bookkeeper</groupId>
+  <artifactId>bookkeeper-slogger-api</artifactId>
+  <name>Apache BookKeeper :: Structured Logger :: API</name>
+</project>
diff --git a/bookkeeper-slogger/api/src/main/java/org/apache/bookkeeper/slogger/AbstractSlogger.java b/bookkeeper-slogger/api/src/main/java/org/apache/bookkeeper/slogger/AbstractSlogger.java
new file mode 100644
index 00000000000..55ca52d38bc
--- /dev/null
+++ b/bookkeeper-slogger/api/src/main/java/org/apache/bookkeeper/slogger/AbstractSlogger.java
@@ -0,0 +1,275 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.bookkeeper.slogger;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Optional;
+import java.util.function.BiConsumer;
+
+/**
+ * Abstract implementation of slogger. Keeps track of key value pairs.
+ */
+public abstract class AbstractSlogger implements Slogger, Iterable<Object> {
+    /**
+     * Levels at which slogger can slog.
+     */
+    public enum Level {
+        INFO,
+        WARN,
+        ERROR
+    }
+
+    private static final int MAX_DEPTH = 3;
+    private List<Object> parentCtx;
+
+    private ThreadLocal<List<Object>> kvs = new ThreadLocal<List<Object>>() {
+            @Override
+            protected List<Object> initialValue() {
+                return new ArrayList<>();
+            }
+        };
+    private ThreadLocal<List<Object>> flattenedTls = ThreadLocal.withInitial(ArrayList::new);
+
+    protected AbstractSlogger(Iterable<Object> parentCtx) {
+        List<Object> flattened = new ArrayList<>();
+        flattenKeyValues(parentCtx.iterator(), (k, v) -> {
+                flattened.add(k);
+                flattened.add(v);
+            });
+        this.parentCtx = Collections.unmodifiableList(flattened);
+    }
+
+    protected abstract Slogger newSlogger(Optional<Class<?>> clazz, Iterable<Object> parent);
+    protected abstract void doLog(Level level, Enum<?> event, String message,
+                                  Throwable throwable, List<Object> keyValues);
+
+    private void flattenAndLog(Level level, Enum<?> event, String message,
+                               Throwable throwable) {
+        List<Object> flattened = flattenedTls.get();
+        flattened.clear();
+
+        flattenKeyValues(this::addToFlattened);
+        doLog(level, event, message, throwable, flattened);
+    }
+
+    @Override
+    public void info(String message) {
+        flattenAndLog(Level.INFO, null, message, null);
+    }
+
+    @Override
+    public void info(String message, Throwable cause) {
+        flattenAndLog(Level.INFO, null, message, cause);
+    }
+
+    @Override
+    public void info(Enum<?> event) {
+        flattenAndLog(Level.INFO, event, null, null);
+    }
+
+    @Override
+    public void info(Enum<?> event, Throwable cause) {
+        flattenAndLog(Level.INFO, event, null, cause);
+    }
+
+    @Override
+    public void warn(String message) {
+        flattenAndLog(Level.WARN, null, message, null);
+    }
+
+    @Override
+    public void warn(String message, Throwable cause) {
+        flattenAndLog(Level.WARN, null, message, cause);
+    }
+
+    @Override
+    public void warn(Enum<?> event) {
+        flattenAndLog(Level.WARN, event, null, null);
+    }
+
+    @Override
+    public void warn(Enum<?> event, Throwable cause) {
+        flattenAndLog(Level.WARN, event, null, cause);
+    }
+
+    @Override
+    public void error(String message) {
+        flattenAndLog(Level.ERROR, null, message, null);
+    }
+
+    @Override
+    public void error(String message, Throwable cause) {
+        flattenAndLog(Level.ERROR, null, message, cause);
+    }
+
+    @Override
+    public void error(Enum<?> event) {
+        flattenAndLog(Level.ERROR, event, null, null);
+    }
+
+    @Override
+    public void error(Enum<?> event, Throwable cause) {
+        flattenAndLog(Level.ERROR, event, null, cause);
+    }
+
+    @Override
+    public Slogger ctx() {
+        try {
+            return newSlogger(Optional.empty(), this);
+        } finally {
+            kvs.get().clear();
+        }
+    }
+
+    @Override
+    public Slogger ctx(Class<?> clazz) {
+        try {
+            return newSlogger(Optional.of(clazz), this);
+        } finally {
+            kvs.get().clear();
+        }
+    }
+
+    @Override
+    public Iterator<Object> iterator() {
+        CtxIterator iterator = this.iterator.get();
+        iterator.reset();
+        return iterator;
+    }
+
+    protected void clearCurrentCtx() {
+        kvs.get().clear();
+    }
+
+    private void addToFlattened(String key, String value) {
+        flattenedTls.get().add(key);
+        flattenedTls.get().add(value);
+    }
+
+    protected void flattenKeyValues(BiConsumer<String, String> consumer) {
+        Iterator<Object> iter = iterator();
+        try {
+            flattenKeyValues(iter, consumer);
+        } finally {
+            kvs.get().clear();
+        }
+    }
+
+    public static void flattenKeyValues(Iterator<Object> iter,
+                                        BiConsumer<String, String> consumer) {
+        while (iter.hasNext()) {
+            String key = iter.next().toString();
+            if (!iter.hasNext()) {
+                return; // key without value
+            }
+            Object value = iter.next();
+
+            if (value instanceof Sloggable) {
+                addWithPrefix(key, (Sloggable) value, consumer, 0);
+            } else if (value.getClass().isArray()) {
+                consumer.accept(key, arrayToString(value));
+            } else {
+                consumer.accept(key, value.toString());
+            }
+        }
+    }
+
+    @Override
+    public Slogger kv(Object key, Object value) {
+        kvs.get().add(key);
+        kvs.get().add(value);
+        return this;
+    }
+
+    private static void addWithPrefix(String prefix, Sloggable value,
+                                      BiConsumer<String, String> consumer, int depth) {
+        value.log(new SloggableAccumulator() {
+                @Override
+                public SloggableAccumulator kv(Object key, Object value) {
+                    if (value instanceof Sloggable && depth < MAX_DEPTH) {
+                        addWithPrefix(prefix + "." + key.toString(),
+                                      (Sloggable) value, consumer, depth + 1);
+                    } else if (value.getClass().isArray()) {
+                        consumer.accept(prefix + "." + key.toString(), arrayToString(value));
+                    } else {
+                        consumer.accept(prefix + "." + key.toString(), value.toString());
+                    }
+                    return this;
+                }
+            });
+    }
+
+    private static String arrayToString(Object o) {
+        if (o instanceof long[]) {
+            return Arrays.toString((long[]) o);
+        } else if (o instanceof int[]) {
+            return Arrays.toString((int[]) o);
+        } else if (o instanceof short[]) {
+            return Arrays.toString((short[]) o);
+        } else if (o instanceof char[]) {
+            return Arrays.toString((char[]) o);
+        } else if (o instanceof byte[]) {
+            return Arrays.toString((byte[]) o);
+        } else if (o instanceof boolean[]) {
+            return Arrays.toString((boolean[]) o);
+        } else if (o instanceof float[]) {
+            return Arrays.toString((float[]) o);
+        } else if (o instanceof double[]) {
+            return Arrays.toString((double[]) o);
+        } else if (o instanceof Object[]) {
+            return Arrays.toString((Object[]) o);
+        } else {
+            return o.toString();
+        }
+    }
+
+    private final ThreadLocal<CtxIterator> iterator = new ThreadLocal<CtxIterator>() {
+            @Override
+            protected CtxIterator initialValue() {
+                return new CtxIterator();
+            }
+        };
+
+    class CtxIterator implements Iterator<Object> {
+        int index = 0;
+
+        private void reset() {
+            index = 0;
+        }
+
+        @Override
+        public boolean hasNext() {
+            return index < (parentCtx.size() + kvs.get().size());
+        }
+
+        @Override
+        public Object next() {
+            int i = index++;
+            if (i < parentCtx.size()) {
+                return parentCtx.get(i);
+            } else {
+                i -= parentCtx.size();
+                return kvs.get().get(i);
+            }
+        }
+    }
+}
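AbstractSlogger leaves only two hooks to implementations: newSlogger (to carry accumulated context into a child logger) and doLog (to emit the flattened key/value list). A minimal sketch of a custom implementation under those assumptions — the class name is hypothetical, not part of this patch:

import java.util.Collections;
import java.util.List;
import java.util.Optional;
import org.apache.bookkeeper.slogger.AbstractSlogger;
import org.apache.bookkeeper.slogger.Slogger;

// Hypothetical example: a slogger that emits "key=value" pairs to stderr.
public class StderrSlogger extends AbstractSlogger {
    public StderrSlogger() {
        super(Collections.emptyList());
    }

    private StderrSlogger(Iterable<Object> parentCtx) {
        super(parentCtx);
    }

    @Override
    protected Slogger newSlogger(Optional<Class<?>> clazz, Iterable<Object> parentCtx) {
        // Context pairs captured by ctx() are passed in, already flattened.
        return new StderrSlogger(parentCtx);
    }

    @Override
    protected void doLog(Level level, Enum<?> event, String message,
                         Throwable throwable, List<Object> keyValues) {
        // keyValues alternates key, value, key, value, ... as flattened strings.
        StringBuilder sb = new StringBuilder(level.toString());
        sb.append(' ').append(message != null ? message : String.valueOf(event));
        for (int i = 0; i < keyValues.size(); i += 2) {
            sb.append(' ').append(keyValues.get(i)).append('=').append(keyValues.get(i + 1));
        }
        System.err.println(sb);
    }
}

Usage would look like `new StderrSlogger().kv("ledgerId", 1L).info("ledger opened");`.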
diff --git a/bookkeeper-slogger/api/src/main/java/org/apache/bookkeeper/slogger/ConsoleSlogger.java b/bookkeeper-slogger/api/src/main/java/org/apache/bookkeeper/slogger/ConsoleSlogger.java
new file mode 100644
index 00000000000..74e53a4b8eb
--- /dev/null
+++ b/bookkeeper-slogger/api/src/main/java/org/apache/bookkeeper/slogger/ConsoleSlogger.java
@@ -0,0 +1,129 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.bookkeeper.slogger;
+
+import java.time.ZoneOffset;
+import java.time.ZonedDateTime;
+import java.time.format.DateTimeFormatter;
+import java.util.Collections;
+import java.util.List;
+import java.util.Optional;
+/**
+ * Simple slogger implementation which writes json to console.
+ */
+public class ConsoleSlogger extends AbstractSlogger {
+    private static final int MAX_STACKTRACE_ELEMENTS = 20;
+    private static final int MAX_CAUSES = 10;
+    private final Class<?> clazz;
+
+    ConsoleSlogger() {
+        this(ConsoleSlogger.class);
+    }
+
+    ConsoleSlogger(Class<?> clazz) {
+        this(clazz, Collections.emptyList());
+    }
+
+    ConsoleSlogger(Class<?> clazz, Iterable<Object> parent) {
+        super(parent);
+        this.clazz = clazz;
+    }
+
+    @Override
+    protected Slogger newSlogger(Optional<Class<?>> clazz, Iterable<Object> parent) {
+        return new ConsoleSlogger(clazz.orElse(ConsoleSlogger.class), parent);
+    }
+
+    @Override
+    protected void doLog(Level level, Enum<?> event, String message,
+                         Throwable throwable, List<Object> keyValues) {
+        String nowAsISO = ZonedDateTime.now(ZoneOffset.UTC).format(DateTimeFormatter.ISO_INSTANT);
+
+        StringBuilder builder = new StringBuilder();
+        builder.append("{");
+        keyValue(builder, "date", nowAsISO);
+        builder.append(",");
+        keyValue(builder, "level", level.toString());
+        if (event != null) {
+            builder.append(",");
+            keyValue(builder, "event", event.toString());
+        }
+        if (message != null) {
+            builder.append(",");
+            keyValue(builder, "message", message);
+        }
+
+        for (int i = 0; i < keyValues.size(); i += 2) {
+            builder.append(",");
+            keyValue(builder, keyValues.get(i).toString(), keyValues.get(i + 1).toString());
+        }
+        if (throwable != null) {
+            builder.append(",");
+            Throwable cause = throwable;
+            StringBuilder stacktrace = new StringBuilder();
+            int causes = 0;
+            while (cause != null) {
+                stacktrace.append("[").append(cause.getMessage()).append("] at ");
+                int i = 0;
+                for (StackTraceElement element : cause.getStackTrace()) {
+                    if (i++ > MAX_STACKTRACE_ELEMENTS) {
+                        stacktrace.append("<|[frames omitted]");
+                        break;
+                    }
+                    stacktrace.append("<|").append(element.toString());
+                }
+                cause = cause.getCause();
+                if (cause != null) {
+                    if (causes++ > MAX_CAUSES) {
+                        stacktrace.append(" [max causes exceeded] ");
+                        break;
+                    } else {
+                        stacktrace.append(" caused by ");
+                    }
+                }
+            }
+            keyValue(builder, "exception", stacktrace.toString());
+        }
+        builder.append("}");
+
+        System.out.println(builder);
+    }
+
+    private static void keyValue(StringBuilder sb, String key, String value) {
+        quotedAppend(sb, key);
+        sb.append(":");
+        quotedAppend(sb, value);
+    }
+
+    private static void quotedAppend(StringBuilder sb, String str) {
+        sb.append('"');
+        for (int i = 0; i < str.length(); i++) {
+            char c = str.charAt(i);
+            if (c == '\\') {
+                sb.append("\\\\");
+            } else if (c == '"') {
+                sb.append("\\\"");
+            } else if (c < ' ') {
+                sb.append(String.format("\\u%04X", (int) c));
+            } else {
+                sb.append(c);
+            }
+        }
+        sb.append('"');
+    }
+}
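For illustration, a hedged sketch of what this implementation produces, using the Slogger.CONSOLE constant declared on the Slogger interface later in this patch (the field names are taken from the code above; the exact timestamp will differ):

import org.apache.bookkeeper.slogger.Slogger;

public class ConsoleExample {
    public static void main(String[] args) {
        // Emits one JSON object per event to stdout.
        Slogger slog = Slogger.CONSOLE;
        slog.kv("ledgerId", 42).kv("op", "open").info("ledger opened");
        // Expected shape of the output line, per the builder order above:
        // {"date":"2020-06-03T22:20:19Z","level":"INFO","message":"ledger opened","ledgerId":"42","op":"open"}
    }
}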
diff --git a/bookkeeper-slogger/api/src/main/java/org/apache/bookkeeper/slogger/NullSlogger.java b/bookkeeper-slogger/api/src/main/java/org/apache/bookkeeper/slogger/NullSlogger.java
new file mode 100644
index 00000000000..2c5302f80ab
--- /dev/null
+++ b/bookkeeper-slogger/api/src/main/java/org/apache/bookkeeper/slogger/NullSlogger.java
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.bookkeeper.slogger;
+
+class NullSlogger implements Slogger {
+    @Override
+    public Slogger kv(Object key, Object value) {
+        return this;
+    }
+
+    @Override
+    public Slogger ctx() {
+        return this;
+    }
+
+    @Override
+    public Slogger ctx(Class<?> clazz) {
+        return this;
+    }
+
+    @Override
+    public void info(String message) {}
+    @Override
+    public void info(String message, Throwable cause) {}
+    @Override
+    public void info(Enum<?> event) {}
+    @Override
+    public void info(Enum<?> event, Throwable cause) {}
+
+    @Override
+    public void warn(String message) {}
+    @Override
+    public void warn(String message, Throwable cause) {}
+    @Override
+    public void warn(Enum<?> event) {}
+    @Override
+    public void warn(Enum<?> event, Throwable cause) {}
+
+    @Override
+    public void error(String message) {}
+    @Override
+    public void error(String message, Throwable cause) {}
+    @Override
+    public void error(Enum<?> event) {}
+    @Override
+    public void error(Enum<?> event, Throwable cause) {}
+}
diff --git a/bookkeeper-slogger/api/src/main/java/org/apache/bookkeeper/slogger/Sloggable.java b/bookkeeper-slogger/api/src/main/java/org/apache/bookkeeper/slogger/Sloggable.java
new file mode 100644
index 00000000000..165dff5d682
--- /dev/null
+++ b/bookkeeper-slogger/api/src/main/java/org/apache/bookkeeper/slogger/Sloggable.java
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.bookkeeper.slogger;
+
+/**
+ * Interface to be implemented by classes that want more control
+ * over how they are added to a structured log.
+ */
+public interface Sloggable {
+    SloggableAccumulator log(SloggableAccumulator accumulator);
+}
diff --git a/bookkeeper-slogger/api/src/main/java/org/apache/bookkeeper/slogger/SloggableAccumulator.java b/bookkeeper-slogger/api/src/main/java/org/apache/bookkeeper/slogger/SloggableAccumulator.java
new file mode 100644
index 00000000000..10f484b0b07
--- /dev/null
+++ b/bookkeeper-slogger/api/src/main/java/org/apache/bookkeeper/slogger/SloggableAccumulator.java
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.bookkeeper.slogger;
+
+/**
+ * Interface passed to Sloggable instances, with which they
+ * can add their own key/value pairs to the logged event.
+ */
+public interface SloggableAccumulator {
+    SloggableAccumulator kv(Object key, Object value);
+}
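A short sketch of how the two interfaces above are meant to be used together — the value type below is a hypothetical illustration; AbstractSlogger's addWithPrefix flattens its pairs under the key it was logged with:

import org.apache.bookkeeper.slogger.Sloggable;
import org.apache.bookkeeper.slogger.SloggableAccumulator;

// Hypothetical example: when logged as slog.kv("entry", location), this is
// flattened to "entry.ledgerId" and "entry.entryId" in the emitted event.
public class EntryLocation implements Sloggable {
    private final long ledgerId;
    private final long entryId;

    public EntryLocation(long ledgerId, long entryId) {
        this.ledgerId = ledgerId;
        this.entryId = entryId;
    }

    @Override
    public SloggableAccumulator log(SloggableAccumulator accumulator) {
        return accumulator.kv("ledgerId", ledgerId).kv("entryId", entryId);
    }
}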
diff --git a/bookkeeper-slogger/api/src/main/java/org/apache/bookkeeper/slogger/Slogger.java b/bookkeeper-slogger/api/src/main/java/org/apache/bookkeeper/slogger/Slogger.java
new file mode 100644
index 00000000000..f91b2b8f449
--- /dev/null
+++ b/bookkeeper-slogger/api/src/main/java/org/apache/bookkeeper/slogger/Slogger.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.bookkeeper.slogger;
+
+/**
+ * Event logging interface with support for key-value pairs and reusable context.
+ */
+public interface Slogger {
+    Slogger kv(Object key, Object value);
+
+    Slogger ctx();
+    Slogger ctx(Class<?> clazz); // <- should this be class or Logger? Logger requires some generics
+
+    void info(String message);
+    void info(String message, Throwable cause);
+    void info(Enum<?> event);
+    void info(Enum<?> event, Throwable cause);
+
+    void warn(String message);
+    void warn(String message, Throwable cause);
+    void warn(Enum<?> event);
+    void warn(Enum<?> event, Throwable cause);
+
+    void error(String message);
+    void error(String message, Throwable cause);
+    void error(Enum<?> event);
+    void error(Enum<?> event, Throwable cause);
+
+    Slogger NULL = new NullSlogger();
+    Slogger CONSOLE = new ConsoleSlogger();
+}
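A hedged sketch of the intended call pattern: kv() accumulates pairs for a single event, while ctx() freezes the pairs added so far into a reusable child logger (event and key names below are illustrative):

import org.apache.bookkeeper.slogger.Slogger;

public class SloggerUsage {
    // Events are modelled as enums rather than free-form strings.
    enum Events {
        LEDGER_OPENED
    }

    public static void main(String[] args) {
        // One-shot event: the pairs apply to this event only.
        Slogger.CONSOLE.kv("ledgerId", 42).info(Events.LEDGER_OPENED);

        // Reusable context: ctx() captures the accumulated pairs, so every
        // event logged through `scoped` carries bookieId automatically.
        Slogger scoped = Slogger.CONSOLE.kv("bookieId", "bookie-1:3181").ctx();
        scoped.kv("ledgerId", 42).info(Events.LEDGER_OPENED);
    }
}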
diff --git a/bookkeeper-slogger/api/src/main/java/org/apache/bookkeeper/slogger/package-info.java b/bookkeeper-slogger/api/src/main/java/org/apache/bookkeeper/slogger/package-info.java
new file mode 100644
index 00000000000..f132eafa8d2
--- /dev/null
+++ b/bookkeeper-slogger/api/src/main/java/org/apache/bookkeeper/slogger/package-info.java
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/**
+ * Structured logging.
+ */
+package org.apache.bookkeeper.slogger;
diff --git a/bookkeeper-slogger/api/src/test/java/org/apache/bookkeeper/slogger/ConcurrencyTest.java b/bookkeeper-slogger/api/src/test/java/org/apache/bookkeeper/slogger/ConcurrencyTest.java
new file mode 100644
index 00000000000..07c28c37bfa
--- /dev/null
+++ b/bookkeeper-slogger/api/src/test/java/org/apache/bookkeeper/slogger/ConcurrencyTest.java
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.bookkeeper.slogger;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Optional;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.TimeUnit;
+import org.junit.Test;
+
+/**
+ * Test concurrent access to slogger.
+ */
+public class ConcurrencyTest {
+    enum Events {
+        FOOBAR
+    }
+
+    @Test
+    public void testConcurrentFlattening() throws Exception {
+        final int numThreads = 100;
+        final int numIterations = 10000;
+
+        Slogger slog = new AbstractSlogger(Collections.emptyList()) {
+                @Override
+                public Slogger newSlogger(Optional<Class<?>> clazz, Iterable<Object> parent) {
+                    return this;
+                }
+
+                @Override
+                public void doLog(Level level, Enum<?> event, String message,
+                                  Throwable throwable, List<Object> keyValues) {
+                    for (int i = 0; i < keyValues.size(); i += 2) {
+                        if (!keyValues.get(i).equals(keyValues.get(i + 1))) {
+                            throw new RuntimeException("Concurrency error");
+                        }
+                    }
+                }
+            };
+
+        ExecutorService executor = Executors.newFixedThreadPool(numThreads);
+        List<Future<?>> futures = new ArrayList<>();
+        for (int i = 0; i < numThreads; i++) {
+            futures.add(executor.submit(() -> {
+                for (int j = 0; j < numIterations; j++) {
+                    String value = "kv" + Thread.currentThread().getId() + "-" + j;
+                    slog.kv(value, value).info(Events.FOOBAR);
+                }
+            }));
+        }
+
+        for (Future<?> f : futures) {
+            f.get(60, TimeUnit.SECONDS);
+        }
+    }
+}
diff --git a/bookkeeper-slogger/api/src/test/java/org/apache/bookkeeper/slogger/ConsoleSloggerTest.java b/bookkeeper-slogger/api/src/test/java/org/apache/bookkeeper/slogger/ConsoleSloggerTest.java
new file mode 100644
index 00000000000..0ca3612346c
--- /dev/null
+++ b/bookkeeper-slogger/api/src/test/java/org/apache/bookkeeper/slogger/ConsoleSloggerTest.java
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.bookkeeper.slogger;
+
+import org.junit.Test;
+
+/**
+ * Test console slogger.
+ * Doesn't actually assert anything, but can be used to eyeball the output.
+ */
+public class ConsoleSloggerTest {
+    enum Events {
+        FOOBAR,
+        BARFOO
+    };
+
+    @Test
+    public void testBasic() throws Exception {
+        ConsoleSlogger root = new ConsoleSlogger();
+        root.kv("fo\"o", "ba\r \\").info(Events.FOOBAR);
+    }
+}
+
diff --git a/bookkeeper-slogger/api/src/test/java/org/apache/bookkeeper/slogger/MockSlogger.java b/bookkeeper-slogger/api/src/test/java/org/apache/bookkeeper/slogger/MockSlogger.java
new file mode 100644
index 00000000000..f71465f88e1
--- /dev/null
+++ b/bookkeeper-slogger/api/src/test/java/org/apache/bookkeeper/slogger/MockSlogger.java
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.bookkeeper.slogger;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+/**
+ * Mock Slogger.
+ */
+public class MockSlogger extends AbstractSlogger {
+    List<MockEvent> events = new ArrayList<>();
+
+    public MockSlogger() {
+        super(new ArrayList<>());
+    }
+
+    private MockSlogger(Iterable<Object> parentCtx) {
+        super(parentCtx);
+    }
+
+    @Override
+    protected Slogger newSlogger(Optional<Class<?>> clazz, Iterable<Object> parentCtx) {
+        return new MockSlogger(parentCtx);
+    }
+
+    @Override
+    protected void doLog(Level level, Enum<?> event, String message, Throwable throwable,
+                         List<Object> keyValues) {
+        Map<String, Object> tmpKvs = new HashMap<>();
+        for (int i = 0; i < keyValues.size(); i += 2) {
+            tmpKvs.put(keyValues.get(i).toString(), keyValues.get(i + 1));
+        }
+        events.add(new MockEvent(level, event, message, tmpKvs, throwable));
+    }
+
+    static class MockEvent {
+        private final Level level;
+        private final Enum<?> event;
+        private final String message;
+        private final Map<String, Object> kvs;
+        private final Throwable throwable;
+
+        MockEvent(Level level, Enum<?> event, String message,
+                  Map<String, Object> kvs, Throwable throwable) {
+            this.level = level;
+            this.event = event;
+            this.message = message;
+            this.kvs = kvs;
+            this.throwable = throwable;
+        }
+
+        Level getLevel() {
+            return level;
+        }
+        Enum<?> getEvent() {
+            return event;
+        }
+        String getMessage() {
+            return message;
+        }
+        Map<String, Object> getKeyValues() {
+            return kvs;
+        }
+        Throwable getThrowable() {
+            return throwable;
+        }
+    }
+}
diff --git a/bookkeeper-slogger/api/src/test/java/org/apache/bookkeeper/slogger/SloggerTest.java b/bookkeeper-slogger/api/src/test/java/org/apache/bookkeeper/slogger/SloggerTest.java
new file mode 100644
index 00000000000..32bf663d7e2
--- /dev/null
+++ b/bookkeeper-slogger/api/src/test/java/org/apache/bookkeeper/slogger/SloggerTest.java
@@ -0,0 +1,163 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.bookkeeper.slogger;
+
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.allOf;
+import static org.hamcrest.Matchers.hasEntry;
+import static org.hamcrest.Matchers.hasSize;
+import static org.hamcrest.Matchers.is;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.List;
+import org.junit.Test;
+
+/**
+ * Test Slogger.
+ */
+public class SloggerTest {
+    enum Events {
+        FOOBAR,
+        BARFOO
+    };
+
+    @Test
+    public void testBasic() throws Exception {
+        MockSlogger root = new MockSlogger();
+        root.kv("foo", 2324).kv("bar", 2342).info(Events.FOOBAR);
+        assertThat(root.events, hasSize(1));
+        assertThat(root.events.get(0).getLevel(), is(MockSlogger.Level.INFO));
+        assertThat(root.events.get(0).getEvent(), is(Events.FOOBAR));
+        assertThat(root.events.get(0).getKeyValues(),
+                   allOf(hasEntry("foo", "2324"),
+                         hasEntry("bar", "2342")));
+    }
+
+    @Test
+    public void testSloggable() throws Exception {
+        MockSlogger root = new MockSlogger();
+        root.kv("fancy", new FancyClass(0, 2)).info(Events.FOOBAR);
+        assertThat(root.events, hasSize(1));
+        assertThat(root.events.get(0).getLevel(), is(MockSlogger.Level.INFO));
+        assertThat(root.events.get(0).getEvent(), is(Events.FOOBAR));
+        assertThat(root.events.get(0).getKeyValues(),
+                   allOf(hasEntry("fancy.foo", "0"),
+                         hasEntry("fancy.bar", "2"),
+                         hasEntry("fancy.baz.baz", "123")));
+    }
+
+    @Test
+    public void testList() throws Exception {
+        MockSlogger root = new MockSlogger();
+        List<Integer> list = new ArrayList<>();
+        list.add(1);
+        list.add(2);
+        root.kv("list", list).info(Events.FOOBAR);
+
+        assertThat(root.events, hasSize(1));
+        assertThat(root.events.get(0).getLevel(), is(MockSlogger.Level.INFO));
+        assertThat(root.events.get(0).getEvent(), is(Events.FOOBAR));
+        assertThat(root.events.get(0).getKeyValues(), hasEntry("list", "[1, 2]"));
+    }
+
+    @Test
+    public void testMap() throws Exception {
+        MockSlogger root = new MockSlogger();
+        HashMap<Integer, Integer> map = new LinkedHashMap<>();
+        map.put(1, 3);
+        map.put(2, 4);
+        root.kv("map", map).info(Events.FOOBAR);
+
+        assertThat(root.events, hasSize(1));
+        assertThat(root.events.get(0).getLevel(), is(MockSlogger.Level.INFO));
+        assertThat(root.events.get(0).getEvent(), is(Events.FOOBAR));
+        assertThat(root.events.get(0).getKeyValues(), hasEntry("map", "{1=3, 2=4}"));
+    }
+
+    @Test
+    public void testArray() throws Exception {
+        MockSlogger root = new MockSlogger();
+        String[] array = {"foo", "bar"};
+        root.kv("array", array).info(Events.FOOBAR);
+
+        assertThat(root.events, hasSize(1));
+        assertThat(root.events.get(0).getLevel(), is(MockSlogger.Level.INFO));
+        assertThat(root.events.get(0).getEvent(), is(Events.FOOBAR));
+        assertThat(root.events.get(0).getKeyValues(), hasEntry("array", "[foo, bar]"));
+    }
+
+    @Test
+    public void testNestingLimit() throws Exception {
+    }
+
+    @Test
+    public void testCtx() throws Exception {
+        MockSlogger root = new MockSlogger();
+        MockSlogger withCtx = (MockSlogger) root.kv("ctx1", 1234).kv("ctx2", 4321).ctx();
+
+        withCtx.kv("someMore", 2345).info(Events.FOOBAR);
+
+        assertThat(withCtx.events, hasSize(1));
+        assertThat(withCtx.events.get(0).getLevel(), is(MockSlogger.Level.INFO));
+        assertThat(withCtx.events.get(0).getEvent(), is(Events.FOOBAR));
+        System.out.println("kvs " + withCtx.events.get(0).getKeyValues());
+        assertThat(withCtx.events.get(0).getKeyValues(),
+                   allOf(hasEntry("ctx1", "1234"),
+                         hasEntry("ctx2", "4321"),
+                         hasEntry("someMore", "2345")));
+    }
+
+    @Test
+    public void testCtxImmutableAfterCreation() throws Exception {
+    }
+
+    static class FancyClass implements Sloggable {
+        int foo;
+        int bar;
+        OtherFancyClass baz;
+
+        FancyClass(int foo, int bar) {
+            this.foo = foo;
+            this.bar = bar;
+            this.baz = new OtherFancyClass(123);
+        }
+
+        @Override
+        public SloggableAccumulator log(SloggableAccumulator slogger) {
+            return slogger.kv("foo", foo)
+                .kv("bar", bar)
+                .kv("baz", baz);
+        }
+    }
+
+    static class OtherFancyClass implements Sloggable {
+        int baz;
+
+        OtherFancyClass(int baz) {
+            this.baz = baz;
+        }
+
+        @Override
+        public SloggableAccumulator log(SloggableAccumulator slogger) {
+            return slogger.kv("baz", baz);
+        }
+    }
+}
diff --git a/bookkeeper-slogger/pom.xml b/bookkeeper-slogger/pom.xml
new file mode 100644
index 00000000000..a80ef4b8346
--- /dev/null
+++ b/bookkeeper-slogger/pom.xml
@@ -0,0 +1,50 @@
+<?xml version="1.0"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
+    <groupId>org.apache.bookkeeper</groupId>
+    <artifactId>bookkeeper</artifactId>
+    <version>4.18.0-SNAPSHOT</version>
+    <relativePath>..</relativePath>
+  </parent>
+  <packaging>pom</packaging>
+  <groupId>org.apache.bookkeeper</groupId>
+  <artifactId>bookkeeper-slogger-parent</artifactId>
+  <name>Apache BookKeeper :: Structured Logger :: Parent</name>
+
+  <modules>
+    <module>api</module>
+    <module>slf4j</module>
+  </modules>
+
+  <build>
+    <plugins>
+      <plugin>
+        <groupId>com.github.spotbugs</groupId>
+        <artifactId>spotbugs-maven-plugin</artifactId>
+        <configuration>
+          <skip>true</skip>
+        </configuration>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-surefire-plugin</artifactId>
+      </plugin>
+    </plugins>
+  </build>
+</project>
diff --git a/bookkeeper-slogger/slf4j/pom.xml b/bookkeeper-slogger/slf4j/pom.xml
new file mode 100644
index 00000000000..e45b5848bfe
--- /dev/null
+++ b/bookkeeper-slogger/slf4j/pom.xml
@@ -0,0 +1,34 @@
+<?xml version="1.0"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
+    <artifactId>bookkeeper-slogger-parent</artifactId>
+    <groupId>org.apache.bookkeeper</groupId>
+    <version>4.18.0-SNAPSHOT</version>
+    <relativePath>..</relativePath>
+  </parent>
+  <groupId>org.apache.bookkeeper</groupId>
+  <artifactId>bookkeeper-slogger-slf4j</artifactId>
+  <name>Apache BookKeeper :: Structured Logger :: SLF4J Implementation</name>
+
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.bookkeeper</groupId>
+      <artifactId>bookkeeper-slogger-api</artifactId>
+      <version>${project.parent.version}</version>
+    </dependency>
+  </dependencies>
+</project>
diff --git a/bookkeeper-slogger/slf4j/src/main/java/org/apache/bookkeeper/slogger/slf4j/Slf4jSlogger.java b/bookkeeper-slogger/slf4j/src/main/java/org/apache/bookkeeper/slogger/slf4j/Slf4jSlogger.java
new file mode 100644
index 00000000000..81400227ab3
--- /dev/null
+++ b/bookkeeper-slogger/slf4j/src/main/java/org/apache/bookkeeper/slogger/slf4j/Slf4jSlogger.java
@@ -0,0 +1,106 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.bookkeeper.slogger.slf4j;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Optional;
+import org.apache.bookkeeper.slogger.AbstractSlogger;
+import org.apache.bookkeeper.slogger.Slogger;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.slf4j.MDC;
+
+/**
+ * Slf4j implementation of slogger.
+ */
+public class Slf4jSlogger extends AbstractSlogger {
+    private ThreadLocal<List<String>> mdcKeysTls = new ThreadLocal<List<String>>() {
+            @Override
+            protected List<String> initialValue() {
+                return new ArrayList<>();
+            }
+        };
+
+    private final Logger log;
+
+    public Slf4jSlogger(Class<?> clazz) {
+        this(clazz, Collections.emptyList());
+    }
+
+    Slf4jSlogger() {
+        this(Slf4jSlogger.class);
+    }
+
+    Slf4jSlogger(Class<?> clazz, Iterable<Object> parent) {
+        super(parent);
+        this.log = LoggerFactory.getLogger(clazz);
+    }
+
+    @Override
+    protected Slogger newSlogger(Optional<Class<?>> clazz, Iterable<Object> parent) {
+        return new Slf4jSlogger(clazz.orElse(Slf4jSlogger.class), parent);
+    }
+
+    @Override
+    protected void doLog(Level level, Enum<?> event, String message,
+                         Throwable throwable, List<Object> keyValues) {
+        List<String> mdcKeys = mdcKeysTls.get();
+        mdcKeys.clear();
+        try {
+            if (event != null) {
+                MDC.put("event", event.toString());
+                mdcKeys.add("event");
+            }
+
+            for (int i = 0; i < keyValues.size(); i += 2) {
+                MDC.put(keyValues.get(i).toString(), keyValues.get(i + 1).toString());
+                mdcKeys.add(keyValues.get(i).toString());
+            }
+
+            String msg = message == null ? event.toString() : message;
+            switch (level) {
+            case INFO:
+                if (throwable != null) {
+                    log.info(msg, throwable);
+                } else {
+                    log.info(msg);
+                }
+                break;
+            case WARN:
+                if (throwable != null) {
+                    log.warn(msg, throwable);
+                } else {
+                    log.warn(msg);
+                }
+                break;
+            default:
+            case ERROR:
+                if (throwable != null) {
+                    log.error(msg, throwable);
+                } else {
+                    log.error(msg);
+                }
+                break;
+            }
+        } finally {
+            for (String key : mdcKeys) {
+                MDC.remove(key);
+            }
+            mdcKeys.clear();
+        }
+    }
+}
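Because the key/value pairs travel through SLF4J's MDC for the duration of the call, they only appear in output if the backing layout references them. A hedged usage sketch (class and key names illustrative; a concrete SLF4J backend is assumed, not part of this patch):

import org.apache.bookkeeper.slogger.Slogger;
import org.apache.bookkeeper.slogger.slf4j.Slf4jSlogger;

public class Slf4jExample {
    enum Events {
        ENTRY_ADDED
    }

    public static void main(String[] args) {
        Slogger slog = new Slf4jSlogger(Slf4jExample.class);
        // "event", "ledgerId" and "entryId" are placed into the MDC before the
        // underlying log call and removed afterwards; they are only visible if
        // the backend's pattern includes them, e.g. %X{event} / %X{ledgerId}
        // in a Log4j or Logback pattern layout.
        slog.kv("ledgerId", 42L).kv("entryId", 7L).info(Events.ENTRY_ADDED);
    }
}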
diff --git a/bookkeeper-slogger/slf4j/src/main/java/org/apache/bookkeeper/slogger/slf4j/Slf4jSlogger.java b/bookkeeper-slogger/slf4j/src/main/java/org/apache/bookkeeper/slogger/slf4j/Slf4jSlogger.java
new file mode 100644
index 00000000000..81400227ab3
--- /dev/null
+++ b/bookkeeper-slogger/slf4j/src/main/java/org/apache/bookkeeper/slogger/slf4j/Slf4jSlogger.java
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.bookkeeper.slogger.slf4j;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Optional;
+import org.apache.bookkeeper.slogger.AbstractSlogger;
+import org.apache.bookkeeper.slogger.Slogger;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.slf4j.MDC;
+
+/**
+ * Slf4j implementation of slogger.
+ */
+public class Slf4jSlogger extends AbstractSlogger {
+    private ThreadLocal<List<String>> mdcKeysTls = new ThreadLocal<List<String>>() {
+        @Override
+        protected List<String> initialValue() {
+            return new ArrayList<>();
+        }
+    };
+
+    private final Logger log;
+
+    public Slf4jSlogger(Class<?> clazz) {
+        this(clazz, Collections.emptyList());
+    }
+
+    Slf4jSlogger() {
+        this(Slf4jSlogger.class);
+    }
+
+    Slf4jSlogger(Class<?> clazz, Iterable<Object> parent) {
+        super(parent);
+        this.log = LoggerFactory.getLogger(clazz);
+    }
+
+    @Override
+    protected Slogger newSlogger(Optional<Class<?>> clazz, Iterable<Object> parent) {
+        return new Slf4jSlogger(clazz.orElse(Slf4jSlogger.class), parent);
+    }
+
+    @Override
+    protected void doLog(Level level, Enum<?> event, String message,
+                         Throwable throwable, List<Object> keyValues) {
+        List<String> mdcKeys = mdcKeysTls.get();
+        mdcKeys.clear();
+        try {
+            if (event != null) {
+                MDC.put("event", event.toString());
+                mdcKeys.add("event");
+            }
+
+            for (int i = 0; i < keyValues.size(); i += 2) {
+                MDC.put(keyValues.get(i).toString(), keyValues.get(i + 1).toString());
+                mdcKeys.add(keyValues.get(i).toString());
+            }
+
+            String msg = message == null ? event.toString() : message;
+            switch (level) {
+            case INFO:
+                log.info(msg);
+                break;
+            case WARN:
+                if (throwable != null) {
+                    log.warn(msg, throwable);
+                } else {
+                    log.warn(msg);
+                }
+                break;
+            default:
+            case ERROR:
+                if (throwable != null) {
+                    log.error(msg, throwable);
+                } else {
+                    log.error(msg);
+                }
+                break;
+            }
+        } finally {
+            for (String key : mdcKeys) {
+                MDC.remove(key);
+            }
+            mdcKeys.clear();
+        }
+    }
+}
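The `doLog` implementation above is the crux of the slf4j binding: each key-value is staged into slf4j's MDC, the event is logged as a plain message, and the `finally` block removes exactly the keys that were added, leaving the thread's MDC clean. Roughly the same effect written by hand against plain slf4j looks like this (a sketch for illustration only; the class, logger, and keys are invented):

```java
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.slf4j.MDC;

class MdcByHand {
    private static final Logger LOG = LoggerFactory.getLogger(MdcByHand.class);

    void entryAdded(long ledgerId, long entryId) {
        // What Slf4jSlogger automates: MDC entries scoped to one log call.
        MDC.put("ledgerId", String.valueOf(ledgerId));
        MDC.put("entryId", String.valueOf(entryId));
        try {
            LOG.info("ENTRY_ADDED");
        } finally {
            MDC.remove("ledgerId");
            MDC.remove("entryId");
        }
    }
}
```

With a backend pattern layout that renders the MDC (for example logback's `%X`), the staged keys then appear alongside the message.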
diff --git a/bookkeeper-slogger/slf4j/src/main/java/org/apache/bookkeeper/slogger/slf4j/package-info.java b/bookkeeper-slogger/slf4j/src/main/java/org/apache/bookkeeper/slogger/slf4j/package-info.java
new file mode 100644
index 00000000000..69ff1ed3eb9
--- /dev/null
+++ b/bookkeeper-slogger/slf4j/src/main/java/org/apache/bookkeeper/slogger/slf4j/package-info.java
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/**
+ * Structured logging (slf4j implementation).
+ */
+package org.apache.bookkeeper.slogger.slf4j;
diff --git a/bookkeeper-slogger/slf4j/src/test/java/org/apache/bookkeeper/slogger/slf4j/Slf4jTest.java b/bookkeeper-slogger/slf4j/src/test/java/org/apache/bookkeeper/slogger/slf4j/Slf4jTest.java
new file mode 100644
index 00000000000..848a05cfacf
--- /dev/null
+++ b/bookkeeper-slogger/slf4j/src/test/java/org/apache/bookkeeper/slogger/slf4j/Slf4jTest.java
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.bookkeeper.slogger.slf4j;
+
+import org.apache.bookkeeper.slogger.Slogger;
+import org.junit.Test;
+
+/**
+ * Test to eyeball slf4j output.
+ * Contains no asserts.
+ */
+public class Slf4jTest {
+    enum Events {
+        FOOBAR
+    }
+
+    @Test
+    public void testBasic() throws Exception {
+        Slogger slogger = new Slf4jSlogger(Slf4jTest.class);
+        slogger.kv("foo", 123).kv("bar", 432).info(Events.FOOBAR);
+    }
+}
diff --git a/bookkeeper-stats-providers/codahale-metrics-provider/pom.xml b/bookkeeper-stats-providers/codahale-metrics-provider/pom.xml
deleted file mode 100644
index cab53fefad0..00000000000
--- a/bookkeeper-stats-providers/codahale-metrics-provider/pom.xml
+++ /dev/null
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
-    Licensed to the Apache Software Foundation (ASF) under one
-    or more contributor license agreements.  See the NOTICE file
-    distributed with this work for additional information
-    regarding copyright ownership.  The ASF licenses this file
-    to you under the Apache License, Version 2.0 (the
-    "License"); you may not use this file except in compliance
-    with the License.  You may obtain a copy of the License at
-
-        http://www.apache.org/licenses/LICENSE-2.0
-
-    Unless required by applicable law or agreed to in writing,
-    software distributed under the License is distributed on an
-    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-    KIND, either express or implied.  See the License for the
-    specific language governing permissions and limitations
-    under the License.
--->
-<project xmlns="http://maven.apache.org/POM/4.0.0"
-         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
-  <modelVersion>4.0.0</modelVersion>
-  <parent>
-    <artifactId>bookkeeper</artifactId>
-    <groupId>org.apache.bookkeeper</groupId>
-    <version>4.9.0-SNAPSHOT</version>
-    <relativePath>../..</relativePath>
-  </parent>
-  <groupId>org.apache.bookkeeper.stats</groupId>
-  <artifactId>codahale-metrics-provider</artifactId>
-  <name>Apache BookKeeper :: Stats Providers :: Codahale Metrics</name>
-  <url>http://maven.apache.org</url>
-  <dependencies>
-    <dependency>
-      <groupId>org.apache.bookkeeper.stats</groupId>
-      <artifactId>bookkeeper-stats-api</artifactId>
-      <version>${project.parent.version}</version>
-    </dependency>
-    <dependency>
-      <groupId>io.dropwizard.metrics</groupId>
-      <artifactId>metrics-core</artifactId>
-    </dependency>
-    <dependency>
-      <groupId>io.dropwizard.metrics</groupId>
-      <artifactId>metrics-jvm</artifactId>
-    </dependency>
-    <dependency>
-      <groupId>io.dropwizard.metrics</groupId>
-      <artifactId>metrics-graphite</artifactId>
-    </dependency>
-    <dependency>
-      <groupId>com.google.guava</groupId>
-      <artifactId>guava</artifactId>
-    </dependency>
-  </dependencies>
-</project>
diff --git a/bookkeeper-stats-providers/codahale-metrics-provider/src/main/java/org/apache/bookkeeper/stats/CodahaleMetricsProvider.java b/bookkeeper-stats-providers/codahale-metrics-provider/src/main/java/org/apache/bookkeeper/stats/CodahaleMetricsProvider.java
deleted file mode 100644
index 01658c7652e..00000000000
--- a/bookkeeper-stats-providers/codahale-metrics-provider/src/main/java/org/apache/bookkeeper/stats/CodahaleMetricsProvider.java
+++ /dev/null
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with this
- * work for additional information regarding copyright ownership. The ASF
- * licenses this file to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */ -package org.apache.bookkeeper.stats; - -import com.codahale.metrics.CsvReporter; -import com.codahale.metrics.JmxReporter; -import com.codahale.metrics.MetricFilter; -import com.codahale.metrics.MetricRegistry; -import com.codahale.metrics.ScheduledReporter; -import com.codahale.metrics.Slf4jReporter; -import com.codahale.metrics.graphite.Graphite; -import com.codahale.metrics.graphite.GraphiteReporter; -import com.codahale.metrics.jvm.GarbageCollectorMetricSet; -import com.codahale.metrics.jvm.MemoryUsageGaugeSet; -import com.google.common.base.Strings; -import com.google.common.net.HostAndPort; -import java.io.File; -import java.net.InetSocketAddress; -import java.util.ArrayList; -import java.util.List; -import java.util.concurrent.TimeUnit; -import org.apache.commons.configuration.Configuration; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * A {@link StatsProvider} implemented based on Codahale metrics library. - */ -@Deprecated -@SuppressWarnings("deprecation") -public class CodahaleMetricsProvider implements StatsProvider { - - static final Logger LOG = LoggerFactory.getLogger(CodahaleMetricsProvider.class); - - MetricRegistry metrics = null; - List reporters = new ArrayList(); - JmxReporter jmx = null; - - synchronized void initIfNecessary() { - if (metrics == null) { - metrics = new MetricRegistry(); - metrics.registerAll(new MemoryUsageGaugeSet()); - metrics.registerAll(new GarbageCollectorMetricSet()); - } - } - - public synchronized MetricRegistry getMetrics() { - return metrics; - } - - @Override - public void start(Configuration conf) { - initIfNecessary(); - - int metricsOutputFrequency = conf.getInt("codahaleStatsOutputFrequencySeconds", 60); - String prefix = conf.getString("codahaleStatsPrefix", ""); - String graphiteHost = conf.getString("codahaleStatsGraphiteEndpoint"); - String csvDir = conf.getString("codahaleStatsCSVEndpoint"); - String slf4jCat = conf.getString("codahaleStatsSlf4jEndpoint"); - String jmxDomain = conf.getString("codahaleStatsJmxEndpoint"); - - if (!Strings.isNullOrEmpty(graphiteHost)) { - LOG.info("Configuring stats with graphite"); - HostAndPort addr = HostAndPort.fromString(graphiteHost); - final Graphite graphite = new Graphite( - new InetSocketAddress(addr.getHostText(), addr.getPort())); - reporters.add(GraphiteReporter.forRegistry(getMetrics()) - .prefixedWith(prefix) - .convertRatesTo(TimeUnit.SECONDS) - .convertDurationsTo(TimeUnit.MILLISECONDS) - .filter(MetricFilter.ALL) - .build(graphite)); - } - if (!Strings.isNullOrEmpty(csvDir)) { - // NOTE: 1/ metrics output files are exclusive to a given process - // 2/ the output directory must exist - // 3/ if output files already exist they are not overwritten and there is no metrics output - File outdir; - if (!Strings.isNullOrEmpty(prefix)) { - outdir = new File(csvDir, prefix); - } else { - outdir = new File(csvDir); - } - LOG.info("Configuring stats with csv output to directory [{}]", outdir.getAbsolutePath()); - reporters.add(CsvReporter.forRegistry(getMetrics()) - .convertRatesTo(TimeUnit.SECONDS) - .convertDurationsTo(TimeUnit.MILLISECONDS) - .build(outdir)); - } - if (!Strings.isNullOrEmpty(slf4jCat)) { - LOG.info("Configuring stats with slf4j"); - reporters.add(Slf4jReporter.forRegistry(getMetrics()) - .outputTo(LoggerFactory.getLogger(slf4jCat)) - .convertRatesTo(TimeUnit.SECONDS) - .convertDurationsTo(TimeUnit.MILLISECONDS) - .build()); - } - if (!Strings.isNullOrEmpty(jmxDomain)) { - LOG.info("Configuring stats with jmx"); - jmx = 
JmxReporter.forRegistry(getMetrics()) - .inDomain(jmxDomain) - .convertRatesTo(TimeUnit.SECONDS) - .convertDurationsTo(TimeUnit.MILLISECONDS) - .build(); - jmx.start(); - } - - for (ScheduledReporter r : reporters) { - r.start(metricsOutputFrequency, TimeUnit.SECONDS); - } - } - - @Override - public void stop() { - for (ScheduledReporter r : reporters) { - r.report(); - r.stop(); - } - if (jmx != null) { - jmx.stop(); - } - } - - @Override - public StatsLogger getStatsLogger(String name) { - initIfNecessary(); - return new CodahaleStatsLogger(getMetrics(), name); - } -} diff --git a/bookkeeper-stats-providers/codahale-metrics-provider/src/main/java/org/apache/bookkeeper/stats/CodahaleOpStatsLogger.java b/bookkeeper-stats-providers/codahale-metrics-provider/src/main/java/org/apache/bookkeeper/stats/CodahaleOpStatsLogger.java deleted file mode 100644 index 690f2b51dce..00000000000 --- a/bookkeeper-stats-providers/codahale-metrics-provider/src/main/java/org/apache/bookkeeper/stats/CodahaleOpStatsLogger.java +++ /dev/null @@ -1,74 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with this - * work for additional information regarding copyright ownership. The ASF - * licenses this file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - */ -package org.apache.bookkeeper.stats; - -import com.codahale.metrics.Snapshot; -import com.codahale.metrics.Timer; -import java.util.Arrays; -import java.util.concurrent.TimeUnit; - -@Deprecated -class CodahaleOpStatsLogger implements OpStatsLogger { - final Timer success; - final Timer fail; - - CodahaleOpStatsLogger(Timer success, Timer fail) { - this.success = success; - this.fail = fail; - } - - // OpStatsLogger functions - public void registerFailedEvent(long eventLatency, TimeUnit unit) { - fail.update(eventLatency, unit); - } - - public void registerSuccessfulEvent(long eventLatency, TimeUnit unit) { - success.update(eventLatency, unit); - } - - public void registerSuccessfulValue(long value) { - // Values are inserted as millis, which is the unit they will be presented, to maintain 1:1 scale - success.update(value, TimeUnit.MILLISECONDS); - } - - public void registerFailedValue(long value) { - // Values are inserted as millis, which is the unit they will be presented, to maintain 1:1 scale - fail.update(value, TimeUnit.MILLISECONDS); - } - - public synchronized void clear() { - // can't clear a timer - } - - /** - * This function should go away soon (hopefully). 
- */ - public synchronized OpStatsData toOpStatsData() { - long numFailed = fail.getCount(); - long numSuccess = success.getCount(); - Snapshot s = success.getSnapshot(); - double avgLatencyMillis = s.getMean(); - - double[] defaultPercentiles = {10, 50, 90, 99, 99.9, 99.99}; - long[] latenciesMillis = new long[defaultPercentiles.length]; - Arrays.fill(latenciesMillis, Long.MAX_VALUE); - for (int i = 0; i < defaultPercentiles.length; i++) { - latenciesMillis[i] = (long) s.getValue(defaultPercentiles[i] / 100); - } - return new OpStatsData(numSuccess, numFailed, avgLatencyMillis, latenciesMillis); - } -} diff --git a/bookkeeper-stats-providers/codahale-metrics-provider/src/main/java/org/apache/bookkeeper/stats/CodahaleStatsLogger.java b/bookkeeper-stats-providers/codahale-metrics-provider/src/main/java/org/apache/bookkeeper/stats/CodahaleStatsLogger.java deleted file mode 100644 index 7a75258ead3..00000000000 --- a/bookkeeper-stats-providers/codahale-metrics-provider/src/main/java/org/apache/bookkeeper/stats/CodahaleStatsLogger.java +++ /dev/null @@ -1,109 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with this - * work for additional information regarding copyright ownership. The ASF - * licenses this file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - */ -package org.apache.bookkeeper.stats; - -import static com.codahale.metrics.MetricRegistry.name; - -import com.codahale.metrics.MetricRegistry; -import com.codahale.metrics.Timer; - -/** - * A {@link StatsLogger} implemented based on Codahale metrics library. 
- */ -@Deprecated -public class CodahaleStatsLogger implements StatsLogger { - protected final String basename; - final MetricRegistry metrics; - - CodahaleStatsLogger(MetricRegistry metrics, String basename) { - this.metrics = metrics; - this.basename = basename; - } - - @Override - public OpStatsLogger getOpStatsLogger(String statName) { - Timer success = metrics.timer(name(basename, statName)); - Timer failure = metrics.timer(name(basename, statName + "-fail")); - return new CodahaleOpStatsLogger(success, failure); - } - - @Override - public Counter getCounter(String statName) { - final com.codahale.metrics.Counter c = metrics.counter(name(basename, statName)); - return new Counter() { - @Override - public synchronized void clear() { - long cur = c.getCount(); - c.dec(cur); - } - - @Override - public Long get() { - return c.getCount(); - } - - @Override - public void inc() { - c.inc(); - } - - @Override - public void dec() { - c.dec(); - } - - @Override - public void add(long delta) { - c.inc(delta); - } - }; - } - - @Override - public void registerGauge(final String statName, final Gauge gauge) { - String metricName = name(basename, statName); - metrics.remove(metricName); - - metrics.register(metricName, new com.codahale.metrics.Gauge() { - @Override - public T getValue() { - return gauge.getSample(); - } - }); - } - - @Override - public void unregisterGauge(String statName, Gauge gauge) { - // do nothing right now as the Codahale doesn't support conditional removal - } - - @Override - public StatsLogger scope(String scope) { - String scopeName; - if (0 == basename.length()) { - scopeName = scope; - } else { - scopeName = name(basename, scope); - } - return new CodahaleStatsLogger(metrics, scopeName); - } - - @Override - public void removeScope(String name, StatsLogger statsLogger) { - // no-op. the codahale stats logger doesn't have the references for stats logger. - } -} diff --git a/bookkeeper-stats-providers/codahale-metrics-provider/src/main/java/org/apache/bookkeeper/stats/codahale/CodahaleMetricsProvider.java b/bookkeeper-stats-providers/codahale-metrics-provider/src/main/java/org/apache/bookkeeper/stats/codahale/CodahaleMetricsProvider.java deleted file mode 100644 index f4ca952f259..00000000000 --- a/bookkeeper-stats-providers/codahale-metrics-provider/src/main/java/org/apache/bookkeeper/stats/codahale/CodahaleMetricsProvider.java +++ /dev/null @@ -1,144 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with this - * work for additional information regarding copyright ownership. The ASF - * licenses this file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. 
- */ -package org.apache.bookkeeper.stats.codahale; - -import com.codahale.metrics.CsvReporter; -import com.codahale.metrics.JmxReporter; -import com.codahale.metrics.MetricFilter; -import com.codahale.metrics.MetricRegistry; -import com.codahale.metrics.ScheduledReporter; -import com.codahale.metrics.Slf4jReporter; -import com.codahale.metrics.graphite.Graphite; -import com.codahale.metrics.graphite.GraphiteReporter; -import com.codahale.metrics.jvm.GarbageCollectorMetricSet; -import com.codahale.metrics.jvm.MemoryUsageGaugeSet; -import com.google.common.base.Strings; -import com.google.common.net.HostAndPort; -import java.io.File; -import java.net.InetSocketAddress; -import java.util.ArrayList; -import java.util.List; -import java.util.concurrent.TimeUnit; -import org.apache.bookkeeper.stats.StatsLogger; -import org.apache.bookkeeper.stats.StatsProvider; -import org.apache.commons.configuration.Configuration; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * A {@link StatsProvider} implemented based on Codahale metrics library. - */ -@SuppressWarnings("deprecation") -public class CodahaleMetricsProvider implements StatsProvider { - - static final Logger LOG = LoggerFactory.getLogger(CodahaleMetricsProvider.class); - - MetricRegistry metrics = null; - List reporters = new ArrayList(); - JmxReporter jmx = null; - - synchronized void initIfNecessary() { - if (metrics == null) { - metrics = new MetricRegistry(); - metrics.registerAll(new MemoryUsageGaugeSet()); - metrics.registerAll(new GarbageCollectorMetricSet()); - } - } - - public synchronized MetricRegistry getMetrics() { - return metrics; - } - - @Override - public void start(Configuration conf) { - initIfNecessary(); - - int metricsOutputFrequency = conf.getInt("codahaleStatsOutputFrequencySeconds", 60); - String prefix = conf.getString("codahaleStatsPrefix", ""); - String graphiteHost = conf.getString("codahaleStatsGraphiteEndpoint"); - String csvDir = conf.getString("codahaleStatsCSVEndpoint"); - String slf4jCat = conf.getString("codahaleStatsSlf4jEndpoint"); - String jmxDomain = conf.getString("codahaleStatsJmxEndpoint"); - - if (!Strings.isNullOrEmpty(graphiteHost)) { - LOG.info("Configuring stats with graphite"); - HostAndPort addr = HostAndPort.fromString(graphiteHost); - final Graphite graphite = new Graphite( - new InetSocketAddress(addr.getHostText(), addr.getPort())); - reporters.add(GraphiteReporter.forRegistry(getMetrics()) - .prefixedWith(prefix) - .convertRatesTo(TimeUnit.SECONDS) - .convertDurationsTo(TimeUnit.MILLISECONDS) - .filter(MetricFilter.ALL) - .build(graphite)); - } - if (!Strings.isNullOrEmpty(csvDir)) { - // NOTE: 1/ metrics output files are exclusive to a given process - // 2/ the output directory must exist - // 3/ if output files already exist they are not overwritten and there is no metrics output - File outdir; - if (!Strings.isNullOrEmpty(prefix)) { - outdir = new File(csvDir, prefix); - } else { - outdir = new File(csvDir); - } - LOG.info("Configuring stats with csv output to directory [{}]", outdir.getAbsolutePath()); - reporters.add(CsvReporter.forRegistry(getMetrics()) - .convertRatesTo(TimeUnit.SECONDS) - .convertDurationsTo(TimeUnit.MILLISECONDS) - .build(outdir)); - } - if (!Strings.isNullOrEmpty(slf4jCat)) { - LOG.info("Configuring stats with slf4j"); - reporters.add(Slf4jReporter.forRegistry(getMetrics()) - .outputTo(LoggerFactory.getLogger(slf4jCat)) - .convertRatesTo(TimeUnit.SECONDS) - .convertDurationsTo(TimeUnit.MILLISECONDS) - .build()); - } - if 
(!Strings.isNullOrEmpty(jmxDomain)) { - LOG.info("Configuring stats with jmx"); - jmx = JmxReporter.forRegistry(getMetrics()) - .inDomain(jmxDomain) - .convertRatesTo(TimeUnit.SECONDS) - .convertDurationsTo(TimeUnit.MILLISECONDS) - .build(); - jmx.start(); - } - - for (ScheduledReporter r : reporters) { - r.start(metricsOutputFrequency, TimeUnit.SECONDS); - } - } - - @Override - public void stop() { - for (ScheduledReporter r : reporters) { - r.report(); - r.stop(); - } - if (jmx != null) { - jmx.stop(); - } - } - - @Override - public StatsLogger getStatsLogger(String name) { - initIfNecessary(); - return new CodahaleStatsLogger(getMetrics(), name); - } -} diff --git a/bookkeeper-stats-providers/codahale-metrics-provider/src/main/java/org/apache/bookkeeper/stats/codahale/CodahaleOpStatsLogger.java b/bookkeeper-stats-providers/codahale-metrics-provider/src/main/java/org/apache/bookkeeper/stats/codahale/CodahaleOpStatsLogger.java deleted file mode 100644 index ae97c7858aa..00000000000 --- a/bookkeeper-stats-providers/codahale-metrics-provider/src/main/java/org/apache/bookkeeper/stats/codahale/CodahaleOpStatsLogger.java +++ /dev/null @@ -1,75 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with this - * work for additional information regarding copyright ownership. The ASF - * licenses this file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - */ -package org.apache.bookkeeper.stats.codahale; - -import com.codahale.metrics.Snapshot; -import com.codahale.metrics.Timer; -import java.util.Arrays; -import java.util.concurrent.TimeUnit; -import org.apache.bookkeeper.stats.OpStatsData; -import org.apache.bookkeeper.stats.OpStatsLogger; - -class CodahaleOpStatsLogger implements OpStatsLogger { - final Timer success; - final Timer fail; - - CodahaleOpStatsLogger(Timer success, Timer fail) { - this.success = success; - this.fail = fail; - } - - // OpStatsLogger functions - public void registerFailedEvent(long eventLatency, TimeUnit unit) { - fail.update(eventLatency, unit); - } - - public void registerSuccessfulEvent(long eventLatency, TimeUnit unit) { - success.update(eventLatency, unit); - } - - public void registerSuccessfulValue(long value) { - // Values are inserted as millis, which is the unit they will be presented, to maintain 1:1 scale - success.update(value, TimeUnit.MILLISECONDS); - } - - public void registerFailedValue(long value) { - // Values are inserted as millis, which is the unit they will be presented, to maintain 1:1 scale - fail.update(value, TimeUnit.MILLISECONDS); - } - - public synchronized void clear() { - // can't clear a timer - } - - /** - * This function should go away soon (hopefully). 
- */ - public synchronized OpStatsData toOpStatsData() { - long numFailed = fail.getCount(); - long numSuccess = success.getCount(); - Snapshot s = success.getSnapshot(); - double avgLatencyMillis = s.getMean(); - - double[] defaultPercentiles = {10, 50, 90, 99, 99.9, 99.99}; - long[] latenciesMillis = new long[defaultPercentiles.length]; - Arrays.fill(latenciesMillis, Long.MAX_VALUE); - for (int i = 0; i < defaultPercentiles.length; i++) { - latenciesMillis[i] = (long) s.getValue(defaultPercentiles[i] / 100); - } - return new OpStatsData(numSuccess, numFailed, avgLatencyMillis, latenciesMillis); - } -} diff --git a/bookkeeper-stats-providers/codahale-metrics-provider/src/main/java/org/apache/bookkeeper/stats/codahale/CodahaleStatsLogger.java b/bookkeeper-stats-providers/codahale-metrics-provider/src/main/java/org/apache/bookkeeper/stats/codahale/CodahaleStatsLogger.java deleted file mode 100644 index dba2e121f52..00000000000 --- a/bookkeeper-stats-providers/codahale-metrics-provider/src/main/java/org/apache/bookkeeper/stats/codahale/CodahaleStatsLogger.java +++ /dev/null @@ -1,112 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with this - * work for additional information regarding copyright ownership. The ASF - * licenses this file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - */ -package org.apache.bookkeeper.stats.codahale; - -import static com.codahale.metrics.MetricRegistry.name; - -import com.codahale.metrics.MetricRegistry; -import com.codahale.metrics.Timer; -import org.apache.bookkeeper.stats.Counter; -import org.apache.bookkeeper.stats.Gauge; -import org.apache.bookkeeper.stats.OpStatsLogger; -import org.apache.bookkeeper.stats.StatsLogger; - -/** - * A {@link StatsLogger} implemented based on Codahale metrics library. 
- */ -public class CodahaleStatsLogger implements StatsLogger { - protected final String basename; - final MetricRegistry metrics; - - CodahaleStatsLogger(MetricRegistry metrics, String basename) { - this.metrics = metrics; - this.basename = basename; - } - - @Override - public OpStatsLogger getOpStatsLogger(String statName) { - Timer success = metrics.timer(name(basename, statName)); - Timer failure = metrics.timer(name(basename, statName + "-fail")); - return new CodahaleOpStatsLogger(success, failure); - } - - @Override - public Counter getCounter(String statName) { - final com.codahale.metrics.Counter c = metrics.counter(name(basename, statName)); - return new Counter() { - @Override - public synchronized void clear() { - long cur = c.getCount(); - c.dec(cur); - } - - @Override - public Long get() { - return c.getCount(); - } - - @Override - public void inc() { - c.inc(); - } - - @Override - public void dec() { - c.dec(); - } - - @Override - public void add(long delta) { - c.inc(delta); - } - }; - } - - @Override - public void registerGauge(final String statName, final Gauge gauge) { - String metricName = name(basename, statName); - metrics.remove(metricName); - - metrics.register(metricName, new com.codahale.metrics.Gauge() { - @Override - public T getValue() { - return gauge.getSample(); - } - }); - } - - @Override - public void unregisterGauge(String statName, Gauge gauge) { - // do nothing right now as the Codahale doesn't support conditional removal - } - - @Override - public StatsLogger scope(String scope) { - String scopeName; - if (basename == null || 0 == basename.length()) { - scopeName = scope; - } else { - scopeName = name(basename, scope); - } - return new CodahaleStatsLogger(metrics, scopeName); - } - - @Override - public void removeScope(String name, StatsLogger statsLogger) { - // no-op. the codahale stats logger doesn't have the references for stats logger. - } -} diff --git a/bookkeeper-stats-providers/codahale-metrics-provider/src/main/java/org/apache/bookkeeper/stats/codahale/package-info.java b/bookkeeper-stats-providers/codahale-metrics-provider/src/main/java/org/apache/bookkeeper/stats/codahale/package-info.java deleted file mode 100644 index 1afa43899af..00000000000 --- a/bookkeeper-stats-providers/codahale-metrics-provider/src/main/java/org/apache/bookkeeper/stats/codahale/package-info.java +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with this - * work for additional information regarding copyright ownership. The ASF - * licenses this file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - */ -/** - * A lightweight stats library implemention based on Codahale metrics library. 
- */ -package org.apache.bookkeeper.stats.codahale; diff --git a/bookkeeper-stats-providers/codahale-metrics-provider/src/main/java/org/apache/bookkeeper/stats/package-info.java b/bookkeeper-stats-providers/codahale-metrics-provider/src/main/java/org/apache/bookkeeper/stats/package-info.java deleted file mode 100644 index 7276104dca7..00000000000 --- a/bookkeeper-stats-providers/codahale-metrics-provider/src/main/java/org/apache/bookkeeper/stats/package-info.java +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with this - * work for additional information regarding copyright ownership. The ASF - * licenses this file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - */ -/** - * A lightweight stats library implemention based on Codahale metrics library. - */ -package org.apache.bookkeeper.stats; diff --git a/bookkeeper-stats-providers/pom.xml b/bookkeeper-stats-providers/pom.xml deleted file mode 100644 index e64f32cfb87..00000000000 --- a/bookkeeper-stats-providers/pom.xml +++ /dev/null @@ -1,35 +0,0 @@ - - - - - bookkeeper - org.apache.bookkeeper - 4.9.0-SNAPSHOT - - 4.0.0 - bookkeeper-stats-providers - pom - Apache BookKeeper :: Stats Providers - - twitter-finagle-provider - twitter-science-provider - twitter-ostrich-provider - codahale-metrics-provider - prometheus-metrics-provider - - diff --git a/bookkeeper-stats-providers/prometheus-metrics-provider/pom.xml b/bookkeeper-stats-providers/prometheus-metrics-provider/pom.xml deleted file mode 100644 index 92b35c61ba7..00000000000 --- a/bookkeeper-stats-providers/prometheus-metrics-provider/pom.xml +++ /dev/null @@ -1,72 +0,0 @@ - - - - 4.0.0 - - bookkeeper - org.apache.bookkeeper - 4.9.0-SNAPSHOT - ../.. - - org.apache.bookkeeper.stats - prometheus-metrics-provider - Apache BookKeeper :: Stats Providers :: Prometheus - - - org.apache.bookkeeper.stats - bookkeeper-stats-api - ${project.parent.version} - - - - io.prometheus - simpleclient - - - - io.prometheus - simpleclient_hotspot - - - - io.prometheus - simpleclient_servlet - - - - io.netty - netty-all - - - - org.eclipse.jetty - jetty-servlet - - - - com.google.guava - guava - - - - com.yahoo.datasketches - sketches-core - - - - diff --git a/bookkeeper-stats-providers/prometheus-metrics-provider/src/main/java/org/apache/bookkeeper/stats/prometheus/LongAdderCounter.java b/bookkeeper-stats-providers/prometheus-metrics-provider/src/main/java/org/apache/bookkeeper/stats/prometheus/LongAdderCounter.java deleted file mode 100644 index 4b677037dda..00000000000 --- a/bookkeeper-stats-providers/prometheus-metrics-provider/src/main/java/org/apache/bookkeeper/stats/prometheus/LongAdderCounter.java +++ /dev/null @@ -1,56 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with this - * work for additional information regarding copyright ownership. 
The ASF
- * licenses this file to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package org.apache.bookkeeper.stats.prometheus;
-
-import java.util.concurrent.atomic.LongAdder;
-
-import org.apache.bookkeeper.stats.Counter;
-
-/**
- * {@link Counter} implementation based on {@link LongAdder}.
- *
- * <p>
          LongAdder keeps a counter per-thread and then aggregates to get the result, in order to avoid contention between - * multiple threads. - */ -public class LongAdderCounter implements Counter { - private final LongAdder counter = new LongAdder(); - - @Override - public void clear() { - counter.reset(); - } - - @Override - public void inc() { - counter.increment(); - } - - @Override - public void dec() { - counter.decrement(); - } - - @Override - public void add(long delta) { - counter.add(delta); - } - - @Override - public Long get() { - return counter.sum(); - } -} diff --git a/bookkeeper-stats-providers/prometheus-metrics-provider/src/main/java/org/apache/bookkeeper/stats/prometheus/PrometheusMetricsProvider.java b/bookkeeper-stats-providers/prometheus-metrics-provider/src/main/java/org/apache/bookkeeper/stats/prometheus/PrometheusMetricsProvider.java deleted file mode 100644 index 645a686a4ee..00000000000 --- a/bookkeeper-stats-providers/prometheus-metrics-provider/src/main/java/org/apache/bookkeeper/stats/prometheus/PrometheusMetricsProvider.java +++ /dev/null @@ -1,213 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with this - * work for additional information regarding copyright ownership. The ASF - * licenses this file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - */ -package org.apache.bookkeeper.stats.prometheus; - -import com.google.common.annotations.VisibleForTesting; - -import io.netty.util.concurrent.DefaultThreadFactory; -//CHECKSTYLE.OFF: IllegalImport -import io.netty.util.internal.PlatformDependent; -//CHECKSTYLE.ON: IllegalImport - -import io.prometheus.client.CollectorRegistry; -import io.prometheus.client.Gauge; -import io.prometheus.client.Gauge.Child; -import io.prometheus.client.hotspot.GarbageCollectorExports; -import io.prometheus.client.hotspot.MemoryPoolsExports; -import io.prometheus.client.hotspot.StandardExports; -import io.prometheus.client.hotspot.ThreadExports; - -import java.io.IOException; -import java.io.Writer; -import java.lang.reflect.Field; -import java.net.InetSocketAddress; -import java.util.concurrent.ConcurrentMap; -import java.util.concurrent.ConcurrentSkipListMap; -import java.util.concurrent.Executors; -import java.util.concurrent.ScheduledExecutorService; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicLong; - -import org.apache.bookkeeper.stats.CachingStatsProvider; -import org.apache.bookkeeper.stats.StatsLogger; -import org.apache.bookkeeper.stats.StatsProvider; -import org.apache.commons.configuration.Configuration; -import org.eclipse.jetty.server.Server; -import org.eclipse.jetty.servlet.ServletContextHandler; -import org.eclipse.jetty.servlet.ServletHolder; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * A Prometheus based {@link StatsProvider} implementation. 
- */ -public class PrometheusMetricsProvider implements StatsProvider { - - private ScheduledExecutorService executor; - - public static final String PROMETHEUS_STATS_HTTP_ENABLE = "prometheusStatsHttpEnable"; - public static final boolean DEFAULT_PROMETHEUS_STATS_HTTP_ENABLE = true; - - public static final String PROMETHEUS_STATS_HTTP_PORT = "prometheusStatsHttpPort"; - public static final int DEFAULT_PROMETHEUS_STATS_HTTP_PORT = 8000; - - public static final String PROMETHEUS_STATS_LATENCY_ROLLOVER_SECONDS = "prometheusStatsLatencyRolloverSeconds"; - public static final int DEFAULT_PROMETHEUS_STATS_LATENCY_ROLLOVER_SECONDS = 60; - - final CollectorRegistry registry; - - Server server; - private final CachingStatsProvider cachingStatsProvider; - - /* - * These acts a registry of the metrics defined in this provider - */ - final ConcurrentMap counters = new ConcurrentSkipListMap<>(); - final ConcurrentMap> gauges = new ConcurrentSkipListMap<>(); - final ConcurrentMap opStats = new ConcurrentSkipListMap<>(); - - public PrometheusMetricsProvider() { - this(new CollectorRegistry()); - } - - public PrometheusMetricsProvider(CollectorRegistry registry) { - this.registry = registry; - this.cachingStatsProvider = new CachingStatsProvider(new StatsProvider() { - @Override - public void start(Configuration conf) { - // nop - } - - @Override - public void stop() { - // nop - } - - @Override - public StatsLogger getStatsLogger(String scope) { - return new PrometheusStatsLogger(PrometheusMetricsProvider.this, scope); - } - }); - } - - @Override - public void start(Configuration conf) { - boolean httpEnabled = conf.getBoolean(PROMETHEUS_STATS_HTTP_ENABLE, DEFAULT_PROMETHEUS_STATS_HTTP_ENABLE); - boolean bkHttpServerEnabled = conf.getBoolean("httpServerEnabled", false); - // only start its own http server when prometheus http is enabled and bk http server is not enabled. - if (httpEnabled && !bkHttpServerEnabled) { - int httpPort = conf.getInt(PROMETHEUS_STATS_HTTP_PORT, DEFAULT_PROMETHEUS_STATS_HTTP_PORT); - InetSocketAddress httpEndpoint = InetSocketAddress.createUnresolved("0.0.0.0", httpPort); - this.server = new Server(httpEndpoint); - ServletContextHandler context = new ServletContextHandler(); - context.setContextPath("/"); - server.setHandler(context); - - context.addServlet(new ServletHolder(new PrometheusServlet(this)), "/metrics"); - - try { - server.start(); - log.info("Started Prometheus stats endpoint at {}", httpEndpoint); - } catch (Exception e) { - throw new RuntimeException(e); - } - } - - // Include standard JVM stats - new StandardExports().register(registry); - new MemoryPoolsExports().register(registry); - new GarbageCollectorExports().register(registry); - new ThreadExports().register(registry); - - // Add direct memory allocated through unsafe - Gauge.build("jvm_memory_direct_bytes_used", "-").create().setChild(new Child() { - @Override - public double get() { - return directMemoryUsage != null ? 
directMemoryUsage.longValue() : Double.NaN; - } - }).register(registry); - - Gauge.build("jvm_memory_direct_bytes_max", "-").create().setChild(new Child() { - @Override - public double get() { - return PlatformDependent.maxDirectMemory(); - } - }).register(registry); - - executor = Executors.newSingleThreadScheduledExecutor(new DefaultThreadFactory("metrics")); - - int latencyRolloverSeconds = conf.getInt(PROMETHEUS_STATS_LATENCY_ROLLOVER_SECONDS, - DEFAULT_PROMETHEUS_STATS_LATENCY_ROLLOVER_SECONDS); - - executor.scheduleAtFixedRate(() -> { - rotateLatencyCollection(); - }, 1, latencyRolloverSeconds, TimeUnit.SECONDS); - - } - - @Override - public void stop() { - if (server != null) { - try { - server.stop(); - } catch (Exception e) { - log.warn("Failed to shutdown Jetty server", e); - } - } - } - - @Override - public StatsLogger getStatsLogger(String scope) { - return this.cachingStatsProvider.getStatsLogger(scope); - } - - @Override - public void writeAllMetrics(Writer writer) throws IOException { - PrometheusTextFormatUtil.writeMetricsCollectedByPrometheusClient(writer, registry); - - gauges.forEach((name, gauge) -> PrometheusTextFormatUtil.writeGauge(writer, name, gauge)); - counters.forEach((name, counter) -> PrometheusTextFormatUtil.writeCounter(writer, name, counter)); - opStats.forEach((name, opStatLogger) -> PrometheusTextFormatUtil.writeOpStat(writer, name, opStatLogger)); - } - - @VisibleForTesting - void rotateLatencyCollection() { - opStats.forEach((name, metric) -> { - metric.rotateLatencyCollection(); - }); - } - - private static final Logger log = LoggerFactory.getLogger(PrometheusMetricsProvider.class); - - /* - * Try to get Netty counter of used direct memory. This will be correct, unlike the JVM values. - */ - private static final AtomicLong directMemoryUsage; - static { - AtomicLong tmpDirectMemoryUsage = null; - - try { - Field field = PlatformDependent.class.getDeclaredField("DIRECT_MEMORY_COUNTER"); - field.setAccessible(true); - tmpDirectMemoryUsage = (AtomicLong) field.get(null); - } catch (Throwable t) { - log.warn("Failed to access netty DIRECT_MEMORY_COUNTER field {}", t.getMessage()); - } - - directMemoryUsage = tmpDirectMemoryUsage; - } -} \ No newline at end of file diff --git a/bookkeeper-stats-providers/prometheus-metrics-provider/src/main/java/org/apache/bookkeeper/stats/prometheus/PrometheusStatsLogger.java b/bookkeeper-stats-providers/prometheus-metrics-provider/src/main/java/org/apache/bookkeeper/stats/prometheus/PrometheusStatsLogger.java deleted file mode 100644 index 472a3fbb98d..00000000000 --- a/bookkeeper-stats-providers/prometheus-metrics-provider/src/main/java/org/apache/bookkeeper/stats/prometheus/PrometheusStatsLogger.java +++ /dev/null @@ -1,75 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with this - * work for additional information regarding copyright ownership. The ASF - * licenses this file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the - * License for the specific language governing permissions and limitations under - * the License. - */ -package org.apache.bookkeeper.stats.prometheus; - -import com.google.common.base.Joiner; - -import io.prometheus.client.Collector; - -import org.apache.bookkeeper.stats.Counter; -import org.apache.bookkeeper.stats.Gauge; -import org.apache.bookkeeper.stats.OpStatsLogger; -import org.apache.bookkeeper.stats.StatsLogger; - -/** - * A {@code Prometheus} based {@link StatsLogger} implementation. - */ -public class PrometheusStatsLogger implements StatsLogger { - - private final PrometheusMetricsProvider provider; - private final String scope; - - PrometheusStatsLogger(PrometheusMetricsProvider provider, String scope) { - this.provider = provider; - this.scope = scope; - } - - @Override - public OpStatsLogger getOpStatsLogger(String name) { - return provider.opStats.computeIfAbsent(completeName(name), x -> new DataSketchesOpStatsLogger()); - } - - @Override - public Counter getCounter(String name) { - return provider.counters.computeIfAbsent(completeName(name), x -> new LongAdderCounter()); - } - - @Override - public void registerGauge(String name, Gauge gauge) { - provider.gauges.computeIfAbsent(completeName(name), x -> new SimpleGauge(gauge)); - } - - @Override - public void unregisterGauge(String name, Gauge gauge) { - // no-op - } - - @Override - public void removeScope(String name, StatsLogger statsLogger) { - // no-op - } - - @Override - public StatsLogger scope(String name) { - return new PrometheusStatsLogger(provider, completeName(name)); - } - - private String completeName(String name) { - String completeName = scope.isEmpty() ? name : Joiner.on('_').join(scope, name); - return Collector.sanitizeMetricName(completeName); - } -} diff --git a/bookkeeper-stats-providers/prometheus-metrics-provider/src/main/java/org/apache/bookkeeper/stats/prometheus/PrometheusTextFormatUtil.java b/bookkeeper-stats-providers/prometheus-metrics-provider/src/main/java/org/apache/bookkeeper/stats/prometheus/PrometheusTextFormatUtil.java deleted file mode 100644 index d2fae28004f..00000000000 --- a/bookkeeper-stats-providers/prometheus-metrics-provider/src/main/java/org/apache/bookkeeper/stats/prometheus/PrometheusTextFormatUtil.java +++ /dev/null @@ -1,150 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with this - * work for additional information regarding copyright ownership. The ASF - * licenses this file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. 
- */ -package org.apache.bookkeeper.stats.prometheus; - -import io.prometheus.client.Collector; -import io.prometheus.client.Collector.MetricFamilySamples; -import io.prometheus.client.Collector.MetricFamilySamples.Sample; -import io.prometheus.client.CollectorRegistry; - -import java.io.IOException; -import java.io.Writer; -import java.util.Enumeration; - -import org.apache.bookkeeper.stats.Counter; - -/** - * Logic to write metrics in Prometheus text format. - */ -public class PrometheusTextFormatUtil { - static void writeGauge(Writer w, String name, SimpleGauge gauge) { - // Example: - // # TYPE bookie_storage_entries_count gauge - // bookie_storage_entries_count 519 - try { - w.append("# TYPE ").append(name).append(" gauge\n"); - w.append(name).append(' ').append(gauge.getSample().toString()).append('\n'); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - static void writeCounter(Writer w, String name, Counter counter) { - // Example: - // # TYPE jvm_threads_started_total counter - // jvm_threads_started_total 59 - try { - w.append("# TYPE ").append(name).append(" counter\n"); - w.append(name).append(' ').append(counter.get().toString()).append('\n'); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - static void writeOpStat(Writer w, String name, DataSketchesOpStatsLogger opStat) { - // Example: - // # TYPE bookie_journal_JOURNAL_ADD_ENTRY summary - // bookie_journal_JOURNAL_ADD_ENTRY{success="false",quantile="0.5",} NaN - // bookie_journal_JOURNAL_ADD_ENTRY{success="false",quantile="0.75",} NaN - // bookie_journal_JOURNAL_ADD_ENTRY{success="false",quantile="0.95",} NaN - // bookie_journal_JOURNAL_ADD_ENTRY{success="false",quantile="0.99",} NaN - // bookie_journal_JOURNAL_ADD_ENTRY{success="false",quantile="0.999",} NaN - // bookie_journal_JOURNAL_ADD_ENTRY{success="false",quantile="0.9999",} NaN - // bookie_journal_JOURNAL_ADD_ENTRY{success="false",quantile="1.0",} NaN - // bookie_journal_JOURNAL_ADD_ENTRY_count{success="false",} 0.0 - // bookie_journal_JOURNAL_ADD_ENTRY_sum{success="false",} 0.0 - // bookie_journal_JOURNAL_ADD_ENTRY{success="true",quantile="0.5",} 1.706 - // bookie_journal_JOURNAL_ADD_ENTRY{success="true",quantile="0.75",} 1.89 - // bookie_journal_JOURNAL_ADD_ENTRY{success="true",quantile="0.95",} 2.121 - // bookie_journal_JOURNAL_ADD_ENTRY{success="true",quantile="0.99",} 10.708 - // bookie_journal_JOURNAL_ADD_ENTRY{success="true",quantile="0.999",} 10.902 - // bookie_journal_JOURNAL_ADD_ENTRY{success="true",quantile="0.9999",} 10.902 - // bookie_journal_JOURNAL_ADD_ENTRY{success="true",quantile="1.0",} 10.902 - // bookie_journal_JOURNAL_ADD_ENTRY_count{success="true",} 658.0 - // bookie_journal_JOURNAL_ADD_ENTRY_sum{success="true",} 1265.0800000000002 - try { - w.append("# TYPE ").append(name).append(" summary\n"); - writeQuantile(w, opStat, name, false, 0.5); - writeQuantile(w, opStat, name, false, 0.75); - writeQuantile(w, opStat, name, false, 0.95); - writeQuantile(w, opStat, name, false, 0.99); - writeQuantile(w, opStat, name, false, 0.999); - writeQuantile(w, opStat, name, false, 0.9999); - writeQuantile(w, opStat, name, false, 1.0); - writeCount(w, opStat, name, false); - writeSum(w, opStat, name, false); - - writeQuantile(w, opStat, name, true, 0.5); - writeQuantile(w, opStat, name, true, 0.75); - writeQuantile(w, opStat, name, true, 0.95); - writeQuantile(w, opStat, name, true, 0.99); - writeQuantile(w, opStat, name, true, 0.999); - writeQuantile(w, opStat, name, true, 0.9999); - writeQuantile(w, opStat, name, 
true, 1.0); - writeCount(w, opStat, name, true); - writeSum(w, opStat, name, true); - - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - private static void writeQuantile(Writer w, DataSketchesOpStatsLogger opStat, String name, Boolean success, - double quantile) throws IOException { - w.append(name).append("{success=\"").append(success.toString()).append("\",quantile=\"") - .append(Double.toString(quantile)).append("\"} ") - .append(Double.toString(opStat.getQuantileValue(success, quantile))).append('\n'); - } - - private static void writeCount(Writer w, DataSketchesOpStatsLogger opStat, String name, Boolean success) - throws IOException { - w.append(name).append("_count{success=\"").append(success.toString()).append("\"} ") - .append(Long.toString(opStat.getCount(success))).append('\n'); - } - - private static void writeSum(Writer w, DataSketchesOpStatsLogger opStat, String name, Boolean success) - throws IOException { - w.append(name).append("_sum{success=\"").append(success.toString()).append("\"} ") - .append(Double.toString(opStat.getSum(success))).append('\n'); - } - - static void writeMetricsCollectedByPrometheusClient(Writer w, CollectorRegistry registry) throws IOException { - Enumeration metricFamilySamples = registry.metricFamilySamples(); - while (metricFamilySamples.hasMoreElements()) { - MetricFamilySamples metricFamily = metricFamilySamples.nextElement(); - - for (int i = 0; i < metricFamily.samples.size(); i++) { - Sample sample = metricFamily.samples.get(i); - w.write(sample.name); - w.write('{'); - for (int j = 0; j < sample.labelNames.size(); j++) { - if (j != 0) { - w.write(", "); - } - w.write(sample.labelNames.get(j)); - w.write("=\""); - w.write(sample.labelValues.get(j)); - w.write('"'); - } - - w.write("} "); - w.write(Collector.doubleToGoString(sample.value)); - w.write('\n'); - } - } - } -} diff --git a/bookkeeper-stats-providers/prometheus-metrics-provider/src/test/java/org/apache/bookkeeper/stats/prometheus/TestPrometheusFormatter.java b/bookkeeper-stats-providers/prometheus-metrics-provider/src/test/java/org/apache/bookkeeper/stats/prometheus/TestPrometheusFormatter.java deleted file mode 100644 index f7b6ebe090b..00000000000 --- a/bookkeeper-stats-providers/prometheus-metrics-provider/src/test/java/org/apache/bookkeeper/stats/prometheus/TestPrometheusFormatter.java +++ /dev/null @@ -1,177 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with this - * work for additional information regarding copyright ownership. The ASF - * licenses this file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. 
- */ -package org.apache.bookkeeper.stats.prometheus; - -import static com.google.common.base.Preconditions.checkArgument; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; - -import com.google.common.base.MoreObjects; -import com.google.common.base.Splitter; -import com.google.common.collect.ArrayListMultimap; -import com.google.common.collect.Multimap; - -import java.io.StringWriter; -import java.util.List; -import java.util.Map; -import java.util.TreeMap; -import java.util.concurrent.TimeUnit; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -import org.apache.bookkeeper.stats.Counter; -import org.apache.bookkeeper.stats.OpStatsLogger; -import org.apache.bookkeeper.stats.StatsLogger; -import org.junit.Test; - -/** - * Test for {@link PrometheusMetricsProvider}. - */ -public class TestPrometheusFormatter { - - @Test - public void testStatsOutput() throws Exception { - PrometheusMetricsProvider provider = new PrometheusMetricsProvider(); - StatsLogger statsLogger = provider.getStatsLogger("test"); - Counter counter = statsLogger.getCounter("my_counter"); - - counter.inc(); - counter.inc(); - - OpStatsLogger opStats = statsLogger.getOpStatsLogger("op"); - opStats.registerSuccessfulEvent(10, TimeUnit.MILLISECONDS); - opStats.registerSuccessfulEvent(5, TimeUnit.MILLISECONDS); - - provider.rotateLatencyCollection(); - - StringWriter writer = new StringWriter(); - provider.writeAllMetrics(writer); - System.out.println(writer); - Multimap metrics = parseMetrics(writer.toString()); - System.out.println(metrics); - - List cm = (List) metrics.get("test_my_counter"); - assertEquals(1, cm.size()); - assertEquals(0, cm.get(0).tags.size()); - assertEquals(2.0, cm.get(0).value, 0.0); - - // test_op_sum - cm = (List) metrics.get("test_op_sum"); - assertEquals(2, cm.size()); - Metric m = cm.get(0); - assertEquals(1, cm.get(0).tags.size()); - assertEquals(0.0, m.value, 0.0); - assertEquals(1, m.tags.size()); - assertEquals("false", m.tags.get("success")); - - m = cm.get(1); - assertEquals(1, cm.get(0).tags.size()); - assertEquals(15.0, m.value, 0.0); - assertEquals(1, m.tags.size()); - assertEquals("true", m.tags.get("success")); - - // test_op_count - cm = (List) metrics.get("test_op_count"); - assertEquals(2, cm.size()); - m = cm.get(0); - assertEquals(1, cm.get(0).tags.size()); - assertEquals(0.0, m.value, 0.0); - assertEquals(1, m.tags.size()); - assertEquals("false", m.tags.get("success")); - - m = cm.get(1); - assertEquals(1, cm.get(0).tags.size()); - assertEquals(2.0, m.value, 0.0); - assertEquals(1, m.tags.size()); - assertEquals("true", m.tags.get("success")); - - // Latency - cm = (List) metrics.get("test_op"); - assertEquals(14, cm.size()); - - boolean found = false; - for (Metric mt : cm) { - if ("true".equals(mt.tags.get("success")) && "1.0".equals(mt.tags.get("quantile"))) { - assertEquals(10.0, mt.value, 0.0); - found = true; - } - } - - assertTrue(found); - } - - /** - * Hacky parsing of Prometheus text format. 
Should be good enough for unit tests. - */ - private static Multimap<String, Metric> parseMetrics(String metrics) { - Multimap<String, Metric> parsed = ArrayListMultimap.create(); - - // Examples of lines are - // jvm_threads_current{cluster="standalone",} 203.0 - // or - // pulsar_subscriptions_count{cluster="standalone", namespace="sample/standalone/ns1", - // topic="persistent://sample/standalone/ns1/test-2"} 0.0 1517945780897 - Pattern pattern = Pattern.compile("^(\\w+)(\\{([^\\}]+)\\})?\\s(-?[\\d\\w\\.]+)(\\s(\\d+))?$"); - Pattern tagsPattern = Pattern.compile("(\\w+)=\"([^\"]+)\"(,\\s?)?"); - - Splitter.on("\n").split(metrics).forEach(line -> { - if (line.isEmpty() || line.startsWith("#")) { - return; - } - - System.err.println("LINE: '" + line + "'"); - Matcher matcher = pattern.matcher(line); - System.err.println("Matches: " + matcher.matches()); - System.err.println(matcher); - - System.err.println("groups: " + matcher.groupCount()); - for (int i = 0; i < matcher.groupCount(); i++) { - System.err.println(" GROUP " + i + " -- " + matcher.group(i)); - } - - checkArgument(matcher.matches()); - String name = matcher.group(1); - - Metric m = new Metric(); - m.value = Double.valueOf(matcher.group(4)); - - String tags = matcher.group(3); - if (tags != null) { - Matcher tagsMatcher = tagsPattern.matcher(tags); - while (tagsMatcher.find()) { - String tag = tagsMatcher.group(1); - String value = tagsMatcher.group(2); - m.tags.put(tag, value); - } - } - - parsed.put(name, m); - }); - - return parsed; - } - - static class Metric { - Map<String, String> tags = new TreeMap<>(); - double value; - - @Override - public String toString() { - return MoreObjects.toStringHelper(this).add("tags", tags).add("value", value).toString(); - } - } -} diff --git a/bookkeeper-stats-providers/prometheus-metrics-provider/src/test/java/org/apache/bookkeeper/stats/prometheus/TestPrometheusMetricsProvider.java b/bookkeeper-stats-providers/prometheus-metrics-provider/src/test/java/org/apache/bookkeeper/stats/prometheus/TestPrometheusMetricsProvider.java deleted file mode 100644 index 8e90e04024e..00000000000 --- a/bookkeeper-stats-providers/prometheus-metrics-provider/src/test/java/org/apache/bookkeeper/stats/prometheus/TestPrometheusMetricsProvider.java +++ /dev/null @@ -1,118 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with this - * work for additional information regarding copyright ownership. The ASF - * licenses this file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. 
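For reference, the exposition text that parseMetrics consumes can be exercised on its own; a minimal sketch reusing the same two regular expressions as the deleted test above (the sample line is hypothetical):

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class PrometheusLineParseDemo {
    public static void main(String[] args) {
        // Same patterns as TestPrometheusFormatter#parseMetrics above.
        Pattern linePattern = Pattern.compile("^(\\w+)(\\{([^\\}]+)\\})?\\s(-?[\\d\\w\\.]+)(\\s(\\d+))?$");
        Pattern tagsPattern = Pattern.compile("(\\w+)=\"([^\"]+)\"(,\\s?)?");

        String sample = "test_op_count{success=\"true\"} 2.0"; // hypothetical exposition line
        Matcher matcher = linePattern.matcher(sample);
        if (matcher.matches()) {
            System.out.println("name  = " + matcher.group(1)); // test_op_count
            System.out.println("value = " + matcher.group(4)); // 2.0
            Matcher tags = tagsPattern.matcher(matcher.group(3));
            while (tags.find()) {
                System.out.println(tags.group(1) + " -> " + tags.group(2)); // success -> true
            }
        }
    }
}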
- */ -package org.apache.bookkeeper.stats.prometheus; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertSame; - -import lombok.Cleanup; -import org.apache.bookkeeper.stats.Counter; -import org.apache.bookkeeper.stats.OpStatsLogger; -import org.apache.bookkeeper.stats.StatsLogger; -import org.apache.commons.configuration.PropertiesConfiguration; -import org.junit.Test; - -/** - * Unit test of {@link PrometheusMetricsProvider}. - */ -public class TestPrometheusMetricsProvider { - - @Test - public void testCache() { - PrometheusMetricsProvider provider = new PrometheusMetricsProvider(); - - StatsLogger statsLogger = provider.getStatsLogger("test"); - - OpStatsLogger opStatsLogger1 = statsLogger.getOpStatsLogger("optest"); - OpStatsLogger opStatsLogger2 = statsLogger.getOpStatsLogger("optest"); - assertSame(opStatsLogger1, opStatsLogger2); - - Counter counter1 = statsLogger.getCounter("countertest"); - Counter counter2 = statsLogger.getCounter("countertest"); - assertSame(counter1, counter2); - - StatsLogger scope1 = statsLogger.scope("scopetest"); - StatsLogger scope2 = statsLogger.scope("scopetest"); - assertSame(scope1, scope2); - } - - @Test - public void testStartNoHttp() { - PropertiesConfiguration config = new PropertiesConfiguration(); - config.setProperty(PrometheusMetricsProvider.PROMETHEUS_STATS_HTTP_ENABLE, false); - PrometheusMetricsProvider provider = new PrometheusMetricsProvider(); - try { - provider.start(config); - assertNull(provider.server); - } finally { - provider.stop(); - } - } - - @Test - public void testStartNoHttpWhenBkHttpEnabled() { - PropertiesConfiguration config = new PropertiesConfiguration(); - config.setProperty(PrometheusMetricsProvider.PROMETHEUS_STATS_HTTP_ENABLE, true); - config.setProperty("httpServerEnabled", true); - @Cleanup("stop") PrometheusMetricsProvider provider = new PrometheusMetricsProvider(); - provider.start(config); - assertNull(provider.server); - } - - @Test - public void testStartWithHttp() { - PropertiesConfiguration config = new PropertiesConfiguration(); - config.setProperty(PrometheusMetricsProvider.PROMETHEUS_STATS_HTTP_ENABLE, true); - config.setProperty(PrometheusMetricsProvider.PROMETHEUS_STATS_HTTP_PORT, 0); // ephemeral - PrometheusMetricsProvider provider = new PrometheusMetricsProvider(); - try { - provider.start(config); - assertNotNull(provider.server); - } finally { - provider.stop(); - } - } - - @Test - public void testCounter() { - LongAdderCounter counter = new LongAdderCounter(); - long value = counter.get(); - assertEquals(0L, value); - counter.inc(); - assertEquals(1L, counter.get().longValue()); - counter.dec(); - assertEquals(0L, counter.get().longValue()); - counter.add(3); - assertEquals(3L, counter.get().longValue()); - } - - @Test - public void testTwoCounters() throws Exception { - PrometheusMetricsProvider provider = new PrometheusMetricsProvider(); - StatsLogger statsLogger = provider.getStatsLogger("test"); - - Counter counter1 = statsLogger.getCounter("counter"); - Counter counter2 = statsLogger.getCounter("counter"); - assertEquals(counter1, counter2); - assertSame(counter1, counter2); - - assertEquals(1, provider.counters.size()); - } - -} diff --git a/bookkeeper-stats-providers/twitter-finagle-provider/pom.xml b/bookkeeper-stats-providers/twitter-finagle-provider/pom.xml deleted file mode 100644 index 90dcb818faa..00000000000 --- 
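The deleted provider tests above drive everything through configuration; a minimal start/stop sketch along the same lines (uses only the constants and calls shown in the tests; assumes the provider and commons-configuration on the classpath):

import org.apache.bookkeeper.stats.prometheus.PrometheusMetricsProvider;
import org.apache.commons.configuration.PropertiesConfiguration;

public class PrometheusProviderDemo {
    public static void main(String[] args) {
        PropertiesConfiguration conf = new PropertiesConfiguration();
        // Enable the HTTP endpoint on an ephemeral port, as in testStartWithHttp above.
        conf.setProperty(PrometheusMetricsProvider.PROMETHEUS_STATS_HTTP_ENABLE, true);
        conf.setProperty(PrometheusMetricsProvider.PROMETHEUS_STATS_HTTP_PORT, 0);
        PrometheusMetricsProvider provider = new PrometheusMetricsProvider();
        try {
            provider.start(conf);
            provider.getStatsLogger("demo").getCounter("started").inc();
        } finally {
            provider.stop();
        }
    }
}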
a/bookkeeper-stats-providers/twitter-finagle-provider/pom.xml +++ /dev/null @@ -1,41 +0,0 @@ - - - - 4.0.0 - - bookkeeper - org.apache.bookkeeper - 4.9.0-SNAPSHOT - ../.. - - org.apache.bookkeeper.stats - twitter-finagle-provider - Apache BookKeeper :: Stats Providers :: Twitter Finagle Stats - http://maven.apache.org - - - org.apache.bookkeeper.stats - bookkeeper-stats-api - ${project.parent.version} - - - com.twitter - finagle-core_2.11 - - - diff --git a/bookkeeper-stats-providers/twitter-finagle-provider/src/main/java/org/apache/bookkeeper/stats/twitter/finagle/CounterImpl.java b/bookkeeper-stats-providers/twitter-finagle-provider/src/main/java/org/apache/bookkeeper/stats/twitter/finagle/CounterImpl.java deleted file mode 100644 index 192474abbb9..00000000000 --- a/bookkeeper-stats-providers/twitter-finagle-provider/src/main/java/org/apache/bookkeeper/stats/twitter/finagle/CounterImpl.java +++ /dev/null @@ -1,60 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.bookkeeper.stats.twitter.finagle; - -import com.twitter.finagle.stats.StatsReceiver; -import org.apache.bookkeeper.stats.Counter; - -/** - * Note: this counter doesn't support resetting values or getting the current value. - * It also has a limitation in size: max bound is signed integer, not long. - */ -class CounterImpl implements Counter { - private final com.twitter.finagle.stats.Counter counter; - - public CounterImpl(final String name, - final StatsReceiver stats) { - this.counter = stats.counter0(name); - } - - @Override - public void clear() { /* not supported */ } - - @Override - public void inc() { - this.counter.incr(); - } - - @Override - public void dec() { - this.counter.incr(-1); - } - - @Override - public void add(final long delta) { - if (delta < Integer.MIN_VALUE || delta > Integer.MAX_VALUE) { - throw new IllegalArgumentException("This counter doesn't support long values"); - } - this.counter.incr((int) delta); - } - - @Override - public Long get() { - return null; // not supported - } -} diff --git a/bookkeeper-stats-providers/twitter-finagle-provider/src/main/java/org/apache/bookkeeper/stats/twitter/finagle/FinagleStatsLoggerImpl.java b/bookkeeper-stats-providers/twitter-finagle-provider/src/main/java/org/apache/bookkeeper/stats/twitter/finagle/FinagleStatsLoggerImpl.java deleted file mode 100644 index 0aa7d12e370..00000000000 --- a/bookkeeper-stats-providers/twitter-finagle-provider/src/main/java/org/apache/bookkeeper/stats/twitter/finagle/FinagleStatsLoggerImpl.java +++ /dev/null @@ -1,88 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.bookkeeper.stats.twitter.finagle; - -import com.twitter.finagle.stats.StatsReceiver; -import com.twitter.util.Function0; -import java.util.Arrays; -import java.util.HashMap; -import java.util.Map; -import org.apache.bookkeeper.stats.Counter; -import org.apache.bookkeeper.stats.Gauge; -import org.apache.bookkeeper.stats.OpStatsLogger; -import org.apache.bookkeeper.stats.StatsLogger; -import scala.collection.Seq; - -/** - * A Finagle Stats library based {@link StatsLogger} implementation. - */ -public class FinagleStatsLoggerImpl implements StatsLogger { - private final StatsReceiver stats; - // keep the references for finagle gauges. they are destroyed when the stats logger is destroyed. - final Map finagleGauges; - - public FinagleStatsLoggerImpl(final StatsReceiver stats) { - this.stats = stats; - this.finagleGauges = new HashMap(); - } - - @Override - public OpStatsLogger getOpStatsLogger(final String name) { - return new OpStatsLoggerImpl(name, this.stats); - } - - @Override - public Counter getCounter(final String name) { - return new CounterImpl(name, this.stats); - } - - @Override - public void registerGauge(final String name, final Gauge gauge) { - // This is done to inter-op with Scala Seq - final Seq gaugeName = scala.collection.JavaConversions.asScalaBuffer(Arrays.asList(name)).toList(); - synchronized (finagleGauges) { - finagleGauges.put(gauge, this.stats.addGauge(gaugeName, gaugeProvider(gauge))); - } - } - - @Override - public void unregisterGauge(String name, Gauge gauge) { - synchronized (finagleGauges) { - finagleGauges.remove(gauge); - } - } - - private Function0 gaugeProvider(final Gauge gauge) { - return new Function0() { - @Override - public Object apply() { - return gauge.getSample().floatValue(); - } - }; - } - - @Override - public StatsLogger scope(String name) { - return new FinagleStatsLoggerImpl(this.stats.scope(name)); - } - - @Override - public void removeScope(String name, StatsLogger statsLogger) { - // no-op - } -} diff --git a/bookkeeper-stats-providers/twitter-finagle-provider/src/main/java/org/apache/bookkeeper/stats/twitter/finagle/FinagleStatsProvider.java b/bookkeeper-stats-providers/twitter-finagle-provider/src/main/java/org/apache/bookkeeper/stats/twitter/finagle/FinagleStatsProvider.java deleted file mode 100644 index aff129d9846..00000000000 --- a/bookkeeper-stats-providers/twitter-finagle-provider/src/main/java/org/apache/bookkeeper/stats/twitter/finagle/FinagleStatsProvider.java +++ /dev/null @@ -1,67 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.bookkeeper.stats.twitter.finagle; - -import com.twitter.finagle.stats.StatsReceiver; -import org.apache.bookkeeper.stats.CachingStatsProvider; -import org.apache.bookkeeper.stats.StatsLogger; -import org.apache.bookkeeper.stats.StatsProvider; -import org.apache.commons.configuration.Configuration; - -/** - * Main entry point to use Finagle stats for Bookkeeper. - * - *

          There's no requirement to start or stop it.

          - */ -public class FinagleStatsProvider implements StatsProvider { - private final StatsReceiver stats; - private final CachingStatsProvider cachingStatsProvider; - - public FinagleStatsProvider(final StatsReceiver stats) { - this.stats = stats; - this.cachingStatsProvider = new CachingStatsProvider( - new StatsProvider() { - @Override - public void start(Configuration conf) { - // nop - } - - @Override - public void stop() { - // nop - } - - @Override - public StatsLogger getStatsLogger(String scope) { - return new FinagleStatsLoggerImpl(stats.scope(scope)); - } - } - ); - } - - @Override - public void start(Configuration conf) { /* no-op */ } - - @Override - public void stop() { /* no-op */ } - - @Override - public StatsLogger getStatsLogger(final String scope) { - return this.cachingStatsProvider.getStatsLogger(scope); - } -} diff --git a/bookkeeper-stats-providers/twitter-finagle-provider/src/main/java/org/apache/bookkeeper/stats/twitter/finagle/OpStatsLoggerImpl.java b/bookkeeper-stats-providers/twitter-finagle-provider/src/main/java/org/apache/bookkeeper/stats/twitter/finagle/OpStatsLoggerImpl.java deleted file mode 100644 index 1fd61feb635..00000000000 --- a/bookkeeper-stats-providers/twitter-finagle-provider/src/main/java/org/apache/bookkeeper/stats/twitter/finagle/OpStatsLoggerImpl.java +++ /dev/null @@ -1,76 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.bookkeeper.stats.twitter.finagle; - -import com.twitter.finagle.stats.Stat; -import com.twitter.finagle.stats.StatsReceiver; -import java.util.concurrent.TimeUnit; -import org.apache.bookkeeper.stats.OpStatsData; -import org.apache.bookkeeper.stats.OpStatsLogger; - -/** - * A percentile stat that will delegate to Finagle stats' implementation library - * to compute the percentiles. - * - *

          Note: metrics will be exposed in form $name/success.p99 for successful events, - * and $name/failure.p99 for failed ones.

- */ -public class OpStatsLoggerImpl implements OpStatsLogger { - private static final OpStatsData NULL_OP_STATS = new OpStatsData(0, 0, 0, new long[6]); - private final Stat success; - private final Stat failure; - - public OpStatsLoggerImpl(final String name, final StatsReceiver stats) { - this.success = stats.scope(String.format("%s/success", name)).stat0(name); - this.failure = stats.scope(String.format("%s/failure", name)).stat0(name); - } - - @Override - public void registerFailedEvent(long eventLatency, TimeUnit unit) { - this.failure.add(unit.toMillis(eventLatency)); - } - - @Override - public void registerSuccessfulEvent(long eventLatency, TimeUnit unit) { - this.success.add(unit.toMillis(eventLatency)); - } - - @Override - public void registerSuccessfulValue(final long value) { - this.success.add(value); - } - - @Override - public void registerFailedValue(final long value) { - this.failure.add(value); - } - - /** - * We don't need to support percentiles as a part of this provider, - * since they're part of the Stats implementation library. - * - * @return dummy null-stats object - */ - @Override - public OpStatsData toOpStatsData() { - return NULL_OP_STATS; - } - - @Override - public void clear() { /* not supported */ } -} diff --git a/bookkeeper-stats-providers/twitter-finagle-provider/src/main/java/org/apache/bookkeeper/stats/twitter/finagle/package-info.java b/bookkeeper-stats-providers/twitter-finagle-provider/src/main/java/org/apache/bookkeeper/stats/twitter/finagle/package-info.java deleted file mode 100644 index a839cd32062..00000000000 --- a/bookkeeper-stats-providers/twitter-finagle-provider/src/main/java/org/apache/bookkeeper/stats/twitter/finagle/package-info.java +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/** - * A stats provider implemented based on finagle stats library. - */ -package org.apache.bookkeeper.stats.twitter.finagle; diff --git a/bookkeeper-stats-providers/twitter-ostrich-provider/pom.xml b/bookkeeper-stats-providers/twitter-ostrich-provider/pom.xml deleted file mode 100644 index f1662e28223..00000000000 --- a/bookkeeper-stats-providers/twitter-ostrich-provider/pom.xml +++ /dev/null @@ -1,41 +0,0 @@ - - - - 4.0.0 - - bookkeeper - org.apache.bookkeeper - 4.9.0-SNAPSHOT - ../.. 
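Before moving on through the Ostrich build file, a minimal usage sketch of the Finagle classes deleted above (assumes finagle-core on the classpath; the in-memory receiver is just one convenient StatsReceiver):

import com.twitter.finagle.stats.InMemoryStatsReceiver;
import java.util.concurrent.TimeUnit;
import org.apache.bookkeeper.stats.OpStatsLogger;
import org.apache.bookkeeper.stats.StatsLogger;
import org.apache.bookkeeper.stats.twitter.finagle.FinagleStatsProvider;

public class FinagleProviderDemo {
    public static void main(String[] args) {
        // No start()/stop() required, per the FinagleStatsProvider javadoc above.
        FinagleStatsProvider provider = new FinagleStatsProvider(new InMemoryStatsReceiver());
        StatsLogger logger = provider.getStatsLogger("bookie");
        OpStatsLogger addEntry = logger.getOpStatsLogger("add_entry"); // exported as add_entry/success.p99 etc.
        addEntry.registerSuccessfulEvent(5, TimeUnit.MILLISECONDS);
        logger.getCounter("requests").inc();
    }
}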
- - org.apache.bookkeeper.stats - twitter-ostrich-provider - Apache BookKeeper :: Stats Providers :: Twitter Ostrich Stats - http://maven.apache.org - - - org.apache.bookkeeper.stats - bookkeeper-stats-api - ${project.parent.version} - - - com.twitter - ostrich_2.9.2 - - - diff --git a/bookkeeper-stats-providers/twitter-ostrich-provider/src/main/java/org/apache/bookkeeper/stats/twitter/ostrich/CounterImpl.java b/bookkeeper-stats-providers/twitter-ostrich-provider/src/main/java/org/apache/bookkeeper/stats/twitter/ostrich/CounterImpl.java deleted file mode 100644 index bd5ca697a2c..00000000000 --- a/bookkeeper-stats-providers/twitter-ostrich-provider/src/main/java/org/apache/bookkeeper/stats/twitter/ostrich/CounterImpl.java +++ /dev/null @@ -1,53 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with this - * work for additional information regarding copyright ownership. The ASF - * licenses this file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - */ -package org.apache.bookkeeper.stats.twitter.ostrich; - -import org.apache.bookkeeper.stats.Counter; - -class CounterImpl implements Counter { - - private final com.twitter.ostrich.stats.Counter ostrichCounter; - - CounterImpl(com.twitter.ostrich.stats.Counter ostrichCounter) { - this.ostrichCounter = ostrichCounter; - } - - @Override - public void clear() { - this.ostrichCounter.reset(); - } - - @Override - public void inc() { - this.ostrichCounter.incr(); - } - - @Override - public void dec() { - this.ostrichCounter.incr(-1); - } - - @Override - public void add(long delta) { - this.ostrichCounter.incr((int) delta); - } - - @Override - public Long get() { - return this.ostrichCounter.apply(); - } -} diff --git a/bookkeeper-stats-providers/twitter-ostrich-provider/src/main/java/org/apache/bookkeeper/stats/twitter/ostrich/OpStatsLoggerImpl.java b/bookkeeper-stats-providers/twitter-ostrich-provider/src/main/java/org/apache/bookkeeper/stats/twitter/ostrich/OpStatsLoggerImpl.java deleted file mode 100644 index b67976f5606..00000000000 --- a/bookkeeper-stats-providers/twitter-ostrich-provider/src/main/java/org/apache/bookkeeper/stats/twitter/ostrich/OpStatsLoggerImpl.java +++ /dev/null @@ -1,114 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with this - * work for additional information regarding copyright ownership. The ASF - * licenses this file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
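One pitfall in the Ostrich CounterImpl above: add(long) narrows the delta to int, so large deltas silently overflow (the Finagle CounterImpl earlier rejects them instead). A self-contained illustration of the narrowing:

public class IntNarrowingDemo {
    public static void main(String[] args) {
        long delta = 1L << 33;        // 8589934592
        int narrowed = (int) delta;   // same cast as CounterImpl#add
        System.out.println(narrowed); // prints 0: the high bits are dropped
    }
}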
See the - * License for the specific language governing permissions and limitations under - * the License. - */ -package org.apache.bookkeeper.stats.twitter.ostrich; - -import java.util.concurrent.TimeUnit; -import org.apache.bookkeeper.stats.OpStatsData; -import org.apache.bookkeeper.stats.OpStatsLogger; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -class OpStatsLoggerImpl implements OpStatsLogger { - - static final Logger LOG = LoggerFactory.getLogger(OpStatsLoggerImpl.class); - - static final double[] PERCENTILES = new double[] { - 0.1, 0.5, 0.9, 0.99, 0.999, 0.9999 - }; - - private final String scope; - private final com.twitter.ostrich.stats.Counter successCounter; - private final com.twitter.ostrich.stats.Counter failureCounter; - private final com.twitter.ostrich.stats.Metric successMetric; - private final com.twitter.ostrich.stats.Metric failureMetric; - - OpStatsLoggerImpl(String scope, com.twitter.ostrich.stats.StatsProvider statsProvider) { - this.scope = scope; - successCounter = statsProvider.getCounter(statName("requests/success")); - failureCounter = statsProvider.getCounter(statName("requests/failure")); - successMetric = statsProvider.getMetric(statName("latency/success")); - failureMetric = statsProvider.getMetric(statName("latency/failure")); - } - - private String statName(String statName) { - return String.format("%s/%s", scope, statName); - } - - @Override - public void registerFailedEvent(long eventLatency, TimeUnit unit) { - if (eventLatency < 0) { - LOG.debug("{} : tried to register negative failure", scope); - } else { - failureMetric.add((int) unit.toMillis(eventLatency)); - failureCounter.incr(); - } - } - - @Override - public void registerSuccessfulEvent(long eventLatency, TimeUnit unit) { - if (eventLatency < 0) { - LOG.debug("{} : tried to register negative success", scope); - } else { - successMetric.add((int) unit.toMillis(eventLatency)); - successCounter.incr(); - } - } - - @Override - public void registerSuccessfulValue(long value) { - if (value < 0) { - LOG.debug("{} : tried to register negative success", scope); - } else { - successMetric.add((int) value); - successCounter.incr(); - } - } - - @Override - public void registerFailedValue(long value) { - if (value < 0) { - LOG.debug("{} : tried to register negative failure", scope); - } else { - failureMetric.add((int) value); - failureCounter.incr(); - } - } - - @Override - public OpStatsData toOpStatsData() { - long numSuccess = successCounter.apply(); - long numFailures = failureCounter.apply(); - com.twitter.ostrich.stats.Distribution distribution = successMetric.apply(); - com.twitter.ostrich.stats.Histogram histogram = distribution.histogram(); - double avgLatency = distribution.average(); - long[] percentiles = new long[PERCENTILES.length]; - int i = 0; - for (double percentile : PERCENTILES) { - percentiles[i] = histogram.getPercentile(percentile); - ++i; - } - return new OpStatsData(numSuccess, numFailures, avgLatency, percentiles); - } - - @Override - public void clear() { - successCounter.reset(); - failureCounter.reset(); - successMetric.clear(); - failureMetric.clear(); - } -} diff --git a/bookkeeper-stats-providers/twitter-ostrich-provider/src/main/java/org/apache/bookkeeper/stats/twitter/ostrich/OstrichProvider.java b/bookkeeper-stats-providers/twitter-ostrich-provider/src/main/java/org/apache/bookkeeper/stats/twitter/ostrich/OstrichProvider.java deleted file mode 100644 index 1cc7bf6958f..00000000000 --- 
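The Ostrich op-stats logger above fans each operation out into four underlying stats. A small self-contained sketch of the resulting names, mirroring its statName helper (the scope is hypothetical):

public class OstrichStatNamesDemo {
    // Mirrors OpStatsLoggerImpl#statName above.
    private static String statName(String scope, String name) {
        return String.format("%s/%s", scope, name);
    }

    public static void main(String[] args) {
        String scope = "bookie/add_entry"; // hypothetical scope
        for (String suffix : new String[] {
                "requests/success", "requests/failure", "latency/success", "latency/failure"}) {
            System.out.println(statName(scope, suffix));
        }
    }
}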
a/bookkeeper-stats-providers/twitter-ostrich-provider/src/main/java/org/apache/bookkeeper/stats/twitter/ostrich/OstrichProvider.java +++ /dev/null @@ -1,121 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with this - * work for additional information regarding copyright ownership. The ASF - * licenses this file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - */ -package org.apache.bookkeeper.stats.twitter.ostrich; - -import com.twitter.ostrich.admin.CustomHttpHandler; -import com.twitter.ostrich.admin.RuntimeEnvironment; -import com.twitter.ostrich.admin.ServiceTracker; -import com.twitter.ostrich.admin.StatsFactory; -import com.twitter.util.Duration; -import java.util.concurrent.TimeUnit; -import org.apache.bookkeeper.stats.CachingStatsProvider; -import org.apache.bookkeeper.stats.StatsLogger; -import org.apache.bookkeeper.stats.StatsProvider; -import org.apache.commons.configuration.Configuration; -import scala.Some; -import scala.collection.immutable.$colon$colon; -import scala.collection.immutable.List; -import scala.collection.immutable.List$; -import scala.collection.immutable.Map; -import scala.collection.immutable.Map$; -import scala.util.matching.Regex; - -/** - * A Twitter Ostrich library based {@link StatsProvider} implementation. - */ -public class OstrichProvider implements StatsProvider { - - protected static final String STATS_EXPORT = "statsExport"; - protected static final String STATS_HTTP_PORT = "statsHttpPort"; - protected static final String SHOULD_SHUTDOWN_SERVICE_TRACKER = "shouldShutdownServiceTracker"; - - private com.twitter.ostrich.admin.AdminHttpService statsExporter = null; - private final CachingStatsProvider cachingStatsProvider; - private boolean shutdownServiceTracker = false; - - private static List list(Duration ... 
ts) { - List<Duration> result = List$.MODULE$.empty(); - for (int i = ts.length; i > 0; i--) { - result = new $colon$colon<>(ts[i - 1], result); - } - return result; - } - - private static List emptyList() { - return List$.MODULE$.empty(); - } - - private static Map emptyMap() { - Map result = Map$.MODULE$.empty(); - return result; - } - - public OstrichProvider() { - this(""); - } - - public OstrichProvider(final String collectionName) { - this.cachingStatsProvider = new CachingStatsProvider(new StatsProvider() { - @Override - public void start(Configuration conf) { - // nop - } - - @Override - public void stop() { - // nop - } - - @Override - public StatsLogger getStatsLogger(String scope) { - return new OstrichStatsLoggerImpl(scope, - com.twitter.ostrich.stats.Stats.get(collectionName)); - } - }); - } - - @Override - public void start(Configuration conf) { - if (conf.getBoolean(STATS_EXPORT, false)) { - statsExporter = new com.twitter.ostrich.admin.AdminServiceFactory( - conf.getInt(STATS_HTTP_PORT, 9002), 20, OstrichProvider.emptyList(), - Some.apply(""), OstrichProvider.emptyList(), - OstrichProvider.emptyMap(), list(Duration.apply(1, TimeUnit.MINUTES)) - ).apply(RuntimeEnvironment.apply(this, new String[0])); - this.shutdownServiceTracker = conf.getBoolean(SHOULD_SHUTDOWN_SERVICE_TRACKER, false); - } - } - - @Override - public void stop() { - if (null != statsExporter) { - statsExporter.shutdown(); - if (shutdownServiceTracker) { - // The Ostrich admin service registers some threads in the service tracker, - // and its shutdown() doesn't stop them; stopping the service tracker shuts - // them down, but that potentially has side effects, so a flag lets the - // caller decide. - ServiceTracker.shutdown(); - } - } - } - - @Override - public StatsLogger getStatsLogger(String scope) { - return cachingStatsProvider.getStatsLogger(scope); - } -} diff --git a/bookkeeper-stats-providers/twitter-ostrich-provider/src/main/java/org/apache/bookkeeper/stats/twitter/ostrich/OstrichStatsLoggerImpl.java b/bookkeeper-stats-providers/twitter-ostrich-provider/src/main/java/org/apache/bookkeeper/stats/twitter/ostrich/OstrichStatsLoggerImpl.java deleted file mode 100644 index 52027fce65d..00000000000 --- a/bookkeeper-stats-providers/twitter-ostrich-provider/src/main/java/org/apache/bookkeeper/stats/twitter/ostrich/OstrichStatsLoggerImpl.java +++ /dev/null @@ -1,78 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with this - * work for additional information regarding copyright ownership. The ASF - * licenses this file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. 
- */ -package org.apache.bookkeeper.stats.twitter.ostrich; - -import org.apache.bookkeeper.stats.Counter; -import org.apache.bookkeeper.stats.Gauge; -import org.apache.bookkeeper.stats.OpStatsLogger; -import org.apache.bookkeeper.stats.StatsLogger; -import scala.Function0; -import scala.runtime.AbstractFunction0; - -/** - * Implementation of ostrich logger. - */ -class OstrichStatsLoggerImpl implements StatsLogger { - - protected final String scope; - protected final com.twitter.ostrich.stats.StatsProvider ostrichProvider; - - OstrichStatsLoggerImpl(String scope, com.twitter.ostrich.stats.StatsProvider ostrichProvider) { - this.scope = scope; - this.ostrichProvider = ostrichProvider; - } - - @Override - public OpStatsLogger getOpStatsLogger(String statName) { - return new OpStatsLoggerImpl(getStatName(statName), ostrichProvider); - } - - @Override - public Counter getCounter(String statName) { - return new CounterImpl(ostrichProvider.getCounter(getStatName(statName))); - } - - @Override - public void registerGauge(final String statName, final Gauge gauge) { - Function0 gaugeFunc = new AbstractFunction0() { - @Override - public Object apply() { - return gauge.getSample().doubleValue(); - } - }; - ostrichProvider.addGauge(getStatName(statName), gaugeFunc); - } - - @Override - public void unregisterGauge(String statName, Gauge gauge) { - ostrichProvider.clearGauge(getStatName(statName)); - } - - private String getStatName(String statName) { - return String.format("%s/%s", scope, statName); - } - - @Override - public StatsLogger scope(String scope) { - return new OstrichStatsLoggerImpl(getStatName(scope), ostrichProvider); - } - - @Override - public void removeScope(String name, StatsLogger statsLogger) { - // no-op - } -} diff --git a/bookkeeper-stats-providers/twitter-ostrich-provider/src/main/java/org/apache/bookkeeper/stats/twitter/ostrich/package-info.java b/bookkeeper-stats-providers/twitter-ostrich-provider/src/main/java/org/apache/bookkeeper/stats/twitter/ostrich/package-info.java deleted file mode 100644 index 2db25846eda..00000000000 --- a/bookkeeper-stats-providers/twitter-ostrich-provider/src/main/java/org/apache/bookkeeper/stats/twitter/ostrich/package-info.java +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with this - * work for additional information regarding copyright ownership. The ASF - * licenses this file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - */ -/** - * A stats provider implemented based on Twitter Ostrich library. - */ -package org.apache.bookkeeper.stats.twitter.ostrich; diff --git a/bookkeeper-stats-providers/twitter-science-provider/pom.xml b/bookkeeper-stats-providers/twitter-science-provider/pom.xml deleted file mode 100644 index 278a5b5262c..00000000000 --- a/bookkeeper-stats-providers/twitter-science-provider/pom.xml +++ /dev/null @@ -1,84 +0,0 @@ - - - - 4.0.0 - - bookkeeper - org.apache.bookkeeper - 4.9.0-SNAPSHOT - ../.. 
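Tying the Ostrich pieces together, a minimal sketch of enabling the admin HTTP exporter through configuration (keys and defaults as defined in OstrichProvider above; assumes Ostrich and commons-configuration on the classpath):

import org.apache.bookkeeper.stats.StatsLogger;
import org.apache.bookkeeper.stats.twitter.ostrich.OstrichProvider;
import org.apache.commons.configuration.PropertiesConfiguration;

public class OstrichProviderDemo {
    public static void main(String[] args) {
        PropertiesConfiguration conf = new PropertiesConfiguration();
        conf.setProperty("statsExport", true);   // STATS_EXPORT
        conf.setProperty("statsHttpPort", 9002); // STATS_HTTP_PORT; 9002 is also the default
        OstrichProvider provider = new OstrichProvider();
        try {
            provider.start(conf);
            StatsLogger logger = provider.getStatsLogger("demo");
            logger.getCounter("started").inc();
        } finally {
            provider.stop();
        }
    }
}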
- - org.apache.bookkeeper.stats - twitter-science-provider - Apache BookKeeper :: Stats Providers :: Twitter Science Stats - - - - org.apache.bookkeeper.stats - bookkeeper-stats-api - ${project.parent.version} - - - com.twitter.common - stats - 0.0.64 - - - com.twitter.common - net-http-handlers - - - javax.servlet - servlet-api - - - 0.0.39 - compile - - - com.twitter.common - stats-time-series - 0.0.36 - compile - - - com.twitter.common - stats-jvm - 0.0.33 - compile - - - org.eclipse.jetty - jetty-server - - - org.eclipse.jetty - jetty-servlet - - - - - twitter - Twitter repo - default - http://maven.twttr.com - - - diff --git a/bookkeeper-stats-providers/twitter-science-provider/src/main/java/org/apache/bookkeeper/stats/twitter/science/CounterImpl.java b/bookkeeper-stats-providers/twitter-science-provider/src/main/java/org/apache/bookkeeper/stats/twitter/science/CounterImpl.java deleted file mode 100644 index 689a7e8a26b..00000000000 --- a/bookkeeper-stats-providers/twitter-science-provider/src/main/java/org/apache/bookkeeper/stats/twitter/science/CounterImpl.java +++ /dev/null @@ -1,69 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with this - * work for additional information regarding copyright ownership. The ASF - * licenses this file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - */ -package org.apache.bookkeeper.stats.twitter.science; - -import com.twitter.common.stats.Rate; -import com.twitter.common.stats.Stats; -import java.util.concurrent.atomic.AtomicLong; -import org.apache.bookkeeper.stats.Counter; - -/** - * This will export the value and the rate (per sec) to {@link org.apache.bookkeeper.stats.Stats}. - */ -public class CounterImpl implements Counter { - // The name used to export this stat - private String name; - private AtomicLong value; - - public CounterImpl(String name) { - this.name = name; - value = new AtomicLong(0); - setUpStatsExport(); - } - - @Override - public synchronized void clear() { - value.getAndSet(0); - } - - @Override - public Long get() { - return value.get(); - } - - private void setUpStatsExport() { - // Export the value. - Stats.export(name, value); - // Export the rate of this value. 
- Stats.export(Rate.of(name + "_per_sec", value).build()); - } - - @Override - public void inc() { - value.incrementAndGet(); - } - - @Override - public void dec() { - value.decrementAndGet(); - } - - @Override - public void add(long delta) { - value.addAndGet(delta); - } -} diff --git a/bookkeeper-stats-providers/twitter-science-provider/src/main/java/org/apache/bookkeeper/stats/twitter/science/HTTPStatsExporter.java b/bookkeeper-stats-providers/twitter-science-provider/src/main/java/org/apache/bookkeeper/stats/twitter/science/HTTPStatsExporter.java deleted file mode 100644 index 34e0eea6d5a..00000000000 --- a/bookkeeper-stats-providers/twitter-science-provider/src/main/java/org/apache/bookkeeper/stats/twitter/science/HTTPStatsExporter.java +++ /dev/null @@ -1,80 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.bookkeeper.stats.twitter.science; - -import com.twitter.common.application.ShutdownRegistry; -import com.twitter.common.base.Supplier; -import com.twitter.common.net.http.handlers.VarsHandler; -import com.twitter.common.net.http.handlers.VarsJsonHandler; -import com.twitter.common.quantity.Amount; -import com.twitter.common.quantity.Time; -import com.twitter.common.stats.JvmStats; -import com.twitter.common.stats.Stat; -import com.twitter.common.stats.Stats; -import com.twitter.common.stats.TimeSeriesRepository; -import com.twitter.common.stats.TimeSeriesRepositoryImpl; -import org.eclipse.jetty.server.Server; -import org.eclipse.jetty.servlet.ServletContextHandler; -import org.eclipse.jetty.servlet.ServletHolder; - -/** - * Starts a jetty server on a configurable port and the samplers to export stats. - */ -public class HTTPStatsExporter { - final Server jettyServer; - final ShutdownRegistry.ShutdownRegistryImpl shutDownRegistry; - final int port; - - public HTTPStatsExporter(int port) { - // Create the ShutdownRegistry needed for our sampler - this.shutDownRegistry = new ShutdownRegistry.ShutdownRegistryImpl(); - this.port = port; - this.jettyServer = new Server(port); - } - - public void start() throws Exception { - // Start the sampler. Sample every 1 second and retain for 1 hour - TimeSeriesRepository sampler = new TimeSeriesRepositoryImpl(Stats.STAT_REGISTRY, - Amount.of(1L, Time.SECONDS), Amount.of(1L, Time.HOURS)); - sampler.start(this.shutDownRegistry); - // Export JVM stats - JvmStats.export(); - // Configure handlers - Supplier>> supplier = new Supplier>>() { - @Override - public Iterable> get() { - return Stats.getVariables(); - } - }; - - // Start jetty. 
- ServletContextHandler context = new ServletContextHandler(); - context.setContextPath("/"); - this.jettyServer.setHandler(context); - context.addServlet(new ServletHolder(new VarsHandler(supplier)), "/vars"); - context.addServlet(new ServletHolder(new VarsJsonHandler(supplier)), "/vars.json"); - this.jettyServer.start(); - } - - public void stop() throws Exception { - this.jettyServer.stop(); - if (this.shutDownRegistry != null) { - this.shutDownRegistry.execute(); - } - } -} diff --git a/bookkeeper-stats-providers/twitter-science-provider/src/main/java/org/apache/bookkeeper/stats/twitter/science/OpStatsLoggerImpl.java b/bookkeeper-stats-providers/twitter-science-provider/src/main/java/org/apache/bookkeeper/stats/twitter/science/OpStatsLoggerImpl.java deleted file mode 100644 index dae7f6fc96c..00000000000 --- a/bookkeeper-stats-providers/twitter-science-provider/src/main/java/org/apache/bookkeeper/stats/twitter/science/OpStatsLoggerImpl.java +++ /dev/null @@ -1,79 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with this - * work for additional information regarding copyright ownership. The ASF - * licenses this file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - */ -package org.apache.bookkeeper.stats.twitter.science; - -import com.twitter.common.stats.RequestStats; -import com.twitter.common.stats.Stat; -import java.util.Arrays; -import java.util.Map; -import java.util.concurrent.TimeUnit; -import org.apache.bookkeeper.stats.OpStatsData; -import org.apache.bookkeeper.stats.OpStatsLogger; - -/** - * An implementation of the OpStatsLogger interface that handles per operation type stats. - * Internals use twitter.common.stats for exporting metrics. - */ -public class OpStatsLoggerImpl implements OpStatsLogger { - private final RequestStats events; - - public OpStatsLoggerImpl(String name) { - this.events = new RequestStats(name); - } - - // OpStatsLogger functions - public void registerFailedEvent(long eventLatency, TimeUnit unit) { - this.events.incErrors(unit.toMicros(eventLatency)); - } - - public void registerSuccessfulEvent(long eventLatency, TimeUnit unit) { - this.events.requestComplete(unit.toMicros(eventLatency)); - } - - public void registerSuccessfulValue(long value) { - this.events.requestComplete(TimeUnit.MILLISECONDS.toMicros(value)); - } - - public void registerFailedValue(long value) { - this.events.incErrors(TimeUnit.MILLISECONDS.toMicros(value)); - } - - public synchronized void clear() { - } - - /** - * This function should go away soon (hopefully). 
- */ - public synchronized OpStatsData toOpStatsData() { - long numFailed = this.events.getErrorCount(); - long numSuccess = this.events.getSlidingStats().getEventCounter().get() - numFailed; - double avgLatencyMillis = this.events.getSlidingStats().getPerEventLatency().read() / 1000.0; - double[] defaultPercentiles = {10, 50, 90, 99, 99.9, 99.99}; - long[] latenciesMillis = new long[defaultPercentiles.length]; - Arrays.fill(latenciesMillis, Long.MAX_VALUE); - Map realPercentileLatencies = - this.events.getPercentile().getPercentiles(); - for (int i = 0; i < defaultPercentiles.length; i++) { - if (realPercentileLatencies.containsKey(defaultPercentiles[i])) { - @SuppressWarnings("unchecked") - Stat latency = realPercentileLatencies.get(defaultPercentiles[i]); - latenciesMillis[i] = TimeUnit.MICROSECONDS.toMillis(latency.read().longValue()); - } - } - return new OpStatsData(numSuccess, numFailed, avgLatencyMillis, latenciesMillis); - } -} diff --git a/bookkeeper-stats-providers/twitter-science-provider/src/main/java/org/apache/bookkeeper/stats/twitter/science/TwitterStatsLoggerImpl.java b/bookkeeper-stats-providers/twitter-science-provider/src/main/java/org/apache/bookkeeper/stats/twitter/science/TwitterStatsLoggerImpl.java deleted file mode 100644 index cf63ed9c87d..00000000000 --- a/bookkeeper-stats-providers/twitter-science-provider/src/main/java/org/apache/bookkeeper/stats/twitter/science/TwitterStatsLoggerImpl.java +++ /dev/null @@ -1,81 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with this - * work for additional information regarding copyright ownership. The ASF - * licenses this file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - */ -package org.apache.bookkeeper.stats.twitter.science; - -import com.twitter.common.stats.SampledStat; -import com.twitter.common.stats.Stats; -import org.apache.bookkeeper.stats.Counter; -import org.apache.bookkeeper.stats.Gauge; -import org.apache.bookkeeper.stats.OpStatsLogger; -import org.apache.bookkeeper.stats.StatsLogger; - -/** - * Implementation of twitter-stats logger. 
- */ -public class TwitterStatsLoggerImpl implements StatsLogger { - - protected final String name; - - public TwitterStatsLoggerImpl(String name) { - this.name = name; - } - - @Override - public OpStatsLogger getOpStatsLogger(String statName) { - return new OpStatsLoggerImpl(getStatName(statName)); - } - - @Override - public Counter getCounter(String statName) { - return new CounterImpl(getStatName(statName)); - } - - @Override - public void registerGauge(final String statName, final Gauge gauge) { - Stats.export(new SampledStat(getStatName(statName), gauge.getDefaultValue()) { - @Override - public T doSample() { - return gauge.getSample(); - } - }); - } - - @Override - public void unregisterGauge(String name, Gauge gauge) { - // no-op - } - - private String getStatName(String statName) { - return (name + "_" + statName).toLowerCase(); - } - - @Override - public StatsLogger scope(String scope) { - String scopeName; - if (0 == name.length()) { - scopeName = scope; - } else { - scopeName = name + "_" + scope; - } - return new TwitterStatsLoggerImpl(scopeName); - } - - @Override - public void removeScope(String name, StatsLogger statsLogger) { - // no-op - } -} diff --git a/bookkeeper-stats-providers/twitter-science-provider/src/main/java/org/apache/bookkeeper/stats/twitter/science/TwitterStatsProvider.java b/bookkeeper-stats-providers/twitter-science-provider/src/main/java/org/apache/bookkeeper/stats/twitter/science/TwitterStatsProvider.java deleted file mode 100644 index 75c2842c986..00000000000 --- a/bookkeeper-stats-providers/twitter-science-provider/src/main/java/org/apache/bookkeeper/stats/twitter/science/TwitterStatsProvider.java +++ /dev/null @@ -1,88 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with this - * work for additional information regarding copyright ownership. The ASF - * licenses this file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - */ -package org.apache.bookkeeper.stats.twitter.science; - -import org.apache.bookkeeper.stats.CachingStatsProvider; -import org.apache.bookkeeper.stats.StatsLogger; -import org.apache.bookkeeper.stats.StatsProvider; -import org.apache.commons.configuration.Configuration; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * A stats provider implemented based on Twitter Stats library. 
- */ -public class TwitterStatsProvider implements StatsProvider { - - static final Logger LOG = LoggerFactory.getLogger(TwitterStatsProvider.class); - - protected static final String STATS_EXPORT = "statsExport"; - protected static final String STATS_HTTP_PORT = "statsHttpPort"; - - private HTTPStatsExporter statsExporter = null; - private final CachingStatsProvider cachingStatsProvider; - - public TwitterStatsProvider() { - this.cachingStatsProvider = new CachingStatsProvider(new StatsProvider() { - - @Override - public void start(Configuration conf) { - // nop - } - - @Override - public void stop() { - // nop - } - - @Override - public StatsLogger getStatsLogger(String scope) { - return new TwitterStatsLoggerImpl(scope); - } - }); - } - - @Override - public void start(Configuration conf) { - if (conf.getBoolean(STATS_EXPORT, false)) { - statsExporter = new HTTPStatsExporter(conf.getInt(STATS_HTTP_PORT, 9002)); - } - if (null != statsExporter) { - try { - statsExporter.start(); - } catch (Exception e) { - LOG.error("Fail to start stats exporter : ", e); - } - } - } - - @Override - public void stop() { - if (null != statsExporter) { - try { - statsExporter.stop(); - } catch (Exception e) { - LOG.error("Fail to stop stats exporter : ", e); - } - } - } - - @Override - public StatsLogger getStatsLogger(String name) { - return this.cachingStatsProvider.getStatsLogger(name); - } -} diff --git a/bookkeeper-stats-providers/twitter-science-provider/src/main/java/org/apache/bookkeeper/stats/twitter/science/package-info.java b/bookkeeper-stats-providers/twitter-science-provider/src/main/java/org/apache/bookkeeper/stats/twitter/science/package-info.java deleted file mode 100644 index bc358f4861a..00000000000 --- a/bookkeeper-stats-providers/twitter-science-provider/src/main/java/org/apache/bookkeeper/stats/twitter/science/package-info.java +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with this - * work for additional information regarding copyright ownership. The ASF - * licenses this file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - */ -/** - * A stats provider implementation based on Twitter Science Stats library. 
- */ -package org.apache.bookkeeper.stats.twitter.science; diff --git a/bookkeeper-stats/pom.xml b/bookkeeper-stats/pom.xml deleted file mode 100644 index 0d4a9b20e66..00000000000 --- a/bookkeeper-stats/pom.xml +++ /dev/null @@ -1,63 +0,0 @@ - - - - 4.0.0 - - bookkeeper - org.apache.bookkeeper - 4.9.0-SNAPSHOT - - org.apache.bookkeeper.stats - bookkeeper-stats-api - Apache BookKeeper :: Stats API - http://maven.apache.org - - - - org.apache.maven.plugins - maven-javadoc-plugin - ${maven-javadoc-plugin.version} - - - -Xdoclint:none - org.apache.bookkeeper.stats - - - Bookkeeper Stats API - org.apache.bookkeeper.stats - - - - - - attach-javadocs - - jar - - - - - - - - - commons-configuration - commons-configuration - - - diff --git a/bookkeeper-stats/src/main/java/org/apache/bookkeeper/stats/StatsLogger.java b/bookkeeper-stats/src/main/java/org/apache/bookkeeper/stats/StatsLogger.java deleted file mode 100644 index f7506859ef3..00000000000 --- a/bookkeeper-stats/src/main/java/org/apache/bookkeeper/stats/StatsLogger.java +++ /dev/null @@ -1,76 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with this - * work for additional information regarding copyright ownership. The ASF - * licenses this file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - */ -package org.apache.bookkeeper.stats; - -/** - * A simple interface that exposes just 2 useful methods. One to get the logger for an Op stat - * and another to get the logger for a simple stat - */ -public interface StatsLogger { - /** - * @param name - * Stats Name - * @return Get the logger for an OpStat described by the name. - */ - OpStatsLogger getOpStatsLogger(String name); - - /** - * @param name - * Stats Name - * @return Get the logger for a simple stat described by the name - */ - Counter getCounter(String name); - - /** - * Register given gauge as name name. - * - * @param name - * gauge name - * @param gauge - * gauge function - */ - void registerGauge(String name, Gauge gauge); - - /** - * Unregister given gauge from name name. - * - * @param name - * name of the gauge - * @param gauge - * gauge function - */ - void unregisterGauge(String name, Gauge gauge); - - /** - * Provide the stats logger under scope name. - * - * @param name - * scope name. - * @return stats logger under scope name. - */ - StatsLogger scope(String name); - - /** - * Remove the given statsLogger for scope name. - * It can be no-op if the underlying stats provider doesn't have the ability to remove scope. - * - * @param name name of the scope - * @param statsLogger the stats logger of this scope. 
- */ - void removeScope(String name, StatsLogger statsLogger); - -} diff --git a/buildtools/pom.xml b/buildtools/pom.xml index ea4665b63bd..7248d4f3afc 100644 --- a/buildtools/pom.xml +++ b/buildtools/pom.xml @@ -20,9 +20,9 @@ org.apache.bookkeeper bookkeeper - 4.9.0-SNAPSHOT + 4.18.0-SNAPSHOT buildtools Apache BookKeeper :: Build Tools - 4.9.0-SNAPSHOT + 4.18.0-SNAPSHOT diff --git a/buildtools/src/main/resources/bookkeeper/checkstyle.xml b/buildtools/src/main/resources/bookkeeper/checkstyle.xml index 452f64041fd..fc3e3c52e0c 100644 --- a/buildtools/src/main/resources/bookkeeper/checkstyle.xml +++ b/buildtools/src/main/resources/bookkeeper/checkstyle.xml @@ -29,6 +29,21 @@ page at http://checkstyle.sourceforge.net/config.html --> + + + + + + + + + + @@ -53,14 +68,8 @@ page at http://checkstyle.sourceforge.net/config.html --> - - - - - - - + @@ -69,6 +78,13 @@ page at http://checkstyle.sourceforge.net/config.html --> + + + + + + + @@ -91,6 +107,7 @@ page at http://checkstyle.sourceforge.net/config.html --> + - + @@ -140,23 +157,15 @@ page at http://checkstyle.sourceforge.net/config.html --> JAVADOC CHECKS --> - - + - - - - - - - @@ -275,25 +284,9 @@ page at http://checkstyle.sourceforge.net/config.html --> - - - - - - - - - - - @@ -434,8 +427,8 @@ page at http://checkstyle.sourceforge.net/config.html --> - - + + diff --git a/buildtools/src/main/resources/bookkeeper/findbugsExclude.xml b/buildtools/src/main/resources/bookkeeper/findbugsExclude.xml index 83a39f46ca9..ea2b65e4474 100644 --- a/buildtools/src/main/resources/bookkeeper/findbugsExclude.xml +++ b/buildtools/src/main/resources/bookkeeper/findbugsExclude.xml @@ -16,6 +16,16 @@ limitations under the License. //--> + + + + + + + + + + @@ -97,10 +107,6 @@ - - - - @@ -157,4 +163,169 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/buildtools/src/main/resources/bookkeeper/suppressions.xml b/buildtools/src/main/resources/bookkeeper/suppressions.xml index c45e2fa2b32..ecec0a1e788 100644 --- a/buildtools/src/main/resources/bookkeeper/suppressions.xml +++ b/buildtools/src/main/resources/bookkeeper/suppressions.xml @@ -21,11 +21,21 @@ + + + + + + + + + + diff --git a/buildtools/src/main/resources/distributedlog/findbugsExclude.xml b/buildtools/src/main/resources/distributedlog/findbugsExclude.xml index f85686fcb75..c4ba2cd1273 100644 --- a/buildtools/src/main/resources/distributedlog/findbugsExclude.xml +++ b/buildtools/src/main/resources/distributedlog/findbugsExclude.xml @@ -16,6 +16,16 @@ limitations under the License. //--> + + + + + + + + + + diff --git a/buildtools/src/main/resources/ide/eclipse/formatter.xml b/buildtools/src/main/resources/ide/eclipse/formatter.xml index b8648e0702a..01b73dcd996 100644 --- a/buildtools/src/main/resources/ide/eclipse/formatter.xml +++ b/buildtools/src/main/resources/ide/eclipse/formatter.xml @@ -1,3 +1,4 @@ + - diff --git a/buildtools/src/main/resources/log4j.properties b/buildtools/src/main/resources/log4j.properties deleted file mode 100644 index 10ae6bfcbba..00000000000 --- a/buildtools/src/main/resources/log4j.properties +++ /dev/null @@ -1,42 +0,0 @@ -# -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -# - -# -# Bookkeeper Logging Configuration -# - -# Format is " (, )+ - -# DEFAULT: console appender only, level INFO -bookkeeper.root.logger=INFO,CONSOLE -log4j.rootLogger=${bookkeeper.root.logger} - -# -# Log INFO level and above messages to the console -# -log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender -log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout -log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} - %-5p - [%t:%C{1}@%L] - %m%n - -#disable zookeeper logging -log4j.logger.org.apache.zookeeper=OFF -log4j.logger.org.apache.bookkeeper.bookie=INFO -log4j.logger.org.apache.bookkeeper.meta=INFO diff --git a/buildtools/src/main/resources/log4j2.xml b/buildtools/src/main/resources/log4j2.xml new file mode 100644 index 00000000000..050035d8002 --- /dev/null +++ b/buildtools/src/main/resources/log4j2.xml @@ -0,0 +1,36 @@ + + + + + + + + + + + + + + + + + diff --git a/circe-checksum/pom.xml b/circe-checksum/pom.xml index cb2ebdb8eff..9f9ca5f0c6f 100644 --- a/circe-checksum/pom.xml +++ b/circe-checksum/pom.xml @@ -24,7 +24,7 @@ org.apache.bookkeeper bookkeeper - 4.9.0-SNAPSHOT + 4.18.0-SNAPSHOT .. 
@@ -50,6 +50,12 @@ netty-buffer + + org.apache.bookkeeper + testtools + ${project.parent.version} + test + @@ -57,30 +63,37 @@ org.apache.maven.plugins maven-compiler-plugin - ${maven-compiler-plugin.version} - - 1.8 - 1.8 - - - - -Xlint:deprecation - -Xlint:unchecked - - -Xpkginfo:always - - com.github.maven-nar nar-maven-plugin - ${nar-maven-plugin.version} true + + ${nar.runtime} + circe-checksum + + + jni + com.scurrilous.circe.checksum + + + + ${nar.cpp.optionSet} + false + false + full + + **/*.cpp + + + ${skipTests} + + true + org.apache.maven.plugins maven-assembly-plugin - ${maven-assembly-plugin.version} src/main/assembly/assembly.xml @@ -98,17 +111,6 @@ - - org.jacoco - jacoco-maven-plugin - ${jacoco-maven-plugin.version} - - - - com/scurrilous/circe/checksum/NarSystem* - - - @@ -126,7 +128,6 @@ com.github.maven-nar nar-maven-plugin - ${nar-maven-plugin.version} true @@ -140,17 +141,8 @@ org.apache.maven.plugins maven-compiler-plugin - ${maven-compiler-plugin.version} - 1.8 - 1.8 - - - -Xlint:deprecation - -Xlint:unchecked - - -Xpkginfo:always -h ${project.build.directory}/nar/javah-include @@ -172,23 +164,9 @@ com.github.maven-nar nar-maven-plugin - ${nar-maven-plugin.version} true - ${nar.runtime} - circe-checksum - - - jni - com.scurrilous.circe.checksum - - - - ${nar.cpp.optionSet} - false - false - full - + false @@ -206,25 +184,31 @@ com.github.maven-nar nar-maven-plugin - ${nar-maven-plugin.version} true - ${nar.runtime} - circe-checksum - - - jni - com.scurrilous.circe.checksum - - - - ${nar.cpp.optionSet} - false - false - full - + false + + + + + + + Windows + + + Windows + + + + + + com.github.maven-nar + nar-maven-plugin + true + + false - rt + g++ diff --git a/circe-checksum/src/main/assembly/assembly.xml b/circe-checksum/src/main/assembly/assembly.xml index bded0cab8bf..432c1ac4282 100644 --- a/circe-checksum/src/main/assembly/assembly.xml +++ b/circe-checksum/src/main/assembly/assembly.xml @@ -26,15 +26,6 @@ false - - - - org.slf4j:slf4j-api - io.netty - *:nar:* - - - ${project.build.directory}/nar/${project.artifactId}-${project.version}-${os.arch}-MacOSX-gpp-jni/lib/${os.arch}-MacOSX-gpp/jni diff --git a/circe-checksum/src/main/java/com/scurrilous/circe/Hashes.java b/circe-checksum/src/main/java/com/scurrilous/circe/Hashes.java index 786ead5f3d3..16e3b4a9a35 100644 --- a/circe-checksum/src/main/java/com/scurrilous/circe/Hashes.java +++ b/circe-checksum/src/main/java/com/scurrilous/circe/Hashes.java @@ -17,15 +17,6 @@ import java.util.EnumSet; -import com.scurrilous.circe.HashParameters; -import com.scurrilous.circe.HashProviders; -import com.scurrilous.circe.HashSupport; -import com.scurrilous.circe.IncrementalIntHash; -import com.scurrilous.circe.IncrementalLongHash; -import com.scurrilous.circe.StatefulHash; -import com.scurrilous.circe.StatelessIntHash; -import com.scurrilous.circe.StatelessLongHash; - /** * Static methods to obtain various forms of abstract hash functions. 
Each * method uses {@link HashProviders#best} to find the best provider for the diff --git a/circe-checksum/src/main/java/com/scurrilous/circe/checksum/Crc32cIntChecksum.java b/circe-checksum/src/main/java/com/scurrilous/circe/checksum/Crc32cIntChecksum.java index 528244b12be..d90f8b7ea5d 100644 --- a/circe-checksum/src/main/java/com/scurrilous/circe/checksum/Crc32cIntChecksum.java +++ b/circe-checksum/src/main/java/com/scurrilous/circe/checksum/Crc32cIntChecksum.java @@ -18,30 +18,20 @@ */ package com.scurrilous.circe.checksum; -import static com.scurrilous.circe.params.CrcParameters.CRC32C; - -import com.google.common.annotations.VisibleForTesting; -import com.scurrilous.circe.IncrementalIntHash; import com.scurrilous.circe.crc.Sse42Crc32C; -import com.scurrilous.circe.crc.StandardCrcProvider; import io.netty.buffer.ByteBuf; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; public class Crc32cIntChecksum { - private static final Logger log = LoggerFactory.getLogger(Crc32cIntChecksum.class); - - @VisibleForTesting - final static IncrementalIntHash CRC32C_HASH; + private final static IntHash CRC32C_HASH; static { if (Sse42Crc32C.isSupported()) { - CRC32C_HASH = new Crc32cSse42Provider().getIncrementalInt(CRC32C); - log.info("SSE4.2 CRC32C provider initialized"); + CRC32C_HASH = new JniIntHash(); + } else if (Java9IntHash.HAS_JAVA9_CRC32C) { + CRC32C_HASH = new Java9IntHash(); } else { - CRC32C_HASH = new StandardCrcProvider().getIncrementalInt(CRC32C); - log.warn("Failed to load Circe JNI library. Falling back to Java based CRC32c provider"); + CRC32C_HASH = new Java8IntHash(); } } @@ -53,16 +43,19 @@ public class Crc32cIntChecksum { * @return */ public static int computeChecksum(ByteBuf payload) { - if (payload.hasMemoryAddress() && (CRC32C_HASH instanceof Sse42Crc32C)) { - return CRC32C_HASH.calculate(payload.memoryAddress() + payload.readerIndex(), payload.readableBytes()); - } else if (payload.hasArray()) { - return CRC32C_HASH.calculate(payload.array(), payload.arrayOffset() + payload.readerIndex(), - payload.readableBytes()); - } else { - return CRC32C_HASH.calculate(payload.nioBuffer()); - } + return CRC32C_HASH.calculate(payload); } + /** + * Computes the CRC32C checksum. If the native CRC32C library can be loaded, the checksum is computed with that + * library, which is faster because it uses hardware instructions; otherwise it falls back to a software CRC32C implementation.
+ * + * @param payload + * @return + */ + public static int computeChecksum(ByteBuf payload, int offset, int len) { + return CRC32C_HASH.calculate(payload, offset, len); + } /** * Computes incremental checksum with input previousChecksum and input payload @@ -72,15 +65,36 @@ public static int computeChecksum(ByteBuf payload) { * @return */ public static int resumeChecksum(int previousChecksum, ByteBuf payload) { - if (payload.hasMemoryAddress() && (CRC32C_HASH instanceof Sse42Crc32C)) { - return CRC32C_HASH.resume(previousChecksum, payload.memoryAddress() + payload.readerIndex(), - payload.readableBytes()); - } else if (payload.hasArray()) { - return CRC32C_HASH.resume(previousChecksum, payload.array(), payload.arrayOffset() + payload.readerIndex(), - payload.readableBytes()); - } else { - return CRC32C_HASH.resume(previousChecksum, payload.nioBuffer()); - } + return CRC32C_HASH.resume(previousChecksum, payload); } + /** + * Computes incremental checksum with input previousChecksum and input payload + * + * @param previousChecksum the previously computed checksum + * @param payload the data for which the checksum is to be computed + * @param offset the starting position in the payload + * @param len the number of bytes to include in the checksum computation + * @return the updated checksum + */ + public static int resumeChecksum(int previousChecksum, ByteBuf payload, int offset, int len) { + return CRC32C_HASH.resume(previousChecksum, payload, offset, len); + } + + /** + * Computes incremental checksum with input previousChecksum and input payload + * + * @param previousChecksum the previously computed checksum + * @param payload the data for which the checksum is to be computed + * @param offset the starting position in the payload + * @param len the number of bytes to include in the checksum computation + * @return the updated checksum + */ + public static int resumeChecksum(int previousChecksum, byte[] payload, int offset, int len) { + return CRC32C_HASH.resume(previousChecksum, payload, offset, len); + } + + public static boolean acceptsMemoryAddressBuffer() { + return CRC32C_HASH.acceptsMemoryAddressBuffer(); + } } diff --git a/circe-checksum/src/main/java/com/scurrilous/circe/checksum/Crc32cLongChecksum.java b/circe-checksum/src/main/java/com/scurrilous/circe/checksum/Crc32cLongChecksum.java deleted file mode 100644 index c283e5680ba..00000000000 --- a/circe-checksum/src/main/java/com/scurrilous/circe/checksum/Crc32cLongChecksum.java +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
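For reference, the new offset/length overloads added above let callers checksum a sub-range of a buffer without wrapping slices. A minimal usage sketch, assuming only Netty and this module on the classpath (the class name ChecksumSliceExample is illustrative, not part of the patch):

import com.scurrilous.circe.checksum.Crc32cIntChecksum;
import io.netty.buffer.ByteBuf;
import io.netty.buffer.Unpooled;

public class ChecksumSliceExample {
    public static void main(String[] args) {
        byte[] data = "Some String".getBytes();
        ByteBuf buf = Unpooled.wrappedBuffer(data);

        // One-shot checksum over the whole readable region.
        int full = Crc32cIntChecksum.computeChecksum(buf);

        // Same value, computed incrementally over two sub-ranges of the same buffer.
        int partial = Crc32cIntChecksum.computeChecksum(buf, 0, 4);
        partial = Crc32cIntChecksum.resumeChecksum(partial, buf, 4, data.length - 4);

        if (full != partial) {
            throw new AssertionError("incremental CRC32C must match one-shot CRC32C");
        }
        buf.release();
    }
}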
- */ -package com.scurrilous.circe.checksum; - -import static com.scurrilous.circe.params.CrcParameters.CRC32C; - -import com.google.common.annotations.VisibleForTesting; -import com.scurrilous.circe.IncrementalIntHash; -import com.scurrilous.circe.crc.Sse42Crc32C; -import com.scurrilous.circe.crc.StandardCrcProvider; -import io.netty.buffer.ByteBuf; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class Crc32cLongChecksum { - - private static final Logger log = LoggerFactory.getLogger(Crc32cLongChecksum.class); - - @VisibleForTesting - final static IncrementalIntHash CRC32C_HASH; - - static { - if (Sse42Crc32C.isSupported()) { - CRC32C_HASH = new Crc32cSse42Provider().getIncrementalInt(CRC32C); - if (log.isDebugEnabled()) { - log.debug("SSE4.2 CRC32C provider initialized"); - } - } else { - CRC32C_HASH = new StandardCrcProvider().getIncrementalInt(CRC32C); - log.warn("Failed to load Circe JNI library. Falling back to Java based CRC32c provider"); - } - } - - /** - * Computes crc32c checksum: if it is able to load crc32c native library then it computes using that native library - * which is faster as it computes using hardware machine instruction else it computes using crc32c algo. - * - * @param payload - * @return - */ - public static long computeChecksum(ByteBuf payload) { - int crc; - if (payload.hasMemoryAddress() && (CRC32C_HASH instanceof Sse42Crc32C)) { - crc = CRC32C_HASH.calculate(payload.memoryAddress() + payload.readerIndex(), payload.readableBytes()); - } else if (payload.hasArray()) { - crc = CRC32C_HASH.calculate(payload.array(), payload.arrayOffset() + payload.readerIndex(), - payload.readableBytes()); - } else { - crc = CRC32C_HASH.calculate(payload.nioBuffer()); - } - return crc & 0xffffffffL; - } - - - /** - * Computes incremental checksum with input previousChecksum and input payload - * - * @param previousChecksum : previously computed checksum - * @param payload - * @return - */ - public static long resumeChecksum(long previousChecksum, ByteBuf payload) { - int crc = (int) previousChecksum; - if (payload.hasMemoryAddress() && (CRC32C_HASH instanceof Sse42Crc32C)) { - crc = CRC32C_HASH.resume(crc, payload.memoryAddress() + payload.readerIndex(), - payload.readableBytes()); - } else if (payload.hasArray()) { - crc = CRC32C_HASH.resume(crc, payload.array(), payload.arrayOffset() + payload.readerIndex(), - payload.readableBytes()); - } else { - crc = CRC32C_HASH.resume(crc, payload.nioBuffer()); - } - return crc & 0xffffffffL; - } - -} diff --git a/circe-checksum/src/main/java/com/scurrilous/circe/checksum/IntHash.java b/circe-checksum/src/main/java/com/scurrilous/circe/checksum/IntHash.java new file mode 100644 index 00000000000..be98ae19be1 --- /dev/null +++ b/circe-checksum/src/main/java/com/scurrilous/circe/checksum/IntHash.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
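With Crc32cLongChecksum removed above, callers that relied on its unsigned long result can recover the same value from the int API; the deleted code only widened the int CRC with `crc & 0xffffffffL`. A hedged migration sketch (method and class names are illustrative):

import com.scurrilous.circe.checksum.Crc32cIntChecksum;
import io.netty.buffer.ByteBuf;
import io.netty.buffer.Unpooled;

public class LongChecksumMigration {

    // Equivalent of the removed Crc32cLongChecksum.computeChecksum(payload):
    // widen the signed int checksum to an unsigned long, as the old code did.
    static long computeChecksumAsLong(ByteBuf payload) {
        return Integer.toUnsignedLong(Crc32cIntChecksum.computeChecksum(payload));
    }

    public static void main(String[] args) {
        ByteBuf buf = Unpooled.wrappedBuffer("Some String".getBytes());
        System.out.println(computeChecksumAsLong(buf)); // 608512271 for this input, per the tests
        buf.release();
    }
}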
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package com.scurrilous.circe.checksum; + +import io.netty.buffer.ByteBuf; + +public interface IntHash { + int calculate(ByteBuf buffer); + + int calculate(ByteBuf buffer, int offset, int len); + + int resume(int current, ByteBuf buffer); + + int resume(int current, ByteBuf buffer, int offset, int len); + + int resume(int current, byte[] buffer, int offset, int len); + + boolean acceptsMemoryAddressBuffer(); +} diff --git a/circe-checksum/src/main/java/com/scurrilous/circe/checksum/Java8IntHash.java b/circe-checksum/src/main/java/com/scurrilous/circe/checksum/Java8IntHash.java new file mode 100644 index 00000000000..0065baeec41 --- /dev/null +++ b/circe-checksum/src/main/java/com/scurrilous/circe/checksum/Java8IntHash.java @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package com.scurrilous.circe.checksum; + +import static com.scurrilous.circe.params.CrcParameters.CRC32C; +import com.scurrilous.circe.IncrementalIntHash; +import com.scurrilous.circe.crc.StandardCrcProvider; +import io.netty.buffer.ByteBuf; + +public class Java8IntHash implements IntHash { + + private final IncrementalIntHash hash = new StandardCrcProvider().getIncrementalInt(CRC32C); + + @Override + public int calculate(ByteBuf buffer) { + return resume(0, buffer); + } + + @Override + public int resume(int current, ByteBuf buffer) { + return resume(current, buffer, buffer.readerIndex(), buffer.readableBytes()); + } + + @Override + public int calculate(ByteBuf buffer, int offset, int len) { + return resume(0, buffer, offset, len); + } + + @Override + public int resume(int current, ByteBuf buffer, int offset, int len) { + if (buffer.hasArray()) { + return hash.resume(current, buffer.array(), buffer.arrayOffset() + offset, + len); + } else { + return hash.resume(current, buffer.slice(offset, len).nioBuffer()); + } + } + + @Override + public int resume(int current, byte[] buffer, int offset, int len) { + return hash.resume(current, buffer, offset, len); + } + + @Override + public boolean acceptsMemoryAddressBuffer() { + return false; + } +} diff --git a/circe-checksum/src/main/java/com/scurrilous/circe/checksum/Java9IntHash.java b/circe-checksum/src/main/java/com/scurrilous/circe/checksum/Java9IntHash.java new file mode 100644 index 00000000000..2e779a92766 --- /dev/null +++ b/circe-checksum/src/main/java/com/scurrilous/circe/checksum/Java9IntHash.java @@ -0,0 +1,148 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
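One property worth noting about the new IntHash contract: every implementation in this patch defines calculate(buf) as resume(0, buf), so one-shot and incremental use are interchangeable. A small check against the portable Java 8 fallback (illustrative only; Java9IntHash and JniIntHash are drop-in replacements where supported):

import com.scurrilous.circe.checksum.IntHash;
import com.scurrilous.circe.checksum.Java8IntHash;
import io.netty.buffer.ByteBuf;
import io.netty.buffer.Unpooled;

public class IntHashContractCheck {
    public static void main(String[] args) {
        IntHash hash = new Java8IntHash();
        ByteBuf buf = Unpooled.wrappedBuffer("Some String".getBytes());

        // The one-shot form is defined as resuming from a zero seed.
        if (hash.calculate(buf) != hash.resume(0, buf)) {
            throw new AssertionError("calculate must equal resume from seed 0");
        }

        // The pure-Java fallback cannot consume raw memory addresses.
        if (hash.acceptsMemoryAddressBuffer()) {
            throw new AssertionError("Java8IntHash is expected to report false here");
        }
        buf.release();
    }
}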
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package com.scurrilous.circe.checksum; + +import io.netty.buffer.ByteBuf; +import io.netty.util.concurrent.FastThreadLocal; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import lombok.extern.slf4j.Slf4j; + +@Slf4j +public class Java9IntHash implements IntHash { + static final boolean HAS_JAVA9_CRC32C; + private static final Method UPDATE_BYTES; + private static final Method UPDATE_DIRECT_BYTEBUFFER; + + private static final String CRC32C_CLASS_NAME = "java.util.zip.CRC32C"; + + private static final FastThreadLocal<byte[]> TL_BUFFER = new FastThreadLocal<byte[]>() { + @Override + protected byte[] initialValue() { + return new byte[4096]; + } + }; + + static { + boolean hasJava9CRC32C = false; + Method updateBytes = null; + Method updateDirectByteBuffer = null; + + try { + Class<?> c = Class.forName(CRC32C_CLASS_NAME); + updateBytes = c.getDeclaredMethod("updateBytes", int.class, byte[].class, int.class, int.class); + updateBytes.setAccessible(true); + updateDirectByteBuffer = + c.getDeclaredMethod("updateDirectByteBuffer", int.class, long.class, int.class, int.class); + updateDirectByteBuffer.setAccessible(true); + + hasJava9CRC32C = true; + } catch (Exception e) { + if (log.isDebugEnabled()) { + log.debug("Unable to use reflected methods: ", e); + } + updateBytes = null; + updateDirectByteBuffer = null; + } + + HAS_JAVA9_CRC32C = hasJava9CRC32C; + UPDATE_BYTES = updateBytes; + UPDATE_DIRECT_BYTEBUFFER = updateDirectByteBuffer; + } + + @Override + public int calculate(ByteBuf buffer) { + return resume(0, buffer); + } + + @Override + public int calculate(ByteBuf buffer, int offset, int len) { + return resume(0, buffer, offset, len); + } + + private int updateDirectByteBuffer(int current, long address, int offset, int length) { + try { + return (int) UPDATE_DIRECT_BYTEBUFFER.invoke(null, current, address, offset, offset + length); + } catch (IllegalAccessException | InvocationTargetException e) { + throw new RuntimeException(e); + } + } + + @Override + public int resume(int current, byte[] array, int offset, int length) { + // the bit-wise complementing of the input and output is explained in the resume method below + current = ~current; + current = updateBytes(current, array, offset, length); + return ~current; + } + + @Override + public boolean acceptsMemoryAddressBuffer() { + return true; + } + + private static int updateBytes(int current, byte[] array, int offset, int length) { + try { + return (int) UPDATE_BYTES.invoke(null, current, array, offset, offset + length); + } catch (IllegalAccessException | InvocationTargetException e) { + throw new RuntimeException(e); + } + } + + @Override + public int resume(int current, ByteBuf buffer) { + return resume(current, buffer, buffer.readerIndex(), buffer.readableBytes()); + } + + @Override + public int resume(int current, ByteBuf buffer, int offset, int len) { + // The input value is bit-wise complemented for two reasons: + // 1.
The CRC32C algorithm is designed to start with a seed value where all bits are set to 1 (0xffffffff). + // When 0 is initially passed in, ~0 results in the correct initial value (0xffffffff). + // 2. The CRC32C algorithm complements the final value as the last step. This method will always complement + // the return value. Therefore, when the algorithm is used iteratively, it is necessary to complement + // the input value to continue calculations. + // This allows the algorithm to be used incrementally without needing separate initialization and + // finalization steps. + current = ~current; + + if (buffer.hasMemoryAddress()) { + current = updateDirectByteBuffer(current, buffer.memoryAddress(), offset, len); + } else if (buffer.hasArray()) { + int arrayOffset = buffer.arrayOffset() + offset; + current = updateBytes(current, buffer.array(), arrayOffset, len); + } else { + byte[] b = TL_BUFFER.get(); + int toRead = len; + int loopOffset = offset; + while (toRead > 0) { + int length = Math.min(toRead, b.length); + buffer.getBytes(loopOffset, b, 0, length); + current = updateBytes(current, b, 0, length); + toRead -= length; + loopOffset += length; + } + } + + // The current value is complemented to align with the finalization step of the CRC32C algorithm. + // If there is a subsequent resume step, the value will be complemented again to initiate the next step + // as described in the comments in the beginning of this method. + return ~current; + } +} diff --git a/circe-checksum/src/main/java/com/scurrilous/circe/checksum/JniIntHash.java b/circe-checksum/src/main/java/com/scurrilous/circe/checksum/JniIntHash.java new file mode 100644 index 00000000000..dc5bed0fc1c --- /dev/null +++ b/circe-checksum/src/main/java/com/scurrilous/circe/checksum/JniIntHash.java @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
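The chunk-splitting equivalence that the complement handling above preserves can be sanity-checked with the JDK's public CRC32C API, which manages the seed and final complement internally (Java 9+ only; purely illustrative):

import java.util.zip.CRC32C;

public class IncrementalCrc32cCheck {
    public static void main(String[] args) {
        byte[] data = "Some String".getBytes();

        // One-shot CRC32C over the full input.
        CRC32C oneShot = new CRC32C();
        oneShot.update(data, 0, data.length);

        // The same input fed in two chunks; internal state carries across updates,
        // just as resume() carries state across calls by re-complementing the value.
        CRC32C chunked = new CRC32C();
        chunked.update(data, 0, 4);
        chunked.update(data, 4, data.length - 4);

        if (oneShot.getValue() != chunked.getValue()) {
            throw new AssertionError("CRC32C must be invariant to chunking");
        }
    }
}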
+ */ +package com.scurrilous.circe.checksum; + +import static com.scurrilous.circe.params.CrcParameters.CRC32C; +import com.scurrilous.circe.IncrementalIntHash; +import io.netty.buffer.ByteBuf; + +public class JniIntHash implements IntHash { + + private final IncrementalIntHash hash = new Crc32cSse42Provider().getIncrementalInt(CRC32C); + + @Override + public int calculate(ByteBuf buffer) { + return calculate(buffer, buffer.readerIndex(), buffer.readableBytes()); + } + + @Override + public int resume(int current, ByteBuf buffer) { + return resume(current, buffer, buffer.readerIndex(), buffer.readableBytes()); + } + + @Override + public int calculate(ByteBuf buffer, int offset, int len) { + return resume(0, buffer, offset, len); + } + + @Override + public int resume(int current, ByteBuf buffer, int offset, int len) { + if (buffer.hasMemoryAddress()) { + return hash.resume(current, buffer.memoryAddress() + offset, len); + } else if (buffer.hasArray()) { + return hash.resume(current, buffer.array(), buffer.arrayOffset() + offset, len); + } else { + return hash.resume(current, buffer.slice(offset, len).nioBuffer()); + } + } + + @Override + public int resume(int current, byte[] buffer, int offset, int len) { + return hash.resume(current, buffer, offset, len); + } + + @Override + public boolean acceptsMemoryAddressBuffer() { + return true; + } +} diff --git a/circe-checksum/src/main/java/com/scurrilous/circe/crc/JavaCrc32.java b/circe-checksum/src/main/java/com/scurrilous/circe/crc/JavaCrc32.java index afe22efc7bc..4072d932665 100644 --- a/circe-checksum/src/main/java/com/scurrilous/circe/crc/JavaCrc32.java +++ b/circe-checksum/src/main/java/com/scurrilous/circe/crc/JavaCrc32.java @@ -16,7 +16,6 @@ package com.scurrilous.circe.crc; import java.util.zip.CRC32; - import com.scurrilous.circe.StatefulHash; import com.scurrilous.circe.StatefulIntHash; import com.scurrilous.circe.StatelessIntHash; diff --git a/circe-checksum/src/main/java/com/scurrilous/circe/crc/Sse42Crc32C.java b/circe-checksum/src/main/java/com/scurrilous/circe/crc/Sse42Crc32C.java index 28a989d2c56..22379a0e32d 100644 --- a/circe-checksum/src/main/java/com/scurrilous/circe/crc/Sse42Crc32C.java +++ b/circe-checksum/src/main/java/com/scurrilous/circe/crc/Sse42Crc32C.java @@ -54,7 +54,7 @@ public static boolean isSupported() { config = 0; } - public Sse42Crc32C(int chunkWords[]) { + public Sse42Crc32C(int[] chunkWords) { if (chunkWords.length == 0) { config = 0; } else { diff --git a/circe-checksum/src/main/java/com/scurrilous/circe/crc/StandardCrcProvider.java b/circe-checksum/src/main/java/com/scurrilous/circe/crc/StandardCrcProvider.java index b54fad6426e..064ee70a760 100644 --- a/circe-checksum/src/main/java/com/scurrilous/circe/crc/StandardCrcProvider.java +++ b/circe-checksum/src/main/java/com/scurrilous/circe/crc/StandardCrcProvider.java @@ -16,7 +16,6 @@ package com.scurrilous.circe.crc; import java.util.EnumSet; - import com.scurrilous.circe.Hash; import com.scurrilous.circe.HashSupport; import com.scurrilous.circe.StatelessHash; diff --git a/circe-checksum/src/main/java/com/scurrilous/circe/impl/AbstractHashProvider.java b/circe-checksum/src/main/java/com/scurrilous/circe/impl/AbstractHashProvider.java index fb09624abf7..ee60aaae4bb 100644 --- a/circe-checksum/src/main/java/com/scurrilous/circe/impl/AbstractHashProvider.java +++ b/circe-checksum/src/main/java/com/scurrilous/circe/impl/AbstractHashProvider.java @@ -18,7 +18,6 @@ import java.util.EnumSet; import java.util.concurrent.Callable; import 
java.util.concurrent.ExecutionException; - import com.scurrilous.circe.Hash; import com.scurrilous.circe.HashParameters; import com.scurrilous.circe.HashProvider; diff --git a/circe-checksum/src/main/java/com/scurrilous/circe/impl/AbstractIncrementalIntHash.java b/circe-checksum/src/main/java/com/scurrilous/circe/impl/AbstractIncrementalIntHash.java index 094cf22889f..ab7e92e9041 100644 --- a/circe-checksum/src/main/java/com/scurrilous/circe/impl/AbstractIncrementalIntHash.java +++ b/circe-checksum/src/main/java/com/scurrilous/circe/impl/AbstractIncrementalIntHash.java @@ -16,7 +16,6 @@ package com.scurrilous.circe.impl; import java.nio.ByteBuffer; - import com.scurrilous.circe.IncrementalIntHash; import com.scurrilous.circe.StatefulIntHash; diff --git a/circe-checksum/src/main/java/com/scurrilous/circe/impl/AbstractIncrementalLongHash.java b/circe-checksum/src/main/java/com/scurrilous/circe/impl/AbstractIncrementalLongHash.java index 9996f8eb429..dbcf096a841 100644 --- a/circe-checksum/src/main/java/com/scurrilous/circe/impl/AbstractIncrementalLongHash.java +++ b/circe-checksum/src/main/java/com/scurrilous/circe/impl/AbstractIncrementalLongHash.java @@ -16,7 +16,6 @@ package com.scurrilous.circe.impl; import java.nio.ByteBuffer; - import com.scurrilous.circe.IncrementalLongHash; import com.scurrilous.circe.StatefulLongHash; diff --git a/circe-checksum/src/main/java/com/scurrilous/circe/impl/AbstractStatefulHash.java b/circe-checksum/src/main/java/com/scurrilous/circe/impl/AbstractStatefulHash.java index e3edcec6ecb..5b1da5017e0 100644 --- a/circe-checksum/src/main/java/com/scurrilous/circe/impl/AbstractStatefulHash.java +++ b/circe-checksum/src/main/java/com/scurrilous/circe/impl/AbstractStatefulHash.java @@ -16,7 +16,6 @@ package com.scurrilous.circe.impl; import java.nio.ByteBuffer; - import com.scurrilous.circe.StatefulHash; /** diff --git a/circe-checksum/src/main/java/com/scurrilous/circe/impl/AbstractStatelessIntHash.java b/circe-checksum/src/main/java/com/scurrilous/circe/impl/AbstractStatelessIntHash.java index 2228697445c..fafe74ede1d 100644 --- a/circe-checksum/src/main/java/com/scurrilous/circe/impl/AbstractStatelessIntHash.java +++ b/circe-checksum/src/main/java/com/scurrilous/circe/impl/AbstractStatelessIntHash.java @@ -16,7 +16,6 @@ package com.scurrilous.circe.impl; import java.nio.ByteBuffer; - import com.scurrilous.circe.StatelessIntHash; /** diff --git a/circe-checksum/src/main/java/com/scurrilous/circe/impl/AbstractStatelessLongHash.java b/circe-checksum/src/main/java/com/scurrilous/circe/impl/AbstractStatelessLongHash.java index 8688b99849e..82d9358faa9 100644 --- a/circe-checksum/src/main/java/com/scurrilous/circe/impl/AbstractStatelessLongHash.java +++ b/circe-checksum/src/main/java/com/scurrilous/circe/impl/AbstractStatelessLongHash.java @@ -16,7 +16,6 @@ package com.scurrilous.circe.impl; import java.nio.ByteBuffer; - import com.scurrilous.circe.StatelessLongHash; /** diff --git a/circe-checksum/src/main/java/com/scurrilous/circe/impl/HashCache.java b/circe-checksum/src/main/java/com/scurrilous/circe/impl/HashCache.java index e8d367e56b7..dd176de473a 100644 --- a/circe-checksum/src/main/java/com/scurrilous/circe/impl/HashCache.java +++ b/circe-checksum/src/main/java/com/scurrilous/circe/impl/HashCache.java @@ -18,7 +18,6 @@ import java.util.EnumSet; import java.util.concurrent.Callable; import java.util.concurrent.ExecutionException; - import com.scurrilous.circe.Hash; import com.scurrilous.circe.HashParameters; import com.scurrilous.circe.HashSupport; diff --git 
a/circe-checksum/src/main/java/com/scurrilous/circe/impl/IncrementalIntStatefulHash.java b/circe-checksum/src/main/java/com/scurrilous/circe/impl/IncrementalIntStatefulHash.java index 22f57b779e0..735067f4162 100644 --- a/circe-checksum/src/main/java/com/scurrilous/circe/impl/IncrementalIntStatefulHash.java +++ b/circe-checksum/src/main/java/com/scurrilous/circe/impl/IncrementalIntStatefulHash.java @@ -16,7 +16,6 @@ package com.scurrilous.circe.impl; import java.nio.ByteBuffer; - import com.scurrilous.circe.StatefulHash; import com.scurrilous.circe.StatefulIntHash; import com.scurrilous.circe.StatelessIntHash; diff --git a/circe-checksum/src/main/java/com/scurrilous/circe/impl/IncrementalLongStatefulHash.java b/circe-checksum/src/main/java/com/scurrilous/circe/impl/IncrementalLongStatefulHash.java index 9d7f7f89c44..eaf9839b07c 100644 --- a/circe-checksum/src/main/java/com/scurrilous/circe/impl/IncrementalLongStatefulHash.java +++ b/circe-checksum/src/main/java/com/scurrilous/circe/impl/IncrementalLongStatefulHash.java @@ -16,7 +16,6 @@ package com.scurrilous.circe.impl; import java.nio.ByteBuffer; - import com.scurrilous.circe.StatefulHash; import com.scurrilous.circe.StatefulLongHash; import com.scurrilous.circe.StatelessLongHash; diff --git a/circe-checksum/src/main/java/com/scurrilous/circe/impl/IntStatefulLongHash.java b/circe-checksum/src/main/java/com/scurrilous/circe/impl/IntStatefulLongHash.java index 3b9a3acd5af..65f72e2a66b 100644 --- a/circe-checksum/src/main/java/com/scurrilous/circe/impl/IntStatefulLongHash.java +++ b/circe-checksum/src/main/java/com/scurrilous/circe/impl/IntStatefulLongHash.java @@ -16,7 +16,6 @@ package com.scurrilous.circe.impl; import java.nio.ByteBuffer; - import com.scurrilous.circe.StatefulHash; import com.scurrilous.circe.StatefulIntHash; import com.scurrilous.circe.StatefulLongHash; @@ -39,70 +38,87 @@ public IntStatefulLongHash(StatefulIntHash intHash) { this.intHash = intHash; } + @Override public StatelessLongHash asStateless() { return new IntStatelessLongHash(intHash.asStateless()); } + @Override public String algorithm() { return intHash.algorithm(); } + @Override public int length() { return intHash.length(); } + @Override public StatefulHash createNew() { return intHash.createNew(); } + @Override public boolean supportsUnsafe() { return intHash.supportsUnsafe(); } + @Override public boolean supportsIncremental() { return intHash.supportsIncremental(); } + @Override public void reset() { intHash.reset(); } + @Override public void update(byte[] input) { intHash.update(input); } + @Override public void update(byte[] input, int index, int length) { intHash.update(input, index, length); } + @Override public void update(ByteBuffer input) { intHash.update(input); } + @Override public void update(long address, long length) { intHash.update(address, length); } + @Override public byte[] getBytes() { return intHash.getBytes(); } + @Override public int getBytes(byte[] output, int index, int maxLength) { return intHash.getBytes(output, index, maxLength); } + @Override public byte getByte() { return intHash.getByte(); } + @Override public short getShort() { return intHash.getShort(); } + @Override public int getInt() { return intHash.getInt(); } + @Override public long getLong() { return intHash.getLong(); } diff --git a/circe-checksum/src/main/java/com/scurrilous/circe/impl/IntStatelessLongHash.java b/circe-checksum/src/main/java/com/scurrilous/circe/impl/IntStatelessLongHash.java index 4111842bfa4..ca440ab53d2 100644 --- 
a/circe-checksum/src/main/java/com/scurrilous/circe/impl/IntStatelessLongHash.java +++ b/circe-checksum/src/main/java/com/scurrilous/circe/impl/IntStatelessLongHash.java @@ -16,7 +16,6 @@ package com.scurrilous.circe.impl; import java.nio.ByteBuffer; - import com.scurrilous.circe.StatefulLongHash; import com.scurrilous.circe.StatelessIntHash; import com.scurrilous.circe.StatelessLongHash; diff --git a/circe-checksum/src/main/java/com/scurrilous/circe/utils/NativeUtils.java b/circe-checksum/src/main/java/com/scurrilous/circe/utils/NativeUtils.java index 8776092d583..bd950616c51 100644 --- a/circe-checksum/src/main/java/com/scurrilous/circe/utils/NativeUtils.java +++ b/circe-checksum/src/main/java/com/scurrilous/circe/utils/NativeUtils.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -23,9 +23,10 @@ import java.io.File; import java.io.FileNotFoundException; import java.io.FileOutputStream; -import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.nio.file.Files; +import java.nio.file.Path; import java.util.Locale; /** @@ -37,7 +38,7 @@ public class NativeUtils { /** * loads the given library from this jar, e.g. this jar contains: /lib/pulsar-checksum.jnilib - * + * @param path * : absolute path of the library in the jar
          * if this jar contains: /lib/pulsar-checksum.jnilib then provide the same absolute path as input @@ -50,13 +51,9 @@ public static void loadLibraryFromJar(String path) throws Exception { String[] parts = path.split("/"); String filename = (parts.length > 0) ? parts[parts.length - 1] : null; - File dir = File.createTempFile("native", ""); - dir.delete(); - if (!(dir.mkdir())) { - throw new IOException("Failed to create temp directory " + dir.getAbsolutePath()); - } - dir.deleteOnExit(); - File temp = new File(dir, filename); + Path dir = Files.createTempDirectory("native"); + dir.toFile().deleteOnExit(); + File temp = new File(dir.toString(), filename); temp.deleteOnExit(); byte[] buffer = new byte[1024]; @@ -67,13 +64,16 @@ public static void loadLibraryFromJar(String path) throws Exception { throw new FileNotFoundException("Couldn't find file into jar " + path); } - OutputStream out = new FileOutputStream(temp); try { - while ((read = input.read(buffer)) != -1) { - out.write(buffer, 0, read); + OutputStream out = new FileOutputStream(temp); + try { + while ((read = input.read(buffer)) != -1) { + out.write(buffer, 0, read); + } + } finally { + out.close(); } } finally { - out.close(); input.close(); } @@ -87,7 +87,7 @@ public static void loadLibraryFromJar(String path) throws Exception { /** * Returns jni library extension based on OS specification. Maven-nar generates jni library based on different OS : * http://mark.donszelmann.org/maven-nar-plugin/aol.html (jni.extension) - * + * * @return */ public static String libType() { diff --git a/circe-checksum/src/test/java/com/scurrilous/circe/checksum/ChecksumTest.java b/circe-checksum/src/test/java/com/scurrilous/circe/checksum/ChecksumTest.java index 67fee3ce37a..f68b362a1af 100644 --- a/circe-checksum/src/test/java/com/scurrilous/circe/checksum/ChecksumTest.java +++ b/circe-checksum/src/test/java/com/scurrilous/circe/checksum/ChecksumTest.java @@ -44,7 +44,7 @@ public void testCrc32cValue() { @Test public void testCrc32cValueResume() { final byte[] bytes = "Some String".getBytes(); - int checksum = Crc32cIntChecksum.resumeChecksum(0, Unpooled.wrappedBuffer(bytes)); + int checksum = Crc32cIntChecksum.resumeChecksum(0, Unpooled.wrappedBuffer(bytes), 0, bytes.length); assertEquals(608512271, checksum); } @@ -53,24 +53,24 @@ public void testCrc32cValueResume() { public void testCrc32cValueIncremental() { final byte[] bytes = "Some String".getBytes(); - int checksum = Crc32cIntChecksum.CRC32C_HASH.calculate(bytes, 0, bytes.length); + int checksum = Crc32cIntChecksum.computeChecksum(Unpooled.wrappedBuffer(bytes)); assertEquals(608512271, checksum); - checksum = Crc32cIntChecksum.CRC32C_HASH.calculate(bytes, 0, 1); + checksum = Crc32cIntChecksum.computeChecksum(Unpooled.wrappedBuffer(bytes, 0, 1)); for (int i = 1; i < bytes.length; i++) { - checksum = Crc32cIntChecksum.CRC32C_HASH.resume(checksum, bytes, i, 1); + checksum = Crc32cIntChecksum.resumeChecksum(checksum, Unpooled.wrappedBuffer(bytes), i, 1); } assertEquals(608512271, checksum); - checksum = Crc32cIntChecksum.CRC32C_HASH.calculate(bytes, 0, 4); - checksum = Crc32cIntChecksum.CRC32C_HASH.resume(checksum, bytes, 4, 7); + checksum = Crc32cIntChecksum.computeChecksum(Unpooled.wrappedBuffer(bytes, 0, 4)); + checksum = Crc32cIntChecksum.resumeChecksum(checksum, Unpooled.wrappedBuffer(bytes), 4, 7); assertEquals(608512271, checksum); ByteBuf buffer = Unpooled.wrappedBuffer(bytes, 0, 4); checksum = Crc32cIntChecksum.computeChecksum(buffer); checksum = Crc32cIntChecksum.resumeChecksum( 
- checksum, Unpooled.wrappedBuffer(bytes, 4, bytes.length - 4)); + checksum, Unpooled.wrappedBuffer(bytes), 4, bytes.length - 4); assertEquals(608512271, checksum); } @@ -86,33 +86,11 @@ public void testCrc32cLongValue() { @Test public void testCrc32cLongValueResume() { final byte[] bytes = "Some String".getBytes(); - long checksum = Crc32cIntChecksum.resumeChecksum(0, Unpooled.wrappedBuffer(bytes)); + long checksum = Crc32cIntChecksum.resumeChecksum(0, Unpooled.wrappedBuffer(bytes), 0, bytes.length); assertEquals(608512271L, checksum); } - @Test - public void testCrc32cLongValueIncremental() { - final byte[] bytes = "Some String".getBytes(); - - long checksum = Crc32cLongChecksum.computeChecksum(Unpooled.wrappedBuffer(bytes)); - assertEquals(608512271, checksum); - - checksum = Crc32cLongChecksum.computeChecksum(Unpooled.wrappedBuffer(bytes, 0, 1)); - for (int i = 1; i < bytes.length; i++) { - checksum = Crc32cLongChecksum.resumeChecksum( - checksum, Unpooled.wrappedBuffer(bytes, i, 1)); - } - assertEquals(608512271, checksum); - - ByteBuf buffer = Unpooled.wrappedBuffer(bytes, 0, 4); - checksum = Crc32cLongChecksum.computeChecksum(buffer); - checksum = Crc32cLongChecksum.resumeChecksum( - checksum, Unpooled.wrappedBuffer(bytes, 4, bytes.length - 4)); - - assertEquals(608512271, checksum); - } - @Test public void testCRC32CIncrementalLong() { IncrementalLongHash crc32c = new StandardCrcProvider().getIncrementalLong(CRC32C); diff --git a/circe-checksum/src/test/java/com/scurrilous/circe/checksum/Java9IntHashTest.java b/circe-checksum/src/test/java/com/scurrilous/circe/checksum/Java9IntHashTest.java new file mode 100644 index 00000000000..3fb57b4a1aa --- /dev/null +++ b/circe-checksum/src/test/java/com/scurrilous/circe/checksum/Java9IntHashTest.java @@ -0,0 +1,121 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package com.scurrilous.circe.checksum; + +import io.netty.buffer.ByteBuf; +import io.netty.buffer.ByteBufAllocator; +import io.netty.buffer.CompositeByteBuf; +import io.netty.buffer.DuplicatedByteBuf; +import java.util.Random; +import lombok.extern.slf4j.Slf4j; +import org.junit.Assert; +import org.junit.Test; + +@Slf4j +public class Java9IntHashTest { + + private ByteBuf[] generateByteBuffers() { + Random random = new Random(); + int hugeDataLen = 4096 * 3; + byte[] hugeData = new byte[hugeDataLen]; + for (int i = 0; i < hugeDataLen; i ++) { + hugeData[i] = (byte) (random.nextInt() % 127); + } + + // b_total = b1 + b2 + b3; + ByteBuf bTotal = ByteBufAllocator.DEFAULT.heapBuffer(6 + hugeDataLen); + bTotal.writeBytes(new byte[]{1,2,3,4,5,6}); + bTotal.writeBytes(hugeData); + ByteBuf b1 = ByteBufAllocator.DEFAULT.heapBuffer(3); + b1.writeBytes(new byte[]{1,2,3}); + ByteBuf b2 = ByteBufAllocator.DEFAULT.heapBuffer(3); + b2.writeBytes(new byte[]{4,5,6}); + ByteBuf b3 = ByteBufAllocator.DEFAULT.heapBuffer(hugeDataLen); + b3.writeBytes(hugeData); + + return new ByteBuf[]{bTotal, b1, new CompositeByteBuf(ByteBufAllocator.DEFAULT, false, 2, b2, b3)}; + } + + @Test + public void calculateCheckSumUsingCompositeByteBuf() { + // byteBuffers[0] = byteBuffers[1] + byteBuffers[2]. + // byteBuffers[2] is a composite ByteBuf. + ByteBuf[] byteBuffers = generateByteBuffers(); + ByteBuf bTotal = byteBuffers[0]; + ByteBuf b1 = byteBuffers[1]; + ByteBuf b2 = byteBuffers[2]; + + // Calculate: case-1. + int checksumRes1 = Crc32cIntChecksum.computeChecksum(bTotal); + + // Calculate: case-2. + int b1CheckSum = Crc32cIntChecksum.computeChecksum(b1); + int checksumRes2 = Crc32cIntChecksum.resumeChecksum(b1CheckSum, b2); + + // Verify: the results of both ways to calculate the checksum are same. + Assert.assertEquals(checksumRes1, checksumRes2); + + // cleanup. + bTotal.release(); + b1.release(); + b2.release(); + } + + @Test + public void calculateCheckSumUsingNoArrayNoMemoryAddrByteBuf() { + // byteBuffers[0] = byteBuffers[1] + byteBuffers[2]. + // byteBuffers[2] is a composite ByteBuf. + ByteBuf[] byteBuffers = generateByteBuffers(); + ByteBuf bTotal = byteBuffers[0]; + ByteBuf b1 = byteBuffers[1]; + ByteBuf b2 = new NoArrayNoMemoryAddrByteBuff(byteBuffers[2]); + + // Calculate: case-1. + int checksumRes1 = Crc32cIntChecksum.computeChecksum(bTotal); + + // Calculate: case-2. + int b1CheckSum = Crc32cIntChecksum.computeChecksum(b1); + int checksumRes2 = Crc32cIntChecksum.resumeChecksum(b1CheckSum, b2); + + // Verify: the results of both ways to calculate the checksum are same. + Assert.assertEquals(checksumRes1, checksumRes2); + + // cleanup. 
+ bTotal.release(); + b1.release(); + b2.release(); + } + + public static class NoArrayNoMemoryAddrByteBuff extends DuplicatedByteBuf { + + public NoArrayNoMemoryAddrByteBuff(ByteBuf buffer) { + super(buffer); + } + + @Override + public boolean hasArray(){ + return false; + } + + @Override + public boolean hasMemoryAddress(){ + return false; + } + } +} \ No newline at end of file diff --git a/circe-checksum/src/test/java/com/scurrilous/circe/impl/AbstractStatefulHashTest.java b/circe-checksum/src/test/java/com/scurrilous/circe/impl/AbstractStatefulHashTest.java index 968e18b556b..039dd5c05d0 100644 --- a/circe-checksum/src/test/java/com/scurrilous/circe/impl/AbstractStatefulHashTest.java +++ b/circe-checksum/src/test/java/com/scurrilous/circe/impl/AbstractStatefulHashTest.java @@ -29,7 +29,6 @@ import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.List; - import org.junit.Test; import org.mockito.Mockito; diff --git a/circe-checksum/src/test/java/com/scurrilous/circe/impl/AbstractStatelessIntHashTest.java b/circe-checksum/src/test/java/com/scurrilous/circe/impl/AbstractStatelessIntHashTest.java index 1aa3d5517b8..69fc4f79e18 100644 --- a/circe-checksum/src/test/java/com/scurrilous/circe/impl/AbstractStatelessIntHashTest.java +++ b/circe-checksum/src/test/java/com/scurrilous/circe/impl/AbstractStatelessIntHashTest.java @@ -23,7 +23,6 @@ import static org.mockito.Mockito.verify; import java.nio.ByteBuffer; - import org.junit.Before; import org.junit.Test; import org.mockito.Mockito; diff --git a/circe-checksum/src/test/java/com/scurrilous/circe/impl/AbstractStatelessLongHashTest.java b/circe-checksum/src/test/java/com/scurrilous/circe/impl/AbstractStatelessLongHashTest.java index d0aabd59e79..0b05e2e4218 100644 --- a/circe-checksum/src/test/java/com/scurrilous/circe/impl/AbstractStatelessLongHashTest.java +++ b/circe-checksum/src/test/java/com/scurrilous/circe/impl/AbstractStatelessLongHashTest.java @@ -23,7 +23,6 @@ import static org.mockito.Mockito.verify; import java.nio.ByteBuffer; - import org.junit.Before; import org.junit.Test; import org.mockito.Mockito; diff --git a/circe-checksum/src/test/java/com/scurrilous/circe/utils/NativeUtilsTests.java b/circe-checksum/src/test/java/com/scurrilous/circe/utils/NativeUtilsTests.java index adaf7715d2b..17bb45ace2a 100644 --- a/circe-checksum/src/test/java/com/scurrilous/circe/utils/NativeUtilsTests.java +++ b/circe-checksum/src/test/java/com/scurrilous/circe/utils/NativeUtilsTests.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information diff --git a/circe-checksum/src/test/resources/log4j.properties b/circe-checksum/src/test/resources/log4j.properties deleted file mode 100644 index 10ae6bfcbba..00000000000 --- a/circe-checksum/src/test/resources/log4j.properties +++ /dev/null @@ -1,42 +0,0 @@ -# -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -# - -# -# Bookkeeper Logging Configuration -# - -# Format is " (, )+ - -# DEFAULT: console appender only, level INFO -bookkeeper.root.logger=INFO,CONSOLE -log4j.rootLogger=${bookkeeper.root.logger} - -# -# Log INFO level and above messages to the console -# -log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender -log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout -log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} - %-5p - [%t:%C{1}@%L] - %m%n - -#disable zookeeper logging -log4j.logger.org.apache.zookeeper=OFF -log4j.logger.org.apache.bookkeeper.bookie=INFO -log4j.logger.org.apache.bookkeeper.meta=INFO diff --git a/conf/bk_cli_env.sh b/conf/bk_cli_env.sh index 8faa03d3651..dde6c4b3110 100644 --- a/conf/bk_cli_env.sh +++ b/conf/bk_cli_env.sh @@ -50,7 +50,8 @@ # CLI_LOG_FILE="bookkeeper-cli.log" # Log level & appender -# CLI_ROOT_LOGGER="INFO,CONSOLE" +# CLI_ROOT_LOG_LEVEL="INFO" +# CLI_ROOT_LOG_APPENDER="CONSOLE" ################################# # JVM memory options diff --git a/conf/bk_server.conf b/conf/bk_server.conf old mode 100755 new mode 100644 index 1eaa1914e04..555e92034c6 --- a/conf/bk_server.conf +++ b/conf/bk_server.conf @@ -33,12 +33,17 @@ ## Server parameters ############################################################################# +# Unique ID for the bookie. If it is not set, it will be computed from the network address +# of the Bookie. +#bookieId= + # The port that the bookie server listens on. bookiePort=3181 # Configure the bookie to allow/disallow multiple ledger/index/journal directories -# in the same filesystem disk partition -# allowMultipleDirsUnderSameDiskPartition=false +# in the same filesystem disk partition. +# Defaults to true. +# allowMultipleDirsUnderSameDiskPartition=true # Set the network interface that the bookie should listen on. # If not set, the bookie will listen on all interfaces. @@ -117,10 +122,6 @@ extraServerComponents= # The number of threads that should handle long poll requests. # numLongPollWorkerThreads=0 -# The number of threads used for handling journal callback. If a zero or negative number is provided, -# the callbacks are executed directly at force write threads. -# numJournalCallbackThreads=1 - # Number of threads that should be used for high priority requests # (i.e. recovery reads and adds, and fencing). # numHighPriorityWorkerThreads=8 @@ -133,6 +134,22 @@ extraServerComponents= # avoid the executor queue to grow indefinitely # maxPendingAddRequestsPerThread=10000 +# Use auto-throttling of the read-worker threads. This is done +# to ensure the bookie is not using an unlimited amount of memory +# to respond to read-requests. +# readWorkerThreadsThrottlingEnabled=true + +# Option to enable busy-wait settings. Default is false. +# WARNING: This option will enable spin-waiting on executors and IO threads in order to reduce latency during +# context switches. The spinning will consume 100% CPU even when the bookie is not doing any work. It is recommended to +# reduce the number of threads in the main workers pool and Netty event loop to keep only a few CPU cores busy.
+# enableBusyWait=false + +# This is the number of threads used by Netty to handle TCP connections. +# Default is 2 * Runtime.getRuntime().availableProcessors() +# serverNumIOThreads= + + ############################################################################# ## Long poll request parameter settings ############################################################################# @@ -150,16 +167,23 @@ extraServerComponents= # If all ledger directories configured are full, then support only read requests for clients. # If "readOnlyModeEnabled=true" then on all ledger disks full, bookie will be converted # to read-only mode and serve only read requests. Otherwise the bookie will be shutdown. -# By default this will be disabled. +# By default this will be enabled. # readOnlyModeEnabled=true # Whether the bookie is force started in read only mode or not # forceReadOnlyBookie=false -# Persiste the bookie status locally on the disks. So the bookies can keep their status upon restarts +# Persist the bookie status locally on the disks. So the bookies can keep their status upon restarts # @Since 4.6 # persistBookieStatusEnabled=false +# If any ledger directories configured are full, then support only read requests for clients. +# If "readOnlyModeOnAnyDiskFullEnabled=true" then on any ledger disk full, the bookie will be converted +# to read-only mode and serve only read requests. When all disks have recovered, +# the bookie will be converted back to read-write mode. Otherwise it will obey the `readOnlyModeEnabled` behavior. +# By default this will be disabled. +# readOnlyModeOnAnyDiskFullEnabled=false + ############################################################################# ## Netty server settings ############################################################################# @@ -204,9 +228,22 @@ httpServerEnabled=false # The http server port to listen on. Default value is 8080. httpServerPort=8080 +# The http server host to listen on. Default value is '0.0.0.0'. +httpServerHost=0.0.0.0 + +# The http server class httpServerClass=org.apache.bookkeeper.http.vertx.VertxHttpServer +httpServerTlsEnable=false + +httpServerKeyStorePath= + +httpServerKeyStorePassword= + +httpServerTrustStorePath= + +httpServerTrustStorePassword= + ############################################## Security ############################################## # The bookie authentication provider factory class name. @@ -220,6 +257,12 @@ httpServerClass=org.apache.bookkeeper.http.vertx.VertxHttpServer # # permittedStartupUsers= +# Certificate role-based authorization for the bookie. +# To enable this option, bookieAuthProviderFactoryClass should be set to org.apache.bookkeeper.tls.BookieAuthZFactory +# Comma-separated list of roles from the certificate's OU field that should be authorized for access +# +# authorizedRoles= + ############################################################################# ## TLS settings ############################################################################# @@ -228,7 +271,7 @@ httpServerClass=org.apache.bookkeeper.http.vertx.VertxHttpServer # tlsProvider=OpenSSL # The path to the class that provides security. -# tlsProviderFactoryClass=org.apache.bookkeeper.security.SSLContextFactory +# tlsProviderFactoryClass=org.apache.bookkeeper.tls.TLSContextFactory # Type of security used by server. # tlsClientAuthentication=true @@ -251,6 +294,11 @@ httpServerClass=org.apache.bookkeeper.http.vertx.VertxHttpServer # Bookie Truststore password path, if the trust store is protected by a password.
# tlsTrustStorePasswordPath=null +# Bookie TLS certificate path. +# tlsCertificatePath=null + +# Tls certificate files refresh duration in seconds. +# tlsCertFilesRefreshDurationSeconds=0 ############################################## Bookie Storage ############################################## @@ -272,27 +320,26 @@ journalDirectories=/tmp/bk-txn # journalDirectory=/tmp/bk-txn # The journal format version to write. -# Available formats are 1-5: +# Available formats are 1-6: # 1: no header # 2: a header section was added # 3: ledger key was introduced # 4: fencing key was introduced # 5: expanding header to 512 and padding writes to align sector size configured by `journalAlignmentSize` # 6: persisting explicitLac is introduced -# By default, it is `4`. If you'd like to enable `padding-writes` feature, you can set journal version to `5`. +# By default, it is `6`. +# If you'd like to disable persisting ExplicitLac, you can set this config to < `6` and also +# fileInfoFormatVersionToWrite should be set to 0. If there is a mismatch, the server config is considered invalid. # You can disable `padding-writes` by setting journal version back to `4`. This feature is available in 4.5.0 # and onward versions. -# If you'd like to enable persisting ExplicitLac, you can set this config to 6 and also -# fileInfoFormatVersionToWrite should be atleast 1. If there is mismatch then the serverconfig is considered -# invalid. -# journalFormatVersionToWrite=4 +# journalFormatVersionToWrite=6 # Max file size of journal file, in mega bytes # A new journal file will be created when the old one reaches the file size limitation # journalMaxSizeMB=2048 # Max number of old journal file to kept -# Keep a number of old journal files would help data recovery in specia case +# Keeping a number of old journal files helps data recovery in special cases # journalMaxBackups=5 # How much space should we pre-allocate at a time in the journal. @@ -304,6 +351,14 @@ journalDirectories=/tmp/bk-txn # Should we remove pages from page cache after force write # journalRemoveFromPageCache=true +# Should the data be written to the journal. +# By default, data is written to the journal for durability of writes. +# Beware: while disabling data journaling in the Bookie journal might improve the bookie write performance, it will also +# introduce the possibility of data loss. With no journal, the write operations are passed to the storage engine +# and then acknowledged. In case of power failure, the affected bookie might lose the unflushed data. If the ledger +# is replicated to multiple bookies, the chances of data loss are reduced though still present. +# journalWriteData=true + # Should the data be fsynced on journal before acknowledgment. # By default, data sync is enabled to guarantee durability of writes. # Beware: while disabling data sync in the Bookie journal might improve the bookie write performance, it will also @@ -333,6 +388,20 @@ journalDirectories=/tmp/bk-txn # If we should flush the journal when journal queue is empty # journalFlushWhenQueueEmpty=false +# Set the size of the journal queue. +# journalQueueSize=10000 + +# Set the max amount of memory that can be used by the journal. +# If empty, this will be set to use 5% of available direct memory +# Setting it to 0 will disable the max memory control for the journal. +# journalMaxMemorySizeMb= + +# Set PageCache flush interval (milliseconds) when journalSyncData is disabled +# journalPageCacheFlushIntervalMSec = 1000 +
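To make the journal durability trade-offs above concrete: a hypothetical latency-oriented profile could combine the new `journalWriteData` switch with the fsync and page-cache settings referenced above (values illustrative only; this reduces durability):

    # journalWriteData=true
    # journalSyncData=false
    # journalPageCacheFlushIntervalMSec=1000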
+# Set the Channel Provider for journal. +# The default value is +# journalChannelProvider=org.apache.bookkeeper.bookie.DefaultFileChannelProvider ############################################################################# ## Ledger storage settings ############################################################################# @@ -350,14 +419,14 @@ journalDirectories=/tmp/bk-txn # @Deprecated - `sortedLedgerStorageEnabled` is deprecated in favor of using `ledgerStorageClass` # Whether sorted-ledger storage enabled (default true) -# sortedLedgerStorageEnabled=ture +# sortedLedgerStorageEnabled=true # Directory Bookkeeper outputs ledger snapshots # could define multi directories to store snapshots, separated by ',' # For example: # ledgerDirectories=/tmp/bk1-data,/tmp/bk2-data # -# Ideally ledger dirs and journal dir are each in a differet device, +# Ideally ledger dirs and journal dir are each in a different device, # which reduce the contention between random i/o and sequential write. # It is possible to run with a single disk, but performance will be significantly lower. ledgerDirectories=/tmp/bk-data @@ -398,7 +467,8 @@ ledgerDirectories=/tmp/bk-data # on too frequent flushing. You can consider increment flush interval # to get better performance, but you need to pay more time on bookie # server restart after failure. -# This config is used only when entryLogPerLedgerEnabled is enabled. +# This config is used when entryLogPerLedgerEnabled=true +# or ledgerStorageClass=org.apache.bookkeeper.bookie.storage.ldb.DbLedgerStorage. # flushInterval=10000 # Allow the expansion of bookie storage capacity. Newly added ledger @@ -445,7 +515,7 @@ ledgerDirectories=/tmp/bk-data # active at a given point in time # maximumNumberOfActiveEntryLogs=500 -# in EntryLogManagerForEntryLogPerLedger, this config value specifies the metrics cache size +# in EntryLogManagerForEntryLogPerLedger, this config value specifies the metrics cache size # limits in multiples of entrylogMap cache size limits. # entryLogPerLedgerCounterLimitsMultFactor=10 @@ -453,7 +523,13 @@ ledgerDirectories=/tmp/bk-data ## Entry log compaction settings ############################################################################# -# Set the rate at which compaction will readd entries. The unit is adds per second. +# Whether to allow force compaction when the entry log compaction is disabled. +# It enables you to manually force-compact the entry log even if +# the entry log compaction is disabled. The 'minorCompactionThreshold' or +# 'majorCompactionThreshold' still needs to be specified. +# forceAllowCompaction=false + +# Set the rate at which compaction will read entries. The unit is adds per second. # compactionRate=1000 # Threshold of minor compaction @@ -464,8 +540,12 @@ ledgerDirectories=/tmp/bk-data # Interval to run minor compaction, in seconds # If it is set to less than zero, the minor compaction is disabled. +# Note: should be greater than gcWaitTime. # minorCompactionInterval=3600 +# Maximum milliseconds to run minor compaction. Defaults to -1 (run indefinitely). +# minorCompactionMaxTimeMillis=-1 + # Set the maximum number of entries which can be compacted without flushing. # When compacting, the entries are written to the entrylog and the new offsets # are cached in memory. Once the entrylog is flushed the index is updated with @@ -486,8 +566,12 @@ ledgerDirectories=/tmp/bk-data # Interval to run major compaction, in seconds # If it is set to less than zero, the major compaction is disabled. +# Note: should be greater than gcWaitTime.
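A self-consistent compaction schedule honoring the gcWaitTime notes above might look like this (values hypothetical):

    # gcWaitTime=900000                # trigger GC every 15 minutes (milliseconds)
    # minorCompactionThreshold=0.2
    # minorCompactionInterval=3600     # hourly, comfortably above the GC delay
    # minorCompactionMaxTimeMillis=-1  # no time cap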
# majorCompactionInterval=86400 +# Maximum milliseconds to run major compaction. Defaults to -1 (run indefinitely). +# majorCompactionMaxTimeMillis=-1 + # Throttle compaction by bytes or by entries. # isThrottleByBytes=false @@ -506,7 +590,7 @@ ledgerDirectories=/tmp/bk-data ## Garbage collection settings ############################################################################# -# How long the interval to trigger next garbage collection, in milliseconds +# Fixed delay in milliseconds to trigger the next garbage collection # Since garbage collection is running in background, too frequent gc # will hurt performance. It is better to give a higher number of gc # interval if there is enough disk capacity. @@ -517,6 +601,9 @@ ledgerDirectories=/tmp/bk-data # since we read the metadata for all the ledgers on the bookie from zk # gcOverreplicatedLedgerWaitTime=86400000 +# Max number of concurrent requests in garbage collection of overreplicated ledgers. +# gcOverreplicatedLedgerMaxConcurrentRequests=1000 + # Whether force compaction is allowed when the disk is full or almost full. # Forcing GC may get some space back, but may also fill up disk space more quickly. # This is because new log files are created before GC, while old garbage @@ -526,25 +613,42 @@ ledgerDirectories=/tmp/bk-data # True if the bookie should double check readMetadata prior to gc # verifyMetadataOnGC=false +# True if bookie should persist entrylog file metadata and avoid in-memory object allocation +gcEntryLogMetadataCacheEnabled=false + +# Directory to persist Entrylog metadata if gcEntryLogMetadataCacheEnabled is true +# [Default: it creates a sub-directory under the first available base ledger directory with +# name "entrylogIndexCache"] +# gcEntryLogMetadataCachePath= + +# When judging whether an entry log file needs to be compacted, we calculate the usage rate of the entry log file based +# on the actual size of the entry log file. However, if an entry log file is 1MB in size and 0.9MB of data is +# being used, this entry log file won't be compacted by the garbage collector due to the high usage ratio, +# which will result in many small entry log files. +# We introduced the parameter `useTargetEntryLogSizeForGc` to determine whether to calculate entry log file usage +# based on the configured target entry log file size, which is configured by `logSizeLimit`. +# Default: useTargetEntryLogSizeForGc is false. +# useTargetEntryLogSizeForGc=false + ############################################################################# ## Disk utilization ############################################################################# # For each ledger dir, maximum disk space which can be used. # Default is 0.95f. i.e. 95% of disk can be used at most after which nothing will -# be written to that partition. If all ledger dir partions are full, then bookie +# be written to that partition. If all ledger dir partitions are full, then bookie # will turn to readonly mode if 'readOnlyModeEnabled=true' is set, else it will -# shutdown. -# Valid values should be in between 0 and 1 (exclusive). +# shutdown. Bookie will also suspend the minor and major compaction when the usage threshold is exceeded +# if `isForceGCAllowWhenNoSpace` is disabled. When the usage becomes lower than the threshold, the major and minor +# compaction will be resumed. +# Valid values should be in between 0 and 1 (exclusive). The default value is 0.95. # diskUsageThreshold=0.95 -# The disk free space low water mark threshold.
-# Disk is considered full when usage threshold is exceeded. -# Disk returns back to non-full state when usage is below low water mark threshold. -# This prevents it from going back and forth between these states frequently -# when concurrent writes and compaction are happening. This also prevent bookie from -# switching frequently between read-only and read-writes states in the same cases. -# diskUsageWarnThreshold=0.95 +# The disk free space warn threshold. +# Disk is considered almost full when usage threshold is exceeded. Bookie will suspend the major +# compaction when the usage threshold is exceeded if `isForceGCAllowWhenNoSpace` is disabled. When the usage becomes lower +# than the threshold, the major compaction will be resumed. The default value is 0.90. +# diskUsageWarnThreshold=0.90 # Set the disk free space low water mark threshold. Disk is considered full when # usage threshold is exceeded. Disk returns back to non-full state when usage is @@ -552,7 +656,10 @@ ledgerDirectories=/tmp/bk-data # between these states frequently when concurrent writes and compaction are # happening. This also prevents the bookie from switching frequently between # read-only and read-writes states in the same cases. -# diskUsageLwmThreshold=0.90 +# If the bookie already runs into read-only mode and the disk usage becomes lower than this threshold, the bookie +# will change from read-only to read-write mode. At the same time, the major and minor compaction will be resumed +# if `isForceGCAllowWhenNoSpace` is disabled. The default value is the same as `diskUsageThreshold`. +# diskUsageLwmThreshold=0.95 # Disk check interval in milli seconds, interval to check the ledger dirs usage. # Default is 10000 @@ -600,14 +707,15 @@ ledgerDirectories=/tmp/bk-data # Available formats are 0-1: # 0: Initial version # 1: persisting explicitLac is introduced -# By default, it is `0`. If you'd like to enable persisting ExplicitLac, you can set -# this config to 1 and also journalFormatVersionToWrite should be atleast 6. If -# there is mismatch then the serverconfig is considered invalid. -# fileInfoFormatVersionToWrite = 0 +# By default, it is `1`. +# If you'd like to disable persisting ExplicitLac, you can set this config to 0 and +# also journalFormatVersionToWrite should be set to < 6. If there is a mismatch, the +# server config is considered invalid. +# fileInfoFormatVersionToWrite = 1 # Size of a index page in ledger cache, in bytes # A larger index page can improve performance writing page to disk, -# which is efficent when you have small number of ledgers and these +# which is efficient when you have a small number of ledgers and these # ledgers have similar number of entries. # If you have large number of ledgers and each ledger has fewer entries, # smaller index page would improve memory usage. @@ -620,7 +728,7 @@ ledgerDirectories=/tmp/bk-data # pageLimit*pageSize should not be more than the JVM max memory limitation, # otherwise you would get OutOfMemoryException. # In general, incrementing pageLimit, using smaller index page would -# gain bettern performance in lager number of ledgers with fewer entries case +# gain better performance in the case of a larger number of ledgers with fewer entries # If pageLimit is -1, bookie server will use 1/3 of JVM memory to compute # the limitation of number of index pages. # pageLimit=-1 @@ -635,15 +743,20 @@ ledgerDirectories=/tmp/bk-data # Size of Write Cache. Memory is allocated from JVM direct memory.
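Restating the three disk-usage knobs above as one consistent band (these values are just the defaults quoted in the comments):

    # diskUsageWarnThreshold=0.90   # almost full: major compaction suspended
    # diskUsageThreshold=0.95       # full: writes stop, bookie goes read-only or shuts down
    # diskUsageLwmThreshold=0.95    # dropping below this returns a read-only bookie to read-write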
# Write cache is used to buffer entries before flushing into the entry log -# For good performance, it should be big enough to hold a sub -# dbStorage_writeCacheMaxSizeMb=512 +# For good performance, it should be big enough to hold a substantial number of entries in the flush interval +# By default it will be allocated to 25% of the available direct memory +# dbStorage_writeCacheMaxSizeMb= # Size of Read cache. Memory is allocated from JVM direct memory. # This read cache is pre-filled doing read-ahead whenever a cache miss happens -# dbStorage_readAheadCacheMaxSizeMb=256 +# By default it will be allocated to 25% of the available direct memory +# dbStorage_readAheadCacheMaxSizeMb= + +# How many entries' bytes to pre-fill in cache after a read cache miss. Default is -1. 0 or less disables this feature. +# dbStorage_readAheadCacheBatchBytesSize=-1 # How many entries to pre-fill in cache after a read cache miss -# dbStorage_readAheadCacheBatchSize=1000 +# dbStorage_readAheadCacheBatchSize=100 ## RocksDB specific configurations ## DbLedgerStorage uses RocksDB to store the indexes from @@ -652,8 +765,8 @@ ledgerDirectories=/tmp/bk-data # Size of RocksDB block-cache. For best performance, this cache # should be big enough to hold a significant portion of the index # database which can reach ~2GB in some cases -# Default is 256 MBytes -# dbStorage_rocksDB_blockCacheSize=268435456 +# Default is to use 10% / numberOfLedgers of the direct memory size +# dbStorage_rocksDB_blockCacheSize= # Other RocksDB specific tunables # dbStorage_rocksDB_writeBufferSizeMB=64 @@ -663,6 +776,34 @@ ledgerDirectories=/tmp/bk-data # dbStorage_rocksDB_numLevels=-1 # dbStorage_rocksDB_numFilesInLevel0=4 # dbStorage_rocksDB_maxSizeInLevel1MB=256 +# dbStorage_rocksDB_logPath= +# dbStorage_rocksDB_format_version=2 + +############################################################################# +## DirectIO entry logger configuration +############################################################################# +# The DirectIO entry logger only supports DbLedgerStorage + +# Enable/Disable directIO entry logger. +# dbStorage_directIOEntryLogger = false + +# Total write buffer size in megabytes for all the entry directories. +# The write buffer size of each entry directory is this total divided by the number of entry directories. +# By default it will be allocated to 12.5% of the available direct memory. +# dbStorage_directIOEntryLoggerTotalWriteBufferSizeMB= + +# Total read buffer size in megabytes for all the entry directories. +# The read buffer size of each entry directory is this total divided by the number of entry directories. +# By default it will be allocated to 12.5% of the available direct memory. +# dbStorage_directIOEntryLoggerTotalReadBufferSizeMB= + +# The buffer size, in megabytes, for each direct reader to read data from the entry log file. +# An entry log file will have only one direct reader. +# By default it will be set to 8MB. +# dbStorage_directIOEntryLoggerReadBufferSizeMB=8 +
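For deployments that prefer fixed sizes over the direct-memory percentages described above, a hypothetical explicit DbLedgerStorage sizing (illustrative values; the cache sizes match the old fixed defaults removed by this patch):

    # dbStorage_writeCacheMaxSizeMb=512
    # dbStorage_readAheadCacheMaxSizeMb=256
    # dbStorage_readAheadCacheBatchSize=100
    # dbStorage_rocksDB_blockCacheSize=268435456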
+# Maximum cache time after a direct reader is accessed. +# dbStorage_directIOEntryLoggerMaxFdCacheTimeSeconds=300 ############################################## Metadata Services ############################################## @@ -673,6 +814,8 @@ ledgerDirectories=/tmp/bk-data ############################################################################# # metadata service uri that bookkeeper uses for loading the corresponding metadata driver and resolving its metadata service location +# The server list can be semicolon separated values, for example: +# metadataServiceUri=zk+hierarchical://zk1:2181;zk2:2181;zk3:2181/ledgers metadataServiceUri=zk+hierarchical://localhost:2181/ledgers # @Deprecated - `ledgerManagerFactoryClass` is deprecated in favor of using `metadataServiceUri` @@ -702,7 +845,7 @@ metadataServiceUri=zk+hierarchical://localhost:2181/ledgers # store all ledgers. # zkLedgersRootPath=/ledgers -# @Deprecated - `zkLedgersRootPath` is deprecated in favor of using `metadataServiceUri` +# @Deprecated - `zkServers` is deprecated in favor of using `metadataServiceUri` # A list of one or more servers on which Zookeeper is running. # The server list can be comma separated values, for example: # zkServers=zk1:2181,zk2:2181,zk3:2181 @@ -742,6 +885,9 @@ zkEnableSecurity=false # Whether statistics are enabled # enableStatistics=true +# Flag to enable sanity check metrics in bookie stats +# sanityCheckMetricsEnabled=false + # The flag to enable recording task execution stats. # enableTaskExecutionStats=false @@ -749,15 +895,12 @@ zkEnableSecurity=false # Options: # - Prometheus : org.apache.bookkeeper.stats.prometheus.PrometheusMetricsProvider # - Codahale : org.apache.bookkeeper.stats.codahale.CodahaleMetricsProvider -# - Twitter Finagle : org.apache.bookkeeper.stats.twitter.finagle.FinagleStatsProvider -# - Twitter Ostrich : org.apache.bookkeeper.stats.twitter.ostrich.OstrichProvider -# - Twitter Science : org.apache.bookkeeper.stats.twitter.science.TwitterStatsProvider # Default value is: # org.apache.bookkeeper.stats.prometheus.PrometheusMetricsProvider # # For configuring corresponding stats provider, see details at each section below. # -# statsProviderClass=org.apache.bookkeeper.stats.prometheus.PrometheusMetricsProvider +statsProviderClass=org.apache.bookkeeper.stats.prometheus.PrometheusMetricsProvider ############################################################################# ## Prometheus Metrics Provider ############################################################################# @@ -766,12 +909,19 @@ zkEnableSecurity=false # These configs are used when using `PrometheusMetricsProvider`. # statsProviderClass=org.apache.bookkeeper.stats.prometheus.PrometheusMetricsProvider -# Default port for Prometheus metrics exporter +# Default bind address for the Prometheus metrics exporter +# prometheusStatsHttpAddress=0.0.0.0 + +# Default port for the Prometheus metrics exporter # prometheusStatsHttpPort=8000 # latency stats rollover interval, in seconds # prometheusStatsLatencyRolloverSeconds=60 +# Whether to expose the default JVM metrics. If you are using BookKeeper as an embedded service and you want to +# expose metrics in your application, you might need to disable this to avoid registering the JVM metrics twice. +# exposeDefaultJVMMetrics=true + ############################################################################# ## Codahale Metrics Provider ############################################################################# @@ -797,40 +947,6 @@ zkEnableSecurity=false # the jmx endpoint for reporting stats. see {@link https://metrics.dropwizard.io/3.1.0/manual/core/#jmx} for more details.
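The semicolon-separated metadata service URI shown above can also be supplied programmatically when embedding a bookie. A minimal sketch, assuming the `setMetadataServiceUri` setter that accompanies this setting in the 4.x configuration API:

    import org.apache.bookkeeper.conf.ServerConfiguration;

    public class MetadataUriExample {
        public static void main(String[] args) {
            ServerConfiguration conf = new ServerConfiguration();
            // One URI; semicolons separate the individual ZooKeeper servers.
            conf.setMetadataServiceUri("zk+hierarchical://zk1:2181;zk2:2181;zk3:2181/ledgers");
        }
    }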
# codahaleStatsJmxEndpoint= -############################################################################# -## Twitter Finagle Metrics Provider -############################################################################# - -# These configs are used when using `FinagleStatsProvider`. -# statsProviderClass=org.apache.bookkeeper.stats.twitter.finagle.FinagleStatsProvider - -############################################################################# -## Twitter Ostrich Metrics Provider -############################################################################# - -# These configs are used when using `OstrichProvider`. -# statsProviderClass=org.apache.bookkeeper.stats.twitter.ostrich.OstrichProvider - -# Flag to control whether to expose ostrich metrics via a http endpoint configured by `statsHttpPort`. -# statsExport=false - -# The http port of exposing ostrich stats if `statsExport` is set to true -# statsHttpPort=9002 - -############################################################################# -## Twitter Science Metrics Provider -############################################################################# - -# These configs are used when using `TwitterStatsProvider`. -# statsProviderClass=org.apache.bookkeeper.stats.twitter.science.TwitterStatsProvider - -# Flag to control whether to expose metrics throught a http endpoint configured by `statsHttpPort`. -# statsExport=false - -# The http port of exposing stats if `statsExport` is set to true -# statsHttpPort=9002 - - ############################################## Auto Recovery ############################################## # Whether the bookie itself can start auto-recovery service also or not @@ -846,6 +962,13 @@ zkEnableSecurity=false # the provided digest type provided at `digestType` and the provided passwd provided at `passwd`. # enableDigestTypeAutodetection=true +# Semaphore limit on getting ledgers from ZooKeeper. Used to throttle the ZooKeeper client request operations +# sent to the ZooKeeper server. Default value is 500 +# auditorMaxNumberOfConcurrentOpenLedgerOperations=500 + +# Wait timeout for acquiring the semaphore for concurrent open ledger operations. Default value is 120000ms. +# auditorAcquireConcurrentOpenLedgerOperationsTimeOutMSec=120000 + ############################################################################# ## Placement settings ############################################################################# @@ -882,6 +1005,17 @@ zkEnableSecurity=false # bookie then it would throw BKNotEnoughBookiesException instead of picking random one. # enforceMinNumRacksPerWriteQuorum=false +# Enforce writes being acknowledged by bookies belonging to at least the minimum +# number of fault domains (depending on the placement policy) before being +# acknowledged by bookkeeper. +# enforceMinNumFaultDomainsForWrite=false + +# Whether to enable BookieAddressResolver. +# If this flag is true, read bookie information from the metadata service (e.g. ZooKeeper) to resolve the address +# from each bookie ID. If all bookie IDs in the cluster are "address:port" or "hostname:port", you can set this +# flag to false to reduce requests to the metadata service. +# bookieAddressResolverEnabled=true + ############################################################################# ## Auditor settings ############################################################################# @@ -911,7 +1045,7 @@ zkEnableSecurity=false # Enable the Auditor to use system time as underreplicated ledger mark time.
# If this is enabled, Auditor will write a ctime field into the underreplicated ledger znode. -# storeSystemTimeAsLedgerUnderreplicatedMarkTime=false +# storeSystemTimeAsLedgerUnderreplicatedMarkTime=true ############################################################################# ## Replication Worker settings ############################################################################# @@ -920,13 +1054,25 @@ zkEnableSecurity=false # The number of entries that a replication will rereplicate in parallel. # rereplicationEntryBatchSize=10 -# The grace period, in seconds, that the replication worker waits before fencing and +# Enable/disable having read operations for a ledger to be sticky to a single bookie. +stickyReadsEnabled=true + +# Enable/disable reordering read sequence on reading entries. +reorderReadSequenceEnabled=true + +# The grace period, in milliseconds, that the replication worker waits before fencing and # replicating a ledger fragment that's still being written to upon bookie failure. -# openLedgerRereplicationGracePeriod=30 +# openLedgerRereplicationGracePeriod=30000 # The time to backoff when replication worker encounters exceptions on replicating a ledger, in milliseconds. # rwRereplicateBackoffMs=5000 +# The rate limit for replicators trying to acquire the re-replication task from ZooKeeper. +# Used to relieve the pressure on ZooKeeper in AutoRecovery. +# It is only enabled when setting a positive value. Default value is 0. +# Decimals are also allowed. For example: +# 0.5 means 1 task per 2 seconds, 1 means 1 task per second. +# zkReplicationTaskRateLimit=0 ################################################################## ################################################################## @@ -939,6 +1085,13 @@ zkEnableSecurity=false # the grpc server port to listen on. default is 4181 storageserver.grpc.port=4181 +### Dlog Settings for table service ### + +#### Replication Settings +dlog.bkcEnsembleSize=3 +dlog.bkcWriteQuorumSize=2 +dlog.bkcAckQuorumSize=2 + ### Storage ### # local storage directories for storing table ranges data (e.g. rocksdb sst files) @@ -949,3 +1102,53 @@ storage.serve.readonly.tables=false # the cluster controller schedule interval, in milliseconds. default is 30 seconds. storage.cluster.controller.schedule.interval.ms=30000 + + +############################################################################# +## Netty Allocator Settings +############################################################################# + +# Define the memory pooling policy. +# Available options are: +# - PooledDirect: Use Direct memory for all buffers and pool the memory. +# Direct memory will avoid the overhead of JVM GC and most +# memory copies when reading and writing to socket channel. +# Pooling will add memory space overhead due to the fact that +# there will be fragmentation in the allocator and that threads +# will keep a portion of memory as thread-local to avoid +# contention when possible. +# - UnpooledHeap: Allocate memory from JVM heap without any pooling. +# This option has the least overhead in terms of memory usage +# since the memory will be automatically reclaimed by the +# JVM GC but might impose a performance penalty at high +# throughput. +# Default is: PooledDirect +# allocatorPoolingPolicy=PooledDirect + +# Controls the amount of concurrency for the memory pool. +# Default is to have a number of allocator arenas equal to 2 * CPUs. +# Decreasing this number will reduce the amount of memory overhead, at the +# expense of increased allocation contention.
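The same pooling choice can be made in code for embedded deployments. A sketch assuming the allocator setters and the `PoolingPolicy` enum from the BookKeeper common allocator package (names inferred from the keys above; treat them as assumptions):

    import org.apache.bookkeeper.common.allocator.PoolingPolicy;
    import org.apache.bookkeeper.conf.ServerConfiguration;

    public class AllocatorExample {
        public static void main(String[] args) {
            ServerConfiguration conf = new ServerConfiguration();
            // Pool direct memory: fewer copies and less GC, at some fragmentation cost.
            conf.setAllocatorPoolingPolicy(PoolingPolicy.PooledDirect);
            // Fewer arenas reduce memory overhead at the cost of more allocation contention.
            conf.setAllocatorPoolingConcurrency(8);
        }
    }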
+# allocatorPoolingConcurrency=8 + +# Define the memory allocator out of memory policy. +# Available options are: +# - FallbackToHeap: If it's not possible to allocate a buffer from direct memory, +# fallback to allocate an unpooled buffer from JVM heap. +# This will help absorb memory allocation spikes because the heap +# allocations will naturally slow down the process and will result +# in full GC cleanup if the heap itself is full. +# - ThrowException: Throw regular OOM exception without taking additional actions. +# Default is: FallbackToHeap +# allocatorOutOfMemoryPolicy=FallbackToHeap + +# Available options are: +# - Disabled: No leak detection and no overhead. +# - Simple: Instruments 1% of the allocated buffer to track for leaks. +# - Advanced: Instruments 1% of the allocated buffer to track for leaks, reporting +# stack traces of places where the buffer was used. +# - Paranoid: Instruments 100% of the allocated buffer to track for leaks, reporting +# stack traces of places where the buffer was used. Introduces very +# significant overhead. +# Default is: Disabled +# allocatorLeakDetectionPolicy=Disabled diff --git a/conf/bkenv.sh b/conf/bkenv.sh index f6d061611fc..d6446f1d107 100644 --- a/conf/bkenv.sh +++ b/conf/bkenv.sh @@ -43,9 +43,6 @@ # Wait time before forcefully kill the Bookie server instance, if the stop is not successful # BOOKIE_STOP_TIMEOUT= -# Entry formatter class to format entries. -# ENTRY_FORMATTER_CLASS= - # this default config dir should match the 'localBookiesConfigDirectory' config value in the conf file of LocalBookKeeper # LOCALBOOKIES_CONFIG_DIR=/tmp/localbookies-config @@ -63,7 +60,8 @@ # BOOKIE_LOG_FILE="bookkeeper.log" # Log level & appender -# BOOKIE_ROOT_LOGGER="INFO,CONSOLE" +# BOOKIE_ROOT_LOG_LEVEL="INFO" +# BOOKIE_ROOT_LOG_APPENDER="CONSOLE" ################################# # BookKeeper JVM memory options diff --git a/conf/default_rocksdb.conf.default b/conf/default_rocksdb.conf.default new file mode 100644 index 00000000000..e9b8e7c3ecd --- /dev/null +++ b/conf/default_rocksdb.conf.default @@ -0,0 +1,33 @@ +#/** +# * Licensed to the Apache Software Foundation (ASF) under one +# * or more contributor license agreements. See the NOTICE file +# * distributed with this work for additional information +# * regarding copyright ownership. The ASF licenses this file +# * to you under the Apache License, Version 2.0 (the +# * "License"); you may not use this file except in compliance +# * with the License. You may obtain a copy of the License at +# * +# * http://www.apache.org/licenses/LICENSE-2.0 +# * +# * Unless required by applicable law or agreed to in writing, software +# * distributed under the License is distributed on an "AS IS" BASIS, +# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# * See the License for the specific language governing permissions and +# * limitations under the License.
+# */ + +[DBOptions] + # set by jni: options.setCreateIfMissing + create_if_missing=true + # set by jni: options.setInfoLogLevel + info_log_level=INFO_LEVEL + # set by jni: options.setKeepLogFileNum + keep_log_file_num=30 + +[CFOptions "default"] + # set by jni: options.setLogFileTimeToRoll + log_file_time_to_roll=86400 + +[TableOptions/BlockBasedTable "default"] + # set by jni: tableOptions.setChecksumType + checksum=kxxHash \ No newline at end of file diff --git a/conf/entry_location_rocksdb.conf.default b/conf/entry_location_rocksdb.conf.default new file mode 100644 index 00000000000..6f6c1b4d052 --- /dev/null +++ b/conf/entry_location_rocksdb.conf.default @@ -0,0 +1,69 @@ +#/** +# * Licensed to the Apache Software Foundation (ASF) under one +# * or more contributor license agreements. See the NOTICE file +# * distributed with this work for additional information +# * regarding copyright ownership. The ASF licenses this file +# * to you under the Apache License, Version 2.0 (the +# * "License"); you may not use this file except in compliance +# * with the License. You may obtain a copy of the License at +# * +# * http://www.apache.org/licenses/LICENSE-2.0 +# * +# * Unless required by applicable law or agreed to in writing, software +# * distributed under the License is distributed on an "AS IS" BASIS, +# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# * See the License for the specific language governing permissions and +# * limitations under the License. +# */ + +[DBOptions] + # set by jni: options.setCreateIfMissing + create_if_missing=true + # set by jni: options.setInfoLogLevel + info_log_level=INFO_LEVEL + # set by jni: options.setKeepLogFileNum + keep_log_file_num=30 + # set by jni: options.setLogFileTimeToRoll + log_file_time_to_roll=86400 + # set by jni: options.setMaxBackgroundJobs or options.setIncreaseParallelism + max_background_jobs=2 + # set by jni: options.setMaxSubcompactions + max_subcompactions=1 + # set by jni: options.setMaxTotalWalSize + max_total_wal_size=536870912 + # set by jni: options.setMaxOpenFiles + max_open_files=-1 + # set by jni: options.setDeleteObsoleteFilesPeriodMicros + delete_obsolete_files_period_micros=3600000000 + +[CFOptions "default"] + # set by jni: options.setCompressionType + compression=kLZ4Compression + # set by jni: options.setWriteBufferSize + write_buffer_size=67108864 + # set by jni: options.setMaxWriteBufferNumber + max_write_buffer_number=4 + # set by jni: options.setNumLevels + num_levels=7 + # set by jni: options.setLevelZeroFileNumCompactionTrigger + level0_file_num_compaction_trigger=4 + # set by jni: options.setMaxBytesForLevelBase + max_bytes_for_level_base=268435456 + # set by jni: options.setTargetFileSizeBase + target_file_size_base=67108864 + # set by jni: options.setLevelCompactionDynamicLevelBytes + level_compaction_dynamic_level_bytes=true + +[TableOptions/BlockBasedTable "default"] + # set by jni: tableOptions.setBlockSize + block_size=65536 + # set by jni: tableOptions.setBlockCache + block_cache=206150041 + # set by jni: tableOptions.setFormatVersion + format_version=2 + # set by jni: tableOptions.setChecksumType + checksum=kxxHash + # set by jni: tableOptions.setFilterPolicy, bloomfilter:[bits_per_key]:[use_block_based_builder] + filter_policy=rocksdb.BloomFilter:10:false + # set by jni: tableOptions.setCacheIndexAndFilterBlocks + cache_index_and_filter_blocks=true \ No newline at end of file diff --git a/conf/ledger_metadata_rocksdb.conf.default 
b/conf/ledger_metadata_rocksdb.conf.default new file mode 100644 index 00000000000..e9b8e7c3ecd --- /dev/null +++ b/conf/ledger_metadata_rocksdb.conf.default @@ -0,0 +1,33 @@ +#/** +# * Licensed to the Apache Software Foundation (ASF) under one +# * or more contributor license agreements. See the NOTICE file +# * distributed with this work for additional information +# * regarding copyright ownership. The ASF licenses this file +# * to you under the Apache License, Version 2.0 (the +# * "License"); you may not use this file except in compliance +# * with the License. You may obtain a copy of the License at +# * +# * http://www.apache.org/licenses/LICENSE-2.0 +# * +# * Unless required by applicable law or agreed to in writing, software +# * distributed under the License is distributed on an "AS IS" BASIS, +# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# * See the License for the specific language governing permissions and +# * limitations under the License. +# */ + +[DBOptions] + # set by jni: options.setCreateIfMissing + create_if_missing=true + # set by jni: options.setInfoLogLevel + info_log_level=INFO_LEVEL + # set by jni: options.setKeepLogFileNum + keep_log_file_num=30 + +[CFOptions "default"] + # set by jni: options.setLogFileTimeToRoll + log_file_time_to_roll=86400 + +[TableOptions/BlockBasedTable "default"] + # set by jni: tableOptions.setChecksumType + checksum=kxxHash \ No newline at end of file diff --git a/conf/log4j.cli.properties b/conf/log4j.cli.properties deleted file mode 100644 index 51c95f58c88..00000000000 --- a/conf/log4j.cli.properties +++ /dev/null @@ -1,59 +0,0 @@ -# -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-# -# - -# -# BookKeeper CLI (experimental) log4j configuration (used by `bin/bookkeeper-cli.sh`) -# - -# DEFAULT: console appender only -# Define some default values that can be overridden by system properties -bookkeeper.cli.root.logger=INFO,CONSOLE -bookkeeper.cli.log.dir=logs -bookkeeper.cli.log.file=bookkeeper-cli.log - -log4j.rootLogger=${bookkeeper.cli.root.logger} - -# -# Log INFO level and above messages to the console -# -log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender -log4j.appender.CONSOLE.Threshold=INFO -log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout -log4j.appender.CONSOLE.layout.ConversionPattern=%d{ABSOLUTE} %-5p %m%n - -# verbose console logging -log4j.appender.VERBOSECONSOLE=org.apache.log4j.ConsoleAppender -log4j.appender.VERBOSECONSOLE.Threshold=INFO -log4j.appender.VERBOSECONSOLE.layout=org.apache.log4j.PatternLayout -log4j.appender.VERBOSECONSOLE.layout.ConversionPattern=%m%n - -# file logging -log4j.appender.ROLLINGFILE=org.apache.log4j.DailyRollingFileAppender -log4j.appender.ROLLINGFILE.Threshold=INFO -log4j.appender.ROLLINGFILE.File=${bookkeeper.cli.log.dir}/${bookkeeper.cli.log.file} -log4j.appender.ROLLINGFILE.layout=org.apache.log4j.PatternLayout -log4j.appender.ROLLINGFILE.layout.ConversionPattern=%d{ISO8601} - %-5p - [%t:%C{1}@%L] - %m%n - -log4j.logger.verbose=INFO,VERBOSECONSOLE -log4j.logger.org.apache.zookeeper=ERROR -log4j.logger.org.apache.bookkeeper=ERROR -log4j.logger.org.apache.bookkeeper.bookie.BookieShell=INFO -log4j.logger.org.apache.bookkeeper.client.BookKeeperAdmin=INFO diff --git a/conf/log4j.properties b/conf/log4j.properties deleted file mode 100644 index cee7364bc40..00000000000 --- a/conf/log4j.properties +++ /dev/null @@ -1,77 +0,0 @@ -# -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -# - -# -# BookKeeper Daemon log4j configuration -# - -# Format is "<default threshold> (, <appender>)+ - -# DEFAULT: console appender only -# Define some default values that can be overridden by system properties -bookkeeper.root.logger=WARN,CONSOLE -bookkeeper.log.dir=.
-bookkeeper.log.file=bookkeeper-server.log - -log4j.rootLogger=${bookkeeper.root.logger} - -# Example with rolling log file -#log4j.rootLogger=DEBUG, CONSOLE, ROLLINGFILE - -# Example with rolling log file and tracing -#log4j.rootLogger=TRACE, CONSOLE, ROLLINGFILE, TRACEFILE - -# -# Log INFO level and above messages to the console -# -log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender -log4j.appender.CONSOLE.Threshold=INFO -log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout -log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} - %-5p - [%t:%C{1}@%L] - %m%n - -# -# Add ROLLINGFILE to rootLogger to get log file output -# Log DEBUG level and above messages to a log file -log4j.appender.ROLLINGFILE=org.apache.log4j.DailyRollingFileAppender - -log4j.appender.ROLLINGFILE.Threshold=INFO -log4j.appender.ROLLINGFILE.File=${bookkeeper.log.dir}/${bookkeeper.log.file} -log4j.appender.ROLLINGFILE.layout=org.apache.log4j.PatternLayout -log4j.appender.ROLLINGFILE.layout.ConversionPattern=%d{ISO8601} - %-5p - [%t:%C{1}@%L] - %m%n - -# Max log file size of 10MB -#log4j.appender.ROLLINGFILE.MaxFileSize=10MB -# uncomment the next line to limit number of backup files -#log4j.appender.ROLLINGFILE.MaxBackupIndex=10 - -log4j.appender.ROLLINGFILE.layout=org.apache.log4j.PatternLayout -log4j.appender.ROLLINGFILE.layout.ConversionPattern=%d{ISO8601} - %-5p [%t:%C{1}@%L] - %m%n - -# -# Add TRACEFILE to rootLogger to get log file output -# Log DEBUG level and above messages to a log file -log4j.appender.TRACEFILE=org.apache.log4j.FileAppender -log4j.appender.TRACEFILE.Threshold=TRACE -log4j.appender.TRACEFILE.File=bookkeeper-trace.log - -log4j.appender.TRACEFILE.layout=org.apache.log4j.PatternLayout -### Notice we are including log4j's NDC here (%x) -log4j.appender.TRACEFILE.layout.ConversionPattern=%d{ISO8601} - %-5p [%t:%C{1}@%L][%x] - %m%n diff --git a/conf/log4j.shell.properties b/conf/log4j.shell.properties deleted file mode 100644 index caec94859e8..00000000000 --- a/conf/log4j.shell.properties +++ /dev/null @@ -1,50 +0,0 @@ -# -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-# -# - -# -# BookieShell log4j configuration -# - -# DEFAULT: console appender only -# Define some default values that can be overridden by system properties -bookkeeper.root.logger=ERROR,CONSOLE - -log4j.rootLogger=${bookkeeper.root.logger} - -# -# Log INFO level and above messages to the console -# -log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender -log4j.appender.CONSOLE.Threshold=INFO -log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout -log4j.appender.CONSOLE.layout.ConversionPattern=%d{ABSOLUTE} %-5p %m%n - -# verbose console logging -log4j.appender.VERBOSECONSOLE=org.apache.log4j.ConsoleAppender -log4j.appender.VERBOSECONSOLE.Threshold=INFO -log4j.appender.VERBOSECONSOLE.layout=org.apache.log4j.PatternLayout -log4j.appender.VERBOSECONSOLE.layout.ConversionPattern=%m%n - -log4j.logger.verbose=INFO,VERBOSECONSOLE -log4j.logger.org.apache.zookeeper=ERROR -log4j.logger.org.apache.bookkeeper=ERROR -log4j.logger.org.apache.bookkeeper.bookie.BookieShell=INFO -log4j.logger.org.apache.bookkeeper.client.BookKeeperAdmin=INFO diff --git a/conf/log4j2.cli.xml b/conf/log4j2.cli.xml new file mode 100644 index 00000000000..3a1d2fa4ffb --- /dev/null +++ b/conf/log4j2.cli.xml @@ -0,0 +1,53 @@ + + + + + . + bookkeeper-server.log + INFO + CONSOLE + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/conf/log4j2.shell.xml b/conf/log4j2.shell.xml new file mode 100644 index 00000000000..c6e6b0fb9cd --- /dev/null +++ b/conf/log4j2.shell.xml @@ -0,0 +1,58 @@ + + + + + . + bookkeeper-shell.log + INFO + ROLLINGFILE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/conf/log4j2.xml b/conf/log4j2.xml new file mode 100644 index 00000000000..08d5d5613c1 --- /dev/null +++ b/conf/log4j2.xml @@ -0,0 +1,65 @@ + + + + + . + bookkeeper-server.log + INFO + CONSOLE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/conf/nettyenv.sh b/conf/nettyenv.sh index c4702b0237b..fe07a09ff70 100644 --- a/conf/nettyenv.sh +++ b/conf/nettyenv.sh @@ -23,8 +23,3 @@ # netty buffer leak detection level - {@link http://netty.io/wiki/reference-counted-objects.html#wiki-h3-11} # NETTY_LEAK_DETECTION_LEVEL= -# netty recycler max capacity -# NETTY_RECYCLER_MAXCAPACITY= - -# netty recycler link capacity -# NETTY_RECYCLER_LINKCAPACITY= diff --git a/conf/standalone.conf b/conf/standalone.conf old mode 100755 new mode 100644 index 4b52f2bf28b..fb54adcea86 --- a/conf/standalone.conf +++ b/conf/standalone.conf @@ -16,15 +16,31 @@ # * limitations under the License. # */ -# Standalone configuration +# This file is used by 'bin/bookkeeper standalone' command +# please copy from bk_server.conf the entries you need + +#Bookie configuration +#bookieId= +#allowLoopback=false -################################################################## ################################################################## # stream/table service ################################################################## -################################################################## ### Storage ### # the cluster controller schedule interval, in milliseconds. default is 30 seconds. storage.cluster.controller.schedule.interval.ms=30000 + +################################################################# +# httpserver +################################################################# + +# The flag enables/disables starting the admin http server. Default value is 'false' +httpServerEnabled=true + +# The http server port to listen on. Default value is 8080. 
+httpServerPort=8080 + +# The http server class +httpServerClass=org.apache.bookkeeper.http.vertx.VertxHttpServer diff --git a/conf/zookeeper.conf b/conf/zookeeper.conf index 89c985194ca..77e3524079a 100644 --- a/conf/zookeeper.conf +++ b/conf/zookeeper.conf @@ -58,7 +58,7 @@ maxClientCnxns=100 electionAlg=3 # Leader accepts client connections. Default value is "yes". The leader -# machine coordinates updates. For higher update throughput at thes slight +# machine coordinates updates. For higher update throughput at the slight # expense of read throughput the leader can be configured to not accept # clients and focus on coordination. leaderServes=yes diff --git a/cpu-affinity/pom.xml b/cpu-affinity/pom.xml new file mode 100644 index 00000000000..5ba9bc95de5 --- /dev/null +++ b/cpu-affinity/pom.xml @@ -0,0 +1,236 @@ + + + 4.0.0 + + org.apache.bookkeeper + bookkeeper + 4.18.0-SNAPSHOT + .. + + + cpu-affinity + nar + Apache BookKeeper :: CPU Affinity Library + CPU Affinity Library + + + dynamic + + + + + com.google.guava + guava + + + org.apache.commons + commons-lang3 + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + + + com.github.maven-nar + nar-maven-plugin + true + + ${skipTests} + + + + org.apache.maven.plugins + maven-assembly-plugin + + + src/main/assembly/assembly.xml + + false + posix + + + + make-assembly + package + + single + + + + + + org.apache.rat + apache-rat-plugin + + + + **/src/test/resources/proc_cpuinfo.txt + + + + + + + + + + jdk-without-javah + + [10,) + + + + + com.github.maven-nar + nar-maven-plugin + true + + + + default-nar-javah + none + + + + + org.apache.maven.plugins + maven-compiler-plugin + + + + -h + ${project.build.directory}/nar/javah-include + + + + + + + + mac + + + Mac OS X + + + + + + com.github.maven-nar + nar-maven-plugin + true + + ${nar.runtime} + cpu-affinity + + + jni + org.apache.bookkeeper.utils.affinity + + + + ${nar.cpp.optionSet} + false + false + full + + + + + + + + + Linux + + + Linux + + + + + + com.github.maven-nar + nar-maven-plugin + true + + ${nar.runtime} + cpu-affinity + + + jni + org.apache.bookkeeper.utils.affinity + + + + ${nar.cpp.optionSet} + false + false + full + + + + + + + + Windows + + + Windows + + + + + + com.github.maven-nar + nar-maven-plugin + true + + ${nar.runtime} + cpu-affinity + + + jni + org.apache.bookkeeper.utils.affinity + + + + ${nar.cpp.optionSet} + false + false + full + + + g++ + + + + + + + + + diff --git a/cpu-affinity/src/main/affinity/cpp/affinity_jni.c b/cpu-affinity/src/main/affinity/cpp/affinity_jni.c new file mode 100644 index 00000000000..ef8c1a7be55 --- /dev/null +++ b/cpu-affinity/src/main/affinity/cpp/affinity_jni.c @@ -0,0 +1,106 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
+ * + */ + +// Use different error code to differentiate non-implemented error +static const int NOT_IMPLEMENTED = -2; + +#ifdef __linux__ +#define _GNU_SOURCE +#include <sched.h> +#include <sys/syscall.h> +#include <unistd.h> + +static int set_affinity(int cpuid) { + cpu_set_t cpus; + CPU_ZERO(&cpus); + CPU_SET((size_t)cpuid, &cpus); + int threadId = (int)syscall(SYS_gettid); + return sched_setaffinity(threadId, sizeof(cpu_set_t), &cpus); +} + +static const int IS_AVAILABLE = 1; + +#else + +static int set_affinity(int cpuid) { return NOT_IMPLEMENTED; } + +static const int IS_AVAILABLE = 0; + +#endif + +#ifdef _WIN32 + +#define strerror_r(errno,buf,len) strerror_s(buf,len,errno) + +#endif + +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include <jni.h> + +/* + * Class: org_apache_bookkeeper_common_util_affinity_impl_CpuAffinityJni + * Method: isRoot + * Signature: ()Z + */ +JNIEXPORT jboolean JNICALL +Java_org_apache_bookkeeper_common_util_affinity_impl_CpuAffinityJni_isRoot(JNIEnv *env, jclass cls) { +#ifdef __linux__ + return getuid() == 0; +#else + return 0; +#endif +} + +/* + * Class: org_apache_bookkeeper_common_util_affinity_impl_CpuAffinityJni + * Method: isAvailable + * Signature: ()Z + */ +JNIEXPORT jboolean JNICALL +Java_org_apache_bookkeeper_common_util_affinity_impl_CpuAffinityJni_isAvailable(JNIEnv *env, jclass cls) { + return IS_AVAILABLE == 1; +} + +/* + * Class: org_apache_bookkeeper_common_util_affinity_impl_CpuAffinityJni + * Method: setAffinity + * Signature: (I)V + */ +JNIEXPORT void JNICALL Java_org_apache_bookkeeper_common_util_affinity_impl_CpuAffinityJni_setAffinity( + JNIEnv *env, jclass cls, jint cpuid) { + int res = set_affinity(cpuid); + + if (res == 0) { + // Success + return; + } else if (res == NOT_IMPLEMENTED) { + (*env)->ThrowNew(env, (*env)->FindClass(env, "java/lang/Exception"), "CPU affinity not implemented"); + } else { + // Error in sched_setaffinity, get message from errno + char buffer[1024]; + strerror_r(errno, buffer, sizeof(buffer)); + (*env)->ThrowNew(env, (*env)->FindClass(env, "java/lang/Exception"), buffer); + } +} diff --git a/cpu-affinity/src/main/assembly/assembly.xml b/cpu-affinity/src/main/assembly/assembly.xml new file mode 100644 index 00000000000..377b97ff54e --- /dev/null +++ b/cpu-affinity/src/main/assembly/assembly.xml @@ -0,0 +1,62 @@ + + + + all + + jar + + + false + + + ${project.build.directory}/nar/${project.artifactId}-${project.version}-${os.arch}-MacOSX-gpp-jni/lib/${os.arch}-MacOSX-gpp/jni + + lib + + lib* + + + + ${project.build.directory}/nar/${project.artifactId}-${project.version}-${os.arch}-Linux-gpp-jni/lib/${os.arch}-Linux-gpp/jni + + lib + + lib* + + + + ${project.build.directory}/nar/${project.artifactId}-${project.version}-${os.arch}-${os.name}-gpp-jni/lib/${os.arch}-${os.name}-gpp/jni + + lib + + lib* + + + + ${project.build.directory}/classes + + + **/* + + + + diff --git a/cpu-affinity/src/main/java/org/apache/bookkeeper/common/util/affinity/CpuAffinity.java b/cpu-affinity/src/main/java/org/apache/bookkeeper/common/util/affinity/CpuAffinity.java new file mode 100644 index 00000000000..5efa137d126 --- /dev/null +++ b/cpu-affinity/src/main/java/org/apache/bookkeeper/common/util/affinity/CpuAffinity.java @@ -0,0 +1,50 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +package org.apache.bookkeeper.common.util.affinity; + +import lombok.experimental.UtilityClass; +import org.apache.bookkeeper.common.util.affinity.impl.CpuAffinityImpl; + +/** + * Utilities for enabling thread to CPU affinity. + */ +@UtilityClass +public class CpuAffinity { + + /** + * Acquire ownership of one CPU core for the current thread. + * + *

<p>Notes: + * + * <ol> + * <li>This method will only consider CPUs that are "isolated" by the OS. Eg: boot the kernel with + * isolcpus=2,3,6,7 parameter + * <li>This method will disable hyper-threading on the owned core + * <li>Once a thread successfully acquires a CPU, ownership will be retained, even if the thread exits, for as long + * as the JVM process is alive. + * </ol>
+ */ + public static void acquireCore() { + CpuAffinityImpl.acquireCore(); + } +} diff --git a/cpu-affinity/src/main/java/org/apache/bookkeeper/common/util/affinity/impl/CpuAffinityImpl.java b/cpu-affinity/src/main/java/org/apache/bookkeeper/common/util/affinity/impl/CpuAffinityImpl.java new file mode 100644 index 00000000000..380509c2fb9 --- /dev/null +++ b/cpu-affinity/src/main/java/org/apache/bookkeeper/common/util/affinity/impl/CpuAffinityImpl.java @@ -0,0 +1,217 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.common.util.affinity.impl; + +import java.io.Closeable; +import java.io.File; +import java.io.IOException; +import java.io.RandomAccessFile; +import java.nio.channels.FileChannel; +import java.nio.channels.FileLock; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.List; +import java.util.Set; +import java.util.SortedSet; +import java.util.TreeSet; +import lombok.experimental.UtilityClass; +import lombok.extern.slf4j.Slf4j; + +/** + * Implementation of CPU Affinity functionality. + */ +@UtilityClass +@Slf4j +public class CpuAffinityImpl { + + private static boolean isInitialized = false; + private static boolean isSupported; + + // Ids of CPU cores acquired by this process + private static final SortedSet<Integer> acquiredProcessors = new TreeSet<>(); + + // Ids of processors that were isolated by Linux at boot time. This is the set + // of processors that can be acquired by this or other processes. + private static SortedSet<Integer> isolatedProcessors = null; + + private static ProcessorsInfo processorsInfo = null; + + public static synchronized void acquireCore() { + if (!isInitialized) { + init(); + } + + if (!isSupported) { + throw new RuntimeException("CPU Affinity not supported in current environment"); + } + + if (!CpuAffinityJni.isRoot()) { + throw new RuntimeException("CPU Affinity can only be set if the process is running as root"); + } + + try { + int cpu = pickAvailableCpu(); + CpuAffinityJni.setAffinity(cpu); + + log.info("Thread {} has successfully acquired ownership of cpu {}", Thread.currentThread().getName(), cpu); + } catch (IOException e) { + throw new RuntimeException("Failed to acquire CPU core: " + e.getMessage()); + } + } + + private static final String LOCK_FILE_PREFIX = Paths.get(System.getProperty("java.io.tmpdir"), "cpu-lock-") + .toString(); + + /** + * Other than the cores acquired by this process, there might be other processes on the same host trying to acquire + * the available cores. + * + *
<p>
We use file-locks to ensure that other processes are aware of which CPUs are taken and that these locks are + * automatically released if the process crashes. + */ + private static synchronized int pickAvailableCpu() throws IOException { + if (isolatedProcessors == null) { + isolatedProcessors = IsolatedProcessors.get(); + } + for (int isolatedCpu : isolatedProcessors) { + if (log.isDebugEnabled()) { + log.debug("Checking CPU {}", isolatedCpu); + } + if (acquiredProcessors.contains(isolatedCpu)) { + if (log.isDebugEnabled()) { + log.debug("Ignoring CPU {} since it's already acquired", isolatedCpu); + } + continue; + } + + if (tryAcquireCpu(isolatedCpu)) { + if (log.isDebugEnabled()) { + log.debug("Using CPU {}", isolatedCpu); + } + return isolatedCpu; + } + } + + throw new RuntimeException( + "There is no available isolated CPU to acquire for thread " + Thread.currentThread().getName()); + } + + private static boolean tryAcquireCpu(int targetCpu) throws IOException { + // First, acquire locks on all the cpus that share the same core as the target cpu + if (processorsInfo == null) { + processorsInfo = ProcessorsInfo.parseCpuInfo(); + } + + Set<Integer> cpusToAcquire = processorsInfo.getCpusOnSameCore(targetCpu); + List<Closeable> acquiredCpus = new ArrayList<>(); + + for (int cpu : cpusToAcquire) { + Closeable lock = tryAcquireFileLock(cpu); + if (lock == null) { + if (log.isDebugEnabled()) { + log.debug("Failed to acquire lock on CPU {}", cpu); + } + + // Failed to acquire one cpu, release the rest that were already locked + for (Closeable l : acquiredCpus) { + l.close(); + } + + return false; + } else { + acquiredCpus.add(lock); + } + } + + // At this point, we have ownership of all required cpus. + // Make sure the requested CPU is enabled and that all other CPUs on the same core are disabled, so that + // hyper-threading will not affect latency. + for (int cpu : cpusToAcquire) { + if (cpu == targetCpu) { + IsolatedProcessors.enableCpu(cpu); + } else { + IsolatedProcessors.disableCpu(cpu); + } + + acquiredProcessors.add(cpu); + } + return true; + } + + /** + * Try to acquire a lock on a particular cpu.
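+ * + * <p>The lock is an advisory OS-level file lock taken via {@link FileChannel#tryLock()}, so the kernel releases it automatically when the owning process exits or crashes. A standalone sketch of the same pattern (illustrative only, with a hypothetical lock path): + * <pre>{@code + * RandomAccessFile file = new RandomAccessFile("/tmp/cpu-lock-3", "rw"); + * FileLock lock = file.getChannel().tryLock(); + * if (lock != null) { + *     // cpu 3 is ours until the lock is released or the JVM exits + * } + * }</pre>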
+ * + * @return null if the lock was not available, or a {@link Closeable} lock object if the lock was acquired + */ + private static Closeable tryAcquireFileLock(int cpu) throws IOException { + String lockPath = LOCK_FILE_PREFIX + cpu; + + RandomAccessFile file = null; + FileChannel channel = null; + FileLock lock = null; + + try { + file = new RandomAccessFile(new File(lockPath), "rw"); + channel = file.getChannel(); + lock = channel.tryLock(); + + if (lock == null) { + return null; + } else { + final FileLock finalLock = lock; + final FileChannel finalChannel = channel; + final RandomAccessFile finalFile = file; + return () -> { + // Closeable that releases the file lock and closes the underlying channel and file + finalLock.close(); + finalChannel.close(); + finalFile.close(); + }; + } + } finally { + if (lock == null) { + if (channel != null) { + channel.close(); + } + + if (file != null) { + file.close(); + } + } + } + } + + private static void init() { + try { + // Since this feature is only available on Linux, there's no point + // in checking for MacOS jnilib or Windows dll extensions + NativeUtils.loadLibraryFromJar("/lib/libcpu-affinity.so"); + isSupported = CpuAffinityJni.isAvailable(); + } catch (final Exception | UnsatisfiedLinkError e) { + log.warn("Unable to load CPU affinity library: {}", e.getMessage(), e); + isSupported = false; + } finally { + isInitialized = true; + } + } + +} diff --git a/cpu-affinity/src/main/java/org/apache/bookkeeper/common/util/affinity/impl/CpuAffinityJni.java b/cpu-affinity/src/main/java/org/apache/bookkeeper/common/util/affinity/impl/CpuAffinityJni.java new file mode 100644 index 00000000000..720478ba973 --- /dev/null +++ b/cpu-affinity/src/main/java/org/apache/bookkeeper/common/util/affinity/impl/CpuAffinityJni.java @@ -0,0 +1,36 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.common.util.affinity.impl; + +import java.io.IOException; +import lombok.experimental.UtilityClass; + +/** + * JNI wrapper of native functions for CPU affinity. + */ +@UtilityClass +public class CpuAffinityJni { + static native boolean isRoot(); + + static native boolean isAvailable(); + + static native void setAffinity(int cpuId) throws IOException; +} diff --git a/cpu-affinity/src/main/java/org/apache/bookkeeper/common/util/affinity/impl/IsolatedProcessors.java b/cpu-affinity/src/main/java/org/apache/bookkeeper/common/util/affinity/impl/IsolatedProcessors.java new file mode 100644 index 00000000000..2d9cd190273 --- /dev/null +++ b/cpu-affinity/src/main/java/org/apache/bookkeeper/common/util/affinity/impl/IsolatedProcessors.java @@ -0,0 +1,99 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements.
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +package org.apache.bookkeeper.common.util.affinity.impl; + +import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; +import java.io.IOException; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.StandardOpenOption; +import java.util.SortedSet; +import java.util.TreeSet; +import lombok.experimental.UtilityClass; +import lombok.extern.slf4j.Slf4j; +import org.apache.commons.lang3.StringUtils; + +@UtilityClass +@Slf4j +class IsolatedProcessors { + + private static final Charset ENCODING = StandardCharsets.US_ASCII; + + private static final String ISOLATED_CPU_PATH = "/sys/devices/system/cpu/isolated"; + + @SuppressFBWarnings("DMI_HARDCODED_ABSOLUTE_FILENAME") + static SortedSet<Integer> get() throws IOException { + return parseProcessorRange(new String(Files.readAllBytes(Paths.get(ISOLATED_CPU_PATH)), ENCODING)); + } + + static SortedSet<Integer> parseProcessorRange(String range) { + SortedSet<Integer> processors = new TreeSet<>(); + + for (String part : StringUtils.trim(range).split(",")) { + if (part.contains("-")) { + // This is a range, e.g. 1-5, with both edges included + String[] parts = part.split("-"); + int first = Integer.parseInt(parts[0]); + int last = Integer.parseInt(parts[1]); + + for (int i = first; i <= last; i++) { + processors.add(i); + } + } else if (!part.isEmpty()) { + processors.add(Integer.parseInt(part)); + } + } + + return processors; + } + + /** + * Instruct Linux to disable a particular CPU. This is used to disable hyper-threading on a particular core, by + * shutting down the cpu that shares the same core. + */ + static void disableCpu(int cpu) throws IOException { + changeCpuStatus(cpu, false); + } + + static void enableCpu(int cpu) throws IOException { + changeCpuStatus(cpu, true); + } + + /** + * Instruct Linux to enable or disable a particular CPU, by writing "1" or "0" to its online file under + * /sys/devices/system/cpu. + */ + private static void changeCpuStatus(int cpu, boolean enable) throws IOException { + Path cpuPath = Paths.get(String.format("/sys/devices/system/cpu/cpu%d/online", cpu)); + + boolean currentState = Integer + .parseInt(StringUtils.trim(new String(Files.readAllBytes(cpuPath), ENCODING))) != 0; + + if (currentState != enable) { + Files.write(cpuPath, (enable ? "1\n" : "0\n").getBytes(ENCODING), StandardOpenOption.TRUNCATE_EXISTING); + log.info("{} CPU {}", enable ? "Enabled" : "Disabled", cpu); + } + } +}
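To make the sysfs mechanics above concrete, here is a minimal standalone sketch of the same `/sys/devices/system/cpu/cpuN/online` toggle that `changeCpuStatus` performs. It is illustrative only: the class name and command-line arguments are hypothetical, and it assumes a Linux host and root privileges.

```java
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;

// Illustrative sketch mirroring IsolatedProcessors.changeCpuStatus:
// writing "0" takes a CPU offline, writing "1" brings it back online.
public class CpuOnlineToggle {
    public static void main(String[] args) throws Exception {
        int cpu = Integer.parseInt(args[0]);            // e.g. 12
        boolean enable = Boolean.parseBoolean(args[1]); // e.g. false
        Path online = Paths.get("/sys/devices/system/cpu/cpu" + cpu + "/online");
        Files.write(online, (enable ? "1\n" : "0\n").getBytes(StandardCharsets.US_ASCII),
                StandardOpenOption.TRUNCATE_EXISTING);
        System.out.println("cpu" + cpu + " online="
                + new String(Files.readAllBytes(online), StandardCharsets.US_ASCII).trim());
    }
}
```

Disabling the sibling CPU of an acquired core in this way is what keeps hyper-threading from interfering with the latency of the pinned thread.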
"Enabled" : "Disabled", cpu); + } + } +} diff --git a/cpu-affinity/src/main/java/org/apache/bookkeeper/common/util/affinity/impl/NativeUtils.java b/cpu-affinity/src/main/java/org/apache/bookkeeper/common/util/affinity/impl/NativeUtils.java new file mode 100644 index 00000000000..fde69f90114 --- /dev/null +++ b/cpu-affinity/src/main/java/org/apache/bookkeeper/common/util/affinity/impl/NativeUtils.java @@ -0,0 +1,90 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.common.util.affinity.impl; + +import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; +import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.file.Files; +import java.nio.file.Path; +import lombok.NonNull; +import lombok.experimental.UtilityClass; + +/** + * Utility class to load jni library from inside a JAR. + */ +@UtilityClass +public class NativeUtils { + /** + * loads given library from the this jar. ie: this jar contains: /lib/pulsar-checksum.jnilib + * + * @param path + * : absolute path of the library in the jar
          + * if this jar contains: /lib/pulsar-checksum.jnilib then provide the same absolute path as input + * @throws Exception + */ + @SuppressFBWarnings( + value = "RCN_REDUNDANT_NULLCHECK_OF_NONNULL_VALUE", + justification = "work around for java 9: https://github.com/spotbugs/spotbugs/issues/493") + public static void loadLibraryFromJar(String path) throws Exception { + checkArgument(path.startsWith("/"), "absolute path must start with /"); + + String[] parts = path.split("/"); + checkArgument(parts.length > 0, "absolute path must contain file name"); + + String filename = parts[parts.length - 1]; + checkArgument(path.startsWith("/"), "absolute path must start with /"); + + Path dir = Files.createTempDirectory("native"); + dir.toFile().deleteOnExit(); + File temp = new File(dir.toString(), filename); + temp.deleteOnExit(); + + byte[] buffer = new byte[1024]; + int read; + + try (InputStream input = NativeUtils.class.getResourceAsStream(path); + OutputStream out = new FileOutputStream(temp)) { + if (input == null) { + throw new FileNotFoundException("Couldn't find file into jar " + path); + } + + while ((read = input.read(buffer)) != -1) { + out.write(buffer, 0, read); + } + } + + if (!temp.exists()) { + throw new FileNotFoundException("Failed to copy file from jar at " + temp.getAbsolutePath()); + } + + System.load(temp.getAbsolutePath()); + } + + private static void checkArgument(boolean expression, @NonNull Object errorMessage) { + if (!expression) { + throw new IllegalArgumentException(String.valueOf(errorMessage)); + } + } +} diff --git a/cpu-affinity/src/main/java/org/apache/bookkeeper/common/util/affinity/impl/ProcessorsInfo.java b/cpu-affinity/src/main/java/org/apache/bookkeeper/common/util/affinity/impl/ProcessorsInfo.java new file mode 100644 index 00000000000..bc416fdec9a --- /dev/null +++ b/cpu-affinity/src/main/java/org/apache/bookkeeper/common/util/affinity/impl/ProcessorsInfo.java @@ -0,0 +1,95 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.common.util.affinity.impl; + +import java.io.IOException; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.Map; +import java.util.Set; +import java.util.TreeMap; +import java.util.TreeSet; +import org.apache.commons.lang3.StringUtils; + +/** + * Discover the list of processors from /proc/cpuinfo. + */ +class ProcessorsInfo { + + private static final Charset ENCODING = StandardCharsets.US_ASCII; + + /** + * Given one cpu id, return all CPUs that are sharing the same core. 
+ */ + public Set<Integer> getCpusOnSameCore(int targetCpu) { + Set<Integer> result = new TreeSet<>(); + int targetCore = cpus.get(targetCpu); + + cpus.forEach((cpu, core) -> { + if (core == targetCore) { + result.add(cpu); + } + }); + + return result; + } + + private final Map<Integer, Integer> cpus = new TreeMap<>(); + + static ProcessorsInfo parseCpuInfo(String cpuInfoString) { + ProcessorsInfo pi = new ProcessorsInfo(); + + for (String cpu : cpuInfoString.split("\n\n")) { + int cpuId = -1; + int coreId = -1; + + for (String line : cpu.split("\n")) { + String[] parts = line.split(":", 2); + String key = StringUtils.trim(parts[0]); + String value = StringUtils.trim(parts[1]); + + if (key.equals("core id")) { + coreId = Integer.parseInt(value); + } else if (key.equals("processor")) { + cpuId = Integer.parseInt(value); + } else { + // ignore + } + } + + com.google.common.base.Preconditions.checkArgument(cpuId >= 0); + com.google.common.base.Preconditions.checkArgument(coreId >= 0); + pi.cpus.put(cpuId, coreId); + } + + return pi; + } + + static ProcessorsInfo parseCpuInfo() { + try { + return parseCpuInfo(new String(Files.readAllBytes(Paths.get("/proc/cpuinfo")), ENCODING)); + } catch (IOException e) { + throw new RuntimeException(e); + } + } +}
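For a concrete picture of what `parseCpuInfo` extracts, here is a sketch (illustrative only: it hand-writes a two-entry `/proc/cpuinfo` fragment containing just the fields the parser reads, and it must run from code in the same package, as the unit test below does, since the class is package-private):

```java
// Illustrative only: cpu 0 and cpu 12 are hyper-thread siblings on core 0,
// mirroring the topology of the proc_cpuinfo.txt test fixture below.
String cpuInfo =
        "processor : 0\n"
      + "core id : 0\n"
      + "\n"
      + "processor : 12\n"
      + "core id : 0\n";
ProcessorsInfo pi = ProcessorsInfo.parseCpuInfo(cpuInfo);
System.out.println(pi.getCpusOnSameCore(0));   // prints [0, 12]
System.out.println(pi.getCpusOnSameCore(12));  // prints [0, 12]
```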
diff --git a/cpu-affinity/src/main/java/org/apache/bookkeeper/common/util/affinity/impl/package-info.java b/cpu-affinity/src/main/java/org/apache/bookkeeper/common/util/affinity/impl/package-info.java new file mode 100644 index 00000000000..35da8164662 --- /dev/null +++ b/cpu-affinity/src/main/java/org/apache/bookkeeper/common/util/affinity/impl/package-info.java @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Implementation of util functions for CPU affinity. + */ +package org.apache.bookkeeper.common.util.affinity.impl; \ No newline at end of file diff --git a/cpu-affinity/src/main/java/org/apache/bookkeeper/common/util/affinity/package-info.java b/cpu-affinity/src/main/java/org/apache/bookkeeper/common/util/affinity/package-info.java new file mode 100644 index 00000000000..af4f4b4add1 --- /dev/null +++ b/cpu-affinity/src/main/java/org/apache/bookkeeper/common/util/affinity/package-info.java @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Util functions for CPU affinity. + */ +package org.apache.bookkeeper.common.util.affinity; \ No newline at end of file diff --git a/cpu-affinity/src/test/java/org/apache/bookkeeper/common/util/affinity/impl/CpuInfoTest.java b/cpu-affinity/src/test/java/org/apache/bookkeeper/common/util/affinity/impl/CpuInfoTest.java new file mode 100644 index 00000000000..bcb3bcd1c3f --- /dev/null +++ b/cpu-affinity/src/test/java/org/apache/bookkeeper/common/util/affinity/impl/CpuInfoTest.java @@ -0,0 +1,51 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.common.util.affinity.impl; + +import static org.junit.Assert.assertEquals; + +import com.google.common.collect.Sets; +import java.io.BufferedReader; +import java.io.InputStreamReader; +import java.util.stream.Collectors; +import org.junit.Test; + +/** + * Tests for the {@link ProcessorsInfo} /proc/cpuinfo parser. + */ +public class CpuInfoTest { + + @Test + public void testParseCpuInfo() throws Exception { + try (BufferedReader r = new BufferedReader( + new InputStreamReader(CpuInfoTest.class.getResourceAsStream("/proc_cpuinfo.txt")))) { + String text = r.lines().collect(Collectors.joining("\n")); + + ProcessorsInfo pi = ProcessorsInfo.parseCpuInfo(text); + + assertEquals(Sets.newHashSet(0, 12), pi.getCpusOnSameCore(0)); + assertEquals(Sets.newHashSet(0, 12), pi.getCpusOnSameCore(12)); + + assertEquals(Sets.newHashSet(8, 20), pi.getCpusOnSameCore(8)); + assertEquals(Sets.newHashSet(8, 20), pi.getCpusOnSameCore(20)); + } + } +} diff --git a/cpu-affinity/src/test/java/org/apache/bookkeeper/common/util/affinity/impl/IsolatedProcessorsTest.java b/cpu-affinity/src/test/java/org/apache/bookkeeper/common/util/affinity/impl/IsolatedProcessorsTest.java new file mode 100644 index 00000000000..eb3d1b21566 --- /dev/null +++ b/cpu-affinity/src/test/java/org/apache/bookkeeper/common/util/affinity/impl/IsolatedProcessorsTest.java @@ -0,0 +1,46 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.common.util.affinity.impl; + +import static org.junit.Assert.assertEquals; + +import com.google.common.collect.Sets; +import org.junit.Test; + +/** + * Tests for {@link IsolatedProcessors}. + */ +public class IsolatedProcessorsTest { + + @Test + public void testParseProcessors() throws Exception { + assertEquals(Sets.newHashSet(), IsolatedProcessors.parseProcessorRange("")); + + assertEquals(Sets.newHashSet(1, 3, 4, 5, 6, 7), IsolatedProcessors.parseProcessorRange("1,3-7")); + + assertEquals(Sets.newHashSet(1), IsolatedProcessors.parseProcessorRange("1")); + assertEquals(Sets.newHashSet(1, 3), IsolatedProcessors.parseProcessorRange("1,3")); + assertEquals(Sets.newHashSet(1, 3, 4, 5, 6, 7, 10, 11, 12, 13), + IsolatedProcessors.parseProcessorRange("1,3-7,10-13")); + + assertEquals(Sets.newHashSet(1, 3, 4, 5, 6, 7), IsolatedProcessors.parseProcessorRange("1,3-7\n")); + } +} diff --git a/cpu-affinity/src/test/resources/proc_cpuinfo.txt b/cpu-affinity/src/test/resources/proc_cpuinfo.txt new file mode 100644 index 00000000000..9ffe3e3a00d --- /dev/null +++ b/cpu-affinity/src/test/resources/proc_cpuinfo.txt @@ -0,0 +1,648 @@ +processor : 0 +vendor_id : GenuineIntel +cpu family : 6 +model : 79 +model name : Intel(R) Xeon(R) CPU E5-2687W v4 @ 3.00GHz +stepping : 1 +microcode : 0xb00002a +cpu MHz : 3199.951 +cache size : 30720 KB +physical id : 0 +siblings : 24 +core id : 0 +cpu cores : 12 +apicid : 0 +initial apicid : 0 +fpu : yes +fpu_exception : yes +cpuid level : 20 +wp : yes +flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch epb invpcid_single ibrs ibpb stibp kaiser tpr_shadow vnmi flexpriority ept vpid fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm cqm rdseed adx smap intel_pt xsaveopt cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local dtherm ida arat pln pts +bugs : cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass +bogomips : 5986.05 +clflush size : 64 +cache_alignment : 64 +address sizes : 46 bits physical, 48 bits virtual +power management: + +processor : 1 +vendor_id : GenuineIntel +cpu family : 6 +model : 79 +model name : Intel(R) Xeon(R) CPU E5-2687W v4 @ 3.00GHz +stepping : 1 +microcode : 0xb00002a +cpu MHz : 3199.951 +cache size : 30720 KB +physical id : 0 +siblings : 24 +core id : 1 +cpu cores : 12 +apicid : 2 +initial apicid : 2 +fpu : yes +fpu_exception : yes +cpuid level : 20 +wp : yes +flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic 
movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch epb invpcid_single ibrs ibpb stibp kaiser tpr_shadow vnmi flexpriority ept vpid fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm cqm rdseed adx smap intel_pt xsaveopt cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local dtherm ida arat pln pts +bugs : cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass +bogomips : 5986.05 +clflush size : 64 +cache_alignment : 64 +address sizes : 46 bits physical, 48 bits virtual +power management: + +processor : 2 +vendor_id : GenuineIntel +cpu family : 6 +model : 79 +model name : Intel(R) Xeon(R) CPU E5-2687W v4 @ 3.00GHz +stepping : 1 +microcode : 0xb00002a +cpu MHz : 3199.951 +cache size : 30720 KB +physical id : 0 +siblings : 24 +core id : 2 +cpu cores : 12 +apicid : 4 +initial apicid : 4 +fpu : yes +fpu_exception : yes +cpuid level : 20 +wp : yes +flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch epb invpcid_single ibrs ibpb stibp kaiser tpr_shadow vnmi flexpriority ept vpid fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm cqm rdseed adx smap intel_pt xsaveopt cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local dtherm ida arat pln pts +bugs : cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass +bogomips : 5986.05 +clflush size : 64 +cache_alignment : 64 +address sizes : 46 bits physical, 48 bits virtual +power management: + +processor : 3 +vendor_id : GenuineIntel +cpu family : 6 +model : 79 +model name : Intel(R) Xeon(R) CPU E5-2687W v4 @ 3.00GHz +stepping : 1 +microcode : 0xb00002a +cpu MHz : 3199.951 +cache size : 30720 KB +physical id : 0 +siblings : 24 +core id : 3 +cpu cores : 12 +apicid : 6 +initial apicid : 6 +fpu : yes +fpu_exception : yes +cpuid level : 20 +wp : yes +flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch epb invpcid_single ibrs ibpb stibp kaiser tpr_shadow vnmi flexpriority ept vpid fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm cqm rdseed adx smap intel_pt xsaveopt cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local dtherm ida arat pln pts +bugs : cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass +bogomips : 5986.05 +clflush size : 64 +cache_alignment : 64 +address sizes : 46 bits physical, 48 bits virtual +power management: + +processor : 4 +vendor_id : GenuineIntel +cpu family : 6 +model : 79 +model name : Intel(R) Xeon(R) CPU E5-2687W v4 @ 3.00GHz +stepping : 1 +microcode : 0xb00002a +cpu MHz : 3199.951 +cache size : 30720 KB +physical id : 0 +siblings : 24 +core id : 4 +cpu cores : 12 +apicid : 8 +initial apicid : 8 +fpu : yes +fpu_exception : yes +cpuid level : 20 +wp : yes +flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc 
arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch epb invpcid_single ibrs ibpb stibp kaiser tpr_shadow vnmi flexpriority ept vpid fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm cqm rdseed adx smap intel_pt xsaveopt cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local dtherm ida arat pln pts +bugs : cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass +bogomips : 5986.05 +clflush size : 64 +cache_alignment : 64 +address sizes : 46 bits physical, 48 bits virtual +power management: + +processor : 5 +vendor_id : GenuineIntel +cpu family : 6 +model : 79 +model name : Intel(R) Xeon(R) CPU E5-2687W v4 @ 3.00GHz +stepping : 1 +microcode : 0xb00002a +cpu MHz : 3199.951 +cache size : 30720 KB +physical id : 0 +siblings : 24 +core id : 5 +cpu cores : 12 +apicid : 10 +initial apicid : 10 +fpu : yes +fpu_exception : yes +cpuid level : 20 +wp : yes +flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch epb invpcid_single ibrs ibpb stibp kaiser tpr_shadow vnmi flexpriority ept vpid fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm cqm rdseed adx smap intel_pt xsaveopt cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local dtherm ida arat pln pts +bugs : cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass +bogomips : 5986.05 +clflush size : 64 +cache_alignment : 64 +address sizes : 46 bits physical, 48 bits virtual +power management: + +processor : 6 +vendor_id : GenuineIntel +cpu family : 6 +model : 79 +model name : Intel(R) Xeon(R) CPU E5-2687W v4 @ 3.00GHz +stepping : 1 +microcode : 0xb00002a +cpu MHz : 3199.951 +cache size : 30720 KB +physical id : 0 +siblings : 24 +core id : 8 +cpu cores : 12 +apicid : 16 +initial apicid : 16 +fpu : yes +fpu_exception : yes +cpuid level : 20 +wp : yes +flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch epb invpcid_single ibrs ibpb stibp kaiser tpr_shadow vnmi flexpriority ept vpid fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm cqm rdseed adx smap intel_pt xsaveopt cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local dtherm ida arat pln pts +bugs : cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass +bogomips : 5986.05 +clflush size : 64 +cache_alignment : 64 +address sizes : 46 bits physical, 48 bits virtual +power management: + +processor : 7 +vendor_id : GenuineIntel +cpu family : 6 +model : 79 +model name : Intel(R) Xeon(R) CPU E5-2687W v4 @ 3.00GHz +stepping : 1 +microcode : 0xb00002a +cpu MHz : 3199.951 +cache size : 30720 KB +physical id : 0 +siblings : 24 +core id : 9 +cpu cores : 12 +apicid : 18 +initial apicid : 18 +fpu : yes +fpu_exception : yes +cpuid level : 20 +wp 
: yes +flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch epb invpcid_single ibrs ibpb stibp kaiser tpr_shadow vnmi flexpriority ept vpid fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm cqm rdseed adx smap intel_pt xsaveopt cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local dtherm ida arat pln pts +bugs : cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass +bogomips : 5986.05 +clflush size : 64 +cache_alignment : 64 +address sizes : 46 bits physical, 48 bits virtual +power management: + +processor : 8 +vendor_id : GenuineIntel +cpu family : 6 +model : 79 +model name : Intel(R) Xeon(R) CPU E5-2687W v4 @ 3.00GHz +stepping : 1 +microcode : 0xb00002a +cpu MHz : 3199.951 +cache size : 30720 KB +physical id : 0 +siblings : 24 +core id : 10 +cpu cores : 12 +apicid : 20 +initial apicid : 20 +fpu : yes +fpu_exception : yes +cpuid level : 20 +wp : yes +flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch epb invpcid_single ibrs ibpb stibp kaiser tpr_shadow vnmi flexpriority ept vpid fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm cqm rdseed adx smap intel_pt xsaveopt cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local dtherm ida arat pln pts +bugs : cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass +bogomips : 5986.05 +clflush size : 64 +cache_alignment : 64 +address sizes : 46 bits physical, 48 bits virtual +power management: + +processor : 9 +vendor_id : GenuineIntel +cpu family : 6 +model : 79 +model name : Intel(R) Xeon(R) CPU E5-2687W v4 @ 3.00GHz +stepping : 1 +microcode : 0xb00002a +cpu MHz : 3199.951 +cache size : 30720 KB +physical id : 0 +siblings : 24 +core id : 11 +cpu cores : 12 +apicid : 22 +initial apicid : 22 +fpu : yes +fpu_exception : yes +cpuid level : 20 +wp : yes +flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch epb invpcid_single ibrs ibpb stibp kaiser tpr_shadow vnmi flexpriority ept vpid fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm cqm rdseed adx smap intel_pt xsaveopt cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local dtherm ida arat pln pts +bugs : cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass +bogomips : 5986.05 +clflush size : 64 +cache_alignment : 64 +address sizes : 46 bits physical, 48 bits virtual +power management: + +processor : 10 +vendor_id : GenuineIntel +cpu family : 6 +model : 79 +model name : Intel(R) Xeon(R) CPU E5-2687W v4 @ 3.00GHz +stepping : 1 +microcode : 0xb00002a +cpu MHz : 
3199.951 +cache size : 30720 KB +physical id : 0 +siblings : 24 +core id : 12 +cpu cores : 12 +apicid : 24 +initial apicid : 24 +fpu : yes +fpu_exception : yes +cpuid level : 20 +wp : yes +flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch epb invpcid_single ibrs ibpb stibp kaiser tpr_shadow vnmi flexpriority ept vpid fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm cqm rdseed adx smap intel_pt xsaveopt cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local dtherm ida arat pln pts +bugs : cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass +bogomips : 5986.05 +clflush size : 64 +cache_alignment : 64 +address sizes : 46 bits physical, 48 bits virtual +power management: + +processor : 11 +vendor_id : GenuineIntel +cpu family : 6 +model : 79 +model name : Intel(R) Xeon(R) CPU E5-2687W v4 @ 3.00GHz +stepping : 1 +microcode : 0xb00002a +cpu MHz : 3199.951 +cache size : 30720 KB +physical id : 0 +siblings : 24 +core id : 13 +cpu cores : 12 +apicid : 26 +initial apicid : 26 +fpu : yes +fpu_exception : yes +cpuid level : 20 +wp : yes +flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch epb invpcid_single ibrs ibpb stibp kaiser tpr_shadow vnmi flexpriority ept vpid fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm cqm rdseed adx smap intel_pt xsaveopt cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local dtherm ida arat pln pts +bugs : cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass +bogomips : 5986.05 +clflush size : 64 +cache_alignment : 64 +address sizes : 46 bits physical, 48 bits virtual +power management: + +processor : 12 +vendor_id : GenuineIntel +cpu family : 6 +model : 79 +model name : Intel(R) Xeon(R) CPU E5-2687W v4 @ 3.00GHz +stepping : 1 +microcode : 0xb00002a +cpu MHz : 3199.951 +cache size : 30720 KB +physical id : 0 +siblings : 24 +core id : 0 +cpu cores : 12 +apicid : 1 +initial apicid : 1 +fpu : yes +fpu_exception : yes +cpuid level : 20 +wp : yes +flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch epb invpcid_single ibrs ibpb stibp kaiser tpr_shadow vnmi flexpriority ept vpid fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm cqm rdseed adx smap intel_pt xsaveopt cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local dtherm ida arat pln pts +bugs : cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass +bogomips : 5986.05 +clflush size : 64 +cache_alignment : 64 +address sizes : 46 bits physical, 48 bits virtual +power 
management: + +processor : 13 +vendor_id : GenuineIntel +cpu family : 6 +model : 79 +model name : Intel(R) Xeon(R) CPU E5-2687W v4 @ 3.00GHz +stepping : 1 +microcode : 0xb00002a +cpu MHz : 3199.951 +cache size : 30720 KB +physical id : 0 +siblings : 24 +core id : 1 +cpu cores : 12 +apicid : 3 +initial apicid : 3 +fpu : yes +fpu_exception : yes +cpuid level : 20 +wp : yes +flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch epb invpcid_single ibrs ibpb stibp kaiser tpr_shadow vnmi flexpriority ept vpid fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm cqm rdseed adx smap intel_pt xsaveopt cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local dtherm ida arat pln pts +bugs : cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass +bogomips : 5986.05 +clflush size : 64 +cache_alignment : 64 +address sizes : 46 bits physical, 48 bits virtual +power management: + +processor : 14 +vendor_id : GenuineIntel +cpu family : 6 +model : 79 +model name : Intel(R) Xeon(R) CPU E5-2687W v4 @ 3.00GHz +stepping : 1 +microcode : 0xb00002a +cpu MHz : 3199.951 +cache size : 30720 KB +physical id : 0 +siblings : 24 +core id : 2 +cpu cores : 12 +apicid : 5 +initial apicid : 5 +fpu : yes +fpu_exception : yes +cpuid level : 20 +wp : yes +flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch epb invpcid_single ibrs ibpb stibp kaiser tpr_shadow vnmi flexpriority ept vpid fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm cqm rdseed adx smap intel_pt xsaveopt cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local dtherm ida arat pln pts +bugs : cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass +bogomips : 5986.05 +clflush size : 64 +cache_alignment : 64 +address sizes : 46 bits physical, 48 bits virtual +power management: + +processor : 15 +vendor_id : GenuineIntel +cpu family : 6 +model : 79 +model name : Intel(R) Xeon(R) CPU E5-2687W v4 @ 3.00GHz +stepping : 1 +microcode : 0xb00002a +cpu MHz : 3199.951 +cache size : 30720 KB +physical id : 0 +siblings : 24 +core id : 3 +cpu cores : 12 +apicid : 7 +initial apicid : 7 +fpu : yes +fpu_exception : yes +cpuid level : 20 +wp : yes +flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch epb invpcid_single ibrs ibpb stibp kaiser tpr_shadow vnmi flexpriority ept vpid fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm cqm rdseed adx smap intel_pt xsaveopt cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local dtherm ida arat pln pts +bugs 
: cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass +bogomips : 5986.05 +clflush size : 64 +cache_alignment : 64 +address sizes : 46 bits physical, 48 bits virtual +power management: + +processor : 16 +vendor_id : GenuineIntel +cpu family : 6 +model : 79 +model name : Intel(R) Xeon(R) CPU E5-2687W v4 @ 3.00GHz +stepping : 1 +microcode : 0xb00002a +cpu MHz : 3199.951 +cache size : 30720 KB +physical id : 0 +siblings : 24 +core id : 4 +cpu cores : 12 +apicid : 9 +initial apicid : 9 +fpu : yes +fpu_exception : yes +cpuid level : 20 +wp : yes +flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch epb invpcid_single ibrs ibpb stibp kaiser tpr_shadow vnmi flexpriority ept vpid fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm cqm rdseed adx smap intel_pt xsaveopt cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local dtherm ida arat pln pts +bugs : cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass +bogomips : 5986.05 +clflush size : 64 +cache_alignment : 64 +address sizes : 46 bits physical, 48 bits virtual +power management: + +processor : 17 +vendor_id : GenuineIntel +cpu family : 6 +model : 79 +model name : Intel(R) Xeon(R) CPU E5-2687W v4 @ 3.00GHz +stepping : 1 +microcode : 0xb00002a +cpu MHz : 3199.951 +cache size : 30720 KB +physical id : 0 +siblings : 24 +core id : 5 +cpu cores : 12 +apicid : 11 +initial apicid : 11 +fpu : yes +fpu_exception : yes +cpuid level : 20 +wp : yes +flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch epb invpcid_single ibrs ibpb stibp kaiser tpr_shadow vnmi flexpriority ept vpid fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm cqm rdseed adx smap intel_pt xsaveopt cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local dtherm ida arat pln pts +bugs : cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass +bogomips : 5986.05 +clflush size : 64 +cache_alignment : 64 +address sizes : 46 bits physical, 48 bits virtual +power management: + +processor : 18 +vendor_id : GenuineIntel +cpu family : 6 +model : 79 +model name : Intel(R) Xeon(R) CPU E5-2687W v4 @ 3.00GHz +stepping : 1 +microcode : 0xb00002a +cpu MHz : 3199.951 +cache size : 30720 KB +physical id : 0 +siblings : 24 +core id : 8 +cpu cores : 12 +apicid : 17 +initial apicid : 17 +fpu : yes +fpu_exception : yes +cpuid level : 20 +wp : yes +flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch epb invpcid_single ibrs ibpb stibp kaiser tpr_shadow vnmi flexpriority ept vpid 
fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm cqm rdseed adx smap intel_pt xsaveopt cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local dtherm ida arat pln pts +bugs : cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass +bogomips : 5986.05 +clflush size : 64 +cache_alignment : 64 +address sizes : 46 bits physical, 48 bits virtual +power management: + +processor : 19 +vendor_id : GenuineIntel +cpu family : 6 +model : 79 +model name : Intel(R) Xeon(R) CPU E5-2687W v4 @ 3.00GHz +stepping : 1 +microcode : 0xb00002a +cpu MHz : 3199.951 +cache size : 30720 KB +physical id : 0 +siblings : 24 +core id : 9 +cpu cores : 12 +apicid : 19 +initial apicid : 19 +fpu : yes +fpu_exception : yes +cpuid level : 20 +wp : yes +flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch epb invpcid_single ibrs ibpb stibp kaiser tpr_shadow vnmi flexpriority ept vpid fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm cqm rdseed adx smap intel_pt xsaveopt cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local dtherm ida arat pln pts +bugs : cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass +bogomips : 5986.05 +clflush size : 64 +cache_alignment : 64 +address sizes : 46 bits physical, 48 bits virtual +power management: + +processor : 20 +vendor_id : GenuineIntel +cpu family : 6 +model : 79 +model name : Intel(R) Xeon(R) CPU E5-2687W v4 @ 3.00GHz +stepping : 1 +microcode : 0xb00002a +cpu MHz : 3199.951 +cache size : 30720 KB +physical id : 0 +siblings : 24 +core id : 10 +cpu cores : 12 +apicid : 21 +initial apicid : 21 +fpu : yes +fpu_exception : yes +cpuid level : 20 +wp : yes +flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch epb invpcid_single ibrs ibpb stibp kaiser tpr_shadow vnmi flexpriority ept vpid fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm cqm rdseed adx smap intel_pt xsaveopt cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local dtherm ida arat pln pts +bugs : cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass +bogomips : 5986.05 +clflush size : 64 +cache_alignment : 64 +address sizes : 46 bits physical, 48 bits virtual +power management: + +processor : 21 +vendor_id : GenuineIntel +cpu family : 6 +model : 79 +model name : Intel(R) Xeon(R) CPU E5-2687W v4 @ 3.00GHz +stepping : 1 +microcode : 0xb00002a +cpu MHz : 3199.951 +cache size : 30720 KB +physical id : 0 +siblings : 24 +core id : 11 +cpu cores : 12 +apicid : 23 +initial apicid : 23 +fpu : yes +fpu_exception : yes +cpuid level : 20 +wp : yes +flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca 
sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch epb invpcid_single ibrs ibpb stibp kaiser tpr_shadow vnmi flexpriority ept vpid fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm cqm rdseed adx smap intel_pt xsaveopt cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local dtherm ida arat pln pts +bugs : cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass +bogomips : 5986.05 +clflush size : 64 +cache_alignment : 64 +address sizes : 46 bits physical, 48 bits virtual +power management: + +processor : 22 +vendor_id : GenuineIntel +cpu family : 6 +model : 79 +model name : Intel(R) Xeon(R) CPU E5-2687W v4 @ 3.00GHz +stepping : 1 +microcode : 0xb00002a +cpu MHz : 3199.951 +cache size : 30720 KB +physical id : 0 +siblings : 24 +core id : 12 +cpu cores : 12 +apicid : 25 +initial apicid : 25 +fpu : yes +fpu_exception : yes +cpuid level : 20 +wp : yes +flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch epb invpcid_single ibrs ibpb stibp kaiser tpr_shadow vnmi flexpriority ept vpid fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm cqm rdseed adx smap intel_pt xsaveopt cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local dtherm ida arat pln pts +bugs : cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass +bogomips : 5986.05 +clflush size : 64 +cache_alignment : 64 +address sizes : 46 bits physical, 48 bits virtual +power management: + +processor : 23 +vendor_id : GenuineIntel +cpu family : 6 +model : 79 +model name : Intel(R) Xeon(R) CPU E5-2687W v4 @ 3.00GHz +stepping : 1 +microcode : 0xb00002a +cpu MHz : 3199.951 +cache size : 30720 KB +physical id : 0 +siblings : 24 +core id : 13 +cpu cores : 12 +apicid : 27 +initial apicid : 27 +fpu : yes +fpu_exception : yes +cpuid level : 20 +wp : yes +flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch epb invpcid_single ibrs ibpb stibp kaiser tpr_shadow vnmi flexpriority ept vpid fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm cqm rdseed adx smap intel_pt xsaveopt cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local dtherm ida arat pln pts +bugs : cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass +bogomips : 5986.05 +clflush size : 64 +cache_alignment : 64 +address sizes : 46 bits physical, 48 bits virtual +power management: + diff --git a/deploy/docker-compose/README.md b/deploy/docker-compose/README.md index e8dcb8a78a9..9517b2b1cd7 100644 --- a/deploy/docker-compose/README.md +++ b/deploy/docker-compose/README.md @@ -2,16 +2,16 @@ ## Requirements -* Docker >= 16.10 -* Docker Compose >= 1.6.0 +* Docker >= 20.10 +* Docker Compose >= 2.12.0 ## Quick start ```bash $ git clone https://github.com/apache/bookkeeper.git $ cd bookkeeper/deploy/docker-compose -$ docker-compose pull # Get the latest Docker images -$ docker-compose up 
-d
+$ docker compose pull # Get the latest Docker images
+$ docker compose up -d
 $ cd ../../
 $ bin/bkctl bookies list
 $ bin/bkctl ledger simpletest
@@ -19,7 +19,6 @@ $ bin/bkctl ledger simpletest
 
 ## Access Apache BookKeeper cluster
 
-
 ### Ledger Service
 
 You can use `zk://localhost:2181/ledgers` as the metadataServiceUri to access the ledger storage service.
@@ -31,7 +30,7 @@ $ bin/bkctl -u 'zk://localhost:2181/ledgers' ledger simpletest
 ### DistributedLog
 
 You can use `distributedlog://localhost:2181/distributedlog` as the dlog URI to access the ledger storage service
-using [distributedlog](http://bookkeeper.apache.org/docs/latest/api/distributedlog-api/) API.
+using the [distributedlog](https://bookkeeper.apache.org/docs/api/distributedlog-api/) API.
 
 ```bash
 $ bin/dlog tool create -u 'distributedlog://localhost:2181/distributedlog' --prefix test-stream -e 0-99
@@ -41,13 +40,9 @@ $ bin/dlog tool create -u 'distributedlog://localhost:2181/distributedlog' --pre
 
 ### Install Helm
 
-[Helm](https://helm.sh) is used as a template render engine
-
-```
-curl https://raw.githubusercontent.com/kubernetes/helm/master/scripts/get | bash
-```
+[Helm](https://helm.sh) is used as a template rendering engine.
 
-Or if you use Mac, you can use homebrew to install Helm by `brew install kubernetes-helm`
+See how to [install Helm](https://helm.sh/docs/intro/install/).
 
 ### Bring up Apache BookKeeper cluster
 
@@ -56,6 +51,6 @@ $ git clone https://github.com/apache/bookkeeper.git
 $ cd bookkeeper/deploy/docker-compose
 $ vi compose/values.yaml # customize cluster size, docker image, port mapping, etc.
 $ helm template compose > generated-docker-compose.yaml
-$ docker-compose -f generated-docker-compose.yaml pull # Get the latest Docker images
-$ docker-compose -f generated-docker-compose.yaml up -d
+$ docker compose -f generated-docker-compose.yaml pull # Get the latest Docker images
+$ docker compose -f generated-docker-compose.yaml up -d
 ```
diff --git a/deploy/docker-compose/compose/Chart.yaml b/deploy/docker-compose/compose/Chart.yaml
index 73bb4b216e1..8579f60fdbb 100644
--- a/deploy/docker-compose/compose/Chart.yaml
+++ b/deploy/docker-compose/compose/Chart.yaml
@@ -19,10 +19,10 @@
 apiVersion: v1
 description: apache-bookkeeper-docker-compose
 name: apache-bookkeeper-docker-compose
-version: 4.8.0
+version: 4.15.3
 home: https://github.com/apache/bookkeeper
 sources:
-  - https://github.com/apache/bookkeeper/deploy/docker-compose
+  - https://github.com/apache/bookkeeper/tree/master/deploy/docker-compose
 keywords:
 - log storage
 - stream storage
diff --git a/deploy/docker-compose/compose/templates/docker-compose.yml b/deploy/docker-compose/compose/templates/docker-compose.yml
index fbc70be15ea..756971ab51e 100644
--- a/deploy/docker-compose/compose/templates/docker-compose.yml
+++ b/deploy/docker-compose/compose/templates/docker-compose.yml
@@ -29,40 +29,38 @@
 {{- $bookieHttpPort := .Values.bookkeeper.bookieHttpPort | int }}
 
 {{- define "zookeeper_servers" }}
-  {{- range until (.Values.zookeeper.size | int) }}
-    {{- if . -}}
-      ,
-    {{- end -}}
-    zookeeper-{{ . }}:2181
-  {{- end -}}
+{{- range until (.Values.zookeeper.size | int) }}
+{{- if . -}}
+,
+{{- end -}}
+zookeeper-{{ add 1 . }}:2181
+{{- end -}}
 {{- end -}}
 
 {{- define "metadata_service_uri" }}
-  {{- range until (.Values.zookeeper.size | int) }}
-    {{- if . -}}
-      ;
-    {{- end -}}
-    zookeeper-{{ . }}:2181
-  {{- end -}}
+{{- range until (.Values.zookeeper.size | int) }}
+{{- if . -}}
+;
+{{- end -}}
+zookeeper-{{ add 1 . }}:2181
+{{- end -}}
 {{- end -}}
 
-{{- define "zookeeper_server_list" }}
-  {{- $zk := dict "servers" (list) -}}
-  {{- range until (.Values.zookeeper.size | int) }}
-    {{- $noop := printf "server.%d=zookeeper-%d:%d:%d:participant;0.0.0.0:%d" . . ($.Values.zookeeper.peerPort | int) ($.Values.zookeeper.leaderPort | int) ($.Values.zookeeper.clientPort | int) | append $zk.servers | set $zk "servers" -}}
-  {{- end -}}
-  {{- join " " $zk.servers -}}
+{{- define "zookeeper_server_list" }}
+{{- $zk := dict "servers" (list) -}}
+{{- range until (.Values.zookeeper.size | int) }}
+{{- $noop := printf "server.%d=zookeeper-%d:%d:%d;%d" (add 1 .) (add 1 .) ($.Values.zookeeper.peerPort | int) ($.Values.zookeeper.leaderPort | int) ($.Values.zookeeper.clientPort | int) | append $zk.servers | set $zk "servers" -}}
+{{- end -}}
+{{- join " " $zk.servers -}}
 {{- end -}}
 
 version: '3'
 
 services:
 {{- range until $zkSize }}
-  zookeeper-{{ . }}:
+  zookeeper-{{ add 1 . }}:
     image: {{ $.Values.zookeeper.image }}
-    hostname: zookeeper-{{ . }}
-    command: ["zookeeper"]
 {{- if eq $.Values.networkMode "host" }}
     network_mode: host
 {{- else }}
@@ -70,16 +68,15 @@ services:
       - "{{ add $zkAdminPort . 1000 }}:{{ $zkAdminPort }}"
       - "{{ add $zkClientPort . }}:{{ $zkClientPort }}"
 {{- end }}
+  {{- if $.Values.dataDir }}
     volumes:
-      - {{ $.Values.dataDir }}/zookeeper-{{ . }}/data:/data/zookeeper/data
-      - {{ $.Values.dataDir }}/zookeeper-{{ . }}/txlog:/data/zookeeper/txlog
+      - {{ $.Values.dataDir }}/zookeeper-{{ add 1 . }}/data:/data
+      - {{ $.Values.dataDir }}/zookeeper-{{ add 1 . }}/txlog:/datalog
+  {{- end }}
     environment:
-      - ZK_dataDir=/data/zookeeper/data
-      - ZK_dataLogDir=/data/zookeeper/txlog
-      - ZK_clientPort={{ $zkClientPort }}
-      - ZK_ID={{ . }}
-      - ZK_SERVERS={{- template "zookeeper_server_list" $ }}
-      - ZK_standaloneEnabled=false
+      - ZOO_MY_ID={{ add 1 . }}
+      - ZOO_SERVERS={{- template "zookeeper_server_list" $ }}
+      - ZOO_STANDALONE_ENABLED=false
     healthcheck:
       test: ["CMD", "curl", "-s", "http://localhost:{{ $zkAdminPort }}/commands/stat"]
       interval: 60s
@@ -89,9 +86,8 @@
 {{ end }}
 
 {{- range until $bkSize }}
-  bookie-{{ . }}:
+  bookie-{{ add 1 . }}:
     image: {{ $.Values.bookkeeper.image }}
-    hostname: bookie-{{ . }}
 {{- if eq $.Values.networkMode "host" }}
     network_mode: host
 {{- else }}
@@ -104,13 +100,14 @@
       - "{{ add $bookieHttpPort . }}:{{ $bookieHttpPort }}"
       - "{{ add $bookieGrpcPort . }}:{{ $bookieGrpcPort }}"
 {{- end }}
+  {{- if $.Values.dataDir }}
     volumes:
-      - {{ $.Values.dataDir }}/bookie-{{ . }}/journal:/data/bookkeeper/journal
-      - {{ $.Values.dataDir }}/bookie-{{ . }}/ledgers:/data/bookkeeper/ledgers
+      - {{ $.Values.dataDir }}/bookie-{{ add 1 . }}/journal:/data/bookkeeper/journal
+      - {{ $.Values.dataDir }}/bookie-{{ add 1 . }}/ledgers:/data/bookkeeper/ledgers
+  {{- end }}
     environment:
      - BK_zkServers={{- template "zookeeper_servers" $ }}
-     - BK_zkLedgersRootPath=/ledgers
-     - BK_metadataServiceUri=zk://{{- template "metadata_service_uri" $ }}/ledgers
+     - BK_metadataServiceUri=zk+hierarchical://{{- template "metadata_service_uri" $ }}/ledgers
     - BK_DATA_DIR=/data/bookkeeper
 {{- if eq $.Values.networkMode "host" }}
     - BK_bookiePort={{ $bookiePort }}
@@ -121,7 +118,7 @@
     - BK_httpServerEnabled=true
     depends_on:
 {{- range until $zkSize }}
-      - "zookeeper-{{.}}"
+      - "zookeeper-{{add 1 .}}"
 {{- end }}
     healthcheck:
       test: ["CMD", "curl", "-s", "http://localhost:{{ $bookieHttpPort }}/heartbeat"]
diff --git a/deploy/docker-compose/compose/values.yaml b/deploy/docker-compose/compose/values.yaml
index a3b3eda2b45..5b33da83aaa 100644
--- a/deploy/docker-compose/compose/values.yaml
+++ b/deploy/docker-compose/compose/values.yaml
@@ -16,8 +16,9 @@
 # * limitations under the License.
 # */
 
-# data directories
-dataDir: ./data
+# data directories, set e.g. `./data` if you want to have
+# Zookeeper (data and datalog) and BookKeeper (journal and ledgers) volumes mounted to your host
+dataDir:
 
 # advertised address that bookies use for advertising themselves.
 # host network mode is useless on Mac, so in order to let clients
@@ -31,7 +32,7 @@ networkMode: bridge
 
 zookeeper:
   size: 3
-  image: apachebookkeeper/bookkeeper-current:latest
+  image: zookeeper:3.8.0
   adminPort: 8080
   clientPort: 2181
   peerPort: 2888
@@ -39,7 +40,7 @@
 
 bookkeeper:
   size: 3
-  image: apachebookkeeper/bookkeeper-current:latest
+  image: apache/bookkeeper:4.15.3
   bookiePort: 3181
   bookieGrpcPort: 4181
   bookieHttpPort: 8080
diff --git a/deploy/docker-compose/docker-compose.yaml b/deploy/docker-compose/docker-compose.yaml
index 1e25df2129d..4a07a315425 100644
--- a/deploy/docker-compose/docker-compose.yaml
+++ b/deploy/docker-compose/docker-compose.yaml
@@ -22,23 +22,15 @@
 version: '3'
 
 services:
-  zookeeper-0:
-    image: apachebookkeeper/bookkeeper-current:latest
-    hostname: zookeeper-0
-    command: ["zookeeper"]
+  zookeeper-1:
+    image: zookeeper:3.8.0
     ports:
       - "9080:8080"
       - "2181:2181"
-    volumes:
-      - ./data/zookeeper-0/data:/data/zookeeper/data
-      - ./data/zookeeper-0/txlog:/data/zookeeper/txlog
     environment:
-      - ZK_dataDir=/data/zookeeper/data
-      - ZK_dataLogDir=/data/zookeeper/txlog
-      - ZK_clientPort=2181
-      - ZK_ID=0
-      - ZK_SERVERS=server.0=zookeeper-0:2888:3888:participant;0.0.0.0:2181 server.1=zookeeper-1:2888:3888:participant;0.0.0.0:2181 server.2=zookeeper-2:2888:3888:participant;0.0.0.0:2181
-      - ZK_standaloneEnabled=false
+      - ZOO_MY_ID=1
+      - ZOO_SERVERS=server.1=zookeeper-1:2888:3888;2181 server.2=zookeeper-2:2888:3888;2181 server.3=zookeeper-3:2888:3888;2181
+      - ZOO_STANDALONE_ENABLED=false
     healthcheck:
       test: ["CMD", "curl", "-s", "http://localhost:8080/commands/stat"]
       interval: 60s
@@ -46,23 +38,15 @@ services:
       retries: 60
     restart: on-failure
 
-  zookeeper-1:
-    image: apachebookkeeper/bookkeeper-current:latest
-    hostname: zookeeper-1
-    command: ["zookeeper"]
+  zookeeper-2:
+    image: zookeeper:3.8.0
     ports:
       - "9081:8080"
       - "2182:2181"
-    volumes:
-      - ./data/zookeeper-1/data:/data/zookeeper/data
-      - ./data/zookeeper-1/txlog:/data/zookeeper/txlog
     environment:
-      - ZK_dataDir=/data/zookeeper/data
-      - ZK_dataLogDir=/data/zookeeper/txlog
-      - ZK_clientPort=2181
-      - ZK_ID=1
-      - ZK_SERVERS=server.0=zookeeper-0:2888:3888:participant;0.0.0.0:2181 server.1=zookeeper-1:2888:3888:participant;0.0.0.0:2181 server.2=zookeeper-2:2888:3888:participant;0.0.0.0:2181
-      - 
ZK_standaloneEnabled=false + - ZOO_MY_ID=2 + - ZOO_SERVERS=server.1=zookeeper-1:2888:3888;2181 server.2=zookeeper-2:2888:3888;2181 server.3=zookeeper-3:2888:3888;2181 + - ZOO_STANDALONE_ENABLED=false healthcheck: test: ["CMD", "curl", "-s", "http://localhost:8080/commands/stat"] interval: 60s @@ -70,23 +54,15 @@ services: retries: 60 restart: on-failure - zookeeper-2: - image: apachebookkeeper/bookkeeper-current:latest - hostname: zookeeper-2 - command: ["zookeeper"] + zookeeper-3: + image: zookeeper:3.8.0 ports: - "9082:8080" - "2183:2181" - volumes: - - ./data/zookeeper-2/data:/data/zookeeper/data - - ./data/zookeeper-2/txlog:/data/zookeeper/txlog environment: - - ZK_dataDir=/data/zookeeper/data - - ZK_dataLogDir=/data/zookeeper/txlog - - ZK_clientPort=2181 - - ZK_ID=2 - - ZK_SERVERS=server.0=zookeeper-0:2888:3888:participant;0.0.0.0:2181 server.1=zookeeper-1:2888:3888:participant;0.0.0.0:2181 server.2=zookeeper-2:2888:3888:participant;0.0.0.0:2181 - - ZK_standaloneEnabled=false + - ZOO_MY_ID=3 + - ZOO_SERVERS=server.1=zookeeper-1:2888:3888;2181 server.2=zookeeper-2:2888:3888;2181 server.3=zookeeper-3:2888:3888;2181 + - ZOO_STANDALONE_ENABLED=false healthcheck: test: ["CMD", "curl", "-s", "http://localhost:8080/commands/stat"] interval: 60s @@ -94,28 +70,23 @@ services: retries: 60 restart: on-failure - bookie-0: - image: apachebookkeeper/bookkeeper-current:latest - hostname: bookie-0 + bookie-1: + image: apache/bookkeeper:4.15.3 ports: - "3181:3181" - "8080:8080" - "4181:4181" - volumes: - - ./data/bookie-0/journal:/data/bookkeeper/journal - - ./data/bookie-0/ledgers:/data/bookkeeper/ledgers environment: - - BK_zkServers=zookeeper-0:2181,zookeeper-1:2181,zookeeper-2:2181 - - BK_zkLedgersRootPath=/ledgers - - BK_metadataServiceUri=zk://zookeeper-0:2181;zookeeper-1:2181;zookeeper-2:2181/ledgers + - BK_zkServers=zookeeper-1:2181,zookeeper-2:2181,zookeeper-3:2181 + - BK_metadataServiceUri=zk+hierarchical://zookeeper-1:2181;zookeeper-2:2181;zookeeper-3:2181/ledgers - BK_DATA_DIR=/data/bookkeeper - BK_advertisedAddress=127.0.0.1 - BK_bookiePort=3181 - BK_httpServerEnabled=true depends_on: - - "zookeeper-0" - "zookeeper-1" - "zookeeper-2" + - "zookeeper-3" healthcheck: test: ["CMD", "curl", "-s", "http://localhost:8080/heartbeat"] interval: 60s @@ -123,28 +94,23 @@ services: retries: 60 restart: on-failure - bookie-1: - image: apachebookkeeper/bookkeeper-current:latest - hostname: bookie-1 + bookie-2: + image: apache/bookkeeper:4.15.3 ports: - "3182:3182" - "8081:8080" - "4182:4181" - volumes: - - ./data/bookie-1/journal:/data/bookkeeper/journal - - ./data/bookie-1/ledgers:/data/bookkeeper/ledgers environment: - - BK_zkServers=zookeeper-0:2181,zookeeper-1:2181,zookeeper-2:2181 - - BK_zkLedgersRootPath=/ledgers - - BK_metadataServiceUri=zk://zookeeper-0:2181;zookeeper-1:2181;zookeeper-2:2181/ledgers + - BK_zkServers=zookeeper-1:2181,zookeeper-2:2181,zookeeper-3:2181 + - BK_metadataServiceUri=zk+hierarchical://zookeeper-1:2181;zookeeper-2:2181;zookeeper-3:2181/ledgers - BK_DATA_DIR=/data/bookkeeper - BK_advertisedAddress=127.0.0.1 - BK_bookiePort=3182 - BK_httpServerEnabled=true depends_on: - - "zookeeper-0" - "zookeeper-1" - "zookeeper-2" + - "zookeeper-3" healthcheck: test: ["CMD", "curl", "-s", "http://localhost:8080/heartbeat"] interval: 60s @@ -152,33 +118,26 @@ services: retries: 60 restart: on-failure - bookie-2: - image: apachebookkeeper/bookkeeper-current:latest - hostname: bookie-2 + bookie-3: + image: apache/bookkeeper:4.15.3 ports: - "3183:3183" - "8082:8080" - "4183:4181" - volumes: 
- - ./data/bookie-2/journal:/data/bookkeeper/journal - - ./data/bookie-2/ledgers:/data/bookkeeper/ledgers environment: - - BK_zkServers=zookeeper-0:2181,zookeeper-1:2181,zookeeper-2:2181 - - BK_zkLedgersRootPath=/ledgers - - BK_metadataServiceUri=zk://zookeeper-0:2181;zookeeper-1:2181;zookeeper-2:2181/ledgers + - BK_zkServers=zookeeper-1:2181,zookeeper-2:2181,zookeeper-3:2181 + - BK_metadataServiceUri=zk+hierarchical://zookeeper-1:2181;zookeeper-2:2181;zookeeper-3:2181/ledgers - BK_DATA_DIR=/data/bookkeeper - BK_advertisedAddress=127.0.0.1 - BK_bookiePort=3183 - BK_httpServerEnabled=true depends_on: - - "zookeeper-0" - "zookeeper-1" - "zookeeper-2" + - "zookeeper-3" healthcheck: test: ["CMD", "curl", "-s", "http://localhost:8080/heartbeat"] interval: 60s timeout: 3s retries: 60 restart: on-failure - - diff --git a/deploy/kubernetes/gke/bookkeeper.statefulset.yml b/deploy/kubernetes/gke/bookkeeper.statefulset.yml index 03ea637df4f..ca32b91137d 100644 --- a/deploy/kubernetes/gke/bookkeeper.statefulset.yml +++ b/deploy/kubernetes/gke/bookkeeper.statefulset.yml @@ -33,7 +33,7 @@ kind: ConfigMap metadata: name: bookie-config data: - BK_BOOKIE_EXTRA_OPTS: "\"-Xms1g -Xmx1g -XX:MaxDirectMemorySize=1g -XX:+UseG1GC -XX:MaxGCPauseMillis=10 -XX:+ParallelRefProcEnabled -XX:+UnlockExperimentalVMOptions -XX:+AggressiveOpts -XX:+DoEscapeAnalysis -XX:ParallelGCThreads=32 -XX:ConcGCThreads=32 -XX:G1NewSizePercent=50 -XX:+DisableExplicitGC -XX:-ResizePLAB\"" + BK_BOOKIE_EXTRA_OPTS: "\"-Xms1g -Xmx1g -XX:MaxDirectMemorySize=1g -XX:+UseG1GC -XX:MaxGCPauseMillis=10 -XX:+ParallelRefProcEnabled -XX:+UnlockExperimentalVMOptions -XX:+DoEscapeAnalysis -XX:ParallelGCThreads=32 -XX:ConcGCThreads=32 -XX:G1NewSizePercent=50 -XX:+DisableExplicitGC -XX:-ResizePLAB\"" BK_bookiePort: "3181" BK_journalDirectory: "/bookkeeper/data/journal" BK_ledgerDirectories: "/bookkeeper/data/ledgers" @@ -124,7 +124,6 @@ metadata: app: bookkeeper component: bookie spec: - ports: ports: - name: bookie port: 3181 diff --git a/deploy/kubernetes/gke/bookkeeper.yaml b/deploy/kubernetes/gke/bookkeeper.yaml index 7ad58af0416..78b28c892a1 100644 --- a/deploy/kubernetes/gke/bookkeeper.yaml +++ b/deploy/kubernetes/gke/bookkeeper.yaml @@ -24,7 +24,7 @@ kind: ConfigMap metadata: name: bookie-config data: - BK_BOOKIE_EXTRA_OPTS: "\"-Xms1g -Xmx1g -XX:MaxDirectMemorySize=1g -XX:+UseG1GC -XX:MaxGCPauseMillis=10 -XX:+ParallelRefProcEnabled -XX:+UnlockExperimentalVMOptions -XX:+AggressiveOpts -XX:+DoEscapeAnalysis -XX:ParallelGCThreads=32 -XX:ConcGCThreads=32 -XX:G1NewSizePercent=50 -XX:+DisableExplicitGC -XX:-ResizePLAB\"" + BK_BOOKIE_EXTRA_OPTS: "\"-Xms1g -Xmx1g -XX:MaxDirectMemorySize=1g -XX:+UseG1GC -XX:MaxGCPauseMillis=10 -XX:+ParallelRefProcEnabled -XX:+UnlockExperimentalVMOptions -XX:+DoEscapeAnalysis -XX:ParallelGCThreads=32 -XX:ConcGCThreads=32 -XX:G1NewSizePercent=50 -XX:+DisableExplicitGC -XX:-ResizePLAB\"" BK_bookiePort: "3181" BK_journalDirectory: "/bookkeeper/data/journal" BK_ledgerDirectories: "/bookkeeper/data/ledgers" diff --git a/dev/bk-merge-pr.py b/dev/bk-merge-pr.py deleted file mode 100755 index c7d2adf4cb1..00000000000 --- a/dev/bk-merge-pr.py +++ /dev/null @@ -1,867 +0,0 @@ -#!/usr/bin/env python - -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. 
-# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# Utility for creating well-formed pull request merges and pushing them to Apache. This script is a modified version -# of the one created by the Spark project (https://github.com/apache/spark/blob/master/dev/merge_spark_pr.py). -# -# Usage: ./bk-merge-pr.py (see config env vars below) -# -# This utility assumes you already have local a bookkeeper git folder and that you -# have added remotes corresponding to both: -# (i) the github apache bookkeeper mirror and -# (ii) the apache bookkeeper git repo. - -import json -import os -import re -import subprocess -import sys -import urllib2 - -try: - import jira.client - JIRA_IMPORTED = True -except ImportError: - JIRA_IMPORTED = False - -PROJECT_NAME = "bookkeeper" - -CAPITALIZED_PROJECT_NAME = "bookkeeper".upper() -GITHUB_ISSUES_NAME = "issue".upper() - -# Location of the local git repository -REPO_HOME = os.environ.get("%s_HOME" % CAPITALIZED_PROJECT_NAME, os.getcwd()) -# Remote name which points to the GitHub site -PR_REMOTE_NAME = os.environ.get("PR_REMOTE_NAME", "apache-github") -# Remote name which points to Apache git -PUSH_REMOTE_NAME = os.environ.get("PUSH_REMOTE_NAME", "apache") -# ASF JIRA username -JIRA_USERNAME = os.environ.get("JIRA_USERNAME", "") -# ASF JIRA password -JIRA_PASSWORD = os.environ.get("JIRA_PASSWORD", "") -# Github API page size -GITHUB_PAGE_SIZE = os.environ.get("GH_PAGE_SIZE", "100") -# OAuth key used for issuing requests against the GitHub API. If this is not defined, then requests -# will be unauthenticated. You should only need to configure this if you find yourself regularly -# exceeding your IP's unauthenticated request rate limit. You can create an OAuth key at -# https://github.com/settings/tokens. This script only requires the "public_repo" scope. -GITHUB_OAUTH_KEY = os.environ.get("GITHUB_OAUTH_KEY") - -GITHUB_USER = os.environ.get("GITHUB_USER", "apache") -GITHUB_BASE = "https://github.com/%s/%s/pull" % (GITHUB_USER, PROJECT_NAME) -GITHUB_API_URL = "https://api.github.com" -GITHUB_API_BASE = "%s/repos/%s/%s" % (GITHUB_API_URL, GITHUB_USER, PROJECT_NAME) -JIRA_BASE = "https://issues.apache.org/jira/browse" -JIRA_API_BASE = "https://issues.apache.org/jira" -# Prefix added to temporary branches -TEMP_BRANCH_PREFIX = "PR_TOOL" -RELEASE_BRANCH_PREFIX = "branch-" - -DEFAULT_FIX_VERSION = os.environ.get("DEFAULT_FIX_VERSION", "0.9.1.0") - -def get_json(url, preview_api = False): - try: - request = urllib2.Request(url) - if GITHUB_OAUTH_KEY: - request.add_header('Authorization', 'token %s' % GITHUB_OAUTH_KEY) - if preview_api: - request.add_header('Accept', 'application/vnd.github.black-cat-preview+json') - return json.load(urllib2.urlopen(request)) - except urllib2.HTTPError as e: - if "X-RateLimit-Remaining" in e.headers and e.headers["X-RateLimit-Remaining"] == '0': - print "Exceeded the GitHub API rate limit; see the instructions in " + \ - "bk-merge-pr.py to configure an OAuth token for making authenticated " + \ - "GitHub requests." 
- else: - print "Unable to fetch URL, exiting: %s" % url - sys.exit(-1) - -def post_json(url, data): - try: - request = urllib2.Request(url, data, { 'Content-Type': 'application/json' }) - if GITHUB_OAUTH_KEY: - request.add_header('Authorization', 'token %s' % GITHUB_OAUTH_KEY) - return json.load(urllib2.urlopen(request)) - except urllib2.HTTPError as e: - if "X-RateLimit-Remaining" in e.headers and e.headers["X-RateLimit-Remaining"] == '0': - print "Exceeded the GitHub API rate limit; see the instructions in " + \ - "bk-merge-pr.py to configure an OAuth token for making authenticated " + \ - "GitHub requests." - else: - print "Unable to fetch URL, exiting: %s - %s" % (url, e) - sys.exit(-1) - -def fail(msg): - print msg - clean_up() - sys.exit(-1) - - -def run_cmd(cmd): - print cmd - if isinstance(cmd, list): - return subprocess.check_output(cmd) - else: - return subprocess.check_output(cmd.split(" ")) - - -def continue_maybe(prompt): - result = raw_input("\n%s (y/n): " % prompt) - if result.lower() != "y": - fail("Okay, exiting") - -def clean_up(): - if original_head != get_current_branch(): - print "Restoring head pointer to %s" % original_head - run_cmd("git checkout %s" % original_head) - - branches = run_cmd("git branch").replace(" ", "").split("\n") - - for branch in filter(lambda x: x.startswith(TEMP_BRANCH_PREFIX), branches): - print "Deleting local branch %s" % branch - run_cmd("git branch -D %s" % branch) - -def get_current_branch(): - return run_cmd("git rev-parse --abbrev-ref HEAD").replace("\n", "") - -def get_milestones(): - return get_json("https://api.github.com/repos/%s/%s/milestones?state=open&sort=due_on&direction=asc" % (GITHUB_USER, PROJECT_NAME)) - -def get_all_labels(): - result = get_json("https://api.github.com/repos/%s/%s/labels?per_page=%s" % (GITHUB_USER, PROJECT_NAME, GITHUB_PAGE_SIZE)) - return map(lambda x: x['name'], result) - -# merge the requested PR and return the merge hash -def merge_pr(pr_num, target_ref, title, body, default_pr_reviewers, pr_repo_desc): - pr_branch_name = "%s_MERGE_PR_%s" % (TEMP_BRANCH_PREFIX, pr_num) - target_branch_name = "%s_MERGE_PR_%s_%s" % (TEMP_BRANCH_PREFIX, pr_num, target_ref.upper()) - run_cmd("git fetch %s pull/%s/head:%s" % (PR_REMOTE_NAME, pr_num, pr_branch_name)) - run_cmd("git fetch %s %s:%s" % (PUSH_REMOTE_NAME, target_ref, target_branch_name)) - run_cmd("git checkout %s" % target_branch_name) - - had_conflicts = False - try: - run_cmd(['git', 'merge', pr_branch_name, '--squash']) - except Exception as e: - msg = "Error merging: %s\nWould you like to manually fix-up this merge?" % e - continue_maybe(msg) - msg = "Okay, please fix any conflicts and 'git add' conflicting files... Finished?" - continue_maybe(msg) - had_conflicts = True - - # Offer to run unit tests before committing - result = raw_input('Do you want to validate unit tests after the merge? (y/n): ') - if result.lower() == 'y': - test_res = subprocess.call('mvn clean install'.split()) - if test_res == 0: - print('Unit tests execution succeeded') - else: - continue_maybe("Unit tests execution FAILED. Do you want to continue with the merge anyway?") - - # Offer to run spotbugs and rat before committing - result = raw_input('Do you want to validate spotbugs and rat after the merge? (y/n): ') - if result.lower() == 'y': - test_res = subprocess.call('mvn clean install -DskipTests spotbugs:check apache-rat:check'.split()) - if test_res == 0: - print('QA tests execution succeeded') - else: - continue_maybe("QA tests execution FAILED. 
Do you want to continue with the merge anyway?") - - commit_authors = run_cmd(['git', 'log', 'HEAD..%s' % pr_branch_name, - '--pretty=format:%an <%ae>']).split("\n") - distinct_authors = sorted(set(commit_authors), - key=lambda x: commit_authors.count(x), reverse=True) - primary_author = raw_input( - "Enter primary author in the format of \"name \" [%s]: " % - distinct_authors[0]) - if primary_author == "": - primary_author = distinct_authors[0] - - reviewers = raw_input("Enter reviewers [%s]: " % default_pr_reviewers).strip() - if reviewers == '': - reviewers = default_pr_reviewers - - commits = run_cmd(['git', 'log', 'HEAD..%s' % pr_branch_name, - '--pretty=format:%h [%an] %s']).split("\n") - - if len(commits) > 1: - result = raw_input("List pull request commits in squashed commit message? (y/n): ") - if result.lower() == "y": - should_list_commits = True - else: - should_list_commits = False - else: - should_list_commits = False - - merge_message_flags = [] - - merge_message_flags += ["-m", title] - if body is not None: - # We remove @ symbols from the body to avoid triggering e-mails - # to people every time someone creates a public fork of the project. - merge_message_flags += ["-m", body.replace("@", "")] - - authors = "\n".join(["Author: %s" % a for a in distinct_authors]) - - merge_message_flags += ["-m", authors] - - if (reviewers != ""): - merge_message_flags += ["-m", "Reviewers: %s" % reviewers] - - if had_conflicts: - committer_name = run_cmd("git config --get user.name").strip() - committer_email = run_cmd("git config --get user.email").strip() - message = "This patch had conflicts when merged, resolved by\nCommitter: %s <%s>" % ( - committer_name, committer_email) - merge_message_flags += ["-m", message] - - # The string "Closes #%s" string is required for GitHub to correctly close the PR - close_line = "This closes #%s from %s" % (pr_num, pr_repo_desc) - # Find the github issues to close - github_issues = re.findall("#[0-9]{3,6}", title) - - if len(github_issues) != 0: - for issue_id in github_issues: - close_line += ", closes %s" % (issue_id) - - if should_list_commits: - close_line += " and squashes the following commits:" - merge_message_flags += ["-m", close_line] - - if should_list_commits: - merge_message_flags += ["-m", "\n".join(commits)] - - run_cmd(['git', 'commit', '--author="%s"' % primary_author] + merge_message_flags) - - continue_maybe("Merge complete (local ref %s). Push to %s?" % ( - target_branch_name, PUSH_REMOTE_NAME)) - - try: - run_cmd('git push %s %s:%s' % (PUSH_REMOTE_NAME, target_branch_name, target_ref)) - except Exception as e: - clean_up() - fail("Exception while pushing: %s" % e) - - merge_hash = run_cmd("git rev-parse %s" % target_branch_name)[:8] - merge_log = run_cmd("git show --format=fuller -q %s" % target_branch_name) - clean_up() - print("Pull request #%s merged!" 
% pr_num) - print("Merge hash: %s" % merge_hash) - return merge_hash, merge_log - -def ask_for_branch(default_branch): - pick_ref = raw_input("Enter a branch name [%s]: " % default_branch) - if pick_ref == "": - pick_ref = default_branch - return pick_ref - -def cherry_pick(pr_num, merge_hash, pick_ref): - pick_branch_name = "%s_PICK_PR_%s_%s" % (TEMP_BRANCH_PREFIX, pr_num, pick_ref.upper()) - - run_cmd("git fetch %s %s:%s" % (PUSH_REMOTE_NAME, pick_ref, pick_branch_name)) - run_cmd("git checkout %s" % pick_branch_name) - - try: - run_cmd("git cherry-pick -sx %s" % merge_hash) - except Exception as e: - msg = "Error cherry-picking: %s\nWould you like to manually fix-up this merge?" % e - continue_maybe(msg) - msg = "Okay, please fix any conflicts and finish the cherry-pick. Finished?" - continue_maybe(msg) - - continue_maybe("Pick complete (local ref %s). Push to %s?" % ( - pick_branch_name, PUSH_REMOTE_NAME)) - - try: - run_cmd('git push %s %s:%s' % (PUSH_REMOTE_NAME, pick_branch_name, pick_ref)) - except Exception as e: - clean_up() - fail("Exception while pushing: %s" % e) - - pick_hash = run_cmd("git rev-parse %s" % pick_branch_name)[:8] - clean_up() - - print("Pull request #%s picked into %s!" % (pr_num, pick_ref)) - print("Pick hash: %s" % pick_hash) - return pick_ref - - -def fix_version_from_branch(branch, versions, target_ref): - # Note: Assumes this is a sorted (newest->oldest) list of un-released versions - if branch == target_ref: - versions = filter(lambda x: x == DEFAULT_FIX_VERSION, versions) - if len(versions) > 0: - return versions[0] - else: - return None - else: - versions = filter(lambda x: x.startswith(branch), versions) - if len(versions) > 0: - return versions[-1] - else: - return None - - -def resolve_jira_issue(merge_branches, comment, jira_id, target_ref): - asf_jira = jira.client.JIRA({'server': JIRA_API_BASE}, - basic_auth=(JIRA_USERNAME, JIRA_PASSWORD)) - - result = raw_input("Resolve JIRA %s ? (y/n): " % jira_id) - if result.lower() != "y": - return - - try: - issue = asf_jira.issue(jira_id) - except Exception as e: - fail("ASF JIRA could not find %s\n%s" % (jira_id, e)) - - cur_status = issue.fields.status.name - cur_summary = issue.fields.summary - cur_assignee = issue.fields.assignee - if cur_assignee is None: - cur_assignee = "NOT ASSIGNED!!!" 
- else: - cur_assignee = cur_assignee.displayName - - if cur_status == "Resolved" or cur_status == "Closed": - fail("JIRA issue %s already has status '%s'" % (jira_id, cur_status)) - print ("=== JIRA %s ===" % jira_id) - print ("summary\t\t%s\nassignee\t%s\nstatus\t\t%s\nurl\t\t%s/%s\n" % ( - cur_summary, cur_assignee, cur_status, JIRA_BASE, jira_id)) - - versions = asf_jira.project_versions(CAPITALIZED_PROJECT_NAME) - versions = sorted(versions, key=lambda x: x.name, reverse=True) - versions = filter(lambda x: x.raw['released'] is False, versions) - - version_names = map(lambda x: x.name, versions) - default_fix_versions = map(lambda x: fix_version_from_branch(x, version_names), merge_branches, target_ref) - default_fix_versions = filter(lambda x: x != None, default_fix_versions) - default_fix_versions = ",".join(default_fix_versions) - - fix_versions = raw_input("Enter comma-separated fix version(s) [%s]: " % default_fix_versions) - if fix_versions == "": - fix_versions = default_fix_versions - fix_versions = fix_versions.replace(" ", "").split(",") - - def get_version_json(version_str): - return filter(lambda v: v.name == version_str, versions)[0].raw - - jira_fix_versions = map(lambda v: get_version_json(v), fix_versions) - - resolve = filter(lambda a: a['name'] == "Resolve Issue", asf_jira.transitions(jira_id))[0] - resolution = filter(lambda r: r.raw['name'] == "Fixed", asf_jira.resolutions())[0] - asf_jira.transition_issue( - jira_id, resolve["id"], fixVersions = jira_fix_versions, - comment = comment, resolution = {'id': resolution.raw['id']}) - - print "Successfully resolved %s with fixVersions=%s!" % (jira_id, fix_versions) - - -def resolve_jira_issues(title, merge_branches, comment, target_ref): - jira_ids = re.findall("%s-[0-9]{3,6}" % CAPITALIZED_PROJECT_NAME, title) - - if len(jira_ids) == 0: - print "No JIRA issue found to update" - for jira_id in jira_ids: - resolve_jira_issue(merge_branches, comment, jira_id, target_ref) - - -def standardize_jira_ref(text): - """ - Standardize the jira reference commit message prefix to "PROJECT_NAME-XXX: Issue" - - 'BOOKKEEPER-877: Script for generating patch for reviews' - 'ISSUE #376: Script for generating patch for reviews' - """ - jira_refs = [] - github_issue_refs = [] - github_issue_ids = [] - components = [] - - # Extract JIRA ref(s): - pattern = re.compile(r'(%s[-\s]*[0-9]{3,6})+' % CAPITALIZED_PROJECT_NAME, re.IGNORECASE) - for ref in pattern.findall(text): - # Add brackets, replace spaces with a dash, & convert to uppercase - jira_refs.append(re.sub(r'\s+', '-', ref.upper())) - text = text.replace(ref, '') - - # Extract Github Issue ref(s) - pattern = re.compile(r'(%s[-\s]*([0-9]{3,6}))+' % GITHUB_ISSUES_NAME, re.IGNORECASE) - for ref in pattern.findall(text): - # Add brackets, replace spaces or a dash with ' #', & convert to uppercase - github_issue_refs.append(re.sub(r'[-\s]+', ' #', ref[0].upper())) - text = text.replace(ref[0], '') - github_issue_ids.append(ref[1].upper()) - - # Extract project name component(s): - # Look for alphanumeric chars, spaces, dashes, periods, and/or commas - pattern = re.compile(r'(\[[\w\s,-\.]+\])', re.IGNORECASE) - for component in pattern.findall(text): - components.append(component.upper()) - text = text.replace(component, '') - - # Cleanup any remaining symbols: - pattern = re.compile(r'^\W+(.*)', re.IGNORECASE) - if (pattern.search(text) is not None): - text = pattern.search(text).groups()[0] - - # Assemble full text (JIRA ref(s), module(s), remaining text) - prefix = '' - jira_prefix = ' 
'.join(jira_refs).strip() - if jira_prefix: - prefix = jira_prefix + ": " - github_prefix = ' '.join(github_issue_refs).strip() - if github_prefix: - prefix = github_prefix + ": " - clean_text = prefix + ' '.join(components).strip() + " " + text.strip() - - # Replace multiple spaces with a single space, e.g. if no jira refs and/or components were included - clean_text = re.sub(r'\s+', ' ', clean_text.strip()) - - return clean_text, github_issue_ids - -def get_reviewers(pr_num): - """ - Get a candidate list of reviewers that have commented on the PR with '+1' or 'LGTM' - """ - approval_msgs = ['+1', 'lgtm'] - - pr_comments = get_json("%s/issues/%s/comments" % (GITHUB_API_BASE, pr_num)) - - reviewers_ids = set() - for comment in pr_comments: - for approval_msg in approval_msgs: - if approval_msg in comment['body'].lower(): - reviewers_ids.add(comment['user']['login']) - - approval_review_states = ['approved'] - pr_reviews = get_json('{0}/pulls/{1}/reviews?per_page=100'.format(GITHUB_API_BASE, pr_num), True) - for review in pr_reviews: - for approval_state in approval_review_states: - if approval_state in review['state'].lower(): - reviewers_ids.add(review['user']['login']) - - if len(reviewers_ids) == 0: - fail("No approvals found in this pull request") - - reviewers_emails = [] - for reviewer_id in reviewers_ids: - username = None - useremail = None - user = get_json("%s/users/%s" % (GITHUB_API_URL, reviewer_id)) - if user['email']: - useremail = user['email'].strip() - if user['name']: - username = user['name'].strip() - if username is None: - continue - reviewers_emails.append('{0} <{1}>'.format(username.encode('utf8'), useremail)) - return ', '.join(reviewers_emails) - -def check_ci_status(pr): - ci_status = get_json("%s/commits/%s/status" % (GITHUB_API_BASE, pr["head"]["sha"])) - state = ci_status["state"] - if state != "success": - comments = get_json(pr["comments_url"]) - ignore_ci_comments = [c for c in comments if c["body"].upper() == "IGNORE CI"] - if len(ignore_ci_comments) > 0: - print "\n\nWARNING: The PR has not passed CI (state is %s)" % (state) \ - + ", but this has been overridden by %s. 
\n" % (ignore_ci_comments[0]["user"]["login"]) \ - + "Proceed at your own peril!\n\n" - else: - check_individual_ci_status(ci_status, comments) - -def is_check_passed(check): - passed = check["state"] == "success" - if (not passed) and is_jenkins_check(check): - try: - return is_jenkins_passed(check["target_url"]) - except: - fail("failed to fetch the jenkins build status for check '%s'.\nPlease manually check its build status at %s" % (check["context"], check["target_url"])) - return passed - -def is_jenkins_check(check): - return check["context"].startswith("Jenkins:") - -def is_jenkins_passed(url): - jenkins_status = get_json("%sapi/json?tree=result" % (url)) - return "SUCCESS" == jenkins_status['result'] - -def is_integration_test_check(check): - return check["context"] == u"Jenkins: Integration Tests" - -def check_individual_ci_status(ci_status, comments): - ci_failures = [] - ci_integration_test_failures = [] - for status in ci_status["statuses"]: - is_passed = is_check_passed(status) - is_integration_test = is_integration_test_check(status) - - if is_integration_test and (not is_passed): - ci_integration_test_failures.append(status) - else: - if not is_passed: - ci_failures.append(status) - - if len(ci_integration_test_failures) != 0 and len(ci_failures) == 0: - # all ci passed except integration tests - ignore_it_ci_comments = [c for c in comments if c["body"].upper() == "IGNORE IT CI"] - if len(ignore_it_ci_comments) > 0: - print "\n\nWARNING: The PR has not passed integration tests CI" \ - + ", but this has been overridden by %s. \n" % (ignore_it_ci_comments[0]["user"]["login"]) \ - + "Proceed at your own peril!\n\n" - else: - fail("The PR has not passed integration tests CI") - elif len(ci_failures) != 0 or len(ci_integration_test_failures) != 0: - fail_msg = "The PR has not passed CI:\n" - print "" - for status in ci_failures: - fail_msg += "\t %s = %s\n" % (status["context"], status["state"]) - for status in ci_integration_test_failures: - fail_msg += "\t %s = %s\n" % (status["context"], status["state"]) - fail(fail_msg) - -def ask_release_for_github_issues(branch, labels): - print "=== Add release to github issues ===" - while True: - fix_releases = ask_for_labels("release/%s" % branch, labels, []) - if len(fix_releases) != 1: - print "Please choose only one release to add for branch '%s'." % branch - continue - - print "=== Apply following releases to github issues ==" - print "Fix Releases: %s" % ', '.join(fix_releases) - print "" - - if raw_input("Would you like to add these releases to github issues? (y/n): ") == "y": - break - return fix_releases - -def ask_updates_for_github_issues(milestones, labels, issue_labels, milestone_required): - while True: - fix_milestone, fix_milestone_number, fix_areas, fix_types = \ - get_updates_for_github_issues(milestones, labels, issue_labels, milestone_required) - - print "=== Apply following milestone, area, type to github issues ==" - print "Fix Types: %s" % ', '.join(fix_types) - print "Fix Areas: %s" % ', '.join(fix_areas) - if milestone_required: - print "Fix Milestone: %s" % fix_milestone - print "" - - if raw_input("Would you like to update github issues with these labels? 
(y/n): ") == "y": - break - - return fix_milestone, fix_milestone_number, fix_areas, fix_types - -def get_updates_for_github_issues(milestones, labels, issue_labels, milestone_required): - # get milestone - fix_milestone = "" - fix_milestone_number = "" - if milestone_required: - default_milestone_name = milestones[0]['title'] - milestone_list = map(lambda x: x['title'], milestones) - milestone_map = dict((milestone['title'], milestone['number']) for milestone in milestones) - while True: - fix_milestone = raw_input("Choose fix milestone : options are [%s] - default: [%s]: " % (', '.join(milestone_list).strip(), default_milestone_name)) - fix_milestone = fix_milestone.strip() - if fix_milestone == "": - fix_milestone = default_milestone_name - break - elif fix_milestone in milestone_map: - break - else: - print "Invalid milestone: %s." % fix_milestone - fix_milestone_number = milestone_map[fix_milestone] - - # get area - fix_areas = ask_for_labels("area/", labels, issue_labels) - - # get types - fix_types = ask_for_labels("type/", labels, issue_labels) - - return fix_milestone, fix_milestone_number, fix_areas, fix_types - -def ask_for_labels(prefix, labels, issue_labels): - issue_filtered_labels = map(lambda l: l.split('/')[1], filter(lambda x: x.startswith(prefix), issue_labels)) - filtered_labels = map(lambda l: l.split('/')[1], filter(lambda x: x.startswith(prefix), labels)) - while True: - fix_labels = raw_input("Choose label '%s' - options are: [%s] - default: [%s] (comma separated): " - % (prefix, ', '.join(filtered_labels).strip(), ', '.join(issue_filtered_labels).strip())) - if fix_labels == "": - if not issue_filtered_labels: - print "Please specify a '%s' label to close the issue!" % prefix - continue - else: - fix_labels = issue_filtered_labels - break - fix_labels = fix_labels.replace(" ", "").split(",") - if not fix_labels: - print "Please specify a '%s' label to close the issue!" % prefix - continue - invalid_label = False - for label in fix_labels: - if label not in filtered_labels: - print "Invalid '%s' label: %s." 
% (prefix, label) - invalid_label = True - break - if invalid_label: - continue - else: - break - return fix_labels - -def get_github_issue_url(github_issue_id): - return "https://api.github.com/repos/%s/%s/issues/%s" % (GITHUB_USER, PROJECT_NAME, github_issue_id) - -def get_assignees_url(github_issue_id): - return "https://api.github.com/repos/%s/%s/issues/%s/assignees" % (GITHUB_USER, PROJECT_NAME, github_issue_id) - -def get_github_issue_labels(github_issue_id): - url = "https://api.github.com/repos/%s/%s/issues/%s/labels" % (GITHUB_USER, PROJECT_NAME, github_issue_id) - result = get_json(url) - return map(lambda x: x["name"], result) - -def add_release_to_github_issues(github_issue_ids, labels, fix_release): - for github_issue_id in github_issue_ids: - labels = add_release_to_github_issue(github_issue_id, labels, fix_release) - return labels - -def add_release_to_github_issue(github_issue_id, labels, fix_release): - url = get_github_issue_url(github_issue_id) - labels = ["release/%s" % fix_release] + labels - data = json.dumps({ - 'labels': labels - }) - post_json(url, data) - return labels - -def update_github_issue(github_issue_id, fix_milestone_number, fix_milestone, fix_areas, fix_types, other_labels): - url = get_github_issue_url(github_issue_id) - labels = other_labels + map(lambda x: "area/%s" % x, fix_areas) - labels = labels + map(lambda x: "type/%s" % x, fix_types) - if fix_milestone_number == '': - data = json.dumps({ - 'labels': labels, - }) - else: - labels.append("release/%s" % fix_milestone) - data = json.dumps({ - 'milestone': int(fix_milestone_number), - 'labels': labels, - }) - - post_json(url, data) - return labels - -def update_github_issues(github_issue_ids, fix_milestone_number, fix_milestone, fix_areas, fix_types, other_labels): - for github_issue_id in github_issue_ids: - labels = update_github_issue(github_issue_id, fix_milestone_number, fix_milestone, fix_areas, fix_types, other_labels) - return labels - -def add_assignees_to_github_issues(github_issue_ids, assignees): - for github_issue_id in github_issue_ids: - add_assignees_to_github_issue(github_issue_id, assignees) - -def add_assignees_to_github_issue(github_issue_id, assignees): - url = get_assignees_url(github_issue_id) - data = json.dumps({ - "assignees": assignees - }) - post_json(url, data) - -def main(): - global original_head - - if not GITHUB_OAUTH_KEY: - print "OAuth key is needed for merging bookkeeper pull requests." - print "If environment variable 'GITHUB_OAUTH_KEY' is not defined," - print "then requests will be unauthenticated." - print "You can create an OAuth key at https://github.com/settings/tokens" - print "and set it to the environment variable 'GITHUB_OAUTH_KEY'." - print "(This token only needs the 'public_repo' scope permissions)" - exit(-1) - - # 0. get the current state so we can go back - original_head = get_current_branch() - - # 1. retrieve milestones, labels, branches - milestones = get_milestones() - labels = get_all_labels() - branches = get_json("%s/branches" % GITHUB_API_BASE) - branch_names = filter(lambda x: x.startswith(RELEASE_BRANCH_PREFIX), [x['name'] for x in branches]) - # Assumes branch names can be sorted lexicographically - latest_branch = sorted(branch_names, reverse=True)[0] - - # 2. retrieve the details for a given pull request - pr_num = raw_input("Which pull request would you like to merge? (e.g. 
34): ") - pr = get_json("%s/pulls/%s" % (GITHUB_API_BASE, pr_num)) - pr_events = get_json("%s/issues/%s/events" % (GITHUB_API_BASE, pr_num)) - pr_reviewers = get_reviewers(pr_num) - check_ci_status(pr) - - url = pr["url"] - - # 3. repair the title for commit message - pr_title = pr["title"] - commit_title = raw_input("Commit title [%s]: " % pr_title.encode("utf-8")).decode("utf-8") - if commit_title == "": - commit_title = pr_title - - # Decide whether to use the modified title or not - modified_title, github_issue_ids = standardize_jira_ref(commit_title) - if modified_title != commit_title: - print "I've re-written the title as follows to match the standard format:" - print "Original: %s" % commit_title - print "Modified: %s" % modified_title - result = raw_input("Would you like to use the modified title? (y/n): ") - if result.lower() == "y": - commit_title = modified_title - print "Using modified title:" - else: - print "Using original title:" - print commit_title - - body = pr["body"] - modified_body = "" - for line in body.split('\n'): - if line.startswith('>'): - continue - modified_body = modified_body + line + "\n" - if modified_body != body: - print "I've re-written the body as follows to match the standard formats:" - print "Original: " - print body - print "Modified: " - print modified_body - result = raw_input("Would you like to use the modified body? (y/n): ") - if result.lower() == "y": - body = modified_body - print "Using modified body." - else: - print "Using original body." - - target_ref = pr["base"]["ref"] - user_login = pr["user"]["login"] - base_ref = pr["head"]["ref"] - pr_repo_desc = "%s/%s" % (user_login, base_ref) - - # append pr num to the github issues - we need to attach label and milestone to them - github_issue_ids.append(pr_num) - - # - # 4. attach milestone, area, type and release to github issues - # - - # get issue labels - issue_labels = get_github_issue_labels(pr_num) - # ask for fix milestone, area and type - fix_milestone, fix_milestone_number, fix_areas, fix_types = \ - ask_updates_for_github_issues(milestones, labels, issue_labels, target_ref == "master") - # update issues with fix milestone, are and type - other_labels = filter(lambda x: not x.startswith("area"), issue_labels) - all_issue_labels = update_github_issues( \ - github_issue_ids, \ - fix_milestone_number, \ - fix_milestone, \ - fix_areas, \ - fix_types, \ - other_labels) - # add the pr author to the assignees - add_assignees_to_github_issues(github_issue_ids, [ user_login ]) - - if target_ref != "master": - branch_version = target_ref.split('-')[1] - # add releases - fix_releases = ask_release_for_github_issues(branch_version, labels) - if len(fix_releases) > 0: - all_issue_labels = add_release_to_github_issues(github_issue_ids, all_issue_labels, fix_releases[0]) - - # - # 5. Process the merge - # - - # Merged pull requests don't appear as merged in the GitHub API; - # Instead, they're closed by asfgit. - merge_commits = \ - [e for e in pr_events if e["actor"]["login"] == "asfgit" and e["event"] == "closed"] - - if merge_commits: - merge_hash = merge_commits[0]["commit_id"] - message = get_json("%s/commits/%s" % (GITHUB_API_BASE, merge_hash))["commit"]["message"] - - print "Pull request %s has already been merged, assuming you want to backport" % pr_num - commit_is_downloaded = run_cmd(['git', 'rev-parse', '--quiet', '--verify', - "%s^{commit}" % merge_hash]).strip() != "" - if not commit_is_downloaded: - fail("Couldn't find any merge commit for #%s, you may need to update HEAD." 
% pr_num)
-
-        print "Found commit %s:\n%s" % (merge_hash, message)
-
-        cherry_pick(pr_num, merge_hash, ask_for_branch(latest_branch))
-        sys.exit(0)
-
-    if not bool(pr["mergeable"]):
-        msg = "Pull request %s is not mergeable in its current form.\n" % pr_num + \
-            "Continue? (experts only!)"
-        continue_maybe(msg)
-
-    print ("\n=== Pull Request #%s ===" % pr_num)
-    print ("PR title\t%s\nCommit title\t%s\nSource\t\t%s\nTarget\t\t%s\nURL\t\t%s" % (
-        pr_title, commit_title, pr_repo_desc, target_ref, url))
-    continue_maybe("Proceed with merging pull request #%s?" % pr_num)
-
-    merged_refs = [target_ref]
-    # proceed with the merge
-    merge_hash, merge_commit_log = merge_pr(pr_num, target_ref, commit_title, body, pr_reviewers, pr_repo_desc)
-
-    pick_prompt = "Would you like to pick %s into another branch?" % merge_hash
-    while raw_input("\n%s (y/n): " % pick_prompt).lower() == "y":
-        pick_ref = ask_for_branch(latest_branch)
-        branch_version = pick_ref.split('-')[1]
-        # add releases
-        fix_releases = ask_release_for_github_issues(branch_version, labels)
-        if len(fix_releases) > 0:
-            all_issue_labels = add_release_to_github_issues(github_issue_ids, all_issue_labels, fix_releases[0])
-        merged_refs = merged_refs + [cherry_pick(pr_num, merge_hash, pick_ref)]
-
-    if JIRA_IMPORTED:
-        if JIRA_USERNAME and JIRA_PASSWORD:
-            jira_comment = '''Issue resolved by merging pull request %s
-            [%s/%s]
-
-            {noformat}
-            %s
-            {noformat}
-            ''' % (pr_num, GITHUB_BASE, pr_num, merge_commit_log)
-            resolve_jira_issues(commit_title, merged_refs, jira_comment, target_ref)
-        else:
-            print "JIRA_USERNAME and JIRA_PASSWORD not set"
-            print "Exiting without trying to close the associated JIRA."
-    else:
-        print "Could not find jira-python library. Run 'sudo pip install jira' to install."
-        print "Exiting without trying to close the associated JIRA."
-
-if __name__ == "__main__":
-    import doctest
-    (failure_count, test_count) = doctest.testmod()
-    if (failure_count):
-        exit(-1)
-
-    main()
diff --git a/dev/bk-merge-pr3.py b/dev/bk-merge-pr3.py
new file mode 100755
index 00000000000..fdf10a02db7
--- /dev/null
+++ b/dev/bk-merge-pr3.py
@@ -0,0 +1,741 @@
+#!/usr/bin/env python
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Utility for creating well-formed pull request merges and pushing them to Apache. This script is a modified version
+# of the one created by the Spark project (https://github.com/apache/spark/blob/master/dev/merge_spark_pr.py).
+#
+# Usage: ./bk-merge-pr3.py (see config env vars below)
+#
+# This utility assumes you already have a local bookkeeper git folder and that you
+# have added remotes corresponding to the github apache bookkeeper repo.
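+#
+# Example invocation (an illustrative sketch, not part of the original script:
+# `<token>` is a placeholder for a real GitHub OAuth token with the
+# 'public_repo' scope; the env vars shown are the ones this script reads):
+#
+#   GITHUB_OAUTH_KEY=<token> PR_REMOTE_NAME=apache GITHUB_USER=apache ./dev/bk-merge-pr3.py
+#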
+ +import json +import os +import re +import subprocess +import sys +from urllib.request import urlopen, Request +from urllib.error import HTTPError + +PROJECT_NAME = "bookkeeper" + +CAPITALIZED_PROJECT_NAME = PROJECT_NAME.upper() +GITHUB_ISSUES_NAME = "issue".upper() + +# Location of the local git repository +REPO_HOME = os.environ.get("%s_HOME" % CAPITALIZED_PROJECT_NAME, os.getcwd()) +# Remote name which points to the GitHub site +PR_REMOTE_NAME = os.environ.get("PR_REMOTE_NAME", "apache") +# Github API page size +GITHUB_PAGE_SIZE = os.environ.get("GH_PAGE_SIZE", "100") +# OAuth key used for issuing requests against the GitHub API. If this is not defined, then requests +# will be unauthenticated. You should only need to configure this if you find yourself regularly +# exceeding your IP's unauthenticated request rate limit. You can create an OAuth key at +# https://github.com/settings/tokens. This script only requires the "public_repo" scope. +GITHUB_OAUTH_KEY = os.environ.get("GITHUB_OAUTH_KEY") + +GITHUB_USER = os.environ.get("GITHUB_USER", "apache") +GITHUB_BASE = "https://github.com/%s/%s/pull" % (GITHUB_USER, PROJECT_NAME) +GITHUB_API_URL = "https://api.github.com" +GITHUB_API_BASE = "%s/repos/%s/%s" % (GITHUB_API_URL, GITHUB_USER, PROJECT_NAME) +# Prefix added to temporary branches +TEMP_BRANCH_PREFIX = "PR_TOOL" +RELEASE_BRANCH_PREFIX = "branch-" + +DEFAULT_FIX_VERSION = os.environ.get("DEFAULT_FIX_VERSION", "0.9.1.0") + +def get_json(url, preview_api = False): + try: + request = Request(url) + if GITHUB_OAUTH_KEY: + request.add_header('Authorization', 'token %s' % GITHUB_OAUTH_KEY) + if preview_api: + request.add_header('Accept', 'application/vnd.github.black-cat-preview+json') + return json.loads(urlopen(request).read()) + except HTTPError as e: + if "X-RateLimit-Remaining" in e.headers and e.headers["X-RateLimit-Remaining"] == '0': + print("Exceeded the GitHub API rate limit; see the instructions in " + \ + "bk-merge-pr.py to configure an OAuth token for making authenticated " + \ + "GitHub requests.") + else: + print("Unable to fetch URL, exiting: %s" % url) + sys.exit(-1) + +def post_json(url, data): + try: + request = Request(url, data.encode(encoding='utf-8'), { 'Content-Type': 'application/json' }) + request.add_header('Authorization', 'token %s' % GITHUB_OAUTH_KEY) + return json.loads(urlopen(request).read()) + except HTTPError as e: + if "X-RateLimit-Remaining" in e.headers and e.headers["X-RateLimit-Remaining"] == '0': + print("Exceeded the GitHub API rate limit; see the instructions in " + \ + "bk-merge-pr.py to configure an OAuth token for making authenticated " + \ + "GitHub requests.") + else: + print("Unable to fetch URL, exiting: %s - %s" % (url, e)) + sys.exit(-1) + +def put_json(url, data): + try: + request = Request(url, data.encode(encoding='utf-8'), { 'Content-Type': 'application/json' }) + request.get_method = lambda: 'PUT' + request.add_header('Authorization', 'token %s' % GITHUB_OAUTH_KEY) + return json.loads(urlopen(request).read()) + except HTTPError as e: + if "X-RateLimit-Remaining" in e.headers and e.headers["X-RateLimit-Remaining"] == '0': + print("Exceeded the GitHub API rate limit; see the instructions in " + \ + "bk-merge-pr.py to configure an OAuth token for making authenticated " + \ + "GitHub requests.") + else: + print("Unable to fetch URL, exiting: %s - %s" % (url, e)) + print(e) + sys.exit(-1) + + +def fail(msg): + print(msg) + clean_up() + sys.exit(-1) + + +def run_cmd(cmd): + print(cmd) + if isinstance(cmd, list): + return 
subprocess.check_output(cmd).decode(encoding='utf-8')
+    else:
+        return subprocess.check_output(cmd.split(" ")).decode(encoding='utf-8')
+
+
+def continue_maybe(prompt):
+    result = input("\n%s (y/n): " % prompt)
+    if result.lower() != "y":
+        fail("Okay, exiting")
+
+def clean_up():
+    if original_head != get_current_branch():
+        print("Restoring head pointer to %s" % original_head)
+        run_cmd("git checkout %s" % original_head)
+
+    branches = run_cmd("git branch").replace(" ", "").split("\n")
+
+    for branch in list(filter(lambda x: x.startswith(TEMP_BRANCH_PREFIX), branches)):
+        print("Deleting local branch %s" % branch)
+        run_cmd("git branch -D %s" % branch)
+
+def get_current_branch():
+    return run_cmd("git rev-parse --abbrev-ref HEAD").replace("\n", "")
+
+def get_milestones():
+    return get_json("https://api.github.com/repos/%s/%s/milestones?state=open&sort=due_on&direction=asc" % (GITHUB_USER, PROJECT_NAME))
+
+def get_all_labels():
+    collected_labels = []
+    page = 1
+    while True:
+        url = "https://api.github.com/repos/%s/%s/labels?per_page=%s&page=%s" % (GITHUB_USER, PROJECT_NAME, GITHUB_PAGE_SIZE, page)
+        result = get_json(url)
+        parsed = list(map(lambda x: x['name'], result))
+        collected_labels = collected_labels + parsed
+        page = page + 1
+        if len(parsed) == 0:
+            break
+    return collected_labels
+
+# merge the requested PR and return the merge hash
+def merge_pr(pr_num, target_ref, title, body, default_pr_reviewers, pr_repo_desc):
+    pr_branch_name = "%s_MERGE_PR_%s" % (TEMP_BRANCH_PREFIX, pr_num)
+    run_cmd("git fetch %s pull/%s/head:%s" % (PR_REMOTE_NAME, pr_num, pr_branch_name))
+
+    reviewers = input("Enter reviewers [%s]: " % default_pr_reviewers).strip()
+    if reviewers == '':
+        reviewers = default_pr_reviewers
+
+    commits = run_cmd(['git', 'log', 'HEAD..%s' % pr_branch_name,
+                      '--pretty=format:%h [%an] %s']).split("\n")
+
+    if len(commits) > 1:
+        result = input("List pull request commits in squashed commit message? (y/n): ")
+        if result.lower() == "y":
+            should_list_commits = True
+        else:
+            should_list_commits = False
+    else:
+        should_list_commits = False
+
+    merge_message_flags = []
+
+    if body is not None:
+        # We remove @ symbols from the body to avoid triggering e-mails
+        # to people every time someone creates a public fork of the project.
+        merge_message_flags += [body.replace("@", "")]
+
+    if (reviewers != ""):
+        merge_message_flags += ["Reviewers: %s" % reviewers]
+
+    # The string "Closes #%s" is required for GitHub to correctly close the PR
+    close_line = "This closes #%s from %s" % (pr_num, pr_repo_desc)
+    # Find the github issues to close
+    github_issues = re.findall("#[0-9]{3,6}", title)
+
+    if len(github_issues) != 0:
+        for issue_id in github_issues:
+            close_line += ", closes %s" % (issue_id)
+
+    if should_list_commits:
+        close_line += " and squashes the following commits:"
+    merge_message_flags += [close_line]
+
+    if should_list_commits:
+        merge_message_flags += ["\n".join(commits)]
+
+    pr_sha = run_cmd("git rev-parse %s" % pr_branch_name).rstrip()
+
+    merge_url = get_github_issue_merge_url(pr_num)
+    data = json.dumps({
+        'commit_title': title,
+        'commit_message': "\n\n".join(merge_message_flags),
+        'sha': pr_sha,
+        'merge_method': 'squash'
+    }, indent = 4)
+
+    continue_maybe("Merge Pull Request (%s) with following details:\n%s" % (
+        pr_num, data))
+
+    resp = put_json(merge_url, data)
+
+    merge_hash = resp['sha']
+    merge_log = title + '\n' + "\n\n".join(merge_message_flags)
+    clean_up()
+    print("Pull request #%s merged!"
% pr_num) + print("Merge hash: %s" % merge_hash) + return merge_hash, merge_log + +def ask_for_branch(default_branch): + pick_ref = input("Enter a branch name [%s]: " % default_branch) + if pick_ref == "": + pick_ref = default_branch + return pick_ref + +def cherry_pick(pr_num, merge_hash, pick_ref): + pick_branch_name = "%s_PICK_PR_%s_%s" % (TEMP_BRANCH_PREFIX, pr_num, pick_ref.upper()) + + run_cmd("git fetch %s %s:%s" % (PR_REMOTE_NAME, pick_ref, pick_branch_name)) + run_cmd("git checkout %s" % pick_branch_name) + + try: + run_cmd("git cherry-pick -sx %s" % merge_hash) + except Exception as e: + msg = "Error cherry-picking: %s\nWould you like to manually fix-up this merge?" % e + continue_maybe(msg) + msg = "Okay, please fix any conflicts and finish the cherry-pick. Finished?" + continue_maybe(msg) + + continue_maybe("Pick complete (local ref %s). Push to %s?" % ( + pick_branch_name, PR_REMOTE_NAME)) + + try: + run_cmd('git push %s %s:%s' % (PR_REMOTE_NAME, pick_branch_name, pick_ref)) + except Exception as e: + clean_up() + fail("Exception while pushing: %s" % e) + + pick_hash = run_cmd("git rev-parse %s" % pick_branch_name)[:8] + clean_up() + + print("Pull request #%s picked into %s!" % (pr_num, pick_ref)) + print("Pick hash: %s" % pick_hash) + return pick_ref + + +def fix_version_from_branch(branch, versions, target_ref): + # Note: Assumes this is a sorted (newest->oldest) list of un-released versions + if branch == target_ref: + versions = list(filter(lambda x: x == DEFAULT_FIX_VERSION, versions)) + if len(versions) > 0: + return versions[0] + else: + return None + else: + versions = list(filter(lambda x: x.startswith(branch), versions)) + if len(versions) > 0: + return versions[-1] + else: + return None + +def standardize_issue_ref(text): + """ + Standardize the github reference commit message prefix to "Issue #XXX: Issue" + + 'ISSUE #376: Script for generating patch for reviews' + """ + github_issue_refs = [] + github_issue_ids = [] + components = [] + + # Extract Github Issue ref(s) + pattern = re.compile(r'(%s[-\s]*([0-9]{3,6}))+' % GITHUB_ISSUES_NAME, re.IGNORECASE) + for ref in pattern.findall(text): + # Add brackets, replace spaces or a dash with ' #', & convert to uppercase + github_issue_refs.append(re.sub(r'[-\s]+', ' #', ref[0].upper())) + text = text.replace(ref[0], '') + github_issue_ids.append(ref[1].upper()) + + # Extract project name component(s): + # Look for alphanumeric chars, spaces, dashes, periods, and/or commas + pattern = re.compile(r'(\[[\w\s,-\.]+\])', re.IGNORECASE) + for component in pattern.findall(text): + components.append(component.upper()) + text = text.replace(component, '') + + # Cleanup any remaining symbols: + pattern = re.compile(r'^\W+(.*)', re.IGNORECASE) + if (pattern.search(text) is not None): + text = pattern.search(text).groups()[0] + + # Assemble full text (github ref(s), module(s), remaining text) + prefix = '' + github_prefix = ' '.join(github_issue_refs).strip() + if github_prefix: + prefix = github_prefix + ": " + clean_text = prefix + ' '.join(components).strip() + " " + text.strip() + + # Replace multiple spaces with a single space, e.g. 
if no refs and/or components were included + clean_text = re.sub(r'\s+', ' ', clean_text.strip()) + + return clean_text, github_issue_ids + +def get_reviewers(pr_num): + """ + Get a candidate list of reviewers that have commented on the PR with '+1' or 'LGTM' + """ + approval_msgs = ['+1', 'lgtm'] + + pr_comments = get_json("%s/issues/%s/comments" % (GITHUB_API_BASE, pr_num)) + + reviewers_ids = set() + for comment in pr_comments: + for approval_msg in approval_msgs: + if approval_msg in comment['body'].lower(): + reviewers_ids.add(comment['user']['login']) + + approval_review_states = ['approved'] + pr_reviews = get_json('{0}/pulls/{1}/reviews?per_page=100'.format(GITHUB_API_BASE, pr_num), True) + for review in pr_reviews: + for approval_state in approval_review_states: + if approval_state in review['state'].lower(): + reviewers_ids.add(review['user']['login']) + + if len(reviewers_ids) == 0: + fail("No approvals found in this pull request") + + reviewers_emails = [] + for reviewer_id in reviewers_ids: + username = None + useremail = None + user = get_json("%s/users/%s" % (GITHUB_API_URL, reviewer_id)) + if user['email']: + useremail = user['email'].strip() + if user['name']: + username = user['name'].strip() + if username is None: + continue + reviewers_emails.append('{0} <{1}>'.format(username, useremail)) + return ', '.join(reviewers_emails) + +def check_ci_status(pr): + ci_status = get_json("%s/commits/%s/status" % (GITHUB_API_BASE, pr["head"]["sha"])) + state = ci_status["state"] + if state != "success": + comments = get_json(pr["comments_url"]) + ignore_ci_comments = [c for c in comments if c["body"].upper() == "IGNORE CI"] + if len(ignore_ci_comments) > 0: + print("\n\nWARNING: The PR has not passed CI (state is %s)" % (state) \ + + ", but this has been overridden by %s. \n" % (ignore_ci_comments[0]["user"]["login"]) \ + + "Proceed at your own peril!\n\n") + else: + check_individual_ci_status(ci_status, comments) + +def is_check_passed(check): + passed = check["state"] == "success" + if (not passed) and is_jenkins_check(check): + try: + return is_jenkins_passed(check["target_url"]) + except: + fail("failed to fetch the jenkins build status for check '%s'.\nPlease manually check its build status at %s" % (check["context"], check["target_url"])) + return passed + +def is_jenkins_check(check): + return check["context"].startswith("Jenkins:") + +def is_jenkins_passed(url): + jenkins_status = get_json("%sapi/json?tree=result" % (url)) + return "SUCCESS" == jenkins_status['result'] + +def is_integration_test_check(check): + return check["context"] == u"Jenkins: Integration Tests" + +def check_individual_ci_status(ci_status, comments): + ci_failures = [] + ci_integration_test_failures = [] + for status in ci_status["statuses"]: + is_passed = is_check_passed(status) + is_integration_test = is_integration_test_check(status) + + if is_integration_test and (not is_passed): + ci_integration_test_failures.append(status) + else: + if not is_passed: + ci_failures.append(status) + + if len(ci_integration_test_failures) != 0 and len(ci_failures) == 0: + # all ci passed except integration tests + ignore_it_ci_comments = [c for c in comments if c["body"].upper() == "IGNORE IT CI"] + if len(ignore_it_ci_comments) > 0: + print("\n\nWARNING: The PR has not passed integration tests CI" \ + + ", but this has been overridden by %s. 
\n" % (ignore_it_ci_comments[0]["user"]["login"]) \ + + "Proceed at your own peril!\n\n") + else: + fail("The PR has not passed integration tests CI") + elif len(ci_failures) != 0 or len(ci_integration_test_failures) != 0: + fail_msg = "The PR has not passed CI:\n" + print("") + for status in ci_failures: + fail_msg += "\t %s = %s\n" % (status["context"], status["state"]) + for status in ci_integration_test_failures: + fail_msg += "\t %s = %s\n" % (status["context"], status["state"]) + fail(fail_msg) + +def ask_release_for_github_issues(branch, labels): + print("=== Add release to github issues ===") + while True: + fix_releases = ask_for_labels("release/%s" % branch, labels, []) + if len(fix_releases) != 1: + print("Please choose only one release to add for branch '%s'." % branch) + continue + + print("=== Apply following releases to github issues ==") + print("Fix Releases: %s" % ', '.join(fix_releases)) + print("") + + if input("Would you like to add these releases to github issues? (y/n): ") == "y": + break + return fix_releases + +def ask_updates_for_github_issues(milestones, labels, issue_labels, milestone_required): + while True: + fix_milestone, fix_milestone_number, fix_areas, fix_types = \ + get_updates_for_github_issues(milestones, labels, issue_labels, milestone_required) + + print("=== Apply following milestone, area, type to github issues ==") + print("Fix Types: %s" % ', '.join(fix_types)) + print("Fix Areas: %s" % ', '.join(fix_areas)) + if milestone_required: + print("Fix Milestone: %s" % fix_milestone) + print("") + + if input("Would you like to update github issues with these labels? (y/n): ") == "y": + break + + return fix_milestone, fix_milestone_number, fix_areas, fix_types + +def get_updates_for_github_issues(milestones, labels, issue_labels, milestone_required): + # get milestone + fix_milestone = "" + fix_milestone_number = "" + if milestone_required: + default_milestone_name = milestones[0]['title'] + milestone_list = list(map(lambda x: x['title'], milestones)) + milestone_map = dict((milestone['title'], milestone['number']) for milestone in milestones) + while True: + fix_milestone = input("Choose fix milestone : options are [%s] - default: [%s]: " % (', '.join(milestone_list).strip(), default_milestone_name)) + fix_milestone = fix_milestone.strip() + if fix_milestone == "": + fix_milestone = default_milestone_name + break + elif fix_milestone in milestone_map: + break + else: + print("Invalid milestone: %s." % fix_milestone) + fix_milestone_number = milestone_map[fix_milestone] + + # get area + fix_areas = ask_for_labels("area/", labels, issue_labels) + + # get types + fix_types = ask_for_labels("type/", labels, issue_labels) + + return fix_milestone, fix_milestone_number, fix_areas, fix_types + +def ask_for_labels(prefix, labels, issue_labels): + issue_filtered_labels = list(map(lambda l: l.split('/')[1], filter(lambda x: x.startswith(prefix), issue_labels))) + filtered_labels = list(map(lambda l: l.split('/')[1], filter(lambda x: x.startswith(prefix), labels))) + while True: + fix_labels = input("Choose label '%s' - options are: [%s] - default: [%s] (comma separated): " + % (prefix, ', '.join(filtered_labels).strip(), ', '.join(issue_filtered_labels).strip())) + if fix_labels == "": + if not issue_filtered_labels: + print("Please specify a '%s' label to close the issue!" 
% prefix) + continue + else: + fix_labels = issue_filtered_labels + break + fix_labels = fix_labels.replace(" ", "").split(",") + if not fix_labels: + print("Please specify a '%s' label to close the issue!" % prefix) + continue + invalid_label = False + for label in fix_labels: + if label not in filtered_labels: + print("Invalid '%s' label: %s." % (prefix, label)) + invalid_label = True + break + if invalid_label: + continue + else: + break + return fix_labels + +def get_github_issue_url(github_issue_id): + return "https://api.github.com/repos/%s/%s/issues/%s" % (GITHUB_USER, PROJECT_NAME, github_issue_id) + +def get_github_issue_merge_url(github_issue_id): + return "https://api.github.com/repos/%s/%s/pulls/%s/merge" % (GITHUB_USER, PROJECT_NAME, github_issue_id) + +def get_assignees_url(github_issue_id): + return "https://api.github.com/repos/%s/%s/issues/%s/assignees" % (GITHUB_USER, PROJECT_NAME, github_issue_id) + +def get_github_issue_labels(github_issue_id): + url = "https://api.github.com/repos/%s/%s/issues/%s/labels" % (GITHUB_USER, PROJECT_NAME, github_issue_id) + result = get_json(url) + return list(map(lambda x: x["name"], result)) + +def add_release_to_github_issues(github_issue_ids, labels, fix_release): + for github_issue_id in github_issue_ids: + labels = add_release_to_github_issue(github_issue_id, labels, fix_release) + return labels + +def add_release_to_github_issue(github_issue_id, labels, fix_release): + url = get_github_issue_url(github_issue_id) + labels = ["release/%s" % fix_release] + labels + data = json.dumps({ + 'labels': labels + }) + post_json(url, data) + return labels + +def update_github_issue(github_issue_id, fix_milestone_number, fix_milestone, fix_areas, fix_types, other_labels): + url = get_github_issue_url(github_issue_id) + labels = other_labels + list(map(lambda x: ("area/%s" % x), fix_areas)) + labels = labels + list(map(lambda x: ("type/%s" % x), fix_types)) + if fix_milestone_number == '': + data = json.dumps({ + 'labels': labels, + }) + else: + labels.append("release/%s" % fix_milestone) + data = json.dumps({ + 'milestone': int(fix_milestone_number), + 'labels': labels, + }) + + post_json(url, data) + return labels + +def update_github_issues(github_issue_ids, fix_milestone_number, fix_milestone, fix_areas, fix_types, other_labels): + for github_issue_id in github_issue_ids: + labels = update_github_issue(github_issue_id, fix_milestone_number, fix_milestone, fix_areas, fix_types, other_labels) + return labels + +def add_assignees_to_github_issues(github_issue_ids, assignees): + for github_issue_id in github_issue_ids: + add_assignees_to_github_issue(github_issue_id, assignees) + +def add_assignees_to_github_issue(github_issue_id, assignees): + url = get_assignees_url(github_issue_id) + data = json.dumps({ + "assignees": assignees + }) + post_json(url, data) + +def main(): + global original_head + + if not GITHUB_OAUTH_KEY: + print("OAuth key is needed for merging bookkeeper pull requests.") + print("If environment variable 'GITHUB_OAUTH_KEY' is not defined,") + print("then requests will be unauthenticated.") + print("You can create an OAuth key at https://github.com/settings/tokens") + print("and set it to the environment variable 'GITHUB_OAUTH_KEY'.") + print("(This token only needs the 'public_repo' scope permissions)") + exit(-1) + + # 0. get the current state so we can go back + original_head = get_current_branch() + + # 1. 
retrieve milestones, labels, branches + milestones = get_milestones() + labels = get_all_labels() + branches = get_json("%s/branches" % GITHUB_API_BASE) + branch_names = list(filter(lambda x: x.startswith(RELEASE_BRANCH_PREFIX), [x['name'] for x in branches])) + # Assumes branch names can be sorted lexicographically + latest_branch = sorted(branch_names, reverse=True)[0] + + # 2. retrieve the details for a given pull request + pr_num = input("Which pull request would you like to merge? (e.g. 34): ") + pr = get_json("%s/pulls/%s" % (GITHUB_API_BASE, pr_num)) + pr_events = get_json("%s/issues/%s/events" % (GITHUB_API_BASE, pr_num)) + pr_reviewers = get_reviewers(pr_num) + check_ci_status(pr) + + url = pr["url"] + + # 3. repair the title for commit message + pr_title = pr["title"] + commit_title = input("Commit title [%s]: " % pr_title) + if commit_title == "": + commit_title = pr_title + + # Decide whether to use the modified title or not + modified_title, github_issue_ids = standardize_issue_ref(commit_title) + if modified_title != commit_title: + print("I've re-written the title as follows to match the standard format:") + print("Original: %s" % commit_title) + print("Modified: %s" % modified_title) + result = input("Would you like to use the modified title? (y/n): ") + if result.lower() == "y": + commit_title = modified_title + print("Using modified title:") + else: + print("Using original title:") + print(commit_title) + + body = pr["body"] + modified_body = "" + for line in body.split('\n'): + if line.startswith('>'): + continue + modified_body = modified_body + line + "\n" + modified_body = modified_body[:-1] + if modified_body != body: + print("I've re-written the body as follows to match the standard format:") + print("Original: ") + print(body) + print("Modified: ") + print(modified_body) + result = input("Would you like to use the modified body? (y/n): ") + if result.lower() == "y": + body = modified_body + print("Using modified body.") + else: + print("Using original body.") + + target_ref = pr["base"]["ref"] + user_login = pr["user"]["login"] + base_ref = pr["head"]["ref"] + pr_repo_desc = "%s/%s" % (user_login, base_ref) + + # append pr num to the github issues - we need to attach label and milestone to them + github_issue_ids.append(pr_num) + + # + # 4. attach milestone, area, type and release to github issues + # + + # get issue labels + issue_labels = get_github_issue_labels(pr_num) + # ask for fix milestone, area and type + fix_milestone, fix_milestone_number, fix_areas, fix_types = \ + ask_updates_for_github_issues(milestones, labels, issue_labels, target_ref == "master") + # update issues with fix milestone, area and type + other_labels = list(filter(lambda x: not x.startswith("area"), issue_labels)) + all_issue_labels = update_github_issues( \ + github_issue_ids, \ + fix_milestone_number, \ + fix_milestone, \ + fix_areas, \ + fix_types, \ + other_labels) + # add the pr author to the assignees + add_assignees_to_github_issues(github_issue_ids, [ user_login ]) + + if target_ref != "master": + branch_version = target_ref.split('-')[1] + # add releases + fix_releases = ask_release_for_github_issues(branch_version, labels) + if len(fix_releases) > 0: + all_issue_labels = add_release_to_github_issues(github_issue_ids, all_issue_labels, fix_releases[0]) + + # + # 5. Process the merge + # + + # Merged pull requests don't appear as merged in the GitHub API; + # Instead, they're closed by asfgit.
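+ # For illustration, a "closed"-by-asfgit event from the issues/events API looks roughly like
+ # {"event": "closed", "actor": {"login": "asfgit"}, "commit_id": "1a2b3c4d..."} (values hypothetical)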
+ merge_commits = \ + [e for e in pr_events if e["actor"]["login"] == "asfgit" and e["event"] == "closed"] + + if merge_commits: + merge_hash = merge_commits[0]["commit_id"] + message = get_json("%s/commits/%s" % (GITHUB_API_BASE, merge_hash))["commit"]["message"] + + print("Pull request %s has already been merged, assuming you want to backport" % pr_num) + commit_is_downloaded = run_cmd(['git', 'rev-parse', '--quiet', '--verify', + "%s^{commit}" % merge_hash]).strip() != "" + if not commit_is_downloaded: + fail("Couldn't find any merge commit for #%s, you may need to update HEAD." % pr_num) + + print("Found commit %s:\n%s" % (merge_hash, message)) + + cherry_pick(pr_num, merge_hash, ask_for_branch(latest_branch)) + sys.exit(0) + + if not bool(pr["mergeable"]): + msg = "Pull request %s is not mergeable in its current form.\n" % pr_num + \ + "You may need to rebase the PR." + fail(msg) + + print("\n=== Pull Request #%s ===" % pr_num) + print("PR title\t%s\nCommit title\t%s\nSource\t\t%s\nTarget\t\t%s\nURL\t\t%s" % ( + pr_title, commit_title, pr_repo_desc, target_ref, url)) + continue_maybe("Proceed with merging pull request #%s?" % pr_num) + + merged_refs = [target_ref] + # proceed with the merge + merge_hash, merge_commit_log = merge_pr(pr_num, target_ref, commit_title, body, pr_reviewers, pr_repo_desc) + + # once the pr is merged, refresh the local repo + run_cmd("git fetch %s" % PR_REMOTE_NAME) + + pick_prompt = "Would you like to pick %s into another branch?" % merge_hash + while input("\n%s (y/n): " % pick_prompt).lower() == "y": + pick_ref = ask_for_branch(latest_branch) + branch_version = pick_ref.split('-')[1] + # add releases + fix_releases = ask_release_for_github_issues(branch_version, labels) + if len(fix_releases) > 0: + all_issue_labels = add_release_to_github_issues(github_issue_ids, all_issue_labels, fix_releases[0]) + merged_refs = merged_refs + [cherry_pick(pr_num, merge_hash, pick_ref)] + +if __name__ == "__main__": + import doctest + (failure_count, test_count) = doctest.testmod() + if (failure_count): + exit(-1) + + main() diff --git a/dev/check-all-licenses b/dev/check-all-licenses new file mode 100755 index 00000000000..1f548c3cef1 --- /dev/null +++ b/dev/check-all-licenses @@ -0,0 +1,33 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Script to check licenses on a binary tarball. +# It extracts the list of bundled jars, the NOTICE, and the LICENSE +# files. It checks that every non-bk jar bundled is mentioned in the +# LICENSE file. It checks that all jar files mentioned in NOTICE and +# LICENSE are actually bundled.
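+# Usage sketch (assumes the binary tarballs were already built, e.g. with
+# `mvn clean package -DskipTests` from the repository root):
+#   dev/check-all-licenses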
+ +# all error fatal +set -e -x + +HERE=$(dirname $0) +BOOKKEEPER_DIST=$HERE/../bookkeeper-dist +$HERE/check-binary-license $BOOKKEEPER_DIST/server/target/bookkeeper-server-*-bin.tar.gz +$HERE/check-binary-license $BOOKKEEPER_DIST/all/target/bookkeeper-all-*-bin.tar.gz +$HERE/check-binary-license $BOOKKEEPER_DIST/bkctl/target/bkctl-*-bin.tar.gz + diff --git a/dev/check-binary-license b/dev/check-binary-license index 04f8e7f858c..27058a86e6e 100755 --- a/dev/check-binary-license +++ b/dev/check-binary-license @@ -43,7 +43,6 @@ LICENSEPATH=$(${TAR} -tf $TARBALL | awk '/^[^\/]*\/LICENSE/') LICENSE=$(${TAR} -O -xf $TARBALL "$LICENSEPATH") NOTICEPATH=$(${TAR} -tf $TARBALL | awk '/^[^\/]*\/NOTICE/') NOTICE=$(${TAR} -O -xf $TARBALL $NOTICEPATH) - LICENSEJARS=$(echo "$LICENSE" | sed -nE 's!.*lib/(.*\.jar).*!\1!gp') NOTICEJARS=$(echo "$NOTICE" | sed -nE 's!.*lib/(.*\.jar).*!\1!gp') @@ -70,6 +69,21 @@ for J in $JARS; do continue fi + echo $J | grep -q "bookkeeper-dist-server" + if [ $? == 0 ]; then + continue + fi + + echo $J | grep -q "bookkeeper-dist-all" + if [ $? == 0 ]; then + continue + fi + + echo $J | grep -q "bookkeeper-dist-bkctl" + if [ $? == 0 ]; then + continue + fi + echo "$LICENSE" | grep -q $J if [ $? != 0 ]; then echo $J unaccounted for in LICENSE @@ -115,8 +129,8 @@ done if [ $EXIT != 0 ]; then echo - echo It looks like there are issues with the LICENSE/NOTICE. - echo See http://bookkeeper.apache.org/community/licensing for details on how to fix. + echo It looks like there are issues with the LICENSE/NOTICE in $TARBALL. + echo See https://bookkeeper.apache.org/community/licensing for details on how to fix. fi exit $EXIT diff --git a/dev/ci-tool b/dev/ci-tool new file mode 100755 index 00000000000..0f26955f643 --- /dev/null +++ b/dev/ci-tool @@ -0,0 +1,138 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +# shell function library for Bookkeeper CI builds + +# lists all available functions in this tool +function ci_list_functions() { + declare -F | awk '{print $NF}' | sort | egrep '^ci_' | sed 's/^ci_//' +} + +# prints thread dumps for all running JVMs +# used in CI when a job gets cancelled because of a job timeout +function ci_print_thread_dumps() { + for java_pid in $(jps -q -J-XX:+PerfDisableSharedMem); do + echo "----------------------- pid $java_pid -----------------------" + cat /proc/$java_pid/cmdline | xargs -0 echo + jcmd $java_pid Thread.print -l + jcmd $java_pid GC.heap_info + done + return 0 +} + +# copies test reports into test-reports and surefire-reports directory +# subsequent runs of tests might overwrite previous reports. This ensures that all test runs get reported. 
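+# invoked through the dispatcher at the bottom of this script, which prepends the `ci_` prefix, e.g.:
+#   dev/ci-tool move_test_reports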
+function ci_move_test_reports() { + ( + if [ -n "${GITHUB_WORKSPACE}" ]; then + cd "${GITHUB_WORKSPACE}" + mkdir -p test-reports + mkdir -p surefire-reports + fi + # aggregate all junit xml reports in a single directory + if [ -d test-reports ]; then + # copy test reports to single directory, rename duplicates + find . -path '*/target/surefire-reports/junitreports/TEST-*.xml' -print0 | xargs -0 -r -n 1 mv -t test-reports --backup=numbered + # rename possible duplicates to have ".xml" extension + ( + for f in test-reports/*~; do + mv -- "$f" "${f}.xml" + done 2>/dev/null + ) || true + fi + # aggregate all surefire-reports in a single directory + if [ -d surefire-reports ]; then + ( + find . -type d -path '*/target/surefire-reports' -not -path './surefire-reports/*' | + while IFS=$'\n' read -r directory; do + echo "Copying reports from $directory" + target_dir="surefire-reports/${directory}" + if [ -d "$target_dir" ]; then + # rotate backup directory names *~3 -> *~2, *~2 -> *~3, *~1 -> *~2, ... + ( command ls -vr1d "${target_dir}~"* 2> /dev/null | awk '{print "mv "$0" "substr($0,0,length-1)substr($0,length,1)+1}' | sh ) || true + # backup existing target directory, these are the results of the previous test run + mv "$target_dir" "${target_dir}~1" + fi + # copy files + cp -R --parents "$directory" surefire-reports + # remove the original directory + rm -rf "$directory" + done + ) + fi + ) +} + +# Finds fastest up-to-date ubuntu mirror based on download speed +function ci_find_fast_ubuntu_mirror() { + local ubuntu_release=${1:-"$(lsb_release -c 2>/dev/null | cut -f2 || echo "jammy")"} + local ubuntu_arch=${2:-"$(dpkg --print-architecture 2>/dev/null || echo "amd64")"} + { + # choose mirrors that are up-to-date by checking the Last-Modified header for + { + # randomly choose up to 10 mirrors using http:// protocol + # (https isn't supported in docker containers that don't have ca-certificates installed) + curl -s http://mirrors.ubuntu.com/mirrors.txt | grep '^http://' | shuf -n 10 + # also consider Azure's Ubuntu mirror + echo http://azure.archive.ubuntu.com/ubuntu/ + } | xargs -I {} sh -c "ubuntu_release=$ubuntu_release ubuntu_arch=$ubuntu_arch;"'echo "$(curl -m 5 -sI {}dists/${ubuntu_release}/Contents-${ubuntu_arch}.gz|sed s/\\r\$//|grep Last-Modified|awk -F": " "{ print \$2 }" | LANG=C date -f- -u +%s)" "{}"' | sort -rg | awk '{ if (NR==1) TS=$1; if ($1 == TS) print $2 }' + } | xargs -I {} sh -c 'echo `curl -r 0-102400 -m 5 -s -w %{speed_download} -o /dev/null {}ls-lR.gz` {}' \ + |sort -g -r |head -1| awk '{ print $2 }' +} + +function ci_pick_ubuntu_mirror() { + echo "Choosing fastest up-to-date ubuntu mirror based on download speed..." + UBUNTU_MIRROR=$(ci_find_fast_ubuntu_mirror) + if [ -z "$UBUNTU_MIRROR" ]; then + # fallback to no mirror + UBUNTU_MIRROR="http://archive.ubuntu.com/ubuntu/" + UBUNTU_SECURITY_MIRROR="http://security.ubuntu.com/ubuntu/" + else + UBUNTU_SECURITY_MIRROR="${UBUNTU_MIRROR}" + fi + echo "Picked '$UBUNTU_MIRROR'." 
+ # set the chosen mirror also in the UBUNTU_MIRROR and UBUNTU_SECURITY_MIRROR environment variables + # that can be used by docker builds + export UBUNTU_MIRROR + export UBUNTU_SECURITY_MIRROR + # make environment variables available for later GitHub Actions steps + if [ -n "$GITHUB_ENV" ]; then + echo "UBUNTU_MIRROR=$UBUNTU_MIRROR" >> $GITHUB_ENV + echo "UBUNTU_SECURITY_MIRROR=$UBUNTU_SECURITY_MIRROR" >> $GITHUB_ENV + fi +} + +if [ -z "$1" ]; then + echo "usage: $0 [ci_tool_function_name]" + echo "Available ci tool functions:" + ci_list_functions + exit 1 +fi +ci_function_name="ci_$1" +shift + +if [[ "$(LC_ALL=C type -t $ci_function_name)" == "function" ]]; then + eval "$ci_function_name" "$@" +else + echo "Invalid ci tool function" + echo "Available ci tool functions:" + ci_list_functions + exit 1 +fi \ No newline at end of file diff --git a/dev/docker/Dockerfile b/dev/docker/Dockerfile index 228c23449c8..a23a363beef 100644 --- a/dev/docker/Dockerfile +++ b/dev/docker/Dockerfile @@ -17,7 +17,7 @@ # under the License. # -FROM maven:3.5.0-jdk-9 +FROM maven:3.9.0-eclipse-temurin-11 RUN apt-get update RUN apt-get install -y g++ cmake diff --git a/dev/docker/run.sh b/dev/docker/run.sh index 235cba786a8..b4b72a7b036 100755 --- a/dev/docker/run.sh +++ b/dev/docker/run.sh @@ -31,7 +31,7 @@ if [ "$(uname -s)" == "Linux" ]; then USER_NAME=${SUDO_USER:=$USER} USER_ID=$(id -u "${USER_NAME}") GROUP_ID=$(id -g "${USER_NAME}") - LOCAL_HOME="/home/${USER_NAME}" + LOCAL_HOME=$(realpath ~) else # boot2docker uid and gid USER_NAME=$USER USER_ID=1000 GROUP_ID=50 @@ -63,7 +63,7 @@ docker run -i -t \ --rm=true \ -w ${BOOKKEEPER_ROOT} \ -u "${USER}" \ - -v "${BOOKKEEPER_ROOT}:${BOOKKEEPER_ROOT}" \ + -v "$(realpath $BOOKKEEPER_ROOT):${BOOKKEEPER_ROOT}" \ -v "${LOCAL_HOME}:/home/${USER_NAME}" \ ${IMAGE_NAME}-${USER_NAME} \ bash -c "${CMD}" diff --git a/dev/publish-docker-images.sh b/dev/publish-docker-images.sh index 66d3e805e5f..7ed40bbb99f 100755 --- a/dev/publish-docker-images.sh +++ b/dev/publish-docker-images.sh @@ -45,7 +45,7 @@ DOCKER_ORG="${DOCKER_ORG:-apachebookkeeper}" docker login ${DOCKER_REGISTRY} -u="${DOCKER_USER}" -p="${DOCKER_PASSWORD}" if [ $? -ne 0 ]; then - echo "Failed to loging to Docker Hub" + echo "Failed to login to Docker Hub" exit 1 fi diff --git a/dev/release/000-run-docker.sh b/dev/release/000-run-docker.sh index 255a6a210ac..845b17ec7e1 100755 --- a/dev/release/000-run-docker.sh +++ b/dev/release/000-run-docker.sh @@ -33,7 +33,7 @@ export IMAGE_NAME="bookkeeper-release-build" pushd ${SCRIPT_DIR} -docker build --rm=true -t ${IMAGE_NAME} . +docker build --platform linux/amd64 --rm=true -t ${IMAGE_NAME} . popd @@ -41,24 +41,24 @@ if [ "$(uname -s)" == "Linux" ]; then USER_NAME=${SUDO_USER:=$USER} USER_ID=$(id -u "${USER_NAME}") GROUP_ID=$(id -g "${USER_NAME}") - LOCAL_HOME="/home/${USER_NAME}" else # boot2docker uid and gid USER_NAME=$USER USER_ID=1000 GROUP_ID=50 - LOCAL_HOME="/Users/${USER_NAME}" fi -docker build -t "${IMAGE_NAME}-${USER_NAME}" - <<UserSpecificDocker +docker build -t "${IMAGE_NAME}-${USER_NAME}" - <<UserSpecificDocker +FROM ${IMAGE_NAME} +RUN if ! getent group ${GROUP_ID} > /dev/null; then groupadd --non-unique -g ${GROUP_ID} ${USER_NAME}; fi && \ + if ! getent passwd ${USER_NAME} > /dev/null; then useradd -l -g ${GROUP_ID} -u ${GROUP_ID} -k /root -m ${USER_NAME}; fi && \ + ([ "$(dirname "$HOME")" = "/home" ] || ln -s /home $(dirname "$HOME")) && \ + mkdir -p /gpg && chown ${USER_ID}:${GROUP_ID} /gpg && chmod 700 /gpg ENV HOME /home/${USER_NAME} UserSpecificDocker BOOKKEEPER_ROOT=${SCRIPT_DIR}/../..
-VERSION=`cd $BOOKKEEPER_ROOT && mvn org.apache.maven.plugins:maven-help-plugin:2.1.1:evaluate -Dexpression=project.version | grep -Ev '(^\[|Download\w+:)' | sed 's/^\(.*\)-SNAPSHOT/\1/'` +VERSION=`cd $BOOKKEEPER_ROOT && mvn initialize help:evaluate -Dexpression=project.version -pl . -q -DforceStdout | grep -Ev '(^\[|Download\w+:)' | sed 's/^\(.*\)-SNAPSHOT/\1/'` versions_list=(`echo $VERSION | tr '.' ' '`) major_version=${versions_list[0]} minor_version=${versions_list[1]} @@ -74,7 +74,13 @@ RC_DIR="bookkeeper-${VERSION}-rc${RC_NUM}" RC_TAG="v${VERSION}-rc${RC_NUM}" CMD=" -gpg-agent --daemon --pinentry-program /usr/bin/pinentry --homedir \$HOME/.gnupg --use-standard-socket +# copy ~/.gnupg to /gpg in the container to workaround issue with permissions +cp -Rdp \$HOME/.gnupg /gpg +# remove any previous sockets +rm -rf /gpg/.gnupg/S.* +# set GNUPGHOME to /gpg/.gnupg +export GNUPGHOME=/gpg/.gnupg +gpg-agent --daemon --pinentry-program /usr/bin/pinentry --allow-loopback-pinentry --default-cache-ttl 3600 echo echo 'Welcome to Apache BookKeeper Release Build Env' echo @@ -94,21 +100,35 @@ echo 'RC_TAG = $RC_TAG' echo echo 'Before executing any release scripts, PLEASE configure your git to cache your github password:' echo +echo ' // take a backup of ~/.gitconfig, remember to restore it after the release process' +echo ' \$ cp ~/.gitconfig ~/.gitconfig.bak.\$(date -I)' +echo ' // remove any previous credential helper configuration' +echo ' \$ git config --global -l --name-only | grep credential | uniq | xargs -i{} git config --global --unset-all {}' +echo ' // fix permission warning with git in docker on MacOS' +echo ' \$ git config --global --add safe.directory $PWD' +echo ' \$ git config --global --add safe.directory \$PWD' echo ' // configure credential helper to cache your github password for 1 hr during the whole release process ' echo ' \$ git config --global credential.helper \"cache --timeout=3600\" ' -echo ' \$ git push apache --dry-run ' +echo ' // in another terminal get a GitHub token to be used as a password for the release process, assuming you are using GitHub CLI.' 
+echo ' \$ gh auth token ' +echo ' // attempt to push to apache remote to cache your password' +echo ' \$ git push apache HEAD:test --dry-run ' +echo ' // cache gpg password by signing a dummy file' +echo ' \$ echo dummy > /tmp/dummy && gpg -sa /tmp/dummy' echo bash " pushd ${BOOKKEEPER_ROOT} +echo $BOOKKEEPER_ROOT docker run -i -t \ --rm=true \ -w ${BOOKKEEPER_ROOT} \ -u "${USER}" \ - -v "${BOOKKEEPER_ROOT}:${BOOKKEEPER_ROOT}" \ - -v "${LOCAL_HOME}:/home/${USER_NAME}" \ + -v "$BOOKKEEPER_ROOT:${BOOKKEEPER_ROOT}" \ + -v "$(realpath ~/):/home/${USER_NAME}" \ + -e MAVEN_CONFIG=/home/${USER_NAME}/.m2 \ -e VERSION=${VERSION} \ -e MAJOR_VERSION=${MAJOR_VERSION} \ -e NEXT_VERSION=${NEXT_VERSION} \ diff --git a/dev/release/001-release-branch.sh b/dev/release/001-release-branch.sh index 0bff4aa2130..2e48403fc05 100755 --- a/dev/release/001-release-branch.sh +++ b/dev/release/001-release-branch.sh @@ -25,3 +25,4 @@ cd $BK_HOME mvn release:branch \ -DbranchName=${BRANCH_NAME} \ - -DdevelopmentVersion=${DEVELOPMENT_VERSION} + -DdevelopmentVersion=${DEVELOPMENT_VERSION} \ + -Darguments="-Dmaven.javadoc.skip=true -DskipTests=true" diff --git a/dev/release/002-release-prepare.sh b/dev/release/002-release-prepare.sh index 232fb502aa1..4746dbce052 100755 --- a/dev/release/002-release-prepare.sh +++ b/dev/release/002-release-prepare.sh @@ -26,6 +26,5 @@ mvn release:prepare \ -DreleaseVersion=${VERSION} \ -Dtag=${RC_TAG} \ -DupdateWorkingCopyVersions=false \ - -Darguments="-Dmaven.javadoc.skip=true -DskipTests=true -Dstream" \ - -Dstream \ + -Darguments="-Dmaven.javadoc.skip=true -DskipTests=true" \ -Dresume=true diff --git a/dev/release/003-release-perform.sh b/dev/release/003-release-perform.sh index 2a5b531de13..36f16b56888 100755 --- a/dev/release/003-release-perform.sh +++ b/dev/release/003-release-perform.sh @@ -23,6 +23,5 @@ BK_HOME=`cd $BINDIR/../..;pwd` cd $BK_HOME mvn release:perform \ - -Darguments="-Dmaven.javadoc.skip=true -DskipTests=true -Dstream" \ - -Dstream \ + -Darguments="-Dmaven.javadoc.skip=true -DskipTests=true" \ -Dresume=true diff --git a/dev/release/004-stage-packages.sh b/dev/release/004-stage-packages.sh index 8926ccf9d30..e2e5bbaf7c5 100755 --- a/dev/release/004-stage-packages.sh +++ b/dev/release/004-stage-packages.sh @@ -50,13 +50,17 @@ cp ${SRC_DIR}/bookkeeper-dist/server/target/bookkeeper-server-${VERSION}-bin.tar cp ${SRC_DIR}/bookkeeper-dist/server/target/bookkeeper-server-${VERSION}-bin.tar.gz.asc ${DEST_DIR}/bookkeeper-server-${VERSION}-bin.tar.gz.asc cp ${SRC_DIR}/bookkeeper-dist/all/target/bookkeeper-all-${VERSION}-bin.tar.gz ${DEST_DIR}/bookkeeper-all-${VERSION}-bin.tar.gz cp ${SRC_DIR}/bookkeeper-dist/all/target/bookkeeper-all-${VERSION}-bin.tar.gz.asc ${DEST_DIR}/bookkeeper-all-${VERSION}-bin.tar.gz.asc +cp ${SRC_DIR}/bookkeeper-dist/bkctl/target/bkctl-${VERSION}-bin.tar.gz ${DEST_DIR}/bkctl-${VERSION}-bin.tar.gz +cp ${SRC_DIR}/bookkeeper-dist/bkctl/target/bkctl-${VERSION}-bin.tar.gz.asc ${DEST_DIR}/bkctl-${VERSION}-bin.tar.gz.asc echo "Copied packages." -echo "Generating sha1 files ..." -sha1sum ${DEST_DIR}/bookkeeper-${VERSION}-src.tar.gz > ${DEST_DIR}/bookkeeper-${VERSION}-src.tar.gz.sha1 -sha1sum ${DEST_DIR}/bookkeeper-server-${VERSION}-bin.tar.gz > ${DEST_DIR}/bookkeeper-server-${VERSION}-bin.tar.gz.sha1 -sha1sum ${DEST_DIR}/bookkeeper-all-${VERSION}-bin.tar.gz > ${DEST_DIR}/bookkeeper-all-${VERSION}-bin.tar.gz.sha1 -echo "Generated sha1 files." +echo "Generating sha512 files ..."
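+# each .sha512 file holds a line of the form "<digest>  <filename>" (digest hypothetical),
+# so the artifacts can later be verified with e.g. `shasum -a 512 -c bkctl-${VERSION}-bin.tar.gz.sha512`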
+cd ${DEST_DIR} +shasum -a 512 bookkeeper-${VERSION}-src.tar.gz > bookkeeper-${VERSION}-src.tar.gz.sha512 +shasum -a 512 bookkeeper-server-${VERSION}-bin.tar.gz > bookkeeper-server-${VERSION}-bin.tar.gz.sha512 +shasum -a 512 bookkeeper-all-${VERSION}-bin.tar.gz > bookkeeper-all-${VERSION}-bin.tar.gz.sha512 +shasum -a 512 bkctl-${VERSION}-bin.tar.gz > bkctl-${VERSION}-bin.tar.gz.sha512 +echo "Generated sha512 files." cd ${DIST_DEV_DIR}/bookkeeper svn add ${RC_DIR} diff --git a/dev/release/Dockerfile b/dev/release/Dockerfile index b99e262552f..23c28954c17 100644 --- a/dev/release/Dockerfile +++ b/dev/release/Dockerfile @@ -17,7 +17,9 @@ # under the License. # -FROM maven:3.5.0-jdk-8 +FROM --platform=linux/amd64 maven:3.9.0-eclipse-temurin-8 +ARG DEBIAN_FRONTEND=noninteractive +RUN apt-get update && \ + apt-get dist-upgrade -y && \ + apt-get install -y g++ cmake gnupg2 vim subversion less zip unzip -RUN apt-get update -RUN apt-get install -y g++ cmake gnupg2 vim subversion diff --git a/dev/stats-doc-gen b/dev/stats-doc-gen new file mode 100755 index 00000000000..ae8969fad1d --- /dev/null +++ b/dev/stats-doc-gen @@ -0,0 +1,67 @@ +#!/usr/bin/env bash +# +#/** +# * Licensed to the Apache Software Foundation (ASF) under one +# * or more contributor license agreements. See the NOTICE file +# * distributed with this work for additional information +# * regarding copyright ownership. The ASF licenses this file +# * to you under the Apache License, Version 2.0 (the +# * "License"); you may not use this file except in compliance +# * with the License. You may obtain a copy of the License at +# * +# * http://www.apache.org/licenses/LICENSE-2.0 +# * +# * Unless required by applicable law or agreed to in writing, software +# * distributed under the License is distributed on an "AS IS" BASIS, +# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# * See the License for the specific language governing permissions and +# * limitations under the License. 
+# */ + +# Stats Documentation Generator + +BINDIR=`dirname "$0"` +BK_HOME=`cd ${BINDIR}/..;pwd` + +source ${BK_HOME}/bin/common.sh +source ${BK_HOME}/conf/bk_cli_env.sh + +CLI_MODULE_PATH=stats/utils +CLI_MODULE_NAME="(org.apache.bookkeeper.stats-)?bookkeeper-stats-utils" +CLI_MODULE_HOME=${BK_HOME}/${CLI_MODULE_PATH} + +# find the module jar +CLI_JAR=$(find_module_jar ${CLI_MODULE_PATH} ${CLI_MODULE_NAME}) + +# set up the classpath +CLI_CLASSPATH=$(set_module_classpath ${CLI_MODULE_PATH}) + +DEFAULT_LOG_CONF=${BK_HOME}/conf/log4j2.cli.xml +if [ -z "${CLI_LOG_CONF}" ]; then + CLI_LOG_CONF=${DEFAULT_LOG_CONF} +fi +CLI_LOG_DIR=${CLI_LOG_DIR:-"$BK_HOME/logs"} +CLI_LOG_FILE=${CLI_LOG_FILE:-"stats-doc-gen.log"} +CLI_ROOT_LOG_LEVEL=${CLI_ROOT_LOG_LEVEL:-"INFO"} +CLI_ROOT_LOG_APPENDER=${CLI_ROOT_LOG_APPENDER:-"ROLLINGFILE"} + +# add all dependencies in the classpath +ALL_MODULE_PATH=bookkeeper-dist/all +ALL_MODULE_CLASSPATH=$(set_module_classpath ${ALL_MODULE_PATH}) + +# Configure the classpath +CLI_CLASSPATH="$CLI_JAR:$CLI_CLASSPATH:$CLI_EXTRA_CLASSPATH:$ALL_MODULE_CLASSPATH" +CLI_CLASSPATH="`dirname $CLI_LOG_CONF`:$CLI_CLASSPATH" + +# Build the OPTs +BOOKIE_OPTS=$(build_bookie_opts) +GC_OPTS=$(build_cli_jvm_opts ${CLI_LOG_DIR} "stats-doc-gen-gc.log") +NETTY_OPTS=$(build_netty_opts) +LOGGING_OPTS=$(build_cli_logging_opts ${CLI_LOG_CONF} ${CLI_ROOT_LOG_LEVEL} ${CLI_ROOT_LOG_APPENDER} ${CLI_LOG_DIR} ${CLI_LOG_FILE}) + +OPTS="${OPTS} -cp ${CLI_CLASSPATH} ${BOOKIE_OPTS} ${GC_OPTS} ${NETTY_OPTS} ${LOGGING_OPTS} ${CLI_EXTRA_OPTS}" + +#Change to BK_HOME to support relative paths +cd "$BK_HOME" +echo "running stats-doc-gen, logging to ${CLI_LOG_DIR}/${CLI_LOG_FILE}" +exec ${JAVA} ${OPTS} org.apache.bookkeeper.stats.utils.StatsDocGenerator $@ diff --git a/dev/ticktoc.sh b/dev/ticktoc.sh deleted file mode 100755 index 7903905c46b..00000000000 --- a/dev/ticktoc.sh +++ /dev/null @@ -1,44 +0,0 @@ -#!/usr/bin/env bash - -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# Script from https://github.com/travis-ci/travis-ci/issues/4190 - -set -e -set -u - -command=$1 - -# launch command in the background -${command} & - -# ping every second -seconds=0 -limit=40*60 -while kill -0 $! 
>/dev/null 2>&1; -do - echo -n -e " \b" # never leave evidence - - if [ $seconds == $limit ]; then - break; - fi - - seconds=$((seconds + 1)) - - sleep 1 -done diff --git a/dev/update-snapshot-version.sh b/dev/update-snapshot-version.sh index 59adf078bf9..8d4edbe0daf 100755 --- a/dev/update-snapshot-version.sh +++ b/dev/update-snapshot-version.sh @@ -33,6 +33,6 @@ if [ "x${PUBLISH_GITSHA}" = "xtrue" ]; then NEW_VERSION=$(get_snapshot_version_with_gitsha) echo "Update version from ${OLD_VERSION} to ${NEW_VERSION}" - mvn versions:set -Dstream -DnewVersion=${NEW_VERSION} - mvn versions:commit -Dstream + mvn versions:set -DnewVersion=${NEW_VERSION} + mvn versions:commit fi diff --git a/docker/Dockerfile b/docker/Dockerfile index 8c7f255072f..a58d0da0c45 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -17,40 +17,72 @@ # under the License. # -FROM centos:7 +FROM eclipse-temurin:17 as jre-build + +# Create a custom Java runtime +RUN $JAVA_HOME/bin/jlink \ + --add-modules ALL-MODULE-PATH \ + --strip-debug \ + --no-man-pages \ + --no-header-files \ + --compress=2 \ + --output /javaruntime + +RUN echo networkaddress.cache.ttl=1 >> /javaruntime/conf/security/java.security +RUN echo networkaddress.cache.negative.ttl=1 >> /javaruntime/conf/security/java.security + +FROM ubuntu:22.04 MAINTAINER Apache BookKeeper -ARG BK_VERSION=4.7.1 +ARG TARGETARCH +ARG BK_VERSION=4.17.1 ARG DISTRO_NAME=bookkeeper-server-${BK_VERSION}-bin -ARG GPG_KEY=FD74402C +ARG DISTRO_URL=https://archive.apache.org/dist/bookkeeper/bookkeeper-${BK_VERSION}/${DISTRO_NAME}.tar.gz ENV BOOKIE_PORT=3181 -EXPOSE $BOOKIE_PORT +ENV BOOKIE_HTTP_PORT=8080 +EXPOSE $BOOKIE_PORT $BOOKIE_HTTP_PORT ENV BK_USER=bookkeeper ENV BK_HOME=/opt/bookkeeper -ENV JAVA_HOME=/usr/lib/jvm/jre-1.8.0 - +ENV DEBIAN_FRONTEND=noninteractive +ARG UBUNTU_MIRROR=http://archive.ubuntu.com/ubuntu/ +ARG UBUNTU_SECURITY_MIRROR=http://security.ubuntu.com/ubuntu/ # Download Apache Bookkeeper, untar and clean up RUN set -x \ + && sed -i -e "s|http://archive\.ubuntu\.com/ubuntu/|${UBUNTU_MIRROR:-http://archive.ubuntu.com/ubuntu/}|g" \ + -e "s|http://security\.ubuntu\.com/ubuntu/|${UBUNTU_SECURITY_MIRROR:-http://security.ubuntu.com/ubuntu/}|g" /etc/apt/sources.list \ + && echo 'Acquire::http::Timeout "30";\nAcquire::http::ConnectionAttemptDelayMsec "2000";\nAcquire::https::Timeout "30";\nAcquire::https::ConnectionAttemptDelayMsec "2000";\nAcquire::ftp::Timeout "30";\nAcquire::ftp::ConnectionAttemptDelayMsec "2000";\nAcquire::Retries "15";' > /etc/apt/apt.conf.d/99timeout_and_retries \ && adduser "${BK_USER}" \ - && yum install -y java-1.8.0-openjdk-headless wget bash python sudo \ + && apt-get update \ + && apt-get install -y ca-certificates apt-transport-https \ + && apt-get install -y --no-install-recommends python3 pip \ + && ln -s /usr/bin/python3 /usr/bin/python \ + && apt-get install -y --no-install-recommends gpg gpg-agent wget sudo \ + && apt-get -y --purge autoremove \ + && apt-get autoclean \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* \ && mkdir -pv /opt \ && cd /opt \ - && wget -q "https://archive.apache.org/dist/bookkeeper/bookkeeper-${BK_VERSION}/${DISTRO_NAME}.tar.gz" \ - && wget -q "https://archive.apache.org/dist/bookkeeper/bookkeeper-${BK_VERSION}/${DISTRO_NAME}.tar.gz.asc" \ - && wget -q "https://archive.apache.org/dist/bookkeeper/bookkeeper-${BK_VERSION}/${DISTRO_NAME}.tar.gz.sha1" \ - && sha1sum -c ${DISTRO_NAME}.tar.gz.sha1 \ - && gpg --keyserver ha.pool.sks-keyservers.net --recv-key "$GPG_KEY" \ + && wget -q "${DISTRO_URL}" \ + && 
wget -q "${DISTRO_URL}.asc" \ + && wget -q "${DISTRO_URL}.sha512" \ + && sha512sum -c ${DISTRO_NAME}.tar.gz.sha512 \ + && wget https://dist.apache.org/repos/dist/release/bookkeeper/KEYS \ + && gpg --import KEYS \ && gpg --batch --verify "$DISTRO_NAME.tar.gz.asc" "$DISTRO_NAME.tar.gz" \ && tar -xzf "$DISTRO_NAME.tar.gz" \ && mv bookkeeper-server-${BK_VERSION}/ /opt/bookkeeper/ \ - && rm -rf "$DISTRO_NAME.tar.gz" "$DISTRO_NAME.tar.gz.asc" "$DISTRO_NAME.tar.gz.sha1" \ - && yum remove -y wget \ - && yum clean all + && rm -rf "$DISTRO_NAME.tar.gz" "$DISTRO_NAME.tar.gz.asc" "$DISTRO_NAME.tar.gz.sha512" \ + && pip install zk-shell WORKDIR /opt/bookkeeper +ENV JAVA_HOME=/opt/java/openjdk +ENV PATH="$PATH:$JAVA_HOME/bin" +COPY --from=jre-build /javaruntime $JAVA_HOME + COPY scripts /opt/bookkeeper/scripts RUN chmod +x -R /opt/bookkeeper/scripts/ diff --git a/docker/Makefile b/docker/Makefile index bc448bfc6bf..5e4dfd01c92 100644 --- a/docker/Makefile +++ b/docker/Makefile @@ -89,7 +89,7 @@ run-bk: # -------------------------------- # # Create run and destroy a container that will -# intializes new bookkeeper cluster by creating required znodes for the cluster +# initializes new bookkeeper cluster by creating required znodes for the cluster # make run-init run-init: diff --git a/docker/README.md b/docker/README.md index 6cd337f27bd..651054835ee 100644 --- a/docker/README.md +++ b/docker/README.md @@ -3,23 +3,22 @@ Apache Bookkeeper is a software project of the Apache Software Foundation, providing a replicated log service which can be used to build replicated state machines. A log contains a sequence of events which can be applied to a state machine. BookKeeper guarantees that each replica state machine will see all the same entries, in the same order. -> [Apache Bookkeeper](http://bookkeeper.apache.org/) +> [Apache Bookkeeper](https://bookkeeper.apache.org/) # How to use this image Bookkeeper needs [Zookeeper](https://zookeeper.apache.org/) in order to preserve its state and publish its bookies (Bookkeeper servers). The client only need to connect to a Zookeeper server in the ensamble in order to obtain the list of Bookkeeper servers. ## standalone BookKeeper cluster -Just like running a BookKeeper cluster in one machine(http://bookkeeper.apache.org/docs/latest/getting-started/run-locally/), you can run a standalone BookKeeper in one docker container, the command is: +Just like running a BookKeeper cluster in one machine(https://bookkeeper.apache.org/docs/getting-started/run-locally/), you can run a standalone BookKeeper in one docker container, the command is: ``` docker run -it \ - --env JAVA_HOME=/usr/lib/jvm/jre-1.8.0 \ --entrypoint "/bin/bash" \ apache/bookkeeper \ -c "/opt/bookkeeper/bin/bookkeeper localbookie 3" ``` Note: you can first start the container, and then execute "bin/bookkeeper localbookie 3" in the container. -After that, you can execute BookKeeper shell command(http://bookkeeper.apache.org/docs/latest/reference/cli/) to test the cluster, you need first log into the container, use command below: +After that, you can execute BookKeeper shell command(https://bookkeeper.apache.org/docs/reference/cli/) to test the cluster, you need first log into the container, use command below: ``` docker exec -it bash ``` @@ -118,7 +117,7 @@ docker run -it --rm \ --network "bk_network" \ --env BK_zkServers=test_zookeeper:2181 \ apache/bookkeeper \ - bookkeeper shell metaformat + /opt/bookkeeper/bin/bookkeeper shell metaformat ``` Now we can start our Bookkeeper ensemble (e.g. 
with three bookies): ``` @@ -131,7 +130,7 @@ docker run -it\ ``` And so on for "bookie2" and "bookie3". We have now our fully functional ensemble, ready to accept clients. -In order to play with our freshly created ensemble, you can use the simple application taken from [Bookkeeper Tutorial](http://bookkeeper.apache.org/docs/master/bookkeeperTutorial.html) and packaged in a [docker image](https://github.com/caiok/bookkeeper-tutorial) for convenience. +In order to play with our freshly created ensemble, you can use the simple application taken from [Bookkeeper Tutorial](https://github.com/ivankelly/bookkeeper-tutorial) and packaged in a [docker image](https://github.com/caiok/bookkeeper-tutorial) for convenience. This application checks if it can become the leader; if so, it starts rolling a dice and recording the rolls on Bookkeeper, otherwise it follows the leader's rolls. If the leader stops, a follower will try to become the new leader, and so on. @@ -148,7 +147,7 @@ Bookkeeper configuration is located in `/opt/bookkeeper/conf` in the docker container. There are 2 ways to set Bookkeeper configuration: -1, Apply setted (e.g. docker -e kk=vv) environment variables into configuration files. Environment variable names is in format "BK_originalName", in which "originalName" is the key in config files. +1, Apply the environment variables you set (e.g. docker -e kk=vv) to the configuration files. Environment variable names are in the format "BK_originalName", in which "originalName" is the key in config files. 2, If you are able to handle your local volumes, use the `docker --volume` command to bind-mount your local configuration volumes to `/opt/bookkeeper/conf`. ``` $ docker run --name bookie1 -d \ -v $(local_configure_dir):/opt/bookkeeper/conf/ \ < == use 2nd approach, mount dir contains config_files -e BK_bookiePort=3181 \ < == use 1st approach, set bookiePort -e BK_zkServers=zk-server1:2181,zk-server2:2181 \ < == use 1st approach, set zookeeper servers - -e BK_journalPreAllocSizeMB=32 \ < == use 1st approach, set journalPreAllocSizeMB in [bk_server.conf](https://github.com/apache/bookkeeper/blob/master/bookkeeper-server/conf/bk_server.conf) + -e BK_journalPreAllocSizeMB=32 \ < == use 1st approach, set journalPreAllocSizeMB in [bk_server.conf](https://github.com/apache/bookkeeper/blob/master/conf/bk_server.conf) apache/bookkeeper ``` @@ -186,7 +185,7 @@ Because This variable allows you to specify the port on which Bookkeeper should listen for incoming connections. -This will override `bookiePort` in [bk_server.conf](https://github.com/apache/bookkeeper/blob/master/bookkeeper-server/conf/bk_server.conf). +This will override `bookiePort` in [bk_server.conf](https://github.com/apache/bookkeeper/blob/master/conf/bk_server.conf). Default value is "3181". @@ -194,7 +193,7 @@ Default value is "3181". This variable allows you to specify a list of machines of the Zookeeper ensemble. Each entry has the form of `host:port`. Entries are separated with a comma. -This will override `zkServers` in [bk_server.conf](https://github.com/apache/bookkeeper/blob/master/bookkeeper-server/conf/bk_server.conf). +This will override `zkServers` in [bk_server.conf](https://github.com/apache/bookkeeper/blob/master/conf/bk_server.conf). Default value is "127.0.0.1:2181" @@ -202,7 +201,7 @@ Default value is "127.0.0.1:2181" This variable allows you to specify the root directory Bookkeeper will use on Zookeeper to store ledgers metadata.
-This will override `zkLedgersRootPath ` in [bk_server.conf](https://github.com/apache/bookkeeper/blob/master/bookkeeper-server/conf/bk_server.conf). +This will override `zkLedgersRootPath` in [bk_server.conf](https://github.com/apache/bookkeeper/blob/master/conf/bk_server.conf). Default value is "/bookkeeper/ledgers" @@ -215,25 +214,25 @@ Default value is empty - " ". so ledgers dir in zookeeper will be at "/ledgers" #### `BK_DATA_DIR` This variable allows you to specify where to store data in docker instance. -This could be override by env vars "BK_journalDirectory", "BK_ledgerDirectories", "BK_indexDirectories" and also `journalDirectory`, `ledgerDirectories`, `indexDirectories` in [bk_server.conf](https://github.com/apache/bookkeeper/blob/master/bookkeeper-server/conf/bk_server.conf). +This could be overridden by env vars "BK_journalDirectories", "BK_ledgerDirectories", "BK_indexDirectories" and also `journalDirectories`, `ledgerDirectories`, `indexDirectories` in [bk_server.conf](https://github.com/apache/bookkeeper/blob/master/conf/bk_server.conf). -Default value is "/data/bookkeeper", which contains volumes `/data/bookkeeper/journal`, `/data/bookkeeper/ledger` and `/data/bookkeeper/index` to hold Bookkeeper data in docker. +Default value is "/data/bookkeeper", which contains volumes `/data/bookkeeper/journal`, `/data/bookkeeper/ledgers` and `/data/bookkeeper/index` to hold Bookkeeper data in docker. ### Configure files under /opt/bookkeeper/conf These files are originally un-tarred from the bookkeeper binary distribution, such as [bookkeeper-server-4.4.0-bin.tar.tgz](https://archive.apache.org/dist/bookkeeper/bookkeeper-4.4.0/bookkeeper-4.4.0-src.tar.gz), and they come from [these files](https://github.com/apache/bookkeeper/tree/master/bookkeeper-server/conf) in Bookkeeper repo. -Usually we could config files bk_server.conf, bkenv.sh, log4j.properties, and log4j.shell.properties. Please read and understand them before you do the configuration. +Usually we configure the files bk_server.conf, bkenv.sh, log4j2.xml, and log4j2.shell.xml. Please read and understand them before you do the configuration. ### Caveats Be careful where you put the transaction log (journal). A dedicated transaction log device is key to consistent good performance. Putting the log on a busy device will adversely affect performance. -Here is some useful and graceful command the could be used to replace the default command, once you want to delete the cookeis and do auto recovery: +Here are some useful commands that could be used to replace the default command when you want to delete the cookies and run auto recovery: ``` -/bookkeeper/bookkeeper-server/bin/bookkeeper shell bookieformat -nonInteractive -force -deleteCookie -/bookkeeper/bookkeeper-server/bin/bookkeeper autorecovery +/opt/bookkeeper/bin/bookkeeper shell bookieformat -nonInteractive -force -deleteCookie +/opt/bookkeeper/bin/bookkeeper autorecovery ``` Use them, and replace the default [CMD] when you want to do things other than start a bookie.
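Along the same lines, the autorecovery daemon can also run in its own container next to the bookies (a sketch; the network, Zookeeper address, and container name below reuse the illustrative values from the examples above):
```
docker run -d --name autorecovery \
       --network "bk_network" \
       --env BK_zkServers=test_zookeeper:2181 \
       apache/bookkeeper \
       /opt/bookkeeper/bin/bookkeeper autorecovery
```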
diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 41774933cf1..a9c3f9dc8e7 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -28,9 +28,8 @@ services: links: - zookeeper environment: - - JAVA_HOME=/usr/lib/jvm/jre-1.8.0 - BK_zkServers=zookeeper:2181 - - BK_zkLedgersRootPath = /ledgers + - BK_zkLedgersRootPath=/ledgers bookie2: image: apache/bookkeeper @@ -38,9 +37,8 @@ services: links: - zookeeper environment: - - JAVA_HOME=/usr/lib/jvm/jre-1.8.0 - BK_zkServers=zookeeper:2181 - - BK_zkLedgersRootPath = /ledgers + - BK_zkLedgersRootPath=/ledgers bookie3: image: apache/bookkeeper @@ -48,9 +46,8 @@ services: links: - zookeeper environment: - - JAVA_HOME=/usr/lib/jvm/jre-1.8.0 - BK_zkServers=zookeeper:2181 - - BK_zkLedgersRootPath = /ledgers + - BK_zkLedgersRootPath=/ledgers dice: image: caiok/bookkeeper-tutorial diff --git a/docker/hooks/build b/docker/hooks/build new file mode 100755 index 00000000000..c7b0faa5667 --- /dev/null +++ b/docker/hooks/build @@ -0,0 +1,9 @@ +#!/bin/bash +set -x +# When we build 'latest' tag we want to not override BK_VERSION variable +if [[ "$DOCKER_TAG" = "latest" ]] +then + docker build -t $IMAGE_NAME . +else + docker build --build-arg BK_VERSION=$DOCKER_TAG -t $IMAGE_NAME . +fi diff --git a/docker/scripts/apply-config-from-env.py b/docker/scripts/apply-config-from-env.py index 3fccab98164..7b74b503411 100755 --- a/docker/scripts/apply-config-from-env.py +++ b/docker/scripts/apply-config-from-env.py @@ -23,23 +23,30 @@ ## based on the ENV variables ## export my-key=new-value ## -## ./apply-config-from-env config_dir +## ./apply-config-from-env file ... ## import os, sys -if len(sys.argv) != 2: - print 'Usage: %s ' + 'config_dir' % (sys.argv[0]) +if len(sys.argv) < 2: + print('Usage: %s file ...' 
% (sys.argv[0])) sys.exit(1) -def mylistdir(dir): - return [os.path.join(dir, filename) for filename in os.listdir(dir)] +def prepare_conf_files(files): + conf_files = [] + for f in files: + if os.path.isfile(f): + if not os.path.isabs(f): + f = os.path.join(os.getcwd(), f) + conf_files.append(f) + else: + print('%s is not a readable file' % f) + sys.exit(1) + return conf_files -# Always apply env config to all the files under conf -conf_dir = sys.argv[1] -conf_files = mylistdir(conf_dir) -print 'conf files: ' -print conf_files +conf_files = prepare_conf_files(sys.argv[1:]) +print('conf files: ') +print(conf_files) bk_env_prefix = 'BK_' zk_env_prefix = 'ZK_' @@ -75,13 +82,13 @@ def mylistdir(dir): if k.startswith(bk_env_prefix): search_key = k[len(bk_env_prefix):] if search_key in keys: - print '[%s] Applying config %s = %s' % (conf_filename, search_key, v) + print('[%s] Applying config %s = %s' % (conf_filename, search_key, v)) idx = keys[search_key] lines[idx] = '%s=%s\n' % (search_key, v) if k.startswith(zk_env_prefix): search_key = k[len(zk_env_prefix):] if search_key in keys: - print '[%s] Applying config %s = %s' % (conf_filename, search_key, v) + print('[%s] Applying config %s = %s' % (conf_filename, search_key, v)) idx = keys[search_key] lines[idx] = '%s=%s\n' % (search_key, v) diff --git a/docker/scripts/common.sh b/docker/scripts/common.sh index 5bbcd208072..0f745db4b45 100755 --- a/docker/scripts/common.sh +++ b/docker/scripts/common.sh @@ -71,7 +71,7 @@ echo " BK_STREAM_STORAGE_ROOT_PATH is ${BK_STREAM_STORAGE_ROOT_PATH}" echo " BK_NUM_STORAGE_CONTAINERS is ${BK_NUM_STORAGE_CONTAINERS}" echo " BOOKIE_GRPC_PORT is ${BOOKIE_GRPC_PORT}" -python scripts/apply-config-from-env.py ${BK_HOME}/conf +python scripts/apply-config-from-env.py ${BK_HOME}/conf/*.conf export BOOKIE_CONF=${BK_HOME}/conf/bk_server.conf export SERVICE_PORT=${PORT0} diff --git a/docker/scripts/entrypoint.sh b/docker/scripts/entrypoint.sh index 86911170edd..753f59a20db 100755 --- a/docker/scripts/entrypoint.sh +++ b/docker/scripts/entrypoint.sh @@ -21,7 +21,6 @@ # */ export PATH=$PATH:/opt/bookkeeper/bin -export JAVA_HOME=/usr/lib/jvm/jre-1.8.0 BK_HOME=/opt/bookkeeper BINDIR=${BK_HOME}/bin @@ -42,11 +41,11 @@ function run_command() { chmod -R +x ${BINDIR} chmod -R +x ${SCRIPTS_DIR} echo "This is root, will use user $BK_USER to run command '$@'" - sudo -s -E -u "$BK_USER" /bin/bash "$@" + exec sudo -s -E -u "$BK_USER" /bin/bash -c 'exec "$@"' -- "$@" exit else echo "Run command '$@'" - $@ + exec "$@" fi } diff --git a/docker/scripts/healthcheck.sh b/docker/scripts/healthcheck.sh index 47a21bf50a6..2fa30a1de3c 100755 --- a/docker/scripts/healthcheck.sh +++ b/docker/scripts/healthcheck.sh @@ -24,11 +24,9 @@ set -x -e -u -export JAVA_HOME=/usr/lib/jvm/jre-1.8.0 - # Sanity check that creates a ledger, writes a few entries, reads them and deletes the ledger. DEFAULT_HEALTH_CHECK_CMD="/opt/bookkeeper/bin/bookkeeper shell bookiesanity" HEALTH_CHECK_CMD=${HEALTH_CHECK_CMD:-"${DEFAULT_HEALTH_CHECK_CMD}"} -exec "${HEALTH_CHECK_CMD}" +eval "${HEALTH_CHECK_CMD}" diff --git a/docker/scripts/init_bookie.sh b/docker/scripts/init_bookie.sh index 07a7feecd3c..1617442d179 100755 --- a/docker/scripts/init_bookie.sh +++ b/docker/scripts/init_bookie.sh @@ -19,65 +19,57 @@ # * See the License for the specific language governing permissions and # * limitations under the License. 
# */ - source ${SCRIPTS_DIR}/common.sh function wait_for_zookeeper() { echo "wait for zookeeper" - until /opt/bookkeeper/bin/bookkeeper org.apache.zookeeper.ZooKeeperMain -server ${BK_zkServers} ls /; do sleep 5; done + until zk-shell --run-once "ls /" ${BK_zkServers}; do sleep 5; done } function create_zk_root() { if [ "x${BK_CLUSTER_ROOT_PATH}" != "x" ]; then echo "create the zk root dir for bookkeeper at '${BK_CLUSTER_ROOT_PATH}'" - /opt/bookkeeper/bin/bookkeeper org.apache.zookeeper.ZooKeeperMain -server ${BK_zkServers} create ${BK_CLUSTER_ROOT_PATH} + zk-shell --run-once "create ${BK_CLUSTER_ROOT_PATH} '' false false true" ${BK_zkServers} fi } -# Init the cluster if required znodes not exist in Zookeeper. -# Use ephemeral zk node as lock to keep initialize atomic. function init_cluster() { - if [ "x${BK_STREAM_STORAGE_ROOT_PATH}" == "x" ]; then - echo "BK_STREAM_STORAGE_ROOT_PATH is not set. fail fast." - exit -1 - fi - - /opt/bookkeeper/bin/bookkeeper org.apache.zookeeper.ZooKeeperMain -server ${BK_zkServers} stat ${BK_STREAM_STORAGE_ROOT_PATH} + zk-shell --run-once "ls ${BK_zkLedgersRootPath}/available/readonly" ${BK_zkServers} if [ $? -eq 0 ]; then - echo "Metadata of cluster already exists, no need to init" + echo "Cluster metadata already exists" else - # create ephemeral zk node bkInitLock, initiator who this node, then do init; other initiators will wait. - /opt/bookkeeper/bin/bookkeeper org.apache.zookeeper.ZooKeeperMain -server ${BK_zkServers} create -e ${BK_CLUSTER_ROOT_PATH}/bkInitLock - if [ $? -eq 0 ]; then - # bkInitLock created success, this is the successor to do znode init - echo "Initializing bookkeeper cluster at service uri ${BK_metadataServiceUri}." - /opt/bookkeeper/bin/bkctl --service-uri ${BK_metadataServiceUri} cluster init + # Create an ephemeral zk node `bkInitLock` for use as a lock. + lock=`zk-shell --run-once "create ${BK_CLUSTER_ROOT_PATH}/bkInitLock '' true false false" ${BK_zkServers}` + if [ -z "$lock" ]; then + echo "znodes do not exist in Zookeeper for Bookkeeper. Initializing a new BookKeeper cluster in Zookeeper." + /opt/bookkeeper/bin/bookkeeper shell initnewcluster if [ $? -eq 0 ]; then - echo "Successfully initialized bookkeeper cluster at service uri ${BK_metadataServiceUri}." + echo "initnewcluster operation succeeded" else - echo "Failed to initialize bookkeeper cluster at service uri ${BK_metadataServiceUri}. please check the reason." + echo "initnewcluster operation failed. Please check the reason." + echo "Exit status of initnewcluster" + echo $? exit fi else - echo "Other docker instance is doing initialize at the same time, will wait in this instance." + echo "Others may be initializing the cluster at the same time." tenSeconds=1 - while [ ${tenSeconds} -lt 10 ] + while [ ${tenSeconds} -lt 100 ] do sleep 10 - echo "run '/opt/bookkeeper/bin/bookkeeper org.apache.zookeeper.ZooKeeperMain -server ${BK_zkServers} stat ${BK_STREAM_STORAGE_ROOT_PATH}'" - /opt/bookkeeper/bin/bookkeeper org.apache.zookeeper.ZooKeeperMain -server ${BK_zkServers} stat ${BK_STREAM_STORAGE_ROOT_PATH} + zk-shell --run-once "ls ${BK_zkLedgersRootPath}/available/readonly" ${BK_zkServers} if [ $? -eq 0 ]; then - echo "Waited $tenSeconds * 10 seconds, bookkeeper inited" + echo "Waited $tenSeconds * 10 seconds. Successfully listed '${BK_zkLedgersRootPath}/available/readonly'" break else - echo "Waited $tenSeconds * 10 seconds, still not init" + echo "Waited $tenSeconds * 10 seconds. Continue waiting."
(( tenSeconds++ )) continue fi done - if [ ${tenSeconds} -eq 10 ]; then - echo "Waited 100 seconds for bookkeeper cluster init, something wrong, please check" + if [ ${tenSeconds} -eq 100 ]; then + echo "Waited 100 seconds for bookkeeper cluster to initialize, but to no avail. Something is wrong, please check." exit fi fi @@ -97,5 +89,4 @@ function init_bookie() { # init the cluster init_cluster - } \ No newline at end of file diff --git a/docker/scripts/init_zookeeper.sh b/docker/scripts/init_zookeeper.sh index 803ef91d786..cff981211c8 100755 --- a/docker/scripts/init_zookeeper.sh +++ b/docker/scripts/init_zookeeper.sh @@ -63,7 +63,7 @@ function create_zk_dynamic_conf() { function init_zookeeper() { # apply zookeeper envs - python scripts/apply-config-from-env.py ${BK_HOME}/conf + python scripts/apply-config-from-env.py ${BK_HOME}/conf/zookeeper.conf # create dirs if they don't exist create_zk_dirs diff --git a/metadata-drivers/etcd/pom.xml b/metadata-drivers/etcd/pom.xml new file mode 100644 index 00000000000..3dfbdcde1ed --- /dev/null +++ b/metadata-drivers/etcd/pom.xml @@ -0,0 +1,150 @@ + + + + + org.apache.bookkeeper.metadata.drivers + metadata-drivers-parent + 4.18.0-SNAPSHOT + .. + + 4.0.0 + org.apache.bookkeeper.metadata.drivers + metadata-stores-etcd + Apache BookKeeper :: Metadata Drivers:: Etcd + + + org.apache.bookkeeper + bookkeeper-server + ${project.version} + + + + org.apache.bookkeeper.metadata.drivers + jetcd-core-shaded + ${project.version} + shaded + + + io.etcd + * + + + io.vertx + * + + + + + + io.grpc + grpc-all + + + io.grpc + grpc-netty-shaded + + + org.arquillian.cube + arquillian-cube-docker + ${arquillian-cube.version} + + + com.github.docker-java + * + + + test + + + org.jboss.arquillian.junit + arquillian-junit-standalone + ${arquillian-junit.version} + + + com.github.docker-java + * + + + test + + + org.testcontainers + testcontainers + test + + + org.apache.bookkeeper + testtools + ${project.parent.version} + test + + + org.apache.bookkeeper + bookkeeper-common + ${project.version} + test-jar + test + + + org.apache.bookkeeper + bookkeeper-server + ${project.version} + test-jar + test + + + + + + + org.apache.maven.plugins + maven-surefire-plugin + + + true + + ${project.version} + ${project.build.directory} + + + + + + + + + integrationTests + + + integrationTests + + + + + + org.apache.maven.plugins + maven-surefire-plugin + + false + + + + + + + diff --git a/metadata-drivers/etcd/src/main/java/io/etcd/jetcd/impl/EtcdClientUtils.java b/metadata-drivers/etcd/src/main/java/io/etcd/jetcd/impl/EtcdClientUtils.java new file mode 100644 index 00000000000..4c74cc9c4f1 --- /dev/null +++ b/metadata-drivers/etcd/src/main/java/io/etcd/jetcd/impl/EtcdClientUtils.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.etcd.jetcd.impl;
+
+import java.lang.reflect.Field;
+
+/**
+ * Utils to access fields in the Etcd client via reflection.
+ */
+class EtcdClientUtils {
+
+    @SuppressWarnings("unchecked")
+    static <T> T getField(Object obj, String fieldName)
+            throws NoSuchFieldException, IllegalAccessException {
+        Class<?> cls = obj.getClass();
+        Field field = cls.getDeclaredField(fieldName);
+        // make the private field readable before dereferencing it
+        field.setAccessible(true);
+        return (T) field.get(obj);
+    }
+
+}
diff --git a/metadata-drivers/etcd/src/main/java/io/etcd/jetcd/impl/EtcdConnectionManager.java b/metadata-drivers/etcd/src/main/java/io/etcd/jetcd/impl/EtcdConnectionManager.java
new file mode 100644
index 00000000000..db090135c90
--- /dev/null
+++ b/metadata-drivers/etcd/src/main/java/io/etcd/jetcd/impl/EtcdConnectionManager.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.etcd.jetcd.impl;
+
+import io.etcd.jetcd.Client;
+import io.etcd.jetcd.api.WatchGrpc;
+import lombok.extern.slf4j.Slf4j;
+
+/**
+ * Keep a reference to the etcd internal connection manager.
+ */
+@Slf4j
+public class EtcdConnectionManager {
+
+    private final ClientImpl client;
+    private ClientConnectionManager connMgr;
+
+    public EtcdConnectionManager(Client client) {
+        this((ClientImpl) client);
+    }
+
+    EtcdConnectionManager(ClientImpl client) {
+        this.client = client;
+        try {
+            this.connMgr = EtcdClientUtils.getField(
+                client, "connectionManager"
+            );
+        } catch (NoSuchFieldException e) {
+            log.error("No `connectionManager` field found in etcd client", e);
+            throw new RuntimeException(
+                "No `connectionManager` field found in etcd client", e);
+        } catch (IllegalAccessException e) {
+            log.error("Illegal access to `connectionManager` field in etcd client", e);
+            throw new RuntimeException(
+                "Illegal access to `connectionManager` field in etcd client", e);
+        }
+    }
+
+    /**
+     * Create a watch api grpc stub.
+     *
+     * @return a watch api grpc stub.
+     */
+    public WatchGrpc.WatchStub newWatchStub() {
+        return connMgr.newStub(WatchGrpc::newStub);
+    }
+
+}
diff --git a/metadata-drivers/etcd/src/main/java/io/etcd/jetcd/impl/package-info.java b/metadata-drivers/etcd/src/main/java/io/etcd/jetcd/impl/package-info.java
new file mode 100644
index 00000000000..7cc4a43579a
--- /dev/null
+++ b/metadata-drivers/etcd/src/main/java/io/etcd/jetcd/impl/package-info.java
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Etcd client changes for bookkeeper metadata driver. + */ +package io.etcd.jetcd.impl; diff --git a/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/Etcd64bitIdGenerator.java b/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/Etcd64bitIdGenerator.java new file mode 100644 index 00000000000..652033e4e9f --- /dev/null +++ b/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/Etcd64bitIdGenerator.java @@ -0,0 +1,142 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.bookkeeper.metadata.etcd; + +import static com.google.common.base.Preconditions.checkArgument; +import static org.apache.bookkeeper.metadata.etcd.EtcdConstants.EMPTY_BS; + +import io.etcd.jetcd.ByteSequence; +import io.etcd.jetcd.KV; +import io.etcd.jetcd.KeyValue; +import io.etcd.jetcd.Txn; +import io.etcd.jetcd.kv.GetResponse; +import io.etcd.jetcd.op.Cmp; +import io.etcd.jetcd.op.Cmp.Op; +import io.etcd.jetcd.op.CmpTarget; +import io.etcd.jetcd.options.GetOption; +import io.etcd.jetcd.options.PutOption; +import java.nio.charset.StandardCharsets; +import java.util.concurrent.ThreadLocalRandom; +import java.util.concurrent.atomic.AtomicIntegerFieldUpdater; +import lombok.extern.slf4j.Slf4j; +import org.apache.bookkeeper.client.BKException.Code; +import org.apache.bookkeeper.meta.LedgerIdGenerator; +import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GenericCallback; + +/** + * Generate 64-bit ledger ids from a bucket. + * + *
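+ * <p>A worked example of the layout described below (illustrative values only,
+ * using this class's own helpers):
+ * <pre>{@code
+ * long lid = (0x02L << 56) | 0x0005L; // bucket id 2, in-bucket id 5
+ * getBucketId(lid);   // returns 2
+ * getIdInBucket(lid); // returns 5
+ * }</pre>
+ *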

+ * The most significant 8 bits are used as the bucket id. The remaining 56 bits
+ * are used as the id generated per bucket.
+ */
+@Slf4j
+class Etcd64bitIdGenerator implements LedgerIdGenerator {
+
+    static final long MAX_ID_PER_BUCKET = 0x00ffffffffffffffL;
+    static final long BUCKET_ID_MASK = 0xff00000000000000L;
+    static final int BUCKET_ID_SHIFT = 56;
+    static final int NUM_BUCKETS = 0x80;
+
+    static int getBucketId(long lid) {
+        return (int) ((lid & BUCKET_ID_MASK) >>> BUCKET_ID_SHIFT);
+    }
+
+    static long getIdInBucket(long lid) {
+        return lid & MAX_ID_PER_BUCKET;
+    }
+
+    private static final AtomicIntegerFieldUpdater<Etcd64bitIdGenerator> nextBucketIdUpdater =
+        AtomicIntegerFieldUpdater.newUpdater(Etcd64bitIdGenerator.class, "nextBucketId");
+
+    private final String scope;
+    private final KV kvClient;
+    private volatile int nextBucketId;
+
+    Etcd64bitIdGenerator(KV kvClient, String scope) {
+        this.kvClient = kvClient;
+        this.scope = scope;
+        this.nextBucketId = ThreadLocalRandom.current().nextInt(NUM_BUCKETS);
+    }
+
+    int nextBucketId() {
+        while (true) {
+            int bucketId = nextBucketIdUpdater.incrementAndGet(this);
+            if (bucketId >= NUM_BUCKETS) {
+                if (nextBucketIdUpdater.compareAndSet(this, bucketId, 0)) {
+                    bucketId = 0;
+                } else {
+                    // someone else has already updated the bucket id; try again.
+                    continue;
+                }
+            }
+            return bucketId;
+        }
+    }
+
+    @Override
+    public void generateLedgerId(GenericCallback<Long> cb) {
+        int bucketId = nextBucketId();
+        checkArgument(bucketId >= 0 && bucketId < NUM_BUCKETS,
+            "Invalid bucket id : " + bucketId);
+
+        ByteSequence bucketKey = ByteSequence.from(EtcdUtils.getBucketPath(scope, bucketId), StandardCharsets.UTF_8);
+        // Whether or not the bucket key already exists, bump its version with a
+        // put and read it back: the key's etcd version serves as the per-bucket counter.
+        Txn txn = kvClient.txn()
+            .If(new Cmp(bucketKey, Op.GREATER, CmpTarget.createRevision(0)))
+            .Then(
+                io.etcd.jetcd.op.Op.put(bucketKey, EMPTY_BS, PutOption.DEFAULT),
+                io.etcd.jetcd.op.Op.get(bucketKey, GetOption.DEFAULT)
+            )
+            .Else(
+                io.etcd.jetcd.op.Op.put(bucketKey, EMPTY_BS, PutOption.DEFAULT),
+                io.etcd.jetcd.op.Op.get(bucketKey, GetOption.DEFAULT)
+            );
+        txn.commit()
+            .thenAccept(txnResponse -> {
+                if (txnResponse.getGetResponses().size() <= 0) {
+                    cb.operationComplete(Code.UnexpectedConditionException, null);
+                } else {
+                    GetResponse resp = txnResponse.getGetResponses().get(0);
+                    if (resp.getCount() > 0) {
+                        KeyValue kv = resp.getKvs().get(0);
+                        if (kv.getVersion() > MAX_ID_PER_BUCKET) {
+                            log.warn("Etcd bucket '{}' overflowed", bucketKey.toString(StandardCharsets.UTF_8));
+                            // the bucket has overflowed, move on to the next bucket.
+                            generateLedgerId(cb);
+                        } else {
+                            long version = kv.getVersion();
+                            long lid = ((((long) bucketId) << BUCKET_ID_SHIFT) & BUCKET_ID_MASK)
+                                | (version & MAX_ID_PER_BUCKET);
+                            cb.operationComplete(Code.OK, lid);
+                        }
+                    } else {
+                        cb.operationComplete(Code.UnexpectedConditionException, null);
+                    }
+                }
+            })
+            .exceptionally(cause -> {
+                cb.operationComplete(Code.MetaStoreException, null);
+                return null;
+            });
+    }
+
+    @Override
+    public void close() {
+        // no-op
+    }
+}
diff --git a/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/EtcdBookieRegister.java b/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/EtcdBookieRegister.java
new file mode 100644
index 00000000000..2116e575de9
--- /dev/null
+++ b/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/EtcdBookieRegister.java
@@ -0,0 +1,221 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.bookkeeper.metadata.etcd;
+
+import static org.apache.bookkeeper.metadata.etcd.EtcdUtils.msResult;
+
+import com.google.common.util.concurrent.ThreadFactoryBuilder;
+import io.etcd.jetcd.Lease;
+import io.etcd.jetcd.lease.LeaseKeepAliveResponse;
+import io.etcd.jetcd.support.CloseableClient;
+import io.grpc.stub.StreamObserver;
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
+import java.util.function.Supplier;
+import lombok.AccessLevel;
+import lombok.Getter;
+import lombok.extern.slf4j.Slf4j;
+import org.apache.bookkeeper.bookie.BookieException.MetadataStoreException;
+import org.apache.bookkeeper.common.concurrent.FutureUtils;
+import org.apache.bookkeeper.discover.RegistrationManager.RegistrationListener;
+
+/**
+ * A bookie register that registers a bookie in Etcd and keeps its registration lease alive.
+ */
+@Slf4j
+class EtcdBookieRegister implements AutoCloseable, Runnable, Supplier<Long> {
+
+    private final Lease leaseClient;
+    private final long ttlSeconds;
+    private final ScheduledExecutorService executor;
+    private RegistrationListener regListener;
+    private volatile CompletableFuture<Long> leaseFuture = new CompletableFuture<>();
+    private volatile CompletableFuture<Void> keepAliveFuture = new CompletableFuture<>();
+
+    @Getter(AccessLevel.PACKAGE)
+    private volatile long leaseId = -0xabcd;
+    private volatile CloseableClient kaListener = null;
+    private volatile boolean running = true;
+    private long nextWaitTimeMs = 200;
+    private Future<?> runFuture = null;
+
+    EtcdBookieRegister(Lease leaseClient,
+                       long ttlSeconds) {
+        this.leaseClient = leaseClient;
+        this.ttlSeconds = ttlSeconds;
+        this.executor = Executors.newSingleThreadScheduledExecutor(
+            new ThreadFactoryBuilder()
+                .setNameFormat("bookie-etcd-keepalive-thread")
+                .build());
+    }
+
+    public EtcdBookieRegister addRegistrationListener(RegistrationListener regListener) {
+        this.regListener = regListener;
+        return this;
+    }
+
+    long getTtlSeconds() {
+        return ttlSeconds;
+    }
+
+    public synchronized EtcdBookieRegister start() {
+        if (null == runFuture) {
+            runFuture = executor.submit(this);
+        }
+        return this;
+    }
+
+    private void newLeaseIfNeeded() throws MetadataStoreException {
+        boolean newLeaseNeeded;
+        synchronized (this) {
+            newLeaseNeeded = !leaseFuture.isDone();
+        }
+        if (newLeaseNeeded) {
+            long leaseId = msResult(leaseClient.grant(ttlSeconds)).getID();
+            keepAliveFuture = new CompletableFuture<>();
+            if (kaListener != null) {
+                synchronized (this) {
+                    kaListener.close();
+                    kaListener = null;
+                }
+            }
+            this.kaListener = leaseClient.keepAlive(leaseId,
+                    new StreamObserver<LeaseKeepAliveResponse>() {
+                @Override
+                public void
onNext(LeaseKeepAliveResponse response) { + log.info("KeepAlive response : lease = {}, ttl = {}", + response.getID(), response.getTTL()); + } + + @Override + public void onError(Throwable t) { + log.info("KeepAlive renewal failed, leaseId {}", leaseId, t.fillInStackTrace()); + keepAliveFuture.completeExceptionally(t); + } + + @Override + public void onCompleted() { + log.info("lease completed! leaseId {}", leaseId); + keepAliveFuture.cancel(true); + } + }); + + this.leaseId = leaseId; + leaseFuture.complete(leaseId); + log.info("New lease '{}' is granted.", leaseId); + } + } + + private void waitForNewLeaseId() { + while (running) { + try { + newLeaseIfNeeded(); + nextWaitTimeMs = 100L; + } catch (MetadataStoreException e) { + log.error("Failed to grant a new lease for leaseId {}", leaseId, e); + try { + TimeUnit.MILLISECONDS.sleep(nextWaitTimeMs); + nextWaitTimeMs *= 2; + nextWaitTimeMs = Math.min(nextWaitTimeMs, TimeUnit.SECONDS.toMillis(ttlSeconds)); + } catch (InterruptedException e1) { + Thread.currentThread().interrupt(); + log.warn("Interrupted at backing off granting a new lease for leaseId {}", leaseId); + } + continue; + } + } + } + + @Override + public void run() { + while (running) { + waitForNewLeaseId(); + // here we get a lease, keep it alive + try { + log.info("Keeping Alive at lease = {}", get()); + keepAliveFuture.get(); + continue; + } catch (InterruptedException ie) { + Thread.currentThread().interrupt(); + log.warn("Interrupted at keeping lease '{}' alive", leaseId); + resetLease(); + } catch (ExecutionException ee) { + log.warn("Failed to keep alive lease '{}'", leaseId, ee); + resetLease(); + } + } + } + + private void resetLease() { + synchronized (this) { + leaseFuture = new CompletableFuture<>(); + } + if (null != regListener) { + regListener.onRegistrationExpired(); + } + } + + @Override + public void close() { + synchronized (this) { + if (!running) { + return; + } else { + running = false; + } + if (null != runFuture) { + if (runFuture.cancel(true)) { + log.info("Successfully interrupted bookie register."); + } + } + keepAliveFuture.cancel(true); + if (kaListener != null) { + kaListener.close(); + kaListener = null; + } + } + CompletableFuture closeFuture = new CompletableFuture<>(); + executor.submit(() -> { + FutureUtils.complete(closeFuture, (Void) null); + }); + closeFuture.join(); + } + + @Override + public Long get() { + while (true) { + try { + return leaseFuture.get(100, TimeUnit.MILLISECONDS); + } catch (InterruptedException e) { + log.warn("Interrupted at getting lease id", e); + return -1L; + } catch (ExecutionException e) { + throw new IllegalArgumentException("Should never reach here"); + } catch (TimeoutException e) { + continue; + } + + } + } + +} diff --git a/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/EtcdConstants.java b/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/EtcdConstants.java new file mode 100644 index 00000000000..c9a1dadd23e --- /dev/null +++ b/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/EtcdConstants.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bookkeeper.metadata.etcd; + +import io.etcd.jetcd.ByteSequence; + +/** + * Constants used in the Etcd metadata drivers. + */ +final class EtcdConstants { + + private EtcdConstants() {} + + public static final String END_SEP = "0"; + + public static final String LAYOUT_NODE = "layout"; + public static final String INSTANCEID_NODE = "instanceid"; + public static final String COOKIES_NODE = "cookies"; + public static final String LEDGERS_NODE = "ledgers"; + public static final String BUCKETS_NODE = "buckets"; + + // + // membership related constants + // + + public static final String MEMBERS_NODE = "bookies"; + public static final String WRITEABLE_NODE = "writable"; + public static final String READONLY_NODE = "readonly"; + + // + // underreplication related constants + // + + public static final String UR_NODE = "underreplication"; + + public static final ByteSequence EMPTY_BS = ByteSequence.from(new byte[0]); + +} diff --git a/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/EtcdLayoutManager.java b/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/EtcdLayoutManager.java new file mode 100644 index 00000000000..6a7344e2841 --- /dev/null +++ b/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/EtcdLayoutManager.java @@ -0,0 +1,109 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.bookkeeper.metadata.etcd; + +import static org.apache.bookkeeper.metadata.etcd.EtcdUtils.ioResult; + +import io.etcd.jetcd.ByteSequence; +import io.etcd.jetcd.Client; +import io.etcd.jetcd.KV; +import io.etcd.jetcd.kv.DeleteResponse; +import io.etcd.jetcd.kv.GetResponse; +import io.etcd.jetcd.kv.TxnResponse; +import io.etcd.jetcd.op.Cmp; +import io.etcd.jetcd.op.CmpTarget; +import io.etcd.jetcd.options.GetOption; +import io.etcd.jetcd.options.PutOption; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import lombok.AccessLevel; +import lombok.Getter; +import lombok.extern.slf4j.Slf4j; +import org.apache.bookkeeper.meta.LayoutManager; +import org.apache.bookkeeper.meta.LedgerLayout; + +/** + * Etcd based layout manager. 
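+ *
+ * <p>Layout creation below relies on an etcd transaction; the shape of that
+ * compare-and-create pattern is sketched here (illustrative, mirroring
+ * {@link #storeLedgerLayout}):
+ * <pre>{@code
+ * kvClient.txn()
+ *     // a create revision greater than 0 means the layout key already exists
+ *     .If(new Cmp(layoutKey, Cmp.Op.GREATER, CmpTarget.createRevision(0)))
+ *     .Then(io.etcd.jetcd.op.Op.get(layoutKey, GetOption.DEFAULT))
+ *     .Else(io.etcd.jetcd.op.Op.put(layoutKey, layoutData, PutOption.DEFAULT))
+ *     .commit();
+ * }</pre>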
+ */ +@Slf4j +@Getter(AccessLevel.PACKAGE) +class EtcdLayoutManager implements LayoutManager { + + private final Client client; + private final KV kvClient; + private final String scope; + private final ByteSequence layoutKey; + + EtcdLayoutManager(Client client, String scope) { + this.client = client; + this.kvClient = client.getKVClient(); + this.scope = scope; + this.layoutKey = ByteSequence.from(EtcdUtils.getLayoutKey(scope), StandardCharsets.UTF_8); + } + + @Override + public LedgerLayout readLedgerLayout() throws IOException { + GetResponse response = ioResult(kvClient.get(layoutKey, GetOption.DEFAULT)); + if (response.getCount() <= 0) { + return null; + } else { + byte[] layoutData = response.getKvs().get(0).getValue().getBytes(); + return LedgerLayout.parseLayout(layoutData); + } + } + + @Override + public void storeLedgerLayout(LedgerLayout layout) throws IOException { + ByteSequence layoutData = ByteSequence.from(layout.serialize()); + TxnResponse response = ioResult(kvClient.txn() + .If(new Cmp(layoutKey, Cmp.Op.GREATER, CmpTarget.createRevision(0))) + .Then(io.etcd.jetcd.op.Op.get(layoutKey, GetOption.DEFAULT)) + .Else(io.etcd.jetcd.op.Op.put(layoutKey, layoutData, PutOption.DEFAULT)) + .commit()); + // key doesn't exist and we created the key + if (!response.isSucceeded()) { + return; + // key exists and we retrieved the key + } else { + GetResponse resp = response.getGetResponses().get(0); + if (resp.getCount() <= 0) { + // fail to put key/value but key is not found + throw new IOException("Creating layout node '" + layoutKey.toString(StandardCharsets.UTF_8) + + "' failed due to it already exists but no layout node is found"); + } else { + throw new LedgerLayoutExistsException( + "Ledger layout already exists under '" + layoutKey.toString(StandardCharsets.UTF_8) + "'"); + } + } + } + + @Override + public void deleteLedgerLayout() throws IOException { + DeleteResponse response = ioResult(kvClient.delete(layoutKey)); + if (response.getDeleted() > 0) { + if (log.isDebugEnabled()) { + log.debug("Successfully delete layout '{}'", layoutKey.toString(StandardCharsets.UTF_8)); + } + return; + } else { + throw new IOException("No ledger layout is found under '" + layoutKey.toString(StandardCharsets.UTF_8) + + "'"); + } + } +} diff --git a/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/EtcdLedgerManager.java b/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/EtcdLedgerManager.java new file mode 100644 index 00000000000..1f211b28dcd --- /dev/null +++ b/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/EtcdLedgerManager.java @@ -0,0 +1,477 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.bookkeeper.metadata.etcd; + +import com.google.common.collect.Sets; +import io.etcd.jetcd.ByteSequence; +import io.etcd.jetcd.Client; +import io.etcd.jetcd.KV; +import io.etcd.jetcd.KeyValue; +import io.etcd.jetcd.Txn; +import io.etcd.jetcd.common.exception.ClosedClientException; +import io.etcd.jetcd.kv.GetResponse; +import io.etcd.jetcd.op.Cmp; +import io.etcd.jetcd.op.CmpTarget; +import io.etcd.jetcd.options.DeleteOption; +import io.etcd.jetcd.options.GetOption; +import io.etcd.jetcd.options.PutOption; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.List; +import java.util.Optional; +import java.util.Set; +import java.util.UUID; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; +import java.util.function.Consumer; +import lombok.extern.slf4j.Slf4j; +import org.apache.bookkeeper.client.BKException; +import org.apache.bookkeeper.client.api.LedgerMetadata; +import org.apache.bookkeeper.meta.LedgerManager; +import org.apache.bookkeeper.meta.LedgerMetadataSerDe; +import org.apache.bookkeeper.metadata.etcd.helpers.KeyIterator; +import org.apache.bookkeeper.metadata.etcd.helpers.KeyStream; +import org.apache.bookkeeper.metadata.etcd.helpers.ValueStream; +import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.LedgerMetadataListener; +import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.Processor; +import org.apache.bookkeeper.util.collections.ConcurrentLongHashMap; +import org.apache.bookkeeper.versioning.LongVersion; +import org.apache.bookkeeper.versioning.Version; +import org.apache.bookkeeper.versioning.Versioned; +import org.apache.zookeeper.AsyncCallback.VoidCallback; + +/** + * Etcd ledger manager. 
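+ *
+ * <p>Ledger metadata lives under a per-ledger key; the key's etcd mod revision
+ * doubles as the metadata version. A minimal read-modify-write sketch
+ * (hypothetical caller code):
+ * <pre>{@code
+ * ledgerManager.readLedgerMetadata(ledgerId).thenCompose(current ->
+ *     // current.getVersion() wraps the mod revision used for the CAS write
+ *     ledgerManager.writeLedgerMetadata(ledgerId, newMetadata, current.getVersion()));
+ * }</pre>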
+ */ +@Slf4j +class EtcdLedgerManager implements LedgerManager { + + private final LedgerMetadataSerDe serDe = new LedgerMetadataSerDe(); + + private final String scope; + private final Client client; + private final KV kvClient; + private final EtcdWatchClient watchClient; + private final ConcurrentLongHashMap> watchers = + ConcurrentLongHashMap.>newBuilder().build(); + private final ConcurrentMap listeners = + new ConcurrentHashMap<>(); + + private volatile boolean closed = false; + + EtcdLedgerManager(Client client, + String scope) { + this.client = client; + this.kvClient = client.getKVClient(); + this.scope = scope; + this.watchClient = new EtcdWatchClient(client); + } + + private boolean isClosed() { + return closed; + } + + ValueStream getLedgerMetadataStream(long ledgerId) { + return watchers.get(ledgerId); + } + + @Override + public CompletableFuture> createLedgerMetadata(long ledgerId, + LedgerMetadata metadata) { + CompletableFuture> promise = new CompletableFuture<>(); + String ledgerKey = EtcdUtils.getLedgerKey(scope, ledgerId); + log.info("Create ledger metadata under key {}", ledgerKey); + + ByteSequence ledgerKeyBs = ByteSequence.from(ledgerKey, StandardCharsets.UTF_8); + final ByteSequence valueBs; + try { + valueBs = ByteSequence.from(serDe.serialize(metadata)); + } catch (IOException ioe) { + promise.completeExceptionally(new BKException.BKMetadataSerializationException(ioe)); + return promise; + } + kvClient.txn() + .If(new Cmp( + ledgerKeyBs, + Cmp.Op.GREATER, + CmpTarget.createRevision(0L))) + .Then(io.etcd.jetcd.op.Op.get( + ledgerKeyBs, + GetOption.newBuilder() + .withCountOnly(true) + .build())) + .Else(io.etcd.jetcd.op.Op.put( + ledgerKeyBs, + valueBs, + PutOption.DEFAULT)) + .commit() + .thenAccept(resp -> { + if (resp.isSucceeded()) { + GetResponse getResp = resp.getGetResponses().get(0); + if (getResp.getCount() <= 0) { + // key doesn't exist but we fail to put the key + promise.completeExceptionally(new BKException.BKUnexpectedConditionException()); + } else { + // key exists + promise.completeExceptionally(new BKException.BKLedgerExistException()); + } + } else { + promise.complete(new Versioned<>(metadata, + new LongVersion(resp.getHeader().getRevision()))); + } + }) + .exceptionally(cause -> { + promise.completeExceptionally(new BKException.MetaStoreException()); + return null; + }); + return promise; + } + + @Override + public CompletableFuture removeLedgerMetadata(long ledgerId, Version version) { + CompletableFuture promise = new CompletableFuture<>(); + long revision = -0xabcd; + if (Version.NEW == version) { + log.error("Request to delete ledger {} metadata with version set to the initial one", ledgerId); + promise.completeExceptionally(new BKException.BKMetadataVersionException()); + return promise; + } else if (Version.ANY != version) { + if (!(version instanceof LongVersion)) { + log.info("Not an instance of LongVersion : {}", ledgerId); + promise.completeExceptionally(new BKException.BKMetadataVersionException()); + return promise; + } else { + revision = ((LongVersion) version).getLongVersion(); + } + } + + String ledgerKey = EtcdUtils.getLedgerKey(scope, ledgerId); + ByteSequence ledgerKeyBs = ByteSequence.from(ledgerKey, StandardCharsets.UTF_8); + Txn txn = kvClient.txn(); + if (revision == -0xabcd) { + txn = txn.If(new Cmp( + ledgerKeyBs, + Cmp.Op.GREATER, + CmpTarget.createRevision(0L) + )); + } else { + txn = txn.If(new Cmp( + ledgerKeyBs, + Cmp.Op.EQUAL, + CmpTarget.modRevision(revision) + )); + } + txn + .Then(io.etcd.jetcd.op.Op.delete( + 
ledgerKeyBs, + DeleteOption.DEFAULT + )) + .Else(io.etcd.jetcd.op.Op.get( + ledgerKeyBs, + GetOption.DEFAULT + )) + .commit() + .thenAccept(txnResp -> { + if (txnResp.isSucceeded()) { + promise.complete(null); + } else { + GetResponse getResp = txnResp.getGetResponses().get(0); + if (getResp.getCount() > 0) { + // fail to delete the ledger + promise.completeExceptionally(new BKException.BKMetadataVersionException()); + } else { + log.warn("Deleting ledger {} failed due to : ledger key {} doesn't exist", ledgerId, ledgerKey); + promise.completeExceptionally(new BKException.BKNoSuchLedgerExistsException()); + } + } + }) + .exceptionally(cause -> { + promise.completeExceptionally(new BKException.MetaStoreException()); + return null; + }); + return promise; + } + + @Override + public CompletableFuture> readLedgerMetadata(long ledgerId) { + CompletableFuture> promise = new CompletableFuture<>(); + String ledgerKey = EtcdUtils.getLedgerKey(scope, ledgerId); + ByteSequence ledgerKeyBs = ByteSequence.from(ledgerKey, StandardCharsets.UTF_8); + log.info("read ledger metadata under key {}", ledgerKey); + kvClient.get(ledgerKeyBs) + .thenAccept(getResp -> { + if (getResp.getCount() > 0) { + KeyValue kv = getResp.getKvs().get(0); + byte[] data = kv.getValue().getBytes(); + try { + LedgerMetadata metadata = serDe.parseConfig(data, ledgerId, Optional.empty()); + promise.complete(new Versioned<>(metadata, new LongVersion(kv.getModRevision()))); + } catch (IOException ioe) { + log.error("Could not parse ledger metadata for ledger : {}", ledgerId, ioe); + promise.completeExceptionally(new BKException.MetaStoreException()); + return; + } + } else { + promise.completeExceptionally(new BKException.BKNoSuchLedgerExistsException()); + } + }) + .exceptionally(cause -> { + promise.completeExceptionally(new BKException.MetaStoreException()); + return null; + }); + return promise; + } + + @Override + public CompletableFuture> writeLedgerMetadata(long ledgerId, LedgerMetadata metadata, + Version currentVersion) { + CompletableFuture> promise = new CompletableFuture<>(); + if (Version.NEW == currentVersion || !(currentVersion instanceof LongVersion)) { + promise.completeExceptionally(new BKException.BKMetadataVersionException()); + return promise; + } + final LongVersion lv = (LongVersion) currentVersion; + String ledgerKey = EtcdUtils.getLedgerKey(scope, ledgerId); + ByteSequence ledgerKeyBs = ByteSequence.from(ledgerKey, StandardCharsets.UTF_8); + + final ByteSequence valueBs; + try { + valueBs = ByteSequence.from(serDe.serialize(metadata)); + } catch (IOException ioe) { + promise.completeExceptionally(new BKException.BKMetadataSerializationException(ioe)); + return promise; + } + + kvClient.txn() + .If(new Cmp( + ledgerKeyBs, + Cmp.Op.EQUAL, + CmpTarget.modRevision(lv.getLongVersion()))) + .Then(io.etcd.jetcd.op.Op.put( + ledgerKeyBs, + valueBs, + PutOption.DEFAULT)) + .Else(io.etcd.jetcd.op.Op.get( + ledgerKeyBs, + GetOption.DEFAULT)) + .commit() + .thenAccept(resp -> { + if (resp.isSucceeded()) { + promise.complete(new Versioned<>(metadata, new LongVersion(resp.getHeader().getRevision()))); + } else { + GetResponse getResp = resp.getGetResponses().get(0); + if (getResp.getCount() > 0) { + log.warn("Conditional update ledger metadata failed :" + + " expected version = {}, actual version = {}", + getResp.getKvs().get(0).getModRevision(), lv); + promise.completeExceptionally(new BKException.BKMetadataVersionException()); + } else { + promise.completeExceptionally(new BKException.BKNoSuchLedgerExistsException()); + } 
+ } + }) + .exceptionally(cause -> { + promise.completeExceptionally(new BKException.MetaStoreException()); + return null; + }); + return promise; + } + + private LedgerMetadataConsumer listenerToConsumer(long ledgerId, + LedgerMetadataListener listener, + Consumer onDeletedConsumer) { + return new LedgerMetadataConsumer( + ledgerId, + listener, + onDeletedConsumer + ); + } + + @Override + public void registerLedgerMetadataListener(long ledgerId, LedgerMetadataListener listener) { + if (listeners.containsKey(listener)) { + return; + } + + ValueStream lmStream = watchers.computeIfAbsent( + ledgerId, (lid) -> new ValueStream<>( + client, + watchClient, + bs -> { + try { + return serDe.parseConfig( + bs.getBytes(), + lid, + Optional.empty() + ); + } catch (IOException ioe) { + log.error("Could not parse ledger metadata : {}", + bs.toString(StandardCharsets.UTF_8), ioe); + throw new RuntimeException( + "Could not parse ledger metadata : " + + bs.toString(StandardCharsets.UTF_8), ioe); + } + }, + ByteSequence.from(EtcdUtils.getLedgerKey(scope, ledgerId), StandardCharsets.UTF_8)) + ); + LedgerMetadataConsumer lmConsumer = listenerToConsumer(ledgerId, listener, + (lid) -> { + if (watchers.remove(lid, lmStream)) { + log.info("Closed ledger metadata watcher on ledger {} deletion.", lid); + lmStream.closeAsync(); + } + }); + LedgerMetadataConsumer oldConsumer = listeners.putIfAbsent(listener, lmConsumer); + if (null != oldConsumer) { + return; + } else { + lmStream.readAndWatch(lmConsumer) + .whenComplete((values, cause) -> { + if (null != cause && !(cause instanceof ClosedClientException)) { + // fail to register ledger metadata listener, re-attempt it + registerLedgerMetadataListener(ledgerId, listener); + } + }); + } + } + + @Override + public void unregisterLedgerMetadataListener(long ledgerId, LedgerMetadataListener listener) { + LedgerMetadataConsumer lmConsumer = listeners.remove(listener); + unregisterLedgerMetadataListener(ledgerId, lmConsumer); + } + + private void unregisterLedgerMetadataListener(long ledgerId, LedgerMetadataConsumer lmConsumer) { + ValueStream lmStream = watchers.get(ledgerId); + if (null == lmStream) { + return; + } else { + lmStream.unwatch(lmConsumer).thenAccept(noConsumers -> { + if (noConsumers) { + if (watchers.remove(ledgerId, lmStream)) { + log.info("Closed ledger metadata watcher on ledger {} since there are no listeners any more.", + ledgerId); + lmStream.closeAsync(); + } + } + }).exceptionally(cause -> { + if (cause instanceof ClosedClientException) { + // fail to unwatch a consumer + unregisterLedgerMetadataListener(ledgerId, lmConsumer); + } + return null; + }); + } + } + + @Override + public void asyncProcessLedgers(Processor processor, + VoidCallback finalCb, + Object context, + int successRc, + int failureRc) { + KeyStream ks = new KeyStream<>( + kvClient, + ByteSequence.from(EtcdUtils.getLedgerKey(scope, 0L), StandardCharsets.UTF_8), + ByteSequence.from(EtcdUtils.getLedgerKey(scope, Long.MAX_VALUE), StandardCharsets.UTF_8), + bs -> { + UUID uuid = EtcdUtils.parseLedgerKey(bs.toString(StandardCharsets.UTF_8)); + return uuid.getLeastSignificantBits(); + } + ); + processLedgers( + ks, processor, finalCb, context, successRc, failureRc); + } + + private void processLedgers(KeyStream ks, + Processor processor, + VoidCallback finalCb, + Object context, + int successRc, + int failureRc) { + ks.readNext().whenCompleteAsync((ledgers, cause) -> { + if (null != cause) { + finalCb.processResult(failureRc, null, context); + } else { + if (ledgers.isEmpty()) { + 
finalCb.processResult(successRc, null, context); + } else { + ledgers.forEach(l -> processor.process(l, finalCb)); + processLedgers(ks, processor, finalCb, context, successRc, failureRc); + } + } + }); + } + + @Override + public LedgerRangeIterator getLedgerRanges(long opTimeOutMs) { + KeyStream ks = new KeyStream<>( + kvClient, + ByteSequence.from(EtcdUtils.getLedgerKey(scope, 0L), StandardCharsets.UTF_8), + ByteSequence.from(EtcdUtils.getLedgerKey(scope, Long.MAX_VALUE), StandardCharsets.UTF_8), + bs -> { + UUID uuid = EtcdUtils.parseLedgerKey(bs.toString(StandardCharsets.UTF_8)); + return uuid.getLeastSignificantBits(); + } + ); + KeyIterator ki = new KeyIterator<>(ks); + return new LedgerRangeIterator() { + @Override + public boolean hasNext() throws IOException { + try { + return ki.hasNext(); + } catch (Exception e) { + if (e instanceof IOException) { + throw ((IOException) e); + } else { + throw new IOException(e); + } + } + } + + @Override + public LedgerRange next() throws IOException { + try { + final List values = ki.next(); + final Set ledgers = Sets.newTreeSet(); + ledgers.addAll(values); + return new LedgerRange(ledgers); + } catch (Exception e) { + if (e instanceof IOException) { + throw ((IOException) e); + } else { + throw new IOException(e); + } + } + } + }; + } + + @Override + public void close() { + synchronized (this) { + if (closed) { + return; + } + closed = true; + } + watchClient.close(); + } +} diff --git a/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/EtcdLedgerManagerFactory.java b/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/EtcdLedgerManagerFactory.java new file mode 100644 index 00000000000..11eef381c1f --- /dev/null +++ b/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/EtcdLedgerManagerFactory.java @@ -0,0 +1,128 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.bookkeeper.metadata.etcd; + +import static com.google.common.base.Preconditions.checkArgument; + +import io.etcd.jetcd.Client; +import java.io.IOException; +import org.apache.bookkeeper.common.net.ServiceURI; +import org.apache.bookkeeper.conf.AbstractConfiguration; +import org.apache.bookkeeper.meta.LayoutManager; +import org.apache.bookkeeper.meta.LedgerAuditorManager; +import org.apache.bookkeeper.meta.LedgerIdGenerator; +import org.apache.bookkeeper.meta.LedgerManager; +import org.apache.bookkeeper.meta.LedgerManagerFactory; +import org.apache.bookkeeper.meta.LedgerUnderreplicationManager; +import org.apache.bookkeeper.replication.ReplicationException; +import org.apache.bookkeeper.replication.ReplicationException.CompatibilityException; +import org.apache.commons.configuration.ConfigurationException; +import org.apache.zookeeper.KeeperException; + +/** + * Etcd based ledger manager factory. + */ +class EtcdLedgerManagerFactory implements LedgerManagerFactory { + + static final int VERSION = 0; + + private String scope; + private Client client; + + @Override + public int getCurrentVersion() { + return VERSION; + } + + @Override + public LedgerManagerFactory initialize(AbstractConfiguration conf, + LayoutManager layoutManager, + int factoryVersion) throws IOException { + checkArgument(layoutManager instanceof EtcdLayoutManager); + + EtcdLayoutManager etcdLayoutManager = (EtcdLayoutManager) layoutManager; + + if (VERSION != factoryVersion) { + throw new IOException("Incompatible layout version found : " + factoryVersion); + } + try { + ServiceURI uri = ServiceURI.create(conf.getMetadataServiceUri()); + this.scope = uri.getServicePath(); + } catch (ConfigurationException e) { + throw new IOException("Invalid metadata service uri", e); + } + this.client = etcdLayoutManager.getClient(); + return this; + } + + @Override + public void close() { + // since layout manager is passed from outside. 
+ // we don't need to close it here + } + + @Override + public LedgerIdGenerator newLedgerIdGenerator() { + return new Etcd64bitIdGenerator(client.getKVClient(), scope); + } + + @Override + public LedgerManager newLedgerManager() { + return new EtcdLedgerManager(client, scope); + } + + @Override + public LedgerUnderreplicationManager newLedgerUnderreplicationManager() + throws ReplicationException.UnavailableException, InterruptedException, CompatibilityException { + throw new UnsupportedOperationException(); + } + + @Override + public LedgerAuditorManager newLedgerAuditorManager() throws IOException, InterruptedException { + throw new UnsupportedOperationException(); + } + + @Override + public void format(AbstractConfiguration conf, LayoutManager lm) + throws InterruptedException, KeeperException, IOException { + try { + EtcdRegistrationManager.format(client.getKVClient(), scope); + } catch (Exception e) { + if (e instanceof IOException) { + throw (IOException) e; + } else { + throw new IOException(e); + } + } + } + + @Override + public boolean validateAndNukeExistingCluster(AbstractConfiguration conf, LayoutManager lm) + throws InterruptedException, KeeperException, IOException { + try { + return EtcdRegistrationManager.nukeExistingCluster(client.getKVClient(), scope); + } catch (Exception e) { + if (e instanceof IOException) { + throw (IOException) e; + } else { + throw new IOException(e); + } + } + } +} diff --git a/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/EtcdMetadataBookieDriver.java b/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/EtcdMetadataBookieDriver.java new file mode 100644 index 00000000000..4cbb352fda0 --- /dev/null +++ b/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/EtcdMetadataBookieDriver.java @@ -0,0 +1,89 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.bookkeeper.metadata.etcd; + +import java.util.concurrent.TimeUnit; +import lombok.extern.slf4j.Slf4j; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.discover.RegistrationManager; +import org.apache.bookkeeper.meta.MetadataBookieDriver; +import org.apache.bookkeeper.meta.MetadataDrivers; +import org.apache.bookkeeper.meta.exceptions.MetadataException; +import org.apache.bookkeeper.stats.StatsLogger; + +/** + * Etcd based metadata bookie driver. 
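+ *
+ * <p>The driver registers itself for the {@code etcd} uri scheme, so it is
+ * selected through the metadata service uri, e.g. (illustrative endpoint and
+ * ledger path):
+ * <pre>{@code
+ * metadataServiceUri=etcd://127.0.0.1:2379/ledgers
+ * }</pre>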
+ */
+@Slf4j
+public class EtcdMetadataBookieDriver extends EtcdMetadataDriverBase implements MetadataBookieDriver {
+
+    // register myself
+    static {
+        MetadataDrivers.registerBookieDriver(
+            SCHEME, EtcdMetadataBookieDriver.class);
+        log.info("Registered etcd metadata bookie driver");
+    }
+
+    ServerConfiguration conf;
+    EtcdBookieRegister bkRegister;
+    RegistrationManager regMgr;
+
+    @Override
+    public synchronized MetadataBookieDriver initialize(ServerConfiguration conf,
+                                                        StatsLogger statsLogger)
+            throws MetadataException {
+        super.initialize(conf, statsLogger);
+        this.conf = conf;
+        this.statsLogger = statsLogger;
+        // return this driver instance so initialization can be chained
+        return this;
+    }
+
+    @Override
+    public synchronized RegistrationManager createRegistrationManager() {
+        if (null == bkRegister) {
+            bkRegister = new EtcdBookieRegister(
+                client.getLeaseClient(),
+                TimeUnit.MILLISECONDS.toSeconds(conf.getZkTimeout())
+            ).start();
+        }
+        if (null == regMgr) {
+            regMgr = new EtcdRegistrationManager(
+                client,
+                keyPrefix,
+                bkRegister
+            );
+        }
+        return regMgr;
+    }
+
+    @Override
+    public void close() {
+        RegistrationManager rmToClose;
+        EtcdBookieRegister bkRegisterToClose;
+        synchronized (this) {
+            rmToClose = regMgr;
+            regMgr = null;
+            bkRegisterToClose = bkRegister;
+            bkRegister = null;
+        }
+        if (null != rmToClose) {
+            rmToClose.close();
+        }
+        if (null != bkRegisterToClose) {
+            bkRegisterToClose.close();
+        }
+        super.close();
+    }
+}
diff --git a/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/EtcdMetadataClientDriver.java b/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/EtcdMetadataClientDriver.java
new file mode 100644
index 00000000000..07d64186aa3
--- /dev/null
+++ b/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/EtcdMetadataClientDriver.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bookkeeper.metadata.etcd;
+
+import java.util.Optional;
+import java.util.concurrent.ScheduledExecutorService;
+import lombok.extern.slf4j.Slf4j;
+import org.apache.bookkeeper.conf.ClientConfiguration;
+import org.apache.bookkeeper.discover.RegistrationClient;
+import org.apache.bookkeeper.meta.MetadataClientDriver;
+import org.apache.bookkeeper.meta.MetadataDrivers;
+import org.apache.bookkeeper.meta.exceptions.MetadataException;
+import org.apache.bookkeeper.stats.StatsLogger;
+
+/**
+ * Etcd based metadata client driver.
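+ *
+ * <p>A client-side configuration sketch (illustrative values; the driver is
+ * resolved from the {@code etcd} scheme just like the bookie driver):
+ * <pre>{@code
+ * ClientConfiguration conf = new ClientConfiguration();
+ * conf.setMetadataServiceUri("etcd://127.0.0.1:2379/ledgers");
+ * }</pre>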
+ */ +@Slf4j +public class EtcdMetadataClientDriver extends EtcdMetadataDriverBase implements MetadataClientDriver { + + // register myself to driver manager + static { + MetadataDrivers.registerClientDriver( + SCHEME, EtcdMetadataClientDriver.class); + log.info("Registered etcd metadata client driver."); + } + + ClientConfiguration conf; + ScheduledExecutorService scheduler; + RegistrationClient regClient; + + @Override + public MetadataClientDriver initialize(ClientConfiguration conf, + ScheduledExecutorService scheduler, + StatsLogger statsLogger, + Optional ctx) + throws MetadataException { + super.initialize(conf, statsLogger); + this.conf = conf; + this.statsLogger = statsLogger; + return this; + } + + @Override + public synchronized RegistrationClient getRegistrationClient() { + if (null == regClient) { + regClient = new EtcdRegistrationClient(keyPrefix, client); + } + return regClient; + } + + @Override + public synchronized void close() { + if (null != regClient) { + regClient.close(); + regClient = null; + } + super.close(); + } + + @Override + public void setSessionStateListener(SessionStateListener sessionStateListener) { + /* + * TODO: EtcdMetadataClientDriver has to implement this method. + */ + throw new UnsupportedOperationException(); + } +} diff --git a/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/EtcdMetadataDriverBase.java b/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/EtcdMetadataDriverBase.java new file mode 100644 index 00000000000..2938770de85 --- /dev/null +++ b/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/EtcdMetadataDriverBase.java @@ -0,0 +1,133 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bookkeeper.metadata.etcd; + +import com.google.common.collect.Lists; +import io.etcd.jetcd.Client; +import java.io.IOException; +import java.util.List; +import java.util.stream.Collectors; +import lombok.extern.slf4j.Slf4j; +import org.apache.bookkeeper.common.net.ServiceURI; +import org.apache.bookkeeper.conf.AbstractConfiguration; +import org.apache.bookkeeper.meta.LayoutManager; +import org.apache.bookkeeper.meta.LedgerManagerFactory; +import org.apache.bookkeeper.meta.exceptions.Code; +import org.apache.bookkeeper.meta.exceptions.MetadataException; +import org.apache.bookkeeper.stats.StatsLogger; +import org.apache.commons.configuration.ConfigurationException; + +/** + * This is a mixin class for supporting etcd based metadata drivers. 
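+ *
+ * <p>The metadata service uri is decomposed into etcd endpoints and a key
+ * prefix, as sketched below (illustrative values):
+ * <pre>{@code
+ * // metadataServiceUri = etcd://host1:2379;host2:2379/ledgers
+ * // endpoints          = [http://host1:2379, http://host2:2379]
+ * // keyPrefix          = /ledgers
+ * }</pre>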
+ */ +@Slf4j +class EtcdMetadataDriverBase implements AutoCloseable { + + static final String SCHEME = "etcd"; + + protected AbstractConfiguration conf; + protected StatsLogger statsLogger; + + // service uri + protected Client client; + protected String keyPrefix; + + // managers + protected LayoutManager layoutManager; + protected LedgerManagerFactory lmFactory; + + public String getScheme() { + return SCHEME; + } + + /** + * Initialize metadata driver with provided configuration and statsLogger. + * + * @param conf configuration to initialize metadata driver + * @param statsLogger stats logger + * @throws MetadataException + */ + protected void initialize(AbstractConfiguration conf, StatsLogger statsLogger) + throws MetadataException { + this.conf = conf; + this.statsLogger = statsLogger; + + final String metadataServiceUriStr; + try { + metadataServiceUriStr = conf.getMetadataServiceUri(); + } catch (ConfigurationException ce) { + log.error("Failed to retrieve metadata service uri from configuration", ce); + throw new MetadataException(Code.INVALID_METADATA_SERVICE_URI, ce); + } + ServiceURI serviceURI = ServiceURI.create(metadataServiceUriStr); + this.keyPrefix = serviceURI.getServicePath(); + + List etcdEndpoints = Lists.newArrayList(serviceURI.getServiceHosts()) + .stream() + .map(host -> String.format("http://%s", host)) + .collect(Collectors.toList()); + + log.info("Initializing etcd metadata driver : etcd endpoints = {}, key scope = {}", + etcdEndpoints, keyPrefix); + + synchronized (this) { + this.client = Client.builder() + .endpoints(etcdEndpoints.toArray(new String[etcdEndpoints.size()])) + .build(); + } + + this.layoutManager = new EtcdLayoutManager( + client, + keyPrefix + ); + } + + public LayoutManager getLayoutManager() { + return layoutManager; + } + + public synchronized LedgerManagerFactory getLedgerManagerFactory() + throws MetadataException { + if (null == lmFactory) { + try { + lmFactory = new EtcdLedgerManagerFactory(); + lmFactory.initialize(conf, layoutManager, EtcdLedgerManagerFactory.VERSION); + } catch (IOException ioe) { + throw new MetadataException( + Code.METADATA_SERVICE_ERROR, "Failed to initialize ledger manager factory", ioe); + } + } + return lmFactory; + } + + @Override + public synchronized void close() { + if (null != lmFactory) { + try { + lmFactory.close(); + } catch (IOException e) { + log.error("Failed to close ledger manager factory", e); + } + lmFactory = null; + } + if (null != client) { + client.close(); + client = null; + } + } +} diff --git a/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/EtcdRegistrationClient.java b/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/EtcdRegistrationClient.java new file mode 100644 index 00000000000..3dd7808d619 --- /dev/null +++ b/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/EtcdRegistrationClient.java @@ -0,0 +1,171 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.bookkeeper.metadata.etcd; + +import com.google.common.collect.Maps; +import io.etcd.jetcd.ByteSequence; +import io.etcd.jetcd.Client; +import java.nio.charset.StandardCharsets; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.CompletableFuture; +import java.util.function.Consumer; +import java.util.function.Function; +import lombok.extern.slf4j.Slf4j; +import org.apache.bookkeeper.client.BKException; +import org.apache.bookkeeper.common.concurrent.FutureUtils; +import org.apache.bookkeeper.discover.RegistrationClient; +import org.apache.bookkeeper.metadata.etcd.helpers.KeySetReader; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.versioning.Versioned; + +/** + * Etcd based registration client. + */ +@Slf4j +class EtcdRegistrationClient implements RegistrationClient { + + private static Function newBookieSocketAddressFunc(String prefix) { + return bs -> { + String addrStr = bs.toString(StandardCharsets.UTF_8); + return BookieId.parse(addrStr.replace(prefix, "")); + }; + } + + private final EtcdWatchClient watchClient; + private final KeySetReader writableBookiesReader; + private final KeySetReader readonlyBookiesReader; + private Map>>> writableListeners = + Maps.newHashMap(); + private Map>>> readonlyListeners = + Maps.newHashMap(); + + EtcdRegistrationClient(String scope, + Client client) { + this.watchClient = new EtcdWatchClient(client); + this.writableBookiesReader = new KeySetReader<>( + client, + watchClient, + newBookieSocketAddressFunc(EtcdUtils.getWritableBookiesBeginPath(scope)), + ByteSequence.from(EtcdUtils.getWritableBookiesBeginPath(scope), StandardCharsets.UTF_8), + ByteSequence.from(EtcdUtils.getWritableBookiesEndPath(scope), StandardCharsets.UTF_8) + ); + this.readonlyBookiesReader = new KeySetReader<>( + client, + watchClient, + newBookieSocketAddressFunc(EtcdUtils.getReadonlyBookiesBeginPath(scope)), + ByteSequence.from(EtcdUtils.getReadonlyBookiesBeginPath(scope), StandardCharsets.UTF_8), + ByteSequence.from(EtcdUtils.getReadonlyBookiesEndPath(scope), StandardCharsets.UTF_8) + ); + } + + + @Override + public void close() { + this.writableBookiesReader.close(); + this.readonlyBookiesReader.close(); + this.watchClient.close(); + } + + @Override + public CompletableFuture>> getWritableBookies() { + return writableBookiesReader.read(); + } + + @Override + public CompletableFuture>> getAllBookies() { + return FutureUtils.exception(new BKException.BKIllegalOpException()); + } + + @Override + public CompletableFuture>> getReadOnlyBookies() { + return readonlyBookiesReader.read(); + } + + private static CompletableFuture registerListener( + KeySetReader keySetReader, + Map>>> listeners, + RegistrationListener listener + ) { + Consumer>> consumer; + synchronized (listeners) { + consumer = listeners.get(listener); + if (null != consumer) { + // already registered + return FutureUtils.Void(); + } else { + consumer = bookies -> listener.onBookiesChanged(bookies); + listeners.put(listener, consumer); + } + } + return keySetReader + .readAndWatch(consumer) + .thenApply(ignored -> null); + } 
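+
+    // Usage sketch (illustrative): watchWritableBookies(listener) below funnels
+    // into registerListener(...) above, which wires a Consumer into the shared
+    // KeySetReader exactly once per RegistrationListener.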
+
+    private static CompletableFuture<Void> unregisterListener(
+        KeySetReader<BookieId> keySetReader,
+        Map<RegistrationListener, Consumer<Versioned<Set<BookieId>>>> listeners,
+        RegistrationListener listener
+    ) {
+        Consumer<Versioned<Set<BookieId>>> consumer = listeners.get(listener);
+        if (null == consumer) {
+            return FutureUtils.Void();
+        } else {
+            return keySetReader.unwatch(consumer);
+        }
+    }
+
+    @Override
+    public CompletableFuture<Void> watchWritableBookies(RegistrationListener listener) {
+        return registerListener(
+            writableBookiesReader,
+            writableListeners,
+            listener
+        );
+    }
+
+    @Override
+    public void unwatchWritableBookies(RegistrationListener listener) {
+        unregisterListener(
+            writableBookiesReader,
+            writableListeners,
+            listener
+        );
+    }
+
+    @Override
+    public CompletableFuture<Void> watchReadOnlyBookies(RegistrationListener listener) {
+        return registerListener(
+            readonlyBookiesReader,
+            readonlyListeners,
+            listener
+        );
+    }
+
+    @Override
+    public void unwatchReadOnlyBookies(RegistrationListener listener) {
+        unregisterListener(
+            readonlyBookiesReader,
+            readonlyListeners,
+            listener
+        );
+    }
+
+}
diff --git a/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/EtcdRegistrationManager.java b/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/EtcdRegistrationManager.java
new file mode 100644
index 00000000000..50badb8f07f
--- /dev/null
+++ b/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/EtcdRegistrationManager.java
@@ -0,0 +1,574 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */ +package org.apache.bookkeeper.metadata.etcd; + +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.apache.bookkeeper.metadata.etcd.EtcdUtils.getBookiesEndPath; +import static org.apache.bookkeeper.metadata.etcd.EtcdUtils.getBookiesPath; +import static org.apache.bookkeeper.metadata.etcd.EtcdUtils.getBucketsPath; +import static org.apache.bookkeeper.metadata.etcd.EtcdUtils.getClusterInstanceIdPath; +import static org.apache.bookkeeper.metadata.etcd.EtcdUtils.getCookiePath; +import static org.apache.bookkeeper.metadata.etcd.EtcdUtils.getCookiesPath; +import static org.apache.bookkeeper.metadata.etcd.EtcdUtils.getLayoutKey; +import static org.apache.bookkeeper.metadata.etcd.EtcdUtils.getLedgersPath; +import static org.apache.bookkeeper.metadata.etcd.EtcdUtils.getReadonlyBookiePath; +import static org.apache.bookkeeper.metadata.etcd.EtcdUtils.getReadonlyBookiesPath; +import static org.apache.bookkeeper.metadata.etcd.EtcdUtils.getScopeEndKey; +import static org.apache.bookkeeper.metadata.etcd.EtcdUtils.getUnderreplicationPath; +import static org.apache.bookkeeper.metadata.etcd.EtcdUtils.getWritableBookiePath; +import static org.apache.bookkeeper.metadata.etcd.EtcdUtils.getWritableBookiesPath; +import static org.apache.bookkeeper.metadata.etcd.EtcdUtils.msResult; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.util.concurrent.UncheckedExecutionException; +import io.etcd.jetcd.ByteSequence; +import io.etcd.jetcd.Client; +import io.etcd.jetcd.KV; +import io.etcd.jetcd.KeyValue; +import io.etcd.jetcd.Txn; +import io.etcd.jetcd.Watch; +import io.etcd.jetcd.Watch.Watcher; +import io.etcd.jetcd.kv.DeleteResponse; +import io.etcd.jetcd.kv.GetResponse; +import io.etcd.jetcd.kv.TxnResponse; +import io.etcd.jetcd.op.Cmp; +import io.etcd.jetcd.op.Cmp.Op; +import io.etcd.jetcd.op.CmpTarget; +import io.etcd.jetcd.options.DeleteOption; +import io.etcd.jetcd.options.GetOption; +import io.etcd.jetcd.options.PutOption; +import io.etcd.jetcd.options.WatchOption; +import io.etcd.jetcd.watch.WatchEvent; +import io.etcd.jetcd.watch.WatchEvent.EventType; +import java.util.UUID; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import lombok.AccessLevel; +import lombok.Getter; +import lombok.extern.slf4j.Slf4j; +import org.apache.bookkeeper.bookie.BookieException; +import org.apache.bookkeeper.bookie.BookieException.BookieIllegalOpException; +import org.apache.bookkeeper.bookie.BookieException.CookieNotFoundException; +import org.apache.bookkeeper.bookie.BookieException.MetadataStoreException; +import org.apache.bookkeeper.discover.BookieServiceInfo; +import org.apache.bookkeeper.discover.RegistrationManager; +import org.apache.bookkeeper.meta.LedgerLayout; +import org.apache.bookkeeper.net.BookieId; +import org.apache.bookkeeper.versioning.LongVersion; +import org.apache.bookkeeper.versioning.Version; +import org.apache.bookkeeper.versioning.Versioned; + +/** + * Etcd registration manager. 
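+ *
+ * <p>Editor's summary (derived from the {@code initNewCluster} transaction
+ * below; exact node names come from {@code EtcdConstants}): all metadata for a
+ * cluster lives under one scope prefix, roughly
+ * <pre>
+ *   ${scope}/layout
+ *   ${scope}/instanceid
+ *   ${scope}/cookies/${bookieId}
+ *   ${scope}/bookies/writable/${bookieId}
+ *   ${scope}/bookies/readonly/${bookieId}
+ *   ${scope}/ledgers
+ *   ${scope}/buckets
+ *   ${scope}/underreplication
+ * </pre>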
+ */ +@Slf4j +class EtcdRegistrationManager implements RegistrationManager { + + private final String scope; + @Getter(AccessLevel.PACKAGE) + private final Client client; + private final boolean ownClient; + private final KV kvClient; + @Getter(AccessLevel.PACKAGE) + private final EtcdBookieRegister bkRegister; + + @VisibleForTesting + EtcdRegistrationManager(Client client, + String scope) { + this(client, scope, 60); + } + + @VisibleForTesting + EtcdRegistrationManager(Client client, + String scope, + long ttlSeconds) { + this(client, scope, ttlSeconds, () -> {}); + } + + @VisibleForTesting + EtcdRegistrationManager(Client client, + String scope, + long ttlSeconds, + RegistrationListener listener) { + this( + client, + scope, + new EtcdBookieRegister( + client.getLeaseClient(), + ttlSeconds + ).addRegistrationListener(listener).start(), + true); + } + + EtcdRegistrationManager(Client client, + String scope, + EtcdBookieRegister bkRegister) { + this(client, scope, bkRegister, false); + } + + private EtcdRegistrationManager(Client client, + String scope, + EtcdBookieRegister bkRegister, + boolean ownClient) { + this.scope = scope; + this.client = client; + this.kvClient = client.getKVClient(); + this.bkRegister = bkRegister; + this.ownClient = ownClient; + } + + @Override + public void close() { + if (ownClient) { + log.info("Closing registration manager under scope '{}'", scope); + bkRegister.close(); + client.close(); + log.info("Successfully closed registration manager under scope '{}'", scope); + } + } + + @Override + public void registerBookie(BookieId bookieId, boolean readOnly, + BookieServiceInfo bookieServiceInfo) throws BookieException { + if (readOnly) { + doRegisterReadonlyBookie(bookieId, bkRegister.get()); + } else { + doRegisterBookie(getWritableBookiePath(scope, bookieId), bkRegister.get()); + } + } + + private boolean checkRegNodeAndWaitExpired(String regPath, long leaseId) + throws MetadataStoreException { + ByteSequence regPathBs = ByteSequence.from(regPath, UTF_8); + GetResponse getResp = msResult(kvClient.get(regPathBs)); + if (getResp.getCount() <= 0) { + // key doesn't exist anymore + return false; + } else { + return waitUntilRegNodeExpired(regPath, leaseId); + } + } + + private boolean waitUntilRegNodeExpired(String regPath, long leaseId) + throws MetadataStoreException { + ByteSequence regPathBs = ByteSequence.from(regPath, UTF_8); + // check regPath again + GetResponse getResp = msResult(kvClient.get(regPathBs)); + if (getResp.getCount() <= 0) { + // key disappears after watching it + return false; + } else { + KeyValue kv = getResp.getKvs().get(0); + if (kv.getLease() != leaseId) { + Watch watchClient = client.getWatchClient(); + final CompletableFuture watchFuture = new CompletableFuture<>(); + Watcher watcher = watchClient.watch( + regPathBs, + WatchOption.newBuilder() + .withRevision(getResp.getHeader().getRevision() + 1) + .build(), + response -> { + for (WatchEvent event : response.getEvents()) { + log.info("Received watch event on '{}' : EventType = {}, lease {}", + regPath, event.getEventType(), leaseId); + if (EventType.DELETE == event.getEventType()) { + watchFuture.complete(null); + return; + } + } + }, + exception -> { + log.warn("Exception in keepAlive for watch event on {}, lease {}", + regPath, leaseId, exception); + watchFuture.completeExceptionally(new UncheckedExecutionException( + "Interrupted at waiting previous registration under " + + regPath + " (lease = " + kv.getLease() + ") to be expired", exception)); + } + ); + log.info("Previous bookie 
registration (lease = {}) still exists at {}, " + + "so new lease '{}' will be waiting previous lease for {} seconds to be expired", + kv.getLease(), regPath, leaseId, bkRegister.getTtlSeconds()); + + try { + msResult(watchFuture, 2 * bkRegister.getTtlSeconds(), TimeUnit.SECONDS); + return false; + } catch (TimeoutException e) { + watchFuture.cancel(true); + throw new MetadataStoreException( + "Previous bookie registration still exists at " + + regPath + " (lease = " + kv.getLease() + ") after " + + (2 * bkRegister.getTtlSeconds()) + " seconds elapsed"); + } catch (UncheckedExecutionException uee) { + throw new MetadataStoreException(uee.getMessage(), uee.getCause()); + } finally { + watcher.close(); + } + } else { + // key exists with same lease + return true; + } + } + } + + private void doRegisterBookie(String regPath, long leaseId) throws MetadataStoreException { + if (checkRegNodeAndWaitExpired(regPath, leaseId)) { + // the bookie is already registered under `${regPath}` with `${leaseId}`. + return; + } + + ByteSequence regPathBs = ByteSequence.from(regPath, UTF_8); + Txn txn = kvClient.txn() + .If(new Cmp( + regPathBs, + Op.GREATER, + CmpTarget.createRevision(0))) + .Then(io.etcd.jetcd.op.Op.get(regPathBs, GetOption.DEFAULT)) + .Else(io.etcd.jetcd.op.Op.put( + regPathBs, + ByteSequence.from(new byte[0]), + PutOption.newBuilder() + .withLeaseId(bkRegister.get()) + .build() + )); + TxnResponse txnResp = msResult(txn.commit()); + if (txnResp.isSucceeded()) { + // the key already exists + GetResponse getResp = txnResp.getGetResponses().get(0); + if (getResp.getCount() <= 0) { + throw new MetadataStoreException( + "Failed to register bookie under '" + regPath + + "', but no bookie is registered there."); + } else { + KeyValue kv = getResp.getKvs().get(0); + throw new MetadataStoreException("Another bookie already registered under '" + + regPath + "': lease = " + kv.getLease()); + } + } else { + log.info("Successfully registered bookie at {}", regPath); + } + } + + private void doRegisterReadonlyBookie(BookieId bookieId, long leaseId) throws MetadataStoreException { + String readonlyRegPath = getReadonlyBookiePath(scope, bookieId); + doRegisterBookie(readonlyRegPath, leaseId); + String writableRegPath = getWritableBookiePath(scope, bookieId); + msResult(kvClient.delete(ByteSequence.from(writableRegPath, UTF_8))); + } + + @Override + public void unregisterBookie(BookieId bookieId, boolean readOnly) throws BookieException { + String regPath; + if (readOnly) { + regPath = getReadonlyBookiePath(scope, bookieId); + } else { + regPath = getWritableBookiePath(scope, bookieId); + } + DeleteResponse delResp = msResult(kvClient.delete(ByteSequence.from(regPath, UTF_8))); + if (delResp.getDeleted() > 0) { + log.info("Successfully unregistered bookie {} from {}", bookieId, regPath); + } else { + log.info("Bookie disappeared from {} before unregistering", regPath); + } + } + + @Override + public boolean isBookieRegistered(BookieId bookieId) throws BookieException { + CompletableFuture getWritableFuture = kvClient.get( + ByteSequence.from(getWritableBookiePath(scope, bookieId), UTF_8), + GetOption.newBuilder() + .withCountOnly(true) + .build()); + CompletableFuture getReadonlyFuture = kvClient.get( + ByteSequence.from(getReadonlyBookiePath(scope, bookieId), UTF_8), + GetOption.newBuilder() + .withCountOnly(true) + .build()); + + return msResult(getWritableFuture).getCount() > 0 + || msResult(getReadonlyFuture).getCount() > 0; + } + + @Override + public void writeCookie(BookieId bookieId, Versioned 
cookieData) throws BookieException { + ByteSequence cookiePath = ByteSequence.from(getCookiePath(scope, bookieId), UTF_8); + Txn txn = kvClient.txn(); + if (Version.NEW == cookieData.getVersion()) { + txn.If(new Cmp( + cookiePath, + Op.GREATER, + CmpTarget.createRevision(0L)) + ) + // if key not exists, create one. + .Else(io.etcd.jetcd.op.Op.put( + cookiePath, + ByteSequence.from(cookieData.getValue()), + PutOption.DEFAULT) + ); + } else { + if (!(cookieData.getVersion() instanceof LongVersion)) { + throw new BookieIllegalOpException("Invalid version type, expected it to be LongVersion"); + } + txn.If(new Cmp( + cookiePath, + Op.EQUAL, + CmpTarget.modRevision(((LongVersion) cookieData.getVersion()).getLongVersion())) + ) + .Then(io.etcd.jetcd.op.Op.put( + cookiePath, + ByteSequence.from(cookieData.getValue()), + PutOption.DEFAULT) + ); + } + TxnResponse response = msResult(txn.commit()); + if (response.isSucceeded() != (Version.NEW != cookieData.getVersion())) { + throw new MetadataStoreException( + "Conflict on writing cookie for bookie " + bookieId); + } + } + + @Override + public Versioned readCookie(BookieId bookieId) throws BookieException { + ByteSequence cookiePath = ByteSequence.from(getCookiePath(scope, bookieId), UTF_8); + GetResponse resp = msResult(kvClient.get(cookiePath)); + if (resp.getCount() <= 0) { + throw new CookieNotFoundException(bookieId.toString()); + } else { + KeyValue kv = resp.getKvs().get(0); + return new Versioned<>( + kv.getValue().getBytes(), + new LongVersion(kv.getModRevision())); + } + } + + @Override + public void removeCookie(BookieId bookieId, Version version) throws BookieException { + ByteSequence cookiePath = ByteSequence.from(getCookiePath(scope, bookieId), UTF_8); + Txn delTxn = kvClient.txn() + .If(new Cmp( + cookiePath, + Op.EQUAL, + CmpTarget.modRevision(((LongVersion) version).getLongVersion()) + )) + .Then(io.etcd.jetcd.op.Op.delete( + cookiePath, + DeleteOption.DEFAULT + )) + .Else(io.etcd.jetcd.op.Op.get( + cookiePath, + GetOption.newBuilder().withCountOnly(true).build() + )); + TxnResponse txnResp = msResult(delTxn.commit()); + if (!txnResp.isSucceeded()) { + GetResponse getResp = txnResp.getGetResponses().get(0); + if (getResp.getCount() > 0) { + throw new MetadataStoreException( + "Failed to remove cookie from " + cookiePath.toString(UTF_8) + + " for bookie " + bookieId + " : bad version '" + version + "'"); + } else { + throw new CookieNotFoundException(bookieId.toString()); + } + } else { + log.info("Removed cookie from {} for bookie {}", + cookiePath.toString(UTF_8), bookieId); + } + } + + @Override + public String getClusterInstanceId() throws BookieException { + GetResponse response = msResult( + kvClient.get(ByteSequence.from(getClusterInstanceIdPath(scope), UTF_8))); + if (response.getCount() <= 0) { + log.error("BookKeeper metadata doesn't exist in Etcd. " + + "Has the cluster been initialized? 
" + + "Try running bin/bookkeeper shell initNewCluster"); + throw new MetadataStoreException("BookKeeper is not initialized under '" + scope + "' yet"); + } else { + KeyValue kv = response.getKvs().get(0); + return new String(kv.getValue().getBytes(), UTF_8); + } + } + + @Override + public boolean prepareFormat() throws Exception { + ByteSequence rootScopeKey = ByteSequence.from(scope, UTF_8); + GetResponse resp = msResult(kvClient.get(rootScopeKey)); + return resp.getCount() > 0; + } + + @Override + public boolean initNewCluster() throws Exception { + return initNewCluster(kvClient, scope); + } + + static boolean initNewCluster(KV kvClient, String scope) throws Exception { + ByteSequence rootScopeKey = ByteSequence.from(scope, UTF_8); + String instanceId = UUID.randomUUID().toString(); + LedgerLayout layout = new LedgerLayout( + EtcdLedgerManagerFactory.class.getName(), + EtcdLedgerManagerFactory.VERSION + ); + Txn initTxn = kvClient.txn() + .If(new Cmp( + rootScopeKey, + Op.GREATER, + CmpTarget.createRevision(0L) + )) + // only put keys when root scope doesn't exist + .Else( + // `${scope}` + io.etcd.jetcd.op.Op.put( + rootScopeKey, + EtcdConstants.EMPTY_BS, + PutOption.DEFAULT + ), + // `${scope}/layout` + io.etcd.jetcd.op.Op.put( + ByteSequence.from(getLayoutKey(scope), UTF_8), + ByteSequence.from(layout.serialize()), + PutOption.DEFAULT + ), + // `${scope}/instanceid` + io.etcd.jetcd.op.Op.put( + ByteSequence.from(getClusterInstanceIdPath(scope), UTF_8), + ByteSequence.from(instanceId, UTF_8), + PutOption.DEFAULT + ), + // `${scope}/cookies` + io.etcd.jetcd.op.Op.put( + ByteSequence.from(getCookiesPath(scope), UTF_8), + EtcdConstants.EMPTY_BS, + PutOption.DEFAULT + ), + // `${scope}/bookies` + io.etcd.jetcd.op.Op.put( + ByteSequence.from(getBookiesPath(scope), UTF_8), + EtcdConstants.EMPTY_BS, + PutOption.DEFAULT + ), + // `${scope}/bookies/writable` + io.etcd.jetcd.op.Op.put( + ByteSequence.from(getWritableBookiesPath(scope), UTF_8), + EtcdConstants.EMPTY_BS, + PutOption.DEFAULT + ), + // `${scope}/bookies/readonly` + io.etcd.jetcd.op.Op.put( + ByteSequence.from(getReadonlyBookiesPath(scope), UTF_8), + EtcdConstants.EMPTY_BS, + PutOption.DEFAULT + ), + // `${scope}/ledgers` + io.etcd.jetcd.op.Op.put( + ByteSequence.from(getLedgersPath(scope), UTF_8), + EtcdConstants.EMPTY_BS, + PutOption.DEFAULT + ), + // `${scope}/buckets` + io.etcd.jetcd.op.Op.put( + ByteSequence.from(getBucketsPath(scope), UTF_8), + EtcdConstants.EMPTY_BS, + PutOption.DEFAULT + ), + // `${scope}/underreplication` + io.etcd.jetcd.op.Op.put( + ByteSequence.from(getUnderreplicationPath(scope), UTF_8), + EtcdConstants.EMPTY_BS, + PutOption.DEFAULT + ) + ); + + return !msResult(initTxn.commit()).isSucceeded(); + } + + @Override + public boolean format() throws Exception { + return format(kvClient, scope); + } + + static boolean format(KV kvClient, String scope) throws Exception { + ByteSequence rootScopeKey = ByteSequence.from(scope, UTF_8); + GetResponse resp = msResult(kvClient.get(rootScopeKey)); + if (resp.getCount() <= 0) { + // cluster doesn't exist + return initNewCluster(kvClient, scope); + } else if (nukeExistingCluster(kvClient, scope)) { // cluster exists and has successfully nuked it + return initNewCluster(kvClient, scope); + } else { + return false; + } + } + + @Override + public boolean nukeExistingCluster() throws Exception { + return nukeExistingCluster(kvClient, scope); + } + + @Override + public void addRegistrationListener(RegistrationListener listener) { + 
bkRegister.addRegistrationListener(listener); + } + + static boolean nukeExistingCluster(KV kvClient, String scope) throws Exception { + ByteSequence rootScopeKey = ByteSequence.from(scope, UTF_8); + GetResponse resp = msResult(kvClient.get(rootScopeKey)); + if (resp.getCount() <= 0) { + log.info("There is no existing cluster with under scope '{}' in Etcd, " + + "so exiting nuke operation", scope); + return true; + } + + String bookiesPath = getBookiesPath(scope); + String bookiesEndPath = getBookiesEndPath(scope); + resp = msResult(kvClient.get( + ByteSequence.from(bookiesPath, UTF_8), + GetOption.newBuilder() + .withRange(ByteSequence.from(bookiesEndPath, UTF_8)) + .withKeysOnly(true) + .build() + )); + String writableBookiesPath = getWritableBookiesPath(scope); + String readonlyBookiesPath = getReadonlyBookiesPath(scope); + boolean hasBookiesAlive = false; + for (KeyValue kv : resp.getKvs()) { + String keyStr = new String(kv.getKey().getBytes(), UTF_8); + if (keyStr.equals(bookiesPath) + || keyStr.equals(writableBookiesPath) + || keyStr.equals(readonlyBookiesPath)) { + continue; + } else { + hasBookiesAlive = true; + break; + } + } + if (hasBookiesAlive) { + log.error("Bookies are still up and connected to this cluster, " + + "stop all bookies before nuking the cluster"); + return false; + } + DeleteResponse delResp = msResult(kvClient.delete( + rootScopeKey, + DeleteOption.newBuilder() + .withRange(ByteSequence.from(getScopeEndKey(scope), UTF_8)) + .build())); + log.info("Successfully nuked cluster under scope '{}' : {} kv pairs deleted", + scope, delResp.getDeleted()); + return true; + } +} diff --git a/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/EtcdUtils.java b/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/EtcdUtils.java new file mode 100644 index 00000000000..431034128af --- /dev/null +++ b/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/EtcdUtils.java @@ -0,0 +1,212 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.bookkeeper.metadata.etcd; + +import static org.apache.bookkeeper.metadata.etcd.EtcdConstants.BUCKETS_NODE; +import static org.apache.bookkeeper.metadata.etcd.EtcdConstants.COOKIES_NODE; +import static org.apache.bookkeeper.metadata.etcd.EtcdConstants.END_SEP; +import static org.apache.bookkeeper.metadata.etcd.EtcdConstants.INSTANCEID_NODE; +import static org.apache.bookkeeper.metadata.etcd.EtcdConstants.LAYOUT_NODE; +import static org.apache.bookkeeper.metadata.etcd.EtcdConstants.LEDGERS_NODE; +import static org.apache.bookkeeper.metadata.etcd.EtcdConstants.MEMBERS_NODE; +import static org.apache.bookkeeper.metadata.etcd.EtcdConstants.READONLY_NODE; +import static org.apache.bookkeeper.metadata.etcd.EtcdConstants.UR_NODE; +import static org.apache.bookkeeper.metadata.etcd.EtcdConstants.WRITEABLE_NODE; + +import java.io.IOException; +import java.util.UUID; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import lombok.AccessLevel; +import lombok.NoArgsConstructor; +import org.apache.bookkeeper.bookie.BookieException.MetadataStoreException; +import org.apache.bookkeeper.common.concurrent.FutureUtils; +import org.apache.bookkeeper.net.BookieId; +import org.apache.commons.lang3.StringUtils; + +/** + * Utils for etcd based metadata store. + */ +@NoArgsConstructor(access = AccessLevel.PRIVATE) +final class EtcdUtils { + + static String getScopeEndKey(String scope) { + return String.format("%s%s", scope, END_SEP); + } + + static String getBucketsPath(String scope) { + return String.format("%s/%s", scope, BUCKETS_NODE); + } + + static String getBucketPath(String scope, int bucket) { + return String.format("%s/%s/%03d", + scope, + BUCKETS_NODE, + bucket); + } + + static String getLayoutKey(String scope) { + return String.format("%s/%s", scope, LAYOUT_NODE); + } + + static String getLedgersPath(String scope) { + return String.format("%s/%s", scope, LEDGERS_NODE); + } + + static String getLedgerKey(String scope, long ledgerId) { + return getLedgerKey(scope, 0, ledgerId); + } + + static String getLedgerKey(String scope, long scopeId, long ledgerId) { + UUID uuid = new UUID(scopeId, ledgerId); + return String.format("%s/ledgers/%s", scope, uuid); + } + + static UUID parseLedgerKey(String ledgerKey) { + String[] keyParts = StringUtils.split(ledgerKey, '/'); + return UUID.fromString(keyParts[keyParts.length - 1]); + } + + static String getBookiesPath(String scope) { + return String.format("%s/%s", scope, MEMBERS_NODE); + } + + static String getBookiesEndPath(String scope) { + return String.format("%s/%s%s", scope, MEMBERS_NODE, END_SEP); + } + + static String getWritableBookiesPath(String scope) { + return String.format("%s/%s/%s", scope, MEMBERS_NODE, WRITEABLE_NODE); + } + + static String getWritableBookiesBeginPath(String scope) { + return String.format("%s/%s/%s/", scope, MEMBERS_NODE, WRITEABLE_NODE); + } + + static String getWritableBookiesEndPath(String scope) { + return String.format("%s/%s/%s%s", scope, MEMBERS_NODE, WRITEABLE_NODE, END_SEP); + } + + static String getWritableBookiePath(String scope, BookieId bookieId) { + return String.format("%s/%s/%s/%s", + scope, MEMBERS_NODE, WRITEABLE_NODE, bookieId.toString()); + } + + static String getReadonlyBookiesPath(String scope) { + return String.format("%s/%s/%s", scope, MEMBERS_NODE, READONLY_NODE); + } + + static String getReadonlyBookiesBeginPath(String scope) { + return String.format("%s/%s/%s/", scope, MEMBERS_NODE, READONLY_NODE); + 
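+        // Editor's note: the trailing '/' here, paired with the END_SEP-based
+        // *EndPath variant below, brackets an etcd range query over exactly
+        // the children of this prefix.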
} + + static String getReadonlyBookiesEndPath(String scope) { + return String.format("%s/%s/%s%s", scope, MEMBERS_NODE, READONLY_NODE, END_SEP); + } + + static String getReadonlyBookiePath(String scope, BookieId bookieId) { + return String.format("%s/%s/%s/%s", + scope, MEMBERS_NODE, READONLY_NODE, bookieId.toString()); + } + + static String getCookiesPath(String scope) { + return String.format("%s/%s", scope, COOKIES_NODE); + } + + static String getCookiePath(String scope, BookieId bookieId) { + return String.format("%s/%s/%s", scope, COOKIES_NODE, bookieId.toString()); + } + + static String getClusterInstanceIdPath(String scope) { + return String.format("%s/%s", scope, INSTANCEID_NODE); + } + + static String getUnderreplicationPath(String scope) { + return String.format("%s/%s", scope, UR_NODE); + } + + static T ioResult(CompletableFuture future) throws IOException { + return FutureUtils.result(future, cause -> { + if (cause instanceof IOException) { + return (IOException) cause; + } else { + return new IOException(cause); + } + }); + } + + static T msResult(CompletableFuture future) throws MetadataStoreException { + return FutureUtils.result(future, cause -> { + if (cause instanceof MetadataStoreException) { + return (MetadataStoreException) cause; + } else { + return new MetadataStoreException(cause); + } + }); + } + + static T msResult(CompletableFuture future, + long timeout, + TimeUnit timeUnit) + throws MetadataStoreException, TimeoutException { + return FutureUtils.result(future, cause -> { + if (cause instanceof MetadataStoreException) { + return (MetadataStoreException) cause; + } else { + return new MetadataStoreException(cause); + } + }, timeout, timeUnit); + } + + public static long toLong(byte[] memory, int index) { + return ((long) memory[index] & 0xff) << 56 + | ((long) memory[index + 1] & 0xff) << 48 + | ((long) memory[index + 2] & 0xff) << 40 + | ((long) memory[index + 3] & 0xff) << 32 + | ((long) memory[index + 4] & 0xff) << 24 + | ((long) memory[index + 5] & 0xff) << 16 + | ((long) memory[index + 6] & 0xff) << 8 + | (long) memory[index + 7] & 0xff; + } + + /** + * Convert a long number to a bytes array. + * + * @param value the long number + * @return the bytes array + */ + public static byte[] toBytes(long value) { + byte[] memory = new byte[8]; + toBytes(value, memory, 0); + return memory; + } + + public static void toBytes(long value, byte[] memory, int index) { + memory[index] = (byte) (value >>> 56); + memory[index + 1] = (byte) (value >>> 48); + memory[index + 2] = (byte) (value >>> 40); + memory[index + 3] = (byte) (value >>> 32); + memory[index + 4] = (byte) (value >>> 24); + memory[index + 5] = (byte) (value >>> 16); + memory[index + 6] = (byte) (value >>> 8); + memory[index + 7] = (byte) value; + } + +} diff --git a/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/EtcdWatchClient.java b/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/EtcdWatchClient.java new file mode 100644 index 00000000000..6a046ba4e2f --- /dev/null +++ b/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/EtcdWatchClient.java @@ -0,0 +1,400 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.bookkeeper.metadata.etcd; + +import static io.etcd.jetcd.common.exception.EtcdExceptionFactory.newClosedWatchClientException; +import static io.etcd.jetcd.common.exception.EtcdExceptionFactory.newEtcdException; +import static io.etcd.jetcd.common.exception.EtcdExceptionFactory.toEtcdException; + +import com.google.common.base.Strings; +import com.google.protobuf.ByteString; +import com.google.protobuf.UnsafeByteOperations; +import io.etcd.jetcd.ByteSequence; +import io.etcd.jetcd.Client; +import io.etcd.jetcd.api.WatchCancelRequest; +import io.etcd.jetcd.api.WatchCreateRequest; +import io.etcd.jetcd.api.WatchGrpc; +import io.etcd.jetcd.api.WatchRequest; +import io.etcd.jetcd.api.WatchResponse; +import io.etcd.jetcd.common.exception.ErrorCode; +import io.etcd.jetcd.common.exception.EtcdException; +import io.etcd.jetcd.common.exception.EtcdExceptionFactory; +import io.etcd.jetcd.impl.EtcdConnectionManager; +import io.etcd.jetcd.options.WatchOption; +import io.etcd.jetcd.watch.WatchResponseWithError; +import io.grpc.Status; +import io.grpc.Status.Code; +import io.grpc.stub.StreamObserver; +import java.util.LinkedList; +import java.util.Optional; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; +import java.util.function.BiConsumer; +import lombok.extern.slf4j.Slf4j; +import org.apache.bookkeeper.common.concurrent.FutureUtils; +import org.apache.bookkeeper.common.util.OrderedScheduler; +import org.apache.bookkeeper.util.collections.ConcurrentLongHashMap; +import org.apache.bookkeeper.util.collections.ConcurrentLongHashSet; + +/** + * An async watch implementation. + */ +@Slf4j +public class EtcdWatchClient implements AutoCloseable { + + private final EtcdConnectionManager connMgr; + private final WatchGrpc.WatchStub stub; + private volatile StreamObserver grpcWatchStreamObserver; + // watchers stores a mapping between watchID -> EtcdWatcher. 
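+    // Editor's note: a single bidirectional gRPC watch stream is multiplexed
+    // across all watchers. Create requests queue up in pendingWatchers until
+    // the server acknowledges them with a watch id (see processCreate), at
+    // which point the watcher moves into this map.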
+ private final ConcurrentLongHashMap watchers = + ConcurrentLongHashMap.newBuilder().build(); + private final LinkedList pendingWatchers = new LinkedList<>(); + private final ConcurrentLongHashSet cancelSet = ConcurrentLongHashSet.newBuilder().build(); + + // scheduler + private final OrderedScheduler scheduler; + private final ScheduledExecutorService watchExecutor; + + // close state + private CompletableFuture closeFuture = null; + + public EtcdWatchClient(Client client) { + this.connMgr = new EtcdConnectionManager(client); + this.stub = connMgr.newWatchStub(); + this.scheduler = OrderedScheduler.newSchedulerBuilder() + .name("etcd-watcher-scheduler") + .numThreads(Runtime.getRuntime().availableProcessors()) + .build(); + this.watchExecutor = this.scheduler.chooseThread(); + } + + public synchronized boolean isClosed() { + return closeFuture != null; + } + + public CompletableFuture watch(ByteSequence key, + BiConsumer consumer) { + return watch(key, WatchOption.DEFAULT, consumer); + } + + public CompletableFuture watch(ByteSequence key, + WatchOption watchOption, + BiConsumer consumer) { + return CompletableFuture.supplyAsync(() -> { + if (isClosed()) { + throw EtcdExceptionFactory.newClosedWatchClientException(); + } + + EtcdWatcher watcher = new EtcdWatcher(key, watchOption, scheduler.chooseThread(), this); + watcher.addConsumer(consumer); + pendingWatchers.add(watcher); + if (pendingWatchers.size() == 1) { + WatchRequest request = toWatchCreateRequest(watcher); + getGrpcWatchStreamObserver().onNext(request); + } + return watcher; + }, watchExecutor); + } + + // notifies all watchers about a exception. it doesn't close watchers. + // it is the responsibility of user to close watchers. + private void notifyWatchers(EtcdException e) { + WatchResponseWithError wre = new WatchResponseWithError(e); + this.pendingWatchers.forEach(watcher -> watcher.notifyWatchResponse(wre)); + this.pendingWatchers.clear(); + this.watchers.values().forEach(watcher -> watcher.notifyWatchResponse(wre)); + this.watchers.clear(); + } + + public CompletableFuture unwatch(EtcdWatcher watcher) { + return CompletableFuture.runAsync(() -> cancelWatcher(watcher.getWatchID()), watchExecutor); + } + + private void cancelWatcher(long watchID) { + if (isClosed()) { + return; + } + + if (cancelSet.contains(watchID)) { + return; + } + + watchers.remove(watchID); + cancelSet.add(watchID); + + WatchCancelRequest watchCancelRequest = WatchCancelRequest.newBuilder() + .setWatchId(watchID) + .build(); + WatchRequest cancelRequest = WatchRequest.newBuilder() + .setCancelRequest(watchCancelRequest) + .build(); + getGrpcWatchStreamObserver().onNext(cancelRequest); + } + + public CompletableFuture closeAsync() { + CompletableFuture future; + synchronized (this) { + if (null == closeFuture) { + log.info("Closing watch client"); + closeFuture = CompletableFuture.runAsync(() -> { + notifyWatchers(newClosedWatchClientException()); + closeGrpcWatchStreamObserver(); + }, watchExecutor); + } + future = closeFuture; + } + return future.whenComplete((ignored, cause) -> { + this.scheduler.shutdown(); + }); + } + + @Override + public void close() { + try { + FutureUtils.result(closeAsync()); + } catch (Exception e) { + log.warn("Encountered exceptions on closing watch client", e); + } + this.scheduler.forceShutdown(10, TimeUnit.SECONDS); + } + + private StreamObserver createWatchStreamObserver() { + return new StreamObserver() { + @Override + public void onNext(WatchResponse watchResponse) { + if (isClosed()) { + return; + } + 
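+                // Editor's note: hop onto the single watchExecutor thread so
+                // responses mutate shared watcher state in order, off the
+                // gRPC callback thread.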
watchExecutor.submit(() -> processWatchResponse(watchResponse)); + } + + @Override + public void onError(Throwable t) { + if (isClosed()) { + return; + } + watchExecutor.submit(() -> processError(t)); + } + + @Override + public void onCompleted() { + } + }; + } + + private void processWatchResponse(WatchResponse watchResponse) { + // prevents grpc on sending watchResponse to a closed watch client. + if (isClosed()) { + return; + } + + if (watchResponse.getCreated()) { + processCreate(watchResponse); + } else if (watchResponse.getCanceled()) { + processCanceled(watchResponse); + } else { + processEvents(watchResponse); + } + } + + private void processError(Throwable t) { + // prevents grpc on sending error to a closed watch client. + if (this.isClosed()) { + return; + } + + Status status = Status.fromThrowable(t); + if (this.isHaltError(status) || this.isNoLeaderError(status)) { + this.notifyWatchers(toEtcdException(status)); + this.closeGrpcWatchStreamObserver(); + this.cancelSet.clear(); + return; + } + // resume with a delay; avoiding immediate retry on a long connection downtime. + scheduler.schedule(this::resume, 500, TimeUnit.MILLISECONDS); + } + + private void resume() { + this.closeGrpcWatchStreamObserver(); + this.cancelSet.clear(); + this.resumeWatchers(); + } + + private synchronized StreamObserver getGrpcWatchStreamObserver() { + if (this.grpcWatchStreamObserver == null) { + this.grpcWatchStreamObserver = this.stub.watch(this.createWatchStreamObserver()); + } + return this.grpcWatchStreamObserver; + } + + // closeGrpcWatchStreamObserver closes the underlying grpc watch stream. + private void closeGrpcWatchStreamObserver() { + if (this.grpcWatchStreamObserver == null) { + return; + } + this.grpcWatchStreamObserver.onCompleted(); + this.grpcWatchStreamObserver = null; + } + + private void processCreate(WatchResponse response) { + EtcdWatcher watcher = this.pendingWatchers.poll(); + + this.sendNextWatchCreateRequest(); + + if (watcher == null) { + // shouldn't happen + // may happen due to duplicate watch create responses. + log.warn("Watch client receives watch create response but find no corresponding watcher"); + return; + } + + if (watcher.isClosed()) { + return; + } + + if (response.getWatchId() == -1) { + watcher.notifyWatchResponse(new WatchResponseWithError( + newEtcdException(ErrorCode.INTERNAL, "etcd server failed to create watch id"))); + return; + } + + if (watcher.getRevision() == 0) { + watcher.setRevision(response.getHeader().getRevision()); + } + + watcher.setWatchID(response.getWatchId()); + this.watchers.put(watcher.getWatchID(), watcher); + } + + /** + * chooses the next resuming watcher to register with the grpc stream. + */ + private Optional nextResume() { + EtcdWatcher pendingWatcher = this.pendingWatchers.peek(); + if (pendingWatcher != null) { + return Optional.of(this.toWatchCreateRequest(pendingWatcher)); + } + return Optional.empty(); + } + + private void sendNextWatchCreateRequest() { + this.nextResume().ifPresent( + (nextWatchRequest -> this.getGrpcWatchStreamObserver().onNext(nextWatchRequest))); + } + + private void processEvents(WatchResponse response) { + EtcdWatcher watcher = this.watchers.get(response.getWatchId()); + if (watcher == null) { + // cancel server side watcher. 
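+            // Editor's note: no local watcher is registered for this id, so
+            // the server is streaming events nobody consumes; ask it to stop.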
+ this.cancelWatcher(response.getWatchId()); + return; + } + + if (response.getCompactRevision() != 0) { + watcher.notifyWatchResponse(new WatchResponseWithError( + EtcdExceptionFactory + .newCompactedException(response.getCompactRevision()))); + return; + } + + if (response.getEventsCount() == 0) { + watcher.setRevision(response.getHeader().getRevision()); + return; + } + + watcher.notifyWatchResponse(new WatchResponseWithError(new io.etcd.jetcd.watch.WatchResponse(response))); + watcher.setRevision( + response + .getEvents(response.getEventsCount() - 1) + .getKv().getModRevision() + 1); + } + + private void resumeWatchers() { + this.watchers.values().forEach(watcher -> { + if (watcher.isClosed()) { + return; + } + watcher.setWatchID(-1); + this.pendingWatchers.add(watcher); + }); + + this.watchers.clear(); + + this.sendNextWatchCreateRequest(); + } + + private void processCanceled(WatchResponse response) { + EtcdWatcher watcher = this.watchers.get(response.getWatchId()); + this.cancelSet.remove(response.getWatchId()); + if (watcher == null) { + return; + } + String reason = response.getCancelReason(); + if (Strings.isNullOrEmpty(reason)) { + watcher.notifyWatchResponse(new WatchResponseWithError(newEtcdException( + ErrorCode.OUT_OF_RANGE, + "etcdserver: mvcc: required revision is a future revision")) + ); + + } else { + watcher.notifyWatchResponse( + new WatchResponseWithError(newEtcdException(ErrorCode.FAILED_PRECONDITION, reason))); + } + } + + private static boolean isNoLeaderError(Status status) { + return status.getCode() == Code.UNAVAILABLE + && "etcdserver: no leader".equals(status.getDescription()); + } + + private static boolean isHaltError(Status status) { + // Unavailable codes mean the system will be right back. + // (e.g., can't connect, lost leader) + // Treat Internal codes as if something failed, leaving the + // system in an inconsistent state, but retrying could make progress. + // (e.g., failed in middle of send, corrupted frame) + return status.getCode() != Code.UNAVAILABLE && status.getCode() != Code.INTERNAL; + } + + private static WatchRequest toWatchCreateRequest(EtcdWatcher watcher) { + ByteString key = UnsafeByteOperations.unsafeWrap(watcher.getKey().getBytes()); + WatchOption option = watcher.getWatchOption(); + WatchCreateRequest.Builder builder = WatchCreateRequest.newBuilder() + .setKey(key) + .setPrevKv(option.isPrevKV()) + .setProgressNotify(option.isProgressNotify()) + .setStartRevision(watcher.getRevision()); + + option.getEndKey() + .ifPresent(endKey -> builder.setRangeEnd(UnsafeByteOperations.unsafeWrap(endKey.getBytes()))); + + if (option.isNoDelete()) { + builder.addFilters(WatchCreateRequest.FilterType.NODELETE); + } + + if (option.isNoPut()) { + builder.addFilters(WatchCreateRequest.FilterType.NOPUT); + } + + return WatchRequest.newBuilder().setCreateRequest(builder).build(); + } + + +} diff --git a/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/EtcdWatcher.java b/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/EtcdWatcher.java new file mode 100644 index 00000000000..203bfb14253 --- /dev/null +++ b/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/EtcdWatcher.java @@ -0,0 +1,115 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.bookkeeper.metadata.etcd; + +import io.etcd.jetcd.ByteSequence; +import io.etcd.jetcd.options.WatchOption; +import io.etcd.jetcd.watch.WatchResponse; +import io.etcd.jetcd.watch.WatchResponseWithError; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CopyOnWriteArraySet; +import java.util.concurrent.ScheduledExecutorService; +import java.util.function.BiConsumer; +import lombok.Getter; +import lombok.Setter; +import lombok.extern.slf4j.Slf4j; +import org.apache.bookkeeper.common.concurrent.FutureUtils; + +/** + * Watcher class holds watcher information. + */ +@Slf4j +public class EtcdWatcher implements AutoCloseable { + + private final ScheduledExecutorService executor; + @Getter + private final WatchOption watchOption; + @Getter + private final ByteSequence key; + // watch listener + private final CopyOnWriteArraySet> consumers; + @Getter + @Setter + private long watchID; + // the revision to watch on. + @Getter + @Setter + private long revision; + private boolean closed = false; + // the client owns this watcher + private final EtcdWatchClient owner; + + EtcdWatcher(ByteSequence key, + WatchOption watchOption, + ScheduledExecutorService executor, + EtcdWatchClient owner) { + this.key = key; + this.watchOption = watchOption; + this.executor = executor; + this.owner = owner; + this.consumers = new CopyOnWriteArraySet<>(); + } + + public void addConsumer(BiConsumer consumer) { + this.consumers.add(consumer); + } + + synchronized boolean isClosed() { + return closed; + } + + void notifyWatchResponse(WatchResponseWithError watchResponse) { + synchronized (this) { + if (closed) { + return; + } + } + + this.executor.submit(() -> consumers.forEach(c -> { + if (watchResponse.getException() != null) { + c.accept(null, watchResponse.getException()); + } else { + c.accept( + watchResponse.getWatchResponse(), + null); + } + })); + } + + public CompletableFuture closeAsync() { + return owner.unwatch(this); + } + + @Override + public void close() { + synchronized (this) { + if (closed) { + return; + } + closed = true; + } + try { + FutureUtils.result(closeAsync()); + } catch (Exception e) { + log.warn("Encountered error on removing watcher '{}' from watch client : {}", + watchID, e.getMessage()); + } + consumers.clear(); + } +} diff --git a/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/LedgerMetadataConsumer.java b/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/LedgerMetadataConsumer.java new file mode 100644 index 00000000000..292d9736372 --- /dev/null +++ b/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/LedgerMetadataConsumer.java @@ -0,0 +1,70 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.bookkeeper.metadata.etcd; + +import java.util.Objects; +import java.util.function.Consumer; +import org.apache.bookkeeper.client.api.LedgerMetadata; +import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.LedgerMetadataListener; +import org.apache.bookkeeper.versioning.Versioned; + +/** + * A consumer wrapper over ledger metadata listener. + */ +class LedgerMetadataConsumer implements Consumer> { + + private final long ledgerId; + private final LedgerMetadataListener listener; + private final Consumer onDeletedConsumer; + + LedgerMetadataConsumer(long ledgerId, + LedgerMetadataListener listener, + Consumer onDeletedConsumer) { + this.ledgerId = ledgerId; + this.listener = listener; + this.onDeletedConsumer = onDeletedConsumer; + } + + @Override + public int hashCode() { + return listener.hashCode(); + } + + @Override + public String toString() { + return listener.toString(); + } + + @Override + public boolean equals(Object obj) { + if (!(obj instanceof LedgerMetadataConsumer)) { + return false; + } + + LedgerMetadataConsumer another = (LedgerMetadataConsumer) obj; + return ledgerId == another.ledgerId + && Objects.equals(listener, another.listener) + && Objects.equals(onDeletedConsumer, another.onDeletedConsumer); + } + + @Override + public void accept(Versioned ledgerMetadataVersioned) { + if (null == ledgerMetadataVersioned.getValue()) { + onDeletedConsumer.accept(ledgerId); + } else { + listener.onChanged(ledgerId, ledgerMetadataVersioned); + } + } +} diff --git a/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/helpers/KeyIterator.java b/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/helpers/KeyIterator.java new file mode 100644 index 00000000000..7bf628412a4 --- /dev/null +++ b/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/helpers/KeyIterator.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.bookkeeper.metadata.etcd.helpers; + +import static org.apache.bookkeeper.common.concurrent.FutureUtils.result; + +import java.util.List; +import java.util.NoSuchElementException; +import java.util.concurrent.CompletableFuture; + +/** + * Iterator over a range of key/value pairs. 
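+ *
+ * <p>Editor's sketch of intended usage (the element type and stream setup are
+ * illustrative, not from the original patch):
+ * <pre>{@code
+ *   KeyIterator<BookieId> iter = new KeyIterator<>(keyStream);
+ *   while (iter.hasNext()) {
+ *       List<BookieId> batch = iter.next();
+ *       // process one decoded batch of keys
+ *   }
+ * }</pre>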
+ */ +public class KeyIterator { + + private final KeyStream stream; + private CompletableFuture> readFuture = null; + private boolean hasNext = true; + private List keys = null; + + public KeyIterator(KeyStream stream) { + this.stream = stream; + } + + public synchronized boolean hasNext() throws Exception { + if (hasNext) { + if (null == readFuture) { + readFuture = stream.readNext(); + } + keys = result(readFuture); + if (keys.isEmpty()) { + hasNext = false; + } + return hasNext; + } else { + return false; + } + } + + public synchronized List next() throws Exception { + try { + if (!hasNext()) { + throw new NoSuchElementException("Reach end of key stream"); + } + return keys; + } finally { + // read next + readFuture = stream.readNext(); + } + } + +} diff --git a/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/helpers/KeySetReader.java b/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/helpers/KeySetReader.java new file mode 100644 index 00000000000..5f8afdb6e40 --- /dev/null +++ b/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/helpers/KeySetReader.java @@ -0,0 +1,282 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.bookkeeper.metadata.etcd.helpers; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.Sets; +import io.etcd.jetcd.ByteSequence; +import io.etcd.jetcd.Client; +import io.etcd.jetcd.KeyValue; +import io.etcd.jetcd.kv.GetResponse; +import io.etcd.jetcd.options.GetOption; +import io.etcd.jetcd.options.WatchOption; +import io.etcd.jetcd.watch.WatchResponse; +import java.util.Collections; +import java.util.Set; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CopyOnWriteArraySet; +import java.util.function.BiConsumer; +import java.util.function.Consumer; +import java.util.function.Function; +import java.util.stream.Collectors; +import lombok.extern.slf4j.Slf4j; +import org.apache.bookkeeper.common.concurrent.FutureUtils; +import org.apache.bookkeeper.metadata.etcd.EtcdWatchClient; +import org.apache.bookkeeper.metadata.etcd.EtcdWatcher; +import org.apache.bookkeeper.versioning.LongVersion; +import org.apache.bookkeeper.versioning.Versioned; + +/** + * A helper class to read a set of keys and watch them. 
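+ *
+ * <p>Editor's sketch (illustrative): a one-shot {@code read()} returns a
+ * versioned snapshot of the key set, while {@code readAndWatch(consumer)} also
+ * subscribes the consumer to later snapshots, e.g.
+ * <pre>{@code
+ *   keySetReader.readAndWatch(bookies ->
+ *       System.out.println("rev " + bookies.getVersion() + " -> " + bookies.getValue()));
+ * }</pre>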
+ */ +@Slf4j +public class KeySetReader implements BiConsumer, AutoCloseable { + + private final Client client; + private final boolean ownWatchClient; + private final EtcdWatchClient watchClient; + private final Function encoder; + private final ByteSequence beginKey; + private final ByteSequence endKey; + private final Set keys; + private final CopyOnWriteArraySet>>> consumers = + new CopyOnWriteArraySet<>(); + private volatile long revision = -1L; + private CompletableFuture watchFuture = null; + private CompletableFuture closeFuture = null; + + public KeySetReader(Client client, + Function encoder, + ByteSequence beginKey, + ByteSequence endKey) { + this(client, new EtcdWatchClient(client), encoder, beginKey, endKey); + } + + public KeySetReader(Client client, + EtcdWatchClient watchClient, + Function encoder, + ByteSequence beginKey, + ByteSequence endKey) { + this.client = client; + this.watchClient = watchClient; + this.ownWatchClient = false; + this.encoder = encoder; + this.beginKey = beginKey; + this.endKey = endKey; + this.keys = Collections.synchronizedSet(Sets.newHashSet()); + } + + public CompletableFuture>> read() { + GetOption.Builder optionBuilder = GetOption.newBuilder() + .withKeysOnly(true); + if (null != endKey) { + optionBuilder.withRange(endKey); + } + return client.getKVClient().get( + beginKey, + optionBuilder.build() + ).thenApply(getResp -> { + boolean updated = updateLocalValue(getResp); + Versioned> localValue = getLocalValue(); + try { + return localValue; + } finally { + if (updated) { + notifyConsumers(localValue); + } + } + }); + } + + @VisibleForTesting + long getRevision() { + return revision; + } + + private void notifyConsumers(Versioned> localValue) { + consumers.forEach(consumer -> consumer.accept(localValue)); + } + + private synchronized boolean updateLocalValue(GetResponse response) { + if (revision < response.getHeader().getRevision()) { + revision = response.getHeader().getRevision(); + keys.clear(); + for (KeyValue kv : response.getKvs()) { + ByteSequence key = kv.getKey(); + keys.add(key); + } + return true; + } else { + return false; + } + } + + private synchronized Versioned> processWatchResponse(WatchResponse response) { + if (null != closeFuture) { + return null; + } + if (log.isDebugEnabled()) { + log.debug("Received watch response : revision = {}, {} events = {}", + response.getHeader().getRevision(), response.getEvents().size(), response.getEvents()); + } + + if (response.getHeader().getRevision() <= revision) { + return null; + } + revision = response.getHeader().getRevision(); + response.getEvents().forEach(event -> { + switch (event.getEventType()) { + case PUT: + keys.add(event.getKeyValue().getKey()); + break; + case DELETE: + keys.remove(event.getKeyValue().getKey()); + break; + default: + // ignore + break; + } + + }); + return getLocalValue(); + } + + @VisibleForTesting + synchronized Versioned> getLocalValue() { + return new Versioned<>( + keys.stream().map(encoder).collect(Collectors.toSet()), + new LongVersion(revision) + ); + } + + private CompletableFuture>> getOrRead() { + boolean shouldRead = false; + synchronized (this) { + if (revision < 0L) { + // the value is never cached. 
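+                // Editor's note: revision < 0 means no snapshot has been taken
+                // yet, so fall through to a real etcd read; once a watch is in
+                // place the cached set stays current and reads are served locally.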
+ shouldRead = true; + } + } + if (shouldRead) { + return read(); + } else { + return FutureUtils.value(getLocalValue()); + } + } + + @VisibleForTesting + synchronized boolean isWatcherSet() { + return null != watchFuture; + } + + public CompletableFuture>> readAndWatch(Consumer>> consumer) { + if (!consumers.add(consumer) || isWatcherSet()) { + return getOrRead(); + } + + return getOrRead() + .thenCompose(versionedKeys -> { + long revision = ((LongVersion) versionedKeys.getVersion()).getLongVersion(); + return watch(revision).thenApply(ignored -> versionedKeys); + }); + } + + public CompletableFuture unwatch(Consumer>> consumer) { + if (consumers.remove(consumer) && consumers.isEmpty()) { + return closeOrRewatch(false); + } else { + return FutureUtils.Void(); + } + } + + private synchronized CompletableFuture watch(long revision) { + if (null != watchFuture) { + return watchFuture; + } + + WatchOption.Builder optionBuilder = WatchOption.newBuilder() + .withRevision(revision); + if (null != endKey) { + optionBuilder.withRange(endKey); + } + watchFuture = watchClient.watch(beginKey, optionBuilder.build(), this); + return watchFuture.whenComplete((watcher, cause) -> { + if (null != cause) { + synchronized (KeySetReader.this) { + watchFuture = null; + } + } + }); + } + + private CompletableFuture closeOrRewatch(boolean rewatch) { + CompletableFuture oldWatcherFuture; + synchronized (this) { + oldWatcherFuture = watchFuture; + if (rewatch && null == closeFuture) { + watchFuture = watch(revision); + } else { + watchFuture = null; + } + } + if (null != oldWatcherFuture) { + return oldWatcherFuture.thenCompose(EtcdWatcher::closeAsync); + } else { + return FutureUtils.Void(); + } + } + + @Override + public void accept(WatchResponse watchResponse, Throwable throwable) { + if (null == throwable) { + Versioned> localValue = processWatchResponse(watchResponse); + if (null != localValue) { + notifyConsumers(localValue); + } + } else { + closeOrRewatch(true); + } + } + + public CompletableFuture closeAsync() { + CompletableFuture future; + synchronized (this) { + if (null == closeFuture) { + closeFuture = closeOrRewatch(false).thenCompose(ignored -> { + if (ownWatchClient) { + return watchClient.closeAsync(); + } else { + return FutureUtils.Void(); + } + }); + } + future = closeFuture; + } + return future; + } + + @Override + public void close() { + try { + FutureUtils.result(closeAsync()); + } catch (Exception e) { + log.warn("Encountered exceptions on closing key reader : {}", e.getMessage()); + } + } +} diff --git a/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/helpers/KeyStream.java b/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/helpers/KeyStream.java new file mode 100644 index 00000000000..268daf42e43 --- /dev/null +++ b/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/helpers/KeyStream.java @@ -0,0 +1,120 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.bookkeeper.metadata.etcd.helpers;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+
+import com.google.common.primitives.UnsignedBytes;
+import io.etcd.jetcd.ByteSequence;
+import io.etcd.jetcd.KV;
+import io.etcd.jetcd.KeyValue;
+import io.etcd.jetcd.options.GetOption;
+import java.util.Collections;
+import java.util.List;
+import java.util.concurrent.CompletableFuture;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+import lombok.extern.slf4j.Slf4j;
+import org.apache.bookkeeper.common.concurrent.FutureUtils;
+
+/**
+ * Read a range of key/value pairs in a streaming way.
+ */
+@Slf4j
+public class KeyStream<T> {
+
+    private final KV kvClient;
+    private final ByteSequence startKey;
+    private final ByteSequence endKey;
+    private final Function<ByteSequence, T> encoder;
+    private final int batchSize;
+    private ByteSequence nextKey;
+    private ByteSequence lastKey = null;
+    private boolean hasMore = true;
+
+    public KeyStream(KV kvClient,
+                     ByteSequence startKey,
+                     ByteSequence endKey,
+                     Function<ByteSequence, T> encoder) {
+        this(kvClient, startKey, endKey, encoder, 100);
+    }
+
+    public KeyStream(KV kvClient,
+                     ByteSequence startKey,
+                     ByteSequence endKey,
+                     Function<ByteSequence, T> encoder,
+                     int batchSize) {
+        this.kvClient = kvClient;
+        this.startKey = startKey;
+        this.endKey = endKey;
+        this.nextKey = startKey;
+        this.encoder = encoder;
+        this.batchSize = batchSize;
+    }
+
+    public CompletableFuture<List<T>> readNext() {
+        ByteSequence beginKey;
+        int batchSize = this.batchSize;
+        synchronized (this) {
+            if (!hasMore) {
+                return FutureUtils.value(Collections.emptyList());
+            }
+
+            beginKey = nextKey;
+            if (null != lastKey) {
+                // read one more in since we are including last key.
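+                // (each page resumes the range scan at the previous page's
+                // last key, so that key is fetched twice; the duplicate is
+                // dropped below before the batch is returned)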
+                batchSize += 1;
+            }
+        }
+        if (log.isTraceEnabled()) {
+            log.trace("Read keys between {} and {}", beginKey.toString(UTF_8), endKey.toString(UTF_8));
+        }
+        return kvClient.get(
+            beginKey,
+            GetOption.newBuilder()
+                .withRange(endKey)
+                .withKeysOnly(true)
+                .withLimit(batchSize)
+                .withSortField(GetOption.SortTarget.KEY)
+                .withSortOrder(GetOption.SortOrder.ASCEND)
+                .build()
+        ).thenApply(getResp -> {
+            List<KeyValue> kvs = getResp.getKvs();
+            ByteSequence lkey;
+            synchronized (KeyStream.this) {
+                hasMore = getResp.isMore();
+                lkey = lastKey;
+                if (kvs.size() > 0) {
+                    lastKey = nextKey = kvs.get(kvs.size() - 1).getKey();
+                }
+            }
+            if (null != lkey
+                && kvs.size() > 0
+                && UnsignedBytes.lexicographicalComparator().compare(
+                    lkey.getBytes(),
+                    kvs.get(0).getKey().getBytes()) == 0) {
+                kvs.remove(0);
+            }
+            return kvs.stream()
+                .map(kv -> encoder.apply(kv.getKey()))
+                .collect(Collectors.toList());
+        });
+    }
+
+}
diff --git a/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/helpers/RevisionedConsumer.java b/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/helpers/RevisionedConsumer.java
new file mode 100644
index 00000000000..69095a01287
--- /dev/null
+++ b/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/helpers/RevisionedConsumer.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.bookkeeper.metadata.etcd.helpers;
+
+import java.util.function.Consumer;
+import org.apache.bookkeeper.versioning.Version;
+import org.apache.bookkeeper.versioning.Version.Occurred;
+import org.apache.bookkeeper.versioning.Versioned;
+
+/**
+ * A consumer that caches the last version.
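+ *
+ * <p>A notification is forwarded to the wrapped consumer only if its version
+ * strictly advances the cached one, which suppresses stale or replayed updates.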
+ */
+public class RevisionedConsumer<T> implements Consumer<Versioned<T>> {
+
+    protected final Consumer<Versioned<T>> consumer;
+    protected volatile Version localVersion = null;
+
+    protected RevisionedConsumer(Consumer<Versioned<T>> consumer) {
+        this.consumer = consumer;
+    }
+
+    @Override
+    public void accept(Versioned<T> versionedVal) {
+        synchronized (this) {
+            if (localVersion != null
+                && Occurred.BEFORE != localVersion.compare(versionedVal.getVersion())) {
+                return;
+            }
+            localVersion = versionedVal.getVersion();
+        }
+        consumer.accept(versionedVal);
+    }
+
+    @Override
+    public int hashCode() {
+        return consumer.hashCode();
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+        if (!(obj instanceof Consumer)) {
+            return false;
+        }
+        if (obj instanceof RevisionedConsumer) {
+            return consumer.equals(((RevisionedConsumer<?>) obj).consumer);
+        } else {
+            return consumer.equals(obj);
+        }
+    }
+
+    @Override
+    public String toString() {
+        return consumer.toString();
+    }
+}
diff --git a/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/helpers/ValueStream.java b/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/helpers/ValueStream.java
new file mode 100644
index 00000000000..25e47df3323
--- /dev/null
+++ b/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/helpers/ValueStream.java
@@ -0,0 +1,310 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.bookkeeper.metadata.etcd.helpers;
+
+import com.google.common.annotations.VisibleForTesting;
+import io.etcd.jetcd.ByteSequence;
+import io.etcd.jetcd.Client;
+import io.etcd.jetcd.common.exception.ClosedClientException;
+import io.etcd.jetcd.kv.GetResponse;
+import io.etcd.jetcd.options.WatchOption;
+import io.etcd.jetcd.watch.WatchResponse;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.TimeUnit;
+import java.util.function.BiConsumer;
+import java.util.function.Consumer;
+import java.util.function.Function;
+import lombok.extern.slf4j.Slf4j;
+import org.apache.bookkeeper.common.concurrent.FutureUtils;
+import org.apache.bookkeeper.metadata.etcd.EtcdWatchClient;
+import org.apache.bookkeeper.metadata.etcd.EtcdWatcher;
+import org.apache.bookkeeper.versioning.LongVersion;
+import org.apache.bookkeeper.versioning.Versioned;
+
+/**
+ * A helper class to read the stream of values of a given key.
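+ *
+ * <p>A minimal usage sketch (the key and the string encoder below are
+ * illustrative only, not part of the driver):
+ * <pre>{@code
+ * ValueStream<String> vs = new ValueStream<>(
+ *     client, bs -> bs.toString(UTF_8), ByteSequence.from("/a/key", UTF_8));
+ * vs.readAndWatch(versioned ->
+ *     System.out.println(versioned.getValue() + "@" + versioned.getVersion()));
+ * }</pre>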
+ */
+@Slf4j
+public class ValueStream<T> implements BiConsumer<WatchResponse, Throwable>, AutoCloseable {
+
+    private final Client client;
+    private final boolean ownWatchClient;
+    private final EtcdWatchClient watchClient;
+    private final Function<ByteSequence, T> encoder;
+    private final ByteSequence key;
+    private final Map<Consumer<Versioned<T>>, RevisionedConsumer<T>> consumers =
+        new HashMap<>();
+    private volatile T localValue = null;
+    private volatile long revision = -1L;
+    private CompletableFuture<EtcdWatcher> watchFuture = null;
+    private CompletableFuture<Void> closeFuture = null;
+
+    public ValueStream(Client client,
+                       Function<ByteSequence, T> encoder,
+                       ByteSequence key) {
+        this(client, new EtcdWatchClient(client), encoder, key);
+    }
+
+    public ValueStream(Client client,
+                       EtcdWatchClient watchClient,
+                       Function<ByteSequence, T> encoder,
+                       ByteSequence key) {
+        this.client = client;
+        this.watchClient = watchClient;
+        this.ownWatchClient = false;
+        this.encoder = encoder;
+        this.key = key;
+    }
+
+    public CompletableFuture<Versioned<T>> read() {
+        return client.getKVClient().get(
+            key
+        ).thenApply(getResp -> {
+            boolean updated = updateLocalValue(getResp);
+            Versioned<T> localValue = getLocalValue();
+            try {
+                return localValue;
+            } finally {
+                if (updated) {
+                    notifyConsumers(localValue);
+                }
+            }
+        });
+    }
+
+    @VisibleForTesting
+    public int getNumConsumers() {
+        synchronized (consumers) {
+            return consumers.size();
+        }
+    }
+
+    private void notifyConsumers(Versioned<T> localValue) {
+        synchronized (consumers) {
+            consumers.values().forEach(consumer -> consumer.accept(localValue));
+        }
+    }
+
+    private synchronized boolean updateLocalValue(GetResponse response) {
+        if (revision < response.getHeader().getRevision()) {
+            revision = response.getHeader().getRevision();
+            if (response.getCount() > 0) {
+                localValue = encoder.apply(response.getKvs().get(0).getValue());
+            } else {
+                localValue = null;
+            }
+            return true;
+        } else {
+            return false;
+        }
+    }
+
+    private synchronized Versioned<T> processWatchResponse(WatchResponse response) {
+        if (null != closeFuture) {
+            return null;
+        }
+        if (log.isDebugEnabled()) {
+            log.debug("Received watch response : revision = {}, {} events = {}",
+                response.getHeader().getRevision(), response.getEvents().size(), response.getEvents());
+        }
+
+        if (response.getHeader().getRevision() <= revision) {
+            return null;
+        }
+        revision = response.getHeader().getRevision();
+        response.getEvents().forEach(event -> {
+            switch (event.getEventType()) {
+                case PUT:
+                    this.localValue = encoder.apply(event.getKeyValue().getValue());
+                    break;
+                case DELETE:
+                    this.localValue = null;
+                    break;
+                default:
+                    // ignore
+                    break;
+            }
+
+        });
+        return getLocalValue();
+    }
+
+    @VisibleForTesting
+    synchronized Versioned<T> getLocalValue() {
+        return new Versioned<>(
+            localValue,
+            new LongVersion(revision)
+        );
+    }
+
+    private CompletableFuture<Versioned<T>> getOrRead() {
+        boolean shouldRead = false;
+        synchronized (this) {
+            if (revision < 0L) {
+                // the value is never cached.
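+                // (same convention as in KeySetReader: the revision stays
+                // negative until a first read or watch event populates the
+                // local value)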
+                shouldRead = true;
+            }
+        }
+        if (shouldRead) {
+            return read();
+        } else {
+            return FutureUtils.value(getLocalValue());
+        }
+    }
+
+    @VisibleForTesting
+    synchronized boolean isWatcherSet() {
+        return null != watchFuture;
+    }
+
+    private synchronized CompletableFuture<EtcdWatcher> getWatchFuture() {
+        return this.watchFuture;
+    }
+
+    @VisibleForTesting
+    public CompletableFuture<EtcdWatcher> waitUntilWatched() {
+        CompletableFuture<EtcdWatcher> wf;
+        while ((wf = getWatchFuture()) == null) {
+            try {
+                TimeUnit.MILLISECONDS.sleep(100);
+            } catch (InterruptedException e) {
+                if (log.isDebugEnabled()) {
+                    log.debug("Interrupted at waiting until the key is watched", e);
+                }
+            }
+        }
+        return wf;
+    }
+
+    public CompletableFuture<Versioned<T>> readAndWatch(Consumer<Versioned<T>> consumer) {
+        final RevisionedConsumer<T> revisionedConsumer = new RevisionedConsumer<>(consumer);
+        final boolean consumerExisted;
+        synchronized (consumers) {
+            consumerExisted = (null != consumers.put(consumer, revisionedConsumer));
+        }
+        if (consumerExisted) {
+            return getOrRead();
+        }
+
+        return getOrRead()
+            .thenCompose(versionedVal -> {
+                long revision = ((LongVersion) versionedVal.getVersion()).getLongVersion();
+                synchronized (this) {
+                    notifyConsumers(versionedVal);
+                }
+                return watchIfNeeded(revision).thenApply(ignored -> versionedVal);
+            });
+    }
+
+    public CompletableFuture<Boolean> unwatch(Consumer<Versioned<T>> consumer) {
+        boolean lastConsumer;
+        synchronized (consumers) {
+            lastConsumer = (null != consumers.remove(consumer) && consumers.isEmpty());
+        }
+        if (lastConsumer) {
+            return closeOrRewatch(false).thenApply(ignored -> true);
+        } else {
+            return FutureUtils.value(false);
+        }
+    }
+
+    private synchronized CompletableFuture<EtcdWatcher> watchIfNeeded(long revision) {
+        if (null != watchFuture) {
+            return watchFuture;
+        }
+        watchFuture = watch(revision);
+        return watchFuture;
+    }
+
+    private CompletableFuture<EtcdWatcher> watch(long revision) {
+        WatchOption.Builder optionBuilder = WatchOption.newBuilder()
+            .withRevision(revision);
+        return watchClient.watch(key, optionBuilder.build(), this)
+            .whenComplete((watcher, cause) -> {
+                if (null != cause) {
+                    synchronized (ValueStream.this) {
+                        watchFuture = null;
+                    }
+                }
+            });
+    }
+
+    private CompletableFuture<Void> closeOrRewatch(boolean rewatch) {
+        CompletableFuture<EtcdWatcher> oldWatcherFuture;
+        synchronized (this) {
+            oldWatcherFuture = watchFuture;
+            if (rewatch && null == closeFuture) {
+                watchFuture = watch(revision);
+            } else {
+                watchFuture = null;
+            }
+        }
+        if (null != oldWatcherFuture) {
+            return oldWatcherFuture.thenCompose(EtcdWatcher::closeAsync);
+        } else {
+            return FutureUtils.Void();
+        }
+    }
+
+    @Override
+    public void accept(WatchResponse watchResponse, Throwable throwable) {
+        if (null == throwable) {
+            if (log.isDebugEnabled()) {
+                log.debug("Received watch response : revision = {}, {} events = {}",
+                    watchResponse.getHeader().getRevision(),
+                    watchResponse.getEvents().size(),
+                    watchResponse.getEvents());
+            }
+
+            synchronized (this) {
+                Versioned<T> localValue = processWatchResponse(watchResponse);
+                if (null != localValue) {
+                    notifyConsumers(localValue);
+                }
+            }
+        } else {
+            // rewatch if it is not a `ClosedClientException`
+            closeOrRewatch(!(throwable instanceof ClosedClientException));
+        }
+    }
+
+    public CompletableFuture<Void> closeAsync() {
+        CompletableFuture<Void> future;
+        synchronized (this) {
+            if (null == closeFuture) {
+                closeFuture = closeOrRewatch(false).thenCompose(ignored -> {
+                    if (ownWatchClient) {
+                        return watchClient.closeAsync();
+                    } else {
+                        return FutureUtils.Void();
+                    }
+                });
+            }
+            future = closeFuture;
+        }
+        return future;
+    }
+
+    @Override
+    public
void close() { + try { + FutureUtils.result(closeAsync()); + } catch (Exception e) { + log.warn("Encountered exceptions on closing key reader : {}", e.getMessage()); + } + } +} diff --git a/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/helpers/package-info.java b/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/helpers/package-info.java new file mode 100644 index 00000000000..ce8f2e515ed --- /dev/null +++ b/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/helpers/package-info.java @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Etcd helper classes. + */ +package org.apache.bookkeeper.metadata.etcd.helpers; \ No newline at end of file diff --git a/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/package-info.java b/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/package-info.java new file mode 100644 index 00000000000..81369d82cc1 --- /dev/null +++ b/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/package-info.java @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Etcd based metadata driver. + */ +package org.apache.bookkeeper.metadata.etcd; \ No newline at end of file diff --git a/metadata-drivers/etcd/src/test/java/org/apache/bookkeeper/metadata/etcd/Etcd64bitIdGeneratorTest.java b/metadata-drivers/etcd/src/test/java/org/apache/bookkeeper/metadata/etcd/Etcd64bitIdGeneratorTest.java new file mode 100644 index 00000000000..fff2f2264ed --- /dev/null +++ b/metadata-drivers/etcd/src/test/java/org/apache/bookkeeper/metadata/etcd/Etcd64bitIdGeneratorTest.java @@ -0,0 +1,153 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.bookkeeper.metadata.etcd;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import com.google.common.util.concurrent.RateLimiter;
+import io.etcd.jetcd.Client;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.atomic.AtomicLong;
+import lombok.Cleanup;
+import lombok.extern.slf4j.Slf4j;
+import org.apache.bookkeeper.common.concurrent.FutureUtils;
+import org.apache.bookkeeper.metadata.etcd.testing.EtcdTestBase;
+import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GenericCallbackFuture;
+import org.apache.commons.lang.RandomStringUtils;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Integration test {@link Etcd64bitIdGenerator}.
+ */
+@Slf4j
+public class Etcd64bitIdGeneratorTest extends EtcdTestBase {
+
+    private String scope;
+    private Etcd64bitIdGenerator generator;
+
+    @Before
+    @Override
+    public void setUp() throws Exception {
+        super.setUp();
+        this.scope = "/" + RandomStringUtils.randomAlphabetic(8);
+        this.generator = new Etcd64bitIdGenerator(etcdClient.getKVClient(), scope);
+        log.info("Setup id generator under scope {}", scope);
+    }
+
+    @Test
+    public void testGenerateIdSequence() throws Exception {
+        Map<Integer, Long> buckets = new HashMap<>();
+
+        int numIterations = 10;
+
+        for (int i = 0; i < numIterations; i++) {
+            log.info("Id generation iteration : {}", i);
+            for (int j = 0; j < Etcd64bitIdGenerator.NUM_BUCKETS; j++) {
+                GenericCallbackFuture<Long> future = new GenericCallbackFuture<>();
+                generator.generateLedgerId(future);
+                long lid = future.get();
+                int bucketId = Etcd64bitIdGenerator.getBucketId(lid);
+                long idInBucket = Etcd64bitIdGenerator.getIdInBucket(lid);
+                Long prevIdInBucket = buckets.put(bucketId, idInBucket);
+                if (null == prevIdInBucket) {
+                    assertEquals(1, idInBucket);
+                } else {
+                    assertEquals(prevIdInBucket + 1, idInBucket);
+                }
+            }
+        }
+
+        assertEquals(Etcd64bitIdGenerator.NUM_BUCKETS, buckets.size());
+        for (Map.Entry<Integer, Long> bucketEntry : buckets.entrySet()) {
+            assertEquals(numIterations, bucketEntry.getValue().intValue());
+        }
+    }
+
+    /**
+     * Test generating ids in parallel and ensure there is no duplicated id.
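+     *
+     * <p>Each thread drives its own etcd client through a shared rate limiter
+     * and records every generated id in a concurrent set; an id that fails to
+     * insert into the set is a duplicate and fails the test immediately.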
+     */
+    @Test
+    public void testGenerateIdParallel() throws Exception {
+        final int numThreads = 10;
+        @Cleanup("shutdown")
+        ExecutorService executor = Executors.newFixedThreadPool(numThreads);
+
+        final int numIds = 10000;
+        final AtomicLong totalIds = new AtomicLong(numIds);
+        final Set<Long> ids = Collections.newSetFromMap(new ConcurrentHashMap<>());
+        final RateLimiter limiter = RateLimiter.create(1000);
+        final CompletableFuture<Void> doneFuture = new CompletableFuture<>();
+        for (int i = 0; i < numThreads; i++) {
+            executor.submit(() -> {
+                Client client = Client.builder()
+                    .endpoints(etcdContainer.getClientEndpoint())
+                    .build();
+                Etcd64bitIdGenerator gen = new Etcd64bitIdGenerator(
+                    client.getKVClient(),
+                    scope
+                );
+
+                AtomicBoolean running = new AtomicBoolean(true);
+
+                while (running.get()) {
+                    limiter.acquire();
+
+                    GenericCallbackFuture<Long> genFuture = new GenericCallbackFuture<>();
+                    gen.generateLedgerId(genFuture);
+
+                    genFuture
+                        .thenAccept(lid -> {
+                            boolean duplicatedFound = !(ids.add(lid));
+                            if (duplicatedFound) {
+                                running.set(false);
+                                doneFuture.completeExceptionally(
+                                    new IllegalStateException("Duplicated id " + lid + " generated : " + ids));
+                                return;
+                            } else {
+                                if (totalIds.decrementAndGet() <= 0) {
+                                    running.set(false);
+                                    doneFuture.complete(null);
+                                }
+                            }
+                        })
+                        .exceptionally(cause -> {
+                            running.set(false);
+                            doneFuture.completeExceptionally(cause);
+                            return null;
+                        });
+                }
+            });
+        }
+
+        FutureUtils.result(doneFuture);
+        assertTrue(totalIds.get() <= 0);
+        assertTrue(ids.size() >= numIds);
+    }
+
+}
diff --git a/metadata-drivers/etcd/src/test/java/org/apache/bookkeeper/metadata/etcd/EtcdClusterTest.java b/metadata-drivers/etcd/src/test/java/org/apache/bookkeeper/metadata/etcd/EtcdClusterTest.java
new file mode 100644
index 00000000000..3f2d99e0a75
--- /dev/null
+++ b/metadata-drivers/etcd/src/test/java/org/apache/bookkeeper/metadata/etcd/EtcdClusterTest.java
@@ -0,0 +1,342 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package org.apache.bookkeeper.metadata.etcd; + +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.apache.bookkeeper.metadata.etcd.EtcdUtils.getBookiesPath; +import static org.apache.bookkeeper.metadata.etcd.EtcdUtils.getBucketsPath; +import static org.apache.bookkeeper.metadata.etcd.EtcdUtils.getClusterInstanceIdPath; +import static org.apache.bookkeeper.metadata.etcd.EtcdUtils.getLayoutKey; +import static org.apache.bookkeeper.metadata.etcd.EtcdUtils.getLedgersPath; +import static org.apache.bookkeeper.metadata.etcd.EtcdUtils.getReadonlyBookiesPath; +import static org.apache.bookkeeper.metadata.etcd.EtcdUtils.getScopeEndKey; +import static org.apache.bookkeeper.metadata.etcd.EtcdUtils.getUnderreplicationPath; +import static org.apache.bookkeeper.metadata.etcd.EtcdUtils.getWritableBookiesPath; +import static org.apache.bookkeeper.metadata.etcd.EtcdUtils.msResult; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotEquals; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import io.etcd.jetcd.ByteSequence; +import io.etcd.jetcd.Client; +import io.etcd.jetcd.kv.GetResponse; +import io.etcd.jetcd.options.GetOption; +import java.util.UUID; +import lombok.extern.slf4j.Slf4j; +import org.apache.bookkeeper.bookie.BookieException.MetadataStoreException; +import org.apache.bookkeeper.discover.RegistrationManager; +import org.apache.bookkeeper.meta.LedgerLayout; +import org.apache.bookkeeper.metadata.etcd.testing.EtcdTestBase; +import org.apache.bookkeeper.net.BookieId; +import org.apache.commons.lang.RandomStringUtils; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +/** + * Test cluster related operation on Etcd based registration manager. 
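+ *
+ * <p>Each test runs against a freshly generated scope, so init, nuke,
+ * prepare-format and format can be exercised end to end without the tests
+ * interfering with each other.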
+ */ +@Slf4j +public class EtcdClusterTest extends EtcdTestBase { + + private String scope; + private RegistrationManager regMgr; + + @Before + @Override + public void setUp() throws Exception { + super.setUp(); + this.scope = RandomStringUtils.randomAlphabetic(32); + this.regMgr = new EtcdRegistrationManager( + newEtcdClient(), scope + ); + } + + @After + @Override + public void tearDown() throws Exception { + this.regMgr.close(); + super.tearDown(); + } + + @Test + public void testGetClusterInstanceIdIfClusterNotInitialized() throws Exception { + try { + regMgr.getClusterInstanceId(); + fail("Should fail getting cluster instance id if cluster not initialized"); + } catch (MetadataStoreException e) { + assertTrue(e.getMessage().contains("BookKeeper is not initialized")); + } + } + + @Test + public void testGetClusterInstanceId() throws Exception { + assertClusterNotExists(etcdClient, scope); + regMgr.initNewCluster(); + String instanceId = regMgr.getClusterInstanceId(); + UUID uuid = UUID.fromString(instanceId); + log.info("Cluster instance id : {}", uuid); + } + + @Test + public void testNukeNonExistingCluster() throws Exception { + assertClusterNotExists(etcdClient, scope); + assertTrue(regMgr.nukeExistingCluster()); + assertClusterNotExists(etcdClient, scope); + } + + @Test + public void testNukeExistingCluster() throws Exception { + assertTrue(regMgr.initNewCluster()); + assertClusterExists(etcdClient, scope); + assertTrue(regMgr.nukeExistingCluster()); + assertClusterNotExists(etcdClient, scope); + } + + @Test + public void testInitNewClusterTwice() throws Exception { + assertTrue(regMgr.initNewCluster()); + assertClusterExists(etcdClient, scope); + String instanceId = regMgr.getClusterInstanceId(); + assertFalse(regMgr.initNewCluster()); + assertClusterExists(etcdClient, scope); + assertEquals(instanceId, regMgr.getClusterInstanceId()); + } + + @Test + public void testPrepareFormatNonExistingCluster() throws Exception { + assertFalse(regMgr.prepareFormat()); + } + + @Test + public void testPrepareFormatExistingCluster() throws Exception { + assertTrue(regMgr.initNewCluster()); + assertClusterExists(etcdClient, scope); + assertTrue(regMgr.prepareFormat()); + } + + @Test + public void testNukeExistingClusterWithWritableBookies() throws Exception { + testNukeExistingClusterWithBookies(false); + } + + @Test + public void testNukeExistingClusterWithReadonlyBookies() throws Exception { + testNukeExistingClusterWithBookies(true); + } + + private void testNukeExistingClusterWithBookies(boolean readonly) throws Exception { + assertTrue(regMgr.initNewCluster()); + assertClusterExists(etcdClient, scope); + createNumBookies(etcdClient, scope, 3, readonly); + assertFalse(regMgr.nukeExistingCluster()); + assertClusterExists(etcdClient, scope); + removeNumBookies(etcdClient, scope, 3, readonly); + assertTrue(regMgr.nukeExistingCluster()); + assertClusterNotExists(etcdClient, scope); + } + + @Test + public void testNukeExistingClusterWithAllBookies() throws Exception { + assertTrue(regMgr.initNewCluster()); + assertClusterExists(etcdClient, scope); + createNumBookies(etcdClient, scope, 1, false); + createNumBookies(etcdClient, scope, 2, true); + assertFalse(regMgr.nukeExistingCluster()); + assertClusterExists(etcdClient, scope); + removeNumBookies(etcdClient, scope, 1, false); + removeNumBookies(etcdClient, scope, 2, true); + assertTrue(regMgr.nukeExistingCluster()); + assertClusterNotExists(etcdClient, scope); + } + + @Test + public void testFormatNonExistingCluster() throws Exception { + 
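+        // formatting a cluster that was never initialized is expected to
+        // simply bootstrap it: the scope starts out empty and exists after format()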
assertClusterNotExists(etcdClient, scope); + assertTrue(regMgr.format()); + assertClusterExists(etcdClient, scope); + } + + @Test + public void testFormatExistingCluster() throws Exception { + assertClusterNotExists(etcdClient, scope); + assertTrue(regMgr.initNewCluster()); + assertClusterExists(etcdClient, scope); + String clusterInstanceId = regMgr.getClusterInstanceId(); + assertTrue(regMgr.format()); + assertClusterExists(etcdClient, scope); + assertNotEquals(clusterInstanceId, regMgr.getClusterInstanceId()); + } + + @Test + public void testFormatExistingClusterWithBookies() throws Exception { + assertClusterNotExists(etcdClient, scope); + assertTrue(regMgr.initNewCluster()); + assertClusterExists(etcdClient, scope); + String clusterInstanceId = regMgr.getClusterInstanceId(); + createNumBookies(etcdClient, scope, 3, false); + assertFalse(regMgr.format()); + assertClusterExists(etcdClient, scope); + assertEquals(clusterInstanceId, regMgr.getClusterInstanceId()); + } + + private static void createNumBookies(Client client, + String scope, + int numBookies, + boolean readonly) throws Exception { + for (int i = 0; i < numBookies; i++) { + BookieId bookieId = BookieId.parse("bookie-" + i + ":3181"); + String bookiePath; + if (readonly) { + bookiePath = EtcdUtils.getReadonlyBookiePath(scope, bookieId); + } else { + bookiePath = EtcdUtils.getWritableBookiePath(scope, bookieId); + } + msResult(client.getKVClient().put( + ByteSequence.from(bookiePath, UTF_8), + EtcdConstants.EMPTY_BS + )); + } + } + + private static void removeNumBookies(Client client, + String scope, + int numBookies, + boolean readonly) throws Exception { + for (int i = 0; i < numBookies; i++) { + BookieId bookieId = BookieId.parse("bookie-" + i + ":3181"); + String bookiePath; + if (readonly) { + bookiePath = EtcdUtils.getReadonlyBookiePath(scope, bookieId); + } else { + bookiePath = EtcdUtils.getWritableBookiePath(scope, bookieId); + } + msResult(client.getKVClient().delete( + ByteSequence.from(bookiePath, UTF_8) + )); + } + } + + private static void assertClusterScope(Client client, + String scope) throws Exception { + GetResponse resp = msResult( + client.getKVClient().get( + ByteSequence.from(scope, UTF_8))); + assertEquals(1, resp.getCount()); + } + + private static void assertClusterLayout(Client client, + String scope) throws Exception { + String layoutPath = getLayoutKey(scope); + GetResponse resp = msResult( + client.getKVClient().get( + ByteSequence.from(layoutPath, UTF_8))); + assertEquals(1, resp.getCount()); + LedgerLayout layout = LedgerLayout.parseLayout( + resp.getKvs().get(0).getValue().getBytes() + ); + assertEquals( + EtcdLedgerManagerFactory.class.getName(), + layout.getManagerFactoryClass() + ); + assertEquals(EtcdLedgerManagerFactory.VERSION, layout.getManagerVersion()); + assertEquals(LedgerLayout.LAYOUT_FORMAT_VERSION, layout.getLayoutFormatVersion()); + } + + private static void assertClusterInstanceId(Client client, + String scope) throws Exception { + String instanceIdPath = getClusterInstanceIdPath(scope); + GetResponse resp = msResult( + client.getKVClient().get(ByteSequence.from(instanceIdPath, UTF_8))); + assertEquals(1, resp.getCount()); + String instanceId = new String(resp.getKvs().get(0).getValue().getBytes(), UTF_8); + UUID uuid = UUID.fromString(instanceId); + log.info("Cluster instance id : {}", uuid); + } + + private static void assertBookiesPath(Client client, + String scope) throws Exception { + String bookiesPath = getBookiesPath(scope); + GetResponse resp = msResult( + 
client.getKVClient().get(ByteSequence.from(bookiesPath, UTF_8))); + assertEquals(1, resp.getCount()); + } + + private static void assertWritableBookiesPath(Client client, + String scope) throws Exception { + String bookiesPath = getWritableBookiesPath(scope); + GetResponse resp = msResult( + client.getKVClient().get(ByteSequence.from(bookiesPath, UTF_8))); + assertEquals(1, resp.getCount()); + } + + private static void assertReadonlyBookiesPath(Client client, + String scope) throws Exception { + String bookiesPath = getReadonlyBookiesPath(scope); + GetResponse resp = msResult( + client.getKVClient().get(ByteSequence.from(bookiesPath, UTF_8))); + assertEquals(1, resp.getCount()); + } + + private static void assertLedgersPath(Client client, String scope) throws Exception { + String ledgersPath = getLedgersPath(scope); + GetResponse resp = msResult( + client.getKVClient().get(ByteSequence.from(ledgersPath, UTF_8))); + assertEquals(1, resp.getCount()); + } + + private static void assertBucketsPath(Client client, String scope) throws Exception { + String bucketsPath = getBucketsPath(scope); + GetResponse resp = msResult( + client.getKVClient().get(ByteSequence.from(bucketsPath, UTF_8))); + assertEquals(1, resp.getCount()); + } + + private static void assertUnderreplicationPath(Client client, String scope) throws Exception { + String urPath = getUnderreplicationPath(scope); + GetResponse resp = msResult( + client.getKVClient().get(ByteSequence.from(urPath, UTF_8))); + assertEquals(1, resp.getCount()); + } + + private static void assertClusterExists(Client client, String scope) throws Exception { + assertClusterScope(client, scope); + assertClusterLayout(client, scope); + assertClusterInstanceId(client, scope); + assertBookiesPath(client, scope); + assertWritableBookiesPath(client, scope); + assertReadonlyBookiesPath(client, scope); + assertLedgersPath(client, scope); + assertBucketsPath(client, scope); + assertUnderreplicationPath(client, scope); + } + + private static void assertClusterNotExists(Client client, String scope) throws Exception { + GetResponse response = msResult( + client.getKVClient().get( + ByteSequence.from(scope, UTF_8), + GetOption.newBuilder() + .withRange(ByteSequence.from(getScopeEndKey(scope), UTF_8)) + .build())); + assertEquals(0, response.getCount()); + } + +} diff --git a/metadata-drivers/etcd/src/test/java/org/apache/bookkeeper/metadata/etcd/EtcdCookieTest.java b/metadata-drivers/etcd/src/test/java/org/apache/bookkeeper/metadata/etcd/EtcdCookieTest.java new file mode 100644 index 00000000000..c9d1a412e4a --- /dev/null +++ b/metadata-drivers/etcd/src/test/java/org/apache/bookkeeper/metadata/etcd/EtcdCookieTest.java @@ -0,0 +1,178 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.bookkeeper.metadata.etcd;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import lombok.extern.slf4j.Slf4j;
+import org.apache.bookkeeper.bookie.BookieException.CookieNotFoundException;
+import org.apache.bookkeeper.bookie.BookieException.MetadataStoreException;
+import org.apache.bookkeeper.discover.RegistrationManager;
+import org.apache.bookkeeper.metadata.etcd.testing.EtcdTestBase;
+import org.apache.bookkeeper.net.BookieId;
+import org.apache.bookkeeper.versioning.LongVersion;
+import org.apache.bookkeeper.versioning.Version;
+import org.apache.bookkeeper.versioning.Version.Occurred;
+import org.apache.bookkeeper.versioning.Versioned;
+import org.apache.commons.lang.RandomStringUtils;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TestName;
+
+/**
+ * Test Etcd based cookie management.
+ */
+@Slf4j
+public class EtcdCookieTest extends EtcdTestBase {
+
+    @Rule
+    public final TestName runtime = new TestName();
+
+    private RegistrationManager regMgr;
+
+    @Before
+    @Override
+    public void setUp() throws Exception {
+        log.info("setup");
+        super.setUp();
+        String scope = RandomStringUtils.randomAlphabetic(16);
+        this.regMgr = new EtcdRegistrationManager(
+            newEtcdClient(),
+            scope
+        );
+        log.info("done setup");
+    }
+
+    @After
+    @Override
+    public void tearDown() throws Exception {
+        log.info("tear down");
+        this.regMgr.close();
+        super.tearDown();
+    }
+
+    private static void assertCookieEquals(Versioned<byte[]> expected, Versioned<byte[]> actual) {
+        assertEquals(Occurred.CONCURRENTLY, expected.getVersion().compare(actual.getVersion()));
+        assertArrayEquals(expected.getValue(), actual.getValue());
+    }
+
+    @Test
+    public void readWriteRemoveCookie() throws Exception {
+        BookieId bookieId = BookieId.parse(runtime.getMethodName() + ":3181");
+
+        log.info("read non-existing cookie");
+        // read a cookie that doesn't exist
+        try {
+            regMgr.readCookie(bookieId);
+            fail("Should fail reading cookie if cookie doesn't exist");
+        } catch (CookieNotFoundException cnfe) {
+            // expected
+        }
+
+        log.info("create cookie");
+        // create the cookie
+        String cookieData = RandomStringUtils.randomAlphanumeric(1024);
+        Versioned<byte[]> cookie = new Versioned<>(
+            cookieData.getBytes(UTF_8), Version.NEW
+        );
+        regMgr.writeCookie(bookieId, cookie);
+
+        log.info("read cookie");
+        // read the cookie
+        Versioned<byte[]> readCookie = regMgr.readCookie(bookieId);
+        assertEquals(cookieData, new String(readCookie.getValue(), UTF_8));
+
+        log.info("try to create cookie again");
+        // attempt to create the cookie again
+        String newCookieData = RandomStringUtils.randomAlphabetic(512);
+        Versioned<byte[]> newCookie = new Versioned<>(
+            newCookieData.getBytes(UTF_8), Version.NEW
+        );
+        try {
+            regMgr.writeCookie(bookieId, newCookie);
+            fail("Should fail creating cookie if the cookie already exists");
+        } catch (MetadataStoreException mse) {
+            assertTrue(mse.getMessage().contains("Conflict on writing cookie"));
+        }
+        Versioned<byte[]> readCookie2 = regMgr.readCookie(bookieId);
+        assertCookieEquals(readCookie, readCookie2);
+
+        log.info("update cookie with wrong version");
+        // attempt to update the cookie with a wrong version
+        newCookie = new Versioned<>(
+            newCookieData.getBytes(UTF_8), new LongVersion(Long.MAX_VALUE)
+        );
+        try {
+            regMgr.writeCookie(bookieId, newCookie);
+            fail("Should fail updating cookie with a wrong version");
+        } catch
(MetadataStoreException mse) { + assertTrue(mse.getMessage().contains("Conflict on writing cookie")); + } + readCookie2 = regMgr.readCookie(bookieId); + assertCookieEquals(readCookie, readCookie2); + + log.info("delete cookie with wrong version"); + // delete the cookie with a wrong version + LongVersion badVersion = new LongVersion(Long.MAX_VALUE); + try { + regMgr.removeCookie(bookieId, badVersion); + fail("Should fail to remove cookie with bad version"); + } catch (MetadataStoreException mse) { + assertTrue(mse.getMessage().contains( + "bad version '" + badVersion + "'" + )); + } + readCookie2 = regMgr.readCookie(bookieId); + assertCookieEquals(readCookie, readCookie2); + + log.info("update with right version"); + // update the cookie with right version + newCookie = new Versioned<>( + newCookieData.getBytes(UTF_8), readCookie2.getVersion()); + regMgr.writeCookie(bookieId, newCookie); + readCookie2 = regMgr.readCookie(bookieId); + assertEquals(newCookieData, new String(readCookie2.getValue(), UTF_8)); + assertEquals(Occurred.AFTER, readCookie2.getVersion().compare(readCookie.getVersion())); + + log.info("delete with right version"); + // delete the cookie with right version + regMgr.removeCookie(bookieId, readCookie2.getVersion()); + try { + regMgr.readCookie(bookieId); + fail("Should fail reading cookie if cookie doesn't exist"); + } catch (CookieNotFoundException cnfe) { + // expected + } + + log.info("remove non-existing cookie"); + // remove a cookie that doesn't exist + try { + regMgr.removeCookie(bookieId, readCookie2.getVersion()); + fail("Should fail removing cookie if cookie doesn't exist"); + } catch (CookieNotFoundException cnfe) { + // expected + } + } + +} diff --git a/metadata-drivers/etcd/src/test/java/org/apache/bookkeeper/metadata/etcd/EtcdLayoutManagerTest.java b/metadata-drivers/etcd/src/test/java/org/apache/bookkeeper/metadata/etcd/EtcdLayoutManagerTest.java new file mode 100644 index 00000000000..a0a1f696c49 --- /dev/null +++ b/metadata-drivers/etcd/src/test/java/org/apache/bookkeeper/metadata/etcd/EtcdLayoutManagerTest.java @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.bookkeeper.metadata.etcd; + +import static org.apache.bookkeeper.metadata.etcd.EtcdConstants.LAYOUT_NODE; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.fail; + +import java.io.IOException; +import lombok.extern.slf4j.Slf4j; +import org.apache.bookkeeper.meta.LayoutManager.LedgerLayoutExistsException; +import org.apache.bookkeeper.meta.LedgerLayout; +import org.apache.bookkeeper.metadata.etcd.testing.EtcdTestBase; +import org.apache.commons.lang.RandomStringUtils; +import org.junit.Before; +import org.junit.Test; + +/** + * Integration test {@link EtcdLayoutManager}. + */ +@Slf4j +public class EtcdLayoutManagerTest extends EtcdTestBase { + + private static final int managerVersion = 0xabcd; + + private String scope; + private EtcdLayoutManager layoutManager; + + @Before + @Override + public void setUp() throws Exception { + super.setUp(); + this.scope = "/" + RandomStringUtils.randomAlphabetic(8); + this.layoutManager = new EtcdLayoutManager(etcdClient, scope); + log.info("setup layout manager under scope {}", scope); + } + + @Test + public void testReadCreateDeleteLayout() throws Exception { + // layout doesn't exist + assertNull(layoutManager.readLedgerLayout()); + + // create the layout + LedgerLayout layout = new LedgerLayout( + EtcdLedgerManagerFactory.class.getName(), + managerVersion + ); + layoutManager.storeLedgerLayout(layout); + + // read the layout + LedgerLayout readLayout = layoutManager.readLedgerLayout(); + assertEquals(layout, readLayout); + + // attempts to create the layout again and it should fail + LedgerLayout newLayout = new LedgerLayout( + "new layout", + managerVersion + 1 + ); + try { + layoutManager.storeLedgerLayout(newLayout); + fail("Should fail storeLedgerLayout if layout already exists"); + } catch (LedgerLayoutExistsException e) { + // expected + } + + // read the layout again (layout should not be changed) + readLayout = layoutManager.readLedgerLayout(); + assertEquals(layout, readLayout); + + // delete the layout + layoutManager.deleteLedgerLayout(); + + // the layout should be gone now + assertNull(layoutManager.readLedgerLayout()); + + // delete the layout again. it should fail since layout doesn't exist + try { + layoutManager.deleteLedgerLayout(); + fail("Should fail deleteLedgerLayout is layout not found"); + } catch (IOException ioe) { + assertEquals( + "No ledger layout is found under '" + scope + "/" + LAYOUT_NODE + "'", + ioe.getMessage()); + } + } + +} diff --git a/metadata-drivers/etcd/src/test/java/org/apache/bookkeeper/metadata/etcd/EtcdLedgerManagerTest.java b/metadata-drivers/etcd/src/test/java/org/apache/bookkeeper/metadata/etcd/EtcdLedgerManagerTest.java new file mode 100644 index 00000000000..f206817b6cd --- /dev/null +++ b/metadata-drivers/etcd/src/test/java/org/apache/bookkeeper/metadata/etcd/EtcdLedgerManagerTest.java @@ -0,0 +1,360 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.bookkeeper.metadata.etcd;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.apache.bookkeeper.common.concurrent.FutureUtils.result;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertSame;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import com.google.common.collect.Lists;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.LinkedBlockingQueue;
+import java.util.concurrent.TimeUnit;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+import lombok.extern.slf4j.Slf4j;
+import org.apache.bookkeeper.client.BKException;
+import org.apache.bookkeeper.client.BKException.Code;
+import org.apache.bookkeeper.client.BookKeeper.DigestType;
+import org.apache.bookkeeper.client.LedgerMetadataBuilder;
+import org.apache.bookkeeper.client.api.LedgerMetadata;
+import org.apache.bookkeeper.common.concurrent.FutureUtils;
+import org.apache.bookkeeper.meta.LedgerManager.LedgerRange;
+import org.apache.bookkeeper.meta.LedgerManager.LedgerRangeIterator;
+import org.apache.bookkeeper.metadata.etcd.helpers.ValueStream;
+import org.apache.bookkeeper.metadata.etcd.testing.EtcdTestBase;
+import org.apache.bookkeeper.net.BookieId;
+import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.LedgerMetadataListener;
+import org.apache.bookkeeper.versioning.LongVersion;
+import org.apache.bookkeeper.versioning.Version;
+import org.apache.bookkeeper.versioning.Versioned;
+import org.apache.commons.lang.RandomStringUtils;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Integration test {@link EtcdLedgerManager}.
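+ *
+ * <p>Covers metadata CRUD against a real etcd container, plus ledger-range
+ * iteration and metadata listeners backed by {@code ValueStream}.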
+ */
+@Slf4j
+public class EtcdLedgerManagerTest extends EtcdTestBase {
+
+    private String scope;
+    private EtcdLedgerManager lm;
+
+    @Override
+    @Before
+    public void setUp() throws Exception {
+        super.setUp();
+        this.scope = RandomStringUtils.randomAlphabetic(8);
+        this.lm = new EtcdLedgerManager(etcdClient, scope);
+    }
+
+    @Override
+    @After
+    public void tearDown() throws Exception {
+        if (null != lm) {
+            lm.close();
+        }
+        super.tearDown();
+    }
+
+    @Test
+    public void testLedgerCRUD() throws Exception {
+        long ledgerId = System.currentTimeMillis();
+        List<BookieId> ensemble = Lists.newArrayList(
+            BookieId.parse("192.0.2.1:1234"),
+            BookieId.parse("192.0.2.2:1234"),
+            BookieId.parse("192.0.2.3:1234"));
+        LedgerMetadata metadata = LedgerMetadataBuilder.create().withId(ledgerId)
+            .withEnsembleSize(3).withWriteQuorumSize(3).withAckQuorumSize(2)
+            .withPassword("test-password".getBytes(UTF_8))
+            .withDigestType(DigestType.CRC32C.toApiDigestType())
+            .newEnsembleEntry(0L, ensemble)
+            .build();
+
+        // ledger doesn't exist: read
+        try {
+            result(lm.readLedgerMetadata(ledgerId));
+            fail("Should fail on reading ledger metadata if the ledger doesn't exist");
+        } catch (BKException bke) {
+            assertEquals(Code.NoSuchLedgerExistsException, bke.getCode());
+        }
+
+        // ledger doesn't exist: delete
+        try {
+            result(lm.removeLedgerMetadata(ledgerId, new LongVersion(999L)));
+            fail("Should fail on deleting ledger metadata if the ledger doesn't exist");
+        } catch (BKException bke) {
+            assertEquals(Code.NoSuchLedgerExistsException, bke.getCode());
+        }
+
+        // ledger doesn't exist: write
+        try {
+            result(lm.writeLedgerMetadata(ledgerId, metadata, new LongVersion(999L)));
+            fail("Should fail on updating ledger metadata if the ledger doesn't exist");
+        } catch (BKException bke) {
+            assertEquals(Code.NoSuchLedgerExistsException, bke.getCode());
+        }
+
+        // ledger doesn't exist: create
+        Versioned<LedgerMetadata> writtenMetadata = result(lm.createLedgerMetadata(ledgerId, metadata));
+        assertSame(metadata, writtenMetadata.getValue());
+        Version version = writtenMetadata.getVersion();
+        assertNotNull(version);
+        assertTrue(version instanceof LongVersion);
+        assertTrue(((LongVersion) version).getLongVersion() > 0L);
+
+        // ledger exists: create
+
+        // attempting to create the ledger again should result in `LedgerExistException`
+        try {
+            result(lm.createLedgerMetadata(ledgerId, metadata));
+            fail("Should fail on creating ledger metadata if the ledger already exists");
+        } catch (BKException bke) {
+            assertEquals(Code.LedgerExistException, bke.getCode());
+        }
+
+        // ledger exists: get
+        Versioned<LedgerMetadata> readMetadata = result(lm.readLedgerMetadata(ledgerId));
+        assertEquals(metadata, readMetadata.getValue());
+
+        // ledger exists: update metadata with wrong version
+        try {
+            result(lm.writeLedgerMetadata(ledgerId, readMetadata.getValue(), new LongVersion(Long.MAX_VALUE)));
+            fail("Should fail to write metadata using a wrong version");
+        } catch (BKException bke) {
+            assertEquals(Code.MetadataVersionException, bke.getCode());
+        }
+        readMetadata = result(lm.readLedgerMetadata(ledgerId));
+        assertEquals(metadata, readMetadata.getValue());
+
+        // ledger exists: delete metadata with wrong version
+        try {
+            result(lm.removeLedgerMetadata(ledgerId, new LongVersion(Long.MAX_VALUE)));
+            fail("Should fail to delete metadata using a wrong version");
+        } catch (BKException bke) {
+            assertEquals(Code.MetadataVersionException, bke.getCode());
+        }
+
+        readMetadata = result(lm.readLedgerMetadata(ledgerId));
+        assertEquals(metadata, readMetadata.getValue());
+
+        // ledger exists: update metadata with the right version
+
+        LongVersion curVersion = (LongVersion) readMetadata.getVersion();
+        writtenMetadata = result(lm.writeLedgerMetadata(ledgerId, readMetadata.getValue(), curVersion));
+        LongVersion newVersion = (LongVersion) writtenMetadata.getVersion();
+        assertTrue(curVersion.getLongVersion() < newVersion.getLongVersion());
+
+        readMetadata = result(lm.readLedgerMetadata(ledgerId));
+        assertEquals(writtenMetadata, readMetadata);
+
+        // ledger exists: delete metadata with the right version
+        result(lm.removeLedgerMetadata(ledgerId, newVersion));
+        try {
+            result(lm.readLedgerMetadata(ledgerId));
+            fail("Should fail to read ledger if it is deleted");
+        } catch (BKException bke) {
+            assertEquals(Code.NoSuchLedgerExistsException, bke.getCode());
+        }
+
+    }
+
+    @Test
+    public void testProcessLedgers() throws Exception {
+        final int numLedgers = 100;
+        createNumLedgers(numLedgers);
+
+        final CountDownLatch processLatch = new CountDownLatch(numLedgers);
+        final CompletableFuture<Void> doneFuture = new CompletableFuture<>();
+        lm.asyncProcessLedgers(
+            (l, cb) -> processLatch.countDown(),
+            (rc, path, ctx) -> {
+                if (Code.OK == rc) {
+                    FutureUtils.complete(doneFuture, null);
+                } else {
+                    FutureUtils.completeExceptionally(doneFuture, BKException.create(rc));
+                }
+            },
+            null,
+            Code.OK,
+            Code.MetaStoreException);
+
+        result(doneFuture);
+        processLatch.await();
+    }
+
+    @Test
+    public void testLedgerRangeIterator() throws Exception {
+        final int numLedgers = 100;
+        createNumLedgers(numLedgers);
+
+        long nextLedgerId = 0L;
+        LedgerRangeIterator iter = lm.getLedgerRanges(0);
+        while (iter.hasNext()) {
+            LedgerRange lr = iter.next();
+            for (Long lid : lr.getLedgers()) {
+                assertEquals(nextLedgerId, lid.longValue());
+                ++nextLedgerId;
+            }
+        }
+        assertEquals((long) numLedgers, nextLedgerId);
+    }
+
+    private void createNumLedgers(int numLedgers) throws Exception {
+        List<CompletableFuture<Versioned<LedgerMetadata>>> createFutures = new ArrayList<>(numLedgers);
+        for (int i = 0; i < numLedgers; i++) {
+            LedgerMetadata metadata = LedgerMetadataBuilder.create().withId(i)
+                .withEnsembleSize(3).withWriteQuorumSize(3).withAckQuorumSize(2)
+                .withPassword("test-password".getBytes(UTF_8))
+                .withDigestType(DigestType.CRC32C.toApiDigestType())
+                .newEnsembleEntry(0L, createNumBookies(3)).build();
+            createFutures.add(lm.createLedgerMetadata(i, metadata));
+        }
+        FutureUtils.result(FutureUtils.collect(createFutures));
+    }
+
+    @Test
+    public void testRegisterLedgerMetadataListener() throws Exception {
+        long ledgerId = System.currentTimeMillis();
+
+        // create a ledger metadata
+        LedgerMetadata metadata = LedgerMetadataBuilder.create().withId(ledgerId)
+            .withEnsembleSize(3).withWriteQuorumSize(3).withAckQuorumSize(2)
+            .withPassword("test-password".getBytes(UTF_8))
+            .withDigestType(DigestType.CRC32C.toApiDigestType())
+            .newEnsembleEntry(0L, createNumBookies(3)).build();
+        result(lm.createLedgerMetadata(ledgerId, metadata));
+        Versioned<LedgerMetadata> readMetadata = lm.readLedgerMetadata(ledgerId).get();
+        log.info("Create ledger metadata : {}", readMetadata.getValue());
+
+        // register first listener
+
+        LinkedBlockingQueue<Versioned<LedgerMetadata>> metadataQueue1 = new LinkedBlockingQueue<>();
+        LedgerMetadataListener listener1 = (lid, m) -> {
+            log.info("[listener1] Received ledger {} metadata : {}", lid, m);
+            metadataQueue1.add(m);
+        };
+        log.info("Registered first listener for ledger {}", ledgerId);
+        lm.registerLedgerMetadataListener(ledgerId, listener1);
+        // we should receive a metadata notification when a ledger is created
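+        // (registering a listener triggers an initial read of the key, so the
+        // current metadata is delivered even before any further update)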
+        Versioned<LedgerMetadata> notifiedMetadata = metadataQueue1.take();
+        assertEquals(readMetadata, notifiedMetadata);
+        ValueStream<LedgerMetadata> lms = lm.getLedgerMetadataStream(ledgerId);
+        assertNotNull(lms.waitUntilWatched());
+        assertNotNull(result(lms.waitUntilWatched()));
+
+        // register second listener
+
+        LinkedBlockingQueue<Versioned<LedgerMetadata>> metadataQueue2 = new LinkedBlockingQueue<>();
+        LedgerMetadataListener listener2 = (lid, m) -> {
+            log.info("[listener2] Received ledger {} metadata : {}", lid, m);
+            metadataQueue2.add(m);
+        };
+        log.info("Registered second listener for ledger {}", ledgerId);
+        lm.registerLedgerMetadataListener(ledgerId, listener2);
+        Versioned<LedgerMetadata> notifiedMetadata2 = metadataQueue2.take();
+        assertEquals(readMetadata, notifiedMetadata2);
+        assertNotNull(lm.getLedgerMetadataStream(ledgerId));
+
+        // update the metadata
+        lm.writeLedgerMetadata(ledgerId,
+            LedgerMetadataBuilder.from(metadata).newEnsembleEntry(10L, createNumBookies(3)).build(),
+            notifiedMetadata.getVersion()).get();
+        readMetadata = lm.readLedgerMetadata(ledgerId).get();
+        assertEquals(readMetadata, metadataQueue1.take());
+        assertEquals(readMetadata, metadataQueue2.take());
+        lms = lm.getLedgerMetadataStream(ledgerId);
+        assertNotNull(lms);
+        assertEquals(2, lms.getNumConsumers());
+
+        // remove listener2
+        lm.unregisterLedgerMetadataListener(ledgerId, listener2);
+        lms = lm.getLedgerMetadataStream(ledgerId);
+        assertNotNull(lms);
+        assertEquals(1, lms.getNumConsumers());
+
+        // update the metadata again
+        lm.writeLedgerMetadata(ledgerId,
+            LedgerMetadataBuilder.from(metadata).newEnsembleEntry(20L, createNumBookies(3)).build(),
+            readMetadata.getVersion()).get();
+        readMetadata = lm.readLedgerMetadata(ledgerId).get();
+        assertEquals(readMetadata, metadataQueue1.take());
+        assertNull(metadataQueue2.poll());
+
+        // remove listener1
+        lm.unregisterLedgerMetadataListener(ledgerId, listener1);
+        // the value stream will be removed
+        while (lm.getLedgerMetadataStream(ledgerId) != null) {
+            TimeUnit.MILLISECONDS.sleep(100);
+        }
+        assertEquals(0, lms.getNumConsumers());
+
+        // update the metadata again
+        lm.writeLedgerMetadata(ledgerId,
+            LedgerMetadataBuilder.from(metadata).newEnsembleEntry(30L, createNumBookies(3)).build(),
+            readMetadata.getVersion()).get();
+        readMetadata = lm.readLedgerMetadata(ledgerId).get();
+        assertNull(metadataQueue1.poll());
+        assertNull(metadataQueue2.poll());
+
+        log.info("Registered first listener for ledger {} again", ledgerId);
+        lm.registerLedgerMetadataListener(ledgerId, listener1);
+        notifiedMetadata = metadataQueue1.take();
+        assertEquals(readMetadata, notifiedMetadata);
+        lms = lm.getLedgerMetadataStream(ledgerId);
+        assertNotNull(lms);
+        assertEquals(1, lms.getNumConsumers());
+
+        // delete the ledger
+        lm.removeLedgerMetadata(ledgerId, readMetadata.getVersion()).get();
+        // the listener will eventually be removed
+        while (lm.getLedgerMetadataStream(ledgerId) != null) {
+            TimeUnit.MILLISECONDS.sleep(100);
+        }
+        assertEquals(1, lms.getNumConsumers());
+        assertNull(metadataQueue1.poll());
+        assertNull(metadataQueue2.poll());
+    }
+
+    static List<BookieId> createNumBookies(int numBookies) {
+        return IntStream.range(0, numBookies)
+            .mapToObj(idx -> BookieId.parse("127.0.0.1:" + (3181 + idx)))
+            .collect(Collectors.toList());
+    }
+}
diff --git a/metadata-drivers/etcd/src/test/java/org/apache/bookkeeper/metadata/etcd/EtcdRegistrationTest.java b/metadata-drivers/etcd/src/test/java/org/apache/bookkeeper/metadata/etcd/EtcdRegistrationTest.java
new file mode 100644
index 00000000000..d9cf7a5086e
--- /dev/null
+++ b/metadata-drivers/etcd/src/test/java/org/apache/bookkeeper/metadata/etcd/EtcdRegistrationTest.java
@@ -0,0 +1,418 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.bookkeeper.metadata.etcd;
+
+import static org.apache.bookkeeper.common.concurrent.FutureUtils.result;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import com.google.common.collect.Sets;
+import io.etcd.jetcd.Client;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.concurrent.BrokenBarrierException;
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.CyclicBarrier;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.LinkedBlockingQueue;
+import java.util.concurrent.atomic.AtomicInteger;
+import lombok.Cleanup;
+import lombok.extern.slf4j.Slf4j;
+import org.apache.bookkeeper.bookie.BookieException;
+import org.apache.bookkeeper.bookie.BookieException.MetadataStoreException;
+import org.apache.bookkeeper.common.concurrent.FutureUtils;
+import org.apache.bookkeeper.discover.BookieServiceInfo;
+import org.apache.bookkeeper.discover.RegistrationClient;
+import org.apache.bookkeeper.discover.RegistrationClient.RegistrationListener;
+import org.apache.bookkeeper.metadata.etcd.testing.EtcdTestBase;
+import org.apache.bookkeeper.net.BookieId;
+import org.apache.bookkeeper.versioning.LongVersion;
+import org.apache.bookkeeper.versioning.Version;
+import org.apache.bookkeeper.versioning.Version.Occurred;
+import org.apache.bookkeeper.versioning.Versioned;
+import org.apache.commons.lang.RandomStringUtils;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TestName;
+
+/**
+ * Test etcd based bookie registration.
+ */ +@Slf4j +public class EtcdRegistrationTest extends EtcdTestBase { + + static BookieId newBookie(int i) { + return BookieId.parse("127.0.0.1:" + (3181 + i)); + } + + @Rule + public final TestName runtime = new TestName(); + + private String scope; + private RegistrationClient regClient; + + protected static RegistrationListener newRegistrationListener( + LinkedBlockingQueue<Versioned<Set<BookieId>>> notifications) { + return bookies -> { + log.info("Received new bookies: {}", bookies); + try { + notifications.put(bookies); + } catch (InterruptedException e) { + log.error("Interrupted at enqueuing updated key set", e); + } + }; + } + + @Before + @Override + public void setUp() throws Exception { + super.setUp(); + this.scope = RandomStringUtils.randomAlphabetic(16); + this.regClient = new EtcdRegistrationClient(scope, etcdClient); + } + + @After + @Override + public void tearDown() throws Exception { + this.regClient.close(); + super.tearDown(); + } + + interface MultiBookiesTester { + + void test(String scope, int numBookies, boolean readonly) throws Exception; + + } + + private static void runNumBookiesTest(final String scope, + final int numBookies, + final boolean readonly, + MultiBookiesTester tester) throws Exception { + + final List<EtcdRegistrationManager> bookies = createNumBookies(readonly, numBookies, scope); + try { + tester.test(scope, numBookies, readonly); + } finally { + bookies.forEach(EtcdRegistrationManager::close); + } + + } + + @Test + public void testRegisterWritableBookies() throws Exception { + testRegisterBookie(false); + } + + @Test + public void testRegisterReadonlyBookies() throws Exception { + testRegisterBookie(true); + } + + private void testRegisterBookie(boolean readonly) throws Exception { + runNumBookiesTest(scope, 3, readonly, (scope, numBookies, ro) -> { + Set<BookieId> expectedBookies = Sets.newHashSet(); + for (int i = 0; i < numBookies; i++) { + expectedBookies.add(newBookie(i)); + } + Set<BookieId> writableBookies = result(regClient.getWritableBookies()).getValue(); + Set<BookieId> readonlyBookies = result(regClient.getReadOnlyBookies()).getValue(); + if (ro) { + assertEquals(0, writableBookies.size()); + assertEquals(numBookies, readonlyBookies.size()); + assertEquals(expectedBookies, readonlyBookies); + } else { + assertEquals(0, readonlyBookies.size()); + assertEquals(numBookies, writableBookies.size()); + assertEquals(expectedBookies, writableBookies); + } + + }); + } + + @Test + public void testWatchWritableBookies() throws Exception { + testWatchBookies(false); + } + + @Test + public void testWatchReadonlyBookies() throws Exception { + testWatchBookies(true); + } + + private void testWatchBookies(boolean readonly) throws Exception { + LinkedBlockingQueue<Versioned<Set<BookieId>>> writableChanges = new LinkedBlockingQueue<>(); + LinkedBlockingQueue<Versioned<Set<BookieId>>> readonlyChanges = new LinkedBlockingQueue<>(); + result(regClient.watchReadOnlyBookies(newRegistrationListener(readonlyChanges))); + result(regClient.watchWritableBookies(newRegistrationListener(writableChanges))); + Versioned<Set<BookieId>> versionedBookies = writableChanges.take(); + assertTrue(versionedBookies.getValue().isEmpty()); + versionedBookies = readonlyChanges.take(); + assertTrue(versionedBookies.getValue().isEmpty()); + + final int numBookies = 3; + final List<EtcdRegistrationManager> bookies = createNumBookies(readonly, numBookies, scope, 1); + + LinkedBlockingQueue<Versioned<Set<BookieId>>> changes; + if (readonly) { + changes = readonlyChanges; + } else { + changes = writableChanges; + } + + Version preVersion = new LongVersion(-1); + Set<BookieId> expectedBookies = new HashSet<>(); + for (int i = 0; i < numBookies; i++) { + BookieId address =
newBookie(i); + expectedBookies.add(address); + + versionedBookies = changes.take(); + Version curVersion = versionedBookies.getVersion(); + assertEquals(Occurred.AFTER, curVersion.compare(preVersion)); + assertEquals(expectedBookies, versionedBookies.getValue()); + preVersion = curVersion; + } + + bookies.forEach(EtcdRegistrationManager::close); + for (int i = 0; i < numBookies; i++) { + versionedBookies = changes.take(); + Version curVersion = versionedBookies.getVersion(); + assertEquals(Occurred.AFTER, curVersion.compare(preVersion)); + assertEquals(numBookies - i - 1, versionedBookies.getValue().size()); + preVersion = curVersion; + } + if (readonly) { + assertEquals(0, writableChanges.size()); + } else { + assertEquals(0, readonlyChanges.size()); + } + } + + private static List<EtcdRegistrationManager> createNumBookies(boolean readonly, + int numBookies, + String scope, + long ttlSeconds) throws BookieException { + List<EtcdRegistrationManager> bookies = new ArrayList<>(numBookies); + for (int i = 0; i < numBookies; i++) { + Client client = newEtcdClient(); + EtcdRegistrationManager regMgr = new EtcdRegistrationManager(client, scope, ttlSeconds); + bookies.add(regMgr); + regMgr.registerBookie(newBookie(i), readonly, BookieServiceInfo.EMPTY); + } + return bookies; + } + + private static List<EtcdRegistrationManager> createNumBookies(boolean readonly, + int numBookies, + String scope) throws BookieException { + return createNumBookies(readonly, numBookies, scope, 60); + } + + @Test + public void testRegisterBookieWaitUntilPreviousExpiredSuccess() throws Exception { + long ttlSeconds = 1; + long leaseId = -0xabcd; + BookieId bookieId = BookieId.parse(runtime.getMethodName() + ":3181"); + try (EtcdRegistrationManager regManager = new EtcdRegistrationManager( + newEtcdClient(), scope, ttlSeconds) + ) { + regManager.registerBookie(bookieId, false, BookieServiceInfo.EMPTY); + leaseId = regManager.getBkRegister().getLeaseId(); + log.info("Registered bookie under scope '{}' with lease = {}", scope, leaseId); + } + assertNotEquals(-0xabcd, leaseId); + final long prevLeaseId = leaseId; + try (EtcdRegistrationManager regManager = new EtcdRegistrationManager( + newEtcdClient(), scope, 100000 * ttlSeconds) + ) { + regManager.registerBookie(bookieId, false, BookieServiceInfo.EMPTY); + leaseId = regManager.getBkRegister().getLeaseId(); + log.info("Registered bookie under scope '{}' with new lease = {}", scope, leaseId); + } + assertNotEquals(prevLeaseId, leaseId); + } + + @Test + public void testRegisterBookieWaitUntilPreviousExpiredFailure() throws Exception { + long ttlSeconds = 1; + long leaseId = -0xabcd; + BookieId bookieId = BookieId.parse(runtime.getMethodName() + ":3181"); + try (EtcdRegistrationManager regManager = new EtcdRegistrationManager( + newEtcdClient(), scope, 10000000 * ttlSeconds) + ) { + regManager.registerBookie(bookieId, false, BookieServiceInfo.EMPTY); + leaseId = regManager.getBkRegister().getLeaseId(); + log.info("Registered bookie under scope '{}' with lease = {}", scope, leaseId); + } + assertNotEquals(-0xabcd, leaseId); + try (EtcdRegistrationManager regManager = new EtcdRegistrationManager( + newEtcdClient(), scope, ttlSeconds) + ) { + regManager.registerBookie(bookieId, false, BookieServiceInfo.EMPTY); + fail("Should fail to register bookie under scope '{}'" + + " since previous registration has not been expired yet"); + } catch (MetadataStoreException mse) { + log.info("Encountered exception on registering bookie under scope '{}'", scope, mse); + // expected + } + } + + @Test + public void testRegisterWritableBookieWithSameLeaseId() throws
Exception { + testRegisterBookieWithSameLeaseId(false); + } + + @Test + public void testRegisterReadonlyBookieWithSameLeaseId() throws Exception { + testRegisterBookieWithSameLeaseId(true); + } + + private void testRegisterBookieWithSameLeaseId(boolean readonly) throws Exception { + long ttlSeconds = 1; + long leaseId = -0xabcd; + BookieId bookieId = BookieId.parse(runtime.getMethodName() + ":3181"); + try (EtcdRegistrationManager regManager = new EtcdRegistrationManager( + newEtcdClient(), scope, 10000000 * ttlSeconds) + ) { + regManager.registerBookie(bookieId, readonly, BookieServiceInfo.EMPTY); + leaseId = regManager.getBkRegister().getLeaseId(); + log.info("Registered bookie under scope '{}' with lease = {}", scope, leaseId); + log.info("Trying to register using same lease '{}'", leaseId); + try (EtcdRegistrationManager regManager2 = new EtcdRegistrationManager( + regManager.getClient(), scope, regManager.getBkRegister() + )) { + regManager.registerBookie(bookieId, readonly, BookieServiceInfo.EMPTY); + } + } + } + + private Set<BookieId> getBookies(boolean readonly) throws Exception { + Set<BookieId> bookies; + if (readonly) { + bookies = result(regClient.getReadOnlyBookies()).getValue(); + } else { + bookies = result(regClient.getWritableBookies()).getValue(); + } + return bookies; + } + + @Test + public void testRegisterUnregisterWritableBookie() throws Exception { + testRegisterUnregister(false); + } + + @Test + public void testRegisterUnregisterReadonlyBookie() throws Exception { + testRegisterUnregister(true); + } + + private void testRegisterUnregister(boolean readonly) throws Exception { + String bookieIdStr = runtime.getMethodName(); + if (readonly) { + bookieIdStr += "-readonly"; + } + bookieIdStr += ":3181"; + BookieId bookieId = BookieId.parse(bookieIdStr); + try (EtcdRegistrationManager regMgr = new EtcdRegistrationManager( + newEtcdClient(), scope, 1000000000 + )) { + // before registration + Set<BookieId> bookies = getBookies(readonly); + log.info("before registration : bookies = {}", bookies); + assertEquals(0, bookies.size()); + // registered + regMgr.registerBookie(bookieId, readonly, BookieServiceInfo.EMPTY); + bookies = getBookies(readonly); + log.info("after registered: bookies = {}", bookies); + assertEquals(1, bookies.size()); + assertEquals( + Sets.newHashSet(bookieId), + bookies); + // unregistered + regMgr.unregisterBookie(bookieId, readonly); + bookies = getBookies(readonly); + log.info("after unregistered: bookies = {}", bookies); + assertEquals(0, bookies.size()); + } + } + + @Test + public void testConcurrentWritableRegistration() throws Exception { + testConcurrentRegistration(false); + } + + @Test + public void testConcurrentReadonlyRegistration() throws Exception { + testConcurrentRegistration(true); + } + + private void testConcurrentRegistration(boolean readonly) throws Exception { + final BookieId bookieId; + if (readonly) { + bookieId = BookieId.parse(runtime.getMethodName() + "-readonly:3181"); + } else { + bookieId = BookieId.parse(runtime.getMethodName() + ":3181"); + } + final int numBookies = 10; + @Cleanup("shutdown") + ExecutorService executor = Executors.newFixedThreadPool(numBookies); + final CyclicBarrier startBarrier = new CyclicBarrier(numBookies); + final CyclicBarrier completeBarrier = new CyclicBarrier(numBookies); + final CompletableFuture<Void> doneFuture = new CompletableFuture<>(); + final AtomicInteger numSuccesses = new AtomicInteger(0); + final AtomicInteger numFailures = new AtomicInteger(0); + for (int i = 0; i < numBookies; i++) { + executor.submit(() -> {
try (EtcdRegistrationManager regMgr = new EtcdRegistrationManager( + newEtcdClient(), scope, 1 + )) { + try { + startBarrier.await(); + regMgr.registerBookie(bookieId, readonly, BookieServiceInfo.EMPTY); + numSuccesses.incrementAndGet(); + } catch (InterruptedException e) { + log.warn("Interrupted at waiting for the other threads to start", e); + } catch (BrokenBarrierException e) { + log.warn("Start barrier is broken", e); + } catch (BookieException e) { + numFailures.incrementAndGet(); + } + try { + completeBarrier.await(); + } catch (InterruptedException e) { + log.warn("Interrupted at waiting for the other threads to complete", e); + } catch (BrokenBarrierException e) { + log.warn("Complete barrier is broken", e); + } + FutureUtils.complete(doneFuture, null); + } + }); + } + doneFuture.join(); + assertEquals(1, numSuccesses.get()); + assertEquals(numBookies - 1, numFailures.get()); + } + +} diff --git a/metadata-drivers/etcd/src/test/java/org/apache/bookkeeper/metadata/etcd/helpers/HelpersTest.java b/metadata-drivers/etcd/src/test/java/org/apache/bookkeeper/metadata/etcd/helpers/HelpersTest.java new file mode 100644 index 00000000000..f328149e9e8 --- /dev/null +++ b/metadata-drivers/etcd/src/test/java/org/apache/bookkeeper/metadata/etcd/helpers/HelpersTest.java @@ -0,0 +1,211 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.bookkeeper.metadata.etcd.helpers; + +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.apache.bookkeeper.common.concurrent.FutureUtils.result; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import io.etcd.jetcd.ByteSequence; +import java.util.List; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.Function; +import lombok.extern.slf4j.Slf4j; +import org.apache.bookkeeper.metadata.etcd.testing.EtcdTestBase; +import org.apache.commons.lang.RandomStringUtils; +import org.apache.commons.lang3.StringUtils; +import org.junit.Before; +import org.junit.Test; + +/** + * Integration test helpers. 
+ */ +@Slf4j +public class HelpersTest extends EtcdTestBase { + + private static final Function<ByteSequence, String> BYTE_SEQUENCE_STRING_FUNCTION = + bs -> bs.toString(UTF_8); + + private static String getKey(String scope, int i) { + return String.format("%s-key-%010d", scope, i); + } + + private String scope; + + @Before + @Override + public void setUp() throws Exception { + super.setUp(); + scope = RandomStringUtils.randomAlphabetic(8); + } + + @Test + public void testEmptyKeyStream() throws Exception { + KeyStream<String> ks = new KeyStream<>( + etcdClient.getKVClient(), + ByteSequence.from(getKey(scope, 0), UTF_8), + ByteSequence.from(getKey(scope, 100), UTF_8), + BYTE_SEQUENCE_STRING_FUNCTION + ); + List<String> values = result(ks.readNext()); + assertTrue(values.isEmpty()); + + // read the values again + values = result(ks.readNext()); + assertTrue(values.isEmpty()); + } + + @Test + public void testKeyStreamBatch1() throws Exception { + testKeyStream(20, 1); + } + + @Test + public void testKeyStreamBatch2() throws Exception { + testKeyStream(20, 2); + } + + @Test + public void testKeyStreamBatch7() throws Exception { + testKeyStream(20, 7); + } + + @Test + public void testKeyStreamBatch10() throws Exception { + testKeyStream(20, 10); + } + + @Test + public void testKeyStreamBatch20() throws Exception { + testKeyStream(20, 20); + } + + @Test + public void testKeyStreamBatch40() throws Exception { + testKeyStream(20, 40); + } + + @Test + public void testKeyStreamBatchUnlimited() throws Exception { + testKeyStream(20, 0); + } + + private void testKeyStream(int numKeys, int batchSize) throws Exception { + for (int i = 0; i < numKeys; i++) { + String key = getKey(scope, i); + ByteSequence keyBs = ByteSequence.from(key.getBytes(UTF_8)); + result(etcdClient.getKVClient().put(keyBs, keyBs)); + } + + KeyStream<Integer> ks = openKeyStream(batchSize); + AtomicInteger numReceived = new AtomicInteger(0); + while (true) { + List<Integer> values = result(ks.readNext()); + log.info("Received values : {}", values); + if (values.isEmpty()) { + break; + } + for (int value : values) { + assertEquals(numReceived.getAndIncrement(), value); + } + } + assertEquals(numKeys, numReceived.get()); + } + + private void testKeyIterator(int numKeys, int batchSize) throws Exception { + for (int i = 0; i < numKeys; i++) { + String key = getKey(scope, i); + ByteSequence keyBs = ByteSequence.from(key, UTF_8); + result(etcdClient.getKVClient().put(keyBs, keyBs)); + } + + KeyStream<Integer> ks = openKeyStream(batchSize); + KeyIterator<Integer> ki = new KeyIterator<>(ks); + + AtomicInteger numReceived = new AtomicInteger(0); + while (ki.hasNext()) { + List<Integer> values = ki.next(); + log.info("Received values : {}", values); + if (values.isEmpty()) { + break; + } + for (int value : values) { + assertEquals(numReceived.getAndIncrement(), value); + } + } + assertEquals(numKeys, numReceived.get()); + } + + @Test + public void testKeyIteratorBatch1() throws Exception { + testKeyIterator(20, 1); + } + + @Test + public void testKeyIteratorBatch2() throws Exception { + testKeyIterator(20, 2); + } + + @Test + public void testKeyIteratorBatch7() throws Exception { + testKeyIterator(20, 7); + } + + @Test + public void testKeyIteratorBatch10() throws Exception { + testKeyIterator(20, 10); + } + + @Test + public void testKeyIteratorBatch20() throws Exception { + testKeyIterator(20, 20); + } + + @Test + public void testKeyIteratorBatch40() throws Exception { + testKeyIterator(20, 40); + } + + @Test + public void testKeyIteratorBatchUnlimited() throws Exception { + testKeyIterator(20, 0); + } + + private
KeyStream<Integer> openKeyStream(int batchSize) { + KeyStream<Integer> ks = new KeyStream<>( + etcdClient.getKVClient(), + ByteSequence.from(getKey(scope, 0).getBytes(UTF_8)), + ByteSequence.from(getKey(scope, Integer.MAX_VALUE).getBytes(UTF_8)), + bs -> { + String[] keyParts = StringUtils.split(bs.toString(UTF_8), '-'); + try { + return Integer.parseInt(keyParts[2]); + } catch (NumberFormatException nfe) { + log.error("Failed to parse key string '{}' : ", + bs.toString(UTF_8), nfe); + return -0xabcd; + } + }, + batchSize + ); + return ks; + } + +} diff --git a/metadata-drivers/etcd/src/test/java/org/apache/bookkeeper/metadata/etcd/helpers/KeySetReaderTest.java b/metadata-drivers/etcd/src/test/java/org/apache/bookkeeper/metadata/etcd/helpers/KeySetReaderTest.java new file mode 100644 index 00000000000..d7a07fc2ff2 --- /dev/null +++ b/metadata-drivers/etcd/src/test/java/org/apache/bookkeeper/metadata/etcd/helpers/KeySetReaderTest.java @@ -0,0 +1,418 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.bookkeeper.metadata.etcd.helpers; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + +import io.etcd.jetcd.ByteSequence; +import io.etcd.jetcd.options.PutOption; +import io.etcd.jetcd.support.CloseableClient; +import io.etcd.jetcd.support.Observers; +import java.nio.charset.StandardCharsets; +import java.util.HashSet; +import java.util.Set; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.function.Consumer; +import java.util.function.Function; +import lombok.extern.slf4j.Slf4j; +import org.apache.bookkeeper.common.concurrent.FutureUtils; +import org.apache.bookkeeper.metadata.etcd.testing.EtcdTestBase; +import org.apache.bookkeeper.versioning.LongVersion; +import org.apache.bookkeeper.versioning.Version.Occurred; +import org.apache.bookkeeper.versioning.Versioned; +import org.apache.commons.compress.utils.Sets; +import org.apache.commons.lang3.RandomStringUtils; +import org.junit.Test; + +/** + * Integration test {@link KeySetReader}.
+ */ +@Slf4j +public class KeySetReaderTest extends EtcdTestBase { + + private static final Function<ByteSequence, String> BYTE_SEQUENCE_STRING_FUNCTION = + bs -> bs.toString(StandardCharsets.UTF_8); + + @Test + public void testReadSingleKey() throws Exception { + String key = RandomStringUtils.randomAlphabetic(16); + ByteSequence keyBs = ByteSequence.from(key, StandardCharsets.UTF_8); + try (KeySetReader<String> ksReader = new KeySetReader<>( + etcdClient, + BYTE_SEQUENCE_STRING_FUNCTION, + keyBs, + null + )) { + // key not exists + Versioned<Set<String>> versionedKeys = FutureUtils.result(ksReader.read()); + assertTrue( + "VersionedKeys : " + versionedKeys, + ((LongVersion) versionedKeys.getVersion()).getLongVersion() > 0L); + assertEquals(0, versionedKeys.getValue().size()); + assertFalse(ksReader.isWatcherSet()); + + // keys should be cached + assertEquals(versionedKeys, ksReader.getLocalValue()); + + // update a value + String value = RandomStringUtils.randomAlphabetic(32); + ByteSequence valueBs = ByteSequence.from(value, StandardCharsets.UTF_8); + FutureUtils.result(etcdClient.getKVClient().put(keyBs, valueBs)); + + // update the value should not change local value + assertEquals(versionedKeys, ksReader.getLocalValue()); + + // read the key again + Versioned<Set<String>> newVersionedKey = FutureUtils.result(ksReader.read()); + assertEquals(Occurred.AFTER, newVersionedKey.getVersion().compare(versionedKeys.getVersion())); + assertEquals(1, newVersionedKey.getValue().size()); + assertEquals(Sets.newHashSet(key), newVersionedKey.getValue()); + + // local value should be changed + assertEquals(newVersionedKey, ksReader.getLocalValue()); + } + } + + @Test + public void testWatchSingleKey() throws Exception { + String key = RandomStringUtils.randomAlphabetic(16); + ByteSequence keyBs = ByteSequence.from(key, StandardCharsets.UTF_8); + KeySetReader<String> ksReader = null; + try { + ksReader = new KeySetReader<>( + etcdClient, + BYTE_SEQUENCE_STRING_FUNCTION, + keyBs, + null + ); + LinkedBlockingQueue<Versioned<Set<String>>> notifications = new LinkedBlockingQueue<>(); + Consumer<Versioned<Set<String>>> keyConsumer = consumeVersionedKeySet(notifications); + + // key not exists + Versioned<Set<String>> versionedKeys = FutureUtils.result(ksReader.readAndWatch(keyConsumer)); + assertTrue( + "VersionedKeys : " + versionedKeys, + ((LongVersion) versionedKeys.getVersion()).getLongVersion() > 0L); + assertEquals(0, versionedKeys.getValue().size()); + assertTrue(ksReader.isWatcherSet()); + + // keys should be cached + assertEquals(versionedKeys, ksReader.getLocalValue()); + Versioned<Set<String>> newVersionedKey = notifications.take(); + assertEquals(Occurred.CONCURRENTLY, newVersionedKey.getVersion().compare(versionedKeys.getVersion())); + assertEquals(versionedKeys, newVersionedKey); + versionedKeys = newVersionedKey; + + // update a value + String value = RandomStringUtils.randomAlphabetic(32); + ByteSequence valueBs = ByteSequence.from(value, StandardCharsets.UTF_8); + FutureUtils.result(etcdClient.getKVClient().put(keyBs, valueBs)); + + // we should get notified with updated key set + newVersionedKey = notifications.take(); + assertEquals(Occurred.AFTER, newVersionedKey.getVersion().compare(versionedKeys.getVersion())); + assertEquals(1, newVersionedKey.getValue().size()); + assertEquals(Sets.newHashSet(key), newVersionedKey.getValue()); + + // local value should be changed + assertEquals(newVersionedKey, ksReader.getLocalValue()); + versionedKeys = newVersionedKey; + + // delete the key + FutureUtils.result(etcdClient.getKVClient().delete(keyBs)); + newVersionedKey = notifications.take(); + assertEquals(Occurred.AFTER,
newVersionedKey.getVersion().compare(versionedKeys.getVersion())); + assertEquals(0, newVersionedKey.getValue().size()); + + // local value should be changed + assertEquals(newVersionedKey, ksReader.getLocalValue()); + } finally { + if (null != ksReader) { + ksReader.close(); + } + } + assertNotNull(ksReader); + assertFalse(ksReader.isWatcherSet()); + } + + @Test + public void testWatchSingleKeyWithTTL() throws Exception { + String key = RandomStringUtils.randomAlphabetic(16); + ByteSequence keyBs = ByteSequence.from(key, StandardCharsets.UTF_8); + KeySetReader<String> ksReader = null; + try { + ksReader = new KeySetReader<>( + etcdClient, + BYTE_SEQUENCE_STRING_FUNCTION, + keyBs, + null + ); + LinkedBlockingQueue<Versioned<Set<String>>> notifications = new LinkedBlockingQueue<>(); + Consumer<Versioned<Set<String>>> keyConsumer = consumeVersionedKeySet(notifications); + + // key not exists + Versioned<Set<String>> versionedKeys = FutureUtils.result(ksReader.readAndWatch(keyConsumer)); + assertTrue( + "VersionedKeys : " + versionedKeys, + ((LongVersion) versionedKeys.getVersion()).getLongVersion() > 0L); + assertEquals(0, versionedKeys.getValue().size()); + assertTrue(ksReader.isWatcherSet()); + + // keys should be cached + assertEquals(versionedKeys, ksReader.getLocalValue()); + // no watch event should be issued + Versioned<Set<String>> newVersionedKey = notifications.take(); + assertEquals(Occurred.CONCURRENTLY, newVersionedKey.getVersion().compare(versionedKeys.getVersion())); + assertEquals(versionedKeys, newVersionedKey); + versionedKeys = newVersionedKey; + + // create a key with ttl + long leaseId = FutureUtils.result(etcdClient.getLeaseClient().grant(1)).getID(); + String value = RandomStringUtils.randomAlphabetic(32); + ByteSequence valueBs = ByteSequence.from(value, StandardCharsets.UTF_8); + FutureUtils.result(etcdClient.getKVClient() + .put(keyBs, valueBs, PutOption.newBuilder().withLeaseId(leaseId).build())); + + // we should get notified with updated key set + newVersionedKey = notifications.take(); + assertEquals(Occurred.AFTER, newVersionedKey.getVersion().compare(versionedKeys.getVersion())); + assertEquals(1, newVersionedKey.getValue().size()); + assertEquals(Sets.newHashSet(key), newVersionedKey.getValue()); + + // local value should be changed + assertEquals(newVersionedKey, ksReader.getLocalValue()); + versionedKeys = newVersionedKey; + + // the key will be deleted after TTL + newVersionedKey = notifications.take(); + assertEquals(Occurred.AFTER, newVersionedKey.getVersion().compare(versionedKeys.getVersion())); + assertEquals(0, newVersionedKey.getValue().size()); + + // local value should be changed + assertEquals(newVersionedKey, ksReader.getLocalValue()); + } finally { + if (null != ksReader) { + ksReader.close(); + } + } + assertNotNull(ksReader); + assertFalse(ksReader.isWatcherSet()); + } + + @Test + public void testReadKeySet() throws Exception { + String prefix = RandomStringUtils.randomAlphabetic(16); + ByteSequence beginKeyBs = ByteSequence.from(prefix + "-000", StandardCharsets.UTF_8); + ByteSequence endKeyBs = ByteSequence.from(prefix + "-999", StandardCharsets.UTF_8); + try (KeySetReader<String> ksReader = new KeySetReader<>( + etcdClient, + BYTE_SEQUENCE_STRING_FUNCTION, + beginKeyBs, + endKeyBs + )) { + // key not exists + Versioned<Set<String>> versionedKeys = FutureUtils.result(ksReader.read()); + assertTrue( + "VersionedKeys : " + versionedKeys, + ((LongVersion) versionedKeys.getVersion()).getLongVersion() > 0L); + assertEquals(0, versionedKeys.getValue().size()); + assertFalse(ksReader.isWatcherSet()); + + // keys should be cached +
assertEquals(versionedKeys, ksReader.getLocalValue()); + + Set<String> expectedKeySet = new HashSet<>(); + for (int i = 0; i < 20; i++) { + // update a value + String key = String.format("%s-%03d", prefix, i); + String value = RandomStringUtils.randomAlphabetic(32); + ByteSequence keyBs = ByteSequence.from(key, StandardCharsets.UTF_8); + ByteSequence valueBs = ByteSequence.from(value, StandardCharsets.UTF_8); + expectedKeySet.add(key); + FutureUtils.result(etcdClient.getKVClient().put(keyBs, valueBs)); + + // update the value should not change local value + assertEquals(versionedKeys, ksReader.getLocalValue()); + + // read the key again + Versioned<Set<String>> newVersionedKey = FutureUtils.result(ksReader.read()); + assertEquals(Occurred.AFTER, newVersionedKey.getVersion().compare(versionedKeys.getVersion())); + assertEquals(expectedKeySet, newVersionedKey.getValue()); + + // local value should be changed + assertEquals(newVersionedKey, ksReader.getLocalValue()); + versionedKeys = newVersionedKey; + } + } + } + + @Test + public void testWatchKeySet() throws Exception { + String prefix = RandomStringUtils.randomAlphabetic(16); + ByteSequence beginKeyBs = ByteSequence.from(prefix + "-000", StandardCharsets.UTF_8); + ByteSequence endKeyBs = ByteSequence.from(prefix + "-999", StandardCharsets.UTF_8); + KeySetReader<String> ksReader = null; + try { + ksReader = new KeySetReader<>( + etcdClient, + BYTE_SEQUENCE_STRING_FUNCTION, + beginKeyBs, + endKeyBs + ); + LinkedBlockingQueue<Versioned<Set<String>>> notifications = new LinkedBlockingQueue<>(); + Consumer<Versioned<Set<String>>> keyConsumer = consumeVersionedKeySet(notifications); + + // key not exists + Versioned<Set<String>> versionedKeys = FutureUtils.result(ksReader.readAndWatch(keyConsumer)); + assertTrue( + "VersionedKeys : " + versionedKeys, + ((LongVersion) versionedKeys.getVersion()).getLongVersion() > 0L); + assertEquals(0, versionedKeys.getValue().size()); + assertTrue(ksReader.isWatcherSet()); + + // keys should be cached + assertEquals(versionedKeys, ksReader.getLocalValue()); + Versioned<Set<String>> newVersionedKey = notifications.take(); + assertEquals(Occurred.CONCURRENTLY, newVersionedKey.getVersion().compare(versionedKeys.getVersion())); + assertEquals(versionedKeys, newVersionedKey); + versionedKeys = newVersionedKey; + + Set<String> expectedKeySet = new HashSet<>(); + for (int i = 0; i < 20; i++) { + // update a value + String key = String.format("%s-%03d", prefix, i); + String value = RandomStringUtils.randomAlphabetic(32); + ByteSequence keyBs = ByteSequence.from(key, StandardCharsets.UTF_8); + ByteSequence valueBs = ByteSequence.from(value, StandardCharsets.UTF_8); + expectedKeySet.add(key); + FutureUtils.result(etcdClient.getKVClient().put(keyBs, valueBs)); + + // we should get notified with updated key set + newVersionedKey = notifications.take(); + assertEquals(Occurred.AFTER, newVersionedKey.getVersion().compare(versionedKeys.getVersion())); + assertEquals(expectedKeySet, newVersionedKey.getValue()); + + // local value should be changed + assertEquals(newVersionedKey, ksReader.getLocalValue()); + versionedKeys = newVersionedKey; + } + + for (int i = 0; i < 20; i++) { + // delete the key + String key = String.format("%s-%03d", prefix, i); + ByteSequence keyBs = ByteSequence.from(key, StandardCharsets.UTF_8); + expectedKeySet.remove(key); + FutureUtils.result(etcdClient.getKVClient().delete(keyBs)); + + // we should get notified with updated key set + newVersionedKey = notifications.take(); + assertEquals(Occurred.AFTER, newVersionedKey.getVersion().compare(versionedKeys.getVersion())); + assertEquals(expectedKeySet,
newVersionedKey.getValue()); + + // local value should be changed + assertEquals(newVersionedKey, ksReader.getLocalValue()); + versionedKeys = newVersionedKey; + } + } finally { + if (null != ksReader) { + ksReader.close(); + } + } + assertNotNull(ksReader); + assertFalse(ksReader.isWatcherSet()); + } + + @Test + public void testWatchKeySetWithTTL() throws Exception { + String prefix = RandomStringUtils.randomAlphabetic(16); + ByteSequence beginKeyBs = ByteSequence.from(prefix + "-000", StandardCharsets.UTF_8); + ByteSequence endKeyBs = ByteSequence.from(prefix + "-999", StandardCharsets.UTF_8); + KeySetReader<String> ksReader = null; + try { + ksReader = new KeySetReader<>( + etcdClient, + BYTE_SEQUENCE_STRING_FUNCTION, + beginKeyBs, + endKeyBs + ); + LinkedBlockingQueue<Versioned<Set<String>>> notifications = new LinkedBlockingQueue<>(); + Consumer<Versioned<Set<String>>> keyConsumer = consumeVersionedKeySet(notifications); + + // key not exists + Versioned<Set<String>> versionedKeys = FutureUtils.result(ksReader.readAndWatch(keyConsumer)); + assertTrue( + "VersionedKeys : " + versionedKeys, + ((LongVersion) versionedKeys.getVersion()).getLongVersion() > 0L); + assertEquals(0, versionedKeys.getValue().size()); + assertTrue(ksReader.isWatcherSet()); + + // keys should be cached + assertEquals(versionedKeys, ksReader.getLocalValue()); + // no watch event should be issued + Versioned<Set<String>> newVersionedKey = notifications.take(); + assertEquals(Occurred.CONCURRENTLY, newVersionedKey.getVersion().compare(versionedKeys.getVersion())); + assertEquals(versionedKeys, newVersionedKey); + versionedKeys = newVersionedKey; + + // create keys with ttl + long leaseId = FutureUtils.result(etcdClient.getLeaseClient().grant(1)).getID(); + CloseableClient ka = etcdClient.getLeaseClient().keepAlive(leaseId, Observers.observer(response -> { + })); + + Set<String> expectedKeySet = new HashSet<>(); + for (int i = 0; i < 20; i++) { + String key = String.format("%s-%03d", prefix, i); + String value = RandomStringUtils.randomAlphabetic(32); + ByteSequence keyBs = ByteSequence.from(key, StandardCharsets.UTF_8); + ByteSequence valueBs = ByteSequence.from(value, StandardCharsets.UTF_8); + expectedKeySet.add(key); + FutureUtils.result(etcdClient.getKVClient() + .put(keyBs, valueBs, PutOption.newBuilder().withLeaseId(leaseId).build())); + + // we should get notified with updated key set + newVersionedKey = notifications.take(); + assertEquals(Occurred.AFTER, newVersionedKey.getVersion().compare(versionedKeys.getVersion())); + assertEquals(expectedKeySet, newVersionedKey.getValue()); + + // local value should be changed + assertEquals(newVersionedKey, ksReader.getLocalValue()); + versionedKeys = newVersionedKey; + } + + // stop keep alive; all the keys should be expired. + ka.close(); + + // all the keys will be deleted after TTL in the same batch.
+ newVersionedKey = notifications.take(); + // local value should be changed + assertEquals(newVersionedKey, ksReader.getLocalValue()); + assertEquals(Occurred.AFTER, newVersionedKey.getVersion().compare(versionedKeys.getVersion())); + assertTrue(newVersionedKey.getValue().isEmpty()); + } finally { + if (null != ksReader) { + ksReader.close(); + } + } + assertNotNull(ksReader); + assertFalse(ksReader.isWatcherSet()); + } +} diff --git a/metadata-drivers/etcd/src/test/java/org/apache/bookkeeper/metadata/etcd/integration/SmokeTest.java b/metadata-drivers/etcd/src/test/java/org/apache/bookkeeper/metadata/etcd/integration/SmokeTest.java new file mode 100644 index 00000000000..c29ad426628 --- /dev/null +++ b/metadata-drivers/etcd/src/test/java/org/apache/bookkeeper/metadata/etcd/integration/SmokeTest.java @@ -0,0 +1,105 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.bookkeeper.metadata.etcd.integration; + +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.apache.bookkeeper.common.concurrent.FutureUtils.result; +import static org.junit.Assert.assertEquals; + +import java.util.concurrent.atomic.AtomicInteger; +import lombok.extern.slf4j.Slf4j; +import org.apache.bookkeeper.client.api.BookKeeper; +import org.apache.bookkeeper.client.api.DigestType; +import org.apache.bookkeeper.client.api.LedgerEntries; +import org.apache.bookkeeper.client.api.ReadHandle; +import org.apache.bookkeeper.client.api.WriteAdvHandle; +import org.apache.bookkeeper.client.api.WriteHandle; +import org.apache.bookkeeper.metadata.etcd.testing.EtcdBKClusterTestBase; +import org.junit.Test; + +/** + * Smoke testing etcd metadata drivers.
+ */ +@Slf4j +public class SmokeTest extends EtcdBKClusterTestBase { + + private static final byte[] PASSWD = "smoketest".getBytes(UTF_8); + + private static void readEntries(BookKeeper bk, + long ledgerId, + int numExpectedEntries) throws Exception { + try (ReadHandle readlh = result(bk.newOpenLedgerOp() + .withLedgerId(ledgerId) + .withDigestType(DigestType.CRC32C) + .withPassword(PASSWD) + .execute() + )) { + long lac = readlh.getLastAddConfirmed(); + AtomicInteger idx = new AtomicInteger(0); + try (LedgerEntries entries = readlh.read(0, lac)) { + entries.forEach(e -> assertEquals( + String.format("entry-%03d", idx.getAndIncrement()), + new String(e.getEntryBytes(), UTF_8))); + } + assertEquals(idx.get(), numExpectedEntries); + } + } + + @Test + public void testReadWrite() throws Exception { + int numEntries = 100; + try (BookKeeper bk = BookKeeper.newBuilder(conf).build()) { + long ledgerId; + try (WriteHandle wh = result(bk.newCreateLedgerOp() + .withDigestType(DigestType.CRC32C) + .withPassword(PASSWD) + .execute())) { + ledgerId = wh.getId(); + log.info("Successfully created ledger {} to append entries.", ledgerId); + for (int i = 0; i < numEntries; i++) { + wh.append(String.format("entry-%03d", i).getBytes(UTF_8)); + } + } + log.info("Opening ledger {} to read entries ...", ledgerId); + readEntries(bk, ledgerId, numEntries); + log.info("Successfully read {} entries from ledger {}", numEntries, ledgerId); + } + } + + @Test + public void testReadWriteAdv() throws Exception { + final int numEntries = 100; + try (BookKeeper bk = BookKeeper.newBuilder(conf).build()) { + long ledgerId; + try (WriteAdvHandle wah = result(bk.newCreateLedgerOp() + .withDigestType(DigestType.CRC32C) + .withPassword(PASSWD) + .makeAdv() + .execute())) { + ledgerId = wah.getId(); + log.info("Successfully created adv ledger {} to append entries.", ledgerId); + for (int i = 0; i < numEntries; i++) { + wah.write(i, String.format("entry-%03d", i).getBytes(UTF_8)); + } + } + log.info("Opening adv ledger {} to read entries ...", ledgerId); + readEntries(bk, ledgerId, numEntries); + log.info("Successfully read {} entries from adv ledger {}", numEntries, ledgerId); + } + } + + + +} diff --git a/metadata-drivers/etcd/src/test/java/org/apache/bookkeeper/metadata/etcd/testing/EtcdBKClusterTestBase.java b/metadata-drivers/etcd/src/test/java/org/apache/bookkeeper/metadata/etcd/testing/EtcdBKClusterTestBase.java new file mode 100644 index 00000000000..c7e81f4d7e3 --- /dev/null +++ b/metadata-drivers/etcd/src/test/java/org/apache/bookkeeper/metadata/etcd/testing/EtcdBKClusterTestBase.java @@ -0,0 +1,173 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.bookkeeper.metadata.etcd.testing; + +import static org.junit.Assert.assertTrue; + +import io.netty.buffer.ByteBufAllocator; +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import lombok.extern.slf4j.Slf4j; +import org.apache.bookkeeper.bookie.Bookie; +import org.apache.bookkeeper.bookie.MockUncleanShutdownDetection; +import org.apache.bookkeeper.bookie.TestBookieImpl; +import org.apache.bookkeeper.client.BookKeeperAdmin; +import org.apache.bookkeeper.client.api.BookKeeper; +import org.apache.bookkeeper.common.net.ServiceURI; +import org.apache.bookkeeper.conf.ClientConfiguration; +import org.apache.bookkeeper.conf.ServerConfiguration; +import org.apache.bookkeeper.conf.TestBKConfiguration; +import org.apache.bookkeeper.meta.MetadataDrivers; +import org.apache.bookkeeper.metadata.etcd.EtcdMetadataBookieDriver; +import org.apache.bookkeeper.metadata.etcd.EtcdMetadataClientDriver; +import org.apache.bookkeeper.proto.BookieServer; +import org.apache.bookkeeper.test.TestStatsProvider; +import org.apache.bookkeeper.util.IOUtils; +import org.apache.bookkeeper.util.PortManager; +import org.apache.commons.io.FileUtils; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.BeforeClass; + +/** + * A test base that runs an etcd-based bookkeeper cluster. + */ +@Slf4j +public abstract class EtcdBKClusterTestBase extends EtcdTestBase { + + protected static ClientConfiguration baseClientConf; + protected static ServerConfiguration baseServerConf; + protected static final int NUM_BOOKIES = 3; + protected static final List<BookieServer> BOOKIES = new ArrayList<>(NUM_BOOKIES); + protected static final List<File> TMP_DIRS = new ArrayList<>(NUM_BOOKIES); + + protected static File createTempDir(String prefix, String suffix) throws IOException { + File dir = IOUtils.createTempDir(prefix, suffix); + TMP_DIRS.add(dir); + return dir; + } + + protected static ServerConfiguration newServerConfiguration() throws Exception { + File f = createTempDir("bookie", "test"); + int port = PortManager.nextFreePort(); + return newServerConfiguration(port, f, new File[] { f }); + } + + protected static ServerConfiguration newServerConfiguration(int port, File journalDir, File[] ledgerDirs) { + ServerConfiguration conf = new ServerConfiguration(baseServerConf); + conf.setBookiePort(port); + conf.setJournalDirName(journalDir.getPath()); + String[] ledgerDirNames = new String[ledgerDirs.length]; + for (int i = 0; i < ledgerDirs.length; i++) { + ledgerDirNames[i] = ledgerDirs[i].getPath(); + } + conf.setLedgerDirNames(ledgerDirNames); + conf.setEnableTaskExecutionStats(true); + return conf; + } + + @BeforeClass + public static void setupCluster() throws Exception { + setupCluster(NUM_BOOKIES); + } + protected static void setupCluster(int numBookies) throws Exception { + EtcdTestBase.setupCluster(); + + MetadataDrivers.registerBookieDriver( + "etcd", EtcdMetadataBookieDriver.class + ); + MetadataDrivers.registerClientDriver( + "etcd", EtcdMetadataClientDriver.class + ); + + log.info("Successfully started etcd at:" + + " internal service uri = {}, external service uri = {}", + etcdContainer.getInternalServiceUri(), etcdContainer.getExternalServiceUri()); + + ServiceURI uri = ServiceURI.create(etcdContainer.getExternalServiceUri()); + + baseClientConf = new ClientConfiguration() + .setMetadataServiceUri(uri.getUri().toString()); + baseServerConf = TestBKConfiguration.newServerConfiguration() +
.setMetadataServiceUri(uri.getUri().toString()); + // format the cluster + assertTrue(BookKeeperAdmin.format(baseServerConf, false, true)); + // start bookies + startNumBookies(numBookies); + } + + private static void startNumBookies(int numBookies) throws Exception { + for (int i = 0; i < numBookies; i++) { + ServerConfiguration conf = newServerConfiguration(); + log.info("Starting new bookie on port : {}", conf.getBookiePort()); + BookieServer server = startBookie(conf); + synchronized (BOOKIES) { + BOOKIES.add(server); + } + } + } + private static BookieServer startBookie(ServerConfiguration conf) throws Exception { + conf.setAutoRecoveryDaemonEnabled(true); + TestStatsProvider provider = new TestStatsProvider(); + Bookie bookie = new TestBookieImpl(conf); + BookieServer server = new BookieServer(conf, bookie, provider.getStatsLogger(""), + ByteBufAllocator.DEFAULT, + new MockUncleanShutdownDetection()); + server.start(); + return server; + } + + @AfterClass + public static void teardownCluster() throws Exception { + // stop bookies + stopBookies(); + // stop metadata store + EtcdTestBase.teardownCluster(); + log.info("Stopped the metadata store."); + // clean up temp dirs + for (File f : TMP_DIRS) { + FileUtils.deleteDirectory(f); + } + log.info("Clean up all the temp directories."); + } + + private static void stopBookies() { + synchronized (BOOKIES) { + BOOKIES.forEach(BookieServer::shutdown); + log.info("Stopped all the bookies."); + } + } + + protected ClientConfiguration conf; + protected BookKeeper bk; + + @Before + public void setUp() throws Exception { + conf = new ClientConfiguration() + .setMetadataServiceUri(etcdContainer.getExternalServiceUri()); + bk = BookKeeper.newBuilder(conf).build(); + } + + @After + public void tearDown() throws Exception { + if (null != bk) { + bk.close(); + } + } + +} diff --git a/metadata-drivers/etcd/src/test/java/org/apache/bookkeeper/metadata/etcd/testing/EtcdContainer.java b/metadata-drivers/etcd/src/test/java/org/apache/bookkeeper/metadata/etcd/testing/EtcdContainer.java new file mode 100644 index 00000000000..b0a80942074 --- /dev/null +++ b/metadata-drivers/etcd/src/test/java/org/apache/bookkeeper/metadata/etcd/testing/EtcdContainer.java @@ -0,0 +1,143 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.bookkeeper.metadata.etcd.testing; + +import static java.nio.charset.StandardCharsets.UTF_8; + +import com.github.dockerjava.api.DockerClient; +import com.github.dockerjava.api.async.ResultCallback; +import com.github.dockerjava.api.command.LogContainerCmd; +import com.github.dockerjava.api.model.Frame; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import lombok.extern.slf4j.Slf4j; +import org.testcontainers.DockerClientFactory; +import org.testcontainers.containers.ContainerLaunchException; +import org.testcontainers.containers.GenericContainer; +import org.testcontainers.containers.output.WaitingConsumer; +import org.testcontainers.containers.wait.strategy.WaitStrategy; +import org.testcontainers.utility.LogUtils; + +/** + * Etcd test container. + */ +@Slf4j +public class EtcdContainer extends GenericContainer<EtcdContainer> { + + static class LogContainerResultCb extends ResultCallback.Adapter<Frame> { + @Override + public void onNext(Frame frame) { + log.info(new String(frame.getPayload(), UTF_8)); + } + } + + public static final String NAME = "etcd"; + public static final int CLIENT_PORT = 2379; + + private final String clusterName; + + public EtcdContainer(String clusterName) { + super("quay.io/coreos/etcd:v3.5.14"); + this.clusterName = clusterName; + } + + public String getExternalServiceUri() { + return "etcd://" + getHost() + ":" + getEtcdClientPort() + "/clusters/" + clusterName; + } + + public String getInternalServiceUri() { + return "etcd://" + NAME + ":" + CLIENT_PORT + "/clusters/" + clusterName; + } + + @Override + protected void configure() { + super.configure(); + + String[] command = new String[] { + "/usr/local/bin/etcd", + "--name", NAME + "0", + "--initial-advertise-peer-urls", "http://" + NAME + ":2380", + "--listen-peer-urls", "http://0.0.0.0:2380", + "--advertise-client-urls", "http://" + NAME + ":2379", + "--listen-client-urls", "http://0.0.0.0:2379", + "--initial-cluster", NAME + "0=http://" + NAME + ":2380" + }; + + this.withNetworkAliases(NAME) + .withExposedPorts(CLIENT_PORT) + .withCreateContainerCmdModifier(createContainerCmd -> { + createContainerCmd.withHostName(NAME); + createContainerCmd.withName(clusterName + "-" + NAME); + }) + .withCommand(command) + .withNetworkAliases(NAME) + .waitingFor(waitStrategy()); + tailContainerLog(); + } + + public void tailContainerLog() { + CompletableFuture.runAsync(() -> { + while (null == this.getContainerId()) { + try { + TimeUnit.MILLISECONDS.sleep(100); + } catch (InterruptedException e) { + return; + } + } + + LogContainerCmd logContainerCmd = this.dockerClient.logContainerCmd(this.getContainerId()); + logContainerCmd.withStdOut(true).withStdErr(true).withFollowStream(true); + logContainerCmd.exec(new LogContainerResultCb()); + }); + } + + public int getEtcdClientPort() { + return getMappedPort(CLIENT_PORT); + } + + public String getClientEndpoint() { + return String.format("http://%s:%d", getHost(), getEtcdClientPort()); + } + + private WaitStrategy waitStrategy() { + return new org.testcontainers.containers.wait.strategy.AbstractWaitStrategy() { + @Override + protected void waitUntilReady() { + final DockerClient client = DockerClientFactory.instance().client(); + final WaitingConsumer waitingConsumer = new WaitingConsumer(); + + LogUtils.followOutput(client, waitStrategyTarget.getContainerId(), waitingConsumer); + + try { + waitingConsumer.waitUntil( + f -> f.getUtf8String().contains("ready to serve client requests"), +
startupTimeout.getSeconds(), + TimeUnit.SECONDS, + 1 + ); + } catch (TimeoutException e) { + throw new ContainerLaunchException("Timed out"); + } + } + }; + } + + +} diff --git a/metadata-drivers/etcd/src/test/java/org/apache/bookkeeper/metadata/etcd/testing/EtcdTestBase.java b/metadata-drivers/etcd/src/test/java/org/apache/bookkeeper/metadata/etcd/testing/EtcdTestBase.java new file mode 100644 index 00000000000..00758420057 --- /dev/null +++ b/metadata-drivers/etcd/src/test/java/org/apache/bookkeeper/metadata/etcd/testing/EtcdTestBase.java @@ -0,0 +1,96 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.bookkeeper.metadata.etcd.testing; + +import io.etcd.jetcd.Client; +import java.util.Set; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.function.Consumer; +import lombok.extern.slf4j.Slf4j; +import org.apache.bookkeeper.versioning.Versioned; +import org.apache.commons.lang.RandomStringUtils; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Rule; +import org.junit.rules.Timeout; + +/** + * A test base that sets up an etcd cluster for testing.
+ */ +@Slf4j +public abstract class EtcdTestBase { + + @Rule + public Timeout globalTimeout = Timeout.seconds(120); + + protected static EtcdContainer etcdContainer; + + @BeforeClass + public static void setupCluster() throws Exception { + etcdContainer = new EtcdContainer(RandomStringUtils.randomAlphabetic(8)); + etcdContainer.start(); + log.info("Successfully started etcd at {}", etcdContainer.getClientEndpoint()); + } + + @AfterClass + public static void teardownCluster() throws Exception { + if (null != etcdContainer) { + etcdContainer.stop(); + log.info("Successfully stopped etcd."); + } + } + + protected Client etcdClient; + + protected static Client newEtcdClient() { + Client client = Client.builder() + .endpoints(etcdContainer.getClientEndpoint()) + .build(); + return client; + } + + protected static Consumer<Versioned<Set<String>>> consumeVersionedKeySet( + LinkedBlockingQueue<Versioned<Set<String>>> notifications) { + return versionedKeys -> { + log.info("Received new keyset : {}", versionedKeys); + try { + notifications.put(versionedKeys); + } catch (InterruptedException e) { + log.error("Interrupted at enqueuing updated key set", e); + } + }; + } + + @Before + public void setUp() throws Exception { + etcdClient = newEtcdClient(); + log.info("Successfully build etcd client to endpoint {}", etcdContainer.getClientEndpoint()); + } + + @After + public void tearDown() throws Exception { + if (null != etcdClient) { + etcdClient.close(); + log.info("Successfully close etcd client to endpoint {}", etcdContainer.getClientEndpoint()); + } + } + +} diff --git a/metadata-drivers/jetcd-core-shaded/pom.xml b/metadata-drivers/jetcd-core-shaded/pom.xml new file mode 100644 index 00000000000..2b8e855aa22 --- /dev/null +++ b/metadata-drivers/jetcd-core-shaded/pom.xml @@ -0,0 +1,196 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. --> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + <parent> + <groupId>org.apache.bookkeeper.metadata.drivers</groupId> + <artifactId>metadata-drivers-parent</artifactId> + <version>4.18.0-SNAPSHOT</version> + <relativePath>..</relativePath>
+ </parent> + <groupId>org.apache.bookkeeper.metadata.drivers</groupId> + <artifactId>jetcd-core-shaded</artifactId> + <name>Apache BookKeeper :: Metadata Drivers:: jetcd-core shaded</name> + + <dependencies> + <dependency> + <groupId>io.etcd</groupId> + <artifactId>jetcd-core</artifactId> + <version>${jetcd.version}</version> + <exclusions> + <exclusion> + <groupId>io.grpc</groupId> + <artifactId>grpc-netty</artifactId> + </exclusion> + <exclusion> + <groupId>io.netty</groupId> + <artifactId>*</artifactId> + </exclusion> + </exclusions> + </dependency> + <dependency> + <groupId>io.grpc</groupId> + <artifactId>grpc-netty-shaded</artifactId> + </dependency> + <dependency> + <groupId>dev.failsafe</groupId> + <artifactId>failsafe</artifactId> + </dependency> + <dependency> + <groupId>io.grpc</groupId> + <artifactId>grpc-protobuf</artifactId> + </dependency> + <dependency> + <groupId>io.grpc</groupId> + <artifactId>grpc-stub</artifactId> + </dependency> + <dependency> + <groupId>io.grpc</groupId> + <artifactId>grpc-grpclb</artifactId> + </dependency> + <dependency> + <groupId>io.grpc</groupId> + <artifactId>grpc-util</artifactId> + </dependency> + </dependencies> + + <build> + <plugins> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-shade-plugin</artifactId> + <executions> + <execution> + <phase>package</phase> + <goals> + <goal>shade</goal> + </goals> + <configuration> + <createDependencyReducedPom>true</createDependencyReducedPom> + <promoteTransitiveDependencies>true</promoteTransitiveDependencies> + <minimizeJar>false</minimizeJar> + <artifactSet> + <includes> + <include>io.etcd:*</include> + <include>io.vertx:*</include> + </includes> + </artifactSet> + <relocations> + <relocation> + <pattern>io.vertx</pattern> + <shadedPattern>org.apache.pulsar.jetcd.shaded.io.vertx</shadedPattern> + </relocation> + <relocation> + <pattern>io.grpc.netty</pattern> + <shadedPattern>io.grpc.netty.shaded.io.grpc.netty</shadedPattern> + </relocation> + <relocation> + <pattern>io.netty</pattern> + <shadedPattern>io.grpc.netty.shaded.io.netty</shadedPattern> + </relocation> + </relocations> + <filters> + <filter> + <artifact>*:*</artifact> + <excludes> + <exclude>META-INF/*.SF</exclude> + <exclude>META-INF/*.DSA</exclude> + <exclude>META-INF/*.RSA</exclude> + <exclude>META-INF/maven/${project.groupId}/${project.artifactId}/pom.xml</exclude> + </excludes> + </filter> + </filters> + <transformers> + <transformer implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer"> + <resource>META-INF/maven/${project.groupId}/${project.artifactId}/pom.xml</resource> + <file>${project.basedir}/dependency-reduced-pom.xml</file> + </transformer> + </transformers> + <shadedArtifactAttached>true</shadedArtifactAttached> + <shadedClassifierName>shaded</shadedClassifierName> + </configuration> + </execution> + </executions> + </plugin> + <plugin> + <groupId>org.codehaus.mojo</groupId> + <artifactId>build-helper-maven-plugin</artifactId> + <executions> + <execution> + <id>attach-shaded-jar</id> + <phase>package</phase> + <goals> + <goal>attach-artifact</goal> + </goals> + <configuration> + <artifacts> + <artifact> + <file>${project.build.directory}/${project.artifactId}-${project.version}-shaded.jar</file> + <type>jar</type> + <classifier>shaded</classifier> + </artifact> + </artifacts> + </configuration> + </execution> + </executions> + </plugin> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-antrun-plugin</artifactId> + <version>${maven-antrun-plugin.version}</version> + <executions> + <execution> + <id>unpack-shaded-jar</id> + <phase>package</phase> + <goals> + <goal>run</goal> + </goals> + <configuration> + + + + </configuration> + </execution> + </executions> + </plugin> + <plugin> + <groupId>com.github.spotbugs</groupId> + <artifactId>spotbugs-maven-plugin</artifactId> + <configuration> + <skip>true</skip> + </configuration> + </plugin> + </plugins> + </build> +</project> diff --git a/metadata-drivers/pom.xml b/metadata-drivers/pom.xml new file mode 100644 index 00000000000..b54b2cfc064 --- /dev/null +++ b/metadata-drivers/pom.xml @@ -0,0 +1,33 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. --> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <parent> + <artifactId>bookkeeper</artifactId> + <groupId>org.apache.bookkeeper</groupId> + <version>4.18.0-SNAPSHOT</version> + </parent> + <modelVersion>4.0.0</modelVersion> + <groupId>org.apache.bookkeeper.metadata.drivers</groupId> + <artifactId>metadata-drivers-parent</artifactId> + <packaging>pom</packaging> + <name>Apache BookKeeper :: Metadata Drivers :: Parent</name> + <modules> + <module>jetcd-core-shaded</module> + <module>etcd</module> + </modules> +</project> diff --git a/microbenchmarks/pom.xml b/microbenchmarks/pom.xml index a0841bb5419..2815aa95a4c 100644 --- a/microbenchmarks/pom.xml +++ b/microbenchmarks/pom.xml @@ -16,11 +16,11 @@ limitations under the License. --> - <modelVersion>4.0.0</modelVersion> + <modelVersion>4.0.0</modelVersion> <parent> <groupId>org.apache.bookkeeper</groupId> <artifactId>bookkeeper</artifactId> - <version>4.9.0-SNAPSHOT</version> + <version>4.18.0-SNAPSHOT</version> </parent> <artifactId>microbenchmarks</artifactId> <name>Apache BookKeeper :: microbenchmarks</name> @@ -38,18 +38,6 @@ <artifactId>jmh-generator-annprocess</artifactId> <scope>provided</scope> </dependency> - <dependency> - <groupId>io.netty</groupId> - <artifactId>netty-all</artifactId> - </dependency> - <dependency> - <groupId>org.slf4j</groupId> - <artifactId>slf4j-api</artifactId> - </dependency> - <dependency> - <groupId>org.slf4j</groupId> - <artifactId>log4j-over-slf4j</artifactId> - </dependency> <dependency> <groupId>org.apache.bookkeeper</groupId> <artifactId>bookkeeper-server</artifactId> @@ -67,33 +55,32 @@ <artifactId>codahale-metrics-provider</artifactId> <version>${project.version}</version> </dependency> - <dependency> - <groupId>org.apache.bookkeeper.stats</groupId> - <artifactId>twitter-science-provider</artifactId> - <version>${project.version}</version> - </dependency> - <dependency> - <groupId>org.apache.bookkeeper.stats</groupId> - <artifactId>twitter-ostrich-provider</artifactId> - <version>${project.version}</version> - </dependency> </dependencies> <build> <plugins> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-deploy-plugin</artifactId> + <configuration> + <skip>true</skip> + </configuration> + </plugin> <plugin> <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-compiler-plugin</artifactId> - <version>${maven-compiler-plugin.version}</version> - ${javac.target} - ${javac.target} - ${javac.target} + <configuration> + <annotationProcessorPaths> + <path> + <groupId>org.openjdk.jmh</groupId> + <artifactId>jmh-generator-annprocess</artifactId> + <version>${jmh.version}</version> + </path> + </annotationProcessorPaths> + </configuration> </plugin> <plugin> <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-shade-plugin</artifactId> - <version>${maven-shade-plugin.version}</version> <executions> <execution> <phase>package</phase> <goals> <goal>shade</goal> </goals> <configuration> <finalName>${uberjar.name}</finalName> <transformers> <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer"> - <mainClass>org.openjdk.jmh.Main</mainClass> + <manifestEntries> + <Main-Class>org.openjdk.jmh.Main</Main-Class> + <Multi-Release>true</Multi-Release> + </manifestEntries> </transformer> @@ -126,6 +116,10 @@ <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-checkstyle-plugin</artifactId> + </plugin> + <plugin> + <groupId>com.github.spotbugs</groupId> + <artifactId>spotbugs-maven-plugin</artifactId> <configuration> <skip>true</skip> diff --git a/microbenchmarks/run.sh b/microbenchmarks/run.sh index 6d1348c72d6..a95d8d2085f 100755 --- a/microbenchmarks/run.sh +++ b/microbenchmarks/run.sh @@ -15,5 +15,7 @@ # See the License for the specific language governing permissions and # limitations under the License.
-java -Xms1G -Xmx1G -Djdk.nio.maxCachedBufferSize=0 -Djava.net.preferIPv4Stack=true -Duser.timezone=UTC -XX:-MaxFDLimit -XX:+UseG1GC -XX:MaxGCPauseMillis=200 -XX:+ResizeTLAB -XX:-ResizePLAB -XX:MetaspaceSize=128m -XX:MinMetaspaceFreeRatio=50 -XX:MaxMetaspaceFreeRatio=80 -XX:+ParallelRefProcEnabled -XX:StackShadowPages=20 -XX:+UseCompressedOops -XX:+DisableExplicitGC -XX:StringTableSize=1000003 -XX:InitiatingHeapOccupancyPercent=40 -jar target/benchmarks.jar $@ -prof gc -prof stack:lines=5;time=1;top=3 +java -Xms1G -Xmx1G -Djdk.nio.maxCachedBufferSize=0 -Djava.net.preferIPv4Stack=true -Duser.timezone=UTC \ + --add-opens java.base/java.util.zip=ALL-UNNAMED \ + -XX:-MaxFDLimit -XX:+UseG1GC -XX:MaxGCPauseMillis=200 -XX:+ResizeTLAB -XX:-ResizePLAB -XX:MetaspaceSize=128m -XX:MinMetaspaceFreeRatio=50 -XX:MaxMetaspaceFreeRatio=80 -XX:+ParallelRefProcEnabled -XX:StackShadowPages=20 -XX:+UseCompressedOops -XX:+DisableExplicitGC -XX:StringTableSize=1000003 -XX:InitiatingHeapOccupancyPercent=40 -jar target/benchmarks.jar $@ -prof gc -prof stack:lines=5;time=1;top=3 diff --git a/microbenchmarks/src/main/java/org/apache/bookkeeper/bookie/GroupSortBenchmark.java b/microbenchmarks/src/main/java/org/apache/bookkeeper/bookie/GroupSortBenchmark.java new file mode 100644 index 00000000000..b51fc758b6d --- /dev/null +++ b/microbenchmarks/src/main/java/org/apache/bookkeeper/bookie/GroupSortBenchmark.java @@ -0,0 +1,119 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + */ + +package org.apache.bookkeeper.bookie; + +import java.util.Arrays; +import java.util.Random; +import java.util.concurrent.ThreadLocalRandom; +import java.util.concurrent.TimeUnit; +import org.apache.bookkeeper.bookie.storage.ldb.ArrayGroupSort; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Warmup; + +@OutputTimeUnit(TimeUnit.SECONDS) +@Fork(1) +@Warmup(iterations = 1, time = 10) +@Measurement(iterations = 3, time = 10) +public class GroupSortBenchmark { + + private static final int N = 10_000; + + @State(Scope.Benchmark) + public static class TestState { + + private final long[] randomItems = new long[N * 4]; + private final long[] sortedItems; + private final long[] reverseSortedItems = new long[N * 4]; + private final long[] groupSortedItems; + private final long[] reverseGroupSortedItems = new long[N * 4]; + + private long[] items; + + public TestState() { + Random r = ThreadLocalRandom.current(); + for (int i = 0; i < (N * 4); i++) { + randomItems[i] = r.nextLong(); + } + + groupSortedItems = Arrays.copyOf(randomItems, randomItems.length); + ArrayGroupSort.sort(groupSortedItems); + for (int i = 0; i < (N * 4); i += 4) { + reverseGroupSortedItems[i] = groupSortedItems[(N - 1) * 4 - i]; + reverseGroupSortedItems[i + 1] = groupSortedItems[(N - 1) * 4 - i + 1]; + reverseGroupSortedItems[i + 2] = groupSortedItems[(N - 1) * 4 - i + 2]; + reverseGroupSortedItems[i + 3] = groupSortedItems[(N - 1) * 4 - i + 3]; + } + + sortedItems = Arrays.copyOf(randomItems, randomItems.length); + Arrays.sort(sortedItems); + for (int i = 0; i < (N * 4); i++) { + reverseSortedItems[i] = sortedItems[N * 4 - 1 - i]; + } + } + + @Setup(Level.Invocation) + public void setupInvocation() { + items = Arrays.copyOf(randomItems, randomItems.length); + } + } + + @Benchmark + public void randomGroupSort(GroupSortBenchmark.TestState s) { + ArrayGroupSort.sort(s.items); + } + + + @Benchmark + public void randomArraySort(GroupSortBenchmark.TestState s) { + Arrays.sort(s.items); + } + + + @Benchmark + public void preSortedGroupSort(GroupSortBenchmark.TestState s) { + ArrayGroupSort.sort(s.groupSortedItems); + } + + + @Benchmark + public void preSortedArraySort(GroupSortBenchmark.TestState s) { + Arrays.sort(s.sortedItems); + } + + @Benchmark + public void reverseSortedGroupSort(GroupSortBenchmark.TestState s) { + ArrayGroupSort.sort(s.reverseGroupSortedItems); + } + + + @Benchmark + public void reverseSortedArraySort(GroupSortBenchmark.TestState s) { + Arrays.sort(s.reverseSortedItems); + } +} diff --git a/microbenchmarks/src/main/java/org/apache/bookkeeper/bookie/package-info.java b/microbenchmarks/src/main/java/org/apache/bookkeeper/bookie/package-info.java new file mode 100644 index 00000000000..f7c6e14e9af --- /dev/null +++ b/microbenchmarks/src/main/java/org/apache/bookkeeper/bookie/package-info.java @@ -0,0 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
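GroupSortBenchmark above compares ArrayGroupSort.sort against Arrays.sort on long arrays laid out as 4-long records. For orientation, here is a plain-Java illustration of sorting such records as units; it mirrors only the data layout the benchmark feeds in, and the key it orders by (the first element of each record) is an assumption, since the diff does not show what ArrayGroupSort compares on:

import java.util.Arrays;
import java.util.Comparator;

public class GroupSortSketch {

    // Sorts a long[] whose length is a multiple of 4, treating each run of 4 longs
    // as one record and ordering records by their first element.
    static void sortGroupsOf4(long[] items) {
        int records = items.length / 4;
        long[][] recs = new long[records][4];
        for (int i = 0; i < records; i++) {
            System.arraycopy(items, i * 4, recs[i], 0, 4);
        }
        Arrays.sort(recs, Comparator.comparingLong(r -> r[0]));
        for (int i = 0; i < records; i++) {
            System.arraycopy(recs[i], 0, items, i * 4, 4);
        }
    }

    public static void main(String[] args) {
        long[] items = {30, 1, 2, 3, 10, 4, 5, 6, 20, 7, 8, 9};
        sortGroupsOf4(items);
        // Prints [10, 4, 5, 6, 20, 7, 8, 9, 30, 1, 2, 3]: records move as whole units.
        System.out.println(Arrays.toString(items));
    }
}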
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.bookkeeper.bookie; diff --git a/microbenchmarks/src/main/java/org/apache/bookkeeper/common/MpScQueueBenchmark.java b/microbenchmarks/src/main/java/org/apache/bookkeeper/common/MpScQueueBenchmark.java new file mode 100644 index 00000000000..306140c2a79 --- /dev/null +++ b/microbenchmarks/src/main/java/org/apache/bookkeeper/common/MpScQueueBenchmark.java @@ -0,0 +1,134 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.bookkeeper.common; + +import java.util.ArrayList; +import java.util.concurrent.ArrayBlockingQueue; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; +import lombok.SneakyThrows; +import org.apache.bookkeeper.common.collections.BatchedArrayBlockingQueue; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OperationsPerInvocation; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.TearDown; +import org.openjdk.jmh.annotations.Threads; +import org.openjdk.jmh.annotations.Warmup; + +/** + * Microbenchmarks for different executors providers. + */ +@OutputTimeUnit(TimeUnit.MICROSECONDS) +@BenchmarkMode(Mode.Throughput) +@Threads(16) +@Fork(1) +@Warmup(iterations = 1, time = 10, timeUnit = TimeUnit.SECONDS) +@Measurement(iterations = 3, time = 10, timeUnit = TimeUnit.SECONDS) +public class MpScQueueBenchmark { + + private static final int QUEUE_SIZE = 100_000; + + /** + * State holder of the test. 
+ */ + @State(Scope.Benchmark) + public static class TestState { + + private ArrayBlockingQueue<Integer> arrayBlockingQueue = new ArrayBlockingQueue<>(QUEUE_SIZE); + + private BatchedArrayBlockingQueue<Integer> batchedArrayBlockingQueue = new BatchedArrayBlockingQueue<>(QUEUE_SIZE); + + private final Integer[] batchArray = new Integer[1000]; + + private final ExecutorService executor = Executors.newCachedThreadPool(); + + @Setup(Level.Trial) + public void setup() { + for (int i = 0; i < 1000; i++) { + batchArray[i] = i; + } + + executor.execute(this::consumeABQ); + executor.execute(this::consumeBAABQ); + } + + @SneakyThrows + private void consumeABQ() { + ArrayList<Integer> localList = new ArrayList<>(); + + try { + while (true) { + arrayBlockingQueue.drainTo(localList); + if (localList.isEmpty()) { + arrayBlockingQueue.take(); + } + localList.clear(); + } + } catch (InterruptedException ie) { + } + } + + @SneakyThrows + private void consumeBAABQ() { + Integer[] localArray = new Integer[20_000]; + + try { + while (true) { + batchedArrayBlockingQueue.takeAll(localArray); + } + } catch (InterruptedException ie) { + } + } + + @TearDown(Level.Trial) + public void teardown() { + executor.shutdownNow(); + } + + @TearDown(Level.Iteration) + public void cleanupQueue() throws InterruptedException { + Thread.sleep(1_000); + } + } + + @Benchmark + public void arrayBlockingQueue(TestState s) throws Exception { + s.arrayBlockingQueue.put(1); + } + + @Benchmark + public void batchAwareArrayBlockingQueueSingleEnqueue(TestState s) throws Exception { + s.batchedArrayBlockingQueue.put(1); + } + + @Benchmark + @OperationsPerInvocation(1000) + public void batchAwareArrayBlockingQueueBatch(TestState s) throws Exception { + s.batchedArrayBlockingQueue.putAll(s.batchArray, 0, 1000); + } +} diff --git a/microbenchmarks/src/main/java/org/apache/bookkeeper/common/OrderedExecutorBenchmark.java b/microbenchmarks/src/main/java/org/apache/bookkeeper/common/OrderedExecutorBenchmark.java index 66ce59bc39f..02232439fdd 100644 --- a/microbenchmarks/src/main/java/org/apache/bookkeeper/common/OrderedExecutorBenchmark.java +++ b/microbenchmarks/src/main/java/org/apache/bookkeeper/common/OrderedExecutorBenchmark.java @@ -19,13 +19,11 @@ package org.apache.bookkeeper.common; import com.google.common.collect.ImmutableMap; - import java.util.Map; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; import java.util.function.Supplier; - import org.apache.bookkeeper.common.util.OrderedExecutor; import org.apache.bookkeeper.common.util.OrderedScheduler; import org.openjdk.jmh.annotations.Benchmark; @@ -51,14 +49,17 @@ @Measurement(iterations = 3, time = 10, timeUnit = TimeUnit.SECONDS) public class OrderedExecutorBenchmark { - private static Map<String, Supplier<ExecutorService>> providers = ImmutableMap.of( // + private static Map<String, Supplier<ExecutorService>> providers = ImmutableMap.of( "JDK-ThreadPool", () -> Executors.newFixedThreadPool(1), - "OrderedExecutor", () -> OrderedExecutor.newBuilder().numThreads(1).build(), // + "OrderedExecutor", () -> OrderedExecutor.newBuilder().numThreads(1).build(), "OrderedScheduler", () -> OrderedScheduler.newSchedulerBuilder().numThreads(1).build()); + /** + * State holder of the test.
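MpScQueueBenchmark above contrasts element-at-a-time ArrayBlockingQueue usage with BookKeeper's BatchedArrayBlockingQueue, which amortizes lock acquisitions across whole batches. A standalone sketch of the batched API as the benchmark uses it; the assumption that takeAll returns the number of drained elements is mine, the diff does not show the return type:

import org.apache.bookkeeper.common.collections.BatchedArrayBlockingQueue;

public class BatchedQueueSketch {
    public static void main(String[] args) throws Exception {
        BatchedArrayBlockingQueue<Integer> queue = new BatchedArrayBlockingQueue<>(1024);

        Thread consumer = new Thread(() -> {
            Integer[] batch = new Integer[256];
            try {
                while (true) {
                    // Blocks until at least one element is available, then drains a
                    // whole batch under a single lock acquisition.
                    int n = queue.takeAll(batch);
                    System.out.println("drained " + n + " elements");
                }
            } catch (InterruptedException ie) {
                // exit
            }
        });
        consumer.start();

        Integer[] toPublish = new Integer[100];
        for (int i = 0; i < toPublish.length; i++) {
            toPublish[i] = i;
        }
        queue.putAll(toPublish, 0, toPublish.length); // one enqueue for the whole batch

        Thread.sleep(100);
        consumer.interrupt();
        consumer.join();
    }
}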
+ */ @State(Scope.Benchmark) public static class TestState { - @Param({ "JDK-ThreadPool", "OrderedExecutor", "OrderedScheduler" }) + @Param({"JDK-ThreadPool", "OrderedExecutor", "OrderedScheduler"}) private String executorName; private ExecutorService executor; diff --git a/microbenchmarks/src/main/java/org/apache/bookkeeper/common/package-info.java b/microbenchmarks/src/main/java/org/apache/bookkeeper/common/package-info.java new file mode 100644 index 00000000000..39047c640d5 --- /dev/null +++ b/microbenchmarks/src/main/java/org/apache/bookkeeper/common/package-info.java @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Generic benchmarks. + */ +package org.apache.bookkeeper.common; \ No newline at end of file diff --git a/microbenchmarks/src/main/java/org/apache/bookkeeper/proto/ProtocolBenchmark.java b/microbenchmarks/src/main/java/org/apache/bookkeeper/proto/ProtocolBenchmark.java index ce1f02bd57d..a44890845a2 100644 --- a/microbenchmarks/src/main/java/org/apache/bookkeeper/proto/ProtocolBenchmark.java +++ b/microbenchmarks/src/main/java/org/apache/bookkeeper/proto/ProtocolBenchmark.java @@ -17,14 +17,14 @@ */ package org.apache.bookkeeper.proto; -import static com.google.common.base.Charsets.UTF_8; +import static java.nio.charset.StandardCharsets.UTF_8; import com.google.protobuf.ByteString; import io.netty.buffer.ByteBuf; import io.netty.buffer.ByteBufAllocator; import io.netty.buffer.Unpooled; import io.netty.util.ReferenceCountUtil; -import java.util.Random; +import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.TimeUnit; import org.apache.bookkeeper.proto.BookieProtoEncoding.EnDecoder; import org.apache.bookkeeper.proto.BookieProtoEncoding.RequestEnDeCoderPreV3; @@ -34,7 +34,6 @@ import org.apache.bookkeeper.proto.BookkeeperProtocol.OperationType; import org.apache.bookkeeper.proto.BookkeeperProtocol.ProtocolVersion; import org.apache.bookkeeper.proto.BookkeeperProtocol.Request; -import org.apache.bookkeeper.util.ByteBufList; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; import org.openjdk.jmh.annotations.Mode; @@ -43,16 +42,17 @@ import org.openjdk.jmh.annotations.Scope; import org.openjdk.jmh.annotations.Setup; import org.openjdk.jmh.annotations.State; +import org.slf4j.MDC; /** - * Benchmarking serialization and deserilization. + * Benchmarking serialization and deserialization. 
*/ -@BenchmarkMode({ Mode.Throughput }) +@BenchmarkMode({Mode.Throughput}) @OutputTimeUnit(TimeUnit.MILLISECONDS) @State(Scope.Thread) public class ProtocolBenchmark { - @Param({ "10", "100", "1000", "10000" }) + @Param({"10", "100", "1000", "10000"}) int size; byte[] masterKey; @@ -66,12 +66,11 @@ public class ProtocolBenchmark { @Setup public void prepare() { this.masterKey = "test-benchmark-key".getBytes(UTF_8); - Random r = new Random(System.currentTimeMillis()); byte[] data = new byte[this.size]; - r.nextBytes(data); + ThreadLocalRandom.current().nextBytes(data); this.entry = Unpooled.wrappedBuffer(data); - this.ledgerId = r.nextLong(); - this.entryId = r.nextLong(); + this.ledgerId = ThreadLocalRandom.current().nextLong(); + this.entryId = ThreadLocalRandom.current().nextLong(); this.flags = 1; // prepare the encoder @@ -79,24 +78,37 @@ public void prepare() { this.reqEnDeV3 = new RequestEnDecoderV3(null); } - @Benchmark - public void testAddEntryV2() throws Exception { - ByteBufList list = ByteBufList.get(entry.slice()); - BookieProtocol.AddRequest req = BookieProtocol.AddRequest.create( - BookieProtocol.CURRENT_PROTOCOL_VERSION, - ledgerId, - entryId, - flags, - masterKey, - list); - Object res = this.reqEnDeV2.encode(req, ByteBufAllocator.DEFAULT); + public void testAddEntryV3() throws Exception { + // Build the request and calculate the total size to be included in the packet. + BKPacketHeader.Builder headerBuilder = BKPacketHeader.newBuilder() + .setVersion(ProtocolVersion.VERSION_THREE) + .setOperation(OperationType.ADD_ENTRY) + .setTxnId(0L); + + ByteBuf toSend = entry.slice(); + byte[] toSendArray = new byte[toSend.readableBytes()]; + toSend.getBytes(toSend.readerIndex(), toSendArray); + AddRequest.Builder addBuilder = AddRequest.newBuilder() + .setLedgerId(ledgerId) + .setEntryId(entryId) + .setMasterKey(ByteString.copyFrom(masterKey)) + .setBody(ByteString.copyFrom(toSendArray)) + .setFlag(AddRequest.Flag.RECOVERY_ADD); + + Request request = Request.newBuilder() + .setHeader(headerBuilder) + .setAddRequest(addBuilder) + .build(); + + Object res = this.reqEnDeV3.encode(request, ByteBufAllocator.DEFAULT); ReferenceCountUtil.release(res); - ReferenceCountUtil.release(list); } @Benchmark - public void testAddEntryV3() throws Exception { + public void testAddEntryV3WithMdc() throws Exception { + MDC.put("parent_id", "LetsPutSomeLongParentRequestIdHere"); + MDC.put("request_id", "LetsPutSomeLongRequestIdHere"); // Build the request and calculate the total size to be included in the packet. 
BKPacketHeader.Builder headerBuilder = BKPacketHeader.newBuilder() .setVersion(ProtocolVersion.VERSION_THREE) @@ -113,13 +125,56 @@ public void testAddEntryV3() throws Exception { .setBody(ByteString.copyFrom(toSendArray)) .setFlag(AddRequest.Flag.RECOVERY_ADD); - Request request = Request.newBuilder() + Request request = PerChannelBookieClient.appendRequestContext(Request.newBuilder()) .setHeader(headerBuilder) .setAddRequest(addBuilder) .build(); Object res = this.reqEnDeV3.encode(request, ByteBufAllocator.DEFAULT); ReferenceCountUtil.release(res); + MDC.clear(); + } + + static Request.Builder appendRequestContextNoMdc(Request.Builder builder) { + final BookkeeperProtocol.ContextPair context1 = BookkeeperProtocol.ContextPair.newBuilder() + .setKey("parent_id") + .setValue("LetsPutSomeLongParentRequestIdHere") + .build(); + builder.addRequestContext(context1); + + final BookkeeperProtocol.ContextPair context2 = BookkeeperProtocol.ContextPair.newBuilder() + .setKey("request_id") + .setValue("LetsPutSomeLongRequestIdHere") + .build(); + builder.addRequestContext(context2); + + return builder; } + @Benchmark + public void testAddEntryV3WithExtraContextDataNoMdc() throws Exception { + // Build the request and calculate the total size to be included in the packet. + BKPacketHeader.Builder headerBuilder = BKPacketHeader.newBuilder() + .setVersion(ProtocolVersion.VERSION_THREE) + .setOperation(OperationType.ADD_ENTRY) + .setTxnId(0L); + + ByteBuf toSend = entry.slice(); + byte[] toSendArray = new byte[toSend.readableBytes()]; + toSend.getBytes(toSend.readerIndex(), toSendArray); + AddRequest.Builder addBuilder = AddRequest.newBuilder() + .setLedgerId(ledgerId) + .setEntryId(entryId) + .setMasterKey(ByteString.copyFrom(masterKey)) + .setBody(ByteString.copyFrom(toSendArray)) + .setFlag(AddRequest.Flag.RECOVERY_ADD); + + Request request = appendRequestContextNoMdc(Request.newBuilder()) + .setHeader(headerBuilder) + .setAddRequest(addBuilder) + .build(); + + Object res = this.reqEnDeV3.encode(request, ByteBufAllocator.DEFAULT); + ReferenceCountUtil.release(res); + } } diff --git a/microbenchmarks/src/main/java/org/apache/bookkeeper/proto/checksum/DigestManagerBenchmark.java b/microbenchmarks/src/main/java/org/apache/bookkeeper/proto/checksum/DigestManagerBenchmark.java new file mode 100644 index 00000000000..cdf8f5a1ed7 --- /dev/null +++ b/microbenchmarks/src/main/java/org/apache/bookkeeper/proto/checksum/DigestManagerBenchmark.java @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
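The three ProtocolBenchmark variants above isolate the cost of carrying request context: none, via SLF4J's MDC, and via explicitly constructed ContextPair entries. A sketch of the MDC-driven copy that the MDC variant routes through PerChannelBookieClient.appendRequestContext; the helper below is illustrative, as the real implementation is not shown in this diff:

import java.util.Map;
import org.apache.bookkeeper.proto.BookkeeperProtocol;
import org.apache.bookkeeper.proto.BookkeeperProtocol.Request;
import org.slf4j.MDC;

public class MdcContextSketch {

    // Copies whatever is currently in the SLF4J MDC into the request's repeated
    // ContextPair field, one pair per MDC entry.
    static Request.Builder appendMdcContext(Request.Builder builder) {
        Map<String, String> mdc = MDC.getCopyOfContextMap();
        if (mdc != null) {
            mdc.forEach((key, value) -> builder.addRequestContext(
                    BookkeeperProtocol.ContextPair.newBuilder()
                            .setKey(key)
                            .setValue(value)
                            .build()));
        }
        return builder;
    }

    public static void main(String[] args) {
        MDC.put("request_id", "some-request-id");
        Request.Builder builder = appendMdcContext(Request.newBuilder());
        System.out.println(builder.getRequestContextCount() + " context pair(s) attached");
        MDC.clear();
    }
}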
+ */ + +package org.apache.bookkeeper.proto.checksum; + +import io.netty.buffer.ByteBuf; +import io.netty.buffer.ByteBufAllocator; +import io.netty.buffer.PooledByteBufAllocator; +import java.nio.charset.StandardCharsets; +import java.util.concurrent.ThreadLocalRandom; +import java.util.concurrent.TimeUnit; +import org.apache.bookkeeper.proto.DataFormats.LedgerMetadataFormat.DigestType; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Threads; +import org.openjdk.jmh.annotations.Warmup; + +/** + * Microbenchmarks for different digest types. + * Getting started: + * 1. http://tutorials.jenkov.com/java-performance/jmh.html + * 2. http://hg.openjdk.java.net/code-tools/jmh/file/tip/jmh-samples/src/main/java/org/openjdk/jmh/samples/ + * 3. Google + * To run: build the project from the command line, then execute ./run.sh + */ +public class DigestManagerBenchmark { + + static byte[] randomBytes(int sz) { + byte[] b = new byte[sz]; + ThreadLocalRandom.current().nextBytes(b); + return b; + } + + /** + * MyState. + */ + @State(Scope.Thread) + public static class MyState { + + @Param({"64", "1024", "4086", "8192"}) + public int entrySize; + + private DigestManager dm; + + public ByteBuf digestBuf; + + @Setup(Level.Trial) + public void doSetup() throws Exception { + final byte[] password = "password".getBytes(StandardCharsets.UTF_8); + + dm = DigestManager.instantiate(ThreadLocalRandom.current().nextLong(0, Long.MAX_VALUE), + password, DigestType.CRC32C, PooledByteBufAllocator.DEFAULT, true); + + ByteBuf data = ByteBufAllocator.DEFAULT.directBuffer(entrySize, entrySize); + data.writeBytes(randomBytes(entrySize)); + + digestBuf = ByteBufAllocator.DEFAULT.directBuffer(); + digestBuf.writeBytes((ByteBuf) + dm.computeDigestAndPackageForSending(1234, 1234, entrySize, data, + new byte[0], 0)); + } + } + + @Benchmark + @BenchmarkMode(Mode.Throughput) + @OutputTimeUnit(TimeUnit.MICROSECONDS) + @Warmup(iterations = 2, time = 3, timeUnit = TimeUnit.SECONDS) + @Measurement(iterations = 3, time = 10, timeUnit = TimeUnit.SECONDS) + @Threads(2) + @Fork(1) + public void verifyDigest(MyState state) throws Exception { + state.digestBuf.readerIndex(0); + state.dm.verifyDigestAndReturnData(1234, state.digestBuf); + } +} diff --git a/microbenchmarks/src/main/java/org/apache/bookkeeper/proto/checksum/DigestTypeBenchmark.java b/microbenchmarks/src/main/java/org/apache/bookkeeper/proto/checksum/DigestTypeBenchmark.java index 3995ea8875c..7201aba8cf0 100644 --- a/microbenchmarks/src/main/java/org/apache/bookkeeper/proto/checksum/DigestTypeBenchmark.java +++ b/microbenchmarks/src/main/java/org/apache/bookkeeper/proto/checksum/DigestTypeBenchmark.java @@ -1,5 +1,4 @@ -/** - * +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -8,7 +7,7 @@ * "License"); you may not use this file except in compliance * with the License.
You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an @@ -16,7 +15,6 @@ * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. - * */ package org.apache.bookkeeper.proto.checksum; @@ -24,7 +22,9 @@ import io.netty.buffer.ByteBuf; import io.netty.buffer.ByteBufAllocator; import io.netty.buffer.CompositeByteBuf; +import io.netty.buffer.PooledByteBufAllocator; import io.netty.buffer.Unpooled; +import java.nio.charset.StandardCharsets; import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.TimeUnit; import org.apache.bookkeeper.proto.DataFormats.LedgerMetadataFormat.DigestType; @@ -89,7 +89,7 @@ public static class MyState { public BufferType bufferType; @Param public Digest digest; - @Param({"1024", "4086", "8192", "16384", "65536"}) + @Param({"64", "1024", "4086", "8192", "16384", "65536"}) public int entrySize; private DigestManager crc32; @@ -104,15 +104,15 @@ public static class MyState { @Setup(Level.Trial) public void doSetup() throws Exception { - final byte[] password = "password".getBytes("UTF-8"); + final byte[] password = "password".getBytes(StandardCharsets.UTF_8); crc32 = DigestManager.instantiate(ThreadLocalRandom.current().nextLong(0, Long.MAX_VALUE), - password, DigestType.CRC32); + password, DigestType.CRC32, PooledByteBufAllocator.DEFAULT, true); crc32c = DigestManager.instantiate(ThreadLocalRandom.current().nextLong(0, Long.MAX_VALUE), - password, DigestType.CRC32C); + password, DigestType.CRC32C, PooledByteBufAllocator.DEFAULT, true); mac = DigestManager.instantiate(ThreadLocalRandom.current().nextLong(0, Long.MAX_VALUE), - password, DigestType.HMAC); + password, DigestType.HMAC, PooledByteBufAllocator.DEFAULT, true); digestBuf = Unpooled.buffer(getDigestManager(digest).getMacCodeLength()); @@ -137,27 +137,27 @@ public void doTearDown() { public ByteBuf getByteBuff(BufferType bType) { switch (bType) { - case ARRAY_BACKED: - return arrayBackedBuffer; - case NOT_ARRAY_BACKED: - return notArrayBackedBuffer; - case BYTE_BUF_DEFAULT_ALLOC: - return byteBufDefaultAlloc; - default: - throw new IllegalArgumentException("unknown buffer type " + bType); + case ARRAY_BACKED: + return arrayBackedBuffer; + case NOT_ARRAY_BACKED: + return notArrayBackedBuffer; + case BYTE_BUF_DEFAULT_ALLOC: + return byteBufDefaultAlloc; + default: + throw new IllegalArgumentException("unknown buffer type " + bType); } } public DigestManager getDigestManager(Digest digest) { switch (digest) { - case CRC32: - return crc32; - case CRC32_C: - return crc32c; - case MAC: - return mac; - default: - throw new IllegalArgumentException("unknown digest " + digest); + case CRC32: + return crc32; + case CRC32_C: + return crc32c; + case MAC: + return mac; + default: + throw new IllegalArgumentException("unknown digest " + digest); } } } @@ -166,15 +166,15 @@ public DigestManager getDigestManager(Digest digest) { @BenchmarkMode(Mode.Throughput) @OutputTimeUnit(TimeUnit.MILLISECONDS) @Warmup(iterations = 2, time = 3, timeUnit = TimeUnit.SECONDS) - @Measurement(iterations = 5, time = 12, timeUnit = TimeUnit.SECONDS) + @Measurement(iterations = 2, time = 10, timeUnit = TimeUnit.SECONDS) @Threads(2) @Fork(value = 1, warmups = 1) public void digestManager(MyState state) { final ByteBuf buff = 
state.getByteBuff(state.bufferType); final DigestManager dm = state.getDigestManager(state.digest); - dm.update(buff); + int digest = dm.update(0, buff, 0, buff.readableBytes()); state.digestBuf.clear(); - dm.populateValueAndReset(state.digestBuf); + dm.populateValueAndReset(digest, state.digestBuf); } } diff --git a/microbenchmarks/src/main/java/org/apache/bookkeeper/proto/checksum/package-info.java b/microbenchmarks/src/main/java/org/apache/bookkeeper/proto/checksum/package-info.java new file mode 100644 index 00000000000..da4dc0cbcab --- /dev/null +++ b/microbenchmarks/src/main/java/org/apache/bookkeeper/proto/checksum/package-info.java @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Utilities for checksum functions. + */ +package org.apache.bookkeeper.proto.checksum; \ No newline at end of file diff --git a/microbenchmarks/src/main/java/org/apache/bookkeeper/proto/package-info.java b/microbenchmarks/src/main/java/org/apache/bookkeeper/proto/package-info.java new file mode 100644 index 00000000000..6cd88672f0b --- /dev/null +++ b/microbenchmarks/src/main/java/org/apache/bookkeeper/proto/package-info.java @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Protocol implementations benchmarks. 
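The two digest benchmarks above drive BookKeeper's DigestManager. The CRC32C operation they spend most of their time in also ships with the JDK (java.util.zip.CRC32C, since JDK 9, in the same java.util.zip package that run.sh opens with --add-opens); a standalone sketch of the raw checksum, with an arbitrary payload:

import java.nio.charset.StandardCharsets;
import java.util.zip.CRC32C;

public class Crc32cSketch {
    public static void main(String[] args) {
        byte[] payload = "entry-payload".getBytes(StandardCharsets.UTF_8);
        CRC32C crc = new CRC32C();
        crc.update(payload, 0, payload.length);
        // Prints the 32-bit checksum value as hex.
        System.out.printf("CRC32C = 0x%08x%n", crc.getValue());
    }
}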
+ */ +package org.apache.bookkeeper.proto; \ No newline at end of file diff --git a/microbenchmarks/src/main/java/org/apache/bookkeeper/stats/StatsLoggerBenchmark.java b/microbenchmarks/src/main/java/org/apache/bookkeeper/stats/StatsLoggerBenchmark.java index 0a2cd063800..419d6fadcd6 100644 --- a/microbenchmarks/src/main/java/org/apache/bookkeeper/stats/StatsLoggerBenchmark.java +++ b/microbenchmarks/src/main/java/org/apache/bookkeeper/stats/StatsLoggerBenchmark.java @@ -22,16 +22,9 @@ import java.util.Map; import java.util.concurrent.TimeUnit; import java.util.function.Supplier; - -import org.apache.bookkeeper.stats.Counter; -import org.apache.bookkeeper.stats.OpStatsLogger; -import org.apache.bookkeeper.stats.StatsLogger; -import org.apache.bookkeeper.stats.StatsProvider; import org.apache.bookkeeper.stats.codahale.CodahaleMetricsProvider; import org.apache.bookkeeper.stats.codahale.FastCodahaleMetricsProvider; import org.apache.bookkeeper.stats.prometheus.PrometheusMetricsProvider; -import org.apache.bookkeeper.stats.twitter.ostrich.OstrichProvider; -import org.apache.bookkeeper.stats.twitter.science.TwitterStatsProvider; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.Fork; import org.openjdk.jmh.annotations.Level; @@ -60,13 +53,14 @@ public class StatsLoggerBenchmark { providers.put("Prometheus", PrometheusMetricsProvider::new); providers.put("Codahale", CodahaleMetricsProvider::new); providers.put("FastCodahale", FastCodahaleMetricsProvider::new); - providers.put("Twitter", TwitterStatsProvider::new); - providers.put("Ostrich", OstrichProvider::new); } + /** + * State holder of the logger. + */ @State(Scope.Benchmark) public static class LoggerState { - @Param({ "Prometheus", "Codahale", "FastCodahale", "Twitter", "Ostrich" }) + @Param({"Prometheus", "Codahale", "FastCodahale", "Twitter", "Ostrich"}) private String statsProvider; private Counter counter; @@ -83,7 +77,7 @@ public void setup() { } } - @Benchmark + @Benchmark public void counterIncrement(LoggerState s) { s.counter.inc(); } diff --git a/microbenchmarks/src/main/java/org/apache/bookkeeper/stats/codahale/TimerBenchmark.java b/microbenchmarks/src/main/java/org/apache/bookkeeper/stats/codahale/TimerBenchmark.java index 5d1cf068f23..8c34e91896d 100644 --- a/microbenchmarks/src/main/java/org/apache/bookkeeper/stats/codahale/TimerBenchmark.java +++ b/microbenchmarks/src/main/java/org/apache/bookkeeper/stats/codahale/TimerBenchmark.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with this * work for additional information regarding copyright ownership. 
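StatsLoggerBenchmark above measures single counter increments across stats providers. For orientation, this is the typical call pattern an instrumented code path uses, assuming the provider construction shown in the benchmark; the logger scope and metric names are illustrative:

import java.util.concurrent.TimeUnit;
import org.apache.bookkeeper.stats.Counter;
import org.apache.bookkeeper.stats.OpStatsLogger;
import org.apache.bookkeeper.stats.StatsLogger;
import org.apache.bookkeeper.stats.codahale.FastCodahaleMetricsProvider;

public class StatsUsageSketch {
    public static void main(String[] args) {
        StatsLogger logger = new FastCodahaleMetricsProvider().getStatsLogger("test");

        Counter writes = logger.getCounter("writes");
        writes.inc(); // the exact operation the benchmark measures per invocation

        OpStatsLogger latency = logger.getOpStatsLogger("write-latency");
        long start = System.nanoTime();
        // ... the operation being timed ...
        latency.registerSuccessfulEvent(System.nanoTime() - start, TimeUnit.NANOSECONDS);
    }
}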
The ASF @@ -16,13 +16,11 @@ */ package org.apache.bookkeeper.stats.codahale; +import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.TimeUnit; - import org.apache.bookkeeper.stats.OpStatsLogger; import org.apache.bookkeeper.stats.StatsLogger; -import org.apache.bookkeeper.stats.codahale.CodahaleMetricsProvider; -import org.apache.bookkeeper.stats.codahale.FastCodahaleMetricsProvider; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; import org.openjdk.jmh.annotations.Fork; @@ -73,15 +71,16 @@ public static class MyState { private int timeIdx = 0; @Setup(Level.Trial) + @SuppressFBWarnings("SSD_DO_NOT_USE_INSTANCE_LOCK_ON_SHARED_STATIC_DATA") public void doSetup() throws Exception { StatsLogger logger = null; switch (timerType) { - case CodahaleTimer: - logger = new CodahaleMetricsProvider().getStatsLogger("test"); - break; - case FastTimer: - logger = new FastCodahaleMetricsProvider().getStatsLogger("test"); - break; + case CodahaleTimer: + logger = new CodahaleMetricsProvider().getStatsLogger("test"); + break; + case FastTimer: + logger = new FastCodahaleMetricsProvider().getStatsLogger("test"); + break; } synchronized (MyState.class) { @@ -170,12 +169,16 @@ public void timerTestWithSnapshots(MyState state) { /** * Test routing for manual testing of memory footprint of default Codahale Timer vs. improved FastTimer. * JMH can't do that, so we have a small stand-alone test routine here. - * Run with: java -Xmx1g -cp target/benchmarks.jar org.apache.bookkeeper.stats.codahale.TimerBenchmark + * Run with: + * + * java -Xmx1g -cp target/benchmarks.jar \ + * org.apache.bookkeeper.stats.codahale.TimerBenchmark <codahale|fast> + * * @param args */ public static void main(String[] args) { - if (args.length != 1 || - (!args[0].equalsIgnoreCase("codahale") && !args[0].equalsIgnoreCase("fast"))) { + if (args.length != 1 + || (!args[0].equalsIgnoreCase("codahale") && !args[0].equalsIgnoreCase("fast"))) { System.out.println("usage: " + TimerBenchmark.class.getCanonicalName() + " "); System.exit(1); } @@ -188,24 +191,24 @@ public static void main(String[] args) { System.out.println("Using " + logger.getClass().getCanonicalName()); System.out.println("Creating 1000 OpStatsLoggers (2000 Timers) and updating each of them 1000 times ..."); OpStatsLogger[] timers = new OpStatsLogger[1000]; - for (int i=0; i' on this JVM to get a heap histogram, then kill this JVM."); - while(true) { + while (true) { try { TimeUnit.MILLISECONDS.sleep(1000); - } catch(Exception e) { + } catch (Exception e) { // ignore } } diff --git a/microbenchmarks/src/main/java/org/apache/bookkeeper/stats/codahale/package-info.java b/microbenchmarks/src/main/java/org/apache/bookkeeper/stats/codahale/package-info.java new file mode 100644 index 00000000000..6678627a9a0 --- /dev/null +++ b/microbenchmarks/src/main/java/org/apache/bookkeeper/stats/codahale/package-info.java @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Stats benchmark runner. + */ +package org.apache.bookkeeper.stats.codahale; \ No newline at end of file diff --git a/microbenchmarks/src/main/java/org/apache/bookkeeper/stats/package-info.java b/microbenchmarks/src/main/java/org/apache/bookkeeper/stats/package-info.java new file mode 100644 index 00000000000..8a53c3a658c --- /dev/null +++ b/microbenchmarks/src/main/java/org/apache/bookkeeper/stats/package-info.java @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Stats implementations benchmarks. + */ +package org.apache.bookkeeper.stats; \ No newline at end of file diff --git a/native-io/pom.xml b/native-io/pom.xml new file mode 100644 index 00000000000..6c7c7fe9f0f --- /dev/null +++ b/native-io/pom.xml @@ -0,0 +1,224 @@ + + + 4.0.0 + + org.apache.bookkeeper + bookkeeper + 4.18.0-SNAPSHOT + .. 
+ + + native-io + nar + Apache BookKeeper :: Native IO Library + Native IO Library + + + dynamic + -msse4.2 -mpclmul + + + + + org.apache.commons + commons-lang3 + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + + + com.github.maven-nar + nar-maven-plugin + true + + + org.apache.maven.plugins + maven-assembly-plugin + + + src/main/assembly/assembly.xml + + false + posix + + + + make-assembly + package + + single + + + + + + org.apache.rat + apache-rat-plugin + + + + + + + + jdk-without-javah + + [10,) + + + + + com.github.maven-nar + nar-maven-plugin + true + + + + default-nar-javah + none + + + + + org.apache.maven.plugins + maven-compiler-plugin + + + + -h + ${project.build.directory}/nar/javah-include + + + + + + + + mac + + + Mac OS X + + + + + + com.github.maven-nar + nar-maven-plugin + true + + ${nar.runtime} + native-io + + + jni + org.apache.bookkeeper.util.nativeio + + + + ${nar.cpp.optionSet} + false + false + full + + + + + + + + + Linux + + + Linux + + + + + + com.github.maven-nar + nar-maven-plugin + true + + ${nar.runtime} + native-io + + + jni + org.apache.bookkeeper.util.nativeio + + + + ${nar.cpp.optionSet} + false + false + full + + + + + + + + Windows + + + Windows + + + + + + com.github.maven-nar + nar-maven-plugin + true + + ${nar.runtime} + native-io + + + jni + org.apache.bookkeeper.util.nativeio + + + + ${nar.cpp.optionSet} + false + false + full + + + g++ + + + + + + + + + diff --git a/native-io/src/main/assembly/assembly.xml b/native-io/src/main/assembly/assembly.xml new file mode 100644 index 00000000000..377b97ff54e --- /dev/null +++ b/native-io/src/main/assembly/assembly.xml @@ -0,0 +1,62 @@ + + + + all + + jar + + + false + + + ${project.build.directory}/nar/${project.artifactId}-${project.version}-${os.arch}-MacOSX-gpp-jni/lib/${os.arch}-MacOSX-gpp/jni + + lib + + lib* + + + + ${project.build.directory}/nar/${project.artifactId}-${project.version}-${os.arch}-Linux-gpp-jni/lib/${os.arch}-Linux-gpp/jni + + lib + + lib* + + + + ${project.build.directory}/nar/${project.artifactId}-${project.version}-${os.arch}-${os.name}-gpp-jni/lib/${os.arch}-${os.name}-gpp/jni + + lib + + lib* + + + + ${project.build.directory}/classes + + + **/* + + + + diff --git a/native-io/src/main/java/org/apache/bookkeeper/common/util/nativeio/NativeIO.java b/native-io/src/main/java/org/apache/bookkeeper/common/util/nativeio/NativeIO.java new file mode 100644 index 00000000000..4a27544e80d --- /dev/null +++ b/native-io/src/main/java/org/apache/bookkeeper/common/util/nativeio/NativeIO.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
          + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bookkeeper.common.util.nativeio; + +/** + * NativeIO API. + */ +public interface NativeIO { + + // These constants are different per each OS, so the correct values are defined in JNI code + int O_CREAT = 0x01; + int O_RDONLY = 0x02; + int O_WRONLY = 0x04; + int O_TRUNC = 0x08; + int O_DIRECT = 0x10; + int O_DSYNC = 0x20; + + int SEEK_SET = 0; + int SEEK_END = 2; + + int FALLOC_FL_ZERO_RANGE = 0x10; + + int open(String pathname, int flags, int mode) throws NativeIOException; + + int fsync(int fd) throws NativeIOException; + + /** + * fallocate is a linux-only syscall, so callers must handle the possibility that it does + * not exist. + */ + int fallocate(int fd, int mode, long offset, long len) throws NativeIOException; + + /** + * posix_fadvise is a linux-only syscall, so callers must handle the possibility that it does + * not exist. + */ + int posix_fadvise(int fd, long offset, long len, int flag) throws NativeIOException; + + int pwrite(int fd, long pointer, int count, long offset) throws NativeIOException; + + long posix_memalign(int alignment, int size) throws NativeIOException; + + void free(long pointer) throws NativeIOException; + + long lseek(int fd, long offset, int whence) throws NativeIOException; + + long pread(int fd, long pointer, long size, long offset) throws NativeIOException; + + int close(int fd) throws NativeIOException; +} diff --git a/native-io/src/main/java/org/apache/bookkeeper/common/util/nativeio/NativeIOException.java b/native-io/src/main/java/org/apache/bookkeeper/common/util/nativeio/NativeIOException.java new file mode 100644 index 00000000000..9be1d1622b0 --- /dev/null +++ b/native-io/src/main/java/org/apache/bookkeeper/common/util/nativeio/NativeIOException.java @@ -0,0 +1,46 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +package org.apache.bookkeeper.common.util.nativeio; + +import java.io.IOException; + +/** + * NativeIOException. 
+ */ +public class NativeIOException extends IOException { + + private final int errno; + + public NativeIOException(String message) { + super(message); + this.errno = -1; + } + + public NativeIOException(String message, int errno) { + super(message + " - errno=" + errno); + this.errno = errno; + } + + public int getErrno() { + return errno; + } +} diff --git a/native-io/src/main/java/org/apache/bookkeeper/common/util/nativeio/NativeIOImpl.java b/native-io/src/main/java/org/apache/bookkeeper/common/util/nativeio/NativeIOImpl.java new file mode 100644 index 00000000000..0ae7fd3eae4 --- /dev/null +++ b/native-io/src/main/java/org/apache/bookkeeper/common/util/nativeio/NativeIOImpl.java @@ -0,0 +1,77 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.common.util.nativeio; + +/** + * NativeIOImpl. + */ +public class NativeIOImpl implements NativeIO { + @Override + public int open(String pathname, int flags, int mode) throws NativeIOException { + int res = NativeIOJni.open(pathname, flags, mode); + return res; + } + + @Override + public int fsync(int fd) throws NativeIOException { + return NativeIOJni.fsync(fd); + } + + @Override + public int fallocate(int fd, int mode, long offset, long len) throws NativeIOException { + return NativeIOJni.fallocate(fd, mode, offset, len); + } + + @Override + public int posix_fadvise(int fd, long offset, long len, int flag) throws NativeIOException { + return NativeIOJni.posix_fadvise(fd, offset, len, flag); + } + + @Override + public long lseek(int fd, long offset, int whence) throws NativeIOException { + return NativeIOJni.lseek(fd, offset, whence); + } + + @Override + public int close(int fd) throws NativeIOException { + return NativeIOJni.close(fd); + } + + @Override + public int pwrite(int fd, long pointer, int count, long offset) throws NativeIOException { + return NativeIOJni.pwrite(fd, pointer, count, offset); + } + + @Override + public long posix_memalign(int alignment, int size) throws NativeIOException { + return NativeIOJni.posix_memalign(alignment, size); + } + + @Override + public void free(long pointer) throws NativeIOException { + NativeIOJni.free(pointer); + } + + @Override + public long pread(int fd, long pointer, long size, long offset) throws NativeIOException { + return NativeIOJni.pread(fd, pointer, size, offset); + } +} diff --git a/native-io/src/main/java/org/apache/bookkeeper/common/util/nativeio/NativeIOJni.java b/native-io/src/main/java/org/apache/bookkeeper/common/util/nativeio/NativeIOJni.java new file mode 100644 index 00000000000..77d2e80b019 --- /dev/null +++ b/native-io/src/main/java/org/apache/bookkeeper/common/util/nativeio/NativeIOJni.java @@ -0,0 +1,64 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * 
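NativeIOImpl above is a thin pass-through to the JNI layer, with NativeIOException carrying the raw errno. A minimal caller sketch built on the flag constants the NativeIO interface defines; the path, sizes, and mode are illustrative, this only runs where the native library loads, and the write buffer is deliberately left unfilled:

import org.apache.bookkeeper.common.util.nativeio.NativeIO;
import org.apache.bookkeeper.common.util.nativeio.NativeIOException;
import org.apache.bookkeeper.common.util.nativeio.NativeIOImpl;

public class NativeIOExample {
    public static void main(String[] args) throws Exception {
        NativeIO io = new NativeIOImpl();
        try {
            int fd = io.open("/tmp/native-io-demo.bin",
                    NativeIO.O_CREAT | NativeIO.O_WRONLY | NativeIO.O_TRUNC, 0644);
            // 4 KB buffer aligned to 4 KB, the usual shape for O_DIRECT-style writes.
            long buf = io.posix_memalign(4096, 4096);
            try {
                // The buffer content is uninitialized in this sketch; a real caller
                // would copy data into the native memory first.
                io.pwrite(fd, buf, 4096, 0);
                io.fsync(fd);
            } finally {
                io.free(buf);
                io.close(fd);
            }
        } catch (NativeIOException e) {
            // The JNI layer formats strerror(errno) into the message and keeps
            // the numeric errno available for programmatic handling.
            System.err.println("native I/O failed: " + e.getMessage() + ", errno=" + e.getErrno());
        }
    }
}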
or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.bookkeeper.common.util.nativeio; + +import org.apache.commons.lang3.SystemUtils; + +class NativeIOJni { + + static native int open(String pathname, int flags, int mode) throws NativeIOException; + + static native int fsync(int fd) throws NativeIOException; + + /** + * fallocate is a linux-only syscall, so callers must handle the possibility that it does + * not exist. + */ + static native int fallocate(int fd, int mode, long offset, long len) throws NativeIOException; + + static native int posix_fadvise(int fd, long offset, long len, int flag) throws NativeIOException; + + static native int pwrite(int fd, long pointer, int count, long offset) throws NativeIOException; + + static native long posix_memalign(int alignment, int size) throws NativeIOException; + + static native void free(long pointer) throws NativeIOException; + + static native long lseek(int fd, long offset, int whence) throws NativeIOException; + + static native long pread(int fd, long pointer, long size, long offset) throws NativeIOException; + + static native int close(int fd) throws NativeIOException; + + static { + try { + if (SystemUtils.IS_OS_MAC_OSX) { + NativeUtils.loadLibraryFromJar("/lib/libnative-io.jnilib"); + } else if (SystemUtils.IS_OS_LINUX) { + NativeUtils.loadLibraryFromJar("/lib/libnative-io.so"); + } else { + throw new RuntimeException("OS not supported by Native-IO utils"); + } + } catch (Exception e) { + throw new RuntimeException(e); + } + } +} diff --git a/native-io/src/main/java/org/apache/bookkeeper/common/util/nativeio/NativeUtils.java b/native-io/src/main/java/org/apache/bookkeeper/common/util/nativeio/NativeUtils.java new file mode 100644 index 00000000000..b3bdf9ab2f6 --- /dev/null +++ b/native-io/src/main/java/org/apache/bookkeeper/common/util/nativeio/NativeUtils.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.bookkeeper.common.util.nativeio; + +import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; +import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.file.Files; +import lombok.NonNull; +import lombok.experimental.UtilityClass; + +/** + * Utility class to load jni library from inside a JAR. + */ +@UtilityClass +class NativeUtils { + /** + * Loads the given library from this jar, i.e. this jar contains: /lib/pulsar-checksum.jnilib + * + * @param path + * : absolute path of the library in the jar
+ if this jar contains: /lib/pulsar-checksum.jnilib then provide the same absolute path as input + * @throws Exception + */ + @SuppressFBWarnings( + value = "OBL_UNSATISFIED_OBLIGATION", + justification = "work around for java 9: https://github.com/spotbugs/spotbugs/issues/493") + static void loadLibraryFromJar(String path) throws Exception { + checkArgument(path.startsWith("/"), "absolute path must start with /"); + + String[] parts = path.split("/"); + checkArgument(parts.length > 0, "absolute path must contain file name"); + + String filename = parts[parts.length - 1]; + + File dir = Files.createTempDirectory("native").toFile(); + dir.deleteOnExit(); + File temp = new File(dir, filename); + temp.deleteOnExit(); + + byte[] buffer = new byte[1024]; + int read; + + try (InputStream input = NativeUtils.class.getResourceAsStream(path); + OutputStream out = new FileOutputStream(temp)) { + if (input == null) { + throw new FileNotFoundException("Couldn't find file in jar " + path); + } + + while ((read = input.read(buffer)) != -1) { + out.write(buffer, 0, read); + } + } + + if (!temp.exists()) { + throw new FileNotFoundException("Failed to copy file from jar at " + temp.getAbsolutePath()); + } + + System.load(temp.getAbsolutePath()); + } + + public static void checkArgument(boolean expression, @NonNull Object errorMessage) { + if (!expression) { + throw new IllegalArgumentException(String.valueOf(errorMessage)); + } + }} diff --git a/native-io/src/main/java/org/apache/bookkeeper/common/util/nativeio/package-info.java b/native-io/src/main/java/org/apache/bookkeeper/common/util/nativeio/package-info.java new file mode 100644 index 00000000000..d215e470874 --- /dev/null +++ b/native-io/src/main/java/org/apache/bookkeeper/common/util/nativeio/package-info.java @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Utilities to access I/O using JNI. + */ +package org.apache.bookkeeper.common.util.nativeio; diff --git a/native-io/src/main/native-io-jni/cpp/native_io_jni.c b/native-io/src/main/native-io-jni/cpp/native_io_jni.c new file mode 100644 index 00000000000..d3bc164bec9 --- /dev/null +++ b/native-io/src/main/native-io-jni/cpp/native_io_jni.c @@ -0,0 +1,313 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +#define _GNU_SOURCE + +#include +#include +#include +#include +#include + +#include + +#ifdef _WIN32 + +#define fsync(fd) fflush(fd) +#define strerror_r(errno,buf,len) strerror_s(buf,len,errno) + +static ssize_t pread (int fd, void *buf, size_t count, off_t offset) +{ + ssize_t res; + off_t ooffset; + + ooffset = lseek (fd, 0, SEEK_CUR); + lseek (fd, offset, SEEK_SET); + res = read (fd, buf, count); + lseek (fd, ooffset, SEEK_SET); + + return res; +} + +static ssize_t pwrite (int fd, void *buf, size_t count, off_t offset) +{ + ssize_t res; + off_t ooffset; + + ooffset = lseek (fd, 0, SEEK_CUR); + lseek (fd, offset, SEEK_SET); + res = write (fd, buf, count); + lseek (fd, ooffset, SEEK_SET); + + return res; +} + +static int check_align(size_t align) +{ + for (size_t i = sizeof(void *); i != 0; i *= 2) + if (align == i) + return 0; + return EINVAL; +} + +int posix_memalign(void **ptr, size_t align, size_t size) +{ + if (check_align(align)) + return EINVAL; + + int saved_errno = errno; + void *p = _aligned_malloc(size, align); + if (p == NULL) + { + errno = saved_errno; + return ENOMEM; + } + + *ptr = p; + return 0; +} + +#endif + +static void throwExceptionWithErrno(JNIEnv* env, const char* message) { + char err_msg[1024]; + strerror_r(errno, err_msg, sizeof(err_msg)); + unsigned long size = strlen(message) + strlen(err_msg) + 10; + char* str = malloc(size); + snprintf(str, size, "%s: %s", message, err_msg); + + jstring javaMessage = (*env)->NewStringUTF(env, str); + free(str); + + jclass clazz = (*env)->FindClass(env, "org/apache/bookkeeper/common/util/nativeio/NativeIOException"); + jmethodID ctorMethod = (*env)->GetMethodID(env, clazz, "<init>", "(Ljava/lang/String;I)V"); + jobject myException = (*env)->NewObject(env, clazz, ctorMethod, javaMessage, errno); + (*env)->Throw(env, myException); +} + +static void throwException(JNIEnv* env, const char* message) { + (*env)->ThrowNew(env, (*env)->FindClass(env, "org/apache/bookkeeper/common/util/nativeio/NativeIOException"), message); +} + +/* + * Class: org_apache_bookkeeper_common_util_nativeio_NativeIOJni + * Method: open + * Signature: (Ljava/lang/String;II)I + */ +JNIEXPORT jint JNICALL +Java_org_apache_bookkeeper_common_util_nativeio_NativeIOJni_open( + JNIEnv *env, jclass clazz, jstring path, jint javaFlags, jint mode) { + const char *cPath = (*env)->GetStringUTFChars(env, path, 0); + + int flags = 0; + if (javaFlags & 0x01) { + flags |= O_CREAT; + } + + if (javaFlags & 0x02) { + flags |= O_RDONLY; + } + + if (javaFlags & 0x04) { + flags |= O_WRONLY; + } + + if (javaFlags & 0x08) { + flags |= O_TRUNC; + } + +#ifdef __linux__ + if (javaFlags & 0x10) { + flags |= O_DIRECT; + } +#endif + +#ifndef _WIN32 + if (javaFlags & 0x20) { + flags |= O_DSYNC; + } +#endif + + int fd = open(cPath, flags, mode); + + (*env)->ReleaseStringUTFChars(env, path, cPath); + + if (fd == -1) { + throwExceptionWithErrno(env, "Failed to open file"); + } + + return fd; +} + +/* + * Class: org_apache_bookkeeper_common_util_nativeio_NativeIOJni + * Method: fsync + * Signature: (I)I + */ +JNIEXPORT jint JNICALL
+Java_org_apache_bookkeeper_common_util_nativeio_NativeIOJni_fsync(JNIEnv * env, + jclass clazz, + jint fd) { + int res = fsync(fd); + + if (res == -1) { + throwExceptionWithErrno(env, "Failed to fsync"); + } + + return res; +} + +/* + * Class: org_apache_bookkeeper_common_util_nativeio_NativeIOJni + * Method: fallocate + * Signature: (IIJJ)I + */ +JNIEXPORT jint JNICALL +Java_org_apache_bookkeeper_common_util_nativeio_NativeIOJni_fallocate( + JNIEnv* env, jclass clazz, + jint fd, jint mode, jlong offset, jlong len) { +#ifdef __linux__ + int res = fallocate(fd, mode, offset, len); + if (res == -1) { + throwExceptionWithErrno(env, "Failed to fallocate"); + } + return res; +#else + throwException(env, "fallocate is not available"); + return -1; +#endif +} + +/* + * Class: org_apache_bookkeeper_common_util_nativeio_NativeIOJni + * Method: posix_fadvise + * Signature: (IJJI)I + */ +JNIEXPORT jint JNICALL +Java_org_apache_bookkeeper_common_util_nativeio_NativeIOJni_posix_1fadvise( + JNIEnv* env, jclass clazz, + jint fd, jlong offset, jlong len, jint flag) { +#ifdef __linux__ + int res = posix_fadvise(fd, offset, len, flag); + if (res == -1) { + throwExceptionWithErrno(env, "Failed to posix_fadvise"); + } + return res; +#else + throwException(env, "posix_fadvise is not available"); + return -1; +#endif +} + +/* + * Class: org_apache_bookkeeper_common_util_nativeio_NativeIOJni + * Method: lseek + * Signature: (IJI)J + */ +JNIEXPORT jlong JNICALL +Java_org_apache_bookkeeper_common_util_nativeio_NativeIOJni_lseek( + JNIEnv* env, jclass clazz, + jint fd, jlong offset, jint whence) { + int res = lseek(fd, offset, whence); + + if (res == -1) { + throwExceptionWithErrno(env, "Failed to lseek"); + } + + return res; +} + +/* + * Class: org_apache_bookkeeper_common_util_nativeio_NativeIOJni + * Method: close + * Signature: (I)I + */ +JNIEXPORT jint JNICALL +Java_org_apache_bookkeeper_common_util_nativeio_NativeIOJni_close(JNIEnv* env, jclass clazz, + jint fd) { + int res = close(fd); + + if (res == -1) { + throwExceptionWithErrno(env, "Failed to close file"); + } + + return res; +} + +/* + * Class: org_apache_bookkeeper_common_util_nativeio_NativeIOJni + * Method: pwrite + * Signature: (IJIJ)I + */ +JNIEXPORT jint JNICALL Java_org_apache_bookkeeper_common_util_nativeio_NativeIOJni_pwrite + (JNIEnv* env, jclass clazz, jint fd, jlong pointer, jint count, jlong offset) { + int res = pwrite(fd, (const void*) pointer, count, offset); + + if (res == -1) { + throwExceptionWithErrno(env, "Failed to write on file"); + } + + return res; +} + +/* + * Class: org_apache_bookkeeper_common_util_nativeio_NativeIOJni + * Method: posix_memalign + * Signature: (II)J + */ +JNIEXPORT jlong JNICALL Java_org_apache_bookkeeper_common_util_nativeio_NativeIOJni_posix_1memalign + (JNIEnv* env, jclass clazz, jint alignment, jint size) { + void* ptr; + int res = posix_memalign(&ptr, alignment, size); + + if (res != 0) { + throwExceptionWithErrno(env, "Failed to allocate aligned memory"); + } + + return (jlong) ptr; +} + +/* + * Class: org_apache_bookkeeper_common_util_nativeio_NativeIOJni + * Method: pread + * Signature: (IJJJ)J + */ +JNIEXPORT jlong JNICALL Java_org_apache_bookkeeper_common_util_nativeio_NativeIOJni_pread + (JNIEnv * env, jclass clazz, jint fd, jlong pointer, jlong size, jlong offset) { + + long res = pread(fd, (void*) pointer, size, offset); + + if (res == -1) { + throwExceptionWithErrno(env, "Failed to read from file"); + } + + return res; +} + +/* + * Class: org_apache_bookkeeper_common_util_nativeio_NativeIOJni 
+ * Method: free + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_apache_bookkeeper_common_util_nativeio_NativeIOJni_free + (JNIEnv * env, jclass clazz, jlong pointer) { + free((const void*) pointer); +} + diff --git a/pom.xml b/pom.xml index b29f423731b..1651bfcac71 100644 --- a/pom.xml +++ b/pom.xml @@ -19,20 +19,20 @@ org.apache apache - 18 + 31 4.0.0 org.apache.bookkeeper - 4.9.0-SNAPSHOT + 4.18.0-SNAPSHOT bookkeeper pom Apache BookKeeper :: Parent - http://bookkeeeper.apache.org + https://bookkeeeper.apache.org 2011 Apache License, Version 2.0 - http://www.apache.org/licenses/LICENSE-2.0.txt + https://www.apache.org/licenses/LICENSE-2.0.txt repo @@ -40,7 +40,7 @@ scm:git:https://github.com/apache/bookkeeper.git scm:git:https://github.com/apache/bookkeeper.git https://github.com/apache/bookkeeper - HEAD + branch-4.18 JIRA @@ -54,18 +54,23 @@ buildtools circe-checksum bookkeeper-common - bookkeeper-stats + bookkeeper-common-allocator + stats bookkeeper-proto bookkeeper-server bookkeeper-benchmark - bookkeeper-stats-providers bookkeeper-http - shaded - tests + stream + tools + cpu-affinity + metadata-drivers bookkeeper-dist + shaded microbenchmarks - stream/distributedlog - tools + bookkeeper-slogger + tests + native-io + testtools @@ -73,117 +78,132 @@ user-subscribe@bookkeeper.apache.org user-unsubscribe@bookkeeper.apache.org user@bookkeeper.apache.org - http://www.mail-archive.com/user@bookkeeper.apache.org + https://www.mail-archive.com/user@bookkeeper.apache.org BookKeeper Dev dev-subscribe@bookkeeper.apache.org dev-unsubscribe@bookkeeper.apache.org dev@bookkeeper.apache.org - http://www.mail-archive.com/dev@bookkeeper.apache.org + https://www.mail-archive.com/dev@bookkeeper.apache.org BookKeeper Commits commits-subscribe@bookkeeper.apache.org commits-unsubscribe@bookkeeper.apache.org commits@bookkeeper.apache.org - http://www.mail-archive.com/commits@bookkeeper.apache.org + https://www.mail-archive.com/commits@bookkeeper.apache.org The Apache BookKeeper Team dev@bookkeeper.apache.org - http://bookkeeper.apache.org + https://bookkeeper.apache.org Apache Software Foundation - http://www.apache.org + https://www.apache.org + 8 + 8 UTF-8 UTF-8 - 1.8 true 2 + src/main/java + - 1.15.1 - 1.1.14.Final + 1.18.2 + 1.8.0.Final + 1.78 3.0.1 1.2 4.1 1.6 1.10 - 1.15 + 1.26.0 2.6 3.6 - 2.4 - 4.0.1 - 3.1.0 - 6.44.0 - 1.14.9 + 2.7 + 1.0.2.5 + 5.1.0 + 4.0.0 + 4.1.12.1 + 0.7.7 + 3.2.2 + 2.8.0 3.0.2 - 2.1.2 - 1.12.0 - 21.0 - 2.7.3 - 1.3 - 2.8.9 - 1.9.11 - 1.48 - 9.4.5.v20170502 - 1.19 + 2.9.0 + 1.64.0 + 32.0.1-jre + 1.1.1 + 3.3.5 + 2.1.10 + 2.17.1 + 1.82 + 9.4.53.v20231009 + 1.37 2.8.2 - 3.2.7 - 4.12 - 0.5.0-1 - 0.9.3 - 1.16.22 + 4.13.2 + 1.3 + 5.10.2 + 3.25.3 + 4.2.0 + 0.14.2 + 1.18.32 + 2.23.1 1.3.0 - 2.13.0 - 4.1.22.Final - 2.0.7.Final - 9.1.3 - 2.0.0-beta.5 - 0.0.21 + 4.11.0 + 4.1.111.Final + 0.0.25.Final + 0.15.0 0.8.3 - 3.5.1 - 3.5.1-1 - 1.12.0 - 5.13.1 - 3.0.1 - 1.7.25 - 3.1.1 - 1.7.0 - 1.29.0 - 3.4.1 - 3.4.13 + 4.5.13 + 4.4.15 + 3.25.1 + ${protobuf.version} + ${grpc.version} + 0.9.11 + 7.10.2 + 3.3.0 + 2.0.12 + 2.0 + 4.6.0 + 1.3.2 + 1.19.4 + 4.5.7 + 3.8.4 + 1.1.10.5 + 2.1.2 + 0.9.1 - 0.12 + 0.16.1 2.7 4.3.0 - 1.3.7 + 1.4.13 + 1.6.8 + 3.0.0 1.6 - 0.8.0 + 1.18.20.0 1.8 - 3.1.0 - 3.2.0 - 3.0.0 - 2.5 - 3.7.0 - 3.0.2 - 2.7 - 2.5.1 - 2.4 - 3.0.0 - 3.1.0 - 2.2.1 - 2.21.0 - 3.1.0 + 3.3.1 + 3.12.1 + 3.2.5 + 9.2.0 + 3.10.1 1.4.1.Final - 0.5.0 - 6.19 - 3.1.0-RC6 + 0.6.1 + 9.3 + 4.7.3.2 + 3.6.0 + 1 + 4.0.0 + 3.0.1 + 1.26.0 + http://archive.ubuntu.com/ubuntu/ + http://security.ubuntu.com/ubuntu/ 
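Stepping back to the native_io_jni.c sources above: the Java-side flag bits decoded by `NativeIOJni.open()` map onto POSIX open(2) flags (0x01 to O_CREAT, 0x02 to O_RDONLY, 0x04 to O_WRONLY, 0x08 to O_TRUNC, 0x10 to O_DIRECT on Linux only, 0x20 to O_DSYNC except on Windows). A minimal sketch of driving the JNI entry points from Java: the constant holder and demo method are assumptions, while the `NativeIOJni` method signatures are taken from the JNI comments in the C file.

```java
// Hypothetical Java-side mirror of the flag bits decoded in
// native_io_jni.c; only the bit values come from the C source.
final class NativeIOFlags {
    static final int O_CREAT  = 0x01; // create the file if it does not exist
    static final int O_RDONLY = 0x02;
    static final int O_WRONLY = 0x04;
    static final int O_TRUNC  = 0x08;
    static final int O_DIRECT = 0x10; // honored on Linux only
    static final int O_DSYNC  = 0x20; // not mapped on Windows

    private NativeIOFlags() {}

    // Sketch of an aligned, direct-I/O style write round trip (error
    // handling elided; assumes NativeIOJni is accessible from here).
    static void demoWrite(String path) throws Exception {
        int fd = NativeIOJni.open(path, O_CREAT | O_WRONLY | O_DIRECT, 0644);
        long buf = NativeIOJni.posix_memalign(4096, 4096); // aligned native buffer
        try {
            NativeIOJni.pwrite(fd, buf, 4096, 0); // one aligned block at offset 0
            NativeIOJni.fsync(fd);                // force to stable storage
        } finally {
            NativeIOJni.free(buf);                // release the native buffer
            NativeIOJni.close(fd);
        }
    }
}
```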
@@ -195,6 +215,11 @@ spotbugs-annotations ${spotbugs-annotations.version} + + javax.annotation + javax.annotation-api + ${javax-annotations-api.version} + com.google.code.findbugs jsr305 @@ -219,18 +244,22 @@ org.slf4j - slf4j-api + slf4j-bom ${slf4j.version} + pom + import - org.slf4j - slf4j-log4j12 - ${slf4j.version} + org.apache.logging.log4j + log4j-bom + ${log4j.version} + pom + import - org.slf4j - log4j-over-slf4j - ${slf4j.version} + com.lmax + disruptor + ${disruptor.version} @@ -279,6 +308,18 @@ commons-lang3 ${commons-lang3.version} + + + org.bouncycastle + bc-fips + ${bouncycastle.version} + + + + org.reflections + reflections + ${reflections.version} + @@ -287,84 +328,149 @@ ${lz4.version} - + - net.java.dev.jna - jna - ${jna.version} + org.yaml + snakeyaml + ${snakeyaml.version} - com.fasterxml.jackson.core - jackson-core - ${jackson.version} - - - com.fasterxml.jackson.core - jackson-databind + com.fasterxml.jackson + jackson-bom ${jackson.version} + pom + import + - com.fasterxml.jackson.core - jackson-annotations - ${jackson.version} - - - - org.codehaus.jackson - jackson-mapper-asl - ${jackson-mapper-asl.version} + javax.servlet + javax.servlet-api + ${servlet-api.version} com.google.protobuf - protobuf-java + protobuf-bom ${protobuf.version} + pom + import org.apache.thrift libthrift - ${libthrift9.version} + ${libthrift.version} + + + org.apache.tomcat.embed + tomcat-embed-core + + + javax.annotation + javax.annotation-api + + - io.netty - netty-common + netty-bom ${netty.version} + pom + import - io.netty - netty-buffer - ${netty.version} + io.netty.incubator + netty-incubator-transport-native-io_uring + ${netty-iouring.version} - io.netty - netty-all - ${netty.version} + io.netty.incubator + netty-incubator-transport-native-io_uring + ${netty-iouring.version} + linux-x86_64 - io.netty - netty-tcnative-boringssl-static - ${netty-boringssl.version} + io.netty.incubator + netty-incubator-transport-native-io_uring + ${netty-iouring.version} + linux-aarch_64 - - - com.twitter - finagle-core_2.11 - ${finagle.version} + io.netty.incubator + netty-incubator-transport-classes-io_uring + ${netty-iouring.version} + + io.grpc + grpc-bom + ${grpc.version} + pom + import + + io.grpc grpc-all ${grpc.version} + + + io.grpc + grpc-testing + + + io.grpc + grpc-netty + + + io.grpc + grpc-okhttp + + + com.squareup.okhttp + okhttp + + + com.squareup.okio + okio + + + io.grpc + grpc-servlet + + + io.grpc + grpc-servlet-jakarta + + + + + + + dev.failsafe + failsafe + ${failsafe.version} + + + + + com.squareup.okhttp3 + okhttp-bom + 4.12.0 + pom + import + + + + org.jetbrains.kotlin + kotlin-stdlib-common + 1.8.21 @@ -385,20 +491,12 @@ javacc - org.slf4j - slf4j-log4j12 - - - org.slf4j - slf4j-api - - - log4j - log4j + ch.qos.logback + * io.netty - netty + * @@ -408,25 +506,28 @@ ${zookeeper.version} test-jar - - org.slf4j - slf4j-log4j12 - org.slf4j slf4j-api - log4j - log4j + ch.qos.logback + * io.netty - netty + * + + + org.xerial.snappy + snappy-java + ${snappy.version} + + org.apache.curator @@ -441,17 +542,6 @@ - - com.twitter - twitter-server_2.11 - ${twitter-server.version} - - - javax.servlet - servlet-api - - - io.vertx vertx-core @@ -481,19 +571,37 @@ jetty-server ${jetty.version} + + org.eclipse.jetty + jetty-webapp + ${jetty.version} + org.eclipse.jetty jetty-servlet ${jetty.version} - - + + + org.jctools + jctools-core + ${jctools.version} + + + io.dropwizard.metrics metrics-core ${dropwizard.version} + + + + io.dropwizard.metrics + metrics-jmx + ${dropwizard.version} + 
io.dropwizard.metrics metrics-jvm @@ -526,12 +634,25 @@ sketches-core ${datasketches.version} + + + io.opentelemetry + opentelemetry-bom + ${otel.version} + pom + import + - + + + org.apache.httpcomponents + httpclient + ${httpclient.version} + - com.twitter - ostrich_2.9.2 - ${ostrich.version} + org.apache.httpcomponents + httpcore + ${httpcore.version} @@ -541,56 +662,104 @@ ${jcommander.version} + + + org.hdrhistogram + HdrHistogram + ${hdrhistogram.version} + + junit junit ${junit.version} + + org.junit + junit-bom + ${junit5.version} + pom + import + + + org.assertj + assertj-core + ${assertj-core.version} + org.hamcrest hamcrest-all ${hamcrest.version} + + org.hamcrest + hamcrest-core + ${hamcrest.version} + + + org.hamcrest + hamcrest-library + ${hamcrest.version} + org.jmock jmock ${jmock.version} + + org.awaitility + awaitility + ${awaitility.version} + test + org.mockito mockito-core ${mockito.version} - org.powermock - powermock-api-mockito2 - ${powermock.version} - - - org.powermock - powermock-module-junit4 - ${powermock.version} + org.mockito + mockito-inline + ${mockito.version} org.apache.hadoop hadoop-minikdc - ${hadoop.version} + ${hadoop.minikdc.version} org.arquillian.cube arquillian-cube-docker ${arquillian-cube.version} + + + com.github.docker-java + * + + + + + javax.ws.rs + javax.ws.rs-api + 2.1.1 org.jboss.arquillian.junit arquillian-junit-standalone ${arquillian-junit.version} + + + com.github.docker-java + * + + org.codehaus.groovy groovy-all ${groovy.version} + pom org.jboss.shrinkwrap.resolver @@ -604,8 +773,10 @@ org.testcontainers - testcontainers + testcontainers-bom ${testcontainers.version} + pom + import @@ -619,6 +790,19 @@ jmh-generator-annprocess ${jmh.version} + + + + io.reactivex.rxjava3 + rxjava + ${rxjava.version} + + + + com.carrotsearch + hppc + ${hppc.version} + @@ -645,41 +829,51 @@ commons-configuration commons-configuration - junit junit test - - org.hamcrest - hamcrest-all + + org.junit.jupiter + junit-jupiter-api test - org.slf4j - slf4j-log4j12 + org.junit.jupiter + junit-jupiter-engine test - org.mockito - mockito-core + org.junit.vintage + junit-vintage-engine + test + + + org.junit.jupiter + junit-jupiter-params test - org.powermock - powermock-api-mockito2 + org.assertj + assertj-core + test + + + org.hamcrest + hamcrest-all test - org.powermock - powermock-module-junit4 + org.mockito + mockito-inline test + ${src.dir} kr.motd.maven @@ -703,7 +897,7 @@ buildtools/src/main/resources/bookkeeper/checkstyle.xml buildtools/src/main/resources/bookkeeper/suppressions.xml - UTF-8 + UTF-8 true true false @@ -719,6 +913,30 @@ + + org.apache.maven.plugins + maven-surefire-plugin + ${maven-surefire-plugin.version} + + + org.apache.maven.plugins + maven-failsafe-plugin + ${maven-surefire-plugin.version} + + + com.github.maven-nar + nar-maven-plugin + ${nar-maven-plugin.version} + + + org.apache.maven.plugins + maven-javadoc-plugin + + + true + none + + @@ -731,41 +949,51 @@ + org.apache.maven.plugins maven-compiler-plugin ${maven-compiler-plugin.version} - ${javac.target} - ${javac.target} + UTF-8 + true + true + + + org.projectlombok + lombok + ${lombok.version} + + - -Werror - -Xlint:deprecation - -Xlint:unchecked - - -Xpkginfo:always - + -parameters + org.apache.maven.plugins maven-surefire-plugin - ${maven-surefire-plugin.version} - -Xmx2G -Djava.net.preferIPv4Stack=true -Dio.netty.leakDetection.level=paranoid + -Xmx2G -Djava.net.preferIPv4Stack=true -Dio.netty.leakDetection.level=paranoid ${test.additional.args} ${redirectTestOutputToFile} + 
${forkCount.variable} false + false 1800 ${testRetryCount} + + org.apache.maven.plugins + maven-failsafe-plugin + org.apache.maven.plugins maven-javadoc-plugin - ${maven-javadoc-plugin.version} - - -Xdoclint:none -notimestamp - -Xdoclint:none -notimestamp + true + Bookkeeper Client @@ -784,12 +1012,17 @@ Bookkeeper Stats Providers - org.apache.bookkeeper.stats.codahale:org.apache.bookkeeper.stats.twitter.finagle:org.apache.bookkeeper.stats.twitter.ostrich:org.apache.bookkeeper.stats.twitter.science:org.apache.bookkeeper.stats.prometheus + org.apache.bookkeeper.stats.codahale:org.apache.bookkeeper.stats.prometheus BookKeeper Java API (version ${project.version}) site/_site/overview/index.html package + false + + false @@ -804,12 +1037,11 @@ org.apache.maven.plugins maven-source-plugin - ${maven-source-plugin.version} attach-sources - jar + jar-no-fork @@ -820,6 +1052,12 @@ ${apache-rat-plugin.version} + + dependency-reduced-pom.xml + + + **/.idea/** + .git/**/* .github/**/* @@ -833,11 +1071,12 @@ **/README.md + **/README.rst **/apidocs/* **/src/main/resources/deps/** **/META-INF/** - + **/.classpath **/.project **/.checkstyle @@ -845,12 +1084,16 @@ **/*.iml **/*.iws **/*.ipr + + **/.sdkmanrc .repository/** site/** + site2/** + site3/** **/org/apache/distributedlog/thrift/* @@ -858,58 +1101,145 @@ **/*.log + + **/*.json + data/** dev/.vagrant/** + + + **/proto/**.py + + **/python/.coverage + **/python/.Python + **/python/bin/** + **/python/include/** + **/python/lib/** + **/**.pyc + **/.nox/** + **/.pytest_cache/** + **/__pycache__/** + **/bookkeeper.egg-info/** + + + **/test_conf_2.conf true + + com.googlecode.maven-download-plugin + download-maven-plugin + ${download-maven-plugin.version} + + + org.codehaus.mojo + exec-maven-plugin + ${exec-maven-plugin.version} + - - code-coverage + owasp-dependency-check - org.eluder.coveralls - coveralls-maven-plugin - ${coveralls-maven-plugin.version} + org.owasp + dependency-check-maven + ${dependency-check-maven.version} + false + + + src/owasp-dependency-check-suppressions.xml + + 7 + false + false + false + false + false + false + false + false + false + false + false + false + true + + + + + aggregate + + + + org.apache.maven.plugins - maven-surefire-plugin - ${maven-surefire-plugin.version} + maven-source-plugin - - @{argLine} -Xmx2G -Djava.net.preferIPv4Stack=true - ${redirectTestOutputToFile} - false - 1800 - - true + true + + + + + delombok + + ${project.build.directory}/generated-sources/delombok + + + - org.jacoco - jacoco-maven-plugin - 0.8.0 + org.projectlombok + lombok-maven-plugin + ${lombok-maven-plugin.version} + true + + + org.projectlombok + lombok + ${lombok.version} + + + generate-sources - prepare-agent + delombok + + ${project.basedir}/src/main/java + ${project.build.directory}/generated-sources/delombok + + + + + + org.apache.maven.plugins + maven-javadoc-plugin + + + false + + - dev @@ -918,10 +1248,10 @@ org.apache.maven.plugins maven-surefire-plugin - ${maven-surefire-plugin.version} -Xmx2G -Djava.net.preferIPv4Stack=true false + ${forkCount.variable} false 1800 false @@ -938,10 +1268,10 @@ org.apache.maven.plugins maven-surefire-plugin - ${maven-surefire-plugin.version} - -Xmx2G -Djava.net.preferIPv4Stack=true -Dbookkeeper.root.logger=DEBUG,CONSOLE + -Xmx2G -Djava.net.preferIPv4Stack=true -Dio.netty.leakDetection.level=paranoid -Dbookkeeper.log.root.level=INFO -Dbookkeeper.log.root.appender=CONSOLE false + ${forkCount.variable} false 1800 false @@ -950,19 +1280,120 @@ - - - stream + aarch64-linux-nar-aol + + + linux + 
aarch64 + + + + src/aarch64_aol.properties + + + + mac-apple-silicon + + + mac + aarch64 + + + + src/aarch64_aol.properties + + + + jdk11 + + [11,) + + + + + ${maven.compiler.target} + + + --add-opens java.base/java.io=ALL-UNNAMED + --add-opens java.base/java.lang=ALL-UNNAMED + --add-opens java.base/java.lang.reflect=ALL-UNNAMED + --add-opens java.base/java.net=ALL-UNNAMED + --add-opens java.base/java.nio=ALL-UNNAMED + --add-opens java.base/java.nio.channels.spi=ALL-UNNAMED + --add-opens java.base/java.nio.file=ALL-UNNAMED + --add-opens java.base/java.util=ALL-UNNAMED + --add-opens java.base/java.util.concurrent=ALL-UNNAMED + --add-opens java.base/java.util.concurrent.atomic=ALL-UNNAMED + --add-opens java.base/java.util.concurrent.locks=ALL-UNNAMED + --add-opens java.base/java.util.stream=ALL-UNNAMED + --add-opens java.base/java.util.zip=ALL-UNNAMED + --add-opens java.base/java.time=ALL-UNNAMED + --add-opens java.base/jdk.internal.loader=ALL-UNNAMED + --add-opens java.base/sun.net.dns=ALL-UNNAMED + --add-opens java.base/sun.nio.ch=ALL-UNNAMED + --add-opens java.base/sun.security.jca=ALL-UNNAMED + --add-opens java.xml/jdk.xml.internal=ALL-UNNAMED + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + + + ${maven.compiler.release} + + + + + + + + apache-release + + + + org.apache.maven.plugins + maven-assembly-plugin + + + source-release-assembly + + + true + + + + + + + + + ubuntu-mirror-set + + + env.UBUNTU_MIRROR + + + + + ${env.UBUNTU_MIRROR} + + + + ubuntu-security-mirror-set - stream + env.UBUNTU_SECURITY_MIRROR - - - stream - + + + ${env.UBUNTU_SECURITY_MIRROR} + diff --git a/shaded/bookkeeper-server-shaded/pom.xml b/shaded/bookkeeper-server-shaded/pom.xml index 820dc01a189..1fea94ef154 100644 --- a/shaded/bookkeeper-server-shaded/pom.xml +++ b/shaded/bookkeeper-server-shaded/pom.xml @@ -20,7 +20,7 @@ org.apache.bookkeeper shaded-parent - 4.9.0-SNAPSHOT + 4.18.0-SNAPSHOT .. bookkeeper-server-shaded @@ -38,10 +38,18 @@ org.slf4j slf4j-log4j12 + + org.slf4j + slf4j-reload4j + log4j log4j + + ch.qos.reload4j + reload4j + @@ -50,7 +58,6 @@ org.apache.maven.plugins maven-shade-plugin - ${maven-shade-plugin.version} package @@ -64,8 +71,11 @@ com.google.guava:guava + com.google.guava:failureaccess com.google.protobuf:protobuf-java org.apache.bookkeeper:bookkeeper-common + org.apache.bookkeeper:bookkeeper-common-allocator + org.apache.bookkeeper:cpu-affinity org.apache.bookkeeper:bookkeeper-tools-framework org.apache.bookkeeper:bookkeeper-proto org.apache.bookkeeper:bookkeeper-server @@ -82,7 +92,7 @@ - + org.codehaus.mojo license-maven-plugin @@ -108,8 +118,8 @@ + org.apache.maven.plugins maven-clean-plugin - ${maven-clean-plugin.version} diff --git a/shaded/bookkeeper-server-tests-shaded/pom.xml b/shaded/bookkeeper-server-tests-shaded/pom.xml index 3689415d8bb..9871bb152ee 100644 --- a/shaded/bookkeeper-server-tests-shaded/pom.xml +++ b/shaded/bookkeeper-server-tests-shaded/pom.xml @@ -20,7 +20,7 @@ org.apache.bookkeeper shaded-parent - 4.9.0-SNAPSHOT + 4.18.0-SNAPSHOT .. 
bookkeeper-server-tests-shaded @@ -39,10 +39,18 @@ org.slf4j slf4j-log4j12 + + org.slf4j + slf4j-reload4j + log4j log4j + + ch.qos.reload4j + reload4j + org.apache.bookkeeper bookkeeper-common @@ -75,7 +83,6 @@ org.apache.maven.plugins maven-shade-plugin - ${maven-shade-plugin.version} package @@ -103,7 +110,7 @@ - + org.codehaus.mojo license-maven-plugin @@ -129,8 +136,8 @@ + org.apache.maven.plugins maven-clean-plugin - ${maven-clean-plugin.version} diff --git a/shaded/distributedlog-core-shaded/pom.xml b/shaded/distributedlog-core-shaded/pom.xml index 4f5c9f2c5c5..36e18f6b641 100644 --- a/shaded/distributedlog-core-shaded/pom.xml +++ b/shaded/distributedlog-core-shaded/pom.xml @@ -20,7 +20,7 @@ org.apache.bookkeeper shaded-parent - 4.9.0-SNAPSHOT + 4.18.0-SNAPSHOT .. org.apache.distributedlog @@ -35,18 +35,10 @@ distributedlog-core ${project.version} - - org.slf4j - slf4j-log4j12 - log4j log4j - - io.netty - netty-common - io.netty netty-buffer @@ -59,7 +51,6 @@ org.apache.maven.plugins maven-shade-plugin - ${maven-shade-plugin.version} package @@ -80,13 +71,13 @@ com.fasterxml.jackson.core:jackson-core com.fasterxml.jackson.core:jackson-databind com.fasterxml.jackson.core:jackson-annotations + com.google.guava:failureaccess com.google.guava:guava com.google.protobuf:protobuf-java - - io.netty:netty - net.java.dev.jna:jna net.jpountz.lz4:lz4 org.apache.bookkeeper:bookkeeper-common + org.apache.bookkeeper:bookkeeper-common-allocator + org.apache.bookkeeper:cpu-affinity org.apache.bookkeeper:bookkeeper-tools-framework org.apache.bookkeeper:bookkeeper-proto org.apache.bookkeeper:bookkeeper-server @@ -102,6 +93,7 @@ org.apache.httpcomponents:httpcore org.apache.thrift:libthrift org.apache.zookeeper:zookeeper + org.apache.zookeeper:zookeeper-jute org.rocksdb:rocksdbjni @@ -163,11 +155,6 @@ com.fasterxml.jackson dlshade.com.fasterxml.jackson - - - com.sun.jna - dlshade.com.sun.jna - com.google @@ -200,13 +187,13 @@ org.apache.distributedlog - org.apache.distributedlog + dlshade.org.apache.distributedlog - + org.codehaus.mojo license-maven-plugin @@ -232,8 +219,8 @@ + org.apache.maven.plugins maven-clean-plugin - ${maven-clean-plugin.version} diff --git a/shaded/pom.xml b/shaded/pom.xml index fca99f2afb7..2f65186263d 100644 --- a/shaded/pom.xml +++ b/shaded/pom.xml @@ -21,7 +21,7 @@ org.apache.bookkeeper bookkeeper - 4.9.0-SNAPSHOT + 4.18.0-SNAPSHOT org.apache.bookkeeper shaded-parent diff --git a/site/.gitignore b/site/.gitignore deleted file mode 100644 index 15a21ca0b91..00000000000 --- a/site/.gitignore +++ /dev/null @@ -1,8 +0,0 @@ -_site/ -javadoc/ -vendor/bundle/ -.jekyll-metadata -.bundle/ -.sass-cache/ -local-generated/ -generated_site/ diff --git a/site/Gemfile b/site/Gemfile deleted file mode 100644 index cc4f51e7f0a..00000000000 --- a/site/Gemfile +++ /dev/null @@ -1,7 +0,0 @@ -source 'https://rubygems.org' - -ruby '>=2.3.1' - -gem 'jekyll', '3.7.0' -gem 'nokogiri', '1.8.2' -gem 'jekyll-toc', '0.2.1' diff --git a/site/Gemfile.lock b/site/Gemfile.lock deleted file mode 100644 index 98548973022..00000000000 --- a/site/Gemfile.lock +++ /dev/null @@ -1,73 +0,0 @@ -GEM - remote: https://rubygems.org/ - specs: - addressable (2.5.2) - public_suffix (>= 2.0.2, < 4.0) - colorator (1.1.0) - concurrent-ruby (1.0.5) - em-websocket (0.5.1) - eventmachine (>= 0.12.9) - http_parser.rb (~> 0.6.0) - eventmachine (1.2.5) - ffi (1.9.18) - forwardable-extended (2.6.0) - http_parser.rb (0.6.0) - i18n (0.9.3) - concurrent-ruby (~> 1.0) - jekyll (3.7.0) - addressable (~> 2.4) - colorator (~> 1.0) - 
em-websocket (~> 0.5) - i18n (~> 0.7) - jekyll-sass-converter (~> 1.0) - jekyll-watch (~> 2.0) - kramdown (~> 1.14) - liquid (~> 4.0) - mercenary (~> 0.3.3) - pathutil (~> 0.9) - rouge (>= 1.7, < 4) - safe_yaml (~> 1.0) - jekyll-sass-converter (1.5.1) - sass (~> 3.4) - jekyll-toc (0.2.1) - nokogiri (~> 1.6) - jekyll-watch (2.0.0) - listen (~> 3.0) - kramdown (1.16.2) - liquid (4.0.0) - listen (3.1.5) - rb-fsevent (~> 0.9, >= 0.9.4) - rb-inotify (~> 0.9, >= 0.9.7) - ruby_dep (~> 1.2) - mercenary (0.3.6) - mini_portile2 (2.3.0) - nokogiri (1.8.2) - mini_portile2 (~> 2.3.0) - pathutil (0.16.1) - forwardable-extended (~> 2.6) - public_suffix (3.0.1) - rb-fsevent (0.10.2) - rb-inotify (0.9.10) - ffi (>= 0.5.0, < 2) - rouge (3.1.1) - ruby_dep (1.5.0) - safe_yaml (1.0.4) - sass (3.5.5) - sass-listen (~> 4.0.0) - sass-listen (4.0.0) - rb-fsevent (~> 0.9, >= 0.9.4) - rb-inotify (~> 0.9, >= 0.9.7) - -PLATFORMS - ruby - -DEPENDENCIES - jekyll (= 3.7.0) - jekyll-toc (= 0.2.1) - nokogiri (= 1.8.2) - -RUBY VERSION - ruby 2.4.1p111 - -BUNDLED WITH - 1.16.2 diff --git a/site/Makefile b/site/Makefile deleted file mode 100644 index 5c20bb697c9..00000000000 --- a/site/Makefile +++ /dev/null @@ -1,42 +0,0 @@ -BUNDLE := bundle -JEKYLL := $(BUNDLE) exec jekyll - -dev: - code . - open http://localhost:4000 - make serve - -clean: - rm -rf _site local-generated generated_site - -setup: - gem install --user-install bundler \ - --no-rdoc \ - --no-ri - NOKOGIRI_USE_SYSTEM_LIBRARIES=true $(BUNDLE) install \ - --path vendor/bundle - -build: clean - $(JEKYLL) build \ - --config _config.yml - -apache: clean - JEKYLL_ENV=production $(JEKYLL) build \ - --config _config.yml,_config.apache.yml - -javadoc: - scripts/javadoc-gen.sh "all" - -latest_javadoc: - scripts/javadoc-gen.sh "latest" - -staging: clean - $(JEKYLL) build --config _config.yml,_config.staging.yml - -serve: build - $(JEKYLL) serve \ - --incremental \ - --livereload \ - --config _config.yml,_config.local.yml - -publish: javadoc-gen build diff --git a/site/README.md b/site/README.md deleted file mode 100644 index 2dd5a1c8587..00000000000 --- a/site/README.md +++ /dev/null @@ -1,60 +0,0 @@ -# The BookKeeper Website - -Welcome to the code for the Apache BookKeeper website! Instructions on building the site and running it locally can be found here in this `README`. - -## Tools - -The site is built using Jekyll and Sass. - -> I'll provide more specific info here later. - -## Prerequisities - -In order to run the site locally, you need to have the following installed: - -* Ruby 2.3.1 and Rubygems -* The Javadoc CLI tool - -## Setup - -```shell -$ make setup -``` - -## Building the site - -```shell -$ make build -``` - -Please note that this will *not* build the Javadoc. That requires a separate command: - -```shell -$ make javadoc -``` - -## Serving the site locally - -To run the site in interactive mode locally: - -```shell -$ make serve -``` - -Then navigate to `localhost:4000`. As you make changes, the browser will auto-update. - -## Staging website for reviews - -When you submit a pull request for modifying website or documentation, you are recommended to make your changes live for reviews. - -Here are a few steps to follow to stage your changes: - -1. You need to create a github repo called `bookkeeper-staging-site` under your github account. You can fork this [staging repo](https://github.com/sijie/bookkeeper-staging-site) as well. -2. In your `bookkeeper-staging-site` repo, go to `Settings > GitHub Pages`. 
Enable `GitHub Pages` on `master branch /docs folder`. -3. Make changes to the website, follow the steps above to verify the changes locally. -4. Once the changes are verified locally, you can run `make staging`. It will generate the files under `site/local-generated`. -5. Run `scripts/staging-website.sh`. It would push the generated website to your `bookkeeper-staging-site`. -6. Your changes will be live on `https://.github.io/bookkeeper-staging-site`. - -If you have any ideas to improve the review process for website, please feel free to contact us at dev@bookkeeper.apache.org. - diff --git a/site/_config.apache.yml b/site/_config.apache.yml deleted file mode 100644 index b146843dd3e..00000000000 --- a/site/_config.apache.yml +++ /dev/null @@ -1,2 +0,0 @@ -baseurl: / -destination: generated_site/content diff --git a/site/_config.local.yml b/site/_config.local.yml deleted file mode 100644 index f93cfaa6880..00000000000 --- a/site/_config.local.yml +++ /dev/null @@ -1 +0,0 @@ -destination: local-generated diff --git a/site/_config.staging.yml b/site/_config.staging.yml deleted file mode 100644 index 384600141b1..00000000000 --- a/site/_config.staging.yml +++ /dev/null @@ -1,2 +0,0 @@ -baseurl: /bookkeeper-staging-site/ -destination: local-generated diff --git a/site/_config.yml b/site/_config.yml deleted file mode 100644 index a0b9a1070c0..00000000000 --- a/site/_config.yml +++ /dev/null @@ -1,73 +0,0 @@ -title: Apache BookKeeper™ -subtitle: A scalable, fault-tolerant, and low-latency storage service optimized for real-time workloads -github_repo: https://github.com/apache/bookkeeper -github_master: https://github.com/apache/bookkeeper/tree/master -permalink: pretty -baseurl: / -destination: local-generated -twitter_url: https://twitter.com/asfbookkeeper - -versions: -# [next_version_placeholder] -- "4.7.1" -- "4.7.0" -- "4.6.2" -- "4.6.1" -- "4.6.0" -- "4.5.1" -- "4.5.0" - -archived_versions: -- "4.4.0" -- "4.3.2" -- "4.3.1" -- "4.3.0" -- "4.2.4" -- "4.2.3" -- "4.2.2" -- "4.2.1" -- "4.2.0" -- "4.1.0" -- "4.0.0" -latest_version: "4.8.0-SNAPSHOT" -latest_release: "4.7.1" -stable_release: "4.6.2" -distributedlog_version: "4.7.1" - -defaults: -- scope: - path: docs - values: - layout: docs - toc: true -- scope: - path: bps - values: - layout: community -- scope: - path: community - values: - layout: community -- scope: - path: project - values: - layout: community -- scope: - path: releases - values: - layout: community - -gems: -- jekyll-toc - -exclude: -- Gemfile -- Gemfile.lock -- Makefile -- vendor/bundle -- local-generated -- generated_site -- scripts -- overview -- README.md - diff --git a/site/_data/cli/bookkeeper-daemon.yaml b/site/_data/cli/bookkeeper-daemon.yaml deleted file mode 100644 index 86318ed6608..00000000000 --- a/site/_data/cli/bookkeeper-daemon.yaml +++ /dev/null @@ -1,10 +0,0 @@ -description: Manages bookies. -root_path: bookkeeper-server/bin -commands: -- name: bookie - description: Runs the bookie server. -- name: autorecovery - description: Runs the autorecovery service daemon. -options: -- flag: -force - description: Accepted only with the `stop` command. Determines whether the bookie server is stopped forcefully if not stopped by normal shutdown. diff --git a/site/_data/cli/bookkeeper.yaml b/site/_data/cli/bookkeeper.yaml deleted file mode 100644 index 0b9bbd45153..00000000000 --- a/site/_data/cli/bookkeeper.yaml +++ /dev/null @@ -1,38 +0,0 @@ -description: Manages bookies. 
-root_path: bookkeeper-server/bin -commands: -- name: bookie - description: Starts up a bookie. -- name: localbookie - description: Starts up an ensemble of N bookies in a single JVM process. Typically used for local experimentation and development. - argument: N -- name: autorecovery - description: Runs the autorecovery service daemon. -- name: upgrade - description: Upgrades the bookie's filesystem. - options: - - flag: --upgrade - description: Upgrade the filesystem. - - flag: --rollback - description: Rollback the filesystem to a previous version. - - flag: --finalize - description: Mark the upgrade as complete. -- name: shell - description: Runs the bookie's shell for admin commands. -- name: help - description: Displays the help message for the `bookkeeper` tool. -env_vars: -- name: BOOKIE_LOG_CONF - description: The Log4j configuration file. - default: bookkeeper-server/conf/log4j.properties -- name: BOOKIE_CONF - description: The configuration file for the bookie. - default: bookkeeper-server/conf/bk_server.conf -- name: BOOKIE_EXTRA_CLASSPATH - description: Extra paths to add to BookKeeper's [classpath](https://en.wikipedia.org/wiki/Classpath_(Java)). -- name: ENTRY_FORMATTER_CLASS - description: The entry formatter class used to format entries. -- name: BOOKIE_PID_DIR - description: The directory where the bookie server PID file is stored. -- name: BOOKIE_STOP_TIMEOUT - description: The wait time before forcefully killing the bookie server instance if stopping it is not successful. diff --git a/site/_data/cli/shell.yaml b/site/_data/cli/shell.yaml deleted file mode 100644 index 092c8b1261e..00000000000 --- a/site/_data/cli/shell.yaml +++ /dev/null @@ -1,190 +0,0 @@ -commands: -- name: autorecovery - description: Enable or disable autorecovery in the cluster. - options: - - flag: -enable - description: Enable autorecovery of underreplicated ledgers - - flag: -disable - description: Disable autorecovery of underreplicated ledgers -- name: bookieformat - description: Format the current server contents. - options: - - flag: -nonInteractive - description: Whether to confirm if old data exists. - - flag: -force - description: If [nonInteractive] is specified, then whether to force delete the old data without prompt..? - - flag: -deleteCookie - description: Delete its cookie on zookeeper -- name: initbookie - description: | - Initialize new bookie, by making sure that the journalDir, ledgerDirs and - indexDirs are empty and there is no registered Bookie with this BookieId. - - If there is data present in current bookie server, the init operation will fail. If you want to format - the bookie server, use `bookieformat`. -- name: bookieinfo - description: Retrieve bookie info such as free and total disk space. -- name: bookiesanity - description: Sanity test for local bookie. Create ledger and write/read entries on the local bookie. - options: - - flag: -entries N - description: Total entries to be added for the test (default 10) - - flag: -timeout N - description: Timeout for write/read operations in seconds (default 1) -- name: decommissionbookie - description: Force trigger the Audittask and make sure all the ledgers stored in the decommissioning bookie are replicated. -- name: deleteledger - description: Delete a ledger - options: - - flag: -ledgerid N - description: Ledger ID - - flag: -force - description: Whether to force delete the Ledger without prompt..? -- name: expandstorage - description: Add new empty ledger/index directories. 
Update the directories info in the conf file before running the command. -- name: help - description: Displays the help message. -- name: lastmark - description: Print last log marker. -- name: ledger - description: Dump ledger index entries into readable format. - options: - - flag: -m LEDGER_ID - description: Print meta information -- name: ledgermetadata - description: Print the metadata for a ledger. - options: - - flag: -ledgerid N - description: Ledger ID -- name: listbookies - description: List the bookies, which are running as either readwrite or readonly mode. - options: - - flag: -readwrite - description: Print readwrite bookies - - flag: -readonly - description: Print readonly bookies - - flag: -hostnames - description: Also print hostname of the bookie -- name: listfilesondisc - description: List the files in JournalDirectory/LedgerDirectories/IndexDirectories. - options: - - flag: -journal - description: Print list of journal files - - flag: -entrylog - description: Print list of entryLog files - - flag: -index - description: Print list of index files -- name: listledgers - description: List all ledgers in the cluster (this may take a long time). - options: - - flag: -meta - description: Print metadata -- name: listunderreplicated - description: List ledgers marked as underreplicated, with optional options to specify missing replica (BookieId) and to exclude missing replica. - options: - - flag: -missingreplica N - description: Bookie Id of missing replica - - flag: -excludingmissingreplica N - description: Bookie Id of missing replica to ignore - - flag: -printmissingreplica - description: Whether to print missingreplicas list? -- name: metaformat - description: | - Format Bookkeeper metadata in Zookeeper. This command is deprecated since 4.7.0, - in favor of using `initnewcluster` for initializing a new cluster and `nukeexistingcluster` for nuking an existing cluster. - options: - - flag: -nonInteractive - description: Whether to confirm if old data exists..? - - flag: -force - description: If [nonInteractive] is specified, then whether to force delete the old data without prompt. -- name: initnewcluster - description: | - Initializes a new bookkeeper cluster. If initnewcluster fails then try nuking - existing cluster by running nukeexistingcluster before running initnewcluster again -- name: nukeexistingcluster - description: Nuke bookkeeper cluster by deleting metadata - options: - - flag: -zkledgersrootpath - description: zookeeper ledgers rootpath - - flag: -instanceid - description: instance id - - flag: -force - description: If instanceid is not specified, then whether to force nuke the metadata without validating instanceid -- name: lostbookierecoverydelay - description: Setter and Getter for LostBookieRecoveryDelay value (in seconds) in Zookeeper. - options: - - flag: -get - description: Get LostBookieRecoveryDelay value (in seconds) - - flag: -set N - description: Set LostBookieRecoveryDelay value (in seconds) -- name: readjournal - description: Scan a journal file and format the entries into readable format. - options: - - flag: -msg JOURNAL_ID|JOURNAL_FILENAME - description: Print message body - - flag: -dir - description: Journal directory (needed if more than one journal configured) -- name: readledger - description: Read a range of entries from a ledger. - argument: [ []] -- name: readlog - description: Scan an entry file and format the entries into readable format. 
- argument: - options: - - flag: -msg - description: Print message body - - flag: -ledgerid N - description: Ledger ID - - flag: -entryid N - description: Entry ID - - flag: -startpos N - description: Start Position - - flag: -endpos - description: End Position -- name: recover - description: Recover the ledger data for failed bookie. - argument: [] - options: - - flag: -deleteCookie - description: Delete cookie node for the bookie. -- name: simpletest - description: Simple test to create a ledger and write entries to it. - options: - - flag: -ensemble N - description: Ensemble size (default 3) - - flag: -writeQuorum N - description: Write quorum size (default 2) - - flag: ackQuorum N - description: Ack quorum size (default 2) - - flag: -numEntries N - description: Entries to write (default 1000) -- name: triggeraudit - description: Force trigger the Audit by resetting the lostBookieRecoveryDelay. -- name: updatecookie - description: Update bookie id in cookie. - options: - - flag: -bookieId - description: Bookie Id -- name: updateledgers - description: Update bookie id in ledgers (this may take a long time). - options: - - flag: -bookieId - description: Bookie Id - - flag: -updatespersec N - description: Number of ledgers updating per second (default 5 per sec) - - flag: -limit N - description: Maximum number of ledgers to update (default no limit) - - flag: -verbose - description: Print status of the ledger updation (default false) - - flag: -printprogress N - description: Print messages on every configured seconds if verbose turned on (default 10 secs) -- name: whoisauditor - description: Print the node which holds the auditor lock -- name: whatisinstanceid - description: Print the instanceid of the cluster -- name: convert-to-db-storage - description: Convert bookie indexes from InterleavedStorage to DbLedgerStorage format -- name: convert-to-interleaved-storage - description: Convert bookie indexes from DbLedgerStorage to InterleavedStorage format -- name: rebuild-db-ledger-locations-index - description: Rebuild DbLedgerStorage locations index diff --git a/site/_data/config/bk_server.yaml b/site/_data/config/bk_server.yaml deleted file mode 100644 index fd4f9a2ceca..00000000000 --- a/site/_data/config/bk_server.yaml +++ /dev/null @@ -1,667 +0,0 @@ -groups: -- name: Server parameters - params: - - param: bookiePort - description: The port that the bookie server listens on. - default: 3181 - - param: allowMultipleDirsUnderSameDiskPartition - description: Configure the bookie to allow/disallow multiple ledger/index/journal directories in the same filesystem disk partition - default: false - - param: listeningInterface - description: The network interface that the bookie should listen on. If not set, the bookie will listen on all interfaces. - default: eth0 - - param: advertisedAddress - description: | - Configure a specific hostname or IP address that the bookie should use to advertise itself to - clients. If not set, bookie will advertised its own IP address or hostname, depending on the - `listeningInterface` and `useHostNameAsBookieID` settings. - default: eth0 - - param: allowLoopback - description: | - Whether the bookie is allowed to use a loopback interface as its primary - interface (the interface it uses to establish its identity). By default, loopback interfaces are *not* allowed as the primary interface. - - Using a loopback interface as the primary interface usually indicates a configuration error. 
It's fairly common in some VPS setups, for example, to not configure a hostname or to have the hostname resolve to 127.0.0.1. If this is the case, then all bookies in the cluster will establish their identities as 127.0.0.1:3181, and only one will be able to join the cluster. For VPSs configured like this, you should explicitly set the listening interface. - default: 'false' - - param: useHostNameAsBookieID - description: Whether the bookie should use its hostname to register with the ZooKeeper coordination service. When `false`, the bookie will use its IP address for the registration. - default: 'false' - - param: useShortHostName - description: Whether the bookie should use short hostname or [FQDN](https://en.wikipedia.org/wiki/Fully_qualified_domain_name) hostname for registration and ledger metadata when `useHostNameAsBookieID` is enabled. - default: 'false' - - param: allowEphemeralPorts - description: Whether the bookie is allowed to use an ephemeral port (port 0) as its server port. By default, an ephemeral port is not allowed. Using an ephemeral port as the service port usually indicates a configuration error. However, in unit tests, using an ephemeral port will address port conflict problems and allow running tests in parallel. - default: 'false' - - param: enableLocalTransport - description: Whether allow the bookie to listen for BookKeeper clients executed on the local JVM. - default: 'false' - - param: disableServerSocketBind - description: Whether allow the bookie to disable bind on network interfaces, this bookie will be available only to BookKeeper clients executed on the local JVM. - default: 'false' - - param: bookieDeathWatchInterval - description: Interval to watch whether bookie is dead or not, in milliseconds. - default: 1000 - - param: extraServerComponents - description: Configure a list of extra server components to enable and load on a bookie server. This provides a plugin mechanism to run extra server components along with a bookie server. - default: '' - - param: ignoreExtraServerComponentsStartupFailures - description: Whether the bookie should ignore startup failures on loading server components specified by `extraServerComponents`. - default: 'false' - -- name: Worker thread settings - params: - - param: numAddWorkerThreads - description: The number of threads that handle write requests. if zero, writes are handled by [Netty threads](//netty.io/wiki/thread-model.html) directly. - default: 1 - - param: numReadWorkerThreads - description: The number of threads that handle read requests. If zero, reads are handled by [Netty threads](//netty.io/wiki/thread-model.html) directly. - default: 8 - - param: numLongPollWorkerThreads - description: The number of threads that handle long poll requests. If zero, long poll requests are handled by [Netty threads](//netty.io/wiki/thread-model.html) directly. - default: 0 - - param: numJournalCallbackThreads - description: The number of threads that handle journal callbacks. If zero, journal callbacks are executed directly on force write threads. - default: 1 - - param: numHighPriorityWorkerThreads - description: The number of threads that should be used for high priority requests (i.e. recovery reads and adds, and fencing). If zero, reads are handled by [Netty threads](//netty.io/wiki/thread-model.html) directly. - default: 8 - - param: maxPendingAddRequestsPerThread - description: If read worker threads are enabled, limit the number of pending requests, to avoid the executor queue to grow indefinitely. 
If zero or negative, the number of pending requests is unlimited. - default: 10000 - - param: maxPendingReadRequestsPerThread - description: If add worker threads are enabled, limit the number of pending requests, to avoid the executor queue to grow indefinitely. If zero or negative, the number of pending requests is unlimited. - default: 10000 - -- name: Long poll settings - params: - - param: requestTimerTickDurationMs - description: The tick duration for long poll request timer, in milliseconds. See [HashedWheelTimer](//netty.io/4.1/api/io/netty/util/HashedWheelTimer.html) for more details. - default: 10 - - param: requestTimerNumTicks - description: The number of ticks per wheel for long poll request timer. See [HashedWheelTimer](//netty.io/4.1/api/io/netty/util/HashedWheelTimer.html) for more details. - default: 1024 - -- name: Read-only mode support - params: - - param: readOnlyModeEnabled - description: If all ledger directories configured are full, then support only read requests for clients. If "readOnlyModeEnabled=true" then on all ledger disks full, bookie will be converted to read-only mode and serve only read requests. Otherwise the bookie will be shutdown. By default this will be disabled. - default: 'true' - - param: forceReadOnlyBookie - description: Whether the bookie is force started in read only mode or not. - default: 'false' - - param: persistBookieStatusEnabled - description: Persist the bookie status locally on the disks. So the bookies can keep their status upon restarts. - default: 'false' - -- name: Netty server settings - params: - - param: serverTcpNoDelay - description: | - This settings is used to enabled/disabled Nagle's algorithm, which is a means of improving the efficiency of TCP/IP networks by reducing the number of packets that need to be sent over the network. - - If you are sending many small messages, such that more than one can fit in a single IP packet, setting server.tcpnodelay to false to enable Nagle algorithm can provide better performance. - default: 'true' - - param: serverSockKeepalive - description: This setting is used to send keep-alive messages on connection-oriented sockets. - default: 'true' - - param: serverTcpLinger - description: The socket linger timeout on close. When enabled, a close or shutdown will not return until all queued messages for the socket have been successfully sent or the linger timeout has been reached. Otherwise, the call returns immediately and the closing is done in the background. - default: 0 - - param: byteBufAllocatorSizeInitial - description: The Recv ByteBuf allocator initial buf size. - default: 65536 - - param: byteBufAllocatorSizeMin - description: The Recv ByteBuf allocator min buf size. - default: 65536 - - param: byteBufAllocatorSizeMax - description: The Recv ByteBuf allocator max buf size. - default: 1048576 - - param: nettyMaxFrameSizeBytes - description: The maximum netty frame size in bytes. Any message received larger than this will be rejected. - default: 5242880 - -- name: Http server settings - params: - - param: httpServerEnabled - description: The flag enables/disables starting the admin http server. - default: 'false' - - param: httpServerPort - description: The http server port to listen on if `httpServerEnabled` is set to true. - default: 8080 - -- name: Security settings - params: - - param: bookieAuthProviderFactoryClass - description: The bookie authentication provider factory class name. If this is null, no authentication will take place. 
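For the server, worker-thread, and read-only knobs documented above, a sketch of setting them programmatically. The generic `setProperty` form (inherited from Apache Commons Configuration) is used so no specific setter names are assumed; the keys mirror the parameter names in this reference.

```java
import org.apache.bookkeeper.conf.ServerConfiguration;

// A sketch of a few bk_server.conf entries described above, set in code.
public final class BookieServerConfExample {
    public static ServerConfiguration build() {
        ServerConfiguration conf = new ServerConfiguration();
        conf.setProperty("bookiePort", 3181);
        conf.setProperty("numAddWorkerThreads", 1);    // 0 = writes handled on Netty threads
        conf.setProperty("numReadWorkerThreads", 8);   // 0 = reads handled on Netty threads
        conf.setProperty("readOnlyModeEnabled", true); // go read-only when all ledger disks fill
        conf.setProperty("serverTcpNoDelay", true);    // disable Nagle's algorithm
        return conf;
    }
}
```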
- default: null - param: permittedStartupUsers - description: | - The list of users permitted to run the bookie process. Any user can run the bookie process if it is not set. - - Example settings - "permittedStartupUsers=user1,user2,user3" - default: null - -- name: TLS settings - params: - - param: tlsProvider - description: TLS Provider (JDK or OpenSSL) - default: OpenSSL - - param: tlsProviderFactoryClass - description: The path to the class that provides security. - default: org.apache.bookkeeper.security.SSLContextFactory - - param: tlsClientAuthentication - description: Whether the bookie requires TLS authentication from clients. - default: 'true' - - param: tlsKeyStoreType - description: Bookie Keystore type. - default: JKS - - param: tlsKeyStore - description: Bookie Keystore location (path). - default: null - - param: tlsKeyStorePasswordPath - description: Bookie Keystore password path, if the keystore is protected by a password. - default: null - - param: tlsTrustStoreType - description: Bookie Truststore type. - default: null - - param: tlsTrustStore - description: Bookie Truststore location (path). - default: null - - param: tlsTrustStorePasswordPath - description: Bookie Truststore password path, if the truststore is protected by a password. - default: null - -- name: Journal settings - params: - - param: journalDirectories - description: | - The directories to which Bookkeeper outputs its write-ahead log (WAL). Multiple directories can be defined to store write-ahead logs, separated by ','. - For example: - journalDirectories=/tmp/bk-journal1,/tmp/bk-journal2 - If journalDirectories is set, bookies will ignore journalDirectory and use the directories in this setting. - default: /tmp/bk-journal - - param: journalDirectory - description: | - @Deprecated since 4.5.0, in favor of using `journalDirectories`. - - The directory to which Bookkeeper outputs its write-ahead log (WAL). - default: /tmp/bk-txn - - param: journalFormatVersionToWrite - description: | - The journal format version to write. - Available formats are 1-6: - 1: no header - 2: a header section was added - 3: ledger key was introduced - 4: fencing key was introduced - 5: expanding header to 512 and padding writes to align sector size configured by `journalAlignmentSize` - 6: persisting explicitLac is introduced - - By default, it is `4`. If you'd like to enable the `padding-writes` feature, you can set the journal version to `5`. - You can disable `padding-writes` by setting the journal version back to `4`. This feature is available from 4.5.0 onwards. - If you'd like to enable persisting ExplicitLac, you can set this config to 6; fileInfoFormatVersionToWrite should then be at least 1. If there is a mismatch, the server configuration is considered invalid. - default: 4 - - param: journalMaxSizeMB - description: Max file size of a journal file, in megabytes. A new journal file will be created when the old one reaches the file size limitation. - default: 2048 - - param: journalMaxBackups - description: Max number of old journal files to keep. Keeping a number of old journal files helps data recovery in special cases. - default: 5 - - param: journalPreAllocSizeMB - description: How much space should we pre-allocate at a time in the journal. - default: 16 - - param: journalWriteBufferSizeKB - description: Size of the write buffers used for the journal.
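The TLS settings above translate to configuration keys on the bookie; a sketch of wiring them in code, with placeholder paths (the keys mirror the parameter names documented here, and `setProperty` again avoids assuming specific setter names):

```java
import org.apache.bookkeeper.conf.ServerConfiguration;

// Sketch of the TLS keys documented above; all paths are placeholders.
public final class BookieTlsConfExample {
    public static ServerConfiguration build() {
        ServerConfiguration conf = new ServerConfiguration();
        conf.setProperty("tlsProvider", "OpenSSL");
        conf.setProperty("tlsClientAuthentication", true);
        conf.setProperty("tlsKeyStoreType", "JKS");
        conf.setProperty("tlsKeyStore", "/etc/bookkeeper/bookie.keystore.jks");
        conf.setProperty("tlsKeyStorePasswordPath", "/etc/bookkeeper/keystore.passwd");
        conf.setProperty("tlsTrustStore", "/etc/bookkeeper/bookie.truststore.jks");
        return conf;
    }
}
```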
- default: 64 - - param: journalRemoveFromPageCache - description: Should we remove pages from page cache after force write - default: 'true' - - param: journalSyncData - description: | - Should the data be fsynced on journal before acknowledgment. - By default, data sync is enabled to guarantee durability of writes. Beware - when disabling data sync in the bookie journal - might improve the bookie write performance, it will also introduce the possibility of data loss. With no fsync, the journal - entries are written in the OS page cache but not flushed to disk. In case of power failure, the affected bookie might lose - the unflushed data. If the ledger is replicated to multiple bookies, the chances of data loss are reduced though still present. - default: 'true' - - param: journalAdaptiveGroupWrites - description: Should we group journal force writes, which optimize group commit for higher throughput. - default: 'true' - - param: journalMaxGroupWaitMSec - description: Maximum latency to impose on a journal write to achieve grouping. - default: 2 - - param: journalBufferedWritesThreshold - description: Maximum writes to buffer to achieve grouping. - default: 524288 - - param: journalFlushWhenQueueEmpty - description: If we should flush the journal when journal queue is empty. - default: 'false' - - param: journalAlignmentSize - description: All the journal writes and commits should be aligned to given size. If not, zeros will be padded to align to given size. - default: 512 - - param: journalBufferedEntriesThreshold - description: Maximum entries to buffer to impose on a journal write to achieve grouping. - default: 0 - - param: journalFlushWhenQueueEmpty - description: If we should flush the journal when journal queue is empty. - default: 'false' - -- name: Ledger storage settings - params: - - param: ledgerStorageClass - description: | - Ledger storage implementation class - - Options: - - org.apache.bookkeeper.bookie.InterleavedLedgerStorage - - org.apache.bookkeeper.bookie.SortedLedgerStorage - - org.apache.bookkeeper.bookie.storage.ldb.DbLedgerStorage - default: org.apache.bookkeeper.bookie.SortedLedgerStorage - - param: sortedLedgerStorageEnabled - description: | - @Deprecated in favor of using `ledgerStorageClass` - - Whether sorted-ledger storage enabled (default true) - default: 'true' - - param: ledgerDirectories - description: The directory to which Bookkeeper outputs ledger snapshots. You can define multiple directories to store snapshots separated by a comma, for example `/tmp/data-dir1,/tmp/data-dir2`. - default: /tmp/bk-data - - param: indexDirectories - description: The directories in which index files are stored. If not specified, the value of [`ledgerDirectories`](#ledgerDirectories) will be used. - default: /tmp/bk-data - - param: minUsableSizeForIndexFileCreation - description: Minimum safe usable size to be available in index directory for bookie to create index file while replaying journal at the time of bookie start in readonly mode (in bytes) - default: 1073741824 - - param: minUsableSizeForEntryLogCreation - description: | - Minimum safe usable size to be available in ledger directory for bookie to create entry log files (in bytes). - This parameter allows creating entry log files when there are enough disk spaces, even when - the bookie is running at readonly mode because of the disk usage is exceeding `diskUsageThreshold`. - Because compaction, journal replays can still write data to disks when a bookie is readonly. 
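The journal grouping knobs above trade a small amount of latency for fewer fsyncs. A throughput-leaning sketch, with illustrative values drawn from the defaults documented here:

```java
import org.apache.bookkeeper.conf.ServerConfiguration;

// Group-commit oriented journal tuning; values are illustrative.
public final class JournalTuningExample {
    public static ServerConfiguration build() {
        ServerConfiguration conf = new ServerConfiguration();
        conf.setProperty("journalSyncData", true);            // fsync before acking (durable)
        conf.setProperty("journalAdaptiveGroupWrites", true); // batch journal force-writes
        conf.setProperty("journalMaxGroupWaitMSec", 2);       // max latency added per group
        conf.setProperty("journalBufferedWritesThreshold", 524288); // or flush at 512 KB buffered
        return conf;
    }
}
```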
- default: 1.2 * `logSizeLimit` - - param: minUsableSizeForHighPriorityWrites - description: | - Minimum safe usable size to be available in ledger directory for bookie to accept high priority writes even it is in readonly mode. - default: 1.2 * `logSizeLimit` - - param: flushInterval - description: When entryLogPerLedgerEnabled is enabled, checkpoint doesn't happens when a new active entrylog is created / previous one is rolled over. Instead SyncThread checkpoints periodically with 'flushInterval' delay (in milliseconds) in between executions. Checkpoint flushes both ledger entryLogs and ledger index pages to disk. Flushing entrylog and index files will introduce much random disk I/O. If separating journal dir and ledger dirs each on different devices, flushing would not affect performance. But if putting journal dir and ledger dirs on same device, performance degrade significantly on too frequent flushing. You can consider increment flush interval to get better performance, but you need to pay more time on bookie server restart after failure. This config is used only when entryLogPerLedgerEnabled is enabled. - default: 10000 - - param: allowStorageExpansion - description: Allow the expansion of bookie storage capacity. Newly added ledger and index directories must be empty. - default: 'false' - -- name: Entry log settings - params: - - param: logSizeLimit - description: Max file size of entry logger, in bytes. A new entry log file will be created when the old one reaches the file size limitation. - default: 2147483648 - - param: entryLogFilePreallocationEnabled - description: Enable/Disable entry logger preallocation - default: true - - param: flushEntrylogBytes - description: Entry log flush interval, in bytes. Setting this to 0 or less disables this feature and makes flush happen on log rotation. Flushing in smaller chunks but more frequently reduces spikes in disk I/O. Flushing too frequently may negatively affect performance. - default: 0 - - param: readBufferSizeBytes - description: The capacity allocated for [`BufferedReadChannel`](/api/org/apache/bookkeeper/bookie/BufferedReadChannel)s, in bytes. - default: 512 - - param: writeBufferSizeBytes - description: The number of bytes used as capacity for the write buffer. - default: 65536 - - param: entryLogPerLedgerEnabled - description: Specifies if entryLog per ledger is enabled/disabled. If it is enabled, then there would be a active entrylog for each ledger. It would be ideal to enable this feature if the underlying storage device has multiple DiskPartitions or SSD and if in a given moment, entries of fewer number of active ledgers are written to the bookie. - default: false - - param: entrylogMapAccessExpiryTimeInSeconds - description: config specifying if the entrylog per ledger is enabled, then the amount of time EntryLogManagerForEntryLogPerLedger should wait for closing the entrylog file after the last addEntry call for that ledger, if explicit writeclose for that ledger is not received. - default: 300 - - param: maximumNumberOfActiveEntryLogs - description: in entryLogPerLedger feature, this specifies the maximum number of entrylogs that can be active at a given point in time. If there are more number of active entryLogs then the maximumNumberOfActiveEntryLogs then the entrylog will be evicted from the cache. 
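The entry log parameters above imply two simple decisions: rotate when `logSizeLimit` is reached, and optionally flush every `flushEntrylogBytes` bytes. An illustrative sketch of that logic (not the bookie's actual implementation):

```java
// Illustrative decision logic implied by logSizeLimit and
// flushEntrylogBytes above.
final class EntryLogPolicySketch {
    static boolean shouldRotate(long currentLogSize, long logSizeLimit) {
        return currentLogSize >= logSizeLimit; // start a new entry log file
    }

    static boolean shouldFlush(long bytesSinceLastFlush, long flushEntrylogBytes) {
        // 0 or less disables incremental flushing: the flush then
        // happens only on log rotation.
        return flushEntrylogBytes > 0 && bytesSinceLastFlush >= flushEntrylogBytes;
    }
}
```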
-- name: Entry log settings - params: - - param: logSizeLimit - description: Max file size of the entry logger, in bytes. A new entry log file will be created when the old one reaches the file size limit. - default: 2147483648 - - param: entryLogFilePreallocationEnabled - description: Enable/disable entry logger preallocation - default: true - - param: flushEntrylogBytes - description: Entry log flush interval, in bytes. Setting this to 0 or less disables this feature and makes flushes happen on log rotation. Flushing in smaller chunks but more frequently reduces spikes in disk I/O. Flushing too frequently may negatively affect performance. - default: 0 - - param: readBufferSizeBytes - description: The capacity allocated for [`BufferedReadChannel`](/api/org/apache/bookkeeper/bookie/BufferedReadChannel)s, in bytes. - default: 512 - - param: writeBufferSizeBytes - description: The number of bytes used as capacity for the write buffer. - default: 65536 - - param: entryLogPerLedgerEnabled - description: Specifies whether entryLog per ledger is enabled/disabled. If it is enabled, there will be an active entrylog for each ledger. It is ideal to enable this feature if the underlying storage device has multiple disk partitions or SSDs and, at a given moment, entries of only a small number of active ledgers are being written to the bookie. - default: false - - param: entrylogMapAccessExpiryTimeInSeconds - description: If entrylog per ledger is enabled, this specifies the amount of time EntryLogManagerForEntryLogPerLedger should wait before closing the entrylog file after the last addEntry call for that ledger, if an explicit write close for that ledger is not received. - default: 300 - - param: maximumNumberOfActiveEntryLogs - description: In the entryLogPerLedger feature, this specifies the maximum number of entrylogs that can be active at a given point in time. If there are more active entryLogs than maximumNumberOfActiveEntryLogs, entrylogs will be evicted from the cache. - default: 500 - - param: entryLogPerLedgerCounterLimitsMultFactor - description: In EntryLogManagerForEntryLogPerLedger, this config value specifies the metrics cache size limits in multiples of the entrylogMap cache size limits. - default: 10 - -- name: Entry log compaction settings - params: - - param: compactionRate - description: The rate at which compaction will read entries. The unit is adds per second. - default: 1000 - - param: minorCompactionThreshold - description: Threshold for minor compaction. Entry log files whose remaining size percentage falls below this threshold will be compacted in a minor compaction. If it is set to less than zero, minor compaction is disabled. - default: 0.2 - - param: minorCompactionInterval - description: Interval to run minor compaction, in seconds. If it is set to less than zero, minor compaction is disabled. - default: 3600 - - param: compactionMaxOutstandingRequests - description: Set the maximum number of entries which can be compacted without flushing. When compacting, the entries are written to the entrylog and the new offsets are cached in memory. Once the entrylog is flushed, the index is updated with the new offsets. This parameter controls the number of entries added to the entrylog before a flush is forced. A higher value for this parameter means more memory will be used for offsets. Each offset consists of 3 longs. This parameter should *not* be modified unless you know what you're doing. - default: 100000 - - param: majorCompactionThreshold - description: Threshold for major compaction. Entry log files whose remaining size percentage falls below this threshold will be compacted in a major compaction. Entry log files whose remaining size percentage is still higher than the threshold will never be compacted. If it is set to less than zero, major compaction is disabled. - default: 0.8 - - param: majorCompactionInterval - description: Interval to run major compaction, in seconds. If it is set to less than zero, major compaction is disabled. - default: 86400 - - param: isThrottleByBytes - description: Throttle compaction by bytes or by entries. - default: 'false' - - param: compactionRateByEntries - description: Set the rate at which compaction will read entries. The unit is adds per second. - default: 1000 - - param: compactionRateByBytes - description: Set the rate at which compaction will read entries. The unit is bytes added per second. - default: 1000000 - - param: useTransactionalCompaction - description: | Flag to enable/disable transactional compaction. If it is set to true, transactional compaction is used, which uses new entry log files to store entries after compaction; otherwise, normal compaction is used, which shares the same entry log file with normal add operations. - default: 'false' -
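Minor and major compaction differ only in how aggressively they reclaim space: the threshold is the fraction of an entry log that is still live, so a lower threshold reclaims only mostly-dead logs. A sketch of one possible compaction policy (illustrative values; the keys are the parameters documented above):

```java
import org.apache.bookkeeper.conf.ServerConfiguration;

public class CompactionSketch {
    public static void main(String[] args) {
        ServerConfiguration conf = new ServerConfiguration();
        // Minor: hourly, compact logs whose live data dropped below 20%.
        conf.setProperty("minorCompactionThreshold", 0.2);
        conf.setProperty("minorCompactionInterval", 3600);
        // Major: daily, compact logs whose live data dropped below 80%.
        conf.setProperty("majorCompactionThreshold", 0.8);
        conf.setProperty("majorCompactionInterval", 86400);
        // Throttle by bytes rather than entry count when entry sizes vary widely.
        conf.setProperty("isThrottleByBytes", true);
        conf.setProperty("compactionRateByBytes", 1000000);
    }
}
```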
-- name: Garbage collection settings - params: - - param: gcWaitTime - description: The interval at which to trigger the next garbage collection, in milliseconds. Since garbage collection runs in the background, too frequent gc will hurt performance. It is better to set a higher gc interval if there is enough disk capacity. - default: 1000 - - param: gcOverreplicatedLedgerWaitTime - description: The interval at which to trigger the next garbage collection of overreplicated ledgers, in milliseconds. This should not be run very frequently since we read the metadata for all the ledgers on the bookie from zk. - default: 86400000 - - param: isForceGCAllowWhenNoSpace - description: Whether force compaction is allowed when the disk is full or almost full. Forcing GC may get some space back, but may also fill up disk space more quickly. This is because new log files are created before GC, while old garbage log files are deleted after GC. - default: 'false' - - param: verifyMetadataOnGC - description: Whether the bookie should double-check if a ledger exists in the metadata service prior to gc. - default: 'false' - -- name: Disk utilization - params: - - param: diskUsageThreshold - description: | For each ledger dir, the maximum disk space which can be used. Default is 0.95f, i.e. at most 95% of the disk can be used, after which nothing will be written to that partition. If all ledger dir partitions are full, the bookie will turn to readonly mode if 'readOnlyModeEnabled=true' is set, else it will shut down. Valid values are between 0 and 1 (exclusive). - default: 0.95 - - param: diskUsageWarnThreshold - description: The disk free space low water mark threshold. The disk is considered full when the usage threshold is exceeded, and returns to the non-full state when usage drops below the low water mark threshold. This prevents the disk from going back and forth between these states frequently when concurrent writes and compaction are happening, and also prevents the bookie from switching frequently between read-only and read-write states in the same cases. - default: 0.95 - - param: diskUsageLwmThreshold - description: | Set the disk free space low water mark threshold. The disk is considered full when the usage threshold is exceeded, and returns to the non-full state when usage drops below the low water mark threshold. This prevents the disk from going back and forth between these states frequently when concurrent writes and compaction are happening, and also prevents the bookie from switching frequently between read-only and read-write states in the same cases. - default: 0.90 - - param: diskCheckInterval - description: Disk check interval in milliseconds. The interval at which to check ledger dirs usage. - default: 10000 -
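The two usage thresholds form a hysteresis band: a partition is marked full above `diskUsageThreshold` and only returns to writable once usage drops below `diskUsageLwmThreshold`. A sketch (values mirror the documented defaults):

```java
import org.apache.bookkeeper.conf.ServerConfiguration;

public class DiskThresholdSketch {
    public static void main(String[] args) {
        ServerConfiguration conf = new ServerConfiguration();
        // Full above 95% usage; writable again only below 90%.
        conf.setProperty("diskUsageThreshold", 0.95f);
        conf.setProperty("diskUsageLwmThreshold", 0.90f);
        // Re-evaluate ledger dir usage every 10 seconds.
        conf.setProperty("diskCheckInterval", 10000);
    }
}
```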
-- name: Sorted Ledger Storage Settings - params: - - param: skipListSizeLimit - description: The skip list data size limit (default 64MB) in EntryMemTable - default: 67108864 - - param: skipListArenaChunkSize - description: The number of bytes we should use as chunk allocation for org.apache.bookkeeper.bookie.SkipListArena - default: 4194304 - - param: skipListArenaMaxAllocSize - description: The max size we should allocate from the skiplist arena. Allocations larger than this should be allocated directly by the VM to avoid fragmentation. - default: 131072 - - param: openFileLimit - description: | Max number of ledger index files that can be opened in the bookie server. If the number of ledger index files reaches this limit, the bookie server starts to swap some ledgers from memory to disk. Too frequent swapping will affect performance. You can tune this number to gain performance according to your requirements. - default: 20000 - - param: fileInfoCacheInitialCapacity - description: | The minimum total size of the internal file info cache table. Providing a large enough estimate at construction time avoids the need for expensive resizing operations later, but setting this value unnecessarily high wastes memory. The default value is `1/4` of `openFileLimit` if openFileLimit is positive, otherwise it is 64. - - param: fileInfoMaxIdleTime - description: | The max idle time allowed for an open file info to exist in the file info cache. If a file info stays idle beyond the given time period, it will be evicted and closed. If the value is zero or negative, file infos are evicted only when the number of opened files reaches `openFileLimit`. - default: 0 - - param: fileInfoFormatVersionToWrite - description: | The fileinfo format version to write. - Available formats are 0-1: - 0: Initial version - 1: persisting explicitLac is introduced - By default, it is `0`. If you'd like to enable persisting ExplicitLac, you can set this config to 1; journalFormatVersionToWrite should also be at least 6. If there is a mismatch, the server config is considered invalid. - default: 0 - - param: pageSize - description: | Size of an index page in the ledger cache, in bytes. A larger index page can improve the performance of writing pages to disk, which is efficient when you have a small number of ledgers and these ledgers have a similar number of entries. If you have a large number of ledgers and each ledger has fewer entries, a smaller index page would improve memory usage. - default: 8192 - - param: pageLimit - description: | How many index pages are provided in the ledger cache. If the number of index pages reaches this limit, the bookie server starts to swap some ledgers from memory to disk. You can increase this value when you find swapping becoming more frequent. But make sure pageLimit*pageSize is not more than the JVM max memory limit, otherwise you will get OutOfMemoryException. In general, increasing pageLimit and using a smaller index page gives better performance for a larger number of ledgers with fewer entries. If pageLimit is -1, the bookie server will use 1/3 of the JVM memory to compute the limit on the number of index pages. - default: "-1" - - param: numOfMemtableFlushThreads - description: | When entryLogPerLedger is enabled, SortedLedgerStorage flushes entries from the memTable using an OrderedExecutor with numOfMemtableFlushThreads threads. - default: 8 -
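The warning about `pageLimit*pageSize` is simple arithmetic worth making explicit: for example, 131072 pages of 8192 bytes each pin up to 1 GiB of index pages in the heap. A sketch of the bound check (hypothetical values):

```java
import org.apache.bookkeeper.conf.ServerConfiguration;

public class IndexCacheSketch {
    public static void main(String[] args) {
        ServerConfiguration conf = new ServerConfiguration();
        int pageSize = 8192;    // bytes per index page
        int pageLimit = 131072; // max index pages held in the ledger cache
        // The constraint above: pageLimit * pageSize must stay well under -Xmx.
        long bound = (long) pageLimit * pageSize;
        System.out.println("index page cache upper bound: " + bound + " bytes"); // 1 GiB
        conf.setProperty("pageSize", pageSize);
        conf.setProperty("pageLimit", pageLimit);
    }
}
```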
-- name: DB Ledger Storage Settings - params: - - param: dbStorage_writeCacheMaxSizeMb - description: Size of the write cache. Memory is allocated from JVM direct memory. The write cache is used to buffer entries before flushing them into the entry log. For good performance, it should be big enough to hold a substantial amount of entries in the flush interval. - default: 512 - - param: dbStorage_readAheadCacheMaxSizeMb - description: Size of the read cache. Memory is allocated from JVM direct memory. The read cache is pre-filled by doing read-ahead whenever a cache miss happens. - default: 256 - - param: dbStorage_readAheadCacheBatchSize - description: How many entries to pre-fill in the cache after a read cache miss - default: 1000 - - param: dbStorage_rocksDB_blockCacheSize - description: | Size of the RocksDB block cache. RocksDB is used for storing ledger indexes. For best performance, this cache should be big enough to hold a significant portion of the index database, which can reach ~2GB in some cases. - default: 268435456 - - param: dbStorage_rocksDB_writeBufferSizeMB - description: | Size of the RocksDB write buffer. RocksDB is used for storing ledger indexes. - default: 64 - - param: dbStorage_rocksDB_sstSizeInMB - description: | Size of the RocksDB sst file, in MB. RocksDB is used for storing ledger indexes. - default: 64 - - param: dbStorage_rocksDB_blockSize - default: 65536 - - param: dbStorage_rocksDB_bloomFilterBitsPerKey - default: 10 - - param: dbStorage_rocksDB_numLevels - default: "-1" - - param: dbStorage_rocksDB_numFilesInLevel0 - default: 10 - - param: dbStorage_rocksDB_maxSizeInLevel1MB - default: 256 - -- name: Metadata Service Settings - params: - - param: metadataServiceUri - description: The metadata service URI that bookkeeper uses for loading the corresponding metadata driver and resolving its metadata service location. - default: "zk+hierarchical://localhost:2181/ledgers" - - param: ledgerManagerFactoryClass - description: | @Deprecated in favor of using `metadataServiceUri` - The ledger manager factory class, which defines how ledgers are stored, managed, and garbage collected. See the [Ledger Manager](../../getting-started/concepts#ledger-manager) guide for more details. - default: hierarchical - - param: allowShadedLedgerManagerFactoryClass - description: | Sometimes the bookkeeper server classes are shaded, and the ledger manager factory classes might be relocated under other packages. This would fail clients using shaded factory classes, since the factory classes are stored in cookies and used for verification. Users can enable this flag to allow using shaded ledger manager factory classes to connect to a bookkeeper cluster. - default: 'false' - - param: shadedLedgerManagerFactoryClassPrefix - description: The shaded ledger manager factory prefix. This is used when `allowShadedLedgerManagerFactoryClass` is set to true. - default: 'dlshade.' - -- name: ZooKeeper Metadata Service Settings - params: - - param: zkLedgersRootPath - description: | @Deprecated in favor of using `metadataServiceUri` - Root Zookeeper path to store ledger metadata. This parameter is used by the zookeeper-based ledger manager as a root znode to store all ledgers. - default: /ledgers - - param: zkServers - description: | @Deprecated in favor of using `metadataServiceUri` - A list of one or more servers on which Zookeeper is running. The server list can be comma-separated values, for example `zkServers=zk1:2181,zk2:2181,zk3:2181`. - default: "localhost:2181" - - param: zkTimeout - description: ZooKeeper client session timeout in milliseconds. The bookie server will exit if it receives SESSION_EXPIRED because it was partitioned off from ZooKeeper for more than the session timeout; JVM garbage collection or disk I/O pauses can cause SESSION_EXPIRED. Increasing this value can help avoid the issue. - default: 10000 - - param: zkRetryBackoffStartMs - description: The Zookeeper client backoff retry start time in millis. - default: 1000 - - param: zkRetryBackoffMaxMs - description: The Zookeeper client backoff retry max time in millis. - default: 10000 - - param: zkRequestRateLimit - description: The Zookeeper request limit. It is only enabled when set to a positive value. - default: 0 - - param: zkEnableSecurity - description: Set ACLs on every node written on ZooKeeper; this way only allowed users will be able to read and write BookKeeper metadata stored on ZooKeeper. In order to make ACLs work you need to set up ZooKeeper JAAS authentication; all the bookies and clients need to share the same user, and this is usually done using Kerberos authentication. See the ZooKeeper documentation. - default: 'false' -
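The deprecation notes above all point at `metadataServiceUri`: one URI carries the ZooKeeper endpoint, the ledgers root path, and the ledger manager layout. A sketch using the documented default value:

```java
import org.apache.bookkeeper.conf.ServerConfiguration;

public class MetadataServiceSketch {
    public static void main(String[] args) {
        ServerConfiguration conf = new ServerConfiguration();
        // scheme (zk) + layout (hierarchical) + endpoint + ledgers root path,
        // replacing the deprecated zkServers/zkLedgersRootPath/ledgerManagerFactoryClass.
        conf.setProperty("metadataServiceUri", "zk+hierarchical://localhost:2181/ledgers");
        // The session timeout must outlast worst-case GC and disk I/O pauses.
        conf.setProperty("zkTimeout", 10000);
    }
}
```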
-- name: Statistics - params: - - param: enableStatistics - description: Whether statistics are enabled for the bookie. - default: true - - param: statsProviderClass - description: | Stats provider class. - Options: - Prometheus : org.apache.bookkeeper.stats.prometheus.PrometheusMetricsProvider - Codahale : org.apache.bookkeeper.stats.codahale.CodahaleMetricsProvider - Twitter Finagle : org.apache.bookkeeper.stats.twitter.finagle.FinagleStatsProvider - Twitter Ostrich : org.apache.bookkeeper.stats.twitter.ostrich.OstrichProvider - Twitter Science : org.apache.bookkeeper.stats.twitter.science.TwitterStatsProvider - default: org.apache.bookkeeper.stats.prometheus.PrometheusMetricsProvider - -- name: Prometheus Metrics Provider Settings - params: - - param: prometheusStatsHttpPort - description: default port for the prometheus metrics exporter - default: 8000 - - param: prometheusStatsLatencyRolloverSeconds - description: latency stats rollover interval, in seconds - default: 60 - -- name: Codahale Metrics Provider Settings - params: - - param: codahaleStatsPrefix - description: metric name prefix, default is empty. - default: "" - - param: codahaleStatsOutputFrequencySeconds - description: the frequency at which stats reporters report stats, in seconds. - default: 60 - - param: codahaleStatsGraphiteEndpoint - description: the graphite endpoint for reporting stats. see [graphite reporter](//metrics.dropwizard.io/3.1.0/manual/graphite/) for more details. - default: "null" - - param: codahaleStatsCSVEndpoint - description: the directory for reporting stats in csv format. see [csv reporter](//metrics.dropwizard.io/3.1.0/manual/core/#csv) for more details. - default: "null" - - param: codahaleStatsSlf4jEndpoint - description: the slf4j endpoint for reporting stats. see [slf4j reporter](//metrics.dropwizard.io/3.1.0/manual/core/#slf4j) for more details. - default: "null" - - param: codahaleStatsJmxEndpoint - description: the jmx endpoint for reporting stats. see [jmx reporter](//metrics.dropwizard.io/3.1.0/manual/core/#jmx) for more details. - -- name: Twitter Ostrich Metrics Provider - params: - - param: statsExport - description: Flag to control whether to expose ostrich metrics via an http endpoint configured by `statsHttpPort`. - default: "false" - - param: statsHttpPort - description: The http port for exposing ostrich stats if `statsExport` is set to true - default: 9002 - -- name: Twitter Science Metrics Provider - params: - - param: statsExport - description: Flag to control whether to expose metrics via an http endpoint configured by `statsHttpPort`. - default: "false" - - param: statsHttpPort - description: The http port for exposing stats if `statsExport` is set to true - default: 9002 -
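The provider class selects the metrics backend, and any provider-specific keys (such as the Prometheus port) are read from the same configuration object. A sketch wiring up the default Prometheus provider:

```java
import org.apache.bookkeeper.conf.ServerConfiguration;

public class StatsProviderSketch {
    public static void main(String[] args) {
        ServerConfiguration conf = new ServerConfiguration();
        conf.setProperty("enableStatistics", true);
        conf.setProperty("statsProviderClass",
                "org.apache.bookkeeper.stats.prometheus.PrometheusMetricsProvider");
        // Provider-specific settings ride along in the same config.
        conf.setProperty("prometheusStatsHttpPort", 8000);
        conf.setProperty("prometheusStatsLatencyRolloverSeconds", 60);
    }
}
```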
-- name: AutoRecovery general settings - params: - - param: autoRecoveryDaemonEnabled - description: Whether the bookie itself also starts the auto-recovery service. - default: false - - param: digestType - description: The default digest type used for opening ledgers. - default: "CRC32" - - param: passwd - description: The default password used for opening ledgers. Default value is empty string. - default: "" - - param: enableDigestTypeAutodetection - description: The flag to enable/disable digest type auto-detection. If it is enabled, the bookkeeper client will ignore the digest type provided at `digestType` and the passwd provided at `passwd`. - default: true - -- name: AutoRecovery placement settings - params: - - param: ensemblePlacementPolicy - description: | The ensemble placement policy used for finding bookies for re-replicating entries. - Options: - org.apache.bookkeeper.client.RackawareEnsemblePlacementPolicy - org.apache.bookkeeper.client.RegionAwareEnsemblePlacementPolicy - default: "org.apache.bookkeeper.client.RackawareEnsemblePlacementPolicy" - - param: reppDnsResolverClass - description: | The DNS resolver class used for resolving network locations for bookies. The setting is used when using either RackawareEnsemblePlacementPolicy or RegionAwareEnsemblePlacementPolicy. - default: "org.apache.bookkeeper.net.ScriptBasedMapping" - - param: networkTopologyScriptFileName - description: | The bash script used by the `ScriptBasedMapping` DNS resolver for resolving bookies' network locations. - - param: networkTopologyScriptNumberArgs - description: | The max number of args used in the script provided at `networkTopologyScriptFileName`. - - param: minNumRacksPerWriteQuorum - description: | The minimum number of racks per write quorum. RackawareEnsemblePlacementPolicy will try to get bookies from at least 'minNumRacksPerWriteQuorum' racks for a writeQuorum. - - param: enforceMinNumRacksPerWriteQuorum - description: | 'enforceMinNumRacksPerWriteQuorum' enforces that RackawareEnsemblePlacementPolicy picks bookies from 'minNumRacksPerWriteQuorum' racks for a writeQuorum. If it can't find enough bookies, it throws BKNotEnoughBookiesException instead of picking a random one. - -- name: AutoRecovery auditor settings - params: - - param: auditorPeriodicBookieCheckInterval - description: The time interval between auditor bookie checks, in seconds. The auditor bookie check checks ledger metadata to see which bookies should contain entries for each ledger. If a bookie that should contain entries is unavailable, any ledgers with entries on that bookie are marked for recovery. Setting this to 0 disables the periodic check. Bookie checks will still run when a bookie fails. The default is once per day. - default: 86400 - - param: auditorPeriodicCheckInterval - description: | The time interval, in seconds, at which the auditor will check all ledgers in the cluster. By default this runs once a week. - Set this to 0 to disable the periodic check completely. Note that periodic checking will put extra load on the cluster, so it should not be run more frequently than once a day. - default: 604800 - - param: auditorLedgerVerificationPercentage - description: | The percentage of a ledger fragment's entries that will be verified before claiming the fragment as missing. If it is 0, only the first and last entries of a given fragment are verified. - default: 0 - - param: lostBookieRecoveryDelay - description: How long to wait, in seconds, before starting autorecovery of a lost bookie. - default: 0 - - param: storeSystemTimeAsLedgerUnderreplicatedMarkTime - description: Enable the Auditor to use system time as the underreplicated ledger mark time. If this is enabled, the Auditor will write a ctime field into the underreplicated ledger znode. -
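Put together, the AutoRecovery cadence is a cheap bookie-availability check (daily by default) plus an expensive full-ledger audit (weekly by default), with an optional delay before re-replicating a lost bookie. A sketch (the 300-second delay is an illustrative choice, not a default):

```java
import org.apache.bookkeeper.conf.ServerConfiguration;

public class AutoRecoverySketch {
    public static void main(String[] args) {
        ServerConfiguration conf = new ServerConfiguration();
        // Run the recovery daemon inside each bookie process.
        conf.setProperty("autoRecoveryDaemonEnabled", true);
        // Cheap bookie check daily; expensive full ledger audit weekly.
        conf.setProperty("auditorPeriodicBookieCheckInterval", 86400);
        conf.setProperty("auditorPeriodicCheckInterval", 604800);
        // Give a flapping bookie five minutes to return before re-replicating.
        conf.setProperty("lostBookieRecoveryDelay", 300);
    }
}
```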
-- name: AutoRecovery replication worker settings - params: - - param: rereplicationEntryBatchSize - description: The number of entries that the replication worker will rereplicate in parallel. - default: 10 - - param: openLedgerRereplicationGracePeriod - description: The grace period, in seconds, that the replication worker waits before fencing and replicating a ledger fragment that's still being written to upon bookie failure. - default: 30 - - param: lockReleaseOfFailedLedgerGracePeriod - description: If the replication worker fails to replicate an underreplicated ledger more than ReplicationWorker.MAXNUMBER_REPLICATION_FAILURES_ALLOWED_BEFORE_DEFERRING times, then instead of releasing the lock immediately after the failed attempt, it holds the underreplicated ledger's lock for this grace period and then releases it. - default: 60 - - param: rwRereplicateBackoffMs - description: The time to back off when the replication worker encounters exceptions replicating a ledger, in milliseconds. - default: 5000 - - diff --git a/site/_data/popovers.yaml b/site/_data/popovers.yaml deleted file mode 100644 index 8b5347c95d7..00000000000 --- a/site/_data/popovers.yaml +++ /dev/null @@ -1,22 +0,0 @@ -- term: entry - d: An entry is a sequence of bytes (plus some metadata) written to a BookKeeper ledger. Entries are also known as records. -- term: ledger - d: A ledger is a sequence of entries written to BookKeeper. Entries are written sequentially to ledgers and at most once, giving ledgers append-only semantics. -- term: bookie - d: | A bookie is an individual BookKeeper storage server. Bookies store the content of ledgers and act as a distributed ensemble. -- term: rereplication - d: A subsystem that runs in the background on bookies to ensure that ledgers are fully replicated even if one bookie from the ensemble is down. -- term: striping - d: | Striping is the process of distributing BookKeeper ledgers to sub-groups of bookies rather than to all bookies in a BookKeeper ensemble. Striping is essential to ensuring fast performance. -- term: journal - d: A journal file stores BookKeeper transaction logs. -- term: fencing - d: When a reader forces a ledger to close, preventing any further entries from being written to the ledger. -- term: record - d: A record is a sequence of bytes (plus some metadata) written to a BookKeeper ledger. Records are also known as entries.
\ No newline at end of file diff --git a/site/_data/releaseNotesSummary.template b/site/_data/releaseNotesSummary.template deleted file mode 100644 index 0468704ce4c..00000000000 --- a/site/_data/releaseNotesSummary.template +++ /dev/null @@ -1,4 +0,0 @@ - -### [date] Release {{ site.latest_version }} available - -[INSERT SUMMARY] diff --git a/site/_data/sidebar.yaml b/site/_data/sidebar.yaml deleted file mode 100644 index f4bed795676..00000000000 --- a/site/_data/sidebar.yaml +++ /dev/null @@ -1,78 +0,0 @@ -groups: -- name: Getting started - dir: getting-started - docs: - - name: Installation - endpoint: installation - - name: Run bookies locally - endpoint: run-locally - - name: Concepts and architecture - endpoint: concepts -- name: Deployment - dir: deployment - docs: - - name: Manual deployment - endpoint: manual - - name: BookKeeper on DC/OS - endpoint: dcos - - name: BookKeeper on Kubernetes - endpoint: kubernetes -- name: Administration - dir: admin - docs: - - name: BookKeeper administration - endpoint: bookies - - name: AutoRecovery - endpoint: autorecovery - - name: Metric collection - endpoint: metrics - - name: Upgrade - endpoint: upgrade - - name: BookKeeper Admin REST API - endpoint: http - #- name: Geo-replication - # endpoint: geo-replication - #- name: Customized placement policies - # endpoint: placement - #- name: Performance tuning - # endpoint: perf -- name: API - dir: api - docs: - - name: Overview - endpoint: overview - - name: Ledger API - endpoint: ledger-api - - name: Advanced Ledger API - endpoint: ledger-adv-api - - name: DistributedLog - endpoint: distributedlog-api - - name: Java API Docs - endpoint: javadoc -- name: Security - dir: security - docs: - - name: Overview - endpoint: overview - - name: TLS Authentication - endpoint: tls - - name: SASL Authentication - endpoint: sasl - - name: ZooKeeper Authentication - endpoint: zookeeper -- name: Development - dir: development - docs: - - name: BookKeeper protocol - endpoint: protocol - #- name: Codebase - # endpoint: codebase -- name: Reference - dir: reference - docs: - - name: Configuration - endpoint: config - - name: Command-line tools - endpoint: cli - - name: Metrics - endpoint: metrics diff --git a/site/_includes/cli.html b/site/_includes/cli.html deleted file mode 100644 index a226107bd32..00000000000 --- a/site/_includes/cli.html +++ /dev/null @@ -1,45 +0,0 @@ -{% assign tool_name = include.id %} -{% assign tool = site.data.cli[tool_name] %} -{% assign commands = tool.commands %} - -

          {{ tool_name }}

          - -{{ tool.description | markdownify }} - -{% if tool.env_vars %} -

          Environment variables

          - - - - - - - - - - - {% for var in tool.env_vars %} - - - - - - {% endfor %} - -
          Environment variableDescriptionDefault
          {{ var.name }}{{ var.description | markdownify }}{% if var.default %}${bookkeeperHome}/{{ var.default }}{% endif %}
          -{% endif %} - -

          Commands

          - -{% for command in commands %} -

          {{ command.name }}

          - -{{ command.description | markdownify }} - -
          Usage
          - -```shell -$ {{ tool.root_path }}/{{ tool_name }} {{ command.name }} {% if command.argument %}{{ command.argument }}{% endif %} -``` -{% unless forloop.last %}
          {% endunless %} -{% endfor %} diff --git a/site/_includes/config.html b/site/_includes/config.html deleted file mode 100644 index 6d1e342aec4..00000000000 --- a/site/_includes/config.html +++ /dev/null @@ -1,25 +0,0 @@ -{% assign configs = site.data.config[include.id] %} -{% for group in configs.groups %} -

          {{ group.name }}

          - -{% if group.description %}{{ group.description | markdownify }}{% endif %} - - - - - - - - - - - {% for param in group.params %} - - - - - - {% endfor %} - -
          ParameterDescriptionDefault
          {{ param.param }}{{ param.description | markdownify }}{% if param.default %}{{ param.default }}{% endif %}
          -{% endfor %} diff --git a/site/_includes/download-button.html b/site/_includes/download-button.html deleted file mode 100644 index 4fd0753a9b6..00000000000 --- a/site/_includes/download-button.html +++ /dev/null @@ -1,6 +0,0 @@ - - - - - Download BookKeeper {{ site.latest_version }} - diff --git a/site/_includes/footer.html b/site/_includes/footer.html deleted file mode 100644 index 63450d384ac..00000000000 --- a/site/_includes/footer.html +++ /dev/null @@ -1,12 +0,0 @@ -
          -
          -
          -

          - Copyright © 2016 - {{ 'now' | date: '%Y' }} The Apache Software Foundation,
          licensed under the Apache License, version 2.0. -

          -

- Apache BookKeeper, BookKeeper®, Apache®, the Apache feather logo, and the Apache BookKeeper logo are either registered trademarks or trademarks of The Apache Software Foundation. -

          -
          -
          -
          diff --git a/site/_includes/google-analytics.html b/site/_includes/google-analytics.html deleted file mode 100644 index d081572ee41..00000000000 --- a/site/_includes/google-analytics.html +++ /dev/null @@ -1,26 +0,0 @@ - - diff --git a/site/_includes/head.html b/site/_includes/head.html deleted file mode 100644 index bd929244b42..00000000000 --- a/site/_includes/head.html +++ /dev/null @@ -1,20 +0,0 @@ -{{ site.title }} - {{ page.title }} - - - - - - - - - - - - - - \ No newline at end of file diff --git a/site/_includes/javadoc-button.html b/site/_includes/javadoc-button.html deleted file mode 100644 index 31d37386aa3..00000000000 --- a/site/_includes/javadoc-button.html +++ /dev/null @@ -1,8 +0,0 @@ - - - - - - Javadoc - - diff --git a/site/_includes/javascript.html b/site/_includes/javascript.html deleted file mode 100644 index e6b54ece641..00000000000 --- a/site/_includes/javascript.html +++ /dev/null @@ -1 +0,0 @@ - diff --git a/site/_includes/navbar.html b/site/_includes/navbar.html deleted file mode 100644 index 0539c718ffc..00000000000 --- a/site/_includes/navbar.html +++ /dev/null @@ -1,111 +0,0 @@ -{% capture docs_url %}{{ site.baseurl }}docs/{% endcapture %} - diff --git a/site/_includes/popovers.html b/site/_includes/popovers.html deleted file mode 100644 index 0aa83a1d689..00000000000 --- a/site/_includes/popovers.html +++ /dev/null @@ -1,16 +0,0 @@ -{% assign popovers = site.data.popovers %} -{% for popover in popovers %} -
          - {{ popover.d | markdownify }} -
          -{% endfor %} - - \ No newline at end of file diff --git a/site/_includes/shell.html b/site/_includes/shell.html deleted file mode 100644 index ef7caf0f8a7..00000000000 --- a/site/_includes/shell.html +++ /dev/null @@ -1,37 +0,0 @@ -{% assign commands = site.data.cli.shell.commands %} - -{% for command in commands %} -

          {{ command.name }}

          - -{{ command.description | markdownify }} - -
          Usage
          - -```shell -$ bookkeeper-server/bin/bookkeeper shell {{ command.name }}{% if command.argument %} \ - {{ command.argument }}{% endif %}{% if command.options %} \ - {% endif %} -``` - -{% if command.options %} -
          Options
          - - - - - - - - - - {% for option in command.options %} - - - - - {% endfor %} - -
          FlagDescription
          {{ option.flag }}{{ option.description }}
          -{% endif %} -{% unless forloop.last %}
          {% endunless %} -{% endfor %} diff --git a/site/_includes/sidebar.html b/site/_includes/sidebar.html deleted file mode 100644 index d3d9de47b6e..00000000000 --- a/site/_includes/sidebar.html +++ /dev/null @@ -1,24 +0,0 @@ -{% unless page.no_sidebar %} - -{% endunless %} diff --git a/site/_includes/system-requirements.md b/site/_includes/system-requirements.md deleted file mode 100644 index 69e372f7cf1..00000000000 --- a/site/_includes/system-requirements.md +++ /dev/null @@ -1,8 +0,0 @@ -The number of bookies you should run in a BookKeeper cluster depends on the quorum mode that you've chosen, the desired throughput, and the number of clients using the cluster simultaneously. - -Quorum type | Number of bookies -:-----------|:----------------- -Self-verifying quorum | 3 -Generic | 4 - -Increasing the number of bookies will enable higher throughput, and there is **no upper limit** on the number of bookies. \ No newline at end of file diff --git a/site/_includes/table-of-contents.html b/site/_includes/table-of-contents.html deleted file mode 100644 index 053d812ef1c..00000000000 --- a/site/_includes/table-of-contents.html +++ /dev/null @@ -1,6 +0,0 @@ -{% if page.toc %} -
          -

          {{ page.title }}

          - {{ content | toc_only }} -
          -{% endif %} diff --git a/site/_layouts/community.html b/site/_layouts/community.html deleted file mode 100644 index b42b870637e..00000000000 --- a/site/_layouts/community.html +++ /dev/null @@ -1,48 +0,0 @@ ---- -layout: default ---- - -
          -
          -
          -
          - - - {% if page.subtitle %}

          {{ page.subtitle }}

          {% endif %} -
          - -
          - -
          -
          - {{ content }} -
          - - {% if page.next or page.prev %} - - {% endif %} -
          -
          -
          -
          \ No newline at end of file diff --git a/site/_layouts/default.html b/site/_layouts/default.html deleted file mode 100644 index 5e544667301..00000000000 --- a/site/_layouts/default.html +++ /dev/null @@ -1,20 +0,0 @@ - - - - {% include head.html %} - - -
          - {% include navbar.html %} - - {{ content }} -
          - - {% include footer.html %} - - - {% include javascript.html %} - {% if jekyll.environment == "production" %} - {% include google-analytics.html %} - {% endif %} - diff --git a/site/_layouts/docs.html b/site/_layouts/docs.html deleted file mode 100644 index d2313beb093..00000000000 --- a/site/_layouts/docs.html +++ /dev/null @@ -1,62 +0,0 @@ ---- -layout: default ---- - -
          -
          -
          -
          - {% include sidebar.html %} -
          -
          - -
          -
          - - - {% if page.subtitle %}

          {{ page.subtitle }}

          {% endif %} -
          - -
          - -
          -
          - {{ content }} -
          - - {% if page.next or page.prev %} - - {% endif %} -
          -
          - -
          - {% unless page.toc_disable %} - {% include table-of-contents.html %} - {% endunless %} -
          -
          -
          - -{% include popovers.html %} diff --git a/site/_layouts/javadoc-overview.html b/site/_layouts/javadoc-overview.html deleted file mode 100644 index 71220a5de63..00000000000 --- a/site/_layouts/javadoc-overview.html +++ /dev/null @@ -1,3 +0,0 @@ - - {{ content }} - diff --git a/site/_plugins/popovers.rb b/site/_plugins/popovers.rb deleted file mode 100644 index 6bac5bc2e6c..00000000000 --- a/site/_plugins/popovers.rb +++ /dev/null @@ -1,22 +0,0 @@ -require 'yaml' - -TERMS = YAML.load(File.open("_data/popovers.yaml")) - -module Jekyll - class RenderPopover < Liquid::Tag - def initialize(tag_name, text, tokens) - @original_term = text.strip.split(' ').join(' ') - @term = @original_term.gsub(' ', '-').downcase - @term = 'entry' if @term == 'entries' - @term = 'fencing' if @term == 'fence' - @term = 'striping' if @term == 'stripe' or @term == 'stripedma' - @term = @term[0...-1] if @term.end_with? 's' - end - - def render(ctx) - return "#{@original_term}" - end - end -end - -Liquid::Template.register_tag('pop', Jekyll::RenderPopover) diff --git a/site/_sass/_mixins.sass b/site/_sass/_mixins.sass deleted file mode 100644 index bb011e35458..00000000000 --- a/site/_sass/_mixins.sass +++ /dev/null @@ -1,8 +0,0 @@ -=sticky-footer - .body - display: flex - min-height: 100vh - flex-direction: column - - .main - flex: 1 diff --git a/site/_sass/_navbar.sass b/site/_sass/_navbar.sass deleted file mode 100644 index 785e926e31d..00000000000 --- a/site/_sass/_navbar.sass +++ /dev/null @@ -1,15 +0,0 @@ -.bk-topnav - width: 90% - margin: 0 auto - position: sticky - top: 0 - left: 0 - right: 0 - display: block - - a.bk-brand - font-size: 2rem - margin-right: 2rem - - img - margin-right: 1rem \ No newline at end of file diff --git a/site/_sass/_syntax.sass b/site/_sass/_syntax.sass deleted file mode 100644 index 1a6b2cfeda0..00000000000 --- a/site/_sass/_syntax.sass +++ /dev/null @@ -1,120 +0,0 @@ -blockquote - code - font-size: 1.1rem - background-color: lighten($sx-light-gray, 30%) - -.highlighter-rouge - & + blockquote, & + h1, & + h2, & + h3, & + h4, & + h5, & + p - margin-top: 1.5rem - - // Language-specific overrides - &.language-shell, &.language-bash - .highlight - .nb, .o, .k - // This prevents keywords like cd and help from being highlighted - color: $sx-light-gray - - pre.highlight - padding: $sx-padding - color: $sx-light-gray - background-color: $sx-bg-color - border-radius: $sx-border-radius - border: $sx-border - overflow: $sx-overflow - - code - font-size: .9rem - - .w - color: $sx-light-gray - background-color: $sx-bg-color - - .n - color: $sx-red - - .err - color: #002b36 - background-color: #dc322f - - .c, .cd, .cm, .c1, .cs - color: $sx-comment - - .gp - color: $sx-red - - .cp - color: $sx-tan - - .nd - color: $sx-purple - - .nt - color: $sx-tan - - .o - color: $sx-tan - - .ow - color: $sx-dark-gray - - .p, .pi - color: $sx-dark-gray - - .bp - color: $sx-dark-gray - - .gi - color: $sx-olive - - .gd - color: #dc322f - - .gh - color: $sx-7 - background-color: $sx-bg-color - font-weight: bold - - .k, .kn, .kp, .kr, .kv - color: $sx-purple - - .kc - color: $sx-red - - .kt - color: $sx-purple - - .kd - color: $sx-magenta - - .s, .sb, .sc, .sd, .s2, .sh, .sx, .s1 - color: $sx-olive - - .sr - color: #2aa198 - - .si, .se - color: $sx-magenta - - .nn - color: $sx-tan - - .nf - color: $sx-purple - - .nc, .nb - color: $sx-tan - - .no - color: $sx-tan - - .na - color: $sx-7 - - .m, .il, .mo, .mb, .mx - color: $sx-olive - - .mi, .mf, .mh - color: $sx-tan - - .ss - color: $sx-olive diff --git 
a/site/_sass/_typography.sass b/site/_sass/_typography.sass deleted file mode 100644 index 690761cf6f1..00000000000 --- a/site/_sass/_typography.sass +++ /dev/null @@ -1,11 +0,0 @@ -$base-docs-font-size: 1.1rem - -.content - h2 - font-size: $base-docs-font-size * 1.5 !important - - h3 - font-size: $base-docs-font-size * 1.25 !important - - p, td, li - font-size: $base-docs-font-size \ No newline at end of file diff --git a/site/_sass/_variables.sass b/site/_sass/_variables.sass deleted file mode 100644 index 67c9a5eb254..00000000000 --- a/site/_sass/_variables.sass +++ /dev/null @@ -1,32 +0,0 @@ -@import url(https://fonts.googleapis.com/css?family=Lato|Source+Code+Pro) - -$base-margin: 1rem -$navbar-height: 5rem -$docs-container-vertical-margin: 3 * $base-margin -$docs-title-margin: 0 * $base-margin -$em-right-margin: 0.2rem - -// Bulma variables -$tan: #c88e56 -$blue: #1976d2 -$primary: $tan -$twitter-blue: #4099ff -$almost-black: rgb(35, 41, 55) - -$family-sans-serif: 'Lato', -apple-system, BlinkMacSystemFont, "Segoe UI", "Roboto", "Oxygen", "Ubuntu", "Cantarell", "Fira Sans", "Droid Sans", "Helvetica Neue", "Helvetica", "Arial", sans-serif -$family-monospace: 'Source Code Pro', monospace - -$sx-border-radius: .25rem -$sx-border: none -$sx-bg-color: $almost-black -$sx-light-gray: lighten(#93a1a1, 15%) -$sx-dark-gray: lighten(#657b83, 30%) -$sx-olive: #859900 -$sx-tan: $tan -$sx-purple: lighten(#6c71c4, 8%) -$sx-magenta: #d33682 -$sx-red: lighten(#cb4b16, 10%) -$sx-7: lighten(#268bd2, 5%) -$sx-comment: darken($sx-dark-gray, 20%) -$sx-overflow: scroll -$sx-padding: 0 diff --git a/site/_sass/vendor/bulma/bulma.sass b/site/_sass/vendor/bulma/bulma.sass deleted file mode 100755 index e8e9abf48a5..00000000000 --- a/site/_sass/vendor/bulma/bulma.sass +++ /dev/null @@ -1,8 +0,0 @@ -@charset "utf-8" -/*! 
bulma.io v0.4.4 | MIT License | github.com/jgthms/bulma */ -@import "sass/utilities/_all" -@import "sass/base/_all" -@import "sass/elements/_all" -@import "sass/components/_all" -@import "sass/grid/_all" -@import "sass/layout/_all" diff --git a/site/_sass/vendor/bulma/sass/base/_all.sass b/site/_sass/vendor/bulma/sass/base/_all.sass deleted file mode 100755 index e913d6ba394..00000000000 --- a/site/_sass/vendor/bulma/sass/base/_all.sass +++ /dev/null @@ -1,5 +0,0 @@ -@charset "utf-8" - -@import "minireset.sass" -@import "generic.sass" -@import "helpers.sass" diff --git a/site/_sass/vendor/bulma/sass/base/generic.sass b/site/_sass/vendor/bulma/sass/base/generic.sass deleted file mode 100755 index f1b85968b69..00000000000 --- a/site/_sass/vendor/bulma/sass/base/generic.sass +++ /dev/null @@ -1,106 +0,0 @@ -html - background-color: $body-background - font-size: $body-size - -moz-osx-font-smoothing: grayscale - -webkit-font-smoothing: antialiased - min-width: 300px - overflow-x: hidden - overflow-y: scroll - text-rendering: $render-mode - -article, -aside, -figure, -footer, -header, -hgroup, -section - display: block - -body, -button, -input, -select, -textarea - font-family: $family-primary - -code, -pre - -moz-osx-font-smoothing: auto - -webkit-font-smoothing: auto - font-family: $family-code - -body - color: $text - font-size: 1rem - font-weight: $weight-normal - line-height: 1.5 - -// Inline - -a - color: $link - cursor: pointer - text-decoration: none - transition: none $speed $easing - &:hover - color: $link-hover - -code - background-color: $code-background - color: $code - font-size: 0.8em - font-weight: normal - padding: 0.25em 0.5em 0.25em - -hr - background-color: $border - border: none - display: block - height: 1px - margin: 1.5rem 0 - -img - height: auto - max-width: 100% - -input[type="checkbox"], -input[type="radio"] - vertical-align: baseline - -small - font-size: 0.875em - -span - font-style: inherit - font-weight: inherit - -strong - color: $text-strong - font-weight: $weight-bold - -// Block - -pre - background-color: $pre-background - color: $pre - font-size: 0.8em - white-space: pre - word-wrap: normal - code - +overflow-touch - background: none - color: inherit - display: block - font-size: 1em - overflow-x: auto - padding: 1.25rem 1.5rem - -table - width: 100% - td, - th - text-align: left - vertical-align: top - th - color: $text-strong diff --git a/site/_sass/vendor/bulma/sass/base/helpers.sass b/site/_sass/vendor/bulma/sass/base/helpers.sass deleted file mode 100755 index 84d85e1d621..00000000000 --- a/site/_sass/vendor/bulma/sass/base/helpers.sass +++ /dev/null @@ -1,136 +0,0 @@ -// Display - -$displays: 'block' 'flex' 'inline' 'inline-block' 'inline-flex' - -@each $display in $displays - .is-#{$display} - display: #{$display} - +mobile - .is-#{$display}-mobile - display: #{$display} !important - +tablet - .is-#{$display}-tablet - display: #{$display} !important - +tablet-only - .is-#{$display}-tablet-only - display: #{$display} !important - +touch - .is-#{$display}-touch - display: #{$display} !important - +desktop - .is-#{$display}-desktop - display: #{$display} !important - +desktop-only - .is-#{$display}-desktop-only - display: #{$display} !important - +widescreen - .is-#{$display}-widescreen - display: #{$display} !important - -// Float - -.is-clearfix - +clearfix - -.is-pulled-left - float: left !important - -.is-pulled-right - float: right !important - -// Overflow - -.is-clipped - overflow: hidden !important - -// Overlay - -.is-overlay - +overlay - -// Text 
- -@each $size in $sizes - $i: index($sizes, $size) - .is-size-#{$i} - font-size: $size - +mobile - .is-size-#{$i}-mobile - font-size: $size - +tablet - .is-size-#{$i}-tablet - font-size: $size - +touch - .is-size-#{$i}-touch - font-size: $size - +desktop - .is-size-#{$i}-desktop - font-size: $size - +widescreen - .is-size-#{$i}-widescreen - font-size: $size - +fullhd - .is-size-#{$i}-fullhd - font-size: $size - -.has-text-centered - text-align: center !important - -.has-text-left - text-align: left !important - -.has-text-right - text-align: right !important - -@each $name, $pair in $colors - $color: nth($pair, 1) - .has-text-#{$name} - color: $color - a.has-text-#{$name} - &:hover, - &:focus - color: darken($color, 10%) - -// Visibility - -.is-hidden - display: none !important - -+mobile - .is-hidden-mobile - display: none !important - -+tablet - .is-hidden-tablet - display: none !important - -+tablet-only - .is-hidden-tablet-only - display: none !important - -+touch - .is-hidden-touch - display: none !important - -+desktop - .is-hidden-desktop - display: none !important - -+desktop-only - .is-hidden-desktop-only - display: none !important - -+widescreen - .is-hidden-widescreen - display: none !important - -// Other - -.is-marginless - margin: 0 !important - -.is-paddingless - padding: 0 !important - -.is-unselectable - +unselectable diff --git a/site/_sass/vendor/bulma/sass/base/minireset.sass b/site/_sass/vendor/bulma/sass/base/minireset.sass deleted file mode 100755 index 98ad8147d65..00000000000 --- a/site/_sass/vendor/bulma/sass/base/minireset.sass +++ /dev/null @@ -1,80 +0,0 @@ -/*! minireset.css v0.0.2 | MIT License | github.com/jgthms/minireset.css */ -// Blocks -html, -body, -p, -ol, -ul, -li, -dl, -dt, -dd, -blockquote, -figure, -fieldset, -legend, -textarea, -pre, -iframe, -hr, -h1, -h2, -h3, -h4, -h5, -h6 - margin: 0 - padding: 0 - -// Headings -h1, -h2, -h3, -h4, -h5, -h6 - font-size: 100% - font-weight: normal - -// List -ul - list-style: none - -// Form -button, -input, -select, -textarea - margin: 0 - -// Box sizing -html - box-sizing: border-box - -* - box-sizing: inherit - &:before, - &:after - box-sizing: inherit - -// Media -img, -embed, -object, -audio, -video - max-width: 100% - -// Iframe -iframe - border: 0 - -// Table -table - border-collapse: collapse - border-spacing: 0 - -td, -th - padding: 0 - text-align: left diff --git a/site/_sass/vendor/bulma/sass/components/_all.sass b/site/_sass/vendor/bulma/sass/components/_all.sass deleted file mode 100755 index ed5276e290b..00000000000 --- a/site/_sass/vendor/bulma/sass/components/_all.sass +++ /dev/null @@ -1,15 +0,0 @@ -@charset "utf-8" - -@import "breadcrumb.sass" -@import "card.sass" -@import "dropdown.sass" -@import "level.sass" -@import "media.sass" -@import "menu.sass" -@import "message.sass" -@import "modal.sass" -@import "nav.sass" -@import "navbar.sass" -@import "pagination.sass" -@import "panel.sass" -@import "tabs.sass" diff --git a/site/_sass/vendor/bulma/sass/components/breadcrumb.sass b/site/_sass/vendor/bulma/sass/components/breadcrumb.sass deleted file mode 100755 index e898d258133..00000000000 --- a/site/_sass/vendor/bulma/sass/components/breadcrumb.sass +++ /dev/null @@ -1,66 +0,0 @@ -.breadcrumb - +block - +unselectable - align-items: stretch - display: flex - font-size: $size-normal - overflow: hidden - overflow-x: auto - white-space: nowrap - a - align-items: center - color: $text-light - display: flex - justify-content: center - padding: 0.5em 0.75em - &:hover - color: $link-hover - li - 
align-items: center - display: flex - &.is-active - a - color: $text-strong - cursor: default - pointer-events: none - & + li:before - color: $text - content: '\0002f' - ul, ol - align-items: center - display: flex - flex-grow: 1 - flex-shrink: 0 - justify-content: flex-start - .icon - &:first-child - margin-right: 0.5em - &:last-child - margin-left: 0.5em - // Alignment - &.is-centered - ol, ul - justify-content: center - &.is-right - ol, ul - justify-content: flex-end - // Sizes - &.is-small - font-size: $size-small - &.is-medium - font-size: $size-medium - &.is-large - font-size: $size-large - // Styles - &.has-arrow-separator - li + li:before - content: '\02192' - &.has-bullet-separator - li + li:before - content: '\02022' - &.has-dot-separator - li + li:before - content: '\000b7' - &.has-succeeds-separator - li + li:before - content: '\0227B' diff --git a/site/_sass/vendor/bulma/sass/components/card.sass b/site/_sass/vendor/bulma/sass/components/card.sass deleted file mode 100755 index 999f56cb948..00000000000 --- a/site/_sass/vendor/bulma/sass/components/card.sass +++ /dev/null @@ -1,65 +0,0 @@ -$card: $text !default -$card-background: $white !default -$card-shadow: 0 2px 3px rgba($black, 0.1), 0 0 0 1px rgba($black, 0.1) !default - -$card-header: $text-strong !default -$card-header-shadow: 0 1px 2px rgba($black, 0.1) !default -$card-header-weight: $weight-bold !default - -$card-footer-border: $border !default - -.card - background-color: $card-background - box-shadow: $card-shadow - color: $card - max-width: 100% - position: relative - -.card-header - align-items: stretch - box-shadow: $card-header-shadow - display: flex - -.card-header-title - align-items: center - color: $card-header - display: flex - flex-grow: 1 - font-weight: $card-header-weight - padding: 0.75rem - -.card-header-icon - align-items: center - cursor: pointer - display: flex - justify-content: center - padding: 0.75rem - -.card-image - display: block - position: relative - -.card-content - padding: 1.5rem - -.card-footer - border-top: 1px solid $card-footer-border - align-items: stretch - display: flex - -.card-footer-item - align-items: center - display: flex - flex-basis: 0 - flex-grow: 1 - flex-shrink: 0 - justify-content: center - padding: 0.75rem - &:not(:last-child) - border-right: 1px solid $card-footer-border - -// Combinations - -.card - .media:not(:last-child) - margin-bottom: 0.75rem diff --git a/site/_sass/vendor/bulma/sass/components/dropdown.sass b/site/_sass/vendor/bulma/sass/components/dropdown.sass deleted file mode 100755 index b4701f55920..00000000000 --- a/site/_sass/vendor/bulma/sass/components/dropdown.sass +++ /dev/null @@ -1,70 +0,0 @@ -$dropdown-content-background: $white !default -$dropdown-content-arrow: $link !default -$dropdown-content-offset: 4px !default -$dropdown-content-radius: $radius !default -$dropdown-content-shadow: 0 2px 3px rgba($black, 0.1), 0 0 0 1px rgba($black, 0.1) !default -$dropdown-content-z: 20 !default - -$dropdown-item: $grey-dark !default -$dropdown-item-hover: $black !default -$dropdown-item-hover-background: $background !default -$dropdown-item-active: $primary-invert !default -$dropdown-item-active-background: $primary !default - -$dropdown-divider-background: $border !default - -.dropdown - display: inline-flex - position: relative - vertical-align: top - &.is-active, - &.is-hoverable:hover - .dropdown-menu - display: block - &.is-right - .dropdown-menu - left: auto - right: 0 - -.dropdown-menu - display: none - left: 0 - max-width: 20rem - min-width: 12rem 
- padding-top: $dropdown-content-offset - position: absolute - top: 100% - width: 100% - z-index: $dropdown-content-z - -.dropdown-content - background-color: $dropdown-content-background - border-radius: $dropdown-content-radius - box-shadow: $dropdown-content-shadow - padding-bottom: 0.5rem - padding-top: 0.5rem - -.dropdown-item - color: $dropdown-item - display: block - font-size: 0.875rem - line-height: 1.5 - padding: 0.375rem 1rem - position: relative - -a.dropdown-item - padding-right: 3rem - white-space: nowrap - &:hover - background-color: $dropdown-item-hover-background - color: $dropdown-item-hover - &.is-active - background-color: $dropdown-item-active-background - color: $dropdown-item-active - -.dropdown-divider - background-color: $dropdown-divider-background - border: none - display: block - height: 1px - margin: 0.5rem 0 diff --git a/site/_sass/vendor/bulma/sass/components/level.sass b/site/_sass/vendor/bulma/sass/components/level.sass deleted file mode 100755 index 34b982c185f..00000000000 --- a/site/_sass/vendor/bulma/sass/components/level.sass +++ /dev/null @@ -1,72 +0,0 @@ -.level-item - align-items: center - display: flex - flex-basis: auto - flex-grow: 0 - flex-shrink: 0 - justify-content: center - .title, - .subtitle - margin-bottom: 0 - // Responsiveness - +mobile - &:not(:last-child) - margin-bottom: 0.75rem - -.level-left, -.level-right - flex-basis: auto - flex-grow: 0 - flex-shrink: 0 - .level-item - &:not(:last-child) - margin-right: 0.75rem - // Modifiers - &.is-flexible - flex-grow: 1 - -.level-left - align-items: center - justify-content: flex-start - // Responsiveness - +mobile - & + .level-right - margin-top: 1.5rem - +tablet - display: flex - -.level-right - align-items: center - justify-content: flex-end - // Responsiveness - +tablet - display: flex - -.level - +block - align-items: center - justify-content: space-between - code - border-radius: $radius - img - display: inline-block - vertical-align: top - // Modifiers - &.is-mobile - display: flex - .level-left, - .level-right - display: flex - .level-left + .level-right - margin-top: 0 - .level-item - &:not(:last-child) - margin-bottom: 0 - &:not(.is-narrow) - flex-grow: 1 - // Responsiveness - +tablet - display: flex - & > .level-item - &:not(.is-narrow) - flex-grow: 1 diff --git a/site/_sass/vendor/bulma/sass/components/media.sass b/site/_sass/vendor/bulma/sass/components/media.sass deleted file mode 100755 index 575e4fcc95f..00000000000 --- a/site/_sass/vendor/bulma/sass/components/media.sass +++ /dev/null @@ -1,44 +0,0 @@ -.media-left, -.media-right - flex-basis: auto - flex-grow: 0 - flex-shrink: 0 - -.media-left - margin-right: 1rem - -.media-right - margin-left: 1rem - -.media-content - flex-basis: auto - flex-grow: 1 - flex-shrink: 1 - text-align: left - -.media - align-items: flex-start - display: flex - text-align: left - .content:not(:last-child) - margin-bottom: 0.75rem - .media - border-top: 1px solid rgba($border, 0.5) - display: flex - padding-top: 0.75rem - .content:not(:last-child), - .control:not(:last-child) - margin-bottom: 0.5rem - .media - padding-top: 0.5rem - & + .media - margin-top: 0.5rem - & + .media - border-top: 1px solid rgba($border, 0.5) - margin-top: 1rem - padding-top: 1rem - // Sizes - &.is-large - & + .media - margin-top: 1.5rem - padding-top: 1.5rem diff --git a/site/_sass/vendor/bulma/sass/components/menu.sass b/site/_sass/vendor/bulma/sass/components/menu.sass deleted file mode 100755 index 47289704e8d..00000000000 --- 
a/site/_sass/vendor/bulma/sass/components/menu.sass +++ /dev/null @@ -1,32 +0,0 @@ -.menu - font-size: $size-normal - -.menu-list - line-height: 1.25 - a - border-radius: $radius-small - color: $text - display: block - padding: 0.5em 0.75em - &:hover - background-color: $background - color: $link - // Modifiers - &.is-active - background-color: $link - color: $link-invert - li - ul - border-left: 1px solid $border - margin: 0.75em - padding-left: 0.75em - -.menu-label - color: $text-light - font-size: 0.8em - letter-spacing: 0.1em - text-transform: uppercase - &:not(:first-child) - margin-top: 1em - &:not(:last-child) - margin-bottom: 1em diff --git a/site/_sass/vendor/bulma/sass/components/message.sass b/site/_sass/vendor/bulma/sass/components/message.sass deleted file mode 100755 index cff51e58fd7..00000000000 --- a/site/_sass/vendor/bulma/sass/components/message.sass +++ /dev/null @@ -1,61 +0,0 @@ -.message - +block - background-color: $background - border-radius: $radius - font-size: $size-normal - // Colors - @each $name, $pair in $colors - $color: nth($pair, 1) - $color-invert: nth($pair, 2) - $color-lightning: max((100% - lightness($color)) - 2%, 0%) - $color-luminance: colorLuminance($color) - $darken-percentage: $color-luminance * 70% - $desaturate-percentage: $color-luminance * 30% - &.is-#{$name} - background-color: lighten($color, $color-lightning) - .message-header - background-color: $color - color: $color-invert - .message-body - border-color: $color - color: desaturate(darken($color, $darken-percentage), $desaturate-percentage) - -.message-header - align-items: center - background-color: $text - border-radius: $radius $radius 0 0 - color: $text-invert - display: flex - justify-content: space-between - line-height: 1.25 - padding: 0.5em 0.75em - position: relative - a:not(.button), - strong - color: currentColor - a:not(.button) - text-decoration: underline - .delete - flex-grow: 0 - flex-shrink: 0 - margin-left: 0.75em - & + .message-body - border-top-left-radius: 0 - border-top-right-radius: 0 - border-top: none - -.message-body - border: 1px solid $border - border-radius: $radius - color: $text - padding: 1em 1.25em - a:not(.button), - strong - color: currentColor - a:not(.button) - text-decoration: underline - code, - pre - background: $white - pre code - background: transparent diff --git a/site/_sass/vendor/bulma/sass/components/modal.sass b/site/_sass/vendor/bulma/sass/components/modal.sass deleted file mode 100755 index d3fc015118f..00000000000 --- a/site/_sass/vendor/bulma/sass/components/modal.sass +++ /dev/null @@ -1,81 +0,0 @@ -.modal-background - +overlay - background-color: rgba($black, 0.86) - -.modal-content, -.modal-card - margin: 0 20px - max-height: calc(100vh - 160px) - overflow: auto - position: relative - width: 100% - // Responsiveness - +tablet - margin: 0 auto - max-height: calc(100vh - 40px) - width: 640px - -.modal-close - +delete - background: none - height: 40px - position: fixed - right: 20px - top: 20px - width: 40px - -.modal-card - display: flex - flex-direction: column - max-height: calc(100vh - 40px) - overflow: hidden - -.modal-card-head, -.modal-card-foot - align-items: center - background-color: $background - display: flex - flex-shrink: 0 - justify-content: flex-start - padding: 20px - position: relative - -.modal-card-head - border-bottom: 1px solid $border - border-top-left-radius: $radius-large - border-top-right-radius: $radius-large - -.modal-card-title - color: $text-strong - flex-grow: 1 - flex-shrink: 0 - font-size: $size-4 - 
line-height: 1 - -.modal-card-foot - border-bottom-left-radius: $radius-large - border-bottom-right-radius: $radius-large - border-top: 1px solid $border - .button - &:not(:last-child) - margin-right: 10px - -.modal-card-body - +overflow-touch - background-color: $white - flex-grow: 1 - flex-shrink: 1 - overflow: auto - padding: 20px - -.modal - +overlay - align-items: center - display: none - justify-content: center - overflow: hidden - position: fixed - z-index: 20 - // Modifiers - &.is-active - display: flex diff --git a/site/_sass/vendor/bulma/sass/components/nav.sass b/site/_sass/vendor/bulma/sass/components/nav.sass deleted file mode 100755 index e6aaa29472b..00000000000 --- a/site/_sass/vendor/bulma/sass/components/nav.sass +++ /dev/null @@ -1,125 +0,0 @@ -$nav-height: 3.25rem !default - -// Components - -.nav-toggle - +hamburger($nav-height) - // Responsiveness - +tablet - display: none - -.nav-item - align-items: center - display: flex - flex-grow: 0 - flex-shrink: 0 - font-size: $size-normal - justify-content: center - line-height: 1.5 - padding: 0.5rem 0.75rem - a - flex-grow: 1 - flex-shrink: 0 - img - max-height: 1.75rem - .tag - &:first-child:not(:last-child) - margin-right: 0.5rem - &:last-child:not(:first-child) - margin-left: 0.5rem - // Responsiveness - +mobile - justify-content: flex-start - -.nav-item a:not(.button), -a.nav-item:not(.button) - color: $text-light - &:hover - color: $link-hover - // Modifiers - &.is-active - color: $link-active - &.is-tab - border-bottom: 1px solid transparent - border-top: 1px solid transparent - padding-bottom: calc(0.75rem - 1px) - padding-left: 1rem - padding-right: 1rem - padding-top: calc(0.75rem - 1px) - &:hover - border-bottom-color: $primary - border-top-color: transparent - &.is-active - border-bottom: 3px solid $primary - color: $primary - padding-bottom: calc(0.75rem - 3px) - // Responsiveness - +desktop - &.is-brand - padding-left: 0 - -// Containers - -.nav-left, -.nav-right - +overflow-touch - align-items: stretch - display: flex - flex-grow: 1 - flex-shrink: 0 - max-width: 100% - overflow: auto - +widescreen - flex-basis: 0 - -.nav-left - justify-content: flex-start - white-space: nowrap - -.nav-right - justify-content: flex-end - -.nav-center - align-items: stretch - display: flex - flex-grow: 0 - flex-shrink: 0 - justify-content: center - margin-left: auto - margin-right: auto - -.nav-menu - // Responsiveness - +mobile - &.nav-right - background-color: $white - box-shadow: 0 4px 7px rgba($black, 0.1) - left: 0 - display: none - right: 0 - top: 100% - position: absolute - .nav-item - border-top: 1px solid rgba($border, 0.5) - padding: 0.75rem - &.is-active - display: block - -// Main container - -.nav - align-items: stretch - background-color: $white - display: flex - height: $nav-height - position: relative - text-align: center - z-index: 10 - & > .container - align-items: stretch - display: flex - min-height: $nav-height - width: 100% - // Modifiers - &.has-shadow - box-shadow: 0 2px 3px rgba($black, 0.1) diff --git a/site/_sass/vendor/bulma/sass/components/navbar.sass b/site/_sass/vendor/bulma/sass/components/navbar.sass deleted file mode 100755 index a5848d01b71..00000000000 --- a/site/_sass/vendor/bulma/sass/components/navbar.sass +++ /dev/null @@ -1,236 +0,0 @@ -$navbar-background: $white !default -$navbar-height: 3.25rem !default - -$navbar-item: $grey-dark !default -$navbar-item-hover: $black !default -$navbar-item-hover-background: $background !default -$navbar-item-active: $black !default 
-$navbar-item-active-background: transparent !default - -$navbar-tab-hover-background: transparent !default -$navbar-tab-hover-border: $primary !default -$navbar-tab-active: $primary !default -$navbar-tab-active-background: transparent !default -$navbar-tab-active-border: $primary !default - -$navbar-dropdown-background: $white !default -$navbar-dropdown-border: $border !default -$navbar-dropdown-offset: -4px !default -$navbar-dropdown-arrow: $link !default -$navbar-dropdown-radius: $radius-large !default -$navbar-dropdown-z: 20 !default - -$navbar-dropdown-item-hover: $black !default -$navbar-dropdown-item-hover-background: $background !default -$navbar-dropdown-item-active: $primary !default -$navbar-dropdown-item-active-background: $background !default - -$navbar-divider-background: $border !default - -.navbar - background-color: $navbar-background - min-height: $navbar-height - position: relative - & > .container - align-items: stretch - display: flex - min-height: $navbar-height - width: 100% - &.has-shadow - box-shadow: 0 2px 3px rgba($black, 0.1) - -.navbar-brand - +overflow-touch - align-items: stretch - display: flex - min-height: $navbar-height - overflow-x: auto - overflow-y: hidden - -.navbar-burger - +hamburger($navbar-height) - margin-left: auto - -.navbar-menu - display: none - -.navbar-item, -.navbar-link - color: $navbar-item - display: block - line-height: 1.5 - padding: 0.5rem 1rem - position: relative - -a.navbar-item, -.navbar-link - &:hover, - &.is-active - background-color: $navbar-item-hover-background - color: $navbar-item-hover - -.navbar-item - flex-grow: 0 - flex-shrink: 0 - img - max-height: 1.75rem - &.has-dropdown - padding: 0 - &.is-tab - border-bottom: 1px solid transparent - min-height: $navbar-height - padding-bottom: calc(0.5rem - 1px) - &:hover - background-color: $navbar-tab-hover-background - border-bottom-color: $navbar-tab-hover-border - &.is-active - background-color: $navbar-tab-active-background - border-bottom: 3px solid $navbar-tab-active-border - color: $navbar-tab-active - padding-bottom: calc(0.5rem - 3px) - -.navbar-content - flex-grow: 1 - flex-shrink: 1 - -.navbar-link - padding-right: 2.5em - -.navbar-dropdown - font-size: 0.875rem - padding-bottom: 0.5rem - padding-top: 0.5rem - .navbar-item - padding-left: 1.5rem - padding-right: 1.5rem - -.navbar-divider - background-color: $navbar-divider-background - border: none - display: none - height: 1px - margin: 0.5rem 0 - -+touch - .navbar-brand - .navbar-item - align-items: center - display: flex - .navbar-menu - box-shadow: 0 8px 16px rgba($black, 0.1) - padding: 0.5rem 0 - &.is-active - display: block - -+desktop - .navbar, - .navbar-menu, - .navbar-start, - .navbar-end - align-items: stretch - display: flex - .navbar - min-height: $navbar-height - &.is-transparent - a.navbar-item, - .navbar-link - &:hover, - &.is-active - background-color: transparent - .navbar-item.has-dropdown - &.is-active, - &.is-hoverable:hover - .navbar-link - background-color: transparent - .navbar-dropdown - a.navbar-item - &:hover - background-color: $navbar-dropdown-item-hover-background - color: $navbar-dropdown-item-hover - &.is-active - background-color: $navbar-dropdown-item-active-background - color: $navbar-dropdown-item-active - .navbar-burger - display: none - .navbar-item, - .navbar-link - align-items: center - display: flex - .navbar-item - &.has-dropdown - align-items: stretch - &.is-active, - &.is-hoverable:hover - .navbar-dropdown - display: block - &.is-boxed - opacity: 1 - pointer-events: auto - 
transform: translateY(0) - .navbar-link - &::after - +arrow($navbar-dropdown-arrow) - margin-top: -0.375em - right: 1.125em - top: 50% - .navbar-menu - flex-grow: 1 - flex-shrink: 0 - .navbar-start - justify-content: flex-start - margin-right: auto - .navbar-end - justify-content: flex-end - margin-left: auto - .navbar-dropdown - background-color: $navbar-dropdown-background - border-bottom-left-radius: $navbar-dropdown-radius - border-bottom-right-radius: $navbar-dropdown-radius - border-top: 1px solid $navbar-dropdown-border - box-shadow: 0 8px 8px rgba($black, 0.1) - display: none - font-size: 0.875rem - left: 0 - min-width: 100% - position: absolute - top: 100% - z-index: $navbar-dropdown-z - .navbar-item - padding: 0.375rem 1rem - white-space: nowrap - a.navbar-item - padding-right: 3rem - &:hover - background-color: $navbar-dropdown-item-hover-background - color: $navbar-dropdown-item-hover - &.is-active - background-color: $navbar-dropdown-item-active-background - color: $navbar-dropdown-item-active - &.is-boxed - border-radius: $navbar-dropdown-radius - border-top: none - box-shadow: 0 8px 8px rgba($black, 0.1), 0 0 0 1px rgba($black, 0.1) - display: block - opacity: 0 - pointer-events: none - top: calc(100% + (#{$navbar-dropdown-offset})) - transform: translateY(-5px) - transition-duration: $speed - transition-property: opacity, transform - .navbar-divider - display: block - .container > .navbar - margin-left: -1rem - margin-right: -1rem - // Hover/Active states - a.navbar-item, - .navbar-link - &.is-active - color: $navbar-item-active - &.is-active:not(:hover) - background-color: $navbar-item-active-background - .navbar-item.has-dropdown - &:hover, - &.is-active - .navbar-link - background-color: $navbar-item-hover-background diff --git a/site/_sass/vendor/bulma/sass/components/pagination.sass b/site/_sass/vendor/bulma/sass/components/pagination.sass deleted file mode 100755 index 71af9f0ed88..00000000000 --- a/site/_sass/vendor/bulma/sass/components/pagination.sass +++ /dev/null @@ -1,134 +0,0 @@ -$pagination: $grey-darker !default -$pagination-background: $white !default -$pagination-border: $grey-lighter !default - -$pagination-hover: $link-hover !default -$pagination-hover-border: $link-hover-border !default - -$pagination-focus: $link-focus !default -$pagination-focus-border: $link-focus-border !default - -$pagination-active: $link-active !default -$pagination-active-border: $link-active-border !default - -$pagination-disabled: $grey !default -$pagination-disabled-background: $grey-lighter !default -$pagination-disabled-border: $grey-lighter !default - -$pagination-current: $link-invert !default -$pagination-current-background: $link !default -$pagination-current-border: $link !default - -$pagination-ellipsis: $grey-light !default - -$pagination-shadow-inset: inset 0 1px 2px rgba($black, 0.2) - -.pagination - font-size: $size-normal - margin: -0.25rem - // Sizes - &.is-small - font-size: $size-small - &.is-medium - font-size: $size-medium - &.is-large - font-size: $size-large - -.pagination, -.pagination-list - align-items: center - display: flex - justify-content: center - text-align: center - -.pagination-previous, -.pagination-next, -.pagination-link, -.pagination-ellipsis - +control - +unselectable - font-size: 1em - padding-left: 0.5em - padding-right: 0.5em - justify-content: center - margin: 0.25rem - text-align: center - -.pagination-previous, -.pagination-next, -.pagination-link - border-color: $pagination-border - min-width: 2.25em - &:hover - border-color: 
$pagination-hover-border - color: $pagination-hover - &:focus - border-color: $pagination-focus-border - &:active - box-shadow: $pagination-shadow-inset - &[disabled] - background-color: $pagination-disabled-background - border-color: $pagination-disabled-border - box-shadow: none - color: $pagination-disabled - opacity: 0.5 - -.pagination-previous, -.pagination-next - padding-left: 0.75em - padding-right: 0.75em - white-space: nowrap - -.pagination-link - &.is-current - background-color: $pagination-current-background - border-color: $pagination-current-border - color: $pagination-current - -.pagination-ellipsis - color: $pagination-ellipsis - pointer-events: none - -.pagination-list - flex-wrap: wrap - -+mobile - .pagination - flex-wrap: wrap - .pagination-previous, - .pagination-next - flex-grow: 1 - flex-shrink: 1 - .pagination-list - li - flex-grow: 1 - flex-shrink: 1 - -+tablet - .pagination-list - flex-grow: 1 - flex-shrink: 1 - justify-content: flex-start - order: 1 - .pagination-previous - order: 2 - .pagination-next - order: 3 - .pagination - justify-content: space-between - &.is-centered - .pagination-previous - order: 1 - .pagination-list - justify-content: center - order: 2 - .pagination-next - order: 3 - &.is-right - .pagination-previous - order: 1 - .pagination-next - order: 2 - .pagination-list - justify-content: flex-end - order: 3 diff --git a/site/_sass/vendor/bulma/sass/components/panel.sass b/site/_sass/vendor/bulma/sass/components/panel.sass deleted file mode 100755 index a37a7731162..00000000000 --- a/site/_sass/vendor/bulma/sass/components/panel.sass +++ /dev/null @@ -1,76 +0,0 @@ -.panel - font-size: $size-normal - &:not(:last-child) - margin-bottom: 1.5rem - -.panel-heading, -.panel-tabs, -.panel-block - border-bottom: 1px solid $border - border-left: 1px solid $border - border-right: 1px solid $border - &:first-child - border-top: 1px solid $border - -.panel-heading - background-color: $background - border-radius: $radius $radius 0 0 - color: $text-strong - font-size: 1.25em - font-weight: $weight-light - line-height: 1.25 - padding: 0.5em 0.75em - -.panel-tabs - align-items: flex-end - display: flex - font-size: 0.875em - justify-content: center - a - border-bottom: 1px solid $border - margin-bottom: -1px - padding: 0.5em - // Modifiers - &.is-active - border-bottom-color: $link-active-border - color: $link-active - -.panel-list - a - color: $text - &:hover - color: $link - -.panel-block - align-items: center - color: $text-strong - display: flex - justify-content: flex-start - padding: 0.5em 0.75em - input[type="checkbox"] - margin-right: 0.75em - & > .control - flex-grow: 1 - flex-shrink: 1 - width: 100% - &.is-wrapped - flex-wrap: wrap - &.is-active - border-left-color: $link - color: $link-active - .panel-icon - color: $link - -a.panel-block, -label.panel-block - cursor: pointer - &:hover - background-color: $background - -.panel-icon - +fa(14px, 1em) - color: $text-light - margin-right: 0.75em - .fa - font-size: inherit - line-height: inherit diff --git a/site/_sass/vendor/bulma/sass/components/tabs.sass b/site/_sass/vendor/bulma/sass/components/tabs.sass deleted file mode 100755 index 605176c55ae..00000000000 --- a/site/_sass/vendor/bulma/sass/components/tabs.sass +++ /dev/null @@ -1,107 +0,0 @@ -.tabs - +block - +overflow-touch - +unselectable - align-items: stretch - display: flex - font-size: $size-normal - justify-content: space-between - overflow: hidden - overflow-x: auto - white-space: nowrap - a - align-items: center - border-bottom: 1px solid 
$border - color: $text - display: flex - justify-content: center - margin-bottom: -1px - padding: 0.5em 1em - vertical-align: top - &:hover - border-bottom-color: $text-strong - color: $text-strong - li - display: block - &.is-active - a - border-bottom-color: $primary - color: $primary - ul - align-items: center - border-bottom: 1px solid $border - display: flex - flex-grow: 1 - flex-shrink: 0 - justify-content: flex-start - &.is-left - padding-right: 0.75em - &.is-center - flex: none - justify-content: center - padding-left: 0.75em - padding-right: 0.75em - &.is-right - justify-content: flex-end - padding-left: 0.75em - .icon - &:first-child - margin-right: 0.5em - &:last-child - margin-left: 0.5em - // Alignment - &.is-centered - ul - justify-content: center - &.is-right - ul - justify-content: flex-end - // Styles - &.is-boxed - a - border: 1px solid transparent - border-radius: $radius $radius 0 0 - &:hover - background-color: $background - border-bottom-color: $border - li - &.is-active - a - background-color: $white - border-color: $border - border-bottom-color: transparent !important - &.is-fullwidth - li - flex-grow: 1 - flex-shrink: 0 - &.is-toggle - a - border: 1px solid $border - margin-bottom: 0 - position: relative - &:hover - background-color: $background - border-color: $border-hover - z-index: 2 - li - & + li - margin-left: -1px - &:first-child a - border-radius: $radius 0 0 $radius - &:last-child a - border-radius: 0 $radius $radius 0 - &.is-active - a - background-color: $primary - border-color: $primary - color: $primary-invert - z-index: 1 - ul - border-bottom: none - // Sizes - &.is-small - font-size: $size-small - &.is-medium - font-size: $size-medium - &.is-large - font-size: $size-large diff --git a/site/_sass/vendor/bulma/sass/elements/_all.sass b/site/_sass/vendor/bulma/sass/elements/_all.sass deleted file mode 100755 index 54f5b2fcde8..00000000000 --- a/site/_sass/vendor/bulma/sass/elements/_all.sass +++ /dev/null @@ -1,15 +0,0 @@ -@charset "utf-8" - -@import "box.sass" -@import "button.sass" -@import "content.sass" -@import "form.sass" -@import "icon.sass" -@import "image.sass" -@import "notification.sass" -@import "progress.sass" -@import "table.sass" -@import "tag.sass" -@import "title.sass" - -@import "other.sass" diff --git a/site/_sass/vendor/bulma/sass/elements/box.sass b/site/_sass/vendor/bulma/sass/elements/box.sass deleted file mode 100755 index dec3be79c85..00000000000 --- a/site/_sass/vendor/bulma/sass/elements/box.sass +++ /dev/null @@ -1,23 +0,0 @@ -$box: $text !default -$box-background: $white !default -$box-radius: $radius-large !default -$box-shadow: 0 2px 3px rgba($black, 0.1), 0 0 0 1px rgba($black, 0.1) !default - -$box-link-hover-shadow: 0 2px 3px rgba($black, 0.1), 0 0 0 1px $link -$box-link-active-shadow: inset 0 1px 2px rgba($black, 0.2), 0 0 0 1px $link - -.box - +block - background-color: $box-background - border-radius: $box-radius - box-shadow: $box-shadow - color: $box - display: block - padding: 1.25rem - -a.box - &:hover, - &:focus - box-shadow: $box-link-hover-shadow - &:active - box-shadow: $box-link-active-shadow diff --git a/site/_sass/vendor/bulma/sass/elements/button.sass b/site/_sass/vendor/bulma/sass/elements/button.sass deleted file mode 100755 index 23032f9277c..00000000000 --- a/site/_sass/vendor/bulma/sass/elements/button.sass +++ /dev/null @@ -1,197 +0,0 @@ -$button: $grey-darker !default -$button-background: $white !default -$button-border: $grey-lighter !default - -$button-hover: $link-hover !default 
-$button-hover-border: $link-hover-border !default - -$button-focus: $link-focus !default -$button-focus-border: $link-focus-border !default - -$button-active: $link-active !default -$button-active-border: $link-active-border !default - -$button-static: $grey !default -$button-static-background: $white-ter !default -$button-static-border: $grey-lighter !default - -$button-shadow-inset: inset 0 1px 2px rgba($black, 0.2) !default - -// The button sizes use mixins so they can be used at different breakpoints -=button-small - border-radius: $radius-small - font-size: $size-small -=button-medium - font-size: $size-medium -=button-large - font-size: $size-large - -.button - +control - +unselectable - background-color: $button-background - border-color: $button-border - color: $button - cursor: pointer - justify-content: center - padding-left: 0.75em - padding-right: 0.75em - text-align: center - white-space: nowrap - strong - color: inherit - .icon - &, - &.is-small, - &.is-medium, - &.is-large - height: 1.5em - width: 1.5em - &:first-child:not(:last-child) - margin-left: calc(-0.375em - 1px) - margin-right: 0.1875em - &:last-child:not(:first-child) - margin-left: 0.1875em - margin-right: calc(-0.375em - 1px) - &:first-child:last-child - margin-left: calc(-0.375em - 1px) - margin-right: calc(-0.375em - 1px) - // States - &:hover, - &.is-hovered - border-color: $button-hover-border - color: $button-hover - &:focus, - &.is-focused - border-color: $button-focus-border - box-shadow: 0 0 0.5em rgba($button-focus-border, 0.25) - color: $button-focus - &:active, - &.is-active - border-color: $button-active-border - box-shadow: $button-shadow-inset - color: $button-active - // Colors - &.is-link - background-color: transparent - border-color: transparent - color: $text - text-decoration: underline - &:hover, - &.is-hovered, - &:focus, - &.is-focused, - &:active, - &.is-active - background-color: $background - color: $text-strong - &[disabled] - background-color: transparent - border-color: transparent - box-shadow: none - @each $name, $pair in $colors - $color: nth($pair, 1) - $color-invert: nth($pair, 2) - &.is-#{$name} - background-color: $color - border-color: transparent - color: $color-invert - &:hover, - &.is-hovered - background-color: darken($color, 2.5%) - border-color: transparent - color: $color-invert - &:focus, - &.is-focused - border-color: transparent - box-shadow: 0 0 0.5em rgba($color, 0.25) - color: $color-invert - &:active, - &.is-active - background-color: darken($color, 5%) - border-color: transparent - box-shadow: $button-shadow-inset - color: $color-invert - &[disabled] - background-color: $color - border-color: transparent - box-shadow: none - &.is-inverted - background-color: $color-invert - color: $color - &:hover - background-color: darken($color-invert, 5%) - &[disabled] - background-color: $color-invert - border-color: transparent - box-shadow: none - color: $color - &.is-loading - &:after - border-color: transparent transparent $color-invert $color-invert !important - &.is-outlined - background-color: transparent - border-color: $color - color: $color - &:hover, - &:focus - background-color: $color - border-color: $color - color: $color-invert - &.is-loading - &:after - border-color: transparent transparent $color $color !important - &[disabled] - background-color: transparent - border-color: $color - box-shadow: none - color: $color - &.is-inverted.is-outlined - background-color: transparent - border-color: $color-invert - color: $color-invert - &:hover, - &:focus - 
background-color: $color-invert - color: $color - &[disabled] - background-color: transparent - border-color: $color-invert - box-shadow: none - color: $color-invert - // Sizes - &.is-small - +button-small - &.is-medium - +button-medium - &.is-large - +button-large - // Modifiers - &[disabled] - background-color: $button-background - border-color: $button-border - box-shadow: none - opacity: 0.5 - &.is-fullwidth - display: flex - width: 100% - &.is-loading - color: transparent !important - pointer-events: none - &:after - +loader - +center(1em) - position: absolute !important - &.is-static - background-color: $button-static-background - border-color: $button-static-border - color: $button-static - box-shadow: none - pointer-events: none - -// Adjustment for vertical spacing -button.button, -input[type="submit"].button - line-height: 1 - padding-bottom: 0.4em - padding-top: 0.35em diff --git a/site/_sass/vendor/bulma/sass/elements/content.sass b/site/_sass/vendor/bulma/sass/elements/content.sass deleted file mode 100755 index 978fd30a09a..00000000000 --- a/site/_sass/vendor/bulma/sass/elements/content.sass +++ /dev/null @@ -1,119 +0,0 @@ -.content - +block - // Inline - li + li - margin-top: 0.25em - // Block - p, - dl, - ol, - ul, - blockquote, - pre, - table - &:not(:last-child) - margin-bottom: 1em - h1, - h2, - h3, - h4, - h5, - h6 - color: $text-strong - font-weight: $weight-normal - line-height: 1.125 - h1 - font-size: 2em - margin-bottom: 0.5em - &:not(:first-child) - margin-top: 1em - h2 - font-size: 1.75em - margin-bottom: 0.5714em - &:not(:first-child) - margin-top: 1.1428em - h3 - font-size: 1.5em - margin-bottom: 0.6666em - &:not(:first-child) - margin-top: 1.3333em - h4 - font-size: 1.25em - margin-bottom: 0.8em - h5 - font-size: 1.125em - margin-bottom: 0.8888em - h6 - font-size: 1em - margin-bottom: 1em - blockquote - background-color: $background - border-left: 5px solid $border - padding: 1.25em 1.5em - ol - list-style: decimal outside - margin-left: 2em - margin-top: 1em - ul - list-style: disc outside - margin-left: 2em - margin-top: 1em - ul - list-style-type: circle - margin-top: 0.5em - ul - list-style-type: square - dd - margin-left: 2em - figure - text-align: center - img - display: inline-block - figcaption - font-style: italic - pre - +overflow-touch - overflow-x: auto - padding: 1.25em 1.5em - white-space: pre - word-wrap: normal - sup, - sub - font-size: 70% - table - width: 100% - td, - th - border: 1px solid $border - border-width: 0 0 1px - padding: 0.5em 0.75em - vertical-align: top - th - color: $text-strong - text-align: left - tr - &:hover - background-color: $background - thead - td, - th - border-width: 0 0 2px - color: $text-strong - tfoot - td, - th - border-width: 2px 0 0 - color: $text-strong - tbody - tr - &:last-child - td, - th - border-bottom-width: 0 - // Sizes - &.is-small - font-size: $size-small - &.is-medium - font-size: $size-medium - &.is-large - font-size: $size-large diff --git a/site/_sass/vendor/bulma/sass/elements/form.sass b/site/_sass/vendor/bulma/sass/elements/form.sass deleted file mode 100755 index af1a120b046..00000000000 --- a/site/_sass/vendor/bulma/sass/elements/form.sass +++ /dev/null @@ -1,391 +0,0 @@ -$input: $grey-darker !default -$input-background: $white !default -$input-border: $grey-lighter !default - -$input-hover: $grey-darker !default -$input-hover-border: $grey-light !default - -$input-focus: $grey-darker !default -$input-focus-border: $link !default - -$input-disabled: $text-light !default 
-$input-disabled-background: $background !default -$input-disabled-border: $background !default - -$input-arrow: $link !default - -$input-icon: $grey-lighter !default -$input-icon-active: $grey !default - -$input-radius: $radius !default - -=input - +control - background-color: $input-background - border-color: $input-border - color: $input - &:hover, - &.is-hovered - border-color: $input-hover-border - &:focus, - &.is-focused, - &:active, - &.is-active - border-color: $input-focus-border - &[disabled] - background-color: $input-disabled-background - border-color: $input-disabled-border - box-shadow: none - color: $input-disabled - +placeholder - color: rgba($input, 0.3) - -.input, -.textarea - +input - box-shadow: inset 0 1px 2px rgba($black, 0.1) - max-width: 100% - width: 100% - &[type="search"] - border-radius: 290486px - // Colors - @each $name, $pair in $colors - $color: nth($pair, 1) - &.is-#{$name} - border-color: $color - // Sizes - &.is-small - +control-small - &.is-medium - +control-medium - &.is-large - +control-large - // Modifiers - &.is-fullwidth - display: block - width: 100% - &.is-inline - display: inline - width: auto - -.textarea - display: block - max-width: 100% - min-width: 100% - padding: 0.625em - resize: vertical - &:not([rows]) - max-height: 600px - min-height: 120px - &[rows] - height: unset - -.checkbox, -.radio - cursor: pointer - display: inline-block - line-height: 1.25 - position: relative - input - cursor: pointer - &:hover - color: $input-hover - &[disabled] - color: $input-disabled - cursor: not-allowed - -.radio - & + .radio - margin-left: 0.5em - -.select - display: inline-block - max-width: 100% - position: relative - vertical-align: top - &:not(.is-multiple) - height: 2.25em - &::after - +arrow($input-arrow) - margin-top: -0.375em - right: 1.125em - top: 50% - z-index: 4 - select - +input - cursor: pointer - display: block - font-size: 1em - max-width: 100% - outline: none - &:hover - border-color: $input-hover-border - &:focus, - &.is-focused, - &:active, - &.is-active - border-color: $input-focus-border - &::-ms-expand - display: none - &[disabled]:hover - border-color: $input-disabled-border - &:not([multiple]) - padding-right: 2.5em - &[multiple] - height: unset - padding: 0 - option - padding: 0.5em 1em - // States - &:hover - &::after - border-color: $input-hover - // Colors - @each $name, $pair in $colors - $color: nth($pair, 1) - &.is-#{$name} select - border-color: $color - // Sizes - &.is-small - +control-small - &.is-medium - +control-medium - &.is-large - +control-large - // Modifiers - &.is-disabled - &::after - border-color: $input-disabled - &.is-fullwidth - width: 100% - select - width: 100% - &.is-loading - &::after - +loader - margin-top: 0 - position: absolute - right: 0.625em - top: 0.625em - transform: none - &.is-small:after - font-size: $size-small - &.is-medium:after - font-size: $size-medium - &.is-large:after - font-size: $size-large - -.label - color: $input - display: block - font-size: $size-normal - font-weight: $weight-bold - &:not(:last-child) - margin-bottom: 0.5em - // Sizes - &.is-small - font-size: $size-small - &.is-medium - font-size: $size-medium - &.is-large - font-size: $size-large - -.help - display: block - font-size: $size-small - margin-top: 0.25rem - @each $name, $pair in $colors - $color: nth($pair, 1) - &.is-#{$name} - color: $color - -// Containers - -.field - &:not(:last-child) - margin-bottom: 0.75rem - // Modifiers - &.has-addons - display: flex - justify-content: flex-start - .control - 
&:not(:last-child) - margin-right: -1px - &:first-child - .button, - .input, - .select select - border-bottom-left-radius: $input-radius - border-top-left-radius: $input-radius - &:last-child - .button, - .input, - .select select - border-bottom-right-radius: $input-radius - border-top-right-radius: $input-radius - .button, - .input, - .select select - border-radius: 0 - &:hover, - &.is-hovered - z-index: 2 - &:focus, - &.is-focused, - &:active, - &.is-active - z-index: 3 - &:hover - z-index: 4 - &.is-expanded - flex-grow: 1 - &.has-addons-centered - justify-content: center - &.has-addons-right - justify-content: flex-end - &.has-addons-fullwidth - .control - flex-grow: 1 - flex-shrink: 0 - &.is-grouped - display: flex - justify-content: flex-start - & > .control - flex-shrink: 0 - &:not(:last-child) - margin-bottom: 0 - margin-right: 0.75rem - &.is-expanded - flex-grow: 1 - flex-shrink: 1 - &.is-grouped-centered - justify-content: center - &.is-grouped-right - justify-content: flex-end - &.is-horizontal - +tablet - display: flex - -.field-label - .label - font-size: inherit - +mobile - margin-bottom: 0.5rem - +tablet - flex-basis: 0 - flex-grow: 1 - flex-shrink: 0 - margin-right: 1.5rem - text-align: right - &.is-small - font-size: $size-small - padding-top: 0.375em - &.is-normal - padding-top: 0.375em - &.is-medium - font-size: $size-medium - padding-top: 0.375em - &.is-large - font-size: $size-large - padding-top: 0.375em - -.field-body - .field .field - margin-bottom: 0 - +tablet - display: flex - flex-basis: 0 - flex-grow: 5 - flex-shrink: 1 - .field - margin-bottom: 0 - & > .field - flex-shrink: 1 - &:not(.is-narrow) - flex-grow: 1 - &:not(:last-child) - margin-right: 0.75rem - -.control - font-size: $size-normal - position: relative - text-align: left - // Modifiers - // DEPRECATED - &.has-icon - .icon - color: $input-icon - height: 2.25em - pointer-events: none - position: absolute - top: 0 - width: 2.25em - z-index: 4 - .input - &:focus - & + .icon - color: $input-icon-active - &.is-small - & + .icon - font-size: $size-small - &.is-medium - & + .icon - font-size: $size-medium - &.is-large - & + .icon - font-size: $size-large - &:not(.has-icon-right) - .icon - left: 0 - .input - padding-left: 2.25em - &.has-icon-right - .icon - right: 0 - .input - padding-right: 2.25em - &.has-icons-left, - &.has-icons-right - .input, - .select - &:focus - & ~ .icon - color: $input-icon-active - &.is-small ~ .icon - font-size: $size-small - &.is-medium ~ .icon - font-size: $size-medium - &.is-large ~ .icon - font-size: $size-large - .icon - color: $input-icon - height: 2.25em - pointer-events: none - position: absolute - top: 0 - width: 2.25em - z-index: 4 - &.has-icons-left - .input, - .select select - padding-left: 2.25em - .icon.is-left - left: 0 - &.has-icons-right - .input, - .select select - padding-right: 2.25em - .icon.is-right - right: 0 - &.is-loading - &::after - +loader - position: absolute !important - right: 0.625em - top: 0.625em - &.is-small:after - font-size: $size-small - &.is-medium:after - font-size: $size-medium - &.is-large:after - font-size: $size-large diff --git a/site/_sass/vendor/bulma/sass/elements/icon.sass b/site/_sass/vendor/bulma/sass/elements/icon.sass deleted file mode 100755 index e1c3735aea2..00000000000 --- a/site/_sass/vendor/bulma/sass/elements/icon.sass +++ /dev/null @@ -1,24 +0,0 @@ -.icon - align-items: center - display: inline-flex - justify-content: center - height: 1.5rem - width: 1.5rem - .fa - font-size: 21px - // Sizes - &.is-small - height: 1rem - 
width: 1rem - .fa - font-size: 14px - &.is-medium - height: 2rem - width: 2rem - .fa - font-size: 28px - &.is-large - height: 3rem - width: 3rem - .fa - font-size: 42px diff --git a/site/_sass/vendor/bulma/sass/elements/image.sass b/site/_sass/vendor/bulma/sass/elements/image.sass deleted file mode 100755 index cc27b444fee..00000000000 --- a/site/_sass/vendor/bulma/sass/elements/image.sass +++ /dev/null @@ -1,36 +0,0 @@ -$dimensions: 16 24 32 48 64 96 128 - -.image - display: block - position: relative - img - display: block - height: auto - width: 100% - // Ratio - &.is-square, - &.is-1by1, - &.is-4by3, - &.is-3by2, - &.is-16by9, - &.is-2by1 - img - +overlay - height: 100% - width: 100% - &.is-square, - &.is-1by1 - padding-top: 100% - &.is-4by3 - padding-top: 75% - &.is-3by2 - padding-top: 66.6666% - &.is-16by9 - padding-top: 56.25% - &.is-2by1 - padding-top: 50% - // Sizes - @each $dimension in $dimensions - &.is-#{$dimension}x#{$dimension} - height: $dimension * 1px - width: $dimension * 1px diff --git a/site/_sass/vendor/bulma/sass/elements/notification.sass b/site/_sass/vendor/bulma/sass/elements/notification.sass deleted file mode 100755 index a3e3d60aba4..00000000000 --- a/site/_sass/vendor/bulma/sass/elements/notification.sass +++ /dev/null @@ -1,31 +0,0 @@ -.notification - +block - background-color: $background - border-radius: $radius - padding: 1.25rem 2.5rem 1.25rem 1.5rem - position: relative - a:not(.button) - color: currentColor - text-decoration: underline - strong - color: currentColor - code, - pre - background: $white - pre code - background: transparent - & > .delete - position: absolute - right: 0.5em - top: 0.5em - .title, - .subtitle, - .content - color: currentColor - // Colors - @each $name, $pair in $colors - $color: nth($pair, 1) - $color-invert: nth($pair, 2) - &.is-#{$name} - background-color: $color - color: $color-invert diff --git a/site/_sass/vendor/bulma/sass/elements/other.sass b/site/_sass/vendor/bulma/sass/elements/other.sass deleted file mode 100755 index 93d87858db7..00000000000 --- a/site/_sass/vendor/bulma/sass/elements/other.sass +++ /dev/null @@ -1,70 +0,0 @@ -.block - +block - -.container - margin: 0 auto - position: relative - +desktop - max-width: $desktop - (2 * $gap) - width: $desktop - (2 * $gap) - &.is-fluid - margin-left: $gap - margin-right: $gap - max-width: none - width: auto - +until($widescreen) - &.is-widescreen - max-width: $widescreen - (2 * $gap) - width: auto - +until($fullhd) - &.is-fullhd - max-width: $fullhd - (2 * $gap) - width: auto - +widescreen - max-width: $widescreen - (2 * $gap) - width: $widescreen - (2 * $gap) - +fullhd - max-width: $fullhd - (2 * $gap) - width: $fullhd - (2 * $gap) - -.delete - +delete - -.fa - font-size: 21px - text-align: center - vertical-align: top - -.heading - display: block - font-size: 11px - letter-spacing: 1px - margin-bottom: 5px - text-transform: uppercase - -.highlight - +block - font-weight: $weight-normal - max-width: 100% - overflow: hidden - padding: 0 - pre - overflow: auto - max-width: 100% - -.loader - +loader - -.number - align-items: center - background-color: $background - border-radius: 290486px - display: inline-flex - font-size: $size-medium - height: 2em - justify-content: center - margin-right: 1.5rem - min-width: 2.5em - padding: 0.25rem 0.5rem - text-align: center - vertical-align: top diff --git a/site/_sass/vendor/bulma/sass/elements/progress.sass b/site/_sass/vendor/bulma/sass/elements/progress.sass deleted file mode 100755 index b17a68787db..00000000000 --- 
a/site/_sass/vendor/bulma/sass/elements/progress.sass +++ /dev/null @@ -1,32 +0,0 @@ -.progress - +block - -moz-appearance: none - -webkit-appearance: none - border: none - border-radius: 290486px - display: block - height: $size-normal - overflow: hidden - padding: 0 - width: 100% - &::-webkit-progress-bar - background-color: $border - &::-webkit-progress-value - background-color: $text - &::-moz-progress-bar - background-color: $text - // Colors - @each $name, $pair in $colors - $color: nth($pair, 1) - &.is-#{$name} - &::-webkit-progress-value - background-color: $color - &::-moz-progress-bar - background-color: $color - // Sizes - &.is-small - height: $size-small - &.is-medium - height: $size-medium - &.is-large - height: $size-large diff --git a/site/_sass/vendor/bulma/sass/elements/table.sass b/site/_sass/vendor/bulma/sass/elements/table.sass deleted file mode 100755 index 8bbaf468686..00000000000 --- a/site/_sass/vendor/bulma/sass/elements/table.sass +++ /dev/null @@ -1,82 +0,0 @@ -$table: $grey-darker !default -$table-background: $white !default -$table-border: $grey-lighter !default - -$table-head: $grey !default - -$table-row-hover-background: $white-bis !default - -$table-row-active-background: $primary !default -$table-row-active: $primary-invert !default - -$table-row-even-background: $white-bis !default -$table-row-even-hover-background: $white-ter !default - -.table - background-color: $table-background - color: $table - margin-bottom: 1.5rem - width: 100% - td, - th - border: 1px solid $table-border - border-width: 0 0 1px - padding: 0.5em 0.75em - vertical-align: top - // Modifiers - &.is-narrow - white-space: nowrap - width: 1% - th - color: $text-strong - text-align: left - tr - &:hover - background-color: $table-row-hover-background - &.is-selected - background-color: $table-row-active-background - color: $table-row-active - a, - strong - color: currentColor - td, - th - border-color: $table-row-active - color: currentColor - thead - td, - th - border-width: 0 0 2px - color: $table-head - tfoot - td, - th - border-width: 2px 0 0 - color: $table-head - tbody - tr - &:last-child - td, - th - border-bottom-width: 0 - // Modifiers - &.is-bordered - td, - th - border-width: 1px - tr - &:last-child - td, - th - border-bottom-width: 1px - &.is-narrow - td, - th - padding: 0.25em 0.5em - &.is-striped - tbody - tr:not(.is-selected) - &:nth-child(even) - background-color: $table-row-even-background - &:hover - background-color: $table-row-even-hover-background diff --git a/site/_sass/vendor/bulma/sass/elements/tag.sass b/site/_sass/vendor/bulma/sass/elements/tag.sass deleted file mode 100755 index 477c6622f97..00000000000 --- a/site/_sass/vendor/bulma/sass/elements/tag.sass +++ /dev/null @@ -1,28 +0,0 @@ -.tag - align-items: center - background-color: $background - border-radius: 290486px - color: $text - display: inline-flex - font-size: $size-small - height: 2em - justify-content: center - line-height: 1.5 - padding-left: 0.875em - padding-right: 0.875em - white-space: nowrap - .delete - margin-left: 0.25em - margin-right: -0.375em - // Colors - @each $name, $pair in $colors - $color: nth($pair, 1) - $color-invert: nth($pair, 2) - &.is-#{$name} - background-color: $color - color: $color-invert - // Sizes - &.is-medium - font-size: $size-normal - &.is-large - font-size: $size-medium diff --git a/site/_sass/vendor/bulma/sass/elements/title.sass b/site/_sass/vendor/bulma/sass/elements/title.sass deleted file mode 100755 index 98893ec30bb..00000000000 --- 
a/site/_sass/vendor/bulma/sass/elements/title.sass +++ /dev/null @@ -1,53 +0,0 @@ -$title: $grey-darker !default -$title-size: $size-3 !default -$title-weight: $weight-light !default -$title-weight-bold: $weight-semibold !default - -$subtitle: $grey-dark !default -$subtitle-size: $size-5 !default -$subtitle-strong: $grey-darker !default -$subtitle-weight: $weight-light !default - -.title, -.subtitle - +block - word-break: break-word - em, - span - font-weight: $title-weight - strong - font-weight: $title-weight-bold - .tag - vertical-align: middle - -.title - color: $title - font-size: $title-size - font-weight: $title-weight - line-height: 1.125 - strong - color: inherit - & + .highlight - margin-top: -0.75rem - &:not(.is-spaced) + .subtitle - margin-top: -1.5rem - // Sizes - @each $size in $sizes - $i: index($sizes, $size) - &.is-#{$i} - font-size: $size - -.subtitle - color: $subtitle - font-size: $subtitle-size - font-weight: $subtitle-weight - line-height: 1.25 - strong - color: $subtitle-strong - &:not(.is-spaced) + .title - margin-top: -1.5rem - // Sizes - @each $size in $sizes - $i: index($sizes, $size) - &.is-#{$i} - font-size: $size diff --git a/site/_sass/vendor/bulma/sass/grid/_all.sass b/site/_sass/vendor/bulma/sass/grid/_all.sass deleted file mode 100755 index e53070f6c32..00000000000 --- a/site/_sass/vendor/bulma/sass/grid/_all.sass +++ /dev/null @@ -1,4 +0,0 @@ -@charset "utf-8" - -@import "columns.sass" -@import "tiles.sass" diff --git a/site/_sass/vendor/bulma/sass/grid/columns.sass b/site/_sass/vendor/bulma/sass/grid/columns.sass deleted file mode 100755 index 0dcfeb503d4..00000000000 --- a/site/_sass/vendor/bulma/sass/grid/columns.sass +++ /dev/null @@ -1,325 +0,0 @@ -.column - display: block - flex-basis: 0 - flex-grow: 1 - flex-shrink: 1 - padding: 0.75rem - .columns.is-mobile > &.is-narrow - flex: none - .columns.is-mobile > &.is-full - flex: none - width: 100% - .columns.is-mobile > &.is-three-quarters - flex: none - width: 75% - .columns.is-mobile > &.is-two-thirds - flex: none - width: 66.6666% - .columns.is-mobile > &.is-half - flex: none - width: 50% - .columns.is-mobile > &.is-one-third - flex: none - width: 33.3333% - .columns.is-mobile > &.is-one-quarter - flex: none - width: 25% - .columns.is-mobile > &.is-offset-three-quarters - margin-left: 75% - .columns.is-mobile > &.is-offset-two-thirds - margin-left: 66.6666% - .columns.is-mobile > &.is-offset-half - margin-left: 50% - .columns.is-mobile > &.is-offset-one-third - margin-left: 33.3333% - .columns.is-mobile > &.is-offset-one-quarter - margin-left: 25% - @for $i from 1 through 12 - .columns.is-mobile > &.is-#{$i} - flex: none - width: ($i / 12) * 100% - .columns.is-mobile > &.is-offset-#{$i} - margin-left: ($i / 12) * 100% - +mobile - &.is-narrow-mobile - flex: none - &.is-full-mobile - flex: none - width: 100% - &.is-three-quarters-mobile - flex: none - width: 75% - &.is-two-thirds-mobile - flex: none - width: 66.6666% - &.is-half-mobile - flex: none - width: 50% - &.is-one-third-mobile - flex: none - width: 33.3333% - &.is-one-quarter-mobile - flex: none - width: 25% - &.is-offset-three-quarters-mobile - margin-left: 75% - &.is-offset-two-thirds-mobile - margin-left: 66.6666% - &.is-offset-half-mobile - margin-left: 50% - &.is-offset-one-third-mobile - margin-left: 33.3333% - &.is-offset-one-quarter-mobile - margin-left: 25% - @for $i from 1 through 12 - &.is-#{$i}-mobile - flex: none - width: ($i / 12) * 100% - &.is-offset-#{$i}-mobile - margin-left: ($i / 12) * 100% - +tablet - &.is-narrow, - 
&.is-narrow-tablet - flex: none - &.is-full, - &.is-full-tablet - flex: none - width: 100% - &.is-three-quarters, - &.is-three-quarters-tablet - flex: none - width: 75% - &.is-two-thirds, - &.is-two-thirds-tablet - flex: none - width: 66.6666% - &.is-half, - &.is-half-tablet - flex: none - width: 50% - &.is-one-third, - &.is-one-third-tablet - flex: none - width: 33.3333% - &.is-one-quarter, - &.is-one-quarter-tablet - flex: none - width: 25% - &.is-offset-three-quarters, - &.is-offset-three-quarters-tablet - margin-left: 75% - &.is-offset-two-thirds, - &.is-offset-two-thirds-tablet - margin-left: 66.6666% - &.is-offset-half, - &.is-offset-half-tablet - margin-left: 50% - &.is-offset-one-third, - &.is-offset-one-third-tablet - margin-left: 33.3333% - &.is-offset-one-quarter, - &.is-offset-one-quarter-tablet - margin-left: 25% - @for $i from 1 through 12 - &.is-#{$i}, - &.is-#{$i}-tablet - flex: none - width: ($i / 12) * 100% - &.is-offset-#{$i}, - &.is-offset-#{$i}-tablet - margin-left: ($i / 12) * 100% - +touch - &.is-narrow-touch - flex: none - &.is-full-touch - flex: none - width: 100% - &.is-three-quarters-touch - flex: none - width: 75% - &.is-two-thirds-touch - flex: none - width: 66.6666% - &.is-half-touch - flex: none - width: 50% - &.is-one-third-touch - flex: none - width: 33.3333% - &.is-one-quarter-touch - flex: none - width: 25% - &.is-offset-three-quarters-touch - margin-left: 75% - &.is-offset-two-thirds-touch - margin-left: 66.6666% - &.is-offset-half-touch - margin-left: 50% - &.is-offset-one-third-touch - margin-left: 33.3333% - &.is-offset-one-quarter-touch - margin-left: 25% - @for $i from 1 through 12 - &.is-#{$i}-touch - flex: none - width: ($i / 12) * 100% - &.is-offset-#{$i}-touch - margin-left: ($i / 12) * 100% - +desktop - &.is-narrow-desktop - flex: none - &.is-full-desktop - flex: none - width: 100% - &.is-three-quarters-desktop - flex: none - width: 75% - &.is-two-thirds-desktop - flex: none - width: 66.6666% - &.is-half-desktop - flex: none - width: 50% - &.is-one-third-desktop - flex: none - width: 33.3333% - &.is-one-quarter-desktop - flex: none - width: 25% - &.is-offset-three-quarters-desktop - margin-left: 75% - &.is-offset-two-thirds-desktop - margin-left: 66.6666% - &.is-offset-half-desktop - margin-left: 50% - &.is-offset-one-third-desktop - margin-left: 33.3333% - &.is-offset-one-quarter-desktop - margin-left: 25% - @for $i from 1 through 12 - &.is-#{$i}-desktop - flex: none - width: ($i / 12) * 100% - &.is-offset-#{$i}-desktop - margin-left: ($i / 12) * 100% - +widescreen - &.is-narrow-widescreen - flex: none - &.is-full-widescreen - flex: none - width: 100% - &.is-three-quarters-widescreen - flex: none - width: 75% - &.is-two-thirds-widescreen - flex: none - width: 66.6666% - &.is-half-widescreen - flex: none - width: 50% - &.is-one-third-widescreen - flex: none - width: 33.3333% - &.is-one-quarter-widescreen - flex: none - width: 25% - &.is-offset-three-quarters-widescreen - margin-left: 75% - &.is-offset-two-thirds-widescreen - margin-left: 66.6666% - &.is-offset-half-widescreen - margin-left: 50% - &.is-offset-one-third-widescreen - margin-left: 33.3333% - &.is-offset-one-quarter-widescreen - margin-left: 25% - @for $i from 1 through 12 - &.is-#{$i}-widescreen - flex: none - width: ($i / 12) * 100% - &.is-offset-#{$i}-widescreen - margin-left: ($i / 12) * 100% - +fullhd - &.is-narrow-fullhd - flex: none - &.is-full-fullhd - flex: none - width: 100% - &.is-three-quarters-fullhd - flex: none - width: 75% - &.is-two-thirds-fullhd - flex: none - 
width: 66.6666% - &.is-half-fullhd - flex: none - width: 50% - &.is-one-third-fullhd - flex: none - width: 33.3333% - &.is-one-quarter-fullhd - flex: none - width: 25% - &.is-offset-three-quarters-fullhd - margin-left: 75% - &.is-offset-two-thirds-fullhd - margin-left: 66.6666% - &.is-offset-half-fullhd - margin-left: 50% - &.is-offset-one-third-fullhd - margin-left: 33.3333% - &.is-offset-one-quarter-fullhd - margin-left: 25% - @for $i from 1 through 12 - &.is-#{$i}-fullhd - flex: none - width: ($i / 12) * 100% - &.is-offset-#{$i}-fullhd - margin-left: ($i / 12) * 100% - -.columns - margin-left: -0.75rem - margin-right: -0.75rem - margin-top: -0.75rem - &:last-child - margin-bottom: -0.75rem - &:not(:last-child) - margin-bottom: 0.75rem - // Modifiers - &.is-centered - justify-content: center - &.is-gapless - margin-left: 0 - margin-right: 0 - margin-top: 0 - &:last-child - margin-bottom: 0 - &:not(:last-child) - margin-bottom: 1.5rem - & > .column - margin: 0 - padding: 0 - &.is-grid - // Responsiveness - +tablet - flex-wrap: wrap - & > .column - max-width: 33.3333% - padding: 0.75rem - width: 33.3333% - & + .column - margin-left: 0 - &.is-mobile - display: flex - &.is-multiline - flex-wrap: wrap - &.is-vcentered - align-items: center - // Responsiveness - +tablet - &:not(.is-desktop) - display: flex - +desktop - // Modifiers - &.is-desktop - display: flex diff --git a/site/_sass/vendor/bulma/sass/grid/tiles.sass b/site/_sass/vendor/bulma/sass/grid/tiles.sass deleted file mode 100755 index 1f6a626fc6b..00000000000 --- a/site/_sass/vendor/bulma/sass/grid/tiles.sass +++ /dev/null @@ -1,32 +0,0 @@ -.tile - align-items: stretch - display: block - flex-basis: 0 - flex-grow: 1 - flex-shrink: 1 - min-height: min-content - // Modifiers - &.is-ancestor - margin-left: -0.75rem - margin-right: -0.75rem - margin-top: -0.75rem - &:last-child - margin-bottom: -0.75rem - &:not(:last-child) - margin-bottom: 0.75rem - &.is-child - margin: 0 !important - &.is-parent - padding: 0.75rem - &.is-vertical - flex-direction: column - & > .tile.is-child:not(:last-child) - margin-bottom: 1.5rem !important - // Responsiveness - +tablet - &:not(.is-child) - display: flex - @for $i from 1 through 12 - &.is-#{$i} - flex: none - width: ($i / 12) * 100% diff --git a/site/_sass/vendor/bulma/sass/layout/_all.sass b/site/_sass/vendor/bulma/sass/layout/_all.sass deleted file mode 100755 index 143ada35be8..00000000000 --- a/site/_sass/vendor/bulma/sass/layout/_all.sass +++ /dev/null @@ -1,5 +0,0 @@ -@charset "utf-8" - -@import "hero.sass" -@import "section.sass" -@import "footer.sass" diff --git a/site/_sass/vendor/bulma/sass/layout/footer.sass b/site/_sass/vendor/bulma/sass/layout/footer.sass deleted file mode 100755 index f8285ca4306..00000000000 --- a/site/_sass/vendor/bulma/sass/layout/footer.sass +++ /dev/null @@ -1,3 +0,0 @@ -.footer - background-color: $background - padding: 3rem 1.5rem 6rem diff --git a/site/_sass/vendor/bulma/sass/layout/hero.sass b/site/_sass/vendor/bulma/sass/layout/hero.sass deleted file mode 100755 index 9c694c6eca7..00000000000 --- a/site/_sass/vendor/bulma/sass/layout/hero.sass +++ /dev/null @@ -1,152 +0,0 @@ -// Components - -.hero-video - +overlay - overflow: hidden - video - left: 50% - min-height: 100% - min-width: 100% - position: absolute - top: 50% - transform: translate3d(-50%, -50%, 0) - // Modifiers - &.is-transparent - opacity: 0.3 - // Responsiveness - +mobile - display: none - -.hero-buttons - margin-top: 1.5rem - // Responsiveness - +mobile - .button - display: flex - 
&:not(:last-child) - margin-bottom: 0.75rem - +tablet - display: flex - justify-content: center - .button:not(:last-child) - margin-right: 1.5rem - -// Containers - -.hero-head, -.hero-foot - flex-grow: 0 - flex-shrink: 0 - -.hero-body - flex-grow: 1 - flex-shrink: 0 - padding: 3rem 1.5rem - -// Main container - -.hero - align-items: stretch - background-color: $white - display: flex - flex-direction: column - justify-content: space-between - .nav - background: none - box-shadow: 0 1px 0 rgba($border, 0.3) - .tabs - ul - border-bottom: none - // Colors - @each $name, $pair in $colors - $color: nth($pair, 1) - $color-invert: nth($pair, 2) - &.is-#{$name} - background-color: $color - color: $color-invert - a:not(.button), - strong - color: inherit - .title - color: $color-invert - .subtitle - color: rgba($color-invert, 0.9) - a:not(.button), - strong - color: $color-invert - .nav - box-shadow: 0 1px 0 rgba($color-invert, 0.2) - .nav-menu - +mobile - background-color: $color - a.nav-item, - .nav-item a:not(.button) - color: rgba($color-invert, 0.7) - &:hover, - &.is-active - color: $color-invert - .tabs - a - color: $color-invert - opacity: 0.9 - &:hover - opacity: 1 - li - &.is-active a - opacity: 1 - &.is-boxed, - &.is-toggle - a - color: $color-invert - &:hover - background-color: rgba($black, 0.1) - li.is-active a - &, - &:hover - background-color: $color-invert - border-color: $color-invert - color: $color - // Modifiers - &.is-bold - $gradient-top-left: darken(saturate(adjust-hue($color, -10deg), 10%), 10%) - $gradient-bottom-right: lighten(saturate(adjust-hue($color, 10deg), 5%), 5%) - background-image: linear-gradient(141deg, $gradient-top-left 0%, $color 71%, $gradient-bottom-right 100%) - +mobile - .nav-menu - background-image: linear-gradient(141deg, $gradient-top-left 0%, $color 71%, $gradient-bottom-right 100%) - // Responsiveness - +mobile - .nav-toggle - span - background-color: $color-invert - &:hover - background-color: rgba($black, 0.1) - &.is-active - span - background-color: $color-invert - .nav-menu - .nav-item - border-top-color: rgba($color-invert, 0.2) - // Sizes - &.is-medium - +tablet - .hero-body - padding-bottom: 9rem - padding-top: 9rem - &.is-large - +tablet - .hero-body - padding-bottom: 18rem - padding-top: 18rem - &.is-halfheight, - &.is-fullheight - .hero-body - align-items: center - display: flex - & > .container - flex-grow: 1 - flex-shrink: 1 - &.is-halfheight - min-height: 50vh - &.is-fullheight - min-height: 100vh diff --git a/site/_sass/vendor/bulma/sass/layout/section.sass b/site/_sass/vendor/bulma/sass/layout/section.sass deleted file mode 100755 index 7139cfb5fac..00000000000 --- a/site/_sass/vendor/bulma/sass/layout/section.sass +++ /dev/null @@ -1,10 +0,0 @@ -.section - background-color: $white - padding: 3rem 1.5rem - // Responsiveness - +desktop - // Sizes - &.is-medium - padding: 9rem 1.5rem - &.is-large - padding: 18rem 1.5rem diff --git a/site/_sass/vendor/bulma/sass/utilities/_all.sass b/site/_sass/vendor/bulma/sass/utilities/_all.sass deleted file mode 100755 index bf4ecfe3585..00000000000 --- a/site/_sass/vendor/bulma/sass/utilities/_all.sass +++ /dev/null @@ -1,8 +0,0 @@ -@charset "utf-8" - -@import "initial-variables.sass" -@import "functions.sass" -@import "derived-variables.sass" -@import "animations.sass" -@import "mixins.sass" -@import "controls.sass" diff --git a/site/_sass/vendor/bulma/sass/utilities/animations.sass b/site/_sass/vendor/bulma/sass/utilities/animations.sass deleted file mode 100755 index a14525d75ec..00000000000 --- 
a/site/_sass/vendor/bulma/sass/utilities/animations.sass +++ /dev/null @@ -1,5 +0,0 @@ -@keyframes spinAround - from - transform: rotate(0deg) - to - transform: rotate(359deg) diff --git a/site/_sass/vendor/bulma/sass/utilities/controls.sass b/site/_sass/vendor/bulma/sass/utilities/controls.sass deleted file mode 100755 index cb258df115b..00000000000 --- a/site/_sass/vendor/bulma/sass/utilities/controls.sass +++ /dev/null @@ -1,41 +0,0 @@ -$control-radius: $radius !default -$control-radius-small: $radius-small !default - -$control-padding-vertical: calc(0.375em - 1px) !default -$control-padding-horizontal: calc(0.625em - 1px) !default - -=control - -moz-appearance: none - -webkit-appearance: none - align-items: center - border: 1px solid transparent - border-radius: $control-radius - box-shadow: none - display: inline-flex - font-size: $size-normal - height: 2.25em - justify-content: flex-start - line-height: 1.5 - padding-bottom: $control-padding-vertical - padding-left: $control-padding-horizontal - padding-right: $control-padding-horizontal - padding-top: $control-padding-vertical - position: relative - vertical-align: top - // States - &:focus, - &.is-focused, - &:active, - &.is-active - outline: none - &[disabled] - cursor: not-allowed - -// The controls sizes use mixins so they can be used at different breakpoints -=control-small - border-radius: $control-radius-small - font-size: $size-small -=control-medium - font-size: $size-medium -=control-large - font-size: $size-large diff --git a/site/_sass/vendor/bulma/sass/utilities/derived-variables.sass b/site/_sass/vendor/bulma/sass/utilities/derived-variables.sass deleted file mode 100755 index 73e8a66bd9f..00000000000 --- a/site/_sass/vendor/bulma/sass/utilities/derived-variables.sass +++ /dev/null @@ -1,80 +0,0 @@ -$primary: $turquoise !default - -$info: $blue !default -$success: $green !default -$warning: $yellow !default -$danger: $red !default - -$light: $white-ter !default -$dark: $grey-darker !default - -// Invert colors - -$orange-invert: findColorInvert($orange) !default -$yellow-invert: findColorInvert($yellow) !default -$green-invert: findColorInvert($green) !default -$turquoise-invert: findColorInvert($turquoise) !default -$blue-invert: findColorInvert($blue) !default -$purple-invert: findColorInvert($purple) !default -$red-invert: findColorInvert($red) !default - -$primary-invert: $turquoise-invert !default -$info-invert: $blue-invert !default -$success-invert: $green-invert !default -$warning-invert: $yellow-invert !default -$danger-invert: $red-invert !default -$light-invert: $dark !default -$dark-invert: $light !default - -// General colors - -$background: $white-ter !default - -$border: $grey-lighter !default -$border-hover: $grey-light !default - -// Text colors - -$text: $grey-dark !default -$text-invert: findColorInvert($text) !default -$text-light: $grey !default -$text-strong: $grey-darker !default - -// Code colors - -$code: $red !default -$code-background: $background !default - -$pre: $text !default -$pre-background: $background !default - -// Link colors - -$link: $primary !default -$link-invert: $primary-invert !default -$link-visited: $purple !default - -$link-hover: $grey-darker !default -$link-hover-border: $grey-light !default - -$link-focus: $grey-darker !default -$link-focus-border: $primary !default - -$link-active: $grey-darker !default -$link-active-border: $grey-dark !default - -// Typography - -$family-primary: $family-sans-serif !default -$family-code: $family-monospace !default - -$size-small: 
$size-7 !default -$size-normal: $size-6 !default -$size-medium: $size-5 !default -$size-large: $size-4 !default - -// Lists and maps - -$colors: ("white": ($white, $black), "black": ($black, $white), "light": ($light, $light-invert), "dark": ($dark, $dark-invert), "primary": ($primary, $primary-invert), "info": ($info, $info-invert), "success": ($success, $success-invert), "warning": ($warning, $warning-invert), "danger": ($danger, $danger-invert)) !default - -$sizes: $size-1 $size-2 $size-3 $size-4 $size-5 $size-6 !default diff --git a/site/_sass/vendor/bulma/sass/utilities/functions.sass b/site/_sass/vendor/bulma/sass/utilities/functions.sass deleted file mode 100755 index e38d1727cd8..00000000000 --- a/site/_sass/vendor/bulma/sass/utilities/functions.sass +++ /dev/null @@ -1,28 +0,0 @@ -@function powerNumber($number, $exp) - $value: 1 - @if $exp > 0 - @for $i from 1 through $exp - $value: $value * $number - @else if $exp < 0 - @for $i from 1 through -$exp - $value: $value / $number - @return $value - -@function colorLuminance($color) - $color-rgb: ('red': red($color),'green': green($color),'blue': blue($color)) - @each $name, $value in $color-rgb - $adjusted: 0 - $value: $value / 255 - @if $value < 0.03928 - $value: $value / 12.92 - @else - $value: ($value + .055) / 1.055 - $value: powerNumber($value, 2) - $color-rgb: map-merge($color-rgb, ($name: $value)) - @return (map-get($color-rgb, 'red') * .2126) + (map-get($color-rgb, 'green') * .7152) + (map-get($color-rgb, 'blue') * .0722) - -@function findColorInvert($color) - @if (colorLuminance($color) > 0.55) - @return rgba(#000, 0.7) - @else - @return #fff diff --git a/site/_sass/vendor/bulma/sass/utilities/initial-variables.sass b/site/_sass/vendor/bulma/sass/utilities/initial-variables.sass deleted file mode 100755 index 9777d58b0d5..00000000000 --- a/site/_sass/vendor/bulma/sass/utilities/initial-variables.sass +++ /dev/null @@ -1,68 +0,0 @@ -// Colors - -$black: hsl(0, 0%, 4%) !default -$black-bis: hsl(0, 0%, 7%) !default -$black-ter: hsl(0, 0%, 14%) !default - -$grey-darker: hsl(0, 0%, 21%) !default -$grey-dark: hsl(0, 0%, 29%) !default -$grey: hsl(0, 0%, 48%) !default -$grey-light: hsl(0, 0%, 71%) !default -$grey-lighter: hsl(0, 0%, 86%) !default - -$white-ter: hsl(0, 0%, 96%) !default -$white-bis: hsl(0, 0%, 98%) !default -$white: hsl(0, 0%, 100%) !default - -$orange: hsl(14, 100%, 53%) !default -$yellow: hsl(48, 100%, 67%) !default -$green: hsl(141, 71%, 48%) !default -$turquoise: hsl(171, 100%, 41%) !default -$blue: hsl(217, 71%, 53%) !default -$purple: hsl(271, 100%, 71%) !default -$red: hsl(348, 100%, 61%) !default - -// Typography - -$family-sans-serif: BlinkMacSystemFont, -apple-system, "Segoe UI", "Roboto", "Oxygen", "Ubuntu", "Cantarell", "Fira Sans", "Droid Sans", "Helvetica Neue", "Helvetica", "Arial", sans-serif !default -$family-monospace: monospace !default -$render-mode: optimizeLegibility !default - -$size-1: 3rem !default -$size-2: 2.5rem !default -$size-3: 2rem !default -$size-4: 1.5rem !default -$size-5: 1.25rem !default -$size-6: 1rem !default -$size-7: 0.75rem !default - -$weight-light: 300 !default -$weight-normal: 400 !default -$weight-semibold: 500 !default -$weight-bold: 700 !default - -// Body - -$body-background: #fff !default -$body-size: 16px !default - -// Responsiveness - -// The container gap, which acts as the offset for breakpoints -$gap: 24px !default -// 960, 1152, and 1344 have been chosen because they are divisible by both 12 and 16 -$tablet: 769px !default -// 960px container + 3rem -$desktop: 
960px + (2 * $gap) !default -// 1152px container + 3rem -$widescreen: 1152px + (2 * $gap) !default -// 1344px container + 3rem -$fullhd: 1344px + (2 * $gap) !default - -// Miscellaneous - -$easing: ease-out !default -$radius-small: 2px !default -$radius: 3px !default -$radius-large: 5px !default -$speed: 86ms !default diff --git a/site/_sass/vendor/bulma/sass/utilities/mixins.sass b/site/_sass/vendor/bulma/sass/utilities/mixins.sass deleted file mode 100755 index 5997c1a660b..00000000000 --- a/site/_sass/vendor/bulma/sass/utilities/mixins.sass +++ /dev/null @@ -1,225 +0,0 @@ -=arrow($color) - border: 1px solid $color - border-right: 0 - border-top: 0 - content: " " - display: block - height: 0.5em - pointer-events: none - position: absolute - transform: rotate(-45deg) - width: 0.5em - -=block - &:not(:last-child) - margin-bottom: 1.5rem - -=clearfix - &:after - clear: both - content: " " - display: table - -=center($width, $height: 0) - position: absolute - @if $height != 0 - left: calc(50% - (#{$width} / 2)) - top: calc(50% - (#{$height} / 2)) - @else - left: calc(50% - (#{$width} / 2)) - top: calc(50% - (#{$width} / 2)) - -=delete - +unselectable - -moz-appearance: none - -webkit-appearance: none - background-color: rgba($black, 0.2) - border: none - border-radius: 290486px - cursor: pointer - display: inline-block - flex-grow: 0 - flex-shrink: 0 - font-size: $size-normal - height: 20px - max-height: 20px - max-width: 20px - min-height: 20px - min-width: 20px - outline: none - position: relative - vertical-align: top - width: 20px - &:before, - &:after - background-color: $white - content: "" - display: block - left: 50% - position: absolute - top: 50% - transform: translateX(-50%) translateY(-50%) rotate(45deg) - transform-origin: center center - &:before - height: 2px - width: 50% - &:after - height: 50% - width: 2px - &:hover, - &:focus - background-color: rgba($black, 0.3) - &:active - background-color: rgba($black, 0.4) - // Sizes - &.is-small - height: 16px - max-height: 16px - max-width: 16px - min-height: 16px - min-width: 16px - width: 16px - &.is-medium - height: 24px - max-height: 24px - max-width: 24px - min-height: 24px - min-width: 24px - width: 24px - &.is-large - height: 32px - max-height: 32px - max-width: 32px - min-height: 32px - min-width: 32px - width: 32px - -=fa($size, $dimensions) - display: inline-block - font-size: $size - height: $dimensions - line-height: $dimensions - text-align: center - vertical-align: top - width: $dimensions - -=hamburger($dimensions) - cursor: pointer - display: block - height: $dimensions - position: relative - width: $dimensions - span - background-color: $text - display: block - height: 1px - left: 50% - margin-left: -7px - position: absolute - top: 50% - transition: none $speed $easing - transition-property: background, left, opacity, transform - width: 15px - &:nth-child(1) - margin-top: -6px - &:nth-child(2) - margin-top: -1px - &:nth-child(3) - margin-top: 4px - &:hover - background-color: $background - // Modifers - &.is-active - span - background-color: $link - &:nth-child(1) - margin-left: -5px - transform: rotate(45deg) - transform-origin: left top - &:nth-child(2) - opacity: 0 - &:nth-child(3) - margin-left: -5px - transform: rotate(-45deg) - transform-origin: left bottom - -=loader - animation: spinAround 500ms infinite linear - border: 2px solid $border - border-radius: 290486px - border-right-color: transparent - border-top-color: transparent - content: "" - display: block - height: 1em - position: relative - width: 1em - 
-=overflow-touch - -webkit-overflow-scrolling: touch - -=overlay($offset: 0) - bottom: $offset - left: $offset - position: absolute - right: $offset - top: $offset - -=placeholder - $placeholders: ':-moz' ':-webkit-input' '-moz' '-ms-input' - @each $placeholder in $placeholders - &:#{$placeholder}-placeholder - @content - -=unselectable - -webkit-touch-callout: none - -webkit-user-select: none - -moz-user-select: none - -ms-user-select: none - user-select: none - -// Responsiveness - -=from($device) - @media screen and (min-width: $device) - @content - -=until($device) - @media screen and (max-width: $device - 1px) - @content - -=mobile - @media screen and (max-width: $tablet - 1px) - @content - -=tablet - @media screen and (min-width: $tablet), print - @content - -=tablet-only - @media screen and (min-width: $tablet) and (max-width: $desktop - 1px) - @content - -=touch - @media screen and (max-width: $desktop - 1px) - @content - -=desktop - @media screen and (min-width: $desktop) - @content - -=desktop-only - @media screen and (min-width: $desktop) and (max-width: $widescreen - 1px) - @content - -=widescreen - @media screen and (min-width: $widescreen) - @content - -=widescreen-only - @media screen and (min-width: $widescreen) and (max-width: $fullhd - 1px) - @content - -=fullhd - @media screen and (min-width: $fullhd) - @content diff --git a/site/_sass/vendor/bulma/sass/utilities/variables.sass b/site/_sass/vendor/bulma/sass/utilities/variables.sass deleted file mode 100755 index 0a9b9cb86df..00000000000 --- a/site/_sass/vendor/bulma/sass/utilities/variables.sass +++ /dev/null @@ -1,150 +0,0 @@ -//////////////////////////////////////////////// -//////////////////////////////////////////////// -// 1. Initial variables - -// Colors -$black: hsl(0, 0%, 4%) !default -$black-bis: hsl(0, 0%, 7%) !default -$black-ter: hsl(0, 0%, 14%) !default - -$grey-darker: hsl(0, 0%, 21%) !default -$grey-dark: hsl(0, 0%, 29%) !default -$grey: hsl(0, 0%, 48%) !default -$grey-light: hsl(0, 0%, 71%) !default -$grey-lighter: hsl(0, 0%, 86%) !default - -$white-ter: hsl(0, 0%, 96%) !default -$white-bis: hsl(0, 0%, 98%) !default -$white: hsl(0, 0%, 100%) !default - -$orange: hsl(14, 100%, 53%) !default -$yellow: hsl(48, 100%, 67%) !default -$green: hsl(141, 71%, 48%) !default -$turquoise: hsl(171, 100%, 41%) !default -$blue: hsl(217, 71%, 53%) !default -$purple: hsl(271, 100%, 71%) !default -$red: hsl(348, 100%, 61%) !default - -// Typography -$family-sans-serif: BlinkMacSystemFont, -apple-system, "Segoe UI", "Roboto", "Oxygen", "Ubuntu", "Cantarell", "Fira Sans", "Droid Sans", "Helvetica Neue", "Helvetica", "Arial", sans-serif !default -$family-monospace: monospace !default -$render-mode: optimizeLegibility !default - -$size-1: 3rem !default -$size-2: 2.5rem !default -$size-3: 2rem !default -$size-4: 1.5rem !default -$size-5: 1.25rem !default -$size-6: 1rem !default -$size-7: 0.75rem !default - -$weight-light: 300 !default -$weight-normal: 400 !default -$weight-semibold: 500 !default -$weight-bold: 700 !default - -// Body -$body-background: #fff !default -$body-size: 16px !default - -// Responsiveness -// 960, 1152, and 1344 have been chosen because they are divisible by both 12 and 16 -$tablet: 769px !default -// 960px container + 40px -$desktop: 1000px !default -// 1152px container + 40 -$widescreen: 1192px !default -// 1344px container + 40 -$fullhd: 1384px !default - -// Miscellaneous -$easing: ease-out !default -$radius-small: 2px !default -$radius: 3px !default -$radius-large: 5px !default -$speed: 86ms 
!default - -//////////////////////////////////////////////// -//////////////////////////////////////////////// -// 2. Primary colors - -$primary: $turquoise !default - -$info: $blue !default -$success: $green !default -$warning: $yellow !default -$danger: $red !default - -$light: $white-ter !default -$dark: $grey-darker !default - -//////////////////////////////////////////////// -//////////////////////////////////////////////// -// 3. Applied variables - -// Invert colors -$orange-invert: findColorInvert($orange) !default -$yellow-invert: findColorInvert($yellow) !default -$green-invert: findColorInvert($green) !default -$turquoise-invert: findColorInvert($turquoise) !default -$blue-invert: findColorInvert($blue) !default -$purple-invert: findColorInvert($purple) !default -$red-invert: findColorInvert($red) !default - -$primary-invert: $turquoise-invert !default -$info-invert: $blue-invert !default -$success-invert: $green-invert !default -$warning-invert: $yellow-invert !default -$danger-invert: $red-invert !default -$light-invert: $dark !default -$dark-invert: $light !default - -// General colors -$background: $white-ter !default - -$border: $grey-lighter !default -$border-hover: $grey-light !default - -// Text colors -$text: $grey-dark !default -$text-invert: findColorInvert($text) !default -$text-light: $grey !default -$text-strong: $grey-darker !default - -// Code colors -$code: $red !default -$code-background: $background !default - -$pre: $text !default -$pre-background: $background !default - -// Link colors -$link: $primary !default -$link-invert: $primary-invert !default -$link-visited: $purple !default - -$link-hover: $grey-darker !default -$link-hover-border: $grey-light !default - -$link-focus: $grey-darker !default -$link-focus-border: $primary !default - -$link-active: $grey-darker !default -$link-active-border: $grey-dark !default - -// Typography -$family-primary: $family-sans-serif !default -$family-code: $family-monospace !default - -$size-small: $size-7 !default -$size-normal: $size-6 !default -$size-medium: $size-5 !default -$size-large: $size-4 !default - -//////////////////////////////////////////////// -//////////////////////////////////////////////// -// 4. 
Lists and maps - -$colors: ("white": ($white, $black), "black": ($black, $white), "light": ($light, $light-invert), "dark": ($dark, $dark-invert), "primary": ($primary, $primary-invert), "info": ($info, $info-invert), "success": ($success, $success-invert), "warning": ($warning, $warning-invert), "danger": ($danger, $danger-invert)) !default - -$sizes: $size-1 $size-2 $size-3 $size-4 $size-5 $size-6 !default diff --git a/site/_sass/vendor/font-awesome/_animated.scss b/site/_sass/vendor/font-awesome/_animated.scss deleted file mode 100644 index 8a020dbfff7..00000000000 --- a/site/_sass/vendor/font-awesome/_animated.scss +++ /dev/null @@ -1,34 +0,0 @@ -// Spinning Icons -// -------------------------- - -.#{$fa-css-prefix}-spin { - -webkit-animation: fa-spin 2s infinite linear; - animation: fa-spin 2s infinite linear; -} - -.#{$fa-css-prefix}-pulse { - -webkit-animation: fa-spin 1s infinite steps(8); - animation: fa-spin 1s infinite steps(8); -} - -@-webkit-keyframes fa-spin { - 0% { - -webkit-transform: rotate(0deg); - transform: rotate(0deg); - } - 100% { - -webkit-transform: rotate(359deg); - transform: rotate(359deg); - } -} - -@keyframes fa-spin { - 0% { - -webkit-transform: rotate(0deg); - transform: rotate(0deg); - } - 100% { - -webkit-transform: rotate(359deg); - transform: rotate(359deg); - } -} diff --git a/site/_sass/vendor/font-awesome/_bordered-pulled.scss b/site/_sass/vendor/font-awesome/_bordered-pulled.scss deleted file mode 100644 index d4b85a02f24..00000000000 --- a/site/_sass/vendor/font-awesome/_bordered-pulled.scss +++ /dev/null @@ -1,25 +0,0 @@ -// Bordered & Pulled -// ------------------------- - -.#{$fa-css-prefix}-border { - padding: .2em .25em .15em; - border: solid .08em $fa-border-color; - border-radius: .1em; -} - -.#{$fa-css-prefix}-pull-left { float: left; } -.#{$fa-css-prefix}-pull-right { float: right; } - -.#{$fa-css-prefix} { - &.#{$fa-css-prefix}-pull-left { margin-right: .3em; } - &.#{$fa-css-prefix}-pull-right { margin-left: .3em; } -} - -/* Deprecated as of 4.4.0 */ -.pull-right { float: right; } -.pull-left { float: left; } - -.#{$fa-css-prefix} { - &.pull-left { margin-right: .3em; } - &.pull-right { margin-left: .3em; } -} diff --git a/site/_sass/vendor/font-awesome/_core.scss b/site/_sass/vendor/font-awesome/_core.scss deleted file mode 100644 index 7425ef85fc8..00000000000 --- a/site/_sass/vendor/font-awesome/_core.scss +++ /dev/null @@ -1,12 +0,0 @@ -// Base Class Definition -// ------------------------- - -.#{$fa-css-prefix} { - display: inline-block; - font: normal normal normal #{$fa-font-size-base}/#{$fa-line-height-base} FontAwesome; // shortening font declaration - font-size: inherit; // can't have font-size inherit on line above, so need to override - text-rendering: auto; // optimizelegibility throws things off #1094 - -webkit-font-smoothing: antialiased; - -moz-osx-font-smoothing: grayscale; - -} diff --git a/site/_sass/vendor/font-awesome/_fixed-width.scss b/site/_sass/vendor/font-awesome/_fixed-width.scss deleted file mode 100644 index b221c98133a..00000000000 --- a/site/_sass/vendor/font-awesome/_fixed-width.scss +++ /dev/null @@ -1,6 +0,0 @@ -// Fixed Width Icons -// ------------------------- -.#{$fa-css-prefix}-fw { - width: (18em / 14); - text-align: center; -} diff --git a/site/_sass/vendor/font-awesome/_icons.scss b/site/_sass/vendor/font-awesome/_icons.scss deleted file mode 100644 index e63e702c4d9..00000000000 --- a/site/_sass/vendor/font-awesome/_icons.scss +++ /dev/null @@ -1,789 +0,0 @@ -/* Font Awesome uses the Unicode 
Private Use Area (PUA) to ensure screen - readers do not read off random characters that represent icons */ - -.#{$fa-css-prefix}-glass:before { content: $fa-var-glass; } -.#{$fa-css-prefix}-music:before { content: $fa-var-music; } -.#{$fa-css-prefix}-search:before { content: $fa-var-search; } -.#{$fa-css-prefix}-envelope-o:before { content: $fa-var-envelope-o; } -.#{$fa-css-prefix}-heart:before { content: $fa-var-heart; } -.#{$fa-css-prefix}-star:before { content: $fa-var-star; } -.#{$fa-css-prefix}-star-o:before { content: $fa-var-star-o; } -.#{$fa-css-prefix}-user:before { content: $fa-var-user; } -.#{$fa-css-prefix}-film:before { content: $fa-var-film; } -.#{$fa-css-prefix}-th-large:before { content: $fa-var-th-large; } -.#{$fa-css-prefix}-th:before { content: $fa-var-th; } -.#{$fa-css-prefix}-th-list:before { content: $fa-var-th-list; } -.#{$fa-css-prefix}-check:before { content: $fa-var-check; } -.#{$fa-css-prefix}-remove:before, -.#{$fa-css-prefix}-close:before, -.#{$fa-css-prefix}-times:before { content: $fa-var-times; } -.#{$fa-css-prefix}-search-plus:before { content: $fa-var-search-plus; } -.#{$fa-css-prefix}-search-minus:before { content: $fa-var-search-minus; } -.#{$fa-css-prefix}-power-off:before { content: $fa-var-power-off; } -.#{$fa-css-prefix}-signal:before { content: $fa-var-signal; } -.#{$fa-css-prefix}-gear:before, -.#{$fa-css-prefix}-cog:before { content: $fa-var-cog; } -.#{$fa-css-prefix}-trash-o:before { content: $fa-var-trash-o; } -.#{$fa-css-prefix}-home:before { content: $fa-var-home; } -.#{$fa-css-prefix}-file-o:before { content: $fa-var-file-o; } -.#{$fa-css-prefix}-clock-o:before { content: $fa-var-clock-o; } -.#{$fa-css-prefix}-road:before { content: $fa-var-road; } -.#{$fa-css-prefix}-download:before { content: $fa-var-download; } -.#{$fa-css-prefix}-arrow-circle-o-down:before { content: $fa-var-arrow-circle-o-down; } -.#{$fa-css-prefix}-arrow-circle-o-up:before { content: $fa-var-arrow-circle-o-up; } -.#{$fa-css-prefix}-inbox:before { content: $fa-var-inbox; } -.#{$fa-css-prefix}-play-circle-o:before { content: $fa-var-play-circle-o; } -.#{$fa-css-prefix}-rotate-right:before, -.#{$fa-css-prefix}-repeat:before { content: $fa-var-repeat; } -.#{$fa-css-prefix}-refresh:before { content: $fa-var-refresh; } -.#{$fa-css-prefix}-list-alt:before { content: $fa-var-list-alt; } -.#{$fa-css-prefix}-lock:before { content: $fa-var-lock; } -.#{$fa-css-prefix}-flag:before { content: $fa-var-flag; } -.#{$fa-css-prefix}-headphones:before { content: $fa-var-headphones; } -.#{$fa-css-prefix}-volume-off:before { content: $fa-var-volume-off; } -.#{$fa-css-prefix}-volume-down:before { content: $fa-var-volume-down; } -.#{$fa-css-prefix}-volume-up:before { content: $fa-var-volume-up; } -.#{$fa-css-prefix}-qrcode:before { content: $fa-var-qrcode; } -.#{$fa-css-prefix}-barcode:before { content: $fa-var-barcode; } -.#{$fa-css-prefix}-tag:before { content: $fa-var-tag; } -.#{$fa-css-prefix}-tags:before { content: $fa-var-tags; } -.#{$fa-css-prefix}-book:before { content: $fa-var-book; } -.#{$fa-css-prefix}-bookmark:before { content: $fa-var-bookmark; } -.#{$fa-css-prefix}-print:before { content: $fa-var-print; } -.#{$fa-css-prefix}-camera:before { content: $fa-var-camera; } -.#{$fa-css-prefix}-font:before { content: $fa-var-font; } -.#{$fa-css-prefix}-bold:before { content: $fa-var-bold; } -.#{$fa-css-prefix}-italic:before { content: $fa-var-italic; } -.#{$fa-css-prefix}-text-height:before { content: $fa-var-text-height; } -.#{$fa-css-prefix}-text-width:before { content: 
$fa-var-text-width; } -.#{$fa-css-prefix}-align-left:before { content: $fa-var-align-left; } -.#{$fa-css-prefix}-align-center:before { content: $fa-var-align-center; } -.#{$fa-css-prefix}-align-right:before { content: $fa-var-align-right; } -.#{$fa-css-prefix}-align-justify:before { content: $fa-var-align-justify; } -.#{$fa-css-prefix}-list:before { content: $fa-var-list; } -.#{$fa-css-prefix}-dedent:before, -.#{$fa-css-prefix}-outdent:before { content: $fa-var-outdent; } -.#{$fa-css-prefix}-indent:before { content: $fa-var-indent; } -.#{$fa-css-prefix}-video-camera:before { content: $fa-var-video-camera; } -.#{$fa-css-prefix}-photo:before, -.#{$fa-css-prefix}-image:before, -.#{$fa-css-prefix}-picture-o:before { content: $fa-var-picture-o; } -.#{$fa-css-prefix}-pencil:before { content: $fa-var-pencil; } -.#{$fa-css-prefix}-map-marker:before { content: $fa-var-map-marker; } -.#{$fa-css-prefix}-adjust:before { content: $fa-var-adjust; } -.#{$fa-css-prefix}-tint:before { content: $fa-var-tint; } -.#{$fa-css-prefix}-edit:before, -.#{$fa-css-prefix}-pencil-square-o:before { content: $fa-var-pencil-square-o; } -.#{$fa-css-prefix}-share-square-o:before { content: $fa-var-share-square-o; } -.#{$fa-css-prefix}-check-square-o:before { content: $fa-var-check-square-o; } -.#{$fa-css-prefix}-arrows:before { content: $fa-var-arrows; } -.#{$fa-css-prefix}-step-backward:before { content: $fa-var-step-backward; } -.#{$fa-css-prefix}-fast-backward:before { content: $fa-var-fast-backward; } -.#{$fa-css-prefix}-backward:before { content: $fa-var-backward; } -.#{$fa-css-prefix}-play:before { content: $fa-var-play; } -.#{$fa-css-prefix}-pause:before { content: $fa-var-pause; } -.#{$fa-css-prefix}-stop:before { content: $fa-var-stop; } -.#{$fa-css-prefix}-forward:before { content: $fa-var-forward; } -.#{$fa-css-prefix}-fast-forward:before { content: $fa-var-fast-forward; } -.#{$fa-css-prefix}-step-forward:before { content: $fa-var-step-forward; } -.#{$fa-css-prefix}-eject:before { content: $fa-var-eject; } -.#{$fa-css-prefix}-chevron-left:before { content: $fa-var-chevron-left; } -.#{$fa-css-prefix}-chevron-right:before { content: $fa-var-chevron-right; } -.#{$fa-css-prefix}-plus-circle:before { content: $fa-var-plus-circle; } -.#{$fa-css-prefix}-minus-circle:before { content: $fa-var-minus-circle; } -.#{$fa-css-prefix}-times-circle:before { content: $fa-var-times-circle; } -.#{$fa-css-prefix}-check-circle:before { content: $fa-var-check-circle; } -.#{$fa-css-prefix}-question-circle:before { content: $fa-var-question-circle; } -.#{$fa-css-prefix}-info-circle:before { content: $fa-var-info-circle; } -.#{$fa-css-prefix}-crosshairs:before { content: $fa-var-crosshairs; } -.#{$fa-css-prefix}-times-circle-o:before { content: $fa-var-times-circle-o; } -.#{$fa-css-prefix}-check-circle-o:before { content: $fa-var-check-circle-o; } -.#{$fa-css-prefix}-ban:before { content: $fa-var-ban; } -.#{$fa-css-prefix}-arrow-left:before { content: $fa-var-arrow-left; } -.#{$fa-css-prefix}-arrow-right:before { content: $fa-var-arrow-right; } -.#{$fa-css-prefix}-arrow-up:before { content: $fa-var-arrow-up; } -.#{$fa-css-prefix}-arrow-down:before { content: $fa-var-arrow-down; } -.#{$fa-css-prefix}-mail-forward:before, -.#{$fa-css-prefix}-share:before { content: $fa-var-share; } -.#{$fa-css-prefix}-expand:before { content: $fa-var-expand; } -.#{$fa-css-prefix}-compress:before { content: $fa-var-compress; } -.#{$fa-css-prefix}-plus:before { content: $fa-var-plus; } -.#{$fa-css-prefix}-minus:before { content: $fa-var-minus; } 
-.#{$fa-css-prefix}-asterisk:before { content: $fa-var-asterisk; } -.#{$fa-css-prefix}-exclamation-circle:before { content: $fa-var-exclamation-circle; } -.#{$fa-css-prefix}-gift:before { content: $fa-var-gift; } -.#{$fa-css-prefix}-leaf:before { content: $fa-var-leaf; } -.#{$fa-css-prefix}-fire:before { content: $fa-var-fire; } -.#{$fa-css-prefix}-eye:before { content: $fa-var-eye; } -.#{$fa-css-prefix}-eye-slash:before { content: $fa-var-eye-slash; } -.#{$fa-css-prefix}-warning:before, -.#{$fa-css-prefix}-exclamation-triangle:before { content: $fa-var-exclamation-triangle; } -.#{$fa-css-prefix}-plane:before { content: $fa-var-plane; } -.#{$fa-css-prefix}-calendar:before { content: $fa-var-calendar; } -.#{$fa-css-prefix}-random:before { content: $fa-var-random; } -.#{$fa-css-prefix}-comment:before { content: $fa-var-comment; } -.#{$fa-css-prefix}-magnet:before { content: $fa-var-magnet; } -.#{$fa-css-prefix}-chevron-up:before { content: $fa-var-chevron-up; } -.#{$fa-css-prefix}-chevron-down:before { content: $fa-var-chevron-down; } -.#{$fa-css-prefix}-retweet:before { content: $fa-var-retweet; } -.#{$fa-css-prefix}-shopping-cart:before { content: $fa-var-shopping-cart; } -.#{$fa-css-prefix}-folder:before { content: $fa-var-folder; } -.#{$fa-css-prefix}-folder-open:before { content: $fa-var-folder-open; } -.#{$fa-css-prefix}-arrows-v:before { content: $fa-var-arrows-v; } -.#{$fa-css-prefix}-arrows-h:before { content: $fa-var-arrows-h; } -.#{$fa-css-prefix}-bar-chart-o:before, -.#{$fa-css-prefix}-bar-chart:before { content: $fa-var-bar-chart; } -.#{$fa-css-prefix}-twitter-square:before { content: $fa-var-twitter-square; } -.#{$fa-css-prefix}-facebook-square:before { content: $fa-var-facebook-square; } -.#{$fa-css-prefix}-camera-retro:before { content: $fa-var-camera-retro; } -.#{$fa-css-prefix}-key:before { content: $fa-var-key; } -.#{$fa-css-prefix}-gears:before, -.#{$fa-css-prefix}-cogs:before { content: $fa-var-cogs; } -.#{$fa-css-prefix}-comments:before { content: $fa-var-comments; } -.#{$fa-css-prefix}-thumbs-o-up:before { content: $fa-var-thumbs-o-up; } -.#{$fa-css-prefix}-thumbs-o-down:before { content: $fa-var-thumbs-o-down; } -.#{$fa-css-prefix}-star-half:before { content: $fa-var-star-half; } -.#{$fa-css-prefix}-heart-o:before { content: $fa-var-heart-o; } -.#{$fa-css-prefix}-sign-out:before { content: $fa-var-sign-out; } -.#{$fa-css-prefix}-linkedin-square:before { content: $fa-var-linkedin-square; } -.#{$fa-css-prefix}-thumb-tack:before { content: $fa-var-thumb-tack; } -.#{$fa-css-prefix}-external-link:before { content: $fa-var-external-link; } -.#{$fa-css-prefix}-sign-in:before { content: $fa-var-sign-in; } -.#{$fa-css-prefix}-trophy:before { content: $fa-var-trophy; } -.#{$fa-css-prefix}-github-square:before { content: $fa-var-github-square; } -.#{$fa-css-prefix}-upload:before { content: $fa-var-upload; } -.#{$fa-css-prefix}-lemon-o:before { content: $fa-var-lemon-o; } -.#{$fa-css-prefix}-phone:before { content: $fa-var-phone; } -.#{$fa-css-prefix}-square-o:before { content: $fa-var-square-o; } -.#{$fa-css-prefix}-bookmark-o:before { content: $fa-var-bookmark-o; } -.#{$fa-css-prefix}-phone-square:before { content: $fa-var-phone-square; } -.#{$fa-css-prefix}-twitter:before { content: $fa-var-twitter; } -.#{$fa-css-prefix}-facebook-f:before, -.#{$fa-css-prefix}-facebook:before { content: $fa-var-facebook; } -.#{$fa-css-prefix}-github:before { content: $fa-var-github; } -.#{$fa-css-prefix}-unlock:before { content: $fa-var-unlock; } -.#{$fa-css-prefix}-credit-card:before { 
content: $fa-var-credit-card; } -.#{$fa-css-prefix}-feed:before, -.#{$fa-css-prefix}-rss:before { content: $fa-var-rss; } -.#{$fa-css-prefix}-hdd-o:before { content: $fa-var-hdd-o; } -.#{$fa-css-prefix}-bullhorn:before { content: $fa-var-bullhorn; } -.#{$fa-css-prefix}-bell:before { content: $fa-var-bell; } -.#{$fa-css-prefix}-certificate:before { content: $fa-var-certificate; } -.#{$fa-css-prefix}-hand-o-right:before { content: $fa-var-hand-o-right; } -.#{$fa-css-prefix}-hand-o-left:before { content: $fa-var-hand-o-left; } -.#{$fa-css-prefix}-hand-o-up:before { content: $fa-var-hand-o-up; } -.#{$fa-css-prefix}-hand-o-down:before { content: $fa-var-hand-o-down; } -.#{$fa-css-prefix}-arrow-circle-left:before { content: $fa-var-arrow-circle-left; } -.#{$fa-css-prefix}-arrow-circle-right:before { content: $fa-var-arrow-circle-right; } -.#{$fa-css-prefix}-arrow-circle-up:before { content: $fa-var-arrow-circle-up; } -.#{$fa-css-prefix}-arrow-circle-down:before { content: $fa-var-arrow-circle-down; } -.#{$fa-css-prefix}-globe:before { content: $fa-var-globe; } -.#{$fa-css-prefix}-wrench:before { content: $fa-var-wrench; } -.#{$fa-css-prefix}-tasks:before { content: $fa-var-tasks; } -.#{$fa-css-prefix}-filter:before { content: $fa-var-filter; } -.#{$fa-css-prefix}-briefcase:before { content: $fa-var-briefcase; } -.#{$fa-css-prefix}-arrows-alt:before { content: $fa-var-arrows-alt; } -.#{$fa-css-prefix}-group:before, -.#{$fa-css-prefix}-users:before { content: $fa-var-users; } -.#{$fa-css-prefix}-chain:before, -.#{$fa-css-prefix}-link:before { content: $fa-var-link; } -.#{$fa-css-prefix}-cloud:before { content: $fa-var-cloud; } -.#{$fa-css-prefix}-flask:before { content: $fa-var-flask; } -.#{$fa-css-prefix}-cut:before, -.#{$fa-css-prefix}-scissors:before { content: $fa-var-scissors; } -.#{$fa-css-prefix}-copy:before, -.#{$fa-css-prefix}-files-o:before { content: $fa-var-files-o; } -.#{$fa-css-prefix}-paperclip:before { content: $fa-var-paperclip; } -.#{$fa-css-prefix}-save:before, -.#{$fa-css-prefix}-floppy-o:before { content: $fa-var-floppy-o; } -.#{$fa-css-prefix}-square:before { content: $fa-var-square; } -.#{$fa-css-prefix}-navicon:before, -.#{$fa-css-prefix}-reorder:before, -.#{$fa-css-prefix}-bars:before { content: $fa-var-bars; } -.#{$fa-css-prefix}-list-ul:before { content: $fa-var-list-ul; } -.#{$fa-css-prefix}-list-ol:before { content: $fa-var-list-ol; } -.#{$fa-css-prefix}-strikethrough:before { content: $fa-var-strikethrough; } -.#{$fa-css-prefix}-underline:before { content: $fa-var-underline; } -.#{$fa-css-prefix}-table:before { content: $fa-var-table; } -.#{$fa-css-prefix}-magic:before { content: $fa-var-magic; } -.#{$fa-css-prefix}-truck:before { content: $fa-var-truck; } -.#{$fa-css-prefix}-pinterest:before { content: $fa-var-pinterest; } -.#{$fa-css-prefix}-pinterest-square:before { content: $fa-var-pinterest-square; } -.#{$fa-css-prefix}-google-plus-square:before { content: $fa-var-google-plus-square; } -.#{$fa-css-prefix}-google-plus:before { content: $fa-var-google-plus; } -.#{$fa-css-prefix}-money:before { content: $fa-var-money; } -.#{$fa-css-prefix}-caret-down:before { content: $fa-var-caret-down; } -.#{$fa-css-prefix}-caret-up:before { content: $fa-var-caret-up; } -.#{$fa-css-prefix}-caret-left:before { content: $fa-var-caret-left; } -.#{$fa-css-prefix}-caret-right:before { content: $fa-var-caret-right; } -.#{$fa-css-prefix}-columns:before { content: $fa-var-columns; } -.#{$fa-css-prefix}-unsorted:before, -.#{$fa-css-prefix}-sort:before { content: $fa-var-sort; } 
-.#{$fa-css-prefix}-sort-down:before, -.#{$fa-css-prefix}-sort-desc:before { content: $fa-var-sort-desc; } -.#{$fa-css-prefix}-sort-up:before, -.#{$fa-css-prefix}-sort-asc:before { content: $fa-var-sort-asc; } -.#{$fa-css-prefix}-envelope:before { content: $fa-var-envelope; } -.#{$fa-css-prefix}-linkedin:before { content: $fa-var-linkedin; } -.#{$fa-css-prefix}-rotate-left:before, -.#{$fa-css-prefix}-undo:before { content: $fa-var-undo; } -.#{$fa-css-prefix}-legal:before, -.#{$fa-css-prefix}-gavel:before { content: $fa-var-gavel; } -.#{$fa-css-prefix}-dashboard:before, -.#{$fa-css-prefix}-tachometer:before { content: $fa-var-tachometer; } -.#{$fa-css-prefix}-comment-o:before { content: $fa-var-comment-o; } -.#{$fa-css-prefix}-comments-o:before { content: $fa-var-comments-o; } -.#{$fa-css-prefix}-flash:before, -.#{$fa-css-prefix}-bolt:before { content: $fa-var-bolt; } -.#{$fa-css-prefix}-sitemap:before { content: $fa-var-sitemap; } -.#{$fa-css-prefix}-umbrella:before { content: $fa-var-umbrella; } -.#{$fa-css-prefix}-paste:before, -.#{$fa-css-prefix}-clipboard:before { content: $fa-var-clipboard; } -.#{$fa-css-prefix}-lightbulb-o:before { content: $fa-var-lightbulb-o; } -.#{$fa-css-prefix}-exchange:before { content: $fa-var-exchange; } -.#{$fa-css-prefix}-cloud-download:before { content: $fa-var-cloud-download; } -.#{$fa-css-prefix}-cloud-upload:before { content: $fa-var-cloud-upload; } -.#{$fa-css-prefix}-user-md:before { content: $fa-var-user-md; } -.#{$fa-css-prefix}-stethoscope:before { content: $fa-var-stethoscope; } -.#{$fa-css-prefix}-suitcase:before { content: $fa-var-suitcase; } -.#{$fa-css-prefix}-bell-o:before { content: $fa-var-bell-o; } -.#{$fa-css-prefix}-coffee:before { content: $fa-var-coffee; } -.#{$fa-css-prefix}-cutlery:before { content: $fa-var-cutlery; } -.#{$fa-css-prefix}-file-text-o:before { content: $fa-var-file-text-o; } -.#{$fa-css-prefix}-building-o:before { content: $fa-var-building-o; } -.#{$fa-css-prefix}-hospital-o:before { content: $fa-var-hospital-o; } -.#{$fa-css-prefix}-ambulance:before { content: $fa-var-ambulance; } -.#{$fa-css-prefix}-medkit:before { content: $fa-var-medkit; } -.#{$fa-css-prefix}-fighter-jet:before { content: $fa-var-fighter-jet; } -.#{$fa-css-prefix}-beer:before { content: $fa-var-beer; } -.#{$fa-css-prefix}-h-square:before { content: $fa-var-h-square; } -.#{$fa-css-prefix}-plus-square:before { content: $fa-var-plus-square; } -.#{$fa-css-prefix}-angle-double-left:before { content: $fa-var-angle-double-left; } -.#{$fa-css-prefix}-angle-double-right:before { content: $fa-var-angle-double-right; } -.#{$fa-css-prefix}-angle-double-up:before { content: $fa-var-angle-double-up; } -.#{$fa-css-prefix}-angle-double-down:before { content: $fa-var-angle-double-down; } -.#{$fa-css-prefix}-angle-left:before { content: $fa-var-angle-left; } -.#{$fa-css-prefix}-angle-right:before { content: $fa-var-angle-right; } -.#{$fa-css-prefix}-angle-up:before { content: $fa-var-angle-up; } -.#{$fa-css-prefix}-angle-down:before { content: $fa-var-angle-down; } -.#{$fa-css-prefix}-desktop:before { content: $fa-var-desktop; } -.#{$fa-css-prefix}-laptop:before { content: $fa-var-laptop; } -.#{$fa-css-prefix}-tablet:before { content: $fa-var-tablet; } -.#{$fa-css-prefix}-mobile-phone:before, -.#{$fa-css-prefix}-mobile:before { content: $fa-var-mobile; } -.#{$fa-css-prefix}-circle-o:before { content: $fa-var-circle-o; } -.#{$fa-css-prefix}-quote-left:before { content: $fa-var-quote-left; } -.#{$fa-css-prefix}-quote-right:before { content: $fa-var-quote-right; } 
-.#{$fa-css-prefix}-spinner:before { content: $fa-var-spinner; } -.#{$fa-css-prefix}-circle:before { content: $fa-var-circle; } -.#{$fa-css-prefix}-mail-reply:before, -.#{$fa-css-prefix}-reply:before { content: $fa-var-reply; } -.#{$fa-css-prefix}-github-alt:before { content: $fa-var-github-alt; } -.#{$fa-css-prefix}-folder-o:before { content: $fa-var-folder-o; } -.#{$fa-css-prefix}-folder-open-o:before { content: $fa-var-folder-open-o; } -.#{$fa-css-prefix}-smile-o:before { content: $fa-var-smile-o; } -.#{$fa-css-prefix}-frown-o:before { content: $fa-var-frown-o; } -.#{$fa-css-prefix}-meh-o:before { content: $fa-var-meh-o; } -.#{$fa-css-prefix}-gamepad:before { content: $fa-var-gamepad; } -.#{$fa-css-prefix}-keyboard-o:before { content: $fa-var-keyboard-o; } -.#{$fa-css-prefix}-flag-o:before { content: $fa-var-flag-o; } -.#{$fa-css-prefix}-flag-checkered:before { content: $fa-var-flag-checkered; } -.#{$fa-css-prefix}-terminal:before { content: $fa-var-terminal; } -.#{$fa-css-prefix}-code:before { content: $fa-var-code; } -.#{$fa-css-prefix}-mail-reply-all:before, -.#{$fa-css-prefix}-reply-all:before { content: $fa-var-reply-all; } -.#{$fa-css-prefix}-star-half-empty:before, -.#{$fa-css-prefix}-star-half-full:before, -.#{$fa-css-prefix}-star-half-o:before { content: $fa-var-star-half-o; } -.#{$fa-css-prefix}-location-arrow:before { content: $fa-var-location-arrow; } -.#{$fa-css-prefix}-crop:before { content: $fa-var-crop; } -.#{$fa-css-prefix}-code-fork:before { content: $fa-var-code-fork; } -.#{$fa-css-prefix}-unlink:before, -.#{$fa-css-prefix}-chain-broken:before { content: $fa-var-chain-broken; } -.#{$fa-css-prefix}-question:before { content: $fa-var-question; } -.#{$fa-css-prefix}-info:before { content: $fa-var-info; } -.#{$fa-css-prefix}-exclamation:before { content: $fa-var-exclamation; } -.#{$fa-css-prefix}-superscript:before { content: $fa-var-superscript; } -.#{$fa-css-prefix}-subscript:before { content: $fa-var-subscript; } -.#{$fa-css-prefix}-eraser:before { content: $fa-var-eraser; } -.#{$fa-css-prefix}-puzzle-piece:before { content: $fa-var-puzzle-piece; } -.#{$fa-css-prefix}-microphone:before { content: $fa-var-microphone; } -.#{$fa-css-prefix}-microphone-slash:before { content: $fa-var-microphone-slash; } -.#{$fa-css-prefix}-shield:before { content: $fa-var-shield; } -.#{$fa-css-prefix}-calendar-o:before { content: $fa-var-calendar-o; } -.#{$fa-css-prefix}-fire-extinguisher:before { content: $fa-var-fire-extinguisher; } -.#{$fa-css-prefix}-rocket:before { content: $fa-var-rocket; } -.#{$fa-css-prefix}-maxcdn:before { content: $fa-var-maxcdn; } -.#{$fa-css-prefix}-chevron-circle-left:before { content: $fa-var-chevron-circle-left; } -.#{$fa-css-prefix}-chevron-circle-right:before { content: $fa-var-chevron-circle-right; } -.#{$fa-css-prefix}-chevron-circle-up:before { content: $fa-var-chevron-circle-up; } -.#{$fa-css-prefix}-chevron-circle-down:before { content: $fa-var-chevron-circle-down; } -.#{$fa-css-prefix}-html5:before { content: $fa-var-html5; } -.#{$fa-css-prefix}-css3:before { content: $fa-var-css3; } -.#{$fa-css-prefix}-anchor:before { content: $fa-var-anchor; } -.#{$fa-css-prefix}-unlock-alt:before { content: $fa-var-unlock-alt; } -.#{$fa-css-prefix}-bullseye:before { content: $fa-var-bullseye; } -.#{$fa-css-prefix}-ellipsis-h:before { content: $fa-var-ellipsis-h; } -.#{$fa-css-prefix}-ellipsis-v:before { content: $fa-var-ellipsis-v; } -.#{$fa-css-prefix}-rss-square:before { content: $fa-var-rss-square; } -.#{$fa-css-prefix}-play-circle:before { content: 
$fa-var-play-circle; } -.#{$fa-css-prefix}-ticket:before { content: $fa-var-ticket; } -.#{$fa-css-prefix}-minus-square:before { content: $fa-var-minus-square; } -.#{$fa-css-prefix}-minus-square-o:before { content: $fa-var-minus-square-o; } -.#{$fa-css-prefix}-level-up:before { content: $fa-var-level-up; } -.#{$fa-css-prefix}-level-down:before { content: $fa-var-level-down; } -.#{$fa-css-prefix}-check-square:before { content: $fa-var-check-square; } -.#{$fa-css-prefix}-pencil-square:before { content: $fa-var-pencil-square; } -.#{$fa-css-prefix}-external-link-square:before { content: $fa-var-external-link-square; } -.#{$fa-css-prefix}-share-square:before { content: $fa-var-share-square; } -.#{$fa-css-prefix}-compass:before { content: $fa-var-compass; } -.#{$fa-css-prefix}-toggle-down:before, -.#{$fa-css-prefix}-caret-square-o-down:before { content: $fa-var-caret-square-o-down; } -.#{$fa-css-prefix}-toggle-up:before, -.#{$fa-css-prefix}-caret-square-o-up:before { content: $fa-var-caret-square-o-up; } -.#{$fa-css-prefix}-toggle-right:before, -.#{$fa-css-prefix}-caret-square-o-right:before { content: $fa-var-caret-square-o-right; } -.#{$fa-css-prefix}-euro:before, -.#{$fa-css-prefix}-eur:before { content: $fa-var-eur; } -.#{$fa-css-prefix}-gbp:before { content: $fa-var-gbp; } -.#{$fa-css-prefix}-dollar:before, -.#{$fa-css-prefix}-usd:before { content: $fa-var-usd; } -.#{$fa-css-prefix}-rupee:before, -.#{$fa-css-prefix}-inr:before { content: $fa-var-inr; } -.#{$fa-css-prefix}-cny:before, -.#{$fa-css-prefix}-rmb:before, -.#{$fa-css-prefix}-yen:before, -.#{$fa-css-prefix}-jpy:before { content: $fa-var-jpy; } -.#{$fa-css-prefix}-ruble:before, -.#{$fa-css-prefix}-rouble:before, -.#{$fa-css-prefix}-rub:before { content: $fa-var-rub; } -.#{$fa-css-prefix}-won:before, -.#{$fa-css-prefix}-krw:before { content: $fa-var-krw; } -.#{$fa-css-prefix}-bitcoin:before, -.#{$fa-css-prefix}-btc:before { content: $fa-var-btc; } -.#{$fa-css-prefix}-file:before { content: $fa-var-file; } -.#{$fa-css-prefix}-file-text:before { content: $fa-var-file-text; } -.#{$fa-css-prefix}-sort-alpha-asc:before { content: $fa-var-sort-alpha-asc; } -.#{$fa-css-prefix}-sort-alpha-desc:before { content: $fa-var-sort-alpha-desc; } -.#{$fa-css-prefix}-sort-amount-asc:before { content: $fa-var-sort-amount-asc; } -.#{$fa-css-prefix}-sort-amount-desc:before { content: $fa-var-sort-amount-desc; } -.#{$fa-css-prefix}-sort-numeric-asc:before { content: $fa-var-sort-numeric-asc; } -.#{$fa-css-prefix}-sort-numeric-desc:before { content: $fa-var-sort-numeric-desc; } -.#{$fa-css-prefix}-thumbs-up:before { content: $fa-var-thumbs-up; } -.#{$fa-css-prefix}-thumbs-down:before { content: $fa-var-thumbs-down; } -.#{$fa-css-prefix}-youtube-square:before { content: $fa-var-youtube-square; } -.#{$fa-css-prefix}-youtube:before { content: $fa-var-youtube; } -.#{$fa-css-prefix}-xing:before { content: $fa-var-xing; } -.#{$fa-css-prefix}-xing-square:before { content: $fa-var-xing-square; } -.#{$fa-css-prefix}-youtube-play:before { content: $fa-var-youtube-play; } -.#{$fa-css-prefix}-dropbox:before { content: $fa-var-dropbox; } -.#{$fa-css-prefix}-stack-overflow:before { content: $fa-var-stack-overflow; } -.#{$fa-css-prefix}-instagram:before { content: $fa-var-instagram; } -.#{$fa-css-prefix}-flickr:before { content: $fa-var-flickr; } -.#{$fa-css-prefix}-adn:before { content: $fa-var-adn; } -.#{$fa-css-prefix}-bitbucket:before { content: $fa-var-bitbucket; } -.#{$fa-css-prefix}-bitbucket-square:before { content: $fa-var-bitbucket-square; } 
-.#{$fa-css-prefix}-tumblr:before { content: $fa-var-tumblr; } -.#{$fa-css-prefix}-tumblr-square:before { content: $fa-var-tumblr-square; } -.#{$fa-css-prefix}-long-arrow-down:before { content: $fa-var-long-arrow-down; } -.#{$fa-css-prefix}-long-arrow-up:before { content: $fa-var-long-arrow-up; } -.#{$fa-css-prefix}-long-arrow-left:before { content: $fa-var-long-arrow-left; } -.#{$fa-css-prefix}-long-arrow-right:before { content: $fa-var-long-arrow-right; } -.#{$fa-css-prefix}-apple:before { content: $fa-var-apple; } -.#{$fa-css-prefix}-windows:before { content: $fa-var-windows; } -.#{$fa-css-prefix}-android:before { content: $fa-var-android; } -.#{$fa-css-prefix}-linux:before { content: $fa-var-linux; } -.#{$fa-css-prefix}-dribbble:before { content: $fa-var-dribbble; } -.#{$fa-css-prefix}-skype:before { content: $fa-var-skype; } -.#{$fa-css-prefix}-foursquare:before { content: $fa-var-foursquare; } -.#{$fa-css-prefix}-trello:before { content: $fa-var-trello; } -.#{$fa-css-prefix}-female:before { content: $fa-var-female; } -.#{$fa-css-prefix}-male:before { content: $fa-var-male; } -.#{$fa-css-prefix}-gittip:before, -.#{$fa-css-prefix}-gratipay:before { content: $fa-var-gratipay; } -.#{$fa-css-prefix}-sun-o:before { content: $fa-var-sun-o; } -.#{$fa-css-prefix}-moon-o:before { content: $fa-var-moon-o; } -.#{$fa-css-prefix}-archive:before { content: $fa-var-archive; } -.#{$fa-css-prefix}-bug:before { content: $fa-var-bug; } -.#{$fa-css-prefix}-vk:before { content: $fa-var-vk; } -.#{$fa-css-prefix}-weibo:before { content: $fa-var-weibo; } -.#{$fa-css-prefix}-renren:before { content: $fa-var-renren; } -.#{$fa-css-prefix}-pagelines:before { content: $fa-var-pagelines; } -.#{$fa-css-prefix}-stack-exchange:before { content: $fa-var-stack-exchange; } -.#{$fa-css-prefix}-arrow-circle-o-right:before { content: $fa-var-arrow-circle-o-right; } -.#{$fa-css-prefix}-arrow-circle-o-left:before { content: $fa-var-arrow-circle-o-left; } -.#{$fa-css-prefix}-toggle-left:before, -.#{$fa-css-prefix}-caret-square-o-left:before { content: $fa-var-caret-square-o-left; } -.#{$fa-css-prefix}-dot-circle-o:before { content: $fa-var-dot-circle-o; } -.#{$fa-css-prefix}-wheelchair:before { content: $fa-var-wheelchair; } -.#{$fa-css-prefix}-vimeo-square:before { content: $fa-var-vimeo-square; } -.#{$fa-css-prefix}-turkish-lira:before, -.#{$fa-css-prefix}-try:before { content: $fa-var-try; } -.#{$fa-css-prefix}-plus-square-o:before { content: $fa-var-plus-square-o; } -.#{$fa-css-prefix}-space-shuttle:before { content: $fa-var-space-shuttle; } -.#{$fa-css-prefix}-slack:before { content: $fa-var-slack; } -.#{$fa-css-prefix}-envelope-square:before { content: $fa-var-envelope-square; } -.#{$fa-css-prefix}-wordpress:before { content: $fa-var-wordpress; } -.#{$fa-css-prefix}-openid:before { content: $fa-var-openid; } -.#{$fa-css-prefix}-institution:before, -.#{$fa-css-prefix}-bank:before, -.#{$fa-css-prefix}-university:before { content: $fa-var-university; } -.#{$fa-css-prefix}-mortar-board:before, -.#{$fa-css-prefix}-graduation-cap:before { content: $fa-var-graduation-cap; } -.#{$fa-css-prefix}-yahoo:before { content: $fa-var-yahoo; } -.#{$fa-css-prefix}-google:before { content: $fa-var-google; } -.#{$fa-css-prefix}-reddit:before { content: $fa-var-reddit; } -.#{$fa-css-prefix}-reddit-square:before { content: $fa-var-reddit-square; } -.#{$fa-css-prefix}-stumbleupon-circle:before { content: $fa-var-stumbleupon-circle; } -.#{$fa-css-prefix}-stumbleupon:before { content: $fa-var-stumbleupon; } -.#{$fa-css-prefix}-delicious:before 
{ content: $fa-var-delicious; } -.#{$fa-css-prefix}-digg:before { content: $fa-var-digg; } -.#{$fa-css-prefix}-pied-piper-pp:before { content: $fa-var-pied-piper-pp; } -.#{$fa-css-prefix}-pied-piper-alt:before { content: $fa-var-pied-piper-alt; } -.#{$fa-css-prefix}-drupal:before { content: $fa-var-drupal; } -.#{$fa-css-prefix}-joomla:before { content: $fa-var-joomla; } -.#{$fa-css-prefix}-language:before { content: $fa-var-language; } -.#{$fa-css-prefix}-fax:before { content: $fa-var-fax; } -.#{$fa-css-prefix}-building:before { content: $fa-var-building; } -.#{$fa-css-prefix}-child:before { content: $fa-var-child; } -.#{$fa-css-prefix}-paw:before { content: $fa-var-paw; } -.#{$fa-css-prefix}-spoon:before { content: $fa-var-spoon; } -.#{$fa-css-prefix}-cube:before { content: $fa-var-cube; } -.#{$fa-css-prefix}-cubes:before { content: $fa-var-cubes; } -.#{$fa-css-prefix}-behance:before { content: $fa-var-behance; } -.#{$fa-css-prefix}-behance-square:before { content: $fa-var-behance-square; } -.#{$fa-css-prefix}-steam:before { content: $fa-var-steam; } -.#{$fa-css-prefix}-steam-square:before { content: $fa-var-steam-square; } -.#{$fa-css-prefix}-recycle:before { content: $fa-var-recycle; } -.#{$fa-css-prefix}-automobile:before, -.#{$fa-css-prefix}-car:before { content: $fa-var-car; } -.#{$fa-css-prefix}-cab:before, -.#{$fa-css-prefix}-taxi:before { content: $fa-var-taxi; } -.#{$fa-css-prefix}-tree:before { content: $fa-var-tree; } -.#{$fa-css-prefix}-spotify:before { content: $fa-var-spotify; } -.#{$fa-css-prefix}-deviantart:before { content: $fa-var-deviantart; } -.#{$fa-css-prefix}-soundcloud:before { content: $fa-var-soundcloud; } -.#{$fa-css-prefix}-database:before { content: $fa-var-database; } -.#{$fa-css-prefix}-file-pdf-o:before { content: $fa-var-file-pdf-o; } -.#{$fa-css-prefix}-file-word-o:before { content: $fa-var-file-word-o; } -.#{$fa-css-prefix}-file-excel-o:before { content: $fa-var-file-excel-o; } -.#{$fa-css-prefix}-file-powerpoint-o:before { content: $fa-var-file-powerpoint-o; } -.#{$fa-css-prefix}-file-photo-o:before, -.#{$fa-css-prefix}-file-picture-o:before, -.#{$fa-css-prefix}-file-image-o:before { content: $fa-var-file-image-o; } -.#{$fa-css-prefix}-file-zip-o:before, -.#{$fa-css-prefix}-file-archive-o:before { content: $fa-var-file-archive-o; } -.#{$fa-css-prefix}-file-sound-o:before, -.#{$fa-css-prefix}-file-audio-o:before { content: $fa-var-file-audio-o; } -.#{$fa-css-prefix}-file-movie-o:before, -.#{$fa-css-prefix}-file-video-o:before { content: $fa-var-file-video-o; } -.#{$fa-css-prefix}-file-code-o:before { content: $fa-var-file-code-o; } -.#{$fa-css-prefix}-vine:before { content: $fa-var-vine; } -.#{$fa-css-prefix}-codepen:before { content: $fa-var-codepen; } -.#{$fa-css-prefix}-jsfiddle:before { content: $fa-var-jsfiddle; } -.#{$fa-css-prefix}-life-bouy:before, -.#{$fa-css-prefix}-life-buoy:before, -.#{$fa-css-prefix}-life-saver:before, -.#{$fa-css-prefix}-support:before, -.#{$fa-css-prefix}-life-ring:before { content: $fa-var-life-ring; } -.#{$fa-css-prefix}-circle-o-notch:before { content: $fa-var-circle-o-notch; } -.#{$fa-css-prefix}-ra:before, -.#{$fa-css-prefix}-resistance:before, -.#{$fa-css-prefix}-rebel:before { content: $fa-var-rebel; } -.#{$fa-css-prefix}-ge:before, -.#{$fa-css-prefix}-empire:before { content: $fa-var-empire; } -.#{$fa-css-prefix}-git-square:before { content: $fa-var-git-square; } -.#{$fa-css-prefix}-git:before { content: $fa-var-git; } -.#{$fa-css-prefix}-y-combinator-square:before, -.#{$fa-css-prefix}-yc-square:before, 
-.#{$fa-css-prefix}-hacker-news:before { content: $fa-var-hacker-news; } -.#{$fa-css-prefix}-tencent-weibo:before { content: $fa-var-tencent-weibo; } -.#{$fa-css-prefix}-qq:before { content: $fa-var-qq; } -.#{$fa-css-prefix}-wechat:before, -.#{$fa-css-prefix}-weixin:before { content: $fa-var-weixin; } -.#{$fa-css-prefix}-send:before, -.#{$fa-css-prefix}-paper-plane:before { content: $fa-var-paper-plane; } -.#{$fa-css-prefix}-send-o:before, -.#{$fa-css-prefix}-paper-plane-o:before { content: $fa-var-paper-plane-o; } -.#{$fa-css-prefix}-history:before { content: $fa-var-history; } -.#{$fa-css-prefix}-circle-thin:before { content: $fa-var-circle-thin; } -.#{$fa-css-prefix}-header:before { content: $fa-var-header; } -.#{$fa-css-prefix}-paragraph:before { content: $fa-var-paragraph; } -.#{$fa-css-prefix}-sliders:before { content: $fa-var-sliders; } -.#{$fa-css-prefix}-share-alt:before { content: $fa-var-share-alt; } -.#{$fa-css-prefix}-share-alt-square:before { content: $fa-var-share-alt-square; } -.#{$fa-css-prefix}-bomb:before { content: $fa-var-bomb; } -.#{$fa-css-prefix}-soccer-ball-o:before, -.#{$fa-css-prefix}-futbol-o:before { content: $fa-var-futbol-o; } -.#{$fa-css-prefix}-tty:before { content: $fa-var-tty; } -.#{$fa-css-prefix}-binoculars:before { content: $fa-var-binoculars; } -.#{$fa-css-prefix}-plug:before { content: $fa-var-plug; } -.#{$fa-css-prefix}-slideshare:before { content: $fa-var-slideshare; } -.#{$fa-css-prefix}-twitch:before { content: $fa-var-twitch; } -.#{$fa-css-prefix}-yelp:before { content: $fa-var-yelp; } -.#{$fa-css-prefix}-newspaper-o:before { content: $fa-var-newspaper-o; } -.#{$fa-css-prefix}-wifi:before { content: $fa-var-wifi; } -.#{$fa-css-prefix}-calculator:before { content: $fa-var-calculator; } -.#{$fa-css-prefix}-paypal:before { content: $fa-var-paypal; } -.#{$fa-css-prefix}-google-wallet:before { content: $fa-var-google-wallet; } -.#{$fa-css-prefix}-cc-visa:before { content: $fa-var-cc-visa; } -.#{$fa-css-prefix}-cc-mastercard:before { content: $fa-var-cc-mastercard; } -.#{$fa-css-prefix}-cc-discover:before { content: $fa-var-cc-discover; } -.#{$fa-css-prefix}-cc-amex:before { content: $fa-var-cc-amex; } -.#{$fa-css-prefix}-cc-paypal:before { content: $fa-var-cc-paypal; } -.#{$fa-css-prefix}-cc-stripe:before { content: $fa-var-cc-stripe; } -.#{$fa-css-prefix}-bell-slash:before { content: $fa-var-bell-slash; } -.#{$fa-css-prefix}-bell-slash-o:before { content: $fa-var-bell-slash-o; } -.#{$fa-css-prefix}-trash:before { content: $fa-var-trash; } -.#{$fa-css-prefix}-copyright:before { content: $fa-var-copyright; } -.#{$fa-css-prefix}-at:before { content: $fa-var-at; } -.#{$fa-css-prefix}-eyedropper:before { content: $fa-var-eyedropper; } -.#{$fa-css-prefix}-paint-brush:before { content: $fa-var-paint-brush; } -.#{$fa-css-prefix}-birthday-cake:before { content: $fa-var-birthday-cake; } -.#{$fa-css-prefix}-area-chart:before { content: $fa-var-area-chart; } -.#{$fa-css-prefix}-pie-chart:before { content: $fa-var-pie-chart; } -.#{$fa-css-prefix}-line-chart:before { content: $fa-var-line-chart; } -.#{$fa-css-prefix}-lastfm:before { content: $fa-var-lastfm; } -.#{$fa-css-prefix}-lastfm-square:before { content: $fa-var-lastfm-square; } -.#{$fa-css-prefix}-toggle-off:before { content: $fa-var-toggle-off; } -.#{$fa-css-prefix}-toggle-on:before { content: $fa-var-toggle-on; } -.#{$fa-css-prefix}-bicycle:before { content: $fa-var-bicycle; } -.#{$fa-css-prefix}-bus:before { content: $fa-var-bus; } -.#{$fa-css-prefix}-ioxhost:before { content: $fa-var-ioxhost; } 
-.#{$fa-css-prefix}-angellist:before { content: $fa-var-angellist; } -.#{$fa-css-prefix}-cc:before { content: $fa-var-cc; } -.#{$fa-css-prefix}-shekel:before, -.#{$fa-css-prefix}-sheqel:before, -.#{$fa-css-prefix}-ils:before { content: $fa-var-ils; } -.#{$fa-css-prefix}-meanpath:before { content: $fa-var-meanpath; } -.#{$fa-css-prefix}-buysellads:before { content: $fa-var-buysellads; } -.#{$fa-css-prefix}-connectdevelop:before { content: $fa-var-connectdevelop; } -.#{$fa-css-prefix}-dashcube:before { content: $fa-var-dashcube; } -.#{$fa-css-prefix}-forumbee:before { content: $fa-var-forumbee; } -.#{$fa-css-prefix}-leanpub:before { content: $fa-var-leanpub; } -.#{$fa-css-prefix}-sellsy:before { content: $fa-var-sellsy; } -.#{$fa-css-prefix}-shirtsinbulk:before { content: $fa-var-shirtsinbulk; } -.#{$fa-css-prefix}-simplybuilt:before { content: $fa-var-simplybuilt; } -.#{$fa-css-prefix}-skyatlas:before { content: $fa-var-skyatlas; } -.#{$fa-css-prefix}-cart-plus:before { content: $fa-var-cart-plus; } -.#{$fa-css-prefix}-cart-arrow-down:before { content: $fa-var-cart-arrow-down; } -.#{$fa-css-prefix}-diamond:before { content: $fa-var-diamond; } -.#{$fa-css-prefix}-ship:before { content: $fa-var-ship; } -.#{$fa-css-prefix}-user-secret:before { content: $fa-var-user-secret; } -.#{$fa-css-prefix}-motorcycle:before { content: $fa-var-motorcycle; } -.#{$fa-css-prefix}-street-view:before { content: $fa-var-street-view; } -.#{$fa-css-prefix}-heartbeat:before { content: $fa-var-heartbeat; } -.#{$fa-css-prefix}-venus:before { content: $fa-var-venus; } -.#{$fa-css-prefix}-mars:before { content: $fa-var-mars; } -.#{$fa-css-prefix}-mercury:before { content: $fa-var-mercury; } -.#{$fa-css-prefix}-intersex:before, -.#{$fa-css-prefix}-transgender:before { content: $fa-var-transgender; } -.#{$fa-css-prefix}-transgender-alt:before { content: $fa-var-transgender-alt; } -.#{$fa-css-prefix}-venus-double:before { content: $fa-var-venus-double; } -.#{$fa-css-prefix}-mars-double:before { content: $fa-var-mars-double; } -.#{$fa-css-prefix}-venus-mars:before { content: $fa-var-venus-mars; } -.#{$fa-css-prefix}-mars-stroke:before { content: $fa-var-mars-stroke; } -.#{$fa-css-prefix}-mars-stroke-v:before { content: $fa-var-mars-stroke-v; } -.#{$fa-css-prefix}-mars-stroke-h:before { content: $fa-var-mars-stroke-h; } -.#{$fa-css-prefix}-neuter:before { content: $fa-var-neuter; } -.#{$fa-css-prefix}-genderless:before { content: $fa-var-genderless; } -.#{$fa-css-prefix}-facebook-official:before { content: $fa-var-facebook-official; } -.#{$fa-css-prefix}-pinterest-p:before { content: $fa-var-pinterest-p; } -.#{$fa-css-prefix}-whatsapp:before { content: $fa-var-whatsapp; } -.#{$fa-css-prefix}-server:before { content: $fa-var-server; } -.#{$fa-css-prefix}-user-plus:before { content: $fa-var-user-plus; } -.#{$fa-css-prefix}-user-times:before { content: $fa-var-user-times; } -.#{$fa-css-prefix}-hotel:before, -.#{$fa-css-prefix}-bed:before { content: $fa-var-bed; } -.#{$fa-css-prefix}-viacoin:before { content: $fa-var-viacoin; } -.#{$fa-css-prefix}-train:before { content: $fa-var-train; } -.#{$fa-css-prefix}-subway:before { content: $fa-var-subway; } -.#{$fa-css-prefix}-medium:before { content: $fa-var-medium; } -.#{$fa-css-prefix}-yc:before, -.#{$fa-css-prefix}-y-combinator:before { content: $fa-var-y-combinator; } -.#{$fa-css-prefix}-optin-monster:before { content: $fa-var-optin-monster; } -.#{$fa-css-prefix}-opencart:before { content: $fa-var-opencart; } -.#{$fa-css-prefix}-expeditedssl:before { content: $fa-var-expeditedssl; 
} -.#{$fa-css-prefix}-battery-4:before, -.#{$fa-css-prefix}-battery:before, -.#{$fa-css-prefix}-battery-full:before { content: $fa-var-battery-full; } -.#{$fa-css-prefix}-battery-3:before, -.#{$fa-css-prefix}-battery-three-quarters:before { content: $fa-var-battery-three-quarters; } -.#{$fa-css-prefix}-battery-2:before, -.#{$fa-css-prefix}-battery-half:before { content: $fa-var-battery-half; } -.#{$fa-css-prefix}-battery-1:before, -.#{$fa-css-prefix}-battery-quarter:before { content: $fa-var-battery-quarter; } -.#{$fa-css-prefix}-battery-0:before, -.#{$fa-css-prefix}-battery-empty:before { content: $fa-var-battery-empty; } -.#{$fa-css-prefix}-mouse-pointer:before { content: $fa-var-mouse-pointer; } -.#{$fa-css-prefix}-i-cursor:before { content: $fa-var-i-cursor; } -.#{$fa-css-prefix}-object-group:before { content: $fa-var-object-group; } -.#{$fa-css-prefix}-object-ungroup:before { content: $fa-var-object-ungroup; } -.#{$fa-css-prefix}-sticky-note:before { content: $fa-var-sticky-note; } -.#{$fa-css-prefix}-sticky-note-o:before { content: $fa-var-sticky-note-o; } -.#{$fa-css-prefix}-cc-jcb:before { content: $fa-var-cc-jcb; } -.#{$fa-css-prefix}-cc-diners-club:before { content: $fa-var-cc-diners-club; } -.#{$fa-css-prefix}-clone:before { content: $fa-var-clone; } -.#{$fa-css-prefix}-balance-scale:before { content: $fa-var-balance-scale; } -.#{$fa-css-prefix}-hourglass-o:before { content: $fa-var-hourglass-o; } -.#{$fa-css-prefix}-hourglass-1:before, -.#{$fa-css-prefix}-hourglass-start:before { content: $fa-var-hourglass-start; } -.#{$fa-css-prefix}-hourglass-2:before, -.#{$fa-css-prefix}-hourglass-half:before { content: $fa-var-hourglass-half; } -.#{$fa-css-prefix}-hourglass-3:before, -.#{$fa-css-prefix}-hourglass-end:before { content: $fa-var-hourglass-end; } -.#{$fa-css-prefix}-hourglass:before { content: $fa-var-hourglass; } -.#{$fa-css-prefix}-hand-grab-o:before, -.#{$fa-css-prefix}-hand-rock-o:before { content: $fa-var-hand-rock-o; } -.#{$fa-css-prefix}-hand-stop-o:before, -.#{$fa-css-prefix}-hand-paper-o:before { content: $fa-var-hand-paper-o; } -.#{$fa-css-prefix}-hand-scissors-o:before { content: $fa-var-hand-scissors-o; } -.#{$fa-css-prefix}-hand-lizard-o:before { content: $fa-var-hand-lizard-o; } -.#{$fa-css-prefix}-hand-spock-o:before { content: $fa-var-hand-spock-o; } -.#{$fa-css-prefix}-hand-pointer-o:before { content: $fa-var-hand-pointer-o; } -.#{$fa-css-prefix}-hand-peace-o:before { content: $fa-var-hand-peace-o; } -.#{$fa-css-prefix}-trademark:before { content: $fa-var-trademark; } -.#{$fa-css-prefix}-registered:before { content: $fa-var-registered; } -.#{$fa-css-prefix}-creative-commons:before { content: $fa-var-creative-commons; } -.#{$fa-css-prefix}-gg:before { content: $fa-var-gg; } -.#{$fa-css-prefix}-gg-circle:before { content: $fa-var-gg-circle; } -.#{$fa-css-prefix}-tripadvisor:before { content: $fa-var-tripadvisor; } -.#{$fa-css-prefix}-odnoklassniki:before { content: $fa-var-odnoklassniki; } -.#{$fa-css-prefix}-odnoklassniki-square:before { content: $fa-var-odnoklassniki-square; } -.#{$fa-css-prefix}-get-pocket:before { content: $fa-var-get-pocket; } -.#{$fa-css-prefix}-wikipedia-w:before { content: $fa-var-wikipedia-w; } -.#{$fa-css-prefix}-safari:before { content: $fa-var-safari; } -.#{$fa-css-prefix}-chrome:before { content: $fa-var-chrome; } -.#{$fa-css-prefix}-firefox:before { content: $fa-var-firefox; } -.#{$fa-css-prefix}-opera:before { content: $fa-var-opera; } -.#{$fa-css-prefix}-internet-explorer:before { content: $fa-var-internet-explorer; } 
-.#{$fa-css-prefix}-tv:before, -.#{$fa-css-prefix}-television:before { content: $fa-var-television; } -.#{$fa-css-prefix}-contao:before { content: $fa-var-contao; } -.#{$fa-css-prefix}-500px:before { content: $fa-var-500px; } -.#{$fa-css-prefix}-amazon:before { content: $fa-var-amazon; } -.#{$fa-css-prefix}-calendar-plus-o:before { content: $fa-var-calendar-plus-o; } -.#{$fa-css-prefix}-calendar-minus-o:before { content: $fa-var-calendar-minus-o; } -.#{$fa-css-prefix}-calendar-times-o:before { content: $fa-var-calendar-times-o; } -.#{$fa-css-prefix}-calendar-check-o:before { content: $fa-var-calendar-check-o; } -.#{$fa-css-prefix}-industry:before { content: $fa-var-industry; } -.#{$fa-css-prefix}-map-pin:before { content: $fa-var-map-pin; } -.#{$fa-css-prefix}-map-signs:before { content: $fa-var-map-signs; } -.#{$fa-css-prefix}-map-o:before { content: $fa-var-map-o; } -.#{$fa-css-prefix}-map:before { content: $fa-var-map; } -.#{$fa-css-prefix}-commenting:before { content: $fa-var-commenting; } -.#{$fa-css-prefix}-commenting-o:before { content: $fa-var-commenting-o; } -.#{$fa-css-prefix}-houzz:before { content: $fa-var-houzz; } -.#{$fa-css-prefix}-vimeo:before { content: $fa-var-vimeo; } -.#{$fa-css-prefix}-black-tie:before { content: $fa-var-black-tie; } -.#{$fa-css-prefix}-fonticons:before { content: $fa-var-fonticons; } -.#{$fa-css-prefix}-reddit-alien:before { content: $fa-var-reddit-alien; } -.#{$fa-css-prefix}-edge:before { content: $fa-var-edge; } -.#{$fa-css-prefix}-credit-card-alt:before { content: $fa-var-credit-card-alt; } -.#{$fa-css-prefix}-codiepie:before { content: $fa-var-codiepie; } -.#{$fa-css-prefix}-modx:before { content: $fa-var-modx; } -.#{$fa-css-prefix}-fort-awesome:before { content: $fa-var-fort-awesome; } -.#{$fa-css-prefix}-usb:before { content: $fa-var-usb; } -.#{$fa-css-prefix}-product-hunt:before { content: $fa-var-product-hunt; } -.#{$fa-css-prefix}-mixcloud:before { content: $fa-var-mixcloud; } -.#{$fa-css-prefix}-scribd:before { content: $fa-var-scribd; } -.#{$fa-css-prefix}-pause-circle:before { content: $fa-var-pause-circle; } -.#{$fa-css-prefix}-pause-circle-o:before { content: $fa-var-pause-circle-o; } -.#{$fa-css-prefix}-stop-circle:before { content: $fa-var-stop-circle; } -.#{$fa-css-prefix}-stop-circle-o:before { content: $fa-var-stop-circle-o; } -.#{$fa-css-prefix}-shopping-bag:before { content: $fa-var-shopping-bag; } -.#{$fa-css-prefix}-shopping-basket:before { content: $fa-var-shopping-basket; } -.#{$fa-css-prefix}-hashtag:before { content: $fa-var-hashtag; } -.#{$fa-css-prefix}-bluetooth:before { content: $fa-var-bluetooth; } -.#{$fa-css-prefix}-bluetooth-b:before { content: $fa-var-bluetooth-b; } -.#{$fa-css-prefix}-percent:before { content: $fa-var-percent; } -.#{$fa-css-prefix}-gitlab:before { content: $fa-var-gitlab; } -.#{$fa-css-prefix}-wpbeginner:before { content: $fa-var-wpbeginner; } -.#{$fa-css-prefix}-wpforms:before { content: $fa-var-wpforms; } -.#{$fa-css-prefix}-envira:before { content: $fa-var-envira; } -.#{$fa-css-prefix}-universal-access:before { content: $fa-var-universal-access; } -.#{$fa-css-prefix}-wheelchair-alt:before { content: $fa-var-wheelchair-alt; } -.#{$fa-css-prefix}-question-circle-o:before { content: $fa-var-question-circle-o; } -.#{$fa-css-prefix}-blind:before { content: $fa-var-blind; } -.#{$fa-css-prefix}-audio-description:before { content: $fa-var-audio-description; } -.#{$fa-css-prefix}-volume-control-phone:before { content: $fa-var-volume-control-phone; } -.#{$fa-css-prefix}-braille:before { content: 
$fa-var-braille; } -.#{$fa-css-prefix}-assistive-listening-systems:before { content: $fa-var-assistive-listening-systems; } -.#{$fa-css-prefix}-asl-interpreting:before, -.#{$fa-css-prefix}-american-sign-language-interpreting:before { content: $fa-var-american-sign-language-interpreting; } -.#{$fa-css-prefix}-deafness:before, -.#{$fa-css-prefix}-hard-of-hearing:before, -.#{$fa-css-prefix}-deaf:before { content: $fa-var-deaf; } -.#{$fa-css-prefix}-glide:before { content: $fa-var-glide; } -.#{$fa-css-prefix}-glide-g:before { content: $fa-var-glide-g; } -.#{$fa-css-prefix}-signing:before, -.#{$fa-css-prefix}-sign-language:before { content: $fa-var-sign-language; } -.#{$fa-css-prefix}-low-vision:before { content: $fa-var-low-vision; } -.#{$fa-css-prefix}-viadeo:before { content: $fa-var-viadeo; } -.#{$fa-css-prefix}-viadeo-square:before { content: $fa-var-viadeo-square; } -.#{$fa-css-prefix}-snapchat:before { content: $fa-var-snapchat; } -.#{$fa-css-prefix}-snapchat-ghost:before { content: $fa-var-snapchat-ghost; } -.#{$fa-css-prefix}-snapchat-square:before { content: $fa-var-snapchat-square; } -.#{$fa-css-prefix}-pied-piper:before { content: $fa-var-pied-piper; } -.#{$fa-css-prefix}-first-order:before { content: $fa-var-first-order; } -.#{$fa-css-prefix}-yoast:before { content: $fa-var-yoast; } -.#{$fa-css-prefix}-themeisle:before { content: $fa-var-themeisle; } -.#{$fa-css-prefix}-google-plus-circle:before, -.#{$fa-css-prefix}-google-plus-official:before { content: $fa-var-google-plus-official; } -.#{$fa-css-prefix}-fa:before, -.#{$fa-css-prefix}-font-awesome:before { content: $fa-var-font-awesome; } -.#{$fa-css-prefix}-handshake-o:before { content: $fa-var-handshake-o; } -.#{$fa-css-prefix}-envelope-open:before { content: $fa-var-envelope-open; } -.#{$fa-css-prefix}-envelope-open-o:before { content: $fa-var-envelope-open-o; } -.#{$fa-css-prefix}-linode:before { content: $fa-var-linode; } -.#{$fa-css-prefix}-address-book:before { content: $fa-var-address-book; } -.#{$fa-css-prefix}-address-book-o:before { content: $fa-var-address-book-o; } -.#{$fa-css-prefix}-vcard:before, -.#{$fa-css-prefix}-address-card:before { content: $fa-var-address-card; } -.#{$fa-css-prefix}-vcard-o:before, -.#{$fa-css-prefix}-address-card-o:before { content: $fa-var-address-card-o; } -.#{$fa-css-prefix}-user-circle:before { content: $fa-var-user-circle; } -.#{$fa-css-prefix}-user-circle-o:before { content: $fa-var-user-circle-o; } -.#{$fa-css-prefix}-user-o:before { content: $fa-var-user-o; } -.#{$fa-css-prefix}-id-badge:before { content: $fa-var-id-badge; } -.#{$fa-css-prefix}-drivers-license:before, -.#{$fa-css-prefix}-id-card:before { content: $fa-var-id-card; } -.#{$fa-css-prefix}-drivers-license-o:before, -.#{$fa-css-prefix}-id-card-o:before { content: $fa-var-id-card-o; } -.#{$fa-css-prefix}-quora:before { content: $fa-var-quora; } -.#{$fa-css-prefix}-free-code-camp:before { content: $fa-var-free-code-camp; } -.#{$fa-css-prefix}-telegram:before { content: $fa-var-telegram; } -.#{$fa-css-prefix}-thermometer-4:before, -.#{$fa-css-prefix}-thermometer:before, -.#{$fa-css-prefix}-thermometer-full:before { content: $fa-var-thermometer-full; } -.#{$fa-css-prefix}-thermometer-3:before, -.#{$fa-css-prefix}-thermometer-three-quarters:before { content: $fa-var-thermometer-three-quarters; } -.#{$fa-css-prefix}-thermometer-2:before, -.#{$fa-css-prefix}-thermometer-half:before { content: $fa-var-thermometer-half; } -.#{$fa-css-prefix}-thermometer-1:before, -.#{$fa-css-prefix}-thermometer-quarter:before { content: 
$fa-var-thermometer-quarter; } -.#{$fa-css-prefix}-thermometer-0:before, -.#{$fa-css-prefix}-thermometer-empty:before { content: $fa-var-thermometer-empty; } -.#{$fa-css-prefix}-shower:before { content: $fa-var-shower; } -.#{$fa-css-prefix}-bathtub:before, -.#{$fa-css-prefix}-s15:before, -.#{$fa-css-prefix}-bath:before { content: $fa-var-bath; } -.#{$fa-css-prefix}-podcast:before { content: $fa-var-podcast; } -.#{$fa-css-prefix}-window-maximize:before { content: $fa-var-window-maximize; } -.#{$fa-css-prefix}-window-minimize:before { content: $fa-var-window-minimize; } -.#{$fa-css-prefix}-window-restore:before { content: $fa-var-window-restore; } -.#{$fa-css-prefix}-times-rectangle:before, -.#{$fa-css-prefix}-window-close:before { content: $fa-var-window-close; } -.#{$fa-css-prefix}-times-rectangle-o:before, -.#{$fa-css-prefix}-window-close-o:before { content: $fa-var-window-close-o; } -.#{$fa-css-prefix}-bandcamp:before { content: $fa-var-bandcamp; } -.#{$fa-css-prefix}-grav:before { content: $fa-var-grav; } -.#{$fa-css-prefix}-etsy:before { content: $fa-var-etsy; } -.#{$fa-css-prefix}-imdb:before { content: $fa-var-imdb; } -.#{$fa-css-prefix}-ravelry:before { content: $fa-var-ravelry; } -.#{$fa-css-prefix}-eercast:before { content: $fa-var-eercast; } -.#{$fa-css-prefix}-microchip:before { content: $fa-var-microchip; } -.#{$fa-css-prefix}-snowflake-o:before { content: $fa-var-snowflake-o; } -.#{$fa-css-prefix}-superpowers:before { content: $fa-var-superpowers; } -.#{$fa-css-prefix}-wpexplorer:before { content: $fa-var-wpexplorer; } -.#{$fa-css-prefix}-meetup:before { content: $fa-var-meetup; } diff --git a/site/_sass/vendor/font-awesome/_larger.scss b/site/_sass/vendor/font-awesome/_larger.scss deleted file mode 100644 index 41e9a8184aa..00000000000 --- a/site/_sass/vendor/font-awesome/_larger.scss +++ /dev/null @@ -1,13 +0,0 @@ -// Icon Sizes -// ------------------------- - -/* makes the font 33% larger relative to the icon container */ -.#{$fa-css-prefix}-lg { - font-size: (4em / 3); - line-height: (3em / 4); - vertical-align: -15%; -} -.#{$fa-css-prefix}-2x { font-size: 2em; } -.#{$fa-css-prefix}-3x { font-size: 3em; } -.#{$fa-css-prefix}-4x { font-size: 4em; } -.#{$fa-css-prefix}-5x { font-size: 5em; } diff --git a/site/_sass/vendor/font-awesome/_list.scss b/site/_sass/vendor/font-awesome/_list.scss deleted file mode 100644 index 7d1e4d54d6c..00000000000 --- a/site/_sass/vendor/font-awesome/_list.scss +++ /dev/null @@ -1,19 +0,0 @@ -// List Icons -// ------------------------- - -.#{$fa-css-prefix}-ul { - padding-left: 0; - margin-left: $fa-li-width; - list-style-type: none; - > li { position: relative; } -} -.#{$fa-css-prefix}-li { - position: absolute; - left: -$fa-li-width; - width: $fa-li-width; - top: (2em / 14); - text-align: center; - &.#{$fa-css-prefix}-lg { - left: -$fa-li-width + (4em / 14); - } -} diff --git a/site/_sass/vendor/font-awesome/_mixins.scss b/site/_sass/vendor/font-awesome/_mixins.scss deleted file mode 100644 index c3bbd5745d3..00000000000 --- a/site/_sass/vendor/font-awesome/_mixins.scss +++ /dev/null @@ -1,60 +0,0 @@ -// Mixins -// -------------------------- - -@mixin fa-icon() { - display: inline-block; - font: normal normal normal #{$fa-font-size-base}/#{$fa-line-height-base} FontAwesome; // shortening font declaration - font-size: inherit; // can't have font-size inherit on line above, so need to override - text-rendering: auto; // optimizelegibility throws things off #1094 - -webkit-font-smoothing: antialiased; - -moz-osx-font-smoothing: grayscale; - -} - 
-@mixin fa-icon-rotate($degrees, $rotation) { - -ms-filter: "progid:DXImageTransform.Microsoft.BasicImage(rotation=#{$rotation})"; - -webkit-transform: rotate($degrees); - -ms-transform: rotate($degrees); - transform: rotate($degrees); -} - -@mixin fa-icon-flip($horiz, $vert, $rotation) { - -ms-filter: "progid:DXImageTransform.Microsoft.BasicImage(rotation=#{$rotation}, mirror=1)"; - -webkit-transform: scale($horiz, $vert); - -ms-transform: scale($horiz, $vert); - transform: scale($horiz, $vert); -} - - -// Only display content to screen readers. A la Bootstrap 4. -// -// See: http://a11yproject.com/posts/how-to-hide-content/ - -@mixin sr-only { - position: absolute; - width: 1px; - height: 1px; - padding: 0; - margin: -1px; - overflow: hidden; - clip: rect(0,0,0,0); - border: 0; -} - -// Use in conjunction with .sr-only to only display content when it's focused. -// -// Useful for "Skip to main content" links; see http://www.w3.org/TR/2013/NOTE-WCAG20-TECHS-20130905/G1 -// -// Credit: HTML5 Boilerplate - -@mixin sr-only-focusable { - &:active, - &:focus { - position: static; - width: auto; - height: auto; - margin: 0; - overflow: visible; - clip: auto; - } -} diff --git a/site/_sass/vendor/font-awesome/_path.scss b/site/_sass/vendor/font-awesome/_path.scss deleted file mode 100644 index bb457c23a8e..00000000000 --- a/site/_sass/vendor/font-awesome/_path.scss +++ /dev/null @@ -1,15 +0,0 @@ -/* FONT PATH - * -------------------------- */ - -@font-face { - font-family: 'FontAwesome'; - src: url('#{$fa-font-path}/fontawesome-webfont.eot?v=#{$fa-version}'); - src: url('#{$fa-font-path}/fontawesome-webfont.eot?#iefix&v=#{$fa-version}') format('embedded-opentype'), - url('#{$fa-font-path}/fontawesome-webfont.woff2?v=#{$fa-version}') format('woff2'), - url('#{$fa-font-path}/fontawesome-webfont.woff?v=#{$fa-version}') format('woff'), - url('#{$fa-font-path}/fontawesome-webfont.ttf?v=#{$fa-version}') format('truetype'), - url('#{$fa-font-path}/fontawesome-webfont.svg?v=#{$fa-version}#fontawesomeregular') format('svg'); -// src: url('#{$fa-font-path}/FontAwesome.otf') format('opentype'); // used when developing fonts - font-weight: normal; - font-style: normal; -} diff --git a/site/_sass/vendor/font-awesome/_rotated-flipped.scss b/site/_sass/vendor/font-awesome/_rotated-flipped.scss deleted file mode 100644 index a3558fd09ca..00000000000 --- a/site/_sass/vendor/font-awesome/_rotated-flipped.scss +++ /dev/null @@ -1,20 +0,0 @@ -// Rotated & Flipped Icons -// ------------------------- - -.#{$fa-css-prefix}-rotate-90 { @include fa-icon-rotate(90deg, 1); } -.#{$fa-css-prefix}-rotate-180 { @include fa-icon-rotate(180deg, 2); } -.#{$fa-css-prefix}-rotate-270 { @include fa-icon-rotate(270deg, 3); } - -.#{$fa-css-prefix}-flip-horizontal { @include fa-icon-flip(-1, 1, 0); } -.#{$fa-css-prefix}-flip-vertical { @include fa-icon-flip(1, -1, 2); } - -// Hook for IE8-9 -// ------------------------- - -:root .#{$fa-css-prefix}-rotate-90, -:root .#{$fa-css-prefix}-rotate-180, -:root .#{$fa-css-prefix}-rotate-270, -:root .#{$fa-css-prefix}-flip-horizontal, -:root .#{$fa-css-prefix}-flip-vertical { - filter: none; -} diff --git a/site/_sass/vendor/font-awesome/_screen-reader.scss b/site/_sass/vendor/font-awesome/_screen-reader.scss deleted file mode 100644 index 637426f0da6..00000000000 --- a/site/_sass/vendor/font-awesome/_screen-reader.scss +++ /dev/null @@ -1,5 +0,0 @@ -// Screen Readers -// ------------------------- - -.sr-only { @include sr-only(); } -.sr-only-focusable { @include sr-only-focusable(); } diff 
--git a/site/_sass/vendor/font-awesome/_stacked.scss b/site/_sass/vendor/font-awesome/_stacked.scss deleted file mode 100644 index aef7403660c..00000000000 --- a/site/_sass/vendor/font-awesome/_stacked.scss +++ /dev/null @@ -1,20 +0,0 @@ -// Stacked Icons -// ------------------------- - -.#{$fa-css-prefix}-stack { - position: relative; - display: inline-block; - width: 2em; - height: 2em; - line-height: 2em; - vertical-align: middle; -} -.#{$fa-css-prefix}-stack-1x, .#{$fa-css-prefix}-stack-2x { - position: absolute; - left: 0; - width: 100%; - text-align: center; -} -.#{$fa-css-prefix}-stack-1x { line-height: inherit; } -.#{$fa-css-prefix}-stack-2x { font-size: 2em; } -.#{$fa-css-prefix}-inverse { color: $fa-inverse; } diff --git a/site/_sass/vendor/font-awesome/_variables.scss b/site/_sass/vendor/font-awesome/_variables.scss deleted file mode 100644 index 498fc4a087c..00000000000 --- a/site/_sass/vendor/font-awesome/_variables.scss +++ /dev/null @@ -1,800 +0,0 @@ -// Variables -// -------------------------- - -$fa-font-path: "../fonts" !default; -$fa-font-size-base: 14px !default; -$fa-line-height-base: 1 !default; -//$fa-font-path: "//netdna.bootstrapcdn.com/font-awesome/4.7.0/fonts" !default; // for referencing Bootstrap CDN font files directly -$fa-css-prefix: fa !default; -$fa-version: "4.7.0" !default; -$fa-border-color: #eee !default; -$fa-inverse: #fff !default; -$fa-li-width: (30em / 14) !default; - -$fa-var-500px: "\f26e"; -$fa-var-address-book: "\f2b9"; -$fa-var-address-book-o: "\f2ba"; -$fa-var-address-card: "\f2bb"; -$fa-var-address-card-o: "\f2bc"; -$fa-var-adjust: "\f042"; -$fa-var-adn: "\f170"; -$fa-var-align-center: "\f037"; -$fa-var-align-justify: "\f039"; -$fa-var-align-left: "\f036"; -$fa-var-align-right: "\f038"; -$fa-var-amazon: "\f270"; -$fa-var-ambulance: "\f0f9"; -$fa-var-american-sign-language-interpreting: "\f2a3"; -$fa-var-anchor: "\f13d"; -$fa-var-android: "\f17b"; -$fa-var-angellist: "\f209"; -$fa-var-angle-double-down: "\f103"; -$fa-var-angle-double-left: "\f100"; -$fa-var-angle-double-right: "\f101"; -$fa-var-angle-double-up: "\f102"; -$fa-var-angle-down: "\f107"; -$fa-var-angle-left: "\f104"; -$fa-var-angle-right: "\f105"; -$fa-var-angle-up: "\f106"; -$fa-var-apple: "\f179"; -$fa-var-archive: "\f187"; -$fa-var-area-chart: "\f1fe"; -$fa-var-arrow-circle-down: "\f0ab"; -$fa-var-arrow-circle-left: "\f0a8"; -$fa-var-arrow-circle-o-down: "\f01a"; -$fa-var-arrow-circle-o-left: "\f190"; -$fa-var-arrow-circle-o-right: "\f18e"; -$fa-var-arrow-circle-o-up: "\f01b"; -$fa-var-arrow-circle-right: "\f0a9"; -$fa-var-arrow-circle-up: "\f0aa"; -$fa-var-arrow-down: "\f063"; -$fa-var-arrow-left: "\f060"; -$fa-var-arrow-right: "\f061"; -$fa-var-arrow-up: "\f062"; -$fa-var-arrows: "\f047"; -$fa-var-arrows-alt: "\f0b2"; -$fa-var-arrows-h: "\f07e"; -$fa-var-arrows-v: "\f07d"; -$fa-var-asl-interpreting: "\f2a3"; -$fa-var-assistive-listening-systems: "\f2a2"; -$fa-var-asterisk: "\f069"; -$fa-var-at: "\f1fa"; -$fa-var-audio-description: "\f29e"; -$fa-var-automobile: "\f1b9"; -$fa-var-backward: "\f04a"; -$fa-var-balance-scale: "\f24e"; -$fa-var-ban: "\f05e"; -$fa-var-bandcamp: "\f2d5"; -$fa-var-bank: "\f19c"; -$fa-var-bar-chart: "\f080"; -$fa-var-bar-chart-o: "\f080"; -$fa-var-barcode: "\f02a"; -$fa-var-bars: "\f0c9"; -$fa-var-bath: "\f2cd"; -$fa-var-bathtub: "\f2cd"; -$fa-var-battery: "\f240"; -$fa-var-battery-0: "\f244"; -$fa-var-battery-1: "\f243"; -$fa-var-battery-2: "\f242"; -$fa-var-battery-3: "\f241"; -$fa-var-battery-4: "\f240"; -$fa-var-battery-empty: "\f244"; 
-$fa-var-battery-full: "\f240"; -$fa-var-battery-half: "\f242"; -$fa-var-battery-quarter: "\f243"; -$fa-var-battery-three-quarters: "\f241"; -$fa-var-bed: "\f236"; -$fa-var-beer: "\f0fc"; -$fa-var-behance: "\f1b4"; -$fa-var-behance-square: "\f1b5"; -$fa-var-bell: "\f0f3"; -$fa-var-bell-o: "\f0a2"; -$fa-var-bell-slash: "\f1f6"; -$fa-var-bell-slash-o: "\f1f7"; -$fa-var-bicycle: "\f206"; -$fa-var-binoculars: "\f1e5"; -$fa-var-birthday-cake: "\f1fd"; -$fa-var-bitbucket: "\f171"; -$fa-var-bitbucket-square: "\f172"; -$fa-var-bitcoin: "\f15a"; -$fa-var-black-tie: "\f27e"; -$fa-var-blind: "\f29d"; -$fa-var-bluetooth: "\f293"; -$fa-var-bluetooth-b: "\f294"; -$fa-var-bold: "\f032"; -$fa-var-bolt: "\f0e7"; -$fa-var-bomb: "\f1e2"; -$fa-var-book: "\f02d"; -$fa-var-bookmark: "\f02e"; -$fa-var-bookmark-o: "\f097"; -$fa-var-braille: "\f2a1"; -$fa-var-briefcase: "\f0b1"; -$fa-var-btc: "\f15a"; -$fa-var-bug: "\f188"; -$fa-var-building: "\f1ad"; -$fa-var-building-o: "\f0f7"; -$fa-var-bullhorn: "\f0a1"; -$fa-var-bullseye: "\f140"; -$fa-var-bus: "\f207"; -$fa-var-buysellads: "\f20d"; -$fa-var-cab: "\f1ba"; -$fa-var-calculator: "\f1ec"; -$fa-var-calendar: "\f073"; -$fa-var-calendar-check-o: "\f274"; -$fa-var-calendar-minus-o: "\f272"; -$fa-var-calendar-o: "\f133"; -$fa-var-calendar-plus-o: "\f271"; -$fa-var-calendar-times-o: "\f273"; -$fa-var-camera: "\f030"; -$fa-var-camera-retro: "\f083"; -$fa-var-car: "\f1b9"; -$fa-var-caret-down: "\f0d7"; -$fa-var-caret-left: "\f0d9"; -$fa-var-caret-right: "\f0da"; -$fa-var-caret-square-o-down: "\f150"; -$fa-var-caret-square-o-left: "\f191"; -$fa-var-caret-square-o-right: "\f152"; -$fa-var-caret-square-o-up: "\f151"; -$fa-var-caret-up: "\f0d8"; -$fa-var-cart-arrow-down: "\f218"; -$fa-var-cart-plus: "\f217"; -$fa-var-cc: "\f20a"; -$fa-var-cc-amex: "\f1f3"; -$fa-var-cc-diners-club: "\f24c"; -$fa-var-cc-discover: "\f1f2"; -$fa-var-cc-jcb: "\f24b"; -$fa-var-cc-mastercard: "\f1f1"; -$fa-var-cc-paypal: "\f1f4"; -$fa-var-cc-stripe: "\f1f5"; -$fa-var-cc-visa: "\f1f0"; -$fa-var-certificate: "\f0a3"; -$fa-var-chain: "\f0c1"; -$fa-var-chain-broken: "\f127"; -$fa-var-check: "\f00c"; -$fa-var-check-circle: "\f058"; -$fa-var-check-circle-o: "\f05d"; -$fa-var-check-square: "\f14a"; -$fa-var-check-square-o: "\f046"; -$fa-var-chevron-circle-down: "\f13a"; -$fa-var-chevron-circle-left: "\f137"; -$fa-var-chevron-circle-right: "\f138"; -$fa-var-chevron-circle-up: "\f139"; -$fa-var-chevron-down: "\f078"; -$fa-var-chevron-left: "\f053"; -$fa-var-chevron-right: "\f054"; -$fa-var-chevron-up: "\f077"; -$fa-var-child: "\f1ae"; -$fa-var-chrome: "\f268"; -$fa-var-circle: "\f111"; -$fa-var-circle-o: "\f10c"; -$fa-var-circle-o-notch: "\f1ce"; -$fa-var-circle-thin: "\f1db"; -$fa-var-clipboard: "\f0ea"; -$fa-var-clock-o: "\f017"; -$fa-var-clone: "\f24d"; -$fa-var-close: "\f00d"; -$fa-var-cloud: "\f0c2"; -$fa-var-cloud-download: "\f0ed"; -$fa-var-cloud-upload: "\f0ee"; -$fa-var-cny: "\f157"; -$fa-var-code: "\f121"; -$fa-var-code-fork: "\f126"; -$fa-var-codepen: "\f1cb"; -$fa-var-codiepie: "\f284"; -$fa-var-coffee: "\f0f4"; -$fa-var-cog: "\f013"; -$fa-var-cogs: "\f085"; -$fa-var-columns: "\f0db"; -$fa-var-comment: "\f075"; -$fa-var-comment-o: "\f0e5"; -$fa-var-commenting: "\f27a"; -$fa-var-commenting-o: "\f27b"; -$fa-var-comments: "\f086"; -$fa-var-comments-o: "\f0e6"; -$fa-var-compass: "\f14e"; -$fa-var-compress: "\f066"; -$fa-var-connectdevelop: "\f20e"; -$fa-var-contao: "\f26d"; -$fa-var-copy: "\f0c5"; -$fa-var-copyright: "\f1f9"; -$fa-var-creative-commons: "\f25e"; -$fa-var-credit-card: "\f09d"; 
-$fa-var-credit-card-alt: "\f283"; -$fa-var-crop: "\f125"; -$fa-var-crosshairs: "\f05b"; -$fa-var-css3: "\f13c"; -$fa-var-cube: "\f1b2"; -$fa-var-cubes: "\f1b3"; -$fa-var-cut: "\f0c4"; -$fa-var-cutlery: "\f0f5"; -$fa-var-dashboard: "\f0e4"; -$fa-var-dashcube: "\f210"; -$fa-var-database: "\f1c0"; -$fa-var-deaf: "\f2a4"; -$fa-var-deafness: "\f2a4"; -$fa-var-dedent: "\f03b"; -$fa-var-delicious: "\f1a5"; -$fa-var-desktop: "\f108"; -$fa-var-deviantart: "\f1bd"; -$fa-var-diamond: "\f219"; -$fa-var-digg: "\f1a6"; -$fa-var-dollar: "\f155"; -$fa-var-dot-circle-o: "\f192"; -$fa-var-download: "\f019"; -$fa-var-dribbble: "\f17d"; -$fa-var-drivers-license: "\f2c2"; -$fa-var-drivers-license-o: "\f2c3"; -$fa-var-dropbox: "\f16b"; -$fa-var-drupal: "\f1a9"; -$fa-var-edge: "\f282"; -$fa-var-edit: "\f044"; -$fa-var-eercast: "\f2da"; -$fa-var-eject: "\f052"; -$fa-var-ellipsis-h: "\f141"; -$fa-var-ellipsis-v: "\f142"; -$fa-var-empire: "\f1d1"; -$fa-var-envelope: "\f0e0"; -$fa-var-envelope-o: "\f003"; -$fa-var-envelope-open: "\f2b6"; -$fa-var-envelope-open-o: "\f2b7"; -$fa-var-envelope-square: "\f199"; -$fa-var-envira: "\f299"; -$fa-var-eraser: "\f12d"; -$fa-var-etsy: "\f2d7"; -$fa-var-eur: "\f153"; -$fa-var-euro: "\f153"; -$fa-var-exchange: "\f0ec"; -$fa-var-exclamation: "\f12a"; -$fa-var-exclamation-circle: "\f06a"; -$fa-var-exclamation-triangle: "\f071"; -$fa-var-expand: "\f065"; -$fa-var-expeditedssl: "\f23e"; -$fa-var-external-link: "\f08e"; -$fa-var-external-link-square: "\f14c"; -$fa-var-eye: "\f06e"; -$fa-var-eye-slash: "\f070"; -$fa-var-eyedropper: "\f1fb"; -$fa-var-fa: "\f2b4"; -$fa-var-facebook: "\f09a"; -$fa-var-facebook-f: "\f09a"; -$fa-var-facebook-official: "\f230"; -$fa-var-facebook-square: "\f082"; -$fa-var-fast-backward: "\f049"; -$fa-var-fast-forward: "\f050"; -$fa-var-fax: "\f1ac"; -$fa-var-feed: "\f09e"; -$fa-var-female: "\f182"; -$fa-var-fighter-jet: "\f0fb"; -$fa-var-file: "\f15b"; -$fa-var-file-archive-o: "\f1c6"; -$fa-var-file-audio-o: "\f1c7"; -$fa-var-file-code-o: "\f1c9"; -$fa-var-file-excel-o: "\f1c3"; -$fa-var-file-image-o: "\f1c5"; -$fa-var-file-movie-o: "\f1c8"; -$fa-var-file-o: "\f016"; -$fa-var-file-pdf-o: "\f1c1"; -$fa-var-file-photo-o: "\f1c5"; -$fa-var-file-picture-o: "\f1c5"; -$fa-var-file-powerpoint-o: "\f1c4"; -$fa-var-file-sound-o: "\f1c7"; -$fa-var-file-text: "\f15c"; -$fa-var-file-text-o: "\f0f6"; -$fa-var-file-video-o: "\f1c8"; -$fa-var-file-word-o: "\f1c2"; -$fa-var-file-zip-o: "\f1c6"; -$fa-var-files-o: "\f0c5"; -$fa-var-film: "\f008"; -$fa-var-filter: "\f0b0"; -$fa-var-fire: "\f06d"; -$fa-var-fire-extinguisher: "\f134"; -$fa-var-firefox: "\f269"; -$fa-var-first-order: "\f2b0"; -$fa-var-flag: "\f024"; -$fa-var-flag-checkered: "\f11e"; -$fa-var-flag-o: "\f11d"; -$fa-var-flash: "\f0e7"; -$fa-var-flask: "\f0c3"; -$fa-var-flickr: "\f16e"; -$fa-var-floppy-o: "\f0c7"; -$fa-var-folder: "\f07b"; -$fa-var-folder-o: "\f114"; -$fa-var-folder-open: "\f07c"; -$fa-var-folder-open-o: "\f115"; -$fa-var-font: "\f031"; -$fa-var-font-awesome: "\f2b4"; -$fa-var-fonticons: "\f280"; -$fa-var-fort-awesome: "\f286"; -$fa-var-forumbee: "\f211"; -$fa-var-forward: "\f04e"; -$fa-var-foursquare: "\f180"; -$fa-var-free-code-camp: "\f2c5"; -$fa-var-frown-o: "\f119"; -$fa-var-futbol-o: "\f1e3"; -$fa-var-gamepad: "\f11b"; -$fa-var-gavel: "\f0e3"; -$fa-var-gbp: "\f154"; -$fa-var-ge: "\f1d1"; -$fa-var-gear: "\f013"; -$fa-var-gears: "\f085"; -$fa-var-genderless: "\f22d"; -$fa-var-get-pocket: "\f265"; -$fa-var-gg: "\f260"; -$fa-var-gg-circle: "\f261"; -$fa-var-gift: "\f06b"; -$fa-var-git: "\f1d3"; 
-$fa-var-git-square: "\f1d2"; -$fa-var-github: "\f09b"; -$fa-var-github-alt: "\f113"; -$fa-var-github-square: "\f092"; -$fa-var-gitlab: "\f296"; -$fa-var-gittip: "\f184"; -$fa-var-glass: "\f000"; -$fa-var-glide: "\f2a5"; -$fa-var-glide-g: "\f2a6"; -$fa-var-globe: "\f0ac"; -$fa-var-google: "\f1a0"; -$fa-var-google-plus: "\f0d5"; -$fa-var-google-plus-circle: "\f2b3"; -$fa-var-google-plus-official: "\f2b3"; -$fa-var-google-plus-square: "\f0d4"; -$fa-var-google-wallet: "\f1ee"; -$fa-var-graduation-cap: "\f19d"; -$fa-var-gratipay: "\f184"; -$fa-var-grav: "\f2d6"; -$fa-var-group: "\f0c0"; -$fa-var-h-square: "\f0fd"; -$fa-var-hacker-news: "\f1d4"; -$fa-var-hand-grab-o: "\f255"; -$fa-var-hand-lizard-o: "\f258"; -$fa-var-hand-o-down: "\f0a7"; -$fa-var-hand-o-left: "\f0a5"; -$fa-var-hand-o-right: "\f0a4"; -$fa-var-hand-o-up: "\f0a6"; -$fa-var-hand-paper-o: "\f256"; -$fa-var-hand-peace-o: "\f25b"; -$fa-var-hand-pointer-o: "\f25a"; -$fa-var-hand-rock-o: "\f255"; -$fa-var-hand-scissors-o: "\f257"; -$fa-var-hand-spock-o: "\f259"; -$fa-var-hand-stop-o: "\f256"; -$fa-var-handshake-o: "\f2b5"; -$fa-var-hard-of-hearing: "\f2a4"; -$fa-var-hashtag: "\f292"; -$fa-var-hdd-o: "\f0a0"; -$fa-var-header: "\f1dc"; -$fa-var-headphones: "\f025"; -$fa-var-heart: "\f004"; -$fa-var-heart-o: "\f08a"; -$fa-var-heartbeat: "\f21e"; -$fa-var-history: "\f1da"; -$fa-var-home: "\f015"; -$fa-var-hospital-o: "\f0f8"; -$fa-var-hotel: "\f236"; -$fa-var-hourglass: "\f254"; -$fa-var-hourglass-1: "\f251"; -$fa-var-hourglass-2: "\f252"; -$fa-var-hourglass-3: "\f253"; -$fa-var-hourglass-end: "\f253"; -$fa-var-hourglass-half: "\f252"; -$fa-var-hourglass-o: "\f250"; -$fa-var-hourglass-start: "\f251"; -$fa-var-houzz: "\f27c"; -$fa-var-html5: "\f13b"; -$fa-var-i-cursor: "\f246"; -$fa-var-id-badge: "\f2c1"; -$fa-var-id-card: "\f2c2"; -$fa-var-id-card-o: "\f2c3"; -$fa-var-ils: "\f20b"; -$fa-var-image: "\f03e"; -$fa-var-imdb: "\f2d8"; -$fa-var-inbox: "\f01c"; -$fa-var-indent: "\f03c"; -$fa-var-industry: "\f275"; -$fa-var-info: "\f129"; -$fa-var-info-circle: "\f05a"; -$fa-var-inr: "\f156"; -$fa-var-instagram: "\f16d"; -$fa-var-institution: "\f19c"; -$fa-var-internet-explorer: "\f26b"; -$fa-var-intersex: "\f224"; -$fa-var-ioxhost: "\f208"; -$fa-var-italic: "\f033"; -$fa-var-joomla: "\f1aa"; -$fa-var-jpy: "\f157"; -$fa-var-jsfiddle: "\f1cc"; -$fa-var-key: "\f084"; -$fa-var-keyboard-o: "\f11c"; -$fa-var-krw: "\f159"; -$fa-var-language: "\f1ab"; -$fa-var-laptop: "\f109"; -$fa-var-lastfm: "\f202"; -$fa-var-lastfm-square: "\f203"; -$fa-var-leaf: "\f06c"; -$fa-var-leanpub: "\f212"; -$fa-var-legal: "\f0e3"; -$fa-var-lemon-o: "\f094"; -$fa-var-level-down: "\f149"; -$fa-var-level-up: "\f148"; -$fa-var-life-bouy: "\f1cd"; -$fa-var-life-buoy: "\f1cd"; -$fa-var-life-ring: "\f1cd"; -$fa-var-life-saver: "\f1cd"; -$fa-var-lightbulb-o: "\f0eb"; -$fa-var-line-chart: "\f201"; -$fa-var-link: "\f0c1"; -$fa-var-linkedin: "\f0e1"; -$fa-var-linkedin-square: "\f08c"; -$fa-var-linode: "\f2b8"; -$fa-var-linux: "\f17c"; -$fa-var-list: "\f03a"; -$fa-var-list-alt: "\f022"; -$fa-var-list-ol: "\f0cb"; -$fa-var-list-ul: "\f0ca"; -$fa-var-location-arrow: "\f124"; -$fa-var-lock: "\f023"; -$fa-var-long-arrow-down: "\f175"; -$fa-var-long-arrow-left: "\f177"; -$fa-var-long-arrow-right: "\f178"; -$fa-var-long-arrow-up: "\f176"; -$fa-var-low-vision: "\f2a8"; -$fa-var-magic: "\f0d0"; -$fa-var-magnet: "\f076"; -$fa-var-mail-forward: "\f064"; -$fa-var-mail-reply: "\f112"; -$fa-var-mail-reply-all: "\f122"; -$fa-var-male: "\f183"; -$fa-var-map: "\f279"; -$fa-var-map-marker: "\f041"; 
-$fa-var-map-o: "\f278"; -$fa-var-map-pin: "\f276"; -$fa-var-map-signs: "\f277"; -$fa-var-mars: "\f222"; -$fa-var-mars-double: "\f227"; -$fa-var-mars-stroke: "\f229"; -$fa-var-mars-stroke-h: "\f22b"; -$fa-var-mars-stroke-v: "\f22a"; -$fa-var-maxcdn: "\f136"; -$fa-var-meanpath: "\f20c"; -$fa-var-medium: "\f23a"; -$fa-var-medkit: "\f0fa"; -$fa-var-meetup: "\f2e0"; -$fa-var-meh-o: "\f11a"; -$fa-var-mercury: "\f223"; -$fa-var-microchip: "\f2db"; -$fa-var-microphone: "\f130"; -$fa-var-microphone-slash: "\f131"; -$fa-var-minus: "\f068"; -$fa-var-minus-circle: "\f056"; -$fa-var-minus-square: "\f146"; -$fa-var-minus-square-o: "\f147"; -$fa-var-mixcloud: "\f289"; -$fa-var-mobile: "\f10b"; -$fa-var-mobile-phone: "\f10b"; -$fa-var-modx: "\f285"; -$fa-var-money: "\f0d6"; -$fa-var-moon-o: "\f186"; -$fa-var-mortar-board: "\f19d"; -$fa-var-motorcycle: "\f21c"; -$fa-var-mouse-pointer: "\f245"; -$fa-var-music: "\f001"; -$fa-var-navicon: "\f0c9"; -$fa-var-neuter: "\f22c"; -$fa-var-newspaper-o: "\f1ea"; -$fa-var-object-group: "\f247"; -$fa-var-object-ungroup: "\f248"; -$fa-var-odnoklassniki: "\f263"; -$fa-var-odnoklassniki-square: "\f264"; -$fa-var-opencart: "\f23d"; -$fa-var-openid: "\f19b"; -$fa-var-opera: "\f26a"; -$fa-var-optin-monster: "\f23c"; -$fa-var-outdent: "\f03b"; -$fa-var-pagelines: "\f18c"; -$fa-var-paint-brush: "\f1fc"; -$fa-var-paper-plane: "\f1d8"; -$fa-var-paper-plane-o: "\f1d9"; -$fa-var-paperclip: "\f0c6"; -$fa-var-paragraph: "\f1dd"; -$fa-var-paste: "\f0ea"; -$fa-var-pause: "\f04c"; -$fa-var-pause-circle: "\f28b"; -$fa-var-pause-circle-o: "\f28c"; -$fa-var-paw: "\f1b0"; -$fa-var-paypal: "\f1ed"; -$fa-var-pencil: "\f040"; -$fa-var-pencil-square: "\f14b"; -$fa-var-pencil-square-o: "\f044"; -$fa-var-percent: "\f295"; -$fa-var-phone: "\f095"; -$fa-var-phone-square: "\f098"; -$fa-var-photo: "\f03e"; -$fa-var-picture-o: "\f03e"; -$fa-var-pie-chart: "\f200"; -$fa-var-pied-piper: "\f2ae"; -$fa-var-pied-piper-alt: "\f1a8"; -$fa-var-pied-piper-pp: "\f1a7"; -$fa-var-pinterest: "\f0d2"; -$fa-var-pinterest-p: "\f231"; -$fa-var-pinterest-square: "\f0d3"; -$fa-var-plane: "\f072"; -$fa-var-play: "\f04b"; -$fa-var-play-circle: "\f144"; -$fa-var-play-circle-o: "\f01d"; -$fa-var-plug: "\f1e6"; -$fa-var-plus: "\f067"; -$fa-var-plus-circle: "\f055"; -$fa-var-plus-square: "\f0fe"; -$fa-var-plus-square-o: "\f196"; -$fa-var-podcast: "\f2ce"; -$fa-var-power-off: "\f011"; -$fa-var-print: "\f02f"; -$fa-var-product-hunt: "\f288"; -$fa-var-puzzle-piece: "\f12e"; -$fa-var-qq: "\f1d6"; -$fa-var-qrcode: "\f029"; -$fa-var-question: "\f128"; -$fa-var-question-circle: "\f059"; -$fa-var-question-circle-o: "\f29c"; -$fa-var-quora: "\f2c4"; -$fa-var-quote-left: "\f10d"; -$fa-var-quote-right: "\f10e"; -$fa-var-ra: "\f1d0"; -$fa-var-random: "\f074"; -$fa-var-ravelry: "\f2d9"; -$fa-var-rebel: "\f1d0"; -$fa-var-recycle: "\f1b8"; -$fa-var-reddit: "\f1a1"; -$fa-var-reddit-alien: "\f281"; -$fa-var-reddit-square: "\f1a2"; -$fa-var-refresh: "\f021"; -$fa-var-registered: "\f25d"; -$fa-var-remove: "\f00d"; -$fa-var-renren: "\f18b"; -$fa-var-reorder: "\f0c9"; -$fa-var-repeat: "\f01e"; -$fa-var-reply: "\f112"; -$fa-var-reply-all: "\f122"; -$fa-var-resistance: "\f1d0"; -$fa-var-retweet: "\f079"; -$fa-var-rmb: "\f157"; -$fa-var-road: "\f018"; -$fa-var-rocket: "\f135"; -$fa-var-rotate-left: "\f0e2"; -$fa-var-rotate-right: "\f01e"; -$fa-var-rouble: "\f158"; -$fa-var-rss: "\f09e"; -$fa-var-rss-square: "\f143"; -$fa-var-rub: "\f158"; -$fa-var-ruble: "\f158"; -$fa-var-rupee: "\f156"; -$fa-var-s15: "\f2cd"; -$fa-var-safari: "\f267"; 
-$fa-var-save: "\f0c7"; -$fa-var-scissors: "\f0c4"; -$fa-var-scribd: "\f28a"; -$fa-var-search: "\f002"; -$fa-var-search-minus: "\f010"; -$fa-var-search-plus: "\f00e"; -$fa-var-sellsy: "\f213"; -$fa-var-send: "\f1d8"; -$fa-var-send-o: "\f1d9"; -$fa-var-server: "\f233"; -$fa-var-share: "\f064"; -$fa-var-share-alt: "\f1e0"; -$fa-var-share-alt-square: "\f1e1"; -$fa-var-share-square: "\f14d"; -$fa-var-share-square-o: "\f045"; -$fa-var-shekel: "\f20b"; -$fa-var-sheqel: "\f20b"; -$fa-var-shield: "\f132"; -$fa-var-ship: "\f21a"; -$fa-var-shirtsinbulk: "\f214"; -$fa-var-shopping-bag: "\f290"; -$fa-var-shopping-basket: "\f291"; -$fa-var-shopping-cart: "\f07a"; -$fa-var-shower: "\f2cc"; -$fa-var-sign-in: "\f090"; -$fa-var-sign-language: "\f2a7"; -$fa-var-sign-out: "\f08b"; -$fa-var-signal: "\f012"; -$fa-var-signing: "\f2a7"; -$fa-var-simplybuilt: "\f215"; -$fa-var-sitemap: "\f0e8"; -$fa-var-skyatlas: "\f216"; -$fa-var-skype: "\f17e"; -$fa-var-slack: "\f198"; -$fa-var-sliders: "\f1de"; -$fa-var-slideshare: "\f1e7"; -$fa-var-smile-o: "\f118"; -$fa-var-snapchat: "\f2ab"; -$fa-var-snapchat-ghost: "\f2ac"; -$fa-var-snapchat-square: "\f2ad"; -$fa-var-snowflake-o: "\f2dc"; -$fa-var-soccer-ball-o: "\f1e3"; -$fa-var-sort: "\f0dc"; -$fa-var-sort-alpha-asc: "\f15d"; -$fa-var-sort-alpha-desc: "\f15e"; -$fa-var-sort-amount-asc: "\f160"; -$fa-var-sort-amount-desc: "\f161"; -$fa-var-sort-asc: "\f0de"; -$fa-var-sort-desc: "\f0dd"; -$fa-var-sort-down: "\f0dd"; -$fa-var-sort-numeric-asc: "\f162"; -$fa-var-sort-numeric-desc: "\f163"; -$fa-var-sort-up: "\f0de"; -$fa-var-soundcloud: "\f1be"; -$fa-var-space-shuttle: "\f197"; -$fa-var-spinner: "\f110"; -$fa-var-spoon: "\f1b1"; -$fa-var-spotify: "\f1bc"; -$fa-var-square: "\f0c8"; -$fa-var-square-o: "\f096"; -$fa-var-stack-exchange: "\f18d"; -$fa-var-stack-overflow: "\f16c"; -$fa-var-star: "\f005"; -$fa-var-star-half: "\f089"; -$fa-var-star-half-empty: "\f123"; -$fa-var-star-half-full: "\f123"; -$fa-var-star-half-o: "\f123"; -$fa-var-star-o: "\f006"; -$fa-var-steam: "\f1b6"; -$fa-var-steam-square: "\f1b7"; -$fa-var-step-backward: "\f048"; -$fa-var-step-forward: "\f051"; -$fa-var-stethoscope: "\f0f1"; -$fa-var-sticky-note: "\f249"; -$fa-var-sticky-note-o: "\f24a"; -$fa-var-stop: "\f04d"; -$fa-var-stop-circle: "\f28d"; -$fa-var-stop-circle-o: "\f28e"; -$fa-var-street-view: "\f21d"; -$fa-var-strikethrough: "\f0cc"; -$fa-var-stumbleupon: "\f1a4"; -$fa-var-stumbleupon-circle: "\f1a3"; -$fa-var-subscript: "\f12c"; -$fa-var-subway: "\f239"; -$fa-var-suitcase: "\f0f2"; -$fa-var-sun-o: "\f185"; -$fa-var-superpowers: "\f2dd"; -$fa-var-superscript: "\f12b"; -$fa-var-support: "\f1cd"; -$fa-var-table: "\f0ce"; -$fa-var-tablet: "\f10a"; -$fa-var-tachometer: "\f0e4"; -$fa-var-tag: "\f02b"; -$fa-var-tags: "\f02c"; -$fa-var-tasks: "\f0ae"; -$fa-var-taxi: "\f1ba"; -$fa-var-telegram: "\f2c6"; -$fa-var-television: "\f26c"; -$fa-var-tencent-weibo: "\f1d5"; -$fa-var-terminal: "\f120"; -$fa-var-text-height: "\f034"; -$fa-var-text-width: "\f035"; -$fa-var-th: "\f00a"; -$fa-var-th-large: "\f009"; -$fa-var-th-list: "\f00b"; -$fa-var-themeisle: "\f2b2"; -$fa-var-thermometer: "\f2c7"; -$fa-var-thermometer-0: "\f2cb"; -$fa-var-thermometer-1: "\f2ca"; -$fa-var-thermometer-2: "\f2c9"; -$fa-var-thermometer-3: "\f2c8"; -$fa-var-thermometer-4: "\f2c7"; -$fa-var-thermometer-empty: "\f2cb"; -$fa-var-thermometer-full: "\f2c7"; -$fa-var-thermometer-half: "\f2c9"; -$fa-var-thermometer-quarter: "\f2ca"; -$fa-var-thermometer-three-quarters: "\f2c8"; -$fa-var-thumb-tack: "\f08d"; -$fa-var-thumbs-down: "\f165"; 
-$fa-var-thumbs-o-down: "\f088"; -$fa-var-thumbs-o-up: "\f087"; -$fa-var-thumbs-up: "\f164"; -$fa-var-ticket: "\f145"; -$fa-var-times: "\f00d"; -$fa-var-times-circle: "\f057"; -$fa-var-times-circle-o: "\f05c"; -$fa-var-times-rectangle: "\f2d3"; -$fa-var-times-rectangle-o: "\f2d4"; -$fa-var-tint: "\f043"; -$fa-var-toggle-down: "\f150"; -$fa-var-toggle-left: "\f191"; -$fa-var-toggle-off: "\f204"; -$fa-var-toggle-on: "\f205"; -$fa-var-toggle-right: "\f152"; -$fa-var-toggle-up: "\f151"; -$fa-var-trademark: "\f25c"; -$fa-var-train: "\f238"; -$fa-var-transgender: "\f224"; -$fa-var-transgender-alt: "\f225"; -$fa-var-trash: "\f1f8"; -$fa-var-trash-o: "\f014"; -$fa-var-tree: "\f1bb"; -$fa-var-trello: "\f181"; -$fa-var-tripadvisor: "\f262"; -$fa-var-trophy: "\f091"; -$fa-var-truck: "\f0d1"; -$fa-var-try: "\f195"; -$fa-var-tty: "\f1e4"; -$fa-var-tumblr: "\f173"; -$fa-var-tumblr-square: "\f174"; -$fa-var-turkish-lira: "\f195"; -$fa-var-tv: "\f26c"; -$fa-var-twitch: "\f1e8"; -$fa-var-twitter: "\f099"; -$fa-var-twitter-square: "\f081"; -$fa-var-umbrella: "\f0e9"; -$fa-var-underline: "\f0cd"; -$fa-var-undo: "\f0e2"; -$fa-var-universal-access: "\f29a"; -$fa-var-university: "\f19c"; -$fa-var-unlink: "\f127"; -$fa-var-unlock: "\f09c"; -$fa-var-unlock-alt: "\f13e"; -$fa-var-unsorted: "\f0dc"; -$fa-var-upload: "\f093"; -$fa-var-usb: "\f287"; -$fa-var-usd: "\f155"; -$fa-var-user: "\f007"; -$fa-var-user-circle: "\f2bd"; -$fa-var-user-circle-o: "\f2be"; -$fa-var-user-md: "\f0f0"; -$fa-var-user-o: "\f2c0"; -$fa-var-user-plus: "\f234"; -$fa-var-user-secret: "\f21b"; -$fa-var-user-times: "\f235"; -$fa-var-users: "\f0c0"; -$fa-var-vcard: "\f2bb"; -$fa-var-vcard-o: "\f2bc"; -$fa-var-venus: "\f221"; -$fa-var-venus-double: "\f226"; -$fa-var-venus-mars: "\f228"; -$fa-var-viacoin: "\f237"; -$fa-var-viadeo: "\f2a9"; -$fa-var-viadeo-square: "\f2aa"; -$fa-var-video-camera: "\f03d"; -$fa-var-vimeo: "\f27d"; -$fa-var-vimeo-square: "\f194"; -$fa-var-vine: "\f1ca"; -$fa-var-vk: "\f189"; -$fa-var-volume-control-phone: "\f2a0"; -$fa-var-volume-down: "\f027"; -$fa-var-volume-off: "\f026"; -$fa-var-volume-up: "\f028"; -$fa-var-warning: "\f071"; -$fa-var-wechat: "\f1d7"; -$fa-var-weibo: "\f18a"; -$fa-var-weixin: "\f1d7"; -$fa-var-whatsapp: "\f232"; -$fa-var-wheelchair: "\f193"; -$fa-var-wheelchair-alt: "\f29b"; -$fa-var-wifi: "\f1eb"; -$fa-var-wikipedia-w: "\f266"; -$fa-var-window-close: "\f2d3"; -$fa-var-window-close-o: "\f2d4"; -$fa-var-window-maximize: "\f2d0"; -$fa-var-window-minimize: "\f2d1"; -$fa-var-window-restore: "\f2d2"; -$fa-var-windows: "\f17a"; -$fa-var-won: "\f159"; -$fa-var-wordpress: "\f19a"; -$fa-var-wpbeginner: "\f297"; -$fa-var-wpexplorer: "\f2de"; -$fa-var-wpforms: "\f298"; -$fa-var-wrench: "\f0ad"; -$fa-var-xing: "\f168"; -$fa-var-xing-square: "\f169"; -$fa-var-y-combinator: "\f23b"; -$fa-var-y-combinator-square: "\f1d4"; -$fa-var-yahoo: "\f19e"; -$fa-var-yc: "\f23b"; -$fa-var-yc-square: "\f1d4"; -$fa-var-yelp: "\f1e9"; -$fa-var-yen: "\f157"; -$fa-var-yoast: "\f2b1"; -$fa-var-youtube: "\f167"; -$fa-var-youtube-play: "\f16a"; -$fa-var-youtube-square: "\f166"; - diff --git a/site/_sass/vendor/font-awesome/font-awesome.scss b/site/_sass/vendor/font-awesome/font-awesome.scss deleted file mode 100644 index f1c83aaa5d2..00000000000 --- a/site/_sass/vendor/font-awesome/font-awesome.scss +++ /dev/null @@ -1,18 +0,0 @@ -/*! 
- * Font Awesome 4.7.0 by @davegandy - http://fontawesome.io - @fontawesome - * License - http://fontawesome.io/license (Font: SIL OFL 1.1, CSS: MIT License) - */ - -@import "variables"; -@import "mixins"; -@import "path"; -@import "core"; -@import "larger"; -@import "fixed-width"; -@import "list"; -@import "bordered-pulled"; -@import "animated"; -@import "rotated-flipped"; -@import "stacked"; -@import "icons"; -@import "screen-reader"; diff --git a/site/bps/BP-31-durability b/site/bps/BP-31-durability deleted file mode 100644 index 7c16eb22bf0..00000000000 --- a/site/bps/BP-31-durability +++ /dev/null @@ -1,134 +0,0 @@ - ---- -title: "BP-31: BookKeeper Durability (Anchor)" -issue: https://github.com/apache/bookkeeper/issues/1202 -state: 'Accepted' -release: "N/A" ---- -## Motivation -Apache BookKeeper is transitioning into a full-fledged distributed storage system that can keep data for the long term. Durability is paramount to achieving the status of a trusted store. This anchor BP discusses many gaps and areas of improvement. Each item listed here will have its own tracking issue, and this BP is expected to be updated when that issue is created. - -## Durability Contract -1. **Maintain WQ copies all the time**. If any ledger falls into an under-replicated state, there needs to be an SLA on how quickly the replication can be brought back to normal levels. -2. **Enforce Placement Policy** strictly during write and replication. -3. **Protect the data** against corruption on the wire or at rest. - -## Work Grouping (In the order of priority) -### Detect Durability Violations -The first step is to understand the areas of durability breaches. Design metrics that record durability contract violations. -* At creation: Validate the durability contract when the ledger is being created -* At deletion: Validate the durability contract when the ledger is deleted -* During lifetime: Validate the durability contract during the lifetime of the ledger (periodic validator) -* During read: IO or checksum errors in the read path -### Delete Discipline -* Build a single delete choke point with stringent validations -* Archival bit in the metadata to assist two-phase deletes -* Stateful/explicit deletes -### Metadata Recovery -* Metadata recovery tool to reconstruct the metadata if the metadata server gets wiped out. This tool needs to make sure that the data is readable even if we can't get all the metadata (ex: ctime) back. - -### Plug Durability Violations -Our first step is to identify durability violations. That gives us the magnitude of the problem and the areas we need to focus on. In this phase, fix the high-impact areas. -* Identify the sources of the problems detected by the work we did in step 1 above (Detect Durability Violations) -* Re-replicate under-replicated ledgers detected during writes -* Re-replicate under-replicated/corrupted ledgers detected during reads -* Re-replicate under-replicated ledgers identified by the periodic validator. -### Durability Test -* Test plan, new tests and integrating them into the CI pipeline. -### Introduce bookie incarnation -* Design/implement a bookie incarnation mechanism -### End 2 End Checksum -* Efficient checksum implementation (crc32c?) -* Implement checksum validation on bookies in the write path. -### Soft Deletes -* Design and implement a soft delete feature -### BitRot detection -* Design and implement bitrot detection/correction. - -## Durability Contract Violations -### Write errors beyond AQ are ignored. -The BK client library transparently corrects any write errors while writing to a bookie by changing the ensemble.
Take a case where `WQ:3 and AQ:2`. This works fine only if the write to the bookie fails before the client gets 2 successful responses. But if the 3rd bookie write fails **after** 2 successful responses and the response has been sent to the client, this error is only logged and no immediate action is taken to bring up the replication of the entry. -This case **may not be** detected by the auditor's periodic ledger check. Given that we allow out-of-order writes, in combination with 2 out of 3 responses satisfying the client, it is possible to have under-replication in the middle of an ensemble's entry range. Hence the ledger check is not going to find all under-replication cases; on top of that, the periodic ledger check is a complete sweep of the store, a very expensive and slow crawl, and is hence defaulted to a once-a-week run. - -### Strict enforcement of placement policy -The best-effort placement policy increases write availability, but at the cost of durability. Due to this non-strict placement, BK can't guarantee data availability when a fault domain (rack) is lost. This also makes rolling upgrades across fault domains more difficult, if not impossible. We need to enforce strict ensemble placement and fail the write if all WQ copies cannot be placed across different fault domains. This is a minor fix/enhancement if we agree to give placement higher priority than a successful write (availability). - -The auditor re-replication uses the client library to find a replacement bookie for each ledger on the lost bookie. But bookies are unaware of the ledger ensemble placement policy, as this information is not part of the metadata. - -### Detect and act on ledger disk problems -While the auditor mechanism detects a complete bookie crash, there is no mechanism to detect individual ledger disk errors. So if a ledger disk goes bad, the bookie continues to run, and the auditor can't recognize the under-replication condition until it runs the complete sweep, the periodic ledger check. On the other hand, a bookie refuses to come up if it finds a bad disk, which is the right thing to do. This is easy to fix in the interleaved ledger manager's bad-disk handling. - -### Checksum at bookies in the write path -The lack of checksum calculations on the write path means the store cannot detect corruption introduced at the source. Imagine NIC issues on the client. If data gets corrupted at the client NIC's level, it safely gets stored on bookies (for the lack of CRC calculations in the write path). This is a silent corruption of all 3 copies. For additional durability guarantees we can add checksum verification on bookies in the write path. Checksum calculations are CPU intensive and will add to the latency. But Java 9 is adding native support for CRC32C - a hardware-assisted CRC calculation. We can consider adding this validation during the Java 9 port after evaluating the performance tradeoffs. - -### No repair in the read path -When a checksum error is detected, in addition to finding a good replica, the store needs to repair (replace with a good copy) the bad replica too. - - -## Operations -### No bookie incarnation mechanism -A bookie `B1` at time `t1` and the same bookie `B1` at time `t2` after a bookie format are treated in the same way. -For this to cause any durability issues: -* The replication/auditor mechanism is stopped or not running for some reason. (A stuck auditor will start a new one due to ZK) -* One of the bookies (B1) went down (crash or something) -* B1's journal dir and all ledger dirs got wiped.
-* B1 came back to life as a fresh bookie -* The auditor is enabled and monitoring again - -At this point the auditor doesn't have the capability to know that the B1 in the cluster is not the same B1 that it used to be, and hence doesn't consider it for under-replication. This is a pathological scenario, but we at least need a mechanism to identify and alert on this scenario, even if we don't take care of the bookie incarnation issue. - -## Enhancements -### Delete Choke Points -Every delete must go through a single routine/path in the code, and that path needs to implement additional checks before performing a physical delete. - -### Archival bit in the metadata to assist two-phase deletes -The main aim of this feature is to be as conservative as possible on the delete path. As explained in the stateful explicit deletes section, the lack of a ledgerId in the metadata means that the ledger is deleted. A bug in the client code may erroneously delete the ledger. To protect from that, we want to introduce an archive/backed-up bit. A separate backup/archival application can mark the bit after successfully backing up the ledger, and later on the main client application will send the delete. If this feature is enabled, the BK client will reject the request and throw an exception if it receives a delete request while the archival/backed-up bit is not set. This protects the data from bugs and erroneous deletes. - -### Stateful explicit deletes -Currently BookKeeper synchronously deletes the metadata in ZooKeeper. Bookies implicitly assume that a particular ledger is deleted if it is not present in the metadata. This process has no cross-check that the ledger is actually deleted. Any ZK corruption or loss of the ledger path znodes will make bookies delete data on disk, with no cross-check. Even bugs in the bookie code which 'determines' whether a ledger is present on ZK may lead to data deletion. - -The right way to deal with this is to asynchronously delete metadata after each bookie explicitly checks that a particular ledger is deleted. This way each bookie explicitly checks the 'delete state' of the ledger before deleting the on-disk data. One proposal is to move the deleted ledgers under /deleted/; another idea is to add a delete state, Open->Closed->Deleted. - -As soon as we make the metadata deletions asynchronous, the immediate question is who will delete it? -Option-1: A centralized process like the auditor will be responsible for deleting metadata after each bookie deletes its on-disk data. -Option-2: A decentralized, more complicated approach: the last bookie that deletes its on-disk data deletes the metadata too. -I am sure there can be more ideas. Any path will need a detailed design and needs to consider many corner cases. - -#### Obvious points to consider: -ZK is already heavily loaded with BK metadata. Keeping these znodes around longer indeed puts more pressure on ZK. -If a bookie is down for a long time, what would be the delete policy for the metadata? -There will be lots of corner-case scenarios we need to deal with. For example: -Bookie-1, hosting data for ledger-1, is down for a long time -Ledger-1's data has been replicated to other bookies -Ledger-1 is deleted, and its data and metadata are cleared. -Now bookie-1 came back to life. Since our policy is 'explicit state check delete', bookie-1 can't delete ledger-1's data, as it can't explicitly validate that ledger-1 has been deleted. -One possible solution: keep tombstones of deleted ledgers around for some duration.
If a bookie is down for more than that duration, it needs to be decommissioned and added as a new bookie. - -### Metadata recovery tool -In case ZooKeeper is completely wiped, we need a way to reconstruct enough metadata to read ledgers back. Currently the metadata contains ensemble information, which is critical for reading ledgers back, and it also has additional metadata like ctime and custom metadata. Every bookie has one index file per ledger, and that has enough information to reconstruct the ensemble information so that the ledgers can be made readable. This tool can be built in two ways. -If ZK is completely wiped, reconstruct the entire metadata from bookie index files. -If ZK is completely wiped but snapshots are available, restore ZK from the snapshots and build the delta from bookie index files. - -### Bit Rot Detection (BP-24) -If the data stays on the disk for a long time (years), it is possible to experience silent data degradation on the disk. In the current scenario we will not identify this until the data is read by the application. - -### End to end checksum -Bookies never validate the payload checksum. If the client's socket has issues, it might corrupt the data (at the source) and it won't be detected until the client reads it back. That will be too late, as the original write was successful for the application. Use efficient checksum mechanisms and enforce checksum validations on the bookie's write path. If checksum validation fails, the write itself will fail and the application will be notified.
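-
-To make this write-path validation concrete, here is a minimal, illustrative Java sketch (not BookKeeper's actual implementation; the class and method names are invented for this example). It validates a client-supplied CRC32C checksum on the bookie before an entry is persisted, using `java.util.zip.CRC32C`, which Java 9 adds with a hardware-assisted implementation where available:
-
-```java
-import java.nio.ByteBuffer;
-import java.util.zip.CRC32C;
-
-public final class WritePathChecksumSketch {
-
-    // Computes the CRC32C of the payload; duplicate() leaves the caller's buffer position untouched.
-    static long crc32c(ByteBuffer payload) {
-        CRC32C crc = new CRC32C();
-        crc.update(payload.duplicate());
-        return crc.getValue();
-    }
-
-    // True if the payload that arrived matches the checksum the client computed before
-    // sending. On a mismatch the bookie should fail the write, so the application learns
-    // about corruption at the source instead of at read time.
-    static boolean validateBeforePersist(ByteBuffer payload, long clientCrc) {
-        return crc32c(payload) == clientCrc;
-    }
-}
-```
-
-CRC32C here merely stands in for whichever efficient checksum is finally chosen; the point is that validation happens before the write is acknowledged.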
-## Test strategy to validate durability -BK needs to develop a comprehensive testing strategy to test and validate the store's durability. Various methods and levels of tests are needed to gain confidence for deploying the store in production. Specific points are mentioned here, and these are in addition to regular functional testing/validation. -### White box error injection -Introduce all possible errors in the write path, kick off the replication mechanism, and make sure the cluster reaches the desired replica levels. -Corrupt the first readable copy and make sure that the corruption is detected on the read path, and ultimately the read must succeed after trying the second replica. -Corrupt a packet after the checksum calculation on the client and make sure that it is detected in the read path, and ultimately the read fails, as this is corruption at the source. -After a write, make sure that the replicas are distributed across fault zones. -Kill a bookie and make sure that the auditor detects and replicates all ledgers on that bookie according to the allocation policy (across fault zones). -### Black box error injection (Chaos Monkey) -While longevity testing keeps doing continuous IO to the store, introduce the following errors. -Kill a random bookie; reads should continue. -Kill random bookies while keeping the minimum fault zones needed to satisfy the AQ quorum during a write workload. -Simulate disk errors in random bookies, allow the bookie to go down, and verify that replication gets started. -Make sure that the cluster is running in a fully durable state through the tools and monitoring built. diff --git a/site/bps/BP-34-cluster-metadata-checker.md b/site/bps/BP-34-cluster-metadata-checker.md deleted file mode 100644 index 29916d18d33..00000000000 --- a/site/bps/BP-34-cluster-metadata-checker.md +++ /dev/null @@ -1,74 +0,0 @@ ---- -title: "BP-34: Cluster Metadata Checker" -issue: https://github.com/apache/bookkeeper/issues/1602 -state: Accepted -release: N/A ---- - -### Motivation - -Currently in the Auditor we have two checkers - the Periodic Bookie Check and the Periodic Ledger Check. The Bookie Check validates the availability of bookies, and if it finds any lost bookies it will mark the ledgers residing on the lost bookies as under-replicated. The Ledger Check reads the first entry and last entry of each segment from all the corresponding bookies of the ensemble, and if it fails to read any entry it will mark that ledger under-replicated. By setting an appropriate value for the conf 'auditorLedgerVerificationPercentage' we can read up to 100% of the entries of the ledger from all the corresponding bookies of the ensemble, and any failure in reading will mark the ledger under-replicated. - -Ideally, to have complete confidence in the data in the cluster, we need a new checker - validating the ledger placement policy, the durability contract, progress in handling under-replication, and the availability of the bookies in the ensembles of ledgers. By configuring 'auditorLedgerVerificationPercentage' to 100% in the periodic ledger check we would get most of what we intend to achieve, but this comes at a heavy price, since it involves reading all the entries from all the corresponding bookies in the ensemble; it is not a performant solution. - -### Proposed Changes - -The intention of this new checker is to validate the following things - - ledger placement policy : the ensemble of each segment in a ledger should adhere to the LedgerPlacementPolicy - - durability contract : every entry has WQ number of replicas, and entries are replicated according to the RoundRobinDistributionSchedule (see the illustrative sketch below) - - progress in handling under-replication : no ledger is marked under-replicated for more than an acceptable time - - availability of bookies of the ensemble of ledgers : if the Auditor fails to get a response from a bookie, then that bookie shouldn't be registered to the metadata server and the Auditor should be aware of its unavailability; or, if it is a transient error in getting a response from the bookie, then subsequent calls to that bookie should succeed. - -Roles and responsibilities of the cluster metadata checker - - Police the durability contract and report violations. Its job is to make sure that the metadata server (zk) and the storage servers (bookies) are in sync. Simply put, check if bookies agree with the metadata server's metadata and, if not, raise an alert. - - The scrutiny's job is not to fix any inconsistency it finds, but to make noise about it. If the scrutiny fails, it means that we have a potential hole (bug) in our service to meet the durability contract. The scrutiny exposes that hole with enough information to help identify the issue and fix it. - - The metadata scrutiny needs to be lightweight, especially on the bookie, and must run regularly, giving confidence that the cluster is in a good state.
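-
-To make the durability-contract check concrete, here is a minimal, illustrative Java sketch (not the actual Auditor code; the names and the bitmap representation are invented for this example). It assumes the standard round-robin placement, in which entry `e` of a segment with ensemble size `E` and write quorum `WQ` is expected on the bookies at ensemble indices `(e + j) mod E` for `j = 0 .. WQ-1`:
-
-```java
-import java.util.BitSet;
-import java.util.List;
-
-public final class DurabilityCheckSketch {
-
-    // True if the bookie at `bookieIndex` is expected to hold `entryId`
-    // under round-robin distribution.
-    static boolean shouldHold(long entryId, int bookieIndex, int ensembleSize, int writeQuorum) {
-        for (int j = 0; j < writeQuorum; j++) {
-            if ((entryId + j) % ensembleSize == bookieIndex) {
-                return true;
-            }
-        }
-        return false;
-    }
-
-    // `reported.get(i)` holds the entries (as offsets from firstEntry) that the bookie at
-    // ensemble index i reported as available. Counts entries missing a required copy;
-    // the checker only reports violations, it never repairs them.
-    static long countViolations(long firstEntry, long lastEntry,
-                                List<BitSet> reported, int writeQuorum) {
-        int ensembleSize = reported.size();
-        long violations = 0;
-        for (long e = firstEntry; e <= lastEntry; e++) {
-            for (int i = 0; i < ensembleSize; i++) {
-                if (shouldHold(e, i, ensembleSize, writeQuorum)
-                        && !reported.get(i).get((int) (e - firstEntry))) {
-                    violations++;
-                }
-            }
-        }
-        return violations;
-    }
-}
-```
-
-In the real checker, the per-bookie entry lists would come from the new GetListOfEntriesOfALedger request described under Public Interfaces below.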
- -High Level Logic - - Things would get complicated when analyzing ledgers which are not closed, for several reasons, viz., the inability to know the lastEntryId by reading the ZK metadata, the possibility of a change in the ensemble because of a write failure to a bookie, and other subtleties in dealing with the last unclosed segment of the ledger. So for the sake of simplicity this checker should be limited to ledgers which are write closed/fenced. - - This durability check for each ledger will be run as a processor in ledgerManager.asyncProcessLedgers, and it would ignore ledgers which are still open for writes. - - The first step is to check if this ledger is already marked under-replicated. If it has been marked under-replicated for more than the acceptable time, report it as a violation; otherwise skip this under-replicated ledger for this iteration of the durability check, since there is no point in further analyzing a ledger that is already marked under-replicated. - - Get the ledger metadata of the ledger from the metadata server. - - Make sure that the ensemble of the ledger segments is in agreement with the ledger placement policy. Any violation should be reported. - - Get the info about the available entries of the ledger from the bookies of the ensemble. A bookie is expected to return the list of entries it contains for a given ledger. - - We have to make sure that each bookie contains all the entries it is supposed to contain according to the RoundRobinDistributionSchedule, and that each entry has the write quorum number of copies. Any violation should be reported. - - If there is any failure in trying to get info from a bookie of the ensembles of the ledger, then add this ledger to the potentially faulty ledgers list (but don't report it yet). - - (In the previous steps, in case of any violation or bookie read error, before reporting the violation, check if the ledger is marked under-replicated. If it is marked under-replicated, then ignore this ledger for this iteration. If it is not marked under-replicated, then get the ledger metadata of this ledger one more time. If it is any different from the ledger metadata we got initially, then instead of reporting the violation, redo the analysis for this ledger, because apparently something has changed in the metadata (esp. the ensemble), and it is better to re-evaluate than to raise a false alarm.) - - If there are potentially faulty ledgers because of unavailable/unreachable bookies, then schedule a new durability check task, with a time delay, just for the potentially faulty ledgers. If, even after subsequent delayed checks, the Auditor fails to get a response from the bookies, then make sure that the bookie isn't registered to the metadata server and the Auditor is aware of its unavailability; if not, then report the violation. - - The Auditor is going to use existing mechanisms/frameworks to report the violations - bookkeeper-stats statslogger/counters and complementary information in the logs. - - It makes sense to group all the durability violations found in a scrutiny run according to their categories and report the aggregated count for each category at the end of the scrutiny run. - - Before reporting these violations, each violation should be logged with complete information, so that it can be used to understand what went wrong. - -### Public Interfaces - -To know which entries of a ledger are persisted on a bookie, currently there is no other way than reading the entries from the bookie using a BookieClient instance. So for the Auditor to know which entries of a ledger a bookie contains, we need a new on-wire BookKeeper protocol API, as mentioned below.
- -``` -message GetListOfEntriesOfALedgerRequest { - required int64 ledgerId = 1; -} - -message GetListOfEntriesOfALedgerResponse { - required StatusCode status = 1; - required int64 ledgerId = 2; - optional bytes availabilityOfEntriesOfLedger = 3; // treated as an array of bits indicating the availability of the entry at that particular index location -} -``` - -For the sake of future extensibility it would be helpful to add version info (and possibly some metadata in the future) at the beginning of 'availabilityOfEntriesOfLedger'. So the first 64 bytes will be considered reserved space, with the first byte specifying the version for now, and the rest of the bytes in the reserved space will be 0s. - -Here the bookie is expected to obtain this information just from the LedgerCache (index files) - IndexPersistenceMgr and IndexInMemPageMgr - but it doesn't actually check the availability of the entry in the EntryLogger, since the intention of this checker is limited to metadata validation at the cluster level. - -### Compatibility, Deprecation, and Migration Plan - -- With this feature we are introducing a new protocol message. We will do the required compatibility testing. - -### Test Plan - -- unit tests for the newly introduced API/code at the LedgerCache level -- end-to-end tests for the new protocol request/response -- validating the checker in all cases of violations - -### Rejected Alternatives - -N/A diff --git a/site/community/bookkeeper_proposals.md b/site/community/bookkeeper_proposals.md deleted file mode 100644 index ee8899e7a18..00000000000 --- a/site/community/bookkeeper_proposals.md +++ /dev/null @@ -1,134 +0,0 @@ ---- -title: BookKeeper Proposals ---- - -This page describes a proposed *BookKeeper Proposal (BP)* process for proposing a major change to BookKeeper. - -## Process - -### What is considered a "major change" that needs a BP? - -Any of the following should be considered a major change: - -- Any major new feature, subsystem, or piece of functionality -- Any change that impacts the public interfaces of the project -- Any change that impacts the developer workflow of the project - -All the following are public interfaces that people build around: - -- Binary log format -- The network protocol and API behavior -- Configuration, especially client configuration -- Monitoring/stats provider -- Command line tools and arguments - -### What should be included in a BP? - -A BP should contain the following sections: - -- *Motivation*: describe the problem to be solved -- *Proposed Change*: describe the new thing you want to do. This may be fairly extensive and have large subsections of its own. Or it may be a few sentences, depending on the scope of the change. -- *New or Changed Public Interfaces*: impact to any of the "compatibility commitments" described above. We want to call these out in particular so everyone thinks about them. -- *Migration Plan and Compatibility*: if this feature requires additional support for a no-downtime upgrade, describe how that will work -- *Rejected Alternatives*: what are the other alternatives you considered and why are they worse? The goal of this section is to help people understand why this is the best solution now, and also to prevent churn in the future when old alternatives are reconsidered. - -### Who should initiate the BP? - -Anyone can initiate a BP, but you shouldn't do it unless you have an intention of getting the work done to implement it (otherwise it is silly). - -### How to make a BP? - -Here is the process for making a BP: - -1.
Create an issue `BP-<number>: [caption of bookkeeper proposal]`. E.g. `BP-1: 64 bits ledger id support`. - - Take the next available BP number from this page. - - Write a brief description about what the BP is for in this issue. This issue will be the master issue for tracking the status of this BP and its implementations. - All the implementations of this BP should be listed and linked to this master issue. -1. Write the proposal for this BP. There are two ways to write a bookkeeper proposal. You can choose to write a BP using markdown, or write a BP -using Google Doc. - - Markdown - - Make a copy of the [BP-Template](https://github.com/apache/bookkeeper/tree/master/site/bps/BP-template.md). Name the BP file as `BP-<number>-[caption-of-proposal].md`. - ```shell - $ cp site/bps/BP-template.md site/bps/BP-xyz-caption-of-proposal.md - ``` - - Fill in the sections listed in the BP template. - - issue: replace `<issue-number>` with the issue number. - - state: "Under Discussion" - - release: leave the release as `N/A`. You can only mark a release after a BP is implemented. - - Google Doc - - Make a copy of the [BP-Template](https://docs.google.com/document/d/1DsmH54LoohgwqnEjESPQNtIYxxcOy2rwonZ_TJCwws0). Name the BP file as `BP-<number>-[caption-of-proposal]`. - - Fill in the sections listed in the BP template. -1. Send a PR for this BP, following the instructions in the pull request template. - - add the `BP` label to this PR - - attach the Google Doc link in the PR description if the BP is written in a Google Doc - - don't associate this PR with any release or milestone - - edit `site/community/bookkeeper_proposals.md`: - - bump the next BP number - - add this BP to the `Inprogress` section -1. You can tag committers on this PR as reviewers, or start a `[DISCUSS]` thread on the Apache mailing list. If you are sending an email, please make sure that the subject - of the thread is of the format `[DISCUSS] BP-<number>: caption of bookkeeper proposal`. -1. Once the BP is finalized, reviewed and approved by committers, the BP is accepted. The criteria for acceptance is [lazy majority](http://bookkeeper.apache.org/bylaws.html). - 1. Committers merge the PR after a BP is accepted. The development for this BP moves forward with implementations. The BP should be updated if anything changes while implementing it. - 1. After all the implementations for a given BP are completed, a new PR should be sent for changing the state of the BP: - - state: "Adopted" - - release: set to the release that includes this BP. - - moving the BP from `Inprogress` to `Adopted`. - 1. The final PR for changing the BP state will be used as the criterion for marking a BP as completed. -1. If a BP fails or is rejected: - 1. Update the PR to change the state of the BP - - state: "Discarded" - - add a paragraph at the top of this BP describing the reasons. - - moving the BP from `Inprogress` to `Discarded`. - 2. Once the PR is updated, committers can merge this proposal PR and close the master issue of this BP. - -## All Proposals - -This section lists all the _bookkeeper proposals_ made to BookKeeper.
- -*Next Proposal Number: 35* - -### Inprogress - -Proposal | State -:--------|:----- -[BP-4 - BookKeeper Lifecycle Management](https://cwiki.apache.org/confluence/display/BOOKKEEPER/BP-4+-+BookKeeper+Lifecycle+Management) | Draft -[BP-8 - Queue based auto rereplicator](https://cwiki.apache.org/confluence/display/BOOKKEEPER/BP-8+-+Queue+based+auto+rereplicator) | Draft -[BP-12 - Improve documentation](https://cwiki.apache.org/confluence/display/BOOKKEEPER/BP-12+-+Improve+documentation) | Accepted -[BP-14 Relax durability](https://cwiki.apache.org/confluence/display/BOOKKEEPER/BP-14+Relax+durability) | Accepted -[BP-16: Thin Client - Remove direct metadata storage access from clients](https://cwiki.apache.org/confluence/display/BOOKKEEPER/BP-16%3A+Thin+Client+-+Remove+direct+metadata+storage+access+from+clients) | Draft -[BP-18: LedgerType, Flags and StorageHints](https://cwiki.apache.org/confluence/display/BOOKKEEPER/BP-18%3A+LedgerType%2C+Flags+and+StorageHints) | Accepted -[BP-26: Move distributedlog library as part of bookkeeper](../../bps/BP-26-move-distributedlog-core-library) | Accepted -[BP-27: New BookKeeper CLI](../../bps/BP-27-new-bookkeeper-cli) | Accepted -[BP-28: use etcd as metadata store](../../bps/BP-28-etcd-as-metadata-store) | Accepted -[BP-29: Metadata API module](../../bps/BP-29-metadata-store-api-module) | Accepted -[BP-30: BookKeeper Table Service](https://docs.google.com/document/d/155xAwWv5IdOitHh1NVMEwCMGgB28M3FyMiQSxEpjE-Y/edit#heading=h.56rbh52koe3f) | Accepted -[BP-31: BookKeeper Durability Anchor](../../bps/BP-31-durability) | Accepted -[BP-32: Advisory (optimistic) write close](../../bps/BP-32-advisory-write-close) | Accepted -[BP-33: Move releasing docker images out of main repo](../../bps/BP-33-building-official-docker-imags) | Draft -[BP-34: Cluster Metadata Checker](../../bps/BP-34-cluster-metadata-checker) | Accepted - -### Adopted - -Proposal | Release -:--------|:------- -[BP-1 - 64 bits ledger id support](https://cwiki.apache.org/confluence/display/BOOKKEEPER/BP-1+-+64+bits+ledger+id+support) | 4.5.0 -[BP-2 - Resource aware data placement](https://cwiki.apache.org/confluence/display/BOOKKEEPER/BP-2+-+Resource+aware+data+placement) | 4.5.0 -[BP-3 - Security support](https://cwiki.apache.org/confluence/display/BOOKKEEPER/BP-3+-+Security+support) | 4.5.0 -[BP-5 - Allow reads outside the LAC Protocol](https://cwiki.apache.org/confluence/display/BOOKKEEPER/BP-5+Allow+reads+outside+the+LAC+Protocol) | 4.5.0 -[BP-6 - Use separate log for compaction](https://cwiki.apache.org/confluence/display/BOOKKEEPER/BP-6+-+Use+separate+log+for+compaction) | 4.6.0 -[BP-9 - Github issues for Issue Tracking](https://cwiki.apache.org/confluence/display/BOOKKEEPER/BP-9+-+Github+issues+for+Issue+Tracking) | 4.5.0 -[BP-10 - Official Bookkeeper Docker Image](https://cwiki.apache.org/confluence/display/BOOKKEEPER/BP-10+-+Official+Bookkeeper+Docker+Image) | 4.5.0 -[BP-11 - Move website/documentation to Jekyll based site](https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=71012301) | 4.5.0 -[BP-13 - Time Based Release Plan](https://cwiki.apache.org/confluence/display/BOOKKEEPER/BP-13+-+Time+Based+Release+Plan) | 4.6.0 -[BP-15 - New CreateLedger API](https://cwiki.apache.org/confluence/display/BOOKKEEPER/BP-15+New+CreateLedger+API) | 4.6.0 -[BP-17 - Define BookKeeper public http endpoints](https://cwiki.apache.org/confluence/display/BOOKKEEPER/BP-17%3A+Define+BookKeeper+public+http+endpoints) | 4.6.0 -[BP-20: Github workflow for bookkeeper 
proposals](../../bps/BP-20-github-workflow-for-bookkeeper-proposals) | 4.7.0 -[BP-25: Move checksum to proto](../../bps/BP-25-MovingChecksumToProto) | 4.7.0 - -### Discarded - -Proposal | Reason -:--------|:------ -[BP-7 - Explicit LAC on addEntry](https://cwiki.apache.org/confluence/display/BOOKKEEPER/BP-7+-+Explicit+LAC+on+addEntry) | Not A Problem -[BP-21: New API close inconsistencies](../../bps/BP-21-new-api-close-inconsistencies) | Not A Problem -[BP-22: Separate closing ledgers from opening ledgers](../../bps/BP-22-separate-closing-ledgers-from-opening-ledgers) | Not A Problem diff --git a/site/community/coding_guide.md b/site/community/coding_guide.md deleted file mode 100644 index 216cbd50c22..00000000000 --- a/site/community/coding_guide.md +++ /dev/null @@ -1,87 +0,0 @@ ---- -title: Coding Guide ---- - -These guidelines are meant to encourage consistency and best practices among people working on the _Apache BookKeeper_ code base. -They should be observed unless there is a compelling reason to ignore them. We are also using checkstyle to enforce coding style. -Please refer to our [checkstyle rules](https://github.com/apache/bookkeeper/blob/master/buildtools/src/main/resources/bookkeeper/checkstyle.xml) for all enforced checkstyle rules. - -### Java - -Apache BookKeeper code should follow the [Sun Java Coding Convention](http://www.oracle.com/technetwork/java/javase/documentation/codeconvtoc-136057.html), with the following additions. - -* Lines cannot be longer than 120 characters. -* Indentation should be **4 spaces**. Tabs should never be used. -* Use curly braces even for single-line ifs and elses. -* No @author tags in any javadoc. -* Use try-with-resources blocks whenever possible. -* **TODO**s should be associated with at least one issue. E.g. `// TODO: make this parameter configurable (https://github.com/apache/bookkeeper/issues/280)` - -### Dependencies - -Apache BookKeeper uses the following libraries extensively: - -* [Guava](https://github.com/google/guava): as a fundamental core library -* [Netty](http://netty.io/): for network communications and memory buffer management. - -Please use these libraries whenever possible rather than introducing more dependencies. - -Dependencies are bundled with our binary distributions, so we need to attach the relevant licenses. See [Third party dependencies and licensing](/community/licensing) for a guide on how to do this correctly. - -#### Future - -We prefer Java-8 Future over Guava's Listenable Future. Please use Java-8 Future whenever possible. - -#### Memory - -We prefer using netty _ByteBuf_ over java nio _ByteBuffer_ for internal usage, as we are using Netty buffers for memory management.
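-For example, here is a minimal sketch of the preferred _ByteBuf_ usage; the class is illustrative, the APIs are standard Netty:
-
-```java
-import io.netty.buffer.ByteBuf;
-import io.netty.buffer.PooledByteBufAllocator;
-
-public class ByteBufUsage {
-    public static void main(String[] args) {
-        // Allocate from Netty's pooled allocator instead of wrapping a java.nio.ByteBuffer.
-        ByteBuf buf = PooledByteBufAllocator.DEFAULT.buffer(16);
-        try {
-            buf.writeLong(42L);          // advances only the writer index
-            long value = buf.readLong(); // advances only the reader index; no flip() needed
-            System.out.println(value);
-        } finally {
-            buf.release(); // ByteBuf is reference counted: always release what you allocate
-        }
-    }
-}
-```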
- -### Logging - -* Logging should be taken seriously. Please take the time to access the logs when making a change to ensure that the important things are getting logged and there is no junk there. -* Logging statements should be complete sentences with proper capitalization that are written to be read by a person not necessarily familiar with the source code. -* All logging should be done with **SLF4J**, never _System.out_ or _System.err_. - -#### Logging levels - -- _INFO_ is the level you should assume the software will be run in. INFO messages are things which are not bad but which the user will definitely want to know about every time they occur. -- _TRACE_ and _DEBUG_ are both things you turn on when something is wrong and you want to figure out what is going on. _DEBUG_ should not be so fine-grained that it will seriously affect the performance of the program. _TRACE_ can be anything. Both _DEBUG_ and _TRACE_ statements should be wrapped in an _if (logger.isDebugEnabled())_ or _if (logger.isTraceEnabled())_ check to avoid performance degradation. -- _WARN_ and _ERROR_ indicate something that is **BAD**. Use _WARN_ if you aren't totally sure it is bad, and _ERROR_ if you are. - -Please log _stack traces_ at **ERROR** level, but never at **INFO** level or below. They can be logged at **WARN** level when they are interesting for debugging.
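-As an illustration, a minimal sketch of these logging conventions in practice (the class and method are hypothetical):
-
-```java
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-public class EntryWriter {
-    private static final Logger LOG = LoggerFactory.getLogger(EntryWriter.class);
-
-    void onAddComplete(long ledgerId, long entryId, Throwable error) {
-        if (LOG.isDebugEnabled()) {
-            // The guard avoids argument formatting costs when DEBUG is disabled.
-            LOG.debug("Added entry {} to ledger {}.", entryId, ledgerId);
-        }
-        if (error != null) {
-            // Stack traces belong at ERROR (or WARN when merely interesting), never at INFO or below.
-            LOG.error("Failed to add entry {} to ledger {}.", entryId, ledgerId, error);
-        }
-    }
-}
-```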
- -### Monitoring - -* Apache BookKeeper uses a pluggable [StatsProvider](https://github.com/apache/bookkeeper/tree/master/bookkeeper-stats) for exporting metrics. -* Any new feature should come with appropriate metrics for monitoring that the feature is working correctly. -* Metrics should be taken seriously: only export useful metrics that would be used in production for monitoring/alerting on the health of the system, or for troubleshooting problems. - -### Unit Tests - -* New changes should come with unit tests that verify the functionality being added. -* Unit tests should test the least amount of code possible. Don't start the whole server unless there is no other way to test a single class or small group of classes in isolation. -* Tests should not depend on any external resources. They need to set up and tear down their own stuff. -* It is okay to use the filesystem and network in tests since that's our business, but you need to clean them up after yourself. -* _Do not_ use sleep or other timing assumptions in tests. It is always, always, wrong and will fail intermittently on any test server with other things going on that cause delays. -* We strongly recommend adding a _timeout_ value to all our test cases, to prevent a build from running indefinitely. -`@Test(timeout=60000)`
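-For instance, a minimal JUnit sketch that follows both guidelines, waiting on a latch instead of sleeping and declaring a timeout (the test class is illustrative):
-
-```java
-import static org.junit.Assert.assertTrue;
-
-import java.util.concurrent.CountDownLatch;
-import java.util.concurrent.TimeUnit;
-import org.junit.Test;
-
-public class AsyncCallbackTest {
-
-    // The timeout keeps a hung test from stalling the whole build.
-    @Test(timeout = 60000)
-    public void testCallbackFires() throws Exception {
-        CountDownLatch done = new CountDownLatch(1);
-        // Stand-in for the asynchronous operation under test.
-        new Thread(done::countDown).start();
-        // Wait on the latch rather than calling Thread.sleep() to avoid timing assumptions.
-        assertTrue("callback did not fire", done.await(10, TimeUnit.SECONDS));
-    }
-}
-```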
### Configuration - -* Names should be thought through from the point of view of the person using the config. -* The default values should be chosen as the best values for people who run the program without tuning parameters. -* All configuration settings should be added to the [default configuration file](https://github.com/apache/bookkeeper/blob/master/bookkeeper-server/conf/bk_server.conf) and [documented](https://github.com/apache/bookkeeper/blob/master/site/_data/config/bk_server.yaml). - -### Concurrency - -Apache BookKeeper is a low-latency system, so it is implemented as a purely asynchronous service. This is accomplished as follows: - -* All public classes should be **thread-safe**. -* We prefer using [OrderedSafeExecutor](https://github.com/apache/bookkeeper/blob/master/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/OrderedSafeExecutor.java) for executing any asynchronous actions. Mutations to the same instance should be submitted to the same thread for execution. -* If synchronization and locking are required, they should be fine-grained. -* All threads should have proper, meaningful names. -* If a class is not thread-safe, it should be annotated [@NotThreadSafe](https://github.com/misberner/jsr-305/blob/master/ri/src/main/java/javax/annotation/concurrent/NotThreadSafe.java). The code that uses this class is responsible for its synchronization. - -### Backwards Compatibility -* Wire protocol should support backwards compatibility to enable no-downtime upgrades. This means the servers **MUST** be able to support requests from both old and new clients simultaneously. -* Metadata formats and data formats should support backwards compatibility. diff --git a/site/community/meeting.md b/site/community/meeting.md deleted file mode 100644 index 8936023de73..00000000000 --- a/site/community/meeting.md +++ /dev/null @@ -1,19 +0,0 @@ ---- -title: Community Meetings ---- - -The community meeting runs bi-weekly on Thursday, 8am - 9am PST. The meeting link is [https://goo.gl/iyRA6G](https://goo.gl/iyRA6G). - -The meeting typically comprises 3 parts: - -- Discuss [BookKeeper Proposals](https://cwiki.apache.org/confluence/display/BOOKKEEPER/BookKeeper+Proposals). -- Review and address concerns for any open pull requests. -- Open discussion. - -As a member of the BookKeeper community, you are welcome to join any of the meetings if you are interested. No registration is required. - - - -### Past Community Meetings - -You can read the meeting notes from [past community meetings](https://cwiki.apache.org/confluence/display/BOOKKEEPER/Community+Meetings). diff --git a/site/community/release_guide.md b/site/community/release_guide.md deleted file mode 100644 index 70aefe09567..00000000000 --- a/site/community/release_guide.md +++ /dev/null @@ -1,688 +0,0 @@ ---- -title: Apache BookKeeper Release Guide -layout: community ---- - -* TOC -{:toc} - -This page documents the procedure to make an Apache BookKeeper release. - -## Introduction - -The Apache BookKeeper project periodically declares and publishes releases. A release is one or more packages of the project artifact(s) that are approved for general public distribution and use. They may come with various degrees of caveat regarding their perceived quality and potential for change, such as “alpha”, “beta”, “incubating”, “stable”, etc. - -The BookKeeper community treats releases with great importance. They are a public face of the project and most users interact with the project only through the releases. Releases are signed off by the entire BookKeeper community in a public vote. - -Each release is executed by a *Release Manager*, who is selected among the [BookKeeper committers](http://bookkeeper.apache.org/credits.html). This document describes the process that the Release Manager follows to perform a release. Any changes to this process should be discussed and adopted on the [dev@ mailing list](http://bookkeeper.apache.org/lists.html). - -Please remember that publishing software has legal consequences. This guide complements the foundation-wide [Product Release Policy](http://www.apache.org/dev/release.html) and [Release Distribution Policy](http://www.apache.org/dev/release-distribution). - -## Overview - -The release process consists of several steps: - -1. Decide to release -2. Prepare for the release -3. Build a release candidate -4. Vote on the release candidate -5. If necessary, fix any issues and go back to step 3. -6. Finalize the release -7. Promote the release - -********** - -## Decide to release - -Deciding to release and selecting a Release Manager is the first step of the release process. This is a consensus-based decision of the entire community. - -Anybody can propose a release on the dev@ mailing list, giving a solid argument and nominating a committer as the Release Manager (including themselves). There’s no formal process, no vote requirements, and no timing requirements. Any objections should be resolved by consensus before starting the release. - -In general, the community prefers to have a rotating set of 3-5 Release Managers. Keeping a small core set of managers allows enough people to build expertise in this area and improve processes over time, without Release Managers needing to re-learn the processes for each release. That said, if you are a committer interested in serving the community in this way, please reach out to the community on the dev@ mailing list. - -### Checklist to proceed to the next step - -1. Community agrees to release -2. Community selects a Release Manager - -********** - -## Prepare for the release - -Before your first release, you should perform one-time configuration steps. This will set up your security keys for signing the release and access to various release repositories. - -To prepare for each release, you should audit the project status in the Github issue tracker, and do necessary bookkeeping. Finally, you should create a release branch from which individual release candidates will be built. - -### One-time setup instructions - -#### GPG Key - -You need to have a GPG key to sign the release artifacts. Please be aware of the ASF-wide [release signing guidelines](https://www.apache.org/dev/release-signing.html). -If you don’t have a GPG key associated with your Apache account, please create one according to the [guidelines](http://apache.org/dev/openpgp.html#generate-key) and [upload](https://www.apache.org/dev/release-signing.html#keyserver-upload) your key to a public key server. - -> It is important to [link](https://www.apache.org/dev/release-signing.html#apache-wot) your GPG key into the Apache web of trust. -> You can reach out to other committers in the Apache BookKeeper community to sign your key. - -Once you have a GPG key associated with your Apache account, then: - -**First**, determine your Apache GPG Key and Key ID, as follows: - - gpg --list-keys - -This will list your GPG keys. One of these should reflect your Apache account, for example: - - -------------------------------------------------- - pub 2048R/845E6689 2016-02-23 - uid Nomen Nescio - sub 2048R/BA4D50BE 2016-02-23 - -Here, the key ID is the 8-digit hex string in the `pub` line: `845E6689`. - -**Second**, add your Apache GPG key to BookKeeper’s `KEYS` file in [`dist`](https://dist.apache.org/repos/dist/release/bookkeeper/KEYS). - -```shell - -# checkout the svn folder that contains the KEYS file -svn co https://dist.apache.org/repos/dist/release/bookkeeper bookkeeper_dist -cd bookkeeper_dist - -# Export the key in ascii format and append it to the file -( gpg --list-sigs $USER@apache.org - gpg --export --armor $USER@apache.org ) >> KEYS - -# Commit to svn -svn ci -m "Added gpg key for $USER" - -``` - -Once you have committed, please verify that your GPG key shows up in BookKeeper's `KEYS` file in [`dist`](https://dist.apache.org/repos/dist/release/bookkeeper/KEYS). - -**Third**, configure `git` to use this key when signing code by giving it your key ID, as follows: - - git config --global user.signingkey 845E6689 - -You may drop the `--global` option if you’d prefer to use this key for the current repository only. - -You may wish to start `gpg-agent` to unlock your GPG key only once using your passphrase. Otherwise, you may need to enter this passphrase hundreds of times.
The setup for `gpg-agent` varies based on operating system, but may be something like this: - - eval $(gpg-agent --daemon --no-grab --write-env-file $HOME/.gpg-agent-info) - export GPG_TTY=$(tty) - export GPG_AGENT_INFO - -#### Access to Apache Nexus repository - -Configure access to the [Apache Nexus repository](http://repository.apache.org/), which enables final deployment of releases to the Maven Central Repository. - -1. Log in with your Apache account. -2. Confirm you have appropriate access by finding `org.apache.bookkeeper` under `Staging Profiles`. -3. Navigate to your `Profile` (top right dropdown menu of the page). -4. Choose `User Token` from the dropdown, then click `Access User Token`. Copy a snippet of the Maven XML configuration block. -5. Insert this snippet twice into your global Maven `settings.xml` file (use the command `mvn -X | grep settings` to find the global Maven settings file), typically `${HOME}/.m2/settings.xml`. The end result should look like this, where `TOKEN_NAME` and `TOKEN_PASSWORD` are your secret tokens: - - <settings> - <servers> - <server> - <id>apache.releases.https</id> - <username>TOKEN_NAME</username> - <password>TOKEN_PASSWORD</password> - </server> - <server> - <id>apache.snapshots.https</id> - <username>TOKEN_NAME</username> - <password>TOKEN_PASSWORD</password> - </server> - </servers> - </settings> - -### Create a new version in Github - -When contributors resolve an issue in GitHub, they tag it with a release that will contain their changes. With the release currently underway, new issues should be resolved against a subsequent future release. Therefore, you should create a release item for this subsequent release, as follows: - -1. In Github, navigate to [`Issues > Milestones`](https://github.com/apache/bookkeeper/milestones). -2. Add a new milestone: choose the next minor version number compared to the one currently underway, select a day that is 3 months from now as the `Due Date`, write a description `Release x.y.z` and choose `Create milestone`. - -Skip this step in case of a minor release, as milestones are only for major releases. - -### Triage release-blocking issues in Github - -There could be outstanding release-blocking issues, which should be triaged before proceeding to build a release candidate. We track them by assigning a specific `Milestone` field even before the issue is resolved. - -The list of release-blocking issues is available at the [milestones page](https://github.com/apache/bookkeeper/milestones). Triage each unresolved issue with one of the following resolutions: - -* If the issue has been resolved and was not updated, close it accordingly. -* If the issue has not been resolved and it is acceptable to defer this until the next release, update the `Milestone` field to the new milestone you just created. Please consider discussing this with stakeholders and the dev@ mailing list, as appropriate. -* If the issue has not been resolved and it is not acceptable to release until it is fixed, the release cannot proceed. Instead, work with the BookKeeper community to resolve the issue. - -### Review Release Notes in Github - -> Github does not automatically generate Release Notes based on the `Milestone` field applied to issues. -> We can use [github-changelog-generator](https://github.com/skywinder/github-changelog-generator) to generate a ChangeLog for a milestone in the future. - -For Github, we can use the milestone link in the Release Notes. E.g. [Release 4.5.0 milestone](https://github.com/apache/bookkeeper/milestone/1?closed=1).
- -#### Prepare Release Notes - -After reviewing the release notes on Github, you should write the release notes under `site/docs/${release_version}/overview/releaseNotes.md` and then send out a pull request for review. - -[4.5.0 Release Notes](https://github.com/apache/bookkeeper/pull/402) is a good example to follow. - -### Prepare release branch - -Release candidates are built from a release branch. As a final step in preparation for the release, you should create the release branch, push it to the code repository, and update version information on the original branch. - -Check out the version of the codebase from which you start the release. For a new minor or major release, this may be `HEAD` of the `master` branch. To build a hotfix/incremental release, instead of the `master` branch, use the release tag of the release being patched. (Please make sure your cloned repository is up-to-date before starting.) - - git checkout <version> - -Set up a few environment variables to simplify Maven commands that follow. (We use `bash` Unix syntax in this guide.) - -For a major release (for instance 4.5.0): - - MAJOR_VERSION="4.5" - VERSION="4.5.0" - NEXT_VERSION="4.6.0" - BRANCH_NAME="branch-${MAJOR_VERSION}" - DEVELOPMENT_VERSION="${NEXT_VERSION}-SNAPSHOT" - -For a minor release (for instance 4.5.1): - - MAJOR_VERSION="4.5" - VERSION="4.5.1" - NEXT_VERSION="4.5.2" - BRANCH_NAME="branch-${MAJOR_VERSION}" - DEVELOPMENT_VERSION="${NEXT_VERSION}-SNAPSHOT" - -Version represents the release currently underway, while next version specifies the anticipated next version to be released from that branch. Normally, 4.5.0 is followed by 4.6.0, while 4.5.1 is followed by 4.5.2. - -#### Create branch for major release - -If you are cutting a minor release, you can skip this step and go to section [Checkout release branch](#checkout-release-branch). - -If you are cutting a major release, use the Maven release plugin to create the release branch and update the current branch to use the new development version. This command applies for the new major or minor version. - -> This command automatically checks in and tags your code in the code repository configured in the SCM. -> It is recommended to do a "dry run" before executing the command. To "dry run", you can provide "-DdryRun" -> at the end of this command. "dry run" will generate some temporary files in the project folder; you can remove -> them by running "mvn release:clean". - - mvn release:branch \ - -DbranchName=${BRANCH_NAME} \ - -DdevelopmentVersion=${DEVELOPMENT_VERSION} \ - [-DdryRun] - -> If you fail in the middle of running this command, please check whether you have `push` permissions on `github.com`. -> You need to use a [personal access token](https://help.github.com/articles/creating-a-personal-access-token-for-the-command-line/) rather than your own password if you have enabled `2 factor authentication`. -> -> On failures, you need to reset: -> -> $ git reset --hard apache/<branch-name> -> $ git branch -D ${BRANCH_NAME} - -##### Create CI jobs for release branch - -Once the release branch is created, please create corresponding CI jobs for the release branch. These CI jobs include postcommit jobs for different java versions and -integration tests. - -Example PR: [release-4.7.0](https://github.com/apache/bookkeeper/pull/1328) [integration tests for release-4.7.0](https://github.com/apache/bookkeeper/pull/1353) - -#### Checkout release branch - -Check out the release branch.
- - git checkout ${BRANCH_NAME} - -The rest of this guide assumes that commands are run in the root of a repository on `${BRANCH_NAME}` with the above environment variables set. - -Verify that pom.xml contains the correct VERSION; it should still end with the '-SNAPSHOT' suffix. - -### Checklist to proceed to the next step - -1. Release Manager’s GPG key is published to `dist.apache.org` -2. Release Manager’s GPG key is configured in `git` configuration -3. Release Manager has `org.apache.bookkeeper` listed under `Staging Profiles` in Nexus -4. Release Manager’s Nexus User Token is configured in `settings.xml` -5. Github milestone item for the subsequent release has been created -6. There are no release-blocking Github issues -7. Release Notes for the Github Milestone are generated, audited and adjusted -8. Release branch has been created -9. Originating branch has the version information updated to the new version - -********** - -## Build a release candidate - -The core of the release process is the build-vote-fix cycle. Each cycle produces one release candidate. The Release Manager repeats this cycle until the community approves one release candidate, which is then finalized. - -> Since 4.7.0, bookkeeper has been releasing a CRC32C module, `circe-checksum`, so all the steps for building a release candidate should happen in a linux environment. -> This ensures the release candidate is built with the right jni library for `circe-checksum`. - -Set up a few environment variables to simplify Maven commands that follow. This identifies the release candidate being built. Start with `release candidate number` equal to `0` and increment it for each candidate. - - RC_NUM="0" - TAG="release-${VERSION}" - RC_DIR="bookkeeper-${VERSION}-rc${RC_NUM}" - RC_TAG="v${VERSION}-rc${RC_NUM}" - -> Please make sure the `gpg` command is in your $PATH. The maven release plugin uses `gpg` to sign generated jars and packages. - -### Run linux docker container to build release candidate - -```shell -./dev/release/000-run-docker.sh ${RC_NUM} -``` - -After the docker process is launched, use the `cache` credential helper to cache github credentials during the releasing process. - -```shell -$ git config --global credential.helper "cache --timeout=3600" -``` - -Then run a dry-run github push to the apache github repo. You will be asked to type your github password, so the password will be cached for the whole releasing process. -If your account is configured with 2FA, use your personal token as the github password. - -The remote `apache` should point to `https://github.com/apache/bookkeeper`. - -```shell -$ git push apache --dry-run -``` - -### Build and stage Java artifacts with Maven - -Use the Maven release plugin to build the release artifacts, as follows: - -```shell -./dev/release/002-release-prepare.sh -``` - -Use the Maven release plugin to stage these artifacts on the Apache Nexus repository, as follows: - -```shell -./dev/release/003-release-perform.sh -``` - -> If `release:perform` fails, -> delete the release tag: git tag -d release-${VERSION} && git push apache :refs/tags/release-${VERSION} -> -> Also, you need to check the git commits on github, and if needed you may have to -> force-push the backed-out local git branch to github again. -> -> After the reset, run `./dev/release/002-release-prepare.sh` again. - -Review all staged artifacts. They should contain all relevant parts for each module, including `pom.xml`, jar, test jar, source, test source, javadoc, etc.
Artifact names should follow [the existing format](https://search.maven.org/#search%7Cga%7C1%7Cg%3A%22org.apache.bookkeeper%22) in which the artifact name mirrors the directory structure, e.g., `bookkeeper-server`. Carefully review any new artifacts. - -Close the staging repository on Apache Nexus. When prompted for a description, enter “Apache BookKeeper, version X, release candidate Y”. - -### Stage source release on dist.apache.org - -1. Copy the source release to the dev repository of `dist.apache.org`. - -```shell -./dev/release/004-stage-packages.sh -``` - -2. Verify that the files are [present](https://dist.apache.org/repos/dist/dev/bookkeeper). - -### Checklist to proceed to the next step - -1. Maven artifacts deployed to the staging repository of [repository.apache.org](https://repository.apache.org/content/repositories/) -1. Source and binary distributions deployed to the dev repository of [dist.apache.org](https://dist.apache.org/repos/dist/dev/bookkeeper/) - -********** - -## Vote on the release candidate - -Once you have built and individually reviewed the release candidate, please share it for community-wide review. Please review the foundation-wide [voting guidelines](http://www.apache.org/foundation/voting.html) for more information. - -Start the review-and-vote thread on the dev@ mailing list. Here’s an email template; please adjust as you see fit. - - From: Release Manager - To: dev@bookkeeper.apache.org - Subject: [VOTE] Release 4.5.0, release candidate #0 - - Hi everyone, - Please review and vote on the release candidate #0 for the version 4.5.0, as follows: - [ ] +1, Approve the release - [ ] -1, Do not approve the release (please provide specific comments) - - The complete staging area is available for your review, which includes: - * Release notes [1] - * The official Apache source and binary distributions to be deployed to dist.apache.org [2] - * All artifacts to be deployed to the Maven Central Repository [3] - * Source code tag "release-4.5.0" [4] with git sha XXXXXXXXXXXXXXXXXXXX - - BookKeeper's KEYS file contains PGP keys we used to sign this release: - https://dist.apache.org/repos/dist/release/bookkeeper/KEYS - - Please download these packages and review this release candidate: - - - Review release notes - - Download the source package (verify shasum, and asc) and follow the - instructions to build and run the bookkeeper service. - - Download the binary package (verify shasum, and asc) and follow the - instructions to run the bookkeeper service. - - Review maven repo, release tag, licenses, and any other things you think - are important to a release. - - The vote will be open for at least 72 hours. It is adopted by majority approval, with at least 3 PMC affirmative votes. - - Thanks, - Release Manager - - [1] link - [2] link - [3] link - [4] link - -If there are any issues found in the release candidate, reply on the vote thread to cancel the vote. There’s no need to wait 72 hours. Proceed to the `Fix Issues` step below and address the problem. However, some issues don’t require cancellation. For example, if an issue is found in the website pull request, just correct it on the spot and the vote can continue as-is. - -If there are no issues, reply on the vote thread to close the voting. Then, tally the votes in a separate email. Here’s an email template; please adjust as you see fit. (NOTE: the approvers listed are binding approvers.)
- - From: Release Manager - To: dev@bookkeeper.apache.org - Subject: [RESULT] [VOTE] Release 4.5.0, release candidate #0 - - I'm happy to announce that we have unanimously approved this release. - - There are XXX approving votes, XXX of which are binding: - * approver 1 - * approver 2 - * approver 3 - * approver 4 - - There are no disapproving votes. - - Thanks everyone! - -### Checklist to proceed to the finalization step - -1. Community votes to release the proposed candidate - -********** - -## Fix any issues - -Any issues identified during the community review and vote should be fixed in this step. - -Code changes should be proposed as standard pull requests to the `master` branch and reviewed using the normal contributing process. Then, relevant changes should be cherry-picked into the release branch. The cherry-picked commits should then be proposed as pull requests against the release branch, again reviewed and merged using the normal contributing process. - -Once all issues have been resolved, you should go back and build a new release candidate with these changes. - -### Checklist to proceed to the next step - -1. Issues identified during the vote have been resolved, with fixes committed to the release branch. - -********** - -## Finalize the release - -Once the release candidate has been reviewed and approved by the community, the release should be finalized. This involves the final deployment of the release candidate to the release repositories, merging of the website changes, etc. - -### Deploy artifacts to Maven Central Repository - -Use the Apache Nexus repository to release the staged binary artifacts to the Maven Central repository. In the `Staging Repositories` section, find the relevant release candidate `orgapachebookkeeper-XXX` entry and click `Release`. Drop all other release candidates that are not being released. - -### Deploy source release to dist.apache.org - -Copy the source release from the `dev` repository to the `release` repository at `dist.apache.org` using Subversion. - - svn move https://dist.apache.org/repos/dist/dev/bookkeeper/bookkeeper-${VERSION}-rc${RC_NUM} https://dist.apache.org/repos/dist/release/bookkeeper/bookkeeper-${VERSION} - -### Update Website - -1. Create the documentation for `${VERSION}`. Run `release.sh` to generate the branch for `${VERSION}` and bump - the versions for the website documentation; or run `release_minor.sh` to release documentation when doing a - minor release. - - ```shell - $ cd site - - // use `release.sh` for major releases - $ ./scripts/release.sh - - // or `release_minor.sh` for minor releases - $ ./scripts/release_minor.sh - ``` - - Once you have run `release.sh`, please send a pull request for it and get approval from any committer, then merge it. - The CI job will automatically update the website in a few minutes. Please review the website to make sure the - documentation for `${VERSION}` is live. - -2. Merge the Release Notes pull request and make sure the Release Notes are updated. - -### Update Dockerfile - -> NOTE: The dockerfile PR should only be merged after the release package has shown up under https://archive.apache.org/dist/bookkeeper/ - -1. Update the `BK_VERSION` and `GPG_KEY` in `docker/Dockerfile` (e.g. [Pull Request 436](https://github.com/apache/bookkeeper/pull/436) ), - send a pull request for review and get an approval from the community. - -2. Once the pull request is approved, merge this pull request into master and make sure it is cherry-picked into the corresponding branch.
- -### Update DC/OS BookKeeper package - -> NOTE: Please update the DC/OS bookkeeper package only after the release package has shown up under https://archive.apache.org/dist/bookkeeper/ - -Once we have a new version of the BookKeeper docker image available at [docker hub](https://hub.docker.com/r/apache/bookkeeper/), we can update the DC/OS BookKeeper package in the [mesosphere universe](https://github.com/mesosphere/universe). A new pull request to that repository is needed. - -It is easy if only the version needs to be bumped. - -1. Clone the repo [mesosphere universe](https://github.com/mesosphere/universe). - - ```shell - $ git clone https://github.com/mesosphere/universe - ``` - -2. cd into the repo and check out a branch for the changes. - - ```shell - $ cd universe - $ git checkout -b bookkeeper_new_version - ``` - -3. Make a copy of the latest code of the BookKeeper package. - - ```shell - $ cp -rf repo/packages/B/bookkeeper/1 repo/packages/B/bookkeeper/2 - $ git add repo/packages/B/bookkeeper/2 - $ git commit -m "copy old version" - ``` - -4. Bump the version of the BookKeeper docker image in the files [resource.json](https://github.com/mesosphere/universe/blob/version-3.x/repo/packages/B/bookkeeper/1/resource.json#L5) and [package.json](https://github.com/mesosphere/universe/blob/version-3.x/repo/packages/B/bookkeeper/1/package.json#L4). - - ``` - diff --git repo/packages/B/bookkeeper/2/package.json repo/packages/B/bookkeeper/2/package.json - index 07199d56..75f4aa81 100644 - --- repo/packages/B/bookkeeper/2/package.json - +++ repo/packages/B/bookkeeper/2/package.json - @@ -1,7 +1,7 @@ - { - "packagingVersion": "3.0", - "name": "bookkeeper", - - "version": "4.5.1", - + "version": "4.7.0", - "scm": "https://github.com/apache/bookkeeper", - "maintainer": "zhaijia@apache.org", - "description": "BookKeeper is A scalable, fault-tolerant, and low-latency storage service optimized for real-time workloads.Further information can be found here: http://bookkeeper.apache.org/", - diff --git repo/packages/B/bookkeeper/2/resource.json repo/packages/B/bookkeeper/2/resource.json - index 3801750e..72690ea0 100644 - --- repo/packages/B/bookkeeper/2/resource.json - +++ repo/packages/B/bookkeeper/2/resource.json - @@ -2,7 +2,7 @@ - "assets": { - "container": { - "docker": { - - "bookkeeper": "apache/bookkeeper:4.5.1" - + "bookkeeper": "apache/bookkeeper:4.7.0" - } - } - }, - ``` - -5. Commit the change, create a pull request and wait for it to be approved and merged. - - ```shell - $ git add repo/packages/B/bookkeeper/2 - $ git commit -m "new bookkeeper version" - ``` - -### Git tag - -Create and push a new signed tag for the released version by copying the final release candidate tag, as follows: - -```shell -git tag -s "${TAG}" "${RC_TAG}" -git push apache "${TAG}" -``` - -Remove the rc tags: - -```shell -for num in $(seq 0 ${RC_NUM}); do - git tag -d "v${VERSION}-rc${num}" - git push apache :"v${VERSION}-rc${num}" -done -``` - -### Verify Docker Image - -> After the release tag is created, it will automatically trigger a docker auto build. - -1. Verify on [docker hub](https://hub.docker.com/r/apache/bookkeeper/) that a new build for the given tag has been built. - -2. Once the new docker image is built, update the BC tests to include the new docker image. Example: [release-4.7.0](https://github.com/apache/bookkeeper/pull/1352) - -### Advance version on release branch - -> only do this for minor releases - -Use the Maven Release plugin in order to advance the version in all poms. - -> This command will update the version in every pom.xml locally in your workspace.
- - mvn release:update-versions - -DdevelopmentVersion=${DEVELOPMENT_VERSION} - -Dstream - -For instance, if you have released 4.5.1, you have to change the version to 4.5.2-SNAPSHOT. -Then you have to create a PR and submit it for review. - -Example PR: [release-4.7.0](https://github.com/apache/bookkeeper/pull/1350) - - -### Mark the version as released in Github - -> only do this for feature releases - -In Github, inside [milestones](https://github.com/apache/bookkeeper/milestones), hover over the current milestone and click the `close` button to close the milestone and set today's date as the due date. - -### Update Release Schedule - -> only do this for feature releases - -Update the [release schedule](../releases) page: - -- Bump the next feature release version and update its release window. -- Update the release schedule to remove the released version and add a new release. - -### Checklist to proceed to the next step - -* Maven artifacts released and indexed in the [Maven Central Repository](https://search.maven.org/#search%7Cga%7C1%7Cg%3A%22org.apache.bookkeeper%22) -* Source and binary distributions available in the release repository of [dist.apache.org](https://dist.apache.org/repos/dist/release/bookkeeper/) -* Website is updated with the new release -* Docker image is built with the new release -* Release tagged in the source code repository -* Release version finalized in Github -* Release section with release summary is added in [releases.md](https://github.com/apache/bookkeeper/blob/master/site/releases.md) -* Release schedule page is updated - -********** - -## Promote the release - -Once the release has been finalized, the last step of the process is to promote the release within the project and beyond. - -### Apache mailing lists - -- Announce on the dev@ mailing list that the release has been finished. -- Announce the release on the user@ mailing list, listing major improvements and contributions. -- Announce the release on the announce@apache.org mailing list. - -Use the template below for all the messages. - -> NOTE: Make sure to send the announcement email from your apache.org email address, otherwise announce@apache.org will reject your email. - - - From: xxx@apache.org - To: dev@bookkeeper.apache.org, user@bookkeeper.apache.org, announce@apache.org - Subject: [ANNOUNCE] Apache BookKeeper x.y.z released - - The Apache BookKeeper team is proud to announce Apache BookKeeper version - x.y.z. - - Apache BookKeeper is a scalable, fault-tolerant, and low-latency storage service optimized for - real-time workloads. It has been used as a fundamental service to build reliable services. - It is also the log segment store for Apache DistributedLog and the message store for Apache Pulsar. - - This is the Nth release of Apache BookKeeper. - - [highlight the release and why users need to try the release] - - For BookKeeper release details and downloads, visit: - - [download link] - - BookKeeper x.y.z Release Notes are at: - - [release notes link] - - We would like to thank the contributors that made the release possible. - - Regards, - - The BookKeeper Team - - -### Recordkeeping - -Use reporter.apache.org to seed the information about the release into future project reports. - -This step can be done only by the PMC. - -### Social media - -Tweet, post on Facebook, LinkedIn, and other platforms. Ask other contributors to do the same. - -This step can be done only by the PMC.
- -### Cleanup old releases - -According to [ASF policy](http://www.apache.org/legal/release-policy.html#when-to-archive), `/www.apache.org/dist` should contain the latest release in each branch that -is currently under development. We need to remove the old releases from the `release` repository. - -For example, if 4.6.1 is a newer release, we need to remove releases older than 4.6.1. - - ```shell - $ svn del https://dist.apache.org/repos/dist/release/bookkeeper/bookkeeper-${old-release} -m "remove bookkeeper release ${old-release}" - ``` - -### Checklist to declare the process completed - -1. Release announced on the user@ mailing list. -1. Blog post published, if applicable. -1. Apache Software Foundation press release published. -1. Release announced on social media. -1. Completion declared on the dev@ mailing list. - -********** - -## Improve the process - -It is important that we improve the release processes over time. Once you’ve finished the release, please take a step back and look at which areas of this process can be improved. Perhaps some part of the process can be simplified. Perhaps parts of this guide can be clarified. - -If you have specific ideas, please start a discussion on the dev@ mailing list and/or propose a pull request to update this guide. Thanks! diff --git a/site/community/releases.md b/site/community/releases.md deleted file mode 100644 index ca8f409bc8e..00000000000 --- a/site/community/releases.md +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: Release Management ---- - -> The Apache BookKeeper community adopts the [Time Based Release Plan](https://cwiki.apache.org/confluence/display/BOOKKEEPER/BP-13+-+Time+Based+Release+Plan) starting from 4.6.0. - -The Apache BookKeeper community makes a feature release every 3 months. - -- A month before the release date, the release manager will cut branches and also publish a list of features that will be included in the release. These features will typically - be [BookKeeper Proposals](https://cwiki.apache.org/confluence/display/BOOKKEEPER/BookKeeper+Proposals), but not always. -- Another week will be left for *minor* features to get in, but at this point the community will start efforts to stabilize the release branch and contribute mostly tests and fixes. -- Two weeks before the release date, the bookkeeper community will announce code-freeze and start rolling out release candidates, after which only fixes for blocking bugs will be merged. - -## Current Release - -### Feature Release Window - -The next feature release is `4.8.0`. The release window is the following: - -| **Date** | **Event** | -|----------|-----------| -| April 13, 2018 | Merge window opens on master branch | -| July 12, 2018 | Major features should be in, cut release branch | -| July 19, 2018 | Minor features should be in, stabilize release branch | -| July 26, 2018 - August 2, 2018 | Code freeze, only accept fixes for blocking issues, roll out release candidates | - -## Release Schedule - -- **4.7.0**: November 2017 - April 2018 -- **4.8.0**: April 2018 - July 2018 -- **4.9.0**: July 2018 - October 2018 -- **4.10.0**: October 2018 - January 2019 - - diff --git a/site/community/slack.md b/site/community/slack.md deleted file mode 100644 index 291082572da..00000000000 --- a/site/community/slack.md +++ /dev/null @@ -1,9 +0,0 @@ ---- -title: BookKeeper on Slack ---- - -There is an [Apache BookKeeper](http://apachebookkeeper.slack.com/) Slack channel that is used for informal discussions among BookKeeper developers and users.
- -The Slack channel is at [http://apachebookkeeper.slack.com/](http://apachebookkeeper.slack.com/). - -You can self-register at [https://apachebookkeeper.herokuapp.com/](https://apachebookkeeper.herokuapp.com/). diff --git a/site/community/testing.md b/site/community/testing.md deleted file mode 100644 index 2d784f912c4..00000000000 --- a/site/community/testing.md +++ /dev/null @@ -1,67 +0,0 @@ ---- -title: BookKeeper Testing Guide ---- - -* TOC -{:toc} - -## Overview - -Apache BookKeeper is a well-adopted software project with a strong commitment to testing. -Consequently, it has many testing-related needs. It requires precommit tests to ensure -code going to the repository meets a certain quality bar, and it requires ongoing postcommit -tests to make sure that more subtle changes which escape precommit are nonetheless caught. -This document outlines how to write tests, which tests are appropriate where, and when tests -are run, with some additional information about the testing systems at the bottom. - -## Testing Scenarios - -With the tools at our disposal, we have a good set of utilities which we can use to verify -BookKeeper correctness. To ensure an ongoing high quality of code, we use precommit and postcommit -testing. - -### Precommit - -For precommit testing, BookKeeper uses [Jenkins](https://builds.apache.org/job/bookkeeper-seed/) and -[Travis CI](https://travis-ci.org/apache/bookkeeper), hooked up to -[Github](https://github.com/apache/bookkeeper), to ensure that pull requests meet a certain quality bar. -These precommits verify correctness via unit/integration tests. - -Precommit tests are kicked off when a user makes a Pull Request against the `apache/bookkeeper` repository, -and the Jenkins and Travis CI statuses are displayed at the bottom of the pull request page. Clicking on -"Details" will open the status page in the selected tool; there, test status and output can be viewed. - -To retrigger precommit testing for a given Pull Request, you can comment "retest this please" for -Jenkins jobs, and close/reopen the Pull Request for Travis jobs. - -### Postcommit - -Running in postcommit removes the stringent time constraints of precommit, which gives us the ability to do some -more comprehensive testing. Currently in postcommit, we run unit/integration tests against both master and -the most recent release branch, publish the website for any changes related to the website and documentation, and -deploy a snapshot of the latest master to artifactory. - -Postcommit test results can be found in [Jenkins](https://builds.apache.org/job/bookkeeper-seed/). - -### Configuration - -All the precommit and postcommit CI jobs are managed either by Jenkins or Travis CI. - -Jenkins jobs are all written and managed using the [Jenkins Job DSL](https://github.com/jenkinsci/job-dsl-plugin/wiki). -The DSL scripts are maintained under [.test-infra](https://github.com/apache/bookkeeper/tree/master/.test-infra/jenkins). -Any Jenkins changes should be made in these files and reviewed by the community. - -Travis CI jobs are defined in [.travis.yml](https://github.com/apache/bookkeeper/blob/master/.travis.yml). -Any Travis CI changes should be made in this file and reviewed by the community. - -#### Testing Jenkins DSL scripts - -Changes to the Jenkins DSL scripts should be tested before submission to avoid having to send multiple PRs for a single change. -There is a [Jenkins testing seed job](https://builds.apache.org/job/bookkeeper-jenkins-testing/job/seed/) which can be used for testing DSL scripts.
- -To test a DSL script, create it in ```.test-infra/jenkins``` with a filename matching the pattern ```jenkins_testing_job_<name>.groovy```. Commit this to a branch, and push the branch to your own fork on github. Then kick off the testing seed job, specifying your fork and the branch as parameters. This will generate jobs under the ```bookkeeper-jenkins-testing``` folder in Jenkins. - -Run your job a couple of times to make sure it works and is stable. Once you're satisfied with the job, rename the file to match the pattern ```job_<name>.groovy```, and create a PR. - -> NOTE: Kicking off the testing seed job requires a jenkins account, which is only available to committers.
          -> **WARN: Don't put triggers in your testing job, as you could mess up outstanding PRs approvals. Add them later.** diff --git a/site/css/normalize.css b/site/css/normalize.css deleted file mode 100644 index fa4e73dd418..00000000000 --- a/site/css/normalize.css +++ /dev/null @@ -1,447 +0,0 @@ -/*! normalize.css v7.0.0 | MIT License | github.com/necolas/normalize.css */ - -/* Document - ========================================================================== */ - -/** - * 1. Correct the line height in all browsers. - * 2. Prevent adjustments of font size after orientation changes in - * IE on Windows Phone and in iOS. - */ - -html { - line-height: 1.15; /* 1 */ - -ms-text-size-adjust: 100%; /* 2 */ - -webkit-text-size-adjust: 100%; /* 2 */ -} - -/* Sections - ========================================================================== */ - -/** - * Remove the margin in all browsers (opinionated). - */ - -body { - margin: 0; -} - -/** - * Add the correct display in IE 9-. - */ - -article, -aside, -footer, -header, -nav, -section { - display: block; -} - -/** - * Correct the font size and margin on `h1` elements within `section` and - * `article` contexts in Chrome, Firefox, and Safari. - */ - -h1 { - font-size: 2em; - margin: 0.67em 0; -} - -/* Grouping content - ========================================================================== */ - -/** - * Add the correct display in IE 9-. - * 1. Add the correct display in IE. - */ - -figcaption, -figure, -main { /* 1 */ - display: block; -} - -/** - * Add the correct margin in IE 8. - */ - -figure { - margin: 1em 40px; -} - -/** - * 1. Add the correct box sizing in Firefox. - * 2. Show the overflow in Edge and IE. - */ - -hr { - box-sizing: content-box; /* 1 */ - height: 0; /* 1 */ - overflow: visible; /* 2 */ -} - -/** - * 1. Correct the inheritance and scaling of font size in all browsers. - * 2. Correct the odd `em` font sizing in all browsers. - */ - -pre { - font-family: monospace, monospace; /* 1 */ - font-size: 1em; /* 2 */ -} - -/* Text-level semantics - ========================================================================== */ - -/** - * 1. Remove the gray background on active links in IE 10. - * 2. Remove gaps in links underline in iOS 8+ and Safari 8+. - */ - -a { - background-color: transparent; /* 1 */ - -webkit-text-decoration-skip: objects; /* 2 */ -} - -/** - * 1. Remove the bottom border in Chrome 57- and Firefox 39-. - * 2. Add the correct text decoration in Chrome, Edge, IE, Opera, and Safari. - */ - -abbr[title] { - border-bottom: none; /* 1 */ - text-decoration: underline; /* 2 */ - text-decoration: underline dotted; /* 2 */ -} - -/** - * Prevent the duplicate application of `bolder` by the next rule in Safari 6. - */ - -b, -strong { - font-weight: inherit; -} - -/** - * Add the correct font weight in Chrome, Edge, and Safari. - */ - -b, -strong { - font-weight: bolder; -} - -/** - * 1. Correct the inheritance and scaling of font size in all browsers. - * 2. Correct the odd `em` font sizing in all browsers. - */ - -code, -kbd, -samp { - font-family: monospace, monospace; /* 1 */ - font-size: 1em; /* 2 */ -} - -/** - * Add the correct font style in Android 4.3-. - */ - -dfn { - font-style: italic; -} - -/** - * Add the correct background and color in IE 9-. - */ - -mark { - background-color: #ff0; - color: #000; -} - -/** - * Add the correct font size in all browsers. - */ - -small { - font-size: 80%; -} - -/** - * Prevent `sub` and `sup` elements from affecting the line height in - * all browsers. 
- */ - -sub, -sup { - font-size: 75%; - line-height: 0; - position: relative; - vertical-align: baseline; -} - -sub { - bottom: -0.25em; -} - -sup { - top: -0.5em; -} - -/* Embedded content - ========================================================================== */ - -/** - * Add the correct display in IE 9-. - */ - -audio, -video { - display: inline-block; -} - -/** - * Add the correct display in iOS 4-7. - */ - -audio:not([controls]) { - display: none; - height: 0; -} - -/** - * Remove the border on images inside links in IE 10-. - */ - -img { - border-style: none; -} - -/** - * Hide the overflow in IE. - */ - -svg:not(:root) { - overflow: hidden; -} - -/* Forms - ========================================================================== */ - -/** - * 1. Change the font styles in all browsers (opinionated). - * 2. Remove the margin in Firefox and Safari. - */ - -button, -input, -optgroup, -select, -textarea { - font-family: sans-serif; /* 1 */ - font-size: 100%; /* 1 */ - line-height: 1.15; /* 1 */ - margin: 0; /* 2 */ -} - -/** - * Show the overflow in IE. - * 1. Show the overflow in Edge. - */ - -button, -input { /* 1 */ - overflow: visible; -} - -/** - * Remove the inheritance of text transform in Edge, Firefox, and IE. - * 1. Remove the inheritance of text transform in Firefox. - */ - -button, -select { /* 1 */ - text-transform: none; -} - -/** - * 1. Prevent a WebKit bug where (2) destroys native `audio` and `video` - * controls in Android 4. - * 2. Correct the inability to style clickable types in iOS and Safari. - */ - -button, -html [type="button"], /* 1 */ -[type="reset"], -[type="submit"] { - -webkit-appearance: button; /* 2 */ -} - -/** - * Remove the inner border and padding in Firefox. - */ - -button::-moz-focus-inner, -[type="button"]::-moz-focus-inner, -[type="reset"]::-moz-focus-inner, -[type="submit"]::-moz-focus-inner { - border-style: none; - padding: 0; -} - -/** - * Restore the focus styles unset by the previous rule. - */ - -button:-moz-focusring, -[type="button"]:-moz-focusring, -[type="reset"]:-moz-focusring, -[type="submit"]:-moz-focusring { - outline: 1px dotted ButtonText; -} - -/** - * Correct the padding in Firefox. - */ - -fieldset { - padding: 0.35em 0.75em 0.625em; -} - -/** - * 1. Correct the text wrapping in Edge and IE. - * 2. Correct the color inheritance from `fieldset` elements in IE. - * 3. Remove the padding so developers are not caught out when they zero out - * `fieldset` elements in all browsers. - */ - -legend { - box-sizing: border-box; /* 1 */ - color: inherit; /* 2 */ - display: table; /* 1 */ - max-width: 100%; /* 1 */ - padding: 0; /* 3 */ - white-space: normal; /* 1 */ -} - -/** - * 1. Add the correct display in IE 9-. - * 2. Add the correct vertical alignment in Chrome, Firefox, and Opera. - */ - -progress { - display: inline-block; /* 1 */ - vertical-align: baseline; /* 2 */ -} - -/** - * Remove the default vertical scrollbar in IE. - */ - -textarea { - overflow: auto; -} - -/** - * 1. Add the correct box sizing in IE 10-. - * 2. Remove the padding in IE 10-. - */ - -[type="checkbox"], -[type="radio"] { - box-sizing: border-box; /* 1 */ - padding: 0; /* 2 */ -} - -/** - * Correct the cursor style of increment and decrement buttons in Chrome. - */ - -[type="number"]::-webkit-inner-spin-button, -[type="number"]::-webkit-outer-spin-button { - height: auto; -} - -/** - * 1. Correct the odd appearance in Chrome and Safari. - * 2. Correct the outline style in Safari. 
- */ - -[type="search"] { - -webkit-appearance: textfield; /* 1 */ - outline-offset: -2px; /* 2 */ -} - -/** - * Remove the inner padding and cancel buttons in Chrome and Safari on macOS. - */ - -[type="search"]::-webkit-search-cancel-button, -[type="search"]::-webkit-search-decoration { - -webkit-appearance: none; -} - -/** - * 1. Correct the inability to style clickable types in iOS and Safari. - * 2. Change font properties to `inherit` in Safari. - */ - -::-webkit-file-upload-button { - -webkit-appearance: button; /* 1 */ - font: inherit; /* 2 */ -} - -/* Interactive - ========================================================================== */ - -/* - * Add the correct display in IE 9-. - * 1. Add the correct display in Edge, IE, and Firefox. - */ - -details, /* 1 */ -menu { - display: block; -} - -/* - * Add the correct display in all browsers. - */ - -summary { - display: list-item; -} - -/* Scripting - ========================================================================== */ - -/** - * Add the correct display in IE 9-. - */ - -canvas { - display: inline-block; -} - -/** - * Add the correct display in IE. - */ - -template { - display: none; -} - -/* Hidden - ========================================================================== */ - -/** - * Add the correct display in IE 10-. - */ - -[hidden] { - display: none; -} diff --git a/site/css/style.sass b/site/css/style.sass deleted file mode 100644 index a7d089849cc..00000000000 --- a/site/css/style.sass +++ /dev/null @@ -1,186 +0,0 @@ ---- ---- - -$fa-font-path: "../fonts" -@import vendor/font-awesome/font-awesome -@import vendor/bulma/sass/utilities/initial-variables -@import variables -@import mixins -@import navbar -@import syntax -@import typography -@import vendor/bulma/bulma - -img.bk-javadoc-icon - margin-right: 1.5rem - -.bk-level - height: 4rem - - img - height: 4rem - -span.bk-javadoc-icon - margin-right: .5rem - -span.tag - margin-left: 1em - -nav.pagination - margin-top: 5rem - -@media (max-width: 767px) - .bk-docs-container - margin-top: 1rem !important - -@media (min-width: 768px) - .bk-docs-container .bk-docs-block - padding: 0 5% !important - -.bk-twitter span - color: $twitter-blue - -em - margin-right: $em-right-margin - -table - tbody - td - max-width: 300px - white-space: normal - overflow-x: scroll - -.fa-stack-overflow - &:hover - color: #f48024 - -.javadoc-button - span + span - margin-left: .5rem - -ul, li - margin: 0 - padding: 0 - -span.pop - color: $blue - -+sticky-footer - -$base-font-size: .8rem -$footer-height: 20rem - -footer.footer - z-index: 10 - height: $footer-height - -// Popovers -.popover-template - display: none - -.toc - overflow-y: scroll - bottom: 0 - top: $navbar-height + 5rem - $toc-base-font-size: 1.2rem - position: fixed - max-width: 20rem - padding: 0 0 5% 0 - - .section-nav - .toc-entry - &.toc-h2 - font-size: $toc-base-font-size - - &.toc-h3 - padding-left: 10px - font-size: $toc-base-font-size * .8 - - &.toc-h4 - padding-left: 20px - font-size: $toc-base-font-size * .6 - - &.toc-h4, &.toc-h5 - display: none - - a - &:hover - color: darken($tan, 20%) - - & + .toc-entry - margin-top: $toc-base-font-size - -body - font-size: 1.25em - -h1.bk-title - font-size: $base-font-size * 4.25 - margin-bottom: 1.5rem - -h2.bk-subtitle - font-size: $base-font-size * 2 - margin-bottom: 1.5rem - -.bk-docs-container, .bk-community-container - margin: $docs-container-vertical-margin auto $docs-container-vertical-margin * 2 auto - - hr - height: 2px - - & + .pagination - margin-top: 2rem - - 
.bk-docs-block - .docs-title - h1.title - font-size: 2.4rem - width: 100% - - .title + .subtitle - padding-top: .75rem - font-size: $base-font-size * 1.2 - -.bk-main-content - ol ol, ol ul - margin: .5rem 0 .5rem .75rem - - ul, li, ol - margin-left: .75rem - - ul li - list-style-type: square - -.bk-docs-container - width: 80% - - .bk-main-content - min-height: 70vh - -.bk-community-container - width: 60% - -aside.sidebar - width: 15rem - position: fixed - overflow-y: scroll - bottom: 0 - top: $navbar-height + 5rem - padding: 0 0 5% 0 - - p + ul.sidebar-items - margin-top: .2rem - - ul.sidebar-items - margin-bottom: 1rem - - li - padding: .2rem 0 .2rem 1rem - font-size: 1rem - width: 100% - - &.active - background-color: $tan - border-radius: 3px - - a - color: white \ No newline at end of file diff --git a/site/css/tippy.css b/site/css/tippy.css deleted file mode 100644 index 91202c5f1d9..00000000000 --- a/site/css/tippy.css +++ /dev/null @@ -1 +0,0 @@ -.tippy-touch{cursor:pointer!important}.tippy-notransition{-webkit-transition:none!important;transition:none!important}.tippy-popper{max-width:400px;-webkit-perspective:800px;perspective:800px;z-index:9999;outline:0}.tippy-popper.html-template{max-width:96%;max-width:calc(100% - 20px)}.tippy-popper[x-placement^=top] [x-arrow]{position:absolute;width:0;height:0;border-top:7px solid #333;border-right:7px solid transparent;border-left:7px solid transparent;bottom:-7px}.tippy-popper[x-placement^=top] [x-arrow].arrow-small{border-top:5px solid #333;border-right:5px solid transparent;border-left:5px solid transparent;bottom:-5px}.tippy-popper[x-placement^=top] [x-arrow].arrow-big{border-top:10px solid #333;border-right:10px solid transparent;border-left:10px solid transparent;bottom:-10px}.tippy-popper[x-placement^=top] [x-circle]{-webkit-transform-origin:0 100%;transform-origin:0 100%}.tippy-popper[x-placement^=top] [x-circle].enter{-webkit-transform:scale(1) translate(-50%,-50%);transform:scale(1) translate(-50%,-50%);opacity:1}.tippy-popper[x-placement^=top] [x-circle].leave{-webkit-transform:scale(.45) translate(-50%,-100%);transform:scale(.45) translate(-50%,-100%);opacity:0}.tippy-popper[x-placement^=top] .tippy-tooltip.light-theme [x-circle]{background-color:#fff}.tippy-popper[x-placement^=top] .tippy-tooltip.light-theme [x-arrow]{border-top:7px solid #fff;border-right:7px solid transparent;border-left:7px solid transparent}.tippy-popper[x-placement^=top] .tippy-tooltip.light-theme [x-arrow].arrow-small{border-top:5px solid #fff;border-right:5px solid transparent;border-left:5px solid transparent}.tippy-popper[x-placement^=top] .tippy-tooltip.light-theme [x-arrow].arrow-big{border-top:10px solid #fff;border-right:10px solid transparent;border-left:10px solid transparent}.tippy-popper[x-placement^=top] .tippy-tooltip.transparent-theme [x-circle]{background-color:rgba(0,0,0,.7)}.tippy-popper[x-placement^=top] .tippy-tooltip.transparent-theme [x-arrow]{border-top:7px solid rgba(0,0,0,.7);border-right:7px solid transparent;border-left:7px solid transparent}.tippy-popper[x-placement^=top] .tippy-tooltip.transparent-theme [x-arrow].arrow-small{border-top:5px solid rgba(0,0,0,.7);border-right:5px solid transparent;border-left:5px solid transparent}.tippy-popper[x-placement^=top] .tippy-tooltip.transparent-theme [x-arrow].arrow-big{border-top:10px solid rgba(0,0,0,.7);border-right:10px solid transparent;border-left:10px solid transparent}.tippy-popper[x-placement^=top] 
[data-animation=perspective]{-webkit-transform-origin:bottom;transform-origin:bottom}.tippy-popper[x-placement^=top] [data-animation=perspective].enter{opacity:1;-webkit-transform:translateY(-10px) rotateX(0);transform:translateY(-10px) rotateX(0)}.tippy-popper[x-placement^=top] [data-animation=perspective].leave{opacity:0;-webkit-transform:translateY(0) rotateX(90deg);transform:translateY(0) rotateX(90deg)}.tippy-popper[x-placement^=top] [data-animation=fade].enter{opacity:1;-webkit-transform:translateY(-10px);transform:translateY(-10px)}.tippy-popper[x-placement^=top] [data-animation=fade].leave{opacity:0;-webkit-transform:translateY(-10px);transform:translateY(-10px)}.tippy-popper[x-placement^=top] [data-animation=shift].enter{opacity:1;-webkit-transform:translateY(-10px);transform:translateY(-10px)}.tippy-popper[x-placement^=top] [data-animation=shift].leave{opacity:0;-webkit-transform:translateY(0);transform:translateY(0)}.tippy-popper[x-placement^=top] [data-animation=scale].enter{opacity:1;-webkit-transform:translateY(-10px) scale(1);transform:translateY(-10px) scale(1)}.tippy-popper[x-placement^=top] [data-animation=scale].leave{opacity:0;-webkit-transform:translateY(0) scale(0);transform:translateY(0) scale(0)}.tippy-popper[x-placement^=bottom] [x-arrow]{position:absolute;width:0;height:0;border-bottom:7px solid #333;border-right:7px solid transparent;border-left:7px solid transparent;top:-7px}.tippy-popper[x-placement^=bottom] [x-arrow].arrow-small{border-bottom:5px solid #333;border-right:5px solid transparent;border-left:5px solid transparent;top:-5px}.tippy-popper[x-placement^=bottom] [x-arrow].arrow-big{border-bottom:10px solid #333;border-right:10px solid transparent;border-left:10px solid transparent;top:-10px}.tippy-popper[x-placement^=bottom] [x-circle]{-webkit-transform-origin:0 -100%;transform-origin:0 -100%}.tippy-popper[x-placement^=bottom] [x-circle].enter{-webkit-transform:scale(1) translate(-50%,-50%);transform:scale(1) translate(-50%,-50%);opacity:1}.tippy-popper[x-placement^=bottom] [x-circle].leave{-webkit-transform:scale(.45) translate(-50%,8%);transform:scale(.45) translate(-50%,8%);opacity:0}.tippy-popper[x-placement^=bottom] .tippy-tooltip.light-theme [x-circle]{background-color:#fff}.tippy-popper[x-placement^=bottom] .tippy-tooltip.light-theme [x-arrow]{border-bottom:7px solid #fff;border-right:7px solid transparent;border-left:7px solid transparent}.tippy-popper[x-placement^=bottom] .tippy-tooltip.light-theme [x-arrow].arrow-small{border-bottom:5px solid #fff;border-right:5px solid transparent;border-left:5px solid transparent}.tippy-popper[x-placement^=bottom] .tippy-tooltip.light-theme [x-arrow].arrow-big{border-bottom:10px solid #fff;border-right:10px solid transparent;border-left:10px solid transparent}.tippy-popper[x-placement^=bottom] .tippy-tooltip.transparent-theme [x-circle]{background-color:rgba(0,0,0,.7)}.tippy-popper[x-placement^=bottom] .tippy-tooltip.transparent-theme [x-arrow]{border-bottom:7px solid rgba(0,0,0,.7);border-right:7px solid transparent;border-left:7px solid transparent}.tippy-popper[x-placement^=bottom] .tippy-tooltip.transparent-theme [x-arrow].arrow-small{border-bottom:5px solid rgba(0,0,0,.7);border-right:5px solid transparent;border-left:5px solid transparent}.tippy-popper[x-placement^=bottom] .tippy-tooltip.transparent-theme [x-arrow].arrow-big{border-bottom:10px solid rgba(0,0,0,.7);border-right:10px solid transparent;border-left:10px solid transparent}.tippy-popper[x-placement^=bottom] 
[data-animation=perspective]{-webkit-transform-origin:top;transform-origin:top}.tippy-popper[x-placement^=bottom] [data-animation=perspective].enter{opacity:1;-webkit-transform:translateY(10px) rotateX(0);transform:translateY(10px) rotateX(0)}.tippy-popper[x-placement^=bottom] [data-animation=perspective].leave{opacity:0;-webkit-transform:translateY(0) rotateX(-90deg);transform:translateY(0) rotateX(-90deg)}.tippy-popper[x-placement^=bottom] [data-animation=fade].enter{opacity:1;-webkit-transform:translateY(10px);transform:translateY(10px)}.tippy-popper[x-placement^=bottom] [data-animation=fade].leave{opacity:0;-webkit-transform:translateY(10px);transform:translateY(10px)}.tippy-popper[x-placement^=bottom] [data-animation=shift].enter{opacity:1;-webkit-transform:translateY(10px);transform:translateY(10px)}.tippy-popper[x-placement^=bottom] [data-animation=shift].leave{opacity:0;-webkit-transform:translateY(0);transform:translateY(0)}.tippy-popper[x-placement^=bottom] [data-animation=scale].enter{opacity:1;-webkit-transform:translateY(10px) scale(1);transform:translateY(10px) scale(1)}.tippy-popper[x-placement^=bottom] [data-animation=scale].leave{opacity:0;-webkit-transform:translateY(0) scale(0);transform:translateY(0) scale(0)}.tippy-popper[x-placement^=left] [x-arrow]{position:absolute;width:0;height:0;border-left:7px solid #333;border-top:7px solid transparent;border-bottom:7px solid transparent;right:-7px}.tippy-popper[x-placement^=left] [x-arrow].arrow-small{border-left:5px solid #333;border-top:5px solid transparent;border-bottom:5px solid transparent;right:-5px}.tippy-popper[x-placement^=left] [x-arrow].arrow-big{border-left:10px solid #333;border-top:10px solid transparent;border-bottom:10px solid transparent;right:-10px}.tippy-popper[x-placement^=left] [x-circle]{-webkit-transform-origin:50% 0;transform-origin:50% 0}.tippy-popper[x-placement^=left] [x-circle].enter{-webkit-transform:scale(1) translate(-50%,-50%);transform:scale(1) translate(-50%,-50%);opacity:1}.tippy-popper[x-placement^=left] [x-circle].leave{-webkit-transform:scale(.45) translate(-50%,-50%);transform:scale(.45) translate(-50%,-50%);opacity:0}.tippy-popper[x-placement^=left] .tippy-tooltip.light-theme [x-circle]{background-color:#fff}.tippy-popper[x-placement^=left] .tippy-tooltip.light-theme [x-arrow]{border-left:7px solid #fff;border-top:7px solid transparent;border-bottom:7px solid transparent}.tippy-popper[x-placement^=left] .tippy-tooltip.light-theme [x-arrow].arrow-small{border-left:5px solid #fff;border-top:5px solid transparent;border-bottom:5px solid transparent}.tippy-popper[x-placement^=left] .tippy-tooltip.light-theme [x-arrow].arrow-big{border-left:10px solid #fff;border-top:10px solid transparent;border-bottom:10px solid transparent}.tippy-popper[x-placement^=left] .tippy-tooltip.transparent-theme [x-circle]{background-color:rgba(0,0,0,.7)}.tippy-popper[x-placement^=left] .tippy-tooltip.transparent-theme [x-arrow]{border-left:7px solid rgba(0,0,0,.7);border-top:7px solid transparent;border-bottom:7px solid transparent}.tippy-popper[x-placement^=left] .tippy-tooltip.transparent-theme [x-arrow].arrow-small{border-left:5px solid rgba(0,0,0,.7);border-top:5px solid transparent;border-bottom:5px solid transparent}.tippy-popper[x-placement^=left] .tippy-tooltip.transparent-theme [x-arrow].arrow-big{border-left:10px solid rgba(0,0,0,.7);border-top:10px solid transparent;border-bottom:10px solid transparent}.tippy-popper[x-placement^=left] 
[data-animation=perspective]{-webkit-transform-origin:right;transform-origin:right}.tippy-popper[x-placement^=left] [data-animation=perspective].enter{opacity:1;-webkit-transform:translateX(-10px) rotateY(0);transform:translateX(-10px) rotateY(0)}.tippy-popper[x-placement^=left] [data-animation=perspective].leave{opacity:0;-webkit-transform:translateX(0) rotateY(-90deg);transform:translateX(0) rotateY(-90deg)}.tippy-popper[x-placement^=left] [data-animation=fade].enter{opacity:1;-webkit-transform:translateX(-10px);transform:translateX(-10px)}.tippy-popper[x-placement^=left] [data-animation=fade].leave{opacity:0;-webkit-transform:translateX(-10px);transform:translateX(-10px)}.tippy-popper[x-placement^=left] [data-animation=shift].enter{opacity:1;-webkit-transform:translateX(-10px);transform:translateX(-10px)}.tippy-popper[x-placement^=left] [data-animation=shift].leave{opacity:0;-webkit-transform:translateX(0);transform:translateX(0)}.tippy-popper[x-placement^=left] [data-animation=scale].enter{opacity:1;-webkit-transform:translateX(-10px) scale(1);transform:translateX(-10px) scale(1)}.tippy-popper[x-placement^=left] [data-animation=scale].leave{opacity:0;-webkit-transform:translateX(0) scale(0);transform:translateX(0) scale(0)}.tippy-popper[x-placement^=right] [x-arrow]{position:absolute;width:0;height:0;border-right:7px solid #333;border-top:7px solid transparent;border-bottom:7px solid transparent;left:-7px}.tippy-popper[x-placement^=right] [x-arrow].arrow-small{border-right:5px solid #333;border-top:5px solid transparent;border-bottom:5px solid transparent;left:-5px}.tippy-popper[x-placement^=right] [x-arrow].arrow-big{border-right:10px solid #333;border-top:10px solid transparent;border-bottom:10px solid transparent;left:-10px}.tippy-popper[x-placement^=right] [x-circle]{-webkit-transform-origin:-50% 0;transform-origin:-50% 0}.tippy-popper[x-placement^=right] [x-circle].enter{-webkit-transform:scale(1) translate(-50%,-50%);transform:scale(1) translate(-50%,-50%);opacity:1}.tippy-popper[x-placement^=right] [x-circle].leave{-webkit-transform:scale(.45) translate(-50%,-50%);transform:scale(.45) translate(-50%,-50%);opacity:0}.tippy-popper[x-placement^=right] .tippy-tooltip.light-theme [x-circle]{background-color:#fff}.tippy-popper[x-placement^=right] .tippy-tooltip.light-theme [x-arrow]{border-right:7px solid #fff;border-top:7px solid transparent;border-bottom:7px solid transparent}.tippy-popper[x-placement^=right] .tippy-tooltip.light-theme [x-arrow].arrow-small{border-right:5px solid #fff;border-top:5px solid transparent;border-bottom:5px solid transparent}.tippy-popper[x-placement^=right] .tippy-tooltip.light-theme [x-arrow].arrow-big{border-right:10px solid #fff;border-top:10px solid transparent;border-bottom:10px solid transparent}.tippy-popper[x-placement^=right] .tippy-tooltip.transparent-theme [x-circle]{background-color:rgba(0,0,0,.7)}.tippy-popper[x-placement^=right] .tippy-tooltip.transparent-theme [x-arrow]{border-right:7px solid rgba(0,0,0,.7);border-top:7px solid transparent;border-bottom:7px solid transparent}.tippy-popper[x-placement^=right] .tippy-tooltip.transparent-theme [x-arrow].arrow-small{border-right:5px solid rgba(0,0,0,.7);border-top:5px solid transparent;border-bottom:5px solid transparent}.tippy-popper[x-placement^=right] .tippy-tooltip.transparent-theme [x-arrow].arrow-big{border-right:10px solid rgba(0,0,0,.7);border-top:10px solid transparent;border-bottom:10px solid transparent}.tippy-popper[x-placement^=right] 
[data-animation=perspective]{-webkit-transform-origin:left;transform-origin:left}.tippy-popper[x-placement^=right] [data-animation=perspective].enter{opacity:1;-webkit-transform:translateX(10px) rotateY(0);transform:translateX(10px) rotateY(0)}.tippy-popper[x-placement^=right] [data-animation=perspective].leave{opacity:0;-webkit-transform:translateX(0) rotateY(90deg);transform:translateX(0) rotateY(90deg)}.tippy-popper[x-placement^=right] [data-animation=fade].enter{opacity:1;-webkit-transform:translateX(10px);transform:translateX(10px)}.tippy-popper[x-placement^=right] [data-animation=fade].leave{opacity:0;-webkit-transform:translateX(10px);transform:translateX(10px)}.tippy-popper[x-placement^=right] [data-animation=shift].enter{opacity:1;-webkit-transform:translateX(10px);transform:translateX(10px)}.tippy-popper[x-placement^=right] [data-animation=shift].leave{opacity:0;-webkit-transform:translateX(0);transform:translateX(0)}.tippy-popper[x-placement^=right] [data-animation=scale].enter{opacity:1;-webkit-transform:translateX(10px) scale(1);transform:translateX(10px) scale(1)}.tippy-popper[x-placement^=right] [data-animation=scale].leave{opacity:0;-webkit-transform:translateX(0) scale(0);transform:translateX(0) scale(0)}.tippy-popper .tippy-tooltip.transparent-theme{background-color:rgba(0,0,0,.7)}.tippy-popper .tippy-tooltip.transparent-theme[data-animatefill]{background-color:transparent}.tippy-popper .tippy-tooltip.light-theme{color:#26323d;box-shadow:0 4px 20px 4px rgba(0,20,60,.1),0 4px 80px -8px rgba(0,20,60,.2);background-color:#fff}.tippy-popper .tippy-tooltip.light-theme[data-animatefill]{background-color:transparent}.tippy-tooltip{position:relative;color:#fff;border-radius:4px;font-size:.95rem;padding:.4rem .8rem;text-align:center;will-change:transform;-webkit-font-smoothing:antialiased;-moz-osx-font-smoothing:grayscale;background-color:#333;pointer-events:none}.tippy-tooltip--small{padding:.25rem .5rem;font-size:.8rem}.tippy-tooltip--big{padding:.6rem 1.2rem;font-size:1.2rem}.tippy-tooltip[data-animatefill]{overflow:hidden;background-color:transparent}.tippy-tooltip[data-interactive]{pointer-events:auto}.tippy-tooltip[data-inertia]{-webkit-transition-timing-function:cubic-bezier(.53,1,.36,.85);transition-timing-function:cubic-bezier(.53,2,.36,.85)}.tippy-tooltip [x-circle]{position:absolute;will-change:transform;background-color:#333;border-radius:50%;width:120%;width:calc(100% + 2rem);-webkit-transition:all ease-out;transition:all ease-out;left:50%;top:50%;z-index:-1;overflow:hidden}.tippy-tooltip [x-circle]:before{content:"";padding-top:75%;float:left}@media (max-width:450px){.tippy-popper{max-width:96%;max-width:calc(100% - 20px)}} \ No newline at end of file diff --git a/site/docker/Dockerfile b/site/docker/Dockerfile deleted file mode 100644 index 3643bd14e62..00000000000 --- a/site/docker/Dockerfile +++ /dev/null @@ -1,20 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - -FROM ruby:2.4.1 diff --git a/site/docker/ci.sh b/site/docker/ci.sh deleted file mode 100755 index 44c4e237eba..00000000000 --- a/site/docker/ci.sh +++ /dev/null @@ -1,64 +0,0 @@ -#!/bin/bash - -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements.  See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License.  You may obtain a copy of the License at -# -#    http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -set -e -x -u - -SCRIPT_DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd ) - -export IMAGE_NAME="bookkeeper/docs" - -pushd ${SCRIPT_DIR} - -docker build --rm=true -t ${IMAGE_NAME} . - -popd - -if [ "$(uname -s)" == "Linux" ]; then - USER_NAME=${SUDO_USER:=$USER} - USER_ID=$(id -u "${USER_NAME}") - GROUP_ID=$(id -g "${USER_NAME}") - LOCAL_HOME="/home/${USER_NAME}" -else # boot2docker uid and gid - USER_NAME=$USER - USER_ID=1000 - GROUP_ID=50 - LOCAL_HOME="/Users/${USER_NAME}" -fi - -docker build -t "${IMAGE_NAME}-${USER_NAME}" - < The most important thing to ensure when starting up AutoRecovery is that the ZooKeeper connection string specified by the [`zkServers`](../../reference/config#zkServers) parameter points to the right ZooKeeper cluster. - -If you start up AutoRecovery on a machine that is already running a bookie, then the AutoRecovery process will run alongside the bookie on a separate thread. - -You can also start up AutoRecovery on a fresh machine if you'd like to create a dedicated cluster of AutoRecovery nodes. - -## Configuration - -There are a handful of AutoRecovery-related configs in the [`bk_server.conf`](../../reference/config) configuration file. For a listing of those configs, see [AutoRecovery settings](../../reference/config#autorecovery-settings). - -## Disable AutoRecovery - -You can disable AutoRecovery at any time, for example during maintenance. Disabling AutoRecovery ensures that bookies' data isn't unnecessarily rereplicated when the bookie is only taken down for a short period of time, for example when the bookie is being updated or the configuration is being changed. - -You can disable AutoRecovery using the [`bookkeeper`](../../reference/cli#bookkeeper-shell-autorecovery) CLI tool: - -```bash -$ bookkeeper-server/bin/bookkeeper shell autorecovery -disable -``` - -Once disabled, you can reenable AutoRecovery using the [`enable`](../../reference/cli#bookkeeper-shell-autorecovery) shell command: - -```bash -$ bookkeeper-server/bin/bookkeeper shell autorecovery -enable -``` - -## AutoRecovery architecture - -AutoRecovery has two components: - -1.
The [**auditor**](#auditor) (see the [`Auditor`](../../api/javadoc/org/apache/bookkeeper/replication/Auditor.html) class) is a singleton node that watches bookies to see if they fail and creates rereplication tasks for the ledgers on failed bookies. -1. The [**replication worker**](#replication-worker) (see the [`ReplicationWorker`](../../api/javadoc/org/apache/bookkeeper/replication/ReplicationWorker.html) class) runs on each bookie and executes rereplication tasks provided by the auditor. - -Both of these components run as threads in the [`AutoRecoveryMain`](../../api/javadoc/org/apache/bookkeeper/replication/AutoRecoveryMain) process, which runs on each bookie in the cluster. All recovery nodes participate in leader election---using ZooKeeper---to decide which node becomes the auditor. Nodes that fail to become the auditor watch the elected auditor and run an election process again if they see that the auditor node has failed. - -### Auditor - -The auditor watches all bookies in the cluster that are registered with ZooKeeper. Bookies register with ZooKeeper at startup. If the bookie crashes or is killed, the bookie's registration in ZooKeeper disappears and the auditor is notified of the change in the list of registered bookies. - -When the auditor sees that a bookie has disappeared, it immediately scans the complete {% pop ledger %} list to find ledgers that have data stored on the failed bookie. Once it has a list of ledgers for that bookie, the auditor will publish a rereplication task for each ledger under the `/underreplicated/` [znode](https://zookeeper.apache.org/doc/current/zookeeperOver.html) in ZooKeeper. - -### Replication Worker - -Each replication worker watches for tasks being published by the auditor on the `/underreplicated/` znode in ZooKeeper. When a new task appears, the replication worker will try to get a lock on it. If it cannot acquire the lock, it will try the next entry. The locks are implemented using ZooKeeper ephemeral znodes; see the sketch below. - -The replication worker will scan through the rereplication task's ledger for fragments of which its local bookie is not a member. When it finds fragments matching this criterion, it will replicate the entries of that fragment to the local bookie. If, after this process, the ledger is fully replicated, the ledger's entry under `/underreplicated/` is deleted, and the lock is released. If there is a problem replicating, or there are fragments in the ledger which are still underreplicated (due to the local bookie already being part of the ensemble for the fragment), then the lock is simply released. - -If the replication worker finds a fragment which needs rereplication, but does not have a defined endpoint (i.e. the final fragment of a ledger currently being written to), it will wait for a grace period before attempting rereplication. If the fragment needing rereplication still does not have a defined endpoint, the ledger is fenced and rereplication then takes place. - -This avoids the situation in which a client is writing to a ledger and one of the bookies goes down, but the client has not written an entry to that bookie before rereplication takes place. The client could continue writing to the old fragment, even though the ensemble for the fragment had changed. This could lead to data loss. Fencing prevents this scenario from happening. In the normal case, the client will try to write to the failed bookie within the grace period, and will have started a new fragment before rereplication starts.
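To make the task locking described above concrete, here is a minimal sketch of lock acquisition with an ephemeral znode, using the plain ZooKeeper client API. This is illustrative only, not BookKeeper's actual implementation, and the lock path layout is hypothetical:

```java
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.ZooDefs;
import org.apache.zookeeper.ZooKeeper;

class ReplicationLockSketch {
    // Hypothetical lock path; the real layout is internal to BookKeeper.
    static boolean tryLock(ZooKeeper zk, long ledgerId)
            throws KeeperException, InterruptedException {
        String lockPath = "/underreplicated/locks/" + ledgerId;
        try {
            // An ephemeral znode is deleted automatically when the creating
            // session dies, so a crashed worker can never hold a lock forever.
            zk.create(lockPath, new byte[0],
                      ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL);
            return true;  // this worker now owns the rereplication task
        } catch (KeeperException.NodeExistsException e) {
            return false; // another worker holds the lock; try the next task
        }
    }
}
```

Because the znode is ephemeral, a worker that crashes mid-rereplication releases its lock automatically and another worker can pick the task up again.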
- -You can configure the rereplication grace period using the [`openLedgerRereplicationGracePeriod`](../../reference/config#openLedgerRereplicationGracePeriod) parameter. - -### The rereplication process - -The ledger rereplication process happens in these steps: - -1. The client goes through all ledger fragments in the ledger, selecting those that contain the failed bookie. -1. A recovery process is initiated for each ledger fragment in this list. -   1. The client selects a bookie to which all entries in the ledger fragment will be replicated; in the case of AutoRecovery, this will always be the local bookie. -   1. The client reads entries that belong to the ledger fragment from other bookies in the ensemble and writes them to the selected bookie. -   1. Once all entries have been replicated, the ZooKeeper metadata for the fragment is updated to reflect the new ensemble. -   1. The fragment is marked as fully replicated in the recovery tool. -1. Once all ledger fragments are marked as fully replicated, the ledger is marked as fully replicated. - diff --git a/site/docs/4.5.0/admin/bookies.md b/site/docs/4.5.0/admin/bookies.md deleted file mode 100644 index f9b1dcf94dc..00000000000 --- a/site/docs/4.5.0/admin/bookies.md +++ /dev/null @@ -1,180 +0,0 @@ ---- -title: BookKeeper administration -subtitle: A guide to deploying and administering BookKeeper --- - -This document is a guide to deploying, administering, and maintaining BookKeeper. It also discusses [best practices](#best-practices) and [common problems](#common-problems). - -## Requirements - -A typical BookKeeper installation consists of an ensemble of {% pop bookies %} and a ZooKeeper quorum. The exact number of bookies depends on the quorum mode that you choose, desired throughput, and the number of clients using the installation simultaneously. - -The minimum number of bookies depends on the type of installation: - -* For *self-verifying* entries you should run at least three bookies. In this mode, clients store a message authentication code along with each {% pop entry %}. -* For *generic* entries you should run at least four bookies. - -There is no upper limit on the number of bookies that you can run in a single ensemble. - -### Performance - -To achieve optimal performance, BookKeeper requires each server to have at least two disks. It's possible to run a bookie with a single disk but performance will be significantly degraded. - -### ZooKeeper - -There is no constraint on the number of ZooKeeper nodes you can run with BookKeeper. A single machine running ZooKeeper in [standalone mode](https://zookeeper.apache.org/doc/current/zookeeperStarted.html#sc_InstallingSingleMode) is sufficient for BookKeeper, although for the sake of higher resilience we recommend running ZooKeeper in [quorum mode](https://zookeeper.apache.org/doc/current/zookeeperStarted.html#sc_RunningReplicatedZooKeeper) with multiple servers. - -## Starting and stopping bookies - -You can run bookies either in the foreground or in the background, using [nohup](https://en.wikipedia.org/wiki/Nohup). You can also run [local bookies](#local-bookie) for development purposes.
- -To start a bookie in the foreground, use the [`bookie`](../../reference/cli#bookkeeper-bookie) command of the [`bookkeeper`](../../reference/cli#bookkeeper) CLI tool: - -```shell -$ bookkeeper-server/bin/bookkeeper bookie -``` - -To start a bookie in the background, use the [`bookkeeper-daemon.sh`](../../reference/cli#bookkeeper-daemon.sh) script and run `start bookie`: - -```shell -$ bookkeeper-server/bin/bookkeeper-daemon.sh start bookie -``` - -### Local bookies - -The instructions above showed you how to run bookies intended for production use. If you'd like to experiment with ensembles of bookies locally, you can use the [`localbookie`](../../reference/cli#bookkeeper-localbookie) command of the `bookkeeper` CLI tool and specify the number of bookies you'd like to run. - -This would spin up a local ensemble of 6 bookies: - -```shell -$ bookkeeper-server/bin/bookkeeper localbookie 6 -``` - -> When you run a local bookie ensemble, all bookies run in a single JVM process. - -## Configuring bookies - -There's a wide variety of parameters that you can set in the bookie configuration file in `bookkeeper-server/conf/bk_server.conf` of your [BookKeeper installation](../../reference/config). A full listing can be found in [Bookie configuration](../../reference/config). - -Some of the more important parameters to be aware of: - -Parameter | Description | Default -:---------|:------------|:------- -`bookiePort` | The TCP port that the bookie listens on | `3181` -`zkServers` | A comma-separated list of ZooKeeper servers in `hostname:port` format | `localhost:2181` -`journalDirectory` | The directory where the [log device](../../getting-started/concepts#log-device) stores the bookie's write-ahead log (WAL) | `/tmp/bk-txn` -`ledgerDirectories` | The directories where the [ledger device](../../getting-started/concepts#ledger-device) stores the bookie's ledger entries (as a comma-separated list) | `/tmp/bk-data` - -> Ideally, the directories specified by `journalDirectory` and `ledgerDirectories` should be on different devices. - -## Logging - -BookKeeper uses [slf4j](http://www.slf4j.org/) for logging, with [log4j](https://logging.apache.org/log4j/2.x/) bindings enabled by default. - -To enable logging for a bookie, create a `log4j.properties` file and point the `BOOKIE_LOG_CONF` environment variable to the configuration file. Here's an example: - -```shell -$ export BOOKIE_LOG_CONF=/some/path/log4j.properties -$ bookkeeper-server/bin/bookkeeper bookie -``` - -## Upgrading - -From time to time you may need to make changes to the filesystem layout of bookies---changes that are incompatible with previous versions of BookKeeper and require that directories used with previous versions are upgraded. If a filesystem upgrade is required when updating BookKeeper, the bookie will fail to start and return an error like this: - -``` -2017-05-25 10:41:50,494 - ERROR - [main:Bookie@246] - Directory layout version is less than 3, upgrade needed -``` - -BookKeeper provides a utility for upgrading the filesystem. You can perform an upgrade using the [`upgrade`](../../reference/cli#bookkeeper-upgrade) command of the `bookkeeper` CLI tool. When running `bookkeeper upgrade` you need to specify one of three flags: - -Flag | Action -:----|:------ -`--upgrade` | Performs an upgrade -`--rollback` | Performs a rollback to the initial filesystem version -`--finalize` | Marks the upgrade as complete - -### Upgrade pattern - -A standard upgrade pattern is to run an upgrade...
- -```shell -$ bookkeeper-server/bin/bookkeeper upgrade --upgrade -``` - -...then check that everything is working normally, then kill the bookie. If everything is okay, finalize the upgrade... - -```shell -$ bookkeeper-server/bin/bookkeeper upgrade --finalize -``` - -...and then restart the server: - -```shell -$ bookkeeper-server/bin/bookkeeper bookie -``` - -If something has gone wrong, you can always perform a rollback: - -```shell -$ bookkeeper-server/bin/bookkeeper upgrade --rollback -``` - -## Formatting - -You can format bookie metadata in ZooKeeper using the [`metaformat`](../../reference/cli#bookkeeper-shell-metaformat) command of the [BookKeeper shell](../../reference/cli#the-bookkeeper-shell). - -By default, formatting is done in interactive mode, which prompts you to confirm the format operation if old data exists. You can disable confirmation using the `-nonInteractive` flag. If old data does exist, the format operation will abort *unless* you set the `-force` flag. Here's an example: - -```shell -$ bookkeeper-server/bin/bookkeeper shell metaformat -``` - -You can format the local filesystem data on a bookie using the [`bookieformat`](../../reference/cli#bookkeeper-shell-bookieformat) command on each bookie. Here's an example: - -```shell -$ bookkeeper-server/bin/bookkeeper shell bookieformat -``` - -> The `-force` and `-nonInteractive` flags are also available for the `bookieformat` command. - -## AutoRecovery - -For a guide to AutoRecovery in BookKeeper, see [this doc](../autorecovery). - -## Missing disks or directories - -Accidentally replacing disks or removing directories can cause a bookie to fail while trying to read a ledger fragment that, according to the ledger metadata, exists on the bookie. For this reason, when a bookie is started for the first time, its disk configuration is fixed for the lifetime of that bookie. Any change to its disk configuration, such as a crashed disk or an accidental configuration change, will result in the bookie being unable to start. That will throw an error like this: - -``` -2017-05-29 18:19:13,790 - ERROR - [main:BookieServer@314] - Exception running bookie server : -org.apache.bookkeeper.bookie.BookieException$InvalidCookieException -        at org.apache.bookkeeper.bookie.Cookie.verify(Cookie.java:82) -        at org.apache.bookkeeper.bookie.Bookie.checkEnvironment(Bookie.java:275) -        at org.apache.bookkeeper.bookie.Bookie.<init>(Bookie.java:351) -``` - -If the change was the result of an accidental configuration change, the change can be reverted and the bookie can be restarted. However, if the change *cannot* be reverted, as is the case when you want to add a new disk or replace a disk, the bookie must be wiped and then all its data re-replicated onto it. - -1. Increment the [`bookiePort`](../../reference/config#bookiePort) parameter in the [`bk_server.conf`](../../reference/config) file. -1. Ensure that all directories specified by [`journalDirectory`](../../reference/config#journalDirectory) and [`ledgerDirectories`](../../reference/config#ledgerDirectories) are empty. -1. [Start the bookie](#starting-and-stopping-bookies). -1. Run the following command to re-replicate the data: - -   ```bash -   $ bin/bookkeeper org.apache.bookkeeper.tools.BookKeeperTools \ -     <zk-server> \ -     <old-bookie> \ -     <new-bookie> -   ``` - -   The ZooKeeper server, old bookie, and new bookie are all identified by their external IP and `bookiePort` (3181 by default).
Here's an example: - -   ```bash -   $ bin/bookkeeper org.apache.bookkeeper.tools.BookKeeperTools \ -     zk1.example.com \ -     192.168.1.10:3181 \ -     192.168.1.10:3182 -   ``` - -   See the [AutoRecovery](../autorecovery) documentation for more info on the re-replication process. diff --git a/site/docs/4.5.0/admin/geo-replication.md b/site/docs/4.5.0/admin/geo-replication.md deleted file mode 100644 index 38b972345ef..00000000000 --- a/site/docs/4.5.0/admin/geo-replication.md +++ /dev/null @@ -1,22 +0,0 @@ ---- -title: Geo-replication -subtitle: Replicate data across BookKeeper clusters --- - -*Geo-replication* is the replication of data across BookKeeper clusters. In order to enable geo-replication for a group of BookKeeper clusters, - -## Global ZooKeeper - -Setting up a global ZooKeeper quorum is a lot like setting up a cluster-specific quorum. The crucial difference is that - -### Geo-replication across three clusters - -Let's say that you want to set up geo-replication across clusters in regions A, B, and C. First, the BookKeeper clusters in each region must have their own local (cluster-specific) ZooKeeper quorum. - -> BookKeeper clusters use global ZooKeeper only for metadata storage. Traffic from bookies to ZooKeeper should thus be fairly light in general. - -The crucial difference between using cluster-specific ZooKeeper and global ZooKeeper is that you need to point all {% pop bookies %} to the global ZooKeeper setup. - -## Region-aware placement policy - -## Autorecovery diff --git a/site/docs/4.5.0/admin/metrics.md b/site/docs/4.5.0/admin/metrics.md deleted file mode 100644 index 635135faf7e..00000000000 --- a/site/docs/4.5.0/admin/metrics.md +++ /dev/null @@ -1,41 +0,0 @@ ---- -title: Metric collection --- - -BookKeeper enables metrics collection through a variety of [stats providers](#stats-providers). - -> For a full listing of available metrics, see the [Metrics](../../reference/metrics) reference doc. - -## Stats providers - -BookKeeper has stats provider implementations for five sinks: - -Provider | Provider class name -:--------|:------------------- -[Codahale Metrics](https://mvnrepository.com/artifact/org.apache.bookkeeper.stats/codahale-metrics-provider) | `org.apache.bookkeeper.stats.CodahaleMetricsProvider` -[Prometheus](https://prometheus.io/) | `org.apache.bookkeeper.stats.PrometheusMetricsProvider` -[Finagle](https://twitter.github.io/finagle/guide/Metrics.html) | `org.apache.bookkeeper.stats.FinagleStatsProvider` -[Ostrich](https://github.com/twitter/ostrich) | `org.apache.bookkeeper.stats.OstrichProvider` -[Twitter Science Provider](https://mvnrepository.com/artifact/org.apache.bookkeeper.stats/twitter-science-provider) | `org.apache.bookkeeper.stats.TwitterStatsProvider` - -> The [Codahale Metrics]({{ site.github_master }}/bookkeeper-stats-providers/codahale-metrics-provider) stats provider is the default provider.
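Each provider ships as its own Maven artifact. As a sketch (the group and artifact IDs follow the Codahale provider link above; match the version to your BookKeeper release), pulling the default provider into a Maven build might look like:

```xml
<dependency>
  <groupId>org.apache.bookkeeper.stats</groupId>
  <artifactId>codahale-metrics-provider</artifactId>
  <version>4.5.0</version>
</dependency>
```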
- -## Enabling stats providers in bookies - -There are two stats-related [configuration parameters](../../reference/config#statistics) available for bookies: - -Parameter | Description | Default -:---------|:------------|:------- -`enableStatistics` | Whether statistics are enabled for the bookie | `false` -`statsProviderClass` | The stats provider class used by the bookie | `org.apache.bookkeeper.stats.CodahaleMetricsProvider` - - -To enable stats: - -* set the `enableStatistics` parameter to `true` -* set `statsProviderClass` to the desired provider (see the [table above](#stats-providers) for a listing of classes) - - diff --git a/site/docs/4.5.0/admin/perf.md b/site/docs/4.5.0/admin/perf.md deleted file mode 100644 index 82956326e5d..00000000000 --- a/site/docs/4.5.0/admin/perf.md +++ /dev/null @@ -1,3 +0,0 @@ ---- -title: Performance tuning --- diff --git a/site/docs/4.5.0/admin/placement.md b/site/docs/4.5.0/admin/placement.md deleted file mode 100644 index ded456e1aea..00000000000 --- a/site/docs/4.5.0/admin/placement.md +++ /dev/null @@ -1,3 +0,0 @@ ---- -title: Customized placement policies --- diff --git a/site/docs/4.5.0/admin/upgrade.md b/site/docs/4.5.0/admin/upgrade.md deleted file mode 100644 index 456df99a276..00000000000 --- a/site/docs/4.5.0/admin/upgrade.md +++ /dev/null @@ -1,73 +0,0 @@ ---- -title: Upgrade --- - -> If you have questions about upgrades (or need help), please feel free to reach out to us by [mailing list]({{ site.baseurl }}community/mailing-lists) or [Slack Channel]({{ site.baseurl }}community/slack). - -## Overview - -Consider the guidelines below in preparation for upgrading. - -- Always back up all your configuration files before upgrading. -- Read through the documentation and draft an upgrade plan that matches your specific requirements and environment before starting the upgrade process. -  Put differently, don't start working through the guide on a live cluster. Read the guide entirely, make a plan, then execute the plan. -- Pay careful attention to the order in which components are upgraded. In general, you need to upgrade bookies first and then upgrade your clients. -- If autorecovery is running along with bookies, you need to pay attention to the upgrade sequence. -- Read the release notes carefully for each release. They contain not only information about noteworthy features, but also changes to configurations -  that may impact your upgrade. -- Always upgrade one bookie or a small set of bookies to canary the new version before upgrading all bookies in your cluster. - -## Canary - -It is wise to canary an upgraded version in one or a small set of bookies before upgrading all bookies in your live cluster. - -You can follow the steps below to canary an upgraded version: - -1. Stop a Bookie. -2. Upgrade the binary and configuration. -3. Start the Bookie in `ReadOnly` mode. This can be used to verify that the Bookie running the new version handles read workloads well. -4. Once the Bookie has been running in `ReadOnly` mode successfully for a while, restart the Bookie in `Write/Read` mode. -5. After step 4, the Bookie will serve both write and read traffic. - -A shell sketch of this canary flow appears at the end of this section. - -### Rollback Canaries - -If problems occur while canarying an upgraded version, you can simply take down the problematic Bookie node. The remaining bookies in the old cluster -will repair the problematic Bookie node via autorecovery. There is nothing else to worry about.
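As promised above, here is a minimal shell sketch of the canary flow. It assumes the `bookkeeper-daemon.sh` script described in the administration docs and that your release supports the `forceReadOnlyBookie` option in `bk_server.conf` (check the configuration reference for your version):

```shell
# 1. Stop the bookie
$ bookkeeper-server/bin/bookkeeper-daemon.sh stop bookie

# 2. Install the upgraded binaries/configuration, then force read-only mode
#    (assumes forceReadOnlyBookie is available in your release)
$ echo "forceReadOnlyBookie=true" >> bookkeeper-server/conf/bk_server.conf

# 3. Start the bookie; it will only serve reads
$ bookkeeper-server/bin/bookkeeper-daemon.sh start bookie

# 4. After the canary has served reads cleanly for a while, set
#    forceReadOnlyBookie=false in bk_server.conf and restart the bookie
#    so it serves both writes and reads again.
```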
- -## Upgrade Steps - -Once you have determined that a version is safe to upgrade on a few nodes in your cluster, you can perform the following steps to upgrade all bookies in your cluster. - -1. Determine if autorecovery is running along with bookies. If yes, check if the clients (either new clients with the new binary or old clients with new configurations) -are allowed to talk to old bookies; if clients are not allowed to talk to old bookies, please [disable autorecovery](../../reference/cli/#autorecovery-1) during the upgrade. -2. Decide on performing a rolling upgrade or a downtime upgrade. -3. Upgrade all Bookies (more below). -4. If autorecovery was disabled during the upgrade, [enable autorecovery](../../reference/cli/#autorecovery-1). -5. After all bookies are upgraded, build applications that use the BookKeeper client against the new BookKeeper libraries and deploy the new versions. - -### Upgrade Bookies - -In a rolling upgrade scenario, upgrade one Bookie at a time. In a downtime upgrade scenario, take the entire cluster down, upgrade each Bookie, then start the cluster. - -For each Bookie: - -1. Stop the bookie. -2. Upgrade the software (either the new binary or new configuration). -3. Start the bookie. - -## Upgrade Guides - -The sections above describe the general upgrade method for Apache BookKeeper. The guides below cover the details for individual versions. - -### 4.4.x to 4.5.x upgrade - -There are no protocol-related backward compatibility changes in 4.5.0, so you can follow the general upgrade sequence to upgrade from 4.4.x to 4.5.x. -However, here is a list of things that you might want to know. - -1. 4.5.x upgrades Netty from 3.x to 4.x. The memory usage pattern might change a bit: Netty 4 uses more direct memory. Please pay attention to your memory usage -   and adjust the JVM settings accordingly. -2. `multi journals` is a non-rollbackable feature. If you configure a bookie to use multiple journals on 4.5.x you cannot roll the bookie back to 4.4.x. You have -   to take the bookie out and recover it if you want to roll back to 4.4.x. - -If you are planning to upgrade a non-secured cluster to a secured cluster enabling security features in 4.5.0, please read [BookKeeper Security](../../security/overview) for more details. - diff --git a/site/docs/4.5.0/api/distributedlog-api.md b/site/docs/4.5.0/api/distributedlog-api.md deleted file mode 100644 index a13aa8b5868..00000000000 --- a/site/docs/4.5.0/api/distributedlog-api.md +++ /dev/null @@ -1,395 +0,0 @@ ---- -title: DistributedLog -subtitle: A higher-level API for managing BookKeeper entries --- - -> DistributedLog began its life as a separate project under the Apache Software Foundation. It was merged into BookKeeper in 2017. - -The DistributedLog API is an easy-to-use interface for managing BookKeeper entries that enables you to use BookKeeper without needing to interact with [ledgers](../ledger-api) directly. - -DistributedLog (DL) maintains sequences of records in categories called *logs* (aka *log streams*). *Writers* append records to DL logs, while *readers* fetch and process those records. - -## Architecture - -The diagram below illustrates how the DistributedLog API works with BookKeeper: - -![DistributedLog API]({{ site.baseurl }}img/distributedlog.png) - -## Logs - -A *log* in DistributedLog is an ordered, immutable sequence of *log records*. - -The diagram below illustrates the anatomy of a log stream: - -![DistributedLog log]({{ site.baseurl }}img/logs.png) - -### Log records - -Each log record is a sequence of bytes.
Applications are responsible for serializing and deserializing byte sequences stored in log records. - -Log records are written sequentially into a *log stream*, and each is assigned a unique sequence number called a DLSN (DistributedLog Sequence Number). - -In addition to a DLSN, applications can assign their own sequence number when constructing log records. Application-defined sequence numbers are known as *TransactionIDs* (or *txid*). Either a DLSN or a TransactionID can be used for positioning readers to start reading from a specific log record. - -### Log segments - -Each log is broken down into *log segments* that contain subsets of records. Log segments are distributed and stored in BookKeeper. DistributedLog rolls the log segments based on the configured *rolling policy*, which can be either - -* a configurable period of time (such as every 2 hours), or -* a configurable maximum size (such as every 128 MB). - -The data in logs is divided up into equally sized log segments and distributed evenly across {% pop bookies %}. This allows logs to scale beyond a size that would fit on a single server and spreads read traffic across the cluster. - -### Namespaces - -Log streams that belong to the same organization are typically categorized and managed under a *namespace*. DistributedLog namespaces essentially enable applications to locate log streams. Applications can perform the following actions under a namespace: - -* create streams -* delete streams -* truncate streams to a given sequence number (either a DLSN or a TransactionID) - -## Writers - -Through the DistributedLog API, writers write data into logs of their choice. All records are appended into logs in order. The sequencing is performed by the writer, which means that there is only one active writer for a log at any given time. - -When a network partition causes two writers to attempt to write to the same log, DistributedLog guarantees correctness using a *fencing* mechanism in the log segment store. - -### Write Proxy - -Log writers are served and managed in a service tier called the *Write Proxy* (see the diagram [above](#architecture)). The Write Proxy is used for accepting writes from a large number of clients. - -## Readers - -DistributedLog readers read records from logs of their choice, starting with a provided position. The provided position can be either a DLSN or a TransactionID. - -Readers read records from logs in strict order. Different readers can read records from different positions in the same log. - -Unlike other pub-sub systems, DistributedLog doesn't record or manage readers' positions. This means that tracking is the responsibility of applications, as different applications may have different requirements for tracking and coordinating positions. This is hard to get right with a single approach. Distributed databases, for example, might store reader positions along with SSTables, so they would resume applying transactions from the positions stored in SSTables. Tracking reader positions could easily be done at the application level using various stores (such as ZooKeeper, the filesystem, or key-value stores). A sketch of a reader that tracks its own position follows at the end of this section. - -### Read Proxy - -Log records can be cached in a service tier called the *Read Proxy* to serve a large number of readers. See the diagram [above](#architecture). The Read Proxy is the analogue of the [Write Proxy](#write-proxy).
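To make the reader model concrete, here is a hedged sketch of a reader resuming from an application-tracked TransactionID, using the `distributedlog-core` library. The package names (`org.apache.distributedlog.*`), namespace URI, and log name are assumptions that vary across releases (older releases used `com.twitter.distributedlog`), so treat this as an outline rather than copy-paste code:

```java
import java.net.URI;

// Package names are an assumption; older releases use com.twitter.distributedlog.
import org.apache.distributedlog.DistributedLogConfiguration;
import org.apache.distributedlog.LogRecordWithDLSN;
import org.apache.distributedlog.api.DistributedLogManager;
import org.apache.distributedlog.api.LogReader;
import org.apache.distributedlog.api.namespace.Namespace;
import org.apache.distributedlog.api.namespace.NamespaceBuilder;

public class TailLogSketch {
    public static void main(String[] args) throws Exception {
        // The namespace URI points at the ZooKeeper ensemble and namespace path.
        Namespace namespace = NamespaceBuilder.newBuilder()
            .conf(new DistributedLogConfiguration())
            .uri(URI.create("distributedlog://zk1:2181/my/namespace")) // hypothetical URI
            .build();

        DistributedLogManager dlm = namespace.openLog("mylog"); // hypothetical log name

        long lastProcessedTxid = 0L; // tracked by the application, e.g. in ZooKeeper
        LogReader reader = dlm.getInputStream(lastProcessedTxid);
        LogRecordWithDLSN record;
        // Non-blocking reads return null once the reader has caught up.
        while ((record = reader.readNext(true)) != null) {
            // Applications deserialize the payload bytes themselves.
            process(record.getTransactionId(), record.getPayload());
        }

        reader.close();
        dlm.close();
        namespace.close();
    }

    static void process(long txid, byte[] payload) { /* application logic */ }
}
```

Note that the application, not DistributedLog, persists `lastProcessedTxid` between runs, which is exactly the position-tracking responsibility described above.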
- -## Guarantees - -The DistributedLog API for BookKeeper provides a number of guarantees for applications: - -* Records written by a [writer](#writers) to a [log](#logs) are appended in the order in which they are written. If a record **R1** is written before a record **R2** by the same writer, **R1** will have a smaller sequence number than **R2**. -* [Readers](#readers) see [records](#log-records) in the same order in which they are [written](#writers) to the log. -* All records are persisted on disk by BookKeeper before acknowledgements, which guarantees durability. -* For a log with a replication factor of N, DistributedLog tolerates up to N-1 server failures without losing any records. - -## API - -Documentation for the DistributedLog API can be found [here](https://bookkeeper.apache.org/distributedlog/docs/latest/user_guide/api/core). - -> At a later date, the DistributedLog API docs will be added here. - - diff --git a/site/docs/4.5.0/api/ledger-adv-api.md b/site/docs/4.5.0/api/ledger-adv-api.md deleted file mode 100644 index f46950dd984..00000000000 --- a/site/docs/4.5.0/api/ledger-adv-api.md +++ /dev/null @@ -1,82 +0,0 @@ ---- -title: The Advanced Ledger API --- - -In release `4.5.0`, Apache BookKeeper introduces a few advanced APIs for advanced usage. -This section covers these advanced APIs. - -> Before learning the advanced APIs, please read the [Ledger API](../ledger-api) first. - -## LedgerHandleAdv - -[`LedgerHandleAdv`](../javadoc/org/apache/bookkeeper/client/LedgerHandleAdv) is an advanced extension of [`LedgerHandle`](../javadoc/org/apache/bookkeeper/client/LedgerHandle). -It allows users to pass in an `entryId` when adding an entry. - -### Creating advanced ledgers - -Here's an example: - -```java -byte[] passwd = "some-passwd".getBytes(); -LedgerHandleAdv handle = bkClient.createLedgerAdv( -    3, 3, 2, // replica settings -    DigestType.CRC32, -    passwd); -``` - -You can also create advanced ledgers asynchronously. - -```java -class LedgerCreationCallback implements AsyncCallback.CreateCallback { -    public void createComplete(int returnCode, LedgerHandle handle, Object ctx) { -        System.out.println("Ledger successfully created"); -    } -} -client.asyncCreateLedgerAdv( -        3, // ensemble size -        3, // write quorum size -        2, // ack quorum size -        BookKeeper.DigestType.CRC32, -        password, -        new LedgerCreationCallback(), -        "some context" -); -``` - -Besides the APIs above, BookKeeper allows users to provide a `ledger-id` when creating advanced ledgers. - -```java -long ledgerId = ...; // the ledger id is generated externally. - -byte[] passwd = "some-passwd".getBytes(); -LedgerHandleAdv handle = bkClient.createLedgerAdv( -    ledgerId, // ledger id generated externally -    3, 3, 2, // replica settings -    DigestType.CRC32, -    passwd); -``` - -> Please note, it is users' responsibility to provide a unique ledger id when using the API above. -> If a ledger already exists when users try to create an advanced ledger with the same ledger id, -> a [LedgerExistsException](../javadoc/org/apache/bookkeeper/client/BKException.BKLedgerExistException.html) is thrown by the bookkeeper client. - -### Add Entries - -The normal [add entries API](ledger-api/#adding-entries-to-ledgers) is disabled for advanced ledgers. Instead, when users want to add entries -to advanced ledgers, an entry id must be passed in along with the entry data.
- -```java -long entryId = ...; // entry id generated externally - -ledger.addEntry(entryId, "Some entry data".getBytes()); -``` - -A few notes when using this API: - -- The entry id has to be non-negative. -- Clients may add entries out of order. -- However, the entries are only acknowledged in a monotonic order starting from 0. - -### Read Entries - -The read entries API in advanced ledgers remains the same as for [normal ledgers](../ledger-api/#reading-entries-from-ledgers). diff --git a/site/docs/4.5.0/api/ledger-api.md b/site/docs/4.5.0/api/ledger-api.md deleted file mode 100644 index 4e1070d717a..00000000000 --- a/site/docs/4.5.0/api/ledger-api.md +++ /dev/null @@ -1,473 +0,0 @@ ---- -title: The Ledger API --- - -The ledger API is a lower-level API for BookKeeper that enables you to interact with {% pop ledgers %} directly. - -## The Java ledger API client - -To get started with the Java client for BookKeeper, install the `bookkeeper-server` library as a dependency in your Java application. - -> For a more in-depth tutorial that involves a real use case for BookKeeper, see the [Example application](../example-application) guide. - -## Installation - -The BookKeeper Java client library is available via [Maven Central](http://search.maven.org/) and can be installed using [Maven](#maven), [Gradle](#gradle), and other build tools. - -### Maven - -If you're using [Maven](https://maven.apache.org/), add this to your [`pom.xml`](https://maven.apache.org/guides/introduction/introduction-to-the-pom.html) build configuration file: - -```xml -<!-- in your <properties> block --> -<bookkeeper.version>4.5.0</bookkeeper.version> - -<!-- in your <dependencies> block --> -<dependency> -  <groupId>org.apache.bookkeeper</groupId> -  <artifactId>bookkeeper-server</artifactId> -  <version>${bookkeeper.version}</version> -</dependency> -``` - -### Gradle - -If you're using [Gradle](https://gradle.org/), add this to your [`build.gradle`](https://spring.io/guides/gs/gradle/) build configuration file: - -```groovy -dependencies { -    compile group: 'org.apache.bookkeeper', name: 'bookkeeper-server', version: '4.5.0' -} - -// Alternatively: -dependencies { -    compile 'org.apache.bookkeeper:bookkeeper-server:4.5.0' -} -``` - -## Connection string - -When interacting with BookKeeper using the Java client, you need to provide your client with a connection string, for which you have three options: - -* Provide your entire ZooKeeper connection string, for example `zk1:2181,zk2:2181,zk3:2181`. -* Provide a host and port for one node in your ZooKeeper cluster, for example `zk1:2181`. In general, it's better to provide a full connection string (in case the ZooKeeper node you attempt to connect to is down). -* If your ZooKeeper cluster can be discovered via DNS, you can provide the DNS name, for example `my-zookeeper-cluster.com`. - -## Creating a new client - -In order to create a new [`BookKeeper`](../javadoc/org/apache/bookkeeper/client/BookKeeper) client object, you need to pass in a [connection string](#connection-string). Here is an example client object using a ZooKeeper connection string: - -```java -try { -    String connectionString = "127.0.0.1:2181"; // For a single-node, local ZooKeeper cluster -    BookKeeper bkClient = new BookKeeper(connectionString); -} catch (InterruptedException | IOException | KeeperException e) { -    e.printStackTrace(); -} -``` - -> If you're running BookKeeper [locally](../../getting-started/run-locally), using the [`localbookie`](../../reference/cli#bookkeeper-localbookie) command, use `"127.0.0.1:2181"` for your connection string, as in the example above.
- -There are, however, other ways that you can create a client object: - -* By passing in a [`ClientConfiguration`](../javadoc/org/apache/bookkeeper/conf/ClientConfiguration) object. Here's an example: - -  ```java -  ClientConfiguration config = new ClientConfiguration(); -  config.setZkServers(zkConnectionString); -  config.setAddEntryTimeout(2000); -  BookKeeper bkClient = new BookKeeper(config); -  ``` - -* By specifying a `ClientConfiguration` and a [`ZooKeeper`](http://zookeeper.apache.org/doc/current/api/org/apache/zookeeper/ZooKeeper.html) client object: - -  ```java -  ClientConfiguration config = new ClientConfiguration(); -  config.setAddEntryTimeout(5000); -  ZooKeeper zkClient = new ZooKeeper(/* client args */); -  BookKeeper bkClient = new BookKeeper(config, zkClient); -  ``` - -* Using the `forConfig` method: - -  ```java -  BookKeeper bkClient = BookKeeper.forConfig(conf).build(); -  ``` - -## Creating ledgers - -The easiest way to create a {% pop ledger %} using the Java client is via the `createLedger` method, which creates a new ledger synchronously and returns a [`LedgerHandle`](../javadoc/org/apache/bookkeeper/client/LedgerHandle). You must specify at least a [`DigestType`](../javadoc/org/apache/bookkeeper/client/BookKeeper.DigestType) and a password. - -Here's an example: - -```java -byte[] password = "some-password".getBytes(); -LedgerHandle handle = bkClient.createLedger(BookKeeper.DigestType.MAC, password); -``` - -You can also create ledgers asynchronously. - -### Create ledgers asynchronously - -```java -class LedgerCreationCallback implements AsyncCallback.CreateCallback { -    public void createComplete(int returnCode, LedgerHandle handle, Object ctx) { -        System.out.println("Ledger successfully created"); -    } -} - -client.asyncCreateLedger( -        3, -        2, -        BookKeeper.DigestType.MAC, -        password, -        new LedgerCreationCallback(), -        "some context" -); -``` - -## Adding entries to ledgers - -```java -long entryId = ledger.addEntry("Some entry data".getBytes()); -``` - -### Add entries asynchronously - -## Reading entries from ledgers - -```java -Enumeration<LedgerEntry> entries = handle.readEntries(1, 99); -``` - -To read all possible entries from the ledger: - -```java -Enumeration<LedgerEntry> entries = -    handle.readEntries(0, handle.getLastAddConfirmed()); - -while (entries.hasMoreElements()) { -    LedgerEntry entry = entries.nextElement(); -    System.out.println("Successfully read entry " + entry.getEntryId()); -} -``` - -### Reading entries after the LastAddConfirmed range - -`readUnconfirmedEntries` allows reading beyond the LastAddConfirmed range. -It lets the client read without checking the local value of LastAddConfirmed, so that it is possible to read entries for which the writer has not yet received an acknowledgement. -For entries within the range 0..LastAddConfirmed, BookKeeper guarantees that the writer has successfully received the acknowledgement. -For entries outside that range it is possible that the writer never received the acknowledgement, so there is a risk that the reader sees entries before the writer does, which could result in a consistency issue in some cases. -With this method you can read entries both before and after the LastAddConfirmed in one call, with the consistency expectations described above.
-
-```java
-Enumeration<LedgerEntry> entries =
-    handle.readUnconfirmedEntries(0, lastEntryIdExpectedToRead);
-
-while (entries.hasMoreElements()) {
-    LedgerEntry entry = entries.nextElement();
-    System.out.println("Successfully read entry " + entry.getEntryId());
-}
-```
-
-## Deleting ledgers
-
-{% pop Ledgers %} can also be deleted synchronously or asynchronously.
-
-```java
-long ledgerId = 1234;
-
-try {
-    bkClient.deleteLedger(ledgerId);
-} catch (Exception e) {
-    e.printStackTrace();
-}
-```
-
-### Delete ledgers asynchronously
-
-```java
-class DeleteEntryCallback implements AsyncCallback.DeleteCallback {
-    public void deleteComplete(int returnCode, Object ctx) {
-        System.out.println("Delete completed");
-    }
-}
-```
-
-## Simple example
-
-> For a more involved BookKeeper client example, see the [example application](#example-application) below.
-
-In the code sample below, a BookKeeper client:
-
-* creates a ledger
-* writes entries to the ledger
-* closes the ledger (meaning no further writes are possible)
-* re-opens the ledger for reading
-* reads all available entries
-
-```java
-// Create a client object for the local ensemble. This
-// operation throws multiple exceptions, so make sure to
-// use a try/catch block when instantiating client objects.
-BookKeeper bkc = new BookKeeper("localhost:2181");
-
-// A password for the new ledger
-byte[] ledgerPassword = /* some sequence of bytes, perhaps random */;
-
-// Create a new ledger and fetch its identifier
-LedgerHandle lh = bkc.createLedger(BookKeeper.DigestType.MAC, ledgerPassword);
-long ledgerId = lh.getId();
-
-// Create a buffer for four-byte entries
-ByteBuffer entry = ByteBuffer.allocate(4);
-
-int numberOfEntries = 100;
-
-// Add entries to the ledger, then close it
-for (int i = 0; i < numberOfEntries; i++){
-    entry.putInt(i);
-    entry.position(0);
-    lh.addEntry(entry.array());
-}
-lh.close();
-
-// Open the ledger for reading
-lh = bkc.openLedger(ledgerId, BookKeeper.DigestType.MAC, ledgerPassword);
-
-// Read all available entries
-Enumeration<LedgerEntry> entries = lh.readEntries(0, numberOfEntries - 1);
-
-while (entries.hasMoreElements()) {
-    ByteBuffer result = ByteBuffer.wrap(entries.nextElement().getEntry());
-    Integer retrEntry = result.getInt();
-
-    // Print the integer stored in each entry
-    System.out.println(String.format("Result: %s", retrEntry));
-}
-
-// Close the ledger and the client
-lh.close();
-bkc.close();
-```
-
-Running this should return this output:
-
-```shell
-Result: 0
-Result: 1
-Result: 2
-# etc
-```
-
-## Example application
-
-This tutorial walks you through building an example application that uses BookKeeper as the replicated log. The application uses the [BookKeeper Java client](../java-client) to interact with BookKeeper.
-
-> The code for this tutorial can be found in [this GitHub repo](https://github.com/ivankelly/bookkeeper-tutorial/). The final code for the `Dice` class can be found [here](https://github.com/ivankelly/bookkeeper-tutorial/blob/master/src/main/java/org/apache/bookkeeper/Dice.java).
-
-### Setup
-
-Before you start, you will need to have a BookKeeper cluster running locally on your machine. For installation instructions, see [Installation](../../getting-started/installation).
-
-To start up a cluster consisting of six {% pop bookies %} locally:
-
-```shell
-$ bookkeeper-server/bin/bookkeeper localbookie 6
-```
-
-You can specify a different number of bookies if you'd like.
-
-### Goal
-
-The goal of the dice application is to have
-
-* multiple instances of this application,
-* possibly running on different machines,
-* all of which display the exact same sequence of numbers.
-
-In other words, the log needs to be both durable and consistent, regardless of how many {% pop bookies %} are participating in the BookKeeper ensemble. If one of the bookies crashes or becomes unable to communicate with the other bookies in any way, it should *still* display the same sequence of numbers as the others. This tutorial will show you how to achieve this.
-
-To begin, download the base application, then compile and run it.
-
-```shell
-$ git clone https://github.com/ivankelly/bookkeeper-tutorial.git
-$ mvn package
-$ mvn exec:java -Dexec.mainClass=org.apache.bookkeeper.Dice
-```
-
-That should yield output that looks something like this:
-
-```
-[INFO] Scanning for projects...
-[INFO]
-[INFO] ------------------------------------------------------------------------
-[INFO] Building tutorial 1.0-SNAPSHOT
-[INFO] ------------------------------------------------------------------------
-[INFO]
-[INFO] --- exec-maven-plugin:1.3.2:java (default-cli) @ tutorial ---
-[WARNING] Warning: killAfter is now deprecated. Do you need it ? Please comment on MEXEC-6.
-Value = 4
-Value = 5
-Value = 3
-```
-
-### The base application
-
-The application in this tutorial is a dice application. The `Dice` class below has a `playDice` function that generates a random number between 1 and 6 every second, prints the value of the dice roll, and runs indefinitely.
-
-```java
-public class Dice {
-    Random r = new Random();
-
-    void playDice() throws InterruptedException {
-        while (true) {
-            Thread.sleep(1000);
-            System.out.println("Value = " + (r.nextInt(6) + 1));
-        }
-    }
-}
-```
-
-When you run the `main` function of this class, a new `Dice` object will be instantiated and then run indefinitely:
-
-```java
-public class Dice {
-    // other methods
-
-    public static void main(String[] args) throws InterruptedException {
-        Dice d = new Dice();
-        d.playDice();
-    }
-}
-```
-
-### Leaders and followers (and a bit of background)
-
-To achieve this common view in multiple instances of the program, we need each instance to agree on what the next number in the sequence will be. For example, the instances must agree that 4 is the first number, 2 is the second number, 5 is the third number, and so on. This is a difficult problem, especially in the case that any instance may go away at any time, and messages between the instances can be lost or reordered.
-
-Luckily, there are already algorithms to solve this. Paxos is an abstract algorithm to implement this kind of agreement, while Zab and Raft are more practical protocols. This video gives a good overview of how these algorithms usually look. They all have a similar core.
-
-It would be possible to run Paxos to agree on each number in the sequence. However, running Paxos each time can be expensive. What Zab and Raft do instead is use a Paxos-like algorithm to elect a leader. The leader then decides what the sequence of events should be, putting them in a log, which the other instances can then follow to maintain the same state as the leader.
-
-Bookkeeper provides the functionality for the second part of the protocol, allowing a leader to write events to a log and have multiple followers tailing the log. However, bookkeeper does not do leader election. You will need a zookeeper or raft instance for that purpose.
-
-### Why not just use ZooKeeper?
-
-There are a number of reasons:
-
-1. Zookeeper's log is only exposed through a tree-like interface. It can be hard to shoehorn your application into this.
-2. A zookeeper ensemble of multiple machines is limited to one log. You may want one log per resource, which will become expensive very quickly.
-3. Adding extra machines to a zookeeper ensemble does not increase capacity or throughput.
-
-Bookkeeper can be seen as a means of exposing ZooKeeper's replicated log to applications in a scalable fashion. ZooKeeper is still used by BookKeeper, however, to maintain consistency guarantees, though clients don't need to interact with ZooKeeper directly.
-
-### Electing a leader
-
-We'll use zookeeper to elect a leader. A zookeeper instance will have started locally when you started the localbookie application above. To verify it's running, run the following command.
-
-```shell
-$ echo stat | nc localhost 2181
-Zookeeper version: 3.4.6-1569965, built on 02/20/2014 09:09 GMT
-Clients:
- /127.0.0.1:59343[1](queued=0,recved=40,sent=41)
- /127.0.0.1:49354[1](queued=0,recved=11,sent=11)
- /127.0.0.1:49361[0](queued=0,recved=1,sent=0)
- /127.0.0.1:59344[1](queued=0,recved=38,sent=39)
- /127.0.0.1:59345[1](queued=0,recved=38,sent=39)
- /127.0.0.1:59346[1](queued=0,recved=38,sent=39)
-
-Latency min/avg/max: 0/0/23
-Received: 167
-Sent: 170
-Connections: 6
-Outstanding: 0
-Zxid: 0x11
-Mode: standalone
-Node count: 16
-```
-
-To interact with zookeeper, we'll use the Curator client rather than the stock zookeeper client. Getting things right with the zookeeper client can be tricky, and curator removes a lot of the pointy corners for you. In fact, curator even provides a leader election recipe, so we need to do very little work to get leader election in our application.
-
-```java
-public class Dice extends LeaderSelectorListenerAdapter implements Closeable {
-
-    final static String ZOOKEEPER_SERVER = "127.0.0.1:2181";
-    final static String ELECTION_PATH = "/dice-elect";
-
-    ...
-
-    Dice() throws InterruptedException {
-        curator = CuratorFrameworkFactory.newClient(ZOOKEEPER_SERVER,
-                2000, 10000, new ExponentialBackoffRetry(1000, 3));
-        curator.start();
-        curator.blockUntilConnected();
-
-        leaderSelector = new LeaderSelector(curator, ELECTION_PATH, this);
-        leaderSelector.autoRequeue();
-        leaderSelector.start();
-    }
-```
-
-In the constructor for Dice, we need to create the curator client. We specify four things when creating the client: the location of the zookeeper service, the session timeout, the connect timeout, and the retry policy.
-
-The session timeout is a zookeeper concept. If the zookeeper server doesn't hear anything from the client for this amount of time, any leases which the client holds will be timed out. This is important in leader election. For leader election, the curator client will take a lease on ELECTION_PATH. The first instance to take the lease will become leader and the rest will become followers. However, their claim on the lease will remain in the queue. If the first instance then goes away, due to a crash etc., its session will time out. Once the session times out, the lease will be released and the next instance in the queue will become the leader. The call to autoRequeue() will make the client queue itself again if it loses the lease for some other reason, such as if it was still alive but a garbage collection cycle caused it to lose its session, and thereby its lease.
-I've set the lease to be quite low so that when we test out leader election, transitions will be quite quick. The optimum length for the session timeout depends very much on the use case. The other parameters are the connection timeout, i.e. the amount of time the client will spend trying to connect to a zookeeper server before giving up, and the retry policy. The retry policy specifies how the client should respond to transient errors, such as connection loss. Operations that fail with transient errors can be retried, and this argument specifies how often the retries should occur.
-
-Finally, you'll have noticed that Dice now extends LeaderSelectorListenerAdapter and implements Closeable. Closeable is there to close the resources we have initialized in the constructor: the curator client and the leaderSelector. LeaderSelectorListenerAdapter is a callback that the leaderSelector uses to notify the instance that it is now the leader. It is passed as the third argument to the LeaderSelector constructor.
-
-```java
-    @Override
-    public void takeLeadership(CuratorFramework client)
-            throws Exception {
-        synchronized (this) {
-            leader = true;
-            try {
-                while (true) {
-                    this.wait();
-                }
-            } catch (InterruptedException ie) {
-                Thread.currentThread().interrupt();
-                leader = false;
-            }
-        }
-    }
-```
-
-takeLeadership() is the callback called by LeaderSelector when the instance is leader. It should only return when the instance wants to give up leadership. In our case, we never do, so we wait on the current object until we're interrupted. To signal to the rest of the program that we are leader, we set a volatile boolean called leader to true. This is unset after we are interrupted.
-
-```java
-    void playDice() throws InterruptedException {
-        while (true) {
-            while (leader) {
-                Thread.sleep(1000);
-                System.out.println("Value = " + (r.nextInt(6) + 1)
-                        + ", isLeader = " + leader);
-            }
-        }
-    }
-```
-
-Finally, we modify the `playDice` function to only generate random numbers when it is the leader.
-
-Run two instances of the program in two different terminals. You'll see that one becomes leader and prints numbers while the other just sits there.
-
-Now stop the leader using Control-Z. This will pause the process, but it won't kill it. You will be dropped back to the shell in that terminal. After a couple of seconds (the session timeout), you will see that the other instance has become the leader. Zookeeper will guarantee that only one instance is selected as leader at any time.
-
-Now go back to the shell that the original leader was on and wake up the process using fg. You'll see something like the following:
-
-```shell
-...
-...
-Value = 4, isLeader = true
-Value = 4, isLeader = true
-^Z
-[1]+  Stopped                 mvn exec:java -Dexec.mainClass=org.apache.bookkeeper.Dice
-$ fg
-mvn exec:java -Dexec.mainClass=org.apache.bookkeeper.Dice
-Value = 3, isLeader = true
-Value = 1, isLeader = false
-```
diff --git a/site/docs/4.5.0/api/overview.md b/site/docs/4.5.0/api/overview.md
deleted file mode 100644
index 3eb649273c1..00000000000
--- a/site/docs/4.5.0/api/overview.md
+++ /dev/null
@@ -1,17 +0,0 @@
----
-title: BookKeeper API
----
-
-BookKeeper offers a few APIs that applications can use to interact with it:
-
-* The [ledger API](../ledger-api) is a lower-level API that enables you to interact with {% pop ledgers %} directly
-* The [Ledger Advanced API](../ledger-adv-api) is an advanced extension of the [Ledger API](../ledger-api) that provides more flexibility to applications.
-* The [DistributedLog API](../distributedlog-api) is a higher-level API that provides convenient abstractions.
-
-## Trade-offs
-
-The `Ledger API` provides direct access to ledgers and thus enables you to use BookKeeper however you'd like.
-
-However, in most use cases, if you want a `log stream`-like abstraction, it requires you to manage things like tracking the list of ledgers,
-rolling ledgers, and data retention on your own. In such cases, we recommend using the [DistributedLog API](../distributedlog-api),
-with semantics resembling continuous log streams from the standpoint of applications.
diff --git a/site/docs/4.5.0/deployment/dcos.md b/site/docs/4.5.0/deployment/dcos.md
deleted file mode 100644
index 3e174384ec1..00000000000
--- a/site/docs/4.5.0/deployment/dcos.md
+++ /dev/null
@@ -1,142 +0,0 @@
----
-title: Deploying BookKeeper on DC/OS
-subtitle: Get up and running easily on an Apache Mesos cluster
-logo: img/dcos-logo.png
----
-
-[DC/OS](https://dcos.io/) (the DataCenter Operating System) is a distributed operating system used for deploying and managing applications and systems on [Apache Mesos](http://mesos.apache.org/). DC/OS is an open-source tool created and maintained by [Mesosphere](https://mesosphere.com/).
-
-BookKeeper is available as a [DC/OS package](http://universe.dcos.io/#/package/bookkeeper/version/latest) from the [Mesosphere DC/OS Universe](http://universe.dcos.io/#/packages).
-
-## Prerequisites
-
-In order to run BookKeeper on DC/OS, you will need:
-
-* DC/OS version [1.8](https://dcos.io/docs/1.8/) or higher
-* A DC/OS cluster with at least three nodes
-* The [DC/OS CLI tool](https://dcos.io/docs/1.8/usage/cli/install/) installed
-
-Each node in your DC/OS-managed Mesos cluster must have at least:
-
-* 1 CPU
-* 1 GB of memory
-* 10 GB of total persistent disk storage
-
-## Installing BookKeeper
-
-```shell
-$ dcos package install bookkeeper --yes
-```
-
-This command will:
-
-* Install the `bookkeeper` subcommand for the `dcos` CLI tool
-* Start a single {% pop bookie %} on the Mesos cluster with the [default configuration](../../reference/config)
-
-The bookie that is automatically started up uses the host mode of the network and by default exposes the service at `agent_ip:3181`.
-
-> If you run `dcos package install bookkeeper` without setting the `--yes` flag, the install will run in interactive mode. For more information on the `package install` command, see the [DC/OS docs](https://docs.mesosphere.com/latest/cli/command-reference/dcos-package/dcos-package-install/).
-
-### Services
-
-To watch BookKeeper start up, click on the **Services** tab in the DC/OS [user interface](https://docs.mesosphere.com/latest/gui/) and you should see the `bookkeeper` package listed:
-
-![DC/OS services]({{ site.baseurl }}img/dcos/services.png)
-
-### Tasks
-
-To see which tasks have started, click on the `bookkeeper` service and you'll see an interface that looks like this:
-
-![DC/OS tasks]({{ site.baseurl }}img/dcos/tasks.png)
-
-## Scaling BookKeeper
-
-Once the first {% pop bookie %} has started up, you can click on the **Scale** tab to scale up your BookKeeper ensemble by adding more bookies (or scale down the ensemble by removing bookies).
-
-![DC/OS scale]({{ site.baseurl }}img/dcos/scale.png)
-
-## ZooKeeper Exhibitor
-
-ZooKeeper contains the information for all bookies in the ensemble. When deployed on DC/OS, BookKeeper uses a ZooKeeper instance provided by DC/OS.
-You can access a visual UI for ZooKeeper using [Exhibitor](https://github.com/soabase/exhibitor/wiki), which is available at [http://master.dcos/exhibitor](http://master.dcos/exhibitor).
-
-![ZooKeeper Exhibitor]({{ site.baseurl }}img/dcos/exhibitor.png)
-
-You should see a listing of IP/host information for all bookies under the `messaging/bookkeeper/ledgers/available` node.
-
-## Client connections
-
-To connect to bookies running on DC/OS using clients running within your Mesos cluster, you need to specify the ZooKeeper connection string for DC/OS's ZooKeeper cluster:
-
-```
-master.mesos:2181
-```
-
-This is the *only* ZooKeeper host/port you need to include in your connection string. Here's an example using the [Java client](../../api/ledger-api#the-java-ledger-api-client):
-
-```java
-BookKeeper bkClient = new BookKeeper("master.mesos:2181");
-```
-
-If you're connecting using a client running outside your Mesos cluster, you need to supply the public-facing connection string for your DC/OS ZooKeeper cluster.
-
-## Configuring BookKeeper
-
-By default, the `bookkeeper` package will start up a BookKeeper ensemble consisting of one {% pop bookie %} with one CPU, 1 GB of memory, and a 70 MB persistent volume.
-
-You can supply a non-default configuration when installing the package using a JSON file. Here's an example command:
-
-```shell
-$ dcos package install bookkeeper \
-  --options=/path/to/config.json
-```
-
-You can then fetch the current configuration for BookKeeper at any time using the `package describe` command:
-
-```shell
-$ dcos package describe bookkeeper \
-  --config
-```
-
-### Available parameters
-
-> Not all [configurable parameters](../../reference/config) for BookKeeper are available for BookKeeper on DC/OS. Only the parameters shown in the table below are available.
-
-Param | Type | Description | Default
-:-----|:-----|:------------|:-------
-`name` | String | The name of the DC/OS service. | `bookkeeper`
-`cpus` | Integer | The number of CPU shares to allocate to each {% pop bookie %}. The minimum is 1. | `1`
-`instances` | Integer | The number of {% pop bookies %} to run. The minimum is 1. | `1`
-`mem` | Number | The memory, in MB, to allocate to each BookKeeper task | `1024.0` (1 GB)
-`volume_size` | Number | The persistent volume size, in MB | `70`
-`zk_client` | String | The connection string for the ZooKeeper client instance | `master.mesos:2181`
-`service_port` | Integer | The BookKeeper export service port, using `PORT0` in Marathon | `3181`
-
-### Example JSON configuration
-
-Here's an example JSON configuration object for BookKeeper on DC/OS:
-
-```json
-{
-  "instances": 5,
-  "cpus": 3,
-  "mem": 2048.0,
-  "volume_size": 250
-}
-```
-
-If that configuration were stored in a file called `bk-config.json`, you could apply that configuration upon installing the BookKeeper package using this command:
-
-```shell
-$ dcos package install bookkeeper \
-  --options=./bk-config.json
-```
-
-## Uninstalling BookKeeper
-
-You can shut down and uninstall the `bookkeeper` package from DC/OS at any time using the `package uninstall` command:
-
-```shell
-$ dcos package uninstall bookkeeper
-Uninstalled package [bookkeeper] version [4.5.0]
-Thank you for using bookkeeper.
-```
diff --git a/site/docs/4.5.0/deployment/kubernetes.md b/site/docs/4.5.0/deployment/kubernetes.md
deleted file mode 100644
index f65172112a7..00000000000
--- a/site/docs/4.5.0/deployment/kubernetes.md
+++ /dev/null
@@ -1,4 +0,0 @@
----
-title: Deploying BookKeeper on Kubernetes
-logo: img/kubernetes-logo.png
----
diff --git a/site/docs/4.5.0/deployment/manual.md b/site/docs/4.5.0/deployment/manual.md
deleted file mode 100644
index daafd5556f5..00000000000
--- a/site/docs/4.5.0/deployment/manual.md
+++ /dev/null
@@ -1,56 +0,0 @@
----
-title: Manual deployment
----
-
-The easiest way to deploy BookKeeper is using schedulers like [DC/OS](../dcos), but you can also deploy BookKeeper clusters manually. A BookKeeper cluster consists of two main components:
-
-* A [ZooKeeper](#zookeeper-setup) cluster that is used for configuration- and coordination-related tasks
-* An [ensemble](#starting-up-bookies) of {% pop bookies %}
-
-## ZooKeeper setup
-
-We won't provide a full guide to setting up a ZooKeeper cluster here. We recommend that you consult [this guide](https://zookeeper.apache.org/doc/current/zookeeperAdmin.html) in the official ZooKeeper documentation.
-
-## Starting up bookies
-
-Once your ZooKeeper cluster is up and running, you can start up as many {% pop bookies %} as you'd like to form a cluster. Before starting up each bookie, you need to modify the bookie's configuration to make sure that it points to the right ZooKeeper cluster.
-
-On each bookie host, you need to [download](../../getting-started/installation#download) the BookKeeper package as a tarball. Once you've done that, you need to configure the bookie by setting values in the `bookkeeper-server/conf/bk_server.conf` config file. The one parameter that you will absolutely need to change is the [`zkServers`](../../config#zkServers) parameter, which you will need to set to the ZooKeeper connection string for your ZooKeeper cluster. Here's an example:
-
-```properties
-zkServers=100.0.0.1:2181,100.0.0.2:2181,100.0.0.3:2181
-```
-
-> A full listing of configurable parameters available in `bookkeeper-server/conf/bk_server.conf` can be found in the [Configuration](../../reference/config) reference manual.
-
-Once the bookie's configuration is set, you can start it up using the [`bookie`](../../reference/cli#bookkeeper-bookie) command of the [`bookkeeper`](../../reference/cli#bookkeeper) CLI tool:
-
-```shell
-$ bookkeeper-server/bin/bookkeeper bookie
-```
-
-> You can also build BookKeeper [by cloning it from source](../../getting-started/installation#clone) or [using Maven](../../getting-started/installation#build-using-maven).
-
-### System requirements
-
-{% include system-requirements.md %}
-
-## Cluster metadata setup
-
-Once you've started up a cluster of bookies, you need to set up cluster metadata for the cluster by running the following command from any bookie in the cluster:
-
-```shell
-$ bookkeeper-server/bin/bookkeeper shell metaformat
-```
-
-> The `metaformat` command performs all the necessary ZooKeeper cluster metadata tasks and thus only needs to be run *once* and from *any* bookie in the BookKeeper cluster.
-
-Once cluster metadata formatting has been completed, your BookKeeper cluster is ready to go!
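-
-To sanity-check a freshly formatted cluster, you can create a throwaway ledger from any host that can reach ZooKeeper. This is only a hedged sketch, not a prescribed procedure; the connection string, digest type, and password below are assumptions you should replace with your own:
-
-```java
-import org.apache.bookkeeper.client.BookKeeper;
-import org.apache.bookkeeper.client.LedgerHandle;
-
-public class ClusterSmokeTest {
-    public static void main(String[] args) throws Exception {
-        // Minimal smoke test: create a ledger, write one entry, clean up.
-        // The ZooKeeper connection string is an assumption; use your own.
-        BookKeeper bk = new BookKeeper("100.0.0.1:2181,100.0.0.2:2181,100.0.0.3:2181");
-        LedgerHandle lh = bk.createLedger(BookKeeper.DigestType.MAC, "test-password".getBytes());
-        lh.addEntry("hello".getBytes());
-        System.out.println("Wrote one entry to ledger " + lh.getId());
-        lh.close();
-        bk.close();
-    }
-}
-```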
-
-
diff --git a/site/docs/4.5.0/development/codebase.md b/site/docs/4.5.0/development/codebase.md
deleted file mode 100644
index 9a83073ea4c..00000000000
--- a/site/docs/4.5.0/development/codebase.md
+++ /dev/null
@@ -1,3 +0,0 @@
----
-title: The BookKeeper codebase
----
diff --git a/site/docs/4.5.0/development/protocol.md b/site/docs/4.5.0/development/protocol.md
deleted file mode 100644
index 6d17aa0ed45..00000000000
--- a/site/docs/4.5.0/development/protocol.md
+++ /dev/null
@@ -1,148 +0,0 @@
----
-title: The BookKeeper protocol
----
-
-BookKeeper uses a special replication protocol for guaranteeing persistent storage of entries in an ensemble of bookies.
-
-> This document assumes that you have some knowledge of leader election and log replication and how these can be used in a distributed system. If not, we recommend reading the [example application](../../api/ledger-api#example-application) documentation first.
-
-## Ledgers
-
-{% pop Ledgers %} are the basic building block of BookKeeper and the level at which BookKeeper makes its persistent storage guarantees. A replicated log consists of an ordered list of ledgers. See [Ledgers to logs](#ledgers-to-logs) for info on building a replicated log from ledgers.
-
-Ledgers are composed of metadata and {% pop entries %}. The metadata is stored in ZooKeeper, which provides a *compare-and-swap* (CAS) operation. Entries are stored on storage nodes known as {% pop bookies %}.
-
-A ledger has a single writer and multiple readers (SWMR).
-
-### Ledger metadata
-
-A ledger's metadata contains the following:
-
-Parameter | Name | Meaning
-:---------|:-----|:-------
-Identifier | | A 64-bit integer, unique within the system
-Ensemble size | **E** | The number of nodes the ledger is stored on
-Write quorum size | **Qw** | The number of nodes each entry is written to. In effect, the max replication for the entry.
-Ack quorum size | **Qa** | The number of nodes an entry must be acknowledged on. In effect, the minimum replication for the entry.
-Current state | | The current status of the ledger. One of `OPEN`, `CLOSED`, or `IN_RECOVERY`.
-Last entry | | The last entry in the ledger, or `NULL` if the current state is not `CLOSED`.
-
-In addition, each ledger's metadata consists of one or more *fragments*. Each fragment consists of
-
-* the first entry id of the fragment, and
-* the list of bookies for the fragment.
-
-When creating a ledger, the following invariant must hold:
-
-**E >= Qw >= Qa**
-
-Thus, the ensemble size (**E**) must be at least as large as the write quorum size (**Qw**), which must in turn be at least as large as the ack quorum size (**Qa**). If that condition does not hold, the ledger creation operation will fail.
-
-### Ensembles
-
-When a ledger is created, **E** bookies are chosen for the entries of that ledger. The bookies are the initial ensemble of the ledger. A ledger can have multiple ensembles, but an entry has only one ensemble. Changes in the ensemble involve a new fragment being added to the ledger.
-
-Take the following example. In this ledger, with ensemble size of 3, there are two fragments and thus two ensembles, one starting at entry 0, the second at entry 12. The second ensemble differs from the first only by its first element. This could be because bookie1 has failed and therefore had to be replaced.
-
-First entry | Bookies
-:-----------|:-------
-0 | B1, B2, B3
-12 | B4, B2, B3
-
-### Write quorums
-
-Each entry in the log is written to **Qw** nodes. This is considered the write quorum for that entry.
-The write quorum is the subsequence of the ensemble, **Qw** in length, starting at the bookie at index (entryid % **E**).
-
-For example, in a ledger with **E** = 4, **Qw** = 3, and **Qa** = 2, with an ensemble consisting of B1, B2, B3, and B4, the write quorums for the first 6 entries will be:
-
-Entry | Write quorum
-:-----|:------------
-0 | B1, B2, B3
-1 | B2, B3, B4
-2 | B3, B4, B1
-3 | B4, B1, B2
-4 | B1, B2, B3
-5 | B2, B3, B4
-
-There are only **E** distinct write quorums in any ensemble. If **E** = **Qw**, then there is only one, as no striping occurs.
-
-### Ack quorums
-
-The ack quorum for an entry is any subset of the write quorum of size **Qa**. If **Qa** bookies acknowledge an entry, it means it has been fully replicated.
-
-### Guarantees
-
-The system can tolerate **Qa** – 1 failures without data loss.
-
-Bookkeeper guarantees that:
-
-1. All updates to a ledger will be read in the same order as they were written.
-2. All clients will read the same sequence of updates from the ledger.
-
-## Writing to ledgers
-
-Entry ids are assigned by the writer; since a ledger has only a single writer, ensuring that entry ids are sequential is trivial. A bookie acknowledges a write once it has been persisted to disk and is therefore durable. Once **Qa** bookies from the write quorum acknowledge the write, the write is acknowledged to the client, but only if all entries with lower entry ids in the ledger have already been acknowledged to the client.
-
-The entry written contains the ledger id, the entry id, the last add confirmed and the payload. The last add confirmed is the last entry which had been acknowledged to the client when this entry was written. Sending this with the entry speeds up recovery of the ledger in the case that the writer crashes.
-
-Another client can also read entries in the ledger up to the last add confirmed, as we guarantee that all entries thus far have been replicated on **Qa** nodes, and therefore all future readers will be able to read them as well. However, to read like this, the ledger should be opened with a non-fencing open. Otherwise, it would kill the writer.
-
-If a node fails to acknowledge a write, the writer will create a new ensemble by replacing the failed node in the current ensemble. It creates a new fragment with this ensemble, starting from the first message that has not been acknowledged to the client. Creating the new fragment involves making a CAS write to the metadata. If the CAS write fails, someone else has modified something in the ledger metadata. This concurrent modification could have been caused by recovery or {% pop rereplication %}. We reread the metadata. If the state of the ledger is no longer `OPEN`, we send an error to the client for any outstanding writes. Otherwise, we try to replace the failed node again.
-
-### Closing a ledger as a writer
-
-Closing a ledger is straightforward for a writer. The writer makes a CAS write to the metadata, changing the state to `CLOSED` and setting the last entry of the ledger to the last entry which we have acknowledged to the client.
-
-If the CAS write fails, it means someone else has modified the metadata. We reread the metadata, and retry closing as long as the state of the ledger is still `OPEN`. If the state is `IN_RECOVERY` we send an error to the client. If the state is `CLOSED` and the last entry is the same as the last entry we have acknowledged to the client, we complete the close operation successfully. If the last entry is different from what we have acknowledged to the client, we send an error to the client.
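-
-Before moving on to readers, the write-quorum striping rule described above can be made concrete with a small sketch. This is an illustration of the protocol rule only, not part of any BookKeeper API:
-
-```java
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-
-public class WriteQuorums {
-    // Illustrative helper (not a BookKeeper API): the write quorum for an
-    // entry is the Qw bookies starting at index (entryId % E) in the ensemble.
-    static List<String> writeQuorum(List<String> ensemble, int qw, long entryId) {
-        int e = ensemble.size();
-        int start = (int) (entryId % e);
-        List<String> quorum = new ArrayList<>();
-        for (int i = 0; i < qw; i++) {
-            quorum.add(ensemble.get((start + i) % e));
-        }
-        return quorum;
-    }
-
-    public static void main(String[] args) {
-        List<String> ensemble = Arrays.asList("B1", "B2", "B3", "B4");
-        // Prints [B3, B4, B1], matching entry 2 in the table above
-        System.out.println(writeQuorum(ensemble, 3, 2L));
-    }
-}
-```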
-
-### Closing a ledger as a reader
-
-A reader can also force a ledger to close. Forcing the ledger to close will prevent any writer from adding new entries to the ledger. This is called {% pop fencing %}. This can occur when a writer has crashed or become unavailable, and a new writer wants to take over writing to the log. The new writer must ensure that it has seen all updates from the previous writer, and prevent the previous writer from making any new updates before making any updates of its own.
-
-To recover a ledger, we first update the state in the metadata to IN_RECOVERY. We then send a fence message to all the bookies in the last fragment of the ledger. When a bookie receives a fence message for a ledger, the fenced state of the ledger is persisted to disk. Once we receive a response from at least (**Qw** - **Qa**) + 1 bookies from each write quorum in the ensemble, the ledger is fenced.
-
-By ensuring we have received a response from at least (**Qw** - **Qa**) + 1 bookies in each write quorum, we ensure that, if the old writer is alive and tries to add a new entry, there will be no write quorum in which **Qa** bookies will accept the write. If the old writer tries to update the ensemble, it will fail on the CAS metadata write, and then see that the ledger is in IN_RECOVERY state, and that it therefore shouldn't try to write to it.
-
-The old writer will be able to write entries to individual bookies (we can't guarantee that the fence message reaches all bookies), but as it will not be able to reach ack quorum, it will not be able to send a success response to its client. The client will get a LedgerFenced error instead.
-
-It is important to note that when you get a ledger fenced message for an entry, it doesn't mean that the entry has not been written. It means that the entry may or may not have been written, and this can only be determined after the ledger is recovered. In effect, LedgerFenced should be treated like a timeout.
-
-Once the ledger is fenced, recovery can begin. Recovery means finding the last entry of the ledger and closing the ledger. To find the last entry of the ledger, the client asks all bookies for the highest last add confirmed value they have seen. It waits until it has received a response from at least (**Qw** - **Qa**) + 1 bookies from each write quorum, and takes the highest response as the entry id to start reading forward from. It then starts reading forward in the ledger, one entry at a time, replicating all entries it sees to the entire write quorum for that entry. Once it can no longer read any more entries, it updates the state in the metadata to `CLOSED`, and sets the last entry of the ledger to the last entry it wrote. Multiple readers can try to recover a ledger at the same time, but as the metadata write is CAS, they will all converge on the same last entry of the ledger.
-
-## Ledgers to logs
-
-In BookKeeper, {% pop ledgers %} can be used to build a replicated log for your system. All guarantees provided by BookKeeper are at the ledger level. Guarantees on the whole log can be built using the ledger guarantees and any consistent datastore with a compare-and-swap (CAS) primitive. BookKeeper uses ZooKeeper as the datastore but others could theoretically be used.
-
-A log in BookKeeper is built from some number of ledgers, with a fixed order. A ledger represents a single segment of the log. A ledger could be the whole period that one node was the leader, or there could be multiple ledgers for a single period of leadership.
-However, there can only ever be one leader that adds entries to a single ledger. Ledgers cannot be reopened for writing once they have been closed/recovered.
-
-> BookKeeper does *not* provide leader election. You must use a system like ZooKeeper for this.
-
-In many cases, leader election is really leader suggestion. Multiple nodes could think that they are leader at any one time. It is the job of the log to guarantee that only one can write changes to the system.
-
-### Opening a log
-
-Once a node thinks it is leader for a particular log, it must take the following steps:
-
-1. Read the list of ledgers for the log
-1. {% pop Fence %} the last two ledgers in the list. Two ledgers are fenced because the writer may be writing to the second-to-last ledger while adding the last ledger to the list.
-1. Create a new ledger
-1. Add the new ledger to the ledger list
-1. Write the new ledger list back to the datastore using a CAS operation
-
-The fencing in step 2 and the CAS operation in step 5 prevent two nodes from thinking that they have leadership at any one time.
-
-The CAS operation will fail if the list of ledgers has changed between reading it and writing back the new list. When the CAS operation fails, the leader must start at step 1 again. Even better, it should check that it is in fact still the leader with the system that is providing leader election. The protocol will work correctly without this step, though it will be able to make very little progress if two nodes think they are leader and are duelling for the log.
-
-The node must not serve any writes until step 5 completes successfully.
-
-### Rolling ledgers
-
-The leader may wish to close the current ledger and open a new one every so often. Ledgers can only be deleted as a whole. If you don't roll the log, you won't be able to clean up old entries in the log without a leader change. By closing the current ledger and adding a new one, the leader allows the log to be truncated whenever that data is no longer needed. The steps for rolling the log are similar to those for creating a new ledger:
-
-1. Create a new ledger
-1. Add the new ledger to the ledger list
-1. Write the new ledger list to the datastore using CAS
-1. Close the previous ledger
-
-By deferring the closing of the previous ledger until step 4, we can continue writing to the log while we perform metadata update operations to add the new ledger. This is safe as long as you fence the last 2 ledgers when acquiring leadership.
-
diff --git a/site/docs/4.5.0/getting-started/concepts.md b/site/docs/4.5.0/getting-started/concepts.md
deleted file mode 100644
index 7a3c92847b2..00000000000
--- a/site/docs/4.5.0/getting-started/concepts.md
+++ /dev/null
@@ -1,202 +0,0 @@
----
-title: BookKeeper concepts and architecture
-subtitle: The core components and how they work
-prev: ../run-locally
----
-
-BookKeeper is a service that provides persistent storage of streams of log [entries](#entries)---aka *records*---in sequences called [ledgers](#ledgers). BookKeeper replicates stored entries across multiple servers.
-
-## Basic terms
-
-In BookKeeper:
-
-* each unit of a log is an [*entry*](#entries) (aka record)
-* streams of log entries are called [*ledgers*](#ledgers)
-* individual servers storing ledgers of entries are called [*bookies*](#bookies)
-
-BookKeeper is designed to be reliable and resilient to a wide variety of failures.
-Bookies can crash, corrupt data, or discard data, but as long as there are enough bookies behaving correctly in the ensemble, the service as a whole will behave correctly.
-
-## Entries
-
-> **Entries** contain the actual data written to ledgers, along with some important metadata.
-
-BookKeeper entries are sequences of bytes that are written to [ledgers](#ledgers). Each entry has the following fields:
-
-Field | Java type | Description
-:-----|:----------|:-----------
-Ledger number | `long` | The ID of the ledger to which the entry has been written
-Entry number | `long` | The unique ID of the entry
-Last confirmed (LC) | `long` | The ID of the last recorded entry
-Data | `byte[]` | The entry's data (written by the client application)
-Authentication code | `byte[]` | The message auth code, which includes *all* other fields in the entry
-
-## Ledgers
-
-> **Ledgers** are the basic unit of storage in BookKeeper.
-
-Ledgers are sequences of entries, while each entry is a sequence of bytes. Entries are written to a ledger:
-
-* sequentially, and
-* at most once.
-
-This means that ledgers have *append-only* semantics. Entries cannot be modified once they've been written to a ledger. Determining the proper write order is the responsibility of [client applications](#clients).
-
-## Clients and APIs
-
-> BookKeeper clients have two main roles: they create and delete ledgers, and they read entries from and write entries to ledgers.
->
-> BookKeeper provides both a lower-level and a higher-level API for ledger interaction.
-
-There are currently two APIs that can be used for interacting with BookKeeper:
-
-* The [ledger API](../../api/ledger-api) is a lower-level API that enables you to interact with {% pop ledgers %} directly.
-* The [DistributedLog API](../../api/distributedlog-api) is a higher-level API that enables you to use BookKeeper without directly interacting with ledgers.
-
-In general, you should choose the API based on how much granular control you need over ledger semantics. The two APIs can also both be used within a single application.
-
-## Bookies
-
-> **Bookies** are individual BookKeeper servers that handle ledgers (more specifically, fragments of ledgers). Bookies function as part of an ensemble.
-
-A bookie is an individual BookKeeper storage server. Individual bookies store fragments of ledgers, not entire ledgers (for the sake of performance). For any given ledger **L**, an *ensemble* is the group of bookies storing the entries in **L**.
-
-Whenever entries are written to a ledger, those entries are {% pop striped %} across the ensemble (written to a sub-group of bookies rather than to all bookies).
-
-### Motivation
-
-> BookKeeper was initially inspired by the NameNode server in HDFS but its uses now extend far beyond this.
-
-The initial motivation for BookKeeper comes from the [Hadoop](http://hadoop.apache.org/) ecosystem. In the [Hadoop Distributed File System](https://wiki.apache.org/hadoop/HDFS) (HDFS), a special node called the [NameNode](https://wiki.apache.org/hadoop/NameNode) logs all operations in a reliable fashion, which ensures that recovery is possible in case of crashes.
-
-The NameNode, however, served only as initial inspiration for BookKeeper. The applications for BookKeeper extend far beyond this and include essentially any application that requires an append-based storage system.
-BookKeeper provides a number of advantages for such applications:
-
-* Highly efficient writes
-* High fault tolerance via replication of messages within ensembles of bookies
-* High throughput for write operations via {% pop striping %} (across as many bookies as you wish)
-
-## Metadata storage
-
-BookKeeper requires a metadata storage service to store information related to [ledgers](#ledgers) and available bookies. BookKeeper currently uses [ZooKeeper](https://zookeeper.apache.org) for this and other tasks.
-
-## Data management in bookies
-
-Bookies manage data in a [log-structured](https://en.wikipedia.org/wiki/Log-structured_file_system) way, which is implemented using three types of files:
-
-* [journals](#journals)
-* [entry logs](#entry-logs)
-* [index files](#index-files)
-
-### Journals
-
-A journal file contains BookKeeper transaction logs. Before any update to a ledger takes place, the bookie ensures that a transaction describing the update is written to non-volatile storage. A new journal file is created once the bookie starts or the older journal file reaches the journal file size threshold.
-
-### Entry logs
-
-An entry log file manages the written entries received from BookKeeper clients. Entries from different ledgers are aggregated and written sequentially, while their offsets are kept as pointers in a [ledger cache](#ledger-cache) for fast lookup.
-
-A new entry log file is created once the bookie starts or the older entry log file reaches the entry log size threshold. Old entry log files are removed by the Garbage Collector Thread once they are not associated with any active ledger.
-
-### Index files
-
-An index file is created for each ledger, which comprises a header and several fixed-length index pages that record the offsets of data stored in entry log files.
-
-Since updating index files would introduce random disk I/O, index files are updated lazily by a sync thread running in the background. This ensures speedy performance for updates. Before index pages are persisted to disk, they are gathered in a ledger cache for lookup.
-
-### Ledger cache
-
-Ledger index pages are cached in a memory pool, which allows for more efficient management of disk head scheduling.
-
-### Adding entries
-
-When a client instructs a {% pop bookie %} to write an entry to a ledger, the entry will go through the following steps to be persisted on disk:
-
-1. The entry is appended to an [entry log](#entry-logs)
-1. The index of the entry is updated in the [ledger cache](#ledger-cache)
-1. A transaction corresponding to this entry update is appended to the [journal](#journals)
-1. A response is sent to the BookKeeper client
-
-> For performance reasons, the entry log buffers entries in memory and commits them in batches, while the ledger cache holds index pages in memory and flushes them lazily. This process is described in more detail in the [Data flush](#data-flush) section below.
-
-### Data flush
-
-Ledger index pages are flushed to index files in the following two cases:
-
-* The ledger cache memory limit is reached. There is no more space available to hold newer index pages. Dirty index pages will be evicted from the ledger cache and persisted to index files.
-* A background sync thread is responsible for flushing index pages from the ledger cache to index files periodically.
-
-Besides flushing index pages, the sync thread is responsible for rolling journal files when journal files use too much disk space.
-
-The data flush flow in the sync thread is as follows:
-
-* A `LastLogMark` is recorded in memory. The `LastLogMark` indicates that the entries before it have been persisted (to both index and entry log files) and contains two parts:
-  1. A `txnLogId` (the file ID of a journal)
-  1. A `txnLogPos` (offset in a journal)
-* Dirty index pages are flushed from the ledger cache to the index file, and entry log files are flushed to ensure that all buffered entries in entry log files are persisted to disk.
-
-  Ideally, a bookie only needs to flush index pages and entry log files that contain entries before `LastLogMark`. There is, however, no such information in the ledger and entry log mapping to journal files. Consequently, the thread flushes the ledger cache and entry log entirely here, and may flush entries after the `LastLogMark`. Flushing more is not a problem, though, just redundant.
-* The `LastLogMark` is persisted to disk, which means that entries added before `LastLogMark` have had both their entry data and index pages persisted to disk. It is now safe to remove journal files created earlier than `txnLogId`.
-
-If the bookie has crashed before persisting `LastLogMark` to disk, it still has journal files containing entries for which index pages may not have been persisted. Consequently, when this bookie restarts, it inspects journal files to restore those entries, and data isn't lost.
-
-Using the above data flush mechanism, it is safe for the sync thread to skip data flushing when the bookie shuts down. However, the entry logger uses a buffered channel to write entries in batches, and there might be data buffered in the buffered channel upon a shutdown. The bookie needs to ensure that the entry log flushes its buffered data during shutdown. Otherwise, entry log files become corrupted with partial entries.
-
-### Data compaction
-
-On bookies, entries of different ledgers are interleaved in entry log files. A bookie runs a garbage collector thread to delete un-associated entry log files to reclaim disk space. If a given entry log file contains entries from a ledger that has not been deleted, then the entry log file would never be removed and the occupied disk space never reclaimed. In order to avoid such a case, a bookie server compacts entry log files in a garbage collector thread to reclaim disk space.
-
-There are two kinds of compaction running with different frequency: minor compaction and major compaction. The differences between minor compaction and major compaction lie in their threshold value and compaction interval.
-
-* The garbage collection threshold is the size percentage of an entry log file occupied by undeleted ledgers. The default minor compaction threshold is 0.2, while the major compaction threshold is 0.8.
-* The garbage collection interval is how frequently to run the compaction. The default minor compaction interval is 1 hour, while the major compaction interval is 1 day.
-
-> If either the threshold or interval is set to less than or equal to zero, compaction is disabled.
-
-The data compaction flow in the garbage collector thread is as follows:
-
-* The thread scans entry log files to get their entry log metadata, which records a list of ledgers comprising an entry log and their corresponding percentages.
-* With the normal garbage collection flow, once the bookie determines that a ledger has been deleted, the ledger will be removed from the entry log metadata and the size of the entry log reduced.
-* If the remaining size of an entry log file reaches a specified threshold, the entries of active ledgers in the entry log will be copied to a new entry log file.
-* Once all valid entries have been copied, the old entry log file is deleted.
-
-## ZooKeeper metadata
-
-BookKeeper requires a ZooKeeper installation for storing [ledger](#ledger) metadata. Whenever you construct a [`BookKeeper`](../../api/javadoc/org/apache/bookkeeper/client/BookKeeper) client object, you need to pass a list of ZooKeeper servers as a parameter to the constructor, like this:
-
-```java
-String zkConnectionString = "127.0.0.1:2181";
-BookKeeper bkClient = new BookKeeper(zkConnectionString);
-```
-
-> For more info on using the BookKeeper Java client, see [this guide](../../api/ledger-api#the-java-ledger-api-client).
-
-## Ledger manager
-
-A *ledger manager* handles ledgers' metadata (which is stored in ZooKeeper). BookKeeper offers two types of ledger managers: the [flat ledger manager](#flat-ledger-manager) and the [hierarchical ledger manager](#hierarchical-ledger-manager). Both ledger managers extend the [`AbstractZkLedgerManager`](../../api/javadoc/org/apache/bookkeeper/meta/AbstractZkLedgerManager) abstract class.
-
-> #### Use the flat ledger manager in most cases
-> The flat ledger manager is the default and is recommended for nearly all use cases. The hierarchical ledger manager is better suited only for managing very large numbers of BookKeeper ledgers (> 50,000).
-
-### Flat ledger manager
-
-The *flat ledger manager*, implemented in the [`FlatLedgerManager`](../../api/javadoc/org/apache/bookkeeper/meta/FlatLedgerManager.html) class, stores all ledgers' metadata in child nodes of a single ZooKeeper path. The flat ledger manager creates [sequential nodes](https://zookeeper.apache.org/doc/trunk/zookeeperProgrammers.html#Sequence+Nodes+--+Unique+Naming) to ensure the uniqueness of the ledger ID and prefixes all nodes with `L`. Bookie servers manage their own active ledgers in a hash map so that it's easy to find which ledgers have been deleted from ZooKeeper and then garbage collect them.
-
-The flat ledger manager's garbage collection flow proceeds as follows:
-
-* All existing ledgers are fetched from ZooKeeper (`zkActiveLedgers`)
-* All ledgers currently active within the bookie are fetched (`bkActiveLedgers`)
-* The currently active ledgers are looped through to determine which ledgers don't currently exist in ZooKeeper. Those are then garbage collected.
-
-### Hierarchical ledger manager
-
-The *hierarchical ledger manager*, implemented in the [`HierarchicalLedgerManager`](../../api/javadoc/org/apache/bookkeeper/meta/HierarchicalLedgerManager) class, stores ledgers' metadata in two-level [znodes](https://zookeeper.apache.org/doc/current/zookeeperOver.html#Nodes+and+ephemeral+nodes). It first obtains a global unique ID from ZooKeeper using an [`EPHEMERAL_SEQUENTIAL`](https://zookeeper.apache.org/doc/current/api/org/apache/zookeeper/CreateMode.html#EPHEMERAL_SEQUENTIAL) znode.
-Since ZooKeeper's sequence counter has a format of `%10d` (10 digits with 0 padding, for example `0000000001`), the hierarchical ledger manager splits the generated ID into 3 parts:
-
-```shell
-{level1 (2 digits)}{level2 (4 digits)}{level3 (4 digits)}
-```
-
-These three parts are used to form the actual ledger node path used to store ledger metadata:
-
-```shell
-{ledgers_root_path}/{level1}/{level2}/L{level3}
-```
-
-For example, ledger 0000000001 is split into three parts, 00, 0000, and 0001, and stored in znode `/{ledgers_root_path}/00/0000/L0001`. Each znode could hold as many as 10,000 ledgers, which avoids the problem of the child list being larger than the maximum ZooKeeper packet size (which is the [limitation](https://issues.apache.org/jira/browse/BOOKKEEPER-39) that initially prompted the creation of the hierarchical ledger manager).
diff --git a/site/docs/4.5.0/getting-started/installation.md b/site/docs/4.5.0/getting-started/installation.md
deleted file mode 100644
index fac16ddd390..00000000000
--- a/site/docs/4.5.0/getting-started/installation.md
+++ /dev/null
@@ -1,74 +0,0 @@
----
-title: BookKeeper installation
-subtitle: Download or clone BookKeeper and build it locally
-next: ../run-locally
----
-
-{% capture download_url %}http://apache.claz.org/bookkeeper/bookkeeper-{{ site.latest_release }}/bookkeeper-{{ site.latest_release }}-src.tar.gz{% endcapture %}
-
-You can install BookKeeper either by [downloading](#download) a [GZipped](http://www.gzip.org/) tarball package or [cloning](#clone) the BookKeeper repository.
-
-## Requirements
-
-* [Unix environment](http://www.opengroup.org/unix)
-* [Java Development Kit 1.8](http://www.oracle.com/technetwork/java/javase/downloads/index.html) or later
-* [Maven 3.0](https://maven.apache.org/install.html) or later
-
-## Download
-
-You can download Apache BookKeeper releases from one of many [Apache mirrors](http://www.apache.org/dyn/closer.cgi/bookkeeper). Here's an example for the [apache.claz.org](http://apache.claz.org/bookkeeper) mirror:
-
-```shell
-$ curl -O {{ download_url }}
-$ tar xvf bookkeeper-{{ site.latest_release }}-src.tar.gz
-$ cd bookkeeper-{{ site.latest_release }}
-```
-
-## Clone
-
-To build BookKeeper from source, clone the repository, either from the [GitHub mirror]({{ site.github_repo }}) or from the [Apache repository](http://git.apache.org/bookkeeper.git/):
-
-```shell
-# From the GitHub mirror
-$ git clone {{ site.github_repo}}
-
-# From Apache directly
-$ git clone git://git.apache.org/bookkeeper.git/
-```
-
-## Build using Maven
-
-Once you have BookKeeper on your local machine, either by [downloading](#download) or [cloning](#clone) it, you can build it from source using Maven:
-
-```shell
-$ mvn package
-```
-
-> You can skip tests by adding the `-DskipTests` flag when running `mvn package`.
-
-### Useful Maven commands
-
-Some other useful Maven commands beyond `mvn package`:
-
-Command | Action
-:-------|:------
-`mvn clean` | Removes build artifacts
-`mvn compile` | Compiles JAR files from Java sources
-`mvn compile findbugs:findbugs` | Compile using the Maven [FindBugs](http://gleclaire.github.io/findbugs-maven-plugin) plugin
-`mvn install` | Install the BookKeeper JAR locally in your local Maven cache (usually in the `~/.m2` directory)
-`mvn deploy` | Deploy the BookKeeper JAR to the Maven repo (if you have the proper credentials)
-`mvn verify` | Performs a wide variety of verification and validation tasks
-`mvn apache-rat:check` | Run Maven using the [Apache Rat](http://creadur.apache.org/rat/apache-rat-plugin/) plugin
-`mvn compile javadoc:aggregate` | Build Javadocs locally
-`mvn package assembly:single` | Build a complete distribution using the Maven [Assembly](http://maven.apache.org/plugins/maven-assembly-plugin/) plugin
-
-## Package directory
-
-The BookKeeper project contains several subfolders that you should be aware of:
-
-Subfolder | Contains
-:---------|:--------
-[`bookkeeper-server`]({{ site.github_repo }}/tree/master/bookkeeper-server) | The BookKeeper server and client
-[`bookkeeper-benchmark`]({{ site.github_repo }}/tree/master/bookkeeper-benchmark) | A benchmarking suite for measuring BookKeeper performance
-[`bookkeeper-stats`]({{ site.github_repo }}/tree/master/bookkeeper-stats) | A BookKeeper stats library
-[`bookkeeper-stats-providers`]({{ site.github_repo }}/tree/master/bookkeeper-stats-providers) | BookKeeper stats providers
diff --git a/site/docs/4.5.0/getting-started/run-locally.md b/site/docs/4.5.0/getting-started/run-locally.md
deleted file mode 100644
index ab33642c852..00000000000
--- a/site/docs/4.5.0/getting-started/run-locally.md
+++ /dev/null
@@ -1,16 +0,0 @@
----
-title: Run bookies locally
-prev: ../installation
-next: ../concepts
-toc_disable: true
----
-
-{% pop Bookies %} are individual BookKeeper servers. You can run an ensemble of bookies locally on a single machine using the [`localbookie`](../../reference/cli#bookkeeper-localbookie) command of the `bookkeeper` CLI tool and specifying the number of bookies you'd like to include in the ensemble.
-
-This would start up an ensemble with 10 bookies:
-
-```shell
-$ bookkeeper-server/bin/bookkeeper localbookie 10
-```
-
-> When you start up an ensemble using `localbookie`, all bookies run in a single JVM process.
diff --git a/site/docs/4.5.0/overview/overview.md b/site/docs/4.5.0/overview/overview.md
deleted file mode 100644
index 11d74e93f45..00000000000
--- a/site/docs/4.5.0/overview/overview.md
+++ /dev/null
@@ -1,59 +0,0 @@
----
-title: Apache BookKeeper™ 4.5.0
----
-
-This documentation is for Apache BookKeeper™ version 4.5.0.
-
-Apache BookKeeper™ is a scalable, fault-tolerant, and low-latency storage service optimized for realtime workloads.
-It offers durability, replication, and strong consistency as essentials for building reliable real-time applications.
-
-BookKeeper is well suited for scenarios like these:
-
-Scenario | Example
-:--------|:-------
-[WAL](https://en.wikipedia.org/wiki/Write-ahead_logging) (Write-Ahead-Logging) | The HDFS [namenode](https://hadoop.apache.org/docs/r2.5.2/hadoop-project-dist/hadoop-hdfs/HDFSHighAvailabilityWithNFS.html#BookKeeper_as_a_Shared_storage_EXPERIMENTAL)
-Message storage | [Apache Pulsar](https://pulsar.incubator.apache.org/)
-Offset/cursor storage | Apache Pulsar
-Object/BLOB storage | Storing snapshots of replicated state machines
Learn more about Apache BookKeeper and what it can do for your organization:

- [Apache BookKeeper 4.5.0 Release Notes](../releaseNotes)
- [Java API docs](../../api/javadoc)

Or start using Apache BookKeeper today.

### Users

- **Concepts**: Start with [concepts](../../getting-started/concepts). This will help you to fully understand the other parts of the documentation, including the setup, integration, and operation guides.
- **Getting Started**: Install [Apache BookKeeper](../../getting-started/installation) and run bookies [locally](../../getting-started/run-locally).
- **API**: Read the [API](../../api/overview) documentation to learn how to use Apache BookKeeper to build your applications.
- **Deployment**: The [Deployment Guide](../../deployment/manual) shows how to deploy Apache BookKeeper to production clusters.

### Administrators

- **Operations**: The [Admin Guide](../../admin/bookies) shows how to run Apache BookKeeper in production, along with production considerations and best practices.

### Contributors

- **Details**: Learn the [design details](../../development/protocol) to understand the internals.

diff --git a/site/docs/4.5.0/overview/releaseNotes.md b/site/docs/4.5.0/overview/releaseNotes.md
deleted file mode 100644
index f1d3e3364aa..00000000000
--- a/site/docs/4.5.0/overview/releaseNotes.md
+++ /dev/null
@@ -1,509 +0,0 @@
---
title: Apache BookKeeper 4.5.0 Release Notes
---

This is the fifth release of BookKeeper as an Apache Top Level Project!

The 4.5.0 release incorporates hundreds of new fixes, improvements, and features since the previous major release, 4.4.0,
which was released over a year ago. It is a big milestone for the Apache BookKeeper community, converging three
main branches (Salesforce, Twitter, and Yahoo).

Apache BookKeeper users are encouraged to upgrade to 4.5.0. The technical details of this release are summarized
below.

## Highlights

The main features in 4.5.0 are in the following areas:

- Dependencies Upgrade
- Security
- Public API
- Performance
- Operations

### Dependencies Upgrade

Here is a list of dependencies upgraded in 4.5.0:

- Moved development from Java 7 to Java 8.
- Upgraded Protobuf to `2.6`.
- Upgraded ZooKeeper from `3.4` to `3.5`.
- Upgraded Netty to `4.1`.
- Upgraded Guava to `20.0`.
- Upgraded SLF4J to `1.7.25`.
- Upgraded Codahale to `3.1.0`.

### Security

Prior to this release, Apache BookKeeper only supported simple `DIGEST-MD5` authentication.

With this release of Apache BookKeeper, a number of features are introduced that can be used, together or separately,
to secure a BookKeeper cluster.

The following security features are currently supported:

- Authentication of connections to bookies from clients, using either `TLS` or `SASL` (Kerberos).
- Authentication of connections from clients, bookies, and autorecovery daemons to `ZooKeeper`, when using ZooKeeper-based ledger managers.
- Encryption of data transferred between bookies and clients, and between bookies and autorecovery daemons, using `TLS`.

It's worth noting that these security features are optional - non-secured clusters are supported, as well as a mix
of authenticated, unauthenticated, encrypted, and non-encrypted clients.

For more details, have a look at [BookKeeper Security](../../security/overview).

### Public API

There are multiple new client features introduced in 4.5.0.
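As a taste of the advanced API described in the next section, here is a minimal, hedged Java sketch of creating a ledger with an application-supplied ledger id. The `createLedgerAdv` overload and the entry-id variant of `addEntry` are assumed from the feature description below, and the ZooKeeper address, quorum sizes, and ids are illustrative:

```java
import org.apache.bookkeeper.client.BookKeeper;
import org.apache.bookkeeper.client.LedgerHandle;

public class LedgerAdvExample {
    public static void main(String[] args) throws Exception {
        // connect to the cluster's ZooKeeper ensemble (address is illustrative)
        BookKeeper bk = new BookKeeper("zk1.example.com:2181");

        // create a ledger with an application-chosen ledger id (assumed overload):
        // ledger id, ensemble size, write quorum, ack quorum, digest type, password, custom metadata
        LedgerHandle lh = bk.createLedgerAdv(
                12345L, 3, 3, 2,
                BookKeeper.DigestType.CRC32,
                "password".getBytes(), null);

        // with the advanced handle, the application assigns the entry id as well
        lh.addEntry(0L, "entry-0".getBytes());

        lh.close();
        bk.close();
    }
}
```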
#### LedgerHandleAdv

The [Ledger API] is the low-level API provided by BookKeeper for interacting with `ledgers` in a BookKeeper cluster.
It is simple but offers no flexibility in ledger id or entry id generation. Apache BookKeeper introduces `LedgerHandleAdv`
as an extension of the existing `LedgerHandle` for advanced usage. The new `LedgerHandleAdv` allows applications to provide
their own `ledger-id` and assign the `entry-id` when adding entries.

See [Ledger Advanced API](../../api/ledger-adv-api) for more details.

#### Long Poll

`Long Poll` is a key feature that [DistributedLog](https://distributedlog.io) uses to achieve low-latency tailing.
This big feature has been merged back in 4.5.0 and is available to BookKeeper users.

This feature includes two main changes: one is the `LastAddConfirmed` piggyback, while the other is a new `long poll` read API.

The first change piggybacks the latest `LastAddConfirmed` along with the read response, so your `LastAddConfirmed` is automatically advanced
as your read traffic continues. It significantly reduces the traffic from explicitly polling `LastAddConfirmed` and hence reduces end-to-end latency.

The second change provides a new `long poll` read API, allowing tailing reads without polling `LastAddConfirmed` every time readers exhaust known entries.
Although the `long poll` API brings great latency improvements for tailing reads, it is still a very low-level primitive.
It is recommended to use a high-level API (e.g. the [DistributedLog API](../../api/distributedlog-api)) for tailing and streaming use cases.

See [Streaming Reads](https://bookkeeper.apache.org/distributedlog/docs/latest/user_guide/design/main.html#streaming-reads) for more details.

#### Explicit LAC

Prior to 4.5.0, the `LAC` was only advanced when subsequent entries were added. If no subsequent entries were added,
the last entry written would not be visible to readers until the ledger was closed. High-level clients (e.g. DistributedLog) or applications
had to work around this by writing some sort of `control records` to advance the `LAC`.

In 4.5.0, a new `explicit lac` feature is introduced to periodically advance the `LAC` if no subsequent entries are added. This feature
can be enabled by setting `explicitLacInterval` to a positive value.

### Performance

There are a lot of performance-related bug fixes and improvements in 4.5.0. These changes include:

- Upgraded Netty from 3.x to 4.x to leverage buffer pooling and reduce memory copies.
- Moved development from Java 7 to Java 8 to take advantage of Java 8 features.
- A lot of improvements around scheduling and threading on `bookies`.
- Delayed ensemble change to improve tail latency.
- Parallel ledger recovery to improve recovery speed.
- ...

Four of these changes are outlined below. For a complete list of performance improvements, please check out the `full list of changes` at the end.

#### Netty 4 Upgrade

The major performance improvement introduced in 4.5.0 is upgrading Netty from 3.x to [4.x](http://netty.io/wiki/new-and-noteworthy-in-4.0.html).

For more details, please read the [upgrade guide](../../admin/upgrade) for Netty-related tips when upgrading BookKeeper from 4.4.0 to 4.5.0.

#### Delay Ensemble Change

`Ensemble Change` is a feature that Apache BookKeeper uses to achieve high availability. However, it is an expensive metadata operation.
Especially when Apache BookKeeper is deployed across multiple data centers, losing a data center will cause a churn of metadata
operations due to ensemble changes. `Delay Ensemble Change` is introduced in 4.5.0 to overcome this problem. Enabling this feature means
an `Ensemble Change` will only occur when clients can't receive enough valid responses to satisfy the `ack-quorum` constraint. This feature
improves tail latency.

To enable this feature, please set `delayEnsembleChange` to `true` on your clients.

#### Parallel Ledger Recovery

BookKeeper clients recover entries one by one during ledger recovery. If a ledger carries a very large volume of traffic, it will have a
large number of entries to recover when client failures occur. BookKeeper introduces `parallel ledger recovery` in 4.5.0 to allow
batch recovery and improve ledger recovery speed.

To enable this feature, please set `enableParallelRecoveryRead` to `true` on your clients. You can also set `recoveryReadBatchSize`
to control the batch size of recovery reads.

#### Multiple Journals

Prior to 4.5.0, bookies were only allowed to configure one journal device. If you wanted higher write bandwidth, you could RAID multiple
disks into one device and mount that device as the journal directory. However, because there is only one journal thread, this approach doesn't
actually improve write bandwidth.

BookKeeper introduces multiple journal directories support in 4.5.0. Users can configure multiple devices for journal directories.

To enable this feature, please use `journalDirectories` rather than `journalDirectory`.

### Operations

#### LongHierarchicalLedgerManager

Apache BookKeeper supports a pluggable metadata store. By default, it uses Apache ZooKeeper as its metadata store. Among the ZooKeeper-based
ledger manager implementations, `HierarchicalLedgerManager` is the most popular and widely adopted. However, it has a major
limitation: it assumes the `ledger-id` is a 32-bit integer, which limits the number of ledgers to `2^32`.

`LongHierarchicalLedgerManager` is introduced to overcome this limitation.

See [Ledger Manager](../../getting-started/concepts/#ledger-manager) for more details.

#### Weight-based placement policy

`Rack-Aware` and `Region-Aware` are the two placement policies available in the BookKeeper client. They place ensembles based
on the user's configured network topology. However, they both assume that all nodes are equal. `Weight-based` placement is introduced in 4.5.0 to
improve the existing placement policies. It was not built as a separate policy; it is built into the existing placement policies.
If you are using `Rack-Aware` or `Region-Aware`, you can simply enable `weight-based` placement by setting `diskWeightBasedPlacementEnabled` to `true`.

#### Customized Ledger Metadata

A `Map<String, byte[]>` is introduced in the ledger metadata in 4.5.0. Clients are now allowed to pass in a key/value map when creating ledgers.
This customized ledger metadata can later be used by a user-defined placement policy. This extends the flexibility of the BookKeeper API.

#### Add Prometheus stats provider

A new [Prometheus](https://prometheus.io/) [stats provider](https://github.com/apache/bookkeeper/tree/master/bookkeeper-stats-providers/prometheus-metrics-provider)
is introduced in 4.5.0. It simplifies metric collection when running BookKeeper on [Kubernetes](https://kubernetes.io/).
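To make the knobs from the Performance and Operations sections concrete, here is a hedged configuration sketch. Every key is taken from the feature descriptions above, but the values are illustrative only (for instance, the `explicitLacInterval` unit is assumed to be milliseconds):

```shell
# client side: delay ensemble changes until the ack-quorum cannot be satisfied
delayEnsembleChange=true
# client side: recover ledgers in batches instead of entry by entry
enableParallelRecoveryRead=true
recoveryReadBatchSize=64
# client side: periodically advance the LAC even without new entries
explicitLacInterval=1000
# client side: weigh bookies by capacity in Rack-Aware/Region-Aware placement
diskWeightBasedPlacementEnabled=true
# bookie side: stripe the journal across multiple devices
journalDirectories=/mnt/journal-1/bk-txn,/mnt/journal-2/bk-txn
```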
#### Add more tools in BookieShell

`BookieShell` is the tool provided by Apache BookKeeper for operating clusters. Several important tools are introduced in 4.5.0, for example
`decommissionbookie`, `expandstorage`, `lostbookierecoverydelay`, and `triggeraudit`.

For the complete list of commands in `BookieShell`, please read the [BookKeeper CLI tool reference](../../reference/cli).

## Full list of changes

### JIRA

#### Sub-task

#### Bug

- [BOOKKEEPER-852] - Release LedgerDescriptor and master-key objects when not used anymore
- [BOOKKEEPER-903] - MetaFormat BookieShell Command is not deleting UnderReplicatedLedgers list from the ZooKeeper
- [BOOKKEEPER-907] - for ReadLedgerEntriesCmd, EntryFormatter should be configurable and HexDumpEntryFormatter should be one of them
- [BOOKKEEPER-908] - Case to handle BKLedgerExistException
- [BOOKKEEPER-924] - addEntry() is susceptible to spurious wakeups
- [BOOKKEEPER-927] - Extend BOOKKEEPER-886 to LedgerHandleAdv too (BOOKKEEPER-886: Allow to disable ledgers operation throttling)
- [BOOKKEEPER-933] - ClientConfiguration always inherits System properties
- [BOOKKEEPER-938] - LedgerOpenOp should use digestType from metadata
- [BOOKKEEPER-939] - Fix typo in bk-merge-pr.py
- [BOOKKEEPER-940] - Fix findbugs warnings after bumping to java 8
- [BOOKKEEPER-952] - Fix RegionAwarePlacementPolicy
- [BOOKKEEPER-955] - in BookKeeperAdmin listLedgers method currentRange variable is not getting updated to next iterator when it has run out of elements
- [BOOKKEEPER-956] - HierarchicalLedgerManager doesn't work for ledgerid of length 9 and 10 because of order issue in HierarchicalLedgerRangeIterator
- [BOOKKEEPER-958] - ZeroBuffer readOnlyBuffer returns ByteBuffer with 0 remaining bytes for length > 64k
- [BOOKKEEPER-959] - ClientAuthProvider and BookieAuthProvider Public API used Protobuf Shaded classes
- [BOOKKEEPER-976] - Fix license headers with "Copyright 2016 The Apache Software Foundation"
- [BOOKKEEPER-980] - BookKeeper Tools doesn't process the argument correctly
- [BOOKKEEPER-981] - NullPointerException in RackawareEnsemblePlacementPolicy while running in Docker Container
- [BOOKKEEPER-984] - BookieClientTest.testWriteGaps tested
- [BOOKKEEPER-986] - Handle Memtable flush failure
- [BOOKKEEPER-987] - BookKeeper build is broken due to the shade plugin for commit ecbb053e6e
- [BOOKKEEPER-988] - Missing license headers
- [BOOKKEEPER-989] - Enable travis CI for bookkeeper git
- [BOOKKEEPER-999] - BookKeeper client can leak threads
- [BOOKKEEPER-1013] - Fix findbugs errors on latest master
- [BOOKKEEPER-1018] - Allow client to select older V2 protocol (no protobuf)
- [BOOKKEEPER-1020] - Fix Explicit LAC tests on master
- [BOOKKEEPER-1021] - Improve the merge script to handle github reviews api
- [BOOKKEEPER-1031] - ReplicationWorker.rereplicate fails to call close() on ReadOnlyLedgerHandle
- [BOOKKEEPER-1044] - Entrylogger is not readding rolled logs back to the logChannelsToFlush list when exception happens while trying to flush rolled logs
- [BOOKKEEPER-1047] - Add missing error code in ZK setData return path
- [BOOKKEEPER-1058] - Ignore already deleted ledger on replication audit
- [BOOKKEEPER-1061] - BookieWatcher should not do ZK blocking operations from ZK async callback thread
- [BOOKKEEPER-1065] - OrderedSafeExecutor should only have 1 thread per bucket
- [BOOKKEEPER-1071] - BookieRecoveryTest is failing due to a Netty4 IllegalReferenceCountException
- [BOOKKEEPER-1072] - CompactionTest is flaky when disks are almost full
- [BOOKKEEPER-1073] - Several stats provider related changes.
- [BOOKKEEPER-1074] - Remove JMX Bean
- [BOOKKEEPER-1075] - BK LedgerMetadata: more memory-efficient parsing of configs
- [BOOKKEEPER-1076] - BookieShell should be able to read the 'FENCE' entry in the log
- [BOOKKEEPER-1077] - BookKeeper: Local Bookie Journal and ledger paths
- [BOOKKEEPER-1079] - shell lastMark throws NPE
- [BOOKKEEPER-1098] - ZkUnderreplicationManager can build up an unbounded number of watchers
- [BOOKKEEPER-1101] - BookKeeper website menus not working under https
- [BOOKKEEPER-1102] - org.apache.bookkeeper.client.BookKeeperDiskSpaceWeightedLedgerPlacementTest.testDiskSpaceWeightedBookieSelectionWithBookiesBeingAdded is unreliable
- [BOOKKEEPER-1103] - LedgerMetadataCreateTest bug in ledger id generation causes intermittent hang
- [BOOKKEEPER-1104] - BookieInitializationTest.testWithDiskFullAndAbilityToCreateNewIndexFile testcase is unreliable

#### Improvement

#### New Feature

- [BOOKKEEPER-390] - Provide support for ZooKeeper authentication
- [BOOKKEEPER-391] - Support Kerberos authentication of bookkeeper
- [BOOKKEEPER-575] - Bookie SSL support
- [BOOKKEEPER-670] - Longpoll Read & Piggyback Support
- [BOOKKEEPER-912] - Allow EnsemblePlacementPolicy to choose bookies using ledger custom data (multitenancy support)
- [BOOKKEEPER-928] - Add custom client supplied metadata field to LedgerMetadata
- [BOOKKEEPER-930] - Option to disable Bookie networking
- [BOOKKEEPER-941] - Introduce Feature Switches For controlling client and server behavior
- [BOOKKEEPER-948] - Provide an option to add more ledger/index directories to a bookie
- [BOOKKEEPER-950] - Ledger placement policy to accomodate different storage capacity of bookies
- [BOOKKEEPER-969] - Security Support
- [BOOKKEEPER-983] - BookieShell Command for LedgerDelete
- [BOOKKEEPER-991] - bk shell - Get a list of all on disk files
- [BOOKKEEPER-992] - ReadLog Command Enhancement
- [BOOKKEEPER-1019] - Support for reading entries after LAC (causal consistency driven by out-of-band communications)
- [BOOKKEEPER-1034] - When all disks are full, start Bookie in RO mode if RO mode is enabled
- [BOOKKEEPER-1067] - Add Prometheus stats provider

#### Story

#### Task

#### Test

- [BOOKKEEPER-967] - Create new testsuite for testing RackAwareEnsemblePlacementPolicy using ScriptBasedMapping.
- [BOOKKEEPER-1045] - Execute tests in different JVM processes
- [BOOKKEEPER-1064] - ConcurrentModificationException in AuditorLedgerCheckerTest
- [BOOKKEEPER-1078] - Local BookKeeper enhancements for testability
- [BOOKKEEPER-1097] - GC test when no WritableDirs

#### Wish

- [BOOKKEEPER-943] - Reduce log level of AbstractZkLedgerManager for register/unregister ReadOnlyLedgerHandle

### Github

- [https://github.com/apache/bookkeeper/milestone/1](https://github.com/apache/bookkeeper/milestone/1)

diff --git a/site/docs/4.5.0/overview/releaseNotesTemplate.md b/site/docs/4.5.0/overview/releaseNotesTemplate.md
deleted file mode 100644
index c7845ae51bf..00000000000
--- a/site/docs/4.5.0/overview/releaseNotesTemplate.md
+++ /dev/null
@@ -1,17 +0,0 @@
---
title: Apache BookKeeper 4.5.0 Release Notes
---

[provide a summary of this release]

Apache BookKeeper users are encouraged to upgrade to 4.5.0. The technical details of this release are summarized
below.

## Highlights

[List the highlights]

## Details

[list to issues list]

diff --git a/site/docs/4.5.0/reference/cli.md b/site/docs/4.5.0/reference/cli.md
deleted file mode 100644
index 8beb36ff071..00000000000
--- a/site/docs/4.5.0/reference/cli.md
+++ /dev/null
@@ -1,10 +0,0 @@
---
title: BookKeeper CLI tool reference
subtitle: A reference guide to the command-line tools that you can use to administer BookKeeper
---

{% include cli.html id="bookkeeper" %}

## The BookKeeper shell

{% include shell.html %}

diff --git a/site/docs/4.5.0/reference/config.md b/site/docs/4.5.0/reference/config.md
deleted file mode 100644
index 8997b6b62f0..00000000000
--- a/site/docs/4.5.0/reference/config.md
+++ /dev/null
@@ -1,9 +0,0 @@
---
title: BookKeeper configuration
subtitle: A reference guide to all of BookKeeper's configurable parameters
---

The table below lists parameters that you can set to configure {% pop bookies %}. All configuration takes place in the `bk_server.conf` file in the `bookkeeper-server/conf` directory of your [BookKeeper installation](../../getting-started/installing).

{% include config.html id="bk_server" %}

diff --git a/site/docs/4.5.0/reference/metrics.md b/site/docs/4.5.0/reference/metrics.md
deleted file mode 100644
index 8bd6fe0a165..00000000000
--- a/site/docs/4.5.0/reference/metrics.md
+++ /dev/null
@@ -1,3 +0,0 @@
---
title: BookKeeper metrics reference
---

diff --git a/site/docs/4.5.0/security/overview.md b/site/docs/4.5.0/security/overview.md
deleted file mode 100644
index 62da8edae76..00000000000
--- a/site/docs/4.5.0/security/overview.md
+++ /dev/null
@@ -1,21 +0,0 @@
---
title: BookKeeper Security
next: ../tls
---

In the 4.5.0 release, the BookKeeper community added a number of features that can be used, together or separately, to secure a BookKeeper cluster.
The following security measures are currently supported:

1. Authentication of connections to bookies from clients, using either [TLS](../tls) or [SASL (Kerberos)](../sasl).
2. Authentication of connections from clients, bookies, and autorecovery daemons to [ZooKeeper](../zookeeper), when using ZooKeeper-based ledger managers.
3. Encryption of data transferred between bookies and clients, and between bookies and autorecovery daemons, using [TLS](../tls).

It's worth noting that security is optional - non-secured clusters are supported, as well as a mix of authenticated, unauthenticated, encrypted, and non-encrypted clients.

NOTE: `authorization` is not yet available in `4.5.0`. The Apache BookKeeper community is looking to add this feature in subsequent releases.
## Next Steps

- [Encryption and Authentication using TLS](../tls)
- [Authentication using SASL](../sasl)
- [ZooKeeper Authentication](../zookeeper)

diff --git a/site/docs/4.5.0/security/sasl.md b/site/docs/4.5.0/security/sasl.md
deleted file mode 100644
index ffb972a8936..00000000000
--- a/site/docs/4.5.0/security/sasl.md
+++ /dev/null
@@ -1,202 +0,0 @@
---
title: Authentication using SASL
prev: ../tls
next: ../zookeeper
---

Bookies support client authentication via SASL. Currently we only support GSSAPI (Kerberos). We will start
with a general description of how to configure `SASL` for bookies, clients, and autorecovery daemons, follow with
mechanism-specific details, and wrap up with some operational details.

## SASL configuration for Bookies

1. Select the mechanisms to enable in the bookies. `GSSAPI` is the only mechanism currently supported by BookKeeper.
2. Add a `JAAS` config file for the selected mechanisms as described in the examples for setting up [GSSAPI (Kerberos)](#kerberos).
3. Pass the `JAAS` config file location as a JVM parameter to each Bookie. For example:

    ```shell
    -Djava.security.auth.login.config=/etc/bookkeeper/bookie_jaas.conf
    ```

4. Enable the SASL auth plugin in the bookies by setting `bookieAuthProviderFactoryClass` to `org.apache.bookkeeper.sasl.SASLBookieAuthProviderFactory`.

    ```shell
    bookieAuthProviderFactoryClass=org.apache.bookkeeper.sasl.SASLBookieAuthProviderFactory
    ```

5. If you are running `autorecovery` along with bookies, then you also want to enable the SASL auth plugin for `autorecovery`, by setting
   `clientAuthProviderFactoryClass` to `org.apache.bookkeeper.sasl.SASLClientProviderFactory`.

    ```shell
    clientAuthProviderFactoryClass=org.apache.bookkeeper.sasl.SASLClientProviderFactory
    ```

6. Follow the steps in [GSSAPI (Kerberos)](#kerberos) to configure SASL.

#### Important Notes

1. `Bookie` is a section name in the JAAS file used by each bookie. This section tells the bookie which principal to use
   and the location of the keytab where the principal is stored. It allows the bookie to log in using the keytab specified in this section.
2. `Auditor` is a section name in the JAAS file used by the `autorecovery` daemon (it can be co-run with bookies). This section tells the
   `autorecovery` daemon which principal to use and the location of the keytab where the principal is stored. It allows the autorecovery
   daemon to log in using the keytab specified in this section.
3. The `Client` section is used to authenticate a SASL connection with ZooKeeper. It also allows the bookies to set ACLs on ZooKeeper nodes,
   which locks these nodes down so that only the bookies can modify them. It is necessary to have the same primary name across all bookies.
   If you want to use a section name other than `Client`, set the system property `zookeeper.sasl.client` to the appropriate name
   (e.g. `-Dzookeeper.sasl.client=ZKClient`).
4. ZooKeeper uses `zookeeper` as the service name by default. If you want to change this, set the system property
   `zookeeper.sasl.client.username` to the appropriate name (e.g. `-Dzookeeper.sasl.client.username=zk`).

## SASL configuration for Clients

To configure `SASL` authentication on the clients:

1. Select a `SASL` mechanism for authentication and add a `JAAS` config file for the selected mechanism as described in the examples for
   setting up [GSSAPI (Kerberos)](#kerberos).
2. Pass the `JAAS` config file location as a JVM parameter to each client JVM.
   For example:

    ```shell
    -Djava.security.auth.login.config=/etc/bookkeeper/bookkeeper_jaas.conf
    ```

3. Configure the following property in the bookkeeper `ClientConfiguration`:

    ```shell
    clientAuthProviderFactoryClass=org.apache.bookkeeper.sasl.SASLClientProviderFactory
    ```

Follow the steps in [GSSAPI (Kerberos)](#kerberos) to configure SASL for the selected mechanism.

## Authentication using SASL/Kerberos

### Prerequisites

#### Kerberos

If your organization is already using a Kerberos server (for example, by using `Active Directory`), there is no need to
install a new server just for BookKeeper. Otherwise you will need to install one; your Linux vendor likely has packages
for `Kerberos` and a short guide on how to install and configure it ([Ubuntu](https://help.ubuntu.com/community/Kerberos),
[Redhat](https://access.redhat.com/documentation/en-US/Red_Hat_Enterprise_Linux/6/html/Managing_Smart_Cards/installing-kerberos.html)).
Note that if you are using Oracle Java, you will need to download the JCE policy files for your Java version and copy them to `$JAVA_HOME/jre/lib/security`.

#### Kerberos Principals

If you are using the organization's Kerberos or Active Directory server, ask your Kerberos administrator for a principal
for each Bookie in your cluster and for every operating system user that will access BookKeeper with Kerberos authentication
(via clients and tools).

If you have installed your own Kerberos, you will need to create these principals yourself using the following commands:

```shell
sudo /usr/sbin/kadmin.local -q 'addprinc -randkey bookkeeper/{hostname}@{REALM}'
sudo /usr/sbin/kadmin.local -q "ktadd -k /etc/security/keytabs/{keytabname}.keytab bookkeeper/{hostname}@{REALM}"
```

##### All hosts must be reachable using hostnames

It is a *Kerberos* requirement that all your hosts can be resolved with their FQDNs.

### Configuring Bookies

1. Add a suitably modified JAAS file similar to the one below to each Bookie's config directory; let's call it `bookie_jaas.conf`
   for this example (note that each bookie should have its own keytab):

    ```
    Bookie {
        com.sun.security.auth.module.Krb5LoginModule required
        useKeyTab=true
        storeKey=true
        keyTab="/etc/security/keytabs/bookie.keytab"
        principal="bookkeeper/bk1.hostname.com@EXAMPLE.COM";
    };
    // ZooKeeper client authentication
    Client {
        com.sun.security.auth.module.Krb5LoginModule required
        useKeyTab=true
        storeKey=true
        keyTab="/etc/security/keytabs/bookie.keytab"
        principal="bookkeeper/bk1.hostname.com@EXAMPLE.COM";
    };
    // If you are running `autorecovery` along with bookies
    Auditor {
        com.sun.security.auth.module.Krb5LoginModule required
        useKeyTab=true
        storeKey=true
        keyTab="/etc/security/keytabs/bookie.keytab"
        principal="bookkeeper/bk1.hostname.com@EXAMPLE.COM";
    };
    ```

   The `Bookie` section in the JAAS file tells the bookie which principal to use and the location of the keytab where this principal is stored.
   It allows the bookie to log in using the keytab specified in this section. See the [notes](#notes) for more details on ZooKeeper's SASL configuration.
2. Pass the name of the JAAS file as a JVM parameter to each Bookie:

    ```shell
    -Djava.security.auth.login.config=/etc/bookkeeper/bookie_jaas.conf
    ```

   You may also wish to specify the path to the `krb5.conf` file
   (see [JDK's Kerberos Requirements](https://docs.oracle.com/javase/8/docs/technotes/guides/security/jgss/tutorials/KerberosReq.html) for more details):

    ```shell
    -Djava.security.krb5.conf=/etc/bookkeeper/krb5.conf
    ```

3. Make sure the keytabs configured in the JAAS file are readable by the operating system user who is starting the Bookies.

4. Enable the SASL authentication plugin in the bookies by setting the following parameters:

    ```shell
    bookieAuthProviderFactoryClass=org.apache.bookkeeper.sasl.SASLBookieAuthProviderFactory
    # if you run `autorecovery` along with bookies
    clientAuthProviderFactoryClass=org.apache.bookkeeper.sasl.SASLClientProviderFactory
    ```

### Configuring Clients

To configure SASL authentication on the clients:

1. Clients will authenticate to the cluster with their own principal (usually with the same name as the user running the client),
   so obtain or create these principals as needed. Then create a `JAAS` file for each principal. The `BookKeeper` section describes
   how clients such as writers and readers can connect to the bookies. The following is an example configuration for a client using
   a keytab (recommended for long-running processes):

    ```
    BookKeeper {
        com.sun.security.auth.module.Krb5LoginModule required
        useKeyTab=true
        storeKey=true
        keyTab="/etc/security/keytabs/bookkeeper.keytab"
        principal="bookkeeper-client-1@EXAMPLE.COM";
    };
    ```

2. Pass the name of the JAAS file as a JVM parameter to the client JVM:

    ```shell
    -Djava.security.auth.login.config=/etc/bookkeeper/bookkeeper_jaas.conf
    ```

   You may also wish to specify the path to the `krb5.conf` file (see
   [JDK's Kerberos Requirements](https://docs.oracle.com/javase/8/docs/technotes/guides/security/jgss/tutorials/KerberosReq.html) for more details):

    ```shell
    -Djava.security.krb5.conf=/etc/bookkeeper/krb5.conf
    ```

3. Make sure the keytabs configured in `bookkeeper_jaas.conf` are readable by the operating system user who is starting the BookKeeper client.

4. Enable the SASL authentication plugin in the client by setting the following parameter:

    ```shell
    clientAuthProviderFactoryClass=org.apache.bookkeeper.sasl.SASLClientProviderFactory
    ```

## Enabling Logging for SASL

To enable SASL debug output, you can set the `sun.security.krb5.debug` system property to `true`.

diff --git a/site/docs/4.5.0/security/tls.md b/site/docs/4.5.0/security/tls.md
deleted file mode 100644
index cd250ab2aa5..00000000000
--- a/site/docs/4.5.0/security/tls.md
+++ /dev/null
@@ -1,210 +0,0 @@
---
title: Encryption and Authentication using TLS
prev: ../overview
next: ../sasl
---

Apache BookKeeper allows clients and autorecovery daemons to communicate over TLS, although this is not enabled by default.

## Overview

The bookies need their own key and certificate in order to use TLS. Clients can optionally provide a key and a certificate
for mutual authentication. Each bookie or client can also be configured with a truststore, which is used to
determine which certificates (bookie or client identities) to trust (authenticate).

The truststore can be configured in many ways. To understand the truststore, consider the following two examples:

1. the truststore contains one or many certificates;
2. it contains a certificate authority (CA).

In (1), with a list of certificates, the bookie or client will trust any certificate listed in the truststore.
In (2), with a CA, the bookie or client will trust any certificate that was signed by the CA in the truststore.

(TBD: benefits)

## Generate TLS key and certificate

The first step of deploying TLS is to generate the key and the certificate for each machine in the cluster.
You can use Java's `keytool` utility to accomplish this task. We will generate the key into a temporary keystore
initially so that we can export and sign it later with the CA.

```shell
keytool -keystore bookie.keystore.jks -alias localhost -validity {validity} -genkey
```

You need to specify two parameters in the above command:

1. `keystore`: the keystore file that stores the certificate. The *keystore* file contains the private key of
   the certificate; hence, it needs to be kept safely.
2. `validity`: the validity of the certificate in days.
Ensure that the common name (CN) matches the fully qualified domain name (FQDN) of the server exactly.
The client compares the CN with the DNS domain name to ensure that it is indeed connecting to the desired server, not a malicious one.
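For example, a hedged sketch of the same `keytool` invocation with the CN pinned to the bookie's FQDN via the standard `-dname` flag (all names and the validity are illustrative):

```shell
keytool -keystore bookie.keystore.jks -alias localhost -validity 365 -genkey \
  -dname "CN=bookie1.example.com, OU=storage, O=example, C=US"
```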
## Creating your own CA

After the first step, each machine in the cluster has a public-private key pair and a certificate to identify the machine.
The certificate, however, is unsigned, which means that an attacker can create such a certificate to pretend to be any machine.

Therefore, it is important to prevent forged certificates by signing them for each machine in the cluster.
A `certificate authority (CA)` is responsible for signing certificates. A CA works like a government that issues passports:
the government stamps (signs) each passport so that the passport becomes difficult to forge. Other governments verify the stamps
to ensure the passport is authentic. Similarly, the CA signs the certificates, and the cryptography guarantees that a signed
certificate is computationally difficult to forge. Thus, as long as the CA is a genuine and trusted authority, the clients have
high assurance that they are connecting to the authentic machines.

```shell
openssl req -new -x509 -keyout ca-key -out ca-cert -days 365
```

The generated CA is simply a *public-private* key pair and certificate, and it is intended to sign other certificates.

The next step is to add the generated CA to the clients' truststore so that the clients can trust this CA:

```shell
keytool -keystore bookie.truststore.jks -alias CARoot -import -file ca-cert
```

NOTE: If you configure the bookies to require client authentication by setting `sslClientAuthentication` to `true` in the
[bookie config](../../reference/config), then you must also provide a truststore for the bookies, and it should contain all the CA
certificates that clients' keys were signed by.

```shell
keytool -keystore client.truststore.jks -alias CARoot -import -file ca-cert
```

In contrast to the keystore, which stores each machine's own identity, the truststore of a client stores all the certificates
that the client should trust. Importing a certificate into one's truststore also means trusting all certificates that are signed
by that certificate. As in the analogy above, trusting the government (CA) also means trusting all passports (certificates) that
it has issued. This attribute is called the chain of trust, and it is particularly useful when deploying TLS on a large BookKeeper cluster.
You can sign all certificates in the cluster with a single CA, and have all machines share the same truststore that trusts the CA.
That way all machines can authenticate all other machines.

## Signing the certificate

The next step is to sign all certificates in the keystore with the CA we generated. First, you need to export the certificate from the keystore:

```shell
keytool -keystore bookie.keystore.jks -alias localhost -certreq -file cert-file
```

Then sign it with the CA:

```shell
openssl x509 -req -CA ca-cert -CAkey ca-key -in cert-file -out cert-signed -days {validity} -CAcreateserial -passin pass:{ca-password}
```

Finally, you need to import both the certificate of the CA and the signed certificate into the keystore:

```shell
keytool -keystore bookie.keystore.jks -alias CARoot -import -file ca-cert
keytool -keystore bookie.keystore.jks -alias localhost -import -file cert-signed
```

The definitions of the parameters are the following:

1. `keystore`: the location of the keystore
2. `ca-cert`: the certificate of the CA
3. `ca-key`: the private key of the CA
4. `ca-password`: the passphrase of the CA
5. `cert-file`: the exported, unsigned certificate of the bookie
6. `cert-signed`: the signed certificate of the bookie

(TBD: add a script to automatically generate truststores and keystores.)

## Configuring Bookies

Bookies support TLS for connections on the same service port. In order to enable TLS, you need to configure `tlsProvider` to be either
`JDK` or `OpenSSL`. If `OpenSSL` is configured, it will use `netty-tcnative-boringssl-static`, which loads the binding corresponding
to the platform the bookies run on.

> The current `OpenSSL` implementation doesn't depend on the system-installed OpenSSL library. If you want to leverage the OpenSSL installed on
the system, you can check [this example](http://netty.io/wiki/forked-tomcat-native.html) on how to replace the JARs on the classpath with
Netty bindings that use the installed OpenSSL.

The following TLS configs are needed on the bookie side:

```shell
tlsProvider=OpenSSL
# key store
tlsKeyStoreType=JKS
tlsKeyStore=/var/private/tls/bookie.keystore.jks
tlsKeyStorePasswordPath=/var/private/tls/bookie.keystore.passwd
# trust store
tlsTrustStoreType=JKS
tlsTrustStore=/var/private/tls/bookie.truststore.jks
tlsTrustStorePasswordPath=/var/private/tls/bookie.truststore.passwd
```

NOTE: it is important to restrict access to the store files and corresponding password files via filesystem permissions.

Optional settings that are worth considering:

1. `tlsClientAuthentication=false`: Enable/disable using TLS for authentication. When enabled, this config authenticates the other end
   of the communication channel. It should be enabled on both bookies and clients for mutual TLS.
2. `tlsEnabledCipherSuites=`: A cipher suite is a named combination of authentication, encryption, MAC, and key exchange
   algorithms used to negotiate the security settings for a network connection using the TLS network protocol. By default,
   it is null. See [OpenSSL Ciphers](https://www.openssl.org/docs/man1.0.2/apps/ciphers.html) and
   [JDK Ciphers](http://docs.oracle.com/javase/8/docs/technotes/guides/security/StandardNames.html#ciphersuites).
3. `tlsEnabledProtocols=TLSv1.2,TLSv1.1,TLSv1`: lists the TLS protocols that you are going to accept from clients.
   By default, it is not set.

To verify that the bookie's keystore and truststore are set up correctly, you can run the following command:

```shell
openssl s_client -debug -connect localhost:3181 -tls1
```

NOTE: TLSv1 should be listed under `tlsEnabledProtocols`.

In the output of this command you should see the server's certificate:

```shell
-----BEGIN CERTIFICATE-----
{variable sized random bytes}
-----END CERTIFICATE-----
```

If the certificate does not show up, or if there are any other error messages, then your keystore is not set up correctly.

## Configuring Clients

TLS is supported only for the new BookKeeper client (BookKeeper versions 4.5.0 and higher); the older clients are not
supported. The configs for TLS are the same as for bookies.

If client authentication is not required by the bookies, the following is a minimal configuration example:

```shell
tlsProvider=OpenSSL
clientTrustStore=/var/private/tls/client.truststore.jks
clientTrustStorePasswordPath=/var/private/tls/client.truststore.passwd
```

If client authentication is required, then a keystore must be created for each client, and the bookies' truststores must
trust the certificate in the client's keystore. This may be done using commands that are similar to what we used for
the [bookie keystore](#bookie-keystore).
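For example, a hedged sketch that mirrors the bookie keystore steps with client-side file names (the file names and validity are illustrative, and `{ca-password}` is the CA passphrase from earlier):

```shell
# generate the client key, export it, and sign it with the same CA
keytool -keystore client.keystore.jks -alias localhost -validity 365 -genkey
keytool -keystore client.keystore.jks -alias localhost -certreq -file client-cert-file
openssl x509 -req -CA ca-cert -CAkey ca-key -in client-cert-file -out client-cert-signed \
  -days 365 -CAcreateserial -passin pass:{ca-password}
# import the CA certificate and the signed client certificate into the client keystore
keytool -keystore client.keystore.jks -alias CARoot -import -file ca-cert
keytool -keystore client.keystore.jks -alias localhost -import -file client-cert-signed
# make sure the bookies' truststore trusts the CA that signed the client certificate
keytool -keystore bookie.truststore.jks -alias CARoot -import -file ca-cert
```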
And the following must also be configured:

```shell
tlsClientAuthentication=true
clientKeyStore=/var/private/tls/client.keystore.jks
clientKeyStorePasswordPath=/var/private/tls/client.keystore.passwd
```

NOTE: it is important to restrict access to the store files and corresponding password files via filesystem permissions.

(TBD: add example to use tls in bin/bookkeeper script?)

## Enabling TLS Logging

You can enable TLS debug logging at the JVM level by starting the bookies and/or clients with the `javax.net.debug` system property. For example:

```shell
-Djavax.net.debug=all
```

You can find more details on this in the [Oracle documentation](http://docs.oracle.com/javase/8/docs/technotes/guides/security/jsse/ReadDebug.html) on
[debugging SSL/TLS connections](http://docs.oracle.com/javase/8/docs/technotes/guides/security/jsse/ReadDebug.html).

diff --git a/site/docs/4.5.0/security/zookeeper.md b/site/docs/4.5.0/security/zookeeper.md
deleted file mode 100644
index e16be69a1d3..00000000000
--- a/site/docs/4.5.0/security/zookeeper.md
+++ /dev/null
@@ -1,41 +0,0 @@
---
title: ZooKeeper Authentication
prev: ../sasl
---

## New Clusters

To enable `ZooKeeper` authentication on Bookies or Clients, there are two necessary steps:

1. Create a `JAAS` login file and set the appropriate system property to point to it as described in [GSSAPI (Kerberos)](../sasl#notes).
2. Set the configuration property `zkEnableSecurity` in each bookie to `true`.

The metadata stored in `ZooKeeper` is such that only certain clients will be able to modify and read the corresponding znodes.
The rationale behind this decision is that the data stored in ZooKeeper is not sensitive, but inappropriate manipulation of znodes can cause cluster
disruption.

## Migrating Clusters

If you are running a version of BookKeeper that does not support security, or are simply running with security disabled, and you want to make the cluster secure,
then you need to execute the following steps to enable ZooKeeper authentication with minimal disruption to your operations.

1. Perform a rolling restart setting the `JAAS` login file, which enables bookies or clients to authenticate. At the end of the rolling restart,
   bookies (or clients) are able to manipulate znodes with strict ACLs, but they will not create znodes with those ACLs.
2. Perform a second rolling restart of bookies, this time setting the configuration parameter `zkEnableSecurity` to `true`, which enables the use
   of secure ACLs when creating znodes.
3. Currently we don't provide a tool to set ACLs on old znodes. We recommend setting them manually using ZooKeeper tools.

It is also possible to turn off authentication in a secured cluster. To do it, follow these steps:

1. Perform a rolling restart of bookies setting the `JAAS` login file, which enables bookies to authenticate, but setting `zkEnableSecurity` to `false`.
   At the end of the rolling restart, bookies stop creating znodes with secure ACLs, but are still able to authenticate and manipulate all znodes.
2. You can use ZooKeeper tools to manually reset all ACLs under the znode set in `zkLedgersRootPath`, which defaults to `/ledgers`.
3. Perform a second rolling restart of bookies, this time omitting the system property that sets the `JAAS` login file.

## Migrating the ZooKeeper ensemble

It is also necessary to enable authentication on the `ZooKeeper` ensemble. To do it, we need to perform a rolling restart of the ensemble and
set a few properties.
Please refer to the ZooKeeper documentation for more details.

1. [Apache ZooKeeper Documentation](http://zookeeper.apache.org/doc/r3.4.6/zookeeperProgrammers.html#sc_ZooKeeperAccessControl)
2. [Apache ZooKeeper Wiki](https://cwiki.apache.org/confluence/display/ZOOKEEPER/Zookeeper+and+SASL)

diff --git a/site/docs/4.5.1/admin/autorecovery.md b/site/docs/4.5.1/admin/autorecovery.md
deleted file mode 100644
index bd11a8886d3..00000000000
--- a/site/docs/4.5.1/admin/autorecovery.md
+++ /dev/null
@@ -1,128 +0,0 @@
---
title: Using AutoRecovery
---

When a {% pop bookie %} crashes, all {% pop ledgers %} on that bookie become under-replicated. In order to bring all ledgers in your BookKeeper cluster back to full replication, you'll need to *recover* the data from any offline bookies. There are two ways to recover bookies' data:

1. Using [manual recovery](#manual-recovery)
1. Automatically, using [*AutoRecovery*](#autorecovery)

## Manual recovery

You can manually recover failed bookies using the [`bookkeeper`](../../reference/cli) command-line tool. You need to specify:

* that the `org.apache.bookkeeper.tools.BookKeeperTools` class needs to be run
* an IP and port for your BookKeeper cluster's ZooKeeper ensemble
* the IP and port for the failed bookie

Here's an example:

```bash
$ bookkeeper-server/bin/bookkeeper org.apache.bookkeeper.tools.BookKeeperTools \
  zk1.example.com:2181 \ # IP and port for ZooKeeper
  192.168.1.10:3181 # IP and port for the failed bookie
```

If you wish, you can also specify which bookie you'd like to rereplicate to. Here's an example:

```bash
$ bookkeeper-server/bin/bookkeeper org.apache.bookkeeper.tools.BookKeeperTools \
  zk1.example.com:2181 \ # IP and port for ZooKeeper
  192.168.1.10:3181 \ # IP and port for the failed bookie
  192.168.1.11:3181 # IP and port for the bookie to rereplicate to
```

### The manual recovery process

When you initiate a manual recovery process, the following happens:

1. The client (the process running `BookKeeperTools`) reads the metadata of active ledgers from ZooKeeper.
1. The ledgers that contain fragments from the failed bookie in their ensemble are selected.
1. A recovery process is initiated for each ledger in this list and the rereplication process is run for each ledger.
1. Once all the ledgers are marked as fully replicated, bookie recovery is finished.

## AutoRecovery

AutoRecovery is a process that:

* automatically detects when a {% pop bookie %} in your BookKeeper cluster has become unavailable and then
* rereplicates all the {% pop ledgers %} that were stored on that bookie.

AutoRecovery can be run in two ways:

1. On dedicated nodes in your BookKeeper cluster
1. On the same machines on which your bookies are running

## Running AutoRecovery

You can start up AutoRecovery using the [`autorecovery`](../../reference/cli#bookkeeper-autorecovery) command of the [`bookkeeper`](../../reference/cli) CLI tool.

```bash
$ bookkeeper-server/bin/bookkeeper autorecovery
```

> The most important thing to ensure when starting up AutoRecovery is that the ZooKeeper connection string specified by the [`zkServers`](../../reference/config#zkServers) parameter points to the right ZooKeeper cluster.

If you start up AutoRecovery on a machine that is already running a bookie, then the AutoRecovery process will run alongside the bookie on a separate thread.

You can also start up AutoRecovery on a fresh machine if you'd like to create a dedicated cluster of AutoRecovery nodes.
## Configuration

There are a handful of AutoRecovery-related configs in the [`bk_server.conf`](../../reference/config) configuration file. For a listing of those configs, see [AutoRecovery settings](../../reference/config#autorecovery-settings).

## Disable AutoRecovery

You can disable AutoRecovery at any time, for example during maintenance. Disabling AutoRecovery ensures that bookies' data isn't unnecessarily rereplicated when the bookie is only taken down for a short period of time, for example when the bookie is being updated or the configuration is being changed.

You can disable AutoRecovery using the [`bookkeeper`](../../reference/cli#bookkeeper-shell-autorecovery) CLI tool:

```bash
$ bookkeeper-server/bin/bookkeeper shell autorecovery -disable
```

Once disabled, you can reenable AutoRecovery using the [`enable`](../../reference/cli#bookkeeper-shell-autorecovery) shell command:

```bash
$ bookkeeper-server/bin/bookkeeper shell autorecovery -enable
```

## AutoRecovery architecture

AutoRecovery has two components:

1. The [**auditor**](#auditor) (see the [`Auditor`](../../api/javadoc/org/apache/bookkeeper/replication/Auditor.html) class) is a singleton node that watches bookies to see if they fail and creates rereplication tasks for the ledgers on failed bookies.
1. The [**replication worker**](#replication-worker) (see the [`ReplicationWorker`](../../api/javadoc/org/apache/bookkeeper/replication/ReplicationWorker.html) class) runs on each bookie and executes rereplication tasks provided by the auditor.

Both of these components run as threads in the [`AutoRecoveryMain`](../../api/javadoc/org/apache/bookkeeper/replication/AutoRecoveryMain) process, which runs on each bookie in the cluster. All recovery nodes participate in leader election---using ZooKeeper---to decide which node becomes the auditor. Nodes that fail to become the auditor watch the elected auditor and run an election process again if they see that the auditor node has failed.

### Auditor

The auditor watches all bookies in the cluster that are registered with ZooKeeper. Bookies register with ZooKeeper at startup. If the bookie crashes or is killed, the bookie's registration in ZooKeeper disappears and the auditor is notified of the change in the list of registered bookies.

When the auditor sees that a bookie has disappeared, it immediately scans the complete {% pop ledger %} list to find ledgers that have data stored on the failed bookie. Once it has a list of ledgers for that bookie, the auditor will publish a rereplication task for each ledger under the `/underreplicated/` [znode](https://zookeeper.apache.org/doc/current/zookeeperOver.html) in ZooKeeper.

### Replication Worker

Each replication worker watches for tasks being published by the auditor on the `/underreplicated/` znode in ZooKeeper. When a new task appears, the replication worker will try to get a lock on it. If it cannot acquire the lock, it will try the next entry. The locks are implemented using ZooKeeper ephemeral znodes.

The replication worker will scan through the rereplication task's ledger for fragments of which its local bookie is not a member. When it finds fragments matching this criterion, it will replicate the entries of that fragment to the local bookie. If, after this process, the ledger is fully replicated, the ledger's entry under `/underreplicated/` is deleted, and the lock is released.
If there is a problem replicating, or there are fragments in the ledger that are still underreplicated (due to the local bookie already being part of the ensemble for the fragment), then the lock is simply released.

If the replication worker finds a fragment which needs rereplication, but does not have a defined endpoint (i.e. the final fragment of a ledger currently being written to), it will wait for a grace period before attempting rereplication. If the fragment needing rereplication still does not have a defined endpoint, the ledger is fenced and rereplication then takes place.

This avoids the situation in which a client is writing to a ledger and one of the bookies goes down, but the client has not written an entry to that bookie before rereplication takes place. The client could continue writing to the old fragment, even though the ensemble for the fragment had changed. This could lead to data loss. Fencing prevents this scenario from happening. In the normal case, the client will try to write to the failed bookie within the grace period, and will have started a new fragment before rereplication starts.

You can configure this grace period using the [`openLedgerRereplicationGracePeriod`](../../reference/config#openLedgerRereplicationGracePeriod) parameter.

### The rereplication process

The ledger rereplication process happens in these steps:

1. The client goes through all ledger fragments in the ledger, selecting those that contain the failed bookie.
1. A recovery process is initiated for each ledger fragment in this list.
   1. The client selects a bookie to which all entries in the ledger fragment will be replicated; in the case of autorecovery, this will always be the local bookie.
   1. The client reads entries that belong to the ledger fragment from other bookies in the ensemble and writes them to the selected bookie.
   1. Once all entries have been replicated, the ZooKeeper metadata for the fragment is updated to reflect the new ensemble.
   1. The fragment is marked as fully replicated in the recovery tool.
1. Once all ledger fragments are marked as fully replicated, the ledger is marked as fully replicated.

diff --git a/site/docs/4.5.1/admin/bookies.md b/site/docs/4.5.1/admin/bookies.md
deleted file mode 100644
index f9b1dcf94dc..00000000000
--- a/site/docs/4.5.1/admin/bookies.md
+++ /dev/null
@@ -1,180 +0,0 @@
---
title: BookKeeper administration
subtitle: A guide to deploying and administering BookKeeper
---

This document is a guide to deploying, administering, and maintaining BookKeeper. It also discusses [best practices](#best-practices) and [common problems](#common-problems).

## Requirements

A typical BookKeeper installation consists of an ensemble of {% pop bookies %} and a ZooKeeper quorum. The exact number of bookies depends on the quorum mode that you choose, desired throughput, and the number of clients using the installation simultaneously.

The minimum number of bookies depends on the type of installation:

* For *self-verifying* entries you should run at least three bookies. In this mode, clients store a message authentication code along with each {% pop entry %}.
* For *generic* entries you should run at least four bookies.

There is no upper limit on the number of bookies that you can run in a single ensemble.

### Performance

To achieve optimal performance, BookKeeper requires each server to have at least two disks. It's possible to run a bookie with a single disk but performance will be significantly degraded.
### ZooKeeper

There is no constraint on the number of ZooKeeper nodes you can run with BookKeeper. A single machine running ZooKeeper in [standalone mode](https://zookeeper.apache.org/doc/current/zookeeperStarted.html#sc_InstallingSingleMode) is sufficient for BookKeeper, although for the sake of higher resilience we recommend running ZooKeeper in [quorum mode](https://zookeeper.apache.org/doc/current/zookeeperStarted.html#sc_RunningReplicatedZooKeeper) with multiple servers.

## Starting and stopping bookies

You can run bookies either in the foreground or in the background, using [nohup](https://en.wikipedia.org/wiki/Nohup). You can also run [local bookies](#local-bookie) for development purposes.

To start a bookie in the foreground, use the [`bookie`](../../reference/cli#bookkeeper-bookie) command of the [`bookkeeper`](../../reference/cli#bookkeeper) CLI tool:

```shell
$ bookkeeper-server/bin/bookkeeper bookie
```

To start a bookie in the background, use the [`bookkeeper-daemon.sh`](../../reference/cli#bookkeeper-daemon.sh) script and run `start bookie`:

```shell
$ bookkeeper-server/bin/bookkeeper-daemon.sh start bookie
```

### Local bookies

The instructions above showed you how to run bookies intended for production use. If you'd like to experiment with ensembles of bookies locally, you can use the [`localbookie`](../../reference/cli#bookkeeper-localbookie) command of the `bookkeeper` CLI tool and specify the number of bookies you'd like to run.

This would spin up a local ensemble of 6 bookies:

```shell
$ bookkeeper-server/bin/bookkeeper localbookie 6
```

> When you run a local bookie ensemble, all bookies run in a single JVM process.

## Configuring bookies

There's a wide variety of parameters that you can set in the bookie configuration file in `bookkeeper-server/conf/bk_server.conf` of your [BookKeeper installation](../../reference/config). A full listing can be found in [Bookie configuration](../../reference/config).

Some of the more important parameters to be aware of:

Parameter | Description | Default
:---------|:------------|:-------
`bookiePort` | The TCP port that the bookie listens on | `3181`
`zkServers` | A comma-separated list of ZooKeeper servers in `hostname:port` format | `localhost:2181`
`journalDirectory` | The directory where the [log device](../../getting-started/concepts#log-device) stores the bookie's write-ahead log (WAL) | `/tmp/bk-txn`
`ledgerDirectories` | The directories where the [ledger device](../../getting-started/concepts#ledger-device) stores the bookie's ledger entries (as a comma-separated list) | `/tmp/bk-data`

> Ideally, the directories specified by `journalDirectory` and `ledgerDirectories` should be on different devices.

## Logging

BookKeeper uses [slf4j](http://www.slf4j.org/) for logging, with [log4j](https://logging.apache.org/log4j/2.x/) bindings enabled by default.

To enable logging for a bookie, create a `log4j.properties` file and point the `BOOKIE_LOG_CONF` environment variable to the configuration file. Here's an example:

```shell
$ export BOOKIE_LOG_CONF=/some/path/log4j.properties
$ bookkeeper-server/bin/bookkeeper bookie
```

## Upgrading

From time to time you may need to make changes to the filesystem layout of bookies---changes that are incompatible with previous versions of BookKeeper and require that directories used with previous versions are upgraded.
If a filesystem upgrade is required when updating BookKeeper, the bookie will fail to start and return an error like this: - -``` -2017-05-25 10:41:50,494 - ERROR - [main:Bookie@246] - Directory layout version is less than 3, upgrade needed -``` - -BookKeeper provides a utility for upgrading the filesystem. You can perform an upgrade using the [`upgrade`](../../reference/cli#bookkeeper-upgrade) command of the `bookkeeper` CLI tool. When running `bookkeeper upgrade` you need to specify one of three flags: - -Flag | Action -:----|:------ -`--upgrade` | Performs an upgrade -`--rollback` | Performs a rollback to the initial filesystem version -`--finalize` | Marks the upgrade as complete - -### Upgrade pattern - -A standard upgrade pattern is to run an upgrade... - -```shell -$ bookkeeper-server/bin/bookkeeper upgrade --upgrade -``` - -...then check that everything is working normally, then kill the bookie. If everything is okay, finalize the upgrade... - -```shell -$ bookkeeper-server/bin/bookkeeper upgrade --finalize -``` - -...and then restart the server: - -```shell -$ bookkeeper-server/bin/bookkeeper bookie -``` - -If something has gone wrong, you can always perform a rollback: - -```shell -$ bookkeeper-server/bin/bookkeeper upgrade --rollback -``` - -## Formatting - -You can format bookie metadata in ZooKeeper using the [`metaformat`](../../reference/cli#bookkeeper-shell-metaformat) command of the [BookKeeper shell](../../reference/cli#the-bookkeeper-shell). - -By default, formatting is done in interactive mode, which prompts you to confirm the format operation if old data exists. You can disable confirmation using the `-nonInteractive` flag. If old data does exist, the format operation will abort *unless* you set the `-force` flag. Here's an example: - -```shell -$ bookkeeper-server/bin/bookkeeper shell metaformat -``` - -You can format the local filesystem data on a bookie using the [`bookieformat`](../../reference/cli#bookkeeper-shell-bookieformat) command on each bookie. Here's an example: - -```shell -$ bookkeeper-server/bin/bookkeeper shell bookieformat -``` - -> The `-force` and `-nonInteractive` flags are also available for the `bookieformat` command. - -## AutoRecovery - -For a guide to AutoRecovery in BookKeeper, see [this doc](../autorecovery). - -## Missing disks or directories - -Accidentally replacing disks or removing directories can cause a bookie to fail while trying to read a ledger fragment that, according to the ledger metadata, exists on the bookie. For this reason, when a bookie is started for the first time, its disk configuration is fixed for the lifetime of that bookie. Any change to its disk configuration, such as a crashed disk or an accidental configuration change, will result in the bookie being unable to start, with an error like this: - -``` -2017-05-29 18:19:13,790 - ERROR - [main:BookieServer@314] - Exception running bookie server : -org.apache.bookkeeper.bookie.BookieException$InvalidCookieException -        at org.apache.bookkeeper.bookie.Cookie.verify(Cookie.java:82) -        at org.apache.bookkeeper.bookie.Bookie.checkEnvironment(Bookie.java:275) -        at org.apache.bookkeeper.bookie.Bookie.<init>(Bookie.java:351) -``` - -If the change was the result of an accidental configuration change, the change can be reverted and the bookie can be restarted. However, if the change *cannot* be reverted, such as when you want to add a new disk or replace a disk, the bookie must be wiped and then all its data re-replicated onto it. - -1.
Increment the [`bookiePort`](../../reference/config#bookiePort) parameter in [`bk_server.conf`](../../reference/config). -1. Ensure that all directories specified by [`journalDirectory`](../../reference/config#journalDirectory) and [`ledgerDirectories`](../../reference/config#ledgerDirectories) are empty. -1. [Start the bookie](#starting-and-stopping-bookies). -1. Run the following command to re-replicate the data: - - ```bash - $ bin/bookkeeper org.apache.bookkeeper.tools.BookKeeperTools \ - <zookeeper-server> \ - <old-bookie> \ - <new-bookie> - ``` - - The ZooKeeper server, old bookie, and new bookie are all identified by their external IP and `bookiePort` (3181 by default). Here's an example: - - ```bash - $ bin/bookkeeper org.apache.bookkeeper.tools.BookKeeperTools \ - zk1.example.com \ - 192.168.1.10:3181 \ - 192.168.1.10:3181 - ``` - - See the [AutoRecovery](../autorecovery) documentation for more info on the re-replication process. diff --git a/site/docs/4.5.1/admin/geo-replication.md b/site/docs/4.5.1/admin/geo-replication.md deleted file mode 100644 index 38b972345ef..00000000000 --- a/site/docs/4.5.1/admin/geo-replication.md +++ /dev/null @@ -1,22 +0,0 @@ ---- -title: Geo-replication -subtitle: Replicate data across BookKeeper clusters --- - -*Geo-replication* is the replication of data across BookKeeper clusters. In order to enable geo-replication for a group of BookKeeper clusters, you need to set up a ZooKeeper quorum that is shared globally by all of the clusters. - -## Global ZooKeeper - -Setting up a global ZooKeeper quorum is a lot like setting up a cluster-specific quorum. The crucial difference is that the quorum must be reachable from all of the participating clusters. - -### Geo-replication across three clusters - -Let's say that you want to set up geo-replication across clusters in regions A, B, and C. First, the BookKeeper clusters in each region must have their own local (cluster-specific) ZooKeeper quorum. - -> BookKeeper clusters use global ZooKeeper only for metadata storage. Traffic from bookies to ZooKeeper should thus be fairly light in general. - -The crucial difference between using cluster-specific ZooKeeper and global ZooKeeper is that you need to point all {% pop bookies %} to the global ZooKeeper setup. - -## Region-aware placement policy - -## Autorecovery diff --git a/site/docs/4.5.1/admin/metrics.md b/site/docs/4.5.1/admin/metrics.md deleted file mode 100644 index 635135faf7e..00000000000 --- a/site/docs/4.5.1/admin/metrics.md +++ /dev/null @@ -1,41 +0,0 @@ ---- -title: Metric collection --- - -BookKeeper enables metrics collection through a variety of [stats providers](#stats-providers). - -> For a full listing of available metrics, see the [Metrics](../../reference/metrics) reference doc. - -## Stats providers - -BookKeeper has stats provider implementations for five sinks: - -Provider | Provider class name -:--------|:------------------- -[Codahale Metrics](https://mvnrepository.com/artifact/org.apache.bookkeeper.stats/codahale-metrics-provider) | `org.apache.bookkeeper.stats.CodahaleMetricsProvider` -[Prometheus](https://prometheus.io/) | `org.apache.bookkeeper.stats.PrometheusMetricsProvider` -[Finagle](https://twitter.github.io/finagle/guide/Metrics.html) | `org.apache.bookkeeper.stats.FinagleStatsProvider` -[Ostrich](https://github.com/twitter/ostrich) | `org.apache.bookkeeper.stats.OstrichProvider` -[Twitter Science Provider](https://mvnrepository.com/artifact/org.apache.bookkeeper.stats/twitter-science-provider) | `org.apache.bookkeeper.stats.TwitterStatsProvider` - -> The [Codahale Metrics]({{ site.github_master }}/bookkeeper-stats-providers/codahale-metrics-provider) stats provider is the default provider.
- -## Enabling stats providers in bookies - -There are two stats-related [configuration parameters](../../reference/config#statistics) available for bookies: - -Parameter | Description | Default -:---------|:------------|:------- -`enableStatistics` | Whether statistics are enabled for the bookie | `false` -`statsProviderClass` | The stats provider class used by the bookie | `org.apache.bookkeeper.stats.CodahaleMetricsProvider` - - -To enable stats: - -* set the `enableStatistics` parameter to `true` -* set `statsProviderClass` to the desired provider (see the [table above](#stats-providers) for a listing of classes) - - diff --git a/site/docs/4.5.1/admin/perf.md b/site/docs/4.5.1/admin/perf.md deleted file mode 100644 index 82956326e5d..00000000000 --- a/site/docs/4.5.1/admin/perf.md +++ /dev/null @@ -1,3 +0,0 @@ ---- -title: Performance tuning --- diff --git a/site/docs/4.5.1/admin/placement.md b/site/docs/4.5.1/admin/placement.md deleted file mode 100644 index ded456e1aea..00000000000 --- a/site/docs/4.5.1/admin/placement.md +++ /dev/null @@ -1,3 +0,0 @@ ---- -title: Customized placement policies --- diff --git a/site/docs/4.5.1/admin/upgrade.md b/site/docs/4.5.1/admin/upgrade.md deleted file mode 100644 index 456df99a276..00000000000 --- a/site/docs/4.5.1/admin/upgrade.md +++ /dev/null @@ -1,73 +0,0 @@ ---- -title: Upgrade --- - -> If you have questions about upgrades (or need help), please feel free to reach out to us by [mailing list]({{ site.baseurl }}community/mailing-lists) or [Slack Channel]({{ site.baseurl }}community/slack). - -## Overview - -Consider the guidelines below in preparation for upgrading. - -- Always back up all your configuration files before upgrading. -- Read through the documentation and draft an upgrade plan that matches your specific requirements and environment before starting the upgrade process. - Put differently, don't start working through the guide on a live cluster. Read the guide entirely, make a plan, then execute the plan. -- Give careful consideration to the order in which components are upgraded. In general, you need to upgrade bookies first and then upgrade your clients. -- If autorecovery is running along with bookies, you need to pay attention to the upgrade sequence. -- Read the release notes carefully for each release. They contain not only information about noteworthy features, but also changes to configurations - that may impact your upgrade. -- Always upgrade one bookie or a small set of bookies to canary the new version before upgrading all bookies in your cluster. - -## Canary - -It is wise to canary an upgraded version in one bookie or a small set of bookies before upgrading all bookies in your live cluster. - -You can follow the steps below to canary an upgraded version: - -1. Stop a Bookie. -2. Upgrade the binary and configuration. -3. Start the Bookie in `ReadOnly` mode. This can be used to verify that the new version can serve read workloads correctly. -4. Once the Bookie has been running in `ReadOnly` mode successfully for a while, restart the Bookie in `Write/Read` mode. -5. After step 4, the Bookie will serve both write and read traffic. - -### Rollback Canaries - -If problems occur while canarying an upgraded version, you can simply take down the problematic Bookie node. The remaining bookies in the cluster -will repair the problematic bookie node via autorecovery. Nothing else needs to be done.
- -## Upgrade Steps - -Once you have determined that a version is safe to run on a few nodes in your cluster, you can perform the following steps to upgrade all bookies in your cluster. - -1. Determine if autorecovery is running along with bookies. If yes, check if the clients (either new clients with the new binary or old clients with new configurations) -are allowed to talk to old bookies; if clients are not allowed to talk to old bookies, please [disable autorecovery](../../reference/cli/#autorecovery-1) during the upgrade. -2. Decide on performing a rolling upgrade or a downtime upgrade. -3. Upgrade all Bookies (see below). -4. If autorecovery was disabled during the upgrade, [enable autorecovery](../../reference/cli/#autorecovery-1). -5. After all bookies are upgraded, build applications that use the `BookKeeper` client against the new BookKeeper libraries and deploy the new versions. - -### Upgrade Bookies - -In a rolling upgrade scenario, upgrade one Bookie at a time. In a downtime upgrade scenario, take the entire cluster down, upgrade each Bookie, then start the cluster. - -For each Bookie: - -1. Stop the bookie. -2. Upgrade the software (either new binary or new configuration). -3. Start the bookie. - -## Upgrade Guides - -The sections above describe the general upgrade method for Apache BookKeeper. The sections below cover the details for individual versions. - -### 4.4.x to 4.5.x upgrade - -There are no protocol-related backward-compatibility changes in 4.5.0, so you can follow the general upgrade sequence to upgrade from 4.4.x to 4.5.x. -However, here is a list of things that you might want to know. - -1. 4.5.x upgrades netty from 3.x to 4.x. The memory usage pattern might change a bit. Netty 4 uses more direct memory. Please pay attention to your memory usage - and adjust the JVM settings accordingly. -2. `multi journals` is a non-rollbackable feature. If you configure a bookie to use multiple journals on 4.5.x, you cannot roll the bookie back to 4.4.x. You have - to take the bookie out and recover it if you want to roll back to 4.4.x. - -If you are planning to upgrade a non-secured cluster to a secured cluster enabling security features in 4.5.0, please read [BookKeeper Security](../../security/overview) for more details. - diff --git a/site/docs/4.5.1/api/distributedlog-api.md b/site/docs/4.5.1/api/distributedlog-api.md deleted file mode 100644 index 9064a5b94e3..00000000000 --- a/site/docs/4.5.1/api/distributedlog-api.md +++ /dev/null @@ -1,395 +0,0 @@ ---- -title: DistributedLog -subtitle: A higher-level API for managing BookKeeper entries --- - -> DistributedLog began its life as a separate project under the Apache Foundation. It was merged into BookKeeper in 2017. - -The DistributedLog API is an easy-to-use interface for managing BookKeeper entries that enables you to use BookKeeper without needing to interact with [ledgers](../ledger-api) directly. - -DistributedLog (DL) maintains sequences of records in categories called *logs* (aka *log streams*). *Writers* append records to DL logs, while *readers* fetch and process those records. - -## Architecture - -The diagram below illustrates how the DistributedLog API works with BookKeeper: - -![DistributedLog API]({{ site.baseurl }}img/distributedlog.png) - -## Logs - -A *log* in DistributedLog is an ordered, immutable sequence of *log records*. - -The diagram below illustrates the anatomy of a log stream: - -![DistributedLog log]({{ site.baseurl }}img/logs.png) - -### Log records - -Each log record is a sequence of bytes.
Applications are responsible for serializing and deserializing byte sequences stored in log records. - -Log records are written sequentially into a *log stream* and assigned a unique sequence number called a DLSN (DistributedLog Sequence Number). - -In addition to a DLSN, applications can assign their own sequence number when constructing log records. Application-defined sequence numbers are known as *TransactionIDs* (or *txid*). Either a DLSN or a TransactionID can be used for positioning readers to start reading from a specific log record. - -### Log segments - -Each log is broken down into *log segments* that contain subsets of records. Log segments are distributed and stored in BookKeeper. DistributedLog rolls the log segments based on the configured *rolling policy*, which can be either - -* a configurable period of time (such as every 2 hours), or -* a configurable maximum size (such as every 128 MB). - -The data in logs is divided up into equally sized log segments and distributed evenly across {% pop bookies %}. This allows logs to scale beyond a size that would fit on a single server and spreads read traffic across the cluster. - -### Namespaces - -Log streams that belong to the same organization are typically categorized and managed under a *namespace*. DistributedLog namespaces essentially enable applications to locate log streams. Applications can perform the following actions under a namespace: - -* create streams -* delete streams -* truncate streams to a given sequence number (either a DLSN or a TransactionID) - -## Writers - -Through the DistributedLog API, writers write data into logs of their choice. All records are appended into logs in order. The sequencing is performed by the writer, which means that there is only one active writer for a log at any given time. - -DistributedLog uses a *fencing* mechanism in the log segment store to guarantee correctness when two writers attempt to write to the same log, for example during a network partition. - -### Write Proxy - -Log writers are served and managed in a service tier called the *Write Proxy* (see the diagram [above](#architecture)). The Write Proxy is used for accepting writes from a large number of clients. - -## Readers - -DistributedLog readers read records from logs of their choice, starting with a provided position. The provided position can be either a DLSN or a TransactionID. - -Readers read records from logs in strict order. Different readers can read records from different positions in the same log. - -Unlike other pub-sub systems, DistributedLog doesn't record or manage readers' positions. This means that tracking is the responsibility of applications, as different applications may have different requirements for tracking and coordinating positions. This is hard to get right with a single approach. Distributed databases, for example, might store reader positions along with SSTables, so they would resume applying transactions from the positions stored in SSTables. Tracking reader positions could easily be done at the application level using various stores (such as ZooKeeper, the filesystem, or key-value stores). - -### Read Proxy - -Log records can be cached in a service tier called the *Read Proxy* to serve a large number of readers. See the diagram [above](#architecture). The Read Proxy is the analogue of the [Write Proxy](#write-proxy).
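As noted above, tracking read positions is left to the application. As an illustration only — not part of the DistributedLog API — here is a minimal Java sketch of one common approach, persisting the last-processed TransactionID in ZooKeeper; the `ReaderPositionStore` class and the znode path are hypothetical, and the parent znodes are assumed to already exist:

```java
import java.nio.charset.StandardCharsets;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.ZooDefs;
import org.apache.zookeeper.ZooKeeper;

// Hypothetical helper: persists the last-processed TransactionID under a znode.
class ReaderPositionStore {
    private final ZooKeeper zk;
    private final String path; // e.g. "/my-app/reader-position" (parent must exist)

    ReaderPositionStore(ZooKeeper zk, String path) {
        this.zk = zk;
        this.path = path;
    }

    // Called by the application after it has durably processed records up to txid.
    void save(long txid) throws KeeperException, InterruptedException {
        byte[] data = Long.toString(txid).getBytes(StandardCharsets.UTF_8);
        try {
            zk.setData(path, data, -1); // -1 matches any znode version
        } catch (KeeperException.NoNodeException e) {
            zk.create(path, data, ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
        }
    }

    // Called on startup to decide where to resume reading.
    long load() throws KeeperException, InterruptedException {
        byte[] data = zk.getData(path, false, null);
        return Long.parseLong(new String(data, StandardCharsets.UTF_8));
    }
}
```

On restart, the application would call `load()` and position its reader at the returned TransactionID.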
- -## Guarantees - -The DistributedLog API for BookKeeper provides a number of guarantees for applications: - -* Records written by a [writer](#writers) to a [log](#logs) are appended in the order in which they are written. If a record **R1** is written by the same writer as a record **R2**, **R1** will have a smaller sequence number than **R2**. -* [Readers](#readers) see [records](#log-records) in the same order in which they are [written](#writers) to the log. -* All records are persisted on disk by BookKeeper before acknowledgements, which guarantees durability. -* For a log with a replication factor of N, DistributedLog tolerates up to N-1 server failures without losing any records. - -## API - -Documentation for the DistributedLog API can be found [here](https://bookkeeper.apache.org/distributedlog/docs/latest/user_guide/api/core). - -> At a later date, the DistributedLog API docs will be added here. - - diff --git a/site/docs/4.5.1/api/ledger-adv-api.md b/site/docs/4.5.1/api/ledger-adv-api.md deleted file mode 100644 index f46950dd984..00000000000 --- a/site/docs/4.5.1/api/ledger-adv-api.md +++ /dev/null @@ -1,82 +0,0 @@ ---- -title: The Advanced Ledger API --- - -In release `4.5.0`, Apache BookKeeper introduces a few advanced APIs for advanced usage. -This section covers these advanced APIs. - -> Before learning the advanced API, please read the [Ledger API](../ledger-api) first. - -## LedgerHandleAdv - -[`LedgerHandleAdv`](../javadoc/org/apache/bookkeeper/client/LedgerHandleAdv) is an advanced extension of [`LedgerHandle`](../javadoc/org/apache/bookkeeper/client/LedgerHandle). -It allows users to pass in an `entryId` when adding an entry. - -### Creating advanced ledgers - -Here's an example: - -```java -byte[] passwd = "some-passwd".getBytes(); -LedgerHandleAdv handle = bkClient.createLedgerAdv( - 3, 3, 2, // replica settings - DigestType.CRC32, - passwd); -``` - -You can also create advanced ledgers asynchronously. - -```java -class LedgerCreationCallback implements AsyncCallback.CreateCallback { - public void createComplete(int returnCode, LedgerHandle handle, Object ctx) { - System.out.println("Ledger successfully created"); - } -} -client.asyncCreateLedgerAdv( - 3, // ensemble size - 3, // write quorum size - 2, // ack quorum size - BookKeeper.DigestType.CRC32, - password, - new LedgerCreationCallback(), - "some context" -); -``` - -Besides the APIs above, BookKeeper allows users to provide a `ledger-id` when creating advanced ledgers. - -```java -long ledgerId = ...; // the ledger id is generated externally. - -byte[] passwd = "some-passwd".getBytes(); -LedgerHandleAdv handle = bkClient.createLedgerAdv( - ledgerId, // ledger id generated externally - 3, 3, 2, // replica settings - DigestType.CRC32, - passwd); -``` - -> Please note, it is the user's responsibility to provide a unique ledger id when using the API above. -> If a ledger already exists when users try to create an advanced ledger with the same ledger id, -> a [LedgerExistsException](../javadoc/org/apache/bookkeeper/client/BKException.BKLedgerExistException.html) is thrown by the bookkeeper client. - -### Add Entries - -The normal [add entries api](ledger-api/#adding-entries-to-ledgers) is disabled in advanced ledgers. Instead, when users want to add entries -to advanced ledgers, an entry id must be passed in along with the entry data when adding an entry.
- -```java -long entryId = ...; // entry id generated externally - -ledger.addEntry(entryId, "Some entry data".getBytes()); -``` - -A few notes when using this API: - -- The entry id has to be non-negative. -- Clients may add entries out of order. -- However, the entries are only acknowledged in a monotonic order starting from 0. - -### Read Entries - -The read entries api in advanced ledgers remains the same as for [normal ledgers](../ledger-api/#reading-entries-from-ledgers). diff --git a/site/docs/4.5.1/api/ledger-api.md b/site/docs/4.5.1/api/ledger-api.md deleted file mode 100644 index 031389c0fb0..00000000000 --- a/site/docs/4.5.1/api/ledger-api.md +++ /dev/null @@ -1,473 +0,0 @@ ---- -title: The Ledger API --- - -The ledger API is a lower-level API for BookKeeper that enables you to interact with {% pop ledgers %} directly. - -## The Java ledger API client - -To get started with the Java client for BookKeeper, install the `bookkeeper-server` library as a dependency in your Java application. - -> For a more in-depth tutorial that involves a real use case for BookKeeper, see the [Example application](../example-application) guide. - -## Installation - -The BookKeeper Java client library is available via [Maven Central](http://search.maven.org/) and can be installed using [Maven](#maven), [Gradle](#gradle), and other build tools. - -### Maven - -If you're using [Maven](https://maven.apache.org/), add this to your [`pom.xml`](https://maven.apache.org/guides/introduction/introduction-to-the-pom.html) build configuration file: - -```xml -<!-- in your <properties> block --> -<bookkeeper.version>4.5.1</bookkeeper.version> - -<!-- in your <dependencies> block --> -<dependency> -  <groupId>org.apache.bookkeeper</groupId> -  <artifactId>bookkeeper-server</artifactId> -  <version>${bookkeeper.version}</version> -</dependency> -``` - -### Gradle - -If you're using [Gradle](https://gradle.org/), add this to your [`build.gradle`](https://spring.io/guides/gs/gradle/) build configuration file: - -```groovy -dependencies { - compile group: 'org.apache.bookkeeper', name: 'bookkeeper-server', version: '4.5.1' -} - -// Alternatively: -dependencies { - compile 'org.apache.bookkeeper:bookkeeper-server:4.5.1' -} -``` - -## Connection string - -When interacting with BookKeeper using the Java client, you need to provide your client with a connection string, for which you have three options: - -* Provide your entire ZooKeeper connection string, for example `zk1:2181,zk2:2181,zk3:2181`. -* Provide a host and port for one node in your ZooKeeper cluster, for example `zk1:2181`. In general, it's better to provide a full connection string (in case the ZooKeeper node you attempt to connect to is down). -* If your ZooKeeper cluster can be discovered via DNS, you can provide the DNS name, for example `my-zookeeper-cluster.com`. - -## Creating a new client - -In order to create a new [`BookKeeper`](../javadoc/org/apache/bookkeeper/client/BookKeeper) client object, you need to pass in a [connection string](#connection-string). Here is an example client object using a ZooKeeper connection string: - -```java -try { - String connectionString = "127.0.0.1:2181"; // For a single-node, local ZooKeeper cluster - BookKeeper bkClient = new BookKeeper(connectionString); -} catch (InterruptedException | IOException | KeeperException e) { - e.printStackTrace(); -} -``` - -> If you're running BookKeeper [locally](../../getting-started/run-locally), using the [`localbookie`](../../reference/cli#bookkeeper-localbookie) command, use `"127.0.0.1:2181"` for your connection string, as in the example above.
- -There are, however, other ways that you can create a client object: - -* By passing in a [`ClientConfiguration`](../javadoc/org/apache/bookkeeper/conf/ClientConfiguration) object. Here's an example: - - ```java - ClientConfiguration config = new ClientConfiguration(); - config.setZkServers(zkConnectionString); - config.setAddEntryTimeout(2000); - BookKeeper bkClient = new BookKeeper(config); - ``` - -* By specifying a `ClientConfiguration` and a [`ZooKeeper`](http://zookeeper.apache.org/doc/current/api/org/apache/zookeeper/ZooKeeper.html) client object: - - ```java - ClientConfiguration config = new ClientConfiguration(); - config.setAddEntryTimeout(5000); - ZooKeeper zkClient = new ZooKeeper(/* client args */); - BookKeeper bkClient = new BookKeeper(config, zkClient); - ``` - -* Using the `forConfig` method: - - ```java - BookKeeper bkClient = BookKeeper.forConfig(conf).build(); - ``` - -## Creating ledgers - -The easiest way to create a {% pop ledger %} using the Java client is via the `createLedger` method, which creates a new ledger synchronously and returns a [`LedgerHandle`](../javadoc/org/apache/bookkeeper/client/LedgerHandle). You must specify at least a [`DigestType`](../javadoc/org/apache/bookkeeper/client/BookKeeper.DigestType) and a password. - -Here's an example: - -```java -byte[] password = "some-password".getBytes(); -LedgerHandle handle = bkClient.createLedger(BookKeeper.DigestType.MAC, password); -``` - -You can also create ledgers asynchronously. - -### Create ledgers asynchronously - -```java -class LedgerCreationCallback implements AsyncCallback.CreateCallback { - public void createComplete(int returnCode, LedgerHandle handle, Object ctx) { - System.out.println("Ledger successfully created"); - } -} - -client.asyncCreateLedger( - 3, - 2, - BookKeeper.DigestType.MAC, - password, - new LedgerCreationCallback(), - "some context" -); -``` - -## Adding entries to ledgers - -```java -long entryId = ledger.addEntry("Some entry data".getBytes()); -``` - -### Add entries asynchronously - -## Reading entries from ledgers - -```java -Enumeration<LedgerEntry> entries = handle.readEntries(1, 99); -``` - -To read all possible entries from the ledger: - -```java -Enumeration<LedgerEntry> entries = - handle.readEntries(0, handle.getLastAddConfirmed()); - -while (entries.hasMoreElements()) { - LedgerEntry entry = entries.nextElement(); - System.out.println("Successfully read entry " + entry.getId()); -} -``` - -### Reading entries after the LastAddConfirmed range - -`readUnconfirmedEntries` allows reading beyond the LastAddConfirmed range. -It lets the client read without checking the local value of LastAddConfirmed, so it is possible to read entries for which the writer has not yet received an acknowledgement. -For entries within the range 0..LastAddConfirmed, BookKeeper guarantees that the writer has successfully received the acknowledgement. -For entries outside that range, it is possible that the writer never received the acknowledgement, so there is a risk that the reader sees entries before the writer does, which could result in a consistency issue in some cases. -With this method you can read entries both before and after the LastAddConfirmed with one call; the expected consistency is as described above.
- -```java -Enumeration<LedgerEntry> entries = - handle.readUnconfirmedEntries(0, lastEntryIdExpectedToRead); - -while (entries.hasMoreElements()) { - LedgerEntry entry = entries.nextElement(); - System.out.println("Successfully read entry " + entry.getId()); -} -``` - -## Deleting ledgers - -{% pop Ledgers %} can also be deleted synchronously or asynchronously. - -```java -long ledgerId = 1234; - -try { - bkClient.deleteLedger(ledgerId); -} catch (Exception e) { - e.printStackTrace(); -} -``` - -### Delete ledgers asynchronously - -```java -class DeleteEntryCallback implements AsyncCallback.DeleteCallback { - public void deleteComplete(int rc, Object ctx) { - System.out.println("Delete completed"); - } -} -``` - -## Simple example - -> For a more involved BookKeeper client example, see the [example application](#example-application) below. - -In the code sample below, a BookKeeper client: - -* creates a ledger -* writes entries to the ledger -* closes the ledger (meaning no further writes are possible) -* re-opens the ledger for reading -* reads all available entries - -```java -// Create a client object for the local ensemble. This -// operation throws multiple exceptions, so make sure to -// use a try/catch block when instantiating client objects. -BookKeeper bkc = new BookKeeper("localhost:2181"); - -// A password for the new ledger -byte[] ledgerPassword = /* some sequence of bytes, perhaps random */; - -// Create a new ledger and fetch its identifier -LedgerHandle lh = bkc.createLedger(BookKeeper.DigestType.MAC, ledgerPassword); -long ledgerId = lh.getId(); - -// Create a buffer for four-byte entries -ByteBuffer entry = ByteBuffer.allocate(4); - -int numberOfEntries = 100; - -// Add entries to the ledger, then close it -for (int i = 0; i < numberOfEntries; i++){ - entry.putInt(i); - entry.position(0); - lh.addEntry(entry.array()); -} -lh.close(); - -// Open the ledger for reading -lh = bkc.openLedger(ledgerId, BookKeeper.DigestType.MAC, ledgerPassword); - -// Read all available entries -Enumeration<LedgerEntry> entries = lh.readEntries(0, numberOfEntries - 1); - -while(entries.hasMoreElements()) { - ByteBuffer result = ByteBuffer.wrap(entries.nextElement().getEntry()); - int retrEntry = result.getInt(); - - // Print the integer stored in each entry - System.out.println(String.format("Result: %s", retrEntry)); -} - -// Close the ledger and the client -lh.close(); -bkc.close(); -``` - -Running this should return this output: - -```shell -Result: 0 -Result: 1 -Result: 2 -# etc -``` - -## Example application - -This tutorial walks you through building an example application that uses BookKeeper as the replicated log. The application uses the [BookKeeper Java client](../java-client) to interact with BookKeeper. - -> The code for this tutorial can be found in [this GitHub repo](https://github.com/ivankelly/bookkeeper-tutorial/). The final code for the `Dice` class can be found [here](https://github.com/ivankelly/bookkeeper-tutorial/blob/master/src/main/java/org/apache/bookkeeper/Dice.java). - -### Setup - -Before you start, you will need to have a BookKeeper cluster running locally on your machine. For installation instructions, see [Installation](../../getting-started/installation). - -To start up a cluster consisting of six {% pop bookies %} locally: - -```shell -$ bookkeeper-server/bin/bookkeeper localbookie 6 -``` - -You can specify a different number of bookies if you'd like.
- -### Goal - -The goal of the dice application is to have - -* multiple instances of this application, -* possibly running on different machines, -* all of which display the exact same sequence of numbers. - -In other words, the log needs to be both durable and consistent, regardless of how many {% pop bookies %} are participating in the BookKeeper ensemble. If one of the bookies crashes or becomes unable to communicate with the other bookies in any way, it should *still* display the same sequence of numbers as the others. This tutorial will show you how to achieve this. - -To begin, download the base application, compile and run it. - -```shell -$ git clone https://github.com/ivankelly/bookkeeper-tutorial.git -$ mvn package -$ mvn exec:java -Dexec.mainClass=org.apache.bookkeeper.Dice -``` - -That should yield output that looks something like this: - -``` -[INFO] Scanning for projects... -[INFO] -[INFO] ------------------------------------------------------------------------ -[INFO] Building tutorial 1.0-SNAPSHOT -[INFO] ------------------------------------------------------------------------ -[INFO] -[INFO] --- exec-maven-plugin:1.3.2:java (default-cli) @ tutorial --- -[WARNING] Warning: killAfter is now deprecated. Do you need it ? Please comment on MEXEC-6. -Value = 4 -Value = 5 -Value = 3 -``` - -### The base application - -The application in this tutorial is a dice application. The `Dice` class below has a `playDice` function that generates a random number between 1 and 6 every second, prints the value of the dice roll, and runs indefinitely. - -```java -public class Dice { - Random r = new Random(); - - void playDice() throws InterruptedException { - while (true) { - Thread.sleep(1000); - System.out.println("Value = " + (r.nextInt(6) + 1)); - } - } -} -``` - -When you run the `main` function of this class, a new `Dice` object will be instantiated and then run indefinitely: - -```java -public class Dice { - // other methods - - public static void main(String[] args) throws InterruptedException { - Dice d = new Dice(); - d.playDice(); - } -} -``` - -### Leaders and followers (and a bit of background) - -To achieve this common view in multiple instances of the program, we need each instance to agree on what the next number in the sequence will be. For example, the instances must agree that 4 is the first number and 2 is the second number and 5 is the third number and so on. This is a difficult problem, especially in the case that any instance may go away at any time, and messages between the instances can be lost or reordered. - -Luckily, there are already algorithms to solve this. Paxos is an abstract algorithm to implement this kind of agreement, while Zab and Raft are more practical protocols. This video gives a good overview about how these algorithms usually look. They all have a similar core. - -It would be possible to run Paxos to agree on each number in the sequence. However, running Paxos each time can be expensive. What Zab and Raft do is use a Paxos-like algorithm to elect a leader. The leader then decides what the sequence of events should be, putting them in a log, which the other instances can then follow to maintain the same state as the leader. - -BookKeeper provides the functionality for the second part of the protocol, allowing a leader to write events to a log and have multiple followers tailing the log. However, BookKeeper does not do leader election. You will need a ZooKeeper or Raft instance for that purpose. - -### Why not just use ZooKeeper?
- -There are a number of reasons: - -1. ZooKeeper's log is only exposed through a tree-like interface. It can be hard to shoehorn your application into this. -2. A ZooKeeper ensemble of multiple machines is limited to one log. You may want one log per resource, which will become expensive very quickly. -3. Adding extra machines to a ZooKeeper ensemble does not increase capacity or throughput. - -BookKeeper can be seen as a means of exposing ZooKeeper's replicated log to applications in a scalable fashion. ZooKeeper is still used by BookKeeper, however, to maintain consistency guarantees, though clients don't need to interact with ZooKeeper directly. - -### Electing a leader - -We'll use ZooKeeper to elect a leader. A ZooKeeper instance will have started locally when you started the localbookie application above. To verify it's running, run the following command. - -```shell -$ echo stat | nc localhost 2181 -Zookeeper version: 3.4.6-1569965, built on 02/20/2014 09:09 GMT -Clients: - /127.0.0.1:59343[1](queued=0,recved=40,sent=41) - /127.0.0.1:49354[1](queued=0,recved=11,sent=11) - /127.0.0.1:49361[0](queued=0,recved=1,sent=0) - /127.0.0.1:59344[1](queued=0,recved=38,sent=39) - /127.0.0.1:59345[1](queued=0,recved=38,sent=39) - /127.0.0.1:59346[1](queued=0,recved=38,sent=39) - -Latency min/avg/max: 0/0/23 -Received: 167 -Sent: 170 -Connections: 6 -Outstanding: 0 -Zxid: 0x11 -Mode: standalone -Node count: 16 -``` - -To interact with ZooKeeper, we'll use the Curator client rather than the stock ZooKeeper client. Getting things right with the ZooKeeper client can be tricky, and Curator removes a lot of the pointy corners for you. In fact, Curator even provides a leader election recipe, so we need to do very little work to get leader election in our application. - -```java -public class Dice extends LeaderSelectorListenerAdapter implements Closeable { - - final static String ZOOKEEPER_SERVER = "127.0.0.1:2181"; - final static String ELECTION_PATH = "/dice-elect"; - - ... - - Dice() throws InterruptedException { - curator = CuratorFrameworkFactory.newClient(ZOOKEEPER_SERVER, - 2000, 10000, new ExponentialBackoffRetry(1000, 3)); - curator.start(); - curator.blockUntilConnected(); - - leaderSelector = new LeaderSelector(curator, ELECTION_PATH, this); - leaderSelector.autoRequeue(); - leaderSelector.start(); - } -``` - -In the constructor for Dice, we need to create the Curator client. We specify four things when creating the client: the location of the ZooKeeper service, the session timeout, the connection timeout, and the retry policy. - -The session timeout is a ZooKeeper concept. If the ZooKeeper server doesn't hear anything from the client for this amount of time, any leases which the client holds will be timed out. This is important in leader election. For leader election, the Curator client will take a lease on ELECTION_PATH. The first instance to take the lease will become leader and the rest will become followers. However, their claim on the lease will remain in the queue. If the first instance then goes away, due to a crash etc., its session will time out. Once the session times out, the lease will be released and the next instance in the queue will become the leader. The call to autoRequeue() will make the client queue itself again if it loses the lease for some other reason, such as if it was still alive but a garbage collection pause caused it to lose its session, and thereby its lease.
I've set the lease to be quite low so that when we test out leader election, transitions will be quite quick. The optimum length for the session timeout depends very much on the use case. The other parameters are the connection timeout, i.e. the amount of time the client will spend trying to connect to a ZooKeeper server before giving up, and the retry policy. The retry policy specifies how the client should respond to transient errors, such as connection loss. Operations that fail with transient errors can be retried, and this argument specifies how often the retries should occur. - -Finally, you'll have noticed that Dice now extends LeaderSelectorListenerAdapter and implements Closeable. Closeable is there to close the resources we have initialized in the constructor: the Curator client and the leaderSelector. LeaderSelectorListenerAdapter is a callback that the leaderSelector uses to notify the instance that it is now the leader. It is passed as the third argument to the LeaderSelector constructor. - -```java - @Override - public void takeLeadership(CuratorFramework client) - throws Exception { - synchronized (this) { - leader = true; - try { - while (true) { - this.wait(); - } - } catch (InterruptedException ie) { - Thread.currentThread().interrupt(); - leader = false; - } - } - } -``` - -takeLeadership() is the callback called by LeaderSelector when the instance is leader. It should only return when the instance wants to give up leadership. In our case, we never do, so we wait on the current object until we're interrupted. To signal to the rest of the program that we are leader, we set a volatile boolean called leader to true. This is unset after we are interrupted. - -```java - void playDice() throws InterruptedException { - while (true) { - while (leader) { - Thread.sleep(1000); - System.out.println("Value = " + (r.nextInt(6) + 1) - + ", isLeader = " + leader); - } - } - } -``` - -Finally, we modify the `playDice` function to only generate random numbers when it is the leader. - -Run two instances of the program in two different terminals. You'll see that one becomes leader and prints numbers while the other just sits there. - -Now stop the leader using Control-Z. This will pause the process, but it won't kill it. You will be dropped back to the shell in that terminal. After a couple of seconds (the session timeout), you will see that the other instance has become the leader. ZooKeeper will guarantee that only one instance is selected as leader at any time. - -Now go back to the shell that the original leader was on and wake up the process using `fg`. You'll see something like the following: - -```shell -... -... -Value = 4, isLeader = true -Value = 4, isLeader = true -^Z -[1]+ Stopped mvn exec:java -Dexec.mainClass=org.apache.bookkeeper.Dice -$ fg -mvn exec:java -Dexec.mainClass=org.apache.bookkeeper.Dice -Value = 3, isLeader = true -Value = 1, isLeader = false -``` diff --git a/site/docs/4.5.1/api/overview.md b/site/docs/4.5.1/api/overview.md deleted file mode 100644 index 3eb649273c1..00000000000 --- a/site/docs/4.5.1/api/overview.md +++ /dev/null @@ -1,17 +0,0 @@ ---- -title: BookKeeper API --- - -BookKeeper offers a few APIs that applications can use to interact with it: - -* The [ledger API](../ledger-api) is a lower-level API that enables you to interact with {% pop ledgers %} directly -* The [Ledger Advanced API](../ledger-adv-api) is an advanced extension of the [Ledger API](../ledger-api) that provides more flexibility to applications.
- -* The [DistributedLog API](../distributedlog-api) is a higher-level API that provides convenient abstractions. - -## Trade-offs - -The `Ledger API` provides direct access to ledgers and thus enables you to use BookKeeper however you'd like. - -However, in most use cases, if you want a `log stream`-like abstraction, the Ledger API requires you to manage things like tracking the list of ledgers, -rolling ledgers, and data retention on your own. In such cases, we recommend using the [DistributedLog API](../distributedlog-api), -whose semantics resemble continuous log streams from the standpoint of applications. diff --git a/site/docs/4.5.1/deployment/dcos.md b/site/docs/4.5.1/deployment/dcos.md deleted file mode 100644 index 3f77695b412..00000000000 --- a/site/docs/4.5.1/deployment/dcos.md +++ /dev/null @@ -1,142 +0,0 @@ ---- -title: Deploying BookKeeper on DC/OS -subtitle: Get up and running easily on an Apache Mesos cluster -logo: img/dcos-logo.png --- - -[DC/OS](https://dcos.io/) (the DataCenter Operating System) is a distributed operating system used for deploying and managing applications and systems on [Apache Mesos](http://mesos.apache.org/). DC/OS is an open-source tool created and maintained by [Mesosphere](https://mesosphere.com/). - -BookKeeper is available as a [DC/OS package](http://universe.dcos.io/#/package/bookkeeper/version/latest) from the [Mesosphere DC/OS Universe](http://universe.dcos.io/#/packages). - -## Prerequisites - -In order to run BookKeeper on DC/OS, you will need: - -* DC/OS version [1.8](https://dcos.io/docs/1.8/) or higher -* A DC/OS cluster with at least three nodes -* The [DC/OS CLI tool](https://dcos.io/docs/1.8/usage/cli/install/) installed - -Each node in your DC/OS-managed Mesos cluster must have at least: - -* 1 CPU -* 1 GB of memory -* 10 GB of total persistent disk storage - -## Installing BookKeeper - -```shell -$ dcos package install bookkeeper --yes -``` - -This command will: - -* Install the `bookkeeper` subcommand for the `dcos` CLI tool -* Start a single {% pop bookie %} on the Mesos cluster with the [default configuration](../../reference/config) - -The bookie that is automatically started up uses the host mode of the network and by default exports the service at `agent_ip:3181`. - -> If you run `dcos package install bookkeeper` without setting the `--yes` flag, the install will run in interactive mode. For more information on the `package install` command, see the [DC/OS docs](https://docs.mesosphere.com/latest/cli/command-reference/dcos-package/dcos-package-install/). - -### Services - -To watch BookKeeper start up, click on the **Services** tab in the DC/OS [user interface](https://docs.mesosphere.com/latest/gui/) and you should see the `bookkeeper` package listed: - -![DC/OS services]({{ site.baseurl }}img/dcos/services.png) - -### Tasks - -To see which tasks have started, click on the `bookkeeper` service and you'll see an interface that looks like this: - -![DC/OS tasks]({{ site.baseurl }}img/dcos/tasks.png) - -## Scaling BookKeeper - -Once the first {% pop bookie %} has started up, you can click on the **Scale** tab to scale up your BookKeeper ensemble by adding more bookies (or scale down the ensemble by removing bookies). - -![DC/OS scale]({{ site.baseurl }}img/dcos/scale.png) - -## ZooKeeper Exhibitor - -ZooKeeper contains the information for all bookies in the ensemble. When deployed on DC/OS, BookKeeper uses a ZooKeeper instance provided by DC/OS.
You can access a visual UI for ZooKeeper using [Exhibitor](https://github.com/soabase/exhibitor/wiki), which is available at [http://master.dcos/exhibitor](http://master.dcos/exhibitor). - -![ZooKeeper Exhibitor]({{ site.baseurl }}img/dcos/exhibitor.png) - -You should see a listing of IP/host information for all bookies under the `messaging/bookkeeper/ledgers/available` node. - -## Client connections - -To connect to bookies running on DC/OS using clients running within your Mesos cluster, you need to specify the ZooKeeper connection string for DC/OS's ZooKeeper cluster: - -``` -master.mesos:2181 -``` - -This is the *only* ZooKeeper host/port you need to include in your connection string. Here's an example using the [Java client](../../api/ledger-api#the-java-ledger-api-client): - -```java -BookKeeper bkClient = new BookKeeper("master.mesos:2181"); -``` - -If you're connecting using a client running outside your Mesos cluster, you need to supply the public-facing connection string for your DC/OS ZooKeeper cluster. - -## Configuring BookKeeper - -By default, the `bookkeeper` package will start up a BookKeeper ensemble consisting of one {% pop bookie %} with one CPU, 1 GB of memory, and a 70 MB persistent volume. - -You can supply a non-default configuration when installing the package using a JSON file. Here's an example command: - -```shell -$ dcos package install bookkeeper \ - --options=/path/to/config.json -``` - -You can then fetch the current configuration for BookKeeper at any time using the `package describe` command: - -```shell -$ dcos package describe bookkeeper \ - --config -``` - -### Available parameters - -> Not all [configurable parameters](../../reference/config) for BookKeeper are available for BookKeeper on DC/OS. Only the parameters shown in the table below are available. - -Param | Type | Description | Default -:-----|:-----|:------------|:------- -`name` | String | The name of the DC/OS service. | `bookkeeper` -`cpus` | Integer | The number of CPU shares to allocate to each {% pop bookie %}. The minimum is 1. | `1` -`instances` | Integer | The number of {% pop bookies %} to run. The minimum is 1. | `1` -`mem` | Number | The memory, in MB, to allocate to each BookKeeper task | `1024.0` (1 GB) -`volume_size` | Number | The persistent volume size, in MB | `70` -`zk_client` | String | The connection string for the ZooKeeper client instance | `master.mesos:2181` -`service_port` | Integer | The BookKeeper export service port, using `PORT0` in Marathon | `3181` - -### Example JSON configuration - -Here's an example JSON configuration object for BookKeeper on DC/OS: - -```json -{ - "instances": 5, - "cpus": 3, - "mem": 2048.0, - "volume_size": 250 -} -``` - -If that configuration were stored in a file called `bk-config.json`, you could apply that configuration when installing the BookKeeper package using this command: - -```shell -$ dcos package install bookkeeper \ - --options=./bk-config.json -``` - -## Uninstalling BookKeeper - -You can shut down and uninstall the `bookkeeper` package from DC/OS at any time using the `package uninstall` command: - -```shell -$ dcos package uninstall bookkeeper -Uninstalled package [bookkeeper] version [4.5.1] -Thank you for using bookkeeper.
- -``` diff --git a/site/docs/4.5.1/deployment/kubernetes.md b/site/docs/4.5.1/deployment/kubernetes.md deleted file mode 100644 index f65172112a7..00000000000 --- a/site/docs/4.5.1/deployment/kubernetes.md +++ /dev/null @@ -1,4 +0,0 @@ ---- -title: Deploying BookKeeper on Kubernetes -logo: img/kubernetes-logo.png --- diff --git a/site/docs/4.5.1/deployment/manual.md b/site/docs/4.5.1/deployment/manual.md deleted file mode 100644 index daafd5556f5..00000000000 --- a/site/docs/4.5.1/deployment/manual.md +++ /dev/null @@ -1,56 +0,0 @@ ---- -title: Manual deployment --- - -The easiest way to deploy BookKeeper is using schedulers like [DC/OS](../dcos), but you can also deploy BookKeeper clusters manually. A BookKeeper cluster consists of two main components: - -* A [ZooKeeper](#zookeeper-setup) cluster that is used for configuration- and coordination-related tasks -* An [ensemble](#starting-up-bookies) of {% pop bookies %} - -## ZooKeeper setup - -We won't provide a full guide to setting up a ZooKeeper cluster here. We recommend that you consult [this guide](https://zookeeper.apache.org/doc/current/zookeeperAdmin.html) in the official ZooKeeper documentation. - -## Starting up bookies - -Once your ZooKeeper cluster is up and running, you can start up as many {% pop bookies %} as you'd like to form a cluster. Before starting up each bookie, you need to modify the bookie's configuration to make sure that it points to the right ZooKeeper cluster. - -On each bookie host, you need to [download](../../getting-started/installation#download) the BookKeeper package as a tarball. Once you've done that, you need to configure the bookie by setting values in the `bookkeeper-server/conf/bk_server.conf` config file. The one parameter that you will absolutely need to change is the [`zkServers`](../../config#zkServers) parameter, which you will need to set to the ZooKeeper connection string for your ZooKeeper cluster. Here's an example: - -```properties -zkServers=100.0.0.1:2181,100.0.0.2:2181,100.0.0.3:2181 -``` - -> A full listing of configurable parameters available in `bookkeeper-server/conf/bk_server.conf` can be found in the [Configuration](../../reference/config) reference manual. - -Once the bookie's configuration is set, you can start it up using the [`bookie`](../../reference/cli#bookkeeper-bookie) command of the [`bookkeeper`](../../reference/cli#bookkeeper) CLI tool: - -```shell -$ bookkeeper-server/bin/bookkeeper bookie -``` - -> You can also build BookKeeper [by cloning it from source](../../getting-started/installation#clone) or [using Maven](../../getting-started/installation#build-using-maven). - -### System requirements - -{% include system-requirements.md %} - -## Cluster metadata setup - -Once you've started up a cluster of bookies, you need to set up cluster metadata for the cluster by running the following command from any bookie in the cluster: - -```shell -$ bookkeeper-server/bin/bookkeeper shell metaformat -``` - -> The `metaformat` command performs all the necessary ZooKeeper cluster metadata tasks and thus only needs to be run *once* and from *any* bookie in the BookKeeper cluster. - -Once cluster metadata formatting has been completed, your BookKeeper cluster is ready to go!
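As a quick smoke test that the cluster is functional — a sketch, not an official verification tool — you can use the Java client described in [the Ledger API docs](../../api/ledger-api) to create a throwaway ledger, write an entry, and delete it. The connection string below assumes the example `zkServers` value from above:

```java
import org.apache.bookkeeper.client.BookKeeper;
import org.apache.bookkeeper.client.LedgerHandle;

public class ClusterSmokeTest {
    public static void main(String[] args) throws Exception {
        // Use the same ZooKeeper connection string configured in zkServers
        BookKeeper bk = new BookKeeper("100.0.0.1:2181,100.0.0.2:2181,100.0.0.3:2181");

        // Create a throwaway ledger and write one entry
        LedgerHandle lh = bk.createLedger(BookKeeper.DigestType.MAC, "test-passwd".getBytes());
        long ledgerId = lh.getId();
        lh.addEntry("hello bookkeeper".getBytes());
        lh.close();

        // Clean up the throwaway ledger and disconnect
        bk.deleteLedger(ledgerId);
        bk.close();
        System.out.println("Cluster OK (ledger " + ledgerId + " written and deleted)");
    }
}
```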
- - diff --git a/site/docs/4.5.1/development/codebase.md b/site/docs/4.5.1/development/codebase.md deleted file mode 100644 index 9a83073ea4c..00000000000 --- a/site/docs/4.5.1/development/codebase.md +++ /dev/null @@ -1,3 +0,0 @@ ---- -title: The BookKeeper codebase --- diff --git a/site/docs/4.5.1/development/protocol.md b/site/docs/4.5.1/development/protocol.md deleted file mode 100644 index 6d17aa0ed45..00000000000 --- a/site/docs/4.5.1/development/protocol.md +++ /dev/null @@ -1,148 +0,0 @@ ---- -title: The BookKeeper protocol --- - -BookKeeper uses a special replication protocol for guaranteeing persistent storage of entries in an ensemble of bookies. - -> This document assumes that you have some knowledge of leader election and log replication and how these can be used in a distributed system. If not, we recommend reading the [example application](../../api/ledger-api#example-application) documentation first. - -## Ledgers - -{% pop Ledgers %} are the basic building block of BookKeeper and the level at which BookKeeper makes its persistent storage guarantees. A replicated log consists of an ordered list of ledgers. See [Ledgers to logs](#ledgers-to-logs) for info on building a replicated log from ledgers. - -Ledgers are composed of metadata and {% pop entries %}. The metadata is stored in ZooKeeper, which provides a *compare-and-swap* (CAS) operation. Entries are stored on storage nodes known as {% pop bookies %}. - -A ledger has a single writer and multiple readers (SWMR). - -### Ledger metadata - -A ledger's metadata contains the following: - -Parameter | Name | Meaning -:---------|:-----|:------- -Identifier | | A 64-bit integer, unique within the system -Ensemble size | **E** | The number of nodes the ledger is stored on -Write quorum size | **Qw** | The number of nodes each entry is written to. In effect, the max replication for the entry. -Ack quorum size | **Qa** | The number of nodes an entry must be acknowledged on. In effect, the minimum replication for the entry. -Current state | | The current status of the ledger. One of `OPEN`, `CLOSED`, or `IN_RECOVERY`. -Last entry | | The last entry in the ledger, or `NULL` if the current state is not `CLOSED`. - -In addition, each ledger's metadata consists of one or more *fragments*. Each fragment consists of - -* the id of the first entry in the fragment, and -* the list of bookies storing the fragment. - -When creating a ledger, the following invariant must hold: - -**E >= Qw >= Qa** - -Thus, the ensemble size (**E**) must be at least as large as the write quorum size (**Qw**), which must in turn be at least as large as the ack quorum size (**Qa**). If that condition does not hold, then the ledger creation operation will fail. - -### Ensembles - -When a ledger is created, **E** bookies are chosen for the entries of that ledger. The bookies are the initial ensemble of the ledger. A ledger can have multiple ensembles, but an entry has only one ensemble. Changes in the ensemble involve a new fragment being added to the ledger. - -Take the following example. In this ledger, with an ensemble size of 3, there are two fragments and thus two ensembles, one starting at entry 0, the second at entry 12. The second ensemble differs from the first only by its first element. This could be because B1 has failed and therefore had to be replaced. - -First entry | Bookies -:-----------|:------- -0 | B1, B2, B3 -12 | B4, B2, B3 - -### Write quorums - -Each entry in the log is written to **Qw** nodes. This is considered the write quorum for that entry.
The write quorum is the subsequence of the ensemble, **Qw** in length, starting at the bookie at index (entryid % **E**). - -For example, in a ledger with **E** = 4, **Qw** = 3, and **Qa** = 2, with an ensemble consisting of B1, B2, B3, and B4, the write quorums for the first 6 entries will be: - -Entry | Write quorum -:-----|:------------ -0 | B1, B2, B3 -1 | B2, B3, B4 -2 | B3, B4, B1 -3 | B4, B1, B2 -4 | B1, B2, B3 -5 | B2, B3, B4 - -There are only **E** distinct write quorums in any ensemble. If **Qw** = **E**, then there is only one, as no striping occurs. - -### Ack quorums - -The ack quorum for an entry is any subset of the write quorum of size **Qa**. If **Qa** bookies acknowledge an entry, it means it has been fully replicated. - -### Guarantees - -The system can tolerate **Qa** - 1 failures without data loss. - -Bookkeeper guarantees that: - -1. All updates to a ledger will be read in the same order as they were written. -2. All clients will read the same sequence of updates from the ledger. - -## Writing to ledgers - -Since a ledger has a single writer, ensuring that entry ids are sequential is trivial. A bookie acknowledges a write once it has been persisted to disk and is therefore durable. Once **Qa** bookies from the write quorum acknowledge the write, the write is acknowledged to the client, but only if all entries with lower entry ids in the ledger have already been acknowledged to the client. - -The entry written contains the ledger id, the entry id, the last add confirmed and the payload. The last add confirmed is the last entry which had been acknowledged to the client when this entry was written. Sending this with the entry speeds up recovery of the ledger in the case that the writer crashes. - -Another client can also read entries in the ledger up to the last add confirmed, as we guarantee that all entries thus far have been replicated on **Qa** nodes, and therefore all future readers will be able to also read them. However, to read like this, the ledger should be opened with a non-fencing open. Otherwise, it would kill the writer. - -If a node fails to acknowledge a write, the writer will create a new ensemble by replacing the failed node in the current ensemble. It creates a new fragment with this ensemble, starting from the first message that has not been acknowledged to the client. Creating the new fragment involves making a CAS write to the metadata. If the CAS write fails, someone else has modified something in the ledger metadata. This concurrent modification could have been caused by recovery or {% pop rereplication %}. We reread the metadata. If the state of the ledger is no longer `OPEN`, we send an error to the client for any outstanding writes. Otherwise, we try to replace the failed node again. - -### Closing a ledger as a writer - -Closing a ledger is straightforward for a writer. The writer makes a CAS write to the metadata, changing the state to `CLOSED` and setting the last entry of the ledger to the last entry which we have acknowledged to the client. - -If the CAS write fails, it means someone else has modified the metadata. We reread the metadata, and retry closing as long as the state of the ledger is still `OPEN`. If the state is `IN_RECOVERY` we send an error to the client. If the state is `CLOSED` and the last entry is the same as the last entry we have acknowledged to the client, we complete the close operation successfully. If the last entry is different from what we have acknowledged to the client, we send an error to the client.
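The "CAS write" in the close step maps naturally onto ZooKeeper's versioned `setData`. The following Java fragment is an illustrative sketch only — not BookKeeper's actual client code — assuming a hypothetical `LedgerMetadata` holder and `serialize()`/`deserialize()` helpers:

```java
// Hypothetical sketch of closing a ledger with a compare-and-swap on its metadata.
Stat stat = new Stat(); // org.apache.zookeeper.data.Stat
byte[] raw = zk.getData(ledgerPath, false, stat); // read metadata and its version
LedgerMetadata md = deserialize(raw);

if (md.state == State.OPEN) {
    md.state = State.CLOSED;
    md.lastEntry = lastEntryAckedToClient;
    try {
        // Passing the expected version makes this a CAS: it fails with
        // BadVersionException if the metadata changed since we read it.
        zk.setData(ledgerPath, serialize(md), stat.getVersion());
    } catch (KeeperException.BadVersionException e) {
        // Concurrent modification: re-read the metadata and proceed per the rules above.
    }
}
```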
- -### Closing a ledger as a reader - -A reader can also force a ledger to close. Forcing the ledger to close will prevent any writer from adding new entries to the ledger. This is called {% pop fencing %}. This can occur when a writer has crashed or become unavailable, and a new writer wants to take over writing to the log. The new writer must ensure that it has seen all updates from the previous writer, and prevent the previous writer from making any new updates before making any updates of its own. - -To recover a ledger, we first update the state in the metadata to `IN_RECOVERY`. We then send a fence message to all the bookies in the last fragment of the ledger. When a bookie receives a fence message for a ledger, the fenced state of the ledger is persisted to disk. Once we receive a response from at least (**Qw** - **Qa**) + 1 bookies from each write quorum in the ensemble, the ledger is fenced. - -By ensuring we have received a response from at least (**Qw** - **Qa**) + 1 bookies in each write quorum, we ensure that, if the old writer is alive and tries to add a new entry, there will be no write quorum in which **Qa** bookies will accept the write. If the old writer tries to update the ensemble, it will fail on the CAS metadata write, and then see that the ledger is in the `IN_RECOVERY` state, and that it therefore shouldn’t try to write to it. - -The old writer will be able to write entries to individual bookies (we can’t guarantee that the fence message reaches all bookies), but as it will not be able to reach the ack quorum, it will not be able to send a success response to its client. The client will get a `LedgerFenced` error instead. - -It is important to note that when you get a ledger fenced message for an entry, it doesn’t mean that the entry has not been written. It means that the entry may or may not have been written, and this can only be determined after the ledger is recovered. In effect, `LedgerFenced` should be treated like a timeout. - -Once the ledger is fenced, recovery can begin. Recovery means finding the last entry of the ledger and closing the ledger. To find the last entry of the ledger, the client asks all bookies for the highest last add confirmed value they have seen. It waits until it has received a response from at least (**Qw** - **Qa**) + 1 bookies from each write quorum, and takes the highest response as the entry id to start reading forward from. It then starts reading forward in the ledger, one entry at a time, replicating all entries it sees to the entire write quorum for that entry. Once it can no longer read any more entries, it updates the state in the metadata to `CLOSED`, and sets the last entry of the ledger to the last entry it wrote. Multiple readers can try to recover a ledger at the same time, but as the metadata write is CAS, they will all converge on the same last entry of the ledger. - -## Ledgers to logs - -In BookKeeper, {% pop ledgers %} can be used to build a replicated log for your system. All guarantees provided by BookKeeper are at the ledger level. Guarantees on the whole log can be built using the ledger guarantees and any consistent datastore with a compare-and-swap (CAS) primitive. BookKeeper uses ZooKeeper as the datastore but others could theoretically be used. - -A log in BookKeeper is built from some number of ledgers, with a fixed order. A ledger represents a single segment of the log. A ledger could be the whole period that one node was the leader, or there could be multiple ledgers for a single period of leadership.
However, there can only ever be one leader that adds entries to a single ledger. Ledgers cannot be reopened for writing once they have been closed/recovered. - -> BookKeeper does *not* provide leader election. You must use a system like ZooKeeper for this. - -In many cases, leader election is really leader suggestion. Multiple nodes could think that they are leader at any one time. It is the job of the log to guarantee that only one can write changes to the system. - -### Opening a log - -Once a node thinks it is leader for a particular log, it must take the following steps: - -1. Read the list of ledgers for the log -1. {% pop Fence %} the last two ledgers in the list. Two ledgers are fenced because the writer may be writing to the second-to-last ledger while adding the last ledger to the list. -1. Create a new ledger -1. Add the new ledger to the ledger list -1. Write the new ledger back to the datastore using a CAS operation - -The fencing in step 2 and the CAS operation in step 5 prevent two nodes from thinking that they have leadership at any one time. - -The CAS operation will fail if the list of ledgers has changed between reading it and writing back the new list. When the CAS operation fails, the leader must start at step 1 again. Even better, it should check that it is in fact still the leader with the system that provides leader election. The protocol will work correctly without this step, though it will be able to make very little progress if two nodes think they are leader and are duelling for the log. - -The node must not serve any writes until step 5 completes successfully. - -### Rolling ledgers - -The leader may wish to close the current ledger and open a new one every so often. Ledgers can only be deleted as a whole. If you don't roll the log, you won't be able to clean up old entries in the log without a leader change. By closing the current ledger and adding a new one, the leader allows the log to be truncated whenever that data is no longer needed. The steps for rolling the log are similar to those for creating a new ledger. - -1. Create a new ledger -1. Add the new ledger to the ledger list -1. Write the new ledger list to the datastore using CAS -1. Close the previous ledger - -By deferring the closing of the previous ledger until step 4, we can continue writing to the log while we perform metadata update operations to add the new ledger. This is safe as long as you fence the last 2 ledgers when acquiring leadership. - diff --git a/site/docs/4.5.1/getting-started/concepts.md b/site/docs/4.5.1/getting-started/concepts.md deleted file mode 100644 index 7a3c92847b2..00000000000 --- a/site/docs/4.5.1/getting-started/concepts.md +++ /dev/null @@ -1,202 +0,0 @@ ---- -title: BookKeeper concepts and architecture -subtitle: The core components and how they work -prev: ../run-locally ---- - -BookKeeper is a service that provides persistent storage of streams of log [entries](#entries)---aka *records*---in sequences called [ledgers](#ledgers). BookKeeper replicates stored entries across multiple servers. - -## Basic terms - -In BookKeeper: - -* each unit of a log is an [*entry*](#entries) (aka record) -* streams of log entries are called [*ledgers*](#ledgers) -* individual servers storing ledgers of entries are called [*bookies*](#bookies) - -BookKeeper is designed to be reliable and resilient to a wide variety of failures.
Bookies can crash, corrupt data, or discard data, but as long as there are enough bookies behaving correctly in the ensemble the service as a whole will behave correctly. - -## Entries - -> **Entries** contain the actual data written to ledgers, along with some important metadata. - -BookKeeper entries are sequences of bytes that are written to [ledgers](#ledgers). Each entry has the following fields: - -Field | Java type | Description -:-----|:----------|:----------- -Ledger number | `long` | The ID of the ledger to which the entry has been written -Entry number | `long` | The unique ID of the entry -Last confirmed (LC) | `long` | The ID of the last recorded entry -Data | `byte[]` | The entry's data (written by the client application) -Authentication code | `byte[]` | The message auth code, which includes *all* other fields in the entry - -## Ledgers - -> **Ledgers** are the basic unit of storage in BookKeeper. - -Ledgers are sequences of entries, while each entry is a sequence of bytes. Entries are written to a ledger: - -* sequentially, and -* at most once. - -This means that ledgers have *append-only* semantics. Entries cannot be modified once they've been written to a ledger. Determining the proper write order is the responsibility of [client applications](#clients). - -## Clients and APIs - -> BookKeeper clients have two main roles: they create and delete ledgers, and they read entries from and write entries to ledgers. -> -> BookKeeper provides both a lower-level and a higher-level API for ledger interaction. - -There are currently two APIs that can be used for interacting with BookKeeper: - -* The [ledger API](../../api/ledger-api) is a lower-level API that enables you to interact with {% pop ledgers %} directly. -* The [DistributedLog API](../../api/distributedlog-api) is a higher-level API that enables you to use BookKeeper without directly interacting with ledgers. - -In general, you should choose the API based on how much granular control you need over ledger semantics. The two APIs can also both be used within a single application. - -## Bookies - -> **Bookies** are individual BookKeeper servers that handle ledgers (more specifically, fragments of ledgers). Bookies function as part of an ensemble. - -A bookie is an individual BookKeeper storage server. Individual bookies store fragments of ledgers, not entire ledgers (for the sake of performance). For any given ledger **L**, an *ensemble* is the group of bookies storing the entries in **L**. - -Whenever entries are written to a ledger, those entries are {% pop striped %} across the ensemble (written to a sub-group of bookies rather than to all bookies). - -### Motivation - -> BookKeeper was initially inspired by the NameNode server in HDFS but its uses now extend far beyond this. - -The initial motivation for BookKeeper comes from the [Hadoop](http://hadoop.apache.org/) ecosystem. In the [Hadoop Distributed File System](https://wiki.apache.org/hadoop/HDFS) (HDFS), a special node called the [NameNode](https://wiki.apache.org/hadoop/NameNode) logs all operations in a reliable fashion, which ensures that recovery is possible in case of crashes. - -The NameNode, however, served only as initial inspiration for BookKeeper. The applications for BookKeeper extend far beyond this and include essentially any application that requires an append-based storage system.
BookKeeper provides a number of advantages for such applications: - -* Highly efficient writes -* High fault tolerance via replication of messages within ensembles of bookies -* High throughput for write operations via {% pop striping %} (across as many bookies as you wish) - -## Metadata storage - -BookKeeper requires a metadata storage service to store information related to [ledgers](#ledgers) and available bookies. BookKeeper currently uses [ZooKeeper](https://zookeeper.apache.org) for this and other tasks. - -## Data management in bookies - -Bookies manage data in a [log-structured](https://en.wikipedia.org/wiki/Log-structured_file_system) way, which is implemented using three types of files: - -* [journals](#journals) -* [entry logs](#entry-logs) -* [index files](#index-files) - -### Journals - -A journal file contains BookKeeper transaction logs. Before any update to a ledger takes place, the bookie ensures that a transaction describing the update is written to non-volatile storage. A new journal file is created once the bookie starts or the older journal file reaches the journal file size threshold. - -### Entry logs - -An entry log file manages the written entries received from BookKeeper clients. Entries from different ledgers are aggregated and written sequentially, while their offsets are kept as pointers in a [ledger cache](#ledger-cache) for fast lookup. - -A new entry log file is created once the bookie starts or the older entry log file reaches the entry log size threshold. Old entry log files are removed by the Garbage Collector Thread once they are not associated with any active ledger. - -### Index files - -An index file is created for each ledger, which comprises a header and several fixed-length index pages that record the offsets of data stored in entry log files. - -Since updating index files would introduce random disk I/O, index files are updated lazily by a sync thread running in the background. This ensures speedy performance for updates. Before index pages are persisted to disk, they are gathered in a ledger cache for lookup. - -### Ledger cache - -Ledger index pages are cached in a memory pool, which allows for more efficient management of disk head scheduling. - -### Adding entries - -When a client instructs a {% pop bookie %} to write an entry to a ledger, the entry will go through the following steps to be persisted on disk: - -1. The entry is appended to an [entry log](#entry-logs) -1. The index of the entry is updated in the [ledger cache](#ledger-cache) -1. A transaction corresponding to this entry update is appended to the [journal](#journals) -1. A response is sent to the BookKeeper client - -> For performance reasons, the entry log buffers entries in memory and commits them in batches, while the ledger cache holds index pages in memory and flushes them lazily. This process is described in more detail in the [Data flush](#data-flush) section below. - -### Data flush - -Ledger index pages are flushed to index files in the following two cases: - -* The ledger cache memory limit is reached. There is no more space available to hold newer index pages. Dirty index pages will be evicted from the ledger cache and persisted to index files. -* A background sync thread is responsible for flushing index pages from the ledger cache to index files periodically. - -Besides flushing index pages, the sync thread is responsible for rolling journal files when journal files use too much disk space.
The data flush flow in the sync thread is as follows: - -* A `LastLogMark` is recorded in memory. The `LastLogMark` indicates that those entries before it have been persisted (to both index and entry log files) and contains two parts: - 1. A `txnLogId` (the file ID of a journal) - 1. A `txnLogPos` (offset in a journal) -* Dirty index pages are flushed from the ledger cache to the index file, and entry log files are flushed to ensure that all buffered entries in entry log files are persisted to disk. - - Ideally, a bookie only needs to flush index pages and entry log files that contain entries before `LastLogMark`. There is, however, no such information in the ledger and entry log mapping to journal files. Consequently, the thread flushes the ledger cache and entry log entirely here, and may flush entries after the `LastLogMark`. Flushing more is not a problem, though, just redundant. -* The `LastLogMark` is persisted to disk, which means that all entries added before the `LastLogMark` have had both their entry data and their index pages persisted to disk. It is now safe to remove journal files created earlier than `txnLogId`. - -If the bookie has crashed before persisting `LastLogMark` to disk, it still has journal files containing entries for which index pages may not have been persisted. Consequently, when this bookie restarts, it inspects journal files to restore those entries, and no data is lost. - -Using the above data flush mechanism, it is safe for the sync thread to skip data flushing when the bookie shuts down. However, the entry logger uses a buffered channel to write entries in batches, and there might be data still buffered in that channel upon shutdown. The bookie needs to ensure that the entry log flushes its buffered data during shutdown. Otherwise, entry log files become corrupted with partial entries. - -### Data compaction - -On bookies, entries of different ledgers are interleaved in entry log files. A bookie runs a garbage collector thread to delete entry log files that are no longer associated with any active ledgers, in order to reclaim disk space. If a given entry log file contains entries from a ledger that has not been deleted, then the entry log file would never be removed and the occupied disk space never reclaimed. In order to avoid such a case, a bookie server compacts entry log files in a garbage collector thread to reclaim disk space. - -There are two kinds of compaction running with different frequencies: minor compaction and major compaction. The differences between minor compaction and major compaction lie in their threshold value and compaction interval. - -* The garbage collection threshold is the size percentage of an entry log file occupied by those undeleted ledgers. The default minor compaction threshold is 0.2, while the major compaction threshold is 0.8. -* The garbage collection interval is how frequently to run the compaction. The default minor compaction interval is 1 hour, while the major compaction interval is 1 day. - -> If either the threshold or interval is set to less than or equal to zero, compaction is disabled. - -The data compaction flow in the garbage collector thread is as follows: - -* The thread scans entry log files to get their entry log metadata, which records a list of ledgers comprising an entry log and their corresponding percentages. -* With the normal garbage collection flow, once the bookie determines that a ledger has been deleted, the ledger will be removed from the entry log metadata and the size of the entry log reduced.
- -* If the remaining size of an entry log file reaches a specified threshold, the entries of active ledgers in the entry log will be copied to a new entry log file. -* Once all valid entries have been copied, the old entry log file is deleted. - -## ZooKeeper metadata - -BookKeeper requires a ZooKeeper installation for storing [ledger](#ledger) metadata. Whenever you construct a [`BookKeeper`](../../api/javadoc/org/apache/bookkeeper/client/BookKeeper) client object, you need to pass a list of ZooKeeper servers as a parameter to the constructor, like this: - -```java -String zkConnectionString = "127.0.0.1:2181"; -BookKeeper bkClient = new BookKeeper(zkConnectionString); -``` - -> For more info on using the BookKeeper Java client, see [this guide](../../api/ledger-api#the-java-ledger-api-client). - -## Ledger manager - -A *ledger manager* handles ledgers' metadata (which is stored in ZooKeeper). BookKeeper offers two types of ledger managers: the [flat ledger manager](#flat-ledger-manager) and the [hierarchical ledger manager](#hierarchical-ledger-manager). Both ledger managers extend the [`AbstractZkLedgerManager`](../../api/javadoc/org/apache/bookkeeper/meta/AbstractZkLedgerManager) abstract class. - -> #### Use the flat ledger manager in most cases -> The flat ledger manager is the default and is recommended for nearly all use cases. The hierarchical ledger manager is better suited only for managing very large numbers of BookKeeper ledgers (> 50,000). - -### Flat ledger manager - -The *flat ledger manager*, implemented in the [`FlatLedgerManager`](../../api/javadoc/org/apache/bookkeeper/meta/FlatLedgerManager.html) class, stores all ledgers' metadata in child nodes of a single ZooKeeper path. The flat ledger manager creates [sequential nodes](https://zookeeper.apache.org/doc/trunk/zookeeperProgrammers.html#Sequence+Nodes+--+Unique+Naming) to ensure the uniqueness of the ledger ID and prefixes all nodes with `L`. Bookie servers manage their own active ledgers in a hash map so that it's easy to find which ledgers have been deleted from ZooKeeper and then garbage collect them. - -The flat ledger manager's garbage collection flow proceeds as follows: - -* All existing ledgers are fetched from ZooKeeper (`zkActiveLedgers`) -* All ledgers currently active within the bookie are fetched (`bkActiveLedgers`) -* The currently active ledgers are looped through to determine which ledgers don't currently exist in ZooKeeper. Those are then garbage collected. - -### Hierarchical ledger manager - -The *hierarchical ledger manager*, implemented in the [`HierarchicalLedgerManager`](../../api/javadoc/org/apache/bookkeeper/meta/HierarchicalLedgerManager) class, stores ledgers' metadata in two-level [znodes](https://zookeeper.apache.org/doc/current/zookeeperOver.html#Nodes+and+ephemeral+nodes). It first obtains a global unique ID from ZooKeeper using an [`EPHEMERAL_SEQUENTIAL`](https://zookeeper.apache.org/doc/current/api/org/apache/zookeeper/CreateMode.html#EPHEMERAL_SEQUENTIAL) znode.
Since ZooKeeper's sequence counter has a format of `%10d` (10 digits with 0 padding, for example `0000000001`), the hierarchical ledger manager splits the generated ID into 3 parts: - -```shell -{level1 (2 digits)}{level2 (4 digits)}{level3 (4 digits)} -``` - -These three parts are used to form the actual ledger node path to store ledger metadata: - -```shell -{ledgers_root_path}/{level1}/{level2}/L{level3} -``` - -For example, ledger 0000000001 is split into three parts, 00, 0000, and 0001, and stored in znode `/{ledgers_root_path}/00/0000/L0001`. Each znode could have as many as 10,000 ledgers, which avoids the problem of the child list being larger than the maximum ZooKeeper packet size (which is the [limitation](https://issues.apache.org/jira/browse/BOOKKEEPER-39) that initially prompted the creation of the hierarchical ledger manager). diff --git a/site/docs/4.5.1/getting-started/installation.md b/site/docs/4.5.1/getting-started/installation.md deleted file mode 100644 index fac16ddd390..00000000000 --- a/site/docs/4.5.1/getting-started/installation.md +++ /dev/null @@ -1,74 +0,0 @@ ---- -title: BookKeeper installation -subtitle: Download or clone BookKeeper and build it locally -next: ../run-locally ---- - -{% capture download_url %}http://apache.claz.org/bookkeeper/bookkeeper-{{ site.latest_release }}/bookkeeper-{{ site.latest_release }}-src.tar.gz{% endcapture %} - -You can install BookKeeper either by [downloading](#download) a [GZipped](http://www.gzip.org/) tarball package or [cloning](#clone) the BookKeeper repository. - -## Requirements - -* [Unix environment](http://www.opengroup.org/unix) -* [Java Development Kit 1.6](http://www.oracle.com/technetwork/java/javase/downloads/index.html) or later -* [Maven 3.0](https://maven.apache.org/install.html) or later - -## Download - -You can download Apache BookKeeper releases from one of many [Apache mirrors](http://www.apache.org/dyn/closer.cgi/bookkeeper). Here's an example for the [apache.claz.org](http://apache.claz.org/bookkeeper) mirror: - -```shell -$ curl -O {{ download_url }} -$ tar xvf bookkeeper-{{ site.latest_release }}-src.tar.gz -$ cd bookkeeper-{{ site.latest_release }} -``` - -## Clone - -To build BookKeeper from source, clone the repository, either from the [GitHub mirror]({{ site.github_repo }}) or from the [Apache repository](http://git.apache.org/bookkeeper.git/): - -```shell -# From the GitHub mirror -$ git clone {{ site.github_repo}} - -# From Apache directly -$ git clone git://git.apache.org/bookkeeper.git/ -``` - -## Build using Maven - -Once you have BookKeeper on your local machine, either by [downloading](#download) or [cloning](#clone) it, you can then build BookKeeper from source using Maven: - -```shell -$ mvn package -``` - -> You can skip tests by adding the `-DskipTests` flag when running `mvn package`.
- -### Useful Maven commands - -Some other useful Maven commands beyond `mvn package`: - -Command | Action -:-------|:------ -`mvn clean` | Removes build artifacts -`mvn compile` | Compiles JAR files from Java sources -`mvn compile findbugs:findbugs` | Compiles using the Maven [FindBugs](http://gleclaire.github.io/findbugs-maven-plugin) plugin -`mvn install` | Installs the BookKeeper JAR in your local Maven cache (usually in the `~/.m2` directory) -`mvn deploy` | Deploys the BookKeeper JAR to the Maven repo (if you have the proper credentials) -`mvn verify` | Performs a wide variety of verification and validation tasks -`mvn apache-rat:check` | Runs Maven using the [Apache Rat](http://creadur.apache.org/rat/apache-rat-plugin/) plugin -`mvn compile javadoc:aggregate` | Builds Javadocs locally -`mvn package assembly:single` | Builds a complete distribution using the Maven [Assembly](http://maven.apache.org/plugins/maven-assembly-plugin/) plugin - -## Package directory - -The BookKeeper project contains several subfolders that you should be aware of: - -Subfolder | Contains -:---------|:-------- -[`bookkeeper-server`]({{ site.github_repo }}/tree/master/bookkeeper-server) | The BookKeeper server and client -[`bookkeeper-benchmark`]({{ site.github_repo }}/tree/master/bookkeeper-benchmark) | A benchmarking suite for measuring BookKeeper performance -[`bookkeeper-stats`]({{ site.github_repo }}/tree/master/bookkeeper-stats) | A BookKeeper stats library -[`bookkeeper-stats-providers`]({{ site.github_repo }}/tree/master/bookkeeper-stats-providers) | BookKeeper stats providers diff --git a/site/docs/4.5.1/getting-started/run-locally.md b/site/docs/4.5.1/getting-started/run-locally.md deleted file mode 100644 index ab33642c852..00000000000 --- a/site/docs/4.5.1/getting-started/run-locally.md +++ /dev/null @@ -1,16 +0,0 @@ ---- -title: Run bookies locally -prev: ../installation -next: ../concepts -toc_disable: true ---- - -{% pop Bookies %} are individual BookKeeper servers. You can run an ensemble of bookies locally on a single machine using the [`localbookie`](../../reference/cli#bookkeeper-localbookie) command of the `bookkeeper` CLI tool and specifying the number of bookies you'd like to include in the ensemble. - -This would start up an ensemble with 10 bookies: - -```shell -$ bookkeeper-server/bin/bookkeeper localbookie 10 -``` - -> When you start up an ensemble using `localbookie`, all bookies run in a single JVM process. diff --git a/site/docs/4.5.1/overview/overview.md b/site/docs/4.5.1/overview/overview.md deleted file mode 100644 index 9bef23255bd..00000000000 --- a/site/docs/4.5.1/overview/overview.md +++ /dev/null @@ -1,59 +0,0 @@ ---- -title: Apache BookKeeper™ 4.5.1 ---- - - -This documentation is for Apache BookKeeper™ version `4.5.1`. - -Apache BookKeeper™ is a scalable, fault-tolerant, and low latency storage service optimized for realtime workloads. -It offers durability, replication, and strong consistency as essentials for building reliable real-time applications.
- -BookKeeper is well suited for scenarios like these: - -Scenario | Example -:--------|:------- -[WAL](https://en.wikipedia.org/wiki/Write-ahead_logging) (Write-Ahead-Logging) | The HDFS [namenode](https://hadoop.apache.org/docs/r2.5.2/hadoop-project-dist/hadoop-hdfs/HDFSHighAvailabilityWithNFS.html#BookKeeper_as_a_Shared_storage_EXPERIMENTAL) -Message storage | [Apache Pulsar](https://pulsar.incubator.apache.org/) -Offset/cursor storage | Apache Pulsar -Object/BLOB storage | Storing snapshots of replicated state machines. - -Learn more about Apache BookKeeper and what it can do for your organization: - -- [Apache BookKeeper 4.5.1 Release Notes](../releaseNotes) -- [Java API docs](../../api/javadoc) - -Or start using Apache BookKeeper today. - -### Users - -- **Concepts**: Start with [concepts](../../getting-started/concepts). This will help you to fully understand - the other parts of the documentation, including the setup, integration and operation guides. -- **Getting Started**: Install [Apache BookKeeper](../../getting-started/installation) and run bookies [locally](../../getting-started/run-locally). -- **API**: Read the [API](../../api/overview) documentation to learn how to use Apache BookKeeper to build your applications. -- **Deployment**: The [Deployment Guide](../../deployment/manual) shows how to deploy Apache BookKeeper to production clusters. - -### Administrators - -- **Operations**: The [Admin Guide](../../admin/bookies) shows how to run Apache BookKeeper in production, covering production - considerations and best practices. - -### Contributors - -- **Details**: Learn the [design details](../../development/protocol) to understand more about BookKeeper internals. diff --git a/site/docs/4.5.1/overview/releaseNotes.md b/site/docs/4.5.1/overview/releaseNotes.md deleted file mode 100644 index 3b55869b3c0..00000000000 --- a/site/docs/4.5.1/overview/releaseNotes.md +++ /dev/null @@ -1,28 +0,0 @@ ---- -title: Apache BookKeeper 4.5.1 Release Notes ---- - -This is the sixth release of BookKeeper as an Apache Top Level Project! - -The 4.5.1 release is a bugfix release which fixes a bunch of issues reported by users of 4.5.0. - -Apache BookKeeper users are encouraged to upgrade to 4.5.1. The technical details of this release are summarized -below. - -## Highlights - -- Fix critical bug on Parallel Recovery, see [https://github.com/apache/bookkeeper/issues/343](https://github.com/apache/bookkeeper/issues/343) - -- Fix critical bug on Prometheus stats provider, see [https://github.com/apache/bookkeeper/pull/535](https://github.com/apache/bookkeeper/pull/535) - -- Fix critical bug on ledger length for LedgerHandleAdv, see [https://github.com/apache/bookkeeper/issues/683](https://github.com/apache/bookkeeper/issues/683) - -- Fix critical bug on RackAwarePolicy, see [https://github.com/apache/bookkeeper/issues/551](https://github.com/apache/bookkeeper/issues/551) - -### Dependencies Upgrade - -There is no dependency upgrade since 4.5.0.
- -## Full list of changes - -- [https://github.com/apache/bookkeeper/issues?utf8=%E2%9C%93&q=label%3Arelease%2F4.5.1%20](https://github.com/apache/bookkeeper/issues?utf8=%E2%9C%93&q=label%3Arelease%2F4.5.1%20) diff --git a/site/docs/4.5.1/overview/releaseNotesTemplate.md b/site/docs/4.5.1/overview/releaseNotesTemplate.md deleted file mode 100644 index ea00419aa05..00000000000 --- a/site/docs/4.5.1/overview/releaseNotesTemplate.md +++ /dev/null @@ -1,17 +0,0 @@ ---- -title: Apache BookKeeper x.x.x Release Notes ---- - -[provide a summary of this release] - -Apache BookKeeper users are encouraged to upgrade to x.x.x. The technical details of this release are summarized -below. - -## Highlights - -[List the highlights] - -## Details - -[link to the issues list] - diff --git a/site/docs/4.5.1/reference/cli.md b/site/docs/4.5.1/reference/cli.md deleted file mode 100644 index 8beb36ff071..00000000000 --- a/site/docs/4.5.1/reference/cli.md +++ /dev/null @@ -1,10 +0,0 @@ ---- -title: BookKeeper CLI tool reference -subtitle: A reference guide to the command-line tools that you can use to administer BookKeeper ---- - -{% include cli.html id="bookkeeper" %} - -## The BookKeeper shell - -{% include shell.html %} diff --git a/site/docs/4.5.1/reference/config.md b/site/docs/4.5.1/reference/config.md deleted file mode 100644 index 8997b6b62f0..00000000000 --- a/site/docs/4.5.1/reference/config.md +++ /dev/null @@ -1,9 +0,0 @@ ---- -title: BookKeeper configuration -subtitle: A reference guide to all of BookKeeper's configurable parameters ---- - - -The table below lists parameters that you can set to configure {% pop bookies %}. All configuration takes place in the `bk_server.conf` file in the `bookkeeper-server/conf` directory of your [BookKeeper installation](../../getting-started/installing). - -{% include config.html id="bk_server" %} diff --git a/site/docs/4.5.1/reference/metrics.md b/site/docs/4.5.1/reference/metrics.md deleted file mode 100644 index 8bd6fe0a165..00000000000 --- a/site/docs/4.5.1/reference/metrics.md +++ /dev/null @@ -1,3 +0,0 @@ ---- -title: BookKeeper metrics reference ---- diff --git a/site/docs/4.5.1/security/overview.md b/site/docs/4.5.1/security/overview.md deleted file mode 100644 index 62da8edae76..00000000000 --- a/site/docs/4.5.1/security/overview.md +++ /dev/null @@ -1,21 +0,0 @@ ---- -title: BookKeeper Security -next: ../tls ---- - -In the 4.5.0 release, the BookKeeper community added a number of features that can be used, together or separately, to secure a BookKeeper cluster. -The following security measures are currently supported: - -1. Authentication of connections to bookies from clients, using either [TLS](../tls) or [SASL (Kerberos)](../sasl). -2. Authentication of connections from clients, bookies, and autorecovery daemons to [ZooKeeper](../zookeeper), when using ZooKeeper-based ledger managers. -3. Encryption of data transferred between bookies and clients, and between bookies and autorecovery daemons, using [TLS](../tls). - -It’s worth noting that security is optional - non-secured clusters are supported, as well as a mix of authenticated, unauthenticated, encrypted and non-encrypted clients. - -NOTE: currently `authorization` is not yet available in `4.5.0`. The Apache BookKeeper community is looking to add this feature in subsequent releases.
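- -As a quick illustration of how a client opts into one of these mechanisms, here is a hedged sketch that configures the SASL client auth provider programmatically; it assumes `ClientConfiguration` exposes the `setZkServers` and `setClientAuthProviderFactoryClass` setters corresponding to the configuration properties used elsewhere in these docs (setting `clientAuthProviderFactoryClass` in the client configuration file works equally well), and the ZooKeeper address is a placeholder: - -```java -import org.apache.bookkeeper.client.BookKeeper; -import org.apache.bookkeeper.conf.ClientConfiguration; - -public class SecureClientSketch { -    public static void main(String[] args) throws Exception { -        ClientConfiguration conf = new ClientConfiguration(); -        conf.setZkServers("zk1.example.com:2181"); // placeholder ZooKeeper ensemble -        // Enable the SASL (Kerberos) authentication provider. -        conf.setClientAuthProviderFactoryClass( -            "org.apache.bookkeeper.sasl.SASLClientProviderFactory"); -        // The JAAS login context is supplied separately via the JVM flag: -        // -Djava.security.auth.login.config=/etc/bookkeeper/bookkeeper_jaas.conf -        BookKeeper client = new BookKeeper(conf); -        client.close(); -    } -} -```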
- -## Next Steps - -- [Encryption and Authentication using TLS](../tls) -- [Authentication using SASL](../sasl) -- [ZooKeeper Authentication](../zookeeper) diff --git a/site/docs/4.5.1/security/sasl.md b/site/docs/4.5.1/security/sasl.md deleted file mode 100644 index ffb972a8936..00000000000 --- a/site/docs/4.5.1/security/sasl.md +++ /dev/null @@ -1,202 +0,0 @@ ---- -title: Authentication using SASL -prev: ../tls -next: ../zookeeper ---- - -Bookies support client authentication via SASL. Currently we only support GSSAPI (Kerberos). We will start -with a general description of how to configure `SASL` for bookies, clients and autorecovery daemons, followed -by mechanism-specific details, and wrap up with some operational details. - -## SASL configuration for Bookies - -1. Select the mechanisms to enable in the bookies. `GSSAPI` is the only mechanism currently supported by BookKeeper. -2. Add a `JAAS` config file for the selected mechanisms as described in the examples for setting up [GSSAPI (Kerberos)](#kerberos). -3. Pass the `JAAS` config file location as a JVM parameter to each Bookie. For example: - - ```shell - -Djava.security.auth.login.config=/etc/bookkeeper/bookie_jaas.conf - ``` - -4. Enable the SASL auth plugin in bookies, by setting `bookieAuthProviderFactoryClass` to `org.apache.bookkeeper.sasl.SASLBookieAuthProviderFactory`. - - - ```shell - bookieAuthProviderFactoryClass=org.apache.bookkeeper.sasl.SASLBookieAuthProviderFactory - ``` - -5. If you are running `autorecovery` along with bookies, then you should also enable the SASL auth plugin for `autorecovery`, by setting - `clientAuthProviderFactoryClass` to `org.apache.bookkeeper.sasl.SASLClientProviderFactory`. - - ```shell - clientAuthProviderFactoryClass=org.apache.bookkeeper.sasl.SASLClientProviderFactory - ``` - -6. Follow the steps in [GSSAPI (Kerberos)](#kerberos) to configure SASL. - -#### Important Notes - -1. `Bookie` is a section name in the JAAS file used by each bookie. This section tells the bookie which principal to use - and the location of the keytab where the principal is stored. It allows the bookie to log in using the keytab specified in this section. -2. `Auditor` is a section name in the JAAS file used by the `autorecovery` daemon (it can be co-run with bookies). This section tells the - `autorecovery` daemon which principal to use and the location of the keytab where the principal is stored. It allows the daemon to - log in using the keytab specified in this section. -3. The `Client` section is used to authenticate a SASL connection with ZooKeeper. It also allows the bookies to set ACLs on ZooKeeper nodes - which locks these nodes down so that only the bookies can modify them. It is necessary to have the same primary name across all bookies. - If you want to use a section name other than `Client`, set the system property `zookeeper.sasl.client` to the appropriate name - (e.g `-Dzookeeper.sasl.client=ZKClient`). -4. ZooKeeper uses `zookeeper` as the service name by default. If you want to change this, set the system property - `zookeeper.sasl.client.username` to the appropriate name (e.g. `-Dzookeeper.sasl.client.username=zk`). - -## SASL configuration for Clients - -To configure `SASL` authentication on the clients: - -1. Select a `SASL` mechanism for authentication and add a `JAAS` config file for the selected mechanism as described in the examples for - setting up [GSSAPI (Kerberos)](#kerberos). -2. Pass the `JAAS` config file location as a JVM parameter to each client JVM.
For example: - - ```shell - -Djava.security.auth.login.config=/etc/bookkeeper/bookkeeper_jaas.conf - ``` - -3. Configure the following properties in the bookkeeper `ClientConfiguration`: - - ```shell - clientAuthProviderFactoryClass=org.apache.bookkeeper.sasl.SASLClientProviderFactory - ``` - -Follow the steps in [GSSAPI (Kerberos)](#kerberos) to configure SASL for the selected mechanism. - -## Authentication using SASL/Kerberos - -### Prerequisites - -#### Kerberos - -If your organization is already using a Kerberos server (for example, by using `Active Directory`), there is no need to -install a new server just for BookKeeper. Otherwise you will need to install one. Your Linux vendor likely has packages -for `Kerberos` and a short guide on how to install and configure it ([Ubuntu](https://help.ubuntu.com/community/Kerberos), -[Redhat](https://access.redhat.com/documentation/en-US/Red_Hat_Enterprise_Linux/6/html/Managing_Smart_Cards/installing-kerberos.html)). -Note that if you are using Oracle Java, you will need to download JCE policy files for your Java version and copy them to `$JAVA_HOME/jre/lib/security`. - -#### Kerberos Principals - -If you are using the organization’s Kerberos or Active Directory server, ask your Kerberos administrator for a principal -for each Bookie in your cluster and for every operating system user that will access BookKeeper with Kerberos authentication -(via clients and tools). - -If you have installed your own Kerberos, you will need to create these principals yourself using the following commands: - -```shell -sudo /usr/sbin/kadmin.local -q 'addprinc -randkey bookkeeper/{hostname}@{REALM}' -sudo /usr/sbin/kadmin.local -q "ktadd -k /etc/security/keytabs/{keytabname}.keytab bookkeeper/{hostname}@{REALM}" -``` - -##### All hosts must be reachable using hostnames - -It is a *Kerberos* requirement that all your hosts can be resolved with their FQDNs. - -### Configuring Bookies - -1. Add a suitably modified JAAS file similar to the one below to each Bookie’s config directory, let’s call it `bookie_jaas.conf` -for this example (note that each bookie should have its own keytab): - - ``` - Bookie { - com.sun.security.auth.module.Krb5LoginModule required - useKeyTab=true - storeKey=true - keyTab="/etc/security/keytabs/bookie.keytab" - principal="bookkeeper/bk1.hostname.com@EXAMPLE.COM"; - }; - // ZooKeeper client authentication - Client { - com.sun.security.auth.module.Krb5LoginModule required - useKeyTab=true - storeKey=true - keyTab="/etc/security/keytabs/bookie.keytab" - principal="bookkeeper/bk1.hostname.com@EXAMPLE.COM"; - }; - // If you are running `autorecovery` along with bookies - Auditor { - com.sun.security.auth.module.Krb5LoginModule required - useKeyTab=true - storeKey=true - keyTab="/etc/security/keytabs/bookie.keytab" - principal="bookkeeper/bk1.hostname.com@EXAMPLE.COM"; - }; - ``` - - The `Bookie` section in the JAAS file tells the bookie which principal to use and the location of the keytab where this principal is stored. - It allows the bookie to log in using the keytab specified in this section. See the [notes](#notes) for more details on ZooKeeper’s SASL configuration. - -2.
Pass the name of the JAAS file as a JVM parameter to each Bookie: - - ```shell - -Djava.security.auth.login.config=/etc/bookkeeper/bookie_jaas.conf - ``` - - You may also wish to specify the path to the `krb5.conf` file - (see [JDK’s Kerberos Requirements](https://docs.oracle.com/javase/8/docs/technotes/guides/security/jgss/tutorials/KerberosReq.html) for more details): - - ```shell - -Djava.security.krb5.conf=/etc/bookkeeper/krb5.conf - ``` - -3. Make sure the keytabs configured in the JAAS file are readable by the operating system user who is starting the Bookies. - -4. Enable the SASL authentication plugin in the bookies by setting the following parameters. - - ```shell - bookieAuthProviderFactoryClass=org.apache.bookkeeper.sasl.SASLBookieAuthProviderFactory - # if you run `autorecovery` along with bookies - clientAuthProviderFactoryClass=org.apache.bookkeeper.sasl.SASLClientProviderFactory - ``` - -### Configuring Clients - -To configure SASL authentication on the clients: - -1. Clients will authenticate to the cluster with their own principal (usually with the same name as the user running the client), - so obtain or create these principals as needed. Then create a `JAAS` file for each principal. The `BookKeeper` section describes - how clients, such as writers and readers, can connect to the Bookies. The following is an example configuration for a client using - a keytab (recommended for long-running processes): - - ``` - BookKeeper { - com.sun.security.auth.module.Krb5LoginModule required - useKeyTab=true - storeKey=true - keyTab="/etc/security/keytabs/bookkeeper.keytab" - principal="bookkeeper-client-1@EXAMPLE.COM"; - }; - ``` - - -2. Pass the name of the JAAS file as a JVM parameter to the client JVM: - - ```shell - -Djava.security.auth.login.config=/etc/bookkeeper/bookkeeper_jaas.conf - ``` - - You may also wish to specify the path to the `krb5.conf` file (see - [JDK’s Kerberos Requirements](https://docs.oracle.com/javase/8/docs/technotes/guides/security/jgss/tutorials/KerberosReq.html) for more details). - - ```shell - -Djava.security.krb5.conf=/etc/bookkeeper/krb5.conf - ``` - - -3. Make sure the keytabs configured in the `bookkeeper_jaas.conf` are readable by the operating system user who is starting the BookKeeper client. - -4. Enable the SASL authentication plugin in the client by setting the following parameters. - - ```shell - clientAuthProviderFactoryClass=org.apache.bookkeeper.sasl.SASLClientProviderFactory - ``` - -## Enabling Logging for SASL - -To enable SASL debug output, you can set the `sun.security.krb5.debug` system property to `true`. - diff --git a/site/docs/4.5.1/security/tls.md b/site/docs/4.5.1/security/tls.md deleted file mode 100644 index cd250ab2aa5..00000000000 --- a/site/docs/4.5.1/security/tls.md +++ /dev/null @@ -1,210 +0,0 @@ ---- -title: Encryption and Authentication using TLS -prev: ../overview -next: ../sasl ---- - -Apache BookKeeper allows clients and autorecovery daemons to communicate over TLS, although this is not enabled by default. - -## Overview - -The bookies need their own key and certificate in order to use TLS. Clients can optionally provide a key and a certificate -for mutual authentication. Each bookie or client can also be configured with a truststore, which is used to -determine which certificates (bookie or client identities) to trust (authenticate). - -The truststore can be configured in many ways. To understand the truststore, consider the following two examples: - -1. the truststore contains one or many certificates; -2.
it contains a certificate authority (CA). - -In (1), with a list of certificates, the bookie or client will trust any certificate listed in the truststore. -In (2), with a CA, the bookie or client will trust any certificate that was signed by the CA in the truststore. - -(TBD: benefits) - -## Generate TLS key and certificate - -The first step of deploying TLS is to generate the key and the certificate for each machine in the cluster. -You can use Java’s `keytool` utility to accomplish this task. We will generate the key into a temporary keystore -initially so that we can export and sign it later with the CA. - -```shell -keytool -keystore bookie.keystore.jks -alias localhost -validity {validity} -genkey -``` - -You need to specify two parameters in the above command: - -1. `keystore`: the keystore file that stores the certificate. The *keystore* file contains the private key of - the certificate; hence, it needs to be kept safely. -2. `validity`: the valid time of the certificate in days.
-Ensure that the common name (CN) matches exactly the fully qualified domain name (FQDN) of the server. -The client compares the CN with the DNS domain name to ensure that it is indeed connecting to the desired server, not a malicious one. -
- -## Creating your own CA - -After the first step, each machine in the cluster has a public-private key pair, and a certificate to identify the machine. -The certificate, however, is unsigned, which means that an attacker can create such a certificate to pretend to be any machine. - -Therefore, it is important to prevent forged certificates by signing them for each machine in the cluster. -A `certificate authority (CA)` is responsible for signing certificates. A CA works like a government that issues passports — -the government stamps (signs) each passport so that the passport becomes difficult to forge. Other governments verify the stamps -to ensure the passport is authentic. Similarly, the CA signs the certificates, and the cryptography guarantees that a signed -certificate is computationally difficult to forge. Thus, as long as the CA is a genuine and trusted authority, the clients have -high assurance that they are connecting to the authentic machines. - -```shell -openssl req -new -x509 -keyout ca-key -out ca-cert -days 365 -``` - -The generated CA is simply a *public-private* key pair and certificate, and it is intended to sign other certificates. - -The next step is to add the generated CA to the clients' truststore so that the clients can trust this CA: - -```shell -keytool -keystore bookie.truststore.jks -alias CARoot -import -file ca-cert -``` - -NOTE: If you configure the bookies to require client authentication by setting `tlsClientAuthentication` to `true` on the -[bookie config](../../reference/config), then you must also provide a truststore for the bookies and it should have all the CA -certificates that clients' keys were signed by. - -```shell -keytool -keystore client.truststore.jks -alias CARoot -import -file ca-cert -``` - -In contrast to the keystore, which stores each machine’s own identity, the truststore of a client stores all the certificates -that the client should trust. Importing a certificate into one’s truststore also means trusting all certificates that are signed -by that certificate. As in the analogy above, trusting the government (CA) also means trusting all passports (certificates) that -it has issued. This attribute is called the chain of trust, and it is particularly useful when deploying TLS on a large BookKeeper cluster. -You can sign all certificates in the cluster with a single CA, and have all machines share the same truststore that trusts the CA. -That way all machines can authenticate all other machines. - -## Signing the certificate - -The next step is to sign all certificates in the keystore with the CA we generated. First, you need to export the certificate from the keystore: - -```shell -keytool -keystore bookie.keystore.jks -alias localhost -certreq -file cert-file -``` - -Then sign it with the CA: - -```shell -openssl x509 -req -CA ca-cert -CAkey ca-key -in cert-file -out cert-signed -days {validity} -CAcreateserial -passin pass:{ca-password} -``` - -Finally, you need to import both the certificate of the CA and the signed certificate into the keystore: - -```shell -keytool -keystore bookie.keystore.jks -alias CARoot -import -file ca-cert -keytool -keystore bookie.keystore.jks -alias localhost -import -file cert-signed -``` - -The definitions of the parameters are the following: - -1. `keystore`: the location of the keystore -2. `ca-cert`: the certificate of the CA -3. `ca-key`: the private key of the CA -4. `ca-password`: the passphrase of the CA -5. `cert-file`: the exported, unsigned certificate of the bookie -6.
`cert-signed`: the signed certificate of the bookie - -(TBD: add a script to automatically generate truststores and keystores.) - -## Configuring Bookies - -Bookies support TLS for connections on the same service port. In order to enable TLS, you need to configure `tlsProvider` to be either -`JDK` or `OpenSSL`. If `OpenSSL` is configured, it will use `netty-tcnative-boringssl-static`, which loads the corresponding native binding -for the platform the bookies run on. - -> The current `OpenSSL` implementation doesn't depend on the OpenSSL library installed on the system. If you want to leverage the OpenSSL installed on -the system, you can check [this example](http://netty.io/wiki/forked-tomcat-native.html) on how to replace the JARs on the classpath with -netty bindings that use the installed OpenSSL. - -The following TLS configs are needed on the bookie side: - -```shell -tlsProvider=OpenSSL -# key store -tlsKeyStoreType=JKS -tlsKeyStore=/var/private/tls/bookie.keystore.jks -tlsKeyStorePasswordPath=/var/private/tls/bookie.keystore.passwd -# trust store -tlsTrustStoreType=JKS -tlsTrustStore=/var/private/tls/bookie.truststore.jks -tlsTrustStorePasswordPath=/var/private/tls/bookie.truststore.passwd -``` - -NOTE: it is important to restrict access to the store files and corresponding password files via filesystem permissions. - -Optional settings that are worth considering: - -1. `tlsClientAuthentication=false`: Enables/disables using TLS for authentication. When enabled, this config will authenticate the other end - of the communication channel. It should be enabled on both bookies and clients for mutual TLS. -2. `tlsEnabledCipherSuites=`: A cipher suite is a named combination of authentication, encryption, MAC and key exchange - algorithms used to negotiate the security settings for a network connection using the TLS network protocol. By default, - it is null. See [OpenSSL Ciphers](https://www.openssl.org/docs/man1.0.2/apps/ciphers.html) and - [JDK Ciphers](http://docs.oracle.com/javase/8/docs/technotes/guides/security/StandardNames.html#ciphersuites). -3. `tlsEnabledProtocols=TLSv1.2,TLSv1.1,TLSv1`: Lists the TLS protocols that you are going to accept from clients. - By default, it is not set. - -To verify that the bookie's keystore and truststore are set up correctly, you can run the following command: - -```shell -openssl s_client -debug -connect localhost:3181 -tls1 -``` - -NOTE: TLSv1 should be listed under `tlsEnabledProtocols`. - -In the output of this command you should see the server's certificate: - -```shell ------BEGIN CERTIFICATE----- -{variable sized random bytes} ------END CERTIFICATE----- -``` - -If the certificate does not show up or if there are any other error messages then your keystore is not set up correctly. - -## Configuring Clients - -TLS is supported only for the new BookKeeper client (BookKeeper versions 4.5.0 and higher); the older clients are not -supported. The TLS configs for clients are similar to those for bookies. - -If client authentication is not required by the bookies, the following is a minimal configuration example: - -```shell -tlsProvider=OpenSSL -clientTrustStore=/var/private/tls/client.truststore.jks -clientTrustStorePasswordPath=/var/private/tls/client.truststore.passwd -``` - -If client authentication is required, then a keystore must be created for each client, and the bookies' truststores must -trust the certificate in the client's keystore. This may be done using commands that are similar to what we used for -the [bookie keystore](#bookie-keystore).
- -And the following must also be configured: - -```shell -tlsClientAuthentication=true -clientKeyStore=/var/private/tls/client.keystore.jks -clientKeyStorePasswordPath=/var/private/tls/client.keystore.passwd -``` - -NOTE: it is important to restrict access to the store files and corresponding password files via filesystem permissions. - -(TBD: add example to use tls in bin/bookkeeper script?) - -## Enabling TLS Logging - -You can enable TLS debug logging at the JVM level by starting the bookies and/or clients with the `javax.net.debug` system property. For example: - -```shell --Djavax.net.debug=all -``` - -You can find more details on this in the [Oracle documentation](http://docs.oracle.com/javase/8/docs/technotes/guides/security/jsse/ReadDebug.html) on -[debugging SSL/TLS connections](http://docs.oracle.com/javase/8/docs/technotes/guides/security/jsse/ReadDebug.html). diff --git a/site/docs/4.5.1/security/zookeeper.md b/site/docs/4.5.1/security/zookeeper.md deleted file mode 100644 index e16be69a1d3..00000000000 --- a/site/docs/4.5.1/security/zookeeper.md +++ /dev/null @@ -1,41 +0,0 @@ ---- -title: ZooKeeper Authentication -prev: ../sasl ---- - -## New Clusters - -To enable `ZooKeeper` authentication on Bookies or Clients, there are two necessary steps: - -1. Create a `JAAS` login file and set the appropriate system property to point to it as described in [GSSAPI (Kerberos)](../sasl#notes). -2. Set the configuration property `zkEnableSecurity` in each bookie to `true`. - -The metadata stored in `ZooKeeper` is such that only certain clients will be able to modify and read the corresponding znodes. -The rationale behind this decision is that the data stored in ZooKeeper is not sensitive, but inappropriate manipulation of znodes can cause cluster -disruption. - -## Migrating Clusters - -If you are running a version of BookKeeper that does not support security, or are simply running with security disabled, and you want to make the cluster secure, -then you need to execute the following steps to enable ZooKeeper authentication with minimal disruption to your operations. - -1. Perform a rolling restart setting the `JAAS` login file, which enables bookies or clients to authenticate. At the end of the rolling restart, - bookies (or clients) are able to manipulate znodes with strict ACLs, but they will not create znodes with those ACLs. -2. Perform a second rolling restart of bookies, this time setting the configuration parameter `zkEnableSecurity` to `true`, which enables the use - of secure ACLs when creating znodes. -3. Currently we don't provide a tool to set ACLs on old znodes. You are recommended to set them manually using ZooKeeper tools. - -It is also possible to turn off authentication in a secured cluster. To do it, follow these steps: - -1. Perform a rolling restart of bookies setting the `JAAS` login file, which enables bookies to authenticate, but setting `zkEnableSecurity` to `false`. - At the end of the rolling restart, bookies stop creating znodes with secure ACLs, but are still able to authenticate and manipulate all znodes. -2. You can use ZooKeeper tools to manually reset all ACLs under the znode set in `zkLedgersRootPath`, which defaults to `/ledgers`. -3. Perform a second rolling restart of bookies, this time omitting the system property that sets the `JAAS` login file. - -## Migrating the ZooKeeper ensemble - -It is also necessary to enable authentication on the `ZooKeeper` ensemble. To do it, we need to perform a rolling restart of the ensemble and -set a few properties.
Please refer to the ZooKeeper documentation for more details. - -1. [Apache ZooKeeper Documentation](http://zookeeper.apache.org/doc/r3.4.6/zookeeperProgrammers.html#sc_ZooKeeperAccessControl) -2. [Apache ZooKeeper Wiki](https://cwiki.apache.org/confluence/display/ZOOKEEPER/Zookeeper+and+SASL) diff --git a/site/docs/4.6.0/admin/autorecovery.md b/site/docs/4.6.0/admin/autorecovery.md deleted file mode 100644 index b1dd078f9b2..00000000000 --- a/site/docs/4.6.0/admin/autorecovery.md +++ /dev/null @@ -1,128 +0,0 @@ ---- -title: Using AutoRecovery ---- - -When a {% pop bookie %} crashes, all {% pop ledgers %} on that bookie become under-replicated. In order to bring all ledgers in your BookKeeper cluster back to full replication, you'll need to *recover* the data from any offline bookies. There are two ways to recover bookies' data: - -1. Using [manual recovery](#manual-recovery) -1. Automatically, using [*AutoRecovery*](#autorecovery) - -## Manual recovery - -You can manually recover failed bookies using the [`bookkeeper`](../../reference/cli) command-line tool. You need to specify: - -* the `shell recover` option -* an IP and port for your BookKeeper cluster's ZooKeeper ensemble -* the IP and port for the failed bookie - -Here's an example: - -```bash -$ bookkeeper-server/bin/bookkeeper shell recover \ - zk1.example.com:2181 \ # IP and port for ZooKeeper - 192.168.1.10:3181 # IP and port for the failed bookie -``` - -If you wish, you can also specify which bookie you'd like to rereplicate to. Here's an example: - -```bash -$ bookkeeper-server/bin/bookkeeper shell recover \ - zk1.example.com:2181 \ # IP and port for ZooKeeper - 192.168.1.10:3181 \ # IP and port for the failed bookie - 192.168.1.11:3181 # IP and port for the bookie to rereplicate to -``` - -### The manual recovery process - -When you initiate a manual recovery process, the following happens: - -1. The client (the process running the recovery) reads the metadata of active ledgers from ZooKeeper. -1. The ledgers that contain fragments from the failed bookie in their ensemble are selected. -1. A recovery process is initiated for each ledger in this list and the rereplication process is run for each ledger. -1. Once all the ledgers are marked as fully replicated, bookie recovery is finished. - -## AutoRecovery - -AutoRecovery is a process that: - -* automatically detects when a {% pop bookie %} in your BookKeeper cluster has become unavailable and then -* rereplicates all the {% pop ledgers %} that were stored on that bookie. - -AutoRecovery can be run in two ways: - -1. On dedicated nodes in your BookKeeper cluster -1. On the same machines on which your bookies are running - -## Running AutoRecovery - -You can start up AutoRecovery using the [`autorecovery`](../../reference/cli#bookkeeper-autorecovery) command of the [`bookkeeper`](../../reference/cli) CLI tool. - -```bash -$ bookkeeper-server/bin/bookkeeper autorecovery -``` - -> The most important thing to ensure when starting up AutoRecovery is that the ZooKeeper connection string specified by the [`zkServers`](../../reference/config#zkServers) parameter points to the right ZooKeeper cluster. - -If you start up AutoRecovery on a machine that is already running a bookie, then the AutoRecovery process will run alongside the bookie on a separate thread. - -You can also start up AutoRecovery on a fresh machine if you'd like to create a dedicated cluster of AutoRecovery nodes.
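- -The recovery performed by `shell recover` (see the [manual recovery](#manual-recovery) section above) can also be triggered programmatically. The sketch below uses the `BookKeeperAdmin` API; treat it as illustrative only, since the exact `recoverBookieData` signatures vary between BookKeeper releases, and the addresses shown are placeholders: - -```java -import org.apache.bookkeeper.client.BookKeeperAdmin; -import org.apache.bookkeeper.net.BookieSocketAddress; - -public class ManualRecoverySketch { -    public static void main(String[] args) throws Exception { -        // Connect to the cluster through its ZooKeeper ensemble. -        BookKeeperAdmin admin = new BookKeeperAdmin("zk1.example.com:2181"); -        try { -            // Rereplicate all ledger fragments stored on the failed bookie. -            // Passing a null destination lets target bookies be chosen -            // automatically, as with `shell recover`. -            BookieSocketAddress failed = new BookieSocketAddress("192.168.1.10", 3181); -            admin.recoverBookieData(failed, null); -        } finally { -            admin.close(); -        } -    } -} -```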
- -## Configuration - -There are a handful of AutoRecovery-related configs in the [`bk_server.conf`](../../reference/config) configuration file. For a listing of those configs, see [AutoRecovery settings](../../reference/config#autorecovery-settings). - -## Disable AutoRecovery - -You can disable AutoRecovery at any time, for example during maintenance. Disabling AutoRecovery ensures that bookies' data isn't unnecessarily rereplicated when the bookie is only taken down for a short period of time, for example when the bookie is being updated or the configuration is being changed. - -You can disable AutoRecovery using the [`bookkeeper`](../../reference/cli#bookkeeper-shell-autorecovery) CLI tool: - -```bash -$ bookkeeper-server/bin/bookkeeper shell autorecovery -disable -``` - -Once disabled, you can reenable AutoRecovery using the [`enable`](../../reference/cli#bookkeeper-shell-autorecovery) shell command: - -```bash -$ bookkeeper-server/bin/bookkeeper shell autorecovery -enable -``` - -## AutoRecovery architecture - -AutoRecovery has two components: - -1. The [**auditor**](#auditor) (see the [`Auditor`](../../api/javadoc/org/apache/bookkeeper/replication/Auditor.html) class) is a singleton node that watches bookies to see if they fail and creates rereplication tasks for the ledgers on failed bookies. -1. The [**replication worker**](#replication-worker) (see the [`ReplicationWorker`](../../api/javadoc/org/apache/bookkeeper/replication/ReplicationWorker.html) class) runs on each bookie and executes rereplication tasks provided by the auditor. - -Both of these components run as threads in the [`AutoRecoveryMain`](../../api/javadoc/org/apache/bookkeeper/replication/AutoRecoveryMain) process, which runs on each bookie in the cluster. All recovery nodes participate in leader election---using ZooKeeper---to decide which node becomes the auditor. Nodes that fail to become the auditor watch the elected auditor and run an election process again if they see that the auditor node has failed. - -### Auditor - -The auditor watches all bookies in the cluster that are registered with ZooKeeper. Bookies register with ZooKeeper at startup. If the bookie crashes or is killed, the bookie's registration in ZooKeeper disappears and the auditor is notified of the change in the list of registered bookies. - -When the auditor sees that a bookie has disappeared, it immediately scans the complete {% pop ledger %} list to find ledgers that have data stored on the failed bookie. Once it has a list of ledgers for that bookie, the auditor will publish a rereplication task for each ledger under the `/underreplicated/` [znode](https://zookeeper.apache.org/doc/current/zookeeperOver.html) in ZooKeeper. - -### Replication Worker - -Each replication worker watches for tasks being published by the auditor on the `/underreplicated/` znode in ZooKeeper. When a new task appears, the replication worker will try to get a lock on it. If it cannot acquire the lock, it will try the next entry. The locks are implemented using ZooKeeper ephemeral znodes. - -The replication worker will scan through the rereplication task's ledger for fragments of which its local bookie is not a member. When it finds fragments matching this criterion, it will replicate the entries of that fragment to the local bookie. If, after this process, the ledger is fully replicated, the ledger's entry under `/underreplicated/` is deleted, and the lock is released.
If there is a problem replicating, or there are fragments in the ledger which are still underreplicated (due to the local bookie already being part of the ensemble for the fragment), then the lock is simply released. - -If the replication worker finds a fragment which needs rereplication, but does not have a defined endpoint (i.e. the final fragment of a ledger currently being written to), it will wait for a grace period before attempting rereplication. If the fragment needing rereplication still does not have a defined endpoint, the ledger is fenced and rereplication then takes place. - -This avoids the situation in which a client is writing to a ledger and one of the bookies goes down, but the client has not written an entry to that bookie before rereplication takes place. The client could continue writing to the old fragment, even though the ensemble for the fragment had changed. This could lead to data loss. Fencing prevents this scenario from happening. In the normal case, the client will try to write to the failed bookie within the grace period, and will have started a new fragment before rereplication starts. - -You can configure this grace period using the [`openLedgerRereplicationGracePeriod`](../../reference/config#openLedgerRereplicationGracePeriod) parameter. - -### The rereplication process - -The ledger rereplication process happens in these steps: - -1. The client goes through all ledger fragments in the ledger, selecting those that contain the failed bookie. -1. A recovery process is initiated for each ledger fragment in this list. - 1. The client selects a bookie to which all entries in the ledger fragment will be replicated; in the case of autorecovery, this will always be the local bookie. - 1. The client reads entries that belong to the ledger fragment from other bookies in the ensemble and writes them to the selected bookie. - 1. Once all entries have been replicated, the ZooKeeper metadata for the fragment is updated to reflect the new ensemble. - 1. The fragment is marked as fully replicated in the recovery tool. -1. Once all ledger fragments are marked as fully replicated, the ledger is marked as fully replicated. - diff --git a/site/docs/4.6.0/admin/bookies.md b/site/docs/4.6.0/admin/bookies.md deleted file mode 100644 index 1b0427dae3c..00000000000 --- a/site/docs/4.6.0/admin/bookies.md +++ /dev/null @@ -1,180 +0,0 @@ ---- -title: BookKeeper administration -subtitle: A guide to deploying and administering BookKeeper ---- - -This document is a guide to deploying, administering, and maintaining BookKeeper. It also discusses [best practices](#best-practices) and [common problems](#common-problems). - -## Requirements - -A typical BookKeeper installation consists of an ensemble of {% pop bookies %} and a ZooKeeper quorum. The exact number of bookies depends on the quorum mode that you choose, desired throughput, and the number of clients using the installation simultaneously. - -The minimum number of bookies depends on the type of installation: - -* For *self-verifying* entries you should run at least three bookies. In this mode, clients store a message authentication code along with each {% pop entry %}. -* For *generic* entries you should run at least four bookies. - -There is no upper limit on the number of bookies that you can run in a single ensemble. - -### Performance - -To achieve optimal performance, BookKeeper requires each server to have at least two disks. It's possible to run a bookie with a single disk but performance will be significantly degraded.
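To make the two-disk recommendation concrete, here is a minimal sketch of the relevant `bk_server.conf` settings (the mount points are hypothetical; the parameters are described in the configuration table below):

```shell
# write-ahead log (journal) on its own disk
journalDirectory=/mnt/journal/bk-txn

# ledger storage on separate disks (comma-separated list)
ledgerDirectories=/mnt/data1/bk-data,/mnt/data2/bk-data
```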
- -### ZooKeeper - -There is no constraint on the number of ZooKeeper nodes you can run with BookKeeper. A single machine running ZooKeeper in [standalone mode](https://zookeeper.apache.org/doc/current/zookeeperStarted.html#sc_InstallingSingleMode) is sufficient for BookKeeper, although for the sake of higher resilience we recommend running ZooKeeper in [quorum mode](https://zookeeper.apache.org/doc/current/zookeeperStarted.html#sc_RunningReplicatedZooKeeper) with multiple servers. - -## Starting and stopping bookies - -You can run bookies either in the foreground or in the background, using [nohup](https://en.wikipedia.org/wiki/Nohup). You can also run [local bookies](#local-bookie) for development purposes. - -To start a bookie in the foreground, use the [`bookie`](../../reference/cli#bookkeeper-bookie) command of the [`bookkeeper`](../../reference/cli#bookkeeper) CLI tool: - -```shell -$ bookkeeper-server/bin/bookkeeper bookie -``` - -To start a bookie in the background, use the [`bookkeeper-daemon.sh`](../../reference/cli#bookkeeper-daemon.sh) script and run `start bookie`: - -```shell -$ bookkeeper-server/bin/bookkeeper-daemon.sh start bookie -``` - -### Local bookies - -The instructions above showed you how to run bookies intended for production use. If you'd like to experiment with ensembles of bookies locally, you can use the [`localbookie`](../../reference/cli#bookkeeper-localbookie) command of the `bookkeeper` CLI tool and specify the number of bookies you'd like to run. - -This would spin up a local ensemble of 6 bookies: - -```shell -$ bookkeeper-server/bin/bookkeeper localbookie 6 -``` - -> When you run a local bookie ensemble, all bookies run in a single JVM process. - -## Configuring bookies - -There's a wide variety of parameters that you can set in the bookie configuration file in `bookkeeper-server/conf/bk_server.conf` of your [BookKeeper installation](../../reference/config). A full listing can be found in [Bookie configuration](../../reference/config). - -Some of the more important parameters to be aware of: - -Parameter | Description | Default -:---------|:------------|:------- -`bookiePort` | The TCP port that the bookie listens on | `3181` -`zkServers` | A comma-separated list of ZooKeeper servers in `hostname:port` format | `localhost:2181` -`journalDirectory` | The directory where the [log device](../../getting-started/concepts#log-device) stores the bookie's write-ahead log (WAL) | `/tmp/bk-txn` -`ledgerDirectories` | The directories where the [ledger device](../../getting-started/concepts#ledger-device) stores the bookie's ledger entries (as a comma-separated list) | `/tmp/bk-data` - -> Ideally, the directories specified by `journalDirectory` and `ledgerDirectories` should be on different devices. - -## Logging - -BookKeeper uses [slf4j](http://www.slf4j.org/) for logging, with [log4j](https://logging.apache.org/log4j/2.x/) bindings enabled by default. - -To enable logging for a bookie, create a `log4j.properties` file and point the `BOOKIE_LOG_CONF` environment variable to the configuration file. Here's an example: - -```shell -$ export BOOKIE_LOG_CONF=/some/path/log4j.properties -$ bookkeeper-server/bin/bookkeeper bookie -``` - -## Upgrading - -From time to time you may need to make changes to the filesystem layout of bookies---changes that are incompatible with previous versions of BookKeeper and require that directories used with previous versions are upgraded.
If a filesystem upgrade is required when updating BookKeeper, the bookie will fail to start and return an error like this: - -``` -2017-05-25 10:41:50,494 - ERROR - [main:Bookie@246] - Directory layout version is less than 3, upgrade needed -``` - -BookKeeper provides a utility for upgrading the filesystem. You can perform an upgrade using the [`upgrade`](../../reference/cli#bookkeeper-upgrade) command of the `bookkeeper` CLI tool. When running `bookkeeper upgrade` you need to specify one of three flags: - -Flag | Action -:----|:------ -`--upgrade` | Performs an upgrade -`--rollback` | Performs a rollback to the initial filesystem version -`--finalize` | Marks the upgrade as complete - -### Upgrade pattern - -A standard upgrade pattern is to run an upgrade... - -```shell -$ bookkeeper-server/bin/bookkeeper upgrade --upgrade -``` - -...then check that everything is working normally, then kill the bookie. If everything is okay, finalize the upgrade... - -```shell -$ bookkeeper-server/bin/bookkeeper upgrade --finalize -``` - -...and then restart the server: - -```shell -$ bookkeeper-server/bin/bookkeeper bookie -``` - -If something has gone wrong, you can always perform a rollback: - -```shell -$ bookkeeper-server/bin/bookkeeper upgrade --rollback -``` - -## Formatting - -You can format bookie metadata in ZooKeeper using the [`metaformat`](../../reference/cli#bookkeeper-shell-metaformat) command of the [BookKeeper shell](../../reference/cli#the-bookkeeper-shell). - -By default, formatting is done in interactive mode, which prompts you to confirm the format operation if old data exists. You can disable confirmation using the `-nonInteractive` flag. If old data does exist, the format operation will abort *unless* you set the `-force` flag. Here's an example: - -```shell -$ bookkeeper-server/bin/bookkeeper shell metaformat -``` - -You can format the local filesystem data on a bookie using the [`bookieformat`](../../reference/cli#bookkeeper-shell-bookieformat) command on each bookie. Here's an example: - -```shell -$ bookkeeper-server/bin/bookkeeper shell bookieformat -``` - -> The `-force` and `-nonInteractive` flags are also available for the `bookieformat` command. - -## AutoRecovery - -For a guide to AutoRecovery in BookKeeper, see [this doc](../autorecovery). - -## Missing disks or directories - -Accidentally replacing disks or removing directories can cause a bookie to fail while trying to read a ledger fragment that, according to the ledger metadata, exists on the bookie. For this reason, when a bookie is started for the first time, its disk configuration is fixed for the lifetime of that bookie. Any change to its disk configuration, such as a crashed disk or an accidental configuration change, will result in the bookie being unable to start. That will throw an error like this: - -``` -2017-05-29 18:19:13,790 - ERROR - [main:BookieServer314] – Exception running bookie server : @ -org.apache.bookkeeper.bookie.BookieException$InvalidCookieException -.......at org.apache.bookkeeper.bookie.Cookie.verify(Cookie.java:82) -.......at org.apache.bookkeeper.bookie.Bookie.checkEnvironment(Bookie.java:275) -.......at org.apache.bookkeeper.bookie.Bookie.(Bookie.java:351) -``` - -If the change was the result of an accidental configuration change, the change can be reverted and the bookie can be restarted. However, if the change *cannot* be reverted, such as is the case when you want to add a new disk or replace a disk, the bookie must be wiped and then all its data re-replicated onto it. - -1. 
Increment the [`bookiePort`](../../reference/config#bookiePort) parameter in the [`bk_server.conf`](../../reference/config) -1. Ensure that all directories specified by [`journalDirectory`](../../reference/config#journalDirectory) and [`ledgerDirectories`](../../reference/config#ledgerDirectories) are empty. -1. [Start the bookie](#starting-and-stopping-bookies). -1. Run the following command to re-replicate the data: - - ```bash - $ bookkeeper-server/bin/bookkeeper shell recover \ - <zkserver> \ - <oldbookie> \ - <newbookie> - ``` - - The ZooKeeper server, old bookie, and new bookie are all identified by their external IP and `bookiePort` (3181 by default). Here's an example: - - ```bash - $ bookkeeper-server/bin/bookkeeper shell recover \ - zk1.example.com \ - 192.168.1.10:3181 \ - 192.168.1.10:3181 - ``` - - See the [AutoRecovery](../autorecovery) documentation for more info on the re-replication process. diff --git a/site/docs/4.6.0/admin/geo-replication.md b/site/docs/4.6.0/admin/geo-replication.md deleted file mode 100644 index 38b972345ef..00000000000 --- a/site/docs/4.6.0/admin/geo-replication.md +++ /dev/null @@ -1,22 +0,0 @@ ---- -title: Geo-replication -subtitle: Replicate data across BookKeeper clusters ---- - -*Geo-replication* is the replication of data across BookKeeper clusters. In order to enable geo-replication for a group of BookKeeper clusters, those clusters need to share a global ZooKeeper setup for metadata. - -## Global ZooKeeper - -Setting up a global ZooKeeper quorum is a lot like setting up a cluster-specific quorum. The crucial difference is that the quorum spans all of the participating clusters rather than a single one. - -### Geo-replication across three clusters - -Let's say that you want to set up geo-replication across clusters in regions A, B, and C. First, the BookKeeper clusters in each region must have their own local (cluster-specific) ZooKeeper quorum. - -> BookKeeper clusters use global ZooKeeper only for metadata storage. Traffic from bookies to ZooKeeper should thus be fairly light in general. - -The crucial difference between using cluster-specific ZooKeeper and global ZooKeeper is that you need to point all {% pop bookies %} to the global ZooKeeper setup. - -## Region-aware placement policy - -## Autorecovery diff --git a/site/docs/4.6.0/admin/http.md b/site/docs/4.6.0/admin/http.md deleted file mode 100644 index 0097adc62b8..00000000000 --- a/site/docs/4.6.0/admin/http.md +++ /dev/null @@ -1,394 +0,0 @@ ---- -title: BookKeeper Admin REST API ---- - -This document introduces BookKeeper HTTP endpoints, which can be used for BookKeeper administration. -To use this feature, set `httpServerEnabled` to `true` in file `conf/bk_server.conf`. - -## All the endpoints - -Currently all the HTTP endpoints can be divided into these 5 components: -1. Heartbeat: heartbeat for a specific bookie. -1. Config: managing the server configuration for a specific bookie. -1. Ledger: HTTP endpoints related to ledgers. -1. Bookie: HTTP endpoints related to bookies. -1. AutoRecovery: HTTP endpoints related to auto recovery. - -## Heartbeat - -### Endpoint: /heartbeat -* Method: GET -* Description: Get heartbeat status for a specific bookie -* Response: - -| Code | Description | -|:-------|:------------| -|200 | Successful operation | - -## Config - -### Endpoint: /api/v1/config/server_config -1. Method: GET - * Description: Get value of all configured values overridden on local server config - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | -1.
Method: PUT - * Description: Update a local server config - * Parameters: - - | Name | Type | Required | Description | - |:-----|:-----|:---------|:------------| - |configName | String | Yes | Configuration name(key) | - |configValue | String | Yes | Configuration value(value) | - * Body: - ```json - { - "configName1": "configValue1", - "configName2": "configValue2" - } - ``` - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - -## Ledger - -### Endpoint: /api/v1/ledger/delete/?ledger_id=<ledger_id> -1. Method: DELETE - * Description: Delete a ledger. - * Parameters: - - | Name | Type | Required | Description | - |:-----|:-----|:---------|:------------| - |ledger_id | Long | Yes | ledger id of the ledger. | - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - -### Endpoint: /api/v1/ledger/list/?print_metadata=<metadata> -1. Method: GET - * Description: List all the ledgers. - * Parameters: - - | Name | Type | Required | Description | - |:-----|:-----|:---------|:------------| - |print_metadata | Boolean | No | whether print out metadata | - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - * Response Body format: - - ```json - { - "ledgerId1": "ledgerMetadata1", - "ledgerId2": "ledgerMetadata2", - ... - } - ``` - -### Endpoint: /api/v1/ledger/metadata/?ledger_id=<ledger_id> -1. Method: GET - * Description: Get the metadata of a ledger. - * Parameters: - - | Name | Type | Required | Description | - |:-----|:-----|:---------|:------------| - |ledger_id | Long | Yes | ledger id of the ledger. | - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - * Response Body format: - - ```json - { - "ledgerId1": "ledgerMetadata1" - } - ``` - -### Endpoint: /api/v1/ledger/read/?ledger_id=<ledger_id>&start_entry_id=<start_entry_id>&end_entry_id=<end_entry_id> -1. Method: GET - * Description: Read a range of entries from ledger. - * Parameters: - - | Name | Type | Required | Description | - |:-----|:-----|:---------|:------------| - |ledger_id | Long | Yes| ledger id of the ledger. | - |start_entry_id | Long | No | start entry id of read range. | - |end_entry_id | Long | No | end entry id of read range. | - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - * Response Body format: - - ```json - { - "entryId1": "entry content 1", - "entryId2": "entry content 2", - ... - } - ``` - -## Bookie - -### Endpoint: /api/v1/bookie/list_bookies/?type=<type>&print_hostnames=<hostnames> -1. Method: GET - * Description: Get all the available bookies. - * Parameters: - - | Name | Type | Required | Description | - |:-----|:-----|:---------|:------------| - |type | String | Yes | value: "rw" or "ro" , list read-write/read-only bookies. | - |print_hostnames | Boolean | No | whether print hostname of bookies. | - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - * Response Body format: - - ```json - { - "bookieSocketAddress1": "hostname1", - "bookieSocketAddress2": "hostname2", - ... 
- } - ``` - -### Endpoint: /api/v1/bookie/list_bookie_info -1. Method: GET - * Description: Get bookies disk usage info of this cluster. - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - * Response Body format: - - ```json - { - "bookieAddress" : {free: xxx, total: xxx}, - "bookieAddress" : {free: xxx, total: xxx}, - ... - "clusterInfo" : {total_free: xxx, total: xxx} - } - ``` - -### Endpoint: /api/v1/bookie/last_log_mark -1. Method: GET - * Description: Get the last log marker. - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - * Response Body format: - - ```json - { - JournalId1 : position1, - JournalId2 : position2, - ... - } - ``` - -### Endpoint: /api/v1/bookie/list_disk_file/?file_type=<type> -1. Method: GET - * Description: Get all the files on disk of current bookie. - * Parameters: - - | Name | Type | Required | Description | - |:-----|:-----|:---------|:------------| - |type | String | No | file type: journal/entrylog/index. | - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - * Response Body format: - - ```json - { - "journal files" : "filename1 filename2 ...", - "entrylog files" : "filename1 filename2...", - "index files" : "filename1 filename2 ..." - } - ``` - -### Endpoint: /api/v1/bookie/expand_storage -1. Method: PUT - * Description: Expand storage for a bookie. - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - -## Auto recovery - -### Endpoint: /api/v1/autorecovery/bookie/ -1. Method: PUT - * Description: Ledger data recovery for failed bookie - * Body: - ```json - { - "bookie_src": [ "bookie_src1", "bookie_src2"... ], - "bookie_dest": [ "bookie_dest1", "bookie_dest2"... ], - "delete_cookie": - } - ``` - * Parameters: - - | Name | Type | Required | Description | - |:-----|:-----|:---------|:------------| - |bookie_src | Strings | Yes | bookie source to recovery | - |bookie_dest | Strings | No | bookie data recovery destination | - |delete_cookie | Boolean | No | Whether delete cookie | - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - -### Endpoint: /api/v1/autorecovery/list_under_replicated_ledger/?missingreplica=<bookie_address>&excludingmissingreplica=<bookie_address> -1. Method: GET - * Description: Get all under replicated ledgers. - * Parameters: - - | Name | Type | Required | Description | - |:-----|:-----|:---------|:------------| - |missingreplica | String | No | missing replica bookieId | - |excludingmissingreplica | String | No | exclude missing replica bookieId | - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - * Response Body format: - - ```json - { - [ledgerId1, ledgerId2...] - } - ``` - -### Endpoint: /api/v1/autorecovery/who_is_auditor -1. Method: GET - * Description: Get auditor bookie id. 
- * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - * Response Body format: - - ```json - { - "Auditor": "hostname/hostAddress:Port" - } - ``` - -### Endpoint: /api/v1/autorecovery/trigger_audit -1. Method: PUT - * Description: Force trigger an audit by resetting the lostBookieRecoveryDelay. - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - -### Endpoint: /api/v1/autorecovery/lost_bookie_recovery_delay -1. Method: GET - * Description: Get lostBookieRecoveryDelay value in seconds. - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - -1. Method: PUT - * Description: Set lostBookieRecoveryDelay value in seconds. - * Body: - ```json - { - "delay_seconds": - } - ``` - * Parameters: - - | Name | Type | Required | Description | - |:-----|:-----|:---------|:------------| - | delay_seconds | Long | Yes | set delay value in seconds. | - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - -### Endpoint: /api/v1/autorecovery/decommission -1. Method: PUT - * Description: Decommission a bookie, force trigger the audit task, and make sure all the ledgers stored in the decommissioning bookie are replicated. - * Body: - ```json - { - "bookie_src": - } - ``` - * Parameters: - - | Name | Type | Required | Description | - |:-----|:-----|:---------|:------------| - | bookie_src | String | Yes | Bookie src to decommission. | - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | diff --git a/site/docs/4.6.0/admin/metrics.md b/site/docs/4.6.0/admin/metrics.md deleted file mode 100644 index 142df3dcd2d..00000000000 --- a/site/docs/4.6.0/admin/metrics.md +++ /dev/null @@ -1,41 +0,0 @@ ---- -title: Metric collection ---- - -BookKeeper enables metrics collection through a variety of [stats providers](#stats-providers). - -> For a full listing of available metrics, see the [Metrics](../../reference/metrics) reference doc. - -## Stats providers - -BookKeeper has stats provider implementations for five sinks: - -Provider | Provider class name -:--------|:------------------- -[Codahale Metrics](https://mvnrepository.com/artifact/org.apache.bookkeeper.stats/codahale-metrics-provider) | `org.apache.bookkeeper.stats.CodahaleMetricsProvider` -[Prometheus](https://prometheus.io/) | `org.apache.bookkeeper.stats.prometheus.PrometheusMetricsProvider` -[Finagle](https://twitter.github.io/finagle/guide/Metrics.html) | `org.apache.bookkeeper.stats.FinagleStatsProvider` -[Ostrich](https://github.com/twitter/ostrich) | `org.apache.bookkeeper.stats.OstrichProvider` -[Twitter Science Provider](https://mvnrepository.com/artifact/org.apache.bookkeeper.stats/twitter-science-provider) | `org.apache.bookkeeper.stats.TwitterStatsProvider` - -> The [Codahale Metrics]({{ site.github_master }}/bookkeeper-stats-providers/codahale-metrics-provider) stats provider is the default provider.
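If you pick a non-default provider, its module must also be on the bookie's classpath. As a sketch, the Prometheus provider could be declared like this (coordinates inferred from the Maven Central links in the table above; verify the artifact name and version for your release):

```xml
<dependency>
  <groupId>org.apache.bookkeeper.stats</groupId>
  <artifactId>prometheus-metrics-provider</artifactId>
  <version>4.6.0</version>
</dependency>
```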
- -## Enabling stats providers in bookies - -There are two stats-related [configuration parameters](../../reference/config#statistics) available for bookies: - -Parameter | Description | Default -:---------|:------------|:------- -`enableStatistics` | Whether statistics are enabled for the bookie | `false` -`statsProviderClass` | The stats provider class used by the bookie | `org.apache.bookkeeper.stats.CodahaleMetricsProvider` - - -To enable stats: - -* set the `enableStatistics` parameter to `true` -* set `statsProviderClass` to the desired provider (see the [table above](#stats-providers) for a listing of classes) - - diff --git a/site/docs/4.6.0/admin/perf.md b/site/docs/4.6.0/admin/perf.md deleted file mode 100644 index 82956326e5d..00000000000 --- a/site/docs/4.6.0/admin/perf.md +++ /dev/null @@ -1,3 +0,0 @@ ---- -title: Performance tuning ---- diff --git a/site/docs/4.6.0/admin/placement.md b/site/docs/4.6.0/admin/placement.md deleted file mode 100644 index ded456e1aea..00000000000 --- a/site/docs/4.6.0/admin/placement.md +++ /dev/null @@ -1,3 +0,0 @@ ---- -title: Customized placement policies ---- diff --git a/site/docs/4.6.0/admin/upgrade.md b/site/docs/4.6.0/admin/upgrade.md deleted file mode 100644 index 1aa84a4ab40..00000000000 --- a/site/docs/4.6.0/admin/upgrade.md +++ /dev/null @@ -1,76 +0,0 @@ ---- -title: Upgrade ---- - -> If you have questions about upgrades (or need help), please feel free to reach out to us by [mailing list]({{ site.baseurl }}community/mailing-lists) or [Slack Channel]({{ site.baseurl }}community/slack). - -## Overview - -Consider the guidelines below in preparation for upgrading. - -- Always back up all your configuration files before upgrading. -- Read through the documentation and draft an upgrade plan that matches your specific requirements and environment before starting the upgrade process. - Put differently, don't start working through the guide on a live cluster. Read the guide entirely, make a plan, then execute the plan. -- Pay careful attention to the order in which components are upgraded. In general, you need to upgrade bookies first and then upgrade your clients. -- If autorecovery is running along with bookies, you need to pay attention to the upgrade sequence. -- Read the release notes carefully for each release. They contain not only information about noteworthy features, but also changes to configurations - that may impact your upgrade. -- Always upgrade one bookie or a small set of bookies to canary the new version before upgrading all bookies in your cluster. - -## Canary - -It is wise to canary an upgraded version in one bookie or a small set of bookies before upgrading all bookies in your live cluster. - -You can follow the steps below to canary an upgraded version: - -1. Stop a Bookie. -2. Upgrade the binary and configuration. -3. Start the Bookie in `ReadOnly` mode (see the configuration sketch at the end of this section). This can be used to verify that the new version of the Bookie runs well for the read workload. -4. Once the Bookie is running in `ReadOnly` mode successfully for a while, restart the Bookie in `Write/Read` mode. -5. After step 4, the Bookie will serve both write and read traffic. - -### Rollback Canaries - -If problems occur during canarying an upgraded version, you can simply take down the problematic Bookie node. The remaining bookies in the old cluster -will repair the problematic bookie node via autorecovery. Nothing else needs to be done.
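For step 3 of the canary procedure, here is a sketch of how a bookie might be pinned in `ReadOnly` mode via `bk_server.conf` (the `readOnlyModeEnabled` and `forceReadOnlyBookie` parameters are assumptions here; check the configuration reference of your release):

```shell
# allow the bookie to transition into read-only mode
readOnlyModeEnabled=true
# keep the bookie read-only while canarying the read workload
forceReadOnlyBookie=true
```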
- -## Upgrade Steps - -Once you have determined that a version is safe to upgrade on a few nodes in your cluster, you can perform the following steps to upgrade all bookies in your cluster. - -1. Determine if autorecovery is running along with bookies. If yes, check if the clients (either new clients with new binary or old clients with new configurations) -are allowed to talk to old bookies; if clients are not allowed to talk to old bookies, please [disable autorecovery](../../reference/cli/#autorecovery-1) during upgrade. -2. Decide on performing a rolling upgrade or a downtime upgrade. -3. Upgrade all Bookies (more below). -4. If autorecovery was disabled during upgrade, [enable autorecovery](../../reference/cli/#autorecovery-1). -5. After all bookies are upgraded, build applications that use the `BookKeeper client` against the new bookkeeper libraries and deploy the new versions. - -### Upgrade Bookies - -In a rolling upgrade scenario, upgrade one Bookie at a time. In a downtime upgrade scenario, take the entire cluster down, upgrade each Bookie, then start the cluster. - -For each Bookie: - -1. Stop the bookie. -2. Upgrade the software (either new binary or new configuration). -3. Start the bookie. - -## Upgrade Guides - -The general upgrade method for Apache BookKeeper is described above. The sections below cover the details for individual versions. - -### 4.5.x to 4.6.x upgrade - -There aren't any protocol-related backward compatibility changes in 4.6.x. So you can follow the general upgrade sequence to upgrade from 4.5.x to 4.6.x. - -### 4.4.x to 4.5.x upgrade - -There aren't any protocol-related backward compatibility changes in 4.5.0. So you can follow the general upgrade sequence to upgrade from 4.4.x to 4.5.x. -However, here is a list of things that you might want to know. - -1. 4.5.x upgrades netty from 3.x to 4.x. The memory usage pattern might be changed a bit. Netty 4 uses more direct memory. Please pay attention to your memory usage - and adjust the JVM settings accordingly. -2. `multi journals` is a non-rollbackable feature. If you configure a bookie to use multiple journals on 4.5.x you cannot roll the bookie back to 4.4.x. You have - to take the bookie out and recover it if you want to roll back to 4.4.x. - -If you are planning to upgrade a non-secured cluster to a secured cluster enabling security features in 4.5.0, please read [BookKeeper Security](../../security/overview) for more details. diff --git a/site/docs/4.6.0/api/distributedlog-api.md b/site/docs/4.6.0/api/distributedlog-api.md deleted file mode 100644 index cbd8b4b9888..00000000000 --- a/site/docs/4.6.0/api/distributedlog-api.md +++ /dev/null @@ -1,395 +0,0 @@ ---- -title: DistributedLog -subtitle: A higher-level API for managing BookKeeper entries ---- - -> DistributedLog began its life as a separate project under the Apache Foundation. It was merged into BookKeeper in 2017. - -The DistributedLog API is an easy-to-use interface for managing BookKeeper entries that enables you to use BookKeeper without needing to interact with [ledgers](../ledger-api) directly. - -DistributedLog (DL) maintains sequences of records in categories called *logs* (aka *log streams*). *Writers* append records to DL logs, while *readers* fetch and process those records. - -## Architecture - -The diagram below illustrates how the DistributedLog API works with BookKeeper: - -![DistributedLog API]({{ site.baseurl }}img/distributedlog.png) - -## Logs - -A *log* in DistributedLog is an ordered, immutable sequence of *log records*.
- -The diagram below illustrates the anatomy of a log stream: - -![DistributedLog log]({{ site.baseurl }}img/logs.png) - -### Log records - -Each log record is a sequence of bytes. Applications are responsible for serializing and deserializing byte sequences stored in log records. - -Log records are written sequentially into a *log stream* and assigned a unique sequence number called a DLSN (DistributedLog Sequence Number). - -In addition to a DLSN, applications can assign their own sequence number when constructing log records. Application-defined sequence numbers are known as *TransactionIDs* (or *txid*). Either a DLSN or a TransactionID can be used for positioning readers to start reading from a specific log record. - -### Log segments - -Each log is broken down into *log segments* that contain subsets of records. Log segments are distributed and stored in BookKeeper. DistributedLog rolls the log segments based on the configured *rolling policy*, which can be either - -* a configurable period of time (such as every 2 hours), or -* a configurable maximum size (such as every 128 MB). - -The data in logs is divided up into equally sized log segments and distributed evenly across {% pop bookies %}. This allows logs to scale beyond a size that would fit on a single server and spreads read traffic across the cluster. - -### Namespaces - -Log streams that belong to the same organization are typically categorized and managed under a *namespace*. DistributedLog namespaces essentially enable applications to locate log streams. Applications can perform the following actions under a namespace: - -* create streams -* delete streams -* truncate streams to a given sequence number (either a DLSN or a TransactionID) - -## Writers - -Through the DistributedLog API, writers write data into logs of their choice. All records are appended into logs in order. The sequencing is performed by the writer, which means that there is only one active writer for a log at any given time. - -DistributedLog uses a *fencing* mechanism in the log segment store to guarantee correctness when two writers attempt to write to the same log during a network partition. - -### Write Proxy - -Log writers are served and managed in a service tier called the *Write Proxy* (see the diagram [above](#architecture)). The Write Proxy is used for accepting writes from a large number of clients. - -## Readers - -DistributedLog readers read records from logs of their choice, starting with a provided position. The provided position can be either a DLSN or a TransactionID. - -Readers read records from logs in strict order. Different readers can read records from different positions in the same log. - -Unlike other pub-sub systems, DistributedLog doesn't record or manage readers' positions. This means that tracking is the responsibility of applications, as different applications may have different requirements for tracking and coordinating positions. This is hard to get right with a single approach. Distributed databases, for example, might store reader positions along with SSTables, so they would resume applying transactions from the positions stored in SSTables. Tracking reader positions could easily be done at the application level using various stores (such as ZooKeeper, the filesystem, or key-value stores). - -### Read Proxy - -Log records can be cached in a service tier called the *Read Proxy* to serve a large number of readers. See the diagram [above](#architecture). The Read Proxy is the analogue of the [Write Proxy](#write-proxy).
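To make the writer role concrete, here is a minimal sketch of appending a record with the DistributedLog core library (the namespace URI, stream name, and transaction id are hypothetical; see the DistributedLog user guide linked below for the authoritative API):

```java
import java.net.URI;
import org.apache.distributedlog.DLSN;
import org.apache.distributedlog.DistributedLogConfiguration;
import org.apache.distributedlog.LogRecord;
import org.apache.distributedlog.api.AsyncLogWriter;
import org.apache.distributedlog.api.DistributedLogManager;
import org.apache.distributedlog.api.namespace.Namespace;
import org.apache.distributedlog.api.namespace.NamespaceBuilder;

public class DLWriteSketch {
    public static void main(String[] args) throws Exception {
        DistributedLogConfiguration conf = new DistributedLogConfiguration();

        // bind the namespace to a ZooKeeper-backed URI (hypothetical address and path)
        Namespace namespace = NamespaceBuilder.newBuilder()
            .conf(conf)
            .uri(URI.create("distributedlog://127.0.0.1:2181/my-namespace"))
            .build();

        DistributedLogManager dlm = namespace.openLog("my-stream");
        AsyncLogWriter writer = dlm.openAsyncLogWriter().get();

        // append one record; the application chooses the transaction id (txid)
        long txid = 1L;
        DLSN dlsn = writer.write(new LogRecord(txid, "hello".getBytes())).get();
        System.out.println("Wrote record at " + dlsn);

        writer.asyncClose().get();
        dlm.close();
        namespace.close();
    }
}
```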
- -## Guarantees - -The DistributedLog API for BookKeeper provides a number of guarantees for applications: - -* Records written by a [writer](#writers) to a [log](#logs) are appended in the order in which they are written. If a record **R1** is written by the same writer as a record **R2**, **R1** will have a smaller sequence number than **R2**. -* [Readers](#readers) see [records](#log-records) in the same order in which they are [written](#writers) to the log. -* All records are persisted on disk by BookKeeper before acknowledgements, which guarantees durability. -* For a log with a replication factor of N, DistributedLog tolerates up to N-1 server failures without losing any records. - -## API - -Documentation for the DistributedLog API can be found [here](https://bookkeeper.apache.org/distributedlog/docs/latest/user_guide/api/core). - -> At a later date, the DistributedLog API docs will be added here. - - diff --git a/site/docs/4.6.0/api/ledger-adv-api.md b/site/docs/4.6.0/api/ledger-adv-api.md deleted file mode 100644 index f46950dd984..00000000000 --- a/site/docs/4.6.0/api/ledger-adv-api.md +++ /dev/null @@ -1,82 +0,0 @@ ---- -title: The Advanced Ledger API ---- - -In release `4.5.0`, Apache BookKeeper introduced a few advanced APIs for advanced usage. -This section covers these advanced APIs. - -> Before learning the advanced API, please read the [Ledger API](../ledger-api) first. - -## LedgerHandleAdv - -[`LedgerHandleAdv`](../javadoc/org/apache/bookkeeper/client/LedgerHandleAdv) is an advanced extension of [`LedgerHandle`](../javadoc/org/apache/bookkeeper/client/LedgerHandle). -It allows users to pass in an `entryId` when adding an entry. - -### Creating advanced ledgers - -Here's an example: - -```java -byte[] passwd = "some-passwd".getBytes(); -LedgerHandleAdv handle = bkClient.createLedgerAdv( - 3, 3, 2, // replica settings - DigestType.CRC32, - passwd); -``` - -You can also create advanced ledgers asynchronously. - -```java -class LedgerCreationCallback implements AsyncCallback.CreateCallback { - public void createComplete(int returnCode, LedgerHandle handle, Object ctx) { - System.out.println("Ledger successfully created"); - } -} -client.asyncCreateLedgerAdv( - 3, // ensemble size - 3, // write quorum size - 2, // ack quorum size - BookKeeper.DigestType.CRC32, - password, - new LedgerCreationCallback(), - "some context" -); -``` - -Besides the APIs above, BookKeeper allows users to provide a `ledger-id` when creating advanced ledgers. - -```java -long ledgerId = ...; // the ledger id is generated externally. - -byte[] passwd = "some-passwd".getBytes(); -LedgerHandleAdv handle = bkClient.createLedgerAdv( - ledgerId, // ledger id generated externally - 3, 3, 2, // replica settings - DigestType.CRC32, - passwd); -``` - -> Please note, it is the user's responsibility to provide a unique ledger id when using the API above. -> If a ledger already exists when users try to create an advanced ledger with the same ledger id, -> a [LedgerExistsException](../javadoc/org/apache/bookkeeper/client/BKException.BKLedgerExistException.html) is thrown by the bookkeeper client. - -### Add Entries - -The normal [add entries api](ledger-api/#adding-entries-to-ledgers) is disabled in advanced ledgers. Instead, when users want to add entries -to advanced ledgers, an entry id must be passed in along with the entry data when adding an entry.
- -```java -long entryId = ...; // entry id generated externally - -ledger.addEntry(entryId, "Some entry data".getBytes()); -``` - -A few notes when using this API: - -- The entry id has to be non-negative. -- Clients may add entries out of order. -- However, the entries are only acknowledged in a monotonic order starting from 0. - -### Read Entries - -The read entries api in advanced ledgers remains the same as in [normal ledgers](../ledger-api/#reading-entries-from-ledgers). diff --git a/site/docs/4.6.0/api/ledger-api.md b/site/docs/4.6.0/api/ledger-api.md deleted file mode 100644 index acde8d6951b..00000000000 --- a/site/docs/4.6.0/api/ledger-api.md +++ /dev/null @@ -1,747 +0,0 @@ ---- -title: The Ledger API ---- - -The ledger API is a lower-level API for BookKeeper that enables you to interact with {% pop ledgers %} directly. - -## The Java ledger API client - -To get started with the Java client for BookKeeper, install the `bookkeeper-server` library as a dependency in your Java application. - -> For a more in-depth tutorial that involves a real use case for BookKeeper, see the [Example application](../example-application) guide. - -## Installation - -The BookKeeper Java client library is available via [Maven Central](http://search.maven.org/) and can be installed using [Maven](#maven), [Gradle](#gradle), and other build tools. - -### Maven - -If you're using [Maven](https://maven.apache.org/), add this to your [`pom.xml`](https://maven.apache.org/guides/introduction/introduction-to-the-pom.html) build configuration file: - -```xml -<!-- in your <properties> block --> -<bookkeeper.version>4.6.0</bookkeeper.version> - -<!-- in your <dependencies> block --> -<dependency> - <groupId>org.apache.bookkeeper</groupId> - <artifactId>bookkeeper-server</artifactId> - <version>${bookkeeper.version}</version> -</dependency> -``` - -### Gradle - -If you're using [Gradle](https://gradle.org/), add this to your [`build.gradle`](https://spring.io/guides/gs/gradle/) build configuration file: - -```groovy -dependencies { - compile group: 'org.apache.bookkeeper', name: 'bookkeeper-server', version: '4.6.0' -} - -// Alternatively: -dependencies { - compile 'org.apache.bookkeeper:bookkeeper-server:4.6.0' -} -``` - -## Connection string - -When interacting with BookKeeper using the Java client, you need to provide your client with a connection string, for which you have three options: - -* Provide your entire ZooKeeper connection string, for example `zk1:2181,zk2:2181,zk3:2181`. -* Provide a host and port for one node in your ZooKeeper cluster, for example `zk1:2181`. In general, it's better to provide a full connection string (in case the ZooKeeper node you attempt to connect to is down). -* If your ZooKeeper cluster can be discovered via DNS, you can provide the DNS name, for example `my-zookeeper-cluster.com`. - -## Creating a new client - -In order to create a new [`BookKeeper`](../javadoc/org/apache/bookkeeper/client/BookKeeper) client object, you need to pass in a [connection string](#connection-string). Here is an example client object using a ZooKeeper connection string: - -```java -try { - String connectionString = "127.0.0.1:2181"; // For a single-node, local ZooKeeper cluster - BookKeeper bkClient = new BookKeeper(connectionString); -} catch (InterruptedException | IOException | KeeperException e) { - e.printStackTrace(); -} -``` - -> If you're running BookKeeper [locally](../../getting-started/run-locally), using the [`localbookie`](../../reference/cli#bookkeeper-localbookie) command, use `"127.0.0.1:2181"` for your connection string, as in the example above.
- -There are, however, other ways that you can create a client object: - -* By passing in a [`ClientConfiguration`](../javadoc/org/apache/bookkeeper/conf/ClientConfiguration) object. Here's an example: - - ```java - ClientConfiguration config = new ClientConfiguration(); - config.setZkServers(zkConnectionString); - config.setAddEntryTimeout(2000); - BookKeeper bkClient = new BookKeeper(config); - ``` - -* By specifying a `ClientConfiguration` and a [`ZooKeeper`](http://zookeeper.apache.org/doc/current/api/org/apache/zookeeper/ZooKeeper.html) client object: - - ```java - ClientConfiguration config = new ClientConfiguration(); - config.setAddEntryTimeout(5000); - ZooKeeper zkClient = new ZooKeeper(/* client args */); - BookKeeper bkClient = new BookKeeper(config, zkClient); - ``` - -* Using the `forConfig` method: - - ```java - BookKeeper bkClient = BookKeeper.forConfig(conf).build(); - ``` - -## Creating ledgers - -The easiest way to create a {% pop ledger %} using the Java client is via the `createLedger` method, which creates a new ledger synchronously and returns a [`LedgerHandle`](../javadoc/org/apache/bookkeeper/client/LedgerHandle). You must specify at least a [`DigestType`](../javadoc/org/apache/bookkeeper/client/BookKeeper.DigestType) and a password. - -Here's an example: - -```java -byte[] password = "some-password".getBytes(); -LedgerHandle handle = bkClient.createLedger(BookKeeper.DigestType.MAC, password); -``` - -You can also create ledgers asynchronously. - -### Create ledgers asynchronously - -```java -class LedgerCreationCallback implements AsyncCallback.CreateCallback { - public void createComplete(int returnCode, LedgerHandle handle, Object ctx) { - System.out.println("Ledger successfully created"); - } -} - -client.asyncCreateLedger( - 3, - 2, - BookKeeper.DigestType.MAC, - password, - new LedgerCreationCallback(), - "some context" -); -``` - -## Adding entries to ledgers - -```java -long entryId = ledger.addEntry("Some entry data".getBytes()); -``` - -### Add entries asynchronously - -## Reading entries from ledgers - -```java -Enumeration<LedgerEntry> entries = handle.readEntries(1, 99); -``` - -To read all possible entries from the ledger: - -```java -Enumeration<LedgerEntry> entries = - handle.readEntries(0, handle.getLastAddConfirmed()); - -while (entries.hasMoreElements()) { - LedgerEntry entry = entries.nextElement(); - System.out.println("Successfully read entry " + entry.getEntryId()); -} -``` - -### Reading entries after the LastAddConfirmed range - -`readUnconfirmedEntries` allows reading beyond the LastAddConfirmed range. -It lets the client read without checking the local value of LastAddConfirmed, so that it is possible to read entries for which the writer has not yet received an acknowledgement. -For entries within the range 0..LastAddConfirmed, BookKeeper guarantees that the writer has successfully received the acknowledgement. -For entries outside that range, it is possible that the writer never received the acknowledgement, so there is a risk that the reader sees entries before the writer does; this could result in a consistency issue in some cases. -With this method you can read entries both before and after the LastAddConfirmed in one call; the expected consistency is as described above.
- -```java -Enumeration<LedgerEntry> entries = - handle.readUnconfirmedEntries(0, lastEntryIdExpectedToRead); - -while (entries.hasMoreElements()) { - LedgerEntry entry = entries.nextElement(); - System.out.println("Successfully read entry " + entry.getEntryId()); -} -``` - -## Deleting ledgers - -{% pop Ledgers %} can also be deleted synchronously or asynchronously. - -```java -long ledgerId = 1234; - -try { - bkClient.deleteLedger(ledgerId); -} catch (Exception e) { - e.printStackTrace(); -} -``` - -### Delete ledgers asynchronously - -Exceptions thrown: - -* - -```java -class DeleteEntryCallback implements AsyncCallback.DeleteCallback { - public void deleteComplete(int returnCode, Object ctx) { - System.out.println("Delete completed"); - } -} -``` - -## Simple example - -> For a more involved BookKeeper client example, see the [example application](#example-application) below. - -In the code sample below, a BookKeeper client: - -* creates a ledger -* writes entries to the ledger -* closes the ledger (meaning no further writes are possible) -* re-opens the ledger for reading -* reads all available entries - -```java -// Create a client object for the local ensemble. This -// operation throws multiple exceptions, so make sure to -// use a try/catch block when instantiating client objects. -BookKeeper bkc = new BookKeeper("localhost:2181"); - -// A password for the new ledger -byte[] ledgerPassword = /* some sequence of bytes, perhaps random */; - -// Create a new ledger and fetch its identifier -LedgerHandle lh = bkc.createLedger(BookKeeper.DigestType.MAC, ledgerPassword); -long ledgerId = lh.getId(); - -// Create a buffer for four-byte entries -ByteBuffer entry = ByteBuffer.allocate(4); - -int numberOfEntries = 100; - -// Add entries to the ledger, then close it -for (int i = 0; i < numberOfEntries; i++){ - entry.putInt(i); - entry.position(0); - lh.addEntry(entry.array()); -} -lh.close(); - -// Open the ledger for reading -lh = bkc.openLedger(ledgerId, BookKeeper.DigestType.MAC, ledgerPassword); - -// Read all available entries -Enumeration<LedgerEntry> entries = lh.readEntries(0, numberOfEntries - 1); - -while(entries.hasMoreElements()) { - ByteBuffer result = ByteBuffer.wrap(entries.nextElement().getEntry()); - Integer retrEntry = result.getInt(); - - // Print the integer stored in each entry - System.out.println(String.format("Result: %s", retrEntry)); -} - -// Close the ledger and the client -lh.close(); -bkc.close(); -``` - -Running this should return this output: - -```shell -Result: 0 -Result: 1 -Result: 2 -# etc -``` - -## Example application - -This tutorial walks you through building an example application that uses BookKeeper as the replicated log. The application uses the [BookKeeper Java client](../java-client) to interact with BookKeeper. - -> The code for this tutorial can be found in [this GitHub repo](https://github.com/ivankelly/bookkeeper-tutorial/). The final code for the `Dice` class can be found [here](https://github.com/ivankelly/bookkeeper-tutorial/blob/master/src/main/java/org/apache/bookkeeper/Dice.java). - -### Setup - -Before you start, you will need to have a BookKeeper cluster running locally on your machine. For installation instructions, see [Installation](../../getting-started/installation). - -To start up a cluster consisting of six {% pop bookies %} locally: - -```shell -$ bookkeeper-server/bin/bookkeeper localbookie 6 -``` - -You can specify a different number of bookies if you'd like.
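Before moving on, you can sanity-check that all six bookies registered with ZooKeeper by listing them with the BookKeeper shell (a sketch; the `listbookies` command and its `-readwrite` flag are assumptions here, see the CLI reference of your release):

```shell
$ bookkeeper-server/bin/bookkeeper shell listbookies -readwrite
```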
- -### Goal - -The goal of the dice application is to have - -* multiple instances of this application, -* possibly running on different machines, -* all of which display the exact same sequence of numbers. - -In other words, the log needs to be both durable and consistent, regardless of how many {% pop bookies %} are participating in the BookKeeper ensemble. If one of the bookies crashes or becomes unable to communicate with the other bookies in any way, it should *still* display the same sequence of numbers as the others. This tutorial will show you how to achieve this. - -To begin, download the base application, compile and run it. - -```shell -$ git clone https://github.com/ivankelly/bookkeeper-tutorial.git -$ mvn package -$ mvn exec:java -Dexec.mainClass=org.apache.bookkeeper.Dice -``` - -That should yield output that looks something like this: - -``` -[INFO] Scanning for projects... -[INFO] -[INFO] ------------------------------------------------------------------------ -[INFO] Building tutorial 1.0-SNAPSHOT -[INFO] ------------------------------------------------------------------------ -[INFO] -[INFO] --- exec-maven-plugin:1.3.2:java (default-cli) @ tutorial --- -[WARNING] Warning: killAfter is now deprecated. Do you need it ? Please comment on MEXEC-6. -Value = 4 -Value = 5 -Value = 3 -``` - -### The base application - -The application in this tutorial is a dice application. The `Dice` class below has a `playDice` function that generates a random number between 1 and 6 every second, prints the value of the dice roll, and runs indefinitely. - -```java -public class Dice { - Random r = new Random(); - - void playDice() throws InterruptedException { - while (true) { - Thread.sleep(1000); - System.out.println("Value = " + (r.nextInt(6) + 1)); - } - } -} -``` - -When you run the `main` function of this class, a new `Dice` object will be instantiated and then run indefinitely: - -```java -public class Dice { - // other methods - - public static void main(String[] args) throws InterruptedException { - Dice d = new Dice(); - d.playDice(); - } -} -``` - -### Leaders and followers (and a bit of background) - -To achieve this common view in multiple instances of the program, we need each instance to agree on what the next number in the sequence will be. For example, the instances must agree that 4 is the first number and 2 is the second number and 5 is the third number and so on. This is a difficult problem, especially in the case that any instance may go away at any time, and messages between the instances can be lost or reordered. - -Luckily, there are already algorithms to solve this. Paxos is an abstract algorithm to implement this kind of agreement, while Zab and Raft are more practical protocols. This video gives a good overview about how these algorithms usually look. They all have a similar core. - -It would be possible to run the Paxos to agree on each number in the sequence. However, running Paxos each time can be expensive. What Zab and Raft do is that they use a Paxos-like algorithm to elect a leader. The leader then decides what the sequence of events should be, putting them in a log, which the other instances can then follow to maintain the same state as the leader. - -Bookkeeper provides the functionality for the second part of the protocol, allowing a leader to write events to a log and have multiple followers tailing the log. However, bookkeeper does not do leader election. You will need a zookeeper or raft instance for that purpose. - -### Why not just use ZooKeeper? 
- -There are a number of reasons: - -1. Zookeeper's log is only exposed through a tree-like interface. It can be hard to shoehorn your application into this. -2. A zookeeper ensemble of multiple machines is limited to one log. You may want one log per resource, which will become expensive very quickly. -3. Adding extra machines to a zookeeper ensemble does not increase capacity or throughput. - -Bookkeeper can be seen as a means of exposing ZooKeeper's replicated log to applications in a scalable fashion. ZooKeeper is still used by BookKeeper, however, to maintain consistency guarantees, though clients don't need to interact with ZooKeeper directly. - -### Electing a leader - -We'll use zookeeper to elect a leader. A zookeeper instance will have started locally when you started the localbookie application above. To verify it's running, run the following command. - -```shell -$ echo stat | nc localhost 2181 -Zookeeper version: 3.4.6-1569965, built on 02/20/2014 09:09 GMT -Clients: - /127.0.0.1:59343[1](queued=0,recved=40,sent=41) - /127.0.0.1:49354[1](queued=0,recved=11,sent=11) - /127.0.0.1:49361[0](queued=0,recved=1,sent=0) - /127.0.0.1:59344[1](queued=0,recved=38,sent=39) - /127.0.0.1:59345[1](queued=0,recved=38,sent=39) - /127.0.0.1:59346[1](queued=0,recved=38,sent=39) - -Latency min/avg/max: 0/0/23 -Received: 167 -Sent: 170 -Connections: 6 -Outstanding: 0 -Zxid: 0x11 -Mode: standalone -Node count: 16 -``` - -To interact with zookeeper, we'll use the Curator client rather than the stock zookeeper client. Getting things right with the zookeeper client can be tricky, and curator removes a lot of the pointy corners for you. In fact, curator even provides a leader election recipe, so we need to do very little work to get leader election in our application. - -```java -public class Dice extends LeaderSelectorListenerAdapter implements Closeable { - - final static String ZOOKEEPER_SERVER = "127.0.0.1:2181"; - final static String ELECTION_PATH = "/dice-elect"; - - ... - - Dice() throws InterruptedException { - curator = CuratorFrameworkFactory.newClient(ZOOKEEPER_SERVER, - 2000, 10000, new ExponentialBackoffRetry(1000, 3)); - curator.start(); - curator.blockUntilConnected(); - - leaderSelector = new LeaderSelector(curator, ELECTION_PATH, this); - leaderSelector.autoRequeue(); - leaderSelector.start(); - } -``` - -In the constructor for Dice, we need to create the curator client. We specify four things when creating the client: the location of the zookeeper service, the session timeout, the connect timeout, and the retry policy. - -The session timeout is a zookeeper concept. If the zookeeper server doesn't hear anything from the client for this amount of time, any leases which the client holds will be timed out. This is important in leader election. For leader election, the curator client will take a lease on ELECTION_PATH. The first instance to take the lease will become leader and the rest will become followers. However, their claim on the lease will remain in the queue. If the first instance then goes away, due to a crash etc., its session will time out. Once the session times out, the lease will be released and the next instance in the queue will become the leader. The call to autoRequeue() will make the client queue itself again if it loses the lease for some other reason, such as if it was still alive but a garbage collection cycle caused it to lose its session, and thereby its lease.
I've set the lease to be quite low so that when we test out leader election, transitions will be quite quick. The optimum length for session timeout depends very much on the use case. The other parameters are the connection timeout, i.e. the amount of time it will spend trying to connect to a zookeeper server before giving up, and the retry policy. The retry policy specifies how the client should respond to transient errors, such as connection loss. Operations that fail with transient errors can be retried, and this argument specifies how often the retries should occur. - -Finally, you'll have noticed that Dice now extends LeaderSelectorListenerAdapter and implements Closeable. Closeable is there to close the resource we have initialized in the constructor, the curator client and the leaderSelector. LeaderSelectorListenerAdapter is a callback that the leaderSelector uses to notify the instance that it is now the leader. It is passed as the third argument to the LeaderSelector constructor. - -```java - @Override - public void takeLeadership(CuratorFramework client) - throws Exception { - synchronized (this) { - leader = true; - try { - while (true) { - this.wait(); - } - } catch (InterruptedException ie) { - Thread.currentThread().interrupt(); - leader = false; - } - } - } -``` - -takeLeadership() is the callback called by LeaderSelector when the instance is leader. It should only return when the instance wants to give up leadership. In our case, we never do so we wait on the current object until we're interrupted. To signal to the rest of the program that we are leader we set a volatile boolean called leader to true. This is unset after we are interrupted. - -```java - void playDice() throws InterruptedException { - while (true) { - while (leader) { - Thread.sleep(1000); - System.out.println("Value = " + (r.nextInt(6) + 1) - + ", isLeader = " + leader); - } - } - } -``` - -Finally, we modify the `playDice` function to only generate random numbers when it is the leader. - -Run two instances of the program in two different terminals. You'll see that one becomes leader and prints numbers and the other just sits there. - -Now stop the leader using Control-Z. This will pause the process, but it won't kill it. You will be dropped back to the shell in that terminal. After a couple of seconds, the session timeout, you will see that the other instance has become the leader. Zookeeper will guarantee that only one instance is selected as leader at any time. - -Now go back to the shell that the original leader was on and wake up the process using fg. You'll see something like the following: - -```shell -... -... -Value = 4, isLeader = true -Value = 4, isLeader = true -^Z -[1]+ Stopped mvn exec:java -Dexec.mainClass=org.apache.bookkeeper.Dice -$ fg -mvn exec:java -Dexec.mainClass=org.apache.bookkeeper.Dice -Value = 3, isLeader = true -Value = 1, isLeader = false -``` - -## New API - -Since 4.6 BookKeeper provides a new client API which leverages Java8 [CompletableFuture](https://docs.oracle.com/javase/8/docs/api/java/util/concurrent/CompletableFuture.html) facility. -[WriteHandle](../javadoc/org/apache/bookkeeper/client/api/WriteHandle), [WriteAdvHandle](../javadoc/org/apache/bookkeeper/client/api/WriteAdvHandle), [ReadHandle](../javadoc/org/apache/bookkeeper/client/api/ReadHandle) are introduced for replacing the generic [LedgerHandle](../javadoc/org/apache/bookkeeper/client/LedgerHandle). - -> All the new API now is available in `org.apache.bookkeeper.client.api`. 
You should only use interfaces defined in this package. - -*Beware* that this API in 4.6 is still experimental API and can be subject to changes in next minor releases. - -### Create a new client - -In order to create a new [`BookKeeper`](../javadoc/org/apache/bookkeeper/client/api/BookKeeper) client object, you need to construct a [`ClientConfiguration`](../javadoc/org/apache/bookkeeper/conf/ClientConfiguration) object and set a [connection string](#connection-string) first, and then use [`BookKeeperBuilder`](../javadoc/org/apache/bookkeeper/client/api/BookKeeperBuilder) to build the client. - -Here is an example building the bookkeeper client. - -```java -// construct a client configuration instance -ClientConfiguration conf = new ClientConfiguration(); -conf.setZkServers(zkConnectionString); -conf.setZkLedgersRootPath("/path/to/ledgers/root"); - -// build the bookkeeper client -BookKeeper bk = BookKeeper.newBuilder(conf) - .statsLogger(...) - ... - .build(); - -``` - -### Create ledgers - -the easiest way to create a {% pop ledger %} using the java client is via the [`createbuilder`](../javadoc/org/apache/bookkeeper/client/api/createbuilder). you must specify at least -a [`digesttype`](../javadoc/org/apache/bookkeeper/client/api/digesttype) and a password. - -here's an example: - -```java -BookKeeper bk = ...; - -byte[] password = "some-password".getBytes(); - -WriteHandle wh = bk.newCreateLedgerOp() - .withDigestType(DigestType.CRC32) - .withPassword(password) - .withEnsembleSize(3) - .withWriteQuorumSize(3) - .withAckQuorumSize(2) - .execute() // execute the creation op - .get(); // wait for the execution to complete -``` - -A [`WriteHandle`](../javadoc/org/apache/bookkeeper/client/api/WriteHandle) is returned for applications to write and read entries to and from the ledger. - -### Append entries to ledgers - -The [`WriteHandle`](../javadoc/org/apache/bookkeeper/client/api/WriteHandle) can be used for applications to append entries to the ledgers. - -```java -WriteHandle wh = ...; - -CompletableFuture addFuture = wh.append("Some entry data".getBytes()); - -// option 1: you can wait for add to complete synchronously -try { - long entryId = FutureUtils.result(addFuture.get()); -} catch (BKException bke) { - // error handling -} - -// option 2: you can process the result and exception asynchronously -addFuture - .thenApply(entryId -> { - // process the result - }) - .exceptionally(cause -> { - // handle the exception - }) - -// option 3: bookkeeper provides a twitter-future-like event listener for processing result and exception asynchronously -addFuture.whenComplete(new FutureEventListener() { - @Override - public void onSuccess(long entryId) { - // process the result - } - @Override - public void onFailure(Throwable cause) { - // handle the exception - } -}); -``` - -The append method supports three representations of a bytes array: the native java `byte[]`, java nio `ByteBuffer` and netty `ByteBuf`. -It is recommended to use `ByteBuf` as it is more gc friendly. - -### Open ledgers - -You can open ledgers to read entries. Opening ledgers is done by [`openBuilder`](../javadoc/org/apache/bookkeeper/client/api/openBuilder). You must specify the ledgerId and the password -in order to open the ledgers. 
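-
-For instance, here is a minimal sketch of appending a Netty `ByteBuf` (assuming the `wh` handle from the snippet above; check the `WriteHandle` Javadoc for the buffer ownership and release semantics):
-
-```java
-import io.netty.buffer.ByteBuf;
-import io.netty.buffer.Unpooled;
-
-// a sketch: append a Netty ByteBuf instead of copying into a byte[]
-ByteBuf buf = Unpooled.wrappedBuffer("Some entry data".getBytes());
-long entryId = wh.append(buf).get(); // wait for the add to complete
-```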
-
-### Open ledgers
-
-You can open ledgers to read entries. Opening ledgers is done through the [`openBuilder`](../javadoc/org/apache/bookkeeper/client/api/OpenBuilder). You must specify the ledger id and the password
-in order to open a ledger.
-
-Here's an example:
-
-```java
-BookKeeper bk = ...;
-
-long ledgerId = ...;
-byte[] password = "some-password".getBytes();
-
-ReadHandle rh = bk.newOpenLedgerOp()
-    .withLedgerId(ledgerId)
-    .withPassword(password)
-    .execute()          // execute the open op
-    .get();             // wait for the execution to complete
-```
-
-A [`ReadHandle`](../javadoc/org/apache/bookkeeper/client/api/ReadHandle) is returned for applications to read entries from the ledger.
-
-#### Recovery vs NoRecovery
-
-By default, the [`openBuilder`](../javadoc/org/apache/bookkeeper/client/api/OpenBuilder) opens the ledger in `NoRecovery` mode. You can open the ledger in `Recovery` mode by specifying
-`withRecovery(true)` in the open builder.
-
-```java
-BookKeeper bk = ...;
-
-long ledgerId = ...;
-byte[] password = "some-password".getBytes();
-
-ReadHandle rh = bk.newOpenLedgerOp()
-    .withLedgerId(ledgerId)
-    .withPassword(password)
-    .withRecovery(true)
-    .execute()
-    .get();
-```
-
-**What is the difference between "Recovery" and "NoRecovery"?**
-
-If you open a ledger in "Recovery" mode, it will basically fence and seal the ledger -- no more entries are allowed
-to be appended to it. The writer which is currently appending entries to the ledger will fail with [`LedgerFencedException`](../javadoc/org/apache/bookkeeper/client/api/BKException.Code#LedgerFencedException).
-
-In contrast, opening a ledger in "NoRecovery" mode will not fence and seal the ledger. "NoRecovery" mode is usually used by applications for tailing reads on a ledger.
-
-### Read entries from ledgers
-
-The [`ReadHandle`](../javadoc/org/apache/bookkeeper/client/api/ReadHandle) returned from the open builder can be used by applications to read entries from the ledgers.
-
-```java
-ReadHandle rh = ...;
-
-long startEntryId = ...;
-long endEntryId = ...;
-CompletableFuture<LedgerEntries> readFuture = rh.read(startEntryId, endEntryId);
-
-// option 1: you can wait for the read to complete synchronously
-try {
-    LedgerEntries entries = FutureUtils.result(readFuture);
-} catch (BKException bke) {
-    // error handling
-}
-
-// option 2: you can process the result and exception asynchronously
-readFuture
-    .thenApply(entries -> {
-        // process the result
-        return entries;
-    })
-    .exceptionally(cause -> {
-        // handle the exception
-        return null;
-    });
-
-// option 3: bookkeeper provides a twitter-future-like event listener for processing result and exception asynchronously
-readFuture.whenComplete(new FutureEventListener<LedgerEntries>() {
-    @Override
-    public void onSuccess(LedgerEntries entries) {
-        // process the result
-    }
-    @Override
-    public void onFailure(Throwable cause) {
-        // handle the exception
-    }
-});
-```
-
-Once you are done processing the [`LedgerEntries`](../javadoc/org/apache/bookkeeper/client/api/LedgerEntries), you can call `#close()` on the `LedgerEntries` instance to
-release the buffers held by it.
-
-Applications are allowed to read any entries between `0` and [`LastAddConfirmed`](../javadoc/org/apache/bookkeeper/client/api/ReadHandle.html#getLastAddConfirmed). If an application
-attempts to read entries beyond `LastAddConfirmed`, it will receive [`IncorrectParameterException`](../javadoc/org/apache/bookkeeper/client/api/BKException.Code#IncorrectParameterException).
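-
-For instance, a short sketch that reads up to the current `LastAddConfirmed` and releases the buffers via try-with-resources (the `LedgerEntry` accessors used here should be verified against the `org.apache.bookkeeper.client.api` Javadoc):
-
-```java
-ReadHandle rh = ...;
-
-// a sketch: read all confirmed entries and release their buffers when done
-// (assumes the ledger contains at least one confirmed entry)
-try (LedgerEntries entries = rh.read(0, rh.getLastAddConfirmed()).get()) {
-    for (LedgerEntry entry : entries) {
-        byte[] data = entry.getEntryBytes(); // copies the entry payload
-        System.out.println("entry " + entry.getEntryId() + ": " + data.length + " bytes");
-    }
-} // close() releases the buffers held by the entries
-```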
-
-### Read unconfirmed entries from ledgers
-
-`readUnconfirmed` provides a mechanism for applications to read entries beyond `LastAddConfirmed`. Applications should be aware that `readUnconfirmed` doesn't provide any
-repeatable read consistency.
-
-```java
-CompletableFuture<LedgerEntries> readFuture = rh.readUnconfirmed(startEntryId, endEntryId);
-```
-
-### Tailing Reads
-
-There are two methods for applications to achieve tailing reads: `Polling` and `Long-Polling`.
-
-#### Polling
-
-You can do this in a synchronous way:
-
-```java
-ReadHandle rh = ...;
-
-long startEntryId = 0L;
-long nextEntryId = startEntryId;
-int numEntriesPerBatch = 4;
-while (!rh.isClosed() || nextEntryId <= rh.getLastAddConfirmed()) {
-    long lac = rh.getLastAddConfirmed();
-    if (nextEntryId > lac) {
-        // no more entries have been added yet
-        Thread.sleep(1000);
-
-        // refresh the LastAddConfirmed value from the bookies
-        rh.readLastAddConfirmed().get();
-        continue;
-    }
-
-    long endEntryId = Math.min(lac, nextEntryId + numEntriesPerBatch - 1);
-    LedgerEntries entries = rh.read(nextEntryId, endEntryId).get();
-
-    // process the entries
-
-    nextEntryId = endEntryId + 1;
-}
-```
-
-#### Long Polling
-
-```java
-ReadHandle rh = ...;
-
-long startEntryId = 0L;
-long nextEntryId = startEntryId;
-int numEntriesPerBatch = 4;
-while (!rh.isClosed() || nextEntryId <= rh.getLastAddConfirmed()) {
-    long lac = rh.getLastAddConfirmed();
-    if (nextEntryId > lac) {
-        // no more entries have been added yet
-        try (LastConfirmedAndEntry lacAndEntry = rh.readLastAddConfirmedAndEntry(nextEntryId, 1000, false).get()) {
-            if (lacAndEntry.hasEntry()) {
-                // process the entry
-
-                ++nextEntryId;
-            }
-        }
-    } else {
-        long endEntryId = Math.min(lac, nextEntryId + numEntriesPerBatch - 1);
-        LedgerEntries entries = rh.read(nextEntryId, endEntryId).get();
-
-        // process the entries
-        nextEntryId = endEntryId + 1;
-    }
-}
-```
-
-### Delete ledgers
-
-{% pop Ledgers %} can be deleted by using [`DeleteBuilder`](../javadoc/org/apache/bookkeeper/client/api/DeleteBuilder).
-
-```java
-BookKeeper bk = ...;
-long ledgerId = ...;
-
-bk.newDeleteLedgerOp()
-    .withLedgerId(ledgerId)
-    .execute()
-    .get();
-```
diff --git a/site/docs/4.6.0/api/overview.md b/site/docs/4.6.0/api/overview.md
deleted file mode 100644
index 3eb649273c1..00000000000
--- a/site/docs/4.6.0/api/overview.md
+++ /dev/null
@@ -1,17 +0,0 @@
----
-title: BookKeeper API
----
-
-BookKeeper offers a few APIs that applications can use to interact with it:
-
-* The [ledger API](../ledger-api) is a lower-level API that enables you to interact with {% pop ledgers %} directly
-* The [Ledger Advanced API](../ledger-adv-api) is an advanced extension of the [Ledger API](../ledger-api) that provides more flexibility to applications.
-* The [DistributedLog API](../distributedlog-api) is a higher-level API that provides convenient abstractions.
-
-## Trade-offs
-
-The `Ledger API` provides direct access to ledgers and thus enables you to use BookKeeper however you'd like.
-
-However, in most use cases, if you want a `log stream`-like abstraction, it requires you to manage things like tracking the list of ledgers,
-managing rolling ledgers, and data retention on your own. In such cases, we recommend using the [DistributedLog API](../distributedlog-api),
-with semantics resembling continuous log streams from the standpoint of applications.
diff --git a/site/docs/4.6.0/deployment/dcos.md b/site/docs/4.6.0/deployment/dcos.md
deleted file mode 100644
index ee3c956fd8c..00000000000
--- a/site/docs/4.6.0/deployment/dcos.md
+++ /dev/null
@@ -1,142 +0,0 @@
----
-title: Deploying BookKeeper on DC/OS
-subtitle: Get up and running easily on an Apache Mesos cluster
-logo: img/dcos-logo.png
----
-
-[DC/OS](https://dcos.io/) (the DataCenter Operating System) is a distributed operating system used for deploying and managing applications and systems on [Apache Mesos](http://mesos.apache.org/). DC/OS is an open-source tool created and maintained by [Mesosphere](https://mesosphere.com/).
-
-BookKeeper is available as a [DC/OS package](http://universe.dcos.io/#/package/bookkeeper/version/latest) from the [Mesosphere DC/OS Universe](http://universe.dcos.io/#/packages).
-
-## Prerequisites
-
-In order to run BookKeeper on DC/OS, you will need:
-
-* DC/OS version [1.8](https://dcos.io/docs/1.8/) or higher
-* A DC/OS cluster with at least three nodes
-* The [DC/OS CLI tool](https://dcos.io/docs/1.8/usage/cli/install/) installed
-
-Each node in your DC/OS-managed Mesos cluster must have at least:
-
-* 1 CPU
-* 1 GB of memory
-* 10 GB of total persistent disk storage
-
-## Installing BookKeeper
-
-```shell
-$ dcos package install bookkeeper --yes
-```
-
-This command will:
-
-* Install the `bookkeeper` subcommand for the `dcos` CLI tool
-* Start a single {% pop bookie %} on the Mesos cluster with the [default configuration](../../reference/config)
-
-The bookie that is automatically started up uses the host mode of the network and by default exports the service at `agent_ip:3181`.
-
-> If you run `dcos package install bookkeeper` without setting the `--yes` flag, the install will run in interactive mode. For more information on the `package install` command, see the [DC/OS docs](https://docs.mesosphere.com/latest/cli/command-reference/dcos-package/dcos-package-install/).
-
-### Services
-
-To watch BookKeeper start up, click on the **Services** tab in the DC/OS [user interface](https://docs.mesosphere.com/latest/gui/) and you should see the `bookkeeper` package listed:
-
-![DC/OS services]({{ site.baseurl }}img/dcos/services.png)
-
-### Tasks
-
-To see which tasks have started, click on the `bookkeeper` service and you'll see an interface that looks like this:
-
-![DC/OS tasks]({{ site.baseurl }}img/dcos/tasks.png)
-
-## Scaling BookKeeper
-
-Once the first {% pop bookie %} has started up, you can click on the **Scale** tab to scale up your BookKeeper ensemble by adding more bookies (or scale down the ensemble by removing bookies).
-
-![DC/OS scale]({{ site.baseurl }}img/dcos/scale.png)
-
-## ZooKeeper Exhibitor
-
-ZooKeeper contains the information for all bookies in the ensemble. When deployed on DC/OS, BookKeeper uses a ZooKeeper instance provided by DC/OS. You can access a visual UI for ZooKeeper using [Exhibitor](https://github.com/soabase/exhibitor/wiki), which is available at [http://master.dcos/exhibitor](http://master.dcos/exhibitor).
-
-![ZooKeeper Exhibitor]({{ site.baseurl }}img/dcos/exhibitor.png)
-
-You should see a listing of IP/host information for all bookies under the `messaging/bookkeeper/ledgers/available` node.
-
-## Client connections
-
-To connect to bookies running on DC/OS using clients running within your Mesos cluster, you need to specify the ZooKeeper connection string for DC/OS's ZooKeeper cluster:
-
-```
-master.mesos:2181
-```
-
-This is the *only* ZooKeeper host/port you need to include in your connection string. Here's an example using the [Java client](../../api/ledger-api#the-java-ledger-api-client):
-
-```java
-BookKeeper bkClient = new BookKeeper("master.mesos:2181");
-```
-
-If you're connecting using a client running outside your Mesos cluster, you need to supply the public-facing connection string for your DC/OS ZooKeeper cluster.
-
-## Configuring BookKeeper
-
-By default, the `bookkeeper` package will start up a BookKeeper ensemble consisting of one {% pop bookie %} with one CPU, 1 GB of memory, and a 70 MB persistent volume.
-
-You can supply a non-default configuration when installing the package using a JSON file. Here's an example command:
-
-```shell
-$ dcos package install bookkeeper \
-  --options=/path/to/config.json
-```
-
-You can then fetch the current configuration for BookKeeper at any time using the `package describe` command:
-
-```shell
-$ dcos package describe bookkeeper \
-  --config
-```
-
-### Available parameters
-
-> Not all [configurable parameters](../../reference/config) for BookKeeper are available for BookKeeper on DC/OS. Only the parameters shown in the table below are available.
-
-Param | Type | Description | Default
-:-----|:-----|:------------|:-------
-`name` | String | The name of the DC/OS service. | `bookkeeper`
-`cpus` | Integer | The number of CPU shares to allocate to each {% pop bookie %}. The minimum is 1. | `1`
-`instances` | Integer | The number of {% pop bookies %} to run. The minimum is 1. | `1`
-`mem` | Number | The memory, in MB, to allocate to each BookKeeper task | `1024.0` (1 GB)
-`volume_size` | Number | The persistent volume size, in MB | `70`
-`zk_client` | String | The connection string for the ZooKeeper client instance | `master.mesos:2181`
-`service_port` | Integer | The BookKeeper export service port, using `PORT0` in Marathon | `3181`
-
-### Example JSON configuration
-
-Here's an example JSON configuration object for BookKeeper on DC/OS:
-
-```json
-{
-  "instances": 5,
-  "cpus": 3,
-  "mem": 2048.0,
-  "volume_size": 250
-}
-```
-
-If that configuration were stored in a file called `bk-config.json`, you could apply it when installing the BookKeeper package using this command:
-
-```shell
-$ dcos package install bookkeeper \
-  --options=./bk-config.json
-```
-
-## Uninstalling BookKeeper
-
-You can shut down and uninstall the `bookkeeper` package from DC/OS at any time using the `package uninstall` command:
-
-```shell
-$ dcos package uninstall bookkeeper
-Uninstalled package [bookkeeper] version [4.6.0]
-Thank you for using bookkeeper.
-```
diff --git a/site/docs/4.6.0/deployment/kubernetes.md b/site/docs/4.6.0/deployment/kubernetes.md
deleted file mode 100644
index 0f113169edc..00000000000
--- a/site/docs/4.6.0/deployment/kubernetes.md
+++ /dev/null
@@ -1,181 +0,0 @@
----
-title: Deploying Apache BookKeeper on Kubernetes
-tags: [Kubernetes, Google Container Engine]
-logo: img/kubernetes-logo.png
----
-
-Apache BookKeeper can be easily deployed in [Kubernetes](https://kubernetes.io/) clusters. Managed clusters on [Google Container Engine](https://cloud.google.com/compute/) are the most convenient way to do so.
-
-The deployment method shown in this guide relies on [YAML](http://yaml.org/) definitions for Kubernetes [resources](https://kubernetes.io/docs/resources-reference/v1.6/). The [`kubernetes`](https://github.com/apache/bookkeeper/tree/master/deploy/kubernetes) subdirectory holds resource definitions for:
-
-* A three-node ZooKeeper cluster
-* A BookKeeper cluster with a bookie running on each node.
-
-## Setup on Google Container Engine
-
-To get started, clone the [`kubernetes`](https://github.com/apache/bookkeeper/tree/master/deploy/kubernetes) resource definitions from GitHub using git clone.
-
-If you'd like to change the number of bookies, or ZooKeeper nodes in your BookKeeper cluster, modify the `replicas` parameter in the `spec` section of the appropriate [`Deployment`](https://kubernetes.io/docs/concepts/workloads/controllers/deployment/) or [`StatefulSet`](https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/) resource.
-
-[Google Container Engine](https://cloud.google.com/container-engine) (GKE) automates the creation and management of Kubernetes clusters in [Google Compute Engine](https://cloud.google.com/compute/) (GCE).
-
-### Prerequisites
-
-To get started, you'll need:
-
-* A Google Cloud Platform account, which you can sign up for at [cloud.google.com](https://cloud.google.com)
-* An existing Cloud Platform project
-* The [Google Cloud SDK](https://cloud.google.com/sdk/downloads) (in particular the [`gcloud`](https://cloud.google.com/sdk/gcloud/) and `kubectl` tools).
-
-### Create a new Kubernetes cluster
-
-You can create a new GKE cluster using the [`container clusters create`](https://cloud.google.com/sdk/gcloud/reference/container/clusters/create) command for `gcloud`. This command enables you to specify the number of nodes in the cluster, the machine types of those nodes, and more.
-
-As an example, we'll create a new GKE cluster for Kubernetes version [1.6.4](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG.md#v164) in the [us-central1-a](https://cloud.google.com/compute/docs/regions-zones/regions-zones#available) zone. The cluster will be named `bookkeeper-gke-cluster` and will consist of three VMs, each using two locally attached SSDs and running on [n1-standard-8](https://cloud.google.com/compute/docs/machine-types) machines. These SSDs will be used by bookie instances: one for the BookKeeper journal and the other for storing the actual data.
-
-```bash
-$ gcloud config set compute/zone us-central1-a
-$ gcloud config set project your-project-name
-$ gcloud container clusters create bookkeeper-gke-cluster \
-  --machine-type=n1-standard-8 \
-  --num-nodes=3 \
-  --local-ssd-count=2 \
-  --enable-kubernetes-alpha
-```
-
-By default, bookies will run on all the machines that have locally attached SSD disks. In this example, all of those machines will have two SSDs, but you can add different types of machines to the cluster later. You can control which machines host bookie servers using [labels](https://kubernetes.io/docs/concepts/overview/working-with-objects/labels).
-
-### Dashboard
-
-You can observe your cluster in the [Kubernetes Dashboard](https://kubernetes.io/docs/tasks/access-application-cluster/web-ui-dashboard/) by downloading the credentials for your Kubernetes cluster and opening up a proxy to the cluster:
-
-```bash
-$ gcloud container clusters get-credentials bookkeeper-gke-cluster \
-  --zone=us-central1-a \
-  --project=your-project-name
-$ kubectl proxy
-```
-
-By default, the proxy will be opened on port 8001. Now you can navigate to [localhost:8001/ui](http://localhost:8001/ui) in your browser to access the dashboard. At first your GKE cluster will be empty, but that will change as you begin deploying.
-
-When you create a cluster, your `kubectl` config in `~/.kube/config` (on macOS and Linux) will be updated for you, so you probably won't need to change your configuration. Nonetheless, you can ensure that `kubectl` can interact with your cluster by listing the nodes in the cluster:
-
-```bash
-$ kubectl get nodes
-```
-
-If `kubectl` is working with your cluster, you can proceed to deploy ZooKeeper and bookies.
-
-### ZooKeeper
-
-You *must* deploy ZooKeeper as the first component, as it is a dependency for the others.
-
-```bash
-$ kubectl apply -f zookeeper.yaml
-```
-
-Wait until all three ZooKeeper server pods are up and have the status `Running`. You can check on the status of the ZooKeeper pods at any time:
-
-```bash
-$ kubectl get pods -l component=zookeeper
-NAME      READY     STATUS             RESTARTS   AGE
-zk-0      1/1       Running            0          18m
-zk-1      1/1       Running            0          17m
-zk-2      0/1       Running            6          15m
-```
-
-This step may take several minutes, as Kubernetes needs to download the Docker image on the VMs.
-
-If you want to connect to one of the remote ZooKeeper servers, you can use [zk-shell](https://github.com/rgs1/zk_shell). You need to forward a local port to the remote ZooKeeper server first:
-
-```bash
-$ kubectl port-forward zk-0 2181:2181
-$ zk-shell localhost 2181
-```
-
-### Deploy Bookies
-
-Once the ZooKeeper cluster is running, you can deploy the bookies. You can deploy the bookies either using a [DaemonSet](https://kubernetes.io/docs/concepts/workloads/controllers/daemonset/) or a [StatefulSet](https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/).
-
-> NOTE: _DaemonSet_ vs _StatefulSet_
->
-> A _DaemonSet_ ensures that all (or some) nodes run a bookie pod. As nodes are added to the cluster, bookie pods are added automatically to them. As nodes are removed from the
-> cluster, those bookie pods are garbage collected. Bookies deployed in a DaemonSet store data on the local disks of those nodes, so no external storage for persistent
-> volumes is required.
->
-> A _StatefulSet_ maintains a sticky identity for the pods that it runs and manages. It provides stable and unique network identifiers, and stable and persistent storage for each pod. The pods
-> are not interchangeable: the identifiers for each pod are maintained across any rescheduling.
->
-> Which one to use? A _DaemonSet_ is the easiest way to deploy a BookKeeper cluster, because it doesn't require an additional persistent volume provisioner and uses local disks. BookKeeper manages
-> the data replication. It also offers the best latency properties. However, it uses `hostIP` and `hostPort` for communications between pods. In some Kubernetes platforms (such as DC/OS), `hostIP` and
-> `hostPort` are not well supported. A _StatefulSet_ is only practical when deploying in a cloud environment or any Kubernetes installation that has persistent volumes available. Also be aware that latency
-> can potentially be higher when using persistent volumes, because there is usually built-in replication in the persistent volumes.
-
-```bash
-# deploy bookies in a daemon set
-$ kubectl apply -f bookkeeper.yaml
-
-# deploy bookies in a stateful set
-$ kubectl apply -f bookkeeper.stateful.yaml
-```
-
-You can check on the status of the bookie pods for these components either in the Kubernetes Dashboard or using `kubectl`:
-
-```bash
-$ kubectl get pods
-```
-
-Once all BookKeeper pods are running, you can use zk-shell to find all available bookies under `/ledgers/`.
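-
-If you'd rather check this from code, a small sketch using the stock ZooKeeper Java client might look like the following (it assumes the `kubectl port-forward zk-0 2181:2181` command from earlier is still running, and that the cluster uses the default `/ledgers` root path):
-
-```java
-import java.util.List;
-import org.apache.zookeeper.ZooKeeper;
-
-// a sketch: list the bookies registered under the default ledgers root
-public class ListBookies {
-    public static void main(String[] args) throws Exception {
-        ZooKeeper zk = new ZooKeeper("localhost:2181", 10000, event -> {});
-        try {
-            List<String> bookies = zk.getChildren("/ledgers/available", false);
-            bookies.forEach(System.out::println);
-        } finally {
-            zk.close();
-        }
-    }
-}
-```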
-
-You can also run a [bookkeeper tutorial](https://github.com/ivankelly/bookkeeper-tutorial/) instance, named `dice` here, in this BookKeeper cluster.
-
-```bash
-$ kubectl run -i --tty --attach dice --image=caiok/bookkeeper-tutorial --env ZOOKEEPER_SERVERS="zk-0.zookeeper"
-```
-
-Example output from the dice instance looks like this:
-
-```shell
-➜ $ kubectl run -i --tty --attach dice --image=caiok/bookkeeper-tutorial --env ZOOKEEPER_SERVERS="zk-0.zookeeper"
-If you don't see a command prompt, try pressing enter.
-Value = 1, epoch = 5, leading
-Value = 2, epoch = 5, leading
-Value = 1, epoch = 5, leading
-Value = 4, epoch = 5, leading
-Value = 5, epoch = 5, leading
-Value = 4, epoch = 5, leading
-Value = 3, epoch = 5, leading
-Value = 5, epoch = 5, leading
-Value = 3, epoch = 5, leading
-Value = 2, epoch = 5, leading
-Value = 1, epoch = 5, leading
-Value = 4, epoch = 5, leading
-Value = 2, epoch = 5, leading
-```
-
-### Un-Deploy
-
-Delete the demo dice instance:
-
-```bash
-$ kubectl delete deployment dice
-```
-
-Delete BookKeeper:
-
-```bash
-$ kubectl delete -f bookkeeper.yaml
-```
-
-Delete ZooKeeper:
-
-```bash
-$ kubectl delete -f zookeeper.yaml
-```
-
-Delete the cluster:
-
-```bash
-$ gcloud container clusters delete bookkeeper-gke-cluster
-```
diff --git a/site/docs/4.6.0/deployment/manual.md b/site/docs/4.6.0/deployment/manual.md
deleted file mode 100644
index daafd5556f5..00000000000
--- a/site/docs/4.6.0/deployment/manual.md
+++ /dev/null
@@ -1,56 +0,0 @@
----
-title: Manual deployment
----
-
-The easiest way to deploy BookKeeper is using schedulers like [DC/OS](../dcos), but you can also deploy BookKeeper clusters manually. A BookKeeper cluster consists of two main components:
-
-* A [ZooKeeper](#zookeeper-setup) cluster that is used for configuration- and coordination-related tasks
-* An [ensemble](#starting-up-bookies) of {% pop bookies %}
-
-## ZooKeeper setup
-
-We won't provide a full guide to setting up a ZooKeeper cluster here. We recommend that you consult [this guide](https://zookeeper.apache.org/doc/current/zookeeperAdmin.html) in the official ZooKeeper documentation.
-
-## Starting up bookies
-
-Once your ZooKeeper cluster is up and running, you can start up as many {% pop bookies %} as you'd like to form a cluster. Before starting up each bookie, you need to modify the bookie's configuration to make sure that it points to the right ZooKeeper cluster.
-
-On each bookie host, you need to [download](../../getting-started/installation#download) the BookKeeper package as a tarball. Once you've done that, you need to configure the bookie by setting values in the `bookkeeper-server/conf/bk_server.conf` config file. The one parameter that you will absolutely need to change is the [`zkServers`](../../config#zkServers) parameter, which you will need to set to the ZooKeeper connection string for your ZooKeeper cluster. Here's an example:
-
-```properties
-zkServers=100.0.0.1:2181,100.0.0.2:2181,100.0.0.3:2181
-```
-
-> A full listing of configurable parameters available in `bookkeeper-server/conf/bk_server.conf` can be found in the [Configuration](../../reference/config) reference manual.
-
-Once the bookie's configuration is set, you can start it up using the [`bookie`](../../reference/cli#bookkeeper-bookie) command of the [`bookkeeper`](../../reference/cli#bookkeeper) CLI tool:
-
-```shell
-$ bookkeeper-server/bin/bookkeeper bookie
-```
-
-> You can also build BookKeeper [by cloning it from source](../../getting-started/installation#clone) or [using Maven](../../getting-started/installation#build-using-maven).
-
-### System requirements
-
-{% include system-requirements.md %}
-
-## Cluster metadata setup
-
-Once you've started up a cluster of bookies, you need to set up cluster metadata for the cluster by running the following command from any bookie in the cluster:
-
-```shell
-$ bookkeeper-server/bin/bookkeeper shell metaformat
-```
-
-> The `metaformat` command performs all the necessary ZooKeeper cluster metadata tasks and thus only needs to be run *once* and from *any* bookie in the BookKeeper cluster.
-
-Once cluster metadata formatting has been completed, your BookKeeper cluster is ready to go!
-
diff --git a/site/docs/4.6.0/development/codebase.md b/site/docs/4.6.0/development/codebase.md
deleted file mode 100644
index 9a83073ea4c..00000000000
--- a/site/docs/4.6.0/development/codebase.md
+++ /dev/null
@@ -1,3 +0,0 @@
----
-title: The BookKeeper codebase
----
diff --git a/site/docs/4.6.0/development/protocol.md b/site/docs/4.6.0/development/protocol.md
deleted file mode 100644
index 6d17aa0ed45..00000000000
--- a/site/docs/4.6.0/development/protocol.md
+++ /dev/null
@@ -1,148 +0,0 @@
----
-title: The BookKeeper protocol
----
-
-BookKeeper uses a special replication protocol for guaranteeing persistent storage of entries in an ensemble of bookies.
-
-> This document assumes that you have some knowledge of leader election and log replication and how these can be used in a distributed system. If not, we recommend reading the [example application](../../api/ledger-api#example-application) documentation first.
-
-## Ledgers
-
-{% pop Ledgers %} are the basic building block of BookKeeper and the level at which BookKeeper makes its persistent storage guarantees. A replicated log consists of an ordered list of ledgers. See [Ledgers to logs](#ledgers-to-logs) for info on building a replicated log from ledgers.
-
-Ledgers are composed of metadata and {% pop entries %}. The metadata is stored in ZooKeeper, which provides a *compare-and-swap* (CAS) operation. Entries are stored on storage nodes known as {% pop bookies %}.
-
-A ledger has a single writer and multiple readers (SWMR).
-
-### Ledger metadata
-
-A ledger's metadata contains the following:
-
-Parameter | Name | Meaning
-:---------|:-----|:-------
-Identifier | | A 64-bit integer, unique within the system
-Ensemble size | **E** | The number of nodes the ledger is stored on
-Write quorum size | **Qw** | The number of nodes each entry is written to. In effect, the max replication for the entry.
-Ack quorum size | **Qa** | The number of nodes an entry must be acknowledged on. In effect, the minimum replication for the entry.
-Current state | | The current status of the ledger. One of `OPEN`, `CLOSED`, or `IN_RECOVERY`.
-Last entry | | The last entry in the ledger, or `NULL` if the current state is not `CLOSED`.
-
-In addition, each ledger's metadata consists of one or more *fragments*. Each fragment consists of
-
-* the first entry of the fragment and
-* a list of bookies for the fragment.
-
-When creating a ledger, the following invariant must hold:
-
-**E >= Qw >= Qa**
-
-Thus, the ensemble size (**E**) must be no smaller than the write quorum size (**Qw**), which must in turn be no smaller than the ack quorum size (**Qa**). If that condition does not hold, then the ledger creation operation will fail.
-
-### Ensembles
-
-When a ledger is created, **E** bookies are chosen for the entries of that ledger. The bookies are the initial ensemble of the ledger. A ledger can have multiple ensembles, but an entry has only one ensemble. Changes in the ensemble involve a new fragment being added to the ledger.
-
-Take the following example. In this ledger, with an ensemble size of 3, there are two fragments and thus two ensembles, one starting at entry 0, the second at entry 12. The second ensemble differs from the first only by its first element. This could be because bookie1 has failed and therefore had to be replaced.
-
-First entry | Bookies
-:-----------|:-------
-0 | B1, B2, B3
-12 | B4, B2, B3
-
-### Write quorums
-
-Each entry in the log is written to **Qw** nodes. This is considered the write quorum for that entry. The write quorum is the subsequence of the ensemble, **Qw** in length, starting at the bookie at index (entryid % **E**).
-
-For example, in a ledger with **E** = 4, **Qw** = 3, and **Qa** = 2, with an ensemble consisting of B1, B2, B3, and B4, the write quorums for the first 6 entries will be:
-
-Entry | Write quorum
-:-----|:------------
-0 | B1, B2, B3
-1 | B2, B3, B4
-2 | B3, B4, B1
-3 | B4, B1, B2
-4 | B1, B2, B3
-5 | B2, B3, B4
-
-There are only **E** distinct write quorums in any ensemble. If **Qw** = **E**, then there is only one, as no striping occurs.
-
-### Ack quorums
-
-The ack quorum for an entry is any subset of the write quorum of size **Qa**. If **Qa** bookies acknowledge an entry, it means it has been fully replicated.
-
-### Guarantees
-
-The system can tolerate **Qa** – 1 failures without data loss.
-
-BookKeeper guarantees that:
-
-1. All updates to a ledger will be read in the same order as they were written.
-2. All clients will read the same sequence of updates from the ledger.
-
-## Writing to ledgers
-
-As a ledger has a single writer, ensuring that entry ids are sequential is trivial. A bookie acknowledges a write once it has been persisted to disk and is therefore durable. Once **Qa** bookies from the write quorum acknowledge the write, the write is acknowledged to the client, but only if all entries with lower entry ids in the ledger have already been acknowledged to the client.
-
-The entry written contains the ledger id, the entry id, the last add confirmed and the payload. The last add confirmed is the last entry which had been acknowledged to the client when this entry was written. Sending this with the entry speeds up recovery of the ledger in the case that the writer crashes.
-
-Another client can also read entries in the ledger up to the last add confirmed, as we guarantee that all entries thus far have been replicated on **Qa** nodes, and therefore all future readers will be able to also read them. However, to read like this, the ledger should be opened with a non-fencing open. Otherwise, it would kill the writer.
-
-If a node fails to acknowledge a write, the writer will create a new ensemble by replacing the failed node in the current ensemble. It creates a new fragment with this ensemble, starting from the first message that has not been acknowledged to the client. Creating the new fragment involves making a CAS write to the metadata. If the CAS write fails, someone else has modified something in the ledger metadata. This concurrent modification could have been caused by recovery or {% pop rereplication %}. We reread the metadata. If the state of the ledger is no longer `OPEN`, we send an error to the client for any outstanding writes. Otherwise, we try to replace the failed node again.
-
-### Closing a ledger as a writer
-
-Closing a ledger is straightforward for a writer. The writer makes a CAS write to the metadata, changing the state to `CLOSED` and setting the last entry of the ledger to the last entry which we have acknowledged to the client.
-
-If the CAS write fails, it means someone else has modified the metadata. We reread the metadata, and retry closing as long as the state of the ledger is still `OPEN`. If the state is `IN_RECOVERY` we send an error to the client. If the state is `CLOSED` and the last entry is the same as the last entry we have acknowledged to the client, we complete the close operation successfully. If the last entry is different from what we have acknowledged to the client, we send an error to the client.
-
-### Closing a ledger as a reader
-
-A reader can also force a ledger to close. Forcing the ledger to close will prevent any writer from adding new entries to the ledger. This is called {% pop fencing %}. This can occur when a writer has crashed or become unavailable, and a new writer wants to take over writing to the log. The new writer must ensure that it has seen all updates from the previous writer, and prevent the previous writer from making any new updates before making any updates of its own.
-
-To recover a ledger, we first update the state in the metadata to IN_RECOVERY. We then send a fence message to all the bookies in the last fragment of the ledger. When a bookie receives a fence message for a ledger, the fenced state of the ledger is persisted to disk. Once we receive a response from at least (**Qw** - **Qa**) + 1 bookies from each write quorum in the ensemble, the ledger is fenced.
-
-By ensuring we have received a response from at least (**Qw** - **Qa**) + 1 bookies in each write quorum, we ensure that, if the old writer is alive and tries to add a new entry, there will be no write quorum in which **Qa** bookies will accept the write. If the old writer tries to update the ensemble, it will fail on the CAS metadata write, and then see that the ledger is in IN_RECOVERY state, and that it therefore shouldn’t try to write to it.
-
-The old writer will be able to write entries to individual bookies (we can’t guarantee that the fence message reaches all bookies), but as it will not be able to reach the ack quorum, it will not be able to send a success response to its client. The client will get a LedgerFenced error instead.
-
-It is important to note that when you get a ledger fenced message for an entry, it doesn’t mean that the entry has not been written. It means that the entry may or may not have been written, and this can only be determined after the ledger is recovered. In effect, LedgerFenced should be treated like a timeout.
-
-Once the ledger is fenced, recovery can begin. Recovery means finding the last entry of the ledger and closing the ledger. To find the last entry of the ledger, the client asks all bookies for the highest last add confirmed value they have seen. It waits until it has received a response from at least (**Qw** - **Qa**) + 1 bookies from each write quorum, and takes the highest response as the entry id to start reading forward from. It then starts reading forward in the ledger, one entry at a time, replicating all entries it sees to the entire write quorum for that entry. Once it can no longer read any more entries, it updates the state in the metadata to `CLOSED`, and sets the last entry of the ledger to the last entry it wrote. Multiple readers can try to recover a ledger at the same time, but as the metadata write is CAS, they will all converge on the same last entry of the ledger.
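-
-To make the quorum arithmetic on this page concrete, here is a small illustrative sketch (not BookKeeper code) that computes the write quorum for an entry and the number of fencing responses required:
-
-```java
-// Illustrative only; this mirrors the striping and fencing rules described above.
-public class QuorumMath {
-
-    // The write quorum for an entry is the Qw-length subsequence of the
-    // ensemble starting at index (entryId % E).
-    static String[] writeQuorum(String[] ensemble, int qw, long entryId) {
-        String[] quorum = new String[qw];
-        for (int i = 0; i < qw; i++) {
-            quorum[i] = ensemble[(int) ((entryId + i) % ensemble.length)];
-        }
-        return quorum;
-    }
-
-    public static void main(String[] args) {
-        String[] ensemble = {"B1", "B2", "B3", "B4"}; // E = 4
-        int qw = 3, qa = 2;
-        // Entry 2 maps to B3, B4, B1, matching the table in the write quorums section.
-        System.out.println(String.join(", ", writeQuorum(ensemble, qw, 2)));
-        // Fencing and recovery need (Qw - Qa) + 1 = 2 responses from each write quorum.
-        System.out.println("responses needed per write quorum = " + (qw - qa + 1));
-    }
-}
-```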
-
-## Ledgers to logs
-
-In BookKeeper, {% pop ledgers %} can be used to build a replicated log for your system. All guarantees provided by BookKeeper are at the ledger level. Guarantees on the whole log can be built using the ledger guarantees and any consistent datastore with a compare-and-swap (CAS) primitive. BookKeeper uses ZooKeeper as the datastore but others could theoretically be used.
-
-A log in BookKeeper is built from some number of ledgers, with a fixed order. A ledger represents a single segment of the log. A ledger could be the whole period that one node was the leader, or there could be multiple ledgers for a single period of leadership. However, there can only ever be one leader that adds entries to a single ledger. Ledgers cannot be reopened for writing once they have been closed/recovered.
-
-> BookKeeper does *not* provide leader election. You must use a system like ZooKeeper for this.
-
-In many cases, leader election is really leader suggestion. Multiple nodes could think that they are leader at any one time. It is the job of the log to guarantee that only one can write changes to the system.
-
-### Opening a log
-
-Once a node thinks it is leader for a particular log, it must take the following steps:
-
-1. Read the list of ledgers for the log
-1. {% pop Fence %} the last two ledgers in the list. Two ledgers are fenced because the writer may be writing to the second-to-last ledger while adding the last ledger to the list.
-1. Create a new ledger
-1. Add the new ledger to the ledger list
-1. Write the new ledger list back to the datastore using a CAS operation
-
-The fencing in step 2 and the CAS operation in step 5 prevent two nodes from thinking that they have leadership at any one time.
-
-The CAS operation will fail if the list of ledgers has changed between reading it and writing back the new list. When the CAS operation fails, the leader must start at step 1 again. Even better, they should check that they are in fact still the leader with the system that is providing leader election. The protocol will work correctly without this step, though it will be able to make very little progress if two nodes think they are leader and are duelling for the log.
-
-The node must not serve any writes until step 5 completes successfully; see the sketch at the end of this section.
-
-### Rolling ledgers
-
-The leader may wish to close the current ledger and open a new one every so often. Ledgers can only be deleted as a whole. If you don't roll the log, you won't be able to clean up old entries in the log without a leader change. By closing the current ledger and adding a new one, the leader allows the log to be truncated whenever that data is no longer needed. The steps for rolling the log are similar to those for creating a new ledger.
-
-1. Create a new ledger
-1. Add the new ledger to the ledger list
-1. Write the new ledger list to the datastore using CAS
-1. Close the previous ledger
-
-By deferring the closing of the previous ledger until step 4, we can continue writing to the log while we perform metadata update operations to add the new ledger. This is safe as long as you fence the last 2 ledgers when acquiring leadership.
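-
-To tie the "opening a log" steps together, here is a hypothetical sketch of that procedure. The `MetadataStore`, `Versioned`, `fence`, and `createLedger` helpers are illustrative placeholders, not BookKeeper APIs:
-
-```java
-// Hypothetical pseudocode for the "opening a log" steps described above.
-List<Long> openLog(MetadataStore store, String logName) throws Exception {
-    while (true) {
-        Versioned<List<Long>> current = store.read(logName);    // step 1: read the ledger list
-        List<Long> ledgers = new ArrayList<>(current.getValue());
-        int n = ledgers.size();
-        for (int i = Math.max(0, n - 2); i < n; i++) {
-            fence(ledgers.get(i));                              // step 2: fence the last two ledgers
-        }
-        long newLedger = createLedger();                        // step 3: create a new ledger
-        ledgers.add(newLedger);                                 // step 4: add it to the list
-        if (store.casWrite(logName, ledgers, current.getVersion())) {
-            return ledgers;                                     // step 5: CAS write succeeded
-        }
-        // The CAS failed: someone else changed the list. Re-check leadership and retry.
-    }
-}
-```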
-
-
diff --git a/site/docs/4.6.0/getting-started/concepts.md b/site/docs/4.6.0/getting-started/concepts.md
deleted file mode 100644
index 7a3c92847b2..00000000000
--- a/site/docs/4.6.0/getting-started/concepts.md
+++ /dev/null
@@ -1,202 +0,0 @@
----
-title: BookKeeper concepts and architecture
-subtitle: The core components and how they work
-prev: ../run-locally
----
-
-BookKeeper is a service that provides persistent storage of streams of log [entries](#entries)---aka *records*---in sequences called [ledgers](#ledgers). BookKeeper replicates stored entries across multiple servers.
-
-## Basic terms
-
-In BookKeeper:
-
-* each unit of a log is an [*entry*](#entries) (aka record)
-* streams of log entries are called [*ledgers*](#ledgers)
-* individual servers storing ledgers of entries are called [*bookies*](#bookies)
-
-BookKeeper is designed to be reliable and resilient to a wide variety of failures. Bookies can crash, corrupt data, or discard data, but as long as there are enough bookies behaving correctly in the ensemble, the service as a whole will behave correctly.
-
-## Entries
-
-> **Entries** contain the actual data written to ledgers, along with some important metadata.
-
-BookKeeper entries are sequences of bytes that are written to [ledgers](#ledgers). Each entry has the following fields:
-
-Field | Java type | Description
-:-----|:----------|:-----------
-Ledger number | `long` | The ID of the ledger to which the entry has been written
-Entry number | `long` | The unique ID of the entry
-Last confirmed (LC) | `long` | The ID of the last recorded entry
-Data | `byte[]` | The entry's data (written by the client application)
-Authentication code | `byte[]` | The message auth code, which includes *all* other fields in the entry
-
-## Ledgers
-
-> **Ledgers** are the basic unit of storage in BookKeeper.
-
-Ledgers are sequences of entries, while each entry is a sequence of bytes. Entries are written to a ledger:
-
-* sequentially, and
-* at most once.
-
-This means that ledgers have *append-only* semantics. Entries cannot be modified once they've been written to a ledger. Determining the proper write order is the responsibility of [client applications](#clients).
-
-## Clients and APIs
-
-> BookKeeper clients have two main roles: they create and delete ledgers, and they read entries from and write entries to ledgers.
->
-> BookKeeper provides both a lower-level and a higher-level API for ledger interaction.
-
-There are currently two APIs that can be used for interacting with BookKeeper:

-* The [ledger API](../../api/ledger-api) is a lower-level API that enables you to interact with {% pop ledgers %} directly.
-* The [DistributedLog API](../../api/distributedlog-api) is a higher-level API that enables you to use BookKeeper without directly interacting with ledgers.
-
-In general, you should choose the API based on how much granular control you need over ledger semantics. The two APIs can also both be used within a single application.
-
-## Bookies
-
-> **Bookies** are individual BookKeeper servers that handle ledgers (more specifically, fragments of ledgers). Bookies function as part of an ensemble.
-
-A bookie is an individual BookKeeper storage server. Individual bookies store fragments of ledgers, not entire ledgers (for the sake of performance). For any given ledger **L**, an *ensemble* is the group of bookies storing the entries in **L**.
-
-Whenever entries are written to a ledger, those entries are {% pop striped %} across the ensemble (written to a sub-group of bookies rather than to all bookies).
-
-### Motivation
-
-> BookKeeper was initially inspired by the NameNode server in HDFS but its uses now extend far beyond this.
-
-The initial motivation for BookKeeper comes from the [Hadoop](http://hadoop.apache.org/) ecosystem. In the [Hadoop Distributed File System](https://wiki.apache.org/hadoop/HDFS) (HDFS), a special node called the [NameNode](https://wiki.apache.org/hadoop/NameNode) logs all operations in a reliable fashion, which ensures that recovery is possible in case of crashes.
-
-The NameNode, however, served only as initial inspiration for BookKeeper. The applications for BookKeeper extend far beyond this and include essentially any application that requires an append-based storage system. BookKeeper provides a number of advantages for such applications:
-
-* Highly efficient writes
-* High fault tolerance via replication of messages within ensembles of bookies
-* High throughput for write operations via {% pop striping %} (across as many bookies as you wish)
-
-## Metadata storage
-
-BookKeeper requires a metadata storage service to store information related to [ledgers](#ledgers) and available bookies. BookKeeper currently uses [ZooKeeper](https://zookeeper.apache.org) for this and other tasks.
-
-## Data management in bookies
-
-Bookies manage data in a [log-structured](https://en.wikipedia.org/wiki/Log-structured_file_system) way, which is implemented using three types of files:
-
-* [journals](#journals)
-* [entry logs](#entry-logs)
-* [index files](#index-files)
-
-### Journals
-
-A journal file contains BookKeeper transaction logs. Before any update to a ledger takes place, the bookie ensures that a transaction describing the update is written to non-volatile storage. A new journal file is created once the bookie starts or the older journal file reaches the journal file size threshold.
-
-### Entry logs
-
-An entry log file manages the written entries received from BookKeeper clients. Entries from different ledgers are aggregated and written sequentially, while their offsets are kept as pointers in a [ledger cache](#ledger-cache) for fast lookup.
-
-A new entry log file is created once the bookie starts or the older entry log file reaches the entry log size threshold. Old entry log files are removed by the Garbage Collector Thread once they are not associated with any active ledger.
-
-### Index files
-
-An index file is created for each ledger, which comprises a header and several fixed-length index pages that record the offsets of data stored in entry log files.
-
-Since updating index files would introduce random disk I/O, index files are updated lazily by a sync thread running in the background. This ensures speedy performance for updates. Before index pages are persisted to disk, they are gathered in a ledger cache for lookup.
-
-### Ledger cache
-
-Ledger index pages are cached in a memory pool, which allows for more efficient management of disk head scheduling.
-
-### Adding entries
-
-When a client instructs a {% pop bookie %} to write an entry to a ledger, the entry will go through the following steps to be persisted on disk:
-
-1. The entry is appended to an [entry log](#entry-logs)
-1. The index of the entry is updated in the [ledger cache](#ledger-cache)
-1. A transaction corresponding to this entry update is appended to the [journal](#journals)
-1. A response is sent to the BookKeeper client
-
-> For performance reasons, the entry log buffers entries in memory and commits them in batches, while the ledger cache holds index pages in memory and flushes them lazily. This process is described in more detail in the [Data flush](#data-flush) section below.
-
-### Data flush
-
-Ledger index pages are flushed to index files in the following two cases:
-
-* The ledger cache memory limit is reached. There is no more space available to hold newer index pages. Dirty index pages will be evicted from the ledger cache and persisted to index files.
-* A background sync thread is responsible for flushing index pages from the ledger cache to index files periodically.
-
-Besides flushing index pages, the sync thread is responsible for rolling journal files in case journal files use too much disk space. The data flush flow in the sync thread is as follows:
-
-* A `LastLogMark` is recorded in memory. The `LastLogMark` indicates that those entries before it have been persisted (to both index and entry log files) and contains two parts:
-  1. A `txnLogId` (the file ID of a journal)
-  1. A `txnLogPos` (offset in a journal)
-* Dirty index pages are flushed from the ledger cache to the index file, and entry log files are flushed to ensure that all buffered entries in entry log files are persisted to disk.
-
-  Ideally, a bookie only needs to flush index pages and entry log files that contain entries before `LastLogMark`. There is, however, no such information in the ledger and entry log mapping to journal files. Consequently, the thread flushes the ledger cache and entry log entirely here, and may flush entries after the `LastLogMark`. Flushing more is not a problem, though, just redundant.
-* The `LastLogMark` is persisted to disk, which means that all entries added before the `LastLogMark` have had both their entry data and index pages persisted to disk. It is now time to safely remove journal files created earlier than `txnLogId`.
-
-If the bookie has crashed before persisting `LastLogMark` to disk, it still has journal files containing entries for which index pages may not have been persisted. Consequently, when this bookie restarts, it inspects journal files to restore those entries and data isn't lost.
-
-Using the above data flush mechanism, it is safe for the sync thread to skip data flushing when the bookie shuts down. However, the entry logger uses a buffered channel to write entries in batches, and there might be data buffered in the buffered channel upon shutdown. The bookie needs to ensure that the entry log flushes its buffered data during shutdown. Otherwise, entry log files become corrupted with partial entries.
-
-### Data compaction
-
-On bookies, entries of different ledgers are interleaved in entry log files. A bookie runs a garbage collector thread to delete un-associated entry log files to reclaim disk space. If a given entry log file contains entries from a ledger that has not been deleted, then the entry log file would never be removed and the occupied disk space never reclaimed. In order to avoid such a case, a bookie server compacts entry log files in a garbage collector thread to reclaim disk space.
-
-There are two kinds of compaction running with different frequency: minor compaction and major compaction. The differences between minor compaction and major compaction lie in their threshold value and compaction interval.
-
-* The garbage collection threshold is the size percentage of an entry log file occupied by undeleted ledgers. The default minor compaction threshold is 0.2, while the major compaction threshold is 0.8.
-* The garbage collection interval is how frequently to run the compaction. The default minor compaction interval is 1 hour, while the major compaction interval is 1 day.
-
-> If either the threshold or interval is set to less than or equal to zero, compaction is disabled.
-
-The data compaction flow in the garbage collector thread is as follows:
-
-* The thread scans entry log files to get their entry log metadata, which records a list of ledgers comprising an entry log and their corresponding percentages.
-* With the normal garbage collection flow, once the bookie determines that a ledger has been deleted, the ledger will be removed from the entry log metadata and the size of the entry log reduced.
-* If the remaining size of an entry log file reaches a specified threshold, the entries of active ledgers in the entry log will be copied to a new entry log file.
-* Once all valid entries have been copied, the old entry log file is deleted.
-
-## ZooKeeper metadata
-
-BookKeeper requires a ZooKeeper installation for storing [ledger](#ledger) metadata. Whenever you construct a [`BookKeeper`](../../api/javadoc/org/apache/bookkeeper/client/BookKeeper) client object, you need to pass a list of ZooKeeper servers as a parameter to the constructor, like this:
-
-```java
-String zkConnectionString = "127.0.0.1:2181";
-BookKeeper bkClient = new BookKeeper(zkConnectionString);
-```
-
-> For more info on using the BookKeeper Java client, see [this guide](../../api/ledger-api#the-java-ledger-api-client).
-
-## Ledger manager
-
-A *ledger manager* handles ledgers' metadata (which is stored in ZooKeeper). BookKeeper offers two types of ledger managers: the [flat ledger manager](#flat-ledger-manager) and the [hierarchical ledger manager](#hierarchical-ledger-manager). Both ledger managers extend the [`AbstractZkLedgerManager`](../../api/javadoc/org/apache/bookkeeper/meta/AbstractZkLedgerManager) abstract class.
-
-> #### Use the flat ledger manager in most cases
-> The flat ledger manager is the default and is recommended for nearly all use cases. The hierarchical ledger manager is better suited only for managing very large numbers of BookKeeper ledgers (> 50,000).
-
-### Flat ledger manager
-
-The *flat ledger manager*, implemented in the [`FlatLedgerManager`](../../api/javadoc/org/apache/bookkeeper/meta/FlatLedgerManager.html) class, stores all ledgers' metadata in child nodes of a single ZooKeeper path. The flat ledger manager creates [sequential nodes](https://zookeeper.apache.org/doc/trunk/zookeeperProgrammers.html#Sequence+Nodes+--+Unique+Naming) to ensure the uniqueness of the ledger ID and prefixes all nodes with `L`. Bookie servers manage their own active ledgers in a hash map so that it's easy to find which ledgers have been deleted from ZooKeeper and then garbage collect them.
-
-The flat ledger manager's garbage collection flow proceeds as follows:
-
-* All existing ledgers are fetched from ZooKeeper (`zkActiveLedgers`)
-* All ledgers currently active within the bookie are fetched (`bkActiveLedgers`)
-* The currently active ledgers are looped through to determine which ledgers don't currently exist in ZooKeeper. Those are then garbage collected, as sketched below.
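-
-In set terms, that last step is a simple set difference; here is a small illustrative sketch (the parameter names mirror the `zkActiveLedgers` and `bkActiveLedgers` sets above):
-
-```java
-import java.util.HashSet;
-import java.util.Set;
-
-// Illustrative sketch of the set difference described above.
-static Set<Long> gcCandidates(Set<Long> zkActiveLedgers, Set<Long> bkActiveLedgers) {
-    Set<Long> toGc = new HashSet<>(bkActiveLedgers);
-    toGc.removeAll(zkActiveLedgers); // ledgers the bookie holds that ZooKeeper no longer knows about
-    return toGc;                     // these ledgers can be garbage collected
-}
-```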
In contrast, the *hierarchical ledger manager* stores ledgers' metadata in two-level [znodes](https://zookeeper.apache.org/doc/current/zookeeperOver.html#Nodes+and+ephemeral+nodes).

### Hierarchical ledger manager

The *hierarchical ledger manager*, implemented in the [`HierarchicalLedgerManager`](../../api/javadoc/org/apache/bookkeeper/meta/HierarchicalLedgerManager) class, first obtains a globally unique ID from ZooKeeper using an [`EPHEMERAL_SEQUENTIAL`](https://zookeeper.apache.org/doc/current/api/org/apache/zookeeper/CreateMode.html#EPHEMERAL_SEQUENTIAL) znode. Since ZooKeeper's sequence counter has a format of `%10d` (10 digits with 0 padding, for example `0000000001`), the hierarchical ledger manager splits the generated ID into 3 parts:

```shell
{level1 (2 digits)}{level2 (4 digits)}{level3 (4 digits)}
```

These three parts are used to form the actual ledger node path to store ledger metadata:

```shell
{ledgers_root_path}/{level1}/{level2}/L{level3}
```

For example, ledger 0000000001 is split into three parts, 00, 0000, and 0001, and stored in znode `/{ledgers_root_path}/00/0000/L0001`. Each znode can contain up to 10,000 ledgers, which avoids the problem of the child list being larger than the maximum ZooKeeper packet size (which is the [limitation](https://issues.apache.org/jira/browse/BOOKKEEPER-39) that initially prompted the creation of the hierarchical ledger manager).

diff --git a/site/docs/4.6.0/getting-started/installation.md b/site/docs/4.6.0/getting-started/installation.md deleted file mode 100644 index fac16ddd390..00000000000 --- a/site/docs/4.6.0/getting-started/installation.md +++ /dev/null @@ -1,74 +0,0 @@

---
title: BookKeeper installation
subtitle: Download or clone BookKeeper and build it locally
next: ../run-locally
---

{% capture download_url %}http://apache.claz.org/bookkeeper/bookkeeper-{{ site.latest_release }}/bookkeeper-{{ site.latest_release }}-src.tar.gz{% endcapture %}

You can install BookKeeper either by [downloading](#download) a [GZipped](http://www.gzip.org/) tarball package or [cloning](#clone) the BookKeeper repository.

## Requirements

* [Unix environment](http://www.opengroup.org/unix)
* [Java Development Kit 1.6](http://www.oracle.com/technetwork/java/javase/downloads/index.html) or later
* [Maven 3.0](https://maven.apache.org/install.html) or later

## Download

You can download Apache BookKeeper releases from one of many [Apache mirrors](http://www.apache.org/dyn/closer.cgi/bookkeeper). Here's an example for the [apache.claz.org](http://apache.claz.org/bookkeeper) mirror:

```shell
$ curl -O {{ download_url }}
$ tar xvf bookkeeper-{{ site.latest_release }}-src.tar.gz
$ cd bookkeeper-{{ site.latest_release }}
```

## Clone

To build BookKeeper from source, clone the repository, either from the [GitHub mirror]({{ site.github_repo }}) or from the [Apache repository](http://git.apache.org/bookkeeper.git/):

```shell
# From the GitHub mirror
$ git clone {{ site.github_repo}}

# From Apache directly
$ git clone git://git.apache.org/bookkeeper.git/
```

## Build using Maven

Once you have BookKeeper on your local machine, either by [downloading](#download) or [cloning](#clone) it, you can build it from source using Maven:

```shell
$ mvn package
```

> You can skip tests by adding the `-DskipTests` flag when running `mvn package`.
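For example, to build the package without running the test suite:

```shell
$ mvn package -DskipTests
```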
### Useful Maven commands

Some other useful Maven commands beyond `mvn package`:

Command | Action
:-------|:------
`mvn clean` | Removes build artifacts
`mvn compile` | Compiles JAR files from Java sources
`mvn compile findbugs:findbugs` | Compiles using the Maven [FindBugs](http://gleclaire.github.io/findbugs-maven-plugin) plugin
`mvn install` | Installs the BookKeeper JAR in your local Maven cache (usually in the `~/.m2` directory)
`mvn deploy` | Deploys the BookKeeper JAR to the Maven repo (if you have the proper credentials)
`mvn verify` | Performs a wide variety of verification and validation tasks
`mvn apache-rat:check` | Runs Maven using the [Apache Rat](http://creadur.apache.org/rat/apache-rat-plugin/) plugin
`mvn compile javadoc:aggregate` | Builds Javadocs locally
`mvn package assembly:single` | Builds a complete distribution using the Maven [Assembly](http://maven.apache.org/plugins/maven-assembly-plugin/) plugin

## Package directory

The BookKeeper project contains several subfolders that you should be aware of:

Subfolder | Contains
:---------|:--------
[`bookkeeper-server`]({{ site.github_repo }}/tree/master/bookkeeper-server) | The BookKeeper server and client
[`bookkeeper-benchmark`]({{ site.github_repo }}/tree/master/bookkeeper-benchmark) | A benchmarking suite for measuring BookKeeper performance
[`bookkeeper-stats`]({{ site.github_repo }}/tree/master/bookkeeper-stats) | A BookKeeper stats library
[`bookkeeper-stats-providers`]({{ site.github_repo }}/tree/master/bookkeeper-stats-providers) | BookKeeper stats providers

diff --git a/site/docs/4.6.0/getting-started/run-locally.md b/site/docs/4.6.0/getting-started/run-locally.md deleted file mode 100644 index edbfab9fda6..00000000000 --- a/site/docs/4.6.0/getting-started/run-locally.md +++ /dev/null @@ -1,16 +0,0 @@

---
title: Run bookies locally
prev: ../installation
next: ../concepts
toc_disable: true
---

{% pop Bookies %} are individual BookKeeper servers. You can run an ensemble of bookies locally on a single machine using the [`localbookie`](../../reference/cli#bookkeeper-localbookie) command of the `bookkeeper` CLI tool and specifying the number of bookies you'd like to include in the ensemble.

This would start up an ensemble with 10 bookies:

```shell
$ bookkeeper-server/bin/bookkeeper localbookie 10
```

> When you start up an ensemble using `localbookie`, all bookies run in a single JVM process.

diff --git a/site/docs/4.6.0/overview/overview.md b/site/docs/4.6.0/overview/overview.md deleted file mode 100644 index 3e01abbccad..00000000000 --- a/site/docs/4.6.0/overview/overview.md +++ /dev/null @@ -1,57 +0,0 @@

---
title: Apache BookKeeper™ 4.6.0
---

This documentation is for Apache BookKeeper™ version `4.6.0`.

Apache BookKeeper™ is a scalable, fault-tolerant, and low-latency storage service optimized for real-time workloads. It offers `durability`, `replication` and `strong consistency` as essentials for building reliable real-time applications.

It is suitable for use in the following scenarios:

- [WAL](https://en.wikipedia.org/wiki/Write-ahead_logging) (Write-Ahead-Logging), e.g. the HDFS [namenode](https://hadoop.apache.org/docs/r2.5.2/hadoop-project-dist/hadoop-hdfs/HDFSHighAvailabilityWithNFS.html#BookKeeper_as_a_Shared_storage_EXPERIMENTAL).
- Message Store, e.g. [Apache Pulsar](https://pulsar.incubator.apache.org/).
- Offset/Cursor Store, e.g. Apache Pulsar.
- Object/Blob Store, e.g. storing snapshots to replicated state machines.
Learn more about Apache BookKeeper™ and what it can do for your organization:

- [Apache BookKeeper 4.6.0 Release Notes](../releaseNotes)
- [Java API docs](../../api/javadoc)

Or start using Apache BookKeeper today.

### Users

- **Concepts**: Start with [concepts](../../getting-started/concepts). This will help you to fully understand the other parts of the documentation, including the setup, integration, and operation guides.
- **Getting Started**: Install [Apache BookKeeper](../../getting-started/installation) and run bookies [locally](../../getting-started/run-locally)
- **API**: Read the [API](../../api/overview) documentation to learn how to use Apache BookKeeper to build your applications.
- **Deployment**: The [Deployment Guide](../../deployment/manual) shows how to deploy Apache BookKeeper to production clusters.

### Administrators

- **Operations**: The [Admin Guide](../../admin/bookies) shows how to run Apache BookKeeper in production, including production considerations and best practices.

### Contributors

- **Details**: Learn the [design details](../../development/protocol) to know more about the internals.

diff --git a/site/docs/4.6.0/overview/releaseNotes.md b/site/docs/4.6.0/overview/releaseNotes.md deleted file mode 100644 index 3bd0116fcdc..00000000000 --- a/site/docs/4.6.0/overview/releaseNotes.md +++ /dev/null @@ -1,66 +0,0 @@

---
title: Apache BookKeeper 4.6.0 Release Notes
---

This is the seventh release of BookKeeper as an Apache Top Level Project!

The 4.6.0 release incorporates new fixes, improvements, and features since the previous major release, 4.5.0.

Apache BookKeeper users are encouraged to upgrade to 4.6.0. The technical details of this release are summarized below.

## Highlights

The main features in 4.6.0 fall into the following areas:
- Dependencies upgrade
- Bookie enhancement
- BookKeeper Admin REST API
- New BookKeeper API
- Performance improvement
- Deployment or ease of use

### Dependencies Upgrade

- Upgrade Protobuf to `3.4`.

### Bookie enhancement

- Persistable bookie status.

  Prior to this release, bookie status was transient, which made it hard for management tooling to track. This feature makes the bookie status persistable. See [Issue-265](https://github.com/apache/bookkeeper/issues/265) for more details.

- Introduce a Bookie Discovery Interface. Prior to this release, the BookKeeper client only provided interfaces for ledger metadata management; it didn't provide any interface for service discovery. This feature introduces a bookie discovery interface, which allows plugging different service discovery backends into BookKeeper.
  - Introduce a Bookie Registration Manager for the bookie server, see [Issue-662](https://github.com/apache/bookkeeper/issues/662) for more details.
  - Introduce a registration client for the BookKeeper client, see [Issue-666](https://github.com/apache/bookkeeper/issues/666) for more details.

- Lifecycle components for managing components in the bookie server.

  Introduce a lifecycle component for each service component, including the stats provider, auto recovery, the HTTP endpoint, and the bookie server (both storage and the Netty server), to run these components in a clear way. See [Issue-508](https://github.com/apache/bookkeeper/issues/508) and [Issue-547](https://github.com/apache/bookkeeper/issues/547) for more details.

- Make bookie recovery work when recovering multiple bookies.

  Make the recovery tool work with multiple bookies, so that a single call can recover multiple bookies.
  See [Issue-612](https://github.com/apache/bookkeeper/issues/612) for more details.

### BookKeeper Admin REST API

- Introduce a BookKeeper admin endpoint so that operators can interact with and administer the BookKeeper cluster using a REST API. See [PR-278](https://github.com/apache/bookkeeper/pull/278), [Issue-520](https://github.com/apache/bookkeeper/issues/520), and [Issue-674](https://github.com/apache/bookkeeper/issues/674) for more details.

### New BookKeeper API

- New Fluent Style API.

  A brand new API to manage ledgers using the Builder pattern, along with new interfaces such as `WriteHandle` and `ReadHandle` that make operations on ledgers clearer, is provided in this release. See [Issue-506](https://github.com/apache/bookkeeper/issues/506), [Issue-673](https://github.com/apache/bookkeeper/issues/673) and [Issue-550](https://github.com/apache/bookkeeper/issues/550) for more details.

### Performance improvement
- Use `ByteBuf` in multiple places to avoid unnecessary memory allocation and reduce the garbage produced in the JVM. See [PR-640](https://github.com/apache/bookkeeper/pull/640) for more details.

- Separate the FileInfo cache into write and read caches. This prevents catch-up reads from impacting tailing reads and writes. See [PR-513](https://github.com/apache/bookkeeper/pull/513) for more details.

### Deployment or Ease of use
- Deploying BookKeeper on Kubernetes.

  Provide YAML files to run BookKeeper on Kubernetes using both StatefulSets and DaemonSets. See [Issue-337](https://github.com/apache/bookkeeper/issues/337) and [Issue-681](https://github.com/apache/bookkeeper/issues/681) for more details.

## Existing API changes

- The BookKeeper constructor now throws `BKException` instead of `KeeperException`.
- The signatures of `reorderReadSequence` and `reorderReadLACSequence` changed in `EnsemblePlacementPolicy`.

## Full list of changes

- [https://github.com/apache/bookkeeper/milestone/2?closed=1](https://github.com/apache/bookkeeper/milestone/2?closed=1) \ No newline at end of file

diff --git a/site/docs/4.6.0/overview/releaseNotesTemplate.md b/site/docs/4.6.0/overview/releaseNotesTemplate.md deleted file mode 100644 index 2df2589eb15..00000000000 --- a/site/docs/4.6.0/overview/releaseNotesTemplate.md +++ /dev/null @@ -1,17 +0,0 @@

---
title: Apache BookKeeper 4.6.0 Release Notes
---

[provide a summary of this release]

Apache BookKeeper users are encouraged to upgrade to 4.6.0. The technical details of this release are summarized below.

## Highlights

[List the highlights]

## Details

[list to issues list]

diff --git a/site/docs/4.6.0/reference/cli.md b/site/docs/4.6.0/reference/cli.md deleted file mode 100644 index 8beb36ff071..00000000000 --- a/site/docs/4.6.0/reference/cli.md +++ /dev/null @@ -1,10 +0,0 @@

---
title: BookKeeper CLI tool reference
subtitle: A reference guide to the command-line tools that you can use to administer BookKeeper
---

{% include cli.html id="bookkeeper" %}

## The BookKeeper shell

{% include shell.html %}

diff --git a/site/docs/4.6.0/reference/config.md b/site/docs/4.6.0/reference/config.md deleted file mode 100644 index 8997b6b62f0..00000000000 --- a/site/docs/4.6.0/reference/config.md +++ /dev/null @@ -1,9 +0,0 @@

---
title: BookKeeper configuration
subtitle: A reference guide to all of BookKeeper's configurable parameters
---

The table below lists parameters that you can set to configure {% pop bookies %}.
All configuration takes place in the `bk_server.conf` file in the `bookkeeper-server/conf` directory of your [BookKeeper installation](../../getting-started/installing).

{% include config.html id="bk_server" %}

diff --git a/site/docs/4.6.0/reference/metrics.md b/site/docs/4.6.0/reference/metrics.md deleted file mode 100644 index 8bd6fe0a165..00000000000 --- a/site/docs/4.6.0/reference/metrics.md +++ /dev/null @@ -1,3 +0,0 @@

---
title: BookKeeper metrics reference
---

diff --git a/site/docs/4.6.0/security/overview.md b/site/docs/4.6.0/security/overview.md deleted file mode 100644 index b825776eb67..00000000000 --- a/site/docs/4.6.0/security/overview.md +++ /dev/null @@ -1,21 +0,0 @@

---
title: BookKeeper Security
next: ../tls
---

In the 4.5.0 release, the BookKeeper community added a number of features that can be used, together or separately, to secure a BookKeeper cluster. The following security measures are currently supported:

1. Authentication of connections to bookies from clients, using either [TLS](../tls) or [SASL (Kerberos)](../sasl).
2. Authentication of connections from clients, bookies, and autorecovery daemons to [ZooKeeper](../zookeeper), when using ZooKeeper-based ledger managers.
3. Encryption of data transferred between bookies and clients, and between bookies and autorecovery daemons, using [TLS](../tls).

It's worth noting that security is optional: non-secured clusters are supported, as well as a mix of authenticated, unauthenticated, encrypted and non-encrypted clients.

NOTE: authorization is not yet available in 4.5.0. The Apache BookKeeper community is looking to add this feature in subsequent releases.

## Next Steps

- [Encryption and Authentication using TLS](../tls)
- [Authentication using SASL](../sasl)
- [ZooKeeper Authentication](../zookeeper)

diff --git a/site/docs/4.6.0/security/sasl.md b/site/docs/4.6.0/security/sasl.md deleted file mode 100644 index ffb972a8936..00000000000 --- a/site/docs/4.6.0/security/sasl.md +++ /dev/null @@ -1,202 +0,0 @@

---
title: Authentication using SASL
prev: ../tls
next: ../zookeeper
---

Bookies support client authentication via SASL. Currently we only support GSSAPI (Kerberos). We will start with a general description of how to configure SASL for bookies, clients, and autorecovery daemons, follow with mechanism-specific details, and wrap up with some operational details.

## SASL configuration for Bookies

1. Select the mechanisms to enable in the bookies. `GSSAPI` is the only mechanism currently supported by BookKeeper.
2. Add a `JAAS` config file for the selected mechanisms as described in the examples for setting up [GSSAPI (Kerberos)](#kerberos).
3. Pass the `JAAS` config file location as a JVM parameter to each bookie. For example:

    ```shell
    -Djava.security.auth.login.config=/etc/bookkeeper/bookie_jaas.conf
    ```

4. Enable the SASL auth plugin in the bookies by setting `bookieAuthProviderFactoryClass` to `org.apache.bookkeeper.sasl.SASLBookieAuthProviderFactory`.

    ```shell
    bookieAuthProviderFactoryClass=org.apache.bookkeeper.sasl.SASLBookieAuthProviderFactory
    ```

5. If you are running `autorecovery` along with the bookies, you should also enable the SASL auth plugin for `autorecovery` by setting `clientAuthProviderFactoryClass` to `org.apache.bookkeeper.sasl.SASLClientProviderFactory`.

    ```shell
    clientAuthProviderFactoryClass=org.apache.bookkeeper.sasl.SASLClientProviderFactory
    ```

6. Follow the steps in [GSSAPI (Kerberos)](#kerberos) to configure SASL.
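Putting steps 3 and 4 together, here is a sketch of how a bookie might be launched with the JAAS config. This assumes the startup script honors a `BOOKIE_EXTRA_OPTS` environment variable for extra JVM options; check your version's `bin/bookkeeper` script:

```shell
# Assumption: BOOKIE_EXTRA_OPTS is passed through to the bookie JVM by bin/bookkeeper.
$ BOOKIE_EXTRA_OPTS="-Djava.security.auth.login.config=/etc/bookkeeper/bookie_jaas.conf" \
    bookkeeper-server/bin/bookkeeper bookie
```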
#### Important Notes

1. `Bookie` is a section name in the JAAS file used by each bookie. This section tells the bookie which principal to use and the location of the keytab where the principal is stored. It allows the bookie to log in using the keytab specified in this section.
2. `Auditor` is a section name in the JAAS file used by the `autorecovery` daemon (which can be co-run with bookies). This section tells the `autorecovery` daemon which principal to use and the location of the keytab where the principal is stored. It allows the daemon to log in using the keytab specified in this section.
3. The `Client` section is used to authenticate a SASL connection with ZooKeeper. It also allows the bookies to set ACLs on ZooKeeper nodes, which locks these nodes down so that only the bookies can modify them. It is necessary to have the same primary name across all bookies. If you want to use a section name other than `Client`, set the system property `zookeeper.sasl.client` to the appropriate name (e.g. `-Dzookeeper.sasl.client=ZKClient`).
4. ZooKeeper uses `zookeeper` as the service name by default. If you want to change this, set the system property `zookeeper.sasl.client.username` to the appropriate name (e.g. `-Dzookeeper.sasl.client.username=zk`).

## SASL configuration for Clients

To configure SASL authentication on the clients:

1. Select a SASL mechanism for authentication and add a `JAAS` config file for the selected mechanism as described in the examples for setting up [GSSAPI (Kerberos)](#kerberos).
2. Pass the `JAAS` config file location as a JVM parameter to each client JVM. For example:

    ```shell
    -Djava.security.auth.login.config=/etc/bookkeeper/bookkeeper_jaas.conf
    ```

3. Configure the following property in the BookKeeper `ClientConfiguration`:

    ```shell
    clientAuthProviderFactoryClass=org.apache.bookkeeper.sasl.SASLClientProviderFactory
    ```

Follow the steps in [GSSAPI (Kerberos)](#kerberos) to configure SASL for the selected mechanism.

## Authentication using SASL/Kerberos

### Prerequisites

#### Kerberos

If your organization is already using a Kerberos server (for example, by using `Active Directory`), there is no need to install a new server just for BookKeeper. Otherwise you will need to install one; your Linux vendor likely has packages for Kerberos and a short guide on how to install and configure it ([Ubuntu](https://help.ubuntu.com/community/Kerberos), [Redhat](https://access.redhat.com/documentation/en-US/Red_Hat_Enterprise_Linux/6/html/Managing_Smart_Cards/installing-kerberos.html)). Note that if you are using Oracle Java, you will need to download the JCE policy files for your Java version and copy them to `$JAVA_HOME/jre/lib/security`.

#### Kerberos Principals

If you are using your organization's Kerberos or Active Directory server, ask your Kerberos administrator for a principal for each bookie in your cluster and for every operating system user that will access BookKeeper with Kerberos authentication (via clients and tools).

If you have installed your own Kerberos, you will need to create these principals yourself using the following commands:

```shell
sudo /usr/sbin/kadmin.local -q 'addprinc -randkey bookkeeper/{hostname}@{REALM}'
sudo /usr/sbin/kadmin.local -q "ktadd -k /etc/security/keytabs/{keytabname}.keytab bookkeeper/{hostname}@{REALM}"
```

##### All hosts must be reachable using hostnames

It is a *Kerberos* requirement that all your hosts can be resolved with their FQDNs.
### Configuring Bookies

1. Add a suitably modified JAAS file, similar to the one below, to each bookie's config directory; let's call it `bookie_jaas.conf` for this example (note that each bookie should have its own keytab):

    ```
    Bookie {
        com.sun.security.auth.module.Krb5LoginModule required
        useKeyTab=true
        storeKey=true
        keyTab="/etc/security/keytabs/bookie.keytab"
        principal="bookkeeper/bk1.hostname.com@EXAMPLE.COM";
    };
    // ZooKeeper client authentication
    Client {
        com.sun.security.auth.module.Krb5LoginModule required
        useKeyTab=true
        storeKey=true
        keyTab="/etc/security/keytabs/bookie.keytab"
        principal="bookkeeper/bk1.hostname.com@EXAMPLE.COM";
    };
    // If you are running `autorecovery` along with bookies
    Auditor {
        com.sun.security.auth.module.Krb5LoginModule required
        useKeyTab=true
        storeKey=true
        keyTab="/etc/security/keytabs/bookie.keytab"
        principal="bookkeeper/bk1.hostname.com@EXAMPLE.COM";
    };
    ```

    The `Bookie` section in the JAAS file tells the bookie which principal to use and the location of the keytab where this principal is stored. It allows the bookie to log in using the keytab specified in this section. See the [notes](#notes) for more details on ZooKeeper's SASL configuration.

2. Pass the name of the JAAS file as a JVM parameter to each bookie:

    ```shell
    -Djava.security.auth.login.config=/etc/bookkeeper/bookie_jaas.conf
    ```

    You may also wish to specify the path to the `krb5.conf` file (see [JDK's Kerberos Requirements](https://docs.oracle.com/javase/8/docs/technotes/guides/security/jgss/tutorials/KerberosReq.html) for more details):

    ```shell
    -Djava.security.krb5.conf=/etc/bookkeeper/krb5.conf
    ```

3. Make sure the keytabs configured in the JAAS file are readable by the operating system user who is starting the bookies.

4. Enable the SASL authentication plugin in the bookies by setting the following parameters:

    ```shell
    bookieAuthProviderFactoryClass=org.apache.bookkeeper.sasl.SASLBookieAuthProviderFactory
    # if you run `autorecovery` along with bookies
    clientAuthProviderFactoryClass=org.apache.bookkeeper.sasl.SASLClientProviderFactory
    ```

### Configuring Clients

To configure SASL authentication on the clients:

1. Clients will authenticate to the cluster with their own principal (usually with the same name as the user running the client), so obtain or create these principals as needed. Then create a `JAAS` file for each principal. The `BookKeeper` section describes how clients, such as writers and readers, can connect to the bookies. The following is an example configuration for a client using a keytab (recommended for long-running processes):

    ```
    BookKeeper {
        com.sun.security.auth.module.Krb5LoginModule required
        useKeyTab=true
        storeKey=true
        keyTab="/etc/security/keytabs/bookkeeper.keytab"
        principal="bookkeeper-client-1@EXAMPLE.COM";
    };
    ```

2. Pass the name of the JAAS file as a JVM parameter to the client JVM:

    ```shell
    -Djava.security.auth.login.config=/etc/bookkeeper/bookkeeper_jaas.conf
    ```

    You may also wish to specify the path to the `krb5.conf` file (see [JDK's Kerberos Requirements](https://docs.oracle.com/javase/8/docs/technotes/guides/security/jgss/tutorials/KerberosReq.html) for more details):

    ```shell
    -Djava.security.krb5.conf=/etc/bookkeeper/krb5.conf
    ```

3. Make sure the keytabs configured in `bookkeeper_jaas.conf` are readable by the operating system user who is starting the BookKeeper client.
4. Enable the SASL authentication plugin in the client by setting the following parameter:

    ```shell
    clientAuthProviderFactoryClass=org.apache.bookkeeper.sasl.SASLClientProviderFactory
    ```

## Enabling Logging for SASL

To enable SASL debug output, you can set the `sun.security.krb5.debug` system property to `true`.

diff --git a/site/docs/4.6.0/security/tls.md b/site/docs/4.6.0/security/tls.md deleted file mode 100644 index cd250ab2aa5..00000000000 --- a/site/docs/4.6.0/security/tls.md +++ /dev/null @@ -1,210 +0,0 @@

---
title: Encryption and Authentication using TLS
prev: ../overview
next: ../sasl
---

Apache BookKeeper allows clients and autorecovery daemons to communicate over TLS, although this is not enabled by default.

## Overview

The bookies need their own key and certificate in order to use TLS. Clients can optionally provide a key and a certificate for mutual authentication. Each bookie or client can also be configured with a truststore, which is used to determine which certificates (bookie or client identities) to trust (authenticate).

The truststore can be configured in many ways. To understand the truststore, consider the following two examples:

1. the truststore contains one or many certificates;
2. it contains a certificate authority (CA).

In (1), with a list of certificates, the bookie or client will trust any certificate listed in the truststore. In (2), with a CA, the bookie or client will trust any certificate that was signed by the CA in the truststore.

(TBD: benefits)

## Generate TLS key and certificate

The first step of deploying TLS is to generate the key and the certificate for each machine in the cluster. You can use Java's `keytool` utility to accomplish this task. We will generate the key into a temporary keystore initially so that we can export it and sign it later with the CA.

```shell
keytool -keystore bookie.keystore.jks -alias localhost -validity {validity} -genkey
```

You need to specify two parameters in the above command:

1. `keystore`: the keystore file that stores the certificate. The *keystore* file contains the private key of the certificate; hence, it needs to be kept safe.
2. `validity`: the validity period of the certificate, in days.
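For example, with a one-year validity and the distinguished name supplied inline so that `keytool` doesn't prompt for it (the hostname `bk1.example.com` is purely illustrative):

```shell
# Illustrative values: adjust the CN to your bookie's real FQDN.
keytool -keystore bookie.keystore.jks -alias localhost -validity 365 -genkey \
    -keyalg RSA -dname "CN=bk1.example.com"
```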
          -Ensure that common name (CN) matches exactly with the fully qualified domain name (FQDN) of the server. -The client compares the CN with the DNS domain name to ensure that it is indeed connecting to the desired server, not a malicious one. -
          - -## Creating your own CA - -After the first step, each machine in the cluster has a public-private key pair, and a certificate to identify the machine. -The certificate, however, is unsigned, which means that an attacker can create such a certificate to pretend to be any machine. - -Therefore, it is important to prevent forged certificates by signing them for each machine in the cluster. -A `certificate authority (CA)` is responsible for signing certificates. CA works likes a government that issues passports — -the government stamps (signs) each passport so that the passport becomes difficult to forge. Other governments verify the stamps -to ensure the passport is authentic. Similarly, the CA signs the certificates, and the cryptography guarantees that a signed -certificate is computationally difficult to forge. Thus, as long as the CA is a genuine and trusted authority, the clients have -high assurance that they are connecting to the authentic machines. - -```shell -openssl req -new -x509 -keyout ca-key -out ca-cert -days 365 -``` - -The generated CA is simply a *public-private* key pair and certificate, and it is intended to sign other certificates. - -The next step is to add the generated CA to the clients' truststore so that the clients can trust this CA: - -```shell -keytool -keystore bookie.truststore.jks -alias CARoot -import -file ca-cert -``` - -NOTE: If you configure the bookies to require client authentication by setting `sslClientAuthentication` to `true` on the -[bookie config](../../reference/config), then you must also provide a truststore for the bookies and it should have all the CA -certificates that clients keys were signed by. - -```shell -keytool -keystore client.truststore.jks -alias CARoot -import -file ca-cert -``` - -In contrast to the keystore, which stores each machine’s own identity, the truststore of a client stores all the certificates -that the client should trust. Importing a certificate into one’s truststore also means trusting all certificates that are signed -by that certificate. As the analogy above, trusting the government (CA) also means trusting all passports (certificates) that -it has issued. This attribute is called the chain of trust, and it is particularly useful when deploying TLS on a large BookKeeper cluster. -You can sign all certificates in the cluster with a single CA, and have all machines share the same truststore that trusts the CA. -That way all machines can authenticate all other machines. - -## Signing the certificate - -The next step is to sign all certificates in the keystore with the CA we generated. First, you need to export the certificate from the keystore: - -```shell -keytool -keystore bookie.keystore.jks -alias localhost -certreq -file cert-file -``` - -Then sign it with the CA: - -```shell -openssl x509 -req -CA ca-cert -CAkey ca-key -in cert-file -out cert-signed -days {validity} -CAcreateserial -passin pass:{ca-password} -``` - -Finally, you need to import both the certificate of the CA and the signed certificate into the keystore: - -```shell -keytool -keystore bookie.keystore.jks -alias CARoot -import -file ca-cert -keytool -keystore bookie.keystore.jks -alias localhost -import -file cert-signed -``` - -The definitions of the parameters are the following: - -1. `keystore`: the location of the keystore -2. `ca-cert`: the certificate of the CA -3. `ca-key`: the private key of the CA -4. `ca-password`: the passphrase of the CA -5. `cert-file`: the exported, unsigned certificate of the bookie -6. 
6. `cert-signed`: the signed certificate of the bookie

(TBD: add a script to automatically generate truststores and keystores.)

## Configuring Bookies

Bookies support TLS for connections on the same service port. In order to enable TLS, you need to configure `tlsProvider` to be either `JDK` or `OpenSSL`. If `OpenSSL` is configured, it will use `netty-tcnative-boringssl-static`, which loads the corresponding binding for the platform the bookies run on.

> The current `OpenSSL` implementation doesn't depend on the system-installed OpenSSL library. If you want to leverage the OpenSSL installed on the system, you can check [this example](http://netty.io/wiki/forked-tomcat-native.html) on how to replace the JARs on the classpath with Netty bindings to the installed OpenSSL.

The following TLS configs are needed on the bookie side:

```shell
tlsProvider=OpenSSL
# key store
tlsKeyStoreType=JKS
tlsKeyStore=/var/private/tls/bookie.keystore.jks
tlsKeyStorePasswordPath=/var/private/tls/bookie.keystore.passwd
# trust store
tlsTrustStoreType=JKS
tlsTrustStore=/var/private/tls/bookie.truststore.jks
tlsTrustStorePasswordPath=/var/private/tls/bookie.truststore.passwd
```

NOTE: it is important to restrict access to the store files and corresponding password files via filesystem permissions.

Optional settings that are worth considering:

1. `tlsClientAuthentication=false`: Enable/disable using TLS for authentication. This config, when enabled, will authenticate the other end of the communication channel. It should be enabled on both bookies and clients for mutual TLS.
2. `tlsEnabledCipherSuites`: A cipher suite is a named combination of authentication, encryption, MAC, and key exchange algorithms used to negotiate the security settings for a network connection using the TLS protocol. By default it is null. See [OpenSSL Ciphers](https://www.openssl.org/docs/man1.0.2/apps/ciphers.html) and [JDK Ciphers](http://docs.oracle.com/javase/8/docs/technotes/guides/security/StandardNames.html#ciphersuites).
3. `tlsEnabledProtocols=TLSv1.2,TLSv1.1,TLSv1`: lists the TLS protocols that you are going to accept from clients. By default, it is not set.

To verify that the bookie's keystore and truststore are set up correctly you can run the following command:

```shell
openssl s_client -debug -connect localhost:3181 -tls1
```

NOTE: for this check to work, TLSv1 should be listed under `tlsEnabledProtocols`.

In the output of this command you should see the server's certificate:

```shell
-----BEGIN CERTIFICATE-----
{variable sized random bytes}
-----END CERTIFICATE-----
```

If the certificate does not show up, or if there are any other error messages, then your keystore is not set up correctly.

## Configuring Clients

TLS is supported only for the new BookKeeper client (BookKeeper versions 4.5.0 and higher); older clients are not supported. The TLS configs are similar to those of the bookies.

If client authentication is not required by the bookies, the following is a minimal configuration example:

```shell
tlsProvider=OpenSSL
clientTrustStore=/var/private/tls/client.truststore.jks
clientTrustStorePasswordPath=/var/private/tls/client.truststore.passwd
```

If client authentication is required, then a keystore must be created for each client, and the bookies' truststores must trust the certificate in the client's keystore. This may be done using commands that are similar to what we used for the [bookie keystore](#bookie-keystore).
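For example, here is a hedged sketch of the analogous client-side commands, assuming the same CA files (`ca-cert`, `ca-key`) generated earlier; the aliases and filenames are illustrative:

```shell
# Generate the client key pair in its own keystore
keytool -keystore client.keystore.jks -alias localhost -validity {validity} -genkey
# Export the unsigned certificate, sign it with the CA, and re-import both
keytool -keystore client.keystore.jks -alias localhost -certreq -file client-cert-file
openssl x509 -req -CA ca-cert -CAkey ca-key -in client-cert-file -out client-cert-signed -days {validity} -CAcreateserial -passin pass:{ca-password}
keytool -keystore client.keystore.jks -alias CARoot -import -file ca-cert
keytool -keystore client.keystore.jks -alias localhost -import -file client-cert-signed
```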
And the following must also be configured:

```shell
tlsClientAuthentication=true
clientKeyStore=/var/private/tls/client.keystore.jks
clientKeyStorePasswordPath=/var/private/tls/client.keystore.passwd
```

NOTE: it is important to restrict access to the store files and corresponding password files via filesystem permissions.

(TBD: add example to use tls in bin/bookkeeper script?)

## Enabling TLS Logging

You can enable TLS debug logging at the JVM level by starting the bookies and/or clients with the `javax.net.debug` system property. For example:

```shell
-Djavax.net.debug=all
```

You can find more details in the Oracle documentation on [debugging SSL/TLS connections](http://docs.oracle.com/javase/8/docs/technotes/guides/security/jsse/ReadDebug.html).

diff --git a/site/docs/4.6.0/security/zookeeper.md b/site/docs/4.6.0/security/zookeeper.md deleted file mode 100644 index e16be69a1d3..00000000000 --- a/site/docs/4.6.0/security/zookeeper.md +++ /dev/null @@ -1,41 +0,0 @@

---
title: ZooKeeper Authentication
prev: ../sasl
---

## New Clusters

To enable `ZooKeeper` authentication on bookies or clients, there are two necessary steps:

1. Create a `JAAS` login file and set the appropriate system property to point to it, as described in [GSSAPI (Kerberos)](../sasl#notes).
2. Set the configuration property `zkEnableSecurity` in each bookie to `true`.

The metadata stored in `ZooKeeper` is such that only certain clients will be able to modify and read the corresponding znodes. The rationale behind this decision is that the data stored in ZooKeeper is not sensitive, but inappropriate manipulation of znodes can cause cluster disruption.

## Migrating Clusters

If you are running a version of BookKeeper that does not support security, or are simply running with security disabled, and you want to make the cluster secure, then you need to execute the following steps to enable ZooKeeper authentication with minimal disruption to your operations.

1. Perform a rolling restart setting the `JAAS` login file, which enables bookies or clients to authenticate. At the end of the rolling restart, bookies (or clients) are able to manipulate znodes with strict ACLs, but they will not create znodes with those ACLs.
2. Perform a second rolling restart of bookies, this time setting the configuration parameter `zkEnableSecurity` to `true`, which enables the use of secure ACLs when creating znodes.
3. Currently we don't provide a tool to set ACLs on old znodes. We recommend setting them manually using ZooKeeper tools.

It is also possible to turn off authentication in a secured cluster. To do so, follow these steps:

1. Perform a rolling restart of bookies setting the `JAAS` login file, which enables bookies to authenticate, but setting `zkEnableSecurity` to `false`. At the end of the rolling restart, bookies stop creating znodes with secure ACLs, but are still able to authenticate and manipulate all znodes.
2. You can use ZooKeeper tools to manually reset all ACLs under the znode set in `zkLedgersRootPath`, which defaults to `/ledgers`.
3. Perform a second rolling restart of bookies, this time omitting the system property that sets the `JAAS` login file.

## Migrating the ZooKeeper ensemble

It is also necessary to enable authentication on the `ZooKeeper` ensemble. To do so, we need to perform a rolling restart of the ensemble and set a few properties.
Please refer to the ZooKeeper documentation for more details.

1. [Apache ZooKeeper Documentation](http://zookeeper.apache.org/doc/r3.4.6/zookeeperProgrammers.html#sc_ZooKeeperAccessControl)
2. [Apache ZooKeeper Wiki](https://cwiki.apache.org/confluence/display/ZOOKEEPER/Zookeeper+and+SASL)

diff --git a/site/docs/4.6.1/admin/autorecovery.md b/site/docs/4.6.1/admin/autorecovery.md deleted file mode 100644 index b1dd078f9b2..00000000000 --- a/site/docs/4.6.1/admin/autorecovery.md +++ /dev/null @@ -1,128 +0,0 @@

---
title: Using AutoRecovery
---

When a {% pop bookie %} crashes, all {% pop ledgers %} on that bookie become under-replicated. In order to bring all ledgers in your BookKeeper cluster back to full replication, you'll need to *recover* the data from any offline bookies. There are two ways to recover bookies' data:

1. Using [manual recovery](#manual-recovery)
1. Automatically, using [*AutoRecovery*](#autorecovery)

## Manual recovery

You can manually recover failed bookies using the [`bookkeeper`](../../reference/cli) command-line tool. You need to specify:

* the `shell recover` option
* an IP and port for your BookKeeper cluster's ZooKeeper ensemble
* the IP and port for the failed bookie

Here's an example:

```bash
$ bookkeeper-server/bin/bookkeeper shell recover \
    zk1.example.com:2181 \ # IP and port for ZooKeeper
    192.168.1.10:3181      # IP and port for the failed bookie
```

If you wish, you can also specify which bookie you'd like to rereplicate to. Here's an example:

```bash
$ bookkeeper-server/bin/bookkeeper shell recover \
    zk1.example.com:2181 \ # IP and port for ZooKeeper
    192.168.1.10:3181 \    # IP and port for the failed bookie
    192.168.1.11:3181      # IP and port for the bookie to rereplicate to
```

### The manual recovery process

When you initiate a manual recovery process, the following happens:

1. The client (the process running the recovery command) reads the metadata of active ledgers from ZooKeeper.
1. The ledgers that contain fragments from the failed bookie in their ensemble are selected.
1. A recovery process is initiated for each ledger in this list and the rereplication process is run for each ledger.
1. Once all the ledgers are marked as fully replicated, bookie recovery is finished.

## AutoRecovery

AutoRecovery is a process that:

* automatically detects when a {% pop bookie %} in your BookKeeper cluster has become unavailable and then
* rereplicates all the {% pop ledgers %} that were stored on that bookie.

AutoRecovery can be run in two ways:

1. On dedicated nodes in your BookKeeper cluster
1. On the same machines on which your bookies are running

## Running AutoRecovery

You can start up AutoRecovery using the [`autorecovery`](../../reference/cli#bookkeeper-autorecovery) command of the [`bookkeeper`](../../reference/cli) CLI tool.

```bash
$ bookkeeper-server/bin/bookkeeper autorecovery
```

> The most important thing to ensure when starting up AutoRecovery is that the ZooKeeper connection string specified by the [`zkServers`](../../reference/config#zkServers) parameter points to the right ZooKeeper cluster.

If you start up AutoRecovery on a machine that is already running a bookie, the AutoRecovery process will run alongside the bookie on a separate thread.

You can also start up AutoRecovery on a fresh machine if you'd like to create a dedicated cluster of AutoRecovery nodes.
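If you prefer to run AutoRecovery in the background, here is a sketch using the daemon script; this assumes `bookkeeper-daemon.sh` accepts `autorecovery` as a service name, as it does for `bookie` (check your version's script):

```bash
$ bookkeeper-server/bin/bookkeeper-daemon.sh start autorecovery
```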
## Configuration

There are a handful of AutoRecovery-related configs in the [`bk_server.conf`](../../reference/config) configuration file. For a listing of those configs, see [AutoRecovery settings](../../reference/config#autorecovery-settings).

## Disable AutoRecovery

You can disable AutoRecovery at any time, for example during maintenance. Disabling AutoRecovery ensures that bookies' data isn't unnecessarily rereplicated when the bookie is only taken down for a short period of time, for example when the bookie is being updated or the configuration is being changed.

You can disable AutoRecovery using the [`bookkeeper`](../../reference/cli#bookkeeper-shell-autorecovery) CLI tool:

```bash
$ bookkeeper-server/bin/bookkeeper shell autorecovery -disable
```

Once disabled, you can re-enable AutoRecovery using the [`enable`](../../reference/cli#bookkeeper-shell-autorecovery) shell command:

```bash
$ bookkeeper-server/bin/bookkeeper shell autorecovery -enable
```

## AutoRecovery architecture

AutoRecovery has two components:

1. The [**auditor**](#auditor) (see the [`Auditor`](../../api/javadoc/org/apache/bookkeeper/replication/Auditor.html) class) is a singleton node that watches bookies to see if they fail and creates rereplication tasks for the ledgers on failed bookies.
1. The [**replication worker**](#replication-worker) (see the [`ReplicationWorker`](../../api/javadoc/org/apache/bookkeeper/replication/ReplicationWorker.html) class) runs on each bookie and executes rereplication tasks provided by the auditor.

Both of these components run as threads in the [`AutoRecoveryMain`](../../api/javadoc/org/apache/bookkeeper/replication/AutoRecoveryMain) process, which runs on each bookie in the cluster. All recovery nodes participate in a ZooKeeper-based leader election to decide which node becomes the auditor. Nodes that fail to become the auditor watch the elected auditor and run the election process again if they see that the auditor node has failed.

### Auditor

The auditor watches all bookies in the cluster that are registered with ZooKeeper. Bookies register with ZooKeeper at startup. If a bookie crashes or is killed, the bookie's registration in ZooKeeper disappears and the auditor is notified of the change in the list of registered bookies.

When the auditor sees that a bookie has disappeared, it immediately scans the complete {% pop ledger %} list to find ledgers that have data stored on the failed bookie. Once it has a list of ledgers for that bookie, the auditor will publish a rereplication task for each ledger under the `/underreplicated/` [znode](https://zookeeper.apache.org/doc/current/zookeeperOver.html) in ZooKeeper.

### Replication Worker

Each replication worker watches for tasks being published by the auditor on the `/underreplicated/` znode in ZooKeeper. When a new task appears, the replication worker will try to get a lock on it. If it cannot acquire the lock, it will try the next entry. The locks are implemented using ZooKeeper ephemeral znodes.

The replication worker will scan through the rereplication task's ledger for fragments of which its local bookie is not a member. When it finds fragments matching this criterion, it will replicate the entries of that fragment to the local bookie. If, after this process, the ledger is fully replicated, the ledger's entry under `/underreplicated/` is deleted, and the lock is released.
If there is a problem replicating, or there are fragments in the ledger that are still underreplicated (due to the local bookie already being part of the ensemble for the fragment), then the lock is simply released.

If the replication worker finds a fragment which needs rereplication, but does not have a defined endpoint (i.e. the final fragment of a ledger currently being written to), it will wait for a grace period before attempting rereplication. If the fragment needing rereplication still does not have a defined endpoint at that point, the ledger is fenced and rereplication then takes place.

This avoids the situation in which a client is writing to a ledger and one of the bookies goes down, but the client has not written an entry to that bookie before rereplication takes place. The client could continue writing to the old fragment, even though the ensemble for the fragment had changed. This could lead to data loss. Fencing prevents this scenario from happening. In the normal case, the client will try to write to the failed bookie within the grace period, and will have started a new fragment before rereplication starts.

You can configure this grace period using the [`openLedgerRereplicationGracePeriod`](../../reference/config#openLedgerRereplicationGracePeriod) parameter.

### The rereplication process

The ledger rereplication process happens in these steps:

1. The client goes through all ledger fragments in the ledger, selecting those that contain the failed bookie.
1. A recovery process is initiated for each ledger fragment in this list.
   1. The client selects a bookie to which all entries in the ledger fragment will be replicated; in the case of autorecovery, this will always be the local bookie.
   1. The client reads entries that belong to the ledger fragment from other bookies in the ensemble and writes them to the selected bookie.
   1. Once all entries have been replicated, the ZooKeeper metadata for the fragment is updated to reflect the new ensemble.
   1. The fragment is marked as fully replicated in the recovery tool.
1. Once all ledger fragments are marked as fully replicated, the ledger is marked as fully replicated.

diff --git a/site/docs/4.6.1/admin/bookies.md b/site/docs/4.6.1/admin/bookies.md deleted file mode 100644 index 1b0427dae3c..00000000000 --- a/site/docs/4.6.1/admin/bookies.md +++ /dev/null @@ -1,180 +0,0 @@

---
title: BookKeeper administration
subtitle: A guide to deploying and administering BookKeeper
---

This document is a guide to deploying, administering, and maintaining BookKeeper. It also discusses [best practices](#best-practices) and [common problems](#common-problems).

## Requirements

A typical BookKeeper installation consists of an ensemble of {% pop bookies %} and a ZooKeeper quorum. The exact number of bookies depends on the quorum mode that you choose, the desired throughput, and the number of clients using the installation simultaneously.

The minimum number of bookies depends on the type of installation:

* For *self-verifying* entries you should run at least three bookies. In this mode, clients store a message authentication code along with each {% pop entry %}.
* For *generic* entries you should run at least four bookies.

There is no upper limit on the number of bookies that you can run in a single ensemble.

### Performance

To achieve optimal performance, BookKeeper requires each server to have at least two disks. It's possible to run a bookie with a single disk, but performance will be significantly degraded.
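For example, here is a minimal sketch of the relevant `bk_server.conf` settings placing the write-ahead journal and ledger storage on separate devices (the mount points are illustrative; see [Configuring bookies](#configuring-bookies) below for the parameters):

```shell
# Journal (WAL) on one disk, ledger storage on another
journalDirectory=/mnt/journal-disk/bk-txn
ledgerDirectories=/mnt/data-disk/bk-data
```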
### ZooKeeper

There is no constraint on the number of ZooKeeper nodes you can run with BookKeeper. A single machine running ZooKeeper in [standalone mode](https://zookeeper.apache.org/doc/current/zookeeperStarted.html#sc_InstallingSingleMode) is sufficient for BookKeeper, although for the sake of higher resilience we recommend running ZooKeeper in [quorum mode](https://zookeeper.apache.org/doc/current/zookeeperStarted.html#sc_RunningReplicatedZooKeeper) with multiple servers.

## Starting and stopping bookies

You can run bookies either in the foreground or in the background, using [nohup](https://en.wikipedia.org/wiki/Nohup). You can also run [local bookies](#local-bookie) for development purposes.

To start a bookie in the foreground, use the [`bookie`](../../reference/cli#bookkeeper-bookie) command of the [`bookkeeper`](../../reference/cli#bookkeeper) CLI tool:

```shell
$ bookkeeper-server/bin/bookkeeper bookie
```

To start a bookie in the background, use the [`bookkeeper-daemon.sh`](../../reference/cli#bookkeeper-daemon.sh) script and run `start bookie`:

```shell
$ bookkeeper-server/bin/bookkeeper-daemon.sh start bookie
```

### Local bookies

The instructions above showed you how to run bookies intended for production use. If you'd like to experiment with ensembles of bookies locally, you can use the [`localbookie`](../../reference/cli#bookkeeper-localbookie) command of the `bookkeeper` CLI tool and specify the number of bookies you'd like to run.

This would spin up a local ensemble of 6 bookies:

```shell
$ bookkeeper-server/bin/bookkeeper localbookie 6
```

> When you run a local bookie ensemble, all bookies run in a single JVM process.

## Configuring bookies

There's a wide variety of parameters that you can set in the bookie configuration file in `bookkeeper-server/conf/bk_server.conf` of your [BookKeeper installation](../../reference/config). A full listing can be found in [Bookie configuration](../../reference/config).

Some of the more important parameters to be aware of:

Parameter | Description | Default
:---------|:------------|:-------
`bookiePort` | The TCP port that the bookie listens on | `3181`
`zkServers` | A comma-separated list of ZooKeeper servers in `hostname:port` format | `localhost:2181`
`journalDirectory` | The directory where the [log device](../../getting-started/concepts#log-device) stores the bookie's write-ahead log (WAL) | `/tmp/bk-txn`
`ledgerDirectories` | The directories where the [ledger device](../../getting-started/concepts#ledger-device) stores the bookie's ledger entries (as a comma-separated list) | `/tmp/bk-data`

> Ideally, the directories specified by `journalDirectory` and `ledgerDirectories` should be on different devices.

## Logging

BookKeeper uses [slf4j](http://www.slf4j.org/) for logging, with [log4j](https://logging.apache.org/log4j/2.x/) bindings enabled by default.

To enable logging for a bookie, create a `log4j.properties` file and point the `BOOKIE_LOG_CONF` environment variable to the configuration file. Here's an example:

```shell
$ export BOOKIE_LOG_CONF=/some/path/log4j.properties
$ bookkeeper-server/bin/bookkeeper bookie
```

## Upgrading

From time to time you may need to make changes to the filesystem layout of bookies: changes that are incompatible with previous versions of BookKeeper and require that directories used with previous versions are upgraded.
If a filesystem upgrade is required when updating BookKeeper, the bookie will fail to start and return an error like this:

```
2017-05-25 10:41:50,494 - ERROR - [main:Bookie@246] - Directory layout version is less than 3, upgrade needed
```

BookKeeper provides a utility for upgrading the filesystem. You can perform an upgrade using the [`upgrade`](../../reference/cli#bookkeeper-upgrade) command of the `bookkeeper` CLI tool. When running `bookkeeper upgrade` you need to specify one of three flags:

Flag | Action
:----|:------
`--upgrade` | Performs an upgrade
`--rollback` | Performs a rollback to the initial filesystem version
`--finalize` | Marks the upgrade as complete

### Upgrade pattern

A standard upgrade pattern is to run an upgrade...

```shell
$ bookkeeper-server/bin/bookkeeper upgrade --upgrade
```

...then check that everything is working normally, then kill the bookie. If everything is okay, finalize the upgrade...

```shell
$ bookkeeper-server/bin/bookkeeper upgrade --finalize
```

...and then restart the server:

```shell
$ bookkeeper-server/bin/bookkeeper bookie
```

If something has gone wrong, you can always perform a rollback:

```shell
$ bookkeeper-server/bin/bookkeeper upgrade --rollback
```

## Formatting

You can format bookie metadata in ZooKeeper using the [`metaformat`](../../reference/cli#bookkeeper-shell-metaformat) command of the [BookKeeper shell](../../reference/cli#the-bookkeeper-shell).

By default, formatting is done in interactive mode, which prompts you to confirm the format operation if old data exists. You can disable confirmation using the `-nonInteractive` flag. If old data does exist, the format operation will abort *unless* you set the `-force` flag. Here's an example:

```shell
$ bookkeeper-server/bin/bookkeeper shell metaformat
```

You can format the local filesystem data on a bookie using the [`bookieformat`](../../reference/cli#bookkeeper-shell-bookieformat) command on each bookie. Here's an example:

```shell
$ bookkeeper-server/bin/bookkeeper shell bookieformat
```

> The `-force` and `-nonInteractive` flags are also available for the `bookieformat` command.

## AutoRecovery

For a guide to AutoRecovery in BookKeeper, see [this doc](../autorecovery).

## Missing disks or directories

Accidentally replacing disks or removing directories can cause a bookie to fail while trying to read a ledger fragment that, according to the ledger metadata, exists on the bookie. For this reason, when a bookie is started for the first time, its disk configuration is fixed for the lifetime of that bookie. Any change to its disk configuration, such as a crashed disk or an accidental configuration change, will result in the bookie being unable to start. That will throw an error like this:

```
2017-05-29 18:19:13,790 - ERROR - [main:BookieServer314] - Exception running bookie server : @
org.apache.bookkeeper.bookie.BookieException$InvalidCookieException
.......at org.apache.bookkeeper.bookie.Cookie.verify(Cookie.java:82)
.......at org.apache.bookkeeper.bookie.Bookie.checkEnvironment(Bookie.java:275)
.......at org.apache.bookkeeper.bookie.Bookie.(Bookie.java:351)
```

If the change was the result of an accidental configuration change, the change can be reverted and the bookie can be restarted. However, if the change *cannot* be reverted, such as is the case when you want to add a new disk or replace a disk, the bookie must be wiped and then all its data re-replicated onto it.
1. Increment the [`bookiePort`](../../reference/config#bookiePort) parameter in [`bk_server.conf`](../../reference/config).
1. Ensure that all directories specified by [`journalDirectory`](../../reference/config#journalDirectory) and [`ledgerDirectories`](../../reference/config#ledgerDirectories) are empty.
1. [Start the bookie](#starting-and-stopping-bookies).
1. Run the following command to re-replicate the data:

   ```bash
   $ bookkeeper-server/bin/bookkeeper shell recover \
       <zookeeper-server> \
       <old-bookie> \
       <new-bookie>
   ```

   The ZooKeeper server, old bookie, and new bookie are all identified by their external IP and `bookiePort` (3181 by default). Here's an example:

   ```bash
   $ bookkeeper-server/bin/bookkeeper shell recover \
       zk1.example.com \
       192.168.1.10:3181 \
       192.168.1.10:3181
   ```

   See the [AutoRecovery](../autorecovery) documentation for more info on the re-replication process.

diff --git a/site/docs/4.6.1/admin/geo-replication.md b/site/docs/4.6.1/admin/geo-replication.md deleted file mode 100644 index 38b972345ef..00000000000 --- a/site/docs/4.6.1/admin/geo-replication.md +++ /dev/null @@ -1,22 +0,0 @@

---
title: Geo-replication
subtitle: Replicate data across BookKeeper clusters
---

*Geo-replication* is the replication of data across BookKeeper clusters. In order to enable geo-replication for a group of BookKeeper clusters,

## Global ZooKeeper

Setting up a global ZooKeeper quorum is a lot like setting up a cluster-specific quorum. The crucial difference is that

### Geo-replication across three clusters

Let's say that you want to set up geo-replication across clusters in regions A, B, and C. First, the BookKeeper clusters in each region must have their own local (cluster-specific) ZooKeeper quorum.

> BookKeeper clusters use global ZooKeeper only for metadata storage. Traffic from bookies to ZooKeeper should thus be fairly light in general.

The crucial difference between using cluster-specific ZooKeeper and global ZooKeeper is that you need to point all bookies to use the global ZooKeeper setup.

## Region-aware placement policy

## Autorecovery

diff --git a/site/docs/4.6.1/admin/http.md b/site/docs/4.6.1/admin/http.md deleted file mode 100644 index 0097adc62b8..00000000000 --- a/site/docs/4.6.1/admin/http.md +++ /dev/null @@ -1,394 +0,0 @@

---
title: BookKeeper Admin REST API
---

This document introduces the BookKeeper HTTP endpoints, which can be used for BookKeeper administration. To use this feature, set `httpServerEnabled` to `true` in the `conf/bk_server.conf` file.

## All the endpoints

Currently all the HTTP endpoints can be divided into these five components:
1. Heartbeat: heartbeat for a specific bookie.
1. Config: server configuration for a specific bookie.
1. Ledger: HTTP endpoints related to ledgers.
1. Bookie: HTTP endpoints related to bookies.
1. AutoRecovery: HTTP endpoints related to auto recovery.

## Heartbeat

### Endpoint: /heartbeat
* Method: GET
* Description: Get heartbeat status for a specific bookie
* Response:

  | Code | Description |
  |:-------|:------------|
  |200 | Successful operation |

## Config

### Endpoint: /api/v1/config/server_config
1. Method: GET
   * Description: Get the value of all configured values overridden on the local server config
   * Response:

     | Code | Description |
     |:-------|:------------|
     |200 | Successful operation |
     |403 | Permission denied |
     |404 | Not found |
Method: PUT - * Description: Update a local server config - * Parameters: - - | Name | Type | Required | Description | - |:-----|:-----|:---------|:------------| - |configName | String | Yes | Configuration name (key) | - |configValue | String | Yes | Configuration value | - * Body: - ```json - { - "configName1": "configValue1", - "configName2": "configValue2" - } - ``` - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - -## Ledger - -### Endpoint: /api/v1/ledger/delete/?ledger_id=<ledger_id> -1. Method: DELETE - * Description: Delete a ledger. - * Parameters: - - | Name | Type | Required | Description | - |:-----|:-----|:---------|:------------| - |ledger_id | Long | Yes | ledger id of the ledger. | - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - -### Endpoint: /api/v1/ledger/list/?print_metadata=<metadata> -1. Method: GET - * Description: List all the ledgers. - * Parameters: - - | Name | Type | Required | Description | - |:-----|:-----|:---------|:------------| - |print_metadata | Boolean | No | whether to print out the metadata | - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - * Response Body format: - - ```json - { - "ledgerId1": "ledgerMetadata1", - "ledgerId2": "ledgerMetadata2", - ... - } - ``` - -### Endpoint: /api/v1/ledger/metadata/?ledger_id=<ledger_id> -1. Method: GET - * Description: Get the metadata of a ledger. - * Parameters: - - | Name | Type | Required | Description | - |:-----|:-----|:---------|:------------| - |ledger_id | Long | Yes | ledger id of the ledger. | - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - * Response Body format: - - ```json - { - "ledgerId1": "ledgerMetadata1" - } - ``` - -### Endpoint: /api/v1/ledger/read/?ledger_id=<ledger_id>&start_entry_id=<start_entry_id>&end_entry_id=<end_entry_id> -1. Method: GET - * Description: Read a range of entries from a ledger. - * Parameters: - - | Name | Type | Required | Description | - |:-----|:-----|:---------|:------------| - |ledger_id | Long | Yes| ledger id of the ledger. | - |start_entry_id | Long | No | start entry id of the read range. | - |end_entry_id | Long | No | end entry id of the read range. | - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - * Response Body format: - - ```json - { - "entryId1": "entry content 1", - "entryId2": "entry content 2", - ... - } - ``` - -## Bookie - -### Endpoint: /api/v1/bookie/list_bookies/?type=<type>&print_hostnames=<hostnames> -1. Method: GET - * Description: Get all the available bookies. - * Parameters: - - | Name | Type | Required | Description | - |:-----|:-----|:---------|:------------| - |type | String | Yes | value: "rw" or "ro", to list read-write/read-only bookies. | - |print_hostnames | Boolean | No | whether to print the hostnames of bookies. | - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - * Response Body format: - - ```json - { - "bookieSocketAddress1": "hostname1", - "bookieSocketAddress2": "hostname2", - ...
- } - ``` - -### Endpoint: /api/v1/bookie/list_bookie_info -1. Method: GET - * Description: Get disk usage info for the bookies in this cluster. - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - * Response Body format: - - ```json - { - "bookieAddress" : {free: xxx, total: xxx}, - "bookieAddress" : {free: xxx, total: xxx}, - ... - "clusterInfo" : {total_free: xxx, total: xxx} - } - ``` - -### Endpoint: /api/v1/bookie/last_log_mark -1. Method: GET - * Description: Get the last log marker. - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - * Response Body format: - - ```json - { - JournalId1 : position1, - JournalId2 : position2, - ... - } - ``` - -### Endpoint: /api/v1/bookie/list_disk_file/?file_type=<type> -1. Method: GET - * Description: Get all the files on disk of the current bookie. - * Parameters: - - | Name | Type | Required | Description | - |:-----|:-----|:---------|:------------| - |type | String | No | file type: journal/entrylog/index. | - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - * Response Body format: - - ```json - { - "journal files" : "filename1 filename2 ...", - "entrylog files" : "filename1 filename2...", - "index files" : "filename1 filename2 ..." - } - ``` - -### Endpoint: /api/v1/bookie/expand_storage -1. Method: PUT - * Description: Expand storage for a bookie. - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - -## Auto recovery - -### Endpoint: /api/v1/autorecovery/bookie/ -1. Method: PUT - * Description: Trigger ledger data recovery for a failed bookie - * Body: - ```json - { - "bookie_src": [ "bookie_src1", "bookie_src2"... ], - "bookie_dest": [ "bookie_dest1", "bookie_dest2"... ], - "delete_cookie": <boolean> - } - ``` - * Parameters: - - | Name | Type | Required | Description | - |:-----|:-----|:---------|:------------| - |bookie_src | Strings | Yes | source bookies to recover | - |bookie_dest | Strings | No | destination bookies for the recovered data | - |delete_cookie | Boolean | No | whether to delete the cookie | - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - -### Endpoint: /api/v1/autorecovery/list_under_replicated_ledger/?missingreplica=<bookie_address>&excludingmissingreplica=<bookie_address> -1. Method: GET - * Description: Get all under-replicated ledgers. - * Parameters: - - | Name | Type | Required | Description | - |:-----|:-----|:---------|:------------| - |missingreplica | String | No | missing replica bookieId | - |excludingmissingreplica | String | No | exclude missing replica bookieId | - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - * Response Body format: - - ```json - { - [ledgerId1, ledgerId2...] - } - ``` - -### Endpoint: /api/v1/autorecovery/who_is_auditor -1. Method: GET - * Description: Get the auditor's bookie id.
- * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - * Response Body format: - - ```json - { - "Auditor": "hostname/hostAddress:Port" - } - ``` - -### Endpoint: /api/v1/autorecovery/trigger_audit -1. Method: PUT - * Description: Force trigger an audit by resetting the lostBookieRecoveryDelay. - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - -### Endpoint: /api/v1/autorecovery/lost_bookie_recovery_delay -1. Method: GET - * Description: Get the lostBookieRecoveryDelay value in seconds. - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - -1. Method: PUT - * Description: Set the lostBookieRecoveryDelay value in seconds. - * Body: - ```json - { - "delay_seconds": <delay_seconds> - } - ``` - * Parameters: - - | Name | Type | Required | Description | - |:-----|:-----|:---------|:------------| - | delay_seconds | Long | Yes | set delay value in seconds. | - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - -### Endpoint: /api/v1/autorecovery/decommission -1. Method: PUT - * Description: Decommission a bookie: force trigger an audit task and make sure all the ledgers stored on the decommissioning bookie are replicated. - * Body: - ```json - { - "bookie_src": <bookie_src> - } - ``` - * Parameters: - - | Name | Type | Required | Description | - |:-----|:-----|:---------|:------------| - | bookie_src | String | Yes | Bookie to decommission. | - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found |
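Since these are plain HTTP endpoints, they can be exercised with any HTTP client for quick checks. Below is a minimal sketch using only the JDK's `HttpURLConnection` against the `/heartbeat` endpoint documented above. The host and port are assumptions (a local bookie with `httpServerEnabled=true`, assumed here to listen on port 8080); adjust them to your deployment.

```java
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;

public class BookieHttpCheck {
    public static void main(String[] args) throws Exception {
        // Assumption: a bookie with httpServerEnabled=true on localhost:8080
        URL url = new URL("http://localhost:8080/heartbeat");
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        conn.setRequestMethod("GET");

        // 200 indicates the bookie is up and serving the admin API
        System.out.println("Status: " + conn.getResponseCode());

        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(conn.getInputStream()))) {
            String line;
            while ((line = reader.readLine()) != null) {
                System.out.println(line);
            }
        }
        conn.disconnect();
    }
}
```

The same pattern (with `PUT` as the request method and a JSON body written to the connection's output stream) applies to the mutating endpoints such as `/api/v1/autorecovery/decommission`.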
diff --git a/site/docs/4.6.1/admin/metrics.md b/site/docs/4.6.1/admin/metrics.md deleted file mode 100644 index 142df3dcd2d..00000000000 --- a/site/docs/4.6.1/admin/metrics.md +++ /dev/null @@ -1,41 +0,0 @@ ---- -title: Metric collection ---- - -BookKeeper enables metrics collection through a variety of [stats providers](#stats-providers). - -> For a full listing of available metrics, see the [Metrics](../../reference/metrics) reference doc. - -## Stats providers - -BookKeeper has stats provider implementations for five sinks: - -Provider | Provider class name -:--------|:------------------- -[Codahale Metrics](https://mvnrepository.com/artifact/org.apache.bookkeeper.stats/codahale-metrics-provider) | `org.apache.bookkeeper.stats.CodahaleMetricsProvider` -[Prometheus](https://prometheus.io/) | `org.apache.bookkeeper.stats.prometheus.PrometheusMetricsProvider` -[Finagle](https://twitter.github.io/finagle/guide/Metrics.html) | `org.apache.bookkeeper.stats.FinagleStatsProvider` -[Ostrich](https://github.com/twitter/ostrich) | `org.apache.bookkeeper.stats.OstrichProvider` -[Twitter Science Provider](https://mvnrepository.com/artifact/org.apache.bookkeeper.stats/twitter-science-provider) | `org.apache.bookkeeper.stats.TwitterStatsProvider` - -> The [Codahale Metrics]({{ site.github_master }}/bookkeeper-stats-providers/codahale-metrics-provider) stats provider is the default provider. - -## Enabling stats providers in bookies - -There are two stats-related [configuration parameters](../../reference/config#statistics) available for bookies: - -Parameter | Description | Default -:---------|:------------|:------- -`enableStatistics` | Whether statistics are enabled for the bookie | `false` -`statsProviderClass` | The stats provider class used by the bookie | `org.apache.bookkeeper.stats.CodahaleMetricsProvider` - - -To enable stats: - -* set the `enableStatistics` parameter to `true` -* set `statsProviderClass` to the desired provider (see the [table above](#stats-providers) for a listing of classes)
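For applications that embed BookKeeper and wire up stats themselves, the sketch below illustrates the general shape of the stats provider API: start a provider, obtain a scoped `StatsLogger`, and record a value. This is a rough sketch, not a verified recipe; the `org.apache.bookkeeper.stats` class and method names used here are assumptions and may differ slightly across versions.

```java
import org.apache.bookkeeper.conf.ServerConfiguration;
import org.apache.bookkeeper.stats.CodahaleMetricsProvider;
import org.apache.bookkeeper.stats.Counter;
import org.apache.bookkeeper.stats.StatsLogger;
import org.apache.bookkeeper.stats.StatsProvider;

public class StatsSketch {
    public static void main(String[] args) {
        // Assumption: the codahale-metrics-provider artifact is on the classpath.
        StatsProvider provider = new CodahaleMetricsProvider();
        provider.start(new ServerConfiguration());

        // Obtain a logger scoped to a component and record a metric.
        StatsLogger logger = provider.getStatsLogger("bookkeeper_server");
        Counter counter = logger.getCounter("example_counter");
        counter.inc();

        provider.stop();
    }
}
```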
diff --git a/site/docs/4.6.1/admin/perf.md b/site/docs/4.6.1/admin/perf.md deleted file mode 100644 index 82956326e5d..00000000000 --- a/site/docs/4.6.1/admin/perf.md +++ /dev/null @@ -1,3 +0,0 @@ ---- -title: Performance tuning --- diff --git a/site/docs/4.6.1/admin/placement.md b/site/docs/4.6.1/admin/placement.md deleted file mode 100644 index ded456e1aea..00000000000 --- a/site/docs/4.6.1/admin/placement.md +++ /dev/null @@ -1,3 +0,0 @@ ---- -title: Customized placement policies --- diff --git a/site/docs/4.6.1/admin/upgrade.md b/site/docs/4.6.1/admin/upgrade.md deleted file mode 100644 index 1aa84a4ab40..00000000000 --- a/site/docs/4.6.1/admin/upgrade.md +++ /dev/null @@ -1,76 +0,0 @@ ---- -title: Upgrade ---- - -> If you have questions about upgrades (or need help), please feel free to reach out to us by [mailing list]({{ site.baseurl }}community/mailing-lists) or [Slack Channel]({{ site.baseurl }}community/slack). - -## Overview - -Consider the guidelines below when preparing to upgrade. - -- Always back up all your configuration files before upgrading. -- Read through the documentation and draft an upgrade plan that matches your specific requirements and environment before starting the upgrade process. - Put differently, don't start working through this guide on a live cluster: read the guide entirely, make a plan, then execute the plan. -- Pay careful attention to the order in which components are upgraded. In general, you need to upgrade bookies first and then upgrade your clients. -- If autorecovery is running along with bookies, you need to pay attention to the upgrade sequence. -- Read the release notes carefully for each release. They contain not only information about noteworthy features, but also changes to configurations - that may impact your upgrade. -- Always upgrade one bookie or a small set of bookies to canary the new version before upgrading all bookies in your cluster. - -## Canary - -It is wise to canary an upgraded version on one bookie or a small set of bookies before upgrading all bookies in your live cluster. - -You can follow these steps to canary an upgraded version: - -1. Stop a Bookie. -2. Upgrade the binary and configuration. -3. Start the Bookie in `ReadOnly` mode. This can be used to verify that the new version can serve the read workload well. -4. Once the Bookie has been running in `ReadOnly` mode successfully for a while, restart the Bookie in `Write/Read` mode. -5. After step 4, the Bookie will serve both write and read traffic. - -### Rollback Canaries - -If problems occur while canarying an upgraded version, you can simply take down the problematic Bookie node. The remaining bookies in the old cluster -will repair the data held by the problematic bookie node via autorecovery; no further action is needed. - -## Upgrade Steps - -Once you have determined that a version is safe to run on a few nodes in your cluster, you can perform the following steps to upgrade all bookies in your cluster. - -1. Determine if autorecovery is running along with bookies. If yes, check if the clients (either new clients with the new binary or old clients with new configurations) -are allowed to talk to old bookies; if clients are not allowed to talk to old bookies, please [disable autorecovery](../../reference/cli/#autorecovery-1) during the upgrade. -2. Decide on performing a rolling upgrade or a downtime upgrade. -3. Upgrade all Bookies (more below). -4. If autorecovery was disabled during the upgrade, [enable autorecovery](../../reference/cli/#autorecovery-1). -5. After all bookies are upgraded, build applications that use the BookKeeper client against the new BookKeeper libraries and deploy the new versions. - -### Upgrade Bookies - -In a rolling upgrade scenario, upgrade one Bookie at a time. In a downtime upgrade scenario, take the entire cluster down, upgrade each Bookie, then start the cluster. - -For each Bookie: - -1. Stop the bookie. -2. Upgrade the software (either new binary or new configuration). -3. Start the bookie. - -## Upgrade Guides - -The general upgrade method for Apache BookKeeper is described above. Below we cover the details for individual versions. - -### 4.5.x to 4.6.x upgrade - -There aren't any protocol-related backward-compatibility changes in 4.6.x, so you can follow the general upgrade sequence to upgrade from 4.5.x to 4.6.x. - -### 4.4.x to 4.5.x upgrade - -There aren't any protocol-related backward-compatibility changes in 4.5.0, so you can follow the general upgrade sequence to upgrade from 4.4.x to 4.5.x. -However, here is a list of things that you might want to know. - -1. 4.5.x upgrades netty from 3.x to 4.x. The memory usage pattern might change a bit. Netty 4 uses more direct memory. Please pay attention to your memory usage - and adjust the JVM settings accordingly. -2. `multi journals` is a non-rollbackable feature. If you configure a bookie to use multiple journals on 4.5.x, you cannot roll the bookie back to 4.4.x. You have - to take the bookie out and recover it if you want to roll back to 4.4.x. - -If you are planning to upgrade a non-secured cluster to a secured cluster enabling security features in 4.5.0, please read [BookKeeper Security](../../security/overview) for more details. diff --git a/site/docs/4.6.1/api/distributedlog-api.md b/site/docs/4.6.1/api/distributedlog-api.md deleted file mode 100644 index 9676fa9f04e..00000000000 --- a/site/docs/4.6.1/api/distributedlog-api.md +++ /dev/null @@ -1,395 +0,0 @@ ---- -title: DistributedLog -subtitle: A higher-level API for managing BookKeeper entries ---- - -> DistributedLog began its life as a separate project under the Apache Foundation. It was merged into BookKeeper in 2017. - -The DistributedLog API is an easy-to-use interface for managing BookKeeper entries that enables you to use BookKeeper without needing to interact with [ledgers](../ledger-api) directly. - -DistributedLog (DL) maintains sequences of records in categories called *logs* (aka *log streams*). *Writers* append records to DL logs, while *readers* fetch and process those records. - -## Architecture - -The diagram below illustrates how the DistributedLog API works with BookKeeper: - -![DistributedLog API]({{ site.baseurl }}img/distributedlog.png) - -## Logs - -A *log* in DistributedLog is an ordered, immutable sequence of *log records*.
- -The diagram below illustrates the anatomy of a log stream: - -![DistributedLog log]({{ site.baseurl }}img/logs.png) - -### Log records - -Each log record is a sequence of bytes. Applications are responsible for serializing and deserializing byte sequences stored in log records. - -Log records are written sequentially into a *log stream* and assigned a unique sequence number called a DLSN (DistributedLog Sequence Number). - -In addition to a DLSN, applications can assign their own sequence number when constructing log records. Application-defined sequence numbers are known as *TransactionIDs* (or *txid*). Either a DLSN or a TransactionID can be used for positioning readers to start reading from a specific log record. - -### Log segments - -Each log is broken down into *log segments* that contain subsets of records. Log segments are distributed and stored in BookKeeper. DistributedLog rolls the log segments based on the configured *rolling policy*, which can be either - -* a configurable period of time (such as every 2 hours), or -* a configurable maximum size (such as every 128 MB). - -The data in logs is divided up into equally sized log segments and distributed evenly across {% pop bookies %}. This allows logs to scale beyond a size that would fit on a single server and spreads read traffic across the cluster. - -### Namespaces - -Log streams that belong to the same organization are typically categorized and managed under a *namespace*. DistributedLog namespaces essentially enable applications to locate log streams. Applications can perform the following actions under a namespace: - -* create streams -* delete streams -* truncate streams to a given sequence number (either a DLSN or a TransactionID) - -## Writers - -Through the DistributedLog API, writers write data into logs of their choice. All records are appended into logs in order. The sequencing is performed by the writer, which means that there is only one active writer for a log at any given time. - -When a network partition leads two writers to attempt to write to the same log, DistributedLog guarantees correctness using a *fencing* mechanism in the log segment store. - -### Write Proxy - -Log writers are served and managed in a service tier called the *Write Proxy* (see the diagram [above](#architecture)). The Write Proxy is used for accepting writes from a large number of clients. - -## Readers - -DistributedLog readers read records from logs of their choice, starting with a provided position. The provided position can be either a DLSN or a TransactionID. - -Readers read records from logs in strict order. Different readers can read records from different positions in the same log. - -Unlike other pub-sub systems, DistributedLog doesn't record or manage readers' positions. This means that tracking is the responsibility of applications, as different applications may have different requirements for tracking and coordinating positions. This is hard to get right with a single approach. Distributed databases, for example, might store reader positions along with SSTables, so they would resume applying transactions from the positions stored in SSTables. Tracking reader positions could easily be done at the application level using various stores (such as ZooKeeper, the filesystem, or key-value stores). - -### Read Proxy - -Log records can be cached in a service tier called the *Read Proxy* to serve a large number of readers. See the diagram [above](#architecture). The Read Proxy is the analogue of the [Write Proxy](#write-proxy).
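To make the writer and reader concepts above concrete, here is a rough end-to-end sketch. It is written against the post-merge `org.apache.distributedlog` API; the class names, method names, and namespace URI below vary across DistributedLog versions and should be treated as assumptions rather than a verified recipe.

```java
import java.net.URI;
import org.apache.distributedlog.DLSN;
import org.apache.distributedlog.DistributedLogConfiguration;
import org.apache.distributedlog.LogRecord;
import org.apache.distributedlog.LogRecordWithDLSN;
import org.apache.distributedlog.api.AsyncLogReader;
import org.apache.distributedlog.api.AsyncLogWriter;
import org.apache.distributedlog.api.DistributedLogManager;
import org.apache.distributedlog.api.namespace.Namespace;
import org.apache.distributedlog.api.namespace.NamespaceBuilder;

public class DlSketch {
    public static void main(String[] args) throws Exception {
        // Assumption: a DL namespace rooted under this ZooKeeper URI exists
        Namespace namespace = NamespaceBuilder.newBuilder()
            .conf(new DistributedLogConfiguration())
            .uri(URI.create("distributedlog://127.0.0.1:2181/messaging/my-namespace"))
            .build();

        DistributedLogManager dlm = namespace.openLog("my-stream");

        // Writer: append a record carrying an application-assigned TransactionID
        AsyncLogWriter writer = dlm.openAsyncLogWriter().get();
        long txid = 1L;
        DLSN dlsn = writer.write(new LogRecord(txid, "hello".getBytes())).get();
        System.out.println("Wrote record at " + dlsn);

        // Reader: position at the start of the log and fetch one record
        AsyncLogReader reader = dlm.openAsyncLogReader(DLSN.InitialDLSN).get();
        LogRecordWithDLSN record = reader.readNext().get();
        System.out.println("Read record at " + record.getDlsn());

        reader.asyncClose().get();
        writer.asyncClose().get();
        dlm.close();
        namespace.close();
    }
}
```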
- -## Guarantees - -The DistributedLog API for BookKeeper provides a number of guarantees for applications: - -* Records written by a [writer](#writers) to a [log](#logs) are appended in the order in which they are written. If a record **R1** is written by the same writer as a record **R2**, **R1** will have a smaller sequence number than **R2**. -* [Readers](#readers) see [records](#log-records) in the same order in which they are [written](#writers) to the log. -* All records are persisted on disk by BookKeeper before acknowledgements, which guarantees durability. -* For a log with a replication factor of N, DistributedLog tolerates up to N-1 server failures without losing any records. - -## API - -Documentation for the DistributedLog API can be found [here](https://bookkeeper.apache.org/distributedlog/docs/latest/user_guide/api/core). - -> At a later date, the DistributedLog API docs will be added here. - - diff --git a/site/docs/4.6.1/api/ledger-adv-api.md b/site/docs/4.6.1/api/ledger-adv-api.md deleted file mode 100644 index f46950dd984..00000000000 --- a/site/docs/4.6.1/api/ledger-adv-api.md +++ /dev/null @@ -1,82 +0,0 @@ ---- -title: The Advanced Ledger API ---- - -In release `4.5.0`, Apache BookKeeper introduced a few advanced APIs for advanced usage. -This section covers these advanced APIs. - -> Before learning the advanced APIs, please read the [Ledger API](../ledger-api) first. - -## LedgerHandleAdv - -[`LedgerHandleAdv`](../javadoc/org/apache/bookkeeper/client/LedgerHandleAdv) is an advanced extension of [`LedgerHandle`](../javadoc/org/apache/bookkeeper/client/LedgerHandle). -It allows users to pass in an `entryId` when adding an entry. - -### Creating advanced ledgers - -Here's an example: - -```java -byte[] passwd = "some-passwd".getBytes(); -LedgerHandleAdv handle = bkClient.createLedgerAdv( - 3, 3, 2, // replica settings - DigestType.CRC32, - passwd); -``` - -You can also create advanced ledgers asynchronously. - -```java -class LedgerCreationCallback implements AsyncCallback.CreateCallback { - public void createComplete(int returnCode, LedgerHandle handle, Object ctx) { - System.out.println("Ledger successfully created"); - } -} -client.asyncCreateLedgerAdv( - 3, // ensemble size - 3, // write quorum size - 2, // ack quorum size - BookKeeper.DigestType.CRC32, - password, - new LedgerCreationCallback(), - "some context" -); -``` - -Besides the APIs above, BookKeeper allows users to provide a `ledger-id` when creating advanced ledgers. - -```java -long ledgerId = ...; // the ledger id is generated externally. - -byte[] passwd = "some-passwd".getBytes(); -LedgerHandleAdv handle = bkClient.createLedgerAdv( - ledgerId, // ledger id generated externally - 3, 3, 2, // replica settings - DigestType.CRC32, - passwd); -``` - -> Please note, it is the user's responsibility to provide a unique ledger id when using the API above. -> If a ledger already exists when a user tries to create an advanced ledger with the same ledger id, -> a [LedgerExistsException](../javadoc/org/apache/bookkeeper/client/BKException.BKLedgerExistException.html) is thrown by the bookkeeper client. - -### Add Entries - -The normal [add entries API](ledger-api/#adding-entries-to-ledgers) is disabled for advanced ledgers. Instead, when users want to add entries to advanced ledgers, an entry id must be passed in along with the entry data when adding an entry.
- -```java -long entryId = ...; // entry id generated externally - -ledger.addEntry(entryId, "Some entry data".getBytes()); -``` - -A few notes when using this API: - -- The entry id has to be non-negative. -- Clients may add entries out of order. -- However, the entries are only acknowledged in a monotonic order starting from 0. - -### Read Entries - -The read entries API for advanced ledgers remains the same as for [normal ledgers](../ledger-api/#reading-entries-from-ledgers). diff --git a/site/docs/4.6.1/api/ledger-api.md b/site/docs/4.6.1/api/ledger-api.md deleted file mode 100644 index c247bfb8fa9..00000000000 --- a/site/docs/4.6.1/api/ledger-api.md +++ /dev/null @@ -1,810 +0,0 @@ ---- -title: The Ledger API ---- - -The ledger API is a lower-level API for BookKeeper that enables you to interact with {% pop ledgers %} directly. - -## The Java ledger API client - -To get started with the Java client for BookKeeper, install the `bookkeeper-server` library as a dependency in your Java application. - -> For a more in-depth tutorial that involves a real use case for BookKeeper, see the [Example application](../example-application) guide. - -## Installation - -The BookKeeper Java client library is available via [Maven Central](http://search.maven.org/) and can be installed using [Maven](#maven), [Gradle](#gradle), and other build tools. - -### Maven - -If you're using [Maven](https://maven.apache.org/), add this to your [`pom.xml`](https://maven.apache.org/guides/introduction/introduction-to-the-pom.html) build configuration file: - -```xml -<!-- in your <properties> block --> -<bookkeeper.version>4.6.1</bookkeeper.version> - -<!-- in your <dependencies> block --> -<dependency> -  <groupId>org.apache.bookkeeper</groupId> -  <artifactId>bookkeeper-server</artifactId> -  <version>${bookkeeper.version}</version> -</dependency> -``` - -BookKeeper makes heavy use of Google's [protobuf](https://github.com/google/protobuf/tree/master/java) and [guava](https://github.com/google/guava) libraries. If your application might include different versions of protobuf or guava introduced by other dependencies, you can choose to use the -shaded library, which relocates the protobuf and guava classes into a different namespace to avoid conflicts. - -You can use the shaded artifact of `bookkeeper-server`. Please note that [maven-shade-plugin](https://maven.apache.org/plugins/maven-shade-plugin) doesn't generate -a dependency-reduced POM file for a shaded artifact attached via [shadedArtifactAttached](https://maven.apache.org/plugins/maven-shade-plugin/examples/attached-artifact.html). You need to manually exclude the relocated packages when using the shaded artifact. A full example is -shown below. - -```xml -<!-- in your <properties> block --> -<bookkeeper.version>4.6.1</bookkeeper.version> - -<!-- in your <dependencies> block --> -<dependency> -  <groupId>org.apache.bookkeeper</groupId> -  <artifactId>bookkeeper-server</artifactId> -  <version>${bookkeeper.version}</version> -  <classifier>shaded</classifier> -  <exclusions> -    <exclusion> -      <groupId>org.apache.bookkeeper</groupId> -      <artifactId>bookkeeper-common</artifactId> -    </exclusion> -    <exclusion> -      <groupId>org.apache.bookkeeper</groupId> -      <artifactId>bookkeeper-proto</artifactId> -    </exclusion> -  </exclusions> -</dependency> -``` - -Or you can use a separate shaded artifact `bookkeeper-server-shaded`. - -```xml -<!-- in your <properties> block --> -<bookkeeper.version>4.6.1</bookkeeper.version> - -<!-- in your <dependencies> block --> -<dependency> -  <groupId>org.apache.bookkeeper</groupId> -  <artifactId>bookkeeper-server-shaded</artifactId> -  <version>${bookkeeper.version}</version> -</dependency> -``` - -### Gradle - -If you're using [Gradle](https://gradle.org/), add this to your [`build.gradle`](https://spring.io/guides/gs/gradle/) build configuration file: - -```groovy -dependencies { - compile group: 'org.apache.bookkeeper', name: 'bookkeeper-server', version: '4.6.1' -} - -// Alternatively: -dependencies { - compile 'org.apache.bookkeeper:bookkeeper-server:4.6.1' -} -``` - -As with Maven, you can also configure Gradle to use the shaded jars.
- -```groovy -// use the shaded artifact of the `bookkeeper-server` jar -dependencies { - compile ('org.apache.bookkeeper:bookkeeper-server:{{ site.latest-version }}:shaded') { - exclude group: 'org.apache.bookkeeper', module: 'bookkeeper-common' - exclude group: 'org.apache.bookkeeper', module: 'bookkeeper-proto' - } -} - - -// use the `bookkeeper-server-shaded` jar -dependencies { - compile 'org.apache.bookkeeper:bookkeeper-server-shaded:{{ site.latest-version }}' -} -``` - -## Connection string - -When interacting with BookKeeper using the Java client, you need to provide your client with a connection string, for which you have three options: - -* Provide your entire ZooKeeper connection string, for example `zk1:2181,zk2:2181,zk3:2181`. -* Provide a host and port for one node in your ZooKeeper cluster, for example `zk1:2181`. In general, it's better to provide a full connection string (in case the ZooKeeper node you attempt to connect to is down). -* If your ZooKeeper cluster can be discovered via DNS, you can provide the DNS name, for example `my-zookeeper-cluster.com`. - -## Creating a new client - -In order to create a new [`BookKeeper`](../javadoc/org/apache/bookkeeper/client/BookKeeper) client object, you need to pass in a [connection string](#connection-string). Here is an example client object using a ZooKeeper connection string: - -```java -try { - String connectionString = "127.0.0.1:2181"; // For a single-node, local ZooKeeper cluster - BookKeeper bkClient = new BookKeeper(connectionString); -} catch (InterruptedException | IOException | KeeperException e) { - e.printStackTrace(); -} -``` - -> If you're running BookKeeper [locally](../../getting-started/run-locally), using the [`localbookie`](../../reference/cli#bookkeeper-localbookie) command, use `"127.0.0.1:2181"` for your connection string, as in the example above. - -There are, however, other ways that you can create a client object: - -* By passing in a [`ClientConfiguration`](../javadoc/org/apache/bookkeeper/conf/ClientConfiguration) object. Here's an example: - - ```java - ClientConfiguration config = new ClientConfiguration(); - config.setZkServers(zkConnectionString); - config.setAddEntryTimeout(2000); - BookKeeper bkClient = new BookKeeper(config); - ``` - -* By specifying a `ClientConfiguration` and a [`ZooKeeper`](http://zookeeper.apache.org/doc/current/api/org/apache/zookeeper/ZooKeeper.html) client object: - - ```java - ClientConfiguration config = new ClientConfiguration(); - config.setAddEntryTimeout(5000); - ZooKeeper zkClient = new ZooKeeper(/* client args */); - BookKeeper bkClient = new BookKeeper(config, zkClient); - ``` - -* Using the `forConfig` method: - - ```java - BookKeeper bkClient = BookKeeper.forConfig(conf).build(); - ``` - -## Creating ledgers - -The easiest way to create a {% pop ledger %} using the Java client is via the `createLedger` method, which creates a new ledger synchronously and returns a [`LedgerHandle`](../javadoc/org/apache/bookkeeper/client/LedgerHandle). You must specify at least a [`DigestType`](../javadoc/org/apache/bookkeeper/client/BookKeeper.DigestType) and a password.
- -Here's an example: - -```java -byte[] password = "some-password".getBytes(); -LedgerHandle handle = bkClient.createLedger(BookKeeper.DigestType.MAC, password); -``` - -You can also create ledgers asynchronously. - -### Create ledgers asynchronously - -```java -class LedgerCreationCallback implements AsyncCallback.CreateCallback { - public void createComplete(int returnCode, LedgerHandle handle, Object ctx) { - System.out.println("Ledger successfully created"); - } -} - -client.asyncCreateLedger( - 3, - 2, - BookKeeper.DigestType.MAC, - password, - new LedgerCreationCallback(), - "some context" -); -``` - -## Adding entries to ledgers - -```java -long entryId = ledger.addEntry("Some entry data".getBytes()); -``` - -### Add entries asynchronously
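Entries can also be added asynchronously via `LedgerHandle#asyncAddEntry`, which takes an `AsyncCallback.AddCallback` that is invoked once the entry has been acknowledged. A minimal sketch (the open `ledger` handle is assumed to exist, as in the synchronous snippet above):

```java
import org.apache.bookkeeper.client.AsyncCallback;
import org.apache.bookkeeper.client.LedgerHandle;

class AddEntryCallback implements AsyncCallback.AddCallback {
    @Override
    public void addComplete(int rc, LedgerHandle lh, long entryId, Object ctx) {
        // rc is a BKException.Code value; 0 (OK) means the entry was persisted
        System.out.println("Added entry " + entryId + ", rc = " + rc);
    }
}

// elsewhere, with an open LedgerHandle named `ledger`:
// ledger.asyncAddEntry("Some entry data".getBytes(), new AddEntryCallback(), null);
```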
- -## Reading entries from ledgers - -```java -Enumeration<LedgerEntry> entries = handle.readEntries(1, 99); -``` - -To read all possible entries from the ledger: - -```java -Enumeration<LedgerEntry> entries = - handle.readEntries(0, handle.getLastAddConfirmed()); - -while (entries.hasMoreElements()) { - LedgerEntry entry = entries.nextElement(); - System.out.println("Successfully read entry " + entry.getEntryId()); -} -``` - -### Reading entries after the LastAddConfirmed range - -`readUnconfirmedEntries` allows reading beyond the LastAddConfirmed range. -It lets the client read without checking the local value of LastAddConfirmed, so it is possible to read entries for which the writer has not yet received an acknowledgement. -For entries within the range 0..LastAddConfirmed, BookKeeper guarantees that the writer has successfully received the acknowledgement. -For entries outside that range, it is possible that the writer never received the acknowledgement, so there is a risk that the reader sees entries before the writer does, which could result in a consistency issue in some cases. -With this method you can even read entries before the LastAddConfirmed and entries after it with one call; the expected consistency is as described above. - -```java -Enumeration<LedgerEntry> entries = - handle.readUnconfirmedEntries(0, lastEntryIdExpectedToRead); - -while (entries.hasMoreElements()) { - LedgerEntry entry = entries.nextElement(); - System.out.println("Successfully read entry " + entry.getEntryId()); -} -``` - -## Deleting ledgers - -{% pop Ledgers %} can also be deleted synchronously or asynchronously. - -```java -long ledgerId = 1234; - -try { - bkClient.deleteLedger(ledgerId); -} catch (Exception e) { - e.printStackTrace(); -} -``` - -### Delete ledgers asynchronously - -Exceptions thrown: - -* - -```java -class DeleteEntryCallback implements AsyncCallback.DeleteCallback { - public void deleteComplete(int rc, Object ctx) { - System.out.println("Delete completed"); - } -} -``` - -## Simple example - -> For a more involved BookKeeper client example, see the [example application](#example-application) below. - -In the code sample below, a BookKeeper client: - -* creates a ledger -* writes entries to the ledger -* closes the ledger (meaning no further writes are possible) -* re-opens the ledger for reading -* reads all available entries - -```java -// Create a client object for the local ensemble. This -// operation throws multiple exceptions, so make sure to -// use a try/catch block when instantiating client objects. -BookKeeper bkc = new BookKeeper("localhost:2181"); - -// A password for the new ledger -byte[] ledgerPassword = /* some sequence of bytes, perhaps random */; - -// Create a new ledger and fetch its identifier -LedgerHandle lh = bkc.createLedger(BookKeeper.DigestType.MAC, ledgerPassword); -long ledgerId = lh.getId(); - -// Create a buffer for four-byte entries -ByteBuffer entry = ByteBuffer.allocate(4); - -int numberOfEntries = 100; - -// Add entries to the ledger, then close it -for (int i = 0; i < numberOfEntries; i++){ - entry.putInt(i); - entry.position(0); - lh.addEntry(entry.array()); -} -lh.close(); - -// Open the ledger for reading -lh = bkc.openLedger(ledgerId, BookKeeper.DigestType.MAC, ledgerPassword); - -// Read all available entries -Enumeration<LedgerEntry> entries = lh.readEntries(0, numberOfEntries - 1); - -while(entries.hasMoreElements()) { - ByteBuffer result = ByteBuffer.wrap(entries.nextElement().getEntry()); - Integer retrEntry = result.getInt(); - - // Print the integer stored in each entry - System.out.println(String.format("Result: %s", retrEntry)); -} - -// Close the ledger and the client -lh.close(); -bkc.close(); -``` - -Running this should return this output: - -```shell -Result: 0 -Result: 1 -Result: 2 -# etc -``` - -## Example application - -This tutorial walks you through building an example application that uses BookKeeper as the replicated log. The application uses the [BookKeeper Java client](../java-client) to interact with BookKeeper. - -> The code for this tutorial can be found in [this GitHub repo](https://github.com/ivankelly/bookkeeper-tutorial/). The final code for the `Dice` class can be found [here](https://github.com/ivankelly/bookkeeper-tutorial/blob/master/src/main/java/org/apache/bookkeeper/Dice.java). - -### Setup - -Before you start, you will need to have a BookKeeper cluster running locally on your machine. For installation instructions, see [Installation](../../getting-started/installation). - -To start up a cluster consisting of six {% pop bookies %} locally: - -```shell -$ bookkeeper-server/bin/bookkeeper localbookie 6 -``` - -You can specify a different number of bookies if you'd like. - -### Goal - -The goal of the dice application is to have - -* multiple instances of this application, -* possibly running on different machines, -* all of which display the exact same sequence of numbers. - -In other words, the log needs to be both durable and consistent, regardless of how many {% pop bookies %} are participating in the BookKeeper ensemble. If one of the bookies crashes or becomes unable to communicate with the other bookies in any way, it should *still* display the same sequence of numbers as the others. This tutorial will show you how to achieve this. - -To begin, download the base application, compile and run it. - -```shell -$ git clone https://github.com/ivankelly/bookkeeper-tutorial.git -$ mvn package -$ mvn exec:java -Dexec.mainClass=org.apache.bookkeeper.Dice -``` - -That should yield output that looks something like this: - -``` -[INFO] Scanning for projects... -[INFO] -[INFO] ------------------------------------------------------------------------ -[INFO] Building tutorial 1.0-SNAPSHOT -[INFO] ------------------------------------------------------------------------ -[INFO] -[INFO] --- exec-maven-plugin:1.3.2:java (default-cli) @ tutorial --- -[WARNING] Warning: killAfter is now deprecated. Do you need it ? Please comment on MEXEC-6.
-Value = 4 -Value = 5 -Value = 3 -``` - -### The base application - -The application in this tutorial is a dice application. The `Dice` class below has a `playDice` function that generates a random number between 1 and 6 every second, prints the value of the dice roll, and runs indefinitely. - -```java -public class Dice { - Random r = new Random(); - - void playDice() throws InterruptedException { - while (true) { - Thread.sleep(1000); - System.out.println("Value = " + (r.nextInt(6) + 1)); - } - } -} -``` - -When you run the `main` function of this class, a new `Dice` object will be instantiated and then run indefinitely: - -```java -public class Dice { - // other methods - - public static void main(String[] args) throws InterruptedException { - Dice d = new Dice(); - d.playDice(); - } -} -``` - -### Leaders and followers (and a bit of background) - -To achieve this common view in multiple instances of the program, we need each instance to agree on what the next number in the sequence will be. For example, the instances must agree that 4 is the first number and 2 is the second number and 5 is the third number and so on. This is a difficult problem, especially in the case that any instance may go away at any time, and messages between the instances can be lost or reordered. - -Luckily, there are already algorithms to solve this. Paxos is an abstract algorithm to implement this kind of agreement, while Zab and Raft are more practical protocols. This video gives a good overview of how these algorithms usually look. They all have a similar core. - -It would be possible to run Paxos to agree on each number in the sequence. However, running Paxos each time can be expensive. What Zab and Raft do is that they use a Paxos-like algorithm to elect a leader. The leader then decides what the sequence of events should be, putting them in a log, which the other instances can then follow to maintain the same state as the leader. - -Bookkeeper provides the functionality for the second part of the protocol, allowing a leader to write events to a log and have multiple followers tailing the log. However, bookkeeper does not do leader election. You will need a zookeeper or raft instance for that purpose. - -### Why not just use ZooKeeper? - -There are a number of reasons: - -1. Zookeeper's log is only exposed through a tree-like interface. It can be hard to shoehorn your application into this. -2. A zookeeper ensemble of multiple machines is limited to one log. You may want one log per resource, which will become expensive very quickly. -3. Adding extra machines to a zookeeper ensemble does not increase capacity or throughput. - -Bookkeeper can be seen as a means of exposing ZooKeeper's replicated log to applications in a scalable fashion. ZooKeeper is still used by BookKeeper, however, to maintain consistency guarantees, though clients don't need to interact with ZooKeeper directly. - -### Electing a leader - -We'll use zookeeper to elect a leader. A zookeeper instance will have been started locally when you started the localbookie application above. To verify it's running, run the following command.
- -```shell -$ echo stat | nc localhost 2181 -Zookeeper version: 3.4.6-1569965, built on 02/20/2014 09:09 GMT -Clients: - /127.0.0.1:59343[1](queued=0,recved=40,sent=41) - /127.0.0.1:49354[1](queued=0,recved=11,sent=11) - /127.0.0.1:49361[0](queued=0,recved=1,sent=0) - /127.0.0.1:59344[1](queued=0,recved=38,sent=39) - /127.0.0.1:59345[1](queued=0,recved=38,sent=39) - /127.0.0.1:59346[1](queued=0,recved=38,sent=39) - -Latency min/avg/max: 0/0/23 -Received: 167 -Sent: 170 -Connections: 6 -Outstanding: 0 -Zxid: 0x11 -Mode: standalone -Node count: 16 -``` - -To interact with zookeeper, we'll use the Curator client rather than the stock zookeeper client. Getting things right with the zookeeper client can be tricky, and curator removes a lot of the pointy corners for you. In fact, curator even provides a leader election recipe, so we need to do very little work to get leader election in our application. - -```java -public class Dice extends LeaderSelectorListenerAdapter implements Closeable { - - final static String ZOOKEEPER_SERVER = "127.0.0.1:2181"; - final static String ELECTION_PATH = "/dice-elect"; - - ... - - Dice() throws InterruptedException { - curator = CuratorFrameworkFactory.newClient(ZOOKEEPER_SERVER, - 2000, 10000, new ExponentialBackoffRetry(1000, 3)); - curator.start(); - curator.blockUntilConnected(); - - leaderSelector = new LeaderSelector(curator, ELECTION_PATH, this); - leaderSelector.autoRequeue(); - leaderSelector.start(); - } -``` - -In the constructor for Dice, we need to create the curator client. We specify four things when creating the client: the location of the zookeeper service, the session timeout, the connect timeout, and the retry policy. - -The session timeout is a zookeeper concept. If the zookeeper server doesn't hear anything from the client for this amount of time, any leases which the client holds will be timed out. This is important in leader election. For leader election, the curator client will take a lease on ELECTION_PATH. The first instance to take the lease will become leader and the rest will become followers. However, their claim on the lease will remain in the queue. If the first instance then goes away, due to a crash etc., its session will time out. Once the session times out, the lease will be released and the next instance in the queue will become the leader. - -The call to autoRequeue() will make the client queue itself again if it loses the lease for some other reason, such as when it is still alive but a garbage collection pause caused it to lose its session, and thereby its lease. I've set the lease to be quite low so that when we test out leader election, transitions will be quite quick. The optimum length for the session timeout depends very much on the use case. - -The other parameters are the connection timeout, i.e. the amount of time the client will spend trying to connect to a zookeeper server before giving up, and the retry policy. The retry policy specifies how the client should respond to transient errors, such as connection loss. Operations that fail with transient errors can be retried, and this argument specifies how often the retries should occur. - -Finally, you'll have noticed that Dice now extends LeaderSelectorListenerAdapter and implements Closeable. Closeable is there to close the resources we have initialized in the constructor: the curator client and the leaderSelector. LeaderSelectorListenerAdapter is a callback that the leaderSelector uses to notify the instance that it is now the leader.
It is passed as the third argument to the LeaderSelector constructor. - -```java - @Override - public void takeLeadership(CuratorFramework client) - throws Exception { - synchronized (this) { - leader = true; - try { - while (true) { - this.wait(); - } - } catch (InterruptedException ie) { - Thread.currentThread().interrupt(); - leader = false; - } - } - } -``` - -takeLeadership() is the callback called by LeaderSelector when the instance is leader. It should only return when the instance wants to give up leadership. In our case, we never do, so we wait on the current object until we're interrupted. To signal to the rest of the program that we are leader, we set a volatile boolean called leader to true. This is unset after we are interrupted. - -```java - void playDice() throws InterruptedException { - while (true) { - while (leader) { - Thread.sleep(1000); - System.out.println("Value = " + (r.nextInt(6) + 1) - + ", isLeader = " + leader); - } - } - } -``` - -Finally, we modify the `playDice` function to only generate random numbers when it is the leader. - -Run two instances of the program in two different terminals. You'll see that one becomes leader and prints numbers and the other just sits there. - -Now stop the leader using Control-Z. This will pause the process, but it won't kill it. You will be dropped back to the shell in that terminal. After a couple of seconds (the session timeout), you will see that the other instance has become the leader. Zookeeper will guarantee that only one instance is selected as leader at any time. - -Now go back to the shell that the original leader was on and wake up the process using fg. You'll see something like the following: - -```shell -... -... -Value = 4, isLeader = true -Value = 4, isLeader = true -^Z -[1]+ Stopped mvn exec:java -Dexec.mainClass=org.apache.bookkeeper.Dice -$ fg -mvn exec:java -Dexec.mainClass=org.apache.bookkeeper.Dice -Value = 3, isLeader = true -Value = 1, isLeader = false -``` - -## New API - -Since 4.6, BookKeeper provides a new client API which leverages the Java 8 [CompletableFuture](https://docs.oracle.com/javase/8/docs/api/java/util/concurrent/CompletableFuture.html) facility. -[WriteHandle](../javadoc/org/apache/bookkeeper/client/api/WriteHandle), [WriteAdvHandle](../javadoc/org/apache/bookkeeper/client/api/WriteAdvHandle), and [ReadHandle](../javadoc/org/apache/bookkeeper/client/api/ReadHandle) are introduced to replace the generic [LedgerHandle](../javadoc/org/apache/bookkeeper/client/LedgerHandle). - -> All of the new API is available in `org.apache.bookkeeper.client.api`. You should only use interfaces defined in this package. - -*Beware* that this API in 4.6 is still an experimental API and can be subject to change in the next minor releases. - -### Create a new client - -In order to create a new [`BookKeeper`](../javadoc/org/apache/bookkeeper/client/api/BookKeeper) client object, you need to construct a [`ClientConfiguration`](../javadoc/org/apache/bookkeeper/conf/ClientConfiguration) object and set a [connection string](#connection-string) first, and then use [`BookKeeperBuilder`](../javadoc/org/apache/bookkeeper/client/api/BookKeeperBuilder) to build the client. - -Here is an example of building the bookkeeper client: - -```java -// construct a client configuration instance -ClientConfiguration conf = new ClientConfiguration(); -conf.setZkServers(zkConnectionString); -conf.setZkLedgersRootPath("/path/to/ledgers/root"); - -// build the bookkeeper client -BookKeeper bk = BookKeeper.newBuilder(conf) - .statsLogger(...) - ...
- .build(); - -``` - -### Create ledgers - -The easiest way to create a {% pop ledger %} using the Java client is via the [`CreateBuilder`](../javadoc/org/apache/bookkeeper/client/api/CreateBuilder). You must specify at least -a [`DigestType`](../javadoc/org/apache/bookkeeper/client/api/DigestType) and a password. - -Here's an example: - -```java -BookKeeper bk = ...; - -byte[] password = "some-password".getBytes(); - -WriteHandle wh = bk.newCreateLedgerOp() - .withDigestType(DigestType.CRC32) - .withPassword(password) - .withEnsembleSize(3) - .withWriteQuorumSize(3) - .withAckQuorumSize(2) - .execute() // execute the creation op - .get(); // wait for the execution to complete -``` - -A [`WriteHandle`](../javadoc/org/apache/bookkeeper/client/api/WriteHandle) is returned for applications to write and read entries to and from the ledger. - -### Append entries to ledgers - -The [`WriteHandle`](../javadoc/org/apache/bookkeeper/client/api/WriteHandle) can be used for applications to append entries to the ledgers. - -```java -WriteHandle wh = ...; - -CompletableFuture<Long> addFuture = wh.append("Some entry data".getBytes()); - -// option 1: you can wait for add to complete synchronously -try { - long entryId = FutureUtils.result(addFuture); -} catch (BKException bke) { - // error handling -} - -// option 2: you can process the result and exception asynchronously -addFuture - .thenApply(entryId -> { - // process the result - }) - .exceptionally(cause -> { - // handle the exception - }) - -// option 3: bookkeeper provides a twitter-future-like event listener for processing result and exception asynchronously -addFuture.whenComplete(new FutureEventListener<Long>() { - @Override - public void onSuccess(long entryId) { - // process the result - } - @Override - public void onFailure(Throwable cause) { - // handle the exception - } -}); -``` - -The append method supports three representations of a bytes array: the native Java `byte[]`, Java NIO `ByteBuffer`, and Netty `ByteBuf`. -It is recommended to use `ByteBuf` as it is more GC-friendly. - -### Open ledgers - -You can open ledgers to read entries. Opening ledgers is done with the [`OpenBuilder`](../javadoc/org/apache/bookkeeper/client/api/OpenBuilder). You must specify the ledgerId and the password -in order to open a ledger. - -Here's an example: - -```java -BookKeeper bk = ...; - -long ledgerId = ...; -byte[] password = "some-password".getBytes(); - -ReadHandle rh = bk.newOpenLedgerOp() - .withLedgerId(ledgerId) - .withPassword(password) - .execute() // execute the open op - .get(); // wait for the execution to complete -``` - -A [`ReadHandle`](../javadoc/org/apache/bookkeeper/client/api/ReadHandle) is returned for applications to read entries from the ledger. - -#### Recovery vs NoRecovery - -By default, the [`OpenBuilder`](../javadoc/org/apache/bookkeeper/client/api/OpenBuilder) opens the ledger in a `NoRecovery` mode. You can open the ledger in `Recovery` mode by specifying -`withRecovery(true)` in the open builder. - -```java -BookKeeper bk = ...; - -long ledgerId = ...; -byte[] password = "some-password".getBytes(); - -ReadHandle rh = bk.newOpenLedgerOp() - .withLedgerId(ledgerId) - .withPassword(password) - .withRecovery(true) - .execute() - .get(); - -``` - -**What is the difference between "Recovery" and "NoRecovery"?** - -If you are opening a ledger in "Recovery" mode, it will basically fence and seal the ledger -- no more entries are allowed -to be appended to it.
The writer that is currently appending entries to the ledger will fail with a [`LedgerFencedException`](../javadoc/org/apache/bookkeeper/client/api/BKException.Code#LedgerFencedException). - -In contrast, opening a ledger in "NoRecovery" mode will not fence and seal the ledger. "NoRecovery" mode is usually used by applications to tail-read from a ledger. - -### Read entries from ledgers - -The [`ReadHandle`](../javadoc/org/apache/bookkeeper/client/api/ReadHandle) returned from the open builder can be used for applications to read entries from the ledgers. - -```java -ReadHandle rh = ...; - -long startEntryId = ...; -long endEntryId = ...; -CompletableFuture<LedgerEntries> readFuture = rh.read(startEntryId, endEntryId); - -// option 1: you can wait for read to complete synchronously -try { - LedgerEntries entries = FutureUtils.result(readFuture); -} catch (BKException bke) { - // error handling -} - -// option 2: you can process the result and exception asynchronously -readFuture - .thenApply(entries -> { - // process the result - }) - .exceptionally(cause -> { - // handle the exception - }) - -// option 3: bookkeeper provides a twitter-future-like event listener for processing result and exception asynchronously -readFuture.whenComplete(new FutureEventListener<LedgerEntries>() { - @Override - public void onSuccess(LedgerEntries entries) { - // process the result - } - @Override - public void onFailure(Throwable cause) { - // handle the exception - } -}); -``` - -Once you are done with processing the [`LedgerEntries`](../javadoc/org/apache/bookkeeper/client/api/LedgerEntries), you can call `#close()` on the `LedgerEntries` instance to -release the buffers held by it. - -Applications are allowed to read any entries between `0` and [`LastAddConfirmed`](../javadoc/org/apache/bookkeeper/client/api/ReadHandle.html#getLastAddConfirmed). If an application -attempts to read entries beyond `LastAddConfirmed`, it will receive an [`IncorrectParameterException`](../javadoc/org/apache/bookkeeper/client/api/BKException.Code#IncorrectParameterException). - -### Read unconfirmed entries from ledgers - -`readUnconfirmed` provides a mechanism for applications to read entries beyond `LastAddConfirmed`. Applications should be aware that `readUnconfirmed` doesn't provide any -repeatable-read consistency. - -```java -CompletableFuture<LedgerEntries> readFuture = rh.readUnconfirmed(startEntryId, endEntryId); -``` - -### Tailing Reads - -There are two methods for applications to achieve tailing reads: `Polling` and `Long-Polling`.
- -#### Polling - -You can do this in a synchronous way: - -```java -ReadHandle rh = ...; - -long startEntryId = 0L; -long nextEntryId = startEntryId; -int numEntriesPerBatch = 4; -while (!rh.isClosed() || nextEntryId <= rh.getLastAddConfirmed()) { - long lac = rh.getLastAddConfirmed(); - if (nextEntryId > lac) { - // no more entries are added - Thread.sleep(1000); - - lac = rh.readLastAddConfirmed().get(); - continue; - } - - long endEntryId = Math.min(lac, nextEntryId + numEntriesPerBatch - 1); - LedgerEntries entries = rh.read(nextEntryId, endEntryId).get(); - - // process the entries - - nextEntryId = endEntryId + 1; -} -``` - -#### Long Polling - -```java -ReadHandle rh = ...; - -long startEntryId = 0L; -long nextEntryId = startEntryId; -int numEntriesPerBatch = 4; -while (!rh.isClosed() || nextEntryId <= rh.getLastAddConfirmed()) { - long lac = rh.getLastAddConfirmed(); - if (nextEntryId > lac) { - // no more entries are added - try (LastConfirmedAndEntry lacAndEntry = rh.readLastAddConfirmedAndEntry(nextEntryId, 1000, false).get()) { - if (lacAndEntry.hasEntry()) { - // process the entry - - ++nextEntryId; - } - } - } else { - long endEntryId = Math.min(lac, nextEntryId + numEntriesPerBatch - 1); - LedgerEntries entries = rh.read(nextEntryId, endEntryId).get(); - - // process the entries - nextEntryId = endEntryId + 1; - } -} -``` - -### Delete ledgers - -{% pop Ledgers %} can be deleted by using [`DeleteBuilder`](../javadoc/org/apache/bookkeeper/client/api/DeleteBuilder). - -```java -BookKeeper bk = ...; -long ledgerId = ...; - -bk.newDeleteLedgerOp() - .withLedgerId(ledgerId) - .execute() - .get(); -``` diff --git a/site/docs/4.6.1/api/overview.md b/site/docs/4.6.1/api/overview.md deleted file mode 100644 index 3eb649273c1..00000000000 --- a/site/docs/4.6.1/api/overview.md +++ /dev/null @@ -1,17 +0,0 @@ ---- -title: BookKeeper API ---- - -BookKeeper offers a few APIs that applications can use to interact with it: - -* The [ledger API](../ledger-api) is a lower-level API that enables you to interact with {% pop ledgers %} directly -* The [Ledger Advanced API](../ledger-adv-api) is an advanced extension to the [Ledger API](../ledger-api) that provides more flexibility to applications. -* The [DistributedLog API](../distributedlog-api) is a higher-level API that provides convenient abstractions. - -## Trade-offs - -The `Ledger API` provides direct access to ledgers and thus enables you to use BookKeeper however you'd like. - -However, in most use cases, if you want a `log stream`-like abstraction, the ledger API requires you to manage things like tracking the list of ledgers, -rolling ledgers, and data retention on your own. In such cases, you are recommended to use the [DistributedLog API](../distributedlog-api), -whose semantics resemble continuous log streams from the standpoint of applications. diff --git a/site/docs/4.6.1/deployment/dcos.md b/site/docs/4.6.1/deployment/dcos.md deleted file mode 100644 index 4060519772d..00000000000 --- a/site/docs/4.6.1/deployment/dcos.md +++ /dev/null @@ -1,142 +0,0 @@ ---- -title: Deploying BookKeeper on DC/OS -subtitle: Get up and running easily on an Apache Mesos cluster -logo: img/dcos-logo.png ---- - -[DC/OS](https://dcos.io/) (the DataCenter Operating System) is a distributed operating system used for deploying and managing applications and systems on [Apache Mesos](http://mesos.apache.org/). DC/OS is an open-source tool created and maintained by [Mesosphere](https://mesosphere.com/).
- -BookKeeper is available as a [DC/OS package](http://universe.dcos.io/#/package/bookkeeper/version/latest) from the [Mesosphere DC/OS Universe](http://universe.dcos.io/#/packages). - -## Prerequisites - -In order to run BookKeeper on DC/OS, you will need: - -* DC/OS version [1.8](https://dcos.io/docs/1.8/) or higher -* A DC/OS cluster with at least three nodes -* The [DC/OS CLI tool](https://dcos.io/docs/1.8/usage/cli/install/) installed - -Each node in your DC/OS-managed Mesos cluster must have at least: - -* 1 CPU -* 1 GB of memory -* 10 GB of total persistent disk storage - -## Installing BookKeeper - -```shell -$ dcos package install bookkeeper --yes -``` - -This command will: - -* Install the `bookkeeper` subcommand for the `dcos` CLI tool -* Start a single {% pop bookie %} on the Mesos cluster with the [default configuration](../../reference/config) - -The bookie that is automatically started up uses the host network mode and by default exposes the service at `agent_ip:3181`. - -> If you run `dcos package install bookkeeper` without setting the `--yes` flag, the install will run in interactive mode. For more information on the `package install` command, see the [DC/OS docs](https://docs.mesosphere.com/latest/cli/command-reference/dcos-package/dcos-package-install/). - -### Services - -To watch BookKeeper start up, click on the **Services** tab in the DC/OS [user interface](https://docs.mesosphere.com/latest/gui/) and you should see the `bookkeeper` package listed: - -![DC/OS services]({{ site.baseurl }}img/dcos/services.png) - -### Tasks - -To see which tasks have started, click on the `bookkeeper` service and you'll see an interface that looks like this: - -![DC/OS tasks]({{ site.baseurl }}img/dcos/tasks.png) - -## Scaling BookKeeper - -Once the first {% pop bookie %} has started up, you can click on the **Scale** tab to scale up your BookKeeper ensemble by adding more bookies (or scale down the ensemble by removing bookies). - -![DC/OS scale]({{ site.baseurl }}img/dcos/scale.png) - -## ZooKeeper Exhibitor - -ZooKeeper contains the information for all bookies in the ensemble. When deployed on DC/OS, BookKeeper uses a ZooKeeper instance provided by DC/OS. You can access a visual UI for ZooKeeper using [Exhibitor](https://github.com/soabase/exhibitor/wiki), which is available at [http://master.dcos/exhibitor](http://master.dcos/exhibitor). - -![ZooKeeper Exhibitor]({{ site.baseurl }}img/dcos/exhibitor.png) - -You should see a listing of IP/host information for all bookies under the `messaging/bookkeeper/ledgers/available` node. - -## Client connections - -To connect to bookies running on DC/OS using clients running within your Mesos cluster, you need to specify the ZooKeeper connection string for DC/OS's ZooKeeper cluster: - -``` -master.mesos:2181 -``` - -This is the *only* ZooKeeper host/port you need to include in your connection string. Here's an example using the [Java client](../../api/ledger-api#the-java-ledger-api-client): - -```java -BookKeeper bkClient = new BookKeeper("master.mesos:2181"); -``` - -If you're connecting using a client running outside your Mesos cluster, you need to supply the public-facing connection string for your DC/OS ZooKeeper cluster. - -## Configuring BookKeeper - -By default, the `bookkeeper` package will start up a BookKeeper ensemble consisting of one {% pop bookie %} with one CPU, 1 GB of memory, and a 70 MB persistent volume. - -You can supply a non-default configuration when installing the package using a JSON file.
Here's an example command: - -```shell -$ dcos package install bookkeeper \ - --options=/path/to/config.json -``` - -You can then fetch the current configuration for BookKeeper at any time using the `package describe` command: - -```shell -$ dcos package describe bookkeeper \ - --config -``` - -### Available parameters - -> Not all [configurable parameters](../../reference/config) for BookKeeper are available for BookKeeper on DC/OS. Only the parameters shown in the table below are available. - -Param | Type | Description | Default -:-----|:-----|:------------|:------- -`name` | String | The name of the DC/OS service. | `bookkeeper` -`cpus` | Integer | The number of CPU shares to allocate to each {% pop bookie %}. The minimum is 1. | `1` -`instances` | Integer | The number of {% pop bookies %} to run. The minimum is 1. | `1` -`mem` | Number | The memory, in MB, to allocate to each BookKeeper task | `1024.0` (1 GB) -`volume_size` | Number | The persistent volume size, in MB | `70` -`zk_client` | String | The connection string for the ZooKeeper client instance | `master.mesos:2181` -`service_port` | Integer | The BookKeeper export service port, using `PORT0` in Marathon | `3181` - -### Example JSON configuration - -Here's an example JSON configuration object for BookKeeper on DC/OS: - -```json -{ - "instances": 5, - "cpus": 3, - "mem": 2048.0, - "volume_size": 250 -} -``` - -If that configuration were stored in a file called `bk-config.json`, you could apply that configuration when installing the BookKeeper package using this command: - -```shell -$ dcos package install bookkeeper \ - --options=./bk-config.json -``` - -## Uninstalling BookKeeper - -You can shut down and uninstall the `bookkeeper` package from DC/OS at any time using the `package uninstall` command: - -```shell -$ dcos package uninstall bookkeeper -Uninstalled package [bookkeeper] version [4.6.1] -Thank you for using bookkeeper. -``` diff --git a/site/docs/4.6.1/deployment/kubernetes.md deleted file mode 100644 index 0f113169edc..00000000000 --- a/site/docs/4.6.1/deployment/kubernetes.md +++ /dev/null @@ -1,181 +0,0 @@ ---- -title: Deploying Apache BookKeeper on Kubernetes -tags: [Kubernetes, Google Container Engine] -logo: img/kubernetes-logo.png --- - -Apache BookKeeper can be easily deployed in [Kubernetes](https://kubernetes.io/) clusters. Managed clusters on [Google Container Engine](https://cloud.google.com/compute/) are the most convenient way to get started. - -The deployment method shown in this guide relies on [YAML](http://yaml.org/) definitions for Kubernetes [resources](https://kubernetes.io/docs/resources-reference/v1.6/). The [`kubernetes`](https://github.com/apache/bookkeeper/tree/master/deploy/kubernetes) subdirectory holds resource definitions for: - -* A three-node ZooKeeper cluster -* A BookKeeper cluster with a bookie running on each node - -## Setup on Google Container Engine - -To get started, clone the [`kubernetes`](https://github.com/apache/bookkeeper/tree/master/deploy/kubernetes) resource definitions from GitHub using `git clone`. - -If you'd like to change the number of bookies or ZooKeeper nodes in your BookKeeper cluster, modify the `replicas` parameter in the `spec` section of the appropriate [`Deployment`](https://kubernetes.io/docs/concepts/workloads/controllers/deployment/) or [`StatefulSet`](https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/) resource.
- -[Google Container Engine](https://cloud.google.com/container-engine) (GKE) automates the creation and management of Kubernetes clusters in [Google Compute Engine](https://cloud.google.com/compute/) (GCE). - -### Prerequisites - -To get started, you'll need: - -* A Google Cloud Platform account, which you can sign up for at [cloud.google.com](https://cloud.google.com) -* An existing Cloud Platform project -* The [Google Cloud SDK](https://cloud.google.com/sdk/downloads) (in particular the [`gcloud`](https://cloud.google.com/sdk/gcloud/) and `kubectl` tools). - -### Create a new Kubernetes cluster - -You can create a new GKE cluster using the [`container clusters create`](https://cloud.google.com/sdk/gcloud/reference/container/clusters/create) command for `gcloud`. This command enables you to specify the number of nodes in the cluster, the machine types of those nodes, and more. - -As an example, we'll create a new GKE cluster for Kubernetes version [1.6.4](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG.md#v164) in the [us-central1-a](https://cloud.google.com/compute/docs/regions-zones/regions-zones#available) zone. The cluster will be named `bookkeeper-gke-cluster` and will consist of three VMs, each using two locally attached SSDs and running on [n1-standard-8](https://cloud.google.com/compute/docs/machine-types) machines. These SSDs will be used by the bookie instances, one for the BookKeeper journal and the other for storing the actual data. - -```bash -$ gcloud config set compute/zone us-central1-a -$ gcloud config set project your-project-name -$ gcloud container clusters create bookkeeper-gke-cluster \ - --machine-type=n1-standard-8 \ - --num-nodes=3 \ - --local-ssd-count=2 \ - --enable-kubernetes-alpha -``` - -By default, bookies will run on all the machines that have locally attached SSD disks. In this example, all of those machines will have two SSDs, but you can add different types of machines to the cluster later. You can control which machines host bookie servers using [labels](https://kubernetes.io/docs/concepts/overview/working-with-objects/labels). - -### Dashboard - -You can observe your cluster in the [Kubernetes Dashboard](https://kubernetes.io/docs/tasks/access-application-cluster/web-ui-dashboard/) by downloading the credentials for your Kubernetes cluster and opening up a proxy to the cluster: - -```bash -$ gcloud container clusters get-credentials bookkeeper-gke-cluster \ - --zone=us-central1-a \ - --project=your-project-name -$ kubectl proxy -``` - -By default, the proxy will be opened on port 8001. Now you can navigate to [localhost:8001/ui](http://localhost:8001/ui) in your browser to access the dashboard. At first your GKE cluster will be empty, but that will change as you begin deploying. - -When you create a cluster, your `kubectl` config in `~/.kube/config` (on macOS and Linux) will be updated for you, so you probably won't need to change your configuration. Nonetheless, you can ensure that `kubectl` can interact with your cluster by listing the nodes in the cluster: - -```bash -$ kubectl get nodes -``` - -If `kubectl` is working with your cluster, you can proceed to deploy ZooKeeper and bookies. - -### ZooKeeper - -You *must* deploy ZooKeeper as the first component, as it is a dependency for the others. - -```bash -$ kubectl apply -f zookeeper.yaml -``` - -Wait until all three ZooKeeper server pods are up and have the status `Running`.
You can check on the status of the ZooKeeper pods at any time: - -```bash -$ kubectl get pods -l component=zookeeper -NAME READY STATUS RESTARTS AGE -zk-0 1/1 Running 0 18m -zk-1 1/1 Running 0 17m -zk-2 0/1 Running 6 15m -``` - -This step may take several minutes, as Kubernetes needs to download the Docker image on the VMs. - - -If you want to connect to one of the remote ZooKeeper servers, you can use [zk-shell](https://github.com/rgs1/zk_shell). First, forward a local port to the -remote ZooKeeper server: - -```bash -$ kubectl port-forward zk-0 2181:2181 -$ zk-shell localhost 2181 -``` - -### Deploy Bookies - -Once the ZooKeeper cluster is running, you can deploy the bookies. You can deploy the bookies either using a [DaemonSet](https://kubernetes.io/docs/concepts/workloads/controllers/daemonset/) or a [StatefulSet](https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/). - -> NOTE: _DaemonSet_ vs _StatefulSet_ -> -> A _DaemonSet_ ensures that all (or some) nodes run a bookie pod. As nodes are added to the cluster, bookie pods are added automatically to them. As nodes are removed from the -> cluster, those bookie pods are garbage collected. Bookies deployed in a DaemonSet store data on the local disks of those nodes, so no external storage or persistent -> volumes are required. -> -> A _StatefulSet_ maintains a sticky identity for the pods that it runs and manages. It provides stable and unique network identifiers, and stable and persistent storage for each pod. The pods -> are not interchangeable, and the identifiers for each pod are maintained across any rescheduling. -> -> Which one to use? A _DaemonSet_ is the easiest way to deploy a BookKeeper cluster, because it doesn't require an additional persistent volume provisioner and uses local disks. BookKeeper manages -> the data replication, and this option also provides the best latency. However, it uses `hostIP` and `hostPort` for communications between pods. On some Kubernetes platforms (such as DC/OS), `hostIP` and -> `hostPort` are not well supported. A _StatefulSet_ is only practical when deploying in a cloud environment or any Kubernetes installation that has persistent volumes available. Also be aware that latency -> can be higher when using persistent volumes, because there is usually built-in replication in the persistent volumes. - -```bash -# deploy bookies in a daemon set -$ kubectl apply -f bookkeeper.yaml - -# deploy bookies in a stateful set -$ kubectl apply -f bookkeeper.stateful.yaml -``` - -You can check on the status of the bookie pods for these components either in the Kubernetes Dashboard or using `kubectl`: - -```bash -$ kubectl get pods -``` - -Once all BookKeeper pods are running, you can use zk-shell to find all available bookies under the `/ledgers/` node. - -You can also run a [bookkeeper tutorial](https://github.com/ivankelly/bookkeeper-tutorial/) instance, named 'dice' here, in this BookKeeper cluster: - -```bash -$ kubectl run -i --tty --attach dice --image=caiok/bookkeeper-tutorial --env ZOOKEEPER_SERVERS="zk-0.zookeeper" -``` - -Example output from the dice instance looks like this: -```shell -➜ $ kubectl run -i --tty --attach dice --image=caiok/bookkeeper-tutorial --env ZOOKEEPER_SERVERS="zk-0.zookeeper" -If you don't see a command prompt, try pressing enter.
-Value = 1, epoch = 5, leading -Value = 2, epoch = 5, leading -Value = 1, epoch = 5, leading -Value = 4, epoch = 5, leading -Value = 5, epoch = 5, leading -Value = 4, epoch = 5, leading -Value = 3, epoch = 5, leading -Value = 5, epoch = 5, leading -Value = 3, epoch = 5, leading -Value = 2, epoch = 5, leading -Value = 1, epoch = 5, leading -Value = 4, epoch = 5, leading -Value = 2, epoch = 5, leading -``` - -### Un-Deploy - -Delete the demo dice instance: - -```bash -$ kubectl delete deployment dice -``` - -Delete BookKeeper: -```bash -$ kubectl delete -f bookkeeper.yaml -``` - -Delete ZooKeeper: -```bash -$ kubectl delete -f zookeeper.yaml -``` - -Delete the cluster: -```bash -$ gcloud container clusters delete bookkeeper-gke-cluster -``` - - - diff --git a/site/docs/4.6.1/deployment/manual.md deleted file mode 100644 index daafd5556f5..00000000000 --- a/site/docs/4.6.1/deployment/manual.md +++ /dev/null @@ -1,56 +0,0 @@ ---- -title: Manual deployment --- - -The easiest way to deploy BookKeeper is using schedulers like [DC/OS](../dcos), but you can also deploy BookKeeper clusters manually. A BookKeeper cluster consists of two main components: - -* A [ZooKeeper](#zookeeper-setup) cluster that is used for configuration- and coordination-related tasks -* An [ensemble](#starting-up-bookies) of {% pop bookies %} - -## ZooKeeper setup - -We won't provide a full guide to setting up a ZooKeeper cluster here. We recommend that you consult [this guide](https://zookeeper.apache.org/doc/current/zookeeperAdmin.html) in the official ZooKeeper documentation. - -## Starting up bookies - -Once your ZooKeeper cluster is up and running, you can start up as many {% pop bookies %} as you'd like to form a cluster. Before starting up each bookie, you need to modify the bookie's configuration to make sure that it points to the right ZooKeeper cluster. - -On each bookie host, you need to [download](../../getting-started/installation#download) the BookKeeper package as a tarball. Once you've done that, you need to configure the bookie by setting values in the `bookkeeper-server/conf/bk_server.conf` config file. The one parameter that you will absolutely need to change is the [`zkServers`](../../config#zkServers) parameter, which you will need to set to the ZooKeeper connection string for your ZooKeeper cluster. Here's an example: - -```properties -zkServers=100.0.0.1:2181,100.0.0.2:2181,100.0.0.3:2181 -``` - -> A full listing of configurable parameters available in `bookkeeper-server/conf/bk_server.conf` can be found in the [Configuration](../../reference/config) reference manual. - -Once the bookie's configuration is set, you can start it up using the [`bookie`](../../reference/cli#bookkeeper-bookie) command of the [`bookkeeper`](../../reference/cli#bookkeeper) CLI tool: - -```shell -$ bookkeeper-server/bin/bookkeeper bookie -``` - -> You can also build BookKeeper [by cloning it from source](../../getting-started/installation#clone) or [using Maven](../../getting-started/installation#build-using-maven).
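Once the bookies are up and the cluster metadata has been formatted (see [Cluster metadata setup](#cluster-metadata-setup) below), you can sanity-check the deployment from any machine that can reach ZooKeeper. The following is a minimal sketch using the Java client; the connection string matches the `zkServers` example above, and the ledger password is arbitrary:

```java
import org.apache.bookkeeper.client.BookKeeper;
import org.apache.bookkeeper.client.LedgerHandle;

// Connect to the same ZooKeeper ensemble the bookies registered with.
BookKeeper bk = new BookKeeper("100.0.0.1:2181,100.0.0.2:2181,100.0.0.3:2181");

// Create a test ledger, write one entry, then clean up.
LedgerHandle lh = bk.createLedger(BookKeeper.DigestType.CRC32, "test-password".getBytes());
lh.addEntry("hello, bookkeeper".getBytes());
lh.close();
bk.deleteLedger(lh.getId());
bk.close();
```

If this runs without errors, the client was able to discover bookies through ZooKeeper and replicate an entry to them.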
- -### System requirements - -{% include system-requirements.md %} - -## Cluster metadata setup - -Once you've started up a cluster of bookies, you need to set up cluster metadata for the cluster by running the following command from any bookie in the cluster: - -```shell -$ bookkeeper-server/bin/bookkeeper shell metaformat -``` - -> The `metaformat` command performs all the necessary ZooKeeper cluster metadata tasks and thus only needs to be run *once* and from *any* bookie in the BookKeeper cluster. - -Once cluster metadata formatting has been completed, your BookKeeper cluster is ready to go! - - diff --git a/site/docs/4.6.1/development/codebase.md deleted file mode 100644 index 9a83073ea4c..00000000000 --- a/site/docs/4.6.1/development/codebase.md +++ /dev/null @@ -1,3 +0,0 @@ ---- -title: The BookKeeper codebase --- diff --git a/site/docs/4.6.1/development/protocol.md deleted file mode 100644 index 6d17aa0ed45..00000000000 --- a/site/docs/4.6.1/development/protocol.md +++ /dev/null @@ -1,148 +0,0 @@ ---- -title: The BookKeeper protocol --- - -BookKeeper uses a special replication protocol for guaranteeing persistent storage of entries in an ensemble of bookies. - -> This document assumes that you have some knowledge of leader election and log replication and how these can be used in a distributed system. If not, we recommend reading the [example application](../../api/ledger-api#example-application) documentation first. - -## Ledgers - -{% pop Ledgers %} are the basic building block of BookKeeper and the level at which BookKeeper makes its persistent storage guarantees. A replicated log consists of an ordered list of ledgers. See [Ledgers to logs](#ledgers-to-logs) for info on building a replicated log from ledgers. - -Ledgers are composed of metadata and {% pop entries %}. The metadata is stored in ZooKeeper, which provides a *compare-and-swap* (CAS) operation. Entries are stored on storage nodes known as {% pop bookies %}. - -A ledger has a single writer and multiple readers (SWMR). - -### Ledger metadata - -A ledger's metadata contains the following: - -Parameter | Name | Meaning -:---------|:-----|:------- -Identifier | | A 64-bit integer, unique within the system -Ensemble size | **E** | The number of nodes the ledger is stored on -Write quorum size | **Qw** | The number of nodes each entry is written to. In effect, the max replication for the entry. -Ack quorum size | **Qa** | The number of nodes an entry must be acknowledged on. In effect, the minimum replication for the entry. -Current state | | The current status of the ledger. One of `OPEN`, `CLOSED`, or `IN_RECOVERY`. -Last entry | | The last entry in the ledger, or `NULL` if the current state is not `CLOSED`. - -In addition, each ledger's metadata consists of one or more *fragments*. Each fragment records: - -* the entry ID at which the fragment begins, and -* the list of bookies for the fragment. - -When creating a ledger, the following invariant must hold: - -**E >= Qw >= Qa** - -Thus, the ensemble size (**E**) must be at least as large as the write quorum size (**Qw**), which must in turn be at least as large as the ack quorum size (**Qa**). If that condition does not hold, then the ledger creation operation will fail. - -### Ensembles - -When a ledger is created, **E** bookies are chosen for the entries of that ledger. The bookies are the initial ensemble of the ledger.
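To make these parameters concrete, here is a minimal sketch using the same `org.apache.bookkeeper.client.api` interface as the earlier examples (the quorum values and password are arbitrary). It creates a ledger with **E** = 3, **Qw** = 3, and **Qa** = 2; if the invariant above were violated, creation would fail:

```java
BookKeeper bk = ...;

// E  = 3: the ledger's entries are spread over three bookies.
// Qw = 3: every entry is written to all three of them.
// Qa = 2: an add completes once two bookies have acknowledged it.
WriteHandle wh = bk.newCreateLedgerOp()
    .withEnsembleSize(3)      // E
    .withWriteQuorumSize(3)   // Qw
    .withAckQuorumSize(2)     // Qa
    .withPassword("password".getBytes())
    .execute()
    .get();

long entryId = wh.append(ByteBuffer.wrap("hello".getBytes())).get();
```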
A ledger can have multiple ensembles, but an entry has only one ensemble. Changes in the ensemble involve a new fragment being added to the ledger. - -Take the following example. In this ledger, with an ensemble size of 3, there are two fragments and thus two ensembles, one starting at entry 0, the second at entry 12. The second ensemble differs from the first only by its first element. This could be because bookie1 has failed and therefore had to be replaced. - -First entry | Bookies -:-----------|:------- -0 | B1, B2, B3 -12 | B4, B2, B3 - -### Write quorums - -Each entry in the log is written to **Qw** nodes. This is considered the write quorum for that entry. The write quorum is the subsequence of the ensemble, **Qw** in length, starting at the bookie at index (entryid % **E**). - -For example, in a ledger with **E** = 4, **Qw** = 3, and **Qa** = 2, with an ensemble consisting of B1, B2, B3, and B4, the write quorums for the first 6 entries will be: - -Entry | Write quorum -:-----|:------------ -0 | B1, B2, B3 -1 | B2, B3, B4 -2 | B3, B4, B1 -3 | B4, B1, B2 -4 | B1, B2, B3 -5 | B2, B3, B4 - -There are only **E** distinct write quorums in any ensemble. If **Qw** = **E**, then there is only one, as no striping occurs. - -### Ack quorums - -The ack quorum for an entry is any subset of the write quorum of size **Qa**. If **Qa** bookies acknowledge an entry, it means it has been fully replicated. - -### Guarantees - -The system can tolerate **Qa** – 1 failures without data loss. - -BookKeeper guarantees that: - -1. All updates to a ledger will be read in the same order as they were written. -2. All clients will read the same sequence of updates from the ledger. - -## Writing to ledgers - -Since a ledger has only a single writer, ensuring that entry ids are sequential is trivial. A bookie acknowledges a write once it has been persisted to disk and is therefore durable. Once **Qa** bookies from the write quorum acknowledge the write, the write is acknowledged to the client, but only if all entries with lower entry ids in the ledger have already been acknowledged to the client. - -The entry written contains the ledger id, the entry id, the last add confirmed and the payload. The last add confirmed is the last entry which had been acknowledged to the client when this entry was written. Sending this with the entry speeds up recovery of the ledger in the case that the writer crashes. - -Another client can also read entries in the ledger up to the last add confirmed, as we guarantee that all entries thus far have been replicated on **Qa** nodes, and therefore all future readers will be able to also read them. However, to read like this, the ledger should be opened with a non-fencing open; otherwise, opening it would fence (and effectively kill) the writer. - -If a node fails to acknowledge a write, the writer will create a new ensemble by replacing the failed node in the current ensemble. It creates a new fragment with this ensemble, starting from the first message that has not been acknowledged to the client. Creating the new fragment involves making a CAS write to the metadata. If the CAS write fails, someone else has modified something in the ledger metadata. This concurrent modification could have been caused by recovery or {% pop rereplication %}. We reread the metadata. If the state of the ledger is no longer `OPEN`, we send an error to the client for any outstanding writes. Otherwise, we try to replace the failed node again. - -### Closing a ledger as a writer - -Closing a ledger is straightforward for a writer.
The writer makes a CAS write to the metadata, changing the state to `CLOSED` and setting the last entry of the ledger to the last entry which we have acknowledged to the client. - -If the CAS write fails, it means someone else has modified the metadata. We reread the metadata, and retry closing as long as the state of the ledger is still `OPEN`. If the state is `IN_RECOVERY` we send an error to the client. If the state is `CLOSED` and the last entry is the same as the last entry we have acknowledged to the client, we complete the close operation successfully. If the last entry is different from what we have acknowledged to the client, we send an error to the client. - -### Closing a ledger as a reader - -A reader can also force a ledger to close. Forcing the ledger to close will prevent any writer from adding new entries to the ledger. This is called {% pop fencing %}. This can occur when a writer has crashed or become unavailable, and a new writer wants to take over writing to the log. The new writer must ensure that it has seen all updates from the previous writer, and prevent the previous writer from making any new updates before making any updates of its own. - -To recover a ledger, we first update the state in the metadata to `IN_RECOVERY`. We then send a fence message to all the bookies in the last fragment of the ledger. When a bookie receives a fence message for a ledger, the fenced state of the ledger is persisted to disk. Once we receive a response from at least (**Qw** - **Qa**) + 1 bookies from each write quorum in the ensemble, the ledger is fenced. - -By ensuring we have received a response from at least (**Qw** - **Qa**) + 1 bookies in each write quorum, we ensure that, if the old writer is alive and tries to add a new entry, there will be no write quorum in which **Qa** bookies will accept the write. If the old writer tries to update the ensemble, it will fail on the CAS metadata write, and then see that the ledger is in `IN_RECOVERY` state, and that it therefore shouldn’t try to write to it. - -The old writer will be able to write entries to individual bookies (we can’t guarantee that the fence message reaches all bookies), but as it will not be able to reach the ack quorum, it will not be able to send a success response to its client. The client will get a LedgerFenced error instead. - -It is important to note that when you get a ledger fenced message for an entry, it doesn’t mean that the entry has not been written. It means that the entry may or may not have been written, and this can only be determined after the ledger is recovered. In effect, LedgerFenced should be treated like a timeout. - -Once the ledger is fenced, recovery can begin. Recovery means finding the last entry of the ledger and closing the ledger. To find the last entry of the ledger, the client asks all bookies for the highest last add confirmed value they have seen. It waits until it has received a response from at least (**Qw** - **Qa**) + 1 bookies from each write quorum, and takes the highest response as the entry id to start reading forward from. It then starts reading forward in the ledger, one entry at a time, replicating all entries it sees to the entire write quorum for that entry. Once it can no longer read any more entries, it updates the state in the metadata to `CLOSED`, and sets the last entry of the ledger to the last entry it wrote. Multiple readers can try to recover a ledger at the same time, but as the metadata write is CAS they will all converge on the same last entry of the ledger.
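From a client's point of view, this whole fence-then-recover sequence is what a recovery open performs. Here is a minimal sketch with the `org.apache.bookkeeper.client.api` interface (ledger ID and password are placeholders):

```java
BookKeeper bk = ...;
long ledgerId = ...;

// withRecovery(true) fences the bookies in the last fragment, recovers
// the last entry, and writes the CLOSED state back to the metadata.
try (ReadHandle rh = bk.newOpenLedgerOp()
        .withLedgerId(ledgerId)
        .withRecovery(true)
        .withPassword("password".getBytes())
        .execute()
        .get()) {
    // The ledger is now closed; this is its sealed last entry id.
    long lastEntryId = rh.getLastAddConfirmed();
}
```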
- -## Ledgers to logs - -In BookKeeper, {% pop ledgers %} can be used to build a replicated log for your system. All guarantees provided by BookKeeper are at the ledger level. Guarantees on the whole log can be built using the ledger guarantees and any consistent datastore with a compare-and-swap (CAS) primitive. BookKeeper uses ZooKeeper as the datastore but others could theoretically be used. - -A log in BookKeeper is built from some number of ledgers, with a fixed order. A ledger represents a single segment of the log. A ledger could be the whole period that one node was the leader, or there could be multiple ledgers for a single period of leadership. However, there can only ever be one leader that adds entries to a single ledger. Ledgers cannot be reopened for writing once they have been closed/recovered. - -> BookKeeper does *not* provide leader election. You must use a system like ZooKeeper for this. - -In many cases, leader election is really leader suggestion. Multiple nodes could think that they are leader at any one time. It is the job of the log to guarantee that only one can write changes to the system. - -### Opening a log - -Once a node thinks it is leader for a particular log, it must take the following steps: - -1. Read the list of ledgers for the log -1. {% pop Fence %} the last two ledgers in the list. Two ledgers are fenced because the writer may be writing to the second-to-last ledger while adding the last ledger to the list. -1. Create a new ledger -1. Add the new ledger to the ledger list -1. Write the new ledger back to the datastore using a CAS operation - -The fencing in step 2 and the CAS operation in step 5 prevent two nodes from thinking that they have leadership at any one time. - -The CAS operation will fail if the list of ledgers has changed between reading it and writing back the new list. When the CAS operation fails, the leader must start at step 1 again. Even better, it should check that it is in fact still the leader with the system that is providing leader election. The protocol will work correctly without this step, though it will be able to make very little progress if two nodes think they are leader and are duelling for the log. - -The node must not serve any writes until step 5 completes successfully. (A sketch of these steps in code appears at the end of this page.) - -### Rolling ledgers - -The leader may wish to close the current ledger and open a new one every so often. Ledgers can only be deleted as a whole. If you don't roll the log, you won't be able to clean up old entries in the log without a leader change. By closing the current ledger and adding a new one, the leader allows the log to be truncated whenever that data is no longer needed. The steps for rolling the log are similar to those for creating a new ledger: - -1. Create a new ledger -1. Add the new ledger to the ledger list -1. Write the new ledger list to the datastore using CAS -1. Close the previous ledger - -By deferring the closing of the previous ledger until step 4, we can continue writing to the log while we perform metadata update operations to add the new ledger. This is safe as long as you fence the last 2 ledgers when acquiring leadership.
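Sketching the "opening a log" steps above in code, assuming the ledger list is kept in ZooKeeper; the `serialize`/`deserialize` helpers are hypothetical, since the list format is application-specific:

```java
ZooKeeper zk = ...;
BookKeeper bk = ...;
byte[] password = ...;

Stat stat = new Stat();
List<Long> ledgers = deserialize(zk.getData("/mylog/ledgers", false, stat)); // step 1

// Step 2: fence the last two ledgers by opening them with recovery.
for (long id : ledgers.subList(Math.max(0, ledgers.size() - 2), ledgers.size())) {
    bk.newOpenLedgerOp().withLedgerId(id).withRecovery(true)
      .withPassword(password).execute().get().close();
}

// Step 3: create a new ledger.
WriteHandle wh = bk.newCreateLedgerOp()
    .withEnsembleSize(3).withWriteQuorumSize(3).withAckQuorumSize(2)
    .withPassword(password).execute().get();

ledgers.add(wh.getId());                                                     // step 4

try {
    // Step 5: CAS write -- fails if the list changed since step 1.
    zk.setData("/mylog/ledgers", serialize(ledgers), stat.getVersion());
} catch (KeeperException.BadVersionException e) {
    // Someone else modified the list: close our ledger and start over at step 1.
}
```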
- diff --git a/site/docs/4.6.1/getting-started/concepts.md b/site/docs/4.6.1/getting-started/concepts.md deleted file mode 100644 index 7a3c92847b2..00000000000 --- a/site/docs/4.6.1/getting-started/concepts.md +++ /dev/null @@ -1,202 +0,0 @@ ---- -title: BookKeeper concepts and architecture -subtitle: The core components and how they work -prev: ../run-locally ---- - -BookKeeper is a service that provides persistent storage of streams of log [entries](#entries)---aka *records*---in sequences called [ledgers](#ledgers). BookKeeper replicates stored entries across multiple servers. - -## Basic terms - -In BookKeeper: - -* each unit of a log is an [*entry*](#entries) (aka record) -* streams of log entries are called [*ledgers*](#ledgers) -* individual servers storing ledgers of entries are called [*bookies*](#bookies) - -BookKeeper is designed to be reliable and resilient to a wide variety of failures. Bookies can crash, corrupt data, or discard data, but as long as there are enough bookies behaving correctly in the ensemble the service as a whole will behave correctly. - -## Entries - -> **Entries** contain the actual data written to ledgers, along with some important metadata. - -BookKeeper entries are sequences of bytes that are written to [ledgers](#ledgers). Each entry has the following fields: - -Field | Java type | Description -:-----|:----------|:----------- -Ledger number | `long` | The ID of the ledger to which the entry has been written -Entry number | `long` | The unique ID of the entry -Last confirmed (LC) | `long` | The ID of the last recorded entry -Data | `byte[]` | The entry's data (written by the client application) -Authentication code | `byte[]` | The message auth code, which includes *all* other fields in the entry - -## Ledgers - -> **Ledgers** are the basic unit of storage in BookKeeper. - -Ledgers are sequences of entries, while each entry is a sequence of bytes. Entries are written to a ledger: - -* sequentially, and -* at most once. - -This means that ledgers have *append-only* semantics. Entries cannot be modified once they've been written to a ledger. Determining the proper write order is the responsbility of [client applications](#clients). - -## Clients and APIs - -> BookKeeper clients have two main roles: they create and delete ledgers, and they read entries from and write entries to ledgers. -> -> BookKeeper provides both a lower-level and a higher-level API for ledger interaction. - -There are currently two APIs that can be used for interacting with BookKeeper: - -* The [ledger API](../../api/ledger-api) is a lower-level API that enables you to interact with {% pop ledgers %} directly. -* The [DistributedLog API](../../api/distributedlog-api) is a higher-level API that enables you to use BookKeeper without directly interacting with ledgers. - -In general, you should choose the API based on how much granular control you need over ledger semantics. The two APIs can also both be used within a single application. - -## Bookies - -> **Bookies** are individual BookKeeper servers that handle ledgers (more specifically, fragments of ledgers). Bookies function as part of an ensemble. - -A bookie is an individual BookKeeper storage server. Individual bookies store fragments of ledgers, not entire ledgers (for the sake of performance). For any given ledger **L**, an *ensemble* is the group of bookies storing the entries in **L**. 
- -Whenever entries are written to a ledger, those entries are {% pop striped %} across the ensemble (written to a sub-group of bookies rather than to all bookies). - -### Motivation - -> BookKeeper was initially inspired by the NameNode server in HDFS but its uses now extend far beyond this. - -The initial motivation for BookKeeper comes from the [Hadoop](http://hadoop.apache.org/) ecosystem. In the [Hadoop Distributed File System](https://wiki.apache.org/hadoop/HDFS) (HDFS), a special node called the [NameNode](https://wiki.apache.org/hadoop/NameNode) logs all operations in a reliable fashion, which ensures that recovery is possible in case of crashes. - -The NameNode, however, served only as initial inspiration for BookKeeper. The applications for BookKeeper extend far beyond this and include essentially any application that requires an append-based storage system. BookKeeper provides a number of advantages for such applications: - -* Highly efficient writes -* High fault tolerance via replication of messages within ensembles of bookies -* High throughput for write operations via {% pop striping %} (across as many bookies as you wish) - -## Metadata storage - -BookKeeper requires a metadata storage service to store information related to [ledgers](#ledgers) and available bookies. BookKeeper currently uses [ZooKeeper](https://zookeeper.apache.org) for this and other tasks. - -## Data management in bookies - -Bookies manage data in a [log-structured](https://en.wikipedia.org/wiki/Log-structured_file_system) way, which is implemented using three types of files: - -* [journals](#journals) -* [entry logs](#entry-logs) -* [index files](#index-files) - -### Journals - -A journal file contains BookKeeper transaction logs. Before any update to a ledger takes place, the bookie ensures that a transaction describing the update is written to non-volatile storage. A new journal file is created when the bookie starts or when the current journal file reaches the journal file size threshold. - -### Entry logs - -An entry log file manages the written entries received from BookKeeper clients. Entries from different ledgers are aggregated and written sequentially, while their offsets are kept as pointers in a [ledger cache](#ledger-cache) for fast lookup. - -A new entry log file is created when the bookie starts or when the current entry log file reaches the entry log size threshold. Old entry log files are removed by the Garbage Collector Thread once they are not associated with any active ledger. - -### Index files - -An index file is created for each ledger, which comprises a header and several fixed-length index pages that record the offsets of data stored in entry log files. - -Since updating index files would introduce random disk I/O, index files are updated lazily by a sync thread running in the background. This ensures speedy performance for updates. Before index pages are persisted to disk, they are gathered in a ledger cache for lookup. - -### Ledger cache - -Ledger index pages are cached in a memory pool, which allows for more efficient management of disk head scheduling. - -### Adding entries - -When a client instructs a {% pop bookie %} to write an entry to a ledger, the entry will go through the following steps to be persisted on disk: - -1. The entry is appended to an [entry log](#entry-logs) -1. The index of the entry is updated in the [ledger cache](#ledger-cache) -1. A transaction corresponding to this entry update is appended to the [journal](#journals) -1.
A response is sent to the BookKeeper client - -> For performance reasons, the entry log buffers entries in memory and commits them in batches, while the ledger cache holds index pages in memory and flushes them lazily. This process is described in more detail in the [Data flush](#data-flush) section below. - -### Data flush - -Ledger index pages are flushed to index files in the following two cases: - -* The ledger cache memory limit is reached. There is no more space available to hold newer index pages. Dirty index pages will be evicted from the ledger cache and persisted to index files. -* A background sync thread is responsible for flushing index pages from the ledger cache to index files periodically. - -Besides flushing index pages, the sync thread is responsible for rolling journal files when journal files use too much disk space. The data flush flow in the sync thread is as follows: - -* A `LastLogMark` is recorded in memory. The `LastLogMark` indicates that those entries before it have been persisted (to both index and entry log files) and contains two parts: - 1. A `txnLogId` (the file ID of a journal) - 1. A `txnLogPos` (offset in a journal) -* Dirty index pages are flushed from the ledger cache to the index file, and entry log files are flushed to ensure that all buffered entries in entry log files are persisted to disk. - - Ideally, a bookie only needs to flush index pages and entry log files that contain entries before `LastLogMark`. There is, however, no such information in the ledger and entry log mapping to journal files. Consequently, the thread flushes the ledger cache and entry log entirely here, and may flush entries after the `LastLogMark`. Flushing more is not a problem, though, just redundant. -* The `LastLogMark` is persisted to disk, which means that all entries added before the `LastLogMark` have had both their entry data and their index pages persisted to disk. It is now time to safely remove journal files created earlier than `txnLogId`. - -If the bookie has crashed before persisting `LastLogMark` to disk, it still has journal files containing entries for which index pages may not have been persisted. Consequently, when this bookie restarts, it inspects journal files to restore those entries, so no data is lost. - -Using the above data flush mechanism, it is safe for the sync thread to skip data flushing when the bookie shuts down. However, the entry logger uses a buffered channel to write entries in batches, and there might be data still buffered in that channel at shutdown. The bookie needs to ensure that the entry log flushes its buffered data during shutdown. Otherwise, entry log files become corrupted with partial entries. - -### Data compaction - -On bookies, entries of different ledgers are interleaved in entry log files. A bookie runs a garbage collector thread to delete un-associated entry log files to reclaim disk space. If a given entry log file contains entries from a ledger that has not been deleted, then the entry log file would never be removed and the occupied disk space never reclaimed. In order to avoid such a case, a bookie server compacts entry log files in a garbage collector thread to reclaim disk space. - -There are two kinds of compaction, run at different frequencies: minor compaction and major compaction. The differences between minor compaction and major compaction lie in their threshold values and compaction intervals.
- -* The garbage collection threshold is the size percentage of an entry log file occupied by undeleted ledgers. The default minor compaction threshold is 0.2, while the major compaction threshold is 0.8. -* The garbage collection interval is how frequently to run the compaction. The default minor compaction interval is 1 hour, while the major compaction interval is 1 day. - -> If either the threshold or interval is set to less than or equal to zero, compaction is disabled. - -The data compaction flow in the garbage collector thread is as follows: - -* The thread scans entry log files to get their entry log metadata, which records a list of ledgers comprising an entry log and their corresponding percentages. -* With the normal garbage collection flow, once the bookie determines that a ledger has been deleted, the ledger will be removed from the entry log metadata and the size of the entry log reduced. -* If the remaining size of an entry log file reaches a specified threshold, the entries of active ledgers in the entry log will be copied to a new entry log file. -* Once all valid entries have been copied, the old entry log file is deleted. - -## ZooKeeper metadata - -BookKeeper requires a ZooKeeper installation for storing [ledger](#ledgers) metadata. Whenever you construct a [`BookKeeper`](../../api/javadoc/org/apache/bookkeeper/client/BookKeeper) client object, you need to pass a list of ZooKeeper servers as a parameter to the constructor, like this: - -```java -String zkConnectionString = "127.0.0.1:2181"; -BookKeeper bkClient = new BookKeeper(zkConnectionString); -``` - -> For more info on using the BookKeeper Java client, see [this guide](../../api/ledger-api#the-java-ledger-api-client). - -## Ledger manager - -A *ledger manager* handles ledgers' metadata (which is stored in ZooKeeper). BookKeeper offers two types of ledger managers: the [flat ledger manager](#flat-ledger-manager) and the [hierarchical ledger manager](#hierarchical-ledger-manager). Both ledger managers extend the [`AbstractZkLedgerManager`](../../api/javadoc/org/apache/bookkeeper/meta/AbstractZkLedgerManager) abstract class. - -> #### Use the flat ledger manager in most cases -> The flat ledger manager is the default and is recommended for nearly all use cases. The hierarchical ledger manager is better suited only for managing very large numbers of BookKeeper ledgers (> 50,000). - -### Flat ledger manager - -The *flat ledger manager*, implemented in the [`FlatLedgerManager`](../../api/javadoc/org/apache/bookkeeper/meta/FlatLedgerManager.html) class, stores all ledgers' metadata in child nodes of a single ZooKeeper path. The flat ledger manager creates [sequential nodes](https://zookeeper.apache.org/doc/trunk/zookeeperProgrammers.html#Sequence+Nodes+--+Unique+Naming) to ensure the uniqueness of the ledger ID and prefixes all nodes with `L`. Bookie servers manage their own active ledgers in a hash map so that it's easy to find which ledgers have been deleted from ZooKeeper and then garbage collect them. - -The flat ledger manager's garbage collection flow proceeds as follows: - -* All existing ledgers are fetched from ZooKeeper (`zkActiveLedgers`) -* All ledgers currently active within the bookie are fetched (`bkActiveLedgers`) -* The bookie's active ledgers are looped through to determine which ledgers don't currently exist in ZooKeeper. Those are then garbage collected.
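In code, this garbage-collection pass amounts to a simple set difference. A sketch follows; the fetch and delete helpers are hypothetical, standing in for the bookie's actual internals:

```java
// Ledger IDs whose metadata still exists in ZooKeeper.
Set<Long> zkActiveLedgers = fetchActiveLedgersFromZooKeeper();
// Ledger IDs this bookie is currently storing data for.
Set<Long> bkActiveLedgers = fetchActiveLedgersOnBookie();

for (long ledgerId : bkActiveLedgers) {
    if (!zkActiveLedgers.contains(ledgerId)) {
        // Metadata is gone, so the ledger was deleted: reclaim its data.
        garbageCollectLedger(ledgerId);
    }
}
```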
- -### Hierarchical ledger manager - -The *hierarchical ledger manager*, implemented in the [`HierarchicalLedgerManager`](../../api/javadoc/org/apache/bookkeeper/meta/HierarchicalLedgerManager) class, stores ledgers' metadata in two-level [znodes](https://zookeeper.apache.org/doc/current/zookeeperOver.html#Nodes+and+ephemeral+nodes). It first obtains a global unique ID from ZooKeeper using an [`EPHEMERAL_SEQUENTIAL`](https://zookeeper.apache.org/doc/current/api/org/apache/zookeeper/CreateMode.html#EPHEMERAL_SEQUENTIAL) znode. Since ZooKeeper's sequence counter has a format of `%10d` (10 digits with 0 padding, for example `0000000001`), the hierarchical ledger manager splits the generated ID into 3 parts: - -```shell -{level1 (2 digits)}{level2 (4 digits)}{level3 (4 digits)} -``` - -These three parts are used to form the actual ledger node path to store ledger metadata: - -```shell -{ledgers_root_path}/{level1}/{level2}/L{level3} -``` - -For example, ledger 0000000001 is split into three parts, 00, 0000, and 0001, and stored in znode `/{ledgers_root_path}/00/0000/L0001`. Each znode could hold as many as 10,000 ledgers, which avoids the problem of the child list being larger than the maximum ZooKeeper packet size (which is the [limitation](https://issues.apache.org/jira/browse/BOOKKEEPER-39) that initially prompted the creation of the hierarchical ledger manager). diff --git a/site/docs/4.6.1/getting-started/installation.md deleted file mode 100644 index fac16ddd390..00000000000 --- a/site/docs/4.6.1/getting-started/installation.md +++ /dev/null @@ -1,74 +0,0 @@ ---- -title: BookKeeper installation -subtitle: Download or clone BookKeeper and build it locally -next: ../run-locally --- - -{% capture download_url %}http://apache.claz.org/bookkeeper/bookkeeper-{{ site.latest_release }}/bookkeeper-{{ site.latest_release }}-src.tar.gz{% endcapture %} - -You can install BookKeeper either by [downloading](#download) a [GZipped](http://www.gzip.org/) tarball package or [cloning](#clone) the BookKeeper repository. - -## Requirements - -* [Unix environment](http://www.opengroup.org/unix) -* [Java Development Kit 1.6](http://www.oracle.com/technetwork/java/javase/downloads/index.html) or later -* [Maven 3.0](https://maven.apache.org/install.html) or later - -## Download - -You can download Apache BookKeeper releases from one of many [Apache mirrors](http://www.apache.org/dyn/closer.cgi/bookkeeper). Here's an example for the [apache.claz.org](http://apache.claz.org/bookkeeper) mirror: - -```shell -$ curl -O {{ download_url }} -$ tar xvf bookkeeper-{{ site.latest_release }}-src.tar.gz -$ cd bookkeeper-{{ site.latest_release }} -``` - -## Clone - -To build BookKeeper from source, clone the repository, either from the [GitHub mirror]({{ site.github_repo }}) or from the [Apache repository](http://git.apache.org/bookkeeper.git/): - -```shell -# From the GitHub mirror -$ git clone {{ site.github_repo}} - -# From Apache directly -$ git clone git://git.apache.org/bookkeeper.git/ -``` - -## Build using Maven - -Once you have BookKeeper on your local machine, either by [downloading](#download) or [cloning](#clone) it, you can then build BookKeeper from source using Maven: - -```shell -$ mvn package -``` - -> You can skip tests by adding the `-DskipTests` flag when running `mvn package`.
- -### Useful Maven commands - -Some other useful Maven commands beyond `mvn package`: - -Command | Action -:-------|:------ -`mvn clean` | Removes build artifacts -`mvn compile` | Compiles JAR files from Java sources -`mvn compile findbugs:findbugs` | Compile using the Maven [FindBugs](http://gleclaire.github.io/findbugs-maven-plugin) plugin -`mvn install` | Install the BookKeeper JAR locally in your local Maven cache (usually in the `~/.m2` directory) -`mvn deploy` | Deploy the BookKeeper JAR to the Maven repo (if you have the proper credentials) -`mvn verify` | Performs a wide variety of verification and validation tasks -`mvn apache-rat:check` | Run Maven using the [Apache Rat](http://creadur.apache.org/rat/apache-rat-plugin/) plugin -`mvn compile javadoc:aggregate` | Build Javadocs locally -`mvn package assembly:single` | Build a complete distribution using the Maven [Assembly](http://maven.apache.org/plugins/maven-assembly-plugin/) plugin - -## Package directory - -The BookKeeper project contains several subfolders that you should be aware of: - -Subfolder | Contains -:---------|:-------- -[`bookkeeper-server`]({{ site.github_repo }}/tree/master/bookkeeper-server) | The BookKeeper server and client -[`bookkeeper-benchmark`]({{ site.github_repo }}/tree/master/bookkeeper-benchmark) | A benchmarking suite for measuring BookKeeper performance -[`bookkeeper-stats`]({{ site.github_repo }}/tree/master/bookkeeper-stats) | A BookKeeper stats library -[`bookkeeper-stats-providers`]({{ site.github_repo }}/tree/master/bookkeeper-stats-providers) | BookKeeper stats providers diff --git a/site/docs/4.6.1/getting-started/run-locally.md deleted file mode 100644 index edbfab9fda6..00000000000 --- a/site/docs/4.6.1/getting-started/run-locally.md +++ /dev/null @@ -1,16 +0,0 @@ ---- -title: Run bookies locally -prev: ../installation -next: ../concepts -toc_disable: true --- - -{% pop Bookies %} are individual BookKeeper servers. You can run an ensemble of bookies locally on a single machine using the [`localbookie`](../../reference/cli#bookkeeper-localbookie) command of the `bookkeeper` CLI tool and specifying the number of bookies you'd like to include in the ensemble. - -This would start up an ensemble with 10 bookies: - -```shell -$ bookkeeper-server/bin/bookkeeper localbookie 10 -``` - -> When you start up an ensemble using `localbookie`, all bookies run in a single JVM process. diff --git a/site/docs/4.6.1/overview/overview.md deleted file mode 100644 index 5adc9be3016..00000000000 --- a/site/docs/4.6.1/overview/overview.md +++ /dev/null @@ -1,57 +0,0 @@ ---- -title: Apache BookKeeper™ 4.6.1 --- - - -This documentation is for Apache BookKeeper™ version `4.6.1`. - -Apache BookKeeper™ is a scalable, fault-tolerant and low-latency storage service optimized for real-time workloads. -It offers `durability`, `replication` and `strong consistency` as essentials for building reliable real-time applications. - -It is suitable for use in the following scenarios: - -- [WAL](https://en.wikipedia.org/wiki/Write-ahead_logging) (Write-Ahead-Logging), e.g. HDFS [namenode](https://hadoop.apache.org/docs/r2.5.2/hadoop-project-dist/hadoop-hdfs/HDFSHighAvailabilityWithNFS.html#BookKeeper_as_a_Shared_storage_EXPERIMENTAL). -- Message Store, e.g. [Apache Pulsar](https://pulsar.incubator.apache.org/). -- Offset/Cursor Store, e.g. Apache Pulsar. -- Object/Blob Store, e.g. storing snapshots to replicated state machines.
- -Learn more about Apache BookKeeper™ and what it can do for your organization: - -- [Apache BookKeeper 4.6.1 Release Notes](../releaseNotes) -- [Java API docs](../../api/javadoc) - -Or start using Apache BookKeeper today. - -### Users - -- **Concepts**: Start with [concepts](../../getting-started/concepts). This will help you to fully understand - the other parts of the documentation, including the setup, integration and operation guides. -- **Getting Started**: Install [Apache BookKeeper](../../getting-started/installation) and run bookies [locally](../../getting-started/run-locally) -- **API**: Read the [API](../../api/overview) documentation to learn how to use Apache BookKeeper to build your applications. -- **Deployment**: The [Deployment Guide](../../deployment/manual) shows how to deploy Apache BookKeeper to production clusters. - -### Administrators - -- **Operations**: The [Admin Guide](../../admin/bookies) shows how to run Apache BookKeeper in production, including production - considerations and best practices. - -### Contributors - -- **Details**: Learn the [design details](../../development/protocol) to understand the internals. diff --git a/site/docs/4.6.1/overview/releaseNotes.md deleted file mode 100644 index 7d7303606bf..00000000000 --- a/site/docs/4.6.1/overview/releaseNotes.md +++ /dev/null @@ -1,32 +0,0 @@ ---- -title: Apache BookKeeper 4.6.1 Release Notes --- - -This is the eighth release of BookKeeper as an Apache Top Level Project! - -The 4.6.1 release is a bugfix release which fixes a number of issues reported by users of 4.6.0. - -Apache BookKeeper users are encouraged to upgrade to 4.6.1. The technical details of this release are summarized -below. - -## Highlights - -- Fix critical bug on index persistence manager, see [https://github.com/apache/bookkeeper/pull/913](https://github.com/apache/bookkeeper/pull/913) - -- Fix critical bug to allow using versions of Netty newer than 4.1.2 on classpath, see [https://github.com/apache/bookkeeper/pull/996](https://github.com/apache/bookkeeper/pull/996) - -- Enhance Java 9 compatibility, see [https://github.com/apache/bookkeeper/issues/326](https://github.com/apache/bookkeeper/issues/326) - -- New option to track task execution time, see [https://github.com/apache/bookkeeper/issues/931](https://github.com/apache/bookkeeper/issues/931) - -- Distribute a version of BookKeeper which embeds and relocates Guava and Protobuf, see [https://github.com/apache/bookkeeper/issues/922](https://github.com/apache/bookkeeper/issues/922) - -- Add description for the new error code "Too many requests", see [https://github.com/apache/bookkeeper/pull/921](https://github.com/apache/bookkeeper/pull/921) - -### Dependencies Upgrade - -No dependencies were upgraded since 4.6.0, but we now distribute a 'shaded' version of the main artifacts; see [Ledger API](../ledger-api). - -## Full list of changes - -- [https://github.com/apache/bookkeeper/issues?utf8=%E2%9C%93&q=label%3Arelease%2F4.6.1+is%3Aclosed](https://github.com/apache/bookkeeper/issues?utf8=%E2%9C%93&q=label%3Arelease%2F4.6.1+is%3Aclosed) diff --git a/site/docs/4.6.1/overview/releaseNotesTemplate.md deleted file mode 100644 index 53848e37a02..00000000000 --- a/site/docs/4.6.1/overview/releaseNotesTemplate.md +++ /dev/null @@ -1,17 +0,0 @@ ---- -title: Apache BookKeeper 4.6.1 Release Notes --- - -[provide a summary of this release] - -Apache BookKeeper users are encouraged to upgrade to 4.6.1.
The technical details of this release are summarized -below. - -## Highlights - -[List the highlights] - -## Details - -[list to issues list] - diff --git a/site/docs/4.6.1/reference/cli.md deleted file mode 100644 index 8beb36ff071..00000000000 --- a/site/docs/4.6.1/reference/cli.md +++ /dev/null @@ -1,10 +0,0 @@ ---- -title: BookKeeper CLI tool reference -subtitle: A reference guide to the command-line tools that you can use to administer BookKeeper --- - -{% include cli.html id="bookkeeper" %} - -## The BookKeeper shell - -{% include shell.html %} diff --git a/site/docs/4.6.1/reference/config.md deleted file mode 100644 index 8997b6b62f0..00000000000 --- a/site/docs/4.6.1/reference/config.md +++ /dev/null @@ -1,9 +0,0 @@ ---- -title: BookKeeper configuration -subtitle: A reference guide to all of BookKeeper's configurable parameters --- - - -The table below lists parameters that you can set to configure {% pop bookies %}. All configuration takes place in the `bk_server.conf` file in the `bookkeeper-server/conf` directory of your [BookKeeper installation](../../getting-started/installation). - -{% include config.html id="bk_server" %} diff --git a/site/docs/4.6.1/reference/metrics.md deleted file mode 100644 index 8bd6fe0a165..00000000000 --- a/site/docs/4.6.1/reference/metrics.md +++ /dev/null @@ -1,3 +0,0 @@ ---- -title: BookKeeper metrics reference --- diff --git a/site/docs/4.6.1/security/overview.md deleted file mode 100644 index b825776eb67..00000000000 --- a/site/docs/4.6.1/security/overview.md +++ /dev/null @@ -1,21 +0,0 @@ ---- -title: BookKeeper Security -next: ../tls --- - -In the 4.5.0 release, the BookKeeper community added a number of features that can be used, together or separately, to secure a BookKeeper cluster. -The following security measures are currently supported: - -1. Authentication of connections to bookies from clients, using either [TLS](../tls) or [SASL (Kerberos)](../sasl). -2. Authentication of connections from clients, bookies, and autorecovery daemons to [ZooKeeper](../zookeeper), when using ZooKeeper-based ledger managers. -3. Encryption of data transferred between bookies and clients, and between bookies and autorecovery daemons, using [TLS](../tls). - -It’s worth noting that security is optional: non-secured clusters are supported, as well as a mix of authenticated, unauthenticated, encrypted and non-encrypted clients. - -NOTE: authorization is not yet available in 4.5.0. The Apache BookKeeper community is looking to add this feature in subsequent releases. - -## Next Steps - -- [Encryption and Authentication using TLS](../tls) -- [Authentication using SASL](../sasl) -- [ZooKeeper Authentication](../zookeeper) diff --git a/site/docs/4.6.1/security/sasl.md deleted file mode 100644 index ffb972a8936..00000000000 --- a/site/docs/4.6.1/security/sasl.md +++ /dev/null @@ -1,202 +0,0 @@ ---- -title: Authentication using SASL -prev: ../tls -next: ../zookeeper --- - -Bookies support client authentication via SASL. Currently we only support GSSAPI (Kerberos). We will start -with a general description of how to configure `SASL` for bookies, clients and autorecovery daemons, followed -by mechanism-specific details, and wrap up with some operational details. - -## SASL configuration for Bookies - -1. Select the mechanisms to enable in the bookies.
`GSSAPI` is the only mechanism currently supported by BookKeeper. -2. Add a `JAAS` config file for the selected mechanisms as described in the examples for setting up [GSSAPI (Kerberos)](#kerberos). -3. Pass the `JAAS` config file location as a JVM parameter to each Bookie. For example: - - ```shell - -Djava.security.auth.login.config=/etc/bookkeeper/bookie_jaas.conf - ``` - -4. Enable the SASL auth plugin in bookies by setting `bookieAuthProviderFactoryClass` to `org.apache.bookkeeper.sasl.SASLBookieAuthProviderFactory`. - - - ```shell - bookieAuthProviderFactoryClass=org.apache.bookkeeper.sasl.SASLBookieAuthProviderFactory - ``` - -5. If you are running `autorecovery` along with bookies, then you should also enable the SASL auth plugin for `autorecovery`, by setting - `clientAuthProviderFactoryClass` to `org.apache.bookkeeper.sasl.SASLClientProviderFactory`. - - ```shell - clientAuthProviderFactoryClass=org.apache.bookkeeper.sasl.SASLClientProviderFactory - ``` - -6. Follow the steps in [GSSAPI (Kerberos)](#kerberos) to configure SASL. - -#### Important Notes - -1. `Bookie` is a section name in the JAAS file used by each bookie. This section tells the bookie which principal to use - and the location of the keytab where the principal is stored. It allows the bookie to log in using the keytab specified in this section. -2. `Auditor` is a section name in the JAAS file used by the `autorecovery` daemon (it can be co-run with bookies). This section tells the - `autorecovery` daemon which principal to use and the location of the keytab where the principal is stored. It allows the autorecovery daemon to - log in using the keytab specified in this section. -3. The `Client` section is used to authenticate a SASL connection with ZooKeeper. It also allows the bookies to set ACLs on ZooKeeper nodes, - which locks these nodes down so that only the bookies can modify them. It is necessary to have the same primary name across all bookies. - If you want to use a section name other than `Client`, set the system property `zookeeper.sasl.client` to the appropriate name - (e.g. `-Dzookeeper.sasl.client=ZKClient`). -4. ZooKeeper uses `zookeeper` as the service name by default. If you want to change this, set the system property - `zookeeper.sasl.client.username` to the appropriate name (e.g. `-Dzookeeper.sasl.client.username=zk`). - -## SASL configuration for Clients - -To configure `SASL` authentication on the clients: - -1. Select a `SASL` mechanism for authentication and add a `JAAS` config file for the selected mechanism as described in the examples for - setting up [GSSAPI (Kerberos)](#kerberos). -2. Pass the `JAAS` config file location as a JVM parameter to each client JVM. For example: - - ```shell - -Djava.security.auth.login.config=/etc/bookkeeper/bookkeeper_jaas.conf - ``` - -3. Configure the following properties in the bookkeeper `ClientConfiguration`: - - ```shell - clientAuthProviderFactoryClass=org.apache.bookkeeper.sasl.SASLClientProviderFactory - ``` - -Follow the steps in [GSSAPI (Kerberos)](#kerberos) to configure SASL for the selected mechanism. - -## Authentication using SASL/Kerberos - -### Prerequisites - -#### Kerberos - -If your organization is already using a Kerberos server (for example, by using `Active Directory`), there is no need to -install a new server just for BookKeeper.
Otherwise you will need to install one, your Linux vendor likely has packages -for `Kerberos` and a short guide on how to install and configure it ([Ubuntu](https://help.ubuntu.com/community/Kerberos), -[Redhat](https://access.redhat.com/documentation/en-US/Red_Hat_Enterprise_Linux/6/html/Managing_Smart_Cards/installing-kerberos.html)). -Note that if you are using Oracle Java, you will need to download JCE policy files for your Java version and copy them to `$JAVA_HOME/jre/lib/security`. - -#### Kerberos Principals - -If you are using the organization’s Kerberos or Active Directory server, ask your Kerberos administrator for a principal -for each Bookie in your cluster and for every operating system user that will access BookKeeper with Kerberos authentication -(via clients and tools). - -If you have installed your own Kerberos, you will need to create these principals yourself using the following commands: - -```shell -sudo /usr/sbin/kadmin.local -q 'addprinc -randkey bookkeeper/{hostname}@{REALM}' -sudo /usr/sbin/kadmin.local -q "ktadd -k /etc/security/keytabs/{keytabname}.keytab bookkeeper/{hostname}@{REALM}" -``` - -##### All hosts must be reachable using hostnames - -It is a *Kerberos* requirement that all your hosts can be resolved with their FQDNs. - -### Configuring Bookies - -1. Add a suitably modified JAAS file similar to the one below to each Bookie’s config directory, let’s call it `bookie_jaas.conf` -for this example (note that each bookie should have its own keytab): - - ``` - Bookie { - com.sun.security.auth.module.Krb5LoginModule required - useKeyTab=true - storeKey=true - keyTab="/etc/security/keytabs/bookie.keytab" - principal="bookkeeper/bk1.hostname.com@EXAMPLE.COM"; - }; - // ZooKeeper client authentication - Client { - com.sun.security.auth.module.Krb5LoginModule required - useKeyTab=true - storeKey=true - keyTab="/etc/security/keytabs/bookie.keytab" - principal="bookkeeper/bk1.hostname.com@EXAMPLE.COM"; - }; - // If you are running `autorecovery` along with bookies - Auditor { - com.sun.security.auth.module.Krb5LoginModule required - useKeyTab=true - storeKey=true - keyTab="/etc/security/keytabs/bookie.keytab" - principal="bookkeeper/bk1.hostname.com@EXAMPLE.COM"; - }; - ``` - - The `Bookie` section in the JAAS file tells the bookie which principal to use and the location of the keytab where this principal is stored. - It allows the bookie to login using the keytab specified in this section. See [notes](#notes) for more details on Zookeeper’s SASL configuration. - -2. Pass the name of the JAAS file as a JVM parameter to each Bookie: - - ```shell - -Djava.security.auth.login.config=/etc/bookkeeper/bookie_jaas.conf - ``` - - You may also wish to specify the path to the `krb5.conf` file - (see [JDK’s Kerberos Requirements](https://docs.oracle.com/javase/8/docs/technotes/guides/security/jgss/tutorials/KerberosReq.html) for more details): - - ```shell - -Djava.security.krb5.conf=/etc/bookkeeper/krb5.conf - ``` - -3. Make sure the keytabs configured in the JAAS file are readable by the operating system user who is starting the Bookies. - -4. Enable SASL authentication plugin in the bookies by setting following parameters. - - ```shell - bookieAuthProviderFactoryClass=org.apache.bookkeeper.sasl.SASLBookieAuthProviderFactory - # if you run `autorecovery` along with bookies - clientAuthProviderFactoryClass=org.apache.bookkeeper.sasl.SASLClientProviderFactory - ``` - -### Configuring Clients - -To configure SASL authentication on the clients: - -1. 
Clients will authenticate to the cluster with their own principal (usually with the same name as the user running the client), - so obtain or create these principals as needed. Then create a `JAAS` file for each principal. The `BookKeeper` section describes - how the clients like writers and readers can connect to the Bookies. The following is an example configuration for a client using - a keytab (recommended for long-running processes): - - ``` - BookKeeper { - com.sun.security.auth.module.Krb5LoginModule required - useKeyTab=true - storeKey=true - keyTab="/etc/security/keytabs/bookkeeper.keytab" - principal="bookkeeper-client-1@EXAMPLE.COM"; - }; - ``` - - -2. Pass the name of the JAAS file as a JVM parameter to the client JVM: - - ```shell - -Djava.security.auth.login.config=/etc/bookkeeper/bookkeeper_jaas.conf - ``` - - You may also wish to specify the path to the `krb5.conf` file (see - [JDK’s Kerberos Requirements](https://docs.oracle.com/javase/8/docs/technotes/guides/security/jgss/tutorials/KerberosReq.html) for more details). - - ```shell - -Djava.security.krb5.conf=/etc/bookkeeper/krb5.conf - ``` - - -3. Make sure the keytabs configured in the `bookkeeper_jaas.conf` are readable by the operating system user who is starting bookkeeper client. - -4. Enable SASL authentication plugin in the client by setting following parameters. - - ```shell - clientAuthProviderFactoryClass=org.apache.bookkeeper.sasl.SASLClientProviderFactory - ``` - -## Enabling Logging for SASL - -To enable SASL debug output, you can set `sun.security.krb5.debug` system property to `true`. - diff --git a/site/docs/4.6.1/security/tls.md b/site/docs/4.6.1/security/tls.md deleted file mode 100644 index cd250ab2aa5..00000000000 --- a/site/docs/4.6.1/security/tls.md +++ /dev/null @@ -1,210 +0,0 @@ ---- -title: Encryption and Authentication using TLS -prev: ../overview -next: ../sasl ---- - -Apache BookKeeper allows clients and autorecovery daemons to communicate over TLS, although this is not enabled by default. - -## Overview - -The bookies need their own key and certificate in order to use TLS. Clients can optionally provide a key and a certificate -for mutual authentication. Each bookie or client can also be configured with a truststore, which is used to -determine which certificates (bookie or client identities) to trust (authenticate). - -The truststore can be configured in many ways. To understand the truststore, consider the following two examples: - -1. the truststore contains one or many certificates; -2. it contains a certificate authority (CA). - -In (1), with a list of certificates, the bookie or client will trust any certificate listed in the truststore. -In (2), with a CA, the bookie or client will trust any certificate that was signed by the CA in the truststore. - -(TBD: benefits) - -## Generate TLS key and certificate - -The first step of deploying TLS is to generate the key and the certificate for each machine in the cluster. -You can use Java’s `keytool` utility to accomplish this task. We will generate the key into a temporary keystore -initially so that we can export and sign it later with CA. - -```shell -keytool -keystore bookie.keystore.jks -alias localhost -validity {validity} -genkey -``` - -You need to specify two parameters in the above command: - -1. `keystore`: the keystore file that stores the certificate. The *keystore* file contains the private key of - the certificate; hence, it needs to be kept safely. -2. `validity`: the valid time of the certificate in days. - -
-Ensure that the common name (CN) matches exactly the fully qualified domain name (FQDN) of the server.
-The client compares the CN with the DNS domain name to ensure that it is indeed connecting to the desired server, not a malicious one.
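-
-For instance, here is a minimal sketch that pins the CN to the bookie's FQDN at generation time (the hostname is an assumed example; `-dname` and `-ext SAN` are standard `keytool` flags):
-
-```shell
-keytool -keystore bookie.keystore.jks -alias localhost -validity {validity} -genkey \
-    -keyalg RSA \
-    -dname "CN=bk1.example.com" \
-    -ext SAN=dns:bk1.example.com
-```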
          - -## Creating your own CA - -After the first step, each machine in the cluster has a public-private key pair, and a certificate to identify the machine. -The certificate, however, is unsigned, which means that an attacker can create such a certificate to pretend to be any machine. - -Therefore, it is important to prevent forged certificates by signing them for each machine in the cluster. -A `certificate authority (CA)` is responsible for signing certificates. CA works likes a government that issues passports — -the government stamps (signs) each passport so that the passport becomes difficult to forge. Other governments verify the stamps -to ensure the passport is authentic. Similarly, the CA signs the certificates, and the cryptography guarantees that a signed -certificate is computationally difficult to forge. Thus, as long as the CA is a genuine and trusted authority, the clients have -high assurance that they are connecting to the authentic machines. - -```shell -openssl req -new -x509 -keyout ca-key -out ca-cert -days 365 -``` - -The generated CA is simply a *public-private* key pair and certificate, and it is intended to sign other certificates. - -The next step is to add the generated CA to the clients' truststore so that the clients can trust this CA: - -```shell -keytool -keystore bookie.truststore.jks -alias CARoot -import -file ca-cert -``` - -NOTE: If you configure the bookies to require client authentication by setting `sslClientAuthentication` to `true` on the -[bookie config](../../reference/config), then you must also provide a truststore for the bookies and it should have all the CA -certificates that clients keys were signed by. - -```shell -keytool -keystore client.truststore.jks -alias CARoot -import -file ca-cert -``` - -In contrast to the keystore, which stores each machine’s own identity, the truststore of a client stores all the certificates -that the client should trust. Importing a certificate into one’s truststore also means trusting all certificates that are signed -by that certificate. As the analogy above, trusting the government (CA) also means trusting all passports (certificates) that -it has issued. This attribute is called the chain of trust, and it is particularly useful when deploying TLS on a large BookKeeper cluster. -You can sign all certificates in the cluster with a single CA, and have all machines share the same truststore that trusts the CA. -That way all machines can authenticate all other machines. - -## Signing the certificate - -The next step is to sign all certificates in the keystore with the CA we generated. First, you need to export the certificate from the keystore: - -```shell -keytool -keystore bookie.keystore.jks -alias localhost -certreq -file cert-file -``` - -Then sign it with the CA: - -```shell -openssl x509 -req -CA ca-cert -CAkey ca-key -in cert-file -out cert-signed -days {validity} -CAcreateserial -passin pass:{ca-password} -``` - -Finally, you need to import both the certificate of the CA and the signed certificate into the keystore: - -```shell -keytool -keystore bookie.keystore.jks -alias CARoot -import -file ca-cert -keytool -keystore bookie.keystore.jks -alias localhost -import -file cert-signed -``` - -The definitions of the parameters are the following: - -1. `keystore`: the location of the keystore -2. `ca-cert`: the certificate of the CA -3. `ca-key`: the private key of the CA -4. `ca-password`: the passphrase of the CA -5. `cert-file`: the exported, unsigned certificate of the bookie -6. 
`cert-signed`: the signed certificate of the bookie - -(TBD: add a script to automatically generate truststores and keystores.) - -## Configuring Bookies - -Bookies support TLS for connections on the same service port. In order to enable TLS, you need to configure `tlsProvider` to be either -`JDK` or `OpenSSL`. If `OpenSSL` is configured, it will use `netty-tcnative-boringssl-static`, which loads a corresponding binding according -to the platforms to run bookies. - -> Current `OpenSSL` implementation doesn't depend on the system installed OpenSSL library. If you want to leverage the OpenSSL installed on -the system, you can check [this example](http://netty.io/wiki/forked-tomcat-native.html) on how to replaces the JARs on the classpath with -netty bindings to leverage installed OpenSSL. - -The following TLS configs are needed on the bookie side: - -```shell -tlsProvider=OpenSSL -# key store -tlsKeyStoreType=JKS -tlsKeyStore=/var/private/tls/bookie.keystore.jks -tlsKeyStorePasswordPath=/var/private/tls/bookie.keystore.passwd -# trust store -tlsTrustStoreType=JKS -tlsTrustStore=/var/private/tls/bookie.truststore.jks -tlsTrustStorePasswordPath=/var/private/tls/bookie.truststore.passwd -``` - -NOTE: it is important to restrict access to the store files and corresponding password files via filesystem permissions. - -Optional settings that are worth considering: - -1. tlsClientAuthentication=false: Enable/Disable using TLS for authentication. This config when enabled will authenticate the other end - of the communication channel. It should be enabled on both bookies and clients for mutual TLS. -2. tlsEnabledCipherSuites= A cipher suite is a named combination of authentication, encryption, MAC and key exchange - algorithm used to negotiate the security settings for a network connection using TLS network protocol. By default, - it is null. [OpenSSL Ciphers](https://www.openssl.org/docs/man1.0.2/apps/ciphers.html) - [JDK Ciphers](http://docs.oracle.com/javase/8/docs/technotes/guides/security/StandardNames.html#ciphersuites) -3. tlsEnabledProtocols = TLSv1.2,TLSv1.1,TLSv1 (list out the TLS protocols that you are going to accept from clients). - By default, it is not set. - -To verify the bookie's keystore and truststore are setup correctly you can run the following command: - -```shell -openssl s_client -debug -connect localhost:3181 -tls1 -``` - -NOTE: TLSv1 should be listed under `tlsEnabledProtocols`. - -In the output of this command you should see the server's certificate: - -```shell ------BEGIN CERTIFICATE----- -{variable sized random bytes} ------END CERTIFICATE----- -``` - -If the certificate does not show up or if there are any other error messages then your keystore is not setup correctly. - -## Configuring Clients - -TLS is supported only for the new BookKeeper client (BookKeeper versions 4.5.0 and higher), the older clients are not -supported. The configs for TLS will be the same as bookies. - -If client authentication is not required by the bookies, the following is a minimal configuration example: - -```shell -tlsProvider=OpenSSL -clientTrustStore=/var/private/tls/client.truststore.jks -clientTrustStorePasswordPath=/var/private/tls/client.truststore.passwd -``` - -If client authentication is required, then a keystore must be created for each client, and the bookies' truststores must -trust the certificate in the client's keystore. This may be done using commands that are similar to what we used for -the [bookie keystore](#bookie-keystore). 
- -And the following must also be configured: - -```shell -tlsClientAuthentication=true -clientKeyStore=/var/private/tls/client.keystore.jks -clientKeyStorePasswordPath=/var/private/tls/client.keystore.passwd -``` - -NOTE: it is important to restrict access to the store files and corresponding password files via filesystem permissions. - -(TBD: add example to use tls in bin/bookkeeper script?) - -## Enabling TLS Logging - -You can enable TLS debug logging at the JVM level by starting the bookies and/or clients with `javax.net.debug` system property. For example: - -```shell --Djavax.net.debug=all -``` - -You can find more details on this in [Oracle documentation](http://docs.oracle.com/javase/8/docs/technotes/guides/security/jsse/ReadDebug.html) on -[debugging SSL/TLS connections](http://docs.oracle.com/javase/8/docs/technotes/guides/security/jsse/ReadDebug.html). diff --git a/site/docs/4.6.1/security/zookeeper.md b/site/docs/4.6.1/security/zookeeper.md deleted file mode 100644 index e16be69a1d3..00000000000 --- a/site/docs/4.6.1/security/zookeeper.md +++ /dev/null @@ -1,41 +0,0 @@ ---- -title: ZooKeeper Authentication -prev: ../sasl ---- - -## New Clusters - -To enable `ZooKeeper` authentication on Bookies or Clients, there are two necessary steps: - -1. Create a `JAAS` login file and set the appropriate system property to point to it as described in [GSSAPI (Kerberos)](../sasl#notes). -2. Set the configuration property `zkEnableSecurity` in each bookie to `true`. - -The metadata stored in `ZooKeeper` is such that only certain clients will be able to modify and read the corresponding znodes. -The rationale behind this decision is that the data stored in ZooKeeper is not sensitive, but inappropriate manipulation of znodes can cause cluster -disruption. - -## Migrating Clusters - -If you are running a version of BookKeeper that does not support security or simply with security disabled, and you want to make the cluster secure, -then you need to execute the following steps to enable ZooKeeper authentication with minimal disruption to your operations. - -1. Perform a rolling restart setting the `JAAS` login file, which enables bookie or clients to authenticate. At the end of the rolling restart, - bookies (or clients) are able to manipulate znodes with strict ACLs, but they will not create znodes with those ACLs. -2. Perform a second rolling restart of bookies, this time setting the configuration parameter `zkEnableSecurity` to true, which enables the use - of secure ACLs when creating znodes. -3. Currently we don't have provide a tool to set acls on old znodes. You are recommended to set it manually using ZooKeeper tools. - -It is also possible to turn off authentication in a secured cluster. To do it, follow these steps: - -1. Perform a rolling restart of bookies setting the `JAAS` login file, which enable bookies to authenticate, but setting `zkEnableSecurity` to `false`. - At the end of rolling restart, bookies stop creating znodes with secure ACLs, but are still able to authenticate and manipulate all znodes. -2. You can use ZooKeeper tools to manually reset all ACLs under the znode set in `zkLedgersRootPath`, which defaults to `/ledgers`. -3. Perform a second rolling restart of bookies, this time omitting the system property that sets the `JAAS` login file. - -## Migrating the ZooKeeper ensemble - -It is also necessary to enable authentication on the `ZooKeeper` ensemble. To do it, we need to perform a rolling restart of the ensemble and -set a few properties. 
Please refer to the ZooKeeper documentation for more details. - -1. [Apache ZooKeeper Documentation](http://zookeeper.apache.org/doc/r3.4.6/zookeeperProgrammers.html#sc_ZooKeeperAccessControl) -2. [Apache ZooKeeper Wiki](https://cwiki.apache.org/confluence/display/ZOOKEEPER/Zookeeper+and+SASL) diff --git a/site/docs/4.6.2/admin/autorecovery.md b/site/docs/4.6.2/admin/autorecovery.md deleted file mode 100644 index b1dd078f9b2..00000000000 --- a/site/docs/4.6.2/admin/autorecovery.md +++ /dev/null @@ -1,128 +0,0 @@ ---- -title: Using AutoRecovery ---- - -When a {% pop bookie %} crashes, all {% pop ledgers %} on that bookie become under-replicated. In order to bring all ledgers in your BookKeeper cluster back to full replication, you'll need to *recover* the data from any offline bookies. There are two ways to recover bookies' data: - -1. Using [manual recovery](#manual-recovery) -1. Automatically, using [*AutoRecovery*](#autorecovery) - -## Manual recovery - -You can manually recover failed bookies using the [`bookkeeper`](../../reference/cli) command-line tool. You need to specify: - -* the `shell recover` option -* an IP and port for your BookKeeper cluster's ZooKeeper ensemble -* the IP and port for the failed bookie - -Here's an example: - -```bash -$ bookkeeper-server/bin/bookkeeper shell recover \ - zk1.example.com:2181 \ # IP and port for ZooKeeper - 192.168.1.10:3181 # IP and port for the failed bookie -``` - -If you wish, you can also specify which bookie you'd like to rereplicate to. Here's an example: - -```bash -$ bookkeeper-server/bin/bookkeeper shell recover \ - zk1.example.com:2181 \ # IP and port for ZooKeeper - 192.168.1.10:3181 \ # IP and port for the failed bookie - 192.168.1.11:3181 # IP and port for the bookie to rereplicate to -``` - -### The manual recovery process - -When you initiate a manual recovery process, the following happens: - -1. The client (the process running ) reads the metadata of active ledgers from ZooKeeper. -1. The ledgers that contain fragments from the failed bookie in their ensemble are selected. -1. A recovery process is initiated for each ledger in this list and the rereplication process is run for each ledger. -1. Once all the ledgers are marked as fully replicated, bookie recovery is finished. - -## AutoRecovery - -AutoRecovery is a process that: - -* automatically detects when a {% pop bookie %} in your BookKeeper cluster has become unavailable and then -* rereplicates all the {% pop ledgers %} that were stored on that bookie. - -AutoRecovery can be run in two ways: - -1. On dedicated nodes in your BookKeeper cluster -1. On the same machines on which your bookies are running - -## Running AutoRecovery - -You can start up AutoRecovery using the [`autorecovery`](../../reference/cli#bookkeeper-autorecovery) command of the [`bookkeeper`](../../reference/cli) CLI tool. - -```bash -$ bookkeeper-server/bin/bookkeeper autorecovery -``` - -> The most important thing to ensure when starting up AutoRecovery is that the ZooKeeper connection string specified by the [`zkServers`](../../reference/config#zkServers) parameter points to the right ZooKeeper cluster. - -If you start up AutoRecovery on a machine that is already running a bookie, then the AutoRecovery process will run alongside the bookie on a separate thread. - -You can also start up AutoRecovery on a fresh machine if you'd like to create a dedicated cluster of AutoRecovery nodes. 
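-
-For example, on a dedicated machine you could run the AutoRecovery daemon in the background with the same daemon script used for bookies (a sketch, assuming the `zkServers` setting in that machine's `bk_server.conf` points at your cluster's ZooKeeper ensemble):
-
-```shell
-$ bookkeeper-server/bin/bookkeeper-daemon.sh start autorecovery
-```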
-
-## Configuration
-
-There are a handful of AutoRecovery-related configs in the [`bk_server.conf`](../../reference/config) configuration file. For a listing of those configs, see [AutoRecovery settings](../../reference/config#autorecovery-settings).
-
-## Disable AutoRecovery
-
-You can disable AutoRecovery at any time, for example during maintenance. Disabling AutoRecovery ensures that bookies' data isn't unnecessarily rereplicated when the bookie is only taken down for a short period of time, for example when the bookie is being updated or the configuration is being changed.
-
-You can disable AutoRecovery using the [`bookkeeper`](../../reference/cli#bookkeeper-shell-autorecovery) CLI tool:
-
-```bash
-$ bookkeeper-server/bin/bookkeeper shell autorecovery -disable
-```
-
-Once disabled, you can re-enable AutoRecovery using the [`enable`](../../reference/cli#bookkeeper-shell-autorecovery) shell command:
-
-```bash
-$ bookkeeper-server/bin/bookkeeper shell autorecovery -enable
-```
-
-## AutoRecovery architecture
-
-AutoRecovery has two components:
-
-1. The [**auditor**](#auditor) (see the [`Auditor`](../../api/javadoc/org/apache/bookkeeper/replication/Auditor.html) class) is a singleton node that watches bookies to see if they fail and creates rereplication tasks for the ledgers on failed bookies.
-1. The [**replication worker**](#replication-worker) (see the [`ReplicationWorker`](../../api/javadoc/org/apache/bookkeeper/replication/ReplicationWorker.html) class) runs on each bookie and executes rereplication tasks provided by the auditor.
-
-Both of these components run as threads in the [`AutoRecoveryMain`](../../api/javadoc/org/apache/bookkeeper/replication/AutoRecoveryMain) process, which runs on each bookie in the cluster. All recovery nodes participate in leader election---using ZooKeeper---to decide which node becomes the auditor. Nodes that fail to become the auditor watch the elected auditor and run an election process again if they see that the auditor node has failed.
-
-### Auditor
-
-The auditor watches all bookies in the cluster that are registered with ZooKeeper. Bookies register with ZooKeeper at startup. If the bookie crashes or is killed, the bookie's registration in ZooKeeper disappears and the auditor is notified of the change in the list of registered bookies.
-
-When the auditor sees that a bookie has disappeared, it immediately scans the complete {% pop ledger %} list to find ledgers that have data stored on the failed bookie. Once it has a list of ledgers for that bookie, the auditor will publish a rereplication task for each ledger under the `/underreplicated/` [znode](https://zookeeper.apache.org/doc/current/zookeeperOver.html) in ZooKeeper.
-
-### Replication Worker
-
-Each replication worker watches for tasks being published by the auditor on the `/underreplicated/` znode in ZooKeeper. When a new task appears, the replication worker will try to get a lock on it. If it cannot acquire the lock, it will try the next entry. The locks are implemented using ZooKeeper ephemeral znodes.
-
-The replication worker will scan through the rereplication task's ledger for fragments of which its local bookie is not a member. When it finds fragments matching this criterion, it will replicate the entries of that fragment to the local bookie. If, after this process, the ledger is fully replicated, the ledger's entry under `/underreplicated/` is deleted, and the lock is released.
If there is a problem replicating, or there are fragments in the ledger which are still underreplicated (due to the local bookie already being part of the ensemble for the fragment), then the lock is simply released.
-
-If the replication worker finds a fragment which needs rereplication, but does not have a defined endpoint (i.e. the final fragment of a ledger currently being written to), it will wait for a grace period before attempting rereplication. If the fragment needing rereplication still does not have a defined endpoint, the ledger is fenced and rereplication then takes place.
-
-This avoids the situation in which a client is writing to a ledger and one of the bookies goes down, but the client has not written an entry to that bookie before rereplication takes place. The client could continue writing to the old fragment, even though the ensemble for the fragment had changed. This could lead to data loss. Fencing prevents this scenario from happening. In the normal case, the client will try to write to the failed bookie within the grace period, and will have started a new fragment before rereplication starts.
-
-You can configure this grace period using the [`openLedgerRereplicationGracePeriod`](../../reference/config#openLedgerRereplicationGracePeriod) parameter.
-
-### The rereplication process
-
-The ledger rereplication process happens in these steps:
-
-1. The client goes through all ledger fragments in the ledger, selecting those that contain the failed bookie.
-1. A recovery process is initiated for each ledger fragment in this list.
-   1. The client selects a bookie to which all entries in the ledger fragment will be replicated; in the case of autorecovery, this will always be the local bookie.
-   1. The client reads entries that belong to the ledger fragment from other bookies in the ensemble and writes them to the selected bookie.
-   1. Once all entries have been replicated, the ZooKeeper metadata for the fragment is updated to reflect the new ensemble.
-   1. The fragment is marked as fully replicated in the recovery tool.
-1. Once all ledger fragments are marked as fully replicated, the ledger is marked as fully replicated.
- diff --git a/site/docs/4.6.2/admin/bookies.md b/site/docs/4.6.2/admin/bookies.md deleted file mode 100644 index 1b0427dae3c..00000000000 --- a/site/docs/4.6.2/admin/bookies.md +++ /dev/null @@ -1,180 +0,0 @@ ----
-title: BookKeeper administration
-subtitle: A guide to deploying and administering BookKeeper
----
-
-This document is a guide to deploying, administering, and maintaining BookKeeper. It also discusses [best practices](#best-practices) and [common problems](#common-problems).
-
-## Requirements
-
-A typical BookKeeper installation consists of an ensemble of {% pop bookies %} and a ZooKeeper quorum. The exact number of bookies depends on the quorum mode that you choose, desired throughput, and the number of clients using the installation simultaneously.
-
-The minimum number of bookies depends on the type of installation:
-
-* For *self-verifying* entries you should run at least three bookies. In this mode, clients store a message authentication code along with each {% pop entry %}.
-* For *generic* entries you should run at least four bookies.
-
-There is no upper limit on the number of bookies that you can run in a single ensemble.
-
-### Performance
-
-To achieve optimal performance, BookKeeper requires each server to have at least two disks. It's possible to run a bookie with a single disk, but performance will be significantly degraded.
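-
-As an illustrative sketch, a two-disk bookie might dedicate one device to the journal and another to ledger storage (the mount points below are assumptions; the parameters themselves are described under [Configuring bookies](#configuring-bookies)):
-
-```shell
-# Write-ahead log (journal) on its own device to keep fsync latency low
-journalDirectory=/mnt/journal/bk-txn
-# Ledger storage on a separate device (comma-separated list for multiple disks)
-ledgerDirectories=/mnt/data/bk-data
-```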
- -### ZooKeeper - -There is no constraint on the number of ZooKeeper nodes you can run with BookKeeper. A single machine running ZooKeeper in [standalone mode](https://zookeeper.apache.org/doc/current/zookeeperStarted.html#sc_InstallingSingleMode) is sufficient for BookKeeper, although for the sake of higher resilience we recommend running ZooKeeper in [quorum mode](https://zookeeper.apache.org/doc/current/zookeeperStarted.html#sc_RunningReplicatedZooKeeper) with multiple servers. - -## Starting and stopping bookies - -You can run bookies either in the foreground or in the background, using [nohup](https://en.wikipedia.org/wiki/Nohup). You can also run [local bookies](#local-bookie) for development purposes. - -To start a bookie in the foreground, use the [`bookie`](../../reference/cli#bookkeeper-bookie) command of the [`bookkeeper`](../../reference/cli#bookkeeper) CLI tool: - -```shell -$ bookkeeper-server/bin/bookkeeper bookie -``` - -To start a bookie in the background, use the [`bookkeeper-daemon.sh`](../../reference/cli#bookkeeper-daemon.sh) script and run `start bookie`: - -```shell -$ bookkeeper-server/bin/bookkeeper-daemon.sh start bookie -``` - -### Local bookies - -The instructions above showed you how to run bookies intended for production use. If you'd like to experiment with ensembles of bookies locally, you can use the [`localbookie`](../../reference/cli#bookkeeper-localbookie) command of the `bookkeeper` CLI tool and specify the number of bookies you'd like to run. - -This would spin up a local ensemble of 6 bookies: - -```shell -$ bookkeeper-server/bin/bookkeeper localbookie 6 -``` - -> When you run a local bookie ensemble, all bookies run in a single JVM process. - -## Configuring bookies - -There's a wide variety of parameters that you can set in the bookie configuration file in `bookkeeper-server/conf/bk_server.conf` of your [BookKeeper installation](../../reference/config). A full listing can be found in [Bookie configuration](../../reference/config). - -Some of the more important parameters to be aware of: - -Parameter | Description | Default -:---------|:------------|:------- -`bookiePort` | The TCP port that the bookie listens on | `3181` -`zkServers` | A comma-separated list of ZooKeeper servers in `hostname:port` format | `localhost:2181` -`journalDirectory` | The directory where the [log device](../../getting-started/concepts#log-device) stores the bookie's write-ahead log (WAL) | `/tmp/bk-txn` -`ledgerDirectories` | The directories where the [ledger device](../../getting-started/concepts#ledger-device) stores the bookie's ledger entries (as a comma-separated list) | `/tmp/bk-data` - -> Ideally, the directories specified `journalDirectory` and `ledgerDirectories` should be on difference devices. - -## Logging - -BookKeeper uses [slf4j](http://www.slf4j.org/) for logging, with [log4j](https://logging.apache.org/log4j/2.x/) bindings enabled by default. - -To enable logging for a bookie, create a `log4j.properties` file and point the `BOOKIE_LOG_CONF` environment variable to the configuration file. Here's an example: - -```shell -$ export BOOKIE_LOG_CONF=/some/path/log4j.properties -$ bookkeeper-server/bin/bookkeeper bookie -``` - -## Upgrading - -From time to time you may need to make changes to the filesystem layout of bookies---changes that are incompatible with previous versions of BookKeeper and require that directories used with previous versions are upgraded. 
If a filesystem upgrade is required when updating BookKeeper, the bookie will fail to start and return an error like this: - -``` -2017-05-25 10:41:50,494 - ERROR - [main:Bookie@246] - Directory layout version is less than 3, upgrade needed -``` - -BookKeeper provides a utility for upgrading the filesystem. You can perform an upgrade using the [`upgrade`](../../reference/cli#bookkeeper-upgrade) command of the `bookkeeper` CLI tool. When running `bookkeeper upgrade` you need to specify one of three flags: - -Flag | Action -:----|:------ -`--upgrade` | Performs an upgrade -`--rollback` | Performs a rollback to the initial filesystem version -`--finalize` | Marks the upgrade as complete - -### Upgrade pattern - -A standard upgrade pattern is to run an upgrade... - -```shell -$ bookkeeper-server/bin/bookkeeper upgrade --upgrade -``` - -...then check that everything is working normally, then kill the bookie. If everything is okay, finalize the upgrade... - -```shell -$ bookkeeper-server/bin/bookkeeper upgrade --finalize -``` - -...and then restart the server: - -```shell -$ bookkeeper-server/bin/bookkeeper bookie -``` - -If something has gone wrong, you can always perform a rollback: - -```shell -$ bookkeeper-server/bin/bookkeeper upgrade --rollback -``` - -## Formatting - -You can format bookie metadata in ZooKeeper using the [`metaformat`](../../reference/cli#bookkeeper-shell-metaformat) command of the [BookKeeper shell](../../reference/cli#the-bookkeeper-shell). - -By default, formatting is done in interactive mode, which prompts you to confirm the format operation if old data exists. You can disable confirmation using the `-nonInteractive` flag. If old data does exist, the format operation will abort *unless* you set the `-force` flag. Here's an example: - -```shell -$ bookkeeper-server/bin/bookkeeper shell metaformat -``` - -You can format the local filesystem data on a bookie using the [`bookieformat`](../../reference/cli#bookkeeper-shell-bookieformat) command on each bookie. Here's an example: - -```shell -$ bookkeeper-server/bin/bookkeeper shell bookieformat -``` - -> The `-force` and `-nonInteractive` flags are also available for the `bookieformat` command. - -## AutoRecovery - -For a guide to AutoRecovery in BookKeeper, see [this doc](../autorecovery). - -## Missing disks or directories - -Accidentally replacing disks or removing directories can cause a bookie to fail while trying to read a ledger fragment that, according to the ledger metadata, exists on the bookie. For this reason, when a bookie is started for the first time, its disk configuration is fixed for the lifetime of that bookie. Any change to its disk configuration, such as a crashed disk or an accidental configuration change, will result in the bookie being unable to start. That will throw an error like this: - -``` -2017-05-29 18:19:13,790 - ERROR - [main:BookieServer314] – Exception running bookie server : @ -org.apache.bookkeeper.bookie.BookieException$InvalidCookieException -.......at org.apache.bookkeeper.bookie.Cookie.verify(Cookie.java:82) -.......at org.apache.bookkeeper.bookie.Bookie.checkEnvironment(Bookie.java:275) -.......at org.apache.bookkeeper.bookie.Bookie.(Bookie.java:351) -``` - -If the change was the result of an accidental configuration change, the change can be reverted and the bookie can be restarted. However, if the change *cannot* be reverted, such as is the case when you want to add a new disk or replace a disk, the bookie must be wiped and then all its data re-replicated onto it. - -1. 
Increment the [`bookiePort`](../../reference/config#bookiePort) parameter in the [`bk_server.conf`](../../reference/config).
-1. Ensure that all directories specified by [`journalDirectory`](../../reference/config#journalDirectory) and [`ledgerDirectories`](../../reference/config#ledgerDirectories) are empty.
-1. [Start the bookie](#starting-and-stopping-bookies).
-1. Run the following command to re-replicate the data:
-
-   ```bash
-   $ bookkeeper-server/bin/bookkeeper shell recover \
-     <zkserver> \
-     <oldbookie> \
-     <newbookie>
-   ```
-
-   The ZooKeeper server, old bookie, and new bookie are all identified by their external IP and `bookiePort` (3181 by default). Here's an example:
-
-   ```bash
-   $ bookkeeper-server/bin/bookkeeper shell recover \
-     zk1.example.com \
-     192.168.1.10:3181 \
-     192.168.1.10:3181
-   ```
-
-   See the [AutoRecovery](../autorecovery) documentation for more info on the re-replication process. diff --git a/site/docs/4.6.2/admin/geo-replication.md b/site/docs/4.6.2/admin/geo-replication.md deleted file mode 100644 index 38b972345ef..00000000000 --- a/site/docs/4.6.2/admin/geo-replication.md +++ /dev/null @@ -1,22 +0,0 @@ ----
-title: Geo-replication
-subtitle: Replicate data across BookKeeper clusters
----
-
-*Geo-replication* is the replication of data across BookKeeper clusters. In order to enable geo-replication for a group of BookKeeper clusters,
-
-## Global ZooKeeper
-
-Setting up a global ZooKeeper quorum is a lot like setting up a cluster-specific quorum. The crucial difference is that
-
-### Geo-replication across three clusters
-
-Let's say that you want to set up geo-replication across clusters in regions A, B, and C. First, the BookKeeper clusters in each region must have their own local (cluster-specific) ZooKeeper quorum.
-
-> BookKeeper clusters use global ZooKeeper only for metadata storage. Traffic from bookies to ZooKeeper should thus be fairly light in general.
-
-The crucial difference between using cluster-specific ZooKeeper and global ZooKeeper is that you need to point all {% pop bookies %} to use the global ZooKeeper setup.
-
-## Region-aware placement policy
-
-## Autorecovery diff --git a/site/docs/4.6.2/admin/http.md b/site/docs/4.6.2/admin/http.md deleted file mode 100644 index 0097adc62b8..00000000000 --- a/site/docs/4.6.2/admin/http.md +++ /dev/null @@ -1,394 +0,0 @@ ----
-title: BookKeeper Admin REST API
----
-
-This document introduces the BookKeeper HTTP endpoints, which can be used for BookKeeper administration.
-To use this feature, set `httpServerEnabled` to `true` in the `conf/bk_server.conf` file.
-
-## All the endpoints
-
-Currently all the HTTP endpoints can be divided into these 5 components:
-1. Heartbeat: heartbeat for a specific bookie.
-1. Config: managing the server configuration of a specific bookie.
-1. Ledger: HTTP endpoints related to ledgers.
-1. Bookie: HTTP endpoints related to bookies.
-1. AutoRecovery: HTTP endpoints related to auto recovery.
-
-## Heartbeat
-
-### Endpoint: /heartbeat
-* Method: GET
-* Description: Get heartbeat status for a specific bookie
-* Response:
-
-| Code   | Description |
-|:-------|:------------|
-|200 | Successful operation |
-
-## Config
-
-### Endpoint: /api/v1/config/server_config
-1. Method: GET
-   * Description: Get the value of all configured values overridden on the local server config
-   * Response:
-
-     | Code   | Description |
-     |:-------|:------------|
-     |200 | Successful operation |
-     |403 | Permission denied |
-     |404 | Not found |
-1.
Method: PUT - * Description: Update a local server config - * Parameters: - - | Name | Type | Required | Description | - |:-----|:-----|:---------|:------------| - |configName | String | Yes | Configuration name(key) | - |configValue | String | Yes | Configuration value(value) | - * Body: - ```json - { - "configName1": "configValue1", - "configName2": "configValue2" - } - ``` - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - -## Ledger - -### Endpoint: /api/v1/ledger/delete/?ledger_id=<ledger_id> -1. Method: DELETE - * Description: Delete a ledger. - * Parameters: - - | Name | Type | Required | Description | - |:-----|:-----|:---------|:------------| - |ledger_id | Long | Yes | ledger id of the ledger. | - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - -### Endpoint: /api/v1/ledger/list/?print_metadata=<metadata> -1. Method: GET - * Description: List all the ledgers. - * Parameters: - - | Name | Type | Required | Description | - |:-----|:-----|:---------|:------------| - |print_metadata | Boolean | No | whether print out metadata | - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - * Response Body format: - - ```json - { - "ledgerId1": "ledgerMetadata1", - "ledgerId2": "ledgerMetadata2", - ... - } - ``` - -### Endpoint: /api/v1/ledger/metadata/?ledger_id=<ledger_id> -1. Method: GET - * Description: Get the metadata of a ledger. - * Parameters: - - | Name | Type | Required | Description | - |:-----|:-----|:---------|:------------| - |ledger_id | Long | Yes | ledger id of the ledger. | - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - * Response Body format: - - ```json - { - "ledgerId1": "ledgerMetadata1" - } - ``` - -### Endpoint: /api/v1/ledger/read/?ledger_id=<ledger_id>&start_entry_id=<start_entry_id>&end_entry_id=<end_entry_id> -1. Method: GET - * Description: Read a range of entries from ledger. - * Parameters: - - | Name | Type | Required | Description | - |:-----|:-----|:---------|:------------| - |ledger_id | Long | Yes| ledger id of the ledger. | - |start_entry_id | Long | No | start entry id of read range. | - |end_entry_id | Long | No | end entry id of read range. | - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - * Response Body format: - - ```json - { - "entryId1": "entry content 1", - "entryId2": "entry content 2", - ... - } - ``` - -## Bookie - -### Endpoint: /api/v1/bookie/list_bookies/?type=<type>&print_hostnames=<hostnames> -1. Method: GET - * Description: Get all the available bookies. - * Parameters: - - | Name | Type | Required | Description | - |:-----|:-----|:---------|:------------| - |type | String | Yes | value: "rw" or "ro" , list read-write/read-only bookies. | - |print_hostnames | Boolean | No | whether print hostname of bookies. | - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - * Response Body format: - - ```json - { - "bookieSocketAddress1": "hostname1", - "bookieSocketAddress2": "hostname2", - ... 
- } - ``` - -### Endpoint: /api/v1/bookie/list_bookie_info -1. Method: GET - * Description: Get bookies disk usage info of this cluster. - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - * Response Body format: - - ```json - { - "bookieAddress" : {free: xxx, total: xxx}, - "bookieAddress" : {free: xxx, total: xxx}, - ... - "clusterInfo" : {total_free: xxx, total: xxx} - } - ``` - -### Endpoint: /api/v1/bookie/last_log_mark -1. Method: GET - * Description: Get the last log marker. - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - * Response Body format: - - ```json - { - JournalId1 : position1, - JournalId2 : position2, - ... - } - ``` - -### Endpoint: /api/v1/bookie/list_disk_file/?file_type=<type> -1. Method: GET - * Description: Get all the files on disk of current bookie. - * Parameters: - - | Name | Type | Required | Description | - |:-----|:-----|:---------|:------------| - |type | String | No | file type: journal/entrylog/index. | - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - * Response Body format: - - ```json - { - "journal files" : "filename1 filename2 ...", - "entrylog files" : "filename1 filename2...", - "index files" : "filename1 filename2 ..." - } - ``` - -### Endpoint: /api/v1/bookie/expand_storage -1. Method: PUT - * Description: Expand storage for a bookie. - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - -## Auto recovery - -### Endpoint: /api/v1/autorecovery/bookie/ -1. Method: PUT - * Description: Ledger data recovery for failed bookie - * Body: - ```json - { - "bookie_src": [ "bookie_src1", "bookie_src2"... ], - "bookie_dest": [ "bookie_dest1", "bookie_dest2"... ], - "delete_cookie": - } - ``` - * Parameters: - - | Name | Type | Required | Description | - |:-----|:-----|:---------|:------------| - |bookie_src | Strings | Yes | bookie source to recovery | - |bookie_dest | Strings | No | bookie data recovery destination | - |delete_cookie | Boolean | No | Whether delete cookie | - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - -### Endpoint: /api/v1/autorecovery/list_under_replicated_ledger/?missingreplica=<bookie_address>&excludingmissingreplica=<bookie_address> -1. Method: GET - * Description: Get all under replicated ledgers. - * Parameters: - - | Name | Type | Required | Description | - |:-----|:-----|:---------|:------------| - |missingreplica | String | No | missing replica bookieId | - |excludingmissingreplica | String | No | exclude missing replica bookieId | - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - * Response Body format: - - ```json - { - [ledgerId1, ledgerId2...] - } - ``` - -### Endpoint: /api/v1/autorecovery/who_is_auditor -1. Method: GET - * Description: Get auditor bookie id. 
-   * Response:
-
-     | Code   | Description |
-     |:-------|:------------|
-     |200 | Successful operation |
-     |403 | Permission denied |
-     |404 | Not found |
-   * Response Body format:
-
-     ```json
-     {
-       "Auditor": "hostname/hostAddress:Port"
-     }
-     ```
-
-### Endpoint: /api/v1/autorecovery/trigger_audit
-1. Method: PUT
-   * Description: Force trigger an audit by resetting the lostBookieRecoveryDelay.
-   * Response:
-
-     | Code   | Description |
-     |:-------|:------------|
-     |200 | Successful operation |
-     |403 | Permission denied |
-     |404 | Not found |
-
-### Endpoint: /api/v1/autorecovery/lost_bookie_recovery_delay
-1. Method: GET
-   * Description: Get the lostBookieRecoveryDelay value in seconds.
-   * Response:
-
-     | Code   | Description |
-     |:-------|:------------|
-     |200 | Successful operation |
-     |403 | Permission denied |
-     |404 | Not found |
-
-1. Method: PUT
-   * Description: Set the lostBookieRecoveryDelay value in seconds.
-   * Body:
-     ```json
-     {
-       "delay_seconds":
-     }
-     ```
-   * Parameters:
-
-     | Name | Type | Required | Description |
-     |:-----|:-----|:---------|:------------|
-     | delay_seconds | Long | Yes | set delay value in seconds. |
-   * Response:
-
-     | Code   | Description |
-     |:-------|:------------|
-     |200 | Successful operation |
-     |403 | Permission denied |
-     |404 | Not found |
-
-### Endpoint: /api/v1/autorecovery/decommission
-1. Method: PUT
-   * Description: Decommission a bookie: force trigger an audit task and make sure all the ledgers stored on the decommissioning bookie are replicated.
-   * Body:
-     ```json
-     {
-       "bookie_src":
-     }
-     ```
-   * Parameters:
-
-     | Name | Type | Required | Description |
-     |:-----|:-----|:---------|:------------|
-     | bookie_src | String | Yes | Bookie src to decommission. |
-   * Response:
-
-     | Code   | Description |
-     |:-------|:------------|
-     |200 | Successful operation |
-     |403 | Permission denied |
-     |404 | Not found | diff --git a/site/docs/4.6.2/admin/metrics.md b/site/docs/4.6.2/admin/metrics.md deleted file mode 100644 index 142df3dcd2d..00000000000 --- a/site/docs/4.6.2/admin/metrics.md +++ /dev/null @@ -1,41 +0,0 @@ ----
-title: Metric collection
----
-
-BookKeeper enables metrics collection through a variety of [stats providers](#stats-providers).
-
-> For a full listing of available metrics, see the [Metrics](../../reference/metrics) reference doc.
-
-## Stats providers
-
-BookKeeper has stats provider implementations for five sinks:
-
-Provider | Provider class name
-:--------|:-------------------
-[Codahale Metrics](https://mvnrepository.com/artifact/org.apache.bookkeeper.stats/codahale-metrics-provider) | `org.apache.bookkeeper.stats.CodahaleMetricsProvider`
-[Prometheus](https://prometheus.io/) | `org.apache.bookkeeper.stats.prometheus.PrometheusMetricsProvider`
-[Finagle](https://twitter.github.io/finagle/guide/Metrics.html) | `org.apache.bookkeeper.stats.FinagleStatsProvider`
-[Ostrich](https://github.com/twitter/ostrich) | `org.apache.bookkeeper.stats.OstrichProvider`
-[Twitter Science Provider](https://mvnrepository.com/artifact/org.apache.bookkeeper.stats/twitter-science-provider) | `org.apache.bookkeeper.stats.TwitterStatsProvider`
-
-> The [Codahale Metrics]({{ site.github_master }}/bookkeeper-stats-providers/codahale-metrics-provider) stats provider is the default provider.
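-
-For example, to point a bookie at the Prometheus provider instead of the default, the relevant settings (described in the next section) might look like this sketch:
-
-```shell
-enableStatistics=true
-statsProviderClass=org.apache.bookkeeper.stats.prometheus.PrometheusMetricsProvider
-```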
-
-## Enabling stats providers in bookies
-
-There are two stats-related [configuration parameters](../../reference/config#statistics) available for bookies:
-
-Parameter | Description | Default
-:---------|:------------|:-------
-`enableStatistics` | Whether statistics are enabled for the bookie | `false`
-`statsProviderClass` | The stats provider class used by the bookie | `org.apache.bookkeeper.stats.CodahaleMetricsProvider`
-
-To enable stats:
-
-* set the `enableStatistics` parameter to `true`
-* set `statsProviderClass` to the desired provider (see the [table above](#stats-providers) for a listing of classes)
-
- diff --git a/site/docs/4.6.2/admin/perf.md b/site/docs/4.6.2/admin/perf.md deleted file mode 100644 index 82956326e5d..00000000000 --- a/site/docs/4.6.2/admin/perf.md +++ /dev/null @@ -1,3 +0,0 @@ ----
-title: Performance tuning
---- diff --git a/site/docs/4.6.2/admin/placement.md b/site/docs/4.6.2/admin/placement.md deleted file mode 100644 index ded456e1aea..00000000000 --- a/site/docs/4.6.2/admin/placement.md +++ /dev/null @@ -1,3 +0,0 @@ ----
-title: Customized placement policies
---- diff --git a/site/docs/4.6.2/admin/upgrade.md b/site/docs/4.6.2/admin/upgrade.md deleted file mode 100644 index 1aa84a4ab40..00000000000 --- a/site/docs/4.6.2/admin/upgrade.md +++ /dev/null @@ -1,76 +0,0 @@ ----
-title: Upgrade
----
-
-> If you have questions about upgrades (or need help), please feel free to reach out to us by [mailing list]({{ site.baseurl }}community/mailing-lists) or [Slack Channel]({{ site.baseurl }}community/slack).
-
-## Overview
-
-Consider the guidelines below in preparation for upgrading.
-
-- Always back up all your configuration files before upgrading.
-- Read through the documentation and draft an upgrade plan that matches your specific requirements and environment before starting the upgrade process. Put differently, don't start working through this guide on a live cluster: read the guide entirely, make a plan, then execute the plan.
-- Pay careful attention to the order in which components are upgraded. In general, you need to upgrade bookies first and then upgrade your clients.
-- If autorecovery is running along with bookies, you need to pay attention to the upgrade sequence.
-- Read the release notes carefully for each release. They contain not only information about noteworthy features, but also changes to configurations that may impact your upgrade.
-- Always upgrade one or a small set of bookies to canary the new version before upgrading all bookies in your cluster.
-
-## Canary
-
-It is wise to canary an upgraded version on one or a small set of bookies before upgrading all bookies in your live cluster.
-
-You can follow the steps below to canary an upgraded version:
-
-1. Stop a Bookie.
-2. Upgrade the binary and configuration.
-3. Start the Bookie in `ReadOnly` mode. This can be used to verify that the Bookie of this new version runs well for read workloads.
-4. Once the Bookie has been running in `ReadOnly` mode successfully for a while, restart the Bookie in `Write/Read` mode.
-5. After step 4, the Bookie will serve both write and read traffic.
-
-### Rollback Canaries
-
-If problems occur while canarying an upgraded version, you can simply take down the problematic Bookie node. The remaining bookies in the old cluster will repair the problematic bookie node via autorecovery; no manual intervention is needed.
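-
-One way to realize step 3 of the canary procedure above is to force the upgraded bookie into read-only mode via configuration before restarting it in normal mode (a sketch; `forceReadOnlyBookie` is a bookie config parameter, and the paths below assume a default layout):
-
-```shell
-# Start the canary bookie in read-only mode
-echo "forceReadOnlyBookie=true" >> bookkeeper-server/conf/bk_server.conf
-bookkeeper-server/bin/bookkeeper bookie
-# Once satisfied, remove the override and restart in write/read mode
-```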
-
-## Upgrade Steps
-
-Once you have determined that a version is safe to run on a few nodes in your cluster, you can perform the following steps to upgrade all bookies in your cluster.
-
-1. Determine if autorecovery is running along with bookies. If yes, check if the clients (either new clients with the new binary or old clients with new configurations) are allowed to talk to old bookies; if clients are not allowed to talk to old bookies, please [disable autorecovery](../../reference/cli/#autorecovery-1) during the upgrade.
-2. Decide on performing a rolling upgrade or a downtime upgrade.
-3. Upgrade all Bookies (more below).
-4. If autorecovery was disabled during the upgrade, [enable autorecovery](../../reference/cli/#autorecovery-1).
-5. After all bookies are upgraded, build applications that use the `BookKeeper client` against the new bookkeeper libraries and deploy the new versions.
-
-### Upgrade Bookies
-
-In a rolling upgrade scenario, upgrade one Bookie at a time. In a downtime upgrade scenario, take the entire cluster down, upgrade each Bookie, then start the cluster.
-
-For each Bookie:
-
-1. Stop the bookie.
-2. Upgrade the software (either new binary or new configuration).
-3. Start the bookie.
-
-## Upgrade Guides
-
-The general upgrade method for Apache BookKeeper is described above. Below we cover the details for individual versions.
-
-### 4.5.x to 4.6.x upgrade
-
-There are no protocol-related backward-compatibility changes in 4.6.x, so you can follow the general upgrade sequence to upgrade from 4.5.x to 4.6.x.
-
-### 4.4.x to 4.5.x upgrade
-
-There are no protocol-related backward-compatibility changes in 4.5.0, so you can follow the general upgrade sequence to upgrade from 4.4.x to 4.5.x. However, here is a list of things that you might want to know.
-
-1. 4.5.x upgrades netty from 3.x to 4.x. The memory usage pattern might change a bit. Netty 4 uses more direct memory. Please pay attention to your memory usage and adjust the JVM settings accordingly.
-2. `multi journals` is a non-rollbackable feature. If you configure a bookie to use multiple journals on 4.5.x you cannot roll the bookie back to 4.4.x. You have to take the bookie out and recover it if you want to roll back to 4.4.x.
-
-If you are planning to upgrade a non-secured cluster to a secured cluster enabling security features in 4.5.0, please read [BookKeeper Security](../../security/overview) for more details. diff --git a/site/docs/4.6.2/api/distributedlog-api.md b/site/docs/4.6.2/api/distributedlog-api.md deleted file mode 100644 index 85462452691..00000000000 --- a/site/docs/4.6.2/api/distributedlog-api.md +++ /dev/null @@ -1,395 +0,0 @@ ----
-title: DistributedLog
-subtitle: A higher-level API for managing BookKeeper entries
----
-
-> DistributedLog began its life as a separate project under the Apache Foundation. It was merged into BookKeeper in 2017.
-
-The DistributedLog API is an easy-to-use interface for managing BookKeeper entries that enables you to use BookKeeper without needing to interact with [ledgers](../ledger-api) directly.
-
-DistributedLog (DL) maintains sequences of records in categories called *logs* (aka *log streams*). *Writers* append records to DL logs, while *readers* fetch and process those records.
-
-## Architecture
-
-The diagram below illustrates how the DistributedLog API works with BookKeeper:
-
-![DistributedLog API]({{ site.baseurl }}img/distributedlog.png)
-
-## Logs
-
-A *log* in DistributedLog is an ordered, immutable sequence of *log records*.
-
-The diagram below illustrates the anatomy of a log stream:
-
-![DistributedLog log]({{ site.baseurl }}img/logs.png)
-
-### Log records
-
-Each log record is a sequence of bytes. Applications are responsible for serializing and deserializing byte sequences stored in log records.
-
-Log records are written sequentially into a *log stream* and assigned a unique sequence number called a DLSN (DistributedLog Sequence Number).
-
-In addition to a DLSN, applications can assign their own sequence number when constructing log records. Application-defined sequence numbers are known as *TransactionIDs* (or *txid*). Either a DLSN or a TransactionID can be used for positioning readers to start reading from a specific log record.
-
-### Log segments
-
-Each log is broken down into *log segments* that contain subsets of records. Log segments are distributed and stored in BookKeeper. DistributedLog rolls the log segments based on the configured *rolling policy*, which can be either
-
-* a configurable period of time (such as every 2 hours), or
-* a configurable maximum size (such as every 128 MB).
-
-The data in logs is divided up into equally sized log segments and distributed evenly across {% pop bookies %}. This allows logs to scale beyond a size that would fit on a single server and spreads read traffic across the cluster.
-
-### Namespaces
-
-Log streams that belong to the same organization are typically categorized and managed under a *namespace*. DistributedLog namespaces essentially enable applications to locate log streams. Applications can perform the following actions under a namespace:
-
-* create streams
-* delete streams
-* truncate streams to a given sequence number (either a DLSN or a TransactionID)
-
-## Writers
-
-Through the DistributedLog API, writers write data into logs of their choice. All records are appended into logs in order. The sequencing is performed by the writer, which means that there is only one active writer for a log at any given time.
-
-When a network partition causes two writers to attempt to write to the same log, DistributedLog guarantees correctness using a *fencing* mechanism in the log segment store.
-
-### Write Proxy
-
-Log writers are served and managed in a service tier called the *Write Proxy* (see the diagram [above](#architecture)). The Write Proxy is used for accepting writes from a large number of clients.
-
-## Readers
-
-DistributedLog readers read records from logs of their choice, starting with a provided position. The provided position can be either a DLSN or a TransactionID.
-
-Readers read records from logs in strict order. Different readers can read records from different positions in the same log.
-
-Unlike other pub-sub systems, DistributedLog doesn't record or manage readers' positions. This means that tracking is the responsibility of applications, as different applications may have different requirements for tracking and coordinating positions. This is hard to get right with a single approach. Distributed databases, for example, might store reader positions along with SSTables, so they would resume applying transactions from the positions stored in SSTables. Tracking reader positions could easily be done at the application level using various stores (such as ZooKeeper, the filesystem, or key-value stores).
-
-### Read Proxy
-
-Log records can be cached in a service tier called the *Read Proxy* to serve a large number of readers. See the diagram [above](#architecture). The Read Proxy is the analogue of the [Write Proxy](#write-proxy).
-
-## Guarantees
-
-The DistributedLog API for BookKeeper provides a number of guarantees for applications:
-
-* Records written by a [writer](#writers) to a [log](#logs) are appended in the order in which they are written. If a record **R1** is written by the same writer as a record **R2**, **R1** will have a smaller sequence number than **R2**.
-* [Readers](#readers) see [records](#log-records) in the same order in which they are [written](#writers) to the log.
-* All records are persisted on disk by BookKeeper before acknowledgements, which guarantees durability.
-* For a log with a replication factor of N, DistributedLog tolerates up to N-1 server failures without losing any records.
-
-## API
-
-Documentation for the DistributedLog API can be found [here](https://bookkeeper.apache.org/distributedlog/docs/latest/user_guide/api/core).
-
-> At a later date, the DistributedLog API docs will be added here.
-
-
diff --git a/site/docs/4.6.2/api/ledger-adv-api.md b/site/docs/4.6.2/api/ledger-adv-api.md
deleted file mode 100644
index f46950dd984..00000000000
--- a/site/docs/4.6.2/api/ledger-adv-api.md
+++ /dev/null
@@ -1,82 +0,0 @@
----
-title: The Advanced Ledger API
----
-
-In release `4.5.0`, Apache BookKeeper introduced a few advanced APIs for advanced usage.
-This section covers these advanced APIs.
-
-> Before learning the advanced API, please read the [Ledger API](../ledger-api) first.
-
-## LedgerHandleAdv
-
-[`LedgerHandleAdv`](../javadoc/org/apache/bookkeeper/client/LedgerHandleAdv) is an advanced extension of [`LedgerHandle`](../javadoc/org/apache/bookkeeper/client/LedgerHandle).
-It allows users to pass in an `entryId` when adding an entry.
-
-### Creating advanced ledgers
-
-Here's an example:
-
-```java
-byte[] passwd = "some-passwd".getBytes();
-LedgerHandleAdv handle = bkClient.createLedgerAdv(
-    3, 3, 2, // replica settings
-    DigestType.CRC32,
-    passwd);
-```
-
-You can also create advanced ledgers asynchronously.
-
-```java
-class LedgerCreationCallback implements AsyncCallback.CreateCallback {
-    public void createComplete(int returnCode, LedgerHandle handle, Object ctx) {
-        System.out.println("Ledger successfully created");
-    }
-}
-client.asyncCreateLedgerAdv(
-        3, // ensemble size
-        3, // write quorum size
-        2, // ack quorum size
-        BookKeeper.DigestType.CRC32,
-        password,
-        new LedgerCreationCallback(),
-        "some context"
-);
-```
-
-Besides the APIs above, BookKeeper allows users to provide a `ledger-id` when creating advanced ledgers.
-
-```java
-long ledgerId = ...; // the ledger id is generated externally.
-
-byte[] passwd = "some-passwd".getBytes();
-LedgerHandleAdv handle = bkClient.createLedgerAdv(
-    ledgerId, // ledger id generated externally
-    3, 3, 2, // replica settings
-    DigestType.CRC32,
-    passwd);
-```
-
-> Please note, it is the user's responsibility to provide a unique ledger id when using the API above.
-> If a ledger already exists when users try to create an advanced ledger with the same ledger id,
-> a [LedgerExistsException](../javadoc/org/apache/bookkeeper/client/BKException.BKLedgerExistException.html) is thrown by the bookkeeper client.
-
-### Add Entries
-
-The normal [add entries api](ledger-api/#adding-entries-to-ledgers) is disabled for advanced ledgers. Instead, when adding an entry
-to an advanced ledger, users must pass in an entry id along with the entry data.
-
-```java
-long entryId = ...; // entry id generated externally
-
-ledger.addEntry(entryId, "Some entry data".getBytes());
-```
-
-A few notes when using this API:
-
-- The entry id has to be non-negative.
-- Clients may add entries out of order.
-- However, the entries are only acknowledged in monotonic order, starting from 0.
-
-### Read Entries
-
-The read entries api in advanced ledgers remains the same as in [normal ledgers](../ledger-api/#reading-entries-from-ledgers).
diff --git a/site/docs/4.6.2/api/ledger-api.md b/site/docs/4.6.2/api/ledger-api.md
deleted file mode 100644
index 2007e71a02f..00000000000
--- a/site/docs/4.6.2/api/ledger-api.md
+++ /dev/null
@@ -1,810 +0,0 @@
----
-title: The Ledger API
----
-
-The ledger API is a lower-level API for BookKeeper that enables you to interact with {% pop ledgers %} directly.
-
-## The Java ledger API client
-
-To get started with the Java client for BookKeeper, install the `bookkeeper-server` library as a dependency in your Java application.
-
-> For a more in-depth tutorial that involves a real use case for BookKeeper, see the [Example application](../example-application) guide.
-
-## Installation
-
-The BookKeeper Java client library is available via [Maven Central](http://search.maven.org/) and can be installed using [Maven](#maven), [Gradle](#gradle), and other build tools.
-
-### Maven
-
-If you're using [Maven](https://maven.apache.org/), add this to your [`pom.xml`](https://maven.apache.org/guides/introduction/introduction-to-the-pom.html) build configuration file:
-
-```xml
-<!-- in your <properties> block -->
-<bookkeeper.version>4.6.2</bookkeeper.version>
-
-<!-- in your <dependencies> block -->
-<dependency>
-  <groupId>org.apache.bookkeeper</groupId>
-  <artifactId>bookkeeper-server</artifactId>
-  <version>${bookkeeper.version}</version>
-</dependency>
-```
-
-BookKeeper makes heavy use of the Google [protobuf](https://github.com/google/protobuf/tree/master/java) and [guava](https://github.com/google/guava) libraries.
-If your application might include different versions of protobuf or guava introduced by other dependencies, you can choose to use the
-shaded library, which relocates the protobuf and guava classes into a different namespace to avoid conflicts.
-
-You can use the shaded artifact of `bookkeeper-server`. Please note that [maven-shade-plugin](https://maven.apache.org/plugins/maven-shade-plugin) doesn't generate
-a dependency-reduced pom file for a shaded artifact attached via [shadedArtifactAttached](https://maven.apache.org/plugins/maven-shade-plugin/examples/attached-artifact.html), so you need to manually exclude the relocated packages when using the shaded artifact. A full example is shown below.
-
-```xml
-<!-- in your <properties> block -->
-<bookkeeper.version>4.6.2</bookkeeper.version>
-
-<!-- in your <dependencies> block -->
-<dependency>
-  <groupId>org.apache.bookkeeper</groupId>
-  <artifactId>bookkeeper-server</artifactId>
-  <version>${bookkeeper.version}</version>
-  <classifier>shaded</classifier>
-  <exclusions>
-    <exclusion>
-      <groupId>org.apache.bookkeeper</groupId>
-      <artifactId>bookkeeper-common</artifactId>
-    </exclusion>
-    <exclusion>
-      <groupId>org.apache.bookkeeper</groupId>
-      <artifactId>bookkeeper-proto</artifactId>
-    </exclusion>
-  </exclusions>
-</dependency>
-```
-
-Or you can use the separate shaded artifact `bookkeeper-server-shaded`.
-
-```xml
-<!-- in your <properties> block -->
-<bookkeeper.version>4.6.2</bookkeeper.version>
-
-<!-- in your <dependencies> block -->
-<dependency>
-  <groupId>org.apache.bookkeeper</groupId>
-  <artifactId>bookkeeper-server-shaded</artifactId>
-  <version>${bookkeeper.version}</version>
-</dependency>
-```
-
-### Gradle
-
-If you're using [Gradle](https://gradle.org/), add this to your [`build.gradle`](https://spring.io/guides/gs/gradle/) build configuration file:
-
-```groovy
-dependencies {
-    compile group: 'org.apache.bookkeeper', name: 'bookkeeper-server', version: '4.6.2'
-}
-
-// Alternatively:
-dependencies {
-    compile 'org.apache.bookkeeper:bookkeeper-server:4.6.2'
-}
-```
-
-As with Maven, you can also configure Gradle to use the shaded jars.
-
-```groovy
-// use the shaded artifact of the `bookkeeper-server` jar
-dependencies {
-    compile ('org.apache.bookkeeper:bookkeeper-server:{{ site.latest-version }}:shaded') {
-        exclude group: 'org.apache.bookkeeper', module: 'bookkeeper-common'
-        exclude group: 'org.apache.bookkeeper', module: 'bookkeeper-proto'
-    }
-}
-
-
-// use the `bookkeeper-server-shaded` jar
-dependencies {
-    compile 'org.apache.bookkeeper:bookkeeper-server-shaded:{{ site.latest-version }}'
-}
-```
-
-## Connection string
-
-When interacting with BookKeeper using the Java client, you need to provide your client with a connection string, for which you have three options:
-
-* Provide your entire ZooKeeper connection string, for example `zk1:2181,zk2:2181,zk3:2181`.
-* Provide a host and port for one node in your ZooKeeper cluster, for example `zk1:2181`. In general, it's better to provide a full connection string (in case the ZooKeeper node you attempt to connect to is down).
-* If your ZooKeeper cluster can be discovered via DNS, you can provide the DNS name, for example `my-zookeeper-cluster.com`.
-
-## Creating a new client
-
-In order to create a new [`BookKeeper`](../javadoc/org/apache/bookkeeper/client/BookKeeper) client object, you need to pass in a [connection string](#connection-string). Here is an example client object using a ZooKeeper connection string:
-
-```java
-try {
-    String connectionString = "127.0.0.1:2181"; // For a single-node, local ZooKeeper cluster
-    BookKeeper bkClient = new BookKeeper(connectionString);
-} catch (InterruptedException | IOException | KeeperException e) {
-    e.printStackTrace();
-}
-```
-
-> If you're running BookKeeper [locally](../../getting-started/run-locally), using the [`localbookie`](../../reference/cli#bookkeeper-localbookie) command, use `"127.0.0.1:2181"` for your connection string, as in the example above.
-
-There are, however, other ways that you can create a client object:
-
-* By passing in a [`ClientConfiguration`](../javadoc/org/apache/bookkeeper/conf/ClientConfiguration) object. Here's an example:
-
-  ```java
-  ClientConfiguration config = new ClientConfiguration();
-  config.setZkServers(zkConnectionString);
-  config.setAddEntryTimeout(2000);
-  BookKeeper bkClient = new BookKeeper(config);
-  ```
-
-* By specifying a `ClientConfiguration` and a [`ZooKeeper`](http://zookeeper.apache.org/doc/current/api/org/apache/zookeeper/ZooKeeper.html) client object:
-
-  ```java
-  ClientConfiguration config = new ClientConfiguration();
-  config.setAddEntryTimeout(5000);
-  ZooKeeper zkClient = new ZooKeeper(/* client args */);
-  BookKeeper bkClient = new BookKeeper(config, zkClient);
-  ```
-
-* Using the `forConfig` method:
-
-  ```java
-  BookKeeper bkClient = BookKeeper.forConfig(conf).build();
-  ```
-
-## Creating ledgers
-
-The easiest way to create a {% pop ledger %} using the Java client is via the `createLedger` method, which creates a new ledger synchronously and returns a [`LedgerHandle`](../javadoc/org/apache/bookkeeper/client/LedgerHandle). You must specify at least a [`DigestType`](../javadoc/org/apache/bookkeeper/client/BookKeeper.DigestType) and a password.
-
-Here's an example:
-
-```java
-byte[] password = "some-password".getBytes();
-LedgerHandle handle = bkClient.createLedger(BookKeeper.DigestType.MAC, password);
-```
-
-You can also create ledgers asynchronously.
-
-### Create ledgers asynchronously
-
-```java
-class LedgerCreationCallback implements AsyncCallback.CreateCallback {
-    public void createComplete(int returnCode, LedgerHandle handle, Object ctx) {
-        System.out.println("Ledger successfully created");
-    }
-}
-
-client.asyncCreateLedger(
-        3,
-        2,
-        BookKeeper.DigestType.MAC,
-        password,
-        new LedgerCreationCallback(),
-        "some context"
-);
-```
-
-## Adding entries to ledgers
-
-```java
-long entryId = ledger.addEntry("Some entry data".getBytes());
-```
-
-### Add entries asynchronously
-
-## Reading entries from ledgers
-
-```java
-Enumeration<LedgerEntry> entries = handle.readEntries(1, 99);
-```
-
-To read all possible entries from the ledger:
-
-```java
-Enumeration<LedgerEntry> entries =
-    handle.readEntries(0, handle.getLastAddConfirmed());
-
-while (entries.hasMoreElements()) {
-    LedgerEntry entry = entries.nextElement();
-    System.out.println("Successfully read entry " + entry.getId());
-}
-```
-
-### Reading entries after the LastAddConfirmed range
-
-`readUnconfirmedEntries` allows reading beyond the LastAddConfirmed range.
-It lets the client read without checking the local value of LastAddConfirmed, so it is possible to read entries for which the writer has not yet received an acknowledgement.
-For entries within the range 0..LastAddConfirmed, BookKeeper guarantees that the writer has successfully received the acknowledgement.
-For entries outside that range, it is possible that the writer never received the acknowledgement, so there is a risk that the reader sees entries before the writer does, which could result in a consistency issue in some cases.
-With this method you can read entries both before and after the LastAddConfirmed in a single call; the consistency expectations are as described above.
-
-```java
-Enumeration<LedgerEntry> entries =
-    handle.readUnconfirmedEntries(0, lastEntryIdExpectedToRead);
-
-while (entries.hasMoreElements()) {
-    LedgerEntry entry = entries.nextElement();
-    System.out.println("Successfully read entry " + entry.getId());
-}
-```
-
-## Deleting ledgers
-
-{% pop Ledgers %} can also be deleted synchronously or asynchronously.
-
-```java
-long ledgerId = 1234;
-
-try {
-    bkClient.deleteLedger(ledgerId);
-} catch (Exception e) {
-    e.printStackTrace();
-}
-```
-
-### Delete ledgers asynchronously
-
-```java
-class DeleteEntryCallback implements AsyncCallback.DeleteCallback {
-    public void deleteComplete(int returnCode, Object ctx) {
-        System.out.println("Delete completed");
-    }
-}
-```
-
-## Simple example
-
-> For a more involved BookKeeper client example, see the [example application](#example-application) below.
-
-In the code sample below, a BookKeeper client:
-
-* creates a ledger
-* writes entries to the ledger
-* closes the ledger (meaning no further writes are possible)
-* re-opens the ledger for reading
-* reads all available entries
-
-```java
-// Create a client object for the local ensemble. This
-// operation throws multiple exceptions, so make sure to
-// use a try/catch block when instantiating client objects.
-BookKeeper bkc = new BookKeeper("localhost:2181");
-
-// A password for the new ledger
-byte[] ledgerPassword = /* some sequence of bytes, perhaps random */;
-
-// Create a new ledger and fetch its identifier
-LedgerHandle lh = bkc.createLedger(BookKeeper.DigestType.MAC, ledgerPassword);
-long ledgerId = lh.getId();
-
-// Create a buffer for four-byte entries
-ByteBuffer entry = ByteBuffer.allocate(4);
-
-int numberOfEntries = 100;
-
-// Add entries to the ledger, then close it
-for (int i = 0; i < numberOfEntries; i++){
-    entry.putInt(i);
-    entry.position(0);
-    lh.addEntry(entry.array());
-}
-lh.close();
-
-// Open the ledger for reading
-lh = bkc.openLedger(ledgerId, BookKeeper.DigestType.MAC, ledgerPassword);
-
-// Read all available entries
-Enumeration<LedgerEntry> entries = lh.readEntries(0, numberOfEntries - 1);
-
-while(entries.hasMoreElements()) {
-    ByteBuffer result = ByteBuffer.wrap(entries.nextElement().getEntry());
-    Integer retrEntry = result.getInt();
-
-    // Print the integer stored in each entry
-    System.out.println(String.format("Result: %s", retrEntry));
-}
-
-// Close the ledger and the client
-lh.close();
-bkc.close();
-```
-
-Running this should return this output:
-
-```shell
-Result: 0
-Result: 1
-Result: 2
-# etc
-```
-
-## Example application
-
-This tutorial walks you through building an example application that uses BookKeeper as the replicated log. The application uses the [BookKeeper Java client](../java-client) to interact with BookKeeper.
-
-> The code for this tutorial can be found in [this GitHub repo](https://github.com/ivankelly/bookkeeper-tutorial/). The final code for the `Dice` class can be found [here](https://github.com/ivankelly/bookkeeper-tutorial/blob/master/src/main/java/org/apache/bookkeeper/Dice.java).
-
-### Setup
-
-Before you start, you will need to have a BookKeeper cluster running locally on your machine. For installation instructions, see [Installation](../../getting-started/installation).
-
-To start up a cluster consisting of six {% pop bookies %} locally:
-
-```shell
-$ bookkeeper-server/bin/bookkeeper localbookie 6
-```
-
-You can specify a different number of bookies if you'd like.
-
-### Goal
-
-The goal of the dice application is to have
-
-* multiple instances of this application,
-* possibly running on different machines,
-* all of which display the exact same sequence of numbers.
-
-In other words, the log needs to be both durable and consistent, regardless of how many {% pop bookies %} are participating in the BookKeeper ensemble. If one of the bookies crashes or becomes unable to communicate with the other bookies in any way, it should *still* display the same sequence of numbers as the others. This tutorial will show you how to achieve this.
-
-To begin, download the base application, then compile and run it.
-
-```shell
-$ git clone https://github.com/ivankelly/bookkeeper-tutorial.git
-$ mvn package
-$ mvn exec:java -Dexec.mainClass=org.apache.bookkeeper.Dice
-```
-
-That should yield output that looks something like this:
-
-```
-[INFO] Scanning for projects...
-[INFO]
-[INFO] ------------------------------------------------------------------------
-[INFO] Building tutorial 1.0-SNAPSHOT
-[INFO] ------------------------------------------------------------------------
-[INFO]
-[INFO] --- exec-maven-plugin:1.3.2:java (default-cli) @ tutorial ---
-[WARNING] Warning: killAfter is now deprecated. Do you need it ? Please comment on MEXEC-6.
-Value = 4
-Value = 5
-Value = 3
-```
-
-### The base application
-
-The application in this tutorial is a dice application. The `Dice` class below has a `playDice` function that generates a random number between 1 and 6 every second, prints the value of the dice roll, and runs indefinitely.
-
-```java
-public class Dice {
-    Random r = new Random();
-
-    void playDice() throws InterruptedException {
-        while (true) {
-            Thread.sleep(1000);
-            System.out.println("Value = " + (r.nextInt(6) + 1));
-        }
-    }
-}
-```
-
-When you run the `main` function of this class, a new `Dice` object will be instantiated and then run indefinitely:
-
-```java
-public class Dice {
-    // other methods
-
-    public static void main(String[] args) throws InterruptedException {
-        Dice d = new Dice();
-        d.playDice();
-    }
-}
-```
-
-### Leaders and followers (and a bit of background)
-
-To achieve this common view in multiple instances of the program, we need each instance to agree on what the next number in the sequence will be. For example, the instances must agree that 4 is the first number and 2 is the second number and 5 is the third number and so on. This is a difficult problem, especially in the case that any instance may go away at any time, and messages between the instances can be lost or reordered.
-
-Luckily, there are already algorithms to solve this. Paxos is an abstract algorithm for implementing this kind of agreement, while Zab and Raft are more practical protocols. They all have a similar core.
-
-It would be possible to run Paxos to agree on each number in the sequence. However, running Paxos each time can be expensive. What Zab and Raft do instead is use a Paxos-like algorithm to elect a leader. The leader then decides what the sequence of events should be, putting them in a log, which the other instances can then follow to maintain the same state as the leader.
-
-BookKeeper provides the functionality for the second part of the protocol, allowing a leader to write events to a log and have multiple followers tailing the log. However, BookKeeper does not do leader election. You will need a ZooKeeper or Raft instance for that purpose.
-
-### Why not just use ZooKeeper?
-
-There are a number of reasons:
-
-1. ZooKeeper's log is only exposed through a tree-like interface. It can be hard to shoehorn your application into this.
-2. A ZooKeeper ensemble of multiple machines is limited to one log. You may want one log per resource, which will become expensive very quickly.
-3. Adding extra machines to a ZooKeeper ensemble does not increase capacity or throughput.
-
-BookKeeper can be seen as a means of exposing ZooKeeper's replicated log to applications in a scalable fashion. ZooKeeper is still used by BookKeeper, however, to maintain consistency guarantees, though clients don't need to interact with ZooKeeper directly.
-
-### Electing a leader
-
-We'll use ZooKeeper to elect a leader. A ZooKeeper instance will have started locally when you started the localbookie application above. To verify it's running, run the following command.
-
-```shell
-$ echo stat | nc localhost 2181
-Zookeeper version: 3.4.6-1569965, built on 02/20/2014 09:09 GMT
-Clients:
- /127.0.0.1:59343[1](queued=0,recved=40,sent=41)
- /127.0.0.1:49354[1](queued=0,recved=11,sent=11)
- /127.0.0.1:49361[0](queued=0,recved=1,sent=0)
- /127.0.0.1:59344[1](queued=0,recved=38,sent=39)
- /127.0.0.1:59345[1](queued=0,recved=38,sent=39)
- /127.0.0.1:59346[1](queued=0,recved=38,sent=39)
-
-Latency min/avg/max: 0/0/23
-Received: 167
-Sent: 170
-Connections: 6
-Outstanding: 0
-Zxid: 0x11
-Mode: standalone
-Node count: 16
-```
-
-To interact with ZooKeeper, we'll use the Curator client rather than the stock ZooKeeper client. Getting things right with the ZooKeeper client can be tricky, and Curator removes a lot of the pointy corners for you. In fact, Curator even provides a leader election recipe, so we need to do very little work to get leader election in our application.
-
-```java
-public class Dice extends LeaderSelectorListenerAdapter implements Closeable {
-
-    final static String ZOOKEEPER_SERVER = "127.0.0.1:2181";
-    final static String ELECTION_PATH = "/dice-elect";
-
-    ...
-
-    Dice() throws InterruptedException {
-        curator = CuratorFrameworkFactory.newClient(ZOOKEEPER_SERVER,
-                2000, 10000, new ExponentialBackoffRetry(1000, 3));
-        curator.start();
-        curator.blockUntilConnected();
-
-        leaderSelector = new LeaderSelector(curator, ELECTION_PATH, this);
-        leaderSelector.autoRequeue();
-        leaderSelector.start();
-    }
-```
-
-In the constructor for Dice, we need to create the Curator client. We specify four things when creating the client: the location of the ZooKeeper service, the session timeout, the connection timeout, and the retry policy.
-
-The session timeout is a ZooKeeper concept. If the ZooKeeper server doesn't hear anything from the client for this amount of time, any leases which the client holds will be timed out. This is important in leader election. For leader election, the Curator client will take a lease on ELECTION_PATH. The first instance to take the lease will become leader, and the rest will become followers. However, their claim on the lease will remain in the queue. If the first instance then goes away, due to a crash or similar, its session will time out. Once the session times out, the lease will be released and the next instance in the queue will become the leader.
-
-The call to autoRequeue() will make the client queue itself again if it loses the lease for some other reason, such as if it was still alive but a garbage collection pause caused it to lose its session, and thereby its lease. I've set the session timeout quite low so that when we test out leader election, transitions will be quick. The optimum length for the session timeout depends very much on the use case. The other parameters are the connection timeout, i.e. the amount of time the client will spend trying to connect to a ZooKeeper server before giving up, and the retry policy. The retry policy specifies how the client should respond to transient errors, such as connection loss. Operations that fail with transient errors can be retried, and this argument specifies how often the retries should occur.
-
-Finally, you'll have noticed that Dice now extends LeaderSelectorListenerAdapter and implements Closeable. Closeable is there to close the resources we have initialized in the constructor: the Curator client and the leaderSelector. LeaderSelectorListenerAdapter is a callback that the leaderSelector uses to notify the instance that it is now the leader.
It is passed as the third argument to the LeaderSelector constructor.
-
-```java
-    @Override
-    public void takeLeadership(CuratorFramework client)
-            throws Exception {
-        synchronized (this) {
-            leader = true;
-            try {
-                while (true) {
-                    this.wait();
-                }
-            } catch (InterruptedException ie) {
-                Thread.currentThread().interrupt();
-                leader = false;
-            }
-        }
-    }
-```
-
-takeLeadership() is the callback called by LeaderSelector when the instance is leader. It should only return when the instance wants to give up leadership. In our case, we never want to give it up, so we wait on the current object until we're interrupted. To signal to the rest of the program that we are leader, we set a volatile boolean called leader to true. This is unset after we are interrupted.
-
-```java
-    void playDice() throws InterruptedException {
-        while (true) {
-            while (leader) {
-                Thread.sleep(1000);
-                System.out.println("Value = " + (r.nextInt(6) + 1)
-                        + ", isLeader = " + leader);
-            }
-        }
-    }
-```
-
-Finally, we modify the `playDice` function to only generate random numbers when it is the leader.
-
-Run two instances of the program in two different terminals. You'll see that one becomes leader and prints numbers while the other just sits there.
-
-Now stop the leader using Control-Z. This will pause the process, but it won't kill it. You will be dropped back to the shell in that terminal. After a couple of seconds (the session timeout), you will see that the other instance has become the leader. ZooKeeper will guarantee that only one instance is selected as leader at any time.
-
-Now go back to the shell that the original leader was on and wake up the process using fg. You'll see something like the following:
-
-```shell
-...
-...
-Value = 4, isLeader = true
-Value = 4, isLeader = true
-^Z
-[1]+  Stopped                 mvn exec:java -Dexec.mainClass=org.apache.bookkeeper.Dice
-$ fg
-mvn exec:java -Dexec.mainClass=org.apache.bookkeeper.Dice
-Value = 3, isLeader = true
-Value = 1, isLeader = false
-```
-
-## New API
-
-Since 4.6, BookKeeper provides a new client API that leverages the Java 8 [CompletableFuture](https://docs.oracle.com/javase/8/docs/api/java/util/concurrent/CompletableFuture.html) facility.
-[WriteHandle](../javadoc/org/apache/bookkeeper/client/api/WriteHandle), [WriteAdvHandle](../javadoc/org/apache/bookkeeper/client/api/WriteAdvHandle), and [ReadHandle](../javadoc/org/apache/bookkeeper/client/api/ReadHandle) are introduced to replace the generic [LedgerHandle](../javadoc/org/apache/bookkeeper/client/LedgerHandle).
-
-> The new API is available in `org.apache.bookkeeper.client.api`. You should only use interfaces defined in this package.
-
-*Beware* that in 4.6 this API is still experimental and subject to change in future minor releases.
-
-### Create a new client
-
-In order to create a new [`BookKeeper`](../javadoc/org/apache/bookkeeper/client/api/BookKeeper) client object, you need to construct a [`ClientConfiguration`](../javadoc/org/apache/bookkeeper/conf/ClientConfiguration) object and set a [connection string](#connection-string) first, and then use [`BookKeeperBuilder`](../javadoc/org/apache/bookkeeper/client/api/BookKeeperBuilder) to build the client.
-
-Here is an example building the bookkeeper client.
-
-```java
-// construct a client configuration instance
-ClientConfiguration conf = new ClientConfiguration();
-conf.setZkServers(zkConnectionString);
-conf.setZkLedgersRootPath("/path/to/ledgers/root");
-
-// build the bookkeeper client
-BookKeeper bk = BookKeeper.newBuilder(conf)
-    .statsLogger(...)
-    ...
- .build(); - -``` - -### Create ledgers - -the easiest way to create a {% pop ledger %} using the java client is via the [`createbuilder`](../javadoc/org/apache/bookkeeper/client/api/createbuilder). you must specify at least -a [`digesttype`](../javadoc/org/apache/bookkeeper/client/api/digesttype) and a password. - -here's an example: - -```java -BookKeeper bk = ...; - -byte[] password = "some-password".getBytes(); - -WriteHandle wh = bk.newCreateLedgerOp() - .withDigestType(DigestType.CRC32) - .withPassword(password) - .withEnsembleSize(3) - .withWriteQuorumSize(3) - .withAckQuorumSize(2) - .execute() // execute the creation op - .get(); // wait for the execution to complete -``` - -A [`WriteHandle`](../javadoc/org/apache/bookkeeper/client/api/WriteHandle) is returned for applications to write and read entries to and from the ledger. - -### Append entries to ledgers - -The [`WriteHandle`](../javadoc/org/apache/bookkeeper/client/api/WriteHandle) can be used for applications to append entries to the ledgers. - -```java -WriteHandle wh = ...; - -CompletableFuture addFuture = wh.append("Some entry data".getBytes()); - -// option 1: you can wait for add to complete synchronously -try { - long entryId = FutureUtils.result(addFuture.get()); -} catch (BKException bke) { - // error handling -} - -// option 2: you can process the result and exception asynchronously -addFuture - .thenApply(entryId -> { - // process the result - }) - .exceptionally(cause -> { - // handle the exception - }) - -// option 3: bookkeeper provides a twitter-future-like event listener for processing result and exception asynchronously -addFuture.whenComplete(new FutureEventListener() { - @Override - public void onSuccess(long entryId) { - // process the result - } - @Override - public void onFailure(Throwable cause) { - // handle the exception - } -}); -``` - -The append method supports three representations of a bytes array: the native java `byte[]`, java nio `ByteBuffer` and netty `ByteBuf`. -It is recommended to use `ByteBuf` as it is more gc friendly. - -### Open ledgers - -You can open ledgers to read entries. Opening ledgers is done by [`openBuilder`](../javadoc/org/apache/bookkeeper/client/api/openBuilder). You must specify the ledgerId and the password -in order to open the ledgers. - -here's an example: - -```java -BookKeeper bk = ...; - -long ledgerId = ...; -byte[] password = "some-password".getBytes(); - -ReadHandle rh = bk.newOpenLedgerOp() - .withLedgerId(ledgerId) - .withPassword(password) - .execute() // execute the open op - .get(); // wait for the execution to complete -``` - -A [`ReadHandle`](../javadoc/org/apache/bookkeeper/client/api/ReadHandle) is returned for applications to read entries to and from the ledger. - -#### Recovery vs NoRecovery - -By default, the [`openBuilder`](../javadoc/org/apache/bookkeeper/client/api/openBuilder) opens the ledger in a `NoRecovery` mode. You can open the ledger in `Recovery` mode by specifying -`withRecovery(true)` in the open builder. - -```java -BookKeeper bk = ...; - -long ledgerId = ...; -byte[] password = "some-password".getBytes(); - -ReadHandle rh = bk.newOpenLedgerOp() - .withLedgerId(ledgerId) - .withPassword(password) - .withRecovery(true) - .execute() - .get(); - -``` - -**What is the difference between "Recovery" and "NoRecovery"?** - -If you are opening a ledger in "Recovery" mode, it will basically fence and seal the ledger -- no more entries are allowed -to be appended to it. 
The writer that is currently appending entries to the ledger will fail with [`LedgerFencedException`](../javadoc/org/apache/bookkeeper/client/api/BKException.Code#LedgerFencedException).
-
-In contrast, opening a ledger in "NoRecovery" mode does not fence and seal the ledger. "NoRecovery" mode is usually used by applications for tailing reads from a ledger.
-
-### Read entries from ledgers
-
-The [`ReadHandle`](../javadoc/org/apache/bookkeeper/client/api/ReadHandle) returned from the open builder can be used for applications to read entries from the ledgers.
-
-```java
-ReadHandle rh = ...;
-
-long startEntryId = ...;
-long endEntryId = ...;
-CompletableFuture<LedgerEntries> readFuture = rh.read(startEntryId, endEntryId);
-
-// option 1: you can wait for the read to complete synchronously
-try {
-    LedgerEntries entries = FutureUtils.result(readFuture);
-} catch (BKException bke) {
-    // error handling
-}
-
-// option 2: you can process the result and exception asynchronously
-readFuture
-    .thenApply(entries -> {
-        // process the result
-        return entries;
-    })
-    .exceptionally(cause -> {
-        // handle the exception
-        return null;
-    });
-
-// option 3: bookkeeper provides a twitter-future-like event listener for processing result and exception asynchronously
-readFuture.whenComplete(new FutureEventListener<LedgerEntries>() {
-    @Override
-    public void onSuccess(LedgerEntries entries) {
-        // process the result
-    }
-    @Override
-    public void onFailure(Throwable cause) {
-        // handle the exception
-    }
-});
-```
-
-Once you are done processing the [`LedgerEntries`](../javadoc/org/apache/bookkeeper/client/api/LedgerEntries), you can call `#close()` on the `LedgerEntries` instance to
-release the buffers held by it.
-
-Applications are allowed to read any entries between `0` and [`LastAddConfirmed`](../javadoc/org/apache/bookkeeper/client/api/ReadHandle.html#getLastAddConfirmed). If an application
-attempts to read entries beyond `LastAddConfirmed`, it will receive an [`IncorrectParameterException`](../javadoc/org/apache/bookkeeper/client/api/BKException.Code#IncorrectParameterException).
-
-### Read unconfirmed entries from ledgers
-
-`readUnconfirmed` provides a mechanism for applications to read entries beyond `LastAddConfirmed`. Applications should be aware that `readUnconfirmed` doesn't provide any
-repeatable read consistency.
-
-```java
-CompletableFuture<LedgerEntries> readFuture = rh.readUnconfirmed(startEntryId, endEntryId);
-```
-
-### Tailing Reads
-
-There are two methods for applications to achieve tailing reads: `Polling` and `Long-Polling`.
-
-#### Polling
-
-You can do this in a synchronous way:
-
-```java
-ReadHandle rh = ...;
-
-long startEntryId = 0L;
-long nextEntryId = startEntryId;
-int numEntriesPerBatch = 4;
-while (!rh.isClosed() || nextEntryId <= rh.getLastAddConfirmed()) {
-    long lac = rh.getLastAddConfirmed();
-    if (nextEntryId > lac) {
-        // no more entries have been added; wait and refresh the LastAddConfirmed value
-        Thread.sleep(1000);
-
-        lac = rh.readLastAddConfirmed().get();
-        continue;
-    }
-
-    long endEntryId = Math.min(lac, nextEntryId + numEntriesPerBatch - 1);
-    LedgerEntries entries = rh.read(nextEntryId, endEntryId).get();
-
-    // process the entries
-
-    nextEntryId = endEntryId + 1;
-}
-```
-
-#### Long Polling
-
-```java
-ReadHandle rh = ...;
-
-long startEntryId = 0L;
-long nextEntryId = startEntryId;
-int numEntriesPerBatch = 4;
-while (!rh.isClosed() || nextEntryId <= rh.getLastAddConfirmed()) {
-    long lac = rh.getLastAddConfirmed();
-    if (nextEntryId > lac) {
-        // no more entries have been added; long-poll for the next entry
-        try (LastConfirmedAndEntry lacAndEntry = rh.readLastAddConfirmedAndEntry(nextEntryId, 1000, false).get()) {
-            if (lacAndEntry.hasEntry()) {
-                // process the entry
-
-                ++nextEntryId;
-            }
-        }
-    } else {
-        long endEntryId = Math.min(lac, nextEntryId + numEntriesPerBatch - 1);
-        LedgerEntries entries = rh.read(nextEntryId, endEntryId).get();
-
-        // process the entries
-        nextEntryId = endEntryId + 1;
-    }
-}
-```
-
-### Delete ledgers
-
-{% pop Ledgers %} can be deleted by using [`DeleteBuilder`](../javadoc/org/apache/bookkeeper/client/api/DeleteBuilder).
-
-```java
-BookKeeper bk = ...;
-long ledgerId = ...;
-
-bk.newDeleteLedgerOp()
-    .withLedgerId(ledgerId)
-    .execute()
-    .get();
-```
diff --git a/site/docs/4.6.2/api/overview.md b/site/docs/4.6.2/api/overview.md
deleted file mode 100644
index 3eb649273c1..00000000000
--- a/site/docs/4.6.2/api/overview.md
+++ /dev/null
@@ -1,17 +0,0 @@
----
-title: BookKeeper API
----
-
-BookKeeper offers a few APIs that applications can use to interact with it:
-
-* The [ledger API](../ledger-api) is a lower-level API that enables you to interact with {% pop ledgers %} directly.
-* The [Ledger Advanced API](../ledger-adv-api) is an advanced extension of the [Ledger API](../ledger-api) that provides more flexibility to applications.
-* The [DistributedLog API](../distributedlog-api) is a higher-level API that provides convenient abstractions.
-
-## Trade-offs
-
-The `Ledger API` provides direct access to ledgers and thus enables you to use BookKeeper however you'd like.
-
-However, in most use cases, if you want a `log stream`-like abstraction, the ledger API requires you to manage things like tracking the list of ledgers,
-rolling ledgers, and data retention on your own. In such cases, you are recommended to use the [DistributedLog API](../distributedlog-api),
-whose semantics resemble continuous log streams from the standpoint of applications.
diff --git a/site/docs/4.6.2/deployment/dcos.md b/site/docs/4.6.2/deployment/dcos.md
deleted file mode 100644
index 90bf75cd660..00000000000
--- a/site/docs/4.6.2/deployment/dcos.md
+++ /dev/null
@@ -1,142 +0,0 @@
----
-title: Deploying BookKeeper on DC/OS
-subtitle: Get up and running easily on an Apache Mesos cluster
-logo: img/dcos-logo.png
----
-
-[DC/OS](https://dcos.io/) (the DataCenter Operating System) is a distributed operating system used for deploying and managing applications and systems on [Apache Mesos](http://mesos.apache.org/). DC/OS is an open-source tool created and maintained by [Mesosphere](https://mesosphere.com/).
-
-BookKeeper is available as a [DC/OS package](http://universe.dcos.io/#/package/bookkeeper/version/latest) from the [Mesosphere DC/OS Universe](http://universe.dcos.io/#/packages).
-
-## Prerequisites
-
-In order to run BookKeeper on DC/OS, you will need:
-
-* DC/OS version [1.8](https://dcos.io/docs/1.8/) or higher
-* A DC/OS cluster with at least three nodes
-* The [DC/OS CLI tool](https://dcos.io/docs/1.8/usage/cli/install/) installed
-
-Each node in your DC/OS-managed Mesos cluster must have at least:
-
-* 1 CPU
-* 1 GB of memory
-* 10 GB of total persistent disk storage
-
-## Installing BookKeeper
-
-```shell
-$ dcos package install bookkeeper --yes
-```
-
-This command will:
-
-* Install the `bookkeeper` subcommand for the `dcos` CLI tool
-* Start a single {% pop bookie %} on the Mesos cluster with the [default configuration](../../reference/config)
-
-The bookie that is automatically started up uses host-mode networking and by default exports the service at `agent_ip:3181`.
-
-> If you run `dcos package install bookkeeper` without setting the `--yes` flag, the install will run in interactive mode. For more information on the `package install` command, see the [DC/OS docs](https://docs.mesosphere.com/latest/cli/command-reference/dcos-package/dcos-package-install/).
-
-### Services
-
-To watch BookKeeper start up, click on the **Services** tab in the DC/OS [user interface](https://docs.mesosphere.com/latest/gui/) and you should see the `bookkeeper` package listed:
-
-![DC/OS services]({{ site.baseurl }}img/dcos/services.png)
-
-### Tasks
-
-To see which tasks have started, click on the `bookkeeper` service and you'll see an interface that looks like this:
-
-![DC/OS tasks]({{ site.baseurl }}img/dcos/tasks.png)
-
-## Scaling BookKeeper
-
-Once the first {% pop bookie %} has started up, you can click on the **Scale** tab to scale up your BookKeeper ensemble by adding more bookies (or scale down the ensemble by removing bookies).
-
-![DC/OS scale]({{ site.baseurl }}img/dcos/scale.png)
-
-## ZooKeeper Exhibitor
-
-ZooKeeper contains the information for all bookies in the ensemble. When deployed on DC/OS, BookKeeper uses a ZooKeeper instance provided by DC/OS. You can access a visual UI for ZooKeeper using [Exhibitor](https://github.com/soabase/exhibitor/wiki), which is available at [http://master.dcos/exhibitor](http://master.dcos/exhibitor).
-
-![ZooKeeper Exhibitor]({{ site.baseurl }}img/dcos/exhibitor.png)
-
-You should see a listing of IP/host information for all bookies under the `messaging/bookkeeper/ledgers/available` node.
-
-## Client connections
-
-To connect to bookies running on DC/OS using clients running within your Mesos cluster, you need to specify the ZooKeeper connection string for DC/OS's ZooKeeper cluster:
-
-```
-master.mesos:2181
-```
-
-This is the *only* ZooKeeper host/port you need to include in your connection string. Here's an example using the [Java client](../../api/ledger-api#the-java-ledger-api-client):
-
-```java
-BookKeeper bkClient = new BookKeeper("master.mesos:2181");
-```
-
-If you're connecting using a client running outside your Mesos cluster, you need to supply the public-facing connection string for your DC/OS ZooKeeper cluster.
-
-## Configuring BookKeeper
-
-By default, the `bookkeeper` package will start up a BookKeeper ensemble consisting of one {% pop bookie %} with one CPU, 1 GB of memory, and a 70 MB persistent volume.
-
-You can supply a non-default configuration when installing the package using a JSON file. Here's an example command:
-
-```shell
-$ dcos package install bookkeeper \
-  --options=/path/to/config.json
-```
-
-You can then fetch the current configuration for BookKeeper at any time using the `package describe` command:
-
-```shell
-$ dcos package describe bookkeeper \
-  --config
-```
-
-### Available parameters
-
-> Not all [configurable parameters](../../reference/config) for BookKeeper are available for BookKeeper on DC/OS. Only the parameters shown in the table below are available.
-
-Param | Type | Description | Default
-:-----|:-----|:------------|:-------
-`name` | String | The name of the DC/OS service. | `bookkeeper`
-`cpus` | Integer | The number of CPU shares to allocate to each {% pop bookie %}. The minimum is 1. | `1`
-`instances` | Integer | The number of {% pop bookies %} to run. The minimum is 1. | `1`
-`mem` | Number | The memory, in MB, to allocate to each BookKeeper task | `1024.0` (1 GB)
-`volume_size` | Number | The persistent volume size, in MB | `70`
-`zk_client` | String | The connection string for the ZooKeeper client instance | `master.mesos:2181`
-`service_port` | Integer | The BookKeeper export service port, using `PORT0` in Marathon | `3181`
-
-### Example JSON configuration
-
-Here's an example JSON configuration object for BookKeeper on DC/OS:
-
-```json
-{
-  "instances": 5,
-  "cpus": 3,
-  "mem": 2048.0,
-  "volume_size": 250
-}
-```
-
-If that configuration were stored in a file called `bk-config.json`, you could apply it when installing the BookKeeper package using this command:
-
-```shell
-$ dcos package install bookkeeper \
-  --options=./bk-config.json
-```
-
-## Uninstalling BookKeeper
-
-You can shut down and uninstall the `bookkeeper` service from DC/OS at any time using the `package uninstall` command:
-
-```shell
-$ dcos package uninstall bookkeeper
-Uninstalled package [bookkeeper] version [4.6.2]
-Thank you for using bookkeeper.
-```
diff --git a/site/docs/4.6.2/deployment/kubernetes.md b/site/docs/4.6.2/deployment/kubernetes.md
deleted file mode 100644
index 0f113169edc..00000000000
--- a/site/docs/4.6.2/deployment/kubernetes.md
+++ /dev/null
@@ -1,181 +0,0 @@
----
-title: Deploying Apache BookKeeper on Kubernetes
-tags: [Kubernetes, Google Container Engine]
-logo: img/kubernetes-logo.png
----
-
-Apache BookKeeper can be easily deployed on [Kubernetes](https://kubernetes.io/) clusters. Managed clusters on [Google Container Engine](https://cloud.google.com/compute/) are the most convenient option.
-
-The deployment method shown in this guide relies on [YAML](http://yaml.org/) definitions for Kubernetes [resources](https://kubernetes.io/docs/resources-reference/v1.6/). The [`kubernetes`](https://github.com/apache/bookkeeper/tree/master/deploy/kubernetes) subdirectory holds resource definitions for:
-
-* A three-node ZooKeeper cluster
-* A BookKeeper cluster with a bookie running on each node
-
-## Setup on Google Container Engine
-
-To get started, clone the [`kubernetes`](https://github.com/apache/bookkeeper/tree/master/deploy/kubernetes) resource definitions from GitHub.
-
-If you'd like to change the number of bookies or ZooKeeper nodes in your BookKeeper cluster, modify the `replicas` parameter in the `spec` section of the appropriate [`Deployment`](https://kubernetes.io/docs/concepts/workloads/controllers/deployment/) or [`StatefulSet`](https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/) resource, as sketched below.
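-
-For illustration only, here is a minimal sketch of where the `replicas` field sits in a `StatefulSet` manifest. The resource name is hypothetical, the API version depends on your cluster version, and the real resource definitions contain many more fields.
-
-```yaml
-# Hypothetical fragment: only the fields relevant to scaling are shown.
-apiVersion: apps/v1   # use the apps API version appropriate to your cluster
-kind: StatefulSet
-metadata:
-  name: bookie        # hypothetical name
-spec:
-  replicas: 3         # change this value to scale the number of bookies
-  # ... serviceName, selector, template, volumeClaimTemplates, etc.
-```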
-
-[Google Container Engine](https://cloud.google.com/container-engine) (GKE) automates the creation and management of Kubernetes clusters in [Google Compute Engine](https://cloud.google.com/compute/) (GCE).
-
-### Prerequisites
-
-To get started, you'll need:
-
-* A Google Cloud Platform account, which you can sign up for at [cloud.google.com](https://cloud.google.com)
-* An existing Cloud Platform project
-* The [Google Cloud SDK](https://cloud.google.com/sdk/downloads) (in particular the [`gcloud`](https://cloud.google.com/sdk/gcloud/) and `kubectl` tools).
-
-### Create a new Kubernetes cluster
-
-You can create a new GKE cluster using the [`container clusters create`](https://cloud.google.com/sdk/gcloud/reference/container/clusters/create) command for `gcloud`. This command enables you to specify the number of nodes in the cluster, the machine types of those nodes, and more.
-
-As an example, we'll create a new GKE cluster for Kubernetes version [1.6.4](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG.md#v164) in the [us-central1-a](https://cloud.google.com/compute/docs/regions-zones/regions-zones#available) zone. The cluster will be named `bookkeeper-gke-cluster` and will consist of three VMs, each using two locally attached SSDs and running on [n1-standard-8](https://cloud.google.com/compute/docs/machine-types) machines. These SSDs will be used by bookie instances: one for the BookKeeper journal and the other for storing the actual data.
-
-```bash
-$ gcloud config set compute/zone us-central1-a
-$ gcloud config set project your-project-name
-$ gcloud container clusters create bookkeeper-gke-cluster \
-  --machine-type=n1-standard-8 \
-  --num-nodes=3 \
-  --local-ssd-count=2 \
-  --enable-kubernetes-alpha
-```
-
-By default, bookies will run on all the machines that have locally attached SSD disks. In this example, all of those machines will have two SSDs, but you can add different types of machines to the cluster later. You can control which machines host bookie servers using [labels](https://kubernetes.io/docs/concepts/overview/working-with-objects/labels).
-
-### Dashboard
-
-You can observe your cluster in the [Kubernetes Dashboard](https://kubernetes.io/docs/tasks/access-application-cluster/web-ui-dashboard/) by downloading the credentials for your Kubernetes cluster and opening up a proxy to the cluster:
-
-```bash
-$ gcloud container clusters get-credentials bookkeeper-gke-cluster \
-  --zone=us-central1-a \
-  --project=your-project-name
-$ kubectl proxy
-```
-
-By default, the proxy will be opened on port 8001. Now you can navigate to [localhost:8001/ui](http://localhost:8001/ui) in your browser to access the dashboard. At first your GKE cluster will be empty, but that will change as you begin deploying.
-
-When you create a cluster, your `kubectl` config in `~/.kube/config` (on macOS and Linux) will be updated for you, so you probably won't need to change your configuration. Nonetheless, you can ensure that `kubectl` can interact with your cluster by listing the nodes in the cluster:
-
-```bash
-$ kubectl get nodes
-```
-
-If `kubectl` is working with your cluster, you can proceed to deploy ZooKeeper and bookies.
-
-### ZooKeeper
-
-You *must* deploy ZooKeeper as the first component, as it is a dependency for the others.
-
-```bash
-$ kubectl apply -f zookeeper.yaml
-```
-
-Wait until all three ZooKeeper server pods are up and have the status `Running`. You can check on the status of the ZooKeeper pods at any time:
-
-```bash
-$ kubectl get pods -l component=zookeeper
-NAME      READY     STATUS    RESTARTS   AGE
-zk-0      1/1       Running   0          18m
-zk-1      1/1       Running   0          17m
-zk-2      0/1       Running   6          15m
-```
-
-This step may take several minutes, as Kubernetes needs to download the Docker image on the VMs.
-
-
-If you want to connect to one of the remote ZooKeeper servers, you can use [zk-shell](https://github.com/rgs1/zk_shell); you need to forward a local port to the
-remote ZooKeeper server:
-
-```bash
-$ kubectl port-forward zk-0 2181:2181
-$ zk-shell localhost 2181
-```
-
-### Deploy Bookies
-
-Once the ZooKeeper cluster is running, you can then deploy the bookies. You can deploy the bookies either using a [DaemonSet](https://kubernetes.io/docs/concepts/workloads/controllers/daemonset/) or a [StatefulSet](https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/).
-
-> NOTE: _DaemonSet_ vs _StatefulSet_
->
-> A _DaemonSet_ ensures that all (or some) nodes run a bookie pod. As nodes are added to the cluster, bookie pods are added automatically to them. As nodes are removed from the
-> cluster, those bookie pods are garbage collected. Bookies deployed in a DaemonSet store data on the local disks of those nodes, so no external storage or persistent
-> volumes are required.
->
-> A _StatefulSet_ maintains a sticky identity for the pods that it runs and manages. It provides stable and unique network identifiers, and stable and persistent storage for each pod. The pods
-> are not interchangeable; the identifiers for each pod are maintained across any rescheduling.
->
-> Which one to use? A _DaemonSet_ is the easiest way to deploy a BookKeeper cluster, because it doesn't require an additional persistent volume provisioner and uses local disks. BookKeeper manages
-> the data replication. It maintains the best latency properties. However, it uses `hostIP` and `hostPort` for communication between pods. On some Kubernetes platforms (such as DC/OS), `hostIP` and
-> `hostPort` are not well supported. A _StatefulSet_ is only practical when deploying in a cloud environment or any Kubernetes installation that has persistent volumes available. Also be aware that latency
-> can be potentially higher when using persistent volumes, because there is usually built-in replication in the persistent volumes.
-
-```bash
-# deploy bookies in a daemon set
-$ kubectl apply -f bookkeeper.yaml
-
-# deploy bookies in a stateful set
-$ kubectl apply -f bookkeeper.stateful.yaml
-```
-
-You can check on the status of the bookie pods for these components either in the Kubernetes Dashboard or using `kubectl`:
-
-```bash
-$ kubectl get pods
-```
-
-Once all BookKeeper pods are running, you can use zk-shell to find all available bookies under `/ledgers/`.
-
-You can also run an instance of the [bookkeeper tutorial](https://github.com/ivankelly/bookkeeper-tutorial/), named 'dice' here, in this BookKeeper cluster.
-
-```bash
-$ kubectl run -i --tty --attach dice --image=caiok/bookkeeper-tutorial --env ZOOKEEPER_SERVERS="zk-0.zookeeper"
-```
-
-An example of the dice instance's output looks like this:
-
-```shell
-➜ $ kubectl run -i --tty --attach dice --image=caiok/bookkeeper-tutorial --env ZOOKEEPER_SERVERS="zk-0.zookeeper"
-If you don't see a command prompt, try pressing enter.
-Value = 1, epoch = 5, leading
-Value = 2, epoch = 5, leading
-Value = 1, epoch = 5, leading
-Value = 4, epoch = 5, leading
-Value = 5, epoch = 5, leading
-Value = 4, epoch = 5, leading
-Value = 3, epoch = 5, leading
-Value = 5, epoch = 5, leading
-Value = 3, epoch = 5, leading
-Value = 2, epoch = 5, leading
-Value = 1, epoch = 5, leading
-Value = 4, epoch = 5, leading
-Value = 2, epoch = 5, leading
-```
-
-### Un-Deploy
-
-Delete the demo dice instance:
-
-```bash
-$ kubectl delete deployment dice
-```
-
-Delete BookKeeper:
-```bash
-$ kubectl delete -f bookkeeper.yaml
-```
-
-Delete ZooKeeper:
-```bash
-$ kubectl delete -f zookeeper.yaml
-```
-
-Delete the cluster:
-```bash
-$ gcloud container clusters delete bookkeeper-gke-cluster
-```
-
-
-
diff --git a/site/docs/4.6.2/deployment/manual.md b/site/docs/4.6.2/deployment/manual.md
deleted file mode 100644
index daafd5556f5..00000000000
--- a/site/docs/4.6.2/deployment/manual.md
+++ /dev/null
@@ -1,56 +0,0 @@
----
-title: Manual deployment
----
-
-The easiest way to deploy BookKeeper is using schedulers like [DC/OS](../dcos), but you can also deploy BookKeeper clusters manually. A BookKeeper cluster consists of two main components:
-
-* A [ZooKeeper](#zookeeper-setup) cluster that is used for configuration- and coordination-related tasks
-* An [ensemble](#starting-up-bookies) of {% pop bookies %}
-
-## ZooKeeper setup
-
-We won't provide a full guide to setting up a ZooKeeper cluster here. We recommend that you consult [this guide](https://zookeeper.apache.org/doc/current/zookeeperAdmin.html) in the official ZooKeeper documentation.
-
-## Starting up bookies
-
-Once your ZooKeeper cluster is up and running, you can start up as many {% pop bookies %} as you'd like to form a cluster. Before starting up each bookie, you need to modify the bookie's configuration to make sure that it points to the right ZooKeeper cluster.
-
-On each bookie host, you need to [download](../../getting-started/installation#download) the BookKeeper package as a tarball. Once you've done that, you need to configure the bookie by setting values in the `bookkeeper-server/conf/bk_server.conf` config file. The one parameter that you will absolutely need to change is the [`zkServers`](../../config#zkServers) parameter, which you will need to set to the ZooKeeper connection string for your ZooKeeper cluster. Here's an example:
-
-```properties
-zkServers=100.0.0.1:2181,100.0.0.2:2181,100.0.0.3:2181
-```
-
-> A full listing of configurable parameters available in `bookkeeper-server/conf/bk_server.conf` can be found in the [Configuration](../../reference/config) reference manual.
-
-Once the bookie's configuration is set, you can start it up using the [`bookie`](../../reference/cli#bookkeeper-bookie) command of the [`bookkeeper`](../../reference/cli#bookkeeper) CLI tool:
-
-```shell
-$ bookkeeper-server/bin/bookkeeper bookie
-```
-
-> You can also build BookKeeper [by cloning it from source](../../getting-started/installation#clone) or [using Maven](../../getting-started/installation#build-using-maven).
-
-### System requirements
-
-{% include system-requirements.md %}
-
-## Cluster metadata setup
-
-Once you've started up a cluster of bookies, you need to set up cluster metadata for the cluster by running the following command from any bookie in the cluster:
-
-```shell
-$ bookkeeper-server/bin/bookkeeper shell metaformat
-```
-
-> The `metaformat` command performs all the necessary ZooKeeper cluster metadata tasks and thus only needs to be run *once* and from *any* bookie in the BookKeeper cluster.
-
-Once cluster metadata formatting has been completed, your BookKeeper cluster is ready to go!
-
-
diff --git a/site/docs/4.6.2/development/codebase.md b/site/docs/4.6.2/development/codebase.md
deleted file mode 100644
index 9a83073ea4c..00000000000
--- a/site/docs/4.6.2/development/codebase.md
+++ /dev/null
@@ -1,3 +0,0 @@
----
-title: The BookKeeper codebase
----
diff --git a/site/docs/4.6.2/development/protocol.md b/site/docs/4.6.2/development/protocol.md
deleted file mode 100644
index 6d17aa0ed45..00000000000
--- a/site/docs/4.6.2/development/protocol.md
+++ /dev/null
@@ -1,148 +0,0 @@
----
-title: The BookKeeper protocol
----
-
-BookKeeper uses a special replication protocol for guaranteeing persistent storage of entries in an ensemble of bookies.
-
-> This document assumes that you have some knowledge of leader election and log replication and how these can be used in a distributed system. If not, we recommend reading the [example application](../../api/ledger-api#example-application) documentation first.
-
-## Ledgers
-
-{% pop Ledgers %} are the basic building blocks of BookKeeper and the level at which BookKeeper makes its persistent storage guarantees. A replicated log consists of an ordered list of ledgers. See [Ledgers to logs](#ledgers-to-logs) for info on building a replicated log from ledgers.
-
-Ledgers are composed of metadata and {% pop entries %}. The metadata is stored in ZooKeeper, which provides a *compare-and-swap* (CAS) operation. Entries are stored on storage nodes known as {% pop bookies %}.
-
-A ledger has a single writer and multiple readers (SWMR).
-
-### Ledger metadata
-
-A ledger's metadata contains the following:
-
-Parameter | Name | Meaning
-:---------|:-----|:-------
-Identifier | | A 64-bit integer, unique within the system
-Ensemble size | **E** | The number of nodes the ledger is stored on
-Write quorum size | **Qw** | The number of nodes each entry is written to. In effect, the max replication for the entry.
-Ack quorum size | **Qa** | The number of nodes an entry must be acknowledged on. In effect, the minimum replication for the entry.
-Current state | | The current status of the ledger. One of `OPEN`, `CLOSED`, or `IN_RECOVERY`.
-Last entry | | The last entry in the ledger, or `NULL` if the current state is not `CLOSED`.
-
-In addition, each ledger's metadata contains one or more *fragments*. Each fragment records
-
-* the first entry id of the fragment, and
-* the list of bookies storing the fragment.
-
-When creating a ledger, the following invariant must hold:
-
-**E >= Qw >= Qa**
-
-Thus, the ensemble size (**E**) must be at least the write quorum size (**Qw**), which must in turn be at least the ack quorum size (**Qa**). If that condition does not hold, then the ledger creation operation will fail.
-
-### Ensembles
-
-When a ledger is created, **E** bookies are chosen for the entries of that ledger. The bookies are the initial ensemble of the ledger. A ledger can have multiple ensembles, but an entry has only one ensemble. Changes in the ensemble involve a new fragment being added to the ledger.
-
-Take the following example. In this ledger, with an ensemble size of 3, there are two fragments and thus two ensembles: one starting at entry 0, the second at entry 12. The second ensemble differs from the first only in its first element. This could be because bookie B1 failed and therefore had to be replaced.
-
-First entry | Bookies
-:-----------|:-------
-0           | B1, B2, B3
-12          | B4, B2, B3
-
-### Write quorums
-
-Each entry in the log is written to **Qw** nodes. This is considered the write quorum for that entry. The write quorum is the subsequence of the ensemble, **Qw** in length, starting at the bookie at index (entryid % **E**).
-
-For example, in a ledger with **E** = 4, **Qw** = 3, and **Qa** = 2, with an ensemble consisting of B1, B2, B3, and B4, the write quorums for the first 6 entries will be:
-
-Entry | Write quorum
-:-----|:------------
-0     | B1, B2, B3
-1     | B2, B3, B4
-2     | B3, B4, B1
-3     | B4, B1, B2
-4     | B1, B2, B3
-5     | B2, B3, B4
-
-There are only **E** distinct write quorums in any ensemble. If **Qw** = **E**, then there is only one, as no striping occurs.
-
-### Ack quorums
-
-The ack quorum for an entry is any subset of the write quorum of size **Qa**. If **Qa** bookies acknowledge an entry, it means it has been fully replicated.
-
-### Guarantees
-
-The system can tolerate **Qa** – 1 failures without data loss.
-
-BookKeeper guarantees that:
-
-1. All updates to a ledger will be read in the same order as they were written.
-2. All clients will read the same sequence of updates from the ledger.
-
-## Writing to ledgers
-
-Entry ids are assigned by the writer; since a ledger has only a single writer, ensuring that entry ids are sequential is trivial. A bookie acknowledges a write once it has been persisted to disk and is therefore durable. Once **Qa** bookies from the write quorum acknowledge the write, the write is acknowledged to the client, but only if all entries with lower entry ids in the ledger have already been acknowledged to the client.
-
-The entry written contains the ledger id, the entry id, the last add confirmed, and the payload. The last add confirmed is the last entry that had been acknowledged to the client when this entry was written. Sending this with the entry speeds up recovery of the ledger in the case that the writer crashes.
-
-Another client can also read entries in the ledger up to the last add confirmed, as we guarantee that all entries thus far have been replicated on **Qa** nodes, and therefore all future readers will be able to read them too. However, to read like this, the ledger should be opened with a non-fencing open. Otherwise, it would kill the writer.
-
-If a node fails to acknowledge a write, the writer will create a new ensemble by replacing the failed node in the current ensemble. It creates a new fragment with this ensemble, starting from the first message that has not been acknowledged to the client. Creating the new fragment involves making a CAS write to the metadata. If the CAS write fails, someone else has modified something in the ledger metadata. This concurrent modification could have been caused by recovery or {% pop rereplication %}. We reread the metadata. If the state of the ledger is no longer `OPEN`, we send an error to the client for any outstanding writes. Otherwise, we try to replace the failed node again.
-
-### Closing a ledger as a writer
-
-Closing a ledger is straightforward for a writer. The writer makes a CAS write to the metadata, changing the state to `CLOSED` and setting the last entry of the ledger to the last entry which we have acknowledged to the client.
The writer makes a CAS write to the metadata, changing the state to `CLOSED` and setting the last entry of the ledger to the last entry which we have acknowledged to the client. - -If the CAS write fails, it means someone else has modified the metadata. We reread the metadata, and retry closing as long as the state of the ledger is still `OPEN`. If the state is `IN_RECOVERY` we send an error to the client. If the state is `CLOSED` and the last entry is the same as the last entry we have acknowledged to the client, we complete the close operation successfully. If the last entry is different from what we have acknowledged to the client, we send an error to the client. - -### Closing a ledger as a reader - -A reader can also force a ledger to close. Forcing the ledger to close will prevent any writer from adding new entries to the ledger. This is called {% pop fencing %}. This can occur when a writer has crashed or become unavailable, and a new writer wants to take over writing to the log. The new writer must ensure that it has seen all updates from the previous writer, and prevent the previous writer from making any new updates before making any updates of its own. - -To recover a ledger, we first update the state in the metadata to `IN_RECOVERY`. We then send a fence message to all the bookies in the last fragment of the ledger. When a bookie receives a fence message for a ledger, the fenced state of the ledger is persisted to disk. Once we receive a response from at least (**Qw** - **Qa**) + 1 bookies from each write quorum in the ensemble, the ledger is fenced. - -By ensuring we have received a response from at least (**Qw** - **Qa**) + 1 bookies in each write quorum, we ensure that, if the old writer is alive and tries to add a new entry, there will be no write quorum in which **Qa** bookies will accept the write. If the old writer tries to update the ensemble, it will fail on the CAS metadata write, and then see that the ledger is in the `IN_RECOVERY` state, and that it therefore shouldn't try to write to it. - -The old writer will be able to write entries to individual bookies (we can't guarantee that the fence message reaches all bookies), but as it will not be able to reach the ack quorum, it will not be able to send a success response to its client. The client will get a `LedgerFenced` error instead. - -It is important to note that when you get a ledger fenced message for an entry, it doesn't mean that the entry has not been written. It means that the entry may or may not have been written, and this can only be determined after the ledger is recovered. In effect, `LedgerFenced` should be treated like a timeout. - -Once the ledger is fenced, recovery can begin. Recovery means finding the last entry of the ledger and closing the ledger. To find the last entry of the ledger, the client asks all bookies for the highest last add confirmed value they have seen. It waits until it has received a response from at least (**Qw** - **Qa**) + 1 bookies in each write quorum, and takes the highest response as the entry id to start reading forward from. It then starts reading forward in the ledger, one entry at a time, replicating all entries it sees to the entire write quorum for that entry. Once it can no longer read any more entries, it updates the state in the metadata to `CLOSED`, and sets the last entry of the ledger to the last entry it wrote. Multiple readers can try to recover a ledger at the same time, but as the metadata write is a CAS operation, they will all converge on the same last entry of the ledger.
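To make the (**Qw** - **Qa**) + 1 arithmetic concrete, here is a minimal Java sketch of the coverage check described above. It is an illustration of the protocol rule, not BookKeeper's actual recovery code; the ensemble, quorum sizes, and the `responded` set are assumed inputs.

```java
import java.util.List;
import java.util.Set;

public class QuorumCoverage {

    // Returns true if every one of the E distinct write quorums contains at
    // least (Qw - Qa) + 1 bookies that have responded to the fence/read request.
    static boolean hasCoverage(List<String> ensemble, Set<String> responded,
                               int qw, int qa) {
        int e = ensemble.size();
        for (int start = 0; start < e; start++) {
            int acks = 0;
            // The write quorum for entries with (entryId % E) == start is the
            // Qw-length subsequence of the ensemble beginning at index start.
            for (int offset = 0; offset < qw; offset++) {
                if (responded.contains(ensemble.get((start + offset) % e))) {
                    acks++;
                }
            }
            if (acks < qw - qa + 1) {
                return false; // this write quorum could still accept Qa acks
            }
        }
        return true;
    }
}
```

Once this condition holds in every write quorum, the old writer can never again assemble **Qa** acknowledgements for an entry, which is exactly why fencing waits for it.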
- -## Ledgers to logs - -In BookKeeper, {% pop ledgers %} can be used to build a replicated log for your system. All guarantees provided by BookKeeper are at the ledger level. Guarantees on the whole log can be built using the ledger guarantees and any consistent datastore with a compare-and-swap (CAS) primitive. BookKeeper uses ZooKeeper as the datastore but others could theoretically be used. - -A log in BookKeeper is built from some number of ledgers, with a fixed order. A ledger represents a single segment of the log. A ledger could be the whole period that one node was the leader, or there could be multiple ledgers for a single period of leadership. However, there can only ever be one leader that adds entries to a single ledger. Ledgers cannot be reopened for writing once they have been closed/recovered. - -> BookKeeper does *not* provide leader election. You must use a system like ZooKeeper for this. - -In many cases, leader election is really leader suggestion. Multiple nodes could think that they are leader at any one time. It is the job of the log to guarantee that only one can write changes to the system. - -### Opening a log - -Once a node thinks it is leader for a particular log, it must take the following steps: - -1. Read the list of ledgers for the log -1. {% pop Fence %} the last two ledgers in the list. Two ledgers are fenced because the writer may be writing to the second-to-last ledger while adding the last ledger to the list. -1. Create a new ledger -1. Add the new ledger to the ledger list -1. Write the new ledger list back to the datastore using a CAS operation - -The fencing in step 2 and the CAS operation in step 5 prevent two nodes from thinking that they have leadership at any one time. - -The CAS operation will fail if the list of ledgers has changed between reading it and writing back the new list. When the CAS operation fails, the leader must start at step 1 again. Even better, the node should check that it is in fact still the leader with the system that is providing leader election. The protocol will work correctly without this step, though it will be able to make very little progress if two nodes think they are leader and are duelling for the log. - -The node must not serve any writes until step 5 completes successfully. A sketch of these steps appears at the end of this page. - -### Rolling ledgers - -The leader may wish to close the current ledger and open a new one every so often. Ledgers can only be deleted as a whole. If you don't roll the log, you won't be able to clean up old entries in the log without a leader change. By closing the current ledger and adding a new one, the leader allows the log to be truncated whenever that data is no longer needed. The steps for rolling the log are similar to those for creating a new ledger: - -1. Create a new ledger -1. Add the new ledger to the ledger list -1. Write the new ledger list to the datastore using CAS -1. Close the previous ledger - -By deferring the closing of the previous ledger until step 4, we can continue writing to the log while we perform metadata update operations to add the new ledger. This is safe as long as you fence the last 2 ledgers when acquiring leadership.
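Below is a minimal sketch of the log-opening steps above, using the ledger API. The metadata-store pieces (`readLedgerList`, `casWriteLedgerList`, and the `VersionedList` type) are hypothetical stand-ins for your CAS-capable datastore (e.g. ZooKeeper), and the ensemble/quorum sizes and digest settings are example values only.

```java
import java.util.ArrayList;
import java.util.List;
import org.apache.bookkeeper.client.BookKeeper;
import org.apache.bookkeeper.client.LedgerHandle;

public abstract class LogOpener {

    // Hypothetical versioned view of the log's ledger list in the datastore.
    static class VersionedList {
        List<Long> ids;
        long version;
    }

    // Wire these two methods to your own CAS-capable metadata store.
    abstract VersionedList readLedgerList() throws Exception;
    abstract boolean casWriteLedgerList(List<Long> ids, long expectedVersion)
            throws Exception;

    long openLog(BookKeeper bk, byte[] passwd) throws Exception {
        while (true) {
            VersionedList ledgers = readLedgerList();            // step 1
            int n = ledgers.ids.size();
            // Step 2: fence the last two ledgers; a recovery open fences them.
            for (int i = Math.max(0, n - 2); i < n; i++) {
                bk.openLedger(ledgers.ids.get(i),
                              BookKeeper.DigestType.CRC32, passwd).close();
            }
            // Step 3: create a new ledger (E=3, Qw=3, Qa=2 as example values).
            LedgerHandle lh = bk.createLedger(3, 3, 2,
                    BookKeeper.DigestType.CRC32, passwd);
            // Steps 4-5: append it to the list and CAS-write the list back.
            List<Long> updated = new ArrayList<>(ledgers.ids);
            updated.add(lh.getId());
            if (casWriteLedgerList(updated, ledgers.version)) {
                return lh.getId();   // only now is it safe to serve writes
            }
            lh.close();              // CAS failed: someone else won, retry
        }
    }
}
```

Note the CAS failure path: the node rereads the list and starts again from step 1, ideally after re-checking that it still holds leadership.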
- - diff --git a/site/docs/4.6.2/getting-started/concepts.md b/site/docs/4.6.2/getting-started/concepts.md deleted file mode 100644 index 7a3c92847b2..00000000000 --- a/site/docs/4.6.2/getting-started/concepts.md +++ /dev/null @@ -1,202 +0,0 @@ ---- -title: BookKeeper concepts and architecture -subtitle: The core components and how they work -prev: ../run-locally ---- - -BookKeeper is a service that provides persistent storage of streams of log [entries](#entries)---aka *records*---in sequences called [ledgers](#ledgers). BookKeeper replicates stored entries across multiple servers. - -## Basic terms - -In BookKeeper: - -* each unit of a log is an [*entry*](#entries) (aka record) -* streams of log entries are called [*ledgers*](#ledgers) -* individual servers storing ledgers of entries are called [*bookies*](#bookies) - -BookKeeper is designed to be reliable and resilient to a wide variety of failures. Bookies can crash, corrupt data, or discard data, but as long as there are enough bookies behaving correctly in the ensemble, the service as a whole will behave correctly. - -## Entries - -> **Entries** contain the actual data written to ledgers, along with some important metadata. - -BookKeeper entries are sequences of bytes that are written to [ledgers](#ledgers). Each entry has the following fields: - -Field | Java type | Description -:-----|:----------|:----------- -Ledger number | `long` | The ID of the ledger to which the entry has been written -Entry number | `long` | The unique ID of the entry -Last confirmed (LC) | `long` | The ID of the last recorded entry -Data | `byte[]` | The entry's data (written by the client application) -Authentication code | `byte[]` | The message auth code, which includes *all* other fields in the entry - -## Ledgers - -> **Ledgers** are the basic unit of storage in BookKeeper. - -Ledgers are sequences of entries, while each entry is a sequence of bytes. Entries are written to a ledger: - -* sequentially, and -* at most once. - -This means that ledgers have *append-only* semantics. Entries cannot be modified once they've been written to a ledger. Determining the proper write order is the responsibility of [client applications](#clients). - -## Clients and APIs - -> BookKeeper clients have two main roles: they create and delete ledgers, and they read entries from and write entries to ledgers. -> -> BookKeeper provides both a lower-level and a higher-level API for ledger interaction. - -There are currently two APIs that can be used for interacting with BookKeeper: - -* The [ledger API](../../api/ledger-api) is a lower-level API that enables you to interact with {% pop ledgers %} directly. -* The [DistributedLog API](../../api/distributedlog-api) is a higher-level API that enables you to use BookKeeper without directly interacting with ledgers. - -In general, you should choose the API based on how much granular control you need over ledger semantics. The two APIs can also both be used within a single application. - -## Bookies - -> **Bookies** are individual BookKeeper servers that handle ledgers (more specifically, fragments of ledgers). Bookies function as part of an ensemble. - -A bookie is an individual BookKeeper storage server. Individual bookies store fragments of ledgers, not entire ledgers (for the sake of performance). For any given ledger **L**, an *ensemble* is the group of bookies storing the entries in **L**.
- -Whenever entries are written to a ledger, those entries are {% pop striped %} across the ensemble (written to a sub-group of bookies rather than to all bookies). - -### Motivation - -> BookKeeper was initially inspired by the NameNode server in HDFS but its uses now extend far beyond this. - -The initial motivation for BookKeeper comes from the [Hadoop](http://hadoop.apache.org/) ecosystem. In the [Hadoop Distributed File System](https://wiki.apache.org/hadoop/HDFS) (HDFS), a special node called the [NameNode](https://wiki.apache.org/hadoop/NameNode) logs all operations in a reliable fashion, which ensures that recovery is possible in case of crashes. - -The NameNode, however, served only as initial inspiration for BookKeeper. The applications for BookKeeper extend far beyond this and include essentially any application that requires an append-based storage system. BookKeeper provides a number of advantages for such applications: - -* Highly efficient writes -* High fault tolerance via replication of messages within ensembles of bookies -* High throughput for write operations via {% pop striping %} (across as many bookies as you wish) - -## Metadata storage - -BookKeeper requires a metadata storage service to store information related to [ledgers](#ledgers) and available bookies. BookKeeper currently uses [ZooKeeper](https://zookeeper.apache.org) for this and other tasks. - -## Data management in bookies - -Bookies manage data in a [log-structured](https://en.wikipedia.org/wiki/Log-structured_file_system) way, which is implemented using three types of files: - -* [journals](#journals) -* [entry logs](#entry-logs) -* [index files](#index-files) - -### Journals - -A journal file contains BookKeeper transaction logs. Before any update to a ledger takes place, the bookie ensures that a transaction describing the update is written to non-volatile storage. A new journal file is created once the bookie starts or the older journal file reaches the journal file size threshold. - -### Entry logs - -An entry log file manages the written entries received from BookKeeper clients. Entries from different ledgers are aggregated and written sequentially, while their offsets are kept as pointers in a [ledger cache](#ledger-cache) for fast lookup. - -A new entry log file is created once the bookie starts or the older entry log file reaches the entry log size threshold. Old entry log files are removed by the Garbage Collector Thread once they are not associated with any active ledger. - -### Index files - -An index file is created for each ledger, which comprises a header and several fixed-length index pages that record the offsets of data stored in entry log files. - -Since updating index files would introduce random disk I/O, index files are updated lazily by a sync thread running in the background. This ensures speedy performance for updates. Before index pages are persisted to disk, they are gathered in a ledger cache for lookup. - -### Ledger cache - -Ledger index pages are cached in a memory pool, which allows for more efficient management of disk head scheduling. - -### Adding entries - -When a client instructs a {% pop bookie %} to write an entry to a ledger, the entry will go through the following steps to be persisted on disk: - -1. The entry is appended to an [entry log](#entry-logs) -1. The index of the entry is updated in the [ledger cache](#ledger-cache) -1. A transaction corresponding to this entry update is appended to the [journal](#journals) -1. 
A response is sent to the BookKeeper client - -> For performance reasons, the entry log buffers entries in memory and commits them in batches, while the ledger cache holds index pages in memory and flushes them lazily. This process is described in more detail in the [Data flush](#data-flush) section below. - -### Data flush - -Ledger index pages are flushed to index files in the following two cases: - -* The ledger cache memory limit is reached. There is no more space available to hold newer index pages. Dirty index pages will be evicted from the ledger cache and persisted to index files. -* A background sync thread is responsible for periodically flushing index pages from the ledger cache to index files. - -Besides flushing index pages, the sync thread is responsible for rolling journal files when journal files use too much disk space. The data flush flow in the sync thread is as follows: - -* A `LastLogMark` is recorded in memory. The `LastLogMark` indicates that all entries before it have been persisted (to both index and entry log files) and contains two parts: - 1. A `txnLogId` (the file ID of a journal) - 1. A `txnLogPos` (offset in a journal) -* Dirty index pages are flushed from the ledger cache to the index file, and entry log files are flushed to ensure that all buffered entries in entry log files are persisted to disk. - - Ideally, a bookie only needs to flush index pages and entry log files that contain entries before `LastLogMark`. There is, however, no such information in the ledger and entry log mapping to journal files. Consequently, the thread flushes the ledger cache and entry log entirely here, and may flush entries after the `LastLogMark`. Flushing more is not a problem, though, just redundant. -* The `LastLogMark` is persisted to disk, which means that all entries added before `LastLogMark` have had both their entry data and index pages persisted to disk. It is now safe to remove journal files created earlier than `txnLogId`. - -If the bookie has crashed before persisting `LastLogMark` to disk, it still has journal files containing entries for which index pages may not have been persisted. Consequently, when this bookie restarts, it inspects journal files to restore those entries, so no data is lost. - -Using the above data flush mechanism, it is safe for the sync thread to skip data flushing when the bookie shuts down. However, the entry logger uses a buffered channel to write entries in batches, and there might be data buffered in that channel at shutdown. The bookie needs to ensure that the entry log flushes its buffered data during shutdown. Otherwise, entry log files become corrupted with partial entries. - -### Data compaction - -On bookies, entries of different ledgers are interleaved in entry log files. A bookie runs a garbage collector thread to delete unassociated entry log files to reclaim disk space. If a given entry log file contains entries from a ledger that has not been deleted, then the entry log file would never be removed and the occupied disk space never reclaimed. In order to avoid such a case, a bookie server compacts entry log files in a garbage collector thread to reclaim disk space. - -There are two kinds of compaction running at different frequencies: minor compaction and major compaction. The differences between minor compaction and major compaction lie in their threshold values and compaction intervals.
- -* The garbage collection threshold is the size percentage of an entry log file occupied by undeleted ledgers. The default minor compaction threshold is 0.2, while the major compaction threshold is 0.8. -* The garbage collection interval is how frequently to run the compaction. The default minor compaction interval is 1 hour, while the major compaction interval is 1 day. - -> If either the threshold or interval is set to less than or equal to zero, compaction is disabled. - -The data compaction flow in the garbage collector thread is as follows: - -* The thread scans entry log files to get their entry log metadata, which records a list of ledgers comprising an entry log and their corresponding percentages. -* With the normal garbage collection flow, once the bookie determines that a ledger has been deleted, the ledger will be removed from the entry log metadata and the size of the entry log reduced. -* If the remaining size of an entry log file drops below a specified threshold, the entries of active ledgers in the entry log will be copied to a new entry log file. -* Once all valid entries have been copied, the old entry log file is deleted. - -## ZooKeeper metadata - -BookKeeper requires a ZooKeeper installation for storing [ledger](#ledgers) metadata. Whenever you construct a [`BookKeeper`](../../api/javadoc/org/apache/bookkeeper/client/BookKeeper) client object, you need to pass a list of ZooKeeper servers as a parameter to the constructor, like this: - -```java -String zkConnectionString = "127.0.0.1:2181"; -BookKeeper bkClient = new BookKeeper(zkConnectionString); -``` - -> For more info on using the BookKeeper Java client, see [this guide](../../api/ledger-api#the-java-ledger-api-client). - -## Ledger manager - -A *ledger manager* handles ledgers' metadata (which is stored in ZooKeeper). BookKeeper offers two types of ledger managers: the [flat ledger manager](#flat-ledger-manager) and the [hierarchical ledger manager](#hierarchical-ledger-manager). Both ledger managers extend the [`AbstractZkLedgerManager`](../../api/javadoc/org/apache/bookkeeper/meta/AbstractZkLedgerManager) abstract class. - -> #### Use the flat ledger manager in most cases -> The flat ledger manager is the default and is recommended for nearly all use cases. The hierarchical ledger manager is better suited only for managing very large numbers of BookKeeper ledgers (> 50,000). - -### Flat ledger manager - -The *flat ledger manager*, implemented in the [`FlatLedgerManager`](../../api/javadoc/org/apache/bookkeeper/meta/FlatLedgerManager.html) class, stores all ledgers' metadata in child nodes of a single ZooKeeper path. The flat ledger manager creates [sequential nodes](https://zookeeper.apache.org/doc/trunk/zookeeperProgrammers.html#Sequence+Nodes+--+Unique+Naming) to ensure the uniqueness of the ledger ID and prefixes all nodes with `L`. Bookie servers manage their own active ledgers in a hash map so that it's easy to find which ledgers have been deleted from ZooKeeper and then garbage collect them. - -The flat ledger manager's garbage collection flow proceeds as follows: - -* All existing ledgers are fetched from ZooKeeper (`zkActiveLedgers`) -* All ledgers currently active within the bookie are fetched (`bkActiveLedgers`) -* The bookie's currently active ledgers are looped through to determine which ledgers don't currently exist in ZooKeeper. Those are then garbage collected (see the sketch below).
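As a minimal illustration (an assumption-laden sketch, not the actual `FlatLedgerManager` code), the garbage-collection check described above amounts to a set difference:

```java
import java.util.Set;
import java.util.TreeSet;

public class LedgerGcCheck {

    // Ledgers the bookie still holds locally but which no longer exist in
    // ZooKeeper are the ones eligible for garbage collection.
    static Set<Long> gcCandidates(Set<Long> zkActiveLedgers,
                                  Set<Long> bkActiveLedgers) {
        Set<Long> candidates = new TreeSet<>(bkActiveLedgers);
        candidates.removeAll(zkActiveLedgers);
        return candidates;
    }
}
```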
- -### Hierarchical ledger manager - -The *hierarchical ledger manager*, implemented in the [`HierarchicalLedgerManager`](../../api/javadoc/org/apache/bookkeeper/meta/HierarchicalLedgerManager) class, stores ledgers' metadata in two-level [znodes](https://zookeeper.apache.org/doc/current/zookeeperOver.html#Nodes+and+ephemeral+nodes). It first obtains a global unique ID from ZooKeeper using an [`EPHEMERAL_SEQUENTIAL`](https://zookeeper.apache.org/doc/current/api/org/apache/zookeeper/CreateMode.html#EPHEMERAL_SEQUENTIAL) znode. Since ZooKeeper's sequence counter has a format of `%10d` (10 digits with 0 padding, for example `0000000001`), the hierarchical ledger manager splits the generated ID into 3 parts: - -```shell -{level1 (2 digits)}{level2 (4 digits)}{level3 (4 digits)} -``` - -These three parts are used to form the actual ledger node path to store ledger metadata: - -```shell -{ledgers_root_path}/{level1}/{level2}/L{level3} -``` - -For example, ledger 0000000001 is split into three parts, 00, 0000, and 0001, and stored in znode `/{ledgers_root_path}/00/0000/L0001`. Each znode could hold as many as 10,000 ledgers, which avoids the problem of the child list being larger than the maximum ZooKeeper packet size (which is the [limitation](https://issues.apache.org/jira/browse/BOOKKEEPER-39) that initially prompted the creation of the hierarchical ledger manager). diff --git a/site/docs/4.6.2/getting-started/installation.md b/site/docs/4.6.2/getting-started/installation.md deleted file mode 100644 index fac16ddd390..00000000000 --- a/site/docs/4.6.2/getting-started/installation.md +++ /dev/null @@ -1,74 +0,0 @@ ---- -title: BookKeeper installation -subtitle: Download or clone BookKeeper and build it locally -next: ../run-locally ---- - -{% capture download_url %}http://apache.claz.org/bookkeeper/bookkeeper-{{ site.latest_release }}/bookkeeper-{{ site.latest_release }}-src.tar.gz{% endcapture %} - -You can install BookKeeper either by [downloading](#download) a [GZipped](http://www.gzip.org/) tarball package or [cloning](#clone) the BookKeeper repository. - -## Requirements - -* [Unix environment](http://www.opengroup.org/unix) -* [Java Development Kit 1.6](http://www.oracle.com/technetwork/java/javase/downloads/index.html) or later -* [Maven 3.0](https://maven.apache.org/install.html) or later - -## Download - -You can download Apache BookKeeper releases from one of many [Apache mirrors](http://www.apache.org/dyn/closer.cgi/bookkeeper). Here's an example for the [apache.claz.org](http://apache.claz.org/bookkeeper) mirror: - -```shell -$ curl -O {{ download_url }} -$ tar xvf bookkeeper-{{ site.latest_release }}-src.tar.gz -$ cd bookkeeper-{{ site.latest_release }} -``` - -## Clone - -To build BookKeeper from source, clone the repository, either from the [GitHub mirror]({{ site.github_repo }}) or from the [Apache repository](http://git.apache.org/bookkeeper.git/): - -```shell -# From the GitHub mirror -$ git clone {{ site.github_repo}} - -# From Apache directly -$ git clone git://git.apache.org/bookkeeper.git/ -``` - -## Build using Maven - -Once you have BookKeeper on your local machine, either by [downloading](#download) or [cloning](#clone) it, you can then build BookKeeper from source using Maven: - -```shell -$ mvn package -``` - -> You can skip tests by adding the `-DskipTests` flag when running `mvn package`.
- -### Useful Maven commands - -Some other useful Maven commands beyond `mvn package`: - -Command | Action -:-------|:------ -`mvn clean` | Removes build artifacts -`mvn compile` | Compiles JAR files from Java sources -`mvn compile findbugs:findbugs` | Compiles using the Maven [FindBugs](http://gleclaire.github.io/findbugs-maven-plugin) plugin -`mvn install` | Installs the BookKeeper JAR in your local Maven cache (usually in the `~/.m2` directory) -`mvn deploy` | Deploys the BookKeeper JAR to the Maven repo (if you have the proper credentials) -`mvn verify` | Performs a wide variety of verification and validation tasks -`mvn apache-rat:check` | Runs Maven using the [Apache Rat](http://creadur.apache.org/rat/apache-rat-plugin/) plugin -`mvn compile javadoc:aggregate` | Builds Javadocs locally -`mvn package assembly:single` | Builds a complete distribution using the Maven [Assembly](http://maven.apache.org/plugins/maven-assembly-plugin/) plugin - -## Package directory - -The BookKeeper project contains several subfolders that you should be aware of: - -Subfolder | Contains -:---------|:-------- -[`bookkeeper-server`]({{ site.github_repo }}/tree/master/bookkeeper-server) | The BookKeeper server and client -[`bookkeeper-benchmark`]({{ site.github_repo }}/tree/master/bookkeeper-benchmark) | A benchmarking suite for measuring BookKeeper performance -[`bookkeeper-stats`]({{ site.github_repo }}/tree/master/bookkeeper-stats) | A BookKeeper stats library -[`bookkeeper-stats-providers`]({{ site.github_repo }}/tree/master/bookkeeper-stats-providers) | BookKeeper stats providers diff --git a/site/docs/4.6.2/getting-started/run-locally.md b/site/docs/4.6.2/getting-started/run-locally.md deleted file mode 100644 index edbfab9fda6..00000000000 --- a/site/docs/4.6.2/getting-started/run-locally.md +++ /dev/null @@ -1,16 +0,0 @@ ---- -title: Run bookies locally -prev: ../installation -next: ../concepts -toc_disable: true ---- - -{% pop Bookies %} are individual BookKeeper servers. You can run an ensemble of bookies locally on a single machine using the [`localbookie`](../../reference/cli#bookkeeper-localbookie) command of the `bookkeeper` CLI tool and specifying the number of bookies you'd like to include in the ensemble. - -This would start up an ensemble with 10 bookies: - -```shell -$ bookkeeper-server/bin/bookkeeper localbookie 10 -``` - -> When you start up an ensemble using `localbookie`, all bookies run in a single JVM process. diff --git a/site/docs/4.6.2/overview/overview.md b/site/docs/4.6.2/overview/overview.md deleted file mode 100644 index 95a0214c1da..00000000000 --- a/site/docs/4.6.2/overview/overview.md +++ /dev/null @@ -1,57 +0,0 @@ ---- -title: Apache BookKeeper™ 4.6.2 ---- - - -This documentation is for Apache BookKeeper™ version `4.6.2`. - -Apache BookKeeper™ is a scalable, fault tolerant and low latency storage service optimized for real-time workloads. -It offers `durability`, `replication` and `strong consistency` as essentials for building reliable real-time applications. - -It is suitable for use in the following scenarios: - -- [WAL](https://en.wikipedia.org/wiki/Write-ahead_logging) (Write-Ahead-Logging), e.g. HDFS [namenode](https://hadoop.apache.org/docs/r2.5.2/hadoop-project-dist/hadoop-hdfs/HDFSHighAvailabilityWithNFS.html#BookKeeper_as_a_Shared_storage_EXPERIMENTAL). -- Message Store, e.g. [Apache Pulsar](https://pulsar.incubator.apache.org/). -- Offset/Cursor Store, e.g. Apache Pulsar. -- Object/Blob Store, e.g. storing snapshots to replicated state machines.
- -Learn more about Apache BookKeeper™ and what it can do for your organization: - -- [Apache BookKeeper 4.6.2 Release Notes](../releaseNotes) -- [Java API docs](../../api/javadoc) - -Or start using Apache BookKeeper today. - -### Users - -- **Concepts**: Start with [concepts](../../getting-started/concepts). This will help you to fully understand - the other parts of the documentation, including the setup, integration and operation guides. -- **Getting Started**: Install [Apache BookKeeper](../../getting-started/installation) and run bookies [locally](../../getting-started/run-locally). -- **API**: Read the [API](../../api/overview) documentation to learn how to use Apache BookKeeper to build your applications. -- **Deployment**: The [Deployment Guide](../../deployment/manual) shows how to deploy Apache BookKeeper to production clusters. - -### Administrators - -- **Operations**: The [Admin Guide](../../admin/bookies) shows how to run Apache BookKeeper in production, including production - considerations and best practices. - -### Contributors - -- **Details**: Learn the [design details](../../development/protocol) to understand more about the internals. diff --git a/site/docs/4.6.2/overview/releaseNotes.md b/site/docs/4.6.2/overview/releaseNotes.md deleted file mode 100644 index 4f7cb7a5233..00000000000 --- a/site/docs/4.6.2/overview/releaseNotes.md +++ /dev/null @@ -1,30 +0,0 @@ ---- -title: Apache BookKeeper 4.6.2 Release Notes ---- - -This is the ninth release of BookKeeper as an Apache Top Level Project! - -The 4.6.2 release is a bugfix release which fixes a bunch of issues reported by users of 4.6.1. - -Apache BookKeeper users are encouraged to upgrade to 4.6.2. The technical details of this release are summarized -below. - -## Highlights - -- Fix a performance regression when using Netty > 4.1.12, see [https://github.com/apache/bookkeeper/pull/1108](https://github.com/apache/bookkeeper/pull/1108) - -- Enhance performance of the Prometheus stats provider, see [https://github.com/apache/bookkeeper/pull/1081](https://github.com/apache/bookkeeper/pull/1081) - -- Save memory on the client by holding references to outgoing data for a shorter time, see [https://github.com/apache/bookkeeper/issues/1063](https://github.com/apache/bookkeeper/issues/1063) - -- Fix a problem on Java 9/10 with the 'shaded' artifacts, due to a bug in the Maven Shade Plugin, see [https://github.com/apache/bookkeeper/pull/1144](https://github.com/apache/bookkeeper/pull/1144) - -- Fix Journal stats names, see [https://github.com/apache/bookkeeper/pull/1250](https://github.com/apache/bookkeeper/pull/1250) - -### Dependencies Upgrade - -There have been no dependency upgrades since 4.6.0. As of 4.6.1, we distribute a 'shaded' version of the main artifacts; see [Ledger API](../ledger-api). - -## Full list of changes - -- [https://github.com/apache/bookkeeper/issues?utf8=%E2%9C%93&q=label%3Arelease%2F4.6.2+is%3Aclosed](https://github.com/apache/bookkeeper/issues?utf8=%E2%9C%93&q=label%3Arelease%2F4.6.2+is%3Aclosed) diff --git a/site/docs/4.6.2/overview/releaseNotesTemplate.md b/site/docs/4.6.2/overview/releaseNotesTemplate.md deleted file mode 100644 index 53848e37a02..00000000000 --- a/site/docs/4.6.2/overview/releaseNotesTemplate.md +++ /dev/null @@ -1,17 +0,0 @@ ---- -title: Apache BookKeeper 4.6.1 Release Notes ---- - -[provide a summary of this release] - -Apache BookKeeper users are encouraged to upgrade to 4.6.1. The technical details of this release are summarized -below.
- -## Highlights - -[List the highlights] - -## Details - -[link to issues list] - diff --git a/site/docs/4.6.2/reference/cli.md b/site/docs/4.6.2/reference/cli.md deleted file mode 100644 index 8beb36ff071..00000000000 --- a/site/docs/4.6.2/reference/cli.md +++ /dev/null @@ -1,10 +0,0 @@ ---- -title: BookKeeper CLI tool reference -subtitle: A reference guide to the command-line tools that you can use to administer BookKeeper ---- - -{% include cli.html id="bookkeeper" %} - -## The BookKeeper shell - -{% include shell.html %} diff --git a/site/docs/4.6.2/reference/config.md b/site/docs/4.6.2/reference/config.md deleted file mode 100644 index 8997b6b62f0..00000000000 --- a/site/docs/4.6.2/reference/config.md +++ /dev/null @@ -1,9 +0,0 @@ ---- -title: BookKeeper configuration -subtitle: A reference guide to all of BookKeeper's configurable parameters ---- - - -The table below lists parameters that you can set to configure {% pop bookies %}. All configuration takes place in the `bk_server.conf` file in the `bookkeeper-server/conf` directory of your [BookKeeper installation](../../getting-started/installation). - -{% include config.html id="bk_server" %} diff --git a/site/docs/4.6.2/reference/metrics.md b/site/docs/4.6.2/reference/metrics.md deleted file mode 100644 index 8bd6fe0a165..00000000000 --- a/site/docs/4.6.2/reference/metrics.md +++ /dev/null @@ -1,3 +0,0 @@ ---- -title: BookKeeper metrics reference --- diff --git a/site/docs/4.6.2/security/overview.md b/site/docs/4.6.2/security/overview.md deleted file mode 100644 index b825776eb67..00000000000 --- a/site/docs/4.6.2/security/overview.md +++ /dev/null @@ -1,21 +0,0 @@ ---- -title: BookKeeper Security -next: ../tls ---- - -In the 4.5.0 release, the BookKeeper community added a number of features that can be used, together or separately, to secure a BookKeeper cluster. -The following security measures are currently supported: - -1. Authentication of connections to bookies from clients, using either [TLS](../tls) or [SASL (Kerberos)](../sasl). -2. Authentication of connections from clients, bookies, and autorecovery daemons to [ZooKeeper](../zookeeper), when using ZooKeeper-based ledger managers. -3. Encryption of data transferred between bookies and clients, and between bookies and autorecovery daemons, using [TLS](../tls). - -It's worth noting that security is optional - non-secured clusters are supported, as well as a mix of authenticated, unauthenticated, encrypted and non-encrypted clients. - -NOTE: authorization is not yet available in 4.5.0. The Apache BookKeeper community is looking to add this feature in subsequent releases. - -## Next Steps - -- [Encryption and Authentication using TLS](../tls) -- [Authentication using SASL](../sasl) -- [ZooKeeper Authentication](../zookeeper) diff --git a/site/docs/4.6.2/security/sasl.md b/site/docs/4.6.2/security/sasl.md deleted file mode 100644 index ffb972a8936..00000000000 --- a/site/docs/4.6.2/security/sasl.md +++ /dev/null @@ -1,202 +0,0 @@ ---- -title: Authentication using SASL -prev: ../tls -next: ../zookeeper ---- - -Bookies support client authentication via SASL. Currently we only support GSSAPI (Kerberos). We will start -with a general description of how to configure `SASL` for bookies, clients and autorecovery daemons, followed -by mechanism-specific details, and wrap up with some operational details. - -## SASL configuration for Bookies - -1. Select the mechanisms to enable in the bookies. `GSSAPI` is the only mechanism currently supported by BookKeeper. -2. 
Add a `JAAS` config file for the selected mechanisms as described in the examples for setting up [GSSAPI (Kerberos)](#kerberos). -3. Pass the `JAAS` config file location as a JVM parameter to each Bookie. For example: - - ```shell - -Djava.security.auth.login.config=/etc/bookkeeper/bookie_jaas.conf - ``` - -4. Enable the SASL auth plugin in bookies by setting `bookieAuthProviderFactoryClass` to `org.apache.bookkeeper.sasl.SASLBookieAuthProviderFactory`. - - - ```shell - bookieAuthProviderFactoryClass=org.apache.bookkeeper.sasl.SASLBookieAuthProviderFactory - ``` - -5. If you are running `autorecovery` along with bookies, then you will want to enable the SASL auth plugin for `autorecovery`, by setting - `clientAuthProviderFactoryClass` to `org.apache.bookkeeper.sasl.SASLClientProviderFactory`. - - ```shell - clientAuthProviderFactoryClass=org.apache.bookkeeper.sasl.SASLClientProviderFactory - ``` - -6. Follow the steps in [GSSAPI (Kerberos)](#kerberos) to configure SASL. - -#### Important Notes - -1. `Bookie` is a section name in the JAAS file used by each bookie. This section tells the bookie which principal to use - and the location of the keytab where the principal is stored. It allows the bookie to log in using the keytab specified in this section. -2. `Auditor` is a section name in the JAAS file used by the `autorecovery` daemon (it can be co-run with bookies). This section tells the - `autorecovery` daemon which principal to use and the location of the keytab where the principal is stored. It allows the autorecovery daemon to - log in using the keytab specified in this section. -3. The `Client` section is used to authenticate a SASL connection with ZooKeeper. It also allows the bookies to set ACLs on ZooKeeper nodes - which locks these nodes down so that only the bookies can modify them. It is necessary to have the same primary name across all bookies. - If you want to use a section name other than `Client`, set the system property `zookeeper.sasl.client` to the appropriate name - (e.g `-Dzookeeper.sasl.client=ZKClient`). -4. ZooKeeper uses `zookeeper` as the service name by default. If you want to change this, set the system property - `zookeeper.sasl.client.username` to the appropriate name (e.g. `-Dzookeeper.sasl.client.username=zk`). - -## SASL configuration for Clients - -To configure `SASL` authentication on the clients: - -1. Select a `SASL` mechanism for authentication and add a `JAAS` config file for the selected mechanism as described in the examples for - setting up [GSSAPI (Kerberos)](#kerberos). -2. Pass the `JAAS` config file location as a JVM parameter to each client JVM. For example: - - ```shell - -Djava.security.auth.login.config=/etc/bookkeeper/bookkeeper_jaas.conf - ``` - -3. Configure the following properties in the bookkeeper `ClientConfiguration`: - - ```shell - clientAuthProviderFactoryClass=org.apache.bookkeeper.sasl.SASLClientProviderFactory - ``` - -Follow the steps in [GSSAPI (Kerberos)](#kerberos) to configure SASL for the selected mechanism. - -## Authentication using SASL/Kerberos - -### Prerequisites - -#### Kerberos - -If your organization is already using a Kerberos server (for example, by using `Active Directory`), there is no need to -install a new server just for BookKeeper.
Otherwise, you will need to install one; your Linux vendor likely has packages -for `Kerberos` and a short guide on how to install and configure it ([Ubuntu](https://help.ubuntu.com/community/Kerberos), -[Redhat](https://access.redhat.com/documentation/en-US/Red_Hat_Enterprise_Linux/6/html/Managing_Smart_Cards/installing-kerberos.html)). -Note that if you are using Oracle Java, you will need to download JCE policy files for your Java version and copy them to `$JAVA_HOME/jre/lib/security`. - -#### Kerberos Principals - -If you are using the organization's Kerberos or Active Directory server, ask your Kerberos administrator for a principal -for each Bookie in your cluster and for every operating system user that will access BookKeeper with Kerberos authentication -(via clients and tools). - -If you have installed your own Kerberos, you will need to create these principals yourself using the following commands: - -```shell -sudo /usr/sbin/kadmin.local -q 'addprinc -randkey bookkeeper/{hostname}@{REALM}' -sudo /usr/sbin/kadmin.local -q "ktadd -k /etc/security/keytabs/{keytabname}.keytab bookkeeper/{hostname}@{REALM}" -``` - -##### All hosts must be reachable using hostnames - -It is a *Kerberos* requirement that all your hosts can be resolved with their FQDNs. - -### Configuring Bookies - -1. Add a suitably modified JAAS file similar to the one below to each Bookie's config directory; let's call it `bookie_jaas.conf` -for this example (note that each bookie should have its own keytab): - - ``` - Bookie { - com.sun.security.auth.module.Krb5LoginModule required - useKeyTab=true - storeKey=true - keyTab="/etc/security/keytabs/bookie.keytab" - principal="bookkeeper/bk1.hostname.com@EXAMPLE.COM"; - }; - // ZooKeeper client authentication - Client { - com.sun.security.auth.module.Krb5LoginModule required - useKeyTab=true - storeKey=true - keyTab="/etc/security/keytabs/bookie.keytab" - principal="bookkeeper/bk1.hostname.com@EXAMPLE.COM"; - }; - // If you are running `autorecovery` along with bookies - Auditor { - com.sun.security.auth.module.Krb5LoginModule required - useKeyTab=true - storeKey=true - keyTab="/etc/security/keytabs/bookie.keytab" - principal="bookkeeper/bk1.hostname.com@EXAMPLE.COM"; - }; - ``` - - The `Bookie` section in the JAAS file tells the bookie which principal to use and the location of the keytab where this principal is stored. - It allows the bookie to log in using the keytab specified in this section. See [notes](#notes) for more details on ZooKeeper's SASL configuration. - -2. Pass the name of the JAAS file as a JVM parameter to each Bookie: - - ```shell - -Djava.security.auth.login.config=/etc/bookkeeper/bookie_jaas.conf - ``` - - You may also wish to specify the path to the `krb5.conf` file - (see [JDK's Kerberos Requirements](https://docs.oracle.com/javase/8/docs/technotes/guides/security/jgss/tutorials/KerberosReq.html) for more details): - - ```shell - -Djava.security.krb5.conf=/etc/bookkeeper/krb5.conf - ``` - -3. Make sure the keytabs configured in the JAAS file are readable by the operating system user who is starting the Bookies. - -4. Enable the SASL authentication plugin in the bookies by setting the following parameters. - - ```shell - bookieAuthProviderFactoryClass=org.apache.bookkeeper.sasl.SASLBookieAuthProviderFactory - # if you run `autorecovery` along with bookies - clientAuthProviderFactoryClass=org.apache.bookkeeper.sasl.SASLClientProviderFactory - ``` - -### Configuring Clients - -To configure SASL authentication on the clients: - -1. 
Clients will authenticate to the cluster with their own principal (usually with the same name as the user running the client), - so obtain or create these principals as needed. Then create a `JAAS` file for each principal. The `BookKeeper` section describes - how clients, such as writers and readers, connect to the Bookies. The following is an example configuration for a client using - a keytab (recommended for long-running processes): - - ``` - BookKeeper { - com.sun.security.auth.module.Krb5LoginModule required - useKeyTab=true - storeKey=true - keyTab="/etc/security/keytabs/bookkeeper.keytab" - principal="bookkeeper-client-1@EXAMPLE.COM"; - }; - ``` - - -2. Pass the name of the JAAS file as a JVM parameter to the client JVM: - - ```shell - -Djava.security.auth.login.config=/etc/bookkeeper/bookkeeper_jaas.conf - ``` - - You may also wish to specify the path to the `krb5.conf` file (see - [JDK's Kerberos Requirements](https://docs.oracle.com/javase/8/docs/technotes/guides/security/jgss/tutorials/KerberosReq.html) for more details). - - ```shell - -Djava.security.krb5.conf=/etc/bookkeeper/krb5.conf - ``` - - -3. Make sure the keytabs configured in the `bookkeeper_jaas.conf` are readable by the operating system user who is starting the BookKeeper client. - -4. Enable the SASL authentication plugin in the client by setting the following parameter. - - ```shell - clientAuthProviderFactoryClass=org.apache.bookkeeper.sasl.SASLClientProviderFactory - ``` - -## Enabling Logging for SASL - -To enable SASL debug output, you can set the `sun.security.krb5.debug` system property to `true`. - diff --git a/site/docs/4.6.2/security/tls.md b/site/docs/4.6.2/security/tls.md deleted file mode 100644 index cd250ab2aa5..00000000000 --- a/site/docs/4.6.2/security/tls.md +++ /dev/null @@ -1,210 +0,0 @@ ---- -title: Encryption and Authentication using TLS -prev: ../overview -next: ../sasl ---- - -Apache BookKeeper allows clients and autorecovery daemons to communicate over TLS, although this is not enabled by default. - -## Overview - -The bookies need their own key and certificate in order to use TLS. Clients can optionally provide a key and a certificate -for mutual authentication. Each bookie or client can also be configured with a truststore, which is used to -determine which certificates (bookie or client identities) to trust (authenticate). - -The truststore can be configured in many ways. To understand the truststore, consider the following two examples: - -1. the truststore contains one or many certificates; -2. it contains a certificate authority (CA). - -In (1), with a list of certificates, the bookie or client will trust any certificate listed in the truststore. -In (2), with a CA, the bookie or client will trust any certificate that was signed by the CA in the truststore. - -(TBD: benefits) - -## Generate TLS key and certificate - -The first step of deploying TLS is to generate the key and the certificate for each machine in the cluster. -You can use Java's `keytool` utility to accomplish this task. We will generate the key into a temporary keystore -initially so that we can export and sign it later with the CA. - -```shell -keytool -keystore bookie.keystore.jks -alias localhost -validity {validity} -genkey -``` - -You need to specify two parameters in the above command: - -1. `keystore`: the keystore file that stores the certificate. The *keystore* file contains the private key of - the certificate; hence, it needs to be kept safe. -2. `validity`: the valid time of the certificate in days.
-Ensure that the common name (CN) matches exactly the fully qualified domain name (FQDN) of the server. -The client compares the CN with the DNS domain name to ensure that it is indeed connecting to the desired server, not a malicious one.
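As a quick optional check, you can inspect the generated keystore from Java using the standard `java.security` APIs and confirm that the CN matches the server's FQDN. In this sketch, the keystore path, the `localhost` alias, and the password are assumptions that must match the `keytool` command above.

```java
import java.io.FileInputStream;
import java.security.KeyStore;
import java.security.cert.X509Certificate;

public class CheckCn {
    public static void main(String[] args) throws Exception {
        KeyStore ks = KeyStore.getInstance("JKS");
        try (FileInputStream in = new FileInputStream("bookie.keystore.jks")) {
            ks.load(in, "keystore-password".toCharArray()); // assumed password
        }
        // "localhost" is the alias used in the keytool command above.
        X509Certificate cert = (X509Certificate) ks.getCertificate("localhost");
        // The subject DN contains the CN; it should match the server's FQDN.
        System.out.println(cert.getSubjectX500Principal().getName());
    }
}
```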
- -## Creating your own CA - -After the first step, each machine in the cluster has a public-private key pair, and a certificate to identify the machine. -The certificate, however, is unsigned, which means that an attacker can create such a certificate to pretend to be any machine. - -Therefore, it is important to prevent forged certificates by signing them for each machine in the cluster. -A `certificate authority (CA)` is responsible for signing certificates. A CA works like a government that issues passports — -the government stamps (signs) each passport so that the passport becomes difficult to forge. Other governments verify the stamps -to ensure the passport is authentic. Similarly, the CA signs the certificates, and the cryptography guarantees that a signed -certificate is computationally difficult to forge. Thus, as long as the CA is a genuine and trusted authority, the clients have -high assurance that they are connecting to the authentic machines. - -```shell -openssl req -new -x509 -keyout ca-key -out ca-cert -days 365 -``` - -The generated CA is simply a *public-private* key pair and certificate, and it is intended to sign other certificates. - -The next step is to add the generated CA to the clients' truststore so that the clients can trust this CA: - -```shell -keytool -keystore bookie.truststore.jks -alias CARoot -import -file ca-cert -``` - -NOTE: If you configure the bookies to require client authentication by setting `sslClientAuthentication` to `true` on the -[bookie config](../../reference/config), then you must also provide a truststore for the bookies and it should have all the CA -certificates that client keys were signed by. - -```shell -keytool -keystore client.truststore.jks -alias CARoot -import -file ca-cert -``` - -In contrast to the keystore, which stores each machine's own identity, the truststore of a client stores all the certificates -that the client should trust. Importing a certificate into one's truststore also means trusting all certificates that are signed -by that certificate. As in the analogy above, trusting the government (CA) also means trusting all passports (certificates) that -it has issued. This attribute is called the chain of trust, and it is particularly useful when deploying TLS on a large BookKeeper cluster. -You can sign all certificates in the cluster with a single CA, and have all machines share the same truststore that trusts the CA. -That way all machines can authenticate all other machines. - -## Signing the certificate - -The next step is to sign all certificates in the keystore with the CA we generated. First, you need to export the certificate from the keystore: - -```shell -keytool -keystore bookie.keystore.jks -alias localhost -certreq -file cert-file -``` - -Then sign it with the CA: - -```shell -openssl x509 -req -CA ca-cert -CAkey ca-key -in cert-file -out cert-signed -days {validity} -CAcreateserial -passin pass:{ca-password} -``` - -Finally, you need to import both the certificate of the CA and the signed certificate into the keystore: - -```shell -keytool -keystore bookie.keystore.jks -alias CARoot -import -file ca-cert -keytool -keystore bookie.keystore.jks -alias localhost -import -file cert-signed -``` - -The definitions of the parameters are the following: - -1. `keystore`: the location of the keystore -2. `ca-cert`: the certificate of the CA -3. `ca-key`: the private key of the CA -4. `ca-password`: the passphrase of the CA -5. `cert-file`: the exported, unsigned certificate of the bookie -6. 
`cert-signed`: the signed certificate of the bookie - -(TBD: add a script to automatically generate truststores and keystores.) - -## Configuring Bookies - -Bookies support TLS for connections on the same service port. In order to enable TLS, you need to configure `tlsProvider` to be either -`JDK` or `OpenSSL`. If `OpenSSL` is configured, it will use `netty-tcnative-boringssl-static`, which loads the binding corresponding -to the platform the bookies run on. - -> The current `OpenSSL` implementation doesn't depend on the system-installed OpenSSL library. If you want to leverage the OpenSSL installed on -the system, you can check [this example](http://netty.io/wiki/forked-tomcat-native.html) on how to replace the JARs on the classpath with -Netty bindings that use the installed OpenSSL. - -The following TLS configs are needed on the bookie side: - -```shell -tlsProvider=OpenSSL -# key store -tlsKeyStoreType=JKS -tlsKeyStore=/var/private/tls/bookie.keystore.jks -tlsKeyStorePasswordPath=/var/private/tls/bookie.keystore.passwd -# trust store -tlsTrustStoreType=JKS -tlsTrustStore=/var/private/tls/bookie.truststore.jks -tlsTrustStorePasswordPath=/var/private/tls/bookie.truststore.passwd -``` - -NOTE: it is important to restrict access to the store files and corresponding password files via filesystem permissions. - -Optional settings that are worth considering: - -1. `tlsClientAuthentication=false`: Enables/disables using TLS for authentication. This config, when enabled, will authenticate the other end - of the communication channel. It should be enabled on both bookies and clients for mutual TLS. -2. `tlsEnabledCipherSuites`: A cipher suite is a named combination of authentication, encryption, MAC and key exchange - algorithms used to negotiate the security settings for a network connection using the TLS protocol. By default, - it is null. See [OpenSSL Ciphers](https://www.openssl.org/docs/man1.0.2/apps/ciphers.html) and - [JDK Ciphers](http://docs.oracle.com/javase/8/docs/technotes/guides/security/StandardNames.html#ciphersuites). -3. `tlsEnabledProtocols=TLSv1.2,TLSv1.1,TLSv1`: lists the TLS protocols that you are going to accept from clients. - By default, it is not set. - -To verify that the bookie's keystore and truststore are set up correctly you can run the following command: - -```shell -openssl s_client -debug -connect localhost:3181 -tls1 -``` - -NOTE: TLSv1 should be listed under `tlsEnabledProtocols`. - -In the output of this command you should see the server's certificate: - -```shell ------BEGIN CERTIFICATE----- -{variable sized random bytes} ------END CERTIFICATE----- -``` - -If the certificate does not show up or if there are any other error messages then your keystore is not set up correctly. - -## Configuring Clients - -TLS is supported only for the new BookKeeper client (BookKeeper versions 4.5.0 and higher); the older clients are not -supported. The TLS configs are the same as for bookies. - -If client authentication is not required by the bookies, the following is a minimal configuration example: - -```shell -tlsProvider=OpenSSL -clientTrustStore=/var/private/tls/client.truststore.jks -clientTrustStorePasswordPath=/var/private/tls/client.truststore.passwd -``` - -If client authentication is required, then a keystore must be created for each client, and the bookies' truststores must -trust the certificate in the client's keystore. This may be done using commands that are similar to what we used for -the [bookie keystore](#bookie-keystore).
- -And the following must also be configured: - -```shell -tlsClientAuthentication=true -clientKeyStore=/var/private/tls/client.keystore.jks -clientKeyStorePasswordPath=/var/private/tls/client.keystore.passwd -``` - -NOTE: it is important to restrict access to the store files and corresponding password files via filesystem permissions. - -(TBD: add example to use tls in bin/bookkeeper script?) - -## Enabling TLS Logging - -You can enable TLS debug logging at the JVM level by starting the bookies and/or clients with the `javax.net.debug` system property. For example: - -```shell --Djavax.net.debug=all -``` - -You can find more details on this in [Oracle documentation](http://docs.oracle.com/javase/8/docs/technotes/guides/security/jsse/ReadDebug.html) on -[debugging SSL/TLS connections](http://docs.oracle.com/javase/8/docs/technotes/guides/security/jsse/ReadDebug.html). diff --git a/site/docs/4.6.2/security/zookeeper.md b/site/docs/4.6.2/security/zookeeper.md deleted file mode 100644 index e16be69a1d3..00000000000 --- a/site/docs/4.6.2/security/zookeeper.md +++ /dev/null @@ -1,41 +0,0 @@ ---- -title: ZooKeeper Authentication -prev: ../sasl ---- - -## New Clusters - -To enable `ZooKeeper` authentication on Bookies or Clients, there are two necessary steps: - -1. Create a `JAAS` login file and set the appropriate system property to point to it as described in [GSSAPI (Kerberos)](../sasl#notes). -2. Set the configuration property `zkEnableSecurity` in each bookie to `true`. - -The metadata stored in `ZooKeeper` is such that only certain clients will be able to modify and read the corresponding znodes. -The rationale behind this decision is that the data stored in ZooKeeper is not sensitive, but inappropriate manipulation of znodes can cause cluster -disruption. - -## Migrating Clusters - -If you are running a version of BookKeeper that does not support security, or are simply running with security disabled, and you want to make the cluster secure, -then you need to execute the following steps to enable ZooKeeper authentication with minimal disruption to your operations. - -1. Perform a rolling restart setting the `JAAS` login file, which enables bookies or clients to authenticate. At the end of the rolling restart, - bookies (or clients) are able to manipulate znodes with strict ACLs, but they will not create znodes with those ACLs. -2. Perform a second rolling restart of bookies, this time setting the configuration parameter `zkEnableSecurity` to true, which enables the use - of secure ACLs when creating znodes. -3. Currently we don't provide a tool to set ACLs on old znodes. We recommend setting them manually using ZooKeeper tools. - -It is also possible to turn off authentication in a secured cluster. To do it, follow these steps: - -1. Perform a rolling restart of bookies setting the `JAAS` login file, which enables bookies to authenticate, but setting `zkEnableSecurity` to `false`. - At the end of the rolling restart, bookies stop creating znodes with secure ACLs, but are still able to authenticate and manipulate all znodes. -2. You can use ZooKeeper tools to manually reset all ACLs under the znode set in `zkLedgersRootPath`, which defaults to `/ledgers`. -3. Perform a second rolling restart of bookies, this time omitting the system property that sets the `JAAS` login file. - -## Migrating the ZooKeeper ensemble - -It is also necessary to enable authentication on the `ZooKeeper` ensemble. To do it, we need to perform a rolling restart of the ensemble and -set a few properties.
Please refer to the ZooKeeper documentation for more details. - -1. [Apache ZooKeeper Documentation](http://zookeeper.apache.org/doc/r3.4.6/zookeeperProgrammers.html#sc_ZooKeeperAccessControl) -2. [Apache ZooKeeper Wiki](https://cwiki.apache.org/confluence/display/ZOOKEEPER/Zookeeper+and+SASL) diff --git a/site/docs/4.7.0/admin/autorecovery.md b/site/docs/4.7.0/admin/autorecovery.md deleted file mode 100644 index b1dd078f9b2..00000000000 --- a/site/docs/4.7.0/admin/autorecovery.md +++ /dev/null @@ -1,128 +0,0 @@ ---- -title: Using AutoRecovery ---- - -When a {% pop bookie %} crashes, all {% pop ledgers %} on that bookie become under-replicated. In order to bring all ledgers in your BookKeeper cluster back to full replication, you'll need to *recover* the data from any offline bookies. There are two ways to recover bookies' data: - -1. Using [manual recovery](#manual-recovery) -1. Automatically, using [*AutoRecovery*](#autorecovery) - -## Manual recovery - -You can manually recover failed bookies using the [`bookkeeper`](../../reference/cli) command-line tool. You need to specify: - -* the `shell recover` option -* an IP and port for your BookKeeper cluster's ZooKeeper ensemble -* the IP and port for the failed bookie - -Here's an example: - -```bash -$ bookkeeper-server/bin/bookkeeper shell recover \ - zk1.example.com:2181 \ # IP and port for ZooKeeper - 192.168.1.10:3181 # IP and port for the failed bookie -``` - -If you wish, you can also specify which bookie you'd like to rereplicate to. Here's an example: - -```bash -$ bookkeeper-server/bin/bookkeeper shell recover \ - zk1.example.com:2181 \ # IP and port for ZooKeeper - 192.168.1.10:3181 \ # IP and port for the failed bookie - 192.168.1.11:3181 # IP and port for the bookie to rereplicate to -``` - -### The manual recovery process - -When you initiate a manual recovery process, the following happens: - -1. The client (the process running the recovery) reads the metadata of active ledgers from ZooKeeper. -1. The ledgers that contain fragments from the failed bookie in their ensemble are selected. -1. A recovery process is initiated for each ledger in this list and the rereplication process is run for each ledger. -1. Once all the ledgers are marked as fully replicated, bookie recovery is finished. - -## AutoRecovery - -AutoRecovery is a process that: - -* automatically detects when a {% pop bookie %} in your BookKeeper cluster has become unavailable and then -* rereplicates all the {% pop ledgers %} that were stored on that bookie. - -AutoRecovery can be run in two ways: - -1. On dedicated nodes in your BookKeeper cluster -1. On the same machines on which your bookies are running - -## Running AutoRecovery - -You can start up AutoRecovery using the [`autorecovery`](../../reference/cli#bookkeeper-autorecovery) command of the [`bookkeeper`](../../reference/cli) CLI tool. - -```bash -$ bookkeeper-server/bin/bookkeeper autorecovery -``` - -> The most important thing to ensure when starting up AutoRecovery is that the ZooKeeper connection string specified by the [`zkServers`](../../reference/config#zkServers) parameter points to the right ZooKeeper cluster. - -If you start up AutoRecovery on a machine that is already running a bookie, then the AutoRecovery process will run alongside the bookie on a separate thread. - -You can also start up AutoRecovery on a fresh machine if you'd like to create a dedicated cluster of AutoRecovery nodes.
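The manual recovery shown earlier with `bookkeeper shell recover` can also be triggered programmatically through the admin client. The sketch below is hedged: the exact `recoverBookieData` overloads vary between releases, so check the `BookKeeperAdmin` javadoc for your version; the ZooKeeper string and bookie address are example values.

```java
import org.apache.bookkeeper.client.BookKeeperAdmin;
import org.apache.bookkeeper.net.BookieSocketAddress;

public class RecoverFailedBookie {
    public static void main(String[] args) throws Exception {
        BookKeeperAdmin admin = new BookKeeperAdmin("zk1.example.com:2181");
        try {
            // Rereplicate every ledger fragment stored on the failed bookie.
            admin.recoverBookieData(new BookieSocketAddress("192.168.1.10", 3181));
        } finally {
            admin.close();
        }
    }
}
```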
- -## Configuration - -There are a handful of AutoRecovery-related configs in the [`bk_server.conf`](../../reference/config) configuration file. For a listing of those configs, see [AutoRecovery settings](../../reference/config#autorecovery-settings). - -## Disable AutoRecovery - -You can disable AutoRecovery at any time, for example during maintenance. Disabling AutoRecovery ensures that bookies' data isn't unnecessarily rereplicated when the bookie is only taken down for a short period of time, for example when the bookie is being updated or the configuration is being changed. - -You can disable AutoRecovery using the [`bookkeeper`](../../reference/cli#bookkeeper-shell-autorecovery) CLI tool: - -```bash -$ bookkeeper-server/bin/bookkeeper shell autorecovery -disable -``` - -Once disabled, you can re-enable AutoRecovery using the [`enable`](../../reference/cli#bookkeeper-shell-autorecovery) shell command: - -```bash -$ bookkeeper-server/bin/bookkeeper shell autorecovery -enable -``` - -## AutoRecovery architecture - -AutoRecovery has two components: - -1. The [**auditor**](#auditor) (see the [`Auditor`](../../api/javadoc/org/apache/bookkeeper/replication/Auditor.html) class) is a singleton node that watches bookies to see if they fail and creates rereplication tasks for the ledgers on failed bookies. -1. The [**replication worker**](#replication-worker) (see the [`ReplicationWorker`](../../api/javadoc/org/apache/bookkeeper/replication/ReplicationWorker.html) class) runs on each bookie and executes rereplication tasks provided by the auditor. - -Both of these components run as threads in the [`AutoRecoveryMain`](../../api/javadoc/org/apache/bookkeeper/replication/AutoRecoveryMain) process, which runs on each bookie in the cluster. All recovery nodes participate in leader election---using ZooKeeper---to decide which node becomes the auditor. Nodes that fail to become the auditor watch the elected auditor and run an election process again if they see that the auditor node has failed. - -### Auditor - -The auditor watches all bookies in the cluster that are registered with ZooKeeper. Bookies register with ZooKeeper at startup. If the bookie crashes or is killed, the bookie's registration in ZooKeeper disappears and the auditor is notified of the change in the list of registered bookies. - -When the auditor sees that a bookie has disappeared, it immediately scans the complete {% pop ledger %} list to find ledgers that have data stored on the failed bookie. Once it has a list of ledgers for that bookie, the auditor will publish a rereplication task for each ledger under the `/underreplicated/` [znode](https://zookeeper.apache.org/doc/current/zookeeperOver.html) in ZooKeeper. - -### Replication Worker - -Each replication worker watches for tasks being published by the auditor on the `/underreplicated/` znode in ZooKeeper. When a new task appears, the replication worker will try to get a lock on it. If it cannot acquire the lock, it will try the next entry. The locks are implemented using ZooKeeper ephemeral znodes. - -The replication worker will scan through the rereplication task's ledger for fragments of which its local bookie is not a member. When it finds fragments matching this criterion, it will replicate the entries of that fragment to the local bookie. If, after this process, the ledger is fully replicated, the ledger's entry under `/underreplicated/` is deleted, and the lock is released.
If there is a problem replicating, or there are still fragments in the ledger that are underreplicated (due to the local bookie already being part of the ensemble for the fragment), then the lock is simply released. - -If the replication worker finds a fragment which needs rereplication, but does not have a defined endpoint (i.e. the final fragment of a ledger currently being written to), it will wait for a grace period before attempting rereplication. If the fragment needing rereplication still does not have a defined endpoint, the ledger is fenced and rereplication then takes place. - -This avoids the situation in which a client is writing to a ledger and one of the bookies goes down, but the client has not written an entry to that bookie before rereplication takes place. The client could continue writing to the old fragment, even though the ensemble for the fragment had changed. This could lead to data loss. Fencing prevents this scenario from happening. In the normal case, the client will try to write to the failed bookie within the grace period, and will have started a new fragment before rereplication starts. - -You can configure this grace period using the [`openLedgerRereplicationGracePeriod`](../../reference/config#openLedgerRereplicationGracePeriod) parameter. - -### The rereplication process - -The ledger rereplication process happens in these steps: - -1. The client goes through all ledger fragments in the ledger, selecting those that contain the failed bookie. -1. A recovery process is initiated for each ledger fragment in this list. - 1. The client selects a bookie to which all entries in the ledger fragment will be replicated; in the case of autorecovery, this will always be the local bookie. - 1. The client reads entries that belong to the ledger fragment from other bookies in the ensemble and writes them to the selected bookie. - 1. Once all entries have been replicated, the ZooKeeper metadata for the fragment is updated to reflect the new ensemble. - 1. The fragment is marked as fully replicated in the recovery tool. -1. Once all ledger fragments are marked as fully replicated, the ledger is marked as fully replicated. - diff --git a/site/docs/4.7.0/admin/bookies.md b/site/docs/4.7.0/admin/bookies.md deleted file mode 100644 index 1b0427dae3c..00000000000 --- a/site/docs/4.7.0/admin/bookies.md +++ /dev/null @@ -1,180 +0,0 @@ ---- -title: BookKeeper administration -subtitle: A guide to deploying and administering BookKeeper --- - -This document is a guide to deploying, administering, and maintaining BookKeeper. It also discusses [best practices](#best-practices) and [common problems](#common-problems). - -## Requirements - -A typical BookKeeper installation consists of an ensemble of {% pop bookies %} and a ZooKeeper quorum. The exact number of bookies depends on the quorum mode that you choose, desired throughput, and the number of clients using the installation simultaneously. - -The minimum number of bookies depends on the type of installation: - -* For *self-verifying* entries you should run at least three bookies. In this mode, clients store a message authentication code along with each {% pop entry %}. -* For *generic* entries you should run at least four bookies. - -There is no upper limit on the number of bookies that you can run in a single ensemble. - -### Performance - -To achieve optimal performance, BookKeeper requires each server to have at least two disks. It's possible to run a bookie with a single disk but performance will be significantly degraded.
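- -As a sketch of such a two-disk layout (the mount points below are hypothetical), you would point the journal and the ledger storage at different devices in `bk_server.conf`: - -```shell -# write-ahead log on its own, ideally fast, disk -journalDirectory=/mnt/journal/bk-txn -# ledger storage on one or more separate disks -ledgerDirectories=/mnt/data1/bk-data,/mnt/data2/bk-data -```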
- -### ZooKeeper - -There is no constraint on the number of ZooKeeper nodes you can run with BookKeeper. A single machine running ZooKeeper in [standalone mode](https://zookeeper.apache.org/doc/current/zookeeperStarted.html#sc_InstallingSingleMode) is sufficient for BookKeeper, although for the sake of higher resilience we recommend running ZooKeeper in [quorum mode](https://zookeeper.apache.org/doc/current/zookeeperStarted.html#sc_RunningReplicatedZooKeeper) with multiple servers. - -## Starting and stopping bookies - -You can run bookies either in the foreground or in the background, using [nohup](https://en.wikipedia.org/wiki/Nohup). You can also run [local bookies](#local-bookie) for development purposes. - -To start a bookie in the foreground, use the [`bookie`](../../reference/cli#bookkeeper-bookie) command of the [`bookkeeper`](../../reference/cli#bookkeeper) CLI tool: - -```shell -$ bookkeeper-server/bin/bookkeeper bookie -``` - -To start a bookie in the background, use the [`bookkeeper-daemon.sh`](../../reference/cli#bookkeeper-daemon.sh) script and run `start bookie`: - -```shell -$ bookkeeper-server/bin/bookkeeper-daemon.sh start bookie -``` - -### Local bookies - -The instructions above showed you how to run bookies intended for production use. If you'd like to experiment with ensembles of bookies locally, you can use the [`localbookie`](../../reference/cli#bookkeeper-localbookie) command of the `bookkeeper` CLI tool and specify the number of bookies you'd like to run. - -This would spin up a local ensemble of 6 bookies: - -```shell -$ bookkeeper-server/bin/bookkeeper localbookie 6 -``` - -> When you run a local bookie ensemble, all bookies run in a single JVM process. - -## Configuring bookies - -There's a wide variety of parameters that you can set in the bookie configuration file in `bookkeeper-server/conf/bk_server.conf` of your [BookKeeper installation](../../reference/config). A full listing can be found in [Bookie configuration](../../reference/config). - -Some of the more important parameters to be aware of: - -Parameter | Description | Default -:---------|:------------|:------- -`bookiePort` | The TCP port that the bookie listens on | `3181` -`zkServers` | A comma-separated list of ZooKeeper servers in `hostname:port` format | `localhost:2181` -`journalDirectory` | The directory where the [log device](../../getting-started/concepts#log-device) stores the bookie's write-ahead log (WAL) | `/tmp/bk-txn` -`ledgerDirectories` | The directories where the [ledger device](../../getting-started/concepts#ledger-device) stores the bookie's ledger entries (as a comma-separated list) | `/tmp/bk-data` - -> Ideally, the directories specified by `journalDirectory` and `ledgerDirectories` should be on different devices. - -## Logging - -BookKeeper uses [slf4j](http://www.slf4j.org/) for logging, with [log4j](https://logging.apache.org/log4j/2.x/) bindings enabled by default. - -To enable logging for a bookie, create a `log4j.properties` file and point the `BOOKIE_LOG_CONF` environment variable to the configuration file. Here's an example: - -```shell -$ export BOOKIE_LOG_CONF=/some/path/log4j.properties -$ bookkeeper-server/bin/bookkeeper bookie -``` - -## Upgrading - -From time to time you may need to make changes to the filesystem layout of bookies---changes that are incompatible with previous versions of BookKeeper and require that directories used with previous versions are upgraded.
If a filesystem upgrade is required when updating BookKeeper, the bookie will fail to start and return an error like this: - -``` -2017-05-25 10:41:50,494 - ERROR - [main:Bookie@246] - Directory layout version is less than 3, upgrade needed -``` - -BookKeeper provides a utility for upgrading the filesystem. You can perform an upgrade using the [`upgrade`](../../reference/cli#bookkeeper-upgrade) command of the `bookkeeper` CLI tool. When running `bookkeeper upgrade` you need to specify one of three flags: - -Flag | Action -:----|:------ -`--upgrade` | Performs an upgrade -`--rollback` | Performs a rollback to the initial filesystem version -`--finalize` | Marks the upgrade as complete - -### Upgrade pattern - -A standard upgrade pattern is to run an upgrade... - -```shell -$ bookkeeper-server/bin/bookkeeper upgrade --upgrade -``` - -...then check that everything is working normally, then kill the bookie. If everything is okay, finalize the upgrade... - -```shell -$ bookkeeper-server/bin/bookkeeper upgrade --finalize -``` - -...and then restart the server: - -```shell -$ bookkeeper-server/bin/bookkeeper bookie -``` - -If something has gone wrong, you can always perform a rollback: - -```shell -$ bookkeeper-server/bin/bookkeeper upgrade --rollback -``` - -## Formatting - -You can format bookie metadata in ZooKeeper using the [`metaformat`](../../reference/cli#bookkeeper-shell-metaformat) command of the [BookKeeper shell](../../reference/cli#the-bookkeeper-shell). - -By default, formatting is done in interactive mode, which prompts you to confirm the format operation if old data exists. You can disable confirmation using the `-nonInteractive` flag. If old data does exist, the format operation will abort *unless* you set the `-force` flag. Here's an example: - -```shell -$ bookkeeper-server/bin/bookkeeper shell metaformat -``` - -You can format the local filesystem data on a bookie using the [`bookieformat`](../../reference/cli#bookkeeper-shell-bookieformat) command on each bookie. Here's an example: - -```shell -$ bookkeeper-server/bin/bookkeeper shell bookieformat -``` - -> The `-force` and `-nonInteractive` flags are also available for the `bookieformat` command. - -## AutoRecovery - -For a guide to AutoRecovery in BookKeeper, see [this doc](../autorecovery). - -## Missing disks or directories - -Accidentally replacing disks or removing directories can cause a bookie to fail while trying to read a ledger fragment that, according to the ledger metadata, exists on the bookie. For this reason, when a bookie is started for the first time, its disk configuration is fixed for the lifetime of that bookie. Any change to its disk configuration, such as a crashed disk or an accidental configuration change, will result in the bookie being unable to start. That will throw an error like this: - -``` -2017-05-29 18:19:13,790 - ERROR - [main:BookieServer314] – Exception running bookie server : @ -org.apache.bookkeeper.bookie.BookieException$InvalidCookieException -.......at org.apache.bookkeeper.bookie.Cookie.verify(Cookie.java:82) -.......at org.apache.bookkeeper.bookie.Bookie.checkEnvironment(Bookie.java:275) -.......at org.apache.bookkeeper.bookie.Bookie.(Bookie.java:351) -``` - -If the change was the result of an accidental configuration change, the change can be reverted and the bookie can be restarted. However, if the change *cannot* be reverted, such as is the case when you want to add a new disk or replace a disk, the bookie must be wiped and then all its data re-replicated onto it. - -1. 
Increment the [`bookiePort`](../../reference/config#bookiePort) parameter in [`bk_server.conf`](../../reference/config). -1. Ensure that all directories specified by [`journalDirectory`](../../reference/config#journalDirectory) and [`ledgerDirectories`](../../reference/config#ledgerDirectories) are empty. -1. [Start the bookie](#starting-and-stopping-bookies). -1. Run the following command to re-replicate the data: - - ```bash - $ bookkeeper-server/bin/bookkeeper shell recover \ -   <zk-server> \ -   <old-bookie> \ -   <new-bookie> - ``` - - The ZooKeeper server, old bookie, and new bookie are all identified by their external IP and `bookiePort` (3181 by default). Here's an example: - - ```bash - $ bookkeeper-server/bin/bookkeeper shell recover \ - zk1.example.com \ - 192.168.1.10:3181 \ - 192.168.1.10:3181 - ``` - - See the [AutoRecovery](../autorecovery) documentation for more info on the re-replication process. diff --git a/site/docs/4.7.0/admin/geo-replication.md b/site/docs/4.7.0/admin/geo-replication.md deleted file mode 100644 index 38b972345ef..00000000000 --- a/site/docs/4.7.0/admin/geo-replication.md +++ /dev/null @@ -1,22 +0,0 @@ ---- -title: Geo-replication -subtitle: Replicate data across BookKeeper clusters --- - -*Geo-replication* is the replication of data across BookKeeper clusters. In order to enable geo-replication for a group of BookKeeper clusters, you need to set up a *global* ZooKeeper installation that all of the clusters can reach. - -## Global ZooKeeper - -Setting up a global ZooKeeper quorum is a lot like setting up a cluster-specific quorum. The crucial difference is that the global quorum must be reachable from, and shared by, every participating cluster. - -### Geo-replication across three clusters - -Let's say that you want to set up geo-replication across clusters in regions A, B, and C. First, the BookKeeper clusters in each region must have their own local (cluster-specific) ZooKeeper quorum. - -> BookKeeper clusters use global ZooKeeper only for metadata storage. Traffic from bookies to ZooKeeper should thus be fairly light in general. - -The crucial difference between using cluster-specific ZooKeeper and global ZooKeeper is that you need to point all {% pop bookies %} to the global ZooKeeper setup. - -## Region-aware placement policy - -## Autorecovery diff --git a/site/docs/4.7.0/admin/http.md b/site/docs/4.7.0/admin/http.md deleted file mode 100644 index 0097adc62b8..00000000000 --- a/site/docs/4.7.0/admin/http.md +++ /dev/null @@ -1,394 +0,0 @@ ---- -title: BookKeeper Admin REST API --- - -This document introduces BookKeeper HTTP endpoints, which can be used for BookKeeper administration. -To use this feature, set `httpServerEnabled` to `true` in the file `conf/bk_server.conf`. - -## All the endpoints - -Currently all the HTTP endpoints can be divided into these five components: -1. Heartbeat: heartbeat for a specific bookie. -1. Config: manage the server configuration for a specific bookie. -1. Ledger: HTTP endpoints related to ledgers. -1. Bookie: HTTP endpoints related to bookies. -1. AutoRecovery: HTTP endpoints related to auto recovery. - -## Heartbeat - -### Endpoint: /heartbeat -* Method: GET -* Description: Get heartbeat status for a specific bookie -* Response: - -| Code | Description | -|:-------|:------------| -|200 | Successful operation | - -## Config - -### Endpoint: /api/v1/config/server_config -1. Method: GET - * Description: Get the values of all configuration settings overridden in the local server config - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | -1.
Method: PUT - * Description: Update a local server config - * Parameters: - - | Name | Type | Required | Description | - |:-----|:-----|:---------|:------------| - |configName | String | Yes | Configuration name (key) | - |configValue | String | Yes | Configuration value | - * Body: - ```json - { - "configName1": "configValue1", - "configName2": "configValue2" - } - ``` - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - -## Ledger - -### Endpoint: /api/v1/ledger/delete/?ledger_id=<ledger_id> -1. Method: DELETE - * Description: Delete a ledger. - * Parameters: - - | Name | Type | Required | Description | - |:-----|:-----|:---------|:------------| - |ledger_id | Long | Yes | ledger id of the ledger. | - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - -### Endpoint: /api/v1/ledger/list/?print_metadata=<metadata> -1. Method: GET - * Description: List all the ledgers. - * Parameters: - - | Name | Type | Required | Description | - |:-----|:-----|:---------|:------------| - |print_metadata | Boolean | No | whether to print out metadata | - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - * Response Body format: - - ```json - { - "ledgerId1": "ledgerMetadata1", - "ledgerId2": "ledgerMetadata2", - ... - } - ``` - -### Endpoint: /api/v1/ledger/metadata/?ledger_id=<ledger_id> -1. Method: GET - * Description: Get the metadata of a ledger. - * Parameters: - - | Name | Type | Required | Description | - |:-----|:-----|:---------|:------------| - |ledger_id | Long | Yes | ledger id of the ledger. | - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - * Response Body format: - - ```json - { - "ledgerId1": "ledgerMetadata1" - } - ``` - -### Endpoint: /api/v1/ledger/read/?ledger_id=<ledger_id>&start_entry_id=<start_entry_id>&end_entry_id=<end_entry_id> -1. Method: GET - * Description: Read a range of entries from a ledger. - * Parameters: - - | Name | Type | Required | Description | - |:-----|:-----|:---------|:------------| - |ledger_id | Long | Yes | ledger id of the ledger. | - |start_entry_id | Long | No | start entry id of the read range. | - |end_entry_id | Long | No | end entry id of the read range. | - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - * Response Body format: - - ```json - { - "entryId1": "entry content 1", - "entryId2": "entry content 2", - ... - } - ``` - -## Bookie - -### Endpoint: /api/v1/bookie/list_bookies/?type=<type>&print_hostnames=<hostnames> -1. Method: GET - * Description: Get all the available bookies. - * Parameters: - - | Name | Type | Required | Description | - |:-----|:-----|:---------|:------------| - |type | String | Yes | value: "rw" or "ro", to list read-write or read-only bookies. | - |print_hostnames | Boolean | No | whether to print the hostnames of bookies. | - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - * Response Body format: - - ```json - { - "bookieSocketAddress1": "hostname1", - "bookieSocketAddress2": "hostname2", - ...
- } - ``` - -### Endpoint: /api/v1/bookie/list_bookie_info -1. Method: GET - * Description: Get disk usage info for the bookies in this cluster. - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - * Response Body format: - - ```json - { - "bookieAddress" : {free: xxx, total: xxx}, - "bookieAddress" : {free: xxx, total: xxx}, - ... - "clusterInfo" : {total_free: xxx, total: xxx} - } - ``` - -### Endpoint: /api/v1/bookie/last_log_mark -1. Method: GET - * Description: Get the last log marker. - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - * Response Body format: - - ```json - { - JournalId1 : position1, - JournalId2 : position2, - ... - } - ``` - -### Endpoint: /api/v1/bookie/list_disk_file/?file_type=<type> -1. Method: GET - * Description: Get all the files on disk of the current bookie. - * Parameters: - - | Name | Type | Required | Description | - |:-----|:-----|:---------|:------------| - |type | String | No | file type: journal/entrylog/index. | - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - * Response Body format: - - ```json - { - "journal files" : "filename1 filename2 ...", - "entrylog files" : "filename1 filename2...", - "index files" : "filename1 filename2 ..." - } - ``` - -### Endpoint: /api/v1/bookie/expand_storage -1. Method: PUT - * Description: Expand storage for a bookie. - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - -## Auto recovery - -### Endpoint: /api/v1/autorecovery/bookie/ -1. Method: PUT - * Description: Ledger data recovery for a failed bookie - * Body: - ```json - { - "bookie_src": [ "bookie_src1", "bookie_src2"... ], - "bookie_dest": [ "bookie_dest1", "bookie_dest2"... ], - "delete_cookie": <boolean> - } - ``` - * Parameters: - - | Name | Type | Required | Description | - |:-----|:-----|:---------|:------------| - |bookie_src | Strings | Yes | bookie source to recover | - |bookie_dest | Strings | No | bookie data recovery destination | - |delete_cookie | Boolean | No | Whether to delete the cookie | - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - -### Endpoint: /api/v1/autorecovery/list_under_replicated_ledger/?missingreplica=<bookie_address>&excludingmissingreplica=<bookie_address> -1. Method: GET - * Description: Get all under-replicated ledgers. - * Parameters: - - | Name | Type | Required | Description | - |:-----|:-----|:---------|:------------| - |missingreplica | String | No | missing replica bookieId | - |excludingmissingreplica | String | No | exclude missing replica bookieId | - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - * Response Body format: - - ```json - { - [ledgerId1, ledgerId2...] - } - ``` - -### Endpoint: /api/v1/autorecovery/who_is_auditor -1. Method: GET - * Description: Get auditor bookie id.
- * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - * Response Body format: - - ```json - { - "Auditor": "hostname/hostAddress:Port" - } - ``` - -### Endpoint: /api/v1/autorecovery/trigger_audit -1. Method: PUT - * Description: Force trigger an audit by resetting the lostBookieRecoveryDelay. - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - -### Endpoint: /api/v1/autorecovery/lost_bookie_recovery_delay -1. Method: GET - * Description: Get lostBookieRecoveryDelay value in seconds. - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - -1. Method: PUT - * Description: Set lostBookieRecoveryDelay value in seconds. - * Body: - ```json - { - "delay_seconds": <value_in_seconds> - } - ``` - * Parameters: - - | Name | Type | Required | Description | - |:-----|:-----|:---------|:------------| - | delay_seconds | Long | Yes | set delay value in seconds. | - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - -### Endpoint: /api/v1/autorecovery/decommission -1. Method: PUT - * Description: Decommission a bookie: force trigger the audit task and make sure all the ledgers stored on the decommissioning bookie are replicated. - * Body: - ```json - { - "bookie_src": <bookie_address> - } - ``` - * Parameters: - - | Name | Type | Required | Description | - |:-----|:-----|:---------|:------------| - | bookie_src | String | Yes | Bookie src to decommission. | - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | diff --git a/site/docs/4.7.0/admin/metrics.md b/site/docs/4.7.0/admin/metrics.md deleted file mode 100644 index 142df3dcd2d..00000000000 --- a/site/docs/4.7.0/admin/metrics.md +++ /dev/null @@ -1,41 +0,0 @@ ---- -title: Metric collection --- - -BookKeeper enables metrics collection through a variety of [stats providers](#stats-providers). - -> For a full listing of available metrics, see the [Metrics](../../reference/metrics) reference doc. - -## Stats providers - -BookKeeper has stats provider implementations for five sinks: - -Provider | Provider class name -:--------|:------------------- -[Codahale Metrics](https://mvnrepository.com/artifact/org.apache.bookkeeper.stats/codahale-metrics-provider) | `org.apache.bookkeeper.stats.CodahaleMetricsProvider` -[Prometheus](https://prometheus.io/) | `org.apache.bookkeeper.stats.prometheus.PrometheusMetricsProvider` -[Finagle](https://twitter.github.io/finagle/guide/Metrics.html) | `org.apache.bookkeeper.stats.FinagleStatsProvider` -[Ostrich](https://github.com/twitter/ostrich) | `org.apache.bookkeeper.stats.OstrichProvider` -[Twitter Science Provider](https://mvnrepository.com/artifact/org.apache.bookkeeper.stats/twitter-science-provider) | `org.apache.bookkeeper.stats.TwitterStatsProvider` - -> The [Codahale Metrics]({{ site.github_master }}/bookkeeper-stats-providers/codahale-metrics-provider) stats provider is the default provider.
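- -For example, to point a bookie at the Prometheus provider you would set the two parameters described in the next section in `bk_server.conf` (a minimal sketch): - -```shell -# enable stats collection -enableStatistics=true -# use the Prometheus provider class from the table above -statsProviderClass=org.apache.bookkeeper.stats.prometheus.PrometheusMetricsProvider -```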
- -## Enabling stats providers in bookies - -There are two stats-related [configuration parameters](../../reference/config#statistics) available for bookies: - -Parameter | Description | Default -:---------|:------------|:------- -`enableStatistics` | Whether statistics are enabled for the bookie | `false` -`statsProviderClass` | The stats provider class used by the bookie | `org.apache.bookkeeper.stats.CodahaleMetricsProvider` - - -To enable stats: - -* set the `enableStatistics` parameter to `true` -* set `statsProviderClass` to the desired provider (see the [table above](#stats-providers) for a listing of classes) - - diff --git a/site/docs/4.7.0/admin/perf.md b/site/docs/4.7.0/admin/perf.md deleted file mode 100644 index 82956326e5d..00000000000 --- a/site/docs/4.7.0/admin/perf.md +++ /dev/null @@ -1,3 +0,0 @@ ---- -title: Performance tuning --- diff --git a/site/docs/4.7.0/admin/placement.md b/site/docs/4.7.0/admin/placement.md deleted file mode 100644 index ded456e1aea..00000000000 --- a/site/docs/4.7.0/admin/placement.md +++ /dev/null @@ -1,3 +0,0 @@ ---- -title: Customized placement policies --- diff --git a/site/docs/4.7.0/admin/upgrade.md b/site/docs/4.7.0/admin/upgrade.md deleted file mode 100644 index 89cd3c7dfc1..00000000000 --- a/site/docs/4.7.0/admin/upgrade.md +++ /dev/null @@ -1,171 +0,0 @@ ---- -title: Upgrade --- - -> If you have questions about upgrades (or need help), please feel free to reach out to us by [mailing list]({{ site.baseurl }}community/mailing-lists) or [Slack Channel]({{ site.baseurl }}community/slack). - -## Overview - -Consider the guidelines below in preparation for upgrading. - -- Always back up all your configuration files before upgrading. -- Read through the documentation and draft an upgrade plan that matches your specific requirements and environment before starting the upgrade process. - Put differently, don't start working through the guide on a live cluster. Read the guide entirely, make a plan, then execute the plan. -- Pay careful attention to the order in which components are upgraded. In general, you need to upgrade bookies first and then upgrade your clients. -- If autorecovery is running along with bookies, you need to pay attention to the upgrade sequence. -- Read the release notes carefully for each release. They contain not only information about noteworthy features, but also changes to configurations - that may impact your upgrade. -- Always upgrade one bookie or a small set of bookies to canary the new version before upgrading all bookies in your cluster. - -## Canary - -It is wise to canary an upgraded version on one bookie or a small set of bookies before upgrading all bookies in your live cluster. - -You can follow the steps below to canary an upgraded version: - -1. Stop a Bookie. -2. Upgrade the binary and configuration. -3. Start the Bookie in `ReadOnly` mode (a configuration sketch for forcing read-only mode appears at the end of this section). This can be used to verify whether the Bookie running the new version handles the read workload well. -4. Once the Bookie has been running in `ReadOnly` mode successfully for a while, restart the Bookie in `Write/Read` mode. -5. After step 4, the Bookie will serve both write and read traffic. - -### Rollback Canaries - -If problems occur while canarying an upgraded version, you can simply take down the problematic Bookie node. The remaining bookies in the cluster -will repair the problematic bookie node via autorecovery. Nothing else needs to be done.
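- -For step 3 of the canary procedure above, one way to hold a bookie in read-only mode is via the read-only settings in `bk_server.conf`; this is a sketch, and you should verify the flags against your version's configuration reference: - -```shell -# allow the bookie to transition into readonly mode -readOnlyModeEnabled=true -# force the bookie to start in readonly mode (assumed to be supported by your release) -forceReadOnlyBookie=true -```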
- -## Upgrade Steps - -Once you have determined that a version is safe to run on a few nodes in your cluster, you can perform the following steps to upgrade all bookies in your cluster. - -1. Determine if autorecovery is running along with bookies. If yes, check if the clients (either new clients with the new binary or old clients with new configurations) -are allowed to talk to old bookies; if clients are not allowed to talk to old bookies, please [disable autorecovery](../../reference/cli/#autorecovery-1) during the upgrade. -2. Decide on performing a rolling upgrade or a downtime upgrade. -3. Upgrade all Bookies (more below). -4. If autorecovery was disabled during the upgrade, [enable autorecovery](../../reference/cli/#autorecovery-1). -5. After all bookies are upgraded, build applications that use the `BookKeeper client` against the new bookkeeper libraries and deploy the new versions. - -### Upgrade Bookies - -In a rolling upgrade scenario, upgrade one Bookie at a time. In a downtime upgrade scenario, take the entire cluster down, upgrade each Bookie, then start the cluster. - -For each Bookie: - -1. Stop the bookie. -2. Upgrade the software (either new binary or new configuration). -3. Start the bookie. - -## Upgrade Guides - -We described the general upgrade method for Apache BookKeeper above. Below we cover the details for individual versions. - -### 4.6.x to 4.7.0 upgrade - -There aren't any protocol-related backward compatibility changes in 4.7.0, so you can follow the general upgrade sequence to upgrade from 4.6.x to 4.7.0. - -However, here is a list of changes that you might want to know about. - -#### Common Configuration Changes - -This section documents the common configuration changes that apply to both clients and servers. - -##### New Settings - -The following settings are newly added in 4.7.0. - -| Name | Default Value | Description | -|------|---------------|-------------| -| allowShadedLedgerManagerFactoryClass | false | This allows the bookkeeper client to connect to a bookkeeper cluster using a shaded ledger manager factory | -| shadedLedgerManagerFactoryClassPrefix | `dlshade.` | The shaded ledger manager factory prefix. This is used when `allowShadedLedgerManagerFactoryClass` is set to true | -| metadataServiceUri | null | The metadata service URI that bookkeeper uses for loading the corresponding metadata driver and resolving its metadata service location | -| permittedStartupUsers | null | The list of users permitted to run the bookie process. Any user can run the bookie process if it is not set | - -##### Deprecated Settings - -There are no common settings deprecated in 4.7.0. - -##### Changed Settings - -There are no common settings whose default values changed in 4.7.0. - -#### Server Configuration Changes - -##### New Settings - -The following settings are newly added in 4.7.0. - -| Name | Default Value | Description | -|------|---------------|-------------| -| verifyMetadataOnGC | false | Whether the bookie is configured to double check the ledgers' metadata prior to garbage collecting them | -| auditorLedgerVerificationPercentage | 0 | The percentage of a ledger (fragment)'s entries that will be verified by the Auditor before claiming a ledger (fragment) is missing | -| numHighPriorityWorkerThreads | 8 | The number of threads that should be used for high priority requests (i.e. recovery reads and adds, and fencing). If zero, reads are handled by Netty threads directly.
| -| useShortHostName | false | Whether the bookie should use the short hostname or the [FQDN](https://en.wikipedia.org/wiki/Fully_qualified_domain_name) hostname for registration and ledger metadata when useHostNameAsBookieID is enabled. | -| minUsableSizeForEntryLogCreation | 1.2 * `logSizeLimit` | Minimum safe usable size to be available in the ledger directory for the bookie to create entry log files (in bytes). | -| minUsableSizeForHighPriorityWrites | 1.2 * `logSizeLimit` | Minimum safe usable size to be available in the ledger directory for the bookie to accept high priority writes even when it is in readonly mode. | - -##### Deprecated Settings - -The following settings are deprecated since 4.7.0. - -| Name | Description | -|------|-------------| -| registrationManagerClass | The registration manager class used by the server to discover the registration manager. It is replaced by `metadataServiceUri`. | - - -##### Changed Settings - -The default values of the following settings have changed since 4.7.0. - -| Name | Old Default Value | New Default Value | Notes | -|------|-------------------|-------------------|-------| -| numLongPollWorkerThreads | 10 | 0 | If the number of threads is zero or negative, the bookie falls back to using read threads for long poll. This avoids creating threads if the application doesn't use the long poll feature. | - -#### Client Configuration Changes - -##### New Settings - -The following settings are newly added in 4.7.0. - -| Name | Default Value | Description | -|------|---------------|-------------| -| maxNumEnsembleChanges | Integer.MAX\_VALUE | The max allowed ensemble change number before sealing a ledger on failures | -| timeoutMonitorIntervalSec | min(`addEntryTimeoutSec`, `addEntryQuorumTimeoutSec`, `readEntryTimeoutSec`) | The interval between successive executions of the operation timeout monitor, in seconds | -| ensemblePlacementPolicyOrderSlowBookies | false | Flag to enable/disable reordering slow bookies in the placement policy | - -##### Deprecated Settings - -The following settings are deprecated since 4.7.0. - -| Name | Description | -|------|-------------| -| clientKeyStoreType | Replaced by `tlsKeyStoreType` | -| clientKeyStore | Replaced by `tlsKeyStore` | -| clientKeyStorePasswordPath | Replaced by `tlsKeyStorePasswordPath` | -| clientTrustStoreType | Replaced by `tlsTrustStoreType` | -| clientTrustStore | Replaced by `tlsTrustStore` | -| clientTrustStorePasswordPath | Replaced by `tlsTrustStorePasswordPath` | -| registrationClientClass | The registration client class used by the client to discover the registration service. It is replaced by `metadataServiceUri`. | - -##### Changed Settings - -The default values of the following settings have changed since 4.7.0. - -| Name | Old Default Value | New Default Value | Notes | -|------|-------------------|-------------------|-------| -| enableDigestTypeAutodetection | false | true | Autodetect the digest type and passwd when opening a ledger. It will ignore the provided digest type, but still verify the provided passwd. | - -### 4.5.x to 4.6.x upgrade - -There aren't any protocol-related backward compatibility changes in 4.6.x, so you can follow the general upgrade sequence to upgrade from 4.5.x to 4.6.x. - -### 4.4.x to 4.5.x upgrade - -There aren't any protocol-related backward compatibility changes in 4.5.0, so you can follow the general upgrade sequence to upgrade from 4.4.x to 4.5.x. -However, here is a list of things that you might want to know. - -1. 4.5.x upgrades netty from 3.x to 4.x. The memory usage pattern might change a bit.
Netty 4 uses more direct memory. Please pay attention to your memory usage - and adjust the JVM settings accordingly. -2. `multi journals` is a non-rollbackable feature. If you configure a bookie to use multiple journals on 4.5.x you cannot roll the bookie back to use 4.4.x. You have - to take the bookie out and recover it if you want to roll back to 4.4.x. - -If you are planning to upgrade a non-secured cluster to a secured cluster enabling security features in 4.5.0, please read [BookKeeper Security](../../security/overview) for more details. diff --git a/site/docs/4.7.0/api/distributedlog-api.md b/site/docs/4.7.0/api/distributedlog-api.md deleted file mode 100644 index db6acd07a5b..00000000000 --- a/site/docs/4.7.0/api/distributedlog-api.md +++ /dev/null @@ -1,395 +0,0 @@ ---- -title: DistributedLog -subtitle: A higher-level API for managing BookKeeper entries --- - -> DistributedLog began its life as a separate project under the Apache Foundation. It was merged into BookKeeper in 2017. - -The DistributedLog API is an easy-to-use interface for managing BookKeeper entries that enables you to use BookKeeper without needing to interact with [ledgers](../ledger-api) directly. - -DistributedLog (DL) maintains sequences of records in categories called *logs* (aka *log streams*). *Writers* append records to DL logs, while *readers* fetch and process those records. - -## Architecture - -The diagram below illustrates how the DistributedLog API works with BookKeeper: - -![DistributedLog API]({{ site.baseurl }}img/distributedlog.png) - -## Logs - -A *log* in DistributedLog is an ordered, immutable sequence of *log records*. - -The diagram below illustrates the anatomy of a log stream: - -![DistributedLog log]({{ site.baseurl }}img/logs.png) - -### Log records - -Each log record is a sequence of bytes. Applications are responsible for serializing and deserializing byte sequences stored in log records. - -Log records are written sequentially into a *log stream* and assigned a unique sequence number called a DLSN (DistributedLog Sequence Number). - -In addition to a DLSN, applications can assign their own sequence number when constructing log records. Application-defined sequence numbers are known as *TransactionIDs* (or *txid*). Either a DLSN or a TransactionID can be used for positioning readers to start reading from a specific log record. - -### Log segments - -Each log is broken down into *log segments* that contain subsets of records. Log segments are distributed and stored in BookKeeper. DistributedLog rolls the log segments based on the configured *rolling policy*, which can be either - -* a configurable period of time (such as every 2 hours), or -* a configurable maximum size (such as every 128 MB). - -The data in logs is divided up into equally sized log segments and distributed evenly across {% pop bookies %}. This allows logs to scale beyond a size that would fit on a single server and spreads read traffic across the cluster. - -### Namespaces - -Log streams that belong to the same organization are typically categorized and managed under a *namespace*. DistributedLog namespaces essentially enable applications to locate log streams. Applications can perform the following actions under a namespace: - -* create streams -* delete streams -* truncate streams to a given sequence number (either a DLSN or a TransactionID) - -## Writers - -Through the DistributedLog API, writers write data into logs of their choice. All records are appended into logs in order.
The sequencing is performed by the writer, which means that there is only one active writer for a log at any given time. - -Using a *fencing* mechanism in the log segment store, DistributedLog guarantees correctness even when two writers attempt to write to the same log during a network partition. - -### Write Proxy - -Log writers are served and managed in a service tier called the *Write Proxy* (see the diagram [above](#architecture)). The Write Proxy is used for accepting writes from a large number of clients. - -## Readers - -DistributedLog readers read records from logs of their choice, starting with a provided position. The provided position can be either a DLSN or a TransactionID. - -Readers read records from logs in strict order. Different readers can read records from different positions in the same log. - -Unlike other pub-sub systems, DistributedLog doesn't record or manage readers' positions. This means that tracking is the responsibility of applications, as different applications may have different requirements for tracking and coordinating positions. This is hard to get right with a single approach. Distributed databases, for example, might store reader positions along with SSTables, so they would resume applying transactions from the positions stored in SSTables. Tracking reader positions could easily be done at the application level using various stores (such as ZooKeeper, the filesystem, or key-value stores). - -### Read Proxy - -Log records can be cached in a service tier called the *Read Proxy* to serve a large number of readers. See the diagram [above](#architecture). The Read Proxy is the analogue of the [Write Proxy](#write-proxy). - -## Guarantees - -The DistributedLog API for BookKeeper provides a number of guarantees for applications: - -* Records written by a [writer](#writers) to a [log](#logs) are appended in the order in which they are written. If a record **R1** is written by the same writer as a record **R2**, **R1** will have a smaller sequence number than **R2**. -* [Readers](#readers) see [records](#log-records) in the same order in which they are [written](#writers) to the log. -* All records are persisted on disk by BookKeeper before acknowledgements, which guarantees durability. -* For a log with a replication factor of N, DistributedLog tolerates up to N-1 server failures without losing any records. - -## API - -Documentation for the DistributedLog API can be found [here](https://bookkeeper.apache.org/distributedlog/docs/latest/user_guide/api/core). - -> At a later date, the DistributedLog API docs will be added here. - - diff --git a/site/docs/4.7.0/api/ledger-adv-api.md b/site/docs/4.7.0/api/ledger-adv-api.md deleted file mode 100644 index df6224dd7ec..00000000000 --- a/site/docs/4.7.0/api/ledger-adv-api.md +++ /dev/null @@ -1,111 +0,0 @@ ---- -title: The Advanced Ledger API --- - -In release `4.5.0`, Apache BookKeeper introduced a few advanced APIs for advanced usage. -This section covers these advanced APIs. - -> Before learning the advanced API, please read the [Ledger API](../ledger-api) first. - -## LedgerHandleAdv - -[`LedgerHandleAdv`](../javadoc/org/apache/bookkeeper/client/LedgerHandleAdv) is an advanced extension of [`LedgerHandle`](../javadoc/org/apache/bookkeeper/client/LedgerHandle). -It allows users to pass in an `entryId` when adding an entry.
- -### Creating advanced ledgers - -Here's an example: - -```java -byte[] passwd = "some-passwd".getBytes(); -LedgerHandleAdv handle = bkClient.createLedgerAdv( - 3, 3, 2, // replica settings - DigestType.CRC32, - passwd); -``` - -You can also create advanced ledgers asynchronously. - -```java -class LedgerCreationCallback implements AsyncCallback.CreateCallback { - public void createComplete(int returnCode, LedgerHandle handle, Object ctx) { - System.out.println("Ledger successfully created"); - } -} -client.asyncCreateLedgerAdv( - 3, // ensemble size - 3, // write quorum size - 2, // ack quorum size - BookKeeper.DigestType.CRC32, - password, - new LedgerCreationCallback(), - "some context" -); -``` - -Besides the APIs above, BookKeeper allows users to provide a `ledger-id` when creating advanced ledgers. - -```java -long ledgerId = ...; // the ledger id is generated externally. - -byte[] passwd = "some-passwd".getBytes(); -LedgerHandleAdv handle = bkClient.createLedgerAdv( - ledgerId, // ledger id generated externally - 3, 3, 2, // replica settings - DigestType.CRC32, - passwd); -``` - -> Please note, it is the user's responsibility to provide a unique ledger id when using the API above. -> If a ledger already exists when users try to create an advanced ledger with the same ledger id, -> a [LedgerExistsException](../javadoc/org/apache/bookkeeper/client/BKException.BKLedgerExistException.html) is thrown by the bookkeeper client. - -Since 4.6, creating advanced ledgers can also be done through a fluent API. - -```java -BookKeeper bk = ...; - -byte[] passwd = "some-passwd".getBytes(); - -WriteHandle wh = bk.newCreateLedgerOp() - .withDigestType(DigestType.CRC32) - .withPassword(passwd) - .withEnsembleSize(3) - .withWriteQuorumSize(3) - .withAckQuorumSize(2) - .makeAdv() // convert the create ledger builder to create ledger adv builder - .withLedgerId(1234L) - .execute() // execute the creation op - .get(); // wait for the execution to complete - -``` - -### Add Entries - -The normal [add entries api](ledger-api/#adding-entries-to-ledgers) is disabled in advanced ledgers. Instead, when users want to add entries -to advanced ledgers, an entry id must be passed in along with the entry data when adding an entry. - -```java -long entryId = ...; // entry id generated externally - -ledger.addEntry(entryId, "Some entry data".getBytes()); -``` - -If you are using the new API, you can do as follows: - -```java -WriteHandle wh = ...; -long entryId = ...; // entry id generated externally - -wh.write(entryId, "Some entry data".getBytes()).get(); -``` - -A few notes when using this API: - -- The entry id has to be non-negative. -- Clients may add entries out of order. -- However, the entries are only acknowledged in a monotonic order starting from 0. - -### Read Entries - -The read entries api in advanced ledgers remains the same as for [normal ledgers](../ledger-api/#reading-entries-from-ledgers). diff --git a/site/docs/4.7.0/api/ledger-api.md b/site/docs/4.7.0/api/ledger-api.md deleted file mode 100644 index edb67f5ab26..00000000000 --- a/site/docs/4.7.0/api/ledger-api.md +++ /dev/null @@ -1,802 +0,0 @@ ---- -title: The Ledger API --- - -The ledger API is a lower-level API for BookKeeper that enables you to interact with {% pop ledgers %} directly. - -## The Java ledger API client - -To get started with the Java client for BookKeeper, install the `bookkeeper-server` library as a dependency in your Java application.
- -> For a more in-depth tutorial that involves a real use case for BookKeeper, see the [Example application](../example-application) guide. - -## Installation - -The BookKeeper Java client library is available via [Maven Central](http://search.maven.org/) and can be installed using [Maven](#maven), [Gradle](#gradle), and other build tools. - -### Maven - -If you're using [Maven](https://maven.apache.org/), add this to your [`pom.xml`](https://maven.apache.org/guides/introduction/introduction-to-the-pom.html) build configuration file: - -```xml -<!-- in your <properties> block --> -<bookkeeper.version>4.7.0</bookkeeper.version> - -<!-- in your <dependencies> block --> -<dependency> -  <groupId>org.apache.bookkeeper</groupId> -  <artifactId>bookkeeper-server</artifactId> -  <version>${bookkeeper.version}</version> -</dependency> -``` - -BookKeeper makes heavy use of the Google [protobuf](https://github.com/google/protobuf/tree/master/java) and [guava](https://github.com/google/guava) -libraries. If your application might include different versions of protobuf or guava introduced by other dependencies, you can choose to use the -shaded library, which relocates the protobuf and guava classes into a different namespace to avoid conflicts. - -```xml -<!-- in your <properties> block --> -<bookkeeper.version>4.7.0</bookkeeper.version> - -<!-- in your <dependencies> block --> -<dependency> -  <groupId>org.apache.bookkeeper</groupId> -  <artifactId>bookkeeper-server-shaded</artifactId> -  <version>${bookkeeper.version}</version> -</dependency> -``` - -### Gradle - -If you're using [Gradle](https://gradle.org/), add this to your [`build.gradle`](https://spring.io/guides/gs/gradle/) build configuration file: - -```groovy -dependencies { - compile group: 'org.apache.bookkeeper', name: 'bookkeeper-server', version: '4.7.0' -} - -// Alternatively: -dependencies { - compile 'org.apache.bookkeeper:bookkeeper-server:4.7.0' -} -``` - -As with Maven, you can also configure Gradle to use the shaded jars. - -```groovy -// use the `bookkeeper-server-shaded` jar -dependencies { - compile 'org.apache.bookkeeper:bookkeeper-server-shaded:{{ site.latest-version }}' -} -``` - -## Connection string - -When interacting with BookKeeper using the Java client, you need to provide your client with a connection string, for which you have three options: - -* Provide your entire ZooKeeper connection string, for example `zk1:2181,zk2:2181,zk3:2181`. -* Provide a host and port for one node in your ZooKeeper cluster, for example `zk1:2181`. In general, it's better to provide a full connection string (in case the ZooKeeper node you attempt to connect to is down). -* If your ZooKeeper cluster can be discovered via DNS, you can provide the DNS name, for example `my-zookeeper-cluster.com`. - -## Creating a new client - -In order to create a new [`BookKeeper`](../javadoc/org/apache/bookkeeper/client/BookKeeper) client object, you need to pass in a [connection string](#connection-string). Here is an example client object using a ZooKeeper connection string: - -```java -try { - String connectionString = "127.0.0.1:2181"; // For a single-node, local ZooKeeper cluster - BookKeeper bkClient = new BookKeeper(connectionString); -} catch (InterruptedException | IOException | KeeperException e) { - e.printStackTrace(); -} -``` - -> If you're running BookKeeper [locally](../../getting-started/run-locally), using the [`localbookie`](../../reference/cli#bookkeeper-localbookie) command, use `"127.0.0.1:2181"` for your connection string, as in the example above. - -There are, however, other ways that you can create a client object: - -* By passing in a [`ClientConfiguration`](../javadoc/org/apache/bookkeeper/conf/ClientConfiguration) object.
Here's an example: - - ```java - ClientConfiguration config = new ClientConfiguration(); - config.setZkServers(zkConnectionString); - config.setAddEntryTimeout(2000); - BookKeeper bkClient = new BookKeeper(config); - ``` - -* By specifying a `ClientConfiguration` and a [`ZooKeeper`](http://zookeeper.apache.org/doc/current/api/org/apache/zookeeper/ZooKeeper.html) client object: - - ```java - ClientConfiguration config = new ClientConfiguration(); - config.setAddEntryTimeout(5000); - ZooKeeper zkClient = new ZooKeeper(/* client args */); - BookKeeper bkClient = new BookKeeper(config, zkClient); - ``` - -* Using the `forConfig` method: - - ```java - BookKeeper bkClient = BookKeeper.forConfig(conf).build(); - ``` - -## Creating ledgers - -The easiest way to create a {% pop ledger %} using the Java client is via the `createLedger` method, which creates a new ledger synchronously and returns a [`LedgerHandle`](../javadoc/org/apache/bookkeeper/client/LedgerHandle). You must specify at least a [`DigestType`](../javadoc/org/apache/bookkeeper/client/BookKeeper.DigestType) and a password. - -Here's an example: - -```java -byte[] password = "some-password".getBytes(); -LedgerHandle handle = bkClient.createLedger(BookKeeper.DigestType.MAC, password); -``` - -You can also create ledgers asynchronously. - -### Create ledgers asynchronously - -```java -class LedgerCreationCallback implements AsyncCallback.CreateCallback { - public void createComplete(int returnCode, LedgerHandle handle, Object ctx) { - System.out.println("Ledger successfully created"); - } -} - -client.asyncCreateLedger( - 3, - 2, - BookKeeper.DigestType.MAC, - password, - new LedgerCreationCallback(), - "some context" -); -``` - -## Adding entries to ledgers - -```java -long entryId = ledger.addEntry("Some entry data".getBytes()); -``` - -### Add entries asynchronously - -## Reading entries from ledgers - -```java -Enumeration<LedgerEntry> entries = handle.readEntries(1, 99); -``` - -To read all possible entries from the ledger: - -```java -Enumeration<LedgerEntry> entries = - handle.readEntries(0, handle.getLastAddConfirmed()); - -while (entries.hasMoreElements()) { - LedgerEntry entry = entries.nextElement(); - System.out.println("Successfully read entry " + entry.getEntryId()); -} -``` - -### Reading entries after the LastAddConfirmed range - -`readUnconfirmedEntries` allows reading beyond the LastAddConfirmed range. -It lets the client read without checking the local value of LastAddConfirmed, so that it is possible to read entries for which the writer has not yet received the acknowledgement. -For entries which are within the range 0..LastAddConfirmed, BookKeeper guarantees that the writer has successfully received the acknowledgement. -For entries outside that range it is possible that the writer never received the acknowledgement, so there is a risk that the reader sees entries before the writer does, which could result in a consistency issue in some cases. -With this method you can read entries before the LastAddConfirmed and entries after it with one call; the expected consistency is as described above. - -```java -Enumeration<LedgerEntry> entries = - handle.readUnconfirmedEntries(0, lastEntryIdExpectedToRead); - -while (entries.hasMoreElements()) { - LedgerEntry entry = entries.nextElement(); - System.out.println("Successfully read entry " + entry.getEntryId()); -} -``` - -## Deleting ledgers - -{% pop Ledgers %} can also be deleted synchronously or asynchronously.
- -```java -long ledgerId = 1234; - -try { - bkClient.deleteLedger(ledgerId); -} catch (Exception e) { - e.printStackTrace(); -} -``` - -### Delete ledgers asynchronously - -Exceptions thrown: - -* - -```java -class DeleteEntryCallback implements AsyncCallback.DeleteCallback { - // the callback receives a return code and the context object passed at call time - public void deleteComplete(int returnCode, Object ctx) { - System.out.println("Delete completed"); - } -} -``` - -## Simple example - -> For a more involved BookKeeper client example, see the [example application](#example-application) below. - -In the code sample below, a BookKeeper client: - -* creates a ledger -* writes entries to the ledger -* closes the ledger (meaning no further writes are possible) -* re-opens the ledger for reading -* reads all available entries - -```java -// Create a client object for the local ensemble. This -// operation throws multiple exceptions, so make sure to -// use a try/catch block when instantiating client objects. -BookKeeper bkc = new BookKeeper("localhost:2181"); - -// A password for the new ledger -byte[] ledgerPassword = /* some sequence of bytes, perhaps random */; - -// Create a new ledger and fetch its identifier -LedgerHandle lh = bkc.createLedger(BookKeeper.DigestType.MAC, ledgerPassword); -long ledgerId = lh.getId(); - -// Create a buffer for four-byte entries -ByteBuffer entry = ByteBuffer.allocate(4); - -int numberOfEntries = 100; - -// Add entries to the ledger, then close it -for (int i = 0; i < numberOfEntries; i++){ - entry.putInt(i); - entry.position(0); - lh.addEntry(entry.array()); -} -lh.close(); - -// Open the ledger for reading -lh = bkc.openLedger(ledgerId, BookKeeper.DigestType.MAC, ledgerPassword); - -// Read all available entries -Enumeration<LedgerEntry> entries = lh.readEntries(0, numberOfEntries - 1); - -while(entries.hasMoreElements()) { - ByteBuffer result = ByteBuffer.wrap(entries.nextElement().getEntry()); - Integer retrEntry = result.getInt(); - - // Print the integer stored in each entry - System.out.println(String.format("Result: %s", retrEntry)); -} - -// Close the ledger and the client -lh.close(); -bkc.close(); -``` - -Running this should return this output: - -```shell -Result: 0 -Result: 1 -Result: 2 -# etc -``` - -## Example application - -This tutorial walks you through building an example application that uses BookKeeper as the replicated log. The application uses the [BookKeeper Java client](../java-client) to interact with BookKeeper. - -> The code for this tutorial can be found in [this GitHub repo](https://github.com/ivankelly/bookkeeper-tutorial/). The final code for the `Dice` class can be found [here](https://github.com/ivankelly/bookkeeper-tutorial/blob/master/src/main/java/org/apache/bookkeeper/Dice.java). - -### Setup - -Before you start, you will need to have a BookKeeper cluster running locally on your machine. For installation instructions, see [Installation](../../getting-started/installation). - -To start up a cluster consisting of six {% pop bookies %} locally: - -```shell -$ bookkeeper-server/bin/bookkeeper localbookie 6 -``` - -You can specify a different number of bookies if you'd like. - -### Goal - -The goal of the dice application is to have - -* multiple instances of this application, -* possibly running on different machines, -* all of which display the exact same sequence of numbers. - -In other words, the log needs to be both durable and consistent, regardless of how many {% pop bookies %} are participating in the BookKeeper ensemble.
If one of the bookies crashes or becomes unable to communicate with the other bookies in any way, it should *still* display the same sequence of numbers as the others. This tutorial will show you how to achieve this.

To begin, download the base application, then compile and run it:

```shell
$ git clone https://github.com/ivankelly/bookkeeper-tutorial.git
$ mvn package
$ mvn exec:java -Dexec.mainClass=org.apache.bookkeeper.Dice
```

That should yield output that looks something like this:

```
[INFO] Scanning for projects...
[INFO]
[INFO] ------------------------------------------------------------------------
[INFO] Building tutorial 1.0-SNAPSHOT
[INFO] ------------------------------------------------------------------------
[INFO]
[INFO] --- exec-maven-plugin:1.3.2:java (default-cli) @ tutorial ---
[WARNING] Warning: killAfter is now deprecated. Do you need it ? Please comment on MEXEC-6.
Value = 4
Value = 5
Value = 3
```

### The base application

The application in this tutorial is a dice application. The `Dice` class below has a `playDice` function that generates a random number between 1 and 6 every second, prints the value of the dice roll, and runs indefinitely.

```java
public class Dice {
    Random r = new Random();

    void playDice() throws InterruptedException {
        while (true) {
            Thread.sleep(1000);
            System.out.println("Value = " + (r.nextInt(6) + 1));
        }
    }
}
```

When you run the `main` function of this class, a new `Dice` object will be instantiated and then run indefinitely:

```java
public class Dice {
    // other methods

    public static void main(String[] args) throws InterruptedException {
        Dice d = new Dice();
        d.playDice();
    }
}
```

### Leaders and followers (and a bit of background)

To achieve this common view in multiple instances of the program, we need each instance to agree on what the next number in the sequence will be. For example, the instances must agree that 4 is the first number, 2 is the second number, 5 is the third number, and so on. This is a difficult problem, especially when any instance may go away at any time, and messages between the instances can be lost or reordered.

Luckily, there are already algorithms to solve this. Paxos is an abstract algorithm for implementing this kind of agreement, while Zab and Raft are more practical protocols. This video gives a good overview of how these algorithms usually look. They all have a similar core.

It would be possible to run Paxos to agree on each number in the sequence. However, running Paxos each time can be expensive. What Zab and Raft do instead is use a Paxos-like algorithm to elect a leader. The leader then decides what the sequence of events should be, putting them in a log, which the other instances can then follow to maintain the same state as the leader.

BookKeeper provides the functionality for the second part of the protocol, allowing a leader to write events to a log and have multiple followers tailing the log. However, BookKeeper does not do leader election. You will need ZooKeeper or a Raft implementation for that purpose.

### Why not just use ZooKeeper?

There are a number of reasons:

1. ZooKeeper's log is only exposed through a tree-like interface. It can be hard to shoehorn your application into this.
2. A ZooKeeper ensemble of multiple machines is limited to one log. You may want one log per resource, which will become expensive very quickly.
3. Adding extra machines to a ZooKeeper ensemble does not increase its capacity or throughput.

BookKeeper can be seen as a means of exposing ZooKeeper's replicated log to applications in a scalable fashion. ZooKeeper is still used by BookKeeper, however, to maintain consistency guarantees, though clients don't need to interact with ZooKeeper directly.

### Electing a leader

We'll use ZooKeeper to elect a leader. A ZooKeeper instance will have started locally when you started the localbookie application above. To verify it's running, run the following command.

```shell
$ echo stat | nc localhost 2181
Zookeeper version: 3.4.6-1569965, built on 02/20/2014 09:09 GMT
Clients:
 /127.0.0.1:59343[1](queued=0,recved=40,sent=41)
 /127.0.0.1:49354[1](queued=0,recved=11,sent=11)
 /127.0.0.1:49361[0](queued=0,recved=1,sent=0)
 /127.0.0.1:59344[1](queued=0,recved=38,sent=39)
 /127.0.0.1:59345[1](queued=0,recved=38,sent=39)
 /127.0.0.1:59346[1](queued=0,recved=38,sent=39)

Latency min/avg/max: 0/0/23
Received: 167
Sent: 170
Connections: 6
Outstanding: 0
Zxid: 0x11
Mode: standalone
Node count: 16
```

To interact with ZooKeeper, we'll use the Curator client rather than the stock ZooKeeper client. Getting things right with the ZooKeeper client can be tricky, and Curator removes a lot of the pointy corners for you. In fact, Curator even provides a leader election recipe, so we need to do very little work to get leader election in our application.

```java
public class Dice extends LeaderSelectorListenerAdapter implements Closeable {

    final static String ZOOKEEPER_SERVER = "127.0.0.1:2181";
    final static String ELECTION_PATH = "/dice-elect";

    ...

    Dice() throws InterruptedException {
        curator = CuratorFrameworkFactory.newClient(ZOOKEEPER_SERVER,
                2000, 10000, new ExponentialBackoffRetry(1000, 3));
        curator.start();
        curator.blockUntilConnected();

        leaderSelector = new LeaderSelector(curator, ELECTION_PATH, this);
        leaderSelector.autoRequeue();
        leaderSelector.start();
    }
```

In the constructor for Dice, we need to create the Curator client. We specify four things when creating the client: the location of the ZooKeeper service, the session timeout, the connection timeout, and the retry policy.

The session timeout is a ZooKeeper concept. If the ZooKeeper server doesn't hear anything from the client for this amount of time, any leases which the client holds will be timed out. This is important in leader election. For leader election, the Curator client will take a lease on ELECTION_PATH. The first instance to take the lease will become leader and the rest will become followers. However, their claim on the lease will remain in the queue. If the first instance then goes away, due to a crash for example, its session will time out. Once the session times out, the lease will be released and the next instance in the queue will become the leader. The call to autoRequeue() will make the client queue itself again if it loses the lease for some other reason, such as when the process is still alive but a garbage collection pause caused it to lose its session, and thereby its lease.

I've set the session timeout to be quite low so that when we test out leader election, transitions will be quite quick. The optimum length for the session timeout depends very much on the use case. The other parameters are the connection timeout, i.e. the amount of time the client will spend trying to connect to a ZooKeeper server before giving up, and the retry policy.
The retry policy specifies how the client should respond to transient errors, such as connection loss. Operations that fail with transient errors can be retried, and this argument specifies how often the retries should occur.

Finally, you'll have noticed that Dice now extends LeaderSelectorListenerAdapter and implements Closeable. Closeable is there to close the resources we have initialized in the constructor: the Curator client and the leaderSelector. LeaderSelectorListenerAdapter is a callback that the leaderSelector uses to notify the instance that it is now the leader. It is passed as the third argument to the LeaderSelector constructor.

```java
    @Override
    public void takeLeadership(CuratorFramework client)
            throws Exception {
        synchronized (this) {
            leader = true;
            try {
                while (true) {
                    this.wait();
                }
            } catch (InterruptedException ie) {
                Thread.currentThread().interrupt();
                leader = false;
            }
        }
    }
```

takeLeadership() is the callback called by LeaderSelector when the instance is leader. It should only return when the instance wants to give up leadership. In our case, we never want to, so we wait on the current object until we're interrupted. To signal to the rest of the program that we are leader, we set a volatile boolean called leader to true. This is unset after we are interrupted.

```java
    void playDice() throws InterruptedException {
        while (true) {
            while (leader) {
                Thread.sleep(1000);
                System.out.println("Value = " + (r.nextInt(6) + 1)
                        + ", isLeader = " + leader);
            }
        }
    }
```

Finally, we modify the `playDice` function to only generate random numbers when it is the leader.

Run two instances of the program in two different terminals. You'll see that one becomes leader and prints numbers while the other just sits there.

Now stop the leader using Control-Z. This will pause the process, but it won't kill it. You will be dropped back to the shell in that terminal. After a couple of seconds (the session timeout), you will see that the other instance has become the leader. ZooKeeper guarantees that only one instance is selected as leader at any time.

Now go back to the shell that the original leader was on and wake up the process using `fg`. You'll see something like the following:

```shell
...
...
Value = 4, isLeader = true
Value = 4, isLeader = true
^Z
[1]+  Stopped                 mvn exec:java -Dexec.mainClass=org.apache.bookkeeper.Dice
$ fg
mvn exec:java -Dexec.mainClass=org.apache.bookkeeper.Dice
Value = 3, isLeader = true
Value = 1, isLeader = false
```

## New API

Since 4.6, BookKeeper provides a new client API that leverages the Java 8 [CompletableFuture](https://docs.oracle.com/javase/8/docs/api/java/util/concurrent/CompletableFuture.html) facility.
[WriteHandle](../javadoc/org/apache/bookkeeper/client/api/WriteHandle), [WriteAdvHandle](../javadoc/org/apache/bookkeeper/client/api/WriteAdvHandle), and [ReadHandle](../javadoc/org/apache/bookkeeper/client/api/ReadHandle) are introduced to replace the generic [LedgerHandle](../javadoc/org/apache/bookkeeper/client/LedgerHandle).

> The new API is available in `org.apache.bookkeeper.client.api`. You should only use interfaces defined in this package.

*Beware* that in 4.6 this API is still experimental and may change in upcoming minor releases.
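Because every operation in the new API returns a `CompletableFuture`, operations compose naturally. As a quick taste of the style before the step-by-step walkthrough below, here is a minimal sketch that creates a ledger and appends one entry without blocking; it assumes an already-built `BookKeeper` client `bk` and a `password` byte array, and elides real error handling:

```java
// execute() yields a CompletableFuture<WriteHandle>, and append(...)
// yields a CompletableFuture<Long> carrying the new entry's id, so the
// two operations can be chained without blocking any thread.
bk.newCreateLedgerOp()
    .withDigestType(DigestType.CRC32)
    .withPassword(password)
    .execute()
    .thenCompose(wh -> wh.append("hello".getBytes()))
    .whenComplete((entryId, cause) -> {
        if (cause != null) {
            // handle the failure
        } else {
            System.out.println("Appended entry " + entryId);
        }
    });
```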
### Create a new client

In order to create a new [`BookKeeper`](../javadoc/org/apache/bookkeeper/client/api/BookKeeper) client object, you need to construct a [`ClientConfiguration`](../javadoc/org/apache/bookkeeper/conf/ClientConfiguration) object and set a [connection string](#connection-string) first, and then use [`BookKeeperBuilder`](../javadoc/org/apache/bookkeeper/client/api/BookKeeperBuilder) to build the client.

Here is an example of building the BookKeeper client:

```java
// construct a client configuration instance
ClientConfiguration conf = new ClientConfiguration();
conf.setZkServers(zkConnectionString);
conf.setZkLedgersRootPath("/path/to/ledgers/root");

// build the bookkeeper client
BookKeeper bk = BookKeeper.newBuilder(conf)
    .statsLogger(...)
    ...
    .build();
```

### Create ledgers

The easiest way to create a {% pop ledger %} using the Java client is via the [`createBuilder`](../javadoc/org/apache/bookkeeper/client/api/CreateBuilder). You must specify at least a [`DigestType`](../javadoc/org/apache/bookkeeper/client/api/DigestType) and a password.

Here's an example:

```java
BookKeeper bk = ...;

byte[] password = "some-password".getBytes();

WriteHandle wh = bk.newCreateLedgerOp()
    .withDigestType(DigestType.CRC32)
    .withPassword(password)
    .withEnsembleSize(3)
    .withWriteQuorumSize(3)
    .withAckQuorumSize(2)
    .execute()          // execute the creation op
    .get();             // wait for the execution to complete
```

A [`WriteHandle`](../javadoc/org/apache/bookkeeper/client/api/WriteHandle) is returned for applications to write and read entries to and from the ledger.

### Write flags

You can specify the behaviour of the writer by setting [`WriteFlags`](../javadoc/org/apache/bookkeeper/client/api/WriteFlag) at ledger creation time.
These flags are applied only during write operations and are not recorded in the metadata.

Available write flags:

| Flag | Explanation | Notes |
:---------|:------------|:-------
DEFERRED_SYNC | Writes are acknowledged early, without waiting for guarantees of durability | Data will only be written to the OS page cache, without forcing an fsync.

```java
BookKeeper bk = ...;

byte[] password = "some-password".getBytes();

WriteHandle wh = bk.newCreateLedgerOp()
    .withDigestType(DigestType.CRC32)
    .withPassword(password)
    .withEnsembleSize(3)
    .withWriteQuorumSize(3)
    .withAckQuorumSize(2)
    .withWriteFlags(DEFERRED_SYNC)
    .execute()          // execute the creation op
    .get();             // wait for the execution to complete
```

### Append entries to ledgers

The [`WriteHandle`](../javadoc/org/apache/bookkeeper/client/api/WriteHandle) can be used by applications to append entries to the ledgers.
```java
WriteHandle wh = ...;

CompletableFuture<Long> addFuture = wh.append("Some entry data".getBytes());

// option 1: you can wait for the add to complete synchronously
try {
    long entryId = FutureUtils.result(addFuture);
} catch (BKException bke) {
    // error handling
}

// option 2: you can process the result and exception asynchronously
addFuture
    .thenApply(entryId -> {
        // process the result
        return entryId;
    })
    .exceptionally(cause -> {
        // handle the exception
        return null;
    });

// option 3: bookkeeper provides a twitter-future-like event listener for processing result and exception asynchronously
addFuture.whenComplete(new FutureEventListener<Long>() {
    @Override
    public void onSuccess(Long entryId) {
        // process the result
    }
    @Override
    public void onFailure(Throwable cause) {
        // handle the exception
    }
});
```

The append method supports three representations of a byte array: the native Java `byte[]`, Java NIO `ByteBuffer`, and Netty `ByteBuf`. Using `ByteBuf` is recommended, as it is more GC-friendly.

### Open ledgers

You can open ledgers to read entries. Opening ledgers is done via the [`openBuilder`](../javadoc/org/apache/bookkeeper/client/api/OpenBuilder). You must specify the ledger ID and the password in order to open a ledger.

Here's an example:

```java
BookKeeper bk = ...;

long ledgerId = ...;
byte[] password = "some-password".getBytes();

ReadHandle rh = bk.newOpenLedgerOp()
    .withLedgerId(ledgerId)
    .withPassword(password)
    .execute()          // execute the open op
    .get();             // wait for the execution to complete
```

A [`ReadHandle`](../javadoc/org/apache/bookkeeper/client/api/ReadHandle) is returned for applications to read entries from the ledger.

#### Recovery vs NoRecovery

By default, the [`openBuilder`](../javadoc/org/apache/bookkeeper/client/api/OpenBuilder) opens the ledger in `NoRecovery` mode. You can open the ledger in `Recovery` mode by specifying `withRecovery(true)` in the open builder.

```java
BookKeeper bk = ...;

long ledgerId = ...;
byte[] password = "some-password".getBytes();

ReadHandle rh = bk.newOpenLedgerOp()
    .withLedgerId(ledgerId)
    .withPassword(password)
    .withRecovery(true)
    .execute()
    .get();
```

**What is the difference between "Recovery" and "NoRecovery"?**

If you open a ledger in `Recovery` mode, it will fence and seal the ledger -- no more entries are allowed to be appended to it. A writer that is currently appending entries to the ledger will fail with [`LedgerFencedException`](../javadoc/org/apache/bookkeeper/client/api/BKException.Code#LedgerFencedException).

In contrast, opening a ledger in `NoRecovery` mode does not fence and seal the ledger. `NoRecovery` mode is usually used by applications to tail-read from a ledger.

### Read entries from ledgers

The [`ReadHandle`](../javadoc/org/apache/bookkeeper/client/api/ReadHandle) returned from the open builder can be used by applications to read entries from the ledgers.
```java
ReadHandle rh = ...;

long startEntryId = ...;
long endEntryId = ...;
CompletableFuture<LedgerEntries> readFuture = rh.read(startEntryId, endEntryId);

// option 1: you can wait for the read to complete synchronously
try {
    LedgerEntries entries = FutureUtils.result(readFuture);
} catch (BKException bke) {
    // error handling
}

// option 2: you can process the result and exception asynchronously
readFuture
    .thenApply(entries -> {
        // process the result
        return entries;
    })
    .exceptionally(cause -> {
        // handle the exception
        return null;
    });

// option 3: bookkeeper provides a twitter-future-like event listener for processing result and exception asynchronously
readFuture.whenComplete(new FutureEventListener<LedgerEntries>() {
    @Override
    public void onSuccess(LedgerEntries entries) {
        // process the result
    }
    @Override
    public void onFailure(Throwable cause) {
        // handle the exception
    }
});
```

Once you are done with processing the [`LedgerEntries`](../javadoc/org/apache/bookkeeper/client/api/LedgerEntries), you can call `#close()` on the `LedgerEntries` instance to release the buffers held by it.

Applications are allowed to read any entries between `0` and [`LastAddConfirmed`](../javadoc/org/apache/bookkeeper/client/api/ReadHandle.html#getLastAddConfirmed). If an application attempts to read entries beyond `LastAddConfirmed`, it will receive [`IncorrectParameterException`](../javadoc/org/apache/bookkeeper/client/api/BKException.Code#IncorrectParameterException).

### Read unconfirmed entries from ledgers

`readUnconfirmed` provides a mechanism for applications to read entries beyond `LastAddConfirmed`. Applications should be aware that `readUnconfirmed` does not provide any repeatable-read consistency.

```java
CompletableFuture<LedgerEntries> readFuture = rh.readUnconfirmed(startEntryId, endEntryId);
```

### Tailing Reads

There are two methods for applications to achieve tailing reads: `Polling` and `Long-Polling`.

#### Polling

You can do this in a synchronous way:

```java
ReadHandle rh = ...;

long startEntryId = 0L;
long nextEntryId = startEntryId;
int numEntriesPerBatch = 4;
while (!rh.isClosed() || nextEntryId <= rh.getLastAddConfirmed()) {
    long lac = rh.getLastAddConfirmed();
    if (nextEntryId > lac) {
        // no more entries have been added; wait, then refresh the
        // last-add-confirmed value from the bookies
        Thread.sleep(1000);

        lac = rh.readLastAddConfirmed().get();
        continue;
    }

    long endEntryId = Math.min(lac, nextEntryId + numEntriesPerBatch - 1);
    LedgerEntries entries = rh.read(nextEntryId, endEntryId).get();

    // process the entries

    nextEntryId = endEntryId + 1;
}
```

#### Long Polling

```java
ReadHandle rh = ...;

long startEntryId = 0L;
long nextEntryId = startEntryId;
int numEntriesPerBatch = 4;
while (!rh.isClosed() || nextEntryId <= rh.getLastAddConfirmed()) {
    long lac = rh.getLastAddConfirmed();
    if (nextEntryId > lac) {
        // no more entries have been added; long-poll for the next one
        try (LastConfirmedAndEntry lacAndEntry = rh.readLastAddConfirmedAndEntry(nextEntryId, 1000, false).get()) {
            if (lacAndEntry.hasEntry()) {
                // process the entry

                ++nextEntryId;
            }
        }
    } else {
        long endEntryId = Math.min(lac, nextEntryId + numEntriesPerBatch - 1);
        LedgerEntries entries = rh.read(nextEntryId, endEntryId).get();

        // process the entries
        nextEntryId = endEntryId + 1;
    }
}
```

### Delete ledgers

{% pop Ledgers %} can be deleted by using [`DeleteBuilder`](../javadoc/org/apache/bookkeeper/client/api/DeleteBuilder).
```java
BookKeeper bk = ...;
long ledgerId = ...;

bk.newDeleteLedgerOp()
    .withLedgerId(ledgerId)
    .execute()
    .get();
```
diff --git a/site/docs/4.7.0/api/overview.md b/site/docs/4.7.0/api/overview.md
deleted file mode 100644
index 3e0adcd61af..00000000000
--- a/site/docs/4.7.0/api/overview.md
+++ /dev/null
@@ -1,17 +0,0 @@
---
title: BookKeeper API
---

BookKeeper offers a few APIs that applications can use to interact with it:

* The [ledger API](../ledger-api) is a lower-level API that enables you to interact with {% pop ledgers %} directly.
* The [Ledger Advanced API](../ledger-adv-api) is an advanced extension to the [Ledger API](../ledger-api) that provides more flexibility to applications.
* The [DistributedLog API](../distributedlog-api) is a higher-level API that provides convenient abstractions.

## Trade-offs

The `Ledger API` provides direct access to ledgers and thus enables you to use BookKeeper however you'd like.

However, in most use cases, if you want a `log stream`-like abstraction, it requires you to manage things like tracking the list of ledgers, rolling ledgers, and data retention on your own. In such cases, we recommend using the [DistributedLog API](../distributedlog-api), whose semantics resemble continuous log streams from the standpoint of applications.
diff --git a/site/docs/4.7.0/deployment/dcos.md b/site/docs/4.7.0/deployment/dcos.md
deleted file mode 100644
index b7dd65946a2..00000000000
--- a/site/docs/4.7.0/deployment/dcos.md
+++ /dev/null
@@ -1,142 +0,0 @@
---
title: Deploying BookKeeper on DC/OS
subtitle: Get up and running easily on an Apache Mesos cluster
logo: img/dcos-logo.png
---

[DC/OS](https://dcos.io/) (the DataCenter Operating System) is a distributed operating system used for deploying and managing applications and systems on [Apache Mesos](http://mesos.apache.org/). DC/OS is an open-source tool created and maintained by [Mesosphere](https://mesosphere.com/).

BookKeeper is available as a [DC/OS package](http://universe.dcos.io/#/package/bookkeeper/version/latest) from the [Mesosphere DC/OS Universe](http://universe.dcos.io/#/packages).

## Prerequisites

In order to run BookKeeper on DC/OS, you will need:

* DC/OS version [1.8](https://dcos.io/docs/1.8/) or higher
* A DC/OS cluster with at least three nodes
* The [DC/OS CLI tool](https://dcos.io/docs/1.8/usage/cli/install/) installed

Each node in your DC/OS-managed Mesos cluster must have at least:

* 1 CPU
* 1 GB of memory
* 10 GB of total persistent disk storage

## Installing BookKeeper

```shell
$ dcos package install bookkeeper --yes
```

This command will:

* Install the `bookkeeper` subcommand for the `dcos` CLI tool
* Start a single {% pop bookie %} on the Mesos cluster with the [default configuration](../../reference/config)

The bookie that is automatically started up uses the host mode of the network and by default exposes the service at `agent_ip:3181`.

> If you run `dcos package install bookkeeper` without setting the `--yes` flag, the install will run in interactive mode. For more information on the `package install` command, see the [DC/OS docs](https://docs.mesosphere.com/latest/cli/command-reference/dcos-package/dcos-package-install/).
### Services

To watch BookKeeper start up, click on the **Services** tab in the DC/OS [user interface](https://docs.mesosphere.com/latest/gui/) and you should see the `bookkeeper` package listed:

![DC/OS services]({{ site.baseurl }}img/dcos/services.png)

### Tasks

To see which tasks have started, click on the `bookkeeper` service and you'll see an interface that looks like this:

![DC/OS tasks]({{ site.baseurl }}img/dcos/tasks.png)

## Scaling BookKeeper

Once the first {% pop bookie %} has started up, you can click on the **Scale** tab to scale up your BookKeeper ensemble by adding more bookies (or scale down the ensemble by removing bookies).

![DC/OS scale]({{ site.baseurl }}img/dcos/scale.png)

## ZooKeeper Exhibitor

ZooKeeper contains the information for all bookies in the ensemble. When deployed on DC/OS, BookKeeper uses a ZooKeeper instance provided by DC/OS. You can access a visual UI for ZooKeeper using [Exhibitor](https://github.com/soabase/exhibitor/wiki), which is available at [http://master.dcos/exhibitor](http://master.dcos/exhibitor).

![ZooKeeper Exhibitor]({{ site.baseurl }}img/dcos/exhibitor.png)

You should see a listing of IP/host information for all bookies under the `messaging/bookkeeper/ledgers/available` node.

## Client connections

To connect to bookies running on DC/OS using clients running within your Mesos cluster, you need to specify the ZooKeeper connection string for DC/OS's ZooKeeper cluster:

```
master.mesos:2181
```

This is the *only* ZooKeeper host/port you need to include in your connection string. Here's an example using the [Java client](../../api/ledger-api#the-java-ledger-api-client):

```java
BookKeeper bkClient = new BookKeeper("master.mesos:2181");
```

If you're connecting using a client running outside your Mesos cluster, you need to supply the public-facing connection string for your DC/OS ZooKeeper cluster.

## Configuring BookKeeper

By default, the `bookkeeper` package will start up a BookKeeper ensemble consisting of one {% pop bookie %} with one CPU, 1 GB of memory, and a 70 MB persistent volume.

You can supply a non-default configuration when installing the package using a JSON file. Here's an example command:

```shell
$ dcos package install bookkeeper \
  --options=/path/to/config.json
```

You can then fetch the current configuration for BookKeeper at any time using the `package describe` command:

```shell
$ dcos package describe bookkeeper \
  --config
```

### Available parameters

> Not all [configurable parameters](../../reference/config) for BookKeeper are available for BookKeeper on DC/OS. Only the parameters shown in the table below are available.

Param | Type | Description | Default
:-----|:-----|:------------|:-------
`name` | String | The name of the DC/OS service. | `bookkeeper`
`cpus` | Integer | The number of CPU shares to allocate to each {% pop bookie %}. The minimum is 1. | `1`
`instances` | Integer | The number of {% pop bookies %} to run. The minimum is 1. | `1`
`mem` | Number | The memory, in MB, to allocate to each BookKeeper task | `1024.0` (1 GB)
`volume_size` | Number | The persistent volume size, in MB | `70`
`zk_client` | String | The connection string for the ZooKeeper client instance | `master.mesos:2181`
`service_port` | Integer | The BookKeeper export service port, using `PORT0` in Marathon | `3181`

### Example JSON configuration

Here's an example JSON configuration object for BookKeeper on DC/OS:

```json
{
  "instances": 5,
  "cpus": 3,
  "mem": 2048.0,
  "volume_size": 250
}
```

If that configuration were stored in a file called `bk-config.json`, you could apply it upon installing the BookKeeper package using this command:

```shell
$ dcos package install bookkeeper \
  --options=./bk-config.json
```

## Uninstalling BookKeeper

You can shut down and uninstall the `bookkeeper` package from DC/OS at any time using the `package uninstall` command:

```shell
$ dcos package uninstall bookkeeper
Uninstalled package [bookkeeper] version [4.7.0]
Thank you for using bookkeeper.
```
diff --git a/site/docs/4.7.0/deployment/kubernetes.md b/site/docs/4.7.0/deployment/kubernetes.md
deleted file mode 100644
index 0f113169edc..00000000000
--- a/site/docs/4.7.0/deployment/kubernetes.md
+++ /dev/null
@@ -1,181 +0,0 @@
---
title: Deploying Apache BookKeeper on Kubernetes
tags: [Kubernetes, Google Container Engine]
logo: img/kubernetes-logo.png
---

Apache BookKeeper can be easily deployed in [Kubernetes](https://kubernetes.io/) clusters. Managed clusters on [Google Container Engine](https://cloud.google.com/compute/) are the most convenient way to get started.

The deployment method shown in this guide relies on [YAML](http://yaml.org/) definitions for Kubernetes [resources](https://kubernetes.io/docs/resources-reference/v1.6/). The [`kubernetes`](https://github.com/apache/bookkeeper/tree/master/deploy/kubernetes) subdirectory holds resource definitions for:

* A three-node ZooKeeper cluster
* A BookKeeper cluster with a bookie running on each node.

## Setup on Google Container Engine

To get started, get the source code of the [`kubernetes`](https://github.com/apache/bookkeeper/tree/master/deploy/kubernetes) subdirectory from GitHub using `git clone`.

If you'd like to change the number of bookies or ZooKeeper nodes in your BookKeeper cluster, modify the `replicas` parameter in the `spec` section of the appropriate [`Deployment`](https://kubernetes.io/docs/concepts/workloads/controllers/deployment/) or [`StatefulSet`](https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/) resource.

[Google Container Engine](https://cloud.google.com/container-engine) (GKE) automates the creation and management of Kubernetes clusters in [Google Compute Engine](https://cloud.google.com/compute/) (GCE).

### Prerequisites

To get started, you'll need:

* A Google Cloud Platform account, which you can sign up for at [cloud.google.com](https://cloud.google.com)
* An existing Cloud Platform project
* The [Google Cloud SDK](https://cloud.google.com/sdk/downloads) (in particular the [`gcloud`](https://cloud.google.com/sdk/gcloud/) and `kubectl` tools).

### Create a new Kubernetes cluster

You can create a new GKE cluster using the [`container clusters create`](https://cloud.google.com/sdk/gcloud/reference/container/clusters/create) command for `gcloud`. This command enables you to specify the number of nodes in the cluster, the machine types of those nodes, and more.
As an example, we'll create a new GKE cluster for Kubernetes version [1.6.4](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG.md#v164) in the [us-central1-a](https://cloud.google.com/compute/docs/regions-zones/regions-zones#available) zone. The cluster will be named `bookkeeper-gke-cluster` and will consist of three VMs, each using two locally attached SSDs and running on [n1-standard-8](https://cloud.google.com/compute/docs/machine-types) machines. These SSDs will be used by bookie instances, one for the BookKeeper journal and the other for storing the actual data.

```bash
$ gcloud config set compute/zone us-central1-a
$ gcloud config set project your-project-name
$ gcloud container clusters create bookkeeper-gke-cluster \
  --machine-type=n1-standard-8 \
  --num-nodes=3 \
  --local-ssd-count=2 \
  --enable-kubernetes-alpha
```

By default, bookies will run on all the machines that have locally attached SSD disks. In this example, all of those machines will have two SSDs, but you can add different types of machines to the cluster later. You can control which machines host bookie servers using [labels](https://kubernetes.io/docs/concepts/overview/working-with-objects/labels).

### Dashboard

You can observe your cluster in the [Kubernetes Dashboard](https://kubernetes.io/docs/tasks/access-application-cluster/web-ui-dashboard/) by downloading the credentials for your Kubernetes cluster and opening up a proxy to the cluster:

```bash
$ gcloud container clusters get-credentials bookkeeper-gke-cluster \
  --zone=us-central1-a \
  --project=your-project-name
$ kubectl proxy
```

By default, the proxy will be opened on port 8001. Now you can navigate to [localhost:8001/ui](http://localhost:8001/ui) in your browser to access the dashboard. At first your GKE cluster will be empty, but that will change as you begin deploying.

When you create a cluster, your `kubectl` config in `~/.kube/config` (on macOS and Linux) will be updated for you, so you probably won't need to change your configuration. Nonetheless, you can ensure that `kubectl` can interact with your cluster by listing the nodes in the cluster:

```bash
$ kubectl get nodes
```

If `kubectl` is working with your cluster, you can proceed to deploy ZooKeeper and bookies.

### ZooKeeper

You *must* deploy ZooKeeper as the first component, as it is a dependency for the others.

```bash
$ kubectl apply -f zookeeper.yaml
```

Wait until all three ZooKeeper server pods are up and have the status `Running`. You can check on the status of the ZooKeeper pods at any time:

```bash
$ kubectl get pods -l component=zookeeper
NAME      READY     STATUS             RESTARTS   AGE
zk-0      1/1       Running            0          18m
zk-1      1/1       Running            0          17m
zk-2      0/1       Running            6          15m
```

This step may take several minutes, as Kubernetes needs to download the Docker image on the VMs.

If you want to connect to one of the remote ZooKeeper servers, you can use [zk-shell](https://github.com/rgs1/zk_shell); you first need to forward a local port to the remote ZooKeeper server:

```bash
$ kubectl port-forward zk-0 2181:2181
$ zk-shell localhost 2181
```

### Deploy Bookies

Once the ZooKeeper cluster is running, you can deploy the bookies. You can deploy the bookies either using a [DaemonSet](https://kubernetes.io/docs/concepts/workloads/controllers/daemonset/) or a [StatefulSet](https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/).
> NOTE: _DaemonSet_ vs _StatefulSet_
>
> A _DaemonSet_ ensures that all (or some) nodes run a bookie pod. As nodes are added to the cluster, bookie pods are added automatically to them. As nodes are removed from the
> cluster, those bookie pods are garbage collected. Bookies deployed in a DaemonSet store data on the local disks of those nodes, so no external storage or persistent
> volumes are required.
>
> A _StatefulSet_ maintains a sticky identity for the pods that it runs and manages. It provides stable and unique network identifiers, and stable and persistent storage for each pod. The pods
> are not interchangeable, and the identifiers for each pod are maintained across any rescheduling.
>
> Which one to use? A _DaemonSet_ is the easiest way to deploy a BookKeeper cluster, because it doesn't require an additional persistent volume provisioner and uses local disks. BookKeeper manages
> the data replication. It maintains the best latency properties. However, it uses `hostIP` and `hostPort` for communication between pods. On some Kubernetes platforms (such as DC/OS), `hostIP` and
> `hostPort` are not well supported. A _StatefulSet_ is only practical when deploying in a cloud environment or any Kubernetes installation that has persistent volumes available. Also be aware that latency
> can potentially be higher when using persistent volumes, because there is usually built-in replication in the persistent volumes.

```bash
# deploy bookies in a daemon set
$ kubectl apply -f bookkeeper.yaml

# deploy bookies in a stateful set
$ kubectl apply -f bookkeeper.stateful.yaml
```

You can check on the status of the bookie pods for these components either in the Kubernetes Dashboard or using `kubectl`:

```bash
$ kubectl get pods
```

Once all BookKeeper pods are `Running`, you can use zk-shell to find all available bookies under the `/ledgers/` node.

You can also run a [bookkeeper tutorial](https://github.com/ivankelly/bookkeeper-tutorial/) instance, named 'dice' here, in this BookKeeper cluster:

```bash
$ kubectl run -i --tty --attach dice --image=caiok/bookkeeper-tutorial --env ZOOKEEPER_SERVERS="zk-0.zookeeper"
```

Example output from the dice instance looks like this:

```shell
➜ $ kubectl run -i --tty --attach dice --image=caiok/bookkeeper-tutorial --env ZOOKEEPER_SERVERS="zk-0.zookeeper"
If you don't see a command prompt, try pressing enter.
Value = 1, epoch = 5, leading
Value = 2, epoch = 5, leading
Value = 1, epoch = 5, leading
Value = 4, epoch = 5, leading
Value = 5, epoch = 5, leading
Value = 4, epoch = 5, leading
Value = 3, epoch = 5, leading
Value = 5, epoch = 5, leading
Value = 3, epoch = 5, leading
Value = 2, epoch = 5, leading
Value = 1, epoch = 5, leading
Value = 4, epoch = 5, leading
Value = 2, epoch = 5, leading
```

### Un-Deploy

Delete the demo dice instance:

```bash
$ kubectl delete deployment dice
```

Delete BookKeeper:

```bash
$ kubectl delete -f bookkeeper.yaml
```

Delete ZooKeeper:

```bash
$ kubectl delete -f zookeeper.yaml
```

Delete the cluster:

```bash
$ gcloud container clusters delete bookkeeper-gke-cluster
```
diff --git a/site/docs/4.7.0/deployment/manual.md b/site/docs/4.7.0/deployment/manual.md
deleted file mode 100644
index daafd5556f5..00000000000
--- a/site/docs/4.7.0/deployment/manual.md
+++ /dev/null
@@ -1,56 +0,0 @@
---
title: Manual deployment
---

The easiest way to deploy BookKeeper is using schedulers like [DC/OS](../dcos), but you can also deploy BookKeeper clusters manually.
A BookKeeper cluster consists of two main components:

* A [ZooKeeper](#zookeeper-setup) cluster that is used for configuration- and coordination-related tasks
* An [ensemble](#starting-up-bookies) of {% pop bookies %}

## ZooKeeper setup

We won't provide a full guide to setting up a ZooKeeper cluster here. We recommend that you consult [this guide](https://zookeeper.apache.org/doc/current/zookeeperAdmin.html) in the official ZooKeeper documentation.

## Starting up bookies

Once your ZooKeeper cluster is up and running, you can start up as many {% pop bookies %} as you'd like to form a cluster. Before starting up each bookie, you need to modify the bookie's configuration to make sure that it points to the right ZooKeeper cluster.

On each bookie host, you need to [download](../../getting-started/installation#download) the BookKeeper package as a tarball. Once you've done that, you need to configure the bookie by setting values in the `bookkeeper-server/conf/bk_server.conf` config file. The one parameter that you will absolutely need to change is the [`zkServers`](../../config#zkServers) parameter, which you will need to set to the ZooKeeper connection string for your ZooKeeper cluster. Here's an example:

```properties
zkServers=100.0.0.1:2181,100.0.0.2:2181,100.0.0.3:2181
```

> A full listing of configurable parameters available in `bookkeeper-server/conf/bk_server.conf` can be found in the [Configuration](../../reference/config) reference manual.

Once the bookie's configuration is set, you can start it up using the [`bookie`](../../reference/cli#bookkeeper-bookie) command of the [`bookkeeper`](../../reference/cli#bookkeeper) CLI tool:

```shell
$ bookkeeper-server/bin/bookkeeper bookie
```

> You can also build BookKeeper [by cloning it from source](../../getting-started/installation#clone) or [using Maven](../../getting-started/installation#build-using-maven).

### System requirements

{% include system-requirements.md %}

## Cluster metadata setup

Once you've started up a cluster of bookies, you need to set up cluster metadata for the cluster by running the following command from any bookie in the cluster:

```shell
$ bookkeeper-server/bin/bookkeeper shell metaformat
```

> The `metaformat` command performs all the necessary ZooKeeper cluster metadata tasks and thus only needs to be run *once* and from *any* bookie in the BookKeeper cluster.

Once cluster metadata formatting has been completed, your BookKeeper cluster is ready to go!
diff --git a/site/docs/4.7.0/development/codebase.md b/site/docs/4.7.0/development/codebase.md
deleted file mode 100644
index 9a83073ea4c..00000000000
--- a/site/docs/4.7.0/development/codebase.md
+++ /dev/null
@@ -1,3 +0,0 @@
---
title: The BookKeeper codebase
---
diff --git a/site/docs/4.7.0/development/protocol.md b/site/docs/4.7.0/development/protocol.md
deleted file mode 100644
index 6d17aa0ed45..00000000000
--- a/site/docs/4.7.0/development/protocol.md
+++ /dev/null
@@ -1,148 +0,0 @@
---
title: The BookKeeper protocol
---

BookKeeper uses a special replication protocol for guaranteeing persistent storage of entries in an ensemble of bookies.

> This document assumes that you have some knowledge of leader election and log replication and how these can be used in a distributed system. If not, we recommend reading the [example application](../../api/ledger-api#example-application) documentation first.
## Ledgers

{% pop Ledgers %} are the basic building block of BookKeeper and the level at which BookKeeper makes its persistent storage guarantees. A replicated log consists of an ordered list of ledgers. See [Ledgers to logs](#ledgers-to-logs) for info on building a replicated log from ledgers.

Ledgers are composed of metadata and {% pop entries %}. The metadata is stored in ZooKeeper, which provides a *compare-and-swap* (CAS) operation. Entries are stored on storage nodes known as {% pop bookies %}.

A ledger has a single writer and multiple readers (SWMR).

### Ledger metadata

A ledger's metadata contains the following:

Parameter | Name | Meaning
:---------|:-----|:-------
Identifier | | A 64-bit integer, unique within the system
Ensemble size | **E** | The number of nodes the ledger is stored on
Write quorum size | **Qw** | The number of nodes each entry is written to. In effect, the max replication for the entry.
Ack quorum size | **Qa** | The number of nodes an entry must be acknowledged on. In effect, the minimum replication for the entry.
Current state | | The current status of the ledger. One of `OPEN`, `CLOSED`, or `IN_RECOVERY`.
Last entry | | The last entry in the ledger, or `NULL` if the current state is not `CLOSED`.

In addition, each ledger's metadata consists of one or more *fragments*. Each fragment consists of:

* the first entry ID of the fragment, and
* the list of bookies for the fragment.

When creating a ledger, the following invariant must hold:

**E >= Qw >= Qa**

Thus, the ensemble size (**E**) must be at least as large as the write quorum size (**Qw**), which must in turn be at least as large as the ack quorum size (**Qa**). If that condition does not hold, then the ledger creation operation will fail.

### Ensembles

When a ledger is created, **E** bookies are chosen for the entries of that ledger. The bookies are the initial ensemble of the ledger. A ledger can have multiple ensembles, but an entry has only one ensemble. Changes in the ensemble involve a new fragment being added to the ledger.

Take the following example. In this ledger, with an ensemble size of 3, there are two fragments and thus two ensembles, one starting at entry 0, the second at entry 12. The second ensemble differs from the first only by its first element. This could be because B1 has failed and therefore had to be replaced.

First entry | Bookies
:-----------|:-------
0 | B1, B2, B3
12 | B4, B2, B3

### Write quorums

Each entry in the log is written to **Qw** nodes. This is considered the write quorum for that entry. The write quorum is the subsequence of the ensemble, **Qw** in length, starting at the bookie at index (entryid % **E**).

For example, in a ledger with **E** = 4, **Qw** = 3, and **Qa** = 2, with an ensemble consisting of B1, B2, B3, and B4, the write quorums for the first 6 entries will be:

Entry | Write quorum
:-----|:------------
0 | B1, B2, B3
1 | B2, B3, B4
2 | B3, B4, B1
3 | B4, B1, B2
4 | B1, B2, B3
5 | B2, B3, B4

For instance, entry 2 starts at bookie index 2 % 4 = 2, so its write quorum is B3, B4, B1, wrapping around the ensemble.

There are only **E** distinct write quorums in any ensemble. If **Qw** = **E**, then there is only one, as no striping occurs.

### Ack quorums

The ack quorum for an entry is any subset of the write quorum of size **Qa**. If **Qa** bookies acknowledge an entry, it means it has been fully replicated.

### Guarantees

The system can tolerate **Qa** – 1 failures without data loss.

BookKeeper guarantees that:

1. All updates to a ledger will be read in the same order as they were written.
2. All clients will read the same sequence of updates from the ledger.

## Writing to ledgers

Since a ledger has a single writer, ensuring that entry ids are sequential is trivial. A bookie acknowledges a write once it has been persisted to disk and is therefore durable. Once **Qa** bookies from the write quorum acknowledge the write, the write is acknowledged to the client, but only if all entries with lower entry ids in the ledger have already been acknowledged to the client.

The entry written contains the ledger id, the entry id, the last add confirmed, and the payload. The last add confirmed is the last entry which had been acknowledged to the client when this entry was written. Sending this with the entry speeds up recovery of the ledger in the case that the writer crashes.

Another client can also read entries in the ledger up to the last add confirmed, as we guarantee that all entries up to that point have been replicated on **Qa** nodes, and therefore all future readers will be able to read them as well. However, to read like this, the ledger should be opened with a non-fencing open. Otherwise, it would kill the writer.

If a node fails to acknowledge a write, the writer will create a new ensemble by replacing the failed node in the current ensemble. It creates a new fragment with this ensemble, starting from the first message that has not been acknowledged to the client. Creating the new fragment involves making a CAS write to the metadata. If the CAS write fails, someone else has modified something in the ledger metadata. This concurrent modification could have been caused by recovery or {% pop rereplication %}. We reread the metadata. If the state of the ledger is no longer `OPEN`, we send an error to the client for any outstanding writes. Otherwise, we try to replace the failed node again.

### Closing a ledger as a writer

Closing a ledger is straightforward for a writer. The writer makes a CAS write to the metadata, changing the state to `CLOSED` and setting the last entry of the ledger to the last entry which we have acknowledged to the client.

If the CAS write fails, it means someone else has modified the metadata. We reread the metadata, and retry closing as long as the state of the ledger is still `OPEN`. If the state is `IN_RECOVERY` we send an error to the client. If the state is `CLOSED` and the last entry is the same as the last entry we have acknowledged to the client, we complete the close operation successfully. If the last entry is different from what we have acknowledged to the client, we send an error to the client.

### Closing a ledger as a reader

A reader can also force a ledger to close. Forcing the ledger to close will prevent any writer from adding new entries to the ledger. This is called {% pop fencing %}. This can occur when a writer has crashed or become unavailable, and a new writer wants to take over writing to the log. The new writer must ensure that it has seen all updates from the previous writer, and prevent the previous writer from making any new updates before making any updates of its own.

To recover a ledger, we first update the state in the metadata to `IN_RECOVERY`. We then send a fence message to all the bookies in the last fragment of the ledger. When a bookie receives a fence message for a ledger, the fenced state of the ledger is persisted to disk. Once we receive a response from at least (**Qw** - **Qa**) + 1 bookies from each write quorum in the ensemble, the ledger is fenced.
By ensuring we have received a response from at least (**Qw** - **Qa**) + 1 bookies in each write quorum, we ensure that, if the old writer is alive and tries to add a new entry, there will be no write quorum in which **Qa** bookies will accept the write. For example, with **Qw** = 3 and **Qa** = 2, fencing (3 - 2) + 1 = 2 bookies in each write quorum leaves at most 1 un-fenced bookie per quorum, one short of the 2 acknowledgements the writer needs. If the old writer tries to update the ensemble, it will fail on the CAS metadata write, and then see that the ledger is in `IN_RECOVERY` state, and that it therefore shouldn't try to write to it.

The old writer will be able to write entries to individual bookies (we can't guarantee that the fence message reaches all bookies), but as it will not be able to reach the ack quorum, it will not be able to send a success response to its client. The client will get a LedgerFenced error instead.

It is important to note that when you get a ledger fenced message for an entry, it doesn't mean that the entry has not been written. It means that the entry may or may not have been written, and this can only be determined after the ledger is recovered. In effect, LedgerFenced should be treated like a timeout.

Once the ledger is fenced, recovery can begin. Recovery means finding the last entry of the ledger and closing the ledger. To find the last entry of the ledger, the client asks all bookies for the highest last add confirmed value they have seen. It waits until it has received a response from at least (**Qw** - **Qa**) + 1 bookies from each write quorum, and takes the highest response as the entry id to start reading forward from. It then starts reading forward in the ledger, one entry at a time, replicating all entries it sees to the entire write quorum for that entry. Once it can no longer read any more entries, it updates the state in the metadata to `CLOSED`, and sets the last entry of the ledger to the last entry it wrote. Multiple readers can try to recover a ledger at the same time, but as the metadata write is CAS, they will all converge on the same last entry of the ledger.

## Ledgers to logs

In BookKeeper, {% pop ledgers %} can be used to build a replicated log for your system. All guarantees provided by BookKeeper are at the ledger level. Guarantees on the whole log can be built using the ledger guarantees and any consistent datastore with a compare-and-swap (CAS) primitive. BookKeeper uses ZooKeeper as the datastore but others could theoretically be used.

A log in BookKeeper is built from some number of ledgers, with a fixed order. A ledger represents a single segment of the log. A ledger could be the whole period that one node was the leader, or there could be multiple ledgers for a single period of leadership. However, there can only ever be one leader that adds entries to a single ledger. Ledgers cannot be reopened for writing once they have been closed/recovered.

> BookKeeper does *not* provide leader election. You must use a system like ZooKeeper for this.

In many cases, leader election is really leader suggestion. Multiple nodes could think that they are leader at any one time. It is the job of the log to guarantee that only one can write changes to the system.

### Opening a log

Once a node thinks it is leader for a particular log, it must take the following steps:

1. Read the list of ledgers for the log
1. {% pop Fence %} the last two ledgers in the list. Two ledgers are fenced because the writer may be writing to the second-to-last ledger while adding the last ledger to the list.
1. Create a new ledger
1. Add the new ledger to the ledger list
1. Write the new ledger list back to the datastore using a CAS operation

The fencing in step 2 and the CAS operation in step 5 prevent two nodes from thinking that they have leadership at any one time.

The CAS operation will fail if the list of ledgers has changed between reading it and writing back the new list. When the CAS operation fails, the leader must start at step 1 again. Even better, it should check that it is in fact still the leader with the system that is providing leader election. The protocol will work correctly without this step, though it will be able to make very little progress if two nodes think they are leader and are duelling for the log.

The node must not serve any writes until step 5 completes successfully.

### Rolling ledgers

The leader may wish to close the current ledger and open a new one every so often. Ledgers can only be deleted as a whole. If you don't roll the log, you won't be able to clean up old entries in the log without a leader change. By closing the current ledger and adding a new one, the leader allows the log to be truncated whenever that data is no longer needed. The steps for rolling the log are similar to those for creating a new ledger:

1. Create a new ledger
1. Add the new ledger to the ledger list
1. Write the new ledger list to the datastore using CAS
1. Close the previous ledger

By deferring the closing of the previous ledger until step 4, we can continue writing to the log while we perform metadata update operations to add the new ledger. This is safe as long as you fence the last 2 ledgers when acquiring leadership.
diff --git a/site/docs/4.7.0/getting-started/concepts.md b/site/docs/4.7.0/getting-started/concepts.md
deleted file mode 100644
index 7a3c92847b2..00000000000
--- a/site/docs/4.7.0/getting-started/concepts.md
+++ /dev/null
@@ -1,202 +0,0 @@
---
title: BookKeeper concepts and architecture
subtitle: The core components and how they work
prev: ../run-locally
---

BookKeeper is a service that provides persistent storage of streams of log [entries](#entries)---aka *records*---in sequences called [ledgers](#ledgers). BookKeeper replicates stored entries across multiple servers.

## Basic terms

In BookKeeper:

* each unit of a log is an [*entry*](#entries) (aka record)
* streams of log entries are called [*ledgers*](#ledgers)
* individual servers storing ledgers of entries are called [*bookies*](#bookies)

BookKeeper is designed to be reliable and resilient to a wide variety of failures. Bookies can crash, corrupt data, or discard data, but as long as there are enough bookies behaving correctly in the ensemble the service as a whole will behave correctly.

## Entries

> **Entries** contain the actual data written to ledgers, along with some important metadata.

BookKeeper entries are sequences of bytes that are written to [ledgers](#ledgers). Each entry has the following fields:

Field | Java type | Description
:-----|:----------|:-----------
Ledger number | `long` | The ID of the ledger to which the entry has been written
Entry number | `long` | The unique ID of the entry
Last confirmed (LC) | `long` | The ID of the last recorded entry
Data | `byte[]` | The entry's data (written by the client application)
Authentication code | `byte[]` | The message auth code, which includes *all* other fields in the entry

## Ledgers

> **Ledgers** are the basic unit of storage in BookKeeper.

Ledgers are sequences of entries, while each entry is a sequence of bytes.
Entries are written to a ledger:

* sequentially, and
* at most once.

This means that ledgers have *append-only* semantics. Entries cannot be modified once they've been written to a ledger. Determining the proper write order is the responsibility of [client applications](#clients).

## Clients and APIs

> BookKeeper clients have two main roles: they create and delete ledgers, and they read entries from and write entries to ledgers.
>
> BookKeeper provides both a lower-level and a higher-level API for ledger interaction.

There are currently two APIs that can be used for interacting with BookKeeper:

* The [ledger API](../../api/ledger-api) is a lower-level API that enables you to interact with {% pop ledgers %} directly.
* The [DistributedLog API](../../api/distributedlog-api) is a higher-level API that enables you to use BookKeeper without directly interacting with ledgers.

In general, you should choose the API based on how much granular control you need over ledger semantics. The two APIs can also both be used within a single application.

## Bookies

> **Bookies** are individual BookKeeper servers that handle ledgers (more specifically, fragments of ledgers). Bookies function as part of an ensemble.

A bookie is an individual BookKeeper storage server. Individual bookies store fragments of ledgers, not entire ledgers (for the sake of performance). For any given ledger **L**, an *ensemble* is the group of bookies storing the entries in **L**.

Whenever entries are written to a ledger, those entries are {% pop striped %} across the ensemble (written to a sub-group of bookies rather than to all bookies).

### Motivation

> BookKeeper was initially inspired by the NameNode server in HDFS but its uses now extend far beyond this.

The initial motivation for BookKeeper comes from the [Hadoop](http://hadoop.apache.org/) ecosystem. In the [Hadoop Distributed File System](https://wiki.apache.org/hadoop/HDFS) (HDFS), a special node called the [NameNode](https://wiki.apache.org/hadoop/NameNode) logs all operations in a reliable fashion, which ensures that recovery is possible in case of crashes.

The NameNode, however, served only as initial inspiration for BookKeeper. The applications for BookKeeper extend far beyond this and include essentially any application that requires an append-based storage system. BookKeeper provides a number of advantages for such applications:

* Highly efficient writes
* High fault tolerance via replication of messages within ensembles of bookies
* High throughput for write operations via {% pop striping %} (across as many bookies as you wish)

## Metadata storage

BookKeeper requires a metadata storage service to store information related to [ledgers](#ledgers) and available bookies. BookKeeper currently uses [ZooKeeper](https://zookeeper.apache.org) for this and other tasks.

## Data management in bookies

Bookies manage data in a [log-structured](https://en.wikipedia.org/wiki/Log-structured_file_system) way, which is implemented using three types of files:

* [journals](#journals)
* [entry logs](#entry-logs)
* [index files](#index-files)

### Journals

A journal file contains BookKeeper transaction logs. Before any update to a ledger takes place, the bookie ensures that a transaction describing the update is written to non-volatile storage. A new journal file is created once the bookie starts or the older journal file reaches the journal file size threshold.
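To make the write-ahead idea concrete, here is a minimal, purely illustrative Java sketch of journaling an update before applying it. This is not BookKeeper's actual journal implementation; it is just a toy (the `ToyJournal` name is hypothetical) showing the pattern of forcing a record to non-volatile storage before acknowledging the update:

```java
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;

// Toy write-ahead journal: every record is appended and fsync'd
// before the caller is allowed to treat the update as applied.
final class ToyJournal {
    private final FileChannel channel;

    ToyJournal(Path journalFile) throws IOException {
        this.channel = FileChannel.open(journalFile,
                StandardOpenOption.CREATE, StandardOpenOption.APPEND);
    }

    void logUpdate(byte[] record) throws IOException {
        channel.write(ByteBuffer.wrap(record));
        channel.force(false); // fsync: the record is now durable
    }
}
```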
- -### Entry logs - -An entry log file manages the written entries received from BookKeeper clients. Entries from different ledgers are aggregated and written sequentially, while their offsets are kept as pointers in a [ledger cache](#ledger-cache) for fast lookup. - -A new entry log file is created once the bookie starts or the older entry log file reaches the entry log size threshold. Old entry log files are removed by the Garbage Collector Thread once they are not associated with any active ledger. - -### Index files - -An index file is created for each ledger, which comprises a header and several fixed-length index pages that record the offsets of data stored in entry log files. - -Since updating index files would introduce random disk I/O, index files are updated lazily by a sync thread running in the background. This ensures speedy performance for updates. Before index pages are persisted to disk, they are gathered in a ledger cache for lookup. - -### Ledger cache - -Ledger index pages are cached in a memory pool, which allows for more efficient management of disk head scheduling. - -### Adding entries - -When a client instructs a {% pop bookie %} to write an entry to a ledger, the entry will go through the following steps to be persisted on disk: - -1. The entry is appended to an [entry log](#entry-logs) -1. The index of the entry is updated in the [ledger cache](#ledger-cache) -1. A transaction corresponding to this entry update is appended to the [journal](#journals) -1. A response is sent to the BookKeeper client - -> For performance reasons, the entry log buffers entries in memory and commits them in batches, while the ledger cache holds index pages in memory and flushes them lazily. This process is described in more detail in the [Data flush](#data-flush) section below. - -### Data flush - -Ledger index pages are flushed to index files in the following two cases: - -* The ledger cache memory limit is reached. There is no more space available to hold newer index pages. Dirty index pages will be evicted from the ledger cache and persisted to index files. -* A background sync thread is responsible for flushing index pages from the ledger cache to index files periodically. - -Besides flushing index pages, the sync thread is responsible for rolling journal files in case journal files use too much disk space. The data flush flow in the sync thread is as follows: - -* A `LastLogMark` is recorded in memory. The `LastLogMark` indicates that those entries before it have been persisted (to both index and entry log files) and contains two parts: - 1. A `txnLogId` (the file ID of a journal) - 1. A `txnLogPos` (offset in a journal) -* Dirty index pages are flushed from the ledger cache to the index file, and entry log files are flushed to ensure that all buffered entries in entry log files are persisted to disk. - - Ideally, a bookie only needs to flush index pages and entry log files that contain entries before `LastLogMark`. There is, however, no such information in the ledger and entry log mapping to journal files. Consequently, the thread flushes the ledger cache and entry log entirely here, and may flush entries after the `LastLogMark`. Flushing more is not a problem, though, just redundant. -* The `LastLogMark` is persisted to disk, which means that the entry data and index pages of all entries added before `LastLogMark` have also been persisted to disk. It is now time to safely remove journal files created earlier than `txnLogId`.
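The ordering of these steps is what makes the checkpoint safe. The sketch below illustrates that ordering only; the class and method names are hypothetical, not BookKeeper's actual internals:

```java
// Illustrative sketch of the sync thread's checkpoint ordering described above.
// All names here are hypothetical; only the ordering reflects the text:
// record mark -> flush data -> persist mark -> prune old journals.
final class CheckpointSketch {
    long txnLogId;   // file ID of the current journal
    long txnLogPos;  // offset within that journal file

    void checkpoint() {
        long markedLogId = txnLogId;            // 1. record LastLogMark in memory
        long markedLogPos = txnLogPos;
        flushIndexPages();                      // 2. flush dirty index pages to index files
        flushEntryLogs();                       //    and flush buffered entries to entry logs
        persistMark(markedLogId, markedLogPos); // 3. persist LastLogMark to disk
        pruneJournalsOlderThan(markedLogId);    // 4. older journals are now safe to delete
    }

    void flushIndexPages() { /* ... */ }
    void flushEntryLogs() { /* ... */ }
    void persistMark(long id, long pos) { /* ... */ }
    void pruneJournalsOlderThan(long id) { /* ... */ }
}
```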
- -If the bookie has crashed before persisting `LastLogMark` to disk, it still has journal files containing entries for which index pages may not have been persisted. Consequently, when this bookie restarts, it inspects journal files to restore those entries, so no data is lost. - -Using the above data flush mechanism, it is safe for the sync thread to skip data flushing when the bookie shuts down. However, the entry logger uses a buffered channel to write entries in batches, and there might be data buffered in that channel at shutdown. The bookie needs to ensure that the entry log flushes its buffered data during shutdown. Otherwise, entry log files become corrupted with partial entries. - -### Data compaction - -On bookies, entries of different ledgers are interleaved in entry log files. A bookie runs a garbage collector thread to delete unassociated entry log files to reclaim disk space. If a given entry log file contains entries from a ledger that has not been deleted, then the entry log file would never be removed and the occupied disk space never reclaimed. In order to avoid such a case, a bookie server compacts entry log files in a garbage collector thread to reclaim disk space. - -There are two kinds of compaction running at different frequencies: minor compaction and major compaction. The difference between minor compaction and major compaction lies in the threshold value and compaction interval. - -* The garbage collection threshold is the size percentage of an entry log file occupied by those undeleted ledgers. The default minor compaction threshold is 0.2, while the major compaction threshold is 0.8. - -* The garbage collection interval is how frequently to run the compaction. The default minor compaction interval is 1 hour, while the major compaction interval is 1 day. - -> If either the threshold or interval is set to less than or equal to zero, compaction is disabled. - -The data compaction flow in the garbage collector thread is as follows: - -* The thread scans entry log files to get their entry log metadata, which records a list of ledgers comprising an entry log and their corresponding percentages. -* With the normal garbage collection flow, once the bookie determines that a ledger has been deleted, the ledger will be removed from the entry log metadata and the size of the entry log reduced. -* If the remaining size of an entry log file reaches a specified threshold, the entries of active ledgers in the entry log will be copied to a new entry log file. -* Once all valid entries have been copied, the old entry log file is deleted. - -## ZooKeeper metadata - -BookKeeper requires a ZooKeeper installation for storing [ledger](#ledger) metadata. Whenever you construct a [`BookKeeper`](../../api/javadoc/org/apache/bookkeeper/client/BookKeeper) client object, you need to pass a list of ZooKeeper servers as a parameter to the constructor, like this: - -```java -String zkConnectionString = "127.0.0.1:2181"; -BookKeeper bkClient = new BookKeeper(zkConnectionString); -``` - -> For more info on using the BookKeeper Java client, see [this guide](../../api/ledger-api#the-java-ledger-api-client). - -## Ledger manager - -A *ledger manager* handles ledgers' metadata (which is stored in ZooKeeper). BookKeeper offers two types of ledger managers: the [flat ledger manager](#flat-ledger-manager) and the [hierarchical ledger manager](#hierarchical-ledger-manager).
Both ledger managers extend the [`AbstractZkLedgerManager`](../../api/javadoc/org/apache/bookkeeper/meta/AbstractZkLedgerManager) abstract class. - -> #### Use the flat ledger manager in most cases -> The flat ledger manager is the default and is recommended for nearly all use cases. The hierarchical ledger manager is better suited only for managing very large numbers of BookKeeper ledgers (> 50,000). - -### Flat ledger manager - -The *flat ledger manager*, implemented in the [`FlatLedgerManager`](../../api/javadoc/org/apache/bookkeeper/meta/FlatLedgerManager.html) class, stores all ledgers' metadata in child nodes of a single ZooKeeper path. The flat ledger manager creates [sequential nodes](https://zookeeper.apache.org/doc/trunk/zookeeperProgrammers.html#Sequence+Nodes+--+Unique+Naming) to ensure the uniqueness of the ledger ID and prefixes all nodes with `L`. Bookie servers manage their own active ledgers in a hash map so that it's easy to find which ledgers have been deleted from ZooKeeper and then garbage collect them. - -The flat ledger manager's garbage collection flow proceeds as follows: - -* All existing ledgers are fetched from ZooKeeper (`zkActiveLedgers`) -* All ledgers currently active within the bookie are fetched (`bkActiveLedgers`) -* The currently active ledgers are looped through to determine which ledgers don't currently exist in ZooKeeper. Those are then garbage collected. - -### Hierarchical ledger manager - -The *hierarchical ledger manager*, implemented in the [`HierarchicalLedgerManager`](../../api/javadoc/org/apache/bookkeeper/meta/HierarchicalLedgerManager) class, stores ledgers' metadata in two-level [znodes](https://zookeeper.apache.org/doc/current/zookeeperOver.html#Nodes+and+ephemeral+nodes). It first obtains a global unique ID from ZooKeeper using an [`EPHEMERAL_SEQUENTIAL`](https://zookeeper.apache.org/doc/current/api/org/apache/zookeeper/CreateMode.html#EPHEMERAL_SEQUENTIAL) znode. Since ZooKeeper's sequence counter has a format of `%10d` (10 digits with 0 padding, for example `0000000001`), the hierarchical ledger manager splits the generated ID into 3 parts: - -```shell -{level1 (2 digits)}{level2 (4 digits)}{level3 (4 digits)} -``` - -These three parts are used to form the actual ledger node path to store ledger metadata: - -```shell -{ledgers_root_path}/{level1}/{level2}/L{level3} -``` - -For example, ledger 0000000001 is split into three parts, 00, 0000, and 0001, and stored in znode `/{ledgers_root_path}/00/0000/L0001`. Each znode could have as many as 10,000 ledgers, which avoids the problem of the child list being larger than the maximum ZooKeeper packet size (which is the [limitation](https://issues.apache.org/jira/browse/BOOKKEEPER-39) that initially prompted the creation of the hierarchical ledger manager). diff --git a/site/docs/4.7.0/getting-started/installation.md b/site/docs/4.7.0/getting-started/installation.md deleted file mode 100644 index 9986cd8e043..00000000000 --- a/site/docs/4.7.0/getting-started/installation.md +++ /dev/null @@ -1,74 +0,0 @@ ---- -title: BookKeeper installation -subtitle: Download or clone BookKeeper and build it locally -next: ../run-locally --- - -{% capture download_url %}http://apache.claz.org/bookkeeper/bookkeeper-{{ site.latest_release }}/bookkeeper-{{ site.latest_release }}-src.tar.gz{% endcapture %} - -You can install BookKeeper either by [downloading](#download) a [GZipped](http://www.gzip.org/) tarball package or [cloning](#clone) the BookKeeper repository.
- -## Requirements - -* [Unix environment](http://www.opengroup.org/unix) -* [Java Development Kit 1.6](http://www.oracle.com/technetwork/java/javase/downloads/index.html) or later -* [Maven 3.0](https://maven.apache.org/install.html) or later - -## Download - -You can download Apache BookKeeper releases from one of many [Apache mirrors](http://www.apache.org/dyn/closer.cgi/bookkeeper). Here's an example for the [apache.claz.org](http://apache.claz.org/bookkeeper) mirror: - -```shell -$ curl -O {{ download_url }} -$ tar xvf bookkeeper-{{ site.latest_release }}-src.tar.gz -$ cd bookkeeper-{{ site.latest_release }} -``` - -## Clone - -To build BookKeeper from source, clone the repository, either from the [GitHub mirror]({{ site.github_repo }}) or from the [Apache repository](http://git.apache.org/bookkeeper.git/): - -```shell -# From the GitHub mirror -$ git clone {{ site.github_repo}} - -# From Apache directly -$ git clone git://git.apache.org/bookkeeper.git/ -``` - -## Build using Maven - -Once you have BookKeeper on your local machine, either by [downloading](#download) or [cloning](#clone) it, you can then build BookKeeper from source using Maven: - -```shell -$ mvn package -``` - -> You can skip tests by adding the `-DskipTests` flag when running `mvn package`. - -### Useful Maven commands - -Some other useful Maven commands beyond `mvn package`: - -Command | Action -:-------|:------ -`mvn clean` | Removes build artifacts -`mvn compile` | Compiles JAR files from Java sources -`mvn compile spotbugs:spotbugs` | Compile using the Maven [SpotBugs](https://github.com/spotbugs/spotbugs-maven-plugin) plugin -`mvn install` | Install the BookKeeper JAR in your local Maven cache (usually in the `~/.m2` directory) -`mvn deploy` | Deploy the BookKeeper JAR to the Maven repo (if you have the proper credentials) -`mvn verify` | Performs a wide variety of verification and validation tasks -`mvn apache-rat:check` | Run Maven using the [Apache Rat](http://creadur.apache.org/rat/apache-rat-plugin/) plugin -`mvn compile javadoc:aggregate` | Build Javadocs locally -`mvn package assembly:single` | Build a complete distribution using the Maven [Assembly](http://maven.apache.org/plugins/maven-assembly-plugin/) plugin - -## Package directory - -The BookKeeper project contains several subfolders that you should be aware of: - -Subfolder | Contains -:---------|:-------- -[`bookkeeper-server`]({{ site.github_repo }}/tree/master/bookkeeper-server) | The BookKeeper server and client -[`bookkeeper-benchmark`]({{ site.github_repo }}/tree/master/bookkeeper-benchmark) | A benchmarking suite for measuring BookKeeper performance -[`bookkeeper-stats`]({{ site.github_repo }}/tree/master/bookkeeper-stats) | A BookKeeper stats library -[`bookkeeper-stats-providers`]({{ site.github_repo }}/tree/master/bookkeeper-stats-providers) | BookKeeper stats providers diff --git a/site/docs/4.7.0/getting-started/run-locally.md b/site/docs/4.7.0/getting-started/run-locally.md deleted file mode 100644 index edbfab9fda6..00000000000 --- a/site/docs/4.7.0/getting-started/run-locally.md +++ /dev/null @@ -1,16 +0,0 @@ ---- -title: Run bookies locally -prev: ../installation -next: ../concepts -toc_disable: true --- - -{% pop Bookies %} are individual BookKeeper servers. You can run an ensemble of bookies locally on a single machine using the [`localbookie`](../../reference/cli#bookkeeper-localbookie) command of the `bookkeeper` CLI tool and specifying the number of bookies you'd like to include in the ensemble.
- -This would start up an ensemble with 10 bookies: - -```shell -$ bookkeeper-server/bin/bookkeeper localbookie 10 -``` - -> When you start up an ensemble using `localbookie`, all bookies run in a single JVM process. diff --git a/site/docs/4.7.0/overview/overview.md b/site/docs/4.7.0/overview/overview.md deleted file mode 100644 index 610057b76c1..00000000000 --- a/site/docs/4.7.0/overview/overview.md +++ /dev/null @@ -1,58 +0,0 @@ ---- -title: Apache BookKeeper™ 4.7.0 --- - - -This documentation is for Apache BookKeeper™ version 4.7.0. - -Apache BookKeeper™ is a scalable, fault-tolerant, low-latency storage service optimized for real-time workloads. It offers durability, replication, and strong consistency as essentials for building reliable real-time applications. - -BookKeeper is suitable for a wide variety of use cases, including: - -Use case | Example -:--------|:------- -[WAL](https://en.wikipedia.org/wiki/Write-ahead_logging) (write-ahead logging) | The HDFS [namenode](https://hadoop.apache.org/docs/r2.5.2/hadoop-project-dist/hadoop-hdfs/HDFSHighAvailabilityWithNFS.html#BookKeeper_as_a_Shared_storage_EXPERIMENTAL) -Message storage | [Apache Pulsar](http://pulsar.incubator.apache.org/docs/latest/getting-started/ConceptsAndArchitecture/#persistent-storage) -Offset/cursor storage | [Apache Pulsar](http://pulsar.incubator.apache.org/docs/latest/getting-started/ConceptsAndArchitecture/#persistent-storage) -Object/[BLOB](https://en.wikipedia.org/wiki/Binary_large_object) storage | Storing snapshots to replicated state machines - -Learn more about Apache BookKeeper™ and what it can do for your organization: - -- [Apache BookKeeper 4.7.0 Release Notes](../releaseNotes) -- [Java API docs](../../api/javadoc) - -Or start [using](../../getting-started/installation) Apache BookKeeper today. - -### Users - -- **Concepts**: Start with [concepts](../../getting-started/concepts). This will help you to fully understand the other parts of the documentation, including the setup, integration and operation guides. -- **Getting Started**: Install [Apache BookKeeper](../../getting-started/installation) and run bookies [locally](../../getting-started/run-locally) -- **API**: Read the [API](../../api/overview) documentation to learn how to use Apache BookKeeper to build your applications. -- **Deployment**: The [Deployment Guide](../../deployment/manual) shows how to deploy Apache BookKeeper to production clusters. - -### Administrators - -- **Operations**: The [Admin Guide](../../admin/bookies) shows how to run Apache BookKeeper in production, covering production considerations and best practices. - -### Contributors - -- **Details**: Learn about [design details](../../development/protocol) to understand the internals. diff --git a/site/docs/4.7.0/overview/releaseNotes.md b/site/docs/4.7.0/overview/releaseNotes.md deleted file mode 100644 index efefe3e93e6..00000000000 --- a/site/docs/4.7.0/overview/releaseNotes.md +++ /dev/null @@ -1,107 +0,0 @@ ---- -title: Apache BookKeeper 4.7.0 Release Notes --- - -This is the tenth release of Apache BookKeeper! - -The 4.7.0 release incorporates hundreds of bug fixes, improvements, and features since the previous major release, 4.6.0, -which was released four months ago. It is a big milestone for the Apache BookKeeper community - the Yahoo branch has been fully merged -back to upstream, and Apache Pulsar (incubating) is starting to use official BookKeeper releases for its upcoming 2.0 release.
- -It is also the first release of Apache DistributedLog after it was merged as a sub-module of Apache BookKeeper. - -Apache BookKeeper/DistributedLog users are encouraged to [upgrade to 4.7.0](../../admin/upgrade). The technical details of -this release are summarized below. - -## Highlights - -The main features in 4.7.0 are in the following areas: - -- Dependencies Changes -- Public API -- Security -- DbLedgerStorage -- Metadata API -- Performance -- Operations -- Builds & Testing -- Bug Fixes - -### Dependencies Changes - -Here is a list of dependencies changed in 4.7.0: - -- [JCommander](http://jcommander.org/) 1.48 is added as a dependency of the bookkeeper-server module. -- [RocksDB](http://rocksdb.org/) 5.8.6 is introduced as part of `DbLedgerStorage` as a dependency of the bookkeeper-server module. -- [DataSketches](https://datasketches.github.io/) 0.8.3 is introduced as a dependency of the prometheus-metrics-provider module. -- Upgrade [Guava](https://github.com/google/guava) from `20.0` to `21.0`. - -### Public API - -There are multiple new client features introduced in 4.7.0. Here are two highlighted features: - -#### Fluent API - -The new fluent-style API continues to evolve in 4.7.0. All handler methods now have both async and sync variants. -See [#1288](https://github.com/apache/bookkeeper/pull/1288) for more details. - -#### CRC32C - -The `circe-checksum` module is ported from Apache Pulsar to Apache BookKeeper, and CRC32C is added as a new digest type option. -The JNI-based CRC32C in the `circe-checksum` module provides better performance than the existing CRC32 digest type. Users are encouraged -to start using the CRC32C digest type. - -### Security - -- New PEM format `X.509` certificates are introduced for TLS authentication. See [#965](https://github.com/apache/bookkeeper/pull/965) for more details. -- TLS-related settings are converged into the same settings as the bookie server. See [Upgrade Guide](../../admin/upgrade) for more details. - -### DbLedgerStorage - -`DbLedgerStorage` is a new ledger storage that was introduced by Yahoo and is now fully merged into Apache BookKeeper. It is fully compatible with both v2 and v3 -protocols and also supports long polling. It uses [RocksDB](http://rocksdb.org/) to store the ledger index, which eliminates the need for ledger index files and -reduces both the number of open file descriptors and the amount of random I/O that can occur when flushing the ledger index. - -### Metadata API - -A new serviceUri-based metadata API is introduced in [BP-29](http://bookkeeper.apache.org/bps/BP-29-metadata-store-api-module). This metadata API provides an -abstraction over the ledger manager and registration service, allowing different types of data stores to be plugged in as the metadata service. - -### Performance - -There are a lot of performance-related bug fixes and improvements in 4.7.0. Some of the changes are highlighted below: - -- Leverage the netty object recycler to reduce object allocations -- A bunch of contentions around locking are removed. E.g. [#1321](https://github.com/apache/bookkeeper/pull/1321) [#1292](https://github.com/apache/bookkeeper/pull/1292) [#1258](https://github.com/apache/bookkeeper/pull/1258) -- Introduce a priority thread pool for accepting high priority reads/writes. This allows high priority reads/writes such as ledger recovery operations to - succeed even when bookies are overwhelmed. [#898](https://github.com/apache/bookkeeper/pull/898) -- Reorder slow bookies in the read sequence.
[#883](https://github.com/apache/bookkeeper/pull/883) -- Use atomic field updaters and long adders to replace AtomicInteger/AtomicLong/AtomicReference in Dlog. [#1299](https://github.com/apache/bookkeeper/pull/1299) -- The DataSketches library is used for implementing the Prometheus provider. [#1245](https://github.com/apache/bookkeeper/pull/1245) - -### Operations - -### BookieShell - -Multiple new commands have been added to BookieShell. Here are a few highlights: - -- `metaformat` is deprecated in favor of two new commands, `initnewcluster` and `nukeexistingcluster`. This separation provides better operability and reduces mistakes. -- The `initbookie` command is introduced for initializing a new bookie. `bookieformat` continues to serve the purpose of reformatting a bookie. - -A new BookKeeper CLI is proposed in [BP-27](http://bookkeeper.apache.org/bps/BP-27-new-bookkeeper-cli). Some commands have already been ported to the new bookkeeper CLI. -The full list of shell commands will be ported to the new bookkeeper CLI in the next release. - -### ReadOnly Mode Support - -Operations are improved around readonly mode for handling BookKeeper outage situations. New settings are introduced to allow entry log creation and high priority writes -even when bookies are readonly. See the [Upgrade Guide](../../admin/upgrade) to learn all newly added settings. - - -### Builds & Testing - -- The [Arquillian](http://arquillian.org/) framework is introduced in 4.7.0 for backward compatibility and integration tests. -- Both Java 8 and Java 9 are now supported for running BookKeeper. - -## Full list of changes - -- [https://github.com/apache/bookkeeper/milestone/3](https://github.com/apache/bookkeeper/milestone/3?closed=1) diff --git a/site/docs/4.7.0/overview/releaseNotesTemplate.md b/site/docs/4.7.0/overview/releaseNotesTemplate.md deleted file mode 100644 index 1330c0355c4..00000000000 --- a/site/docs/4.7.0/overview/releaseNotesTemplate.md +++ /dev/null @@ -1,17 +0,0 @@ ---- -title: Apache BookKeeper 4.7.0 Release Notes --- - -[provide a summary of this release] - -Apache BookKeeper users are encouraged to upgrade to 4.7.0. The technical details of this release are summarized -below. - -## Highlights - -[List the highlights] - -## Details - -[list to issues list] - diff --git a/site/docs/4.7.0/reference/cli.md b/site/docs/4.7.0/reference/cli.md deleted file mode 100644 index 8beb36ff071..00000000000 --- a/site/docs/4.7.0/reference/cli.md +++ /dev/null @@ -1,10 +0,0 @@ ---- -title: BookKeeper CLI tool reference -subtitle: A reference guide to the command-line tools that you can use to administer BookKeeper --- - -{% include cli.html id="bookkeeper" %} - -## The BookKeeper shell - -{% include shell.html %} diff --git a/site/docs/4.7.0/reference/config.md b/site/docs/4.7.0/reference/config.md deleted file mode 100644 index 8997b6b62f0..00000000000 --- a/site/docs/4.7.0/reference/config.md +++ /dev/null @@ -1,9 +0,0 @@ ---- -title: BookKeeper configuration -subtitle: A reference guide to all of BookKeeper's configurable parameters --- - - -The table below lists parameters that you can set to configure {% pop bookies %}. All configuration takes place in the `bk_server.conf` file in the `bookkeeper-server/conf` directory of your [BookKeeper installation](../../getting-started/installing).
- -{% include config.html id="bk_server" %} diff --git a/site/docs/4.7.0/reference/metrics.md b/site/docs/4.7.0/reference/metrics.md deleted file mode 100644 index 8bd6fe0a165..00000000000 --- a/site/docs/4.7.0/reference/metrics.md +++ /dev/null @@ -1,3 +0,0 @@ ---- -title: BookKeeper metrics reference --- diff --git a/site/docs/4.7.0/security/overview.md b/site/docs/4.7.0/security/overview.md deleted file mode 100644 index b825776eb67..00000000000 --- a/site/docs/4.7.0/security/overview.md +++ /dev/null @@ -1,21 +0,0 @@ ---- -title: BookKeeper Security -next: ../tls --- - -In the 4.5.0 release, the BookKeeper community added a number of features that can be used, together or separately, to secure a BookKeeper cluster. -The following security measures are currently supported: - -1. Authentication of connections to bookies from clients, using either [TLS](../tls) or [SASL (Kerberos)](../sasl). -2. Authentication of connections from clients, bookies, and autorecovery daemons to [ZooKeeper](../zookeeper), when using ZooKeeper-based ledger managers. -3. Encryption of data transferred between bookies and clients, and between bookies and autorecovery daemons, using [TLS](../tls). - -It’s worth noting that security is optional - non-secured clusters are supported, as well as a mix of authenticated, unauthenticated, encrypted and non-encrypted clients. - -NOTE: authorization is not yet available in 4.5.0. The Apache BookKeeper community is looking to add this feature in subsequent releases. - -## Next Steps - -- [Encryption and Authentication using TLS](../tls) -- [Authentication using SASL](../sasl) -- [ZooKeeper Authentication](../zookeeper) diff --git a/site/docs/4.7.0/security/sasl.md b/site/docs/4.7.0/security/sasl.md deleted file mode 100644 index ffb972a8936..00000000000 --- a/site/docs/4.7.0/security/sasl.md +++ /dev/null @@ -1,202 +0,0 @@ ---- -title: Authentication using SASL -prev: ../tls -next: ../zookeeper --- - -Bookies support client authentication via SASL. Currently we only support GSSAPI (Kerberos). We will start -with a general description of how to configure `SASL` for bookies, clients and autorecovery daemons, followed -by mechanism-specific details, and wrap up with some operational details. - -## SASL configuration for Bookies - -1. Select the mechanisms to enable in the bookies. `GSSAPI` is the only mechanism currently supported by BookKeeper. -2. Add a `JAAS` config file for the selected mechanisms as described in the examples for setting up [GSSAPI (Kerberos)](#kerberos). -3. Pass the `JAAS` config file location as a JVM parameter to each bookie. For example: - - ```shell - -Djava.security.auth.login.config=/etc/bookkeeper/bookie_jaas.conf - ``` - -4. Enable the SASL auth plugin in bookies, by setting `bookieAuthProviderFactoryClass` to `org.apache.bookkeeper.sasl.SASLBookieAuthProviderFactory`. - - - ```shell - bookieAuthProviderFactoryClass=org.apache.bookkeeper.sasl.SASLBookieAuthProviderFactory - ``` - -5. If you are running `autorecovery` along with bookies, then you will want to enable the SASL auth plugin for `autorecovery`, by setting - `clientAuthProviderFactoryClass` to `org.apache.bookkeeper.sasl.SASLClientProviderFactory`. - - ```shell - clientAuthProviderFactoryClass=org.apache.bookkeeper.sasl.SASLClientProviderFactory - ``` - -6. Follow the steps in [GSSAPI (Kerberos)](#kerberos) to configure SASL. - -#### Important Notes - -1. `Bookie` is a section name in the JAAS file used by each bookie.
This section tells the bookie which principal to use - and the location of the keytab where the principal is stored. It allows the bookie to log in using the keytab specified in this section. -2. `Auditor` is a section name in the JAAS file used by the `autorecovery` daemon (it can be co-run with bookies). This section tells the - `autorecovery` daemon which principal to use and the location of the keytab where the principal is stored. It allows the autorecovery daemon to - log in using the keytab specified in this section. -3. The `Client` section is used to authenticate a SASL connection with ZooKeeper. It also allows the bookies to set ACLs on ZooKeeper nodes - which locks these nodes down so that only the bookies can modify them. It is necessary to have the same primary name across all bookies. - If you want to use a section name other than `Client`, set the system property `zookeeper.sasl.client` to the appropriate name - (e.g. `-Dzookeeper.sasl.client=ZKClient`). -4. ZooKeeper uses `zookeeper` as the service name by default. If you want to change this, set the system property - `zookeeper.sasl.client.username` to the appropriate name (e.g. `-Dzookeeper.sasl.client.username=zk`). - -## SASL configuration for Clients - -To configure `SASL` authentication on the clients: - -1. Select a `SASL` mechanism for authentication and add a `JAAS` config file for the selected mechanism as described in the examples for - setting up [GSSAPI (Kerberos)](#kerberos). -2. Pass the `JAAS` config file location as a JVM parameter to each client JVM. For example: - - ```shell - -Djava.security.auth.login.config=/etc/bookkeeper/bookkeeper_jaas.conf - ``` - -3. Configure the following properties in the bookkeeper `ClientConfiguration`: - - ```shell - clientAuthProviderFactoryClass=org.apache.bookkeeper.sasl.SASLClientProviderFactory - ``` - -Follow the steps in [GSSAPI (Kerberos)](#kerberos) to configure SASL for the selected mechanism. - -## Authentication using SASL/Kerberos - -### Prerequisites - -#### Kerberos - -If your organization is already using a Kerberos server (for example, by using `Active Directory`), there is no need to -install a new server just for BookKeeper. Otherwise you will need to install one; your Linux vendor likely has packages -for `Kerberos` and a short guide on how to install and configure it ([Ubuntu](https://help.ubuntu.com/community/Kerberos), -[Redhat](https://access.redhat.com/documentation/en-US/Red_Hat_Enterprise_Linux/6/html/Managing_Smart_Cards/installing-kerberos.html)). -Note that if you are using Oracle Java, you will need to download JCE policy files for your Java version and copy them to `$JAVA_HOME/jre/lib/security`. - -#### Kerberos Principals - -If you are using the organization’s Kerberos or Active Directory server, ask your Kerberos administrator for a principal -for each Bookie in your cluster and for every operating system user that will access BookKeeper with Kerberos authentication -(via clients and tools). - -If you have installed your own Kerberos, you will need to create these principals yourself using the following commands: - -```shell -sudo /usr/sbin/kadmin.local -q 'addprinc -randkey bookkeeper/{hostname}@{REALM}' -sudo /usr/sbin/kadmin.local -q "ktadd -k /etc/security/keytabs/{keytabname}.keytab bookkeeper/{hostname}@{REALM}" -``` - -##### All hosts must be reachable using hostnames - -It is a *Kerberos* requirement that all your hosts can be resolved with their FQDNs. - -### Configuring Bookies - -1.
Add a suitably modified JAAS file similar to the one below to each Bookie’s config directory, let’s call it `bookie_jaas.conf` -for this example (note that each bookie should have its own keytab): - - ``` - Bookie { - com.sun.security.auth.module.Krb5LoginModule required - useKeyTab=true - storeKey=true - keyTab="/etc/security/keytabs/bookie.keytab" - principal="bookkeeper/bk1.hostname.com@EXAMPLE.COM"; - }; - // ZooKeeper client authentication - Client { - com.sun.security.auth.module.Krb5LoginModule required - useKeyTab=true - storeKey=true - keyTab="/etc/security/keytabs/bookie.keytab" - principal="bookkeeper/bk1.hostname.com@EXAMPLE.COM"; - }; - // If you are running `autorecovery` along with bookies - Auditor { - com.sun.security.auth.module.Krb5LoginModule required - useKeyTab=true - storeKey=true - keyTab="/etc/security/keytabs/bookie.keytab" - principal="bookkeeper/bk1.hostname.com@EXAMPLE.COM"; - }; - ``` - - The `Bookie` section in the JAAS file tells the bookie which principal to use and the location of the keytab where this principal is stored. - It allows the bookie to log in using the keytab specified in this section. See [notes](#notes) for more details on Zookeeper’s SASL configuration. - -2. Pass the name of the JAAS file as a JVM parameter to each Bookie: - - ```shell - -Djava.security.auth.login.config=/etc/bookkeeper/bookie_jaas.conf - ``` - - You may also wish to specify the path to the `krb5.conf` file - (see [JDK’s Kerberos Requirements](https://docs.oracle.com/javase/8/docs/technotes/guides/security/jgss/tutorials/KerberosReq.html) for more details): - - ```shell - -Djava.security.krb5.conf=/etc/bookkeeper/krb5.conf - ``` - -3. Make sure the keytabs configured in the JAAS file are readable by the operating system user who is starting the Bookies. - -4. Enable the SASL authentication plugin in the bookies by setting the following parameters. - - ```shell - bookieAuthProviderFactoryClass=org.apache.bookkeeper.sasl.SASLBookieAuthProviderFactory - # if you run `autorecovery` along with bookies - clientAuthProviderFactoryClass=org.apache.bookkeeper.sasl.SASLClientProviderFactory - ``` - -### Configuring Clients - -To configure SASL authentication on the clients: - -1. Clients will authenticate to the cluster with their own principal (usually with the same name as the user running the client), - so obtain or create these principals as needed. Then create a `JAAS` file for each principal. The `BookKeeper` section describes - how clients such as writers and readers can connect to the bookies. The following is an example configuration for a client using - a keytab (recommended for long-running processes): - - ``` - BookKeeper { - com.sun.security.auth.module.Krb5LoginModule required - useKeyTab=true - storeKey=true - keyTab="/etc/security/keytabs/bookkeeper.keytab" - principal="bookkeeper-client-1@EXAMPLE.COM"; - }; - ``` - - -2. Pass the name of the JAAS file as a JVM parameter to the client JVM: - - ```shell - -Djava.security.auth.login.config=/etc/bookkeeper/bookkeeper_jaas.conf - ``` - - You may also wish to specify the path to the `krb5.conf` file (see - [JDK’s Kerberos Requirements](https://docs.oracle.com/javase/8/docs/technotes/guides/security/jgss/tutorials/KerberosReq.html) for more details). - - ```shell - -Djava.security.krb5.conf=/etc/bookkeeper/krb5.conf - ``` - - -3. Make sure the keytabs configured in the `bookkeeper_jaas.conf` are readable by the operating system user who is starting the bookkeeper client. - -4.
Enable the SASL authentication plugin in the client by setting the following parameters. - - ```shell - clientAuthProviderFactoryClass=org.apache.bookkeeper.sasl.SASLClientProviderFactory - ``` - -## Enabling Logging for SASL - -To enable SASL debug output, you can set the `sun.security.krb5.debug` system property to `true`. - diff --git a/site/docs/4.7.0/security/tls.md b/site/docs/4.7.0/security/tls.md deleted file mode 100644 index cd250ab2aa5..00000000000 --- a/site/docs/4.7.0/security/tls.md +++ /dev/null @@ -1,210 +0,0 @@ ---- -title: Encryption and Authentication using TLS -prev: ../overview -next: ../sasl --- - -Apache BookKeeper allows clients and autorecovery daemons to communicate over TLS, although this is not enabled by default. - -## Overview - -The bookies need their own key and certificate in order to use TLS. Clients can optionally provide a key and a certificate -for mutual authentication. Each bookie or client can also be configured with a truststore, which is used to -determine which certificates (bookie or client identities) to trust (authenticate). - -The truststore can be configured in many ways. To understand the truststore, consider the following two examples: - -1. the truststore contains one or many certificates; -2. it contains a certificate authority (CA). - -In (1), with a list of certificates, the bookie or client will trust any certificate listed in the truststore. -In (2), with a CA, the bookie or client will trust any certificate that was signed by the CA in the truststore. - -(TBD: benefits) - -## Generate TLS key and certificate - -The first step of deploying TLS is to generate the key and the certificate for each machine in the cluster. -You can use Java’s `keytool` utility to accomplish this task. We will generate the key into a temporary keystore -initially so that we can export and sign it later with the CA. - -```shell -keytool -keystore bookie.keystore.jks -alias localhost -validity {validity} -genkey -``` - -You need to specify two parameters in the above command: - -1. `keystore`: the keystore file that stores the certificate. The *keystore* file contains the private key of - the certificate; hence, it needs to be kept safely. -2. `validity`: the valid time of the certificate in days.
-> Ensure that the common name (CN) matches exactly the fully qualified domain name (FQDN) of the server. -> The client compares the CN with the DNS domain name to ensure that it is indeed connecting to the desired server, not a malicious one.
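For example, one way to pin the CN to the server's FQDN at generation time is to pass a distinguished name explicitly (the hostname and organization fields below are illustrative):

```shell
keytool -keystore bookie.keystore.jks -alias localhost -validity {validity} -genkey \
  -dname "CN=bookie1.example.com, OU=bookkeeper, O=example, L=City, ST=State, C=US"
```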
- -## Creating your own CA - -After the first step, each machine in the cluster has a public-private key pair, and a certificate to identify the machine. -The certificate, however, is unsigned, which means that an attacker can create such a certificate to pretend to be any machine. - -Therefore, it is important to prevent forged certificates by signing them for each machine in the cluster. -A `certificate authority (CA)` is responsible for signing certificates. A CA works like a government that issues passports — -the government stamps (signs) each passport so that the passport becomes difficult to forge. Other governments verify the stamps -to ensure the passport is authentic. Similarly, the CA signs the certificates, and the cryptography guarantees that a signed -certificate is computationally difficult to forge. Thus, as long as the CA is a genuine and trusted authority, the clients have -high assurance that they are connecting to the authentic machines. - -```shell -openssl req -new -x509 -keyout ca-key -out ca-cert -days 365 -``` - -The generated CA is simply a *public-private* key pair and certificate, and it is intended to sign other certificates. - -The next step is to add the generated CA to the clients' truststore so that the clients can trust this CA: - -```shell -keytool -keystore bookie.truststore.jks -alias CARoot -import -file ca-cert -``` - -NOTE: If you configure the bookies to require client authentication by setting `sslClientAuthentication` to `true` on the -[bookie config](../../reference/config), then you must also provide a truststore for the bookies and it should have all the CA -certificates that client keys were signed by. - -```shell -keytool -keystore client.truststore.jks -alias CARoot -import -file ca-cert -``` - -In contrast to the keystore, which stores each machine’s own identity, the truststore of a client stores all the certificates -that the client should trust. Importing a certificate into one’s truststore also means trusting all certificates that are signed -by that certificate. As in the analogy above, trusting the government (CA) also means trusting all passports (certificates) that -it has issued. This attribute is called the chain of trust, and it is particularly useful when deploying TLS on a large BookKeeper cluster. -You can sign all certificates in the cluster with a single CA, and have all machines share the same truststore that trusts the CA. -That way all machines can authenticate all other machines. - -## Signing the certificate - -The next step is to sign all certificates in the keystore with the CA we generated. First, you need to export the certificate from the keystore: - -```shell -keytool -keystore bookie.keystore.jks -alias localhost -certreq -file cert-file -``` - -Then sign it with the CA: - -```shell -openssl x509 -req -CA ca-cert -CAkey ca-key -in cert-file -out cert-signed -days {validity} -CAcreateserial -passin pass:{ca-password} -``` - -Finally, you need to import both the certificate of the CA and the signed certificate into the keystore: - -```shell -keytool -keystore bookie.keystore.jks -alias CARoot -import -file ca-cert -keytool -keystore bookie.keystore.jks -alias localhost -import -file cert-signed -``` - -The definitions of the parameters are the following: - -1. `keystore`: the location of the keystore -2. `ca-cert`: the certificate of the CA -3. `ca-key`: the private key of the CA -4. `ca-password`: the passphrase of the CA -5. `cert-file`: the exported, unsigned certificate of the bookie -6.
`cert-signed`: the signed certificate of the bookie - -(TBD: add a script to automatically generate truststores and keystores.) - -## Configuring Bookies - -Bookies support TLS for connections on the same service port. In order to enable TLS, you need to configure `tlsProvider` to be either -`JDK` or `OpenSSL`. If `OpenSSL` is configured, it will use `netty-tcnative-boringssl-static`, which loads the corresponding binding -for the platform the bookies run on. - -> The current `OpenSSL` implementation doesn't depend on the system-installed OpenSSL library. If you want to leverage the OpenSSL installed on -the system, you can check [this example](http://netty.io/wiki/forked-tomcat-native.html) on how to replace the JARs on the classpath with -netty bindings that leverage the installed OpenSSL. - -The following TLS configs are needed on the bookie side: - -```shell -tlsProvider=OpenSSL -# key store -tlsKeyStoreType=JKS -tlsKeyStore=/var/private/tls/bookie.keystore.jks -tlsKeyStorePasswordPath=/var/private/tls/bookie.keystore.passwd -# trust store -tlsTrustStoreType=JKS -tlsTrustStore=/var/private/tls/bookie.truststore.jks -tlsTrustStorePasswordPath=/var/private/tls/bookie.truststore.passwd -``` - -NOTE: it is important to restrict access to the store files and corresponding password files via filesystem permissions. - -Optional settings that are worth considering: - -1. tlsClientAuthentication=false: Enable/Disable using TLS for authentication. This config when enabled will authenticate the other end - of the communication channel. It should be enabled on both bookies and clients for mutual TLS. -2. tlsEnabledCipherSuites= A cipher suite is a named combination of authentication, encryption, MAC and key exchange - algorithm used to negotiate the security settings for a network connection using the TLS network protocol. By default, - it is null. [OpenSSL Ciphers](https://www.openssl.org/docs/man1.0.2/apps/ciphers.html) - [JDK Ciphers](http://docs.oracle.com/javase/8/docs/technotes/guides/security/StandardNames.html#ciphersuites) -3. tlsEnabledProtocols = TLSv1.2,TLSv1.1,TLSv1 (list out the TLS protocols that you are going to accept from clients). - By default, it is not set. - -To verify that the bookie's keystore and truststore are set up correctly, you can run the following command: - -```shell -openssl s_client -debug -connect localhost:3181 -tls1 -``` - -NOTE: TLSv1 should be listed under `tlsEnabledProtocols`. - -In the output of this command you should see the server's certificate: - -```shell ------BEGIN CERTIFICATE----- -{variable sized random bytes} ------END CERTIFICATE----- -``` - -If the certificate does not show up or if there are any other error messages then your keystore is not set up correctly. - -## Configuring Clients - -TLS is supported only for the new BookKeeper client (BookKeeper versions 4.5.0 and higher); the older clients are not -supported. The configs for TLS are the same as for bookies. - -If client authentication is not required by the bookies, the following is a minimal configuration example: - -```shell -tlsProvider=OpenSSL -clientTrustStore=/var/private/tls/client.truststore.jks -clientTrustStorePasswordPath=/var/private/tls/client.truststore.passwd -``` - -If client authentication is required, then a keystore must be created for each client, and the bookies' truststores must -trust the certificate in the client's keystore. This may be done using commands that are similar to what we used for -the [bookie keystore](#bookie-keystore).
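For example, a sketch of the equivalent client-side commands (the file names are illustrative):

```shell
# generate the client key pair, then export and sign its certificate with the CA
keytool -keystore client.keystore.jks -alias localhost -validity {validity} -genkey
keytool -keystore client.keystore.jks -alias localhost -certreq -file client-cert-file
openssl x509 -req -CA ca-cert -CAkey ca-key -in client-cert-file -out client-cert-signed -days {validity} -CAcreateserial -passin pass:{ca-password}
# import the CA certificate and the signed client certificate into the client keystore
keytool -keystore client.keystore.jks -alias CARoot -import -file ca-cert
keytool -keystore client.keystore.jks -alias localhost -import -file client-cert-signed
# make sure the bookies' truststore trusts the CA that signed the client keys
keytool -keystore bookie.truststore.jks -alias CARoot -import -file ca-cert
```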
- -And the following must also be configured: - -```shell -tlsClientAuthentication=true -clientKeyStore=/var/private/tls/client.keystore.jks -clientKeyStorePasswordPath=/var/private/tls/client.keystore.passwd -``` - -NOTE: it is important to restrict access to the store files and corresponding password files via filesystem permissions. - -(TBD: add example to use tls in bin/bookkeeper script?) - -## Enabling TLS Logging - -You can enable TLS debug logging at the JVM level by starting the bookies and/or clients with the `javax.net.debug` system property. For example: - -```shell --Djavax.net.debug=all -``` - -You can find more details on this in [Oracle documentation](http://docs.oracle.com/javase/8/docs/technotes/guides/security/jsse/ReadDebug.html) on -[debugging SSL/TLS connections](http://docs.oracle.com/javase/8/docs/technotes/guides/security/jsse/ReadDebug.html). diff --git a/site/docs/4.7.0/security/zookeeper.md b/site/docs/4.7.0/security/zookeeper.md deleted file mode 100644 index e16be69a1d3..00000000000 --- a/site/docs/4.7.0/security/zookeeper.md +++ /dev/null @@ -1,41 +0,0 @@ ---- -title: ZooKeeper Authentication -prev: ../sasl --- - -## New Clusters - -To enable `ZooKeeper` authentication on Bookies or Clients, there are two necessary steps: - -1. Create a `JAAS` login file and set the appropriate system property to point to it as described in [GSSAPI (Kerberos)](../sasl#notes). -2. Set the configuration property `zkEnableSecurity` in each bookie to `true`. - -The metadata stored in `ZooKeeper` is such that only certain clients will be able to modify and read the corresponding znodes. -The rationale behind this decision is that the data stored in ZooKeeper is not sensitive, but inappropriate manipulation of znodes can cause cluster -disruption. - -## Migrating Clusters - -If you are running a version of BookKeeper that does not support security, or are simply running with security disabled, and you want to make the cluster secure, -then you need to execute the following steps to enable ZooKeeper authentication with minimal disruption to your operations. - -1. Perform a rolling restart setting the `JAAS` login file, which enables bookies or clients to authenticate. At the end of the rolling restart, - bookies (or clients) are able to manipulate znodes with strict ACLs, but they will not create znodes with those ACLs. -2. Perform a second rolling restart of bookies, this time setting the configuration parameter `zkEnableSecurity` to true, which enables the use - of secure ACLs when creating znodes. -3. Currently we don't provide a tool to set ACLs on old znodes. We recommend setting them manually using ZooKeeper tools. - -It is also possible to turn off authentication in a secured cluster. To do it, follow these steps: - -1. Perform a rolling restart of bookies setting the `JAAS` login file, which enables bookies to authenticate, but setting `zkEnableSecurity` to `false`. - At the end of the rolling restart, bookies stop creating znodes with secure ACLs, but are still able to authenticate and manipulate all znodes. -2. You can use ZooKeeper tools to manually reset all ACLs under the znode set in `zkLedgersRootPath`, which defaults to `/ledgers`. -3. Perform a second rolling restart of bookies, this time omitting the system property that sets the `JAAS` login file. - -## Migrating the ZooKeeper ensemble - -It is also necessary to enable authentication on the `ZooKeeper` ensemble. To do it, we need to perform a rolling restart of the ensemble and -set a few properties.
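As a minimal sketch (assuming Kerberos, and following the general ZooKeeper SASL setup rather than anything BookKeeper-specific), each ZooKeeper server needs a SASL authentication provider enabled and a JAAS file with a `Server` section:

```shell
# zoo.cfg on each ZooKeeper server
authProvider.1=org.apache.zookeeper.server.auth.SASLAuthenticationProvider

# JVM flag pointing each server at its JAAS login file
-Djava.security.auth.login.config=/etc/zookeeper/zookeeper_jaas.conf
```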
Please refer to the ZooKeeper documentation for more details. - -1. [Apache ZooKeeper Documentation](http://zookeeper.apache.org/doc/r3.4.6/zookeeperProgrammers.html#sc_ZooKeeperAccessControl) -2. [Apache ZooKeeper Wiki](https://cwiki.apache.org/confluence/display/ZOOKEEPER/Zookeeper+and+SASL) diff --git a/site/docs/4.7.1/admin/autorecovery.md b/site/docs/4.7.1/admin/autorecovery.md deleted file mode 100644 index b1dd078f9b2..00000000000 --- a/site/docs/4.7.1/admin/autorecovery.md +++ /dev/null @@ -1,128 +0,0 @@ ---- -title: Using AutoRecovery --- - -When a {% pop bookie %} crashes, all {% pop ledgers %} on that bookie become under-replicated. In order to bring all ledgers in your BookKeeper cluster back to full replication, you'll need to *recover* the data from any offline bookies. There are two ways to recover bookies' data: - -1. Using [manual recovery](#manual-recovery) -1. Automatically, using [*AutoRecovery*](#autorecovery) - -## Manual recovery - -You can manually recover failed bookies using the [`bookkeeper`](../../reference/cli) command-line tool. You need to specify: - -* the `shell recover` option -* an IP and port for your BookKeeper cluster's ZooKeeper ensemble -* the IP and port for the failed bookie - -Here's an example: - -```bash -$ bookkeeper-server/bin/bookkeeper shell recover \ - zk1.example.com:2181 \ # IP and port for ZooKeeper - 192.168.1.10:3181 # IP and port for the failed bookie -``` - -If you wish, you can also specify which bookie you'd like to rereplicate to. Here's an example: - -```bash -$ bookkeeper-server/bin/bookkeeper shell recover \ - zk1.example.com:2181 \ # IP and port for ZooKeeper - 192.168.1.10:3181 \ # IP and port for the failed bookie - 192.168.1.11:3181 # IP and port for the bookie to rereplicate to -``` - -### The manual recovery process - -When you initiate a manual recovery process, the following happens: - -1. The client (the process running the recovery command) reads the metadata of active ledgers from ZooKeeper. -1. The ledgers that contain fragments from the failed bookie in their ensemble are selected. -1. A recovery process is initiated for each ledger in this list and the rereplication process is run for each ledger. -1. Once all the ledgers are marked as fully replicated, bookie recovery is finished. - -## AutoRecovery - -AutoRecovery is a process that: - -* automatically detects when a {% pop bookie %} in your BookKeeper cluster has become unavailable and then -* rereplicates all the {% pop ledgers %} that were stored on that bookie. - -AutoRecovery can be run in two ways: - -1. On dedicated nodes in your BookKeeper cluster -1. On the same machines on which your bookies are running - -## Running AutoRecovery - -You can start up AutoRecovery using the [`autorecovery`](../../reference/cli#bookkeeper-autorecovery) command of the [`bookkeeper`](../../reference/cli) CLI tool. - -```bash -$ bookkeeper-server/bin/bookkeeper autorecovery -``` - -> The most important thing to ensure when starting up AutoRecovery is that the ZooKeeper connection string specified by the [`zkServers`](../../reference/config#zkServers) parameter points to the right ZooKeeper cluster. - -If you start up AutoRecovery on a machine that is already running a bookie, then the AutoRecovery process will run alongside the bookie on a separate thread. - -You can also start up AutoRecovery on a fresh machine if you'd like to create a dedicated cluster of AutoRecovery nodes.
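To run AutoRecovery in the background on such a dedicated node, the daemon script can be used, assuming your installation's `bookkeeper-daemon.sh` supports the `autorecovery` command:

```bash
$ bookkeeper-server/bin/bookkeeper-daemon.sh start autorecovery
```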
- -## Configuration - -There are a handful of AutoRecovery-related configs in the [`bk_server.conf`](../../reference/config) configuration file. For a listing of those configs, see [AutoRecovery settings](../../reference/config#autorecovery-settings). - -## Disable AutoRecovery - -You can disable AutoRecovery at any time, for example during maintenance. Disabling AutoRecovery ensures that bookies' data isn't unnecessarily rereplicated when the bookie is only taken down for a short period of time, for example when the bookie is being updated or the configuration is being changed. - -You can disable AutoRecovery using the [`bookkeeper`](../../reference/cli#bookkeeper-shell-autorecovery) CLI tool: - -```bash -$ bookkeeper-server/bin/bookkeeper shell autorecovery -disable -``` - -Once disabled, you can reenable AutoRecovery using the [`enable`](../../reference/cli#bookkeeper-shell-autorecovery) shell command: - -```bash -$ bookkeeper-server/bin/bookkeeper shell autorecovery -enable -``` - -## AutoRecovery architecture - -AutoRecovery has two components: - -1. The [**auditor**](#auditor) (see the [`Auditor`](../../api/javadoc/org/apache/bookkeeper/replication/Auditor.html) class) is a singleton node that watches bookies to see if they fail and creates rereplication tasks for the ledgers on failed bookies. -1. The [**replication worker**](#replication-worker) (see the [`ReplicationWorker`](../../api/javadoc/org/apache/bookkeeper/replication/ReplicationWorker.html) class) runs on each bookie and executes rereplication tasks provided by the auditor. - -Both of these components run as threads in the [`AutoRecoveryMain`](../../api/javadoc/org/apache/bookkeeper/replication/AutoRecoveryMain) process, which runs on each bookie in the cluster. All recovery nodes participate in leader election---using ZooKeeper---to decide which node becomes the auditor. Nodes that fail to become the auditor watch the elected auditor and run an election process again if they see that the auditor node has failed. - -### Auditor - -The auditor watches all bookies in the cluster that are registered with ZooKeeper. Bookies register with ZooKeeper at startup. If the bookie crashes or is killed, the bookie's registration in ZooKeeper disappears and the auditor is notified of the change in the list of registered bookies. - -When the auditor sees that a bookie has disappeared, it immediately scans the complete {% pop ledger %} list to find ledgers that have data stored on the failed bookie. Once it has a list of ledgers for that bookie, the auditor will publish a rereplication task for each ledger under the `/underreplicated/` [znode](https://zookeeper.apache.org/doc/current/zookeeperOver.html) in ZooKeeper. - -### Replication Worker - -Each replication worker watches for tasks being published by the auditor on the `/underreplicated/` znode in ZooKeeper. When a new task appears, the replication worker will try to get a lock on it. If it cannot acquire the lock, it will try the next entry. The locks are implemented using ZooKeeper ephemeral znodes. - -The replication worker will scan through the rereplication task's ledger for fragments of which its local bookie is not a member. When it finds fragments matching this criterion, it will replicate the entries of that fragment to the local bookie. If, after this process, the ledger is fully replicated, the ledger's entry under `/underreplicated/` is deleted, and the lock is released.
If there is a problem replicating, or there are fragments in the ledger which are still underreplicated (due to the local bookie already being part of the ensemble for the fragment), then the lock is simply released. - -If the replication worker finds a fragment which needs rereplication, but does not have a defined endpoint (i.e. the final fragment of a ledger currently being written to), it will wait for a grace period before attempting rereplication. If the fragment needing rereplication still does not have a defined endpoint, the ledger is fenced and rereplication then takes place. - -This avoids the situation in which a client is writing to a ledger and one of the bookies goes down, but the client has not written an entry to that bookie before rereplication takes place. The client could continue writing to the old fragment, even though the ensemble for the fragment had changed. This could lead to data loss. Fencing prevents this scenario from happening. In the normal case, the client will try to write to the failed bookie within the grace period, and will have started a new fragment before rereplication starts. - -You can configure this grace period using the [`openLedgerRereplicationGracePeriod`](../../reference/config#openLedgerRereplicationGracePeriod) parameter. - -### The rereplication process - -The ledger rereplication process happens in these steps: - -1. The client goes through all ledger fragments in the ledger, selecting those that contain the failed bookie. -1. A recovery process is initiated for each ledger fragment in this list. - 1. The client selects a bookie to which all entries in the ledger fragment will be replicated; in the case of autorecovery, this will always be the local bookie. - 1. The client reads entries that belong to the ledger fragment from other bookies in the ensemble and writes them to the selected bookie. - 1. Once all entries have been replicated, the ZooKeeper metadata for the fragment is updated to reflect the new ensemble. - 1. The fragment is marked as fully replicated in the recovery tool. -1. Once all ledger fragments are marked as fully replicated, the ledger is marked as fully replicated. - diff --git a/site/docs/4.7.1/admin/bookies.md b/site/docs/4.7.1/admin/bookies.md deleted file mode 100644 index 1b0427dae3c..00000000000 --- a/site/docs/4.7.1/admin/bookies.md +++ /dev/null @@ -1,180 +0,0 @@ ---- -title: BookKeeper administration -subtitle: A guide to deploying and administering BookKeeper --- - -This document is a guide to deploying, administering, and maintaining BookKeeper. It also discusses [best practices](#best-practices) and [common problems](#common-problems). - -## Requirements - -A typical BookKeeper installation consists of an ensemble of {% pop bookies %} and a ZooKeeper quorum. The exact number of bookies depends on the quorum mode that you choose, desired throughput, and the number of clients using the installation simultaneously. - -The minimum number of bookies depends on the type of installation: - -* For *self-verifying* entries you should run at least three bookies. In this mode, clients store a message authentication code along with each {% pop entry %}. -* For *generic* entries you should run at least four bookies. - -There is no upper limit on the number of bookies that you can run in a single ensemble. - -### Performance - -To achieve optimal performance, BookKeeper requires each server to have at least two disks. It's possible to run a bookie with a single disk but performance will be significantly degraded.
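One common way to act on this recommendation is to put the journal and the ledger storage on separate devices in `bk_server.conf` (the paths are illustrative; the parameter names are the ones listed in the configuration table later in this guide):

```shell
# write-ahead log (journal) on its own device
journalDirectory=/mnt/journal/bk-txn
# ledger entry storage on a separate device
ledgerDirectories=/mnt/data/bk-data
```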
- -### ZooKeeper - -There is no constraint on the number of ZooKeeper nodes you can run with BookKeeper. A single machine running ZooKeeper in [standalone mode](https://zookeeper.apache.org/doc/current/zookeeperStarted.html#sc_InstallingSingleMode) is sufficient for BookKeeper, although for the sake of higher resilience we recommend running ZooKeeper in [quorum mode](https://zookeeper.apache.org/doc/current/zookeeperStarted.html#sc_RunningReplicatedZooKeeper) with multiple servers. - -## Starting and stopping bookies - -You can run bookies either in the foreground or in the background, using [nohup](https://en.wikipedia.org/wiki/Nohup). You can also run [local bookies](#local-bookie) for development purposes. - -To start a bookie in the foreground, use the [`bookie`](../../reference/cli#bookkeeper-bookie) command of the [`bookkeeper`](../../reference/cli#bookkeeper) CLI tool: - -```shell -$ bookkeeper-server/bin/bookkeeper bookie -``` - -To start a bookie in the background, use the [`bookkeeper-daemon.sh`](../../reference/cli#bookkeeper-daemon.sh) script and run `start bookie`: - -```shell -$ bookkeeper-server/bin/bookkeeper-daemon.sh start bookie -``` - -### Local bookies - -The instructions above showed you how to run bookies intended for production use. If you'd like to experiment with ensembles of bookies locally, you can use the [`localbookie`](../../reference/cli#bookkeeper-localbookie) command of the `bookkeeper` CLI tool and specify the number of bookies you'd like to run. - -This would spin up a local ensemble of 6 bookies: - -```shell -$ bookkeeper-server/bin/bookkeeper localbookie 6 -``` - -> When you run a local bookie ensemble, all bookies run in a single JVM process. - -## Configuring bookies - -There's a wide variety of parameters that you can set in the bookie configuration file in `bookkeeper-server/conf/bk_server.conf` of your [BookKeeper installation](../../reference/config). A full listing can be found in [Bookie configuration](../../reference/config). - -Some of the more important parameters to be aware of: - -Parameter | Description | Default -:---------|:------------|:------- -`bookiePort` | The TCP port that the bookie listens on | `3181` -`zkServers` | A comma-separated list of ZooKeeper servers in `hostname:port` format | `localhost:2181` -`journalDirectory` | The directory where the [log device](../../getting-started/concepts#log-device) stores the bookie's write-ahead log (WAL) | `/tmp/bk-txn` -`ledgerDirectories` | The directories where the [ledger device](../../getting-started/concepts#ledger-device) stores the bookie's ledger entries (as a comma-separated list) | `/tmp/bk-data` - -> Ideally, the directories specified by `journalDirectory` and `ledgerDirectories` should be on different devices. - -## Logging - -BookKeeper uses [slf4j](http://www.slf4j.org/) for logging, with [log4j](https://logging.apache.org/log4j/2.x/) bindings enabled by default. - -To enable logging for a bookie, create a `log4j.properties` file and point the `BOOKIE_LOG_CONF` environment variable to the configuration file. Here's an example: - -```shell -$ export BOOKIE_LOG_CONF=/some/path/log4j.properties -$ bookkeeper-server/bin/bookkeeper bookie -``` - -## Upgrading - -From time to time you may need to make changes to the filesystem layout of bookies---changes that are incompatible with previous versions of BookKeeper and require that directories used with previous versions are upgraded. 
If a filesystem upgrade is required when updating BookKeeper, the bookie will fail to start and return an error like this: - -``` -2017-05-25 10:41:50,494 - ERROR - [main:Bookie@246] - Directory layout version is less than 3, upgrade needed -``` - -BookKeeper provides a utility for upgrading the filesystem. You can perform an upgrade using the [`upgrade`](../../reference/cli#bookkeeper-upgrade) command of the `bookkeeper` CLI tool. When running `bookkeeper upgrade` you need to specify one of three flags: - -Flag | Action -:----|:------ -`--upgrade` | Performs an upgrade -`--rollback` | Performs a rollback to the initial filesystem version -`--finalize` | Marks the upgrade as complete - -### Upgrade pattern - -A standard upgrade pattern is to run an upgrade... - -```shell -$ bookkeeper-server/bin/bookkeeper upgrade --upgrade -``` - -...then check that everything is working normally, then kill the bookie. If everything is okay, finalize the upgrade... - -```shell -$ bookkeeper-server/bin/bookkeeper upgrade --finalize -``` - -...and then restart the server: - -```shell -$ bookkeeper-server/bin/bookkeeper bookie -``` - -If something has gone wrong, you can always perform a rollback: - -```shell -$ bookkeeper-server/bin/bookkeeper upgrade --rollback -``` - -## Formatting - -You can format bookie metadata in ZooKeeper using the [`metaformat`](../../reference/cli#bookkeeper-shell-metaformat) command of the [BookKeeper shell](../../reference/cli#the-bookkeeper-shell). - -By default, formatting is done in interactive mode, which prompts you to confirm the format operation if old data exists. You can disable confirmation using the `-nonInteractive` flag. If old data does exist, the format operation will abort *unless* you set the `-force` flag. Here's an example: - -```shell -$ bookkeeper-server/bin/bookkeeper shell metaformat -``` - -You can format the local filesystem data on a bookie using the [`bookieformat`](../../reference/cli#bookkeeper-shell-bookieformat) command on each bookie. Here's an example: - -```shell -$ bookkeeper-server/bin/bookkeeper shell bookieformat -``` - -> The `-force` and `-nonInteractive` flags are also available for the `bookieformat` command. - -## AutoRecovery - -For a guide to AutoRecovery in BookKeeper, see [this doc](../autorecovery). - -## Missing disks or directories - -Accidentally replacing disks or removing directories can cause a bookie to fail while trying to read a ledger fragment that, according to the ledger metadata, exists on the bookie. For this reason, when a bookie is started for the first time, its disk configuration is fixed for the lifetime of that bookie. Any change to its disk configuration, such as a crashed disk or an accidental configuration change, will result in the bookie being unable to start. That will throw an error like this: - -``` -2017-05-29 18:19:13,790 - ERROR - [main:BookieServer@314] - Exception running bookie server : -org.apache.bookkeeper.bookie.BookieException$InvalidCookieException -    at org.apache.bookkeeper.bookie.Cookie.verify(Cookie.java:82) -    at org.apache.bookkeeper.bookie.Bookie.checkEnvironment(Bookie.java:275) -    at org.apache.bookkeeper.bookie.Bookie.<init>(Bookie.java:351) -``` - -If the change was the result of an accidental configuration change, the change can be reverted and the bookie can be restarted. However, if the change *cannot* be reverted, as is the case when you want to add a new disk or replace a disk, the bookie must be wiped and then all its data re-replicated onto it. - -1. 
Increment the [`bookiePort`](../../reference/config#bookiePort) parameter in the [`bk_server.conf`](../../reference/config) configuration file. -1. Ensure that all directories specified by [`journalDirectory`](../../reference/config#journalDirectory) and [`ledgerDirectories`](../../reference/config#ledgerDirectories) are empty. -1. [Start the bookie](#starting-and-stopping-bookies). -1. Run the following command to re-replicate the data: - - ```bash - $ bookkeeper-server/bin/bookkeeper shell recover \ - <zkserver> \ - <oldbookie> \ - <newbookie> - ``` - - The ZooKeeper server, old bookie, and new bookie are all identified by their external IP and `bookiePort` (3181 by default). Here's an example: - - ```bash - $ bookkeeper-server/bin/bookkeeper shell recover \ - zk1.example.com \ - 192.168.1.10:3181 \ - 192.168.1.10:3181 - ``` - - See the [AutoRecovery](../autorecovery) documentation for more info on the re-replication process. diff --git a/site/docs/4.7.1/admin/geo-replication.md b/site/docs/4.7.1/admin/geo-replication.md deleted file mode 100644 index 38b972345ef..00000000000 --- a/site/docs/4.7.1/admin/geo-replication.md +++ /dev/null @@ -1,22 +0,0 @@ ---- -title: Geo-replication -subtitle: Replicate data across BookKeeper clusters --- - -*Geo-replication* is the replication of data across BookKeeper clusters. In order to enable geo-replication for a group of BookKeeper clusters, - -## Global ZooKeeper - -Setting up a global ZooKeeper quorum is a lot like setting up a cluster-specific quorum. The crucial difference is that - -### Geo-replication across three clusters - -Let's say that you want to set up geo-replication across clusters in regions A, B, and C. First, the BookKeeper clusters in each region must have their own local (cluster-specific) ZooKeeper quorum. - -> BookKeeper clusters use global ZooKeeper only for metadata storage. Traffic from bookies to ZooKeeper should thus be fairly light in general. - -The crucial difference between using cluster-specific ZooKeeper and global ZooKeeper is that you need to point all {% pop bookies %} to the global ZooKeeper setup. - -## Region-aware placement policy - -## Autorecovery diff --git a/site/docs/4.7.1/admin/http.md b/site/docs/4.7.1/admin/http.md deleted file mode 100644 index 0097adc62b8..00000000000 --- a/site/docs/4.7.1/admin/http.md +++ /dev/null @@ -1,394 +0,0 @@ ---- -title: BookKeeper Admin REST API --- - -This document introduces BookKeeper HTTP endpoints, which can be used for BookKeeper administration. -To use this feature, set `httpServerEnabled` to `true` in the `conf/bk_server.conf` file. - -## All the endpoints - -Currently all the HTTP endpoints can be divided into these five components: -1. Heartbeat: heartbeat for a specific bookie. -1. Config: managing the server configuration for a specific bookie. -1. Ledger: HTTP endpoints related to ledgers. -1. Bookie: HTTP endpoints related to bookies. -1. AutoRecovery: HTTP endpoints related to auto recovery. - -## Heartbeat - -### Endpoint: /heartbeat -* Method: GET -* Description: Get heartbeat status for a specific bookie -* Response: - -| Code | Description | -|:-------|:------------| -|200 | Successful operation | - -## Config - -### Endpoint: /api/v1/config/server_config -1. Method: GET - * Description: Get the value of all configured values overridden on the local server config - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | -1. 
Method: PUT - * Description: Update a local server config - * Parameters: - - | Name | Type | Required | Description | - |:-----|:-----|:---------|:------------| - |configName | String | Yes | Configuration name (key) | - |configValue | String | Yes | Configuration value (value) | - * Body: - ```json - { - "configName1": "configValue1", - "configName2": "configValue2" - } - ``` - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - -## Ledger - -### Endpoint: /api/v1/ledger/delete/?ledger_id=<ledger_id> -1. Method: DELETE - * Description: Delete a ledger. - * Parameters: - - | Name | Type | Required | Description | - |:-----|:-----|:---------|:------------| - |ledger_id | Long | Yes | ledger id of the ledger. | - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - -### Endpoint: /api/v1/ledger/list/?print_metadata=<metadata> -1. Method: GET - * Description: List all the ledgers. - * Parameters: - - | Name | Type | Required | Description | - |:-----|:-----|:---------|:------------| - |print_metadata | Boolean | No | whether to print out metadata | - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - * Response Body format: - - ```json - { - "ledgerId1": "ledgerMetadata1", - "ledgerId2": "ledgerMetadata2", - ... - } - ``` - -### Endpoint: /api/v1/ledger/metadata/?ledger_id=<ledger_id> -1. Method: GET - * Description: Get the metadata of a ledger. - * Parameters: - - | Name | Type | Required | Description | - |:-----|:-----|:---------|:------------| - |ledger_id | Long | Yes | ledger id of the ledger. | - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - * Response Body format: - - ```json - { - "ledgerId1": "ledgerMetadata1" - } - ``` - -### Endpoint: /api/v1/ledger/read/?ledger_id=<ledger_id>&start_entry_id=<start_entry_id>&end_entry_id=<end_entry_id> -1. Method: GET - * Description: Read a range of entries from a ledger. - * Parameters: - - | Name | Type | Required | Description | - |:-----|:-----|:---------|:------------| - |ledger_id | Long | Yes| ledger id of the ledger. | - |start_entry_id | Long | No | start entry id of the read range. | - |end_entry_id | Long | No | end entry id of the read range. | - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - * Response Body format: - - ```json - { - "entryId1": "entry content 1", - "entryId2": "entry content 2", - ... - } - ``` - -## Bookie - -### Endpoint: /api/v1/bookie/list_bookies/?type=<type>&print_hostnames=<hostnames> -1. Method: GET - * Description: Get all the available bookies. - * Parameters: - - | Name | Type | Required | Description | - |:-----|:-----|:---------|:------------| - |type | String | Yes | value: "rw" or "ro", to list read-write/read-only bookies. | - |print_hostnames | Boolean | No | whether to print hostnames of bookies. | - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - * Response Body format: - - ```json - { - "bookieSocketAddress1": "hostname1", - "bookieSocketAddress2": "hostname2", - ... 
- } - ``` - -### Endpoint: /api/v1/bookie/list_bookie_info -1. Method: GET - * Description: Get disk usage info for the bookies in this cluster. - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - * Response Body format: - - ```json - { - "bookieAddress" : {free: xxx, total: xxx}, - "bookieAddress" : {free: xxx, total: xxx}, - ... - "clusterInfo" : {total_free: xxx, total: xxx} - } - ``` - -### Endpoint: /api/v1/bookie/last_log_mark -1. Method: GET - * Description: Get the last log marker. - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - * Response Body format: - - ```json - { - JournalId1 : position1, - JournalId2 : position2, - ... - } - ``` - -### Endpoint: /api/v1/bookie/list_disk_file/?file_type=<type> -1. Method: GET - * Description: Get all the files on disk of the current bookie. - * Parameters: - - | Name | Type | Required | Description | - |:-----|:-----|:---------|:------------| - |type | String | No | file type: journal/entrylog/index. | - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - * Response Body format: - - ```json - { - "journal files" : "filename1 filename2 ...", - "entrylog files" : "filename1 filename2...", - "index files" : "filename1 filename2 ..." - } - ``` - -### Endpoint: /api/v1/bookie/expand_storage -1. Method: PUT - * Description: Expand storage for a bookie. - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - -## Auto recovery - -### Endpoint: /api/v1/autorecovery/bookie/ -1. Method: PUT - * Description: Ledger data recovery for a failed bookie - * Body: - ```json - { - "bookie_src": [ "bookie_src1", "bookie_src2"... ], - "bookie_dest": [ "bookie_dest1", "bookie_dest2"... ], - "delete_cookie": <boolean> - } - ``` - * Parameters: - - | Name | Type | Required | Description | - |:-----|:-----|:---------|:------------| - |bookie_src | Strings | Yes | bookie source to recover | - |bookie_dest | Strings | No | bookie data recovery destination | - |delete_cookie | Boolean | No | whether to delete the cookie | - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - -### Endpoint: /api/v1/autorecovery/list_under_replicated_ledger/?missingreplica=<bookie_address>&excludingmissingreplica=<bookie_address> -1. Method: GET - * Description: Get all under-replicated ledgers. - * Parameters: - - | Name | Type | Required | Description | - |:-----|:-----|:---------|:------------| - |missingreplica | String | No | missing replica bookieId | - |excludingmissingreplica | String | No | exclude missing replica bookieId | - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - * Response Body format: - - ```json - { - [ledgerId1, ledgerId2...] - } - ``` - -### Endpoint: /api/v1/autorecovery/who_is_auditor -1. Method: GET - * Description: Get the auditor bookie id. 
- * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - * Response Body format: - - ```json - { - "Auditor": "hostname/hostAddress:Port" - } - ``` - -### Endpoint: /api/v1/autorecovery/trigger_audit -1. Method: PUT - * Description: Force trigger an audit by resetting the lostBookieRecoveryDelay. - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - -### Endpoint: /api/v1/autorecovery/lost_bookie_recovery_delay -1. Method: GET - * Description: Get the lostBookieRecoveryDelay value in seconds. - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - -1. Method: PUT - * Description: Set the lostBookieRecoveryDelay value in seconds. - * Body: - ```json - { - "delay_seconds": <delay_seconds> - } - ``` - * Parameters: - - | Name | Type | Required | Description | - |:-----|:-----|:---------|:------------| - | delay_seconds | Long | Yes | set delay value in seconds. | - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | - -### Endpoint: /api/v1/autorecovery/decommission -1. Method: PUT - * Description: Decommission a bookie: force trigger the audit task and make sure all the ledgers stored on the decommissioning bookie are replicated. - * Body: - ```json - { - "bookie_src": <bookie_src> - } - ``` - * Parameters: - - | Name | Type | Required | Description | - |:-----|:-----|:---------|:------------| - | bookie_src | String | Yes | Bookie src to decommission. | - * Response: - - | Code | Description | - |:-------|:------------| - |200 | Successful operation | - |403 | Permission denied | - |404 | Not found | diff --git a/site/docs/4.7.1/admin/metrics.md b/site/docs/4.7.1/admin/metrics.md deleted file mode 100644 index 142df3dcd2d..00000000000 --- a/site/docs/4.7.1/admin/metrics.md +++ /dev/null @@ -1,41 +0,0 @@ ---- -title: Metric collection --- - -BookKeeper enables metrics collection through a variety of [stats providers](#stats-providers). - -> For a full listing of available metrics, see the [Metrics](../../reference/metrics) reference doc. - -## Stats providers - -BookKeeper has stats provider implementations for five sinks: - -Provider | Provider class name -:--------|:------------------- -[Codahale Metrics](https://mvnrepository.com/artifact/org.apache.bookkeeper.stats/codahale-metrics-provider) | `org.apache.bookkeeper.stats.CodahaleMetricsProvider` -[Prometheus](https://prometheus.io/) | `org.apache.bookkeeper.stats.prometheus.PrometheusMetricsProvider` -[Finagle](https://twitter.github.io/finagle/guide/Metrics.html) | `org.apache.bookkeeper.stats.FinagleStatsProvider` -[Ostrich](https://github.com/twitter/ostrich) | `org.apache.bookkeeper.stats.OstrichProvider` -[Twitter Science Provider](https://mvnrepository.com/artifact/org.apache.bookkeeper.stats/twitter-science-provider) | `org.apache.bookkeeper.stats.TwitterStatsProvider` - -> The [Codahale Metrics]({{ site.github_master }}/bookkeeper-stats-providers/codahale-metrics-provider) stats provider is the default provider. 
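- -As a hedged sketch of what selecting one of these providers looks like when a bookie is configured programmatically (the property keys used here are the ones described in the next section, and the choice of the Prometheus provider is just an example): - -```java -import org.apache.bookkeeper.conf.ServerConfiguration; - -ServerConfiguration conf = new ServerConfiguration(); -// Turn statistics collection on and pick a provider from the table above. -conf.setProperty("enableStatistics", true); -conf.setProperty("statsProviderClass", -    "org.apache.bookkeeper.stats.prometheus.PrometheusMetricsProvider"); -``` 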
- -## Enabling stats providers in bookies - -There are two stats-related [configuration parameters](../../reference/config#statistics) available for bookies: - -Parameter | Description | Default -:---------|:------------|:------- -`enableStatistics` | Whether statistics are enabled for the bookie | `false` -`statsProviderClass` | The stats provider class used by the bookie | `org.apache.bookkeeper.stats.CodahaleMetricsProvider` - - -To enable stats: - -* set the `enableStatistics` parameter to `true` -* set `statsProviderClass` to the desired provider (see the [table above](#stats-providers) for a listing of classes) - - diff --git a/site/docs/4.7.1/admin/perf.md b/site/docs/4.7.1/admin/perf.md deleted file mode 100644 index 82956326e5d..00000000000 --- a/site/docs/4.7.1/admin/perf.md +++ /dev/null @@ -1,3 +0,0 @@ ---- -title: Performance tuning --- diff --git a/site/docs/4.7.1/admin/placement.md b/site/docs/4.7.1/admin/placement.md deleted file mode 100644 index ded456e1aea..00000000000 --- a/site/docs/4.7.1/admin/placement.md +++ /dev/null @@ -1,3 +0,0 @@ ---- -title: Customized placement policies --- diff --git a/site/docs/4.7.1/admin/upgrade.md b/site/docs/4.7.1/admin/upgrade.md deleted file mode 100644 index 1188282b400..00000000000 --- a/site/docs/4.7.1/admin/upgrade.md +++ /dev/null @@ -1,175 +0,0 @@ ---- -title: Upgrade --- - -> If you have questions about upgrades (or need help), please feel free to reach out to us by [mailing list]({{ site.baseurl }}community/mailing-lists) or [Slack Channel]({{ site.baseurl }}community/slack). - -## Overview - -Consider the guidelines below in preparation for upgrading. - -- Always back up all your configuration files before upgrading. - -- Read through the documentation and draft an upgrade plan that matches your specific requirements and environment before starting the upgrade process. - Put differently, don't start working through the guide on a live cluster. Read the guide entirely, make a plan, then execute the plan. -- Pay careful consideration to the order in which components are upgraded. In general, you need to upgrade bookies first and then upgrade your clients. -- If autorecovery is running along with bookies, you need to pay attention to the upgrade sequence. -- Read the release notes carefully for each release. They contain not only information about noteworthy features, but also changes to configurations - that may impact your upgrade. -- Always upgrade one bookie or a small set of bookies to canary the new version before upgrading all bookies in your cluster. - -## Canary - -It is wise to canary an upgraded version on one or a small set of bookies before upgrading all bookies in your live cluster. - -You can follow the steps below to canary an upgraded version: - -1. Stop a Bookie. -2. Upgrade the binary and configuration. -3. Start the Bookie in `ReadOnly` mode. This can be used to verify that the Bookie of this new version can run well for the read workload. -4. Once the Bookie has been running in `ReadOnly` mode successfully for a while, restart the Bookie in `Write/Read` mode. -5. After step 4, the Bookie will serve both write and read traffic. - -### Rollback Canaries - -If problems occur while canarying an upgraded version, you can simply take down the problematic Bookie node. The remaining bookies in the old cluster -will repair this problematic bookie node via autorecovery. Nothing else needs to be done. 
- -## Upgrade Steps - -Once you have determined that a version is safe to upgrade on a few nodes in your cluster, you can perform the following steps to upgrade all bookies in your cluster. - -1. Determine if autorecovery is running along with bookies. If yes, check if the clients (either new clients with new binary or old clients with new configurations) -are allowed to talk to old bookies; if clients are not allowed to talk to old bookies, please [disable autorecovery](../../reference/cli/#autorecovery-1) during the upgrade. -2. Decide on performing a rolling upgrade or a downtime upgrade. -3. Upgrade all Bookies (more below). -4. If autorecovery was disabled during the upgrade, [enable autorecovery](../../reference/cli/#autorecovery-1). -5. After all bookies are upgraded, build applications that use the `BookKeeper client` against the new bookkeeper libraries and deploy the new versions. - -### Upgrade Bookies - -In a rolling upgrade scenario, upgrade one Bookie at a time. In a downtime upgrade scenario, take the entire cluster down, upgrade each Bookie, then start the cluster. - -For each Bookie: - -1. Stop the bookie. -2. Upgrade the software (either new binary or new configuration). -3. Start the bookie. - -## Upgrade Guides - -The general upgrade method for Apache BookKeeper is described above. Below, we cover the details for individual versions. - -### 4.7.0 to 4.7.1 upgrade - -There aren't any protocol-related backward compatibility changes in 4.7.1. So you can follow the general upgrade sequence to upgrade from 4.7.0 to 4.7.1. - -### 4.6.x to 4.7.0 upgrade - -There aren't any protocol-related backward compatibility changes in 4.7.0. So you can follow the general upgrade sequence to upgrade from 4.6.x to 4.7.0. - -However, here is a list of changes that you might want to know about. - -#### Common Configuration Changes - -This section documents the common configuration changes that apply to both clients and servers. - -##### New Settings - -The following settings are newly added in 4.7.0. - -| Name | Default Value | Description | -|------|---------------|-------------| -| allowShadedLedgerManagerFactoryClass | false | This allows the bookkeeper client to connect to a bookkeeper cluster using a shaded ledger manager factory | -| shadedLedgerManagerFactoryClassPrefix | `dlshade.` | The shaded ledger manager factory prefix. This is used when `allowShadedLedgerManagerFactoryClass` is set to true | -| metadataServiceUri | null | metadata service uri that bookkeeper uses for loading the corresponding metadata driver and resolving its metadata service location | -| permittedStartupUsers | null | The list of users permitted to run the bookie process. Any user can run the bookie process if it is not set | - -##### Deprecated Settings - -There are no common settings deprecated at 4.7.0. - -##### Changed Settings - -There are no common settings whose default values were changed at 4.7.0. - -#### Server Configuration Changes - -##### New Settings - -The following settings are newly added in 4.7.0. - -| Name | Default Value | Description | -|------|---------------|-------------| -| verifyMetadataOnGC | false | Whether the bookie is configured to double check the ledgers' metadata prior to garbage collecting them | -| auditorLedgerVerificationPercentage | 0 | The percentage of a ledger (fragment)'s entries that will be verified by the Auditor before claiming a ledger (fragment) is missing | -| numHighPriorityWorkerThreads | 8 | The number of threads that should be used for high priority requests (i.e. recovery reads and adds, and fencing). 
If zero, reads are handled by Netty threads directly. | -| useShortHostName | false | Whether the bookie should use a short hostname or the [FQDN](https://en.wikipedia.org/wiki/Fully_qualified_domain_name) hostname for registration and ledger metadata when useHostNameAsBookieID is enabled. | -| minUsableSizeForEntryLogCreation | 1.2 * `logSizeLimit` | Minimum safe usable size to be available in a ledger directory for the bookie to create entry log files (in bytes). | -| minUsableSizeForHighPriorityWrites | 1.2 * `logSizeLimit` | Minimum safe usable size to be available in a ledger directory for the bookie to accept high priority writes even when it is in readonly mode. | - -##### Deprecated Settings - -The following settings are deprecated since 4.7.0. - -| Name | Description | -|------|-------------| -| registrationManagerClass | The registration manager class used by the server to discover the registration manager. It is replaced by `metadataServiceUri`. | - - -##### Changed Settings - -The default values of the following settings are changed since 4.7.0. - -| Name | Old Default Value | New Default Value | Notes | -|------|-------------------|-------------------|-------| -| numLongPollWorkerThreads | 10 | 0 | If the number of threads is zero or negative, the bookie falls back to using read threads for long poll. This avoids creating threads if the application doesn't use the long poll feature. | - -#### Client Configuration Changes - -##### New Settings - -The following settings are newly added in 4.7.0. - -| Name | Default Value | Description | -|------|---------------|-------------| -| maxNumEnsembleChanges | Integer.MAX\_VALUE | The max allowed ensemble change number before sealing a ledger on failures | -| timeoutMonitorIntervalSec | min(`addEntryTimeoutSec`, `addEntryQuorumTimeoutSec`, `readEntryTimeoutSec`) | The interval between successive executions of the operation timeout monitor, in seconds | -| ensemblePlacementPolicyOrderSlowBookies | false | Flag to enable/disable reordering slow bookies in placement policy | - -##### Deprecated Settings - -The following settings are deprecated since 4.7.0. - -| Name | Description | -|------|-------------| -| clientKeyStoreType | Replaced by `tlsKeyStoreType` | -| clientKeyStore | Replaced by `tlsKeyStore` | -| clientKeyStorePasswordPath | Replaced by `tlsKeyStorePasswordPath` | -| clientTrustStoreType | Replaced by `tlsTrustStoreType` | -| clientTrustStore | Replaced by `tlsTrustStore` | -| clientTrustStorePasswordPath | Replaced by `tlsTrustStorePasswordPath` | -| registrationClientClass | The registration client class used by the client to discover the registration service. It is replaced by `metadataServiceUri`. | - -##### Changed Settings - -The default values of the following settings are changed since 4.7.0. - -| Name | Old Default Value | New Default Value | Notes | -|------|-------------------|-------------------|-------| -| enableDigestTypeAutodetection | false | true | Autodetect the digest type and passwd when opening a ledger. It will ignore the provided digest type, but still verify the provided passwd. | - -### 4.5.x to 4.6.x upgrade - -There aren't any protocol-related backward compatibility changes in 4.6.x. So you can follow the general upgrade sequence to upgrade from 4.5.x to 4.6.x. - -### 4.4.x to 4.5.x upgrade - -There aren't any protocol-related backward compatibility changes in 4.5.0. So you can follow the general upgrade sequence to upgrade from 4.4.x to 4.5.x. -However, here is a list of things that you might want to know. - -1. 4.5.x upgrades netty from 3.x to 4.x. 
The memory usage pattern might change a bit. Netty 4 uses more direct memory. Please pay attention to your memory usage - and adjust the JVM settings accordingly. -2. `multi journals` is a non-rollbackable feature. If you configure a bookie to use multiple journals on 4.5.x you cannot roll the bookie back to use 4.4.x. You have - to take the bookie out and recover it if you want to roll back to 4.4.x. - -If you are planning to upgrade a non-secured cluster to a secured cluster enabling security features in 4.5.0, please read [BookKeeper Security](../../security/overview) for more details. diff --git a/site/docs/4.7.1/api/distributedlog-api.md b/site/docs/4.7.1/api/distributedlog-api.md deleted file mode 100644 index 22a12ec5603..00000000000 --- a/site/docs/4.7.1/api/distributedlog-api.md +++ /dev/null @@ -1,395 +0,0 @@ ---- -title: DistributedLog -subtitle: A higher-level API for managing BookKeeper entries --- - -> DistributedLog began its life as a separate project under the Apache Foundation. It was merged into BookKeeper in 2017. - -The DistributedLog API is an easy-to-use interface for managing BookKeeper entries that enables you to use BookKeeper without needing to interact with [ledgers](../ledger-api) directly. - -DistributedLog (DL) maintains sequences of records in categories called *logs* (aka *log streams*). *Writers* append records to DL logs, while *readers* fetch and process those records. - -## Architecture - -The diagram below illustrates how the DistributedLog API works with BookKeeper: - -![DistributedLog API]({{ site.baseurl }}img/distributedlog.png) - -## Logs - -A *log* in DistributedLog is an ordered, immutable sequence of *log records*. - -The diagram below illustrates the anatomy of a log stream: - -![DistributedLog log]({{ site.baseurl }}img/logs.png) - -### Log records - -Each log record is a sequence of bytes. Applications are responsible for serializing and deserializing byte sequences stored in log records. - -Log records are written sequentially into a *log stream* and assigned a unique sequence number called a DLSN (DistributedLog Sequence Number). - -In addition to a DLSN, applications can assign their own sequence number when constructing log records. Application-defined sequence numbers are known as *TransactionIDs* (or *txid*). Either a DLSN or a TransactionID can be used for positioning readers to start reading from a specific log record. - -### Log segments - -Each log is broken down into *log segments* that contain subsets of records. Log segments are distributed and stored in BookKeeper. DistributedLog rolls the log segments based on the configured *rolling policy*, which can be either - -* a configurable period of time (such as every 2 hours), or -* a configurable maximum size (such as every 128 MB). - -The data in logs is divided up into equally sized log segments and distributed evenly across {% pop bookies %}. This allows logs to scale beyond a size that would fit on a single server and spreads read traffic across the cluster. - -### Namespaces - -Log streams that belong to the same organization are typically categorized and managed under a *namespace*. DistributedLog namespaces essentially enable applications to locate log streams. Applications can perform the following actions under a namespace: - -* create streams -* delete streams -* truncate streams to a given sequence number (either a DLSN or a TransactionID) - -## Writers - -Through the DistributedLog API, writers write data into logs of their choice. 
All records are appended into logs in order. The sequencing is performed by the writer, which means that there is only one active writer for a log at any given time. - -Using a *fencing* mechanism in the log segment store, DistributedLog guarantees correctness even when two writers attempt to write to the same log, for example after a network partition. - -### Write Proxy - -Log writers are served and managed in a service tier called the *Write Proxy* (see the diagram [above](#architecture)). The Write Proxy is used for accepting writes from a large number of clients. - -## Readers - -DistributedLog readers read records from logs of their choice, starting with a provided position. The provided position can be either a DLSN or a TransactionID. - -Readers read records from logs in strict order. Different readers can read records from different positions in the same log. - -Unlike other pub-sub systems, DistributedLog doesn't record or manage readers' positions. This means that tracking is the responsibility of applications, as different applications may have different requirements for tracking and coordinating positions. This is hard to get right with a single approach. Distributed databases, for example, might store reader positions along with SSTables, so they would resume applying transactions from the positions stored in SSTables. Tracking reader positions could easily be done at the application level using various stores (such as ZooKeeper, the filesystem, or key-value stores). - -### Read Proxy - -Log records can be cached in a service tier called the *Read Proxy* to serve a large number of readers. See the diagram [above](#architecture). The Read Proxy is the analogue of the [Write Proxy](#write-proxy). - -## Guarantees - -The DistributedLog API for BookKeeper provides a number of guarantees for applications: - -* Records written by a [writer](#writers) to a [log](#logs) are appended in the order in which they are written. If a record **R1** is written by the same writer as a record **R2**, **R1** will have a smaller sequence number than **R2**. -* [Readers](#readers) see [records](#log-records) in the same order in which they are [written](#writers) to the log. -* All records are persisted on disk by BookKeeper before acknowledgements, which guarantees durability. -* For a log with a replication factor of N, DistributedLog tolerates up to N-1 server failures without losing any records. - -## API - -Documentation for the DistributedLog API can be found [here](https://bookkeeper.apache.org/distributedlog/docs/latest/user_guide/api/core). - -> At a later date, the DistributedLog API docs will be added here. - - diff --git a/site/docs/4.7.1/api/ledger-adv-api.md b/site/docs/4.7.1/api/ledger-adv-api.md deleted file mode 100644 index df6224dd7ec..00000000000 --- a/site/docs/4.7.1/api/ledger-adv-api.md +++ /dev/null @@ -1,111 +0,0 @@ ---- -title: The Advanced Ledger API --- - -In release `4.5.0`, Apache BookKeeper introduced a few advanced APIs for advanced usage. -This section covers these advanced APIs. - -> Before learning the advanced API, please read the [Ledger API](../ledger-api) first. - -## LedgerHandleAdv - -[`LedgerHandleAdv`](../javadoc/org/apache/bookkeeper/client/LedgerHandleAdv) is an advanced extension of [`LedgerHandle`](../javadoc/org/apache/bookkeeper/client/LedgerHandle). -It allows users to pass in an `entryId` when adding an entry. 
- -### Creating advanced ledgers - -Here's an example: - -```java -byte[] passwd = "some-passwd".getBytes(); -LedgerHandleAdv handle = bkClient.createLedgerAdv( - 3, 3, 2, // replica settings - DigestType.CRC32, - passwd); -``` - -You can also create advanced ledgers asynchronously. - -```java -class LedgerCreationCallback implements AsyncCallback.CreateCallback { - public void createComplete(int returnCode, LedgerHandle handle, Object ctx) { - System.out.println("Ledger successfully created"); - } -} -client.asyncCreateLedgerAdv( - 3, // ensemble size - 3, // write quorum size - 2, // ack quorum size - BookKeeper.DigestType.CRC32, - password, - new LedgerCreationCallback(), - "some context" -); -``` - -Besides the APIs above, BookKeeper allows users to provide a `ledger-id` when creating advanced ledgers. - -```java -long ledgerId = ...; // the ledger id is generated externally. - -byte[] passwd = "some-passwd".getBytes(); -LedgerHandleAdv handle = bkClient.createLedgerAdv( - ledgerId, // ledger id generated externally - 3, 3, 2, // replica settings - DigestType.CRC32, - passwd); -``` - -> Please note, it is the users' responsibility to provide a unique ledger id when using the API above. -> If a ledger already exists when users try to create an advanced ledger with the same ledger id, -> a [LedgerExistsException](../javadoc/org/apache/bookkeeper/client/BKException.BKLedgerExistException.html) is thrown by the bookkeeper client. - -Creating advanced ledgers can be done through a fluent API since 4.6. - -```java -BookKeeper bk = ...; - -byte[] passwd = "some-passwd".getBytes(); - -WriteHandle wh = bk.newCreateLedgerOp() - .withDigestType(DigestType.CRC32) - .withPassword(passwd) - .withEnsembleSize(3) - .withWriteQuorumSize(3) - .withAckQuorumSize(2) - .makeAdv() // convert the create ledger builder to create ledger adv builder - .withLedgerId(1234L) - .execute() // execute the creation op - .get(); // wait for the execution to complete - -``` - -### Add Entries - -The normal [add entries api](ledger-api/#adding-entries-to-ledgers) is disabled in advanced ledgers. Instead, when users want to add entries -to advanced ledgers, an entry id must be passed in along with the entry data when adding an entry. - -```java -long entryId = ...; // entry id generated externally - -ledger.addEntry(entryId, "Some entry data".getBytes()); -``` - -If you are using the new API, you can do the following: - -```java -WriteHandle wh = ...; -long entryId = ...; // entry id generated externally - -wh.write(entryId, "Some entry data".getBytes()).get(); -``` - -A few notes when using this API: - -- The entry id has to be non-negative. -- Clients are free to add entries out of order. -- However, the entries are only acknowledged in a monotonic order starting from 0. - -### Read Entries - -The read entries api in advanced ledgers remains the same as in [normal ledgers](../ledger-api/#reading-entries-from-ledgers). diff --git a/site/docs/4.7.1/api/ledger-api.md b/site/docs/4.7.1/api/ledger-api.md deleted file mode 100644 index d44437b61e6..00000000000 --- a/site/docs/4.7.1/api/ledger-api.md +++ /dev/null @@ -1,802 +0,0 @@ ---- -title: The Ledger API --- - -The ledger API is a lower-level API for BookKeeper that enables you to interact with {% pop ledgers %} directly. - -## The Java ledger API client - -To get started with the Java client for BookKeeper, install the `bookkeeper-server` library as a dependency in your Java application. 
- -> For a more in-depth tutorial that involves a real use case for BookKeeper, see the [Example application](../example-application) guide. - -## Installation - -The BookKeeper Java client library is available via [Maven Central](http://search.maven.org/) and can be installed using [Maven](#maven), [Gradle](#gradle), and other build tools. - -### Maven - -If you're using [Maven](https://maven.apache.org/), add this to your [`pom.xml`](https://maven.apache.org/guides/introduction/introduction-to-the-pom.html) build configuration file: - -```xml -<properties> -  <bookkeeper.version>4.7.1</bookkeeper.version> -</properties> - -<dependencies> -  <dependency> -    <groupId>org.apache.bookkeeper</groupId> -    <artifactId>bookkeeper-server</artifactId> -    <version>${bookkeeper.version}</version> -  </dependency> -</dependencies> -``` - -BookKeeper makes heavy use of the Google [protobuf](https://github.com/google/protobuf/tree/master/java) and [guava](https://github.com/google/guava) libraries. If your application might include different versions of protobuf or guava introduced by other dependencies, you can choose to use the -shaded library, which relocates the protobuf and guava classes into a different namespace to avoid conflicts. - -```xml -<properties> -  <bookkeeper.version>4.7.1</bookkeeper.version> -</properties> - -<dependencies> -  <dependency> -    <groupId>org.apache.bookkeeper</groupId> -    <artifactId>bookkeeper-server-shaded</artifactId> -    <version>${bookkeeper.version}</version> -  </dependency> -</dependencies> -``` - -### Gradle - -If you're using [Gradle](https://gradle.org/), add this to your [`build.gradle`](https://spring.io/guides/gs/gradle/) build configuration file: - -```groovy -dependencies { - compile group: 'org.apache.bookkeeper', name: 'bookkeeper-server', version: '4.7.1' -} - -// Alternatively: -dependencies { - compile 'org.apache.bookkeeper:bookkeeper-server:4.7.1' -} -``` - -As with Maven, you can also configure Gradle to use the shaded jars. - -```groovy -// use the `bookkeeper-server-shaded` jar -dependencies { - compile 'org.apache.bookkeeper:bookkeeper-server-shaded:{{ site.latest-version }}' -} -``` - -## Connection string - -When interacting with BookKeeper using the Java client, you need to provide your client with a connection string, for which you have three options: - -* Provide your entire ZooKeeper connection string, for example `zk1:2181,zk2:2181,zk3:2181`. -* Provide a host and port for one node in your ZooKeeper cluster, for example `zk1:2181`. In general, it's better to provide a full connection string (in case the ZooKeeper node you attempt to connect to is down). -* If your ZooKeeper cluster can be discovered via DNS, you can provide the DNS name, for example `my-zookeeper-cluster.com`. - -## Creating a new client - -In order to create a new [`BookKeeper`](../javadoc/org/apache/bookkeeper/client/BookKeeper) client object, you need to pass in a [connection string](#connection-string). Here is an example client object using a ZooKeeper connection string: - -```java -try { - String connectionString = "127.0.0.1:2181"; // For a single-node, local ZooKeeper cluster - BookKeeper bkClient = new BookKeeper(connectionString); -} catch (InterruptedException | IOException | KeeperException e) { - e.printStackTrace(); -} -``` - -> If you're running BookKeeper [locally](../../getting-started/run-locally), using the [`localbookie`](../../reference/cli#bookkeeper-localbookie) command, use `"127.0.0.1:2181"` for your connection string, as in the example above. - -There are, however, other ways that you can create a client object: - -* By passing in a [`ClientConfiguration`](../javadoc/org/apache/bookkeeper/conf/ClientConfiguration) object. 
Here's an example: - - ```java - ClientConfiguration config = new ClientConfiguration(); - config.setZkServers(zkConnectionString); - config.setAddEntryTimeout(2000); - BookKeeper bkClient = new BookKeeper(config); - ``` - -* By specifying a `ClientConfiguration` and a [`ZooKeeper`](http://zookeeper.apache.org/doc/current/api/org/apache/zookeeper/ZooKeeper.html) client object: - - ```java - ClientConfiguration config = new ClientConfiguration(); - config.setAddEntryTimeout(5000); - ZooKeeper zkClient = new ZooKeeper(/* client args */); - BookKeeper bkClient = new BookKeeper(config, zkClient); - ``` - -* Using the `forConfig` method: - - ```java - BookKeeper bkClient = BookKeeper.forConfig(conf).build(); - ``` - -## Creating ledgers - -The easiest way to create a {% pop ledger %} using the Java client is via the `createLedger` method, which creates a new ledger synchronously and returns a [`LedgerHandle`](../javadoc/org/apache/bookkeeper/client/LedgerHandle). You must specify at least a [`DigestType`](../javadoc/org/apache/bookkeeper/client/BookKeeper.DigestType) and a password. - -Here's an example: - -```java -byte[] password = "some-password".getBytes(); -LedgerHandle handle = bkClient.createLedger(BookKeeper.DigestType.MAC, password); -``` - -You can also create ledgers asynchronously. - -### Create ledgers asynchronously - -```java -class LedgerCreationCallback implements AsyncCallback.CreateCallback { - public void createComplete(int returnCode, LedgerHandle handle, Object ctx) { - System.out.println("Ledger successfully created"); - } -} - -client.asyncCreateLedger( - 3, - 2, - BookKeeper.DigestType.MAC, - password, - new LedgerCreationCallback(), - "some context" -); -``` - -## Adding entries to ledgers - -```java -long entryId = ledger.addEntry("Some entry data".getBytes()); -``` - -### Add entries asynchronously - -## Reading entries from ledgers - -```java -Enumeration<LedgerEntry> entries = handle.readEntries(1, 99); -``` - -To read all possible entries from the ledger: - -```java -Enumeration<LedgerEntry> entries = - handle.readEntries(0, handle.getLastAddConfirmed()); - -while (entries.hasMoreElements()) { - LedgerEntry entry = entries.nextElement(); - System.out.println("Successfully read entry " + entry.getId()); -} -``` - -### Reading entries after the LastAddConfirmed range - -`readUnconfirmedEntries` allows reading beyond the LastAddConfirmed range. -It lets the client read without checking the local value of LastAddConfirmed, so that it is possible to read entries for which the writer has not yet received the acknowledgement. -For entries which are within the range 0..LastAddConfirmed, BookKeeper guarantees that the writer has successfully received the acknowledgement. -For entries outside that range it is possible that the writer never received the acknowledgement, so there is the risk that the reader sees entries before the writer does, which could result in a consistency issue in some cases. -With this method you can even read entries before the LastAddConfirmed and entries after it with one call; the expected consistency is as described above. - -```java -Enumeration<LedgerEntry> entries = - handle.readUnconfirmedEntries(0, lastEntryIdExpectedToRead); - -while (entries.hasMoreElements()) { - LedgerEntry entry = entries.nextElement(); - System.out.println("Successfully read entry " + entry.getId()); -} -``` - -## Deleting ledgers - -{% pop Ledgers %} can also be deleted synchronously or asynchronously. 
- -```java -long ledgerId = 1234; - -try { - bkClient.deleteLedger(ledgerId); -} catch (Exception e) { - e.printStackTrace(); -} -``` - -### Delete ledgers asynchronously - -Exceptions thrown: - -* - -```java -class DeleteEntryCallback implements AsyncCallback.DeleteCallback { - public void deleteComplete(int returnCode, Object ctx) { - System.out.println("Delete completed"); - } -} -``` - -## Simple example - -> For a more involved BookKeeper client example, see the [example application](#example-application) below. - -In the code sample below, a BookKeeper client: - -* creates a ledger -* writes entries to the ledger -* closes the ledger (meaning no further writes are possible) -* re-opens the ledger for reading -* reads all available entries - -```java -// Create a client object for the local ensemble. This -// operation throws multiple exceptions, so make sure to -// use a try/catch block when instantiating client objects. -BookKeeper bkc = new BookKeeper("localhost:2181"); - -// A password for the new ledger -byte[] ledgerPassword = /* some sequence of bytes, perhaps random */; - -// Create a new ledger and fetch its identifier -LedgerHandle lh = bkc.createLedger(BookKeeper.DigestType.MAC, ledgerPassword); -long ledgerId = lh.getId(); - -// Create a buffer for four-byte entries -ByteBuffer entry = ByteBuffer.allocate(4); - -int numberOfEntries = 100; - -// Add entries to the ledger, then close it -for (int i = 0; i < numberOfEntries; i++){ - entry.putInt(i); - entry.position(0); - lh.addEntry(entry.array()); -} -lh.close(); - -// Open the ledger for reading -lh = bkc.openLedger(ledgerId, BookKeeper.DigestType.MAC, ledgerPassword); - -// Read all available entries -Enumeration<LedgerEntry> entries = lh.readEntries(0, numberOfEntries - 1); - -while(entries.hasMoreElements()) { - ByteBuffer result = ByteBuffer.wrap(entries.nextElement().getEntry()); - Integer retrEntry = result.getInt(); - - // Print the integer stored in each entry - System.out.println(String.format("Result: %s", retrEntry)); -} - -// Close the ledger and the client -lh.close(); -bkc.close(); -``` - -Running this should return this output: - -```shell -Result: 0 -Result: 1 -Result: 2 -# etc -``` - -## Example application - -This tutorial walks you through building an example application that uses BookKeeper as the replicated log. The application uses the [BookKeeper Java client](../java-client) to interact with BookKeeper. - -> The code for this tutorial can be found in [this GitHub repo](https://github.com/ivankelly/bookkeeper-tutorial/). The final code for the `Dice` class can be found [here](https://github.com/ivankelly/bookkeeper-tutorial/blob/master/src/main/java/org/apache/bookkeeper/Dice.java). - -### Setup - -Before you start, you will need to have a BookKeeper cluster running locally on your machine. For installation instructions, see [Installation](../../getting-started/installation). - -To start up a cluster consisting of six {% pop bookies %} locally: - -```shell -$ bookkeeper-server/bin/bookkeeper localbookie 6 -``` - -You can specify a different number of bookies if you'd like. - -### Goal - -The goal of the dice application is to have - -* multiple instances of this application, -* possibly running on different machines, -* all of which display the exact same sequence of numbers. - -In other words, the log needs to be both durable and consistent, regardless of how many {% pop bookies %} are participating in the BookKeeper ensemble. 
If one of the bookies crashes or becomes unable to communicate with the other bookies in any way, each application instance should *still* display the same sequence of numbers as the others. This tutorial will show you how to achieve this. - -To begin, download the base application, compile and run it. - -```shell -$ git clone https://github.com/ivankelly/bookkeeper-tutorial.git -$ mvn package -$ mvn exec:java -Dexec.mainClass=org.apache.bookkeeper.Dice -``` - -That should yield output that looks something like this: - -``` -[INFO] Scanning for projects... -[INFO] -[INFO] ------------------------------------------------------------------------ -[INFO] Building tutorial 1.0-SNAPSHOT -[INFO] ------------------------------------------------------------------------ -[INFO] -[INFO] --- exec-maven-plugin:1.3.2:java (default-cli) @ tutorial --- -[WARNING] Warning: killAfter is now deprecated. Do you need it ? Please comment on MEXEC-6. -Value = 4 -Value = 5 -Value = 3 -``` - -### The base application - -The application in this tutorial is a dice application. The `Dice` class below has a `playDice` function that generates a random number between 1 and 6 every second, prints the value of the dice roll, and runs indefinitely. - -```java -public class Dice { - Random r = new Random(); - - void playDice() throws InterruptedException { - while (true) { - Thread.sleep(1000); - System.out.println("Value = " + (r.nextInt(6) + 1)); - } - } -} -``` - -When you run the `main` function of this class, a new `Dice` object will be instantiated and then run indefinitely: - -```java -public class Dice { - // other methods - - public static void main(String[] args) throws InterruptedException { - Dice d = new Dice(); - d.playDice(); - } -} -``` - -### Leaders and followers (and a bit of background) - -To achieve this common view in multiple instances of the program, we need each instance to agree on what the next number in the sequence will be. For example, the instances must agree that 4 is the first number and 2 is the second number and 5 is the third number and so on. This is a difficult problem, especially in the case that any instance may go away at any time, and messages between the instances can be lost or reordered. - -Luckily, there are already algorithms to solve this. Paxos is an abstract algorithm to implement this kind of agreement, while Zab and Raft are more practical protocols. This video gives a good overview about how these algorithms usually look. They all have a similar core. - -It would be possible to run Paxos to agree on each number in the sequence. However, running Paxos each time can be expensive. What Zab and Raft do is use a Paxos-like algorithm to elect a leader. The leader then decides what the sequence of events should be, putting them in a log, which the other instances can then follow to maintain the same state as the leader. - -Bookkeeper provides the functionality for the second part of the protocol, allowing a leader to write events to a log and have multiple followers tailing the log. However, bookkeeper does not do leader election. You will need a zookeeper or raft instance for that purpose. - -### Why not just use ZooKeeper? - -There are a number of reasons: - -1. Zookeeper's log is only exposed through a tree-like interface. It can be hard to shoehorn your application into this. -2. A zookeeper ensemble of multiple machines is limited to one log. You may want one log per resource, which will become expensive very quickly. -3. 
Adding extra machines to a zookeeper ensemble does not increase capacity or throughput. - -Bookkeeper can be seen as a means of exposing ZooKeeper's replicated log to applications in a scalable fashion. ZooKeeper is still used by BookKeeper, however, to maintain consistency guarantees, though clients don't need to interact with ZooKeeper directly. - -### Electing a leader - -We'll use zookeeper to elect a leader. A zookeeper instance will have started locally when you started the localbookie application above. To verify it's running, run the following command. - -```shell -$ echo stat | nc localhost 2181 -Zookeeper version: 3.4.6-1569965, built on 02/20/2014 09:09 GMT -Clients: - /127.0.0.1:59343[1](queued=0,recved=40,sent=41) - /127.0.0.1:49354[1](queued=0,recved=11,sent=11) - /127.0.0.1:49361[0](queued=0,recved=1,sent=0) - /127.0.0.1:59344[1](queued=0,recved=38,sent=39) - /127.0.0.1:59345[1](queued=0,recved=38,sent=39) - /127.0.0.1:59346[1](queued=0,recved=38,sent=39) - -Latency min/avg/max: 0/0/23 -Received: 167 -Sent: 170 -Connections: 6 -Outstanding: 0 -Zxid: 0x11 -Mode: standalone -Node count: 16 -``` - -To interact with zookeeper, we'll use the Curator client rather than the stock zookeeper client. Getting things right with the zookeeper client can be tricky, and curator removes a lot of the pointy corners for you. In fact, curator even provides a leader election recipe, so we need to do very little work to get leader election in our application. - -```java -public class Dice extends LeaderSelectorListenerAdapter implements Closeable { - - final static String ZOOKEEPER_SERVER = "127.0.0.1:2181"; - final static String ELECTION_PATH = "/dice-elect"; - - ... - - Dice() throws InterruptedException { - curator = CuratorFrameworkFactory.newClient(ZOOKEEPER_SERVER, - 2000, 10000, new ExponentialBackoffRetry(1000, 3)); - curator.start(); - curator.blockUntilConnected(); - - leaderSelector = new LeaderSelector(curator, ELECTION_PATH, this); - leaderSelector.autoRequeue(); - leaderSelector.start(); - } -``` - -In the constructor for Dice, we need to create the curator client. We specify four things when creating the client: the location of the zookeeper service, the session timeout, the connect timeout and the retry policy. - -The session timeout is a zookeeper concept. If the zookeeper server doesn't hear anything from the client for this amount of time, any leases which the client holds will be timed out. This is important in leader election. For leader election, the curator client will take a lease on ELECTION_PATH. The first instance to take the lease will become leader and the rest will become followers. However, their claim on the lease will remain in the queue. If the first instance then goes away, due to a crash etc., its session will time out. Once the session times out, the lease will be released and the next instance in the queue will become the leader. The call to autoRequeue() will make the client queue itself again if it loses the lease for some other reason, such as if it was still alive but a garbage collection pause caused it to lose its session, and thereby its lease. I've set the lease to be quite low so that when we test out leader election, transitions will be quite quick. The optimum length for the session timeout depends very much on the use case. The other parameters are the connection timeout, i.e. the amount of time it will spend trying to connect to a zookeeper server before giving up, and the retry policy. 
The retry policy specifies how the client should respond to transient errors, such as connection loss. Operations that fail with transient errors can be retried, and this argument specifies how often the retries should occur. - -Finally, you'll have noticed that Dice now extends LeaderSelectorListenerAdapter and implements Closeable. Closeable is there to close the resources we have initialized in the constructor: the Curator client and the leaderSelector. LeaderSelectorListenerAdapter is a callback that the leaderSelector uses to notify the instance that it is now the leader. It is passed as the third argument to the LeaderSelector constructor. - -```java - @Override - public void takeLeadership(CuratorFramework client) - throws Exception { - synchronized (this) { - leader = true; - try { - while (true) { - this.wait(); - } - } catch (InterruptedException ie) { - Thread.currentThread().interrupt(); - leader = false; - } - } - } -``` - -takeLeadership() is the callback called by LeaderSelector when the instance is leader. It should only return when the instance wants to give up leadership. In our case, we never do, so we wait on the current object until we're interrupted. To signal to the rest of the program that we are leader, we set a volatile boolean called leader to true. This is unset after we are interrupted. - -```java - void playDice() throws InterruptedException { - while (true) { - while (leader) { - Thread.sleep(1000); - System.out.println("Value = " + (r.nextInt(6) + 1) - + ", isLeader = " + leader); - } - } - } -``` - -Finally, we modify the `playDice` function to only generate random numbers when it is the leader. - -Run two instances of the program in two different terminals. You'll see that one becomes leader and prints numbers while the other just sits there. - -Now stop the leader using Control-Z. This will pause the process, but it won't kill it. You will be dropped back to the shell in that terminal. After a couple of seconds (the session timeout), you will see that the other instance has become the leader. ZooKeeper will guarantee that only one instance is selected as leader at any time. - -Now go back to the shell that the original leader was on and wake up the process using fg. You'll see something like the following: - -```shell -... -... -Value = 4, isLeader = true -Value = 4, isLeader = true -^Z -[1]+ Stopped mvn exec:java -Dexec.mainClass=org.apache.bookkeeper.Dice -$ fg -mvn exec:java -Dexec.mainClass=org.apache.bookkeeper.Dice -Value = 3, isLeader = true -Value = 1, isLeader = false -``` - -## New API - -Since 4.6, BookKeeper provides a new client API which leverages the Java 8 [CompletableFuture](https://docs.oracle.com/javase/8/docs/api/java/util/concurrent/CompletableFuture.html) facility. -[WriteHandle](../javadoc/org/apache/bookkeeper/client/api/WriteHandle), [WriteAdvHandle](../javadoc/org/apache/bookkeeper/client/api/WriteAdvHandle), and [ReadHandle](../javadoc/org/apache/bookkeeper/client/api/ReadHandle) are introduced to replace the generic [LedgerHandle](../javadoc/org/apache/bookkeeper/client/LedgerHandle). - -> All of the new API is available in `org.apache.bookkeeper.client.api`. You should only use interfaces defined in this package. - -*Beware* that in 4.6 this API is still experimental and may be subject to changes in future minor releases.
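As a quick orientation before the examples below, here is a hedged sketch of what confining an application to the public API package looks like in its imports (an illustrative list, not an exhaustive or authoritative one):

```java
// Illustrative only: all of the handle and builder types used in the
// examples below live in the public org.apache.bookkeeper.client.api package.
// (Configuration classes such as ClientConfiguration live separately in
// org.apache.bookkeeper.conf, as the first example shows.)
import org.apache.bookkeeper.client.api.BookKeeper;
import org.apache.bookkeeper.client.api.DigestType;
import org.apache.bookkeeper.client.api.ReadHandle;
import org.apache.bookkeeper.client.api.WriteHandle;
```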
- -### Create a new client - -In order to create a new [`BookKeeper`](../javadoc/org/apache/bookkeeper/client/api/BookKeeper) client object, you need to construct a [`ClientConfiguration`](../javadoc/org/apache/bookkeeper/conf/ClientConfiguration) object and set a [connection string](#connection-string) first, and then use [`BookKeeperBuilder`](../javadoc/org/apache/bookkeeper/client/api/BookKeeperBuilder) to build the client. - -Here is an example of building the BookKeeper client. - -```java -// construct a client configuration instance -ClientConfiguration conf = new ClientConfiguration(); -conf.setZkServers(zkConnectionString); -conf.setZkLedgersRootPath("/path/to/ledgers/root"); - -// build the bookkeeper client -BookKeeper bk = BookKeeper.newBuilder(conf) - .statsLogger(...) - ... - .build(); - -``` - -### Create ledgers - -The easiest way to create a {% pop ledger %} using the Java client is via the [`CreateBuilder`](../javadoc/org/apache/bookkeeper/client/api/CreateBuilder). You must specify at least -a [`DigestType`](../javadoc/org/apache/bookkeeper/client/api/DigestType) and a password. - -Here's an example: - -```java -BookKeeper bk = ...; - -byte[] password = "some-password".getBytes(); - -WriteHandle wh = bk.newCreateLedgerOp() - .withDigestType(DigestType.CRC32) - .withPassword(password) - .withEnsembleSize(3) - .withWriteQuorumSize(3) - .withAckQuorumSize(2) - .execute() // execute the creation op - .get(); // wait for the execution to complete -``` - -A [`WriteHandle`](../javadoc/org/apache/bookkeeper/client/api/WriteHandle) is returned for applications to write and read entries to and from the ledger. - -### Write flags - -You can specify the behaviour of the writer by setting [`WriteFlags`](../javadoc/org/apache/bookkeeper/client/api/WriteFlag) at ledger creation time. -These flags are applied only during write operations and are not recorded in the metadata. - - -Available write flags: - -| Flag | Explanation | Notes | -:---------|:------------|:------- -DEFERRED_SYNC | Writes are acknowledged early, without waiting for guarantees of durability | Data will only be written to the OS page cache, without forcing an fsync. - -```java -BookKeeper bk = ...; - -byte[] password = "some-password".getBytes(); - -WriteHandle wh = bk.newCreateLedgerOp() - .withDigestType(DigestType.CRC32) - .withPassword(password) - .withEnsembleSize(3) - .withWriteQuorumSize(3) - .withAckQuorumSize(2) - .withWriteFlags(DEFERRED_SYNC) - .execute() // execute the creation op - .get(); // wait for the execution to complete -``` - - -### Append entries to ledgers - -The [`WriteHandle`](../javadoc/org/apache/bookkeeper/client/api/WriteHandle) can be used by applications to append entries to the ledger.
- -```java -WriteHandle wh = ...; - -CompletableFuture<Long> addFuture = wh.append("Some entry data".getBytes()); - -// option 1: you can wait for the add to complete synchronously -try { - long entryId = addFuture.get(); -} catch (InterruptedException | ExecutionException e) { - // error handling: the cause may be a BKException -} - -// option 2: you can process the result and exception asynchronously -addFuture - .thenApply(entryId -> { - // process the result - return entryId; - }) - .exceptionally(cause -> { - // handle the exception - return null; - }); - -// option 3: bookkeeper provides a twitter-future-like event listener for processing result and exception asynchronously -addFuture.whenComplete(new FutureEventListener<Long>() { - @Override - public void onSuccess(Long entryId) { - // process the result - } - @Override - public void onFailure(Throwable cause) { - // handle the exception - } -}); -``` - -The append method supports three representations of a byte array: the native Java `byte[]`, Java NIO `ByteBuffer`, and Netty `ByteBuf`. -It is recommended to use `ByteBuf` as it is more GC friendly. - -### Open ledgers - -You can open ledgers to read entries. Opening ledgers is done with [`OpenBuilder`](../javadoc/org/apache/bookkeeper/client/api/OpenBuilder). You must specify the ledger ID and the password -in order to open a ledger. - -Here's an example: - -```java -BookKeeper bk = ...; - -long ledgerId = ...; -byte[] password = "some-password".getBytes(); - -ReadHandle rh = bk.newOpenLedgerOp() - .withLedgerId(ledgerId) - .withPassword(password) - .execute() // execute the open op - .get(); // wait for the execution to complete -``` - -A [`ReadHandle`](../javadoc/org/apache/bookkeeper/client/api/ReadHandle) is returned for applications to read entries from the ledger. - -#### Recovery vs NoRecovery - -By default, the [`OpenBuilder`](../javadoc/org/apache/bookkeeper/client/api/OpenBuilder) opens the ledger in `NoRecovery` mode. You can open the ledger in `Recovery` mode by specifying -`withRecovery(true)` in the open builder. - -```java -BookKeeper bk = ...; - -long ledgerId = ...; -byte[] password = "some-password".getBytes(); - -ReadHandle rh = bk.newOpenLedgerOp() - .withLedgerId(ledgerId) - .withPassword(password) - .withRecovery(true) - .execute() - .get(); - -``` - -**What is the difference between "Recovery" and "NoRecovery"?** - -If you open a ledger in "Recovery" mode, it will fence and seal the ledger -- no more entries are allowed -to be appended to it. The writer that is currently appending entries to the ledger will fail with [`LedgerFencedException`](../javadoc/org/apache/bookkeeper/client/api/BKException.Code#LedgerFencedException). - -In contrast, opening a ledger in "NoRecovery" mode does not fence and seal the ledger. "NoRecovery" mode is usually used by applications to tail-read from a ledger. - -### Read entries from ledgers - -The [`ReadHandle`](../javadoc/org/apache/bookkeeper/client/api/ReadHandle) returned from the open builder can be used by applications to read entries from the ledgers.
- -```java -ReadHandle rh = ...; - -long startEntryId = ...; -long endEntryId = ...; -CompletableFuture<LedgerEntries> readFuture = rh.read(startEntryId, endEntryId); - -// option 1: you can wait for the read to complete synchronously -try { - LedgerEntries entries = readFuture.get(); -} catch (InterruptedException | ExecutionException e) { - // error handling: the cause may be a BKException -} - -// option 2: you can process the result and exception asynchronously -readFuture - .thenApply(entries -> { - // process the result - return entries; - }) - .exceptionally(cause -> { - // handle the exception - return null; - }); - -// option 3: bookkeeper provides a twitter-future-like event listener for processing result and exception asynchronously -readFuture.whenComplete(new FutureEventListener<LedgerEntries>() { - @Override - public void onSuccess(LedgerEntries entries) { - // process the result - } - @Override - public void onFailure(Throwable cause) { - // handle the exception - } -}); -``` - -Once you are done processing the [`LedgerEntries`](../javadoc/org/apache/bookkeeper/client/api/LedgerEntries), you can call `#close()` on the `LedgerEntries` instance to -release the buffers held by it. - -Applications are allowed to read any entries between `0` and [`LastAddConfirmed`](../javadoc/org/apache/bookkeeper/client/api/ReadHandle.html#getLastAddConfirmed). If an application -attempts to read entries beyond `LastAddConfirmed`, it will receive [`IncorrectParameterException`](../javadoc/org/apache/bookkeeper/client/api/BKException.Code#IncorrectParameterException). - -### Read unconfirmed entries from ledgers - -`readUnconfirmed` provides a mechanism for applications to read entries beyond `LastAddConfirmed`. Applications should be aware that `readUnconfirmed` doesn't provide any -repeatable read consistency. - -```java -CompletableFuture<LedgerEntries> readFuture = rh.readUnconfirmed(startEntryId, endEntryId); -``` - -### Tailing Reads - -There are two methods for applications to achieve tailing reads: `Polling` and `Long-Polling`. - -#### Polling - -You can do this in a synchronous way: - -```java -ReadHandle rh = ...; - -long startEntryId = 0L; -long nextEntryId = startEntryId; -int numEntriesPerBatch = 4; -while (!rh.isClosed() || nextEntryId <= rh.getLastAddConfirmed()) { - long lac = rh.getLastAddConfirmed(); - if (nextEntryId > lac) { - // no more entries have been added yet; wait, then refresh the LAC - Thread.sleep(1000); - - lac = rh.readLastAddConfirmed().get(); - continue; - } - - long endEntryId = Math.min(lac, nextEntryId + numEntriesPerBatch - 1); - LedgerEntries entries = rh.read(nextEntryId, endEntryId).get(); - - // process the entries - - nextEntryId = endEntryId + 1; -} -``` - -#### Long Polling - -```java -ReadHandle rh = ...; - -long startEntryId = 0L; -long nextEntryId = startEntryId; -int numEntriesPerBatch = 4; -while (!rh.isClosed() || nextEntryId <= rh.getLastAddConfirmed()) { - long lac = rh.getLastAddConfirmed(); - if (nextEntryId > lac) { - // no more entries have been added yet; wait for a new entry or a LAC update - try (LastConfirmedAndEntry lacAndEntry = rh.readLastAddConfirmedAndEntry(nextEntryId, 1000, false).get()) { - if (lacAndEntry.hasEntry()) { - // process the entry - - ++nextEntryId; - } - } - } else { - long endEntryId = Math.min(lac, nextEntryId + numEntriesPerBatch - 1); - LedgerEntries entries = rh.read(nextEntryId, endEntryId).get(); - - // process the entries - nextEntryId = endEntryId + 1; - } -} -``` - -### Delete ledgers - -{% pop Ledgers %} can be deleted using [`DeleteBuilder`](../javadoc/org/apache/bookkeeper/client/api/DeleteBuilder).
- -```java -BookKeeper bk = ...; -long ledgerId = ...; - -bk.newDeleteLedgerOp() - .withLedgerId(ledgerId) - .execute() - .get(); -``` diff --git a/site/docs/4.7.1/api/overview.md b/site/docs/4.7.1/api/overview.md deleted file mode 100644 index 3e0adcd61af..00000000000 --- a/site/docs/4.7.1/api/overview.md +++ /dev/null @@ -1,17 +0,0 @@ ---- -title: BookKeeper API --- - -BookKeeper offers a few APIs that applications can use to interact with it: - -* The [ledger API](../ledger-api) is a lower-level API that enables you to interact with {% pop ledgers %} directly -* The [Ledger Advanced API](../ledger-adv-api) is an advanced extension to the [Ledger API](../ledger-api) that provides more flexibility to applications. -* The [DistributedLog API](../distributedlog-api) is a higher-level API that provides convenient abstractions. - -## Trade-offs - -The `Ledger API` provides direct access to ledgers and thus enables you to use BookKeeper however you'd like. - -However, in most use cases, if you want a `log stream`-like abstraction, you will have to manage things like tracking the list of ledgers, -rolling ledgers, and data retention on your own. In such cases, we recommend using the [DistributedLog API](../distributedlog-api), -whose semantics resemble continuous log streams from the standpoint of applications. diff --git a/site/docs/4.7.1/deployment/dcos.md b/site/docs/4.7.1/deployment/dcos.md deleted file mode 100644 index a1e4996d6f6..00000000000 --- a/site/docs/4.7.1/deployment/dcos.md +++ /dev/null @@ -1,142 +0,0 @@ ---- -title: Deploying BookKeeper on DC/OS -subtitle: Get up and running easily on an Apache Mesos cluster -logo: img/dcos-logo.png --- - -[DC/OS](https://dcos.io/) (the DataCenter Operating System) is a distributed operating system used for deploying and managing applications and systems on [Apache Mesos](http://mesos.apache.org/). DC/OS is an open-source tool created and maintained by [Mesosphere](https://mesosphere.com/). - -BookKeeper is available as a [DC/OS package](http://universe.dcos.io/#/package/bookkeeper/version/latest) from the [Mesosphere DC/OS Universe](http://universe.dcos.io/#/packages). - -## Prerequisites - -In order to run BookKeeper on DC/OS, you will need: - -* DC/OS version [1.8](https://dcos.io/docs/1.8/) or higher -* A DC/OS cluster with at least three nodes -* The [DC/OS CLI tool](https://dcos.io/docs/1.8/usage/cli/install/) installed - -Each node in your DC/OS-managed Mesos cluster must have at least: - -* 1 CPU -* 1 GB of memory -* 10 GB of total persistent disk storage - -## Installing BookKeeper - -```shell -$ dcos package install bookkeeper --yes -``` - -This command will: - -* Install the `bookkeeper` subcommand for the `dcos` CLI tool -* Start a single {% pop bookie %} on the Mesos cluster with the [default configuration](../../reference/config) - -The automatically started bookie uses host networking and by default exposes its service at `agent_ip:3181`. - -> If you run `dcos package install bookkeeper` without setting the `--yes` flag, the install will run in interactive mode. For more information on the `package install` command, see the [DC/OS docs](https://docs.mesosphere.com/latest/cli/command-reference/dcos-package/dcos-package-install/).
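Before continuing, you can confirm that the package is installed by listing installed packages with the DC/OS CLI (a hedged illustration; the exact output format varies by DC/OS version):

```shell
# List installed packages, filtered to bookkeeper
$ dcos package list bookkeeper
```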
- -### Services - -To watch BookKeeper start up, click on the **Services** tab in the DC/OS [user interface](https://docs.mesosphere.com/latest/gui/) and you should see the `bookkeeper` package listed: - -![DC/OS services]({{ site.baseurl }}img/dcos/services.png) - -### Tasks - -To see which tasks have started, click on the `bookkeeper` service and you'll see an interface that looks like this: - -![DC/OS tasks]({{ site.baseurl }}img/dcos/tasks.png) - -## Scaling BookKeeper - -Once the first {% pop bookie %} has started up, you can click on the **Scale** tab to scale up your BookKeeper ensemble by adding more bookies (or scale down the ensemble by removing bookies). - -![DC/OS scale]({{ site.baseurl }}img/dcos/scale.png) - -## ZooKeeper Exhibitor - -ZooKeeper contains the information for all bookies in the ensemble. When deployed on DC/OS, BookKeeper uses a ZooKeeper instance provided by DC/OS. You can access a visual UI for ZooKeeper using [Exhibitor](https://github.com/soabase/exhibitor/wiki), which is available at [http://master.dcos/exhibitor](http://master.dcos/exhibitor). - -![ZooKeeper Exhibitor]({{ site.baseurl }}img/dcos/exhibitor.png) - -You should see a listing of IP/host information for all bookies under the `messaging/bookkeeper/ledgers/available` node. - -## Client connections - -To connect to bookies running on DC/OS using clients running within your Mesos cluster, you need to specify the ZooKeeper connection string for DC/OS's ZooKeeper cluster: - -``` -master.mesos:2181 -``` - -This is the *only* ZooKeeper host/port you need to include in your connection string. Here's an example using the [Java client](../../api/ledger-api#the-java-ledger-api-client): - -```java -BookKeeper bkClient = new BookKeeper("master.mesos:2181"); -``` - -If you're connecting using a client running outside your Mesos cluster, you need to supply the public-facing connection string for your DC/OS ZooKeeper cluster. - -## Configuring BookKeeper - -By default, the `bookkeeper` package will start up a BookKeeper ensemble consisting of one {% pop bookie %} with one CPU, 1 GB of memory, and a 70 MB persistent volume. - -You can supply a non-default configuration when installing the package using a JSON file. Here's an example command: - -```shell -$ dcos package install bookkeeper \ - --options=/path/to/config.json -``` - -You can then fetch the current configuration for BookKeeper at any time using the `package describe` command: - -```shell -$ dcos package describe bookkeeper \ - --config -``` - -### Available parameters - -> Not all [configurable parameters](../../reference/config) for BookKeeper are available for BookKeeper on DC/OS. Only the parameters shown in the table below are available. - -Param | Type | Description | Default -:-----|:-----|:------------|:------- -`name` | String | The name of the DC/OS service. | `bookkeeper` -`cpus` | Integer | The number of CPU shares to allocate to each {% pop bookie %}. The minimum is 1. | `1` -`instances` | Integer | The number of {% pop bookies %} to run. The minimum is 1. | `1`
`mem` | Number | The memory, in MB, to allocate to each BookKeeper task | `1024.0` (1 GB) -`volume_size` | Number | The persistent volume size, in MB | `70` -`zk_client` | String | The connection string for the ZooKeeper client instance | `master.mesos:2181` -`service_port` | Integer | The BookKeeper export service port, using `PORT0` in Marathon | `3181` - -### Example JSON configuration - -Here's an example JSON configuration object for BookKeeper on DC/OS: - -```json -{ - "instances": 5, - "cpus": 3, - "mem": 2048.0, - "volume_size": 250 -} -``` - -If that configuration were stored in a file called `bk-config.json`, you could apply that configuration upon installing the BookKeeper package using this command: - -```shell -$ dcos package install bookkeeper \ - --options=./bk-config.json -``` - -## Uninstalling BookKeeper - -You can shut down and uninstall the `bookkeeper` package from DC/OS at any time using the `package uninstall` command: - -```shell -$ dcos package uninstall bookkeeper -Uninstalled package [bookkeeper] version [4.7.1] -Thank you for using bookkeeper. -``` diff --git a/site/docs/4.7.1/deployment/kubernetes.md b/site/docs/4.7.1/deployment/kubernetes.md deleted file mode 100644 index 0f113169edc..00000000000 --- a/site/docs/4.7.1/deployment/kubernetes.md +++ /dev/null @@ -1,181 +0,0 @@ ---- -title: Deploying Apache BookKeeper on Kubernetes -tags: [Kubernetes, Google Container Engine] -logo: img/kubernetes-logo.png --- - -Apache BookKeeper can be easily deployed in [Kubernetes](https://kubernetes.io/) clusters. Managed clusters on [Google Container Engine](https://cloud.google.com/compute/) are the most convenient way to get started. - -The deployment method shown in this guide relies on [YAML](http://yaml.org/) definitions for Kubernetes [resources](https://kubernetes.io/docs/resources-reference/v1.6/). The [`kubernetes`](https://github.com/apache/bookkeeper/tree/master/deploy/kubernetes) subdirectory holds resource definitions for: - -* A three-node ZooKeeper cluster -* A BookKeeper cluster with a bookie running on each node - -## Setup on Google Container Engine - -To get started, clone the BookKeeper repository from GitHub to obtain the [`kubernetes`](https://github.com/apache/bookkeeper/tree/master/deploy/kubernetes) resource definitions. - -If you'd like to change the number of bookies or ZooKeeper nodes in your BookKeeper cluster, modify the `replicas` parameter in the `spec` section of the appropriate [`Deployment`](https://kubernetes.io/docs/concepts/workloads/controllers/deployment/) or [`StatefulSet`](https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/) resource. - -[Google Container Engine](https://cloud.google.com/container-engine) (GKE) automates the creation and management of Kubernetes clusters in [Google Compute Engine](https://cloud.google.com/compute/) (GCE). - -### Prerequisites - -To get started, you'll need: - -* A Google Cloud Platform account, which you can sign up for at [cloud.google.com](https://cloud.google.com) -* An existing Cloud Platform project -* The [Google Cloud SDK](https://cloud.google.com/sdk/downloads) (in particular the [`gcloud`](https://cloud.google.com/sdk/gcloud/) and `kubectl` tools). - -### Create a new Kubernetes cluster - -You can create a new GKE cluster using the [`container clusters create`](https://cloud.google.com/sdk/gcloud/reference/container/clusters/create) command for `gcloud`. This command enables you to specify the number of nodes in the cluster, the machine types of those nodes, and more.
- -As an example, we'll create a new GKE cluster for Kubernetes version [1.6.4](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG.md#v164) in the [us-central1-a](https://cloud.google.com/compute/docs/regions-zones/regions-zones#available) zone. The cluster will be named `bookkeeper-gke-cluster` and will consist of three VMs, each using two locally attached SSDs and running on [n1-standard-8](https://cloud.google.com/compute/docs/machine-types) machines. These SSDs will be used by bookie instances, one for the BookKeeper journal and the other for storing the actual data. - -```bash -$ gcloud config set compute/zone us-central1-a -$ gcloud config set project your-project-name -$ gcloud container clusters create bookkeeper-gke-cluster \ - --machine-type=n1-standard-8 \ - --num-nodes=3 \ - --local-ssd-count=2 \ - --enable-kubernetes-alpha -``` - -By default, bookies will run on all the machines that have locally attached SSD disks. In this example, all of those machines will have two SSDs, but you can add different types of machines to the cluster later. You can control which machines host bookie servers using [labels](https://kubernetes.io/docs/concepts/overview/working-with-objects/labels). - -### Dashboard - -You can observe your cluster in the [Kubernetes Dashboard](https://kubernetes.io/docs/tasks/access-application-cluster/web-ui-dashboard/) by downloading the credentials for your Kubernetes cluster and opening up a proxy to the cluster: - -```bash -$ gcloud container clusters get-credentials bookkeeper-gke-cluster \ - --zone=us-central1-a \ - --project=your-project-name -$ kubectl proxy -``` - -By default, the proxy will be opened on port 8001. Now you can navigate to [localhost:8001/ui](http://localhost:8001/ui) in your browser to access the dashboard. At first your GKE cluster will be empty, but that will change as you begin deploying. - -When you create a cluster, your `kubectl` config in `~/.kube/config` (on macOS and Linux) will be updated for you, so you probably won't need to change your configuration. Nonetheless, you can ensure that `kubectl` can interact with your cluster by listing the nodes in the cluster: - -```bash -$ kubectl get nodes -``` - -If `kubectl` is working with your cluster, you can proceed to deploy ZooKeeper and bookies. - -### ZooKeeper - -You *must* deploy ZooKeeper as the first component, as it is a dependency for the others. - -```bash -$ kubectl apply -f zookeeper.yaml -``` - -Wait until all three ZooKeeper server pods are up and have the status `Running`. You can check on the status of the ZooKeeper pods at any time: - -```bash -$ kubectl get pods -l component=zookeeper -NAME READY STATUS RESTARTS AGE -zk-0 1/1 Running 0 18m -zk-1 1/1 Running 0 17m -zk-2 0/1 Running 6 15m -``` - -This step may take several minutes, as Kubernetes needs to download the Docker image on the VMs. - - -If you want to connect to one of the remote ZooKeeper servers, you can use [zk-shell](https://github.com/rgs1/zk_shell); you first need to forward a local port to the -remote ZooKeeper server: - -```bash -$ kubectl port-forward zk-0 2181:2181 -$ zk-shell localhost 2181 -``` - -### Deploy Bookies - -Once the ZooKeeper cluster is running, you can deploy the bookies. You can deploy the bookies either using a [DaemonSet](https://kubernetes.io/docs/concepts/workloads/controllers/daemonset/) or a [StatefulSet](https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/).
- -> NOTE: _DaemonSet_ vs _StatefulSet_ -> -> A _DaemonSet_ ensures that all (or some) nodes run a bookie pod. As nodes are added to the cluster, bookie pods are added automatically to them. As nodes are removed from the -> cluster, those bookie pods are garbage collected. Bookies deployed in a DaemonSet store data on the local disks of those nodes, so no external storage or persistent -> volumes are required. -> -> A _StatefulSet_ maintains a sticky identity for the pods that it runs and manages. It provides stable and unique network identifiers, and stable and persistent storage for each pod. The pods -> are not interchangeable; the identifiers for each pod are maintained across any rescheduling. -> -> Which one to use? A _DaemonSet_ is the easiest way to deploy a BookKeeper cluster, because it doesn't require an additional persistent volume provisioner and uses local disks. BookKeeper manages -> the data replication. It also maintains the best latency properties. However, it uses `hostIP` and `hostPort` for communications between pods. In some Kubernetes platforms (such as DC/OS), `hostIP` and -> `hostPort` are not well supported. A _StatefulSet_ is only practical when deploying in a cloud environment or any Kubernetes installation that has persistent volumes available. Also be aware that latency -> can potentially be higher when using persistent volumes, because there is usually built-in replication in the persistent volumes. - -```bash -# deploy bookies in a daemon set -$ kubectl apply -f bookkeeper.yaml - -# deploy bookies in a stateful set -$ kubectl apply -f bookkeeper.stateful.yaml -``` - -You can check on the status of the bookie pods for these components either in the Kubernetes Dashboard or using `kubectl`: - -```bash -$ kubectl get pods -``` - -Once all BookKeeper pods are `Running`, you can use zk-shell to find all available bookies under the `/ledgers/` node. - -You can also run an instance of the [bookkeeper tutorial](https://github.com/ivankelly/bookkeeper-tutorial/), named 'dice' here, in this BookKeeper cluster: - -```bash -$ kubectl run -i --tty --attach dice --image=caiok/bookkeeper-tutorial --env ZOOKEEPER_SERVERS="zk-0.zookeeper" -``` - -Example output from the dice instance looks like this: -```shell -➜ $ kubectl run -i --tty --attach dice --image=caiok/bookkeeper-tutorial --env ZOOKEEPER_SERVERS="zk-0.zookeeper" -If you don't see a command prompt, try pressing enter. -Value = 1, epoch = 5, leading -Value = 2, epoch = 5, leading -Value = 1, epoch = 5, leading -Value = 4, epoch = 5, leading -Value = 5, epoch = 5, leading -Value = 4, epoch = 5, leading -Value = 3, epoch = 5, leading -Value = 5, epoch = 5, leading -Value = 3, epoch = 5, leading -Value = 2, epoch = 5, leading -Value = 1, epoch = 5, leading -Value = 4, epoch = 5, leading -Value = 2, epoch = 5, leading -``` - -### Un-Deploy - -Delete the demo dice instance: - -```bash -$ kubectl delete deployment dice -``` - -Delete BookKeeper: -```bash -$ kubectl delete -f bookkeeper.yaml -``` - -Delete ZooKeeper: -```bash -$ kubectl delete -f zookeeper.yaml -``` - -Delete the cluster: -```bash -$ gcloud container clusters delete bookkeeper-gke-cluster -``` - - - diff --git a/site/docs/4.7.1/deployment/manual.md b/site/docs/4.7.1/deployment/manual.md deleted file mode 100644 index daafd5556f5..00000000000 --- a/site/docs/4.7.1/deployment/manual.md +++ /dev/null @@ -1,56 +0,0 @@ ---- -title: Manual deployment --- - -The easiest way to deploy BookKeeper is using schedulers like [DC/OS](../dcos), but you can also deploy BookKeeper clusters manually.
A BookKeeper cluster consists of two main components: - -* A [ZooKeeper](#zookeeper-setup) cluster that is used for configuration- and coordination-related tasks -* An [ensemble](#starting-up-bookies) of {% pop bookies %} - -## ZooKeeper setup - -We won't provide a full guide to setting up a ZooKeeper cluster here. We recommend that you consult [this guide](https://zookeeper.apache.org/doc/current/zookeeperAdmin.html) in the official ZooKeeper documentation. - -## Starting up bookies - -Once your ZooKeeper cluster is up and running, you can start up as many {% pop bookies %} as you'd like to form a cluster. Before starting up each bookie, you need to modify the bookie's configuration to make sure that it points to the right ZooKeeper cluster. - -On each bookie host, you need to [download](../../getting-started/installation#download) the BookKeeper package as a tarball. Once you've done that, you need to configure the bookie by setting values in the `bookkeeper-server/conf/bk_server.conf` config file. The one parameter that you will absolutely need to change is the [`zkServers`](../../config#zkServers) parameter, which you will need to set to the ZooKeeper connection string for your ZooKeeper cluster. Here's an example: - -```properties -zkServers=100.0.0.1:2181,100.0.0.2:2181,100.0.0.3:2181 -``` - -> A full listing of configurable parameters available in `bookkeeper-server/conf/bk_server.conf` can be found in the [Configuration](../../reference/config) reference manual. - -Once the bookie's configuration is set, you can start it up using the [`bookie`](../../reference/cli#bookkeeper-bookie) command of the [`bookkeeper`](../../reference/cli#bookkeeper) CLI tool: - -```shell -$ bookkeeper-server/bin/bookkeeper bookie -``` - -> You can also build BookKeeper [by cloning it from source](../../getting-started/installation#clone) or [using Maven](../../getting-started/installation#build-using-maven). - -### System requirements - -{% include system-requirements.md %} - -## Cluster metadata setup - -Once you've started up a cluster of bookies, you need to set up cluster metadata for the cluster by running the following command from any bookie in the cluster: - -```shell -$ bookkeeper-server/bin/bookkeeper shell metaformat -``` - -> The `metaformat` command performs all the necessary ZooKeeper cluster metadata tasks and thus only needs to be run *once* and from *any* bookie in the BookKeeper cluster. - -Once cluster metadata formatting has been completed, your BookKeeper cluster is ready to go! - - diff --git a/site/docs/4.7.1/development/codebase.md b/site/docs/4.7.1/development/codebase.md deleted file mode 100644 index 9a83073ea4c..00000000000 --- a/site/docs/4.7.1/development/codebase.md +++ /dev/null @@ -1,3 +0,0 @@ ---- -title: The BookKeeper codebase --- diff --git a/site/docs/4.7.1/development/protocol.md b/site/docs/4.7.1/development/protocol.md deleted file mode 100644 index 6d17aa0ed45..00000000000 --- a/site/docs/4.7.1/development/protocol.md +++ /dev/null @@ -1,148 +0,0 @@ ---- -title: The BookKeeper protocol --- - -BookKeeper uses a special replication protocol for guaranteeing persistent storage of entries in an ensemble of bookies. - -> This document assumes that you have some knowledge of leader election and log replication and how these can be used in a distributed system. If not, we recommend reading the [example application](../../api/ledger-api#example-application) documentation first.
- -## Ledgers - -{% pop Ledgers %} are the basic building block of BookKeeper and the level at which BookKeeper makes its persistent storage guarantees. A replicated log consists of an ordered list of ledgers. See [Ledgers to logs](#ledgers-to-logs) for info on building a replicated log from ledgers. - -Ledgers are composed of metadata and {% pop entries %}. The metadata is stored in ZooKeeper, which provides a *compare-and-swap* (CAS) operation. Entries are stored on storage nodes known as {% pop bookies %}. - -A ledger has a single writer and multiple readers (SWMR). - -### Ledger metadata - -A ledger's metadata contains the following: - -Parameter | Name | Meaning -:---------|:-----|:------- -Identifier | | A 64-bit integer, unique within the system -Ensemble size | **E** | The number of nodes the ledger is stored on -Write quorum size | **Qw** | The number of nodes each entry is written to. In effect, the max replication for the entry. -Ack quorum size | **Qa** | The number of nodes an entry must be acknowledged on. In effect, the minimum replication for the entry. -Current state | | The current status of the ledger. One of `OPEN`, `CLOSED`, or `IN_RECOVERY`. -Last entry | | The last entry in the ledger, or `NULL` if the current state is not `CLOSED`. - -In addition, each ledger's metadata consists of one or more *fragments*. Each fragment consists of - -* the first entry of the fragment and -* a list of bookies for the fragment. - -When creating a ledger, the following invariant must hold: - -**E >= Qw >= Qa** - -Thus, the ensemble size (**E**) must be at least as large as the write quorum size (**Qw**), which in turn must be at least as large as the ack quorum size (**Qa**). If that condition does not hold, then the ledger creation operation will fail. - -### Ensembles - -When a ledger is created, **E** bookies are chosen for the entries of that ledger. These bookies form the initial ensemble of the ledger. A ledger can have multiple ensembles, but an entry has only one ensemble. Changes in the ensemble involve a new fragment being added to the ledger. - -Take the following example. In this ledger, with an ensemble size of 3, there are two fragments and thus two ensembles, one starting at entry 0, the second at entry 12. The second ensemble differs from the first only by its first element. This could be because B1 has failed and therefore had to be replaced. - -First entry | Bookies -:-----------|:------- -0 | B1, B2, B3 -12 | B4, B2, B3 - -### Write quorums - -Each entry in the log is written to **Qw** nodes. This is considered the write quorum for that entry. The write quorum is the subsequence of the ensemble, **Qw** in length, starting at the bookie at index (entryid % **E**). - -For example, in a ledger with **E** = 4, **Qw** = 3, and **Qa** = 2, with an ensemble consisting of B1, B2, B3, and B4, the write quorums for the first 6 entries will be: - -Entry | Write quorum -:-----|:------------ -0 | B1, B2, B3 -1 | B2, B3, B4 -2 | B3, B4, B1 -3 | B4, B1, B2 -4 | B1, B2, B3 -5 | B2, B3, B4 - -There are only **E** distinct write quorums in any ensemble. If **Qw** = **E**, then there is only one, as no striping occurs. - -### Ack quorums - -The ack quorum for an entry is any subset of the write quorum of size **Qa**. If **Qa** bookies acknowledge an entry, it means it has been fully replicated. - -### Guarantees - -The system can tolerate **Qa** – 1 failures without data loss. - -BookKeeper guarantees that: - -1. All updates to a ledger will be read in the same order as they were written. -2. 
All clients will read the same sequence of updates from the ledger. - -## Writing to ledgers - -Since a ledger has only a single writer, ensuring that entry ids are sequential is trivial. A bookie acknowledges a write once it has been persisted to disk and is therefore durable. Once **Qa** bookies from the write quorum acknowledge the write, the write is acknowledged to the client, but only if all entries with lower entry ids in the ledger have already been acknowledged to the client. - -The entry written contains the ledger id, the entry id, the last add confirmed and the payload. The last add confirmed is the last entry which had been acknowledged to the client when this entry was written. Sending this with the entry speeds up recovery of the ledger in the case that the writer crashes. - -Another client can also read entries in the ledger up to the last add confirmed, as we guarantee that all entries thus far have been replicated on **Qa** nodes, and therefore all future readers will be able to also read it. However, to read like this, the ledger should be opened with a non-fencing open. Otherwise, it would kill the writer. - -If a node fails to acknowledge a write, the writer will create a new ensemble by replacing the failed node in the current ensemble. It creates a new fragment with this ensemble, starting from the first message that has not been acknowledged to the client. Creating the new fragment involves making a CAS write to the metadata. If the CAS write fails, someone else has modified something in the ledger metadata. This concurrent modification could have been caused by recovery or {% pop rereplication %}. We reread the metadata. If the state of the ledger is no longer `OPEN`, we send an error to the client for any outstanding writes. Otherwise, we try to replace the failed node again. - -### Closing a ledger as a writer - -Closing a ledger is straightforward for a writer. The writer makes a CAS write to the metadata, changing the state to `CLOSED` and setting the last entry of the ledger to the last entry which we have acknowledged to the client. - -If the CAS write fails, it means someone else has modified the metadata. We reread the metadata, and retry closing as long as the state of the ledger is still `OPEN`. If the state is `IN_RECOVERY` we send an error to the client. If the state is `CLOSED` and the last entry is the same as the last entry we have acknowledged to the client, we complete the close operation successfully. If the last entry is different from what we have acknowledged to the client, we send an error to the client. - -### Closing a ledger as a reader - -A reader can also force a ledger to close. Forcing the ledger to close will prevent any writer from adding new entries to the ledger. This is called {% pop fencing %}. This can occur when a writer has crashed or become unavailable, and a new writer wants to take over writing to the log. The new writer must ensure that it has seen all updates from the previous writer, and prevent the previous writer from making any new updates before making any updates of its own. - -To recover a ledger, we first update the state in the metadata to `IN_RECOVERY`. We then send a fence message to all the bookies in the last fragment of the ledger. When a bookie receives a fence message for a ledger, the fenced state of the ledger is persisted to disk. Once we receive a response from at least (**Qw** - **Qa**) + 1 bookies from each write quorum in the ensemble, the ledger is fenced.
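As a concrete check of this arithmetic, take the configuration used in the earlier examples, **E** = 4, **Qw** = 3, and **Qa** = 2 (a worked example only, not an additional requirement):

```
fencing responses required per write quorum = (Qw - Qa) + 1 = (3 - 2) + 1 = 2
bookies left unfenced in any write quorum   = Qw - 2        = 1   (< Qa = 2)
```

Since at most one unfenced bookie remains in any write quorum, the old writer can never assemble the **Qa** = 2 acknowledgements it needs, which is exactly the property the next paragraph relies on.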
- -By ensuring we have received a response from at least (**Qw** - **Qa**) + 1 bookies in each write quorum, we ensure that, if the old writer is alive and tries to add a new entry, there will be no write quorum in which **Qa** bookies will accept the write. If the old writer tries to update the ensemble, it will fail on the CAS metadata write, and then see that the ledger is in `IN_RECOVERY` state, and that it therefore shouldn’t try to write to it. - -The old writer will be able to write entries to individual bookies (we can’t guarantee that the fence message reaches all bookies), but as it will not be able to reach the ack quorum, it will not be able to send a success response to its client. The client will get a LedgerFenced error instead. - -It is important to note that when you get a ledger fenced message for an entry, it doesn’t mean that the entry has not been written. It means that the entry may or may not have been written, and this can only be determined after the ledger is recovered. In effect, LedgerFenced should be treated like a timeout. - -Once the ledger is fenced, recovery can begin. Recovery means finding the last entry of the ledger and closing the ledger. To find the last entry of the ledger, the client asks all bookies for the highest last add confirmed value they have seen. It waits until it has received a response from at least (**Qw** - **Qa**) + 1 bookies from each write quorum, and takes the highest response as the entry id to start reading forward from. It then starts reading forward in the ledger, one entry at a time, replicating all entries it sees to the entire write quorum for that entry. Once it can no longer read any more entries, it updates the state in the metadata to `CLOSED`, and sets the last entry of the ledger to the last entry it wrote. Multiple readers can try to recover a ledger at the same time, but as the metadata write is CAS, they will all converge on the same last entry of the ledger. - -## Ledgers to logs - -In BookKeeper, {% pop ledgers %} can be used to build a replicated log for your system. All guarantees provided by BookKeeper are at the ledger level. Guarantees on the whole log can be built using the ledger guarantees and any consistent datastore with a compare-and-swap (CAS) primitive. BookKeeper uses ZooKeeper as the datastore but others could theoretically be used. - -A log in BookKeeper is built from some number of ledgers, with a fixed order. A ledger represents a single segment of the log. A ledger could be the whole period that one node was the leader, or there could be multiple ledgers for a single period of leadership. However, there can only ever be one leader that adds entries to a single ledger. Ledgers cannot be reopened for writing once they have been closed/recovered. - -> BookKeeper does *not* provide leader election. You must use a system like ZooKeeper for this. - -In many cases, leader election is really leader suggestion. Multiple nodes could think that they are leader at any one time. It is the job of the log to guarantee that only one can write changes to the system. - -### Opening a log - -Once a node thinks it is leader for a particular log, it must take the following steps: - -1. Read the list of ledgers for the log -1. {% pop Fence %} the last two ledgers in the list. Two ledgers are fenced because the writer may be writing to the second-to-last ledger while adding the last ledger to the list. -1. Create a new ledger -1. Add the new ledger to the ledger list -1. 
Write the new ledger list back to the datastore using a CAS operation - -The fencing in step 2 and the CAS operation in step 5 prevent two nodes from thinking that they have leadership at any one time. - -The CAS operation will fail if the list of ledgers has changed between reading it and writing back the new list. When the CAS operation fails, the leader must start at step 1 again. Even better, it should check that it is in fact still the leader with the system that is providing leader election. The protocol will work correctly without this step, though it will be able to make very little progress if two nodes think they are leader and are duelling for the log. - -The node must not serve any writes until step 5 completes successfully. - -### Rolling ledgers - -The leader may wish to close the current ledger and open a new one every so often. Ledgers can only be deleted as a whole. If you don't roll the log, you won't be able to clean up old entries in the log without a leader change. By closing the current ledger and adding a new one, the leader allows the log to be truncated whenever that data is no longer needed. The steps for rolling the log are similar to those for creating a new ledger. - -1. Create a new ledger -1. Add the new ledger to the ledger list -1. Write the new ledger list to the datastore using CAS -1. Close the previous ledger - -By deferring the closing of the previous ledger until step 4, we can continue writing to the log while we perform metadata update operations to add the new ledger. This is safe as long as you fence the last 2 ledgers when acquiring leadership. - diff --git a/site/docs/4.7.1/getting-started/concepts.md b/site/docs/4.7.1/getting-started/concepts.md deleted file mode 100644 index 7a3c92847b2..00000000000 --- a/site/docs/4.7.1/getting-started/concepts.md +++ /dev/null @@ -1,202 +0,0 @@ ---- -title: BookKeeper concepts and architecture -subtitle: The core components and how they work -prev: ../run-locally --- - -BookKeeper is a service that provides persistent storage of streams of log [entries](#entries)---aka *records*---in sequences called [ledgers](#ledgers). BookKeeper replicates stored entries across multiple servers. - -## Basic terms - -In BookKeeper: - -* each unit of a log is an [*entry*](#entries) (aka record) -* streams of log entries are called [*ledgers*](#ledgers) -* individual servers storing ledgers of entries are called [*bookies*](#bookies) - -BookKeeper is designed to be reliable and resilient to a wide variety of failures. Bookies can crash, corrupt data, or discard data, but as long as there are enough bookies behaving correctly in the ensemble, the service as a whole will behave correctly. - -## Entries - -> **Entries** contain the actual data written to ledgers, along with some important metadata. - -BookKeeper entries are sequences of bytes that are written to [ledgers](#ledgers). Each entry has the following fields: - -Field | Java type | Description -:-----|:----------|:----------- -Ledger number | `long` | The ID of the ledger to which the entry has been written -Entry number | `long` | The unique ID of the entry -Last confirmed (LC) | `long` | The ID of the last recorded entry -Data | `byte[]` | The entry's data (written by the client application) -Authentication code | `byte[]` | The message auth code, which includes *all* other fields in the entry - -## Ledgers - -> **Ledgers** are the basic unit of storage in BookKeeper. - -Ledgers are sequences of entries, while each entry is a sequence of bytes.
Entries are written to a ledger: - -* sequentially, and -* at most once. - -This means that ledgers have *append-only* semantics. Entries cannot be modified once they've been written to a ledger. Determining the proper write order is the responsibility of [client applications](#clients). - -## Clients and APIs - -> BookKeeper clients have two main roles: they create and delete ledgers, and they read entries from and write entries to ledgers. -> -> BookKeeper provides both a lower-level and a higher-level API for ledger interaction. - -There are currently two APIs that can be used for interacting with BookKeeper: - -* The [ledger API](../../api/ledger-api) is a lower-level API that enables you to interact with {% pop ledgers %} directly. -* The [DistributedLog API](../../api/distributedlog-api) is a higher-level API that enables you to use BookKeeper without directly interacting with ledgers. - -In general, you should choose the API based on how much granular control you need over ledger semantics. The two APIs can also both be used within a single application. - -## Bookies - -> **Bookies** are individual BookKeeper servers that handle ledgers (more specifically, fragments of ledgers). Bookies function as part of an ensemble. - -A bookie is an individual BookKeeper storage server. Individual bookies store fragments of ledgers, not entire ledgers (for the sake of performance). For any given ledger **L**, an *ensemble* is the group of bookies storing the entries in **L**. - -Whenever entries are written to a ledger, those entries are {% pop striped %} across the ensemble (written to a sub-group of bookies rather than to all bookies). - -### Motivation - -> BookKeeper was initially inspired by the NameNode server in HDFS but its uses now extend far beyond this. - -The initial motivation for BookKeeper comes from the [Hadoop](http://hadoop.apache.org/) ecosystem. In the [Hadoop Distributed File System](https://wiki.apache.org/hadoop/HDFS) (HDFS), a special node called the [NameNode](https://wiki.apache.org/hadoop/NameNode) logs all operations in a reliable fashion, which ensures that recovery is possible in case of crashes. - -The NameNode, however, served only as initial inspiration for BookKeeper. The applications for BookKeeper extend far beyond this and include essentially any application that requires an append-based storage system. BookKeeper provides a number of advantages for such applications: - -* Highly efficient writes -* High fault tolerance via replication of messages within ensembles of bookies -* High throughput for write operations via {% pop striping %} (across as many bookies as you wish) - -## Metadata storage - -BookKeeper requires a metadata storage service to store information related to [ledgers](#ledgers) and available bookies. BookKeeper currently uses [ZooKeeper](https://zookeeper.apache.org) for this and other tasks. - -## Data management in bookies - -Bookies manage data in a [log-structured](https://en.wikipedia.org/wiki/Log-structured_file_system) way, which is implemented using three types of files: - -* [journals](#journals) -* [entry logs](#entry-logs) -* [index files](#index-files) - -### Journals - -A journal file contains BookKeeper transaction logs. Before any update to a ledger takes place, the bookie ensures that a transaction describing the update is written to non-volatile storage. A new journal file is created once the bookie starts or the older journal file reaches the journal file size threshold.
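These locations and thresholds are driven by the bookie configuration. As a hedged illustration (parameter defaults can differ between releases; consult the configuration reference for your version), the relevant `bookkeeper-server/conf/bk_server.conf` entries look like this:

```properties
# Directory to which journal (transaction log) files are written
journalDirectory=/tmp/bk-txn

# Maximum size of a journal file in MB; the bookie rolls to a new
# journal file once the current one reaches this threshold
journalMaxSizeMB=2048
```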
- -### Entry logs - -An entry log file manages the written entries received from BookKeeper clients. Entries from different ledgers are aggregated and written sequentially, while their offsets are kept as pointers in a [ledger cache](#ledger-cache) for fast lookup. - -A new entry log file is created once the bookie starts or the older entry log file reaches the entry log size threshold. Old entry log files are removed by the Garbage Collector Thread once they are not associated with any active ledger. - -### Index files - -An index file is created for each ledger, which comprises a header and several fixed-length index pages that record the offsets of data stored in entry log files. - -Since updating index files would introduce random disk I/O, index files are updated lazily by a sync thread running in the background. This ensures speedy performance for updates. Before index pages are persisted to disk, they are gathered in a ledger cache for lookup. - -### Ledger cache - -Ledger index pages are cached in a memory pool, which allows for more efficient management of disk head scheduling. - -### Adding entries - -When a client instructs a {% pop bookie %} to write an entry to a ledger, the entry will go through the following steps to be persisted on disk: - -1. The entry is appended to an [entry log](#entry-logs) -1. The index of the entry is updated in the [ledger cache](#ledger-cache) -1. A transaction corresponding to this entry update is appended to the [journal](#journals) -1. A response is sent to the BookKeeper client - -> For performance reasons, the entry log buffers entries in memory and commits them in batches, while the ledger cache holds index pages in memory and flushes them lazily. This process is described in more detail in the [Data flush](#data-flush) section below. - -### Data flush - -Ledger index pages are flushed to index files in the following two cases: - -* The ledger cache memory limit is reached. There is no more space available to hold newer index pages. Dirty index pages will be evicted from the ledger cache and persisted to index files. -* A background sync thread is responsible for flushing index pages from the ledger cache to index files periodically. - -Besides flushing index pages, the sync thread is responsible for rolling journal files when journal files use too much disk space. The data flush flow in the sync thread is as follows: - -* A `LastLogMark` is recorded in memory. The `LastLogMark` indicates that the entries before it have been persisted (to both index and entry log files) and contains two parts: - 1. A `txnLogId` (the file ID of a journal) - 1. A `txnLogPos` (offset in a journal) -* Dirty index pages are flushed from the ledger cache to the index file, and entry log files are flushed to ensure that all buffered entries in entry log files are persisted to disk. - - Ideally, a bookie only needs to flush index pages and entry log files that contain entries before `LastLogMark`. There is, however, no such information mapping ledgers and entry log files to journal files. Consequently, the thread flushes the ledger cache and entry log entirely here, and may flush entries after the `LastLogMark`. Flushing more is not a problem, though, just redundant. -* The `LastLogMark` is persisted to disk, which means that entries added before `LastLogMark` have had both their entry data and index pages persisted to disk. It is now time to safely remove journal files created earlier than `txnLogId`.
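Conceptually, the mark is just a (journal file, offset) pair; here is a minimal illustrative sketch (not the actual BookKeeper class, whose internals differ):

```java
// Illustrative sketch only: conceptually, LastLogMark pairs a journal file
// with a position inside it; everything at or before this point is durable.
final class LastLogMarkSketch {
    long txnLogId;   // file ID of a journal
    long txnLogPos;  // offset within that journal file
}
```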
- -If the bookie has crashed before persisting `LastLogMark` to disk, it still has journal files containing entries for which index pages may not have been persisted. Consequently, when this bookie restarts, it inspects journal files to restore those entries, so no data is lost. - -Using the above data flush mechanism, it is safe for the sync thread to skip data flushing when the bookie shuts down. However, the entry logger uses a buffered channel to write entries in batches, and there might still be data buffered in that channel at shutdown. The bookie needs to ensure that the entry log flushes its buffered data during shutdown. Otherwise, entry log files become corrupted with partial entries. - -### Data compaction - -On bookies, entries of different ledgers are interleaved in entry log files. A bookie runs a garbage collector thread to delete unassociated entry log files to reclaim disk space. If a given entry log file contains entries from a ledger that has not been deleted, then the entry log file would never be removed and the occupied disk space never reclaimed. In order to avoid such a case, a bookie server compacts entry log files in a garbage collector thread to reclaim disk space. - -There are two kinds of compaction running with different frequency: minor compaction and major compaction. The differences between minor compaction and major compaction lie in their threshold value and compaction interval. - -* The garbage collection threshold is the size percentage of an entry log file occupied by undeleted ledgers. The default minor compaction threshold is 0.2, while the major compaction threshold is 0.8. -* The garbage collection interval is how frequently to run the compaction. The default minor compaction interval is 1 hour, while the major compaction interval is 1 day. - -> If either the threshold or interval is set to less than or equal to zero, compaction is disabled. - -The data compaction flow in the garbage collector thread is as follows: - -* The thread scans entry log files to get their entry log metadata, which records a list of ledgers comprising an entry log and their corresponding percentages. -* With the normal garbage collection flow, once the bookie determines that a ledger has been deleted, the ledger will be removed from the entry log metadata and the size of the entry log reduced. -* If the remaining size of an entry log file reaches a specified threshold, the entries of active ledgers in the entry log will be copied to a new entry log file. -* Once all valid entries have been copied, the old entry log file is deleted. - -## ZooKeeper metadata - -BookKeeper requires a ZooKeeper installation for storing [ledger](#ledger) metadata. Whenever you construct a [`BookKeeper`](../../api/javadoc/org/apache/bookkeeper/client/BookKeeper) client object, you need to pass a list of ZooKeeper servers as a parameter to the constructor, like this: - -```java -String zkConnectionString = "127.0.0.1:2181"; -BookKeeper bkClient = new BookKeeper(zkConnectionString); -``` - -> For more info on using the BookKeeper Java client, see [this guide](../../api/ledger-api#the-java-ledger-api-client). - -## Ledger manager - -A *ledger manager* handles ledgers' metadata (which is stored in ZooKeeper). BookKeeper offers two types of ledger managers: the [flat ledger manager](#flat-ledger-manager) and the [hierarchical ledger manager](#hierarchical-ledger-manager).
-Both ledger managers extend the [`AbstractZkLedgerManager`](../../api/javadoc/org/apache/bookkeeper/meta/AbstractZkLedgerManager) abstract class.
-
-> #### Use the flat ledger manager in most cases
-> The flat ledger manager is the default and is recommended for nearly all use cases. The hierarchical ledger manager is better suited only for managing very large numbers of BookKeeper ledgers (> 50,000).
-
-### Flat ledger manager
-
-The *flat ledger manager*, implemented in the [`FlatLedgerManager`](../../api/javadoc/org/apache/bookkeeper/meta/FlatLedgerManager.html) class, stores all ledgers' metadata in child nodes of a single ZooKeeper path. The flat ledger manager creates [sequential nodes](https://zookeeper.apache.org/doc/trunk/zookeeperProgrammers.html#Sequence+Nodes+--+Unique+Naming) to ensure the uniqueness of the ledger ID and prefixes all nodes with `L`. Bookie servers manage their own active ledgers in a hash map so that it's easy to find which ledgers have been deleted from ZooKeeper and then garbage collect them.
-
-The flat ledger manager's garbage collection flow proceeds as follows:
-
-* All existing ledgers are fetched from ZooKeeper (`zkActiveLedgers`)
-* All ledgers currently active within the bookie are fetched (`bkActiveLedgers`)
-* The currently active ledgers are looped through to determine which ledgers don't currently exist in ZooKeeper. Those are then garbage collected.
-
-### Hierarchical ledger manager
-
-The *hierarchical ledger manager*, implemented in the [`HierarchicalLedgerManager`](../../api/javadoc/org/apache/bookkeeper/meta/HierarchicalLedgerManager) class, stores ledgers' metadata in two-level [znodes](https://zookeeper.apache.org/doc/current/zookeeperOver.html#Nodes+and+ephemeral+nodes). It first obtains a global unique ID from ZooKeeper using an [`EPHEMERAL_SEQUENTIAL`](https://zookeeper.apache.org/doc/current/api/org/apache/zookeeper/CreateMode.html#EPHEMERAL_SEQUENTIAL) znode. Since ZooKeeper's sequence counter has a format of `%10d` (10 digits with 0 padding, for example `0000000001`), the hierarchical ledger manager splits the generated ID into 3 parts:
-
-```shell
-{level1 (2 digits)}{level2 (4 digits)}{level3 (4 digits)}
-```
-
-These three parts are used to form the actual ledger node path to store ledger metadata:
-
-```shell
-{ledgers_root_path}/{level1}/{level2}/L{level3}
-```
-
-For example, ledger 0000000001 is split into three parts, 00, 0000, and 0001, and stored in znode `/{ledgers_root_path}/00/0000/L0001`. Each znode can hold as many as 10,000 ledgers, which avoids the problem of the child list being larger than the maximum ZooKeeper packet size (which is the [limitation](https://issues.apache.org/jira/browse/BOOKKEEPER-39) that initially prompted the creation of the hierarchical ledger manager).
diff --git a/site/docs/4.7.1/getting-started/installation.md b/site/docs/4.7.1/getting-started/installation.md
deleted file mode 100644
index 9986cd8e043..00000000000
--- a/site/docs/4.7.1/getting-started/installation.md
+++ /dev/null
@@ -1,74 +0,0 @@
----
-title: BookKeeper installation
-subtitle: Download or clone BookKeeper and build it locally
-next: ../run-locally
----
-
-{% capture download_url %}http://apache.claz.org/bookkeeper/bookkeeper-{{ site.latest_release }}/bookkeeper-{{ site.latest_release }}-src.tar.gz{% endcapture %}
-
-You can install BookKeeper either by [downloading](#download) a [GZipped](http://www.gzip.org/) tarball package or [cloning](#clone) the BookKeeper repository.
-
-## Requirements
-
-* [Unix environment](http://www.opengroup.org/unix)
-* [Java Development Kit 1.6](http://www.oracle.com/technetwork/java/javase/downloads/index.html) or later
-* [Maven 3.0](https://maven.apache.org/install.html) or later
-
-## Download
-
-You can download Apache BookKeeper releases from one of many [Apache mirrors](http://www.apache.org/dyn/closer.cgi/bookkeeper). Here's an example for the [apache.claz.org](http://apache.claz.org/bookkeeper) mirror:
-
-```shell
-$ curl -O {{ download_url }}
-$ tar xvf bookkeeper-{{ site.latest_release }}-src.tar.gz
-$ cd bookkeeper-{{ site.latest_release }}
-```
-
-## Clone
-
-To build BookKeeper from source, clone the repository, either from the [GitHub mirror]({{ site.github_repo }}) or from the [Apache repository](http://git.apache.org/bookkeeper.git/):
-
-```shell
-# From the GitHub mirror
-$ git clone {{ site.github_repo }}
-
-# From Apache directly
-$ git clone git://git.apache.org/bookkeeper.git/
-```
-
-## Build using Maven
-
-Once you have BookKeeper on your local machine, either by [downloading](#download) or [cloning](#clone) it, you can then build BookKeeper from source using Maven:
-
-```shell
-$ mvn package
-```
-
-> You can skip tests by adding the `-DskipTests` flag when running `mvn package`.
-
-### Useful Maven commands
-
-Some other useful Maven commands beyond `mvn package`:
-
-Command | Action
-:-------|:------
-`mvn clean` | Removes build artifacts
-`mvn compile` | Compiles JAR files from Java sources
-`mvn compile spotbugs:spotbugs` | Compile using the Maven [SpotBugs](https://github.com/spotbugs/spotbugs-maven-plugin) plugin
-`mvn install` | Install the BookKeeper JAR in your local Maven cache (usually in the `~/.m2` directory)
-`mvn deploy` | Deploy the BookKeeper JAR to the Maven repo (if you have the proper credentials)
-`mvn verify` | Performs a wide variety of verification and validation tasks
-`mvn apache-rat:check` | Run Maven using the [Apache Rat](http://creadur.apache.org/rat/apache-rat-plugin/) plugin
-`mvn compile javadoc:aggregate` | Build Javadocs locally
-`mvn package assembly:single` | Build a complete distribution using the Maven [Assembly](http://maven.apache.org/plugins/maven-assembly-plugin/) plugin
-
-## Package directory
-
-The BookKeeper project contains several subfolders that you should be aware of:
-
-Subfolder | Contains
-:---------|:--------
-[`bookkeeper-server`]({{ site.github_repo }}/tree/master/bookkeeper-server) | The BookKeeper server and client
-[`bookkeeper-benchmark`]({{ site.github_repo }}/tree/master/bookkeeper-benchmark) | A benchmarking suite for measuring BookKeeper performance
-[`bookkeeper-stats`]({{ site.github_repo }}/tree/master/bookkeeper-stats) | A BookKeeper stats library
-[`bookkeeper-stats-providers`]({{ site.github_repo }}/tree/master/bookkeeper-stats-providers) | BookKeeper stats providers
diff --git a/site/docs/4.7.1/getting-started/run-locally.md b/site/docs/4.7.1/getting-started/run-locally.md
deleted file mode 100644
index edbfab9fda6..00000000000
--- a/site/docs/4.7.1/getting-started/run-locally.md
+++ /dev/null
@@ -1,16 +0,0 @@
----
-title: Run bookies locally
-prev: ../installation
-next: ../concepts
-toc_disable: true
----
-
-{% pop Bookies %} are individual BookKeeper servers. You can run an ensemble of bookies locally on a single machine using the [`localbookie`](../../reference/cli#bookkeeper-localbookie) command of the `bookkeeper` CLI tool and specifying the number of bookies you'd like to include in the ensemble.
-
-This would start up an ensemble with 10 bookies:
-
-```shell
-$ bookkeeper-server/bin/bookkeeper localbookie 10
-```
-
-> When you start up an ensemble using `localbookie`, all bookies run in a single JVM process.
diff --git a/site/docs/4.7.1/overview/overview.md b/site/docs/4.7.1/overview/overview.md
deleted file mode 100644
index 6c81716eac0..00000000000
--- a/site/docs/4.7.1/overview/overview.md
+++ /dev/null
@@ -1,58 +0,0 @@
----
-title: Apache BookKeeper™ 4.7.1
----
-
-
-This documentation is for Apache BookKeeper™ version 4.7.1.
-
-Apache BookKeeper™ is a scalable, fault-tolerant, low-latency storage service optimized for real-time workloads. It offers durability, replication, and strong consistency as essentials for building reliable real-time applications.
-
-BookKeeper is suitable for a wide variety of use cases, including:
-
-Use case | Example
-:--------|:-------
-[WAL](https://en.wikipedia.org/wiki/Write-ahead_logging) (write-ahead logging) | The HDFS [namenode](https://hadoop.apache.org/docs/r2.5.2/hadoop-project-dist/hadoop-hdfs/HDFSHighAvailabilityWithNFS.html#BookKeeper_as_a_Shared_storage_EXPERIMENTAL)
-Message storage | [Apache Pulsar](http://pulsar.incubator.apache.org/docs/latest/getting-started/ConceptsAndArchitecture/#persistent-storage)
-Offset/cursor storage | [Apache Pulsar](http://pulsar.incubator.apache.org/docs/latest/getting-started/ConceptsAndArchitecture/#persistent-storage)
-Object/[BLOB](https://en.wikipedia.org/wiki/Binary_large_object) storage | Storing snapshots to replicated state machines
-
-Learn more about Apache BookKeeper™ and what it can do for your organization:
-
-- [Apache BookKeeper 4.7.1 Release Notes](../releaseNotes)
-- [Java API docs](../../api/javadoc)
-
-Or start [using](../../getting-started/installation) Apache BookKeeper today.
-
-### Users
-
-- **Concepts**: Start with [concepts](../../getting-started/concepts). This will help you to fully understand
-  the other parts of the documentation, including the setup, integration and operation guides.
-- **Getting Started**: Install [Apache BookKeeper](../../getting-started/installation) and run bookies [locally](../../getting-started/run-locally)
-- **API**: Read the [API](../../api/overview) documentation to learn how to use Apache BookKeeper to build your applications.
-- **Deployment**: The [Deployment Guide](../../deployment/manual) shows how to deploy Apache BookKeeper to production clusters.
-
-### Administrators
-
-- **Operations**: The [Admin Guide](../../admin/bookies) shows how to run Apache BookKeeper in production, including production
-  considerations and best practices.
-
-### Contributors
-
-- **Details**: Learn the [design details](../../development/protocol) to understand the internals.
diff --git a/site/docs/4.7.1/overview/releaseNotes.md b/site/docs/4.7.1/overview/releaseNotes.md
deleted file mode 100644
index 5c2b2110a99..00000000000
--- a/site/docs/4.7.1/overview/releaseNotes.md
+++ /dev/null
@@ -1,44 +0,0 @@
----
-title: Apache BookKeeper 4.7.1 Release Notes
----
-
-This is the eleventh release of Apache BookKeeper!
-
-The 4.7.1 release is a bugfix release that fixes a number of issues reported by users of 4.7.0.
-
-Apache BookKeeper users who are using 4.7.0 are encouraged to upgrade to 4.7.1. The technical details of this release are summarized
-below.
-
-## Highlights
-
-- Performance enhancement on eliminating bytes copying in `AddEntry` code path, see [apache/bookkeeper#1361](https://github.com/apache/bookkeeper/pull/1361)
-
-- Introduce Fast and Garbage-Free Statistics Timers in Codahale Stats Provider, see [apache/bookkeeper#1364](https://github.com/apache/bookkeeper/pull/1364)
-
-- Fix OrderedScheduler handling null key, see [apache/bookkeeper#1372](https://github.com/apache/bookkeeper/pull/1372)
-
-- Fix zookeeper ledger manager on handling no ledger exists, see [apache/bookkeeper#1382](https://github.com/apache/bookkeeper/pull/1382)
-
-- Fix long poll reads when ensemble size is larger than write quorum size, see [apache/bookkeeper#1404](https://github.com/apache/bookkeeper/pull/1404)
-
-- Fix IllegalReferenceCount on filling readahead cache for DbLedgerStorage, see [apache/bookkeeper#1487](https://github.com/apache/bookkeeper/issues/1487)
-
-- Fix LedgerEntry recycling issue on long poll speculative reads, see [apache/bookkeeper#1509](https://github.com/apache/bookkeeper/pull/1509)
-
-- Various bug fixes and improvements around bookkeeper table service, see changes under [apache/bookkeeper#release/4.7.1](https://github.com/apache/bookkeeper/issues?utf8=%E2%9C%93&q=is%3Aclosed+label%3Aarea%2Ftableservice+label%3Arelease%2F4.7.1)
-
-### Dependencies Upgrade
-
-Here is a list of dependencies changed in 4.7.1:
-
-- [Grpc](https://grpc.io/) is upgraded from `1.5.0` to `1.12.0`. See [apache/bookkeeper#1441](https://github.com/apache/bookkeeper/pull/1441)
-- [Netty](http://netty.io/) is upgraded from `4.1.12` to `4.1.22`. See [apache/bookkeeper#1441](https://github.com/apache/bookkeeper/pull/1441)
-- [Protobuf](https://developers.google.com/protocol-buffers/) is upgraded from `3.4.0` to `3.5.1`. See [apache/bookkeeper#1466](https://github.com/apache/bookkeeper/pull/1466)
-- [RocksDB](http://rocksdb.org/) is upgraded from `5.8.6` to `5.13.1`. See [apache/bookkeeper#1466](https://github.com/apache/bookkeeper/pull/1466)
-
-`Reflective setAccessible(true)` is disabled by default in Netty when using Java 9+. This might result in performance degradation. Consider reenabling `Reflective setAccessible(true)` by setting
-the system property `io.netty.tryReflectionSetAccessible` to `true`. See [netty/netty#7650](https://github.com/netty/netty/pull/7650) for more details.
-
-## Full list of changes
-
-- [https://github.com/apache/bookkeeper/issues?q=label%3Arelease%2F4.7.1+is%3Aclosed](https://github.com/apache/bookkeeper/issues?q=label%3Arelease%2F4.7.1+is%3Aclosed)
diff --git a/site/docs/4.7.1/overview/releaseNotesTemplate.md b/site/docs/4.7.1/overview/releaseNotesTemplate.md
deleted file mode 100644
index 6fecd045c21..00000000000
--- a/site/docs/4.7.1/overview/releaseNotesTemplate.md
+++ /dev/null
@@ -1,17 +0,0 @@
----
-title: Apache BookKeeper 4.7.1 Release Notes
----
-
-[provide a summary of this release]
-
-Apache BookKeeper users are encouraged to upgrade to 4.7.1. The technical details of this release are summarized
-below.
-
-## Highlights
-
-[List the highlights]
-
-## Details
-
-[link to issues list]
-
diff --git a/site/docs/4.7.1/reference/cli.md b/site/docs/4.7.1/reference/cli.md
deleted file mode 100644
index 8beb36ff071..00000000000
--- a/site/docs/4.7.1/reference/cli.md
+++ /dev/null
@@ -1,10 +0,0 @@
----
-title: BookKeeper CLI tool reference
-subtitle: A reference guide to the command-line tools that you can use to administer BookKeeper
----
-
-{% include cli.html id="bookkeeper" %}
-
-## The BookKeeper shell
-
-{% include shell.html %}
diff --git a/site/docs/4.7.1/reference/config.md b/site/docs/4.7.1/reference/config.md
deleted file mode 100644
index 8997b6b62f0..00000000000
--- a/site/docs/4.7.1/reference/config.md
+++ /dev/null
@@ -1,9 +0,0 @@
----
-title: BookKeeper configuration
-subtitle: A reference guide to all of BookKeeper's configurable parameters
----
-
-
-The table below lists parameters that you can set to configure {% pop bookies %}. All configuration takes place in the `bk_server.conf` file in the `bookkeeper-server/conf` directory of your [BookKeeper installation](../../getting-started/installation).
-
-{% include config.html id="bk_server" %}
diff --git a/site/docs/4.7.1/reference/metrics.md b/site/docs/4.7.1/reference/metrics.md
deleted file mode 100644
index 8bd6fe0a165..00000000000
--- a/site/docs/4.7.1/reference/metrics.md
+++ /dev/null
@@ -1,3 +0,0 @@
----
-title: BookKeeper metrics reference
----
diff --git a/site/docs/4.7.1/security/overview.md b/site/docs/4.7.1/security/overview.md
deleted file mode 100644
index b825776eb67..00000000000
--- a/site/docs/4.7.1/security/overview.md
+++ /dev/null
@@ -1,21 +0,0 @@
----
-title: BookKeeper Security
-next: ../tls
----
-
-In the 4.5.0 release, the BookKeeper community added a number of features that can be used, together or separately, to secure a BookKeeper cluster.
-The following security measures are currently supported:
-
-1. Authentication of connections to bookies from clients, using either [TLS](../tls) or [SASL (Kerberos)](../sasl).
-2. Authentication of connections from clients, bookies, and autorecovery daemons to [ZooKeeper](../zookeeper), when using ZooKeeper-based ledger managers.
-3. Encryption of data transferred between bookies and clients, and between bookies and autorecovery daemons, using [TLS](../tls).
-
-It’s worth noting that security is optional - non-secured clusters are supported, as well as a mix of authenticated, unauthenticated, encrypted and non-encrypted clients.
-
-NOTE: authorization is not yet available in 4.5.0. The Apache BookKeeper community is looking to add this feature in subsequent releases.
-
-## Next Steps
-
-- [Encryption and Authentication using TLS](../tls)
-- [Authentication using SASL](../sasl)
-- [ZooKeeper Authentication](../zookeeper)
diff --git a/site/docs/4.7.1/security/sasl.md b/site/docs/4.7.1/security/sasl.md
deleted file mode 100644
index ffb972a8936..00000000000
--- a/site/docs/4.7.1/security/sasl.md
+++ /dev/null
@@ -1,202 +0,0 @@
----
-title: Authentication using SASL
-prev: ../tls
-next: ../zookeeper
----
-
-Bookies support client authentication via SASL. Currently we only support GSSAPI (Kerberos). We will start
-with a general description of how to configure `SASL` for bookies, clients and autorecovery daemons, followed
-by mechanism-specific details, and wrap up with some operational details.
-
-## SASL configuration for Bookies
-
-1. Select the mechanisms to enable in the bookies. `GSSAPI` is the only mechanism currently supported by BookKeeper.
-2. Add a `JAAS` config file for the selected mechanisms as described in the examples for setting up [GSSAPI (Kerberos)](#kerberos).
-3. Pass the `JAAS` config file location as a JVM parameter to each Bookie. For example:
-
-    ```shell
-    -Djava.security.auth.login.config=/etc/bookkeeper/bookie_jaas.conf
-    ```
-
-4. Enable the SASL auth plugin in bookies by setting `bookieAuthProviderFactoryClass` to `org.apache.bookkeeper.sasl.SASLBookieAuthProviderFactory`.
-
-
-    ```shell
-    bookieAuthProviderFactoryClass=org.apache.bookkeeper.sasl.SASLBookieAuthProviderFactory
-    ```
-
-5. If you are running `autorecovery` along with bookies, then you also want to enable the SASL auth plugin for `autorecovery`, by setting
-   `clientAuthProviderFactoryClass` to `org.apache.bookkeeper.sasl.SASLClientProviderFactory`.
-
-    ```shell
-    clientAuthProviderFactoryClass=org.apache.bookkeeper.sasl.SASLClientProviderFactory
-    ```
-
-6. Follow the steps in [GSSAPI (Kerberos)](#kerberos) to configure SASL.
-
-#### Important Notes
-
-1. `Bookie` is a section name in the JAAS file used by each bookie. This section tells the bookie which principal to use
-   and the location of the keytab where the principal is stored. It allows the bookie to log in using the keytab specified in this section.
-2. `Auditor` is a section name in the JAAS file used by the `autorecovery` daemon (it can be co-run with bookies). This section tells the
-   `autorecovery` daemon which principal to use and the location of the keytab where the principal is stored. It allows the autorecovery daemon to
-   log in using the keytab specified in this section.
-3. The `Client` section is used to authenticate a SASL connection with ZooKeeper. It also allows the bookies to set ACLs on ZooKeeper nodes,
-   which locks these nodes down so that only the bookies can modify them. It is necessary to have the same primary name across all bookies.
-   If you want to use a section name other than `Client`, set the system property `zookeeper.sasl.client` to the appropriate name
-   (e.g. `-Dzookeeper.sasl.client=ZKClient`).
-4. ZooKeeper uses `zookeeper` as the service name by default. If you want to change this, set the system property
-   `zookeeper.sasl.client.username` to the appropriate name (e.g. `-Dzookeeper.sasl.client.username=zk`).
-
-## SASL configuration for Clients
-
-To configure `SASL` authentication on the clients:
-
-1. Select a `SASL` mechanism for authentication and add a `JAAS` config file for the selected mechanism as described in the examples for
-   setting up [GSSAPI (Kerberos)](#kerberos).
-2. Pass the `JAAS` config file location as a JVM parameter to each client JVM. For example:
-
-    ```shell
-    -Djava.security.auth.login.config=/etc/bookkeeper/bookkeeper_jaas.conf
-    ```
-
-3. Configure the following properties in the bookkeeper `ClientConfiguration`:
-
-    ```shell
-    clientAuthProviderFactoryClass=org.apache.bookkeeper.sasl.SASLClientProviderFactory
-    ```
-
-Follow the steps in [GSSAPI (Kerberos)](#kerberos) to configure SASL for the selected mechanism.
-
-## Authentication using SASL/Kerberos
-
-### Prerequisites
-
-#### Kerberos
-
-If your organization is already using a Kerberos server (for example, by using `Active Directory`), there is no need to
-install a new server just for BookKeeper.
-Otherwise you will need to install one. Your Linux vendor likely has packages
-for `Kerberos` and a short guide on how to install and configure it ([Ubuntu](https://help.ubuntu.com/community/Kerberos),
-[Redhat](https://access.redhat.com/documentation/en-US/Red_Hat_Enterprise_Linux/6/html/Managing_Smart_Cards/installing-kerberos.html)).
-Note that if you are using Oracle Java, you will need to download JCE policy files for your Java version and copy them to `$JAVA_HOME/jre/lib/security`.
-
-#### Kerberos Principals
-
-If you are using the organization’s Kerberos or Active Directory server, ask your Kerberos administrator for a principal
-for each Bookie in your cluster and for every operating system user that will access BookKeeper with Kerberos authentication
-(via clients and tools).
-
-If you have installed your own Kerberos, you will need to create these principals yourself using the following commands:
-
-```shell
-sudo /usr/sbin/kadmin.local -q 'addprinc -randkey bookkeeper/{hostname}@{REALM}'
-sudo /usr/sbin/kadmin.local -q "ktadd -k /etc/security/keytabs/{keytabname}.keytab bookkeeper/{hostname}@{REALM}"
-```
-
-##### All hosts must be reachable using hostnames
-
-It is a *Kerberos* requirement that all your hosts can be resolved with their FQDNs.
-
-### Configuring Bookies
-
-1. Add a suitably modified JAAS file similar to the one below to each Bookie’s config directory; for this example, call it `bookie_jaas.conf`
-   (note that each bookie should have its own keytab):
-
-    ```
-    Bookie {
-        com.sun.security.auth.module.Krb5LoginModule required
-        useKeyTab=true
-        storeKey=true
-        keyTab="/etc/security/keytabs/bookie.keytab"
-        principal="bookkeeper/bk1.hostname.com@EXAMPLE.COM";
-    };
-    // ZooKeeper client authentication
-    Client {
-        com.sun.security.auth.module.Krb5LoginModule required
-        useKeyTab=true
-        storeKey=true
-        keyTab="/etc/security/keytabs/bookie.keytab"
-        principal="bookkeeper/bk1.hostname.com@EXAMPLE.COM";
-    };
-    // If you are running `autorecovery` along with bookies
-    Auditor {
-        com.sun.security.auth.module.Krb5LoginModule required
-        useKeyTab=true
-        storeKey=true
-        keyTab="/etc/security/keytabs/bookie.keytab"
-        principal="bookkeeper/bk1.hostname.com@EXAMPLE.COM";
-    };
-    ```
-
-    The `Bookie` section in the JAAS file tells the bookie which principal to use and the location of the keytab where this principal is stored.
-    It allows the bookie to log in using the keytab specified in this section. See [notes](#notes) for more details on ZooKeeper’s SASL configuration.
-
-2. Pass the name of the JAAS file as a JVM parameter to each Bookie:
-
-    ```shell
-    -Djava.security.auth.login.config=/etc/bookkeeper/bookie_jaas.conf
-    ```
-
-    You may also wish to specify the path to the `krb5.conf` file
-    (see [JDK’s Kerberos Requirements](https://docs.oracle.com/javase/8/docs/technotes/guides/security/jgss/tutorials/KerberosReq.html) for more details):
-
-    ```shell
-    -Djava.security.krb5.conf=/etc/bookkeeper/krb5.conf
-    ```
-
-3. Make sure the keytabs configured in the JAAS file are readable by the operating system user who is starting the Bookies.
-
-4. Enable the SASL authentication plugin in the bookies by setting the following parameters:
-
-    ```shell
-    bookieAuthProviderFactoryClass=org.apache.bookkeeper.sasl.SASLBookieAuthProviderFactory
-    # if you run `autorecovery` along with bookies
-    clientAuthProviderFactoryClass=org.apache.bookkeeper.sasl.SASLClientProviderFactory
-    ```
-
-### Configuring Clients
-
-To configure SASL authentication on the clients:
-1. Clients will authenticate to the cluster with their own principal (usually with the same name as the user running the client),
-   so obtain or create these principals as needed. Then create a `JAAS` file for each principal. The `BookKeeper` section describes
-   how clients, such as writers and readers, can connect to the bookies. The following is an example configuration for a client using
-   a keytab (recommended for long-running processes):
-
-    ```
-    BookKeeper {
-        com.sun.security.auth.module.Krb5LoginModule required
-        useKeyTab=true
-        storeKey=true
-        keyTab="/etc/security/keytabs/bookkeeper.keytab"
-        principal="bookkeeper-client-1@EXAMPLE.COM";
-    };
-    ```
-
-
-2. Pass the name of the JAAS file as a JVM parameter to the client JVM:
-
-    ```shell
-    -Djava.security.auth.login.config=/etc/bookkeeper/bookkeeper_jaas.conf
-    ```
-
-    You may also wish to specify the path to the `krb5.conf` file (see
-    [JDK’s Kerberos Requirements](https://docs.oracle.com/javase/8/docs/technotes/guides/security/jgss/tutorials/KerberosReq.html) for more details).
-
-    ```shell
-    -Djava.security.krb5.conf=/etc/bookkeeper/krb5.conf
-    ```
-
-
-3. Make sure the keytabs configured in the `bookkeeper_jaas.conf` are readable by the operating system user who is starting the BookKeeper client.
-
-4. Enable the SASL authentication plugin in the client by setting the following parameters:
-
-    ```shell
-    clientAuthProviderFactoryClass=org.apache.bookkeeper.sasl.SASLClientProviderFactory
-    ```
-
-## Enabling Logging for SASL
-
-To enable SASL debug output, you can set the `sun.security.krb5.debug` system property to `true`.
-
diff --git a/site/docs/4.7.1/security/tls.md b/site/docs/4.7.1/security/tls.md
deleted file mode 100644
index cd250ab2aa5..00000000000
--- a/site/docs/4.7.1/security/tls.md
+++ /dev/null
@@ -1,210 +0,0 @@
----
-title: Encryption and Authentication using TLS
-prev: ../overview
-next: ../sasl
----
-
-Apache BookKeeper allows clients and autorecovery daemons to communicate over TLS, although this is not enabled by default.
-
-## Overview
-
-The bookies need their own key and certificate in order to use TLS. Clients can optionally provide a key and a certificate
-for mutual authentication. Each bookie or client can also be configured with a truststore, which is used to
-determine which certificates (bookie or client identities) to trust (authenticate).
-
-The truststore can be configured in many ways. To understand the truststore, consider the following two examples:
-
-1. the truststore contains one or many certificates;
-2. it contains a certificate authority (CA).
-
-In (1), with a list of certificates, the bookie or client will trust any certificate listed in the truststore.
-In (2), with a CA, the bookie or client will trust any certificate that was signed by the CA in the truststore.
-
-(TBD: benefits)
-
-## Generate TLS key and certificate
-
-The first step of deploying TLS is to generate the key and the certificate for each machine in the cluster.
-You can use Java’s `keytool` utility to accomplish this task. We will generate the key into a temporary keystore
-initially so that we can export and sign it later with the CA.
-
-```shell
-keytool -keystore bookie.keystore.jks -alias localhost -validity {validity} -genkey
-```
-
-You need to specify two parameters in the above command:
-
-1. `keystore`: the keystore file that stores the certificate. The *keystore* file contains the private key of
-   the certificate; hence, it needs to be kept safely.
-2. `validity`: the valid time of the certificate in days.
-
-Ensure that the common name (CN) exactly matches the fully qualified domain name (FQDN) of the server.
-The client compares the CN with the DNS domain name to ensure that it is indeed connecting to the desired server, not a malicious one.
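-
-> To double-check the CN (and the rest of the certificate) after generating the key, you can list the keystore contents using standard `keytool` usage; the keystore name below matches the example above:
-
-```shell
-keytool -list -v -keystore bookie.keystore.jks
-```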
-
-## Creating your own CA
-
-After the first step, each machine in the cluster has a public-private key pair, and a certificate to identify the machine.
-The certificate, however, is unsigned, which means that an attacker can create such a certificate to pretend to be any machine.
-
-Therefore, it is important to prevent forged certificates by signing them for each machine in the cluster.
-A `certificate authority (CA)` is responsible for signing certificates. A CA works like a government that issues passports:
-the government stamps (signs) each passport so that the passport becomes difficult to forge. Other governments verify the stamps
-to ensure the passport is authentic. Similarly, the CA signs the certificates, and the cryptography guarantees that a signed
-certificate is computationally difficult to forge. Thus, as long as the CA is a genuine and trusted authority, the clients have
-high assurance that they are connecting to the authentic machines.
-
-```shell
-openssl req -new -x509 -keyout ca-key -out ca-cert -days 365
-```
-
-The generated CA is simply a *public-private* key pair and certificate, and it is intended to sign other certificates.
-
-The next step is to add the generated CA to the clients' truststore so that the clients can trust this CA:
-
-```shell
-keytool -keystore bookie.truststore.jks -alias CARoot -import -file ca-cert
-```
-
-NOTE: If you configure the bookies to require client authentication by setting `sslClientAuthentication` to `true` on the
-[bookie config](../../reference/config), then you must also provide a truststore for the bookies and it should have all the CA
-certificates that client keys were signed by.
-
-```shell
-keytool -keystore client.truststore.jks -alias CARoot -import -file ca-cert
-```
-
-In contrast to the keystore, which stores each machine’s own identity, the truststore of a client stores all the certificates
-that the client should trust. Importing a certificate into one’s truststore also means trusting all certificates that are signed
-by that certificate. As in the analogy above, trusting the government (CA) also means trusting all passports (certificates) that
-it has issued. This property is called the *chain of trust*, and it is particularly useful when deploying TLS on a large BookKeeper cluster.
-You can sign all certificates in the cluster with a single CA, and have all machines share the same truststore that trusts the CA.
-That way all machines can authenticate all other machines.
-
-## Signing the certificate
-
-The next step is to sign all certificates in the keystore with the CA we generated. First, you need to export the certificate from the keystore:
-
-```shell
-keytool -keystore bookie.keystore.jks -alias localhost -certreq -file cert-file
-```
-
-Then sign it with the CA:
-
-```shell
-openssl x509 -req -CA ca-cert -CAkey ca-key -in cert-file -out cert-signed -days {validity} -CAcreateserial -passin pass:{ca-password}
-```
-
-Finally, you need to import both the certificate of the CA and the signed certificate into the keystore:
-
-```shell
-keytool -keystore bookie.keystore.jks -alias CARoot -import -file ca-cert
-keytool -keystore bookie.keystore.jks -alias localhost -import -file cert-signed
-```
-
-The definitions of the parameters are the following:
-
-1. `keystore`: the location of the keystore
-2. `ca-cert`: the certificate of the CA
-3. `ca-key`: the private key of the CA
-4. `ca-password`: the passphrase of the CA
-5. `cert-file`: the exported, unsigned certificate of the bookie
-6. `cert-signed`: the signed certificate of the bookie
-
-(TBD: add a script to automatically generate truststores and keystores.)
-
-## Configuring Bookies
-
-Bookies support TLS for connections on the same service port. In order to enable TLS, you need to configure `tlsProvider` to be either
-`JDK` or `OpenSSL`. If `OpenSSL` is configured, it will use `netty-tcnative-boringssl-static`, which loads the corresponding native binding
-for the platform the bookies run on.
-
-> The current `OpenSSL` implementation doesn't depend on the system-installed OpenSSL library. If you want to leverage the OpenSSL installed on
-the system, you can check [this example](http://netty.io/wiki/forked-tomcat-native.html) on how to replace the JARs on the classpath with
-netty bindings that use the installed OpenSSL.
-
-The following TLS configs are needed on the bookie side:
-
-```shell
-tlsProvider=OpenSSL
-# key store
-tlsKeyStoreType=JKS
-tlsKeyStore=/var/private/tls/bookie.keystore.jks
-tlsKeyStorePasswordPath=/var/private/tls/bookie.keystore.passwd
-# trust store
-tlsTrustStoreType=JKS
-tlsTrustStore=/var/private/tls/bookie.truststore.jks
-tlsTrustStorePasswordPath=/var/private/tls/bookie.truststore.passwd
-```
-
-NOTE: it is important to restrict access to the store files and corresponding password files via filesystem permissions.
-
-Optional settings that are worth considering:
-
-1. tlsClientAuthentication=false: Enable/Disable using TLS for authentication. When enabled, this config authenticates the other end
-   of the communication channel. It should be enabled on both bookies and clients for mutual TLS.
-2. tlsEnabledCipherSuites=: A cipher suite is a named combination of authentication, encryption, MAC and key exchange
-   algorithms used to negotiate the security settings for a network connection using the TLS network protocol. By default,
-   it is null. See [OpenSSL Ciphers](https://www.openssl.org/docs/man1.0.2/apps/ciphers.html) and
-   [JDK Ciphers](http://docs.oracle.com/javase/8/docs/technotes/guides/security/StandardNames.html#ciphersuites).
-3. tlsEnabledProtocols=TLSv1.2,TLSv1.1,TLSv1 (list out the TLS protocols that you are going to accept from clients).
-   By default, it is not set.
-
-To verify that the bookie's keystore and truststore are set up correctly, you can run the following command:
-
-```shell
-openssl s_client -debug -connect localhost:3181 -tls1
-```
-
-NOTE: TLSv1 should be listed under `tlsEnabledProtocols`.
-
-In the output of this command you should see the server's certificate:
-
-```shell
------BEGIN CERTIFICATE-----
-{variable sized random bytes}
------END CERTIFICATE-----
-```
-
-If the certificate does not show up or if there are any other error messages then your keystore is not set up correctly.
-
-## Configuring Clients
-
-TLS is supported only for the new BookKeeper client (BookKeeper versions 4.5.0 and higher); the older clients are not
-supported. The TLS configs are similar to those for bookies.
-
-If client authentication is not required by the bookies, the following is a minimal configuration example:
-
-```shell
-tlsProvider=OpenSSL
-clientTrustStore=/var/private/tls/client.truststore.jks
-clientTrustStorePasswordPath=/var/private/tls/client.truststore.passwd
-```
-
-If client authentication is required, then a keystore must be created for each client, and the bookies' truststores must
-trust the certificate in the client's keystore. This may be done using commands that are similar to what we used for
-the [bookie keystore](#bookie-keystore).
-
-And the following must also be configured:
-
-```shell
-tlsClientAuthentication=true
-clientKeyStore=/var/private/tls/client.keystore.jks
-clientKeyStorePasswordPath=/var/private/tls/client.keystore.passwd
-```
-
-NOTE: it is important to restrict access to the store files and corresponding password files via filesystem permissions.
-
-(TBD: add example to use tls in bin/bookkeeper script?)
-
-## Enabling TLS Logging
-
-You can enable TLS debug logging at the JVM level by starting the bookies and/or clients with the `javax.net.debug` system property. For example:
-
-```shell
--Djavax.net.debug=all
-```
-
-You can find more details on this in the [Oracle documentation](http://docs.oracle.com/javase/8/docs/technotes/guides/security/jsse/ReadDebug.html) on
-[debugging SSL/TLS connections](http://docs.oracle.com/javase/8/docs/technotes/guides/security/jsse/ReadDebug.html).
diff --git a/site/docs/4.7.1/security/zookeeper.md b/site/docs/4.7.1/security/zookeeper.md
deleted file mode 100644
index e16be69a1d3..00000000000
--- a/site/docs/4.7.1/security/zookeeper.md
+++ /dev/null
@@ -1,41 +0,0 @@
----
-title: ZooKeeper Authentication
-prev: ../sasl
----
-
-## New Clusters
-
-To enable `ZooKeeper` authentication on Bookies or Clients, there are two necessary steps:
-
-1. Create a `JAAS` login file and set the appropriate system property to point to it as described in [GSSAPI (Kerberos)](../sasl#notes).
-2. Set the configuration property `zkEnableSecurity` in each bookie to `true`.
-
-The metadata stored in `ZooKeeper` is such that only certain clients will be able to modify and read the corresponding znodes.
-The rationale behind this decision is that the data stored in ZooKeeper is not sensitive, but inappropriate manipulation of znodes can cause cluster
-disruption.
-
-## Migrating Clusters
-
-If you are running a version of BookKeeper that does not support security, or one with security simply disabled, and you want to make the cluster secure,
-then you need to execute the following steps to enable ZooKeeper authentication with minimal disruption to your operations.
-
-1. Perform a rolling restart setting the `JAAS` login file, which enables bookies or clients to authenticate. At the end of the rolling restart,
-   bookies (or clients) are able to manipulate znodes with strict ACLs, but they will not create znodes with those ACLs.
-2. Perform a second rolling restart of bookies, this time setting the configuration parameter `zkEnableSecurity` to true, which enables the use
-   of secure ACLs when creating znodes.
-3. Currently we don't provide a tool to set ACLs on old znodes. We recommend setting them manually using ZooKeeper tools.
-
-It is also possible to turn off authentication in a secured cluster. To do it, follow these steps:
-
-1. Perform a rolling restart of bookies setting the `JAAS` login file, which enables bookies to authenticate, but setting `zkEnableSecurity` to `false`.
-   At the end of the rolling restart, bookies stop creating znodes with secure ACLs, but are still able to authenticate and manipulate all znodes.
-2. You can use ZooKeeper tools to manually reset all ACLs under the znode set in `zkLedgersRootPath`, which defaults to `/ledgers`.
-3. Perform a second rolling restart of bookies, this time omitting the system property that sets the `JAAS` login file.
-
-## Migrating the ZooKeeper ensemble
-
-It is also necessary to enable authentication on the `ZooKeeper` ensemble. To do it, we need to perform a rolling restart of the ensemble and
-set a few properties.
-Please refer to the ZooKeeper documentation for more details:
-
-1. [Apache ZooKeeper Documentation](http://zookeeper.apache.org/doc/r3.4.6/zookeeperProgrammers.html#sc_ZooKeeperAccessControl)
-2. [Apache ZooKeeper Wiki](https://cwiki.apache.org/confluence/display/ZOOKEEPER/Zookeeper+and+SASL)
diff --git a/site/docs/latest/admin/autorecovery.md b/site/docs/latest/admin/autorecovery.md
deleted file mode 100644
index b1dd078f9b2..00000000000
--- a/site/docs/latest/admin/autorecovery.md
+++ /dev/null
@@ -1,128 +0,0 @@
----
-title: Using AutoRecovery
----
-
-When a {% pop bookie %} crashes, all {% pop ledgers %} on that bookie become under-replicated. In order to bring all ledgers in your BookKeeper cluster back to full replication, you'll need to *recover* the data from any offline bookies. There are two ways to recover bookies' data:
-
-1. Using [manual recovery](#manual-recovery)
-1. Automatically, using [*AutoRecovery*](#autorecovery)
-
-## Manual recovery
-
-You can manually recover failed bookies using the [`bookkeeper`](../../reference/cli) command-line tool. You need to specify:
-
-* the `shell recover` option
-* an IP and port for your BookKeeper cluster's ZooKeeper ensemble
-* the IP and port for the failed bookie
-
-Here's an example (the first argument is the IP and port for ZooKeeper, the second is the IP and port for the failed bookie):
-
-```bash
-$ bookkeeper-server/bin/bookkeeper shell recover \
-    zk1.example.com:2181 \
-    192.168.1.10:3181
-```
-
-If you wish, you can also specify, as a third argument, which bookie you'd like to rereplicate to. Here's an example:
-
-```bash
-$ bookkeeper-server/bin/bookkeeper shell recover \
-    zk1.example.com:2181 \
-    192.168.1.10:3181 \
-    192.168.1.11:3181
-```
-
-### The manual recovery process
-
-When you initiate a manual recovery process, the following happens:
-
-1. The client (the process running the `shell recover` command) reads the metadata of active ledgers from ZooKeeper.
-1. The ledgers that contain fragments from the failed bookie in their ensemble are selected.
-1. A recovery process is initiated for each ledger in this list and the rereplication process is run for each ledger.
-1. Once all the ledgers are marked as fully replicated, bookie recovery is finished.
-
-## AutoRecovery
-
-AutoRecovery is a process that:
-
-* automatically detects when a {% pop bookie %} in your BookKeeper cluster has become unavailable and then
-* rereplicates all the {% pop ledgers %} that were stored on that bookie.
-
-AutoRecovery can be run in two ways:
-
-1. On dedicated nodes in your BookKeeper cluster
-1. On the same machines on which your bookies are running
-
-## Running AutoRecovery
-
-You can start up AutoRecovery using the [`autorecovery`](../../reference/cli#bookkeeper-autorecovery) command of the [`bookkeeper`](../../reference/cli) CLI tool.
-
-```bash
-$ bookkeeper-server/bin/bookkeeper autorecovery
-```
-
-> The most important thing to ensure when starting up AutoRecovery is that the ZooKeeper connection string specified by the [`zkServers`](../../reference/config#zkServers) parameter points to the right ZooKeeper cluster.
-
-If you start up AutoRecovery on a machine that is already running a bookie, then the AutoRecovery process will run alongside the bookie on a separate thread.
-
-You can also start up AutoRecovery on a fresh machine if you'd like to create a dedicated cluster of AutoRecovery nodes.
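-
-> For example, to keep a dedicated AutoRecovery daemon running in the background on such a machine, one simple approach is to use plain [nohup](https://en.wikipedia.org/wiki/Nohup), just as you can with bookies (the log file path here is only an example):
-
-```bash
-$ nohup bookkeeper-server/bin/bookkeeper autorecovery > autorecovery.log 2>&1 &
-```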
-
-## Configuration
-
-There are a handful of AutoRecovery-related configs in the [`bk_server.conf`](../../reference/config) configuration file. For a listing of those configs, see [AutoRecovery settings](../../reference/config#autorecovery-settings).
-
-## Disable AutoRecovery
-
-You can disable AutoRecovery at any time, for example during maintenance. Disabling AutoRecovery ensures that bookies' data isn't unnecessarily rereplicated when the bookie is only taken down for a short period of time, for example when the bookie is being updated or the configuration is being changed.
-
-You can disable AutoRecovery using the [`bookkeeper`](../../reference/cli#bookkeeper-shell-autorecovery) CLI tool:
-
-```bash
-$ bookkeeper-server/bin/bookkeeper shell autorecovery -disable
-```
-
-Once disabled, you can reenable AutoRecovery using the [`enable`](../../reference/cli#bookkeeper-shell-autorecovery) shell command:
-
-```bash
-$ bookkeeper-server/bin/bookkeeper shell autorecovery -enable
-```
-
-## AutoRecovery architecture
-
-AutoRecovery has two components:
-
-1. The [**auditor**](#auditor) (see the [`Auditor`](../../api/javadoc/org/apache/bookkeeper/replication/Auditor.html) class) is a singleton node that watches bookies to see if they fail and creates rereplication tasks for the ledgers on failed bookies.
-1. The [**replication worker**](#replication-worker) (see the [`ReplicationWorker`](../../api/javadoc/org/apache/bookkeeper/replication/ReplicationWorker.html) class) runs on each bookie and executes rereplication tasks provided by the auditor.
-
-Both of these components run as threads in the [`AutoRecoveryMain`](../../api/javadoc/org/apache/bookkeeper/replication/AutoRecoveryMain) process, which runs on each bookie in the cluster. All recovery nodes participate in a ZooKeeper-based leader election to decide which node becomes the auditor. Nodes that fail to become the auditor watch the elected auditor and run an election process again if they see that the auditor node has failed.
-
-### Auditor
-
-The auditor watches all bookies in the cluster that are registered with ZooKeeper. Bookies register with ZooKeeper at startup. If the bookie crashes or is killed, the bookie's registration in ZooKeeper disappears and the auditor is notified of the change in the list of registered bookies.
-
-When the auditor sees that a bookie has disappeared, it immediately scans the complete {% pop ledger %} list to find ledgers that have data stored on the failed bookie. Once it has a list of ledgers for that bookie, the auditor will publish a rereplication task for each ledger under the `/underreplicated/` [znode](https://zookeeper.apache.org/doc/current/zookeeperOver.html) in ZooKeeper.
-
-### Replication Worker
-
-Each replication worker watches for tasks being published by the auditor on the `/underreplicated/` znode in ZooKeeper. When a new task appears, the replication worker will try to get a lock on it. If it cannot acquire the lock, it will try the next entry. The locks are implemented using ZooKeeper ephemeral znodes.
-
-The replication worker will scan through the rereplication task's ledger for fragments of which its local bookie is not a member. When it finds fragments matching this criterion, it will replicate the entries of that fragment to the local bookie. If, after this process, the ledger is fully replicated, the ledger's entry under `/underreplicated/` is deleted, and the lock is released.
-If there is a problem replicating, or there are still fragments in the ledger which are underreplicated (due to the local bookie already being part of the ensemble for the fragment), then the lock is simply released.
-
-If the replication worker finds a fragment which needs rereplication, but does not have a defined endpoint (i.e. the final fragment of a ledger currently being written to), it will wait for a grace period before attempting rereplication. If the fragment needing rereplication still does not have a defined endpoint, the ledger is fenced and rereplication then takes place.
-
-This avoids the situation in which a client is writing to a ledger and one of the bookies goes down, but the client has not written an entry to that bookie before rereplication takes place. The client could continue writing to the old fragment, even though the ensemble for the fragment had changed. This could lead to data loss. Fencing prevents this scenario from happening. In the normal case, the client will try to write to the failed bookie within the grace period, and will have started a new fragment before rereplication starts.
-
-You can configure this grace period using the [`openLedgerRereplicationGracePeriod`](../../reference/config#openLedgerRereplicationGracePeriod) parameter.
-
-### The rereplication process
-
-The ledger rereplication process happens in these steps:
-
-1. The client goes through all ledger fragments in the ledger, selecting those that contain the failed bookie.
-1. A recovery process is initiated for each ledger fragment in this list.
-   1. The client selects a bookie to which all entries in the ledger fragment will be replicated; in the case of AutoRecovery, this will always be the local bookie.
-   1. The client reads entries that belong to the ledger fragment from other bookies in the ensemble and writes them to the selected bookie.
-   1. Once all entries have been replicated, the ZooKeeper metadata for the fragment is updated to reflect the new ensemble.
-   1. The fragment is marked as fully replicated in the recovery tool.
-1. Once all ledger fragments are marked as fully replicated, the ledger is marked as fully replicated.
-
diff --git a/site/docs/latest/admin/bookies.md b/site/docs/latest/admin/bookies.md
deleted file mode 100644
index 1b0427dae3c..00000000000
--- a/site/docs/latest/admin/bookies.md
+++ /dev/null
@@ -1,180 +0,0 @@
----
-title: BookKeeper administration
-subtitle: A guide to deploying and administering BookKeeper
----
-
-This document is a guide to deploying, administering, and maintaining BookKeeper. It also discusses [best practices](#best-practices) and [common problems](#common-problems).
-
-## Requirements
-
-A typical BookKeeper installation consists of an ensemble of {% pop bookies %} and a ZooKeeper quorum. The exact number of bookies depends on the quorum mode that you choose, desired throughput, and the number of clients using the installation simultaneously.
-
-The minimum number of bookies depends on the type of installation:
-
-* For *self-verifying* entries you should run at least three bookies. In this mode, clients store a message authentication code along with each {% pop entry %}.
-* For *generic* entries you should run at least four bookies.
-
-There is no upper limit on the number of bookies that you can run in a single ensemble.
-
-### Performance
-
-To achieve optimal performance, BookKeeper requires each server to have at least two disks. It's possible to run a bookie with a single disk but performance will be significantly degraded.
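-
-> As a sketch of the two-disk layout, you could point the journal and the ledger storage at separate devices via the `journalDirectory` and `ledgerDirectories` parameters in [`bk_server.conf`](../../reference/config), which are described under [Configuring bookies](#configuring-bookies) below. The mount points here are examples only:
-
-```shell
-journalDirectory=/mnt/journal/bk-txn
-ledgerDirectories=/mnt/data1/bk-data,/mnt/data2/bk-data
-```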
-
-### ZooKeeper
-
-There is no constraint on the number of ZooKeeper nodes you can run with BookKeeper. A single machine running ZooKeeper in [standalone mode](https://zookeeper.apache.org/doc/current/zookeeperStarted.html#sc_InstallingSingleMode) is sufficient for BookKeeper, although for the sake of higher resilience we recommend running ZooKeeper in [quorum mode](https://zookeeper.apache.org/doc/current/zookeeperStarted.html#sc_RunningReplicatedZooKeeper) with multiple servers.
-
-## Starting and stopping bookies
-
-You can run bookies either in the foreground or in the background, using [nohup](https://en.wikipedia.org/wiki/Nohup). You can also run [local bookies](#local-bookie) for development purposes.
-
-To start a bookie in the foreground, use the [`bookie`](../../reference/cli#bookkeeper-bookie) command of the [`bookkeeper`](../../reference/cli#bookkeeper) CLI tool:
-
-```shell
-$ bookkeeper-server/bin/bookkeeper bookie
-```
-
-To start a bookie in the background, use the [`bookkeeper-daemon.sh`](../../reference/cli#bookkeeper-daemon.sh) script and run `start bookie`:
-
-```shell
-$ bookkeeper-server/bin/bookkeeper-daemon.sh start bookie
-```
-
-### Local bookies
-
-The instructions above showed you how to run bookies intended for production use. If you'd like to experiment with ensembles of bookies locally, you can use the [`localbookie`](../../reference/cli#bookkeeper-localbookie) command of the `bookkeeper` CLI tool and specify the number of bookies you'd like to run.
-
-This would spin up a local ensemble of 6 bookies:
-
-```shell
-$ bookkeeper-server/bin/bookkeeper localbookie 6
-```
-
-> When you run a local bookie ensemble, all bookies run in a single JVM process.
-
-## Configuring bookies
-
-There's a wide variety of parameters that you can set in the bookie configuration file in `bookkeeper-server/conf/bk_server.conf` of your [BookKeeper installation](../../getting-started/installation). A full listing can be found in [Bookie configuration](../../reference/config).
-
-Some of the more important parameters to be aware of:
-
-Parameter | Description | Default
-:---------|:------------|:-------
-`bookiePort` | The TCP port that the bookie listens on | `3181`
-`zkServers` | A comma-separated list of ZooKeeper servers in `hostname:port` format | `localhost:2181`
-`journalDirectory` | The directory where the [log device](../../getting-started/concepts#log-device) stores the bookie's write-ahead log (WAL) | `/tmp/bk-txn`
-`ledgerDirectories` | The directories where the [ledger device](../../getting-started/concepts#ledger-device) stores the bookie's ledger entries (as a comma-separated list) | `/tmp/bk-data`
-
-> Ideally, the directories specified by `journalDirectory` and `ledgerDirectories` should be on different devices.
-
-## Logging
-
-BookKeeper uses [slf4j](http://www.slf4j.org/) for logging, with [log4j](https://logging.apache.org/log4j/2.x/) bindings enabled by default.
-
-To enable logging for a bookie, create a `log4j.properties` file and point the `BOOKIE_LOG_CONF` environment variable to the configuration file. Here's an example:
-
-```shell
-$ export BOOKIE_LOG_CONF=/some/path/log4j.properties
-$ bookkeeper-server/bin/bookkeeper bookie
-```
-
-## Upgrading
-
-From time to time you may need to make changes to the filesystem layout of bookies---changes that are incompatible with previous versions of BookKeeper and require that directories used with previous versions are upgraded.
-If a filesystem upgrade is required when updating BookKeeper, the bookie will fail to start and return an error like this:
-
-```
-2017-05-25 10:41:50,494 - ERROR - [main:Bookie@246] - Directory layout version is less than 3, upgrade needed
-```
-
-BookKeeper provides a utility for upgrading the filesystem. You can perform an upgrade using the [`upgrade`](../../reference/cli#bookkeeper-upgrade) command of the `bookkeeper` CLI tool. When running `bookkeeper upgrade` you need to specify one of three flags:
-
-Flag | Action
-:----|:------
-`--upgrade` | Performs an upgrade
-`--rollback` | Performs a rollback to the initial filesystem version
-`--finalize` | Marks the upgrade as complete
-
-### Upgrade pattern
-
-A standard upgrade pattern is to run an upgrade...
-
-```shell
-$ bookkeeper-server/bin/bookkeeper upgrade --upgrade
-```
-
-...then check that everything is working normally, then kill the bookie. If everything is okay, finalize the upgrade...
-
-```shell
-$ bookkeeper-server/bin/bookkeeper upgrade --finalize
-```
-
-...and then restart the server:
-
-```shell
-$ bookkeeper-server/bin/bookkeeper bookie
-```
-
-If something has gone wrong, you can always perform a rollback:
-
-```shell
-$ bookkeeper-server/bin/bookkeeper upgrade --rollback
-```
-
-## Formatting
-
-You can format bookie metadata in ZooKeeper using the [`metaformat`](../../reference/cli#bookkeeper-shell-metaformat) command of the [BookKeeper shell](../../reference/cli#the-bookkeeper-shell).
-
-By default, formatting is done in interactive mode, which prompts you to confirm the format operation if old data exists. You can disable confirmation using the `-nonInteractive` flag. If old data does exist, the format operation will abort *unless* you set the `-force` flag. Here's an example:
-
-```shell
-$ bookkeeper-server/bin/bookkeeper shell metaformat
-```
-
-You can format the local filesystem data on a bookie using the [`bookieformat`](../../reference/cli#bookkeeper-shell-bookieformat) command on each bookie. Here's an example:
-
-```shell
-$ bookkeeper-server/bin/bookkeeper shell bookieformat
-```
-
-> The `-force` and `-nonInteractive` flags are also available for the `bookieformat` command.
-
-## AutoRecovery
-
-For a guide to AutoRecovery in BookKeeper, see [this doc](../autorecovery).
-
-## Missing disks or directories
-
-Accidentally replacing disks or removing directories can cause a bookie to fail while trying to read a ledger fragment that, according to the ledger metadata, exists on the bookie. For this reason, when a bookie is started for the first time, its disk configuration is fixed for the lifetime of that bookie. Any change to its disk configuration, such as a crashed disk or an accidental configuration change, will result in the bookie being unable to start. That will throw an error like this:
-
-```
-2017-05-29 18:19:13,790 - ERROR - [main:BookieServer@314] - Exception running bookie server :
-org.apache.bookkeeper.bookie.BookieException$InvalidCookieException
-.......at org.apache.bookkeeper.bookie.Cookie.verify(Cookie.java:82)
-.......at org.apache.bookkeeper.bookie.Bookie.checkEnvironment(Bookie.java:275)
-.......at org.apache.bookkeeper.bookie.Bookie.<init>(Bookie.java:351)
-```
-
-If the change was the result of an accidental configuration change, the change can be reverted and the bookie can be restarted. However, if the change *cannot* be reverted, such as is the case when you want to add a new disk or replace a disk, the bookie must be wiped and then all its data re-replicated onto it.
-
-1. Increment the [`bookiePort`](../../reference/config#bookiePort) parameter in the [`bk_server.conf`](../../reference/config).
-1. Ensure that all directories specified by [`journalDirectory`](../../reference/config#journalDirectory) and [`ledgerDirectories`](../../reference/config#ledgerDirectories) are empty.
-1. [Start the bookie](#starting-and-stopping-bookies).
-1. Run the following command to re-replicate the data:
-
-   ```bash
-   $ bookkeeper-server/bin/bookkeeper shell recover \
-       <zkserver> \
-       <oldbookie> \
-       <newbookie>
-   ```
-
-   The ZooKeeper server, old bookie, and new bookie, are all identified by their external IP and `bookiePort` (3181 by default). Here's an example:
-
-   ```bash
-   $ bookkeeper-server/bin/bookkeeper shell recover \
-       zk1.example.com \
-       192.168.1.10:3181 \
-       192.168.1.10:3181
-   ```
-
-   See the [AutoRecovery](../autorecovery) documentation for more info on the re-replication process.
diff --git a/site/docs/latest/admin/geo-replication.md b/site/docs/latest/admin/geo-replication.md
deleted file mode 100644
index 38b972345ef..00000000000
--- a/site/docs/latest/admin/geo-replication.md
+++ /dev/null
@@ -1,22 +0,0 @@
----
-title: Geo-replication
-subtitle: Replicate data across BookKeeper clusters
----
-
-*Geo-replication* is the replication of data across BookKeeper clusters. In order to enable geo-replication for a group of BookKeeper clusters,
-
-## Global ZooKeeper
-
-Setting up a global ZooKeeper quorum is a lot like setting up a cluster-specific quorum. The crucial difference is that
-
-### Geo-replication across three clusters
-
-Let's say that you want to set up geo-replication across clusters in regions A, B, and C. First, the BookKeeper clusters in each region must have their own local (cluster-specific) ZooKeeper quorum.
-
-> BookKeeper clusters use global ZooKeeper only for metadata storage. Traffic from bookies to ZooKeeper should thus be fairly light in general.
-
-The crucial difference between using cluster-specific ZooKeeper and global ZooKeeper is that you need to point all {% pop bookies %} to the global ZooKeeper setup.
-
-## Region-aware placement policy
-
-## Autorecovery
diff --git a/site/docs/latest/admin/http.md b/site/docs/latest/admin/http.md
deleted file mode 100644
index dc647449621..00000000000
--- a/site/docs/latest/admin/http.md
+++ /dev/null
@@ -1,407 +0,0 @@
----
-title: BookKeeper Admin REST API
----
-
-This document introduces BookKeeper HTTP endpoints, which can be used for BookKeeper administration.
-To use this feature, set `httpServerEnabled` to `true` in the file `conf/bk_server.conf`.
-
-## All the endpoints
-
-Currently all the HTTP endpoints can be divided into the following components:
-1. Heartbeat: heartbeat for a specific bookie.
-1. Config: server configuration for a specific bookie.
-1. Ledger: HTTP endpoints related to ledgers.
-1. Bookie: HTTP endpoints related to bookies.
-1. AutoRecovery: HTTP endpoints related to auto recovery.
-
-## Heartbeat
-
-### Endpoint: /heartbeat
-* Method: GET
-* Description: Get heartbeat status for a specific bookie
-* Response:
-
-  | Code   | Description |
-  |:-------|:------------|
-  |200 | Successful operation |
-
-## Config
-
-### Endpoint: /api/v1/config/server_config
-1. Method: GET
-   * Description: Get all configuration values overridden in the local server config
-   * Response:
-
-     | Code   | Description |
-     |:-------|:------------|
-     |200 | Successful operation |
-     |403 | Permission denied |
-     |404 | Not found |
-1. Method: PUT
-   * Description: Update a local server config
-   * Parameters:
-
-     | Name | Type | Required | Description |
-     |:-----|:-----|:---------|:------------|
-     |configName | String | Yes | Configuration name (key) |
-     |configValue | String | Yes | Configuration value |
-   * Body:
-     ```json
-     {
-       "configName1": "configValue1",
-       "configName2": "configValue2"
-     }
-     ```
-   * Response:
-
-     | Code | Description |
-     |:-------|:------------|
-     |200 | Successful operation |
-     |403 | Permission denied |
-     |404 | Not found |
-
-## Metrics
-
-### Endpoint: /metrics
-1. Method: GET
-   * Description: Get all metrics by calling `writeAllMetrics()` of the `statsProvider` internally
-   * Response:
-
-     | Code | Description |
-     |:-------|:------------|
-     |200 | Successful operation |
-     |403 | Permission denied |
-     |404 | Not found |
-
-## Ledger
-
-### Endpoint: /api/v1/ledger/delete/?ledger_id=<ledger_id>
-1. Method: DELETE
-   * Description: Delete a ledger.
-   * Parameters:
-
-     | Name | Type | Required | Description |
-     |:-----|:-----|:---------|:------------|
-     |ledger_id | Long | Yes | ledger id of the ledger. |
-   * Response:
-
-     | Code | Description |
-     |:-------|:------------|
-     |200 | Successful operation |
-     |403 | Permission denied |
-     |404 | Not found |
-
-### Endpoint: /api/v1/ledger/list/?print_metadata=<metadata>
-1. Method: GET
-   * Description: List all the ledgers.
-   * Parameters:
-
-     | Name | Type | Required | Description |
-     |:-----|:-----|:---------|:------------|
-     |print_metadata | Boolean | No | whether to print metadata |
-   * Response:
-
-     | Code | Description |
-     |:-------|:------------|
-     |200 | Successful operation |
-     |403 | Permission denied |
-     |404 | Not found |
-   * Response Body format:
-
-     ```json
-     {
-       "ledgerId1": "ledgerMetadata1",
-       "ledgerId2": "ledgerMetadata2",
-       ...
-     }
-     ```
-
-### Endpoint: /api/v1/ledger/metadata/?ledger_id=<ledger_id>
-1. Method: GET
-   * Description: Get the metadata of a ledger.
-   * Parameters:
-
-     | Name | Type | Required | Description |
-     |:-----|:-----|:---------|:------------|
-     |ledger_id | Long | Yes | ledger id of the ledger. |
-   * Response:
-
-     | Code | Description |
-     |:-------|:------------|
-     |200 | Successful operation |
-     |403 | Permission denied |
-     |404 | Not found |
-   * Response Body format:
-
-     ```json
-     {
-       "ledgerId1": "ledgerMetadata1"
-     }
-     ```
-
-### Endpoint: /api/v1/ledger/read/?ledger_id=<ledger_id>&start_entry_id=<start_entry_id>&end_entry_id=<end_entry_id>
-1. Method: GET
-   * Description: Read a range of entries from a ledger.
-   * Parameters:
-
-     | Name | Type | Required | Description |
-     |:-----|:-----|:---------|:------------|
-     |ledger_id | Long | Yes | ledger id of the ledger. |
-     |start_entry_id | Long | No | start entry id of the read range. |
-     |end_entry_id | Long | No | end entry id of the read range. |
-   * Response:
-
-     | Code | Description |
-     |:-------|:------------|
-     |200 | Successful operation |
-     |403 | Permission denied |
-     |404 | Not found |
-   * Response Body format:
-
-     ```json
-     {
-       "entryId1": "entry content 1",
-       "entryId2": "entry content 2",
-       ...
-     }
-     ```
-
-## Bookie
-
-### Endpoint: /api/v1/bookie/list_bookies/?type=<type>&print_hostnames=<hostnames>
-1. Method: GET
-   * Description: Get all the available bookies.
-   * Parameters:
-
-     | Name | Type | Required | Description |
-     |:-----|:-----|:---------|:------------|
-     |type | String | Yes | value: "rw" or "ro"; list read-write or read-only bookies. |
-     |print_hostnames | Boolean | No | whether to print the hostnames of bookies. |
-   * Response:
-
-     | Code | Description |
-     |:-------|:------------|
-     |200 | Successful operation |
-     |403 | Permission denied |
-     |404 | Not found |
-   * Response Body format:
-
-     ```json
-     {
-       "bookieSocketAddress1": "hostname1",
-       "bookieSocketAddress2": "hostname2",
-       ...
-     }
-     ```
-
-### Endpoint: /api/v1/bookie/list_bookie_info
-1. Method: GET
-   * Description: Get disk usage info for the bookies in this cluster.
-   * Response:
-
-     | Code | Description |
-     |:-------|:------------|
-     |200 | Successful operation |
-     |403 | Permission denied |
-     |404 | Not found |
-   * Response Body format:
-
-     ```json
-     {
-       "bookieAddress" : {free: xxx, total: xxx},
-       "bookieAddress" : {free: xxx, total: xxx},
-       ...
-       "clusterInfo" : {total_free: xxx, total: xxx}
-     }
-     ```
-
-### Endpoint: /api/v1/bookie/last_log_mark
-1. Method: GET
-   * Description: Get the last log marker.
-   * Response:
-
-     | Code | Description |
-     |:-------|:------------|
-     |200 | Successful operation |
-     |403 | Permission denied |
-     |404 | Not found |
-   * Response Body format:
-
-     ```json
-     {
-       JournalId1 : position1,
-       JournalId2 : position2,
-       ...
-     }
-     ```
-
-### Endpoint: /api/v1/bookie/list_disk_file/?file_type=<type>
-1. Method: GET
-   * Description: Get all the files on disk of the current bookie.
-   * Parameters:
-
-     | Name | Type | Required | Description |
-     |:-----|:-----|:---------|:------------|
-     |file_type | String | No | file type: journal/entrylog/index. |
-   * Response:
-
-     | Code | Description |
-     |:-------|:------------|
-     |200 | Successful operation |
-     |403 | Permission denied |
-     |404 | Not found |
-   * Response Body format:
-
-     ```json
-     {
-       "journal files" : "filename1 filename2 ...",
-       "entrylog files" : "filename1 filename2 ...",
-       "index files" : "filename1 filename2 ..."
-     }
-     ```
-
-### Endpoint: /api/v1/bookie/expand_storage
-1. Method: PUT
-   * Description: Expand storage for a bookie.
-   * Response:
-
-     | Code | Description |
-     |:-------|:------------|
-     |200 | Successful operation |
-     |403 | Permission denied |
-     |404 | Not found |
-
-## Auto recovery
-
-### Endpoint: /api/v1/autorecovery/bookie/
-1. Method: PUT
-   * Description: Ledger data recovery for a failed bookie
-   * Body:
-     ```json
-     {
-       "bookie_src": [ "bookie_src1", "bookie_src2"... ],
-       "bookie_dest": [ "bookie_dest1", "bookie_dest2"... ],
-       "delete_cookie": <boolean>
-     }
-     ```
-   * Parameters:
-
-     | Name | Type | Required | Description |
-     |:-----|:-----|:---------|:------------|
-     |bookie_src | Strings | Yes | bookie source to recover |
-     |bookie_dest | Strings | No | bookie data recovery destination |
-     |delete_cookie | Boolean | No | Whether to delete the cookie |
-   * Response:
-
-     | Code | Description |
-     |:-------|:------------|
-     |200 | Successful operation |
-     |403 | Permission denied |
-     |404 | Not found |
-
-### Endpoint: /api/v1/autorecovery/list_under_replicated_ledger/?missingreplica=<bookie_address>&excludingmissingreplica=<bookie_address>
-1. Method: GET
-   * Description: Get all under-replicated ledgers.
-   * Parameters:
-
-     | Name | Type | Required | Description |
-     |:-----|:-----|:---------|:------------|
-     |missingreplica | String | No | missing replica bookieId |
-     |excludingmissingreplica | String | No | exclude missing replica bookieId |
-   * Response:
-
-     | Code | Description |
-     |:-------|:------------|
-     |200 | Successful operation |
-     |403 | Permission denied |
-     |404 | Not found |
-   * Response Body format:
-
-     ```json
-     [ledgerId1, ledgerId2...]
-     ```
-
-### Endpoint: /api/v1/autorecovery/who_is_auditor
-1. Method: GET
-   * Description: Get the auditor bookie id.
-   * Response:
-
-     | Code | Description |
-     |:-------|:------------|
-     |200 | Successful operation |
-     |403 | Permission denied |
-     |404 | Not found |
-   * Response Body format:
-
-     ```json
-     {
-       "Auditor": "hostname/hostAddress:Port"
-     }
-     ```
-
-### Endpoint: /api/v1/autorecovery/trigger_audit
-1. Method: PUT
-   * Description: Force trigger an audit by resetting the lostBookieRecoveryDelay.
-   * Response:
-
-     | Code | Description |
-     |:-------|:------------|
-     |200 | Successful operation |
-     |403 | Permission denied |
-     |404 | Not found |
-
-### Endpoint: /api/v1/autorecovery/lost_bookie_recovery_delay
-1. Method: GET
-   * Description: Get the lostBookieRecoveryDelay value in seconds.
-   * Response:
-
-     | Code | Description |
-     |:-------|:------------|
-     |200 | Successful operation |
-     |403 | Permission denied |
-     |404 | Not found |
-
-1. Method: PUT
-   * Description: Set the lostBookieRecoveryDelay value in seconds.
-   * Body:
-     ```json
-     {
-       "delay_seconds": <delay_seconds>
-     }
-     ```
-   * Parameters:
-
-     | Name | Type | Required | Description |
-     |:-----|:-----|:---------|:------------|
-     | delay_seconds | Long | Yes | set delay value in seconds. |
-   * Response:
-
-     | Code | Description |
-     |:-------|:------------|
-     |200 | Successful operation |
-     |403 | Permission denied |
-     |404 | Not found |
-
-### Endpoint: /api/v1/autorecovery/decommission
-1. Method: PUT
-   * Description: Decommission a bookie: force trigger the audit task and make sure all the ledgers stored on the decommissioning bookie are replicated.
-   * Body:
-     ```json
-     {
-       "bookie_src": <bookie_src>
-     }
-     ```
-   * Parameters:
-
-     | Name | Type | Required | Description |
-     |:-----|:-----|:---------|:------------|
-     | bookie_src | String | Yes | Bookie to decommission. |
-   * Response:
-
-     | Code | Description |
-     |:-------|:------------|
-     |200 | Successful operation |
-     |403 | Permission denied |
-     |404 | Not found |
diff --git a/site/docs/latest/admin/metrics.md b/site/docs/latest/admin/metrics.md
deleted file mode 100644
index 142df3dcd2d..00000000000
--- a/site/docs/latest/admin/metrics.md
+++ /dev/null
@@ -1,41 +0,0 @@
----
-title: Metric collection
----
-
-BookKeeper enables metrics collection through a variety of [stats providers](#stats-providers).
-
-> For a full listing of available metrics, see the [Metrics](../../reference/metrics) reference doc.
-
-## Stats providers
-
-BookKeeper has stats provider implementations for five sinks:
-
-Provider | Provider class name
-:--------|:-------------------
-[Codahale Metrics](https://mvnrepository.com/artifact/org.apache.bookkeeper.stats/codahale-metrics-provider) | `org.apache.bookkeeper.stats.CodahaleMetricsProvider`
-[Prometheus](https://prometheus.io/) | `org.apache.bookkeeper.stats.prometheus.PrometheusMetricsProvider`
-[Finagle](https://twitter.github.io/finagle/guide/Metrics.html) | `org.apache.bookkeeper.stats.FinagleStatsProvider`
-[Ostrich](https://github.com/twitter/ostrich) | `org.apache.bookkeeper.stats.OstrichProvider`
-[Twitter Science Provider](https://mvnrepository.com/artifact/org.apache.bookkeeper.stats/twitter-science-provider) | `org.apache.bookkeeper.stats.TwitterStatsProvider`
-
-> The [Codahale Metrics]({{ site.github_master }}/bookkeeper-stats-providers/codahale-metrics-provider) stats provider is the default provider.
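-
-If you use the Prometheus provider, a quick way to confirm that metrics are actually being exported is to scrape the bookie's stats endpoint directly. The sketch below assumes a bookie configured with the Prometheus provider (see the next section for how to select a provider) and assumes it listens on its default HTTP stats port, 8000; both the provider choice and the port are configuration-dependent, so verify them against your deployment.
-
-```shell
-# Scrape the bookie's Prometheus endpoint and show a few BookKeeper metrics.
-# Assumes statsProviderClass points at the Prometheus provider and that the
-# provider listens on its default port (8000); adjust host/port as needed.
-$ curl -s http://localhost:8000/metrics | grep -i bookie | head
-```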
-
-## Enabling stats providers in bookies
-
-There are two stats-related [configuration parameters](../../reference/config#statistics) available for bookies:
-
-Parameter | Description | Default
-:---------|:------------|:-------
-`enableStatistics` | Whether statistics are enabled for the bookie | `false`
-`statsProviderClass` | The stats provider class used by the bookie | `org.apache.bookkeeper.stats.CodahaleMetricsProvider`
-
-To enable stats:
-
-* set the `enableStatistics` parameter to `true`
-* set `statsProviderClass` to the desired provider (see the [table above](#stats-providers) for a listing of classes)
-
diff --git a/site/docs/latest/admin/perf.md b/site/docs/latest/admin/perf.md
deleted file mode 100644
index 82956326e5d..00000000000
--- a/site/docs/latest/admin/perf.md
+++ /dev/null
@@ -1,3 +0,0 @@
----
-title: Performance tuning
----
diff --git a/site/docs/latest/admin/placement.md b/site/docs/latest/admin/placement.md
deleted file mode 100644
index ded456e1aea..00000000000
--- a/site/docs/latest/admin/placement.md
+++ /dev/null
@@ -1,3 +0,0 @@
----
-title: Customized placement policies
----
diff --git a/site/docs/latest/admin/upgrade.md b/site/docs/latest/admin/upgrade.md
deleted file mode 100644
index 57c65208131..00000000000
--- a/site/docs/latest/admin/upgrade.md
+++ /dev/null
@@ -1,175 +0,0 @@
----
-title: Upgrade
----
-
-> If you have questions about upgrades (or need help), please feel free to reach out to us by [mailing list]({{ site.baseurl }}community/mailing-lists) or [Slack Channel]({{ site.baseurl }}community/slack).
-
-## Overview
-
-Consider the guidelines below in preparation for upgrading.
-
-- Always back up all your configuration files before upgrading.
-- Read through the documentation and draft an upgrade plan that matches your specific requirements and environment before starting the upgrade process.
-  Put differently, don't start working through the guide on a live cluster. Read the guide entirely, make a plan, then execute the plan.
-- Pay careful attention to the order in which components are upgraded. In general, you need to upgrade bookies first and then upgrade your clients.
-- If autorecovery is running along with bookies, you need to pay attention to the upgrade sequence.
-- Read the release notes carefully for each release. They contain not only information about noteworthy features, but also changes to configurations
-  that may impact your upgrade.
-- Always upgrade one or a small set of bookies to canary the new version before upgrading all bookies in your cluster.
-
-## Canary
-
-It is wise to canary an upgraded version on one or a small set of bookies before upgrading all bookies in your live cluster.
-
-You can follow the steps below to canary an upgraded version (a shell sketch of this sequence appears after the rollback notes below):
-
-1. Stop a Bookie.
-2. Upgrade the binary and configuration.
-3. Start the Bookie in `ReadOnly` mode. This can be used to verify that the new version of the Bookie handles read workloads correctly.
-4. Once the Bookie has been running in `ReadOnly` mode successfully for a while, restart the Bookie in `Write/Read` mode.
-5. After step 4, the Bookie will serve both write and read traffic.
-
-### Rollback Canaries
-
-If problems occur while canarying an upgraded version, you can simply take down the problematic Bookie node. The remaining bookies in the old cluster
-will repair the problematic bookie node via autorecovery. There is nothing further to worry about.
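-
-The canary sequence above can be scripted. The sketch below is one possible way to do it, assuming the bundled `bookkeeper-daemon.sh` script manages your bookie and assuming the `forceReadOnlyBookie` server setting is used to hold the upgraded bookie in read-only mode; both the script usage and the setting are assumptions to adapt to your own service manager and configuration layout.
-
-```shell
-# Stop the bookie, then install the new binary and configuration.
-# Assumes bookkeeper-daemon.sh and the forceReadOnlyBookie setting;
-# verify both against your deployment before relying on this sequence.
-$ bookkeeper-server/bin/bookkeeper-daemon.sh stop bookie
-
-# ... install the upgraded binary and configuration here ...
-
-# Hold the upgraded bookie in read-only mode and start it.
-$ echo "forceReadOnlyBookie=true" >> bookkeeper-server/conf/bk_server.conf
-$ bookkeeper-server/bin/bookkeeper-daemon.sh start bookie
-
-# Once the read-only canary looks healthy, re-enable writes and restart.
-$ sed -i.bak 's/^forceReadOnlyBookie=true/forceReadOnlyBookie=false/' bookkeeper-server/conf/bk_server.conf
-$ bookkeeper-server/bin/bookkeeper-daemon.sh stop bookie
-$ bookkeeper-server/bin/bookkeeper-daemon.sh start bookie
-```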
-
-## Upgrade Steps
-
-Once you have determined that a version is safe to run on a few nodes in your cluster, you can perform the following steps to upgrade all bookies in your cluster.
-
-1. Determine whether autorecovery is running along with the bookies. If yes, check whether the clients (either new clients with the new binary or old clients with new configurations)
-   are allowed to talk to old bookies; if clients are not allowed to talk to old bookies, please [disable autorecovery](../../reference/cli/#autorecovery-1) during the upgrade.
-2. Decide on performing a rolling upgrade or a downtime upgrade.
-3. Upgrade all Bookies (more below).
-4. If autorecovery was disabled during the upgrade, [enable autorecovery](../../reference/cli/#autorecovery-1).
-5. After all bookies are upgraded, build applications that use the `BookKeeper client` against the new bookkeeper libraries and deploy the new versions.
-
-### Upgrade Bookies
-
-In a rolling upgrade scenario, upgrade one Bookie at a time. In a downtime upgrade scenario, take the entire cluster down, upgrade each Bookie, then start the cluster.
-
-For each Bookie:
-
-1. Stop the bookie.
-2. Upgrade the software (either new binary or new configuration).
-3. Start the bookie.
-
-## Upgrade Guides
-
-We described the general upgrade method for Apache BookKeeper above. The following sections cover the details for individual versions.
-
-### 4.6.x to 4.7.0 upgrade
-
-There aren't any protocol-related backward compatibility changes in 4.7.0, so you can follow the general upgrade sequence to upgrade from 4.6.x to 4.7.0.
-
-However, here is a list of changes that you might want to know about.
-
-#### Common Configuration Changes
-
-This section documents the common configuration changes that apply to both clients and servers.
-
-##### New Settings
-
-The following settings are newly added in 4.7.0.
-
-| Name | Default Value | Description |
-|------|---------------|-------------|
-| allowShadedLedgerManagerFactoryClass | false | This allows the bookkeeper client to connect to a bookkeeper cluster using a shaded ledger manager factory |
-| shadedLedgerManagerFactoryClassPrefix | `dlshade.` | The shaded ledger manager factory prefix. This is used when `allowShadedLedgerManagerFactoryClass` is set to true |
-| metadataServiceUri | null | The metadata service uri that bookkeeper uses for loading the corresponding metadata driver and resolving its metadata service location |
-| permittedStartupUsers | null | The list of users permitted to run the bookie process. Any user can run the bookie process if it is not set |
-
-##### Deprecated Settings
-
-There are no common settings deprecated in 4.7.0.
-
-##### Changed Settings
-
-There are no common settings whose default values changed in 4.7.0.
-
-#### Server Configuration Changes
-
-##### New Settings
-
-The following settings are newly added in 4.7.0.
-
-| Name | Default Value | Description |
-|------|---------------|-------------|
-| verifyMetadataOnGC | false | Whether the bookie is configured to double check the ledgers' metadata prior to garbage collecting them |
-| auditorLedgerVerificationPercentage | 0 | The percentage of a ledger (fragment)'s entries that will be verified by the Auditor before claiming a ledger (fragment) is missing |
-| numHighPriorityWorkerThreads | 8 | The number of threads that should be used for high priority requests (i.e. recovery reads and adds, and fencing). If zero, reads are handled by Netty threads directly. |
-| useShortHostName | false | Whether the bookie should use the short hostname or the [FQDN](https://en.wikipedia.org/wiki/Fully_qualified_domain_name) hostname for registration and ledger metadata when useHostNameAsBookieID is enabled. |
-| minUsableSizeForEntryLogCreation | 1.2 * `logSizeLimit` | Minimum safe usable size to be available in the ledger directory for the bookie to create entry log files (in bytes). |
-| minUsableSizeForHighPriorityWrites | 1.2 * `logSizeLimit` | Minimum safe usable size to be available in the ledger directory for the bookie to accept high priority writes even when it is in readonly mode. |
-
-##### Deprecated Settings
-
-The following settings are deprecated since 4.7.0.
-
-| Name | Description |
-|------|-------------|
-| registrationManagerClass | The registration manager class used by the server to discover the registration manager. It is replaced by `metadataServiceUri`. |
-
-##### Changed Settings
-
-The default values of the following settings changed in 4.7.0.
-
-| Name | Old Default Value | New Default Value | Notes |
-|------|-------------------|-------------------|-------|
-| numLongPollWorkerThreads | 10 | 0 | If the number of threads is zero or negative, the bookie falls back to using read threads for long polling. This avoids creating threads if the application doesn't use the long-poll feature. |
-
-#### Client Configuration Changes
-
-##### New Settings
-
-The following settings are newly added in 4.7.0.
-
-| Name | Default Value | Description |
-|------|---------------|-------------|
-| maxNumEnsembleChanges | Integer.MAX\_VALUE | The max allowed number of ensemble changes before sealing a ledger on failures |
-| timeoutMonitorIntervalSec | min(`addEntryTimeoutSec`, `addEntryQuorumTimeoutSec`, `readEntryTimeoutSec`) | The interval between successive executions of the operation timeout monitor, in seconds |
-| ensemblePlacementPolicyOrderSlowBookies | false | Flag to enable/disable reordering slow bookies in the placement policy |
-
-##### Deprecated Settings
-
-The following settings are deprecated since 4.7.0.
-
-| Name | Description |
-|------|-------------|
-| clientKeyStoreType | Replaced by `tlsKeyStoreType` |
-| clientKeyStore | Replaced by `tlsKeyStore` |
-| clientKeyStorePasswordPath | Replaced by `tlsKeyStorePasswordPath` |
-| clientTrustStoreType | Replaced by `tlsTrustStoreType` |
-| clientTrustStore | Replaced by `tlsTrustStore` |
-| clientTrustStorePasswordPath | Replaced by `tlsTrustStorePasswordPath` |
-| registrationClientClass | The registration client class used by the client to discover the registration service. It is replaced by `metadataServiceUri`. |
-
-##### Changed Settings
-
-The default values of the following settings changed in 4.7.0.
-
-| Name | Old Default Value | New Default Value | Notes |
-|------|-------------------|-------------------|-------|
-| enableDigestTypeAutodetection | false | true | Autodetect the digest type and passwd when opening a ledger. It will ignore the provided digest type, but still verify the provided passwd. |
-
-### 4.7.x to 4.8.x upgrade
-
-In 4.8.x a new feature is added to persist explicitLac in FileInfo and the explicitLac entry in the Journal. (Note: currently this feature is not available if your ledgerStorageClass is DbLedgerStorage; ISSUE #1533 is going to address it.) Hence the current journal format version is bumped to 6 and the current FileInfo header version is bumped to 1. But since the default config values of 'journalFormatVersionToWrite' and 'fileInfoFormatVersionToWrite' are set to older versions, this feature is off by default.
-To enable this feature, those config values should be set to the current versions. Once this is enabled, you cannot roll back to previous Bookie versions (4.7.x and older), since older version code would not be able to deal with the explicitLac entry in the journal file while replaying the journal, and reading the header of index files / FileInfo would fail with the newer FileInfo version. In summary, it is a non-rollbackable feature, and it applies even if explicitLac is not being used.
-
-### 4.5.x to 4.6.x upgrade
-
-There aren't any protocol-related backward compatibility changes in 4.6.x, so you can follow the general upgrade sequence to upgrade from 4.5.x to 4.6.x.
-
-### 4.4.x to 4.5.x upgrade
-
-There aren't any protocol-related backward compatibility changes in 4.5.0, so you can follow the general upgrade sequence to upgrade from 4.4.x to 4.5.x.
-However, here is a list of things that you might want to know.
-
-1. 4.5.x upgrades netty from 3.x to 4.x. The memory usage pattern might change a bit. Netty 4 uses more direct memory. Please pay attention to your memory usage
-   and adjust the JVM settings accordingly.
-2. `multi journals` is a non-rollbackable feature. If you configure a bookie to use multiple journals on 4.5.x, you cannot roll the bookie back to 4.4.x. You have
-   to take the bookie out and recover it if you want to roll back to 4.4.x.
-
-If you are planning to upgrade a non-secured cluster to a secured cluster by enabling the security features in 4.5.0, please read [BookKeeper Security](../../security/overview) for more details.
diff --git a/site/docs/latest/api/distributedlog-api.md b/site/docs/latest/api/distributedlog-api.md
deleted file mode 100644
index f073b291048..00000000000
--- a/site/docs/latest/api/distributedlog-api.md
+++ /dev/null
@@ -1,395 +0,0 @@
----
-title: DistributedLog
-subtitle: A higher-level API for managing BookKeeper entries
----
-
-> DistributedLog began its life as a separate project under the Apache Foundation. It was merged into BookKeeper in 2017.
-
-The DistributedLog API is an easy-to-use interface for managing BookKeeper entries that enables you to use BookKeeper without needing to interact with [ledgers](../ledger-api) directly.
-
-DistributedLog (DL) maintains sequences of records in categories called *logs* (aka *log streams*). *Writers* append records to DL logs, while *readers* fetch and process those records.
-
-## Architecture
-
-The diagram below illustrates how the DistributedLog API works with BookKeeper:
-
-![DistributedLog API]({{ site.baseurl }}img/distributedlog.png)
-
-## Logs
-
-A *log* in DistributedLog is an ordered, immutable sequence of *log records*.
-
-The diagram below illustrates the anatomy of a log stream:
-
-![DistributedLog log]({{ site.baseurl }}img/logs.png)
-
-### Log records
-
-Each log record is a sequence of bytes. Applications are responsible for serializing and deserializing byte sequences stored in log records.
-
-Log records are written sequentially into a *log stream* and assigned a unique sequence number called a DLSN (DistributedLog Sequence Number).
-
-In addition to a DLSN, applications can assign their own sequence number when constructing log records. Application-defined sequence numbers are known as *TransactionIDs* (or *txid*). Either a DLSN or a TransactionID can be used for positioning readers to start reading from a specific log record.
-
-### Log segments
-
-Each log is broken down into *log segments* that contain subsets of records.
-Log segments are distributed and stored in BookKeeper. DistributedLog rolls the log segments based on the configured *rolling policy*, which can be either
-
-* a configurable period of time (such as every 2 hours), or
-* a configurable maximum size (such as every 128 MB).
-
-The data in logs is divided up into equally sized log segments and distributed evenly across {% pop bookies %}. This allows logs to scale beyond a size that would fit on a single server and spreads read traffic across the cluster.
-
-### Namespaces
-
-Log streams that belong to the same organization are typically categorized and managed under a *namespace*. DistributedLog namespaces essentially enable applications to locate log streams. Applications can perform the following actions under a namespace:
-
-* create streams
-* delete streams
-* truncate streams to a given sequence number (either a DLSN or a TransactionID)
-
-## Writers
-
-Through the DistributedLog API, writers write data into logs of their choice. All records are appended into logs in order. The sequencing is performed by the writer, which means that there is only one active writer for a log at any given time.
-
-DistributedLog uses a *fencing* mechanism in the log segment store to guarantee correctness when two writers attempt to write to the same log during a network partition.
-
-### Write Proxy
-
-Log writers are served and managed in a service tier called the *Write Proxy* (see the diagram [above](#architecture)). The Write Proxy is used for accepting writes from a large number of clients.
-
-## Readers
-
-DistributedLog readers read records from logs of their choice, starting with a provided position. The provided position can be either a DLSN or a TransactionID.
-
-Readers read records from logs in strict order. Different readers can read records from different positions in the same log.
-
-Unlike other pub-sub systems, DistributedLog doesn't record or manage readers' positions. This means that tracking is the responsibility of applications, as different applications may have different requirements for tracking and coordinating positions. This is hard to get right with a single approach. Distributed databases, for example, might store reader positions along with SSTables, so they would resume applying transactions from the positions stored in SSTables. Tracking reader positions can easily be done at the application level using various stores (such as ZooKeeper, the filesystem, or key-value stores).
-
-### Read Proxy
-
-Log records can be cached in a service tier called the *Read Proxy* to serve a large number of readers. See the diagram [above](#architecture). The Read Proxy is the analogue of the [Write Proxy](#write-proxy).
-
-## Guarantees
-
-The DistributedLog API for BookKeeper provides a number of guarantees for applications:
-
-* Records written by a [writer](#writers) to a [log](#logs) are appended in the order in which they are written. If a record **R1** is written by the same writer as a record **R2**, **R1** will have a smaller sequence number than **R2**.
-* [Readers](#readers) see [records](#log-records) in the same order in which they are [written](#writers) to the log.
-* All records are persisted on disk by BookKeeper before acknowledgements, which guarantees durability.
-* For a log with a replication factor of N, DistributedLog tolerates up to N-1 server failures without losing any records.
-
-## API
-
-Documentation for the DistributedLog API can be found [here](https://bookkeeper.apache.org/distributedlog/docs/latest/user_guide/api/core).
-
-> At a later date, the DistributedLog API docs will be added here.
-
diff --git a/site/docs/latest/api/ledger-adv-api.md b/site/docs/latest/api/ledger-adv-api.md
deleted file mode 100644
index df6224dd7ec..00000000000
--- a/site/docs/latest/api/ledger-adv-api.md
+++ /dev/null
@@ -1,111 +0,0 @@
----
-title: The Advanced Ledger API
----
-
-In release `4.5.0`, Apache BookKeeper introduced a few advanced APIs for advanced usage.
-This section covers these advanced APIs.
-
-> Before learning the advanced API, please read the [Ledger API](../ledger-api) first.
-
-## LedgerHandleAdv
-
-[`LedgerHandleAdv`](../javadoc/org/apache/bookkeeper/client/LedgerHandleAdv) is an advanced extension of [`LedgerHandle`](../javadoc/org/apache/bookkeeper/client/LedgerHandle).
-It allows users to pass in an `entryId` when adding an entry.
-
-### Creating advanced ledgers
-
-Here's an example:
-
-```java
-byte[] passwd = "some-passwd".getBytes();
-LedgerHandleAdv handle = bkClient.createLedgerAdv(
-    3, 3, 2, // replica settings
-    DigestType.CRC32,
-    passwd);
-```
-
-You can also create advanced ledgers asynchronously.
-
-```java
-class LedgerCreationCallback implements AsyncCallback.CreateCallback {
-    public void createComplete(int returnCode, LedgerHandle handle, Object ctx) {
-        System.out.println("Ledger successfully created");
-    }
-}
-client.asyncCreateLedgerAdv(
-        3, // ensemble size
-        3, // write quorum size
-        2, // ack quorum size
-        BookKeeper.DigestType.CRC32,
-        password,
-        new LedgerCreationCallback(),
-        "some context"
-);
-```
-
-Besides the APIs above, BookKeeper allows users to provide a `ledger-id` when creating advanced ledgers.
-
-```java
-long ledgerId = ...; // the ledger id is generated externally.
-
-byte[] passwd = "some-passwd".getBytes();
-LedgerHandleAdv handle = bkClient.createLedgerAdv(
-    ledgerId, // ledger id generated externally
-    3, 3, 2, // replica settings
-    DigestType.CRC32,
-    passwd);
-```
-
-> Please note, it is the user's responsibility to provide a unique ledger id when using the API above.
-> If a ledger already exists when users try to create an advanced ledger with the same ledger id,
-> a [LedgerExistsException](../javadoc/org/apache/bookkeeper/client/BKException.BKLedgerExistException.html) is thrown by the bookkeeper client.
-
-Creating advanced ledgers can be done through a fluent API since 4.6.
-
-```java
-BookKeeper bk = ...;
-
-byte[] passwd = "some-passwd".getBytes();
-
-WriteHandle wh = bk.newCreateLedgerOp()
-    .withDigestType(DigestType.CRC32)
-    .withPassword(passwd)
-    .withEnsembleSize(3)
-    .withWriteQuorumSize(3)
-    .withAckQuorumSize(2)
-    .makeAdv()                  // convert the create ledger builder to create ledger adv builder
-    .withLedgerId(1234L)
-    .execute()                  // execute the creation op
-    .get();                     // wait for the execution to complete
-```
-
-### Add Entries
-
-The normal [add entries api](ledger-api/#adding-entries-to-ledgers) is disabled in advanced ledgers. Instead, when users want to add entries
-to advanced ledgers, they must pass in an entry id along with the entry data when adding an entry.
-
-```java
-long entryId = ...; // entry id generated externally
-
-ledger.addEntry(entryId, "Some entry data".getBytes());
-```
-
-If you are using the new API, you can do as follows:
-
-```java
-WriteHandle wh = ...;
-long entryId = ...; // entry id generated externally
-
-wh.write(entryId, "Some entry data".getBytes()).get();
-```
-
-A few notes when using this API:
-
-- The entry id has to be non-negative.
-- Clients may add entries out of order.
-- However, the entries are only acknowledged in a monotonic order starting from 0.
-
-### Read Entries
-
-The read entries api in advanced ledgers remains the same as for [normal ledgers](../ledger-api/#reading-entries-from-ledgers).
diff --git a/site/docs/latest/api/ledger-api.md b/site/docs/latest/api/ledger-api.md
deleted file mode 100644
index b6cb0f02a95..00000000000
--- a/site/docs/latest/api/ledger-api.md
+++ /dev/null
@@ -1,841 +0,0 @@
----
-title: The Ledger API
----
-
-The ledger API is a lower-level API for BookKeeper that enables you to interact with {% pop ledgers %} directly.
-
-## The Java ledger API client
-
-To get started with the Java client for BookKeeper, install the `bookkeeper-server` library as a dependency in your Java application.
-
-> For a more in-depth tutorial that involves a real use case for BookKeeper, see the [Example application](../example-application) guide.
-
-## Installation
-
-The BookKeeper Java client library is available via [Maven Central](http://search.maven.org/) and can be installed using [Maven](#maven), [Gradle](#gradle), and other build tools.
-
-### Maven
-
-If you're using [Maven](https://maven.apache.org/), add this to your [`pom.xml`](https://maven.apache.org/guides/introduction/introduction-to-the-pom.html) build configuration file:
-
-```xml
-<!-- in your <properties> block -->
-<bookkeeper.version>{{ site.latest_version }}</bookkeeper.version>
-
-<!-- in your <dependencies> block -->
-<dependency>
-  <groupId>org.apache.bookkeeper</groupId>
-  <artifactId>bookkeeper-server</artifactId>
-  <version>${bookkeeper.version}</version>
-</dependency>
-```
-
-BookKeeper makes heavy use of Google's [protobuf](https://github.com/google/protobuf/tree/master/java) and [guava](https://github.com/google/guava) libraries.
-If your application might include different versions of protobuf or guava introduced by other dependencies, you can choose to use the
-shaded library, which relocates the classes of protobuf and guava into a different namespace to avoid conflicts.
-
-```xml
-<!-- in your <properties> block -->
-<bookkeeper.version>{{ site.latest_version }}</bookkeeper.version>
-
-<!-- in your <dependencies> block -->
-<dependency>
-  <groupId>org.apache.bookkeeper</groupId>
-  <artifactId>bookkeeper-server-shaded</artifactId>
-  <version>${bookkeeper.version}</version>
-</dependency>
-```
-
-### Gradle
-
-If you're using [Gradle](https://gradle.org/), add this to your [`build.gradle`](https://spring.io/guides/gs/gradle/) build configuration file:
-
-```groovy
-dependencies {
-    compile group: 'org.apache.bookkeeper', name: 'bookkeeper-server', version: '{{ site.latest_version }}'
-}
-
-// Alternatively:
-dependencies {
-    compile 'org.apache.bookkeeper:bookkeeper-server:{{ site.latest_version }}'
-}
-```
-
-As with Maven, you can also configure your build to use the shaded jars.
-
-```groovy
-// use the `bookkeeper-server-shaded` jar
-dependencies {
-    compile 'org.apache.bookkeeper:bookkeeper-server-shaded:{{ site.latest_version }}'
-}
-```
-
-## Connection string
-
-When interacting with BookKeeper using the Java client, you need to provide your client with a connection string, for which you have three options:
-
-* Provide your entire ZooKeeper connection string, for example `zk1:2181,zk2:2181,zk3:2181`.
-* Provide a host and port for one node in your ZooKeeper cluster, for example `zk1:2181`.
In general, it's better to provide a full connection string (in case the ZooKeeper node you attempt to connect to is down). -* If your ZooKeeper cluster can be discovered via DNS, you can provide the DNS name, for example `my-zookeeper-cluster.com`. - -## Creating a new client - -In order to create a new [`BookKeeper`](../javadoc/org/apache/bookkeeper/client/BookKeeper) client object, you need to pass in a [connection string](#connection-string). Here is an example client object using a ZooKeeper connection string: - -```java -try { - String connectionString = "127.0.0.1:2181"; // For a single-node, local ZooKeeper cluster - BookKeeper bkClient = new BookKeeper(connectionString); -} catch (InterruptedException | IOException | KeeperException e) { - e.printStackTrace(); -} -``` - -> If you're running BookKeeper [locally](../../getting-started/run-locally), using the [`localbookie`](../../reference/cli#bookkeeper-localbookie) command, use `"127.0.0.1:2181"` for your connection string, as in the example above. - -There are, however, other ways that you can create a client object: - -* By passing in a [`ClientConfiguration`](../javadoc/org/apache/bookkeeper/conf/ClientConfiguration) object. Here's an example: - - ```java - ClientConfiguration config = new ClientConfiguration(); - config.setZkServers(zkConnectionString); - config.setAddEntryTimeout(2000); - BookKeeper bkClient = new BookKeeper(config); - ``` - -* By specifying a `ClientConfiguration` and a [`ZooKeeper`](http://zookeeper.apache.org/doc/current/api/org/apache/zookeeper/ZooKeeper.html) client object: - - ```java - ClientConfiguration config = new ClientConfiguration(); - config.setAddEntryTimeout(5000); - ZooKeeper zkClient = new ZooKeeper(/* client args */); - BookKeeper bkClient = new BookKeeper(config, zkClient); - ``` - -* Using the `forConfig` method: - - ```java - BookKeeper bkClient = BookKeeper.forConfig(conf).build(); - ``` - -## Creating ledgers - -The easiest way to create a {% pop ledger %} using the Java client is via the `createLedger` method, which creates a new ledger synchronously and returns a [`LedgerHandle`](../javadoc/org/apache/bookkeeper/client/LedgerHandle). You must specify at least a [`DigestType`](../javadoc/org/apache/bookkeeper/client/BookKeeper.DigestType) and a password. 
-
-Here's an example:
-
-```java
-byte[] password = "some-password".getBytes();
-LedgerHandle handle = bkClient.createLedger(BookKeeper.DigestType.MAC, password);
-```
-
-You can also create ledgers asynchronously.
-
-### Create ledgers asynchronously
-
-```java
-class LedgerCreationCallback implements AsyncCallback.CreateCallback {
-    public void createComplete(int returnCode, LedgerHandle handle, Object ctx) {
-        System.out.println("Ledger successfully created");
-    }
-}
-
-client.asyncCreateLedger(
-        3,
-        2,
-        BookKeeper.DigestType.MAC,
-        password,
-        new LedgerCreationCallback(),
-        "some context"
-);
-```
-
-## Adding entries to ledgers
-
-```java
-long entryId = ledger.addEntry("Some entry data".getBytes());
-```
-
-### Add entries asynchronously
-
-## Reading entries from ledgers
-
-```java
-Enumeration<LedgerEntry> entries = handle.readEntries(1, 99);
-```
-
-To read all possible entries from the ledger:
-
-```java
-Enumeration<LedgerEntry> entries =
-    handle.readEntries(0, handle.getLastAddConfirmed());
-
-while (entries.hasMoreElements()) {
-    LedgerEntry entry = entries.nextElement();
-    System.out.println("Successfully read entry " + entry.getEntryId());
-}
-```
-
-### Reading entries after the LastAddConfirmed range
-
-`readUnconfirmedEntries` allows reading beyond the LastAddConfirmed range.
-It lets the client read without checking the local value of LastAddConfirmed, so it is possible to read entries for which the writer has not yet received an acknowledgement.
-For entries within the range 0..LastAddConfirmed, BookKeeper guarantees that the writer has successfully received the acknowledgement.
-For entries outside that range it is possible that the writer never received the acknowledgement, so there is the risk that the reader sees entries before the writer does, which could result in a consistency issue in some cases.
-With this method you can read entries before the LastAddConfirmed and entries after it with one call; the expected consistency is as described above.
-
-```java
-Enumeration<LedgerEntry> entries =
-    handle.readUnconfirmedEntries(0, lastEntryIdExpectedToRead);
-
-while (entries.hasMoreElements()) {
-    LedgerEntry entry = entries.nextElement();
-    System.out.println("Successfully read entry " + entry.getEntryId());
-}
-```
-
-## Deleting ledgers
-
-{% pop Ledgers %} can also be deleted synchronously or asynchronously.
-
-```java
-long ledgerId = 1234;
-
-try {
-    bkClient.deleteLedger(ledgerId);
-} catch (Exception e) {
-    e.printStackTrace();
-}
-```
-
-### Delete ledgers asynchronously
-
-Exceptions thrown:
-
-*
-
-```java
-class DeleteEntryCallback implements AsyncCallback.DeleteCallback {
-    public void deleteComplete(int rc, Object ctx) {
-        System.out.println("Delete completed");
-    }
-}
-```
-
-## Simple example
-
-> For a more involved BookKeeper client example, see the [example application](#example-application) below.
-
-In the code sample below, a BookKeeper client:
-
-* creates a ledger
-* writes entries to the ledger
-* closes the ledger (meaning no further writes are possible)
-* re-opens the ledger for reading
-* reads all available entries
-
-```java
-// Create a client object for the local ensemble. This
-// operation throws multiple exceptions, so make sure to
-// use a try/catch block when instantiating client objects.
-BookKeeper bkc = new BookKeeper("localhost:2181");
-
-// A password for the new ledger
-byte[] ledgerPassword = /* some sequence of bytes, perhaps random */;
-
-// Create a new ledger and fetch its identifier
-LedgerHandle lh = bkc.createLedger(BookKeeper.DigestType.MAC, ledgerPassword);
-long ledgerId = lh.getId();
-
-// Create a buffer for four-byte entries
-ByteBuffer entry = ByteBuffer.allocate(4);
-
-int numberOfEntries = 100;
-
-// Add entries to the ledger, then close it
-for (int i = 0; i < numberOfEntries; i++){
-    entry.putInt(i);
-    entry.position(0);
-    lh.addEntry(entry.array());
-}
-lh.close();
-
-// Open the ledger for reading
-lh = bkc.openLedger(ledgerId, BookKeeper.DigestType.MAC, ledgerPassword);
-
-// Read all available entries
-Enumeration<LedgerEntry> entries = lh.readEntries(0, numberOfEntries - 1);
-
-while (entries.hasMoreElements()) {
-    ByteBuffer result = ByteBuffer.wrap(entries.nextElement().getEntry());
-    Integer retrEntry = result.getInt();
-
-    // Print the integer stored in each entry
-    System.out.println(String.format("Result: %s", retrEntry));
-}
-
-// Close the ledger and the client
-lh.close();
-bkc.close();
-```
-
-Running this should return this output:
-
-```shell
-Result: 0
-Result: 1
-Result: 2
-# etc
-```
-
-## Example application
-
-This tutorial walks you through building an example application that uses BookKeeper as the replicated log. The application uses the [BookKeeper Java client](../java-client) to interact with BookKeeper.
-
-> The code for this tutorial can be found in [this GitHub repo](https://github.com/ivankelly/bookkeeper-tutorial/). The final code for the `Dice` class can be found [here](https://github.com/ivankelly/bookkeeper-tutorial/blob/master/src/main/java/org/apache/bookkeeper/Dice.java).
-
-### Setup
-
-Before you start, you will need to have a BookKeeper cluster running locally on your machine. For installation instructions, see [Installation](../../getting-started/installation).
-
-To start up a cluster consisting of six {% pop bookies %} locally:
-
-```shell
-$ bookkeeper-server/bin/bookkeeper localbookie 6
-```
-
-You can specify a different number of bookies if you'd like.
-
-### Goal
-
-The goal of the dice application is to have
-
-* multiple instances of this application,
-* possibly running on different machines,
-* all of which display the exact same sequence of numbers.
-
-In other words, the log needs to be both durable and consistent, regardless of how many {% pop bookies %} are participating in the BookKeeper ensemble. If one of the bookies crashes or becomes unable to communicate with the other bookies in any way, it should *still* display the same sequence of numbers as the others. This tutorial will show you how to achieve this.
-
-To begin, download the base application, then compile and run it.
-
-```shell
-$ git clone https://github.com/ivankelly/bookkeeper-tutorial.git
-$ mvn package
-$ mvn exec:java -Dexec.mainClass=org.apache.bookkeeper.Dice
-```
-
-That should yield output that looks something like this:
-
-```
-[INFO] Scanning for projects...
-[INFO]
-[INFO] ------------------------------------------------------------------------
-[INFO] Building tutorial 1.0-SNAPSHOT
-[INFO] ------------------------------------------------------------------------
-[INFO]
-[INFO] --- exec-maven-plugin:1.3.2:java (default-cli) @ tutorial ---
-[WARNING] Warning: killAfter is now deprecated. Do you need it ? Please comment on MEXEC-6.
-Value = 4 -Value = 5 -Value = 3 -``` - -### The base application - -The application in this tutorial is a dice application. The `Dice` class below has a `playDice` function that generates a random number between 1 and 6 every second, prints the value of the dice roll, and runs indefinitely. - -```java -public class Dice { - Random r = new Random(); - - void playDice() throws InterruptedException { - while (true) { - Thread.sleep(1000); - System.out.println("Value = " + (r.nextInt(6) + 1)); - } - } -} -``` - -When you run the `main` function of this class, a new `Dice` object will be instantiated and then run indefinitely: - -```java -public class Dice { - // other methods - - public static void main(String[] args) throws InterruptedException { - Dice d = new Dice(); - d.playDice(); - } -} -``` - -### Leaders and followers (and a bit of background) - -To achieve this common view in multiple instances of the program, we need each instance to agree on what the next number in the sequence will be. For example, the instances must agree that 4 is the first number and 2 is the second number and 5 is the third number and so on. This is a difficult problem, especially in the case that any instance may go away at any time, and messages between the instances can be lost or reordered. - -Luckily, there are already algorithms to solve this. Paxos is an abstract algorithm to implement this kind of agreement, while Zab and Raft are more practical protocols. This video gives a good overview about how these algorithms usually look. They all have a similar core. - -It would be possible to run the Paxos to agree on each number in the sequence. However, running Paxos each time can be expensive. What Zab and Raft do is that they use a Paxos-like algorithm to elect a leader. The leader then decides what the sequence of events should be, putting them in a log, which the other instances can then follow to maintain the same state as the leader. - -Bookkeeper provides the functionality for the second part of the protocol, allowing a leader to write events to a log and have multiple followers tailing the log. However, bookkeeper does not do leader election. You will need a zookeeper or raft instance for that purpose. - -### Why not just use ZooKeeper? - -There are a number of reasons: - -1. Zookeeper's log is only exposed through a tree like interface. It can be hard to shoehorn your application into this. -2. A zookeeper ensemble of multiple machines is limited to one log. You may want one log per resource, which will become expensive very quickly. -3. Adding extra machines to a zookeeper ensemble does not increase capacity nor throughput. - -Bookkeeper can be seen as a means of exposing ZooKeeper's replicated log to applications in a scalable fashion. ZooKeeper is still used by BookKeeper, however, to maintain consistency guarantees, though clients don't need to interact with ZooKeeper directly. - -### Electing a leader - -We'll use zookeeper to elect a leader. A zookeeper instance will have started locally when you started the localbookie application above. To verify it's running, run the following command. 
-```shell
-$ echo stat | nc localhost 2181
-Zookeeper version: 3.4.6-1569965, built on 02/20/2014 09:09 GMT
-Clients:
- /127.0.0.1:59343[1](queued=0,recved=40,sent=41)
- /127.0.0.1:49354[1](queued=0,recved=11,sent=11)
- /127.0.0.1:49361[0](queued=0,recved=1,sent=0)
- /127.0.0.1:59344[1](queued=0,recved=38,sent=39)
- /127.0.0.1:59345[1](queued=0,recved=38,sent=39)
- /127.0.0.1:59346[1](queued=0,recved=38,sent=39)
-
-Latency min/avg/max: 0/0/23
-Received: 167
-Sent: 170
-Connections: 6
-Outstanding: 0
-Zxid: 0x11
-Mode: standalone
-Node count: 16
-```
-
-To interact with zookeeper, we'll use the Curator client rather than the stock zookeeper client. Getting things right with the zookeeper client can be tricky, and curator removes a lot of the pointy corners for you. In fact, curator even provides a leader election recipe, so we need to do very little work to get leader election in our application.
-
-```java
-public class Dice extends LeaderSelectorListenerAdapter implements Closeable {
-
-    final static String ZOOKEEPER_SERVER = "127.0.0.1:2181";
-    final static String ELECTION_PATH = "/dice-elect";
-
-    ...
-
-    Dice() throws InterruptedException {
-        curator = CuratorFrameworkFactory.newClient(ZOOKEEPER_SERVER,
-                2000, 10000, new ExponentialBackoffRetry(1000, 3));
-        curator.start();
-        curator.blockUntilConnected();
-
-        leaderSelector = new LeaderSelector(curator, ELECTION_PATH, this);
-        leaderSelector.autoRequeue();
-        leaderSelector.start();
-    }
-```
-
-In the constructor for Dice, we need to create the curator client. We specify four things when creating the client: the location of the zookeeper service, the session timeout, the connect timeout and the retry policy.
-
-The session timeout is a zookeeper concept. If the zookeeper server doesn't hear anything from the client for this amount of time, any leases which the client holds will be timed out. This is important in leader election. For leader election, the curator client will take a lease on ELECTION_PATH. The first instance to take the lease will become leader and the rest will become followers. However, their claim on the lease will remain in the queue. If the first instance then goes away, due to a crash etc., its session will timeout. Once the session times out, the lease will be released and the next instance in the queue will become the leader. The call to autoRequeue() will make the client queue itself again if it loses the lease for some other reason, such as if it was still alive but a garbage collection pause caused it to lose its session, and thereby its lease. I've set the lease to be quite low so that when we test out leader election, transitions will be quite quick. The optimum length for the session timeout depends very much on the use case. The other parameters are the connection timeout, i.e. the amount of time it will spend trying to connect to a zookeeper server before giving up, and the retry policy. The retry policy specifies how the client should respond to transient errors, such as connection loss. Operations that fail with transient errors can be retried, and this argument specifies how often the retries should occur.
-
-Finally, you'll have noticed that Dice now extends LeaderSelectorListenerAdapter and implements Closeable. Closeable is there to close the resources we have initialized in the constructor: the curator client and the leaderSelector. LeaderSelectorListenerAdapter is a callback that the leaderSelector uses to notify the instance that it is now the leader.
-It is passed as the third argument to the LeaderSelector constructor.
-
-```java
-    @Override
-    public void takeLeadership(CuratorFramework client)
-            throws Exception {
-        synchronized (this) {
-            leader = true;
-            try {
-                while (true) {
-                    this.wait();
-                }
-            } catch (InterruptedException ie) {
-                Thread.currentThread().interrupt();
-                leader = false;
-            }
-        }
-    }
-```
-
-takeLeadership() is the callback called by LeaderSelector when the instance is leader. It should only return when the instance wants to give up leadership. In our case, we never do, so we wait on the current object until we're interrupted. To signal to the rest of the program that we are leader, we set a volatile boolean called leader to true. This is unset after we are interrupted.
-
-```java
-    void playDice() throws InterruptedException {
-        while (true) {
-            while (leader) {
-                Thread.sleep(1000);
-                System.out.println("Value = " + (r.nextInt(6) + 1)
-                                   + ", isLeader = " + leader);
-            }
-        }
-    }
-```
-
-Finally, we modify the `playDice` function to only generate random numbers when it is the leader.
-
-Run two instances of the program in two different terminals. You'll see that one becomes leader and prints numbers while the other just sits there.
-
-Now stop the leader using Control-Z. This will pause the process, but it won't kill it. You will be dropped back to the shell in that terminal. After a couple of seconds (the session timeout), you will see that the other instance has become the leader. Zookeeper will guarantee that only one instance is selected as leader at any time.
-
-Now go back to the shell that the original leader was on and wake up the process using fg. You'll see something like the following:
-
-```shell
-...
-...
-Value = 4, isLeader = true
-Value = 4, isLeader = true
-^Z
-[1]+  Stopped                 mvn exec:java -Dexec.mainClass=org.apache.bookkeeper.Dice
-$ fg
-mvn exec:java -Dexec.mainClass=org.apache.bookkeeper.Dice
-Value = 3, isLeader = true
-Value = 1, isLeader = false
-```
-
-## New API
-
-Since 4.6, BookKeeper provides a new client API which leverages the Java 8 [CompletableFuture](https://docs.oracle.com/javase/8/docs/api/java/util/concurrent/CompletableFuture.html) facility.
-[WriteHandle](../javadoc/org/apache/bookkeeper/client/api/WriteHandle), [WriteAdvHandle](../javadoc/org/apache/bookkeeper/client/api/WriteAdvHandle), and [ReadHandle](../javadoc/org/apache/bookkeeper/client/api/ReadHandle) are introduced to replace the generic [LedgerHandle](../javadoc/org/apache/bookkeeper/client/LedgerHandle).
-
-> All of the new API is now available in `org.apache.bookkeeper.client.api`. You should only use interfaces defined in this package.
-
-*Beware* that in 4.6 this is still an experimental API and may be subject to change in future minor releases.
-
-### Create a new client
-
-In order to create a new [`BookKeeper`](../javadoc/org/apache/bookkeeper/client/api/BookKeeper) client object, you need to construct a [`ClientConfiguration`](../javadoc/org/apache/bookkeeper/conf/ClientConfiguration) object and set a [connection string](#connection-string) first, and then use [`BookKeeperBuilder`](../javadoc/org/apache/bookkeeper/client/api/BookKeeperBuilder) to build the client.
-
-Here is an example of building the bookkeeper client.
-
-```java
-// construct a client configuration instance
-ClientConfiguration conf = new ClientConfiguration();
-conf.setZkServers(zkConnectionString);
-conf.setZkLedgersRootPath("/path/to/ledgers/root");
-
-// build the bookkeeper client
-BookKeeper bk = BookKeeper.newBuilder(conf)
-    .statsLogger(...)
-    ...
-    .build();
-```
-
-### Create ledgers
-
-The easiest way to create a {% pop ledger %} using the Java client is via the [`CreateBuilder`](../javadoc/org/apache/bookkeeper/client/api/CreateBuilder). You must specify at least
-a [`DigestType`](../javadoc/org/apache/bookkeeper/client/api/DigestType) and a password.
-
-Here's an example:
-
-```java
-BookKeeper bk = ...;
-
-byte[] password = "some-password".getBytes();
-
-WriteHandle wh = bk.newCreateLedgerOp()
-    .withDigestType(DigestType.CRC32)
-    .withPassword(password)
-    .withEnsembleSize(3)
-    .withWriteQuorumSize(3)
-    .withAckQuorumSize(2)
-    .execute()          // execute the creation op
-    .get();             // wait for the execution to complete
-```
-
-A [`WriteHandle`](../javadoc/org/apache/bookkeeper/client/api/WriteHandle) is returned for applications to write and read entries to and from the ledger.
-
-### Write flags
-
-You can specify the behaviour of the writer by setting [`WriteFlags`](../javadoc/org/apache/bookkeeper/client/api/WriteFlag) at ledger creation time.
-These flags are applied only during write operations and are not recorded in metadata.
-
-Available write flags:
-
-| Flag  | Explanation  | Notes |
-:---------|:------------|:-------
-DEFERRED_SYNC | Writes are acknowledged early, without waiting for guarantees of durability | Data will be only written to the OS page cache, without forcing an fsync.
-
-```java
-BookKeeper bk = ...;
-
-byte[] password = "some-password".getBytes();
-
-WriteHandle wh = bk.newCreateLedgerOp()
-    .withDigestType(DigestType.CRC32)
-    .withPassword(password)
-    .withEnsembleSize(3)
-    .withWriteQuorumSize(3)
-    .withAckQuorumSize(2)
-    .withWriteFlags(DEFERRED_SYNC)
-    .execute()          // execute the creation op
-    .get();             // wait for the execution to complete
-```
-
-### Append entries to ledgers
-
-The [`WriteHandle`](../javadoc/org/apache/bookkeeper/client/api/WriteHandle) can be used by applications to append entries to the ledgers.
-
-```java
-WriteHandle wh = ...;
-
-CompletableFuture<Long> addFuture = wh.append("Some entry data".getBytes());
-
-// option 1: you can wait for the add to complete synchronously
-try {
-    long entryId = FutureUtils.result(addFuture);
-} catch (BKException bke) {
-    // error handling
-}
-
-// option 2: you can process the result and exception asynchronously
-addFuture
-    .thenApply(entryId -> {
-        // process the result
-    })
-    .exceptionally(cause -> {
-        // handle the exception
-    });
-
-// option 3: bookkeeper provides a twitter-future-like event listener for processing result and exception asynchronously
-addFuture.whenComplete(new FutureEventListener<Long>() {
-    @Override
-    public void onSuccess(long entryId) {
-        // process the result
-    }
-    @Override
-    public void onFailure(Throwable cause) {
-        // handle the exception
-    }
-});
-```
-
-The append method supports three representations of a byte array: the native Java `byte[]`, Java NIO `ByteBuffer`, and Netty `ByteBuf`.
-Using `ByteBuf` is recommended, as it is more GC-friendly.
-
-### Open ledgers
-
-You can open ledgers to read entries. Opening ledgers is done via the [`openBuilder`](../javadoc/org/apache/bookkeeper/client/api/OpenBuilder). You must specify the ledgerId and the password
-in order to open a ledger.
-
-Here's an example:
-
-```java
-BookKeeper bk = ...;
-
-long ledgerId = ...;
-byte[] password = "some-password".getBytes();
-
-ReadHandle rh = bk.newOpenLedgerOp()
-    .withLedgerId(ledgerId)
-    .withPassword(password)
-    .execute()          // execute the open op
-    .get();             // wait for the execution to complete
-```
-
-A [`ReadHandle`](../javadoc/org/apache/bookkeeper/client/api/ReadHandle) is returned for applications to read entries from the ledger.
-
-#### Recovery vs NoRecovery
-
-By default, the [`openBuilder`](../javadoc/org/apache/bookkeeper/client/api/OpenBuilder) opens the ledger in `NoRecovery` mode. You can open the ledger in `Recovery` mode by specifying
-`withRecovery(true)` in the open builder.
-
-```java
-BookKeeper bk = ...;
-
-long ledgerId = ...;
-byte[] password = "some-password".getBytes();
-
-ReadHandle rh = bk.newOpenLedgerOp()
-    .withLedgerId(ledgerId)
-    .withPassword(password)
-    .withRecovery(true)
-    .execute()
-    .get();
-```
-
-**What is the difference between "Recovery" and "NoRecovery"?**
-
-If you open a ledger in "Recovery" mode, it will basically fence and seal the ledger -- no more entries are allowed
-to be appended to it. The writer that is currently appending entries to the ledger will fail with [`LedgerFencedException`](../javadoc/org/apache/bookkeeper/client/api/BKException.Code#LedgerFencedException).
-
-In contrast, opening a ledger in "NoRecovery" mode will not fence and seal the ledger. "NoRecovery" mode is usually used by applications to tail-read from a ledger.
-
-### Read entries from ledgers
-
-The [`ReadHandle`](../javadoc/org/apache/bookkeeper/client/api/ReadHandle) returned from the open builder can be used by applications to read entries from the ledgers.
-
-```java
-ReadHandle rh = ...;
-
-long startEntryId = ...;
-long endEntryId = ...;
-CompletableFuture<LedgerEntries> readFuture = rh.read(startEntryId, endEntryId);
-
-// option 1: you can wait for the read to complete synchronously
-try {
-    LedgerEntries entries = FutureUtils.result(readFuture);
-} catch (BKException bke) {
-    // error handling
-}
-
-// option 2: you can process the result and exception asynchronously
-readFuture
-    .thenApply(entries -> {
-        // process the result
-    })
-    .exceptionally(cause -> {
-        // handle the exception
-    });
-
-// option 3: bookkeeper provides a twitter-future-like event listener for processing result and exception asynchronously
-readFuture.whenComplete(new FutureEventListener<LedgerEntries>() {
-    @Override
-    public void onSuccess(LedgerEntries entries) {
-        // process the result
-    }
-    @Override
-    public void onFailure(Throwable cause) {
-        // handle the exception
-    }
-});
-```
-
-Once you are done processing the [`LedgerEntries`](../javadoc/org/apache/bookkeeper/client/api/LedgerEntries), you can call `#close()` on the `LedgerEntries` instance to
-release the buffers held by it.
-
-Applications are allowed to read any entries between `0` and [`LastAddConfirmed`](../javadoc/org/apache/bookkeeper/client/api/ReadHandle.html#getLastAddConfirmed). If an application
-attempts to read entries beyond `LastAddConfirmed`, it will receive [`IncorrectParameterException`](../javadoc/org/apache/bookkeeper/client/api/BKException.Code#IncorrectParameterException).
-
-### Read unconfirmed entries from ledgers
-
-`readUnconfirmed` provides a mechanism for applications to read entries beyond `LastAddConfirmed`. Applications should be aware that `readUnconfirmed` doesn't provide any
-repeatable-read consistency.
-
-```java
-CompletableFuture<LedgerEntries> readFuture = rh.readUnconfirmed(startEntryId, endEntryId);
-```
-
-### Tailing Reads
-
-There are two methods for applications to achieve tailing reads: `Polling` and `Long-Polling`.
-
-#### Polling
-
-You can do this in a synchronous way:
-
-```java
-ReadHandle rh = ...;
-
-long startEntryId = 0L;
-long nextEntryId = startEntryId;
-int numEntriesPerBatch = 4;
-while (!rh.isClosed() || nextEntryId <= rh.getLastAddConfirmed()) {
-    long lac = rh.getLastAddConfirmed();
-    if (nextEntryId > lac) {
-        // no more entries have been added yet; wait and then refresh the LAC
-        Thread.sleep(1000);
-
-        lac = rh.readLastAddConfirmed().get();
-        continue;
-    }
-
-    long endEntryId = Math.min(lac, nextEntryId + numEntriesPerBatch - 1);
-    LedgerEntries entries = rh.read(nextEntryId, endEntryId).get();
-
-    // process the entries
-
-    nextEntryId = endEntryId + 1;
-}
-```
-
-#### Long Polling
-
-```java
-ReadHandle rh = ...;
-
-long startEntryId = 0L;
-long nextEntryId = startEntryId;
-int numEntriesPerBatch = 4;
-while (!rh.isClosed() || nextEntryId <= rh.getLastAddConfirmed()) {
-    long lac = rh.getLastAddConfirmed();
-    if (nextEntryId > lac) {
-        // no more entries have been added yet; wait (up to 1 second) for a new entry
-        try (LastConfirmedAndEntry lacAndEntry = rh.readLastAddConfirmedAndEntry(nextEntryId, 1000, false).get()) {
-            if (lacAndEntry.hasEntry()) {
-                // process the entry
-
-                ++nextEntryId;
-            }
-        }
-    } else {
-        long endEntryId = Math.min(lac, nextEntryId + numEntriesPerBatch - 1);
-        LedgerEntries entries = rh.read(nextEntryId, endEntryId).get();
-
-        // process the entries
-        nextEntryId = endEntryId + 1;
-    }
-}
-```
-
-### Delete ledgers
-
-{% pop Ledgers %} can be deleted by using [`DeleteBuilder`](../javadoc/org/apache/bookkeeper/client/api/DeleteBuilder).
-
-```java
-BookKeeper bk = ...;
-long ledgerId = ...;
-
-bk.newDeleteLedgerOp()
-    .withLedgerId(ledgerId)
-    .execute()
-    .get();
-```
-
-### Relaxing Durability
-
-In BookKeeper, by default, each write is acknowledged to the client if and only if it has been persisted durably (fsync called on the file system) by a quorum of bookies.
-In this case the LastAddConfirmed pointer is updated on the writer side. This is the guarantee for the writer that data will not be lost and will
-always be readable by other clients.
-
-On the client side you can temporarily relax this constraint by using the [`DEFERRED_SYNC`](../javadoc/org/apache/bookkeeper/client/api/WriteFlag) write flag. Using this flag, bookies will acknowledge each entry after
-writing it to OS buffers, without waiting for an fsync.
-In this case the LastAddConfirmed pointer is neither advanced on the writer side nor updated on the reader side, because **there is some chance of losing the entry**.
-Such entries will still be readable using the readUnconfirmed() API, but they won't be readable using Long Poll reads or the regular read() API.
-
-In order to get guarantees of durability the writer must explicitly use the [force()](../javadoc/org/apache/bookkeeper/client/api/ForceableHandle) API, which returns only after all the bookies in the ensemble have acknowledged the call after
-performing an fsync to the disk which is storing the journal.
-This way the LastAddConfirmed pointer is advanced on the writer side and will eventually be available to the readers.
-
-The *close()* operation on the writer writes the current LastAddConfirmed pointer to the ledger's metadata; **it is up to the application to call force() before issuing the close command**.
-If you never explicitly call [force()](../javadoc/org/apache/bookkeeper/client/api/ForceableHandle), the LastAddConfirmed will remain unset (-1) in the ledger metadata and regular readers won't be able to access the data.
-
-
-```java
-BookKeeper bk = ...;
-byte[] password = "some-password".getBytes();
-
-WriteHandle wh = bk.newCreateLedgerOp()
-    .withDigestType(DigestType.CRC32)
-    .withPassword(password)
-    .withEnsembleSize(3)
-    .withWriteQuorumSize(3)
-    .withAckQuorumSize(2)
-    .withWriteFlags(DEFERRED_SYNC)
-    .execute()          // execute the creation op
-    .get();             // wait for the execution to complete
-
-
-wh.force().get();  // wait for fsync, make data available to readers and to the replicator
-
-wh.close(); // seal the ledger
-
-```
diff --git a/site/docs/latest/api/overview.md b/site/docs/latest/api/overview.md
deleted file mode 100644
index 3e0adcd61af..00000000000
--- a/site/docs/latest/api/overview.md
+++ /dev/null
@@ -1,17 +0,0 @@
----
-title: BookKeeper API
----
-
-BookKeeper offers a few APIs that applications can use to interact with it:
-
-* The [ledger API](../ledger-api) is a lower-level API that enables you to interact with {% pop ledgers %} directly
-* The [Ledger Advanced API](../ledger-adv-api) is an advanced extension to the [Ledger API](../ledger-api) that provides more flexibility to applications.
-* The [DistributedLog API](../distributedlog-api) is a higher-level API that provides convenient abstractions.
-
-## Trade-offs
-
-The `Ledger API` provides direct access to ledgers and thus enables you to use BookKeeper however you'd like.
-
-However, in most use cases, if you want a `log stream`-like abstraction, it requires you to manage things like tracking the list of ledgers,
-managing rolling ledgers and data retention on your own. In such cases, we recommend using the [DistributedLog API](../distributedlog-api),
-with semantics resembling continuous log streams from the standpoint of applications.
diff --git a/site/docs/latest/deployment/dcos.md b/site/docs/latest/deployment/dcos.md
deleted file mode 100644
index dab023deb7a..00000000000
--- a/site/docs/latest/deployment/dcos.md
+++ /dev/null
@@ -1,142 +0,0 @@
----
-title: Deploying BookKeeper on DC/OS
-subtitle: Get up and running easily on an Apache Mesos cluster
-logo: img/dcos-logo.png
----
-
-[DC/OS](https://dcos.io/) (the DataCenter Operating System) is a distributed operating system used for deploying and managing applications and systems on [Apache Mesos](http://mesos.apache.org/). DC/OS is an open-source tool created and maintained by [Mesosphere](https://mesosphere.com/).
-
-BookKeeper is available as a [DC/OS package](http://universe.dcos.io/#/package/bookkeeper/version/latest) from the [Mesosphere DC/OS Universe](http://universe.dcos.io/#/packages).
-
-## Prerequisites
-
-In order to run BookKeeper on DC/OS, you will need:
-
-* DC/OS version [1.8](https://dcos.io/docs/1.8/) or higher
-* A DC/OS cluster with at least three nodes
-* The [DC/OS CLI tool](https://dcos.io/docs/1.8/usage/cli/install/) installed
-
-Each node in your DC/OS-managed Mesos cluster must have at least:
-
-* 1 CPU
-* 1 GB of memory
-* 10 GB of total persistent disk storage
-
-## Installing BookKeeper
-
-```shell
-$ dcos package install bookkeeper --yes
-```
-
-This command will:
-
-* Install the `bookkeeper` subcommand for the `dcos` CLI tool
-* Start a single {% pop bookie %} on the Mesos cluster with the [default configuration](../../reference/config)
-
-The bookie that is automatically started up uses host-mode networking and by default exports the service at `agent_ip:3181`.
-
-> If you run `dcos package install bookkeeper` without setting the `--yes` flag, the install will run in interactive mode. For more information on the `package install` command, see the [DC/OS docs](https://docs.mesosphere.com/latest/cli/command-reference/dcos-package/dcos-package-install/).
-
-### Services
-
-To watch BookKeeper start up, click on the **Services** tab in the DC/OS [user interface](https://docs.mesosphere.com/latest/gui/) and you should see the `bookkeeper` package listed:
-
-![DC/OS services]({{ site.baseurl }}img/dcos/services.png)
-
-### Tasks
-
-To see which tasks have started, click on the `bookkeeper` service and you'll see an interface that looks like this:
-
-![DC/OS tasks]({{ site.baseurl }}img/dcos/tasks.png)
-
-## Scaling BookKeeper
-
-Once the first {% pop bookie %} has started up, you can click on the **Scale** tab to scale up your BookKeeper ensemble by adding more bookies (or scale down the ensemble by removing bookies).
-
-![DC/OS scale]({{ site.baseurl }}img/dcos/scale.png)
-
-## ZooKeeper Exhibitor
-
-ZooKeeper contains the information for all bookies in the ensemble. When deployed on DC/OS, BookKeeper uses a ZooKeeper instance provided by DC/OS. You can access a visual UI for ZooKeeper using [Exhibitor](https://github.com/soabase/exhibitor/wiki), which is available at [http://master.dcos/exhibitor](http://master.dcos/exhibitor).
-
-![ZooKeeper Exhibitor]({{ site.baseurl }}img/dcos/exhibitor.png)
-
-You should see a listing of IP/host information for all bookies under the `messaging/bookkeeper/ledgers/available` node.
-
-## Client connections
-
-To connect to bookies running on DC/OS using clients running within your Mesos cluster, you need to specify the ZooKeeper connection string for DC/OS's ZooKeeper cluster:
-
-```
-master.mesos:2181
-```
-
-This is the *only* ZooKeeper host/port you need to include in your connection string. Here's an example using the [Java client](../../api/ledger-api#the-java-ledger-api-client):
-
-```java
-BookKeeper bkClient = new BookKeeper("master.mesos:2181");
-```
-
-If you're connecting using a client running outside your Mesos cluster, you need to supply the public-facing connection string for your DC/OS ZooKeeper cluster.
-
-## Configuring BookKeeper
-
-By default, the `bookkeeper` package will start up a BookKeeper ensemble consisting of one {% pop bookie %} with one CPU, 1 GB of memory, and a 70 MB persistent volume.
-
-You can supply a non-default configuration when installing the package using a JSON file. Here's an example command:
-
-```shell
-$ dcos package install bookkeeper \
-  --options=/path/to/config.json
-```
-
-You can then fetch the current configuration for BookKeeper at any time using the `package describe` command:
-
-```shell
-$ dcos package describe bookkeeper \
-  --config
-```
-
-### Available parameters
-
-> Not all [configurable parameters](../../reference/config) for BookKeeper are available for BookKeeper on DC/OS. Only the parameters shown in the table below are available.
-
-Param | Type | Description | Default
-:-----|:-----|:------------|:-------
-`name` | String | The name of the DC/OS service. | `bookkeeper`
-`cpus` | Integer | The number of CPU shares to allocate to each {% pop bookie %}. The minimum is 1. | `1` |
-`instances` | Integer | The number of {% pop bookies %} to run. The minimum is 1. | `1`
-`mem` | Number | The memory, in MB, to allocate to each BookKeeper task | `1024.0` (1 GB)
-`volume_size` | Number | The persistent volume size, in MB | `70`
-`zk_client` | String | The connection string for the ZooKeeper client instance | `master.mesos:2181`
-`service_port` | Integer | The BookKeeper export service port, using `PORT0` in Marathon | `3181`
-
-### Example JSON configuration
-
-Here's an example JSON configuration object for BookKeeper on DC/OS:
-
-```json
-{
-  "instances": 5,
-  "cpus": 3,
-  "mem": 2048.0,
-  "volume_size": 250
-}
-```
-
-If that configuration were stored in a file called `bk-config.json`, you could apply that configuration upon installing the BookKeeper package using this command:
-
-```shell
-$ dcos package install bookkeeper \
-  --options=./bk-config.json
-```
-
-## Uninstalling BookKeeper
-
-You can shut down and uninstall the `bookkeeper` package from DC/OS at any time using the `package uninstall` command:
-
-```shell
-$ dcos package uninstall bookkeeper
-Uninstalled package [bookkeeper] version [{{ site.latest_version }}]
-Thank you for using bookkeeper.
-```
diff --git a/site/docs/latest/deployment/kubernetes.md b/site/docs/latest/deployment/kubernetes.md
deleted file mode 100644
index 0f113169edc..00000000000
--- a/site/docs/latest/deployment/kubernetes.md
+++ /dev/null
@@ -1,181 +0,0 @@
----
-title: Deploying Apache BookKeeper on Kubernetes
-tags: [Kubernetes, Google Container Engine]
-logo: img/kubernetes-logo.png
----
-
-Apache BookKeeper can be easily deployed in [Kubernetes](https://kubernetes.io/) clusters. Managed clusters on [Google Container Engine](https://cloud.google.com/compute/) are the most convenient way to get started.
-
-The deployment method shown in this guide relies on [YAML](http://yaml.org/) definitions for Kubernetes [resources](https://kubernetes.io/docs/resources-reference/v1.6/). The [`kubernetes`](https://github.com/apache/bookkeeper/tree/master/deploy/kubernetes) subdirectory holds resource definitions for:
-
-* A three-node ZooKeeper cluster
-* A BookKeeper cluster with a bookie running on each node
-
-## Setup on Google Container Engine
-
-To get started, clone the BookKeeper repository from GitHub to obtain the [`kubernetes`](https://github.com/apache/bookkeeper/tree/master/deploy/kubernetes) resource definitions, as shown in the example below.
-
-If you'd like to change the number of bookies, or ZooKeeper nodes in your BookKeeper cluster, modify the `replicas` parameter in the `spec` section of the appropriate [`Deployment`](https://kubernetes.io/docs/concepts/workloads/controllers/deployment/) or [`StatefulSet`](https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/) resource.
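-
-For example, one way to fetch the definitions is to clone the main repository and change into its `deploy/kubernetes` subdirectory:
-
-```bash
-$ git clone https://github.com/apache/bookkeeper.git
-$ cd bookkeeper/deploy/kubernetes
-```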
-
-[Google Container Engine](https://cloud.google.com/container-engine) (GKE) automates the creation and management of Kubernetes clusters in [Google Compute Engine](https://cloud.google.com/compute/) (GCE).
-
-### Prerequisites
-
-To get started, you'll need:
-
-* A Google Cloud Platform account, which you can sign up for at [cloud.google.com](https://cloud.google.com)
-* An existing Cloud Platform project
-* The [Google Cloud SDK](https://cloud.google.com/sdk/downloads) (in particular the [`gcloud`](https://cloud.google.com/sdk/gcloud/) and [`kubectl`](https://kubernetes.io/docs/tasks/tools/) tools).
-
-### Create a new Kubernetes cluster
-
-You can create a new GKE cluster using the [`container clusters create`](https://cloud.google.com/sdk/gcloud/reference/container/clusters/create) command for `gcloud`. This command enables you to specify the number of nodes in the cluster, the machine types of those nodes, and more.
-
-As an example, we'll create a new GKE cluster for Kubernetes version [1.6.4](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG.md#v164) in the [us-central1-a](https://cloud.google.com/compute/docs/regions-zones/regions-zones#available) zone. The cluster will be named `bookkeeper-gke-cluster` and will consist of three VMs, each using two locally attached SSDs and running on [n1-standard-8](https://cloud.google.com/compute/docs/machine-types) machines. These SSDs will be used by Bookie instances, one for the BookKeeper journal and the other for storing the actual data.
-
-```bash
-$ gcloud config set compute/zone us-central1-a
-$ gcloud config set project your-project-name
-$ gcloud container clusters create bookkeeper-gke-cluster \
-  --machine-type=n1-standard-8 \
-  --num-nodes=3 \
-  --local-ssd-count=2 \
-  --enable-kubernetes-alpha
-```
-
-By default, bookies will run on all the machines that have locally attached SSD disks. In this example, all of those machines will have two SSDs, but you can add different types of machines to the cluster later. You can control which machines host bookie servers using [labels](https://kubernetes.io/docs/concepts/overview/working-with-objects/labels).
-
-### Dashboard
-
-You can observe your cluster in the [Kubernetes Dashboard](https://kubernetes.io/docs/tasks/access-application-cluster/web-ui-dashboard/) by downloading the credentials for your Kubernetes cluster and opening up a proxy to the cluster:
-
-```bash
-$ gcloud container clusters get-credentials bookkeeper-gke-cluster \
-  --zone=us-central1-a \
-  --project=your-project-name
-$ kubectl proxy
-```
-
-By default, the proxy will be opened on port 8001. Now you can navigate to [localhost:8001/ui](http://localhost:8001/ui) in your browser to access the dashboard. At first your GKE cluster will be empty, but that will change as you begin deploying.
-
-When you create a cluster, your `kubectl` config in `~/.kube/config` (on macOS and Linux) will be updated for you, so you probably won't need to change your configuration. Nonetheless, you can ensure that `kubectl` can interact with your cluster by listing the nodes in the cluster:
-
-```bash
-$ kubectl get nodes
-```
-
-If `kubectl` is working with your cluster, you can proceed to deploy ZooKeeper and Bookies.
-
-### ZooKeeper
-
-You *must* deploy ZooKeeper as the first component, as it is a dependency for the others.
-
-```bash
-$ kubectl apply -f zookeeper.yaml
-```
-
-Wait until all three ZooKeeper server pods are up and have the status `Running`. You can check on the status of the ZooKeeper pods at any time:
-
-```bash
-$ kubectl get pods -l component=zookeeper
-NAME      READY     STATUS             RESTARTS   AGE
-zk-0      1/1       Running            0          18m
-zk-1      1/1       Running            0          17m
-zk-2      0/1       Running            6          15m
-```
-
-This step may take several minutes, as Kubernetes needs to download the Docker image on the VMs.
-
-
-If you want to connect to one of the remote ZooKeeper servers, you can use [zk-shell](https://github.com/rgs1/zk_shell). You first need to forward a local port to the
-remote ZooKeeper server:
-
-```bash
-$ kubectl port-forward zk-0 2181:2181
-$ zk-shell localhost 2181
-```
-
-### Deploy Bookies
-
-Once the ZooKeeper cluster is running, you can then deploy the bookies. You can deploy the bookies either using a [DaemonSet](https://kubernetes.io/docs/concepts/workloads/controllers/daemonset/) or a [StatefulSet](https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/).
-
-> NOTE: _DaemonSet_ vs _StatefulSet_
->
-> A _DaemonSet_ ensures that all (or some) nodes run a bookie pod. As nodes are added to the cluster, bookie pods are added automatically to them. As nodes are removed from the
-> cluster, those bookie pods are garbage collected. The bookies deployed in a DaemonSet store data on the local disks of those nodes, so they don't require any external storage for persistent
-> volumes.
->
-> A _StatefulSet_ maintains a sticky identity for the pods that it runs and manages. It provides stable and unique network identifiers, and stable and persistent storage for each pod. The pods
-> are not interchangeable: the identifiers for each pod are maintained across any rescheduling.
->
-> Which one to use? A _DaemonSet_ is the easiest way to deploy a BookKeeper cluster, because it doesn't require an additional persistent volume provisioner and uses local disks. BookKeeper manages
-> the data replication. It maintains the best latency property. However, it uses `hostIP` and `hostPort` for communications between pods. On some Kubernetes platforms (such as DC/OS), `hostIP` and
-> `hostPort` are not well supported. A _StatefulSet_ is only practical when deploying in a cloud environment or any Kubernetes installation that has persistent volumes available. Also be aware that latency
-> can potentially be higher when using persistent volumes, because there is usually built-in replication in the persistent volumes.
-
-```bash
-# deploy bookies in a daemon set
-$ kubectl apply -f bookkeeper.yaml
-
-# deploy bookies in a stateful set
-$ kubectl apply -f bookkeeper.stateful.yaml
-```
-
-You can check on the status of the Bookie pods for these components either in the Kubernetes Dashboard or using `kubectl`:
-
-```bash
-$ kubectl get pods
-```
-
-Once all BookKeeper pods are running, you can use zk-shell to find all available bookies under the `/ledgers/` node.
-
-You can also run an instance of the [bookkeeper tutorial](https://github.com/ivankelly/bookkeeper-tutorial/) application, named `dice` here, in this BookKeeper cluster.
-
-```bash
-$ kubectl run -i --tty --attach dice --image=caiok/bookkeeper-tutorial --env ZOOKEEPER_SERVERS="zk-0.zookeeper"
-```
-
-Example output from the dice instance looks like this:
-```shell
-➜ $ kubectl run -i --tty --attach dice --image=caiok/bookkeeper-tutorial --env ZOOKEEPER_SERVERS="zk-0.zookeeper"
-If you don't see a command prompt, try pressing enter.
-Value = 1, epoch = 5, leading
-Value = 2, epoch = 5, leading
-Value = 1, epoch = 5, leading
-Value = 4, epoch = 5, leading
-Value = 5, epoch = 5, leading
-Value = 4, epoch = 5, leading
-Value = 3, epoch = 5, leading
-Value = 5, epoch = 5, leading
-Value = 3, epoch = 5, leading
-Value = 2, epoch = 5, leading
-Value = 1, epoch = 5, leading
-Value = 4, epoch = 5, leading
-Value = 2, epoch = 5, leading
-```
-
-### Un-Deploy
-
-Delete the demo dice instance:
-
-```bash
-$ kubectl delete deployment dice
-```
-
-Delete BookKeeper:
-```bash
-$ kubectl delete -f bookkeeper.yaml
-```
-
-Delete ZooKeeper:
-```bash
-$ kubectl delete -f zookeeper.yaml
-```
-
-Delete the cluster:
-```bash
-$ gcloud container clusters delete bookkeeper-gke-cluster
-```
-
-
-
diff --git a/site/docs/latest/deployment/manual.md b/site/docs/latest/deployment/manual.md
deleted file mode 100644
index daafd5556f5..00000000000
--- a/site/docs/latest/deployment/manual.md
+++ /dev/null
@@ -1,56 +0,0 @@
----
-title: Manual deployment
----
-
-The easiest way to deploy BookKeeper is using schedulers like [DC/OS](../dcos), but you can also deploy BookKeeper clusters manually. A BookKeeper cluster consists of two main components:
-
-* A [ZooKeeper](#zookeeper-setup) cluster that is used for configuration- and coordination-related tasks
-* An [ensemble](#starting-up-bookies) of {% pop bookies %}
-
-## ZooKeeper setup
-
-We won't provide a full guide to setting up a ZooKeeper cluster here. We recommend that you consult [this guide](https://zookeeper.apache.org/doc/current/zookeeperAdmin.html) in the official ZooKeeper documentation.
-
-## Starting up bookies
-
-Once your ZooKeeper cluster is up and running, you can start up as many {% pop bookies %} as you'd like to form a cluster. Before starting up each bookie, you need to modify the bookie's configuration to make sure that it points to the right ZooKeeper cluster.
-
-On each bookie host, you need to [download](../../getting-started/installation#download) the BookKeeper package as a tarball. Once you've done that, you need to configure the bookie by setting values in the `bookkeeper-server/conf/bk_server.conf` config file. The one parameter that you will absolutely need to change is the [`zkServers`](../../config#zkServers) parameter, which you will need to set to the ZooKeeper connection string for your ZooKeeper cluster. Here's an example:
-
-```properties
-zkServers=100.0.0.1:2181,100.0.0.2:2181,100.0.0.3:2181
-```
-
-> A full listing of configurable parameters available in `bookkeeper-server/conf/bk_server.conf` can be found in the [Configuration](../../reference/config) reference manual.
-
-Once the bookie's configuration is set, you can start it up using the [`bookie`](../../reference/cli#bookkeeper-bookie) command of the [`bookkeeper`](../../reference/cli#bookkeeper) CLI tool:
-
-```shell
-$ bookkeeper-server/bin/bookkeeper bookie
-```
-
-> You can also build BookKeeper [by cloning it from source](../../getting-started/installation#clone) or [using Maven](../../getting-started/installation#build-using-maven).
-
-### System requirements
-
-{% include system-requirements.md %}
-
-## Cluster metadata setup
-
-Once you've started up a cluster of bookies, you need to set up cluster metadata for the cluster by running the following command from any bookie in the cluster:
-
-```shell
-$ bookkeeper-server/bin/bookkeeper shell metaformat
-```
-
-> The `metaformat` command performs all the necessary ZooKeeper cluster metadata tasks and thus only needs to be run *once* and from *any* bookie in the BookKeeper cluster.
-
-Once cluster metadata formatting has been completed, your BookKeeper cluster is ready to go!
-
-
diff --git a/site/docs/latest/development/codebase.md b/site/docs/latest/development/codebase.md
deleted file mode 100644
index 9a83073ea4c..00000000000
--- a/site/docs/latest/development/codebase.md
+++ /dev/null
@@ -1,3 +0,0 @@
----
-title: The BookKeeper codebase
----
diff --git a/site/docs/latest/development/protocol.md b/site/docs/latest/development/protocol.md
deleted file mode 100644
index 6d17aa0ed45..00000000000
--- a/site/docs/latest/development/protocol.md
+++ /dev/null
@@ -1,148 +0,0 @@
----
-title: The BookKeeper protocol
----
-
-BookKeeper uses a special replication protocol for guaranteeing persistent storage of entries in an ensemble of bookies.
-
-> This document assumes that you have some knowledge of leader election and log replication and how these can be used in a distributed system. If not, we recommend reading the [example application](../../api/ledger-api#example-application) documentation first.
-
-## Ledgers
-
-{% pop Ledgers %} are the basic building block of BookKeeper and the level at which BookKeeper makes its persistent storage guarantees. A replicated log consists of an ordered list of ledgers. See [Ledgers to logs](#ledgers-to-logs) for info on building a replicated log from ledgers.
-
-Ledgers are composed of metadata and {% pop entries %}. The metadata is stored in ZooKeeper, which provides a *compare-and-swap* (CAS) operation. Entries are stored on storage nodes known as {% pop bookies %}.
-
-A ledger has a single writer and multiple readers (SWMR).
-
-### Ledger metadata
-
-A ledger's metadata contains the following:
-
-Parameter | Name | Meaning
-:---------|:-----|:-------
-Identifier | | A 64-bit integer, unique within the system
-Ensemble size | **E** | The number of nodes the ledger is stored on
-Write quorum size | **Qw** | The number of nodes each entry is written to. In effect, the max replication for the entry.
-Ack quorum size | **Qa** | The number of nodes an entry must be acknowledged on. In effect, the minimum replication for the entry.
-Current state | | The current status of the ledger. One of `OPEN`, `CLOSED`, or `IN_RECOVERY`.
-Last entry | | The last entry in the ledger, or `NULL` if the current state is not `CLOSED`.
-
-In addition, each ledger's metadata consists of one or more *fragments*. Each fragment consists of:
-
-* the first entry of the fragment, and
-* a list of bookies for the fragment.
-
-When creating a ledger, the following invariant must hold:
-
-**E >= Qw >= Qa**
-
-Thus, the ensemble size (**E**) must be at least as large as the write quorum size (**Qw**), which must in turn be at least as large as the ack quorum size (**Qa**). If that condition does not hold, then the ledger creation operation will fail.
-
-### Ensembles
-
-When a ledger is created, **E** bookies are chosen for the entries of that ledger. The bookies are the initial ensemble of the ledger.
-A ledger can have multiple ensembles, but an entry has only one ensemble. Changes in the ensemble involve a new fragment being added to the ledger.
-
-Take the following example. In this ledger, with an ensemble size of 3, there are two fragments and thus two ensembles, one starting at entry 0, the second at entry 12. The second ensemble differs from the first only by its first element. This could be because B1 has failed and therefore had to be replaced.
-
-First entry | Bookies
-:-----------|:-------
-0 | B1, B2, B3
-12 | B4, B2, B3
-
-### Write quorums
-
-Each entry in the log is written to **Qw** nodes. This is considered the write quorum for that entry. The write quorum is the subsequence of the ensemble, **Qw** in length, and starting at the bookie at index (entryid % **E**).
-
-For example, in a ledger of **E** = 4, **Qw** = 3, and **Qa** = 2, with an ensemble consisting of B1, B2, B3, and B4, the write quorums for the first 6 entries will be:
-
-Entry | Write quorum
-:-----|:------------
-0 | B1, B2, B3
-1 | B2, B3, B4
-2 | B3, B4, B1
-3 | B4, B1, B2
-4 | B1, B2, B3
-5 | B2, B3, B4
-
-There are only **E** distinct write quorums in any ensemble. If **E** = **Qw**, then there is only one, as no striping occurs. A short code sketch illustrating this striping computation appears after the [Writing to ledgers](#writing-to-ledgers) section below.
-
-### Ack quorums
-
-The ack quorum for an entry is any subset of the write quorum of size **Qa**. If **Qa** bookies acknowledge an entry, it means it has been fully replicated.
-
-### Guarantees
-
-The system can tolerate **Qa** – 1 failures without data loss.
-
-Bookkeeper guarantees that:
-
-1. All updates to a ledger will be read in the same order as they were written.
-2. All clients will read the same sequence of updates from the ledger.
-
-## Writing to ledgers
-
-As a ledger has only a single writer, ensuring that entry ids are sequential is trivial. A bookie acknowledges a write once it has been persisted to disk and is therefore durable. Once **Qa** bookies from the write quorum acknowledge the write, the write is acknowledged to the client, but only if all entries with lower entry ids in the ledger have already been acknowledged to the client.
-
-The entry written contains the ledger id, the entry id, the last add confirmed and the payload. The last add confirmed is the last entry which had been acknowledged to the client when this entry was written. Sending this with the entry speeds up recovery of the ledger in the case that the writer crashes.
-
-Another client can also read entries in the ledger up to the last add confirmed, as we guarantee that all entries thus far have been replicated on **Qa** nodes, and therefore all future readers will also be able to read them. However, to read like this, the ledger should be opened with a non-fencing open. Otherwise, it would kill the writer.
-
-If a node fails to acknowledge a write, the writer will create a new ensemble by replacing the failed node in the current ensemble. It creates a new fragment with this ensemble, starting from the first message that has not been acknowledged to the client. Creating the new fragment involves making a CAS write to the metadata. If the CAS write fails, someone else has modified something in the ledger metadata. This concurrent modification could have been caused by recovery or {% pop rereplication %}. We reread the metadata. If the state of the ledger is no longer `OPEN`, we send an error to the client for any outstanding writes. Otherwise, we try to replace the failed node again.
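-
-To make the striping arithmetic concrete, here is a minimal, self-contained sketch -- an illustration of the quorum placement rule above, not BookKeeper's internal code -- that reproduces the write quorum table from the previous section:
-
-```java
-import java.util.ArrayList;
-import java.util.List;
-
-public class WriteQuorumDemo {
-
-    // The write quorum for an entry is the Qw-long subsequence of the
-    // ensemble starting at index (entryId % E), wrapping around.
-    static List<String> writeQuorum(List<String> ensemble, long entryId, int qw) {
-        int e = ensemble.size();
-        int start = (int) (entryId % e);
-        List<String> quorum = new ArrayList<>();
-        for (int i = 0; i < qw; i++) {
-            quorum.add(ensemble.get((start + i) % e));
-        }
-        return quorum;
-    }
-
-    public static void main(String[] args) {
-        List<String> ensemble = List.of("B1", "B2", "B3", "B4"); // E = 4
-        for (long entryId = 0; entryId < 6; entryId++) {
-            // prints the same quorums as the table above, e.g. 0 -> [B1, B2, B3]
-            System.out.println(entryId + " -> " + writeQuorum(ensemble, entryId, 3)); // Qw = 3
-        }
-    }
-}
-```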
-
-### Closing a ledger as a writer
-
-Closing a ledger is straightforward for a writer. The writer makes a CAS write to the metadata, changing the state to `CLOSED` and setting the last entry of the ledger to the last entry which we have acknowledged to the client.
-
-If the CAS write fails, it means someone else has modified the metadata. We reread the metadata, and retry closing as long as the state of the ledger is still `OPEN`. If the state is `IN_RECOVERY` we send an error to the client. If the state is `CLOSED` and the last entry is the same as the last entry we have acknowledged to the client, we complete the close operation successfully. If the last entry is different from what we have acknowledged to the client, we send an error to the client.
-
-### Closing a ledger as a reader
-
-A reader can also force a ledger to close. Forcing the ledger to close will prevent any writer from adding new entries to the ledger. This is called {% pop fencing %}. This can occur when a writer has crashed or become unavailable, and a new writer wants to take over writing to the log. The new writer must ensure that it has seen all updates from the previous writer, and prevent the previous writer from making any new updates before making any updates of its own.
-
-To recover a ledger, we first update the state in the metadata to `IN_RECOVERY`. We then send a fence message to all the bookies in the last fragment of the ledger. When a bookie receives a fence message for a ledger, the fenced state of the ledger is persisted to disk. Once we receive a response from at least (**Qw** - **Qa**) + 1 bookies from each write quorum in the ensemble, the ledger is fenced. For example, with **Qw** = 3 and **Qa** = 2, a response from 2 bookies in each write quorum is enough.
-
-By ensuring we have received a response from at least (**Qw** - **Qa**) + 1 bookies in each write quorum, we ensure that, if the old writer is alive and tries to add a new entry, there will be no write quorum in which **Qa** bookies will accept the write. If the old writer tries to update the ensemble, it will fail on the CAS metadata write, and then see that the ledger is in `IN_RECOVERY` state, and that it therefore shouldn’t try to write to it.
-
-The old writer will be able to write entries to individual bookies (we can’t guarantee that the fence message reaches all bookies), but as it will not be able to reach the ack quorum, it will not be able to send a success response to its client. The client will get a LedgerFenced error instead.
-
-It is important to note that when you get a ledger fenced message for an entry, it doesn’t mean that the entry has not been written. It means that the entry may or may not have been written, and this can only be determined after the ledger is recovered. In effect, LedgerFenced should be treated like a timeout.
-
-Once the ledger is fenced, recovery can begin. Recovery means finding the last entry of the ledger and closing the ledger. To find the last entry of the ledger, the client asks all bookies for the highest last add confirmed value they have seen. It waits until it has received a response from at least (**Qw** - **Qa**) + 1 bookies from each write quorum, and takes the highest response as the entry id to start reading forward from. It then starts reading forward in the ledger, one entry at a time, replicating all entries it sees to the entire write quorum for that entry. Once it can no longer read any more entries, it updates the state in the metadata to `CLOSED`, and sets the last entry of the ledger to the last entry it wrote. Multiple readers can try to recover a ledger at the same time, but as the metadata write is CAS they will all converge on the same last entry of the ledger.
-
-## Ledgers to logs
-
-In BookKeeper, {% pop ledgers %} can be used to build a replicated log for your system. All guarantees provided by BookKeeper are at the ledger level. Guarantees on the whole log can be built using the ledger guarantees and any consistent datastore with a compare-and-swap (CAS) primitive. BookKeeper uses ZooKeeper as the datastore but others could theoretically be used.
-
-A log in BookKeeper is built from some number of ledgers, with a fixed order. A ledger represents a single segment of the log. A ledger could be the whole period that one node was the leader, or there could be multiple ledgers for a single period of leadership. However, there can only ever be one leader that adds entries to a single ledger. Ledgers cannot be reopened for writing once they have been closed/recovered.
-
-> BookKeeper does *not* provide leader election. You must use a system like ZooKeeper for this.
-
-In many cases, leader election is really leader suggestion. Multiple nodes could think that they are leader at any one time. It is the job of the log to guarantee that only one can write changes to the system.
-
-### Opening a log
-
-Once a node thinks it is leader for a particular log, it must take the following steps:
-
-1. Read the list of ledgers for the log
-1. {% pop Fence %} the last two ledgers in the list. Two ledgers are fenced because the writer may be writing to the second-to-last ledger while adding the last ledger to the list.
-1. Create a new ledger
-1. Add the new ledger to the ledger list
-1. Write the new ledger back to the datastore using a CAS operation
-
-The fencing in step 2 and the CAS operation in step 5 prevent two nodes from thinking that they have leadership at any one time.
-
-The CAS operation will fail if the list of ledgers has changed between reading it and writing back the new list. When the CAS operation fails, the leader must start at step 1 again. Even better, it should check that it is in fact still the leader with the system that is providing leader election. The protocol will work correctly without this step, though it will be able to make very little progress if two nodes think they are leader and are duelling for the log.
-
-The node must not serve any writes until step 5 completes successfully.
-
-### Rolling ledgers
-
-The leader may wish to close the current ledger and open a new one every so often. Ledgers can only be deleted as a whole. If you don't roll the log, you won't be able to clean up old entries in the log without a leader change. By closing the current ledger and adding a new one, the leader allows the log to be truncated whenever that data is no longer needed. The steps for rolling the log are similar to those for creating a new ledger.
-
-1. Create a new ledger
-1. Add the new ledger to the ledger list
-1. Write the new ledger list to the datastore using CAS
-1. Close the previous ledger
-
-By deferring the closing of the previous ledger until step 4, we can continue writing to the log while we perform metadata update operations to add the new ledger. This is safe as long as you fence the last 2 ledgers when acquiring leadership.
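-
-As an illustration of the log-acquisition steps above, here is a rough sketch. The `LedgerListStore` interface and its `Versioned` result are hypothetical stand-ins for whatever CAS-capable datastore holds the ledger list (a ZooKeeper znode, for example); the BookKeeper calls use the builder API shown in the ledger API documentation, with default ensemble settings.
-
-```java
-import java.util.ArrayList;
-import java.util.List;
-import org.apache.bookkeeper.client.api.BookKeeper;
-import org.apache.bookkeeper.client.api.DigestType;
-import org.apache.bookkeeper.client.api.WriteHandle;
-
-class LogAcquirer {
-
-    // Hypothetical versioned view of the ledger list kept in the datastore.
-    record Versioned(List<Long> ledgerIds, long version) {}
-
-    // Hypothetical CAS-capable store, e.g. backed by a ZooKeeper znode.
-    interface LedgerListStore {
-        Versioned read();
-        boolean casWrite(List<Long> ledgerIds, long expectedVersion);
-    }
-
-    WriteHandle acquireLog(LedgerListStore store, BookKeeper bk, byte[] password) throws Exception {
-        while (true) {
-            Versioned current = store.read();                      // step 1: read the ledger list
-            List<Long> ids = new ArrayList<>(current.ledgerIds());
-            for (int i = Math.max(0, ids.size() - 2); i < ids.size(); i++) {
-                bk.newOpenLedgerOp()                               // step 2: fence the last two ledgers
-                  .withLedgerId(ids.get(i))
-                  .withPassword(password)
-                  .withRecovery(true)
-                  .execute().get();
-            }
-            WriteHandle wh = bk.newCreateLedgerOp()                // step 3: create a new ledger
-                .withDigestType(DigestType.CRC32)
-                .withPassword(password)
-                .execute().get();
-            ids.add(wh.getId());                                   // step 4: append it to the list
-            if (store.casWrite(ids, current.version())) {          // step 5: CAS the list back
-                return wh;                                         // we may now serve writes
-            }
-            // CAS failed: the list changed underneath us.
-            // Re-check leadership with the leader-election system, then retry from step 1.
-        }
-    }
-}
-```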
-
diff --git a/site/docs/latest/getting-started/concepts.md b/site/docs/latest/getting-started/concepts.md
deleted file mode 100644
index 7a3c92847b2..00000000000
--- a/site/docs/latest/getting-started/concepts.md
+++ /dev/null
@@ -1,202 +0,0 @@
----
-title: BookKeeper concepts and architecture
-subtitle: The core components and how they work
-prev: ../run-locally
----
-
-BookKeeper is a service that provides persistent storage of streams of log [entries](#entries)---aka *records*---in sequences called [ledgers](#ledgers). BookKeeper replicates stored entries across multiple servers.
-
-## Basic terms
-
-In BookKeeper:
-
-* each unit of a log is an [*entry*](#entries) (aka record)
-* streams of log entries are called [*ledgers*](#ledgers)
-* individual servers storing ledgers of entries are called [*bookies*](#bookies)
-
-BookKeeper is designed to be reliable and resilient to a wide variety of failures. Bookies can crash, corrupt data, or discard data, but as long as there are enough bookies behaving correctly in the ensemble, the service as a whole will behave correctly.
-
-## Entries
-
-> **Entries** contain the actual data written to ledgers, along with some important metadata.
-
-BookKeeper entries are sequences of bytes that are written to [ledgers](#ledgers). Each entry has the following fields:
-
-Field | Java type | Description
-:-----|:----------|:-----------
-Ledger number | `long` | The ID of the ledger to which the entry has been written
-Entry number | `long` | The unique ID of the entry
-Last confirmed (LC) | `long` | The ID of the last recorded entry
-Data | `byte[]` | The entry's data (written by the client application)
-Authentication code | `byte[]` | The message auth code, which includes *all* other fields in the entry
-
-## Ledgers
-
-> **Ledgers** are the basic unit of storage in BookKeeper.
-
-Ledgers are sequences of entries, while each entry is a sequence of bytes. Entries are written to a ledger:
-
-* sequentially, and
-* at most once.
-
-This means that ledgers have *append-only* semantics. Entries cannot be modified once they've been written to a ledger. Determining the proper write order is the responsibility of [client applications](#clients).
-
-## Clients and APIs
-
-> BookKeeper clients have two main roles: they create and delete ledgers, and they read entries from and write entries to ledgers.
->
-> BookKeeper provides both a lower-level and a higher-level API for ledger interaction.
-
-There are currently two APIs that can be used for interacting with BookKeeper:
-
-* The [ledger API](../../api/ledger-api) is a lower-level API that enables you to interact with {% pop ledgers %} directly.
-* The [DistributedLog API](../../api/distributedlog-api) is a higher-level API that enables you to use BookKeeper without directly interacting with ledgers.
-
-In general, you should choose the API based on how much fine-grained control you need over ledger semantics. The two APIs can also both be used within a single application.
-
-## Bookies
-
-> **Bookies** are individual BookKeeper servers that handle ledgers (more specifically, fragments of ledgers). Bookies function as part of an ensemble.
-
-A bookie is an individual BookKeeper storage server. Individual bookies store fragments of ledgers, not entire ledgers (for the sake of performance). For any given ledger **L**, an *ensemble* is the group of bookies storing the entries in **L**.
-
-Whenever entries are written to a ledger, those entries are {% pop striped %} across the ensemble (written to a sub-group of bookies rather than to all bookies).
-
-### Motivation
-
-> BookKeeper was initially inspired by the NameNode server in HDFS but its uses now extend far beyond this.
-
-The initial motivation for BookKeeper comes from the [Hadoop](http://hadoop.apache.org/) ecosystem. In the [Hadoop Distributed File System](https://wiki.apache.org/hadoop/HDFS) (HDFS), a special node called the [NameNode](https://wiki.apache.org/hadoop/NameNode) logs all operations in a reliable fashion, which ensures that recovery is possible in case of crashes.
-
-The NameNode, however, served only as initial inspiration for BookKeeper. The applications for BookKeeper extend far beyond this and include essentially any application that requires an append-based storage system. BookKeeper provides a number of advantages for such applications:
-
-* Highly efficient writes
-* High fault tolerance via replication of messages within ensembles of bookies
-* High throughput for write operations via {% pop striping %} (across as many bookies as you wish)
-
-## Metadata storage
-
-BookKeeper requires a metadata storage service to store information related to [ledgers](#ledgers) and available bookies. BookKeeper currently uses [ZooKeeper](https://zookeeper.apache.org) for this and other tasks.
-
-## Data management in bookies
-
-Bookies manage data in a [log-structured](https://en.wikipedia.org/wiki/Log-structured_file_system) way, which is implemented using three types of files:
-
-* [journals](#journals)
-* [entry logs](#entry-logs)
-* [index files](#index-files)
-
-### Journals
-
-A journal file contains BookKeeper transaction logs. Before any update to a ledger takes place, the bookie ensures that a transaction describing the update is written to non-volatile storage. A new journal file is created once the bookie starts or the older journal file reaches the journal file size threshold.
-
-### Entry logs
-
-An entry log file manages the written entries received from BookKeeper clients. Entries from different ledgers are aggregated and written sequentially, while their offsets are kept as pointers in a [ledger cache](#ledger-cache) for fast lookup.
-
-A new entry log file is created once the bookie starts or the older entry log file reaches the entry log size threshold. Old entry log files are removed by the Garbage Collector Thread once they are not associated with any active ledger.
-
-### Index files
-
-An index file is created for each ledger, which comprises a header and several fixed-length index pages that record the offsets of data stored in entry log files.
-
-Since updating index files would introduce random disk I/O, index files are updated lazily by a sync thread running in the background. This ensures speedy performance for updates. Before index pages are persisted to disk, they are gathered in a ledger cache for lookup.
-
-### Ledger cache
-
-Ledger index pages are cached in a memory pool, which allows for more efficient management of disk head scheduling.
-
-### Adding entries
-
-When a client instructs a {% pop bookie %} to write an entry to a ledger, the entry will go through the following steps to be persisted on disk:
-
-1. The entry is appended to an [entry log](#entry-logs)
-1. The index of the entry is updated in the [ledger cache](#ledger-cache)
-1. A transaction corresponding to this entry update is appended to the [journal](#journals)
-1. A response is sent to the BookKeeper client
-
-> For performance reasons, the entry log buffers entries in memory and commits them in batches, while the ledger cache holds index pages in memory and flushes them lazily. This process is described in more detail in the [Data flush](#data-flush) section below.
-
-### Data flush
-
-Ledger index pages are flushed to index files in the following two cases:
-
-* The ledger cache memory limit is reached. There is no more space available to hold newer index pages. Dirty index pages will be evicted from the ledger cache and persisted to index files.
-* A background sync thread is responsible for flushing index pages from the ledger cache to index files periodically.
-
-Besides flushing index pages, the sync thread is responsible for rolling journal files when journal files use too much disk space. The data flush flow in the sync thread is as follows:
-
-* A `LastLogMark` is recorded in memory. The `LastLogMark` indicates that those entries before it have been persisted (to both index and entry log files) and contains two parts:
-  1. A `txnLogId` (the file ID of a journal)
-  1. A `txnLogPos` (offset in a journal)
-* Dirty index pages are flushed from the ledger cache to the index file, and entry log files are flushed to ensure that all buffered entries in entry log files are persisted to disk.
-
-  Ideally, a bookie only needs to flush index pages and entry log files that contain entries before `LastLogMark`. There is, however, no such information in the ledger and entry log mapping to journal files. Consequently, the thread flushes the ledger cache and entry log entirely here, and may flush entries after the `LastLogMark`. Flushing more is not a problem, though, just redundant.
-* The `LastLogMark` is persisted to disk, which means that all entries added before the `LastLogMark` have had both their entry data and index pages persisted to disk. It is now safe to remove journal files created earlier than `txnLogId`.
-
-If the bookie has crashed before persisting `LastLogMark` to disk, it still has journal files containing entries for which index pages may not have been persisted. Consequently, when this bookie restarts, it inspects journal files to restore those entries, so that data isn't lost.
-
-Using the above data flush mechanism, it is safe for the sync thread to skip data flushing when the bookie shuts down. However, the entry logger uses a buffered channel to write entries in batches, and there might be data buffered in the channel upon shutdown. The bookie needs to ensure that the entry log flushes its buffered data during shutdown. Otherwise, entry log files become corrupted with partial entries.
-
-### Data compaction
-
-On bookies, entries of different ledgers are interleaved in entry log files. A bookie runs a garbage collector thread to delete unassociated entry log files to reclaim disk space. If a given entry log file contains entries from a ledger that has not been deleted, then the entry log file would never be removed and the occupied disk space never reclaimed. In order to avoid such a case, a bookie server compacts entry log files in a garbage collector thread to reclaim disk space.
-
-There are two kinds of compaction running with different frequency: minor compaction and major compaction. The differences between minor compaction and major compaction lie in their threshold values and compaction intervals.
-
-* The garbage collection threshold is the size percentage of an entry log file occupied by undeleted ledgers. The default minor compaction threshold is 0.2, while the major compaction threshold is 0.8.
-* The garbage collection interval is how frequently to run the compaction. The default minor compaction interval is 1 hour, while the major compaction interval is 1 day.
-
-> If either the threshold or interval is set to less than or equal to zero, compaction is disabled.
-
-The data compaction flow in the garbage collector thread is as follows:
-
-* The thread scans entry log files to get their entry log metadata, which records a list of ledgers comprising an entry log and their corresponding percentages.
-* With the normal garbage collection flow, once the bookie determines that a ledger has been deleted, the ledger will be removed from the entry log metadata and the size of the entry log reduced.
-* If the remaining size of an entry log file reaches a specified threshold, the entries of active ledgers in the entry log will be copied to a new entry log file.
-* Once all valid entries have been copied, the old entry log file is deleted.
-
-## ZooKeeper metadata
-
-BookKeeper requires a ZooKeeper installation for storing [ledger](#ledgers) metadata. Whenever you construct a [`BookKeeper`](../../api/javadoc/org/apache/bookkeeper/client/BookKeeper) client object, you need to pass a list of ZooKeeper servers as a parameter to the constructor, like this:
-
-```java
-String zkConnectionString = "127.0.0.1:2181";
-BookKeeper bkClient = new BookKeeper(zkConnectionString);
-```
-
-> For more info on using the BookKeeper Java client, see [this guide](../../api/ledger-api#the-java-ledger-api-client).
-
-## Ledger manager
-
-A *ledger manager* handles ledgers' metadata (which is stored in ZooKeeper). BookKeeper offers two types of ledger managers: the [flat ledger manager](#flat-ledger-manager) and the [hierarchical ledger manager](#hierarchical-ledger-manager). Both ledger managers extend the [`AbstractZkLedgerManager`](../../api/javadoc/org/apache/bookkeeper/meta/AbstractZkLedgerManager) abstract class.
-
-> #### Use the flat ledger manager in most cases
-> The flat ledger manager is the default and is recommended for nearly all use cases. The hierarchical ledger manager is better suited only for managing very large numbers of BookKeeper ledgers (> 50,000).
-
-### Flat ledger manager
-
-The *flat ledger manager*, implemented in the [`FlatLedgerManager`](../../api/javadoc/org/apache/bookkeeper/meta/FlatLedgerManager.html) class, stores all ledgers' metadata in child nodes of a single ZooKeeper path. The flat ledger manager creates [sequential nodes](https://zookeeper.apache.org/doc/trunk/zookeeperProgrammers.html#Sequence+Nodes+--+Unique+Naming) to ensure the uniqueness of the ledger ID and prefixes all nodes with `L`. Bookie servers manage their own active ledgers in a hash map so that it's easy to find which ledgers have been deleted from ZooKeeper and then garbage collect them.
-
-The flat ledger manager's garbage collection flow proceeds as follows:
-
-* All existing ledgers are fetched from ZooKeeper (`zkActiveLedgers`)
-* All ledgers currently active within the bookie are fetched (`bkActiveLedgers`)
-* The currently active ledgers are looped through to determine which ledgers don't currently exist in ZooKeeper. Those are then garbage collected.
-
-### Hierarchical ledger manager
-
-The *hierarchical ledger manager*, implemented in the [`HierarchicalLedgerManager`](../../api/javadoc/org/apache/bookkeeper/meta/HierarchicalLedgerManager) class, stores ledgers' metadata in two-level [znodes](https://zookeeper.apache.org/doc/current/zookeeperOver.html#Nodes+and+ephemeral+nodes). It first obtains a global unique ID from ZooKeeper using an [`EPHEMERAL_SEQUENTIAL`](https://zookeeper.apache.org/doc/current/api/org/apache/zookeeper/CreateMode.html#EPHEMERAL_SEQUENTIAL) znode. Since ZooKeeper's sequence counter has a format of `%10d` (10 digits with 0 padding, for example `0000000001`), the hierarchical ledger manager splits the generated ID into 3 parts:
-
-```shell
-{level1 (2 digits)}{level2 (4 digits)}{level3 (4 digits)}
-```
-
-These three parts are used to form the actual ledger node path to store ledger metadata:
-
-```shell
-{ledgers_root_path}/{level1}/{level2}/L{level3}
-```
-
-For example, ledger 0000000001 is split into three parts, 00, 0000, and 0001, and stored in znode `/{ledgers_root_path}/00/0000/L0001`. Each znode could hold as many as 10,000 ledgers, which avoids the problem of the child list being larger than the maximum ZooKeeper packet size (which is the [limitation](https://issues.apache.org/jira/browse/BOOKKEEPER-39) that initially prompted the creation of the hierarchical ledger manager).
diff --git a/site/docs/latest/getting-started/installation.md b/site/docs/latest/getting-started/installation.md
deleted file mode 100644
index 9986cd8e043..00000000000
--- a/site/docs/latest/getting-started/installation.md
+++ /dev/null
@@ -1,74 +0,0 @@
----
-title: BookKeeper installation
-subtitle: Download or clone BookKeeper and build it locally
-next: ../run-locally
----
-
-{% capture download_url %}http://apache.claz.org/bookkeeper/bookkeeper-{{ site.latest_release }}/bookkeeper-{{ site.latest_release }}-src.tar.gz{% endcapture %}
-
-You can install BookKeeper either by [downloading](#download) a [GZipped](http://www.gzip.org/) tarball package or [cloning](#clone) the BookKeeper repository.
-
-## Requirements
-
-* [Unix environment](http://www.opengroup.org/unix)
-* [Java Development Kit 1.6](http://www.oracle.com/technetwork/java/javase/downloads/index.html) or later
-* [Maven 3.0](https://maven.apache.org/install.html) or later
-
-## Download
-
-You can download Apache BookKeeper releases from one of many [Apache mirrors](http://www.apache.org/dyn/closer.cgi/bookkeeper). Here's an example for the [apache.claz.org](http://apache.claz.org/bookkeeper) mirror:
-
-```shell
-$ curl -O {{ download_url }}
-$ tar xvf bookkeeper-{{ site.latest_release }}-src.tar.gz
-$ cd bookkeeper-{{ site.latest_release }}
-```
-
-## Clone
-
-To build BookKeeper from source, clone the repository, either from the [GitHub mirror]({{ site.github_repo }}) or from the [Apache repository](http://git.apache.org/bookkeeper.git/):
-
-```shell
-# From the GitHub mirror
-$ git clone {{ site.github_repo}}
-
-# From Apache directly
-$ git clone git://git.apache.org/bookkeeper.git/
-```
-
-## Build using Maven
-
-Once you have BookKeeper on your local machine, either by [downloading](#download) or [cloning](#clone) it, you can build it from source using Maven:
-
-```shell
-$ mvn package
-```
-
-> You can skip tests by adding the `-DskipTests` flag when running `mvn package`.
-
-### Useful Maven commands
-
-Some other useful Maven commands beyond `mvn package`:
-
-Command | Action
-:-------|:------
-`mvn clean` | Removes build artifacts
-`mvn compile` | Compiles the Java sources
-`mvn compile spotbugs:spotbugs` | Compiles and runs checks using the Maven [SpotBugs](https://github.com/spotbugs/spotbugs-maven-plugin) plugin
-`mvn install` | Installs the BookKeeper JAR in your local Maven cache (usually in the `~/.m2` directory)
-`mvn deploy` | Deploys the BookKeeper JAR to the Maven repo (if you have the proper credentials)
-`mvn verify` | Performs a wide variety of verification and validation tasks
-`mvn apache-rat:check` | Runs Maven using the [Apache Rat](http://creadur.apache.org/rat/apache-rat-plugin/) plugin
-`mvn compile javadoc:aggregate` | Builds Javadocs locally
-`mvn package assembly:single` | Builds a complete distribution using the Maven [Assembly](http://maven.apache.org/plugins/maven-assembly-plugin/) plugin
-
-## Package directory
-
-The BookKeeper project contains several subfolders that you should be aware of:
-
-Subfolder | Contains
-:---------|:--------
-[`bookkeeper-server`]({{ site.github_repo }}/tree/master/bookkeeper-server) | The BookKeeper server and client
-[`bookkeeper-benchmark`]({{ site.github_repo }}/tree/master/bookkeeper-benchmark) | A benchmarking suite for measuring BookKeeper performance
-[`bookkeeper-stats`]({{ site.github_repo }}/tree/master/bookkeeper-stats) | A BookKeeper stats library
-[`bookkeeper-stats-providers`]({{ site.github_repo }}/tree/master/bookkeeper-stats-providers) | BookKeeper stats providers
diff --git a/site/docs/latest/getting-started/run-locally.md b/site/docs/latest/getting-started/run-locally.md
deleted file mode 100644
index edbfab9fda6..00000000000
--- a/site/docs/latest/getting-started/run-locally.md
+++ /dev/null
@@ -1,16 +0,0 @@
----
-title: Run bookies locally
-prev: ../installation
-next: ../concepts
-toc_disable: true
----
-
-{% pop Bookies %} are individual BookKeeper servers. You can run an ensemble of bookies locally on a single machine using the [`localbookie`](../../reference/cli#bookkeeper-localbookie) command of the `bookkeeper` CLI tool and specifying the number of bookies you'd like to include in the ensemble.
-
-This command starts up an ensemble with 10 bookies:
-
-```shell
-$ bookkeeper-server/bin/bookkeeper localbookie 10
-```
-
-> When you start up an ensemble using `localbookie`, all bookies run in a single JVM process.
diff --git a/site/docs/latest/overview/overview.md b/site/docs/latest/overview/overview.md
deleted file mode 100644
index 2b89711d117..00000000000
--- a/site/docs/latest/overview/overview.md
+++ /dev/null
@@ -1,60 +0,0 @@
----
-title: Apache BookKeeper™ 4.8.0-SNAPSHOT
----
-
-
-This documentation is for Apache BookKeeper™ version {{ site.latest_version }}.
-
-Apache BookKeeper™ is a scalable, fault-tolerant, low-latency storage service optimized for real-time workloads. It offers durability, replication, and strong consistency as essentials for building reliable real-time applications.
-
-BookKeeper is suitable for a wide variety of use cases, including:
-
-Use case | Example
-:--------|:-------
-[WAL](https://en.wikipedia.org/wiki/Write-ahead_logging) (write-ahead logging) | The HDFS [namenode](https://hadoop.apache.org/docs/r2.5.2/hadoop-project-dist/hadoop-hdfs/HDFSHighAvailabilityWithNFS.html#BookKeeper_as_a_Shared_storage_EXPERIMENTAL)
-[WAL](https://en.wikipedia.org/wiki/Write-ahead_logging) (write-ahead logging) | Twitter [Manhattan](https://blog.twitter.com/engineering/en_us/a/2016/strong-consistency-in-manhattan.html)
-[WAL](https://en.wikipedia.org/wiki/Write-ahead_logging) (write-ahead logging) | [HerdDB](https://github.com/diennea/herddb)
-Message storage | [Apache Pulsar](http://pulsar.incubator.apache.org/docs/latest/getting-started/ConceptsAndArchitecture/#persistent-storage)
-Offset/cursor storage | [Apache Pulsar](http://pulsar.incubator.apache.org/docs/latest/getting-started/ConceptsAndArchitecture/#persistent-storage)
-Object/[BLOB](https://en.wikipedia.org/wiki/Binary_large_object) storage | Storing snapshots to replicated state machines
-
-Learn more about Apache BookKeeper™ and what it can do for your organization:
-
-- [Apache BookKeeper {{ site.latest_version }} Release Notes](../releaseNotes)
-- [Java API docs](../../api/javadoc)
-
-Or start [using](../../getting-started/installation) Apache BookKeeper today.
-
-### Users
-
-- **Concepts**: Start with [concepts](../../getting-started/concepts). This will help you to fully understand
-    the other parts of the documentation, including the setup, integration and operation guides.
-- **Getting Started**: Install [Apache BookKeeper](../../getting-started/installation) and run bookies [locally](../../getting-started/run-locally)
-- **API**: Read the [API](../../api/overview) documentation to learn how to use Apache BookKeeper to build your applications.
-- **Deployment**: The [Deployment Guide](../../deployment/manual) shows how to deploy Apache BookKeeper to production clusters.
-
-### Administrators
-
-- **Operations**: The [Admin Guide](../../admin/bookies) shows how to run Apache BookKeeper in production, covering production
-    considerations and best practices.
-
-### Contributors
-
-- **Details**: Learn the [design details](../../development/protocol) to understand the internals.
diff --git a/site/docs/latest/overview/releaseNotes.md b/site/docs/latest/overview/releaseNotes.md
deleted file mode 100644
index 609835da408..00000000000
--- a/site/docs/latest/overview/releaseNotes.md
+++ /dev/null
@@ -1,18 +0,0 @@
----
-title: Apache BookKeeper 4.8.0-SNAPSHOT Release Notes
----
-
-Apache BookKeeper {{ site.latest_version }} is still under development.
-
-If you want to follow the progress of `{{ site.latest_version }}`, you can:
-
-- Track the progress by following the [issues](https://github.com/apache/bookkeeper/issues) on GitHub.
-- [Subscribe](mailto:dev-subscribe@bookkeeper.apache.org) to the [dev@bookkeeper.apache.org](mailto:dev@bookkeeper.apache.org) list
-    to join development discussions, propose new ideas and connect with contributors.
-- [Join us on Slack](https://apachebookkeeper.herokuapp.com/) to connect with Apache BookKeeper committers and contributors.
-
-### Dependencies Changes
-
-Here is a list of dependencies changed in 4.8.0-SNAPSHOT:
-
-### Existing API changes
diff --git a/site/docs/latest/overview/releaseNotesTemplate.md b/site/docs/latest/overview/releaseNotesTemplate.md
deleted file mode 100644
index 4e2dab105a5..00000000000
--- a/site/docs/latest/overview/releaseNotesTemplate.md
+++ /dev/null
@@ -1,17 +0,0 @@
----
-title: Apache BookKeeper 4.8.0-SNAPSHOT Release Notes
----
-
-[provide a summary of this release]
-
-Apache BookKeeper users are encouraged to upgrade to {{ site.latest_version }}. The technical details of this release are summarized
-below.
-
-## Highlights
-
-[List the highlights]
-
-## Details
-
-[link to issues list]
-
diff --git a/site/docs/latest/reference/cli.md b/site/docs/latest/reference/cli.md
deleted file mode 100644
index 8beb36ff071..00000000000
--- a/site/docs/latest/reference/cli.md
+++ /dev/null
@@ -1,10 +0,0 @@
----
-title: BookKeeper CLI tool reference
-subtitle: A reference guide to the command-line tools that you can use to administer BookKeeper
----
-
-{% include cli.html id="bookkeeper" %}
-
-## The BookKeeper shell
-
-{% include shell.html %}
diff --git a/site/docs/latest/reference/config.md b/site/docs/latest/reference/config.md
deleted file mode 100644
index 8997b6b62f0..00000000000
--- a/site/docs/latest/reference/config.md
+++ /dev/null
@@ -1,9 +0,0 @@
----
-title: BookKeeper configuration
-subtitle: A reference guide to all of BookKeeper's configurable parameters
----
-
-
-The table below lists parameters that you can set to configure {% pop bookies %}. All configuration takes place in the `bk_server.conf` file in the `bookkeeper-server/conf` directory of your [BookKeeper installation](../../getting-started/installing).
-
-{% include config.html id="bk_server" %}
diff --git a/site/docs/latest/reference/metrics.md b/site/docs/latest/reference/metrics.md
deleted file mode 100644
index 8bd6fe0a165..00000000000
--- a/site/docs/latest/reference/metrics.md
+++ /dev/null
@@ -1,3 +0,0 @@
----
-title: BookKeeper metrics reference
----
diff --git a/site/docs/latest/security/overview.md b/site/docs/latest/security/overview.md
deleted file mode 100644
index b825776eb67..00000000000
--- a/site/docs/latest/security/overview.md
+++ /dev/null
@@ -1,21 +0,0 @@
----
-title: BookKeeper Security
-next: ../tls
----
-
-In the 4.5.0 release, the BookKeeper community added a number of features that can be used, together or separately, to secure a BookKeeper cluster.
-The following security measures are currently supported:
-
-1. Authentication of connections to bookies from clients, using either [TLS](../tls) or [SASL (Kerberos)](../sasl).
-2. Authentication of connections from clients, bookies, and autorecovery daemons to [ZooKeeper](../zookeeper), when using ZooKeeper-based ledger managers.
-3. Encryption of data transferred between bookies and clients, and between bookies and autorecovery daemons, using [TLS](../tls).
-
-It's worth noting that security is optional: non-secured clusters are supported, as well as a mix of authenticated, unauthenticated, encrypted, and non-encrypted clients.
-
-NOTE: authorization is not yet available in 4.5.0. The Apache BookKeeper community is looking to add this feature in subsequent releases.
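-
-Taken together, these measures map onto a handful of bookie configuration settings. A minimal sketch assembled from the sections linked below (the paths and values are illustrative, not defaults):
-
-```shell
-# SASL (Kerberos) authentication of clients, covered in the SASL section
-bookieAuthProviderFactoryClass=org.apache.bookkeeper.sasl.SASLBookieAuthProviderFactory
-# TLS encryption of client traffic, covered in the TLS section
-tlsProvider=OpenSSL
-tlsKeyStore=/var/private/tls/bookie.keystore.jks
-tlsKeyStorePasswordPath=/var/private/tls/bookie.keystore.passwd
-```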
-
-## Next Steps
-
-- [Encryption and Authentication using TLS](../tls)
-- [Authentication using SASL](../sasl)
-- [ZooKeeper Authentication](../zookeeper)
diff --git a/site/docs/latest/security/sasl.md b/site/docs/latest/security/sasl.md
deleted file mode 100644
index ffb972a8936..00000000000
--- a/site/docs/latest/security/sasl.md
+++ /dev/null
@@ -1,202 +0,0 @@
----
-title: Authentication using SASL
-prev: ../tls
-next: ../zookeeper
----
-
-Bookies support client authentication via SASL. Currently we only support GSSAPI (Kerberos). We will start
-with a general description of how to configure `SASL` for bookies, clients, and autorecovery daemons, follow with
-mechanism-specific details, and wrap up with some operational details.
-
-## SASL configuration for Bookies
-
-1. Select the mechanisms to enable in the bookies. `GSSAPI` is the only mechanism currently supported by BookKeeper.
-2. Add a `JAAS` config file for the selected mechanisms as described in the examples for setting up [GSSAPI (Kerberos)](#kerberos).
-3. Pass the `JAAS` config file location as a JVM parameter to each Bookie. For example:
-
-    ```shell
-    -Djava.security.auth.login.config=/etc/bookkeeper/bookie_jaas.conf
-    ```
-
-4. Enable the SASL auth plugin in bookies by setting `bookieAuthProviderFactoryClass` to `org.apache.bookkeeper.sasl.SASLBookieAuthProviderFactory`.
-
-    ```shell
-    bookieAuthProviderFactoryClass=org.apache.bookkeeper.sasl.SASLBookieAuthProviderFactory
-    ```
-
-5. If you are running `autorecovery` along with bookies, then you should also enable the SASL auth plugin for `autorecovery` by setting
-   `clientAuthProviderFactoryClass` to `org.apache.bookkeeper.sasl.SASLClientProviderFactory`.
-
-    ```shell
-    clientAuthProviderFactoryClass=org.apache.bookkeeper.sasl.SASLClientProviderFactory
-    ```
-
-6. Follow the steps in [GSSAPI (Kerberos)](#kerberos) to configure SASL.
-
-#### Important Notes
-
-1. `Bookie` is a section name in the JAAS file used by each bookie. This section tells the bookie which principal to use
-   and the location of the keytab where the principal is stored. It allows the bookie to log in using the keytab specified in this section.
-2. `Auditor` is a section name in the JAAS file used by the `autorecovery` daemon (it can be co-run with bookies). This section tells the
-   `autorecovery` daemon which principal to use and the location of the keytab where the principal is stored. It allows the daemon to
-   log in using the keytab specified in this section.
-3. The `Client` section is used to authenticate a SASL connection with ZooKeeper. It also allows the bookies to set ACLs on ZooKeeper nodes,
-   which locks these nodes down so that only the bookies can modify them. It is necessary to have the same primary name across all bookies.
-   If you want to use a section name other than `Client`, set the system property `zookeeper.sasl.client` to the appropriate name
-   (e.g. `-Dzookeeper.sasl.client=ZKClient`).
-4. ZooKeeper uses `zookeeper` as the service name by default. If you want to change this, set the system property
-   `zookeeper.sasl.client.username` to the appropriate name (e.g. `-Dzookeeper.sasl.client.username=zk`).
-
-## SASL configuration for Clients
-
-To configure `SASL` authentication on the clients:
-
-1. Select a `SASL` mechanism for authentication and add a `JAAS` config file for the selected mechanism as described in the examples for
-   setting up [GSSAPI (Kerberos)](#kerberos).
-2. Pass the `JAAS` config file location as a JVM parameter to each client JVM.
-For example:
-
-    ```shell
-    -Djava.security.auth.login.config=/etc/bookkeeper/bookkeeper_jaas.conf
-    ```
-
-3. Configure the following properties in the bookkeeper `ClientConfiguration`:
-
-    ```shell
-    clientAuthProviderFactoryClass=org.apache.bookkeeper.sasl.SASLClientProviderFactory
-    ```
-
-Follow the steps in [GSSAPI (Kerberos)](#kerberos) to configure SASL for the selected mechanism.
-
-## Authentication using SASL/Kerberos
-
-### Prerequisites
-
-#### Kerberos
-
-If your organization is already using a Kerberos server (for example, by using `Active Directory`), there is no need to
-install a new server just for BookKeeper. Otherwise you will need to install one; your Linux vendor likely has packages
-for `Kerberos` and a short guide on how to install and configure it ([Ubuntu](https://help.ubuntu.com/community/Kerberos),
-[Redhat](https://access.redhat.com/documentation/en-US/Red_Hat_Enterprise_Linux/6/html/Managing_Smart_Cards/installing-kerberos.html)).
-Note that if you are using Oracle Java, you will need to download JCE policy files for your Java version and copy them to `$JAVA_HOME/jre/lib/security`.
-
-#### Kerberos Principals
-
-If you are using your organization's Kerberos or Active Directory server, ask your Kerberos administrator for a principal
-for each Bookie in your cluster and for every operating system user that will access BookKeeper with Kerberos authentication
-(via clients and tools).
-
-If you have installed your own Kerberos, you will need to create these principals yourself using the following commands:
-
-```shell
-sudo /usr/sbin/kadmin.local -q 'addprinc -randkey bookkeeper/{hostname}@{REALM}'
-sudo /usr/sbin/kadmin.local -q "ktadd -k /etc/security/keytabs/{keytabname}.keytab bookkeeper/{hostname}@{REALM}"
-```
-
-##### All hosts must be reachable using hostnames
-
-It is a *Kerberos* requirement that all your hosts can be resolved with their FQDNs.
-
-### Configuring Bookies
-
-1. Add a suitably modified JAAS file similar to the one below to each Bookie's config directory; for this example, let's call it `bookie_jaas.conf`
-   (note that each bookie should have its own keytab):
-
-    ```
-    Bookie {
-        com.sun.security.auth.module.Krb5LoginModule required
-        useKeyTab=true
-        storeKey=true
-        keyTab="/etc/security/keytabs/bookie.keytab"
-        principal="bookkeeper/bk1.hostname.com@EXAMPLE.COM";
-    };
-    // ZooKeeper client authentication
-    Client {
-        com.sun.security.auth.module.Krb5LoginModule required
-        useKeyTab=true
-        storeKey=true
-        keyTab="/etc/security/keytabs/bookie.keytab"
-        principal="bookkeeper/bk1.hostname.com@EXAMPLE.COM";
-    };
-    // If you are running `autorecovery` along with bookies
-    Auditor {
-        com.sun.security.auth.module.Krb5LoginModule required
-        useKeyTab=true
-        storeKey=true
-        keyTab="/etc/security/keytabs/bookie.keytab"
-        principal="bookkeeper/bk1.hostname.com@EXAMPLE.COM";
-    };
-    ```
-
-   The `Bookie` section in the JAAS file tells the bookie which principal to use and the location of the keytab where this principal is stored.
-   It allows the bookie to log in using the keytab specified in this section. See [notes](#notes) for more details on ZooKeeper's SASL configuration.
-
-2. Pass the name of the JAAS file as a JVM parameter to each Bookie:
-
-    ```shell
-    -Djava.security.auth.login.config=/etc/bookkeeper/bookie_jaas.conf
-    ```
-
-   You may also wish to specify the path to the `krb5.conf` file
-   (see [JDK's Kerberos Requirements](https://docs.oracle.com/javase/8/docs/technotes/guides/security/jgss/tutorials/KerberosReq.html) for more details):
-
-    ```shell
-    -Djava.security.krb5.conf=/etc/bookkeeper/krb5.conf
-    ```
-
-3. Make sure the keytabs configured in the JAAS file are readable by the operating system user who is starting the Bookies.
-
-4. Enable the SASL authentication plugin in the bookies by setting the following parameters:
-
-    ```shell
-    bookieAuthProviderFactoryClass=org.apache.bookkeeper.sasl.SASLBookieAuthProviderFactory
-    # if you run `autorecovery` along with bookies
-    clientAuthProviderFactoryClass=org.apache.bookkeeper.sasl.SASLClientProviderFactory
-    ```
-
-### Configuring Clients
-
-To configure SASL authentication on the clients:
-
-1. Clients will authenticate to the cluster with their own principal (usually with the same name as the user running the client),
-   so obtain or create these principals as needed. Then create a `JAAS` file for each principal. The `BookKeeper` section describes
-   how clients (such as writers and readers) can connect to the bookies. The following is an example configuration for a client using
-   a keytab (recommended for long-running processes):
-
-    ```
-    BookKeeper {
-        com.sun.security.auth.module.Krb5LoginModule required
-        useKeyTab=true
-        storeKey=true
-        keyTab="/etc/security/keytabs/bookkeeper.keytab"
-        principal="bookkeeper-client-1@EXAMPLE.COM";
-    };
-    ```
-
-2. Pass the name of the JAAS file as a JVM parameter to the client JVM:
-
-    ```shell
-    -Djava.security.auth.login.config=/etc/bookkeeper/bookkeeper_jaas.conf
-    ```
-
-   You may also wish to specify the path to the `krb5.conf` file (see
-   [JDK's Kerberos Requirements](https://docs.oracle.com/javase/8/docs/technotes/guides/security/jgss/tutorials/KerberosReq.html) for more details):
-
-    ```shell
-    -Djava.security.krb5.conf=/etc/bookkeeper/krb5.conf
-    ```
-
-3. Make sure the keytabs configured in `bookkeeper_jaas.conf` are readable by the operating system user who is starting the BookKeeper client.
-
-4. Enable the SASL authentication plugin in the client by setting the following parameter:
-
-    ```shell
-    clientAuthProviderFactoryClass=org.apache.bookkeeper.sasl.SASLClientProviderFactory
-    ```
-
-## Enabling Logging for SASL
-
-To enable SASL debug output, you can set the `sun.security.krb5.debug` system property to `true`.
-
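-For example, a minimal sketch passing the property on a bookie or client JVM command line:
-
-```shell
--Dsun.security.krb5.debug=true
-```
-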
diff --git a/site/docs/latest/security/tls.md b/site/docs/latest/security/tls.md
deleted file mode 100644
index cd250ab2aa5..00000000000
--- a/site/docs/latest/security/tls.md
+++ /dev/null
@@ -1,210 +0,0 @@
----
-title: Encryption and Authentication using TLS
-prev: ../overview
-next: ../sasl
----
-
-Apache BookKeeper allows clients and autorecovery daemons to communicate over TLS, although this is not enabled by default.
-
-## Overview
-
-The bookies need their own key and certificate in order to use TLS. Clients can optionally provide a key and a certificate
-for mutual authentication. Each bookie or client can also be configured with a truststore, which is used to
-determine which certificates (bookie or client identities) to trust (authenticate).
-
-The truststore can be configured in many ways. To understand the truststore, consider the following two examples:
-
-1. the truststore contains one or many certificates;
-2. it contains a certificate authority (CA).
-
-In (1), with a list of certificates, the bookie or client will trust any certificate listed in the truststore.
-In (2), with a CA, the bookie or client will trust any certificate that was signed by the CA in the truststore.
-
-(TBD: benefits)
-
-## Generate TLS key and certificate
-
-The first step of deploying TLS is to generate the key and the certificate for each machine in the cluster.
-You can use Java's `keytool` utility to accomplish this task. We will generate the key into a temporary keystore
-initially so that we can export and sign it later with the CA.
-
-```shell
-keytool -keystore bookie.keystore.jks -alias localhost -validity {validity} -genkey
-```
-
-You need to specify two parameters in the above command:
-
-1. `keystore`: the keystore file that stores the certificate. The *keystore* file contains the private key of
-   the certificate; hence, it needs to be kept safe.
-2. `validity`: the validity period of the certificate, in days.
-Ensure that the common name (CN) exactly matches the fully qualified domain name (FQDN) of the server.
-The client compares the CN with the DNS domain name to ensure that it is indeed connecting to the desired server, not a malicious one.
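-
-For example, a minimal sketch that pins the CN to the bookie's FQDN (the hostname `bk1.example.com` and the 365-day validity are illustrative):
-
-```shell
-keytool -keystore bookie.keystore.jks -alias localhost -validity 365 -genkey -keyalg RSA -dname "CN=bk1.example.com" -ext SAN=DNS:bk1.example.com
-```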
-
-## Creating your own CA
-
-After the first step, each machine in the cluster has a public-private key pair, and a certificate to identify the machine.
-The certificate, however, is unsigned, which means that an attacker can create such a certificate to pretend to be any machine.
-
-Therefore, it is important to prevent forged certificates by signing them for each machine in the cluster.
-A `certificate authority (CA)` is responsible for signing certificates. A CA works like a government that issues passports:
-the government stamps (signs) each passport so that the passport becomes difficult to forge. Other governments verify the stamps
-to ensure the passport is authentic. Similarly, the CA signs the certificates, and the cryptography guarantees that a signed
-certificate is computationally difficult to forge. Thus, as long as the CA is a genuine and trusted authority, the clients have
-high assurance that they are connecting to the authentic machines.
-
-```shell
-openssl req -new -x509 -keyout ca-key -out ca-cert -days 365
-```
-
-The generated CA is simply a *public-private* key pair and certificate, and it is intended to sign other certificates.
-
-The next step is to add the generated CA to the clients' truststore so that the clients can trust this CA:
-
-```shell
-keytool -keystore bookie.truststore.jks -alias CARoot -import -file ca-cert
-```
-
-NOTE: If you configure the bookies to require client authentication by setting `sslClientAuthentication` to `true` on the
-[bookie config](../../reference/config), then you must also provide a truststore for the bookies, and it should contain all the CA
-certificates that client keys were signed by.
-
-```shell
-keytool -keystore client.truststore.jks -alias CARoot -import -file ca-cert
-```
-
-In contrast to the keystore, which stores each machine's own identity, the truststore of a client stores all the certificates
-that the client should trust. Importing a certificate into one's truststore also means trusting all certificates that are signed
-by that certificate. As in the analogy above, trusting the government (CA) also means trusting all passports (certificates) that
-it has issued. This attribute is called the chain of trust, and it is particularly useful when deploying TLS on a large BookKeeper cluster.
-You can sign all certificates in the cluster with a single CA, and have all machines share the same truststore that trusts the CA.
-That way all machines can authenticate all other machines.
-
-## Signing the certificate
-
-The next step is to sign all certificates in the keystore with the CA we generated. First, you need to export the certificate from the keystore:
-
-```shell
-keytool -keystore bookie.keystore.jks -alias localhost -certreq -file cert-file
-```
-
-Then sign it with the CA:
-
-```shell
-openssl x509 -req -CA ca-cert -CAkey ca-key -in cert-file -out cert-signed -days {validity} -CAcreateserial -passin pass:{ca-password}
-```
-
-Finally, you need to import both the certificate of the CA and the signed certificate into the keystore:
-
-```shell
-keytool -keystore bookie.keystore.jks -alias CARoot -import -file ca-cert
-keytool -keystore bookie.keystore.jks -alias localhost -import -file cert-signed
-```
-
-The definitions of the parameters are the following:
-
-1. `keystore`: the location of the keystore
-2. `ca-cert`: the certificate of the CA
-3. `ca-key`: the private key of the CA
-4. `ca-password`: the passphrase of the CA
-5. `cert-file`: the exported, unsigned certificate of the bookie
-6. `cert-signed`: the signed certificate of the bookie
-
-(TBD: add a script to automatically generate truststores and keystores.)
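-
-To sanity-check the result, a hedged sketch that lists the keystore contents (both the `CARoot` and `localhost` entries should now appear, the latter carrying the CA-signed chain):
-
-```shell
-keytool -keystore bookie.keystore.jks -list -v
-```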
-
-## Configuring Bookies
-
-Bookies support TLS for connections on the same service port. In order to enable TLS, you need to configure `tlsProvider` to be either
-`JDK` or `OpenSSL`. If `OpenSSL` is configured, it will use `netty-tcnative-boringssl-static`, which loads the binding corresponding
-to the platform the bookies run on.
-
-> The current `OpenSSL` implementation doesn't depend on the OpenSSL library installed on the system. If you want to leverage the system's
-OpenSSL, you can check [this example](http://netty.io/wiki/forked-tomcat-native.html) on how to replace the JARs on the classpath with
-netty bindings that use the installed OpenSSL.
-
-The following TLS configs are needed on the bookie side:
-
-```shell
-tlsProvider=OpenSSL
-# key store
-tlsKeyStoreType=JKS
-tlsKeyStore=/var/private/tls/bookie.keystore.jks
-tlsKeyStorePasswordPath=/var/private/tls/bookie.keystore.passwd
-# trust store
-tlsTrustStoreType=JKS
-tlsTrustStore=/var/private/tls/bookie.truststore.jks
-tlsTrustStorePasswordPath=/var/private/tls/bookie.truststore.passwd
-```
-
-NOTE: it is important to restrict access to the store files and corresponding password files via filesystem permissions.
-
-Optional settings that are worth considering:
-
-1. `tlsClientAuthentication=false`: enables/disables using TLS for authentication. When enabled, this config authenticates the other end
-   of the communication channel. It should be enabled on both bookies and clients for mutual TLS.
-2. `tlsEnabledCipherSuites`: a cipher suite is a named combination of authentication, encryption, MAC, and key exchange
-   algorithms used to negotiate the security settings for a network connection using the TLS protocol. By default,
-   it is null. See [OpenSSL Ciphers](https://www.openssl.org/docs/man1.0.2/apps/ciphers.html) and
-   [JDK Ciphers](http://docs.oracle.com/javase/8/docs/technotes/guides/security/StandardNames.html#ciphersuites).
-3. `tlsEnabledProtocols=TLSv1.2,TLSv1.1,TLSv1`: lists the TLS protocols that you are going to accept from clients.
-   By default, it is not set.
-
-To verify that the bookie's keystore and truststore are set up correctly, you can run the following command:
-
-```shell
-openssl s_client -debug -connect localhost:3181 -tls1
-```
-
-NOTE: TLSv1 should be listed under `tlsEnabledProtocols` for this check.
-
-In the output of this command you should see the server's certificate:
-
-```shell
------BEGIN CERTIFICATE-----
-{variable sized random bytes}
------END CERTIFICATE-----
-```
-
-If the certificate does not show up, or if there are any other error messages, then your keystore is not set up correctly.
-
-## Configuring Clients
-
-TLS is supported only for the new BookKeeper client (BookKeeper versions 4.5.0 and higher); the older clients are not
-supported. The TLS configs are the same as for bookies.
-
-If client authentication is not required by the bookies, the following is a minimal configuration example:
-
-```shell
-tlsProvider=OpenSSL
-clientTrustStore=/var/private/tls/client.truststore.jks
-clientTrustStorePasswordPath=/var/private/tls/client.truststore.passwd
-```
-
-If client authentication is required, then a keystore must be created for each client, and the bookies' truststores must
-trust the certificate in the client's keystore. This may be done using commands that are similar to what we used for
-the [bookie keystore](#bookie-keystore), as sketched below.
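-
-A minimal sketch of those client-side commands, assuming the `ca-cert` and `ca-key` files created earlier (the client file names are illustrative):
-
-```shell
-# generate the client key pair, then export and sign its certificate with the CA
-keytool -keystore client.keystore.jks -alias localhost -validity {validity} -genkey
-keytool -keystore client.keystore.jks -alias localhost -certreq -file client-cert-file
-openssl x509 -req -CA ca-cert -CAkey ca-key -in client-cert-file -out client-cert-signed -days {validity} -CAcreateserial -passin pass:{ca-password}
-# import the CA certificate and the signed client certificate into the client keystore
-keytool -keystore client.keystore.jks -alias CARoot -import -file ca-cert
-keytool -keystore client.keystore.jks -alias localhost -import -file client-cert-signed
-```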
-
-And the following must also be configured:
-
-```shell
-tlsClientAuthentication=true
-clientKeyStore=/var/private/tls/client.keystore.jks
-clientKeyStorePasswordPath=/var/private/tls/client.keystore.passwd
-```
-
-NOTE: it is important to restrict access to the store files and corresponding password files via filesystem permissions.
-
-(TBD: add example to use tls in bin/bookkeeper script?)
-
-## Enabling TLS Logging
-
-You can enable TLS debug logging at the JVM level by starting the bookies and/or clients with the `javax.net.debug` system property. For example:
-
-```shell
--Djavax.net.debug=all
-```
-
-You can find more details in the [Oracle documentation](http://docs.oracle.com/javase/8/docs/technotes/guides/security/jsse/ReadDebug.html) on
-debugging SSL/TLS connections.
diff --git a/site/docs/latest/security/zookeeper.md b/site/docs/latest/security/zookeeper.md
deleted file mode 100644
index e16be69a1d3..00000000000
--- a/site/docs/latest/security/zookeeper.md
+++ /dev/null
@@ -1,41 +0,0 @@
----
-title: ZooKeeper Authentication
-prev: ../sasl
----
-
-## New Clusters
-
-To enable `ZooKeeper` authentication on Bookies or Clients, there are two necessary steps:
-
-1. Create a `JAAS` login file and set the appropriate system property to point to it as described in [GSSAPI (Kerberos)](../sasl#notes).
-2. Set the configuration property `zkEnableSecurity` in each bookie to `true`.
-
-The metadata stored in `ZooKeeper` is such that only certain clients will be able to modify and read the corresponding znodes.
-The rationale behind this decision is that the data stored in ZooKeeper is not sensitive, but inappropriate manipulation of znodes can cause cluster
-disruption.
-
-## Migrating Clusters
-
-If you are running a version of BookKeeper that does not support security, or are simply running with security disabled, and you want to make the cluster secure,
-then you need to execute the following steps to enable ZooKeeper authentication with minimal disruption to your operations.
-
-1. Perform a rolling restart setting the `JAAS` login file, which enables bookies or clients to authenticate. At the end of the rolling restart,
-   bookies (or clients) are able to manipulate znodes with strict ACLs, but they will not create znodes with those ACLs.
-2. Perform a second rolling restart of bookies, this time setting the configuration parameter `zkEnableSecurity` to true, which enables the use
-   of secure ACLs when creating znodes.
-3. Currently we don't provide a tool to set ACLs on old znodes. We recommend setting them manually using ZooKeeper tools (see the sketch below).
-
-It is also possible to turn off authentication in a secured cluster. To do it, follow these steps:
-
-1. Perform a rolling restart of bookies setting the `JAAS` login file, which enables bookies to authenticate, but setting `zkEnableSecurity` to `false`.
-   At the end of the rolling restart, bookies stop creating znodes with secure ACLs, but are still able to authenticate and manipulate all znodes.
-2. Use ZooKeeper tools to manually reset all ACLs under the znode set in `zkLedgersRootPath`, which defaults to `/ledgers` (again, see the sketch below).
-3. Perform a second rolling restart of bookies, this time omitting the system property that sets the `JAAS` login file.
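-
-A hedged sketch of those manual ACL changes using the ZooKeeper CLI (`zkCli.sh`); the `bookkeeper` principal name is illustrative, and on older ZooKeeper releases that lack the recursive `-R` flag the ACL must be applied to each znode individually:
-
-```shell
-# lock the ledgers tree down to the bookies' SASL identity
-setAcl -R /ledgers sasl:bookkeeper:cdrwa
-# or, when turning security off, open it back up
-setAcl -R /ledgers world:anyone:cdrwa
-```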
-
-## Migrating the ZooKeeper ensemble
-
-It is also necessary to enable authentication on the `ZooKeeper` ensemble. To do it, we need to perform a rolling restart of the ensemble and
-set a few properties. Please refer to the ZooKeeper documentation for more details.
-
-1. [Apache ZooKeeper Documentation](http://zookeeper.apache.org/doc/r3.4.6/zookeeperProgrammers.html#sc_ZooKeeperAccessControl)
-2. [Apache ZooKeeper Wiki](https://cwiki.apache.org/confluence/display/ZOOKEEPER/Zookeeper+and+SASL)
diff --git a/site/fonts/FontAwesome.otf b/site/fonts/FontAwesome.otf
deleted file mode 100644 index 401ec0f36e4..00000000000
Binary files a/site/fonts/FontAwesome.otf and /dev/null differ
diff --git a/site/fonts/fontawesome-webfont.eot b/site/fonts/fontawesome-webfont.eot
deleted file mode 100644 index e9f60ca953f..00000000000
Binary files a/site/fonts/fontawesome-webfont.eot and /dev/null differ
diff --git a/site/fonts/fontawesome-webfont.svg b/site/fonts/fontawesome-webfont.svg
deleted file mode 100644 index 855c845e538..00000000000
[deleted SVG font: 2,671 lines of FontForge-generated Font Awesome glyph markup (Copyright Dave Gandy 2016), elided]
diff --git a/site/fonts/fontawesome-webfont.ttf b/site/fonts/fontawesome-webfont.ttf
deleted file mode 100644 index 35acda2fa11..00000000000
Binary files a/site/fonts/fontawesome-webfont.ttf and /dev/null differ
diff --git a/site/fonts/fontawesome-webfont.woff b/site/fonts/fontawesome-webfont.woff
deleted file mode 100644 index 400014a4b06..00000000000
Binary files a/site/fonts/fontawesome-webfont.woff and /dev/null differ
diff --git a/site/fonts/fontawesome-webfont.woff2 b/site/fonts/fontawesome-webfont.woff2
deleted file mode 100644 index 4d13fc60404..00000000000
Binary files a/site/fonts/fontawesome-webfont.woff2 and /dev/null differ
diff --git a/site/img/dcos-logo.png b/site/img/dcos-logo.png
deleted file mode 100644 index a9a23e0e123..00000000000
Binary files a/site/img/dcos-logo.png and /dev/null differ
diff --git a/site/img/dcos/exhibitor.png b/site/img/dcos/exhibitor.png
deleted file mode 100644 index 3bb1c7f5541..00000000000
Binary files a/site/img/dcos/exhibitor.png and /dev/null differ
diff --git a/site/img/dcos/scale.png b/site/img/dcos/scale.png
deleted file mode 100644 index a7efeeed99f..00000000000
Binary files a/site/img/dcos/scale.png and /dev/null differ
diff --git a/site/img/dcos/services.png b/site/img/dcos/services.png
deleted file mode 100644 index 60d7e6f0bbd..00000000000
Binary files a/site/img/dcos/services.png and /dev/null differ
diff --git a/site/img/dcos/tasks.png b/site/img/dcos/tasks.png
deleted file mode 100644 index f5bf5ceeec4..00000000000
Binary files a/site/img/dcos/tasks.png and /dev/null differ
diff --git a/site/img/java-icon.svg b/site/img/java-icon.svg
deleted file mode 100644 index 00a1b88acff..00000000000
[deleted SVG image: single-line Java icon markup, elided]
diff --git a/site/img/kubernetes-logo.png b/site/img/kubernetes-logo.png
deleted file mode 100644 index 77f507fe986..00000000000
Binary files a/site/img/kubernetes-logo.png and /dev/null differ
diff --git a/site/img/logos/bk-logo-with-text.png b/site/img/logos/bk-logo-with-text.png
deleted file mode 100644 index 80ca369bbad..00000000000
Binary files a/site/img/logos/bk-logo-with-text.png and /dev/null differ
diff --git a/site/img/logos/bk-logo-with-text.svg b/site/img/logos/bk-logo-with-text.svg
deleted file mode 100644 index 36ff7e7d565..00000000000
[deleted SVG image: 79 lines of OmniGraffle-produced BookKeeper logo markup, elided]
diff --git a/site/img/logos/bk-logo-with-tm.png b/site/img/logos/bk-logo-with-tm.png
deleted file mode 100644 index b9b27682ede..00000000000
Binary files a/site/img/logos/bk-logo-with-tm.png and /dev/null differ
diff --git a/site/img/logos/bk-logo-with-tm.svg b/site/img/logos/bk-logo-with-tm.svg
deleted file mode 100644 index ac91efb7b7f..00000000000
[deleted SVG image: 81 lines of OmniGraffle-produced BookKeeper logo markup, elided]
diff --git a/site/img/logos/bk-logo.png b/site/img/logos/bk-logo.png
deleted file mode 100644 index cd532883f04..00000000000
Binary files a/site/img/logos/bk-logo.png and /dev/null differ
diff --git a/site/img/logos/bk-logo.svg b/site/img/logos/bk-logo.svg
deleted file mode 100644 index e42c2d46593..00000000000
[deleted SVG image: 53 lines of OmniGraffle-produced BookKeeper logo markup, elided]
diff --git a/site/index.html b/site/index.html
deleted file mode 100644
index ab55e2b2daf..00000000000
--- a/site/index.html
+++ /dev/null
@@ -1,57 +0,0 @@
----
-title: Home
----
-
-
-
-
-    {% include head.html %}
-
-    [index.html body markup elided: the page's HTML layout tags were lost in extraction; the recoverable template content follows]
-    {% include navbar.html %}
-    {{ site.title }}
-    {{ site.subtitle }}
-
          - - {% include footer.html %} - {% include javascript.html %} - - diff --git a/site/js/app.js b/site/js/app.js deleted file mode 100644 index 1d28a40d6db..00000000000 --- a/site/js/app.js +++ /dev/null @@ -1,32 +0,0 @@ -document.addEventListener('DOMContentLoaded', function () { - - // Get all "navbar-burger" elements - var $navbarBurgers = Array.prototype.slice.call(document.querySelectorAll('.navbar-burger'), 0); - - // Check if there are any nav burgers - if ($navbarBurgers.length > 0) { - - // Add a click event on each of them - $navbarBurgers.forEach(function ($el) { - $el.addEventListener('click', () => { - - // Get the target from the "data-target" attribute - var target = $el.dataset.target; - var $target = document.getElementById(target); - - // Toggle the class on both the "navbar-burger" and the "navbar-menu" - $el.classList.toggle('is-active'); - $target.classList.toggle('is-active'); - - }); - }); - } - - // Enable popovers for terms like "bookie" and "ledger" - tippy('.popover', { - position: 'top', - duration: 100, - arrow: true, - html: true - }); -}); \ No newline at end of file diff --git a/site/js/jquery-3.2.1.min.js b/site/js/jquery-3.2.1.min.js deleted file mode 100644 index 644d35e274f..00000000000 --- a/site/js/jquery-3.2.1.min.js +++ /dev/null @@ -1,4 +0,0 @@ -/*! jQuery v3.2.1 | (c) JS Foundation and other contributors | jquery.org/license */ -!function(a,b){"use strict";"object"==typeof module&&"object"==typeof module.exports?module.exports=a.document?b(a,!0):function(a){if(!a.document)throw new Error("jQuery requires a window with a document");return b(a)}:b(a)}("undefined"!=typeof window?window:this,function(a,b){"use strict";var c=[],d=a.document,e=Object.getPrototypeOf,f=c.slice,g=c.concat,h=c.push,i=c.indexOf,j={},k=j.toString,l=j.hasOwnProperty,m=l.toString,n=m.call(Object),o={};function p(a,b){b=b||d;var c=b.createElement("script");c.text=a,b.head.appendChild(c).parentNode.removeChild(c)}var q="3.2.1",r=function(a,b){return new r.fn.init(a,b)},s=/^[\s\uFEFF\xA0]+|[\s\uFEFF\xA0]+$/g,t=/^-ms-/,u=/-([a-z])/g,v=function(a,b){return b.toUpperCase()};r.fn=r.prototype={jquery:q,constructor:r,length:0,toArray:function(){return f.call(this)},get:function(a){return null==a?f.call(this):a<0?this[a+this.length]:this[a]},pushStack:function(a){var b=r.merge(this.constructor(),a);return b.prevObject=this,b},each:function(a){return r.each(this,a)},map:function(a){return this.pushStack(r.map(this,function(b,c){return a.call(b,c,b)}))},slice:function(){return this.pushStack(f.apply(this,arguments))},first:function(){return this.eq(0)},last:function(){return this.eq(-1)},eq:function(a){var b=this.length,c=+a+(a<0?b:0);return this.pushStack(c>=0&&c0&&b-1 in a)}var x=function(a){var b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u="sizzle"+1*new Date,v=a.document,w=0,x=0,y=ha(),z=ha(),A=ha(),B=function(a,b){return a===b&&(l=!0),0},C={}.hasOwnProperty,D=[],E=D.pop,F=D.push,G=D.push,H=D.slice,I=function(a,b){for(var c=0,d=a.length;c+~]|"+K+")"+K+"*"),S=new RegExp("="+K+"*([^\\]'\"]*?)"+K+"*\\]","g"),T=new RegExp(N),U=new RegExp("^"+L+"$"),V={ID:new RegExp("^#("+L+")"),CLASS:new RegExp("^\\.("+L+")"),TAG:new RegExp("^("+L+"|[*])"),ATTR:new RegExp("^"+M),PSEUDO:new RegExp("^"+N),CHILD:new RegExp("^:(only|first|last|nth|nth-last)-(child|of-type)(?:\\("+K+"*(even|odd|(([+-]|)(\\d*)n|)"+K+"*(?:([+-]|)"+K+"*(\\d+)|))"+K+"*\\)|)","i"),bool:new RegExp("^(?:"+J+")$","i"),needsContext:new 
RegExp("^"+K+"*[>+~]|:(even|odd|eq|gt|lt|nth|first|last)(?:\\("+K+"*((?:-\\d)?\\d*)"+K+"*\\)|)(?=[^-]|$)","i")},W=/^(?:input|select|textarea|button)$/i,X=/^h\d$/i,Y=/^[^{]+\{\s*\[native \w/,Z=/^(?:#([\w-]+)|(\w+)|\.([\w-]+))$/,$=/[+~]/,_=new RegExp("\\\\([\\da-f]{1,6}"+K+"?|("+K+")|.)","ig"),aa=function(a,b,c){var d="0x"+b-65536;return d!==d||c?b:d<0?String.fromCharCode(d+65536):String.fromCharCode(d>>10|55296,1023&d|56320)},ba=/([\0-\x1f\x7f]|^-?\d)|^-$|[^\0-\x1f\x7f-\uFFFF\w-]/g,ca=function(a,b){return b?"\0"===a?"\ufffd":a.slice(0,-1)+"\\"+a.charCodeAt(a.length-1).toString(16)+" ":"\\"+a},da=function(){m()},ea=ta(function(a){return a.disabled===!0&&("form"in a||"label"in a)},{dir:"parentNode",next:"legend"});try{G.apply(D=H.call(v.childNodes),v.childNodes),D[v.childNodes.length].nodeType}catch(fa){G={apply:D.length?function(a,b){F.apply(a,H.call(b))}:function(a,b){var c=a.length,d=0;while(a[c++]=b[d++]);a.length=c-1}}}function ga(a,b,d,e){var f,h,j,k,l,o,r,s=b&&b.ownerDocument,w=b?b.nodeType:9;if(d=d||[],"string"!=typeof a||!a||1!==w&&9!==w&&11!==w)return d;if(!e&&((b?b.ownerDocument||b:v)!==n&&m(b),b=b||n,p)){if(11!==w&&(l=Z.exec(a)))if(f=l[1]){if(9===w){if(!(j=b.getElementById(f)))return d;if(j.id===f)return d.push(j),d}else if(s&&(j=s.getElementById(f))&&t(b,j)&&j.id===f)return d.push(j),d}else{if(l[2])return G.apply(d,b.getElementsByTagName(a)),d;if((f=l[3])&&c.getElementsByClassName&&b.getElementsByClassName)return G.apply(d,b.getElementsByClassName(f)),d}if(c.qsa&&!A[a+" "]&&(!q||!q.test(a))){if(1!==w)s=b,r=a;else if("object"!==b.nodeName.toLowerCase()){(k=b.getAttribute("id"))?k=k.replace(ba,ca):b.setAttribute("id",k=u),o=g(a),h=o.length;while(h--)o[h]="#"+k+" "+sa(o[h]);r=o.join(","),s=$.test(a)&&qa(b.parentNode)||b}if(r)try{return G.apply(d,s.querySelectorAll(r)),d}catch(x){}finally{k===u&&b.removeAttribute("id")}}}return i(a.replace(P,"$1"),b,d,e)}function ha(){var a=[];function b(c,e){return a.push(c+" ")>d.cacheLength&&delete b[a.shift()],b[c+" "]=e}return b}function ia(a){return a[u]=!0,a}function ja(a){var b=n.createElement("fieldset");try{return!!a(b)}catch(c){return!1}finally{b.parentNode&&b.parentNode.removeChild(b),b=null}}function ka(a,b){var c=a.split("|"),e=c.length;while(e--)d.attrHandle[c[e]]=b}function la(a,b){var c=b&&a,d=c&&1===a.nodeType&&1===b.nodeType&&a.sourceIndex-b.sourceIndex;if(d)return d;if(c)while(c=c.nextSibling)if(c===b)return-1;return a?1:-1}function ma(a){return function(b){var c=b.nodeName.toLowerCase();return"input"===c&&b.type===a}}function na(a){return function(b){var c=b.nodeName.toLowerCase();return("input"===c||"button"===c)&&b.type===a}}function oa(a){return function(b){return"form"in b?b.parentNode&&b.disabled===!1?"label"in b?"label"in b.parentNode?b.parentNode.disabled===a:b.disabled===a:b.isDisabled===a||b.isDisabled!==!a&&ea(b)===a:b.disabled===a:"label"in b&&b.disabled===a}}function pa(a){return ia(function(b){return b=+b,ia(function(c,d){var e,f=a([],c.length,b),g=f.length;while(g--)c[e=f[g]]&&(c[e]=!(d[e]=c[e]))})})}function qa(a){return a&&"undefined"!=typeof a.getElementsByTagName&&a}c=ga.support={},f=ga.isXML=function(a){var b=a&&(a.ownerDocument||a).documentElement;return!!b&&"HTML"!==b.nodeName},m=ga.setDocument=function(a){var b,e,g=a?a.ownerDocument||a:v;return g!==n&&9===g.nodeType&&g.documentElement?(n=g,o=n.documentElement,p=!f(n),v!==n&&(e=n.defaultView)&&e.top!==e&&(e.addEventListener?e.addEventListener("unload",da,!1):e.attachEvent&&e.attachEvent("onunload",da)),c.attributes=ja(function(a){return 
a.className="i",!a.getAttribute("className")}),c.getElementsByTagName=ja(function(a){return a.appendChild(n.createComment("")),!a.getElementsByTagName("*").length}),c.getElementsByClassName=Y.test(n.getElementsByClassName),c.getById=ja(function(a){return o.appendChild(a).id=u,!n.getElementsByName||!n.getElementsByName(u).length}),c.getById?(d.filter.ID=function(a){var b=a.replace(_,aa);return function(a){return a.getAttribute("id")===b}},d.find.ID=function(a,b){if("undefined"!=typeof b.getElementById&&p){var c=b.getElementById(a);return c?[c]:[]}}):(d.filter.ID=function(a){var b=a.replace(_,aa);return function(a){var c="undefined"!=typeof a.getAttributeNode&&a.getAttributeNode("id");return c&&c.value===b}},d.find.ID=function(a,b){if("undefined"!=typeof b.getElementById&&p){var c,d,e,f=b.getElementById(a);if(f){if(c=f.getAttributeNode("id"),c&&c.value===a)return[f];e=b.getElementsByName(a),d=0;while(f=e[d++])if(c=f.getAttributeNode("id"),c&&c.value===a)return[f]}return[]}}),d.find.TAG=c.getElementsByTagName?function(a,b){return"undefined"!=typeof b.getElementsByTagName?b.getElementsByTagName(a):c.qsa?b.querySelectorAll(a):void 0}:function(a,b){var c,d=[],e=0,f=b.getElementsByTagName(a);if("*"===a){while(c=f[e++])1===c.nodeType&&d.push(c);return d}return f},d.find.CLASS=c.getElementsByClassName&&function(a,b){if("undefined"!=typeof b.getElementsByClassName&&p)return b.getElementsByClassName(a)},r=[],q=[],(c.qsa=Y.test(n.querySelectorAll))&&(ja(function(a){o.appendChild(a).innerHTML="",a.querySelectorAll("[msallowcapture^='']").length&&q.push("[*^$]="+K+"*(?:''|\"\")"),a.querySelectorAll("[selected]").length||q.push("\\["+K+"*(?:value|"+J+")"),a.querySelectorAll("[id~="+u+"-]").length||q.push("~="),a.querySelectorAll(":checked").length||q.push(":checked"),a.querySelectorAll("a#"+u+"+*").length||q.push(".#.+[+~]")}),ja(function(a){a.innerHTML="";var b=n.createElement("input");b.setAttribute("type","hidden"),a.appendChild(b).setAttribute("name","D"),a.querySelectorAll("[name=d]").length&&q.push("name"+K+"*[*^$|!~]?="),2!==a.querySelectorAll(":enabled").length&&q.push(":enabled",":disabled"),o.appendChild(a).disabled=!0,2!==a.querySelectorAll(":disabled").length&&q.push(":enabled",":disabled"),a.querySelectorAll("*,:x"),q.push(",.*:")})),(c.matchesSelector=Y.test(s=o.matches||o.webkitMatchesSelector||o.mozMatchesSelector||o.oMatchesSelector||o.msMatchesSelector))&&ja(function(a){c.disconnectedMatch=s.call(a,"*"),s.call(a,"[s!='']:x"),r.push("!=",N)}),q=q.length&&new RegExp(q.join("|")),r=r.length&&new RegExp(r.join("|")),b=Y.test(o.compareDocumentPosition),t=b||Y.test(o.contains)?function(a,b){var c=9===a.nodeType?a.documentElement:a,d=b&&b.parentNode;return a===d||!(!d||1!==d.nodeType||!(c.contains?c.contains(d):a.compareDocumentPosition&&16&a.compareDocumentPosition(d)))}:function(a,b){if(b)while(b=b.parentNode)if(b===a)return!0;return!1},B=b?function(a,b){if(a===b)return l=!0,0;var d=!a.compareDocumentPosition-!b.compareDocumentPosition;return d?d:(d=(a.ownerDocument||a)===(b.ownerDocument||b)?a.compareDocumentPosition(b):1,1&d||!c.sortDetached&&b.compareDocumentPosition(a)===d?a===n||a.ownerDocument===v&&t(v,a)?-1:b===n||b.ownerDocument===v&&t(v,b)?1:k?I(k,a)-I(k,b):0:4&d?-1:1)}:function(a,b){if(a===b)return l=!0,0;var c,d=0,e=a.parentNode,f=b.parentNode,g=[a],h=[b];if(!e||!f)return a===n?-1:b===n?1:e?-1:f?1:k?I(k,a)-I(k,b):0;if(e===f)return la(a,b);c=a;while(c=c.parentNode)g.unshift(c);c=b;while(c=c.parentNode)h.unshift(c);while(g[d]===h[d])d++;return 
d?la(g[d],h[d]):g[d]===v?-1:h[d]===v?1:0},n):n},ga.matches=function(a,b){return ga(a,null,null,b)},ga.matchesSelector=function(a,b){if((a.ownerDocument||a)!==n&&m(a),b=b.replace(S,"='$1']"),c.matchesSelector&&p&&!A[b+" "]&&(!r||!r.test(b))&&(!q||!q.test(b)))try{var d=s.call(a,b);if(d||c.disconnectedMatch||a.document&&11!==a.document.nodeType)return d}catch(e){}return ga(b,n,null,[a]).length>0},ga.contains=function(a,b){return(a.ownerDocument||a)!==n&&m(a),t(a,b)},ga.attr=function(a,b){(a.ownerDocument||a)!==n&&m(a);var e=d.attrHandle[b.toLowerCase()],f=e&&C.call(d.attrHandle,b.toLowerCase())?e(a,b,!p):void 0;return void 0!==f?f:c.attributes||!p?a.getAttribute(b):(f=a.getAttributeNode(b))&&f.specified?f.value:null},ga.escape=function(a){return(a+"").replace(ba,ca)},ga.error=function(a){throw new Error("Syntax error, unrecognized expression: "+a)},ga.uniqueSort=function(a){var b,d=[],e=0,f=0;if(l=!c.detectDuplicates,k=!c.sortStable&&a.slice(0),a.sort(B),l){while(b=a[f++])b===a[f]&&(e=d.push(f));while(e--)a.splice(d[e],1)}return k=null,a},e=ga.getText=function(a){var b,c="",d=0,f=a.nodeType;if(f){if(1===f||9===f||11===f){if("string"==typeof a.textContent)return a.textContent;for(a=a.firstChild;a;a=a.nextSibling)c+=e(a)}else if(3===f||4===f)return a.nodeValue}else while(b=a[d++])c+=e(b);return c},d=ga.selectors={cacheLength:50,createPseudo:ia,match:V,attrHandle:{},find:{},relative:{">":{dir:"parentNode",first:!0}," ":{dir:"parentNode"},"+":{dir:"previousSibling",first:!0},"~":{dir:"previousSibling"}},preFilter:{ATTR:function(a){return a[1]=a[1].replace(_,aa),a[3]=(a[3]||a[4]||a[5]||"").replace(_,aa),"~="===a[2]&&(a[3]=" "+a[3]+" "),a.slice(0,4)},CHILD:function(a){return a[1]=a[1].toLowerCase(),"nth"===a[1].slice(0,3)?(a[3]||ga.error(a[0]),a[4]=+(a[4]?a[5]+(a[6]||1):2*("even"===a[3]||"odd"===a[3])),a[5]=+(a[7]+a[8]||"odd"===a[3])):a[3]&&ga.error(a[0]),a},PSEUDO:function(a){var b,c=!a[6]&&a[2];return V.CHILD.test(a[0])?null:(a[3]?a[2]=a[4]||a[5]||"":c&&T.test(c)&&(b=g(c,!0))&&(b=c.indexOf(")",c.length-b)-c.length)&&(a[0]=a[0].slice(0,b),a[2]=c.slice(0,b)),a.slice(0,3))}},filter:{TAG:function(a){var b=a.replace(_,aa).toLowerCase();return"*"===a?function(){return!0}:function(a){return a.nodeName&&a.nodeName.toLowerCase()===b}},CLASS:function(a){var b=y[a+" "];return b||(b=new RegExp("(^|"+K+")"+a+"("+K+"|$)"))&&y(a,function(a){return b.test("string"==typeof a.className&&a.className||"undefined"!=typeof a.getAttribute&&a.getAttribute("class")||"")})},ATTR:function(a,b,c){return function(d){var e=ga.attr(d,a);return null==e?"!="===b:!b||(e+="","="===b?e===c:"!="===b?e!==c:"^="===b?c&&0===e.indexOf(c):"*="===b?c&&e.indexOf(c)>-1:"$="===b?c&&e.slice(-c.length)===c:"~="===b?(" "+e.replace(O," ")+" ").indexOf(c)>-1:"|="===b&&(e===c||e.slice(0,c.length+1)===c+"-"))}},CHILD:function(a,b,c,d,e){var f="nth"!==a.slice(0,3),g="last"!==a.slice(-4),h="of-type"===b;return 1===d&&0===e?function(a){return!!a.parentNode}:function(b,c,i){var j,k,l,m,n,o,p=f!==g?"nextSibling":"previousSibling",q=b.parentNode,r=h&&b.nodeName.toLowerCase(),s=!i&&!h,t=!1;if(q){if(f){while(p){m=b;while(m=m[p])if(h?m.nodeName.toLowerCase()===r:1===m.nodeType)return!1;o=p="only"===a&&!o&&"nextSibling"}return!0}if(o=[g?q.firstChild:q.lastChild],g&&s){m=q,l=m[u]||(m[u]={}),k=l[m.uniqueID]||(l[m.uniqueID]={}),j=k[a]||[],n=j[0]===w&&j[1],t=n&&j[2],m=n&&q.childNodes[n];while(m=++n&&m&&m[p]||(t=n=0)||o.pop())if(1===m.nodeType&&++t&&m===b){k[a]=[w,n,t];break}}else 
if(s&&(m=b,l=m[u]||(m[u]={}),k=l[m.uniqueID]||(l[m.uniqueID]={}),j=k[a]||[],n=j[0]===w&&j[1],t=n),t===!1)while(m=++n&&m&&m[p]||(t=n=0)||o.pop())if((h?m.nodeName.toLowerCase()===r:1===m.nodeType)&&++t&&(s&&(l=m[u]||(m[u]={}),k=l[m.uniqueID]||(l[m.uniqueID]={}),k[a]=[w,t]),m===b))break;return t-=e,t===d||t%d===0&&t/d>=0}}},PSEUDO:function(a,b){var c,e=d.pseudos[a]||d.setFilters[a.toLowerCase()]||ga.error("unsupported pseudo: "+a);return e[u]?e(b):e.length>1?(c=[a,a,"",b],d.setFilters.hasOwnProperty(a.toLowerCase())?ia(function(a,c){var d,f=e(a,b),g=f.length;while(g--)d=I(a,f[g]),a[d]=!(c[d]=f[g])}):function(a){return e(a,0,c)}):e}},pseudos:{not:ia(function(a){var b=[],c=[],d=h(a.replace(P,"$1"));return d[u]?ia(function(a,b,c,e){var f,g=d(a,null,e,[]),h=a.length;while(h--)(f=g[h])&&(a[h]=!(b[h]=f))}):function(a,e,f){return b[0]=a,d(b,null,f,c),b[0]=null,!c.pop()}}),has:ia(function(a){return function(b){return ga(a,b).length>0}}),contains:ia(function(a){return a=a.replace(_,aa),function(b){return(b.textContent||b.innerText||e(b)).indexOf(a)>-1}}),lang:ia(function(a){return U.test(a||"")||ga.error("unsupported lang: "+a),a=a.replace(_,aa).toLowerCase(),function(b){var c;do if(c=p?b.lang:b.getAttribute("xml:lang")||b.getAttribute("lang"))return c=c.toLowerCase(),c===a||0===c.indexOf(a+"-");while((b=b.parentNode)&&1===b.nodeType);return!1}}),target:function(b){var c=a.location&&a.location.hash;return c&&c.slice(1)===b.id},root:function(a){return a===o},focus:function(a){return a===n.activeElement&&(!n.hasFocus||n.hasFocus())&&!!(a.type||a.href||~a.tabIndex)},enabled:oa(!1),disabled:oa(!0),checked:function(a){var b=a.nodeName.toLowerCase();return"input"===b&&!!a.checked||"option"===b&&!!a.selected},selected:function(a){return a.parentNode&&a.parentNode.selectedIndex,a.selected===!0},empty:function(a){for(a=a.firstChild;a;a=a.nextSibling)if(a.nodeType<6)return!1;return!0},parent:function(a){return!d.pseudos.empty(a)},header:function(a){return X.test(a.nodeName)},input:function(a){return W.test(a.nodeName)},button:function(a){var b=a.nodeName.toLowerCase();return"input"===b&&"button"===a.type||"button"===b},text:function(a){var b;return"input"===a.nodeName.toLowerCase()&&"text"===a.type&&(null==(b=a.getAttribute("type"))||"text"===b.toLowerCase())},first:pa(function(){return[0]}),last:pa(function(a,b){return[b-1]}),eq:pa(function(a,b,c){return[c<0?c+b:c]}),even:pa(function(a,b){for(var c=0;c=0;)a.push(d);return a}),gt:pa(function(a,b,c){for(var d=c<0?c+b:c;++d1?function(b,c,d){var e=a.length;while(e--)if(!a[e](b,c,d))return!1;return!0}:a[0]}function va(a,b,c){for(var d=0,e=b.length;d-1&&(f[j]=!(g[j]=l))}}else r=wa(r===g?r.splice(o,r.length):r),e?e(null,g,r,i):G.apply(g,r)})}function ya(a){for(var b,c,e,f=a.length,g=d.relative[a[0].type],h=g||d.relative[" "],i=g?1:0,k=ta(function(a){return a===b},h,!0),l=ta(function(a){return I(b,a)>-1},h,!0),m=[function(a,c,d){var e=!g&&(d||c!==j)||((b=c).nodeType?k(a,c,d):l(a,c,d));return b=null,e}];i1&&ua(m),i>1&&sa(a.slice(0,i-1).concat({value:" "===a[i-2].type?"*":""})).replace(P,"$1"),c,i0,e=a.length>0,f=function(f,g,h,i,k){var 
l,o,q,r=0,s="0",t=f&&[],u=[],v=j,x=f||e&&d.find.TAG("*",k),y=w+=null==v?1:Math.random()||.1,z=x.length;for(k&&(j=g===n||g||k);s!==z&&null!=(l=x[s]);s++){if(e&&l){o=0,g||l.ownerDocument===n||(m(l),h=!p);while(q=a[o++])if(q(l,g||n,h)){i.push(l);break}k&&(w=y)}c&&((l=!q&&l)&&r--,f&&t.push(l))}if(r+=s,c&&s!==r){o=0;while(q=b[o++])q(t,u,g,h);if(f){if(r>0)while(s--)t[s]||u[s]||(u[s]=E.call(i));u=wa(u)}G.apply(i,u),k&&!f&&u.length>0&&r+b.length>1&&ga.uniqueSort(i)}return k&&(w=y,j=v),t};return c?ia(f):f}return h=ga.compile=function(a,b){var c,d=[],e=[],f=A[a+" "];if(!f){b||(b=g(a)),c=b.length;while(c--)f=ya(b[c]),f[u]?d.push(f):e.push(f);f=A(a,za(e,d)),f.selector=a}return f},i=ga.select=function(a,b,c,e){var f,i,j,k,l,m="function"==typeof a&&a,n=!e&&g(a=m.selector||a);if(c=c||[],1===n.length){if(i=n[0]=n[0].slice(0),i.length>2&&"ID"===(j=i[0]).type&&9===b.nodeType&&p&&d.relative[i[1].type]){if(b=(d.find.ID(j.matches[0].replace(_,aa),b)||[])[0],!b)return c;m&&(b=b.parentNode),a=a.slice(i.shift().value.length)}f=V.needsContext.test(a)?0:i.length;while(f--){if(j=i[f],d.relative[k=j.type])break;if((l=d.find[k])&&(e=l(j.matches[0].replace(_,aa),$.test(i[0].type)&&qa(b.parentNode)||b))){if(i.splice(f,1),a=e.length&&sa(i),!a)return G.apply(c,e),c;break}}}return(m||h(a,n))(e,b,!p,c,!b||$.test(a)&&qa(b.parentNode)||b),c},c.sortStable=u.split("").sort(B).join("")===u,c.detectDuplicates=!!l,m(),c.sortDetached=ja(function(a){return 1&a.compareDocumentPosition(n.createElement("fieldset"))}),ja(function(a){return a.innerHTML="","#"===a.firstChild.getAttribute("href")})||ka("type|href|height|width",function(a,b,c){if(!c)return a.getAttribute(b,"type"===b.toLowerCase()?1:2)}),c.attributes&&ja(function(a){return a.innerHTML="",a.firstChild.setAttribute("value",""),""===a.firstChild.getAttribute("value")})||ka("value",function(a,b,c){if(!c&&"input"===a.nodeName.toLowerCase())return a.defaultValue}),ja(function(a){return null==a.getAttribute("disabled")})||ka(J,function(a,b,c){var d;if(!c)return a[b]===!0?b.toLowerCase():(d=a.getAttributeNode(b))&&d.specified?d.value:null}),ga}(a);r.find=x,r.expr=x.selectors,r.expr[":"]=r.expr.pseudos,r.uniqueSort=r.unique=x.uniqueSort,r.text=x.getText,r.isXMLDoc=x.isXML,r.contains=x.contains,r.escapeSelector=x.escape;var y=function(a,b,c){var d=[],e=void 0!==c;while((a=a[b])&&9!==a.nodeType)if(1===a.nodeType){if(e&&r(a).is(c))break;d.push(a)}return d},z=function(a,b){for(var c=[];a;a=a.nextSibling)1===a.nodeType&&a!==b&&c.push(a);return c},A=r.expr.match.needsContext;function B(a,b){return a.nodeName&&a.nodeName.toLowerCase()===b.toLowerCase()}var C=/^<([a-z][^\/\0>:\x20\t\r\n\f]*)[\x20\t\r\n\f]*\/?>(?:<\/\1>|)$/i,D=/^.[^:#\[\.,]*$/;function E(a,b,c){return r.isFunction(b)?r.grep(a,function(a,d){return!!b.call(a,d,a)!==c}):b.nodeType?r.grep(a,function(a){return a===b!==c}):"string"!=typeof b?r.grep(a,function(a){return i.call(b,a)>-1!==c}):D.test(b)?r.filter(b,a,c):(b=r.filter(b,a),r.grep(a,function(a){return i.call(b,a)>-1!==c&&1===a.nodeType}))}r.filter=function(a,b,c){var d=b[0];return c&&(a=":not("+a+")"),1===b.length&&1===d.nodeType?r.find.matchesSelector(d,a)?[d]:[]:r.find.matches(a,r.grep(b,function(a){return 1===a.nodeType}))},r.fn.extend({find:function(a){var b,c,d=this.length,e=this;if("string"!=typeof a)return this.pushStack(r(a).filter(function(){for(b=0;b1?r.uniqueSort(c):c},filter:function(a){return this.pushStack(E(this,a||[],!1))},not:function(a){return this.pushStack(E(this,a||[],!0))},is:function(a){return!!E(this,"string"==typeof 
[vendored third-party asset: minified jQuery (v3.x) library source, not reproduced]